1
0

PucReadmeParser.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. <?php
  if ( !class_exists('PucReadmeParser', false) ):
  /**
   * Parser for WordPress-style plugin readme files.
   *
   * This is a slightly modified version of github.com/markjaquith/WordPress-Plugin-Readme-Parser
   * It uses Parsedown instead of the "Markdown Extra" parser.
   *
   * The parser calls the WordPress functions esc_html(), esc_url(), balanceTags() and wp_kses(),
   * so those must be defined before a readme with a valid name header is parsed.
   */
  class PucReadmeParser {
      public function __construct() {
          // This space intentionally blank
      }

      /**
       * Read a readme file and parse it.
       *
       * @param string $file Path (or any stream wrapper URL) of the readme.
       * @return array Parsed readme data, or an empty array when the file cannot be
       *               read or has no "=== Plugin Name ===" header.
       */
      public function parse_readme( $file ) {
          // Best-effort read: warnings are suppressed on purpose, failure is handled below.
          $file_contents = @file_get_contents($file);
          if ( false === $file_contents ) {
              return array();
          }
          return $this->parse_readme_contents( $file_contents );
      }

      /**
       * Parse the text of a readme.
       *
       * @param string $file_contents Raw readme text.
       * @return array Empty array if the text does not start with a "=== Name ===" header.
       *               Otherwise an array with the keys: name, tags, requires_at_least,
       *               tested_up_to, requires_php, stable_tag, contributors, donate_link,
       *               short_description, screenshots, is_excerpt, is_truncated, sections,
       *               remaining_content, upgrade_notice.
       */
      public function parse_readme_contents( $file_contents ) {
          $file_contents = (string) $file_contents;

          // Drop a UTF-8 byte order mark first, so that trim() below can also remove
          // any whitespace that follows it.
          if ( 0 === strpos( $file_contents, "\xEF\xBB\xBF" ) ) {
              $file_contents = substr( $file_contents, 3 );
          }
          $file_contents = str_replace(array("\r\n", "\r"), "\n", $file_contents);
          $file_contents = trim($file_contents);

          // Markdown transformations: "# Heading" style becomes "=== Heading ===" style.
          $file_contents = preg_replace( "|^###([^#]+)#*?\s*?\n|im", '=$1='."\n", $file_contents );
          $file_contents = preg_replace( "|^##([^#]+)#*?\s*?\n|im", '==$1=='."\n", $file_contents );
          $file_contents = preg_replace( "|^#([^#]+)#*?\s*?\n|im", '===$1==='."\n", $file_contents );

          // === Plugin Name ===
          // Must be the very first thing.
          if ( !preg_match('|^===(.*)===|', $file_contents, $_name) ) {
              return array(); // require a name
          }
          $name = $this->sanitize_text( trim($_name[1], '=') );
          $file_contents = $this->chop_string( $file_contents, $_name[0] );

          // Header fields. All patterns are matched against the same text; the matched
          // lines are only removed afterwards because the fields are optional and may
          // appear in any order.
          $header_patterns = array(
              'requires_at_least' => '|Requires at least:(.*)|i', // Requires at least: 1.5
              'tested_up_to'      => '|Tested up to:(.*)|i',      // Tested up to: 2.1
              'requires_php'      => '|Requires PHP:(.*)|i',      // Requires PHP: 5.2.4
              'stable_tag'        => '|Stable tag:(.*)|i',        // Stable tag: 10.4-ride-the-fire-eagle-danger-day
              'tags'              => '|Tags:(.*)|i',              // Tags: some tag, another tag, we like tags
              'contributors'      => '|Contributors:(.*)|i',      // Contributors: markjaquith, mdawaffe, zefrank
              'donate_link'       => '|Donate link:(.*)|i',       // Donate link: URL
          );
          $header_matches = array();
          foreach ( $header_patterns as $field => $pattern ) {
              if ( preg_match($pattern, $file_contents, $match) ) {
                  $header_matches[$field] = $match;
              }
          }

          $requires_at_least = isset($header_matches['requires_at_least'])
              ? $this->sanitize_text( $header_matches['requires_at_least'][1] )
              : null;
          $tested_up_to = isset($header_matches['tested_up_to'])
              ? $this->sanitize_text( $header_matches['tested_up_to'][1] )
              : null;
          $requires_php = isset($header_matches['requires_php'])
              ? $this->sanitize_text( $header_matches['requires_php'][1] )
              : null;
          // A missing stable tag stays null so callers can tell "trunk was specified"
          // apart from "nothing was specified".
          $stable_tag = isset($header_matches['stable_tag'])
              ? $this->sanitize_text( $header_matches['stable_tag'][1] )
              : null;

          $tags = array();
          if ( isset($header_matches['tags']) ) {
              $tags = preg_split('|,[\s]*?|', trim($header_matches['tags'][1]));
              foreach ( array_keys($tags) as $t ) {
                  $tags[$t] = $this->sanitize_text( $tags[$t] );
              }
          }

          $contributors = array();
          if ( isset($header_matches['contributors']) ) {
              $candidates = preg_split('|,[\s]*|', trim($header_matches['contributors'][1]));
              foreach ( $candidates as $index => $candidate ) {
                  $login = $this->user_sanitize( $candidate );
                  // Entries that are empty after sanitizing are skipped; the original
                  // positions are kept as array keys.
                  if ( strlen(trim($login)) > 0 ) {
                      $contributors[$index] = $login;
                  }
              }
          }

          $donate_link = isset($header_matches['donate_link'])
              ? esc_url( $header_matches['donate_link'][1] )
              : null;

          // Remove every header line that was found. chop_string() discards everything
          // up to and including the matched line. The decision is based on whether the
          // line matched, not on whether its sanitized value is truthy, so headers such
          // as "Requires PHP:" or an empty "Donate link:" do not leak into the
          // short description.
          $chop_order = array(
              'tags', 'contributors', 'requires_at_least', 'tested_up_to',
              'requires_php', 'stable_tag', 'donate_link',
          );
          foreach ( $chop_order as $field ) {
              if ( isset($header_matches[$field]) ) {
                  $file_contents = $this->chop_string( $file_contents, $header_matches[$field][0] );
              }
          }
          $file_contents = trim($file_contents);

          // Short description: the text before the first "= heading =" style line.
          // When there is no heading at all, the whole remaining text is used. The text
          // is copied by value so that chopping $file_contents below cannot empty it.
          if ( preg_match('/(^(.*?))^[\s]*=+?[\s]*.+?[\s]*=+?/ms', $file_contents, $_short_description) ) {
              $short_desc_chunk = $_short_description[1];
              $short_desc_text  = $_short_description[2];
          } else {
              $short_desc_chunk = $file_contents;
              $short_desc_text  = $file_contents;
          }
          $short_desc_filtered = $this->sanitize_text( $short_desc_text );
          $short_description   = $this->truncate_text( $short_desc_filtered, 150 );
          $truncated           = strlen($short_desc_filtered) > strlen($short_description);
          if ( '' !== $short_desc_chunk ) {
              $file_contents = $this->chop_string( $file_contents, $short_desc_chunk );
          }

          // == Section ==
          // Break into sections. With PREG_SPLIT_DELIM_CAPTURE the result alternates:
          // title1, content1, title2, content2, ...
          $_sections = preg_split('/^[\s]*==[\s]*(.+?)[\s]*==/m', $file_contents, -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);
          $sections = array();
          $section_part_count = count($_sections);
          for ( $i = 0; $i < $section_part_count; $i += 2 ) {
              $title = $this->sanitize_text( $_sections[$i] );
              if ( isset($_sections[$i + 1]) ) {
                  // "= Sub heading =" lines inside a section become <h4> elements.
                  $content = preg_replace('/(^[\s]*)=[\s]+(.+?)[\s]+=/m', '$1<h4>$2</h4>', $_sections[$i + 1]);
                  $content = $this->filter_text( $content, true );
              } else {
                  $content = '';
              }
              $sections[str_replace(' ', '_', strtolower($title))] = array('title' => $title, 'content' => $content);
          }

          // Special sections are pulled out here so their order is fixed.
          // upgrade_notice is not a real section, but it is parsed like one for now.
          $special_sections = array(
              'description', 'installation', 'frequently_asked_questions',
              'screenshots', 'changelog', 'change_log', 'upgrade_notice',
          );
          $final_sections = array();
          foreach ( $special_sections as $special_section ) {
              if ( isset($sections[$special_section]) ) {
                  $final_sections[$special_section] = $sections[$special_section]['content'];
                  unset($sections[$special_section]);
              }
          }
          if ( isset($final_sections['change_log']) && empty($final_sections['changelog']) ) {
              $final_sections['changelog'] = $final_sections['change_log'];
          }

          // Each <li> of the screenshots section is one screenshot caption.
          $final_screenshots = array();
          if ( isset($final_sections['screenshots']) ) {
              preg_match_all('|<li>(.*?)</li>|s', $final_sections['screenshots'], $screenshots, PREG_SET_ORDER);
              if ( $screenshots ) {
                  foreach ( (array) $screenshots as $ss ) {
                      $final_screenshots[] = $ss[1];
                  }
              }
          }

          // Parse the upgrade_notice section specially:
          // 1.0 => blah, 1.1 => fnord
          $upgrade_notice = array();
          if ( isset($final_sections['upgrade_notice']) ) {
              $split = preg_split( '#<h4>(.*?)</h4>#', $final_sections['upgrade_notice'], -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
              $split_count = count($split);
              if ( $split_count >= 2 ) {
                  for ( $i = 0; $i < $split_count; $i += 2 ) {
                      // An odd number of parts leaves a trailing version with no notice text.
                      $notice = isset($split[$i + 1]) ? $split[$i + 1] : '';
                      $upgrade_notice[$this->sanitize_text( $split[$i] )] = $this->truncate_text( $this->sanitize_text( $notice ), 300 );
                  }
              }
              unset( $final_sections['upgrade_notice'] );
          }

          // No description? Fall back to the old style of description: the text that
          // precedes the first section, this time with markup allowed.
          $excerpt = false;
          if ( !isset($final_sections['description']) ) {
              $final_sections = array_merge(array('description' => $this->filter_text( $short_desc_text, true )), $final_sections);
              $excerpt = true;
          }

          // Dump the non-special sections into $remaining_content,
          // in the order they appeared in the readme.
          $remaining_content = '';
          foreach ( $sections as $s_data ) {
              $remaining_content .= "\n<h3>{$s_data['title']}</h3>\n{$s_data['content']}";
          }
          $remaining_content = trim($remaining_content);

          // All done!
          // 'tags' and 'contributors' are simple arrays, 'sections' has named elements.
          return array(
              'name' => $name,
              'tags' => $tags,
              'requires_at_least' => $requires_at_least,
              'tested_up_to' => $tested_up_to,
              'requires_php' => $requires_php,
              'stable_tag' => $stable_tag,
              'contributors' => $contributors,
              'donate_link' => $donate_link,
              'short_description' => $short_description,
              'screenshots' => $final_screenshots,
              'is_excerpt' => $excerpt,
              'is_truncated' => $truncated,
              'sections' => $final_sections,
              'remaining_content' => $remaining_content,
              'upgrade_notice' => $upgrade_notice
          );
      }

      /**
       * Remove everything up to and including the first occurrence of $chop.
       *
       * Aggressive: the needle does not have to be at the start of the string.
       *
       * @param string $string Text to shorten.
       * @param string $chop   Text to search for.
       * @return string The trimmed remainder, or the trimmed input if $chop is empty
       *                or does not occur.
       */
      public function chop_string( $string, $chop ) {
          $string = (string) $string;
          $chop   = (string) $chop;
          if ( '' === $chop ) {
              return trim($string);
          }
          $tail = strstr($string, $chop);
          // Compare against false explicitly: a tail of "0" is a valid match.
          if ( false === $tail ) {
              return trim($string);
          }
          return trim( (string) substr($tail, strlen($chop)) );
      }

      /**
       * Reduce a user name to whitelisted characters.
       *
       * @param string $text   Raw user name.
       * @param bool   $strict When true, underscores are removed as well and runs of
       *                       dashes are collapsed to a single dash.
       * @return string
       */
      public function user_sanitize( $text, $strict = false ) {
          if ( function_exists('user_sanitize') ) { // bbPress native
              return user_sanitize( $text, $strict );
          }
          if ( $strict ) {
              $text = preg_replace('/[^a-z0-9-]/i', '', $text);
              $text = preg_replace('|-+|', '-', $text);
          } else {
              $text = preg_replace('/[^a-z0-9_-]/i', '', $text);
          }
          return $text;
      }

      /**
       * Plain-text sanitizing: strip tags, escape for HTML output, trim.
       *
       * @param string $text
       * @return string
       */
      public function sanitize_text( $text ) {
          $text = strip_tags($text);
          $text = esc_html($text);
          $text = trim($text);
          return $text;
      }

      /**
       * Rich-text filtering: converts code blocks, optionally renders Markdown with
       * Parsedown, then balances tags and strips everything outside a small tag whitelist.
       *
       * @param string $text
       * @param bool   $markdown Whether to run the text through Parsedown.
       * @return string Filtered HTML.
       */
      public function filter_text( $text, $markdown = false ) {
          $text = trim($text);
          $text = $this->code_trick( $text, $markdown ); // A better parser than Markdown's for: backticks -> CODE
          if ( $markdown ) { // Parse markdown.
              if ( !class_exists('Parsedown', false) ) {
                  // Load the bundled parser; PHP versions below 5.3 get the legacy build.
                  /** @noinspection PhpIncludeInspection */
                  require_once(dirname(__FILE__) . '/Parsedown' . (version_compare(PHP_VERSION, '5.3.0', '>=') ? '' : 'Legacy') . '.php');
              }
              $instance = Parsedown::instance();
              $text = $instance->text($text);
          }
          $allowed = array(
              'a' => array(
                  'href' => array(),
                  'title' => array(),
                  'rel' => array()),
              'blockquote' => array('cite' => array()),
              'br' => array(),
              'p' => array(),
              'code' => array(),
              'pre' => array(),
              'em' => array(),
              'strong' => array(),
              'ul' => array(),
              'ol' => array(),
              'li' => array(),
              'h3' => array(),
              'h4' => array()
          );
          $text = balanceTags($text);
          $text = wp_kses( $text, $allowed );
          $text = trim($text);
          return $text;
      }

      /**
       * Convert between backtick code notation and HTML code elements.
       *
       * Does not use the bbPress native function - it is incompatible with Markdown.
       *
       * @param string $text
       * @param bool   $markdown When true, existing <code> elements are turned back into
       *                         backticks and backtick blocks that start a line are
       *                         indented for Markdown. When false, backticks are turned
       *                         into <code> / <pre><code> elements.
       * @return string
       */
      public function code_trick( $text, $markdown ) {
          // If doing markdown, first take any user formatted code blocks and turn them into
          // backticks so that markdown will preserve things like underscores in code blocks.
          if ( $markdown ) {
              $text = preg_replace_callback("!(<pre><code>|<code>)(.*?)(</code></pre>|</code>)!s", array( $this, 'decodeit' ), $text);
          }
          $text = str_replace(array("\r\n", "\r"), "\n", $text);
          if ( !$markdown ) {
              // This gets the "inline" code blocks, but can't be used with Markdown.
              $text = preg_replace_callback("|(`)(.*?)`|", array( $this, 'encodeit' ), $text);
              // This gets the "block level" code blocks and converts them to PRE CODE
              $text = preg_replace_callback("!(^|\n)`(.*?)`!s", array( $this, 'encodeit' ), $text);
          } else {
              // Markdown can do inline code, we convert bbPress style block level code to Markdown style
              $text = preg_replace_callback("!(^|\n)([ \t]*?)`(.*?)`!s", array( $this, 'indent' ), $text);
          }
          return $text;
      }

      /**
       * preg_replace_callback() handler: turn a backtick block into a Markdown
       * indented code block.
       *
       * @param array $matches [1] line start ("" or "\n"), [2] existing leading
       *                       whitespace, [3] the code between the backticks.
       * @return string
       */
      public function indent( $matches ) {
          $text = $matches[3];
          // Markdown only treats a line as code when it is indented by four spaces.
          $text = preg_replace('|^|m', $matches[2] . '    ', $text);
          return $matches[1] . $text;
      }

      /**
       * preg_replace_callback() handler: wrap backtick code in <code>, and additionally
       * in <pre> when the first capture group is not a backtick (block level match).
       *
       * @param array $matches [1] a backtick or the line start, [2] the code.
       * @return string
       */
      public function encodeit( $matches ) {
          if ( function_exists('encodeit') ) { // bbPress native
              return encodeit( $matches );
          }
          $text = trim($matches[2]);
          $text = htmlspecialchars($text, ENT_QUOTES);
          $text = str_replace(array("\r\n", "\r"), "\n", $text);
          $text = preg_replace("|\n\n\n+|", "\n\n", $text);
          // Undo double encoding of angle brackets that were already entities.
          $text = str_replace('&amp;lt;', '&lt;', $text);
          $text = str_replace('&amp;gt;', '&gt;', $text);
          $text = "<code>$text</code>";
          if ( "`" !== $matches[1] ) {
              $text = "<pre>$text</pre>";
          }
          return $text;
      }

      /**
       * preg_replace_callback() handler: turn a <code> element back into backtick
       * notation with HTML entities decoded.
       *
       * @param array $matches [1] the opening tag(s), [2] the code.
       * @return string
       */
      public function decodeit( $matches ) {
          if ( function_exists('decodeit') ) { // bbPress native
              return decodeit( $matches );
          }
          $text = $matches[2];
          $trans_table = array_flip(get_html_translation_table(HTML_ENTITIES));
          $text = strtr($text, $trans_table);
          $text = str_replace('<br />', '', $text);
          $text = str_replace('&#38;', '&', $text);
          $text = str_replace('&#39;', "'", $text);
          if ( '<pre><code>' === $matches[1] ) {
              $text = "\n$text\n";
          }
          return "`$text`";
      }

      /**
       * Cut text to at most $limit characters without splitting a multibyte UTF-8
       * character; falls back to byte-wise truncation when mbstring is unavailable.
       *
       * @param string $text
       * @param int    $limit
       * @return string
       */
      private function truncate_text( $text, $limit ) {
          if ( function_exists('mb_substr') ) {
              return mb_substr($text, 0, $limit, 'UTF-8');
          }
          return (string) substr($text, 0, $limit);
      }
  } // end class
  endif;