Speed Booster Pack - Version 4.1.1

Version Description

Download this release

Release Info

Developer optimocha
Plugin Icon 128x128 Speed Booster Pack
Version 4.1.1
Comparing to
See all releases

Code changes from version 4.1.0 to 4.1.1

Files changed (178) hide show
  1. README.txt +9 -0
  2. admin/class-speed-booster-pack-admin.php +19 -41
  3. admin/js/speed-booster-pack-admin.js +1 -1
  4. includes/classes/class-sbp-cache-warmup.php +3 -2
  5. includes/classes/class-sbp-cloudflare.php +3 -3
  6. includes/classes/class-sbp-css-minifier.php +3 -4
  7. includes/classes/class-sbp-warmup-process.php +1 -14
  8. includes/sbp-helpers.php +1 -1
  9. speed-booster-pack.php +3 -2
  10. vendor/simplehtmldom/simplehtmldom/.gitattributes +0 -13
  11. vendor/simplehtmldom/simplehtmldom/.gitignore +0 -12
  12. vendor/simplehtmldom/simplehtmldom/.travis.yml +0 -46
  13. vendor/simplehtmldom/simplehtmldom/CHANGELOG.md +368 -368
  14. vendor/simplehtmldom/simplehtmldom/Debug.php +149 -149
  15. vendor/simplehtmldom/simplehtmldom/HtmlDocument.php +1133 -1133
  16. vendor/simplehtmldom/simplehtmldom/HtmlNode.php +1441 -1441
  17. vendor/simplehtmldom/simplehtmldom/HtmlWeb.php +134 -134
  18. vendor/simplehtmldom/simplehtmldom/LICENSE +20 -20
  19. vendor/simplehtmldom/simplehtmldom/README.md +0 -116
  20. vendor/simplehtmldom/simplehtmldom/composer.json +57 -57
  21. vendor/simplehtmldom/simplehtmldom/constants.php +28 -28
  22. vendor/simplehtmldom/simplehtmldom/example/example_advanced_selector.php +0 -59
  23. vendor/simplehtmldom/simplehtmldom/example/example_basic_selector.php +0 -35
  24. vendor/simplehtmldom/simplehtmldom/example/example_callback.php +0 -21
  25. vendor/simplehtmldom/simplehtmldom/example/example_extract_html.php +0 -7
  26. vendor/simplehtmldom/simplehtmldom/example/example_modify_contents.php +0 -15
  27. vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_digg.php +0 -55
  28. vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_imdb.php +0 -22
  29. vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_slashdot.php +0 -33
  30. vendor/simplehtmldom/simplehtmldom/manual/README.md +0 -75
  31. vendor/simplehtmldom/simplehtmldom/manual/custom_theme/main.html +0 -7
  32. vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/disable.md +0 -9
  33. vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/enable.md +0 -9
  34. vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/index.md +0 -31
  35. vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/log.md +0 -9
  36. vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/log_once.md +0 -9
  37. vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/setDebugHandler.md +0 -9
  38. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__call.md +0 -9
  39. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__construct.md +0 -22
  40. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__debugInfo.md +0 -9
  41. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__destruct.md +0 -9
  42. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__get.md +0 -19
  43. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__toString.md +0 -9
  44. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/as_text_node.md +0 -15
  45. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/childNodes.md +0 -13
  46. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_skip.md +0 -15
  47. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_until.md +0 -15
  48. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_until_char.md +0 -15
  49. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/createElement.md +0 -16
  50. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/createTextNode.md +0 -11
  51. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/decode.md +0 -9
  52. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/dump.md +0 -15
  53. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/expect.md +0 -15
  54. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/find.md +0 -17
  55. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/firstChild.md +0 -9
  56. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementById.md +0 -15
  57. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementByTagName.md +0 -15
  58. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementsById.md +0 -16
  59. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementsByTagName.md +0 -16
  60. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/index.md +0 -38
  61. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/lastChild.md +0 -9
  62. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/link_nodes.md +0 -14
  63. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/load.md +0 -20
  64. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/loadFile.md +0 -11
  65. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse.md +0 -9
  66. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse_attr.md +0 -15
  67. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse_charset.md +0 -17
  68. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/prepare.md +0 -16
  69. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/read_tag.md +0 -11
  70. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/remove_callback.md +0 -9
  71. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/remove_noise.md +0 -16
  72. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/restore_noise.md +0 -15
  73. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/save.md +0 -15
  74. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/search_noise.md +0 -15
  75. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/set_callback.md +0 -14
  76. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/skip.md +0 -14
  77. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__call.md +0 -9
  78. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__construct.md +0 -13
  79. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__debugInfo.md +0 -9
  80. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__destruct.md +0 -9
  81. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__get.md +0 -24
  82. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__isset.md +0 -21
  83. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__set.md +0 -20
  84. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__toString.md +0 -9
  85. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__unset.md +0 -9
  86. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/addClass.md +0 -25
  87. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/appendChild.md +0 -15
  88. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/childNodes.md +0 -13
  89. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/clear.md +0 -9
  90. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/convert_text.md +0 -15
  91. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/definitions.md +0 -96
  92. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/dump.md +0 -14
  93. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/dump_node.md +0 -13
  94. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/expect.md +0 -15
  95. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/find.md +0 -47
  96. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/find_ancestor_tag.md +0 -13
  97. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/firstChild.md +0 -9
  98. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getAllAttributes.md +0 -9
  99. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getAttribute.md +0 -13
  100. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementById.md +0 -13
  101. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementByTagName.md +0 -13
  102. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementsById.md +0 -14
  103. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementsByTagName.md +0 -14
  104. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/get_display_size.md +0 -11
  105. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasAttribute.md +0 -13
  106. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasChildNodes.md +0 -9
  107. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasClass.md +0 -19
  108. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/index.md +0 -28
  109. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/innertext.md +0 -9
  110. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_block_element.md +0 -16
  111. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_inline_element.md +0 -16
  112. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_utf8.md +0 -13
  113. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/lastChild.md +0 -9
  114. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/makeup.md +0 -9
  115. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/match.md +0 -21
  116. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/nextSibling.md +0 -9
  117. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/nodeName.md +0 -9
  118. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/outertext.md +0 -9
  119. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parent.md +0 -14
  120. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parentNode.md +0 -9
  121. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parse_selector.md +0 -13
  122. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/previousSibling.md +0 -9
  123. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/remove.md +0 -41
  124. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeAttribute.md +0 -13
  125. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeChild.md +0 -45
  126. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeClass.md +0 -27
  127. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/save.md +0 -22
  128. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/seek.md +0 -21
  129. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/setAttribute.md +0 -14
  130. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/text.md +0 -27
  131. vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/xmltext.md +0 -9
  132. vendor/simplehtmldom/simplehtmldom/manual/docs/api/constants.md +0 -31
  133. vendor/simplehtmldom/simplehtmldom/manual/docs/extra.css +0 -9
  134. vendor/simplehtmldom/simplehtmldom/manual/docs/faq/0001.md +0 -67
  135. vendor/simplehtmldom/simplehtmldom/manual/docs/index.md +0 -33
  136. vendor/simplehtmldom/simplehtmldom/manual/docs/manual/accessing-element-attributes.md +0 -40
  137. vendor/simplehtmldom/simplehtmldom/manual/docs/manual/adding-nodes.md +0 -123
  138. vendor/simplehtmldom/simplehtmldom/manual/docs/manual/creating-dom-objects.md +0 -37
  139. vendor/simplehtmldom/simplehtmldom/manual/docs/manual/customizing-parsing-behavior.md +0 -18
  140. vendor/simplehtmldom/simplehtmldom/manual/docs/manual/finding-html-elements.md +0 -90
  141. vendor/simplehtmldom/simplehtmldom/manual/docs/manual/saving-dom-objects.md +0 -11
  142. vendor/simplehtmldom/simplehtmldom/manual/docs/manual/traversing-dom-tree.md +0 -23
  143. vendor/simplehtmldom/simplehtmldom/manual/docs/quick-start.md +0 -98
  144. vendor/simplehtmldom/simplehtmldom/manual/docs/requirements.md +0 -7
  145. vendor/simplehtmldom/simplehtmldom/manual/mkdocs.yml +0 -152
  146. vendor/simplehtmldom/simplehtmldom/phpcompatibility.xml +0 -11
  147. vendor/simplehtmldom/simplehtmldom/phpcs.xml +0 -48
  148. vendor/simplehtmldom/simplehtmldom/phpunit.xml +0 -41
  149. vendor/simplehtmldom/simplehtmldom/release.sh +0 -69
  150. vendor/simplehtmldom/simplehtmldom/simple_html_dom.php +153 -153
  151. vendor/simplehtmldom/simplehtmldom/tests/attribute_test.php +0 -50
  152. vendor/simplehtmldom/simplehtmldom/tests/bug_report_test.php +0 -476
  153. vendor/simplehtmldom/simplehtmldom/tests/callback_test.php +0 -45
  154. vendor/simplehtmldom/simplehtmldom/tests/cdata_test.php +0 -69
  155. vendor/simplehtmldom/simplehtmldom/tests/charset_test.php +0 -80
  156. vendor/simplehtmldom/simplehtmldom/tests/comment_test.php +0 -93
  157. vendor/simplehtmldom/simplehtmldom/tests/css_selector_test.php +0 -646
  158. vendor/simplehtmldom/simplehtmldom/tests/debug_info_test.php +0 -37
  159. vendor/simplehtmldom/simplehtmldom/tests/debug_with_callback_test.php +0 -83
  160. vendor/simplehtmldom/simplehtmldom/tests/doctype_test.php +0 -47
  161. vendor/simplehtmldom/simplehtmldom/tests/dom_manipulation_test.php +0 -102
  162. vendor/simplehtmldom/simplehtmldom/tests/entity_decoding_test.php +0 -62
  163. vendor/simplehtmldom/simplehtmldom/tests/file_get_html_test.php +0 -80
  164. vendor/simplehtmldom/simplehtmldom/tests/htmldocument___call_test.php +0 -39
  165. vendor/simplehtmldom/simplehtmldom/tests/htmldocument_test.php +0 -249
  166. vendor/simplehtmldom/simplehtmldom/tests/htmlnode___call_test.php +0 -77
  167. vendor/simplehtmldom/simplehtmldom/tests/htmlnode_test.php +0 -628
  168. vendor/simplehtmldom/simplehtmldom/tests/htmlweb_curl_test.php +0 -49
  169. vendor/simplehtmldom/simplehtmldom/tests/htmlweb_fopen_test.php +0 -53
  170. vendor/simplehtmldom/simplehtmldom/tests/htmlweb_test.php +0 -49
  171. vendor/simplehtmldom/simplehtmldom/tests/memory_parsing_test.php +0 -131
  172. vendor/simplehtmldom/simplehtmldom/tests/optional_tags_test.php +0 -791
  173. vendor/simplehtmldom/simplehtmldom/tests/script_test.php +0 -69
  174. vendor/simplehtmldom/simplehtmldom/tests/self_closing_tags_test.php +0 -417
  175. vendor/simplehtmldom/simplehtmldom/tests/server_side_script_test.php +0 -28
  176. vendor/simplehtmldom/simplehtmldom/tests/str_get_html_test.php +0 -18
  177. vendor/simplehtmldom/simplehtmldom/tests/style_test.php +0 -58
  178. vendor/simplehtmldom/simplehtmldom/tests/whitespace_test.php +0 -483
README.txt CHANGED
@@ -6,6 +6,7 @@ Stable tag: 4.1.0
6
  Requires at least: 4.6
7
  Tested up to: 5.6
8
  Requires PHP: 5.6
 
9
  License: GPLv3 or later
10
  License URI: https://www.gnu.org/licenses/gpl-3.0.html
11
 
@@ -104,6 +105,14 @@ All the time! We're always looking for new ways to get this plugin to a better s
104
 
105
  == Changelog ==
106
 
 
 
 
 
 
 
 
 
107
  = 4.1.0 =
108
 
109
  *Release Date: 21 January 2021*
6
  Requires at least: 4.6
7
  Tested up to: 5.6
8
  Requires PHP: 5.6
9
+ Stable tag: 4.1.1
10
  License: GPLv3 or later
11
  License URI: https://www.gnu.org/licenses/gpl-3.0.html
12
 
105
 
106
  == Changelog ==
107
 
108
+ *Release Date: 28 January 2021*
109
+
110
+ * **Improved**: Wrote better descriptions for certain settings.
111
+ * **Fixed**: Tiny warning on Cloudflare settings.
112
+ * **Fixed**: Inline CSS wasn't able to parse some `url()` values, until now.
113
+ * **Fixed**: Cache warmup now uses a more reliable process to handle pages.
114
+ * **Fixed**: SBP won't try to _optimize_ files that are generated on-the-fly by WordPress (like robots.txt or the RSS feeds).
115
+
116
  = 4.1.0 =
117
 
118
  *Release Date: 21 January 2021*
admin/class-speed-booster-pack-admin.php CHANGED
@@ -282,7 +282,7 @@ class Speed_Booster_Pack_Admin {
282
  '<strong>' . __( 'Special', 'speed-booster-pack' ) . '</strong>: ' . __( 'This module has features for specific cases like CDN usage, localizing tracker scripts, adding custom JavaScript code and optimizations for some popular plugins.', 'speed-booster-pack' ) . '</li><li>' .
283
  '<strong>' . __( 'Tweaks', 'speed-booster-pack' ) . '</strong>: ' . __( 'This module lets you tweak the WordPress core and your page sources by dequeueing core scripts/styles, decluttering &lt;head&gt;, optimizing revisions and the Heartbeat API and so on.', 'speed-booster-pack' ) . '</li></ul>' .
284
  '<p>' . __( 'Feel free to experiment, and don\'t forget to create exclude rules when necessary!', 'speed-booster-pack' ) . '</p>',
285
- ], // LAHMACUNTODO: Fetching clouflare settings ibaresi ekle.
286
  [
287
  'type' => 'subheading',
288
  'content' => __( 'Upcoming features', 'speed-booster-pack' ),
@@ -351,9 +351,7 @@ class Speed_Booster_Pack_Admin {
351
  'id' => 'caching_warmup_after_clear',
352
  'type' => 'switcher',
353
  'title' => __( 'Warm up cache on clear', 'speed-booster-pack' ),
354
- // BEYNTODO: Change text
355
- 'desc' => __( 'Warm up cache everytime cache cleared.', 'speed-booster-pack' ),
356
- // BEYNTODO: Change text
357
  'dependency' => [ 'module_caching', '==', '1', '', 'visible' ],
358
  ],
359
  [
@@ -434,37 +432,37 @@ class Speed_Booster_Pack_Admin {
434
  'desc' => __( 'You can find your zone ID in the Overview tab on your Cloudflare panel.', 'speed-booster-pack' ),
435
  ],
436
  [
437
- 'title' => __( 'Toggle Rocket Loader', 'speed-booster-pack' ), // BEYNTODO: Change title
438
  'id' => 'cf_rocket_loader_enable',
439
  'class' => 'with-preloader',
440
  'type' => 'switcher',
441
  ],
442
  [
443
- 'title' => __( 'Toggle Development Mode', 'speed-booster-pack' ), // BEYNTODO: Change title
444
  'id' => 'cf_dev_mode_enable',
445
  'class' => 'with-preloader',
446
  'type' => 'switcher',
447
  ],
448
  [
449
- 'title' => __( 'Toggle CSS Minify', 'speed-booster-pack' ), // BEYNTODO: Change title
450
  'id' => 'cf_css_minify_enable',
451
  'class' => 'with-preloader',
452
  'type' => 'switcher',
453
  ],
454
  [
455
- 'title' => __( 'Toggle HTML Minify', 'speed-booster-pack' ), // BEYNTODO: Change title
456
  'id' => 'cf_html_minify_enable',
457
  'class' => 'with-preloader',
458
  'type' => 'switcher',
459
  ],
460
  [
461
- 'title' => __( 'Toggle JS Minify', 'speed-booster-pack' ), // BEYNTODO: Change title
462
  'id' => 'cf_js_minify_enable',
463
  'class' => 'with-preloader',
464
  'type' => 'switcher',
465
  ],
466
  [
467
- 'title' => __( 'Browser Cache TTL', 'speed-booster-pack' ), // BEYNTODO: Change title
468
  'id' => 'cf_browser_cache_ttl',
469
  'class' => 'with-preloader',
470
  'type' => 'select',
@@ -498,12 +496,12 @@ class Speed_Booster_Pack_Admin {
498
  'type' => 'content',
499
  'content' => '
500
  <span>
501
- <a href="#" class="button button-small sbp-cloudflare-test">Test Your Cloudflare Connection <span class="sbp-cloudflare-spinner"></span></a>
502
  <span class="sbp-cloudflare-fetching">' . __( 'Fetching Cloudflare settings...', 'speed-booster-pack' ) . '</span>
503
  </span>
504
  <span class="sbp-cloudflare-info-text sbp-cloudflare-incorrect" style="color:red; vertical-align: middle;"><i class="fa fa-exclamation-triangle"></i> ' . __( 'Your Cloudflare credentials are incorrect.', 'speed-booster-pack' ) . '</span>
505
  <span class="sbp-cloudflare-info-text sbp-cloudflare-correct" style="color:green; vertical-align: middle;"><i class="fa fa-check-circle"></i> ' . __( 'Your Cloudflare credentials are correct.', 'speed-booster-pack' ) . '</span>
506
- <span class="sbp-cloudflare-info-text sbp-cloudflare-warning" style="color:orange; vertical-align: middle;"><i class="fa fa-exclamation-circle"></i> ' . __( 'You should provide your Cloudflare credentials and save settings to see CloudFlare options.', 'speed-booster-pack' ) . '</span>
507
  ',
508
  ],
509
  ];
@@ -567,13 +565,13 @@ class Speed_Booster_Pack_Admin {
567
  'title' => __( 'Included Directories', 'speed-booster-pack' ),
568
  'id' => 'cdn_includes',
569
  'type' => 'code_editor',
570
- 'desc' => __( 'Write included directory names', 'speed-booster-pack' ), // BEYNTODO: Change text
571
  ],
572
  [
573
  'title' => __( 'Excluded Extensions', 'speed-booster-pack' ),
574
  'id' => 'cdn_excludes',
575
  'type' => 'code_editor',
576
- 'desc' => __( 'Excluded file extensions', 'speed-booster-pack' ), // BEYNTODO: Change text
577
  ],
578
  ],
579
  $cloudflare_fields,
@@ -589,19 +587,18 @@ class Speed_Booster_Pack_Admin {
589
  );
590
  /* END Section: CDN & Proxy */
591
 
592
- // BEYNTODO: Need to check the new section.
593
- /* BEGIN Section: CSS Optimizations */
594
  CSF::createSection(
595
  $prefix,
596
  [
597
- 'title' => __( 'CSS Optimizations', 'speed-booster-pack' ),
598
  'id' => 'css',
599
  'icon' => 'fa fa-palette',
600
  'fields' => [
601
 
602
  [
603
  /* translators: used like "Enable/Disable XXX" where "XXX" is the module name. */
604
- 'title' => __( 'Enable/Disable', 'speed-booster-pack' ) . ' ' . __( 'CSS Optimizations', 'speed-booster-pack' ),
605
  'id' => 'module_css',
606
  'class' => 'module-css',
607
  'type' => 'switcher',
@@ -640,6 +637,7 @@ class Speed_Booster_Pack_Admin {
640
  [
641
  'id' => 'is_front_page',
642
  'type' => 'code_editor',
 
643
  'desc' => sprintf( __( 'This CSS block will be injected into the front page of your website. %1$s%2$s%3$s', 'speed-booster-pack' ), '<a href="https://developer.wordpress.org/reference/functions/is_front_page/" rel="external noopener" target="_blank">', sprintf( __( 'Learn more about %s.', 'speed-booster-pack' ), '<code>is_front_page()</code>' ), '</a>' ),
644
  ],
645
  ],
@@ -709,10 +707,8 @@ class Speed_Booster_Pack_Admin {
709
  ],
710
  [
711
  'title' => __( 'Remove critical CSS after onload', 'speed-booster-pack' ),
712
- // BEYNTODO: Change Text
713
  'id' => 'remove_criticalcss',
714
- 'desc' => __( 'Remove critical CSS after all css files loaded.', 'speed-booster-pack' ),
715
- // BEYNTODO: Change Text
716
  'type' => 'switcher',
717
  'default' => true,
718
  'dependency' => [ 'module_css|enable_criticalcss', '==|==', '1|1', '', 'visible' ],
@@ -746,7 +742,7 @@ class Speed_Booster_Pack_Admin {
746
  ],
747
  ]
748
  );
749
- /* END Section: CSS Optimizations */
750
 
751
  /* BEGIN Section: Assets */
752
  $asset_fields = [
@@ -1333,14 +1329,12 @@ class Speed_Booster_Pack_Admin {
1333
 
1334
  // Warmup Notice
1335
  if ( get_transient( 'sbp_warmup_started' ) ) {
1336
- // BEYNTODO: Add translator note
1337
  SBP_Notice_Manager::display_notice( 'sbp_warmup_started', '<p><strong>' . SBP_PLUGIN_NAME . ':</strong> ' . __( 'Cache warmup started.', 'speed-booster-pack' ) . '</p>', 'success', true, 'recurrent' );
1338
  }
1339
 
1340
  // Warmup Notice
1341
  if ( get_transient( 'sbp_warmup_complete' ) ) {
1342
- // BEYNTODO: Add translator note
1343
- SBP_Notice_Manager::display_notice( 'sbp_warmup_complete', '<p><strong>' . SBP_PLUGIN_NAME . ':</strong> ' . __( 'Static cache files created.', 'speed-booster-pack' ) . '</p>', 'success', true, 'recurrent' );
1344
  }
1345
 
1346
  // WP-Config Inject File Error
@@ -1352,22 +1346,6 @@ class Speed_Booster_Pack_Admin {
1352
  if ( get_transient( 'sbp_wp_config_error' ) ) {
1353
  SBP_Notice_Manager::display_notice( 'sbp_wp_config_error', '<p><strong>' . SBP_PLUGIN_NAME . '</strong> ' . __( 'Can not write wp-config.php file. Some ' . SBP_PLUGIN_NAME . ' features may not work. Please check your file permissions.', 'speed-booster-pack' ) . '</p>', 'error', true, 'recurrent' );
1354
  }
1355
-
1356
- // WP-Config File Error
1357
- if ( get_transient( 'sbp_warmup_errors' ) ) {
1358
- $list = '';
1359
- $errors = get_transient( 'sbp_warmup_errors' );
1360
- if ( is_array( $errors ) ) {
1361
- foreach ( $errors as $error ) {
1362
- $extras = [];
1363
- if ( isset( $error['options']['user-agent'] ) && $error['options']['user-agent'] === 'Mobile' ) {
1364
- $extras[] = '(Mobile)';
1365
- }
1366
- $list .= '<li><a href="' . $error['url'] . '" target="_blank">' . $error['url'] . ' ' . implode( ' ', $extras ) . '</a></li>';
1367
- }
1368
- SBP_Notice_Manager::display_notice( 'sbp_warmup_errors', '<p><strong>' . SBP_PLUGIN_NAME . '</strong> ' . __( 'Cache warmup completed but following pages may not be cached. Please check this pages are available. (Hover this notice to see all errors)', 'speed-booster-pack' ) . '</p><ul class="warmup-cache-error-list">' . $list . '</ul>', 'error', true, 'recurrent' );
1369
- }
1370
- }
1371
  }
1372
 
1373
  private function initialize_announce4wp() {
282
  '<strong>' . __( 'Special', 'speed-booster-pack' ) . '</strong>: ' . __( 'This module has features for specific cases like CDN usage, localizing tracker scripts, adding custom JavaScript code and optimizations for some popular plugins.', 'speed-booster-pack' ) . '</li><li>' .
283
  '<strong>' . __( 'Tweaks', 'speed-booster-pack' ) . '</strong>: ' . __( 'This module lets you tweak the WordPress core and your page sources by dequeueing core scripts/styles, decluttering &lt;head&gt;, optimizing revisions and the Heartbeat API and so on.', 'speed-booster-pack' ) . '</li></ul>' .
284
  '<p>' . __( 'Feel free to experiment, and don\'t forget to create exclude rules when necessary!', 'speed-booster-pack' ) . '</p>',
285
+ ], // Z_TODO: Fetching clouflare settings ibaresi ekle.
286
  [
287
  'type' => 'subheading',
288
  'content' => __( 'Upcoming features', 'speed-booster-pack' ),
351
  'id' => 'caching_warmup_after_clear',
352
  'type' => 'switcher',
353
  'title' => __( 'Warm up cache on clear', 'speed-booster-pack' ),
354
+ 'desc' => __( 'Creates cache files for the front page and all pages that are linked from the front page, each time the cache is cleared. Note that even though you don\'t turn this option on, you can manually warm up the cache from your admin bar.', 'speed-booster-pack' ),
 
 
355
  'dependency' => [ 'module_caching', '==', '1', '', 'visible' ],
356
  ],
357
  [
432
  'desc' => __( 'You can find your zone ID in the Overview tab on your Cloudflare panel.', 'speed-booster-pack' ),
433
  ],
434
  [
435
+ 'title' => __( 'Rocket Loader', 'speed-booster-pack' ),
436
  'id' => 'cf_rocket_loader_enable',
437
  'class' => 'with-preloader',
438
  'type' => 'switcher',
439
  ],
440
  [
441
+ 'title' => __( 'Development Mode', 'speed-booster-pack' ),
442
  'id' => 'cf_dev_mode_enable',
443
  'class' => 'with-preloader',
444
  'type' => 'switcher',
445
  ],
446
  [
447
+ 'title' => __( 'Minify CSS', 'speed-booster-pack' ),
448
  'id' => 'cf_css_minify_enable',
449
  'class' => 'with-preloader',
450
  'type' => 'switcher',
451
  ],
452
  [
453
+ 'title' => __( 'Minify HTML', 'speed-booster-pack' ),
454
  'id' => 'cf_html_minify_enable',
455
  'class' => 'with-preloader',
456
  'type' => 'switcher',
457
  ],
458
  [
459
+ 'title' => __( 'Minify JS', 'speed-booster-pack' ),
460
  'id' => 'cf_js_minify_enable',
461
  'class' => 'with-preloader',
462
  'type' => 'switcher',
463
  ],
464
  [
465
+ 'title' => __( 'Browser Cache TTL', 'speed-booster-pack' ),
466
  'id' => 'cf_browser_cache_ttl',
467
  'class' => 'with-preloader',
468
  'type' => 'select',
496
  'type' => 'content',
497
  'content' => '
498
  <span>
499
+ <a href="#" class="button button-small sbp-cloudflare-test">' . __( 'Test Cloudflare connection', 'speed-booster-pack' ) . '<span class="sbp-cloudflare-spinner"></span></a>
500
  <span class="sbp-cloudflare-fetching">' . __( 'Fetching Cloudflare settings...', 'speed-booster-pack' ) . '</span>
501
  </span>
502
  <span class="sbp-cloudflare-info-text sbp-cloudflare-incorrect" style="color:red; vertical-align: middle;"><i class="fa fa-exclamation-triangle"></i> ' . __( 'Your Cloudflare credentials are incorrect.', 'speed-booster-pack' ) . '</span>
503
  <span class="sbp-cloudflare-info-text sbp-cloudflare-correct" style="color:green; vertical-align: middle;"><i class="fa fa-check-circle"></i> ' . __( 'Your Cloudflare credentials are correct.', 'speed-booster-pack' ) . '</span>
504
+ <span class="sbp-cloudflare-info-text sbp-cloudflare-warning" style="color:orange; vertical-align: middle;"><i class="fa fa-exclamation-circle"></i> ' . __( 'Enter your Cloudflare credentials and save settings to see CloudFlare options.', 'speed-booster-pack' ) . '</span>
505
  ',
506
  ],
507
  ];
565
  'title' => __( 'Included Directories', 'speed-booster-pack' ),
566
  'id' => 'cdn_includes',
567
  'type' => 'code_editor',
568
+ 'desc' => __( 'Anything other than WordPress\'s existing directories should be entered here to be rewritten with the CDN domain. Separated by new lines.', 'speed-booster-pack' ),
569
  ],
570
  [
571
  'title' => __( 'Excluded Extensions', 'speed-booster-pack' ),
572
  'id' => 'cdn_excludes',
573
  'type' => 'code_editor',
574
+ 'desc' => __( 'If you want to exclude certain file types, enter the extensions here. Separated by new lines.', 'speed-booster-pack' ),
575
  ],
576
  ],
577
  $cloudflare_fields,
587
  );
588
  /* END Section: CDN & Proxy */
589
 
590
+ /* BEGIN Section: Optimize CSS */
 
591
  CSF::createSection(
592
  $prefix,
593
  [
594
+ 'title' => __( 'Optimize CSS', 'speed-booster-pack' ),
595
  'id' => 'css',
596
  'icon' => 'fa fa-palette',
597
  'fields' => [
598
 
599
  [
600
  /* translators: used like "Enable/Disable XXX" where "XXX" is the module name. */
601
+ 'title' => __( 'Enable/Disable', 'speed-booster-pack' ) . ' ' . __( 'Optimize CSS', 'speed-booster-pack' ),
602
  'id' => 'module_css',
603
  'class' => 'module-css',
604
  'type' => 'switcher',
637
  [
638
  'id' => 'is_front_page',
639
  'type' => 'code_editor',
640
+ // Z_TODO: Edit the following description.
641
  'desc' => sprintf( __( 'This CSS block will be injected into the front page of your website. %1$s%2$s%3$s', 'speed-booster-pack' ), '<a href="https://developer.wordpress.org/reference/functions/is_front_page/" rel="external noopener" target="_blank">', sprintf( __( 'Learn more about %s.', 'speed-booster-pack' ), '<code>is_front_page()</code>' ), '</a>' ),
642
  ],
643
  ],
707
  ],
708
  [
709
  'title' => __( 'Remove critical CSS after onload', 'speed-booster-pack' ),
 
710
  'id' => 'remove_criticalcss',
711
+ 'desc' => __( 'Remove critical CSS the onload event fires on browser. Enable this only if you\'re having styling issues when the pages finish loading.', 'speed-booster-pack' ),
 
712
  'type' => 'switcher',
713
  'default' => true,
714
  'dependency' => [ 'module_css|enable_criticalcss', '==|==', '1|1', '', 'visible' ],
742
  ],
743
  ]
744
  );
745
+ /* END Section: Optimize CSS */
746
 
747
  /* BEGIN Section: Assets */
748
  $asset_fields = [
1329
 
1330
  // Warmup Notice
1331
  if ( get_transient( 'sbp_warmup_started' ) ) {
 
1332
  SBP_Notice_Manager::display_notice( 'sbp_warmup_started', '<p><strong>' . SBP_PLUGIN_NAME . ':</strong> ' . __( 'Cache warmup started.', 'speed-booster-pack' ) . '</p>', 'success', true, 'recurrent' );
1333
  }
1334
 
1335
  // Warmup Notice
1336
  if ( get_transient( 'sbp_warmup_complete' ) ) {
1337
+ SBP_Notice_Manager::display_notice( 'sbp_warmup_complete', '<p><strong>' . SBP_PLUGIN_NAME . ':</strong> ' . __( 'Static cache files created.', 'speed-booster-pack' ) . '</p>', 'info', true, 'recurrent' );
 
1338
  }
1339
 
1340
  // WP-Config Inject File Error
1346
  if ( get_transient( 'sbp_wp_config_error' ) ) {
1347
  SBP_Notice_Manager::display_notice( 'sbp_wp_config_error', '<p><strong>' . SBP_PLUGIN_NAME . '</strong> ' . __( 'Can not write wp-config.php file. Some ' . SBP_PLUGIN_NAME . ' features may not work. Please check your file permissions.', 'speed-booster-pack' ) . '</p>', 'error', true, 'recurrent' );
1348
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1349
  }
1350
 
1351
  private function initialize_announce4wp() {
admin/js/speed-booster-pack-admin.js CHANGED
@@ -153,7 +153,7 @@
153
  } else if (response.status === 'empty_info') {
154
  $('.sbp-cloudflare-warning').show();
155
  } else {
156
- // LAHMACUNTODO: Baglanti hatalarinda ozel uyari goster.
157
  $('.sbp-cloudflare-incorrect').show();
158
  $('.with-preloader::before, .with-preloader::after').remove();
159
  }
153
  } else if (response.status === 'empty_info') {
154
  $('.sbp-cloudflare-warning').show();
155
  } else {
156
+ // Z_TODO: Baglanti hatalarinda ozel uyari goster.
157
  $('.sbp-cloudflare-incorrect').show();
158
  $('.with-preloader::before, .with-preloader::after').remove();
159
  }
includes/classes/class-sbp-cache-warmup.php CHANGED
@@ -78,8 +78,9 @@ class SBP_Cache_Warmup extends SBP_Abstract_Module {
78
  $urls[] = $href;
79
  $this->warmup_process->push_to_queue( [ 'url' => $href ] );
80
  if ( sbp_get_option( 'caching_separate_mobile' ) ) {
81
- $this->warmup_process->push_to_queue( [ 'url' => $href,
82
- 'options' => [ 'user-agent' => 'Mobile' ]
 
83
  ] );
84
  }
85
  }
78
  $urls[] = $href;
79
  $this->warmup_process->push_to_queue( [ 'url' => $href ] );
80
  if ( sbp_get_option( 'caching_separate_mobile' ) ) {
81
+ $this->warmup_process->push_to_queue( [
82
+ 'url' => $href,
83
+ 'options' => [ 'user-agent' => 'Mobile' ],
84
  ] );
85
  }
86
  }
includes/classes/class-sbp-cloudflare.php CHANGED
@@ -93,7 +93,7 @@ class SBP_Cloudflare extends SBP_Abstract_Module {
93
  if ( self::is_cloudflare_active() ) {
94
  $result = self::send_request( 'purge_cache', 'POST', [ 'purge_everything' => true ] );
95
 
96
- if ( true === $result['success'] ) {
97
  return true;
98
  }
99
  }
@@ -112,7 +112,7 @@ class SBP_Cloudflare extends SBP_Abstract_Module {
112
  public static function check_credentials( $override_credentials = [] ) {
113
  $result = self::send_request( 'check_credentials', 'GET', [], $override_credentials );
114
 
115
- if ( true === $result['success'] ) {
116
  return true;
117
  }
118
 
@@ -231,7 +231,7 @@ class SBP_Cloudflare extends SBP_Abstract_Module {
231
  $settings = [];
232
 
233
  $result = self::send_request( 'settings' );
234
- if ( $result['success'] ) {
235
  foreach ( $result['result'] as $setting ) {
236
  if ( in_array( $setting['id'], $settings_to_fetch ) ) {
237
  $settings[ $setting['id'] ] = $setting;
93
  if ( self::is_cloudflare_active() ) {
94
  $result = self::send_request( 'purge_cache', 'POST', [ 'purge_everything' => true ] );
95
 
96
+ if ( $result && true === $result['success'] ) {
97
  return true;
98
  }
99
  }
112
  public static function check_credentials( $override_credentials = [] ) {
113
  $result = self::send_request( 'check_credentials', 'GET', [], $override_credentials );
114
 
115
+ if ( $result && true === $result['success'] ) {
116
  return true;
117
  }
118
 
231
  $settings = [];
232
 
233
  $result = self::send_request( 'settings' );
234
+ if ( $result && $result['success'] ) {
235
  foreach ( $result['result'] as $setting ) {
236
  if ( in_array( $setting['id'], $settings_to_fetch ) ) {
237
  $settings[ $setting['id'] ] = $setting;
includes/classes/class-sbp-css-minifier.php CHANGED
@@ -105,7 +105,7 @@ class SBP_CSS_Minifier extends SBP_Abstract_Module {
105
  $css = $this->minify_css( $css );
106
  }
107
 
108
- $css = $this->rebuilding_css_urls( $css, $url );
109
 
110
  return $css;
111
  }
@@ -113,17 +113,16 @@ class SBP_CSS_Minifier extends SBP_Abstract_Module {
113
  return false;
114
  }
115
 
116
- private function rebuilding_css_urls( $css, $url ) {
117
  $css_dir = substr( $url, 0, strrpos( $url, '/' ) );
118
 
119
  // remove empty url() declarations
120
  $css = preg_replace( "/url\(\s?\)/", "", $css );
121
  // new regex expression
122
- $css = preg_replace( "/url(?!\(['\"]?(data:|http:|https:))\(['\"]?([^\/][^'\"\)]*)['\"]?\)/i",
123
  "url({$css_dir}/$2)",
124
  $css );
125
 
126
-
127
  return $css;
128
  }
129
 
105
  $css = $this->minify_css( $css );
106
  }
107
 
108
+ $css = $this->rebuild_css_urls( $css, $url );
109
 
110
  return $css;
111
  }
113
  return false;
114
  }
115
 
116
+ private function rebuild_css_urls( $css, $url ) {
117
  $css_dir = substr( $url, 0, strrpos( $url, '/' ) );
118
 
119
  // remove empty url() declarations
120
  $css = preg_replace( "/url\(\s?\)/", "", $css );
121
  // new regex expression
122
+ $css = preg_replace( "/url\s*(?!\(['\"]?(data:|http:|https:))\(\s*['\"]?([^\/][^'\"\)]*)['\"]?\s*\)/i",
123
  "url({$css_dir}/$2)",
124
  $css );
125
 
 
126
  return $css;
127
  }
128
 
includes/classes/class-sbp-warmup-process.php CHANGED
@@ -9,9 +9,6 @@ if ( ! defined( 'WPINC' ) ) {
9
 
10
  class SBP_Warmup_Process extends \WP_Background_Process {
11
  protected $action = 'warmup';
12
- private $done = [];
13
- private $success = [];
14
- private $failed = [];
15
  private $begun = false;
16
 
17
  protected function task( $item ) {
@@ -28,14 +25,7 @@ class SBP_Warmup_Process extends \WP_Background_Process {
28
  'limit_response_size' => 100,
29
  ], $options );
30
 
31
- $this->done[] = $item;
32
-
33
- $response = wp_remote_get( $item['url'], $args );
34
- if ( 200 !== wp_remote_retrieve_response_code( $response ) ) {
35
- $this->failed[] = $item;
36
- } else {
37
- $this->success[] = $item;
38
- }
39
 
40
  if ( $this->begun === false ) {
41
  set_transient( 'sbp_warmup_started', 1 );
@@ -46,9 +36,6 @@ class SBP_Warmup_Process extends \WP_Background_Process {
46
  }
47
 
48
  protected function complete() {
49
- /* translator: %s is the url of the page */
50
- // BEYNTODO: Change Text
51
- set_transient( 'sbp_warmup_errors', $this->failed );
52
  set_transient( 'sbp_warmup_complete', true );
53
  delete_transient( 'sbp_warmup_started' );
54
  parent::complete();
9
 
10
  class SBP_Warmup_Process extends \WP_Background_Process {
11
  protected $action = 'warmup';
 
 
 
12
  private $begun = false;
13
 
14
  protected function task( $item ) {
25
  'limit_response_size' => 100,
26
  ], $options );
27
 
28
+ wp_remote_get( $item['url'], $args );
 
 
 
 
 
 
 
29
 
30
  if ( $this->begun === false ) {
31
  set_transient( 'sbp_warmup_started', 1 );
36
  }
37
 
38
  protected function complete() {
 
 
 
39
  set_transient( 'sbp_warmup_complete', true );
40
  delete_transient( 'sbp_warmup_started' );
41
  parent::complete();
includes/sbp-helpers.php CHANGED
@@ -49,7 +49,7 @@ if ( ! function_exists( 'sbp_get_hosting_restrictions' ) ) {
49
  return 'Kinsta';
50
  }
51
 
52
- if ( function_exists( 'is_wpe' ) || function_exists( 'is_wpe_snapshot' ) ) { // LAHMACUNTODO: Check here
53
  return [ 'name' => 'WP Engine', 'disable_features' => [] ];
54
  }
55
 
49
  return 'Kinsta';
50
  }
51
 
52
+ if ( function_exists( 'is_wpe' ) || function_exists( 'is_wpe_snapshot' ) ) { // Z_TODO: Check here
53
  return [ 'name' => 'WP Engine', 'disable_features' => [] ];
54
  }
55
 
speed-booster-pack.php CHANGED
@@ -6,7 +6,7 @@
6
  * Plugin Name: Speed Booster Pack
7
  * Plugin URI: https://speedboosterpack.com
8
  * Description: PageSpeed optimization is vital for SEO: A faster website equals better conversions. Optimize & cache your site with this smart plugin!
9
- * Version: 4.1.0
10
  * Author: Optimocha
11
  * Author URI: https://optimocha.com
12
  * License: GPLv3 or later
@@ -32,7 +32,7 @@ define( 'SBP_PLUGIN_NAME', 'Speed Booster Pack' );
32
  /**
33
  * Current plugin version.
34
  */
35
- define( 'SBP_VERSION', '4.1.0' );
36
 
37
  /**
38
  * Plugin website URL.
@@ -165,6 +165,7 @@ function sbp_autoloader( $class_name ) {
165
  * @since 4.0.0
166
  */
167
  function run_speed_booster_pack() {
 
168
 
169
  $plugin = new Speed_Booster_Pack();
170
  $plugin->run();
6
  * Plugin Name: Speed Booster Pack
7
  * Plugin URI: https://speedboosterpack.com
8
  * Description: PageSpeed optimization is vital for SEO: A faster website equals better conversions. Optimize & cache your site with this smart plugin!
9
+ * Version: 4.1.1
10
  * Author: Optimocha
11
  * Author URI: https://optimocha.com
12
  * License: GPLv3 or later
32
  /**
33
  * Current plugin version.
34
  */
35
+ define( 'SBP_VERSION', '4.1.1' );
36
 
37
  /**
38
  * Plugin website URL.
165
  * @since 4.0.0
166
  */
167
  function run_speed_booster_pack() {
168
+ if( preg_match( '/(\.txt|\.pdf|\.xml|\.ico|\.gz|\/feed\/?)/', $_SERVER['REQUEST_URI'] ) ) {return;}
169
 
170
  $plugin = new Speed_Booster_Pack();
171
  $plugin->run();
vendor/simplehtmldom/simplehtmldom/.gitattributes DELETED
@@ -1,13 +0,0 @@
1
- * text=auto
2
-
3
- # Ignored files during git-archive
4
- .gitattributes export-ignore
5
- .gitignore export-ignore
6
- .travis.yml export-ignore
7
- phpcompatibility.xml export-ignore
8
- phpunit.xml export-ignore
9
- phpcs.xml export-ignore
10
- release.sh export-ignore
11
- testcase/ export-ignore
12
- tests/ export-ignore
13
- manual/site/ export-ignore
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/.gitignore DELETED
@@ -1,12 +0,0 @@
1
- # Build files
2
- simplehtmldom_*.zip
3
-
4
- # Unit test data files
5
- tests/data/*
6
- !tests/data/.gitkeep
7
-
8
- # vscode configuration files
9
- .vscode/
10
-
11
- # Generated site data
12
- manual/site/
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/.travis.yml DELETED
@@ -1,46 +0,0 @@
1
- dist: trusty
2
- language: php
3
-
4
- install:
5
- - composer global require dealerdirect/phpcodesniffer-composer-installer;
6
- - composer global require phpcompatibility/php-compatibility;
7
- - if [[ "$PHPUNIT" ]]; then
8
- composer global require phpunit/phpunit ^$PHPUNIT;
9
- fi
10
-
11
- script:
12
- - phpenv rehash
13
- # Run PHP_CodeSniffer on all versions
14
- - ~/.config/composer/vendor/bin/phpcs . --standard=phpcs.xml --warning-severity=0 --extensions=php -p;
15
- # Check PHP compatibility for the lowest and highest supported version
16
- - if [[ $TRAVIS_PHP_VERSION == "5.6" || $TRAVIS_PHP_VERSION == "7.3" ]]; then
17
- ~/.config/composer/vendor/bin/phpcs . --standard=phpcompatibility.xml --extensions=php -p;
18
- fi
19
- # Run unit tests on highest major version
20
- - if [[ ${TRAVIS_PHP_VERSION:0:1} == "7" ]]; then
21
- ~/.config/composer/vendor/bin/phpunit --configuration=phpunit.xml --include-path=lib/;
22
- fi
23
-
24
- php:
25
- - 7.3
26
-
27
- env:
28
- - PHPUNIT=6
29
- - PHPUNIT=7
30
- - PHPUNIT=8
31
-
32
- matrix:
33
- fast_finish: true
34
-
35
- include:
36
- - php: 5.6
37
- env: PHPUNIT=
38
- - php: 7.0
39
- - php: 7.1
40
- - php: 7.2
41
-
42
- allow_failures:
43
- - php: 7.3
44
- env: PHPUNIT=7
45
- - php: 7.3
46
- env: PHPUNIT=8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/CHANGELOG.md CHANGED
@@ -1,369 +1,369 @@
1
- # Changelog
2
- All notable changes to this project will be documented in this file.
3
-
4
- The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
5
-
6
-
7
- ## [Unreleased]
8
- ### Changed
9
- - Comments that start with `>` or `->` are now considered malformed in accordance with [section 12.1.6](https://html.spec.whatwg.org/multipage/syntax.html#comments) of the [HTML specification](https://html.spec.whatwg.org/multipage/). Comments may still contain the strings `<!--` or `--!>` and they may still end with `<!-` contrary to the specification.
10
-
11
- ## [2.0-RC2] - 2019-11-09
12
-
13
- **Important**: This is a release **candidate**, which means some features might not yet be stable or emit unexpected behavior. Please don't hesitate to report broken or unstable features.
14
-
15
- ### Added
16
- - Added a `README` file.
17
- - Added a `composer` file.
18
- - Added `.travis.yml` for automated unit tests with `Travis-CI`.
19
- - Added the magic method `__debugInfo` to `HtmlDocument` and `HtmlNode` in order to reduce the memory footprint and to prevent recursion errors when using `print_r` and `var_dump`.
20
- - Added the magic method `__call` to `HtmlDocument` and `HtmlNode` as a wrapper for deprecated methods using the lowercase calling convention (see below).
21
- - Added unit tests `attribute_test.php`, `callback_test.php`, `debug_info_test.php`, `doctype_test.php`, `script_test.php`, `server_side_script_test.php`, `style_test.php` and `dom_manipulation_test.php`.
22
- - Added and extended unit tests for `cdata_test.php` and `comment_test.php`.
23
- - Added a new `Debug` class to inform users about deprecated functions, malformed documents and parsing issues.
24
- - Added full support for `script` element parsing.
25
- ### Changed
26
- - Renamed unit test `simple_html_dom_test.php` to `htmldocument_test.php`.
27
- - Renamed unit test `simple_html_dom_node_test.php` to `htmlnode_test.php`.
28
- - Changed the implementation of destructors for better garbage collection.
29
- - Changed how literal elements (`script`, `style`, `cdata`, "comment" and `code`) are handled by `HtmlDocument`.
30
- ### Deprecated
31
- - `HtmlDocument::clear()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `unset()` instead.
32
- - `HtmlDocument::load_file()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlDocument::loadFile()` instead.
33
- - `HtmlNode::children()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::childNodes()` instead.
34
- - `HtmlNode::first_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::firstChild()` instead.
35
- - `HtmlNode::has_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::hasChild()` instead.
36
- - `HtmlNode::last_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::lastChild()` instead.
37
- - `HtmlNode::next_sibling()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::nextSibling()` instead.
38
- - `HtmlNode::prev_sibling()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::previousSibling()` instead.
39
- - Support for Smarty scripts has been deprecated and will be removed in the next major version of simplehtmldom.
40
- - Support for server-side scripts has been deprecated and will be removed in the next major version of simplehtmldom.
41
- ### Removed
42
- - Removed the `testcase/` folder as all tests are covered by unit tests inside `tests/`.
43
- ### Fixed
44
- - Fixed a bug with boolean attributes that were incorrectly represented with a value of "1" when saving the DOM.
45
- - Fixed a bug with comment and CDATA parsing that could cause an infinite loop if any of these elements contained `script`, `style`, `code`, server-side php or Smarty tags.
46
- - Fixed a bug with comment and CDATA parsing that resulted in whitespace and newlines being removed when loading a document with `$stripRN = true` (default setting).
47
- - Fixed a bug with attribute values that resulted in incorrectly encoded content when using `outertext()`, `innertext()` or `save()`.
48
- - Fixed a bug with charset encoding that resulted in partially encoded documents depending on the use of `outertext()` and `innertext()` [#178](https://sourceforge.net/p/simplehtmldom/bugs/178/)
49
- - Fixed multiple bugs related to DOM manipulation when using `HtmlDocument::createElement()`, `HtmlDocument::createTextNode()` and `HtmlNode::appendChild()`.
50
-
51
- ## [2.0-RC1] - 2019-10-20
52
-
53
- **Important**: This is a release **candidate**, which means some features might not yet be stable or emit unexpected behavior. Please don't hesitate to report broken or unstable features.
54
-
55
- ### Added
56
- - Added unit tests
57
- - Added tests for whitespace handling.
58
- - Added tests for entity decoding.
59
- - Added tests for node functions after calling remove().
60
- - Added tests for `maxLen` in `file_get_html`.
61
- - Added tests for `simple_html_dom_node`.
62
- - Added tests for `HtmlWeb`.
63
- - Added test for bug [#172](https://sourceforge.net/p/simplehtmldom/bugs/172/)
64
- - Added optional argument `$trim = true` to `$node->text()`
65
- - Added attribute value normalization
66
- - https://www.w3.org/TR/html/syntax.html#attribute-values
67
- - https://www.w3.org/TR/xml/#AVNormalize
68
- - Added automatic HTML entity decoding when loading documents [feature:#52]
69
- - Added [the negation pseudo-class](https://www.w3.org/TR/selectors-3/#negation)
70
- - Added `simple_html_dom::expect()`.
71
- - Added `simple_html_dom_node::expect()`.
72
- - Added the ability to parse CDATA sections.
73
- - Added `HtmlWeb` to directly load webpages via cURL or fopen as DOM.
74
- - Added `HtmlDocument`, `HtmlNode`, `HtmlWeb` and `constants` to namespace `simplehtmldom`.
75
- - Added a new element type `HDOM_TYPE_CDATA` for CDATA sections.
76
- - Added full support for parsing comments and CDATA sections.
77
- ### Changed
78
- - `simple_html_dom::doc` is now unset after loading the DOM.
79
- - `simple_html_dom::restore_noise()` now clears restored elements.
80
- - `simple_html_dom_node::_[HDOM_INFO_ENDSPACE]` now only exists if needed.
81
- - `simple_html_dom_node::_[HDOM_INFO_SPACE]`
82
- - Now stores elements by attribute names.
83
- - Now only exists if needed (defaults to `array(' ', '', '')`).
84
- - `simple_html_dom_node::_[HDOM_INFO_QUOTE]`
85
- - Now stores elements by attribute names.
86
- - Now only exists if needed (defaults to `HDOM_QUOTE_DOUBLE`).
87
- - `simple_html_dom_node::text()` now supports all block and inline level elements.
88
- - `simple_html_dom_node::text()` now skips empty block elements.
89
- - `simple_html_dom_node::text()` now properly handles `&nbsp` characters.
90
- - `simple_html_dom_node::removeChild()` now removes all types of childs.
91
- - Increased `MAX_FILE_SIZE` from 0.6 MB (600000 Bytes) to 2.5 MiB (2621440 Bytes)
92
- - `HDOM_INFO_INNER` (innertext) is now stored as part of the owning element.
93
- - Moved and renamed `simple_html_dom` to `HtmlDocument`.
94
- - Moved and renamed `simple_html_dom_node` to `HtmlNode`.
95
- - Moved constants to `constants.php`
96
- - Moved `HDOM_TYPE_*`, `HDOM_INFO_*` and `HDOM_QUOTE_*` constants into `HtmlNode`.
97
- ### Removed
98
- - Removed `/example/scraping/example_scraping_general.php`.
99
- - Removed `/example/simple_html_dom_utility.php`.
100
- - Removed `/app`.
101
- - Removed `/testcase/reader`.
102
- - Removed `simple_html_dom_node::tag_start`.
103
- ### Fixed
104
- - Fixed fatal error when removing nodes from the DOM (#172)
105
- - Fixed `simple_html_dom::parse()` to work after removing elements from the DOM.
106
- - Fixed `simple_html_dom_node::text()` to properly handle UTF-8 characters.
107
- - Fixed all scripts in the example folder.
108
- - Fixed `file_get_html` to return false if the file size is larger than `maxLen`.
109
- - Fixed a bug that caused the parser to convert UTF-8 to UTF-8 on mistake.
110
- - Fixed `simple_html_dom::loadFile` to properly forward arguments to `simple_html_dom::load_file`.
111
- - Fixed handling of optional closing tags to end on the last element.
112
- - Fixed broken support for `text` nodes when using `find` (#175).
113
-
114
- ## [1.9] - 2019-05-30
115
- ### Added
116
- - Added unit test for bug reports
117
- - Added test for bug [#153](https://sourceforge.net/p/simplehtmldom/bugs/153/)
118
- - Added test for bug [#163](https://sourceforge.net/p/simplehtmldom/bugs/163/)
119
- - Added test for bug [#166](https://sourceforge.net/p/simplehtmldom/bugs/166/)
120
- - Added test for bug [#169](https://sourceforge.net/p/simplehtmldom/bugs/169/)
121
- - Added unit test for character sets UTF-8, CP1251 and CP1252 (#142)
122
- - Added support for meta charset to parse_charset
123
- - Added detection for CP1251 to parse_charset, using iconv
124
- - Added LICENSE file (MIT) to the project root
125
- - Added functions to `simple_html_dom_node`
126
- - `remove`: Removes the current node recursively from the DOM tree
127
- - `removeChild`: Removes a child node recursively from the DOM tree
128
- - `hasClass`: Checks if the current node has the specified class name
129
- - `addClass`: Adds one or more classes to the current node
130
- - `removeClass`: Removes one or more classes from the current node
131
- - `save`: Saves the current node to disk
132
- ### Changed
133
- - Changed manual from custom implementation to MkDocs (https://www.mkdocs.org/)
134
- ### Fixed
135
- - Fixed warning when trying to clear() the DOM on a null nodes list (#153)
136
- - Fixed missing whitespace when returning plaintext (#163)
137
- - Fixed broken detection of duplicate attributes (#166)
138
- - Fixed broken detection of CP1252 (ISO-8859-1) documents (#142)
139
- - Fixed error using next-sibling combinator ('E + F') on last child
140
- - Fixed selector parsing for attribute selectors ending on "s" or "i" (#169)
141
-
142
- ## [1.8.1] - 2019-01-13
143
- ### Fixed
144
- - Fixed various bugs related to parsing classes and ids
145
-
146
- ## [1.8] - 2019-01-13
147
- ### Added
148
- - Added documentation for `simple_html_dom_node::find`
149
- - Added documentation for `simple_html_dom_node::parse_selector`
150
- - Added documentation for `simple_html_dom_node::seek`
151
- - Added documentation for `simple_html_dom_node::match`
152
- - Added unit tests for bug reports
153
- - Added test for bug [#62](https://sourceforge.net/p/simplehtmldom/bugs/62/)
154
- - Added test for bug [#79](https://sourceforge.net/p/simplehtmldom/bugs/79/)
155
- - Added test for bug [#144](https://sourceforge.net/p/simplehtmldom/bugs/144/)
156
- - Added unit tests for CSS selectors
157
- - Added ability to define constants before simple_html_dom does
158
- - 'DEFAULT_TARGET_CHARSET'
159
- - 'DEFAULT_BR_TEXT'
160
- - 'DEFAULT_SPAN_TEXT'
161
- - 'MAX_FILE_SIZE'
162
- - Added support for CSS combinators
163
- - Added support for Child Combinator (`>`)
164
- - Added support for Next Sibling Combinator (`+`)
165
- - Added support for Subsequent Sibling Combinator (`~`)
166
- - Added support for multiclass selectors (`.class.class.class`)
167
- - Added support for multiattribute selectors (`[attr1][attr2][attribute3]`)
168
- - Added support for attribute selectors
169
- - Added support for pipe selectors (`|=`)
170
- - Added support for tilde selectors (`~=`)
171
- - Added support for case sensitivity selectors (`i` and `s`)
172
- - Added unit tests for PHP compatibility to PHP 5.6+
173
- - Added coding standard using PHP_CodeSniffer
174
- ### Changed
175
- - Removed automatic filtering of 'tbody' selectors (#79)
176
- > Remove 'tbody' from all selectors to maintain the previous state!
177
- - Coding standard using PHP_CodeSniffer
178
- ### Fixed
179
- - Fixed broken CSS selector attributes with value "0" (#62)
180
- - Fixed broken simple_html_dom::load_file
181
- - Fixed forward slashes in CSS selector breaks value matching using '*=' (#144)
182
- - Fixed Universal Selectors
183
-
184
- ## [1.7] - 2018-12-10
185
- ### Added
186
- - Added code documentation to improve readability
187
- - Added unit tests for `simple_html_dom::$self_closing_tags`
188
- - Added unit tests for `simple_html_dom::$optional_closing_tags`
189
- - Added unit tests for bug reports
190
- - Added test for bug [#56](https://sourceforge.net/p/simplehtmldom/bugs/56/)
191
- - Added test for bug [#97](https://sourceforge.net/p/simplehtmldom/bugs/97/)
192
- - Added test for bug [#116](https://sourceforge.net/p/simplehtmldom/bugs/116/)
193
- - Added test for bug [#121](https://sourceforge.net/p/simplehtmldom/bugs/127/)
194
- - Added test for bug [#127](https://sourceforge.net/p/simplehtmldom/bugs/127/)
195
- - Added test for bug [#154](https://sourceforge.net/p/simplehtmldom/bugs/154/)
196
- - Added test for bug [#160](https://sourceforge.net/p/simplehtmldom/bugs/160/)
197
- - Added unit tests for memory management of the parser
198
- - Added bit flags to `simple_html_dom::load()`
199
- - Added bit flag `HDOM_SMARTY_AS_TEXT` to optionally filter Smarty scripts (#154)\
200
- **Note**: Smarty scripts are no longer filtered by default!\
201
- - Added build script to automate releases
202
- - Added support for attributes without whitespace to separate them
203
- ### Changed
204
- - Improved documentation and readability for `$self_closing_tags`
205
- - Improved documentation and readability for `$block_tags`
206
- - Improved documentation and readability for `$optional_closing_tags`
207
- - Updated list of `simple_html_dom::$self_closing_tags`
208
- - Removed 'spacer' (obsolete)
209
- - Added 'area'
210
- - Added 'col'
211
- - Added 'meta'
212
- - Added 'param'
213
- - Added 'source'
214
- - Added 'track'
215
- - Added 'wbr'
216
- - Updated list of `simple_html_dom::$optional_closing_tags`
217
- - Removed "nobr" (obsolete)
218
- - Added 'th' as closable element to 'td'
219
- - Added 'td' as closable element to 'th'
220
- - Added 'optgroup' with 'optgroup' and 'option' as closable elements
221
- - Added 'optgroup' as closable element to 'option'
222
- - Added 'rp' with 'rp' and 'rt' as closable elements
223
- - Added 'rt' with 'rt' and 'rp' as closable elements
224
- - Clarified meaning of `simple_html_dom->parent`
225
- - Changed default `$offset` for `file_get_html()` from -1 to 0 (#161)
226
- - Changed `simple_html_dom::load()` to remove script tags before replacing newline characters
227
- - `simple_html_dom_node::text()` no longer adds whitespace to top level span elements (only to sub-elements)
228
- - `simple_html_dom_node::text()` adds blank lines between paragraphs
229
- - Normalized line endings in the repository to LF via `.gitattributes`
230
- - Improved performance of `simple_html_dom::parse_charset()` by approximately 25%
231
- - Improved performance of `simple_html_dom::parse()` by approximately 10%
232
- ### Deprecated
233
- - `str_get_html()` is deprecated and should be replaced by `new simple_html_dom()`
234
- ### Removed
235
- - Removed protected function `simple_html_dom::copy_until_char_escaped()`
236
- ### Fixed
237
- - Fixed compatibility issues with PHP 7.3
238
- - Fixed typo (#147)
239
- - Fixed handling of incorrectly escaped text (#160)
240
- - Restore functionality of `$maxLen` in `file_get_html()`
241
- - Fixed load_file breaks if an error ocurred in another script
242
-
243
- ## [1.6] - 2014-05-28
244
- ### Added
245
- - Added some ability to insert and create nodes
246
- - Add ability to search the "noise" array
247
-
248
- ## [1.5] - 2012-09-10
249
- ### Added
250
- - Added flag: LOCK_EX while calling "file_put_contents()"
251
- - Added support for detecting the source html character set. This is used to convert characters when plaintext is requested.
252
- - Other little fixes and features, too numerous to categorize
253
- ### Changed
254
- - Error of "file_get_contents()" will be thrown as an exception
255
- ### Fixed
256
- - Fixed the typo of "token_blank_t"
257
- - Memory leak fixed
258
-
259
- ## [1.11] - 2008-12-14
260
- ### Added
261
- - Supports xpath generated from Firebug
262
- - New method "dump" of "simple_html_dom_node"
263
- - New attribute "xmltext" of "simple_html_dom_node"
264
- ### Changed
265
- - Remove preg_quote on selector match function: `[attribute*=value]`
266
- - Element "Comment" will treat as children
267
- ### Fixed
268
- - Fixed the problem with `<pre>`
269
- - Fixed bug #2207477 (does not load some pages properly)
270
- - Fixed bug #2315853 (Error with character after < sign)
271
-
272
- ## [1.10] - 2008-10-25
273
- ### Changed
274
- - Negative indexes supports of "find" method, thanks for Vadim Voituk
275
- - Constructor with automatically load contents either text or file/url, thanks for Antcs
276
- - Fully supports wildcard in selectors
277
- ### Fixed
278
- - Fixed bug of confusing by the < symbol inside the text
279
- - Fixed bug of dash in selectors
280
- - Fixed bug of `<nobr>`
281
- - Fixed bug #2155883 (Nested List Parses Incorrectly)
282
- - Fixed bug #2155113 (error with unclosed html tags)
283
-
284
- ## [1.00] - 2008-09-05
285
- ### Added
286
- - New method "getAllAttributes" of "simple_html_dom_node"
287
- - Supports full javascript string in selector: `$e->find("a[onclick=alert('hello')]")`
288
- ### Changed
289
- - Changed selector "*=" to case-insentive
290
- ### Fixed
291
- - Fixed the bug of selector in some critical conditions
292
- - Fixed the bug of striping php tags
293
- - Fixed the bug of remove_noise()
294
- - Fixed the bug of noise in attributes
295
-
296
- ## [0.99] - 2008-08-03
297
- ### Changed
298
- - Performance tuning (boost 10%)
299
- - Memory requirement reduced by 25%
300
- - Changed function name from "file_get_dom()" to "file_get_html()"
301
- - Changed function name from "str_get_dom()" to "str_get_html()"
302
- ### Fixed
303
- - Fixed bug #2011286 (Error with unclosed html tags)
304
- - Fixed bug #2012551 (Error parsing divs)
305
- - Fixed bug #2020924 (Error for missed tag)
306
- - Fixed bug (problem with `<body>` tag's innertext)
307
-
308
- ## [0.98] - 2008-06-24
309
- ### Added
310
- - Supports "multiple class" selector feature: `<div class="a b c"></div>`
311
- - New "callback function" feature
312
- - New "multiple selectors" feature: $dom->find('p,a,b')
313
- - New examples
314
- - Supports extract contents from HTML features: $dom->plaintext
315
- ### Changed
316
- - Performance tuning (boost 20%)
317
- - Changed simple_html_dom_node method name from "text()" to "makeup()"
318
- ### Fixed
319
- - Fixed the bug of $dom->clear()
320
- - Fixed the bug of text nodes' innertext
321
- - Fixed the bug of comment nodes' innertext
322
- - Fixed the bug of decendent selector with optional tags
323
-
324
- ## [0.97] - 2008-05-09
325
- ### Added
326
- - New node type "comment" (eg. $dom->find('comment'))
327
- - Add self-closing tags: 'base', 'spacer'
328
- - New example "simple_html_dom_utility.php"
329
- ### Changed
330
- - File and class name changed (html_dom_parser->simple_html_dom)
331
- ### Removed
332
- - ($dom->save_file) will not support anymore
333
- - Remove example "example_customize_parser.php"
334
- ### Fixed
335
- - Fixed the bug of outertext (th)
336
- - Fixed the bug of regular expression escaping chars ($dom->find)
337
- - Fixed the bug while line-breaker and "\t" in tags
338
-
339
- ## [0.96] - 2008-04-27
340
- ### Added
341
- - Reference section in manual
342
- - Added traverse section in manual
343
- - Added the solution while server behind proxy in FAQ (Thanks to Yousuke Shaggy)
344
- - New method to remove attribute.
345
- - New DOM operations(first_child, last_child, next_sibling, previous_sibling) (Request #1936000)
346
- ### Changed
347
- - Now file_get_dom supports full file_get_contents parameters
348
- ### Fixed
349
- - Fixed the bug of self-closing tags in the end of file
350
- - Fixed the bug of blanks in the end of tag
351
- - Fixed some typo of testcase
352
-
353
- ## [0.95] - 2008-04-13
354
- ### Added
355
- - Supports tag name with namespace
356
- ### Changed
357
- - New attribute filters (Thanks to Yousuke Kumakura)
358
- - Refine structure of testcase
359
- ### Fixed
360
- - Fix the bug of optional-closing tags
361
- - Fix the bug of parsing the line break next to the tag's name
362
-
363
- ## [0.94] - 2008-04-06
364
- ### Added
365
- - Add FAQ section in manual
366
- ### Fixed
367
- - Fixed infinity loop while the source content is BAD HTML
368
- - Fixed the bug of adding new attributes to self closing tags
369
  - Fixed the bug of customize parser without $dom->remove_noise()
1
+ # Changelog
2
+ All notable changes to this project will be documented in this file.
3
+
4
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
5
+
6
+
7
+ ## [Unreleased]
8
+ ### Changed
9
+ - Comments that start with `>` or `->` are now considered malformed in accordance with [section 12.1.6](https://html.spec.whatwg.org/multipage/syntax.html#comments) of the [HTML specification](https://html.spec.whatwg.org/multipage/). Comments may still contain the strings `<!--` or `--!>` and they may still end with `<!-` contrary to the specification.
10
+
11
+ ## [2.0-RC2] - 2019-11-09
12
+
13
+ **Important**: This is a release **candidate**, which means some features might not yet be stable or emit unexpected behavior. Please don't hesitate to report broken or unstable features.
14
+
15
+ ### Added
16
+ - Added a `README` file.
17
+ - Added a `composer` file.
18
+ - Added `.travis.yml` for automated unit tests with `Travis-CI`.
19
+ - Added the magic method `__debugInfo` to `HtmlDocument` and `HtmlNode` in order to reduce the memory footprint and to prevent recursion errors when using `print_r` and `var_dump`.
20
+ - Added the magic method `__call` to `HtmlDocument` and `HtmlNode` as a wrapper for deprecated methods using the lowercase calling convention (see below).
21
+ - Added unit tests `attribute_test.php`, `callback_test.php`, `debug_info_test.php`, `doctype_test.php`, `script_test.php`, `server_side_script_test.php`, `style_test.php` and `dom_manipulation_test.php`.
22
+ - Added and extended unit tests for `cdata_test.php` and `comment_test.php`.
23
+ - Added a new `Debug` class to inform users about deprecated functions, malformed documents and parsing issues.
24
+ - Added full support for `script` element parsing.
25
+ ### Changed
26
+ - Renamed unit test `simple_html_dom_test.php` to `htmldocument_test.php`.
27
+ - Renamed unit test `simple_html_dom_node_test.php` to `htmlnode_test.php`.
28
+ - Changed the implementation of destructors for better garbage collection.
29
+ - Changed how literal elements (`script`, `style`, `cdata`, "comment" and `code`) are handled by `HtmlDocument`.
30
+ ### Deprecated
31
+ - `HtmlDocument::clear()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `unset()` instead.
32
+ - `HtmlDocument::load_file()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlDocument::loadFile()` instead.
33
+ - `HtmlNode::children()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::childNodes()` instead.
34
+ - `HtmlNode::first_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::firstChild()` instead.
35
+ - `HtmlNode::has_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::hasChild()` instead.
36
+ - `HtmlNode::last_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::lastChild()` instead.
37
+ - `HtmlNode::next_sibling()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::nextSibling()` instead.
38
+ - `HtmlNode::prev_sibling()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::previousSibling()` instead.
39
+ - Support for Smarty scripts has been deprecated and will be removed in the next major version of simplehtmldom.
40
+ - Support for server-side scripts has been deprecated and will be removed in the next major version of simplehtmldom.
41
+ ### Removed
42
+ - Removed the `testcase/` folder as all tests are covered by unit tests inside `tests/`.
43
+ ### Fixed
44
+ - Fixed a bug with boolean attributes that were incorrectly represented with a value of "1" when saving the DOM.
45
+ - Fixed a bug with comment and CDATA parsing that could cause an infinite loop if any of these elements contained `script`, `style`, `code`, server-side php or Smarty tags.
46
+ - Fixed a bug with comment and CDATA parsing that resulted in whitespace and newlines being removed when loading a document with `$stripRN = true` (default setting).
47
+ - Fixed a bug with attribute values that resulted in incorrectly encoded content when using `outertext()`, `innertext()` or `save()`.
48
+ - Fixed a bug with charset encoding that resulted in partially encoded documents depending on the use of `outertext()` and `innertext()` [#178](https://sourceforge.net/p/simplehtmldom/bugs/178/)
49
+ - Fixed multiple bugs related to DOM manipulation when using `HtmlDocument::createElement()`, `HtmlDocument::createTextNode()` and `HtmlNode::appendChild()`.
50
+
51
+ ## [2.0-RC1] - 2019-10-20
52
+
53
+ **Important**: This is a release **candidate**, which means some features might not yet be stable or emit unexpected behavior. Please don't hesitate to report broken or unstable features.
54
+
55
+ ### Added
56
+ - Added unit tests
57
+ - Added tests for whitespace handling.
58
+ - Added tests for entity decoding.
59
+ - Added tests for node functions after calling remove().
60
+ - Added tests for `maxLen` in `file_get_html`.
61
+ - Added tests for `simple_html_dom_node`.
62
+ - Added tests for `HtmlWeb`.
63
+ - Added test for bug [#172](https://sourceforge.net/p/simplehtmldom/bugs/172/)
64
+ - Added optional argument `$trim = true` to `$node->text()`
65
+ - Added attribute value normalization
66
+ - https://www.w3.org/TR/html/syntax.html#attribute-values
67
+ - https://www.w3.org/TR/xml/#AVNormalize
68
+ - Added automatic HTML entity decoding when loading documents [feature:#52]
69
+ - Added [the negation pseudo-class](https://www.w3.org/TR/selectors-3/#negation)
70
+ - Added `simple_html_dom::expect()`.
71
+ - Added `simple_html_dom_node::expect()`.
72
+ - Added the ability to parse CDATA sections.
73
+ - Added `HtmlWeb` to directly load webpages via cURL or fopen as DOM.
74
+ - Added `HtmlDocument`, `HtmlNode`, `HtmlWeb` and `constants` to namespace `simplehtmldom`.
75
+ - Added a new element type `HDOM_TYPE_CDATA` for CDATA sections.
76
+ - Added full support for parsing comments and CDATA sections.
77
+ ### Changed
78
+ - `simple_html_dom::doc` is now unset after loading the DOM.
79
+ - `simple_html_dom::restore_noise()` now clears restored elements.
80
+ - `simple_html_dom_node::_[HDOM_INFO_ENDSPACE]` now only exists if needed.
81
+ - `simple_html_dom_node::_[HDOM_INFO_SPACE]`
82
+ - Now stores elements by attribute names.
83
+ - Now only exists if needed (defaults to `array(' ', '', '')`).
84
+ - `simple_html_dom_node::_[HDOM_INFO_QUOTE]`
85
+ - Now stores elements by attribute names.
86
+ - Now only exists if needed (defaults to `HDOM_QUOTE_DOUBLE`).
87
+ - `simple_html_dom_node::text()` now supports all block and inline level elements.
88
+ - `simple_html_dom_node::text()` now skips empty block elements.
89
+ - `simple_html_dom_node::text()` now properly handles `&nbsp` characters.
90
+ - `simple_html_dom_node::removeChild()` now removes all types of child nodes.
91
+ - Increased `MAX_FILE_SIZE` from 0.6 MB (600000 Bytes) to 2.5 MiB (2621440 Bytes)
92
+ - `HDOM_INFO_INNER` (innertext) is now stored as part of the owning element.
93
+ - Moved and renamed `simple_html_dom` to `HtmlDocument`.
94
+ - Moved and renamed `simple_html_dom_node` to `HtmlNode`.
95
+ - Moved constants to `constants.php`
96
+ - Moved `HDOM_TYPE_*`, `HDOM_INFO_*` and `HDOM_QUOTE_*` constants into `HtmlNode`.
97
+ ### Removed
98
+ - Removed `/example/scraping/example_scraping_general.php`.
99
+ - Removed `/example/simple_html_dom_utility.php`.
100
+ - Removed `/app`.
101
+ - Removed `/testcase/reader`.
102
+ - Removed `simple_html_dom_node::tag_start`.
103
+ ### Fixed
104
+ - Fixed fatal error when removing nodes from the DOM (#172)
105
+ - Fixed `simple_html_dom::parse()` to work after removing elements from the DOM.
106
+ - Fixed `simple_html_dom_node::text()` to properly handle UTF-8 characters.
107
+ - Fixed all scripts in the example folder.
108
+ - Fixed `file_get_html` to return false if the file size is larger than `maxLen`.
109
+ - Fixed a bug that caused the parser to convert UTF-8 to UTF-8 by mistake.
110
+ - Fixed `simple_html_dom::loadFile` to properly forward arguments to `simple_html_dom::load_file`.
111
+ - Fixed handling of optional closing tags to end on the last element.
112
+ - Fixed broken support for `text` nodes when using `find` (#175).
113
+
114
+ ## [1.9] - 2019-05-30
115
+ ### Added
116
+ - Added unit test for bug reports
117
+ - Added test for bug [#153](https://sourceforge.net/p/simplehtmldom/bugs/153/)
118
+ - Added test for bug [#163](https://sourceforge.net/p/simplehtmldom/bugs/163/)
119
+ - Added test for bug [#166](https://sourceforge.net/p/simplehtmldom/bugs/166/)
120
+ - Added test for bug [#169](https://sourceforge.net/p/simplehtmldom/bugs/169/)
121
+ - Added unit test for character sets UTF-8, CP1251 and CP1252 (#142)
122
+ - Added support for meta charset to parse_charset
123
+ - Added detection for CP1251 to parse_charset, using iconv
124
+ - Added LICENSE file (MIT) to the project root
125
+ - Added functions to `simple_html_dom_node`
126
+ - `remove`: Removes the current node recursively from the DOM tree
127
+ - `removeChild`: Removes a child node recursively from the DOM tree
128
+ - `hasClass`: Checks if the current node has the specified class name
129
+ - `addClass`: Adds one or more classes to the current node
130
+ - `removeClass`: Removes one or more classes from the current node
131
+ - `save`: Saves the current node to disk
132
+ ### Changed
133
+ - Changed manual from custom implementation to MkDocs (https://www.mkdocs.org/)
134
+ ### Fixed
135
+ - Fixed warning when trying to clear() the DOM on a null nodes list (#153)
136
+ - Fixed missing whitespace when returning plaintext (#163)
137
+ - Fixed broken detection of duplicate attributes (#166)
138
+ - Fixed broken detection of CP1252 (ISO-8859-1) documents (#142)
139
+ - Fixed error using next-sibling combinator ('E + F') on last child
140
+ - Fixed selector parsing for attribute selectors ending on "s" or "i" (#169)
141
+
142
+ ## [1.8.1] - 2019-01-13
143
+ ### Fixed
144
+ - Fixed various bugs related to parsing classes and ids
145
+
146
+ ## [1.8] - 2019-01-13
147
+ ### Added
148
+ - Added documentation for `simple_html_dom_node::find`
149
+ - Added documentation for `simple_html_dom_node::parse_selector`
150
+ - Added documentation for `simple_html_dom_node::seek`
151
+ - Added documentation for `simple_html_dom_node::match`
152
+ - Added unit tests for bug reports
153
+ - Added test for bug [#62](https://sourceforge.net/p/simplehtmldom/bugs/62/)
154
+ - Added test for bug [#79](https://sourceforge.net/p/simplehtmldom/bugs/79/)
155
+ - Added test for bug [#144](https://sourceforge.net/p/simplehtmldom/bugs/144/)
156
+ - Added unit tests for CSS selectors
157
+ - Added ability to define constants before simple_html_dom does
158
+ - 'DEFAULT_TARGET_CHARSET'
159
+ - 'DEFAULT_BR_TEXT'
160
+ - 'DEFAULT_SPAN_TEXT'
161
+ - 'MAX_FILE_SIZE'
162
+ - Added support for CSS combinators
163
+ - Added support for Child Combinator (`>`)
164
+ - Added support for Next Sibling Combinator (`+`)
165
+ - Added support for Subsequent Sibling Combinator (`~`)
166
+ - Added support for multiclass selectors (`.class.class.class`)
167
+ - Added support for multiattribute selectors (`[attr1][attr2][attribute3]`)
168
+ - Added support for attribute selectors
169
+ - Added support for pipe selectors (`|=`)
170
+ - Added support for tilde selectors (`~=`)
171
+ - Added support for case sensitivity selectors (`i` and `s`)
172
+ - Added unit tests for PHP compatibility to PHP 5.6+
173
+ - Added coding standard using PHP_CodeSniffer
174
+ ### Changed
175
+ - Removed automatic filtering of 'tbody' selectors (#79)
176
+ > Remove 'tbody' from all selectors to maintain the previous state!
177
+ - Coding standard using PHP_CodeSniffer
178
+ ### Fixed
179
+ - Fixed broken CSS selector attributes with value "0" (#62)
180
+ - Fixed broken simple_html_dom::load_file
181
+ - Fixed forward slashes in CSS selector breaks value matching using '*=' (#144)
182
+ - Fixed Universal Selectors
183
+
184
+ ## [1.7] - 2018-12-10
185
+ ### Added
186
+ - Added code documentation to improve readability
187
+ - Added unit tests for `simple_html_dom::$self_closing_tags`
188
+ - Added unit tests for `simple_html_dom::$optional_closing_tags`
189
+ - Added unit tests for bug reports
190
+ - Added test for bug [#56](https://sourceforge.net/p/simplehtmldom/bugs/56/)
191
+ - Added test for bug [#97](https://sourceforge.net/p/simplehtmldom/bugs/97/)
192
+ - Added test for bug [#116](https://sourceforge.net/p/simplehtmldom/bugs/116/)
193
+ - Added test for bug [#121](https://sourceforge.net/p/simplehtmldom/bugs/121/)
194
+ - Added test for bug [#127](https://sourceforge.net/p/simplehtmldom/bugs/127/)
195
+ - Added test for bug [#154](https://sourceforge.net/p/simplehtmldom/bugs/154/)
196
+ - Added test for bug [#160](https://sourceforge.net/p/simplehtmldom/bugs/160/)
197
+ - Added unit tests for memory management of the parser
198
+ - Added bit flags to `simple_html_dom::load()`
199
+ - Added bit flag `HDOM_SMARTY_AS_TEXT` to optionally filter Smarty scripts (#154)\
200
+ **Note**: Smarty scripts are no longer filtered by default!\
201
+ - Added build script to automate releases
202
+ - Added support for attributes without whitespace to separate them
203
+ ### Changed
204
+ - Improved documentation and readability for `$self_closing_tags`
205
+ - Improved documentation and readability for `$block_tags`
206
+ - Improved documentation and readability for `$optional_closing_tags`
207
+ - Updated list of `simple_html_dom::$self_closing_tags`
208
+ - Removed 'spacer' (obsolete)
209
+ - Added 'area'
210
+ - Added 'col'
211
+ - Added 'meta'
212
+ - Added 'param'
213
+ - Added 'source'
214
+ - Added 'track'
215
+ - Added 'wbr'
216
+ - Updated list of `simple_html_dom::$optional_closing_tags`
217
+ - Removed "nobr" (obsolete)
218
+ - Added 'th' as closable element to 'td'
219
+ - Added 'td' as closable element to 'th'
220
+ - Added 'optgroup' with 'optgroup' and 'option' as closable elements
221
+ - Added 'optgroup' as closable element to 'option'
222
+ - Added 'rp' with 'rp' and 'rt' as closable elements
223
+ - Added 'rt' with 'rt' and 'rp' as closable elements
224
+ - Clarified meaning of `simple_html_dom->parent`
225
+ - Changed default `$offset` for `file_get_html()` from -1 to 0 (#161)
226
+ - Changed `simple_html_dom::load()` to remove script tags before replacing newline characters
227
+ - `simple_html_dom_node::text()` no longer adds whitespace to top level span elements (only to sub-elements)
228
+ - `simple_html_dom_node::text()` adds blank lines between paragraphs
229
+ - Normalized line endings in the repository to LF via `.gitattributes`
230
+ - Improved performance of `simple_html_dom::parse_charset()` by approximately 25%
231
+ - Improved performance of `simple_html_dom::parse()` by approximately 10%
232
+ ### Deprecated
233
+ - `str_get_html()` is deprecated and should be replaced by `new simple_html_dom()`
234
+ ### Removed
235
+ - Removed protected function `simple_html_dom::copy_until_char_escaped()`
236
+ ### Fixed
237
+ - Fixed compatibility issues with PHP 7.3
238
+ - Fixed typo (#147)
239
+ - Fixed handling of incorrectly escaped text (#160)
240
+ - Restore functionality of `$maxLen` in `file_get_html()`
241
+ - Fixed load_file breaking if an error occurred in another script
242
+
243
+ ## [1.6] - 2014-05-28
244
+ ### Added
245
+ - Added some ability to insert and create nodes
246
+ - Add ability to search the "noise" array
247
+
248
+ ## [1.5] - 2012-09-10
249
+ ### Added
250
+ - Added flag: LOCK_EX while calling "file_put_contents()"
251
+ - Added support for detecting the source html character set. This is used to convert characters when plaintext is requested.
252
+ - Other little fixes and features, too numerous to categorize
253
+ ### Changed
254
+ - Error of "file_get_contents()" will be thrown as an exception
255
+ ### Fixed
256
+ - Fixed the typo of "token_blank_t"
257
+ - Memory leak fixed
258
+
259
+ ## [1.11] - 2008-12-14
260
+ ### Added
261
+ - Supports xpath generated from Firebug
262
+ - New method "dump" of "simple_html_dom_node"
263
+ - New attribute "xmltext" of "simple_html_dom_node"
264
+ ### Changed
265
+ - Remove preg_quote on selector match function: `[attribute*=value]`
266
+ - Element "Comment" will treat as children
267
+ ### Fixed
268
+ - Fixed the problem with `<pre>`
269
+ - Fixed bug #2207477 (does not load some pages properly)
270
+ - Fixed bug #2315853 (Error with character after < sign)
271
+
272
+ ## [1.10] - 2008-10-25
273
+ ### Changed
274
+ - Negative indexes supports of "find" method, thanks for Vadim Voituk
275
+ - Constructor with automatically load contents either text or file/url, thanks for Antcs
276
+ - Fully supports wildcard in selectors
277
+ ### Fixed
278
+ - Fixed bug of confusing by the < symbol inside the text
279
+ - Fixed bug of dash in selectors
280
+ - Fixed bug of `<nobr>`
281
+ - Fixed bug #2155883 (Nested List Parses Incorrectly)
282
+ - Fixed bug #2155113 (error with unclosed html tags)
283
+
284
+ ## [1.00] - 2008-09-05
285
+ ### Added
286
+ - New method "getAllAttributes" of "simple_html_dom_node"
287
+ - Supports full javascript string in selector: `$e->find("a[onclick=alert('hello')]")`
288
+ ### Changed
289
+ - Changed selector "*=" to case-insensitive
290
+ ### Fixed
291
+ - Fixed the bug of selector in some critical conditions
292
+ - Fixed the bug of stripping php tags
293
+ - Fixed the bug of remove_noise()
294
+ - Fixed the bug of noise in attributes
295
+
296
+ ## [0.99] - 2008-08-03
297
+ ### Changed
298
+ - Performance tuning (boost 10%)
299
+ - Memory requirement reduced by 25%
300
+ - Changed function name from "file_get_dom()" to "file_get_html()"
301
+ - Changed function name from "str_get_dom()" to "str_get_html()"
302
+ ### Fixed
303
+ - Fixed bug #2011286 (Error with unclosed html tags)
304
+ - Fixed bug #2012551 (Error parsing divs)
305
+ - Fixed bug #2020924 (Error for missed tag)
306
+ - Fixed bug (problem with `<body>` tag's innertext)
307
+
308
+ ## [0.98] - 2008-06-24
309
+ ### Added
310
+ - Supports "multiple class" selector feature: `<div class="a b c"></div>`
311
+ - New "callback function" feature
312
+ - New "multiple selectors" feature: $dom->find('p,a,b')
313
+ - New examples
314
+ - Supports extract contents from HTML features: $dom->plaintext
315
+ ### Changed
316
+ - Performance tuning (boost 20%)
317
+ - Changed simple_html_dom_node method name from "text()" to "makeup()"
318
+ ### Fixed
319
+ - Fixed the bug of $dom->clear()
320
+ - Fixed the bug of text nodes' innertext
321
+ - Fixed the bug of comment nodes' innertext
322
+ - Fixed the bug of descendant selector with optional tags
323
+
324
+ ## [0.97] - 2008-05-09
325
+ ### Added
326
+ - New node type "comment" (eg. $dom->find('comment'))
327
+ - Add self-closing tags: 'base', 'spacer'
328
+ - New example "simple_html_dom_utility.php"
329
+ ### Changed
330
+ - File and class name changed (html_dom_parser->simple_html_dom)
331
+ ### Removed
332
+ - ($dom->save_file) is no longer supported
333
+ - Remove example "example_customize_parser.php"
334
+ ### Fixed
335
+ - Fixed the bug of outertext (th)
336
+ - Fixed the bug of regular expression escaping chars ($dom->find)
337
+ - Fixed the bug while line-breaker and "\t" in tags
338
+
339
+ ## [0.96] - 2008-04-27
340
+ ### Added
341
+ - Reference section in manual
342
+ - Added traverse section in manual
343
+ - Added the solution while server behind proxy in FAQ (Thanks to Yousuke Shaggy)
344
+ - New method to remove attribute.
345
+ - New DOM operations(first_child, last_child, next_sibling, previous_sibling) (Request #1936000)
346
+ ### Changed
347
+ - Now file_get_dom supports full file_get_contents parameters
348
+ ### Fixed
349
+ - Fixed the bug of self-closing tags in the end of file
350
+ - Fixed the bug of blanks in the end of tag
351
+ - Fixed some typo of testcase
352
+
353
+ ## [0.95] - 2008-04-13
354
+ ### Added
355
+ - Supports tag name with namespace
356
+ ### Changed
357
+ - New attribute filters (Thanks to Yousuke Kumakura)
358
+ - Refine structure of testcase
359
+ ### Fixed
360
+ - Fix the bug of optional-closing tags
361
+ - Fix the bug of parsing the line break next to the tag's name
362
+
363
+ ## [0.94] - 2008-04-06
364
+ ### Added
365
+ - Add FAQ section in manual
366
+ ### Fixed
367
+ - Fixed infinite loop while the source content is BAD HTML
368
+ - Fixed the bug of adding new attributes to self closing tags
369
  - Fixed the bug of customize parser without $dom->remove_noise()
vendor/simplehtmldom/simplehtmldom/Debug.php CHANGED
@@ -1,149 +1,149 @@
1
- <?php namespace simplehtmldom;
2
-
3
- /**
4
- * Website: http://sourceforge.net/projects/simplehtmldom/
5
- * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
- *
7
- * Licensed under The MIT License
8
- * See the LICENSE file in the project root for more information.
9
- *
10
- * Authors:
11
- * S.C. Chen
12
- * John Schlick
13
- * Rus Carroll
14
- * logmanoriginal
15
- *
16
- * Contributors:
17
- * Yousuke Kumakura
18
- * Vadim Voituk
19
- * Antcs
20
- *
21
- * Version $Rev$
22
- */
23
-
24
- /**
25
- * Implements functions for debugging purposes. Debugging can be enabled and
26
- * disabled on demand. Debug messages are send to error_log by default but it
27
- * is also possible to register a custom debug handler.
28
- */
29
- class Debug {
30
-
31
- private static $enabled = false;
32
- private static $debugHandler = null;
33
- private static $callerLock = array();
34
-
35
- /**
36
- * Checks whether debug mode is enabled.
37
- *
38
- * @return bool True if debug mode is enabled, false otherwise.
39
- */
40
- public static function isEnabled()
41
- {
42
- return self::$enabled;
43
- }
44
-
45
- /**
46
- * Enables debug mode
47
- */
48
- public static function enable()
49
- {
50
- self::$enabled = true;
51
- self::log('Debug mode has been enabled');
52
- }
53
-
54
- /**
55
- * Disables debug mode
56
- */
57
- public static function disable()
58
- {
59
- self::log('Debug mode has been disabled');
60
- self::$enabled = false;
61
- }
62
-
63
- /**
64
- * Sets the debug handler.
65
- *
66
- * `null`: error_log (default)
67
- */
68
- public static function setDebugHandler($function = null)
69
- {
70
- if ($function === self::$debugHandler) return;
71
-
72
- self::log('New debug handler registered');
73
- self::$debugHandler = $function;
74
- }
75
-
76
- /**
77
- * This is the actual log function. It allows to set a custom backtrace to
78
- * eliminate traces of this class.
79
- */
80
- private static function log_trace($message, $backtrace)
81
- {
82
- $idx = 0;
83
- $debugmessage = '';
84
-
85
- foreach($backtrace as $caller)
86
- {
87
- if (!isset($caller['file']) && !isset($caller['line'])) {
88
- break; // Unknown caller
89
- }
90
-
91
- $debugmessage .= ' [' . $caller['file'] . ':' . $caller['line'];
92
-
93
- if ($idx > 1) { // Do not include the call to Debug::log
94
- $debugmessage .= ' '
95
- . $caller['class']
96
- . $caller['type']
97
- . $caller['function']
98
- . '()';
99
- }
100
-
101
- $debugmessage .= ']';
102
-
103
- // Stop at the first caller that isn't part of simplehtmldom
104
- if (!isset($caller['class']) || strpos($caller['class'], 'simplehtmldom\\') !== 0) {
105
- break;
106
- }
107
- }
108
-
109
- $output = '[DEBUG] ' . trim($debugmessage) . ' "' . $message . '"';
110
-
111
- if (is_null(self::$debugHandler)) {
112
- error_log($output);
113
- } else {
114
- call_user_func_array(self::$debugHandler, array($output));
115
- }
116
- }
117
-
118
- /**
119
- * Adds a debug message to error_log if debug mode is enabled. Does nothing
120
- * if debug mode is disabled.
121
- *
122
- * @param string $text The message to add to error_log
123
- */
124
- public static function log($message)
125
- {
126
- if (!self::isEnabled()) return;
127
-
128
- $backtrace = debug_backtrace(DEBUG_BACKTRACE_PROVIDE_OBJECT);
129
- self::log_trace($message, $backtrace);
130
- }
131
-
132
- /**
133
- * Adds a debug message to error_log if debug mode is enabled. Does nothing
134
- * if debug mode is disabled. Each message is logged only once.
135
- *
136
- * @param string $text The message to add to error_log
137
- */
138
- public static function log_once($message)
139
- {
140
- if (!self::isEnabled()) return;
141
-
142
- // Keep track of caller (file & line)
143
- $backtrace = debug_backtrace(DEBUG_BACKTRACE_PROVIDE_OBJECT);
144
- if (in_array($backtrace[0], self::$callerLock, true)) return;
145
-
146
- self::$callerLock[] = $backtrace[0];
147
- self::log_trace($message, $backtrace);
148
- }
149
- }
1
+ <?php namespace simplehtmldom;
2
+
3
+ /**
4
+ * Website: http://sourceforge.net/projects/simplehtmldom/
5
+ * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
+ *
7
+ * Licensed under The MIT License
8
+ * See the LICENSE file in the project root for more information.
9
+ *
10
+ * Authors:
11
+ * S.C. Chen
12
+ * John Schlick
13
+ * Rus Carroll
14
+ * logmanoriginal
15
+ *
16
+ * Contributors:
17
+ * Yousuke Kumakura
18
+ * Vadim Voituk
19
+ * Antcs
20
+ *
21
+ * Version $Rev$
22
+ */
23
+
24
+ /**
25
+ * Implements functions for debugging purposes. Debugging can be enabled and
26
+ * disabled on demand. Debug messages are send to error_log by default but it
27
+ * is also possible to register a custom debug handler.
28
+ */
29
+ class Debug {
30
+
31
+ private static $enabled = false;
32
+ private static $debugHandler = null;
33
+ private static $callerLock = array();
34
+
35
+ /**
36
+ * Checks whether debug mode is enabled.
37
+ *
38
+ * @return bool True if debug mode is enabled, false otherwise.
39
+ */
40
+ public static function isEnabled()
41
+ {
42
+ return self::$enabled;
43
+ }
44
+
45
+ /**
46
+ * Enables debug mode
47
+ */
48
+ public static function enable()
49
+ {
50
+ self::$enabled = true;
51
+ self::log('Debug mode has been enabled');
52
+ }
53
+
54
+ /**
55
+ * Disables debug mode
56
+ */
57
+ public static function disable()
58
+ {
59
+ self::log('Debug mode has been disabled');
60
+ self::$enabled = false;
61
+ }
62
+
63
+ /**
64
+ * Sets the debug handler.
65
+ *
66
+ * `null`: error_log (default)
67
+ */
68
+ public static function setDebugHandler($function = null)
69
+ {
70
+ if ($function === self::$debugHandler) return;
71
+
72
+ self::log('New debug handler registered');
73
+ self::$debugHandler = $function;
74
+ }
75
+
76
+ /**
77
+ * This is the actual log function. It allows to set a custom backtrace to
78
+ * eliminate traces of this class.
79
+ */
80
+ private static function log_trace($message, $backtrace)
81
+ {
82
+ $idx = 0;
83
+ $debugmessage = '';
84
+
85
+ foreach($backtrace as $caller)
86
+ {
87
+ if (!isset($caller['file']) && !isset($caller['line'])) {
88
+ break; // Unknown caller
89
+ }
90
+
91
+ $debugmessage .= ' [' . $caller['file'] . ':' . $caller['line'];
92
+
93
+ if ($idx > 1) { // Do not include the call to Debug::log
94
+ $debugmessage .= ' '
95
+ . $caller['class']
96
+ . $caller['type']
97
+ . $caller['function']
98
+ . '()';
99
+ }
100
+
101
+ $debugmessage .= ']';
102
+
103
+ // Stop at the first caller that isn't part of simplehtmldom
104
+ if (!isset($caller['class']) || strpos($caller['class'], 'simplehtmldom\\') !== 0) {
105
+ break;
106
+ }
107
+ }
108
+
109
+ $output = '[DEBUG] ' . trim($debugmessage) . ' "' . $message . '"';
110
+
111
+ if (is_null(self::$debugHandler)) {
112
+ error_log($output);
113
+ } else {
114
+ call_user_func_array(self::$debugHandler, array($output));
115
+ }
116
+ }
117
+
118
+ /**
119
+ * Adds a debug message to error_log if debug mode is enabled. Does nothing
120
+ * if debug mode is disabled.
121
+ *
122
+ * @param string $text The message to add to error_log
123
+ */
124
+ public static function log($message)
125
+ {
126
+ if (!self::isEnabled()) return;
127
+
128
+ $backtrace = debug_backtrace(DEBUG_BACKTRACE_PROVIDE_OBJECT);
129
+ self::log_trace($message, $backtrace);
130
+ }
131
+
132
+ /**
133
+ * Adds a debug message to error_log if debug mode is enabled. Does nothing
134
+ * if debug mode is disabled. Each message is logged only once.
135
+ *
136
+ * @param string $text The message to add to error_log
137
+ */
138
+ public static function log_once($message)
139
+ {
140
+ if (!self::isEnabled()) return;
141
+
142
+ // Keep track of caller (file & line)
143
+ $backtrace = debug_backtrace(DEBUG_BACKTRACE_PROVIDE_OBJECT);
144
+ if (in_array($backtrace[0], self::$callerLock, true)) return;
145
+
146
+ self::$callerLock[] = $backtrace[0];
147
+ self::log_trace($message, $backtrace);
148
+ }
149
+ }
vendor/simplehtmldom/simplehtmldom/HtmlDocument.php CHANGED
@@ -1,1133 +1,1133 @@
1
- <?php namespace simplehtmldom;
2
-
3
- /**
4
- * Website: http://sourceforge.net/projects/simplehtmldom/
5
- * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
- *
7
- * Licensed under The MIT License
8
- * See the LICENSE file in the project root for more information.
9
- *
10
- * Authors:
11
- * S.C. Chen
12
- * John Schlick
13
- * Rus Carroll
14
- * logmanoriginal
15
- *
16
- * Contributors:
17
- * Yousuke Kumakura
18
- * Vadim Voituk
19
- * Antcs
20
- *
21
- * Version $Rev$
22
- */
23
-
24
- include_once 'constants.php';
25
- include_once 'HtmlNode.php';
26
- include_once 'Debug.php';
27
-
28
- class HtmlDocument
29
- {
30
- public $root = null;
31
- public $nodes = array();
32
- public $callback = null;
33
- public $lowercase = false;
34
- public $original_size;
35
- public $size;
36
-
37
- protected $pos;
38
- protected $doc;
39
- protected $char;
40
-
41
- protected $cursor;
42
- protected $parent;
43
- protected $noise = array();
44
- protected $token_blank = " \t\r\n";
45
- protected $token_equal = ' =/>';
46
- protected $token_slash = " />\r\n\t";
47
- protected $token_attr = ' >';
48
-
49
- public $_charset = '';
50
- public $_target_charset = '';
51
-
52
- public $default_br_text = '';
53
- public $default_span_text = '';
54
-
55
- protected $self_closing_tags = array(
56
- 'area' => 1,
57
- 'base' => 1,
58
- 'br' => 1,
59
- 'col' => 1,
60
- 'embed' => 1,
61
- 'hr' => 1,
62
- 'img' => 1,
63
- 'input' => 1,
64
- 'link' => 1,
65
- 'meta' => 1,
66
- 'param' => 1,
67
- 'source' => 1,
68
- 'track' => 1,
69
- 'wbr' => 1
70
- );
71
- protected $block_tags = array(
72
- 'body' => 1,
73
- 'div' => 1,
74
- 'form' => 1,
75
- 'root' => 1,
76
- 'span' => 1,
77
- 'table' => 1
78
- );
79
- protected $optional_closing_tags = array(
80
- // Not optional, see
81
- // https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
82
- 'b' => array('b' => 1),
83
- 'dd' => array('dd' => 1, 'dt' => 1),
84
- // Not optional, see
85
- // https://www.w3.org/TR/html/grouping-content.html#the-dl-element
86
- 'dl' => array('dd' => 1, 'dt' => 1),
87
- 'dt' => array('dd' => 1, 'dt' => 1),
88
- 'li' => array('li' => 1),
89
- 'optgroup' => array('optgroup' => 1, 'option' => 1),
90
- 'option' => array('optgroup' => 1, 'option' => 1),
91
- 'p' => array('p' => 1),
92
- 'rp' => array('rp' => 1, 'rt' => 1),
93
- 'rt' => array('rp' => 1, 'rt' => 1),
94
- 'td' => array('td' => 1, 'th' => 1),
95
- 'th' => array('td' => 1, 'th' => 1),
96
- 'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
97
- );
98
-
99
/**
 * Handles calls to methods that are not defined on this class.
 *
 * Supported legacy names:
 *  - load_file: forwarded to loadFile() after logging a deprecation notice
 *  - clear:     accepted and ignored (no-op)
 *
 * Any other name raises an E_USER_ERROR via trigger_error().
 *
 * @param string $func Name of the method that was called
 * @param array  $args Arguments passed to that method
 * @return mixed Result of the forwarded call; null for the no-op and for
 *               unknown methods (when the error handler lets execution
 *               continue)
 */
function __call($func, $args)
{
    // Allow users to call methods with lower_case syntax
    switch ($func) {
        case 'load_file':
            $actual_function = 'loadFile';
            break;
        case 'clear':
            return; /* no-op */
        default:
            trigger_error(
                'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
                E_USER_ERROR
            );
            // Only reached when a custom error handler lets execution
            // continue. There is nothing to forward to, so stop here instead
            // of falling through with an undefined $actual_function.
            return null;
    }

    // phpcs:ignore Generic.Files.LineLength
    Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');

    return call_user_func_array(array($this, $actual_function), $args);
}
119
-
120
/**
 * Creates a document and, when a source is given, parses it right away.
 *
 * @param string|null $str             Source to parse. A string starting with
 *                                     "http://" or naming an existing file is
 *                                     passed to load_file(); any other
 *                                     non-empty string is parsed as markup
 *                                     via load(). An empty value only
 *                                     initialises an empty document.
 * @param bool        $lowercase       Passed to load() / prepare()
 * @param bool        $forceTagsClosed When false, the table of elements with
 *                                     optional end tags is emptied
 * @param string      $target_charset  Stored in $this->_target_charset
 * @param bool        $stripRN         Passed to load()
 * @param string      $defaultBRText   Passed to load() / prepare()
 * @param string      $defaultSpanText Passed to load() / prepare()
 * @param int         $options         Passed to load()
 */
function __construct(
    $str = null,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0)
{
    if ($str) {
        if (preg_match('/^http:\/\//i', $str) || is_file($str)) {
            // Note: none of the parsing options are forwarded on this path
            $this->load_file($str);
        } else {
            $this->load(
                $str,
                $lowercase,
                $stripRN,
                $defaultBRText,
                $defaultSpanText,
                $options
            );
        }
    } else {
        $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);
    }

    // Forcing tags to be closed implies that we don't trust the html, but
    // it can lead to parsing errors if we SHOULD trust the html.
    // This must clear $optional_closing_tags, the table read_tag() consults.
    // The previous code assigned to an undeclared "optional_closing_array"
    // property, so the flag never had any effect.
    // Because this runs after the initial load above, it affects parses
    // started after construction.
    if (!$forceTagsClosed) {
        $this->optional_closing_tags = array();
    }

    $this->_target_charset = $target_charset;
}
154
-
155
/**
 * Supplies the reduced set of fields shown when the document is dumped
 * (e.g. by var_dump()).
 *
 * @return array Root node, noise table (or the string 'none' when empty),
 *               detected charset, target charset and original size
 */
function __debugInfo()
{
    if (empty($this->noise)) {
        $noise = 'none';
    } else {
        $noise = $this->noise;
    }

    $info = array();
    $info['root'] = $this->root;
    $info['noise'] = $noise;
    $info['charset'] = $this->_charset;
    $info['target charset'] = $this->_target_charset;
    $info['original size'] = $this->original_size;

    return $info;
}
165
-
166
/**
 * Calls clear() on every node registered with this document when the
 * document itself is destroyed.
 */
function __destruct()
{
    if (!isset($this->nodes)) {
        return;
    }

    foreach ($this->nodes as $node) {
        $node->clear();
    }
}
174
-
175
/**
 * Parses a markup string into this document.
 *
 * @param string $str             Markup to parse
 * @param bool   $lowercase       Passed to prepare()
 * @param bool   $stripRN         When true, line breaks with their
 *                                surrounding whitespace are collapsed
 *                                before parsing and the parser runs in
 *                                trimming mode
 * @param string $defaultBRText   Passed to prepare()
 * @param string $defaultSpanText Passed to prepare()
 * @param int    $options         Bit mask; HDOM_SMARTY_AS_TEXT hides Smarty
 *                                scripts from the parser
 * @return $this The document itself, so calls can be chained
 */
function load(
    $str,
    $lowercase = true,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0)
{
    $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);

    if ($stripRN) {
        // Content that must not lose its whitespace is swapped for
        // placeholders while the rest of the document is collapsed.
        $whitespace_sensitive = array(
            "'<\s*script[^>]*>(.*?)<\s*/\s*script\s*>'is",
            "'<!\[CDATA\[(.*?)\]\]>'is",
            "'<!--(?!>|\->)(.*?)-->'is",
            "'<\s*style[^>]*>(.*?)<\s*/\s*style\s*>'is",
            "'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is",
        );

        foreach ($whitespace_sensitive as $pattern) {
            $this->remove_noise($pattern);
        }

        // Edited By Speed Booster Pack. Do not minify html!
        // NOTE(review): despite the remark above, both replacements below
        // are still active in this version - confirm what was intended.

        // Line breaks (plus surrounding whitespace) between two tags vanish
        $this->doc = preg_replace('/\>([\t\s]*[\r\n]^[\t\s]*)\</m', '><', $this->doc);

        // Line breaks (plus surrounding whitespace) inside text become a space
        $this->doc = preg_replace('/([\t\s]*[\r\n]^[\t\s]*)/m', ' ', $this->doc);

        // Put the protected content back and refresh the cached length
        $this->doc = $this->restore_noise($this->doc);
        $this->size = strlen($this->doc);
    }

    // Server-side script blocks are hidden from the parser, tags included
    $this->remove_noise("'(<\?)(.*?)(\?>)'s", true);
    if (count($this->noise) > 0) {
        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Support for server-side scripts has been deprecated and will be removed in the next major version of simplehtmldom.');
    }

    // Smarty scripts are hidden only on request
    if ($options & HDOM_SMARTY_AS_TEXT) {
        $this->remove_noise("'(\{\w)(.*?)(\})'s", true);
        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Support for Smarty scripts has been deprecated and will be removed in the next major version of simplehtmldom.');
    }

    // Build the node tree, then close the root node
    $this->parse($stripRN);
    $this->root->_[HtmlNode::HDOM_INFO_END] = $this->cursor;

    // Charset detection and entity decoding; the raw source is dropped after
    $this->parse_charset();
    $this->decode();
    unset($this->doc);

    return $this;
}
229
-
230
/**
 * Stores a callback in the public $callback property.
 *
 * @param callable $function_name The callback to store
 * @return void
 */
function set_callback($function_name)
{
    $callback = $function_name;
    $this->callback = $callback;
}
234
-
235
/**
 * Clears the stored callback by resetting $callback to null.
 *
 * @return void
 */
function remove_callback()
{
    $none = null;
    $this->callback = $none;
}
239
-
240
/**
 * Returns the inner text of the root node and optionally writes it to a
 * file.
 *
 * @param string $filepath Target file; the empty string (default) skips
 *                         writing. The file is written with an exclusive
 *                         lock and the write result is not checked.
 * @return string The inner text of the root node
 */
function save($filepath = '')
{
    $markup = $this->root->innertext();

    if ($filepath === '') {
        return $markup;
    }

    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
246
-
247
/**
 * Runs a selector query starting at the root node.
 *
 * All arguments are handed unchanged to the root node's find() method.
 *
 * @param string   $selector  Selector expression
 * @param int|null $idx       Forwarded as-is
 * @param bool     $lowercase Forwarded as-is
 * @return mixed Whatever the root node's find() returns
 */
function find($selector, $idx = null, $lowercase = false)
{
    $root = $this->root;

    return $root->find($selector, $idx, $lowercase);
}
251
-
252
/**
 * Delegates to the root node's expect() method.
 *
 * All arguments are handed over unchanged.
 *
 * @param string   $selector  Selector expression
 * @param int|null $idx       Forwarded as-is
 * @param bool     $lowercase Forwarded as-is
 * @return mixed Whatever the root node's expect() returns
 */
function expect($selector, $idx = null, $lowercase = false)
{
    $root = $this->root;

    return $root->expect($selector, $idx, $lowercase);
}
256
-
257
/**
 * Delegates to the root node's dump() method.
 *
 * @param bool $show_attr Forwarded as-is
 * @return void
 *
 * @codeCoverageIgnore
 */
function dump($show_attr = true)
{
    $root = $this->root;
    $root->dump($show_attr);
}
262
-
263
/**
 * Resets the parser state and installs a fresh root node for a new source.
 *
 * @param string|null $str             Source text; surrounding whitespace is
 *                                     trimmed. null (as passed by the
 *                                     constructor when no source is given)
 *                                     is treated as the empty string.
 * @param bool        $lowercase       Stored in $this->lowercase
 * @param string      $defaultBRText   Stored in $this->default_br_text
 * @param string      $defaultSpanText Stored in $this->default_span_text
 * @return void
 */
protected function prepare(
    $str, $lowercase = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT)
{
    $this->clear();

    // trim(null) is deprecated as of PHP 8.1; the result ('') is unchanged
    $this->doc = trim($str === null ? '' : $str);
    $this->size = strlen($this->doc);
    $this->original_size = $this->size; // original size of the html
    $this->pos = 0;
    $this->cursor = 1;
    $this->noise = array();
    $this->nodes = array();
    $this->lowercase = $lowercase;
    $this->default_br_text = $defaultBRText;
    $this->default_span_text = $defaultSpanText;

    // The root node is created after $nodes has been reset
    $this->root = new HtmlNode($this);
    $this->root->tag = 'root';
    $this->root->_[HtmlNode::HDOM_INFO_BEGIN] = -1;
    $this->root->nodetype = HtmlNode::HDOM_TYPE_ROOT;
    $this->parent = $this->root;

    // $this->char is left untouched for an empty document
    if ($this->size > 0) {
        $this->char = $this->doc[0];
    }
}
287
-
288
/**
 * Decodes HTML entities in the stored text of every node.
 *
 * For each node the TEXT and INNER slots (when set) first get their noise
 * placeholders restored and are then entity-decoded. Attribute values are
 * entity-decoded as well, except valueless attributes (stored as true).
 *
 * @return void
 */
protected function decode()
{
    $flags = ENT_QUOTES | ENT_HTML5;
    $text_slots = array(HtmlNode::HDOM_INFO_TEXT, HtmlNode::HDOM_INFO_INNER);

    foreach ($this->nodes as $node) {
        foreach ($text_slots as $slot) {
            if (!isset($node->_[$slot])) {
                continue;
            }

            $restored = $this->restore_noise($node->_[$slot]);
            $node->_[$slot] = html_entity_decode($restored, $flags, $this->_target_charset);
        }

        if (!isset($node->attr) || !is_array($node->attr)) {
            continue;
        }

        foreach ($node->attr as $attr_name => $attr_value) {
            // true marks an attribute without a value - nothing to decode
            if ($attr_value !== true) {
                $node->attr[$attr_name] = html_entity_decode($attr_value, $flags, $this->_target_charset);
            }
        }
    }
}
317
-
318
/**
 * Main parsing loop: alternates between collecting text and reading tags
 * until read_tag() reports the end of the document.
 *
 * @param bool $trim When true, text made up of whitespace only is dropped
 *                   and read_tag() runs in trimming mode
 * @return void
 */
protected function parse($trim = false)
{
    for (;;) {
        if ($this->char !== '<') {
            $text = $this->copy_until_char('<');

            // Skip whitespace between tags? (</a> <b>)
            $whitespace_only = $trim && $text !== '' && trim($text) === '';
            if ($whitespace_only) {
                continue;
            }

            if ($text !== '') {
                $text_node = new HtmlNode($this);
                ++$this->cursor;
                $text_node->_[HtmlNode::HDOM_INFO_TEXT] = $text;
                $this->link_nodes($text_node, false);
            }
        }

        if ($this->read_tag($trim) === false) {
            return;
        }
    }
}
345
-
346
/**
 * Determines the charset of the source document and stores it in
 * $this->_charset.
 *
 * Sources are tried in this order; the first non-empty result wins:
 *  1. get_last_retrieve_url_contents_content_type(), if such a function
 *     exists
 *  2. <meta http-equiv="Content-Type" content="...charset=...">
 *  3. <meta charset="...">
 *  4. mb_detect_encoding(), if available
 *  5. 'UTF-8' as the fallback
 *
 * ISO-8859-1 and its "latin1"/"latin-1" aliases are reported as CP1252.
 *
 * @return string The detected charset
 */
protected function parse_charset()
{
    $charset = null;

    if (function_exists('get_last_retrieve_url_contents_content_type')) {
        $header = get_last_retrieve_url_contents_content_type();
        $found = preg_match('/charset=(.+)/', $header, $parts);

        if ($found) {
            $charset = $parts[1];
        }

        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Determining charset using get_last_retrieve_url_contents_content_type() ' . ($found ? 'successful' : 'failed'));
    }

    if (empty($charset)) {
        // https://www.w3.org/TR/html/document-metadata.html#statedef-http-equiv-content-type
        $el = $this->root->find('meta[http-equiv=Content-Type]', 0, true);

        if (!empty($el)) {
            $content = $el->content;

            if (!empty($content) && preg_match('/charset=(.+)/i', $content, $parts)) {
                $charset = $parts[1];
            }
        }
    }

    // https://www.w3.org/TR/html/document-metadata.html#character-encoding-declaration
    if (empty($charset) && ($meta = $this->root->find('meta[charset]', 0))) {
        $charset = $meta->charset;
    }

    // Guess from the content; needs the optional mbstring extension.
    //
    // mb_detect_encoding() is meant to tell multibyte encodings apart, not
    // single-byte ones (see https://bugs.php.net/bug.php?id=38138). Listing
    // both CP1251/ISO-8859-5 and CP1252/ISO-8859-1 would always yield the
    // former, so only UTF-8 and CP1252/ISO-8859-1 are offered.
    if (empty($charset) && function_exists('mb_detect_encoding')) {
        $guess = mb_detect_encoding(
            $this->doc,
            array('UTF-8', 'CP1252', 'ISO-8859-1')
        );

        $is_western = ($guess === 'CP1252' || $guess === 'ISO-8859-1');

        // CP1251/ISO-8859-5 input gets detected as CP1252/ISO-8859-1. In
        // that case iconv fails and the other charset is assumed.
        if ($is_western && !@iconv('CP1252', 'UTF-8', $this->doc)) {
            $guess = 'CP1251';
        }

        if ($guess !== false) {
            $charset = $guess;
        }
    }

    if (empty($charset)) {
        Debug::log('Unable to determine charset from source document. Assuming UTF-8');
        $charset = 'UTF-8';
    }

    // CP1252 is a superset; if one of its subsets was found, use CP1252
    $cp1252_subsets = array('iso-8859-1', 'latin1', 'latin-1');
    if (in_array(strtolower($charset), $cp1252_subsets, true)) {
        $charset = 'CP1252';
    }

    $this->_charset = $charset;

    return $charset;
}
443
-
444
/**
 * Reads one tag at the current position and links the resulting node(s)
 * into the tree.
 *
 * Handled, in this order:
 *  - no "<" at the current position: treated as end of input, open
 *    elements with optional end tags are closed
 *  - end tags ("</name>")
 *  - declarations starting with "<!" (comments, CDATA, anything else)
 *  - invalid tag names, which are kept as plain text
 *  - start tags including their attributes; the content of <script>
 *    elements is stored as one text node
 *
 * @param bool $trim When true, whitespace inside tags is skipped or
 *                   trimmed instead of being recorded
 * @return bool false when the current char is not "<" (end of input),
 *              true in every other case
 */
protected function read_tag($trim)
{
    if ($this->char !== '<') { // End Of File
        $this->root->_[HtmlNode::HDOM_INFO_END] = $this->cursor;

        // We might be in a nest of unclosed elements for which the end tags
        // can be omitted. Close them for faster seek operations.
        do {
            if (isset($this->optional_closing_tags[strtolower($this->parent->tag)])) {
                $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
            }
        } while ($this->parent = $this->parent->parent); // walk up to the root

        return false;
    }

    $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

    if ($trim) { // "< /html>"
        $this->skip($this->token_blank);
    }

    // End tag: https://dev.w3.org/html5/pf-summary/syntax.html#end-tags
    if ($this->char === '/') {
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

        $tag = $this->copy_until_char('>');
        $tag = $trim ? ltrim($tag, $this->token_blank) : $tag;

        // Skip attributes and whitespace in end tags
        if ($trim && ($pos = strpos($tag, ' ')) !== false) {
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains superfluous whitespace in end tags (</html >).');
            $tag = substr($tag, 0, $pos);
        }

        // strcasecmp() returns non-zero when the names differ
        if (strcasecmp($this->parent->tag, $tag)) { // Parent is not start tag
            $parent_lower = strtolower($this->parent->tag);
            $tag_lower = strtolower($tag);
            if (isset($this->optional_closing_tags[$parent_lower]) && isset($this->block_tags[$tag_lower])) {
                $org_parent = $this->parent;

                // Look for the start tag
                while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower){
                    // Close any unclosed element with optional end tags
                    if (isset($this->optional_closing_tags[strtolower($this->parent->tag)]))
                        $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
                    $this->parent = $this->parent->parent;
                }

                // No start tag, close grandparent
                if (strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $org_parent;

                    if ($this->parent->parent) {
                        $this->parent = $this->parent->parent;
                    }

                    $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
                    return $this->as_text_node($tag);
                }
            } elseif (($this->parent->parent) && isset($this->block_tags[$tag_lower])) {
                // grandparent exists + current is block tag
                // Parent has no end tag
                $this->parent->_[HtmlNode::HDOM_INFO_END] = 0;
                $org_parent = $this->parent;

                // Find start tag
                while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $this->parent->parent;
                }

                // No start tag, close parent
                if (strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $org_parent; // restore original parent
                    $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
                    return $this->as_text_node($tag);
                }
            } elseif (($this->parent->parent) && strtolower($this->parent->parent->tag) === $tag_lower) {
                // Grandparent exists and current tag closes it
                $this->parent->_[HtmlNode::HDOM_INFO_END] = 0;
                $this->parent = $this->parent->parent;
            } else { // Random tag, add as text node
                return $this->as_text_node($tag);
            }
        }

        // Link with start tag
        $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;

        // Continue one level up, unless we are already at the root
        if ($this->parent->parent) {
            $this->parent = $this->parent->parent;
        }

        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    // Start tag: https://dev.w3.org/html5/pf-summary/syntax.html#start-tags
    $node = new HtmlNode($this);
    $node->_[HtmlNode::HDOM_INFO_BEGIN] = $this->cursor++;

    // Tag name
    $tag = $this->copy_until($this->token_slash);

    if (isset($tag[0]) && $tag[0] === '!') { // Doctype, CData, Comment
        if (isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') { // Comment ("<!--")

            /**
             * Comments must have the following format:
             *
             * 1. The string "<!--"
             *
             * 2. Optionally, text, with the additional restriction that the
             * text must not start with the string ">", nor start with the
             * string "->", nor contain the strings "<!--", "-->", or "--!>",
             * nor end with the string "<!-".
             *
             * 3. The string "-->"
             *
             * -- https://www.w3.org/TR/html53/syntax.html#comments
             */

            // Go back until $tag only contains start of comment "!--".
            while (strlen($tag) > 3) {
                $this->char = $this->doc[--$this->pos]; // previous
                $tag = substr($tag, 0, strlen($tag) - 1);
            }

            $node->nodetype = HtmlNode::HDOM_TYPE_COMMENT;
            $node->tag = 'comment';

            $data = '';

            while(true) {
                // Copy until first char of end tag
                $data .= $this->copy_until_char('-');

                // Look ahead in the document, maybe we are at the end
                if (($this->pos + 3) > $this->size) { // End of document
                    Debug::log('Source document ended unexpectedly!');
                    break;
                } elseif (substr($this->doc, $this->pos, 3) === '-->') { // end
                    $data .= $this->copy_until_char('>');
                    break;
                }

                // A "-" that does not start "-->": keep it and move on
                $data .= $this->char;
                $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            }

            if (substr($data, 0, 1) === '>') { // "<!-->"
                Debug::log('Comment must not start with the string ">"!');
                // Rewind to where the comment data started and discard it
                $this->pos -= strlen($data);
                $this->char = $this->doc[$this->pos];
                $data = '';
            }

            if (substr($data, 0, 2) === '->') { // "<!--->"
                Debug::log('Comment must not start with the string "->"!');
                // Rewind to where the comment data started and discard it
                $this->pos -= strlen($data);
                $this->char = $this->doc[$this->pos];
                $data = '';
            }

            if (strpos($data, '<!--') !== false) { // "<!--<!---->"
                Debug::log('Comment must not contain the string "<!--"!');
                // simplehtmldom can work with it anyway
            }

            if (strpos($data, '--!>') !== false) { // "<!----!>-->"
                Debug::log('Comment must not contain the string "--!>"!');
                // simplehtmldom can work with it anyway
            }

            if (substr($data, -3, 3) === '<!-') { // "<!--<!--->"
                Debug::log('Comment must not end with "<!-"!');
                // simplehtmldom can work with it anyway
            }

            $tag .= $data;
            $tag = $this->restore_noise($tag);

            // Comment starts after "!--" and ends before "--" (5 chars total)
            $node->_[HtmlNode::HDOM_INFO_INNER] = substr($tag, 3, strlen($tag) - 5);
        } elseif (substr($tag, 1, 7) === '[CDATA[') {

            // Go back until $tag only contains start of cdata "![CDATA[".
            while (strlen($tag) > 8) {
                $this->char = $this->doc[--$this->pos]; // previous
                $tag = substr($tag, 0, strlen($tag) - 1);
            }

            // CDATA can contain HTML stuff, need to find closing tags first
            $node->nodetype = HtmlNode::HDOM_TYPE_CDATA;
            $node->tag = 'cdata';

            $data = '';

            // There is a rare chance of empty CDATA: "<![CDATA[]]>"
            // In which case the current char is the first "]" of the end tag
            // But the CDATA could also just be a bracket: "<![CDATA[]]]>"
            while(true) {
                // Copy until first char of end tag
                $data .= $this->copy_until_char(']');

                // Look ahead in the document, maybe we are at the end
                if (($this->pos + 3) > $this->size) { // End of document
                    Debug::log('Source document ended unexpectedly!');
                    break;
                } elseif (substr($this->doc, $this->pos, 3) === ']]>') { // end
                    $data .= $this->copy_until_char('>');
                    break;
                }

                // A "]" that does not start "]]>": keep it and move on
                $data .= $this->char;
                $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            }

            $tag .= $data;
            $tag = $this->restore_noise($tag);

            // CDATA starts after "![CDATA[" and ends before "]]" (10 chars total)
            $node->_[HtmlNode::HDOM_INFO_INNER] = substr($tag, 8, strlen($tag) - 10);
        } else { // Unknown
            Debug::log('Source document contains unknown declaration: <' . $tag);
            $node->nodetype = HtmlNode::HDOM_TYPE_UNKNOWN;
            $node->tag = 'unknown';
        }

        // Full source text of the declaration, up to the closing ">"
        $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');

        if ($this->char === '>') {
            $node->_[HtmlNode::HDOM_INFO_TEXT] .= '>';
        }

        $this->link_nodes($node, true);
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    if (!preg_match('/^\w[\w:-]*$/', $tag)) { // Invalid tag name
        // Keep everything up to the next "<" or ">" as plain text
        $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');

        if ($this->char === '>') { // End tag
            $node->_[HtmlNode::HDOM_INFO_TEXT] .= '>';
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        }

        $this->link_nodes($node, false);
        Debug::log('Source document contains invalid tag name: ' . $node->_[HtmlNode::HDOM_INFO_TEXT]);
        return true;
    }

    // Valid tag name
    $node->nodetype = HtmlNode::HDOM_TYPE_ELEMENT;
    $tag_lower = strtolower($tag);
    $node->tag = ($this->lowercase) ? $tag_lower : $tag;

    if (isset($this->optional_closing_tags[$tag_lower])) { // Optional closing tag
        // This start tag implicitly closes every open ancestor listed for it
        while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
            // Previous element was the last element of ancestor
            $this->parent->_[HtmlNode::HDOM_INFO_END] = $node->_[HtmlNode::HDOM_INFO_BEGIN] - 1;
            $this->parent = $this->parent->parent;
        }
        $node->parent = $this->parent;
    }

    $guard = 0; // prevent infinite loop

    // [0] Space between tag and first attribute
    $space = array($this->copy_skip($this->token_blank), '', '');

    do { // Parse attributes
        $name = $this->copy_until($this->token_equal);

        if ($name === '' && $this->char !== null && $space[0] === '') {
            break;
        }

        if ($guard === $this->pos) { // Escape infinite loop
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            continue;
        }

        $guard = $this->pos;

        if ($this->pos >= $this->size - 1 && $this->char !== '>') { // End Of File
            // The tag is never closed: keep what was read as plain text
            Debug::log('Source document ended unexpectedly!');
            $node->nodetype = HtmlNode::HDOM_TYPE_TEXT;
            $node->_[HtmlNode::HDOM_INFO_END] = 0;
            $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
            $node->tag = 'text';
            $this->link_nodes($node, false);
            return true;
        }

        if ($name === '/' || $name === '') { // No more attributes
            break;
        }

        // [1] Whitespace after attribute name
        $space[1] = (strpos($this->token_blank, $this->char) === false) ? '' : $this->copy_skip($this->token_blank);

        $name = $this->restore_noise($name); // might be a noisy name

        if ($this->lowercase) {
            $name = strtolower($name);
        }

        if ($this->char === '=') { // Attribute with value
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $this->parse_attr($node, $name, $space, $trim); // get attribute value
        } else { // Attribute without value
            $node->_[HtmlNode::HDOM_INFO_QUOTE][$name] = HtmlNode::HDOM_QUOTE_NO;
            $node->attr[$name] = true;
            if ($this->char !== '>') {
                $this->char = $this->doc[--$this->pos];
            } // prev
        }

        // Space before attribute and around equal sign
        if (!$trim && $space !== array(' ', '', '')) {
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains superfluous whitespace in attributes (<e attribute = "value">). Enable trimming or fix attribute spacing for best performance.');
            $node->_[HtmlNode::HDOM_INFO_SPACE][$name] = $space;
        }

        // prepare for next attribute
        $space = array(
            ((strpos($this->token_blank, $this->char) === false) ? '' : $this->copy_skip($this->token_blank)),
            '',
            ''
        );
    } while ($this->char !== '>' && $this->char !== '/');

    $this->link_nodes($node, true);

    // Space after last attribute before closing the tag
    if (!$trim && $space[0] !== '') {
        // phpcs:ignore Generic.Files.LineLength
        Debug::log_once('Source document contains superfluous whitespace before the closing braket (<e attribute="value" >). Enable trimming or remove spaces before closing brackets for best performance.');
        $node->_[HtmlNode::HDOM_INFO_ENDSPACE] = $space[0];
    }

    // Whatever is left before ">" (e.g. the "/" of a self-closing tag)
    $rest = ($this->char === '>') ? '' : $this->copy_until_char('>');
    $rest = ($trim) ? trim($rest) : $rest; // <html / >

    $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

    if (trim($rest) === '/') { // Void element
        if ($rest !== '') {
            if (isset($node->_[HtmlNode::HDOM_INFO_ENDSPACE])) {
                $node->_[HtmlNode::HDOM_INFO_ENDSPACE] .= $rest;
            } else {
                $node->_[HtmlNode::HDOM_INFO_ENDSPACE] = $rest;
            }
        }
        $node->_[HtmlNode::HDOM_INFO_END] = 0;
    } elseif (!isset($this->self_closing_tags[strtolower($node->tag)])) {
        // Regular element: record leading text and descend into it
        $innertext = $this->copy_until_char('<');
        if ($innertext !== '') {
            $node->_[HtmlNode::HDOM_INFO_INNER] = $innertext;
        }
        $this->parent = $node;
    }

    if ($node->tag === 'br') {
        $node->_[HtmlNode::HDOM_INFO_INNER] = $this->default_br_text;
    } elseif ($node->tag === 'script') {
        $data = '';

        // There is a rare chance of empty script: "<script></script>"
        // In which case the current char is the start of the end tag
        // But the script could also just contain tags: "<script><div></script>"
        while(true) {
            // Copy until first char of end tag
            $data .= $this->copy_until_char('<');

            // Look ahead in the document, maybe we are at the end
            if (($this->pos + 9) > $this->size) { // End of document
                Debug::log('Source document ended unexpectedly!');
                break;
            } elseif (substr($this->doc, $this->pos, 8) === '</script') { // end
                $this->skip('>'); // don't include the end tag
                break;
            }

            // Note: A script tag may contain any other tag except </script>
            // which needs to be escaped as <\/script>

            $data .= $this->char;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        }

        // The collected script content becomes a single text node
        $node = new HtmlNode($this);
        ++$this->cursor;
        $node->_[HtmlNode::HDOM_INFO_TEXT] = $data;
        $this->link_nodes($node, false);
    }

    return true;
}
847
-
848
/**
 * Reads one attribute value at the current position and stores it on the
 * node.
 *
 * The value may be double-quoted, single-quoted or unquoted. When the node
 * already has an attribute of that name, the value is still consumed but
 * the stored attribute is left as it is.
 *
 * @param HtmlNode $node  Node that receives the attribute
 * @param string   $name  Attribute name
 * @param array    $space Whitespace record of the attribute; index 2
 *                        receives the whitespace between "=" and the value
 *                        (passed by reference)
 * @param bool     $trim  When true, runs of whitespace inside the value are
 *                        collapsed to one space and the value is trimmed
 * @return void
 */
protected function parse_attr($node, $name, &$space, $trim)
{
    $is_duplicate = isset($node->attr[$name]);

    // Copy whitespace between "=" and value
    if (!$is_duplicate) {
        $at_blank = strpos($this->token_blank, $this->char) !== false;
        $space[2] = $at_blank ? $this->copy_skip($this->token_blank) : '';
    }

    if ($this->char === '"' || $this->char === '\'') {
        $delimiter = $this->char;

        if ($delimiter === '"') {
            $quote_type = HtmlNode::HDOM_QUOTE_DOUBLE;
        } else {
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains attribute values with single quotes (<e attribute=\'value\'>). Use double quotes for best performance.');
            $quote_type = HtmlNode::HDOM_QUOTE_SINGLE;
        }

        // Step over the opening quote, read the value, step over the closing quote
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null;
        $value = $this->copy_until_char($delimiter);
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null;
    } else {
        // phpcs:ignore Generic.Files.LineLength
        Debug::log_once('Source document contains attribute values without quotes (<e attribute=value>). Use double quotes for best performance');
        $quote_type = HtmlNode::HDOM_QUOTE_NO;
        $value = $this->copy_until($this->token_attr);
    }

    $value = $this->restore_noise($value);

    if ($trim) {
        // Attribute values must not contain control characters other than space
        // https://www.w3.org/TR/html/dom.html#text-content
        // https://www.w3.org/TR/html/syntax.html#attribute-values
        // https://www.w3.org/TR/xml/#AVNormalize
        $value = trim(preg_replace("/[\r\n\t\s]+/u", ' ', $value));
    }

    if ($is_duplicate) {
        return;
    }

    // Double quotes are the default and therefore not recorded
    if ($quote_type !== HtmlNode::HDOM_QUOTE_DOUBLE) {
        $node->_[HtmlNode::HDOM_INFO_QUOTE][$name] = $quote_type;
    }

    $node->attr[$name] = $value;
}
895
-
896
/**
 * Attaches a node to the element that is currently being parsed.
 *
 * @param HtmlNode $node     Node to attach (passed by reference)
 * @param bool     $is_child When true, the node is also added to the
 *                           parent's list of children
 * @return void
 */
protected function link_nodes(&$node, $is_child)
{
    $owner = $this->parent;

    $node->parent = $owner;
    $owner->nodes[] = $node;

    if (!$is_child) {
        return;
    }

    $owner->children[] = $node;
}
904
-
905
/**
 * Stores an end tag as plain text ("</tag>") instead of treating it as
 * markup, then moves on to the next character.
 *
 * @param string $tag Name of the end tag
 * @return bool Always true
 */
protected function as_text_node($tag)
{
    $text_node = new HtmlNode($this);
    ++$this->cursor;

    $text_node->_[HtmlNode::HDOM_INFO_TEXT] = '</' . $tag . '>';
    $this->link_nodes($text_node, false);

    // next
    ++$this->pos;
    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return true;
}
914
-
915
/**
 * Moves the read position past any run of the given characters.
 *
 * @param string $chars Characters to skip
 * @return void
 */
protected function skip($chars)
{
    $run_length = strspn($this->doc, $chars, $this->pos);
    $this->pos += $run_length;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null; // end of document
    }
}
920
-
921
/**
 * Moves the read position past any run of the given characters and returns
 * the characters that were skipped.
 *
 * @param string $chars Characters to skip
 * @return string The skipped characters; '' when there were none, in which
 *                case the parser state is left untouched
 */
protected function copy_skip($chars)
{
    $start = $this->pos;
    $run_length = strspn($this->doc, $chars, $start);

    if ($run_length === 0) {
        return '';
    }

    $this->pos += $run_length;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null; // end of document
    }

    return substr($this->doc, $start, $run_length);
}
930
-
931
/**
 * Reads up to, but not including, the first occurrence of any of the given
 * characters and moves the read position there.
 *
 * @param string $chars Characters that end the read
 * @return string The characters that were read
 */
protected function copy_until($chars)
{
    $start = $this->pos;
    $span = strcspn($this->doc, $chars, $start);

    $this->pos += $span;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null; // end of document
    }

    return substr($this->doc, $start, $span);
}
939
-
940
/**
 * Reads up to, but not including, the next occurrence of a string and moves
 * the read position there.
 *
 * When the string does not occur again, the rest of the document is
 * returned and the parser is put into its end-of-document state.
 *
 * @param string $char String to search for
 * @return string The characters that were read; '' at the end of the
 *                document or when the search string is found right at the
 *                current position
 */
protected function copy_until_char($char)
{
    if ($this->char === null) {
        return '';
    }

    $start = $this->pos;
    $hit = strpos($this->doc, $char, $start);

    if ($hit === false) {
        // No further occurrence: consume everything that is left
        $remainder = substr($this->doc, $start, $this->size - $start);
        $this->char = null;
        $this->pos = $this->size;

        return $remainder;
    }

    if ($hit === $start) {
        return '';
    }

    $this->char = $this->doc[$hit];
    $this->pos = $hit;

    return substr($this->doc, $start, $hit - $start);
}
958
-
959
- protected function remove_noise($pattern, $remove_tag = false)
960
- {
961
- $count = preg_match_all(
962
- $pattern,
963
- $this->doc,
964
- $matches,
965
- PREG_SET_ORDER | PREG_OFFSET_CAPTURE
966
- );
967
-
968
- for ($i = $count - 1; $i > -1; --$i) {
969
- $key = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);
970
-
971
- $idx = ($remove_tag) ? 0 : 1; // 0 = entire match, 1 = submatch
972
- $this->noise[$key] = $matches[$i][$idx][0];
973
- $this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
974
- }
975
-
976
- // reset the length of content
977
- $this->size = strlen($this->doc);
978
-
979
- if ($this->size > 0) {
980
- $this->char = $this->doc[0];
981
- }
982
- }
983
-
984
- function restore_noise($text)
985
- {
986
- if (empty($this->noise)) return $text; // nothing to restore
987
- $pos = 0;
988
- while (($pos = strpos($text, '___noise___', $pos)) !== false) {
989
- // Sometimes there is a broken piece of markup, and we don't GET the
990
- // pos+11 etc... token which indicates a problem outside of us...
991
-
992
- // todo: "___noise___1000" (or any number with four or more digits)
993
- // in the DOM causes an infinite loop which could be utilized by
994
- // malicious software
995
- if (strlen($text) > $pos + 15) {
996
- $key = '___noise___'
997
- . $text[$pos + 11]
998
- . $text[$pos + 12]
999
- . $text[$pos + 13]
1000
- . $text[$pos + 14]
1001
- . $text[$pos + 15];
1002
-
1003
- if (isset($this->noise[$key])) {
1004
- $text = substr($text, 0, $pos)
1005
- . $this->noise[$key]
1006
- . substr($text, $pos + 16);
1007
-
1008
- unset($this->noise[$key]);
1009
- } else {
1010
- Debug::log_once('Noise restoration failed. DOM has been corrupted!');
1011
- // do this to prevent an infinite loop.
1012
- // FIXME: THis causes an infinite loop because the keyword ___NOISE___ is included in the key!
1013
- $text = substr($text, 0, $pos)
1014
- . 'UNDEFINED NOISE FOR KEY: '
1015
- . $key
1016
- . substr($text, $pos + 16);
1017
- }
1018
- } else {
1019
- // There is no valid key being given back to us... We must get
1020
- // rid of the ___noise___ or we will have a problem.
1021
- Debug::log_once('Noise restoration failed. The provided key is incomplete: ' . $text);
1022
- $text = substr($text, 0, $pos)
1023
- . 'NO NUMERIC NOISE KEY'
1024
- . substr($text, $pos + 11);
1025
- }
1026
- }
1027
- return $text;
1028
- }
1029
-
1030
- function search_noise($text)
1031
- {
1032
- foreach($this->noise as $noiseElement) {
1033
- if (strpos($noiseElement, $text) !== false) {
1034
- return $noiseElement;
1035
- }
1036
- }
1037
- }
1038
-
1039
- function __toString()
1040
- {
1041
- return $this->root->innertext();
1042
- }
1043
-
1044
- function __get($name)
1045
- {
1046
- switch ($name) {
1047
- case 'outertext':
1048
- return $this->root->innertext();
1049
- case 'innertext':
1050
- return $this->root->innertext();
1051
- case 'plaintext':
1052
- return $this->root->text();
1053
- case 'charset':
1054
- return $this->_charset;
1055
- case 'target_charset':
1056
- return $this->_target_charset;
1057
- }
1058
- }
1059
-
1060
- function childNodes($idx = -1)
1061
- {
1062
- return $this->root->childNodes($idx);
1063
- }
1064
-
1065
- function firstChild()
1066
- {
1067
- return $this->root->firstChild();
1068
- }
1069
-
1070
- function lastChild()
1071
- {
1072
- return $this->root->lastChild();
1073
- }
1074
-
1075
- function createElement($name, $value = null)
1076
- {
1077
- $node = new HtmlNode(null);
1078
- $node->nodetype = HtmlNode::HDOM_TYPE_ELEMENT;
1079
- $node->_[HtmlNode::HDOM_INFO_BEGIN] = 1;
1080
- $node->_[HtmlNode::HDOM_INFO_END] = 1;
1081
-
1082
- if ($value !== null) {
1083
- $node->_[HtmlNode::HDOM_INFO_INNER] = $value;
1084
- }
1085
-
1086
- $node->tag = $name;
1087
-
1088
- return $node;
1089
- }
1090
-
1091
- function createTextNode($value)
1092
- {
1093
- $node = new HtmlNode($this);
1094
- $node->nodetype = HtmlNode::HDOM_TYPE_TEXT;
1095
-
1096
- if ($value !== null) {
1097
- $node->_[HtmlNode::HDOM_INFO_TEXT] = $value;
1098
- }
1099
-
1100
- return $node;
1101
- }
1102
-
1103
- function getElementById($id)
1104
- {
1105
- return $this->find("#$id", 0);
1106
- }
1107
-
1108
- function getElementsById($id, $idx = null)
1109
- {
1110
- return $this->find("#$id", $idx);
1111
- }
1112
-
1113
- function getElementByTagName($name)
1114
- {
1115
- return $this->find($name, 0);
1116
- }
1117
-
1118
- function getElementsByTagName($name, $idx = null)
1119
- {
1120
- return $this->find($name, $idx);
1121
- }
1122
-
1123
- function loadFile($file)
1124
- {
1125
- $args = func_get_args();
1126
-
1127
- if(($doc = call_user_func_array('file_get_contents', $args)) !== false) {
1128
- $this->load($doc, true);
1129
- } else {
1130
- return false;
1131
- }
1132
- }
1133
- }
1
+ <?php namespace simplehtmldom;
2
+
3
+ /**
4
+ * Website: http://sourceforge.net/projects/simplehtmldom/
5
+ * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
+ *
7
+ * Licensed under The MIT License
8
+ * See the LICENSE file in the project root for more information.
9
+ *
10
+ * Authors:
11
+ * S.C. Chen
12
+ * John Schlick
13
+ * Rus Carroll
14
+ * logmanoriginal
15
+ *
16
+ * Contributors:
17
+ * Yousuke Kumakura
18
+ * Vadim Voituk
19
+ * Antcs
20
+ *
21
+ * Version $Rev$
22
+ */
23
+
24
+ include_once 'constants.php';
25
+ include_once 'HtmlNode.php';
26
+ include_once 'Debug.php';
27
+
28
+ class HtmlDocument
29
+ {
30
+ public $root = null;
31
+ public $nodes = array();
32
+ public $callback = null;
33
+ public $lowercase = false;
34
+ public $original_size;
35
+ public $size;
36
+
37
+ protected $pos;
38
+ protected $doc;
39
+ protected $char;
40
+
41
+ protected $cursor;
42
+ protected $parent;
43
+ protected $noise = array();
44
+ protected $token_blank = " \t\r\n";
45
+ protected $token_equal = ' =/>';
46
+ protected $token_slash = " />\r\n\t";
47
+ protected $token_attr = ' >';
48
+
49
+ public $_charset = '';
50
+ public $_target_charset = '';
51
+
52
+ public $default_br_text = '';
53
+ public $default_span_text = '';
54
+
55
+ protected $self_closing_tags = array(
56
+ 'area' => 1,
57
+ 'base' => 1,
58
+ 'br' => 1,
59
+ 'col' => 1,
60
+ 'embed' => 1,
61
+ 'hr' => 1,
62
+ 'img' => 1,
63
+ 'input' => 1,
64
+ 'link' => 1,
65
+ 'meta' => 1,
66
+ 'param' => 1,
67
+ 'source' => 1,
68
+ 'track' => 1,
69
+ 'wbr' => 1
70
+ );
71
+ protected $block_tags = array(
72
+ 'body' => 1,
73
+ 'div' => 1,
74
+ 'form' => 1,
75
+ 'root' => 1,
76
+ 'span' => 1,
77
+ 'table' => 1
78
+ );
79
+ protected $optional_closing_tags = array(
80
+ // Not optional, see
81
+ // https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
82
+ 'b' => array('b' => 1),
83
+ 'dd' => array('dd' => 1, 'dt' => 1),
84
+ // Not optional, see
85
+ // https://www.w3.org/TR/html/grouping-content.html#the-dl-element
86
+ 'dl' => array('dd' => 1, 'dt' => 1),
87
+ 'dt' => array('dd' => 1, 'dt' => 1),
88
+ 'li' => array('li' => 1),
89
+ 'optgroup' => array('optgroup' => 1, 'option' => 1),
90
+ 'option' => array('optgroup' => 1, 'option' => 1),
91
+ 'p' => array('p' => 1),
92
+ 'rp' => array('rp' => 1, 'rt' => 1),
93
+ 'rt' => array('rp' => 1, 'rt' => 1),
94
+ 'td' => array('td' => 1, 'th' => 1),
95
+ 'th' => array('td' => 1, 'th' => 1),
96
+ 'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
97
+ );
98
+
99
+ function __call($func, $args)
100
+ {
101
+ // Allow users to call methods with lower_case syntax
102
+ switch($func)
103
+ {
104
+ case 'load_file':
105
+ $actual_function = 'loadFile'; break;
106
+ case 'clear': return; /* no-op */
107
+ default:
108
+ trigger_error(
109
+ 'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
110
+ E_USER_ERROR
111
+ );
112
+ }
113
+
114
+ // phpcs:ignore Generic.Files.LineLength
115
+ Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');
116
+
117
+ return call_user_func_array(array($this, $actual_function), $args);
118
+ }
119
+
120
+ function __construct(
121
+ $str = null,
122
+ $lowercase = true,
123
+ $forceTagsClosed = true,
124
+ $target_charset = DEFAULT_TARGET_CHARSET,
125
+ $stripRN = true,
126
+ $defaultBRText = DEFAULT_BR_TEXT,
127
+ $defaultSpanText = DEFAULT_SPAN_TEXT,
128
+ $options = 0)
129
+ {
130
+ if ($str) {
131
+ if (preg_match('/^http:\/\//i', $str) || is_file($str)) {
132
+ $this->load_file($str);
133
+ } else {
134
+ $this->load(
135
+ $str,
136
+ $lowercase,
137
+ $stripRN,
138
+ $defaultBRText,
139
+ $defaultSpanText,
140
+ $options
141
+ );
142
+ }
143
+ } else {
144
+ $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);
145
+ }
146
+ // Forcing tags to be closed implies that we don't trust the html, but
147
+ // it can lead to parsing errors if we SHOULD trust the html.
148
+ if (!$forceTagsClosed) {
149
+ $this->optional_closing_array = array();
150
+ }
151
+
152
+ $this->_target_charset = $target_charset;
153
+ }
154
+
155
+ function __debugInfo()
156
+ {
157
+ return array(
158
+ 'root' => $this->root,
159
+ 'noise' => empty($this->noise) ? 'none' : $this->noise,
160
+ 'charset' => $this->_charset,
161
+ 'target charset' => $this->_target_charset,
162
+ 'original size' => $this->original_size
163
+ );
164
+ }
165
+
166
+ function __destruct()
167
+ {
168
+ if (isset($this->nodes)) {
169
+ foreach ($this->nodes as $n) {
170
+ $n->clear();
171
+ }
172
+ }
173
+ }
174
+
175
+ function load(
176
+ $str,
177
+ $lowercase = true,
178
+ $stripRN = true,
179
+ $defaultBRText = DEFAULT_BR_TEXT,
180
+ $defaultSpanText = DEFAULT_SPAN_TEXT,
181
+ $options = 0)
182
+ {
183
+ // prepare
184
+ $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);
185
+
186
+ if ($stripRN) {
187
+ // Temporarily remove any element that shouldn't loose whitespace
188
+ $this->remove_noise("'<\s*script[^>]*>(.*?)<\s*/\s*script\s*>'is");
189
+ $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is");
190
+ $this->remove_noise("'<!--(?!>|\->)(.*?)-->'is");
191
+ $this->remove_noise("'<\s*style[^>]*>(.*?)<\s*/\s*style\s*>'is");
192
+ $this->remove_noise("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is");
193
+
194
+ // Edited By Speed Booster Pack. Do not minify html!
195
+ // Remove whitespace and newlines between tags
196
+ $this->doc = preg_replace('/\>([\t\s]*[\r\n]^[\t\s]*)\</m', '><', $this->doc);
197
+
198
+ // Remove whitespace and newlines in text
199
+ $this->doc = preg_replace('/([\t\s]*[\r\n]^[\t\s]*)/m', ' ', $this->doc);
200
+
201
+ // Restore temporarily removed elements and calculate new size
202
+ $this->doc = $this->restore_noise($this->doc);
203
+ $this->size = strlen($this->doc);
204
+ }
205
+
206
+ $this->remove_noise("'(<\?)(.*?)(\?>)'s", true); // server-side script
207
+ if (count($this->noise)) {
208
+ // phpcs:ignore Generic.Files.LineLength
209
+ Debug::log('Support for server-side scripts has been deprecated and will be removed in the next major version of simplehtmldom.');
210
+ }
211
+
212
+ if($options & HDOM_SMARTY_AS_TEXT) { // Strip Smarty scripts
213
+ $this->remove_noise("'(\{\w)(.*?)(\})'s", true);
214
+ // phpcs:ignore Generic.Files.LineLength
215
+ Debug::log('Support for Smarty scripts has been deprecated and will be removed in the next major version of simplehtmldom.');
216
+ }
217
+
218
+ // parsing
219
+ $this->parse($stripRN);
220
+ // end
221
+ $this->root->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
222
+ $this->parse_charset();
223
+ $this->decode();
224
+ unset($this->doc);
225
+
226
+ // make load function chainable
227
+ return $this;
228
+ }
229
+
230
+ function set_callback($function_name)
231
+ {
232
+ $this->callback = $function_name;
233
+ }
234
+
235
+ function remove_callback()
236
+ {
237
+ $this->callback = null;
238
+ }
239
+
240
+ function save($filepath = '')
241
+ {
242
+ $ret = $this->root->innertext();
243
+ if ($filepath !== '') { file_put_contents($filepath, $ret, LOCK_EX); }
244
+ return $ret;
245
+ }
246
+
247
+ function find($selector, $idx = null, $lowercase = false)
248
+ {
249
+ return $this->root->find($selector, $idx, $lowercase);
250
+ }
251
+
252
+ function expect($selector, $idx = null, $lowercase = false)
253
+ {
254
+ return $this->root->expect($selector, $idx, $lowercase);
255
+ }
256
+
257
+ /** @codeCoverageIgnore */
258
+ function dump($show_attr = true)
259
+ {
260
+ $this->root->dump($show_attr);
261
+ }
262
+
263
+ protected function prepare(
264
+ $str, $lowercase = true,
265
+ $defaultBRText = DEFAULT_BR_TEXT,
266
+ $defaultSpanText = DEFAULT_SPAN_TEXT)
267
+ {
268
+ $this->clear();
269
+
270
+ $this->doc = trim($str);
271
+ $this->size = strlen($this->doc);
272
+ $this->original_size = $this->size; // original size of the html
273
+ $this->pos = 0;
274
+ $this->cursor = 1;
275
+ $this->noise = array();
276
+ $this->nodes = array();
277
+ $this->lowercase = $lowercase;
278
+ $this->default_br_text = $defaultBRText;
279
+ $this->default_span_text = $defaultSpanText;
280
+ $this->root = new HtmlNode($this);
281
+ $this->root->tag = 'root';
282
+ $this->root->_[HtmlNode::HDOM_INFO_BEGIN] = -1;
283
+ $this->root->nodetype = HtmlNode::HDOM_TYPE_ROOT;
284
+ $this->parent = $this->root;
285
+ if ($this->size > 0) { $this->char = $this->doc[0]; }
286
+ }
287
+
288
+ protected function decode()
289
+ {
290
+ foreach($this->nodes as $node) {
291
+ if (isset($node->_[HtmlNode::HDOM_INFO_TEXT])) {
292
+ $node->_[HtmlNode::HDOM_INFO_TEXT] = html_entity_decode(
293
+ $this->restore_noise($node->_[HtmlNode::HDOM_INFO_TEXT]),
294
+ ENT_QUOTES | ENT_HTML5,
295
+ $this->_target_charset
296
+ );
297
+ }
298
+ if (isset($node->_[HtmlNode::HDOM_INFO_INNER])) {
299
+ $node->_[HtmlNode::HDOM_INFO_INNER] = html_entity_decode(
300
+ $this->restore_noise($node->_[HtmlNode::HDOM_INFO_INNER]),
301
+ ENT_QUOTES | ENT_HTML5,
302
+ $this->_target_charset
303
+ );
304
+ }
305
+ if (isset($node->attr) && is_array($node->attr)) {
306
+ foreach($node->attr as $a => $v) {
307
+ if ($v === true) continue;
308
+ $node->attr[$a] = html_entity_decode(
309
+ $v,
310
+ ENT_QUOTES | ENT_HTML5,
311
+ $this->_target_charset
312
+ );
313
+ }
314
+ }
315
+ }
316
+ }
317
+
318
+ protected function parse($trim = false)
319
+ {
320
+ while (true) {
321
+
322
+ if ($this->char !== '<') {
323
+ $content = $this->copy_until_char('<');
324
+
325
+ if ($content !== '') {
326
+
327
+ // Skip whitespace between tags? (</a> <b>)
328
+ if ($trim && trim($content) === '') {
329
+ continue;
330
+ }
331
+
332
+ $node = new HtmlNode($this);
333
+ ++$this->cursor;
334
+ $node->_[HtmlNode::HDOM_INFO_TEXT] = $content;
335
+ $this->link_nodes($node, false);
336
+
337
+ }
338
+ }
339
+
340
+ if($this->read_tag($trim) === false) {
341
+ break;
342
+ }
343
+ }
344
+ }
345
+
346
+ protected function parse_charset()
347
+ {
348
+ $charset = null;
349
+
350
+ if (function_exists('get_last_retrieve_url_contents_content_type')) {
351
+ $contentTypeHeader = get_last_retrieve_url_contents_content_type();
352
+ $success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches);
353
+ if ($success) {
354
+ $charset = $matches[1];
355
+ }
356
+
357
+ // phpcs:ignore Generic.Files.LineLength
358
+ Debug::log('Determining charset using get_last_retrieve_url_contents_content_type() ' . ($success ? 'successful' : 'failed'));
359
+ }
360
+
361
+ if (empty($charset)) {
362
+ // https://www.w3.org/TR/html/document-metadata.html#statedef-http-equiv-content-type
363
+ $el = $this->root->find('meta[http-equiv=Content-Type]', 0, true);
364
+
365
+ if (!empty($el)) {
366
+ $fullvalue = $el->content;
367
+
368
+ if (!empty($fullvalue)) {
369
+ $success = preg_match(
370
+ '/charset=(.+)/i',
371
+ $fullvalue,
372
+ $matches
373
+ );
374
+
375
+ if ($success) {
376
+ $charset = $matches[1];
377
+ }
378
+ }
379
+ }
380
+ }
381
+
382
+ if (empty($charset)) {
383
+ // https://www.w3.org/TR/html/document-metadata.html#character-encoding-declaration
384
+ if ($meta = $this->root->find('meta[charset]', 0)) {
385
+ $charset = $meta->charset;
386
+ }
387
+ }
388
+
389
+ if (empty($charset)) {
390
+ // Try to guess the charset based on the content
391
+ // Requires Multibyte String (mbstring) support (optional)
392
+ if (function_exists('mb_detect_encoding')) {
393
+ /**
394
+ * mb_detect_encoding() is not intended to distinguish between
395
+ * charsets, especially single-byte charsets. Its primary
396
+ * purpose is to detect which multibyte encoding is in use,
397
+ * i.e. UTF-8, UTF-16, shift-JIS, etc.
398
+ *
399
+ * -- https://bugs.php.net/bug.php?id=38138
400
+ *
401
+ * Adding both CP1251/ISO-8859-5 and CP1252/ISO-8859-1 will
402
+ * always result in CP1251/ISO-8859-5 and vice versa.
403
+ *
404
+ * Thus, only detect if it's either UTF-8 or CP1252/ISO-8859-1
405
+ * to stay compatible.
406
+ */
407
+ $encoding = mb_detect_encoding(
408
+ $this->doc,
409
+ array( 'UTF-8', 'CP1252', 'ISO-8859-1' )
410
+ );
411
+
412
+ if ($encoding === 'CP1252' || $encoding === 'ISO-8859-1') {
413
+ // Due to a limitation of mb_detect_encoding
414
+ // 'CP1251'/'ISO-8859-5' will be detected as
415
+ // 'CP1252'/'ISO-8859-1'. This will cause iconv to fail, in
416
+ // which case we can simply assume it is the other charset.
417
+ if (!@iconv('CP1252', 'UTF-8', $this->doc)) {
418
+ $encoding = 'CP1251';
419
+ }
420
+ }
421
+
422
+ if ($encoding !== false) {
423
+ $charset = $encoding;
424
+ }
425
+ }
426
+ }
427
+
428
+ if (empty($charset)) {
429
+ Debug::log('Unable to determine charset from source document. Assuming UTF-8');
430
+ $charset = 'UTF-8';
431
+ }
432
+
433
+ // Since CP1252 is a superset, if we get one of it's subsets, we want
434
+ // it instead.
435
+ if ((strtolower($charset) == 'iso-8859-1')
436
+ || (strtolower($charset) == 'latin1')
437
+ || (strtolower($charset) == 'latin-1')) {
438
+ $charset = 'CP1252';
439
+ }
440
+
441
+ return $this->_charset = $charset;
442
+ }
443
+
444
+ protected function read_tag($trim)
445
+ {
446
+ if ($this->char !== '<') { // End Of File
447
+ $this->root->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
448
+
449
+ // We might be in a nest of unclosed elements for which the end tags
450
+ // can be omitted. Close them for faster seek operations.
451
+ do {
452
+ if (isset($this->optional_closing_tags[strtolower($this->parent->tag)])) {
453
+ $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
454
+ }
455
+ } while ($this->parent = $this->parent->parent);
456
+
457
+ return false;
458
+ }
459
+
460
+ $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
461
+
462
+ if ($trim) { // "< /html>"
463
+ $this->skip($this->token_blank);
464
+ }
465
+
466
+ // End tag: https://dev.w3.org/html5/pf-summary/syntax.html#end-tags
467
+ if ($this->char === '/') {
468
+ $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
469
+
470
+ $tag = $this->copy_until_char('>');
471
+ $tag = $trim ? ltrim($tag, $this->token_blank) : $tag;
472
+
473
+ // Skip attributes and whitespace in end tags
474
+ if ($trim && ($pos = strpos($tag, ' ')) !== false) {
475
+ // phpcs:ignore Generic.Files.LineLength
476
+ Debug::log_once('Source document contains superfluous whitespace in end tags (</html >).');
477
+ $tag = substr($tag, 0, $pos);
478
+ }
479
+
480
+ if (strcasecmp($this->parent->tag, $tag)) { // Parent is not start tag
481
+ $parent_lower = strtolower($this->parent->tag);
482
+ $tag_lower = strtolower($tag);
483
+ if (isset($this->optional_closing_tags[$parent_lower]) && isset($this->block_tags[$tag_lower])) {
484
+ $org_parent = $this->parent;
485
+
486
+ // Look for the start tag
487
+ while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower){
488
+ // Close any unclosed element with optional end tags
489
+ if (isset($this->optional_closing_tags[strtolower($this->parent->tag)]))
490
+ $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
491
+ $this->parent = $this->parent->parent;
492
+ }
493
+
494
+ // No start tag, close grandparent
495
+ if (strtolower($this->parent->tag) !== $tag_lower) {
496
+ $this->parent = $org_parent;
497
+
498
+ if ($this->parent->parent) {
499
+ $this->parent = $this->parent->parent;
500
+ }
501
+
502
+ $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
503
+ return $this->as_text_node($tag);
504
+ }
505
+ } elseif (($this->parent->parent) && isset($this->block_tags[$tag_lower])) {
506
+ // grandparent exists + current is block tag
507
+ // Parent has no end tag
508
+ $this->parent->_[HtmlNode::HDOM_INFO_END] = 0;
509
+ $org_parent = $this->parent;
510
+
511
+ // Find start tag
512
+ while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower) {
513
+ $this->parent = $this->parent->parent;
514
+ }
515
+
516
+ // No start tag, close parent
517
+ if (strtolower($this->parent->tag) !== $tag_lower) {
518
+ $this->parent = $org_parent; // restore origonal parent
519
+ $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
520
+ return $this->as_text_node($tag);
521
+ }
522
+ } elseif (($this->parent->parent) && strtolower($this->parent->parent->tag) === $tag_lower) {
523
+ // Grandparent exists and current tag closes it
524
+ $this->parent->_[HtmlNode::HDOM_INFO_END] = 0;
525
+ $this->parent = $this->parent->parent;
526
+ } else { // Random tag, add as text node
527
+ return $this->as_text_node($tag);
528
+ }
529
+ }
530
+
531
+ // Link with start tag
532
+ $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
533
+
534
+ if ($this->parent->parent) {
535
+ $this->parent = $this->parent->parent;
536
+ }
537
+
538
+ $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
539
+ return true;
540
+ }
541
+
542
+ // Start tag: https://dev.w3.org/html5/pf-summary/syntax.html#start-tags
543
+ $node = new HtmlNode($this);
544
+ $node->_[HtmlNode::HDOM_INFO_BEGIN] = $this->cursor++;
545
+
546
+ // Tag name
547
+ $tag = $this->copy_until($this->token_slash);
548
+
549
+ if (isset($tag[0]) && $tag[0] === '!') { // Doctype, CData, Comment
550
+ if (isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') { // Comment ("<!--")
551
+
552
+ /**
553
+ * Comments must have the following format:
554
+ *
555
+ * 1. The string "<!--"
556
+ *
557
+ * 2. Optionally, text, with the additional restriction that the
558
+ * text must not start with the string ">", nor start with the
559
+ * string "->", nor contain the strings "<!--", "-->", or "--!>",
560
+ * nor end with the string "<!-".
561
+ *
562
+ * 3. The string "-->"
563
+ *
564
+ * -- https://www.w3.org/TR/html53/syntax.html#comments
565
+ */
566
+
567
+ // Go back until $tag only contains start of comment "!--".
568
+ while (strlen($tag) > 3) {
569
+ $this->char = $this->doc[--$this->pos]; // previous
570
+ $tag = substr($tag, 0, strlen($tag) - 1);
571
+ }
572
+
573
+ $node->nodetype = HtmlNode::HDOM_TYPE_COMMENT;
574
+ $node->tag = 'comment';
575
+
576
+ $data = '';
577
+
578
+ while(true) {
579
+ // Copy until first char of end tag
580
+ $data .= $this->copy_until_char('-');
581
+
582
+ // Look ahead in the document, maybe we are at the end
583
+ if (($this->pos + 3) > $this->size) { // End of document
584
+ Debug::log('Source document ended unexpectedly!');
585
+ break;
586
+ } elseif (substr($this->doc, $this->pos, 3) === '-->') { // end
587
+ $data .= $this->copy_until_char('>');
588
+ break;
589
+ }
590
+
591
+ $data .= $this->char;
592
+ $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
593
+ }
594
+
595
+ if (substr($data, 0, 1) === '>') { // "<!-->"
596
+ Debug::log('Comment must not start with the string ">"!');
597
+ $this->pos -= strlen($data);
598
+ $this->char = $this->doc[$this->pos];
599
+ $data = '';
600
+ }
601
+
602
+ if (substr($data, 0, 2) === '->') { // "<!--->"
603
+ Debug::log('Comment must not start with the string "->"!');
604
+ $this->pos -= strlen($data);
605
+ $this->char = $this->doc[$this->pos];
606
+ $data = '';
607
+ }
608
+
609
+ if (strpos($data, '<!--') !== false) { // "<!--<!---->"
610
+ Debug::log('Comment must not contain the string "<!--"!');
611
+ // simplehtmldom can work with it anyway
612
+ }
613
+
614
+ if (strpos($data, '--!>') !== false) { // "<!----!>-->"
615
+ Debug::log('Comment must not contain the string "--!>"!');
616
+ // simplehtmldom can work with it anyway
617
+ }
618
+
619
+ if (substr($data, -3, 3) === '<!-') { // "<!--<!--->"
620
+ Debug::log('Comment must not end with "<!-"!');
621
+ // simplehtmldom can work with it anyway
622
+ }
623
+
624
+ $tag .= $data;
625
+ $tag = $this->restore_noise($tag);
626
+
627
+ // Comment starts after "!--" and ends before "--" (5 chars total)
628
+ $node->_[HtmlNode::HDOM_INFO_INNER] = substr($tag, 3, strlen($tag) - 5);
629
+ } elseif (substr($tag, 1, 7) === '[CDATA[') {
630
+
631
+ // Go back until $tag only contains start of cdata "![CDATA[".
632
+ while (strlen($tag) > 8) {
633
+ $this->char = $this->doc[--$this->pos]; // previous
634
+ $tag = substr($tag, 0, strlen($tag) - 1);
635
+ }
636
+
637
+ // CDATA can contain HTML stuff, need to find closing tags first
638
+ $node->nodetype = HtmlNode::HDOM_TYPE_CDATA;
639
+ $node->tag = 'cdata';
640
+
641
+ $data = '';
642
+
643
+ // There is a rare chance of empty CDATA: "<[CDATA[]]>"
644
+ // In which case the current char is the first "[" of the end tag
645
+ // But the CDATA could also just be a bracket: "<[CDATA[]]]>"
646
+ while(true) {
647
+ // Copy until first char of end tag
648
+ $data .= $this->copy_until_char(']');
649
+
650
+ // Look ahead in the document, maybe we are at the end
651
+ if (($this->pos + 3) > $this->size) { // End of document
652
+ Debug::log('Source document ended unexpectedly!');
653
+ break;
654
+ } elseif (substr($this->doc, $this->pos, 3) === ']]>') { // end
655
+ $data .= $this->copy_until_char('>');
656
+ break;
657
+ }
658
+
659
+ $data .= $this->char;
660
+ $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
661
+ }
662
+
663
+ $tag .= $data;
664
+ $tag = $this->restore_noise($tag);
665
+
666
+ // CDATA starts after "![CDATA[" and ends before "]]" (10 chars total)
667
+ $node->_[HtmlNode::HDOM_INFO_INNER] = substr($tag, 8, strlen($tag) - 10);
668
+ } else { // Unknown
669
+ Debug::log('Source document contains unknown declaration: <' . $tag);
670
+ $node->nodetype = HtmlNode::HDOM_TYPE_UNKNOWN;
671
+ $node->tag = 'unknown';
672
+ }
673
+
674
+ $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');
675
+
676
+ if ($this->char === '>') {
677
+ $node->_[HtmlNode::HDOM_INFO_TEXT] .= '>';
678
+ }
679
+
680
+ $this->link_nodes($node, true);
681
+ $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
682
+ return true;
683
+ }
684
+
685
+ if (!preg_match('/^\w[\w:-]*$/', $tag)) { // Invalid tag name
686
+ $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');
687
+
688
+ if ($this->char === '>') { // End tag
689
+ $node->_[HtmlNode::HDOM_INFO_TEXT] .= '>';
690
+ $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
691
+ }
692
+
693
+ $this->link_nodes($node, false);
694
+ Debug::log('Source document contains invalid tag name: ' . $node->_[HtmlNode::HDOM_INFO_TEXT]);
695
+ return true;
696
+ }
697
+
698
+ // Valid tag name
699
+ $node->nodetype = HtmlNode::HDOM_TYPE_ELEMENT;
700
+ $tag_lower = strtolower($tag);
701
+ $node->tag = ($this->lowercase) ? $tag_lower : $tag;
702
+
703
+ if (isset($this->optional_closing_tags[$tag_lower])) { // Optional closing tag
704
+ while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
705
+ // Previous element was the last element of ancestor
706
+ $this->parent->_[HtmlNode::HDOM_INFO_END] = $node->_[HtmlNode::HDOM_INFO_BEGIN] - 1;
707
+ $this->parent = $this->parent->parent;
708
+ }
709
+ $node->parent = $this->parent;
710
+ }
711
+
712
+ $guard = 0; // prevent infinity loop
713
+
714
+ // [0] Space between tag and first attribute
715
+ $space = array($this->copy_skip($this->token_blank), '', '');
716
+
717
+ do { // Parse attributes
718
+ $name = $this->copy_until($this->token_equal);
719
+
720
+ if ($name === '' && $this->char !== null && $space[0] === '') {
721
+ break;
722
+ }
723
+
724
+ if ($guard === $this->pos) { // Escape infinite loop
725
+ $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
726
+ continue;
727
+ }
728
+
729
+ $guard = $this->pos;
730
+
731
+ if ($this->pos >= $this->size - 1 && $this->char !== '>') { // End Of File
732
+ Debug::log('Source document ended unexpectedly!');
733
+ $node->nodetype = HtmlNode::HDOM_TYPE_TEXT;
734
+ $node->_[HtmlNode::HDOM_INFO_END] = 0;
735
+ $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
736
+ $node->tag = 'text';
737
+ $this->link_nodes($node, false);
738
+ return true;
739
+ }
740
+
741
+ if ($name === '/' || $name === '') { // No more attributes
742
+ break;
743
+ }
744
+
745
+ // [1] Whitespace after attribute name
746
+ $space[1] = (strpos($this->token_blank, $this->char) === false) ? '' : $this->copy_skip($this->token_blank);
747
+
748
+ $name = $this->restore_noise($name); // might be a noisy name
749
+
750
+ if ($this->lowercase) {
751
+ $name = strtolower($name);
752
+ }
753
+
754
+ if ($this->char === '=') { // Attribute with value
755
+ $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
756
+ $this->parse_attr($node, $name, $space, $trim); // get attribute value
757
+ } else { // Attribute without value
758
+ $node->_[HtmlNode::HDOM_INFO_QUOTE][$name] = HtmlNode::HDOM_QUOTE_NO;
759
+ $node->attr[$name] = true;
760
+ if ($this->char !== '>') {
761
+ $this->char = $this->doc[--$this->pos];
762
+ } // prev
763
+ }
764
+
765
+ // Space before attribute and around equal sign
766
+ if (!$trim && $space !== array(' ', '', '')) {
767
+ // phpcs:ignore Generic.Files.LineLength
768
+ Debug::log_once('Source document contains superfluous whitespace in attributes (<e attribute = "value">). Enable trimming or fix attribute spacing for best performance.');
769
+ $node->_[HtmlNode::HDOM_INFO_SPACE][$name] = $space;
770
+ }
771
+
772
+ // prepare for next attribute
773
+ $space = array(
774
+ ((strpos($this->token_blank, $this->char) === false) ? '' : $this->copy_skip($this->token_blank)),
775
+ '',
776
+ ''
777
+ );
778
+ } while ($this->char !== '>' && $this->char !== '/');
779
+
780
+ $this->link_nodes($node, true);
781
+
782
+ // Space after last attribute before closing the tag
783
+ if (!$trim && $space[0] !== '') {
784
+ // phpcs:ignore Generic.Files.LineLength
785
+ Debug::log_once('Source document contains superfluous whitespace before the closing braket (<e attribute="value" >). Enable trimming or remove spaces before closing brackets for best performance.');
786
+ $node->_[HtmlNode::HDOM_INFO_ENDSPACE] = $space[0];
787
+ }
788
+
789
+ $rest = ($this->char === '>') ? '' : $this->copy_until_char('>');
790
+ $rest = ($trim) ? trim($rest) : $rest; // <html / >
791
+
792
+ $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
793
+
794
+ if (trim($rest) === '/') { // Void element
795
+ if ($rest !== '') {
796
+ if (isset($node->_[HtmlNode::HDOM_INFO_ENDSPACE])) {
797
+ $node->_[HtmlNode::HDOM_INFO_ENDSPACE] .= $rest;
798
+ } else {
799
+ $node->_[HtmlNode::HDOM_INFO_ENDSPACE] = $rest;
800
+ }
801
+ }
802
+ $node->_[HtmlNode::HDOM_INFO_END] = 0;
803
+ } elseif (!isset($this->self_closing_tags[strtolower($node->tag)])) {
804
+ $innertext = $this->copy_until_char('<');
805
+ if ($innertext !== '') {
806
+ $node->_[HtmlNode::HDOM_INFO_INNER] = $innertext;
807
+ }
808
+ $this->parent = $node;
809
+ }
810
+
811
+ if ($node->tag === 'br') {
812
+ $node->_[HtmlNode::HDOM_INFO_INNER] = $this->default_br_text;
813
+ } elseif ($node->tag === 'script') {
814
+ $data = '';
815
+
816
+ // There is a rare chance of empty script: "<script></script>"
817
+ // In which case the current char is the start of the end tag
818
+ // But the script could also just contain tags: "<script><div></script>"
819
+ while(true) {
820
+ // Copy until first char of end tag
821
+ $data .= $this->copy_until_char('<');
822
+
823
+ // Look ahead in the document, maybe we are at the end
824
+ if (($this->pos + 9) > $this->size) { // End of document
825
+ Debug::log('Source document ended unexpectedly!');
826
+ break;
827
+ } elseif (substr($this->doc, $this->pos, 8) === '</script') { // end
828
+ $this->skip('>'); // don't include the end tag
829
+ break;
830
+ }
831
+
832
+ // Note: A script tag may contain any other tag except </script>
833
+ // which needs to be escaped as <\/script>
834
+
835
+ $data .= $this->char;
836
+ $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
837
+ }
838
+
839
+ $node = new HtmlNode($this);
840
+ ++$this->cursor;
841
+ $node->_[HtmlNode::HDOM_INFO_TEXT] = $data;
842
+ $this->link_nodes($node, false);
843
+ }
844
+
845
+ return true;
846
+ }
847
+
848
/**
 * Parse one attribute value at the current read position and store it on
 * the node.
 *
 * Handles double-quoted, single-quoted and unquoted values. When the node
 * already has an attribute called $name, the value is still consumed from
 * the document but neither the value nor its quote type is stored.
 *
 * @param HtmlNode $node  Node receiving the attribute.
 * @param string   $name  Attribute name.
 * @param array    $space Whitespace triple of the attribute; index 2 (the
 *                        whitespace between "=" and the value) is filled in
 *                        here for non-duplicate attributes.
 * @param bool     $trim  Collapse whitespace runs in the value to a single
 *                        space and trim both ends.
 * @return void
 */
protected function parse_attr($node, $name, &$space, $trim)
{
    $is_duplicate = isset($node->attr[$name]);

    if (!$is_duplicate) { // Copy whitespace between "=" and value
        $space[2] = (strpos($this->token_blank, $this->char) === false)
            ? ''
            : $this->copy_skip($this->token_blank);
    }

    switch ($this->char) {
        case '"':
            $quote_type = HtmlNode::HDOM_QUOTE_DOUBLE;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $value = $this->copy_until_char('"');
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            break;
        case '\'':
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains attribute values with single quotes (<e attribute=\'value\'>). Use double quotes for best performance.');
            $quote_type = HtmlNode::HDOM_QUOTE_SINGLE;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $value = $this->copy_until_char('\'');
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            break;
        default:
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains attribute values without quotes (<e attribute=value>). Use double quotes for best performance');
            $quote_type = HtmlNode::HDOM_QUOTE_NO;
            $value = $this->copy_until($this->token_attr);
    }

    $value = $this->restore_noise($value);

    if ($trim) {
        // Attribute values must not contain control characters other than space
        // https://www.w3.org/TR/html/dom.html#text-content
        // https://www.w3.org/TR/html/syntax.html#attribute-values
        // https://www.w3.org/TR/xml/#AVNormalize
        $normalized = preg_replace("/[\r\n\t\s]+/u", ' ', $value);

        if ($normalized === null) {
            // With the /u modifier preg_replace() returns null when the
            // subject is not valid UTF-8, which would silently wipe the
            // attribute value. Fall back to byte-wise whitespace handling.
            $normalized = preg_replace('/[ \t\n\r\f\x0B]+/', ' ', $value);
        }

        $value = trim($normalized === null ? $value : $normalized);
    }

    if (!$is_duplicate) {
        // Double quotes are the default and therefore not recorded
        if ($quote_type !== HtmlNode::HDOM_QUOTE_DOUBLE) {
            $node->_[HtmlNode::HDOM_INFO_QUOTE][$name] = $quote_type;
        }
        $node->attr[$name] = $value;
    }
}
895
+
896
/**
 * Attach $node to the node the parser is currently working in.
 *
 * The node is always appended to the parent's node list; it is appended to
 * the parent's children list as well when $is_child is truthy.
 *
 * @param HtmlNode $node     Node to attach.
 * @param bool     $is_child Also register the node as a child.
 * @return void
 */
protected function link_nodes(&$node, $is_child)
{
    $owner = $this->parent;

    $node->parent = $owner;
    $owner->nodes[] = $node;

    if (!$is_child) {
        return;
    }

    $owner->children[] = $node;
}
904
+
905
/**
 * Record an end tag as plain text and step over the current character.
 *
 * Creates a new node whose text is "</$tag>", links it to the current
 * parent as a non-child node and advances the read position by one.
 *
 * @param string $tag Tag name to render as text.
 * @return bool Always true.
 */
protected function as_text_node($tag)
{
    $text_node = new HtmlNode($this);
    ++$this->cursor;

    $text_node->_[HtmlNode::HDOM_INFO_TEXT] = '</' . $tag . '>';
    $this->link_nodes($text_node, false);

    // next
    if (++$this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return true;
}
914
+
915
/**
 * Advance the read position past a run of the given characters.
 *
 * @param string $chars Set of characters to skip.
 * @return void
 */
protected function skip($chars)
{
    $run = strspn($this->doc, $chars, $this->pos);
    $this->pos += $run;

    // next
    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }
}
920
+
921
/**
 * Consume a run of the given characters and return it.
 *
 * The parser state is left untouched when the run is empty.
 *
 * @param string $chars Set of characters to consume.
 * @return string The consumed characters, or '' if none matched.
 */
protected function copy_skip($chars)
{
    $start = $this->pos;
    $length = strspn($this->doc, $chars, $start);

    if ($length === 0) {
        return '';
    }

    $this->pos = $start + $length;

    // next
    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return substr($this->doc, $start, $length);
}
930
+
931
/**
 * Consume everything up to (not including) the first occurrence of any of
 * the given characters and return it.
 *
 * @param string $chars Set of characters that stop the copy.
 * @return string The consumed text.
 */
protected function copy_until($chars)
{
    $start = $this->pos;
    $length = strcspn($this->doc, $chars, $start);

    $this->pos = $start + $length;

    // next
    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return substr($this->doc, $start, $length);
}
939
+
940
/**
 * Consume everything up to (not including) the next occurrence of $char
 * and return it.
 *
 * If $char does not occur again, the rest of the document is consumed and
 * the parser is put into its end-of-document state (char = null).
 *
 * @param string $char Character (or string) to stop at.
 * @return string The consumed text; '' at end of document or when the
 *                parser already sits on $char.
 */
protected function copy_until_char($char)
{
    if ($this->char === null) {
        return '';
    }

    $from = $this->pos;
    $hit = strpos($this->doc, $char, $from);

    if ($hit === false) {
        // Not found: hand back the remainder and mark the document as done
        $tail = substr($this->doc, $from, $this->size - $from);
        $this->char = null;
        $this->pos = $this->size;
        return $tail;
    }

    if ($hit === $from) {
        return '';
    }

    $this->char = $this->doc[$hit];
    $this->pos = $hit;

    return substr($this->doc, $from, $hit - $from);
}
958
+
959
/**
 * Replace every match of $pattern in the document with a placeholder key
 * and remember the replaced text in $this->noise.
 *
 * Keys have the form '___noise___' followed by a 5-character,
 * space-padded number (count of stored noise + 1000).
 *
 * @param string $pattern    PCRE pattern to search for.
 * @param bool   $remove_tag true: replace the entire match,
 *                           false: replace only the first sub-match.
 * @return void
 */
protected function remove_noise($pattern, $remove_tag = false)
{
    $total = preg_match_all(
        $pattern,
        $this->doc,
        $hits,
        PREG_SET_ORDER | PREG_OFFSET_CAPTURE
    );

    // 0 = entire match, 1 = submatch
    $group = $remove_tag ? 0 : 1;

    // Walk backwards so earlier offsets stay valid while the document changes
    $i = $total - 1;
    while ($i > -1) {
        $placeholder = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);

        list($fragment, $offset) = $hits[$i][$group];

        $this->noise[$placeholder] = $fragment;
        $this->doc = substr_replace($this->doc, $placeholder, $offset, strlen($fragment));

        --$i;
    }

    // reset the length of content
    $this->size = strlen($this->doc);

    if ($this->size > 0) {
        $this->char = $this->doc[0];
    }
}
983
+
984
/**
 * Replace noise placeholders in $text with the text they stand for.
 *
 * A placeholder is the marker '___noise___' followed by five characters
 * (16 characters in total). Each restored entry is removed from
 * $this->noise. Restored text is scanned again from the same offset, so
 * placeholders contained in restored noise are resolved as well.
 *
 * Placeholders that cannot be resolved are replaced by a diagnostic text:
 * - unknown key:    'UNDEFINED NOISE FOR KEY: ' followed by the key
 * - incomplete key: 'NO NUMERIC NOISE KEY'
 *
 * @param string $text Text that may contain noise placeholders.
 * @return string Text with placeholders resolved.
 */
function restore_noise($text)
{
    if (empty($this->noise)) {
        return $text; // nothing to restore
    }

    $marker = '___noise___';
    $marker_length = strlen($marker);  // 11
    $key_length = $marker_length + 5;  // 16

    $pos = 0;
    while (($pos = strpos($text, $marker, $pos)) !== false) {
        // Sometimes there is a broken piece of markup and the text ends
        // before the five key characters that should follow the marker.
        if (strlen($text) > $pos + 15) {
            $key = substr($text, $pos, $key_length);

            if (isset($this->noise[$key])) {
                $text = substr($text, 0, $pos)
                    . $this->noise[$key]
                    . substr($text, $pos + $key_length);

                unset($this->noise[$key]);
            } else {
                Debug::log_once('Noise restoration failed. DOM has been corrupted!');

                $placeholder = 'UNDEFINED NOISE FOR KEY: ' . $key;

                $text = substr($text, 0, $pos)
                    . $placeholder
                    . substr($text, $pos + $key_length);

                // The placeholder itself contains the marker (it is part of
                // $key). Continue scanning behind it; restarting at $pos
                // would find the same unknown key again and loop forever
                // while the text keeps growing.
                $pos += strlen($placeholder);
            }
        } else {
            // There is no valid key being given back to us... We must get
            // rid of the ___noise___ or we will have a problem.
            Debug::log_once('Noise restoration failed. The provided key is incomplete: ' . $text);
            $text = substr($text, 0, $pos)
                . 'NO NUMERIC NOISE KEY'
                . substr($text, $pos + $marker_length);
        }
    }

    return $text;
}
1029
+
1030
/**
 * Find the first stored noise entry that contains $text.
 *
 * @param string $text Text to look for.
 * @return string|null The matching noise entry, or null if none matches.
 */
function search_noise($text)
{
    foreach ($this->noise as $stored) {
        if (strpos($stored, $text) === false) {
            continue;
        }

        return $stored;
    }

    return null;
}
1038
+
1039
/**
 * String representation of the document: the inner text of the root node.
 *
 * @return string
 */
function __toString()
{
    $root = $this->root;

    return $root->innertext();
}
1043
+
1044
/**
 * Magic read access to virtual document properties.
 *
 * Supported names: 'outertext' and 'innertext' (both return the root's
 * inner text), 'plaintext', 'charset' and 'target_charset'.
 *
 * @param string $name Property name.
 * @return mixed The property value, or null for unsupported names.
 */
function __get($name)
{
    if ($name === 'outertext' || $name === 'innertext') {
        return $this->root->innertext();
    }

    if ($name === 'plaintext') {
        return $this->root->text();
    }

    if ($name === 'charset') {
        return $this->_charset;
    }

    if ($name === 'target_charset') {
        return $this->_target_charset;
    }

    return null;
}
1059
+
1060
/**
 * Forward to childNodes() of the root node.
 *
 * @param int $idx Index passed through to the root node (default -1).
 * @return mixed Whatever the root node's childNodes() returns.
 */
function childNodes($idx = -1)
{
    $root = $this->root;

    return $root->childNodes($idx);
}
1064
+
1065
/**
 * Forward to firstChild() of the root node.
 *
 * @return mixed Whatever the root node's firstChild() returns.
 */
function firstChild()
{
    $root = $this->root;

    return $root->firstChild();
}
1069
+
1070
/**
 * Forward to lastChild() of the root node.
 *
 * @return mixed Whatever the root node's lastChild() returns.
 */
function lastChild()
{
    $root = $this->root;

    return $root->lastChild();
}
1074
+
1075
/**
 * Create a detached element node.
 *
 * The node is constructed without a document (HtmlNode(null)) and gets
 * begin/end info set to 1.
 *
 * @param string      $name  Tag name of the element.
 * @param string|null $value Optional inner text; not set when null.
 * @return HtmlNode The new element node.
 */
function createElement($name, $value = null)
{
    $element = new HtmlNode(null);

    $element->nodetype = HtmlNode::HDOM_TYPE_ELEMENT;
    $element->tag = $name;
    $element->_[HtmlNode::HDOM_INFO_BEGIN] = 1;
    $element->_[HtmlNode::HDOM_INFO_END] = 1;

    if ($value !== null) {
        $element->_[HtmlNode::HDOM_INFO_INNER] = $value;
    }

    return $element;
}
1090
+
1091
/**
 * Create a text node that is constructed with this document.
 *
 * @param string|null $value Text of the node; not set when null.
 * @return HtmlNode The new text node.
 */
function createTextNode($value)
{
    $text_node = new HtmlNode($this);
    $text_node->nodetype = HtmlNode::HDOM_TYPE_TEXT;

    if ($value === null) {
        return $text_node;
    }

    $text_node->_[HtmlNode::HDOM_INFO_TEXT] = $value;

    return $text_node;
}
1102
+
1103
/**
 * Find the first element matching the selector "#$id".
 *
 * @param string $id Element id.
 * @return mixed Result of find() for index 0.
 */
function getElementById($id)
{
    $selector = '#' . $id;

    return $this->find($selector, 0);
}
1107
+
1108
/**
 * Find elements matching the selector "#$id".
 *
 * @param string   $id  Element id.
 * @param int|null $idx Index passed through to find() (default null).
 * @return mixed Result of find().
 */
function getElementsById($id, $idx = null)
{
    $selector = '#' . $id;

    return $this->find($selector, $idx);
}
1112
+
1113
/**
 * Find the first element matching the selector $name.
 *
 * @param string $name Tag name, used as the selector as-is.
 * @return mixed Result of find() for index 0.
 */
function getElementByTagName($name)
{
    $first = 0;

    return $this->find($name, $first);
}
1117
+
1118
/**
 * Find elements matching the selector $name.
 *
 * @param string   $name Tag name, used as the selector as-is.
 * @param int|null $idx  Index passed through to find() (default null).
 * @return mixed Result of find().
 */
function getElementsByTagName($name, $idx = null)
{
    $result = $this->find($name, $idx);

    return $result;
}
1122
+
1123
/**
 * Read a file and load its contents into this document.
 *
 * All arguments are forwarded to file_get_contents(), so its optional
 * parameters may be passed after $file.
 *
 * @param string $file Name of the file to read.
 * @return false|null false when the file could not be read; nothing
 *                    (null) after the contents were passed to load().
 */
function loadFile($file)
{
    $contents = call_user_func_array('file_get_contents', func_get_args());

    if ($contents === false) {
        return false;
    }

    $this->load($contents, true);
}
1133
+ }
vendor/simplehtmldom/simplehtmldom/HtmlNode.php CHANGED
@@ -1,1441 +1,1441 @@
1
- <?php namespace simplehtmldom;
2
-
3
- /**
4
- * Website: http://sourceforge.net/projects/simplehtmldom/
5
- * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
- *
7
- * Licensed under The MIT License
8
- * See the LICENSE file in the project root for more information.
9
- *
10
- * Authors:
11
- * S.C. Chen
12
- * John Schlick
13
- * Rus Carroll
14
- * logmanoriginal
15
- *
16
- * Contributors:
17
- * Yousuke Kumakura
18
- * Vadim Voituk
19
- * Antcs
20
- *
21
- * Version $Rev$
22
- */
23
-
24
- include_once 'constants.php';
25
- include_once 'Debug.php';
26
-
27
- class HtmlNode
28
- {
29
- const HDOM_TYPE_ELEMENT = 1;
30
- const HDOM_TYPE_COMMENT = 2;
31
- const HDOM_TYPE_TEXT = 3;
32
- const HDOM_TYPE_ROOT = 5;
33
- const HDOM_TYPE_UNKNOWN = 6;
34
- const HDOM_TYPE_CDATA = 7;
35
-
36
- const HDOM_QUOTE_DOUBLE = 0;
37
- const HDOM_QUOTE_SINGLE = 1;
38
- const HDOM_QUOTE_NO = 3;
39
-
40
- const HDOM_INFO_BEGIN = 0;
41
- const HDOM_INFO_END = 1;
42
- const HDOM_INFO_QUOTE = 2;
43
- const HDOM_INFO_SPACE = 3;
44
- const HDOM_INFO_TEXT = 4;
45
- const HDOM_INFO_INNER = 5;
46
- const HDOM_INFO_OUTER = 6;
47
- const HDOM_INFO_ENDSPACE = 7;
48
-
49
- public $nodetype = self::HDOM_TYPE_TEXT;
50
- public $tag = 'text';
51
- public $attr = array();
52
- public $children = array();
53
- public $nodes = array();
54
- public $parent = null;
55
- public $_ = array();
56
- private $dom = null;
57
-
58
/**
 * Map deprecated lower_case method names to their camelCase counterparts.
 *
 * Supported aliases: children, first_child, has_child, last_child,
 * next_sibling and prev_sibling. Every aliased call is reported through
 * Debug::log() as deprecated. Any other name raises an E_USER_ERROR.
 *
 * @param string $func Name of the method that was called.
 * @param array  $args Arguments of the call.
 * @return mixed Result of the aliased method; null when the method is
 *               unknown and the raised error did not stop execution.
 */
function __call($func, $args)
{
    // Allow users to call methods with lower_case syntax
    $aliases = array(
        'children' => 'childNodes',
        'first_child' => 'firstChild',
        'has_child' => 'hasChildNodes',
        'last_child' => 'lastChild',
        'next_sibling' => 'nextSibling',
        'prev_sibling' => 'previousSibling',
    );

    if (!isset($aliases[$func])) {
        trigger_error(
            'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
            E_USER_ERROR
        );

        // Only reached when a custom error handler swallowed the error.
        // There is no method to dispatch to, so stop here instead of
        // continuing with an undefined target method.
        return null;
    }

    $actual_function = $aliases[$func];

    // phpcs:ignore Generic.Files.LineLength
    Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');

    return call_user_func_array(array($this, $actual_function), $args);
}
87
-
88
/**
 * Create a node and, when a document is given, register it there.
 *
 * @param object|null $dom Owning document; with null the node stays
 *                         detached and is not added to any node list.
 */
function __construct($dom)
{
    if ($dom !== null) {
        $this->dom = $dom;
        $dom->nodes[] = $this;
    }
}
95
-
96
/**
 * Compact representation of the node for var_dump().
 *
 * @return array Keys 'nodetype' (constant name plus numeric value),
 *               'tag', 'attributes' and 'nodes'; the last two are the
 *               string 'none' when empty.
 */
function __debugInfo()
{
    // Translate node type to human-readable form
    $known_types = array(
        self::HDOM_TYPE_ELEMENT => 'HDOM_TYPE_ELEMENT',
        self::HDOM_TYPE_COMMENT => 'HDOM_TYPE_COMMENT',
        self::HDOM_TYPE_TEXT => 'HDOM_TYPE_TEXT',
        self::HDOM_TYPE_ROOT => 'HDOM_TYPE_ROOT',
        self::HDOM_TYPE_CDATA => 'HDOM_TYPE_CDATA',
    );

    $label = 'HDOM_TYPE_UNKNOWN';

    foreach ($known_types as $type => $name) {
        // Loose comparison, same as a switch statement
        if ($this->nodetype == $type) {
            $label = $name;
            break;
        }
    }

    $nodetype = "$label ($this->nodetype)";

    $attributes = empty($this->attr) ? 'none' : $this->attr;
    $nodes = empty($this->nodes) ? 'none' : $this->nodes;

    return array(
        'nodetype' => $nodetype,
        'tag' => $this->tag,
        'attributes' => $attributes,
        'nodes' => $nodes
    );
}
128
-
129
/**
 * String representation of the node: its outer text.
 *
 * @return string
 */
function __toString()
{
    $html = $this->outertext();

    return $html;
}
133
-
134
/**
 * Drop the references to the owning document and to the parent node.
 *
 * @return void
 */
function clear()
{
    // Break link to origin, then break link to branch
    unset($this->dom, $this->parent);
}
139
-
140
/**
 * Print the node tree to the output, one node per line, indented by depth.
 *
 * @param bool $show_attr Print the attributes of each node.
 * @param int  $depth     Indentation level (number of tabs).
 * @return void
 *
 * @codeCoverageIgnore
 */
function dump($show_attr = true, $depth = 0)
{
    $line = str_repeat("\t", $depth) . $this->tag;

    if ($show_attr && count($this->attr) > 0) {
        $line .= '(';
        foreach ($this->attr as $name => $value) {
            $line .= "[$name]=>\"$value\", ";
        }
        $line .= ')';
    }

    echo $line . "\n";

    if (!$this->nodes) {
        return;
    }

    foreach ($this->nodes as $child) {
        $child->dump($show_attr, $depth + 1);
    }
}
161
-
162
/**
 * Build a one-line debug description of this node.
 *
 * The description contains the tag, the attributes, the internal info
 * array ($_), the text (if set), the inner-text info and the number of
 * children and nodes.
 *
 * @param bool $echo true: print the description, false: return it.
 * @return string|null The description when $echo is false, otherwise
 *                     nothing.
 *
 * @codeCoverageIgnore
 */
function dump_node($echo = true)
{
    $string = $this->tag;

    if (count($this->attr) > 0) {
        $string .= '(';
        foreach ($this->attr as $k => $v) {
            $string .= "[$k]=>\"$v\", ";
        }
        $string .= ')';
    }

    if (count($this->_) > 0) {
        $string .= ' $_ (';
        foreach ($this->_ as $k => $v) {
            if (is_array($v)) {
                $string .= "[$k]=>(";
                foreach ($v as $k2 => $v2) {
                    $string .= "[$k2]=>\"$v2\", ";
                }
                $string .= ')';
            } else {
                $string .= "[$k]=>\"$v\", ";
            }
        }
        $string .= ')';
    }

    if (isset($this->text)) {
        $string .= " text: ({$this->text})";
    }

    $string .= ' HDOM_INNER_INFO: ';

    // Read the info from this node; the previous code used an undefined
    // variable here and therefore always reported NULL.
    if (isset($this->_[self::HDOM_INFO_INNER])) {
        $string .= "'" . $this->_[self::HDOM_INFO_INNER] . "'";
    } else {
        $string .= ' NULL ';
    }

    $string .= ' children: ' . count($this->children);
    $string .= ' nodes: ' . count($this->nodes);
    $string .= "\n";

    if ($echo) {
        echo $string;
        return;
    }

    return $string;
}
214
-
215
/**
 * Get the parent node or, when $parent is given, set it first.
 *
 * Setting appends this node to the new parent's nodes and children lists.
 * It does not remove this node from the lists of a previous parent.
 *
 * @param HtmlNode|null $parent New parent; null only reads the parent.
 * @return HtmlNode|null The (new) parent node.
 */
function parent($parent = null)
{
    if ($parent === null) {
        return $this->parent;
    }

    // Known limitation: the node is not detached from its former parent's
    // nodes or children list before it is attached to the new one.
    $this->parent = $parent;
    $this->parent->nodes[] = $this;
    $this->parent->children[] = $this;

    return $this->parent;
}
228
-
229
/**
 * Find the closest ancestor with the given tag name.
 *
 * @param string $tag Tag name to look for (compared with ===).
 * @return HtmlNode|null The ancestor node, or null if there is none.
 */
function find_ancestor_tag($tag)
{
    for (
        $candidate = $this->parent;
        $candidate !== null;
        $candidate = $candidate->parent
    ) {
        if ($candidate->tag === $tag) {
            return $candidate;
        }
    }

    return null;
}
245
-
246
/**
 * Build the inner text of the node.
 *
 * Starts with the stored inner info (or, failing that, the stored text
 * info), appends the outer text of every node in $this->nodes and passes
 * the result through convert_text().
 *
 * @return string
 */
function innertext()
{
    $html = '';

    // First available slot wins: inner text before plain text
    foreach (array(self::HDOM_INFO_INNER, self::HDOM_INFO_TEXT) as $slot) {
        if (isset($this->_[$slot])) {
            $html = $this->_[$slot];
            break;
        }
    }

    foreach ($this->nodes as $child) {
        $html .= $child->outertext();
    }

    return $this->convert_text($html);
}
262
-
263
/**
 * Build the outer text of the node (start tag, content and end tag).
 *
 * Order of evaluation:
 * 1. The root node returns its inner text.
 * 2. The document callback, if set, is invoked with this node.
 * 3. Stored outer info or stored text info is returned as-is (converted).
 * 4. Otherwise the markup is assembled from the start tag, stored inner
 *    info (never for <br>), all nodes and the end tag.
 *
 * @return string
 */
function outertext()
{
    if ($this->tag === 'root') {
        return $this->innertext();
    }

    // todo: What is the use of this callback? Remove?
    if ($this->dom && $this->dom->callback !== null) {
        call_user_func_array($this->dom->callback, array($this));
    }

    // Must be checked after the callback ran
    foreach (array(self::HDOM_INFO_OUTER, self::HDOM_INFO_TEXT) as $slot) {
        if (isset($this->_[$slot])) {
            return $this->convert_text($this->_[$slot]);
        }
    }

    $html = isset($this->_[self::HDOM_INFO_BEGIN]) ? $this->makeup() : '';

    // todo: <br> should either never have self::HDOM_INFO_INNER or always
    if ($this->tag !== 'br' && isset($this->_[self::HDOM_INFO_INNER])) {
        $html .= $this->_[self::HDOM_INFO_INNER];
    }

    if ($this->nodes) {
        foreach ($this->nodes as $child) {
            $html .= $child->outertext();
        }
    }

    $has_end_tag = isset($this->_[self::HDOM_INFO_END])
        && $this->_[self::HDOM_INFO_END] != 0;

    if ($has_end_tag) {
        $html .= '</' . $this->tag . '>';
    }

    return $this->convert_text($html);
}
307
-
308
/**
 * Tell whether the given node is a block level element.
 *
 * The tag name is compared case-insensitively against a fixed list.
 *
 * @link https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
 *
 * @param HtmlNode $node Node to check.
 * @return bool
 */
protected function is_block_element($node)
{
    // todo: When we have the utility class this should be moved there
    $block_tags = array(
        'p',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'ol', 'ul',
        'pre',
        'address',
        'blockquote',
        'dl',
        'div',
        'fieldset',
        'form',
        'hr',
        'noscript',
        'table'
    );

    $tag = strtolower($node->tag);

    return in_array($tag, $block_tags, true);
}
331
-
332
/**
 * Tell whether the given node is an inline level element.
 *
 * The tag name is compared case-insensitively against a fixed list.
 *
 * @link https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
 *
 * @param HtmlNode $node Node to check.
 * @return bool
 */
protected function is_inline_element($node)
{
    // todo: When we have the utility class this should be moved there
    $inline_tags = array(
        'b', 'big', 'i', 'small', 'tt',
        'abbr', 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong', 'samp', 'var',
        'a', 'bdo', 'br', 'img', 'map', 'object', 'q', 'script', 'span', 'sub', 'sup',
        'button', 'input', 'label', 'select', 'textarea'
    );

    $tag = strtolower($node->tag);

    return in_array($tag, $inline_tags, true);
}
346
-
347
/**
 * Build the plain text of the node and its descendants.
 *
 * Script, style, comment and unknown nodes contribute no text of their
 * own. Block level children are separated from preceding text by a blank
 * line, <br> children are replaced by the configured line break text and
 * all other children are appended as-is.
 *
 * @param bool $trim true: remove whitespace at both ends of the result,
 *                   false: reduce it to a single space.
 * @return string
 */
function text($trim = true)
{
    $ret = '';

    $tag = strtolower($this->tag);

    if ($tag === 'script' || $tag === 'style') {
        $ret = '';
    } elseif ($this->nodetype === self::HDOM_TYPE_COMMENT) {
        $ret = '';
    } elseif ($this->nodetype === self::HDOM_TYPE_CDATA) {
        $ret = $this->_[self::HDOM_INFO_INNER];
    } elseif ($this->nodetype === self::HDOM_TYPE_UNKNOWN) {
        $ret = '';
    } elseif (isset($this->_[self::HDOM_INFO_INNER])) {
        $ret = $this->_[self::HDOM_INFO_INNER];
    } elseif ($this->nodetype === self::HDOM_TYPE_TEXT) {
        $ret = $this->_[self::HDOM_INFO_TEXT];
    }

    if (is_null($this->nodes)) {
        return '';
    }

    foreach ($this->nodes as $n) {
        if ($this->is_block_element($n)) {
            $block = ltrim($this->convert_text($n->text(false)));

            // Compare against '' explicitly: empty() is also true for the
            // string "0", which would drop a block such as <p>0</p>.
            if ($block === '') {
                continue;
            }

            $ret = rtrim($ret) . "\n\n" . $block;
        } elseif ($this->is_inline_element($n)) {
            // todo: <br> introduces code smell because no space but \n
            if (strtolower($n->tag) === 'br') {
                $ret .= $this->dom->default_br_text ?: DEFAULT_BR_TEXT;
            } else {
                // Render the child once and reuse the result for both the
                // emptiness check and the output.
                $inline = $this->convert_text($n->text(false));

                // Same "0" pitfall as above
                if (ltrim($inline) === '') {
                    continue;
                }

                $ret .= $inline;
            }
        } else {
            $ret .= $this->convert_text($n->text(false));
        }
    }

    // Reduce whitespace at start/end to a single (or none) space
    $ret = preg_replace('/[ \t\n\r\0\x0B\xC2\xA0]+$/u', $trim ? '' : ' ', $ret);
    $ret = preg_replace('/^[ \t\n\r\0\x0B\xC2\xA0]+/u', $trim ? '' : ' ', $ret);

    return $ret;
}
404
-
405
/**
 * Return the inner text with CDATA delimiters removed.
 *
 * The opening delimiter '<![CDATA[' is matched case-insensitively, the
 * closing delimiter ']]>' literally.
 *
 * @return string
 */
function xmltext()
{
    $without_open = str_ireplace('<![CDATA[', '', $this->innertext());

    return str_replace(']]>', '', $without_open);
}
412
-
413
/**
 * Build the start tag of the node including its attributes.
 *
 * Nodes with stored text info (text, comment, unknown) return that text
 * instead. Attributes whose value is null or false are skipped; a value
 * of true produces a value-less attribute. Stored whitespace and quote
 * information is used where available.
 *
 * @return string
 */
function makeup()
{
    // text, comment, unknown
    if (isset($this->_[self::HDOM_INFO_TEXT])) {
        return $this->_[self::HDOM_INFO_TEXT];
    }

    $markup = '<' . $this->tag;

    foreach ($this->attr as $name => $value) {
        // skip removed attribute
        if ($value === null || $value === false) {
            continue;
        }

        // Whitespace triple: before the name, before "=", after "="
        if (isset($this->_[self::HDOM_INFO_SPACE][$name])) {
            $spacing = $this->_[self::HDOM_INFO_SPACE][$name];
            $before_name = $spacing[0];
        } else {
            $spacing = null;
            $before_name = ' ';
        }

        $markup .= $before_name;

        // no value attr: nowrap, checked selected...
        if ($value === true) {
            $markup .= $name;
            continue;
        }

        $quote_type = isset($this->_[self::HDOM_INFO_QUOTE][$name])
            ? $this->_[self::HDOM_INFO_QUOTE][$name]
            : self::HDOM_QUOTE_DOUBLE;

        switch ($quote_type) {
            case self::HDOM_QUOTE_SINGLE:
                $quote = '\'';
                $value = htmlentities($value, ENT_QUOTES, $this->dom->target_charset);
                break;
            case self::HDOM_QUOTE_NO:
                $quote = '';
                break;
            case self::HDOM_QUOTE_DOUBLE:
            default:
                $quote = '"';
                $value = htmlentities($value, ENT_COMPAT, $this->dom->target_charset);
        }

        $before_equal = ($spacing !== null) ? $spacing[1] : '';
        $after_equal = ($spacing !== null) ? $spacing[2] : '';

        $markup .= $name . $before_equal . '=' . $after_equal . $quote . $value . $quote;
    }

    if (isset($this->_[self::HDOM_INFO_ENDSPACE])) {
        $markup .= $this->_[self::HDOM_INFO_ENDSPACE];
    }

    return $markup . '>';
}
474
-
475
/**
 * Find nodes below this node that match a selector.
 *
 * The selector is parsed into one or more selector chains. Every chain is
 * resolved level by level via seek(); the results of all chains are
 * merged and returned ordered by node key.
 *
 * @param string   $selector  Selector text.
 * @param int|null $idx       null: return all matches as an array;
 *                            otherwise return the match at that index
 *                            (negative values count from the end).
 * @param bool     $lowercase Passed through to seek().
 * @return array|HtmlNode|null Array of nodes, a single node, or null when
 *                             the requested index does not exist.
 */
function find($selector, $idx = null, $lowercase = false)
{
    $chains = $this->parse_selector($selector);
    $chain_count = count($chains);

    if ($chain_count === 0) {
        return array();
    }

    $found_keys = array();

    // find each selector
    for ($c = 0; $c < $chain_count; ++$c) {
        // The change on the below line was documented on the sourceforge
        // code tracker id 2788009
        $depth = count($chains[$c]);

        if ($depth === 0) {
            Debug::log_once('Empty selector (' . $selector . ') matches nothing.');
            return array();
        }

        if (!isset($this->_[self::HDOM_INFO_BEGIN])) {
            Debug::log_once('Invalid operation. The current node has no start tag.');
            return array();
        }

        $current = array($this->_[self::HDOM_INFO_BEGIN] => 1);
        $combinator = ' ';

        // handle descendant selectors, no recursive!
        for ($level = 0; $level < $depth; ++$level) {
            $matched = array();

            foreach ($current as $key => $unused) {
                $start = ($key === -1) ? $this->dom->root : $this->dom->nodes[$key];
                // PaperG - Pass this optional parameter on to the seek function.
                $start->seek($chains[$c][$level], $matched, $combinator, $lowercase);
            }

            $current = $matched;
            $combinator = $chains[$c][$level][6]; // Next Combinator
        }

        // Keep keys that were found before, add new ones
        foreach (array_keys($current) as $key) {
            if (!isset($found_keys[$key])) {
                $found_keys[$key] = 1;
            }
        }
    }

    // sort keys
    ksort($found_keys);

    $found = array();
    foreach (array_keys($found_keys) as $key) {
        $found[] = $this->dom->nodes[$key];
    }

    // return nth-element or array
    if ($idx === null) {
        return $found;
    }

    if ($idx < 0) {
        $idx = count($found) + $idx;
    }

    return isset($found[$idx]) ? $found[$idx] : null;
}
533
-
534
/**
 * Same as find(), but returns null instead of any falsy result (such as
 * an empty array).
 *
 * @param string   $selector  Selector text.
 * @param int|null $idx       See find().
 * @param bool     $lowercase See find().
 * @return array|HtmlNode|null
 */
function expect($selector, $idx = null, $lowercase = false)
{
    $result = $this->find($selector, $idx, $lowercase);

    if ($result) {
        return $result;
    }

    return null;
}
538
-
539
/**
 * Collect the nodes related to this node that match one parsed selector.
 *
 * The candidate nodes depend on the combinator that connects this node to
 * the selector:
 * - ' ' : nodes of the document between this node's start and its end
 * - '>' : children of this node
 * - '+' : the child following this node in its parent
 * - '~' : children of the parent starting at this node
 *
 * @param array  $selector   Parsed selector with seven entries: pseudo
 *                           selector, tag, pseudo element, id, class list,
 *                           attribute list and next combinator.
 * @param array  $ret        Result set; keys of matching nodes are added
 *                           with the value 1.
 * @param string $parent_cmd Combinator, see above.
 * @param bool   $lowercase  Compare class names and attribute values in
 *                           lower case.
 * @return void
 */
protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
{
    list($ps_selector, $tag, $ps_element, $id, $class, $attributes, $cmb) = $selector;
    $nodes = array();

    if ($parent_cmd === ' ') { // Descendant Combinator
        // Find parent closing tag if the current element doesn't have a closing
        // tag (i.e. void element)
        $end = (!empty($this->_[self::HDOM_INFO_END])) ? $this->_[self::HDOM_INFO_END] : 0;
        if ($end == 0 && $this->parent) {
            $parent = $this->parent;
            while ($parent !== null && !isset($parent->_[self::HDOM_INFO_END])) {
                $end -= 1;
                $parent = $parent->parent;
            }

            // The walk may run past the top of the tree without finding an
            // ancestor with end info; there is nothing to add in that case.
            if ($parent !== null) {
                $end += $parent->_[self::HDOM_INFO_END];
            }
        }

        if ($end === 0) {
            $end = count($this->dom->nodes);
        }

        // Get list of target nodes
        $nodes_start = $this->_[self::HDOM_INFO_BEGIN] + 1;

        // remove() makes $this->dom->nodes non-contiguous; use what is left.
        $nodes = array_intersect_key(
            $this->dom->nodes,
            array_flip(range($nodes_start, $end))
        );
    } elseif ($parent_cmd === '>') { // Child Combinator
        $nodes = $this->children;
    } elseif ($parent_cmd === '+'
        && $this->parent
        && in_array($this, $this->parent->children, true)) { // Next-Sibling Combinator
        // Identity comparison (strict): a loose comparison of nodes walks
        // the whole, cyclic node graph and may match a different node that
        // merely looks equal.
        $index = array_search($this, $this->parent->children, true) + 1;
        if ($index < count($this->parent->children)) {
            $nodes[] = $this->parent->children[$index];
        }
    } elseif ($parent_cmd === '~'
        && $this->parent
        && in_array($this, $this->parent->children, true)) { // Subsequent Sibling Combinator
        // NOTE(review): the slice starts at this node itself, so this node
        // is tested against the selector too - confirm this is intended.
        $index = array_search($this, $this->parent->children, true);
        $nodes = array_slice($this->parent->children, $index);
    }

    // Go through each element starting at this element until the end tag
    // Note: If this element is a void tag, any previous void element is
    // skipped.
    foreach ($nodes as $node) {
        $pass = true;

        // Skip root nodes
        if (!$node->parent) {
            continue;
        }

        // Handle 'text' selector
        if ($tag === 'text') {
            if ($node->tag === 'text') {
                $ret[array_search($node, $this->dom->nodes, true)] = 1;
            }

            if (isset($node->_[self::HDOM_INFO_INNER])) {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // Handle 'cdata' selector
        if ($tag === 'cdata') {
            if ($node->tag === 'cdata') {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // Handle 'comment'
        if ($tag === 'comment' && $node->tag === 'comment') {
            $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            continue;
        }

        // Skip if node isn't a child node (i.e. text nodes)
        if (!in_array($node, $node->parent->children, true)) {
            continue;
        }

        // Skip if tag doesn't match
        if ($tag !== '' && $tag !== $node->tag && $tag !== '*') {
            $pass = false;
        }

        // Skip if ID doesn't exist
        if ($pass && $id !== '' && !isset($node->attr['id'])) {
            $pass = false;
        }

        // Check if ID matches
        if ($pass && $id !== '' && isset($node->attr['id'])) {
            // Note: Only consider the first ID (as browsers do)
            $node_id = explode(' ', trim($node->attr['id']))[0];

            if ($id !== $node_id) {
                $pass = false;
            }
        }

        // Check if all class(es) exist
        if ($pass && $class !== '' && is_array($class) && !empty($class)) {
            if (isset($node->attr['class'])) {
                // Apply the same rules for the pattern and attribute value
                // Attribute values must not contain control characters other than space
                // https://www.w3.org/TR/html/dom.html#text-content
                // https://www.w3.org/TR/html/syntax.html#attribute-values
                // https://www.w3.org/TR/xml/#AVNormalize
                $node_classes = preg_replace("/[\r\n\t\s]+/u", ' ', $node->attr['class']);
                $node_classes = trim($node_classes);
                $node_classes = explode(' ', $node_classes);

                if ($lowercase) {
                    $node_classes = array_map('strtolower', $node_classes);
                }

                foreach ($class as $c) {
                    // Strict: a loose comparison treats numeric-looking
                    // class names such as "1e1" and "10" as equal.
                    if (!in_array($c, $node_classes, true)) {
                        $pass = false;
                        break;
                    }
                }
            } else {
                $pass = false;
            }
        }

        // Check attributes
        if ($pass
            && $attributes !== ''
            && is_array($attributes)
            && !empty($attributes)) {
            foreach ($attributes as $a) {
                list (
                    $att_name,
                    $att_expr,
                    $att_val,
                    $att_inv,
                    $att_case_sensitivity
                ) = $a;

                // Handle indexing attributes (i.e. "[2]")
                /**
                 * Note: This is not supported by the CSS Standard but adds
                 * the ability to select items compatible to XPath (i.e.
                 * the 3rd element within its parent).
                 *
                 * Note: This doesn't conflict with the CSS Standard which
                 * doesn't work on numeric attributes anyway.
                 */
                if (is_numeric($att_name)
                    && $att_expr === ''
                    && $att_val === '') {
                    $count = 0;

                    // Find index of current element in parent
                    foreach ($node->parent->children as $c) {
                        if ($c->tag === $node->tag) {
                            ++$count;
                        }
                        if ($c === $node) {
                            break;
                        }
                    }

                    // If this is the correct node, continue with next
                    // attribute
                    if ($count === (int)$att_name) {
                        continue;
                    }
                }

                // Check attribute availability
                if ($att_inv) { // Attribute should NOT be set
                    if (isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                } else { // Attribute should be set
                    // todo: "plaintext" is not a valid CSS selector!
                    if ($att_name !== 'plaintext'
                        && !isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                }

                // Continue with next attribute if expression isn't defined
                if ($att_expr === '') {
                    continue;
                }

                // If they have told us that this is a "plaintext"
                // search then we want the plaintext of the node - right?
                // todo "plaintext" is not a valid CSS selector!
                if ($att_name === 'plaintext') {
                    $nodeKeyValue = $node->text();
                } else {
                    $nodeKeyValue = $node->attr[$att_name];
                }

                // If lowercase is set, do a case insensitive test of
                // the value of the selector.
                if ($lowercase) {
                    $check = $this->match(
                        $att_expr,
                        strtolower($att_val),
                        strtolower($nodeKeyValue),
                        $att_case_sensitivity
                    );
                } else {
                    $check = $this->match(
                        $att_expr,
                        $att_val,
                        $nodeKeyValue,
                        $att_case_sensitivity
                    );
                }

                $check = $ps_element === 'not' ? !$check : $check;

                if (!$check) {
                    $pass = false;
                    break;
                }
            }
        }

        // Found a match. Add to list
        $pass = $ps_selector === 'not' ? !$pass : $pass;
        if ($pass) {
            $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
        }
    }
}
781
-
782
- protected function match($exp, $pattern, $value, $case_sensitivity)
783
- {
784
- if ($case_sensitivity === 'i') {
785
- $pattern = strtolower($pattern);
786
- $value = strtolower($value);
787
- }
788
-
789
- // Apply the same rules for the pattern and attribute value
790
- // Attribute values must not contain control characters other than space
791
- // https://www.w3.org/TR/html/dom.html#text-content
792
- // https://www.w3.org/TR/html/syntax.html#attribute-values
793
- // https://www.w3.org/TR/xml/#AVNormalize
794
- $pattern = preg_replace("/[\r\n\t\s]+/u", ' ', $pattern);
795
- $pattern = trim($pattern);
796
-
797
- $value = preg_replace("/[\r\n\t\s]+/u", ' ', $value);
798
- $value = trim($value);
799
-
800
- switch ($exp) {
801
- case '=':
802
- return ($value === $pattern);
803
- case '!=':
804
- return ($value !== $pattern);
805
- case '^=':
806
- return preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
807
- case '$=':
808
- return preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
809
- case '*=':
810
- return preg_match('/' . preg_quote($pattern, '/') . '/', $value);
811
- case '|=':
812
- /**
813
- * [att|=val]
814
- *
815
- * Represents an element with the att attribute, its value
816
- * either being exactly "val" or beginning with "val"
817
- * immediately followed by "-" (U+002D).
818
- */
819
- return strpos($value, $pattern) === 0;
820
- case '~=':
821
- /**
822
- * [att~=val]
823
- *
824
- * Represents an element with the att attribute whose value is a
825
- * whitespace-separated list of words, one of which is exactly
826
- * "val". If "val" contains whitespace, it will never represent
827
- * anything (since the words are separated by spaces). Also if
828
- * "val" is the empty string, it will never represent anything.
829
- */
830
- return in_array($pattern, explode(' ', trim($value)), true);
831
- }
832
-
833
- Debug::log('Unhandled attribute selector: ' . $exp . '!');
834
- return false;
835
- }
836
-
837
- protected function parse_selector($selector_string)
838
- {
839
- /**
840
- * Pattern of CSS selectors, modified from mootools (https://mootools.net/)
841
- *
842
- * Paperg: Add the colon to the attribute, so that it properly finds
843
- * <tag attr:ibute="something" > like google does.
844
- *
845
- * Note: if you try to look at this attribute, you MUST use getAttribute
846
- * since $dom->x:y will fail the php syntax check.
847
- *
848
- * Notice the \[ starting the attribute? and the @? following? This
849
- * implies that an attribute can begin with an @ sign that is not
850
- * captured. This implies that an html attribute specifier may start
851
- * with an @ sign that is NOT captured by the expression. Farther study
852
- * is required to determine of this should be documented or removed.
853
- *
854
- * Matches selectors in this order:
855
- *
856
- * [0] - full match
857
- *
858
- * [1] - pseudo selector
859
- * (?:\:(\w+)\()?
860
- * Matches the pseudo selector (optional)
861
- *
862
- * [2] - tag name
863
- * ([\w:\*-]*)
864
- * Matches the tag name consisting of zero or more words, colons,
865
- * asterisks and hyphens.
866
- *
867
- * [3] - pseudo selector
868
- * (?:\:(\w+)\()?
869
- * Matches the pseudo selector (optional)
870
- *
871
- * [4] - id name
872
- * (?:\#([\w-]+))
873
- * Optionally matches a id name, consisting of an "#" followed by
874
- * the id name (one or more words and hyphens).
875
- *
876
- * [5] - class names (including dots)
877
- * (?:\.([\w\.-]+))?
878
- * Optionally matches a list of classs, consisting of an "."
879
- * followed by the class name (one or more words and hyphens)
880
- * where multiple classes can be chained (i.e. ".foo.bar.baz")
881
- *
882
- * [6] - attributes
883
- * ((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?
884
- * Optionally matches the attributes list
885
- *
886
- * [7] - separator
887
- * ([\/, >+~]+)
888
- * Matches the selector list separator
889
- */
890
- // phpcs:ignore Generic.Files.LineLength
891
- $pattern = "/(?:\:(\w+)\()?([\w:\*-]*)(?:\:(\w+)\()?(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?(?:\))?(?:\))?([\/, >+~]+)/is";
892
-
893
- preg_match_all(
894
- $pattern,
895
- trim($selector_string) . ' ', // Add final ' ' as pseudo separator
896
- $matches,
897
- PREG_SET_ORDER
898
- );
899
-
900
- $selectors = array();
901
- $result = array();
902
-
903
- foreach ($matches as $m) {
904
- $m[0] = trim($m[0]);
905
-
906
- // Skip NoOps
907
- if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') { continue; }
908
-
909
- array_shift($m);
910
-
911
- // Convert to lowercase
912
- if ($this->dom->lowercase) {
913
- $m[1] = strtolower($m[1]);
914
- }
915
-
916
- // Extract classes
917
- if ($m[4] !== '') { $m[4] = explode('.', $m[4]); }
918
-
919
- /* Extract attributes (pattern based on the pattern above!)
920
-
921
- * [0] - full match
922
- * [1] - attribute name
923
- * [2] - attribute expression
924
- * [3] - attribute value
925
- * [4] - case sensitivity
926
- *
927
- * Note: Attributes can be negated with a "!" prefix to their name
928
- */
929
- if($m[5] !== '') {
930
- preg_match_all(
931
- "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
932
- trim($m[5]),
933
- $attributes,
934
- PREG_SET_ORDER
935
- );
936
-
937
- // Replace element by array
938
- $m[5] = array();
939
-
940
- foreach($attributes as $att) {
941
- // Skip empty matches
942
- if(trim($att[0]) === '') { continue; }
943
-
944
- $inverted = (isset($att[1][0]) && $att[1][0] === '!');
945
- $m[5][] = array(
946
- $inverted ? substr($att[1], 1) : $att[1], // Name
947
- (isset($att[2])) ? $att[2] : '', // Expression
948
- (isset($att[3])) ? $att[3] : '', // Value
949
- $inverted, // Inverted Flag
950
- (isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity
951
- );
952
- }
953
- }
954
-
955
- // Sanitize Separator
956
- if ($m[6] !== '' && trim($m[6]) === '') { // Descendant Separator
957
- $m[6] = ' ';
958
- } else { // Other Separator
959
- $m[6] = trim($m[6]);
960
- }
961
-
962
- // Clear Separator if it's a Selector List
963
- if ($is_list = ($m[6] === ',')) { $m[6] = ''; }
964
-
965
- $result[] = $m;
966
-
967
- if ($is_list) { // Selector List
968
- $selectors[] = $result;
969
- $result = array();
970
- }
971
- }
972
-
973
- if (count($result) > 0) { $selectors[] = $result; }
974
- return $selectors;
975
- }
976
-
977
- function __get($name)
978
- {
979
- if (isset($this->attr[$name])) {
980
- return $this->convert_text($this->attr[$name]);
981
- }
982
-
983
- switch ($name) {
984
- case 'outertext': return $this->outertext();
985
- case 'innertext': return $this->innertext();
986
- case 'plaintext': return $this->text();
987
- case 'xmltext': return $this->xmltext();
988
- }
989
-
990
- return false;
991
- }
992
-
993
- function __set($name, $value)
994
- {
995
- switch ($name) {
996
- case 'outertext':
997
- $this->_[self::HDOM_INFO_OUTER] = $value;
998
- break;
999
- case 'innertext':
1000
- if (isset($this->_[self::HDOM_INFO_TEXT])) {
1001
- $this->_[self::HDOM_INFO_TEXT] = '';
1002
- }
1003
- $this->_[self::HDOM_INFO_INNER] = $value;
1004
- break;
1005
- default: $this->attr[$name] = $value;
1006
- }
1007
- }
1008
-
1009
- function __isset($name)
1010
- {
1011
- switch ($name) {
1012
- case 'outertext': return true;
1013
- case 'innertext': return true;
1014
- case 'plaintext': return true;
1015
- }
1016
-
1017
- return isset($this->attr[$name]);
1018
- }
1019
-
1020
- function __unset($name)
1021
- {
1022
- if (isset($this->attr[$name])) { unset($this->attr[$name]); }
1023
- }
1024
-
1025
- function convert_text($text)
1026
- {
1027
- $converted_text = $text;
1028
-
1029
- $sourceCharset = '';
1030
- $targetCharset = '';
1031
-
1032
- if ($this->dom) {
1033
- $sourceCharset = strtoupper($this->dom->_charset);
1034
- $targetCharset = strtoupper($this->dom->_target_charset);
1035
- }
1036
-
1037
- if (!empty($sourceCharset) && !empty($targetCharset)) {
1038
- if (strtoupper($sourceCharset) === strtoupper($targetCharset)) {
1039
- $converted_text = $text;
1040
- } elseif ((strtoupper($targetCharset) === 'UTF-8') && (self::is_utf8($text))) {
1041
- Debug::log_once('The source charset was incorrectly detected as ' . $sourceCharset . ' but should have been UTF-8');
1042
- $converted_text = $text;
1043
- } else {
1044
- $converted_text = iconv($sourceCharset, $targetCharset, $text);
1045
- }
1046
- }
1047
-
1048
- // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
1049
- if ($targetCharset === 'UTF-8') {
1050
- if (substr($converted_text, 0, 3) === "\xef\xbb\xbf") {
1051
- $converted_text = substr($converted_text, 3);
1052
- }
1053
-
1054
- if (substr($converted_text, -3) === "\xef\xbb\xbf") {
1055
- $converted_text = substr($converted_text, 0, -3);
1056
- }
1057
- }
1058
-
1059
- return $converted_text;
1060
- }
1061
-
1062
- static function is_utf8($str)
1063
- {
1064
- $c = 0; $b = 0;
1065
- $bits = 0;
1066
- $len = strlen($str);
1067
- for($i = 0; $i < $len; $i++) {
1068
- $c = ord($str[$i]);
1069
- if($c > 128) {
1070
- if(($c >= 254)) { return false; }
1071
- elseif($c >= 252) { $bits = 6; }
1072
- elseif($c >= 248) { $bits = 5; }
1073
- elseif($c >= 240) { $bits = 4; }
1074
- elseif($c >= 224) { $bits = 3; }
1075
- elseif($c >= 192) { $bits = 2; }
1076
- else { return false; }
1077
- if(($i + $bits) > $len) { return false; }
1078
- while($bits > 1) {
1079
- $i++;
1080
- $b = ord($str[$i]);
1081
- if($b < 128 || $b > 191) { return false; }
1082
- $bits--;
1083
- }
1084
- }
1085
- }
1086
- return true;
1087
- }
1088
-
1089
- function get_display_size()
1090
- {
1091
- $width = -1;
1092
- $height = -1;
1093
-
1094
- if ($this->tag !== 'img') {
1095
- return false;
1096
- }
1097
-
1098
- // See if there is aheight or width attribute in the tag itself.
1099
- if (isset($this->attr['width'])) {
1100
- $width = $this->attr['width'];
1101
- }
1102
-
1103
- if (isset($this->attr['height'])) {
1104
- $height = $this->attr['height'];
1105
- }
1106
-
1107
- // Now look for an inline style.
1108
- if (isset($this->attr['style'])) {
1109
- // Thanks to user gnarf from stackoverflow for this regular expression.
1110
- $attributes = array();
1111
-
1112
- preg_match_all(
1113
- '/([\w-]+)\s*:\s*([^;]+)\s*;?/',
1114
- $this->attr['style'],
1115
- $matches,
1116
- PREG_SET_ORDER
1117
- );
1118
-
1119
- foreach ($matches as $match) {
1120
- $attributes[$match[1]] = $match[2];
1121
- }
1122
-
1123
- // If there is a width in the style attributes:
1124
- if (isset($attributes['width']) && $width == -1) {
1125
- // check that the last two characters are px (pixels)
1126
- if (strtolower(substr($attributes['width'], -2)) === 'px') {
1127
- $proposed_width = substr($attributes['width'], 0, -2);
1128
- // Now make sure that it's an integer and not something stupid.
1129
- if (filter_var($proposed_width, FILTER_VALIDATE_INT)) {
1130
- $width = $proposed_width;
1131
- }
1132
- }
1133
- }
1134
-
1135
- // If there is a width in the style attributes:
1136
- if (isset($attributes['height']) && $height == -1) {
1137
- // check that the last two characters are px (pixels)
1138
- if (strtolower(substr($attributes['height'], -2)) == 'px') {
1139
- $proposed_height = substr($attributes['height'], 0, -2);
1140
- // Now make sure that it's an integer and not something stupid.
1141
- if (filter_var($proposed_height, FILTER_VALIDATE_INT)) {
1142
- $height = $proposed_height;
1143
- }
1144
- }
1145
- }
1146
-
1147
- }
1148
-
1149
- // Future enhancement:
1150
- // Look in the tag to see if there is a class or id specified that has
1151
- // a height or width attribute to it.
1152
-
1153
- // Far future enhancement
1154
- // Look at all the parent tags of this image to see if they specify a
1155
- // class or id that has an img selector that specifies a height or width
1156
- // Note that in this case, the class or id will have the img subselector
1157
- // for it to apply to the image.
1158
-
1159
- // ridiculously far future development
1160
- // If the class or id is specified in a SEPARATE css file thats not on
1161
- // the page, go get it and do what we were just doing for the ones on
1162
- // the page.
1163
-
1164
- $result = array(
1165
- 'height' => $height,
1166
- 'width' => $width
1167
- );
1168
-
1169
- return $result;
1170
- }
1171
-
1172
- function save($filepath = '')
1173
- {
1174
- $ret = $this->outertext();
1175
-
1176
- if ($filepath !== '') {
1177
- file_put_contents($filepath, $ret, LOCK_EX);
1178
- }
1179
-
1180
- return $ret;
1181
- }
1182
-
1183
- function addClass($class)
1184
- {
1185
- if (is_string($class)) {
1186
- $class = explode(' ', $class);
1187
- }
1188
-
1189
- if (is_array($class)) {
1190
- foreach($class as $c) {
1191
- if (isset($this->class)) {
1192
- if ($this->hasClass($c)) {
1193
- continue;
1194
- } else {
1195
- $this->class .= ' ' . $c;
1196
- }
1197
- } else {
1198
- $this->class = $c;
1199
- }
1200
- }
1201
- }
1202
- }
1203
-
1204
- function hasClass($class)
1205
- {
1206
- if (is_string($class)) {
1207
- if (isset($this->class)) {
1208
- return in_array($class, explode(' ', $this->class), true);
1209
- }
1210
- }
1211
-
1212
- return false;
1213
- }
1214
-
1215
- function removeClass($class = null)
1216
- {
1217
- if (!isset($this->class)) {
1218
- return;
1219
- }
1220
-
1221
- if (is_null($class)) {
1222
- $this->removeAttribute('class');
1223
- return;
1224
- }
1225
-
1226
- if (is_string($class)) {
1227
- $class = explode(' ', $class);
1228
- }
1229
-
1230
- if (is_array($class)) {
1231
- $class = array_diff(explode(' ', $this->class), $class);
1232
- if (empty($class)) {
1233
- $this->removeAttribute('class');
1234
- } else {
1235
- $this->class = implode(' ', $class);
1236
- }
1237
- }
1238
- }
1239
-
1240
- function getAllAttributes()
1241
- {
1242
- return $this->attr;
1243
- }
1244
-
1245
- function getAttribute($name)
1246
- {
1247
- return $this->$name;
1248
- }
1249
-
1250
- function setAttribute($name, $value)
1251
- {
1252
- $this->$name = $value;
1253
- }
1254
-
1255
- function hasAttribute($name)
1256
- {
1257
- return isset($this->$name);
1258
- }
1259
-
1260
- function removeAttribute($name)
1261
- {
1262
- unset($this->$name);
1263
- }
1264
-
1265
- function remove()
1266
- {
1267
- if ($this->parent) {
1268
- $this->parent->removeChild($this);
1269
- }
1270
- }
1271
-
1272
- function removeChild($node)
1273
- {
1274
- foreach($node->children as $child) {
1275
- $node->removeChild($child);
1276
- }
1277
-
1278
- // No need to re-index node->children because it is about to be removed!
1279
-
1280
- foreach($node->nodes as $entity) {
1281
- $enidx = array_search($entity, $node->nodes, true);
1282
- $edidx = array_search($entity, $node->dom->nodes, true);
1283
-
1284
- if ($enidx !== false) {
1285
- unset($node->nodes[$enidx]);
1286
- }
1287
-
1288
- if ($edidx !== false) {
1289
- unset($node->dom->nodes[$edidx]);
1290
- }
1291
- }
1292
-
1293
- // No need to re-index node->nodes because it is about to be removed!
1294
-
1295
- $nidx = array_search($node, $this->nodes, true);
1296
- $cidx = array_search($node, $this->children, true);
1297
- $didx = array_search($node, $this->dom->nodes, true);
1298
-
1299
- if ($nidx !== false) {
1300
- unset($this->nodes[$nidx]);
1301
- }
1302
-
1303
- $this->nodes = array_values($this->nodes);
1304
-
1305
- if ($cidx !== false) {
1306
- unset($this->children[$cidx]);
1307
- }
1308
-
1309
- $this->children = array_values($this->children);
1310
-
1311
- if ($didx !== false) {
1312
- unset($this->dom->nodes[$didx]);
1313
- }
1314
-
1315
- // Do not re-index dom->nodes because nodes point to other nodes in the
1316
- // array explicitly!
1317
-
1318
- $node->clear();
1319
- }
1320
-
1321
- function getElementById($id)
1322
- {
1323
- return $this->find("#$id", 0);
1324
- }
1325
-
1326
- function getElementsById($id, $idx = null)
1327
- {
1328
- return $this->find("#$id", $idx);
1329
- }
1330
-
1331
- function getElementByTagName($name)
1332
- {
1333
- return $this->find($name, 0);
1334
- }
1335
-
1336
- function getElementsByTagName($name, $idx = null)
1337
- {
1338
- return $this->find($name, $idx);
1339
- }
1340
-
1341
- function parentNode()
1342
- {
1343
- return $this->parent();
1344
- }
1345
-
1346
- function childNodes($idx = -1)
1347
- {
1348
- if ($idx === -1) {
1349
- return $this->children;
1350
- }
1351
-
1352
- if (isset($this->children[$idx])) {
1353
- return $this->children[$idx];
1354
- }
1355
-
1356
- return null;
1357
- }
1358
-
1359
- function firstChild()
1360
- {
1361
- if (count($this->children) > 0) {
1362
- return $this->children[0];
1363
- }
1364
- return null;
1365
- }
1366
-
1367
- function lastChild()
1368
- {
1369
- if (count($this->children) > 0) {
1370
- return end($this->children);
1371
- }
1372
- return null;
1373
- }
1374
-
1375
- function nextSibling()
1376
- {
1377
- if ($this->parent === null) {
1378
- return null;
1379
- }
1380
-
1381
- $idx = array_search($this, $this->parent->children, true);
1382
-
1383
- if ($idx !== false && isset($this->parent->children[$idx + 1])) {
1384
- return $this->parent->children[$idx + 1];
1385
- }
1386
-
1387
- return null;
1388
- }
1389
-
1390
- function previousSibling()
1391
- {
1392
- if ($this->parent === null) {
1393
- return null;
1394
- }
1395
-
1396
- $idx = array_search($this, $this->parent->children, true);
1397
-
1398
- if ($idx !== false && $idx > 0) {
1399
- return $this->parent->children[$idx - 1];
1400
- }
1401
-
1402
- return null;
1403
-
1404
- }
1405
-
1406
- function hasChildNodes()
1407
- {
1408
- return !empty($this->children);
1409
- }
1410
-
1411
- function nodeName()
1412
- {
1413
- return $this->tag;
1414
- }
1415
-
1416
- function appendChild($node)
1417
- {
1418
- $node->parent = $this;
1419
- $this->nodes[] = $node;
1420
- $this->children[] = $node;
1421
-
1422
- if ($this->dom) { // Attach current node to DOM (recursively)
1423
- $children = array($node);
1424
-
1425
- while($children) {
1426
- $child = array_pop($children);
1427
- $children = array_merge($children, $child->children);
1428
-
1429
- $this->dom->nodes[] = $child;
1430
- $child->dom = $this->dom;
1431
- $child->_[self::HDOM_INFO_BEGIN] = count($this->dom->nodes) - 1;
1432
- $child->_[self::HDOM_INFO_END] = $child->_[self::HDOM_INFO_BEGIN];
1433
- }
1434
-
1435
- $this->dom->root->_[self::HDOM_INFO_END] = count($this->dom->nodes) - 1;
1436
- }
1437
-
1438
- return $this;
1439
- }
1440
-
1441
- }
1
+ <?php namespace simplehtmldom;
2
+
3
+ /**
4
+ * Website: http://sourceforge.net/projects/simplehtmldom/
5
+ * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
+ *
7
+ * Licensed under The MIT License
8
+ * See the LICENSE file in the project root for more information.
9
+ *
10
+ * Authors:
11
+ * S.C. Chen
12
+ * John Schlick
13
+ * Rus Carroll
14
+ * logmanoriginal
15
+ *
16
+ * Contributors:
17
+ * Yousuke Kumakura
18
+ * Vadim Voituk
19
+ * Antcs
20
+ *
21
+ * Version $Rev$
22
+ */
23
+
24
+ include_once 'constants.php';
25
+ include_once 'Debug.php';
26
+
27
+ class HtmlNode
28
+ {
29
+ const HDOM_TYPE_ELEMENT = 1;
30
+ const HDOM_TYPE_COMMENT = 2;
31
+ const HDOM_TYPE_TEXT = 3;
32
+ const HDOM_TYPE_ROOT = 5;
33
+ const HDOM_TYPE_UNKNOWN = 6;
34
+ const HDOM_TYPE_CDATA = 7;
35
+
36
+ const HDOM_QUOTE_DOUBLE = 0;
37
+ const HDOM_QUOTE_SINGLE = 1;
38
+ const HDOM_QUOTE_NO = 3;
39
+
40
+ const HDOM_INFO_BEGIN = 0;
41
+ const HDOM_INFO_END = 1;
42
+ const HDOM_INFO_QUOTE = 2;
43
+ const HDOM_INFO_SPACE = 3;
44
+ const HDOM_INFO_TEXT = 4;
45
+ const HDOM_INFO_INNER = 5;
46
+ const HDOM_INFO_OUTER = 6;
47
+ const HDOM_INFO_ENDSPACE = 7;
48
+
49
+ public $nodetype = self::HDOM_TYPE_TEXT;
50
+ public $tag = 'text';
51
+ public $attr = array();
52
+ public $children = array();
53
+ public $nodes = array();
54
+ public $parent = null;
55
+ public $_ = array();
56
+ private $dom = null;
57
+
58
+ function __call($func, $args)
59
+ {
60
+ // Allow users to call methods with lower_case syntax
61
+ switch($func)
62
+ {
63
+ case 'children':
64
+ $actual_function = 'childNodes'; break;
65
+ case 'first_child':
66
+ $actual_function = 'firstChild'; break;
67
+ case 'has_child':
68
+ $actual_function = 'hasChildNodes'; break;
69
+ case 'last_child':
70
+ $actual_function = 'lastChild'; break;
71
+ case 'next_sibling':
72
+ $actual_function = 'nextSibling'; break;
73
+ case 'prev_sibling':
74
+ $actual_function = 'previousSibling'; break;
75
+ default:
76
+ trigger_error(
77
+ 'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
78
+ E_USER_ERROR
79
+ );
80
+ }
81
+
82
+ // phpcs:ignore Generic.Files.LineLength
83
+ Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');
84
+
85
+ return call_user_func_array(array($this, $actual_function), $args);
86
+ }
87
+
88
+ function __construct($dom)
89
+ {
90
+ if ($dom === null) return $this;
91
+
92
+ $this->dom = $dom;
93
+ $dom->nodes[] = $this;
94
+ }
95
+
96
+ function __debugInfo()
97
+ {
98
+ // Translate node type to human-readable form
99
+ switch($this->nodetype)
100
+ {
101
+ case self::HDOM_TYPE_ELEMENT:
102
+ $nodetype = "HDOM_TYPE_ELEMENT ($this->nodetype)";
103
+ break;
104
+ case self::HDOM_TYPE_COMMENT:
105
+ $nodetype = "HDOM_TYPE_COMMENT ($this->nodetype)";
106
+ break;
107
+ case self::HDOM_TYPE_TEXT:
108
+ $nodetype = "HDOM_TYPE_TEXT ($this->nodetype)";
109
+ break;
110
+ case self::HDOM_TYPE_ROOT:
111
+ $nodetype = "HDOM_TYPE_ROOT ($this->nodetype)";
112
+ break;
113
+ case self::HDOM_TYPE_CDATA:
114
+ $nodetype = "HDOM_TYPE_CDATA ($this->nodetype)";
115
+ break;
116
+ case self::HDOM_TYPE_UNKNOWN:
117
+ default:
118
+ $nodetype = "HDOM_TYPE_UNKNOWN ($this->nodetype)";
119
+ }
120
+
121
+ return array(
122
+ 'nodetype' => $nodetype,
123
+ 'tag' => $this->tag,
124
+ 'attributes' => empty($this->attr) ? 'none' : $this->attr,
125
+ 'nodes' => empty($this->nodes) ? 'none' : $this->nodes
126
+ );
127
+ }
128
+
129
+ function __toString()
130
+ {
131
+ return $this->outertext();
132
+ }
133
+
134
+ function clear()
135
+ {
136
+ unset($this->dom); // Break link to origin
137
+ unset($this->parent); // Break link to branch
138
+ }
139
+
140
+ /** @codeCoverageIgnore */
141
+ function dump($show_attr = true, $depth = 0)
142
+ {
143
+ echo str_repeat("\t", $depth) . $this->tag;
144
+
145
+ if ($show_attr && count($this->attr) > 0) {
146
+ echo '(';
147
+ foreach ($this->attr as $k => $v) {
148
+ echo "[$k]=>\"$v\", ";
149
+ }
150
+ echo ')';
151
+ }
152
+
153
+ echo "\n";
154
+
155
+ if ($this->nodes) {
156
+ foreach ($this->nodes as $node) {
157
+ $node->dump($show_attr, $depth + 1);
158
+ }
159
+ }
160
+ }
161
+
162
+ /** @codeCoverageIgnore */
163
+ function dump_node($echo = true)
164
+ {
165
+ $string = $this->tag;
166
+
167
+ if (count($this->attr) > 0) {
168
+ $string .= '(';
169
+ foreach ($this->attr as $k => $v) {
170
+ $string .= "[$k]=>\"$v\", ";
171
+ }
172
+ $string .= ')';
173
+ }
174
+
175
+ if (count($this->_) > 0) {
176
+ $string .= ' $_ (';
177
+ foreach ($this->_ as $k => $v) {
178
+ if (is_array($v)) {
179
+ $string .= "[$k]=>(";
180
+ foreach ($v as $k2 => $v2) {
181
+ $string .= "[$k2]=>\"$v2\", ";
182
+ }
183
+ $string .= ')';
184
+ } else {
185
+ $string .= "[$k]=>\"$v\", ";
186
+ }
187
+ }
188
+ $string .= ')';
189
+ }
190
+
191
+ if (isset($this->text)) {
192
+ $string .= " text: ({$this->text})";
193
+ }
194
+
195
+ $string .= ' HDOM_INNER_INFO: ';
196
+
197
+ if (isset($node->_[self::HDOM_INFO_INNER])) {
198
+ $string .= "'" . $node->_[self::HDOM_INFO_INNER] . "'";
199
+ } else {
200
+ $string .= ' NULL ';
201
+ }
202
+
203
+ $string .= ' children: ' . count($this->children);
204
+ $string .= ' nodes: ' . count($this->nodes);
205
+ $string .= "\n";
206
+
207
+ if ($echo) {
208
+ echo $string;
209
+ return;
210
+ } else {
211
+ return $string;
212
+ }
213
+ }
214
+
215
+ function parent($parent = null)
216
+ {
217
+ // I am SURE that this doesn't work properly.
218
+ // It fails to unset the current node from it's current parents nodes or
219
+ // children list first.
220
+ if ($parent !== null) {
221
+ $this->parent = $parent;
222
+ $this->parent->nodes[] = $this;
223
+ $this->parent->children[] = $this;
224
+ }
225
+
226
+ return $this->parent;
227
+ }
228
+
229
+ function find_ancestor_tag($tag)
230
+ {
231
+ if ($this->parent === null) return null;
232
+
233
+ $ancestor = $this->parent;
234
+
235
+ while (!is_null($ancestor)) {
236
+ if ($ancestor->tag === $tag) {
237
+ break;
238
+ }
239
+
240
+ $ancestor = $ancestor->parent;
241
+ }
242
+
243
+ return $ancestor;
244
+ }
245
+
246
+ function innertext()
247
+ {
248
+ if (isset($this->_[self::HDOM_INFO_INNER])) {
249
+ $ret = $this->_[self::HDOM_INFO_INNER];
250
+ } elseif (isset($this->_[self::HDOM_INFO_TEXT])) {
251
+ $ret = $this->_[self::HDOM_INFO_TEXT];
252
+ } else {
253
+ $ret = '';
254
+ }
255
+
256
+ foreach ($this->nodes as $n) {
257
+ $ret .= $n->outertext();
258
+ }
259
+
260
+ return $this->convert_text($ret);
261
+ }
262
+
263
+ function outertext()
264
+ {
265
+ if ($this->tag === 'root') {
266
+ return $this->innertext();
267
+ }
268
+
269
+ // todo: What is the use of this callback? Remove?
270
+ if ($this->dom && $this->dom->callback !== null) {
271
+ call_user_func_array($this->dom->callback, array($this));
272
+ }
273
+
274
+ if (isset($this->_[self::HDOM_INFO_OUTER])) {
275
+ return $this->convert_text($this->_[self::HDOM_INFO_OUTER]);
276
+ }
277
+
278
+ if (isset($this->_[self::HDOM_INFO_TEXT])) {
279
+ return $this->convert_text($this->_[self::HDOM_INFO_TEXT]);
280
+ }
281
+
282
+ $ret = '';
283
+
284
+ if (isset($this->_[self::HDOM_INFO_BEGIN])) {
285
+ $ret = $this->makeup();
286
+ }
287
+
288
+ if (isset($this->_[self::HDOM_INFO_INNER])) {
289
+ // todo: <br> should either never have self::HDOM_INFO_INNER or always
290
+ if ($this->tag !== 'br') {
291
+ $ret .= $this->_[self::HDOM_INFO_INNER];
292
+ }
293
+ }
294
+
295
+ if ($this->nodes) {
296
+ foreach ($this->nodes as $n) {
297
+ $ret .= $n->outertext();
298
+ }
299
+ }
300
+
301
+ if (isset($this->_[self::HDOM_INFO_END]) && $this->_[self::HDOM_INFO_END] != 0) {
302
+ $ret .= '</' . $this->tag . '>';
303
+ }
304
+
305
+ return $this->convert_text($ret);
306
+ }
307
+
308
+ /**
309
+ * Returns true if the provided element is a block level element
310
+ * @link https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
311
+ */
312
+ protected function is_block_element($node)
313
+ {
314
+ // todo: When we have the utility class this should be moved there
315
+ return in_array(strtolower($node->tag), array(
316
+ 'p',
317
+ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
318
+ 'ol', 'ul',
319
+ 'pre',
320
+ 'address',
321
+ 'blockquote',
322
+ 'dl',
323
+ 'div',
324
+ 'fieldset',
325
+ 'form',
326
+ 'hr',
327
+ 'noscript',
328
+ 'table'
329
+ ));
330
+ }
331
+
332
+ /**
333
+ * Returns true if the provided element is an inline level element
334
+ * @link https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
335
+ */
336
+ protected function is_inline_element($node)
337
+ {
338
+ // todo: When we have the utility class this should be moved there
339
+ return in_array(strtolower($node->tag), array(
340
+ 'b', 'big', 'i', 'small', 'tt',
341
+ 'abbr', 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong', 'samp', 'var',
342
+ 'a', 'bdo', 'br', 'img', 'map', 'object', 'q', 'script', 'span', 'sub', 'sup',
343
+ 'button', 'input', 'label', 'select', 'textarea'
344
+ ));
345
+ }
346
+
347
+ function text($trim = true)
348
+ {
349
+ $ret = '';
350
+
351
+ if (strtolower($this->tag) === 'script') {
352
+ $ret = '';
353
+ } elseif (strtolower($this->tag) === 'style') {
354
+ $ret = '';
355
+ } elseif ($this->nodetype === self::HDOM_TYPE_COMMENT) {
356
+ $ret = '';
357
+ } elseif ($this->nodetype === self::HDOM_TYPE_CDATA) {
358
+ $ret = $this->_[self::HDOM_INFO_INNER];
359
+ } elseif ($this->nodetype === self::HDOM_TYPE_UNKNOWN) {
360
+ $ret = '';
361
+ } elseif (isset($this->_[self::HDOM_INFO_INNER])) {
362
+ $ret = $this->_[self::HDOM_INFO_INNER];
363
+ } elseif ($this->nodetype === self::HDOM_TYPE_TEXT) {
364
+ $ret = $this->_[self::HDOM_INFO_TEXT];
365
+ }
366
+
367
+ if (is_null($this->nodes)) {
368
+ return '';
369
+ }
370
+
371
+ foreach ($this->nodes as $n) {
372
+ if ($this->is_block_element($n)) {
373
+
374
+ $block = ltrim($this->convert_text($n->text(false)));
375
+
376
+ if (empty($block))
377
+ continue;
378
+
379
+ $ret = rtrim($ret) . "\n\n" . $block;
380
+
381
+ } elseif ($this->is_inline_element($n)) {
382
+ // todo: <br> introduces code smell because no space but \n
383
+ if (strtolower($n->tag) === 'br') {
384
+ $ret .= $this->dom->default_br_text ?: DEFAULT_BR_TEXT;
385
+ } else {
386
+ $inline = ltrim($this->convert_text($n->text(false)));
387
+
388
+ if (empty($inline))
389
+ continue;
390
+
391
+ $ret = $ret . $this->convert_text($n->text(false));
392
+ }
393
+ } else {
394
+ $ret .= $this->convert_text($n->text(false));
395
+ }
396
+ }
397
+
398
+ // Reduce whitespace at start/end to a single (or none) space
399
+ $ret = preg_replace('/[ \t\n\r\0\x0B\xC2\xA0]+$/u', $trim ? '' : ' ', $ret);
400
+ $ret = preg_replace('/^[ \t\n\r\0\x0B\xC2\xA0]+/u', $trim ? '' : ' ', $ret);
401
+
402
+ return $ret;
403
+ }
404
+
405
+ function xmltext()
406
+ {
407
+ $ret = $this->innertext();
408
+ $ret = str_ireplace('<![CDATA[', '', $ret);
409
+ $ret = str_replace(']]>', '', $ret);
410
+ return $ret;
411
+ }
412
+
413
+ function makeup()
414
+ {
415
+ // text, comment, unknown
416
+ if (isset($this->_[self::HDOM_INFO_TEXT])) {
417
+ return $this->_[self::HDOM_INFO_TEXT];
418
+ }
419
+
420
+ $ret = '<' . $this->tag;
421
+
422
+ foreach ($this->attr as $key => $val) {
423
+
424
+ // skip removed attribute
425
+ if ($val === null || $val === false) { continue; }
426
+
427
+ if (isset($this->_[self::HDOM_INFO_SPACE][$key])) {
428
+ $ret .= $this->_[self::HDOM_INFO_SPACE][$key][0];
429
+ } else {
430
+ $ret .= ' ';
431
+ }
432
+
433
+ //no value attr: nowrap, checked selected...
434
+ if ($val === true) {
435
+ $ret .= $key;
436
+ } else {
437
+ if (isset($this->_[self::HDOM_INFO_QUOTE][$key])) {
438
+ $quote_type = $this->_[self::HDOM_INFO_QUOTE][$key];
439
+ } else {
440
+ $quote_type = self::HDOM_QUOTE_DOUBLE;
441
+ }
442
+
443
+ switch ($quote_type)
444
+ {
445
+ case self::HDOM_QUOTE_SINGLE:
446
+ $quote = '\'';
447
+ $val = htmlentities($val, ENT_QUOTES, $this->dom->target_charset);
448
+ break;
449
+ case self::HDOM_QUOTE_NO:
450
+ $quote = '';
451
+ break;
452
+ case self::HDOM_QUOTE_DOUBLE:
453
+ default:
454
+ $quote = '"';
455
+ $val = htmlentities($val, ENT_COMPAT, $this->dom->target_charset);
456
+ }
457
+
458
+ $ret .= $key
459
+ . (isset($this->_[self::HDOM_INFO_SPACE][$key]) ? $this->_[self::HDOM_INFO_SPACE][$key][1] : '')
460
+ . '='
461
+ . (isset($this->_[self::HDOM_INFO_SPACE][$key]) ? $this->_[self::HDOM_INFO_SPACE][$key][2] : '')
462
+ . $quote
463
+ . $val
464
+ . $quote;
465
+ }
466
+ }
467
+
468
+ if(isset($this->_[self::HDOM_INFO_ENDSPACE])) {
469
+ $ret .= $this->_[self::HDOM_INFO_ENDSPACE];
470
+ }
471
+
472
+ return $ret . '>';
473
+ }
474
+
475
/**
 * Find nodes below this node that match a selector string.
 *
 * @param string   $selector  Selector (or comma separated selector list).
 * @param int|null $idx       null returns every match; an integer returns
 *                            that match only (negative counts from the end).
 * @param bool     $lowercase Forwarded to seek() for each selector step.
 *
 * @return array|object|null List of matches when $idx is null, otherwise
 *                           the requested match or null if it does not exist.
 */
function find($selector, $idx = null, $lowercase = false)
{
    $selectorList = $this->parse_selector($selector);

    if (count($selectorList) === 0) {
        return array();
    }

    $matchedKeys = array();

    // Each entry of the list is one chain of selector steps.
    foreach ($selectorList as $chain) {
        $depth = count($chain);

        if ($depth === 0) {
            Debug::log_once('Empty selector (' . $selector . ') matches nothing.');
            return array();
        }

        if (!isset($this->_[self::HDOM_INFO_BEGIN])) {
            Debug::log_once('Invalid operation. The current node has no start tag.');
            return array();
        }

        // Start from this node with the descendant combinator.
        $current = array($this->_[self::HDOM_INFO_BEGIN] => 1);
        $combinator = ' ';

        // Walk the chain iteratively; every step narrows the candidate set.
        for ($level = 0; $level < $depth; ++$level) {
            $step = $chain[$level];
            $next = array();

            foreach ($current as $key => $unused) {
                // Key -1 denotes the document root.
                $origin = ($key === -1) ? $this->dom->root : $this->dom->nodes[$key];
                $origin->seek($step, $next, $combinator, $lowercase);
            }

            $current = $next;
            $combinator = $step[6]; // combinator towards the next step
        }

        foreach (array_keys($current) as $key) {
            if (!isset($matchedKeys[$key])) {
                $matchedKeys[$key] = 1;
            }
        }
    }

    // Return matches ordered by their index in the node list.
    ksort($matchedKeys);

    $matches = array();
    foreach (array_keys($matchedKeys) as $key) {
        $matches[] = $this->dom->nodes[$key];
    }

    if (is_null($idx)) {
        return $matches;
    }

    if ($idx < 0) {
        $idx = count($matches) + $idx;
    }

    return isset($matches[$idx]) ? $matches[$idx] : null;
}
533
+
534
/**
 * Same as find(), but yields null instead of any empty/falsy result.
 *
 * @param string   $selector  Selector string, passed to find().
 * @param int|null $idx       Passed to find().
 * @param bool     $lowercase Passed to find().
 *
 * @return mixed|null The find() result, or null when that result is falsy
 *                    (for example an empty array).
 */
function expect($selector, $idx = null, $lowercase = false)
{
    $result = $this->find($selector, $idx, $lowercase);

    if ($result) {
        return $result;
    }

    return null;
}
538
+
539
/**
 * Collect the nodes that match one selector step, relative to this node.
 *
 * Matching node indices are written into $ret as keys (value 1).
 *
 * Changes compared to the previous implementation:
 *  - the ancestor walk no longer reads a property of null when no ancestor
 *    carries an end index (falls back to "until the end of the document"),
 *  - object and class-name membership tests are strict, avoiding recursive
 *    object comparison and numeric-string coercion ("10" == "1e1"),
 *  - the subsequent-sibling combinator (~) no longer offers the reference
 *    element itself as a candidate.
 *
 * @param array  $selector   One parsed selector step as produced by
 *                           parse_selector(): pseudo selector, tag, pseudo
 *                           element, id, classes, attributes, combinator.
 * @param array  $ret        Result set, filled by reference.
 * @param string $parent_cmd Combinator that leads to this step
 *                           (' ', '>', '+' or '~').
 * @param bool   $lowercase  Compare classes and attribute values lowercased.
 *
 * @return void
 */
protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
{
    list($ps_selector, $tag, $ps_element, $id, $class, $attributes) = $selector;
    $nodes = array();

    if ($parent_cmd === ' ') { // Descendant Combinator
        // Use the closing index of the nearest ancestor that has one when
        // this element has no closing tag itself (i.e. void element).
        $end = (!empty($this->_[self::HDOM_INFO_END])) ? $this->_[self::HDOM_INFO_END] : 0;

        if ($end == 0 && $this->parent) {
            $parent = $this->parent;

            while ($parent !== null && !isset($parent->_[self::HDOM_INFO_END])) {
                $end -= 1;
                $parent = $parent->parent;
            }

            if ($parent !== null) {
                $end += $parent->_[self::HDOM_INFO_END];
            } else {
                // No ancestor knows where it ends: search to the end of the
                // document instead of dereferencing null.
                $end = 0;
            }
        }

        if ($end === 0) {
            $end = count($this->dom->nodes);
        }

        $nodes_start = $this->_[self::HDOM_INFO_BEGIN] + 1;

        // remove() makes $this->dom->nodes non-contiguous; use what is left.
        $nodes = array_intersect_key(
            $this->dom->nodes,
            array_flip(range($nodes_start, $end))
        );
    } elseif ($parent_cmd === '>') { // Child Combinator
        $nodes = $this->children;
    } elseif ($parent_cmd === '+' && $this->parent) { // Next-Sibling Combinator
        $index = array_search($this, $this->parent->children, true);

        if ($index !== false && isset($this->parent->children[$index + 1])) {
            $nodes[] = $this->parent->children[$index + 1];
        }
    } elseif ($parent_cmd === '~' && $this->parent) { // Subsequent Sibling Combinator
        $index = array_search($this, $this->parent->children, true);

        if ($index !== false) {
            // Only siblings AFTER this element are candidates.
            $nodes = array_slice($this->parent->children, $index + 1);
        }
    }

    foreach ($nodes as $node) {
        // Skip root nodes
        if (!$node->parent) {
            continue;
        }

        // 'text' selector: text nodes and nodes with overridden inner text
        if ($tag === 'text') {
            if ($node->tag === 'text') {
                $ret[array_search($node, $this->dom->nodes, true)] = 1;
            }

            if (isset($node->_[self::HDOM_INFO_INNER])) {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // 'cdata' selector
        if ($tag === 'cdata') {
            if ($node->tag === 'cdata') {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // 'comment' selector
        if ($tag === 'comment' && $node->tag === 'comment') {
            $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            continue;
        }

        // Skip if node isn't a child node (i.e. text nodes)
        if (!in_array($node, $node->parent->children, true)) {
            continue;
        }

        $pass = true;

        // Tag must match unless the selector has no tag or is '*'
        if ($tag !== '' && $tag !== $node->tag && $tag !== '*') {
            $pass = false;
        }

        // ID must exist and match
        if ($pass && $id !== '') {
            if (!isset($node->attr['id'])) {
                $pass = false;
            } else {
                // Note: Only consider the first ID (as browsers do)
                $node_id = explode(' ', trim($node->attr['id']))[0];

                if ($id !== $node_id) {
                    $pass = false;
                }
            }
        }

        // All requested classes must be present
        if ($pass && $class !== '' && is_array($class) && !empty($class)) {
            if (isset($node->attr['class'])) {
                // Normalize whitespace the same way as for attribute values
                // https://www.w3.org/TR/html/syntax.html#attribute-values
                $node_classes = preg_replace("/[\r\n\t\s]+/u", ' ', $node->attr['class']);
                $node_classes = trim($node_classes);
                $node_classes = explode(' ', $node_classes);

                if ($lowercase) {
                    $node_classes = array_map('strtolower', $node_classes);
                }

                foreach ($class as $class_name) {
                    if (!in_array($class_name, $node_classes, true)) {
                        $pass = false;
                        break;
                    }
                }
            } else {
                $pass = false;
            }
        }

        // Check attributes
        if ($pass
            && $attributes !== ''
            && is_array($attributes)
            && !empty($attributes)) {
            foreach ($attributes as $a) {
                list(
                    $att_name,
                    $att_expr,
                    $att_val,
                    $att_inv,
                    $att_case_sensitivity
                ) = $a;

                // Indexing attributes (i.e. "[2]") are not part of the CSS
                // standard; they select the n-th element of its tag within
                // the parent, compatible to XPath.
                if (is_numeric($att_name)
                    && $att_expr === ''
                    && $att_val === '') {
                    $count = 0;

                    // Find index of current element in parent
                    foreach ($node->parent->children as $sibling) {
                        if ($sibling->tag === $node->tag) {
                            ++$count;
                        }
                        if ($sibling === $node) {
                            break;
                        }
                    }

                    // Correct position: continue with the next attribute
                    if ($count === (int)$att_name) {
                        continue;
                    }
                }

                // Check attribute availability
                if ($att_inv) { // Attribute should NOT be set
                    if (isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                } else { // Attribute should be set
                    // todo: "plaintext" is not a valid CSS selector!
                    if ($att_name !== 'plaintext'
                        && !isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                }

                // Continue with next attribute if expression isn't defined
                if ($att_expr === '') {
                    continue;
                }

                // "plaintext" compares against the text of the node
                // todo "plaintext" is not a valid CSS selector!
                if ($att_name === 'plaintext') {
                    $nodeKeyValue = $node->text();
                } else {
                    $nodeKeyValue = $node->attr[$att_name];
                }

                // If lowercase is set, do a case insensitive test of the
                // value of the selector.
                if ($lowercase) {
                    $check = $this->match(
                        $att_expr,
                        strtolower($att_val),
                        strtolower($nodeKeyValue),
                        $att_case_sensitivity
                    );
                } else {
                    $check = $this->match(
                        $att_expr,
                        $att_val,
                        $nodeKeyValue,
                        $att_case_sensitivity
                    );
                }

                $check = $ps_element === 'not' ? !$check : $check;

                if (!$check) {
                    $pass = false;
                    break;
                }
            }
        }

        // A leading ":not(" pseudo selector inverts the overall result
        $pass = $ps_selector === 'not' ? !$pass : $pass;

        if ($pass) {
            $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
        }
    }
}
781
+
782
/**
 * Test an attribute value against a selector pattern.
 *
 * Both pattern and value are whitespace-normalized (runs of whitespace
 * collapsed to one space, then trimmed) before they are compared.
 *
 * @param string $exp              Operator: =, !=, ^=, $=, *=, |= or ~=.
 * @param string $pattern          Value given in the selector.
 * @param string $value            Value found on the node.
 * @param string $case_sensitivity 'i' compares case-insensitively.
 *
 * @return bool|int Truthy when the value matches (the regex based operators
 *                  return the preg_match() result); false for unknown
 *                  operators.
 */
protected function match($exp, $pattern, $value, $case_sensitivity)
{
    if ($case_sensitivity === 'i') {
        $pattern = strtolower($pattern);
        $value = strtolower($value);
    }

    // Apply the same rules for the pattern and attribute value
    // Attribute values must not contain control characters other than space
    // https://www.w3.org/TR/html/dom.html#text-content
    // https://www.w3.org/TR/html/syntax.html#attribute-values
    // https://www.w3.org/TR/xml/#AVNormalize
    $pattern = preg_replace("/[\r\n\t\s]+/u", ' ', $pattern);
    $pattern = trim($pattern);

    $value = preg_replace("/[\r\n\t\s]+/u", ' ', $value);
    $value = trim($value);

    switch ($exp) {
        case '=':
            return ($value === $pattern);
        case '!=':
            return ($value !== $pattern);
        case '^=':
            return preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
        case '$=':
            return preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
        case '*=':
            return preg_match('/' . preg_quote($pattern, '/') . '/', $value);
        case '|=':
            /**
             * [att|=val]
             *
             * Represents an element with the att attribute, its value
             * either being exactly "val" or beginning with "val"
             * immediately followed by "-" (U+002D).
             *
             * A plain prefix test would also accept e.g. "english" for
             * [lang|=en], so the hyphen is part of the comparison.
             */
            return $value === $pattern
                || strpos($value, $pattern . '-') === 0;
        case '~=':
            /**
             * [att~=val]
             *
             * Represents an element with the att attribute whose value is a
             * whitespace-separated list of words, one of which is exactly
             * "val". If "val" contains whitespace, it will never represent
             * anything (since the words are separated by spaces). Also if
             * "val" is the empty string, it will never represent anything.
             */
            return in_array($pattern, explode(' ', trim($value)), true);
    }

    Debug::log('Unhandled attribute selector: ' . $exp . '!');
    return false;
}
836
+
837
/**
 * Split a selector string into selector lists, chains and steps.
 *
 * The pattern is modified from mootools (https://mootools.net/). Each match
 * yields, after the full match has been dropped, a step with these entries:
 *
 *  [0] pseudo selector        (?:\:(\w+)\()?
 *  [1] tag name               ([\w:\*-]*)   (colons allowed, e.g. "a:b")
 *  [2] pseudo selector        (?:\:(\w+)\()?
 *  [3] id name                (?:\#([\w-]+))?
 *  [4] class names            chained with dots, e.g. ".foo.bar.baz"
 *  [5] attributes             one or more "[...]" groups
 *  [6] separator/combinator   ([\/, >+~]+)
 *
 * An attribute name may be prefixed by "@", which is not captured, and by
 * "!" to negate it. Note: an attribute like attr:ibute cannot be read as
 * $dom->x:y; use getAttribute() for such names.
 *
 * @param string $selector_string Raw selector.
 *
 * @return array List of selector chains; every chain is a list of steps.
 */
protected function parse_selector($selector_string)
{
    // phpcs:ignore Generic.Files.LineLength
    $pattern = "/(?:\:(\w+)\()?([\w:\*-]*)(?:\:(\w+)\()?(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?(?:\))?(?:\))?([\/, >+~]+)/is";

    // A trailing space acts as pseudo separator for the last step.
    $subject = trim($selector_string) . ' ';

    preg_match_all($pattern, $subject, $matches, PREG_SET_ORDER);

    $chains = array();
    $chain = array();

    foreach ($matches as $step) {
        $full = trim($step[0]);
        $step[0] = $full;

        // Skip NoOps
        if ($full === '' || $full === '/' || $full === '//') {
            continue;
        }

        // Drop the full match so that indices start at the pseudo selector.
        array_shift($step);

        if ($this->dom->lowercase) {
            $step[1] = strtolower($step[1]);
        }

        // ".foo.bar" was captured as "foo.bar"
        if ($step[4] !== '') {
            $step[4] = explode('.', $step[4]);
        }

        /* Attribute groups (pattern based on the pattern above!)
         *
         * [0] - full match
         * [1] - attribute name
         * [2] - attribute expression
         * [3] - attribute value
         * [4] - case sensitivity
         */
        if ($step[5] !== '') {
            preg_match_all(
                "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
                trim($step[5]),
                $groups,
                PREG_SET_ORDER
            );

            $parsed = array();

            foreach ($groups as $group) {
                if (trim($group[0]) === '') {
                    continue;
                }

                $negated = (isset($group[1][0]) && $group[1][0] === '!');
                $name = $negated ? substr($group[1], 1) : $group[1];
                $expression = isset($group[2]) ? $group[2] : '';
                $value = isset($group[3]) ? $group[3] : '';
                $sensitivity = isset($group[4]) ? strtolower($group[4]) : '';

                $parsed[] = array($name, $expression, $value, $negated, $sensitivity);
            }

            $step[5] = $parsed;
        }

        // Whitespace-only separators mean "descendant"; others are trimmed.
        $trimmed = trim($step[6]);
        $step[6] = ($step[6] !== '' && $trimmed === '') ? ' ' : $trimmed;

        // A comma ends the current chain and carries no combinator.
        $endsChain = ($step[6] === ',');
        if ($endsChain) {
            $step[6] = '';
        }

        $chain[] = $step;

        if ($endsChain) {
            $chains[] = $chain;
            $chain = array();
        }
    }

    if (count($chain) > 0) {
        $chains[] = $chain;
    }

    return $chains;
}
976
+
977
/**
 * Magic getter: attributes first, then the virtual text properties.
 *
 * @param string $name Attribute name, or one of outertext, innertext,
 *                     plaintext, xmltext.
 *
 * @return mixed Charset-converted attribute value, the requested text, or
 *               false when nothing by that name is available.
 */
function __get($name)
{
    if (isset($this->attr[$name])) {
        $raw = $this->attr[$name];
        return $this->convert_text($raw);
    }

    switch ($name) {
        case 'outertext':
            return $this->outertext();
        case 'innertext':
            return $this->innertext();
        case 'plaintext':
            return $this->text();
        case 'xmltext':
            return $this->xmltext();
        default:
            return false;
    }
}
992
+
993
/**
 * Magic setter: overrides outer/inner text or stores an attribute.
 *
 * @param string $name  'outertext', 'innertext' or an attribute name.
 * @param mixed  $value New value.
 *
 * @return void
 */
function __set($name, $value)
{
    switch ($name) {
        case 'outertext':
            $this->_[self::HDOM_INFO_OUTER] = $value;
            return;

        case 'innertext':
            // Stored text, if present, is emptied in favour of the override.
            if (isset($this->_[self::HDOM_INFO_TEXT])) {
                $this->_[self::HDOM_INFO_TEXT] = '';
            }
            $this->_[self::HDOM_INFO_INNER] = $value;
            return;

        default:
            $this->attr[$name] = $value;
    }
}
1008
+
1009
/**
 * Magic isset: the virtual text properties always exist, anything else is
 * looked up in the attribute list.
 *
 * @param string $name Property or attribute name.
 *
 * @return bool
 */
function __isset($name)
{
    switch ($name) {
        case 'outertext':
        case 'innertext':
        case 'plaintext':
            return true;
        default:
            return isset($this->attr[$name]);
    }
}
1019
+
1020
/**
 * Magic unset: drops the attribute of that name when it is set.
 *
 * @param string $name Attribute name.
 *
 * @return void
 */
function __unset($name)
{
    if (!isset($this->attr[$name])) {
        return;
    }

    unset($this->attr[$name]);
}
1024
+
1025
/**
 * Convert text from the document's source charset to its target charset.
 *
 * No conversion happens when the node has no document, when either charset
 * is empty, when both charsets are equal, or when the target is UTF-8 and
 * the text already is valid UTF-8. If iconv() fails the text is returned
 * unconverted instead of being replaced by false. For a UTF-8 target a
 * leading and a trailing byte order mark are stripped.
 *
 * @param string $text Text to convert.
 *
 * @return string Converted (or unchanged) text.
 */
function convert_text($text)
{
    $converted_text = $text;

    $sourceCharset = '';
    $targetCharset = '';

    if ($this->dom) {
        $sourceCharset = strtoupper($this->dom->_charset);
        $targetCharset = strtoupper($this->dom->_target_charset);
    }

    if (!empty($sourceCharset)
        && !empty($targetCharset)
        && $sourceCharset !== $targetCharset) {
        if ($targetCharset === 'UTF-8' && self::is_utf8($text)) {
            Debug::log_once('The source charset was incorrectly detected as ' . $sourceCharset . ' but should have been UTF-8');
        } else {
            $result = iconv($sourceCharset, $targetCharset, $text);

            // iconv() returns false on failure; keep the original text then.
            if ($result !== false) {
                $converted_text = $result;
            }
        }
    }

    // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
    if ($targetCharset === 'UTF-8') {
        if (substr($converted_text, 0, 3) === "\xef\xbb\xbf") {
            $converted_text = substr($converted_text, 3);
        }

        if (substr($converted_text, -3) === "\xef\xbb\xbf") {
            $converted_text = substr($converted_text, 0, -3);
        }
    }

    return $converted_text;
}
1061
+
1062
/**
 * Check whether a byte string is structurally valid UTF-8.
 *
 * Every byte >= 0x80 must be a lead byte followed by the right number of
 * continuation bytes (0x80-0xBF). Lead bytes for 5 and 6 byte sequences are
 * still accepted, as before; overlong encodings are not detected.
 *
 * Fix: byte 0x80 itself used to slip through the "> 128" test and was
 * treated like ASCII, so a lone continuation byte counted as valid.
 *
 * @param string $str Bytes to check.
 *
 * @return bool True when no malformed sequence was found.
 */
static function is_utf8($str)
{
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
        $c = ord($str[$i]);

        if ($c < 128) {
            continue; // plain ASCII
        }

        if ($c >= 254) { return false; }
        elseif ($c >= 252) { $bits = 6; }
        elseif ($c >= 248) { $bits = 5; }
        elseif ($c >= 240) { $bits = 4; }
        elseif ($c >= 224) { $bits = 3; }
        elseif ($c >= 192) { $bits = 2; }
        else { return false; } // 0x80-0xBF cannot start a sequence

        // The sequence must fit into the remaining bytes.
        if (($i + $bits) > $len) {
            return false;
        }

        while ($bits > 1) {
            $i++;
            $b = ord($str[$i]);
            if ($b < 128 || $b > 191) { return false; }
            $bits--;
        }
    }

    return true;
}
1088
+
1089
/**
 * Determine the display size of an <img> element.
 *
 * The width/height attributes win; a dimension that is still unknown (-1)
 * is then taken from the inline style when given as an integer number of
 * pixels (e.g. "width: 120px").
 *
 * Fixes compared to the previous implementation: style values are trimmed
 * (whitespace before ";" used to hide the "px" suffix), "0px" is accepted
 * (the integer check was a truthiness test), and style property names are
 * compared case-insensitively.
 *
 * Not handled: sizes coming from classes, ids or external style sheets.
 *
 * @return array|false array('height' => ..., 'width' => ...) with -1 for an
 *                     unknown dimension, or false if this is not an img tag.
 */
function get_display_size()
{
    if ($this->tag !== 'img') {
        return false;
    }

    $size = array(
        'height' => -1,
        'width' => -1
    );

    // See if there is a height or width attribute in the tag itself.
    foreach (array('width', 'height') as $dimension) {
        if (isset($this->attr[$dimension])) {
            $size[$dimension] = $this->attr[$dimension];
        }
    }

    // Now look for an inline style.
    if (isset($this->attr['style'])) {
        // Thanks to user gnarf from stackoverflow for this regular expression.
        $declarations = array();

        preg_match_all(
            '/([\w-]+)\s*:\s*([^;]+)\s*;?/',
            $this->attr['style'],
            $matches,
            PREG_SET_ORDER
        );

        foreach ($matches as $match) {
            $declarations[strtolower($match[1])] = trim($match[2]);
        }

        foreach (array('width', 'height') as $dimension) {
            // Only fill in what the attributes did not provide.
            if (!isset($declarations[$dimension]) || $size[$dimension] != -1) {
                continue;
            }

            $declared = $declarations[$dimension];

            // Only pixel values are understood.
            if (strtolower(substr($declared, -2)) !== 'px') {
                continue;
            }

            $proposed = substr($declared, 0, -2);

            // filter_var() returns false for anything that is not an integer.
            if (filter_var($proposed, FILTER_VALIDATE_INT) !== false) {
                $size[$dimension] = $proposed;
            }
        }
    }

    return $size;
}
1171
+
1172
/**
 * Render this node and optionally write the markup to a file.
 *
 * @param string $filepath Target file; nothing is written for ''.
 *
 * @return string The outer text of this node.
 */
function save($filepath = '')
{
    $markup = $this->outertext();

    if ($filepath === '') {
        return $markup;
    }

    // Exclusive lock while writing.
    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
1182
+
1183
/**
 * Add one or more class names to the class attribute.
 *
 * Names that are already present are skipped. Empty names, which a string
 * with repeated spaces produces when split, are ignored instead of being
 * appended as stray spaces.
 *
 * @param string|array $class Space separated names or a list of names.
 *
 * @return void
 */
function addClass($class)
{
    if (is_string($class)) {
        $class = explode(' ', $class);
    }

    if (!is_array($class)) {
        return;
    }

    foreach ($class as $c) {
        if ($c === '') {
            continue;
        }

        if (!isset($this->class)) {
            // First class on this element
            $this->class = $c;
        } elseif (!$this->hasClass($c)) {
            $this->class .= ' ' . $c;
        }
    }
}
1203
+
1204
/**
 * Tell whether the class attribute contains the given class name.
 *
 * @param string $class Single class name; non-strings never match.
 *
 * @return bool
 */
function hasClass($class)
{
    if (!is_string($class) || !isset($this->class)) {
        return false;
    }

    $present = explode(' ', $this->class);

    return in_array($class, $present, true);
}
1214
+
1215
/**
 * Remove class names from the class attribute.
 *
 * @param string|array|null $class Space separated names or a list of names;
 *                                 null removes the class attribute entirely.
 *
 * @return void
 */
function removeClass($class = null)
{
    // Nothing to do without a class attribute.
    if (!isset($this->class)) {
        return;
    }

    if (is_null($class)) {
        $this->removeAttribute('class');
        return;
    }

    if (is_string($class)) {
        $class = explode(' ', $class);
    }

    if (!is_array($class)) {
        return;
    }

    $remaining = array_diff(explode(' ', $this->class), $class);

    if (empty($remaining)) {
        // No class left: drop the attribute instead of leaving it empty.
        $this->removeAttribute('class');
        return;
    }

    $this->class = implode(' ', $remaining);
}
1239
+
1240
/**
 * Return the attribute list of this node as stored (no charset conversion).
 *
 * @return array Attribute name => value.
 */
function getAllAttributes()
{
    $attributes = $this->attr;

    return $attributes;
}
1244
+
1245
/**
 * Read an attribute through property access on this node.
 *
 * @param string $name Attribute name.
 *
 * @return mixed Whatever reading the property $name yields.
 */
function getAttribute($name)
{
    $value = $this->$name;

    return $value;
}
1249
+
1250
/**
 * Write an attribute through property access on this node.
 *
 * @param string $name  Attribute name.
 * @param mixed  $value New value.
 *
 * @return void
 */
function setAttribute($name, $value)
{
    $this->{$name} = $value;
}
1254
+
1255
/**
 * Tell whether isset() succeeds for the property of that name.
 *
 * @param string $name Attribute name.
 *
 * @return bool
 */
function hasAttribute($name)
{
    $exists = isset($this->{$name});

    return $exists;
}
1259
+
1260
/**
 * Remove an attribute by unsetting the property of that name.
 *
 * @param string $name Attribute name.
 *
 * @return void
 */
function removeAttribute($name)
{
    unset($this->{$name});
}
1264
+
1265
/**
 * Detach this node from its parent; does nothing for a node without parent.
 *
 * @return void
 */
function remove()
{
    if (!$this->parent) {
        return;
    }

    $this->parent->removeChild($this);
}
1271
+
1272
/**
 * Remove a child node, including everything below it, from this node and
 * from the document's node list.
 *
 * The document's node list is deliberately NOT re-indexed, because nodes
 * refer to each other by their index in that list.
 *
 * @param object $node Child node to remove.
 *
 * @return void
 */
function removeChild($node)
{
    // Depth first: remove the element children of $node.
    foreach ($node->children as $grandchild) {
        $node->removeChild($grandchild);
    }

    // Remaining entities of $node. Its own lists need no re-indexing since
    // $node itself is about to be removed.
    foreach ($node->nodes as $entity) {
        $ownKey = array_search($entity, $node->nodes, true);
        $domKey = array_search($entity, $node->dom->nodes, true);

        if ($ownKey !== false) {
            unset($node->nodes[$ownKey]);
        }

        if ($domKey !== false) {
            unset($node->dom->nodes[$domKey]);
        }
    }

    // Locate $node in all three lists before any of them is changed.
    $nodeKey = array_search($node, $this->nodes, true);
    $childKey = array_search($node, $this->children, true);
    $domKey = array_search($node, $this->dom->nodes, true);

    if ($nodeKey !== false) {
        unset($this->nodes[$nodeKey]);
    }
    $this->nodes = array_values($this->nodes);

    if ($childKey !== false) {
        unset($this->children[$childKey]);
    }
    $this->children = array_values($this->children);

    if ($domKey !== false) {
        unset($this->dom->nodes[$domKey]);
    }

    $node->clear();
}
1320
+
1321
/**
 * Find the first node with the given id.
 *
 * @param string $id Id, inserted into the selector "#<id>" as is.
 *
 * @return object|null Result of find() for index 0.
 */
function getElementById($id)
{
    $selector = "#$id";

    return $this->find($selector, 0);
}
1325
+
1326
/**
 * Find nodes with the given id.
 *
 * @param string   $id  Id, inserted into the selector "#<id>" as is.
 * @param int|null $idx Passed to find(): null for all matches, otherwise
 *                      the index of the wanted match.
 *
 * @return array|object|null Result of find().
 */
function getElementsById($id, $idx = null)
{
    $selector = "#$id";

    return $this->find($selector, $idx);
}
1330
+
1331
/**
 * Find the first node matching the given tag name.
 *
 * @param string $name Tag name, used directly as the selector.
 *
 * @return object|null Result of find() for index 0.
 */
function getElementByTagName($name)
{
    $first = $this->find($name, 0);

    return $first;
}
1335
+
1336
/**
 * Find nodes matching the given tag name.
 *
 * @param string   $name Tag name, used directly as the selector.
 * @param int|null $idx  Passed to find(): null for all matches, otherwise
 *                       the index of the wanted match.
 *
 * @return array|object|null Result of find().
 */
function getElementsByTagName($name, $idx = null)
{
    $found = $this->find($name, $idx);

    return $found;
}
1340
+
1341
/**
 * Alias of parent().
 *
 * @return mixed Whatever parent() returns.
 */
function parentNode()
{
    $parent = $this->parent();

    return $parent;
}
1345
+
1346
/**
 * Return all children, or a single child by index.
 *
 * @param int $idx -1 (default) returns the list of children; any other
 *                 value returns the child at that index.
 *
 * @return array|object|null List of children, the child, or null when no
 *                           child exists at $idx.
 */
function childNodes($idx = -1)
{
    if ($idx === -1) {
        return $this->children;
    }

    return isset($this->children[$idx]) ? $this->children[$idx] : null;
}
1358
+
1359
/**
 * Return the first child of this node.
 *
 * @return object|null The child at index 0, or null without children.
 */
function firstChild()
{
    if (count($this->children) <= 0) {
        return null;
    }

    return $this->children[0];
}
1366
+
1367
/**
 * Return the last child of this node.
 *
 * @return object|null The last entry of the children list, or null
 *                     without children.
 */
function lastChild()
{
    $hasChildren = count($this->children) > 0;

    return $hasChildren ? end($this->children) : null;
}
1374
+
1375
/**
 * Return the element that follows this node among its parent's children.
 *
 * @return object|null Next sibling, or null when there is no parent, this
 *                     node is not among the parent's children, or it is
 *                     the last child.
 */
function nextSibling()
{
    if ($this->parent === null) {
        return null;
    }

    $siblings = $this->parent->children;
    $position = array_search($this, $siblings, true);

    if ($position === false || !isset($siblings[$position + 1])) {
        return null;
    }

    return $siblings[$position + 1];
}
1389
+
1390
/**
 * Return the element that precedes this node among its parent's children.
 *
 * @return object|null Previous sibling, or null when there is no parent,
 *                     this node is not among the parent's children, or it
 *                     is the first child.
 */
function previousSibling()
{
    if ($this->parent === null) {
        return null;
    }

    $siblings = $this->parent->children;
    $position = array_search($this, $siblings, true);

    if ($position === false || $position <= 0) {
        return null;
    }

    return $siblings[$position - 1];
}
1405
+
1406
/**
 * Tell whether this node has at least one child.
 *
 * @return bool
 */
function hasChildNodes()
{
    if (empty($this->children)) {
        return false;
    }

    return true;
}
1410
+
1411
/**
 * Return the tag name of this node.
 *
 * @return string Value of the tag property.
 */
function nodeName()
{
    $name = $this->tag;

    return $name;
}
1415
+
1416
/**
 * Append a node as the last child of this node.
 *
 * When this node belongs to a document, the new node and all of its
 * descendants are registered in the document's node list and receive their
 * begin/end index there.
 *
 * Fix: the index of a registered node is now the key it actually received
 * in the node list. removeChild() leaves gaps in that list, after which
 * "count - 1" no longer equals the key of the last entry.
 *
 * @param object $node Node to append.
 *
 * @return $this
 */
function appendChild($node)
{
    $node->parent = $this;
    $this->nodes[] = $node;
    $this->children[] = $node;

    if ($this->dom) { // Attach current node to DOM (recursively)
        $children = array($node);

        while ($children) {
            $child = array_pop($children);
            $children = array_merge($children, $child->children);

            $this->dom->nodes[] = $child;
            $child->dom = $this->dom;

            // Key of the entry that was just appended.
            end($this->dom->nodes);
            $index = key($this->dom->nodes);

            $child->_[self::HDOM_INFO_BEGIN] = $index;
            $child->_[self::HDOM_INFO_END] = $index;
        }

        // The root now ends at the last registered node.
        end($this->dom->nodes);
        $this->dom->root->_[self::HDOM_INFO_END] = key($this->dom->nodes);
    }

    return $this;
}
1440
+
1441
+ }
vendor/simplehtmldom/simplehtmldom/HtmlWeb.php CHANGED
@@ -1,134 +1,134 @@
1
- <?php namespace simplehtmldom;
2
-
3
- /**
4
- * Website: http://sourceforge.net/projects/simplehtmldom/
5
- * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
- *
7
- * Licensed under The MIT License
8
- * See the LICENSE file in the project root for more information.
9
- *
10
- * Authors:
11
- * S.C. Chen
12
- * John Schlick
13
- * Rus Carroll
14
- * logmanoriginal
15
- *
16
- * Contributors:
17
- * Yousuke Kumakura
18
- * Vadim Voituk
19
- * Antcs
20
- *
21
- * Version $Rev$
22
- */
23
-
24
- include_once 'HtmlDocument.php';
25
-
26
- class HtmlWeb {
27
-
28
- /**
29
- * @return HtmlDocument Returns the DOM for a webpage
30
- * @return null Returns null if the cURL extension is not loaded and allow_url_fopen=Off
31
- * @return null Returns null if the provided URL is invalid (not PHP_URL_SCHEME)
32
- * @return null Returns null if the provided URL does not specify the HTTP or HTTPS protocol
33
- */
34
- function load($url)
35
- {
36
- if(!filter_var($url, FILTER_VALIDATE_URL)) {
37
- return null;
38
- }
39
-
40
- if($scheme = parse_url($url, PHP_URL_SCHEME)) {
41
- switch(strtolower($scheme)) {
42
- case 'http':
43
- case 'https': break;
44
- default: return null;
45
- }
46
-
47
- if(extension_loaded('curl')) {
48
- return $this->load_curl($url);
49
- } elseif(ini_get('allow_url_fopen')) {
50
- return $this->load_fopen($url);
51
- } else {
52
- error_log(__FUNCTION__ . ' requires either the cURL extension or allow_url_fopen=On in php.ini');
53
- }
54
- }
55
-
56
- return null;
57
- }
58
-
59
- /**
60
- * cURL implementation of load
61
- */
62
- private function load_curl($url)
63
- {
64
- $ch = curl_init();
65
-
66
- curl_setopt($ch, CURLOPT_URL, $url);
67
- curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
68
- curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
69
-
70
- // There is no guarantee this request will be fulfilled
71
- // -- https://www.php.net/manual/en/function.curl-setopt.php
72
- curl_setopt($ch, CURLOPT_BUFFERSIZE, MAX_FILE_SIZE);
73
-
74
- // There is no guarantee this request will be fulfilled
75
- $header = array(
76
- 'Accept: text/html', // Prefer HTML format
77
- 'Accept-Charset: utf-8', // Prefer UTF-8 encoding
78
- );
79
- curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
80
-
81
- $doc = curl_exec($ch);
82
-
83
- if(curl_getinfo($ch, CURLINFO_RESPONSE_CODE) !== 200) {
84
- return null;
85
- }
86
-
87
- curl_close($ch);
88
-
89
- if(strlen($doc) > MAX_FILE_SIZE) {
90
- return null;
91
- }
92
-
93
- return new HtmlDocument($doc);
94
- }
95
-
96
- /**
97
- * fopen implementation of load
98
- */
99
- private function load_fopen($url)
100
- {
101
- // There is no guarantee this request will be fulfilled
102
- $context = stream_context_create(array('http' => array(
103
- 'header' => array(
104
- 'Accept: text/html', // Prefer HTML format
105
- 'Accept-Charset: utf-8', // Prefer UTF-8 encoding
106
- ),
107
- 'ignore_errors' => true // Always fetch content
108
- )));
109
-
110
- $doc = file_get_contents($url, false, $context, 0, MAX_FILE_SIZE + 1);
111
-
112
- if(isset($http_response_header)) {
113
- foreach($http_response_header as $rh) {
114
- // https://stackoverflow.com/a/1442526
115
- $parts = explode(' ', $rh, 3);
116
-
117
- if(preg_match('/HTTP\/\d\.\d/', $parts[0])) {
118
- $code = $parts[1];
119
- }
120
- } // Last code is final status
121
-
122
- if(!isset($code) || $code !== '200') {
123
- return null;
124
- }
125
- }
126
-
127
- if(strlen($doc) > MAX_FILE_SIZE) {
128
- return null;
129
- }
130
-
131
- return new HtmlDocument($doc);
132
- }
133
-
134
- }
1
+ <?php namespace simplehtmldom;
2
+
3
+ /**
4
+ * Website: http://sourceforge.net/projects/simplehtmldom/
5
+ * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
+ *
7
+ * Licensed under The MIT License
8
+ * See the LICENSE file in the project root for more information.
9
+ *
10
+ * Authors:
11
+ * S.C. Chen
12
+ * John Schlick
13
+ * Rus Carroll
14
+ * logmanoriginal
15
+ *
16
+ * Contributors:
17
+ * Yousuke Kumakura
18
+ * Vadim Voituk
19
+ * Antcs
20
+ *
21
+ * Version $Rev$
22
+ */
23
+
24
+ include_once 'HtmlDocument.php';
25
+
26
/**
 * Loads a web page over HTTP(S) and wraps it in an HtmlDocument.
 */
class HtmlWeb {

    /**
     * Load the document behind a URL.
     *
     * cURL is used when the extension is loaded, otherwise the fopen
     * wrappers when allow_url_fopen is enabled.
     *
     * @param string $url Absolute http:// or https:// URL.
     *
     * @return HtmlDocument Returns the DOM for a webpage
     * @return null Returns null if the cURL extension is not loaded and allow_url_fopen=Off
     * @return null Returns null if the provided URL is invalid (not PHP_URL_SCHEME)
     * @return null Returns null if the provided URL does not specify the HTTP or HTTPS protocol
     * @return null Returns null if the request fails, does not end with status 200,
     *              or the body is larger than MAX_FILE_SIZE
     */
    function load($url)
    {
        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            return null;
        }

        $scheme = parse_url($url, PHP_URL_SCHEME);

        if (!$scheme) {
            return null;
        }

        $scheme = strtolower($scheme);

        if ($scheme !== 'http' && $scheme !== 'https') {
            return null;
        }

        if (extension_loaded('curl')) {
            return $this->load_curl($url);
        }

        if (ini_get('allow_url_fopen')) {
            return $this->load_fopen($url);
        }

        error_log(__FUNCTION__ . ' requires either the cURL extension or allow_url_fopen=On in php.ini');

        return null;
    }

    /**
     * cURL implementation of load
     *
     * The handle is closed on every path; previously it stayed open when the
     * response status was not 200.
     *
     * @param string $url URL that load() has already validated.
     *
     * @return HtmlDocument|null
     */
    private function load_curl($url)
    {
        $ch = curl_init();

        if ($ch === false) {
            return null;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

        // There is no guarantee this request will be fulfilled
        // -- https://www.php.net/manual/en/function.curl-setopt.php
        curl_setopt($ch, CURLOPT_BUFFERSIZE, MAX_FILE_SIZE);

        // There is no guarantee this request will be fulfilled
        $header = array(
            'Accept: text/html', // Prefer HTML format
            'Accept-Charset: utf-8', // Prefer UTF-8 encoding
        );
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);

        $doc = curl_exec($ch);
        $status = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);

        curl_close($ch);

        // curl_exec() returns false when the transfer itself failed.
        if ($doc === false || $status !== 200) {
            return null;
        }

        if (strlen($doc) > MAX_FILE_SIZE) {
            return null;
        }

        return new HtmlDocument($doc);
    }

    /**
     * fopen implementation of load
     *
     * @param string $url URL that load() has already validated.
     *
     * @return HtmlDocument|null
     */
    private function load_fopen($url)
    {
        // There is no guarantee this request will be fulfilled
        $context = stream_context_create(array('http' => array(
            'header' => array(
                'Accept: text/html', // Prefer HTML format
                'Accept-Charset: utf-8', // Prefer UTF-8 encoding
            ),
            'ignore_errors' => true // Always fetch content
        )));

        // Read one byte more than allowed so that oversized bodies show up.
        $doc = file_get_contents($url, false, $context, 0, MAX_FILE_SIZE + 1);

        // file_get_contents() returns false when nothing could be read.
        if ($doc === false) {
            return null;
        }

        // $http_response_header is filled in by the HTTP stream wrapper.
        if (isset($http_response_header)) {
            foreach ($http_response_header as $rh) {
                // https://stackoverflow.com/a/1442526
                $parts = explode(' ', $rh, 3);

                if (preg_match('/HTTP\/\d\.\d/', $parts[0])) {
                    $code = $parts[1];
                }
            } // Last code is final status

            if (!isset($code) || $code !== '200') {
                return null;
            }
        }

        if (strlen($doc) > MAX_FILE_SIZE) {
            return null;
        }

        return new HtmlDocument($doc);
    }

}
vendor/simplehtmldom/simplehtmldom/LICENSE CHANGED
@@ -1,21 +1,21 @@
1
- MIT License
2
-
3
- Copyright (c) 2019 S.C. Chen, John Schlick, logmanoriginal
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
  SOFTWARE.
1
+ MIT License
2
+
3
+ Copyright (c) 2019 S.C. Chen, John Schlick, logmanoriginal
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
  SOFTWARE.
vendor/simplehtmldom/simplehtmldom/README.md DELETED
@@ -1,116 +0,0 @@
1
- # PHP Simple HTML DOM Parser
2
-
3
- [![LICENSE](https://img.shields.io/github/license/simplehtmldom/simplehtmldom?logo=github&style=for-the-badge)](https://github.com/simplehtmldom/simplehtmldom/blob/master/LICENSE)
4
- [![RELEASE](https://img.shields.io/github/v/tag/simplehtmldom/simplehtmldom?label=release&logo=github&style=for-the-badge)](https://sourceforge.com/projects/simplehtmldom/files/simplehtmldom/)
5
- [![BUILD STATUS](https://img.shields.io/travis/com/simplehtmldom/simplehtmldom?logo=travis&style=for-the-badge)](https://travis-ci.com/simplehtmldom/simplehtmldom)
6
- [![PACKAGIST](https://img.shields.io/packagist/v/simplehtmldom/simplehtmldom?logo=composer&style=for-the-badge)](https://packagist.org/packages/simplehtmldom/simplehtmldom)
7
-
8
- simplehtmldom is a fast and reliable HTML DOM parser for PHP.
9
-
10
- ## Key features
11
-
12
- * Purely PHP-based DOM parser (no XML extensions required).
13
- * Works with well-formed and broken HTML documents.
14
- * Loads webpages, local files and document strings.
15
- * Supports CSS selectors.
16
-
17
- ## Requirements
18
-
19
- simplehtmldom requires **PHP 5.6 or higher** with [ext-iconv](https://www.php.net/manual/en/book.iconv.php) enabled. Following extensions enable additional features of the parser:
20
-
21
- * [ext-mbstring](https://secure.php.net/manual/en/book.mbstring.php) (recommended) \
22
- Enables better detection for multi-byte documents.
23
- * [ext-curl](https://secure.php.net/manual/en/book.curl.php) \
24
- Enables cURL support for the class `HtmlWeb`.
25
- * [ext-openssl](https://secure.php.net/manual/en/book.openssl.php) (recommended when using cURL) \
26
- Enables SSL support for cURL.
27
-
28
- ## Installation
29
-
30
- **Manually**:
31
-
32
- Download the latest release from [SourceForge](https://sourceforge.net/projects/simplehtmldom/files/latest) and extract the files in the vendor folder of your project.
33
-
34
- **Composer**:
35
-
36
- ```sh
37
- composer require simplehtmldom/simplehtmldom
38
- ```
39
-
40
- **Git**:
41
-
42
- ```
43
- git clone git://git.code.sf.net/p/simplehtmldom/repository simplehtmldom
44
- ```
45
-
46
- _Note_: The [GitHub repository](https://github.com/simplehtmldom/simplehtmldom) serves as a mirror for the SourceForge project. We currently accept pull requests and issues only via SourceForge.
47
-
48
- ## Usage
49
-
50
- This example illustrates how to return the page title:
51
-
52
- <details><summary>Manually</summary>
53
-
54
- ```
55
- <?php
56
- include_once 'HtmlWeb.php';
57
- use simplehtmldom\HtmlWeb;
58
-
59
- $client = new HtmlWeb();
60
- $html = $client->load('https://www.google.com/search?q=simplehtmldom');
61
-
62
- // Returns the page title
63
- echo $html->find('title', 0)->plaintext . PHP_EOL;
64
- ```
65
-
66
- </details>
67
-
68
- <details><summary>Using composer</summary>
69
-
70
- ```
71
- <?php
72
- include_once 'vendor/autoload.php';
73
- use simplehtmldom\HtmlWeb;
74
-
75
- $client = new HtmlWeb();
76
- $html = $client->load('https://www.google.com/search?q=simplehtmldom');
77
-
78
- // Returns the page title
79
- echo $html->find('title', 0)->plaintext . PHP_EOL;
80
- ```
81
-
82
- </details>
83
-
84
- Find more examples in the installation folder under `examples`.
85
-
86
- ## Documentation
87
-
88
- The documentation for this library is hosted at [https://simplehtmldom.sourceforge.io/docs/](https://simplehtmldom.sourceforge.io/docs/)
89
-
90
- ## Getting involved
91
-
92
- There are various ways for you to get involved with simplehtmldom. Here are a few:
93
-
94
- * Share this project with your friends (Twitter, Facebook, ..._you name it_...).
95
- * Report [bugs](https://sourceforge.net/p/simplehtmldom/bugs/) (SourceForge).
96
- * Request [features](https://sourceforge.net/p/simplehtmldom/feature-requests/) (SourceForge).
97
- * Discuss existing bugs, features and ideas.
98
-
99
- If you want to contribute code to the project, please open a [feature request](https://sourceforge.net/p/simplehtmldom/feature-requests/) and include your patch with the message.
100
-
101
- ## Authors
102
-
103
- * [S.C. Chen](https://sourceforge.net/u/me578022/)
104
- * [John Schlick](https://sourceforge.net/u/john_schlick/)
105
- * [logmanoriginal](https://sourceforge.net/u/logmanoriginal/)
106
- * Rus Carroll
107
- * Yousuke Kumakura
108
- * Vadim Voituk
109
-
110
- ## License
111
-
112
- The source code for simplehtmldom is licensed under the MIT license. For further information read the LICENSE file in the root directory (should be located next to this README file).
113
-
114
- ## Technical notes
115
-
116
- simplehtmldom is a purely PHP-based DOM parser that doesn't rely on external libraries like [libxml](https://www.php.net/manual/en/book.libxml.php), [SimpleXML](https://www.php.net/manual/en/book.simplexml.php) or [PHP DOM](https://www.php.net/manual/en/book.dom.php). Doing so provides better control over the parsing algorithm and a much simpler API that even novice users can learn to use in a short amount of time.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/composer.json CHANGED
@@ -1,58 +1,58 @@
1
- {
2
- "name": "simplehtmldom/simplehtmldom",
3
- "description": "A fast, simple and reliable HTML document parser for PHP.",
4
- "keywords": [
5
- "php",
6
- "html",
7
- "dom",
8
- "parser",
9
- "simple",
10
- "simplehtmldom"
11
- ],
12
- "homepage": "https://simplehtmldom.sourceforge.io/",
13
- "license": "MIT",
14
- "authors": [
15
- {
16
- "name": "S.C. Chen",
17
- "role": "Developer"
18
- },
19
- {
20
- "name": "John Schlick",
21
- "role": "Developer"
22
- },
23
- {
24
- "name": "logmanoriginal",
25
- "role": "Developer"
26
- }
27
- ],
28
- "support": {
29
- "issues": "https://sourceforge.net/p/simplehtmldom/bugs/",
30
- "wiki": "https://simplehtmldom.sourceforge.io/docs/",
31
- "source": "https://sourceforge.net/p/simplehtmldom/repository/",
32
- "rss": "https://sourceforge.net/p/simplehtmldom/news/feed.rss"
33
- },
34
- "autoload": {
35
- "classmap": [
36
- "./"
37
- ],
38
- "exclude-from-classmap": [
39
- "/example/",
40
- "/manual/",
41
- "/testcase/",
42
- "/tests/",
43
- "simple_html_dom.php"
44
- ]
45
- },
46
- "require": {
47
- "php": ">=5.6",
48
- "ext-iconv": "*"
49
- },
50
- "require-dev": {
51
- "phpunit/phpunit": "^6 || ^7"
52
- },
53
- "suggest": {
54
- "ext-mbstring": "Allows better decoding for multi-byte documents",
55
- "ext-curl": "Needed to support cURL downloads in class HtmlWeb",
56
- "ext-openssl": "Allows loading HTTPS pages when using cURL"
57
- }
58
  }
1
+ {
2
+ "name": "simplehtmldom/simplehtmldom",
3
+ "description": "A fast, simple and reliable HTML document parser for PHP.",
4
+ "keywords": [
5
+ "php",
6
+ "html",
7
+ "dom",
8
+ "parser",
9
+ "simple",
10
+ "simplehtmldom"
11
+ ],
12
+ "homepage": "https://simplehtmldom.sourceforge.io/",
13
+ "license": "MIT",
14
+ "authors": [
15
+ {
16
+ "name": "S.C. Chen",
17
+ "role": "Developer"
18
+ },
19
+ {
20
+ "name": "John Schlick",
21
+ "role": "Developer"
22
+ },
23
+ {
24
+ "name": "logmanoriginal",
25
+ "role": "Developer"
26
+ }
27
+ ],
28
+ "support": {
29
+ "issues": "https://sourceforge.net/p/simplehtmldom/bugs/",
30
+ "wiki": "https://simplehtmldom.sourceforge.io/docs/",
31
+ "source": "https://sourceforge.net/p/simplehtmldom/repository/",
32
+ "rss": "https://sourceforge.net/p/simplehtmldom/news/feed.rss"
33
+ },
34
+ "autoload": {
35
+ "classmap": [
36
+ "./"
37
+ ],
38
+ "exclude-from-classmap": [
39
+ "/example/",
40
+ "/manual/",
41
+ "/testcase/",
42
+ "/tests/",
43
+ "simple_html_dom.php"
44
+ ]
45
+ },
46
+ "require": {
47
+ "php": ">=5.6",
48
+ "ext-iconv": "*"
49
+ },
50
+ "require-dev": {
51
+ "phpunit/phpunit": "^6 || ^7"
52
+ },
53
+ "suggest": {
54
+ "ext-mbstring": "Allows better decoding for multi-byte documents",
55
+ "ext-curl": "Needed to support cURL downloads in class HtmlWeb",
56
+ "ext-openssl": "Allows loading HTTPS pages when using cURL"
57
+ }
58
  }
vendor/simplehtmldom/simplehtmldom/constants.php CHANGED
@@ -1,28 +1,28 @@
1
- <?php namespace simplehtmldom;
2
-
3
- /**
4
- * Website: http://sourceforge.net/projects/simplehtmldom/
5
- * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
- *
7
- * Licensed under The MIT License
8
- * See the LICENSE file in the project root for more information.
9
- *
10
- * Authors:
11
- * S.C. Chen
12
- * John Schlick
13
- * Rus Carroll
14
- * logmanoriginal
15
- *
16
- * Contributors:
17
- * Yousuke Kumakura
18
- * Vadim Voituk
19
- * Antcs
20
- *
21
- * Version $Rev$
22
- */
23
-
24
- defined(__NAMESPACE__ . '\DEFAULT_TARGET_CHARSET') || define(__NAMESPACE__ . '\DEFAULT_TARGET_CHARSET', 'UTF-8');
25
- defined(__NAMESPACE__ . '\DEFAULT_BR_TEXT') || define(__NAMESPACE__ . '\DEFAULT_BR_TEXT', "\r\n");
26
- defined(__NAMESPACE__ . '\DEFAULT_SPAN_TEXT') || define(__NAMESPACE__ . '\DEFAULT_SPAN_TEXT', ' ');
27
- defined(__NAMESPACE__ . '\MAX_FILE_SIZE') || define(__NAMESPACE__ . '\MAX_FILE_SIZE', 2621440);
28
- define(__NAMESPACE__ . '\HDOM_SMARTY_AS_TEXT', 1);
1
+ <?php namespace simplehtmldom;
2
+
3
+ /**
4
+ * Website: http://sourceforge.net/projects/simplehtmldom/
5
+ * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
+ *
7
+ * Licensed under The MIT License
8
+ * See the LICENSE file in the project root for more information.
9
+ *
10
+ * Authors:
11
+ * S.C. Chen
12
+ * John Schlick
13
+ * Rus Carroll
14
+ * logmanoriginal
15
+ *
16
+ * Contributors:
17
+ * Yousuke Kumakura
18
+ * Vadim Voituk
19
+ * Antcs
20
+ *
21
+ * Version $Rev$
22
+ */
23
+
24
+ defined(__NAMESPACE__ . '\DEFAULT_TARGET_CHARSET') || define(__NAMESPACE__ . '\DEFAULT_TARGET_CHARSET', 'UTF-8');
25
+ defined(__NAMESPACE__ . '\DEFAULT_BR_TEXT') || define(__NAMESPACE__ . '\DEFAULT_BR_TEXT', "\r\n");
26
+ defined(__NAMESPACE__ . '\DEFAULT_SPAN_TEXT') || define(__NAMESPACE__ . '\DEFAULT_SPAN_TEXT', ' ');
27
+ defined(__NAMESPACE__ . '\MAX_FILE_SIZE') || define(__NAMESPACE__ . '\MAX_FILE_SIZE', 2621440);
28
+ define(__NAMESPACE__ . '\HDOM_SMARTY_AS_TEXT', 1);
vendor/simplehtmldom/simplehtmldom/example/example_advanced_selector.php DELETED
@@ -1,59 +0,0 @@
1
- <?php
2
- // This example illustrates how to use advanced selector features
3
- include_once '../HtmlDocument.php';
4
- use simplehtmldom\HtmlDocument;
5
-
6
- // -----------------------------------------------------------------------------
7
- echo '<h2>Descendant selectors</h2>' . PHP_EOL;
8
-
9
- $doc = <<<HTML
10
- <div>
11
- <div>
12
- <div class="foo bar">ok</div>
13
- </div>
14
- </div>
15
- HTML;
16
-
17
- echo (new HtmlDocument())->load($doc)->find('div div div', 0)->innertext . PHP_EOL;
18
-
19
- // -----------------------------------------------------------------------------
20
- echo '<h2>Nested selectors</h2>' . PHP_EOL;
21
-
22
- $doc = <<<HTML
23
- <ul id="ul1">
24
- <li>item:<span>1</span></li>
25
- <li>item:<span>2</span></li>
26
- </ul>
27
- <ul id="ul2">
28
- <li>item:<span>3</span></li>
29
- <li>item:<span>4</span></li>
30
- </ul>
31
- HTML;
32
-
33
- $html = (new HtmlDocument())->load($doc);
34
-
35
- foreach($html->find('ul') as $ul) {
36
- foreach($ul->find('li') as $li)
37
- echo $li->innertext . '<br>' . PHP_EOL;
38
- }
39
-
40
- // -----------------------------------------------------------------------------
41
- echo '<h2>Parsing checkboxes</h2>' . PHP_EOL;
42
-
43
- $doc = <<<HTML
44
- <form name="form1" method="post" action="">
45
- <input type="checkbox" name="checkbox1" value="checkbox1" checked>item1<br>
46
- <input type="checkbox" name="checkbox2" value="checkbox2">item2<br>
47
- <input type="checkbox" name="checkbox3" value="checkbox3" checked>item3<br>
48
- </form>
49
- HTML;
50
-
51
- $html = (new HtmlDocument())->load($doc);
52
-
53
- foreach($html->find('input[type=checkbox]') as $checkbox) {
54
- if ($checkbox->checked) {
55
- echo $checkbox->name . ' is checked<br>' . PHP_EOL;
56
- } else {
57
- echo $checkbox->name . ' is not checked<br>' . PHP_EOL;
58
- }
59
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/example/example_basic_selector.php DELETED
@@ -1,35 +0,0 @@
1
- <?php
2
- // This example illustrates how to use basic selectors to retrieve HTML contents
3
- include_once '../HtmlWeb.php';
4
- use simplehtmldom\HtmlWeb;
5
-
6
- // get DOM from URL or file
7
- $doc = new HtmlWeb();
8
- $html = $doc->load('http://www.google.com/');
9
-
10
- // find all links
11
- foreach($html->find('a') as $e)
12
- echo $e->href . '<br>' . PHP_EOL;
13
-
14
- // find all images
15
- foreach($html->find('img') as $e)
16
- echo $e->src . '<br>' . PHP_EOL;
17
-
18
- // find all images with full tag
19
- foreach($html->find('img') as $e)
20
- echo $e->outertext . '<br>' . PHP_EOL;
21
-
22
- // find all div tags with id="gbar"
23
- foreach($html->find('div#gbar') as $e)
24
- echo $e->innertext . '<br>' . PHP_EOL;
25
-
26
- // find all span tags with class="gb1"
27
- foreach($html->find('span.gb1') as $e)
28
- echo $e->outertext . '<br>' . PHP_EOL;
29
-
30
- // find all td tags with attribute align="center"
31
- foreach($html->find('td[align=center]') as $e)
32
- echo $e->innertext . '<br>' . PHP_EOL;
33
-
34
- // extract text from HTML
35
- echo $html->plaintext . PHP_EOL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/example/example_callback.php DELETED
@@ -1,21 +0,0 @@
1
- <?php
2
- // This example illustrates how to utilize the callback feature to manipulate the DOM
3
- include_once '../HtmlWeb.php';
4
- use simplehtmldom\HtmlWeb;
5
-
6
- // Write a callback function with one parameter for the element
7
- function my_callback($element) {
8
- if ($element->tag === 'a') { // Invalidate all anchors
9
- $element->href = '#';
10
- }
11
- }
12
-
13
- // Load the document
14
- $doc = new HtmlWeb();
15
- $html = $doc->load('https://www.google.com/');
16
-
17
- // Register the callback function
18
- $html->set_callback('my_callback');
19
-
20
- // The callback function is invoked automatically when accessing the contents.
21
- echo $html;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/example/example_extract_html.php DELETED
@@ -1,7 +0,0 @@
1
- <?php
2
- // This example illustrates how to extract text content from a webpage
3
- include_once '../HtmlWeb.php';
4
- use simplehtmldom\HtmlWeb;
5
-
6
- $doc = new HtmlWeb();
7
- echo $doc->load('https://www.google.com/')->plaintext;
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/example/example_modify_contents.php DELETED
@@ -1,15 +0,0 @@
1
- <?php
2
- // This example illustrates how to modify HTML contents
3
- include_once '../HtmlWeb.php';
4
- use simplehtmldom\HtmlWeb;
5
-
6
- // Load the document
7
- $doc = new HtmlWeb();
8
- $html = $doc->load('https://www.google.com/');
9
-
10
- // Remove all images and inputs from the DOM
11
- foreach($html->find('img, input') as $element) {
12
- $element->remove();
13
- }
14
-
15
- echo $html;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_digg.php DELETED
@@ -1,55 +0,0 @@
1
- <?php
2
- /**
3
- * This example loads the main page from https://digg.com/, extracts news items
4
- * and returns the details in a custom format.
5
- */
6
- include_once '../../HtmlWeb.php';
7
- use simplehtmldom\HtmlWeb;
8
-
9
- // Download a page
10
- $doc = new HtmlWeb();
11
- $html = $doc->load('https://digg.com/');
12
-
13
- // Loop through all articles in the page
14
- foreach($html->find('article') as $article) {
15
-
16
- // Find the title of the current article
17
- if($title = $article->find('h2', 0)) {
18
- $item['title'] = trim($title->plaintext);
19
- } else {
20
- $item['title'] = 'Unknown title';
21
- }
22
-
23
- // Find the description of the current article
24
- if($details = $article->find('div.description', 0)) {
25
- $item['details'] = trim($details->plaintext);
26
- } else {
27
- $item['details'] = '...';
28
- }
29
-
30
- // Find the tags for the current article
31
- if($diggs = $article->find('a[rel="tag"]', 0)) {
32
- $item['diggs'] = trim($diggs->plaintext);
33
- } else {
34
- $item['diggs'] = '';
35
- }
36
-
37
- $data[] = $item;
38
- }
39
-
40
- // (optional) Release memory
41
- $html->clear();
42
- unset($html);
43
-
44
- // Display your own page to the user
45
- foreach($data as $item) {
46
- echo <<<EOD
47
-
48
- <h2>{$item['title']}</h2>
49
- <ul>
50
- <li>{$item['details']}</li>
51
- <li>{$item['diggs']}</li>
52
- </ul>
53
-
54
- EOD;
55
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_imdb.php DELETED
@@ -1,22 +0,0 @@
1
- <?php
2
- /**
3
- * This example loads a page from IMDb and displays the most important details
4
- * in a custom format.
5
- */
6
- include_once '../../HtmlWeb.php';
7
- use simplehtmldom\HtmlWeb;
8
-
9
- // Load the page into memory
10
- $doc = new HtmlWeb();
11
- $html = $doc->load('https://imdb.com/title/tt0335266/');
12
-
13
- // Extract details
14
- $title = $html->find('title', 0)->plaintext;
15
- $rating = $html->find('div[class="ratingValue"] span', 0)->plaintext;
16
- $storyline = $html->find('#titleStoryLine p', 0)->plaintext;
17
-
18
- // Clean up memory
19
- $html->clear();
20
- unset($html);
21
-
22
- echo '<h1>' . $title . '</h1><p>Rating: ' . $rating . '<br>' . $storyline . '</p>';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_slashdot.php DELETED
@@ -1,33 +0,0 @@
1
- <?php
2
- /**
3
- * This example loads a page from Slashdot and displays articles in a custom
4
- * format.
5
- */
6
- include_once '../../HtmlWeb.php';
7
- use simplehtmldom\HtmlWeb;
8
-
9
- // Load the page into memory
10
- $doc = new HtmlWeb();
11
- $html = $doc->load('https://slashdot.org/');
12
-
13
- // Find and extract all articles
14
- foreach($html->find('#firehoselist > [id^="firehose-"]') as $article) {
15
- $item['title'] = trim($article->find('[id^="title-"]', 0)->plaintext);
16
- $item['body'] = trim($article->find('[id^="text-"]', 0)->plaintext);
17
-
18
- $data[] = $item;
19
- }
20
-
21
- // clean up memory
22
- $html->clear();
23
- unset($html);
24
-
25
- // Return custom page
26
- foreach($data as $item) {
27
- echo <<<EOD
28
-
29
- <h2>{$item['title']}</h2>
30
- <p>{$item['body']}</p>
31
-
32
- EOD;
33
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/README.md DELETED
@@ -1,75 +0,0 @@
1
- This folder contains the source files for http://simplehtmldom.sourceforge.io/,
2
- the project page for PHP Simple HTML DOM Parser.
3
-
4
- Source files are written in Markdown: https://en.wikipedia.org/wiki/Markdown
5
-
6
- Site data is generated by MkDocs, a lightweight static site generator for project
7
- documentation: https://www.mkdocs.org/
8
-
9
- # Folder structure
10
-
11
- `custom_theme` : Contains customizations to the theme provided by MkDocs.
12
- `docs` : Contains the source files for the project page (the actual pages).
13
- `site` : Contains the output files for the project page when built with MkDocs.
14
- `extra.css` : Customizations to the styles provided by MkDocs.
15
- `mkdocs.yml` : The configuration file that is used by MkDocs to generate pages.
16
-
17
- # Adding new pages
18
-
19
- Place new files in `docs`. Use subfolders (as few levels as possible) to
20
- separate categories.
21
-
22
- Files added to the manual will **not** appear on the project page automatically.
23
- All pages need to be specified in the _mkdocs.yml_ file under "nav:". Simply add
24
- the relative path to the new file where appropriate.
25
-
26
- Note: Files are not added automatically because they are sorted by name if not
27
- specified manually. Since readability is a key factor for manuals, the files must
28
- be sorted in a way that makes it clear to users.
29
-
30
- # Setting up MkDocs
31
-
32
- The installation instructions for MkDocs are provided on their homepage:
33
- https://www.mkdocs.org/#installation
34
-
35
- MkDocs automatically builds the project based on the _mkdocs.yml_ file. Find the
36
- specification for this file at https://www.mkdocs.org/user-guide/configuration/.
37
-
38
- # Building project pages
39
-
40
- The build process depends on your installation of MkDocs. Typically MkDocs is
41
- made available via the command line.
42
-
43
- ## Step 1 - Check your version of MkDocs
44
-
45
- To check your version of MkDocs run this command:
46
-
47
- `mkdocs --version` or
48
- `python3 -m mkdocs --version`
49
-
50
- Should return `version 1.0.4` or higher. If it doesn't make sure to install the
51
- latest version using `pip install mkdocs` or `python3 -m pip install mkdocs`. If
52
- you don't have pip installed, install it via package manager or follow the
53
- instructions at https://pip.pypa.io/en/stable/installing/
54
-
55
- Make sure to also install the `redirects` plugin to allow for page redirects:
56
- `pip install mkdocs-redirects` or `python3 -m pip install mkdocs-redirects`.
57
-
58
- ## Step 2 - View the project locally
59
-
60
- MkDocs allows you to view the project files in a browser on your local machine:
61
-
62
- `mkdocs serve` or
63
- `python3 -m mkdocs serve`
64
-
65
- If the process is successful you can access the site at http://127.0.0.1:8000.
66
-
67
- ## Step 3 - Build the project
68
-
69
- If you are satisfied with the results of the project, build the final project
70
- with this command:
71
-
72
- `mkdocs build` or
73
- `python3 -m mkdocs build`
74
-
75
- Find the output files in the `site` folder.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/custom_theme/main.html DELETED
@@ -1,7 +0,0 @@
1
- {% extends "base.html" %}
2
-
3
- {% block footer %}
4
- {% include "footer.html" %}
5
- <hr>
6
- <a class="logo" href="https://sourceforge.net/p/simplehtmldom/"><img alt="Download PHP Simple HTML DOM Parser" src="https://sourceforge.net/sflogo.php?type=16&group_id=218559" ></a>
7
- {% endblock %}
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/disable.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: disable()
3
- ---
4
-
5
- ```php
6
- Debug::disable ()
7
- ```
8
-
9
- Globally disables debug messages.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/enable.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: enable()
3
- ---
4
-
5
- ```php
6
- Debug::enable ()
7
- ```
8
-
9
- Globally enables debug messages.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/index.md DELETED
@@ -1,31 +0,0 @@
1
- ---
2
- title: Debug
3
- ---
4
-
5
- Provides the ability to see debug messages for deprecated functions, malformed documents and parsing issues. Debug messages can be sent to a custom message handler or written to error_log (default).
6
-
7
- ## Example
8
-
9
- ```php
10
- <?php
11
- include_once 'HtmlDocument.php';
12
- include_once 'Debug.php';
13
- use simplehtmldom\HtmlDocument;
14
- use simplehtmldom\Debug;
15
-
16
- Debug::enable();
17
-
18
- $dom = new HtmlDocument();
19
- $dom->load('<html></html>');
20
- $dom->root->children(); // This causes a deprecation warning
21
-
22
- Debug::disable();
23
- ```
24
-
25
- **Output**
26
-
27
- ```
28
- [DEBUG] [/var/www/html/simplehtmldom/Debug.php:30] [/var/www/html/simplehtmldom/test.php:7] "Debug mode has been enabled"
29
- [DEBUG] [/var/www/html/simplehtmldom/HtmlNode.php:83] [/var/www/html/simplehtmldom/test.php:11] "simplehtmldom\HtmlNode->children() has been deprecated and will be removed in the next major version of simplehtmldom. Use simplehtmldom\HtmlNode->childNodes() instead."
30
- [DEBUG] [/var/www/html/simplehtmldom/Debug.php:38] [/var/www/html/simplehtmldom/test.php:13] "Debug mode has been disabled"
31
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/log.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: log()
3
- ---
4
-
5
- ```php
6
- Debug::log (string $message)
7
- ```
8
-
9
- Logs a debug message if the debugger is enabled. Does nothing if the debugger is disabled.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/log_once.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: log_once()
3
- ---
4
-
5
- ```php
6
- Debug::log_once (string $message)
7
- ```
8
-
9
- Logs a debug message if the debugger is enabled. Does nothing if the debugger is disabled. Each message is logged only once (based on file and line number).
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/setDebugHandler.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: setDebugHandler()
3
- ---
4
-
5
- ```php
6
- Debug::setDebugHandler ( [$function = null] )
7
- ```
8
-
9
- Sets the debug handler for debug messages. Uses `error_log` if `$function = null` (default).
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__call.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: __call
3
- ---
4
-
5
- ```php
6
- __call ($function, $arguments)
7
- ```
8
-
9
- Serves as a wrapper for deprecated methods. See [magic methods](https://www.php.net/manual/en/language.oop5.overloading.php#object.call) for more information.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__construct.md DELETED
@@ -1,22 +0,0 @@
1
- ---
2
- title: __construct
3
- ---
4
-
5
- ```php
6
- __construct ( [ string $str = null [, bool $lowercase = true [, bool $forceTagsClosed = true [, string $target_charset = DEFAULT_TARGET_CHARSET [, bool $stripRN = true [, string $defaultBRText = DEFAULT_BR_TEXT [, string $defaultSpanText = DEFAULT_SPAN_TEXT [, int $options = 0 ]]]]]]]]) : object
7
- ```
8
-
9
- Creates a new `HtmlDocument` object.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `str` | The HTML document string.
14
- | `lowercase` | Tag names are parsed in lowercase letters if enabled.
15
- | `forceTagsClosed` | Tags inside block tags are forcefully closed if the closing tag was omitted.
16
- | `target_charset` | Defines the target charset for text returned by the parser.
17
- | `stripRN` | Newline characters are replaced by whitespace if enabled.
18
- | `defaultBRText` | Defines the default text to return for `<br>` elements.
19
- | `defaultSpanText` | Defines the default text to return for `<span>` elements.
20
- | `options` | Additional options for the parser. Currently supports `'HDOM_SMARTY_AS_TEXT'` to remove [Smarty](https://www.smarty.net/) scripts.
21
-
22
- Returns the object.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__debugInfo.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: __debugInfo
3
- ---
4
-
5
- ```php
6
- __debugInfo ( )
7
- ```
8
-
9
- Returns debugging information about the current object.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__destruct.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: __destruct
3
- ---
4
-
5
- ```php
6
- __destruct ()
7
- ```
8
-
9
- Destroys the current object and clears memory.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__get.md DELETED
@@ -1,19 +0,0 @@
1
- ---
2
- title: __get
3
- ---
4
-
5
- ```php
6
- __get ( string $name ) : mixed
7
- ```
8
-
9
- See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.get)
10
-
11
- Supports following names:
12
-
13
- | Name | Description
14
- | ---- | -----------
15
- | `outertext` | Returns the outer text of the root element.
16
- | `innertext` | Returns the inner text of the root element.
17
- | `plaintext` | Returns the plain text of the root element.
18
- | `charset` | Returns the charset for the document.
19
- | `target_charset` | Returns the target charset for the document.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__toString.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: __toString
3
- ---
4
-
5
- ```php
6
- __toString () : string
7
- ```
8
-
9
- Returns the inner text of the root element of the DOM.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/as_text_node.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: as_text_node (protected)
3
- ---
4
-
5
- ```php
6
- as_text_node ( string $tag ) : bool
7
- ```
8
-
9
- Adds a tag as text node.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `tag` | The element's tag name.
14
-
15
- Returns true on success.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/childNodes.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: childNodes
3
- ---
4
-
5
- ```php
6
- childNodes ( [ int $idx = -1 ] ) : mixed
7
- ```
8
-
9
- Returns children of the root element.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `idx` | Index of the child element to return.
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_skip.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: copy_skip (protected)
3
- ---
4
-
5
- ```php
6
- copy_skip ( string $chars ) : string
7
- ```
8
-
9
- Skips characters starting at the current parsing position in the document. Sets the parsing position to the first character not in the provided list of characters.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `chars` | A list of characters to skip.
14
-
15
- Returns the skipped characters.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_until.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: copy_until (protected)
3
- ---
4
-
5
- ```php
6
- copy_until ( string $chars ) : string
7
- ```
8
-
9
- Copies all characters starting at the current parsing position in the document. Sets the parsing position to the first character that matches any of the characters in the provided list of characters.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `chars` | A list of characters to stop copying at.
14
-
15
- Returns the copied characters.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_until_char.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: copy_until_char (protected)
3
- ---
4
-
5
- ```php
6
- copy_until_char ( string $char ) : string
7
- ```
8
-
9
- Copies all characters starting at the current parsing position in the document. Sets the parsing position to the first character that matches the provided character.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `char` | A character to stop copying at.
14
-
15
- Returns the copied characters.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/createElement.md DELETED
@@ -1,16 +0,0 @@
1
- ---
2
- title: createElement
3
- ---
4
-
5
- ```php
6
- createElement ( string $name [, string $value = null ] ) : object
7
- ```
8
-
9
- Creates a new element.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `name` | Name of the element
14
- | `value` | Value of the element
15
-
16
- Returns the element.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/createTextNode.md DELETED
@@ -1,11 +0,0 @@
1
- ---
2
- title: createTextNode
3
- ---
4
-
5
- ```php
6
- createTextNode ( string $value ) : object
7
- ```
8
-
9
- Creates a new text element.
10
-
11
- Returns the element.
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/decode.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: decode (protected)
3
- ---
4
-
5
- ```php
6
- decode ()
7
- ```
8
-
9
- Decodes HTML entities in the DOM recursively.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/dump.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: dump
3
- ---
4
-
5
- ```php
6
- dump ( [ bool $show_attr = true ] ) : string
7
- ```
8
-
9
- Dumps the entire DOM into a string. Useful for debugging purposes.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `show_attr` | Attributes are included in the dump when enabled.
14
-
15
- Returns the DOM tree as string.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/expect.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: expect
3
- ---
4
-
5
- ```php
6
- expect (
7
- string $selector
8
- [, int $idx = null ]
9
- [, bool $lowercase = false ]
10
- ) : mixed
11
- ```
12
-
13
- Finds elements in the DOM.
14
- Returns null if no match was found.
15
- See [`find`](./find/) for a description of parameters and selectors.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/find.md DELETED
@@ -1,17 +0,0 @@
1
- ---
2
- title: find
3
- ---
4
-
5
- ```php
6
- find ( string $selector [, int $idx = null [, bool $lowercase = false ]] ) : mixed
7
- ```
8
-
9
- Finds elements in the DOM.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `selector` | A [CSS style selector](/HtmlNode/find).
14
- | `idx` | Index of the element to return.
15
- | `lowercase` | Matches tag names case insensitive when enabled.
16
-
17
- Returns an array of matches or a single element if `idx` is defined.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/firstChild.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: firstChild
3
- ---
4
-
5
- ```php
6
- firstChild () : object
7
- ```
8
-
9
- Returns the first child of the root element.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementById.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: getElementById
3
- ---
4
-
5
- ```php
6
- getElementById ( string $id ) : object
7
- ```
8
-
9
- Searches an element by id.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `id` | ID of the element to find.
14
-
15
- Returns the element or null if no match was found.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementByTagName.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: getElementByTagName
3
- ---
4
-
5
- ```php
6
- getElementByTagName ( string $name ) : object
7
- ```
8
-
9
- Searches an element by tag name.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `name` | Tag name of the element to find.
14
-
15
- Returns the element or null if no match was found.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementsById.md DELETED
@@ -1,16 +0,0 @@
1
- ---
2
- title: getElementsById
3
- ---
4
-
5
- ```php
6
- getElementsById ( string $id [, int $idx = null ] ) : object
7
- ```
8
-
9
- Searches elements by id.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `id` | ID of the element to find.
14
- | `idx` | Returns the element at the specified index if defined.
15
-
16
- Returns the element(s) or null if no match was found.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementsByTagName.md DELETED
@@ -1,16 +0,0 @@
1
- ---
2
- title: getElementsByTagName
3
- ---
4
-
5
- ```php
6
- getElementsByTagName ( string $name [, int $idx = -1 ] ) : object
7
- ```
8
-
9
- Searches elements by tag name.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `name` | Tag name of the element to find.
14
- | `idx` | Returns the element at the specified index.
15
-
16
- Returns the element(s) or null if no match was found.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/index.md DELETED
@@ -1,38 +0,0 @@
1
- ---
2
- title: HtmlDocument
3
- ---
4
-
5
- Represents the [DOM](https://en.wikipedia.org/wiki/Document_Object_Model) in memory. Provides functions to parse documents and access individual elements (see [`HtmlNode`](../HtmlNode/)).
6
-
7
- ## Public Properties
8
-
9
- | Property | Description
10
- | -------- | -----------
11
- | `root` | Root node of the document.
12
- | `nodes` | List of top-level nodes in the document.
13
- | `callback` | Callback function that is called for each element in the DOM when generating outertext.
14
- | `lowercase` | If enabled, all tag names are converted to lowercase when parsing documents.
15
- | `original_size` | Original document size in bytes.
16
- | `size` | Current document size in bytes.
17
- | `_charset` | Charset of the original document.
18
- | `_target_charset` | Target charset for the current document.
19
- | `default_span_text` | Text to return for `<span>` elements.
20
-
21
- ## Protected Properties
22
-
23
- | Property | Description
24
- | -------- | -----------
25
- | `pos` | Current parsing position within `doc`.
26
- | `doc` | The original document.
27
- | `char` | Character at position `pos` in `doc`.
28
- | `cursor` | Current element cursor in the document.
29
- | `parent` | Parent element node.
30
- | `noise` | Noise from the original document (i.e. scripts, comments, etc...).
31
- | `token_blank` | Tokens that are considered whitespace in HTML.
32
- | `token_equal` | Tokens to identify the equal sign for attributes, stopping either at the closing tag ("/" i.e. `<html />`) or the end of an opening tag (">" i.e. `<html>`).
33
- | `token_slash` | Tokens to identify the end of a tag name. A tag name either ends on the ending slash ("/" i.e. `<html/>`) or whitespace (`"\s\r\n\t"`).
34
- | `token_attr` | Tokens to identify the end of an attribute.
35
- | `default_br_text` | Text to return for `<br>` elements.
36
- | `self_closing_tags` | A list of tag names where the closing tag is omitted.
37
- | `block_tags` | A list of tag names where remaining unclosed tags are forcibly closed.
38
- | `optional_closing_tags` | A list of tag names where the closing tag can be omitted.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/lastChild.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: lastChild
3
- ---
4
-
5
- ```php
6
- lastChild () : object
7
- ```
8
-
9
- Returns the last child of the root element.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/link_nodes.md DELETED
@@ -1,14 +0,0 @@
1
- ---
2
- title: link_nodes (protected)
3
- ---
4
-
5
- ```php
6
- link_nodes ( object &$node, bool $is_child )
7
- ```
8
-
9
- Links the provided node to the DOM tree.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `node` | The node to link to the DOM tree.
14
- | `is_child` | If active, makes the node a sibling of the current node (child of parent).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/load.md DELETED
@@ -1,20 +0,0 @@
1
- ---
2
- title: load
3
- ---
4
-
5
- ```php
6
- load ( string $str [, bool $lowercase = true [, bool $stripRN = true [, string $defaultBRText = DEFAULT_BR_TEXT [, string $defaultSpanText = DEFAULT_SPAN_TEXT [, int $options = 0 ]]]]]) : object
7
- ```
8
-
9
- Loads the provided HTML document string.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `str` | The HTML document string.
14
- | `lowercase` | Tag names are parsed in lowercase letters if enabled.
15
- | `stripRN` | Newline characters are replaced by whitespace if enabled.
16
- | `defaultBRText` | Defines the default text to return for `<br>` elements.
17
- | `defaultSpanText` | Defines the default text to return for `<span>` elements.
18
- | `options` | Additional options for the parser. Currently supports `'HDOM_SMARTY_AS_TEXT'` to remove [Smarty](https://www.smarty.net/) scripts.
19
-
20
- Returns the object.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/loadFile.md DELETED
@@ -1,11 +0,0 @@
1
- ---
2
- title: loadFile
3
- ---
4
-
5
- ```php
6
- loadFile (...)
7
- ```
8
-
9
- Loads an HTML document from file. Supports arguments of [`file_get_contents`](http://php.net/manual/en/function.file-get-contents.php).
10
-
11
- Returns the object.
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: parse (protected)
3
- ---
4
-
5
- ```php
6
- parse ()
7
- ```
8
-
9
- Parses the document. This function is called after the document was loaded into `$this->doc`.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse_attr.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: parse_attr (protected)
3
- ---
4
-
5
- ```php
6
- parse_attr ( object $node, string $name, array &$space )
7
- ```
8
-
9
- Parses a single attribute starting at the current parsing position in the document.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `node` | The current element (node).
14
- | `name` | The attribute name.
15
- | `space` | An array of whitespace surrounding the current attribute (see [Attribute Whitespace](../definitions/#attribute-whitespace)).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse_charset.md DELETED
@@ -1,17 +0,0 @@
1
- ---
2
- title: parse_charset (protected)
3
- ---
4
-
5
- ```php
6
- parse_charset ()
7
- ```
8
-
9
- Parses the charset.
10
-
11
- If the callback function `get_last_retrieve_url_contents_content_type` exists, it is assumed to return the content type header for the current document as string.
12
-
13
- Uses the charset from the metadata of the page if defined.
14
-
15
- If none of the previous conditions are met, the charset is determined by `mb_detect_encoding` if multi-byte support is active.
16
-
17
- If multi-byte support is not active the charset is assumed to be `'UTF-8'`.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/prepare.md DELETED
@@ -1,16 +0,0 @@
1
- ---
2
- title: prepare (protected)
3
- ---
4
-
5
- ```php
6
- prepare ( string $str [, bool $lowercase = true [, string $defaultBRText = DEFAULT_BR_TEXT [, string $defaultSpanText = DEFAULT_SPAN_TEXT ]]] )
7
- ```
8
-
9
- Initializes the DOM object.
10
-
11
- | Parameters | Description
12
- | ---------- | -----------
13
- | `str` | The HTML document string.
14
- | `lowercase` | Tag names are parsed in lowercase letters if enabled.
15
- | `defaultBRText` | Defines the default text to return for `<br>` elements.
16
- | `defaultSpanText` | Defines the default text to return for `<span>` elements.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/read_tag.md DELETED
@@ -1,11 +0,0 @@
1
- ---
2
- title: read_tag (protected)
3
- ---
4
-
5
- ```php
6
- read_tag () : bool
7
- ```
8
-
9
- Reads a single tag starting at the current parsing position in the document. The tag is automatically added to the DOM.
10
-
11
- Returns true if a tag was found.
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/remove_callback.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: remove_callback
3
- ---
4
-
5
- ```php
6
- remove_callback ()
7
- ```
8
-
9
- Removes the callback set by [`set_callback`](#set_callback).
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/remove_noise.md DELETED
@@ -1,16 +0,0 @@
1
- ---
2
- title: remove_noise (protected)
3
- ---
4
-
5
- ```php
6
- remove_noise ( string $pattern [, bool $remove_tag = false] )
7
- ```
8
-
9
- Replaces noise in the document (i.e. scripts) by placeholders and adds the removed contents to `$this->noise`.
10
-
11
- _Note_: Noise is replaced by placeholders in order to allow restoring the original contents. Placeholders take the form of `'___noise___1000'` where the number is increased by one for each removed noise.
12
-
13
- | Parameter | Description
14
- | --------- | -----------
15
- | `pattern` | A regular expression that matches the noise to remove.
16
- | `remove_tag` | Removes the entire match when enabled or submatches when disabled.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/restore_noise.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: restore_noise (protected)
3
- ---
4
-
5
- ```php
6
- restore_noise ( string $text ) : string
7
- ```
8
-
9
- Restores noise in the provided string by replacing noise placeholders by their original contents.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `text` | A string (potentially) containing noise placeholders.
14
-
15
- Returns the string with original contents restored or the original string if it doesn't contain noise placeholders.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/save.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: save
3
- ---
4
-
5
- ```php
6
- save ( [ string $filepath = '' ] ) : string
7
- ```
8
-
9
- Writes the current DOM to file.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `filepath` | Writes to file if the provided file path is not empty.
14
-
15
- Returns the document string.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/search_noise.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: search_noise (protected)
3
- ---
4
-
5
- ```php
6
- search_noise ( string $text ) : string
7
- ```
8
-
9
- Find a single noise element by providing the noise placeholder text.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `text` | The noise placeholder to find.
14
-
15
- Returns the original contents for the placeholder.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/set_callback.md DELETED
@@ -1,14 +0,0 @@
1
- ---
2
- title: set_callback
3
- ---
4
-
5
- ```php
6
- set_callback ( string $function_name )
7
- ```
8
-
9
- Sets the callback function which is called on each element of the DOM when building outertext.
10
- The function must accept a single parameter of type `HtmlNode`.
11
-
12
- | Parameter | Description
13
- | --------- | -----------
14
- | `function_name` | Name of the function.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/skip.md DELETED
@@ -1,14 +0,0 @@
1
-
2
- ---
3
- title: skip (protected)
4
- ---
5
-
6
- ```php
7
- skip ( string $chars )
8
- ```
9
-
10
- Skips characters starting at the current parsing position in the document. Sets the parsing position to the first character not in the provided list of characters.
11
-
12
- | Parameter | Description
13
- | --------- | -----------
14
- | `chars` | A list of characters to skip.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__call.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: __call
3
- ---
4
-
5
- ```php
6
- __call ($function, $arguments)
7
- ```
8
-
9
- Serves as a wrapper for deprecated methods. See [magic methods](https://www.php.net/manual/en/language.oop5.overloading.php#object.call) for more information.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__construct.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: __construct
3
- ---
4
-
5
- ```php
6
- __construct ( [ object $dom ] ) : object
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `dom` | An object of type [`HtmlDocument`](api/HtmlDocument/).
12
-
13
- Constructs a new object of type `HtmlNode`, assigns `$dom` as DOM object and adds itself to the list of nodes in `$dom`.
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__debugInfo.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: __debugInfo
3
- ---
4
-
5
- ```php
6
- __debugInfo ( )
7
- ```
8
-
9
- Returns debugging information about the current object.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__destruct.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: __destruct
3
- ---
4
-
5
- ```php
6
- __destruct ( )
7
- ```
8
-
9
- Destructs the current object and frees memory.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__get.md DELETED
@@ -1,24 +0,0 @@
1
- ---
2
- title: __get
3
- ---
4
-
5
- ```php
6
- __get ( string $name ) : mixed
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `name` | `outertext`, `innertext`, `plaintext`, `xmltext` or attribute name.
12
-
13
- See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.get)
14
-
15
- If the provided name is a valid attribute name, returns the attribute value. Otherwise a value according to the table below.
16
-
17
- | Name | Description
18
- | ---- | -----------
19
- | `outertext` | Returns the outer text of the current node.
20
- | `innertext` | Returns the inner text of the current node.
21
- | `plaintext` | Returns the plain text of the current node.
22
- | `xmltext` | Returns the xml representation for the inner text of the current node as a CDATA section.
23
-
24
- Returns nothing if the provided name is neither a valid attribute name, nor a valid parameter name.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__isset.md DELETED
@@ -1,21 +0,0 @@
1
- ---
2
- title: __isset
3
- ---
4
-
5
- ```php
6
- __isset ( string $name ) : bool
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `name` | `outertext`, `innertext`, `plaintext` or attribute name.
12
-
13
- See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.isset)
14
-
15
- Returns true if the provided name is a valid attribute name or any of the values in the table below. False otherwise.
16
-
17
- | Name | Description
18
- | ---- | -----------
19
- | `outertext` | Returns the outer text of the current node.
20
- | `innertext` | Returns the inner text of the current node.
21
- | `plaintext` | Returns the plain text of the current node.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__set.md DELETED
@@ -1,20 +0,0 @@
1
- ---
2
- title: __set
3
- ---
4
-
5
- ```php
6
- __set ( string $name, mixed $value )
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `name` | `outertext`, `innertext` or attribute name.
12
- | `value` | Value to set.
13
-
14
- See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.set)
15
-
16
- Sets the outer text of the current node to `$value` if `$name` is `outertext`.
17
-
18
- Sets the inner text of the current node to `$value` if `$name` is `innertext`.
19
-
20
- Otherwise, adds or updates an attribute with name `$name` and value `$value` to the current node.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__toString.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: __toString
3
- ---
4
-
5
- ```php
6
- __toString ( ) : string
7
- ```
8
-
9
- Returns the outer text of the current node.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__unset.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: __unset
3
- ---
4
-
5
- ```php
6
- __unset ( string $name )
7
- ```
8
-
9
- Removes the attribute with name `$name` from the current node if it exists.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/addClass.md DELETED
@@ -1,25 +0,0 @@
1
- ---
2
- title: addClass
3
- ---
4
-
5
- ```php
6
- addClass ( mixed $class )
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `class` | Specifies one or more class names to be added.
12
-
13
- Adds one or more class names to the current node.
14
-
15
- **Remarks**
16
-
17
- * To add more than one class, separate the class names with space or provide them as an array.
18
-
19
- **Examples**
20
-
21
- ```php
22
- $node->addClass('hidden');
23
- $node->addClass('article important');
24
- $node->addClass(array('article', 'new'));
25
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/appendChild.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: appendChild
3
- ---
4
-
5
- ```php
6
- appendChild ( object $node ) : object
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `node` | An object of type [`HtmlNode`](../HtmlNode/)
12
-
13
- Makes the current node parent of the node provided to this function.
14
-
15
- Returns the provided node.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/childNodes.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: childNodes
3
- ---
4
-
5
- ```php
6
- childNodes ( [ int $idx = -1 ] ) : mixed
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `idx` | Index of the node to return or `-1` to return all nodes.
12
-
13
- Returns all or one specific child node from the current node.
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/clear.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: clear
3
- ---
4
-
5
- ```php
6
- clear ( )
7
- ```
8
-
9
- Sets all properties in the current node, which contain objects, to null.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/convert_text.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: convert_text
3
- ---
4
-
5
- ```php
6
- convert_text ( string $text ) : string
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `text` | Text to convert.
12
-
13
- Assumes that the provided text is in the form of the configured source character set (see [`sourceCharset`](../HtmlNode/)) and converts it to the specified target character set (see [`targetCharset`](../HtmlNode/)).
14
-
15
- Returns the converted text.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/definitions.md DELETED
@@ -1,96 +0,0 @@
1
- ---
2
- title: Definitions
3
- ---
4
-
5
- The definitions below are an essential part of the parser.
6
-
7
- ## Node Types
8
-
9
- The type of a node is determined during parsing and represented by one of the elements in the list below.
10
-
11
- | Type | Description
12
- | ---- | -----------
13
- | `HDOM_TYPE_ELEMENT` | Start tag (i.e. `<html>`)
14
- | `HDOM_TYPE_COMMENT` | HTML comment (i.e. `<!-- Hello, World! -->`)
15
- | `HDOM_TYPE_TEXT` | Plain text (i.e. `Hello, World!`)
16
- | `HDOM_TYPE_ROOT` | Root element. There can always only be one root element in the DOM.
17
- | `HDOM_TYPE_UNKNOWN` | Unknown type (i.e. CDATA, DOCTYPE, etc...)
18
-
19
- ### Example
20
-
21
- ```html
22
- <!DOCTYPE html><html><!-- Hello, World! --></html>Hello, World!
23
- ```
24
-
25
- _Note_: `HDOM_TYPE_ROOT` always exists regardless of the actual document structure.
26
-
27
- | HTML | Node Type
28
- | ---- | ---------
29
- | | `HDOM_TYPE_ROOT`
30
- | `<!DOCTYPE html>` | `HDOM_TYPE_UNKNOWN`
31
- | `<html>` | `HDOM_TYPE_ELEMENT`
32
- | `<!-- Hello, World! -->` | `HDOM_TYPE_COMMENT`
33
- | `Hello, World!` | `HDOM_TYPE_TEXT`
34
-
35
- ## Quote Types
36
-
37
- Identifies the quoting type on attribute values.
38
-
39
- | Type | Description
40
- | ---- | -----------
41
- | `HDOM_QUOTE_DOUBLE` | Double quotes (`""`)
42
- | `HDOM_QUOTE_SINGLE` | Single quotes (`''`)
43
- | `HDOM_QUOTE_NO` | Not quoted (flag)
44
-
45
- _Note_: Attributes with no values (flags) are stored as `HDOM_QUOTE_NO`.
46
-
47
- ### Example
48
-
49
- ```html
50
- <p class="paragraph" id='info1' hidden>Hello, World!</p>
51
- ```
52
-
53
- | Attribute | Description
54
- | --------- | -----------
55
- | `class="paragraph"` | `HDOM_QUOTE_DOUBLE`
56
- | `id='info1'` | `HDOM_QUOTE_SINGLE`
57
- | `hidden` | `HDOM_QUOTE_NO`
58
-
59
- ## Node Info Types
60
-
61
- Each node stores additional information (metadata) that is identified by the elements below.
62
-
63
- | Type | Description
64
- | ---- | -----------
65
- | `HDOM_INFO_BEGIN` | Cursor position for the start tag of a node.
66
- | `HDOM_INFO_END` | Cursor position for the end tag of a node. A value of zero indicates a node with no end tag (missing closing tag).
67
- | `HDOM_INFO_QUOTE` | Quote type for attribute values. The value must be an element of [Quote Type](#quote-types).
68
- | `HDOM_INFO_SPACE` | Array of whitespace around attributes (see [Attribute Whitespace](#attribute-whitespace)).
69
- | `HDOM_INFO_TEXT` | Non-HTML text in tags (i.e. comments, doctype, etc...).
70
- | `HDOM_INFO_INNER` | Inner text of a node.
71
- | `HDOM_INFO_OUTER` | Outer text of a node.
72
- | `HDOM_INFO_ENDSPACE` | Whitespace at the end of a tag before the closing bracket.
73
-
74
- ## Attribute Whitespace
75
-
76
- Whitespace around attributes is stored in the form of an array with three elements:
77
-
78
- | Element | Description
79
- | ------- | -----------
80
- | `0` | Whitespace before the attribute name.
81
- | `1` | Whitespace between attribute name and the equal sign.
82
- | `2` | Whitespace between the equal sign and the attribute value
83
-
84
- ### Example
85
-
86
- ```html
87
- <p class="paragraph" id = 'info1'hidden>Hello, World!</p>
88
- ```
89
-
90
- _Note_: Whitespace before attribute names is not displayed in the browser. It is, however, part of the attributes.
91
-
92
- | Attribute | Description
93
- | --------- | -----------
94
- | ` class="paragraph"` | `[0] => ' ', [1] => '', [2] => ''`
95
- | ` id = 'info1'` | `[0] => ' ', [1] => ' ', [2] => ' '`
96
- | `hidden` | `[0] => '', [1] => '', [2] => ''`
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/dump.md DELETED
@@ -1,14 +0,0 @@
1
- ---
2
- title: dump
3
- ---
4
-
5
- ```php
6
- dump ( [ bool $show_attr = false [, int $depth = 0 ]] )
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `show_attr` | Attribute names are included in the output if enabled.
12
- | `depth` | Depth of the current element
13
-
14
- Dumps information about the current node and all child nodes recursively.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/dump_node.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: dump_node
3
- ---
4
-
5
- ```php
6
- dump_node ( [ bool $echo = true ] ) : mixed
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `echo` | Echoes the dump details directly if enabled.
12
-
13
- Dumps information about the current document node. Returns a string if `$echo` is set to false, null otherwise.
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/expect.md DELETED
@@ -1,15 +0,0 @@
1
- ---
2
- title: expect
3
- ---
4
-
5
- ```php
6
- expect (
7
- string $selector
8
- [, int $idx = null ]
9
- [, bool $lowercase = false ]
10
- ) : mixed
11
- ```
12
-
13
- Finds elements in the DOM.
14
- Returns null if no match was found.
15
- See [`find`](./find/) for a description of parameters and selectors.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/find.md DELETED
@@ -1,47 +0,0 @@
1
- ---
2
- title: find
3
- ---
4
-
5
- ```php
6
- find (
7
- string $selector
8
- [, int $idx = null ]
9
- [, bool $lowercase = false ]
10
- ) : mixed
11
- ```
12
-
13
- | Parameter | Description
14
- | --------- | -----------
15
- | `selector` | [CSS](https://www.w3.org/TR/selectors/) selector.
16
- | `idx` | Index of element to return.
17
- | `lowercase` | Matches tag names case insensitive (lowercase) if enabled.
18
-
19
- Finds one or more nodes in the current document, using CSS selectors.
20
-
21
- * Returns null if no match was found.
22
- * Returns an array of [`HtmlNode`](../HtmlNode/) if `$idx` is null.
23
- * Returns an object of type [`HtmlNode`](../HtmlNode/) if `$idx` is anything __but__ null.
24
-
25
- ## Supported Selectors
26
-
27
- | Selector | Description
28
- | --------- | -----------
29
- | `*` | [Universal selector](https://www.w3.org/TR/selectors/#the-universal-selector)
30
- | `E` | [Type (tag name) selector](https://www.w3.org/TR/selectors/#type-selectors)
31
- | `E#id` | [ID selector](https://www.w3.org/TR/selectors/#id-selectors)
32
- | `E.class` | [Class selector](https://www.w3.org/TR/selectors/#class-html)
33
- | `E[attr]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors)
34
- | `E[attr="value"]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors)
35
- | `E[attr="value"] i` | [Case-sensitivity](https://www.w3.org/TR/selectors/#attribute-case)
36
- | `E[attr="value"] s` | [Case-sensitivity](https://www.w3.org/TR/selectors/#attribute-case)
37
- | `E[attr~="value"]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors)
38
- | `E[attr^="value"]` | [Substring matching attribute selector](https://www.w3.org/TR/selectors/#attribute-substrings)
39
- | `E[attr$="value"]` | [Substring matching attribute selector](https://www.w3.org/TR/selectors/#attribute-substrings)
40
- | `E[attr*="value"]` | [Substring matching attribute selector](https://www.w3.org/TR/selectors/#attribute-substrings)
41
- | `E[attr|="value"]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors)
42
- | `E F` | [Descendant combinator](https://www.w3.org/TR/selectors/#descendant-combinators)
43
- | `E > F` | [Child combinator](https://www.w3.org/TR/selectors/#child-combinators)
44
- | `E + F` | [Next-sibling combinator](https://www.w3.org/TR/selectors/#adjacent-sibling-combinators)
45
- | `E ~ F` | [Subsequent-sibling combinator](https://www.w3.org/TR/selectors/#general-sibling-combinators)
46
- | `E, F` | [Selector list](https://www.w3.org/TR/selectors/#selector-list)
47
- | `E:not(s)` | [Negation pseudo-class](https://www.w3.org/TR/selectors-3/#negation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/find_ancestor_tag.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: find_ancestor_tag
3
- ---
4
-
5
- ```php
6
- find_ancestor_tag ( string $tag ) : object
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `tag` | Tag name of the element to find.
12
-
13
- Returns the first matching node that matches the specified tag name or null if no match was found.
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/firstChild.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: firstChild
3
- ---
4
-
5
- ```php
6
- firstChild ( ) : mixed
7
- ```
8
-
9
- Returns the first child node of the current node or null if the current node has no child nodes.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getAllAttributes.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: getAllAttributes
3
- ---
4
-
5
- ```php
6
- getAllAttributes ( ) : array
7
- ```
8
-
9
- Returns all attributes for the current node.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getAttribute.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: getAttribute
3
- ---
4
-
5
- ```php
6
- getAttribute ( string $name ) : mixed
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `name` | Attribute name.
12
-
13
- Returns the value for the attribute `$name`.
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementById.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: getElementById
3
- ---
4
-
5
- ```php
6
- getElementById ( string $id ) : object
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `id` | Element id.
12
-
13
- Returns the first element with the specified id.
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementByTagName.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: getElementByTagName
3
- ---
4
-
5
- ```php
6
- getElementByTagName ( string $name ) : object
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `name` | Tag name.
12
-
13
- Returns the first element with the specified tag name.
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementsById.md DELETED
@@ -1,14 +0,0 @@
1
- ---
2
- title: getElementsById
3
- ---
4
-
5
- ```php
6
- getElementsById ( string $id [, int $idx = null] ) : mixed
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `id` | Element id.
12
- | `idx` | Index of element to return.
13
-
14
- Returns all elements with the specified id if `$idx` is null, or a specific one if `$idx` is a valid index.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementsByTagName.md DELETED
@@ -1,14 +0,0 @@
1
- ---
2
- title: getElementsByTagName
3
- ---
4
-
5
- ```php
6
- getElementsByTagName ( string $name [, int $idx = null ] ) : mixed
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `name` | Tag name.
12
- | `idx` | Index of the element to return.
13
-
14
- Returns all elements with the specified tag name if `$idx` is null, or a specific one if `$idx` is a valid index.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/get_display_size.md DELETED
@@ -1,11 +0,0 @@
1
- ---
2
- title: get_display_size
3
- ---
4
-
5
- ```php
6
- get_display_size ( ) : mixed
7
- ```
8
-
9
- Returns false if the current node is not an image.
10
-
11
- Returns an associative array of two elements - `height` and `width` - that represent the display size of the image.
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasAttribute.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: hasAttribute
3
- ---
4
-
5
- ```php
6
- hasAttribute ( string $name ) : bool
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `name` | Name of the attribute.
12
-
13
- Returns true if the current node has an attribute with the specified name.
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasChildNodes.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: hasChildNodes
3
- ---
4
-
5
- ```php
6
- hasChildNodes ( ) : bool
7
- ```
8
-
9
- Returns true if the current node has one or more child nodes.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasClass.md DELETED
@@ -1,19 +0,0 @@
1
- ---
2
- title: hasClass
3
- ---
4
-
5
- ```php
6
- hasClass ( string $class ) : bool
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `class` | Specifies the class name to search for.
12
-
13
- Returns true if the current node has the specified class name.
14
-
15
- **Examples**
16
-
17
- ```php
18
- $node->hasClass('article');
19
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/index.md DELETED
@@ -1,28 +0,0 @@
1
- ---
2
- title: HtmlNode
3
- ---
4
-
5
- Represents a single node in the DOM tree (see [`HtmlDocument`](../HtmlDocument/)).
6
-
7
- ## Public Properties
8
-
9
- | Property | Description
10
- | -------- | -----------
11
- | `_` | Node meta data (i.e. type of node).
12
- | `attr` | List of attributes.
13
- | `children` | List of child nodes.
14
- | `nodes` | List of nodes.
15
- | `nodetype` | Node type.
16
- | `parent` | Parent node object.
17
- | `tag` | Node's tag name.
18
- | `tag_start` | Start position of the tag name in the original document.
19
-
20
- ## Protected Properties
21
-
22
- None.
23
-
24
- ## Private Properties
25
-
26
- | Property | Description
27
- | -------- | -----------
28
- | `dom` | The DOM object (see [`HtmlDocument`](../HtmlDocument/)).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/innertext.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: innertext
3
- ---
4
-
5
- ```php
6
- innertext ( ) : string
7
- ```
8
-
9
- Returns the inner text (everything inside the opening and closing tags) of the current node.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_block_element.md DELETED
@@ -1,16 +0,0 @@
1
- ---
2
- title: is_block_element
3
- ---
4
-
5
- ```php
6
- is_block_element ( object $node ) : bool
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `node` | A node
12
-
13
- Returns true if the provided element is a block level element.
14
-
15
- Find more information about block level elements at
16
- [https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php](https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_inline_element.md DELETED
@@ -1,16 +0,0 @@
1
- ---
2
- title: is_inline_element
3
- ---
4
-
5
- ```php
6
- is_inline_element ( object $node ) : bool
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `node` | A node
12
-
13
- Returns true if the provided element is an inline-level element.
14
-
15
- Find more information about inline level elements at
16
- [https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php](https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_utf8.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: is_utf8 (static)
3
- ---
4
-
5
- ```php
6
- is_utf8 ( string $str ) : bool
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `str` | String to test.
12
-
13
- Returns true if the provided string is a valid UTF-8 string.
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/lastChild.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: lastChild
3
- ---
4
-
5
- ```php
6
- lastChild ( ) : object
7
- ```
8
-
9
- Returns the last child of the current node or null if the current node has no child elements.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/makeup.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: makeup
3
- ---
4
-
5
- ```php
6
- makeup ( ) : string
7
- ```
8
-
9
- Returns the HTML representation of the current node.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/match.md DELETED
@@ -1,21 +0,0 @@
1
- ---
2
- title: match (protected)
3
- ---
4
-
5
- ```php
6
- match (
7
- string $exp
8
- , string $pattern
9
- , string $value
10
- , string $case_sensitivity
11
- ) : bool
12
- ```
13
-
14
- | Parameter | Description
15
- | --------- | -----------
16
- | `exp` | Expression
17
- | `pattern` | Pattern
18
- | `value` | Value
19
- | `case_sensitivity` | Case sensitivity
20
-
21
- Matches a single attribute value against the specified attribute selector. See also [`find`](../find/).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/nextSibling.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: nextSibling
3
- ---
4
-
5
- ```php
6
- nextSibling ( ) : object
7
- ```
8
-
9
- Returns the next sibling of the current node or null if the current node has no next sibling.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/nodeName.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: nodeName
3
- ---
4
-
5
- ```php
6
- nodeName ( ) : string
7
- ```
8
-
9
- Returns the name of the current node (tag name).
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/outertext.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: outertext
3
- ---
4
-
5
- ```php
6
- outertext ( ) : string
7
- ```
8
-
9
- Returns the outer text (everything including the opening and closing tags) of the current node.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parent.md DELETED
@@ -1,14 +0,0 @@
1
- ---
2
- title: parent
3
- ---
4
-
5
- ```php
6
- parent ( [ object $parent = null ] ) : object
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `parent` | The parent node
12
-
13
- * Returns the parent node of the current node if `$parent` is null.
14
- * Sets the parent node of the current node if `$parent` is not null. In this case the current node is automatically added to the list of nodes in the parent node.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parentNode.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: parentNode
3
- ---
4
-
5
- ```php
6
- parentNode () : object
7
- ```
8
-
9
- Returns the current node's parent.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parse_selector.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: parse_selector (protected)
3
- ---
4
-
5
- ```php
6
- parse_selector ( string $selector_string ) : array
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `selector_string` | The selector string
12
-
13
- Parses a CSS selector into an internal format for further use. See also [`find`](../find/).
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/previousSibling.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: previousSibling
3
- ---
4
-
5
- ```php
6
- previousSibling ( ) : object
7
- ```
8
-
9
- Returns the previous sibling of the current node, or null if the current node has no previous sibling.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/remove.md DELETED
@@ -1,41 +0,0 @@
1
- ---
2
- title: remove
3
- ---
4
-
5
- ```php
6
- remove ( )
7
- ```
8
-
9
- Removes the current node recursively from the DOM.
10
- Does nothing if the node has no parent (root node);
11
-
12
- **Example**
13
-
14
- ```php
15
- $html->load(<<<EOD
16
- <html>
17
- <body>
18
- <table>
19
- <tr><th>Title</th></tr>
20
- <tr><td>Row 1</td></tr>
21
- </table>
22
- </body>
23
- </html>
24
- EOD
25
- );
26
-
27
- $table = $html->find('table', 0);
28
- $table->remove();
29
-
30
- echo $html;
31
-
32
- /**
33
- * Returns
34
- *
35
- * <html> <body> </body> </html>
36
- */
37
- ```
38
-
39
- **Remarks**
40
-
41
- * Whitespace immediately **before** the removed node will remain in the DOM.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeAttribute.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: removeAttribute
3
- ---
4
-
5
- ```php
6
- removeAttribute ( string $name )
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `name` | Name of the attribute to remove.
12
-
13
- Removes the attribute with the specified name from the current node.
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeChild.md DELETED
@@ -1,45 +0,0 @@
1
- ---
2
- title: removeChild
3
- ---
4
-
5
- ```php
6
- removeChild ( object $node )
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `node` | Node to remove from current element, must be a child of the current element.
12
-
13
- Removes the node recursively from the DOM.
14
- Does nothing if the provided node is not a child of the current node.
15
-
16
- **Example**
17
-
18
- ```php
19
- $html->load(<<<EOD
20
- <html>
21
- <body>
22
- <table>
23
- <tr><th>Title</th></tr>
24
- <tr><td>Row 1</td></tr>
25
- </table>
26
- </body>
27
- </html>
28
- EOD
29
- );
30
-
31
- $body = $html->find('body', 0);
32
- $body->removeChild($body->find('table', 0));
33
-
34
- echo $html;
35
-
36
- /**
37
- * Returns
38
- *
39
- * <html> <body> </body> </html>
40
- */
41
- ```
42
-
43
- **Remarks**
44
-
45
- * Whitespace immediately **before** the removed node will remain in the DOM.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeClass.md DELETED
@@ -1,27 +0,0 @@
1
- ---
2
- title: removeClass
3
- ---
4
-
5
- ```php
6
- removeClass ( [ mixed $class = null ] )
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `class` | Specifies one or more class names to be removed.
12
-
13
- Removes one or more class names from the current node.
14
-
15
- **Remarks**
16
-
17
- * To remove more than one class, separate the class names with space or provide them as an array.
18
- * If no parameter is specified, this method will remove all class names from the current node.
19
-
20
- **Examples**
21
-
22
- ```php
23
- $node->removeClass('hidden');
24
- $node->removeClass('article important');
25
- $node->removeClass(array('article', 'new'));
26
- $node->removeClass();
27
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/save.md DELETED
@@ -1,22 +0,0 @@
1
- ---
2
- title: save
3
- ---
4
-
5
- ```php
6
- save ( [ string $filepath = '' ] ) : string
7
- ```
8
-
9
- Writes the current node to file.
10
-
11
- | Parameter | Description
12
- | --------- | -----------
13
- | `filepath` | Writes to file if the provided file path is not empty.
14
-
15
- Returns the document string.
16
-
17
- **Examples**
18
-
19
- ```php
20
- $string = $node->save();
21
- $string = $node->save($file);
22
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/seek.md DELETED
@@ -1,21 +0,0 @@
1
- ---
2
- title: seek (protected)
3
- ---
4
-
5
- ```php
6
- seek (
7
- string $selector
8
- , array &$ret
9
- , string $parent_cmd
10
- [, bool $lowercase = false ]
11
- )
12
- ```
13
-
14
- | Parameter | Description
15
- | --------- | -----------
16
- | `selector` | The current selector.
17
- | `ret` | Previous return value (starting point).
18
- | `parent_cmd` | The combinator used before the current selector.
19
- | `lowercase` | Matches tag names case insensitive (lowercase) if enabled.
20
-
21
- Starts by searching for child elements of `$ret` that match the specified selector. Adds matching elements to `$ret` (for the next iteration).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/setAttribute.md DELETED
@@ -1,14 +0,0 @@
1
- ---
2
- title: setAttribute
3
- ---
4
-
5
- ```php
6
- setAttribute ( string $name, string $value )
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `name` | Attribute name
12
- | `value` | Attribute value
13
-
14
- Adds or sets an attribute in the current node to the specified value.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/text.md DELETED
@@ -1,27 +0,0 @@
1
- ---
2
- title: text
3
- ---
4
-
5
- ```php
6
- text ( [ bool $trim = true ] ) : string
7
- ```
8
-
9
- | Parameter | Description
10
- | --------- | -----------
11
- | `trim` | Removes whitespace around the text if enabled.
12
-
13
- Returns the text representation of the current node and its descendants.
14
-
15
- **Remarks**
16
-
17
- * You can use the short-hand version [`$node->plaintext`](__get.md) instead of
18
- `$node->text()` which also works on the document level (automatically selects
19
- the root element of the document).
20
-
21
- **Examples**
22
-
23
- ```php
24
- $node->text();
25
- $node->plaintext;
26
- $html->plaintext;
27
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/xmltext.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- title: xmltext
3
- ---
4
-
5
- ```php
6
- xmltext ( ) : string
7
- ```
8
-
9
- Returns the xml representation for the inner text of the current node as a CDATA section.
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/api/constants.md DELETED
@@ -1,31 +0,0 @@
1
- ---
2
- title: Constants
3
- ---
4
-
5
- Constants define how the parser treats documents. They can be defined before
6
- loading the parser to globally replace the default values.
7
-
8
- ## DEFAULT_TARGET_CHARSET
9
-
10
- Defines the default target charset for text returned by the parser.
11
-
12
- Default: `'UTF-8'`
13
-
14
- ## DEFAULT_BR_TEXT
15
-
16
- Defines the default text to return for `<br>` elements.
17
-
18
- Default: `"\r\n"`
19
-
20
- ## DEFAULT_SPAN_TEXT
21
-
22
- Defines the default text to return for `<span>` elements.
23
-
24
- Default: `' '`
25
-
26
- ## MAX_FILE_SIZE
27
-
28
- Defines the maximum number of bytes the parser can load into memory. This limit
29
- only applies to the source file or string.
30
-
31
- Default: `2621440`
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/extra.css DELETED
@@ -1,9 +0,0 @@
1
- code {
2
- white-space: nowrap;
3
- }
4
-
5
- .logo {
6
- display: block;
7
- margin: auto;
8
- text-align: center;
9
- }
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/faq/0001.md DELETED
@@ -1,67 +0,0 @@
1
- ---
2
- title: Why is the parser so memory hungry?
3
- ---
4
-
5
- ## Short answer
6
-
7
- DOM parsers generally require a lot of memory to represent the document tree and its attributes in memory. If memory is a concern, consider using a SAX parser instead.
8
-
9
- ## Answer
10
-
11
- The parser loads the entire document tree and its attributes into memory. This is called the Document Object Model (DOM).
12
-
13
- The DOM is not just a copy of the source document. It represents each element in the source document by an object in memory. The result looks like a tree, which is why it's called the document tree:
14
-
15
- ```
16
-
17
- html
18
- / \
19
- head body
20
- / \ \
21
- title meta div
22
- / \
23
- ul a
24
- / \
25
- li li
26
-
27
- ```
28
-
29
- *Note*: Attributes, contents and closing tags were omitted for simplicity.
30
-
31
- In this example, for each node the parser needs to store
32
-
33
- * the name of the node ('html', 'head', 'body', 'title', ...),
34
- * a reference to the parent node (i.e. 'div' points to 'body' which points to 'html') and
35
- * a list of references to its child nodes (i.e. 'html' points to 'head' and 'body').
36
-
37
- Here is a simplified representation:
38
-
39
- ```
40
- object
41
- > node_name
42
- > parent_node
43
- > child_nodes[]
44
- ```
45
-
46
- While the source document only stores the node name and the opening and closing brackets (i.e. `<html>`), a node stores the node name as well as references to the parent and child nodes. Each of which require memory.
47
-
48
- ## Example
49
-
50
- Let's take the 'head' element and compare the source data with the object data.
51
-
52
- This is the source data: `<head>` (6 Bytes)
53
-
54
- The equivalent node (including references to parent and child nodes) has the following data:
55
-
56
- * Node Object (40 Bytes for the base object + 3 x 16 Bytes for properties = 88 Bytes) [^1]
57
- * Node Name "head" (4 Bytes)
58
- * Parent Node (unknown number of Bytes)
59
- * Child Nodes (8 x 36 Bytes) [^2]
60
-
61
- This amounts to 380 Bytes per object. A factor of 63 compared to the source data. With larger datasets this factor will be smaller, especially when taking content data into account.
62
-
63
- A factor of ~30 compared to the source data is realistic for DOM parsers [^3]. If memory is a concern, consider using a SAX parser instead.
64
-
65
- [^1]: [Objects in PHP 7](https://nikic.github.io/2015/06/19/Internal-value-representation-in-PHP-7-part-2.html#objects-in-php-7) by nikic
66
- [^2]: [PHP's new hashtable implementation](https://nikic.github.io/2014/12/22/PHPs-new-hashtable-implementation.html#memory-utilization) by nikic
67
- [^3]: [Html Agility Pack Issue #77](https://github.com/zzzprojects/html-agility-pack/issues/77) by aktzpn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/index.md DELETED
@@ -1,33 +0,0 @@
1
- ---
2
- title: PHP Simple HTML DOM Parser
3
- ---
4
-
5
- A fast, simple and reliable HTML document parser for PHP.
6
-
7
- Created by S.C. Chen, based on [HTML Parser for PHP 4](http://php-html.sourceforge.net/) by Jose
8
- Solorzano.
9
-
10
- # Parse any HTML document
11
-
12
- PHP Simple HTML DOM Parser handles any HTML document, even ones that are considered
13
- invalid by the [HTML](https://www.w3.org/TR/html/) specification.
14
-
15
- # Select elements using CSS selectors
16
-
17
- PHP Simple HTML DOM Parser supports CSS style selectors to navigate the DOM,
18
- similar to [jQuery](https://jquery.com/).
19
-
20
- # Download
21
-
22
- * Download the latest version from [SourceForge](https://sourceforge.net/projects/simplehtmldom/)
23
-
24
- # Contributing
25
-
26
- * Request features on the [Feature Request Tracker](https://sourceforge.net/p/simplehtmldom/feature-requests/)
27
- * Report bugs on the [Bug Tracker](https://sourceforge.net/p/simplehtmldom/bugs/)
28
- * Get involved with the community on the [Discussions Board](https://sourceforge.net/p/simplehtmldom/discussion/)
29
-
30
- # License
31
-
32
- PHP Simple HTML DOM Parser is [Free Software](https://en.wikipedia.org/wiki/Free_software)
33
- licensed under the [MIT License](https://opensource.org/licenses/MIT).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/accessing-element-attributes.md DELETED
@@ -1,40 +0,0 @@
1
- ---
2
- title: Accessing HTML element attributes
3
- ---
4
-
5
- # Get, Set and Remove attributes
6
-
7
- ```php
8
- // Get attribute (if the attribute is a non-value attribute (e.g. checked, selected...), it returns true or false)
9
- $value = $e->href;
10
-
11
- // Set attribute (if the attribute is a non-value attribute (e.g. checked, selected...), set its value to true or false)
12
- $e->href = 'my link';
13
-
14
- // Remove attribute: set its value to null
15
- $e->href = null;
16
-
17
- // Determine whether an attribute exists
18
- if(isset($e->href))
19
- echo 'href exist!';
20
- ```
21
-
22
- # Magic attributes
23
-
24
- ```php
25
- // Example
26
- $html->load("<div>foo <b>bar</b></div>");
27
- $e = $html->find("div", 0);
28
-
29
- echo $e->tag; // Returns: " div"
30
- echo $e->outertext; // Returns: " <div>foo <b>bar</b></div>"
31
- echo $e->innertext; // Returns: " foo <b>bar</b>"
32
- echo $e->plaintext; // Returns: " foo bar"
33
- ```
34
-
35
- Attribute name | Description
36
- -------------- | -----------
37
- `$e->tag` | Read or write the **tag name** of element.
38
- `$e->outertext`| Read or write the **outer HTML text** of element.
39
- `$e->innertext`| Read or write the **inner HTML text** of element.
40
- `$e->plaintext`| Read or write the **plain text** of element.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/adding-nodes.md DELETED
@@ -1,123 +0,0 @@
1
- ---
2
- title: Adding Nodes to the DOM
3
- ---
4
-
5
- The parser allows you to add new elements to an existing document. Find below an
6
- example for that.
7
-
8
- **Remarks**
9
-
10
- - It is not possible to create nodes via `->outertext`, `->innertext`, or
11
- `->plaintext`. These properties only change the text representation of a node
12
- and will return undesired results if used incorrectly.
13
- - Use [`$html->createElement`](/api/HtmlDocument/createElement) and
14
- [`$html->createTextNode`](/api/HtmlDocument/createTextNode) to create
15
- new nodes.
16
- - Use [`$node->appendChild`](/api/HtmlNode/appendChild) to add a
17
- node as child to another node.
18
- - Nodes can be combined in any order.
19
-
20
- **Example**
21
-
22
- ```php
23
- <?php
24
- // This example illustrates adding new elements to the DOM.
25
-
26
- require_once 'HtmlDocument.php';
27
- use simplehtmldom\HtmlDocument;
28
-
29
- /***************************** table data *************************************/
30
-
31
- $header = array('Ocean', 'Volume (km^3)');
32
-
33
- $data = array(
34
- array('Arctic Ocean', 18750000),
35
- array('Atlantic Ocean', 310410900),
36
- array('Indian Ocean', 264000000),
37
- array('Pacific Ocean', 660000000),
38
- array('Souce China Sea', 9880000),
39
- array('Southern Ocean', 71800000)
40
- );
41
-
42
- /***************************** template ***************************************/
43
-
44
- $doc = <<<EOD
45
- <html>
46
- <head>
47
- <style>
48
- table { border: 1px solid black; }
49
-
50
- tr:nth-child(even) { background: #CCC }
51
- tr:nth-child(odd) { background: #FFF }
52
- </style>
53
- </head>
54
- <body>
55
- <h1>Volumes of the World's Oceans</h1>
56
- </body>
57
- </html>
58
- EOD;
59
-
60
- /***************************** code *******************************************/
61
-
62
- $html = new HtmlDocument();
63
- $html->load($doc);
64
- $body = $html->find('body', 0);
65
- $table = $html->createElement('table');
66
-
67
- // Header row
68
- $tr = $html->createElement('tr');
69
- foreach ($header as $entry) {
70
- $th = $html->createElement('th', $entry);
71
- $tr->appendChild($th);
72
- }
73
- $table->appendChild($tr);
74
-
75
- // Table data
76
- foreach ($data as $row) {
77
- $tr = $html->createElement('tr');
78
- foreach ($row as $entry) {
79
-
80
- // (optional) Add info to the volume column
81
- if (is_numeric($entry)) {
82
- $value = number_format($entry);
83
- $td = $html->createElement('td', $value);
84
- $td->setAttribute('volume', $entry);
85
- } else {
86
- $td = $html->createElement('td', $entry);
87
- }
88
-
89
- $tr->appendChild($td);
90
- }
91
- $table->appendChild($tr);
92
- }
93
-
94
- $body->appendChild($table);
95
-
96
- echo $html . PHP_EOL;
97
-
98
- /**
99
- * Output (beautified)
100
- *
101
- * <html>
102
- * <head>
103
- * <style>
104
- * table { border: 1px solid black; }
105
- * tr:nth-child(even) { background: #CCC }
106
- * tr:nth-child(odd) { background: #FFF }
107
- * </style>
108
- * </head>
109
- * <body>
110
- * <h1>Volumes of the World's Oceans</h1>
111
- * <table>
112
- * <tr><th>Ocean</th><th>Volume (km^3)</th></tr>
113
- * <tr><td>Arctic Ocean</td><td volume="18750000">18,750,000</td></tr>
114
- * <tr><td>Atlantic Ocean</td><td volume="310410900">310,410,900</td></tr>
115
- * <tr><td>Indian Ocean</td><td volume="264000000">264,000,000</td></tr>
116
- * <tr><td>Pacific Ocean</td><td volume="660000000">660,000,000</td></tr>
117
- * <tr><td>Souce China Sea</td><td volume="9880000">9,880,000</td></tr>
118
- * <tr><td>Southern Ocean</td><td volume="71800000">71,800,000</td></tr>
119
- * </table>
120
- * </body>
121
- * </html>
122
- */
123
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/creating-dom-objects.md DELETED
@@ -1,37 +0,0 @@
1
- ---
2
- title: Creating HTML DOM objects
3
- ---
4
-
5
- ## From string
6
-
7
- ```php
8
- <?php
9
- include_once 'HtmlDocument';
10
- use simplehtmldom\HtmlDocument;
11
-
12
- $html = new HtmlDocument();
13
- $html->load('<html><body>Hello!</body></html>');
14
- ```
15
-
16
- ## From URL
17
-
18
- ```php
19
- <?php
20
- include_once 'HtmlWeb';
21
- use simplehtmldom\HtmlWeb;
22
-
23
- $html = new HtmlWeb();
24
- $html->load('http://www.google.com/');
25
-
26
- ```
27
-
28
- ## From file
29
-
30
- ```php
31
- <?php
32
- include_once 'HtmlDocument';
33
- use simplehtmldom\HtmlDocument;
34
-
35
- $html = new HtmlDocument();
36
- $html->loadFile('test.htm');
37
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/customizing-parsing-behavior.md DELETED
@@ -1,18 +0,0 @@
1
- ---
2
- title: Customizing Parsing behavior
3
- ---
4
-
5
- ```php
6
- // Write a function with parameter "$element"
7
- function my_callback($element) {
8
- // Hide all <b> tags
9
- if ($element->tag === 'b')
10
- $element->outertext = '';
11
- }
12
-
13
- // Register the callback function with it's function name
14
- $html->set_callback('my_callback');
15
-
16
- // Callback function will be invoked while dumping
17
- echo $html;
18
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/finding-html-elements.md DELETED
@@ -1,90 +0,0 @@
1
- ---
2
- title: Finding HTML Elements
3
- ---
4
-
5
- ## By tag name
6
-
7
- ```php
8
- // Find all anchors, returns an array of element objects
9
- $ret = $html->find('a');
10
-
11
- // Find all anchors and images, returns an array of element objects
12
- $ret = $html->find('a, img');
13
-
14
- // Find (N)th anchor, returns element object or null if not found (zero based)
15
- $ret = $html->find('a', 0);
16
-
17
- // Find last anchor, returns element object or null if not found (zero based)
18
- $ret = $html->find('a', -1);
19
- ```
20
-
21
- ## By class name or id
22
-
23
- ```php
24
- // Find all elements with id=foo
25
- $ret = $html->find('#foo');
26
-
27
- // Find all element which class=foo
28
- $ret = $html->find('.foo');
29
- ```
30
-
31
- ## By attribute
32
-
33
- ```php
34
- // Find all <div> with the id attribute
35
- $ret = $html->find('div[id]');
36
-
37
- // Find all <div> which attribute id=foo
38
- $ret = $html->find('div[id=foo]');
39
-
40
- // Find all anchors and images with the "title" attribute
41
- $ret = $html->find('a[title], img[title]');
42
-
43
- // Find all elements that have an id attribute
44
- $ret = $html->find('*[id]');
45
- ```
46
-
47
- ## Descendants
48
-
49
- ```php
50
- // Find all <li> in <ul>
51
- $es = $html->find('ul li');
52
-
53
- // Find Nested <div> tags
54
- $es = $html->find('div div div');
55
-
56
- // Find all <td> in <table> which class=hello
57
- $es = $html->find('table.hello td');
58
-
59
- // Find all td tags with attribute align=center in table tags
60
- $es = $html->find('table td[align=center]');
61
- ```
62
-
63
- ## Nested elements
64
-
65
- ```php
66
- // Find all <li> in <ul>
67
- foreach($html->find('ul') as $ul)
68
- {
69
- foreach($ul->find('li') as $li)
70
- {
71
- // do something...
72
- }
73
- }
74
-
75
- // Find first <li> in first <ul>
76
- $e = $html->find('ul', 0)->find('li', 0);
77
- ```
78
-
79
- ## Text, comments and CDATA
80
-
81
- ```php
82
- // Find all text blocks
83
- $es = $html->find('text');
84
-
85
- // Find all comment (<!--...-->) blocks
86
- $es = $html->find('comment');
87
-
88
- // Find CDATA blocks
89
- $es = $html->find('cdata');
90
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/saving-dom-objects.md DELETED
@@ -1,11 +0,0 @@
1
- ---
2
- title: Saving DOM objects
3
- ---
4
-
5
- ```php
6
- // Dumps the internal DOM tree back into string
7
- $str = $html->save();
8
-
9
- // Dumps the internal DOM tree back into a file
10
- $html->save('result.htm');
11
- ```
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/traversing-dom-tree.md DELETED
@@ -1,23 +0,0 @@
1
- ---
2
- title: Traversing the DOM tree
3
- ---
4
-
5
- If you are not so familiar with HTML DOM, check this [link](http://php.net/manual/en/book.dom.php) to learn more...
6
-
7
- ```php
8
- // Example
9
- echo $html->find("#div1", 0)->children(1)->children(1)->children(2)->id;
10
- // or
11
- echo $html->getElementById("div1")->childNodes(1)->childNodes(1)->childNodes(2)->getAttribute('id');
12
- ```
13
-
14
- You can also call methods using the **Camel naming conventions**.
15
-
16
- Method | Description
17
- ------ | -----------
18
- `$e->children( [int $index] ) : mixed` | Returns the Nth **child object** if **index** is set, otherwise return an **array of children**.
19
- `$e->parent() : element` | Returns the **parent** of element.
20
- `$e->first_child() : element` | Returns the **first child** of element, or **null** if not found.
21
- `$e->last_child() : element` | Returns the **last child** of element, or **null** if not found.
22
- `$e->next_sibling() : element` | Returns the **next sibling** of element, or **null** if not found.
23
- `$e->prev_sibling() : element` | Returns the **previous sibling** of element, or **null** if not found.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/quick-start.md DELETED
@@ -1,98 +0,0 @@
1
- ---
2
- title: Quick Start
3
- ---
4
-
5
- Find below sample code that demonstrates the fundamental features of PHP Simple HTML DOM Parser.
6
-
7
- ## Read plain text from HTML document
8
-
9
- ```php
10
- <?php
11
- include_once 'HtmlWeb.php';
12
- use simplehtmldom\HtmlWeb;
13
-
14
- $html = new HtmlWeb();
15
- echo $html->load('https://www.google.com/')->plaintext;
16
- ```
17
-
18
- Loads a webpage into memory, parses it and returns the plain text.
19
-
20
- ## Read plain text from HTML string
21
-
22
- ```php
23
- <?php
24
- include_once 'HtmlDocument.php';
25
- use simplehtmldom\HtmlDocument;
26
-
27
- $html = new HtmlDocument();
28
- echo $html->load('<ul><li>Hello, World!</li></ul>')->plaintext;
29
- ```
30
-
31
- Parses HTML formatted text and returns the plain text. Note that the parser handles partial documents as well as full documents.
32
-
33
- ## Read specific elements from HTML document
34
-
35
- ```php
36
- <?php
37
- include_once 'HtmlWeb.php';
38
- use simplehtmldom\HtmlWeb;
39
-
40
- $html = new HtmlWeb();
41
- $html->load('https://www.google.com/');
42
-
43
- foreach($html->find('img') as $element)
44
- echo $element->src . '<br>';
45
-
46
- foreach($html->find('a') as $element)
47
- echo $element->href . '<br>';
48
- ```
49
-
50
- Loads the specified document into memory and returns a list of image sources as well as anchor links. Note that [`find`](manual/finding-html-elements.md) supports [CSS](https://www.w3.org/TR/selectors/) selectors to find elements in the DOM.
51
-
52
- ## Modify HTML documents
53
-
54
- ```php
55
- <?php
56
- include_once 'HtmlDocument.php';
57
- use simplehtmldom\HtmlDocument;
58
-
59
- $html = new HtmlDocument();
60
- $html->load('<div id="hello">Hello, </div><div id="world">World!</div>');
61
-
62
- $html->find('div', 1)->class = 'bar';
63
- $html->find('div[id=hello]', 0)->innertext = 'foo';
64
-
65
- echo $html; // <div id="hello">foo</div><div id="world" class="bar">World!</div>
66
- ```
67
-
68
- Parses the provided HTML string and replaces elements in the DOM before returning the updated HTML string. In this example, the class for the second `div` element is set to `bar` and the inner text for the first `div` element to `foo`.
69
-
70
- Note that [`find`](manual/finding-html-elements.md) supports a second parameter to return a single element from the array of matches.
71
-
72
- Note that attributes can be accessed directly by the means of magic methods (`->class` and `->innertext` in the example above).
73
-
74
- ## Collect information from Slashdot
75
-
76
- ```php
77
- <?php
78
- include_once 'HtmlWeb.php';
79
- use simplehtmldom\HtmlWeb;
80
-
81
- $html = new HtmlWeb();
82
- $html->load('https://slashdot.org/');
83
-
84
- $articles = $html->find('article[data-fhtype="story"]');
85
-
86
- foreach($articles as $article) {
87
- $item['title'] = $article->find('.story-title', 0)->plaintext;
88
- $item['intro'] = $article->find('.p', 0)->plaintext;
89
- $item['details'] = $article->find('.details', 0)->plaintext;
90
- $items[] = $item;
91
- }
92
-
93
- print_r($items);
94
- ```
95
-
96
- Collects information from [Slashdot](https://slashdot.org/) for further processing.
97
-
98
- Note that the combination of CSS selectors and magic methods makes the process of parsing HTML documents a simple task that is easy to understand.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/docs/requirements.md DELETED
@@ -1,7 +0,0 @@
1
- | Requirement | Minimum | Recommended |
2
- | ----------- |:-------:|:-----------: |
3
- | PHP Version | 5.6.0 | [Latest stable release](https://www.php.net/supported-versions.php) |
4
- | PHP Extensions | [iconv](https://www.php.net/manual/en/book.iconv.php) | [iconv](https://www.php.net/manual/en/book.iconv.php),<br> [mbstring](https://www.php.net/manual/en/book.mbstring.php), <br> [cURL](https://www.php.net/manual/en/book.curl.php)
5
- | PHP INI Settings | --- | [allow_url_fopen = On](https://www.php.net/manual/en/filesystem.configuration.php#ini.allow-url-fopen) **
6
-
7
- ** This is only necessary if cURL is not available.
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/manual/mkdocs.yml DELETED
@@ -1,152 +0,0 @@
1
- site_name: Simple HTML DOM documentation
2
- site_url: http://simplehtmldom.sourceforge.net/
3
- site_description: A simple HTML DOM parser written in PHP
4
-
5
- repo_name: SourceForge
6
- repo_url: https://sourceforge.net/projects/simplehtmldom/
7
-
8
- theme:
9
- name: readthedocs
10
- custom_dir: 'custom_theme/'
11
-
12
- markdown_extensions:
13
- - footnotes
14
-
15
- google_analytics: ['UA-3452027-2', 'simplehtmldom.sourceforge.net']
16
-
17
- extra_css: [extra.css]
18
-
19
- nav:
20
- - 'index.md'
21
- - 'requirements.md'
22
- - 'quick-start.md'
23
- - FAQ:
24
- - 'faq/0001.md'
25
- - Manual:
26
- - 'manual/creating-dom-objects.md'
27
- - 'manual/finding-html-elements.md'
28
- - 'manual/accessing-element-attributes.md'
29
- - 'manual/traversing-dom-tree.md'
30
- - 'manual/saving-dom-objects.md'
31
- - 'manual/customizing-parsing-behavior.md'
32
- - 'manual/adding-nodes.md'
33
- - API:
34
- - 'api/constants.md'
35
- - HtmlDocument:
36
- - 'api/HtmlDocument/index.md'
37
- - 'api/HtmlDocument/__call.md'
38
- - 'api/HtmlDocument/__construct.md'
39
- - 'api/HtmlDocument/__debugInfo.md'
40
- - 'api/HtmlDocument/__destruct.md'
41
- - 'api/HtmlDocument/load.md'
42
- - 'api/HtmlDocument/set_callback.md'
43
- - 'api/HtmlDocument/remove_callback.md'
44
- - 'api/HtmlDocument/save.md'
45
- - 'api/HtmlDocument/expect.md'
46
- - 'api/HtmlDocument/find.md'
47
- - 'api/HtmlDocument/decode.md'
48
- - 'api/HtmlDocument/dump.md'
49
- - 'api/HtmlDocument/prepare.md'
50
- - 'api/HtmlDocument/parse.md'
51
- - 'api/HtmlDocument/parse_charset.md'
52
- - 'api/HtmlDocument/read_tag.md'
53
- - 'api/HtmlDocument/parse_attr.md'
54
- - 'api/HtmlDocument/link_nodes.md'
55
- - 'api/HtmlDocument/as_text_node.md'
56
- - 'api/HtmlDocument/skip.md'
57
- - 'api/HtmlDocument/copy_skip.md'
58
- - 'api/HtmlDocument/copy_until.md'
59
- - 'api/HtmlDocument/copy_until_char.md'
60
- - 'api/HtmlDocument/remove_noise.md'
61
- - 'api/HtmlDocument/restore_noise.md'
62
- - 'api/HtmlDocument/search_noise.md'
63
- - 'api/HtmlDocument/__toString.md'
64
- - 'api/HtmlDocument/__get.md'
65
- - 'api/HtmlDocument/childNodes.md'
66
- - 'api/HtmlDocument/firstChild.md'
67
- - 'api/HtmlDocument/lastChild.md'
68
- - 'api/HtmlDocument/createElement.md'
69
- - 'api/HtmlDocument/createTextNode.md'
70
- - 'api/HtmlDocument/getElementById.md'
71
- - 'api/HtmlDocument/getElementsById.md'
72
- - 'api/HtmlDocument/getElementByTagName.md'
73
- - 'api/HtmlDocument/getElementsByTagName.md'
74
- - 'api/HtmlDocument/loadFile.md'
75
- - HtmlNode:
76
- - 'api/HtmlNode/index.md'
77
- - 'api/HtmlNode/definitions.md'
78
- - 'api/HtmlNode/__call.md'
79
- - 'api/HtmlNode/__construct.md'
80
- - 'api/HtmlNode/__debugInfo.md'
81
- - 'api/HtmlNode/__destruct.md'
82
- - 'api/HtmlNode/__get.md'
83
- - 'api/HtmlNode/__isset.md'
84
- - 'api/HtmlNode/__set.md'
85
- - 'api/HtmlNode/__toString.md'
86
- - 'api/HtmlNode/__unset.md'
87
- - 'api/HtmlNode/addClass.md'
88
- - 'api/HtmlNode/appendChild.md'
89
- - 'api/HtmlNode/childNodes.md'
90
- - 'api/HtmlNode/clear.md'
91
- - 'api/HtmlNode/convert_text.md'
92
- - 'api/HtmlNode/dump.md'
93
- - 'api/HtmlNode/dump_node.md'
94
- - 'api/HtmlNode/expect.md'
95
- - 'api/HtmlNode/find.md'
96
- - 'api/HtmlNode/find_ancestor_tag.md'
97
- - 'api/HtmlNode/firstChild.md'
98
- - 'api/HtmlNode/get_display_size.md'
99
- - 'api/HtmlNode/getAllAttributes.md'
100
- - 'api/HtmlNode/getAttribute.md'
101
- - 'api/HtmlNode/getElementById.md'
102
- - 'api/HtmlNode/getElementByTagName.md'
103
- - 'api/HtmlNode/getElementsById.md'
104
- - 'api/HtmlNode/getElementsByTagName.md'
105
- - 'api/HtmlNode/hasAttribute.md'
106
- - 'api/HtmlNode/hasChildNodes.md'
107
- - 'api/HtmlNode/hasClass.md'
108
- - 'api/HtmlNode/innertext.md'
109
- - 'api/HtmlNode/is_block_element.md'
110
- - 'api/HtmlNode/is_inline_element.md'
111
- - 'api/HtmlNode/is_utf8.md'
112
- - 'api/HtmlNode/lastChild.md'
113
- - 'api/HtmlNode/makeup.md'
114
- - 'api/HtmlNode/match.md'
115
- - 'api/HtmlNode/nextSibling.md'
116
- - 'api/HtmlNode/nodeName.md'
117
- - 'api/HtmlNode/outertext.md'
118
- - 'api/HtmlNode/parent.md'
119
- - 'api/HtmlNode/parentNode.md'
120
- - 'api/HtmlNode/parse_selector.md'
121
- - 'api/HtmlNode/previousSibling.md'
122
- - 'api/HtmlNode/remove.md'
123
- - 'api/HtmlNode/removeAttribute.md'
124
- - 'api/HtmlNode/removeChild.md'
125
- - 'api/HtmlNode/removeClass.md'
126
- - 'api/HtmlNode/save.md'
127
- - 'api/HtmlNode/seek.md'
128
- - 'api/HtmlNode/setAttribute.md'
129
- - 'api/HtmlNode/text.md'
130
- - 'api/HtmlNode/xmltext.md'
131
- - Debug:
132
- - 'api/Debug/index.md'
133
- - 'api/Debug/enable.md'
134
- - 'api/Debug/disable.md'
135
- - 'api/Debug/log.md'
136
- - 'api/Debug/log_once.md'
137
- - 'api/Debug/setDebugHandler.md'
138
-
139
- plugins:
140
- - redirects:
141
- redirect_maps:
142
- 'api/HtmlDocument/load_file.md': 'api/HtmlDocument/loadFile.md'
143
- 'api/HtmlDocument/clear.md': 'api/HtmlDocument/__destruct.md'
144
- 'api/HtmlNode/children.md': 'api/HtmlNode/childNodes.md'
145
- 'api/HtmlNode/first_child.md': 'api/HtmlNode/firstChild.md'
146
- 'api/HtmlNode/has_child.md': 'api/HtmlNode/hasChildNodes.md'
147
- 'api/HtmlNode/last_child.md': 'api/HtmlNode/lastChild.md'
148
- 'api/HtmlNode/next_sibling.md': 'api/HtmlNode/nextSibling.md'
149
- 'api/HtmlNode/prev_sibling.md': 'api/HtmlNode/previousSibling.md'
150
- 'api/HtmlNode/prevSibling.md': 'api/HtmlNode/previousSibling.md'
151
-
152
- docs_dir: 'docs'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/phpcompatibility.xml DELETED
@@ -1,11 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <ruleset name="PHPCompatibility">
3
- <description>Defines rules for PHPCompatibility</description>
4
- <exclude-pattern>./app</exclude-pattern>
5
- <exclude-pattern>./example</exclude-pattern>
6
- <exclude-pattern>./manual</exclude-pattern>
7
- <exclude-pattern>./testcase</exclude-pattern>
8
- <exclude-pattern>./tests</exclude-pattern>
9
- <config name="testVersion" value="5.6"/>
10
- <rule ref="PHPCompatibility" />
11
- </ruleset>
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/phpcs.xml DELETED
@@ -1,48 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <ruleset name="Ruleset">
3
- <description>Created with the PHP Coding Standard Generator. http://edorian.github.com/php-coding-standard-generator/</description>
4
- <exclude-pattern>./app</exclude-pattern>
5
- <exclude-pattern>./example</exclude-pattern>
6
- <exclude-pattern>./manual</exclude-pattern>
7
- <exclude-pattern>./testcase</exclude-pattern>
8
- <rule ref="Generic.WhiteSpace.DisallowSpaceIndent"/>
9
- <rule ref="Generic.Classes.DuplicateClassName"/>
10
- <rule ref="PSR2.Methods.MethodDeclaration" />
11
- <rule ref="Generic.CodeAnalysis.EmptyStatement"/>
12
- <rule ref="Generic.CodeAnalysis.UnconditionalIfStatement"/>
13
- <rule ref="Generic.CodeAnalysis.UnnecessaryFinalModifier"/>
14
- <rule ref="Generic.CodeAnalysis.UselessOverridingMethod"/>
15
- <rule ref="Generic.Functions.FunctionCallArgumentSpacing"/>
16
- <rule ref="Generic.Functions.OpeningFunctionBraceBsdAllman"/>
17
- <rule ref="PEAR.Functions.ValidDefaultValue"/>
18
- <rule ref="PSR2.ControlStructures.ElseIfDeclaration"/>
19
- <rule ref="PSR2.ControlStructures.ControlStructureSpacing"/>
20
- <rule ref="Squiz.WhiteSpace.CastSpacing"/>
21
- <rule ref="Squiz.WhiteSpace.OperatorSpacing"/>
22
- <rule ref="Squiz.WhiteSpace.SemicolonSpacing"/>
23
- <rule ref="Squiz.WhiteSpace.SuperfluousWhitespace"/>
24
- <rule ref="Squiz.Strings.ConcatenationSpacing">
25
- <properties>
26
- <property name="spacing" value="1"/>
27
- <property name="ignoreNewlines" value="true"/>
28
- </properties>
29
- </rule>
30
- <rule ref="Squiz.Functions.FunctionDeclarationArgumentSpacing">
31
- <properties>
32
- <property name="equalsSpacing" value="1"/>
33
- </properties>
34
- </rule>
35
- <rule ref="Generic.Files.LineLength">
36
- <properties>
37
- <property name="lineLimit" value="80"/>
38
- <property name="absoluteLineLimit" value="120"/>
39
- </properties>
40
- </rule>
41
- <rule ref="Generic.NamingConventions.UpperCaseConstantName"/>
42
- <rule ref="Generic.PHP.LowerCaseConstant"/>
43
- <rule ref="Squiz.Strings.DoubleQuoteUsage">
44
- <exclude name="Squiz.Strings.DoubleQuoteUsage.ContainsVar" />
45
- </rule>
46
- <rule ref="Generic.Strings.UnnecessaryStringConcat"/>
47
- <rule ref="PSR2.Files.EndFileNewline"/>
48
- </ruleset>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/phpunit.xml DELETED
@@ -1,41 +0,0 @@
1
- <!--
2
- PHPUnit is a programmer-oriented testing framework for PHP.
3
- https://phpunit.de/
4
-
5
- Unit tests are based on PHPUnit 6
6
- https://phpunit.de/announcements/phpunit-6.html
7
- -->
8
- <phpunit
9
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
10
- xsi:noNamespaceSchemaLocation="http://schema.phpunit.de/6.5/phpunit.xsd"
11
- colors="true"
12
- processIsolation="false"
13
- timeoutForSmallTests="1"
14
- timeoutForMediumTests="1"
15
- timeoutForLargeTests="6" >
16
-
17
- <testsuites>
18
- <testsuite name="standard">
19
- <directory suffix='_test.php'>tests</directory>
20
- <exclude>tests/memory_parsing_test.php</exclude>
21
- <exclude>tests/entity_decoding_test.php</exclude>
22
- </testsuite>
23
- <testsuite name="memory">
24
- <file>tests/memory_parsing_test.php</file>
25
- </testsuite>
26
- <testsuite name="entity_decoding">
27
- <file>tests/entity_decoding_test.php</file>
28
- </testsuite>
29
- </testsuites>
30
-
31
- <filter>
32
- <whitelist>
33
- <directory>simple_html_dom.php</directory>
34
- <directory>HtmlDocument.php</directory>
35
- <directory>HtmlNode.php</directory>
36
- <directory>HtmlWeb.php</directory>
37
- <directory>Debug.php</directory>
38
- </whitelist>
39
- </filter>
40
-
41
- </phpunit>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/release.sh DELETED
@@ -1,69 +0,0 @@
1
- #!/bin/bash
2
-
3
- # This file automates the release process based on the tag of the current commit
4
- #
5
- # 1) Tag current version: "git tag x.y.z", where x is the major, y the minor
6
- # and z the patch version number. For example: "git tag 2.0.0"
7
- #
8
- # 2) Build release file: "sh release.sh". For the example above, this will build
9
- # "simplehtmldom_2_0_0.zip"
10
-
11
- tag=$(git tag -l --points-at HEAD)
12
-
13
- if [ -z "$tag" ]; then
14
- echo "The current commit is not tagged!"
15
- echo "Insert valid tag name or press Ctrl+C to abort."
16
- read -p "Format: Major.Minor.Patch[-Suffix]: " tag
17
- if [ -z "$tag" ]; then
18
- echo "No tag name provided."
19
- exit
20
- fi;
21
- $(git tag ${tag})
22
- fi;
23
-
24
- # Check if the tag follows https://semver.org/
25
- version="$(echo ${tag} | cut -d'-' -f1)"
26
- major="$(echo ${version} | cut -d'.' -f1)"
27
- minor="$(echo ${version} | cut -d'.' -f2)"
28
- patch="$(echo ${version} | cut -d'.' -f3)"
29
- suffix="$(echo ${tag} | cut -d'-' -f2)"
30
-
31
- # git tag could return an error
32
- tag=$(git tag -l --points-at HEAD)
33
-
34
- if [ -z "$tag" ]; then
35
- echo "Something went wrong!"
36
- exit
37
- fi;
38
-
39
- echo "Building release for ${tag}..."
40
-
41
- if [ -z "$major" ]; then echo "Major version is missing in ${tag}"; fi;
42
- if [ -z "$minor" ]; then echo "Minor version is missing in ${tag}"; fi;
43
- if [ -z "$patch" ]; then echo "Patch version is missing in ${tag}"; fi;
44
-
45
- if [ -z "$major" ] || [ -z "$minor" ] || [ -z "$patch" ]; then
46
- echo "Aborting script!"
47
- exit
48
- fi;
49
-
50
- # Archive file
51
- prefix="simplehtmldom_"
52
- version=$(echo "$tag" | tr . _)
53
-
54
- # Keyword substitution in files
55
- marker="\\\$Rev\\\$"
56
- replacement="Rev. $tag ($(git rev-list --count HEAD))"
57
-
58
- # Build archive
59
- if [ "$version" ]; then
60
- # Inject version information to all files (limit to file type!)
61
- find . -name '*.php' -exec sed -i -e "s/$marker/$replacement/g" {} \;;
62
- find . -name '*.htm' -exec sed -i -e "s/$marker/$replacement/g" {} \;;
63
- # Create stash commit (otherwise git archive won't work)
64
- stash=$(git stash create);
65
- git archive --format=zip --output="$prefix$version".zip --worktree-attributes "$stash";
66
- # Cleanup
67
- git checkout .;
68
- git gc --prune;
69
- fi;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/simple_html_dom.php CHANGED
@@ -1,153 +1,153 @@
1
- <?php
2
-
3
- /**
4
- * Website: http://sourceforge.net/projects/simplehtmldom/
5
- * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
- *
7
- * Licensed under The MIT License
8
- * See the LICENSE file in the project root for more information.
9
- *
10
- * Authors:
11
- * S.C. Chen
12
- * John Schlick
13
- * Rus Carroll
14
- * logmanoriginal
15
- *
16
- * Contributors:
17
- * Yousuke Kumakura
18
- * Vadim Voituk
19
- * Antcs
20
- *
21
- * Version $Rev$
22
- */
23
-
24
- if (defined('DEFAULT_TARGET_CHARSET')) {
25
- define('\simplehtmldom\DEFAULT_TARGET_CHARSET', DEFAULT_TARGET_CHARSET);
26
- }
27
-
28
- if (defined('DEFAULT_BR_TEXT')) {
29
- define('\simplehtmldom\DEFAULT_BR_TEXT', DEFAULT_BR_TEXT);
30
- }
31
-
32
- if (defined('DEFAULT_SPAN_TEXT')) {
33
- define('\simplehtmldom\DEFAULT_SPAN_TEXT', DEFAULT_SPAN_TEXT);
34
- }
35
-
36
- if (defined('MAX_FILE_SIZE')) {
37
- define('\simplehtmldom\MAX_FILE_SIZE', MAX_FILE_SIZE);
38
- }
39
-
40
- include_once 'HtmlDocument.php';
41
- include_once 'HtmlNode.php';
42
-
43
- if (!defined('DEFAULT_TARGET_CHARSET')) {
44
- define('DEFAULT_TARGET_CHARSET', \simplehtmldom\DEFAULT_TARGET_CHARSET);
45
- }
46
-
47
- if (!defined('DEFAULT_BR_TEXT')) {
48
- define('DEFAULT_BR_TEXT', \simplehtmldom\DEFAULT_BR_TEXT);
49
- }
50
-
51
- if (!defined('DEFAULT_SPAN_TEXT')) {
52
- define('DEFAULT_SPAN_TEXT', \simplehtmldom\DEFAULT_SPAN_TEXT);
53
- }
54
-
55
- if (!defined('MAX_FILE_SIZE')) {
56
- define('MAX_FILE_SIZE', \simplehtmldom\MAX_FILE_SIZE);
57
- }
58
-
59
- define('HDOM_TYPE_ELEMENT', \simplehtmldom\HtmlNode::HDOM_TYPE_ELEMENT);
60
- define('HDOM_TYPE_COMMENT', \simplehtmldom\HtmlNode::HDOM_TYPE_COMMENT);
61
- define('HDOM_TYPE_TEXT', \simplehtmldom\HtmlNode::HDOM_TYPE_TEXT);
62
- define('HDOM_TYPE_ROOT', \simplehtmldom\HtmlNode::HDOM_TYPE_ROOT);
63
- define('HDOM_TYPE_UNKNOWN', \simplehtmldom\HtmlNode::HDOM_TYPE_UNKNOWN);
64
- define('HDOM_QUOTE_DOUBLE', \simplehtmldom\HtmlNode::HDOM_QUOTE_DOUBLE);
65
- define('HDOM_QUOTE_SINGLE', \simplehtmldom\HtmlNode::HDOM_QUOTE_SINGLE);
66
- define('HDOM_QUOTE_NO', \simplehtmldom\HtmlNode::HDOM_QUOTE_NO);
67
- define('HDOM_INFO_BEGIN', \simplehtmldom\HtmlNode::HDOM_INFO_BEGIN);
68
- define('HDOM_INFO_END', \simplehtmldom\HtmlNode::HDOM_INFO_END);
69
- define('HDOM_INFO_QUOTE', \simplehtmldom\HtmlNode::HDOM_INFO_QUOTE);
70
- define('HDOM_INFO_SPACE', \simplehtmldom\HtmlNode::HDOM_INFO_SPACE);
71
- define('HDOM_INFO_TEXT', \simplehtmldom\HtmlNode::HDOM_INFO_TEXT);
72
- define('HDOM_INFO_INNER', \simplehtmldom\HtmlNode::HDOM_INFO_INNER);
73
- define('HDOM_INFO_OUTER', \simplehtmldom\HtmlNode::HDOM_INFO_OUTER);
74
- define('HDOM_INFO_ENDSPACE', \simplehtmldom\HtmlNode::HDOM_INFO_ENDSPACE);
75
-
76
- define('HDOM_SMARTY_AS_TEXT', \simplehtmldom\HDOM_SMARTY_AS_TEXT);
77
-
78
- class_alias('\simplehtmldom\HtmlDocument', 'simple_html_dom', true);
79
- class_alias('\simplehtmldom\HtmlNode', 'simple_html_dom_node', true);
80
-
81
- function file_get_html(
82
- $url,
83
- $use_include_path = false,
84
- $context = null,
85
- $offset = 0,
86
- $maxLen = -1,
87
- $lowercase = true,
88
- $forceTagsClosed = true,
89
- $target_charset = DEFAULT_TARGET_CHARSET,
90
- $stripRN = true,
91
- $defaultBRText = DEFAULT_BR_TEXT,
92
- $defaultSpanText = DEFAULT_SPAN_TEXT)
93
- {
94
- if($maxLen <= 0) { $maxLen = MAX_FILE_SIZE; }
95
-
96
- $dom = new simple_html_dom(
97
- null,
98
- $lowercase,
99
- $forceTagsClosed,
100
- $target_charset,
101
- $stripRN,
102
- $defaultBRText,
103
- $defaultSpanText
104
- );
105
-
106
- $contents = file_get_contents(
107
- $url,
108
- $use_include_path,
109
- $context,
110
- $offset,
111
- $maxLen + 1 // Load extra byte for limit check
112
- );
113
-
114
- if (empty($contents) || strlen($contents) > $maxLen) {
115
- $dom->clear();
116
- return false;
117
- }
118
-
119
- return $dom->load($contents, $lowercase, $stripRN);
120
- }
121
-
122
- function str_get_html(
123
- $str,
124
- $lowercase = true,
125
- $forceTagsClosed = true,
126
- $target_charset = DEFAULT_TARGET_CHARSET,
127
- $stripRN = true,
128
- $defaultBRText = DEFAULT_BR_TEXT,
129
- $defaultSpanText = DEFAULT_SPAN_TEXT)
130
- {
131
- $dom = new simple_html_dom(
132
- null,
133
- $lowercase,
134
- $forceTagsClosed,
135
- $target_charset,
136
- $stripRN,
137
- $defaultBRText,
138
- $defaultSpanText
139
- );
140
-
141
- if (empty($str) || strlen($str) > MAX_FILE_SIZE) {
142
- $dom->clear();
143
- return false;
144
- }
145
-
146
- return $dom->load($str, $lowercase, $stripRN);
147
- }
148
-
149
- /** @codeCoverageIgnore */
150
- function dump_html_tree($node, $show_attr = true, $deep = 0)
151
- {
152
- $node->dump($node);
153
- }
1
+ <?php
2
+
3
+ /**
4
+ * Website: http://sourceforge.net/projects/simplehtmldom/
5
+ * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
+ *
7
+ * Licensed under The MIT License
8
+ * See the LICENSE file in the project root for more information.
9
+ *
10
+ * Authors:
11
+ * S.C. Chen
12
+ * John Schlick
13
+ * Rus Carroll
14
+ * logmanoriginal
15
+ *
16
+ * Contributors:
17
+ * Yousuke Kumakura
18
+ * Vadim Voituk
19
+ * Antcs
20
+ *
21
+ * Version $Rev$
22
+ */
23
+
24
+ if (defined('DEFAULT_TARGET_CHARSET')) {
25
+ define('\simplehtmldom\DEFAULT_TARGET_CHARSET', DEFAULT_TARGET_CHARSET);
26
+ }
27
+
28
+ if (defined('DEFAULT_BR_TEXT')) {
29
+ define('\simplehtmldom\DEFAULT_BR_TEXT', DEFAULT_BR_TEXT);
30
+ }
31
+
32
+ if (defined('DEFAULT_SPAN_TEXT')) {
33
+ define('\simplehtmldom\DEFAULT_SPAN_TEXT', DEFAULT_SPAN_TEXT);
34
+ }
35
+
36
+ if (defined('MAX_FILE_SIZE')) {
37
+ define('\simplehtmldom\MAX_FILE_SIZE', MAX_FILE_SIZE);
38
+ }
39
+
40
+ include_once 'HtmlDocument.php';
41
+ include_once 'HtmlNode.php';
42
+
43
+ if (!defined('DEFAULT_TARGET_CHARSET')) {
44
+ define('DEFAULT_TARGET_CHARSET', \simplehtmldom\DEFAULT_TARGET_CHARSET);
45
+ }
46
+
47
+ if (!defined('DEFAULT_BR_TEXT')) {
48
+ define('DEFAULT_BR_TEXT', \simplehtmldom\DEFAULT_BR_TEXT);
49
+ }
50
+
51
+ if (!defined('DEFAULT_SPAN_TEXT')) {
52
+ define('DEFAULT_SPAN_TEXT', \simplehtmldom\DEFAULT_SPAN_TEXT);
53
+ }
54
+
55
+ if (!defined('MAX_FILE_SIZE')) {
56
+ define('MAX_FILE_SIZE', \simplehtmldom\MAX_FILE_SIZE);
57
+ }
58
+
59
+ define('HDOM_TYPE_ELEMENT', \simplehtmldom\HtmlNode::HDOM_TYPE_ELEMENT);
60
+ define('HDOM_TYPE_COMMENT', \simplehtmldom\HtmlNode::HDOM_TYPE_COMMENT);
61
+ define('HDOM_TYPE_TEXT', \simplehtmldom\HtmlNode::HDOM_TYPE_TEXT);
62
+ define('HDOM_TYPE_ROOT', \simplehtmldom\HtmlNode::HDOM_TYPE_ROOT);
63
+ define('HDOM_TYPE_UNKNOWN', \simplehtmldom\HtmlNode::HDOM_TYPE_UNKNOWN);
64
+ define('HDOM_QUOTE_DOUBLE', \simplehtmldom\HtmlNode::HDOM_QUOTE_DOUBLE);
65
+ define('HDOM_QUOTE_SINGLE', \simplehtmldom\HtmlNode::HDOM_QUOTE_SINGLE);
66
+ define('HDOM_QUOTE_NO', \simplehtmldom\HtmlNode::HDOM_QUOTE_NO);
67
+ define('HDOM_INFO_BEGIN', \simplehtmldom\HtmlNode::HDOM_INFO_BEGIN);
68
+ define('HDOM_INFO_END', \simplehtmldom\HtmlNode::HDOM_INFO_END);
69
+ define('HDOM_INFO_QUOTE', \simplehtmldom\HtmlNode::HDOM_INFO_QUOTE);
70
+ define('HDOM_INFO_SPACE', \simplehtmldom\HtmlNode::HDOM_INFO_SPACE);
71
+ define('HDOM_INFO_TEXT', \simplehtmldom\HtmlNode::HDOM_INFO_TEXT);
72
+ define('HDOM_INFO_INNER', \simplehtmldom\HtmlNode::HDOM_INFO_INNER);
73
+ define('HDOM_INFO_OUTER', \simplehtmldom\HtmlNode::HDOM_INFO_OUTER);
74
+ define('HDOM_INFO_ENDSPACE', \simplehtmldom\HtmlNode::HDOM_INFO_ENDSPACE);
75
+
76
+ define('HDOM_SMARTY_AS_TEXT', \simplehtmldom\HDOM_SMARTY_AS_TEXT);
77
+
78
+ class_alias('\simplehtmldom\HtmlDocument', 'simple_html_dom', true);
79
+ class_alias('\simplehtmldom\HtmlNode', 'simple_html_dom_node', true);
80
+
81
+ function file_get_html(
82
+ $url,
83
+ $use_include_path = false,
84
+ $context = null,
85
+ $offset = 0,
86
+ $maxLen = -1,
87
+ $lowercase = true,
88
+ $forceTagsClosed = true,
89
+ $target_charset = DEFAULT_TARGET_CHARSET,
90
+ $stripRN = true,
91
+ $defaultBRText = DEFAULT_BR_TEXT,
92
+ $defaultSpanText = DEFAULT_SPAN_TEXT)
93
+ {
94
+ if($maxLen <= 0) { $maxLen = MAX_FILE_SIZE; }
95
+
96
+ $dom = new simple_html_dom(
97
+ null,
98
+ $lowercase,
99
+ $forceTagsClosed,
100
+ $target_charset,
101
+ $stripRN,
102
+ $defaultBRText,
103
+ $defaultSpanText
104
+ );
105
+
106
+ $contents = file_get_contents(
107
+ $url,
108
+ $use_include_path,
109
+ $context,
110
+ $offset,
111
+ $maxLen + 1 // Load extra byte for limit check
112
+ );
113
+
114
+ if (empty($contents) || strlen($contents) > $maxLen) {
115
+ $dom->clear();
116
+ return false;
117
+ }
118
+
119
+ return $dom->load($contents, $lowercase, $stripRN);
120
+ }
121
+
122
+ function str_get_html(
123
+ $str,
124
+ $lowercase = true,
125
+ $forceTagsClosed = true,
126
+ $target_charset = DEFAULT_TARGET_CHARSET,
127
+ $stripRN = true,
128
+ $defaultBRText = DEFAULT_BR_TEXT,
129
+ $defaultSpanText = DEFAULT_SPAN_TEXT)
130
+ {
131
+ $dom = new simple_html_dom(
132
+ null,
133
+ $lowercase,
134
+ $forceTagsClosed,
135
+ $target_charset,
136
+ $stripRN,
137
+ $defaultBRText,
138
+ $defaultSpanText
139
+ );
140
+
141
+ if (empty($str) || strlen($str) > MAX_FILE_SIZE) {
142
+ $dom->clear();
143
+ return false;
144
+ }
145
+
146
+ return $dom->load($str, $lowercase, $stripRN);
147
+ }
148
+
149
+ /** @codeCoverageIgnore */
150
+ function dump_html_tree($node, $show_attr = true, $deep = 0)
151
+ {
152
+ $node->dump($node);
153
+ }
vendor/simplehtmldom/simplehtmldom/tests/attribute_test.php DELETED
@@ -1,50 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests the attribute parsing behavior of the parser
7
- */
8
- class attribute_test extends TestCase {
9
- private $html;
10
-
11
- protected function setUp()
12
- {
13
- $this->html = new simple_html_dom();
14
- }
15
-
16
- protected function tearDown()
17
- {
18
- $this->html->clear();
19
- unset($this->html);
20
- }
21
-
22
- /** @dataProvider dataProvider_for_attribute_should_parse */
23
- public function test_attribute_should_parse($expected, $doc)
24
- {
25
- $this->html->load($doc);
26
- $this->assertEquals($expected, $this->html->save());
27
- }
28
-
29
- public function dataProvider_for_attribute_should_parse()
30
- {
31
- return array(
32
- 'double quotes' => array(
33
- '<p class="hidden"></p>',
34
- '<p class="hidden"></p>'
35
- ),
36
- 'single quotes' => array(
37
- '<p class=\'hidden\'></p>',
38
- '<p class=\'hidden\'></p>'
39
- ),
40
- 'no quotes' => array(
41
- '<p class=hidden></p>',
42
- '<p class=hidden></p>'
43
- ),
44
- 'no value' => array(
45
- '<p hidden></p>',
46
- '<p hidden></p>'
47
- )
48
- );
49
- }
50
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/bug_report_test.php DELETED
@@ -1,476 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests for bugs reported in the bug tracker
7
- *
8
- * @link https://sourceforge.net/p/simplehtmldom/bugs
9
- * Bug tracker
10
- */
11
- class bug_report_test extends TestCase {
12
- private $html;
13
-
14
- protected function setUp()
15
- {
16
- $this->html = new simple_html_dom;
17
- }
18
-
19
- protected function tearDown()
20
- {
21
- $this->html->clear();
22
- unset($this->html);
23
- }
24
-
25
- /**
26
- * Bug #56 (Attribute values overwritten,behaviour differs from browsers)
27
- *
28
- * If a HTML-tag defines an attribute twice (or more times) the parser only
29
- * considers the last appearance / the last value within the HTML-tag.
30
- * Browsers like IE and Firefox, however, only consider the first appearance
31
- * / the first value.
32
- *
33
- * @link https://sourceforge.net/p/simplehtmldom/bugs/56/ Bug #56
34
- */
35
- public function test_bug_56()
36
- {
37
- $doc = <<<HTML
38
- <a href="http://simplehtmldom.sourceforge.net/" target="_self" target="_blank"></a>
39
- HTML;
40
-
41
- $anchor = $this->html->load($doc)->find('a', 0);
42
-
43
- $this->assertEquals('_self', $anchor->target);
44
- }
45
-
46
- /**
47
- * Bug #62 (foreach($html->find(a[id=0]) as $e))
48
- *
49
- * `find` should be able to distinguish attributes with value "0" from other
50
- * attributes when doing `find('[id="0"]')`.
51
- *
52
- * @link https://sourceforge.net/p/simplehtmldom/bugs/62/ Bug #62
53
- */
54
- public function test_bug_62()
55
- {
56
- $doc = <<<HTML
57
- <body>
58
- <a href="#" id="0">PHP Simple HTML DOM Parser</a>
59
- <a href="#" id="1">A PHP based DOM parser</a>
60
- </body>
61
- HTML;
62
-
63
- $this->html->load($doc);
64
-
65
- $this->assertCount(1, $this->html->find('[id=0]'));
66
- $this->assertCount(1, $this->html->find('#0'));
67
- }
68
-
69
- /**
70
- * Bug 79 (tbody doesn't work in find())
71
- *
72
- * Specifying 'tbody' as CSS selector will not return the body of a table as
73
- * expected, but the table instead.
74
- *
75
- * @link https://sourceforge.net/p/simplehtmldom/bugs/79/ Bug #79
76
- */
77
- public function test_bug_79()
78
- {
79
- $doc = <<<HTML
80
- <table>
81
- <thead><tr><th>PHP Simple HTML DOM Parser</th></tr></thead>
82
- <tbody><tr><td>A PHP based DOM parser</td></tr></tbody>
83
- </table>
84
- HTML;
85
-
86
- $this->html->load($doc);
87
-
88
- $this->assertCount(2, $this->html->find('table tr'));
89
- $this->assertCount(1, $this->html->find('table thead tr'));
90
- $this->assertCount(1, $this->html->find('table tbody tr'));
91
- }
92
-
93
- /**
94
- * Bug #97 (plaintext returning conjoined elements)
95
- *
96
- * Paragraphs ("p" elements) should start on a new line when returned as
97
- * plain text.
98
- *
99
- * **Note**:
100
- *
101
- * Browsers typically separate multiple paragraphs by blank lines.
102
- *
103
- * @link https://sourceforge.net/p/simplehtmldom/bugs/97/ Bug #97
104
- */
105
- public function test_bug_97()
106
- {
107
- $doc = <<<HTML
108
- <div><p>PHP Simple HTML DOM Parser</p><p>A PHP based DOM parser</p></div>
109
- HTML;
110
-
111
- $text = $this->html->load($doc)->find('div', 0)->plaintext;
112
-
113
- $this->assertEquals(
114
- "PHP Simple HTML DOM Parser\n\nA PHP based DOM parser",
115
- $text
116
- );
117
- }
118
-
119
- /**
120
- * Bug #116 (problem getting tag attributes)
121
- *
122
- * Parsing fails on attributes that are not separated by whitespace.
123
- *
124
- * **Note**:
125
- *
126
- * The [Markup Validation Service](https://validator.w3.org/#validate_by_input)
127
- * reports: No space between attributes.
128
- *
129
- * @link https://sourceforge.net/p/simplehtmldom/bugs/116/ Bug #116
130
- */
131
- public function test_bug_116()
132
- {
133
- $doc = <<<HTML
134
- <a href="#"title="PHP Simple HTML DOM Parser"></a>
135
- HTML;
136
-
137
- $anchor = $this->html->load($doc)->find('a', 0);
138
-
139
- $this->assertCount(2, $anchor->getAllAttributes());
140
- $this->assertEquals('#', $anchor->href);
141
- $this->assertEquals('PHP Simple HTML DOM Parser', $anchor->title);
142
- }
143
-
144
- /**
145
- * Bug #121 (//Comment\n != //Comment\s)
146
- *
147
- * Replacing newlines results in scripts changing behavior if comments are
148
- * placed before functions.
149
- *
150
- * **Expected Behavior**:
151
- *
152
- * Script tags should be returned exactly as provided to the parser with all
153
- * newlines kept intact.
154
- *
155
- * **Workaround**:
156
- *
157
- * Set `$stripRN = false` when loading contents. This will prevent newlines
158
- * being replaced by spaces.
159
- *
160
- * @link https://sourceforge.net/p/simplehtmldom/bugs/121/ Bug #121
161
- */
162
- public function test_bug_121()
163
- {
164
- $doc = <<<HTML
165
- <script>
166
- // alert("PHP Simple HTML DOM Parser");
167
- alert("A PHP based DOM parser");
168
- </script>
169
- HTML;
170
-
171
- $dom = $this->html->load($doc);
172
-
173
- $this->assertEquals($doc, (string)$this->html);
174
- }
175
-
176
- /**
177
- * Bug #127 (Incorrect attribute value gives unexpected results)
178
- *
179
- * Attributes ending on "\" cause the parser to continue parsing the
180
- * remaining document as the attribute value.
181
- *
182
- * @link https://sourceforge.net/p/simplehtmldom/bugs/127/ Bug #127
183
- */
184
- public function test_bug_127()
185
- {
186
- $doc = <<<HTML
187
- <div id="before"></div>
188
- <a href="#" alt="PHP Simple HTML DOM Parser\">
189
- <div id="after"></div>
190
- HTML;
191
-
192
- $this->html->load($doc);
193
-
194
- $this->assertEquals(
195
- 'PHP Simple HTML DOM Parser\\',
196
- $this->html->find('a', 0)->alt
197
- );
198
- }
199
-
200
- /**
201
- * Bug #144 (Forward slashes in pattern break wildcard Find)
202
- *
203
- * The wildcard find method "*=" uses preg_match, delimited by forward
204
- * slashes. Therefore, if you have any forward slashes in your pattern,
205
- * you need to manually escape them, otherwise the find won't work. This
206
- * comes up frequently when searching for URL's in href attributes.
207
- *
208
- * @link https://sourceforge.net/p/simplehtmldom/bugs/144/ Bug #144
209
- */
210
- public function test_bug_144()
211
- {
212
- $doc = <<<HTML
213
- <a href="http://simplehtmldom.sourceforge.net">Home</a>
214
- <a href="http://simplehtmldom.sourceforge.net/manual.htm">Manual</a>
215
- HTML;
216
-
217
- $this->html->load($doc);
218
-
219
- $this->assertCount(1, $this->html->find('a[href*="/manual.htm"]'));
220
- }
221
-
222
- /**
223
- * Bug #153 (Invalid argument supplied for foreach())
224
- *
225
- * The parser incorrectly assumes that $this->nodes always exists (not null)
226
- * and tries to iterate over each element in the array when performing the
227
- * clear() operation.
228
- *
229
- * @link https://sourceforge.net/p/simplehtmldom/bugs/153/ Bug #153
230
- * @doesNotPerformAssertions
231
- */
232
- public function test_bug_153()
233
- {
234
- $doc = '<p>PHP Simple HTML DOM Parser</p>';
235
-
236
- $this->html->load($doc);
237
- $this->html->nodes = null;
238
-
239
- $this->html->clear();
240
- }
241
-
242
- /**
243
- * Bug #154 (Fatal error: Call to a member function find() on null)
244
- *
245
- * The parser incorrectly removes everything between `{` and `}` attempting
246
- * to remove "Smarty Scripts" from the DOM. This causes regular text to be
247
- * removed as well, if it contains curly braces.
248
- *
249
- * **Example Code**:
250
- *
251
- * ```HTML
252
- * <div class="before"></div>
253
- * <p>{PHP Simple HTML DOM Parser</p>
254
- * <p>{A PHP based DOM parser}</p>
255
- * <div id="after"></div>
256
- * ```
257
- *
258
- * @link https://sourceforge.net/p/simplehtmldom/bugs/154/ Bug #154
259
- * @link https://www.smarty.net Smarty
260
- */
261
- public function test_bug_154()
262
- {
263
- $doc = <<<HTML
264
- <p>{PHP Simple HTML DOM Parser</p>
265
- <p>{A PHP based DOM parser}</p>
266
- HTML;
267
-
268
- $this->html->load($doc);
269
-
270
- $this->assertCount(2, $this->html->find('p'));
271
-
272
- $this->assertEquals(
273
- '{PHP Simple HTML DOM Parser',
274
- $this->html->find('p', 0)->innertext
275
- );
276
-
277
- $this->assertEquals(
278
- '{A PHP based DOM parser}',
279
- $this->html->find('p', 1)->innertext
280
- );
281
-
282
- /* With Smarty as text */
283
-
284
- $this->html->load($doc, true, true, DEFAULT_BR_TEXT, DEFAULT_SPAN_TEXT, HDOM_SMARTY_AS_TEXT);
285
-
286
- $this->assertCount(1, $this->html->find('p'));
287
-
288
- $this->assertEquals(
289
- '{PHP Simple HTML DOM Parser</p><p>{A PHP based DOM parser}',
290
- $this->html->find('p', 0)->innertext
291
- );
292
-
293
- }
294
-
295
- /**
296
- * Bug #160 (Parsing fails with '<-' + '/' symbols combination in string)
297
- *
298
- * **Example Code**:
299
- *
300
- * ```HTML
301
- * <div id="before"></div>
302
- * <span>---> PHP Simple HTML DOM Parser <--- A /PHP based DOM parser</span>
303
- * <div id="after"></div>
304
- * ```
305
- *
306
- * **Note**:
307
- *
308
- * The [Markup Validation Service](https://validator.w3.org/#validate_by_input)
309
- * reports: Bad character `-` after `<`. Probable cause: Unescaped `<`. Try
310
- * escaping it as `&lt;`.
311
- *
312
- * @link https://sourceforge.net/p/simplehtmldom/bugs/160/ Bug #160
313
- * @link https://validator.w3.org/#validate_by_input Markup Validation Service
314
- */
315
- public function test_bug_160()
316
- {
317
- $doc = <<<HTML
318
- <div id="before"></div>
319
- <span>---> PHP Simple HTML DOM Parser <--- A /PHP based DOM parser</span>
320
- <div id="after"></div>
321
- HTML;
322
-
323
- $this->html->load($doc);
324
-
325
- $this->assertEquals(
326
- '---> PHP Simple HTML DOM Parser <--- A /PHP based DOM parser',
327
- $this->html->find('span', 0)->innertext
328
- );
329
- }
330
-
331
- /**
332
- * Bug #163 (Missing whitespace in plaintext property)
333
- *
334
- * **Example Code**:
335
- *
336
- * ```php
337
- * $doc = 'Hello<a href=""> World';
338
- * $html->load($doc);
339
- * echo "$html->plaintext\n";
340
- * ```
341
- *
342
- * @link https://sourceforge.net/p/simplehtmldom/bugs/163/ Bug #163
343
- */
344
- public function test_bug_163()
345
- {
346
- $doc = 'Hello<a href=""> World';
347
- $expected = 'Hello World';
348
-
349
- $this->html->load($doc);
350
-
351
- $this->assertEquals($expected, $this->html->plaintext);
352
- }
353
-
354
- /**
355
- * Bug #166 (Duplicate attributes)
356
- *
357
- * @link https://sourceforge.net/p/simplehtmldom/bugs/166/ Bug #166
358
- */
359
- public function test_bug_166()
360
- {
361
- $doc = '<div style="display:none;" style="margin-top: 5px;"></div>';
362
-
363
- $this->html->load($doc);
364
-
365
- $this->assertEquals(1, count($this->html->find('div', 0)->getAllAttributes()));
366
- }
367
-
368
- /**
369
- * Bug #169 (Incorrectly parsed attribute selectors ending on "s" or "i")
370
- *
371
- * This bug happens only when using attribute selectors without quotes, so
372
- * doing [att=val] instead of [att="val"].
373
- *
374
- * @link https://sourceforge.net/p/simplehtmldom/bugs/169/ Bug #169
375
- */
376
- public function test_bug_169()
377
- {
378
- $doc = '<div class="test_s" /><div class="test_i" />';
379
-
380
- $this->html->load($doc);
381
-
382
- $this->assertCount(
383
- 1,
384
- $this->html->find('div[class=test_s]'),
385
- 'Failed parsing attribute values ending on "s"'
386
- );
387
-
388
- $this->assertCount(
389
- 1,
390
- $this->html->find('div[class=test_i]'),
391
- 'Failed parsing attribute values ending on "i"'
392
- );
393
- }
394
-
395
- /**
396
- * Bug #172 (Problem with the remove function)
397
- *
398
- * `simple_html_dom_node::remove()` throws a fatal error:
399
- * `Uncaught Error: Call to a member function remove() on null in <file>:<line>`
400
- * when removing an element from the DOM if
401
- * - another element has previously been removed,
402
- * - the previous element was placed before the current element in the DOM and
403
- * - `simple_html_dom_node::remove()` is called on a node returned by
404
- * `simple_html_dom_node::find()` or `simple_html_dom::find()`
405
- *
406
- * This error can also happen for `simple_html_dom_node::removeChild()`
407
- *
408
- * @link https://sourceforge.net/p/simplehtmldom/bugs/172/ Bug #172
409
- */
410
- public function test_bug_172()
411
- {
412
- $expected = '<div></div><div></div>';
413
-
414
- $doc = '<div><img src="#"></div><div><img src="#"></div>';
415
-
416
- $this->html->load($doc);
417
-
418
- $this->html->find('div img', 0)->remove();
419
-
420
- $img = $this->html->find('div', 1)->find('img', 0);
421
-
422
- $this->assertNotNull(
423
- $img,
424
- 'find() on node failed after using remove() on a previous node'
425
- );
426
-
427
- $img->remove();
428
-
429
- $this->assertEquals($expected, $this->html->save());
430
- }
431
-
432
- /**
433
- * Bug #178 (Charset not handled properly)
434
- *
435
- * @link https://sourceforge.net/p/simplehtmldom/bugs/178/ Bug #178
436
- */
437
- public function test_bug_178()
438
- {
439
- /**
440
- * Note: The testdata must be encoded in order to work for machines with
441
- * different codepages!
442
- */
443
-
444
- $expected = chr(hexdec('c4')); // "č"
445
-
446
- /**
447
- * <!DOCTYPE html>
448
- * <html lang="windows-1250">
449
- * <head>
450
- * <meta http-equiv="Content-Type" content="text/html; charset=windows-1250">
451
- * </head>
452
- * <body>
453
- * a><span>K�</span></a>
454
- * <b>K�</b>
455
- * </body>
456
- * </html>
457
- */
458
- // phpcs:ignore Generic.Files.LineLength
459
- $data = base64_decode('PCFET0NUWVBFIGh0bWw+CjxodG1sIGxhbmc9IndpbmRvd3MtMTI1MCI+CjxoZWFkPgogICAgPG1ldGEgaHR0cC1lcXVpdj0iQ29udGVudC1UeXBlIiBjb250ZW50PSJ0ZXh0L2h0bWw7IGNoYXJzZXQ9d2luZG93cy0xMjUwIj4KPC9oZWFkPgo8Ym9keT4KICAgIDxhPjxzcGFuPkvoPC9zcGFuPjwvYT4KICAgIDxiPkvoPC9iPgo8L2JvZHk+CjwvaHRtbD4=');
460
-
461
- $this->html = str_get_html($data);
462
-
463
- $this->assertEquals(
464
- $expected,
465
- $this->html->find('a', 0)->innertext[7],
466
- 'outertext() should convert text inside elements'
467
- ); // note: innertext() calls outertext() internally
468
-
469
- $this->assertEquals(
470
- $expected,
471
- $this->html->find('b', 0)->innertext[1],
472
- 'innertext() should convert text inside elements'
473
- );
474
- }
475
-
476
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/callback_test.php DELETED
@@ -1,45 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests the callback feature of the parser
7
- */
8
- class callback_test extends TestCase {
9
- private $html;
10
-
11
- protected function setUp()
12
- {
13
- $this->html = new simple_html_dom();
14
- $this->html->set_callback(
15
- function($element)
16
- {
17
- $element->tag = 'surprise';
18
- }
19
- );
20
- }
21
-
22
- protected function tearDown()
23
- {
24
- $this->html->clear();
25
- unset($this->html);
26
- }
27
-
28
- public function test_htmldocument_set_callback_should_register_function()
29
- {
30
- $this->assertNotNull($this->html->callback);
31
- }
32
-
33
- public function test_htmldocument_remove_callback_should_unregister_function()
34
- {
35
- $this->html->remove_callback();
36
- $this->assertNull($this->html->callback);
37
- }
38
-
39
- public function test_htmlnode_outertext_uses_callback_function()
40
- {
41
- $expected = '<surprise></surprise>';
42
- $this->html->load('<html></html>');
43
- $this->assertEquals($expected, $this->html->save());
44
- }
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/cdata_test.php DELETED
@@ -1,69 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Checks if the parser properly handles CDATA sections
7
- */
8
- class cdata_test extends TestCase {
9
-
10
- private $html;
11
-
12
- protected function setUp()
13
- {
14
- $this->html = new simple_html_dom;
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- $this->html->clear();
20
- unset($this->html);
21
- }
22
-
23
- /**
24
- * @dataProvider dataProvider_for_cdata_should_parse
25
- */
26
- public function test_cdata_should_parse($expected, $doc)
27
- {
28
- $this->html->load($doc);
29
- $this->assertEquals($expected, $this->html->find('cdata', 0)->innertext);
30
- $this->assertEquals($doc, $this->html->save());
31
- }
32
-
33
- public function dataProvider_for_cdata_should_parse()
34
- {
35
- return array(
36
- 'empty' => array(
37
- '',
38
- '<![CDATA[]]>',
39
- ),
40
- 'space' => array(
41
- ' ',
42
- '<![CDATA[ ]]>',
43
- ),
44
- 'brackets' => array(
45
- ']][[',
46
- '<![CDATA[]][[]]>',
47
- ),
48
- 'html' => array(
49
- '<p>Hello, World!</p>',
50
- '<![CDATA[<p>Hello, World!</p>]]>',
51
- ),
52
- 'comment' => array(
53
- '<!-- Hello, World! -->',
54
- '<![CDATA[<!-- Hello, World! -->]]>'
55
- ),
56
- 'newline' => array(
57
- "Hello\nWorld!",
58
- "<![CDATA[Hello\nWorld!]]>"
59
- ),
60
- );
61
- }
62
-
63
- public function test_html_inside_cdata_should_not_appear_in_the_dom()
64
- {
65
- $this->html->load('<![CDATA[<div>Hello, World!</div>]]>');
66
- $this->assertNotNull($this->html->find('cdata', 0));
67
- $this->assertNull($this->html->find('div', 0));
68
- }
69
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/charset_test.php DELETED
@@ -1,80 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Test if the parser properly detects document encodings
7
- */
8
- class charset_test extends TestCase {
9
-
10
- private $html;
11
-
12
- protected function setUp()
13
- {
14
- $this->html = new simple_html_dom;
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- $this->html->clear();
20
- unset($this->html);
21
- }
22
-
23
- /**
24
- * @dataProvider fileProvider
25
- */
26
- public function test_charset($path)
27
- {
28
- $expected = strtoupper(basename($path, '.html'));
29
-
30
- $this->html->loadFile($path);
31
-
32
- $this->assertEquals($expected, $this->html->_charset);
33
- }
34
-
35
- /** @dataProvider fileProvider */
36
- public function test_is_utf8($file)
37
- {
38
- $testdata = file_get_contents($file);
39
-
40
- if (strtoupper(basename($file, '.html')) === 'UTF-8') {
41
- $this->assertTrue(simple_html_dom_node::is_utf8($testdata));
42
- } else {
43
- $this->assertFalse(simple_html_dom_node::is_utf8($testdata));
44
- }
45
- }
46
-
47
- /** @dataProvider fileProvider */
48
- public function test_convert_text_should_handle_different_encodings($file)
49
- {
50
- $testdata = file_get_contents($file);
51
- $charset = strtoupper(basename($file, '.html'));
52
- $expected = iconv($charset, 'UTF-8', $testdata);
53
-
54
- $this->html->load(''); // We need at least the root node
55
-
56
- if ($charset === 'UTF-8') {
57
- $this->html->_charset = 'TryMe'; // Trap the parser
58
- // Wrap content in BOM
59
- $testdata = "\xef\xbb\xbf" . $testdata . "\xef\xbb\xbf";
60
- } else {
61
- $this->html->_charset = $charset; // Hint source charset
62
- }
63
-
64
- $this->html->_target_charset = 'UTF-8'; // Enforce target charset
65
-
66
- $this->assertEquals($expected, $this->html->root->convert_text($testdata));
67
- }
68
-
69
- public function fileProvider()
70
- {
71
- $files = array();
72
-
73
- foreach(glob(__DIR__ . '/data/charset/*.html') as $path) {
74
- $files[strtoupper(basename($path, '.html'))] = array($path);
75
- }
76
-
77
- return $files;
78
- }
79
-
80
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/comment_test.php DELETED
@@ -1,93 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Checks if the parser properly handles comments
7
- */
8
- class comment_test extends TestCase {
9
-
10
- private $html;
11
-
12
- protected function setUp()
13
- {
14
- $this->html = new simple_html_dom;
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- $this->html->clear();
20
- unset($this->html);
21
- }
22
-
23
- /**
24
- * @dataProvider dataProvider_for_comment_should_parse
25
- */
26
- public function test_comment_should_parse($expected, $doc)
27
- {
28
- $this->html->load($doc);
29
- $this->assertEquals($expected, $this->html->find('comment', 0)->innertext);
30
- $this->assertEquals($doc, $this->html->save());
31
- }
32
-
33
- public function dataProvider_for_comment_should_parse()
34
- {
35
- return array(
36
- 'empty' => array(
37
- '',
38
- '<!---->',
39
- ),
40
- 'space' => array(
41
- ' ',
42
- '<!-- -->',
43
- ),
44
- 'brackets' => array(
45
- ']][[',
46
- '<!--]][[-->',
47
- ),
48
- 'html' => array(
49
- '<p>Hello, World!</p>',
50
- '<!--<p>Hello, World!</p>-->',
51
- ),
52
- 'cdata' => array(
53
- '<![CDATA[Hello, World!]]>',
54
- '<!--<![CDATA[Hello, World!]]>-->',
55
- ),
56
- 'newline' => array(
57
- "Hello\nWorld!",
58
- "<!--Hello\nWorld!-->",
59
- ),
60
- 'nested comment start tag' => array(
61
- '<!--',
62
- '<!--<!---->',
63
- ),
64
- 'reverse comment start tag' => array(
65
- '--!>',
66
- '<!----!>-->',
67
- ),
68
- 'almost comment start tag' => array(
69
- '<!-',
70
- '<!--<!--->',
71
- ),
72
- );
73
- }
74
-
75
- public function test_html_inside_comment_should_not_appear_in_the_dom()
76
- {
77
- $this->html->load('<!-- <div>Hello, World!</div> -->');
78
- $this->assertNotNull($this->html->find('comment', 0));
79
- $this->assertNull($this->html->find('div', 0));
80
- }
81
-
82
- public function test_comment_starting_with_greater_than_sign_should_break_comment()
83
- {
84
- $this->html->load('<!--><div>Hello, World!</div>-->');
85
- $this->assertEquals('Hello, World!', $this->html->find('div', 0)->plaintext);
86
- }
87
-
88
- public function test_comment_starting_with_dash_plus_greater_than_sign_should_break_comment()
89
- {
90
- $this->html->load('<!---><div>Hello, World!</div>-->');
91
- $this->assertEquals('Hello, World!', $this->html->find('div', 0)->plaintext);
92
- }
93
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/css_selector_test.php DELETED
@@ -1,646 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests for CSS selectors based on the current CSS Specification
7
- *
8
- * @link https://www.w3.org/TR/selectors/ CSS Selectors Specification
9
- */
10
- class css_selector_test extends TestCase {
11
-
12
- private $html;
13
-
14
- protected function setUp()
15
- {
16
- $this->html = new simple_html_dom();
17
- }
18
-
19
- protected function tearDown()
20
- {
21
- $this->html->clear();
22
- unset($this->html);
23
- }
24
-
25
- /**
26
- * pattern: "*"
27
- * Represents any element
28
- *
29
- * @link https://www.w3.org/TR/selectors/#the-universal-selector
30
- * Universal Selector
31
- */
32
- public function test_universal()
33
- {
34
- $doc = <<<HTML
35
- <html>
36
- <head><title>PHP Simple HTML DOM Parser</title></head>
37
- <body><h1>A PHP based DOM parser</h1></body>
38
- </html>
39
- HTML;
40
-
41
- $this->html->load($doc);
42
-
43
- $this->assertCount(5, $this->html->find('*'));
44
- }
45
-
46
- /**
47
- * pattern: "E"
48
- * Represents an element of type E
49
- *
50
- * @link https://www.w3.org/TR/selectors/#type-selectors
51
- * Type Selector
52
- */
53
- public function test_type()
54
- {
55
- $doc = <<<HTML
56
- <html>
57
- <head><title>PHP Simple HTML DOM Parser</title></head>
58
- <body><h1>A PHP based DOM parser</h1></body>
59
- </html>
60
- HTML;
61
-
62
- $this->html->load($doc);
63
-
64
- $this->assertCount(1, $this->html->find('html'));
65
- $this->assertCount(1, $this->html->find('head'));
66
- $this->assertCount(1, $this->html->find('title'));
67
- $this->assertCount(1, $this->html->find('body'));
68
- $this->assertCount(1, $this->html->find('h1'));
69
-
70
- // This should not exist
71
- $this->assertCount(0, $this->html->find('div'));
72
- }
73
-
74
- /**
75
- * pattern: "E.warning"
76
- * Represents an E element belonging to the class warning (the document
77
- * language specifies how class is determined).
78
- *
79
- * @link https://www.w3.org/TR/selectors/#class-html
80
- * Class selectors
81
- */
82
- public function test_class()
83
- {
84
- $doc = <<<HTML
85
- <html>
86
- <body>
87
- <p class="title header">PHP Simple HTML DOM Parser</p>
88
- <p class="subtitle">A PHP based DOM parser</p>
89
- </body>
90
- </html>
91
- HTML;
92
-
93
- $this->html->load($doc);
94
-
95
- $this->assertCount(1, $this->html->find('p.title'));
96
- $this->assertCount(1, $this->html->find('p.subtitle'));
97
- $this->assertCount(1, $this->html->find('p.title.header'));
98
- }
99
-
100
- public function test_class_should_skip_tags_without_classes()
101
- {
102
- $doc = <<<HTML
103
- <html>
104
- <body>
105
- <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
106
- </body>
107
- </html>
108
- HTML;
109
-
110
- $this->html->load($doc);
111
-
112
- $this->assertCount(0, $this->html->find('p.title'));
113
- }
114
-
115
- public function test_class_should_find_camel_case()
116
- {
117
- $doc = <<<HTML
118
- <html>
119
- <body>
120
- <p class="myClass">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
121
- </body>
122
- </html>
123
- HTML;
124
-
125
- $this->html->load($doc);
126
-
127
- $this->assertCount(1, $this->html->find('p.myClass'));
128
- }
129
-
130
- /**
131
- * pattern: "E#myid"
132
- * Represents an E element with ID equal to myid.
133
- *
134
- * @link https://www.w3.org/TR/selectors/#id-selectors
135
- * ID selectors
136
- */
137
- public function test_id()
138
- {
139
- $doc = <<<HTML
140
- <html>
141
- <body>
142
- <p id="title">PHP Simple HTML DOM Parser</p>
143
- <p id="subtitle">A PHP based DOM parser</p>
144
- </body>
145
- </html>
146
- HTML;
147
-
148
- $this->html->load($doc);
149
-
150
- $this->assertCount(1, $this->html->find('p#title'));
151
- $this->assertCount(1, $this->html->find('p#subtitle'));
152
- }
153
-
154
- public function test_id_selector_should_find_camel_case()
155
- {
156
- $doc = <<<HTML
157
- <html>
158
- <body>
159
- <p id="myMessage">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
160
- </body>
161
- </html>
162
- HTML;
163
-
164
- $this->html->load($doc);
165
-
166
- $this->assertCount(1, $this->html->find('p#myMessage'));
167
- }
168
-
169
- #region Attribute
170
-
171
- /**
172
- * pattern: "E[foo]"
173
- * Represents an E element with a foo attribute
174
- *
175
- * @link https://www.w3.org/TR/selectors/#attribute-selectors
176
- * Attribute selectors
177
- */
178
- public function test_attribute_exists()
179
- {
180
- $doc = <<<HTML
181
- <html>
182
- <body>
183
- <p id="title" class="h1">PHP Simple HTML DOM Parser</p>
184
- <p id="subtitle" class="h2" style="color:blue;">A PHP based DOM parser</p>
185
- </body>
186
- </html>
187
- HTML;
188
-
189
- $this->html->load($doc);
190
-
191
- $this->assertCount(2, $this->html->find('p[id]'));
192
- $this->assertCount(2, $this->html->find('p[id][class]'));
193
- $this->assertCount(1, $this->html->find('p[id][class][style]'));
194
- }
195
-
196
- /**
197
- * pattern: "E[foo="bar"]"
198
- * Represents an E element whose foo attribute value is exactly equal to bar
199
- *
200
- * @link https://www.w3.org/TR/selectors/#attribute-selectors
201
- * Attribute selectors
202
- */
203
- public function test_attribute_value_equals()
204
- {
205
- $doc = <<<HTML
206
- <html>
207
- <body>
208
- <p id="title" class="h1">PHP Simple HTML DOM Parser</p>
209
- <p id="subtitle" class="h2" style="color:blue;">A PHP based DOM parser</p>
210
- </body>
211
- </html>
212
- HTML;
213
-
214
- $this->html->load($doc);
215
-
216
- $this->assertCount(1, $this->html->find('p[id="title"]'));
217
- $this->assertCount(1, $this->html->find('p[id="subtitle"]'));
218
- $this->assertCount(1, $this->html->find('p[id="title"][class="h1"]'));
219
- $this->assertCount(1, $this->html->find('p[id="subtitle"][class="h2"][style="color:blue;"]'));
220
- }
221
-
222
- /**
223
- * pattern: "E[foo="bar" i]"
224
- * Represents an E element whose foo attribute value is exactly equal to any
225
- * (ASCII-range) case-permutation of bar
226
- *
227
- * @link https://www.w3.org/TR/selectors/#attribute-case
228
- * Attribute case
229
- */
230
- public function test_attribute_value_equals_case_insensitive()
231
- {
232
- $doc = <<<HTML
233
- <html lang="en-US">
234
- <body>
235
- <p att="title">PHP Simple HTML DOM Parser</p>
236
- <p att="tItle">A PHP based DOM parser</p>
237
- </body>
238
- </html>
239
- HTML;
240
-
241
- $this->html->load($doc);
242
-
243
- $this->assertCount(1, $this->html->find('p[att="title"]'));
244
- $this->assertCount(2, $this->html->find('p[att="title" i]'));
245
-
246
- $this->assertCount(0, $this->html->find('p[att^="TITLE"]'));
247
- $this->assertCount(2, $this->html->find('p[att^="TITLE" i]'));
248
-
249
- $this->assertCount(0, $this->html->find('p[att$="LE"]'));
250
- $this->assertCount(2, $this->html->find('p[att$="LE" i]'));
251
-
252
- $this->assertCount(0, $this->html->find('p[att*="ITL"]'));
253
- $this->assertCount(2, $this->html->find('p[att*="ITL" i]'));
254
-
255
- $this->assertCount(0, $this->html->find('html[lang|="EN"]'));
256
- $this->assertCount(1, $this->html->find('html[lang|="EN" i]'));
257
- }
258
-
259
- /**
260
- * pattern: "E[foo="bar" s]"
261
- * Represents an E element whose foo attribute value is exactly and
262
- * case-sensitively equal to bar
263
- *
264
- * @link https://www.w3.org/TR/selectors/#attribute-case
265
- * Attribute case
266
- */
267
- public function test_attribute_value_equals_case_sensitive()
268
- {
269
- $doc = <<<HTML
270
- <html lang="en-US">
271
- <body>
272
- <p att="title header">PHP Simple HTML DOM Parser</p>
273
- <p att="tItle">A PHP based DOM parser</p>
274
- </body>
275
- </html>
276
- HTML;
277
-
278
- $this->html->load($doc);
279
-
280
- $this->assertCount(0, $this->html->find('p[att="title" s]'));
281
- $this->assertCount(1, $this->html->find('p[att="tItle" s]'));
282
- }
283
-
284
- /**
285
- * pattern: "E[foo~="bar"]"
286
- * Represents an E element whose foo attribute value is a list of
287
- * whitespace-separated values, one of which is exactly equal to bar
288
- *
289
- * @link https://www.w3.org/TR/selectors/#attribute-selectors
290
- * Attribute selectors
291
- */
292
- public function test_attribute_value_list_contains()
293
- {
294
- $doc = <<<HTML
295
- <html>
296
- <body>
297
- <p att="title header">PHP Simple HTML DOM Parser</p>
298
- <p att="title subtitle">A PHP based DOM parser</p>
299
- </body>
300
- </html>
301
- HTML;
302
-
303
- $this->html->load($doc);
304
-
305
- $this->assertCount(2, $this->html->find('p[att~="title"]'));
306
- $this->assertCount(1, $this->html->find('p[att~="header"]'));
307
- $this->assertCount(1, $this->html->find('p[att~="subtitle"]'));
308
- $this->assertCount(0, $this->html->find('p[att~=" title"'));
309
- $this->assertCount(0, $this->html->find('p[att~=" "'));
310
- }
311
-
312
- /**
313
- * pattern: "E[foo^="bar"]"
314
- * Represents an E element whose foo attribute value begins exactly with the
315
- * string bar
316
- *
317
- * @link https://www.w3.org/TR/selectors/#attribute-substrings
318
- * Attribute selectors
319
- */
320
- public function test_attribute_value_begins()
321
- {
322
- $doc = <<<HTML
323
- <html>
324
- <body>
325
- <p id="title">PHP Simple HTML DOM Parser</p>
326
- <p id="subtitle">A PHP based DOM parser</p>
327
- </body>
328
- </html>
329
- HTML;
330
-
331
- $this->html->load($doc);
332
-
333
- $this->assertCount(1, $this->html->find('p[id^="sub"]'));
334
- }
335
-
336
- /**
337
- * pattern: "E[foo$="bar"]"
338
- * Represents an E element whose foo attribute value ends exactly with the
339
- * string bar
340
- *
341
- * @link https://www.w3.org/TR/selectors/#attribute-substrings
342
- * Attribute substrings
343
- */
344
- public function test_attribute_value_ends()
345
- {
346
- $doc = <<<HTML
347
- <html>
348
- <body>
349
- <p id="title">PHP Simple HTML DOM Parser</p>
350
- <p id="subtitle">A PHP based DOM parser</p>
351
- </body>
352
- </html>
353
- HTML;
354
-
355
- $this->html->load($doc);
356
-
357
- $this->assertCount(2, $this->html->find('p[id$="title"]'));
358
- }
359
-
360
- /**
361
- * pattern: "E[foo*="bar"]"
362
- * Represents an E element whose foo attribute value contains the substring
363
- * bar
364
- *
365
- * @link https://www.w3.org/TR/selectors/#attribute-substrings
366
- * Attribute substrings
367
- */
368
- public function test_attribute_value_contains()
369
- {
370
- $doc = <<<HTML
371
- <html>
372
- <body>
373
- <p id="title">PHP Simple HTML DOM Parser</p>
374
- <p id="subtitle">A PHP based DOM parser</p>
375
- <p class="title header">PHP Simple HTML DOM Parser</p>
376
- <p class="title subtitle">A PHP based DOM parser</p>
377
- </body>
378
- </html>
379
- HTML;
380
-
381
- $this->html->load($doc);
382
-
383
- $this->assertCount(2, $this->html->find('p[id*="itl"]'));
384
- $this->assertCount(2, $this->html->find('p[class*="title"]'));
385
- $this->assertCount(1, $this->html->find('p[class*="title header"]'));
386
- $this->assertCount(1, $this->html->find('p[class*="subtitle"]'));
387
- }
388
-
389
- /**
390
- * pattern: "E[foo|="en"]"
391
- * Represents an E element whose foo attribute value is a hyphen-separated
392
- * list of values beginning with en
393
- *
394
- * @link https://www.w3.org/TR/selectors/#attribute-selectors
395
- * Attribute selectors
396
- */
397
- public function test_attribute_value_list_begins()
398
- {
399
- $doc = <<<HTML
400
- <a href="#" hreflang="en-US">en-US</a>
401
- <a href="#" hreflang="en-UK">en-UK</a>
402
- <a href="#" hreflang="en">en</a>
403
- <a href="#" hreflang="fr">fr</a>
404
- HTML;
405
-
406
- $this->html->load($doc);
407
-
408
- $anchors = $this->html->find('a[hreflang|="en"]');
409
-
410
- $this->assertCount(3, $anchors);
411
- $this->assertEquals('en-US', $anchors[0]->innertext);
412
- $this->assertEquals('en-UK', $anchors[1]->innertext);
413
- $this->assertEquals('en', $anchors[2]->innertext);
414
- }
415
-
416
- #endregion Attribute
417
-
418
- #region Combinator
419
-
420
- /**
421
- * pattern: "E F"
422
- * Represents an F element descendant of an E element
423
- *
424
- * @link https://www.w3.org/TR/selectors/#descendant-combinators
425
- * Descendant combinators
426
- */
427
- public function test_descendant_combinators()
428
- {
429
- $doc = <<<HTML
430
- <html>
431
- <body>
432
- <p id="title">PHP Simple HTML DOM Parser</p>
433
- <p id="subtitle">A PHP based DOM parser</p>
434
- </body>
435
- </html>
436
- HTML;
437
-
438
- $this->html->load($doc);
439
-
440
- $this->assertCount(2, $this->html->find('html body p'));
441
- }
442
-
443
- /**
444
- * pattern: "E > F"
445
- * Represents an F element child of an E element
446
- *
447
- * @link https://www.w3.org/TR/selectors/#child-combinators
448
- * Child combinators
449
- */
450
- public function test_child_combinators()
451
- {
452
- $doc = <<<HTML
453
- <html>
454
- <body>
455
- <p id="title">PHP Simple HTML DOM Parser</p>
456
- <p id="subtitle">A PHP based DOM parser</p>
457
- <div>
458
- <p>Lorem ipsum dolor sit amet.</p>
459
- </div>
460
- </body>
461
- </html>
462
- HTML;
463
-
464
- $this->html->load($doc);
465
-
466
- $this->assertCount(2, $this->html->find('html > body > p'));
467
- }
468
-
469
- /**
470
- * pattern: "E + F"
471
- * Represents an F element immediately preceded by an E element
472
- *
473
- * @link https://www.w3.org/TR/selectors/#adjacent-sibling-combinators
474
- * Next sibling combinators
475
- */
476
- public function test_next_sibling_combinators()
477
- {
478
- $doc = <<<HTML
479
- <html>
480
- <body>
481
- <h1>PHP Simple HTML DOM Parser</h1>
482
- <p id="title">PHP Simple HTML DOM Parser</p>
483
- <h2>A PHP based DOM parser</h2>
484
- <p id="subtitle">A PHP based DOM parser</p>
485
- </body>
486
- </html>
487
- HTML;
488
-
489
- $this->html->load($doc);
490
-
491
- $this->assertCount(1, $this->html->find('h1 + p'));
492
- $this->assertCount(1, $this->html->find('h2 + p'));
493
- }
494
-
495
- /**
496
- * pattern: "E ~ F"
497
- * Represents an F element preceded by an E element
498
- *
499
- * @link https://www.w3.org/TR/selectors/#general-sibling-combinators
500
- * General sibling combinators
501
- */
502
- public function test_general_sibling_combinators()
503
- {
504
- $doc = <<<HTML
505
- <html>
506
- <body>
507
- <h1>PHP Simple HTML DOM Parser</h1>
508
- <p id="title">PHP Simple HTML DOM Parser</p>
509
- <h2>A PHP based DOM parser</h2>
510
- <p id="subtitle">A PHP based DOM parser</p>
511
- </body>
512
- </html>
513
- HTML;
514
-
515
- $this->html->load($doc);
516
-
517
- $this->assertCount(2, $this->html->find('h1 ~ p'));
518
- $this->assertCount(1, $this->html->find('h2 ~ p'));
519
- }
520
-
521
- #endregion Combinator
522
-
523
- #region Pseudo Classes
524
-
525
- /**
526
- * pattern: "E:not(s)"
527
- * Represents an E element that does not match simple selector s
528
- *
529
- * @link https://www.w3.org/TR/selectors-3/#negation
530
- * Negation pseudo class
531
- */
532
- public function test_negation_pseudo_class()
533
- {
534
- $doc = <<<HTML
535
- <html>
536
- <body>
537
- <h1>PHP Simple HTML DOM Parser</h1>
538
- <p id="title">PHP Simple HTML DOM Parser</p>
539
- <h2>A PHP based DOM parser</h2>
540
- <p id="subtitle">A PHP based DOM parser</p>
541
- </body>
542
- </html>
543
- HTML;
544
-
545
- $this->html->load($doc);
546
-
547
- $this->assertCount(1, $this->html->find('p:not([id="title"])'));
548
- $this->assertCount(5, $this->html->find(':not(p[id="subtitle"])'));
549
- $this->assertCount(3, $this->html->find('body :not(p[id="title"])'));
550
- }
551
-
552
- #endregion Pseudo Classes
553
-
554
- /**
555
- * "comment", "cdata" and "text" selectors are specific to this parser. They
556
- * allow users to directly address these nodes and extract useful information.
557
- *
558
- * @dataProvider dataProvider_for_find_should_work_with_special_selector
559
- */
560
- public function test_find_should_work_with_special_selector($selector, $expected, $doc, $message)
561
- {
562
- $this->html->load($doc);
563
- $this->assertEquals($expected, $this->html->find($selector, 0)->innertext, $message);
564
- $this->assertEquals($doc, $this->html->save());
565
- }
566
-
567
- public function dataProvider_for_find_should_work_with_special_selector()
568
- {
569
- $data = array(
570
- 'text without elements' => array(
571
- 'text',
572
- 'Hello, World!',
573
- 'Hello, World!',
574
- 'find should return text without elements'
575
- ),
576
- 'text outside html' => array(
577
- 'text',
578
- 'Hello, World!',
579
- 'Hello, World!<html></html>',
580
- 'find should return text outside html'
581
- ),
582
- 'text inside element' => array(
583
- 'text',
584
- 'Hello, World!',
585
- '<html>Hello, World!</html>',
586
- 'find should return text inside element'
587
- ),
588
- 'text between elements' => array(
589
- 'text',
590
- 'Hello, World!',
591
- '<html><head></head>Hello, World!<body></body></html>',
592
- 'find should return text between elements'
593
- ),
594
- 'cdata without elements' => array(
595
- 'cdata',
596
- 'Hello, World!',
597
- '<![CDATA[Hello, World!]]>',
598
- 'find should return cdata elements'
599
- ),
600
- 'cdata outside html' => array(
601
- 'cdata',
602
- 'Hello, World!',
603
- '<![CDATA[Hello, World!]]><html></html>',
604
- 'find should return cdata elements'
605
- ),
606
- 'cdata inside element' => array(
607
- 'cdata',
608
- 'Hello, World!',
609
- '<html><![CDATA[Hello, World!]]></html>',
610
- 'find should return cdata elements'
611
- ),
612
- 'cdata between elements' => array(
613
- 'cdata',
614
- 'Hello, World!',
615
- '<html><head></head><![CDATA[Hello, World!]]><body></body></html>',
616
- 'find should return cdata elements'
617
- ),
618
- 'comment without elements' => array(
619
- 'comment',
620
- 'Hello, World!',
621
- '<!--Hello, World!-->',
622
- 'find should return comments'
623
- ),
624
- 'comment outside html' => array(
625
- 'comment',
626
- 'Hello, World!',
627
- '<!--Hello, World!--><html></html>',
628
- 'find should return comments'
629
- ),
630
- 'comment inside element' => array(
631
- 'comment',
632
- 'Hello, World!',
633
- '<html><!--Hello, World!--></html>',
634
- 'find should return comments'
635
- ),
636
- 'comment between elements' => array(
637
- 'comment',
638
- 'Hello, World!',
639
- '<html><head></head><!--Hello, World!--><body></body></html>',
640
- 'find should return comments'
641
- )
642
- );
643
-
644
- return $data;
645
- }
646
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/debug_info_test.php DELETED
@@ -1,37 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests for debug information generated by the parser
7
- */
8
- class debug_info_test extends TestCase {
9
- private $html;
10
-
11
- protected function setUp()
12
- {
13
- $this->html = new simple_html_dom();
14
- }
15
-
16
- protected function tearDown()
17
- {
18
- $this->html->clear();
19
- unset($this->html);
20
- }
21
-
22
- /** @dataProvider dataProvider_for_print_r */
23
- public function test_print_r($expected, $html)
24
- {
25
- $this->html->load($html);
26
- $this->assertEquals($expected, print_r($this->html, true));
27
- }
28
-
29
- public function dataProvider_for_print_r()
30
- {
31
- return array(
32
- 'should return __debugInfo' => array(
33
- 'expected' => file_get_contents(__DIR__ . '/data/debug_info/print_r_expected.txt'),
34
- 'html' => file_get_contents(__DIR__ . '/data/debug_info/print_r_testdata.html')
35
- ));
36
- }
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/debug_with_callback_test.php DELETED
@@ -1,83 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../Debug.php';
3
- use PHPUnit\Framework\TestCase;
4
- use simplehtmldom\Debug;
5
-
6
- /**
7
- * Tests the Debug class with custom callback
8
- */
9
- class debug_with_callback_test extends TestCase {
10
- private $html;
11
- private $debug_message;
12
-
13
- protected function setUp()
14
- {
15
- Debug::setDebugHandler(array($this, 'debugMessageHandler'));
16
- Debug::enable();
17
-
18
- // Discard initial message
19
- $this->debug_message = null;
20
- }
21
-
22
- protected function tearDown()
23
- {
24
- Debug::disable();
25
- Debug::setDebugHandler();
26
- }
27
-
28
- public function debugMessageHandler($message)
29
- {
30
- $this->debug_message = $message;
31
- }
32
-
33
- public function test_enable_should_issue_a_message()
34
- {
35
- $this->assertNull($this->debug_message);
36
- Debug::enable();
37
- $this->assertNotNull($this->debug_message);
38
- }
39
-
40
- public function test_disable_should_issue_a_message()
41
- {
42
- $this->assertNull($this->debug_message);
43
- Debug::disable();
44
- $this->assertNotNull($this->debug_message);
45
- }
46
-
47
- public function test_log_should_issue_the_message()
48
- {
49
- $expected = 'Hello, World!';
50
- $this->assertNull($this->debug_message);
51
- Debug::log('Hello, World!');
52
- $this->assertContains($expected, $this->debug_message);
53
- }
54
-
55
- public function test_log_should_issue_the_same_message_multiple_times()
56
- {
57
- $expected = 'Hello, World!';
58
- $this->assertNull($this->debug_message);
59
-
60
- for($i = 0; $i < 2; $i++)
61
- {
62
- Debug::log('Hello, World!');
63
- $this->assertContains($expected, $this->debug_message);
64
- $this->debug_message = null;
65
- }
66
- }
67
-
68
- public function test_log_once_should_issue_the_message_only_once()
69
- {
70
- $this->assertNull($this->debug_message);
71
-
72
- for($i = 0; $i < 2; $i++)
73
- {
74
- Debug::log_once('Hello, World!');
75
- if ($i === 0) {
76
- $this->assertContains('Hello, World!', $this->debug_message);
77
- } else {
78
- $this->assertNull($this->debug_message);
79
- }
80
- $this->debug_message = null;
81
- }
82
- }
83
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/doctype_test.php DELETED
@@ -1,47 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Checks if the parser properly handles DOCTYPE
7
- */
8
- class doctype_test extends TestCase {
9
-
10
- private $html;
11
-
12
- protected function setUp()
13
- {
14
- $this->html = new simple_html_dom();
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- $this->html->clear();
20
- unset($this->html);
21
- }
22
-
23
- /**
24
- * @dataProvider dataProvider_for_doctype_should_parse
25
- */
26
- public function test_doctype_should_parse($expected, $doc)
27
- {
28
- // Note: The parser currently doesn't make any assumptions about DOCTYPE
29
- $this->html->load($doc);
30
- $this->assertEquals($expected, $this->html->root->plaintext);
31
- $this->assertEquals($doc, $this->html->save());
32
- }
33
-
34
- public function dataProvider_for_doctype_should_parse()
35
- {
36
- return array(
37
- 'normal' => array(
38
- '',
39
- '<!DOCTYPE html><html></html>',
40
- ),
41
- 'stray doctype' => array(
42
- 'Hello, World!',
43
- '<p><!DOCTYPE html>Hello, World!</p>',
44
- ),
45
- );
46
- }
47
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/dom_manipulation_test.php DELETED
@@ -1,102 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests the DOM manipulation ability of the parser
7
- */
8
- class dom_manipulation_test extends TestCase {
9
- private $dom;
10
-
11
- protected function setUp()
12
- {
13
- $this->dom = new simple_html_dom();
14
- }
15
-
16
- protected function tearDown()
17
- {
18
- $this->dom->clear();
19
- unset($this->dom);
20
- }
21
-
22
- public function test_dom_should_accept_nested_elements()
23
- {
24
- $expected = '<html><head></head><body></body></html>';
25
-
26
- $html = $this->dom->createElement('html');
27
- $head = $this->dom->createElement('head');
28
- $body = $this->dom->createElement('body');
29
-
30
- $this->dom->root->appendChild($html);
31
-
32
- $html
33
- ->appendChild($head)
34
- ->appendChild($body);
35
-
36
- $this->assertEquals($expected, $this->dom->save());
37
- }
38
-
39
- public function test_dom_should_find_added_elements()
40
- {
41
- $html = $this->dom->createElement('html');
42
- $head = $this->dom->createElement('head');
43
- $body = $this->dom->createElement('body');
44
-
45
- $this->dom->root->appendChild($html);
46
-
47
- $html
48
- ->appendChild($head)
49
- ->appendChild($body);
50
-
51
- $this->assertNotNull($this->dom->find('html', 0));
52
- $this->assertNotNull($this->dom->find('head', 0));
53
- $this->assertNotNull($this->dom->find('body', 0));
54
- }
55
-
56
- public function test_dom_should_find_elements_added_to_existing_dom()
57
- {
58
- $this->dom->load('<html></html>');
59
-
60
- $head = $this->dom->createElement('head');
61
- $body = $this->dom->createElement('body');
62
-
63
- $this->dom->find('html', 0)
64
- ->appendChild($head)
65
- ->appendChild($body);
66
-
67
- $this->assertNotNull($this->dom->find('html', 0));
68
- $this->assertNotNull($this->dom->find('head', 0));
69
- $this->assertNotNull($this->dom->find('body', 0));
70
- }
71
-
72
- public function test_dom_should_find_elements_added_to_existing_nested_dom()
73
- {
74
- $this->dom->load('<html><body></body></html>');
75
-
76
- $table = $this->dom->createElement('table');
77
- $tr = $this->dom->createElement('tr');
78
-
79
- $this->dom->find('body', 0)->appendChild($table);
80
- $table->appendChild($tr);
81
-
82
- $this->assertNotNull($this->dom->find('table', 0));
83
- $this->assertNotNull($this->dom->find('tr', 0));
84
- }
85
-
86
- public function test_dom_should_find_elements_add_in_reverse()
87
- {
88
- $html = $this->dom->createElement('html');
89
- $head = $this->dom->createElement('head');
90
- $body = $this->dom->createElement('body');
91
-
92
- $html
93
- ->appendChild($head)
94
- ->appendChild($body);
95
-
96
- $this->dom->root->appendChild($html);
97
-
98
- $this->assertNotNull($this->dom->find('html', 0));
99
- $this->assertNotNull($this->dom->find('head', 0));
100
- $this->assertNotNull($this->dom->find('body', 0));
101
- }
102
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/entity_decoding_test.php DELETED
@@ -1,62 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests for simple_html_dom entity decoding
7
- */
8
- class entity_decoding_test extends TestCase {
9
- /**
10
- * @dataProvider load_should_decode_entity_dataProvider
11
- */
12
- public function test_load_should_decode_entity($name, $char, $expected)
13
- {
14
- $this->assertEquals($expected, $char, 'Character: ' . $name);
15
- }
16
-
17
- public function load_should_decode_entity_dataProvider()
18
- {
19
- $file = __DIR__ . '/data/entity_decoding/Character Entity Reference Chart.html';
20
-
21
- // This operation is very slow due to missing closing tags
22
- $html = new simple_html_dom();
23
- $html->loadFile($file);
24
-
25
- $table = get_html_translation_table(HTML_ENTITIES, ENT_QUOTES | ENT_HTML5, $html->target_charset);
26
-
27
- $vector = array();
28
-
29
- foreach($html->find('table tr') as $tr) {
30
- $char = $tr->find('td.character', 0)->innertext;
31
- $char = substr($char, 1); /* first character is always space */
32
-
33
- $name = $tr->find('td.named > code', 0)->plaintext;
34
- $name = explode(' ', $name)[0]; /* may contain multiple representations */
35
-
36
- $expected = array_search($name, $table, true);
37
-
38
- if ($expected === false) continue; /* Unknown entity */
39
-
40
- $vector[] = array(
41
- $name,
42
- $char,
43
- $expected
44
- );
45
- }
46
-
47
- return $vector;
48
- }
49
-
50
- public function test_decode_should_decode_attributes()
51
- {
52
- $expected = 'Häagen-Dazs';
53
-
54
- $html = new simple_html_dom();
55
- $html->load('<meta name="description" content="H&auml;agen-Dazs">');
56
-
57
- $description = $html->find('meta[name="description"]', 0);
58
-
59
- $this->assertEquals($expected, $description->content);
60
- }
61
-
62
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/file_get_html_test.php DELETED
@@ -1,80 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests file_get_html
7
- */
8
- class file_get_html_test extends TestCase {
9
- private $testdata_file = __DIR__ . '/data/file_get_html/testdata.html';
10
-
11
- #region maxLen
12
-
13
- /**
14
- * Files equal to maxLen should load normally.
15
- * @dataProvider fileProvider
16
- */
17
- public function test_files_equal_to_maxlen_should_load_normally($file)
18
- {
19
- $expected = file_get_contents($file);
20
- $size = filesize($file);
21
-
22
- $this->assertEquals(
23
- $expected,
24
- file_get_html(
25
- $file,
26
- false,
27
- null,
28
- 0,
29
- $size,
30
- true,
31
- false,
32
- DEFAULT_TARGET_CHARSET,
33
- false,
34
- DEFAULT_BR_TEXT,
35
- DEFAULT_SPAN_TEXT
36
- )->save(),
37
- 'Files equal to maxLen should load normally.'
38
- );
39
- }
40
-
41
- /**
42
- * Files larger than maxLen should return false.
43
- * @dataProvider fileProvider
44
- */
45
- public function test_files_larger_than_maxlen_should_return_false($file)
46
- {
47
- $size = filesize($file);
48
-
49
- $this->assertFalse(
50
- file_get_html(
51
- $file,
52
- false,
53
- null,
54
- 0,
55
- $size - 1,
56
- true,
57
- false,
58
- DEFAULT_TARGET_CHARSET,
59
- false,
60
- DEFAULT_BR_TEXT,
61
- DEFAULT_SPAN_TEXT
62
- ),
63
- 'Files larger than $maxLen should return false.'
64
- );
65
- }
66
-
67
- public function fileProvider()
68
- {
69
- $files = array();
70
-
71
- foreach(glob(__DIR__ . '/data/file_get_html/*.html') as $path) {
72
- $files[strtoupper(basename($path, '.html'))] = array($path);
73
- }
74
-
75
- return $files;
76
- }
77
-
78
- #endregion maxLen
79
-
80
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/htmldocument___call_test.php DELETED
@@ -1,39 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests if __call properly forwards function calls
7
- */
8
- class htmldocument___call_test extends TestCase {
9
- private $html;
10
-
11
- protected function setUp()
12
- {
13
- $this->html = new simple_html_dom();
14
- }
15
-
16
- protected function tearDown()
17
- {
18
- $this->html->clear();
19
- unset($this->html);
20
- }
21
-
22
- function test_load_file_should_return_loadFile()
23
- {
24
- $file = __DIR__ . '/data/htmldocument___call/testdata.html';
25
-
26
- $this->assertEquals(
27
- $this->html->loadFile($file),
28
- $this->html->load_file($file)
29
- );
30
- }
31
-
32
- /**
33
- * @expectedException PHPUnit\Framework\Error\Error
34
- */
35
- function test_unknown_function_should_return_error()
36
- {
37
- $this->html->doSomethingStupid();
38
- }
39
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/htmldocument_test.php DELETED
@@ -1,249 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests for simple_html_dom
7
- */
8
- class htmldocument_test extends TestCase {
9
-
10
- private $html;
11
-
12
- protected function setUp()
13
- {
14
- $this->html = new simple_html_dom;
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- $this->html->clear();
20
- unset($this->html);
21
- }
22
-
23
- public function test___get_outertext_should_return_html()
24
- {
25
- $doc = '<html></html>';
26
- $this->html->load($doc);
27
-
28
- $this->assertEquals($doc, $this->html->outertext);
29
- }
30
-
31
- public function test___get_innertext_should_return_html()
32
- {
33
- $doc = '<html></html>';
34
- $this->html->load($doc);
35
-
36
- $this->assertEquals($doc, $this->html->innertext);
37
- }
38
-
39
- public function test___get_plaintext_should_return_html_content()
40
- {
41
- $expected = 'Hello, World!';
42
- $doc = '<html><p>Hello, World!</p></html>';
43
- $this->html->load($doc);
44
-
45
- $this->assertEquals($expected, $this->html->plaintext);
46
- }
47
-
48
- public function test___get_charset_should_return__charset()
49
- {
50
- $expected = 'UTF-8';
51
- $doc = '<html><p>Hello, World!</p></html>';
52
- $this->html->load($doc);
53
-
54
- $this->assertEquals($expected, $this->html->charset);
55
- }
56
-
57
- public function test___get_target_charset_should_return__target_charset()
58
- {
59
- $expected = 'UTF-8';
60
- $doc = '<html><p>Hello, World!</p></html>';
61
- $this->html->load($doc);
62
-
63
- $this->assertEquals($expected, $this->html->target_charset);
64
- }
65
-
66
- public function test___get_should_return_null_for_unknown_type()
67
- {
68
- $doc = '<html></html>';
69
- $this->html->load($doc);
70
-
71
- $this->assertNull($this->html->this_type_doesnt_exist_probably);
72
- }
73
-
74
- public function test_childNodes_should_return_element_by_index()
75
- {
76
- $expected = 'PHP Simple HTML DOM Parser';
77
- $doc = '<p>PHP Simple HTML DOM Parser</p>';
78
- $this->html->load($doc);
79
-
80
- $this->assertEquals($expected, $this->html->childNodes(0)->text());
81
- }
82
-
83
- public function test_childNodes_should_return_null_if_index_out_of_range()
84
- {
85
- $doc = '<p>PHP Simple HTML DOM Parser</p>';
86
- $this->html->load($doc);
87
-
88
- $this->assertNull($this->html->childNodes(1));
89
- }
90
-
91
- public function test_childNodes_should_work_after_remove()
92
- {
93
- $doc = '<a href="#"></a><img><p></p>';
94
-
95
- $this->html->load($doc);
96
- $this->html->find('img', 0)->remove();
97
-
98
- $this->assertCount(2, $this->html->childNodes());
99
- $this->assertArrayHasKey(0, $this->html->childNodes());
100
- $this->assertArrayHasKey(1, $this->html->childNodes());
101
- }
102
-
103
- public function test_getElementById_should_return_matching_element()
104
- {
105
- $doc = '<html><p id="claim">PHP Simple HTML DOM Parser</p></html>';
106
-
107
- $this->html->load($doc);
108
-
109
- $this->assertNotNull($this->html->getElementById('claim'));
110
- $this->assertNull($this->html->getElementById('unknown'));
111
- }
112
-
113
- public function test_getElementsById_should_return_matching_element()
114
- {
115
- // Note, this technically doesn't make sense but it's supported
116
- $doc = '<html><p id="a"></p><p id="a"></p></html>';
117
-
118
- $this->html->load($doc);
119
-
120
- $this->assertCount(2, $this->html->getElementsById('a'));
121
- }
122
-
123
- public function test_getElementByTagName_should_return_matching_element()
124
- {
125
- $expected = 'Hello';
126
- $doc = '<html><p>Hello</p><p>World</p></html>';
127
-
128
- $this->html->load($doc);
129
-
130
- $this->assertEquals($expected, $this->html->getElementByTagName('p')->text());
131
- }
132
-
133
- public function test_getElementsByTagName_should_return_matching_element()
134
- {
135
- $doc = '<html><p>Hello</p><p>World</p></html>';
136
-
137
- $this->html->load($doc);
138
-
139
- $this->assertCount(2, $this->html->getElementsByTagName('p'));
140
- }
141
-
142
- public function test_lastChild_should_return_null_without_children()
143
- {
144
- $doc = ' ';
145
- $this->html->load($doc);
146
-
147
- $this->assertNull($this->html->lastChild());
148
- }
149
-
150
- public function test_lastChild_should_work_after_remove()
151
- {
152
- $doc = '<div><a href="#"></a><p></p></div>';
153
-
154
- $this->html->load($doc);
155
- $this->html->find('p', 0)->remove();
156
-
157
- $this->assertNotNull($this->html->lastChild());
158
- }
159
-
160
- public function test_createElement_should_return_an_element()
161
- {
162
- $this->assertEquals(
163
- '<html></html>',
164
- $this->html->createElement('html')->save()
165
- );
166
- }
167
-
168
- public function test_createElement_should_create_element_with_content()
169
- {
170
- $this->assertEquals(
171
- '<html>Hello, World!</html>',
172
- $this->html->createElement('html', 'Hello, World!')->save()
173
- );
174
- }
175
-
176
- /** @dataProvider dataProvider_for_createElement_should_not_affect_the_dom */
177
- public function test_createElement_should_not_affect_the_dom($doc)
178
- {
179
- $this->html->load($doc);
180
-
181
- $this->assertEquals(
182
- '<html></html>',
183
- $this->html->createElement('html')->save()
184
- );
185
- $this->assertEquals(
186
- '<head></head>',
187
- $this->html->createElement('head')->save()
188
- );
189
- $this->assertEquals(
190
- '<body></body>',
191
- $this->html->createElement('body')->save()
192
- );
193
- $this->assertEquals(
194
- $doc,
195
- $this->html->save()
196
- );
197
-
198
- $this->assertNull($this->html->find('html', 0));
199
- $this->assertNull($this->html->find('head', 0));
200
- $this->assertNull($this->html->find('body', 0));
201
- }
202
-
203
- public function dataProvider_for_createElement_should_not_affect_the_dom()
204
- {
205
- return array(
206
- 'empty' => array(''),
207
- 'single' => array('<div></div>'),
208
- 'nested' => array('<table><tr></tr><tr></tr></table>'),
209
- );
210
- }
211
-
212
- public function test_createTextNode_should_return_a_node()
213
- {
214
- $this->assertNotNull($this->html->createTextNode('<html>'));
215
- }
216
-
217
- public function test_createTextNode_should_create_a_text_node()
218
- {
219
- $this->assertEquals(
220
- 'Hello, World!',
221
- $this->html->createTextNode('Hello, World!')->save()
222
- );
223
- }
224
-
225
- /** @dataProvider dataProvider_for_createTextNode_should_not_affect_the_dom */
226
- public function test_createTextNode_should_not_affect_the_dom($doc)
227
- {
228
- $this->html->load($doc);
229
-
230
- $this->assertEquals(
231
- 'Hello, World!',
232
- $this->html->createTextNode('Hello, World!')->save()
233
- );
234
- $this->assertEquals(
235
- $doc,
236
- $this->html->save()
237
- );
238
- }
239
-
240
- public function dataProvider_for_createTextNode_should_not_affect_the_dom()
241
- {
242
- return array(
243
- 'empty' => array(''),
244
- 'single' => array('<div></div>'),
245
- 'nested' => array('<table><tr></tr><tr></tr></table>'),
246
- );
247
- }
248
-
249
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/htmlnode___call_test.php DELETED
@@ -1,77 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests if __call properly forwards function calls
7
- */
8
- class htmlnode___call_test extends TestCase {
9
- private $html;
10
-
11
- protected function setUp()
12
- {
13
- $this->html = new simple_html_dom();
14
- $this->html->load('<html><head /><body /></html>');
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- $this->html->clear();
20
- unset($this->html);
21
- }
22
-
23
- function test_children_should_return_childNodes()
24
- {
25
- $this->assertEquals($this->html->root->childNodes(), $this->html->root->children());
26
- $this->assertEquals($this->html->root->childNodes(0), $this->html->root->children(0));
27
- $this->assertEquals($this->html->root->childNodes(1), $this->html->root->children(1));
28
- }
29
-
30
- function test_first_child_should_return_firstChild()
31
- {
32
- $this->assertEquals(
33
- $this->html->root->firstChild(),
34
- $this->html->root->first_child()
35
- );
36
- }
37
-
38
- function test_has_child_should_return_hasChildNodes()
39
- {
40
- $this->assertEquals(
41
- $this->html->root->hasChildNodes(),
42
- $this->html->root->has_child()
43
- );
44
- }
45
-
46
- function test_last_child_should_return_lastChild()
47
- {
48
- $this->assertEquals(
49
- $this->html->root->lastChild(),
50
- $this->html->root->last_child()
51
- );
52
- }
53
-
54
- function test_next_sibling_should_return_nextSibling()
55
- {
56
- $this->assertEquals(
57
- $this->html->find('head', 0)->nextSibling(),
58
- $this->html->find('head', 0)->next_sibling()
59
- );
60
- }
61
-
62
- function test_prev_sibling_should_return_previousSibling()
63
- {
64
- $this->assertEquals(
65
- $this->html->find('body', 0)->previousSibling(),
66
- $this->html->find('body', 0)->prev_sibling()
67
- );
68
- }
69
-
70
- /**
71
- * @expectedException PHPUnit\Framework\Error\Error
72
- */
73
- function test_unknown_function_should_return_error()
74
- {
75
- $this->html->root->doSomethingStupid();
76
- }
77
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/htmlnode_test.php DELETED
@@ -1,628 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests for simple_html_dom_node
7
- */
8
- class htmlnode_test extends TestCase {
9
-
10
- private $html;
11
-
12
- protected function setUp()
13
- {
14
- $this->html = new simple_html_dom;
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- $this->html->clear();
20
- unset($this->html);
21
- }
22
-
23
- public function test___get_should_return_false_for_unknown_attributes()
24
- {
25
- $doc = '<html></html>';
26
- $this->html->load($doc);
27
-
28
- $this->assertFalse($this->html->find('html', 0)->lang);
29
- }
30
-
31
- public function test___set_should_overwrite_innertext()
32
- {
33
- $expected = 'PHP Simple HTML DOM Parser';
34
- $doc = '<html><!--Hello, World!--></html>';
35
- $this->html->load($doc);
36
- $this->html->find('comment', 0)->innertext = '';
37
- $this->html->find('html', 0)->innertext = $expected;
38
-
39
- $this->assertEquals($expected, $this->html->find('html', 0)->innertext());
40
- $this->assertEmpty($this->html->find('comment', 0)->innertext());
41
- }
42
-
43
- public function test___unset_should_remove_attribute()
44
- {
45
- $doc = '<html lang="en"></html>';
46
- $this->html->load($doc);
47
- unset($this->html->find('html', 0)->lang);
48
-
49
- $this->assertFalse($this->html->find('html', 0)->lang);
50
- }
51
-
52
- public function test_remove_should_remove_node()
53
- {
54
- $expected = <<<EOD
55
- <html>
56
- <body>
57
-
58
- </body>
59
- </html>
60
- EOD;
61
-
62
- $doc = <<<EOD
63
- <html>
64
- <body>
65
- <table>
66
- <tr><th>Title</th></tr>
67
- <tr><td>Row 1</td></tr>
68
- </table>
69
- </body>
70
- </html>
71
- EOD;
72
-
73
- $this->html->load($doc, true, false);
74
-
75
- $table = $this->html->find('table', 0);
76
-
77
- $this->assertEquals($doc, $this->html->save());
78
-
79
- $table->remove();
80
-
81
- $this->assertEquals($expected, $this->html->save());
82
- }
83
-
84
- public function test_removeChild()
85
- {
86
- $expected = <<<EOD
87
- <html>
88
- <body>
89
-
90
- </body>
91
- </html>
92
- EOD;
93
-
94
- $doc = <<<EOD
95
- <html>
96
- <body>
97
- <table>
98
- <tr><th>Title</th></tr>
99
- <tr><td>Row 1</td></tr>
100
- </table>
101
- </body>
102
- </html>
103
- EOD;
104
-
105
- $this->html->load($doc, true, false);
106
-
107
- $body = $this->html->find('body', 0);
108
-
109
- $this->assertEquals($doc, $this->html->save());
110
-
111
- $body->removeChild($body->find('table', 0));
112
-
113
- $this->assertEquals($expected, $this->html->save());
114
- }
115
-
116
- public function test_has_child_should_return_boolean()
117
- {
118
- $doc = '<html><p>PHP Simple HTML DOM Parser</p></html>';
119
- $this->html->load($doc);
120
-
121
- $this->assertTrue($this->html->find('html', 0)->has_child());
122
- $this->assertFalse($this->html->find('p', 0)->has_child());
123
-
124
- $this->assertTrue($this->html->find('html', 0)->hasChildNodes());
125
- $this->assertFalse($this->html->find('p', 0)->hasChildNodes());
126
- }
127
-
128
- public function test_hasAttribute_should_return_boolean()
129
- {
130
- $doc = '<html lang="en"></html>';
131
-
132
- $this->html->load($doc);
133
-
134
- $this->assertTrue($this->html->find('html', 0)->hasAttribute('lang'));
135
- $this->assertFalse($this->html->find('html', 0)->hasAttribute('encoding'));
136
- }
137
-
138
- public function test_hasClass_should_return_true()
139
- {
140
- $this->html->load('<p class="article">Simple HTML DOM Parser</p>');
141
-
142
- $this->assertTrue($this->html->find('p', 0)->hasClass('article'));
143
- }
144
-
145
- public function test_innertext_should_return_comment()
146
- {
147
- $expected = 'For your information';
148
- $doc = '<html><!--For your information--></html>';
149
-
150
- $this->html->load($doc);
151
-
152
- $this->assertEquals($expected, $this->html->find('comment', 0)->innertext());
153
- }
154
-
155
- public function test_hasClass_should_return_false()
156
- {
157
- $this->html->load('<p>Simple HTML DOM Parser</p>');
158
-
159
- $this->assertFalse($this->html->find('p', 0)->hasClass('article'));
160
- }
161
-
162
- public function test_addClass_should_add_class()
163
- {
164
- $expected = '<p class="article">Simple HTML DOM Parser</p>';
165
-
166
- $this->html->load('<p>Simple HTML DOM Parser</p>');
167
- $this->html->find('p', 0)->addClass('article');
168
-
169
- $this->assertEquals($expected, $this->html->save());
170
- }
171
-
172
- public function test_addClass_should_add_class_list()
173
- {
174
- $expected = '<p class="article new">Simple HTML DOM Parser</p>';
175
-
176
- $this->html->load('<p>Simple HTML DOM Parser</p>');
177
- $this->html->find('p', 0)->addClass('article new');
178
-
179
- $this->assertEquals($expected, $this->html->save());
180
- }
181
-
182
- public function test_addClass_should_add_class_array()
183
- {
184
- $expected = '<p class="article new">Simple HTML DOM Parser</p>';
185
-
186
- $this->html->load('<p>Simple HTML DOM Parser</p>');
187
- $this->html->find('p', 0)->addClass(array('article', 'new'));
188
-
189
- $this->assertEquals($expected, $this->html->save());
190
- }
191
-
192
- public function test_addClass_should_add_missing_classes()
193
- {
194
- $expected = '<p class="article new">Simple HTML DOM Parser</p>';
195
-
196
- $this->html->load('<p class="article">Simple HTML DOM Parser</p>');
197
- $this->html->find('p', 0)->addClass('article new');
198
-
199
- $this->assertEquals($expected, $this->html->save());
200
- }
201
-
202
- public function test_appendChild_should_add_node_as_child()
203
- {
204
- $expected = '<html><p>Simple HTML DOM Parser</p></html>';
205
- $doc = '<html></html>';
206
-
207
- $this->html->load($doc);
208
- $child = $this->html->createElement('p', 'Simple HTML DOM Parser');
209
- $this->html->find('html', 0)->appendChild($child);
210
-
211
- $this->assertEquals($expected, $this->html->save());
212
- }
213
-
214
- public function test_removeClass_should_do_nothing_without_class()
215
- {
216
- $expected = '<p>Simple HTML DOM Parser</p>';
217
- $doc = '<p>Simple HTML DOM Parser</p>';
218
-
219
- $this->html->load($doc);
220
- $this->html->find('p', 0)->removeClass('new');
221
-
222
- $this->assertEquals($expected, $this->html->save());
223
- }
224
-
225
- public function test_removeClass_should_remove_all_classes()
226
- {
227
- $expected = '<p>Simple HTML DOM Parser</p>';
228
-
229
- $this->html->load('<p class="article">Simple HTML DOM Parser</p>');
230
- $this->html->find('p', 0)->removeClass();
231
-
232
- $this->assertEquals($expected, $this->html->save());
233
- }
234
-
235
- public function test_removeClass_should_remove_class_attribute()
236
- {
237
- $expected = '<p>Simple HTML DOM Parser</p>';
238
-
239
- $this->html->load('<p class="article">Simple HTML DOM Parser</p>');
240
- $this->html->find('p', 0)->removeClass('article');
241
-
242
- $this->assertEquals($expected, $this->html->save());
243
- }
244
-
245
- public function test_removeClass_should_remove_class()
246
- {
247
- $expected = '<p class="article">Simple HTML DOM Parser</p>';
248
-
249
- $this->html->load('<p class="article new">Simple HTML DOM Parser</p>');
250
- $this->html->find('p', 0)->removeClass('new');
251
-
252
- $this->assertEquals($expected, $this->html->save());
253
- }
254
-
255
- public function test_removeClass_should_remove_class_list()
256
- {
257
- $expected = '<p>Simple HTML DOM Parser</p>';
258
-
259
- $this->html->load('<p class="article new">Simple HTML DOM Parser</p>');
260
- $this->html->find('p', 0)->removeClass('article new');
261
-
262
- $this->assertEquals($expected, $this->html->save());
263
- }
264
-
265
- public function test_removeClass_should_remove_class_array()
266
- {
267
- $expected = '<p>Simple HTML DOM Parser</p>';
268
-
269
- $this->html->load('<p class="article new">Simple HTML DOM Parser</p>');
270
- $this->html->find('p', 0)->removeClass(array('article', 'new'));
271
-
272
- $this->assertEquals($expected, $this->html->save());
273
- }
274
-
275
- public function test_save_should_return_outerhtml()
276
- {
277
- $expected = '<p>Simple HTML DOM Parser</p>';
278
-
279
- $this->html->load('<div><p>Simple HTML DOM Parser</p></div>');
280
-
281
- $this->assertEquals($expected, $this->html->find('p', 0)->save());
282
- }
283
-
284
- public function test_setAttribute_should_update_attribute()
285
- {
286
- $expected = 'de';
287
- $doc = '<html lang="en"></html>';
288
-
289
- $this->html->load($doc);
290
- $this->html->find('html', 0)->setAttribute('lang', 'de');
291
-
292
- $this->assertEquals($expected, $this->html->find('html', 0)->getAttribute('lang'));
293
- }
294
-
295
- public function test_text_should_be_empty_after_clear()
296
- {
297
- $doc = '<html></html>';
298
-
299
- $this->html->load($doc);
300
- $element = $this->html->find('html', 0);
301
- $element->clear();
302
-
303
- $this->assertEmpty($element->text());
304
- }
305
-
306
- public function test_text_should_not_include_script_elements()
307
- {
308
- $expected = 'PHP Simple HTML DOM Parser';
309
- $doc = '<script>alert();</script><h1>PHP Simple HTML DOM Parser</h1>';
310
-
311
- $this->html->load($doc);
312
-
313
- $this->assertEquals($expected, $this->html->root->text());
314
- }
315
-
316
- public function test_text_should_not_include_style_elements()
317
- {
318
- $expected = 'PHP Simple HTML DOM Parser';
319
- $doc = '<style>h1{color: blue;}</style><h1>PHP Simple HTML DOM Parser</h1>';
320
-
321
- $this->html->load($doc);
322
-
323
- $this->assertEquals($expected, $this->html->root->text());
324
- }
325
-
326
- public function test_text_should_not_include_comments()
327
- {
328
- $expected = 'PHP Simple HTML DOM Parser';
329
- $doc = '<!--Hi there :)--><h1>PHP Simple HTML DOM Parser</h1>';
330
-
331
- $this->html->load($doc);
332
-
333
- $this->assertEquals($expected, $this->html->root->text());
334
- }
335
-
336
- public function test_text_should_include_cdata_content()
337
- {
338
- $expected = '<?php Simple HTML DOM Parser';
339
- $doc = '<h1><![CDATA[<?php]]> Simple HTML DOM Parser</h1>';
340
-
341
- $this->html->load($doc);
342
-
343
- $this->assertEquals($expected, $this->html->root->text());
344
- }
345
-
346
- public function test_save_should_create_file()
347
- {
348
- $expected_file = __DIR__ . '/data/simple_html_dom_node/save_file_expected.html';
349
- $file = __DIR__ . '/data/simple_html_dom_node/save_file.html';
350
-
351
- $this->html->load('<div><p>Simple HTML DOM Parser</p></div>');
352
- $this->html->find('p', 0)->save($file);
353
-
354
- $this->assertFileExists($file);
355
- $this->assertFileEquals($expected_file, $file);
356
- }
357
-
358
- public function test_find_ancestor_tag_should_return_element()
359
- {
360
- $doc = '<html><p></p></html>';
361
- $this->html->load($doc);
362
-
363
- $this->assertEquals(
364
- $this->html->find('html', 0),
365
- $this->html->find('p', 0)->find_ancestor_tag('html')
366
- );
367
- }
368
-
369
- public function test_find_ancestor_tag_should_return_null_without_match()
370
- {
371
- $doc = '<html><p></p></html>';
372
- $this->html->load($doc);
373
-
374
- $this->assertNull($this->html->find('p', 0)->find_ancestor_tag('a'));
375
- }
376
-
377
- public function test_first_child_should_return_null_without_children()
378
- {
379
- $doc = '<html></html>';
380
- $this->html->load($doc);
381
-
382
- $this->assertNull($this->html->find('html', 0)->first_child());
383
- $this->assertNull($this->html->find('html', 0)->firstChild());
384
- }
385
-
386
- public function test_first_child_should_work_after_remove()
387
- {
388
- $doc = '<div><a href="#"></a><p></p></div>';
389
-
390
- $this->html->load($doc);
391
- $this->html->find('a', 0)->remove();
392
-
393
- $this->assertNotNull($this->html->find('div', 0)->first_child());
394
- $this->assertNotNull($this->html->find('div', 0)->firstChild());
395
- }
396
-
397
- public function test_getAttribute_should_return_attribute()
398
- {
399
- $expected = 'en';
400
- $doc = '<html lang="en"></html>';
401
-
402
- $this->html->load($doc);
403
-
404
- $this->assertEquals($expected, $this->html->find('html', 0)->getAttribute('lang'));
405
- }
406
-
407
- public function test_getElementById_should_return_matching_element()
408
- {
409
- $doc = '<html><p id="claim">PHP Simple HTML DOM Parser</p></html>';
410
-
411
- $this->html->load($doc);
412
-
413
- $this->assertNotNull($this->html->root->getElementById('claim'));
414
- $this->assertNull($this->html->root->getElementById('unknown'));
415
- }
416
-
417
- public function test_getElementsById_should_return_matching_element()
418
- {
419
- // Note, this technically doesn't make sense but it's supported
420
- $doc = '<html><p id="a"></p><p id="a"></p></html>';
421
-
422
- $this->html->load($doc);
423
-
424
- $this->assertCount(2, $this->html->root->getElementsById('a'));
425
- }
426
-
427
- public function test_getElementByTagName_should_return_matching_element()
428
- {
429
- $expected = 'Hello';
430
- $doc = '<html><p>Hello</p><p>World</p></html>';
431
-
432
- $this->html->load($doc);
433
-
434
- $this->assertEquals($expected, $this->html->root->getElementByTagName('p')->text());
435
- }
436
-
437
- public function test_getElementsByTagName_should_return_matching_element()
438
- {
439
- $doc = '<html><p>Hello</p><p>World</p></html>';
440
-
441
- $this->html->load($doc);
442
-
443
- $this->assertCount(2, $this->html->root->getElementsByTagName('p'));
444
- }
445
-
446
- public function test_last_child_should_return_null_without_children()
447
- {
448
- $doc = '<html></html>';
449
- $this->html->load($doc);
450
-
451
- $this->assertNull($this->html->find('html', 0)->last_child());
452
- $this->assertNull($this->html->find('html', 0)->lastChild());
453
- }
454
-
455
- public function test_last_child_should_work_after_remove()
456
- {
457
- $doc = '<div><a href="#"></a><p></p></div>';
458
-
459
- $this->html->load($doc);
460
- $this->html->find('p', 0)->remove();
461
-
462
- $this->assertNotNull($this->html->find('div', 0)->last_child());
463
- $this->assertNotNull($this->html->find('div', 0)->lastChild());
464
- }
465
-
466
- public function test_next_sibling_should_return_null_without_parent()
467
- {
468
- $doc = '<html></html>';
469
- $this->html->load($doc);
470
-
471
- $this->assertNull($this->html->root->next_sibling());
472
- }
473
-
474
- public function test_next_sibling_should_return_null_without_sibling()
475
- {
476
- $doc = '<html></html>';
477
- $this->html->load($doc);
478
-
479
- $this->assertNull($this->html->find('html', 0)->next_sibling());
480
- }
481
-
482
- public function test_outertext_should_return_innertext_for_root()
483
- {
484
- $doc = '<html><!--For your information--></html>';
485
-
486
- $this->html->load($doc);
487
-
488
- $this->assertEquals($doc, $this->html->root->outertext());
489
- }
490
-
491
- public function test_outertext_should_return_custom_outertext()
492
- {
493
- $expected = '<html>Hello, World!</html>';
494
- $doc = '<html><p>PHP Simple HTML DOM Parser</p></html>';
495
-
496
- $this->html->load($doc);
497
- $this->html->find('p', 0)->outertext = 'Hello, World!';
498
-
499
- $this->assertEquals($expected, $this->html->find('html', 0)->outertext());
500
- }
501
-
502
- public function test_next_sibling_should_work_after_remove()
503
- {
504
- $doc = '<div><a href="#"></a><img><p></p></div>';
505
-
506
- $this->html->load($doc);
507
- $this->html->find('img', 0)->remove();
508
-
509
- $this->assertNotNull($this->html->find('a', 0)->next_sibling());
510
- }
511
-
512
- public function test_nodeName_should_return_tag_name()
513
- {
514
- $expected = 'div';
515
- $doc = '<div></div>';
516
-
517
- $this->html->load($doc);
518
-
519
- $this->assertEquals($expected, $this->html->find('div', 0)->nodeName());
520
- }
521
-
522
- public function test_parent_should_make_node_child_of_element()
523
- {
524
- $expected = '<html><p>PHP Simple HTML DOM Parser</p></html>';
525
- $doc = '<html></html>';
526
-
527
- $this->html->load($doc);
528
- $this->html->find('html', 0);
529
-
530
- $node = $this->html->createElement('p', 'PHP Simple HTML DOM Parser');
531
- $node->parent($this->html->find('html', 0));
532
-
533
- $this->assertEquals($expected, $this->html->save());
534
- }
535
-
536
- public function test_prev_sibling_should_return_null_without_parent()
537
- {
538
- $doc = '<html></html>';
539
- $this->html->load($doc);
540
-
541
- $this->assertNull($this->html->root->prev_sibling());
542
- }
543
-
544
- public function test_prev_sibling_should_return_null_without_sibling()
545
- {
546
- $doc = '<html></html>';
547
- $this->html->load($doc);
548
-
549
- $this->assertNull($this->html->find('html', 0)->prev_sibling());
550
- }
551
-
552
- public function test_prev_sibling_should_work_after_remove()
553
- {
554
- $doc = '<div><a href="#"></a><img><p></p></div>';
555
-
556
- $this->html->load($doc);
557
- $this->html->find('img', 0)->remove();
558
-
559
- $this->assertNotNull($this->html->find('p', 0)->prev_sibling());
560
- }
561
-
562
- public function test_children_should_return_element_by_index()
563
- {
564
- $expected = 'PHP Simple HTML DOM Parser';
565
- $doc = '<html><p>PHP Simple HTML DOM Parser</p></html>';
566
- $this->html->load($doc);
567
-
568
- $this->assertEquals($expected, $this->html->find('html', 0)->children(0)->text());
569
- $this->assertEquals($expected, $this->html->find('html', 0)->childNodes(0)->text());
570
- }
571
-
572
- public function test_children_should_return_null_if_index_out_of_range()
573
- {
574
- $doc = '<html><p>PHP Simple HTML DOM Parser</p></html>';
575
- $this->html->load($doc);
576
-
577
- $this->assertNull($this->html->find('html', 0)->children(1));
578
- $this->assertNull($this->html->find('html', 0)->childNodes(1));
579
- }
580
-
581
- public function test_children_should_work_after_remove()
582
- {
583
- $doc = '<div><a href="#"></a><img><p></p></div>';
584
-
585
- $this->html->load($doc);
586
- $this->html->find('img', 0)->remove();
587
-
588
- $this->assertCount(2, $this->html->find('div', 0)->children());
589
- $this->assertArrayHasKey(0, $this->html->find('div', 0)->children());
590
- $this->assertArrayHasKey(1, $this->html->find('div', 0)->children());
591
-
592
- $this->assertCount(2, $this->html->find('div', 0)->childNodes());
593
- $this->assertArrayHasKey(0, $this->html->find('div', 0)->childNodes());
594
- $this->assertArrayHasKey(1, $this->html->find('div', 0)->childNodes());
595
- }
596
-
597
- public function test_expect_should_return_null_for_no_match()
598
- {
599
- $doc = '<div><a href="#"></a><img><p></p></div>';
600
-
601
- $this->html->load($doc);
602
- $this->html->find('img', 0)->remove();
603
-
604
- $this->assertNull($this->html->expect('p.class'));
605
- $this->assertNull($this->html->expect('p.class', 0));
606
- }
607
-
608
- public function test_toString_should_return_html_document()
609
- {
610
- $doc = '<p>PHP Simple HTML DOM Parser</p>';
611
-
612
- $this->html->load($doc);
613
-
614
- $this->assertEquals($doc, sprintf('%s', $this->html->find('p', 0)));
615
- }
616
-
617
- public function test_xmltext_should_strip_cdata_tags()
618
- {
619
- $expected = '<p><em>PHP Simple HTML DOM Parser</em></p>';
620
- $doc = '<p><![CDATA[<em>]]>PHP Simple HTML DOM Parser<![CDATA[</em>]]></p>';
621
-
622
- $this->html->load($doc);
623
-
624
- $this->assertEquals($expected, $this->html->root->xmltext);
625
- $this->assertEquals($expected, $this->html->root->xmltext());
626
- }
627
-
628
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/htmlweb_curl_test.php DELETED
@@ -1,49 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../HtmlWeb.php';
3
- use simplehtmldom\HtmlWeb;
4
- use PHPUnit\Framework\TestCase;
5
-
6
- /**
7
- * Tests the cURL part of HtmlWeb
8
- */
9
- class htmlweb_curl_test extends TestCase {
10
- private $web;
11
-
12
- protected function setUp()
13
- {
14
- if (!extension_loaded('curl')) {
15
- $this->markTestSkipped('The cURL extension must be enabled for this test.');
16
- }
17
-
18
- $this->web = new HtmlWeb();
19
- }
20
-
21
- protected function tearDown()
22
- {
23
- unset($this->web);
24
- }
25
-
26
- public function urlProvider()
27
- {
28
- return array(
29
- 'Google' => array('https://www.google.com/'),
30
- 'GitHub' => array('https://www.github.com/'),
31
- );
32
- }
33
-
34
- /** @dataProvider urlProvider */
35
- public function test_load_should_return_dom_object($url)
36
- {
37
- $this->assertNotNull($this->web->load($url));
38
- }
39
-
40
- public function test_load_should_return_null_on_negative_response()
41
- {
42
- $this->assertNull($this->web->load('https://simplehtmldom.sourceforge.io/a.html'));
43
- }
44
-
45
- public function test_load_should_return_null_for_pages_larger_than_max_file_size()
46
- {
47
- $this->assertNull($this->web->load('https://www.w3.org/TR/html52/single-page.html'));
48
- }
49
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/htmlweb_fopen_test.php DELETED
@@ -1,53 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../HtmlWeb.php';
3
- use simplehtmldom\HtmlWeb;
4
- use PHPUnit\Framework\TestCase;
5
-
6
- /**
7
- * Tests the fopen part of HtmlWeb
8
- */
9
- class htmlweb_fopen_test extends TestCase {
10
- private $web;
11
-
12
- protected function setUp()
13
- {
14
- if (extension_loaded('curl')) {
15
- $this->markTestSkipped('The cURL extension must be disabled for this test.');
16
- }
17
-
18
- if (!(ini_get('allow_url_fopen'))) {
19
- $this->markTestSkipped('allow_url_fopen must be enabled for this test.');
20
- }
21
-
22
- $this->web = new HtmlWeb();
23
- }
24
-
25
- protected function tearDown()
26
- {
27
- unset($this->web);
28
- }
29
-
30
- public function urlProvider()
31
- {
32
- return array(
33
- 'Google' => array('https://www.google.com/'),
34
- 'GitHub' => array('https://www.github.com/'),
35
- );
36
- }
37
-
38
- /** @dataProvider urlProvider */
39
- public function test_load_should_return_dom_object($url)
40
- {
41
- $this->assertNotNull($this->web->load($url));
42
- }
43
-
44
- public function test_load_should_return_null_on_negative_response()
45
- {
46
- $this->assertNull($this->web->load('https://simplehtmldom.sourceforge.io/a.html'));
47
- }
48
-
49
- public function test_load_should_return_null_for_pages_larger_than_max_file_size()
50
- {
51
- $this->assertNull($this->web->load('https://www.w3.org/TR/html52/single-page.html'));
52
- }
53
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/htmlweb_test.php DELETED
@@ -1,49 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../HtmlWeb.php';
3
- use simplehtmldom\HtmlWeb;
4
- use PHPUnit\Framework\TestCase;
5
-
6
- /**
7
- * Tests the general behavior of HtmlWeb
8
- */
9
- class htmlweb_test extends TestCase {
10
- private $web;
11
-
12
- protected function setUp()
13
- {
14
- $this->web = new HtmlWeb();
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- unset($this->web);
20
- }
21
-
22
- public function urlProvider()
23
- {
24
- return array(
25
- 'Empty URL' => array(''),
26
- 'Scheme Missing' => array('//github.com/simplehtmldom/'),
27
- 'Wrong Scheme' => array('ssh://github.com/'),
28
- );
29
- }
30
-
31
- /** @dataProvider urlProvider */
32
- public function test_load_should_return_null_for_invalid_url($url)
33
- {
34
- $this->assertNull($this->web->load($url));
35
- }
36
-
37
- public function test_load_should_return_null_without_curl_and_fopen()
38
- {
39
- if (extension_loaded('curl')) {
40
- $this->markTestSkipped('The cURL extension must be disabled for this test.');
41
- }
42
-
43
- if (ini_get('allow_url_fopen')) {
44
- $this->markTestSkipped('allow_url_fopen must be disabled for this test.');
45
- }
46
-
47
- $this->assertNull($this->web->load('https://www.google.com/'));
48
- }
49
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/memory_parsing_test.php DELETED
@@ -1,131 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
- use PHPUnit\Framework\TestResult;
5
-
6
- /**
7
- * Test memory behavior of parsing operations
8
- *
9
- * **Note**:
10
- *
11
- * These tests are based on /testcase/memory_test.php
12
- */
13
- class memory_parsing_test extends TestCase {
14
- /** File for memory tests */
15
- private $file = __DIR__ . '/data/memory/index.html';
16
-
17
- public function setUp()
18
- {
19
- /**
20
- * The first time we access a file, PHP acquires additional memory that
21
- * breaks some assertions. For some reason, loading the contents once
22
- * fixes this issue.
23
- */
24
- gc_enable();
25
- $contents = file_get_contents($this->file, false, null, 0, filesize($this->file));
26
- $html = new simple_html_dom($contents);
27
- unset($html);
28
- $contents = null;
29
- $file = null;
30
- gc_collect_cycles();
31
- }
32
-
33
- /**
34
- * Test if the parser properly releases memory using simple_html_dom (50x)
35
- *
36
- * Memory usage should stay stable when using the parser in a loop.
37
- */
38
- public function test_simple_html_dom()
39
- {
40
- $contents = file_get_contents($this->file, false, null, 0, filesize($this->file));
41
-
42
- if (is_file($this->file)) {
43
- // Cleanup before doing anything
44
- gc_enable();
45
- gc_collect_cycles();
46
-
47
- for ($i = 0; $i <= 50; $i++) {
48
- $memory_start = memory_get_usage();
49
-
50
- $html = new simple_html_dom($contents);
51
- unset($html);
52
- gc_collect_cycles(); // Trigger garbage collection
53
-
54
- $memory_end = memory_get_usage();
55
-
56
- $this->assertEquals($memory_start, $memory_end, 'Iteration: ' . $i);
57
- }
58
- } else {
59
- throw new Exception('Unable to perform test, file doesn\'t exist!');
60
- }
61
- }
62
-
63
- /**
64
- * Test if the parser properly releases memory using loadFile (50x)
65
- *
66
- * Memory usage should stay stable or slightly decrease (out of our control)
67
- * when using the parser in a loop.
68
- */
69
- public function test_loadFile()
70
- {
71
-
72
- if (is_file($this->file)) {
73
- // Cleanup before doing anything
74
- gc_enable();
75
- gc_collect_cycles();
76
-
77
- for ($i = 0; $i <= 50; $i++) {
78
- $memory_start = memory_get_usage();
79
-
80
- $html = new simple_html_dom();
81
- $html->loadFile($this->file, false, null, 0, filesize($this->file));
82
- unset($html);
83
- gc_collect_cycles(); // Trigger garbage collection
84
-
85
- $memory_end = memory_get_usage();
86
-
87
- $this->assertEquals($memory_start, $memory_end, 'Iteration: ' . $i);
88
- }
89
- } else {
90
- throw new Exception('Unable to perform test, file doesn\'t exist!');
91
- }
92
- }
93
-
94
- /**
95
- * Test if the parser correctly handles large files (optional)
96
- *
97
- * Uses the single page representation of the HTML Specification to perform
98
- * tests on large files (>10 MB).
99
- *
100
- * @link https://www.w3.org/TR/html/single-page.html HTML Specification (single page)
101
- */
102
- public function test_large_file()
103
- {
104
- // Note: The HTML Specification is VERY large (> 10 MB) and takes a very
105
- // long time to download. Thus, it should be placed in a local directory
106
- $file = __DIR__ . '/data/HTML 5.2.html';
107
-
108
- if (!is_file($file)) {
109
- $this->markTestSkipped(
110
- 'Download the HTML Specification as single page to "' . $file . '"'
111
- );
112
- }
113
-
114
- // Cleanup before doing anything
115
- gc_enable();
116
- gc_collect_cycles();
117
-
118
- $memory_start = memory_get_usage();
119
-
120
- // Use actual file size to load the entire file
121
- $html = new simple_html_dom;
122
- $html->load($file);
123
- unset($html);
124
- gc_collect_cycles(); // Trigger garbage collection
125
-
126
- $memory_end = memory_get_usage();
127
-
128
- $this->assertEquals($memory_start, $memory_end);
129
- }
130
-
131
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/optional_tags_test.php DELETED
@@ -1,791 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests for optional tags
7
- *
8
- * Optional tags are tags that can be omitted in cases where the DOM structure
9
- * is obvious to the parser.
10
- *
11
- * _Note_:
12
- * - Elements with optional end tag (test_optional_*_end_tag) should appear in
13
- * {@see simple_html_dom::$optional_closing_tags}
14
- *
15
- * @link https://www.w3.org/TR/html52/single-page.html#optional-tags
16
- * Optional tags
17
- */
18
- class optional_tags_test extends TestCase {
19
- private $html;
20
-
21
- protected function setUp()
22
- {
23
- $this->html = new simple_html_dom;
24
- }
25
-
26
- protected function tearDown()
27
- {
28
- $this->html->clear();
29
- unset($this->html);
30
- }
31
-
32
- /**
33
- * An html element’s start tag may be omitted if the first thing inside the
34
- * html element is not a comment.
35
- */
36
- public function test_optional_html_start_tag()
37
- {
38
- $doc = <<<HTML
39
- <!DOCTYPE HTML>
40
-
41
- <head></head>
42
- <body></body>
43
- </html>
44
- HTML;
45
-
46
- $this->markTestSkipped(
47
- 'Optional html start tag is not supported right now'
48
- );
49
-
50
- $this->assertNotNull($this->html->load($doc)->find('html', 0));
51
- }
52
-
53
- /**
54
- * An html element’s end tag may be omitted if the html element is not
55
- * immediately followed by a comment.
56
- */
57
- public function test_optional_html_end_tag()
58
- {
59
- $doc = <<<HTML
60
- <!DOCTYPE HTML>
61
- <html>
62
- <head></head>
63
- <body></body>
64
-
65
- HTML;
66
-
67
- $this->assertNotNull($this->html->load($doc)->find('html', 0));
68
- }
69
-
70
- /**
71
- * A head element’s start tag may be omitted if the element is empty, or if
72
- * the first thing inside the head element is an element.
73
- */
74
- public function test_optional_head_start_tag()
75
- {
76
- $doc = <<<HTML
77
- <!DOCTYPE HTML>
78
- <html>
79
- </head>
80
- <body></body>
81
- </html>
82
- HTML;
83
-
84
- $this->markTestSkipped(
85
- 'Optional head start tag is not supported right now'
86
- );
87
-
88
- $this->assertNotNull($this->html->load($doc)->find('head', 0));
89
- }
90
-
91
- /**
92
- * A head element’s end tag may be omitted if the head element is not
93
- * immediately followed by a space character or a comment.
94
- */
95
- public function test_optional_head_end_tag()
96
- {
97
- $doc = <<<HTML
98
- <!DOCTYPE HTML>
99
- <html>
100
- <head>
101
- <body></body>
102
- </html>
103
- HTML;
104
-
105
- $this->assertNotNull($this->html->load($doc)->find('head', 0));
106
- }
107
-
108
- /**
109
- * A body element’s start tag may be omitted if the element is empty, or if
110
- * the first thing inside the body element is not a space character or a
111
- * comment, except if the first thing inside the body element is a meta,
112
- * link, script, style, or template element.
113
- */
114
- public function test_optional_body_start_tag()
115
- {
116
- $doc = <<<HTML
117
- <!DOCTYPE HTML>
118
- <html>
119
- <head></head>
120
- </body>
121
- </html>
122
- HTML;
123
-
124
- $this->markTestSkipped(
125
- 'Optional body start tag is not supported right now'
126
- );
127
-
128
- $this->assertNotNull($this->html->load($doc)->find('body', 0));
129
- }
130
-
131
- /**
132
- * A body element’s end tag may be omitted if the body element is not
133
- * immediately followed by a comment.
134
- */
135
- public function test_optional_body_end_tag()
136
- {
137
- $doc = <<<HTML
138
- <!DOCTYPE HTML>
139
- <html>
140
- <head></head>
141
- <body>
142
- </html>
143
- HTML;
144
-
145
- $this->assertNotNull($this->html->load($doc)->find('body', 0));
146
- }
147
-
148
- /**
149
- * With all optional tags taken into account, the DOM can be simplified
150
- */
151
- public function test_optional_html_head_body_tags()
152
- {
153
- // phpcs:disable Generic.Files.LineLength
154
- $doc1 = <<<HTML
155
- <!DOCTYPE HTML><html><head><title>PHP Simple HTML DOM Parser</title></head><body><p>A PHP based DOM parser</p></body></html>
156
- HTML;
157
- // phpcs:enable
158
-
159
- $doc2 = <<<HTML
160
- <!DOCTYPE HTML><title>PHP Simple HTML DOM Parser</title><p>A PHP based DOM parser</p>
161
- HTML;
162
-
163
- $dom1 = $this->html->load($doc1);
164
- $dom2 = $this->html->load($doc2);
165
-
166
- $this->markTestSkipped(
167
- 'Some optional tags are not supported right now'
168
- );
169
-
170
- $this->assertNotNull($dom1->find('html', 0), 'html not found!');
171
- $this->assertNotNull($dom1->find('head', 0), 'head not found!');
172
- $this->assertNotNull($dom1->find('body', 0), 'body not found!');
173
-
174
- $this->assertEquals($dom1->find('html', 0), $dom2->find('html', 0));
175
- $this->assertEquals($dom1->find('head', 0), $dom2->find('head', 0));
176
- $this->assertEquals($dom1->find('body', 0), $dom2->find('body', 0));
177
- }
178
-
179
- /**
180
- * An li element’s end tag may be omitted if the li element is immediately
181
- * followed by another li element or if there is no more content in the
182
- * parent element.
183
- */
184
- public function test_optional_li_end_tag()
185
- {
186
- $doc = <<<HTML
187
- <!DOCTYPE HTML>
188
- <html>
189
- <head></head>
190
- <body>
191
- <ul><li>PHP Simple HTML DOM Parser<li>A PHP based DOM parser</ul>
192
- </body>
193
- </html>
194
- HTML;
195
-
196
- $dom = $this->html->load($doc);
197
-
198
- $this->assertCount(2, $dom->find('li'));
199
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('li', 0)->innertext);
200
- $this->assertEquals('A PHP based DOM parser', $dom->find('li', 1)->innertext);
201
- }
202
-
203
- /**
204
- * A dt element’s end tag may be omitted if the dt element is immediately
205
- * followed by another dt element or a dd element.
206
- */
207
- public function test_optional_dt_end_tag()
208
- {
209
- $doc = <<<HTML
210
- <!DOCTYPE HTML>
211
- <html>
212
- <head></head>
213
- <body>
214
- <dl><dt>PHP Simple HTML DOM Parser<dt>A PHP based DOM parser</dl>
215
- </body>
216
- </html>
217
- HTML;
218
-
219
- $dom = $this->html->load($doc);
220
-
221
- $this->assertCount(2, $dom->find('dt'));
222
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('dt', 0)->innertext);
223
- $this->assertEquals('A PHP based DOM parser', $dom->find('dt', 1)->innertext);
224
- }
225
-
226
- /**
227
- * A dd element’s end tag may be omitted if the dd element is immediately
228
- * followed by another dd element or a dt element, or if there is no more
229
- * content in the parent element.
230
- */
231
- public function test_optional_dd_end_tag()
232
- {
233
- $doc = <<<HTML
234
- <!DOCTYPE HTML>
235
- <html>
236
- <head></head>
237
- <body>
238
- <dl><dd>PHP Simple HTML DOM Parser<dd>A PHP based DOM parser</dl>
239
- </body>
240
- </html>
241
- HTML;
242
-
243
- $dom = $this->html->load($doc);
244
-
245
- $this->assertCount(2, $dom->find('dd'));
246
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('dd', 0)->innertext);
247
- $this->assertEquals('A PHP based DOM parser', $dom->find('dd', 1)->innertext);
248
- }
249
-
250
- /**
251
- * A p element’s end tag may be omitted if the p element is immediately
252
- * followed by an address, article, aside, blockquote, details, div, dl,
253
- * fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6,
254
- * header, hr, main, nav, ol, p, pre, section, table, or ul element, or if
255
- * there is no more content in the parent element and the parent element is
256
- * an HTML element that is not an a, audio, del, ins, map, noscript, or
257
- * video element, or an autonomous custom element.
258
- */
259
- public function test_optional_p_end_tag()
260
- {
261
- $token = array(
262
- 'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl',
263
- 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2',
264
- 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'main', 'nav', 'ol', 'p',
265
- 'pre', 'section', 'table', 'ul'
266
- );
267
-
268
- foreach ($token as $t) {
269
- $doc = <<<HTML
270
- <!DOCTYPE HTML>
271
- <html>
272
- <head></head>
273
- <body>
274
- <div><p>PHP Simple HTML DOM Parser<{$t}>A PHP based DOM parser</{$t}></div>
275
- </body>
276
- </html>
277
- HTML;
278
-
279
- $dom = $this->html->load($doc);
280
-
281
- $this->markTestSkipped(
282
- 'Optional p end tags are currently not supported!'
283
- );
284
-
285
- $this->assertNotNull($dom->find('p', 0), 'Missing p tag');
286
- $this->assertNotNull($dom->find($t, 0), "Missing {$t} tag");
287
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('p', 0)->innertext);
288
- }
289
- }
290
-
291
- /**
292
- * An rt element’s end tag may be omitted if the rt element is immediately
293
- * followed by an rt or rp element, or if there is no more content in the
294
- * parent element.
295
- */
296
- public function test_optional_rt_end_tag()
297
- {
298
- $doc = <<<HTML
299
- <!DOCTYPE HTML>
300
- <html>
301
- <head></head>
302
- <body>
303
- <div><rt>PHP Simple HTML DOM Parser<rt>A PHP based DOM parser</div>
304
- <div><rt>PHP Simple HTML DOM Parser<rp>A PHP based DOM parser</rp></div>
305
- </body>
306
- </html>
307
- HTML;
308
-
309
- $dom = $this->html->load($doc);
310
-
311
- $this->assertCount(3, $dom->find('rt'));
312
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('rt', 0)->innertext);
313
- $this->assertEquals('A PHP based DOM parser', $dom->find('rt', 1)->innertext);
314
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('rt', 2)->innertext);
315
- }
316
-
317
- /**
318
- * An rp element’s end tag may be omitted if the rp element is immediately
319
- * followed by an rt or rp element, or if there is no more content in the
320
- * parent element.
321
- */
322
- public function test_optional_rp_end_tag()
323
- {
324
- $doc = <<<HTML
325
- <!DOCTYPE HTML>
326
- <html>
327
- <head></head>
328
- <body>
329
- <div><rp>PHP Simple HTML DOM Parser<rp>A PHP based DOM parser</div>
330
- <div><rp>PHP Simple HTML DOM Parser<rt>A PHP based DOM parser</rt></div>
331
- </body>
332
- </html>
333
- HTML;
334
-
335
- $dom = $this->html->load($doc);
336
-
337
- $this->assertCount(3, $dom->find('rp'));
338
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('rp', 0)->innertext);
339
- $this->assertEquals('A PHP based DOM parser', $dom->find('rp', 1)->innertext);
340
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('rp', 2)->innertext);
341
- }
342
-
343
- /**
344
- * An optgroup element’s end tag may be omitted if the optgroup element is
345
- * immediately followed by another optgroup element, or if there is no more
346
- * content in the parent element.
347
- */
348
- public function test_optional_optgroup_end_tag()
349
- {
350
- $doc = <<<HTML
351
- <!DOCTYPE HTML>
352
- <html>
353
- <head></head>
354
- <body><optgroup>PHP Simple HTML DOM Parser<optgroup>A PHP based DOM parser</body>
355
- </html>
356
- HTML;
357
-
358
- $dom = $this->html->load($doc);
359
-
360
- $this->assertCount(2, $dom->find('optgroup'));
361
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('optgroup', 0)->innertext);
362
- $this->assertEquals('A PHP based DOM parser', $dom->find('optgroup', 1)->innertext);
363
- }
364
-
365
- /**
366
- * An option element’s end tag may be omitted if the option element is
367
- * immediately followed by another option element, or if it is immediately
368
- * followed by an optgroup element, or if there is no more content in the
369
- * parent element.
370
- */
371
- public function test_optional_option_end_tag()
372
- {
373
- $doc = <<<HTML
374
- <!DOCTYPE HTML>
375
- <html>
376
- <head></head>
377
- <body>
378
- <div><option>PHP Simple HTML DOM Parser<option>A PHP based DOM parser</div>
379
- <div><option>PHP Simple HTML DOM Parser<optgroup>A PHP based DOM parser</optgroup></div>
380
- </body>
381
- </html>
382
- HTML;
383
-
384
- $dom = $this->html->load($doc);
385
-
386
- $this->assertCount(3, $dom->find('option'));
387
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('option', 0)->innertext);
388
- $this->assertEquals('A PHP based DOM parser', $dom->find('option', 1)->innertext);
389
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('option', 2)->innertext);
390
- }
391
-
392
- /**
393
- * A colgroup element’s start tag may be omitted if the first thing inside
394
- * the colgroup element is a col element, and if the element is not
395
- * immediately preceded by another colgroup element whose end tag has been
396
- * omitted. (It can’t be omitted if the element is empty.)
397
- */
398
- public function test_optional_colgroup_start_tag()
399
- {
400
- $doc = <<<HTML
401
- <!DOCTYPE HTML>
402
- <html>
403
- <head></head>
404
- <body>
405
- <col>PHP Simple HTML DOM Parser</colgroup><col>A PHP based DOM parser</colgroup>
406
- </body>
407
- </html>
408
- HTML;
409
-
410
- $dom = $this->html->load($doc);
411
-
412
- $this->markTestSkipped(
413
- 'Optional start tags are not supported right now'
414
- );
415
-
416
- $this->assertCount(2, $dom->find('colgroup'));
417
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('colgroup', 0)->innertext);
418
- $this->assertEquals('A PHP based DOM parser', $dom->find('colgroup', 1)->innertext);
419
- }
420
-
421
- /**
422
- * A colgroup element’s end tag may be omitted if the colgroup element is
423
- * not immediately followed by a space character or a comment.
424
- */
425
- public function test_optional_colgroup_end_tag()
426
- {
427
- $doc = <<<HTML
428
- <!DOCTYPE HTML>
429
- <html>
430
- <head></head>
431
- <body><colgroup>PHP Simple HTML DOM Parser</body>
432
- </html>
433
- HTML;
434
-
435
- $dom = $this->html->load($doc);
436
-
437
- $this->assertNotNull($dom->find('colgroup', 0));
438
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('colgroup', 0)->innertext);
439
- }
440
-
441
- /**
442
- * A caption element’s end tag may be omitted if the caption element is not
443
- * immediately followed by a space character or a comment.
444
- */
445
- public function test_optional_caption_end_tag()
446
- {
447
- $doc = <<<HTML
448
- <!DOCTYPE HTML>
449
- <html>
450
- <head></head>
451
- <body><caption>PHP Simple HTML DOM Parser</body>
452
- </html>
453
- HTML;
454
-
455
- $dom = $this->html->load($doc);
456
-
457
- $this->assertNotNull($dom->find('caption', 0));
458
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('caption', 0)->innertext);
459
- }
460
-
461
- /**
462
- * A thead element’s end tag may be omitted if the thead element is
463
- * immediately followed by a tbody or tfoot element.
464
- */
465
- public function test_optional_thead_end_tag()
466
- {
467
- $token = array('tbody', 'tfoot');
468
-
469
- foreach ($token as $t) {
470
- $doc = <<<HTML
471
- <!DOCTYPE HTML>
472
- <html>
473
- <head></head>
474
- <body><thead><{$t}>PHP Simple HTML DOM Parser</{$t}></body>
475
- </html>
476
- HTML;
477
-
478
- $dom = $this->html->load($doc);
479
-
480
- $this->assertNotNull($dom->find('thead', 0), 'Missing thead tag');
481
- $this->assertNotNull($dom->find($t, 0), "Missing {$t} tag");
482
- $this->assertEquals("<{$t}>PHP Simple HTML DOM Parser</{$t}>", $dom->find('thead', 0)->innertext);
483
- }
484
- }
485
-
486
- /**
487
- * A tbody element’s start tag may be omitted if the first thing inside the
488
- * tbody element is a tr element, and if the element is not immediately
489
- * preceded by a tbody, thead, or tfoot element whose end tag has been
490
- * omitted. (It can’t be omitted if the element is empty.)
491
- */
492
- public function test_optional_tbody_start_tag()
493
- {
494
- $doc = <<<HTML
495
- <!DOCTYPE HTML>
496
- <html>
497
- <head></head>
498
- <body><tr>PHP Simple HTML DOM Parser</tr></tbody></body>
499
- </html>
500
- HTML;
501
-
502
- $dom = $this->html->load($doc);
503
-
504
- $this->markTestSkipped(
505
- 'tbody is currently skipped by the parser'
506
- );
507
-
508
- $this->assertNotNull($dom->find('tbody', 0));
509
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('tbody', 0)->innertext);
510
- }
511
-
512
- /**
513
- * A tbody element’s end tag may be omitted if the tbody element is
514
- * immediately followed by a tbody or tfoot element, or if there is no more
515
- * content in the parent element.
516
- */
517
- public function test_optional_tbody_end_tag()
518
- {
519
- $doc = <<<HTML
520
- <!DOCTYPE HTML>
521
- <html>
522
- <head></head>
523
- <body><tbody>PHP Simple HTML DOM Parser<tbody>A PHP based DOM parser</body>
524
- </html>
525
- HTML;
526
-
527
- $dom = $this->html->load($doc);
528
-
529
- $this->markTestSkipped(
530
- 'tbody is currently skipped by the parser'
531
- );
532
-
533
- $this->assertCount(2, $dom->find('tbody'));
534
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('tbody', 0)->innertext);
535
- $this->assertEquals('A PHP based DOM parser', $dom->find('tbody', 1)->innertext);
536
- }
537
-
538
- /**
539
- * A tfoot element’s end tag may be omitted if there is no more content in
540
- * the parent element.
541
- */
542
- public function test_optional_tfoot_end_tag()
543
- {
544
- $doc = <<<HTML
545
- <!DOCTYPE HTML>
546
- <html>
547
- <head></head>
548
- <body><tfoot>PHP Simple HTML DOM Parser</body>
549
- </html>
550
- HTML;
551
-
552
- $dom = $this->html->load($doc);
553
-
554
- $this->assertNotNull($dom->find('tfoot', 0));
555
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('tfoot', 0)->innertext);
556
- }
557
-
558
- /**
559
- * A tr element’s end tag may be omitted if the tr element is immediately
560
- * followed by another tr element, or if there is no more content in the
561
- * parent element.
562
- */
563
- public function test_optional_tr_end_tag()
564
- {
565
- $doc = <<<HTML
566
- <!DOCTYPE HTML>
567
- <html>
568
- <head></head>
569
- <body><tr>PHP Simple HTML DOM Parser<tr>A PHP based DOM parser</body>
570
- </html>
571
- HTML;
572
-
573
- $dom = $this->html->load($doc);
574
-
575
- $this->assertCount(2, $dom->find('tr'));
576
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('tr', 0)->innertext);
577
- $this->assertEquals('A PHP based DOM parser', $dom->find('tr', 1)->innertext);
578
- }
579
-
580
- /**
581
- * A td element’s end tag may be omitted if the td element is immediately
582
- * followed by a td or th element, or if there is no more content in the
583
- * parent element.
584
- */
585
- public function test_optional_td_end_tag()
586
- {
587
- $doc = <<<HTML
588
- <!DOCTYPE HTML>
589
- <html>
590
- <head></head>
591
- <body>
592
- <div><td>PHP Simple HTML DOM Parser<td>A PHP based DOM parser</div>
593
- <div><td>PHP Simple HTML DOM Parser<th>A PHP based DOM parser</th></div>
594
- </body>
595
- </html>
596
- HTML;
597
-
598
- $dom = $this->html->load($doc);
599
-
600
- $this->assertCount(3, $dom->find('td'));
601
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('td', 0)->innertext);
602
- $this->assertEquals('A PHP based DOM parser', $dom->find('td', 1)->innertext);
603
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('td', 2)->innertext);
604
- }
605
-
606
- /**
607
- * A th element’s end tag may be omitted if the th element is immediately
608
- * followed by a td or th element, or if there is no more content in the
609
- * parent element.
610
- */
611
- public function test_optional_th_end_tag()
612
- {
613
- $doc = <<<HTML
614
- <!DOCTYPE HTML>
615
- <html>
616
- <head></head>
617
- <body>
618
- <div><th>PHP Simple HTML DOM Parser<th>A PHP based DOM parser</div>
619
- <div><th>PHP Simple HTML DOM Parser<td>A PHP based DOM parser</td></div>
620
- </body>
621
- </html>
622
- HTML;
623
-
624
- $dom = $this->html->load($doc);
625
-
626
- $this->assertCount(3, $dom->find('th'));
627
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('th', 0)->innertext);
628
- $this->assertEquals('A PHP based DOM parser', $dom->find('th', 1)->innertext);
629
- $this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('th', 2)->innertext);
630
- }
631
-
632
- /**
633
- * Checks if optional end tags are properly handled by the parser.
634
- *
635
- * @dataProvider dataProvider_for_parser_should_close_optional_end_tags
636
- */
637
- public function test_parser_should_close_optional_end_tags($doc, $expected, $message)
638
- {
639
- $this->html->load($doc);
640
- $this->assertEquals($expected, $this->html->save(), $message);
641
- }
642
-
643
- /**
644
- * @todo: The list of block tags and optional closing tags should come from
645
- * code, not copied here.
646
- */
647
- public function dataProvider_for_parser_should_close_optional_end_tags()
648
- {
649
- $block_tags = array(
650
- 'body' => 1,
651
- 'div' => 1,
652
- 'form' => 1,
653
- 'root' => 1,
654
- 'span' => 1,
655
- 'table' => 1
656
- );
657
-
658
- // Remove root (implicitly added by the parser)
659
- unset($block_tags['root']);
660
-
661
- $optional_closing_tags = array(
662
- // Not optional, see
663
- // https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
664
- 'b' => array('b' => 1),
665
- 'dd' => array('dd' => 1, 'dt' => 1),
666
- // Not optional, see
667
- // https://www.w3.org/TR/html/grouping-content.html#the-dl-element
668
- 'dl' => array('dd' => 1, 'dt' => 1),
669
- 'dt' => array('dd' => 1, 'dt' => 1),
670
- 'li' => array('li' => 1),
671
- 'optgroup' => array('optgroup' => 1, 'option' => 1),
672
- 'option' => array('optgroup' => 1, 'option' => 1),
673
- 'p' => array('p' => 1),
674
- 'rp' => array('rp' => 1, 'rt' => 1),
675
- 'rt' => array('rp' => 1, 'rt' => 1),
676
- 'td' => array('td' => 1, 'th' => 1),
677
- 'th' => array('td' => 1, 'th' => 1),
678
- 'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
679
- );
680
-
681
- $data = array();
682
-
683
- // Block tags should close optional elements
684
- foreach(array_keys($block_tags) as $block) {
685
- foreach(array_keys($optional_closing_tags) as $e) {
686
- $data["$block should close $e"] = array(
687
- "<$block><$e></$block>",
688
- "<$block><$e></$e></$block>",
689
- "$block should close $e"
690
- );
691
- }
692
- }
693
-
694
- // Special case for root (has no tags)
695
- foreach(array_keys($optional_closing_tags) as $e) {
696
- $data["root should close $e"] = array(
697
- "<$e>",
698
- "<$e></$e>",
699
- "root should close $e"
700
- );
701
- }
702
-
703
- // Block tags should close optional elements, even if the opening tag is missing
704
- foreach(array_keys($block_tags) as $block) {
705
- foreach(array_keys($optional_closing_tags) as $e) {
706
- $data["$block should close $e"] = array(
707
- "<$e></$block>",
708
- "<$e></$e></$block>",
709
- "$block should close $e"
710
- );
711
- }
712
- }
713
-
714
- // Block tags should close NESTED optional elements
715
- foreach(array_keys($block_tags) as $block) {
716
- foreach(array_keys($optional_closing_tags) as $e) {
717
- foreach(array_keys($optional_closing_tags[$e]) as $child) {
718
-
719
- // skip if element closes itself
720
- if($e === $child) continue;
721
-
722
- // skip if child and element are mutual exclusive
723
- if(isset($optional_closing_tags[$child])
724
- && array_key_exists($e, $optional_closing_tags[$child])) {
725
- continue;
726
- }
727
-
728
- $data["$block should close nested $e and $child"] = array(
729
- "<$block><$e><$child></$block>",
730
- "<$block><$e><$child></$child></$e></$block>",
731
- "$block should close nested $e and $child"
732
- );
733
- }
734
- }
735
- }
736
-
737
- // Special case for root (has not tags)
738
- foreach(array_keys($block_tags) as $block) {
739
- foreach(array_keys($optional_closing_tags) as $e) {
740
- foreach(array_keys($optional_closing_tags[$e]) as $child) {
741
-
742
- // skip if nested element closes itself
743
- if($e === $child) continue;
744
-
745
- // skip if child and element are mutual exclusive
746
- if(isset($optional_closing_tags[$child])
747
- && array_key_exists($e, $optional_closing_tags[$child])) {
748
- continue;
749
- }
750
-
751
- $data["root should close nested $e and $child"] = array(
752
- "<$e><$child>",
753
- "<$e><$child></$child></$e>",
754
- "root should close nested $e and $child"
755
- );
756
- }
757
- }
758
- }
759
-
760
- // Some optional tags should close other optional tags
761
- foreach(array_keys($optional_closing_tags) as $e) {
762
- foreach(array_keys($optional_closing_tags[$e]) as $child) {
763
- $data["$e should close $child"] = array(
764
- "<$child><$e>",
765
- "<$child></$child><$e></$e>",
766
- "$e should close $child"
767
- );
768
- }
769
- }
770
-
771
- // Optional tags should NOT close stray elements
772
- foreach(array_keys($optional_closing_tags) as $e) {
773
- $data["$e should NOT close a"] = array(
774
- "<a><$e>",
775
- "<a><$e></$e>",
776
- "$e should NOT close a"
777
- );
778
- }
779
-
780
- // Normal tags should NOT close optional elements
781
- foreach(array_keys($optional_closing_tags) as $e) {
782
- $data["a should NOT close $e"] = array(
783
- "<$e><a></a>",
784
- "<$e><a></a></$e>",
785
- "a should NOT close $e"
786
- );
787
- }
788
-
789
- return $data;
790
- }
791
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/script_test.php DELETED
@@ -1,69 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Checks if the parser properly handles script elements
7
- */
8
- class script_test extends TestCase {
9
-
10
- private $html;
11
-
12
- protected function setUp()
13
- {
14
- $this->html = new simple_html_dom;
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- $this->html->clear();
20
- unset($this->html);
21
- }
22
-
23
- /**
24
- * @dataProvider dataProvider_for_script_should_parse
25
- */
26
- public function test_script_should_parse($expected, $doc)
27
- {
28
- $this->html->load($doc);
29
- $this->assertEquals($expected, $this->html->find('script', 0)->innertext);
30
- $this->assertEquals($doc, $this->html->save());
31
- }
32
-
33
- public function dataProvider_for_script_should_parse()
34
- {
35
- return array(
36
- 'empty' => array(
37
- '',
38
- '<script></script>',
39
- ),
40
- 'empty with type' => array(
41
- '',
42
- '<script type="application/javascript"></script>',
43
- ),
44
- 'space' => array(
45
- ' ',
46
- '<script> </script>',
47
- ),
48
- 'html string' => array(
49
- "var foo = '<div>Hello, World!</div>';",
50
- "<script>var foo = '<div>Hello, World!</div>';</script>",
51
- ),
52
- 'newline' => array(
53
- "\n",
54
- "<script>\n</script>"
55
- ),
56
- 'newline with type' => array(
57
- "\n",
58
- "<script type=\"application/javascript\">\n</script>"
59
- ),
60
- );
61
- }
62
-
63
- public function test_html_inside_script_should_not_appear_in_the_dom()
64
- {
65
- $this->html->load('<script><div>Hello, World!</div></script>');
66
- $this->assertNotNull($this->html->find('script', 0));
67
- $this->assertNull($this->html->find('div', 0));
68
- }
69
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/self_closing_tags_test.php DELETED
@@ -1,417 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Test identification of self closing tags (void elements)
7
- *
8
- * _Remarks_:
9
- * - Test functions are ordered by name to allow for simple navigation.
10
- * - Self-closing tags should be tested for their previous and next sibling, as
11
- * they are "consumed" by incorrectly detected elements. For example:
12
- * `<div id="before"></div><tag><div id="after"></div>` will result in
13
- * `tag->outertext` with contents `<tag><div id="after"></div>` because `<tag>`
14
- * is NOT a valid void element. If it were a valid element, `tag->outertext`
15
- * would return `<tag>` instead.
16
- * - Tests should not include more than one self-closing tag, to correctly assert
17
- * results.
18
- * - Do not remove obsolete tests! These tests should be changed to correctly
19
- * assert the opposite condition is true (i.e. change `assertNull` to
20
- * `assertNotNull`).
21
- *
22
- * _Code template_:
23
- * Use the following code template for all unit tests in this file. Replace all
24
- * occurrences of "tag" by the corresponding tag name (i.e. for the `area` tag
25
- * function name `test_tag` should become `test_area`).
26
- *
27
- * ```php
28
- * public function test_tag() {
29
- * $src = <<<HTML
30
- * <div id="before"></div>
31
- * <tag>
32
- * <div id="after"></div>
33
- * HTML;
34
- *
35
- * $tag = $this->html->load($src)->find('tag', 0);
36
- *
37
- * $this->assertEquals('before', $tag->previousSibling()->id);
38
- * $this->assertEquals('<tag>', $tag->outertext);
39
- * $this->assertEquals('after', $tag->nextSibling()->id);
40
- * }
41
- * ```
42
- *
43
- * @link https://www.w3.org/TR/html HTML Specification
44
- * @link https://www.w3.org/TR/html/syntax.html#void-elements Void elements
45
- */
46
- class self_closing_tags_test extends TestCase {
47
-
48
- private $html;
49
-
50
- protected function setUp()
51
- {
52
- $this->html = new simple_html_dom;
53
- }
54
-
55
- protected function tearDown()
56
- {
57
- $this->html->clear();
58
- unset($this->html);
59
- }
60
-
61
- /**
62
- * @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-area-element
63
- * The area element
64
- */
65
- public function test_area()
66
- {
67
- $src = <<<HTML
68
- <map name="shapes">
69
- <div id="before"></div>
70
- <area shape=rect coords="50,50,100,100">
71
- <div id="after"></div>
72
- </map>
73
- HTML;
74
-
75
- $area = $this->html->load($src)->find('area', 0);
76
-
77
- $this->assertEquals('before', $area->previousSibling()->id);
78
- $this->assertEquals('<area shape=rect coords="50,50,100,100">', $area->outertext);
79
- $this->assertEquals('after', $area->nextSibling()->id);
80
- }
81
-
82
- /**
83
- * @link https://www.w3.org/TR/html52/document-metadata.html#the-base-element
84
- * The base element
85
- */
86
- public function test_base()
87
- {
88
- $src = <<<HTML
89
- <head>
90
- <div id="before"></div>
91
- <base href="http://simplehtmldom.sourceforge.net/" target="_blank">
92
- <div id="after"></div>
93
- </head>
94
- HTML;
95
-
96
- $base = $this->html->load($src)->find('base', 0);
97
-
98
- $this->assertEquals('before', $base->previousSibling()->id);
99
- $this->assertEquals('<base href="http://simplehtmldom.sourceforge.net/" target="_blank">', $base->outertext);
100
- $this->assertEquals('after', $base->nextSibling()->id);
101
- }
102
-
103
- /**
104
- * @link https://www.w3.org/TR/html52/textlevel-semantics.html#the-br-element
105
- * The br element
106
- */
107
- public function test_br()
108
- {
109
- $src = <<<HTML
110
- <body>
111
- <div id="before"></div>
112
- <p>PHP Simple HTML DOM Parser<br>A PHP based DOM parser</p>
113
- <div id="after"></div>
114
- </body>
115
- HTML;
116
-
117
- // Normal operation (innertext of <br> is replaced by DEFAULT_BR_TEXT)
118
- $br = $this->html->load($src)->find('br', 0);
119
-
120
- $this->assertEquals('before', $br->parentNode()->previousSibling()->id);
121
- $this->assertEquals('<br>', $br->outertext);
122
- $this->assertEquals(DEFAULT_BR_TEXT, $br->innertext);
123
- $this->assertEquals('after', $br->parentNode()->nextSibling()->id);
124
-
125
- $this->assertEquals(
126
- 'PHP Simple HTML DOM Parser' . DEFAULT_BR_TEXT . 'A PHP based DOM parser',
127
- $br->parentNode()->plaintext
128
- );
129
-
130
- // Custom text (innertext of <br> is replaced by custom value)
131
- $br_text = 'br_text';
132
- $br = $this->html->load($src, true, true, $br_text)->find('br', 0);
133
-
134
- $this->assertEquals('before', $br->parentNode()->previousSibling()->id);
135
- $this->assertEquals('<br>', $br->outertext);
136
- $this->assertEquals($br_text, $br->innertext);
137
- $this->assertEquals('after', $br->parentNode()->nextSibling()->id);
138
-
139
- $this->assertEquals(
140
- 'PHP Simple HTML DOM Parser' . $br_text . 'A PHP based DOM parser',
141
- $br->parentNode()->plaintext
142
- );
143
- }
144
-
145
- /**
146
- * @link https://www.w3.org/TR/html52/tabular-data.html#the-col-element
147
- * The col element
148
- */
149
- public function test_col()
150
- {
151
- $src = <<<HTML
152
- <table>
153
- <div id="before"></div>
154
- <colgroup><col><col><col></colgroup>
155
- <div id="after"></div>
156
- </table>
157
- HTML;
158
-
159
- $col = $this->html->load($src)->find('col', 0);
160
-
161
- $this->assertEquals('before', $col->parentNode()->previousSibling()->id);
162
- $this->assertEquals('<col>', $col->outertext);
163
- $this->assertEquals('after', $col->parentNode()->nextSibling()->id);
164
- }
165
-
166
- /**
167
- * @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-embed-element
168
- * The embed element
169
- */
170
- public function test_embed()
171
- {
172
- $src = <<<HTML
173
- <body>
174
- <div id="before"></div>
175
- <embed src="catgame.swf" quality="high">
176
- <div id="after"></div>
177
- </body>
178
- HTML;
179
-
180
- $embed = $this->html->load($src)->find('embed', 0);
181
-
182
- $this->assertEquals('before', $embed->previousSibling()->id);
183
- $this->assertEquals('<embed src="catgame.swf" quality="high">', $embed->outertext);
184
- $this->assertEquals('after', $embed->nextSibling()->id);
185
- }
186
-
187
- /**
188
- * @link https://www.w3.org/TR/html52/grouping-content.html#the-hr-element
189
- * The hr element
190
- */
191
- public function test_hr()
192
- {
193
- $src = <<<HTML
194
- <p>PHP Simple HTML DOM Parser</p>
195
- <div id="before"></div>
196
- <hr>
197
- <div id="after"></div>
198
- <p>A PHP based DOM parser</p>
199
- HTML;
200
-
201
- $hr = $this->html->load($src)->find('hr', 0);
202
-
203
- $this->assertEquals('before', $hr->previousSibling()->id);
204
- $this->assertEquals('<hr>', $hr->outertext);
205
- $this->assertEquals('after', $hr->nextSibling()->id);
206
- }
207
-
208
- /**
209
- * @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-img-element
210
- * The img element
211
- */
212
- public function test_img()
213
- {
214
- $src = <<<HTML
215
- <a href="http://simplehtmldom.sourceforge.net">
216
- <div id="before"></div>
217
- <img src="images/simple_html_dom.png" width="128" height="128" alt="PHP Simple HTML DOM Parser Manual">
218
- <div id="after"></div>
219
- </a>
220
- HTML;
221
-
222
- $img = $this->html->load($src)->find('img', 0);
223
-
224
- $this->assertEquals('before', $img->previousSibling()->id);
225
- $this->assertEquals(
226
- '<img src="images/simple_html_dom.png" width="128" height="128" alt="PHP Simple HTML DOM Parser Manual">',
227
- $img->outertext);
228
- $this->assertEquals('after', $img->nextSibling()->id);
229
- }
230
-
231
- /**
232
- * @link https://www.w3.org/TR/html52/sec-forms.html#the-input-element
233
- * The input element
234
- */
235
- public function test_input()
236
- {
237
- $src = <<<HTML
238
- <body>
239
- <div id="before"></div>
240
- <input type="url" name="location" list="urls">
241
- <div id="after"></div>
242
- <datalist id="urls">
243
- <option
244
- label="PHP Simple HTML DOM Parser"
245
- value="https://sourceforge.net/projects/simplehtmldom/"></option>
246
- <option
247
- label="PHP Simple HTML DOM Parser Manual"
248
- value="http://simplehtmldom.sourceforge.net/"></option>
249
- </datalist>
250
- </body>
251
- HTML;
252
-
253
- $input = $this->html->load($src)->find('input', 0);
254
-
255
- $this->assertEquals('before', $input->previousSibling()->id);
256
- $this->assertEquals('<input type="url" name="location" list="urls">', $input->outertext);
257
- $this->assertEquals('after', $input->nextSibling()->id);
258
- }
259
-
260
- /**
261
- * @link https://www.w3.org/TR/html52/document-metadata.html#the-link-element
262
- * The link element
263
- */
264
- public function test_link()
265
- {
266
- $src = <<<HTML
267
- <title>
268
- <div id="before"></div>
269
- <link rel="MIT license" href="https://opensource.org/licenses/MIT">
270
- <div id="after"></div>
271
- <title>
272
- HTML;
273
-
274
- $link = $this->html->load($src)->find('link', 0);
275
-
276
- $this->assertEquals('before', $link->previousSibling()->id);
277
- $this->assertEquals('<link rel="MIT license" href="https://opensource.org/licenses/MIT">', $link->outertext);
278
- $this->assertEquals('after', $link->nextSibling()->id);
279
- }
280
-
281
- /**
282
- * @link https://www.w3.org/TR/html52/document-metadata.html#the-meta-element
283
- * The meta element
284
- */
285
- public function test_meta()
286
- {
287
- $src = <<<HTML
288
- <title>
289
- <div id="before"></div>
290
- <meta name=generator content="Simple HTML DOM Parser">
291
- <div id="after"></div>
292
- </title>
293
- HTML;
294
-
295
- $meta = $this->html->load($src)->find('meta', 0);
296
-
297
- $this->assertEquals('before', $meta->previousSibling()->id);
298
- $this->assertEquals('<meta name=generator content="Simple HTML DOM Parser">', $meta->outertext);
299
- $this->assertEquals('after', $meta->nextSibling()->id);
300
- }
301
-
302
- /**
303
- * @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-param-element
304
- * The param element
305
- */
306
- public function test_param()
307
- {
308
- $src = <<<HTML
309
- <object type="application/simple_html_dom">
310
- <div id="before"></div>
311
- <param name="self_closing_tags" value="param">
312
- <div id="after"></div>
313
- </object>
314
- HTML;
315
-
316
- $param = $this->html->load($src)->find('param', 0);
317
-
318
- $this->assertEquals('before', $param->previousSibling()->id);
319
- $this->assertEquals('<param name="self_closing_tags" value="param">', $param->outertext);
320
- $this->assertEquals('after', $param->nextSibling()->id);
321
- }
322
-
323
- /**
324
- * @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-source-element
325
- * The source element
326
- */
327
- public function test_source()
328
- {
329
- $src = <<<HTML
330
- <video controls autoplay>
331
- <div id="before"></div>
332
- <source src="simple_html_dom.mp4" type="video/mp4">
333
- <div id="after"></div>
334
- </video>
335
- HTML;
336
-
337
- $source = $this->html->load($src)->find('source', 0);
338
-
339
- $this->assertEquals('before', $source->previousSibling()->id);
340
- $this->assertEquals('<source src="simple_html_dom.mp4" type="video/mp4">', $source->outertext);
341
- $this->assertEquals('after', $source->nextSibling()->id);
342
- }
343
-
344
- /**
345
- * Checks if the spacer element is NOT detected as self-closing tag
346
- *
347
- * @link https://www.w3.org/TR/html5/obsolete.html#non-conforming-features
348
- * Non-conforming features
349
- * @link https://www.w3.org/TR/html5/dom.html#ref-for-elementdef-spacer
350
- * Elements in the DOM
351
- * @link https://developer.mozilla.org/en-US/docs/Web/HTML/Element/spacer
352
- * <spacer>
353
- */
354
- public function test_spacer_not()
355
- {
356
- $src = <<<HTML
357
- <div id="before"></div>
358
- <spacer type="horizontal" size="10"></spacer>
359
- <div id="after"></div>
360
- HTML;
361
-
362
- $spacer = $this->html->load($src)->find('spacer', 0);
363
-
364
- $this->assertEquals('before', $spacer->previousSibling()->id);
365
- $this->assertEquals('<spacer type="horizontal" size="10"></spacer>', $spacer->outertext);
366
- $this->assertEquals('after', $spacer->nextSibling()->id);
367
- }
368
-
369
- /**
370
- * @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-track-element
371
- * The track element
372
- */
373
- public function test_track()
374
- {
375
- $src = <<<HTML
376
- <video src="simple_html_dom.webm">
377
- <div id="before"></div>
378
- <track kind=subtitles src=simple_html_dom.en.vtt srclang=en label="English">
379
- <div id="after"></div>
380
- <track kind=captions src=simple_html_dom.hoh.vtt srclang=en label="English captions">
381
- <track kind=subtitles src=simple_html_dom.fr.vtt srclang=fr lang=fr label="Français">
382
- <track kind=subtitles src=simple_html_dom.de.vtt srclang=de lang=de label="Deutsch">
383
- </video>
384
- HTML;
385
-
386
- $track = $this->html->load($src)->find('track', 0);
387
-
388
- $this->assertEquals('before', $track->previousSibling()->id);
389
-
390
- $this->assertEquals(
391
- '<track kind=subtitles src=simple_html_dom.en.vtt srclang=en label="English">',
392
- $track->outertext
393
- );
394
-
395
- $this->assertEquals('after', $track->nextSibling()->id);
396
- }
397
-
398
- /**
399
- * @link https://www.w3.org/TR/html52/textlevel-semantics.html#the-wbr-element
400
- * The wbr element
401
- */
402
- public function test_wbr()
403
- {
404
- $src = <<<HTML
405
- <div id="before"></div>
406
- <p>https://sourceforge.net/<wbr>projects/<wbr>simplehtmldom/</p>
407
- <div id="after"></div>
408
- HTML;
409
-
410
- $wbr = $this->html->load($src)->find('wbr', 0);
411
-
412
- $this->assertEquals('before', $wbr->parentNode()->previousSibling()->id);
413
- $this->assertEquals('<wbr>', $wbr->outertext);
414
- $this->assertEquals('after', $wbr->parentNode()->nextSibling()->id);
415
- }
416
-
417
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/server_side_script_test.php DELETED
@@ -1,28 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Checks if the parser properly handles server-side scripts
7
- */
8
- class server_side_script_test extends TestCase {
9
-
10
- private $html;
11
-
12
- protected function setUp()
13
- {
14
- $this->html = new simple_html_dom;
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- $this->html->clear();
20
- unset($this->html);
21
- }
22
-
23
- public function test_html_inside_sss_should_not_appear_in_the_dom()
24
- {
25
- $this->html->load('<?php <div>Hello, World!</div> ?>');
26
- $this->assertNull($this->html->find('div', 0));
27
- }
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/str_get_html_test.php DELETED
@@ -1,18 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Tests str_get_html
7
- */
8
- class str_get_html_test extends TestCase {
9
-
10
- /**
11
- * str_get_html should return false on empty string.
12
- */
13
- public function test_empty_string_should_return_false()
14
- {
15
- $this->assertFalse(str_get_html(''));
16
- }
17
-
18
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/style_test.php DELETED
@@ -1,58 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Checks if the parser properly handles style elements
7
- */
8
- class style_test extends TestCase {
9
-
10
- private $html;
11
-
12
- protected function setUp()
13
- {
14
- $this->html = new simple_html_dom;
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- $this->html->clear();
20
- unset($this->html);
21
- }
22
-
23
- /**
24
- * @dataProvider dataProvider_for_style_should_parse
25
- */
26
- public function test_style_should_parse($expected, $doc)
27
- {
28
- $this->html->load($doc);
29
- $this->assertEquals($expected, $this->html->find('style', 0)->innertext);
30
- $this->assertEquals($doc, $this->html->save());
31
- }
32
-
33
- public function dataProvider_for_style_should_parse()
34
- {
35
- return array(
36
- 'empty' => array(
37
- '',
38
- '<style></style>',
39
- ),
40
- 'empty without end tag' => array(
41
- '',
42
- '<style/>',
43
- ),
44
- 'space' => array(
45
- ' ',
46
- '<style> </style>',
47
- ),
48
- 'newline' => array(
49
- "\n",
50
- "<style>\n</style>"
51
- ),
52
- 'multiple style tags' => array(
53
- 'Hello',
54
- '<style>Hello</style><style>World</style>'
55
- ),
56
- );
57
- }
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vendor/simplehtmldom/simplehtmldom/tests/whitespace_test.php DELETED
@@ -1,483 +0,0 @@
1
- <?php
2
- require_once __DIR__ . '/../simple_html_dom.php';
3
- use PHPUnit\Framework\TestCase;
4
-
5
- /**
6
- * Test if the parser properly removes whitespace
7
- */
8
- class whitespace_test extends TestCase {
9
-
10
- private $html;
11
-
12
- protected function setUp()
13
- {
14
- $this->html = new simple_html_dom;
15
- }
16
-
17
- protected function tearDown()
18
- {
19
- $this->html->clear();
20
- unset($this->html);
21
- }
22
-
23
- public function provide_whitespace_around_attributes()
24
- {
25
- return array(array(<<<EOD
26
- <html>
27
- <head>
28
- <meta charset="UTF-8">
29
- <meta name ="description" content= "simplehtmldom">
30
- <meta name = "keywords" content = "simple,html,dom">
31
- <meta name = "author"
32
- content = "John Doe">
33
- </head>
34
- </html>
35
- EOD
36
- ));
37
- }
38
-
39
- public function provide_whitespace_around_void_tags()
40
- {
41
- return array(array(<<<EOD
42
- <html>
43
- <head>
44
- <meta charset="UTF-8">
45
- < meta name="description" content="simplehtmldom">
46
- <meta name="keywords" content="simple,html,dom" >
47
- < meta name="author" content="John Doe" >
48
- < meta name="viewport" content="width=device-width, initial-scale=1.0"
49
- >
50
- </head>
51
- </html>
52
- EOD
53
- ));
54
- }
55
-
56
- public function provide_whitespace_around_tags()
57
- {
58
- return array(array(<<<EOD
59
- <html>
60
- <body>
61
- <div class="article" />
62
- < div class="article" />
63
- <div class="article" / >
64
- < div class="article" / >
65
- <
66
- div class="article" /
67
- >
68
- < div class="article" / >
69
- </body>
70
- </html>
71
- EOD
72
- ));
73
- }
74
-
75
- public function provide_whitespace_around_tags_without_class()
76
- {
77
- return array(array(<<<EOD
78
- <html>
79
- <body>
80
- <div />
81
- < div />
82
- <div / >
83
- < div / >
84
- <
85
- div /
86
- >
87
- < div / >
88
- </body>
89
- </html>
90
- EOD
91
- ));
92
- }
93
-
94
- public function provide_whitespace_around_nested_tags()
95
- {
96
- return array(array(<<<EOD
97
- <html>
98
- <body>
99
- <div class="article">
100
- < div class="level1">
101
- <div class="level2" >
102
- < div class="level3" >
103
- <
104
- div class="level4"
105
- >
106
- < div class="level5" >
107
- < /div >
108
- <
109
- /div
110
- >
111
- < /div >
112
- </div >
113
- < /div>
114
- </ div>
115
- </body>
116
- </html>
117
- EOD
118
- ));
119
- }
120
-
121
- public function provide_whitespace_in_class_values()
122
- {
123
- return array(array(<<<EOD
124
- <html>
125
- <body>
126
- <div class=""/>
127
- <div class=" "/>
128
- <div class=" "/>
129
- <div class="article"/>
130
- <div class=" article"/>
131
- <div class="article "/>
132
- <div class=" article "/>
133
- <div class=" article "/>
134
- <div class="article new"/>
135
- <div class=" article new"/>
136
- <div class="article new "/>
137
- <div class="article new"/>
138
- <div class=" article new "/>
139
- <div class="
140
- article
141
- new
142
- "/>
143
- </body>
144
- </html>
145
- EOD
146
- ));
147
- }
148
-
149
- public function provide_whitespace_in_attribute_values()
150
- {
151
- return array(array(<<<EOD
152
- <html>
153
- <body>
154
- <div attribute=""/>
155
- <div attribute=" "/>
156
- <div attribute=" "/>
157
- <div attribute="article"/>
158
- <div attribute=" article"/>
159
- <div attribute="article "/>
160
- <div attribute=" article "/>
161
- <div attribute=" article "/>
162
- <div attribute="article new"/>
163
- <div attribute=" article new"/>
164
- <div attribute="article new "/>
165
- <div attribute="article new"/>
166
- <div attribute=" article new "/>
167
- <div attribute="
168
- article
169
- new
170
- "/>
171
- </body>
172
- </html>
173
- EOD
174
- ));
175
- }
176
-
177
- /** @dataProvider provide_whitespace_around_attributes */
178
- public function test_parse_removes_whitespace_around_attributes($doc)
179
- {
180
- // phpcs:ignore Generic.Files.LineLength
181
- $expected = '<html><head><meta charset="UTF-8"><meta name="description" content="simplehtmldom"><meta name="keywords" content="simple,html,dom"><meta name="author" content="John Doe"></head></html>';
182
- $this->html->load($doc);
183
-
184
- $this->assertEquals($expected, $this->html->save());
185
- }
186
-
187
- /** @dataProvider provide_whitespace_around_void_tags */
188
- public function test_parse_removes_whitespace_around_void_tags($doc)
189
- {
190
- // phpcs:ignore Generic.Files.LineLength
191
- $expected = '<html><head><meta charset="UTF-8"><meta name="description" content="simplehtmldom"><meta name="keywords" content="simple,html,dom"><meta name="author" content="John Doe"><meta name="viewport" content="width=device-width, initial-scale=1.0"></head></html>';
192
- $this->html->load($doc);
193
-
194
- $this->assertEquals($expected, $this->html->save());
195
- }
196
-
197
- /** @dataProvider provide_whitespace_around_tags */
198
- public function test_parse_removes_whitespace_around_tags($doc)
199
- {
200
- // phpcs:ignore Generic.Files.LineLength
201
- $expected = '<html><body><div class="article"/><div class="article"/><div class="article"/><div class="article"/><div class="article"/><div class="article"/></body></html>';
202
- $this->html->load($doc);
203
-
204
- $this->assertEquals($expected, $this->html->save());
205
- }
206
-
207
- /** @dataProvider provide_whitespace_around_tags_without_class */
208
- public function test_parse_removes_whitespace_around_tags_without_class($doc)
209
- {
210
- // phpcs:ignore Generic.Files.LineLength
211
- $expected = '<html><body><div/><div/><div/><div/><div/><div/></body></html>';
212
- $this->html->load($doc);
213
-
214
- $this->assertEquals($expected, $this->html->save());
215
- }
216
-
217
- /** @dataProvider provide_whitespace_around_nested_tags */
218
- public function test_parse_removes_whitespace_around_nested_tags($doc)
219
- {
220
- // phpcs:ignore Generic.Files.LineLength
221
- $expected = '<html><body><div class="article"><div class="level1"><div class="level2"><div class="level3"><div class="level4"><div class="level5"></div></div></div></div></div></div></body></html>';
222
- $this->html->load($doc);
223
-
224
- $this->assertEquals($expected, $this->html->save());
225
- }
226
-
227
- /** @dataProvider provide_whitespace_in_class_values */
228
- public function test_parse_removes_whitespace_in_class_values($doc)
229
- {
230
- $this->html->load($doc);
231
-
232
- $this->assertCount(11, $this->html->find('.article'));
233
- $this->assertCount(6, $this->html->find('.new'));
234
- $this->assertCount(6, $this->html->find('[class="article new"]'));
235
- $this->assertEquals('article', $this->html->find('.article', 0)->class);
236
- $this->assertEquals('article new', $this->html->find('[class="article new"]', 0)->class);
237
- }
238
-
239
- /** @dataProvider provide_whitespace_in_class_values */
240
- public function test_find_removes_whitespace_in_class_selectors($doc)
241
- {
242
- $this->html->load($doc);
243
-
244
- $this->assertCount(11, $this->html->find('.article'));
245
- $this->assertCount(11, $this->html->find(' .article'));
246
- $this->assertCount(11, $this->html->find('.article '));
247
- $this->assertCount(11, $this->html->find(' .article '));
248
- $this->assertCount(11, $this->html->find(' .article '));
249
-
250
- $this->assertCount(6, $this->html->find('[class="article new"]' ));
251
- $this->assertCount(6, $this->html->find('[class=" article new"]' ));
252
- $this->assertCount(6, $this->html->find('[class="article new "]' ));
253
- $this->assertCount(6, $this->html->find('[class=" article new "]' ));
254
- $this->assertCount(6, $this->html->find('[class="article new"]' ));
255
- $this->assertCount(6, $this->html->find('[class=" article new "]' ));
256
- }
257
-
258
- /** @dataProvider provide_whitespace_in_attribute_values */
259
- public function test_parse_removes_whitespace_in_attribute_values($doc)
260
- {
261
- $this->html->load($doc);
262
-
263
- $this->assertCount(11, $this->html->find('[attribute*="article"]'));
264
- $this->assertCount(6, $this->html->find('[attribute*="new"]'));
265
- $this->assertCount(6, $this->html->find('[attribute="article new"]'));
266
- $this->assertEquals('article', $this->html->find('[attribute*="article"]', 0)->attribute);
267
- $this->assertEquals('article new', $this->html->find('[attribute*="article new"]', 0)->attribute);
268
- }
269
-
270
- /** @dataProvider provide_whitespace_in_class_values */
271
- public function test_find_keeps_whitespace_without_trim($doc)
272
- {
273
- $this->html->load($doc, true, false);
274
-
275
- $this->assertCount(11, $this->html->find('.article'));
276
- $this->assertCount(11, $this->html->find(' .article'));
277
- $this->assertCount(11, $this->html->find('.article '));
278
- $this->assertCount(11, $this->html->find(' .article '));
279
- $this->assertCount(11, $this->html->find(' .article '));
280
-
281
- $this->assertCount(6, $this->html->find('[class="article new"]' ));
282
- $this->assertCount(6, $this->html->find('[class=" article new"]' ));
283
- $this->assertCount(6, $this->html->find('[class="article new "]' ));
284
- $this->assertCount(6, $this->html->find('[class=" article new "]' ));
285
- $this->assertCount(6, $this->html->find('[class="article new"]' ));
286
- $this->assertCount(6, $this->html->find('[class=" article new "]' ));
287
- }
288
-
289
- /**
290
- * @dataProvider provide_whitespace_around_attributes
291
- * @dataProvider provide_whitespace_around_void_tags
292
- * @dataProvider provide_whitespace_around_tags
293
- * @dataProvider provide_whitespace_around_tags_without_class
294
- * @dataProvider provide_whitespace_around_nested_tags
295
- * @dataProvider provide_whitespace_in_attribute_values
296
- * @dataProvider provide_whitespace_in_class_values
297
- */
298
- public function test_parse_keeps_whitespace_without_trim($doc)
299
- {
300
- $expected = $doc;
301
- $this->html->load($doc, true, false);
302
-
303
- $this->assertEquals($expected, $this->html->save());
304
- }
305
-
306
- public function test_text_should_trim_whitespace()
307
- {
308
- $expected = 'PHP Simple HTML DOM Parser';
309
-
310
- $doc = <<<EOD
311
- <p> PHP Simple HTML DOM Parser </p>
312
- EOD;
313
-
314
- $this->html->load($doc);
315
-
316
- $this->assertEquals($expected, $this->html->root->text());
317
- }
318
-
319
- public function test_text_should_trim_nested_whitespace()
320
- {
321
- $expected = 'PHP Simple HTML DOM Parser';
322
-
323
- $doc = <<<EOD
324
- <p> <span> </span> PHP Simple HTML DOM Parser </p>
325
- EOD;
326
-
327
- $this->html->load($doc);
328
-
329
- $this->assertEquals($expected, $this->html->root->text());
330
- }
331
-
332
- public function test_text_should_remove_newline_from_paragraph()
333
- {
334
- $expected = 'PHP Simple HTML DOM Parser';
335
-
336
- $doc = <<<EOD
337
- <p>
338
- PHP Simple HTML DOM Parser
339
- </p>
340
- EOD;
341
-
342
- $this->html->load($doc);
343
-
344
- $this->assertEquals($expected, $this->html->root->text());
345
- }
346
-
347
- public function test_text_should_remove_nested_newline_from_paragraph()
348
- {
349
- $expected = 'PHP Simple HTML DOM Parser';
350
-
351
- $doc = <<<EOD
352
- <p>
353
- <span>
354
-
355
- </span>
356
- PHP Simple HTML DOM Parser
357
- </p>
358
- EOD;
359
-
360
- $this->html->load($doc);
361
-
362
- $this->assertEquals($expected, $this->html->root->text());
363
- }
364
-
365
- public function test_text_should_add_newline_between_paragraph()
366
- {
367
- $expected = <<<EOD
368
- PHP Simple HTML DOM Parser
369
-
370
- A fast, simple and reliable HTML document parser for PHP.
371
- EOD;
372
-
373
- $doc = <<<EOD
374
- <p>PHP Simple HTML DOM Parser</p>
375
- <p>A fast, simple and reliable HTML document parser for PHP.</p>
376
- EOD;
377
-
378
- $this->html->load($doc);
379
-
380
- $this->assertEquals($expected, $this->html->root->text());
381
- }
382
-
383
- public function test_text_should_add_newline_between_nested_paragraph()
384
- {
385
- $expected = <<<EOD
386
- PHP Simple HTML DOM Parser
387
-
388
- A fast, simple and reliable HTML document parser for PHP.
389
- EOD;
390
-
391
- $doc = <<<EOD
392
- <div><p>PHP Simple HTML DOM Parser</p></div>
393
- <div><p>A fast, simple and reliable HTML document parser for PHP.</p></div>
394
- EOD;
395
-
396
- $this->html->load($doc);
397
-
398
- $this->assertEquals($expected, $this->html->root->text());
399
- }
400
-
401
- public function test_text_should_keep_whitespace_around_inline_elements()
402
- {
403
- $expected = <<<EOD
404
- PHP Simple HTML DOM Parser. A fast, simple and reliable HTML document parser for PHP.
405
- EOD;
406
-
407
- $doc = <<<EOD
408
- <p>PHP Simple HTML DOM Parser. <em>A fast, simple and reliable HTML document parser for PHP.</em></p>
409
- EOD;
410
-
411
- $this->html->load($doc);
412
-
413
- $this->assertEquals($expected, $this->html->root->text());
414
- }
415
-
416
- public function test_text_should_skip_empty_paragraphs()
417
- {
418
- $expected = <<<EOD
419
- PHP Simple HTML DOM Parser.
420
-
421
- A fast, simple and reliable HTML document parser for PHP.
422
- EOD;
423
-
424
- $doc = <<<EOD
425
- <p>PHP Simple HTML DOM Parser.</p>
426
- <p> </p>
427
- <p> </p>
428
- <p> </p>
429
- <p> </p>
430
- <p>A fast, simple and reliable HTML document parser for PHP.</p>
431
- EOD;
432
-
433
- $this->html->load($doc);
434
-
435
- $this->assertEquals($expected, $this->html->root->text());
436
- }
437
-
438
- public function test_text_should_handle_nbsp_like_whitespace()
439
- {
440
- $expected = <<<EOD
441
- PHP Simple HTML DOM Parser.
442
-
443
- A fast, simple and reliable HTML document parser for PHP.
444
- EOD;
445
-
446
- $doc = <<<EOD
447
- <p>&nbsp;PHP Simple HTML DOM Parser.&nbsp;</p>&nbsp;
448
- <p>&nbsp;</p>&nbsp;
449
- <p>&nbsp;</p>&nbsp;
450
- <p>&nbsp;</p>&nbsp;
451
- <p>&nbsp;</p>&nbsp;
452
- <p>&nbsp;A fast, simple and reliable HTML<span>&nbsp;&nbsp;</span>&nbsp; document parser for PHP.&nbsp;</p>
453
- EOD;
454
-
455
- $this->html->load($doc);
456
-
457
- $this->assertEquals($expected, $this->html->root->text());
458
- }
459
-
460
- /**
461
- * The library uses UTF-8 internally. All operations in the text() function
462
- * should therefore handle UTF-8 characters accordingly. If UTF-8 characters
463
- * are handled like ASCII, the resulting output could be incorrect or cause
464
- * errors.
465
- *
466
- * @link https://sourceforge.net/p/simplehtmldom/feature-requests/62/ Feature #62
467
- */
468
- public function test_text_should_handle_utf8_characters()
469
- {
470
- $expected = '«Hello, World»';
471
-
472
- $doc = '&#xAB;Hello, World&#xBB;&nbsp;';
473
-
474
- $this->html->load($doc);
475
-
476
- $this->assertEquals(
477
- $expected,
478
- $this->html->root->text(),
479
- 'UTF-8 characters should not be handled like ASCII characters!'
480
- );
481
- }
482
-
483
- }