Version Description
Download this release
Release Info
Developer | optimocha |
Plugin | Speed Booster Pack |
Version | 4.1.1 |
Comparing to | |
See all releases |
Code changes from version 4.1.0 to 4.1.1
- README.txt +9 -0
- admin/class-speed-booster-pack-admin.php +19 -41
- admin/js/speed-booster-pack-admin.js +1 -1
- includes/classes/class-sbp-cache-warmup.php +3 -2
- includes/classes/class-sbp-cloudflare.php +3 -3
- includes/classes/class-sbp-css-minifier.php +3 -4
- includes/classes/class-sbp-warmup-process.php +1 -14
- includes/sbp-helpers.php +1 -1
- speed-booster-pack.php +3 -2
- vendor/simplehtmldom/simplehtmldom/.gitattributes +0 -13
- vendor/simplehtmldom/simplehtmldom/.gitignore +0 -12
- vendor/simplehtmldom/simplehtmldom/.travis.yml +0 -46
- vendor/simplehtmldom/simplehtmldom/CHANGELOG.md +368 -368
- vendor/simplehtmldom/simplehtmldom/Debug.php +149 -149
- vendor/simplehtmldom/simplehtmldom/HtmlDocument.php +1133 -1133
- vendor/simplehtmldom/simplehtmldom/HtmlNode.php +1441 -1441
- vendor/simplehtmldom/simplehtmldom/HtmlWeb.php +134 -134
- vendor/simplehtmldom/simplehtmldom/LICENSE +20 -20
- vendor/simplehtmldom/simplehtmldom/README.md +0 -116
- vendor/simplehtmldom/simplehtmldom/composer.json +57 -57
- vendor/simplehtmldom/simplehtmldom/constants.php +28 -28
- vendor/simplehtmldom/simplehtmldom/example/example_advanced_selector.php +0 -59
- vendor/simplehtmldom/simplehtmldom/example/example_basic_selector.php +0 -35
- vendor/simplehtmldom/simplehtmldom/example/example_callback.php +0 -21
- vendor/simplehtmldom/simplehtmldom/example/example_extract_html.php +0 -7
- vendor/simplehtmldom/simplehtmldom/example/example_modify_contents.php +0 -15
- vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_digg.php +0 -55
- vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_imdb.php +0 -22
- vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_slashdot.php +0 -33
- vendor/simplehtmldom/simplehtmldom/manual/README.md +0 -75
- vendor/simplehtmldom/simplehtmldom/manual/custom_theme/main.html +0 -7
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/disable.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/enable.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/index.md +0 -31
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/log.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/log_once.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/setDebugHandler.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__call.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__construct.md +0 -22
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__debugInfo.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__destruct.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__get.md +0 -19
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__toString.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/as_text_node.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/childNodes.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_skip.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_until.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_until_char.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/createElement.md +0 -16
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/createTextNode.md +0 -11
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/decode.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/dump.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/expect.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/find.md +0 -17
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/firstChild.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementById.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementByTagName.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementsById.md +0 -16
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementsByTagName.md +0 -16
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/index.md +0 -38
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/lastChild.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/link_nodes.md +0 -14
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/load.md +0 -20
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/loadFile.md +0 -11
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse_attr.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse_charset.md +0 -17
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/prepare.md +0 -16
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/read_tag.md +0 -11
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/remove_callback.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/remove_noise.md +0 -16
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/restore_noise.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/save.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/search_noise.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/set_callback.md +0 -14
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/skip.md +0 -14
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__call.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__construct.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__debugInfo.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__destruct.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__get.md +0 -24
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__isset.md +0 -21
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__set.md +0 -20
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__toString.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__unset.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/addClass.md +0 -25
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/appendChild.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/childNodes.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/clear.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/convert_text.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/definitions.md +0 -96
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/dump.md +0 -14
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/dump_node.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/expect.md +0 -15
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/find.md +0 -47
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/find_ancestor_tag.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/firstChild.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getAllAttributes.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getAttribute.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementById.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementByTagName.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementsById.md +0 -14
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementsByTagName.md +0 -14
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/get_display_size.md +0 -11
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasAttribute.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasChildNodes.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasClass.md +0 -19
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/index.md +0 -28
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/innertext.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_block_element.md +0 -16
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_inline_element.md +0 -16
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_utf8.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/lastChild.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/makeup.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/match.md +0 -21
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/nextSibling.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/nodeName.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/outertext.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parent.md +0 -14
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parentNode.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parse_selector.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/previousSibling.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/remove.md +0 -41
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeAttribute.md +0 -13
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeChild.md +0 -45
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeClass.md +0 -27
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/save.md +0 -22
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/seek.md +0 -21
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/setAttribute.md +0 -14
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/text.md +0 -27
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/xmltext.md +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/api/constants.md +0 -31
- vendor/simplehtmldom/simplehtmldom/manual/docs/extra.css +0 -9
- vendor/simplehtmldom/simplehtmldom/manual/docs/faq/0001.md +0 -67
- vendor/simplehtmldom/simplehtmldom/manual/docs/index.md +0 -33
- vendor/simplehtmldom/simplehtmldom/manual/docs/manual/accessing-element-attributes.md +0 -40
- vendor/simplehtmldom/simplehtmldom/manual/docs/manual/adding-nodes.md +0 -123
- vendor/simplehtmldom/simplehtmldom/manual/docs/manual/creating-dom-objects.md +0 -37
- vendor/simplehtmldom/simplehtmldom/manual/docs/manual/customizing-parsing-behavior.md +0 -18
- vendor/simplehtmldom/simplehtmldom/manual/docs/manual/finding-html-elements.md +0 -90
- vendor/simplehtmldom/simplehtmldom/manual/docs/manual/saving-dom-objects.md +0 -11
- vendor/simplehtmldom/simplehtmldom/manual/docs/manual/traversing-dom-tree.md +0 -23
- vendor/simplehtmldom/simplehtmldom/manual/docs/quick-start.md +0 -98
- vendor/simplehtmldom/simplehtmldom/manual/docs/requirements.md +0 -7
- vendor/simplehtmldom/simplehtmldom/manual/mkdocs.yml +0 -152
- vendor/simplehtmldom/simplehtmldom/phpcompatibility.xml +0 -11
- vendor/simplehtmldom/simplehtmldom/phpcs.xml +0 -48
- vendor/simplehtmldom/simplehtmldom/phpunit.xml +0 -41
- vendor/simplehtmldom/simplehtmldom/release.sh +0 -69
- vendor/simplehtmldom/simplehtmldom/simple_html_dom.php +153 -153
- vendor/simplehtmldom/simplehtmldom/tests/attribute_test.php +0 -50
- vendor/simplehtmldom/simplehtmldom/tests/bug_report_test.php +0 -476
- vendor/simplehtmldom/simplehtmldom/tests/callback_test.php +0 -45
- vendor/simplehtmldom/simplehtmldom/tests/cdata_test.php +0 -69
- vendor/simplehtmldom/simplehtmldom/tests/charset_test.php +0 -80
- vendor/simplehtmldom/simplehtmldom/tests/comment_test.php +0 -93
- vendor/simplehtmldom/simplehtmldom/tests/css_selector_test.php +0 -646
- vendor/simplehtmldom/simplehtmldom/tests/debug_info_test.php +0 -37
- vendor/simplehtmldom/simplehtmldom/tests/debug_with_callback_test.php +0 -83
- vendor/simplehtmldom/simplehtmldom/tests/doctype_test.php +0 -47
- vendor/simplehtmldom/simplehtmldom/tests/dom_manipulation_test.php +0 -102
- vendor/simplehtmldom/simplehtmldom/tests/entity_decoding_test.php +0 -62
- vendor/simplehtmldom/simplehtmldom/tests/file_get_html_test.php +0 -80
- vendor/simplehtmldom/simplehtmldom/tests/htmldocument___call_test.php +0 -39
- vendor/simplehtmldom/simplehtmldom/tests/htmldocument_test.php +0 -249
- vendor/simplehtmldom/simplehtmldom/tests/htmlnode___call_test.php +0 -77
- vendor/simplehtmldom/simplehtmldom/tests/htmlnode_test.php +0 -628
- vendor/simplehtmldom/simplehtmldom/tests/htmlweb_curl_test.php +0 -49
- vendor/simplehtmldom/simplehtmldom/tests/htmlweb_fopen_test.php +0 -53
- vendor/simplehtmldom/simplehtmldom/tests/htmlweb_test.php +0 -49
- vendor/simplehtmldom/simplehtmldom/tests/memory_parsing_test.php +0 -131
- vendor/simplehtmldom/simplehtmldom/tests/optional_tags_test.php +0 -791
- vendor/simplehtmldom/simplehtmldom/tests/script_test.php +0 -69
- vendor/simplehtmldom/simplehtmldom/tests/self_closing_tags_test.php +0 -417
- vendor/simplehtmldom/simplehtmldom/tests/server_side_script_test.php +0 -28
- vendor/simplehtmldom/simplehtmldom/tests/str_get_html_test.php +0 -18
- vendor/simplehtmldom/simplehtmldom/tests/style_test.php +0 -58
- vendor/simplehtmldom/simplehtmldom/tests/whitespace_test.php +0 -483
README.txt
CHANGED
@@ -6,6 +6,7 @@ Stable tag: 4.1.0
|
|
6 |
Requires at least: 4.6
|
7 |
Tested up to: 5.6
|
8 |
Requires PHP: 5.6
|
|
|
9 |
License: GPLv3 or later
|
10 |
License URI: https://www.gnu.org/licenses/gpl-3.0.html
|
11 |
|
@@ -104,6 +105,14 @@ All the time! We're always looking for new ways to get this plugin to a better s
|
|
104 |
|
105 |
== Changelog ==
|
106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
= 4.1.0 =
|
108 |
|
109 |
*Release Date: 21 January 2021*
|
6 |
Requires at least: 4.6
|
7 |
Tested up to: 5.6
|
8 |
Requires PHP: 5.6
|
9 |
+
Stable tag: 4.1.1
|
10 |
License: GPLv3 or later
|
11 |
License URI: https://www.gnu.org/licenses/gpl-3.0.html
|
12 |
|
105 |
|
106 |
== Changelog ==
|
107 |
|
108 |
+
*Release Date: 28 January 2021*
|
109 |
+
|
110 |
+
* **Improved**: Wrote better descriptions for certain settings.
|
111 |
+
* **Fixed**: Tiny warning on Cloudflare settings.
|
112 |
+
* **Fixed**: Inline CSS wasn't able to parse some `url()` values, until now.
|
113 |
+
* **Fixed**: Cache warmup now handles pages with a better working process.
|
114 |
+
* **Fixed**: SBP won't try to _optimize_ files that are generated on-the-fly by WordPress (like robots.txt or the RSS feeds).
|
115 |
+
|
116 |
= 4.1.0 =
|
117 |
|
118 |
*Release Date: 21 January 2021*
|
admin/class-speed-booster-pack-admin.php
CHANGED
@@ -282,7 +282,7 @@ class Speed_Booster_Pack_Admin {
|
|
282 |
'<strong>' . __( 'Special', 'speed-booster-pack' ) . '</strong>: ' . __( 'This module has features for specific cases like CDN usage, localizing tracker scripts, adding custom JavaScript code and optimizations for some popular plugins.', 'speed-booster-pack' ) . '</li><li>' .
|
283 |
'<strong>' . __( 'Tweaks', 'speed-booster-pack' ) . '</strong>: ' . __( 'This module lets you tweak the WordPress core and your page sources by dequeueing core scripts/styles, decluttering <head>, optimizing revisions and the Heartbeat API and so on.', 'speed-booster-pack' ) . '</li></ul>' .
|
284 |
'<p>' . __( 'Feel free to experiment, and don\'t forget to create exclude rules when necessary!', 'speed-booster-pack' ) . '</p>',
|
285 |
-
], //
|
286 |
[
|
287 |
'type' => 'subheading',
|
288 |
'content' => __( 'Upcoming features', 'speed-booster-pack' ),
|
@@ -351,9 +351,7 @@ class Speed_Booster_Pack_Admin {
|
|
351 |
'id' => 'caching_warmup_after_clear',
|
352 |
'type' => 'switcher',
|
353 |
'title' => __( 'Warm up cache on clear', 'speed-booster-pack' ),
|
354 |
-
|
355 |
-
'desc' => __( 'Warm up cache everytime cache cleared.', 'speed-booster-pack' ),
|
356 |
-
// BEYNTODO: Change text
|
357 |
'dependency' => [ 'module_caching', '==', '1', '', 'visible' ],
|
358 |
],
|
359 |
[
|
@@ -434,37 +432,37 @@ class Speed_Booster_Pack_Admin {
|
|
434 |
'desc' => __( 'You can find your zone ID in the Overview tab on your Cloudflare panel.', 'speed-booster-pack' ),
|
435 |
],
|
436 |
[
|
437 |
-
'title' => __( '
|
438 |
'id' => 'cf_rocket_loader_enable',
|
439 |
'class' => 'with-preloader',
|
440 |
'type' => 'switcher',
|
441 |
],
|
442 |
[
|
443 |
-
'title' => __( '
|
444 |
'id' => 'cf_dev_mode_enable',
|
445 |
'class' => 'with-preloader',
|
446 |
'type' => 'switcher',
|
447 |
],
|
448 |
[
|
449 |
-
'title' => __( '
|
450 |
'id' => 'cf_css_minify_enable',
|
451 |
'class' => 'with-preloader',
|
452 |
'type' => 'switcher',
|
453 |
],
|
454 |
[
|
455 |
-
'title' => __( '
|
456 |
'id' => 'cf_html_minify_enable',
|
457 |
'class' => 'with-preloader',
|
458 |
'type' => 'switcher',
|
459 |
],
|
460 |
[
|
461 |
-
'title' => __( '
|
462 |
'id' => 'cf_js_minify_enable',
|
463 |
'class' => 'with-preloader',
|
464 |
'type' => 'switcher',
|
465 |
],
|
466 |
[
|
467 |
-
'title' => __( 'Browser Cache TTL', 'speed-booster-pack' ),
|
468 |
'id' => 'cf_browser_cache_ttl',
|
469 |
'class' => 'with-preloader',
|
470 |
'type' => 'select',
|
@@ -498,12 +496,12 @@ class Speed_Booster_Pack_Admin {
|
|
498 |
'type' => 'content',
|
499 |
'content' => '
|
500 |
<span>
|
501 |
-
<a href="#" class="button button-small sbp-cloudflare-test">Test
|
502 |
<span class="sbp-cloudflare-fetching">' . __( 'Fetching Cloudflare settings...', 'speed-booster-pack' ) . '</span>
|
503 |
</span>
|
504 |
<span class="sbp-cloudflare-info-text sbp-cloudflare-incorrect" style="color:red; vertical-align: middle;"><i class="fa fa-exclamation-triangle"></i> ' . __( 'Your Cloudflare credentials are incorrect.', 'speed-booster-pack' ) . '</span>
|
505 |
<span class="sbp-cloudflare-info-text sbp-cloudflare-correct" style="color:green; vertical-align: middle;"><i class="fa fa-check-circle"></i> ' . __( 'Your Cloudflare credentials are correct.', 'speed-booster-pack' ) . '</span>
|
506 |
-
<span class="sbp-cloudflare-info-text sbp-cloudflare-warning" style="color:orange; vertical-align: middle;"><i class="fa fa-exclamation-circle"></i> ' . __( '
|
507 |
',
|
508 |
],
|
509 |
];
|
@@ -567,13 +565,13 @@ class Speed_Booster_Pack_Admin {
|
|
567 |
'title' => __( 'Included Directories', 'speed-booster-pack' ),
|
568 |
'id' => 'cdn_includes',
|
569 |
'type' => 'code_editor',
|
570 |
-
'desc' => __( '
|
571 |
],
|
572 |
[
|
573 |
'title' => __( 'Excluded Extensions', 'speed-booster-pack' ),
|
574 |
'id' => 'cdn_excludes',
|
575 |
'type' => 'code_editor',
|
576 |
-
'desc' => __( '
|
577 |
],
|
578 |
],
|
579 |
$cloudflare_fields,
|
@@ -589,19 +587,18 @@ class Speed_Booster_Pack_Admin {
|
|
589 |
);
|
590 |
/* END Section: CDN & Proxy */
|
591 |
|
592 |
-
|
593 |
-
/* BEGIN Section: CSS Optimizations */
|
594 |
CSF::createSection(
|
595 |
$prefix,
|
596 |
[
|
597 |
-
'title' => __( 'CSS
|
598 |
'id' => 'css',
|
599 |
'icon' => 'fa fa-palette',
|
600 |
'fields' => [
|
601 |
|
602 |
[
|
603 |
/* translators: used like "Enable/Disable XXX" where "XXX" is the module name. */
|
604 |
-
'title' => __( 'Enable/Disable', 'speed-booster-pack' ) . ' ' . __( 'CSS
|
605 |
'id' => 'module_css',
|
606 |
'class' => 'module-css',
|
607 |
'type' => 'switcher',
|
@@ -640,6 +637,7 @@ class Speed_Booster_Pack_Admin {
|
|
640 |
[
|
641 |
'id' => 'is_front_page',
|
642 |
'type' => 'code_editor',
|
|
|
643 |
'desc' => sprintf( __( 'This CSS block will be injected into the front page of your website. %1$s%2$s%3$s', 'speed-booster-pack' ), '<a href="https://developer.wordpress.org/reference/functions/is_front_page/" rel="external noopener" target="_blank">', sprintf( __( 'Learn more about %s.', 'speed-booster-pack' ), '<code>is_front_page()</code>' ), '</a>' ),
|
644 |
],
|
645 |
],
|
@@ -709,10 +707,8 @@ class Speed_Booster_Pack_Admin {
|
|
709 |
],
|
710 |
[
|
711 |
'title' => __( 'Remove critical CSS after onload', 'speed-booster-pack' ),
|
712 |
-
// BEYNTODO: Change Text
|
713 |
'id' => 'remove_criticalcss',
|
714 |
-
'desc' => __( 'Remove critical CSS
|
715 |
-
// BEYNTODO: Change Text
|
716 |
'type' => 'switcher',
|
717 |
'default' => true,
|
718 |
'dependency' => [ 'module_css|enable_criticalcss', '==|==', '1|1', '', 'visible' ],
|
@@ -746,7 +742,7 @@ class Speed_Booster_Pack_Admin {
|
|
746 |
],
|
747 |
]
|
748 |
);
|
749 |
-
/* END Section: CSS
|
750 |
|
751 |
/* BEGIN Section: Assets */
|
752 |
$asset_fields = [
|
@@ -1333,14 +1329,12 @@ class Speed_Booster_Pack_Admin {
|
|
1333 |
|
1334 |
// Warmup Notice
|
1335 |
if ( get_transient( 'sbp_warmup_started' ) ) {
|
1336 |
-
// BEYNTODO: Add translator note
|
1337 |
SBP_Notice_Manager::display_notice( 'sbp_warmup_started', '<p><strong>' . SBP_PLUGIN_NAME . ':</strong> ' . __( 'Cache warmup started.', 'speed-booster-pack' ) . '</p>', 'success', true, 'recurrent' );
|
1338 |
}
|
1339 |
|
1340 |
// Warmup Notice
|
1341 |
if ( get_transient( 'sbp_warmup_complete' ) ) {
|
1342 |
-
|
1343 |
-
SBP_Notice_Manager::display_notice( 'sbp_warmup_complete', '<p><strong>' . SBP_PLUGIN_NAME . ':</strong> ' . __( 'Static cache files created.', 'speed-booster-pack' ) . '</p>', 'success', true, 'recurrent' );
|
1344 |
}
|
1345 |
|
1346 |
// WP-Config Inject File Error
|
@@ -1352,22 +1346,6 @@ class Speed_Booster_Pack_Admin {
|
|
1352 |
if ( get_transient( 'sbp_wp_config_error' ) ) {
|
1353 |
SBP_Notice_Manager::display_notice( 'sbp_wp_config_error', '<p><strong>' . SBP_PLUGIN_NAME . '</strong> ' . __( 'Can not write wp-config.php file. Some ' . SBP_PLUGIN_NAME . ' features may not work. Please check your file permissions.', 'speed-booster-pack' ) . '</p>', 'error', true, 'recurrent' );
|
1354 |
}
|
1355 |
-
|
1356 |
-
// WP-Config File Error
|
1357 |
-
if ( get_transient( 'sbp_warmup_errors' ) ) {
|
1358 |
-
$list = '';
|
1359 |
-
$errors = get_transient( 'sbp_warmup_errors' );
|
1360 |
-
if ( is_array( $errors ) ) {
|
1361 |
-
foreach ( $errors as $error ) {
|
1362 |
-
$extras = [];
|
1363 |
-
if ( isset( $error['options']['user-agent'] ) && $error['options']['user-agent'] === 'Mobile' ) {
|
1364 |
-
$extras[] = '(Mobile)';
|
1365 |
-
}
|
1366 |
-
$list .= '<li><a href="' . $error['url'] . '" target="_blank">' . $error['url'] . ' ' . implode( ' ', $extras ) . '</a></li>';
|
1367 |
-
}
|
1368 |
-
SBP_Notice_Manager::display_notice( 'sbp_warmup_errors', '<p><strong>' . SBP_PLUGIN_NAME . '</strong> ' . __( 'Cache warmup completed but following pages may not be cached. Please check this pages are available. (Hover this notice to see all errors)', 'speed-booster-pack' ) . '</p><ul class="warmup-cache-error-list">' . $list . '</ul>', 'error', true, 'recurrent' );
|
1369 |
-
}
|
1370 |
-
}
|
1371 |
}
|
1372 |
|
1373 |
private function initialize_announce4wp() {
|
282 |
'<strong>' . __( 'Special', 'speed-booster-pack' ) . '</strong>: ' . __( 'This module has features for specific cases like CDN usage, localizing tracker scripts, adding custom JavaScript code and optimizations for some popular plugins.', 'speed-booster-pack' ) . '</li><li>' .
|
283 |
'<strong>' . __( 'Tweaks', 'speed-booster-pack' ) . '</strong>: ' . __( 'This module lets you tweak the WordPress core and your page sources by dequeueing core scripts/styles, decluttering <head>, optimizing revisions and the Heartbeat API and so on.', 'speed-booster-pack' ) . '</li></ul>' .
|
284 |
'<p>' . __( 'Feel free to experiment, and don\'t forget to create exclude rules when necessary!', 'speed-booster-pack' ) . '</p>',
|
285 |
+
], // Z_TODO: Add the "Fetching Cloudflare settings" text.
|
286 |
[
|
287 |
'type' => 'subheading',
|
288 |
'content' => __( 'Upcoming features', 'speed-booster-pack' ),
|
351 |
'id' => 'caching_warmup_after_clear',
|
352 |
'type' => 'switcher',
|
353 |
'title' => __( 'Warm up cache on clear', 'speed-booster-pack' ),
|
354 |
+
'desc' => __( 'Creates cache files for the front page and all pages that are linked from the front page, each time the cache is cleared. Note that even though you don\'t turn this option on, you can manually warm up the cache from your admin bar.', 'speed-booster-pack' ),
|
|
|
|
|
355 |
'dependency' => [ 'module_caching', '==', '1', '', 'visible' ],
|
356 |
],
|
357 |
[
|
432 |
'desc' => __( 'You can find your zone ID in the Overview tab on your Cloudflare panel.', 'speed-booster-pack' ),
|
433 |
],
|
434 |
[
|
435 |
+
'title' => __( 'Rocket Loader', 'speed-booster-pack' ),
|
436 |
'id' => 'cf_rocket_loader_enable',
|
437 |
'class' => 'with-preloader',
|
438 |
'type' => 'switcher',
|
439 |
],
|
440 |
[
|
441 |
+
'title' => __( 'Development Mode', 'speed-booster-pack' ),
|
442 |
'id' => 'cf_dev_mode_enable',
|
443 |
'class' => 'with-preloader',
|
444 |
'type' => 'switcher',
|
445 |
],
|
446 |
[
|
447 |
+
'title' => __( 'Minify CSS', 'speed-booster-pack' ),
|
448 |
'id' => 'cf_css_minify_enable',
|
449 |
'class' => 'with-preloader',
|
450 |
'type' => 'switcher',
|
451 |
],
|
452 |
[
|
453 |
+
'title' => __( 'Minify HTML', 'speed-booster-pack' ),
|
454 |
'id' => 'cf_html_minify_enable',
|
455 |
'class' => 'with-preloader',
|
456 |
'type' => 'switcher',
|
457 |
],
|
458 |
[
|
459 |
+
'title' => __( 'Minify JS', 'speed-booster-pack' ),
|
460 |
'id' => 'cf_js_minify_enable',
|
461 |
'class' => 'with-preloader',
|
462 |
'type' => 'switcher',
|
463 |
],
|
464 |
[
|
465 |
+
'title' => __( 'Browser Cache TTL', 'speed-booster-pack' ),
|
466 |
'id' => 'cf_browser_cache_ttl',
|
467 |
'class' => 'with-preloader',
|
468 |
'type' => 'select',
|
496 |
'type' => 'content',
|
497 |
'content' => '
|
498 |
<span>
|
499 |
+
<a href="#" class="button button-small sbp-cloudflare-test">' . __( 'Test Cloudflare connection', 'speed-booster-pack' ) . '<span class="sbp-cloudflare-spinner"></span></a>
|
500 |
<span class="sbp-cloudflare-fetching">' . __( 'Fetching Cloudflare settings...', 'speed-booster-pack' ) . '</span>
|
501 |
</span>
|
502 |
<span class="sbp-cloudflare-info-text sbp-cloudflare-incorrect" style="color:red; vertical-align: middle;"><i class="fa fa-exclamation-triangle"></i> ' . __( 'Your Cloudflare credentials are incorrect.', 'speed-booster-pack' ) . '</span>
|
503 |
<span class="sbp-cloudflare-info-text sbp-cloudflare-correct" style="color:green; vertical-align: middle;"><i class="fa fa-check-circle"></i> ' . __( 'Your Cloudflare credentials are correct.', 'speed-booster-pack' ) . '</span>
|
504 |
+
<span class="sbp-cloudflare-info-text sbp-cloudflare-warning" style="color:orange; vertical-align: middle;"><i class="fa fa-exclamation-circle"></i> ' . __( 'Enter your Cloudflare credentials and save settings to see CloudFlare options.', 'speed-booster-pack' ) . '</span>
|
505 |
',
|
506 |
],
|
507 |
];
|
565 |
'title' => __( 'Included Directories', 'speed-booster-pack' ),
|
566 |
'id' => 'cdn_includes',
|
567 |
'type' => 'code_editor',
|
568 |
+
'desc' => __( 'Anything other than WordPress\'s existing directories should be entered here to be rewritten with the CDN domain. Separated by new lines.', 'speed-booster-pack' ),
|
569 |
],
|
570 |
[
|
571 |
'title' => __( 'Excluded Extensions', 'speed-booster-pack' ),
|
572 |
'id' => 'cdn_excludes',
|
573 |
'type' => 'code_editor',
|
574 |
+
'desc' => __( 'If you want to exclude certain file types, enter the extensions here. Separated by new lines.', 'speed-booster-pack' ),
|
575 |
],
|
576 |
],
|
577 |
$cloudflare_fields,
|
587 |
);
|
588 |
/* END Section: CDN & Proxy */
|
589 |
|
590 |
+
/* BEGIN Section: Optimize CSS */
|
|
|
591 |
CSF::createSection(
|
592 |
$prefix,
|
593 |
[
|
594 |
+
'title' => __( 'Optimize CSS', 'speed-booster-pack' ),
|
595 |
'id' => 'css',
|
596 |
'icon' => 'fa fa-palette',
|
597 |
'fields' => [
|
598 |
|
599 |
[
|
600 |
/* translators: used like "Enable/Disable XXX" where "XXX" is the module name. */
|
601 |
+
'title' => __( 'Enable/Disable', 'speed-booster-pack' ) . ' ' . __( 'Optimize CSS', 'speed-booster-pack' ),
|
602 |
'id' => 'module_css',
|
603 |
'class' => 'module-css',
|
604 |
'type' => 'switcher',
|
637 |
[
|
638 |
'id' => 'is_front_page',
|
639 |
'type' => 'code_editor',
|
640 |
+
// Z_TODO: Edit the following description.
|
641 |
'desc' => sprintf( __( 'This CSS block will be injected into the front page of your website. %1$s%2$s%3$s', 'speed-booster-pack' ), '<a href="https://developer.wordpress.org/reference/functions/is_front_page/" rel="external noopener" target="_blank">', sprintf( __( 'Learn more about %s.', 'speed-booster-pack' ), '<code>is_front_page()</code>' ), '</a>' ),
|
642 |
],
|
643 |
],
|
707 |
],
|
708 |
[
|
709 |
'title' => __( 'Remove critical CSS after onload', 'speed-booster-pack' ),
|
|
|
710 |
'id' => 'remove_criticalcss',
|
711 |
+
'desc' => __( 'Remove critical CSS the onload event fires on browser. Enable this only if you\'re having styling issues when the pages finish loading.', 'speed-booster-pack' ),
|
|
|
712 |
'type' => 'switcher',
|
713 |
'default' => true,
|
714 |
'dependency' => [ 'module_css|enable_criticalcss', '==|==', '1|1', '', 'visible' ],
|
742 |
],
|
743 |
]
|
744 |
);
|
745 |
+
/* END Section: Optimize CSS */
|
746 |
|
747 |
/* BEGIN Section: Assets */
|
748 |
$asset_fields = [
|
1329 |
|
1330 |
// Warmup Notice
|
1331 |
if ( get_transient( 'sbp_warmup_started' ) ) {
|
|
|
1332 |
SBP_Notice_Manager::display_notice( 'sbp_warmup_started', '<p><strong>' . SBP_PLUGIN_NAME . ':</strong> ' . __( 'Cache warmup started.', 'speed-booster-pack' ) . '</p>', 'success', true, 'recurrent' );
|
1333 |
}
|
1334 |
|
1335 |
// Warmup Notice
|
1336 |
if ( get_transient( 'sbp_warmup_complete' ) ) {
|
1337 |
+
SBP_Notice_Manager::display_notice( 'sbp_warmup_complete', '<p><strong>' . SBP_PLUGIN_NAME . ':</strong> ' . __( 'Static cache files created.', 'speed-booster-pack' ) . '</p>', 'info', true, 'recurrent' );
|
|
|
1338 |
}
|
1339 |
|
1340 |
// WP-Config Inject File Error
|
1346 |
if ( get_transient( 'sbp_wp_config_error' ) ) {
|
1347 |
SBP_Notice_Manager::display_notice( 'sbp_wp_config_error', '<p><strong>' . SBP_PLUGIN_NAME . '</strong> ' . __( 'Can not write wp-config.php file. Some ' . SBP_PLUGIN_NAME . ' features may not work. Please check your file permissions.', 'speed-booster-pack' ) . '</p>', 'error', true, 'recurrent' );
|
1348 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1349 |
}
|
1350 |
|
1351 |
private function initialize_announce4wp() {
|
admin/js/speed-booster-pack-admin.js
CHANGED
@@ -153,7 +153,7 @@
|
|
153 |
} else if (response.status === 'empty_info') {
|
154 |
$('.sbp-cloudflare-warning').show();
|
155 |
} else {
|
156 |
-
//
|
157 |
$('.sbp-cloudflare-incorrect').show();
|
158 |
$('.with-preloader::before, .with-preloader::after').remove();
|
159 |
}
|
153 |
} else if (response.status === 'empty_info') {
|
154 |
$('.sbp-cloudflare-warning').show();
|
155 |
} else {
|
156 |
+
// Z_TODO: Show a specific warning for connection errors.
|
157 |
$('.sbp-cloudflare-incorrect').show();
|
158 |
$('.with-preloader::before, .with-preloader::after').remove();
|
159 |
}
|
includes/classes/class-sbp-cache-warmup.php
CHANGED
@@ -78,8 +78,9 @@ class SBP_Cache_Warmup extends SBP_Abstract_Module {
|
|
78 |
$urls[] = $href;
|
79 |
$this->warmup_process->push_to_queue( [ 'url' => $href ] );
|
80 |
if ( sbp_get_option( 'caching_separate_mobile' ) ) {
|
81 |
-
$this->warmup_process->push_to_queue( [
|
82 |
-
|
|
|
83 |
] );
|
84 |
}
|
85 |
}
|
78 |
$urls[] = $href;
|
79 |
$this->warmup_process->push_to_queue( [ 'url' => $href ] );
|
80 |
if ( sbp_get_option( 'caching_separate_mobile' ) ) {
|
81 |
+
$this->warmup_process->push_to_queue( [
|
82 |
+
'url' => $href,
|
83 |
+
'options' => [ 'user-agent' => 'Mobile' ],
|
84 |
] );
|
85 |
}
|
86 |
}
|
includes/classes/class-sbp-cloudflare.php
CHANGED
@@ -93,7 +93,7 @@ class SBP_Cloudflare extends SBP_Abstract_Module {
|
|
93 |
if ( self::is_cloudflare_active() ) {
|
94 |
$result = self::send_request( 'purge_cache', 'POST', [ 'purge_everything' => true ] );
|
95 |
|
96 |
-
if ( true === $result['success'] ) {
|
97 |
return true;
|
98 |
}
|
99 |
}
|
@@ -112,7 +112,7 @@ class SBP_Cloudflare extends SBP_Abstract_Module {
|
|
112 |
public static function check_credentials( $override_credentials = [] ) {
|
113 |
$result = self::send_request( 'check_credentials', 'GET', [], $override_credentials );
|
114 |
|
115 |
-
if ( true === $result['success'] ) {
|
116 |
return true;
|
117 |
}
|
118 |
|
@@ -231,7 +231,7 @@ class SBP_Cloudflare extends SBP_Abstract_Module {
|
|
231 |
$settings = [];
|
232 |
|
233 |
$result = self::send_request( 'settings' );
|
234 |
-
if ( $result['success'] ) {
|
235 |
foreach ( $result['result'] as $setting ) {
|
236 |
if ( in_array( $setting['id'], $settings_to_fetch ) ) {
|
237 |
$settings[ $setting['id'] ] = $setting;
|
93 |
if ( self::is_cloudflare_active() ) {
|
94 |
$result = self::send_request( 'purge_cache', 'POST', [ 'purge_everything' => true ] );
|
95 |
|
96 |
+
if ( $result && true === $result['success'] ) {
|
97 |
return true;
|
98 |
}
|
99 |
}
|
112 |
public static function check_credentials( $override_credentials = [] ) {
|
113 |
$result = self::send_request( 'check_credentials', 'GET', [], $override_credentials );
|
114 |
|
115 |
+
if ( $result && true === $result['success'] ) {
|
116 |
return true;
|
117 |
}
|
118 |
|
231 |
$settings = [];
|
232 |
|
233 |
$result = self::send_request( 'settings' );
|
234 |
+
if ( $result && $result['success'] ) {
|
235 |
foreach ( $result['result'] as $setting ) {
|
236 |
if ( in_array( $setting['id'], $settings_to_fetch ) ) {
|
237 |
$settings[ $setting['id'] ] = $setting;
|
includes/classes/class-sbp-css-minifier.php
CHANGED
@@ -105,7 +105,7 @@ class SBP_CSS_Minifier extends SBP_Abstract_Module {
|
|
105 |
$css = $this->minify_css( $css );
|
106 |
}
|
107 |
|
108 |
-
$css = $this->
|
109 |
|
110 |
return $css;
|
111 |
}
|
@@ -113,17 +113,16 @@ class SBP_CSS_Minifier extends SBP_Abstract_Module {
|
|
113 |
return false;
|
114 |
}
|
115 |
|
116 |
-
private function
|
117 |
$css_dir = substr( $url, 0, strrpos( $url, '/' ) );
|
118 |
|
119 |
// remove empty url() declarations
|
120 |
$css = preg_replace( "/url\(\s?\)/", "", $css );
|
121 |
// new regex expression
|
122 |
-
$css = preg_replace( "/url(?!\(['\"]?(data:|http:|https:))\(['\"]?([^\/][^'\"\)]*)['\"]?\)/i",
|
123 |
"url({$css_dir}/$2)",
|
124 |
$css );
|
125 |
|
126 |
-
|
127 |
return $css;
|
128 |
}
|
129 |
|
105 |
$css = $this->minify_css( $css );
|
106 |
}
|
107 |
|
108 |
+
$css = $this->rebuild_css_urls( $css, $url );
|
109 |
|
110 |
return $css;
|
111 |
}
|
113 |
return false;
|
114 |
}
|
115 |
|
116 |
+
private function rebuild_css_urls( $css, $url ) {
|
117 |
$css_dir = substr( $url, 0, strrpos( $url, '/' ) );
|
118 |
|
119 |
// remove empty url() declarations
|
120 |
$css = preg_replace( "/url\(\s?\)/", "", $css );
|
121 |
// new regex expression
|
122 |
+
$css = preg_replace( "/url\s*(?!\(['\"]?(data:|http:|https:))\(\s*['\"]?([^\/][^'\"\)]*)['\"]?\s*\)/i",
|
123 |
"url({$css_dir}/$2)",
|
124 |
$css );
|
125 |
|
|
|
126 |
return $css;
|
127 |
}
|
128 |
|
includes/classes/class-sbp-warmup-process.php
CHANGED
@@ -9,9 +9,6 @@ if ( ! defined( 'WPINC' ) ) {
|
|
9 |
|
10 |
class SBP_Warmup_Process extends \WP_Background_Process {
|
11 |
protected $action = 'warmup';
|
12 |
-
private $done = [];
|
13 |
-
private $success = [];
|
14 |
-
private $failed = [];
|
15 |
private $begun = false;
|
16 |
|
17 |
protected function task( $item ) {
|
@@ -28,14 +25,7 @@ class SBP_Warmup_Process extends \WP_Background_Process {
|
|
28 |
'limit_response_size' => 100,
|
29 |
], $options );
|
30 |
|
31 |
-
$
|
32 |
-
|
33 |
-
$response = wp_remote_get( $item['url'], $args );
|
34 |
-
if ( 200 !== wp_remote_retrieve_response_code( $response ) ) {
|
35 |
-
$this->failed[] = $item;
|
36 |
-
} else {
|
37 |
-
$this->success[] = $item;
|
38 |
-
}
|
39 |
|
40 |
if ( $this->begun === false ) {
|
41 |
set_transient( 'sbp_warmup_started', 1 );
|
@@ -46,9 +36,6 @@ class SBP_Warmup_Process extends \WP_Background_Process {
|
|
46 |
}
|
47 |
|
48 |
protected function complete() {
|
49 |
-
/* translator: %s is the url of the page */
|
50 |
-
// BEYNTODO: Change Text
|
51 |
-
set_transient( 'sbp_warmup_errors', $this->failed );
|
52 |
set_transient( 'sbp_warmup_complete', true );
|
53 |
delete_transient( 'sbp_warmup_started' );
|
54 |
parent::complete();
|
9 |
|
10 |
class SBP_Warmup_Process extends \WP_Background_Process {
|
11 |
protected $action = 'warmup';
|
|
|
|
|
|
|
12 |
private $begun = false;
|
13 |
|
14 |
protected function task( $item ) {
|
25 |
'limit_response_size' => 100,
|
26 |
], $options );
|
27 |
|
28 |
+
wp_remote_get( $item['url'], $args );
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
if ( $this->begun === false ) {
|
31 |
set_transient( 'sbp_warmup_started', 1 );
|
36 |
}
|
37 |
|
38 |
protected function complete() {
|
|
|
|
|
|
|
39 |
set_transient( 'sbp_warmup_complete', true );
|
40 |
delete_transient( 'sbp_warmup_started' );
|
41 |
parent::complete();
|
includes/sbp-helpers.php
CHANGED
@@ -49,7 +49,7 @@ if ( ! function_exists( 'sbp_get_hosting_restrictions' ) ) {
|
|
49 |
return 'Kinsta';
|
50 |
}
|
51 |
|
52 |
-
if ( function_exists( 'is_wpe' ) || function_exists( 'is_wpe_snapshot' ) ) { //
|
53 |
return [ 'name' => 'WP Engine', 'disable_features' => [] ];
|
54 |
}
|
55 |
|
49 |
return 'Kinsta';
|
50 |
}
|
51 |
|
52 |
+
if ( function_exists( 'is_wpe' ) || function_exists( 'is_wpe_snapshot' ) ) { // Z_TODO: Check here
|
53 |
return [ 'name' => 'WP Engine', 'disable_features' => [] ];
|
54 |
}
|
55 |
|
speed-booster-pack.php
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
* Plugin Name: Speed Booster Pack
|
7 |
* Plugin URI: https://speedboosterpack.com
|
8 |
* Description: PageSpeed optimization is vital for SEO: A faster website equals better conversions. Optimize & cache your site with this smart plugin!
|
9 |
-
* Version: 4.1.0
|
10 |
* Author: Optimocha
|
11 |
* Author URI: https://optimocha.com
|
12 |
* License: GPLv3 or later
|
@@ -32,7 +32,7 @@ define( 'SBP_PLUGIN_NAME', 'Speed Booster Pack' );
|
|
32 |
/**
|
33 |
* Current plugin version.
|
34 |
*/
|
35 |
-
define( 'SBP_VERSION', '4.1.0' );
|
36 |
|
37 |
/**
|
38 |
* Plugin website URL.
|
@@ -165,6 +165,7 @@ function sbp_autoloader( $class_name ) {
|
|
165 |
* @since 4.0.0
|
166 |
*/
|
167 |
function run_speed_booster_pack() {
|
|
|
168 |
|
169 |
$plugin = new Speed_Booster_Pack();
|
170 |
$plugin->run();
|
6 |
* Plugin Name: Speed Booster Pack
|
7 |
* Plugin URI: https://speedboosterpack.com
|
8 |
* Description: PageSpeed optimization is vital for SEO: A faster website equals better conversions. Optimize & cache your site with this smart plugin!
|
9 |
+
* Version: 4.1.1
|
10 |
* Author: Optimocha
|
11 |
* Author URI: https://optimocha.com
|
12 |
* License: GPLv3 or later
|
32 |
/**
|
33 |
* Current plugin version.
|
34 |
*/
|
35 |
+
define( 'SBP_VERSION', '4.1.1' );
|
36 |
|
37 |
/**
|
38 |
* Plugin website URL.
|
165 |
* @since 4.0.0
|
166 |
*/
|
167 |
function run_speed_booster_pack() {
|
168 |
+
if( preg_match( '/(\.txt|\.pdf|\.xml|\.ico|\.gz|\/feed\/?)/', $_SERVER['REQUEST_URI'] ) ) {return;}
|
169 |
|
170 |
$plugin = new Speed_Booster_Pack();
|
171 |
$plugin->run();
|
vendor/simplehtmldom/simplehtmldom/.gitattributes
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
* text=auto
|
2 |
-
|
3 |
-
# Ignored files during git-archive
|
4 |
-
.gitattributes export-ignore
|
5 |
-
.gitignore export-ignore
|
6 |
-
.travis.yml export-ignore
|
7 |
-
phpcompatibility.xml export-ignore
|
8 |
-
phpunit.xml export-ignore
|
9 |
-
phpcs.xml export-ignore
|
10 |
-
release.sh export-ignore
|
11 |
-
testcase/ export-ignore
|
12 |
-
tests/ export-ignore
|
13 |
-
manual/site/ export-ignore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/.gitignore
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
# Build files
|
2 |
-
simplehtmldom_*.zip
|
3 |
-
|
4 |
-
# Unit test data files
|
5 |
-
tests/data/*
|
6 |
-
!tests/data/.gitkeep
|
7 |
-
|
8 |
-
# vscode configuration files
|
9 |
-
.vscode/
|
10 |
-
|
11 |
-
# Generated site data
|
12 |
-
manual/site/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/.travis.yml
DELETED
@@ -1,46 +0,0 @@
|
|
1 |
-
dist: trusty
|
2 |
-
language: php
|
3 |
-
|
4 |
-
install:
|
5 |
-
- composer global require dealerdirect/phpcodesniffer-composer-installer;
|
6 |
-
- composer global require phpcompatibility/php-compatibility;
|
7 |
-
- if [[ "$PHPUNIT" ]]; then
|
8 |
-
composer global require phpunit/phpunit ^$PHPUNIT;
|
9 |
-
fi
|
10 |
-
|
11 |
-
script:
|
12 |
-
- phpenv rehash
|
13 |
-
# Run PHP_CodeSniffer on all versions
|
14 |
-
- ~/.config/composer/vendor/bin/phpcs . --standard=phpcs.xml --warning-severity=0 --extensions=php -p;
|
15 |
-
# Check PHP compatibility for the lowest and highest supported version
|
16 |
-
- if [[ $TRAVIS_PHP_VERSION == "5.6" || $TRAVIS_PHP_VERSION == "7.3" ]]; then
|
17 |
-
~/.config/composer/vendor/bin/phpcs . --standard=phpcompatibility.xml --extensions=php -p;
|
18 |
-
fi
|
19 |
-
# Run unit tests on highest major version
|
20 |
-
- if [[ ${TRAVIS_PHP_VERSION:0:1} == "7" ]]; then
|
21 |
-
~/.config/composer/vendor/bin/phpunit --configuration=phpunit.xml --include-path=lib/;
|
22 |
-
fi
|
23 |
-
|
24 |
-
php:
|
25 |
-
- 7.3
|
26 |
-
|
27 |
-
env:
|
28 |
-
- PHPUNIT=6
|
29 |
-
- PHPUNIT=7
|
30 |
-
- PHPUNIT=8
|
31 |
-
|
32 |
-
matrix:
|
33 |
-
fast_finish: true
|
34 |
-
|
35 |
-
include:
|
36 |
-
- php: 5.6
|
37 |
-
env: PHPUNIT=
|
38 |
-
- php: 7.0
|
39 |
-
- php: 7.1
|
40 |
-
- php: 7.2
|
41 |
-
|
42 |
-
allow_failures:
|
43 |
-
- php: 7.3
|
44 |
-
env: PHPUNIT=7
|
45 |
-
- php: 7.3
|
46 |
-
env: PHPUNIT=8
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/CHANGELOG.md
CHANGED
@@ -1,369 +1,369 @@
|
|
1 |
-
# Changelog
|
2 |
-
All notable changes to this project will be documented in this file.
|
3 |
-
|
4 |
-
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
5 |
-
|
6 |
-
|
7 |
-
## [Unreleased]
|
8 |
-
### Changed
|
9 |
-
- Comments that start with `>` or `->` are now considered malformed in accordance with [section 12.1.6](https://html.spec.whatwg.org/multipage/syntax.html#comments) of the [HTML specification](https://html.spec.whatwg.org/multipage/). Comments may still contain the strings `<!--` or `--!>` and they may still end with `<!-` contrary to the specification.
|
10 |
-
|
11 |
-
## [2.0-RC2] - 2019-11-09
|
12 |
-
|
13 |
-
**Important**: This is a release **candidate**, which means some features might not yet be stable or emit unexpected behavior. Please don't hesitate to report broken or unstable features.
|
14 |
-
|
15 |
-
### Added
|
16 |
-
- Added a `README` file.
|
17 |
-
- Added a `composer` file.
|
18 |
-
- Added `.travis.yml` for automated unit tests with `Travis-CI`.
|
19 |
-
- Added the magic method `__debugInfo` to `HtmlDocument` and `HtmlNode` in order to reduce the memory footprint and to prevent recursion errors when using `print_r` and `var_dump`.
|
20 |
-
- Added the magic method `__call` to `HtmlDocument` and `HtmlNode` as a wrapper for deprecated methods using the lowercase calling convention (see below).
|
21 |
-
- Added unit tests `attribute_test.php`, `callback_test.php`, `debug_info_test.php`, `doctype_test.php`, `script_test.php`, `server_side_script_test.php`, `style_test.php` and `dom_manipulation_test.php`.
|
22 |
-
- Added and extended unit tests for `cdata_test.php` and `comment_test.php`.
|
23 |
-
- Added a new `Debug` class to inform users about deprecated functions, malformed documents and parsing issues.
|
24 |
-
- Added full support for `script` element parsing.
|
25 |
-
### Changed
|
26 |
-
- Renamed unit test `simple_html_dom_test.php` to `htmldocument_test.php`.
|
27 |
-
- Renamed unit test `simple_html_dom_node_test.php` to `htmlnode_test.php`.
|
28 |
-
- Changed the implementation of destructors for better garbage collection.
|
29 |
-
- Changed how literal elements (`script`, `style`, `cdata`, "comment" and `code`) are handled by `HtmlDocument`.
|
30 |
-
### Deprecated
|
31 |
-
- `HtmlDocument::clear()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `unset()` instead.
|
32 |
-
- `HtmlDocument::load_file()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlDocument::loadFile()` instead.
|
33 |
-
- `HtmlNode::children()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::childNodes()` instead.
|
34 |
-
- `HtmlNode::first_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::firstChild()` instead.
|
35 |
-
- `HtmlNode::has_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::hasChild()` instead.
|
36 |
-
- `HtmlNode::last_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::lastChild()` instead.
|
37 |
-
- `HtmlNode::next_sibling()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::nextSibling()` instead.
|
38 |
-
- `HtmlNode::prev_sibling()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::previousSibling()` instead.
|
39 |
-
- Support for Smarty scripts has been deprecated and will be removed in the next major version of simplehtmldom.
|
40 |
-
- Support for server-side scripts has been deprecated and will be removed in the next major version of simplehtmldom.
|
41 |
-
### Removed
|
42 |
-
- Removed the `testcase/` folder as all tests are covered by unit tests inside `tests/`.
|
43 |
-
### Fixed
|
44 |
-
- Fixed a bug with boolean attributes that were incorrectly represented with a value of "1" when saving the DOM.
|
45 |
-
- Fixed a bug with comment and CDATA parsing that could cause an infinite loop if any of these elements contained `script`, `style`, `code`, server-side php or Smarty tags.
|
46 |
-
- Fixed a bug with comment and CDATA parsing that resulted in whitespace and newlines being removed when loading a document with `$stripRN = true` (default setting).
|
47 |
-
- Fixed a bug with attribute values that resulted in incorrectly encoded content when using `outertext()`, `innertext()` or `save()`.
|
48 |
-
- Fixed a bug with charset encoding that resulted in partially encoded documents depending on the use of `outertext()` and `innertext()` [#178](https://sourceforge.net/p/simplehtmldom/bugs/178/)
|
49 |
-
- Fixed multiple bugs related to DOM manipulation when using `HtmlDocument::createElement()`, `HtmlDocument::createTextNode()` and `HtmlNode::appendChild()`.
|
50 |
-
|
51 |
-
## [2.0-RC1] - 2019-10-20
|
52 |
-
|
53 |
-
**Important**: This is a release **candidate**, which means some features might not yet be stable or emit unexpected behavior. Please don't hesitate to report broken or unstable features.
|
54 |
-
|
55 |
-
### Added
|
56 |
-
- Added unit tests
|
57 |
-
- Added tests for whitespace handling.
|
58 |
-
- Added tests for entity decoding.
|
59 |
-
- Added tests for node functions after calling remove().
|
60 |
-
- Added tests for `maxLen` in `file_get_html`.
|
61 |
-
- Added tests for `simple_html_dom_node`.
|
62 |
-
- Added tests for `HtmlWeb`.
|
63 |
-
- Added test for bug [#172](https://sourceforge.net/p/simplehtmldom/bugs/172/)
|
64 |
-
- Added optional argument `$trim = true` to `$node->text()`
|
65 |
-
- Added attribute value normalization
|
66 |
-
- https://www.w3.org/TR/html/syntax.html#attribute-values
|
67 |
-
- https://www.w3.org/TR/xml/#AVNormalize
|
68 |
-
- Added automatic HTML entity decoding when loading documents [feature:#52]
|
69 |
-
- Added [the negation pseudo-class](https://www.w3.org/TR/selectors-3/#negation)
|
70 |
-
- Added `simple_html_dom::expect()`.
|
71 |
-
- Added `simple_html_dom_node::expect()`.
|
72 |
-
- Added the ability to parse CDATA sections.
|
73 |
-
- Added `HtmlWeb` to directly load webpages via cURL or fopen as DOM.
|
74 |
-
- Added `HtmlDocument`, `HtmlNode`, `HtmlWeb` and `constants` to namespace `simplehtmldom`.
|
75 |
-
- Added a new element type `HDOM_TYPE_CDATA` for CDATA sections.
|
76 |
-
- Added full support for parsing comments and CDATA sections.
|
77 |
-
### Changed
|
78 |
-
- `simple_html_dom::doc` is now unset after loading the DOM.
|
79 |
-
- `simple_html_dom::restore_noise()` now clears restored elements.
|
80 |
-
- `simple_html_dom_node::_[HDOM_INFO_ENDSPACE]` now only exists if needed.
|
81 |
-
- `simple_html_dom_node::_[HDOM_INFO_SPACE]`
|
82 |
-
- Now stores elements by attribute names.
|
83 |
-
- Now only exists if needed (defaults to `array(' ', '', '')`).
|
84 |
-
- `simple_html_dom_node::_[HDOM_INFO_QUOTE]`
|
85 |
-
- Now stores elements by attribute names.
|
86 |
-
- Now only exists if needed (defaults to `HDOM_QUOTE_DOUBLE`).
|
87 |
-
- `simple_html_dom_node::text()` now supports all block and inline level elements.
|
88 |
-
- `simple_html_dom_node::text()` now skips empty block elements.
|
89 |
-
- `simple_html_dom_node::text()` now properly handles ` ` characters.
|
90 |
-
- `simple_html_dom_node::removeChild()` now removes all types of childs.
|
91 |
-
- Increased `MAX_FILE_SIZE` from 0.6 MB (600000 Bytes) to 2.5 MiB (2621440 Bytes)
|
92 |
-
- `HDOM_INFO_INNER` (innertext) is now stored as part of the owning element.
|
93 |
-
- Moved and renamed `simple_html_dom` to `HtmlDocument`.
|
94 |
-
- Moved and renamed `simple_html_dom_node` to `HtmlNode`.
|
95 |
-
- Moved constants to `constants.php`
|
96 |
-
- Moved `HDOM_TYPE_*`, `HDOM_INFO_*` and `HDOM_QUOTE_*` constants into `HtmlNode`.
|
97 |
-
### Removed
|
98 |
-
- Removed `/example/scraping/example_scraping_general.php`.
|
99 |
-
- Removed `/example/simple_html_dom_utility.php`.
|
100 |
-
- Removed `/app`.
|
101 |
-
- Removed `/testcase/reader`.
|
102 |
-
- Removed `simple_html_dom_node::tag_start`.
|
103 |
-
### Fixed
|
104 |
-
- Fixed fatal error when removing nodes from the DOM (#172)
|
105 |
-
- Fixed `simple_html_dom::parse()` to work after removing elements from the DOM.
|
106 |
-
- Fixed `simple_html_dom_node::text()` to properly handle UTF-8 characters.
|
107 |
-
- Fixed all scripts in the example folder.
|
108 |
-
- Fixed `file_get_html` to return false if the file size is larger than `maxLen`.
|
109 |
-
- Fixed a bug that caused the parser to convert UTF-8 to UTF-8 on mistake.
|
110 |
-
- Fixed `simple_html_dom::loadFile` to properly forward arguments to `simple_html_dom::load_file`.
|
111 |
-
- Fixed handling of optional closing tags to end on the last element.
|
112 |
-
- Fixed broken support for `text` nodes when using `find` (#175).
|
113 |
-
|
114 |
-
## [1.9] - 2019-05-30
|
115 |
-
### Added
|
116 |
-
- Added unit test for bug reports
|
117 |
-
- Added test for bug [#153](https://sourceforge.net/p/simplehtmldom/bugs/153/)
|
118 |
-
- Added test for bug [#163](https://sourceforge.net/p/simplehtmldom/bugs/163/)
|
119 |
-
- Added test for bug [#166](https://sourceforge.net/p/simplehtmldom/bugs/166/)
|
120 |
-
- Added test for bug [#169](https://sourceforge.net/p/simplehtmldom/bugs/169/)
|
121 |
-
- Added unit test for character sets UTF-8, CP1251 and CP1252 (#142)
|
122 |
-
- Added support for meta charset to parse_charset
|
123 |
-
- Added detection for CP1251 to parse_charset, using iconv
|
124 |
-
- Added LICENSE file (MIT) to the project root
|
125 |
-
- Added functions to `simple_html_dom_node`
|
126 |
-
- `remove`: Removes the current node recursively from the DOM tree
|
127 |
-
- `removeChild`: Removes a child node recursively from the DOM tree
|
128 |
-
- `hasClass`: Checks if the current node has the specified class name
|
129 |
-
- `addClass`: Adds one or more classes to the current node
|
130 |
-
- `removeClass`: Removes one or more classes from the current node
|
131 |
-
- `save`: Saves the current node to disk
|
132 |
-
### Changed
|
133 |
-
- Changed manual from custom implementation to MkDocs (https://www.mkdocs.org/)
|
134 |
-
### Fixed
|
135 |
-
- Fixed warning when trying to clear() the DOM on a null nodes list (#153)
|
136 |
-
- Fixed missing whitespace when returning plaintext (#163)
|
137 |
-
- Fixed broken detection of duplicate attributes (#166)
|
138 |
-
- Fixed broken detection of CP1252 (ISO-8859-1) documents (#142)
|
139 |
-
- Fixed error using next-sibling combinator ('E + F') on last child
|
140 |
-
- Fixed selector parsing for attribute selectors ending on "s" or "i" (#169)
|
141 |
-
|
142 |
-
## [1.8.1] - 2019-01-13
|
143 |
-
### Fixed
|
144 |
-
- Fixed various bugs related to parsing classes and ids
|
145 |
-
|
146 |
-
## [1.8] - 2019-01-13
|
147 |
-
### Added
|
148 |
-
- Added documentation for `simple_html_dom_node::find`
|
149 |
-
- Added documentation for `simple_html_dom_node::parse_selector`
|
150 |
-
- Added documentation for `simple_html_dom_node::seek`
|
151 |
-
- Added documentation for `simple_html_dom_node::match`
|
152 |
-
- Added unit tests for bug reports
|
153 |
-
- Added test for bug [#62](https://sourceforge.net/p/simplehtmldom/bugs/62/)
|
154 |
-
- Added test for bug [#79](https://sourceforge.net/p/simplehtmldom/bugs/79/)
|
155 |
-
- Added test for bug [#144](https://sourceforge.net/p/simplehtmldom/bugs/144/)
|
156 |
-
- Added unit tests for CSS selectors
|
157 |
-
- Added ability to define constants before simple_html_dom does
|
158 |
-
- 'DEFAULT_TARGET_CHARSET'
|
159 |
-
- 'DEFAULT_BR_TEXT'
|
160 |
-
- 'DEFAULT_SPAN_TEXT'
|
161 |
-
- 'MAX_FILE_SIZE'
|
162 |
-
- Added support for CSS combinators
|
163 |
-
- Added support for Child Combinator (`>`)
|
164 |
-
- Added support for Next Sibling Combinator (`+`)
|
165 |
-
- Added support for Subsequent Sibling Combinator (`~`)
|
166 |
-
- Added support for multiclass selectors (`.class.class.class`)
|
167 |
-
- Added support for multiattribute selectors (`[attr1][attr2][attribute3]`)
|
168 |
-
- Added support for attribute selectors
|
169 |
-
- Added support for pipe selectors (`|=`)
|
170 |
-
- Added support for tilde selectors (`~=`)
|
171 |
-
- Added support for case sensitivity selectors (`i` and `s`)
|
172 |
-
- Added unit tests for PHP compatibility to PHP 5.6+
|
173 |
-
- Added coding standard using PHP_CodeSniffer
|
174 |
-
### Changed
|
175 |
-
- Removed automatic filtering of 'tbody' selectors (#79)
|
176 |
-
> Remove 'tbody' from all selectors to maintain the previous state!
|
177 |
-
- Coding standard using PHP_CodeSniffer
|
178 |
-
### Fixed
|
179 |
-
- Fixed broken CSS selector attributes with value "0" (#62)
|
180 |
-
- Fixed broken simple_html_dom::load_file
|
181 |
-
- Fixed forward slashes in CSS selector breaks value matching using '*=' (#144)
|
182 |
-
- Fixed Universal Selectors
|
183 |
-
|
184 |
-
## [1.7] - 2018-12-10
|
185 |
-
### Added
|
186 |
-
- Added code documentation to improve readability
|
187 |
-
- Added unit tests for `simple_html_dom::$self_closing_tags`
|
188 |
-
- Added unit tests for `simple_html_dom::$optional_closing_tags`
|
189 |
-
- Added unit tests for bug reports
|
190 |
-
- Added test for bug [#56](https://sourceforge.net/p/simplehtmldom/bugs/56/)
|
191 |
-
- Added test for bug [#97](https://sourceforge.net/p/simplehtmldom/bugs/97/)
|
192 |
-
- Added test for bug [#116](https://sourceforge.net/p/simplehtmldom/bugs/116/)
|
193 |
-
- Added test for bug [#121](https://sourceforge.net/p/simplehtmldom/bugs/127/)
|
194 |
-
- Added test for bug [#127](https://sourceforge.net/p/simplehtmldom/bugs/127/)
|
195 |
-
- Added test for bug [#154](https://sourceforge.net/p/simplehtmldom/bugs/154/)
|
196 |
-
- Added test for bug [#160](https://sourceforge.net/p/simplehtmldom/bugs/160/)
|
197 |
-
- Added unit tests for memory management of the parser
|
198 |
-
- Added bit flags to `simple_html_dom::load()`
|
199 |
-
- Added bit flag `HDOM_SMARTY_AS_TEXT` to optionally filter Smarty scripts (#154)\
|
200 |
-
**Note**: Smarty scripts are no longer filtered by default!\
|
201 |
-
- Added build script to automate releases
|
202 |
-
- Added support for attributes without whitespace to separate them
|
203 |
-
### Changed
|
204 |
-
- Improved documentation and readability for `$self_closing_tags`
|
205 |
-
- Improved documentation and readability for `$block_tags`
|
206 |
-
- Improved documentation and readability for `$optional_closing_tags`
|
207 |
-
- Updated list of `simple_html_dom::$self_closing_tags`
|
208 |
-
- Removed 'spacer' (obsolete)
|
209 |
-
- Added 'area'
|
210 |
-
- Added 'col'
|
211 |
-
- Added 'meta'
|
212 |
-
- Added 'param'
|
213 |
-
- Added 'source'
|
214 |
-
- Added 'track'
|
215 |
-
- Added 'wbr'
|
216 |
-
- Updated list of `simple_html_dom::$optional_closing_tags`
|
217 |
-
- Removed "nobr" (obsolete)
|
218 |
-
- Added 'th' as closable element to 'td'
|
219 |
-
- Added 'td' as closable element to 'th'
|
220 |
-
- Added 'optgroup' with 'optgroup' and 'option' as closable elements
|
221 |
-
- Added 'optgroup' as closable element to 'option'
|
222 |
-
- Added 'rp' with 'rp' and 'rt' as closable elements
|
223 |
-
- Added 'rt' with 'rt' and 'rp' as closable elements
|
224 |
-
- Clarified meaning of `simple_html_dom->parent`
|
225 |
-
- Changed default `$offset` for `file_get_html()` from -1 to 0 (#161)
|
226 |
-
- Changed `simple_html_dom::load()` to remove script tags before replacing newline characters
|
227 |
-
- `simple_html_dom_node::text()` no longer adds whitespace to top level span elements (only to sub-elements)
|
228 |
-
- `simple_html_dom_node::text()` adds blank lines between paragraphs
|
229 |
-
- Normalized line endings in the repository to LF via `.gitattributes`
|
230 |
-
- Improved performance of `simple_html_dom::parse_charset()` by approximately 25%
|
231 |
-
- Improved performance of `simple_html_dom::parse()` by approximately 10%
|
232 |
-
### Deprecated
|
233 |
-
- `str_get_html()` is deprecated and should be replaced by `new simple_html_dom()`
|
234 |
-
### Removed
|
235 |
-
- Removed protected function `simple_html_dom::copy_until_char_escaped()`
|
236 |
-
### Fixed
|
237 |
-
- Fixed compatibility issues with PHP 7.3
|
238 |
-
- Fixed typo (#147)
|
239 |
-
- Fixed handling of incorrectly escaped text (#160)
|
240 |
-
- Restore functionality of `$maxLen` in `file_get_html()`
|
241 |
-
- Fixed load_file breaks if an error ocurred in another script
|
242 |
-
|
243 |
-
## [1.6] - 2014-05-28
|
244 |
-
### Added
|
245 |
-
- Added some ability to insert and create nodes
|
246 |
-
- Add ability to search the "noise" array
|
247 |
-
|
248 |
-
## [1.5] - 2012-09-10
|
249 |
-
### Added
|
250 |
-
- Added flag: LOCK_EX while calling "file_put_contents()"
|
251 |
-
- Added support for detecting the source html character set. This is used to convert characters when plaintext is requested.
|
252 |
-
- Other little fixes and features, too numerous to categorize
|
253 |
-
### Changed
|
254 |
-
- Error of "file_get_contents()" will be thrown as an exception
|
255 |
-
### Fixed
|
256 |
-
- Fixed the typo of "token_blank_t"
|
257 |
-
- Memory leak fixed
|
258 |
-
|
259 |
-
## [1.11] - 2008-12-14
|
260 |
-
### Added
|
261 |
-
- Supports xpath generated from Firebug
|
262 |
-
- New method "dump" of "simple_html_dom_node"
|
263 |
-
- New attribute "xmltext" of "simple_html_dom_node"
|
264 |
-
### Changed
|
265 |
-
- Remove preg_quote on selector match function: `[attribute*=value]`
|
266 |
-
- Element "Comment" will treat as children
|
267 |
-
### Fixed
|
268 |
-
- Fixed the problem with `<pre>`
|
269 |
-
- Fixed bug #2207477 (does not load some pages properly)
|
270 |
-
- Fixed bug #2315853 (Error with character after < sign)
|
271 |
-
|
272 |
-
## [1.10] - 2008-10-25
|
273 |
-
### Changed
|
274 |
-
- Negative indexes supports of "find" method, thanks for Vadim Voituk
|
275 |
-
- Constructor with automatically load contents either text or file/url, thanks for Antcs
|
276 |
-
- Fully supports wildcard in selectors
|
277 |
-
### Fixed
|
278 |
-
- Fixed bug of confusing by the < symbol inside the text
|
279 |
-
- Fixed bug of dash in selectors
|
280 |
-
- Fixed bug of `<nobr>`
|
281 |
-
- Fixed bug #2155883 (Nested List Parses Incorrectly)
|
282 |
-
- Fixed bug #2155113 (error with unclosed html tags)
|
283 |
-
|
284 |
-
## [1.00] - 2008-09-05
|
285 |
-
### Added
|
286 |
-
- New method "getAllAttributes" of "simple_html_dom_node"
|
287 |
-
- Supports full javascript string in selector: `$e->find("a[onclick=alert('hello')]")`
|
288 |
-
### Changed
|
289 |
-
- Changed selector "*=" to case-insentive
|
290 |
-
### Fixed
|
291 |
-
- Fixed the bug of selector in some critical conditions
|
292 |
-
- Fixed the bug of striping php tags
|
293 |
-
- Fixed the bug of remove_noise()
|
294 |
-
- Fixed the bug of noise in attributes
|
295 |
-
|
296 |
-
## [0.99] - 2008-08-03
|
297 |
-
### Changed
|
298 |
-
- Performance tuning (boost 10%)
|
299 |
-
- Memory requirement reduced by 25%
|
300 |
-
- Changed function name from "file_get_dom()" to "file_get_html()"
|
301 |
-
- Changed function name from "str_get_dom()" to "str_get_html()"
|
302 |
-
### Fixed
|
303 |
-
- Fixed bug #2011286 (Error with unclosed html tags)
|
304 |
-
- Fixed bug #2012551 (Error parsing divs)
|
305 |
-
- Fixed bug #2020924 (Error for missed tag)
|
306 |
-
- Fixed bug (problem with `<body>` tag's innertext)
|
307 |
-
|
308 |
-
## [0.98] - 2008-06-24
|
309 |
-
### Added
|
310 |
-
- Supports "multiple class" selector feature: `<div class="a b c"></div>`
|
311 |
-
- New "callback function" feature
|
312 |
-
- New "multiple selectors" feature: $dom->find('p,a,b')
|
313 |
-
- New examples
|
314 |
-
- Supports extract contents from HTML features: $dom->plaintext
|
315 |
-
### Changed
|
316 |
-
- Performance tuning (boost 20%)
|
317 |
-
- Changed simple_html_dom_node method name from "text()" to "makeup()"
|
318 |
-
### Fixed
|
319 |
-
- Fixed the bug of $dom->clear()
|
320 |
-
- Fixed the bug of text nodes' innertext
|
321 |
-
- Fixed the bug of comment nodes' innertext
|
322 |
-
- Fixed the bug of decendent selector with optional tags
|
323 |
-
|
324 |
-
## [0.97] - 2008-05-09
|
325 |
-
### Added
|
326 |
-
- New node type "comment" (eg. $dom->find('comment'))
|
327 |
-
- Add self-closing tags: 'base', 'spacer'
|
328 |
-
- New example "simple_html_dom_utility.php"
|
329 |
-
### Changed
|
330 |
-
- File and class name changed (html_dom_parser->simple_html_dom)
|
331 |
-
### Removed
|
332 |
-
- ($dom->save_file) will not support anymore
|
333 |
-
- Remove example "example_customize_parser.php"
|
334 |
-
### Fixed
|
335 |
-
- Fixed the bug of outertext (th)
|
336 |
-
- Fixed the bug of regular expression escaping chars ($dom->find)
|
337 |
-
- Fixed the bug while line-breaker and "\t" in tags
|
338 |
-
|
339 |
-
## [0.96] - 2008-04-27
|
340 |
-
### Added
|
341 |
-
- Reference section in manual
|
342 |
-
- Added traverse section in manual
|
343 |
-
- Added the solution while server behind proxy in FAQ (Thanks to Yousuke Shaggy)
|
344 |
-
- New method to remove attribute.
|
345 |
-
- New DOM operations(first_child, last_child, next_sibling, previous_sibling) (Request #1936000)
|
346 |
-
### Changed
|
347 |
-
- Now file_get_dom supports full file_get_contents parameters
|
348 |
-
### Fixed
|
349 |
-
- Fixed the bug of self-closing tags in the end of file
|
350 |
-
- Fixed the bug of blanks in the end of tag
|
351 |
-
- Fixed some typo of testcase
|
352 |
-
|
353 |
-
## [0.95] - 2008-04-13
|
354 |
-
### Added
|
355 |
-
- Supports tag name with namespace
|
356 |
-
### Changed
|
357 |
-
- New attribute filters (Thanks to Yousuke Kumakura)
|
358 |
-
- Refine structure of testcase
|
359 |
-
### Fixed
|
360 |
-
- Fix the bug of optional-closing tags
|
361 |
-
- Fix the bug of parsing the line break next to the tag's name
|
362 |
-
|
363 |
-
## [0.94] - 2008-04-06
|
364 |
-
### Added
|
365 |
-
- Add FAQ section in manual
|
366 |
-
### Fixed
|
367 |
-
- Fixed infinite loop while the source content is BAD HTML
|
368 |
-
- Fixed the bug of adding new attributes to self closing tags
|
369 |
- Fixed the bug of customize parser without $dom->remove_noise()
|
1 |
+
# Changelog
|
2 |
+
All notable changes to this project will be documented in this file.
|
3 |
+
|
4 |
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
5 |
+
|
6 |
+
|
7 |
+
## [Unreleased]
|
8 |
+
### Changed
|
9 |
+
- Comments that start with `>` or `->` are now considered malformed in accordance with [section 12.1.6](https://html.spec.whatwg.org/multipage/syntax.html#comments) of the [HTML specification](https://html.spec.whatwg.org/multipage/). Comments may still contain the strings `<!--` or `--!>` and they may still end with `<!-` contrary to the specification.
|
10 |
+
|
11 |
+
## [2.0-RC2] - 2019-11-09
|
12 |
+
|
13 |
+
**Important**: This is a release **candidate**, which means some features might not yet be stable or emit unexpected behavior. Please don't hesitate to report broken or unstable features.
|
14 |
+
|
15 |
+
### Added
|
16 |
+
- Added a `README` file.
|
17 |
+
- Added a `composer` file.
|
18 |
+
- Added `.travis.yml` for automated unit tests with `Travis-CI`.
|
19 |
+
- Added the magic method `__debugInfo` to `HtmlDocument` and `HtmlNode` in order to reduce the memory footprint and to prevent recursion errors when using `print_r` and `var_dump`.
|
20 |
+
- Added the magic method `__call` to `HtmlDocument` and `HtmlNode` as a wrapper for deprecated methods using the lowercase calling convention (see below).
|
21 |
+
- Added unit tests `attribute_test.php`, `callback_test.php`, `debug_info_test.php`, `doctype_test.php`, `script_test.php`, `server_side_script_test.php`, `style_test.php` and `dom_manipulation_test.php`.
|
22 |
+
- Added and extended unit tests for `cdata_test.php` and `comment_test.php`.
|
23 |
+
- Added a new `Debug` class to inform users about deprecated functions, malformed documents and parsing issues.
|
24 |
+
- Added full support for `script` element parsing.
|
25 |
+
### Changed
|
26 |
+
- Renamed unit test `simple_html_dom_test.php` to `htmldocument_test.php`.
|
27 |
+
- Renamed unit test `simple_html_dom_node_test.php` to `htmlnode_test.php`.
|
28 |
+
- Changed the implementation of destructors for better garbage collection.
|
29 |
+
- Changed how literal elements (`script`, `style`, `cdata`, "comment" and `code`) are handled by `HtmlDocument`.
|
30 |
+
### Deprecated
|
31 |
+
- `HtmlDocument::clear()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `unset()` instead.
|
32 |
+
- `HtmlDocument::load_file()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlDocument::loadFile()` instead.
|
33 |
+
- `HtmlNode::children()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::childNodes()` instead.
|
34 |
+
- `HtmlNode::first_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::firstChild()` instead.
|
35 |
+
- `HtmlNode::has_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::hasChild()` instead.
|
36 |
+
- `HtmlNode::last_child()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::lastChild()` instead.
|
37 |
+
- `HtmlNode::next_sibling()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::nextSibling()` instead.
|
38 |
+
- `HtmlNode::prev_sibling()` has been deprecated and will be removed in the next major version of simplehtmldom. Use `HtmlNode::previousSibling()` instead.
|
39 |
+
- Support for Smarty scripts has been deprecated and will be removed in the next major version of simplehtmldom.
|
40 |
+
- Support for server-side scripts has been deprecated and will be removed in the next major version of simplehtmldom.
|
41 |
+
### Removed
|
42 |
+
- Removed the `testcase/` folder as all tests are covered by unit tests inside `tests/`.
|
43 |
+
### Fixed
|
44 |
+
- Fixed a bug with boolean attributes that were incorrectly represented with a value of "1" when saving the DOM.
|
45 |
+
- Fixed a bug with comment and CDATA parsing that could cause an infinite loop if any of these elements contained `script`, `style`, `code`, server-side php or Smarty tags.
|
46 |
+
- Fixed a bug with comment and CDATA parsing that resulted in whitespace and newlines being removed when loading a document with `$stripRN = true` (default setting).
|
47 |
+
- Fixed a bug with attribute values that resulted in incorrectly encoded content when using `outertext()`, `innertext()` or `save()`.
|
48 |
+
- Fixed a bug with charset encoding that resulted in partially encoded documents depending on the use of `outertext()` and `innertext()` [#178](https://sourceforge.net/p/simplehtmldom/bugs/178/)
|
49 |
+
- Fixed multiple bugs related to DOM manipulation when using `HtmlDocument::createElement()`, `HtmlDocument::createTextNode()` and `HtmlNode::appendChild()`.
|
50 |
+
|
51 |
+
## [2.0-RC1] - 2019-10-20
|
52 |
+
|
53 |
+
**Important**: This is a release **candidate**, which means some features might not yet be stable or emit unexpected behavior. Please don't hesitate to report broken or unstable features.
|
54 |
+
|
55 |
+
### Added
|
56 |
+
- Added unit tests
|
57 |
+
- Added tests for whitespace handling.
|
58 |
+
- Added tests for entity decoding.
|
59 |
+
- Added tests for node functions after calling remove().
|
60 |
+
- Added tests for `maxLen` in `file_get_html`.
|
61 |
+
- Added tests for `simple_html_dom_node`.
|
62 |
+
- Added tests for `HtmlWeb`.
|
63 |
+
- Added test for bug [#172](https://sourceforge.net/p/simplehtmldom/bugs/172/)
|
64 |
+
- Added optional argument `$trim = true` to `$node->text()`
|
65 |
+
- Added attribute value normalization
|
66 |
+
- https://www.w3.org/TR/html/syntax.html#attribute-values
|
67 |
+
- https://www.w3.org/TR/xml/#AVNormalize
|
68 |
+
- Added automatic HTML entity decoding when loading documents [feature:#52]
|
69 |
+
- Added [the negation pseudo-class](https://www.w3.org/TR/selectors-3/#negation)
|
70 |
+
- Added `simple_html_dom::expect()`.
|
71 |
+
- Added `simple_html_dom_node::expect()`.
|
72 |
+
- Added the ability to parse CDATA sections.
|
73 |
+
- Added `HtmlWeb` to directly load webpages via cURL or fopen as DOM.
|
74 |
+
- Added `HtmlDocument`, `HtmlNode`, `HtmlWeb` and `constants` to namespace `simplehtmldom`.
|
75 |
+
- Added a new element type `HDOM_TYPE_CDATA` for CDATA sections.
|
76 |
+
- Added full support for parsing comments and CDATA sections.
|
77 |
+
### Changed
|
78 |
+
- `simple_html_dom::doc` is now unset after loading the DOM.
|
79 |
+
- `simple_html_dom::restore_noise()` now clears restored elements.
|
80 |
+
- `simple_html_dom_node::_[HDOM_INFO_ENDSPACE]` now only exists if needed.
|
81 |
+
- `simple_html_dom_node::_[HDOM_INFO_SPACE]`
|
82 |
+
- Now stores elements by attribute names.
|
83 |
+
- Now only exists if needed (defaults to `array(' ', '', '')`).
|
84 |
+
- `simple_html_dom_node::_[HDOM_INFO_QUOTE]`
|
85 |
+
- Now stores elements by attribute names.
|
86 |
+
- Now only exists if needed (defaults to `HDOM_QUOTE_DOUBLE`).
|
87 |
+
- `simple_html_dom_node::text()` now supports all block and inline level elements.
|
88 |
+
- `simple_html_dom_node::text()` now skips empty block elements.
|
89 |
+
- `simple_html_dom_node::text()` now properly handles `&nbsp;` characters.
|
90 |
+
- `simple_html_dom_node::removeChild()` now removes all types of childs.
|
91 |
+
- Increased `MAX_FILE_SIZE` from 0.6 MB (600000 Bytes) to 2.5 MiB (2621440 Bytes)
|
92 |
+
- `HDOM_INFO_INNER` (innertext) is now stored as part of the owning element.
|
93 |
+
- Moved and renamed `simple_html_dom` to `HtmlDocument`.
|
94 |
+
- Moved and renamed `simple_html_dom_node` to `HtmlNode`.
|
95 |
+
- Moved constants to `constants.php`
|
96 |
+
- Moved `HDOM_TYPE_*`, `HDOM_INFO_*` and `HDOM_QUOTE_*` constants into `HtmlNode`.
|
97 |
+
### Removed
|
98 |
+
- Removed `/example/scraping/example_scraping_general.php`.
|
99 |
+
- Removed `/example/simple_html_dom_utility.php`.
|
100 |
+
- Removed `/app`.
|
101 |
+
- Removed `/testcase/reader`.
|
102 |
+
- Removed `simple_html_dom_node::tag_start`.
|
103 |
+
### Fixed
|
104 |
+
- Fixed fatal error when removing nodes from the DOM (#172)
|
105 |
+
- Fixed `simple_html_dom::parse()` to work after removing elements from the DOM.
|
106 |
+
- Fixed `simple_html_dom_node::text()` to properly handle UTF-8 characters.
|
107 |
+
- Fixed all scripts in the example folder.
|
108 |
+
- Fixed `file_get_html` to return false if the file size is larger than `maxLen`.
|
109 |
+
- Fixed a bug that caused the parser to convert UTF-8 to UTF-8 by mistake.
|
110 |
+
- Fixed `simple_html_dom::loadFile` to properly forward arguments to `simple_html_dom::load_file`.
|
111 |
+
- Fixed handling of optional closing tags to end on the last element.
|
112 |
+
- Fixed broken support for `text` nodes when using `find` (#175).
|
113 |
+
|
114 |
+
## [1.9] - 2019-05-30
|
115 |
+
### Added
|
116 |
+
- Added unit test for bug reports
|
117 |
+
- Added test for bug [#153](https://sourceforge.net/p/simplehtmldom/bugs/153/)
|
118 |
+
- Added test for bug [#163](https://sourceforge.net/p/simplehtmldom/bugs/163/)
|
119 |
+
- Added test for bug [#166](https://sourceforge.net/p/simplehtmldom/bugs/166/)
|
120 |
+
- Added test for bug [#169](https://sourceforge.net/p/simplehtmldom/bugs/169/)
|
121 |
+
- Added unit test for character sets UTF-8, CP1251 and CP1252 (#142)
|
122 |
+
- Added support for meta charset to parse_charset
|
123 |
+
- Added detection for CP1251 to parse_charset, using iconv
|
124 |
+
- Added LICENSE file (MIT) to the project root
|
125 |
+
- Added functions to `simple_html_dom_node`
|
126 |
+
- `remove`: Removes the current node recursively from the DOM tree
|
127 |
+
- `removeChild`: Removes a child node recursively from the DOM tree
|
128 |
+
- `hasClass`: Checks if the current node has the specified class name
|
129 |
+
- `addClass`: Adds one or more classes to the current node
|
130 |
+
- `removeClass`: Removes one or more classes from the current node
|
131 |
+
- `save`: Saves the current node to disk
|
132 |
+
### Changed
|
133 |
+
- Changed manual from custom implementation to MkDocs (https://www.mkdocs.org/)
|
134 |
+
### Fixed
|
135 |
+
- Fixed warning when trying to clear() the DOM on a null nodes list (#153)
|
136 |
+
- Fixed missing whitespace when returning plaintext (#163)
|
137 |
+
- Fixed broken detection of duplicate attributes (#166)
|
138 |
+
- Fixed broken detection of CP1252 (ISO-8859-1) documents (#142)
|
139 |
+
- Fixed error using next-sibling combinator ('E + F') on last child
|
140 |
+
- Fixed selector parsing for attribute selectors ending on "s" or "i" (#169)
|
141 |
+
|
142 |
+
## [1.8.1] - 2019-01-13
|
143 |
+
### Fixed
|
144 |
+
- Fixed various bugs related to parsing classes and ids
|
145 |
+
|
146 |
+
## [1.8] - 2019-01-13
|
147 |
+
### Added
|
148 |
+
- Added documentation for `simple_html_dom_node::find`
|
149 |
+
- Added documentation for `simple_html_dom_node::parse_selector`
|
150 |
+
- Added documentation for `simple_html_dom_node::seek`
|
151 |
+
- Added documentation for `simple_html_dom_node::match`
|
152 |
+
- Added unit tests for bug reports
|
153 |
+
- Added test for bug [#62](https://sourceforge.net/p/simplehtmldom/bugs/62/)
|
154 |
+
- Added test for bug [#79](https://sourceforge.net/p/simplehtmldom/bugs/79/)
|
155 |
+
- Added test for bug [#144](https://sourceforge.net/p/simplehtmldom/bugs/144/)
|
156 |
+
- Added unit tests for CSS selectors
|
157 |
+
- Added ability to define constants before simple_html_dom does
|
158 |
+
- 'DEFAULT_TARGET_CHARSET'
|
159 |
+
- 'DEFAULT_BR_TEXT'
|
160 |
+
- 'DEFAULT_SPAN_TEXT'
|
161 |
+
- 'MAX_FILE_SIZE'
|
162 |
+
- Added support for CSS combinators
|
163 |
+
- Added support for Child Combinator (`>`)
|
164 |
+
- Added support for Next Sibling Combinator (`+`)
|
165 |
+
- Added support for Subsequent Sibling Combinator (`~`)
|
166 |
+
- Added support for multiclass selectors (`.class.class.class`)
|
167 |
+
- Added support for multiattribute selectors (`[attr1][attr2][attribute3]`)
|
168 |
+
- Added support for attribute selectors
|
169 |
+
- Added support for pipe selectors (`|=`)
|
170 |
+
- Added support for tilde selectors (`~=`)
|
171 |
+
- Added support for case sensitivity selectors (`i` and `s`)
|
172 |
+
- Added unit tests for PHP compatibility to PHP 5.6+
|
173 |
+
- Added coding standard using PHP_CodeSniffer
|
174 |
+
### Changed
|
175 |
+
- Removed automatic filtering of 'tbody' selectors (#79)
|
176 |
+
> Remove 'tbody' from all selectors to maintain the previous state!
|
177 |
+
- Coding standard using PHP_CodeSniffer
|
178 |
+
### Fixed
|
179 |
+
- Fixed broken CSS selector attributes with value "0" (#62)
|
180 |
+
- Fixed broken simple_html_dom::load_file
|
181 |
+
- Fixed forward slashes in CSS selector breaks value matching using '*=' (#144)
|
182 |
+
- Fixed Universal Selectors
|
183 |
+
|
184 |
+
## [1.7] - 2018-12-10
|
185 |
+
### Added
|
186 |
+
- Added code documentation to improve readability
|
187 |
+
- Added unit tests for `simple_html_dom::$self_closing_tags`
|
188 |
+
- Added unit tests for `simple_html_dom::$optional_closing_tags`
|
189 |
+
- Added unit tests for bug reports
|
190 |
+
- Added test for bug [#56](https://sourceforge.net/p/simplehtmldom/bugs/56/)
|
191 |
+
- Added test for bug [#97](https://sourceforge.net/p/simplehtmldom/bugs/97/)
|
192 |
+
- Added test for bug [#116](https://sourceforge.net/p/simplehtmldom/bugs/116/)
|
193 |
+
- Added test for bug [#121](https://sourceforge.net/p/simplehtmldom/bugs/121/)
|
194 |
+
- Added test for bug [#127](https://sourceforge.net/p/simplehtmldom/bugs/127/)
|
195 |
+
- Added test for bug [#154](https://sourceforge.net/p/simplehtmldom/bugs/154/)
|
196 |
+
- Added test for bug [#160](https://sourceforge.net/p/simplehtmldom/bugs/160/)
|
197 |
+
- Added unit tests for memory management of the parser
|
198 |
+
- Added bit flags to `simple_html_dom::load()`
|
199 |
+
- Added bit flag `HDOM_SMARTY_AS_TEXT` to optionally filter Smarty scripts (#154)\
|
200 |
+
**Note**: Smarty scripts are no longer filtered by default!\
|
201 |
+
- Added build script to automate releases
|
202 |
+
- Added support for attributes without whitespace to separate them
|
203 |
+
### Changed
|
204 |
+
- Improved documentation and readability for `$self_closing_tags`
|
205 |
+
- Improved documentation and readability for `$block_tags`
|
206 |
+
- Improved documentation and readability for `$optional_closing_tags`
|
207 |
+
- Updated list of `simple_html_dom::$self_closing_tags`
|
208 |
+
- Removed 'spacer' (obsolete)
|
209 |
+
- Added 'area'
|
210 |
+
- Added 'col'
|
211 |
+
- Added 'meta'
|
212 |
+
- Added 'param'
|
213 |
+
- Added 'source'
|
214 |
+
- Added 'track'
|
215 |
+
- Added 'wbr'
|
216 |
+
- Updated list of `simple_html_dom::$optional_closing_tags`
|
217 |
+
- Removed "nobr" (obsolete)
|
218 |
+
- Added 'th' as closable element to 'td'
|
219 |
+
- Added 'td' as closable element to 'th'
|
220 |
+
- Added 'optgroup' with 'optgroup' and 'option' as closable elements
|
221 |
+
- Added 'optgroup' as closable element to 'option'
|
222 |
+
- Added 'rp' with 'rp' and 'rt' as closable elements
|
223 |
+
- Added 'rt' with 'rt' and 'rp' as closable elements
|
224 |
+
- Clarified meaning of `simple_html_dom->parent`
|
225 |
+
- Changed default `$offset` for `file_get_html()` from -1 to 0 (#161)
|
226 |
+
- Changed `simple_html_dom::load()` to remove script tags before replacing newline characters
|
227 |
+
- `simple_html_dom_node::text()` no longer adds whitespace to top level span elements (only to sub-elements)
|
228 |
+
- `simple_html_dom_node::text()` adds blank lines between paragraphs
|
229 |
+
- Normalized line endings in the repository to LF via `.gitattributes`
|
230 |
+
- Improved performance of `simple_html_dom::parse_charset()` by approximately 25%
|
231 |
+
- Improved performance of `simple_html_dom::parse()` by approximately 10%
|
232 |
+
### Deprecated
|
233 |
+
- `str_get_html()` is deprecated and should be replaced by `new simple_html_dom()`
|
234 |
+
### Removed
|
235 |
+
- Removed protected function `simple_html_dom::copy_until_char_escaped()`
|
236 |
+
### Fixed
|
237 |
+
- Fixed compatibility issues with PHP 7.3
|
238 |
+
- Fixed typo (#147)
|
239 |
+
- Fixed handling of incorrectly escaped text (#160)
|
240 |
+
- Restore functionality of `$maxLen` in `file_get_html()`
|
241 |
+
- Fixed load_file breaking if an error occurred in another script
|
242 |
+
|
243 |
+
## [1.6] - 2014-05-28
|
244 |
+
### Added
|
245 |
+
- Added some ability to insert and create nodes
|
246 |
+
- Add ability to search the "noise" array
|
247 |
+
|
248 |
+
## [1.5] - 2012-09-10
|
249 |
+
### Added
|
250 |
+
- Added flag: LOCK_EX while calling "file_put_contents()"
|
251 |
+
- Added support for detecting the source html character set. This is used to convert characters when plaintext is requested.
|
252 |
+
- Other little fixes and features, too numerous to categorize
|
253 |
+
### Changed
|
254 |
+
- Error of "file_get_contents()" will be thrown as an exception
|
255 |
+
### Fixed
|
256 |
+
- Fixed the typo of "token_blank_t"
|
257 |
+
- Memory leak fixed
|
258 |
+
|
259 |
+
## [1.11] - 2008-12-14
|
260 |
+
### Added
|
261 |
+
- Supports xpath generated from Firebug
|
262 |
+
- New method "dump" of "simple_html_dom_node"
|
263 |
+
- New attribute "xmltext" of "simple_html_dom_node"
|
264 |
+
### Changed
|
265 |
+
- Remove preg_quote on selector match function: `[attribute*=value]`
|
266 |
+
- Element "Comment" will treat as children
|
267 |
+
### Fixed
|
268 |
+
- Fixed the problem with `<pre>`
|
269 |
+
- Fixed bug #2207477 (does not load some pages properly)
|
270 |
+
- Fixed bug #2315853 (Error with character after < sign)
|
271 |
+
|
272 |
+
## [1.10] - 2008-10-25
|
273 |
+
### Changed
|
274 |
+
- Negative index support in the "find" method, thanks to Vadim Voituk
|
275 |
+
- Constructor now automatically loads contents from either text or file/url, thanks to Antcs
|
276 |
+
- Fully supports wildcard in selectors
|
277 |
+
### Fixed
|
278 |
+
- Fixed bug of confusing by the < symbol inside the text
|
279 |
+
- Fixed bug of dash in selectors
|
280 |
+
- Fixed bug of `<nobr>`
|
281 |
+
- Fixed bug #2155883 (Nested List Parses Incorrectly)
|
282 |
+
- Fixed bug #2155113 (error with unclosed html tags)
|
283 |
+
|
284 |
+
## [1.00] - 2008-09-05
|
285 |
+
### Added
|
286 |
+
- New method "getAllAttributes" of "simple_html_dom_node"
|
287 |
+
- Supports full javascript string in selector: `$e->find("a[onclick=alert('hello')]")`
|
288 |
+
### Changed
|
289 |
+
- Changed selector "*=" to case-insensitive
|
290 |
+
### Fixed
|
291 |
+
- Fixed the bug of selector in some critical conditions
|
292 |
+
- Fixed the bug of stripping php tags
|
293 |
+
- Fixed the bug of remove_noise()
|
294 |
+
- Fixed the bug of noise in attributes
|
295 |
+
|
296 |
+
## [0.99] - 2008-08-03
|
297 |
+
### Changed
|
298 |
+
- Performance tuning (boost 10%)
|
299 |
+
- Memory requirement reduced by 25%
|
300 |
+
- Changed function name from "file_get_dom()" to "file_get_html()"
|
301 |
+
- Changed function name from "str_get_dom()" to "str_get_html()"
|
302 |
+
### Fixed
|
303 |
+
- Fixed bug #2011286 (Error with unclosed html tags)
|
304 |
+
- Fixed bug #2012551 (Error parsing divs)
|
305 |
+
- Fixed bug #2020924 (Error for missed tag)
|
306 |
+
- Fixed bug (problem with `<body>` tag's innertext)
|
307 |
+
|
308 |
+
## [0.98] - 2008-06-24
|
309 |
+
### Added
|
310 |
+
- Supports "multiple class" selector feature: `<div class="a b c"></div>`
|
311 |
+
- New "callback function" feature
|
312 |
+
- New "multiple selectors" feature: $dom->find('p,a,b')
|
313 |
+
- New examples
|
314 |
+
- Supports extract contents from HTML features: $dom->plaintext
|
315 |
+
### Changed
|
316 |
+
- Performance tuning (boost 20%)
|
317 |
+
- Changed simple_html_dom_node method name from "text()" to "makeup()"
|
318 |
+
### Fixed
|
319 |
+
- Fixed the bug of $dom->clear()
|
320 |
+
- Fixed the bug of text nodes' innertext
|
321 |
+
- Fixed the bug of comment nodes' innertext
|
322 |
+
- Fixed the bug of descendant selector with optional tags
|
323 |
+
|
324 |
+
## [0.97] - 2008-05-09
|
325 |
+
### Added
|
326 |
+
- New node type "comment" (eg. $dom->find('comment'))
|
327 |
+
- Add self-closing tags: 'base', 'spacer'
|
328 |
+
- New example "simple_html_dom_utility.php"
|
329 |
+
### Changed
|
330 |
+
- File and class name changed (html_dom_parser->simple_html_dom)
|
331 |
+
### Removed
|
332 |
+
- ($dom->save_file) will not support anymore
|
333 |
+
- Remove example "example_customize_parser.php"
|
334 |
+
### Fixed
|
335 |
+
- Fixed the bug of outertext (th)
|
336 |
+
- Fixed the bug of regular expression escaping chars ($dom->find)
|
337 |
+
- Fixed the bug while line-breaker and "\t" in tags
|
338 |
+
|
339 |
+
## [0.96] - 2008-04-27
|
340 |
+
### Added
|
341 |
+
- Reference section in manual
|
342 |
+
- Added traverse section in manual
|
343 |
+
- Added the solution while server behind proxy in FAQ (Thanks to Yousuke Shaggy)
|
344 |
+
- New method to remove attribute.
|
345 |
+
- New DOM operations(first_child, last_child, next_sibling, previous_sibling) (Request #1936000)
|
346 |
+
### Changed
|
347 |
+
- Now file_get_dom supports full file_get_contents parameters
|
348 |
+
### Fixed
|
349 |
+
- Fixed the bug of self-closing tags in the end of file
|
350 |
+
- Fixed the bug of blanks in the end of tag
|
351 |
+
- Fixed some typo of testcase
|
352 |
+
|
353 |
+
## [0.95] - 2008-04-13
|
354 |
+
### Added
|
355 |
+
- Supports tag name with namespace
|
356 |
+
### Changed
|
357 |
+
- New attribute filters (Thanks to Yousuke Kumakura)
|
358 |
+
- Refine structure of testcase
|
359 |
+
### Fixed
|
360 |
+
- Fix the bug of optional-closing tags
|
361 |
+
- Fix the bug of parsing the line break next to the tag's name
|
362 |
+
|
363 |
+
## [0.94] - 2008-04-06
|
364 |
+
### Added
|
365 |
+
- Add FAQ section in manual
|
366 |
+
### Fixed
|
367 |
+
- Fixed infinite loop while the source content is BAD HTML
|
368 |
+
- Fixed the bug of adding new attributes to self closing tags
|
369 |
- Fixed the bug of customize parser without $dom->remove_noise()
|
vendor/simplehtmldom/simplehtmldom/Debug.php
CHANGED
@@ -1,149 +1,149 @@
|
|
1 |
-
<?php namespace simplehtmldom;
|
2 |
-
|
3 |
-
/**
|
4 |
-
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
-
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
-
*
|
7 |
-
* Licensed under The MIT License
|
8 |
-
* See the LICENSE file in the project root for more information.
|
9 |
-
*
|
10 |
-
* Authors:
|
11 |
-
* S.C. Chen
|
12 |
-
* John Schlick
|
13 |
-
* Rus Carroll
|
14 |
-
* logmanoriginal
|
15 |
-
*
|
16 |
-
* Contributors:
|
17 |
-
* Yousuke Kumakura
|
18 |
-
* Vadim Voituk
|
19 |
-
* Antcs
|
20 |
-
*
|
21 |
-
* Version $Rev$
|
22 |
-
*/
|
23 |
-
|
24 |
-
/**
|
25 |
-
* Implements functions for debugging purposes. Debugging can be enabled and
|
26 |
-
* disabled on demand. Debug messages are sent to error_log by default but it
|
27 |
-
* is also possible to register a custom debug handler.
|
28 |
-
*/
|
29 |
-
class Debug {
|
30 |
-
|
31 |
-
private static $enabled = false;
|
32 |
-
private static $debugHandler = null;
|
33 |
-
private static $callerLock = array();
|
34 |
-
|
35 |
-
/**
|
36 |
-
* Checks whether debug mode is enabled.
|
37 |
-
*
|
38 |
-
* @return bool True if debug mode is enabled, false otherwise.
|
39 |
-
*/
|
40 |
-
public static function isEnabled()
|
41 |
-
{
|
42 |
-
return self::$enabled;
|
43 |
-
}
|
44 |
-
|
45 |
-
/**
|
46 |
-
* Enables debug mode
|
47 |
-
*/
|
48 |
-
public static function enable()
|
49 |
-
{
|
50 |
-
self::$enabled = true;
|
51 |
-
self::log('Debug mode has been enabled');
|
52 |
-
}
|
53 |
-
|
54 |
-
/**
|
55 |
-
* Disables debug mode
|
56 |
-
*/
|
57 |
-
public static function disable()
|
58 |
-
{
|
59 |
-
self::log('Debug mode has been disabled');
|
60 |
-
self::$enabled = false;
|
61 |
-
}
|
62 |
-
|
63 |
-
/**
|
64 |
-
* Sets the debug handler.
|
65 |
-
*
|
66 |
-
* `null`: error_log (default)
|
67 |
-
*/
|
68 |
-
public static function setDebugHandler($function = null)
|
69 |
-
{
|
70 |
-
if ($function === self::$debugHandler) return;
|
71 |
-
|
72 |
-
self::log('New debug handler registered');
|
73 |
-
self::$debugHandler = $function;
|
74 |
-
}
|
75 |
-
|
76 |
-
/**
|
77 |
-
* This is the actual log function. It allows to set a custom backtrace to
|
78 |
-
* eliminate traces of this class.
|
79 |
-
*/
|
80 |
-
private static function log_trace($message, $backtrace)
|
81 |
-
{
|
82 |
-
$idx = 0;
|
83 |
-
$debugmessage = '';
|
84 |
-
|
85 |
-
foreach($backtrace as $caller)
|
86 |
-
{
|
87 |
-
if (!isset($caller['file']) && !isset($caller['line'])) {
|
88 |
-
break; // Unknown caller
|
89 |
-
}
|
90 |
-
|
91 |
-
$debugmessage .= ' [' . $caller['file'] . ':' . $caller['line'];
|
92 |
-
|
93 |
-
if ($idx > 1) { // Do not include the call to Debug::log
|
94 |
-
$debugmessage .= ' '
|
95 |
-
. $caller['class']
|
96 |
-
. $caller['type']
|
97 |
-
. $caller['function']
|
98 |
-
. '()';
|
99 |
-
}
|
100 |
-
|
101 |
-
$debugmessage .= ']';
|
102 |
-
|
103 |
-
// Stop at the first caller that isn't part of simplehtmldom
|
104 |
-
if (!isset($caller['class']) || strpos($caller['class'], 'simplehtmldom\\') !== 0) {
|
105 |
-
break;
|
106 |
-
}
|
107 |
-
}
|
108 |
-
|
109 |
-
$output = '[DEBUG] ' . trim($debugmessage) . ' "' . $message . '"';
|
110 |
-
|
111 |
-
if (is_null(self::$debugHandler)) {
|
112 |
-
error_log($output);
|
113 |
-
} else {
|
114 |
-
call_user_func_array(self::$debugHandler, array($output));
|
115 |
-
}
|
116 |
-
}
|
117 |
-
|
118 |
-
/**
|
119 |
-
* Adds a debug message to error_log if debug mode is enabled. Does nothing
|
120 |
-
* if debug mode is disabled.
|
121 |
-
*
|
122 |
-
* @param string $message The message to add to error_log
|
123 |
-
*/
|
124 |
-
public static function log($message)
|
125 |
-
{
|
126 |
-
if (!self::isEnabled()) return;
|
127 |
-
|
128 |
-
$backtrace = debug_backtrace(DEBUG_BACKTRACE_PROVIDE_OBJECT);
|
129 |
-
self::log_trace($message, $backtrace);
|
130 |
-
}
|
131 |
-
|
132 |
-
/**
|
133 |
-
* Adds a debug message to error_log if debug mode is enabled. Does nothing
|
134 |
-
* if debug mode is disabled. Each message is logged only once.
|
135 |
-
*
|
136 |
-
* @param string $message The message to add to error_log
|
137 |
-
*/
|
138 |
-
public static function log_once($message)
|
139 |
-
{
|
140 |
-
if (!self::isEnabled()) return;
|
141 |
-
|
142 |
-
// Keep track of caller (file & line)
|
143 |
-
$backtrace = debug_backtrace(DEBUG_BACKTRACE_PROVIDE_OBJECT);
|
144 |
-
if (in_array($backtrace[0], self::$callerLock, true)) return;
|
145 |
-
|
146 |
-
self::$callerLock[] = $backtrace[0];
|
147 |
-
self::log_trace($message, $backtrace);
|
148 |
-
}
|
149 |
-
}
|
1 |
+
<?php namespace simplehtmldom;
|
2 |
+
|
3 |
+
/**
|
4 |
+
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
+
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
+
*
|
7 |
+
* Licensed under The MIT License
|
8 |
+
* See the LICENSE file in the project root for more information.
|
9 |
+
*
|
10 |
+
* Authors:
|
11 |
+
* S.C. Chen
|
12 |
+
* John Schlick
|
13 |
+
* Rus Carroll
|
14 |
+
* logmanoriginal
|
15 |
+
*
|
16 |
+
* Contributors:
|
17 |
+
* Yousuke Kumakura
|
18 |
+
* Vadim Voituk
|
19 |
+
* Antcs
|
20 |
+
*
|
21 |
+
* Version $Rev$
|
22 |
+
*/
|
23 |
+
|
24 |
+
/**
|
25 |
+
* Implements functions for debugging purposes. Debugging can be enabled and
|
26 |
+
* disabled on demand. Debug messages are sent to error_log by default but it
|
27 |
+
* is also possible to register a custom debug handler.
|
28 |
+
*/
|
29 |
+
class Debug {
|
30 |
+
|
31 |
+
private static $enabled = false;
|
32 |
+
private static $debugHandler = null;
|
33 |
+
private static $callerLock = array();
|
34 |
+
|
35 |
+
/**
|
36 |
+
* Checks whether debug mode is enabled.
|
37 |
+
*
|
38 |
+
* @return bool True if debug mode is enabled, false otherwise.
|
39 |
+
*/
|
40 |
+
public static function isEnabled()
|
41 |
+
{
|
42 |
+
return self::$enabled;
|
43 |
+
}
|
44 |
+
|
45 |
+
/**
|
46 |
+
* Enables debug mode
|
47 |
+
*/
|
48 |
+
public static function enable()
|
49 |
+
{
|
50 |
+
self::$enabled = true;
|
51 |
+
self::log('Debug mode has been enabled');
|
52 |
+
}
|
53 |
+
|
54 |
+
/**
|
55 |
+
* Disables debug mode
|
56 |
+
*/
|
57 |
+
public static function disable()
|
58 |
+
{
|
59 |
+
self::log('Debug mode has been disabled');
|
60 |
+
self::$enabled = false;
|
61 |
+
}
|
62 |
+
|
63 |
+
/**
|
64 |
+
* Sets the debug handler.
|
65 |
+
*
|
66 |
+
* `null`: error_log (default)
|
67 |
+
*/
|
68 |
+
public static function setDebugHandler($function = null)
|
69 |
+
{
|
70 |
+
if ($function === self::$debugHandler) return;
|
71 |
+
|
72 |
+
self::log('New debug handler registered');
|
73 |
+
self::$debugHandler = $function;
|
74 |
+
}
|
75 |
+
|
76 |
+
/**
|
77 |
+
* This is the actual log function. It allows to set a custom backtrace to
|
78 |
+
* eliminate traces of this class.
|
79 |
+
*/
|
80 |
+
private static function log_trace($message, $backtrace)
|
81 |
+
{
|
82 |
+
$idx = 0;
|
83 |
+
$debugmessage = '';
|
84 |
+
|
85 |
+
foreach($backtrace as $caller)
|
86 |
+
{
|
87 |
+
if (!isset($caller['file']) && !isset($caller['line'])) {
|
88 |
+
break; // Unknown caller
|
89 |
+
}
|
90 |
+
|
91 |
+
$debugmessage .= ' [' . $caller['file'] . ':' . $caller['line'];
|
92 |
+
|
93 |
+
if ($idx > 1) { // Do not include the call to Debug::log
|
94 |
+
$debugmessage .= ' '
|
95 |
+
. $caller['class']
|
96 |
+
. $caller['type']
|
97 |
+
. $caller['function']
|
98 |
+
. '()';
|
99 |
+
}
|
100 |
+
|
101 |
+
$debugmessage .= ']';
|
102 |
+
|
103 |
+
// Stop at the first caller that isn't part of simplehtmldom
|
104 |
+
if (!isset($caller['class']) || strpos($caller['class'], 'simplehtmldom\\') !== 0) {
|
105 |
+
break;
|
106 |
+
}
|
107 |
+
}
|
108 |
+
|
109 |
+
$output = '[DEBUG] ' . trim($debugmessage) . ' "' . $message . '"';
|
110 |
+
|
111 |
+
if (is_null(self::$debugHandler)) {
|
112 |
+
error_log($output);
|
113 |
+
} else {
|
114 |
+
call_user_func_array(self::$debugHandler, array($output));
|
115 |
+
}
|
116 |
+
}
|
117 |
+
|
118 |
+
/**
|
119 |
+
* Adds a debug message to error_log if debug mode is enabled. Does nothing
|
120 |
+
* if debug mode is disabled.
|
121 |
+
*
|
122 |
+
* @param string $message The message to add to error_log
|
123 |
+
*/
|
124 |
+
public static function log($message)
|
125 |
+
{
|
126 |
+
if (!self::isEnabled()) return;
|
127 |
+
|
128 |
+
$backtrace = debug_backtrace(DEBUG_BACKTRACE_PROVIDE_OBJECT);
|
129 |
+
self::log_trace($message, $backtrace);
|
130 |
+
}
|
131 |
+
|
132 |
+
/**
|
133 |
+
* Adds a debug message to error_log if debug mode is enabled. Does nothing
|
134 |
+
* if debug mode is disabled. Each message is logged only once.
|
135 |
+
*
|
136 |
+
* @param string $message The message to add to error_log
|
137 |
+
*/
|
138 |
+
public static function log_once($message)
|
139 |
+
{
|
140 |
+
if (!self::isEnabled()) return;
|
141 |
+
|
142 |
+
// Keep track of caller (file & line)
|
143 |
+
$backtrace = debug_backtrace(DEBUG_BACKTRACE_PROVIDE_OBJECT);
|
144 |
+
if (in_array($backtrace[0], self::$callerLock, true)) return;
|
145 |
+
|
146 |
+
self::$callerLock[] = $backtrace[0];
|
147 |
+
self::log_trace($message, $backtrace);
|
148 |
+
}
|
149 |
+
}
|
vendor/simplehtmldom/simplehtmldom/HtmlDocument.php
CHANGED
@@ -1,1133 +1,1133 @@
|
|
1 |
-
<?php namespace simplehtmldom;
|
2 |
-
|
3 |
-
/**
|
4 |
-
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
-
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
-
*
|
7 |
-
* Licensed under The MIT License
|
8 |
-
* See the LICENSE file in the project root for more information.
|
9 |
-
*
|
10 |
-
* Authors:
|
11 |
-
* S.C. Chen
|
12 |
-
* John Schlick
|
13 |
-
* Rus Carroll
|
14 |
-
* logmanoriginal
|
15 |
-
*
|
16 |
-
* Contributors:
|
17 |
-
* Yousuke Kumakura
|
18 |
-
* Vadim Voituk
|
19 |
-
* Antcs
|
20 |
-
*
|
21 |
-
* Version $Rev$
|
22 |
-
*/
|
23 |
-
|
24 |
-
include_once 'constants.php';
|
25 |
-
include_once 'HtmlNode.php';
|
26 |
-
include_once 'Debug.php';
|
27 |
-
|
28 |
-
class HtmlDocument
|
29 |
-
{
|
30 |
-
public $root = null;
|
31 |
-
public $nodes = array();
|
32 |
-
public $callback = null;
|
33 |
-
public $lowercase = false;
|
34 |
-
public $original_size;
|
35 |
-
public $size;
|
36 |
-
|
37 |
-
protected $pos;
|
38 |
-
protected $doc;
|
39 |
-
protected $char;
|
40 |
-
|
41 |
-
protected $cursor;
|
42 |
-
protected $parent;
|
43 |
-
protected $noise = array();
|
44 |
-
protected $token_blank = " \t\r\n";
|
45 |
-
protected $token_equal = ' =/>';
|
46 |
-
protected $token_slash = " />\r\n\t";
|
47 |
-
protected $token_attr = ' >';
|
48 |
-
|
49 |
-
public $_charset = '';
|
50 |
-
public $_target_charset = '';
|
51 |
-
|
52 |
-
public $default_br_text = '';
|
53 |
-
public $default_span_text = '';
|
54 |
-
|
55 |
-
protected $self_closing_tags = array(
|
56 |
-
'area' => 1,
|
57 |
-
'base' => 1,
|
58 |
-
'br' => 1,
|
59 |
-
'col' => 1,
|
60 |
-
'embed' => 1,
|
61 |
-
'hr' => 1,
|
62 |
-
'img' => 1,
|
63 |
-
'input' => 1,
|
64 |
-
'link' => 1,
|
65 |
-
'meta' => 1,
|
66 |
-
'param' => 1,
|
67 |
-
'source' => 1,
|
68 |
-
'track' => 1,
|
69 |
-
'wbr' => 1
|
70 |
-
);
|
71 |
-
protected $block_tags = array(
|
72 |
-
'body' => 1,
|
73 |
-
'div' => 1,
|
74 |
-
'form' => 1,
|
75 |
-
'root' => 1,
|
76 |
-
'span' => 1,
|
77 |
-
'table' => 1
|
78 |
-
);
|
79 |
-
protected $optional_closing_tags = array(
|
80 |
-
// Not optional, see
|
81 |
-
// https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
|
82 |
-
'b' => array('b' => 1),
|
83 |
-
'dd' => array('dd' => 1, 'dt' => 1),
|
84 |
-
// Not optional, see
|
85 |
-
// https://www.w3.org/TR/html/grouping-content.html#the-dl-element
|
86 |
-
'dl' => array('dd' => 1, 'dt' => 1),
|
87 |
-
'dt' => array('dd' => 1, 'dt' => 1),
|
88 |
-
'li' => array('li' => 1),
|
89 |
-
'optgroup' => array('optgroup' => 1, 'option' => 1),
|
90 |
-
'option' => array('optgroup' => 1, 'option' => 1),
|
91 |
-
'p' => array('p' => 1),
|
92 |
-
'rp' => array('rp' => 1, 'rt' => 1),
|
93 |
-
'rt' => array('rp' => 1, 'rt' => 1),
|
94 |
-
'td' => array('td' => 1, 'th' => 1),
|
95 |
-
'th' => array('td' => 1, 'th' => 1),
|
96 |
-
'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
|
97 |
-
);
|
98 |
-
|
99 |
-
/**
 * Handles calls to methods that are not defined on this class.
 *
 * Keeps the legacy lower_case method names working:
 *  - 'load_file' is forwarded to loadFile() after logging a deprecation
 *    message,
 *  - 'clear' is accepted and ignored,
 *  - anything else raises an E_USER_ERROR.
 *
 * @param string $func Name of the method that was called
 * @param array  $args Arguments passed to that method
 * @return mixed Result of the forwarded call, or null when nothing is called
 */
public function __call($func, $args)
{
    // Allow users to call methods with lower_case syntax
    switch ($func) {
        case 'load_file':
            $actual_function = 'loadFile';
            break;
        case 'clear':
            return; /* no-op */
        default:
            trigger_error(
                'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
                E_USER_ERROR
            );

            // A user-defined error handler may return instead of halting the
            // script. There is no method to forward to in that case, so stop
            // here rather than continuing with an undefined target.
            return null;
    }

    // phpcs:ignore Generic.Files.LineLength
    Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');

    return call_user_func_array(array($this, $actual_function), $args);
}
|
119 |
-
|
120 |
-
/**
 * Creates a document and, when $str is given, loads it right away.
 *
 * A non-empty $str that starts with "http://" or names an existing file is
 * loaded through load_file(); any other non-empty $str is parsed as markup
 * through load(). Without $str the document is only prepared (empty root).
 *
 * @param string|null $str             Markup, file path or http:// URL
 * @param bool        $lowercase       Passed on to load() / prepare()
 * @param bool        $forceTagsClosed When false, the table of elements with
 *                                     optional end tags is emptied so the
 *                                     parser no longer closes them implicitly
 * @param string      $target_charset  Charset used when decoding entities
 * @param bool        $stripRN         Passed on to load()
 * @param string      $defaultBRText   Passed on to load() / prepare()
 * @param string      $defaultSpanText Passed on to load() / prepare()
 * @param int         $options         Passed on to load()
 */
public function __construct(
    $str = null,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0)
{
    // Both settings below are read while a document is parsed and decoded,
    // so they have to be in place before load() runs.
    $this->_target_charset = $target_charset;

    // Forcing tags to be closed implies that we don't trust the html, but
    // it can lead to parsing errors if we SHOULD trust the html.
    if (!$forceTagsClosed) {
        // The parser consults $optional_closing_tags; the previously used
        // name "optional_closing_array" does not exist on this class.
        $this->optional_closing_tags = array();
    }

    if ($str) {
        if (preg_match('/^http:\/\//i', $str) || is_file($str)) {
            $this->load_file($str);
        } else {
            $this->load(
                $str,
                $lowercase,
                $stripRN,
                $defaultBRText,
                $defaultSpanText,
                $options
            );
        }
    } else {
        $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);
    }
}
|
154 |
-
|
155 |
-
/**
 * Limits what var_dump() shows for a document to a few key properties.
 *
 * @return array Root node, noise (or 'none'), charsets and original size
 */
public function __debugInfo()
{
    $noise = $this->noise;

    if (empty($noise)) {
        $noise = 'none';
    }

    return array(
        'root' => $this->root,
        'noise' => $noise,
        'charset' => $this->_charset,
        'target charset' => $this->_target_charset,
        'original size' => $this->original_size
    );
}
|
165 |
-
|
166 |
-
/**
 * Calls clear() on every node of the document when it is destroyed.
 */
public function __destruct()
{
    if (!isset($this->nodes)) {
        return;
    }

    foreach ($this->nodes as $node) {
        $node->clear();
    }
}
|
174 |
-
|
175 |
-
/**
 * Parses the given markup into this document.
 *
 * Steps: reset the parser state, optionally collapse line breaks and the
 * whitespace around them (script, CDATA, comment, style and code sections
 * are taken out first and put back afterwards), strip server-side and -
 * on request - Smarty scripts, build the node tree, detect the charset and
 * decode entities. The raw source is discarded at the end.
 *
 * @param string $str             Markup to parse
 * @param bool   $lowercase       Passed on to prepare()
 * @param bool   $stripRN         Collapse line breaks and surrounding
 *                                whitespace before parsing
 * @param string $defaultBRText   Passed on to prepare()
 * @param string $defaultSpanText Passed on to prepare()
 * @param int    $options         Bit mask; HDOM_SMARTY_AS_TEXT strips Smarty
 *                                scripts
 * @return $this
 */
public function load(
    $str,
    $lowercase = true,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0)
{
    $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);

    if ($stripRN) {
        // Sections that must not lose whitespace are removed temporarily
        $protected = array(
            "'<\s*script[^>]*>(.*?)<\s*/\s*script\s*>'is",
            "'<!\[CDATA\[(.*?)\]\]>'is",
            "'<!--(?!>|\->)(.*?)-->'is",
            "'<\s*style[^>]*>(.*?)<\s*/\s*style\s*>'is",
            "'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is",
        );

        foreach ($protected as $pattern) {
            $this->remove_noise($pattern);
        }

        // Edited By Speed Booster Pack. Do not minify html!
        // Line breaks (with surrounding whitespace) between two tags vanish
        $this->doc = preg_replace('/\>([\t\s]*[\r\n]^[\t\s]*)\</m', '><', $this->doc);

        // Line breaks (with surrounding whitespace) in text become one space
        $this->doc = preg_replace('/([\t\s]*[\r\n]^[\t\s]*)/m', ' ', $this->doc);

        // Put the protected sections back and record the new size
        $this->doc = $this->restore_noise($this->doc);
        $this->size = strlen($this->doc);
    }

    // Server-side script
    $this->remove_noise("'(<\?)(.*?)(\?>)'s", true);

    if (count($this->noise) > 0) {
        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Support for server-side scripts has been deprecated and will be removed in the next major version of simplehtmldom.');
    }

    // Smarty scripts
    if ($options & HDOM_SMARTY_AS_TEXT) {
        $this->remove_noise("'(\{\w)(.*?)(\})'s", true);
        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Support for Smarty scripts has been deprecated and will be removed in the next major version of simplehtmldom.');
    }

    // Build the node tree and close the root node
    $this->parse($stripRN);
    $this->root->_[HtmlNode::HDOM_INFO_END] = $this->cursor;

    $this->parse_charset();
    $this->decode();

    // The raw source is no longer needed
    unset($this->doc);

    // Allow chaining
    return $this;
}
|
229 |
-
|
230 |
-
/**
 * Stores a callback on the document (see $callback).
 *
 * @param callable $function_name The callback to store
 */
public function set_callback($function_name)
{
    $this->callback = $function_name;
}
|
234 |
-
|
235 |
-
/**
 * Removes a callback previously stored with set_callback().
 */
public function remove_callback()
{
    $this->callback = null;
}
|
239 |
-
|
240 |
-
/**
 * Returns the markup of the document and optionally writes it to a file.
 *
 * @param string $filepath Target file; nothing is written for ''
 * @return string Inner text of the root node
 */
public function save($filepath = '')
{
    $markup = $this->root->innertext();

    if ($filepath === '') {
        return $markup;
    }

    // Exclusive lock while writing
    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
|
246 |
-
|
247 |
-
/**
 * Runs find() on the root node of the document.
 *
 * @param string   $selector  Passed on to the root node
 * @param int|null $idx       Passed on to the root node
 * @param bool     $lowercase Passed on to the root node
 * @return mixed Whatever the root node's find() returns
 */
public function find($selector, $idx = null, $lowercase = false)
{
    $root = $this->root;

    return $root->find($selector, $idx, $lowercase);
}
|
251 |
-
|
252 |
-
/**
 * Runs expect() on the root node of the document.
 *
 * @param string   $selector  Passed on to the root node
 * @param int|null $idx       Passed on to the root node
 * @param bool     $lowercase Passed on to the root node
 * @return mixed Whatever the root node's expect() returns
 */
public function expect($selector, $idx = null, $lowercase = false)
{
    $root = $this->root;

    return $root->expect($selector, $idx, $lowercase);
}
|
256 |
-
|
257 |
-
/**
 * Runs dump() on the root node of the document.
 *
 * @param bool $show_attr Passed on to the root node
 *
 * @codeCoverageIgnore
 */
public function dump($show_attr = true)
{
    $root = $this->root;
    $root->dump($show_attr);
}
|
262 |
-
|
263 |
-
/**
 * Resets the parser state for a new source document.
 *
 * Stores the trimmed source, resets position, cursor, noise and node list,
 * and creates a fresh root node that also becomes the current parent.
 *
 * @param string|null $str             Source document; null is treated as
 *                                     an empty document
 * @param bool        $lowercase       Stored in $lowercase
 * @param string      $defaultBRText   Stored in $default_br_text
 * @param string      $defaultSpanText Stored in $default_span_text
 */
protected function prepare(
    $str, $lowercase = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT)
{
    $this->clear();

    // The constructor passes null when no document is given. trim(null)
    // yields '' as well but is deprecated as of PHP 8.1.
    $this->doc = trim($str === null ? '' : $str);
    $this->size = strlen($this->doc);
    $this->original_size = $this->size; // original size of the html
    $this->pos = 0;
    $this->cursor = 1;
    $this->noise = array();
    $this->nodes = array();
    $this->lowercase = $lowercase;
    $this->default_br_text = $defaultBRText;
    $this->default_span_text = $defaultSpanText;
    $this->root = new HtmlNode($this);
    $this->root->tag = 'root';
    $this->root->_[HtmlNode::HDOM_INFO_BEGIN] = -1;
    $this->root->nodetype = HtmlNode::HDOM_TYPE_ROOT;
    $this->parent = $this->root;

    // null marks "no more input" throughout the parser; set it explicitly
    // for an empty document so no character of an earlier document remains.
    $this->char = ($this->size > 0) ? $this->doc[0] : null;
}
|
287 |
-
|
288 |
-
/**
 * Decodes HTML entities in the text, inner text and attribute values of
 * every node, using the target charset of the document.
 *
 * Noise is restored in text and inner text before decoding. Attributes
 * without a value (stored as true) are left untouched.
 */
protected function decode()
{
    $flags = ENT_QUOTES | ENT_HTML5;
    $textSlots = array(HtmlNode::HDOM_INFO_TEXT, HtmlNode::HDOM_INFO_INNER);

    foreach ($this->nodes as $node) {
        // Text first, inner text second
        foreach ($textSlots as $slot) {
            if (isset($node->_[$slot])) {
                $node->_[$slot] = html_entity_decode(
                    $this->restore_noise($node->_[$slot]),
                    $flags,
                    $this->_target_charset
                );
            }
        }

        if (!isset($node->attr) || !is_array($node->attr)) {
            continue;
        }

        foreach ($node->attr as $attrName => $attrValue) {
            if ($attrValue !== true) {
                $node->attr[$attrName] = html_entity_decode(
                    $attrValue,
                    $flags,
                    $this->_target_charset
                );
            }
        }
    }
}
|
317 |
-
|
318 |
-
/**
 * Main parser loop: alternates between collecting text up to the next "<"
 * and reading a tag, until read_tag() reports the end of the document.
 *
 * @param bool $trim When true, text consisting only of whitespace is not
 *                   turned into a node; the flag is also passed to read_tag()
 */
protected function parse($trim = false)
{
    for (;;) {
        if ($this->char !== '<') {
            $text = $this->copy_until_char('<');

            if ($text !== '') {
                // Skip whitespace between tags? (</a> <b>)
                if ($trim && trim($text) === '') {
                    continue;
                }

                // Everything else becomes a text node
                $textNode = new HtmlNode($this);
                ++$this->cursor;
                $textNode->_[HtmlNode::HDOM_INFO_TEXT] = $text;
                $this->link_nodes($textNode, false);
            }
        }

        if ($this->read_tag($trim) === false) {
            return;
        }
    }
}
|
345 |
-
|
346 |
-
/**
 * Determines the charset of the source document and stores it in $_charset.
 *
 * Sources are tried in this order until one yields a non-empty value:
 *  1. get_last_retrieve_url_contents_content_type(), if that function exists
 *  2. <meta http-equiv="Content-Type" content="...charset=...">
 *  3. <meta charset="...">
 *  4. mb_detect_encoding() on the source, if mbstring is available
 *  5. UTF-8 as the last resort
 *
 * ISO-8859-1 and its Latin-1 aliases are reported as CP1252.
 *
 * @return string The detected charset
 */
protected function parse_charset()
{
    $charset = null;

    // 1. Content type reported by an optional helper function
    if (function_exists('get_last_retrieve_url_contents_content_type')) {
        $contentTypeHeader = get_last_retrieve_url_contents_content_type();
        $success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches);

        if ($success) {
            $charset = $matches[1];
        }

        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Determining charset using get_last_retrieve_url_contents_content_type() ' . ($success ? 'successful' : 'failed'));
    }

    // 2. https://www.w3.org/TR/html/document-metadata.html#statedef-http-equiv-content-type
    if (empty($charset)) {
        $el = $this->root->find('meta[http-equiv=Content-Type]', 0, true);

        if (!empty($el)) {
            $fullvalue = $el->content;

            if (!empty($fullvalue) && preg_match('/charset=(.+)/i', $fullvalue, $matches)) {
                $charset = $matches[1];
            }
        }
    }

    // 3. https://www.w3.org/TR/html/document-metadata.html#character-encoding-declaration
    if (empty($charset)) {
        $meta = $this->root->find('meta[charset]', 0);

        if ($meta) {
            $charset = $meta->charset;
        }
    }

    // 4. Guess from the content; needs the optional mbstring extension.
    //
    // mb_detect_encoding() is not intended to distinguish between charsets,
    // especially single-byte charsets. Its primary purpose is to detect
    // which multibyte encoding is in use, i.e. UTF-8, UTF-16, shift-JIS,
    // etc. -- https://bugs.php.net/bug.php?id=38138
    //
    // Adding both CP1251/ISO-8859-5 and CP1252/ISO-8859-1 will always
    // result in CP1251/ISO-8859-5 and vice versa. Thus, only detect if it
    // is either UTF-8 or CP1252/ISO-8859-1 to stay compatible.
    if (empty($charset) && function_exists('mb_detect_encoding')) {
        $encoding = mb_detect_encoding(
            $this->doc,
            array( 'UTF-8', 'CP1252', 'ISO-8859-1' )
        );

        if (in_array($encoding, array('CP1252', 'ISO-8859-1'), true)) {
            // Due to a limitation of mb_detect_encoding 'CP1251'/'ISO-8859-5'
            // will be detected as 'CP1252'/'ISO-8859-1'. This will cause
            // iconv to fail, in which case the other charset is assumed.
            if (!@iconv('CP1252', 'UTF-8', $this->doc)) {
                $encoding = 'CP1251';
            }
        }

        if ($encoding !== false) {
            $charset = $encoding;
        }
    }

    // 5. Nothing worked
    if (empty($charset)) {
        Debug::log('Unable to determine charset from source document. Assuming UTF-8');
        $charset = 'UTF-8';
    }

    // CP1252 is a superset of ISO-8859-1, so prefer it over the subset
    $latin1Names = array('iso-8859-1', 'latin1', 'latin-1');

    if (in_array(strtolower($charset), $latin1Names, true)) {
        $charset = 'CP1252';
    }

    $this->_charset = $charset;

    return $charset;
}
|
443 |
-
|
444 |
-
/**
 * Reads one tag at the current position and adds it to the node tree.
 *
 * Expects the current character to be "<"; any other character is treated
 * as the end of the document. Handles, in this order: end tags, comments,
 * CDATA sections and other "<!" declarations, invalid tag names (kept as
 * text), and start tags including their attributes. The content of a
 * <script> element is copied verbatim into a separate text node.
 *
 * @param bool $trim When true, whitespace inside tags is skipped instead of
 *                   being recorded on the node
 * @return bool false when the end of the document was reached; otherwise
 *              true, or the result of as_text_node() for an end tag that
 *              does not match an open element
 */
protected function read_tag($trim)
{
    if ($this->char !== '<') { // End Of File
        $this->root->_[HtmlNode::HDOM_INFO_END] = $this->cursor;

        // We might be in a nest of unclosed elements for which the end tags
        // can be omitted. Close them for faster seek operations.
        do {
            if (isset($this->optional_closing_tags[strtolower($this->parent->tag)])) {
                $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
            }
        } while ($this->parent = $this->parent->parent);

        return false;
    }

    $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

    if ($trim) { // "< /html>"
        $this->skip($this->token_blank);
    }

    // End tag: https://dev.w3.org/html5/pf-summary/syntax.html#end-tags
    if ($this->char === '/') {
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

        $tag = $this->copy_until_char('>');
        $tag = $trim ? ltrim($tag, $this->token_blank) : $tag;

        // Skip attributes and whitespace in end tags
        if ($trim && ($pos = strpos($tag, ' ')) !== false) {
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains superfluous whitespace in end tags (</html >).');
            $tag = substr($tag, 0, $pos);
        }

        if (strcasecmp($this->parent->tag, $tag)) { // Parent is not start tag
            $parent_lower = strtolower($this->parent->tag);
            $tag_lower = strtolower($tag);
            if (isset($this->optional_closing_tags[$parent_lower]) && isset($this->block_tags[$tag_lower])) {
                $org_parent = $this->parent;

                // Look for the start tag
                while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower){
                    // Close any unclosed element with optional end tags
                    if (isset($this->optional_closing_tags[strtolower($this->parent->tag)]))
                        $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
                    $this->parent = $this->parent->parent;
                }

                // No start tag, close grandparent
                if (strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $org_parent;

                    if ($this->parent->parent) {
                        $this->parent = $this->parent->parent;
                    }

                    $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
                    return $this->as_text_node($tag);
                }
            } elseif (($this->parent->parent) && isset($this->block_tags[$tag_lower])) {
                // grandparent exists + current is block tag
                // Parent has no end tag
                $this->parent->_[HtmlNode::HDOM_INFO_END] = 0;
                $org_parent = $this->parent;

                // Find start tag
                while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $this->parent->parent;
                }

                // No start tag, close parent
                if (strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $org_parent; // restore original parent
                    $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
                    return $this->as_text_node($tag);
                }
            } elseif (($this->parent->parent) && strtolower($this->parent->parent->tag) === $tag_lower) {
                // Grandparent exists and current tag closes it
                $this->parent->_[HtmlNode::HDOM_INFO_END] = 0;
                $this->parent = $this->parent->parent;
            } else { // Random tag, add as text node
                return $this->as_text_node($tag);
            }
        }

        // Link with start tag
        $this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;

        if ($this->parent->parent) {
            $this->parent = $this->parent->parent;
        }

        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    // Start tag: https://dev.w3.org/html5/pf-summary/syntax.html#start-tags
    $node = new HtmlNode($this);
    $node->_[HtmlNode::HDOM_INFO_BEGIN] = $this->cursor++;

    // Tag name
    $tag = $this->copy_until($this->token_slash);

    if (isset($tag[0]) && $tag[0] === '!') { // Doctype, CData, Comment
        if (isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') { // Comment ("<!--")

            /**
             * Comments must have the following format:
             *
             * 1. The string "<!--"
             *
             * 2. Optionally, text, with the additional restriction that the
             * text must not start with the string ">", nor start with the
             * string "->", nor contain the strings "<!--", "-->", or "--!>",
             * nor end with the string "<!-".
             *
             * 3. The string "-->"
             *
             * -- https://www.w3.org/TR/html53/syntax.html#comments
             */

            // Go back until $tag only contains start of comment "!--".
            while (strlen($tag) > 3) {
                $this->char = $this->doc[--$this->pos]; // previous
                $tag = substr($tag, 0, strlen($tag) - 1);
            }

            $node->nodetype = HtmlNode::HDOM_TYPE_COMMENT;
            $node->tag = 'comment';

            $data = '';

            while(true) {
                // Copy until first char of end tag
                $data .= $this->copy_until_char('-');

                // Look ahead in the document, maybe we are at the end
                if (($this->pos + 3) > $this->size) { // End of document
                    Debug::log('Source document ended unexpectedly!');
                    break;
                } elseif (substr($this->doc, $this->pos, 3) === '-->') { // end
                    $data .= $this->copy_until_char('>');
                    break;
                }

                // A "-" that does not start "-->" is part of the comment
                $data .= $this->char;
                $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            }

            if (substr($data, 0, 1) === '>') { // "<!-->"
                Debug::log('Comment must not start with the string ">"!');
                // Rewind to where the comment text started and drop it
                $this->pos -= strlen($data);
                $this->char = $this->doc[$this->pos];
                $data = '';
            }

            if (substr($data, 0, 2) === '->') { // "<!--->"
                Debug::log('Comment must not start with the string "->"!');
                // Rewind to where the comment text started and drop it
                $this->pos -= strlen($data);
                $this->char = $this->doc[$this->pos];
                $data = '';
            }

            if (strpos($data, '<!--') !== false) { // "<!--<!---->"
                Debug::log('Comment must not contain the string "<!--"!');
                // simplehtmldom can work with it anyway
            }

            if (strpos($data, '--!>') !== false) { // "<!----!>-->"
                Debug::log('Comment must not contain the string "--!>"!');
                // simplehtmldom can work with it anyway
            }

            if (substr($data, -3, 3) === '<!-') { // "<!--<!--->"
                Debug::log('Comment must not end with "<!-"!');
                // simplehtmldom can work with it anyway
            }

            $tag .= $data;
            $tag = $this->restore_noise($tag);

            // Comment starts after "!--" and ends before "--" (5 chars total)
            $node->_[HtmlNode::HDOM_INFO_INNER] = substr($tag, 3, strlen($tag) - 5);
        } elseif (substr($tag, 1, 7) === '[CDATA[') {

            // Go back until $tag only contains start of cdata "![CDATA[".
            while (strlen($tag) > 8) {
                $this->char = $this->doc[--$this->pos]; // previous
                $tag = substr($tag, 0, strlen($tag) - 1);
            }

            // CDATA can contain HTML stuff, need to find closing tags first
            $node->nodetype = HtmlNode::HDOM_TYPE_CDATA;
            $node->tag = 'cdata';

            $data = '';

            // There is a rare chance of empty CDATA: "<[CDATA[]]>"
            // In which case the current char is the first "[" of the end tag
            // But the CDATA could also just be a bracket: "<[CDATA[]]]>"
            while(true) {
                // Copy until first char of end tag
                $data .= $this->copy_until_char(']');

                // Look ahead in the document, maybe we are at the end
                if (($this->pos + 3) > $this->size) { // End of document
                    Debug::log('Source document ended unexpectedly!');
                    break;
                } elseif (substr($this->doc, $this->pos, 3) === ']]>') { // end
                    $data .= $this->copy_until_char('>');
                    break;
                }

                // A "]" that does not start "]]>" is part of the CDATA
                $data .= $this->char;
                $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            }

            $tag .= $data;
            $tag = $this->restore_noise($tag);

            // CDATA starts after "![CDATA[" and ends before "]]" (10 chars total)
            $node->_[HtmlNode::HDOM_INFO_INNER] = substr($tag, 8, strlen($tag) - 10);
        } else { // Unknown
            Debug::log('Source document contains unknown declaration: <' . $tag);
            $node->nodetype = HtmlNode::HDOM_TYPE_UNKNOWN;
            $node->tag = 'unknown';
        }

        // Keep the complete declaration as the text of the node
        $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');

        if ($this->char === '>') {
            $node->_[HtmlNode::HDOM_INFO_TEXT] .= '>';
        }

        $this->link_nodes($node, true);
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    if (!preg_match('/^\w[\w:-]*$/', $tag)) { // Invalid tag name
        $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');

        if ($this->char === '>') { // End tag
            $node->_[HtmlNode::HDOM_INFO_TEXT] .= '>';
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        }

        $this->link_nodes($node, false);
        Debug::log('Source document contains invalid tag name: ' . $node->_[HtmlNode::HDOM_INFO_TEXT]);
        return true;
    }

    // Valid tag name
    $node->nodetype = HtmlNode::HDOM_TYPE_ELEMENT;
    $tag_lower = strtolower($tag);
    $node->tag = ($this->lowercase) ? $tag_lower : $tag;

    if (isset($this->optional_closing_tags[$tag_lower])) { // Optional closing tag
        while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
            // Previous element was the last element of ancestor
            $this->parent->_[HtmlNode::HDOM_INFO_END] = $node->_[HtmlNode::HDOM_INFO_BEGIN] - 1;
            $this->parent = $this->parent->parent;
        }
        $node->parent = $this->parent;
    }

    $guard = 0; // prevent infinity loop

    // [0] Space between tag and first attribute
    $space = array($this->copy_skip($this->token_blank), '', '');

    do { // Parse attributes
        $name = $this->copy_until($this->token_equal);

        if ($name === '' && $this->char !== null && $space[0] === '') {
            break;
        }

        if ($guard === $this->pos) { // Escape infinite loop
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            continue;
        }

        $guard = $this->pos;

        if ($this->pos >= $this->size - 1 && $this->char !== '>') { // End Of File
            Debug::log('Source document ended unexpectedly!');
            // The unfinished tag is kept as plain text
            $node->nodetype = HtmlNode::HDOM_TYPE_TEXT;
            $node->_[HtmlNode::HDOM_INFO_END] = 0;
            $node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
            $node->tag = 'text';
            $this->link_nodes($node, false);
            return true;
        }

        if ($name === '/' || $name === '') { // No more attributes
            break;
        }

        // [1] Whitespace after attribute name
        $space[1] = (strpos($this->token_blank, $this->char) === false) ? '' : $this->copy_skip($this->token_blank);

        $name = $this->restore_noise($name); // might be a noisy name

        if ($this->lowercase) {
            $name = strtolower($name);
        }

        if ($this->char === '=') { // Attribute with value
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $this->parse_attr($node, $name, $space, $trim); // get attribute value
        } else { // Attribute without value
            $node->_[HtmlNode::HDOM_INFO_QUOTE][$name] = HtmlNode::HDOM_QUOTE_NO;
            $node->attr[$name] = true;
            if ($this->char !== '>') {
                $this->char = $this->doc[--$this->pos];
            } // prev
        }

        // Space before attribute and around equal sign
        if (!$trim && $space !== array(' ', '', '')) {
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains superfluous whitespace in attributes (<e attribute = "value">). Enable trimming or fix attribute spacing for best performance.');
            $node->_[HtmlNode::HDOM_INFO_SPACE][$name] = $space;
        }

        // prepare for next attribute
        $space = array(
            ((strpos($this->token_blank, $this->char) === false) ? '' : $this->copy_skip($this->token_blank)),
            '',
            ''
        );
    } while ($this->char !== '>' && $this->char !== '/');

    $this->link_nodes($node, true);

    // Space after last attribute before closing the tag
    if (!$trim && $space[0] !== '') {
        // phpcs:ignore Generic.Files.LineLength
        Debug::log_once('Source document contains superfluous whitespace before the closing braket (<e attribute="value" >). Enable trimming or remove spaces before closing brackets for best performance.');
        $node->_[HtmlNode::HDOM_INFO_ENDSPACE] = $space[0];
    }

    // Whatever is left before ">" (e.g. the "/" of a void element)
    $rest = ($this->char === '>') ? '' : $this->copy_until_char('>');
    $rest = ($trim) ? trim($rest) : $rest; // <html / >

    $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

    if (trim($rest) === '/') { // Void element
        if ($rest !== '') {
            if (isset($node->_[HtmlNode::HDOM_INFO_ENDSPACE])) {
                $node->_[HtmlNode::HDOM_INFO_ENDSPACE] .= $rest;
            } else {
                $node->_[HtmlNode::HDOM_INFO_ENDSPACE] = $rest;
            }
        }
        $node->_[HtmlNode::HDOM_INFO_END] = 0;
    } elseif (!isset($this->self_closing_tags[strtolower($node->tag)])) {
        // Element with content: remember the text up to the next tag and
        // make the element the parent of the nodes that follow
        $innertext = $this->copy_until_char('<');
        if ($innertext !== '') {
            $node->_[HtmlNode::HDOM_INFO_INNER] = $innertext;
        }
        $this->parent = $node;
    }

    if ($node->tag === 'br') {
        $node->_[HtmlNode::HDOM_INFO_INNER] = $this->default_br_text;
    } elseif ($node->tag === 'script') {
        $data = '';

        // There is a rare chance of empty script: "<script></script>"
        // In which case the current char is the start of the end tag
        // But the script could also just contain tags: "<script><div></script>"
        while(true) {
            // Copy until first char of end tag
            $data .= $this->copy_until_char('<');

            // Look ahead in the document, maybe we are at the end
            if (($this->pos + 9) > $this->size) { // End of document
                Debug::log('Source document ended unexpectedly!');
                break;
            } elseif (substr($this->doc, $this->pos, 8) === '</script') { // end
                $this->skip('>'); // don't include the end tag
                break;
            }

            // Note: A script tag may contain any other tag except </script>
            // which needs to be escaped as <\/script>

            $data .= $this->char;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        }

        // The script content becomes a separate text node
        $node = new HtmlNode($this);
        ++$this->cursor;
        $node->_[HtmlNode::HDOM_INFO_TEXT] = $data;
        $this->link_nodes($node, false);
    }

    return true;
}
|
847 |
-
|
848 |
-
/**
 * Read the value of attribute $name at the current document position and
 * store it on $node.
 *
 * A repeated attribute is still consumed from the document, but neither its
 * value nor its quote type overwrite the first occurrence.
 *
 * @param HtmlNode $node  Node receiving the attribute.
 * @param string   $name  Attribute name.
 * @param array    $space Passed by reference; index 2 receives the whitespace
 *                        found between "=" and the value.
 * @param bool     $trim  When true, runs of whitespace inside the value are
 *                        collapsed to one space and the value is trimmed.
 * @return void
 */
protected function parse_attr($node, $name, &$space, $trim)
{
    $is_duplicate = isset($node->attr[$name]);

    if (!$is_duplicate) { // Copy whitespace between "=" and value
        // $this->char is null at end of document; never hand null to strpos()
        $at_blank = $this->char !== null
            && strpos($this->token_blank, $this->char) !== false;
        $space[2] = $at_blank ? $this->copy_skip($this->token_blank) : '';
    }

    switch ($this->char) {
        case '"':
            $quote_type = HtmlNode::HDOM_QUOTE_DOUBLE;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $value = $this->copy_until_char('"');
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            break;
        case '\'':
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains attribute values with single quotes (<e attribute=\'value\'>). Use double quotes for best performance.');
            $quote_type = HtmlNode::HDOM_QUOTE_SINGLE;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $value = $this->copy_until_char('\'');
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            break;
        default:
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains attribute values without quotes (<e attribute=value>). Use double quotes for best performance');
            $quote_type = HtmlNode::HDOM_QUOTE_NO;
            $value = $this->copy_until($this->token_attr);
    }

    $value = $this->restore_noise($value);

    if ($trim) {
        // Attribute values must not contain control characters other than space
        // https://www.w3.org/TR/html/dom.html#text-content
        // https://www.w3.org/TR/html/syntax.html#attribute-values
        // https://www.w3.org/TR/xml/#AVNormalize
        $collapsed = preg_replace("/[\r\n\t\s]+/u", ' ', $value);

        if ($collapsed === null) {
            // With the "u" modifier preg_replace() returns null when the
            // subject is not valid UTF-8 (e.g. a CP1252 document). Retry
            // byte-wise instead of silently discarding the attribute value.
            $collapsed = preg_replace("/[\r\n\t\s]+/", ' ', $value);
        }

        $value = trim((string)$collapsed);
    }

    if (!$is_duplicate) {
        // Double quotes are the default; only deviations are recorded
        if ($quote_type !== HtmlNode::HDOM_QUOTE_DOUBLE) {
            $node->_[HtmlNode::HDOM_INFO_QUOTE][$name] = $quote_type;
        }
        $node->attr[$name] = $value;
    }
}
|
895 |
-
|
896 |
-
/**
 * Attach $node to the node currently held in $this->parent.
 *
 * @param HtmlNode $node     Node to attach (taken by reference).
 * @param bool     $is_child When true the node is also appended to the
 *                           parent's children list, not only its nodes list.
 * @return void
 */
protected function link_nodes(&$node, $is_child)
{
    $owner = $this->parent;

    $node->parent = $owner;
    $owner->nodes[] = $node;

    if (!$is_child) {
        return;
    }

    $owner->children[] = $node;
}
|
904 |
-
|
905 |
-
/**
 * Store an end tag as a plain text node ("</tag>") under the current parent
 * and step one character forward in the document.
 *
 * @param string $tag Tag name, without "</" and ">".
 * @return bool Always true.
 */
protected function as_text_node($tag)
{
    $text_node = new HtmlNode($this);
    ++$this->cursor;

    $text_node->_[HtmlNode::HDOM_INFO_TEXT] = '</' . $tag . '>';
    $this->link_nodes($text_node, false);

    // next
    ++$this->pos;
    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return true;
}
|
914 |
-
|
915 |
-
/**
 * Advance the read position past every leading character contained in
 * $chars and refresh the current character (null at end of document).
 *
 * @param string $chars Set of characters to skip.
 * @return void
 */
protected function skip($chars)
{
    $skipped = strspn($this->doc, $chars, $this->pos);
    $this->pos += $skipped;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos]; // next
    } else {
        $this->char = null;
    }
}
|
920 |
-
|
921 |
-
/**
 * Like skip(), but return the characters that were skipped.
 *
 * @param string $chars Set of characters to consume.
 * @return string The consumed run, or '' when nothing matched (position and
 *                current character are left untouched in that case).
 */
protected function copy_skip($chars)
{
    $start = $this->pos;
    $length = strspn($this->doc, $chars, $start);

    if ($length === 0) {
        return '';
    }

    $this->pos += $length;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos]; // next
    } else {
        $this->char = null;
    }

    return substr($this->doc, $start, $length);
}
|
930 |
-
|
931 |
-
/**
 * Consume and return everything up to (not including) the first character
 * that is contained in $chars.
 *
 * @param string $chars Set of stop characters.
 * @return string The consumed text.
 */
protected function copy_until($chars)
{
    $start = $this->pos;
    $length = strcspn($this->doc, $chars, $start);

    $this->pos += $length;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos]; // next
    } else {
        $this->char = null;
    }

    return substr($this->doc, $start, $length);
}
|
939 |
-
|
940 |
-
/**
 * Consume and return everything up to (not including) the next occurrence
 * of $char.
 *
 * If $char does not occur again, the rest of the document is consumed and
 * the current character becomes null.
 *
 * @param string $char Stop string handed to strpos().
 * @return string The consumed text; '' when already at end of document or
 *                already positioned on $char.
 */
protected function copy_until_char($char)
{
    if ($this->char === null) {
        return '';
    }

    $start = $this->pos;
    $found = strpos($this->doc, $char, $start);

    if ($found === false) {
        // Stop string not present: hand back the remainder
        $remainder = substr($this->doc, $start, $this->size - $start);
        $this->char = null;
        $this->pos = $this->size;
        return $remainder;
    }

    if ($found === $start) {
        return '';
    }

    $this->char = $this->doc[$found];
    $this->pos = $found;

    return substr($this->doc, $start, $found - $start);
}
|
958 |
-
|
959 |
-
/**
 * Replace every match of $pattern in the document with a placeholder of the
 * form "___noise___" followed by a 5-character number, remembering the
 * replaced text in $this->noise under that placeholder.
 *
 * @param string $pattern    Regular expression handed to preg_match_all().
 * @param bool   $remove_tag True replaces the entire match, false only the
 *                           first sub-match.
 * @return void
 */
protected function remove_noise($pattern, $remove_tag = false)
{
    $total = preg_match_all(
        $pattern,
        $this->doc,
        $matches,
        PREG_SET_ORDER | PREG_OFFSET_CAPTURE
    );

    $group = $remove_tag ? 0 : 1; // 0 = entire match, 1 = submatch

    // Walk the matches back to front so the recorded offsets of the
    // remaining matches stay valid while the document is being edited.
    for ($i = $total - 1; $i >= 0; --$i) {
        $placeholder = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);

        list($captured, $offset) = $matches[$i][$group];

        $this->noise[$placeholder] = $captured;
        $this->doc = substr_replace($this->doc, $placeholder, $offset, strlen($captured));
    }

    // reset the length of content
    $this->size = strlen($this->doc);

    if ($this->size > 0) {
        $this->char = $this->doc[0];
    }
}
|
983 |
-
|
984 |
-
/**
 * Replace "___noise___NNNNN" placeholders in $text with the content stored
 * for them in $this->noise. A restored entry is removed from $this->noise.
 *
 * Placeholders that cannot be resolved are replaced by a diagnostic string.
 * The scan then continues behind that string, so a stray or forged
 * placeholder can no longer cause an endless loop.
 *
 * @param string $text Text that may contain noise placeholders.
 * @return string Text with placeholders resolved.
 */
function restore_noise($text)
{
    if (empty($this->noise)) {
        return $text; // nothing to restore
    }

    $pos = 0;

    while (($pos = strpos($text, '___noise___', $pos)) !== false) {
        // A complete key is the 11-character marker plus 5 more characters
        if (strlen($text) > $pos + 15) {
            $key = substr($text, $pos, 16);

            if (isset($this->noise[$key])) {
                // $pos is intentionally not advanced: the restored content
                // may itself contain placeholders that must be resolved.
                $text = substr($text, 0, $pos)
                    . $this->noise[$key]
                    . substr($text, $pos + 16);

                unset($this->noise[$key]);
            } else {
                Debug::log_once('Noise restoration failed. DOM has been corrupted!');

                // The diagnostic embeds the key, which still contains the
                // "___noise___" marker. Continue scanning after it, otherwise
                // the same marker would be found again and again.
                $replacement = 'UNDEFINED NOISE FOR KEY: ' . $key;

                $text = substr($text, 0, $pos)
                    . $replacement
                    . substr($text, $pos + 16);

                $pos += strlen($replacement);
            }
        } else {
            // The marker is too close to the end of the text to carry a
            // full key. Get rid of it so it is not found again.
            Debug::log_once('Noise restoration failed. The provided key is incomplete: ' . $text);

            $replacement = 'NO NUMERIC NOISE KEY';

            $text = substr($text, 0, $pos)
                . $replacement
                . substr($text, $pos + 11);

            $pos += strlen($replacement);
        }
    }

    return $text;
}
|
1029 |
-
|
1030 |
-
/**
 * Find the first stored noise entry that contains $text.
 *
 * @param string $text Substring to look for.
 * @return string|null The matching noise entry; null (implicit) when no
 *                     entry contains $text.
 */
function search_noise($text)
{
    foreach ($this->noise as $fragment) {
        if (strpos($fragment, $text) === false) {
            continue;
        }

        return $fragment;
    }
}
|
1038 |
-
|
1039 |
-
/**
 * String conversion yields the inner text of the root node.
 *
 * @return string
 */
function __toString()
{
    $markup = $this->root->innertext();

    return $markup;
}
|
1043 |
-
|
1044 |
-
/**
 * Magic read access for a fixed set of virtual properties.
 *
 * "outertext" and "innertext" both map to the root node's innertext(),
 * "plaintext" to its text(), "charset" and "target_charset" to the
 * corresponding document fields.
 *
 * @param string $name Property name.
 * @return mixed The property value; null (implicit) for any other name.
 */
function __get($name)
{
    if ($name === 'outertext' || $name === 'innertext') {
        return $this->root->innertext();
    }

    if ($name === 'plaintext') {
        return $this->root->text();
    }

    if ($name === 'charset') {
        return $this->_charset;
    }

    if ($name === 'target_charset') {
        return $this->_target_charset;
    }
}
|
1059 |
-
|
1060 |
-
/**
 * Forward to childNodes() of the root node.
 *
 * @param int $idx Passed through unchanged (default -1).
 * @return mixed Whatever the root node returns.
 */
function childNodes($idx = -1)
{
    $root = $this->root;

    return $root->childNodes($idx);
}
|
1064 |
-
|
1065 |
-
/**
 * Forward to firstChild() of the root node.
 *
 * @return mixed Whatever the root node returns.
 */
function firstChild()
{
    $root = $this->root;

    return $root->firstChild();
}
|
1069 |
-
|
1070 |
-
/**
 * Forward to lastChild() of the root node.
 *
 * @return mixed Whatever the root node returns.
 */
function lastChild()
{
    $root = $this->root;

    return $root->lastChild();
}
|
1074 |
-
|
1075 |
-
/**
 * Build a new element node.
 *
 * The node is constructed with null instead of this document, and is not
 * linked into the tree by this method.
 *
 * @param string      $name  Tag name of the element.
 * @param string|null $value Optional inner content; stored only when not null.
 * @return HtmlNode The new element node.
 */
function createElement($name, $value = null)
{
    $element = new HtmlNode(null);

    $element->nodetype = HtmlNode::HDOM_TYPE_ELEMENT;
    $element->_[HtmlNode::HDOM_INFO_BEGIN] = 1;
    $element->_[HtmlNode::HDOM_INFO_END] = 1;

    $has_content = ($value !== null);
    if ($has_content) {
        $element->_[HtmlNode::HDOM_INFO_INNER] = $value;
    }

    $element->tag = $name;

    return $element;
}
|
1090 |
-
|
1091 |
-
/**
 * Build a new text node that is constructed with this document.
 *
 * @param string|null $value Text content; stored only when not null.
 * @return HtmlNode The new text node.
 */
function createTextNode($value)
{
    $text_node = new HtmlNode($this);
    $text_node->nodetype = HtmlNode::HDOM_TYPE_TEXT;

    $has_content = ($value !== null);
    if ($has_content) {
        $text_node->_[HtmlNode::HDOM_INFO_TEXT] = $value;
    }

    return $text_node;
}
|
1102 |
-
|
1103 |
-
/**
 * Look up the first element matching the selector "#<id>".
 *
 * @param string $id Element id.
 * @return mixed Result of find() at index 0.
 */
function getElementById($id)
{
    $selector = '#' . $id;

    return $this->find($selector, 0);
}
|
1107 |
-
|
1108 |
-
/**
 * Look up elements matching the selector "#<id>".
 *
 * @param string   $id  Element id.
 * @param int|null $idx Passed through to find() unchanged.
 * @return mixed Result of find().
 */
function getElementsById($id, $idx = null)
{
    $selector = '#' . $id;

    return $this->find($selector, $idx);
}
|
1112 |
-
|
1113 |
-
/**
 * Look up the first element whose selector is the given tag name.
 *
 * @param string $name Tag name, used as selector as-is.
 * @return mixed Result of find() at index 0.
 */
function getElementByTagName($name)
{
    $first_match = $this->find($name, 0);

    return $first_match;
}
|
1117 |
-
|
1118 |
-
/**
 * Look up elements whose selector is the given tag name.
 *
 * @param string   $name Tag name, used as selector as-is.
 * @param int|null $idx  Passed through to find() unchanged.
 * @return mixed Result of find().
 */
function getElementsByTagName($name, $idx = null)
{
    $matches = $this->find($name, $idx);

    return $matches;
}
|
1122 |
-
|
1123 |
-
/**
 * Read a file with file_get_contents() and load its content.
 *
 * All arguments given to this method are forwarded to file_get_contents(),
 * so its optional parameters can be supplied as well.
 *
 * @param string $file File name or URL.
 * @return false|null False when reading failed; nothing (null) on success.
 */
function loadFile($file)
{
    $arguments = func_get_args();
    $contents = call_user_func_array('file_get_contents', $arguments);

    if ($contents === false) {
        return false;
    }

    $this->load($contents, true);
}
|
1133 |
-
}
|
1 |
+
<?php namespace simplehtmldom;
|
2 |
+
|
3 |
+
/**
|
4 |
+
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
+
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
+
*
|
7 |
+
* Licensed under The MIT License
|
8 |
+
* See the LICENSE file in the project root for more information.
|
9 |
+
*
|
10 |
+
* Authors:
|
11 |
+
* S.C. Chen
|
12 |
+
* John Schlick
|
13 |
+
* Rus Carroll
|
14 |
+
* logmanoriginal
|
15 |
+
*
|
16 |
+
* Contributors:
|
17 |
+
* Yousuke Kumakura
|
18 |
+
* Vadim Voituk
|
19 |
+
* Antcs
|
20 |
+
*
|
21 |
+
* Version $Rev$
|
22 |
+
*/
|
23 |
+
|
24 |
+
include_once 'constants.php';
|
25 |
+
include_once 'HtmlNode.php';
|
26 |
+
include_once 'Debug.php';
|
27 |
+
|
28 |
+
class HtmlDocument
|
29 |
+
{
|
30 |
+
public $root = null;
|
31 |
+
public $nodes = array();
|
32 |
+
public $callback = null;
|
33 |
+
public $lowercase = false;
|
34 |
+
public $original_size;
|
35 |
+
public $size;
|
36 |
+
|
37 |
+
protected $pos;
|
38 |
+
protected $doc;
|
39 |
+
protected $char;
|
40 |
+
|
41 |
+
protected $cursor;
|
42 |
+
protected $parent;
|
43 |
+
protected $noise = array();
|
44 |
+
protected $token_blank = " \t\r\n";
|
45 |
+
protected $token_equal = ' =/>';
|
46 |
+
protected $token_slash = " />\r\n\t";
|
47 |
+
protected $token_attr = ' >';
|
48 |
+
|
49 |
+
public $_charset = '';
|
50 |
+
public $_target_charset = '';
|
51 |
+
|
52 |
+
public $default_br_text = '';
|
53 |
+
public $default_span_text = '';
|
54 |
+
|
55 |
+
protected $self_closing_tags = array(
|
56 |
+
'area' => 1,
|
57 |
+
'base' => 1,
|
58 |
+
'br' => 1,
|
59 |
+
'col' => 1,
|
60 |
+
'embed' => 1,
|
61 |
+
'hr' => 1,
|
62 |
+
'img' => 1,
|
63 |
+
'input' => 1,
|
64 |
+
'link' => 1,
|
65 |
+
'meta' => 1,
|
66 |
+
'param' => 1,
|
67 |
+
'source' => 1,
|
68 |
+
'track' => 1,
|
69 |
+
'wbr' => 1
|
70 |
+
);
|
71 |
+
protected $block_tags = array(
|
72 |
+
'body' => 1,
|
73 |
+
'div' => 1,
|
74 |
+
'form' => 1,
|
75 |
+
'root' => 1,
|
76 |
+
'span' => 1,
|
77 |
+
'table' => 1
|
78 |
+
);
|
79 |
+
protected $optional_closing_tags = array(
|
80 |
+
// Not optional, see
|
81 |
+
// https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
|
82 |
+
'b' => array('b' => 1),
|
83 |
+
'dd' => array('dd' => 1, 'dt' => 1),
|
84 |
+
// Not optional, see
|
85 |
+
// https://www.w3.org/TR/html/grouping-content.html#the-dl-element
|
86 |
+
'dl' => array('dd' => 1, 'dt' => 1),
|
87 |
+
'dt' => array('dd' => 1, 'dt' => 1),
|
88 |
+
'li' => array('li' => 1),
|
89 |
+
'optgroup' => array('optgroup' => 1, 'option' => 1),
|
90 |
+
'option' => array('optgroup' => 1, 'option' => 1),
|
91 |
+
'p' => array('p' => 1),
|
92 |
+
'rp' => array('rp' => 1, 'rt' => 1),
|
93 |
+
'rt' => array('rp' => 1, 'rt' => 1),
|
94 |
+
'td' => array('td' => 1, 'th' => 1),
|
95 |
+
'th' => array('td' => 1, 'th' => 1),
|
96 |
+
'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
|
97 |
+
);
|
98 |
+
|
99 |
+
/**
 * Handle calls to methods that are not defined on this class.
 *
 * - "clear" is accepted and does nothing.
 * - "load_file" is a deprecated alias of loadFile(); the call is logged and
 *   forwarded.
 * - Any other name raises an E_USER_ERROR.
 *
 * @param string $func Name of the method that was called.
 * @param array  $args Arguments of the call.
 * @return mixed Result of the forwarded call; null for "clear".
 */
function __call($func, $args)
{
    if ($func === 'clear') {
        return; /* no-op */
    }

    // Allow users to call methods with lower_case syntax
    if ($func === 'load_file') {
        $actual_function = 'loadFile';
    } else {
        trigger_error(
            'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
            E_USER_ERROR
        );
    }

    // phpcs:ignore Generic.Files.LineLength
    Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');

    $target = array($this, $actual_function);

    return call_user_func_array($target, $args);
}
|
119 |
+
|
120 |
+
/**
 * Create a document, optionally loading content right away.
 *
 * A non-empty $str that starts with "http://" (case-insensitive) or names an
 * existing file is loaded through load_file(); any other non-empty $str is
 * parsed as markup via load(). An empty $str only prepares an empty document.
 *
 * @param string|null $str             Markup, file name or http:// URL.
 * @param bool        $lowercase       Passed to load() / prepare().
 * @param bool        $forceTagsClosed False empties the list of tags whose
 *                                     end tag is treated as optional.
 * @param string      $target_charset  Stored as the target charset.
 * @param bool        $stripRN         Passed to load().
 * @param string      $defaultBRText   Passed to load() / prepare().
 * @param string      $defaultSpanText Passed to load() / prepare().
 * @param int         $options         Passed to load().
 */
function __construct(
    $str = null,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0)
{
    // Forcing tags to be closed implies that we don't trust the html, but
    // it can lead to parsing errors if we SHOULD trust the html.
    //
    // This has to happen before anything is parsed and has to target the
    // property the parser actually reads (optional_closing_tags); assigning
    // to an unrelated property name has no effect on parsing.
    if (!$forceTagsClosed) {
        $this->optional_closing_tags = array();
    }

    if ($str) {
        if (preg_match('/^http:\/\//i', $str) || is_file($str)) {
            $this->load_file($str);
        } else {
            $this->load(
                $str,
                $lowercase,
                $stripRN,
                $defaultBRText,
                $defaultSpanText,
                $options
            );
        }
    } else {
        $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);
    }

    $this->_target_charset = $target_charset;
}
|
154 |
+
|
155 |
+
/**
 * Reduced view of the document for var_dump() and friends.
 *
 * @return array Root node, noise table (or the string 'none' when empty),
 *               both charsets and the original size.
 */
function __debugInfo()
{
    $info = array();

    $info['root'] = $this->root;

    if (empty($this->noise)) {
        $info['noise'] = 'none';
    } else {
        $info['noise'] = $this->noise;
    }

    $info['charset'] = $this->_charset;
    $info['target charset'] = $this->_target_charset;
    $info['original size'] = $this->original_size;

    return $info;
}
|
165 |
+
|
166 |
+
/**
 * Call clear() on every node registered with this document.
 */
function __destruct()
{
    if (!isset($this->nodes)) {
        return;
    }

    foreach ($this->nodes as $registered_node) {
        $registered_node->clear();
    }
}
|
174 |
+
|
175 |
+
/**
 * Parse $str into this document, replacing any previous content.
 *
 * @param string $str             Markup to parse.
 * @param bool   $lowercase       Passed to prepare().
 * @param bool   $stripRN         When true, whitespace/newlines between tags
 *                                and in text are collapsed before parsing and
 *                                the parser runs in trim mode.
 * @param string $defaultBRText   Passed to prepare().
 * @param string $defaultSpanText Passed to prepare().
 * @param int    $options         Bit mask; HDOM_SMARTY_AS_TEXT strips
 *                                Smarty-style "{...}" blocks.
 * @return $this
 */
function load(
    $str,
    $lowercase = true,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0)
{
    // prepare
    $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);

    if ($stripRN) {
        // Temporarily remove any element that shouldn't lose whitespace
        $this->remove_noise("'<\s*script[^>]*>(.*?)<\s*/\s*script\s*>'is");
        $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is");
        $this->remove_noise("'<!--(?!>|\->)(.*?)-->'is");
        $this->remove_noise("'<\s*style[^>]*>(.*?)<\s*/\s*style\s*>'is");
        $this->remove_noise("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is");

        // Edited By Speed Booster Pack. Do not minify html!
        // NOTE(review): despite the remark above, both replacements below
        // are still active in this version - confirm which one is intended.

        // Remove whitespace and newlines between tags.
        // preg_replace() returns null on a PCRE error; keep the document
        // untouched in that case instead of replacing it with null.
        $collapsed = preg_replace('/\>([\t\s]*[\r\n]^[\t\s]*)\</m', '><', $this->doc);
        if ($collapsed !== null) {
            $this->doc = $collapsed;
        }

        // Remove whitespace and newlines in text
        $collapsed = preg_replace('/([\t\s]*[\r\n]^[\t\s]*)/m', ' ', $this->doc);
        if ($collapsed !== null) {
            $this->doc = $collapsed;
        }

        // Restore temporarily removed elements and calculate new size
        $this->doc = $this->restore_noise($this->doc);
        $this->size = strlen($this->doc);
    }

    $this->remove_noise("'(<\?)(.*?)(\?>)'s", true); // server-side script
    if (count($this->noise)) {
        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Support for server-side scripts has been deprecated and will be removed in the next major version of simplehtmldom.');
    }

    if ($options & HDOM_SMARTY_AS_TEXT) { // Strip Smarty scripts
        $this->remove_noise("'(\{\w)(.*?)(\})'s", true);
        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Support for Smarty scripts has been deprecated and will be removed in the next major version of simplehtmldom.');
    }

    // parsing
    $this->parse($stripRN);
    // end
    $this->root->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
    $this->parse_charset();
    $this->decode();
    // The raw source is no longer needed once the tree has been built
    unset($this->doc);

    // make load function chainable
    return $this;
}
|
229 |
+
|
230 |
+
/**
 * Store a callback on the document.
 *
 * @param callable $function_name Value assigned to $this->callback.
 * @return void
 */
function set_callback($function_name)
{
    $callback = $function_name;

    $this->callback = $callback;
}
|
234 |
+
|
235 |
+
/**
 * Drop the stored callback by resetting $this->callback to null.
 *
 * @return void
 */
function remove_callback()
{
    $none = null;

    $this->callback = $none;
}
|
239 |
+
|
240 |
+
/**
 * Return the document markup and optionally write it to a file.
 *
 * @param string $filepath Target file; '' (default) skips writing. The file
 *                         is written with an exclusive lock.
 * @return string Inner text of the root node.
 */
function save($filepath = '')
{
    $markup = $this->root->innertext();

    if ($filepath === '') {
        return $markup;
    }

    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
|
246 |
+
|
247 |
+
/**
 * Forward a selector query to find() of the root node.
 *
 * @param string   $selector  Selector expression.
 * @param int|null $idx       Passed through unchanged.
 * @param bool     $lowercase Passed through unchanged.
 * @return mixed Whatever the root node returns.
 */
function find($selector, $idx = null, $lowercase = false)
{
    $root = $this->root;

    return $root->find($selector, $idx, $lowercase);
}
|
251 |
+
|
252 |
+
/**
 * Forward a selector query to expect() of the root node.
 *
 * @param string   $selector  Selector expression.
 * @param int|null $idx       Passed through unchanged.
 * @param bool     $lowercase Passed through unchanged.
 * @return mixed Whatever the root node returns.
 */
function expect($selector, $idx = null, $lowercase = false)
{
    $root = $this->root;

    return $root->expect($selector, $idx, $lowercase);
}
|
256 |
+
|
257 |
+
/**
 * Forward to dump() of the root node.
 *
 * @param bool $show_attr Passed through unchanged.
 * @return void
 *
 * @codeCoverageIgnore
 */
function dump($show_attr = true)
{
    $root = $this->root;

    $root->dump($show_attr);
}
|
262 |
+
|
263 |
+
/**
 * Reset the parser state and install $str as the document to parse.
 *
 * The input is trimmed, all counters and tables are reset, and a fresh root
 * node is created that also becomes the current parent.
 *
 * @param string $str             Markup to parse.
 * @param bool   $lowercase       Stored in $this->lowercase.
 * @param string $defaultBRText   Stored in $this->default_br_text.
 * @param string $defaultSpanText Stored in $this->default_span_text.
 * @return void
 */
protected function prepare(
    $str, $lowercase = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT)
{
    $this->clear();

    // Source text and its size
    $this->doc = trim($str);
    $length = strlen($this->doc);
    $this->size = $length;
    $this->original_size = $length; // original size of the html

    // Parser state
    $this->pos = 0;
    $this->cursor = 1;
    $this->noise = array();
    $this->nodes = array();

    // Options
    $this->lowercase = $lowercase;
    $this->default_br_text = $defaultBRText;
    $this->default_span_text = $defaultSpanText;

    // Root node, which is also the initial parent
    $this->root = new HtmlNode($this);
    $root = $this->root;
    $root->tag = 'root';
    $root->_[HtmlNode::HDOM_INFO_BEGIN] = -1;
    $root->nodetype = HtmlNode::HDOM_TYPE_ROOT;
    $this->parent = $root;

    if ($length > 0) {
        $this->char = $this->doc[0];
    }
}
|
287 |
+
|
288 |
+
/**
 * Decode HTML entities in the text, inner content and attribute values of
 * every registered node.
 *
 * Text and inner content are passed through restore_noise() first;
 * attribute values equal to boolean true are left untouched.
 *
 * @return void
 */
protected function decode()
{
    $flags = ENT_QUOTES | ENT_HTML5;
    $slots = array(HtmlNode::HDOM_INFO_TEXT, HtmlNode::HDOM_INFO_INNER);

    foreach ($this->nodes as $node) {
        foreach ($slots as $slot) {
            if (!isset($node->_[$slot])) {
                continue;
            }

            $restored = $this->restore_noise($node->_[$slot]);
            $node->_[$slot] = html_entity_decode($restored, $flags, $this->_target_charset);
        }

        if (!isset($node->attr) || !is_array($node->attr)) {
            continue;
        }

        foreach ($node->attr as $attr_name => $attr_value) {
            if ($attr_value === true) {
                continue;
            }

            $node->attr[$attr_name] = html_entity_decode(
                $attr_value,
                $flags,
                $this->_target_charset
            );
        }
    }
}
|
317 |
+
|
318 |
+
/**
 * Main parser loop: alternately collect text up to the next "<" as a text
 * node and let read_tag() handle the tag, until read_tag() returns false.
 *
 * @param bool $trim When true, text consisting only of whitespace is dropped
 *                   instead of becoming a text node; also passed to read_tag().
 * @return void
 */
protected function parse($trim = false)
{
    for (;;) {
        if ($this->char !== '<') {
            $text = $this->copy_until_char('<');

            if ($text !== '') {
                // Skip whitespace between tags? (</a> <b>)
                if ($trim && trim($text) === '') {
                    // Restart the loop without reading a tag
                    continue;
                }

                $text_node = new HtmlNode($this);
                ++$this->cursor;
                $text_node->_[HtmlNode::HDOM_INFO_TEXT] = $text;
                $this->link_nodes($text_node, false);
            }
        }

        if ($this->read_tag($trim) === false) {
            return;
        }
    }
}
|
345 |
+
|
346 |
+
/**
 * Determine the charset of the source document and store it in
 * $this->_charset.
 *
 * Sources are tried in this order, each only if the previous ones yielded
 * nothing:
 *  1. get_last_retrieve_url_contents_content_type(), if that function exists
 *  2. <meta http-equiv="Content-Type" content="...charset=...">
 *  3. <meta charset="...">
 *  4. mb_detect_encoding(), if available
 *  5. fixed fallback "UTF-8"
 *
 * "iso-8859-1", "latin1" and "latin-1" are reported as "CP1252".
 *
 * @return string The detected charset.
 */
protected function parse_charset()
{
    $charset = null;

    // 1. Content type reported by the retrieval helper, when present
    if (function_exists('get_last_retrieve_url_contents_content_type')) {
        $contentTypeHeader = get_last_retrieve_url_contents_content_type();
        $success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches);

        if ($success) {
            $charset = $matches[1];
        }

        $outcome = $success ? 'successful' : 'failed';
        // phpcs:ignore Generic.Files.LineLength
        Debug::log('Determining charset using get_last_retrieve_url_contents_content_type() ' . $outcome);
    }

    // 2. https://www.w3.org/TR/html/document-metadata.html#statedef-http-equiv-content-type
    if (empty($charset)) {
        $el = $this->root->find('meta[http-equiv=Content-Type]', 0, true);

        if (!empty($el)) {
            $fullvalue = $el->content;

            if (!empty($fullvalue)) {
                $success = preg_match('/charset=(.+)/i', $fullvalue, $matches);

                if ($success) {
                    $charset = $matches[1];
                }
            }
        }
    }

    // 3. https://www.w3.org/TR/html/document-metadata.html#character-encoding-declaration
    if (empty($charset)) {
        $meta = $this->root->find('meta[charset]', 0);

        if ($meta) {
            $charset = $meta->charset;
        }
    }

    // 4. Try to guess the charset based on the content
    //    Requires Multibyte String (mbstring) support (optional)
    if (empty($charset) && function_exists('mb_detect_encoding')) {
        /**
         * mb_detect_encoding() is not intended to distinguish between
         * charsets, especially single-byte charsets. Its primary
         * purpose is to detect which multibyte encoding is in use,
         * i.e. UTF-8, UTF-16, shift-JIS, etc.
         *
         * -- https://bugs.php.net/bug.php?id=38138
         *
         * Adding both CP1251/ISO-8859-5 and CP1252/ISO-8859-1 will
         * always result in CP1251/ISO-8859-5 and vice versa.
         *
         * Thus, only detect if it's either UTF-8 or CP1252/ISO-8859-1
         * to stay compatible.
         */
        $candidates = array( 'UTF-8', 'CP1252', 'ISO-8859-1' );
        $encoding = mb_detect_encoding($this->doc, $candidates);

        $is_western = ($encoding === 'CP1252' || $encoding === 'ISO-8859-1');

        // Due to a limitation of mb_detect_encoding
        // 'CP1251'/'ISO-8859-5' will be detected as
        // 'CP1252'/'ISO-8859-1'. This will cause iconv to fail, in
        // which case we can simply assume it is the other charset.
        if ($is_western && !@iconv('CP1252', 'UTF-8', $this->doc)) {
            $encoding = 'CP1251';
        }

        if ($encoding !== false) {
            $charset = $encoding;
        }
    }

    // 5. Nothing worked
    if (empty($charset)) {
        Debug::log('Unable to determine charset from source document. Assuming UTF-8');
        $charset = 'UTF-8';
    }

    // Since CP1252 is a superset, if we get one of its subsets, we want
    // it instead.
    $latin_aliases = array('iso-8859-1', 'latin1', 'latin-1');
    if (in_array(strtolower($charset), $latin_aliases, true)) {
        $charset = 'CP1252';
    }

    $this->_charset = $charset;

    return $charset;
}
|
443 |
+
|
444 |
+
protected function read_tag($trim)
|
445 |
+
{
|
446 |
+
if ($this->char !== '<') { // End Of File
|
447 |
+
$this->root->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
|
448 |
+
|
449 |
+
// We might be in a nest of unclosed elements for which the end tags
|
450 |
+
// can be omitted. Close them for faster seek operations.
|
451 |
+
do {
|
452 |
+
if (isset($this->optional_closing_tags[strtolower($this->parent->tag)])) {
|
453 |
+
$this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
|
454 |
+
}
|
455 |
+
} while ($this->parent = $this->parent->parent);
|
456 |
+
|
457 |
+
return false;
|
458 |
+
}
|
459 |
+
|
460 |
+
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
|
461 |
+
|
462 |
+
if ($trim) { // "< /html>"
|
463 |
+
$this->skip($this->token_blank);
|
464 |
+
}
|
465 |
+
|
466 |
+
// End tag: https://dev.w3.org/html5/pf-summary/syntax.html#end-tags
|
467 |
+
if ($this->char === '/') {
|
468 |
+
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
|
469 |
+
|
470 |
+
$tag = $this->copy_until_char('>');
|
471 |
+
$tag = $trim ? ltrim($tag, $this->token_blank) : $tag;
|
472 |
+
|
473 |
+
// Skip attributes and whitespace in end tags
|
474 |
+
if ($trim && ($pos = strpos($tag, ' ')) !== false) {
|
475 |
+
// phpcs:ignore Generic.Files.LineLength
|
476 |
+
Debug::log_once('Source document contains superfluous whitespace in end tags (</html >).');
|
477 |
+
$tag = substr($tag, 0, $pos);
|
478 |
+
}
|
479 |
+
|
480 |
+
if (strcasecmp($this->parent->tag, $tag)) { // Parent is not start tag
|
481 |
+
$parent_lower = strtolower($this->parent->tag);
|
482 |
+
$tag_lower = strtolower($tag);
|
483 |
+
if (isset($this->optional_closing_tags[$parent_lower]) && isset($this->block_tags[$tag_lower])) {
|
484 |
+
$org_parent = $this->parent;
|
485 |
+
|
486 |
+
// Look for the start tag
|
487 |
+
while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower){
|
488 |
+
// Close any unclosed element with optional end tags
|
489 |
+
if (isset($this->optional_closing_tags[strtolower($this->parent->tag)]))
|
490 |
+
$this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
|
491 |
+
$this->parent = $this->parent->parent;
|
492 |
+
}
|
493 |
+
|
494 |
+
// No start tag, close grandparent
|
495 |
+
if (strtolower($this->parent->tag) !== $tag_lower) {
|
496 |
+
$this->parent = $org_parent;
|
497 |
+
|
498 |
+
if ($this->parent->parent) {
|
499 |
+
$this->parent = $this->parent->parent;
|
500 |
+
}
|
501 |
+
|
502 |
+
$this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
|
503 |
+
return $this->as_text_node($tag);
|
504 |
+
}
|
505 |
+
} elseif (($this->parent->parent) && isset($this->block_tags[$tag_lower])) {
|
506 |
+
// grandparent exists + current is block tag
|
507 |
+
// Parent has no end tag
|
508 |
+
$this->parent->_[HtmlNode::HDOM_INFO_END] = 0;
|
509 |
+
$org_parent = $this->parent;
|
510 |
+
|
511 |
+
// Find start tag
|
512 |
+
while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower) {
|
513 |
+
$this->parent = $this->parent->parent;
|
514 |
+
}
|
515 |
+
|
516 |
+
// No start tag, close parent
|
517 |
+
if (strtolower($this->parent->tag) !== $tag_lower) {
|
518 |
+
$this->parent = $org_parent; // restore origonal parent
|
519 |
+
$this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
|
520 |
+
return $this->as_text_node($tag);
|
521 |
+
}
|
522 |
+
} elseif (($this->parent->parent) && strtolower($this->parent->parent->tag) === $tag_lower) {
|
523 |
+
// Grandparent exists and current tag closes it
|
524 |
+
$this->parent->_[HtmlNode::HDOM_INFO_END] = 0;
|
525 |
+
$this->parent = $this->parent->parent;
|
526 |
+
} else { // Random tag, add as text node
|
527 |
+
return $this->as_text_node($tag);
|
528 |
+
}
|
529 |
+
}
|
530 |
+
|
531 |
+
// Link with start tag
|
532 |
+
$this->parent->_[HtmlNode::HDOM_INFO_END] = $this->cursor;
|
533 |
+
|
534 |
+
if ($this->parent->parent) {
|
535 |
+
$this->parent = $this->parent->parent;
|
536 |
+
}
|
537 |
+
|
538 |
+
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
|
539 |
+
return true;
|
540 |
+
}
|
541 |
+
|
542 |
+
// Start tag: https://dev.w3.org/html5/pf-summary/syntax.html#start-tags
|
543 |
+
$node = new HtmlNode($this);
|
544 |
+
$node->_[HtmlNode::HDOM_INFO_BEGIN] = $this->cursor++;
|
545 |
+
|
546 |
+
// Tag name
|
547 |
+
$tag = $this->copy_until($this->token_slash);
|
548 |
+
|
549 |
+
if (isset($tag[0]) && $tag[0] === '!') { // Doctype, CData, Comment
|
550 |
+
if (isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') { // Comment ("<!--")
|
551 |
+
|
552 |
+
/**
|
553 |
+
* Comments must have the following format:
|
554 |
+
*
|
555 |
+
* 1. The string "<!--"
|
556 |
+
*
|
557 |
+
* 2. Optionally, text, with the additional restriction that the
|
558 |
+
* text must not start with the string ">", nor start with the
|
559 |
+
* string "->", nor contain the strings "<!--", "-->", or "--!>",
|
560 |
+
* nor end with the string "<!-".
|
561 |
+
*
|
562 |
+
* 3. The string "-->"
|
563 |
+
*
|
564 |
+
* -- https://www.w3.org/TR/html53/syntax.html#comments
|
565 |
+
*/
|
566 |
+
|
567 |
+
// Go back until $tag only contains start of comment "!--".
|
568 |
+
while (strlen($tag) > 3) {
|
569 |
+
$this->char = $this->doc[--$this->pos]; // previous
|
570 |
+
$tag = substr($tag, 0, strlen($tag) - 1);
|
571 |
+
}
|
572 |
+
|
573 |
+
$node->nodetype = HtmlNode::HDOM_TYPE_COMMENT;
|
574 |
+
$node->tag = 'comment';
|
575 |
+
|
576 |
+
$data = '';
|
577 |
+
|
578 |
+
while(true) {
|
579 |
+
// Copy until first char of end tag
|
580 |
+
$data .= $this->copy_until_char('-');
|
581 |
+
|
582 |
+
// Look ahead in the document, maybe we are at the end
|
583 |
+
if (($this->pos + 3) > $this->size) { // End of document
|
584 |
+
Debug::log('Source document ended unexpectedly!');
|
585 |
+
break;
|
586 |
+
} elseif (substr($this->doc, $this->pos, 3) === '-->') { // end
|
587 |
+
$data .= $this->copy_until_char('>');
|
588 |
+
break;
|
589 |
+
}
|
590 |
+
|
591 |
+
$data .= $this->char;
|
592 |
+
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
|
593 |
+
}
|
594 |
+
|
595 |
+
if (substr($data, 0, 1) === '>') { // "<!-->"
|
596 |
+
Debug::log('Comment must not start with the string ">"!');
|
597 |
+
$this->pos -= strlen($data);
|
598 |
+
$this->char = $this->doc[$this->pos];
|
599 |
+
$data = '';
|
600 |
+
}
|
601 |
+
|
602 |
+
if (substr($data, 0, 2) === '->') { // "<!--->"
|
603 |
+
Debug::log('Comment must not start with the string "->"!');
|
604 |
+
$this->pos -= strlen($data);
|
605 |
+
$this->char = $this->doc[$this->pos];
|
606 |
+
$data = '';
|
607 |
+
}
|
608 |
+
|
609 |
+
if (strpos($data, '<!--') !== false) { // "<!--<!---->"
|
610 |
+
Debug::log('Comment must not contain the string "<!--"!');
|
611 |
+
// simplehtmldom can work with it anyway
|
612 |
+
}
|
613 |
+
|
614 |
+
if (strpos($data, '--!>') !== false) { // "<!----!>-->"
|
615 |
+
Debug::log('Comment must not contain the string "--!>"!');
|
616 |
+
// simplehtmldom can work with it anyway
|
617 |
+
}
|
618 |
+
|
619 |
+
if (substr($data, -3, 3) === '<!-') { // "<!--<!--->"
|
620 |
+
Debug::log('Comment must not end with "<!-"!');
|
621 |
+
// simplehtmldom can work with it anyway
|
622 |
+
}
|
623 |
+
|
624 |
+
$tag .= $data;
|
625 |
+
$tag = $this->restore_noise($tag);
|
626 |
+
|
627 |
+
// Comment starts after "!--" and ends before "--" (5 chars total)
|
628 |
+
$node->_[HtmlNode::HDOM_INFO_INNER] = substr($tag, 3, strlen($tag) - 5);
|
629 |
+
} elseif (substr($tag, 1, 7) === '[CDATA[') {
|
630 |
+
|
631 |
+
// Go back until $tag only contains start of cdata "![CDATA[".
|
632 |
+
while (strlen($tag) > 8) {
|
633 |
+
$this->char = $this->doc[--$this->pos]; // previous
|
634 |
+
$tag = substr($tag, 0, strlen($tag) - 1);
|
635 |
+
}
|
636 |
+
|
637 |
+
// CDATA can contain HTML stuff, need to find closing tags first
|
638 |
+
$node->nodetype = HtmlNode::HDOM_TYPE_CDATA;
|
639 |
+
$node->tag = 'cdata';
|
640 |
+
|
641 |
+
$data = '';
|
642 |
+
|
643 |
+
// There is a rare chance of empty CDATA: "<[CDATA[]]>"
|
644 |
+
// In which case the current char is the first "[" of the end tag
|
645 |
+
// But the CDATA could also just be a bracket: "<[CDATA[]]]>"
|
646 |
+
while(true) {
|
647 |
+
// Copy until first char of end tag
|
648 |
+
$data .= $this->copy_until_char(']');
|
649 |
+
|
650 |
+
// Look ahead in the document, maybe we are at the end
|
651 |
+
if (($this->pos + 3) > $this->size) { // End of document
|
652 |
+
Debug::log('Source document ended unexpectedly!');
|
653 |
+
break;
|
654 |
+
} elseif (substr($this->doc, $this->pos, 3) === ']]>') { // end
|
655 |
+
$data .= $this->copy_until_char('>');
|
656 |
+
break;
|
657 |
+
}
|
658 |
+
|
659 |
+
$data .= $this->char;
|
660 |
+
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
|
661 |
+
}
|
662 |
+
|
663 |
+
$tag .= $data;
|
664 |
+
$tag = $this->restore_noise($tag);
|
665 |
+
|
666 |
+
// CDATA starts after "![CDATA[" and ends before "]]" (10 chars total)
|
667 |
+
$node->_[HtmlNode::HDOM_INFO_INNER] = substr($tag, 8, strlen($tag) - 10);
|
668 |
+
} else { // Unknown
|
669 |
+
Debug::log('Source document contains unknown declaration: <' . $tag);
|
670 |
+
$node->nodetype = HtmlNode::HDOM_TYPE_UNKNOWN;
|
671 |
+
$node->tag = 'unknown';
|
672 |
+
}
|
673 |
+
|
674 |
+
$node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');
|
675 |
+
|
676 |
+
if ($this->char === '>') {
|
677 |
+
$node->_[HtmlNode::HDOM_INFO_TEXT] .= '>';
|
678 |
+
}
|
679 |
+
|
680 |
+
$this->link_nodes($node, true);
|
681 |
+
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
|
682 |
+
return true;
|
683 |
+
}
|
684 |
+
|
685 |
+
if (!preg_match('/^\w[\w:-]*$/', $tag)) { // Invalid tag name
|
686 |
+
$node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');
|
687 |
+
|
688 |
+
if ($this->char === '>') { // End tag
|
689 |
+
$node->_[HtmlNode::HDOM_INFO_TEXT] .= '>';
|
690 |
+
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
|
691 |
+
}
|
692 |
+
|
693 |
+
$this->link_nodes($node, false);
|
694 |
+
Debug::log('Source document contains invalid tag name: ' . $node->_[HtmlNode::HDOM_INFO_TEXT]);
|
695 |
+
return true;
|
696 |
+
}
|
697 |
+
|
698 |
+
// Valid tag name
|
699 |
+
$node->nodetype = HtmlNode::HDOM_TYPE_ELEMENT;
|
700 |
+
$tag_lower = strtolower($tag);
|
701 |
+
$node->tag = ($this->lowercase) ? $tag_lower : $tag;
|
702 |
+
|
703 |
+
if (isset($this->optional_closing_tags[$tag_lower])) { // Optional closing tag
|
704 |
+
while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
|
705 |
+
// Previous element was the last element of ancestor
|
706 |
+
$this->parent->_[HtmlNode::HDOM_INFO_END] = $node->_[HtmlNode::HDOM_INFO_BEGIN] - 1;
|
707 |
+
$this->parent = $this->parent->parent;
|
708 |
+
}
|
709 |
+
$node->parent = $this->parent;
|
710 |
+
}
|
711 |
+
|
712 |
+
$guard = 0; // prevent infinity loop
|
713 |
+
|
714 |
+
// [0] Space between tag and first attribute
|
715 |
+
$space = array($this->copy_skip($this->token_blank), '', '');
|
716 |
+
|
717 |
+
do { // Parse attributes
|
718 |
+
$name = $this->copy_until($this->token_equal);
|
719 |
+
|
720 |
+
if ($name === '' && $this->char !== null && $space[0] === '') {
|
721 |
+
break;
|
722 |
+
}
|
723 |
+
|
724 |
+
if ($guard === $this->pos) { // Escape infinite loop
|
725 |
+
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
|
726 |
+
continue;
|
727 |
+
}
|
728 |
+
|
729 |
+
$guard = $this->pos;
|
730 |
+
|
731 |
+
if ($this->pos >= $this->size - 1 && $this->char !== '>') { // End Of File
|
732 |
+
Debug::log('Source document ended unexpectedly!');
|
733 |
+
$node->nodetype = HtmlNode::HDOM_TYPE_TEXT;
|
734 |
+
$node->_[HtmlNode::HDOM_INFO_END] = 0;
|
735 |
+
$node->_[HtmlNode::HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
|
736 |
+
$node->tag = 'text';
|
737 |
+
$this->link_nodes($node, false);
|
738 |
+
return true;
|
739 |
+
}
|
740 |
+
|
741 |
+
if ($name === '/' || $name === '') { // No more attributes
|
742 |
+
break;
|
743 |
+
}
|
744 |
+
|
745 |
+
// [1] Whitespace after attribute name
|
746 |
+
$space[1] = (strpos($this->token_blank, $this->char) === false) ? '' : $this->copy_skip($this->token_blank);
|
747 |
+
|
748 |
+
$name = $this->restore_noise($name); // might be a noisy name
|
749 |
+
|
750 |
+
if ($this->lowercase) {
|
751 |
+
$name = strtolower($name);
|
752 |
+
}
|
753 |
+
|
754 |
+
if ($this->char === '=') { // Attribute with value
|
755 |
+
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
|
756 |
+
$this->parse_attr($node, $name, $space, $trim); // get attribute value
|
757 |
+
} else { // Attribute without value
|
758 |
+
$node->_[HtmlNode::HDOM_INFO_QUOTE][$name] = HtmlNode::HDOM_QUOTE_NO;
|
759 |
+
$node->attr[$name] = true;
|
760 |
+
if ($this->char !== '>') {
|
761 |
+
$this->char = $this->doc[--$this->pos];
|
762 |
+
} // prev
|
763 |
+
}
|
764 |
+
|
765 |
+
// Space before attribute and around equal sign
|
766 |
+
if (!$trim && $space !== array(' ', '', '')) {
|
767 |
+
// phpcs:ignore Generic.Files.LineLength
|
768 |
+
Debug::log_once('Source document contains superfluous whitespace in attributes (<e attribute = "value">). Enable trimming or fix attribute spacing for best performance.');
|
769 |
+
$node->_[HtmlNode::HDOM_INFO_SPACE][$name] = $space;
|
770 |
+
}
|
771 |
+
|
772 |
+
// prepare for next attribute
|
773 |
+
$space = array(
|
774 |
+
((strpos($this->token_blank, $this->char) === false) ? '' : $this->copy_skip($this->token_blank)),
|
775 |
+
'',
|
776 |
+
''
|
777 |
+
);
|
778 |
+
} while ($this->char !== '>' && $this->char !== '/');
|
779 |
+
|
780 |
+
$this->link_nodes($node, true);
|
781 |
+
|
782 |
+
// Space after last attribute before closing the tag
|
783 |
+
if (!$trim && $space[0] !== '') {
|
784 |
+
// phpcs:ignore Generic.Files.LineLength
|
785 |
+
Debug::log_once('Source document contains superfluous whitespace before the closing braket (<e attribute="value" >). Enable trimming or remove spaces before closing brackets for best performance.');
|
786 |
+
$node->_[HtmlNode::HDOM_INFO_ENDSPACE] = $space[0];
|
787 |
+
}
|
788 |
+
|
789 |
+
$rest = ($this->char === '>') ? '' : $this->copy_until_char('>');
|
790 |
+
$rest = ($trim) ? trim($rest) : $rest; // <html / >
|
791 |
+
|
792 |
+
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
|
793 |
+
|
794 |
+
if (trim($rest) === '/') { // Void element
|
795 |
+
if ($rest !== '') {
|
796 |
+
if (isset($node->_[HtmlNode::HDOM_INFO_ENDSPACE])) {
|
797 |
+
$node->_[HtmlNode::HDOM_INFO_ENDSPACE] .= $rest;
|
798 |
+
} else {
|
799 |
+
$node->_[HtmlNode::HDOM_INFO_ENDSPACE] = $rest;
|
800 |
+
}
|
801 |
+
}
|
802 |
+
$node->_[HtmlNode::HDOM_INFO_END] = 0;
|
803 |
+
} elseif (!isset($this->self_closing_tags[strtolower($node->tag)])) {
|
804 |
+
$innertext = $this->copy_until_char('<');
|
805 |
+
if ($innertext !== '') {
|
806 |
+
$node->_[HtmlNode::HDOM_INFO_INNER] = $innertext;
|
807 |
+
}
|
808 |
+
$this->parent = $node;
|
809 |
+
}
|
810 |
+
|
811 |
+
if ($node->tag === 'br') {
|
812 |
+
$node->_[HtmlNode::HDOM_INFO_INNER] = $this->default_br_text;
|
813 |
+
} elseif ($node->tag === 'script') {
|
814 |
+
$data = '';
|
815 |
+
|
816 |
+
// There is a rare chance of empty script: "<script></script>"
|
817 |
+
// In which case the current char is the start of the end tag
|
818 |
+
// But the script could also just contain tags: "<script><div></script>"
|
819 |
+
while(true) {
|
820 |
+
// Copy until first char of end tag
|
821 |
+
$data .= $this->copy_until_char('<');
|
822 |
+
|
823 |
+
// Look ahead in the document, maybe we are at the end
|
824 |
+
if (($this->pos + 9) > $this->size) { // End of document
|
825 |
+
Debug::log('Source document ended unexpectedly!');
|
826 |
+
break;
|
827 |
+
} elseif (substr($this->doc, $this->pos, 8) === '</script') { // end
|
828 |
+
$this->skip('>'); // don't include the end tag
|
829 |
+
break;
|
830 |
+
}
|
831 |
+
|
832 |
+
// Note: A script tag may contain any other tag except </script>
|
833 |
+
// which needs to be escaped as <\/script>
|
834 |
+
|
835 |
+
$data .= $this->char;
|
836 |
+
$this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
|
837 |
+
}
|
838 |
+
|
839 |
+
$node = new HtmlNode($this);
|
840 |
+
++$this->cursor;
|
841 |
+
$node->_[HtmlNode::HDOM_INFO_TEXT] = $data;
|
842 |
+
$this->link_nodes($node, false);
|
843 |
+
}
|
844 |
+
|
845 |
+
return true;
|
846 |
+
}
|
847 |
+
|
848 |
+
/**
 * Reads the value of attribute $name at the current document position and
 * stores it on $node.
 *
 * Only the first occurrence of an attribute is stored: when $name is already
 * present on the node the value is still consumed from the document, but
 * neither the value, the quote type nor $space[2] is recorded.
 *
 * @param object $node  Node that receives the attribute.
 * @param string $name  Attribute name.
 * @param array  $space Whitespace triple (by reference); index 2 receives the
 *                      blanks between "=" and the value.
 * @param bool   $trim  When true, whitespace runs in the value are collapsed
 *                      to a single space and the value is trimmed.
 */
protected function parse_attr($node, $name, &$space, $trim)
{
    $already_set = isset($node->attr[$name]);

    if (!$already_set) { // Copy whitespace between "=" and value
        $space[2] = (strpos($this->token_blank, $this->char) !== false)
            ? $this->copy_skip($this->token_blank)
            : '';
    }

    $delimiter = $this->char;

    if ($delimiter === '"' || $delimiter === '\'') {
        if ($delimiter === '"') {
            $quote_type = HtmlNode::HDOM_QUOTE_DOUBLE;
        } else {
            // phpcs:ignore Generic.Files.LineLength
            Debug::log_once('Source document contains attribute values with single quotes (<e attribute=\'value\'>). Use double quotes for best performance.');
            $quote_type = HtmlNode::HDOM_QUOTE_SINGLE;
        }

        // Step over the opening quote, copy the value, step over the closing quote
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null;
        $value = $this->copy_until_char($delimiter);
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null;
    } else {
        // phpcs:ignore Generic.Files.LineLength
        Debug::log_once('Source document contains attribute values without quotes (<e attribute=value>). Use double quotes for best performance');
        $quote_type = HtmlNode::HDOM_QUOTE_NO;
        $value = $this->copy_until($this->token_attr);
    }

    $value = $this->restore_noise($value);

    if ($trim) {
        // Attribute values must not contain control characters other than space
        // https://www.w3.org/TR/html/dom.html#text-content
        // https://www.w3.org/TR/html/syntax.html#attribute-values
        // https://www.w3.org/TR/xml/#AVNormalize
        $value = trim(preg_replace("/[\r\n\t\s]+/u", ' ', $value));
    }

    if ($already_set) {
        return;
    }

    // Double quotes are the default, only deviations are recorded
    if ($quote_type !== HtmlNode::HDOM_QUOTE_DOUBLE) {
        $node->_[HtmlNode::HDOM_INFO_QUOTE][$name] = $quote_type;
    }

    $node->attr[$name] = $value;
}
|
895 |
+
|
896 |
+
/**
 * Attaches $node to the parser's current parent.
 *
 * The node is always appended to the parent's "nodes" list; it is appended
 * to the "children" list as well only when $is_child is true.
 *
 * @param object $node     Node to attach (by reference).
 * @param bool   $is_child Whether the node also counts as a child element.
 */
protected function link_nodes(&$node, $is_child)
{
    $owner = $this->parent;

    $node->parent = $owner;
    $owner->nodes[] = $node;

    if ($is_child) {
        $owner->children[] = $node;
    }
}
|
904 |
+
|
905 |
+
/**
 * Records a closing tag as a plain text node under the current parent and
 * advances the parser by one character.
 *
 * @param string $tag Tag name; stored as the text "</$tag>".
 * @return bool Always true.
 */
protected function as_text_node($tag)
{
    $text_node = new HtmlNode($this);
    ++$this->cursor;

    $text_node->_[HtmlNode::HDOM_INFO_TEXT] = '</' . $tag . '>';
    $this->link_nodes($text_node, false);

    // next
    ++$this->pos;
    $this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null;

    return true;
}
|
914 |
+
|
915 |
+
/**
 * Advances the position past the run of characters from $chars that starts
 * at the current position, then refreshes the current character (null once
 * the position is at or beyond the document size).
 *
 * @param string $chars Set of characters to skip.
 */
protected function skip($chars)
{
    $run = strspn($this->doc, $chars, $this->pos);
    $this->pos = $this->pos + $run;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos]; // next
    } else {
        $this->char = null;
    }
}
|
920 |
+
|
921 |
+
/**
 * Consumes the run of characters from $chars that starts at the current
 * position and returns it.
 *
 * When the run is empty nothing is consumed and the parser state is left
 * untouched.
 *
 * @param string $chars Set of characters to consume.
 * @return string The consumed run, or '' when there is none.
 */
protected function copy_skip($chars)
{
    $start = $this->pos;
    $run = strspn($this->doc, $chars, $start);

    if ($run === 0) {
        return '';
    }

    $this->pos += $run;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos]; // next
    } else {
        $this->char = null;
    }

    return substr($this->doc, $start, $run);
}
|
930 |
+
|
931 |
+
/**
 * Consumes everything from the current position up to (not including) the
 * first character contained in $chars and returns it.
 *
 * @param string $chars Set of characters that stop the copy.
 * @return string The consumed text.
 */
protected function copy_until($chars)
{
    $start = $this->pos;
    $span = strcspn($this->doc, $chars, $start);

    $this->pos += $span;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos]; // next
    } else {
        $this->char = null;
    }

    return substr($this->doc, $start, $span);
}
|
939 |
+
|
940 |
+
/**
 * Consumes everything from the current position up to (not including) the
 * next occurrence of $char and returns it.
 *
 * - Returns '' without moving when the current character is null or when
 *   $char is found at the current position.
 * - When $char does not occur again, the remainder of the document is
 *   returned, the position is set to the document size and the current
 *   character becomes null.
 *
 * @param string $char Needle that stops the copy.
 * @return string The consumed text.
 */
protected function copy_until_char($char)
{
    if ($this->char === null) {
        return '';
    }

    $start = $this->pos;
    $hit = strpos($this->doc, $char, $start);

    if ($hit === false) {
        // Needle not found: hand out the rest of the document
        $remainder = substr($this->doc, $start, $this->size - $start);
        $this->char = null;
        $this->pos = $this->size;
        return $remainder;
    }

    if ($hit === $start) {
        return '';
    }

    $this->char = $this->doc[$hit];
    $this->pos = $hit;

    return substr($this->doc, $start, $hit - $start);
}
|
958 |
+
|
959 |
+
/**
 * Replaces every match of $pattern in the document with a placeholder key
 * and remembers the replaced text in $this->noise under that key.
 *
 * Keys have the form "___noise___" followed by a number padded to five
 * characters ('% 5d' of the current noise count plus 1000). Matches are
 * processed from last to first so the recorded offsets of earlier matches
 * stay valid while the document is rewritten. Afterwards the cached
 * document size and, for a non-empty document, the current character are
 * refreshed.
 *
 * @param string $pattern    Regular expression describing the noise.
 * @param bool   $remove_tag True to replace the entire match, false to
 *                           replace only the first sub-match.
 */
protected function remove_noise($pattern, $remove_tag = false)
{
    $total = preg_match_all(
        $pattern,
        $this->doc,
        $matches,
        PREG_SET_ORDER | PREG_OFFSET_CAPTURE
    );

    $group = ($remove_tag) ? 0 : 1; // 0 = entire match, 1 = submatch

    $i = $total - 1;

    while ($i >= 0) {
        $hit = $matches[$i][$group]; // array(matched text, byte offset)
        $key = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);

        $this->noise[$key] = $hit[0];
        $this->doc = substr_replace($this->doc, $key, $hit[1], strlen($hit[0]));

        --$i;
    }

    // reset the length of content
    $this->size = strlen($this->doc);

    if ($this->size > 0) {
        $this->char = $this->doc[0];
    }
}
|
983 |
+
|
984 |
+
/**
 * Replaces noise placeholders in $text with the original text recorded in
 * $this->noise.
 *
 * A placeholder is the marker "___noise___" followed by five key characters
 * (16 characters in total). Each restored entry is removed from
 * $this->noise. The scan resumes at the position of the replacement, so
 * placeholders contained in restored text are restored as well.
 *
 * Failure cases:
 * - Unknown key: the placeholder is replaced with
 *   "UNDEFINED NOISE FOR KEY: <key>" and scanning continues AFTER that
 *   notice. The notice repeats the key, which itself contains the marker;
 *   rescanning it would match the same unknown key again and grow the text
 *   without end.
 * - Fewer than five characters after the marker: the marker is replaced
 *   with "NO NUMERIC NOISE KEY".
 *
 * @param string $text Text that may contain noise placeholders.
 * @return string Text with placeholders replaced.
 */
function restore_noise($text)
{
    if (empty($this->noise)) return $text; // nothing to restore

    $pos = 0;

    while (($pos = strpos($text, '___noise___', $pos)) !== false) {
        if (strlen($text) > $pos + 15) {
            // Marker (11 chars) plus five key characters
            $key = substr($text, $pos, 16);

            if (isset($this->noise[$key])) {
                $text = substr($text, 0, $pos)
                    . $this->noise[$key]
                    . substr($text, $pos + 16);

                unset($this->noise[$key]);
            } else {
                Debug::log_once('Noise restoration failed. DOM has been corrupted!');

                $notice = 'UNDEFINED NOISE FOR KEY: ' . $key;

                $text = substr($text, 0, $pos)
                    . $notice
                    . substr($text, $pos + 16);

                // Continue behind the notice; it embeds the marker again and
                // must not be re-examined (prevents an infinite loop).
                $pos += strlen($notice);
            }
        } else {
            // There is no valid key being given back to us... We must get
            // rid of the ___noise___ or we will have a problem.
            Debug::log_once('Noise restoration failed. The provided key is incomplete: ' . $text);

            $text = substr($text, 0, $pos)
                . 'NO NUMERIC NOISE KEY'
                . substr($text, $pos + 11);
        }
    }

    return $text;
}
|
1029 |
+
|
1030 |
+
/**
 * Returns the first recorded noise element that contains $text.
 *
 * @param string $text Text to look for.
 * @return string|null The matching noise element; null (implicitly) when
 *                     no element contains $text.
 */
function search_noise($text)
{
    foreach ($this->noise as $fragment) {
        if (strpos($fragment, $text) === false) {
            continue;
        }

        return $fragment;
    }
}
|
1038 |
+
|
1039 |
+
/**
 * String conversion: yields the inner text of the root node.
 *
 * @return string
 */
function __toString()
{
    $root = $this->root;

    return $root->innertext();
}
|
1043 |
+
|
1044 |
+
/**
 * Magic read access to virtual properties.
 *
 * - "outertext" and "innertext": inner text of the root node
 * - "plaintext": text of the root node
 * - "charset" / "target_charset": the corresponding stored charsets
 *
 * Any other name yields null (no explicit return).
 *
 * @param string $name Property name.
 * @return mixed
 */
function __get($name)
{
    if ($name === 'outertext' || $name === 'innertext') {
        return $this->root->innertext();
    }

    if ($name === 'plaintext') {
        return $this->root->text();
    }

    if ($name === 'charset') {
        return $this->_charset;
    }

    if ($name === 'target_charset') {
        return $this->_target_charset;
    }
}
|
1059 |
+
|
1060 |
+
/**
 * Delegates to childNodes() of the root node.
 *
 * @param int $idx Forwarded unchanged to the root node (default -1).
 * @return mixed Whatever the root node returns.
 */
function childNodes($idx = -1)
{
    $root = $this->root;

    return $root->childNodes($idx);
}
|
1064 |
+
|
1065 |
+
/**
 * Delegates to firstChild() of the root node.
 *
 * @return mixed Whatever the root node returns.
 */
function firstChild()
{
    $root = $this->root;

    return $root->firstChild();
}
|
1069 |
+
|
1070 |
+
/**
 * Delegates to lastChild() of the root node.
 *
 * @return mixed Whatever the root node returns.
 */
function lastChild()
{
    $root = $this->root;

    return $root->lastChild();
}
|
1074 |
+
|
1075 |
+
/**
 * Creates a detached element node (constructed without a document).
 *
 * The node gets begin and end info set to 1 and, when $value is not null,
 * $value as its inner text.
 *
 * @param string      $name  Tag name of the new element.
 * @param string|null $value Optional inner text.
 * @return HtmlNode The new element node.
 */
function createElement($name, $value = null)
{
    $element = new HtmlNode(null);

    $element->nodetype = HtmlNode::HDOM_TYPE_ELEMENT;
    $element->tag = $name;

    $element->_[HtmlNode::HDOM_INFO_BEGIN] = 1;
    $element->_[HtmlNode::HDOM_INFO_END] = 1;

    if ($value !== null) {
        $element->_[HtmlNode::HDOM_INFO_INNER] = $value;
    }

    return $element;
}
|
1090 |
+
|
1091 |
+
/**
 * Creates a text node constructed with this document.
 *
 * @param string|null $value Text content; nothing is stored when null.
 * @return HtmlNode The new text node.
 */
function createTextNode($value)
{
    $text_node = new HtmlNode($this);
    $text_node->nodetype = HtmlNode::HDOM_TYPE_TEXT;

    if ($value === null) {
        return $text_node;
    }

    $text_node->_[HtmlNode::HDOM_INFO_TEXT] = $value;

    return $text_node;
}
|
1102 |
+
|
1103 |
+
/**
 * Finds the first element matching the selector "#$id".
 *
 * @param string $id Element id.
 * @return mixed Result of find() for index 0.
 */
function getElementById($id)
{
    $selector = '#' . $id;

    return $this->find($selector, 0);
}
|
1107 |
+
|
1108 |
+
/**
 * Finds elements matching the selector "#$id".
 *
 * @param string   $id  Element id.
 * @param int|null $idx Forwarded unchanged to find() (default null).
 * @return mixed Result of find().
 */
function getElementsById($id, $idx = null)
{
    $selector = '#' . $id;

    return $this->find($selector, $idx);
}
|
1112 |
+
|
1113 |
+
/**
 * Finds the first element matching the selector $name.
 *
 * @param string $name Tag name used as selector.
 * @return mixed Result of find() for index 0.
 */
function getElementByTagName($name)
{
    $selector = $name;

    return $this->find($selector, 0);
}
|
1117 |
+
|
1118 |
+
/**
 * Finds elements matching the selector $name.
 *
 * @param string   $name Tag name used as selector.
 * @param int|null $idx  Forwarded unchanged to find() (default null).
 * @return mixed Result of find().
 */
function getElementsByTagName($name, $idx = null)
{
    $selector = $name;

    return $this->find($selector, $idx);
}
|
1122 |
+
|
1123 |
+
/**
 * Reads a file via file_get_contents() and loads its contents into this
 * document.
 *
 * All arguments passed to this method are forwarded to
 * file_get_contents(), so its optional parameters can be supplied too.
 *
 * @param string $file File name or URL.
 * @return false|null False when reading failed; otherwise no value is
 *                    returned.
 */
function loadFile($file)
{
    $arguments = func_get_args();
    $contents = call_user_func_array('file_get_contents', $arguments);

    if ($contents === false) {
        return false;
    }

    $this->load($contents, true);
}
|
1133 |
+
}
|
vendor/simplehtmldom/simplehtmldom/HtmlNode.php
CHANGED
@@ -1,1441 +1,1441 @@
|
|
1 |
-
<?php namespace simplehtmldom;
|
2 |
-
|
3 |
-
/**
|
4 |
-
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
-
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
-
*
|
7 |
-
* Licensed under The MIT License
|
8 |
-
* See the LICENSE file in the project root for more information.
|
9 |
-
*
|
10 |
-
* Authors:
|
11 |
-
* S.C. Chen
|
12 |
-
* John Schlick
|
13 |
-
* Rus Carroll
|
14 |
-
* logmanoriginal
|
15 |
-
*
|
16 |
-
* Contributors:
|
17 |
-
* Yousuke Kumakura
|
18 |
-
* Vadim Voituk
|
19 |
-
* Antcs
|
20 |
-
*
|
21 |
-
* Version $Rev$
|
22 |
-
*/
|
23 |
-
|
24 |
-
include_once 'constants.php';
|
25 |
-
include_once 'Debug.php';
|
26 |
-
|
27 |
-
class HtmlNode
|
28 |
-
{
|
29 |
-
const HDOM_TYPE_ELEMENT = 1;
|
30 |
-
const HDOM_TYPE_COMMENT = 2;
|
31 |
-
const HDOM_TYPE_TEXT = 3;
|
32 |
-
const HDOM_TYPE_ROOT = 5;
|
33 |
-
const HDOM_TYPE_UNKNOWN = 6;
|
34 |
-
const HDOM_TYPE_CDATA = 7;
|
35 |
-
|
36 |
-
const HDOM_QUOTE_DOUBLE = 0;
|
37 |
-
const HDOM_QUOTE_SINGLE = 1;
|
38 |
-
const HDOM_QUOTE_NO = 3;
|
39 |
-
|
40 |
-
const HDOM_INFO_BEGIN = 0;
|
41 |
-
const HDOM_INFO_END = 1;
|
42 |
-
const HDOM_INFO_QUOTE = 2;
|
43 |
-
const HDOM_INFO_SPACE = 3;
|
44 |
-
const HDOM_INFO_TEXT = 4;
|
45 |
-
const HDOM_INFO_INNER = 5;
|
46 |
-
const HDOM_INFO_OUTER = 6;
|
47 |
-
const HDOM_INFO_ENDSPACE = 7;
|
48 |
-
|
49 |
-
public $nodetype = self::HDOM_TYPE_TEXT;
|
50 |
-
public $tag = 'text';
|
51 |
-
public $attr = array();
|
52 |
-
public $children = array();
|
53 |
-
public $nodes = array();
|
54 |
-
public $parent = null;
|
55 |
-
public $_ = array();
|
56 |
-
private $dom = null;
|
57 |
-
|
58 |
-
/**
 * Forwards deprecated lower_case method names to their camelCase
 * counterparts and logs a deprecation message for each forwarded call.
 *
 * Unknown names raise an E_USER_ERROR. If an error handler lets execution
 * continue after that error, null is returned instead of attempting to
 * invoke an undefined callback.
 *
 * @param string $func Name of the method that was called.
 * @param array  $args Arguments of the call.
 * @return mixed Result of the forwarded method.
 */
function __call($func, $args)
{
    // Allow users to call methods with lower_case syntax
    $aliases = array(
        'children' => 'childNodes',
        'first_child' => 'firstChild',
        'has_child' => 'hasChildNodes',
        'last_child' => 'lastChild',
        'next_sibling' => 'nextSibling',
        'prev_sibling' => 'previousSibling'
    );

    if (!isset($aliases[$func])) {
        trigger_error(
            'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
            E_USER_ERROR
        );

        // Only reached when an error handler swallowed the error above
        return null;
    }

    $actual_function = $aliases[$func];

    // phpcs:ignore Generic.Files.LineLength
    Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');

    return call_user_func_array(array($this, $actual_function), $args);
}
|
87 |
-
|
88 |
-
/**
 * Creates a node. When a document is given the node keeps a reference to
 * it and appends itself to the document's node list; with null the node
 * stays detached.
 *
 * @param object|null $dom Owning document, or null.
 */
function __construct($dom)
{
    if ($dom !== null) {
        $this->dom = $dom;
        $dom->nodes[] = $this;
    }
}
|
95 |
-
|
96 |
-
/**
 * Debug representation of the node.
 *
 * @return array Keys "nodetype" (constant name plus numeric value), "tag",
 *               "attributes" and "nodes"; empty attributes or nodes are
 *               reported as the string 'none'.
 */
function __debugInfo()
{
    // Translate node type to human-readable form
    switch ($this->nodetype) {
        case self::HDOM_TYPE_ELEMENT:
            $type_name = 'HDOM_TYPE_ELEMENT';
            break;
        case self::HDOM_TYPE_COMMENT:
            $type_name = 'HDOM_TYPE_COMMENT';
            break;
        case self::HDOM_TYPE_TEXT:
            $type_name = 'HDOM_TYPE_TEXT';
            break;
        case self::HDOM_TYPE_ROOT:
            $type_name = 'HDOM_TYPE_ROOT';
            break;
        case self::HDOM_TYPE_CDATA:
            $type_name = 'HDOM_TYPE_CDATA';
            break;
        default: // includes self::HDOM_TYPE_UNKNOWN
            $type_name = 'HDOM_TYPE_UNKNOWN';
    }

    $info = array();
    $info['nodetype'] = $type_name . ' (' . $this->nodetype . ')';
    $info['tag'] = $this->tag;
    $info['attributes'] = empty($this->attr) ? 'none' : $this->attr;
    $info['nodes'] = empty($this->nodes) ? 'none' : $this->nodes;

    return $info;
}
|
128 |
-
|
129 |
-
/**
 * String conversion: yields the outer text of this node.
 *
 * @return string
 */
function __toString()
{
    $markup = $this->outertext();

    return $markup;
}
|
133 |
-
|
134 |
-
/**
 * Drops the node's references to its document and its parent.
 */
function clear()
{
    // Break link to origin (dom) and to branch (parent)
    unset($this->dom, $this->parent);
}
|
139 |
-
|
140 |
-
/**
 * Prints this node and, recursively, all nodes below it: one line per node,
 * indented with one tab per depth level, optionally followed by the
 * attribute list.
 *
 * @param bool $show_attr Whether to print attributes.
 * @param int  $depth     Current indentation depth.
 *
 * @codeCoverageIgnore
 */
function dump($show_attr = true, $depth = 0)
{
    $line = str_repeat("\t", $depth) . $this->tag;

    if ($show_attr && count($this->attr) > 0) {
        $line .= '(';

        foreach ($this->attr as $attr_name => $attr_value) {
            $line .= "[$attr_name]=>\"$attr_value\", ";
        }

        $line .= ')';
    }

    echo $line . "\n";

    if ($this->nodes) {
        foreach ($this->nodes as $child) {
            $child->dump($show_attr, $depth + 1);
        }
    }
}
|
161 |
-
|
162 |
-
/**
 * Builds a one-line description of this node: tag, attributes, the
 * internal info array, the "text" property when set, the stored inner text
 * and the number of children and nodes.
 *
 * The inner text is read from this node itself.
 *
 * @param bool $echo True to print the description, false to return it.
 * @return string|null The description when $echo is false, otherwise
 *                     nothing.
 *
 * @codeCoverageIgnore
 */
function dump_node($echo = true)
{
    $string = $this->tag;

    if (count($this->attr) > 0) {
        $string .= '(';
        foreach ($this->attr as $k => $v) {
            $string .= "[$k]=>\"$v\", ";
        }
        $string .= ')';
    }

    if (count($this->_) > 0) {
        $string .= ' $_ (';
        foreach ($this->_ as $k => $v) {
            if (is_array($v)) {
                // Nested info (e.g. per-attribute data) gets its own group
                $string .= "[$k]=>(";
                foreach ($v as $k2 => $v2) {
                    $string .= "[$k2]=>\"$v2\", ";
                }
                $string .= ')';
            } else {
                $string .= "[$k]=>\"$v\", ";
            }
        }
        $string .= ')';
    }

    if (isset($this->text)) {
        $string .= " text: ({$this->text})";
    }

    $string .= ' HDOM_INNER_INFO: ';

    if (isset($this->_[self::HDOM_INFO_INNER])) {
        $string .= "'" . $this->_[self::HDOM_INFO_INNER] . "'";
    } else {
        $string .= ' NULL ';
    }

    $string .= ' children: ' . count($this->children);
    $string .= ' nodes: ' . count($this->nodes);
    $string .= "\n";

    if ($echo) {
        echo $string;
        return;
    } else {
        return $string;
    }
}
|
214 |
-
|
215 |
-
/**
 * Returns the parent node; when $parent is given it is first made the new
 * parent and this node is appended to its "nodes" and "children" lists.
 *
 * Known limitation (noted by the original authors): the node is not
 * removed from the lists of its previous parent.
 *
 * @param object|null $parent New parent node, or null to only read.
 * @return object|null The (possibly new) parent node.
 */
function parent($parent = null)
{
    if ($parent === null) {
        return $this->parent;
    }

    $this->parent = $parent;
    $this->parent->nodes[] = $this;
    $this->parent->children[] = $this;

    return $this->parent;
}
|
228 |
-
|
229 |
-
/**
 * Walks up the parent chain and returns the nearest ancestor whose tag is
 * identical (===) to $tag.
 *
 * @param string $tag Tag name to look for.
 * @return object|null The matching ancestor, or null when there is none.
 */
function find_ancestor_tag($tag)
{
    for ($candidate = $this->parent; $candidate !== null; $candidate = $candidate->parent) {
        if ($candidate->tag === $tag) {
            return $candidate;
        }
    }

    return null;
}
|
245 |
-
|
246 |
-
/**
 * Returns the inner markup of the node: the stored inner text (or, failing
 * that, the stored text) followed by the outer text of every node below
 * it, passed through convert_text().
 *
 * @return string
 */
function innertext()
{
    $markup = '';

    if (isset($this->_[self::HDOM_INFO_INNER])) {
        $markup = $this->_[self::HDOM_INFO_INNER];
    } elseif (isset($this->_[self::HDOM_INFO_TEXT])) {
        $markup = $this->_[self::HDOM_INFO_TEXT];
    }

    foreach ($this->nodes as $child) {
        $markup .= $child->outertext();
    }

    return $this->convert_text($markup);
}
|
262 |
-
|
263 |
-
/**
 * Returns the markup of the node including its own tag.
 *
 * Order of evaluation:
 * 1. The root node returns its inner text.
 * 2. The document callback, when set, is invoked with this node.
 * 3. Stored outer text, then stored text, is returned if present.
 * 4. Otherwise the markup is assembled from makeup(), the stored inner
 *    text (never for <br>), all nodes below and the closing tag.
 *
 * @return string
 */
function outertext()
{
    if ($this->tag === 'root') {
        return $this->innertext();
    }

    // todo: What is the use of this callback? Remove?
    if ($this->dom && $this->dom->callback !== null) {
        call_user_func_array($this->dom->callback, array($this));
    }

    // Pre-rendered forms win: outer text first, plain text second
    foreach (array(self::HDOM_INFO_OUTER, self::HDOM_INFO_TEXT) as $slot) {
        if (isset($this->_[$slot])) {
            return $this->convert_text($this->_[$slot]);
        }
    }

    $markup = isset($this->_[self::HDOM_INFO_BEGIN]) ? $this->makeup() : '';

    // todo: <br> should either never have self::HDOM_INFO_INNER or always
    if ($this->tag !== 'br' && isset($this->_[self::HDOM_INFO_INNER])) {
        $markup .= $this->_[self::HDOM_INFO_INNER];
    }

    if ($this->nodes) {
        foreach ($this->nodes as $child) {
            $markup .= $child->outertext();
        }
    }

    // An end value of 0 marks an element without closing tag
    if (isset($this->_[self::HDOM_INFO_END]) && $this->_[self::HDOM_INFO_END] != 0) {
        $markup .= '</' . $this->tag . '>';
    }

    return $this->convert_text($markup);
}
|
307 |
-
|
308 |
-
/**
 * Tells whether $node is a block level element, judged by its lowercased
 * tag name.
 *
 * @link https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
 *
 * @param object $node Node whose "tag" property is inspected.
 * @return bool True for the block level tags listed below.
 */
protected function is_block_element($node)
{
    // todo: When we have the utility class this should be moved there
    $block_tags = array(
        'p',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'ol', 'ul',
        'pre',
        'address',
        'blockquote',
        'dl',
        'div',
        'fieldset',
        'form',
        'hr',
        'noscript',
        'table'
    );

    $tag = strtolower($node->tag);

    return in_array($tag, $block_tags, true);
}
|
331 |
-
|
332 |
-
/**
 * Tells whether $node is an inline level element, judged by its lowercased
 * tag name.
 *
 * @link https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
 *
 * @param object $node Node whose "tag" property is inspected.
 * @return bool True for the inline level tags listed below.
 */
protected function is_inline_element($node)
{
    // todo: When we have the utility class this should be moved there
    $inline_tags = array(
        'b', 'big', 'i', 'small', 'tt',
        'abbr', 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong', 'samp', 'var',
        'a', 'bdo', 'br', 'img', 'map', 'object', 'q', 'script', 'span', 'sub', 'sup',
        'button', 'input', 'label', 'select', 'textarea'
    );

    $tag = strtolower($node->tag);

    return in_array($tag, $inline_tags, true);
}
|
346 |
-
|
347 |
-
/**
 * Returns the plain text of this node and everything below it.
 *
 * Own text: script, style, comment and unknown nodes contribute nothing;
 * CDATA nodes contribute their inner text; other nodes contribute their
 * stored inner text or, for text nodes, their stored text.
 *
 * Child text: block level children are separated from the preceding text
 * by a blank line, <br> contributes the document's default br text (or
 * DEFAULT_BR_TEXT), other children are appended as they are. Children
 * whose text is empty after left-trimming are skipped for block and inline
 * elements; a text of "0" is not treated as empty.
 *
 * @param bool $trim True to strip leading/trailing whitespace, false to
 *                   reduce each to a single space.
 * @return string
 */
function text($trim = true)
{
    $ret = '';
    $own_tag = strtolower($this->tag);

    if ($own_tag === 'script'
        || $own_tag === 'style'
        || $this->nodetype === self::HDOM_TYPE_COMMENT
        || $this->nodetype === self::HDOM_TYPE_UNKNOWN
    ) {
        $ret = '';
    } elseif ($this->nodetype === self::HDOM_TYPE_CDATA) {
        $ret = $this->_[self::HDOM_INFO_INNER];
    } elseif (isset($this->_[self::HDOM_INFO_INNER])) {
        $ret = $this->_[self::HDOM_INFO_INNER];
    } elseif ($this->nodetype === self::HDOM_TYPE_TEXT) {
        $ret = $this->_[self::HDOM_INFO_TEXT];
    }

    if (is_null($this->nodes)) {
        return '';
    }

    foreach ($this->nodes as $n) {
        if ($this->is_block_element($n)) {

            $block = ltrim($this->convert_text($n->text(false)));

            // Compare against '' explicitly: empty() would also drop "0"
            if ($block === '') {
                continue;
            }

            $ret = rtrim($ret) . "\n\n" . $block;

        } elseif ($this->is_inline_element($n)) {
            // todo: <br> introduces code smell because no space but \n
            if (strtolower($n->tag) === 'br') {
                $ret .= $this->dom->default_br_text ?: DEFAULT_BR_TEXT;
            } else {
                // Resolve the child once; resolving it a second time for the
                // append doubles the work at every nesting level
                $inline = $this->convert_text($n->text(false));

                if (ltrim($inline) === '') {
                    continue;
                }

                $ret = $ret . $inline;
            }
        } else {
            $ret .= $this->convert_text($n->text(false));
        }
    }

    // Reduce whitespace at start/end to a single (or none) space
    // NOTE(review): with the /u modifier \xC2 and \xA0 are matched as the
    // separate code points U+00C2 and U+00A0 - confirm this is intended.
    $ret = preg_replace('/[ \t\n\r\0\x0B\xC2\xA0]+$/u', $trim ? '' : ' ', $ret);
    $ret = preg_replace('/^[ \t\n\r\0\x0B\xC2\xA0]+/u', $trim ? '' : ' ', $ret);

    return $ret;
}
|
404 |
-
|
405 |
-
/**
 * Return the inner text with CDATA section markers removed.
 *
 * The opening marker is matched case-insensitively, the closing marker
 * literally.
 *
 * @return string
 */
function xmltext()
{
    $without_open = str_ireplace('<![CDATA[', '', $this->innertext());

    return str_replace(']]>', '', $without_open);
}
|
412 |
-
|
413 |
-
/**
 * Build the opening-tag markup of this node.
 *
 * Nodes that carry raw text (text, comment, unknown) return that text
 * unchanged. Otherwise the tag name and every attribute that has not been
 * removed are written out, reusing the recorded spacing and quote style
 * where available.
 *
 * @return string
 */
function makeup()
{
    // text, comment, unknown
    if (isset($this->_[self::HDOM_INFO_TEXT])) {
        return $this->_[self::HDOM_INFO_TEXT];
    }

    $markup = '<' . $this->tag;

    foreach ($this->attr as $name => $value) {
        // Attributes set to null or false count as removed.
        if ($value === null || $value === false) {
            continue;
        }

        // Recorded whitespace: [0] before the name, [1] before '=', [2] after '='.
        $spacing = isset($this->_[self::HDOM_INFO_SPACE][$name])
            ? $this->_[self::HDOM_INFO_SPACE][$name]
            : null;

        $markup .= ($spacing !== null) ? $spacing[0] : ' ';

        // Value-less attributes (nowrap, checked, selected, ...).
        if ($value === true) {
            $markup .= $name;
            continue;
        }

        $quote_type = isset($this->_[self::HDOM_INFO_QUOTE][$name])
            ? $this->_[self::HDOM_INFO_QUOTE][$name]
            : self::HDOM_QUOTE_DOUBLE;

        switch ($quote_type) {
            case self::HDOM_QUOTE_SINGLE:
                $quote = '\'';
                $value = htmlentities($value, ENT_QUOTES, $this->dom->target_charset);
                break;
            case self::HDOM_QUOTE_NO:
                $quote = '';
                break;
            case self::HDOM_QUOTE_DOUBLE:
            default:
                $quote = '"';
                $value = htmlentities($value, ENT_COMPAT, $this->dom->target_charset);
        }

        $before_equals = ($spacing !== null) ? $spacing[1] : '';
        $after_equals = ($spacing !== null) ? $spacing[2] : '';

        $markup .= $name . $before_equals . '=' . $after_equals . $quote . $value . $quote;
    }

    if (isset($this->_[self::HDOM_INFO_ENDSPACE])) {
        $markup .= $this->_[self::HDOM_INFO_ENDSPACE];
    }

    return $markup . '>';
}
|
474 |
-
|
475 |
-
/**
 * Find nodes below this node that match a CSS selector.
 *
 * @param string   $selector  Selector string; comma-separated lists are allowed.
 * @param int|null $idx       null to get every match, otherwise the index of
 *                            the wanted match (negative counts from the end).
 * @param bool     $lowercase Passed through to seek().
 * @return mixed Array of matches when $idx is null; otherwise the node at
 *               that index or null. An empty array is returned when the
 *               selector is empty or this node has no start tag.
 */
function find($selector, $idx = null, $lowercase = false)
{
    $selector_groups = $this->parse_selector($selector);

    if (count($selector_groups) === 0) {
        return array();
    }

    $matched_keys = array();

    // One pass per entry of the selector list.
    foreach ($selector_groups as $chain) {
        if (count($chain) === 0) {
            Debug::log_once('Empty selector (' . $selector . ') matches nothing.');
            return array();
        }

        if (!isset($this->_[self::HDOM_INFO_BEGIN])) {
            Debug::log_once('Invalid operation. The current node has no start tag.');
            return array();
        }

        $heads = array($this->_[self::HDOM_INFO_BEGIN] => 1);
        $combinator = ' ';

        // Walk the chain iteratively: the matches of one step are the
        // starting points of the next.
        foreach ($chain as $compound) {
            $next_heads = array();

            foreach ($heads as $key => $unused) {
                $origin = ($key === -1) ? $this->dom->root : $this->dom->nodes[$key];
                $origin->seek($compound, $next_heads, $combinator, $lowercase);
            }

            $heads = $next_heads;
            $combinator = $compound[6]; // combinator leading to the next step
        }

        // Array union keeps keys already present and appends the new ones.
        $matched_keys += $heads;
    }

    ksort($matched_keys);

    $found = array();

    foreach ($matched_keys as $key => $unused) {
        $found[] = $this->dom->nodes[$key];
    }

    if (is_null($idx)) {
        return $found;
    }

    if ($idx < 0) {
        $idx = count($found) + $idx;
    }

    return isset($found[$idx]) ? $found[$idx] : null;
}
|
533 |
-
|
534 |
-
/**
 * Same as find(), except that any falsy result (for instance an empty
 * array) is reported as null.
 *
 * @param string   $selector
 * @param int|null $idx
 * @param bool     $lowercase
 * @return mixed
 */
function expect($selector, $idx = null, $lowercase = false)
{
    $result = $this->find($selector, $idx, $lowercase);

    if ($result) {
        return $result;
    }

    return null;
}
|
538 |
-
|
539 |
-
/**
 * Collect the nodes reachable from this node through one combinator that
 * match one parsed compound selector.
 *
 * @param array  $selector   One compound selector as produced by
 *                           parse_selector(): pseudo selector, tag, pseudo
 *                           element, id, classes, attributes, combinator.
 * @param array  $ret        Receives the matches as node-index => 1 entries.
 * @param string $parent_cmd Combinator linking this node to the candidates:
 *                           ' ' (descendant), '>' (child), '+' (next
 *                           sibling) or '~' (subsequent siblings).
 * @param bool   $lowercase  Lower-case class names and attribute values
 *                           before comparing.
 * @return void
 */
protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
{
    list($ps_selector, $tag, $ps_element, $id, $class, $attributes) = $selector;
    $nodes = array();

    if ($parent_cmd === ' ') { // Descendant Combinator
        // Find parent closing tag if the current element doesn't have a
        // closing tag (i.e. void element)
        $end = (!empty($this->_[self::HDOM_INFO_END])) ? $this->_[self::HDOM_INFO_END] : 0;

        if ($end == 0 && $this->parent) {
            $parent = $this->parent;

            while ($parent !== null && !isset($parent->_[self::HDOM_INFO_END])) {
                $end -= 1;
                $parent = $parent->parent;
            }

            if ($parent !== null) {
                $end += $parent->_[self::HDOM_INFO_END];
            } else {
                // No ancestor carries an end marker: search up to the end
                // of the document instead of dereferencing null.
                $end = 0;
            }
        }

        if ($end === 0) {
            $end = count($this->dom->nodes);
        }

        // Get list of target nodes
        $nodes_start = $this->_[self::HDOM_INFO_BEGIN] + 1;

        // range() counts downwards when $end < $nodes_start, which would
        // select nodes located before this element; search nothing then.
        if ($end >= $nodes_start) {
            // remove() makes $this->dom->nodes non-contiguous; use what is left.
            $nodes = array_intersect_key(
                $this->dom->nodes,
                array_flip(range($nodes_start, $end))
            );
        }
    } elseif ($parent_cmd === '>') { // Child Combinator
        $nodes = $this->children;
    } elseif (($parent_cmd === '+' || $parent_cmd === '~') && $this->parent) {
        // Identity search only: a loose comparison of node objects walks
        // their whole (circular) object graph.
        $siblings = $this->parent->children;
        $index = array_search($this, $siblings, true);

        if ($index !== false) {
            if ($parent_cmd === '+') { // Next-Sibling Combinator
                if ($index + 1 < count($siblings)) {
                    $nodes[] = $siblings[$index + 1];
                }
            } else { // Subsequent Sibling Combinator
                // Start after this element; it is not its own sibling.
                $nodes = array_slice($siblings, $index + 1);
            }
        }
    }

    // Test every candidate against the compound selector.
    foreach ($nodes as $node) {
        $pass = true;

        // Skip root nodes
        if (!$node->parent) {
            continue;
        }

        // Handle 'text' selector
        if ($tag === 'text') {
            if ($node->tag === 'text') {
                $text_key = array_search($node, $this->dom->nodes, true);

                // false would silently be used as key 0.
                if ($text_key !== false) {
                    $ret[$text_key] = 1;
                }
            }

            if (isset($node->_[self::HDOM_INFO_INNER])) {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // Handle 'cdata' selector
        if ($tag === 'cdata') {
            if ($node->tag === 'cdata') {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // Handle 'comment'
        if ($tag === 'comment' && $node->tag === 'comment') {
            $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            continue;
        }

        // Skip if node isn't a child node (i.e. text nodes)
        if (!in_array($node, $node->parent->children, true)) {
            continue;
        }

        // Skip if tag doesn't match
        if ($tag !== '' && $tag !== $node->tag && $tag !== '*') {
            $pass = false;
        }

        // Check the ID
        if ($pass && $id !== '') {
            if (!isset($node->attr['id'])) {
                $pass = false;
            } else {
                // Note: Only consider the first ID (as browsers do)
                $node_id = explode(' ', trim($node->attr['id']))[0];

                if ($id !== $node_id) {
                    $pass = false;
                }
            }
        }

        // Check if all class(es) exist
        if ($pass && $class !== '' && is_array($class) && !empty($class)) {
            if (isset($node->attr['class'])) {
                // Apply the same rules for the pattern and attribute value
                // Attribute values must not contain control characters other than space
                // https://www.w3.org/TR/html/dom.html#text-content
                // https://www.w3.org/TR/html/syntax.html#attribute-values
                // https://www.w3.org/TR/xml/#AVNormalize
                $node_classes = preg_replace("/[\r\n\t\s]+/u", ' ', $node->attr['class']);
                $node_classes = trim($node_classes);
                $node_classes = explode(' ', $node_classes);

                if ($lowercase) {
                    $node_classes = array_map('strtolower', $node_classes);
                }

                foreach ($class as $wanted_class) {
                    // Strict: loosely compared, numeric-looking names such
                    // as "1e1" and "10" would be considered equal.
                    if (!in_array($wanted_class, $node_classes, true)) {
                        $pass = false;
                        break;
                    }
                }
            } else {
                $pass = false;
            }
        }

        // Check attributes
        if ($pass
            && $attributes !== ''
            && is_array($attributes)
            && !empty($attributes)) {
            foreach ($attributes as $a) {
                list(
                    $att_name,
                    $att_expr,
                    $att_val,
                    $att_inv,
                    $att_case_sensitivity
                ) = $a;

                // Handle indexing attributes (i.e. "[2]")
                /**
                 * Note: This is not supported by the CSS Standard but adds
                 * the ability to select items compatible to XPath (i.e.
                 * the 3rd element within its parent).
                 *
                 * Note: This doesn't conflict with the CSS Standard which
                 * doesn't work on numeric attributes anyway.
                 */
                if (is_numeric($att_name)
                    && $att_expr === ''
                    && $att_val === '') {
                    $position = 0;

                    // Find index of current element among same-tag siblings
                    foreach ($node->parent->children as $sibling) {
                        if ($sibling->tag === $node->tag) {
                            ++$position;
                        }

                        if ($sibling === $node) {
                            break;
                        }
                    }

                    // If this is the correct node, continue with next
                    // attribute
                    if ($position === (int)$att_name) {
                        continue;
                    }
                }

                // Check attribute availability
                if ($att_inv) { // Attribute should NOT be set
                    if (isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                } else { // Attribute should be set
                    // todo: "plaintext" is not a valid CSS selector!
                    if ($att_name !== 'plaintext'
                        && !isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                }

                // Continue with next attribute if expression isn't defined
                if ($att_expr === '') {
                    continue;
                }

                // A "plaintext" test runs against the node's text instead
                // of an attribute value.
                // todo "plaintext" is not a valid CSS selector!
                if ($att_name === 'plaintext') {
                    $nodeKeyValue = $node->text();
                } else {
                    $nodeKeyValue = $node->attr[$att_name];
                }

                // If lowercase is set, do a case insensitive test of
                // the value of the selector.
                if ($lowercase) {
                    $att_val = strtolower($att_val);
                    $nodeKeyValue = strtolower($nodeKeyValue);
                }

                $check = $this->match(
                    $att_expr,
                    $att_val,
                    $nodeKeyValue,
                    $att_case_sensitivity
                );

                $check = $ps_element === 'not' ? !$check : $check;

                if (!$check) {
                    $pass = false;
                    break;
                }
            }
        }

        // Found a match. Add to list
        $pass = $ps_selector === 'not' ? !$pass : $pass;

        if ($pass) {
            $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
        }
    }
}
|
781 |
-
|
782 |
-
/**
 * Test an attribute value against a selector pattern.
 *
 * Pattern and value are whitespace-normalised (runs collapsed to a single
 * space, ends trimmed) before they are compared.
 *
 * @param string $exp              Operator: '=', '!=', '^=', '$=', '*=',
 *                                 '|=' or '~='.
 * @param string $pattern          Value given in the selector.
 * @param string $value            Value found on the node.
 * @param string $case_sensitivity 'i' compares case-insensitively.
 * @return bool True on match; false otherwise, including for an unknown
 *              operator (which is also logged).
 */
protected function match($exp, $pattern, $value, $case_sensitivity)
{
    if ($case_sensitivity === 'i') {
        $pattern = strtolower($pattern);
        $value = strtolower($value);
    }

    // Apply the same rules for the pattern and attribute value
    // Attribute values must not contain control characters other than space
    // https://www.w3.org/TR/html/dom.html#text-content
    // https://www.w3.org/TR/html/syntax.html#attribute-values
    // https://www.w3.org/TR/xml/#AVNormalize
    // The cast turns the null that preg_replace() yields for invalid UTF-8
    // into an empty string.
    $pattern = trim((string) preg_replace("/[\r\n\t\s]+/u", ' ', $pattern));
    $value = trim((string) preg_replace("/[\r\n\t\s]+/u", ' ', $value));

    switch ($exp) {
        case '=':
            return ($value === $pattern);
        case '!=':
            return ($value !== $pattern);
        case '^=':
            return preg_match('/^' . preg_quote($pattern, '/') . '/', $value) === 1;
        case '$=':
            return preg_match('/' . preg_quote($pattern, '/') . '$/', $value) === 1;
        case '*=':
            return preg_match('/' . preg_quote($pattern, '/') . '/', $value) === 1;
        case '|=':
            /**
             * [att|=val]
             *
             * Represents an element with the att attribute, its value
             * either being exactly "val" or beginning with "val"
             * immediately followed by "-" (U+002D).
             */
            return $value === $pattern
                || strpos($value, $pattern . '-') === 0;
        case '~=':
            /**
             * [att~=val]
             *
             * Represents an element with the att attribute whose value is a
             * whitespace-separated list of words, one of which is exactly
             * "val". If "val" contains whitespace, it will never represent
             * anything (since the words are separated by spaces). Also if
             * "val" is the empty string, it will never represent anything.
             */
            return $pattern !== ''
                && in_array($pattern, explode(' ', $value), true);
    }

    Debug::log('Unhandled attribute selector: ' . $exp . '!');
    return false;
}
|
836 |
-
|
837 |
-
/**
 * Split a selector string into its parsed parts.
 *
 * The expression is modified from mootools (https://mootools.net/). A
 * trailing space is appended to the input so that the last compound
 * selector is also followed by a separator.
 *
 * Each compound selector becomes a seven-element array:
 *   [0] pseudo selector name written before the tag
 *   [1] tag name (lower-cased when the document is in lowercase mode)
 *   [2] pseudo selector name written after the tag
 *   [3] id
 *   [4] class names (array), or '' when none were given
 *   [5] attribute tests (array), or '' when none were given; each test is
 *       array(name, expression, value, inverted flag, case flag), where a
 *       leading "!" on the name sets the inverted flag
 *   [6] combinator leading to the next compound selector ('' after the
 *       last one of a list entry)
 *
 * Attribute names may contain a colon; such attributes have to be read
 * with getAttribute() because $node->x:y is not valid PHP. An "@" directly
 * after the opening bracket is accepted but not captured.
 *
 * @param string $selector_string
 * @return array One entry per comma-separated selector, each being the
 *               list of its compound selectors in order.
 */
protected function parse_selector($selector_string)
{
    // phpcs:ignore Generic.Files.LineLength
    $selector_pattern = "/(?:\:(\w+)\()?([\w:\*-]*)(?:\:(\w+)\()?(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?(?:\))?(?:\))?([\/, >+~]+)/is";

    preg_match_all(
        $selector_pattern,
        trim($selector_string) . ' ', // Add final ' ' as pseudo separator
        $matches,
        PREG_SET_ORDER
    );

    $selectors = array();
    $chain = array();

    foreach ($matches as $m) {
        $full_match = trim($m[0]);

        // Skip NoOps
        if ($full_match === '' || $full_match === '/' || $full_match === '//') {
            continue;
        }

        // Drop the full match so that the capture groups start at index 0.
        array_shift($m);

        // Convert to lowercase
        if ($this->dom->lowercase) {
            $m[1] = strtolower($m[1]);
        }

        // Extract classes
        if ($m[4] !== '') {
            $m[4] = explode('.', $m[4]);
        }

        // Extract attributes. Capture groups of the expression below:
        // [1] name, [2] expression, [3] value, [4] case sensitivity.
        if ($m[5] !== '') {
            preg_match_all(
                "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
                trim($m[5]),
                $attribute_matches,
                PREG_SET_ORDER
            );

            // Replace element by array
            $m[5] = array();

            foreach ($attribute_matches as $att) {
                // Skip empty matches
                if (trim($att[0]) === '') {
                    continue;
                }

                $inverted = (isset($att[1][0]) && $att[1][0] === '!');

                $m[5][] = array(
                    $inverted ? substr($att[1], 1) : $att[1], // Name
                    isset($att[2]) ? $att[2] : '', // Expression
                    isset($att[3]) ? $att[3] : '', // Value
                    $inverted, // Inverted Flag
                    isset($att[4]) ? strtolower($att[4]) : '', // Case-Sensitivity
                );
            }
        }

        // Sanitize Separator: pure whitespace means "descendant".
        $separator = $m[6];

        if ($separator !== '' && trim($separator) === '') {
            $m[6] = ' ';
        } else {
            $m[6] = trim($separator);
        }

        // A comma ends the current entry of a selector list.
        $ends_list_entry = ($m[6] === ',');

        if ($ends_list_entry) {
            $m[6] = '';
        }

        $chain[] = $m;

        if ($ends_list_entry) {
            $selectors[] = $chain;
            $chain = array();
        }
    }

    if (count($chain) > 0) {
        $selectors[] = $chain;
    }

    return $selectors;
}
|
976 |
-
|
977 |
-
/**
 * Magic getter: attributes first, then the virtual text properties.
 *
 * @param string $name Attribute name, or one of 'outertext', 'innertext',
 *                     'plaintext', 'xmltext'.
 * @return mixed The attribute value passed through convert_text(), the
 *               requested text, or false when the name is unknown.
 */
function __get($name)
{
    // A set attribute takes precedence over a virtual property of the same name.
    if (isset($this->attr[$name])) {
        $raw_value = $this->attr[$name];

        return $this->convert_text($raw_value);
    }

    switch ($name) {
        case 'outertext':
            return $this->outertext();

        case 'innertext':
            return $this->innertext();

        case 'plaintext':
            return $this->text();

        case 'xmltext':
            return $this->xmltext();

        default:
            return false;
    }
}
|
992 |
-
|
993 |
-
/**
 * Magic setter: 'outertext' and 'innertext' replace the stored markup,
 * every other name is stored as an attribute.
 *
 * @param string $name
 * @param mixed  $value
 * @return void
 */
function __set($name, $value)
{
    switch ($name) {
        case 'outertext':
            $this->_[self::HDOM_INFO_OUTER] = $value;
            return;

        case 'innertext':
            // Blank any stored raw text before the new inner markup is set.
            if (isset($this->_[self::HDOM_INFO_TEXT])) {
                $this->_[self::HDOM_INFO_TEXT] = '';
            }

            $this->_[self::HDOM_INFO_INNER] = $value;
            return;

        default:
            $this->attr[$name] = $value;
            return;
    }
}
|
1008 |
-
|
1009 |
-
/**
 * Magic isset: the virtual text properties are always set, anything else
 * is set when an attribute of that name exists.
 *
 * 'xmltext' is listed as well because __get() serves it alongside the
 * other three virtual properties.
 *
 * @param string $name
 * @return bool
 */
function __isset($name)
{
    switch ($name) {
        case 'outertext':
        case 'innertext':
        case 'plaintext':
        case 'xmltext':
            return true;
    }

    return isset($this->attr[$name]);
}
|
1019 |
-
|
1020 |
-
/**
 * Magic unset: remove the attribute with the given name, if it is set.
 *
 * @param string $name
 * @return void
 */
function __unset($name)
{
    if (!isset($this->attr[$name])) {
        return;
    }

    unset($this->attr[$name]);
}
|
1024 |
-
|
1025 |
-
/**
 * Convert text from the document's source charset to its target charset.
 *
 * No conversion takes place when the node has no document, when either
 * charset is unknown, when both are equal, or when the target is UTF-8 and
 * the text already is valid UTF-8. If iconv() fails, the text is kept
 * unconverted rather than being replaced by false. For a UTF-8 target a
 * byte order mark at the start or end of the result is removed.
 *
 * @param string $text
 * @return string
 */
function convert_text($text)
{
    $converted_text = $text;

    $sourceCharset = '';
    $targetCharset = '';

    if ($this->dom) {
        $sourceCharset = strtoupper((string) $this->dom->_charset);
        $targetCharset = strtoupper((string) $this->dom->_target_charset);
    }

    // Both charsets are upper-cased above, so they can be compared directly.
    if (!empty($sourceCharset)
        && !empty($targetCharset)
        && $sourceCharset !== $targetCharset) {
        if ($targetCharset === 'UTF-8' && self::is_utf8($text)) {
            Debug::log_once('The source charset was incorrectly detected as ' . $sourceCharset . ' but should have been UTF-8');
        } else {
            $iconv_result = iconv($sourceCharset, $targetCharset, $text);

            // iconv() returns false on failure; keep the original text then.
            if ($iconv_result !== false) {
                $converted_text = $iconv_result;
            }
        }
    }

    // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
    if ($targetCharset === 'UTF-8') {
        if (substr($converted_text, 0, 3) === "\xef\xbb\xbf") {
            $converted_text = substr($converted_text, 3);
        }

        if (substr($converted_text, -3) === "\xef\xbb\xbf") {
            $converted_text = substr($converted_text, 0, -3);
        }
    }

    return $converted_text;
}
|
1061 |
-
|
1062 |
-
/**
 * Check whether a byte string is structurally valid UTF-8.
 *
 * Every lead byte has to be followed by the number of continuation bytes
 * (0x80-0xBF) it announces. Sequences of up to six bytes are accepted;
 * overlong encodings are not detected.
 *
 * @param string $str
 * @return bool
 */
static function is_utf8($str)
{
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
        $c = ord($str[$i]);

        // Plain ASCII. 0x80 itself is a continuation byte, hence "< 128".
        if ($c < 128) {
            continue;
        }

        if ($c >= 254) {
            return false;
        } elseif ($c >= 252) {
            $bits = 6;
        } elseif ($c >= 248) {
            $bits = 5;
        } elseif ($c >= 240) {
            $bits = 4;
        } elseif ($c >= 224) {
            $bits = 3;
        } elseif ($c >= 192) {
            $bits = 2;
        } else {
            // 0x80-0xBF: continuation byte without a lead byte.
            return false;
        }

        // The announced sequence must fit into the remaining input.
        if (($i + $bits) > $len) {
            return false;
        }

        while ($bits > 1) {
            $i++;
            $b = ord($str[$i]);

            if ($b < 128 || $b > 191) {
                return false;
            }

            $bits--;
        }
    }

    return true;
}
|
1088 |
-
|
1089 |
-
/**
 * Determine the display size of an image node.
 *
 * The width/height attributes are used when present. A dimension without
 * such an attribute is taken from the inline style, provided the style
 * value is a whole number of pixels (e.g. "120px").
 *
 * Not considered (possible future enhancements): sizes coming from classes
 * or ids, from rules that apply through parent elements, and from external
 * style sheets.
 *
 * @return array|false array('height' => ..., 'width' => ...) with -1 for
 *                     every unknown dimension, or false when this node is
 *                     not an img element.
 */
function get_display_size()
{
    if ($this->tag !== 'img') {
        return false;
    }

    $size = array(
        'height' => -1,
        'width' => -1
    );

    $dimensions = array('width', 'height');

    // See if there is a height or width attribute in the tag itself.
    foreach ($dimensions as $dimension) {
        if (isset($this->attr[$dimension])) {
            $size[$dimension] = $this->attr[$dimension];
        }
    }

    // Now look for an inline style.
    if (isset($this->attr['style'])) {
        // Thanks to user gnarf from stackoverflow for this regular expression.
        $declarations = array();

        preg_match_all(
            '/([\w-]+)\s*:\s*([^;]+)\s*;?/',
            $this->attr['style'],
            $matches,
            PREG_SET_ORDER
        );

        foreach ($matches as $match) {
            $declarations[$match[1]] = $match[2];
        }

        foreach ($dimensions as $dimension) {
            // Only fill in what the attributes did not provide.
            if (!isset($declarations[$dimension]) || $size[$dimension] != -1) {
                continue;
            }

            // The captured value keeps whitespace that precedes the ";".
            $declared = trim($declarations[$dimension]);

            // check that the last two characters are px (pixels)
            if (strtolower(substr($declared, -2)) !== 'px') {
                continue;
            }

            $pixels = substr($declared, 0, -2);

            // Compare against false: filter_var() returns int 0 for "0",
            // which is a valid size.
            if (filter_var($pixels, FILTER_VALIDATE_INT) !== false) {
                $size[$dimension] = $pixels;
            }
        }
    }

    return $size;
}
|
1171 |
-
|
1172 |
-
/**
 * Return the outer markup of this node and optionally write it to a file.
 *
 * @param string $filepath Target file; '' (the default) writes nothing.
 *                         The file is written under an exclusive lock.
 * @return string The outer markup.
 */
function save($filepath = '')
{
    $markup = $this->outertext();

    if ($filepath === '') {
        return $markup;
    }

    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
|
1182 |
-
|
1183 |
-
/**
 * Add one or more class names to the class attribute.
 *
 * Names already present are not added again. Empty names, which result
 * from repeated spaces in a class string, are ignored so that they neither
 * create an empty class attribute nor append stray spaces.
 *
 * @param string|array $class Space-separated names or an array of names;
 *                            any other type is ignored.
 * @return void
 */
function addClass($class)
{
    if (is_string($class)) {
        $class = explode(' ', $class);
    }

    if (!is_array($class)) {
        return;
    }

    foreach ($class as $c) {
        if ($c === '') {
            continue;
        }

        if (!isset($this->class)) {
            $this->class = $c;
        } elseif (!$this->hasClass($c)) {
            $this->class .= ' ' . $c;
        }
    }
}
|
1203 |
-
|
1204 |
-
/**
 * Check whether the class attribute contains the given class name.
 *
 * The attribute is split on any run of whitespace, so names separated by
 * tabs, line breaks or several spaces are recognised as well.
 *
 * @param string $class A single class name; other types never match.
 * @return bool
 */
function hasClass($class)
{
    if (!is_string($class) || !isset($this->class)) {
        return false;
    }

    $names = preg_split('/\s+/', $this->class, -1, PREG_SPLIT_NO_EMPTY);

    return is_array($names) && in_array($class, $names, true);
}
|
1214 |
-
|
1215 |
-
/**
 * Remove class names from the class attribute.
 *
 * The attribute is split on any run of whitespace, so names separated by
 * tabs or line breaks can be removed too. The names that remain are
 * written back separated by single spaces; when none remain, the attribute
 * itself is removed.
 *
 * @param string|array|null $class Space-separated names, an array of
 *                                 names, or null to remove the whole class
 *                                 attribute.
 * @return void
 */
function removeClass($class = null)
{
    if (!isset($this->class)) {
        return;
    }

    if (is_null($class)) {
        $this->removeAttribute('class');
        return;
    }

    if (is_string($class)) {
        $class = explode(' ', $class);
    }

    if (!is_array($class)) {
        return;
    }

    $current = preg_split('/\s+/', $this->class, -1, PREG_SPLIT_NO_EMPTY);

    if (!is_array($current)) {
        // preg_split() failed; fall back to splitting on single spaces.
        $current = explode(' ', $this->class);
    }

    $remaining = array_diff($current, $class);

    if (empty($remaining)) {
        $this->removeAttribute('class');
    } else {
        $this->class = implode(' ', $remaining);
    }
}
|
1239 |
-
|
1240 |
-
/**
 * Return the attribute array of this node.
 *
 * @return array
 */
function getAllAttributes()
{
    $attributes = $this->attr;

    return $attributes;
}
|
1244 |
-
|
1245 |
-
/**
 * Read the property with the given name from this node.
 *
 * @param string $name
 * @return mixed
 */
function getAttribute($name)
{
    $value = $this->$name;

    return $value;
}
|
1249 |
-
|
1250 |
-
/**
 * Assign a value to the property with the given name on this node.
 *
 * @param string $name
 * @param mixed  $value
 * @return void
 */
function setAttribute($name, $value)
{
    $this->{$name} = $value;
}
|
1254 |
-
|
1255 |
-
/**
 * Report whether the property with the given name is set on this node.
 *
 * @param string $name
 * @return bool
 */
function hasAttribute($name)
{
    $is_set = isset($this->$name);

    return $is_set;
}
|
1259 |
-
|
1260 |
-
/**
 * Unset the property with the given name on this node.
 *
 * @param string $name
 * @return void
 */
function removeAttribute($name)
{
    unset($this->{$name});
}
|
1264 |
-
|
1265 |
-
/**
 * Remove this node from its parent. Does nothing for a node without parent.
 *
 * @return void
 */
function remove()
{
    if (!$this->parent) {
        return;
    }

    $this->parent->removeChild($this);
}
|
1271 |
-
|
1272 |
-
/**
 * Remove a child node, including everything below it, from this node and
 * from the document's node list.
 *
 * @param object $node The child node to remove; it is cleared afterwards.
 * @return void
 */
function removeChild($node)
{
    // Depth first: take the child's own element children apart.
    foreach ($node->children as $grandchild) {
        $node->removeChild($grandchild);
    }

    // No need to re-index node->children because it is about to be removed!

    // Drop whatever is left in the child (e.g. text nodes), both from the
    // child itself and from the document.
    foreach ($node->nodes as $entity) {
        $local_key = array_search($entity, $node->nodes, true);
        $document_key = array_search($entity, $node->dom->nodes, true);

        if ($local_key !== false) {
            unset($node->nodes[$local_key]);
        }

        if ($document_key !== false) {
            unset($node->dom->nodes[$document_key]);
        }
    }

    // No need to re-index node->nodes because it is about to be removed!

    // Detach the child from this node's lists; these are re-indexed.
    $node_key = array_search($node, $this->nodes, true);

    if ($node_key !== false) {
        unset($this->nodes[$node_key]);
    }

    $this->nodes = array_values($this->nodes);

    $child_key = array_search($node, $this->children, true);

    if ($child_key !== false) {
        unset($this->children[$child_key]);
    }

    $this->children = array_values($this->children);

    // Do not re-index dom->nodes because nodes point to other nodes in the
    // array explicitly!
    $document_key = array_search($node, $this->dom->nodes, true);

    if ($document_key !== false) {
        unset($this->dom->nodes[$document_key]);
    }

    $node->clear();
}
|
1320 |
-
|
1321 |
-
/**
 * Return the first node found for the selector "#<id>".
 *
 * @param string $id
 * @return mixed Result of find() for index 0.
 */
function getElementById($id)
{
    $selector = '#' . $id;

    return $this->find($selector, 0);
}
|
1325 |
-
|
1326 |
-
/**
 * Return the nodes found for the selector "#<id>".
 *
 * @param string   $id
 * @param int|null $idx Passed through to find().
 * @return mixed Result of find().
 */
function getElementsById($id, $idx = null)
{
    $selector = '#' . $id;

    return $this->find($selector, $idx);
}
|
1330 |
-
|
1331 |
-
/**
 * Return the first node found when the given name is used as selector.
 *
 * @param string $name
 * @return mixed Result of find() for index 0.
 */
function getElementByTagName($name)
{
    $first_match = $this->find($name, 0);

    return $first_match;
}
|
1335 |
-
|
1336 |
-
/**
 * Return the nodes found when the given name is used as selector.
 *
 * @param string   $name
 * @param int|null $idx Passed through to find().
 * @return mixed Result of find().
 */
function getElementsByTagName($name, $idx = null)
{
    $matches = $this->find($name, $idx);

    return $matches;
}
|
1340 |
-
|
1341 |
-
/**
 * Alias of parent().
 *
 * @return mixed Whatever parent() returns.
 */
function parentNode()
{
    $parent_node = $this->parent();

    return $parent_node;
}
|
1345 |
-
|
1346 |
-
/**
 * Return all child elements, or a single one by index.
 *
 * @param int $idx -1 (the default) returns the whole list.
 * @return mixed The list of children, the child stored at $idx, or null
 *               when there is no child at that index.
 */
function childNodes($idx = -1)
{
    if ($idx === -1) {
        return $this->children;
    }

    return isset($this->children[$idx]) ? $this->children[$idx] : null;
}
|
1358 |
-
|
1359 |
-
/**
 * Return the child stored at index 0 of the children list.
 *
 * @return mixed The first child, or null when there are no children.
 */
function firstChild()
{
    return count($this->children) > 0 ? $this->children[0] : null;
}
|
1366 |
-
|
1367 |
-
/**
 * Return the last entry of the children list.
 *
 * Uses end(), which also moves the array's internal pointer to the last
 * element.
 *
 * @return mixed The last child, or null when there are no children.
 */
function lastChild()
{
    return count($this->children) > 0 ? end($this->children) : null;
}
|
1374 |
-
|
1375 |
-
/**
 * Return the element that follows this node in its parent's children list.
 *
 * @return mixed The next sibling, or null when this node has no parent, is
 *               not listed among the parent's children, or is the last one.
 */
function nextSibling()
{
    if ($this->parent === null) {
        return null;
    }

    $siblings = $this->parent->children;
    $position = array_search($this, $siblings, true);

    if ($position === false) {
        return null;
    }

    $next = $position + 1;

    return isset($siblings[$next]) ? $siblings[$next] : null;
}
|
1389 |
-
|
1390 |
-
/**
 * Return the element that precedes this node in its parent's children list.
 *
 * @return mixed The previous sibling, or null when this node has no parent,
 *               is not listed among the parent's children, or is the first.
 */
function previousSibling()
{
    if ($this->parent === null) {
        return null;
    }

    $siblings = $this->parent->children;
    $position = array_search($this, $siblings, true);

    // Not found, or already the first entry: nothing precedes this node.
    if ($position === false || !($position > 0)) {
        return null;
    }

    return $siblings[$position - 1];
}
|
1405 |
-
|
1406 |
-
/**
 * Tell whether this node has at least one child element.
 *
 * @return bool False when the children list is empty, true otherwise.
 */
function hasChildNodes()
{
    if (empty($this->children)) {
        return false;
    }

    return true;
}
|
1410 |
-
|
1411 |
-
/**
 * DOM-style accessor for the node's tag.
 *
 * @return mixed The value of the tag property.
 */
function nodeName()
{
    $name = $this->tag;

    return $name;
}
|
1415 |
-
|
1416 |
-
/**
 * Append $node as the last child of this node.
 *
 * The node is added to both the nodes and children lists. When this node
 * belongs to a document, $node and every element reachable through its
 * children lists are registered in the document's node table and given
 * begin/end indexes that point at their own table entry.
 *
 * @param object $node Node to append; its parent is set to this node.
 * @return $this
 */
function appendChild($node)
{
    $node->parent = $this;
    $this->nodes[] = $node;
    $this->children[] = $node;

    if ($this->dom) { // Attach current node to DOM (recursively)
        $children = array($node);

        while ($children) {
            $child = array_pop($children);
            $children = array_merge($children, $child->children);

            $this->dom->nodes[] = $child;
            $child->dom = $this->dom;

            // Entries may be unset from the node table without re-indexing,
            // so the key assigned by [] is "highest key + 1", which is not
            // necessarily count() - 1. Read the real key back instead.
            end($this->dom->nodes);
            $child->_[self::HDOM_INFO_BEGIN] = key($this->dom->nodes);
            $child->_[self::HDOM_INFO_END] = $child->_[self::HDOM_INFO_BEGIN];
        }

        // The root spans up to the last key actually present in the table.
        end($this->dom->nodes);
        $this->dom->root->_[self::HDOM_INFO_END] = key($this->dom->nodes);
    }

    return $this;
}
|
1440 |
-
|
1441 |
-
}
|
1 |
+
<?php namespace simplehtmldom;
|
2 |
+
|
3 |
+
/**
|
4 |
+
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
+
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
+
*
|
7 |
+
* Licensed under The MIT License
|
8 |
+
* See the LICENSE file in the project root for more information.
|
9 |
+
*
|
10 |
+
* Authors:
|
11 |
+
* S.C. Chen
|
12 |
+
* John Schlick
|
13 |
+
* Rus Carroll
|
14 |
+
* logmanoriginal
|
15 |
+
*
|
16 |
+
* Contributors:
|
17 |
+
* Yousuke Kumakura
|
18 |
+
* Vadim Voituk
|
19 |
+
* Antcs
|
20 |
+
*
|
21 |
+
* Version $Rev$
|
22 |
+
*/
|
23 |
+
|
24 |
+
include_once 'constants.php';
|
25 |
+
include_once 'Debug.php';
|
26 |
+
|
27 |
+
class HtmlNode
|
28 |
+
{
|
29 |
+
const HDOM_TYPE_ELEMENT = 1;
|
30 |
+
const HDOM_TYPE_COMMENT = 2;
|
31 |
+
const HDOM_TYPE_TEXT = 3;
|
32 |
+
const HDOM_TYPE_ROOT = 5;
|
33 |
+
const HDOM_TYPE_UNKNOWN = 6;
|
34 |
+
const HDOM_TYPE_CDATA = 7;
|
35 |
+
|
36 |
+
const HDOM_QUOTE_DOUBLE = 0;
|
37 |
+
const HDOM_QUOTE_SINGLE = 1;
|
38 |
+
const HDOM_QUOTE_NO = 3;
|
39 |
+
|
40 |
+
const HDOM_INFO_BEGIN = 0;
|
41 |
+
const HDOM_INFO_END = 1;
|
42 |
+
const HDOM_INFO_QUOTE = 2;
|
43 |
+
const HDOM_INFO_SPACE = 3;
|
44 |
+
const HDOM_INFO_TEXT = 4;
|
45 |
+
const HDOM_INFO_INNER = 5;
|
46 |
+
const HDOM_INFO_OUTER = 6;
|
47 |
+
const HDOM_INFO_ENDSPACE = 7;
|
48 |
+
|
49 |
+
public $nodetype = self::HDOM_TYPE_TEXT;
|
50 |
+
public $tag = 'text';
|
51 |
+
public $attr = array();
|
52 |
+
public $children = array();
|
53 |
+
public $nodes = array();
|
54 |
+
public $parent = null;
|
55 |
+
public $_ = array();
|
56 |
+
private $dom = null;
|
57 |
+
|
58 |
+
/**
 * Route deprecated lower_case method names to their camelCase successors.
 *
 * Known aliases are logged as deprecated through Debug::log() and then
 * forwarded. Unknown names raise an E_USER_ERROR.
 *
 * @param string $func Name of the method that was called.
 * @param array  $args Arguments passed to that method.
 * @return mixed Result of the forwarded call; null when the name is unknown
 *               and an error handler lets execution continue.
 */
function __call($func, $args)
{
    // Allow users to call methods with lower_case syntax
    $aliases = array(
        'children' => 'childNodes',
        'first_child' => 'firstChild',
        'has_child' => 'hasChildNodes',
        'last_child' => 'lastChild',
        'next_sibling' => 'nextSibling',
        'prev_sibling' => 'previousSibling',
    );

    if (!isset($aliases[$func])) {
        trigger_error(
            'Call to undefined method ' . __CLASS__ . '::' . $func . '()',
            E_USER_ERROR
        );

        // Only reached when a custom error handler resumes execution; there
        // is no target method to log or dispatch to.
        return null;
    }

    $actual_function = $aliases[$func];

    // phpcs:ignore Generic.Files.LineLength
    Debug::log(__CLASS__ . '->' . $func . '() has been deprecated and will be removed in the next major version of simplehtmldom. Use ' . __CLASS__ . '->' . $actual_function . '() instead.');

    return call_user_func_array(array($this, $actual_function), $args);
}
|
87 |
+
|
88 |
+
/**
 * Create a node and, when a document is supplied, register it there.
 *
 * @param object|null $dom Owning document; null creates a detached node
 *                         that is not added to any node table.
 */
function __construct($dom)
{
    if (is_null($dom)) {
        return $this;
    }

    $this->dom = $dom;
    $dom->nodes[] = $this;
}
|
95 |
+
|
96 |
+
/**
 * Provide a compact representation of the node for var_dump().
 *
 * @return array Keys 'nodetype' (constant name plus numeric value), 'tag',
 *               'attributes' and 'nodes'; the last two are the string
 *               'none' when empty.
 */
function __debugInfo()
{
    // Translate node type to human-readable form
    switch ($this->nodetype) {
        case self::HDOM_TYPE_ELEMENT:
            $label = 'HDOM_TYPE_ELEMENT';
            break;
        case self::HDOM_TYPE_COMMENT:
            $label = 'HDOM_TYPE_COMMENT';
            break;
        case self::HDOM_TYPE_TEXT:
            $label = 'HDOM_TYPE_TEXT';
            break;
        case self::HDOM_TYPE_ROOT:
            $label = 'HDOM_TYPE_ROOT';
            break;
        case self::HDOM_TYPE_CDATA:
            $label = 'HDOM_TYPE_CDATA';
            break;
        case self::HDOM_TYPE_UNKNOWN:
        default:
            $label = 'HDOM_TYPE_UNKNOWN';
    }

    $info = array();
    $info['nodetype'] = $label . ' (' . $this->nodetype . ')';
    $info['tag'] = $this->tag;
    $info['attributes'] = empty($this->attr) ? 'none' : $this->attr;
    $info['nodes'] = empty($this->nodes) ? 'none' : $this->nodes;

    return $info;
}
|
128 |
+
|
129 |
+
/**
 * String conversion of a node yields the result of outertext().
 *
 * @return string
 */
function __toString()
{
    $markup = $this->outertext();

    return $markup;
}
|
133 |
+
|
134 |
+
/**
 * Drop the references to the owning document and to the parent node.
 *
 * Both properties are unset (not merely set to null), breaking the links
 * to origin and branch.
 */
function clear()
{
    unset($this->dom, $this->parent);
}
|
139 |
+
|
140 |
+
/**
 * Print an indented outline of this node and all nodes below it.
 *
 * Each node is written on its own line as tab indentation followed by the
 * tag and, optionally, its attributes.
 *
 * @param bool $show_attr Whether to append the attribute list to each line.
 * @param int  $depth     Number of tab characters to indent this node with.
 *
 * @codeCoverageIgnore
 */
function dump($show_attr = true, $depth = 0)
{
    $line = str_repeat("\t", $depth) . $this->tag;

    if ($show_attr && count($this->attr) > 0) {
        $line .= '(';
        foreach ($this->attr as $name => $value) {
            $line .= "[$name]=>\"$value\", ";
        }
        $line .= ')';
    }

    echo $line . "\n";

    if (!$this->nodes) {
        return;
    }

    foreach ($this->nodes as $child) {
        $child->dump($show_attr, $depth + 1);
    }
}
|
161 |
+
|
162 |
+
/**
 * Build a one-line debug summary of this node and print or return it.
 *
 * The summary contains the tag, the attributes, the internal info array,
 * the text (when isset), the stored inner text and the number of children
 * and nodes.
 *
 * @param bool $echo True to print the summary, false to return it.
 * @return string|null The summary when $echo is false, otherwise null.
 *
 * @codeCoverageIgnore
 */
function dump_node($echo = true)
{
    $string = $this->tag;

    if (count($this->attr) > 0) {
        $string .= '(';
        foreach ($this->attr as $k => $v) {
            $string .= "[$k]=>\"$v\", ";
        }
        $string .= ')';
    }

    if (count($this->_) > 0) {
        $string .= ' $_ (';
        foreach ($this->_ as $k => $v) {
            if (is_array($v)) {
                $string .= "[$k]=>(";
                foreach ($v as $k2 => $v2) {
                    $string .= "[$k2]=>\"$v2\", ";
                }
                $string .= ')';
            } else {
                $string .= "[$k]=>\"$v\", ";
            }
        }
        $string .= ')';
    }

    // NOTE(review): "text" is not a declared property in the visible part of
    // the class; presumably resolved through magic accessors - confirm.
    if (isset($this->text)) {
        $string .= " text: ({$this->text})";
    }

    $string .= ' HDOM_INNER_INFO: ';

    // Inspect this node's own info array. The previous code read an
    // undefined $node variable, so the value was always reported as NULL.
    if (isset($this->_[self::HDOM_INFO_INNER])) {
        $string .= "'" . $this->_[self::HDOM_INFO_INNER] . "'";
    } else {
        $string .= ' NULL ';
    }

    $string .= ' children: ' . count($this->children);
    $string .= ' nodes: ' . count($this->nodes);
    $string .= "\n";

    if ($echo) {
        echo $string;
        return;
    } else {
        return $string;
    }
}
|
214 |
+
|
215 |
+
/**
 * Get the parent node, or move this node below a new parent.
 *
 * When $parent is given, the node is first removed from the nodes and
 * children lists of its current parent (both lists are re-indexed) and then
 * appended to the same lists of the new parent. Previously the old parent
 * kept a stale reference, and re-attaching to the same parent created
 * duplicate entries.
 *
 * @param object|null $parent New parent node, or null to only read.
 * @return mixed The (possibly updated) parent of this node.
 */
function parent($parent = null)
{
    if ($parent !== null) {
        // isset() also covers the case where the property was unset by clear().
        $previous = isset($this->parent) ? $this->parent : null;

        if ($previous !== null) {
            $nidx = array_search($this, $previous->nodes, true);
            if ($nidx !== false) {
                unset($previous->nodes[$nidx]);
                $previous->nodes = array_values($previous->nodes);
            }

            $cidx = array_search($this, $previous->children, true);
            if ($cidx !== false) {
                unset($previous->children[$cidx]);
                $previous->children = array_values($previous->children);
            }
        }

        $this->parent = $parent;
        $this->parent->nodes[] = $this;
        $this->parent->children[] = $this;
    }

    return $this->parent;
}
|
228 |
+
|
229 |
+
/**
 * Walk up the parent chain and return the closest ancestor with a given tag.
 *
 * The tag is compared with strict equality, so it must match the stored
 * tag exactly (including case).
 *
 * @param string $tag Tag to look for.
 * @return mixed The matching ancestor, or null when none exists.
 */
function find_ancestor_tag($tag)
{
    for ($ancestor = $this->parent; $ancestor !== null; $ancestor = $ancestor->parent) {
        if ($ancestor->tag === $tag) {
            return $ancestor;
        }
    }

    return null;
}
|
245 |
+
|
246 |
+
/**
 * Return the markup contained in this node.
 *
 * Starts from the stored inner text, falling back to the stored plain text
 * and then to an empty string, appends the outertext() of every entry in
 * the nodes list and passes the result through convert_text().
 *
 * @return mixed The value returned by convert_text().
 */
function innertext()
{
    $ret = '';

    // First stored value wins: inner text takes precedence over plain text.
    foreach (array(self::HDOM_INFO_INNER, self::HDOM_INFO_TEXT) as $slot) {
        if (isset($this->_[$slot])) {
            $ret = $this->_[$slot];
            break;
        }
    }

    foreach ($this->nodes as $child) {
        $ret .= $child->outertext();
    }

    return $this->convert_text($ret);
}
|
262 |
+
|
263 |
+
/**
 * Return the markup of this node including its own tags.
 *
 * For the root node this is innertext(). Otherwise the document callback
 * (if any) is invoked first; a stored outer text or plain text is returned
 * directly. Failing that, the markup is assembled from makeup(), the stored
 * inner text, the outertext() of all nodes and a closing tag.
 *
 * @return mixed The value returned by convert_text() (or innertext()).
 */
function outertext()
{
    if ($this->tag === 'root') {
        return $this->innertext();
    }

    // todo: What is the use of this callback? Remove?
    if ($this->dom && $this->dom->callback !== null) {
        call_user_func_array($this->dom->callback, array($this));
    }

    // Stored outer text takes precedence over stored plain text.
    foreach (array(self::HDOM_INFO_OUTER, self::HDOM_INFO_TEXT) as $slot) {
        if (isset($this->_[$slot])) {
            return $this->convert_text($this->_[$slot]);
        }
    }

    $ret = isset($this->_[self::HDOM_INFO_BEGIN]) ? $this->makeup() : '';

    // todo: <br> should either never have self::HDOM_INFO_INNER or always
    if (isset($this->_[self::HDOM_INFO_INNER]) && $this->tag !== 'br') {
        $ret .= $this->_[self::HDOM_INFO_INNER];
    }

    if ($this->nodes) {
        foreach ($this->nodes as $child) {
            $ret .= $child->outertext();
        }
    }

    // An end index of 0 (or none at all) means there is no closing tag.
    if (isset($this->_[self::HDOM_INFO_END]) && $this->_[self::HDOM_INFO_END] != 0) {
        $ret .= '</' . $this->tag . '>';
    }

    return $this->convert_text($ret);
}
|
307 |
+
|
308 |
+
/**
 * Returns true if the provided element is a block level element
 *
 * The node's tag is lower-cased and looked up in a fixed list of tags.
 *
 * @link https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
 * @param object $node Node whose tag property is inspected.
 * @return bool
 */
protected function is_block_element($node)
{
    // todo: When we have the utility class this should be moved there
    $block_tags = array(
        'p' => true,
        'h1' => true, 'h2' => true, 'h3' => true,
        'h4' => true, 'h5' => true, 'h6' => true,
        'ol' => true, 'ul' => true,
        'pre' => true,
        'address' => true,
        'blockquote' => true,
        'dl' => true,
        'div' => true,
        'fieldset' => true,
        'form' => true,
        'hr' => true,
        'noscript' => true,
        'table' => true,
    );

    return isset($block_tags[strtolower($node->tag)]);
}
|
331 |
+
|
332 |
+
/**
 * Returns true if the provided element is an inline level element
 *
 * The node's tag is lower-cased and looked up in a fixed list of tags.
 *
 * @link https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php
 * @param object $node Node whose tag property is inspected.
 * @return bool
 */
protected function is_inline_element($node)
{
    // todo: When we have the utility class this should be moved there
    $inline_tags = array(
        'b' => true, 'big' => true, 'i' => true, 'small' => true, 'tt' => true,
        'abbr' => true, 'acronym' => true, 'cite' => true, 'code' => true,
        'dfn' => true, 'em' => true, 'kbd' => true, 'strong' => true,
        'samp' => true, 'var' => true,
        'a' => true, 'bdo' => true, 'br' => true, 'img' => true, 'map' => true,
        'object' => true, 'q' => true, 'script' => true, 'span' => true,
        'sub' => true, 'sup' => true,
        'button' => true, 'input' => true, 'label' => true, 'select' => true,
        'textarea' => true,
    );

    return isset($inline_tags[strtolower($node->tag)]);
}
|
346 |
+
|
347 |
+
/**
 * Return the plain text of this node and the nodes below it.
 *
 * script, style, comment and unknown nodes contribute no text of their own.
 * Block level children are separated from preceding text by a blank line,
 * <br> is replaced by the document's default_br_text (or DEFAULT_BR_TEXT),
 * and leading/trailing whitespace of the result is either removed or
 * reduced to a single space.
 *
 * @param bool $trim True to strip leading/trailing whitespace, false to
 *                   collapse it to one space.
 * @return string
 */
function text($trim = true)
{
    $ret = '';
    $own_tag = strtolower($this->tag);

    if ($own_tag === 'script'
        || $own_tag === 'style'
        || $this->nodetype === self::HDOM_TYPE_COMMENT) {
        $ret = '';
    } elseif ($this->nodetype === self::HDOM_TYPE_CDATA) {
        $ret = $this->_[self::HDOM_INFO_INNER];
    } elseif ($this->nodetype === self::HDOM_TYPE_UNKNOWN) {
        $ret = '';
    } elseif (isset($this->_[self::HDOM_INFO_INNER])) {
        $ret = $this->_[self::HDOM_INFO_INNER];
    } elseif ($this->nodetype === self::HDOM_TYPE_TEXT) {
        $ret = $this->_[self::HDOM_INFO_TEXT];
    }

    if (is_null($this->nodes)) {
        return '';
    }

    foreach ($this->nodes as $n) {
        if ($this->is_block_element($n)) {
            $block = ltrim($this->convert_text($n->text(false)));

            // Compare against '' explicitly: empty() would also drop "0".
            if ($block === '') {
                continue;
            }

            $ret = rtrim($ret) . "\n\n" . $block;
        } elseif ($this->is_inline_element($n)) {
            // todo: <br> introduces code smell because no space but \n
            if (strtolower($n->tag) === 'br') {
                $ret .= $this->dom->default_br_text ?: DEFAULT_BR_TEXT;
            } else {
                // Render the child once and reuse the result; rendering it
                // twice doubled the work at every nesting level.
                // NOTE(review): assumes convert_text() returns the same value
                // for the same input - confirm.
                $inline = $this->convert_text($n->text(false));

                if (ltrim($inline) === '') {
                    continue;
                }

                $ret .= $inline;
            }
        } else {
            $ret .= $this->convert_text($n->text(false));
        }
    }

    // Reduce whitespace at start/end to a single (or none) space.
    // \x{A0} is the no-break space. With the /u modifier the former
    // "\xC2\xA0" denoted the two code points U+00C2 and U+00A0, so a
    // leading or trailing "Â" was stripped as well.
    $ret = preg_replace('/[ \t\n\r\0\x0B\x{A0}]+$/u', $trim ? '' : ' ', $ret);
    $ret = preg_replace('/^[ \t\n\r\0\x0B\x{A0}]+/u', $trim ? '' : ' ', $ret);

    return $ret;
}
|
404 |
+
|
405 |
+
/**
 * Return innertext() with CDATA section markers removed.
 *
 * The opening marker "<![CDATA[" is removed case-insensitively, the closing
 * marker "]]>" case-sensitively.
 *
 * @return string
 */
function xmltext()
{
    $without_open = str_ireplace('<![CDATA[', '', $this->innertext());

    return str_replace(']]>', '', $without_open);
}
|
412 |
+
|
413 |
+
/**
 * Build the opening tag of this node from its tag and attributes.
 *
 * Nodes with stored plain text (text, comment, unknown) return that text
 * instead. Attributes whose value is null or false are skipped, a value of
 * true yields a bare attribute name, and every other value is written with
 * the recorded quote style and surrounding whitespace.
 *
 * @return string
 */
function makeup()
{
    // text, comment, unknown
    if (isset($this->_[self::HDOM_INFO_TEXT])) {
        return $this->_[self::HDOM_INFO_TEXT];
    }

    $html = '<' . $this->tag;

    foreach ($this->attr as $name => $value) {
        // skip removed attribute
        if ($value === null || $value === false) {
            continue;
        }

        // Recorded whitespace: [0] before the name, [1] before and [2] after "=".
        $has_space = isset($this->_[self::HDOM_INFO_SPACE][$name]);

        $html .= $has_space ? $this->_[self::HDOM_INFO_SPACE][$name][0] : ' ';

        //no value attr: nowrap, checked selected...
        if ($value === true) {
            $html .= $name;
            continue;
        }

        $quote_type = isset($this->_[self::HDOM_INFO_QUOTE][$name])
            ? $this->_[self::HDOM_INFO_QUOTE][$name]
            : self::HDOM_QUOTE_DOUBLE;

        if ($quote_type == self::HDOM_QUOTE_SINGLE) {
            $quote = '\'';
            $value = htmlentities($value, ENT_QUOTES, $this->dom->target_charset);
        } elseif ($quote_type == self::HDOM_QUOTE_NO) {
            // Unquoted values are written as they are.
            $quote = '';
        } else {
            $quote = '"';
            $value = htmlentities($value, ENT_COMPAT, $this->dom->target_charset);
        }

        $before_equals = $has_space ? $this->_[self::HDOM_INFO_SPACE][$name][1] : '';
        $after_equals = $has_space ? $this->_[self::HDOM_INFO_SPACE][$name][2] : '';

        $html .= $name . $before_equals . '=' . $after_equals . $quote . $value . $quote;
    }

    if (isset($this->_[self::HDOM_INFO_ENDSPACE])) {
        $html .= $this->_[self::HDOM_INFO_ENDSPACE];
    }

    return $html . '>';
}
|
474 |
+
|
475 |
+
/**
 * Find nodes below this node that match a selector string.
 *
 * The string is parsed by parse_selector(); every selector of the list is
 * resolved level by level through seek(), and the union of all matches is
 * returned ordered by their key in the document's node table.
 *
 * @param string   $selector  Selector string.
 * @param int|null $idx       Null for all matches, otherwise the index of
 *                            the wanted match (negative counts from the end).
 * @param bool     $lowercase Forwarded to seek().
 * @return mixed Array of matches when $idx is null; a single node or null
 *               for a given index; an empty array when the selector is
 *               empty or this node has no start index.
 */
function find($selector, $idx = null, $lowercase = false)
{
    $selectors = $this->parse_selector($selector);
    $total = count($selectors);

    if ($total === 0) {
        return array();
    }

    $found_keys = array();

    // find each selector
    for ($c = 0; $c < $total; ++$c) {
        // The change on the below line was documented on the sourceforge
        // code tracker id 2788009
        $depth = count($selectors[$c]);

        if ($depth === 0) {
            Debug::log_once('Empty selector (' . $selector . ') matches nothing.');
            return array();
        }

        if (!isset($this->_[self::HDOM_INFO_BEGIN])) {
            Debug::log_once('Invalid operation. The current node has no start tag.');
            return array();
        }

        $head = array($this->_[self::HDOM_INFO_BEGIN] => 1);
        $cmd = ' '; // Combinator

        // handle descendant selectors, no recursive!
        for ($l = 0; $l < $depth; ++$l) {
            $ret = array();

            foreach ($head as $k => $v) {
                $n = ($k === -1) ? $this->dom->root : $this->dom->nodes[$k];
                //PaperG - Pass this optional parameter on to the seek function.
                $n->seek($selectors[$c][$l], $ret, $cmd, $lowercase);
            }

            $head = $ret;
            $cmd = $selectors[$c][$l][6]; // Next Combinator
        }

        // Keys double as a set, so repeated matches collapse automatically.
        foreach (array_keys($head) as $k) {
            $found_keys[$k] = 1;
        }
    }

    // sort keys
    ksort($found_keys);

    $found = array();
    foreach (array_keys($found_keys) as $k) {
        $found[] = $this->dom->nodes[$k];
    }

    // return nth-element or array
    if (is_null($idx)) {
        return $found;
    }

    if ($idx < 0) {
        $idx = count($found) + $idx;
    }

    return isset($found[$idx]) ? $found[$idx] : null;
}
|
533 |
+
|
534 |
+
/**
 * Like find(), but every falsy result (such as an empty array) becomes null.
 *
 * @param string   $selector  Selector string, forwarded to find().
 * @param int|null $idx       Index, forwarded to find().
 * @param bool     $lowercase Forwarded to find().
 * @return mixed The result of find() when truthy, otherwise null.
 */
function expect($selector, $idx = null, $lowercase = false)
{
    $result = $this->find($selector, $idx, $lowercase);

    return $result ? $result : null;
}
|
538 |
+
|
539 |
+
/**
 * Collect the nodes that match one parsed selector relative to this node.
 *
 * Candidate nodes are chosen by the combinator that connects this node to
 * the selector (descendant, child, next sibling, subsequent sibling) and
 * then filtered by tag, id, classes and attributes. Matches are recorded in
 * $ret, keyed by their index in the document's node table.
 *
 * Changes compared to the previous implementation:
 * - "~" no longer offers the current element itself as a candidate.
 * - Sibling lookups use a single strict search instead of a loose in_array()
 *   followed by a strict array_search().
 * - Class names are compared strictly.
 *
 * @param array  $selector   One parsed selector: pseudo selector, tag,
 *                           pseudo element, id, classes, attributes, combinator.
 * @param array  $ret        Result set, filled by reference (key => 1).
 * @param string $parent_cmd Combinator: ' ', '>', '+' or '~'.
 * @param bool   $lowercase  Lower-case class names and attribute values
 *                           before comparing.
 */
protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
{
    list($ps_selector, $tag, $ps_element, $id, $class, $attributes) = $selector;
    $nodes = array();

    if ($parent_cmd === ' ') { // Descendant Combinator
        // Find parent closing tag if the current element doesn't have a closing
        // tag (i.e. void element)
        $end = (!empty($this->_[self::HDOM_INFO_END])) ? $this->_[self::HDOM_INFO_END] : 0;
        if ($end == 0 && $this->parent) {
            $parent = $this->parent;
            while ($parent !== null && !isset($parent->_[self::HDOM_INFO_END])) {
                $end -= 1;
                $parent = $parent->parent;
            }
            $end += $parent->_[self::HDOM_INFO_END];
        }

        if ($end === 0) {
            $end = count($this->dom->nodes);
        }

        // Get list of target nodes
        $nodes_start = $this->_[self::HDOM_INFO_BEGIN] + 1;

        // remove() makes $this->dom->nodes non-contiguous; use what is left.
        $nodes = array_intersect_key(
            $this->dom->nodes,
            array_flip(range($nodes_start, $end))
        );
    } elseif ($parent_cmd === '>') { // Child Combinator
        $nodes = $this->children;
    } elseif (($parent_cmd === '+' || $parent_cmd === '~') && $this->parent) {
        $siblings = $this->parent->children;

        // Strict search: a loose comparison would compare whole object graphs
        // that reference each other through parent/children.
        $index = array_search($this, $siblings, true);

        if ($index !== false) {
            if ($parent_cmd === '+') { // Next-Sibling Combinator
                if ($index + 1 < count($siblings)) {
                    $nodes[] = $siblings[$index + 1];
                }
            } else { // Subsequent Sibling Combinator
                // Only siblings after this element qualify, never the element itself.
                $nodes = array_slice($siblings, $index + 1);
            }
        }
    }

    // Go through each element starting at this element until the end tag
    // Note: If this element is a void tag, any previous void element is
    // skipped.
    foreach ($nodes as $node) {
        // Skip root nodes
        if (!$node->parent) {
            continue;
        }

        // Handle 'text' selector
        if ($tag === 'text') {
            if ($node->tag === 'text') {
                $ret[array_search($node, $this->dom->nodes, true)] = 1;
            }

            if (isset($node->_[self::HDOM_INFO_INNER])) {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // Handle 'cdata' selector
        if ($tag === 'cdata') {
            if ($node->tag === 'cdata') {
                $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            }

            continue;
        }

        // Handle 'comment'
        if ($tag === 'comment' && $node->tag === 'comment') {
            $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
            continue;
        }

        // Skip if node isn't a child node (i.e. text nodes)
        if (!in_array($node, $node->parent->children, true)) {
            continue;
        }

        $pass = true;

        // Skip if tag doesn't match
        if ($tag !== '' && $tag !== $node->tag && $tag !== '*') {
            $pass = false;
        }

        // Skip if ID doesn't exist
        if ($pass && $id !== '' && !isset($node->attr['id'])) {
            $pass = false;
        }

        // Check if ID matches
        if ($pass && $id !== '' && isset($node->attr['id'])) {
            // Note: Only consider the first ID (as browsers do)
            $node_id = explode(' ', trim($node->attr['id']))[0];

            if ($id !== $node_id) {
                $pass = false;
            }
        }

        // Check if all class(es) exist
        if ($pass && $class !== '' && is_array($class) && !empty($class)) {
            if (isset($node->attr['class'])) {
                // Apply the same rules for the pattern and attribute value
                // Attribute values must not contain control characters other than space
                // https://www.w3.org/TR/html/dom.html#text-content
                // https://www.w3.org/TR/html/syntax.html#attribute-values
                // https://www.w3.org/TR/xml/#AVNormalize
                $node_classes = preg_replace("/[\r\n\t\s]+/u", ' ', $node->attr['class']);
                $node_classes = trim($node_classes);
                $node_classes = explode(' ', $node_classes);

                if ($lowercase) {
                    $node_classes = array_map('strtolower', $node_classes);
                }

                foreach ($class as $c) {
                    // Strict: numeric-looking names such as "1e1" and "10"
                    // must not be treated as equal.
                    if (!in_array($c, $node_classes, true)) {
                        $pass = false;
                        break;
                    }
                }
            } else {
                $pass = false;
            }
        }

        // Check attributes
        if ($pass
            && $attributes !== ''
            && is_array($attributes)
            && !empty($attributes)) {
            foreach ($attributes as $a) {
                list (
                    $att_name,
                    $att_expr,
                    $att_val,
                    $att_inv,
                    $att_case_sensitivity
                ) = $a;

                // Handle indexing attributes (i.e. "[2]")
                /**
                 * Note: This is not supported by the CSS Standard but adds
                 * the ability to select items compatible to XPath (i.e.
                 * the 3rd element within its parent).
                 *
                 * Note: This doesn't conflict with the CSS Standard which
                 * doesn't work on numeric attributes anyway.
                 */
                if (is_numeric($att_name)
                    && $att_expr === ''
                    && $att_val === '') {
                    $count = 0;

                    // Find index of current element in parent
                    foreach ($node->parent->children as $c) {
                        if ($c->tag === $node->tag) ++$count;
                        if ($c === $node) break;
                    }

                    // If this is the correct node, continue with next
                    // attribute
                    if ($count === (int)$att_name) continue;
                }

                // Check attribute availability
                if ($att_inv) { // Attribute should NOT be set
                    if (isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                } else { // Attribute should be set
                    // todo: "plaintext" is not a valid CSS selector!
                    if ($att_name !== 'plaintext'
                        && !isset($node->attr[$att_name])) {
                        $pass = false;
                        break;
                    }
                }

                // Continue with next attribute if expression isn't defined
                if ($att_expr === '') continue;

                // If they have told us that this is a "plaintext"
                // search then we want the plaintext of the node - right?
                // todo "plaintext" is not a valid CSS selector!
                if ($att_name === 'plaintext') {
                    $nodeKeyValue = $node->text();
                } else {
                    $nodeKeyValue = $node->attr[$att_name];
                }

                // If lowercase is set, do a case insensitive test of
                // the value of the selector.
                if ($lowercase) {
                    $check = $this->match(
                        $att_expr,
                        strtolower($att_val),
                        strtolower($nodeKeyValue),
                        $att_case_sensitivity
                    );
                } else {
                    $check = $this->match(
                        $att_expr,
                        $att_val,
                        $nodeKeyValue,
                        $att_case_sensitivity
                    );
                }

                $check = $ps_element === 'not' ? !$check : $check;

                if (!$check) {
                    $pass = false;
                    break;
                }
            }
        }

        // Found a match. Add to list
        $pass = $ps_selector === 'not' ? !$pass : $pass;
        if ($pass) $ret[$node->_[self::HDOM_INFO_BEGIN]] = 1;
    }
}
|
781 |
+
|
782 |
+
/**
 * Test an attribute value against a pattern using a CSS attribute operator.
 *
 * Whitespace runs in both pattern and value are collapsed to single spaces
 * and trimmed before comparing. Supported operators: =, !=, ^=, $=, *=, |=
 * and ~=. Any other operator is logged and does not match.
 *
 * @param string $exp              Attribute operator.
 * @param string $pattern          Value given in the selector.
 * @param string $value            Value found on the node.
 * @param string $case_sensitivity 'i' lower-cases pattern and value first.
 * @return bool
 */
protected function match($exp, $pattern, $value, $case_sensitivity)
{
    if ($case_sensitivity === 'i') {
        $pattern = strtolower($pattern);
        $value = strtolower($value);
    }

    // Apply the same rules for the pattern and attribute value
    // Attribute values must not contain control characters other than space
    // https://www.w3.org/TR/html/dom.html#text-content
    // https://www.w3.org/TR/html/syntax.html#attribute-values
    // https://www.w3.org/TR/xml/#AVNormalize
    $pattern = preg_replace("/[\r\n\t\s]+/u", ' ', $pattern);
    $pattern = trim($pattern);

    $value = preg_replace("/[\r\n\t\s]+/u", ' ', $value);
    $value = trim($value);

    switch ($exp) {
        case '=':
            return ($value === $pattern);
        case '!=':
            return ($value !== $pattern);
        case '^=':
            return (bool)preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
        case '$=':
            return (bool)preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
        case '*=':
            return (bool)preg_match('/' . preg_quote($pattern, '/') . '/', $value);
        case '|=':
            /**
             * [att|=val]
             *
             * Represents an element with the att attribute, its value
             * either being exactly "val" or beginning with "val"
             * immediately followed by "-" (U+002D).
             *
             * A plain prefix test is not enough: "en" must match "en" and
             * "en-US" but not "english".
             */
            return $value === $pattern
                || strpos($value, $pattern . '-') === 0;
        case '~=':
            /**
             * [att~=val]
             *
             * Represents an element with the att attribute whose value is a
             * whitespace-separated list of words, one of which is exactly
             * "val". If "val" contains whitespace, it will never represent
             * anything (since the words are separated by spaces). Also if
             * "val" is the empty string, it will never represent anything.
             */
            return in_array($pattern, explode(' ', trim($value)), true);
    }

    Debug::log('Unhandled attribute selector: ' . $exp . '!');
    return false;
}
|
836 |
+
|
837 |
+
protected function parse_selector($selector_string)
|
838 |
+
{
|
839 |
+
/**
|
840 |
+
* Pattern of CSS selectors, modified from mootools (https://mootools.net/)
|
841 |
+
*
|
842 |
+
* Paperg: Add the colon to the attribute, so that it properly finds
|
843 |
+
* <tag attr:ibute="something" > like google does.
|
844 |
+
*
|
845 |
+
* Note: if you try to look at this attribute, you MUST use getAttribute
|
846 |
+
* since $dom->x:y will fail the php syntax check.
|
847 |
+
*
|
848 |
+
* Notice the \[ starting the attribute? and the @? following? This
|
849 |
+
* implies that an attribute can begin with an @ sign that is not
|
850 |
+
* captured. This implies that an html attribute specifier may start
|
851 |
+
* with an @ sign that is NOT captured by the expression. Further study
|
852 |
+
* is required to determine if this should be documented or removed.
|
853 |
+
*
|
854 |
+
* Matches selectors in this order:
|
855 |
+
*
|
856 |
+
* [0] - full match
|
857 |
+
*
|
858 |
+
* [1] - pseudo selector
|
859 |
+
* (?:\:(\w+)\()?
|
860 |
+
* Matches the pseudo selector (optional)
|
861 |
+
*
|
862 |
+
* [2] - tag name
|
863 |
+
* ([\w:\*-]*)
|
864 |
+
* Matches the tag name consisting of zero or more words, colons,
|
865 |
+
* asterisks and hyphens.
|
866 |
+
*
|
867 |
+
* [3] - pseudo selector
|
868 |
+
* (?:\:(\w+)\()?
|
869 |
+
* Matches the pseudo selector (optional)
|
870 |
+
*
|
871 |
+
* [4] - id name
|
872 |
+
* (?:\#([\w-]+))
|
873 |
+
* Optionally matches a id name, consisting of an "#" followed by
|
874 |
+
* the id name (one or more words and hyphens).
|
875 |
+
*
|
876 |
+
* [5] - class names (including dots)
|
877 |
+
* (?:\.([\w\.-]+))?
|
878 |
+
* Optionally matches a list of classes, consisting of a "."
|
879 |
+
* followed by the class name (one or more words and hyphens)
|
880 |
+
* where multiple classes can be chained (i.e. ".foo.bar.baz")
|
881 |
+
*
|
882 |
+
* [6] - attributes
|
883 |
+
* ((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?
|
884 |
+
* Optionally matches the attributes list
|
885 |
+
*
|
886 |
+
* [7] - separator
|
887 |
+
* ([\/, >+~]+)
|
888 |
+
* Matches the selector list separator
|
889 |
+
*/
|
890 |
+
// phpcs:ignore Generic.Files.LineLength
|
891 |
+
$pattern = "/(?:\:(\w+)\()?([\w:\*-]*)(?:\:(\w+)\()?(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?(?:\))?(?:\))?([\/, >+~]+)/is";
|
892 |
+
|
893 |
+
preg_match_all(
|
894 |
+
$pattern,
|
895 |
+
trim($selector_string) . ' ', // Add final ' ' as pseudo separator
|
896 |
+
$matches,
|
897 |
+
PREG_SET_ORDER
|
898 |
+
);
|
899 |
+
|
900 |
+
$selectors = array();
|
901 |
+
$result = array();
|
902 |
+
|
903 |
+
foreach ($matches as $m) {
|
904 |
+
$m[0] = trim($m[0]);
|
905 |
+
|
906 |
+
// Skip NoOps
|
907 |
+
if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') { continue; }
|
908 |
+
|
909 |
+
array_shift($m);
|
910 |
+
|
911 |
+
// Convert to lowercase
|
912 |
+
if ($this->dom->lowercase) {
|
913 |
+
$m[1] = strtolower($m[1]);
|
914 |
+
}
|
915 |
+
|
916 |
+
// Extract classes
|
917 |
+
if ($m[4] !== '') { $m[4] = explode('.', $m[4]); }
|
918 |
+
|
919 |
+
/* Extract attributes (pattern based on the pattern above!)
|
920 |
+
|
921 |
+
* [0] - full match
|
922 |
+
* [1] - attribute name
|
923 |
+
* [2] - attribute expression
|
924 |
+
* [3] - attribute value
|
925 |
+
* [4] - case sensitivity
|
926 |
+
*
|
927 |
+
* Note: Attributes can be negated with a "!" prefix to their name
|
928 |
+
*/
|
929 |
+
if($m[5] !== '') {
|
930 |
+
preg_match_all(
|
931 |
+
"/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s+?([iIsS])?)?\]/is",
|
932 |
+
trim($m[5]),
|
933 |
+
$attributes,
|
934 |
+
PREG_SET_ORDER
|
935 |
+
);
|
936 |
+
|
937 |
+
// Replace element by array
|
938 |
+
$m[5] = array();
|
939 |
+
|
940 |
+
foreach($attributes as $att) {
|
941 |
+
// Skip empty matches
|
942 |
+
if(trim($att[0]) === '') { continue; }
|
943 |
+
|
944 |
+
$inverted = (isset($att[1][0]) && $att[1][0] === '!');
|
945 |
+
$m[5][] = array(
|
946 |
+
$inverted ? substr($att[1], 1) : $att[1], // Name
|
947 |
+
(isset($att[2])) ? $att[2] : '', // Expression
|
948 |
+
(isset($att[3])) ? $att[3] : '', // Value
|
949 |
+
$inverted, // Inverted Flag
|
950 |
+
(isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity
|
951 |
+
);
|
952 |
+
}
|
953 |
+
}
|
954 |
+
|
955 |
+
// Sanitize Separator
|
956 |
+
if ($m[6] !== '' && trim($m[6]) === '') { // Descendant Separator
|
957 |
+
$m[6] = ' ';
|
958 |
+
} else { // Other Separator
|
959 |
+
$m[6] = trim($m[6]);
|
960 |
+
}
|
961 |
+
|
962 |
+
// Clear Separator if it's a Selector List
|
963 |
+
if ($is_list = ($m[6] === ',')) { $m[6] = ''; }
|
964 |
+
|
965 |
+
$result[] = $m;
|
966 |
+
|
967 |
+
if ($is_list) { // Selector List
|
968 |
+
$selectors[] = $result;
|
969 |
+
$result = array();
|
970 |
+
}
|
971 |
+
}
|
972 |
+
|
973 |
+
if (count($result) > 0) { $selectors[] = $result; }
|
974 |
+
return $selectors;
|
975 |
+
}
|
976 |
+
|
977 |
+
function __get($name)
|
978 |
+
{
|
979 |
+
if (isset($this->attr[$name])) {
|
980 |
+
return $this->convert_text($this->attr[$name]);
|
981 |
+
}
|
982 |
+
|
983 |
+
switch ($name) {
|
984 |
+
case 'outertext': return $this->outertext();
|
985 |
+
case 'innertext': return $this->innertext();
|
986 |
+
case 'plaintext': return $this->text();
|
987 |
+
case 'xmltext': return $this->xmltext();
|
988 |
+
}
|
989 |
+
|
990 |
+
return false;
|
991 |
+
}
|
992 |
+
|
993 |
+
function __set($name, $value)
|
994 |
+
{
|
995 |
+
switch ($name) {
|
996 |
+
case 'outertext':
|
997 |
+
$this->_[self::HDOM_INFO_OUTER] = $value;
|
998 |
+
break;
|
999 |
+
case 'innertext':
|
1000 |
+
if (isset($this->_[self::HDOM_INFO_TEXT])) {
|
1001 |
+
$this->_[self::HDOM_INFO_TEXT] = '';
|
1002 |
+
}
|
1003 |
+
$this->_[self::HDOM_INFO_INNER] = $value;
|
1004 |
+
break;
|
1005 |
+
default: $this->attr[$name] = $value;
|
1006 |
+
}
|
1007 |
+
}
|
1008 |
+
|
1009 |
+
function __isset($name)
|
1010 |
+
{
|
1011 |
+
switch ($name) {
|
1012 |
+
case 'outertext': return true;
|
1013 |
+
case 'innertext': return true;
|
1014 |
+
case 'plaintext': return true;
|
1015 |
+
}
|
1016 |
+
|
1017 |
+
return isset($this->attr[$name]);
|
1018 |
+
}
|
1019 |
+
|
1020 |
+
function __unset($name)
|
1021 |
+
{
|
1022 |
+
if (isset($this->attr[$name])) { unset($this->attr[$name]); }
|
1023 |
+
}
|
1024 |
+
|
1025 |
+
/**
 * Converts text from the document's source charset to its target charset.
 *
 * Both charsets are read from the owning DOM ($this->dom->_charset and
 * $this->dom->_target_charset). When the node has no DOM, or either charset
 * is empty, no conversion is attempted.
 *
 * @param string $text Text to convert.
 * @return string The converted text, or the unconverted text when no
 * conversion applies or iconv() fails. When the target charset is UTF-8, a
 * byte order mark at the start or the end of the text is removed.
 */
function convert_text($text)
{
    $converted_text = $text;

    $sourceCharset = '';
    $targetCharset = '';

    if ($this->dom) {
        $sourceCharset = strtoupper($this->dom->_charset);
        $targetCharset = strtoupper($this->dom->_target_charset);
    }

    // Both names are already upper-cased above, so they compare directly.
    if (!empty($sourceCharset) && !empty($targetCharset)) {
        if ($sourceCharset === $targetCharset) {
            $converted_text = $text;
        } elseif (($targetCharset === 'UTF-8') && (self::is_utf8($text))) {
            Debug::log_once('The source charset was incorrectly detected as ' . $sourceCharset . ' but should have been UTF-8');
            $converted_text = $text;
        } else {
            // iconv() returns false when the conversion fails (it also raises
            // a notice). Keep the unconverted text in that case rather than
            // handing `false` back to the caller.
            $iconv_result = iconv($sourceCharset, $targetCharset, $text);

            if ($iconv_result !== false) {
                $converted_text = $iconv_result;
            }
        }
    }

    // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
    if ($targetCharset === 'UTF-8') {
        if (substr($converted_text, 0, 3) === "\xef\xbb\xbf") {
            $converted_text = substr($converted_text, 3);
        }

        if (substr($converted_text, -3) === "\xef\xbb\xbf") {
            $converted_text = substr($converted_text, 0, -3);
        }
    }

    return $converted_text;
}
|
1061 |
+
|
1062 |
+
/**
 * Checks whether a byte string is structurally valid UTF-8.
 *
 * This is a permissive, legacy-style check: every lead byte must be followed
 * by the right number of continuation bytes (0x80-0xBF). Lead bytes up to
 * 0xFD (5- and 6-byte forms) are accepted and overlong encodings are not
 * rejected.
 *
 * @param string $str Byte string to inspect.
 * @return bool True if every multi-byte sequence is well-formed (an empty
 * string is valid), false otherwise.
 */
static function is_utf8($str)
{
    $len = strlen($str);

    for ($i = 0; $i < $len; $i++) {
        $c = ord($str[$i]);

        // Bytes 0x00-0x7F are plain ASCII. Everything from 0x80 upwards must
        // be a lead byte; 0x80 itself is a continuation byte and therefore
        // invalid here (the previous `> 128` test let it through).
        if ($c >= 128) {
            if ($c >= 254) { return false; }
            elseif ($c >= 252) { $bits = 6; }
            elseif ($c >= 248) { $bits = 5; }
            elseif ($c >= 240) { $bits = 4; }
            elseif ($c >= 224) { $bits = 3; }
            elseif ($c >= 192) { $bits = 2; }
            else { return false; } // Stray continuation byte (0x80-0xBF)

            // The whole sequence must fit into the remaining input.
            if (($i + $bits) > $len) { return false; }

            while ($bits > 1) {
                $i++;
                $b = ord($str[$i]);
                if ($b < 128 || $b > 191) { return false; }
                $bits--;
            }
        }
    }

    return true;
}
|
1088 |
+
|
1089 |
+
/**
 * Determines the display size of an image element.
 *
 * The `width` and `height` attributes of the tag take precedence. For a
 * dimension without such an attribute, the inline `style` attribute is
 * searched for a whole-number pixel value (e.g. "width: 100px").
 *
 * @return array|false False if this element is not an `img` tag, otherwise
 * array('height' => ..., 'width' => ...) where a dimension that could not be
 * determined is -1.
 */
function get_display_size()
{
    $width = -1;
    $height = -1;

    if ($this->tag !== 'img') {
        return false;
    }

    // See if there is a height or width attribute in the tag itself.
    if (isset($this->attr['width'])) {
        $width = $this->attr['width'];
    }

    if (isset($this->attr['height'])) {
        $height = $this->attr['height'];
    }

    // Now look for an inline style.
    if (isset($this->attr['style'])) {
        // Thanks to user gnarf from stackoverflow for this regular expression.
        $attributes = array();

        preg_match_all(
            '/([\w-]+)\s*:\s*([^;]+)\s*;?/',
            $this->attr['style'],
            $matches,
            PREG_SET_ORDER
        );

        foreach ($matches as $match) {
            // The value group is greedy and may carry trailing whitespace
            // (e.g. "100px ;"), which would defeat the "px" suffix check.
            $attributes[$match[1]] = trim($match[2]);
        }

        // Returns the numeric part of an integer pixel length, else null.
        $pixel_value = static function ($value) {
            // check that the last two characters are px (pixels)
            if (strtolower(substr($value, -2)) !== 'px') {
                return null;
            }

            $number = trim(substr($value, 0, -2));

            // Now make sure that it's an integer and not something stupid.
            // Compare against false explicitly: a valid "0" validates to the
            // integer 0, which a plain truthiness test would reject.
            if (filter_var($number, FILTER_VALIDATE_INT) === false) {
                return null;
            }

            return $number;
        };

        // If there is a width in the style attributes and none on the tag:
        if (isset($attributes['width']) && $width === -1) {
            $proposed_width = $pixel_value($attributes['width']);

            if ($proposed_width !== null) {
                $width = $proposed_width;
            }
        }

        // If there is a height in the style attributes and none on the tag:
        if (isset($attributes['height']) && $height === -1) {
            $proposed_height = $pixel_value($attributes['height']);

            if ($proposed_height !== null) {
                $height = $proposed_height;
            }
        }
    }

    // Future enhancement:
    // Look in the tag to see if there is a class or id specified that has
    // a height or width attribute to it.

    // Far future enhancement
    // Look at all the parent tags of this image to see if they specify a
    // class or id that has an img selector that specifies a height or width
    // Note that in this case, the class or id will have the img subselector
    // for it to apply to the image.

    // ridiculously far future development
    // If the class or id is specified in a SEPARATE css file thats not on
    // the page, go get it and do what we were just doing for the ones on
    // the page.

    $result = array(
        'height' => $height,
        'width' => $width
    );

    return $result;
}
|
1171 |
+
|
1172 |
+
function save($filepath = '')
|
1173 |
+
{
|
1174 |
+
$ret = $this->outertext();
|
1175 |
+
|
1176 |
+
if ($filepath !== '') {
|
1177 |
+
file_put_contents($filepath, $ret, LOCK_EX);
|
1178 |
+
}
|
1179 |
+
|
1180 |
+
return $ret;
|
1181 |
+
}
|
1182 |
+
|
1183 |
+
function addClass($class)
|
1184 |
+
{
|
1185 |
+
if (is_string($class)) {
|
1186 |
+
$class = explode(' ', $class);
|
1187 |
+
}
|
1188 |
+
|
1189 |
+
if (is_array($class)) {
|
1190 |
+
foreach($class as $c) {
|
1191 |
+
if (isset($this->class)) {
|
1192 |
+
if ($this->hasClass($c)) {
|
1193 |
+
continue;
|
1194 |
+
} else {
|
1195 |
+
$this->class .= ' ' . $c;
|
1196 |
+
}
|
1197 |
+
} else {
|
1198 |
+
$this->class = $c;
|
1199 |
+
}
|
1200 |
+
}
|
1201 |
+
}
|
1202 |
+
}
|
1203 |
+
|
1204 |
+
function hasClass($class)
|
1205 |
+
{
|
1206 |
+
if (is_string($class)) {
|
1207 |
+
if (isset($this->class)) {
|
1208 |
+
return in_array($class, explode(' ', $this->class), true);
|
1209 |
+
}
|
1210 |
+
}
|
1211 |
+
|
1212 |
+
return false;
|
1213 |
+
}
|
1214 |
+
|
1215 |
+
function removeClass($class = null)
|
1216 |
+
{
|
1217 |
+
if (!isset($this->class)) {
|
1218 |
+
return;
|
1219 |
+
}
|
1220 |
+
|
1221 |
+
if (is_null($class)) {
|
1222 |
+
$this->removeAttribute('class');
|
1223 |
+
return;
|
1224 |
+
}
|
1225 |
+
|
1226 |
+
if (is_string($class)) {
|
1227 |
+
$class = explode(' ', $class);
|
1228 |
+
}
|
1229 |
+
|
1230 |
+
if (is_array($class)) {
|
1231 |
+
$class = array_diff(explode(' ', $this->class), $class);
|
1232 |
+
if (empty($class)) {
|
1233 |
+
$this->removeAttribute('class');
|
1234 |
+
} else {
|
1235 |
+
$this->class = implode(' ', $class);
|
1236 |
+
}
|
1237 |
+
}
|
1238 |
+
}
|
1239 |
+
|
1240 |
+
function getAllAttributes()
|
1241 |
+
{
|
1242 |
+
return $this->attr;
|
1243 |
+
}
|
1244 |
+
|
1245 |
+
function getAttribute($name)
|
1246 |
+
{
|
1247 |
+
return $this->$name;
|
1248 |
+
}
|
1249 |
+
|
1250 |
+
function setAttribute($name, $value)
|
1251 |
+
{
|
1252 |
+
$this->$name = $value;
|
1253 |
+
}
|
1254 |
+
|
1255 |
+
function hasAttribute($name)
|
1256 |
+
{
|
1257 |
+
return isset($this->$name);
|
1258 |
+
}
|
1259 |
+
|
1260 |
+
function removeAttribute($name)
|
1261 |
+
{
|
1262 |
+
unset($this->$name);
|
1263 |
+
}
|
1264 |
+
|
1265 |
+
function remove()
|
1266 |
+
{
|
1267 |
+
if ($this->parent) {
|
1268 |
+
$this->parent->removeChild($this);
|
1269 |
+
}
|
1270 |
+
}
|
1271 |
+
|
1272 |
+
function removeChild($node)
|
1273 |
+
{
|
1274 |
+
foreach($node->children as $child) {
|
1275 |
+
$node->removeChild($child);
|
1276 |
+
}
|
1277 |
+
|
1278 |
+
// No need to re-index node->children because it is about to be removed!
|
1279 |
+
|
1280 |
+
foreach($node->nodes as $entity) {
|
1281 |
+
$enidx = array_search($entity, $node->nodes, true);
|
1282 |
+
$edidx = array_search($entity, $node->dom->nodes, true);
|
1283 |
+
|
1284 |
+
if ($enidx !== false) {
|
1285 |
+
unset($node->nodes[$enidx]);
|
1286 |
+
}
|
1287 |
+
|
1288 |
+
if ($edidx !== false) {
|
1289 |
+
unset($node->dom->nodes[$edidx]);
|
1290 |
+
}
|
1291 |
+
}
|
1292 |
+
|
1293 |
+
// No need to re-index node->nodes because it is about to be removed!
|
1294 |
+
|
1295 |
+
$nidx = array_search($node, $this->nodes, true);
|
1296 |
+
$cidx = array_search($node, $this->children, true);
|
1297 |
+
$didx = array_search($node, $this->dom->nodes, true);
|
1298 |
+
|
1299 |
+
if ($nidx !== false) {
|
1300 |
+
unset($this->nodes[$nidx]);
|
1301 |
+
}
|
1302 |
+
|
1303 |
+
$this->nodes = array_values($this->nodes);
|
1304 |
+
|
1305 |
+
if ($cidx !== false) {
|
1306 |
+
unset($this->children[$cidx]);
|
1307 |
+
}
|
1308 |
+
|
1309 |
+
$this->children = array_values($this->children);
|
1310 |
+
|
1311 |
+
if ($didx !== false) {
|
1312 |
+
unset($this->dom->nodes[$didx]);
|
1313 |
+
}
|
1314 |
+
|
1315 |
+
// Do not re-index dom->nodes because nodes point to other nodes in the
|
1316 |
+
// array explicitly!
|
1317 |
+
|
1318 |
+
$node->clear();
|
1319 |
+
}
|
1320 |
+
|
1321 |
+
function getElementById($id)
|
1322 |
+
{
|
1323 |
+
return $this->find("#$id", 0);
|
1324 |
+
}
|
1325 |
+
|
1326 |
+
function getElementsById($id, $idx = null)
|
1327 |
+
{
|
1328 |
+
return $this->find("#$id", $idx);
|
1329 |
+
}
|
1330 |
+
|
1331 |
+
function getElementByTagName($name)
|
1332 |
+
{
|
1333 |
+
return $this->find($name, 0);
|
1334 |
+
}
|
1335 |
+
|
1336 |
+
function getElementsByTagName($name, $idx = null)
|
1337 |
+
{
|
1338 |
+
return $this->find($name, $idx);
|
1339 |
+
}
|
1340 |
+
|
1341 |
+
function parentNode()
|
1342 |
+
{
|
1343 |
+
return $this->parent();
|
1344 |
+
}
|
1345 |
+
|
1346 |
+
function childNodes($idx = -1)
|
1347 |
+
{
|
1348 |
+
if ($idx === -1) {
|
1349 |
+
return $this->children;
|
1350 |
+
}
|
1351 |
+
|
1352 |
+
if (isset($this->children[$idx])) {
|
1353 |
+
return $this->children[$idx];
|
1354 |
+
}
|
1355 |
+
|
1356 |
+
return null;
|
1357 |
+
}
|
1358 |
+
|
1359 |
+
function firstChild()
|
1360 |
+
{
|
1361 |
+
if (count($this->children) > 0) {
|
1362 |
+
return $this->children[0];
|
1363 |
+
}
|
1364 |
+
return null;
|
1365 |
+
}
|
1366 |
+
|
1367 |
+
function lastChild()
|
1368 |
+
{
|
1369 |
+
if (count($this->children) > 0) {
|
1370 |
+
return end($this->children);
|
1371 |
+
}
|
1372 |
+
return null;
|
1373 |
+
}
|
1374 |
+
|
1375 |
+
function nextSibling()
|
1376 |
+
{
|
1377 |
+
if ($this->parent === null) {
|
1378 |
+
return null;
|
1379 |
+
}
|
1380 |
+
|
1381 |
+
$idx = array_search($this, $this->parent->children, true);
|
1382 |
+
|
1383 |
+
if ($idx !== false && isset($this->parent->children[$idx + 1])) {
|
1384 |
+
return $this->parent->children[$idx + 1];
|
1385 |
+
}
|
1386 |
+
|
1387 |
+
return null;
|
1388 |
+
}
|
1389 |
+
|
1390 |
+
function previousSibling()
|
1391 |
+
{
|
1392 |
+
if ($this->parent === null) {
|
1393 |
+
return null;
|
1394 |
+
}
|
1395 |
+
|
1396 |
+
$idx = array_search($this, $this->parent->children, true);
|
1397 |
+
|
1398 |
+
if ($idx !== false && $idx > 0) {
|
1399 |
+
return $this->parent->children[$idx - 1];
|
1400 |
+
}
|
1401 |
+
|
1402 |
+
return null;
|
1403 |
+
|
1404 |
+
}
|
1405 |
+
|
1406 |
+
function hasChildNodes()
|
1407 |
+
{
|
1408 |
+
return !empty($this->children);
|
1409 |
+
}
|
1410 |
+
|
1411 |
+
function nodeName()
|
1412 |
+
{
|
1413 |
+
return $this->tag;
|
1414 |
+
}
|
1415 |
+
|
1416 |
+
function appendChild($node)
|
1417 |
+
{
|
1418 |
+
$node->parent = $this;
|
1419 |
+
$this->nodes[] = $node;
|
1420 |
+
$this->children[] = $node;
|
1421 |
+
|
1422 |
+
if ($this->dom) { // Attach current node to DOM (recursively)
|
1423 |
+
$children = array($node);
|
1424 |
+
|
1425 |
+
while($children) {
|
1426 |
+
$child = array_pop($children);
|
1427 |
+
$children = array_merge($children, $child->children);
|
1428 |
+
|
1429 |
+
$this->dom->nodes[] = $child;
|
1430 |
+
$child->dom = $this->dom;
|
1431 |
+
$child->_[self::HDOM_INFO_BEGIN] = count($this->dom->nodes) - 1;
|
1432 |
+
$child->_[self::HDOM_INFO_END] = $child->_[self::HDOM_INFO_BEGIN];
|
1433 |
+
}
|
1434 |
+
|
1435 |
+
$this->dom->root->_[self::HDOM_INFO_END] = count($this->dom->nodes) - 1;
|
1436 |
+
}
|
1437 |
+
|
1438 |
+
return $this;
|
1439 |
+
}
|
1440 |
+
|
1441 |
+
}
|
vendor/simplehtmldom/simplehtmldom/HtmlWeb.php
CHANGED
@@ -1,134 +1,134 @@
|
|
1 |
-
<?php namespace simplehtmldom;
|
2 |
-
|
3 |
-
/**
|
4 |
-
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
-
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
-
*
|
7 |
-
* Licensed under The MIT License
|
8 |
-
* See the LICENSE file in the project root for more information.
|
9 |
-
*
|
10 |
-
* Authors:
|
11 |
-
* S.C. Chen
|
12 |
-
* John Schlick
|
13 |
-
* Rus Carroll
|
14 |
-
* logmanoriginal
|
15 |
-
*
|
16 |
-
* Contributors:
|
17 |
-
* Yousuke Kumakura
|
18 |
-
* Vadim Voituk
|
19 |
-
* Antcs
|
20 |
-
*
|
21 |
-
* Version $Rev$
|
22 |
-
*/
|
23 |
-
|
24 |
-
include_once 'HtmlDocument.php';
|
25 |
-
|
26 |
-
class HtmlWeb {
|
27 |
-
|
28 |
-
/**
|
29 |
-
* @return HtmlDocument Returns the DOM for a webpage
|
30 |
-
* @return null Returns null if the cURL extension is not loaded and allow_url_fopen=Off
|
31 |
-
* @return null Returns null if the provided URL is invalid (not PHP_URL_SCHEME)
|
32 |
-
* @return null Returns null if the provided URL does not specify the HTTP or HTTPS protocol
|
33 |
-
*/
|
34 |
-
function load($url)
|
35 |
-
{
|
36 |
-
if(!filter_var($url, FILTER_VALIDATE_URL)) {
|
37 |
-
return null;
|
38 |
-
}
|
39 |
-
|
40 |
-
if($scheme = parse_url($url, PHP_URL_SCHEME)) {
|
41 |
-
switch(strtolower($scheme)) {
|
42 |
-
case 'http':
|
43 |
-
case 'https': break;
|
44 |
-
default: return null;
|
45 |
-
}
|
46 |
-
|
47 |
-
if(extension_loaded('curl')) {
|
48 |
-
return $this->load_curl($url);
|
49 |
-
} elseif(ini_get('allow_url_fopen')) {
|
50 |
-
return $this->load_fopen($url);
|
51 |
-
} else {
|
52 |
-
error_log(__FUNCTION__ . ' requires either the cURL extension or allow_url_fopen=On in php.ini');
|
53 |
-
}
|
54 |
-
}
|
55 |
-
|
56 |
-
return null;
|
57 |
-
}
|
58 |
-
|
59 |
-
/**
|
60 |
-
* cURL implementation of load
|
61 |
-
*/
|
62 |
-
private function load_curl($url)
|
63 |
-
{
|
64 |
-
$ch = curl_init();
|
65 |
-
|
66 |
-
curl_setopt($ch, CURLOPT_URL, $url);
|
67 |
-
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
68 |
-
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
69 |
-
|
70 |
-
// There is no guarantee this request will be fulfilled
|
71 |
-
// -- https://www.php.net/manual/en/function.curl-setopt.php
|
72 |
-
curl_setopt($ch, CURLOPT_BUFFERSIZE, MAX_FILE_SIZE);
|
73 |
-
|
74 |
-
// There is no guarantee this request will be fulfilled
|
75 |
-
$header = array(
|
76 |
-
'Accept: text/html', // Prefer HTML format
|
77 |
-
'Accept-Charset: utf-8', // Prefer UTF-8 encoding
|
78 |
-
);
|
79 |
-
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
|
80 |
-
|
81 |
-
$doc = curl_exec($ch);
|
82 |
-
|
83 |
-
if(curl_getinfo($ch, CURLINFO_RESPONSE_CODE) !== 200) {
|
84 |
-
return null;
|
85 |
-
}
|
86 |
-
|
87 |
-
curl_close($ch);
|
88 |
-
|
89 |
-
if(strlen($doc) > MAX_FILE_SIZE) {
|
90 |
-
return null;
|
91 |
-
}
|
92 |
-
|
93 |
-
return new HtmlDocument($doc);
|
94 |
-
}
|
95 |
-
|
96 |
-
/**
|
97 |
-
* fopen implementation of load
|
98 |
-
*/
|
99 |
-
private function load_fopen($url)
|
100 |
-
{
|
101 |
-
// There is no guarantee this request will be fulfilled
|
102 |
-
$context = stream_context_create(array('http' => array(
|
103 |
-
'header' => array(
|
104 |
-
'Accept: text/html', // Prefer HTML format
|
105 |
-
'Accept-Charset: utf-8', // Prefer UTF-8 encoding
|
106 |
-
),
|
107 |
-
'ignore_errors' => true // Always fetch content
|
108 |
-
)));
|
109 |
-
|
110 |
-
$doc = file_get_contents($url, false, $context, 0, MAX_FILE_SIZE + 1);
|
111 |
-
|
112 |
-
if(isset($http_response_header)) {
|
113 |
-
foreach($http_response_header as $rh) {
|
114 |
-
// https://stackoverflow.com/a/1442526
|
115 |
-
$parts = explode(' ', $rh, 3);
|
116 |
-
|
117 |
-
if(preg_match('/HTTP\/\d\.\d/', $parts[0])) {
|
118 |
-
$code = $parts[1];
|
119 |
-
}
|
120 |
-
} // Last code is final status
|
121 |
-
|
122 |
-
if(!isset($code) || $code !== '200') {
|
123 |
-
return null;
|
124 |
-
}
|
125 |
-
}
|
126 |
-
|
127 |
-
if(strlen($doc) > MAX_FILE_SIZE) {
|
128 |
-
return null;
|
129 |
-
}
|
130 |
-
|
131 |
-
return new HtmlDocument($doc);
|
132 |
-
}
|
133 |
-
|
134 |
-
}
|
1 |
+
<?php namespace simplehtmldom;
|
2 |
+
|
3 |
+
/**
|
4 |
+
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
+
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
+
*
|
7 |
+
* Licensed under The MIT License
|
8 |
+
* See the LICENSE file in the project root for more information.
|
9 |
+
*
|
10 |
+
* Authors:
|
11 |
+
* S.C. Chen
|
12 |
+
* John Schlick
|
13 |
+
* Rus Carroll
|
14 |
+
* logmanoriginal
|
15 |
+
*
|
16 |
+
* Contributors:
|
17 |
+
* Yousuke Kumakura
|
18 |
+
* Vadim Voituk
|
19 |
+
* Antcs
|
20 |
+
*
|
21 |
+
* Version $Rev$
|
22 |
+
*/
|
23 |
+
|
24 |
+
include_once 'HtmlDocument.php';
|
25 |
+
|
26 |
+
/**
 * Loads a remote web page over HTTP(S) and returns it as an HtmlDocument.
 *
 * The cURL extension is used when it is loaded; otherwise the class falls
 * back to file_get_contents() if allow_url_fopen is enabled.
 */
class HtmlWeb {

    /**
     * Loads the document at the given URL.
     *
     * @param string $url Absolute http:// or https:// URL.
     * @return HtmlDocument|null The DOM for the webpage, or null if
     * - the provided URL is invalid,
     * - the URL does not specify the HTTP or HTTPS protocol,
     * - neither the cURL extension is loaded nor allow_url_fopen is On,
     * - the request fails, does not end with status 200 or the response is
     *   larger than MAX_FILE_SIZE.
     */
    function load($url)
    {
        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            return null;
        }

        if ($scheme = parse_url($url, PHP_URL_SCHEME)) {
            switch (strtolower($scheme)) {
                case 'http':
                case 'https': break;
                default: return null;
            }

            if (extension_loaded('curl')) {
                return $this->load_curl($url);
            } elseif (ini_get('allow_url_fopen')) {
                return $this->load_fopen($url);
            } else {
                error_log(__FUNCTION__ . ' requires either the cURL extension or allow_url_fopen=On in php.ini');
            }
        }

        return null;
    }

    /**
     * cURL implementation of load
     *
     * @param string $url URL that was already validated by load().
     * @return HtmlDocument|null Null if the transfer fails, the final status
     * is not 200 or the body exceeds MAX_FILE_SIZE.
     */
    private function load_curl($url)
    {
        $ch = curl_init();

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

        // There is no guarantee this request will be fulfilled
        // -- https://www.php.net/manual/en/function.curl-setopt.php
        curl_setopt($ch, CURLOPT_BUFFERSIZE, MAX_FILE_SIZE);

        // There is no guarantee this request will be fulfilled
        $header = array(
            'Accept: text/html', // Prefer HTML format
            'Accept-Charset: utf-8', // Prefer UTF-8 encoding
        );
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);

        $doc = curl_exec($ch);
        $status = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);

        // Release the handle before any early return so that it is not
        // leaked when the request fails.
        curl_close($ch);

        // curl_exec() returns false on transfer errors.
        if ($doc === false || $status !== 200) {
            return null;
        }

        if (strlen($doc) > MAX_FILE_SIZE) {
            return null;
        }

        return new HtmlDocument($doc);
    }

    /**
     * fopen implementation of load
     *
     * @param string $url URL that was already validated by load().
     * @return HtmlDocument|null Null if the request fails, the final status
     * is not 200 or the body exceeds MAX_FILE_SIZE.
     */
    private function load_fopen($url)
    {
        // There is no guarantee this request will be fulfilled
        $context = stream_context_create(array('http' => array(
            'header' => array(
                'Accept: text/html', // Prefer HTML format
                'Accept-Charset: utf-8', // Prefer UTF-8 encoding
            ),
            'ignore_errors' => true // Always fetch content
        )));

        // Read one byte more than allowed so oversized documents are detected
        // by the length check below.
        $doc = file_get_contents($url, false, $context, 0, MAX_FILE_SIZE + 1);

        // file_get_contents() returns false when the request itself failed.
        if ($doc === false) {
            return null;
        }

        if (isset($http_response_header)) {
            foreach ($http_response_header as $rh) {
                // https://stackoverflow.com/a/1442526
                $parts = explode(' ', $rh, 3);

                // Only status lines carry a code; guard against a status
                // line without one.
                if (preg_match('/HTTP\/\d\.\d/', $parts[0]) && isset($parts[1])) {
                    $code = $parts[1];
                }
            } // Last code is final status

            if (!isset($code) || $code !== '200') {
                return null;
            }
        }

        if (strlen($doc) > MAX_FILE_SIZE) {
            return null;
        }

        return new HtmlDocument($doc);
    }

}
|
vendor/simplehtmldom/simplehtmldom/LICENSE
CHANGED
@@ -1,21 +1,21 @@
|
|
1 |
-
MIT License
|
2 |
-
|
3 |
-
Copyright (c) 2019 S.C. Chen, John Schlick, logmanoriginal
|
4 |
-
|
5 |
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
-
of this software and associated documentation files (the "Software"), to deal
|
7 |
-
in the Software without restriction, including without limitation the rights
|
8 |
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
-
copies of the Software, and to permit persons to whom the Software is
|
10 |
-
furnished to do so, subject to the following conditions:
|
11 |
-
|
12 |
-
The above copyright notice and this permission notice shall be included in all
|
13 |
-
copies or substantial portions of the Software.
|
14 |
-
|
15 |
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
SOFTWARE.
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2019 S.C. Chen, John Schlick, logmanoriginal
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
SOFTWARE.
|
vendor/simplehtmldom/simplehtmldom/README.md
DELETED
@@ -1,116 +0,0 @@
|
|
1 |
-
# PHP Simple HTML DOM Parser
|
2 |
-
|
3 |
-
[![LICENSE](https://img.shields.io/github/license/simplehtmldom/simplehtmldom?logo=github&style=for-the-badge)](https://github.com/simplehtmldom/simplehtmldom/blob/master/LICENSE)
|
4 |
-
[![RELEASE](https://img.shields.io/github/v/tag/simplehtmldom/simplehtmldom?label=release&logo=github&style=for-the-badge)](https://sourceforge.com/projects/simplehtmldom/files/simplehtmldom/)
|
5 |
-
[![BUILD STATUS](https://img.shields.io/travis/com/simplehtmldom/simplehtmldom?logo=travis&style=for-the-badge)](https://travis-ci.com/simplehtmldom/simplehtmldom)
|
6 |
-
[![PACKAGIST](https://img.shields.io/packagist/v/simplehtmldom/simplehtmldom?logo=composer&style=for-the-badge)](https://packagist.org/packages/simplehtmldom/simplehtmldom)
|
7 |
-
|
8 |
-
simplehtmldom is a fast and reliable HTML DOM parser for PHP.
|
9 |
-
|
10 |
-
## Key features
|
11 |
-
|
12 |
-
* Purely PHP-based DOM parser (no XML extensions required).
|
13 |
-
* Works with well-formed and broken HTML documents.
|
14 |
-
* Loads webpages, local files and document strings.
|
15 |
-
* Supports CSS selectors.
|
16 |
-
|
17 |
-
## Requirements
|
18 |
-
|
19 |
-
simplehtmldom requires **PHP 5.6 or higher** with [ext-iconv](https://www.php.net/manual/en/book.iconv.php) enabled. Following extensions enable additional features of the parser:
|
20 |
-
|
21 |
-
* [ext-mbstring](https://secure.php.net/manual/en/book.mbstring.php) (recommended) \
|
22 |
-
Enables better detection for multi-byte documents.
|
23 |
-
* [ext-curl](https://secure.php.net/manual/en/book.curl.php) \
|
24 |
-
Enables cURL support for the class `HtmlWeb`.
|
25 |
-
* [ext-openssl](https://secure.php.net/manual/en/book.openssl.php) (recommended when using cURL) \
|
26 |
-
Enables SSL support for cURL.
|
27 |
-
|
28 |
-
## Installation
|
29 |
-
|
30 |
-
**Manually**:
|
31 |
-
|
32 |
-
Download the latest release from [SourceForge](https://sourceforge.net/projects/simplehtmldom/files/latest) and extract the files in the vendor folder of your project.
|
33 |
-
|
34 |
-
**Composer**:
|
35 |
-
|
36 |
-
```sh
|
37 |
-
composer require simplehtmldom/simplehtmldom
|
38 |
-
```
|
39 |
-
|
40 |
-
**Git**:
|
41 |
-
|
42 |
-
```
|
43 |
-
git clone git://git.code.sf.net/p/simplehtmldom/repository simplehtmldom
|
44 |
-
```
|
45 |
-
|
46 |
-
_Note_: The [GitHub repository](https://github.com/simplehtmldom/simplehtmldom) serves as a mirror for the SourceForge project. We currently accept pull requests and issues only via SourceForge.
|
47 |
-
|
48 |
-
## Usage
|
49 |
-
|
50 |
-
This example illustrates how to return the page title:
|
51 |
-
|
52 |
-
<details><summary>Manually</summary>
|
53 |
-
|
54 |
-
```
|
55 |
-
<?php
|
56 |
-
include_once 'HtmlWeb.php';
|
57 |
-
use simplehtmldom\HtmlWeb;
|
58 |
-
|
59 |
-
$client = new HtmlWeb();
|
60 |
-
$html = $client->load('https://www.google.com/search?q=simplehtmldom');
|
61 |
-
|
62 |
-
// Returns the page title
|
63 |
-
echo $html->find('title', 0)->plaintext . PHP_EOL;
|
64 |
-
```
|
65 |
-
|
66 |
-
</details>
|
67 |
-
|
68 |
-
<details><summary>Using composer</summary>
|
69 |
-
|
70 |
-
```
|
71 |
-
<?php
|
72 |
-
include_once 'vendor/autoload.php';
|
73 |
-
use simplehtmldom\HtmlWeb;
|
74 |
-
|
75 |
-
$client = new HtmlWeb();
|
76 |
-
$html = $client->load('https://www.google.com/search?q=simplehtmldom');
|
77 |
-
|
78 |
-
// Returns the page title
|
79 |
-
echo $html->find('title', 0)->plaintext . PHP_EOL;
|
80 |
-
```
|
81 |
-
|
82 |
-
</details>
|
83 |
-
|
84 |
-
Find more examples in the installation folder under `example`.
|
85 |
-
|
86 |
-
## Documentation
|
87 |
-
|
88 |
-
The documentation for this library is hosted at [https://simplehtmldom.sourceforge.io/docs/](https://simplehtmldom.sourceforge.io/docs/)
|
89 |
-
|
90 |
-
## Getting involved
|
91 |
-
|
92 |
-
There are various ways for you to get involved with simplehtmldom. Here are a few:
|
93 |
-
|
94 |
-
* Share this project with your friends (Twitter, Facebook, ..._you name it_...).
|
95 |
-
* Report [bugs](https://sourceforge.net/p/simplehtmldom/bugs/) (SourceForge).
|
96 |
-
* Request [features](https://sourceforge.net/p/simplehtmldom/feature-requests/) (SourceForge).
|
97 |
-
* Discuss existing bugs, features and ideas.
|
98 |
-
|
99 |
-
If you want to contribute code to the project, please open a [feature request](https://sourceforge.net/p/simplehtmldom/feature-requests/) and include your patch with the message.
|
100 |
-
|
101 |
-
## Authors
|
102 |
-
|
103 |
-
* [S.C. Chen](https://sourceforge.net/u/me578022/)
|
104 |
-
* [John Schlick](https://sourceforge.net/u/john_schlick/)
|
105 |
-
* [logmanoriginal](https://sourceforge.net/u/logmanoriginal/)
|
106 |
-
* Rus Carroll
|
107 |
-
* Yousuke Kumakura
|
108 |
-
* Vadim Voituk
|
109 |
-
|
110 |
-
## License
|
111 |
-
|
112 |
-
The source code for simplehtmldom is licensed under the MIT license. For further information read the LICENSE file in the root directory (should be located next to this README file).
|
113 |
-
|
114 |
-
## Technical notes
|
115 |
-
|
116 |
-
simplehtmldom is a purely PHP-based DOM parser that doesn't rely on external libraries like [libxml](https://www.php.net/manual/en/book.libxml.php), [SimpleXML](https://www.php.net/manual/en/book.simplexml.php) or [PHP DOM](https://www.php.net/manual/en/book.dom.php). Doing so provides better control over the parsing algorithm and a much simpler API that even novice users can learn to use in a short amount of time.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/composer.json
CHANGED
@@ -1,58 +1,58 @@
|
|
1 |
-
{
|
2 |
-
"name": "simplehtmldom/simplehtmldom",
|
3 |
-
"description": "A fast, simple and reliable HTML document parser for PHP.",
|
4 |
-
"keywords": [
|
5 |
-
"php",
|
6 |
-
"html",
|
7 |
-
"dom",
|
8 |
-
"parser",
|
9 |
-
"simple",
|
10 |
-
"simplehtmldom"
|
11 |
-
],
|
12 |
-
"homepage": "https://simplehtmldom.sourceforge.io/",
|
13 |
-
"license": "MIT",
|
14 |
-
"authors": [
|
15 |
-
{
|
16 |
-
"name": "S.C. Chen",
|
17 |
-
"role": "Developer"
|
18 |
-
},
|
19 |
-
{
|
20 |
-
"name": "John Schlick",
|
21 |
-
"role": "Developer"
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"name": "logmanoriginal",
|
25 |
-
"role": "Developer"
|
26 |
-
}
|
27 |
-
],
|
28 |
-
"support": {
|
29 |
-
"issues": "https://sourceforge.net/p/simplehtmldom/bugs/",
|
30 |
-
"wiki": "https://simplehtmldom.sourceforge.io/docs/",
|
31 |
-
"source": "https://sourceforge.net/p/simplehtmldom/repository/",
|
32 |
-
"rss": "https://sourceforge.net/p/simplehtmldom/news/feed.rss"
|
33 |
-
},
|
34 |
-
"autoload": {
|
35 |
-
"classmap": [
|
36 |
-
"./"
|
37 |
-
],
|
38 |
-
"exclude-from-classmap": [
|
39 |
-
"/example/",
|
40 |
-
"/manual/",
|
41 |
-
"/testcase/",
|
42 |
-
"/tests/",
|
43 |
-
"simple_html_dom.php"
|
44 |
-
]
|
45 |
-
},
|
46 |
-
"require": {
|
47 |
-
"php": ">=5.6",
|
48 |
-
"ext-iconv": "*"
|
49 |
-
},
|
50 |
-
"require-dev": {
|
51 |
-
"phpunit/phpunit": "^6 || ^7"
|
52 |
-
},
|
53 |
-
"suggest": {
|
54 |
-
"ext-mbstring": "Allows better decoding for multi-byte documents",
|
55 |
-
"ext-curl": "Needed to support cURL downloads in class HtmlWeb",
|
56 |
-
"ext-openssl": "Allows loading HTTPS pages when using cURL"
|
57 |
-
}
|
58 |
}
|
1 |
+
{
|
2 |
+
"name": "simplehtmldom/simplehtmldom",
|
3 |
+
"description": "A fast, simple and reliable HTML document parser for PHP.",
|
4 |
+
"keywords": [
|
5 |
+
"php",
|
6 |
+
"html",
|
7 |
+
"dom",
|
8 |
+
"parser",
|
9 |
+
"simple",
|
10 |
+
"simplehtmldom"
|
11 |
+
],
|
12 |
+
"homepage": "https://simplehtmldom.sourceforge.io/",
|
13 |
+
"license": "MIT",
|
14 |
+
"authors": [
|
15 |
+
{
|
16 |
+
"name": "S.C. Chen",
|
17 |
+
"role": "Developer"
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"name": "John Schlick",
|
21 |
+
"role": "Developer"
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"name": "logmanoriginal",
|
25 |
+
"role": "Developer"
|
26 |
+
}
|
27 |
+
],
|
28 |
+
"support": {
|
29 |
+
"issues": "https://sourceforge.net/p/simplehtmldom/bugs/",
|
30 |
+
"wiki": "https://simplehtmldom.sourceforge.io/docs/",
|
31 |
+
"source": "https://sourceforge.net/p/simplehtmldom/repository/",
|
32 |
+
"rss": "https://sourceforge.net/p/simplehtmldom/news/feed.rss"
|
33 |
+
},
|
34 |
+
"autoload": {
|
35 |
+
"classmap": [
|
36 |
+
"./"
|
37 |
+
],
|
38 |
+
"exclude-from-classmap": [
|
39 |
+
"/example/",
|
40 |
+
"/manual/",
|
41 |
+
"/testcase/",
|
42 |
+
"/tests/",
|
43 |
+
"simple_html_dom.php"
|
44 |
+
]
|
45 |
+
},
|
46 |
+
"require": {
|
47 |
+
"php": ">=5.6",
|
48 |
+
"ext-iconv": "*"
|
49 |
+
},
|
50 |
+
"require-dev": {
|
51 |
+
"phpunit/phpunit": "^6 || ^7"
|
52 |
+
},
|
53 |
+
"suggest": {
|
54 |
+
"ext-mbstring": "Allows better decoding for multi-byte documents",
|
55 |
+
"ext-curl": "Needed to support cURL downloads in class HtmlWeb",
|
56 |
+
"ext-openssl": "Allows loading HTTPS pages when using cURL"
|
57 |
+
}
|
58 |
}
|
vendor/simplehtmldom/simplehtmldom/constants.php
CHANGED
@@ -1,28 +1,28 @@
|
|
1 |
-
<?php namespace simplehtmldom;
|
2 |
-
|
3 |
-
/**
|
4 |
-
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
-
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
-
*
|
7 |
-
* Licensed under The MIT License
|
8 |
-
* See the LICENSE file in the project root for more information.
|
9 |
-
*
|
10 |
-
* Authors:
|
11 |
-
* S.C. Chen
|
12 |
-
* John Schlick
|
13 |
-
* Rus Carroll
|
14 |
-
* logmanoriginal
|
15 |
-
*
|
16 |
-
* Contributors:
|
17 |
-
* Yousuke Kumakura
|
18 |
-
* Vadim Voituk
|
19 |
-
* Antcs
|
20 |
-
*
|
21 |
-
* Version $Rev$
|
22 |
-
*/
|
23 |
-
|
24 |
-
defined(__NAMESPACE__ . '\DEFAULT_TARGET_CHARSET') || define(__NAMESPACE__ . '\DEFAULT_TARGET_CHARSET', 'UTF-8');
|
25 |
-
defined(__NAMESPACE__ . '\DEFAULT_BR_TEXT') || define(__NAMESPACE__ . '\DEFAULT_BR_TEXT', "\r\n");
|
26 |
-
defined(__NAMESPACE__ . '\DEFAULT_SPAN_TEXT') || define(__NAMESPACE__ . '\DEFAULT_SPAN_TEXT', ' ');
|
27 |
-
defined(__NAMESPACE__ . '\MAX_FILE_SIZE') || define(__NAMESPACE__ . '\MAX_FILE_SIZE', 2621440);
|
28 |
-
define(__NAMESPACE__ . '\HDOM_SMARTY_AS_TEXT', 1);
|
1 |
+
<?php namespace simplehtmldom;
|
2 |
+
|
3 |
+
/**
|
4 |
+
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
+
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
+
*
|
7 |
+
* Licensed under The MIT License
|
8 |
+
* See the LICENSE file in the project root for more information.
|
9 |
+
*
|
10 |
+
* Authors:
|
11 |
+
* S.C. Chen
|
12 |
+
* John Schlick
|
13 |
+
* Rus Carroll
|
14 |
+
* logmanoriginal
|
15 |
+
*
|
16 |
+
* Contributors:
|
17 |
+
* Yousuke Kumakura
|
18 |
+
* Vadim Voituk
|
19 |
+
* Antcs
|
20 |
+
*
|
21 |
+
* Version $Rev$
|
22 |
+
*/
|
23 |
+
|
24 |
+
defined(__NAMESPACE__ . '\DEFAULT_TARGET_CHARSET') || define(__NAMESPACE__ . '\DEFAULT_TARGET_CHARSET', 'UTF-8');
|
25 |
+
defined(__NAMESPACE__ . '\DEFAULT_BR_TEXT') || define(__NAMESPACE__ . '\DEFAULT_BR_TEXT', "\r\n");
|
26 |
+
defined(__NAMESPACE__ . '\DEFAULT_SPAN_TEXT') || define(__NAMESPACE__ . '\DEFAULT_SPAN_TEXT', ' ');
|
27 |
+
defined(__NAMESPACE__ . '\MAX_FILE_SIZE') || define(__NAMESPACE__ . '\MAX_FILE_SIZE', 2621440);
|
28 |
+
define(__NAMESPACE__ . '\HDOM_SMARTY_AS_TEXT', 1);
|
vendor/simplehtmldom/simplehtmldom/example/example_advanced_selector.php
DELETED
@@ -1,59 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
// This example illustrates how to use advanced selector features
|
3 |
-
include_once '../HtmlDocument.php';
|
4 |
-
use simplehtmldom\HtmlDocument;
|
5 |
-
|
6 |
-
// -----------------------------------------------------------------------------
|
7 |
-
echo '<h2>Descendant selectors</h2>' . PHP_EOL;
|
8 |
-
|
9 |
-
$doc = <<<HTML
|
10 |
-
<div>
|
11 |
-
<div>
|
12 |
-
<div class="foo bar">ok</div>
|
13 |
-
</div>
|
14 |
-
</div>
|
15 |
-
HTML;
|
16 |
-
|
17 |
-
echo (new HtmlDocument())->load($doc)->find('div div div', 0)->innertext . PHP_EOL;
|
18 |
-
|
19 |
-
// -----------------------------------------------------------------------------
|
20 |
-
echo '<h2>Nested selectors</h2>' . PHP_EOL;
|
21 |
-
|
22 |
-
$doc = <<<HTML
|
23 |
-
<ul id="ul1">
|
24 |
-
<li>item:<span>1</span></li>
|
25 |
-
<li>item:<span>2</span></li>
|
26 |
-
</ul>
|
27 |
-
<ul id="ul2">
|
28 |
-
<li>item:<span>3</span></li>
|
29 |
-
<li>item:<span>4</span></li>
|
30 |
-
</ul>
|
31 |
-
HTML;
|
32 |
-
|
33 |
-
$html = (new HtmlDocument())->load($doc);
|
34 |
-
|
35 |
-
foreach($html->find('ul') as $ul) {
|
36 |
-
foreach($ul->find('li') as $li)
|
37 |
-
echo $li->innertext . '<br>' . PHP_EOL;
|
38 |
-
}
|
39 |
-
|
40 |
-
// -----------------------------------------------------------------------------
|
41 |
-
echo '<h2>Parsing checkboxes</h2>' . PHP_EOL;
|
42 |
-
|
43 |
-
$doc = <<<HTML
|
44 |
-
<form name="form1" method="post" action="">
|
45 |
-
<input type="checkbox" name="checkbox1" value="checkbox1" checked>item1<br>
|
46 |
-
<input type="checkbox" name="checkbox2" value="checkbox2">item2<br>
|
47 |
-
<input type="checkbox" name="checkbox3" value="checkbox3" checked>item3<br>
|
48 |
-
</form>
|
49 |
-
HTML;
|
50 |
-
|
51 |
-
$html = (new HtmlDocument())->load($doc);
|
52 |
-
|
53 |
-
foreach($html->find('input[type=checkbox]') as $checkbox) {
|
54 |
-
if ($checkbox->checked) {
|
55 |
-
echo $checkbox->name . ' is checked<br>' . PHP_EOL;
|
56 |
-
} else {
|
57 |
-
echo $checkbox->name . ' is not checked<br>' . PHP_EOL;
|
58 |
-
}
|
59 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/example/example_basic_selector.php
DELETED
@@ -1,35 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
// This example illustrates how to use basic selectors to retrieve HTML contents
|
3 |
-
include_once '../HtmlWeb.php';
|
4 |
-
use simplehtmldom\HtmlWeb;
|
5 |
-
|
6 |
-
// get DOM from URL or file
|
7 |
-
$doc = new HtmlWeb();
|
8 |
-
$html = $doc->load('http://www.google.com/');
|
9 |
-
|
10 |
-
// find all links
|
11 |
-
foreach($html->find('a') as $e)
|
12 |
-
echo $e->href . '<br>' . PHP_EOL;
|
13 |
-
|
14 |
-
// find all images
|
15 |
-
foreach($html->find('img') as $e)
|
16 |
-
echo $e->src . '<br>' . PHP_EOL;
|
17 |
-
|
18 |
-
// find all images with full tag
|
19 |
-
foreach($html->find('img') as $e)
|
20 |
-
echo $e->outertext . '<br>' . PHP_EOL;
|
21 |
-
|
22 |
-
// find all div tags with id="gbar"
|
23 |
-
foreach($html->find('div#gbar') as $e)
|
24 |
-
echo $e->innertext . '<br>' . PHP_EOL;
|
25 |
-
|
26 |
-
// find all span tags with class="gb1"
|
27 |
-
foreach($html->find('span.gb1') as $e)
|
28 |
-
echo $e->outertext . '<br>' . PHP_EOL;
|
29 |
-
|
30 |
-
// find all td tags with attribute align="center"
|
31 |
-
foreach($html->find('td[align=center]') as $e)
|
32 |
-
echo $e->innertext . '<br>' . PHP_EOL;
|
33 |
-
|
34 |
-
// extract text from HTML
|
35 |
-
echo $html->plaintext . PHP_EOL;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/example/example_callback.php
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
// This example illustrates how to utilize the callback feature to manipulate the DOM
|
3 |
-
include_once '../HtmlWeb.php';
|
4 |
-
use simplehtmldom\HtmlWeb;
|
5 |
-
|
6 |
-
// Write a callback function with one parameter for the element
|
7 |
-
function my_callback($element) {
|
8 |
-
if ($element->tag === 'a') { // Invalidate all anchors
|
9 |
-
$element->href = '#';
|
10 |
-
}
|
11 |
-
}
|
12 |
-
|
13 |
-
// Load the document
|
14 |
-
$doc = new HtmlWeb();
|
15 |
-
$html = $doc->load('https://www.google.com/');
|
16 |
-
|
17 |
-
// Register the callback function
|
18 |
-
$html->set_callback('my_callback');
|
19 |
-
|
20 |
-
// The callback function is invoked automatically when accessing the contents.
|
21 |
-
echo $html;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/example/example_extract_html.php
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
// This example illustrates how to extract text content from a webpage
|
3 |
-
include_once '../HtmlWeb.php';
|
4 |
-
use simplehtmldom\HtmlWeb;
|
5 |
-
|
6 |
-
$doc = new HtmlWeb();
|
7 |
-
echo $doc->load('https://www.google.com/')->plaintext;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/example/example_modify_contents.php
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
// This example illustrates how to modify HTML contents
|
3 |
-
include_once '../HtmlWeb.php';
|
4 |
-
use simplehtmldom\HtmlWeb;
|
5 |
-
|
6 |
-
// Load the document
|
7 |
-
$doc = new HtmlWeb();
|
8 |
-
$html = $doc->load('https://www.google.com/');
|
9 |
-
|
10 |
-
// Remove all images and inputs from the DOM
|
11 |
-
foreach($html->find('img, input') as $element) {
|
12 |
-
$element->remove();
|
13 |
-
}
|
14 |
-
|
15 |
-
echo $html;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_digg.php
DELETED
@@ -1,55 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
/**
|
3 |
-
* This example loads the main page from https://digg.com/, extracts news items
|
4 |
-
* and returns the details in a custom format.
|
5 |
-
*/
|
6 |
-
include_once '../../HtmlWeb.php';
|
7 |
-
use simplehtmldom\HtmlWeb;
|
8 |
-
|
9 |
-
// Download a page
|
10 |
-
$doc = new HtmlWeb();
|
11 |
-
$html = $doc->load('https://digg.com/');
|
12 |
-
|
13 |
-
// Loop through all articles in the page
|
14 |
-
foreach($html->find('article') as $article) {
|
15 |
-
|
16 |
-
// Find the title of the current article
|
17 |
-
if($title = $article->find('h2', 0)) {
|
18 |
-
$item['title'] = trim($title->plaintext);
|
19 |
-
} else {
|
20 |
-
$item['title'] = 'Unknown title';
|
21 |
-
}
|
22 |
-
|
23 |
-
// Find the description of the current article
|
24 |
-
if($details = $article->find('div.description', 0)) {
|
25 |
-
$item['details'] = trim($details->plaintext);
|
26 |
-
} else {
|
27 |
-
$item['details'] = '...';
|
28 |
-
}
|
29 |
-
|
30 |
-
// Find the tags for the current article
|
31 |
-
if($diggs = $article->find('a[rel="tag"]', 0)) {
|
32 |
-
$item['diggs'] = trim($diggs->plaintext);
|
33 |
-
} else {
|
34 |
-
$item['diggs'] = '';
|
35 |
-
}
|
36 |
-
|
37 |
-
$data[] = $item;
|
38 |
-
}
|
39 |
-
|
40 |
-
// (optional) Release memory
|
41 |
-
$html->clear();
|
42 |
-
unset($html);
|
43 |
-
|
44 |
-
// Display your own page to the user
|
45 |
-
foreach($data as $item) {
|
46 |
-
echo <<<EOD
|
47 |
-
|
48 |
-
<h2>{$item['title']}</h2>
|
49 |
-
<ul>
|
50 |
-
<li>{$item['details']}</li>
|
51 |
-
<li>{$item['diggs']}</li>
|
52 |
-
</ul>
|
53 |
-
|
54 |
-
EOD;
|
55 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_imdb.php
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
/**
|
3 |
-
* This example loads a page from IMDb and displays the most important details
|
4 |
-
* in a custom format.
|
5 |
-
*/
|
6 |
-
include_once '../../HtmlWeb.php';
|
7 |
-
use simplehtmldom\HtmlWeb;
|
8 |
-
|
9 |
-
// Load the page into memory
|
10 |
-
$doc = new HtmlWeb();
|
11 |
-
$html = $doc->load('https://imdb.com/title/tt0335266/');
|
12 |
-
|
13 |
-
// Extract details
|
14 |
-
$title = $html->find('title', 0)->plaintext;
|
15 |
-
$rating = $html->find('div[class="ratingValue"] span', 0)->plaintext;
|
16 |
-
$storyline = $html->find('#titleStoryLine p', 0)->plaintext;
|
17 |
-
|
18 |
-
// Clean up memory
|
19 |
-
$html->clear();
|
20 |
-
unset($html);
|
21 |
-
|
22 |
-
echo '<h1>' . $title . '</h1><p>Rating: ' . $rating . '<br>' . $storyline . '</p>';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/example/scraping/example_scraping_slashdot.php
DELETED
@@ -1,33 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
/**
|
3 |
-
* This example loads a page from Slashdot and displays articles in a custom
|
4 |
-
* format.
|
5 |
-
*/
|
6 |
-
include_once '../../HtmlWeb.php';
|
7 |
-
use simplehtmldom\HtmlWeb;
|
8 |
-
|
9 |
-
// Load the page into memory
|
10 |
-
$doc = new HtmlWeb();
|
11 |
-
$html = $doc->load('https://slashdot.org/');
|
12 |
-
|
13 |
-
// Find and extract all articles
|
14 |
-
foreach($html->find('#firehoselist > [id^="firehose-"]') as $article) {
|
15 |
-
$item['title'] = trim($article->find('[id^="title-"]', 0)->plaintext);
|
16 |
-
$item['body'] = trim($article->find('[id^="text-"]', 0)->plaintext);
|
17 |
-
|
18 |
-
$data[] = $item;
|
19 |
-
}
|
20 |
-
|
21 |
-
// clean up memory
|
22 |
-
$html->clear();
|
23 |
-
unset($html);
|
24 |
-
|
25 |
-
// Return custom page
|
26 |
-
foreach($data as $item) {
|
27 |
-
echo <<<EOD
|
28 |
-
|
29 |
-
<h2>{$item['title']}</h2>
|
30 |
-
<p>{$item['body']}</p>
|
31 |
-
|
32 |
-
EOD;
|
33 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/README.md
DELETED
@@ -1,75 +0,0 @@
|
|
1 |
-
This folder contains the source files for http://simplehtmldom.sourceforge.io/,
|
2 |
-
the project page for PHP Simple HTML DOM Parser.
|
3 |
-
|
4 |
-
Source files are written in Markdown: https://en.wikipedia.org/wiki/Markdown
|
5 |
-
|
6 |
-
Site data is generated by MkDocs, a lightweight static site generator for project
|
7 |
-
documentation: https://www.mkdocs.org/
|
8 |
-
|
9 |
-
# Folder structure
|
10 |
-
|
11 |
-
`custom_theme` : Contains customizations to the theme provided by MkDocs.
|
12 |
-
`docs` : Contains the source files for the project page (the actual pages).
|
13 |
-
`site` : Contains the output files for the project page when built with MkDocs.
|
14 |
-
`extra.css` : Customizations to the styles provided by MkDocs.
|
15 |
-
`mkdocs.yml` : The configuration file that is used by MkDocs to generate pages.
|
16 |
-
|
17 |
-
# Adding new pages
|
18 |
-
|
19 |
-
Place new files in `docs`. Use subfolders (as few levels as possible) to
|
20 |
-
separate categories.
|
21 |
-
|
22 |
-
Files added to the manual will **not** appear on the project page automatically.
|
23 |
-
All pages need to be specified in the _mkdocs.yml_ file under "nav:". Simply add
|
24 |
-
the relative path to the new file where appropriate.
|
25 |
-
|
26 |
-
Note: Files are not added automatically because they are sorted by name if not
|
27 |
-
specified manually. Since readability is a key factor for manuals, the files must
|
28 |
-
be sorted in a way that makes it clear to users.
|
29 |
-
|
30 |
-
# Setting up MkDocs
|
31 |
-
|
32 |
-
The installation instructions for MkDocs are provided on their homepage:
|
33 |
-
https://www.mkdocs.org/#installation
|
34 |
-
|
35 |
-
MkDocs automatically builds the project based on the _mkdocs.yml_ file. Find the
|
36 |
-
specification for this file at https://www.mkdocs.org/user-guide/configuration/.
|
37 |
-
|
38 |
-
# Building project pages
|
39 |
-
|
40 |
-
The build process depends on your installation of MkDocs. Typically MkDocs is
|
41 |
-
made available via the command line.
|
42 |
-
|
43 |
-
## Step 1 - Check your version of MkDocs
|
44 |
-
|
45 |
-
To check your version of MkDocs run this command:
|
46 |
-
|
47 |
-
`mkdocs --version` or
|
48 |
-
`python3 -m mkdocs --version`
|
49 |
-
|
50 |
-
Should return `version 1.0.4` or higher. If it doesn't, make sure to install the
|
51 |
-
latest version using `pip install mkdocs` or `python3 -m pip install mkdocs`. If
|
52 |
-
you don't have pip installed, install it via package manager or follow the
|
53 |
-
instructions at https://pip.pypa.io/en/stable/installing/
|
54 |
-
|
55 |
-
Make sure to also install the `redirects` plugin to allow for page redirects:
|
56 |
-
`pip install mkdocs-redirects` or `python3 -m pip install mkdocs-redirects`.
|
57 |
-
|
58 |
-
## Step 2 - View the project locally
|
59 |
-
|
60 |
-
MkDocs allows you to view the project files in a browser on your local machine:
|
61 |
-
|
62 |
-
`mkdocs serve` or
|
63 |
-
`python3 -m mkdocs serve`
|
64 |
-
|
65 |
-
If the process is successful you can access the site at http://127.0.0.1:8000.
|
66 |
-
|
67 |
-
## Step 3 - Build the project
|
68 |
-
|
69 |
-
If you are satisfied with the results of the project, build the final project
|
70 |
-
with this command:
|
71 |
-
|
72 |
-
`mkdocs build` or
|
73 |
-
`python3 -m mkdocs build`
|
74 |
-
|
75 |
-
Find the output files in the `site` folder.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/custom_theme/main.html
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
{% extends "base.html" %}
|
2 |
-
|
3 |
-
{% block footer %}
|
4 |
-
{% include "footer.html" %}
|
5 |
-
<hr>
|
6 |
-
<a class="logo" href="https://sourceforge.net/p/simplehtmldom/"><img alt="Download PHP Simple HTML DOM Parser" src="https://sourceforge.net/sflogo.php?type=16&group_id=218559" ></a>
|
7 |
-
{% endblock %}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/disable.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: disable()
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
Debug::disable ()
|
7 |
-
```
|
8 |
-
|
9 |
-
Globally disables debug messages.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/enable.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: enable()
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
Debug::enable ()
|
7 |
-
```
|
8 |
-
|
9 |
-
Globally enables debug messages.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/index.md
DELETED
@@ -1,31 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Debug
|
3 |
-
---
|
4 |
-
|
5 |
-
Provides the ability to see debug messages for deprecated functions, malformed documents and parsing issues. Debug messages can be sent to a custom message handler or written to error_log (default).
|
6 |
-
|
7 |
-
## Example
|
8 |
-
|
9 |
-
```php
|
10 |
-
<?php
|
11 |
-
include_once 'HtmlDocument.php';
|
12 |
-
include_once 'Debug.php';
|
13 |
-
use simplehtmldom\HtmlDocument;
|
14 |
-
use simplehtmldom\Debug;
|
15 |
-
|
16 |
-
Debug::enable();
|
17 |
-
|
18 |
-
$dom = new HtmlDocument();
|
19 |
-
$dom->load('<html></html>');
|
20 |
-
$dom->root->children(); // This causes a deprecation warning
|
21 |
-
|
22 |
-
Debug::disable();
|
23 |
-
```
|
24 |
-
|
25 |
-
**Output**
|
26 |
-
|
27 |
-
```
|
28 |
-
[DEBUG] [/var/www/html/simplehtmldom/Debug.php:30] [/var/www/html/simplehtmldom/test.php:7] "Debug mode has been enabled"
|
29 |
-
[DEBUG] [/var/www/html/simplehtmldom/HtmlNode.php:83] [/var/www/html/simplehtmldom/test.php:11] "simplehtmldom\HtmlNode->children() has been deprecated and will be removed in the next major version of simplehtmldom. Use simplehtmldom\HtmlNode->childNodes() instead."
|
30 |
-
[DEBUG] [/var/www/html/simplehtmldom/Debug.php:38] [/var/www/html/simplehtmldom/test.php:13] "Debug mode has been disabled"
|
31 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/log.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: log()
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
Debug::log (string $message)
|
7 |
-
```
|
8 |
-
|
9 |
-
Logs a debug message if the debugger is enabled. Does nothing if the debugger is disabled.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/log_once.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: log_once()
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
Debug::log_once (string $message)
|
7 |
-
```
|
8 |
-
|
9 |
-
Logs a debug message if the debugger is enabled. Does nothing if the debugger is disabled. Each message is logged only once (based on file and line number).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/Debug/setDebugHandler.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: setDebugHandler()
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
Debug::setDebugHandler ( [$function = null] )
|
7 |
-
```
|
8 |
-
|
9 |
-
Sets the debug handler for debug messages. Uses `error_log` if `$function = null` (default).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__call.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __call
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__call ($function, $arguments)
|
7 |
-
```
|
8 |
-
|
9 |
-
Serves as a wrapper for deprecated methods. See [magic methods](https://www.php.net/manual/en/language.oop5.overloading.php#object.call) for more information.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__construct.md
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __construct
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__construct ( [ string $str = null [, bool $lowercase = true [, bool $forceTagsClosed = true [, string $target_charset = DEFAULT_TARGET_CHARSET [, bool $stripRN = true [, string $defaultBRText = DEFAULT_BR_TEXT [, string $defaultSpanText = DEFAULT_SPAN_TEXT [, int $options = 0 ]]]]]]]]) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Creates a new `HtmlDocument` object.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `str` | The HTML document string.
|
14 |
-
| `lowercase` | Tag names are parsed in lowercase letters if enabled.
|
15 |
-
| `forceTagsClosed` | Tags inside block tags are forcefully closed if the closing tag was omitted.
|
16 |
-
| `target_charset` | Defines the target charset for text returned by the parser.
|
17 |
-
| `stripRN` | Newline characters are replaced by whitespace if enabled.
|
18 |
-
| `defaultBRText` | Defines the default text to return for `<br>` elements.
|
19 |
-
| `defaultSpanText` | Defines the default text to return for `<span>` elements.
|
20 |
-
| `options` | Additional options for the parser. Currently supports `'HDOM_SMARTY_AS_TEXT'` to remove [Smarty](https://www.smarty.net/) scripts.
|
21 |
-
|
22 |
-
Returns the object.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__debugInfo.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __debugInfo
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__debugInfo ( )
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns debugging information about the current object.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__destruct.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __destruct
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__destruct ()
|
7 |
-
```
|
8 |
-
|
9 |
-
Destroys the current object and clears memory.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__get.md
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __get
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__get ( string $name ) : mixed
|
7 |
-
```
|
8 |
-
|
9 |
-
See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.get)
|
10 |
-
|
11 |
-
Supports the following names:
|
12 |
-
|
13 |
-
| Name | Description
|
14 |
-
| ---- | -----------
|
15 |
-
| `outertext` | Returns the outer text of the root element.
|
16 |
-
| `innertext` | Returns the inner text of the root element.
|
17 |
-
| `plaintext` | Returns the plain text of the root element.
|
18 |
-
| `charset` | Returns the charset for the document.
|
19 |
-
| `target_charset` | Returns the target charset for the document.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/__toString.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __toString
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__toString () : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the inner text of the root element of the DOM.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/as_text_node.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: as_text_node (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
as_text_node ( string $tag ) : bool
|
7 |
-
```
|
8 |
-
|
9 |
-
Adds a tag as text node.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `tag` | The element's tag name.
|
14 |
-
|
15 |
-
Returns true on success.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/childNodes.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: childNodes
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
childNodes ( [ int $idx = -1 ] ) : mixed
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns children of the root element.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `idx` | Index of the child element to return.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_skip.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: copy_skip (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
copy_skip ( string $chars ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Skips characters starting at the current parsing position in the document. Sets the parsing position to the first character not in the provided list of characters.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `chars` | A list of characters to skip.
|
14 |
-
|
15 |
-
Returns the skipped characters.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_until.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: copy_until (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
copy_until ( string $chars ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Copies all characters starting at the current parsing position in the document. Sets the parsing position to the first character that matches any of the characters in the provided list of characters.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `chars` | A list of characters to stop copying at.
|
14 |
-
|
15 |
-
Returns the copied characters.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/copy_until_char.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: copy_until_char (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
copy_until_char ( string $char ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Copies all characters starting at the current parsing position in the document. Sets the parsing position to the first character that matches the provided character.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `char` | A character to stop copying at.
|
14 |
-
|
15 |
-
Returns the copied characters.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/createElement.md
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: createElement
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
createElement ( string $name [, string $value = null ] ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Creates a new element.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `name` | Name of the element
|
14 |
-
| `value` | Value of the element
|
15 |
-
|
16 |
-
Returns the element.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/createTextNode.md
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: createTextNode
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
createTextNode ( string $value ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Creates a new text element.
|
10 |
-
|
11 |
-
Returns the element.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/decode.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: decode (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
decode ()
|
7 |
-
```
|
8 |
-
|
9 |
-
Decodes HTML entities in the DOM recursively.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/dump.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: dump
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
dump ( [ bool $show_attr = true ] ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Dumps the entire DOM into a string. Useful for debugging purposes.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `show_attr` | Attributes are included in the dump when enabled.
|
14 |
-
|
15 |
-
Returns the DOM tree as string.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/expect.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: expect
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
expect (
|
7 |
-
string $selector
|
8 |
-
[, int $idx = null ]
|
9 |
-
[, bool $lowercase = false ]
|
10 |
-
) : mixed
|
11 |
-
```
|
12 |
-
|
13 |
-
Finds elements in the DOM.
|
14 |
-
Returns null if no match was found.
|
15 |
-
See [`find`](./find/) for a description of parameters and selectors.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/find.md
DELETED
@@ -1,17 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: find
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
find ( string $selector [, int $idx = null [, bool $lowercase = false ]] ) : mixed
|
7 |
-
```
|
8 |
-
|
9 |
-
Finds elements in the DOM.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `selector` | A [CSS style selector](/HtmlNode/find).
|
14 |
-
| `idx` | Index of the element to return.
|
15 |
-
| `lowercase` | Matches tag names case-insensitively when enabled.
|
16 |
-
|
17 |
-
Returns an array of matches or a single element if `idx` is defined.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/firstChild.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: firstChild
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
firstChild () : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the first child of the root element.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementById.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: getElementById
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
getElementById ( string $id ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Searches an element by id.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `id` | ID of the element to find.
|
14 |
-
|
15 |
-
Returns the element or null if no match was found.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementByTagName.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: getElementByTagName
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
getElementByTagName ( string $name ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Searches an element by tag name.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `name` | Tag name of the element to find.
|
14 |
-
|
15 |
-
Returns the element or null if no match was found.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementsById.md
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: getElementsById
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
getElementsById ( string $id [, int $idx = null ] ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Searches elements by id.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `id` | ID of the element to find.
|
14 |
-
| `idx` | Returns the element at the specified index if defined.
|
15 |
-
|
16 |
-
Returns the element(s) or null if no match was found.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/getElementsByTagName.md
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: getElementsByTagName
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
getElementsByTagName ( string $name [, int $idx = -1 ] ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Searches elements by tag name.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `name` | Tag name of the element to find.
|
14 |
-
| `idx` | Returns the element at the specified index.
|
15 |
-
|
16 |
-
Returns the element(s) or null if no match was found.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/index.md
DELETED
@@ -1,38 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: HtmlDocument
|
3 |
-
---
|
4 |
-
|
5 |
-
Represents the [DOM](https://en.wikipedia.org/wiki/Document_Object_Model) in memory. Provides functions to parse documents and access individual elements (see [`HtmlNode`](../HtmlNode/)).
|
6 |
-
|
7 |
-
## Public Properties
|
8 |
-
|
9 |
-
| Property | Description
|
10 |
-
| -------- | -----------
|
11 |
-
| `root` | Root node of the document.
|
12 |
-
| `nodes` | List of top-level nodes in the document.
|
13 |
-
| `callback` | Callback function that is called for each element in the DOM when generating outertext.
|
14 |
-
| `lowercase` | If enabled, all tag names are converted to lowercase when parsing documents.
|
15 |
-
| `original_size` | Original document size in bytes.
|
16 |
-
| `size` | Current document size in bytes.
|
17 |
-
| `_charset` | Charset of the original document.
|
18 |
-
| `_target_charset` | Target charset for the current document.
|
19 |
-
| `default_span_text` | Text to return for `<span>` elements.
|
20 |
-
|
21 |
-
## Protected Properties
|
22 |
-
|
23 |
-
| Property | Description
|
24 |
-
| -------- | -----------
|
25 |
-
| `pos` | Current parsing position within `doc`.
|
26 |
-
| `doc` | The original document.
|
27 |
-
| `char` | Character at position `pos` in `doc`.
|
28 |
-
| `cursor` | Current element cursor in the document.
|
29 |
-
| `parent` | Parent element node.
|
30 |
-
| `noise` | Noise from the original document (i.e. scripts, comments, etc...).
|
31 |
-
| `token_blank` | Tokens that are considered whitespace in HTML.
|
32 |
-
| `token_equal` | Tokens to identify the equal sign for attributes, stopping either at the closing tag ("/" i.e. `<html />`) or the end of an opening tag (">" i.e. `<html>`).
|
33 |
-
| `token_slash` | Tokens to identify the end of a tag name. A tag name either ends on the ending slash ("/" i.e. `<html/>`) or whitespace (`"\s\r\n\t"`).
|
34 |
-
| `token_attr` | Tokens to identify the end of an attribute.
|
35 |
-
| `default_br_text` | Text to return for `<br>` elements.
|
36 |
-
| `self_closing_tags` | A list of tag names where the closing tag is omitted.
|
37 |
-
| `block_tags` | A list of tag names where remaining unclosed tags are forcibly closed.
|
38 |
-
| `optional_closing_tags` | A list of tag names where the closing tag can be omitted.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/lastChild.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: lastChild
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
lastChild () : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the last child of the root element.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/link_nodes.md
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: link_nodes (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
link_nodes ( object &$node, bool $is_child )
|
7 |
-
```
|
8 |
-
|
9 |
-
Links the provided node to the DOM tree.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `node` | The node to link to the DOM tree.
|
14 |
-
| `is_child` | If active, makes the node a sibling of the current node (child of parent).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/load.md
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: load
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
load ( string $str [, bool $lowercase = true [, bool $stripRN = true [, string $defaultBRText = DEFAULT_BR_TEXT [, string $defaultSpanText = DEFAULT_SPAN_TEXT [, int $options = 0 ]]]]]) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Loads the provided HTML document string.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `str` | The HTML document string.
|
14 |
-
| `lowercase` | Tag names are parsed in lowercase letters if enabled.
|
15 |
-
| `stripRN` | Newline characters are replaced by whitespace if enabled.
|
16 |
-
| `defaultBRText` | Defines the default text to return for `<br>` elements.
|
17 |
-
| `defaultSpanText` | Defines the default text to return for `<span>` elements.
|
18 |
-
| `options` | Additional options for the parser. Currently supports `'HDOM_SMARTY_AS_TEXT'` to remove [Smarty](https://www.smarty.net/) scripts.
|
19 |
-
|
20 |
-
Returns the object.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/loadFile.md
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: loadFile
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
loadFile (...)
|
7 |
-
```
|
8 |
-
|
9 |
-
Loads an HTML document from a file. Supports arguments of [`file_get_contents`](http://php.net/manual/en/function.file-get-contents.php).
|
10 |
-
|
11 |
-
Returns the object.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: parse (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
parse ()
|
7 |
-
```
|
8 |
-
|
9 |
-
Parses the document. This function is called after the document was loaded into `$this->doc`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse_attr.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: parse_attr (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
parse_attr ( object $node, string $name, array &$space )
|
7 |
-
```
|
8 |
-
|
9 |
-
Parses a single attribute starting at the current parsing position in the document.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `node` | The current element (node).
|
14 |
-
| `name` | The attribute name.
|
15 |
-
| `space` | An array of whitespace surrounding the current attribute (see [Attribute Whitespace](../definitions/#attribute-whitespace)).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/parse_charset.md
DELETED
@@ -1,17 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: parse_charset (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
parse_charset ()
|
7 |
-
```
|
8 |
-
|
9 |
-
Parses the charset.
|
10 |
-
|
11 |
-
If the callback function `get_last_retrieve_url_contents_content_type` exists, it is assumed to return the content type header for the current document as string.
|
12 |
-
|
13 |
-
Uses the charset from the metadata of the page if defined.
|
14 |
-
|
15 |
-
If none of the previous conditions are met, the charset is determined by `mb_detect_encoding` if multi-byte support is active.
|
16 |
-
|
17 |
-
If multi-byte support is not active the charset is assumed to be `'UTF-8'`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/prepare.md
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: prepare (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
prepare ( string $str [, bool $lowercase = true [, string $defaultBRText = DEFAULT_BR_TEXT [, string $defaultSpanText = DEFAULT_SPAN_TEXT ]]] )
|
7 |
-
```
|
8 |
-
|
9 |
-
Initializes the DOM object.
|
10 |
-
|
11 |
-
| Parameters | Description
|
12 |
-
| ---------- | -----------
|
13 |
-
| `str` | The HTML document string.
|
14 |
-
| `lowercase` | Tag names are parsed in lowercase letters if enabled.
|
15 |
-
| `defaultBRText` | Defines the default text to return for `<br>` elements.
|
16 |
-
| `defaultSpanText` | Defines the default text to return for `<span>` elements.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/read_tag.md
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: read_tag (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
read_tag () : bool
|
7 |
-
```
|
8 |
-
|
9 |
-
Reads a single tag starting at the current parsing position in the document. The tag is automatically added to the DOM.
|
10 |
-
|
11 |
-
Returns true if a tag was found.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/remove_callback.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: remove_callback
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
remove_callback ()
|
7 |
-
```
|
8 |
-
|
9 |
-
Removes the callback set by [`set_callback`](#set_callback).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/remove_noise.md
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: remove_noise (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
remove_noise ( string $pattern [, bool $remove_tag = false] )
|
7 |
-
```
|
8 |
-
|
9 |
-
Replaces noise in the document (i.e. scripts) by placeholders and adds the removed contents to `$this->noise`.
|
10 |
-
|
11 |
-
_Note_: Noise is replaced by placeholders in order to allow restoring the original contents. Placeholders take the form of `'___noise___1000'` where the number is increased by one for each removed noise.
|
12 |
-
|
13 |
-
| Parameter | Description
|
14 |
-
| --------- | -----------
|
15 |
-
| `pattern` | A regular expression that matches the noise to remove.
|
16 |
-
| `remove_tag` | Removes the entire match when enabled or submatches when disabled.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/restore_noise.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: restore_noise (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
restore_noise ( string $text ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Restores noise in the provided string by replacing noise placeholders by their original contents.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `text` | A string (potentially) containing noise placeholders.
|
14 |
-
|
15 |
-
Returns the string with original contents restored or the original string if it doesn't contain noise placeholders.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/save.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: save
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
save ( [ string $filepath = '' ] ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Writes the current DOM to file.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `filepath` | Writes to file if the provided file path is not empty.
|
14 |
-
|
15 |
-
Returns the document string.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/search_noise.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: search_noise (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
search_noise ( string $text ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Find a single noise element by providing the noise placeholder text.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `text` | The noise placeholder to find.
|
14 |
-
|
15 |
-
Returns the original contents for the placeholder.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/set_callback.md
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: set_callback
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
set_callback ( string $function_name )
|
7 |
-
```
|
8 |
-
|
9 |
-
Sets the callback function which is called on each element of the DOM when building outertext.
|
10 |
-
The function must accept a single parameter of type `HtmlNode`.
|
11 |
-
|
12 |
-
| Parameter | Description
|
13 |
-
| --------- | -----------
|
14 |
-
| `function_name` | Name of the function.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlDocument/skip.md
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
|
2 |
-
---
|
3 |
-
title: skip (protected)
|
4 |
-
---
|
5 |
-
|
6 |
-
```php
|
7 |
-
skip ( string $chars )
|
8 |
-
```
|
9 |
-
|
10 |
-
Skips characters starting at the current parsing position in the document. Sets the parsing position to the first character not in the provided list of characters.
|
11 |
-
|
12 |
-
| Parameter | Description
|
13 |
-
| --------- | -----------
|
14 |
-
| `chars` | A list of characters to skip.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__call.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __call
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__call ($function, $arguments)
|
7 |
-
```
|
8 |
-
|
9 |
-
Serves as a wrapper for deprecated methods. See [magic methods](https://www.php.net/manual/en/language.oop5.overloading.php#object.call) for more information.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__construct.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __construct
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__construct ( [ object $dom ] ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `dom` | An object of type [`HtmlDocument`](api/HtmlDocument/).
|
12 |
-
|
13 |
-
Constructs a new object of type `HtmlNode`, assigns `$dom` as DOM object and adds itself to the list of nodes in `$dom`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__debugInfo.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __debugInfo
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__debugInfo ( )
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns debugging information about the current object.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__destruct.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __destruct
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__destruct ( )
|
7 |
-
```
|
8 |
-
|
9 |
-
Destructs the current object and frees memory.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__get.md
DELETED
@@ -1,24 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __get
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__get ( string $name ) : mixed
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `name` | `outertext`, `innertext`, `plaintext`, `xmltext` or attribute name.
|
12 |
-
|
13 |
-
See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.get)
|
14 |
-
|
15 |
-
If the provided name is a valid attribute name, returns the attribute value. Otherwise a value according to the table below.
|
16 |
-
|
17 |
-
| Name | Description
|
18 |
-
| ---- | -----------
|
19 |
-
| `outertext` | Returns the outer text of the current node.
|
20 |
-
| `innertext` | Returns the inner text of the current node.
|
21 |
-
| `plaintext` | Returns the plain text of the current node.
|
22 |
-
| `xmltext` | Returns the xml representation for the inner text of the current node as a CDATA section.
|
23 |
-
|
24 |
-
Returns nothing if the provided name is neither a valid attribute name, nor a valid parameter name.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__isset.md
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __isset
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__isset ( string $name ) : bool
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `name` | `outertext`, `innertext`, `plaintext` or attribute name.
|
12 |
-
|
13 |
-
See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.isset)
|
14 |
-
|
15 |
-
Returns true if the provided name is a valid attribute name or any of the values in the table below. False otherwise.
|
16 |
-
|
17 |
-
| Name | Description
|
18 |
-
| ---- | -----------
|
19 |
-
| `outertext` | Returns the outer text of the current node.
|
20 |
-
| `innertext` | Returns the inner text of the current node.
|
21 |
-
| `plaintext` | Returns the plain text of the current node.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__set.md
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __set
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__set ( string $name, mixed $value )
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `name` | `outertext`, `innertext` or attribute name.
|
12 |
-
| `value` | Value to set.
|
13 |
-
|
14 |
-
See [magic methods](http://php.net/manual/en/language.oop5.overloading.php#object.set)
|
15 |
-
|
16 |
-
Sets the outer text of the current node to `$value` if `$name` is `outertext`.
|
17 |
-
|
18 |
-
Sets the inner text of the current node to `$value` if `$name` is `innertext`.
|
19 |
-
|
20 |
-
Otherwise, adds or updates an attribute with name `$name` and value `$value` to the current node.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__toString.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __toString
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__toString ( ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the outer text of the current node.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/__unset.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: __unset
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
__unset ( string $name )
|
7 |
-
```
|
8 |
-
|
9 |
-
Removes the attribute with name `$name` from the current node if it exists.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/addClass.md
DELETED
@@ -1,25 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: addClass
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
addClass ( mixed $class )
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `class` | Specifies one or more class names to be added.
|
12 |
-
|
13 |
-
Adds one or more class names to the current node.
|
14 |
-
|
15 |
-
**Remarks**
|
16 |
-
|
17 |
-
* To add more than one class, separate the class names with space or provide them as an array.
|
18 |
-
|
19 |
-
**Examples**
|
20 |
-
|
21 |
-
```php
|
22 |
-
$node->addClass('hidden');
|
23 |
-
$node->addClass('article important');
|
24 |
-
$node->addClass(array('article', 'new'));
|
25 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/appendChild.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: appendChild
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
appendChild ( object $node ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `node` | An object of type [`HtmlNode`](../HtmlNode/)
|
12 |
-
|
13 |
-
Makes the current node parent of the node provided to this function.
|
14 |
-
|
15 |
-
Returns the provided node.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/childNodes.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: childNodes
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
childNodes ( [ int $idx = -1 ] ) : mixed
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `idx` | Index of the node to return or `-1` to return all nodes.
|
12 |
-
|
13 |
-
Returns all or one specific child node from the current node.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/clear.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: clear
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
clear ( )
|
7 |
-
```
|
8 |
-
|
9 |
-
Sets all properties in the current node, which contain objects, to null.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/convert_text.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: convert_text
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
convert_text ( string $text ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `text` | Text to convert.
|
12 |
-
|
13 |
-
Assumes that the provided text is in the form of the configured source character set (see [`sourceCharset`](../HtmlNode/)) and converts it to the specified target character set (see [`targetCharset`](../HtmlNode/)).
|
14 |
-
|
15 |
-
Returns the converted text.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/definitions.md
DELETED
@@ -1,96 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Definitions
|
3 |
-
---
|
4 |
-
|
5 |
-
The definitions below are an essential part of the parser.
|
6 |
-
|
7 |
-
## Node Types
|
8 |
-
|
9 |
-
The type of a node is determined during parsing and represented by one of the elements in the list below.
|
10 |
-
|
11 |
-
| Type | Description
|
12 |
-
| ---- | -----------
|
13 |
-
| `HDOM_TYPE_ELEMENT` | Start tag (i.e. `<html>`)
|
14 |
-
| `HDOM_TYPE_COMMENT` | HTML comment (i.e. `<!-- Hello, World! -->`)
|
15 |
-
| `HDOM_TYPE_TEXT` | Plain text (i.e. `Hello, World!`)
|
16 |
-
| `HDOM_TYPE_ROOT` | Root element. There can always only be one root element in the DOM.
|
17 |
-
| `HDOM_TYPE_UNKNOWN` | Unknown type (i.e. CDATA, DOCTYPE, etc...)
|
18 |
-
|
19 |
-
### Example
|
20 |
-
|
21 |
-
```html
|
22 |
-
<!DOCTYPE html><html><!-- Hello, World! --></html>Hello, World!
|
23 |
-
```
|
24 |
-
|
25 |
-
_Note_: `HDOM_TYPE_ROOT` always exists regardless of the actual document structure.
|
26 |
-
|
27 |
-
| HTML | Node Type
|
28 |
-
| ---- | ---------
|
29 |
-
| | `HDOM_TYPE_ROOT`
|
30 |
-
| `<!DOCTYPE html>` | `HDOM_TYPE_UNKNOWN`
|
31 |
-
| `<html>` | `HDOM_TYPE_ELEMENT`
|
32 |
-
| `<!-- Hello, World! -->` | `HDOM_TYPE_COMMENT`
|
33 |
-
| `Hello, World!` | `HDOM_TYPE_TEXT`
|
34 |
-
|
35 |
-
## Quote Types
|
36 |
-
|
37 |
-
Identifies the quoting type on attribute values.
|
38 |
-
|
39 |
-
| Type | Description
|
40 |
-
| ---- | -----------
|
41 |
-
| `HDOM_QUOTE_DOUBLE` | Double quotes (`""`)
|
42 |
-
| `HDOM_QUOTE_SINGLE` | Single quotes (`''`)
|
43 |
-
| `HDOM_QUOTE_NO` | Not quoted (flag)
|
44 |
-
|
45 |
-
_Note_: Attributes with no values (flags) are stored as `HDOM_QUOTE_NO`.
|
46 |
-
|
47 |
-
### Example
|
48 |
-
|
49 |
-
```html
|
50 |
-
<p class="paragraph" id='info1' hidden>Hello, World!</p>
|
51 |
-
```
|
52 |
-
|
53 |
-
| Attribute | Description
|
54 |
-
| --------- | -----------
|
55 |
-
| `class="paragraph"` | `HDOM_QUOTE_DOUBLE`
|
56 |
-
| `id='info1'` | `HDOM_QUOTE_SINGLE`
|
57 |
-
| `hidden` | `HDOM_QUOTE_NO`
|
58 |
-
|
59 |
-
## Node Info Types
|
60 |
-
|
61 |
-
Each node stores additional information (metadata) that is identified by the elements below.
|
62 |
-
|
63 |
-
| Type | Description
|
64 |
-
| ---- | -----------
|
65 |
-
| `HDOM_INFO_BEGIN` | Cursor position for the start tag of a node.
|
66 |
-
| `HDOM_INFO_END` | Cursor position for the end tag of a node. A value of zero indicates a node with no end tag (missing closing tag).
|
67 |
-
| `HDOM_INFO_QUOTE` | Quote type for attribute values. The value must be an element of [Quote Type](#quote-types).
|
68 |
-
| `HDOM_INFO_SPACE` | Array of whitespace around attributes (see [Attribute Whitespace](#attribute-whitespace)).
|
69 |
-
| `HDOM_INFO_TEXT` | Non-HTML text in tags (i.e. comments, doctype, etc...).
|
70 |
-
| `HDOM_INFO_INNER` | Inner text of a node.
|
71 |
-
| `HDOM_INFO_OUTER` | Outer text of a node.
|
72 |
-
| `HDOM_INFO_ENDSPACE` | Whitespace at the end of a tag before the closing bracket.
|
73 |
-
|
74 |
-
## Attribute Whitespace
|
75 |
-
|
76 |
-
Whitespace around attributes is stored in the form of an array with three elements:
|
77 |
-
|
78 |
-
| Element | Description
|
79 |
-
| ------- | -----------
|
80 |
-
| `0` | Whitespace before the attribute name.
|
81 |
-
| `1` | Whitespace between attribute name and the equal sign.
|
82 |
-
| `2` | Whitespace between the equal sign and the attribute value
|
83 |
-
|
84 |
-
### Example
|
85 |
-
|
86 |
-
```html
|
87 |
-
<p class="paragraph" id = 'info1'hidden>Hello, World!</p>
|
88 |
-
```
|
89 |
-
|
90 |
-
_Note_: Whitespace before attribute names is not displayed in the browser. It is, however, part of the attributes.
|
91 |
-
|
92 |
-
| Attribute | Description
|
93 |
-
| --------- | -----------
|
94 |
-
| ` class="paragraph"` | `[0] => ' ', [1] => '', [2] => ''`
|
95 |
-
| ` id = 'info1'` | `[0] => ' ', [1] => ' ', [2] => ' '`
|
96 |
-
| `hidden` | `[0] => '', [1] => '', [2] => ''`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/dump.md
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: dump
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
dump ( [ bool $show_attr = false [, int $depth = 0 ]] )
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `show_attr` | Attribute names are included in the output if enabled.
|
12 |
-
| `depth` | Depth of the current element
|
13 |
-
|
14 |
-
Dumps information about the current node and all child nodes recursively.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/dump_node.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: dump_node
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
dump_node ( [ bool $echo = true ] ) : mixed
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `echo` | Echoes the dump details directly if enabled.
|
12 |
-
|
13 |
-
Dumps information about the current document node. Returns a string if `$echo` is set to false, null otherwise.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/expect.md
DELETED
@@ -1,15 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: expect
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
expect (
|
7 |
-
string $selector
|
8 |
-
[, int $idx = null ]
|
9 |
-
[, bool $lowercase = false ]
|
10 |
-
) : mixed
|
11 |
-
```
|
12 |
-
|
13 |
-
Finds elements in the DOM.
|
14 |
-
Returns null if no match was found.
|
15 |
-
See [`find`](./find/) for a description of parameters and selectors.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/find.md
DELETED
@@ -1,47 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: find
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
find (
|
7 |
-
string $selector
|
8 |
-
[, int $idx = null ]
|
9 |
-
[, bool $lowercase = false ]
|
10 |
-
) : mixed
|
11 |
-
```
|
12 |
-
|
13 |
-
| Parameter | Description
|
14 |
-
| --------- | -----------
|
15 |
-
| `selector` | [CSS](https://www.w3.org/TR/selectors/) selector.
|
16 |
-
| `idx` | Index of element to return.
|
17 |
-
| `lowercase` | Matches tag names case insensitive (lowercase) if enabled.
|
18 |
-
|
19 |
-
Finds one or more nodes in the current document, using CSS selectors.
|
20 |
-
|
21 |
-
* Returns null if no match was found.
|
22 |
-
* Returns an array of [`HtmlNode`](../HtmlNode/) if `$idx` is null.
|
23 |
-
* Returns an object of type [`HtmlNode`](../HtmlNode/) if `$idx` is anything __but__ null.
|
24 |
-
|
25 |
-
## Supported Selectors
|
26 |
-
|
27 |
-
| Selector | Description
|
28 |
-
| --------- | -----------
|
29 |
-
| `*` | [Universal selector](https://www.w3.org/TR/selectors/#the-universal-selector)
|
30 |
-
| `E` | [Type (tag name) selector](https://www.w3.org/TR/selectors/#type-selectors)
|
31 |
-
| `E#id` | [ID selector](https://www.w3.org/TR/selectors/#id-selectors)
|
32 |
-
| `E.class` | [Class selector](https://www.w3.org/TR/selectors/#class-html)
|
33 |
-
| `E[attr]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors)
|
34 |
-
| `E[attr="value"]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors)
|
35 |
-
| `E[attr="value"] i` | [Case-sensitivity](https://www.w3.org/TR/selectors/#attribute-case)
|
36 |
-
| `E[attr="value"] s` | [Case-sensitivity](https://www.w3.org/TR/selectors/#attribute-case)
|
37 |
-
| `E[attr~="value"]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors)
|
38 |
-
| `E[attr^="value"]` | [Substring matching attribute selector](https://www.w3.org/TR/selectors/#attribute-substrings)
|
39 |
-
| `E[attr$="value"]` | [Substring matching attribute selector](https://www.w3.org/TR/selectors/#attribute-substrings)
|
40 |
-
| `E[attr*="value"]` | [Substring matching attribute selector](https://www.w3.org/TR/selectors/#attribute-substrings)
|
41 |
-
| `E[attr|="value"]` | [Attribute selector](https://www.w3.org/TR/selectors/#attribute-selectors)
|
42 |
-
| `E F` | [Descendant combinator](https://www.w3.org/TR/selectors/#descendant-combinators)
|
43 |
-
| `E > F` | [Child combinator](https://www.w3.org/TR/selectors/#child-combinators)
|
44 |
-
| `E + F` | [Next-sibling combinator](https://www.w3.org/TR/selectors/#adjacent-sibling-combinators)
|
45 |
-
| `E ~ F` | [Subsequent-sibling combinator](https://www.w3.org/TR/selectors/#general-sibling-combinators)
|
46 |
-
| `E, F` | [Selector list](https://www.w3.org/TR/selectors/#selector-list)
|
47 |
-
| `E:not(s)` | [Negation pseudo-class](https://www.w3.org/TR/selectors-3/#negation)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/find_ancestor_tag.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: find_ancestor_tag
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
find_ancestor_tag ( string $tag ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `tag` | Tag name of the element to find.
|
12 |
-
|
13 |
-
Returns the first matching node that matches the specified tag name or null if no match was found.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/firstChild.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: firstChild
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
firstChild ( ) : mixed
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the first child node of the current node or null if the current node has no child nodes.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getAllAttributes.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: getAllAttributes
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
getAllAttributes ( ) : array
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns all attributes for the current node.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getAttribute.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: getAttribute
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
getAttribute ( string $name ) : mixed
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `name` | Attribute name.
|
12 |
-
|
13 |
-
Returns the value for the attribute `$name`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementById.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: getElementById
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
getElementById ( string $id ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `id` | Element id.
|
12 |
-
|
13 |
-
Returns the first element with the specified id.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementByTagName.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: getElementByTagName
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
getElementByTagName ( string $name ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `name` | Tag name.
|
12 |
-
|
13 |
-
Returns the first element with the specified tag name.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementsById.md
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: getElementsById
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
getElementsById ( string $id [, int $idx = null] ) : mixed
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `id` | Element id.
|
12 |
-
| `idx` | Index of element to return.
|
13 |
-
|
14 |
-
Returns all elements with the specified id if `$idx` is null, or a specific one if `$idx` is a valid index.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/getElementsByTagName.md
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: getElementsByTagName
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
getElementsByTagName ( string $name [, int $idx = null ] ) : mixed
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `name` | Tag name.
|
12 |
-
| `idx` | Index of the element to return.
|
13 |
-
|
14 |
-
Returns all elements with the specified tag name if `$idx` is null, or a specific one if `$idx` is a valid index.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/get_display_size.md
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: get_display_size
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
get_display_size ( ) : mixed
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns false if the current node is not an image.
|
10 |
-
|
11 |
-
Returns an associative array of two elements - `height` and `width` - that represent the display size of the image.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasAttribute.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: hasAttribute
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
hasAttribute ( string $name ) : bool
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `name` | Name of the attribute.
|
12 |
-
|
13 |
-
Returns true if the current node has an attribute with the specified name.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasChildNodes.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: hasChildNodes
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
hasChildNodes ( ) : bool
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns true if the current node has one or more child nodes.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/hasClass.md
DELETED
@@ -1,19 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: hasClass
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
hasClass ( string $class ) : bool
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `class` | Specifies the class name to search for.
|
12 |
-
|
13 |
-
Returns true if the current node has the specified class name.
|
14 |
-
|
15 |
-
**Examples**
|
16 |
-
|
17 |
-
```php
|
18 |
-
$node->hasClass('article');
|
19 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/index.md
DELETED
@@ -1,28 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: HtmlNode
|
3 |
-
---
|
4 |
-
|
5 |
-
Represents a single node in the DOM tree (see [`HtmlDocument`](../HtmlDocument/)).
|
6 |
-
|
7 |
-
## Public Properties
|
8 |
-
|
9 |
-
| Property | Description
|
10 |
-
| -------- | -----------
|
11 |
-
| `_` | Node meta data (i.e. type of node).
|
12 |
-
| `attr` | List of attributes.
|
13 |
-
| `children` | List of child nodes.
|
14 |
-
| `nodes` | List of nodes.
|
15 |
-
| `nodetype` | Node type.
|
16 |
-
| `parent` | Parent node object.
|
17 |
-
| `tag` | Node's tag name.
|
18 |
-
| `tag_start` | Start position of the tag name in the original document.
|
19 |
-
|
20 |
-
## Protected Properties
|
21 |
-
|
22 |
-
None.
|
23 |
-
|
24 |
-
## Private Properties
|
25 |
-
|
26 |
-
| Property | Description
|
27 |
-
| -------- | -----------
|
28 |
-
| `dom` | The DOM object (see [`HtmlDocument`](../HtmlDocument/)).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/innertext.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: innertext
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
innertext ( ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the inner text (everything inside the opening and closing tags) of the current node.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_block_element.md
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: is_block_element
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
is_block_element ( object $node ) : bool
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `node` | A node
|
12 |
-
|
13 |
-
Returns true if the provided element is a block level element.
|
14 |
-
|
15 |
-
Find more information about block level elements at
|
16 |
-
[https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php](https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_inline_element.md
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: is_inline_element
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
is_inline_element ( object $node ) : bool
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `node` | A node
|
12 |
-
|
13 |
-
Returns true if the provided element is an inline level element.
|
14 |
-
|
15 |
-
Find more information about inline level elements at
|
16 |
-
[https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php](https://www.w3resource.com/html/HTML-block-level-and-inline-elements.php)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/is_utf8.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: is_utf8 (static)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
is_utf8 ( string $str ) : bool
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `str` | String to test.
|
12 |
-
|
13 |
-
Returns true if the provided string is a valid UTF-8 string.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/lastChild.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: lastChild
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
lastChild ( ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the last child of the current node or null if the current node has no child elements.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/makeup.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: makeup
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
makeup ( ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the HTML representation of the current node.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/match.md
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: match (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
match (
|
7 |
-
string $exp
|
8 |
-
, string $pattern
|
9 |
-
, string $value
|
10 |
-
, string $case_sensitivity
|
11 |
-
) : bool
|
12 |
-
```
|
13 |
-
|
14 |
-
| Parameter | Description
|
15 |
-
| --------- | -----------
|
16 |
-
| `exp` | Expression
|
17 |
-
| `pattern` | Pattern
|
18 |
-
| `value` | Value
|
19 |
-
| `case_sensitivity` | Case sensitivity
|
20 |
-
|
21 |
-
Matches a single attribute value against the specified attribute selector. See also [`find`](../find/).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/nextSibling.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: nextSibling
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
nextSibling ( ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the next sibling of the current node or null if the current node has no next sibling.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/nodeName.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: nodeName
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
nodeName ( ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the name of the current node (tag name).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/outertext.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: outertext
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
outertext ( ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the outer text (everything including the opening and closing tags) of the current node.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parent.md
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: parent
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
parent ( [ object $parent = null ] ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `parent` | The parent node
|
12 |
-
|
13 |
-
* Returns the parent node of the current node if `$parent` is null.
|
14 |
-
* Sets the parent node of the current node if `$parent` is not null. In this case the current node is automatically added to the list of nodes in the parent node.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parentNode.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: parentNode
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
parentNode () : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the current node's parent.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/parse_selector.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: parse_selector (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
parse_selector ( string $selector_string ) : array
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `selector_string` | The selector string
|
12 |
-
|
13 |
-
Parses a CSS selector into an internal format for further use. See also [`find`](../find/).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/previousSibling.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: previousSibling
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
previousSibling ( ) : object
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the previous sibling of the current node, or null if the current node has no previous sibling.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/remove.md
DELETED
@@ -1,41 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: remove
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
remove ( )
|
7 |
-
```
|
8 |
-
|
9 |
-
Removes the current node recursively from the DOM.
|
10 |
-
Does nothing if the node has no parent (root node).
|
11 |
-
|
12 |
-
**Example**
|
13 |
-
|
14 |
-
```php
|
15 |
-
$html->load(<<<EOD
|
16 |
-
<html>
|
17 |
-
<body>
|
18 |
-
<table>
|
19 |
-
<tr><th>Title</th></tr>
|
20 |
-
<tr><td>Row 1</td></tr>
|
21 |
-
</table>
|
22 |
-
</body>
|
23 |
-
</html>
|
24 |
-
EOD
|
25 |
-
);
|
26 |
-
|
27 |
-
$table = $html->find('table', 0);
|
28 |
-
$table->remove();
|
29 |
-
|
30 |
-
echo $html;
|
31 |
-
|
32 |
-
/**
|
33 |
-
* Returns
|
34 |
-
*
|
35 |
-
* <html> <body> </body> </html>
|
36 |
-
*/
|
37 |
-
```
|
38 |
-
|
39 |
-
**Remarks**
|
40 |
-
|
41 |
-
* Whitespace immediately **before** the removed node will remain in the DOM.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeAttribute.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: removeAttribute
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
removeAttribute ( string $name )
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `name` | Name of the attribute to remove.
|
12 |
-
|
13 |
-
Removes the attribute with the specified name from the current node.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeChild.md
DELETED
@@ -1,45 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: removeChild
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
removeChild ( object $node )
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `node` | Node to remove from current element, must be a child of the current element.
|
12 |
-
|
13 |
-
Removes the node recursively from the DOM.
|
14 |
-
Does nothing if the provided node is not a child of the current node.
|
15 |
-
|
16 |
-
**Example**
|
17 |
-
|
18 |
-
```php
|
19 |
-
$html->load(<<<EOD
|
20 |
-
<html>
|
21 |
-
<body>
|
22 |
-
<table>
|
23 |
-
<tr><th>Title</th></tr>
|
24 |
-
<tr><td>Row 1</td></tr>
|
25 |
-
</table>
|
26 |
-
</body>
|
27 |
-
</html>
|
28 |
-
EOD
|
29 |
-
);
|
30 |
-
|
31 |
-
$body = $html->find('body', 0);
|
32 |
-
$body->removeChild($body->find('table', 0));
|
33 |
-
|
34 |
-
echo $html;
|
35 |
-
|
36 |
-
/**
|
37 |
-
* Returns
|
38 |
-
*
|
39 |
-
* <html> <body> </body> </html>
|
40 |
-
*/
|
41 |
-
```
|
42 |
-
|
43 |
-
**Remarks**
|
44 |
-
|
45 |
-
* Whitespace immediately **before** the removed node will remain in the DOM.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/removeClass.md
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: removeClass
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
removeClass ( [ mixed $class = null ] )
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `class` | Specifies one or more class names to be removed.
|
12 |
-
|
13 |
-
Removes one or more class names from the current node.
|
14 |
-
|
15 |
-
**Remarks**
|
16 |
-
|
17 |
-
* To remove more than one class, separate the class names with space or provide them as an array.
|
18 |
-
* If no parameter is specified, this method will remove all class names from the current node.
|
19 |
-
|
20 |
-
**Examples**
|
21 |
-
|
22 |
-
```php
|
23 |
-
$node->removeClass('hidden');
|
24 |
-
$node->removeClass('article important');
|
25 |
-
$node->removeClass(array('article', 'new'));
|
26 |
-
$node->removeClass();
|
27 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/save.md
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: save
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
save ( [ string $filepath = '' ] ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Writes the current node to file.
|
10 |
-
|
11 |
-
| Parameter | Description
|
12 |
-
| --------- | -----------
|
13 |
-
| `filepath` | Writes to file if the provided file path is not empty.
|
14 |
-
|
15 |
-
Returns the document string.
|
16 |
-
|
17 |
-
**Examples**
|
18 |
-
|
19 |
-
```php
|
20 |
-
$string = $node->save();
|
21 |
-
$string = $node->save($file);
|
22 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/seek.md
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: seek (protected)
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
seek (
|
7 |
-
string $selector
|
8 |
-
, array &$ret
|
9 |
-
, string $parent_cmd
|
10 |
-
[, bool $lowercase = false ]
|
11 |
-
)
|
12 |
-
```
|
13 |
-
|
14 |
-
| Parameter | Description
|
15 |
-
| --------- | -----------
|
16 |
-
| `selector` | The current selector.
|
17 |
-
| `ret` | Previous return value (starting point).
|
18 |
-
| `parent_cmd` | The combinator used before the current selector.
|
19 |
-
| `lowercase` | Matches tag names case insensitive (lowercase) if enabled.
|
20 |
-
|
21 |
-
Starts by searching for child elements of `$ret` that match the specified selector. Adds matching elements to `$ret` (for the next iteration).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/setAttribute.md
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: setAttribute
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
setAttribute ( string $name, string $value )
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `name` | Attribute name
|
12 |
-
| `value` | Attribute value
|
13 |
-
|
14 |
-
Adds or sets an attribute in the current node to the specified value.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/text.md
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: text
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
text ( [ bool $trim = true ] ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
| Parameter | Description
|
10 |
-
| --------- | -----------
|
11 |
-
| `trim` | Removes whitespace around the text if enabled.
|
12 |
-
|
13 |
-
Returns the text representation of the current node and its descendants.
|
14 |
-
|
15 |
-
**Remarks**
|
16 |
-
|
17 |
-
* You can use the short-hand version [`$node->plaintext`](__get.md) instead of
|
18 |
-
`$node->text()` which also works on the document level (automatically selects
|
19 |
-
the root element of the document).
|
20 |
-
|
21 |
-
**Examples**
|
22 |
-
|
23 |
-
```php
|
24 |
-
$node->text();
|
25 |
-
$node->plaintext;
|
26 |
-
$html->plaintext;
|
27 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/HtmlNode/xmltext.md
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: xmltext
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
xmltext ( ) : string
|
7 |
-
```
|
8 |
-
|
9 |
-
Returns the xml representation for the inner text of the current node as a CDATA section.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/api/constants.md
DELETED
@@ -1,31 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Constants
|
3 |
-
---
|
4 |
-
|
5 |
-
Constants define how the parser treats documents. They can be defined before
|
6 |
-
loading the parser to globally replace the default values.
|
7 |
-
|
8 |
-
## DEFAULT_TARGET_CHARSET
|
9 |
-
|
10 |
-
Defines the default target charset for text returned by the parser.
|
11 |
-
|
12 |
-
Default: `'UTF-8'`
|
13 |
-
|
14 |
-
## DEFAULT_BR_TEXT
|
15 |
-
|
16 |
-
Defines the default text to return for `<br>` elements.
|
17 |
-
|
18 |
-
Default: `"\r\n"`
|
19 |
-
|
20 |
-
## DEFAULT_SPAN_TEXT
|
21 |
-
|
22 |
-
Defines the default text to return for `<span>` elements.
|
23 |
-
|
24 |
-
Default: `' '`
|
25 |
-
|
26 |
-
## MAX_FILE_SIZE
|
27 |
-
|
28 |
-
Defines the maximum number of bytes the parser can load into memory. This limit
|
29 |
-
only applies to the source file or string.
|
30 |
-
|
31 |
-
Default: `2621440`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/extra.css
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
code {
|
2 |
-
white-space: nowrap;
|
3 |
-
}
|
4 |
-
|
5 |
-
.logo {
|
6 |
-
display: block;
|
7 |
-
margin: auto;
|
8 |
-
text-align: center;
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/faq/0001.md
DELETED
@@ -1,67 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Why is the parser so memory hungry?
|
3 |
-
---
|
4 |
-
|
5 |
-
## Short answer
|
6 |
-
|
7 |
-
DOM parsers generally require a lot of memory to represent the document tree and its attributes in memory. If memory is a concern, consider using a SAX parser instead.
|
8 |
-
|
9 |
-
## Answer
|
10 |
-
|
11 |
-
The parser loads the entire document tree and its attributes into memory. This is called the Document Object Model (DOM).
|
12 |
-
|
13 |
-
The DOM is not just a copy of the source document. It represents each element in the source document by an object in memory. The result looks like a tree, which is why it's called the document tree:
|
14 |
-
|
15 |
-
```
|
16 |
-
|
17 |
-
html
|
18 |
-
/ \
|
19 |
-
head body
|
20 |
-
/ \ \
|
21 |
-
title meta div
|
22 |
-
/ \
|
23 |
-
ul a
|
24 |
-
/ \
|
25 |
-
li li
|
26 |
-
|
27 |
-
```
|
28 |
-
|
29 |
-
*Note*: Attributes, contents and closing tags were omitted for simplicity.
|
30 |
-
|
31 |
-
In this example, for each node the parser needs to store
|
32 |
-
|
33 |
-
* the name of the node ('html', 'head', 'body', 'title', ...),
|
34 |
-
* a reference to the parent node (i.e. 'div' points to 'body' which points to 'html') and
|
35 |
-
* a list of references to its child nodes (i.e. 'html' points to 'head' and 'body').
|
36 |
-
|
37 |
-
Here is a simplified representation:
|
38 |
-
|
39 |
-
```
|
40 |
-
object
|
41 |
-
> node_name
|
42 |
-
> parent_node
|
43 |
-
> child_nodes[]
|
44 |
-
```
|
45 |
-
|
46 |
-
While the source document only stores the node name and the opening and closing brackets (i.e. `<html>`), a node stores the node name as well as references to the parent and child nodes. Each of which require memory.
|
47 |
-
|
48 |
-
## Example
|
49 |
-
|
50 |
-
Let's take the 'head' element and compare the source data with the object data.
|
51 |
-
|
52 |
-
This is the source data: `<head>` (6 Bytes)
|
53 |
-
|
54 |
-
The equivalent node (including references to parent and child nodes) has following data:
|
55 |
-
|
56 |
-
* Node Object (40 Bytes for the base object + 3 x 16 Bytes for properties = 88 Bytes) [^1]
|
57 |
-
* Node Name "head" (4 Bytes)
|
58 |
-
* Parent Node (unknown number of Bytes)
|
59 |
-
* Child Nodes (8 x 36 Bytes) [^2]
|
60 |
-
|
61 |
-
This amounts to 380 Bytes per object. A factor of 63 compared to the source data. With larger datasets this factor will be smaller, especially when taking content data into account.
|
62 |
-
|
63 |
-
A factor of ~30 compared to the source data is realistic for DOM parsers [^3]. If memory is a concern, consider using a SAX parser instead.
|
64 |
-
|
65 |
-
[^1]: [Objects in PHP 7](https://nikic.github.io/2015/06/19/Internal-value-representation-in-PHP-7-part-2.html#objects-in-php-7) by nikic
|
66 |
-
[^2]: [PHP's new hashtable implementation](https://nikic.github.io/2014/12/22/PHPs-new-hashtable-implementation.html#memory-utilization) by nikic
|
67 |
-
[^3]: [Html Agility Pack Issue #77](https://github.com/zzzprojects/html-agility-pack/issues/77) by aktzpn
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/index.md
DELETED
@@ -1,33 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: PHP Simple HTML DOM Parser
|
3 |
-
---
|
4 |
-
|
5 |
-
A fast, simple and reliable HTML document parser for PHP.
|
6 |
-
|
7 |
-
Created by S.C. Chen, based on [HTML Parser for PHP 4](http://php-html.sourceforge.net/) by Jose
|
8 |
-
Solorzano.
|
9 |
-
|
10 |
-
# Parse any HTML document
|
11 |
-
|
12 |
-
PHP Simple HTML DOM Parser handles any HTML document, even ones that are considered
|
13 |
-
invalid by the [HTML](https://www.w3.org/TR/html/) specification.
|
14 |
-
|
15 |
-
# Select elements using CSS selectors
|
16 |
-
|
17 |
-
PHP Simple HTML DOM Parser supports CSS style selectors to navigate the DOM,
|
18 |
-
similar to [jQuery](https://jquery.com/).
|
19 |
-
|
20 |
-
# Download
|
21 |
-
|
22 |
-
* Download the latest version from [SourceForge](https://sourceforge.net/projects/simplehtmldom/)
|
23 |
-
|
24 |
-
# Contributing
|
25 |
-
|
26 |
-
* Request features on the [Feature Request Tracker](https://sourceforge.net/p/simplehtmldom/feature-requests/)
|
27 |
-
* Report bugs on the [Bug Tracker](https://sourceforge.net/p/simplehtmldom/bugs/)
|
28 |
-
* Get involved with the community on the [Discussions Board](https://sourceforge.net/p/simplehtmldom/discussion/)
|
29 |
-
|
30 |
-
# License
|
31 |
-
|
32 |
-
PHP Simple HTML DOM Parser is [Free Software](https://en.wikipedia.org/wiki/Free_software)
|
33 |
-
licensed under the [MIT License](https://opensource.org/licenses/MIT).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/accessing-element-attributes.md
DELETED
@@ -1,40 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Accessing HTML element attributes
|
3 |
-
---
|
4 |
-
|
5 |
-
# Get, Set and Remove attributes
|
6 |
-
|
7 |
-
```php
|
8 |
-
// Get attribute (if the attribute is a non-value attribute (e.g. checked, selected...), it returns true or false)
|
9 |
-
$value = $e->href;
|
10 |
-
|
11 |
-
// Set attribute (if the attribute is a non-value attribute (e.g. checked, selected...), set its value to true or false)
|
12 |
-
$e->href = 'my link';
|
13 |
-
|
14 |
-
// Remove attribute by setting its value to null!
|
15 |
-
$e->href = null;
|
16 |
-
|
17 |
-
// Determine whether an attribute exists
|
18 |
-
if(isset($e->href))
|
19 |
-
echo 'href exist!';
|
20 |
-
```
|
21 |
-
|
22 |
-
# Magic attributes
|
23 |
-
|
24 |
-
```php
|
25 |
-
// Example
|
26 |
-
$html->load("<div>foo <b>bar</b></div>");
|
27 |
-
$e = $html->find("div", 0);
|
28 |
-
|
29 |
-
echo $e->tag; // Returns: " div"
|
30 |
-
echo $e->outertext; // Returns: " <div>foo <b>bar</b></div>"
|
31 |
-
echo $e->innertext; // Returns: " foo <b>bar</b>"
|
32 |
-
echo $e->plaintext; // Returns: " foo bar"
|
33 |
-
```
|
34 |
-
|
35 |
-
Attribute name | Description
|
36 |
-
-------------- | -----------
|
37 |
-
`$e->tag` | Read or write the **tag name** of element.
|
38 |
-
`$e->outertext`| Read or write the **outer HTML text** of element.
|
39 |
-
`$e->innertext`| Read or write the **inner HTML text** of element.
|
40 |
-
`$e->plaintext`| Read or write the **plain text** of element.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/adding-nodes.md
DELETED
@@ -1,123 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Adding Nodes to the DOM
|
3 |
-
---
|
4 |
-
|
5 |
-
The parser allows you to add new elements to an existing document. Find below an
|
6 |
-
example for that.
|
7 |
-
|
8 |
-
**Remarks**
|
9 |
-
|
10 |
-
- It is not possible to create nodes via `->outertext`, `->innertext`, or
|
11 |
-
`->plaintext`. These properties only change the text representation of a node
|
12 |
-
and will return undesired results if used incorrectly.
|
13 |
-
- Use [`$html->createElement`](/api/HtmlDocument/createElement) and
|
14 |
-
[`$html->createTextNode`](/api/HtmlDocument/createTextNode) to create
|
15 |
-
new nodes.
|
16 |
-
- Use [`$node->appendChild`](/api/HtmlNode/appendChild) to add a
|
17 |
-
node as child to another node.
|
18 |
-
- Nodes can be combined in any order.
|
19 |
-
|
20 |
-
**Example**
|
21 |
-
|
22 |
-
```php
|
23 |
-
<?php
|
24 |
-
// This example illustrates adding new elements to the DOM.
|
25 |
-
|
26 |
-
require_once 'HtmlDocument.php';
|
27 |
-
use simplehtmldom\HtmlDocument;
|
28 |
-
|
29 |
-
/***************************** table data *************************************/
|
30 |
-
|
31 |
-
$header = array('Ocean', 'Volume (km^3)');
|
32 |
-
|
33 |
-
$data = array(
|
34 |
-
array('Arctic Ocean', 18750000),
|
35 |
-
array('Atlantic Ocean', 310410900),
|
36 |
-
array('Indian Ocean', 264000000),
|
37 |
-
array('Pacific Ocean', 660000000),
|
38 |
-
array('Souce China Sea', 9880000),
|
39 |
-
array('Southern Ocean', 71800000)
|
40 |
-
);
|
41 |
-
|
42 |
-
/***************************** template ***************************************/
|
43 |
-
|
44 |
-
$doc = <<<EOD
|
45 |
-
<html>
|
46 |
-
<head>
|
47 |
-
<style>
|
48 |
-
table { border: 1px solid black; }
|
49 |
-
|
50 |
-
tr:nth-child(even) { background: #CCC }
|
51 |
-
tr:nth-child(odd) { background: #FFF }
|
52 |
-
</style>
|
53 |
-
</head>
|
54 |
-
<body>
|
55 |
-
<h1>Volumes of the World's Oceans</h1>
|
56 |
-
</body>
|
57 |
-
</html>
|
58 |
-
EOD;
|
59 |
-
|
60 |
-
/***************************** code *******************************************/
|
61 |
-
|
62 |
-
$html = new HtmlDocument();
|
63 |
-
$html->load($doc);
|
64 |
-
$body = $html->find('body', 0);
|
65 |
-
$table = $html->createElement('table');
|
66 |
-
|
67 |
-
// Header row
|
68 |
-
$tr = $html->createElement('tr');
|
69 |
-
foreach ($header as $entry) {
|
70 |
-
$th = $html->createElement('th', $entry);
|
71 |
-
$tr->appendChild($th);
|
72 |
-
}
|
73 |
-
$table->appendChild($tr);
|
74 |
-
|
75 |
-
// Table data
|
76 |
-
foreach ($data as $row) {
|
77 |
-
$tr = $html->createElement('tr');
|
78 |
-
foreach ($row as $entry) {
|
79 |
-
|
80 |
-
// (optional) Add info to the volume column
|
81 |
-
if (is_numeric($entry)) {
|
82 |
-
$value = number_format($entry);
|
83 |
-
$td = $html->createElement('td', $value);
|
84 |
-
$td->setAttribute('volume', $entry);
|
85 |
-
} else {
|
86 |
-
$td = $html->createElement('td', $entry);
|
87 |
-
}
|
88 |
-
|
89 |
-
$tr->appendChild($td);
|
90 |
-
}
|
91 |
-
$table->appendChild($tr);
|
92 |
-
}
|
93 |
-
|
94 |
-
$body->appendChild($table);
|
95 |
-
|
96 |
-
echo $html . PHP_EOL;
|
97 |
-
|
98 |
-
/**
|
99 |
-
* Output (beautified)
|
100 |
-
*
|
101 |
-
* <html>
|
102 |
-
* <head>
|
103 |
-
* <style>
|
104 |
-
* table { border: 1px solid black; }
|
105 |
-
* tr:nth-child(even) { background: #CCC }
|
106 |
-
* tr:nth-child(odd) { background: #FFF }
|
107 |
-
* </style>
|
108 |
-
* </head>
|
109 |
-
* <body>
|
110 |
-
* <h1>Volumes of the World's Oceans</h1>
|
111 |
-
* <table>
|
112 |
-
* <tr><th>Ocean</th><th>Volume (km^3)</th></tr>
|
113 |
-
* <tr><td>Arctic Ocean</td><td volume="18750000">18,750,000</td></tr>
|
114 |
-
* <tr><td>Atlantic Ocean</td><td volume="310410900">310,410,900</td></tr>
|
115 |
-
* <tr><td>Indian Ocean</td><td volume="264000000">264,000,000</td></tr>
|
116 |
-
* <tr><td>Pacific Ocean</td><td volume="660000000">660,000,000</td></tr>
|
117 |
-
* <tr><td>Souce China Sea</td><td volume="9880000">9,880,000</td></tr>
|
118 |
-
* <tr><td>Southern Ocean</td><td volume="71800000">71,800,000</td></tr>
|
119 |
-
* </table>
|
120 |
-
* </body>
|
121 |
-
* </html>
|
122 |
-
*/
|
123 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/creating-dom-objects.md
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Creating HTML DOM objects
|
3 |
-
---
|
4 |
-
|
5 |
-
## From string
|
6 |
-
|
7 |
-
```php
|
8 |
-
<?php
|
9 |
-
include_once 'HtmlDocument';
|
10 |
-
use simplehtmldom\HtmlDocument;
|
11 |
-
|
12 |
-
$html = new HtmlDocument();
|
13 |
-
$html->load('<html><body>Hello!</body></html>');
|
14 |
-
```
|
15 |
-
|
16 |
-
## From URL
|
17 |
-
|
18 |
-
```php
|
19 |
-
<?php
|
20 |
-
include_once 'HtmlWeb';
|
21 |
-
use simplehtmldom\HtmlWeb;
|
22 |
-
|
23 |
-
$html = new HtmlWeb();
|
24 |
-
$html->load('http://www.google.com/');
|
25 |
-
|
26 |
-
```
|
27 |
-
|
28 |
-
## From file
|
29 |
-
|
30 |
-
```php
|
31 |
-
<?php
|
32 |
-
include_once 'HtmlDocument';
|
33 |
-
use simplehtmldom\HtmlDocument;
|
34 |
-
|
35 |
-
$html = new HtmlDocument();
|
36 |
-
$html->loadFile('test.htm');
|
37 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/customizing-parsing-behavior.md
DELETED
@@ -1,18 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Customizing Parsing behavior
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
// Write a function with parameter "$element"
|
7 |
-
function my_callback($element) {
|
8 |
-
// Hide all <b> tags
|
9 |
-
if ($element->tag === 'b')
|
10 |
-
$element->outertext = '';
|
11 |
-
}
|
12 |
-
|
13 |
-
// Register the callback function with its function name
|
14 |
-
$html->set_callback('my_callback');
|
15 |
-
|
16 |
-
// Callback function will be invoked while dumping
|
17 |
-
echo $html;
|
18 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/finding-html-elements.md
DELETED
@@ -1,90 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Finding HTML Elements
|
3 |
-
---
|
4 |
-
|
5 |
-
## By tag name
|
6 |
-
|
7 |
-
```php
|
8 |
-
// Find all anchors, returns an array of element objects
|
9 |
-
$ret = $html->find('a');
|
10 |
-
|
11 |
-
// Find all anchors and images, returns an array of element objects
|
12 |
-
$ret = $html->find('a, img');
|
13 |
-
|
14 |
-
// Find (N)th anchor, returns element object or null if not found (zero based)
|
15 |
-
$ret = $html->find('a', 0);
|
16 |
-
|
17 |
-
// Find last anchor, returns element object or null if not found (zero based)
|
18 |
-
$ret = $html->find('a', -1);
|
19 |
-
```
|
20 |
-
|
21 |
-
## By class name or id
|
22 |
-
|
23 |
-
```php
|
24 |
-
// Find all elements with id=foo
|
25 |
-
$ret = $html->find('#foo');
|
26 |
-
|
27 |
-
// Find all elements with class=foo
|
28 |
-
$ret = $html->find('.foo');
|
29 |
-
```
|
30 |
-
|
31 |
-
## By attribute
|
32 |
-
|
33 |
-
```php
|
34 |
-
// Find all <div> with the id attribute
|
35 |
-
$ret = $html->find('div[id]');
|
36 |
-
|
37 |
-
// Find all <div> elements whose id attribute equals foo
|
38 |
-
$ret = $html->find('div[id=foo]');
|
39 |
-
|
40 |
-
// Find all anchors and images with the "title" attribute
|
41 |
-
$ret = $html->find('a[title], img[title]');
|
42 |
-
|
43 |
-
// Find all elements that have an id attribute
|
44 |
-
$ret = $html->find('*[id]');
|
45 |
-
```
|
46 |
-
|
47 |
-
## Descendants
|
48 |
-
|
49 |
-
```php
|
50 |
-
// Find all <li> in <ul>
|
51 |
-
$es = $html->find('ul li');
|
52 |
-
|
53 |
-
// Find Nested <div> tags
|
54 |
-
$es = $html->find('div div div');
|
55 |
-
|
56 |
-
// Find all <td> in <table> elements with class=hello
|
57 |
-
$es = $html->find('table.hello td');
|
58 |
-
|
59 |
-
// Find all td tags with attribute align=center in table tags
|
60 |
-
$es = $html->find('table td[align=center]');
|
61 |
-
```
|
62 |
-
|
63 |
-
## Nested elements
|
64 |
-
|
65 |
-
```php
|
66 |
-
// Find all <li> in <ul>
|
67 |
-
foreach($html->find('ul') as $ul)
|
68 |
-
{
|
69 |
-
foreach($ul->find('li') as $li)
|
70 |
-
{
|
71 |
-
// do something...
|
72 |
-
}
|
73 |
-
}
|
74 |
-
|
75 |
-
// Find first <li> in first <ul>
|
76 |
-
$e = $html->find('ul', 0)->find('li', 0);
|
77 |
-
```
|
78 |
-
|
79 |
-
## Text, comments and CDATA
|
80 |
-
|
81 |
-
```php
|
82 |
-
// Find all text blocks
|
83 |
-
$es = $html->find('text');
|
84 |
-
|
85 |
-
// Find all comment (<!--...-->) blocks
|
86 |
-
$es = $html->find('comment');
|
87 |
-
|
88 |
-
// Find CDATA blocks
|
89 |
-
$es = $html->find('cdata');
|
90 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/saving-dom-objects.md
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Saving DOM objects
|
3 |
-
---
|
4 |
-
|
5 |
-
```php
|
6 |
-
// Dumps the internal DOM tree back into string
|
7 |
-
$str = $html->save();
|
8 |
-
|
9 |
-
// Dumps the internal DOM tree back into a file
|
10 |
-
$html->save('result.htm');
|
11 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/manual/traversing-dom-tree.md
DELETED
@@ -1,23 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Traversing the DOM tree
|
3 |
-
---
|
4 |
-
|
5 |
-
If you are not so familiar with HTML DOM, check this [link](http://php.net/manual/en/book.dom.php) to learn more...
|
6 |
-
|
7 |
-
```php
|
8 |
-
// Example
|
9 |
-
echo $html->find("#div1", 0)->children(1)->children(1)->children(2)->id;
|
10 |
-
// or
|
11 |
-
echo $html->getElementById("div1")->childNodes(1)->childNodes(1)->childNodes(2)->getAttribute('id');
|
12 |
-
```
|
13 |
-
|
14 |
-
You can also call methods using the **Camel naming conventions**.
|
15 |
-
|
16 |
-
Method | Description
|
17 |
-
------ | -----------
|
18 |
-
`$e->children( [int $index] ) : mixed` | Returns the Nth **child object** if **index** is set, otherwise return an **array of children**.
|
19 |
-
`$e->parent() : element` | Returns the **parent** of element.
|
20 |
-
`$e->first_child() : element` | Returns the **first child** of element, or **null** if not found.
|
21 |
-
`$e->last_child() : element` | Returns the **last child** of element, or **null** if not found.
|
22 |
-
`$e->next_sibling() : element` | Returns the **next sibling** of element, or **null** if not found.
|
23 |
-
`$e->prev_sibling() : element` | Returns the **previous sibling** of element, or **null** if not found.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/quick-start.md
DELETED
@@ -1,98 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Quick Start
|
3 |
-
---
|
4 |
-
|
5 |
-
Find below sample code that demonstrates the fundamental features of PHP Simple HTML DOM Parser.
|
6 |
-
|
7 |
-
## Read plain text from HTML document
|
8 |
-
|
9 |
-
```php
|
10 |
-
<?php
|
11 |
-
include_once 'HtmlWeb.php';
|
12 |
-
use simplehtmldom\HtmlWeb;
|
13 |
-
|
14 |
-
$html = new HtmlWeb();
|
15 |
-
echo $html->load('https://www.google.com/')->plaintext;
|
16 |
-
```
|
17 |
-
|
18 |
-
Loads a webpage into memory, parses it and returns the plain text.
|
19 |
-
|
20 |
-
## Read plain text from HTML string
|
21 |
-
|
22 |
-
```php
|
23 |
-
<?php
|
24 |
-
include_once 'HtmlDocument.php';
|
25 |
-
use simplehtmldom\HtmlDocument;
|
26 |
-
|
27 |
-
$html = new HtmlDocument();
|
28 |
-
echo $html->load('<ul><li>Hello, World!</li></ul>')->plaintext;
|
29 |
-
```
|
30 |
-
|
31 |
-
Parses HTML formatted text and returns the plain text. Note that the parser handles partial documents as well as full documents.
|
32 |
-
|
33 |
-
## Read specific elements from HTML document
|
34 |
-
|
35 |
-
```php
|
36 |
-
<?php
|
37 |
-
include_once 'HtmlWeb.php';
|
38 |
-
use simplehtmldom\HtmlWeb;
|
39 |
-
|
40 |
-
$html = new HtmlWeb();
|
41 |
-
$html->load('https://www.google.com/');
|
42 |
-
|
43 |
-
foreach($html->find('img') as $element)
|
44 |
-
echo $element->src . '<br>';
|
45 |
-
|
46 |
-
foreach($html->find('a') as $element)
|
47 |
-
echo $element->href . '<br>';
|
48 |
-
```
|
49 |
-
|
50 |
-
Loads the specified document into memory and returns a list of image sources as well as anchor links. Note that [`find`](manual/finding-html-elements.md) supports [CSS](https://www.w3.org/TR/selectors/) selectors to find elements in the DOM.
|
51 |
-
|
52 |
-
## Modify HTML documents
|
53 |
-
|
54 |
-
```php
|
55 |
-
<?php
|
56 |
-
include_once 'HtmlDocument.php';
|
57 |
-
use simplehtmldom\HtmlDocument;
|
58 |
-
|
59 |
-
$html = new HtmlDocument();
|
60 |
-
$html->load('<div id="hello">Hello, </div><div id="world">World!</div>');
|
61 |
-
|
62 |
-
$html->find('div', 1)->class = 'bar';
|
63 |
-
$html->find('div[id=hello]', 0)->innertext = 'foo';
|
64 |
-
|
65 |
-
echo $html; // <div id="hello">foo</div><div id="world" class="bar">World!</div>
|
66 |
-
```
|
67 |
-
|
68 |
-
Parses the provided HTML string and replaces elements in the DOM before returning the updated HTML string. In this example, the class for the second `div` element is set to `bar` and the inner text for the first `div` element to `foo`.
|
69 |
-
|
70 |
-
Note that [`find`](manual/finding-html-elements.md) supports a second parameter to return a single element from the array of matches.
|
71 |
-
|
72 |
-
Note that attributes can be accessed directly by the means of magic methods (`->class` and `->innertext` in the example above).
|
73 |
-
|
74 |
-
## Collect information from Slashdot
|
75 |
-
|
76 |
-
```php
|
77 |
-
<?php
|
78 |
-
include_once 'HtmlWeb.php';
|
79 |
-
use simplehtmldom\HtmlWeb;
|
80 |
-
|
81 |
-
$html = new HtmlWeb();
|
82 |
-
$html->load('https://slashdot.org/');
|
83 |
-
|
84 |
-
$articles = $html->find('article[data-fhtype="story"]');
|
85 |
-
|
86 |
-
foreach($articles as $article) {
|
87 |
-
$item['title'] = $article->find('.story-title', 0)->plaintext;
|
88 |
-
$item['intro'] = $article->find('.p', 0)->plaintext;
|
89 |
-
$item['details'] = $article->find('.details', 0)->plaintext;
|
90 |
-
$items[] = $item;
|
91 |
-
}
|
92 |
-
|
93 |
-
print_r($items);
|
94 |
-
```
|
95 |
-
|
96 |
-
Collects information from [Slashdot](https://slashdot.org/) for further processing.
|
97 |
-
|
98 |
-
Note that the combination of CSS selectors and magic methods makes the process of parsing HTML documents a simple task that is easy to understand.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/docs/requirements.md
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
| Requirement | Minimum | Recommended |
|
2 |
-
| ----------- |:-------:|:-----------: |
|
3 |
-
| PHP Version | 5.6.0 | [Latest stable release](https://www.php.net/supported-versions.php) |
|
4 |
-
| PHP Extensions | [iconv](https://www.php.net/manual/en/book.iconv.php) | [iconv](https://www.php.net/manual/en/book.iconv.php),<br> [mbstring](https://www.php.net/manual/en/book.mbstring.php), <br> [cURL](https://www.php.net/manual/en/book.curl.php)
|
5 |
-
| PHP INI Settings | --- | [allow_url_fopen = On](https://www.php.net/manual/en/filesystem.configuration.php#ini.allow-url-fopen) **
|
6 |
-
|
7 |
-
** This is only necessary if cURL is not available.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/manual/mkdocs.yml
DELETED
@@ -1,152 +0,0 @@
|
|
1 |
-
site_name: Simple HTML DOM documentation
|
2 |
-
site_url: http://simplehtmldom.sourceforge.net/
|
3 |
-
site_description: A simple HTML DOM parser written in PHP
|
4 |
-
|
5 |
-
repo_name: SourceForge
|
6 |
-
repo_url: https://sourceforge.net/projects/simplehtmldom/
|
7 |
-
|
8 |
-
theme:
|
9 |
-
name: readthedocs
|
10 |
-
custom_dir: 'custom_theme/'
|
11 |
-
|
12 |
-
markdown_extensions:
|
13 |
-
- footnotes
|
14 |
-
|
15 |
-
google_analytics: ['UA-3452027-2', 'simplehtmldom.sourceforge.net']
|
16 |
-
|
17 |
-
extra_css: [extra.css]
|
18 |
-
|
19 |
-
nav:
|
20 |
-
- 'index.md'
|
21 |
-
- 'requirements.md'
|
22 |
-
- 'quick-start.md'
|
23 |
-
- FAQ:
|
24 |
-
- 'faq/0001.md'
|
25 |
-
- Manual:
|
26 |
-
- 'manual/creating-dom-objects.md'
|
27 |
-
- 'manual/finding-html-elements.md'
|
28 |
-
- 'manual/accessing-element-attributes.md'
|
29 |
-
- 'manual/traversing-dom-tree.md'
|
30 |
-
- 'manual/saving-dom-objects.md'
|
31 |
-
- 'manual/customizing-parsing-behavior.md'
|
32 |
-
- 'manual/adding-nodes.md'
|
33 |
-
- API:
|
34 |
-
- 'api/constants.md'
|
35 |
-
- HtmlDocument:
|
36 |
-
- 'api/HtmlDocument/index.md'
|
37 |
-
- 'api/HtmlDocument/__call.md'
|
38 |
-
- 'api/HtmlDocument/__construct.md'
|
39 |
-
- 'api/HtmlDocument/__debugInfo.md'
|
40 |
-
- 'api/HtmlDocument/__destruct.md'
|
41 |
-
- 'api/HtmlDocument/load.md'
|
42 |
-
- 'api/HtmlDocument/set_callback.md'
|
43 |
-
- 'api/HtmlDocument/remove_callback.md'
|
44 |
-
- 'api/HtmlDocument/save.md'
|
45 |
-
- 'api/HtmlDocument/expect.md'
|
46 |
-
- 'api/HtmlDocument/find.md'
|
47 |
-
- 'api/HtmlDocument/decode.md'
|
48 |
-
- 'api/HtmlDocument/dump.md'
|
49 |
-
- 'api/HtmlDocument/prepare.md'
|
50 |
-
- 'api/HtmlDocument/parse.md'
|
51 |
-
- 'api/HtmlDocument/parse_charset.md'
|
52 |
-
- 'api/HtmlDocument/read_tag.md'
|
53 |
-
- 'api/HtmlDocument/parse_attr.md'
|
54 |
-
- 'api/HtmlDocument/link_nodes.md'
|
55 |
-
- 'api/HtmlDocument/as_text_node.md'
|
56 |
-
- 'api/HtmlDocument/skip.md'
|
57 |
-
- 'api/HtmlDocument/copy_skip.md'
|
58 |
-
- 'api/HtmlDocument/copy_until.md'
|
59 |
-
- 'api/HtmlDocument/copy_until_char.md'
|
60 |
-
- 'api/HtmlDocument/remove_noise.md'
|
61 |
-
- 'api/HtmlDocument/restore_noise.md'
|
62 |
-
- 'api/HtmlDocument/search_noise.md'
|
63 |
-
- 'api/HtmlDocument/__toString.md'
|
64 |
-
- 'api/HtmlDocument/__get.md'
|
65 |
-
- 'api/HtmlDocument/childNodes.md'
|
66 |
-
- 'api/HtmlDocument/firstChild.md'
|
67 |
-
- 'api/HtmlDocument/lastChild.md'
|
68 |
-
- 'api/HtmlDocument/createElement.md'
|
69 |
-
- 'api/HtmlDocument/createTextNode.md'
|
70 |
-
- 'api/HtmlDocument/getElementById.md'
|
71 |
-
- 'api/HtmlDocument/getElementsById.md'
|
72 |
-
- 'api/HtmlDocument/getElementByTagName.md'
|
73 |
-
- 'api/HtmlDocument/getElementsByTagName.md'
|
74 |
-
- 'api/HtmlDocument/loadFile.md'
|
75 |
-
- HtmlNode:
|
76 |
-
- 'api/HtmlNode/index.md'
|
77 |
-
- 'api/HtmlNode/definitions.md'
|
78 |
-
- 'api/HtmlNode/__call.md'
|
79 |
-
- 'api/HtmlNode/__construct.md'
|
80 |
-
- 'api/HtmlNode/__debugInfo.md'
|
81 |
-
- 'api/HtmlNode/__destruct.md'
|
82 |
-
- 'api/HtmlNode/__get.md'
|
83 |
-
- 'api/HtmlNode/__isset.md'
|
84 |
-
- 'api/HtmlNode/__set.md'
|
85 |
-
- 'api/HtmlNode/__toString.md'
|
86 |
-
- 'api/HtmlNode/__unset.md'
|
87 |
-
- 'api/HtmlNode/addClass.md'
|
88 |
-
- 'api/HtmlNode/appendChild.md'
|
89 |
-
- 'api/HtmlNode/childNodes.md'
|
90 |
-
- 'api/HtmlNode/clear.md'
|
91 |
-
- 'api/HtmlNode/convert_text.md'
|
92 |
-
- 'api/HtmlNode/dump.md'
|
93 |
-
- 'api/HtmlNode/dump_node.md'
|
94 |
-
- 'api/HtmlNode/expect.md'
|
95 |
-
- 'api/HtmlNode/find.md'
|
96 |
-
- 'api/HtmlNode/find_ancestor_tag.md'
|
97 |
-
- 'api/HtmlNode/firstChild.md'
|
98 |
-
- 'api/HtmlNode/get_display_size.md'
|
99 |
-
- 'api/HtmlNode/getAllAttributes.md'
|
100 |
-
- 'api/HtmlNode/getAttribute.md'
|
101 |
-
- 'api/HtmlNode/getElementById.md'
|
102 |
-
- 'api/HtmlNode/getElementByTagName.md'
|
103 |
-
- 'api/HtmlNode/getElementsById.md'
|
104 |
-
- 'api/HtmlNode/getElementsByTagName.md'
|
105 |
-
- 'api/HtmlNode/hasAttribute.md'
|
106 |
-
- 'api/HtmlNode/hasChildNodes.md'
|
107 |
-
- 'api/HtmlNode/hasClass.md'
|
108 |
-
- 'api/HtmlNode/innertext.md'
|
109 |
-
- 'api/HtmlNode/is_block_element.md'
|
110 |
-
- 'api/HtmlNode/is_inline_element.md'
|
111 |
-
- 'api/HtmlNode/is_utf8.md'
|
112 |
-
- 'api/HtmlNode/lastChild.md'
|
113 |
-
- 'api/HtmlNode/makeup.md'
|
114 |
-
- 'api/HtmlNode/match.md'
|
115 |
-
- 'api/HtmlNode/nextSibling.md'
|
116 |
-
- 'api/HtmlNode/nodeName.md'
|
117 |
-
- 'api/HtmlNode/outertext.md'
|
118 |
-
- 'api/HtmlNode/parent.md'
|
119 |
-
- 'api/HtmlNode/parentNode.md'
|
120 |
-
- 'api/HtmlNode/parse_selector.md'
|
121 |
-
- 'api/HtmlNode/previousSibling.md'
|
122 |
-
- 'api/HtmlNode/remove.md'
|
123 |
-
- 'api/HtmlNode/removeAttribute.md'
|
124 |
-
- 'api/HtmlNode/removeChild.md'
|
125 |
-
- 'api/HtmlNode/removeClass.md'
|
126 |
-
- 'api/HtmlNode/save.md'
|
127 |
-
- 'api/HtmlNode/seek.md'
|
128 |
-
- 'api/HtmlNode/setAttribute.md'
|
129 |
-
- 'api/HtmlNode/text.md'
|
130 |
-
- 'api/HtmlNode/xmltext.md'
|
131 |
-
- Debug:
|
132 |
-
- 'api/Debug/index.md'
|
133 |
-
- 'api/Debug/enable.md'
|
134 |
-
- 'api/Debug/disable.md'
|
135 |
-
- 'api/Debug/log.md'
|
136 |
-
- 'api/Debug/log_once.md'
|
137 |
-
- 'api/Debug/setDebugHandler.md'
|
138 |
-
|
139 |
-
plugins:
|
140 |
-
- redirects:
|
141 |
-
redirect_maps:
|
142 |
-
'api/HtmlDocument/load_file.md': 'api/HtmlDocument/loadFile.md'
|
143 |
-
'api/HtmlDocument/clear.md': 'api/HtmlDocument/__destruct.md'
|
144 |
-
'api/HtmlNode/children.md': 'api/HtmlNode/childNodes.md'
|
145 |
-
'api/HtmlNode/first_child.md': 'api/HtmlNode/firstChild.md'
|
146 |
-
'api/HtmlNode/has_child.md': 'api/HtmlNode/hasChildNodes.md'
|
147 |
-
'api/HtmlNode/last_child.md': 'api/HtmlNode/lastChild.md'
|
148 |
-
'api/HtmlNode/next_sibling.md': 'api/HtmlNode/nextSibling.md'
|
149 |
-
'api/HtmlNode/prev_sibling.md': 'api/HtmlNode/previousSibling.md'
|
150 |
-
'api/HtmlNode/prevSibling.md': 'api/HtmlNode/previousSibling.md'
|
151 |
-
|
152 |
-
docs_dir: 'docs'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/phpcompatibility.xml
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
-
<ruleset name="PHPCompatibility">
|
3 |
-
<description>Defines rules for PHPCompatibility</description>
|
4 |
-
<exclude-pattern>./app</exclude-pattern>
|
5 |
-
<exclude-pattern>./example</exclude-pattern>
|
6 |
-
<exclude-pattern>./manual</exclude-pattern>
|
7 |
-
<exclude-pattern>./testcase</exclude-pattern>
|
8 |
-
<exclude-pattern>./tests</exclude-pattern>
|
9 |
-
<config name="testVersion" value="5.6"/>
|
10 |
-
<rule ref="PHPCompatibility" />
|
11 |
-
</ruleset>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/phpcs.xml
DELETED
@@ -1,48 +0,0 @@
|
|
1 |
-
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
-
<ruleset name="Ruleset">
|
3 |
-
<description>Created with the PHP Coding Standard Generator. http://edorian.github.com/php-coding-standard-generator/</description>
|
4 |
-
<exclude-pattern>./app</exclude-pattern>
|
5 |
-
<exclude-pattern>./example</exclude-pattern>
|
6 |
-
<exclude-pattern>./manual</exclude-pattern>
|
7 |
-
<exclude-pattern>./testcase</exclude-pattern>
|
8 |
-
<rule ref="Generic.WhiteSpace.DisallowSpaceIndent"/>
|
9 |
-
<rule ref="Generic.Classes.DuplicateClassName"/>
|
10 |
-
<rule ref="PSR2.Methods.MethodDeclaration" />
|
11 |
-
<rule ref="Generic.CodeAnalysis.EmptyStatement"/>
|
12 |
-
<rule ref="Generic.CodeAnalysis.UnconditionalIfStatement"/>
|
13 |
-
<rule ref="Generic.CodeAnalysis.UnnecessaryFinalModifier"/>
|
14 |
-
<rule ref="Generic.CodeAnalysis.UselessOverridingMethod"/>
|
15 |
-
<rule ref="Generic.Functions.FunctionCallArgumentSpacing"/>
|
16 |
-
<rule ref="Generic.Functions.OpeningFunctionBraceBsdAllman"/>
|
17 |
-
<rule ref="PEAR.Functions.ValidDefaultValue"/>
|
18 |
-
<rule ref="PSR2.ControlStructures.ElseIfDeclaration"/>
|
19 |
-
<rule ref="PSR2.ControlStructures.ControlStructureSpacing"/>
|
20 |
-
<rule ref="Squiz.WhiteSpace.CastSpacing"/>
|
21 |
-
<rule ref="Squiz.WhiteSpace.OperatorSpacing"/>
|
22 |
-
<rule ref="Squiz.WhiteSpace.SemicolonSpacing"/>
|
23 |
-
<rule ref="Squiz.WhiteSpace.SuperfluousWhitespace"/>
|
24 |
-
<rule ref="Squiz.Strings.ConcatenationSpacing">
|
25 |
-
<properties>
|
26 |
-
<property name="spacing" value="1"/>
|
27 |
-
<property name="ignoreNewlines" value="true"/>
|
28 |
-
</properties>
|
29 |
-
</rule>
|
30 |
-
<rule ref="Squiz.Functions.FunctionDeclarationArgumentSpacing">
|
31 |
-
<properties>
|
32 |
-
<property name="equalsSpacing" value="1"/>
|
33 |
-
</properties>
|
34 |
-
</rule>
|
35 |
-
<rule ref="Generic.Files.LineLength">
|
36 |
-
<properties>
|
37 |
-
<property name="lineLimit" value="80"/>
|
38 |
-
<property name="absoluteLineLimit" value="120"/>
|
39 |
-
</properties>
|
40 |
-
</rule>
|
41 |
-
<rule ref="Generic.NamingConventions.UpperCaseConstantName"/>
|
42 |
-
<rule ref="Generic.PHP.LowerCaseConstant"/>
|
43 |
-
<rule ref="Squiz.Strings.DoubleQuoteUsage">
|
44 |
-
<exclude name="Squiz.Strings.DoubleQuoteUsage.ContainsVar" />
|
45 |
-
</rule>
|
46 |
-
<rule ref="Generic.Strings.UnnecessaryStringConcat"/>
|
47 |
-
<rule ref="PSR2.Files.EndFileNewline"/>
|
48 |
-
</ruleset>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/phpunit.xml
DELETED
@@ -1,41 +0,0 @@
|
|
1 |
-
<!--
|
2 |
-
PHPUnit is a programmer-oriented testing framework for PHP.
|
3 |
-
https://phpunit.de/
|
4 |
-
|
5 |
-
Unit tests are based on PHPUnit 6
|
6 |
-
https://phpunit.de/announcements/phpunit-6.html
|
7 |
-
-->
|
8 |
-
<phpunit
|
9 |
-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
10 |
-
xsi:noNamespaceSchemaLocation="http://schema.phpunit.de/6.5/phpunit.xsd"
|
11 |
-
colors="true"
|
12 |
-
processIsolation="false"
|
13 |
-
timeoutForSmallTests="1"
|
14 |
-
timeoutForMediumTests="1"
|
15 |
-
timeoutForLargeTests="6" >
|
16 |
-
|
17 |
-
<testsuites>
|
18 |
-
<testsuite name="standard">
|
19 |
-
<directory suffix='_test.php'>tests</directory>
|
20 |
-
<exclude>tests/memory_parsing_test.php</exclude>
|
21 |
-
<exclude>tests/entity_decoding_test.php</exclude>
|
22 |
-
</testsuite>
|
23 |
-
<testsuite name="memory">
|
24 |
-
<file>tests/memory_parsing_test.php</file>
|
25 |
-
</testsuite>
|
26 |
-
<testsuite name="entity_decoding">
|
27 |
-
<file>tests/entity_decoding_test.php</file>
|
28 |
-
</testsuite>
|
29 |
-
</testsuites>
|
30 |
-
|
31 |
-
<filter>
|
32 |
-
<whitelist>
|
33 |
-
<directory>simple_html_dom.php</directory>
|
34 |
-
<directory>HtmlDocument.php</directory>
|
35 |
-
<directory>HtmlNode.php</directory>
|
36 |
-
<directory>HtmlWeb.php</directory>
|
37 |
-
<directory>Debug.php</directory>
|
38 |
-
</whitelist>
|
39 |
-
</filter>
|
40 |
-
|
41 |
-
</phpunit>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/release.sh
DELETED
@@ -1,69 +0,0 @@
|
|
1 |
-
#!/bin/bash
|
2 |
-
|
3 |
-
# This file automates the release process based on the tag of the current commit
|
4 |
-
#
|
5 |
-
# 1) Tag current version: "git tag x.y.z", where x is the major, y the minor
|
6 |
-
# and z the patch version number. For example: "git tag 2.0.0"
|
7 |
-
#
|
8 |
-
# 2) Build release file: "sh release.sh". For the example above, this will build
|
9 |
-
# "simplehtmldom_2_0_0.zip"
|
10 |
-
|
11 |
-
tag=$(git tag -l --points-at HEAD)
|
12 |
-
|
13 |
-
if [ -z "$tag" ]; then
|
14 |
-
echo "The current commit is not tagged!"
|
15 |
-
echo "Insert valid tag name or press Ctrl+C to abort."
|
16 |
-
read -p "Format: Major.Minor.Patch[-Suffix]: " tag
|
17 |
-
if [ -z "$tag" ]; then
|
18 |
-
echo "No tag name provided."
|
19 |
-
exit
|
20 |
-
fi;
|
21 |
-
$(git tag ${tag})
|
22 |
-
fi;
|
23 |
-
|
24 |
-
# Check if the tag follows https://semver.org/
|
25 |
-
version="$(echo ${tag} | cut -d'-' -f1)"
|
26 |
-
major="$(echo ${version} | cut -d'.' -f1)"
|
27 |
-
minor="$(echo ${version} | cut -d'.' -f2)"
|
28 |
-
patch="$(echo ${version} | cut -d'.' -f3)"
|
29 |
-
suffix="$(echo ${tag} | cut -d'-' -f2)"
|
30 |
-
|
31 |
-
# git tag could return an error
|
32 |
-
tag=$(git tag -l --points-at HEAD)
|
33 |
-
|
34 |
-
if [ -z "$tag" ]; then
|
35 |
-
echo "Something went wrong!"
|
36 |
-
exit
|
37 |
-
fi;
|
38 |
-
|
39 |
-
echo "Building release for ${tag}..."
|
40 |
-
|
41 |
-
if [ -z "$major" ]; then echo "Major version is missing in ${tag}"; fi;
|
42 |
-
if [ -z "$minor" ]; then echo "Minor version is missing in ${tag}"; fi;
|
43 |
-
if [ -z "$patch" ]; then echo "Patch version is missing in ${tag}"; fi;
|
44 |
-
|
45 |
-
if [ -z "$major" ] || [ -z "$minor" ] || [ -z "$patch" ]; then
|
46 |
-
echo "Aborting script!"
|
47 |
-
exit
|
48 |
-
fi;
|
49 |
-
|
50 |
-
# Archive file
|
51 |
-
prefix="simplehtmldom_"
|
52 |
-
version=$(echo "$tag" | tr . _)
|
53 |
-
|
54 |
-
# Keyword substitution in files
|
55 |
-
marker="\\\$Rev\\\$"
|
56 |
-
replacement="Rev. $tag ($(git rev-list --count HEAD))"
|
57 |
-
|
58 |
-
# Build archive
|
59 |
-
if [ "$version" ]; then
|
60 |
-
# Inject version information to all files (limit to file type!)
|
61 |
-
find . -name '*.php' -exec sed -i -e "s/$marker/$replacement/g" {} \;;
|
62 |
-
find . -name '*.htm' -exec sed -i -e "s/$marker/$replacement/g" {} \;;
|
63 |
-
# Create stash commit (otherwise git archive won't work)
|
64 |
-
stash=$(git stash create);
|
65 |
-
git archive --format=zip --output="$prefix$version".zip --worktree-attributes "$stash";
|
66 |
-
# Cleanup
|
67 |
-
git checkout .;
|
68 |
-
git gc --prune;
|
69 |
-
fi;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/simple_html_dom.php
CHANGED
@@ -1,153 +1,153 @@
|
|
1 |
-
<?php
|
2 |
-
|
3 |
-
/**
|
4 |
-
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
-
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
-
*
|
7 |
-
* Licensed under The MIT License
|
8 |
-
* See the LICENSE file in the project root for more information.
|
9 |
-
*
|
10 |
-
* Authors:
|
11 |
-
* S.C. Chen
|
12 |
-
* John Schlick
|
13 |
-
* Rus Carroll
|
14 |
-
* logmanoriginal
|
15 |
-
*
|
16 |
-
* Contributors:
|
17 |
-
* Yousuke Kumakura
|
18 |
-
* Vadim Voituk
|
19 |
-
* Antcs
|
20 |
-
*
|
21 |
-
* Version $Rev$
|
22 |
-
*/
|
23 |
-
|
24 |
-
if (defined('DEFAULT_TARGET_CHARSET')) {
|
25 |
-
define('\simplehtmldom\DEFAULT_TARGET_CHARSET', DEFAULT_TARGET_CHARSET);
|
26 |
-
}
|
27 |
-
|
28 |
-
if (defined('DEFAULT_BR_TEXT')) {
|
29 |
-
define('\simplehtmldom\DEFAULT_BR_TEXT', DEFAULT_BR_TEXT);
|
30 |
-
}
|
31 |
-
|
32 |
-
if (defined('DEFAULT_SPAN_TEXT')) {
|
33 |
-
define('\simplehtmldom\DEFAULT_SPAN_TEXT', DEFAULT_SPAN_TEXT);
|
34 |
-
}
|
35 |
-
|
36 |
-
if (defined('MAX_FILE_SIZE')) {
|
37 |
-
define('\simplehtmldom\MAX_FILE_SIZE', MAX_FILE_SIZE);
|
38 |
-
}
|
39 |
-
|
40 |
-
include_once 'HtmlDocument.php';
|
41 |
-
include_once 'HtmlNode.php';
|
42 |
-
|
43 |
-
if (!defined('DEFAULT_TARGET_CHARSET')) {
|
44 |
-
define('DEFAULT_TARGET_CHARSET', \simplehtmldom\DEFAULT_TARGET_CHARSET);
|
45 |
-
}
|
46 |
-
|
47 |
-
if (!defined('DEFAULT_BR_TEXT')) {
|
48 |
-
define('DEFAULT_BR_TEXT', \simplehtmldom\DEFAULT_BR_TEXT);
|
49 |
-
}
|
50 |
-
|
51 |
-
if (!defined('DEFAULT_SPAN_TEXT')) {
|
52 |
-
define('DEFAULT_SPAN_TEXT', \simplehtmldom\DEFAULT_SPAN_TEXT);
|
53 |
-
}
|
54 |
-
|
55 |
-
if (!defined('MAX_FILE_SIZE')) {
|
56 |
-
define('MAX_FILE_SIZE', \simplehtmldom\MAX_FILE_SIZE);
|
57 |
-
}
|
58 |
-
|
59 |
-
define('HDOM_TYPE_ELEMENT', \simplehtmldom\HtmlNode::HDOM_TYPE_ELEMENT);
|
60 |
-
define('HDOM_TYPE_COMMENT', \simplehtmldom\HtmlNode::HDOM_TYPE_COMMENT);
|
61 |
-
define('HDOM_TYPE_TEXT', \simplehtmldom\HtmlNode::HDOM_TYPE_TEXT);
|
62 |
-
define('HDOM_TYPE_ROOT', \simplehtmldom\HtmlNode::HDOM_TYPE_ROOT);
|
63 |
-
define('HDOM_TYPE_UNKNOWN', \simplehtmldom\HtmlNode::HDOM_TYPE_UNKNOWN);
|
64 |
-
define('HDOM_QUOTE_DOUBLE', \simplehtmldom\HtmlNode::HDOM_QUOTE_DOUBLE);
|
65 |
-
define('HDOM_QUOTE_SINGLE', \simplehtmldom\HtmlNode::HDOM_QUOTE_SINGLE);
|
66 |
-
define('HDOM_QUOTE_NO', \simplehtmldom\HtmlNode::HDOM_QUOTE_NO);
|
67 |
-
define('HDOM_INFO_BEGIN', \simplehtmldom\HtmlNode::HDOM_INFO_BEGIN);
|
68 |
-
define('HDOM_INFO_END', \simplehtmldom\HtmlNode::HDOM_INFO_END);
|
69 |
-
define('HDOM_INFO_QUOTE', \simplehtmldom\HtmlNode::HDOM_INFO_QUOTE);
|
70 |
-
define('HDOM_INFO_SPACE', \simplehtmldom\HtmlNode::HDOM_INFO_SPACE);
|
71 |
-
define('HDOM_INFO_TEXT', \simplehtmldom\HtmlNode::HDOM_INFO_TEXT);
|
72 |
-
define('HDOM_INFO_INNER', \simplehtmldom\HtmlNode::HDOM_INFO_INNER);
|
73 |
-
define('HDOM_INFO_OUTER', \simplehtmldom\HtmlNode::HDOM_INFO_OUTER);
|
74 |
-
define('HDOM_INFO_ENDSPACE', \simplehtmldom\HtmlNode::HDOM_INFO_ENDSPACE);
|
75 |
-
|
76 |
-
define('HDOM_SMARTY_AS_TEXT', \simplehtmldom\HDOM_SMARTY_AS_TEXT);
|
77 |
-
|
78 |
-
class_alias('\simplehtmldom\HtmlDocument', 'simple_html_dom', true);
|
79 |
-
class_alias('\simplehtmldom\HtmlNode', 'simple_html_dom_node', true);
|
80 |
-
|
81 |
-
function file_get_html(
|
82 |
-
$url,
|
83 |
-
$use_include_path = false,
|
84 |
-
$context = null,
|
85 |
-
$offset = 0,
|
86 |
-
$maxLen = -1,
|
87 |
-
$lowercase = true,
|
88 |
-
$forceTagsClosed = true,
|
89 |
-
$target_charset = DEFAULT_TARGET_CHARSET,
|
90 |
-
$stripRN = true,
|
91 |
-
$defaultBRText = DEFAULT_BR_TEXT,
|
92 |
-
$defaultSpanText = DEFAULT_SPAN_TEXT)
|
93 |
-
{
|
94 |
-
if($maxLen <= 0) { $maxLen = MAX_FILE_SIZE; }
|
95 |
-
|
96 |
-
$dom = new simple_html_dom(
|
97 |
-
null,
|
98 |
-
$lowercase,
|
99 |
-
$forceTagsClosed,
|
100 |
-
$target_charset,
|
101 |
-
$stripRN,
|
102 |
-
$defaultBRText,
|
103 |
-
$defaultSpanText
|
104 |
-
);
|
105 |
-
|
106 |
-
$contents = file_get_contents(
|
107 |
-
$url,
|
108 |
-
$use_include_path,
|
109 |
-
$context,
|
110 |
-
$offset,
|
111 |
-
$maxLen + 1 // Load extra byte for limit check
|
112 |
-
);
|
113 |
-
|
114 |
-
if (empty($contents) || strlen($contents) > $maxLen) {
|
115 |
-
$dom->clear();
|
116 |
-
return false;
|
117 |
-
}
|
118 |
-
|
119 |
-
return $dom->load($contents, $lowercase, $stripRN);
|
120 |
-
}
|
121 |
-
|
122 |
-
function str_get_html(
|
123 |
-
$str,
|
124 |
-
$lowercase = true,
|
125 |
-
$forceTagsClosed = true,
|
126 |
-
$target_charset = DEFAULT_TARGET_CHARSET,
|
127 |
-
$stripRN = true,
|
128 |
-
$defaultBRText = DEFAULT_BR_TEXT,
|
129 |
-
$defaultSpanText = DEFAULT_SPAN_TEXT)
|
130 |
-
{
|
131 |
-
$dom = new simple_html_dom(
|
132 |
-
null,
|
133 |
-
$lowercase,
|
134 |
-
$forceTagsClosed,
|
135 |
-
$target_charset,
|
136 |
-
$stripRN,
|
137 |
-
$defaultBRText,
|
138 |
-
$defaultSpanText
|
139 |
-
);
|
140 |
-
|
141 |
-
if (empty($str) || strlen($str) > MAX_FILE_SIZE) {
|
142 |
-
$dom->clear();
|
143 |
-
return false;
|
144 |
-
}
|
145 |
-
|
146 |
-
return $dom->load($str, $lowercase, $stripRN);
|
147 |
-
}
|
148 |
-
|
149 |
-
/** @codeCoverageIgnore */
|
150 |
-
function dump_html_tree($node, $show_attr = true, $deep = 0)
|
151 |
-
{
|
152 |
-
$node->dump($node);
|
153 |
-
}
|
1 |
+
<?php
|
2 |
+
|
3 |
+
/**
|
4 |
+
* Website: http://sourceforge.net/projects/simplehtmldom/
|
5 |
+
* Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
|
6 |
+
*
|
7 |
+
* Licensed under The MIT License
|
8 |
+
* See the LICENSE file in the project root for more information.
|
9 |
+
*
|
10 |
+
* Authors:
|
11 |
+
* S.C. Chen
|
12 |
+
* John Schlick
|
13 |
+
* Rus Carroll
|
14 |
+
* logmanoriginal
|
15 |
+
*
|
16 |
+
* Contributors:
|
17 |
+
* Yousuke Kumakura
|
18 |
+
* Vadim Voituk
|
19 |
+
* Antcs
|
20 |
+
*
|
21 |
+
* Version $Rev$
|
22 |
+
*/
|
23 |
+
|
24 |
+
if (defined('DEFAULT_TARGET_CHARSET')) {
|
25 |
+
define('\simplehtmldom\DEFAULT_TARGET_CHARSET', DEFAULT_TARGET_CHARSET);
|
26 |
+
}
|
27 |
+
|
28 |
+
if (defined('DEFAULT_BR_TEXT')) {
|
29 |
+
define('\simplehtmldom\DEFAULT_BR_TEXT', DEFAULT_BR_TEXT);
|
30 |
+
}
|
31 |
+
|
32 |
+
if (defined('DEFAULT_SPAN_TEXT')) {
|
33 |
+
define('\simplehtmldom\DEFAULT_SPAN_TEXT', DEFAULT_SPAN_TEXT);
|
34 |
+
}
|
35 |
+
|
36 |
+
if (defined('MAX_FILE_SIZE')) {
|
37 |
+
define('\simplehtmldom\MAX_FILE_SIZE', MAX_FILE_SIZE);
|
38 |
+
}
|
39 |
+
|
40 |
+
include_once 'HtmlDocument.php';
|
41 |
+
include_once 'HtmlNode.php';
|
42 |
+
|
43 |
+
if (!defined('DEFAULT_TARGET_CHARSET')) {
|
44 |
+
define('DEFAULT_TARGET_CHARSET', \simplehtmldom\DEFAULT_TARGET_CHARSET);
|
45 |
+
}
|
46 |
+
|
47 |
+
if (!defined('DEFAULT_BR_TEXT')) {
|
48 |
+
define('DEFAULT_BR_TEXT', \simplehtmldom\DEFAULT_BR_TEXT);
|
49 |
+
}
|
50 |
+
|
51 |
+
if (!defined('DEFAULT_SPAN_TEXT')) {
|
52 |
+
define('DEFAULT_SPAN_TEXT', \simplehtmldom\DEFAULT_SPAN_TEXT);
|
53 |
+
}
|
54 |
+
|
55 |
+
if (!defined('MAX_FILE_SIZE')) {
|
56 |
+
define('MAX_FILE_SIZE', \simplehtmldom\MAX_FILE_SIZE);
|
57 |
+
}
|
58 |
+
|
59 |
+
define('HDOM_TYPE_ELEMENT', \simplehtmldom\HtmlNode::HDOM_TYPE_ELEMENT);
|
60 |
+
define('HDOM_TYPE_COMMENT', \simplehtmldom\HtmlNode::HDOM_TYPE_COMMENT);
|
61 |
+
define('HDOM_TYPE_TEXT', \simplehtmldom\HtmlNode::HDOM_TYPE_TEXT);
|
62 |
+
define('HDOM_TYPE_ROOT', \simplehtmldom\HtmlNode::HDOM_TYPE_ROOT);
|
63 |
+
define('HDOM_TYPE_UNKNOWN', \simplehtmldom\HtmlNode::HDOM_TYPE_UNKNOWN);
|
64 |
+
define('HDOM_QUOTE_DOUBLE', \simplehtmldom\HtmlNode::HDOM_QUOTE_DOUBLE);
|
65 |
+
define('HDOM_QUOTE_SINGLE', \simplehtmldom\HtmlNode::HDOM_QUOTE_SINGLE);
|
66 |
+
define('HDOM_QUOTE_NO', \simplehtmldom\HtmlNode::HDOM_QUOTE_NO);
|
67 |
+
define('HDOM_INFO_BEGIN', \simplehtmldom\HtmlNode::HDOM_INFO_BEGIN);
|
68 |
+
define('HDOM_INFO_END', \simplehtmldom\HtmlNode::HDOM_INFO_END);
|
69 |
+
define('HDOM_INFO_QUOTE', \simplehtmldom\HtmlNode::HDOM_INFO_QUOTE);
|
70 |
+
define('HDOM_INFO_SPACE', \simplehtmldom\HtmlNode::HDOM_INFO_SPACE);
|
71 |
+
define('HDOM_INFO_TEXT', \simplehtmldom\HtmlNode::HDOM_INFO_TEXT);
|
72 |
+
define('HDOM_INFO_INNER', \simplehtmldom\HtmlNode::HDOM_INFO_INNER);
|
73 |
+
define('HDOM_INFO_OUTER', \simplehtmldom\HtmlNode::HDOM_INFO_OUTER);
|
74 |
+
define('HDOM_INFO_ENDSPACE', \simplehtmldom\HtmlNode::HDOM_INFO_ENDSPACE);
|
75 |
+
|
76 |
+
define('HDOM_SMARTY_AS_TEXT', \simplehtmldom\HDOM_SMARTY_AS_TEXT);
|
77 |
+
|
78 |
+
class_alias('\simplehtmldom\HtmlDocument', 'simple_html_dom', true);
|
79 |
+
class_alias('\simplehtmldom\HtmlNode', 'simple_html_dom_node', true);
|
80 |
+
|
81 |
+
function file_get_html(
|
82 |
+
$url,
|
83 |
+
$use_include_path = false,
|
84 |
+
$context = null,
|
85 |
+
$offset = 0,
|
86 |
+
$maxLen = -1,
|
87 |
+
$lowercase = true,
|
88 |
+
$forceTagsClosed = true,
|
89 |
+
$target_charset = DEFAULT_TARGET_CHARSET,
|
90 |
+
$stripRN = true,
|
91 |
+
$defaultBRText = DEFAULT_BR_TEXT,
|
92 |
+
$defaultSpanText = DEFAULT_SPAN_TEXT)
|
93 |
+
{
|
94 |
+
if($maxLen <= 0) { $maxLen = MAX_FILE_SIZE; }
|
95 |
+
|
96 |
+
$dom = new simple_html_dom(
|
97 |
+
null,
|
98 |
+
$lowercase,
|
99 |
+
$forceTagsClosed,
|
100 |
+
$target_charset,
|
101 |
+
$stripRN,
|
102 |
+
$defaultBRText,
|
103 |
+
$defaultSpanText
|
104 |
+
);
|
105 |
+
|
106 |
+
$contents = file_get_contents(
|
107 |
+
$url,
|
108 |
+
$use_include_path,
|
109 |
+
$context,
|
110 |
+
$offset,
|
111 |
+
$maxLen + 1 // Load extra byte for limit check
|
112 |
+
);
|
113 |
+
|
114 |
+
if (empty($contents) || strlen($contents) > $maxLen) {
|
115 |
+
$dom->clear();
|
116 |
+
return false;
|
117 |
+
}
|
118 |
+
|
119 |
+
return $dom->load($contents, $lowercase, $stripRN);
|
120 |
+
}
|
121 |
+
|
122 |
+
function str_get_html(
|
123 |
+
$str,
|
124 |
+
$lowercase = true,
|
125 |
+
$forceTagsClosed = true,
|
126 |
+
$target_charset = DEFAULT_TARGET_CHARSET,
|
127 |
+
$stripRN = true,
|
128 |
+
$defaultBRText = DEFAULT_BR_TEXT,
|
129 |
+
$defaultSpanText = DEFAULT_SPAN_TEXT)
|
130 |
+
{
|
131 |
+
$dom = new simple_html_dom(
|
132 |
+
null,
|
133 |
+
$lowercase,
|
134 |
+
$forceTagsClosed,
|
135 |
+
$target_charset,
|
136 |
+
$stripRN,
|
137 |
+
$defaultBRText,
|
138 |
+
$defaultSpanText
|
139 |
+
);
|
140 |
+
|
141 |
+
if (empty($str) || strlen($str) > MAX_FILE_SIZE) {
|
142 |
+
$dom->clear();
|
143 |
+
return false;
|
144 |
+
}
|
145 |
+
|
146 |
+
return $dom->load($str, $lowercase, $stripRN);
|
147 |
+
}
|
148 |
+
|
149 |
+
/** @codeCoverageIgnore */
|
150 |
+
function dump_html_tree($node, $show_attr = true, $deep = 0)
|
151 |
+
{
|
152 |
+
$node->dump($node);
|
153 |
+
}
|
vendor/simplehtmldom/simplehtmldom/tests/attribute_test.php
DELETED
@@ -1,50 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests the attribute parsing behavior of the parser
|
7 |
-
*/
|
8 |
-
class attribute_test extends TestCase {
|
9 |
-
private $html;
|
10 |
-
|
11 |
-
protected function setUp()
|
12 |
-
{
|
13 |
-
$this->html = new simple_html_dom();
|
14 |
-
}
|
15 |
-
|
16 |
-
protected function tearDown()
|
17 |
-
{
|
18 |
-
$this->html->clear();
|
19 |
-
unset($this->html);
|
20 |
-
}
|
21 |
-
|
22 |
-
/** @dataProvider dataProvider_for_attribute_should_parse */
|
23 |
-
public function test_attribute_should_parse($expected, $doc)
|
24 |
-
{
|
25 |
-
$this->html->load($doc);
|
26 |
-
$this->assertEquals($expected, $this->html->save());
|
27 |
-
}
|
28 |
-
|
29 |
-
public function dataProvider_for_attribute_should_parse()
|
30 |
-
{
|
31 |
-
return array(
|
32 |
-
'double quotes' => array(
|
33 |
-
'<p class="hidden"></p>',
|
34 |
-
'<p class="hidden"></p>'
|
35 |
-
),
|
36 |
-
'single quotes' => array(
|
37 |
-
'<p class=\'hidden\'></p>',
|
38 |
-
'<p class=\'hidden\'></p>'
|
39 |
-
),
|
40 |
-
'no quotes' => array(
|
41 |
-
'<p class=hidden></p>',
|
42 |
-
'<p class=hidden></p>'
|
43 |
-
),
|
44 |
-
'no value' => array(
|
45 |
-
'<p hidden></p>',
|
46 |
-
'<p hidden></p>'
|
47 |
-
)
|
48 |
-
);
|
49 |
-
}
|
50 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/bug_report_test.php
DELETED
@@ -1,476 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests for bugs reported in the bug tracker
|
7 |
-
*
|
8 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs
|
9 |
-
* Bug tracker
|
10 |
-
*/
|
11 |
-
class bug_report_test extends TestCase {
|
12 |
-
private $html;
|
13 |
-
|
14 |
-
protected function setUp()
|
15 |
-
{
|
16 |
-
$this->html = new simple_html_dom;
|
17 |
-
}
|
18 |
-
|
19 |
-
protected function tearDown()
|
20 |
-
{
|
21 |
-
$this->html->clear();
|
22 |
-
unset($this->html);
|
23 |
-
}
|
24 |
-
|
25 |
-
/**
|
26 |
-
* Bug #56 (Attribute values overwritten,behaviour differs from browsers)
|
27 |
-
*
|
28 |
-
* If a HTML-tag defines an attribute twice (or more times) the parser only
|
29 |
-
* considers the last appearance / the last value within the HTML-tag.
|
30 |
-
* Browsers like IE and Firefox, however, only consider the first appearance
|
31 |
-
* / the first value.
|
32 |
-
*
|
33 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/56/ Bug #56
|
34 |
-
*/
|
35 |
-
public function test_bug_56()
|
36 |
-
{
|
37 |
-
$doc = <<<HTML
|
38 |
-
<a href="http://simplehtmldom.sourceforge.net/" target="_self" target="_blank"></a>
|
39 |
-
HTML;
|
40 |
-
|
41 |
-
$anchor = $this->html->load($doc)->find('a', 0);
|
42 |
-
|
43 |
-
$this->assertEquals('_self', $anchor->target);
|
44 |
-
}
|
45 |
-
|
46 |
-
/**
|
47 |
-
* Bug #62 (foreach($html->find(a[id=0]) as $e))
|
48 |
-
*
|
49 |
-
* `find` should be able to distinguish attributes with value "0" from other
|
50 |
-
* attributes when doing `find('[id="0"]')`.
|
51 |
-
*
|
52 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/62/ Bug #62
|
53 |
-
*/
|
54 |
-
public function test_bug_62()
|
55 |
-
{
|
56 |
-
$doc = <<<HTML
|
57 |
-
<body>
|
58 |
-
<a href="#" id="0">PHP Simple HTML DOM Parser</a>
|
59 |
-
<a href="#" id="1">A PHP based DOM parser</a>
|
60 |
-
</body>
|
61 |
-
HTML;
|
62 |
-
|
63 |
-
$this->html->load($doc);
|
64 |
-
|
65 |
-
$this->assertCount(1, $this->html->find('[id=0]'));
|
66 |
-
$this->assertCount(1, $this->html->find('#0'));
|
67 |
-
}
|
68 |
-
|
69 |
-
/**
|
70 |
-
* Bug 79 (tbody doesn't work in find())
|
71 |
-
*
|
72 |
-
* Specifying 'tbody' as CSS selector will not return the body of a table as
|
73 |
-
* expected, but the table instead.
|
74 |
-
*
|
75 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/79/ Bug #79
|
76 |
-
*/
|
77 |
-
public function test_bug_79()
|
78 |
-
{
|
79 |
-
$doc = <<<HTML
|
80 |
-
<table>
|
81 |
-
<thead><tr><th>PHP Simple HTML DOM Parser</th></tr></thead>
|
82 |
-
<tbody><tr><td>A PHP based DOM parser</td></tr></tbody>
|
83 |
-
</table>
|
84 |
-
HTML;
|
85 |
-
|
86 |
-
$this->html->load($doc);
|
87 |
-
|
88 |
-
$this->assertCount(2, $this->html->find('table tr'));
|
89 |
-
$this->assertCount(1, $this->html->find('table thead tr'));
|
90 |
-
$this->assertCount(1, $this->html->find('table tbody tr'));
|
91 |
-
}
|
92 |
-
|
93 |
-
/**
|
94 |
-
* Bug #97 (plaintext returning conjoined elements)
|
95 |
-
*
|
96 |
-
* Paragraphs ("p" elements) should start on a new line when returned as
|
97 |
-
* plain text.
|
98 |
-
*
|
99 |
-
* **Note**:
|
100 |
-
*
|
101 |
-
* Browsers typically separate multiple paragraphs by blank lines.
|
102 |
-
*
|
103 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/97/ Bug #97
|
104 |
-
*/
|
105 |
-
public function test_bug_97()
|
106 |
-
{
|
107 |
-
$doc = <<<HTML
|
108 |
-
<div><p>PHP Simple HTML DOM Parser</p><p>A PHP based DOM parser</p></div>
|
109 |
-
HTML;
|
110 |
-
|
111 |
-
$text = $this->html->load($doc)->find('div', 0)->plaintext;
|
112 |
-
|
113 |
-
$this->assertEquals(
|
114 |
-
"PHP Simple HTML DOM Parser\n\nA PHP based DOM parser",
|
115 |
-
$text
|
116 |
-
);
|
117 |
-
}
|
118 |
-
|
119 |
-
/**
|
120 |
-
* Bug #116 (problem getting tag attributes)
|
121 |
-
*
|
122 |
-
* Parsing fails on attributes that are not separated by whitespace.
|
123 |
-
*
|
124 |
-
* **Note**:
|
125 |
-
*
|
126 |
-
* The [Markup Validation Service](https://validator.w3.org/#validate_by_input)
|
127 |
-
* reports: No space between attributes.
|
128 |
-
*
|
129 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/116/ Bug #116
|
130 |
-
*/
|
131 |
-
public function test_bug_116()
|
132 |
-
{
|
133 |
-
$doc = <<<HTML
|
134 |
-
<a href="#"title="PHP Simple HTML DOM Parser"></a>
|
135 |
-
HTML;
|
136 |
-
|
137 |
-
$anchor = $this->html->load($doc)->find('a', 0);
|
138 |
-
|
139 |
-
$this->assertCount(2, $anchor->getAllAttributes());
|
140 |
-
$this->assertEquals('#', $anchor->href);
|
141 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $anchor->title);
|
142 |
-
}
|
143 |
-
|
144 |
-
/**
|
145 |
-
* Bug #121 (//Comment\n != //Comment\s)
|
146 |
-
*
|
147 |
-
* Replacing newlines results in scripts changing behavior if comments are
|
148 |
-
* placed before functions.
|
149 |
-
*
|
150 |
-
* **Expected Behavior**:
|
151 |
-
*
|
152 |
-
* Script tags should be returned exactly as provided to the parser with all
|
153 |
-
* newlines kept intact.
|
154 |
-
*
|
155 |
-
* **Workaround**:
|
156 |
-
*
|
157 |
-
* Set `$stripRN = false` when loading contents. This will prevent newlines
|
158 |
-
* being replaced by spaces.
|
159 |
-
*
|
160 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/121/ Bug #121
|
161 |
-
*/
|
162 |
-
public function test_bug_121()
|
163 |
-
{
|
164 |
-
$doc = <<<HTML
|
165 |
-
<script>
|
166 |
-
// alert("PHP Simple HTML DOM Parser");
|
167 |
-
alert("A PHP based DOM parser");
|
168 |
-
</script>
|
169 |
-
HTML;
|
170 |
-
|
171 |
-
$dom = $this->html->load($doc);
|
172 |
-
|
173 |
-
$this->assertEquals($doc, (string)$this->html);
|
174 |
-
}
|
175 |
-
|
176 |
-
/**
|
177 |
-
* Bug #127 (Incorrect attribute value gives unexpected results)
|
178 |
-
*
|
179 |
-
* Attributes ending on "\" cause the parser to continue parsing the
|
180 |
-
* remaining document as the attribute value.
|
181 |
-
*
|
182 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/127/ Bug #127
|
183 |
-
*/
|
184 |
-
public function test_bug_127()
|
185 |
-
{
|
186 |
-
$doc = <<<HTML
|
187 |
-
<div id="before"></div>
|
188 |
-
<a href="#" alt="PHP Simple HTML DOM Parser\">
|
189 |
-
<div id="after"></div>
|
190 |
-
HTML;
|
191 |
-
|
192 |
-
$this->html->load($doc);
|
193 |
-
|
194 |
-
$this->assertEquals(
|
195 |
-
'PHP Simple HTML DOM Parser\\',
|
196 |
-
$this->html->find('a', 0)->alt
|
197 |
-
);
|
198 |
-
}
|
199 |
-
|
200 |
-
/**
|
201 |
-
* Bug #144 (Forward slashes in pattern break wildcard Find)
|
202 |
-
*
|
203 |
-
* The wildcard find method "*=" uses preg_match, delimited by forward
|
204 |
-
* slashes. Therefore, if you have any forward slashes in your pattern,
|
205 |
-
* you need to manually escape them, otherwise the find won't work. This
|
206 |
-
* comes up frequently when searching for URL's in href attributes.
|
207 |
-
*
|
208 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/144/ Bug #144
|
209 |
-
*/
|
210 |
-
public function test_bug_144()
|
211 |
-
{
|
212 |
-
$doc = <<<HTML
|
213 |
-
<a href="http://simplehtmldom.sourceforge.net">Home</a>
|
214 |
-
<a href="http://simplehtmldom.sourceforge.net/manual.htm">Manual</a>
|
215 |
-
HTML;
|
216 |
-
|
217 |
-
$this->html->load($doc);
|
218 |
-
|
219 |
-
$this->assertCount(1, $this->html->find('a[href*="/manual.htm"]'));
|
220 |
-
}
|
221 |
-
|
222 |
-
/**
|
223 |
-
* Bug #153 (Invalid argument supplied for foreach())
|
224 |
-
*
|
225 |
-
* The parser incorrectly assumes that $this->nodes always exists (not null)
|
226 |
-
* and tries to iterate over each element in the array when performing the
|
227 |
-
* clear() operation.
|
228 |
-
*
|
229 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/153/ Bug #153
|
230 |
-
* @doesNotPerformAssertions
|
231 |
-
*/
|
232 |
-
public function test_bug_153()
|
233 |
-
{
|
234 |
-
$doc = '<p>PHP Simple HTML DOM Parser</p>';
|
235 |
-
|
236 |
-
$this->html->load($doc);
|
237 |
-
$this->html->nodes = null;
|
238 |
-
|
239 |
-
$this->html->clear();
|
240 |
-
}
|
241 |
-
|
242 |
-
/**
|
243 |
-
* Bug #154 (Fatal error: Call to a member function find() on null)
|
244 |
-
*
|
245 |
-
* The parser incorrectly removes everything between `{` and `}` attempting
|
246 |
-
* to remove "Smarty Scripts" from the DOM. This causes regular text to be
|
247 |
-
* removed as well, if it contains curly braces.
|
248 |
-
*
|
249 |
-
* **Example Code**:
|
250 |
-
*
|
251 |
-
* ```HTML
|
252 |
-
* <div class="before"></div>
|
253 |
-
* <p>{PHP Simple HTML DOM Parser</p>
|
254 |
-
* <p>{A PHP based DOM parser}</p>
|
255 |
-
* <div id="after"></div>
|
256 |
-
* ```
|
257 |
-
*
|
258 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/154/ Bug #154
|
259 |
-
* @link https://www.smarty.net Smarty
|
260 |
-
*/
|
261 |
-
public function test_bug_154()
|
262 |
-
{
|
263 |
-
$doc = <<<HTML
|
264 |
-
<p>{PHP Simple HTML DOM Parser</p>
|
265 |
-
<p>{A PHP based DOM parser}</p>
|
266 |
-
HTML;
|
267 |
-
|
268 |
-
$this->html->load($doc);
|
269 |
-
|
270 |
-
$this->assertCount(2, $this->html->find('p'));
|
271 |
-
|
272 |
-
$this->assertEquals(
|
273 |
-
'{PHP Simple HTML DOM Parser',
|
274 |
-
$this->html->find('p', 0)->innertext
|
275 |
-
);
|
276 |
-
|
277 |
-
$this->assertEquals(
|
278 |
-
'{A PHP based DOM parser}',
|
279 |
-
$this->html->find('p', 1)->innertext
|
280 |
-
);
|
281 |
-
|
282 |
-
/* With Smarty as text */
|
283 |
-
|
284 |
-
$this->html->load($doc, true, true, DEFAULT_BR_TEXT, DEFAULT_SPAN_TEXT, HDOM_SMARTY_AS_TEXT);
|
285 |
-
|
286 |
-
$this->assertCount(1, $this->html->find('p'));
|
287 |
-
|
288 |
-
$this->assertEquals(
|
289 |
-
'{PHP Simple HTML DOM Parser</p><p>{A PHP based DOM parser}',
|
290 |
-
$this->html->find('p', 0)->innertext
|
291 |
-
);
|
292 |
-
|
293 |
-
}
|
294 |
-
|
295 |
-
/**
|
296 |
-
* Bug #160 (Parsing fails with '<-' + '/' symbols combination in string)
|
297 |
-
*
|
298 |
-
* **Example Code**:
|
299 |
-
*
|
300 |
-
* ```HTML
|
301 |
-
* <div id="before"></div>
|
302 |
-
* <span>---> PHP Simple HTML DOM Parser <--- A /PHP based DOM parser</span>
|
303 |
-
* <div id="after"></div>
|
304 |
-
* ```
|
305 |
-
*
|
306 |
-
* **Note**:
|
307 |
-
*
|
308 |
-
* The [Markup Validation Service](https://validator.w3.org/#validate_by_input)
|
309 |
-
* reports: Bad character `-` after `<`. Probable cause: Unescaped `<`. Try
|
310 |
-
* escaping it as `&lt;`.
|
311 |
-
*
|
312 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/160/ Bug #160
|
313 |
-
* @link https://validator.w3.org/#validate_by_input Markup Validation Service
|
314 |
-
*/
|
315 |
-
public function test_bug_160()
|
316 |
-
{
|
317 |
-
$doc = <<<HTML
|
318 |
-
<div id="before"></div>
|
319 |
-
<span>---> PHP Simple HTML DOM Parser <--- A /PHP based DOM parser</span>
|
320 |
-
<div id="after"></div>
|
321 |
-
HTML;
|
322 |
-
|
323 |
-
$this->html->load($doc);
|
324 |
-
|
325 |
-
$this->assertEquals(
|
326 |
-
'---> PHP Simple HTML DOM Parser <--- A /PHP based DOM parser',
|
327 |
-
$this->html->find('span', 0)->innertext
|
328 |
-
);
|
329 |
-
}
|
330 |
-
|
331 |
-
/**
|
332 |
-
* Bug #163 (Missing whitespace in plaintext property)
|
333 |
-
*
|
334 |
-
* **Example Code**:
|
335 |
-
*
|
336 |
-
* ```php
|
337 |
-
* $doc = 'Hello<a href=""> World';
|
338 |
-
* $html->load($doc);
|
339 |
-
* echo "$html->plaintext\n";
|
340 |
-
* ```
|
341 |
-
*
|
342 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/163/ Bug #163
|
343 |
-
*/
|
344 |
-
public function test_bug_163()
|
345 |
-
{
|
346 |
-
$doc = 'Hello<a href=""> World';
|
347 |
-
$expected = 'Hello World';
|
348 |
-
|
349 |
-
$this->html->load($doc);
|
350 |
-
|
351 |
-
$this->assertEquals($expected, $this->html->plaintext);
|
352 |
-
}
|
353 |
-
|
354 |
-
/**
|
355 |
-
* Bug #166 (Duplicate attributes)
|
356 |
-
*
|
357 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/166/ Bug #166
|
358 |
-
*/
|
359 |
-
public function test_bug_166()
|
360 |
-
{
|
361 |
-
$doc = '<div style="display:none;" style="margin-top: 5px;"></div>';
|
362 |
-
|
363 |
-
$this->html->load($doc);
|
364 |
-
|
365 |
-
$this->assertEquals(1, count($this->html->find('div', 0)->getAllAttributes()));
|
366 |
-
}
|
367 |
-
|
368 |
-
/**
|
369 |
-
* Bug #169 (Incorrectly parsed attribute selectors ending on "s" or "i")
|
370 |
-
*
|
371 |
-
* This bug happens only when using attribute selectors without quotes, so
|
372 |
-
* doing [att=val] instead of [att="val"].
|
373 |
-
*
|
374 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/169/ Bug #169
|
375 |
-
*/
|
376 |
-
public function test_bug_169()
|
377 |
-
{
|
378 |
-
$doc = '<div class="test_s" /><div class="test_i" />';
|
379 |
-
|
380 |
-
$this->html->load($doc);
|
381 |
-
|
382 |
-
$this->assertCount(
|
383 |
-
1,
|
384 |
-
$this->html->find('div[class=test_s]'),
|
385 |
-
'Failed parsing attribute values ending on "s"'
|
386 |
-
);
|
387 |
-
|
388 |
-
$this->assertCount(
|
389 |
-
1,
|
390 |
-
$this->html->find('div[class=test_i]'),
|
391 |
-
'Failed parsing attribute values ending on "i"'
|
392 |
-
);
|
393 |
-
}
|
394 |
-
|
395 |
-
/**
|
396 |
-
* Bug #172 (Problem with the remove function)
|
397 |
-
*
|
398 |
-
* `simple_html_dom_node::remove()` throws a fatal error:
|
399 |
-
* `Uncaught Error: Call to a member function remove() on null in <file>:<line>`
|
400 |
-
* when removing an element from the DOM if
|
401 |
-
* - another element has previously been removed,
|
402 |
-
* - the previous element was placed before the current element in the DOM and
|
403 |
-
* - `simple_html_dom_node::remove()` is called on a node returned by
|
404 |
-
* `simple_html_dom_node::find()` or `simple_html_dom::find()`
|
405 |
-
*
|
406 |
-
* This error can also happen for `simple_html_dom_node::removeChild()`
|
407 |
-
*
|
408 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/172/ Bug #172
|
409 |
-
*/
|
410 |
-
public function test_bug_172()
|
411 |
-
{
|
412 |
-
$expected = '<div></div><div></div>';
|
413 |
-
|
414 |
-
$doc = '<div><img src="#"></div><div><img src="#"></div>';
|
415 |
-
|
416 |
-
$this->html->load($doc);
|
417 |
-
|
418 |
-
$this->html->find('div img', 0)->remove();
|
419 |
-
|
420 |
-
$img = $this->html->find('div', 1)->find('img', 0);
|
421 |
-
|
422 |
-
$this->assertNotNull(
|
423 |
-
$img,
|
424 |
-
'find() on node failed after using remove() on a previous node'
|
425 |
-
);
|
426 |
-
|
427 |
-
$img->remove();
|
428 |
-
|
429 |
-
$this->assertEquals($expected, $this->html->save());
|
430 |
-
}
|
431 |
-
|
432 |
-
/**
|
433 |
-
* Bug #178 (Charset not handled properly)
|
434 |
-
*
|
435 |
-
* @link https://sourceforge.net/p/simplehtmldom/bugs/178/ Bug #178
|
436 |
-
*/
|
437 |
-
public function test_bug_178()
|
438 |
-
{
|
439 |
-
/**
|
440 |
-
* Note: The testdata must be encoded in order to work for machines with
|
441 |
-
* different codepages!
|
442 |
-
*/
|
443 |
-
|
444 |
-
$expected = chr(hexdec('c4')); // "č"
|
445 |
-
|
446 |
-
/**
|
447 |
-
* <!DOCTYPE html>
|
448 |
-
* <html lang="windows-1250">
|
449 |
-
* <head>
|
450 |
-
* <meta http-equiv="Content-Type" content="text/html; charset=windows-1250">
|
451 |
-
* </head>
|
452 |
-
* <body>
|
453 |
-
* <a><span>Kč</span></a>
|
454 |
-
* <b>Kč</b>
|
455 |
-
* </body>
|
456 |
-
* </html>
|
457 |
-
*/
|
458 |
-
// phpcs:ignore Generic.Files.LineLength
|
459 |
-
$data = base64_decode('PCFET0NUWVBFIGh0bWw+CjxodG1sIGxhbmc9IndpbmRvd3MtMTI1MCI+CjxoZWFkPgogICAgPG1ldGEgaHR0cC1lcXVpdj0iQ29udGVudC1UeXBlIiBjb250ZW50PSJ0ZXh0L2h0bWw7IGNoYXJzZXQ9d2luZG93cy0xMjUwIj4KPC9oZWFkPgo8Ym9keT4KICAgIDxhPjxzcGFuPkvoPC9zcGFuPjwvYT4KICAgIDxiPkvoPC9iPgo8L2JvZHk+CjwvaHRtbD4=');
|
460 |
-
|
461 |
-
$this->html = str_get_html($data);
|
462 |
-
|
463 |
-
$this->assertEquals(
|
464 |
-
$expected,
|
465 |
-
$this->html->find('a', 0)->innertext[7],
|
466 |
-
'outertext() should convert text inside elements'
|
467 |
-
); // note: innertext() calls outertext() internally
|
468 |
-
|
469 |
-
$this->assertEquals(
|
470 |
-
$expected,
|
471 |
-
$this->html->find('b', 0)->innertext[1],
|
472 |
-
'innertext() should convert text inside elements'
|
473 |
-
);
|
474 |
-
}
|
475 |
-
|
476 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/callback_test.php
DELETED
@@ -1,45 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests the callback feature of the parser
|
7 |
-
*/
|
8 |
-
class callback_test extends TestCase {
|
9 |
-
private $html;
|
10 |
-
|
11 |
-
protected function setUp()
|
12 |
-
{
|
13 |
-
$this->html = new simple_html_dom();
|
14 |
-
$this->html->set_callback(
|
15 |
-
function($element)
|
16 |
-
{
|
17 |
-
$element->tag = 'surprise';
|
18 |
-
}
|
19 |
-
);
|
20 |
-
}
|
21 |
-
|
22 |
-
protected function tearDown()
|
23 |
-
{
|
24 |
-
$this->html->clear();
|
25 |
-
unset($this->html);
|
26 |
-
}
|
27 |
-
|
28 |
-
public function test_htmldocument_set_callback_should_register_function()
|
29 |
-
{
|
30 |
-
$this->assertNotNull($this->html->callback);
|
31 |
-
}
|
32 |
-
|
33 |
-
public function test_htmldocument_remove_callback_should_unregister_function()
|
34 |
-
{
|
35 |
-
$this->html->remove_callback();
|
36 |
-
$this->assertNull($this->html->callback);
|
37 |
-
}
|
38 |
-
|
39 |
-
public function test_htmlnode_outertext_uses_callback_function()
|
40 |
-
{
|
41 |
-
$expected = '<surprise></surprise>';
|
42 |
-
$this->html->load('<html></html>');
|
43 |
-
$this->assertEquals($expected, $this->html->save());
|
44 |
-
}
|
45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/cdata_test.php
DELETED
@@ -1,69 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Checks if the parser properly handles CDATA sections
|
7 |
-
*/
|
8 |
-
class cdata_test extends TestCase {
|
9 |
-
|
10 |
-
private $html;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
$this->html = new simple_html_dom;
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
$this->html->clear();
|
20 |
-
unset($this->html);
|
21 |
-
}
|
22 |
-
|
23 |
-
/**
|
24 |
-
* @dataProvider dataProvider_for_cdata_should_parse
|
25 |
-
*/
|
26 |
-
public function test_cdata_should_parse($expected, $doc)
|
27 |
-
{
|
28 |
-
$this->html->load($doc);
|
29 |
-
$this->assertEquals($expected, $this->html->find('cdata', 0)->innertext);
|
30 |
-
$this->assertEquals($doc, $this->html->save());
|
31 |
-
}
|
32 |
-
|
33 |
-
public function dataProvider_for_cdata_should_parse()
|
34 |
-
{
|
35 |
-
return array(
|
36 |
-
'empty' => array(
|
37 |
-
'',
|
38 |
-
'<![CDATA[]]>',
|
39 |
-
),
|
40 |
-
'space' => array(
|
41 |
-
' ',
|
42 |
-
'<![CDATA[ ]]>',
|
43 |
-
),
|
44 |
-
'brackets' => array(
|
45 |
-
']][[',
|
46 |
-
'<![CDATA[]][[]]>',
|
47 |
-
),
|
48 |
-
'html' => array(
|
49 |
-
'<p>Hello, World!</p>',
|
50 |
-
'<![CDATA[<p>Hello, World!</p>]]>',
|
51 |
-
),
|
52 |
-
'comment' => array(
|
53 |
-
'<!-- Hello, World! -->',
|
54 |
-
'<![CDATA[<!-- Hello, World! -->]]>'
|
55 |
-
),
|
56 |
-
'newline' => array(
|
57 |
-
"Hello\nWorld!",
|
58 |
-
"<![CDATA[Hello\nWorld!]]>"
|
59 |
-
),
|
60 |
-
);
|
61 |
-
}
|
62 |
-
|
63 |
-
public function test_html_inside_cdata_should_not_appear_in_the_dom()
|
64 |
-
{
|
65 |
-
$this->html->load('<![CDATA[<div>Hello, World!</div>]]>');
|
66 |
-
$this->assertNotNull($this->html->find('cdata', 0));
|
67 |
-
$this->assertNull($this->html->find('div', 0));
|
68 |
-
}
|
69 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/charset_test.php
DELETED
@@ -1,80 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Test if the parser properly detects document encodings
|
7 |
-
*/
|
8 |
-
class charset_test extends TestCase {
|
9 |
-
|
10 |
-
private $html;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
$this->html = new simple_html_dom;
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
$this->html->clear();
|
20 |
-
unset($this->html);
|
21 |
-
}
|
22 |
-
|
23 |
-
/**
|
24 |
-
* @dataProvider fileProvider
|
25 |
-
*/
|
26 |
-
public function test_charset($path)
|
27 |
-
{
|
28 |
-
$expected = strtoupper(basename($path, '.html'));
|
29 |
-
|
30 |
-
$this->html->loadFile($path);
|
31 |
-
|
32 |
-
$this->assertEquals($expected, $this->html->_charset);
|
33 |
-
}
|
34 |
-
|
35 |
-
/** @dataProvider fileProvider */
|
36 |
-
public function test_is_utf8($file)
|
37 |
-
{
|
38 |
-
$testdata = file_get_contents($file);
|
39 |
-
|
40 |
-
if (strtoupper(basename($file, '.html')) === 'UTF-8') {
|
41 |
-
$this->assertTrue(simple_html_dom_node::is_utf8($testdata));
|
42 |
-
} else {
|
43 |
-
$this->assertFalse(simple_html_dom_node::is_utf8($testdata));
|
44 |
-
}
|
45 |
-
}
|
46 |
-
|
47 |
-
/** @dataProvider fileProvider */
|
48 |
-
public function test_convert_text_should_handle_different_encodings($file)
|
49 |
-
{
|
50 |
-
$testdata = file_get_contents($file);
|
51 |
-
$charset = strtoupper(basename($file, '.html'));
|
52 |
-
$expected = iconv($charset, 'UTF-8', $testdata);
|
53 |
-
|
54 |
-
$this->html->load(''); // We need at least the root node
|
55 |
-
|
56 |
-
if ($charset === 'UTF-8') {
|
57 |
-
$this->html->_charset = 'TryMe'; // Trap the parser
|
58 |
-
// Wrap content in BOM
|
59 |
-
$testdata = "\xef\xbb\xbf" . $testdata . "\xef\xbb\xbf";
|
60 |
-
} else {
|
61 |
-
$this->html->_charset = $charset; // Hint source charset
|
62 |
-
}
|
63 |
-
|
64 |
-
$this->html->_target_charset = 'UTF-8'; // Enforce target charset
|
65 |
-
|
66 |
-
$this->assertEquals($expected, $this->html->root->convert_text($testdata));
|
67 |
-
}
|
68 |
-
|
69 |
-
public function fileProvider()
|
70 |
-
{
|
71 |
-
$files = array();
|
72 |
-
|
73 |
-
foreach(glob(__DIR__ . '/data/charset/*.html') as $path) {
|
74 |
-
$files[strtoupper(basename($path, '.html'))] = array($path);
|
75 |
-
}
|
76 |
-
|
77 |
-
return $files;
|
78 |
-
}
|
79 |
-
|
80 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/comment_test.php
DELETED
@@ -1,93 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Checks if the parser properly handles comments
|
7 |
-
*/
|
8 |
-
class comment_test extends TestCase {
|
9 |
-
|
10 |
-
private $html;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
$this->html = new simple_html_dom;
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
$this->html->clear();
|
20 |
-
unset($this->html);
|
21 |
-
}
|
22 |
-
|
23 |
-
/**
|
24 |
-
* @dataProvider dataProvider_for_comment_should_parse
|
25 |
-
*/
|
26 |
-
public function test_comment_should_parse($expected, $doc)
|
27 |
-
{
|
28 |
-
$this->html->load($doc);
|
29 |
-
$this->assertEquals($expected, $this->html->find('comment', 0)->innertext);
|
30 |
-
$this->assertEquals($doc, $this->html->save());
|
31 |
-
}
|
32 |
-
|
33 |
-
public function dataProvider_for_comment_should_parse()
|
34 |
-
{
|
35 |
-
return array(
|
36 |
-
'empty' => array(
|
37 |
-
'',
|
38 |
-
'<!---->',
|
39 |
-
),
|
40 |
-
'space' => array(
|
41 |
-
' ',
|
42 |
-
'<!-- -->',
|
43 |
-
),
|
44 |
-
'brackets' => array(
|
45 |
-
']][[',
|
46 |
-
'<!--]][[-->',
|
47 |
-
),
|
48 |
-
'html' => array(
|
49 |
-
'<p>Hello, World!</p>',
|
50 |
-
'<!--<p>Hello, World!</p>-->',
|
51 |
-
),
|
52 |
-
'cdata' => array(
|
53 |
-
'<![CDATA[Hello, World!]]>',
|
54 |
-
'<!--<![CDATA[Hello, World!]]>-->',
|
55 |
-
),
|
56 |
-
'newline' => array(
|
57 |
-
"Hello\nWorld!",
|
58 |
-
"<!--Hello\nWorld!-->",
|
59 |
-
),
|
60 |
-
'nested comment start tag' => array(
|
61 |
-
'<!--',
|
62 |
-
'<!--<!---->',
|
63 |
-
),
|
64 |
-
'reverse comment start tag' => array(
|
65 |
-
'--!>',
|
66 |
-
'<!----!>-->',
|
67 |
-
),
|
68 |
-
'almost comment start tag' => array(
|
69 |
-
'<!-',
|
70 |
-
'<!--<!--->',
|
71 |
-
),
|
72 |
-
);
|
73 |
-
}
|
74 |
-
|
75 |
-
public function test_html_inside_comment_should_not_appear_in_the_dom()
|
76 |
-
{
|
77 |
-
$this->html->load('<!-- <div>Hello, World!</div> -->');
|
78 |
-
$this->assertNotNull($this->html->find('comment', 0));
|
79 |
-
$this->assertNull($this->html->find('div', 0));
|
80 |
-
}
|
81 |
-
|
82 |
-
public function test_comment_starting_with_greater_than_sign_should_break_comment()
|
83 |
-
{
|
84 |
-
$this->html->load('<!--><div>Hello, World!</div>-->');
|
85 |
-
$this->assertEquals('Hello, World!', $this->html->find('div', 0)->plaintext);
|
86 |
-
}
|
87 |
-
|
88 |
-
public function test_comment_starting_with_dash_plus_greater_than_sign_should_break_comment()
|
89 |
-
{
|
90 |
-
$this->html->load('<!---><div>Hello, World!</div>-->');
|
91 |
-
$this->assertEquals('Hello, World!', $this->html->find('div', 0)->plaintext);
|
92 |
-
}
|
93 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/css_selector_test.php
DELETED
@@ -1,646 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests for CSS selectors based on the current CSS Specification
|
7 |
-
*
|
8 |
-
* @link https://www.w3.org/TR/selectors/ CSS Selectors Specification
|
9 |
-
*/
|
10 |
-
class css_selector_test extends TestCase {
|
11 |
-
|
12 |
-
private $html;
|
13 |
-
|
14 |
-
protected function setUp()
|
15 |
-
{
|
16 |
-
$this->html = new simple_html_dom();
|
17 |
-
}
|
18 |
-
|
19 |
-
protected function tearDown()
|
20 |
-
{
|
21 |
-
$this->html->clear();
|
22 |
-
unset($this->html);
|
23 |
-
}
|
24 |
-
|
25 |
-
/**
|
26 |
-
* pattern: "*"
|
27 |
-
* Represents any element
|
28 |
-
*
|
29 |
-
* @link https://www.w3.org/TR/selectors/#the-universal-selector
|
30 |
-
* Universal Selector
|
31 |
-
*/
|
32 |
-
public function test_universal()
|
33 |
-
{
|
34 |
-
$doc = <<<HTML
|
35 |
-
<html>
|
36 |
-
<head><title>PHP Simple HTML DOM Parser</title></head>
|
37 |
-
<body><h1>A PHP based DOM parser</h1></body>
|
38 |
-
</html>
|
39 |
-
HTML;
|
40 |
-
|
41 |
-
$this->html->load($doc);
|
42 |
-
|
43 |
-
$this->assertCount(5, $this->html->find('*'));
|
44 |
-
}
|
45 |
-
|
46 |
-
/**
|
47 |
-
* pattern: "E"
|
48 |
-
* Represents an element of type E
|
49 |
-
*
|
50 |
-
* @link https://www.w3.org/TR/selectors/#type-selectors
|
51 |
-
* Type Selector
|
52 |
-
*/
|
53 |
-
public function test_type()
|
54 |
-
{
|
55 |
-
$doc = <<<HTML
|
56 |
-
<html>
|
57 |
-
<head><title>PHP Simple HTML DOM Parser</title></head>
|
58 |
-
<body><h1>A PHP based DOM parser</h1></body>
|
59 |
-
</html>
|
60 |
-
HTML;
|
61 |
-
|
62 |
-
$this->html->load($doc);
|
63 |
-
|
64 |
-
$this->assertCount(1, $this->html->find('html'));
|
65 |
-
$this->assertCount(1, $this->html->find('head'));
|
66 |
-
$this->assertCount(1, $this->html->find('title'));
|
67 |
-
$this->assertCount(1, $this->html->find('body'));
|
68 |
-
$this->assertCount(1, $this->html->find('h1'));
|
69 |
-
|
70 |
-
// This should not exist
|
71 |
-
$this->assertCount(0, $this->html->find('div'));
|
72 |
-
}
|
73 |
-
|
74 |
-
/**
|
75 |
-
* pattern: "E.warning"
|
76 |
-
* Represents an E element belonging to the class warning (the document
|
77 |
-
* language specifies how class is determined).
|
78 |
-
*
|
79 |
-
* @link https://www.w3.org/TR/selectors/#class-html
|
80 |
-
* Class selectors
|
81 |
-
*/
|
82 |
-
public function test_class()
|
83 |
-
{
|
84 |
-
$doc = <<<HTML
|
85 |
-
<html>
|
86 |
-
<body>
|
87 |
-
<p class="title header">PHP Simple HTML DOM Parser</p>
|
88 |
-
<p class="subtitle">A PHP based DOM parser</p>
|
89 |
-
</body>
|
90 |
-
</html>
|
91 |
-
HTML;
|
92 |
-
|
93 |
-
$this->html->load($doc);
|
94 |
-
|
95 |
-
$this->assertCount(1, $this->html->find('p.title'));
|
96 |
-
$this->assertCount(1, $this->html->find('p.subtitle'));
|
97 |
-
$this->assertCount(1, $this->html->find('p.title.header'));
|
98 |
-
}
|
99 |
-
|
100 |
-
public function test_class_should_skip_tags_without_classes()
|
101 |
-
{
|
102 |
-
$doc = <<<HTML
|
103 |
-
<html>
|
104 |
-
<body>
|
105 |
-
<p>Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
|
106 |
-
</body>
|
107 |
-
</html>
|
108 |
-
HTML;
|
109 |
-
|
110 |
-
$this->html->load($doc);
|
111 |
-
|
112 |
-
$this->assertCount(0, $this->html->find('p.title'));
|
113 |
-
}
|
114 |
-
|
115 |
-
public function test_class_should_find_camel_case()
|
116 |
-
{
|
117 |
-
$doc = <<<HTML
|
118 |
-
<html>
|
119 |
-
<body>
|
120 |
-
<p class="myClass">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
|
121 |
-
</body>
|
122 |
-
</html>
|
123 |
-
HTML;
|
124 |
-
|
125 |
-
$this->html->load($doc);
|
126 |
-
|
127 |
-
$this->assertCount(1, $this->html->find('p.myClass'));
|
128 |
-
}
|
129 |
-
|
130 |
-
/**
|
131 |
-
* pattern: "E#myid"
|
132 |
-
* Represents an E element with ID equal to myid.
|
133 |
-
*
|
134 |
-
* @link https://www.w3.org/TR/selectors/#id-selectors
|
135 |
-
* ID selectors
|
136 |
-
*/
|
137 |
-
public function test_id()
|
138 |
-
{
|
139 |
-
$doc = <<<HTML
|
140 |
-
<html>
|
141 |
-
<body>
|
142 |
-
<p id="title">PHP Simple HTML DOM Parser</p>
|
143 |
-
<p id="subtitle">A PHP based DOM parser</p>
|
144 |
-
</body>
|
145 |
-
</html>
|
146 |
-
HTML;
|
147 |
-
|
148 |
-
$this->html->load($doc);
|
149 |
-
|
150 |
-
$this->assertCount(1, $this->html->find('p#title'));
|
151 |
-
$this->assertCount(1, $this->html->find('p#subtitle'));
|
152 |
-
}
|
153 |
-
|
154 |
-
public function test_id_selector_should_find_camel_case()
|
155 |
-
{
|
156 |
-
$doc = <<<HTML
|
157 |
-
<html>
|
158 |
-
<body>
|
159 |
-
<p id="myMessage">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
|
160 |
-
</body>
|
161 |
-
</html>
|
162 |
-
HTML;
|
163 |
-
|
164 |
-
$this->html->load($doc);
|
165 |
-
|
166 |
-
$this->assertCount(1, $this->html->find('p#myMessage'));
|
167 |
-
}
|
168 |
-
|
169 |
-
#region Attribute
|
170 |
-
|
171 |
-
/**
|
172 |
-
* pattern: "E[foo]"
|
173 |
-
* Represents an E element with a foo attribute
|
174 |
-
*
|
175 |
-
* @link https://www.w3.org/TR/selectors/#attribute-selectors
|
176 |
-
* Attribute selectors
|
177 |
-
*/
|
178 |
-
public function test_attribute_exists()
|
179 |
-
{
|
180 |
-
$doc = <<<HTML
|
181 |
-
<html>
|
182 |
-
<body>
|
183 |
-
<p id="title" class="h1">PHP Simple HTML DOM Parser</p>
|
184 |
-
<p id="subtitle" class="h2" style="color:blue;">A PHP based DOM parser</p>
|
185 |
-
</body>
|
186 |
-
</html>
|
187 |
-
HTML;
|
188 |
-
|
189 |
-
$this->html->load($doc);
|
190 |
-
|
191 |
-
$this->assertCount(2, $this->html->find('p[id]'));
|
192 |
-
$this->assertCount(2, $this->html->find('p[id][class]'));
|
193 |
-
$this->assertCount(1, $this->html->find('p[id][class][style]'));
|
194 |
-
}
|
195 |
-
|
196 |
-
/**
|
197 |
-
* pattern: "E[foo="bar"]"
|
198 |
-
* Represents an E element whose foo attribute value is exactly equal to bar
|
199 |
-
*
|
200 |
-
* @link https://www.w3.org/TR/selectors/#attribute-selectors
|
201 |
-
* Attribute selectors
|
202 |
-
*/
|
203 |
-
public function test_attribute_value_equals()
|
204 |
-
{
|
205 |
-
$doc = <<<HTML
|
206 |
-
<html>
|
207 |
-
<body>
|
208 |
-
<p id="title" class="h1">PHP Simple HTML DOM Parser</p>
|
209 |
-
<p id="subtitle" class="h2" style="color:blue;">A PHP based DOM parser</p>
|
210 |
-
</body>
|
211 |
-
</html>
|
212 |
-
HTML;
|
213 |
-
|
214 |
-
$this->html->load($doc);
|
215 |
-
|
216 |
-
$this->assertCount(1, $this->html->find('p[id="title"]'));
|
217 |
-
$this->assertCount(1, $this->html->find('p[id="subtitle"]'));
|
218 |
-
$this->assertCount(1, $this->html->find('p[id="title"][class="h1"]'));
|
219 |
-
$this->assertCount(1, $this->html->find('p[id="subtitle"][class="h2"][style="color:blue;"]'));
|
220 |
-
}
|
221 |
-
|
222 |
-
/**
|
223 |
-
* pattern: "E[foo="bar" i]"
|
224 |
-
* Represents an E element whose foo attribute value is exactly equal to any
|
225 |
-
* (ASCII-range) case-permutation of bar
|
226 |
-
*
|
227 |
-
* @link https://www.w3.org/TR/selectors/#attribute-case
|
228 |
-
* Attribute case
|
229 |
-
*/
|
230 |
-
public function test_attribute_value_equals_case_insensitive()
|
231 |
-
{
|
232 |
-
$doc = <<<HTML
|
233 |
-
<html lang="en-US">
|
234 |
-
<body>
|
235 |
-
<p att="title">PHP Simple HTML DOM Parser</p>
|
236 |
-
<p att="tItle">A PHP based DOM parser</p>
|
237 |
-
</body>
|
238 |
-
</html>
|
239 |
-
HTML;
|
240 |
-
|
241 |
-
$this->html->load($doc);
|
242 |
-
|
243 |
-
$this->assertCount(1, $this->html->find('p[att="title"]'));
|
244 |
-
$this->assertCount(2, $this->html->find('p[att="title" i]'));
|
245 |
-
|
246 |
-
$this->assertCount(0, $this->html->find('p[att^="TITLE"]'));
|
247 |
-
$this->assertCount(2, $this->html->find('p[att^="TITLE" i]'));
|
248 |
-
|
249 |
-
$this->assertCount(0, $this->html->find('p[att$="LE"]'));
|
250 |
-
$this->assertCount(2, $this->html->find('p[att$="LE" i]'));
|
251 |
-
|
252 |
-
$this->assertCount(0, $this->html->find('p[att*="ITL"]'));
|
253 |
-
$this->assertCount(2, $this->html->find('p[att*="ITL" i]'));
|
254 |
-
|
255 |
-
$this->assertCount(0, $this->html->find('html[lang|="EN"]'));
|
256 |
-
$this->assertCount(1, $this->html->find('html[lang|="EN" i]'));
|
257 |
-
}
|
258 |
-
|
259 |
-
/**
|
260 |
-
* pattern: "E[foo="bar" s]"
|
261 |
-
* Represents an E element whose foo attribute value is exactly and
|
262 |
-
* case-sensitively equal to bar
|
263 |
-
*
|
264 |
-
* @link https://www.w3.org/TR/selectors/#attribute-case
|
265 |
-
* Attribute case
|
266 |
-
*/
|
267 |
-
public function test_attribute_value_equals_case_sensitive()
|
268 |
-
{
|
269 |
-
$doc = <<<HTML
|
270 |
-
<html lang="en-US">
|
271 |
-
<body>
|
272 |
-
<p att="title header">PHP Simple HTML DOM Parser</p>
|
273 |
-
<p att="tItle">A PHP based DOM parser</p>
|
274 |
-
</body>
|
275 |
-
</html>
|
276 |
-
HTML;
|
277 |
-
|
278 |
-
$this->html->load($doc);
|
279 |
-
|
280 |
-
$this->assertCount(0, $this->html->find('p[att="title" s]'));
|
281 |
-
$this->assertCount(1, $this->html->find('p[att="tItle" s]'));
|
282 |
-
}
|
283 |
-
|
284 |
-
/**
|
285 |
-
* pattern: "E[foo~="bar"]"
|
286 |
-
* Represents an E element whose foo attribute value is a list of
|
287 |
-
* whitespace-separated values, one of which is exactly equal to bar
|
288 |
-
*
|
289 |
-
* @link https://www.w3.org/TR/selectors/#attribute-selectors
|
290 |
-
* Attribute selectors
|
291 |
-
*/
|
292 |
-
public function test_attribute_value_list_contains()
|
293 |
-
{
|
294 |
-
$doc = <<<HTML
|
295 |
-
<html>
|
296 |
-
<body>
|
297 |
-
<p att="title header">PHP Simple HTML DOM Parser</p>
|
298 |
-
<p att="title subtitle">A PHP based DOM parser</p>
|
299 |
-
</body>
|
300 |
-
</html>
|
301 |
-
HTML;
|
302 |
-
|
303 |
-
$this->html->load($doc);
|
304 |
-
|
305 |
-
$this->assertCount(2, $this->html->find('p[att~="title"]'));
|
306 |
-
$this->assertCount(1, $this->html->find('p[att~="header"]'));
|
307 |
-
$this->assertCount(1, $this->html->find('p[att~="subtitle"]'));
|
308 |
-
$this->assertCount(0, $this->html->find('p[att~=" title"'));
|
309 |
-
$this->assertCount(0, $this->html->find('p[att~=" "'));
|
310 |
-
}
|
311 |
-
|
312 |
-
/**
|
313 |
-
* pattern: "E[foo^="bar"]"
|
314 |
-
* Represents an E element whose foo attribute value begins exactly with the
|
315 |
-
* string bar
|
316 |
-
*
|
317 |
-
* @link https://www.w3.org/TR/selectors/#attribute-substrings
|
318 |
-
* Attribute selectors
|
319 |
-
*/
|
320 |
-
public function test_attribute_value_begins()
|
321 |
-
{
|
322 |
-
$doc = <<<HTML
|
323 |
-
<html>
|
324 |
-
<body>
|
325 |
-
<p id="title">PHP Simple HTML DOM Parser</p>
|
326 |
-
<p id="subtitle">A PHP based DOM parser</p>
|
327 |
-
</body>
|
328 |
-
</html>
|
329 |
-
HTML;
|
330 |
-
|
331 |
-
$this->html->load($doc);
|
332 |
-
|
333 |
-
$this->assertCount(1, $this->html->find('p[id^="sub"]'));
|
334 |
-
}
|
335 |
-
|
336 |
-
/**
|
337 |
-
* pattern: "E[foo$="bar"]"
|
338 |
-
* Represents an E element whose foo attribute value ends exactly with the
|
339 |
-
* string bar
|
340 |
-
*
|
341 |
-
* @link https://www.w3.org/TR/selectors/#attribute-substrings
|
342 |
-
* Attribute substrings
|
343 |
-
*/
|
344 |
-
public function test_attribute_value_ends()
|
345 |
-
{
|
346 |
-
$doc = <<<HTML
|
347 |
-
<html>
|
348 |
-
<body>
|
349 |
-
<p id="title">PHP Simple HTML DOM Parser</p>
|
350 |
-
<p id="subtitle">A PHP based DOM parser</p>
|
351 |
-
</body>
|
352 |
-
</html>
|
353 |
-
HTML;
|
354 |
-
|
355 |
-
$this->html->load($doc);
|
356 |
-
|
357 |
-
$this->assertCount(2, $this->html->find('p[id$="title"]'));
|
358 |
-
}
|
359 |
-
|
360 |
-
/**
|
361 |
-
* pattern: "E[foo*="bar"]"
|
362 |
-
* Represents an E element whose foo attribute value contains the substring
|
363 |
-
* bar
|
364 |
-
*
|
365 |
-
* @link https://www.w3.org/TR/selectors/#attribute-substrings
|
366 |
-
* Attribute substrings
|
367 |
-
*/
|
368 |
-
public function test_attribute_value_contains()
|
369 |
-
{
|
370 |
-
$doc = <<<HTML
|
371 |
-
<html>
|
372 |
-
<body>
|
373 |
-
<p id="title">PHP Simple HTML DOM Parser</p>
|
374 |
-
<p id="subtitle">A PHP based DOM parser</p>
|
375 |
-
<p class="title header">PHP Simple HTML DOM Parser</p>
|
376 |
-
<p class="title subtitle">A PHP based DOM parser</p>
|
377 |
-
</body>
|
378 |
-
</html>
|
379 |
-
HTML;
|
380 |
-
|
381 |
-
$this->html->load($doc);
|
382 |
-
|
383 |
-
$this->assertCount(2, $this->html->find('p[id*="itl"]'));
|
384 |
-
$this->assertCount(2, $this->html->find('p[class*="title"]'));
|
385 |
-
$this->assertCount(1, $this->html->find('p[class*="title header"]'));
|
386 |
-
$this->assertCount(1, $this->html->find('p[class*="subtitle"]'));
|
387 |
-
}
|
388 |
-
|
389 |
-
/**
|
390 |
-
* pattern: "E[foo|="en"]"
|
391 |
-
* Represents an E element whose foo attribute value is a hyphen-separated
|
392 |
-
* list of values beginning with en
|
393 |
-
*
|
394 |
-
* @link https://www.w3.org/TR/selectors/#attribute-selectors
|
395 |
-
* Attribute selectors
|
396 |
-
*/
|
397 |
-
public function test_attribute_value_list_begins()
|
398 |
-
{
|
399 |
-
$doc = <<<HTML
|
400 |
-
<a href="#" hreflang="en-US">en-US</a>
|
401 |
-
<a href="#" hreflang="en-UK">en-UK</a>
|
402 |
-
<a href="#" hreflang="en">en</a>
|
403 |
-
<a href="#" hreflang="fr">fr</a>
|
404 |
-
HTML;
|
405 |
-
|
406 |
-
$this->html->load($doc);
|
407 |
-
|
408 |
-
$anchors = $this->html->find('a[hreflang|="en"]');
|
409 |
-
|
410 |
-
$this->assertCount(3, $anchors);
|
411 |
-
$this->assertEquals('en-US', $anchors[0]->innertext);
|
412 |
-
$this->assertEquals('en-UK', $anchors[1]->innertext);
|
413 |
-
$this->assertEquals('en', $anchors[2]->innertext);
|
414 |
-
}
|
415 |
-
|
416 |
-
#endregion Attribute
|
417 |
-
|
418 |
-
#region Combinator
|
419 |
-
|
420 |
-
/**
|
421 |
-
* pattern: "E F"
|
422 |
-
* Represents an F element descendant of an E element
|
423 |
-
*
|
424 |
-
* @link https://www.w3.org/TR/selectors/#descendant-combinators
|
425 |
-
* Descendant combinators
|
426 |
-
*/
|
427 |
-
public function test_descendant_combinators()
|
428 |
-
{
|
429 |
-
$doc = <<<HTML
|
430 |
-
<html>
|
431 |
-
<body>
|
432 |
-
<p id="title">PHP Simple HTML DOM Parser</p>
|
433 |
-
<p id="subtitle">A PHP based DOM parser</p>
|
434 |
-
</body>
|
435 |
-
</html>
|
436 |
-
HTML;
|
437 |
-
|
438 |
-
$this->html->load($doc);
|
439 |
-
|
440 |
-
$this->assertCount(2, $this->html->find('html body p'));
|
441 |
-
}
|
442 |
-
|
443 |
-
/**
|
444 |
-
* pattern: "E > F"
|
445 |
-
* Represents an F element child of an E element
|
446 |
-
*
|
447 |
-
* @link https://www.w3.org/TR/selectors/#child-combinators
|
448 |
-
* Child combinators
|
449 |
-
*/
|
450 |
-
public function test_child_combinators()
|
451 |
-
{
|
452 |
-
$doc = <<<HTML
|
453 |
-
<html>
|
454 |
-
<body>
|
455 |
-
<p id="title">PHP Simple HTML DOM Parser</p>
|
456 |
-
<p id="subtitle">A PHP based DOM parser</p>
|
457 |
-
<div>
|
458 |
-
<p>Lorem ipsum dolor sit amet.</p>
|
459 |
-
</div>
|
460 |
-
</body>
|
461 |
-
</html>
|
462 |
-
HTML;
|
463 |
-
|
464 |
-
$this->html->load($doc);
|
465 |
-
|
466 |
-
$this->assertCount(2, $this->html->find('html > body > p'));
|
467 |
-
}
|
468 |
-
|
469 |
-
/**
|
470 |
-
* pattern: "E + F"
|
471 |
-
* Represents an F element immediately preceded by an E element
|
472 |
-
*
|
473 |
-
* @link https://www.w3.org/TR/selectors/#adjacent-sibling-combinators
|
474 |
-
* Next sibling combinators
|
475 |
-
*/
|
476 |
-
public function test_next_sibling_combinators()
|
477 |
-
{
|
478 |
-
$doc = <<<HTML
|
479 |
-
<html>
|
480 |
-
<body>
|
481 |
-
<h1>PHP Simple HTML DOM Parser</h1>
|
482 |
-
<p id="title">PHP Simple HTML DOM Parser</p>
|
483 |
-
<h2>A PHP based DOM parser</h2>
|
484 |
-
<p id="subtitle">A PHP based DOM parser</p>
|
485 |
-
</body>
|
486 |
-
</html>
|
487 |
-
HTML;
|
488 |
-
|
489 |
-
$this->html->load($doc);
|
490 |
-
|
491 |
-
$this->assertCount(1, $this->html->find('h1 + p'));
|
492 |
-
$this->assertCount(1, $this->html->find('h2 + p'));
|
493 |
-
}
|
494 |
-
|
495 |
-
/**
|
496 |
-
* pattern: "E ~ F"
|
497 |
-
* Represents an F element preceded by an E element
|
498 |
-
*
|
499 |
-
* @link https://www.w3.org/TR/selectors/#general-sibling-combinators
|
500 |
-
* General sibling combinators
|
501 |
-
*/
|
502 |
-
public function test_general_sibling_combinators()
|
503 |
-
{
|
504 |
-
$doc = <<<HTML
|
505 |
-
<html>
|
506 |
-
<body>
|
507 |
-
<h1>PHP Simple HTML DOM Parser</h1>
|
508 |
-
<p id="title">PHP Simple HTML DOM Parser</p>
|
509 |
-
<h2>A PHP based DOM parser</h2>
|
510 |
-
<p id="subtitle">A PHP based DOM parser</p>
|
511 |
-
</body>
|
512 |
-
</html>
|
513 |
-
HTML;
|
514 |
-
|
515 |
-
$this->html->load($doc);
|
516 |
-
|
517 |
-
$this->assertCount(2, $this->html->find('h1 ~ p'));
|
518 |
-
$this->assertCount(1, $this->html->find('h2 ~ p'));
|
519 |
-
}
|
520 |
-
|
521 |
-
#endregion Combinator
|
522 |
-
|
523 |
-
#region Pseudo Classes
|
524 |
-
|
525 |
-
/**
|
526 |
-
* pattern: "E:not(s)"
|
527 |
-
* Represents an E element that does not match simple selector s
|
528 |
-
*
|
529 |
-
* @link https://www.w3.org/TR/selectors-3/#negation
|
530 |
-
* Negation pseudo class
|
531 |
-
*/
|
532 |
-
public function test_negation_pseudo_class()
|
533 |
-
{
|
534 |
-
$doc = <<<HTML
|
535 |
-
<html>
|
536 |
-
<body>
|
537 |
-
<h1>PHP Simple HTML DOM Parser</h1>
|
538 |
-
<p id="title">PHP Simple HTML DOM Parser</p>
|
539 |
-
<h2>A PHP based DOM parser</h2>
|
540 |
-
<p id="subtitle">A PHP based DOM parser</p>
|
541 |
-
</body>
|
542 |
-
</html>
|
543 |
-
HTML;
|
544 |
-
|
545 |
-
$this->html->load($doc);
|
546 |
-
|
547 |
-
$this->assertCount(1, $this->html->find('p:not([id="title"])'));
|
548 |
-
$this->assertCount(5, $this->html->find(':not(p[id="subtitle"])'));
|
549 |
-
$this->assertCount(3, $this->html->find('body :not(p[id="title"])'));
|
550 |
-
}
|
551 |
-
|
552 |
-
#endregion Pseudo Classes
|
553 |
-
|
554 |
-
/**
|
555 |
-
* "comment", "cdata" and "text" selectors are specific to this parser. They
|
556 |
-
* allow users to directly address these nodes and extract useful information.
|
557 |
-
*
|
558 |
-
* @dataProvider dataProvider_for_find_should_work_with_special_selector
|
559 |
-
*/
|
560 |
-
public function test_find_should_work_with_special_selector($selector, $expected, $doc, $message)
|
561 |
-
{
|
562 |
-
$this->html->load($doc);
|
563 |
-
$this->assertEquals($expected, $this->html->find($selector, 0)->innertext, $message);
|
564 |
-
$this->assertEquals($doc, $this->html->save());
|
565 |
-
}
|
566 |
-
|
567 |
-
public function dataProvider_for_find_should_work_with_special_selector()
|
568 |
-
{
|
569 |
-
$data = array(
|
570 |
-
'text without elements' => array(
|
571 |
-
'text',
|
572 |
-
'Hello, World!',
|
573 |
-
'Hello, World!',
|
574 |
-
'find should return text without elements'
|
575 |
-
),
|
576 |
-
'text outside html' => array(
|
577 |
-
'text',
|
578 |
-
'Hello, World!',
|
579 |
-
'Hello, World!<html></html>',
|
580 |
-
'find should return text outside html'
|
581 |
-
),
|
582 |
-
'text inside element' => array(
|
583 |
-
'text',
|
584 |
-
'Hello, World!',
|
585 |
-
'<html>Hello, World!</html>',
|
586 |
-
'find should return text inside element'
|
587 |
-
),
|
588 |
-
'text between elements' => array(
|
589 |
-
'text',
|
590 |
-
'Hello, World!',
|
591 |
-
'<html><head></head>Hello, World!<body></body></html>',
|
592 |
-
'find should return text between elements'
|
593 |
-
),
|
594 |
-
'cdata without elements' => array(
|
595 |
-
'cdata',
|
596 |
-
'Hello, World!',
|
597 |
-
'<![CDATA[Hello, World!]]>',
|
598 |
-
'find should return cdata elements'
|
599 |
-
),
|
600 |
-
'cdata outside html' => array(
|
601 |
-
'cdata',
|
602 |
-
'Hello, World!',
|
603 |
-
'<![CDATA[Hello, World!]]><html></html>',
|
604 |
-
'find should return cdata elements'
|
605 |
-
),
|
606 |
-
'cdata inside element' => array(
|
607 |
-
'cdata',
|
608 |
-
'Hello, World!',
|
609 |
-
'<html><![CDATA[Hello, World!]]></html>',
|
610 |
-
'find should return cdata elements'
|
611 |
-
),
|
612 |
-
'cdata between elements' => array(
|
613 |
-
'cdata',
|
614 |
-
'Hello, World!',
|
615 |
-
'<html><head></head><![CDATA[Hello, World!]]><body></body></html>',
|
616 |
-
'find should return cdata elements'
|
617 |
-
),
|
618 |
-
'comment without elements' => array(
|
619 |
-
'comment',
|
620 |
-
'Hello, World!',
|
621 |
-
'<!--Hello, World!-->',
|
622 |
-
'find should return comments'
|
623 |
-
),
|
624 |
-
'comment outside html' => array(
|
625 |
-
'comment',
|
626 |
-
'Hello, World!',
|
627 |
-
'<!--Hello, World!--><html></html>',
|
628 |
-
'find should return comments'
|
629 |
-
),
|
630 |
-
'comment inside element' => array(
|
631 |
-
'comment',
|
632 |
-
'Hello, World!',
|
633 |
-
'<html><!--Hello, World!--></html>',
|
634 |
-
'find should return comments'
|
635 |
-
),
|
636 |
-
'comment between elements' => array(
|
637 |
-
'comment',
|
638 |
-
'Hello, World!',
|
639 |
-
'<html><head></head><!--Hello, World!--><body></body></html>',
|
640 |
-
'find should return comments'
|
641 |
-
)
|
642 |
-
);
|
643 |
-
|
644 |
-
return $data;
|
645 |
-
}
|
646 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/debug_info_test.php
DELETED
@@ -1,37 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests for debug information generated by the parser
|
7 |
-
*/
|
8 |
-
class debug_info_test extends TestCase {
|
9 |
-
private $html;
|
10 |
-
|
11 |
-
protected function setUp()
|
12 |
-
{
|
13 |
-
$this->html = new simple_html_dom();
|
14 |
-
}
|
15 |
-
|
16 |
-
protected function tearDown()
|
17 |
-
{
|
18 |
-
$this->html->clear();
|
19 |
-
unset($this->html);
|
20 |
-
}
|
21 |
-
|
22 |
-
/** @dataProvider dataProvider_for_print_r */
|
23 |
-
public function test_print_r($expected, $html)
|
24 |
-
{
|
25 |
-
$this->html->load($html);
|
26 |
-
$this->assertEquals($expected, print_r($this->html, true));
|
27 |
-
}
|
28 |
-
|
29 |
-
public function dataProvider_for_print_r()
|
30 |
-
{
|
31 |
-
return array(
|
32 |
-
'should return __debugInfo' => array(
|
33 |
-
'expected' => file_get_contents(__DIR__ . '/data/debug_info/print_r_expected.txt'),
|
34 |
-
'html' => file_get_contents(__DIR__ . '/data/debug_info/print_r_testdata.html')
|
35 |
-
));
|
36 |
-
}
|
37 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/debug_with_callback_test.php
DELETED
@@ -1,83 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../Debug.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
use simplehtmldom\Debug;
|
5 |
-
|
6 |
-
/**
|
7 |
-
* Tests the Debug class with custom callback
|
8 |
-
*/
|
9 |
-
class debug_with_callback_test extends TestCase {
|
10 |
-
private $html;
|
11 |
-
private $debug_message;
|
12 |
-
|
13 |
-
protected function setUp()
|
14 |
-
{
|
15 |
-
Debug::setDebugHandler(array($this, 'debugMessageHandler'));
|
16 |
-
Debug::enable();
|
17 |
-
|
18 |
-
// Discard initial message
|
19 |
-
$this->debug_message = null;
|
20 |
-
}
|
21 |
-
|
22 |
-
protected function tearDown()
|
23 |
-
{
|
24 |
-
Debug::disable();
|
25 |
-
Debug::setDebugHandler();
|
26 |
-
}
|
27 |
-
|
28 |
-
public function debugMessageHandler($message)
|
29 |
-
{
|
30 |
-
$this->debug_message = $message;
|
31 |
-
}
|
32 |
-
|
33 |
-
public function test_enable_should_issue_a_message()
|
34 |
-
{
|
35 |
-
$this->assertNull($this->debug_message);
|
36 |
-
Debug::enable();
|
37 |
-
$this->assertNotNull($this->debug_message);
|
38 |
-
}
|
39 |
-
|
40 |
-
public function test_disable_should_issue_a_message()
|
41 |
-
{
|
42 |
-
$this->assertNull($this->debug_message);
|
43 |
-
Debug::disable();
|
44 |
-
$this->assertNotNull($this->debug_message);
|
45 |
-
}
|
46 |
-
|
47 |
-
public function test_log_should_issue_the_message()
|
48 |
-
{
|
49 |
-
$expected = 'Hello, World!';
|
50 |
-
$this->assertNull($this->debug_message);
|
51 |
-
Debug::log('Hello, World!');
|
52 |
-
$this->assertContains($expected, $this->debug_message);
|
53 |
-
}
|
54 |
-
|
55 |
-
public function test_log_should_issue_the_same_message_multiple_times()
|
56 |
-
{
|
57 |
-
$expected = 'Hello, World!';
|
58 |
-
$this->assertNull($this->debug_message);
|
59 |
-
|
60 |
-
for($i = 0; $i < 2; $i++)
|
61 |
-
{
|
62 |
-
Debug::log('Hello, World!');
|
63 |
-
$this->assertContains($expected, $this->debug_message);
|
64 |
-
$this->debug_message = null;
|
65 |
-
}
|
66 |
-
}
|
67 |
-
|
68 |
-
public function test_log_once_should_issue_the_message_only_once()
|
69 |
-
{
|
70 |
-
$this->assertNull($this->debug_message);
|
71 |
-
|
72 |
-
for($i = 0; $i < 2; $i++)
|
73 |
-
{
|
74 |
-
Debug::log_once('Hello, World!');
|
75 |
-
if ($i === 0) {
|
76 |
-
$this->assertContains('Hello, World!', $this->debug_message);
|
77 |
-
} else {
|
78 |
-
$this->assertNull($this->debug_message);
|
79 |
-
}
|
80 |
-
$this->debug_message = null;
|
81 |
-
}
|
82 |
-
}
|
83 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/doctype_test.php
DELETED
@@ -1,47 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Checks if the parser properly handles DOCTYPE
|
7 |
-
*/
|
8 |
-
class doctype_test extends TestCase {
|
9 |
-
|
10 |
-
private $html;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
$this->html = new simple_html_dom();
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
$this->html->clear();
|
20 |
-
unset($this->html);
|
21 |
-
}
|
22 |
-
|
23 |
-
/**
|
24 |
-
* @dataProvider dataProvider_for_doctype_should_parse
|
25 |
-
*/
|
26 |
-
public function test_doctype_should_parse($expected, $doc)
|
27 |
-
{
|
28 |
-
// Note: The parser currently doesn't make any assumptions about DOCTYPE
|
29 |
-
$this->html->load($doc);
|
30 |
-
$this->assertEquals($expected, $this->html->root->plaintext);
|
31 |
-
$this->assertEquals($doc, $this->html->save());
|
32 |
-
}
|
33 |
-
|
34 |
-
public function dataProvider_for_doctype_should_parse()
|
35 |
-
{
|
36 |
-
return array(
|
37 |
-
'normal' => array(
|
38 |
-
'',
|
39 |
-
'<!DOCTYPE html><html></html>',
|
40 |
-
),
|
41 |
-
'stray doctype' => array(
|
42 |
-
'Hello, World!',
|
43 |
-
'<p><!DOCTYPE html>Hello, World!</p>',
|
44 |
-
),
|
45 |
-
);
|
46 |
-
}
|
47 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/dom_manipulation_test.php
DELETED
@@ -1,102 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests the DOM manipulation ability of the parser
|
7 |
-
*/
|
8 |
-
class dom_manipulation_test extends TestCase {
|
9 |
-
private $dom;
|
10 |
-
|
11 |
-
protected function setUp()
|
12 |
-
{
|
13 |
-
$this->dom = new simple_html_dom();
|
14 |
-
}
|
15 |
-
|
16 |
-
protected function tearDown()
|
17 |
-
{
|
18 |
-
$this->dom->clear();
|
19 |
-
unset($this->dom);
|
20 |
-
}
|
21 |
-
|
22 |
-
public function test_dom_should_accept_nested_elements()
|
23 |
-
{
|
24 |
-
$expected = '<html><head></head><body></body></html>';
|
25 |
-
|
26 |
-
$html = $this->dom->createElement('html');
|
27 |
-
$head = $this->dom->createElement('head');
|
28 |
-
$body = $this->dom->createElement('body');
|
29 |
-
|
30 |
-
$this->dom->root->appendChild($html);
|
31 |
-
|
32 |
-
$html
|
33 |
-
->appendChild($head)
|
34 |
-
->appendChild($body);
|
35 |
-
|
36 |
-
$this->assertEquals($expected, $this->dom->save());
|
37 |
-
}
|
38 |
-
|
39 |
-
public function test_dom_should_find_added_elements()
|
40 |
-
{
|
41 |
-
$html = $this->dom->createElement('html');
|
42 |
-
$head = $this->dom->createElement('head');
|
43 |
-
$body = $this->dom->createElement('body');
|
44 |
-
|
45 |
-
$this->dom->root->appendChild($html);
|
46 |
-
|
47 |
-
$html
|
48 |
-
->appendChild($head)
|
49 |
-
->appendChild($body);
|
50 |
-
|
51 |
-
$this->assertNotNull($this->dom->find('html', 0));
|
52 |
-
$this->assertNotNull($this->dom->find('head', 0));
|
53 |
-
$this->assertNotNull($this->dom->find('body', 0));
|
54 |
-
}
|
55 |
-
|
56 |
-
public function test_dom_should_find_elements_added_to_existing_dom()
|
57 |
-
{
|
58 |
-
$this->dom->load('<html></html>');
|
59 |
-
|
60 |
-
$head = $this->dom->createElement('head');
|
61 |
-
$body = $this->dom->createElement('body');
|
62 |
-
|
63 |
-
$this->dom->find('html', 0)
|
64 |
-
->appendChild($head)
|
65 |
-
->appendChild($body);
|
66 |
-
|
67 |
-
$this->assertNotNull($this->dom->find('html', 0));
|
68 |
-
$this->assertNotNull($this->dom->find('head', 0));
|
69 |
-
$this->assertNotNull($this->dom->find('body', 0));
|
70 |
-
}
|
71 |
-
|
72 |
-
public function test_dom_should_find_elements_added_to_existing_nested_dom()
|
73 |
-
{
|
74 |
-
$this->dom->load('<html><body></body></html>');
|
75 |
-
|
76 |
-
$table = $this->dom->createElement('table');
|
77 |
-
$tr = $this->dom->createElement('tr');
|
78 |
-
|
79 |
-
$this->dom->find('body', 0)->appendChild($table);
|
80 |
-
$table->appendChild($tr);
|
81 |
-
|
82 |
-
$this->assertNotNull($this->dom->find('table', 0));
|
83 |
-
$this->assertNotNull($this->dom->find('tr', 0));
|
84 |
-
}
|
85 |
-
|
86 |
-
public function test_dom_should_find_elements_add_in_reverse()
|
87 |
-
{
|
88 |
-
$html = $this->dom->createElement('html');
|
89 |
-
$head = $this->dom->createElement('head');
|
90 |
-
$body = $this->dom->createElement('body');
|
91 |
-
|
92 |
-
$html
|
93 |
-
->appendChild($head)
|
94 |
-
->appendChild($body);
|
95 |
-
|
96 |
-
$this->dom->root->appendChild($html);
|
97 |
-
|
98 |
-
$this->assertNotNull($this->dom->find('html', 0));
|
99 |
-
$this->assertNotNull($this->dom->find('head', 0));
|
100 |
-
$this->assertNotNull($this->dom->find('body', 0));
|
101 |
-
}
|
102 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/entity_decoding_test.php
DELETED
@@ -1,62 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests for simple_html_dom entity decoding
|
7 |
-
*/
|
8 |
-
class entity_decoding_test extends TestCase {
|
9 |
-
/**
|
10 |
-
* @dataProvider load_should_decode_entity_dataProvider
|
11 |
-
*/
|
12 |
-
public function test_load_should_decode_entity($name, $char, $expected)
|
13 |
-
{
|
14 |
-
$this->assertEquals($expected, $char, 'Character: ' . $name);
|
15 |
-
}
|
16 |
-
|
17 |
-
public function load_should_decode_entity_dataProvider()
|
18 |
-
{
|
19 |
-
$file = __DIR__ . '/data/entity_decoding/Character Entity Reference Chart.html';
|
20 |
-
|
21 |
-
// This operation is very slow due to missing closing tags
|
22 |
-
$html = new simple_html_dom();
|
23 |
-
$html->loadFile($file);
|
24 |
-
|
25 |
-
$table = get_html_translation_table(HTML_ENTITIES, ENT_QUOTES | ENT_HTML5, $html->target_charset);
|
26 |
-
|
27 |
-
$vector = array();
|
28 |
-
|
29 |
-
foreach($html->find('table tr') as $tr) {
|
30 |
-
$char = $tr->find('td.character', 0)->innertext;
|
31 |
-
$char = substr($char, 1); /* first character is always space */
|
32 |
-
|
33 |
-
$name = $tr->find('td.named > code', 0)->plaintext;
|
34 |
-
$name = explode(' ', $name)[0]; /* may contain multiple representations */
|
35 |
-
|
36 |
-
$expected = array_search($name, $table, true);
|
37 |
-
|
38 |
-
if ($expected === false) continue; /* Unknown entity */
|
39 |
-
|
40 |
-
$vector[] = array(
|
41 |
-
$name,
|
42 |
-
$char,
|
43 |
-
$expected
|
44 |
-
);
|
45 |
-
}
|
46 |
-
|
47 |
-
return $vector;
|
48 |
-
}
|
49 |
-
|
50 |
-
public function test_decode_should_decode_attributes()
|
51 |
-
{
|
52 |
-
$expected = 'Häagen-Dazs';
|
53 |
-
|
54 |
-
$html = new simple_html_dom();
|
55 |
-
$html->load('<meta name="description" content="Häagen-Dazs">');
|
56 |
-
|
57 |
-
$description = $html->find('meta[name="description"]', 0);
|
58 |
-
|
59 |
-
$this->assertEquals($expected, $description->content);
|
60 |
-
}
|
61 |
-
|
62 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/file_get_html_test.php
DELETED
@@ -1,80 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests file_get_html
|
7 |
-
*/
|
8 |
-
class file_get_html_test extends TestCase {
|
9 |
-
private $testdata_file = __DIR__ . '/data/file_get_html/testdata.html';
|
10 |
-
|
11 |
-
#region maxLen
|
12 |
-
|
13 |
-
/**
|
14 |
-
* Files equal to maxLen should load normally.
|
15 |
-
* @dataProvider fileProvider
|
16 |
-
*/
|
17 |
-
public function test_files_equal_to_maxlen_should_load_normally($file)
|
18 |
-
{
|
19 |
-
$expected = file_get_contents($file);
|
20 |
-
$size = filesize($file);
|
21 |
-
|
22 |
-
$this->assertEquals(
|
23 |
-
$expected,
|
24 |
-
file_get_html(
|
25 |
-
$file,
|
26 |
-
false,
|
27 |
-
null,
|
28 |
-
0,
|
29 |
-
$size,
|
30 |
-
true,
|
31 |
-
false,
|
32 |
-
DEFAULT_TARGET_CHARSET,
|
33 |
-
false,
|
34 |
-
DEFAULT_BR_TEXT,
|
35 |
-
DEFAULT_SPAN_TEXT
|
36 |
-
)->save(),
|
37 |
-
'Files equal to maxLen should load normally.'
|
38 |
-
);
|
39 |
-
}
|
40 |
-
|
41 |
-
/**
|
42 |
-
* Files larger than maxLen should return false.
|
43 |
-
* @dataProvider fileProvider
|
44 |
-
*/
|
45 |
-
public function test_files_larger_than_maxlen_should_return_false($file)
|
46 |
-
{
|
47 |
-
$size = filesize($file);
|
48 |
-
|
49 |
-
$this->assertFalse(
|
50 |
-
file_get_html(
|
51 |
-
$file,
|
52 |
-
false,
|
53 |
-
null,
|
54 |
-
0,
|
55 |
-
$size - 1,
|
56 |
-
true,
|
57 |
-
false,
|
58 |
-
DEFAULT_TARGET_CHARSET,
|
59 |
-
false,
|
60 |
-
DEFAULT_BR_TEXT,
|
61 |
-
DEFAULT_SPAN_TEXT
|
62 |
-
),
|
63 |
-
'Files larger than $maxLen should return false.'
|
64 |
-
);
|
65 |
-
}
|
66 |
-
|
67 |
-
public function fileProvider()
|
68 |
-
{
|
69 |
-
$files = array();
|
70 |
-
|
71 |
-
foreach(glob(__DIR__ . '/data/file_get_html/*.html') as $path) {
|
72 |
-
$files[strtoupper(basename($path, '.html'))] = array($path);
|
73 |
-
}
|
74 |
-
|
75 |
-
return $files;
|
76 |
-
}
|
77 |
-
|
78 |
-
#endregion maxLen
|
79 |
-
|
80 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/htmldocument___call_test.php
DELETED
@@ -1,39 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests if __call properly forwards function calls
|
7 |
-
*/
|
8 |
-
class htmldocument___call_test extends TestCase {
|
9 |
-
private $html;
|
10 |
-
|
11 |
-
protected function setUp()
|
12 |
-
{
|
13 |
-
$this->html = new simple_html_dom();
|
14 |
-
}
|
15 |
-
|
16 |
-
protected function tearDown()
|
17 |
-
{
|
18 |
-
$this->html->clear();
|
19 |
-
unset($this->html);
|
20 |
-
}
|
21 |
-
|
22 |
-
function test_load_file_should_return_loadFile()
|
23 |
-
{
|
24 |
-
$file = __DIR__ . '/data/htmldocument___call/testdata.html';
|
25 |
-
|
26 |
-
$this->assertEquals(
|
27 |
-
$this->html->loadFile($file),
|
28 |
-
$this->html->load_file($file)
|
29 |
-
);
|
30 |
-
}
|
31 |
-
|
32 |
-
/**
|
33 |
-
* @expectedException PHPUnit\Framework\Error\Error
|
34 |
-
*/
|
35 |
-
function test_unknown_function_should_return_error()
|
36 |
-
{
|
37 |
-
$this->html->doSomethingStupid();
|
38 |
-
}
|
39 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/htmldocument_test.php
DELETED
@@ -1,249 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests for simple_html_dom
|
7 |
-
*/
|
8 |
-
class htmldocument_test extends TestCase {
|
9 |
-
|
10 |
-
private $html;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
$this->html = new simple_html_dom;
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
$this->html->clear();
|
20 |
-
unset($this->html);
|
21 |
-
}
|
22 |
-
|
23 |
-
public function test___get_outertext_should_return_html()
|
24 |
-
{
|
25 |
-
$doc = '<html></html>';
|
26 |
-
$this->html->load($doc);
|
27 |
-
|
28 |
-
$this->assertEquals($doc, $this->html->outertext);
|
29 |
-
}
|
30 |
-
|
31 |
-
public function test___get_innertext_should_return_html()
|
32 |
-
{
|
33 |
-
$doc = '<html></html>';
|
34 |
-
$this->html->load($doc);
|
35 |
-
|
36 |
-
$this->assertEquals($doc, $this->html->innertext);
|
37 |
-
}
|
38 |
-
|
39 |
-
public function test___get_plaintext_should_return_html_content()
|
40 |
-
{
|
41 |
-
$expected = 'Hello, World!';
|
42 |
-
$doc = '<html><p>Hello, World!</p></html>';
|
43 |
-
$this->html->load($doc);
|
44 |
-
|
45 |
-
$this->assertEquals($expected, $this->html->plaintext);
|
46 |
-
}
|
47 |
-
|
48 |
-
public function test___get_charset_should_return__charset()
|
49 |
-
{
|
50 |
-
$expected = 'UTF-8';
|
51 |
-
$doc = '<html><p>Hello, World!</p></html>';
|
52 |
-
$this->html->load($doc);
|
53 |
-
|
54 |
-
$this->assertEquals($expected, $this->html->charset);
|
55 |
-
}
|
56 |
-
|
57 |
-
public function test___get_target_charset_should_return__target_charset()
|
58 |
-
{
|
59 |
-
$expected = 'UTF-8';
|
60 |
-
$doc = '<html><p>Hello, World!</p></html>';
|
61 |
-
$this->html->load($doc);
|
62 |
-
|
63 |
-
$this->assertEquals($expected, $this->html->target_charset);
|
64 |
-
}
|
65 |
-
|
66 |
-
public function test___get_should_return_null_for_unknown_type()
|
67 |
-
{
|
68 |
-
$doc = '<html></html>';
|
69 |
-
$this->html->load($doc);
|
70 |
-
|
71 |
-
$this->assertNull($this->html->this_type_doesnt_exist_probably);
|
72 |
-
}
|
73 |
-
|
74 |
-
public function test_childNodes_should_return_element_by_index()
|
75 |
-
{
|
76 |
-
$expected = 'PHP Simple HTML DOM Parser';
|
77 |
-
$doc = '<p>PHP Simple HTML DOM Parser</p>';
|
78 |
-
$this->html->load($doc);
|
79 |
-
|
80 |
-
$this->assertEquals($expected, $this->html->childNodes(0)->text());
|
81 |
-
}
|
82 |
-
|
83 |
-
public function test_childNodes_should_return_null_if_index_out_of_range()
|
84 |
-
{
|
85 |
-
$doc = '<p>PHP Simple HTML DOM Parser</p>';
|
86 |
-
$this->html->load($doc);
|
87 |
-
|
88 |
-
$this->assertNull($this->html->childNodes(1));
|
89 |
-
}
|
90 |
-
|
91 |
-
public function test_childNodes_should_work_after_remove()
|
92 |
-
{
|
93 |
-
$doc = '<a href="#"></a><img><p></p>';
|
94 |
-
|
95 |
-
$this->html->load($doc);
|
96 |
-
$this->html->find('img', 0)->remove();
|
97 |
-
|
98 |
-
$this->assertCount(2, $this->html->childNodes());
|
99 |
-
$this->assertArrayHasKey(0, $this->html->childNodes());
|
100 |
-
$this->assertArrayHasKey(1, $this->html->childNodes());
|
101 |
-
}
|
102 |
-
|
103 |
-
public function test_getElementById_should_return_matching_element()
|
104 |
-
{
|
105 |
-
$doc = '<html><p id="claim">PHP Simple HTML DOM Parser</p></html>';
|
106 |
-
|
107 |
-
$this->html->load($doc);
|
108 |
-
|
109 |
-
$this->assertNotNull($this->html->getElementById('claim'));
|
110 |
-
$this->assertNull($this->html->getElementById('unknown'));
|
111 |
-
}
|
112 |
-
|
113 |
-
public function test_getElementsById_should_return_matching_element()
|
114 |
-
{
|
115 |
-
// Note, this technically doesn't make sense but it's supported
|
116 |
-
$doc = '<html><p id="a"></p><p id="a"></p></html>';
|
117 |
-
|
118 |
-
$this->html->load($doc);
|
119 |
-
|
120 |
-
$this->assertCount(2, $this->html->getElementsById('a'));
|
121 |
-
}
|
122 |
-
|
123 |
-
public function test_getElementByTagName_should_return_matching_element()
|
124 |
-
{
|
125 |
-
$expected = 'Hello';
|
126 |
-
$doc = '<html><p>Hello</p><p>World</p></html>';
|
127 |
-
|
128 |
-
$this->html->load($doc);
|
129 |
-
|
130 |
-
$this->assertEquals($expected, $this->html->getElementByTagName('p')->text());
|
131 |
-
}
|
132 |
-
|
133 |
-
public function test_getElementsByTagName_should_return_matching_element()
|
134 |
-
{
|
135 |
-
$doc = '<html><p>Hello</p><p>World</p></html>';
|
136 |
-
|
137 |
-
$this->html->load($doc);
|
138 |
-
|
139 |
-
$this->assertCount(2, $this->html->getElementsByTagName('p'));
|
140 |
-
}
|
141 |
-
|
142 |
-
public function test_lastChild_should_return_null_without_children()
|
143 |
-
{
|
144 |
-
$doc = ' ';
|
145 |
-
$this->html->load($doc);
|
146 |
-
|
147 |
-
$this->assertNull($this->html->lastChild());
|
148 |
-
}
|
149 |
-
|
150 |
-
public function test_lastChild_should_work_after_remove()
|
151 |
-
{
|
152 |
-
$doc = '<div><a href="#"></a><p></p></div>';
|
153 |
-
|
154 |
-
$this->html->load($doc);
|
155 |
-
$this->html->find('p', 0)->remove();
|
156 |
-
|
157 |
-
$this->assertNotNull($this->html->lastChild());
|
158 |
-
}
|
159 |
-
|
160 |
-
public function test_createElement_should_return_an_element()
|
161 |
-
{
|
162 |
-
$this->assertEquals(
|
163 |
-
'<html></html>',
|
164 |
-
$this->html->createElement('html')->save()
|
165 |
-
);
|
166 |
-
}
|
167 |
-
|
168 |
-
public function test_createElement_should_create_element_with_content()
|
169 |
-
{
|
170 |
-
$this->assertEquals(
|
171 |
-
'<html>Hello, World!</html>',
|
172 |
-
$this->html->createElement('html', 'Hello, World!')->save()
|
173 |
-
);
|
174 |
-
}
|
175 |
-
|
176 |
-
/** @dataProvider dataProvider_for_createElement_should_not_affect_the_dom */
|
177 |
-
public function test_createElement_should_not_affect_the_dom($doc)
|
178 |
-
{
|
179 |
-
$this->html->load($doc);
|
180 |
-
|
181 |
-
$this->assertEquals(
|
182 |
-
'<html></html>',
|
183 |
-
$this->html->createElement('html')->save()
|
184 |
-
);
|
185 |
-
$this->assertEquals(
|
186 |
-
'<head></head>',
|
187 |
-
$this->html->createElement('head')->save()
|
188 |
-
);
|
189 |
-
$this->assertEquals(
|
190 |
-
'<body></body>',
|
191 |
-
$this->html->createElement('body')->save()
|
192 |
-
);
|
193 |
-
$this->assertEquals(
|
194 |
-
$doc,
|
195 |
-
$this->html->save()
|
196 |
-
);
|
197 |
-
|
198 |
-
$this->assertNull($this->html->find('html', 0));
|
199 |
-
$this->assertNull($this->html->find('head', 0));
|
200 |
-
$this->assertNull($this->html->find('body', 0));
|
201 |
-
}
|
202 |
-
|
203 |
-
public function dataProvider_for_createElement_should_not_affect_the_dom()
|
204 |
-
{
|
205 |
-
return array(
|
206 |
-
'empty' => array(''),
|
207 |
-
'single' => array('<div></div>'),
|
208 |
-
'nested' => array('<table><tr></tr><tr></tr></table>'),
|
209 |
-
);
|
210 |
-
}
|
211 |
-
|
212 |
-
public function test_createTextNode_should_return_a_node()
|
213 |
-
{
|
214 |
-
$this->assertNotNull($this->html->createTextNode('<html>'));
|
215 |
-
}
|
216 |
-
|
217 |
-
public function test_createTextNode_should_create_a_text_node()
|
218 |
-
{
|
219 |
-
$this->assertEquals(
|
220 |
-
'Hello, World!',
|
221 |
-
$this->html->createTextNode('Hello, World!')->save()
|
222 |
-
);
|
223 |
-
}
|
224 |
-
|
225 |
-
/** @dataProvider dataProvider_for_createTextNode_should_not_affect_the_dom */
|
226 |
-
public function test_createTextNode_should_not_affect_the_dom($doc)
|
227 |
-
{
|
228 |
-
$this->html->load($doc);
|
229 |
-
|
230 |
-
$this->assertEquals(
|
231 |
-
'Hello, World!',
|
232 |
-
$this->html->createTextNode('Hello, World!')->save()
|
233 |
-
);
|
234 |
-
$this->assertEquals(
|
235 |
-
$doc,
|
236 |
-
$this->html->save()
|
237 |
-
);
|
238 |
-
}
|
239 |
-
|
240 |
-
public function dataProvider_for_createTextNode_should_not_affect_the_dom()
|
241 |
-
{
|
242 |
-
return array(
|
243 |
-
'empty' => array(''),
|
244 |
-
'single' => array('<div></div>'),
|
245 |
-
'nested' => array('<table><tr></tr><tr></tr></table>'),
|
246 |
-
);
|
247 |
-
}
|
248 |
-
|
249 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/htmlnode___call_test.php
DELETED
@@ -1,77 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests if __call properly forwards function calls
|
7 |
-
*/
|
8 |
-
class htmlnode___call_test extends TestCase {
|
9 |
-
private $html;
|
10 |
-
|
11 |
-
protected function setUp()
|
12 |
-
{
|
13 |
-
$this->html = new simple_html_dom();
|
14 |
-
$this->html->load('<html><head /><body /></html>');
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
$this->html->clear();
|
20 |
-
unset($this->html);
|
21 |
-
}
|
22 |
-
|
23 |
-
function test_children_should_return_childNodes()
|
24 |
-
{
|
25 |
-
$this->assertEquals($this->html->root->childNodes(), $this->html->root->children());
|
26 |
-
$this->assertEquals($this->html->root->childNodes(0), $this->html->root->children(0));
|
27 |
-
$this->assertEquals($this->html->root->childNodes(1), $this->html->root->children(1));
|
28 |
-
}
|
29 |
-
|
30 |
-
function test_first_child_should_return_firstChild()
|
31 |
-
{
|
32 |
-
$this->assertEquals(
|
33 |
-
$this->html->root->firstChild(),
|
34 |
-
$this->html->root->first_child()
|
35 |
-
);
|
36 |
-
}
|
37 |
-
|
38 |
-
function test_has_child_should_return_hasChildNodes()
|
39 |
-
{
|
40 |
-
$this->assertEquals(
|
41 |
-
$this->html->root->hasChildNodes(),
|
42 |
-
$this->html->root->has_child()
|
43 |
-
);
|
44 |
-
}
|
45 |
-
|
46 |
-
function test_last_child_should_return_lastChild()
|
47 |
-
{
|
48 |
-
$this->assertEquals(
|
49 |
-
$this->html->root->lastChild(),
|
50 |
-
$this->html->root->last_child()
|
51 |
-
);
|
52 |
-
}
|
53 |
-
|
54 |
-
function test_next_sibling_should_return_nextSibling()
|
55 |
-
{
|
56 |
-
$this->assertEquals(
|
57 |
-
$this->html->find('head', 0)->nextSibling(),
|
58 |
-
$this->html->find('head', 0)->next_sibling()
|
59 |
-
);
|
60 |
-
}
|
61 |
-
|
62 |
-
function test_prev_sibling_should_return_previousSibling()
|
63 |
-
{
|
64 |
-
$this->assertEquals(
|
65 |
-
$this->html->find('body', 0)->previousSibling(),
|
66 |
-
$this->html->find('body', 0)->prev_sibling()
|
67 |
-
);
|
68 |
-
}
|
69 |
-
|
70 |
-
/**
|
71 |
-
* @expectedException PHPUnit\Framework\Error\Error
|
72 |
-
*/
|
73 |
-
function test_unknown_function_should_return_error()
|
74 |
-
{
|
75 |
-
$this->html->root->doSomethingStupid();
|
76 |
-
}
|
77 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/htmlnode_test.php
DELETED
@@ -1,628 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests for simple_html_dom_node
|
7 |
-
*/
|
8 |
-
class htmlnode_test extends TestCase {
|
9 |
-
|
10 |
-
private $html;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
$this->html = new simple_html_dom;
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
$this->html->clear();
|
20 |
-
unset($this->html);
|
21 |
-
}
|
22 |
-
|
23 |
-
public function test___get_should_return_false_for_unknown_attributes()
|
24 |
-
{
|
25 |
-
$doc = '<html></html>';
|
26 |
-
$this->html->load($doc);
|
27 |
-
|
28 |
-
$this->assertFalse($this->html->find('html', 0)->lang);
|
29 |
-
}
|
30 |
-
|
31 |
-
public function test___set_should_overwrite_innertext()
|
32 |
-
{
|
33 |
-
$expected = 'PHP Simple HTML DOM Parser';
|
34 |
-
$doc = '<html><!--Hello, World!--></html>';
|
35 |
-
$this->html->load($doc);
|
36 |
-
$this->html->find('comment', 0)->innertext = '';
|
37 |
-
$this->html->find('html', 0)->innertext = $expected;
|
38 |
-
|
39 |
-
$this->assertEquals($expected, $this->html->find('html', 0)->innertext());
|
40 |
-
$this->assertEmpty($this->html->find('comment', 0)->innertext());
|
41 |
-
}
|
42 |
-
|
43 |
-
public function test___unset_should_remove_attribute()
|
44 |
-
{
|
45 |
-
$doc = '<html lang="en"></html>';
|
46 |
-
$this->html->load($doc);
|
47 |
-
unset($this->html->find('html', 0)->lang);
|
48 |
-
|
49 |
-
$this->assertFalse($this->html->find('html', 0)->lang);
|
50 |
-
}
|
51 |
-
|
52 |
-
public function test_remove_should_remove_node()
|
53 |
-
{
|
54 |
-
$expected = <<<EOD
|
55 |
-
<html>
|
56 |
-
<body>
|
57 |
-
|
58 |
-
</body>
|
59 |
-
</html>
|
60 |
-
EOD;
|
61 |
-
|
62 |
-
$doc = <<<EOD
|
63 |
-
<html>
|
64 |
-
<body>
|
65 |
-
<table>
|
66 |
-
<tr><th>Title</th></tr>
|
67 |
-
<tr><td>Row 1</td></tr>
|
68 |
-
</table>
|
69 |
-
</body>
|
70 |
-
</html>
|
71 |
-
EOD;
|
72 |
-
|
73 |
-
$this->html->load($doc, true, false);
|
74 |
-
|
75 |
-
$table = $this->html->find('table', 0);
|
76 |
-
|
77 |
-
$this->assertEquals($doc, $this->html->save());
|
78 |
-
|
79 |
-
$table->remove();
|
80 |
-
|
81 |
-
$this->assertEquals($expected, $this->html->save());
|
82 |
-
}
|
83 |
-
|
84 |
-
public function test_removeChild()
|
85 |
-
{
|
86 |
-
$expected = <<<EOD
|
87 |
-
<html>
|
88 |
-
<body>
|
89 |
-
|
90 |
-
</body>
|
91 |
-
</html>
|
92 |
-
EOD;
|
93 |
-
|
94 |
-
$doc = <<<EOD
|
95 |
-
<html>
|
96 |
-
<body>
|
97 |
-
<table>
|
98 |
-
<tr><th>Title</th></tr>
|
99 |
-
<tr><td>Row 1</td></tr>
|
100 |
-
</table>
|
101 |
-
</body>
|
102 |
-
</html>
|
103 |
-
EOD;
|
104 |
-
|
105 |
-
$this->html->load($doc, true, false);
|
106 |
-
|
107 |
-
$body = $this->html->find('body', 0);
|
108 |
-
|
109 |
-
$this->assertEquals($doc, $this->html->save());
|
110 |
-
|
111 |
-
$body->removeChild($body->find('table', 0));
|
112 |
-
|
113 |
-
$this->assertEquals($expected, $this->html->save());
|
114 |
-
}
|
115 |
-
|
116 |
-
public function test_has_child_should_return_boolean()
|
117 |
-
{
|
118 |
-
$doc = '<html><p>PHP Simple HTML DOM Parser</p></html>';
|
119 |
-
$this->html->load($doc);
|
120 |
-
|
121 |
-
$this->assertTrue($this->html->find('html', 0)->has_child());
|
122 |
-
$this->assertFalse($this->html->find('p', 0)->has_child());
|
123 |
-
|
124 |
-
$this->assertTrue($this->html->find('html', 0)->hasChildNodes());
|
125 |
-
$this->assertFalse($this->html->find('p', 0)->hasChildNodes());
|
126 |
-
}
|
127 |
-
|
128 |
-
public function test_hasAttribute_should_return_boolean()
|
129 |
-
{
|
130 |
-
$doc = '<html lang="en"></html>';
|
131 |
-
|
132 |
-
$this->html->load($doc);
|
133 |
-
|
134 |
-
$this->assertTrue($this->html->find('html', 0)->hasAttribute('lang'));
|
135 |
-
$this->assertFalse($this->html->find('html', 0)->hasAttribute('encoding'));
|
136 |
-
}
|
137 |
-
|
138 |
-
public function test_hasClass_should_return_true()
|
139 |
-
{
|
140 |
-
$this->html->load('<p class="article">Simple HTML DOM Parser</p>');
|
141 |
-
|
142 |
-
$this->assertTrue($this->html->find('p', 0)->hasClass('article'));
|
143 |
-
}
|
144 |
-
|
145 |
-
public function test_innertext_should_return_comment()
|
146 |
-
{
|
147 |
-
$expected = 'For your information';
|
148 |
-
$doc = '<html><!--For your information--></html>';
|
149 |
-
|
150 |
-
$this->html->load($doc);
|
151 |
-
|
152 |
-
$this->assertEquals($expected, $this->html->find('comment', 0)->innertext());
|
153 |
-
}
|
154 |
-
|
155 |
-
public function test_hasClass_should_return_false()
|
156 |
-
{
|
157 |
-
$this->html->load('<p>Simple HTML DOM Parser</p>');
|
158 |
-
|
159 |
-
$this->assertFalse($this->html->find('p', 0)->hasClass('article'));
|
160 |
-
}
|
161 |
-
|
162 |
-
public function test_addClass_should_add_class()
|
163 |
-
{
|
164 |
-
$expected = '<p class="article">Simple HTML DOM Parser</p>';
|
165 |
-
|
166 |
-
$this->html->load('<p>Simple HTML DOM Parser</p>');
|
167 |
-
$this->html->find('p', 0)->addClass('article');
|
168 |
-
|
169 |
-
$this->assertEquals($expected, $this->html->save());
|
170 |
-
}
|
171 |
-
|
172 |
-
public function test_addClass_should_add_class_list()
|
173 |
-
{
|
174 |
-
$expected = '<p class="article new">Simple HTML DOM Parser</p>';
|
175 |
-
|
176 |
-
$this->html->load('<p>Simple HTML DOM Parser</p>');
|
177 |
-
$this->html->find('p', 0)->addClass('article new');
|
178 |
-
|
179 |
-
$this->assertEquals($expected, $this->html->save());
|
180 |
-
}
|
181 |
-
|
182 |
-
public function test_addClass_should_add_class_array()
|
183 |
-
{
|
184 |
-
$expected = '<p class="article new">Simple HTML DOM Parser</p>';
|
185 |
-
|
186 |
-
$this->html->load('<p>Simple HTML DOM Parser</p>');
|
187 |
-
$this->html->find('p', 0)->addClass(array('article', 'new'));
|
188 |
-
|
189 |
-
$this->assertEquals($expected, $this->html->save());
|
190 |
-
}
|
191 |
-
|
192 |
-
public function test_addClass_should_add_missing_classes()
|
193 |
-
{
|
194 |
-
$expected = '<p class="article new">Simple HTML DOM Parser</p>';
|
195 |
-
|
196 |
-
$this->html->load('<p class="article">Simple HTML DOM Parser</p>');
|
197 |
-
$this->html->find('p', 0)->addClass('article new');
|
198 |
-
|
199 |
-
$this->assertEquals($expected, $this->html->save());
|
200 |
-
}
|
201 |
-
|
202 |
-
public function test_appendChild_should_add_node_as_child()
|
203 |
-
{
|
204 |
-
$expected = '<html><p>Simple HTML DOM Parser</p></html>';
|
205 |
-
$doc = '<html></html>';
|
206 |
-
|
207 |
-
$this->html->load($doc);
|
208 |
-
$child = $this->html->createElement('p', 'Simple HTML DOM Parser');
|
209 |
-
$this->html->find('html', 0)->appendChild($child);
|
210 |
-
|
211 |
-
$this->assertEquals($expected, $this->html->save());
|
212 |
-
}
|
213 |
-
|
214 |
-
public function test_removeClass_should_do_nothing_without_class()
|
215 |
-
{
|
216 |
-
$expected = '<p>Simple HTML DOM Parser</p>';
|
217 |
-
$doc = '<p>Simple HTML DOM Parser</p>';
|
218 |
-
|
219 |
-
$this->html->load($doc);
|
220 |
-
$this->html->find('p', 0)->removeClass('new');
|
221 |
-
|
222 |
-
$this->assertEquals($expected, $this->html->save());
|
223 |
-
}
|
224 |
-
|
225 |
-
public function test_removeClass_should_remove_all_classes()
|
226 |
-
{
|
227 |
-
$expected = '<p>Simple HTML DOM Parser</p>';
|
228 |
-
|
229 |
-
$this->html->load('<p class="article">Simple HTML DOM Parser</p>');
|
230 |
-
$this->html->find('p', 0)->removeClass();
|
231 |
-
|
232 |
-
$this->assertEquals($expected, $this->html->save());
|
233 |
-
}
|
234 |
-
|
235 |
-
public function test_removeClass_should_remove_class_attribute()
|
236 |
-
{
|
237 |
-
$expected = '<p>Simple HTML DOM Parser</p>';
|
238 |
-
|
239 |
-
$this->html->load('<p class="article">Simple HTML DOM Parser</p>');
|
240 |
-
$this->html->find('p', 0)->removeClass('article');
|
241 |
-
|
242 |
-
$this->assertEquals($expected, $this->html->save());
|
243 |
-
}
|
244 |
-
|
245 |
-
public function test_removeClass_should_remove_class()
|
246 |
-
{
|
247 |
-
$expected = '<p class="article">Simple HTML DOM Parser</p>';
|
248 |
-
|
249 |
-
$this->html->load('<p class="article new">Simple HTML DOM Parser</p>');
|
250 |
-
$this->html->find('p', 0)->removeClass('new');
|
251 |
-
|
252 |
-
$this->assertEquals($expected, $this->html->save());
|
253 |
-
}
|
254 |
-
|
255 |
-
public function test_removeClass_should_remove_class_list()
|
256 |
-
{
|
257 |
-
$expected = '<p>Simple HTML DOM Parser</p>';
|
258 |
-
|
259 |
-
$this->html->load('<p class="article new">Simple HTML DOM Parser</p>');
|
260 |
-
$this->html->find('p', 0)->removeClass('article new');
|
261 |
-
|
262 |
-
$this->assertEquals($expected, $this->html->save());
|
263 |
-
}
|
264 |
-
|
265 |
-
public function test_removeClass_should_remove_class_array()
|
266 |
-
{
|
267 |
-
$expected = '<p>Simple HTML DOM Parser</p>';
|
268 |
-
|
269 |
-
$this->html->load('<p class="article new">Simple HTML DOM Parser</p>');
|
270 |
-
$this->html->find('p', 0)->removeClass(array('article', 'new'));
|
271 |
-
|
272 |
-
$this->assertEquals($expected, $this->html->save());
|
273 |
-
}
|
274 |
-
|
275 |
-
public function test_save_should_return_outerhtml()
|
276 |
-
{
|
277 |
-
$expected = '<p>Simple HTML DOM Parser</p>';
|
278 |
-
|
279 |
-
$this->html->load('<div><p>Simple HTML DOM Parser</p></div>');
|
280 |
-
|
281 |
-
$this->assertEquals($expected, $this->html->find('p', 0)->save());
|
282 |
-
}
|
283 |
-
|
284 |
-
public function test_setAttribute_should_update_attribute()
|
285 |
-
{
|
286 |
-
$expected = 'de';
|
287 |
-
$doc = '<html lang="en"></html>';
|
288 |
-
|
289 |
-
$this->html->load($doc);
|
290 |
-
$this->html->find('html', 0)->setAttribute('lang', 'de');
|
291 |
-
|
292 |
-
$this->assertEquals($expected, $this->html->find('html', 0)->getAttribute('lang'));
|
293 |
-
}
|
294 |
-
|
295 |
-
public function test_text_should_be_empty_after_clear()
|
296 |
-
{
|
297 |
-
$doc = '<html></html>';
|
298 |
-
|
299 |
-
$this->html->load($doc);
|
300 |
-
$element = $this->html->find('html', 0);
|
301 |
-
$element->clear();
|
302 |
-
|
303 |
-
$this->assertEmpty($element->text());
|
304 |
-
}
|
305 |
-
|
306 |
-
public function test_text_should_not_include_script_elements()
|
307 |
-
{
|
308 |
-
$expected = 'PHP Simple HTML DOM Parser';
|
309 |
-
$doc = '<script>alert();</script><h1>PHP Simple HTML DOM Parser</h1>';
|
310 |
-
|
311 |
-
$this->html->load($doc);
|
312 |
-
|
313 |
-
$this->assertEquals($expected, $this->html->root->text());
|
314 |
-
}
|
315 |
-
|
316 |
-
public function test_text_should_not_include_style_elements()
|
317 |
-
{
|
318 |
-
$expected = 'PHP Simple HTML DOM Parser';
|
319 |
-
$doc = '<style>h1{color: blue;}</style><h1>PHP Simple HTML DOM Parser</h1>';
|
320 |
-
|
321 |
-
$this->html->load($doc);
|
322 |
-
|
323 |
-
$this->assertEquals($expected, $this->html->root->text());
|
324 |
-
}
|
325 |
-
|
326 |
-
public function test_text_should_not_include_comments()
|
327 |
-
{
|
328 |
-
$expected = 'PHP Simple HTML DOM Parser';
|
329 |
-
$doc = '<!--Hi there :)--><h1>PHP Simple HTML DOM Parser</h1>';
|
330 |
-
|
331 |
-
$this->html->load($doc);
|
332 |
-
|
333 |
-
$this->assertEquals($expected, $this->html->root->text());
|
334 |
-
}
|
335 |
-
|
336 |
-
public function test_text_should_include_cdata_content()
|
337 |
-
{
|
338 |
-
$expected = '<?php Simple HTML DOM Parser';
|
339 |
-
$doc = '<h1><![CDATA[<?php]]> Simple HTML DOM Parser</h1>';
|
340 |
-
|
341 |
-
$this->html->load($doc);
|
342 |
-
|
343 |
-
$this->assertEquals($expected, $this->html->root->text());
|
344 |
-
}
|
345 |
-
|
346 |
-
public function test_save_should_create_file()
|
347 |
-
{
|
348 |
-
$expected_file = __DIR__ . '/data/simple_html_dom_node/save_file_expected.html';
|
349 |
-
$file = __DIR__ . '/data/simple_html_dom_node/save_file.html';
|
350 |
-
|
351 |
-
$this->html->load('<div><p>Simple HTML DOM Parser</p></div>');
|
352 |
-
$this->html->find('p', 0)->save($file);
|
353 |
-
|
354 |
-
$this->assertFileExists($file);
|
355 |
-
$this->assertFileEquals($expected_file, $file);
|
356 |
-
}
|
357 |
-
|
358 |
-
public function test_find_ancestor_tag_should_return_element()
|
359 |
-
{
|
360 |
-
$doc = '<html><p></p></html>';
|
361 |
-
$this->html->load($doc);
|
362 |
-
|
363 |
-
$this->assertEquals(
|
364 |
-
$this->html->find('html', 0),
|
365 |
-
$this->html->find('p', 0)->find_ancestor_tag('html')
|
366 |
-
);
|
367 |
-
}
|
368 |
-
|
369 |
-
public function test_find_ancestor_tag_should_return_null_without_match()
|
370 |
-
{
|
371 |
-
$doc = '<html><p></p></html>';
|
372 |
-
$this->html->load($doc);
|
373 |
-
|
374 |
-
$this->assertNull($this->html->find('p', 0)->find_ancestor_tag('a'));
|
375 |
-
}
|
376 |
-
|
377 |
-
public function test_first_child_should_return_null_without_children()
|
378 |
-
{
|
379 |
-
$doc = '<html></html>';
|
380 |
-
$this->html->load($doc);
|
381 |
-
|
382 |
-
$this->assertNull($this->html->find('html', 0)->first_child());
|
383 |
-
$this->assertNull($this->html->find('html', 0)->firstChild());
|
384 |
-
}
|
385 |
-
|
386 |
-
public function test_first_child_should_work_after_remove()
|
387 |
-
{
|
388 |
-
$doc = '<div><a href="#"></a><p></p></div>';
|
389 |
-
|
390 |
-
$this->html->load($doc);
|
391 |
-
$this->html->find('a', 0)->remove();
|
392 |
-
|
393 |
-
$this->assertNotNull($this->html->find('div', 0)->first_child());
|
394 |
-
$this->assertNotNull($this->html->find('div', 0)->firstChild());
|
395 |
-
}
|
396 |
-
|
397 |
-
public function test_getAttribute_should_return_attribute()
|
398 |
-
{
|
399 |
-
$expected = 'en';
|
400 |
-
$doc = '<html lang="en"></html>';
|
401 |
-
|
402 |
-
$this->html->load($doc);
|
403 |
-
|
404 |
-
$this->assertEquals($expected, $this->html->find('html', 0)->getAttribute('lang'));
|
405 |
-
}
|
406 |
-
|
407 |
-
public function test_getElementById_should_return_matching_element()
|
408 |
-
{
|
409 |
-
$doc = '<html><p id="claim">PHP Simple HTML DOM Parser</p></html>';
|
410 |
-
|
411 |
-
$this->html->load($doc);
|
412 |
-
|
413 |
-
$this->assertNotNull($this->html->root->getElementById('claim'));
|
414 |
-
$this->assertNull($this->html->root->getElementById('unknown'));
|
415 |
-
}
|
416 |
-
|
417 |
-
public function test_getElementsById_should_return_matching_element()
|
418 |
-
{
|
419 |
-
// Note, this technically doesn't make sense but it's supported
|
420 |
-
$doc = '<html><p id="a"></p><p id="a"></p></html>';
|
421 |
-
|
422 |
-
$this->html->load($doc);
|
423 |
-
|
424 |
-
$this->assertCount(2, $this->html->root->getElementsById('a'));
|
425 |
-
}
|
426 |
-
|
427 |
-
public function test_getElementByTagName_should_return_matching_element()
|
428 |
-
{
|
429 |
-
$expected = 'Hello';
|
430 |
-
$doc = '<html><p>Hello</p><p>World</p></html>';
|
431 |
-
|
432 |
-
$this->html->load($doc);
|
433 |
-
|
434 |
-
$this->assertEquals($expected, $this->html->root->getElementByTagName('p')->text());
|
435 |
-
}
|
436 |
-
|
437 |
-
public function test_getElementsByTagName_should_return_matching_element()
|
438 |
-
{
|
439 |
-
$doc = '<html><p>Hello</p><p>World</p></html>';
|
440 |
-
|
441 |
-
$this->html->load($doc);
|
442 |
-
|
443 |
-
$this->assertCount(2, $this->html->root->getElementsByTagName('p'));
|
444 |
-
}
|
445 |
-
|
446 |
-
public function test_last_child_should_return_null_without_children()
|
447 |
-
{
|
448 |
-
$doc = '<html></html>';
|
449 |
-
$this->html->load($doc);
|
450 |
-
|
451 |
-
$this->assertNull($this->html->find('html', 0)->last_child());
|
452 |
-
$this->assertNull($this->html->find('html', 0)->lastChild());
|
453 |
-
}
|
454 |
-
|
455 |
-
public function test_last_child_should_work_after_remove()
|
456 |
-
{
|
457 |
-
$doc = '<div><a href="#"></a><p></p></div>';
|
458 |
-
|
459 |
-
$this->html->load($doc);
|
460 |
-
$this->html->find('p', 0)->remove();
|
461 |
-
|
462 |
-
$this->assertNotNull($this->html->find('div', 0)->last_child());
|
463 |
-
$this->assertNotNull($this->html->find('div', 0)->lastChild());
|
464 |
-
}
|
465 |
-
|
466 |
-
public function test_next_sibling_should_return_null_without_parent()
|
467 |
-
{
|
468 |
-
$doc = '<html></html>';
|
469 |
-
$this->html->load($doc);
|
470 |
-
|
471 |
-
$this->assertNull($this->html->root->next_sibling());
|
472 |
-
}
|
473 |
-
|
474 |
-
public function test_next_sibling_should_return_null_without_sibling()
|
475 |
-
{
|
476 |
-
$doc = '<html></html>';
|
477 |
-
$this->html->load($doc);
|
478 |
-
|
479 |
-
$this->assertNull($this->html->find('html', 0)->next_sibling());
|
480 |
-
}
|
481 |
-
|
482 |
-
public function test_outertext_should_return_innertext_for_root()
|
483 |
-
{
|
484 |
-
$doc = '<html><!--For your information--></html>';
|
485 |
-
|
486 |
-
$this->html->load($doc);
|
487 |
-
|
488 |
-
$this->assertEquals($doc, $this->html->root->outertext());
|
489 |
-
}
|
490 |
-
|
491 |
-
public function test_outertext_should_return_custom_outertext()
|
492 |
-
{
|
493 |
-
$expected = '<html>Hello, World!</html>';
|
494 |
-
$doc = '<html><p>PHP Simple HTML DOM Parser</p></html>';
|
495 |
-
|
496 |
-
$this->html->load($doc);
|
497 |
-
$this->html->find('p', 0)->outertext = 'Hello, World!';
|
498 |
-
|
499 |
-
$this->assertEquals($expected, $this->html->find('html', 0)->outertext());
|
500 |
-
}
|
501 |
-
|
502 |
-
public function test_next_sibling_should_work_after_remove()
|
503 |
-
{
|
504 |
-
$doc = '<div><a href="#"></a><img><p></p></div>';
|
505 |
-
|
506 |
-
$this->html->load($doc);
|
507 |
-
$this->html->find('img', 0)->remove();
|
508 |
-
|
509 |
-
$this->assertNotNull($this->html->find('a', 0)->next_sibling());
|
510 |
-
}
|
511 |
-
|
512 |
-
public function test_nodeName_should_return_tag_name()
|
513 |
-
{
|
514 |
-
$expected = 'div';
|
515 |
-
$doc = '<div></div>';
|
516 |
-
|
517 |
-
$this->html->load($doc);
|
518 |
-
|
519 |
-
$this->assertEquals($expected, $this->html->find('div', 0)->nodeName());
|
520 |
-
}
|
521 |
-
|
522 |
-
public function test_parent_should_make_node_child_of_element()
|
523 |
-
{
|
524 |
-
$expected = '<html><p>PHP Simple HTML DOM Parser</p></html>';
|
525 |
-
$doc = '<html></html>';
|
526 |
-
|
527 |
-
$this->html->load($doc);
|
528 |
-
$this->html->find('html', 0);
|
529 |
-
|
530 |
-
$node = $this->html->createElement('p', 'PHP Simple HTML DOM Parser');
|
531 |
-
$node->parent($this->html->find('html', 0));
|
532 |
-
|
533 |
-
$this->assertEquals($expected, $this->html->save());
|
534 |
-
}
|
535 |
-
|
536 |
-
public function test_prev_sibling_should_return_null_without_parent()
|
537 |
-
{
|
538 |
-
$doc = '<html></html>';
|
539 |
-
$this->html->load($doc);
|
540 |
-
|
541 |
-
$this->assertNull($this->html->root->prev_sibling());
|
542 |
-
}
|
543 |
-
|
544 |
-
public function test_prev_sibling_should_return_null_without_sibling()
|
545 |
-
{
|
546 |
-
$doc = '<html></html>';
|
547 |
-
$this->html->load($doc);
|
548 |
-
|
549 |
-
$this->assertNull($this->html->find('html', 0)->prev_sibling());
|
550 |
-
}
|
551 |
-
|
552 |
-
public function test_prev_sibling_should_work_after_remove()
|
553 |
-
{
|
554 |
-
$doc = '<div><a href="#"></a><img><p></p></div>';
|
555 |
-
|
556 |
-
$this->html->load($doc);
|
557 |
-
$this->html->find('img', 0)->remove();
|
558 |
-
|
559 |
-
$this->assertNotNull($this->html->find('p', 0)->prev_sibling());
|
560 |
-
}
|
561 |
-
|
562 |
-
public function test_children_should_return_element_by_index()
|
563 |
-
{
|
564 |
-
$expected = 'PHP Simple HTML DOM Parser';
|
565 |
-
$doc = '<html><p>PHP Simple HTML DOM Parser</p></html>';
|
566 |
-
$this->html->load($doc);
|
567 |
-
|
568 |
-
$this->assertEquals($expected, $this->html->find('html', 0)->children(0)->text());
|
569 |
-
$this->assertEquals($expected, $this->html->find('html', 0)->childNodes(0)->text());
|
570 |
-
}
|
571 |
-
|
572 |
-
public function test_children_should_return_null_if_index_out_of_range()
|
573 |
-
{
|
574 |
-
$doc = '<html><p>PHP Simple HTML DOM Parser</p></html>';
|
575 |
-
$this->html->load($doc);
|
576 |
-
|
577 |
-
$this->assertNull($this->html->find('html', 0)->children(1));
|
578 |
-
$this->assertNull($this->html->find('html', 0)->childNodes(1));
|
579 |
-
}
|
580 |
-
|
581 |
-
public function test_children_should_work_after_remove()
|
582 |
-
{
|
583 |
-
$doc = '<div><a href="#"></a><img><p></p></div>';
|
584 |
-
|
585 |
-
$this->html->load($doc);
|
586 |
-
$this->html->find('img', 0)->remove();
|
587 |
-
|
588 |
-
$this->assertCount(2, $this->html->find('div', 0)->children());
|
589 |
-
$this->assertArrayHasKey(0, $this->html->find('div', 0)->children());
|
590 |
-
$this->assertArrayHasKey(1, $this->html->find('div', 0)->children());
|
591 |
-
|
592 |
-
$this->assertCount(2, $this->html->find('div', 0)->childNodes());
|
593 |
-
$this->assertArrayHasKey(0, $this->html->find('div', 0)->childNodes());
|
594 |
-
$this->assertArrayHasKey(1, $this->html->find('div', 0)->childNodes());
|
595 |
-
}
|
596 |
-
|
597 |
-
public function test_expect_should_return_null_for_no_match()
|
598 |
-
{
|
599 |
-
$doc = '<div><a href="#"></a><img><p></p></div>';
|
600 |
-
|
601 |
-
$this->html->load($doc);
|
602 |
-
$this->html->find('img', 0)->remove();
|
603 |
-
|
604 |
-
$this->assertNull($this->html->expect('p.class'));
|
605 |
-
$this->assertNull($this->html->expect('p.class', 0));
|
606 |
-
}
|
607 |
-
|
608 |
-
public function test_toString_should_return_html_document()
|
609 |
-
{
|
610 |
-
$doc = '<p>PHP Simple HTML DOM Parser</p>';
|
611 |
-
|
612 |
-
$this->html->load($doc);
|
613 |
-
|
614 |
-
$this->assertEquals($doc, sprintf('%s', $this->html->find('p', 0)));
|
615 |
-
}
|
616 |
-
|
617 |
-
public function test_xmltext_should_strip_cdata_tags()
|
618 |
-
{
|
619 |
-
$expected = '<p><em>PHP Simple HTML DOM Parser</em></p>';
|
620 |
-
$doc = '<p><![CDATA[<em>]]>PHP Simple HTML DOM Parser<![CDATA[</em>]]></p>';
|
621 |
-
|
622 |
-
$this->html->load($doc);
|
623 |
-
|
624 |
-
$this->assertEquals($expected, $this->html->root->xmltext);
|
625 |
-
$this->assertEquals($expected, $this->html->root->xmltext());
|
626 |
-
}
|
627 |
-
|
628 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/htmlweb_curl_test.php
DELETED
@@ -1,49 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../HtmlWeb.php';
|
3 |
-
use simplehtmldom\HtmlWeb;
|
4 |
-
use PHPUnit\Framework\TestCase;
|
5 |
-
|
6 |
-
/**
|
7 |
-
* Tests the cURL part of HtmlWeb
|
8 |
-
*/
|
9 |
-
class htmlweb_curl_test extends TestCase {
|
10 |
-
private $web;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
if (!extension_loaded('curl')) {
|
15 |
-
$this->markTestSkipped('The cURL extension must be enabled for this test.');
|
16 |
-
}
|
17 |
-
|
18 |
-
$this->web = new HtmlWeb();
|
19 |
-
}
|
20 |
-
|
21 |
-
protected function tearDown()
|
22 |
-
{
|
23 |
-
unset($this->web);
|
24 |
-
}
|
25 |
-
|
26 |
-
public function urlProvider()
|
27 |
-
{
|
28 |
-
return array(
|
29 |
-
'Google' => array('https://www.google.com/'),
|
30 |
-
'GitHub' => array('https://www.github.com/'),
|
31 |
-
);
|
32 |
-
}
|
33 |
-
|
34 |
-
/** @dataProvider urlProvider */
|
35 |
-
public function test_load_should_return_dom_object($url)
|
36 |
-
{
|
37 |
-
$this->assertNotNull($this->web->load($url));
|
38 |
-
}
|
39 |
-
|
40 |
-
public function test_load_should_return_null_on_negative_response()
|
41 |
-
{
|
42 |
-
$this->assertNull($this->web->load('https://simplehtmldom.sourceforge.io/a.html'));
|
43 |
-
}
|
44 |
-
|
45 |
-
public function test_load_should_return_null_for_pages_larger_than_max_file_size()
|
46 |
-
{
|
47 |
-
$this->assertNull($this->web->load('https://www.w3.org/TR/html52/single-page.html'));
|
48 |
-
}
|
49 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/htmlweb_fopen_test.php
DELETED
@@ -1,53 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../HtmlWeb.php';
|
3 |
-
use simplehtmldom\HtmlWeb;
|
4 |
-
use PHPUnit\Framework\TestCase;
|
5 |
-
|
6 |
-
/**
|
7 |
-
* Tests the fopen part of HtmlWeb
|
8 |
-
*/
|
9 |
-
class htmlweb_fopen_test extends TestCase {
|
10 |
-
private $web;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
if (extension_loaded('curl')) {
|
15 |
-
$this->markTestSkipped('The cURL extension must be disabled for this test.');
|
16 |
-
}
|
17 |
-
|
18 |
-
if (!(ini_get('allow_url_fopen'))) {
|
19 |
-
$this->markTestSkipped('allow_url_fopen must be enabled for this test.');
|
20 |
-
}
|
21 |
-
|
22 |
-
$this->web = new HtmlWeb();
|
23 |
-
}
|
24 |
-
|
25 |
-
protected function tearDown()
|
26 |
-
{
|
27 |
-
unset($this->web);
|
28 |
-
}
|
29 |
-
|
30 |
-
public function urlProvider()
|
31 |
-
{
|
32 |
-
return array(
|
33 |
-
'Google' => array('https://www.google.com/'),
|
34 |
-
'GitHub' => array('https://www.github.com/'),
|
35 |
-
);
|
36 |
-
}
|
37 |
-
|
38 |
-
/** @dataProvider urlProvider */
|
39 |
-
public function test_load_should_return_dom_object($url)
|
40 |
-
{
|
41 |
-
$this->assertNotNull($this->web->load($url));
|
42 |
-
}
|
43 |
-
|
44 |
-
public function test_load_should_return_null_on_negative_response()
|
45 |
-
{
|
46 |
-
$this->assertNull($this->web->load('https://simplehtmldom.sourceforge.io/a.html'));
|
47 |
-
}
|
48 |
-
|
49 |
-
public function test_load_should_return_null_for_pages_larger_than_max_file_size()
|
50 |
-
{
|
51 |
-
$this->assertNull($this->web->load('https://www.w3.org/TR/html52/single-page.html'));
|
52 |
-
}
|
53 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/htmlweb_test.php
DELETED
@@ -1,49 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../HtmlWeb.php';
|
3 |
-
use simplehtmldom\HtmlWeb;
|
4 |
-
use PHPUnit\Framework\TestCase;
|
5 |
-
|
6 |
-
/**
|
7 |
-
* Tests the general behavior of HtmlWeb
|
8 |
-
*/
|
9 |
-
class htmlweb_test extends TestCase {
|
10 |
-
private $web;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
$this->web = new HtmlWeb();
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
unset($this->web);
|
20 |
-
}
|
21 |
-
|
22 |
-
public function urlProvider()
|
23 |
-
{
|
24 |
-
return array(
|
25 |
-
'Empty URL' => array(''),
|
26 |
-
'Scheme Missing' => array('//github.com/simplehtmldom/'),
|
27 |
-
'Wrong Scheme' => array('ssh://github.com/'),
|
28 |
-
);
|
29 |
-
}
|
30 |
-
|
31 |
-
/** @dataProvider urlProvider */
|
32 |
-
public function test_load_should_return_null_for_invalid_url($url)
|
33 |
-
{
|
34 |
-
$this->assertNull($this->web->load($url));
|
35 |
-
}
|
36 |
-
|
37 |
-
public function test_load_should_return_null_without_curl_and_fopen()
|
38 |
-
{
|
39 |
-
if (extension_loaded('curl')) {
|
40 |
-
$this->markTestSkipped('The cURL extension must be disabled for this test.');
|
41 |
-
}
|
42 |
-
|
43 |
-
if (ini_get('allow_url_fopen')) {
|
44 |
-
$this->markTestSkipped('allow_url_fopen must be disabled for this test.');
|
45 |
-
}
|
46 |
-
|
47 |
-
$this->assertNull($this->web->load('https://www.google.com/'));
|
48 |
-
}
|
49 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/memory_parsing_test.php
DELETED
@@ -1,131 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
use PHPUnit\Framework\TestResult;
|
5 |
-
|
6 |
-
/**
|
7 |
-
* Test memory behavior of parsing operations
|
8 |
-
*
|
9 |
-
* **Note**:
|
10 |
-
*
|
11 |
-
* These tests are based on /testcase/memory_test.php
|
12 |
-
*/
|
13 |
-
class memory_parsing_test extends TestCase {
|
14 |
-
/** File for memory tests */
|
15 |
-
private $file = __DIR__ . '/data/memory/index.html';
|
16 |
-
|
17 |
-
public function setUp()
|
18 |
-
{
|
19 |
-
/**
|
20 |
-
* The first time we access a file, PHP acquires additional memory that
|
21 |
-
* breaks some assertions. For some reason, loading the contents once
|
22 |
-
* fixes this issue.
|
23 |
-
*/
|
24 |
-
gc_enable();
|
25 |
-
$contents = file_get_contents($this->file, false, null, 0, filesize($this->file));
|
26 |
-
$html = new simple_html_dom($contents);
|
27 |
-
unset($html);
|
28 |
-
$contents = null;
|
29 |
-
$file = null;
|
30 |
-
gc_collect_cycles();
|
31 |
-
}
|
32 |
-
|
33 |
-
/**
|
34 |
-
* Test if the parser properly releases memory using simple_html_dom (50x)
|
35 |
-
*
|
36 |
-
* Memory usage should stay stable when using the parser in a loop.
|
37 |
-
*/
|
38 |
-
public function test_simple_html_dom()
|
39 |
-
{
|
40 |
-
$contents = file_get_contents($this->file, false, null, 0, filesize($this->file));
|
41 |
-
|
42 |
-
if (is_file($this->file)) {
|
43 |
-
// Cleanup before doing anything
|
44 |
-
gc_enable();
|
45 |
-
gc_collect_cycles();
|
46 |
-
|
47 |
-
for ($i = 0; $i <= 50; $i++) {
|
48 |
-
$memory_start = memory_get_usage();
|
49 |
-
|
50 |
-
$html = new simple_html_dom($contents);
|
51 |
-
unset($html);
|
52 |
-
gc_collect_cycles(); // Trigger garbage collection
|
53 |
-
|
54 |
-
$memory_end = memory_get_usage();
|
55 |
-
|
56 |
-
$this->assertEquals($memory_start, $memory_end, 'Iteration: ' . $i);
|
57 |
-
}
|
58 |
-
} else {
|
59 |
-
throw new Exception('Unable to perform test, file doesn\'t exist!');
|
60 |
-
}
|
61 |
-
}
|
62 |
-
|
63 |
-
/**
|
64 |
-
* Test if the parser properly releases memory using loadFile (50x)
|
65 |
-
*
|
66 |
-
* Memory usage should stay stable or slightly decrease (out of our control)
|
67 |
-
* when using the parser in a loop.
|
68 |
-
*/
|
69 |
-
public function test_loadFile()
|
70 |
-
{
|
71 |
-
|
72 |
-
if (is_file($this->file)) {
|
73 |
-
// Cleanup before doing anything
|
74 |
-
gc_enable();
|
75 |
-
gc_collect_cycles();
|
76 |
-
|
77 |
-
for ($i = 0; $i <= 50; $i++) {
|
78 |
-
$memory_start = memory_get_usage();
|
79 |
-
|
80 |
-
$html = new simple_html_dom();
|
81 |
-
$html->loadFile($this->file, false, null, 0, filesize($this->file));
|
82 |
-
unset($html);
|
83 |
-
gc_collect_cycles(); // Trigger garbage collection
|
84 |
-
|
85 |
-
$memory_end = memory_get_usage();
|
86 |
-
|
87 |
-
$this->assertEquals($memory_start, $memory_end, 'Iteration: ' . $i);
|
88 |
-
}
|
89 |
-
} else {
|
90 |
-
throw new Exception('Unable to perform test, file doesn\'t exist!');
|
91 |
-
}
|
92 |
-
}
|
93 |
-
|
94 |
-
/**
|
95 |
-
* Test if the parser correctly handles large files (optional)
|
96 |
-
*
|
97 |
-
* Uses the single page representation of the HTML Specification to perform
|
98 |
-
* tests on large files (>10 MB).
|
99 |
-
*
|
100 |
-
* @link https://www.w3.org/TR/html/single-page.html HTML Specification (single page)
|
101 |
-
*/
|
102 |
-
public function test_large_file()
|
103 |
-
{
|
104 |
-
// Note: The HTML Specification is VERY large (> 10 MB) and takes a very
|
105 |
-
// long time to download. Thus, it should be placed in a local directory
|
106 |
-
$file = __DIR__ . '/data/HTML 5.2.html';
|
107 |
-
|
108 |
-
if (!is_file($file)) {
|
109 |
-
$this->markTestSkipped(
|
110 |
-
'Download the HTML Specification as single page to "' . $file . '"'
|
111 |
-
);
|
112 |
-
}
|
113 |
-
|
114 |
-
// Cleanup before doing anything
|
115 |
-
gc_enable();
|
116 |
-
gc_collect_cycles();
|
117 |
-
|
118 |
-
$memory_start = memory_get_usage();
|
119 |
-
|
120 |
-
// Use actual file size to load the entire file
|
121 |
-
$html = new simple_html_dom;
|
122 |
-
$html->load($file);
|
123 |
-
unset($html);
|
124 |
-
gc_collect_cycles(); // Trigger garbage collection
|
125 |
-
|
126 |
-
$memory_end = memory_get_usage();
|
127 |
-
|
128 |
-
$this->assertEquals($memory_start, $memory_end);
|
129 |
-
}
|
130 |
-
|
131 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/optional_tags_test.php
DELETED
@@ -1,791 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests for optional tags
|
7 |
-
*
|
8 |
-
* Optional tags are tags that can be omitted in cases where the DOM structure
|
9 |
-
* is obvious to the parser.
|
10 |
-
*
|
11 |
-
* _Note_:
|
12 |
-
* - Elements with optional end tag (test_optional_*_end_tag) should appear in
|
13 |
-
* {@see simple_html_dom::$optional_closing_tags}
|
14 |
-
*
|
15 |
-
* @link https://www.w3.org/TR/html52/single-page.html#optional-tags
|
16 |
-
* Optional tags
|
17 |
-
*/
|
18 |
-
class optional_tags_test extends TestCase {
|
19 |
-
private $html;
|
20 |
-
|
21 |
-
protected function setUp()
|
22 |
-
{
|
23 |
-
$this->html = new simple_html_dom;
|
24 |
-
}
|
25 |
-
|
26 |
-
protected function tearDown()
|
27 |
-
{
|
28 |
-
$this->html->clear();
|
29 |
-
unset($this->html);
|
30 |
-
}
|
31 |
-
|
32 |
-
/**
|
33 |
-
* An html element’s start tag may be omitted if the first thing inside the
|
34 |
-
* html element is not a comment.
|
35 |
-
*/
|
36 |
-
public function test_optional_html_start_tag()
|
37 |
-
{
|
38 |
-
$doc = <<<HTML
|
39 |
-
<!DOCTYPE HTML>
|
40 |
-
|
41 |
-
<head></head>
|
42 |
-
<body></body>
|
43 |
-
</html>
|
44 |
-
HTML;
|
45 |
-
|
46 |
-
$this->markTestSkipped(
|
47 |
-
'Optional html start tag is not supported right now'
|
48 |
-
);
|
49 |
-
|
50 |
-
$this->assertNotNull($this->html->load($doc)->find('html', 0));
|
51 |
-
}
|
52 |
-
|
53 |
-
/**
|
54 |
-
* An html element’s end tag may be omitted if the html element is not
|
55 |
-
* immediately followed by a comment.
|
56 |
-
*/
|
57 |
-
public function test_optional_html_end_tag()
|
58 |
-
{
|
59 |
-
$doc = <<<HTML
|
60 |
-
<!DOCTYPE HTML>
|
61 |
-
<html>
|
62 |
-
<head></head>
|
63 |
-
<body></body>
|
64 |
-
|
65 |
-
HTML;
|
66 |
-
|
67 |
-
$this->assertNotNull($this->html->load($doc)->find('html', 0));
|
68 |
-
}
|
69 |
-
|
70 |
-
/**
|
71 |
-
* A head element’s start tag may be omitted if the element is empty, or if
|
72 |
-
* the first thing inside the head element is an element.
|
73 |
-
*/
|
74 |
-
public function test_optional_head_start_tag()
|
75 |
-
{
|
76 |
-
$doc = <<<HTML
|
77 |
-
<!DOCTYPE HTML>
|
78 |
-
<html>
|
79 |
-
</head>
|
80 |
-
<body></body>
|
81 |
-
</html>
|
82 |
-
HTML;
|
83 |
-
|
84 |
-
$this->markTestSkipped(
|
85 |
-
'Optional head start tag is not supported right now'
|
86 |
-
);
|
87 |
-
|
88 |
-
$this->assertNotNull($this->html->load($doc)->find('head', 0));
|
89 |
-
}
|
90 |
-
|
91 |
-
/**
|
92 |
-
* A head element’s end tag may be omitted if the head element is not
|
93 |
-
* immediately followed by a space character or a comment.
|
94 |
-
*/
|
95 |
-
public function test_optional_head_end_tag()
|
96 |
-
{
|
97 |
-
$doc = <<<HTML
|
98 |
-
<!DOCTYPE HTML>
|
99 |
-
<html>
|
100 |
-
<head>
|
101 |
-
<body></body>
|
102 |
-
</html>
|
103 |
-
HTML;
|
104 |
-
|
105 |
-
$this->assertNotNull($this->html->load($doc)->find('head', 0));
|
106 |
-
}
|
107 |
-
|
108 |
-
/**
|
109 |
-
* A body element’s start tag may be omitted if the element is empty, or if
|
110 |
-
* the first thing inside the body element is not a space character or a
|
111 |
-
* comment, except if the first thing inside the body element is a meta,
|
112 |
-
* link, script, style, or template element.
|
113 |
-
*/
|
114 |
-
public function test_optional_body_start_tag()
|
115 |
-
{
|
116 |
-
$doc = <<<HTML
|
117 |
-
<!DOCTYPE HTML>
|
118 |
-
<html>
|
119 |
-
<head></head>
|
120 |
-
</body>
|
121 |
-
</html>
|
122 |
-
HTML;
|
123 |
-
|
124 |
-
$this->markTestSkipped(
|
125 |
-
'Optional body start tag is not supported right now'
|
126 |
-
);
|
127 |
-
|
128 |
-
$this->assertNotNull($this->html->load($doc)->find('body', 0));
|
129 |
-
}
|
130 |
-
|
131 |
-
/**
|
132 |
-
* A body element’s end tag may be omitted if the body element is not
|
133 |
-
* immediately followed by a comment.
|
134 |
-
*/
|
135 |
-
public function test_optional_body_end_tag()
|
136 |
-
{
|
137 |
-
$doc = <<<HTML
|
138 |
-
<!DOCTYPE HTML>
|
139 |
-
<html>
|
140 |
-
<head></head>
|
141 |
-
<body>
|
142 |
-
</html>
|
143 |
-
HTML;
|
144 |
-
|
145 |
-
$this->assertNotNull($this->html->load($doc)->find('body', 0));
|
146 |
-
}
|
147 |
-
|
148 |
-
/**
|
149 |
-
* With all optional tags taken into account, the DOM can be simplified
|
150 |
-
*/
|
151 |
-
public function test_optional_html_head_body_tags()
|
152 |
-
{
|
153 |
-
// phpcs:disable Generic.Files.LineLength
|
154 |
-
$doc1 = <<<HTML
|
155 |
-
<!DOCTYPE HTML><html><head><title>PHP Simple HTML DOM Parser</title></head><body><p>A PHP based DOM parser</p></body></html>
|
156 |
-
HTML;
|
157 |
-
// phpcs:enable
|
158 |
-
|
159 |
-
$doc2 = <<<HTML
|
160 |
-
<!DOCTYPE HTML><title>PHP Simple HTML DOM Parser</title><p>A PHP based DOM parser</p>
|
161 |
-
HTML;
|
162 |
-
|
163 |
-
$dom1 = $this->html->load($doc1);
|
164 |
-
$dom2 = $this->html->load($doc2);
|
165 |
-
|
166 |
-
$this->markTestSkipped(
|
167 |
-
'Some optional tags are not supported right now'
|
168 |
-
);
|
169 |
-
|
170 |
-
$this->assertNotNull($dom1->find('html', 0), 'html not found!');
|
171 |
-
$this->assertNotNull($dom1->find('head', 0), 'head not found!');
|
172 |
-
$this->assertNotNull($dom1->find('body', 0), 'body not found!');
|
173 |
-
|
174 |
-
$this->assertEquals($dom1->find('html', 0), $dom2->find('html', 0));
|
175 |
-
$this->assertEquals($dom1->find('head', 0), $dom2->find('head', 0));
|
176 |
-
$this->assertEquals($dom1->find('body', 0), $dom2->find('body', 0));
|
177 |
-
}
|
178 |
-
|
179 |
-
/**
|
180 |
-
* An li element’s end tag may be omitted if the li element is immediately
|
181 |
-
* followed by another li element or if there is no more content in the
|
182 |
-
* parent element.
|
183 |
-
*/
|
184 |
-
public function test_optional_li_end_tag()
|
185 |
-
{
|
186 |
-
$doc = <<<HTML
|
187 |
-
<!DOCTYPE HTML>
|
188 |
-
<html>
|
189 |
-
<head></head>
|
190 |
-
<body>
|
191 |
-
<ul><li>PHP Simple HTML DOM Parser<li>A PHP based DOM parser</ul>
|
192 |
-
</body>
|
193 |
-
</html>
|
194 |
-
HTML;
|
195 |
-
|
196 |
-
$dom = $this->html->load($doc);
|
197 |
-
|
198 |
-
$this->assertCount(2, $dom->find('li'));
|
199 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('li', 0)->innertext);
|
200 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('li', 1)->innertext);
|
201 |
-
}
|
202 |
-
|
203 |
-
/**
|
204 |
-
* A dt element’s end tag may be omitted if the dt element is immediately
|
205 |
-
* followed by another dt element or a dd element.
|
206 |
-
*/
|
207 |
-
public function test_optional_dt_end_tag()
|
208 |
-
{
|
209 |
-
$doc = <<<HTML
|
210 |
-
<!DOCTYPE HTML>
|
211 |
-
<html>
|
212 |
-
<head></head>
|
213 |
-
<body>
|
214 |
-
<dl><dt>PHP Simple HTML DOM Parser<dt>A PHP based DOM parser</dl>
|
215 |
-
</body>
|
216 |
-
</html>
|
217 |
-
HTML;
|
218 |
-
|
219 |
-
$dom = $this->html->load($doc);
|
220 |
-
|
221 |
-
$this->assertCount(2, $dom->find('dt'));
|
222 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('dt', 0)->innertext);
|
223 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('dt', 1)->innertext);
|
224 |
-
}
|
225 |
-
|
226 |
-
/**
|
227 |
-
* A dd element’s end tag may be omitted if the dd element is immediately
|
228 |
-
* followed by another dd element or a dt element, or if there is no more
|
229 |
-
* content in the parent element.
|
230 |
-
*/
|
231 |
-
public function test_optional_dd_end_tag()
|
232 |
-
{
|
233 |
-
$doc = <<<HTML
|
234 |
-
<!DOCTYPE HTML>
|
235 |
-
<html>
|
236 |
-
<head></head>
|
237 |
-
<body>
|
238 |
-
<dl><dd>PHP Simple HTML DOM Parser<dd>A PHP based DOM parser</dl>
|
239 |
-
</body>
|
240 |
-
</html>
|
241 |
-
HTML;
|
242 |
-
|
243 |
-
$dom = $this->html->load($doc);
|
244 |
-
|
245 |
-
$this->assertCount(2, $dom->find('dd'));
|
246 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('dd', 0)->innertext);
|
247 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('dd', 1)->innertext);
|
248 |
-
}
|
249 |
-
|
250 |
-
/**
|
251 |
-
* A p element’s end tag may be omitted if the p element is immediately
|
252 |
-
* followed by an address, article, aside, blockquote, details, div, dl,
|
253 |
-
* fieldset, figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6,
|
254 |
-
* header, hr, main, nav, ol, p, pre, section, table, or ul element, or if
|
255 |
-
* there is no more content in the parent element and the parent element is
|
256 |
-
* an HTML element that is not an a, audio, del, ins, map, noscript, or
|
257 |
-
* video element, or an autonomous custom element.
|
258 |
-
*/
|
259 |
-
public function test_optional_p_end_tag()
|
260 |
-
{
|
261 |
-
$token = array(
|
262 |
-
'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl',
|
263 |
-
'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2',
|
264 |
-
'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'main', 'nav', 'ol', 'p',
|
265 |
-
'pre', 'section', 'table', 'ul'
|
266 |
-
);
|
267 |
-
|
268 |
-
foreach ($token as $t) {
|
269 |
-
$doc = <<<HTML
|
270 |
-
<!DOCTYPE HTML>
|
271 |
-
<html>
|
272 |
-
<head></head>
|
273 |
-
<body>
|
274 |
-
<div><p>PHP Simple HTML DOM Parser<{$t}>A PHP based DOM parser</{$t}></div>
|
275 |
-
</body>
|
276 |
-
</html>
|
277 |
-
HTML;
|
278 |
-
|
279 |
-
$dom = $this->html->load($doc);
|
280 |
-
|
281 |
-
$this->markTestSkipped(
|
282 |
-
'Optional p end tags are currently not supported!'
|
283 |
-
);
|
284 |
-
|
285 |
-
$this->assertNotNull($dom->find('p', 0), 'Missing p tag');
|
286 |
-
$this->assertNotNull($dom->find($t, 0), "Missing {$t} tag");
|
287 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('p', 0)->innertext);
|
288 |
-
}
|
289 |
-
}
|
290 |
-
|
291 |
-
/**
|
292 |
-
* An rt element’s end tag may be omitted if the rt element is immediately
|
293 |
-
* followed by an rt or rp element, or if there is no more content in the
|
294 |
-
* parent element.
|
295 |
-
*/
|
296 |
-
public function test_optional_rt_end_tag()
|
297 |
-
{
|
298 |
-
$doc = <<<HTML
|
299 |
-
<!DOCTYPE HTML>
|
300 |
-
<html>
|
301 |
-
<head></head>
|
302 |
-
<body>
|
303 |
-
<div><rt>PHP Simple HTML DOM Parser<rt>A PHP based DOM parser</div>
|
304 |
-
<div><rt>PHP Simple HTML DOM Parser<rp>A PHP based DOM parser</rp></div>
|
305 |
-
</body>
|
306 |
-
</html>
|
307 |
-
HTML;
|
308 |
-
|
309 |
-
$dom = $this->html->load($doc);
|
310 |
-
|
311 |
-
$this->assertCount(3, $dom->find('rt'));
|
312 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('rt', 0)->innertext);
|
313 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('rt', 1)->innertext);
|
314 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('rt', 2)->innertext);
|
315 |
-
}
|
316 |
-
|
317 |
-
/**
|
318 |
-
* An rp element’s end tag may be omitted if the rp element is immediately
|
319 |
-
* followed by an rt or rp element, or if there is no more content in the
|
320 |
-
* parent element.
|
321 |
-
*/
|
322 |
-
public function test_optional_rp_end_tag()
|
323 |
-
{
|
324 |
-
$doc = <<<HTML
|
325 |
-
<!DOCTYPE HTML>
|
326 |
-
<html>
|
327 |
-
<head></head>
|
328 |
-
<body>
|
329 |
-
<div><rp>PHP Simple HTML DOM Parser<rp>A PHP based DOM parser</div>
|
330 |
-
<div><rp>PHP Simple HTML DOM Parser<rt>A PHP based DOM parser</rt></div>
|
331 |
-
</body>
|
332 |
-
</html>
|
333 |
-
HTML;
|
334 |
-
|
335 |
-
$dom = $this->html->load($doc);
|
336 |
-
|
337 |
-
$this->assertCount(3, $dom->find('rp'));
|
338 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('rp', 0)->innertext);
|
339 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('rp', 1)->innertext);
|
340 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('rp', 2)->innertext);
|
341 |
-
}
|
342 |
-
|
343 |
-
/**
|
344 |
-
* An optgroup element’s end tag may be omitted if the optgroup element is
|
345 |
-
* immediately followed by another optgroup element, or if there is no more
|
346 |
-
* content in the parent element.
|
347 |
-
*/
|
348 |
-
public function test_optional_optgroup_end_tag()
|
349 |
-
{
|
350 |
-
$doc = <<<HTML
|
351 |
-
<!DOCTYPE HTML>
|
352 |
-
<html>
|
353 |
-
<head></head>
|
354 |
-
<body><optgroup>PHP Simple HTML DOM Parser<optgroup>A PHP based DOM parser</body>
|
355 |
-
</html>
|
356 |
-
HTML;
|
357 |
-
|
358 |
-
$dom = $this->html->load($doc);
|
359 |
-
|
360 |
-
$this->assertCount(2, $dom->find('optgroup'));
|
361 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('optgroup', 0)->innertext);
|
362 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('optgroup', 1)->innertext);
|
363 |
-
}
|
364 |
-
|
365 |
-
/**
|
366 |
-
* An option element’s end tag may be omitted if the option element is
|
367 |
-
* immediately followed by another option element, or if it is immediately
|
368 |
-
* followed by an optgroup element, or if there is no more content in the
|
369 |
-
* parent element.
|
370 |
-
*/
|
371 |
-
public function test_optional_option_end_tag()
|
372 |
-
{
|
373 |
-
$doc = <<<HTML
|
374 |
-
<!DOCTYPE HTML>
|
375 |
-
<html>
|
376 |
-
<head></head>
|
377 |
-
<body>
|
378 |
-
<div><option>PHP Simple HTML DOM Parser<option>A PHP based DOM parser</div>
|
379 |
-
<div><option>PHP Simple HTML DOM Parser<optgroup>A PHP based DOM parser</optgroup></div>
|
380 |
-
</body>
|
381 |
-
</html>
|
382 |
-
HTML;
|
383 |
-
|
384 |
-
$dom = $this->html->load($doc);
|
385 |
-
|
386 |
-
$this->assertCount(3, $dom->find('option'));
|
387 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('option', 0)->innertext);
|
388 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('option', 1)->innertext);
|
389 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('option', 2)->innertext);
|
390 |
-
}
|
391 |
-
|
392 |
-
/**
|
393 |
-
* A colgroup element’s start tag may be omitted if the first thing inside
|
394 |
-
* the colgroup element is a col element, and if the element is not
|
395 |
-
* immediately preceded by another colgroup element whose end tag has been
|
396 |
-
* omitted. (It can’t be omitted if the element is empty.)
|
397 |
-
*/
|
398 |
-
public function test_optional_colgroup_start_tag()
|
399 |
-
{
|
400 |
-
$doc = <<<HTML
|
401 |
-
<!DOCTYPE HTML>
|
402 |
-
<html>
|
403 |
-
<head></head>
|
404 |
-
<body>
|
405 |
-
<col>PHP Simple HTML DOM Parser</colgroup><col>A PHP based DOM parser</colgroup>
|
406 |
-
</body>
|
407 |
-
</html>
|
408 |
-
HTML;
|
409 |
-
|
410 |
-
$dom = $this->html->load($doc);
|
411 |
-
|
412 |
-
$this->markTestSkipped(
|
413 |
-
'Optional start tags are not supported right now'
|
414 |
-
);
|
415 |
-
|
416 |
-
$this->assertCount(2, $dom->find('colgroup'));
|
417 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('colgroup', 0)->innertext);
|
418 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('colgroup', 1)->innertext);
|
419 |
-
}
|
420 |
-
|
421 |
-
/**
|
422 |
-
* A colgroup element’s end tag may be omitted if the colgroup element is
|
423 |
-
* not immediately followed by a space character or a comment.
|
424 |
-
*/
|
425 |
-
public function test_optional_colgroup_end_tag()
|
426 |
-
{
|
427 |
-
$doc = <<<HTML
|
428 |
-
<!DOCTYPE HTML>
|
429 |
-
<html>
|
430 |
-
<head></head>
|
431 |
-
<body><colgroup>PHP Simple HTML DOM Parser</body>
|
432 |
-
</html>
|
433 |
-
HTML;
|
434 |
-
|
435 |
-
$dom = $this->html->load($doc);
|
436 |
-
|
437 |
-
$this->assertNotNull($dom->find('colgroup', 0));
|
438 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('colgroup', 0)->innertext);
|
439 |
-
}
|
440 |
-
|
441 |
-
/**
|
442 |
-
* A caption element’s end tag may be omitted if the caption element is not
|
443 |
-
* immediately followed by a space character or a comment.
|
444 |
-
*/
|
445 |
-
public function test_optional_caption_end_tag()
|
446 |
-
{
|
447 |
-
$doc = <<<HTML
|
448 |
-
<!DOCTYPE HTML>
|
449 |
-
<html>
|
450 |
-
<head></head>
|
451 |
-
<body><caption>PHP Simple HTML DOM Parser</body>
|
452 |
-
</html>
|
453 |
-
HTML;
|
454 |
-
|
455 |
-
$dom = $this->html->load($doc);
|
456 |
-
|
457 |
-
$this->assertNotNull($dom->find('caption', 0));
|
458 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('caption', 0)->innertext);
|
459 |
-
}
|
460 |
-
|
461 |
-
/**
|
462 |
-
* A thead element’s end tag may be omitted if the thead element is
|
463 |
-
* immediately followed by a tbody or tfoot element.
|
464 |
-
*/
|
465 |
-
public function test_optional_thead_end_tag()
|
466 |
-
{
|
467 |
-
$token = array('tbody', 'tfoot');
|
468 |
-
|
469 |
-
foreach ($token as $t) {
|
470 |
-
$doc = <<<HTML
|
471 |
-
<!DOCTYPE HTML>
|
472 |
-
<html>
|
473 |
-
<head></head>
|
474 |
-
<body><thead><{$t}>PHP Simple HTML DOM Parser</{$t}></body>
|
475 |
-
</html>
|
476 |
-
HTML;
|
477 |
-
|
478 |
-
$dom = $this->html->load($doc);
|
479 |
-
|
480 |
-
$this->assertNotNull($dom->find('thead', 0), 'Missing thead tag');
|
481 |
-
$this->assertNotNull($dom->find($t, 0), "Missing {$t} tag");
|
482 |
-
$this->assertEquals("<{$t}>PHP Simple HTML DOM Parser</{$t}>", $dom->find('thead', 0)->innertext);
|
483 |
-
}
|
484 |
-
}
|
485 |
-
|
486 |
-
/**
|
487 |
-
* A tbody element’s start tag may be omitted if the first thing inside the
|
488 |
-
* tbody element is a tr element, and if the element is not immediately
|
489 |
-
* preceded by a tbody, thead, or tfoot element whose end tag has been
|
490 |
-
* omitted. (It can’t be omitted if the element is empty.)
|
491 |
-
*/
|
492 |
-
public function test_optional_tbody_start_tag()
|
493 |
-
{
|
494 |
-
$doc = <<<HTML
|
495 |
-
<!DOCTYPE HTML>
|
496 |
-
<html>
|
497 |
-
<head></head>
|
498 |
-
<body><tr>PHP Simple HTML DOM Parser</tr></tbody></body>
|
499 |
-
</html>
|
500 |
-
HTML;
|
501 |
-
|
502 |
-
$dom = $this->html->load($doc);
|
503 |
-
|
504 |
-
$this->markTestSkipped(
|
505 |
-
'tbody is currently skipped by the parser'
|
506 |
-
);
|
507 |
-
|
508 |
-
$this->assertNotNull($dom->find('tbody', 0));
|
509 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('tbody', 0)->innertext);
|
510 |
-
}
|
511 |
-
|
512 |
-
/**
|
513 |
-
* A tbody element’s end tag may be omitted if the tbody element is
|
514 |
-
* immediately followed by a tbody or tfoot element, or if there is no more
|
515 |
-
* content in the parent element.
|
516 |
-
*/
|
517 |
-
public function test_optional_tbody_end_tag()
|
518 |
-
{
|
519 |
-
$doc = <<<HTML
|
520 |
-
<!DOCTYPE HTML>
|
521 |
-
<html>
|
522 |
-
<head></head>
|
523 |
-
<body><tbody>PHP Simple HTML DOM Parser<tbody>A PHP based DOM parser</body>
|
524 |
-
</html>
|
525 |
-
HTML;
|
526 |
-
|
527 |
-
$dom = $this->html->load($doc);
|
528 |
-
|
529 |
-
$this->markTestSkipped(
|
530 |
-
'tbody is currently skipped by the parser'
|
531 |
-
);
|
532 |
-
|
533 |
-
$this->assertCount(2, $dom->find('tbody'));
|
534 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('tbody', 0)->innertext);
|
535 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('tbody', 1)->innertext);
|
536 |
-
}
|
537 |
-
|
538 |
-
/**
|
539 |
-
* A tfoot element’s end tag may be omitted if there is no more content in
|
540 |
-
* the parent element.
|
541 |
-
*/
|
542 |
-
public function test_optional_tfoot_end_tag()
|
543 |
-
{
|
544 |
-
$doc = <<<HTML
|
545 |
-
<!DOCTYPE HTML>
|
546 |
-
<html>
|
547 |
-
<head></head>
|
548 |
-
<body><tfoot>PHP Simple HTML DOM Parser</body>
|
549 |
-
</html>
|
550 |
-
HTML;
|
551 |
-
|
552 |
-
$dom = $this->html->load($doc);
|
553 |
-
|
554 |
-
$this->assertNotNull($dom->find('tfoot', 0));
|
555 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('tfoot', 0)->innertext);
|
556 |
-
}
|
557 |
-
|
558 |
-
/**
|
559 |
-
* A tr element’s end tag may be omitted if the tr element is immediately
|
560 |
-
* followed by another tr element, or if there is no more content in the
|
561 |
-
* parent element.
|
562 |
-
*/
|
563 |
-
public function test_optional_tr_end_tag()
|
564 |
-
{
|
565 |
-
$doc = <<<HTML
|
566 |
-
<!DOCTYPE HTML>
|
567 |
-
<html>
|
568 |
-
<head></head>
|
569 |
-
<body><tr>PHP Simple HTML DOM Parser<tr>A PHP based DOM parser</body>
|
570 |
-
</html>
|
571 |
-
HTML;
|
572 |
-
|
573 |
-
$dom = $this->html->load($doc);
|
574 |
-
|
575 |
-
$this->assertCount(2, $dom->find('tr'));
|
576 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('tr', 0)->innertext);
|
577 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('tr', 1)->innertext);
|
578 |
-
}
|
579 |
-
|
580 |
-
/**
|
581 |
-
* A td element’s end tag may be omitted if the td element is immediately
|
582 |
-
* followed by a td or th element, or if there is no more content in the
|
583 |
-
* parent element.
|
584 |
-
*/
|
585 |
-
public function test_optional_td_end_tag()
|
586 |
-
{
|
587 |
-
$doc = <<<HTML
|
588 |
-
<!DOCTYPE HTML>
|
589 |
-
<html>
|
590 |
-
<head></head>
|
591 |
-
<body>
|
592 |
-
<div><td>PHP Simple HTML DOM Parser<td>A PHP based DOM parser</div>
|
593 |
-
<div><td>PHP Simple HTML DOM Parser<th>A PHP based DOM parser</th></div>
|
594 |
-
</body>
|
595 |
-
</html>
|
596 |
-
HTML;
|
597 |
-
|
598 |
-
$dom = $this->html->load($doc);
|
599 |
-
|
600 |
-
$this->assertCount(3, $dom->find('td'));
|
601 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('td', 0)->innertext);
|
602 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('td', 1)->innertext);
|
603 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('td', 2)->innertext);
|
604 |
-
}
|
605 |
-
|
606 |
-
/**
|
607 |
-
* A th element’s end tag may be omitted if the th element is immediately
|
608 |
-
* followed by a td or th element, or if there is no more content in the
|
609 |
-
* parent element.
|
610 |
-
*/
|
611 |
-
public function test_optional_th_end_tag()
|
612 |
-
{
|
613 |
-
$doc = <<<HTML
|
614 |
-
<!DOCTYPE HTML>
|
615 |
-
<html>
|
616 |
-
<head></head>
|
617 |
-
<body>
|
618 |
-
<div><th>PHP Simple HTML DOM Parser<th>A PHP based DOM parser</div>
|
619 |
-
<div><th>PHP Simple HTML DOM Parser<td>A PHP based DOM parser</td></div>
|
620 |
-
</body>
|
621 |
-
</html>
|
622 |
-
HTML;
|
623 |
-
|
624 |
-
$dom = $this->html->load($doc);
|
625 |
-
|
626 |
-
$this->assertCount(3, $dom->find('th'));
|
627 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('th', 0)->innertext);
|
628 |
-
$this->assertEquals('A PHP based DOM parser', $dom->find('th', 1)->innertext);
|
629 |
-
$this->assertEquals('PHP Simple HTML DOM Parser', $dom->find('th', 2)->innertext);
|
630 |
-
}
|
631 |
-
|
632 |
-
/**
|
633 |
-
* Checks if optional end tags are properly handled by the parser.
|
634 |
-
*
|
635 |
-
* @dataProvider dataProvider_for_parser_should_close_optional_end_tags
|
636 |
-
*/
|
637 |
-
public function test_parser_should_close_optional_end_tags($doc, $expected, $message)
|
638 |
-
{
|
639 |
-
$this->html->load($doc);
|
640 |
-
$this->assertEquals($expected, $this->html->save(), $message);
|
641 |
-
}
|
642 |
-
|
643 |
-
/**
|
644 |
-
* @todo: The list of block tags and optional closing tags should come from
|
645 |
-
* code, not copied here.
|
646 |
-
*/
|
647 |
-
public function dataProvider_for_parser_should_close_optional_end_tags()
|
648 |
-
{
|
649 |
-
$block_tags = array(
|
650 |
-
'body' => 1,
|
651 |
-
'div' => 1,
|
652 |
-
'form' => 1,
|
653 |
-
'root' => 1,
|
654 |
-
'span' => 1,
|
655 |
-
'table' => 1
|
656 |
-
);
|
657 |
-
|
658 |
-
// Remove root (implicitly added by the parser)
|
659 |
-
unset($block_tags['root']);
|
660 |
-
|
661 |
-
$optional_closing_tags = array(
|
662 |
-
// Not optional, see
|
663 |
-
// https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
|
664 |
-
'b' => array('b' => 1),
|
665 |
-
'dd' => array('dd' => 1, 'dt' => 1),
|
666 |
-
// Not optional, see
|
667 |
-
// https://www.w3.org/TR/html/grouping-content.html#the-dl-element
|
668 |
-
'dl' => array('dd' => 1, 'dt' => 1),
|
669 |
-
'dt' => array('dd' => 1, 'dt' => 1),
|
670 |
-
'li' => array('li' => 1),
|
671 |
-
'optgroup' => array('optgroup' => 1, 'option' => 1),
|
672 |
-
'option' => array('optgroup' => 1, 'option' => 1),
|
673 |
-
'p' => array('p' => 1),
|
674 |
-
'rp' => array('rp' => 1, 'rt' => 1),
|
675 |
-
'rt' => array('rp' => 1, 'rt' => 1),
|
676 |
-
'td' => array('td' => 1, 'th' => 1),
|
677 |
-
'th' => array('td' => 1, 'th' => 1),
|
678 |
-
'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
|
679 |
-
);
|
680 |
-
|
681 |
-
$data = array();
|
682 |
-
|
683 |
-
// Block tags should close optional elements
|
684 |
-
foreach(array_keys($block_tags) as $block) {
|
685 |
-
foreach(array_keys($optional_closing_tags) as $e) {
|
686 |
-
$data["$block should close $e"] = array(
|
687 |
-
"<$block><$e></$block>",
|
688 |
-
"<$block><$e></$e></$block>",
|
689 |
-
"$block should close $e"
|
690 |
-
);
|
691 |
-
}
|
692 |
-
}
|
693 |
-
|
694 |
-
// Special case for root (has no tags)
|
695 |
-
foreach(array_keys($optional_closing_tags) as $e) {
|
696 |
-
$data["root should close $e"] = array(
|
697 |
-
"<$e>",
|
698 |
-
"<$e></$e>",
|
699 |
-
"root should close $e"
|
700 |
-
);
|
701 |
-
}
|
702 |
-
|
703 |
-
// Block tags should close optional elements, even if the opening tag is missing
|
704 |
-
foreach(array_keys($block_tags) as $block) {
|
705 |
-
foreach(array_keys($optional_closing_tags) as $e) {
|
706 |
-
$data["$block should close $e"] = array(
|
707 |
-
"<$e></$block>",
|
708 |
-
"<$e></$e></$block>",
|
709 |
-
"$block should close $e"
|
710 |
-
);
|
711 |
-
}
|
712 |
-
}
|
713 |
-
|
714 |
-
// Block tags should close NESTED optional elements
|
715 |
-
foreach(array_keys($block_tags) as $block) {
|
716 |
-
foreach(array_keys($optional_closing_tags) as $e) {
|
717 |
-
foreach(array_keys($optional_closing_tags[$e]) as $child) {
|
718 |
-
|
719 |
-
// skip if element closes itself
|
720 |
-
if($e === $child) continue;
|
721 |
-
|
722 |
-
// skip if child and element are mutually exclusive
|
723 |
-
if(isset($optional_closing_tags[$child])
|
724 |
-
&& array_key_exists($e, $optional_closing_tags[$child])) {
|
725 |
-
continue;
|
726 |
-
}
|
727 |
-
|
728 |
-
$data["$block should close nested $e and $child"] = array(
|
729 |
-
"<$block><$e><$child></$block>",
|
730 |
-
"<$block><$e><$child></$child></$e></$block>",
|
731 |
-
"$block should close nested $e and $child"
|
732 |
-
);
|
733 |
-
}
|
734 |
-
}
|
735 |
-
}
|
736 |
-
|
737 |
-
// Special case for root (has no tags)
|
738 |
-
foreach(array_keys($block_tags) as $block) {
|
739 |
-
foreach(array_keys($optional_closing_tags) as $e) {
|
740 |
-
foreach(array_keys($optional_closing_tags[$e]) as $child) {
|
741 |
-
|
742 |
-
// skip if nested element closes itself
|
743 |
-
if($e === $child) continue;
|
744 |
-
|
745 |
-
// skip if child and element are mutually exclusive
|
746 |
-
if(isset($optional_closing_tags[$child])
|
747 |
-
&& array_key_exists($e, $optional_closing_tags[$child])) {
|
748 |
-
continue;
|
749 |
-
}
|
750 |
-
|
751 |
-
$data["root should close nested $e and $child"] = array(
|
752 |
-
"<$e><$child>",
|
753 |
-
"<$e><$child></$child></$e>",
|
754 |
-
"root should close nested $e and $child"
|
755 |
-
);
|
756 |
-
}
|
757 |
-
}
|
758 |
-
}
|
759 |
-
|
760 |
-
// Some optional tags should close other optional tags
|
761 |
-
foreach(array_keys($optional_closing_tags) as $e) {
|
762 |
-
foreach(array_keys($optional_closing_tags[$e]) as $child) {
|
763 |
-
$data["$e should close $child"] = array(
|
764 |
-
"<$child><$e>",
|
765 |
-
"<$child></$child><$e></$e>",
|
766 |
-
"$e should close $child"
|
767 |
-
);
|
768 |
-
}
|
769 |
-
}
|
770 |
-
|
771 |
-
// Optional tags should NOT close stray elements
|
772 |
-
foreach(array_keys($optional_closing_tags) as $e) {
|
773 |
-
$data["$e should NOT close a"] = array(
|
774 |
-
"<a><$e>",
|
775 |
-
"<a><$e></$e>",
|
776 |
-
"$e should NOT close a"
|
777 |
-
);
|
778 |
-
}
|
779 |
-
|
780 |
-
// Normal tags should NOT close optional elements
|
781 |
-
foreach(array_keys($optional_closing_tags) as $e) {
|
782 |
-
$data["a should NOT close $e"] = array(
|
783 |
-
"<$e><a></a>",
|
784 |
-
"<$e><a></a></$e>",
|
785 |
-
"a should NOT close $e"
|
786 |
-
);
|
787 |
-
}
|
788 |
-
|
789 |
-
return $data;
|
790 |
-
}
|
791 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/script_test.php
DELETED
@@ -1,69 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Checks if the parser properly handles script elements
|
7 |
-
*/
|
8 |
-
class script_test extends TestCase {
|
9 |
-
|
10 |
-
private $html;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
$this->html = new simple_html_dom;
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
$this->html->clear();
|
20 |
-
unset($this->html);
|
21 |
-
}
|
22 |
-
|
23 |
-
/**
|
24 |
-
* @dataProvider dataProvider_for_script_should_parse
|
25 |
-
*/
|
26 |
-
public function test_script_should_parse($expected, $doc)
|
27 |
-
{
|
28 |
-
$this->html->load($doc);
|
29 |
-
$this->assertEquals($expected, $this->html->find('script', 0)->innertext);
|
30 |
-
$this->assertEquals($doc, $this->html->save());
|
31 |
-
}
|
32 |
-
|
33 |
-
public function dataProvider_for_script_should_parse()
|
34 |
-
{
|
35 |
-
return array(
|
36 |
-
'empty' => array(
|
37 |
-
'',
|
38 |
-
'<script></script>',
|
39 |
-
),
|
40 |
-
'empty with type' => array(
|
41 |
-
'',
|
42 |
-
'<script type="application/javascript"></script>',
|
43 |
-
),
|
44 |
-
'space' => array(
|
45 |
-
' ',
|
46 |
-
'<script> </script>',
|
47 |
-
),
|
48 |
-
'html string' => array(
|
49 |
-
"var foo = '<div>Hello, World!</div>';",
|
50 |
-
"<script>var foo = '<div>Hello, World!</div>';</script>",
|
51 |
-
),
|
52 |
-
'newline' => array(
|
53 |
-
"\n",
|
54 |
-
"<script>\n</script>"
|
55 |
-
),
|
56 |
-
'newline with type' => array(
|
57 |
-
"\n",
|
58 |
-
"<script type=\"application/javascript\">\n</script>"
|
59 |
-
),
|
60 |
-
);
|
61 |
-
}
|
62 |
-
|
63 |
-
public function test_html_inside_script_should_not_appear_in_the_dom()
|
64 |
-
{
|
65 |
-
$this->html->load('<script><div>Hello, World!</div></script>');
|
66 |
-
$this->assertNotNull($this->html->find('script', 0));
|
67 |
-
$this->assertNull($this->html->find('div', 0));
|
68 |
-
}
|
69 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/self_closing_tags_test.php
DELETED
@@ -1,417 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Test identification of self closing tags (void elements)
|
7 |
-
*
|
8 |
-
* _Remarks_:
|
9 |
-
* - Test functions are ordered by name to allow for simple navigation.
|
10 |
-
* - Self-closing tags should be tested for their previous and next sibling, as
|
11 |
-
* they are "consumed" by incorrectly detected elements. For example:
|
12 |
-
* `<div id="before"></div><tag><div id="after"></div>` will result in
|
13 |
-
* `tag->outertext` with contents `<tag><div id="after"></div>` because `<tag>`
|
14 |
-
* is NOT a valid void element. If it were a valid element, `tag->outertext`
|
15 |
-
* would return `<tag>` instead.
|
16 |
-
* - Tests should not include more than one self-closing tag, to correctly assert
|
17 |
-
* results.
|
18 |
-
* - Do not remove obsolete tests! These tests should be changed to correctly
|
19 |
-
* assert the opposite condition is true (i.e. change `assertNull` to
|
20 |
-
* `assertNotNull`).
|
21 |
-
*
|
22 |
-
* _Code template_:
|
23 |
-
* Use the following code template for all unit tests in this file. Replace all
|
24 |
-
* occurrences of "tag" by the corresponding tag name (i.e. for the `area` tag
|
25 |
-
* function name `test_tag` should become `test_area`).
|
26 |
-
*
|
27 |
-
* ```php
|
28 |
-
* public function test_tag() {
|
29 |
-
* $src = <<<HTML
|
30 |
-
* <div id="before"></div>
|
31 |
-
* <tag>
|
32 |
-
* <div id="after"></div>
|
33 |
-
* HTML;
|
34 |
-
*
|
35 |
-
* $tag = $this->html->load($src)->find('tag', 0);
|
36 |
-
*
|
37 |
-
* $this->assertEquals('before', $tag->previousSibling()->id);
|
38 |
-
* $this->assertEquals('<tag>', $tag->outertext);
|
39 |
-
* $this->assertEquals('after', $tag->nextSibling()->id);
|
40 |
-
* }
|
41 |
-
* ```
|
42 |
-
*
|
43 |
-
* @link https://www.w3.org/TR/html HTML Specification
|
44 |
-
* @link https://www.w3.org/TR/html/syntax.html#void-elements Void elements
|
45 |
-
*/
|
46 |
-
class self_closing_tags_test extends TestCase {
|
47 |
-
|
48 |
-
private $html;
|
49 |
-
|
50 |
-
protected function setUp()
|
51 |
-
{
|
52 |
-
$this->html = new simple_html_dom;
|
53 |
-
}
|
54 |
-
|
55 |
-
protected function tearDown()
|
56 |
-
{
|
57 |
-
$this->html->clear();
|
58 |
-
unset($this->html);
|
59 |
-
}
|
60 |
-
|
61 |
-
/**
|
62 |
-
* @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-area-element
|
63 |
-
* The area element
|
64 |
-
*/
|
65 |
-
public function test_area()
|
66 |
-
{
|
67 |
-
$src = <<<HTML
|
68 |
-
<map name="shapes">
|
69 |
-
<div id="before"></div>
|
70 |
-
<area shape=rect coords="50,50,100,100">
|
71 |
-
<div id="after"></div>
|
72 |
-
</map>
|
73 |
-
HTML;
|
74 |
-
|
75 |
-
$area = $this->html->load($src)->find('area', 0);
|
76 |
-
|
77 |
-
$this->assertEquals('before', $area->previousSibling()->id);
|
78 |
-
$this->assertEquals('<area shape=rect coords="50,50,100,100">', $area->outertext);
|
79 |
-
$this->assertEquals('after', $area->nextSibling()->id);
|
80 |
-
}
|
81 |
-
|
82 |
-
/**
|
83 |
-
* @link https://www.w3.org/TR/html52/document-metadata.html#the-base-element
|
84 |
-
* The base element
|
85 |
-
*/
|
86 |
-
public function test_base()
|
87 |
-
{
|
88 |
-
$src = <<<HTML
|
89 |
-
<head>
|
90 |
-
<div id="before"></div>
|
91 |
-
<base href="http://simplehtmldom.sourceforge.net/" target="_blank">
|
92 |
-
<div id="after"></div>
|
93 |
-
</head>
|
94 |
-
HTML;
|
95 |
-
|
96 |
-
$base = $this->html->load($src)->find('base', 0);
|
97 |
-
|
98 |
-
$this->assertEquals('before', $base->previousSibling()->id);
|
99 |
-
$this->assertEquals('<base href="http://simplehtmldom.sourceforge.net/" target="_blank">', $base->outertext);
|
100 |
-
$this->assertEquals('after', $base->nextSibling()->id);
|
101 |
-
}
|
102 |
-
|
103 |
-
/**
|
104 |
-
* @link https://www.w3.org/TR/html52/textlevel-semantics.html#the-br-element
|
105 |
-
* The br element
|
106 |
-
*/
|
107 |
-
public function test_br()
|
108 |
-
{
|
109 |
-
$src = <<<HTML
|
110 |
-
<body>
|
111 |
-
<div id="before"></div>
|
112 |
-
<p>PHP Simple HTML DOM Parser<br>A PHP based DOM parser</p>
|
113 |
-
<div id="after"></div>
|
114 |
-
</body>
|
115 |
-
HTML;
|
116 |
-
|
117 |
-
// Normal operation (innertext of <br> is replaced by DEFAULT_BR_TEXT)
|
118 |
-
$br = $this->html->load($src)->find('br', 0);
|
119 |
-
|
120 |
-
$this->assertEquals('before', $br->parentNode()->previousSibling()->id);
|
121 |
-
$this->assertEquals('<br>', $br->outertext);
|
122 |
-
$this->assertEquals(DEFAULT_BR_TEXT, $br->innertext);
|
123 |
-
$this->assertEquals('after', $br->parentNode()->nextSibling()->id);
|
124 |
-
|
125 |
-
$this->assertEquals(
|
126 |
-
'PHP Simple HTML DOM Parser' . DEFAULT_BR_TEXT . 'A PHP based DOM parser',
|
127 |
-
$br->parentNode()->plaintext
|
128 |
-
);
|
129 |
-
|
130 |
-
// Custom text (innertext of <br> is replaced by custom value)
|
131 |
-
$br_text = 'br_text';
|
132 |
-
$br = $this->html->load($src, true, true, $br_text)->find('br', 0);
|
133 |
-
|
134 |
-
$this->assertEquals('before', $br->parentNode()->previousSibling()->id);
|
135 |
-
$this->assertEquals('<br>', $br->outertext);
|
136 |
-
$this->assertEquals($br_text, $br->innertext);
|
137 |
-
$this->assertEquals('after', $br->parentNode()->nextSibling()->id);
|
138 |
-
|
139 |
-
$this->assertEquals(
|
140 |
-
'PHP Simple HTML DOM Parser' . $br_text . 'A PHP based DOM parser',
|
141 |
-
$br->parentNode()->plaintext
|
142 |
-
);
|
143 |
-
}
|
144 |
-
|
145 |
-
/**
|
146 |
-
* @link https://www.w3.org/TR/html52/tabular-data.html#the-col-element
|
147 |
-
* The col element
|
148 |
-
*/
|
149 |
-
public function test_col()
|
150 |
-
{
|
151 |
-
$src = <<<HTML
|
152 |
-
<table>
|
153 |
-
<div id="before"></div>
|
154 |
-
<colgroup><col><col><col></colgroup>
|
155 |
-
<div id="after"></div>
|
156 |
-
</table>
|
157 |
-
HTML;
|
158 |
-
|
159 |
-
$col = $this->html->load($src)->find('col', 0);
|
160 |
-
|
161 |
-
$this->assertEquals('before', $col->parentNode()->previousSibling()->id);
|
162 |
-
$this->assertEquals('<col>', $col->outertext);
|
163 |
-
$this->assertEquals('after', $col->parentNode()->nextSibling()->id);
|
164 |
-
}
|
165 |
-
|
166 |
-
/**
|
167 |
-
* @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-embed-element
|
168 |
-
* The embed element
|
169 |
-
*/
|
170 |
-
public function test_embed()
|
171 |
-
{
|
172 |
-
$src = <<<HTML
|
173 |
-
<body>
|
174 |
-
<div id="before"></div>
|
175 |
-
<embed src="catgame.swf" quality="high">
|
176 |
-
<div id="after"></div>
|
177 |
-
</body>
|
178 |
-
HTML;
|
179 |
-
|
180 |
-
$embed = $this->html->load($src)->find('embed', 0);
|
181 |
-
|
182 |
-
$this->assertEquals('before', $embed->previousSibling()->id);
|
183 |
-
$this->assertEquals('<embed src="catgame.swf" quality="high">', $embed->outertext);
|
184 |
-
$this->assertEquals('after', $embed->nextSibling()->id);
|
185 |
-
}
|
186 |
-
|
187 |
-
/**
|
188 |
-
* @link https://www.w3.org/TR/html52/grouping-content.html#the-hr-element
|
189 |
-
* The hr element
|
190 |
-
*/
|
191 |
-
public function test_hr()
|
192 |
-
{
|
193 |
-
$src = <<<HTML
|
194 |
-
<p>PHP Simple HTML DOM Parser</p>
|
195 |
-
<div id="before"></div>
|
196 |
-
<hr>
|
197 |
-
<div id="after"></div>
|
198 |
-
<p>A PHP based DOM parser</p>
|
199 |
-
HTML;
|
200 |
-
|
201 |
-
$hr = $this->html->load($src)->find('hr', 0);
|
202 |
-
|
203 |
-
$this->assertEquals('before', $hr->previousSibling()->id);
|
204 |
-
$this->assertEquals('<hr>', $hr->outertext);
|
205 |
-
$this->assertEquals('after', $hr->nextSibling()->id);
|
206 |
-
}
|
207 |
-
|
208 |
-
/**
|
209 |
-
* @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-img-element
|
210 |
-
* The img element
|
211 |
-
*/
|
212 |
-
public function test_img()
|
213 |
-
{
|
214 |
-
$src = <<<HTML
|
215 |
-
<a href="http://simplehtmldom.sourceforge.net">
|
216 |
-
<div id="before"></div>
|
217 |
-
<img src="images/simple_html_dom.png" width="128" height="128" alt="PHP Simple HTML DOM Parser Manual">
|
218 |
-
<div id="after"></div>
|
219 |
-
</a>
|
220 |
-
HTML;
|
221 |
-
|
222 |
-
$img = $this->html->load($src)->find('img', 0);
|
223 |
-
|
224 |
-
$this->assertEquals('before', $img->previousSibling()->id);
|
225 |
-
$this->assertEquals(
|
226 |
-
'<img src="images/simple_html_dom.png" width="128" height="128" alt="PHP Simple HTML DOM Parser Manual">',
|
227 |
-
$img->outertext);
|
228 |
-
$this->assertEquals('after', $img->nextSibling()->id);
|
229 |
-
}
|
230 |
-
|
231 |
-
/**
|
232 |
-
* @link https://www.w3.org/TR/html52/sec-forms.html#the-input-element
|
233 |
-
* The input element
|
234 |
-
*/
|
235 |
-
public function test_input()
|
236 |
-
{
|
237 |
-
$src = <<<HTML
|
238 |
-
<body>
|
239 |
-
<div id="before"></div>
|
240 |
-
<input type="url" name="location" list="urls">
|
241 |
-
<div id="after"></div>
|
242 |
-
<datalist id="urls">
|
243 |
-
<option
|
244 |
-
label="PHP Simple HTML DOM Parser"
|
245 |
-
value="https://sourceforge.net/projects/simplehtmldom/"></option>
|
246 |
-
<option
|
247 |
-
label="PHP Simple HTML DOM Parser Manual"
|
248 |
-
value="http://simplehtmldom.sourceforge.net/"></option>
|
249 |
-
</datalist>
|
250 |
-
</body>
|
251 |
-
HTML;
|
252 |
-
|
253 |
-
$input = $this->html->load($src)->find('input', 0);
|
254 |
-
|
255 |
-
$this->assertEquals('before', $input->previousSibling()->id);
|
256 |
-
$this->assertEquals('<input type="url" name="location" list="urls">', $input->outertext);
|
257 |
-
$this->assertEquals('after', $input->nextSibling()->id);
|
258 |
-
}
|
259 |
-
|
260 |
-
/**
|
261 |
-
* @link https://www.w3.org/TR/html52/document-metadata.html#the-link-element
|
262 |
-
* The link element
|
263 |
-
*/
|
264 |
-
public function test_link()
|
265 |
-
{
|
266 |
-
$src = <<<HTML
|
267 |
-
<title>
|
268 |
-
<div id="before"></div>
|
269 |
-
<link rel="MIT license" href="https://opensource.org/licenses/MIT">
|
270 |
-
<div id="after"></div>
|
271 |
-
<title>
|
272 |
-
HTML;
|
273 |
-
|
274 |
-
$link = $this->html->load($src)->find('link', 0);
|
275 |
-
|
276 |
-
$this->assertEquals('before', $link->previousSibling()->id);
|
277 |
-
$this->assertEquals('<link rel="MIT license" href="https://opensource.org/licenses/MIT">', $link->outertext);
|
278 |
-
$this->assertEquals('after', $link->nextSibling()->id);
|
279 |
-
}
|
280 |
-
|
281 |
-
/**
|
282 |
-
* @link https://www.w3.org/TR/html52/document-metadata.html#the-meta-element
|
283 |
-
* The meta element
|
284 |
-
*/
|
285 |
-
public function test_meta()
|
286 |
-
{
|
287 |
-
$src = <<<HTML
|
288 |
-
<title>
|
289 |
-
<div id="before"></div>
|
290 |
-
<meta name=generator content="Simple HTML DOM Parser">
|
291 |
-
<div id="after"></div>
|
292 |
-
</title>
|
293 |
-
HTML;
|
294 |
-
|
295 |
-
$meta = $this->html->load($src)->find('meta', 0);
|
296 |
-
|
297 |
-
$this->assertEquals('before', $meta->previousSibling()->id);
|
298 |
-
$this->assertEquals('<meta name=generator content="Simple HTML DOM Parser">', $meta->outertext);
|
299 |
-
$this->assertEquals('after', $meta->nextSibling()->id);
|
300 |
-
}
|
301 |
-
|
302 |
-
/**
|
303 |
-
* @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-param-element
|
304 |
-
* The param element
|
305 |
-
*/
|
306 |
-
public function test_param()
|
307 |
-
{
|
308 |
-
$src = <<<HTML
|
309 |
-
<object type="application/simple_html_dom">
|
310 |
-
<div id="before"></div>
|
311 |
-
<param name="self_closing_tags" value="param">
|
312 |
-
<div id="after"></div>
|
313 |
-
</object>
|
314 |
-
HTML;
|
315 |
-
|
316 |
-
$param = $this->html->load($src)->find('param', 0);
|
317 |
-
|
318 |
-
$this->assertEquals('before', $param->previousSibling()->id);
|
319 |
-
$this->assertEquals('<param name="self_closing_tags" value="param">', $param->outertext);
|
320 |
-
$this->assertEquals('after', $param->nextSibling()->id);
|
321 |
-
}
|
322 |
-
|
323 |
-
/**
|
324 |
-
* @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-source-element
|
325 |
-
* The source element
|
326 |
-
*/
|
327 |
-
public function test_source()
|
328 |
-
{
|
329 |
-
$src = <<<HTML
|
330 |
-
<video controls autoplay>
|
331 |
-
<div id="before"></div>
|
332 |
-
<source src="simple_html_dom.mp4" type="video/mp4">
|
333 |
-
<div id="after"></div>
|
334 |
-
</video>
|
335 |
-
HTML;
|
336 |
-
|
337 |
-
$source = $this->html->load($src)->find('source', 0);
|
338 |
-
|
339 |
-
$this->assertEquals('before', $source->previousSibling()->id);
|
340 |
-
$this->assertEquals('<source src="simple_html_dom.mp4" type="video/mp4">', $source->outertext);
|
341 |
-
$this->assertEquals('after', $source->nextSibling()->id);
|
342 |
-
}
|
343 |
-
|
344 |
-
/**
|
345 |
-
* Checks if the spacer element is NOT detected as self-closing tag
|
346 |
-
*
|
347 |
-
* @link https://www.w3.org/TR/html5/obsolete.html#non-conforming-features
|
348 |
-
* Non-conforming features
|
349 |
-
* @link https://www.w3.org/TR/html5/dom.html#ref-for-elementdef-spacer
|
350 |
-
* Elements in the DOM
|
351 |
-
* @link https://developer.mozilla.org/en-US/docs/Web/HTML/Element/spacer
|
352 |
-
* <spacer>
|
353 |
-
*/
|
354 |
-
public function test_spacer_not()
|
355 |
-
{
|
356 |
-
$src = <<<HTML
|
357 |
-
<div id="before"></div>
|
358 |
-
<spacer type="horizontal" size="10"></spacer>
|
359 |
-
<div id="after"></div>
|
360 |
-
HTML;
|
361 |
-
|
362 |
-
$spacer = $this->html->load($src)->find('spacer', 0);
|
363 |
-
|
364 |
-
$this->assertEquals('before', $spacer->previousSibling()->id);
|
365 |
-
$this->assertEquals('<spacer type="horizontal" size="10"></spacer>', $spacer->outertext);
|
366 |
-
$this->assertEquals('after', $spacer->nextSibling()->id);
|
367 |
-
}
|
368 |
-
|
369 |
-
/**
|
370 |
-
* @link https://www.w3.org/TR/html52/semantics-embedded-content.html#the-track-element
|
371 |
-
* The track element
|
372 |
-
*/
|
373 |
-
public function test_track()
|
374 |
-
{
|
375 |
-
$src = <<<HTML
|
376 |
-
<video src="simple_html_dom.webm">
|
377 |
-
<div id="before"></div>
|
378 |
-
<track kind=subtitles src=simple_html_dom.en.vtt srclang=en label="English">
|
379 |
-
<div id="after"></div>
|
380 |
-
<track kind=captions src=simple_html_dom.hoh.vtt srclang=en label="English captions">
|
381 |
-
<track kind=subtitles src=simple_html_dom.fr.vtt srclang=fr lang=fr label="Français">
|
382 |
-
<track kind=subtitles src=simple_html_dom.de.vtt srclang=de lang=de label="Deutsch">
|
383 |
-
</video>
|
384 |
-
HTML;
|
385 |
-
|
386 |
-
$track = $this->html->load($src)->find('track', 0);
|
387 |
-
|
388 |
-
$this->assertEquals('before', $track->previousSibling()->id);
|
389 |
-
|
390 |
-
$this->assertEquals(
|
391 |
-
'<track kind=subtitles src=simple_html_dom.en.vtt srclang=en label="English">',
|
392 |
-
$track->outertext
|
393 |
-
);
|
394 |
-
|
395 |
-
$this->assertEquals('after', $track->nextSibling()->id);
|
396 |
-
}
|
397 |
-
|
398 |
-
/**
|
399 |
-
* @link https://www.w3.org/TR/html52/textlevel-semantics.html#the-wbr-element
|
400 |
-
* The wbr element
|
401 |
-
*/
|
402 |
-
public function test_wbr()
|
403 |
-
{
|
404 |
-
$src = <<<HTML
|
405 |
-
<div id="before"></div>
|
406 |
-
<p>https://sourceforge.net/<wbr>projects/<wbr>simplehtmldom/</p>
|
407 |
-
<div id="after"></div>
|
408 |
-
HTML;
|
409 |
-
|
410 |
-
$wbr = $this->html->load($src)->find('wbr', 0);
|
411 |
-
|
412 |
-
$this->assertEquals('before', $wbr->parentNode()->previousSibling()->id);
|
413 |
-
$this->assertEquals('<wbr>', $wbr->outertext);
|
414 |
-
$this->assertEquals('after', $wbr->parentNode()->nextSibling()->id);
|
415 |
-
}
|
416 |
-
|
417 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/server_side_script_test.php
DELETED
@@ -1,28 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Checks if the parser properly handles server-side scripts
|
7 |
-
*/
|
8 |
-
class server_side_script_test extends TestCase {
|
9 |
-
|
10 |
-
private $html;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
$this->html = new simple_html_dom;
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
$this->html->clear();
|
20 |
-
unset($this->html);
|
21 |
-
}
|
22 |
-
|
23 |
-
public function test_html_inside_sss_should_not_appear_in_the_dom()
|
24 |
-
{
|
25 |
-
$this->html->load('<?php <div>Hello, World!</div> ?>');
|
26 |
-
$this->assertNull($this->html->find('div', 0));
|
27 |
-
}
|
28 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/str_get_html_test.php
DELETED
@@ -1,18 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Tests str_get_html
|
7 |
-
*/
|
8 |
-
class str_get_html_test extends TestCase {
|
9 |
-
|
10 |
-
/**
|
11 |
-
* str_get_html should return false on empty string.
|
12 |
-
*/
|
13 |
-
public function test_empty_string_should_return_false()
|
14 |
-
{
|
15 |
-
$this->assertFalse(str_get_html(''));
|
16 |
-
}
|
17 |
-
|
18 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/style_test.php
DELETED
@@ -1,58 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Checks if the parser properly handles style elements
|
7 |
-
*/
|
8 |
-
class style_test extends TestCase {
|
9 |
-
|
10 |
-
private $html;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
$this->html = new simple_html_dom;
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
$this->html->clear();
|
20 |
-
unset($this->html);
|
21 |
-
}
|
22 |
-
|
23 |
-
/**
|
24 |
-
* @dataProvider dataProvider_for_style_should_parse
|
25 |
-
*/
|
26 |
-
public function test_style_should_parse($expected, $doc)
|
27 |
-
{
|
28 |
-
$this->html->load($doc);
|
29 |
-
$this->assertEquals($expected, $this->html->find('style', 0)->innertext);
|
30 |
-
$this->assertEquals($doc, $this->html->save());
|
31 |
-
}
|
32 |
-
|
33 |
-
public function dataProvider_for_style_should_parse()
|
34 |
-
{
|
35 |
-
return array(
|
36 |
-
'empty' => array(
|
37 |
-
'',
|
38 |
-
'<style></style>',
|
39 |
-
),
|
40 |
-
'empty without end tag' => array(
|
41 |
-
'',
|
42 |
-
'<style/>',
|
43 |
-
),
|
44 |
-
'space' => array(
|
45 |
-
' ',
|
46 |
-
'<style> </style>',
|
47 |
-
),
|
48 |
-
'newline' => array(
|
49 |
-
"\n",
|
50 |
-
"<style>\n</style>"
|
51 |
-
),
|
52 |
-
'multiple style tags' => array(
|
53 |
-
'Hello',
|
54 |
-
'<style>Hello</style><style>World</style>'
|
55 |
-
),
|
56 |
-
);
|
57 |
-
}
|
58 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vendor/simplehtmldom/simplehtmldom/tests/whitespace_test.php
DELETED
@@ -1,483 +0,0 @@
|
|
1 |
-
<?php
|
2 |
-
require_once __DIR__ . '/../simple_html_dom.php';
|
3 |
-
use PHPUnit\Framework\TestCase;
|
4 |
-
|
5 |
-
/**
|
6 |
-
* Test if the parser properly removes whitespace
|
7 |
-
*/
|
8 |
-
class whitespace_test extends TestCase {
|
9 |
-
|
10 |
-
private $html;
|
11 |
-
|
12 |
-
protected function setUp()
|
13 |
-
{
|
14 |
-
$this->html = new simple_html_dom;
|
15 |
-
}
|
16 |
-
|
17 |
-
protected function tearDown()
|
18 |
-
{
|
19 |
-
$this->html->clear();
|
20 |
-
unset($this->html);
|
21 |
-
}
|
22 |
-
|
23 |
-
public function provide_whitespace_around_attributes()
|
24 |
-
{
|
25 |
-
return array(array(<<<EOD
|
26 |
-
<html>
|
27 |
-
<head>
|
28 |
-
<meta charset="UTF-8">
|
29 |
-
<meta name ="description" content= "simplehtmldom">
|
30 |
-
<meta name = "keywords" content = "simple,html,dom">
|
31 |
-
<meta name = "author"
|
32 |
-
content = "John Doe">
|
33 |
-
</head>
|
34 |
-
</html>
|
35 |
-
EOD
|
36 |
-
));
|
37 |
-
}
|
38 |
-
|
39 |
-
public function provide_whitespace_around_void_tags()
|
40 |
-
{
|
41 |
-
return array(array(<<<EOD
|
42 |
-
<html>
|
43 |
-
<head>
|
44 |
-
<meta charset="UTF-8">
|
45 |
-
< meta name="description" content="simplehtmldom">
|
46 |
-
<meta name="keywords" content="simple,html,dom" >
|
47 |
-
< meta name="author" content="John Doe" >
|
48 |
-
< meta name="viewport" content="width=device-width, initial-scale=1.0"
|
49 |
-
>
|
50 |
-
</head>
|
51 |
-
</html>
|
52 |
-
EOD
|
53 |
-
));
|
54 |
-
}
|
55 |
-
|
56 |
-
public function provide_whitespace_around_tags()
|
57 |
-
{
|
58 |
-
return array(array(<<<EOD
|
59 |
-
<html>
|
60 |
-
<body>
|
61 |
-
<div class="article" />
|
62 |
-
< div class="article" />
|
63 |
-
<div class="article" / >
|
64 |
-
< div class="article" / >
|
65 |
-
<
|
66 |
-
div class="article" /
|
67 |
-
>
|
68 |
-
< div class="article" / >
|
69 |
-
</body>
|
70 |
-
</html>
|
71 |
-
EOD
|
72 |
-
));
|
73 |
-
}
|
74 |
-
|
75 |
-
public function provide_whitespace_around_tags_without_class()
|
76 |
-
{
|
77 |
-
return array(array(<<<EOD
|
78 |
-
<html>
|
79 |
-
<body>
|
80 |
-
<div />
|
81 |
-
< div />
|
82 |
-
<div / >
|
83 |
-
< div / >
|
84 |
-
<
|
85 |
-
div /
|
86 |
-
>
|
87 |
-
< div / >
|
88 |
-
</body>
|
89 |
-
</html>
|
90 |
-
EOD
|
91 |
-
));
|
92 |
-
}
|
93 |
-
|
94 |
-
public function provide_whitespace_around_nested_tags()
|
95 |
-
{
|
96 |
-
return array(array(<<<EOD
|
97 |
-
<html>
|
98 |
-
<body>
|
99 |
-
<div class="article">
|
100 |
-
< div class="level1">
|
101 |
-
<div class="level2" >
|
102 |
-
< div class="level3" >
|
103 |
-
<
|
104 |
-
div class="level4"
|
105 |
-
>
|
106 |
-
< div class="level5" >
|
107 |
-
< /div >
|
108 |
-
<
|
109 |
-
/div
|
110 |
-
>
|
111 |
-
< /div >
|
112 |
-
</div >
|
113 |
-
< /div>
|
114 |
-
</ div>
|
115 |
-
</body>
|
116 |
-
</html>
|
117 |
-
EOD
|
118 |
-
));
|
119 |
-
}
|
120 |
-
|
121 |
-
public function provide_whitespace_in_class_values()
|
122 |
-
{
|
123 |
-
return array(array(<<<EOD
|
124 |
-
<html>
|
125 |
-
<body>
|
126 |
-
<div class=""/>
|
127 |
-
<div class=" "/>
|
128 |
-
<div class=" "/>
|
129 |
-
<div class="article"/>
|
130 |
-
<div class=" article"/>
|
131 |
-
<div class="article "/>
|
132 |
-
<div class=" article "/>
|
133 |
-
<div class=" article "/>
|
134 |
-
<div class="article new"/>
|
135 |
-
<div class=" article new"/>
|
136 |
-
<div class="article new "/>
|
137 |
-
<div class="article new"/>
|
138 |
-
<div class=" article new "/>
|
139 |
-
<div class="
|
140 |
-
article
|
141 |
-
new
|
142 |
-
"/>
|
143 |
-
</body>
|
144 |
-
</html>
|
145 |
-
EOD
|
146 |
-
));
|
147 |
-
}
|
148 |
-
|
149 |
-
public function provide_whitespace_in_attribute_values()
|
150 |
-
{
|
151 |
-
return array(array(<<<EOD
|
152 |
-
<html>
|
153 |
-
<body>
|
154 |
-
<div attribute=""/>
|
155 |
-
<div attribute=" "/>
|
156 |
-
<div attribute=" "/>
|
157 |
-
<div attribute="article"/>
|
158 |
-
<div attribute=" article"/>
|
159 |
-
<div attribute="article "/>
|
160 |
-
<div attribute=" article "/>
|
161 |
-
<div attribute=" article "/>
|
162 |
-
<div attribute="article new"/>
|
163 |
-
<div attribute=" article new"/>
|
164 |
-
<div attribute="article new "/>
|
165 |
-
<div attribute="article new"/>
|
166 |
-
<div attribute=" article new "/>
|
167 |
-
<div attribute="
|
168 |
-
article
|
169 |
-
new
|
170 |
-
"/>
|
171 |
-
</body>
|
172 |
-
</html>
|
173 |
-
EOD
|
174 |
-
));
|
175 |
-
}
|
176 |
-
|
177 |
-
/** @dataProvider provide_whitespace_around_attributes */
|
178 |
-
public function test_parse_removes_whitespace_around_attributes($doc)
|
179 |
-
{
|
180 |
-
// phpcs:ignore Generic.Files.LineLength
|
181 |
-
$expected = '<html><head><meta charset="UTF-8"><meta name="description" content="simplehtmldom"><meta name="keywords" content="simple,html,dom"><meta name="author" content="John Doe"></head></html>';
|
182 |
-
$this->html->load($doc);
|
183 |
-
|
184 |
-
$this->assertEquals($expected, $this->html->save());
|
185 |
-
}
|
186 |
-
|
187 |
-
/** @dataProvider provide_whitespace_around_void_tags */
|
188 |
-
public function test_parse_removes_whitespace_around_void_tags($doc)
|
189 |
-
{
|
190 |
-
// phpcs:ignore Generic.Files.LineLength
|
191 |
-
$expected = '<html><head><meta charset="UTF-8"><meta name="description" content="simplehtmldom"><meta name="keywords" content="simple,html,dom"><meta name="author" content="John Doe"><meta name="viewport" content="width=device-width, initial-scale=1.0"></head></html>';
|
192 |
-
$this->html->load($doc);
|
193 |
-
|
194 |
-
$this->assertEquals($expected, $this->html->save());
|
195 |
-
}
|
196 |
-
|
197 |
-
/** @dataProvider provide_whitespace_around_tags */
|
198 |
-
public function test_parse_removes_whitespace_around_tags($doc)
|
199 |
-
{
|
200 |
-
// phpcs:ignore Generic.Files.LineLength
|
201 |
-
$expected = '<html><body><div class="article"/><div class="article"/><div class="article"/><div class="article"/><div class="article"/><div class="article"/></body></html>';
|
202 |
-
$this->html->load($doc);
|
203 |
-
|
204 |
-
$this->assertEquals($expected, $this->html->save());
|
205 |
-
}
|
206 |
-
|
207 |
-
/** @dataProvider provide_whitespace_around_tags_without_class */
|
208 |
-
public function test_parse_removes_whitespace_around_tags_without_class($doc)
|
209 |
-
{
|
210 |
-
// phpcs:ignore Generic.Files.LineLength
|
211 |
-
$expected = '<html><body><div/><div/><div/><div/><div/><div/></body></html>';
|
212 |
-
$this->html->load($doc);
|
213 |
-
|
214 |
-
$this->assertEquals($expected, $this->html->save());
|
215 |
-
}
|
216 |
-
|
217 |
-
/** @dataProvider provide_whitespace_around_nested_tags */
|
218 |
-
public function test_parse_removes_whitespace_around_nested_tags($doc)
|
219 |
-
{
|
220 |
-
// phpcs:ignore Generic.Files.LineLength
|
221 |
-
$expected = '<html><body><div class="article"><div class="level1"><div class="level2"><div class="level3"><div class="level4"><div class="level5"></div></div></div></div></div></div></body></html>';
|
222 |
-
$this->html->load($doc);
|
223 |
-
|
224 |
-
$this->assertEquals($expected, $this->html->save());
|
225 |
-
}
|
226 |
-
|
227 |
-
/** @dataProvider provide_whitespace_in_class_values */
|
228 |
-
public function test_parse_removes_whitespace_in_class_values($doc)
|
229 |
-
{
|
230 |
-
$this->html->load($doc);
|
231 |
-
|
232 |
-
$this->assertCount(11, $this->html->find('.article'));
|
233 |
-
$this->assertCount(6, $this->html->find('.new'));
|
234 |
-
$this->assertCount(6, $this->html->find('[class="article new"]'));
|
235 |
-
$this->assertEquals('article', $this->html->find('.article', 0)->class);
|
236 |
-
$this->assertEquals('article new', $this->html->find('[class="article new"]', 0)->class);
|
237 |
-
}
|
238 |
-
|
239 |
-
/** @dataProvider provide_whitespace_in_class_values */
|
240 |
-
public function test_find_removes_whitespace_in_class_selectors($doc)
|
241 |
-
{
|
242 |
-
$this->html->load($doc);
|
243 |
-
|
244 |
-
$this->assertCount(11, $this->html->find('.article'));
|
245 |
-
$this->assertCount(11, $this->html->find(' .article'));
|
246 |
-
$this->assertCount(11, $this->html->find('.article '));
|
247 |
-
$this->assertCount(11, $this->html->find(' .article '));
|
248 |
-
$this->assertCount(11, $this->html->find(' .article '));
|
249 |
-
|
250 |
-
$this->assertCount(6, $this->html->find('[class="article new"]' ));
|
251 |
-
$this->assertCount(6, $this->html->find('[class=" article new"]' ));
|
252 |
-
$this->assertCount(6, $this->html->find('[class="article new "]' ));
|
253 |
-
$this->assertCount(6, $this->html->find('[class=" article new "]' ));
|
254 |
-
$this->assertCount(6, $this->html->find('[class="article new"]' ));
|
255 |
-
$this->assertCount(6, $this->html->find('[class=" article new "]' ));
|
256 |
-
}
|
257 |
-
|
258 |
-
/** @dataProvider provide_whitespace_in_attribute_values */
|
259 |
-
public function test_parse_removes_whitespace_in_attribute_values($doc)
|
260 |
-
{
|
261 |
-
$this->html->load($doc);
|
262 |
-
|
263 |
-
$this->assertCount(11, $this->html->find('[attribute*="article"]'));
|
264 |
-
$this->assertCount(6, $this->html->find('[attribute*="new"]'));
|
265 |
-
$this->assertCount(6, $this->html->find('[attribute="article new"]'));
|
266 |
-
$this->assertEquals('article', $this->html->find('[attribute*="article"]', 0)->attribute);
|
267 |
-
$this->assertEquals('article new', $this->html->find('[attribute*="article new"]', 0)->attribute);
|
268 |
-
}
|
269 |
-
|
270 |
-
/** @dataProvider provide_whitespace_in_class_values */
|
271 |
-
public function test_find_keeps_whitespace_without_trim($doc)
|
272 |
-
{
|
273 |
-
$this->html->load($doc, true, false);
|
274 |
-
|
275 |
-
$this->assertCount(11, $this->html->find('.article'));
|
276 |
-
$this->assertCount(11, $this->html->find(' .article'));
|
277 |
-
$this->assertCount(11, $this->html->find('.article '));
|
278 |
-
$this->assertCount(11, $this->html->find(' .article '));
|
279 |
-
$this->assertCount(11, $this->html->find(' .article '));
|
280 |
-
|
281 |
-
$this->assertCount(6, $this->html->find('[class="article new"]' ));
|
282 |
-
$this->assertCount(6, $this->html->find('[class=" article new"]' ));
|
283 |
-
$this->assertCount(6, $this->html->find('[class="article new "]' ));
|
284 |
-
$this->assertCount(6, $this->html->find('[class=" article new "]' ));
|
285 |
-
$this->assertCount(6, $this->html->find('[class="article new"]' ));
|
286 |
-
$this->assertCount(6, $this->html->find('[class=" article new "]' ));
|
287 |
-
}
|
288 |
-
|
289 |
-
/**
|
290 |
-
* @dataProvider provide_whitespace_around_attributes
|
291 |
-
* @dataProvider provide_whitespace_around_void_tags
|
292 |
-
* @dataProvider provide_whitespace_around_tags
|
293 |
-
* @dataProvider provide_whitespace_around_tags_without_class
|
294 |
-
* @dataProvider provide_whitespace_around_nested_tags
|
295 |
-
* @dataProvider provide_whitespace_in_attribute_values
|
296 |
-
* @dataProvider provide_whitespace_in_class_values
|
297 |
-
*/
|
298 |
-
public function test_parse_keeps_whitespace_without_trim($doc)
|
299 |
-
{
|
300 |
-
$expected = $doc;
|
301 |
-
$this->html->load($doc, true, false);
|
302 |
-
|
303 |
-
$this->assertEquals($expected, $this->html->save());
|
304 |
-
}
|
305 |
-
|
306 |
-
public function test_text_should_trim_whitespace()
|
307 |
-
{
|
308 |
-
$expected = 'PHP Simple HTML DOM Parser';
|
309 |
-
|
310 |
-
$doc = <<<EOD
|
311 |
-
<p> PHP Simple HTML DOM Parser </p>
|
312 |
-
EOD;
|
313 |
-
|
314 |
-
$this->html->load($doc);
|
315 |
-
|
316 |
-
$this->assertEquals($expected, $this->html->root->text());
|
317 |
-
}
|
318 |
-
|
319 |
-
public function test_text_should_trim_nested_whitespace()
|
320 |
-
{
|
321 |
-
$expected = 'PHP Simple HTML DOM Parser';
|
322 |
-
|
323 |
-
$doc = <<<EOD
|
324 |
-
<p> <span> </span> PHP Simple HTML DOM Parser </p>
|
325 |
-
EOD;
|
326 |
-
|
327 |
-
$this->html->load($doc);
|
328 |
-
|
329 |
-
$this->assertEquals($expected, $this->html->root->text());
|
330 |
-
}
|
331 |
-
|
332 |
-
public function test_text_should_remove_newline_from_paragraph()
|
333 |
-
{
|
334 |
-
$expected = 'PHP Simple HTML DOM Parser';
|
335 |
-
|
336 |
-
$doc = <<<EOD
|
337 |
-
<p>
|
338 |
-
PHP Simple HTML DOM Parser
|
339 |
-
</p>
|
340 |
-
EOD;
|
341 |
-
|
342 |
-
$this->html->load($doc);
|
343 |
-
|
344 |
-
$this->assertEquals($expected, $this->html->root->text());
|
345 |
-
}
|
346 |
-
|
347 |
-
public function test_text_should_remove_nested_newline_from_paragraph()
|
348 |
-
{
|
349 |
-
$expected = 'PHP Simple HTML DOM Parser';
|
350 |
-
|
351 |
-
$doc = <<<EOD
|
352 |
-
<p>
|
353 |
-
<span>
|
354 |
-
|
355 |
-
</span>
|
356 |
-
PHP Simple HTML DOM Parser
|
357 |
-
</p>
|
358 |
-
EOD;
|
359 |
-
|
360 |
-
$this->html->load($doc);
|
361 |
-
|
362 |
-
$this->assertEquals($expected, $this->html->root->text());
|
363 |
-
}
|
364 |
-
|
365 |
-
public function test_text_should_add_newline_between_paragraph()
|
366 |
-
{
|
367 |
-
$expected = <<<EOD
|
368 |
-
PHP Simple HTML DOM Parser
|
369 |
-
|
370 |
-
A fast, simple and reliable HTML document parser for PHP.
|
371 |
-
EOD;
|
372 |
-
|
373 |
-
$doc = <<<EOD
|
374 |
-
<p>PHP Simple HTML DOM Parser</p>
|
375 |
-
<p>A fast, simple and reliable HTML document parser for PHP.</p>
|
376 |
-
EOD;
|
377 |
-
|
378 |
-
$this->html->load($doc);
|
379 |
-
|
380 |
-
$this->assertEquals($expected, $this->html->root->text());
|
381 |
-
}
|
382 |
-
|
383 |
-
public function test_text_should_add_newline_between_nested_paragraph()
|
384 |
-
{
|
385 |
-
$expected = <<<EOD
|
386 |
-
PHP Simple HTML DOM Parser
|
387 |
-
|
388 |
-
A fast, simple and reliable HTML document parser for PHP.
|
389 |
-
EOD;
|
390 |
-
|
391 |
-
$doc = <<<EOD
|
392 |
-
<div><p>PHP Simple HTML DOM Parser</p></div>
|
393 |
-
<div><p>A fast, simple and reliable HTML document parser for PHP.</p></div>
|
394 |
-
EOD;
|
395 |
-
|
396 |
-
$this->html->load($doc);
|
397 |
-
|
398 |
-
$this->assertEquals($expected, $this->html->root->text());
|
399 |
-
}
|
400 |
-
|
401 |
-
public function test_text_should_keep_whitespace_around_inline_elements()
|
402 |
-
{
|
403 |
-
$expected = <<<EOD
|
404 |
-
PHP Simple HTML DOM Parser. A fast, simple and reliable HTML document parser for PHP.
|
405 |
-
EOD;
|
406 |
-
|
407 |
-
$doc = <<<EOD
|
408 |
-
<p>PHP Simple HTML DOM Parser. <em>A fast, simple and reliable HTML document parser for PHP.</em></p>
|
409 |
-
EOD;
|
410 |
-
|
411 |
-
$this->html->load($doc);
|
412 |
-
|
413 |
-
$this->assertEquals($expected, $this->html->root->text());
|
414 |
-
}
|
415 |
-
|
416 |
-
public function test_text_should_skip_empty_paragraphs()
|
417 |
-
{
|
418 |
-
$expected = <<<EOD
|
419 |
-
PHP Simple HTML DOM Parser.
|
420 |
-
|
421 |
-
A fast, simple and reliable HTML document parser for PHP.
|
422 |
-
EOD;
|
423 |
-
|
424 |
-
$doc = <<<EOD
|
425 |
-
<p>PHP Simple HTML DOM Parser.</p>
|
426 |
-
<p> </p>
|
427 |
-
<p> </p>
|
428 |
-
<p> </p>
|
429 |
-
<p> </p>
|
430 |
-
<p>A fast, simple and reliable HTML document parser for PHP.</p>
|
431 |
-
EOD;
|
432 |
-
|
433 |
-
$this->html->load($doc);
|
434 |
-
|
435 |
-
$this->assertEquals($expected, $this->html->root->text());
|
436 |
-
}
|
437 |
-
|
438 |
-
public function test_text_should_handle_nbsp_like_whitespace()
|
439 |
-
{
|
440 |
-
$expected = <<<EOD
|
441 |
-
PHP Simple HTML DOM Parser.
|
442 |
-
|
443 |
-
A fast, simple and reliable HTML document parser for PHP.
|
444 |
-
EOD;
|
445 |
-
|
446 |
-
$doc = <<<EOD
|
447 |
-
<p> PHP Simple HTML DOM Parser. </p>
|
448 |
-
<p> </p>
|
449 |
-
<p> </p>
|
450 |
-
<p> </p>
|
451 |
-
<p> </p>
|
452 |
-
<p> A fast, simple and reliable HTML<span> </span> document parser for PHP. </p>
|
453 |
-
EOD;
|
454 |
-
|
455 |
-
$this->html->load($doc);
|
456 |
-
|
457 |
-
$this->assertEquals($expected, $this->html->root->text());
|
458 |
-
}
|
459 |
-
|
460 |
-
/**
|
461 |
-
* The library uses UTF-8 internally. All operations in the text() function
|
462 |
-
* should therefore handle UTF-8 characters accordingly. If UTF-8 characters
|
463 |
-
* are handled like ASCII, the resulting output could be incorrect or cause
|
464 |
-
* errors.
|
465 |
-
*
|
466 |
-
* @link https://sourceforge.net/p/simplehtmldom/feature-requests/62/ Feature #62
|
467 |
-
*/
|
468 |
-
public function test_text_should_handle_utf8_characters()
|
469 |
-
{
|
470 |
-
$expected = '«Hello, World»';
|
471 |
-
|
472 |
-
$doc = '«Hello, World» ';
|
473 |
-
|
474 |
-
$this->html->load($doc);
|
475 |
-
|
476 |
-
$this->assertEquals(
|
477 |
-
$expected,
|
478 |
-
$this->html->root->text(),
|
479 |
-
'UTF-8 characters should not be handled like ASCII characters!'
|
480 |
-
);
|
481 |
-
}
|
482 |
-
|
483 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|