WP Retina 2x - Version 5.5.2

Version Description

  • Fix: New version of HtmlDomParser.
  • Update: New dashboard.
Download this release

Release Info

Developer TigrouMeow
Plugin Icon 128x128 WP Retina 2x
Version 5.5.2
Comparing to
See all releases

Code changes from version 5.5.1 to 5.5.2

common/admin.css CHANGED
@@ -134,7 +134,7 @@
134
  padding: 10px 10px;
135
  }
136
 
137
- .meow-box p.submit {
138
  text-align: right;
139
  margin: 10px -10px -10px -10px;
140
  padding: 7px 10px 10px 0px !important;
@@ -299,9 +299,29 @@
299
 
300
  /* DASHBOARD */
301
 
 
 
 
 
 
 
 
 
 
302
  .meow-dashboard .meow-box li {
303
  border-bottom: 1px solid #eee;
304
- padding: 0px 10px 10px 10px;
 
 
 
 
 
 
 
 
 
 
 
305
  }
306
 
307
  .meow-dashboard .meow-box li:last-child {
134
  padding: 10px 10px;
135
  }
136
 
137
+ .meow-box p.submit, .meow-box div.submit {
138
  text-align: right;
139
  margin: 10px -10px -10px -10px;
140
  padding: 7px 10px 10px 0px !important;
299
 
300
  /* DASHBOARD */
301
 
302
+ .meow-dashboard {
303
+ box-sizing: border-box;
304
+ }
305
+
306
+ .meow-dashboard .meow-box ul {
307
+ padding: 0px;
308
+ margin: 0px;
309
+ }
310
+
311
  .meow-dashboard .meow-box li {
312
  border-bottom: 1px solid #eee;
313
+ margin: 0px;
314
+ padding: 10px;
315
+ min-height: 80px;
316
+ box-sizing: border-box;
317
+ }
318
+
319
+ .meow-dashboard .meow-box li img {
320
+ width: 80px;
321
+ height: 80px;
322
+ float: left;
323
+ margin: -10px 10px -10px -10px;
324
+
325
  }
326
 
327
  .meow-dashboard .meow-box li:last-child {
common/admin.php CHANGED
@@ -4,8 +4,10 @@ if ( !class_exists( 'MeowApps_Admin' ) ) {
4
 
5
  class MeowApps_Admin {
6
 
 
 
7
  public static $loaded = false;
8
- public static $admin_version = "1.6";
9
 
10
  public $prefix; // prefix used for actions, filters (mfrh)
11
  public $mainfile; // plugin main file (media-file-renamer.php)
@@ -151,8 +153,8 @@ if ( !class_exists( 'MeowApps_Admin' ) ) {
151
  }
152
  ?>
153
  <h1 style="line-height: 16px;">
154
- <img width="36" style="margin-right: 10px; float: left; position: relative; top: -5px;"
155
- src="<?php echo $this->meowapps_logo_url(); ?>"><?php echo $title; ?><br />
156
  <span style="font-size: 12px"><?php echo $author; ?></span>
157
  </h1>
158
  <div style="clear: both;"></div>
@@ -174,8 +176,8 @@ if ( !class_exists( 'MeowApps_Admin' ) ) {
174
  // Creates standard menu if it does NOT exist
175
  global $submenu;
176
  if ( !isset( $submenu[ 'meowapps-main-menu' ] ) ) {
177
- add_menu_page( 'Meow Apps', 'Meow Apps', 'manage_options', 'meowapps-main-menu',
178
- array( $this, 'admin_meow_apps' ), 'dashicons-camera', 82 );
179
  add_submenu_page( 'meowapps-main-menu', __( 'Dashboard', 'meowapps' ),
180
  __( 'Dashboard', 'meowapps' ), 'manage_options',
181
  'meowapps-main-menu', array( $this, 'admin_meow_apps' ) );
@@ -284,7 +286,7 @@ if ( !class_exists( 'MeowApps_Admin' ) ) {
284
  function plugins_loaded() {
285
  if ( isset( $_GET[ 'tool' ] ) && $_GET[ 'tool' ] == 'error_log' ) {
286
  $sec = "5";
287
- header("Refresh: $sec;");
288
  }
289
  }
290
 
@@ -303,11 +305,20 @@ if ( !class_exists( 'MeowApps_Admin' ) ) {
303
  echo "</div>";
304
  }
305
  else if ( isset( $_GET['tool'] ) && $_GET['tool'] == 'error_log' ) {
 
 
 
 
306
  $errorpath = ini_get( 'error_log' );
307
  echo "<a href=\"javascript:history.go(-1)\">< Go back</a><br /><br />";
 
 
 
 
 
308
  echo '<div id="error_log">';
309
  if ( file_exists( $errorpath ) ) {
310
- echo "Now (auto-reload every 5 seconds):<br />[" . date( "d-M-Y H:i:s", time() ) . " UTC]<br /<br /><br />Errors (order by latest):";
311
  $errors = file_get_contents( $errorpath );
312
  $errors = explode( "\n", $errors );
313
  $errors = array_reverse( $errors );
@@ -332,24 +343,28 @@ if ( !class_exists( 'MeowApps_Admin' ) ) {
332
  <div class="meow-box meow-col meow-span_1_of_2 ">
333
  <h3 class=""><span class="dashicons dashicons-camera"></span> UI Plugins </h3>
334
  <ul class="">
335
- <li><b>WP/LR Sync</b> <?php echo $this->check_install( 'wplr-sync' ) ?><br />
 
336
  Synchronize photos (folders, collections, keywords) from Lightroom to WordPress.</li>
337
- <li><b>Meow Lightbox</b> <?php echo $this->check_install( 'meow-lightbox' ) ?><br />
 
338
  Light but powerful lightbox that can also display photo information (EXIF).</li>
339
- <li><b>Meow Gallery</b> <?php echo $this->check_install( 'meow-gallery' ) ?><br />
 
340
  Gallery (using the built-in WP gallery) that makes your website look better.</li>
341
- <!-- <li><b>Audio Story for Images</b> <?php echo $this->check_install( 'audio-story-images' ) ?><br />
342
- Add audio (music, explanation, ambiance) to your images.</li> -->
343
  </ul>
344
  </div>
345
  <div class="meow-box meow-col meow-span_1_of_2">
346
  <h3 class=""><span class="dashicons dashicons-admin-tools"></span> System Plugins</h3>
347
  <ul class="">
348
- <li><b>Media File Renamer</b> <?php echo $this->check_install( 'media-file-renamer' ) ?><br />
 
349
  For nicer filenames and better SEO.</li>
350
- <li><b>Media Cleaner</b> <?php echo $this->check_install( 'media-cleaner' ) ?><br />
 
351
  Detect the files which are not in use.</li>
352
- <li><b>WP Retina 2x</b> <?php echo $this->check_install( 'wp-retina-2x' ) ?><br />
 
353
  The famous plugin that adds Retina support.</li>
354
  </ul>
355
  </div>
4
 
5
  class MeowApps_Admin {
6
 
7
+ public static $logo = '';
8
+
9
  public static $loaded = false;
10
+ public static $admin_version = "1.9";
11
 
12
  public $prefix; // prefix used for actions, filters (mfrh)
13
  public $mainfile; // plugin main file (media-file-renamer.php)
153
  }
154
  ?>
155
  <h1 style="line-height: 16px;">
156
+ <img width="42" style="margin-right: 10px; float: left; position: relative; top: -5px;"
157
+ src="<?php echo MeowApps_Admin::$logo ?>"><?php echo $title; ?><br />
158
  <span style="font-size: 12px"><?php echo $author; ?></span>
159
  </h1>
160
  <div style="clear: both;"></div>
176
  // Creates standard menu if it does NOT exist
177
  global $submenu;
178
  if ( !isset( $submenu[ 'meowapps-main-menu' ] ) ) {
179
+ add_menu_page( 'Meow Apps', '<img style="width: 24px; margin-left: -30px; position: absolute; margin-top: -3px;" src="' . MeowApps_Admin::$logo . '" />Meow Apps', 'manage_options', 'meowapps-main-menu',
180
+ array( $this, 'admin_meow_apps' ), '', 82 );
181
  add_submenu_page( 'meowapps-main-menu', __( 'Dashboard', 'meowapps' ),
182
  __( 'Dashboard', 'meowapps' ), 'manage_options',
183
  'meowapps-main-menu', array( $this, 'admin_meow_apps' ) );
286
  function plugins_loaded() {
287
  if ( isset( $_GET[ 'tool' ] ) && $_GET[ 'tool' ] == 'error_log' ) {
288
  $sec = "5";
289
+ header( "Refresh: $sec;" );
290
  }
291
  }
292
 
305
  echo "</div>";
306
  }
307
  else if ( isset( $_GET['tool'] ) && $_GET['tool'] == 'error_log' ) {
308
+ $log_msg = '=== MEOW APPS DEBUG (This is not an error) ===';
309
+ if ( isset( $_POST['write_logs'] ) ) {
310
+ error_log( $log_msg );
311
+ }
312
  $errorpath = ini_get( 'error_log' );
313
  echo "<a href=\"javascript:history.go(-1)\">< Go back</a><br /><br />";
314
+ echo '
315
+ <form method="post">
316
+ <input type="hidden" name="write_logs" value="true">
317
+ <input class="button button-primary" type="submit" value="Write in the Error Logs">
318
+ </form><br />';
319
  echo '<div id="error_log">';
320
  if ( file_exists( $errorpath ) ) {
321
+ echo "Now (auto-reload every 5 seconds): [" . date( "d-M-Y H:i:s", time() ) . " UTC]<br /><br /><h2 style='margin: 0px;'>Errors (order by latest)</h2>";
322
  $errors = file_get_contents( $errorpath );
323
  $errors = explode( "\n", $errors );
324
  $errors = array_reverse( $errors );
343
  <div class="meow-box meow-col meow-span_1_of_2 ">
344
  <h3 class=""><span class="dashicons dashicons-camera"></span> UI Plugins </h3>
345
  <ul class="">
346
+ <li><img src='<?= $this->common_url( 'img/wplr-sync.jpg' ) ?>' /><b>WP/LR Sync</b>
347
+ <?php echo $this->check_install( 'wplr-sync' ) ?><br />
348
  Synchronize photos (folders, collections, keywords) from Lightroom to WordPress.</li>
349
+ <li><img src='<?= $this->common_url( 'img/meow-lightbox.jpg' ) ?>' /><b>Meow Lightbox</b>
350
+ <?php echo $this->check_install( 'meow-lightbox' ) ?><br />
351
  Light but powerful lightbox that can also display photo information (EXIF).</li>
352
+ <li><img src='<?= $this->common_url( 'img/meow-gallery.jpg' ) ?>' /><b>Meow Gallery</b>
353
+ <?php echo $this->check_install( 'meow-gallery' ) ?><br />
354
  Gallery (using the built-in WP gallery) that makes your website look better.</li>
 
 
355
  </ul>
356
  </div>
357
  <div class="meow-box meow-col meow-span_1_of_2">
358
  <h3 class=""><span class="dashicons dashicons-admin-tools"></span> System Plugins</h3>
359
  <ul class="">
360
+ <li><img src='<?= $this->common_url( 'img/media-file-renamer.jpg' ) ?>' /><b>Media File Renamer</b>
361
+ <?php echo $this->check_install( 'media-file-renamer' ) ?><br />
362
  For nicer filenames and better SEO.</li>
363
+ <li><img src='<?= $this->common_url( 'img/media-cleaner.jpg' ) ?>' /><b>Media Cleaner</b>
364
+ <?php echo $this->check_install( 'media-cleaner' ) ?><br />
365
  Detect the files which are not in use.</li>
366
+ <li><img src='<?= $this->common_url( 'img/wp-retina-2x.jpg' ) ?>' /><b>WP Retina 2x</b>
367
+ <?php echo $this->check_install( 'wp-retina-2x' ) ?><br />
368
  The famous plugin that adds Retina support.</li>
369
  </ul>
370
  </div>
common/img/default.png ADDED
Binary file
common/img/media-cleaner.jpg ADDED
Binary file
common/img/media-file-renamer.jpg ADDED
Binary file
common/img/meow-gallery.jpg ADDED
Binary file
common/img/meow-lightbox.jpg ADDED
Binary file
common/img/wp-retina-2x.jpg ADDED
Binary file
common/img/wpengine.png DELETED
Binary file
common/img/wplr-sync.jpg ADDED
Binary file
core.php CHANGED
@@ -1,5 +1,8 @@
1
  <?php
2
 
 
 
 
3
  class Meow_WR2X_Core {
4
 
5
  public $admin = null;
@@ -98,14 +101,12 @@ class Meow_WR2X_Core {
98
  function picture_rewrite( $buffer ) {
99
  if ( !isset( $buffer ) || trim( $buffer ) === '' )
100
  return $buffer;
101
- if ( !function_exists( "str_get_html" ) )
102
- include( __DIR__ . '/inc/simple_html_dom.php' );
103
-
104
  $lazysize = get_option( "wr2x_picturefill_lazysizes" ) && $this->admin->is_registered();
105
  $killSrc = !get_option( "wr2x_picturefill_keep_src" );
106
  $nodes_count = 0;
107
  $nodes_replaced = 0;
108
- $html = str_get_html( $buffer );
109
  if ( !$html ) {
110
  $this->log( "The HTML buffer is null, another plugin might block the process." );
111
  return $buffer;
@@ -1050,13 +1051,14 @@ class Meow_WR2X_Core {
1050
  // Change proposed by Nicscott01, slighlty modified by Jordy (+isset)
1051
  // (https://wordpress.org/support/topic/issue-with-crop-position?replies=4#post-6200271)
1052
  $crop = isset( $_wp_additional_image_sizes[$name] ) ? $_wp_additional_image_sizes[$name]['crop'] : true;
1053
- $customCrop = null;
 
 
 
 
 
 
1054
 
1055
- // Support for Manual Image Crop
1056
- // If the size of the image was manually cropped, let's keep it.
1057
- if ( class_exists( 'ManualImageCrop' ) && isset( $meta['micSelectedArea'] ) && isset( $meta['micSelectedArea'][$name] ) && isset( $meta['micSelectedArea'][$name]['scale'] ) ) {
1058
- $customCrop = $meta['micSelectedArea'][$name];
1059
- }
1060
  $image = $this->resize( $originalfile, $meta['sizes'][$name]['width'] * 2,
1061
  $meta['sizes'][$name]['height'] * 2, $crop, $retina_file, $customCrop );
1062
  }
1
  <?php
2
 
3
+ require_once 'vendor/autoload.php';
4
+ use KubAT\PhpSimple\HtmlDomParser;
5
+
6
  class Meow_WR2X_Core {
7
 
8
  public $admin = null;
101
  function picture_rewrite( $buffer ) {
102
  if ( !isset( $buffer ) || trim( $buffer ) === '' )
103
  return $buffer;
104
+ $html = new HtmlDomParser();
 
 
105
  $lazysize = get_option( "wr2x_picturefill_lazysizes" ) && $this->admin->is_registered();
106
  $killSrc = !get_option( "wr2x_picturefill_keep_src" );
107
  $nodes_count = 0;
108
  $nodes_replaced = 0;
109
+ $html = $html->str_get_html( $buffer );
110
  if ( !$html ) {
111
  $this->log( "The HTML buffer is null, another plugin might block the process." );
112
  return $buffer;
1051
  // Change proposed by Nicscott01, slighlty modified by Jordy (+isset)
1052
  // (https://wordpress.org/support/topic/issue-with-crop-position?replies=4#post-6200271)
1053
  $crop = isset( $_wp_additional_image_sizes[$name] ) ? $_wp_additional_image_sizes[$name]['crop'] : true;
1054
+ $customCrop = apply_filters( 'wr2x_custom_crop', null );
1055
+
1056
+ // // Support for Manual Image Crop
1057
+ // // If the size of the image was manually cropped, let's keep it.
1058
+ // if ( class_exists( 'ManualImageCrop' ) && isset( $meta['micSelectedArea'] ) && isset( $meta['micSelectedArea'][$name] ) && isset( $meta['micSelectedArea'][$name]['scale'] ) ) {
1059
+ // $customCrop = $meta['micSelectedArea'][$name];
1060
+ // }
1061
 
 
 
 
 
 
1062
  $image = $this->resize( $originalfile, $meta['sizes'][$name]['width'] * 2,
1063
  $meta['sizes'][$name]['height'] * 2, $crop, $retina_file, $customCrop );
1064
  }
inc/simple_html_dom.php DELETED
@@ -1,1742 +0,0 @@
1
- <?php
2
- /**
3
- * Website: http://sourceforge.net/projects/simplehtmldom/
4
- * Additional projects that may be used: http://sourceforge.net/projects/debugobject/
5
- * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
- * Contributions by:
7
- * Yousuke Kumakura (Attribute filters)
8
- * Vadim Voituk (Negative indexes supports of "find" method)
9
- * Antcs (Constructor with automatically load contents either text or file/url)
10
- *
11
- * all affected sections have comments starting with "PaperG"
12
- *
13
- * Paperg - Added case insensitive testing of the value of the selector.
14
- * Paperg - Added tag_start for the starting index of tags - NOTE: This works but not accurately.
15
- * This tag_start gets counted AFTER \r\n have been crushed out, and after the remove_noice calls so it will not reflect the REAL position of the tag in the source,
16
- * it will almost always be smaller by some amount.
17
- * We use this to determine how far into the file the tag in question is. This "percentage will never be accurate as the $dom->size is the "real" number of bytes the dom was created from.
18
- * but for most purposes, it's a really good estimation.
19
- * Paperg - Added the forceTagsClosed to the dom constructor. Forcing tags closed is great for malformed html, but it CAN lead to parsing errors.
20
- * Allow the user to tell us how much they trust the html.
21
- * Paperg add the text and plaintext to the selectors for the find syntax. plaintext implies text in the innertext of a node. text implies that the tag is a text node.
22
- * This allows for us to find tags based on the text they contain.
23
- * Create find_ancestor_tag to see if a tag is - at any level - inside of another specific tag.
24
- * Paperg: added parse_charset so that we know about the character set of the source document.
25
- * NOTE: If the user's system has a routine called get_last_retrieve_url_contents_content_type availalbe, we will assume it's returning the content-type header from the
26
- * last transfer or curl_exec, and we will parse that and use it in preference to any other method of charset detection.
27
- *
28
- * Found infinite loop in the case of broken html in restore_noise. Rewrote to protect from that.
29
- * PaperG (John Schlick) Added get_display_size for "IMG" tags.
30
- *
31
- * Licensed under The MIT License
32
- * Redistributions of files must retain the above copyright notice.
33
- *
34
- * @author S.C. Chen <me578022@gmail.com>
35
- * @author John Schlick
36
- * @author Rus Carroll
37
- * @version 1.5 ($Rev: 210 $)
38
- * @package PlaceLocalInclude
39
- * @subpackage simple_html_dom
40
- */
41
-
42
- /**
43
- * All of the Defines for the classes below.
44
- * @author S.C. Chen <me578022@gmail.com>
45
- */
46
- define('HDOM_TYPE_ELEMENT', 1);
47
- define('HDOM_TYPE_COMMENT', 2);
48
- define('HDOM_TYPE_TEXT', 3);
49
- define('HDOM_TYPE_ENDTAG', 4);
50
- define('HDOM_TYPE_ROOT', 5);
51
- define('HDOM_TYPE_UNKNOWN', 6);
52
- define('HDOM_QUOTE_DOUBLE', 0);
53
- define('HDOM_QUOTE_SINGLE', 1);
54
- define('HDOM_QUOTE_NO', 3);
55
- define('HDOM_INFO_BEGIN', 0);
56
- define('HDOM_INFO_END', 1);
57
- define('HDOM_INFO_QUOTE', 2);
58
- define('HDOM_INFO_SPACE', 3);
59
- define('HDOM_INFO_TEXT', 4);
60
- define('HDOM_INFO_INNER', 5);
61
- define('HDOM_INFO_OUTER', 6);
62
- define('HDOM_INFO_ENDSPACE',7);
63
- define('DEFAULT_TARGET_CHARSET', 'UTF-8');
64
- define('DEFAULT_BR_TEXT', "\r\n");
65
- define('DEFAULT_SPAN_TEXT', " ");
66
- define('MAX_FILE_SIZE', 600000);
67
- // helper functions
68
- // -----------------------------------------------------------------------------
69
- // get html dom from file
70
- // $maxlen is defined in the code as PHP_STREAM_COPY_ALL which is defined as -1.
71
- function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
72
- {
73
- // We DO force the tags to be terminated.
74
- $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
75
- // For sourceforge users: uncomment the next line and comment the retreive_url_contents line 2 lines down if it is not already done.
76
- $contents = file_get_contents($url, $use_include_path, $context, $offset);
77
- // Paperg - use our own mechanism for getting the contents as we want to control the timeout.
78
- //$contents = retrieve_url_contents($url);
79
- if (empty($contents) || strlen($contents) > MAX_FILE_SIZE)
80
- {
81
- return false;
82
- }
83
- // The second parameter can force the selectors to all be lowercase.
84
- $dom->load($contents, $lowercase, $stripRN);
85
- return $dom;
86
- }
87
-
88
- // get html dom from string
89
- function str_get_html($str, $lowercase=true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
90
- {
91
- $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
92
- if (empty($str) || strlen($str) > MAX_FILE_SIZE)
93
- {
94
- $dom->clear();
95
- return false;
96
- }
97
- $dom->load($str, $lowercase, $stripRN);
98
- return $dom;
99
- }
100
-
101
- // dump html dom tree
102
- function dump_html_tree($node, $show_attr=true, $deep=0)
103
- {
104
- $node->dump($node);
105
- }
106
-
107
-
108
- /**
109
- * simple html dom node
110
- * PaperG - added ability for "find" routine to lowercase the value of the selector.
111
- * PaperG - added $tag_start to track the start position of the tag in the total byte index
112
- *
113
- * @package PlaceLocalInclude
114
- */
115
- class simple_html_dom_node
116
- {
117
- public $nodetype = HDOM_TYPE_TEXT;
118
- public $tag = 'text';
119
- public $attr = array();
120
- public $children = array();
121
- public $nodes = array();
122
- public $parent = null;
123
- // The "info" array - see HDOM_INFO_... for what each element contains.
124
- public $_ = array();
125
- public $tag_start = 0;
126
- private $dom = null;
127
-
128
- function __construct($dom)
129
- {
130
- $this->dom = $dom;
131
- $dom->nodes[] = $this;
132
- }
133
-
134
- function __destruct()
135
- {
136
- $this->clear();
137
- }
138
-
139
- function __toString()
140
- {
141
- return $this->outertext();
142
- }
143
-
144
- // clean up memory due to php5 circular references memory leak...
145
- function clear()
146
- {
147
- $this->dom = null;
148
- $this->nodes = null;
149
- $this->parent = null;
150
- $this->children = null;
151
- }
152
-
153
- // dump node's tree
154
- function dump($show_attr=true, $deep=0)
155
- {
156
- $lead = str_repeat(' ', $deep);
157
-
158
- echo $lead.$this->tag;
159
- if ($show_attr && count($this->attr)>0)
160
- {
161
- echo '(';
162
- foreach ($this->attr as $k=>$v)
163
- echo "[$k]=>\"".$this->$k.'", ';
164
- echo ')';
165
- }
166
- echo "\n";
167
-
168
- if ($this->nodes)
169
- {
170
- foreach ($this->nodes as $c)
171
- {
172
- $c->dump($show_attr, $deep+1);
173
- }
174
- }
175
- }
176
-
177
-
178
- // Debugging function to dump a single dom node with a bunch of information about it.
179
- function dump_node($echo=true)
180
- {
181
-
182
- $string = $this->tag;
183
- if (count($this->attr)>0)
184
- {
185
- $string .= '(';
186
- foreach ($this->attr as $k=>$v)
187
- {
188
- $string .= "[$k]=>\"".$this->$k.'", ';
189
- }
190
- $string .= ')';
191
- }
192
- if (count($this->_)>0)
193
- {
194
- $string .= ' $_ (';
195
- foreach ($this->_ as $k=>$v)
196
- {
197
- if (is_array($v))
198
- {
199
- $string .= "[$k]=>(";
200
- foreach ($v as $k2=>$v2)
201
- {
202
- $string .= "[$k2]=>\"".$v2.'", ';
203
- }
204
- $string .= ")";
205
- } else {
206
- $string .= "[$k]=>\"".$v.'", ';
207
- }
208
- }
209
- $string .= ")";
210
- }
211
-
212
- if (isset($this->text))
213
- {
214
- $string .= " text: (" . $this->text . ")";
215
- }
216
-
217
- $string .= " HDOM_INNER_INFO: '";
218
- if (isset($node->_[HDOM_INFO_INNER]))
219
- {
220
- $string .= $node->_[HDOM_INFO_INNER] . "'";
221
- }
222
- else
223
- {
224
- $string .= ' NULL ';
225
- }
226
-
227
- $string .= " children: " . count($this->children);
228
- $string .= " nodes: " . count($this->nodes);
229
- $string .= " tag_start: " . $this->tag_start;
230
- $string .= "\n";
231
-
232
- if ($echo)
233
- {
234
- echo $string;
235
- return;
236
- }
237
- else
238
- {
239
- return $string;
240
- }
241
- }
242
-
243
- // returns the parent of node
244
- // If a node is passed in, it will reset the parent of the current node to that one.
245
- function parent($parent=null)
246
- {
247
- // I am SURE that this doesn't work properly.
248
- // It fails to unset the current node from it's current parents nodes or children list first.
249
- if ($parent !== null)
250
- {
251
- $this->parent = $parent;
252
- $this->parent->nodes[] = $this;
253
- $this->parent->children[] = $this;
254
- }
255
-
256
- return $this->parent;
257
- }
258
-
259
- // verify that node has children
260
- function has_child()
261
- {
262
- return !empty($this->children);
263
- }
264
-
265
- // returns children of node
266
- function children($idx=-1)
267
- {
268
- if ($idx===-1)
269
- {
270
- return $this->children;
271
- }
272
- if (isset($this->children[$idx]))
273
- {
274
- return $this->children[$idx];
275
- }
276
- return null;
277
- }
278
-
279
- // returns the first child of node
280
- function first_child()
281
- {
282
- if (count($this->children)>0)
283
- {
284
- return $this->children[0];
285
- }
286
- return null;
287
- }
288
-
289
- // returns the last child of node
290
- function last_child()
291
- {
292
- if (($count=count($this->children))>0)
293
- {
294
- return $this->children[$count-1];
295
- }
296
- return null;
297
- }
298
-
299
- // returns the next sibling of node
300
- function next_sibling()
301
- {
302
- if ($this->parent===null)
303
- {
304
- return null;
305
- }
306
-
307
- $idx = 0;
308
- $count = count($this->parent->children);
309
- while ($idx<$count && $this!==$this->parent->children[$idx])
310
- {
311
- ++$idx;
312
- }
313
- if (++$idx>=$count)
314
- {
315
- return null;
316
- }
317
- return $this->parent->children[$idx];
318
- }
319
-
320
- // returns the previous sibling of node
321
- function prev_sibling()
322
- {
323
- if ($this->parent===null) return null;
324
- $idx = 0;
325
- $count = count($this->parent->children);
326
- while ($idx<$count && $this!==$this->parent->children[$idx])
327
- ++$idx;
328
- if (--$idx<0) return null;
329
- return $this->parent->children[$idx];
330
- }
331
-
332
- // function to locate a specific ancestor tag in the path to the root.
333
- function find_ancestor_tag($tag)
334
- {
335
- global $debug_object;
336
- if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
337
-
338
- // Start by including ourselves in the comparison.
339
- $returnDom = $this;
340
-
341
- while (!is_null($returnDom))
342
- {
343
- if (is_object($debug_object)) { $debug_object->debug_log(2, "Current tag is: " . $returnDom->tag); }
344
-
345
- if ($returnDom->tag == $tag)
346
- {
347
- break;
348
- }
349
- $returnDom = $returnDom->parent;
350
- }
351
- return $returnDom;
352
- }
353
-
354
- // get dom node's inner html
355
- function innertext()
356
- {
357
- if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER];
358
- if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
359
-
360
- $ret = '';
361
- foreach ($this->nodes as $n)
362
- $ret .= $n->outertext();
363
- return $ret;
364
- }
365
-
366
- // get dom node's outer text (with tag)
367
- function outertext()
368
- {
369
- global $debug_object;
370
- if (is_object($debug_object))
371
- {
372
- $text = '';
373
- if ($this->tag == 'text')
374
- {
375
- if (!empty($this->text))
376
- {
377
- $text = " with text: " . $this->text;
378
- }
379
- }
380
- $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text);
381
- }
382
-
383
- if ($this->tag==='root') return $this->innertext();
384
-
385
- // trigger callback
386
- if ($this->dom && $this->dom->callback!==null)
387
- {
388
- call_user_func_array($this->dom->callback, array($this));
389
- }
390
-
391
- if (isset($this->_[HDOM_INFO_OUTER])) return $this->_[HDOM_INFO_OUTER];
392
- if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
393
-
394
- // render begin tag
395
- if ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]])
396
- {
397
- $ret = $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup();
398
- } else {
399
- $ret = "";
400
- }
401
-
402
- // render inner text
403
- if (isset($this->_[HDOM_INFO_INNER]))
404
- {
405
- // If it's a br tag... don't return the HDOM_INNER_INFO that we may or may not have added.
406
- if ($this->tag != "br")
407
- {
408
- $ret .= $this->_[HDOM_INFO_INNER];
409
- }
410
- } else {
411
- if ($this->nodes)
412
- {
413
- foreach ($this->nodes as $n)
414
- {
415
- $ret .= $this->convert_text($n->outertext());
416
- }
417
- }
418
- }
419
-
420
- // render end tag
421
- if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END]!=0)
422
- $ret .= '</'.$this->tag.'>';
423
- return $ret;
424
- }
425
-
426
- // get dom node's plain text
427
- function text()
428
- {
429
- if (isset($this->_[HDOM_INFO_INNER])) return $this->_[HDOM_INFO_INNER];
430
- switch ($this->nodetype)
431
- {
432
- case HDOM_TYPE_TEXT: return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
433
- case HDOM_TYPE_COMMENT: return '';
434
- case HDOM_TYPE_UNKNOWN: return '';
435
- }
436
- if (strcasecmp($this->tag, 'script')===0) return '';
437
- if (strcasecmp($this->tag, 'style')===0) return '';
438
-
439
- $ret = '';
440
- // In rare cases, (always node type 1 or HDOM_TYPE_ELEMENT - observed for some span tags, and some p tags) $this->nodes is set to NULL.
441
- // NOTE: This indicates that there is a problem where it's set to NULL without a clear happening.
442
- // WHY is this happening?
443
- if (!is_null($this->nodes))
444
- {
445
- foreach ($this->nodes as $n)
446
- {
447
- $ret .= $this->convert_text($n->text());
448
- }
449
-
450
- // If this node is a span... add a space at the end of it so multiple spans don't run into each other. This is plaintext after all.
451
- if ($this->tag == "span")
452
- {
453
- $ret .= $this->dom->default_span_text;
454
- }
455
-
456
-
457
- }
458
- return $ret;
459
- }
460
-
461
- function xmltext()
462
- {
463
- $ret = $this->innertext();
464
- $ret = str_ireplace('<![CDATA[', '', $ret);
465
- $ret = str_replace(']]>', '', $ret);
466
- return $ret;
467
- }
468
-
469
- // build node's text with tag
470
- function makeup()
471
- {
472
- // text, comment, unknown
473
- if (isset($this->_[HDOM_INFO_TEXT])) return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
474
-
475
- $ret = '<'.$this->tag;
476
- $i = -1;
477
-
478
- foreach ($this->attr as $key=>$val)
479
- {
480
- ++$i;
481
-
482
- // skip removed attribute
483
- if ($val===null || $val===false)
484
- continue;
485
-
486
- $ret .= $this->_[HDOM_INFO_SPACE][$i][0];
487
- //no value attr: nowrap, checked selected...
488
- if ($val===true)
489
- $ret .= $key;
490
- else {
491
- switch ($this->_[HDOM_INFO_QUOTE][$i])
492
- {
493
- case HDOM_QUOTE_DOUBLE: $quote = '"'; break;
494
- case HDOM_QUOTE_SINGLE: $quote = '\''; break;
495
- default: $quote = '';
496
- }
497
- $ret .= $key.$this->_[HDOM_INFO_SPACE][$i][1].'='.$this->_[HDOM_INFO_SPACE][$i][2].$quote.$val.$quote;
498
- }
499
- }
500
- $ret = $this->dom->restore_noise($ret);
501
- return $ret . $this->_[HDOM_INFO_ENDSPACE] . '>';
502
- }
503
-
504
- // find elements by css selector
505
- //PaperG - added ability for find to lowercase the value of the selector.
506
- function find($selector, $idx=null, $lowercase=false)
507
- {
508
- $selectors = $this->parse_selector($selector);
509
- if (($count=count($selectors))===0) return array();
510
- $found_keys = array();
511
-
512
- // find each selector
513
- for ($c=0; $c<$count; ++$c)
514
- {
515
- // The change on the below line was documented on the sourceforge code tracker id 2788009
516
- // used to be: if (($levle=count($selectors[0]))===0) return array();
517
- if (($levle=count($selectors[$c]))===0) return array();
518
- if (!isset($this->_[HDOM_INFO_BEGIN])) return array();
519
-
520
- $head = array($this->_[HDOM_INFO_BEGIN]=>1);
521
-
522
- // handle descendant selectors, no recursive!
523
- for ($l=0; $l<$levle; ++$l)
524
- {
525
- $ret = array();
526
- foreach ($head as $k=>$v)
527
- {
528
- $n = ($k===-1) ? $this->dom->root : $this->dom->nodes[$k];
529
- //PaperG - Pass this optional parameter on to the seek function.
530
- $n->seek($selectors[$c][$l], $ret, $lowercase);
531
- }
532
- $head = $ret;
533
- }
534
-
535
- foreach ($head as $k=>$v)
536
- {
537
- if (!isset($found_keys[$k]))
538
- {
539
- $found_keys[$k] = 1;
540
- }
541
- }
542
- }
543
-
544
- // sort keys
545
- ksort($found_keys);
546
-
547
- $found = array();
548
- foreach ($found_keys as $k=>$v)
549
- $found[] = $this->dom->nodes[$k];
550
-
551
- // return nth-element or array
552
- if (is_null($idx)) return $found;
553
- else if ($idx<0) $idx = count($found) + $idx;
554
- return (isset($found[$idx])) ? $found[$idx] : null;
555
- }
556
-
557
- // seek for given conditions
558
- // PaperG - added parameter to allow for case insensitive testing of the value of a selector.
559
- protected function seek($selector, &$ret, $lowercase=false)
560
- {
561
- global $debug_object;
562
- if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
563
-
564
- list($tag, $key, $val, $exp, $no_key) = $selector;
565
-
566
- // xpath index
567
- if ($tag && $key && is_numeric($key))
568
- {
569
- $count = 0;
570
- foreach ($this->children as $c)
571
- {
572
- if ($tag==='*' || $tag===$c->tag) {
573
- if (++$count==$key) {
574
- $ret[$c->_[HDOM_INFO_BEGIN]] = 1;
575
- return;
576
- }
577
- }
578
- }
579
- return;
580
- }
581
-
582
- $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
583
- if ($end==0) {
584
- $parent = $this->parent;
585
- while (!isset($parent->_[HDOM_INFO_END]) && $parent!==null) {
586
- $end -= 1;
587
- $parent = $parent->parent;
588
- }
589
- $end += $parent->_[HDOM_INFO_END];
590
- }
591
-
592
- for ($i=$this->_[HDOM_INFO_BEGIN]+1; $i<$end; ++$i) {
593
- $node = $this->dom->nodes[$i];
594
-
595
- $pass = true;
596
-
597
- if ($tag==='*' && !$key) {
598
- if (in_array($node, $this->children, true))
599
- $ret[$i] = 1;
600
- continue;
601
- }
602
-
603
- // compare tag
604
- if ($tag && $tag!=$node->tag && $tag!=='*') {$pass=false;}
605
- // compare key
606
- if ($pass && $key) {
607
- if ($no_key) {
608
- if (isset($node->attr[$key])) $pass=false;
609
- } else {
610
- if (($key != "plaintext") && !isset($node->attr[$key])) $pass=false;
611
- }
612
- }
613
- // compare value
614
- if ($pass && $key && $val && $val!=='*') {
615
- // If they have told us that this is a "plaintext" search then we want the plaintext of the node - right?
616
- if ($key == "plaintext") {
617
- // $node->plaintext actually returns $node->text();
618
- $nodeKeyValue = $node->text();
619
- } else {
620
- // this is a normal search, we want the value of that attribute of the tag.
621
- $nodeKeyValue = $node->attr[$key];
622
- }
623
- if (is_object($debug_object)) {$debug_object->debug_log(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);}
624
-
625
- //PaperG - If lowercase is set, do a case insensitive test of the value of the selector.
626
- if ($lowercase) {
627
- $check = $this->match($exp, strtolower($val), strtolower($nodeKeyValue));
628
- } else {
629
- $check = $this->match($exp, $val, $nodeKeyValue);
630
- }
631
- if (is_object($debug_object)) {$debug_object->debug_log(2, "after match: " . ($check ? "true" : "false"));}
632
-
633
- // handle multiple class
634
- if (!$check && strcasecmp($key, 'class')===0) {
635
- foreach (explode(' ',$node->attr[$key]) as $k) {
636
- // Without this, there were cases where leading, trailing, or double spaces lead to our comparing blanks - bad form.
637
- if (!empty($k)) {
638
- if ($lowercase) {
639
- $check = $this->match($exp, strtolower($val), strtolower($k));
640
- } else {
641
- $check = $this->match($exp, $val, $k);
642
- }
643
- if ($check) break;
644
- }
645
- }
646
- }
647
- if (!$check) $pass = false;
648
- }
649
- if ($pass) $ret[$i] = 1;
650
- unset($node);
651
- }
652
- // It's passed by reference so this is actually what this function returns.
653
- if (is_object($debug_object)) {$debug_object->debug_log(1, "EXIT - ret: ", $ret);}
654
- }
655
-
656
- protected function match($exp, $pattern, $value) {
657
- global $debug_object;
658
- if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
659
-
660
- switch ($exp) {
661
- case '=':
662
- return ($value===$pattern);
663
- case '!=':
664
- return ($value!==$pattern);
665
- case '^=':
666
- return preg_match("/^".preg_quote($pattern,'/')."/", $value);
667
- case '$=':
668
- return preg_match("/".preg_quote($pattern,'/')."$/", $value);
669
- case '*=':
670
- if ($pattern[0]=='/') {
671
- return preg_match($pattern, $value);
672
- }
673
- return preg_match("/".$pattern."/i", $value);
674
- }
675
- return false;
676
- }
677
-
678
- protected function parse_selector($selector_string) {
679
- global $debug_object;
680
- if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
681
-
682
- // pattern of CSS selectors, modified from mootools
683
- // Paperg: Add the colon to the attrbute, so that it properly finds <tag attr:ibute="something" > like google does.
684
- // Note: if you try to look at this attribute, yo MUST use getAttribute since $dom->x:y will fail the php syntax check.
685
- // Notice the \[ starting the attbute? and the @? following? This implies that an attribute can begin with an @ sign that is not captured.
686
- // This implies that an html attribute specifier may start with an @ sign that is NOT captured by the expression.
687
- // farther study is required to determine of this should be documented or removed.
688
- // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
689
- $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
690
- preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER);
691
- if (is_object($debug_object)) {$debug_object->debug_log(2, "Matches Array: ", $matches);}
692
-
693
- $selectors = array();
694
- $result = array();
695
- //print_r($matches);
696
-
697
- foreach ($matches as $m) {
698
- $m[0] = trim($m[0]);
699
- if ($m[0]==='' || $m[0]==='/' || $m[0]==='//') continue;
700
- // for browser generated xpath
701
- if ($m[1]==='tbody') continue;
702
-
703
- list($tag, $key, $val, $exp, $no_key) = array($m[1], null, null, '=', false);
704
- if (!empty($m[2])) {$key='id'; $val=$m[2];}
705
- if (!empty($m[3])) {$key='class'; $val=$m[3];}
706
- if (!empty($m[4])) {$key=$m[4];}
707
- if (!empty($m[5])) {$exp=$m[5];}
708
- if (!empty($m[6])) {$val=$m[6];}
709
-
710
- // convert to lowercase
711
- if ($this->dom->lowercase) {$tag=strtolower($tag); $key=strtolower($key);}
712
- //elements that do NOT have the specified attribute
713
- if (isset($key[0]) && $key[0]==='!') {$key=substr($key, 1); $no_key=true;}
714
-
715
- $result[] = array($tag, $key, $val, $exp, $no_key);
716
- if (trim($m[7])===',') {
717
- $selectors[] = $result;
718
- $result = array();
719
- }
720
- }
721
- if (count($result)>0)
722
- $selectors[] = $result;
723
- return $selectors;
724
- }
725
-
726
- function __get($name)
727
- {
728
- if (isset($this->attr[$name]))
729
- {
730
- return $this->convert_text($this->attr[$name]);
731
- }
732
- switch ($name)
733
- {
734
- case 'outertext': return $this->outertext();
735
- case 'innertext': return $this->innertext();
736
- case 'plaintext': return $this->text();
737
- case 'xmltext': return $this->xmltext();
738
- default: return array_key_exists($name, $this->attr);
739
- }
740
- }
741
-
742
- function __set($name, $value)
743
- {
744
- global $debug_object;
745
- if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
746
-
747
- switch ($name)
748
- {
749
- case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
750
- case 'innertext':
751
- if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value;
752
- return $this->_[HDOM_INFO_INNER] = $value;
753
- }
754
- if (!isset($this->attr[$name]))
755
- {
756
- $this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
757
- $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
758
- }
759
- $this->attr[$name] = $value;
760
- }
761
-
762
- function __isset($name)
763
- {
764
- switch ($name)
765
- {
766
- case 'outertext': return true;
767
- case 'innertext': return true;
768
- case 'plaintext': return true;
769
- }
770
- //no value attr: nowrap, checked selected...
771
- return (array_key_exists($name, $this->attr)) ? true : isset($this->attr[$name]);
772
- }
773
-
774
- function __unset($name) {
775
- if (isset($this->attr[$name]))
776
- unset($this->attr[$name]);
777
- }
778
-
779
- // PaperG - Function to convert the text from one character set to another if the two sets are not the same.
780
- function convert_text($text)
781
- {
782
- global $debug_object;
783
- if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
784
-
785
- $converted_text = $text;
786
-
787
- $sourceCharset = "";
788
- $targetCharset = "";
789
-
790
- if ($this->dom)
791
- {
792
- $sourceCharset = strtoupper($this->dom->_charset);
793
- $targetCharset = strtoupper($this->dom->_target_charset);
794
- }
795
- if (is_object($debug_object)) {$debug_object->debug_log(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);}
796
-
797
- if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0))
798
- {
799
- // Check if the reported encoding could have been incorrect and the text is actually already UTF-8
800
- if ((strcasecmp($targetCharset, 'UTF-8') == 0) && ($this->is_utf8($text)))
801
- {
802
- $converted_text = $text;
803
- }
804
- else
805
- {
806
- $converted_text = iconv($sourceCharset, $targetCharset, $text);
807
- }
808
- }
809
-
810
- // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
811
- if ($targetCharset == 'UTF-8')
812
- {
813
- if (substr($converted_text, 0, 3) == "\xef\xbb\xbf")
814
- {
815
- $converted_text = substr($converted_text, 3);
816
- }
817
- if (substr($converted_text, -3) == "\xef\xbb\xbf")
818
- {
819
- $converted_text = substr($converted_text, 0, -3);
820
- }
821
- }
822
-
823
- return $converted_text;
824
- }
825
-
826
- /**
827
- * Returns true if $string is valid UTF-8 and false otherwise.
828
- *
829
- * @param mixed $str String to be tested
830
- * @return boolean
831
- */
832
- static function is_utf8($str)
833
- {
834
- $c=0; $b=0;
835
- $bits=0;
836
- $len=strlen($str);
837
- for($i=0; $i<$len; $i++)
838
- {
839
- $c=ord($str[$i]);
840
- if($c > 128)
841
- {
842
- if(($c >= 254)) return false;
843
- elseif($c >= 252) $bits=6;
844
- elseif($c >= 248) $bits=5;
845
- elseif($c >= 240) $bits=4;
846
- elseif($c >= 224) $bits=3;
847
- elseif($c >= 192) $bits=2;
848
- else return false;
849
- if(($i+$bits) > $len) return false;
850
- while($bits > 1)
851
- {
852
- $i++;
853
- $b=ord($str[$i]);
854
- if($b < 128 || $b > 191) return false;
855
- $bits--;
856
- }
857
- }
858
- }
859
- return true;
860
- }
861
- /*
862
- function is_utf8($string)
863
- {
864
- //this is buggy
865
- return (utf8_encode(utf8_decode($string)) == $string);
866
- }
867
- */
868
-
869
- /**
870
- * Function to try a few tricks to determine the displayed size of an img on the page.
871
- * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
872
- *
873
- * @author John Schlick
874
- * @version April 19 2012
875
- * @return array an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it out.
876
- */
877
- function get_display_size()
878
- {
879
- global $debug_object;
880
-
881
- $width = -1;
882
- $height = -1;
883
-
884
- if ($this->tag !== 'img')
885
- {
886
- return false;
887
- }
888
-
889
- // See if there is aheight or width attribute in the tag itself.
890
- if (isset($this->attr['width']))
891
- {
892
- $width = $this->attr['width'];
893
- }
894
-
895
- if (isset($this->attr['height']))
896
- {
897
- $height = $this->attr['height'];
898
- }
899
-
900
- // Now look for an inline style.
901
- if (isset($this->attr['style']))
902
- {
903
- // Thanks to user gnarf from stackoverflow for this regular expression.
904
- $attributes = array();
905
- preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER);
906
- foreach ($matches as $match) {
907
- $attributes[$match[1]] = $match[2];
908
- }
909
-
910
- // If there is a width in the style attributes:
911
- if (isset($attributes['width']) && $width == -1)
912
- {
913
- // check that the last two characters are px (pixels)
914
- if (strtolower(substr($attributes['width'], -2)) == 'px')
915
- {
916
- $proposed_width = substr($attributes['width'], 0, -2);
917
- // Now make sure that it's an integer and not something stupid.
918
- if (filter_var($proposed_width, FILTER_VALIDATE_INT))
919
- {
920
- $width = $proposed_width;
921
- }
922
- }
923
- }
924
-
925
- // If there is a width in the style attributes:
926
- if (isset($attributes['height']) && $height == -1)
927
- {
928
- // check that the last two characters are px (pixels)
929
- if (strtolower(substr($attributes['height'], -2)) == 'px')
930
- {
931
- $proposed_height = substr($attributes['height'], 0, -2);
932
- // Now make sure that it's an integer and not something stupid.
933
- if (filter_var($proposed_height, FILTER_VALIDATE_INT))
934
- {
935
- $height = $proposed_height;
936
- }
937
- }
938
- }
939
-
940
- }
941
-
942
- // Future enhancement:
943
- // Look in the tag to see if there is a class or id specified that has a height or width attribute to it.
944
-
945
- // Far future enhancement
946
- // Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width
947
- // Note that in this case, the class or id will have the img subselector for it to apply to the image.
948
-
949
- // ridiculously far future development
950
- // If the class or id is specified in a SEPARATE css file thats not on the page, go get it and do what we were just doing for the ones on the page.
951
-
952
- $result = array('height' => $height,
953
- 'width' => $width);
954
- return $result;
955
- }
956
-
957
- // camel naming conventions
958
- function getAllAttributes() {return $this->attr;}
959
- function getAttribute($name) {return $this->__get($name);}
960
- function setAttribute($name, $value) {$this->__set($name, $value);}
961
- function hasAttribute($name) {return $this->__isset($name);}
962
- function removeAttribute($name) {$this->__set($name, null);}
963
- function getElementById($id) {return $this->find("#$id", 0);}
964
- function getElementsById($id, $idx=null) {return $this->find("#$id", $idx);}
965
- function getElementByTagName($name) {return $this->find($name, 0);}
966
- function getElementsByTagName($name, $idx=null) {return $this->find($name, $idx);}
967
- function parentNode() {return $this->parent();}
968
- function childNodes($idx=-1) {return $this->children($idx);}
969
- function firstChild() {return $this->first_child();}
970
- function lastChild() {return $this->last_child();}
971
- function nextSibling() {return $this->next_sibling();}
972
- function previousSibling() {return $this->prev_sibling();}
973
- function hasChildNodes() {return $this->has_child();}
974
- function nodeName() {return $this->tag;}
975
- function appendChild($node) {$node->parent($this); return $node;}
976
-
977
- }
978
-
979
- /**
980
- * simple html dom parser
981
- * Paperg - in the find routine: allow us to specify that we want case insensitive testing of the value of the selector.
982
- * Paperg - change $size from protected to public so we can easily access it
983
- * Paperg - added ForceTagsClosed in the constructor which tells us whether we trust the html or not. Default is to NOT trust it.
984
- *
985
- * @package PlaceLocalInclude
986
- */
987
- class simple_html_dom
988
- {
989
- public $root = null;
990
- public $nodes = array();
991
- public $callback = null;
992
- public $lowercase = false;
993
- // Used to keep track of how large the text was when we started.
994
- public $original_size;
995
- public $size;
996
- protected $pos;
997
- protected $doc;
998
- protected $char;
999
- protected $cursor;
1000
- protected $parent;
1001
- protected $noise = array();
1002
- protected $token_blank = " \t\r\n";
1003
- protected $token_equal = ' =/>';
1004
- protected $token_slash = " />\r\n\t";
1005
- protected $token_attr = ' >';
1006
- // Note that this is referenced by a child node, and so it needs to be public for that node to see this information.
1007
- public $_charset = '';
1008
- public $_target_charset = '';
1009
- protected $default_br_text = "";
1010
- public $default_span_text = "";
1011
-
1012
- // use isset instead of in_array, performance boost about 30%...
1013
- protected $self_closing_tags = array('img'=>1, 'br'=>1, 'input'=>1, 'meta'=>1, 'link'=>1, 'hr'=>1, 'base'=>1, 'embed'=>1, 'spacer'=>1);
1014
- protected $block_tags = array('root'=>1, 'body'=>1, 'form'=>1, 'div'=>1, 'span'=>1, 'table'=>1);
1015
- // Known sourceforge issue #2977341
1016
- // B tags that are not closed cause us to return everything to the end of the document.
1017
- protected $optional_closing_tags = array(
1018
- 'tr'=>array('tr'=>1, 'td'=>1, 'th'=>1),
1019
- 'th'=>array('th'=>1),
1020
- 'td'=>array('td'=>1),
1021
- 'li'=>array('li'=>1),
1022
- 'dt'=>array('dt'=>1, 'dd'=>1),
1023
- 'dd'=>array('dd'=>1, 'dt'=>1),
1024
- 'dl'=>array('dd'=>1, 'dt'=>1),
1025
- 'p'=>array('p'=>1),
1026
- 'nobr'=>array('nobr'=>1),
1027
- 'b'=>array('b'=>1),
1028
- 'option'=>array('option'=>1),
1029
- );
1030
-
1031
- function __construct($str=null, $lowercase=true, $forceTagsClosed=true, $target_charset=DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
1032
- {
1033
- if ($str)
1034
- {
1035
- if (preg_match("/^http:\/\//i",$str) || is_file($str))
1036
- {
1037
- $this->load_file($str);
1038
- }
1039
- else
1040
- {
1041
- $this->load($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText);
1042
- }
1043
- }
1044
- // Forcing tags to be closed implies that we don't trust the html, but it can lead to parsing errors if we SHOULD trust the html.
1045
- if (!$forceTagsClosed) {
1046
- $this->optional_closing_array=array();
1047
- }
1048
- $this->_target_charset = $target_charset;
1049
- }
1050
-
1051
- function __destruct()
1052
- {
1053
- $this->clear();
1054
- }
1055
-
1056
- // load html from string
1057
- function load($str, $lowercase=true, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
1058
- {
1059
- global $debug_object;
1060
-
1061
- // prepare
1062
- $this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText);
1063
- // strip out cdata
1064
- $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
1065
- // strip out comments
1066
- $this->remove_noise("'<!--(.*?)-->'is");
1067
- // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
1068
- // Script tags removal now preceeds style tag removal.
1069
- // strip out <script> tags
1070
- $this->remove_noise("'<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>'is");
1071
- $this->remove_noise("'<\s*script\s*>(.*?)<\s*/\s*script\s*>'is");
1072
- // strip out <style> tags
1073
- $this->remove_noise("'<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>'is");
1074
- $this->remove_noise("'<\s*style\s*>(.*?)<\s*/\s*style\s*>'is");
1075
- // strip out preformatted tags
1076
- $this->remove_noise("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is");
1077
- // strip out server side scripts
1078
- $this->remove_noise("'(<\?)(.*?)(\?>)'s", true);
1079
- // strip smarty scripts
1080
- $this->remove_noise("'(\{\w)(.*?)(\})'s", true);
1081
-
1082
- // parsing
1083
- while ($this->parse());
1084
- // end
1085
- $this->root->_[HDOM_INFO_END] = $this->cursor;
1086
- $this->parse_charset();
1087
-
1088
- // make load function chainable
1089
- return $this;
1090
-
1091
- }
1092
-
1093
- // load html from file
1094
- function load_file()
1095
- {
1096
- $args = func_get_args();
1097
- $this->load(call_user_func_array('file_get_contents', $args), true);
1098
- // Throw an error if we can't properly load the dom.
1099
- if (($error=error_get_last())!==null) {
1100
- $this->clear();
1101
- return false;
1102
- }
1103
- }
1104
-
1105
- // set callback function
1106
- function set_callback($function_name)
1107
- {
1108
- $this->callback = $function_name;
1109
- }
1110
-
1111
- // remove callback function
1112
- function remove_callback()
1113
- {
1114
- $this->callback = null;
1115
- }
1116
-
1117
- // save dom as string
1118
- function save($filepath='')
1119
- {
1120
- $ret = $this->root->innertext();
1121
- if ($filepath!=='') file_put_contents($filepath, $ret, LOCK_EX);
1122
- return $ret;
1123
- }
1124
-
1125
- // find dom node by css selector
1126
- // Paperg - allow us to specify that we want case insensitive testing of the value of the selector.
1127
- function find($selector, $idx=null, $lowercase=false)
1128
- {
1129
- return $this->root->find($selector, $idx, $lowercase);
1130
- }
1131
-
1132
- // clean up memory due to php5 circular references memory leak...
1133
- function clear()
1134
- {
1135
- foreach ($this->nodes as $n) {$n->clear(); $n = null;}
1136
- // This add next line is documented in the sourceforge repository. 2977248 as a fix for ongoing memory leaks that occur even with the use of clear.
1137
- if (isset($this->children)) foreach ($this->children as $n) {$n->clear(); $n = null;}
1138
- if (isset($this->parent)) {$this->parent->clear(); unset($this->parent);}
1139
- if (isset($this->root)) {$this->root->clear(); unset($this->root);}
1140
- unset($this->doc);
1141
- unset($this->noise);
1142
- }
1143
-
1144
- function dump($show_attr=true)
1145
- {
1146
- $this->root->dump($show_attr);
1147
- }
1148
-
1149
- // prepare HTML data and init everything
1150
- protected function prepare($str, $lowercase=true, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT)
1151
- {
1152
- $this->clear();
1153
-
1154
- // set the length of content before we do anything to it.
1155
- $this->size = strlen($str);
1156
- // Save the original size of the html that we got in. It might be useful to someone.
1157
- $this->original_size = $this->size;
1158
-
1159
- //before we save the string as the doc... strip out the \r \n's if we are told to.
1160
- if ($stripRN) {
1161
- $str = str_replace("\r", " ", $str);
1162
- $str = str_replace("\n", " ", $str);
1163
-
1164
- // set the length of content since we have changed it.
1165
- $this->size = strlen($str);
1166
- }
1167
-
1168
- $this->doc = $str;
1169
- $this->pos = 0;
1170
- $this->cursor = 1;
1171
- $this->noise = array();
1172
- $this->nodes = array();
1173
- $this->lowercase = $lowercase;
1174
- $this->default_br_text = $defaultBRText;
1175
- $this->default_span_text = $defaultSpanText;
1176
- $this->root = new simple_html_dom_node($this);
1177
- $this->root->tag = 'root';
1178
- $this->root->_[HDOM_INFO_BEGIN] = -1;
1179
- $this->root->nodetype = HDOM_TYPE_ROOT;
1180
- $this->parent = $this->root;
1181
- if ($this->size>0) $this->char = $this->doc[0];
1182
- }
1183
-
1184
- // parse html content
1185
- protected function parse()
1186
- {
1187
- if (($s = $this->copy_until_char('<'))==='')
1188
- {
1189
- return $this->read_tag();
1190
- }
1191
-
1192
- // text
1193
- $node = new simple_html_dom_node($this);
1194
- ++$this->cursor;
1195
- $node->_[HDOM_INFO_TEXT] = $s;
1196
- $this->link_nodes($node, false);
1197
- return true;
1198
- }
1199
-
1200
- // PAPERG - dkchou - added this to try to identify the character set of the page we have just parsed so we know better how to spit it out later.
1201
- // NOTE: IF you provide a routine called get_last_retrieve_url_contents_content_type which returns the CURLINFO_CONTENT_TYPE from the last curl_exec
1202
- // (or the content_type header from the last transfer), we will parse THAT, and if a charset is specified, we will use it over any other mechanism.
1203
- protected function parse_charset()
1204
- {
1205
- global $debug_object;
1206
-
1207
- $charset = null;
1208
-
1209
- if (function_exists('get_last_retrieve_url_contents_content_type'))
1210
- {
1211
- $contentTypeHeader = get_last_retrieve_url_contents_content_type();
1212
- $success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches);
1213
- if ($success)
1214
- {
1215
- $charset = $matches[1];
1216
- if (is_object($debug_object)) {$debug_object->debug_log(2, 'header content-type found charset of: ' . $charset);}
1217
- }
1218
-
1219
- }
1220
-
1221
- if (empty($charset))
1222
- {
1223
- $el = $this->root->find('meta[http-equiv=Content-Type]',0, true);
1224
- if (!empty($el))
1225
- {
1226
- $fullvalue = $el->content;
1227
- if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag found' . $fullvalue);}
1228
-
1229
- if (!empty($fullvalue))
1230
- {
1231
- $success = preg_match('/charset=(.+)/i', $fullvalue, $matches);
1232
- if ($success)
1233
- {
1234
- $charset = $matches[1];
1235
- }
1236
- else
1237
- {
1238
- // If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1
1239
- if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');}
1240
- $charset = 'ISO-8859-1';
1241
- }
1242
- }
1243
- }
1244
- }
1245
-
1246
- // If we couldn't find a charset above, then lets try to detect one based on the text we got...
1247
- if (empty($charset))
1248
- {
1249
- // Use this in case mb_detect_charset isn't installed/loaded on this machine.
1250
- $charset = false;
1251
- if (function_exists('mb_detect_encoding'))
1252
- {
1253
- // Have php try to detect the encoding from the text given to us.
1254
- $charset = mb_detect_encoding($this->root->plaintext . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) );
1255
- if (is_object($debug_object)) {$debug_object->debug_log(2, 'mb_detect found: ' . $charset);}
1256
- }
1257
-
1258
- // and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need...
1259
- if ($charset === false)
1260
- {
1261
- if (is_object($debug_object)) {$debug_object->debug_log(2, 'since mb_detect failed - using default of utf-8');}
1262
- $charset = 'UTF-8';
1263
- }
1264
- }
1265
-
1266
- // Since CP1252 is a superset, if we get one of it's subsets, we want it instead.
1267
- if ((strtolower($charset) == strtolower('ISO-8859-1')) || (strtolower($charset) == strtolower('Latin1')) || (strtolower($charset) == strtolower('Latin-1')))
1268
- {
1269
- if (is_object($debug_object)) {$debug_object->debug_log(2, 'replacing ' . $charset . ' with CP1252 as its a superset');}
1270
- $charset = 'CP1252';
1271
- }
1272
-
1273
- if (is_object($debug_object)) {$debug_object->debug_log(1, 'EXIT - ' . $charset);}
1274
-
1275
- return $this->_charset = $charset;
1276
- }
1277
-
1278
- // read tag info
1279
- protected function read_tag()
1280
- {
1281
- if ($this->char!=='<')
1282
- {
1283
- $this->root->_[HDOM_INFO_END] = $this->cursor;
1284
- return false;
1285
- }
1286
- $begin_tag_pos = $this->pos;
1287
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1288
-
1289
- // end tag
1290
- if ($this->char==='/')
1291
- {
1292
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1293
- // This represents the change in the simple_html_dom trunk from revision 180 to 181.
1294
- // $this->skip($this->token_blank_t);
1295
- $this->skip($this->token_blank);
1296
- $tag = $this->copy_until_char('>');
1297
-
1298
- // skip attributes in end tag
1299
- if (($pos = strpos($tag, ' '))!==false)
1300
- $tag = substr($tag, 0, $pos);
1301
-
1302
- $parent_lower = strtolower($this->parent->tag);
1303
- $tag_lower = strtolower($tag);
1304
-
1305
- if ($parent_lower!==$tag_lower)
1306
- {
1307
- if (isset($this->optional_closing_tags[$parent_lower]) && isset($this->block_tags[$tag_lower]))
1308
- {
1309
- $this->parent->_[HDOM_INFO_END] = 0;
1310
- $org_parent = $this->parent;
1311
-
1312
- while (($this->parent->parent) && strtolower($this->parent->tag)!==$tag_lower)
1313
- $this->parent = $this->parent->parent;
1314
-
1315
- if (strtolower($this->parent->tag)!==$tag_lower) {
1316
- $this->parent = $org_parent; // restore origonal parent
1317
- if ($this->parent->parent) $this->parent = $this->parent->parent;
1318
- $this->parent->_[HDOM_INFO_END] = $this->cursor;
1319
- return $this->as_text_node($tag);
1320
- }
1321
- }
1322
- else if (($this->parent->parent) && isset($this->block_tags[$tag_lower]))
1323
- {
1324
- $this->parent->_[HDOM_INFO_END] = 0;
1325
- $org_parent = $this->parent;
1326
-
1327
- while (($this->parent->parent) && strtolower($this->parent->tag)!==$tag_lower)
1328
- $this->parent = $this->parent->parent;
1329
-
1330
- if (strtolower($this->parent->tag)!==$tag_lower)
1331
- {
1332
- $this->parent = $org_parent; // restore origonal parent
1333
- $this->parent->_[HDOM_INFO_END] = $this->cursor;
1334
- return $this->as_text_node($tag);
1335
- }
1336
- }
1337
- else if (($this->parent->parent) && strtolower($this->parent->parent->tag)===$tag_lower)
1338
- {
1339
- $this->parent->_[HDOM_INFO_END] = 0;
1340
- $this->parent = $this->parent->parent;
1341
- }
1342
- else
1343
- return $this->as_text_node($tag);
1344
- }
1345
-
1346
- $this->parent->_[HDOM_INFO_END] = $this->cursor;
1347
- if ($this->parent->parent) $this->parent = $this->parent->parent;
1348
-
1349
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1350
- return true;
1351
- }
1352
-
1353
- $node = new simple_html_dom_node($this);
1354
- $node->_[HDOM_INFO_BEGIN] = $this->cursor;
1355
- ++$this->cursor;
1356
- $tag = $this->copy_until($this->token_slash);
1357
- $node->tag_start = $begin_tag_pos;
1358
-
1359
- // doctype, cdata & comments...
1360
- if (isset($tag[0]) && $tag[0]==='!') {
1361
- $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');
1362
-
1363
- if (isset($tag[2]) && $tag[1]==='-' && $tag[2]==='-') {
1364
- $node->nodetype = HDOM_TYPE_COMMENT;
1365
- $node->tag = 'comment';
1366
- } else {
1367
- $node->nodetype = HDOM_TYPE_UNKNOWN;
1368
- $node->tag = 'unknown';
1369
- }
1370
- if ($this->char==='>') $node->_[HDOM_INFO_TEXT].='>';
1371
- $this->link_nodes($node, true);
1372
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1373
- return true;
1374
- }
1375
-
1376
- // text
1377
- if ($pos=strpos($tag, '<')!==false) {
1378
- $tag = '<' . substr($tag, 0, -1);
1379
- $node->_[HDOM_INFO_TEXT] = $tag;
1380
- $this->link_nodes($node, false);
1381
- $this->char = $this->doc[--$this->pos]; // prev
1382
- return true;
1383
- }
1384
-
1385
- if (!preg_match("/^[\w-:]+$/", $tag)) {
1386
- $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');
1387
- if ($this->char==='<') {
1388
- $this->link_nodes($node, false);
1389
- return true;
1390
- }
1391
-
1392
- if ($this->char==='>') $node->_[HDOM_INFO_TEXT].='>';
1393
- $this->link_nodes($node, false);
1394
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1395
- return true;
1396
- }
1397
-
1398
- // begin tag
1399
- $node->nodetype = HDOM_TYPE_ELEMENT;
1400
- $tag_lower = strtolower($tag);
1401
- $node->tag = ($this->lowercase) ? $tag_lower : $tag;
1402
-
1403
- // handle optional closing tags
1404
- if (isset($this->optional_closing_tags[$tag_lower]) )
1405
- {
1406
- while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)]))
1407
- {
1408
- $this->parent->_[HDOM_INFO_END] = 0;
1409
- $this->parent = $this->parent->parent;
1410
- }
1411
- $node->parent = $this->parent;
1412
- }
1413
-
1414
- $guard = 0; // prevent infinity loop
1415
- $space = array($this->copy_skip($this->token_blank), '', '');
1416
-
1417
- // attributes
1418
- do
1419
- {
1420
- if ($this->char!==null && $space[0]==='')
1421
- {
1422
- break;
1423
- }
1424
- $name = $this->copy_until($this->token_equal);
1425
- if ($guard===$this->pos)
1426
- {
1427
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1428
- continue;
1429
- }
1430
- $guard = $this->pos;
1431
-
1432
- // handle endless '<'
1433
- if ($this->pos>=$this->size-1 && $this->char!=='>') {
1434
- $node->nodetype = HDOM_TYPE_TEXT;
1435
- $node->_[HDOM_INFO_END] = 0;
1436
- $node->_[HDOM_INFO_TEXT] = '<'.$tag . $space[0] . $name;
1437
- $node->tag = 'text';
1438
- $this->link_nodes($node, false);
1439
- return true;
1440
- }
1441
-
1442
- // handle mismatch '<'
1443
- if ($this->doc[$this->pos-1]=='<') {
1444
- $node->nodetype = HDOM_TYPE_TEXT;
1445
- $node->tag = 'text';
1446
- $node->attr = array();
1447
- $node->_[HDOM_INFO_END] = 0;
1448
- $node->_[HDOM_INFO_TEXT] = substr($this->doc, $begin_tag_pos, $this->pos-$begin_tag_pos-1);
1449
- $this->pos -= 2;
1450
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1451
- $this->link_nodes($node, false);
1452
- return true;
1453
- }
1454
-
1455
- if ($name!=='/' && $name!=='') {
1456
- $space[1] = $this->copy_skip($this->token_blank);
1457
- $name = $this->restore_noise($name);
1458
- if ($this->lowercase) $name = strtolower($name);
1459
- if ($this->char==='=') {
1460
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1461
- $this->parse_attr($node, $name, $space);
1462
- }
1463
- else {
1464
- //no value attr: nowrap, checked selected...
1465
- $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
1466
- $node->attr[$name] = true;
1467
- if ($this->char!='>') $this->char = $this->doc[--$this->pos]; // prev
1468
- }
1469
- $node->_[HDOM_INFO_SPACE][] = $space;
1470
- $space = array($this->copy_skip($this->token_blank), '', '');
1471
- }
1472
- else
1473
- break;
1474
- } while ($this->char!=='>' && $this->char!=='/');
1475
-
1476
- $this->link_nodes($node, true);
1477
- $node->_[HDOM_INFO_ENDSPACE] = $space[0];
1478
-
1479
- // check self closing
1480
- if ($this->copy_until_char_escape('>')==='/')
1481
- {
1482
- $node->_[HDOM_INFO_ENDSPACE] .= '/';
1483
- $node->_[HDOM_INFO_END] = 0;
1484
- }
1485
- else
1486
- {
1487
- // reset parent
1488
- if (!isset($this->self_closing_tags[strtolower($node->tag)])) $this->parent = $node;
1489
- }
1490
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1491
-
1492
- // If it's a BR tag, we need to set its text to the default text.
1493
- // This way when we see it in plaintext, we can generate formatting that the user wants.
1494
- // since a br tag never has sub nodes, this works well.
1495
- if ($node->tag == "br")
1496
- {
1497
- $node->_[HDOM_INFO_INNER] = $this->default_br_text;
1498
- }
1499
-
1500
- return true;
1501
- }
1502
-
1503
- // parse attributes
1504
- protected function parse_attr($node, $name, &$space)
1505
- {
1506
- // Per sourceforge: http://sourceforge.net/tracker/?func=detail&aid=3061408&group_id=218559&atid=1044037
1507
- // If the attribute is already defined inside a tag, only pay attention to the first one as opposed to the last one.
1508
- if (isset($node->attr[$name]))
1509
- {
1510
- return;
1511
- }
1512
-
1513
- $space[2] = $this->copy_skip($this->token_blank);
1514
- switch ($this->char) {
1515
- case '"':
1516
- $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
1517
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1518
- $node->attr[$name] = $this->restore_noise($this->copy_until_char_escape('"'));
1519
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1520
- break;
1521
- case '\'':
1522
- $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_SINGLE;
1523
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1524
- $node->attr[$name] = $this->restore_noise($this->copy_until_char_escape('\''));
1525
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1526
- break;
1527
- default:
1528
- $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
1529
- $node->attr[$name] = $this->restore_noise($this->copy_until($this->token_attr));
1530
- }
1531
- // PaperG: Attributes should not have \r or \n in them, that counts as html whitespace.
1532
- $node->attr[$name] = str_replace("\r", "", $node->attr[$name]);
1533
- $node->attr[$name] = str_replace("\n", "", $node->attr[$name]);
1534
- // PaperG: If this is a "class" selector, let's get rid of the preceding and trailing space since some people leave it in the multi class case.
1535
- if ($name == "class") {
1536
- $node->attr[$name] = $node->attr[$name];
1537
- }
1538
- }
1539
-
1540
- // link node's parent
1541
- protected function link_nodes(&$node, $is_child)
1542
- {
1543
- $node->parent = $this->parent;
1544
- $this->parent->nodes[] = $node;
1545
- if ($is_child)
1546
- {
1547
- $this->parent->children[] = $node;
1548
- }
1549
- }
1550
-
1551
- // as a text node
1552
- protected function as_text_node($tag)
1553
- {
1554
- $node = new simple_html_dom_node($this);
1555
- ++$this->cursor;
1556
- $node->_[HDOM_INFO_TEXT] = '</' . $tag . '>';
1557
- $this->link_nodes($node, false);
1558
- $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1559
- return true;
1560
- }
1561
-
1562
- protected function skip($chars)
1563
- {
1564
- $this->pos += strspn($this->doc, $chars, $this->pos);
1565
- $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1566
- }
1567
-
1568
- protected function copy_skip($chars)
1569
- {
1570
- $pos = $this->pos;
1571
- $len = strspn($this->doc, $chars, $pos);
1572
- $this->pos += $len;
1573
- $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1574
- if ($len===0) return '';
1575
- return substr($this->doc, $pos, $len);
1576
- }
1577
-
1578
- protected function copy_until($chars)
1579
- {
1580
- $pos = $this->pos;
1581
- $len = strcspn($this->doc, $chars, $pos);
1582
- $this->pos += $len;
1583
- $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1584
- return substr($this->doc, $pos, $len);
1585
- }
1586
-
1587
- protected function copy_until_char($char)
1588
- {
1589
- if ($this->char===null) return '';
1590
-
1591
- if (($pos = strpos($this->doc, $char, $this->pos))===false) {
1592
- $ret = substr($this->doc, $this->pos, $this->size-$this->pos);
1593
- $this->char = null;
1594
- $this->pos = $this->size;
1595
- return $ret;
1596
- }
1597
-
1598
- if ($pos===$this->pos) return '';
1599
- $pos_old = $this->pos;
1600
- $this->char = $this->doc[$pos];
1601
- $this->pos = $pos;
1602
- return substr($this->doc, $pos_old, $pos-$pos_old);
1603
- }
1604
-
1605
- protected function copy_until_char_escape($char)
1606
- {
1607
- if ($this->char===null) return '';
1608
-
1609
- $start = $this->pos;
1610
- while (1)
1611
- {
1612
- if (($pos = strpos($this->doc, $char, $start))===false)
1613
- {
1614
- $ret = substr($this->doc, $this->pos, $this->size-$this->pos);
1615
- $this->char = null;
1616
- $this->pos = $this->size;
1617
- return $ret;
1618
- }
1619
-
1620
- if ($pos===$this->pos) return '';
1621
-
1622
- if ($this->doc[$pos-1]==='\\') {
1623
- $start = $pos+1;
1624
- continue;
1625
- }
1626
-
1627
- $pos_old = $this->pos;
1628
- $this->char = $this->doc[$pos];
1629
- $this->pos = $pos;
1630
- return substr($this->doc, $pos_old, $pos-$pos_old);
1631
- }
1632
- }
1633
-
1634
- // remove noise from html content
1635
- // save the noise in the $this->noise array.
1636
- protected function remove_noise($pattern, $remove_tag=false)
1637
- {
1638
- global $debug_object;
1639
- if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
1640
-
1641
- $count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE);
1642
-
1643
- for ($i=$count-1; $i>-1; --$i)
1644
- {
1645
- $key = '___noise___'.sprintf('% 5d', count($this->noise)+1000);
1646
- if (is_object($debug_object)) { $debug_object->debug_log(2, 'key is: ' . $key); }
1647
- $idx = ($remove_tag) ? 0 : 1;
1648
- $this->noise[$key] = $matches[$i][$idx][0];
1649
- $this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
1650
- }
1651
-
1652
- // reset the length of content
1653
- $this->size = strlen($this->doc);
1654
- if ($this->size>0)
1655
- {
1656
- $this->char = $this->doc[0];
1657
- }
1658
- }
1659
-
1660
- // restore noise to html content
1661
- function restore_noise($text)
1662
- {
1663
- global $debug_object;
1664
- if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
1665
-
1666
- while (($pos=strpos($text, '___noise___'))!==false)
1667
- {
1668
- // Sometimes there is a broken piece of markup, and we don't GET the pos+11 etc... token which indicates a problem outside of us...
1669
- if (strlen($text) > $pos+15)
1670
- {
1671
- $key = '___noise___'.$text[$pos+11].$text[$pos+12].$text[$pos+13].$text[$pos+14].$text[$pos+15];
1672
- if (is_object($debug_object)) { $debug_object->debug_log(2, 'located key of: ' . $key); }
1673
-
1674
- if (isset($this->noise[$key]))
1675
- {
1676
- $text = substr($text, 0, $pos).$this->noise[$key].substr($text, $pos+16);
1677
- }
1678
- else
1679
- {
1680
- // do this to prevent an infinite loop.
1681
- $text = substr($text, 0, $pos).'UNDEFINED NOISE FOR KEY: '.$key . substr($text, $pos+16);
1682
- }
1683
- }
1684
- else
1685
- {
1686
- // There is no valid key being given back to us... We must get rid of the ___noise___ or we will have a problem.
1687
- $text = substr($text, 0, $pos).'NO NUMERIC NOISE KEY' . substr($text, $pos+11);
1688
- }
1689
- }
1690
- return $text;
1691
- }
1692
-
1693
- // Sometimes we NEED one of the noise elements.
1694
- function search_noise($text)
1695
- {
1696
- global $debug_object;
1697
- if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
1698
-
1699
- foreach($this->noise as $noiseElement)
1700
- {
1701
- if (strpos($noiseElement, $text)!==false)
1702
- {
1703
- return $noiseElement;
1704
- }
1705
- }
1706
- }
1707
- function __toString()
1708
- {
1709
- return $this->root->innertext();
1710
- }
1711
-
1712
- function __get($name)
1713
- {
1714
- switch ($name)
1715
- {
1716
- case 'outertext':
1717
- return $this->root->innertext();
1718
- case 'innertext':
1719
- return $this->root->innertext();
1720
- case 'plaintext':
1721
- return $this->root->text();
1722
- case 'charset':
1723
- return $this->_charset;
1724
- case 'target_charset':
1725
- return $this->_target_charset;
1726
- }
1727
- }
1728
-
1729
- // camel naming conventions
1730
- function childNodes($idx=-1) {return $this->root->childNodes($idx);}
1731
- function firstChild() {return $this->root->first_child();}
1732
- function lastChild() {return $this->root->last_child();}
1733
- function createElement($name, $value=null) {return @str_get_html("<$name>$value</$name>")->first_child();}
1734
- function createTextNode($value) {return @end(str_get_html($value)->nodes);}
1735
- function getElementById($id) {return $this->find("#$id", 0);}
1736
- function getElementsById($id, $idx=null) {return $this->find("#$id", $idx);}
1737
- function getElementByTagName($name) {return $this->find($name, 0);}
1738
- function getElementsByTagName($name, $idx=-1) {return $this->find($name, $idx);}
1739
- function loadFile() {$args = func_get_args();$this->load_file($args);}
1740
- }
1741
-
1742
- ?>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
readme.txt CHANGED
@@ -2,8 +2,9 @@
2
  Contributors: TigrouMeow
3
  Tags: retina, images, image, responsive, lazysizes, lazy, attachment, media, files, iphone, ipad, high-dpi
4
  Requires at least: 4.4
5
- Tested up to: 5.0
6
- Stable tag: 5.5.1
 
7
 
8
  Make your website look beautiful and crisp on modern displays by creating and displaying retina images. WP 4.4+ is also supported and enhanced.
9
 
@@ -33,6 +34,10 @@ More information and tutorial available on https://meowapps.com/wp-retina-2x/.
33
 
34
  == Changelog ==
35
 
 
 
 
 
36
  = 5.5.1 =
37
  * Fix: Uploading a PNG as a Retina was turning its transparency into black.
38
  * Fix: Now LazyLoad used with Keep SRC only loads one image, the right one (instead of two before). Thanks to Shane Bishop, the creator of EWWW (https://wordpress.org/plugins/ewww-image-optimizer/).
2
  Contributors: TigrouMeow
3
  Tags: retina, images, image, responsive, lazysizes, lazy, attachment, media, files, iphone, ipad, high-dpi
4
  Requires at least: 4.4
5
+ Tested up to: 5.1
6
+ Requires PHP: 7.0
7
+ Stable tag: 5.5.2
8
 
9
  Make your website look beautiful and crisp on modern displays by creating and displaying retina images. WP 4.4+ is also supported and enhanced.
10
 
34
 
35
  == Changelog ==
36
 
37
+ = 5.5.2 =
38
+ * Fix: New version of HtmlDomParser.
39
+ * Update: New dashboard.
40
+
41
  = 5.5.1 =
42
  * Fix: Uploading a PNG as a Retina was turning its transparency into black.
43
  * Fix: Now LazyLoad used with Keep SRC only loads one image, the right one (instead of two before). Thanks to Shane Bishop, the creator of EWWW (https://wordpress.org/plugins/ewww-image-optimizer/).
vendor/autoload.php ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
1
+ <?php
2
+
3
+ // autoload.php @generated by Composer
4
+
5
+ require_once __DIR__ . '/composer/autoload_real.php';
6
+
7
+ return ComposerAutoloaderInit91955450420fccf0146f52ef39d67985::getLoader();
vendor/composer/ClassLoader.php ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?php
2
+
3
+ /*
4
+ * This file is part of Composer.
5
+ *
6
+ * (c) Nils Adermann <naderman@naderman.de>
7
+ * Jordi Boggiano <j.boggiano@seld.be>
8
+ *
9
+ * For the full copyright and license information, please view the LICENSE
10
+ * file that was distributed with this source code.
11
+ */
12
+
13
+ namespace Composer\Autoload;
14
+
15
+ /**
16
+ * ClassLoader implements a PSR-0, PSR-4 and classmap class loader.
17
+ *
18
+ * $loader = new \Composer\Autoload\ClassLoader();
19
+ *
20
+ * // register classes with namespaces
21
+ * $loader->add('Symfony\Component', __DIR__.'/component');
22
+ * $loader->add('Symfony', __DIR__.'/framework');
23
+ *
24
+ * // activate the autoloader
25
+ * $loader->register();
26
+ *
27
+ * // to enable searching the include path (eg. for PEAR packages)
28
+ * $loader->setUseIncludePath(true);
29
+ *
30
+ * In this example, if you try to use a class in the Symfony\Component
31
+ * namespace or one of its children (Symfony\Component\Console for instance),
32
+ * the autoloader will first look for the class under the component/
33
+ * directory, and it will then fallback to the framework/ directory if not
34
+ * found before giving up.
35
+ *
36
+ * This class is loosely based on the Symfony UniversalClassLoader.
37
+ *
38
+ * @author Fabien Potencier <fabien@symfony.com>
39
+ * @author Jordi Boggiano <j.boggiano@seld.be>
40
+ * @see http://www.php-fig.org/psr/psr-0/
41
+ * @see http://www.php-fig.org/psr/psr-4/
42
+ */
43
+ class ClassLoader
44
+ {
45
+ // PSR-4
46
+ private $prefixLengthsPsr4 = array();
47
+ private $prefixDirsPsr4 = array();
48
+ private $fallbackDirsPsr4 = array();
49
+
50
+ // PSR-0
51
+ private $prefixesPsr0 = array();
52
+ private $fallbackDirsPsr0 = array();
53
+
54
+ private $useIncludePath = false;
55
+ private $classMap = array();
56
+ private $classMapAuthoritative = false;
57
+ private $missingClasses = array();
58
+ private $apcuPrefix;
59
+
60
+ public function getPrefixes()
61
+ {
62
+ if (!empty($this->prefixesPsr0)) {
63
+ return call_user_func_array('array_merge', $this->prefixesPsr0);
64
+ }
65
+
66
+ return array();
67
+ }
68
+
69
+ public function getPrefixesPsr4()
70
+ {
71
+ return $this->prefixDirsPsr4;
72
+ }
73
+
74
+ public function getFallbackDirs()
75
+ {
76
+ return $this->fallbackDirsPsr0;
77
+ }
78
+
79
+ public function getFallbackDirsPsr4()
80
+ {
81
+ return $this->fallbackDirsPsr4;
82
+ }
83
+
84
+ public function getClassMap()
85
+ {
86
+ return $this->classMap;
87
+ }
88
+
89
+ /**
90
+ * @param array $classMap Class to filename map
91
+ */
92
+ public function addClassMap(array $classMap)
93
+ {
94
+ if ($this->classMap) {
95
+ $this->classMap = array_merge($this->classMap, $classMap);
96
+ } else {
97
+ $this->classMap = $classMap;
98
+ }
99
+ }
100
+
101
+ /**
102
+ * Registers a set of PSR-0 directories for a given prefix, either
103
+ * appending or prepending to the ones previously set for this prefix.
104
+ *
105
+ * @param string $prefix The prefix
106
+ * @param array|string $paths The PSR-0 root directories
107
+ * @param bool $prepend Whether to prepend the directories
108
+ */
109
+ public function add($prefix, $paths, $prepend = false)
110
+ {
111
+ if (!$prefix) {
112
+ if ($prepend) {
113
+ $this->fallbackDirsPsr0 = array_merge(
114
+ (array) $paths,
115
+ $this->fallbackDirsPsr0
116
+ );
117
+ } else {
118
+ $this->fallbackDirsPsr0 = array_merge(
119
+ $this->fallbackDirsPsr0,
120
+ (array) $paths
121
+ );
122
+ }
123
+
124
+ return;
125
+ }
126
+
127
+ $first = $prefix[0];
128
+ if (!isset($this->prefixesPsr0[$first][$prefix])) {
129
+ $this->prefixesPsr0[$first][$prefix] = (array) $paths;
130
+
131
+ return;
132
+ }
133
+ if ($prepend) {
134
+ $this->prefixesPsr0[$first][$prefix] = array_merge(
135
+ (array) $paths,
136
+ $this->prefixesPsr0[$first][$prefix]
137
+ );
138
+ } else {
139
+ $this->prefixesPsr0[$first][$prefix] = array_merge(
140
+ $this->prefixesPsr0[$first][$prefix],
141
+ (array) $paths
142
+ );
143
+ }
144
+ }
145
+
146
+ /**
147
+ * Registers a set of PSR-4 directories for a given namespace, either
148
+ * appending or prepending to the ones previously set for this namespace.
149
+ *
150
+ * @param string $prefix The prefix/namespace, with trailing '\\'
151
+ * @param array|string $paths The PSR-4 base directories
152
+ * @param bool $prepend Whether to prepend the directories
153
+ *
154
+ * @throws \InvalidArgumentException
155
+ */
156
+ public function addPsr4($prefix, $paths, $prepend = false)
157
+ {
158
+ if (!$prefix) {
159
+ // Register directories for the root namespace.
160
+ if ($prepend) {
161
+ $this->fallbackDirsPsr4 = array_merge(
162
+ (array) $paths,
163
+ $this->fallbackDirsPsr4
164
+ );
165
+ } else {
166
+ $this->fallbackDirsPsr4 = array_merge(
167
+ $this->fallbackDirsPsr4,
168
+ (array) $paths
169
+ );
170
+ }
171
+ } elseif (!isset($this->prefixDirsPsr4[$prefix])) {
172
+ // Register directories for a new namespace.
173
+ $length = strlen($prefix);
174
+ if ('\\' !== $prefix[$length - 1]) {
175
+ throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
176
+ }
177
+ $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
178
+ $this->prefixDirsPsr4[$prefix] = (array) $paths;
179
+ } elseif ($prepend) {
180
+ // Prepend directories for an already registered namespace.
181
+ $this->prefixDirsPsr4[$prefix] = array_merge(
182
+ (array) $paths,
183
+ $this->prefixDirsPsr4[$prefix]
184
+ );
185
+ } else {
186
+ // Append directories for an already registered namespace.
187
+ $this->prefixDirsPsr4[$prefix] = array_merge(
188
+ $this->prefixDirsPsr4[$prefix],
189
+ (array) $paths
190
+ );
191
+ }
192
+ }
193
+
194
+ /**
195
+ * Registers a set of PSR-0 directories for a given prefix,
196
+ * replacing any others previously set for this prefix.
197
+ *
198
+ * @param string $prefix The prefix
199
+ * @param array|string $paths The PSR-0 base directories
200
+ */
201
+ public function set($prefix, $paths)
202
+ {
203
+ if (!$prefix) {
204
+ $this->fallbackDirsPsr0 = (array) $paths;
205
+ } else {
206
+ $this->prefixesPsr0[$prefix[0]][$prefix] = (array) $paths;
207
+ }
208
+ }
209
+
210
+ /**
211
+ * Registers a set of PSR-4 directories for a given namespace,
212
+ * replacing any others previously set for this namespace.
213
+ *
214
+ * @param string $prefix The prefix/namespace, with trailing '\\'
215
+ * @param array|string $paths The PSR-4 base directories
216
+ *
217
+ * @throws \InvalidArgumentException
218
+ */
219
+ public function setPsr4($prefix, $paths)
220
+ {
221
+ if (!$prefix) {
222
+ $this->fallbackDirsPsr4 = (array) $paths;
223
+ } else {
224
+ $length = strlen($prefix);
225
+ if ('\\' !== $prefix[$length - 1]) {
226
+ throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
227
+ }
228
+ $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
229
+ $this->prefixDirsPsr4[$prefix] = (array) $paths;
230
+ }
231
+ }
232
+
233
+ /**
234
+ * Turns on searching the include path for class files.
235
+ *
236
+ * @param bool $useIncludePath
237
+ */
238
+ public function setUseIncludePath($useIncludePath)
239
+ {
240
+ $this->useIncludePath = $useIncludePath;
241
+ }
242
+
243
+ /**
244
+ * Can be used to check if the autoloader uses the include path to check
245
+ * for classes.
246
+ *
247
+ * @return bool
248
+ */
249
+ public function getUseIncludePath()
250
+ {
251
+ return $this->useIncludePath;
252
+ }
253
+
254
+ /**
255
+ * Turns off searching the prefix and fallback directories for classes
256
+ * that have not been registered with the class map.
257
+ *
258
+ * @param bool $classMapAuthoritative
259
+ */
260
+ public function setClassMapAuthoritative($classMapAuthoritative)
261
+ {
262
+ $this->classMapAuthoritative = $classMapAuthoritative;
263
+ }
264
+
265
+ /**
266
+ * Should class lookup fail if not found in the current class map?
267
+ *
268
+ * @return bool
269
+ */
270
+ public function isClassMapAuthoritative()
271
+ {
272
+ return $this->classMapAuthoritative;
273
+ }
274
+
275
+ /**
276
+ * APCu prefix to use to cache found/not-found classes, if the extension is enabled.
277
+ *
278
+ * @param string|null $apcuPrefix
279
+ */
280
+ public function setApcuPrefix($apcuPrefix)
281
+ {
282
+ $this->apcuPrefix = function_exists('apcu_fetch') && ini_get('apc.enabled') ? $apcuPrefix : null;
283
+ }
284
+
285
+ /**
286
+ * The APCu prefix in use, or null if APCu caching is not enabled.
287
+ *
288
+ * @return string|null
289
+ */
290
+ public function getApcuPrefix()
291
+ {
292
+ return $this->apcuPrefix;
293
+ }
294
+
295
+ /**
296
+ * Registers this instance as an autoloader.
297
+ *
298
+ * @param bool $prepend Whether to prepend the autoloader or not
299
+ */
300
+ public function register($prepend = false)
301
+ {
302
+ spl_autoload_register(array($this, 'loadClass'), true, $prepend);
303
+ }
304
+
305
+ /**
306
+ * Unregisters this instance as an autoloader.
307
+ */
308
+ public function unregister()
309
+ {
310
+ spl_autoload_unregister(array($this, 'loadClass'));
311
+ }
312
+
313
+ /**
314
+ * Loads the given class or interface.
315
+ *
316
+ * @param string $class The name of the class
317
+ * @return bool|null True if loaded, null otherwise
318
+ */
319
+ public function loadClass($class)
320
+ {
321
+ if ($file = $this->findFile($class)) {
322
+ includeFile($file);
323
+
324
+ return true;
325
+ }
326
+ }
327
+
328
+ /**
329
+ * Finds the path to the file where the class is defined.
330
+ *
331
+ * @param string $class The name of the class
332
+ *
333
+ * @return string|false The path if found, false otherwise
334
+ */
335
+ public function findFile($class)
336
+ {
337
+ // class map lookup
338
+ if (isset($this->classMap[$class])) {
339
+ return $this->classMap[$class];
340
+ }
341
+ if ($this->classMapAuthoritative || isset($this->missingClasses[$class])) {
342
+ return false;
343
+ }
344
+ if (null !== $this->apcuPrefix) {
345
+ $file = apcu_fetch($this->apcuPrefix.$class, $hit);
346
+ if ($hit) {
347
+ return $file;
348
+ }
349
+ }
350
+
351
+ $file = $this->findFileWithExtension($class, '.php');
352
+
353
+ // Search for Hack files if we are running on HHVM
354
+ if (false === $file && defined('HHVM_VERSION')) {
355
+ $file = $this->findFileWithExtension($class, '.hh');
356
+ }
357
+
358
+ if (null !== $this->apcuPrefix) {
359
+ apcu_add($this->apcuPrefix.$class, $file);
360
+ }
361
+
362
+ if (false === $file) {
363
+ // Remember that this class does not exist.
364
+ $this->missingClasses[$class] = true;
365
+ }
366
+
367
+ return $file;
368
+ }
369
+
370
+ private function findFileWithExtension($class, $ext)
371
+ {
372
+ // PSR-4 lookup
373
+ $logicalPathPsr4 = strtr($class, '\\', DIRECTORY_SEPARATOR) . $ext;
374
+
375
+ $first = $class[0];
376
+ if (isset($this->prefixLengthsPsr4[$first])) {
377
+ $subPath = $class;
378
+ while (false !== $lastPos = strrpos($subPath, '\\')) {
379
+ $subPath = substr($subPath, 0, $lastPos);
380
+ $search = $subPath . '\\';
381
+ if (isset($this->prefixDirsPsr4[$search])) {
382
+ $pathEnd = DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $lastPos + 1);
383
+ foreach ($this->prefixDirsPsr4[$search] as $dir) {
384
+ if (file_exists($file = $dir . $pathEnd)) {
385
+ return $file;
386
+ }
387
+ }
388
+ }
389
+ }
390
+ }
391
+
392
+ // PSR-4 fallback dirs
393
+ foreach ($this->fallbackDirsPsr4 as $dir) {
394
+ if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr4)) {
395
+ return $file;
396
+ }
397
+ }
398
+
399
+ // PSR-0 lookup
400
+ if (false !== $pos = strrpos($class, '\\')) {
401
+ // namespaced class name
402
+ $logicalPathPsr0 = substr($logicalPathPsr4, 0, $pos + 1)
403
+ . strtr(substr($logicalPathPsr4, $pos + 1), '_', DIRECTORY_SEPARATOR);
404
+ } else {
405
+ // PEAR-like class name
406
+ $logicalPathPsr0 = strtr($class, '_', DIRECTORY_SEPARATOR) . $ext;
407
+ }
408
+
409
+ if (isset($this->prefixesPsr0[$first])) {
410
+ foreach ($this->prefixesPsr0[$first] as $prefix => $dirs) {
411
+ if (0 === strpos($class, $prefix)) {
412
+ foreach ($dirs as $dir) {
413
+ if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
414
+ return $file;
415
+ }
416
+ }
417
+ }
418
+ }
419
+ }
420
+
421
+ // PSR-0 fallback dirs
422
+ foreach ($this->fallbackDirsPsr0 as $dir) {
423
+ if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
424
+ return $file;
425
+ }
426
+ }
427
+
428
+ // PSR-0 include paths.
429
+ if ($this->useIncludePath && $file = stream_resolve_include_path($logicalPathPsr0)) {
430
+ return $file;
431
+ }
432
+
433
+ return false;
434
+ }
435
+ }
436
+
437
+ /**
438
+ * Scope isolated include.
439
+ *
440
+ * Prevents access to $this/self from included files.
441
+ */
442
+ function includeFile($file)
443
+ {
444
+ include $file;
445
+ }
vendor/composer/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Copyright (c) Nils Adermann, Jordi Boggiano
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is furnished
9
+ to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all
12
+ copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
+ THE SOFTWARE.
21
+
vendor/composer/autoload_classmap.php ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ <?php
2
+
3
+ // autoload_classmap.php @generated by Composer
4
+
5
+ $vendorDir = dirname(dirname(__FILE__));
6
+ $baseDir = dirname($vendorDir);
7
+
8
+ return array(
9
+ );
vendor/composer/autoload_namespaces.php ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
1
+ <?php
2
+
3
+ // autoload_namespaces.php @generated by Composer
4
+
5
+ $vendorDir = dirname(dirname(__FILE__));
6
+ $baseDir = dirname($vendorDir);
7
+
8
+ return array(
9
+ 'KubAT\\PhpSimple\\HtmlDomParser' => array($vendorDir . '/kub-at/php-simple-html-dom-parser/src'),
10
+ );
vendor/composer/autoload_psr4.php ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ <?php
2
+
3
+ // autoload_psr4.php @generated by Composer
4
+
5
+ $vendorDir = dirname(dirname(__FILE__));
6
+ $baseDir = dirname($vendorDir);
7
+
8
+ return array(
9
+ );
vendor/composer/autoload_real.php ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?php
2
+
3
+ // autoload_real.php @generated by Composer
4
+
5
+ class ComposerAutoloaderInit91955450420fccf0146f52ef39d67985
6
+ {
7
+ private static $loader;
8
+
9
+ public static function loadClassLoader($class)
10
+ {
11
+ if ('Composer\Autoload\ClassLoader' === $class) {
12
+ require __DIR__ . '/ClassLoader.php';
13
+ }
14
+ }
15
+
16
+ public static function getLoader()
17
+ {
18
+ if (null !== self::$loader) {
19
+ return self::$loader;
20
+ }
21
+
22
+ spl_autoload_register(array('ComposerAutoloaderInit91955450420fccf0146f52ef39d67985', 'loadClassLoader'), true, true);
23
+ self::$loader = $loader = new \Composer\Autoload\ClassLoader();
24
+ spl_autoload_unregister(array('ComposerAutoloaderInit91955450420fccf0146f52ef39d67985', 'loadClassLoader'));
25
+
26
+ $useStaticLoader = PHP_VERSION_ID >= 50600 && !defined('HHVM_VERSION') && (!function_exists('zend_loader_file_encoded') || !zend_loader_file_encoded());
27
+ if ($useStaticLoader) {
28
+ require_once __DIR__ . '/autoload_static.php';
29
+
30
+ call_user_func(\Composer\Autoload\ComposerStaticInit91955450420fccf0146f52ef39d67985::getInitializer($loader));
31
+ } else {
32
+ $map = require __DIR__ . '/autoload_namespaces.php';
33
+ foreach ($map as $namespace => $path) {
34
+ $loader->set($namespace, $path);
35
+ }
36
+
37
+ $map = require __DIR__ . '/autoload_psr4.php';
38
+ foreach ($map as $namespace => $path) {
39
+ $loader->setPsr4($namespace, $path);
40
+ }
41
+
42
+ $classMap = require __DIR__ . '/autoload_classmap.php';
43
+ if ($classMap) {
44
+ $loader->addClassMap($classMap);
45
+ }
46
+ }
47
+
48
+ $loader->register(true);
49
+
50
+ return $loader;
51
+ }
52
+ }
vendor/composer/autoload_static.php ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?php
2
+
3
+ // autoload_static.php @generated by Composer
4
+
5
+ namespace Composer\Autoload;
6
+
7
class ComposerStaticInit91955450420fccf0146f52ef39d67985
{
    /** PSR-0 prefix map, keyed by first letter, then by namespace prefix. */
    public static $prefixesPsr0 = array(
        'K' => array(
            'KubAT\\PhpSimple\\HtmlDomParser' => array(
                0 => __DIR__ . '/..' . '/kub-at/php-simple-html-dom-parser/src',
            ),
        ),
    );

    /**
     * Build a closure, bound to the ClassLoader class scope, that copies the
     * static prefix map onto the given loader when invoked.
     *
     * @return \Closure
     */
    public static function getInitializer(ClassLoader $loader)
    {
        $initializer = function () use ($loader) {
            $loader->prefixesPsr0 = ComposerStaticInit91955450420fccf0146f52ef39d67985::$prefixesPsr0;
        };

        return \Closure::bind($initializer, null, ClassLoader::class);
    }
}
vendor/composer/installed.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "name": "kub-at/php-simple-html-dom-parser",
4
+ "version": "1.8.1",
5
+ "version_normalized": "1.8.1.0",
6
+ "source": {
7
+ "type": "git",
8
+ "url": "https://github.com/Kub-AT/php-simple-html-dom-parser.git",
9
+ "reference": "6db1e01db320040024cd1f74b0e1483aa2670720"
10
+ },
11
+ "dist": {
12
+ "type": "zip",
13
+ "url": "https://api.github.com/repos/Kub-AT/php-simple-html-dom-parser/zipball/6db1e01db320040024cd1f74b0e1483aa2670720",
14
+ "reference": "6db1e01db320040024cd1f74b0e1483aa2670720",
15
+ "shasum": ""
16
+ },
17
+ "require": {
18
+ "php": ">=5.3.2"
19
+ },
20
+ "time": "2019-03-05T14:12:22+00:00",
21
+ "type": "library",
22
+ "installation-source": "dist",
23
+ "autoload": {
24
+ "psr-0": {
25
+ "KubAT\\PhpSimple\\HtmlDomParser": "src/"
26
+ }
27
+ },
28
+ "notification-url": "https://packagist.org/downloads/",
29
+ "license": [
30
+ "MIT"
31
+ ],
32
+ "authors": [
33
+ {
34
+ "name": "S.C. Chen",
35
+ "email": "me578022@gmail.com"
36
+ },
37
+ {
38
+ "name": "Jakub Stawowy",
39
+ "email": "Kub-AT@users.noreply.github.com"
40
+ }
41
+ ],
42
+ "description": "PHP Simple HTML DOM Parser with namespace and PHP 7.3 compatible",
43
+ "homepage": "http://simplehtmldom.sourceforge.net/",
44
+ "keywords": [
45
+ "Simple",
46
+ "dom",
47
+ "html"
48
+ ]
49
+ }
50
+ ]
vendor/kub-at/php-simple-html-dom-parser/CONTRIBUTING.md ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ # Contributing
2
+
3
+ I'm not the maintainer of the PHP Simple HTML DOM Parser project (https://sourceforge.net/projects/simplehtmldom/)
vendor/kub-at/php-simple-html-dom-parser/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Jakub Stawowy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
vendor/kub-at/php-simple-html-dom-parser/README.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ php-simple-html-dom-parser
2
+ ==========================
3
+
4
+ Version 1.8.1 - PHP 7.3 compatible
5
+ PHP Simple HTML DOM Parser changelog: https://sourceforge.net/projects/simplehtmldom/files/simplehtmldom/1.8.1/
6
+
7
+
8
+ Install
9
+ -------
10
+
11
+ ```
12
+ composer require kub-at/php-simple-html-dom-parser
13
+ ```
14
+
15
+ Usage
16
+ -----
17
+
18
+ ```php
19
+ use KubAT\PhpSimple\HtmlDomParser;
20
+
21
+ ...
22
+ $dom = HtmlDomParser::str_get_html( $str );
23
+ // or
24
+ $dom = HtmlDomParser::file_get_html( $file_name );
25
+
26
+ $elems = $dom->find($elem_name);
27
+ ...
28
+
29
+ ```
vendor/kub-at/php-simple-html-dom-parser/composer.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "kub-at/php-simple-html-dom-parser",
3
+ "description": "PHP Simple HTML DOM Parser with namespace and PHP 7.3 compatible",
4
+ "keywords": ["html", "dom", "simple"],
5
+ "homepage": "http://simplehtmldom.sourceforge.net/",
6
+ "type": "library",
7
+ "license": "MIT",
8
+ "authors": [
9
+ {
10
+ "name": "S.C. Chen",
11
+ "email": "me578022@gmail.com"
12
+ },
13
+ {
14
+ "name": "Jakub Stawowy",
15
+ "email": "Kub-AT@users.noreply.github.com"
16
+ }
17
+ ],
18
+ "require": {
19
+ "php": ">=5.3.2"
20
+ },
21
+ "autoload": {
22
+ "psr-0": { "KubAT\\PhpSimple\\HtmlDomParser": "src/" }
23
+ }
24
+ }
vendor/kub-at/php-simple-html-dom-parser/src/KubAT/PhpSimple/HtmlDomParser.php ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?php
2
+ namespace KubAT\PhpSimple;
3
+
4
// Load the bundled parser from this file's own directory. A bare relative
// path would be searched through include_path first, so an unrelated
// lib/simple_html_dom.php could be picked up instead. require_once guards
// against redeclaring the parser's functions if the file was already loaded.
require_once __DIR__ . DIRECTORY_SEPARATOR . 'lib' . DIRECTORY_SEPARATOR . 'simple_html_dom.php';


/**
 * Static facade over the namespaced simple_html_dom helper functions.
 */
class HtmlDomParser {

    /**
     * Forward all arguments to \simple_html_dom\file_get_html().
     *
     * @return mixed Whatever \simple_html_dom\file_get_html() returns.
     */
    static public function file_get_html() {
        return call_user_func_array('\simple_html_dom\file_get_html' , func_get_args());
    }

    /**
     * Forward all arguments to \simple_html_dom\str_get_html().
     *
     * @return mixed Whatever \simple_html_dom\str_get_html() returns.
     */
    static public function str_get_html() {
        return call_user_func_array('\simple_html_dom\str_get_html' , func_get_args());
    }
}
vendor/kub-at/php-simple-html-dom-parser/src/KubAT/PhpSimple/lib/simple_html_dom.php ADDED
@@ -0,0 +1,2810 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?php
2
+ namespace simple_html_dom;
3
+
4
+ /**
5
+ * Website: http://sourceforge.net/projects/simplehtmldom/
6
+ * Additional projects: http://sourceforge.net/projects/debugobject/
7
+ * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
8
+ * Contributions by:
9
+ * Yousuke Kumakura (Attribute filters)
10
+ * Vadim Voituk (Negative indexes supports of "find" method)
11
+ * Antcs (Constructor with automatically load contents either text or file/url)
12
+ *
13
+ * all affected sections have comments starting with "PaperG"
14
+ *
15
+ * Paperg - Added case insensitive testing of the value of the selector.
16
+ *
17
+ * Paperg - Added tag_start for the starting index of tags - NOTE: This works
18
+ * but not accurately. This tag_start gets counted AFTER \r\n have been crushed
19
+ * out, and after the remove_noise calls so it will not reflect the REAL
20
+ * position of the tag in the source, it will almost always be smaller by some
21
+ * amount. We use this to determine how far into the file the tag in question
22
+ * is. This "percentage" will never be accurate as the $dom->size is the "real"
23
+ * number of bytes the dom was created from. But for most purposes, it's a
24
+ * really good estimation.
25
+ *
26
+ * Paperg - Added the forceTagsClosed to the dom constructor. Forcing tags
27
+ * closed is great for malformed html, but it CAN lead to parsing errors.
28
+ *
29
+ * Allow the user to tell us how much they trust the html.
30
+ *
31
+ * Paperg add the text and plaintext to the selectors for the find syntax.
32
+ * plaintext implies text in the innertext of a node. text implies that the
33
+ * tag is a text node. This allows for us to find tags based on the text they
34
+ * contain.
35
+ *
36
+ * Create find_ancestor_tag to see if a tag is - at any level - inside of
37
+ * another specific tag.
38
+ *
39
+ * Paperg: added parse_charset so that we know about the character set of
40
+ * the source document. NOTE: If the user's system has a routine called
41
+ * get_last_retrieve_url_contents_content_type available, we will assume it's
42
+ * returning the content-type header from the last transfer or curl_exec, and
43
+ * we will parse that and use it in preference to any other method of charset
44
+ * detection.
45
+ *
46
+ * Found infinite loop in the case of broken html in restore_noise. Rewrote to
47
+ * protect from that.
48
+ *
49
+ * PaperG (John Schlick) Added get_display_size for "IMG" tags.
50
+ *
51
+ * Licensed under The MIT License
52
+ * Redistributions of files must retain the above copyright notice.
53
+ *
54
+ * @author S.C. Chen <me578022@gmail.com>
55
+ * @author John Schlick
56
+ * @author Rus Carroll
57
+ * @version Rev. 1.8.1 (247)
58
+ * @package PlaceLocalInclude
59
+ * @subpackage simple_html_dom
60
+ */
61
+
62
/**
 * Constants used by the functions and classes below.
 * @author S.C. Chen <me578022@gmail.com>
 */

// HDOM_TYPE_*: node type values
define('HDOM_TYPE_ELEMENT', 1);
define('HDOM_TYPE_COMMENT', 2);
define('HDOM_TYPE_TEXT', 3);
define('HDOM_TYPE_ENDTAG', 4);
define('HDOM_TYPE_ROOT', 5);
define('HDOM_TYPE_UNKNOWN', 6);

// HDOM_QUOTE_*: attribute quote styles
define('HDOM_QUOTE_DOUBLE', 0);
define('HDOM_QUOTE_SINGLE', 1);
define('HDOM_QUOTE_NO', 3);

// HDOM_INFO_*: keys of a node's "info" array ($_)
define('HDOM_INFO_BEGIN', 0);
define('HDOM_INFO_END', 1);
define('HDOM_INFO_QUOTE', 2);
define('HDOM_INFO_SPACE', 3);
define('HDOM_INFO_TEXT', 4);
define('HDOM_INFO_INNER', 5);
define('HDOM_INFO_OUTER', 6);
define('HDOM_INFO_ENDSPACE', 7);

// The following four defaults are only defined when the including code has
// not defined them already.

/** The default target charset */
if (!defined('DEFAULT_TARGET_CHARSET')) {
    define('DEFAULT_TARGET_CHARSET', 'UTF-8');
}

/** The default <br> text used instead of <br> tags when returning text */
if (!defined('DEFAULT_BR_TEXT')) {
    define('DEFAULT_BR_TEXT', "\r\n");
}

/** The default <span> text used instead of <span> tags when returning text */
if (!defined('DEFAULT_SPAN_TEXT')) {
    define('DEFAULT_SPAN_TEXT', ' ');
}

/** The maximum file size the parser should load */
if (!defined('MAX_FILE_SIZE')) {
    define('MAX_FILE_SIZE', 600000);
}

/** Contents between curly braces "{" and "}" are interpreted as text */
define('HDOM_SMARTY_AS_TEXT', 1);
98
+
99
+ // helper functions
100
+ // -----------------------------------------------------------------------------
101
/**
 * Build a DOM object from the contents of a file or URL.
 *
 * $url, $use_include_path, $context, $offset and $maxLen are handed to
 * file_get_contents(); the remaining arguments configure the parser.
 *
 * @param string $url Path or URL to read.
 * @param bool $use_include_path Passed through to file_get_contents().
 * @param resource|null $context Passed through to file_get_contents().
 * @param int $offset Passed through to file_get_contents().
 * @param int $maxLen Maximum number of bytes to read; any value <= 0
 * (including the default -1) is replaced by MAX_FILE_SIZE.
 * @param bool $lowercase Passed to the parser constructor and load().
 * @param bool $forceTagsClosed Passed to the parser constructor.
 * @param string $target_charset Passed to the parser constructor.
 * @param bool $stripRN Passed to the parser constructor and load().
 * @param string $defaultBRText Passed to the parser constructor.
 * @param string $defaultSpanText Passed to the parser constructor.
 * @return simple_html_dom|false The loaded DOM, or false when the read
 * failed or produced empty content.
 */
function file_get_html(
    $url,
    $use_include_path = false,
    $context = null,
    $offset = 0,
    $maxLen = -1,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT)
{
    // Ensure maximum length is greater than zero
    if ($maxLen <= 0) { $maxLen = MAX_FILE_SIZE; }

    // We DO force the tags to be terminated.
    $dom = new simple_html_dom(
        null,
        $lowercase,
        $forceTagsClosed,
        $target_charset,
        $stripRN,
        $defaultBRText,
        $defaultSpanText);

    $contents = file_get_contents(
        $url,
        $use_include_path,
        $context,
        $offset,
        $maxLen);

    if (empty($contents) || strlen($contents) > $maxLen) {
        // Release the unused DOM before bailing out, exactly as
        // str_get_html() does on its own failure path.
        $dom->clear();
        return false;
    }

    // The second parameter can force the selectors to all be lowercase.
    $dom->load($contents, $lowercase, $stripRN);
    return $dom;
}
149
+
150
/**
 * Build a DOM object from an HTML string.
 *
 * @param string $str HTML source.
 * @param bool $lowercase Passed to the parser constructor and load().
 * @param bool $forceTagsClosed Passed to the parser constructor.
 * @param string $target_charset Passed to the parser constructor.
 * @param bool $stripRN Passed to the parser constructor and load().
 * @param string $defaultBRText Passed to the parser constructor.
 * @param string $defaultSpanText Passed to the parser constructor.
 * @return simple_html_dom|false The loaded DOM, or false when $str is empty
 * or longer than MAX_FILE_SIZE bytes.
 */
function str_get_html(
    $str,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT)
{
    $parser = new simple_html_dom(
        null,
        $lowercase,
        $forceTagsClosed,
        $target_charset,
        $stripRN,
        $defaultBRText,
        $defaultSpanText);

    $usable = !empty($str) && strlen($str) <= MAX_FILE_SIZE;

    if (!$usable) {
        // Nothing to parse: release the parser and signal failure.
        $parser->clear();
        return false;
    }

    $parser->load($str, $lowercase, $stripRN);

    return $parser;
}
177
+
178
/**
 * Dump the tree below a node by delegating to the node's own dump() method.
 *
 * @param object $node Object exposing dump($show_attr, $deep).
 * @param bool $show_attr Whether attributes are included in the dump.
 * @param int $deep Starting depth handed to dump().
 * @return void
 */
function dump_html_tree($node, $show_attr = true, $deep = 0)
{
    // Forward the caller's options; previously the node itself was passed
    // as $show_attr and both options were silently ignored.
    $node->dump($show_attr, $deep);
}
183
+
184
+ /**
185
+ * simple html dom node
186
+ * PaperG - added ability for "find" routine to lowercase the value of the
187
+ * selector.
188
+ *
189
+ * PaperG - added $tag_start to track the start position of the tag in the total
190
+ * byte index
191
+ *
192
+ * @package PlaceLocalInclude
193
+ */
194
+ class simple_html_dom_node
195
+ {
196
+ /**
197
+ * Node type
198
+ *
199
+ * Default is {@see HDOM_TYPE_TEXT}
200
+ *
201
+ * @var int
202
+ */
203
+ public $nodetype = HDOM_TYPE_TEXT;
204
+
205
+ /**
206
+ * Tag name
207
+ *
208
+ * Default is 'text'
209
+ *
210
+ * @var string
211
+ */
212
+ public $tag = 'text';
213
+
214
+ /**
215
+ * List of attributes
216
+ *
217
+ * @var array
218
+ */
219
+ public $attr = array();
220
+
221
+ /**
222
+ * List of child node objects
223
+ *
224
+ * @var array
225
+ */
226
+ public $children = array();
227
+ public $nodes = array();
228
+
229
+ /**
230
+ * The parent node object
231
+ *
232
+ * @var object|null
233
+ */
234
+ public $parent = null;
235
+
236
+ // The "info" array - see HDOM_INFO_... for what each element contains.
237
+ public $_ = array();
238
+
239
+ /**
240
+ * Start position of the tag in the document
241
+ *
242
+ * @var int
243
+ */
244
+ public $tag_start = 0;
245
+
246
+ /**
247
+ * The DOM object
248
+ *
249
+ * @var object|null
250
+ */
251
+ private $dom = null;
252
+
253
/**
 * Create a node that belongs to the given DOM.
 *
 * The new node appends itself to the DOM's node list
 * {@see simple_html_dom::$nodes} and keeps a reference to the DOM.
 */
function __construct($dom)
{
    $dom->nodes[] = $this;
    $this->dom = $dom;
}
263
+
264
/**
 * Drop the node's object references (via clear()) when it is destroyed.
 */
function __destruct()
{
    $this->clear();
}
268
+
269
/**
 * String conversion yields the node's outer text.
 *
 * @return string
 */
function __toString()
{
    return $this->outertext();
}
273
+
274
/**
 * Reset every property that references other objects (DOM, nodes, parent,
 * children) to null, to work around the PHP 5 circular-reference memory leak.
 */
function clear()
{
    $this->dom = $this->nodes = $this->parent = $this->children = null;
}
282
+
283
/**
 * Echo this node's tag, optionally its attributes, and then recurse into
 * every entry of $this->nodes one level deeper.
 *
 * @param bool $show_attr Whether to print the attribute list.
 * @param int $deep Current depth; one leading space is printed per level.
 * @return void
 */
function dump($show_attr = true, $deep = 0)
{
    echo str_repeat(' ', $deep) . $this->tag;

    if ($show_attr && count($this->attr) > 0) {
        echo '(';
        foreach (array_keys($this->attr) as $name) {
            // The value is read through the property so it matches what
            // callers see via $node->name.
            echo "[$name]=>\"" . $this->$name . '", ';
        }
        echo ')';
    }

    echo "\n";

    if (!$this->nodes) {
        return;
    }

    foreach ($this->nodes as $child) {
        $child->dump($show_attr, $deep + 1);
    }
}
306
+
307
+
308
/**
 * Debugging helper: describe this single node (tag, attributes, info array,
 * text, inner info, child/node counts and tag start) on one line.
 *
 * @param bool $echo When true the description is echoed and nothing is
 * returned; when false the description is returned instead.
 * @return string|void
 */
function dump_node($echo = true)
{
    $string = $this->tag;

    if (count($this->attr) > 0) {
        $string .= '(';
        foreach ($this->attr as $k => $v) {
            $string .= "[$k]=>\"" . $this->$k . '", ';
        }
        $string .= ')';
    }

    if (count($this->_) > 0) {
        $string .= ' $_ (';
        foreach ($this->_ as $k => $v) {
            if (is_array($v)) {
                $string .= "[$k]=>(";
                foreach ($v as $k2 => $v2) {
                    $string .= "[$k2]=>\"" . $v2 . '", ';
                }
                $string .= ')';
            } else {
                $string .= "[$k]=>\"" . $v . '", ';
            }
        }
        $string .= ')';
    }

    if (isset($this->text)) {
        $string .= ' text: (' . $this->text . ')';
    }

    $string .= " HDOM_INNER_INFO: '";

    // Read the inner info from this node; the previous code consulted an
    // undefined $node variable and therefore always reported NULL.
    if (isset($this->_[HDOM_INFO_INNER])) {
        $string .= $this->_[HDOM_INFO_INNER] . "'";
    } else {
        $string .= ' NULL ';
    }

    $string .= ' children: ' . count($this->children);
    $string .= ' nodes: ' . count($this->nodes);
    $string .= ' tag_start: ' . $this->tag_start;
    $string .= "\n";

    if ($echo) {
        echo $string;
        return;
    } else {
        return $string;
    }
}
361
+
362
/**
 * Get the parent node, or attach this node to a new parent.
 *
 * When a parent is given, this node is appended to that parent's nodes and
 * children lists. It is NOT removed from the lists of its previous parent,
 * so re-parenting leaves stale entries behind.
 *
 * @param object|null $parent (optional) New parent node; `null` only reads
 * the current parent.
 * @return object|null The (possibly updated) parent node
 */
function parent($parent = null)
{
    if ($parent === null) {
        return $this->parent;
    }

    $this->parent = $parent;
    $parent->nodes[] = $this;
    $parent->children[] = $this;

    return $this->parent;
}
382
+
383
/**
 * Tell whether the children list is non-empty.
 *
 * @return bool True if the node has at least one child node
 */
function has_child()
{
    return (bool) $this->children;
}
390
+
391
/**
 * Fetch one child node by index, or the whole children list.
 *
 * @param int $idx Index of the wanted child; the default `-1` returns the
 * complete children list instead.
 * @return object|array|null The child at $idx, the full list for `-1`, or
 * null when no child exists at that index.
 */
function children($idx = -1)
{
    if ($idx === -1) {
        return $this->children;
    }

    return isset($this->children[$idx]) ? $this->children[$idx] : null;
}
411
+
412
/**
 * Get first child node
 *
 * @return object|null The first child node or null if the current node has
 * no child nodes.
 */
function first_child()
{
    // empty() also covers a children list that clear() has reset to null,
    // where count() would raise a TypeError on PHP 8.
    if (empty($this->children)) {
        return null;
    }

    return $this->children[0];
}
428
+
429
/**
 * Get last child node
 *
 * @return object|null The last child node or null if the current node has
 * no child nodes.
 */
function last_child()
{
    // empty() also covers a children list that clear() has reset to null,
    // where count() would raise a TypeError on PHP 8.
    if (empty($this->children)) {
        return null;
    }

    return $this->children[count($this->children) - 1];
}
444
+
445
/**
 * Get the node that follows this one in the parent's children list.
 *
 * @return object|null The following sibling, or null when there is no
 * parent, this node is the last child, or it is not in the list at all.
 */
function next_sibling()
{
    if ($this->parent === null) {
        return null;
    }

    $siblings = $this->parent->children;
    $total = count($siblings);

    for ($pos = 0; $pos < $total; ++$pos) {
        if ($siblings[$pos] === $this) {
            return ($pos + 1 < $total) ? $siblings[$pos + 1] : null;
        }
    }

    return null;
}
470
+
471
/**
 * Get previous sibling node
 *
 * @return object|null The sibling node, or null if the current node has no
 * parent, is the first child, or is not listed in its parent's children.
 */
function prev_sibling()
{
    if ($this->parent === null) { return null; }

    $siblings = $this->parent->children;
    $total = count($siblings);

    for ($pos = 0; $pos < $total; ++$pos) {
        if ($siblings[$pos] === $this) {
            return ($pos > 0) ? $siblings[$pos - 1] : null;
        }
    }

    // Not found among the parent's children: there is no previous sibling.
    // (The old index arithmetic returned the parent's LAST child here.)
    return null;
}
492
+
493
/**
 * Walk from this node up through its parents and return the first node
 * whose tag equals $tag. The node itself is checked first.
 *
 * @param string $tag Tag to find
 * @return object|null The first matching node, or null once the parent
 * chain ends without a match.
 */
function find_ancestor_tag($tag)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    for ($node = $this; !is_null($node); $node = $node->parent) {
        if (is_object($debug_object)) {
            $debug_object->debug_log(2, 'Current tag is: ' . $node->tag);
        }

        if ($node->tag == $tag) {
            return $node;
        }
    }

    return null;
}
525
+
526
/**
 * Get the node's inner text (everything between its opening and closing tags).
 *
 * Precedence: stored inner info, then stored text info (with noise
 * restored), otherwise the concatenated outer text of all entries in
 * $this->nodes.
 *
 * @return string
 */
function innertext()
{
    if (isset($this->_[HDOM_INFO_INNER])) {
        return $this->_[HDOM_INFO_INNER];
    }

    if (isset($this->_[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    $html = '';
    foreach ($this->nodes as $child) {
        $html .= $child->outertext();
    }

    return $html;
}
549
+
550
/**
 * Get the node's outer text (its content including opening and closing tags).
 *
 * The root node returns its inner text. For every other node the DOM
 * callback (if set) is invoked first; stored outer info or text info is then
 * returned when present, otherwise the markup is rebuilt from the begin
 * tag, the inner content and the end tag.
 *
 * @return string
 */
function outertext()
{
    global $debug_object;

    if (is_object($debug_object)) {
        $suffix = ($this->tag === 'text' && !empty($this->text))
            ? ' with text: ' . $this->text
            : '';

        $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $suffix);
    }

    if ($this->tag === 'root') {
        return $this->innertext();
    }

    // Give the registered callback a chance to act on the node first.
    if ($this->dom && $this->dom->callback !== null) {
        call_user_func_array($this->dom->callback, array($this));
    }

    if (isset($this->_[HDOM_INFO_OUTER])) {
        return $this->_[HDOM_INFO_OUTER];
    }

    if (isset($this->_[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    // Begin tag
    $html = ($this->dom && $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]])
        ? $this->dom->nodes[$this->_[HDOM_INFO_BEGIN]]->makeup()
        : '';

    // Inner content
    if (isset($this->_[HDOM_INFO_INNER])) {
        // A br tag never emits the inner info that may have been attached.
        if ($this->tag !== 'br') {
            $html .= $this->_[HDOM_INFO_INNER];
        }
    } elseif ($this->nodes) {
        foreach ($this->nodes as $child) {
            $html .= $this->convert_text($child->outertext());
        }
    }

    // End tag
    if (isset($this->_[HDOM_INFO_END]) && $this->_[HDOM_INFO_END] != 0) {
        $html .= '</' . $this->tag . '>';
    }

    return $html;
}
615
+
616
/**
 * Get the node's plain text (content without tags).
 *
 * Stored inner info wins. Text nodes return their noise-restored text;
 * comment and unknown nodes, as well as script and style tags, return an
 * empty string. Otherwise the text of all entries in $this->nodes is
 * concatenated and trimmed.
 *
 * @return string
 */
function text()
{
    if (isset($this->_[HDOM_INFO_INNER])) {
        return $this->_[HDOM_INFO_INNER];
    }

    $type = $this->nodetype;

    if ($type == HDOM_TYPE_TEXT) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    if ($type == HDOM_TYPE_COMMENT || $type == HDOM_TYPE_UNKNOWN) {
        return '';
    }

    if (strcasecmp($this->tag, 'script') === 0) { return ''; }
    if (strcasecmp($this->tag, 'style') === 0) { return ''; }

    // $this->nodes has been observed to be NULL for some element nodes
    // (the cause is not known), so that case yields an empty string.
    if (is_null($this->nodes)) {
        return '';
    }

    $plain = '';

    foreach ($this->nodes as $child) {
        // A paragraph starts after a blank line.
        if ($child->tag === 'p') {
            $plain .= "\n\n";
        }

        $plain .= $this->convert_text($child->text());

        // Separate consecutive spans so their text does not run together.
        if ($child->tag === 'span') {
            $plain .= $this->dom->default_span_text;
        }
    }

    return trim($plain);
}
662
+
663
/**
 * Get the node's inner text with CDATA markers removed.
 *
 * Every `<![CDATA[` (matched case-insensitively) and every `]]>` is
 * stripped from the inner text.
 *
 * @return string
 */
function xmltext()
{
    return str_ireplace(array('<![CDATA[', ']]>'), '', $this->innertext());
}
675
+
676
/**
 * Build the opening-tag markup of this node.
 *
 * Nodes that carry text info (text, comment, unknown) return that text with
 * noise restored. Otherwise the tag name and every attribute that is still
 * present are rendered using the spacing and quote style recorded in the
 * info array.
 *
 * @return string
 */
function makeup()
{
    if (isset($this->_[HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[HDOM_INFO_TEXT]);
    }

    $html = '<' . $this->tag;
    $next = 0;

    foreach ($this->attr as $name => $value) {
        // $i is the attribute's position; it indexes the recorded
        // spacing/quote info even for attributes that are skipped.
        $i = $next++;

        // A null or false value marks a removed attribute.
        if ($value === null || $value === false) { continue; }

        $html .= $this->_[HDOM_INFO_SPACE][$i][0];

        // Value-less attribute (nowrap, checked, selected, ...)
        if ($value === true) {
            $html .= $name;
            continue;
        }

        $quoteType = $this->_[HDOM_INFO_QUOTE][$i];

        if ($quoteType == HDOM_QUOTE_DOUBLE) {
            $quote = '"';
        } elseif ($quoteType == HDOM_QUOTE_SINGLE) {
            $quote = '\'';
        } else {
            $quote = '';
        }

        $html .= $name
            . $this->_[HDOM_INFO_SPACE][$i][1]
            . '='
            . $this->_[HDOM_INFO_SPACE][$i][2]
            . $quote
            . $value
            . $quote;
    }

    return $this->dom->restore_noise($html) . $this->_[HDOM_INFO_ENDSPACE] . '>';
}
719
+
720
/**
 * Find elements by CSS selector
 *
 * @param string $selector The CSS selector
 * @param int|null $idx Index of the element to return from the list of
 * matches; negative values count from the end (default: `null` = return
 * the whole list).
 * @param bool $lowercase Matches tag names case insensitive (lowercase) if
 * enabled (default: `false`)
 * @return array|object|null The list of matching elements, or, when $idx is
 * given, the single element at that index or null if there is none.
 */
function find($selector, $idx = null, $lowercase = false)
{
    $groups = $this->parse_selector($selector);
    $groupCount = count($groups);
    if ($groupCount === 0) { return array(); }

    $matched = array();

    // Evaluate every parsed selector group on its own and merge the results.
    for ($g = 0; $g < $groupCount; ++$g) {
        $depth = count($groups[$g]);
        if ($depth === 0) { return array(); }
        if (!isset($this->_[HDOM_INFO_BEGIN])) { return array(); }

        $frontier = array($this->_[HDOM_INFO_BEGIN] => 1);
        $combinator = ' ';

        // Walk the selector chain level by level (iteratively, no recursion).
        for ($level = 0; $level < $depth; ++$level) {
            $next = array();

            foreach ($frontier as $key => $unused) {
                $node = ($key === -1) ? $this->dom->root : $this->dom->nodes[$key];
                $node->seek($groups[$g][$level], $next, $combinator, $lowercase);
            }

            $frontier = $next;
            $combinator = $groups[$g][$level][4]; // combinator for the next level
        }

        foreach ($frontier as $key => $unused) {
            $matched[$key] = 1;
        }
    }

    // Order the matches by their key in the DOM's node list.
    ksort($matched);

    $found = array();
    foreach ($matched as $key => $unused) {
        $found[] = $this->dom->nodes[$key];
    }

    if (is_null($idx)) { return $found; }

    if ($idx < 0) { $idx = count($found) + $idx; }

    return isset($found[$idx]) ? $found[$idx] : null;
}
783
+
784
+ /**
785
+ * Seek DOM elements by selector
786
+ *
787
+ * **Note**
788
+ * The selector element must be compatible to a selector from
789
+ * {@see simple_html_dom_node::parse_selector()}
790
+ *
791
+ * @param array $selector A selector element
792
+ * @param array $ret An array of matches
793
+ * @param bool $lowercase Matches tag names case insensitive (lowercase) if
794
+ * enabled (default: `false`)
795
+ * @return void
796
+ */
797
+ protected function seek($selector, &$ret, $parent_cmd, $lowercase = false)
798
+ {
799
+ global $debug_object;
800
+ if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
801
+
802
+ list($tag, $id, $class, $attributes, $cmb) = $selector;
803
+ $nodes = array();
804
+
805
+ if ($parent_cmd === ' ') { // Descendant Combinator
806
+ // Find parent closing tag if the current element doesn't have a closing
807
+ // tag (i.e. void element)
808
+ $end = (!empty($this->_[HDOM_INFO_END])) ? $this->_[HDOM_INFO_END] : 0;
809
+ if ($end == 0) {
810
+ $parent = $this->parent;
811
+ while (!isset($parent->_[HDOM_INFO_END]) && $parent !== null) {
812
+ $end -= 1;
813
+ $parent = $parent->parent;
814
+ }
815
+ $end += $parent->_[HDOM_INFO_END];
816
+ }
817
+
818
+ // Get list of target nodes
819
+ $nodes_start = $this->_[HDOM_INFO_BEGIN] + 1;
820
+ $nodes_count = $end - $nodes_start;
821
+ $nodes = array_slice($this->dom->nodes, $nodes_start, $nodes_count, true);
822
+ } elseif ($parent_cmd === '>') { // Child Combinator
823
+ $nodes = $this->children;
824
+ } elseif ($parent_cmd === '+'
825
+ && $this->parent
826
+ && in_array($this, $this->parent->children)) { // Next-Sibling Combinator
827
+ $index = array_search($this, $this->parent->children, true) + 1;
828
+ $nodes[] = $this->parent->children[$index];
829
+ } elseif ($parent_cmd === '~'
830
+ && $this->parent
831
+ && in_array($this, $this->parent->children)) { // Subsequent Sibling Combinator
832
+ $index = array_search($this, $this->parent->children, true);
833
+ $nodes = array_slice($this->parent->children, $index);
834
+ }
835
+
836
+ // Go through each element starting at this element until the end tag
837
+ // Note: If this element is a void tag, any previous void element is
838
+ // skipped.
839
+ foreach($nodes as $node) {
840
+ $pass = true;
841
+
842
+ // Skip root nodes
843
+ if(!$node->parent) {
844
+ $pass = false;
845
+ }
846
+
847
+ // Skip if node isn't a child node (i.e. text nodes)
848
+ if($pass && !in_array($node, $node->parent->children, true)) {
849
+ $pass = false;
850
+ }
851
+
852
+ // Skip if tag doesn't match
853
+ if ($pass && $tag !== '' && $tag !== $node->tag && $tag !== '*') {
854
+ $pass = false;
855
+ }
856
+
857
+ // Skip if ID doesn't exist
858
+ if ($pass && $id !== '' && !isset($node->attr['id'])) {
859
+ $pass = false;
860
+ }
861
+
862
+ // Check if ID matches
863
+ if ($pass && $id !== '' && isset($node->attr['id'])) {
864
+ // Note: Only consider the first ID (as browsers do)
865
+ $node_id = explode(' ', trim($node->attr['id']))[0];
866
+
867
+ if($id !== $node_id) { $pass = false; }
868
+ }
869
+
870
+ // Check if all class(es) exist
871
+ if ($pass && $class !== '' && is_array($class) && !empty($class)) {
872
+ if (isset($node->attr['class'])) {
873
+ $node_classes = explode(' ', $node->attr['class']);
874
+
875
+ if ($lowercase) {
876
+ $node_classes = array_map('strtolower', $node_classes);
877
+ }
878
+
879
+ foreach($class as $c) {
880
+ if(!in_array($c, $node_classes)) {
881
+ $pass = false;
882
+ break;
883
+ }
884
+ }
885
+ } else {
886
+ $pass = false;
887
+ }
888
+ }
889
+
890
+ // Check attributes
891
+ if ($pass
892
+ && $attributes !== ''
893
+ && is_array($attributes)
894
+ && !empty($attributes)) {
895
+ foreach($attributes as $a) {
896
+ list (
897
+ $att_name,
898
+ $att_expr,
899
+ $att_val,
900
+ $att_inv,
901
+ $att_case_sensitivity
902
+ ) = $a;
903
+
904
+ // Handle indexing attributes (i.e. "[2]")
905
+ /**
906
+ * Note: This is not supported by the CSS Standard but adds
907
+ * the ability to select items compatible to XPath (i.e.
908
+ * the 3rd element within its parent).
909
+ *
910
+ * Note: This doesn't conflict with the CSS Standard which
911
+ * doesn't work on numeric attributes anyway.
912
+ */
913
+ if (is_numeric($att_name)
914
+ && $att_expr === ''
915
+ && $att_val === '') {
916
+ $count = 0;
917
+
918
+ // Find index of current element in parent
919
+ foreach ($node->parent->children as $c) {
920
+ if ($c->tag === $node->tag) ++$count;
921
+ if ($c === $node) break;
922
+ }
923
+
924
+ // If this is the correct node, continue with next
925
+ // attribute
926
+ if ($count === (int)$att_name) continue;
927
+ }
928
+
929
+ // Check attribute availability
930
+ if ($att_inv) { // Attribute should NOT be set
931
+ if (isset($node->attr[$att_name])) {
932
+ $pass = false;
933
+ break;
934
+ }
935
+ } else { // Attribute should be set
936
+ // todo: "plaintext" is not a valid CSS selector!
937
+ if ($att_name !== 'plaintext'
938
+ && !isset($node->attr[$att_name])) {
939
+ $pass = false;
940
+ break;
941
+ }
942
+ }
943
+
944
+ // Continue with next attribute if expression isn't defined
945
+ if ($att_expr === '') continue;
946
+
947
+ // If they have told us that this is a "plaintext"
948
+ // search then we want the plaintext of the node - right?
949
+ // todo "plaintext" is not a valid CSS selector!
950
+ if ($att_name === 'plaintext') {
951
+ $nodeKeyValue = $node->text();
952
+ } else {
953
+ $nodeKeyValue = $node->attr[$att_name];
954
+ }
955
+
956
+ if (is_object($debug_object)) {
957
+ $debug_object->debug_log(2,
958
+ 'testing node: '
959
+ . $node->tag
960
+ . ' for attribute: '
961
+ . $att_name
962
+ . $att_expr
963
+ . $att_val
964
+ . ' where nodes value is: '
965
+ . $nodeKeyValue
966
+ );
967
+ }
968
+
969
+ // If lowercase is set, do a case insensitive test of
970
+ // the value of the selector.
971
+ if ($lowercase) {
972
+ $check = $this->match(
973
+ $att_expr,
974
+ strtolower($att_val),
975
+ strtolower($nodeKeyValue),
976
+ $att_case_sensitivity
977
+ );
978
+ } else {
979
+ $check = $this->match(
980
+ $att_expr,
981
+ $att_val,
982
+ $nodeKeyValue,
983
+ $att_case_sensitivity
984
+ );
985
+ }
986
+
987
+ if (is_object($debug_object)) {
988
+ $debug_object->debug_log(2,
989
+ 'after match: '
990
+ . ($check ? 'true' : 'false')
991
+ );
992
+ }
993
+
994
+ if (!$check) {
995
+ $pass = false;
996
+ break;
997
+ }
998
+ }
999
+ }
1000
+
1001
+ // Found a match. Add to list and clear node
1002
+ if ($pass) $ret[$node->_[HDOM_INFO_BEGIN]] = 1;
1003
+ unset($node);
1004
+ }
1005
+ // It's passed by reference so this is actually what this function returns.
1006
+ if (is_object($debug_object)) {
1007
+ $debug_object->debug_log(1, 'EXIT - ret: ', $ret);
1008
+ }
1009
+ }
1010
+
1011
+ /**
1012
+ * Match value and pattern for a given CSS expression
1013
+ *
1014
+ * **Supported Expressions**
1015
+ *
1016
+ * | Expression | Description
1017
+ * | ---------- | -----------
1018
+ * | `=` | $value and $pattern must be equal
1019
+ * | `!=` | $value and $pattern must not be equal
1020
+ * | `^=` | $value must start with $pattern
1021
+ * | `$=` | $value must end with $pattern
1022
+ * | `*=` | $value must contain $pattern
1023
+ *
1024
+ * @param string $exp The expression.
1025
+ * @param string $pattern The pattern
1026
+ * @param string $value The value
1027
+ * @return bool True if $value matches $pattern
1028
+ */
1029
+ protected function match($exp, $pattern, $value, $case_sensitivity)
1030
+ {
1031
+ global $debug_object;
1032
+ if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
1033
+
1034
+ if ($case_sensitivity === 'i') {
1035
+ $pattern = strtolower($pattern);
1036
+ $value = strtolower($value);
1037
+ }
1038
+
1039
+ switch ($exp) {
1040
+ case '=':
1041
+ return ($value === $pattern);
1042
+ case '!=':
1043
+ return ($value !== $pattern);
1044
+ case '^=':
1045
+ return preg_match('/^' . preg_quote($pattern, '/') . '/', $value);
1046
+ case '$=':
1047
+ return preg_match('/' . preg_quote($pattern, '/') . '$/', $value);
1048
+ case '*=':
1049
+ return preg_match('/' . preg_quote($pattern, '/') . '/', $value);
1050
+ case '|=':
1051
+ /**
1052
+ * [att|=val]
1053
+ *
1054
+ * Represents an element with the att attribute, its value
1055
+ * either being exactly "val" or beginning with "val"
1056
+ * immediately followed by "-" (U+002D).
1057
+ */
1058
+ return strpos($value, $pattern) === 0;
1059
+ case '~=':
1060
+ /**
1061
+ * [att~=val]
1062
+ *
1063
+ * Represents an element with the att attribute whose value is a
1064
+ * whitespace-separated list of words, one of which is exactly
1065
+ * "val". If "val" contains whitespace, it will never represent
1066
+ * anything (since the words are separated by spaces). Also if
1067
+ * "val" is the empty string, it will never represent anything.
1068
+ */
1069
+ return in_array($pattern, explode(' ', trim($value)), true);
1070
+ }
1071
+ return false;
1072
+ }
1073
+
1074
+ /**
1075
+ * Parse CSS selector
1076
+ *
1077
+ * @param string $selector_string CSS selector string
1078
+ * @return array List of CSS selectors. The format depends on the type of
1079
+ * selector:
1080
+ *
1081
+ * ```php
1082
+ *
1083
+ * array( // list of selectors (each separated by a comma), i.e. 'img, p, div'
1084
+ * array( // list of combinator selectors, i.e. 'img > p > div'
1085
+ * array( // selector element
1086
+ * [0], // (string) The element tag
1087
+ * [1], // (string) The element id
1088
+ * [2], // (array<string>) The element classes
1089
+ * [3], // (array<array<string>>) The list of attributes, each
1090
+ * // with five elements: name, expression, value, inverted, case-sensitivity
1091
+ * [4] // (string) The selector combinator (' ' | '>' | '+' | '~')
1092
+ * )
1093
+ * )
1094
+ * )
1095
+ * ```
1096
+ *
1097
+ * @link https://www.w3.org/TR/selectors/#compound Compound selector
1098
+ */
1099
+ protected function parse_selector($selector_string)
1100
+ {
1101
+ global $debug_object;
1102
+ if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
1103
+
1104
+ /**
1105
+ * Pattern of CSS selectors, modified from mootools (https://mootools.net/)
1106
+ *
1107
+ * Paperg: Add the colon to the attribute, so that it properly finds
1108
+ * <tag attr:ibute="something" > like google does.
1109
+ *
1110
+ * Note: if you try to look at this attribute, you MUST use getAttribute
1111
+ * since $dom->x:y will fail the php syntax check.
1112
+ *
1113
+ * Notice the \[ starting the attribute? and the @? following? This
1114
+ * implies that an attribute can begin with an @ sign that is not
1115
+ * captured. This implies that an html attribute specifier may start
1116
+ * with an @ sign that is NOT captured by the expression. Further study
1117
+ * is required to determine if this should be documented or removed.
1118
+ *
1119
+ * Matches selectors in this order:
1120
+ *
1121
+ * [0] - full match
1122
+ *
1123
+ * [1] - tag name
1124
+ * ([\w:\*-]*)
1125
+ * Matches the tag name consisting of zero or more words, colons,
1126
+ * asterisks and hyphens.
1127
+ *
1128
+ * [2] - id name
1129
+ * (?:\#([\w-]+))
1130
+ * Optionally matches an id name, consisting of an "#" followed by
1131
+ * the id name (one or more words and hyphens).
1132
+ *
1133
+ * [3] - class names (including dots)
1134
+ * (?:\.([\w\.-]+))?
1135
+ * Optionally matches a list of classes, consisting of a "."
1136
+ * followed by the class name (one or more words and hyphens)
1137
+ * where multiple classes can be chained (i.e. ".foo.bar.baz")
1138
+ *
1139
+ * [4] - attributes
1140
+ * ((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?
1141
+ * Optionally matches the attributes list
1142
+ *
1143
+ * [5] - separator
1144
+ * ([\/, >+~]+)
1145
+ * Matches the selector list separator
1146
+ */
1147
+ // phpcs:ignore Generic.Files.LineLength
1148
+ $pattern = "/([\w:\*-]*)(?:\#([\w-]+))?(?:|\.([\w\.-]+))?((?:\[@?(?:!?[\w:-]+)(?:(?:[!*^$|~]?=)[\"']?(?:.*?)[\"']?)?(?:\s*?(?:[iIsS])?)?\])+)?([\/, >+~]+)/is";
1149
+
1150
+ preg_match_all(
1151
+ $pattern,
1152
+ trim($selector_string) . ' ', // Add final ' ' as pseudo separator
1153
+ $matches,
1154
+ PREG_SET_ORDER
1155
+ );
1156
+
1157
+ if (is_object($debug_object)) {
1158
+ $debug_object->debug_log(2, 'Matches Array: ', $matches);
1159
+ }
1160
+
1161
+ $selectors = array();
1162
+ $result = array();
1163
+
1164
+ foreach ($matches as $m) {
1165
+ $m[0] = trim($m[0]);
1166
+
1167
+ // Skip NoOps
1168
+ if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') { continue; }
1169
+
1170
+ // Convert to lowercase
1171
+ if ($this->dom->lowercase) {
1172
+ $m[1] = strtolower($m[1]);
1173
+ }
1174
+
1175
+ // Extract classes
1176
+ if ($m[3] !== '') { $m[3] = explode('.', $m[3]); }
1177
+
1178
+ /* Extract attributes (pattern based on the pattern above!)
1179
+
1180
+ * [0] - full match
1181
+ * [1] - attribute name
1182
+ * [2] - attribute expression
1183
+ * [3] - attribute value
1184
+ * [4] - case sensitivity
1185
+ *
1186
+ * Note: Attributes can be negated with a "!" prefix to their name
1187
+ */
1188
+ if($m[4] !== '') {
1189
+ preg_match_all(
1190
+ "/\[@?(!?[\w:-]+)(?:([!*^$|~]?=)[\"']?(.*?)[\"']?)?(?:\s*?([iIsS])?)?\]/is",
1191
+ trim($m[4]),
1192
+ $attributes,
1193
+ PREG_SET_ORDER
1194
+ );
1195
+
1196
+ // Replace element by array
1197
+ $m[4] = array();
1198
+
1199
+ foreach($attributes as $att) {
1200
+ // Skip empty matches
1201
+ if(trim($att[0]) === '') { continue; }
1202
+
1203
+ $inverted = (isset($att[1][0]) && $att[1][0] === '!');
1204
+ $m[4][] = array(
1205
+ $inverted ? substr($att[1], 1) : $att[1], // Name
1206
+ (isset($att[2])) ? $att[2] : '', // Expression
1207
+ (isset($att[3])) ? $att[3] : '', // Value
1208
+ $inverted, // Inverted Flag
1209
+ (isset($att[4])) ? strtolower($att[4]) : '', // Case-Sensitivity
1210
+ );
1211
+ }
1212
+ }
1213
+
1214
+ // Sanitize Separator
1215
+ if ($m[5] !== '' && trim($m[5]) === '') { // Descendant Separator
1216
+ $m[5] = ' ';
1217
+ } else { // Other Separator
1218
+ $m[5] = trim($m[5]);
1219
+ }
1220
+
1221
+ // Clear Separator if it's a Selector List
1222
+ if ($is_list = ($m[5] === ',')) { $m[5] = ''; }
1223
+
1224
+ // Remove full match before adding to results
1225
+ array_shift($m);
1226
+ $result[] = $m;
1227
+
1228
+ if ($is_list) { // Selector List
1229
+ $selectors[] = $result;
1230
+ $result = array();
1231
+ }
1232
+ }
1233
+
1234
+ if (count($result) > 0) { $selectors[] = $result; }
1235
+ return $selectors;
1236
+ }
1237
+
1238
+ function __get($name)
1239
+ {
1240
+ if (isset($this->attr[$name])) {
1241
+ return $this->convert_text($this->attr[$name]);
1242
+ }
1243
+ switch ($name) {
1244
+ case 'outertext': return $this->outertext();
1245
+ case 'innertext': return $this->innertext();
1246
+ case 'plaintext': return $this->text();
1247
+ case 'xmltext': return $this->xmltext();
1248
+ default: return array_key_exists($name, $this->attr);
1249
+ }
1250
+ }
1251
+
1252
+ function __set($name, $value)
1253
+ {
1254
+ global $debug_object;
1255
+ if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
1256
+
1257
+ switch ($name) {
1258
+ case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
1259
+ case 'innertext':
1260
+ if (isset($this->_[HDOM_INFO_TEXT])) {
1261
+ return $this->_[HDOM_INFO_TEXT] = $value;
1262
+ }
1263
+ return $this->_[HDOM_INFO_INNER] = $value;
1264
+ }
1265
+
1266
+ if (!isset($this->attr[$name])) {
1267
+ $this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
1268
+ $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
1269
+ }
1270
+
1271
+ $this->attr[$name] = $value;
1272
+ }
1273
+
1274
+ function __isset($name)
1275
+ {
1276
+ switch ($name) {
1277
+ case 'outertext': return true;
1278
+ case 'innertext': return true;
1279
+ case 'plaintext': return true;
1280
+ }
1281
+ //no value attr: nowrap, checked selected...
1282
+ return (array_key_exists($name, $this->attr)) ? true : isset($this->attr[$name]);
1283
+ }
1284
+
1285
+ function __unset($name)
1286
+ {
1287
+ if (isset($this->attr[$name])) { unset($this->attr[$name]); }
1288
+ }
1289
+
1290
+ // PaperG - Function to convert the text from one character set to another
1291
+ // if the two sets are not the same.
1292
+ function convert_text($text)
1293
+ {
1294
+ global $debug_object;
1295
+ if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
1296
+
1297
+ $converted_text = $text;
1298
+
1299
+ $sourceCharset = '';
1300
+ $targetCharset = '';
1301
+
1302
+ if ($this->dom) {
1303
+ $sourceCharset = strtoupper($this->dom->_charset);
1304
+ $targetCharset = strtoupper($this->dom->_target_charset);
1305
+ }
1306
+
1307
+ if (is_object($debug_object)) {
1308
+ $debug_object->debug_log(3,
1309
+ 'source charset: '
1310
+ . $sourceCharset
1311
+ . ' target charaset: '
1312
+ . $targetCharset
1313
+ );
1314
+ }
1315
+
1316
+ if (!empty($sourceCharset)
1317
+ && !empty($targetCharset)
1318
+ && (strcasecmp($sourceCharset, $targetCharset) != 0)) {
1319
+ // Check if the reported encoding could have been incorrect and the text is actually already UTF-8
1320
+ if ((strcasecmp($targetCharset, 'UTF-8') == 0)
1321
+ && ($this->is_utf8($text))) {
1322
+ $converted_text = $text;
1323
+ } else {
1324
+ $converted_text = iconv($sourceCharset, $targetCharset, $text);
1325
+ }
1326
+ }
1327
+
1328
+ // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
1329
+ if ($targetCharset === 'UTF-8') {
1330
+ if (substr($converted_text, 0, 3) === "\xef\xbb\xbf") {
1331
+ $converted_text = substr($converted_text, 3);
1332
+ }
1333
+
1334
+ if (substr($converted_text, -3) === "\xef\xbb\xbf") {
1335
+ $converted_text = substr($converted_text, 0, -3);
1336
+ }
1337
+ }
1338
+
1339
+ return $converted_text;
1340
+ }
1341
+
1342
+ /**
1343
+ * Returns true if $string is valid UTF-8 and false otherwise.
1344
+ *
1345
+ * @param mixed $str String to be tested
1346
+ * @return boolean
1347
+ */
1348
+ static function is_utf8($str)
1349
+ {
1350
+ $c = 0; $b = 0;
1351
+ $bits = 0;
1352
+ $len = strlen($str);
1353
+ for($i = 0; $i < $len; $i++) {
1354
+ $c = ord($str[$i]);
1355
+ if($c > 128) {
1356
+ if(($c >= 254)) { return false; }
1357
+ elseif($c >= 252) { $bits = 6; }
1358
+ elseif($c >= 248) { $bits = 5; }
1359
+ elseif($c >= 240) { $bits = 4; }
1360
+ elseif($c >= 224) { $bits = 3; }
1361
+ elseif($c >= 192) { $bits = 2; }
1362
+ else { return false; }
1363
+ if(($i + $bits) > $len) { return false; }
1364
+ while($bits > 1) {
1365
+ $i++;
1366
+ $b = ord($str[$i]);
1367
+ if($b < 128 || $b > 191) { return false; }
1368
+ $bits--;
1369
+ }
1370
+ }
1371
+ }
1372
+ return true;
1373
+ }
1374
+
1375
+ /**
1376
+ * Function to try a few tricks to determine the displayed size of an img on
1377
+ * the page. NOTE: This will ONLY work on an IMG tag. Returns FALSE on all
1378
+ * other tag types.
1379
+ *
1380
+ * @author John Schlick
1381
+ * @version April 19 2012
1382
+ * @return array an array containing the 'height' and 'width' of the image
1383
+ * on the page or -1 if we can't figure it out.
1384
+ */
1385
+ function get_display_size()
1386
+ {
1387
+ global $debug_object;
1388
+
1389
+ $width = -1;
1390
+ $height = -1;
1391
+
1392
+ if ($this->tag !== 'img') {
1393
+ return false;
1394
+ }
1395
+
1396
+ // See if there is a height or width attribute in the tag itself.
1397
+ if (isset($this->attr['width'])) {
1398
+ $width = $this->attr['width'];
1399
+ }
1400
+
1401
+ if (isset($this->attr['height'])) {
1402
+ $height = $this->attr['height'];
1403
+ }
1404
+
1405
+ // Now look for an inline style.
1406
+ if (isset($this->attr['style'])) {
1407
+ // Thanks to user gnarf from stackoverflow for this regular expression.
1408
+ $attributes = array();
1409
+
1410
+ preg_match_all(
1411
+ '/([\w-]+)\s*:\s*([^;]+)\s*;?/',
1412
+ $this->attr['style'],
1413
+ $matches,
1414
+ PREG_SET_ORDER
1415
+ );
1416
+
1417
+ foreach ($matches as $match) {
1418
+ $attributes[$match[1]] = $match[2];
1419
+ }
1420
+
1421
+ // If there is a width in the style attributes:
1422
+ if (isset($attributes['width']) && $width == -1) {
1423
+ // check that the last two characters are px (pixels)
1424
+ if (strtolower(substr($attributes['width'], -2)) === 'px') {
1425
+ $proposed_width = substr($attributes['width'], 0, -2);
1426
+ // Now make sure that it's an integer and not something stupid.
1427
+ if (filter_var($proposed_width, FILTER_VALIDATE_INT)) {
1428
+ $width = $proposed_width;
1429
+ }
1430
+ }
1431
+ }
1432
+
1433
+ // If there is a height in the style attributes:
1434
+ if (isset($attributes['height']) && $height == -1) {
1435
+ // check that the last two characters are px (pixels)
1436
+ if (strtolower(substr($attributes['height'], -2)) == 'px') {
1437
+ $proposed_height = substr($attributes['height'], 0, -2);
1438
+ // Now make sure that it's an integer and not something stupid.
1439
+ if (filter_var($proposed_height, FILTER_VALIDATE_INT)) {
1440
+ $height = $proposed_height;
1441
+ }
1442
+ }
1443
+ }
1444
+
1445
+ }
1446
+
1447
+ // Future enhancement:
1448
+ // Look in the tag to see if there is a class or id specified that has
1449
+ // a height or width attribute to it.
1450
+
1451
+ // Far future enhancement
1452
+ // Look at all the parent tags of this image to see if they specify a
1453
+ // class or id that has an img selector that specifies a height or width
1454
+ // Note that in this case, the class or id will have the img subselector
1455
+ // for it to apply to the image.
1456
+
1457
+ // ridiculously far future development
1458
+ // If the class or id is specified in a SEPARATE css file thats not on
1459
+ // the page, go get it and do what we were just doing for the ones on
1460
+ // the page.
1461
+
1462
+ $result = array(
1463
+ 'height' => $height,
1464
+ 'width' => $width
1465
+ );
1466
+
1467
+ return $result;
1468
+ }
1469
+
1470
+ // camel naming conventions
1471
+ function getAllAttributes()
1472
+ {
1473
+ return $this->attr;
1474
+ }
1475
+
1476
+ function getAttribute($name)
1477
+ {
1478
+ return $this->__get($name);
1479
+ }
1480
+
1481
+ function setAttribute($name, $value)
1482
+ {
1483
+ $this->__set($name, $value);
1484
+ }
1485
+
1486
+ function hasAttribute($name)
1487
+ {
1488
+ return $this->__isset($name);
1489
+ }
1490
+
1491
+ function removeAttribute($name)
1492
+ {
1493
+ $this->__set($name, null);
1494
+ }
1495
+
1496
+ function getElementById($id)
1497
+ {
1498
+ return $this->find("#$id", 0);
1499
+ }
1500
+
1501
+ function getElementsById($id, $idx = null)
1502
+ {
1503
+ return $this->find("#$id", $idx);
1504
+ }
1505
+
1506
+ function getElementByTagName($name)
1507
+ {
1508
+ return $this->find($name, 0);
1509
+ }
1510
+
1511
+ function getElementsByTagName($name, $idx = null)
1512
+ {
1513
+ return $this->find($name, $idx);
1514
+ }
1515
+
1516
+ function parentNode()
1517
+ {
1518
+ return $this->parent();
1519
+ }
1520
+
1521
+ function childNodes($idx = -1)
1522
+ {
1523
+ return $this->children($idx);
1524
+ }
1525
+
1526
+ function firstChild()
1527
+ {
1528
+ return $this->first_child();
1529
+ }
1530
+
1531
+ function lastChild()
1532
+ {
1533
+ return $this->last_child();
1534
+ }
1535
+
1536
+ function nextSibling()
1537
+ {
1538
+ return $this->next_sibling();
1539
+ }
1540
+
1541
+ function previousSibling()
1542
+ {
1543
+ return $this->prev_sibling();
1544
+ }
1545
+
1546
+ function hasChildNodes()
1547
+ {
1548
+ return $this->has_child();
1549
+ }
1550
+
1551
+ function nodeName()
1552
+ {
1553
+ return $this->tag;
1554
+ }
1555
+
1556
+ function appendChild($node)
1557
+ {
1558
+ $node->parent($this);
1559
+ return $node;
1560
+ }
1561
+
1562
+ }
1563
+
1564
+ /**
1565
+ * simple html dom parser
1566
+ *
1567
+ * Paperg - in the find routine: allow us to specify that we want case
1568
+ * insensitive testing of the value of the selector.
1569
+ *
1570
+ * Paperg - change $size from protected to public so we can easily access it
1571
+ *
1572
+ * Paperg - added ForceTagsClosed in the constructor which tells us whether we
1573
+ * trust the html or not. Default is to NOT trust it.
1574
+ *
1575
+ * @package PlaceLocalInclude
1576
+ */
1577
+ class simple_html_dom
1578
+ {
1579
+ /**
1580
+ * The root node of the document
1581
+ *
1582
+ * @var object
1583
+ */
1584
+ public $root = null;
1585
+
1586
+ /**
1587
+ * List of nodes in the current DOM
1588
+ *
1589
+ * @var array
1590
+ */
1591
+ public $nodes = array();
1592
+
1593
+ /**
1594
+ * Callback function to run for each element in the DOM.
1595
+ *
1596
+ * @var callable|null
1597
+ */
1598
+ public $callback = null;
1599
+
1600
+ /**
1601
+ * Indicates how tags and attributes are matched
1602
+ *
1603
+ * @var bool When set to **true** tags and attributes will be converted to
1604
+ * lowercase before matching.
1605
+ */
1606
+ public $lowercase = false;
1607
+
1608
+ /**
1609
+ * Original document size
1610
+ *
1611
+ * Holds the original document size.
1612
+ *
1613
+ * @var int
1614
+ */
1615
+ public $original_size;
1616
+
1617
+ /**
1618
+ * Current document size
1619
+ *
1620
+ * Holds the current document size. The document size is determined by the
1621
+ * string length of ({@see simple_html_dom::$doc}).
1622
+ *
1623
+ * _Note_: Using this variable is more efficient than calling `strlen($doc)`
1624
+ *
1625
+ * @var int
1626
+ * */
1627
+ public $size;
1628
+
1629
+ /**
1630
+ * Current position in the document
1631
+ *
1632
+ * @var int
1633
+ */
1634
+ protected $pos;
1635
+
1636
+ /**
1637
+ * The document
1638
+ *
1639
+ * @var string
1640
+ */
1641
+ protected $doc;
1642
+
1643
+ /**
1644
+ * Current character
1645
+ *
1646
+ * Holds the current character at position {@see simple_html_dom::$pos} in
1647
+ * the document {@see simple_html_dom::$doc}
1648
+ *
1649
+ * _Note_: Using this variable is more efficient than calling
1650
+ * `substr($doc, $pos, 1)`
1651
+ *
1652
+ * @var string
1653
+ */
1654
+ protected $char;
1655
+
1656
+ protected $cursor;
1657
+
1658
+ /**
1659
+ * Parent node of the next node detected by the parser
1660
+ *
1661
+ * @var object
1662
+ */
1663
+ protected $parent;
1664
+ protected $noise = array();
1665
+
1666
+ /**
1667
+ * Tokens considered blank in HTML
1668
+ *
1669
+ * @var string
1670
+ */
1671
+ protected $token_blank = " \t\r\n";
1672
+
1673
+ /**
1674
+ * Tokens to identify the equal sign for attributes, stopping either at the
1675
+ * closing tag ("/" i.e. "<html />") or the end of an opening tag (">" i.e.
1676
+ * "<html>")
1677
+ *
1678
+ * @var string
1679
+ */
1680
+ protected $token_equal = ' =/>';
1681
+
1682
+ /**
1683
+ * Tokens to identify the end of a tag name. A tag name either ends on the
1684
+ * ending slash ("/" i.e. "<html/>") or whitespace ("\s\r\n\t")
1685
+ *
1686
+ * @var string
1687
+ */
1688
+ protected $token_slash = " />\r\n\t";
1689
+
1690
+ /**
1691
+ * Tokens to identify the end of an attribute
1692
+ *
1693
+ * @var string
1694
+ */
1695
+ protected $token_attr = ' >';
1696
+
1697
+ // Note that this is referenced by a child node, and so it needs to be
1698
+ // public for that node to see this information.
1699
+ public $_charset = '';
1700
+ public $_target_charset = '';
1701
+
1702
+ /**
1703
+ * Innertext for <br> elements
1704
+ *
1705
+ * @var string
1706
+ */
1707
+ protected $default_br_text = '';
1708
+
1709
+ /**
1710
+ * Suffix for <span> elements
1711
+ *
1712
+ * @var string
1713
+ */
1714
+ public $default_span_text = '';
1715
+
1716
+ /**
1717
+ * Defines a list of self-closing tags (Void elements) according to the HTML
1718
+ * Specification
1719
+ *
1720
+ * _Remarks_:
1721
+ * - Use `isset()` instead of `in_array()` on array elements to boost
1722
+ * performance about 30%
1723
+ * - Sort elements by name for better readability!
1724
+ *
1725
+ * @link https://www.w3.org/TR/html HTML Specification
1726
+ * @link https://www.w3.org/TR/html/syntax.html#void-elements Void elements
1727
+ */
1728
+ protected $self_closing_tags = array(
1729
+ 'area' => 1,
1730
+ 'base' => 1,
1731
+ 'br' => 1,
1732
+ 'col' => 1,
1733
+ 'embed' => 1,
1734
+ 'hr' => 1,
1735
+ 'img' => 1,
1736
+ 'input' => 1,
1737
+ 'link' => 1,
1738
+ 'meta' => 1,
1739
+ 'param' => 1,
1740
+ 'source' => 1,
1741
+ 'track' => 1,
1742
+ 'wbr' => 1
1743
+ );
1744
+
1745
+ /**
1746
+ * Defines a list of tags which - if closed - close all optional closing
1747
+ * elements within if they haven't been closed yet. (So, an element where
1748
+ * neither opening nor closing tag is omissible consistently closes every
1749
+ * optional closing element within)
1750
+ *
1751
+ * _Remarks_:
1752
+ * - Use `isset()` instead of `in_array()` on array elements to boost
1753
+ * performance about 30%
1754
+ * - Sort elements by name for better readability!
1755
+ */
1756
+ protected $block_tags = array(
1757
+ 'body' => 1,
1758
+ 'div' => 1,
1759
+ 'form' => 1,
1760
+ 'root' => 1,
1761
+ 'span' => 1,
1762
+ 'table' => 1
1763
+ );
1764
+
1765
+ /**
1766
+ * Defines elements whose end tag is omissible.
1767
+ *
1768
+ * * key = Name of an element whose end tag is omissible.
1769
+ * * value = Names of elements whose end tag is omissible, that are closed
1770
+ * by the current element.
1771
+ *
1772
+ * _Remarks_:
1773
+ * - Use `isset()` instead of `in_array()` on array elements to boost
1774
+ * performance about 30%
1775
+ * - Sort elements by name for better readability!
1776
+ *
1777
+ * **Example**
1778
+ *
1779
+ * An `li` element’s end tag may be omitted if the `li` element is immediately
1780
+ * followed by another `li` element. To do that, add following element to the
1781
+ * array:
1782
+ *
1783
+ * ```php
1784
+ * 'li' => array('li'),
1785
+ * ```
1786
+ *
1787
+ * With this, the following two examples are considered equal. Note that the
1788
+ * second example is missing the closing tags on `li` elements.
1789
+ *
1790
+ * ```html
1791
+ * <ul><li>First Item</li><li>Second Item</li></ul>
1792
+ * ```
1793
+ *
1794
+ * <ul><li>First Item</li><li>Second Item</li></ul>
1795
+ *
1796
+ * ```html
1797
+ * <ul><li>First Item<li>Second Item</ul>
1798
+ * ```
1799
+ *
1800
+ * <ul><li>First Item<li>Second Item</ul>
1801
+ *
1802
+ * @var array A two-dimensional array where the key is the name of an
1803
+ * element whose end tag is omissible and the value is an array of elements
1804
+ * whose end tag is omissible, that are closed by the current element.
1805
+ *
1806
+ * @link https://www.w3.org/TR/html/syntax.html#optional-tags Optional tags
1807
+ *
1808
+ * @todo The implementation of optional closing tags doesn't work in all cases
1809
+ * because it only considers elements who close other optional closing
1810
+ * tags, not taking into account that some (non-blocking) tags should close
1811
+ * these optional closing tags. For example, the end tag for "p" is omissible
1812
+ * and can be closed by an "address" element, whose end tag is NOT omissible.
1813
+ * Currently a "p" element without closing tag stops at the next "p" element
1814
+ * or blocking tag, even if it contains other elements.
1815
+ *
1816
+ * @todo Known sourceforge issue #2977341
1817
+ * B tags that are not closed cause us to return everything to the end of
1818
+ * the document.
1819
+ */
1820
+ protected $optional_closing_tags = array(
1821
+ // Not optional, see
1822
+ // https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
1823
+ 'b' => array('b' => 1),
1824
+ 'dd' => array('dd' => 1, 'dt' => 1),
1825
+ // Not optional, see
1826
+ // https://www.w3.org/TR/html/grouping-content.html#the-dl-element
1827
+ 'dl' => array('dd' => 1, 'dt' => 1),
1828
+ 'dt' => array('dd' => 1, 'dt' => 1),
1829
+ 'li' => array('li' => 1),
1830
+ 'optgroup' => array('optgroup' => 1, 'option' => 1),
1831
+ 'option' => array('optgroup' => 1, 'option' => 1),
1832
+ 'p' => array('p' => 1),
1833
+ 'rp' => array('rp' => 1, 'rt' => 1),
1834
+ 'rt' => array('rp' => 1, 'rt' => 1),
1835
+ 'td' => array('td' => 1, 'th' => 1),
1836
+ 'th' => array('td' => 1, 'th' => 1),
1837
+ 'tr' => array('td' => 1, 'th' => 1, 'tr' => 1),
1838
+ );
1839
+
1840
/**
 * Create a parser instance and optionally load initial content.
 *
 * @param string|null $str Markup to parse. A value that starts with
 * "http://" or names an existing file is loaded through load_file()
 * instead of being parsed as markup.
 * @param bool $lowercase Forwarded to load(); tag and attribute names are
 * lowercased while parsing when true.
 * @param bool $forceTagsClosed When false, the optional-closing-tag table is
 * emptied so that no element is implicitly closed by a following element.
 * @param string $target_charset Stored as the target charset of this DOM.
 * @param bool $stripRN Forwarded to load(); "\r" and "\n" are replaced by
 * spaces when true.
 * @param string $defaultBRText Forwarded to load().
 * @param string $defaultSpanText Forwarded to load().
 * @param int $options Forwarded to load() (bit flags, e.g. HDOM_SMARTY_AS_TEXT).
 */
function __construct(
    $str = null,
    $lowercase = true,
    $forceTagsClosed = true,
    $target_charset = DEFAULT_TARGET_CHARSET,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0)
{
    // Forcing tags to be closed implies that we don't trust the html, but
    // it can lead to parsing errors if we SHOULD trust the html.
    // This must happen BEFORE any content is parsed, and it must target the
    // property the parser actually reads ($optional_closing_tags); the
    // previous code wrote to a non-existent "$optional_closing_array" after
    // loading, which made the flag a no-op.
    if (!$forceTagsClosed) {
        $this->optional_closing_tags = array();
    }

    if ($str) {
        if (preg_match('/^http:\/\//i', $str) || is_file($str)) {
            $this->load_file($str);
        } else {
            $this->load(
                $str,
                $lowercase,
                $stripRN,
                $defaultBRText,
                $defaultSpanText,
                $options
            );
        }
    }

    $this->_target_charset = $target_charset;
}
1872
+
1873
/**
 * Release all node references when the parser object is destroyed.
 *
 * Delegates to clear().
 */
function __destruct()
{
    $this->clear();
}
1877
+
1878
/**
 * Load HTML from a string and parse it into this DOM.
 *
 * Script, CDATA, comment, style, code and server-side-script sections are
 * replaced by noise placeholders (see remove_noise()) before parsing.
 *
 * @param string $str Markup to parse
 * @param bool $lowercase Forwarded to prepare()
 * @param bool $stripRN True to replace every "\r" and "\n" with a space
 * @param string $defaultBRText Forwarded to prepare()
 * @param string $defaultSpanText Forwarded to prepare()
 * @param int $options Bit flags; HDOM_SMARTY_AS_TEXT additionally strips
 * Smarty-style "{...}" blocks
 * @return simple_html_dom This instance, so that calls can be chained
 */
function load(
    $str,
    $lowercase = true,
    $stripRN = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT,
    $options = 0)
{
    global $debug_object;

    // Reset the parser state and store the document.
    $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);

    // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
    // script tag removal precedes style tag removal.
    $scriptPatterns = array(
        "'<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>'is",
        "'<\s*script\s*>(.*?)<\s*/\s*script\s*>'is",
    );
    foreach ($scriptPatterns as $scriptPattern) {
        $this->remove_noise($scriptPattern);
    }

    // Replace line breaks by spaces if requested; the document length is
    // recomputed afterwards because the content was rewritten.
    if ($stripRN) {
        $this->doc = str_replace(array("\r", "\n"), ' ', $this->doc);
        $this->size = strlen($this->doc);
    }

    // CDATA sections (the whole match is removed)
    $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
    // comments
    $this->remove_noise("'<!--(.*?)-->'is");
    // <style> tags
    $stylePatterns = array(
        "'<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>'is",
        "'<\s*style\s*>(.*?)<\s*/\s*style\s*>'is",
    );
    foreach ($stylePatterns as $stylePattern) {
        $this->remove_noise($stylePattern);
    }
    // preformatted (<code>) tags
    $this->remove_noise("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is");
    // server side scripts (the whole match is removed)
    $this->remove_noise("'(<\?)(.*?)(\?>)'s", true);

    // Smarty scripts, only when asked for
    if ($options & HDOM_SMARTY_AS_TEXT) {
        $this->remove_noise("'(\{\w)(.*?)(\})'s", true);
    }

    // Build the node tree, close the root node and detect the charset.
    $this->parse();
    $this->root->_[HDOM_INFO_END] = $this->cursor;
    $this->parse_charset();

    // make load function chainable
    return $this;
}
1932
+
1933
/**
 * Load HTML from a file or URL.
 *
 * All arguments are passed unchanged to file_get_contents(); the fetched
 * content is then parsed with load() (tag names lowercased).
 *
 * @return false|null False if the content could not be read, otherwise
 * nothing is returned.
 */
function load_file()
{
    $arguments = func_get_args();
    $contents = call_user_func_array('file_get_contents', $arguments);

    if ($contents === false) {
        return false;
    }

    $this->load($contents, true);
}
1944
+
1945
/**
 * Register the callback stored on this DOM object.
 *
 * @param callable $function_name Callback to store in $this->callback
 * @return void
 */
function set_callback($function_name)
{
    $this->callback = $function_name;
}
1956
+
1957
/**
 * Unregister the callback by resetting $this->callback to null.
 *
 * @return void
 */
function remove_callback()
{
    $this->callback = null;
}
1966
+
1967
/**
 * Serialize the DOM to a string and optionally write it to a file.
 *
 * @param string $filepath Target file; nothing is written when it is ''
 * @return string The inner text of the root node
 */
function save($filepath = '')
{
    $markup = $this->root->innertext();

    if ($filepath === '') {
        return $markup;
    }

    // Written under an exclusive lock.
    file_put_contents($filepath, $markup, LOCK_EX);

    return $markup;
}
1974
+
1975
/**
 * Find DOM nodes by CSS selector, starting at the root node.
 *
 * Thin wrapper around the root node's find().
 *
 * @param string $selector CSS selector
 * @param int|null $idx Forwarded to the root node's find()
 * @param bool $lowercase Forwarded to the root node's find(); per the original
 * note (Paperg) it requests case insensitive testing of the selector value
 * @return mixed Whatever the root node's find() returns
 */
function find($selector, $idx = null, $lowercase = false)
{
    $rootNode = $this->root;

    return $rootNode->find($selector, $idx, $lowercase);
}
1981
+
1982
/**
 * Clean up memory (works around the php5 circular references memory leak).
 *
 * Clears every registered node, then the children, parent and root
 * references if they are set, and finally drops the document and noise.
 *
 * @return void
 */
function clear()
{
    foreach ($this->nodes as $node) {
        $node->clear();
    }

    // Documented in the sourceforge repository (2977248) as a fix for
    // ongoing memory leaks that occur even with the use of clear.
    if (isset($this->children)) {
        foreach ($this->children as $child) {
            $child->clear();
        }
    }

    if (isset($this->parent)) {
        $this->parent->clear();
        unset($this->parent);
    }

    if (isset($this->root)) {
        $this->root->clear();
        unset($this->root);
    }

    unset($this->doc, $this->noise);
}
2011
+
2012
/**
 * Dump the node tree by delegating to the root node's dump().
 *
 * @param bool $show_attr Forwarded to the root node's dump()
 * @return void
 */
function dump($show_attr = true)
{
    $rootNode = $this->root;
    $rootNode->dump($show_attr);
}
2016
+
2017
/**
 * Prepare HTML data and initialise the parser state.
 *
 * Clears any previous state, stores the trimmed document, resets position,
 * cursor, noise and node lists, and creates a fresh root node that also
 * becomes the current parent.
 *
 * @param string $str Markup to parse
 * @param bool $lowercase Stored in $this->lowercase
 * @param string $defaultBRText Stored in $this->default_br_text
 * @param string $defaultSpanText Stored in $this->default_span_text
 * @return void
 */
protected function prepare(
    $str, $lowercase = true,
    $defaultBRText = DEFAULT_BR_TEXT,
    $defaultSpanText = DEFAULT_SPAN_TEXT)
{
    $this->clear();

    $markup = trim($str);
    $length = strlen($markup);

    $this->doc = $markup;
    $this->size = $length;
    $this->original_size = $length; // original size of the html
    $this->pos = 0;
    $this->cursor = 1;
    $this->noise = array();
    $this->nodes = array();
    $this->lowercase = $lowercase;
    $this->default_br_text = $defaultBRText;
    $this->default_span_text = $defaultSpanText;

    // The root node is created only after $this->nodes was reset.
    $rootNode = new simple_html_dom_node($this);
    $rootNode->tag = 'root';
    $rootNode->_[HDOM_INFO_BEGIN] = -1;
    $rootNode->nodetype = HDOM_TYPE_ROOT;

    $this->root = $rootNode;
    $this->parent = $rootNode;

    if ($length > 0) {
        $this->char = $markup[0];
    }
}
2042
+
2043
/**
 * Parse the prepared document into nodes.
 *
 * Alternates between reading tags and collecting the text found between
 * them; ends as soon as read_tag() reports that no further tag exists.
 *
 * @return bool True on success
 */
protected function parse()
{
    for (;;) {
        $text = $this->copy_until_char('<');

        // No text before the next "<": try to read a tag. When no tag can
        // be read either, parsing is finished.
        if ($text === '') {
            if (!$this->read_tag()) {
                return true;
            }

            continue;
        }

        // Text between tags becomes a text node of the current parent.
        $textNode = new simple_html_dom_node($this);
        ++$this->cursor;
        $textNode->_[HDOM_INFO_TEXT] = $text;
        $this->link_nodes($textNode, false);
    }
}
2068
+
2069
/**
 * Determine the character set of the parsed document.
 *
 * (PAPERG - dkchou) Sources are tried in this order:
 *  1. If the host application defines
 *     get_last_retrieve_url_contents_content_type() (expected to return the
 *     content-type header of the last transfer, e.g. CURLINFO_CONTENT_TYPE),
 *     a "charset=" parameter found in it wins over any other mechanism.
 *  2. A <meta http-equiv="Content-Type"> element; if it has content but no
 *     charset, ISO-8859-1 is assumed.
 *  3. mb_detect_encoding() over UTF-8 / CP1252, when available.
 *  4. UTF-8 as the last resort.
 * ISO-8859-1 / Latin1 / Latin-1 are finally mapped to CP1252.
 *
 * The "charset=" lookup in the header is case-insensitive, matching the
 * lookup used for the meta element.
 *
 * @return string The detected charset (also stored in $this->_charset)
 */
protected function parse_charset()
{
    global $debug_object;

    $charset = null;

    if (function_exists('get_last_retrieve_url_contents_content_type')) {
        $contentTypeHeader = get_last_retrieve_url_contents_content_type();
        // Case-insensitive so that e.g. "Charset=UTF-8" is recognised too.
        $success = preg_match('/charset=(.+)/i', $contentTypeHeader, $matches);
        if ($success) {
            $charset = $matches[1];
            if (is_object($debug_object)) {
                $debug_object->debug_log(2,
                    'header content-type found charset of: '
                    . $charset
                );
            }
        }
    }

    if (empty($charset)) {
        $el = $this->root->find('meta[http-equiv=Content-Type]', 0, true);

        if (!empty($el)) {
            $fullvalue = $el->content;
            if (is_object($debug_object)) {
                $debug_object->debug_log(2,
                    'meta content-type tag found'
                    . $fullvalue
                );
            }

            if (!empty($fullvalue)) {
                $success = preg_match(
                    '/charset=(.+)/i',
                    $fullvalue,
                    $matches
                );

                if ($success) {
                    $charset = $matches[1];
                } else {
                    // If there is a meta tag, and they don't specify the
                    // character set, research says that it's typically
                    // ISO-8859-1
                    if (is_object($debug_object)) {
                        $debug_object->debug_log(2,
                            'meta content-type tag couldn\'t be parsed. using iso-8859 default.'
                        );
                    }

                    $charset = 'ISO-8859-1';
                }
            }
        }
    }

    // If we couldn't find a charset above, then lets try to detect one
    // based on the text we got...
    if (empty($charset)) {
        // Stays false when mb_detect_encoding isn't installed/loaded on
        // this machine.
        $charset = false;
        if (function_exists('mb_detect_encoding')) {
            // Have php try to detect the encoding from the text given to us.
            $charset = mb_detect_encoding(
                $this->doc . 'ascii',
                array('UTF-8', 'CP1252')
            );

            if (is_object($debug_object)) {
                $debug_object->debug_log(2, 'mb_detect found: ' . $charset);
            }
        }

        // and if this doesn't work... then we need to just wrongheadedly
        // assume it's UTF-8 so that we can move on - cause this will
        // usually give us most of what we need...
        if ($charset === false) {
            if (is_object($debug_object)) {
                $debug_object->debug_log(
                    2,
                    'since mb_detect failed - using default of utf-8'
                );
            }

            $charset = 'UTF-8';
        }
    }

    // Since CP1252 is a superset, if we get one of its subsets, we want
    // it instead.
    $normalized = strtolower($charset);
    if ($normalized === 'iso-8859-1'
        || $normalized === 'latin1'
        || $normalized === 'latin-1'
    ) {
        if (is_object($debug_object)) {
            $debug_object->debug_log(
                2,
                'replacing ' . $charset . ' with CP1252 as its a superset'
            );
        }

        $charset = 'CP1252';
    }

    if (is_object($debug_object)) {
        $debug_object->debug_log(1, 'EXIT - ' . $charset);
    }

    return $this->_charset = $charset;
}
2188
+
2189
/**
 * Parse tag from current document position.
 *
 * Handles, in this order: end tags (including end tags that implicitly
 * close ancestors), "<!...>" constructs (comments, doctype, CDATA), broken
 * start tags that are kept as text, and regular start tags with their
 * attributes.
 *
 * Duplicate attributes: only the first occurrence of an attribute name is
 * kept. A later duplicate neither overwrites the stored value nor adds
 * spacing/quote metadata, so HDOM_INFO_SPACE / HDOM_INFO_QUOTE stay aligned
 * index-by-index with $node->attr.
 *
 * @return bool True if a tag was found, false otherwise
 */
protected function read_tag()
{
    // Set end position if no further tags found
    if ($this->char !== '<') {
        $this->root->_[HDOM_INFO_END] = $this->cursor;
        return false;
    }

    $begin_tag_pos = $this->pos;
    $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

    // end tag
    if ($this->char === '/') {
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

        // Skip whitespace in end tags (i.e. in "</   html>")
        $this->skip($this->token_blank);
        $tag = $this->copy_until_char('>');

        // Skip attributes in end tags
        if (($pos = strpos($tag, ' ')) !== false) {
            $tag = substr($tag, 0, $pos);
        }

        $parent_lower = strtolower($this->parent->tag);
        $tag_lower = strtolower($tag);

        // The end tag is supposed to close the parent tag. Handle situations
        // when it doesn't
        if ($parent_lower !== $tag_lower) {
            // Parent tag does not have to be closed necessarily (optional closing tag)
            // Current tag is a block tag, so it may close an ancestor
            if (isset($this->optional_closing_tags[$parent_lower])
                && isset($this->block_tags[$tag_lower])) {

                $this->parent->_[HDOM_INFO_END] = 0;
                $org_parent = $this->parent;

                // Traverse ancestors to find a matching opening tag
                // Stop at root node
                while (($this->parent->parent)
                    && strtolower($this->parent->tag) !== $tag_lower
                ) {
                    $this->parent = $this->parent->parent;
                }

                // If we don't have a match add current tag as text node
                if (strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $org_parent; // restore original parent

                    if ($this->parent->parent) {
                        $this->parent = $this->parent->parent;
                    }

                    $this->parent->_[HDOM_INFO_END] = $this->cursor;
                    return $this->as_text_node($tag);
                }
            } elseif (($this->parent->parent)
                && isset($this->block_tags[$tag_lower])
            ) {
                // Grandparent exists and current tag is a block tag, so our
                // parent doesn't have an end tag
                $this->parent->_[HDOM_INFO_END] = 0; // No end tag
                $org_parent = $this->parent;

                // Traverse ancestors to find a matching opening tag
                // Stop at root node
                while (($this->parent->parent)
                    && strtolower($this->parent->tag) !== $tag_lower
                ) {
                    $this->parent = $this->parent->parent;
                }

                // If we don't have a match add current tag as text node
                if (strtolower($this->parent->tag) !== $tag_lower) {
                    $this->parent = $org_parent; // restore original parent
                    $this->parent->_[HDOM_INFO_END] = $this->cursor;
                    return $this->as_text_node($tag);
                }
            } elseif (($this->parent->parent)
                && strtolower($this->parent->parent->tag) === $tag_lower
            ) { // Grandparent exists and current tag closes it
                $this->parent->_[HDOM_INFO_END] = 0;
                $this->parent = $this->parent->parent;
            } else { // Random tag, add as text node
                return $this->as_text_node($tag);
            }
        }

        // Set end position of parent tag to current cursor position
        $this->parent->_[HDOM_INFO_END] = $this->cursor;

        if ($this->parent->parent) {
            $this->parent = $this->parent->parent;
        }

        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    // start tag
    $node = new simple_html_dom_node($this);
    $node->_[HDOM_INFO_BEGIN] = $this->cursor;
    ++$this->cursor;
    $tag = $this->copy_until($this->token_slash); // Get tag name
    $node->tag_start = $begin_tag_pos;

    // doctype, cdata & comments...
    // <!DOCTYPE html>
    // <![CDATA[ ... ]]>
    // <!-- Comment -->
    if (isset($tag[0]) && $tag[0] === '!') {
        $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');

        if (isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') { // Comment ("<!--")
            $node->nodetype = HDOM_TYPE_COMMENT;
            $node->tag = 'comment';
        } else { // Could be doctype or CDATA but we don't care
            $node->nodetype = HDOM_TYPE_UNKNOWN;
            $node->tag = 'unknown';
        }

        if ($this->char === '>') { $node->_[HDOM_INFO_TEXT] .= '>'; }

        $this->link_nodes($node, true);
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    // The start tag cannot contain another start tag, if so add as text
    // i.e. "<<html>"
    if (strpos($tag, '<') !== false) {
        $tag = '<' . substr($tag, 0, -1);
        $node->_[HDOM_INFO_TEXT] = $tag;
        $this->link_nodes($node, false);
        $this->char = $this->doc[--$this->pos]; // prev
        return true;
    }

    // Handle invalid tag names (i.e. "<html#doc>")
    if (!preg_match('/^\w[\w:-]*$/', $tag)) {
        $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');

        // Next char is the beginning of a new tag, don't touch it.
        if ($this->char === '<') {
            $this->link_nodes($node, false);
            return true;
        }

        // Next char closes current tag, add and be done with it.
        if ($this->char === '>') { $node->_[HDOM_INFO_TEXT] .= '>'; }
        $this->link_nodes($node, false);
        $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
        return true;
    }

    // begin tag, add new node
    $node->nodetype = HDOM_TYPE_ELEMENT;
    $tag_lower = strtolower($tag);
    $node->tag = ($this->lowercase) ? $tag_lower : $tag;

    // handle optional closing tags
    if (isset($this->optional_closing_tags[$tag_lower])) {
        // Traverse ancestors to close all optional closing tags
        while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
            $this->parent->_[HDOM_INFO_END] = 0;
            $this->parent = $this->parent->parent;
        }
        $node->parent = $this->parent;
    }

    $guard = 0; // prevent infinity loop

    // [0] Space between tag and first attribute
    $space = array($this->copy_skip($this->token_blank), '', '');

    // attributes
    do {
        // Everything until the first equal sign should be the attribute name
        $name = $this->copy_until($this->token_equal);

        if ($name === '' && $this->char !== null && $space[0] === '') {
            break;
        }

        if ($guard === $this->pos) { // Escape infinite loop
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            continue;
        }

        $guard = $this->pos;

        // handle endless '<'
        // Out of bounds before the tag ended
        if ($this->pos >= $this->size - 1 && $this->char !== '>') {
            $node->nodetype = HDOM_TYPE_TEXT;
            $node->_[HDOM_INFO_END] = 0;
            $node->_[HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
            $node->tag = 'text';
            $this->link_nodes($node, false);
            return true;
        }

        // handle mismatch '<'
        // Attributes cannot start after opening tag
        if ($this->doc[$this->pos - 1] == '<') {
            $node->nodetype = HDOM_TYPE_TEXT;
            $node->tag = 'text';
            $node->attr = array();
            $node->_[HDOM_INFO_END] = 0;
            $node->_[HDOM_INFO_TEXT] = substr(
                $this->doc,
                $begin_tag_pos,
                $this->pos - $begin_tag_pos - 1
            );
            $this->pos -= 2;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $this->link_nodes($node, false);
            return true;
        }

        if ($name !== '/' && $name !== '') { // this is a attribute name
            // [1] Whitespace after attribute name
            $space[1] = $this->copy_skip($this->token_blank);

            $name = $this->restore_noise($name); // might be a noisy name

            if ($this->lowercase) { $name = strtolower($name); }

            // Only the first occurrence of an attribute counts. Remember
            // this before parsing, because parsing may define the attribute.
            $is_duplicate = isset($node->attr[$name]);

            if ($this->char === '=') { // attribute with value
                $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
                $this->parse_attr($node, $name, $space); // get attribute value
            } else {
                //no value attr: nowrap, checked selected...
                if (!$is_duplicate) {
                    $node->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_NO;
                    $node->attr[$name] = true;
                }
                if ($this->char != '>') { $this->char = $this->doc[--$this->pos]; } // prev
            }

            // Spacing info is recorded once per stored attribute so that it
            // keeps the same index as the attribute itself.
            if (!$is_duplicate) {
                $node->_[HDOM_INFO_SPACE][] = $space;
            }

            // prepare for next attribute
            $space = array(
                $this->copy_skip($this->token_blank),
                '',
                ''
            );
        } else { // no more attributes
            break;
        }
    } while ($this->char !== '>' && $this->char !== '/'); // go until the tag ended

    $this->link_nodes($node, true);
    $node->_[HDOM_INFO_ENDSPACE] = $space[0];

    // handle empty tags (i.e. "<div/>")
    if ($this->copy_until_char('>') === '/') {
        $node->_[HDOM_INFO_ENDSPACE] .= '/';
        $node->_[HDOM_INFO_END] = 0;
    } else {
        // reset parent
        if (!isset($this->self_closing_tags[strtolower($node->tag)])) {
            $this->parent = $node;
        }
    }

    $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next

    // If it's a BR tag, we need to set its text to the default text.
    // This way when we see it in plaintext, we can generate formatting that the user wants.
    // since a br tag never has sub nodes, this works well.
    if ($node->tag === 'br') {
        $node->_[HDOM_INFO_INNER] = $this->default_br_text;
    }

    return true;
}
2471
+
2472
/**
 * Parse attribute value from current document position
 *
 * The value is always consumed from the document. If the attribute is
 * already defined on the node, the value is discarded afterwards: only the
 * first occurrence of an attribute counts, as opposed to the last one
 * (sourceforge #3061408, https://stackoverflow.com/a/26341866). Consuming
 * the duplicate's value keeps it from being re-read as attribute names.
 *
 * @param object $node Node for the attributes
 * @param string $name Name of the current attribute
 * @param array $space Array for spacing information; index 2 receives the
 * whitespace between "=" and the value (left untouched for duplicates)
 * @return void
 */
protected function parse_attr($node, $name, &$space)
{
    $is_duplicate = isset($node->attr[$name]);

    // [2] Whitespace between "=" and the value
    $blank = $this->copy_skip($this->token_blank);
    if (!$is_duplicate) {
        $space[2] = $blank;
    }

    switch ($this->char) {
        case '"': // value is anything between double quotes
            $quote_type = HDOM_QUOTE_DOUBLE;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $value = $this->copy_until_char('"');
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            break;
        case '\'': // value is anything between single quotes
            $quote_type = HDOM_QUOTE_SINGLE;
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            $value = $this->copy_until_char('\'');
            $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
            break;
        default: // value is anything until the first space or end tag
            $quote_type = HDOM_QUOTE_NO;
            $value = $this->copy_until($this->token_attr);
    }

    // The duplicate's value has been skipped; keep the first definition.
    if ($is_duplicate) {
        return;
    }

    $value = $this->restore_noise($value);

    // PaperG: Attributes should not have \r or \n in them, that counts as
    // html whitespace.
    $value = str_replace(array("\r", "\n"), '', $value);

    // PaperG: If this is a "class" selector, lets get rid of the preceding
    // and trailing space since some people leave it in the multi class case.
    if ($name === 'class') {
        $value = trim($value);
    }

    $node->_[HDOM_INFO_QUOTE][] = $quote_type;
    $node->attr[$name] = $value;
}
2520
+
2521
/**
 * Attach a node to the current parent node.
 *
 * The node is always appended to the parent's node list; it is appended
 * to the parent's children list as well when $is_child is true.
 *
 * @param object $node Node to link to parent
 * @param bool $is_child True if the node is a child of parent
 * @return void
 */
protected function link_nodes(&$node, $is_child)
{
    $owner = $this->parent;

    $node->parent = $owner;
    $owner->nodes[] = $node;

    if (!$is_child) {
        return;
    }

    $owner->children[] = $node;
}
2537
+
2538
/**
 * Add an end tag as a text node to the current parent node.
 *
 * The text "</tag>" is stored on a new node, the cursor is incremented and
 * the parser moves on to the next character.
 *
 * @param string $tag Tag name
 * @return bool True on success
 */
protected function as_text_node($tag)
{
    $textNode = new simple_html_dom_node($this);
    ++$this->cursor;
    $textNode->_[HDOM_INFO_TEXT] = '</' . $tag . '>';
    $this->link_nodes($textNode, false);

    // next
    ++$this->pos;
    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return true;
}
2553
+
2554
/**
 * Advance the document position past every leading character that is
 * contained in the provided string, and update the current character
 * (null once the end of the document is reached).
 *
 * @param string $chars A string containing every allowed character.
 * @return void
 */
protected function skip($chars)
{
    $run = strspn($this->doc, $chars, $this->pos);
    $this->pos += $run;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }
}
2567
+
2568
/**
 * Consume and return the run of characters, starting at the current
 * document position, that are all contained in the provided string.
 *
 * @param string $chars A string containing every allowed character.
 * @return string The consumed substring, or '' when the character at the
 * current position is not one of the allowed characters.
 */
protected function copy_skip($chars)
{
    $start = $this->pos;
    $length = strspn($this->doc, $chars, $start);

    $this->pos = $start + $length;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return ($length === 0) ? '' : substr($this->doc, $start, $length);
}
2585
+
2586
/**
 * Consume and return everything from the current document position up to
 * (not including) the first occurrence of any of the provided characters.
 *
 * @param string $chars A string containing every character to stop at.
 * @return string Substring from the current document position to the first
 * occurrence of any of the provided characters.
 */
protected function copy_until($chars)
{
    $start = $this->pos;
    $length = strcspn($this->doc, $chars, $start);

    $this->pos = $start + $length;

    if ($this->pos < $this->size) {
        $this->char = $this->doc[$this->pos];
    } else {
        $this->char = null;
    }

    return substr($this->doc, $start, $length);
}
2602
+
2603
/**
 * Consume and return everything from the current document position up to
 * (not including) the first occurrence of the provided string.
 *
 * When the string does not occur any more, the rest of the document is
 * consumed and the current character becomes null.
 *
 * @param string $char The string to stop at.
 * @return string Substring from the current document position to the first
 * occurrence of the provided string; '' when the parser is already at the
 * end of the document or directly in front of the string.
 */
protected function copy_until_char($char)
{
    if ($this->char === null) {
        return '';
    }

    $start = $this->pos;
    $found = strpos($this->doc, $char, $start);

    if ($found === false) {
        // Not found: hand out the remainder and move to the end.
        $rest = substr($this->doc, $start, $this->size - $start);
        $this->char = null;
        $this->pos = $this->size;
        return $rest;
    }

    if ($found === $start) {
        return '';
    }

    $this->char = $this->doc[$found];
    $this->pos = $found;

    return substr($this->doc, $start, $found - $start);
}
2629
+
2630
/**
 * Remove noise from HTML content
 *
 * Every match of the pattern is replaced in the document by a placeholder
 * key ("___noise___" followed by a 5 character, space padded number); the
 * replaced text is stored to {@see simple_html_dom::$noise} under that key.
 * Matches are processed from last to first so earlier offsets stay valid.
 *
 * @param string $pattern The regex pattern used for finding noise
 * @param bool $remove_tag True to remove the entire match. Default is false
 * to only remove the captured data.
 */
protected function remove_noise($pattern, $remove_tag = false)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    $count = preg_match_all(
        $pattern,
        $this->doc,
        $matches,
        PREG_SET_ORDER | PREG_OFFSET_CAPTURE
    );

    // 0 = entire match, 1 = submatch
    $group = $remove_tag ? 0 : 1;

    $i = $count - 1;
    while ($i >= 0) {
        $key = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);

        if (is_object($debug_object)) {
            $debug_object->debug_log(2, 'key is: ' . $key);
        }

        // Each capture is array(text, byte offset) due to PREG_OFFSET_CAPTURE.
        list($captured, $offset) = $matches[$i][$group];

        $this->noise[$key] = $captured;
        $this->doc = substr_replace($this->doc, $key, $offset, strlen($captured));

        --$i;
    }

    // reset the length of content
    $this->size = strlen($this->doc);

    if ($this->size > 0) {
        $this->char = $this->doc[0];
    }
}
2670
+
2671
/**
 * Restore noise to HTML content
 *
 * Noise is restored from {@see simple_html_dom::$noise}. Restored noise is
 * scanned again, because it may itself contain placeholders. A placeholder
 * whose key is unknown is replaced by a diagnostic text and then skipped;
 * it is never scanned again, because the diagnostic text repeats the key
 * and re-scanning it would loop forever.
 *
 * @param string $text A subset of HTML containing noise
 * @return string The same content with noise restored
 */
function restore_noise($text)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    // Position from which the next placeholder is searched.
    $offset = 0;

    while (($pos = strpos($text, '___noise___', $offset)) !== false) {
        // Sometimes there is a broken piece of markup, and we don't GET the
        // pos+11 etc... token which indicates a problem outside of us...
        if (strlen($text) > $pos + 15) {
            // Placeholder = marker (11 chars) + 5 character number
            $key = '___noise___' . substr($text, $pos + 11, 5);

            if (is_object($debug_object)) {
                $debug_object->debug_log(2, 'located key of: ' . $key);
            }

            if (isset($this->noise[$key])) {
                $text = substr($text, 0, $pos)
                    . $this->noise[$key]
                    . substr($text, $pos + 16);

                // Rescan the restored content for nested placeholders.
                $offset = $pos;
            } else {
                $replacement = 'UNDEFINED NOISE FOR KEY: ' . $key;
                $text = substr($text, 0, $pos)
                    . $replacement
                    . substr($text, $pos + 16);

                // Continue behind the diagnostic text to prevent an
                // infinite loop (it contains the placeholder again).
                $offset = $pos + strlen($replacement);
            }
        } else {
            // There is no valid key being given back to us... We must get
            // rid of the ___noise___ or we will have a problem.
            $replacement = 'NO NUMERIC NOISE KEY';
            $text = substr($text, 0, $pos)
                . $replacement
                . substr($text, $pos + 11);

            $offset = $pos + strlen($replacement);
        }
    }

    return $text;
}
2724
+
2725
/**
 * Look up a stored noise element (sometimes we NEED one of them).
 *
 * @param string $text Text to search for
 * @return string|null The first stored noise element that contains $text,
 * or null when none does
 */
function search_noise($text)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    foreach ($this->noise as $candidate) {
        if (strpos($candidate, $text) === false) {
            continue;
        }

        return $candidate;
    }

    return null;
}
2737
+
2738
/**
 * String conversion: the inner text of the root node.
 *
 * @return string
 */
function __toString()
{
    $rootNode = $this->root;

    return $rootNode->innertext();
}
2742
+
2743
/**
 * Magic getter for the virtual properties of the DOM object.
 *
 * Supported names: "outertext" and "innertext" (both return the root node's
 * inner text), "plaintext", "charset" and "target_charset".
 *
 * @param string $name Property name
 * @return mixed The property value, or null for any other name
 */
function __get($name)
{
    if ($name === 'outertext' || $name === 'innertext') {
        return $this->root->innertext();
    }

    if ($name === 'plaintext') {
        return $this->root->text();
    }

    if ($name === 'charset') {
        return $this->_charset;
    }

    if ($name === 'target_charset') {
        return $this->_target_charset;
    }

    return null;
}
2758
+
2759
+ // camel naming conventions
2760
/**
 * Camel-case alias: delegates to the root node's childNodes().
 *
 * @param int $idx Forwarded to the root node's childNodes()
 * @return mixed Whatever the root node's childNodes() returns
 */
function childNodes($idx = -1)
{
    $rootNode = $this->root;

    return $rootNode->childNodes($idx);
}
2764
+
2765
/**
 * Camel-case alias: delegates to the root node's first_child().
 *
 * @return mixed Whatever the root node's first_child() returns
 */
function firstChild()
{
    $rootNode = $this->root;

    return $rootNode->first_child();
}
2769
+
2770
/**
 * Camel-case alias: delegates to the root node's last_child().
 *
 * @return mixed Whatever the root node's last_child() returns
 */
function lastChild()
{
    $rootNode = $this->root;

    return $rootNode->last_child();
}
2774
+
2775
/**
 * Create an element by parsing "<name>value</name>" with str_get_html()
 * and returning the first child of the resulting document.
 *
 * Errors raised by the expression are suppressed with "@".
 *
 * @param string $name Tag name
 * @param string|null $value Content placed between the tags
 * @return mixed The first child of the parsed fragment
 */
function createElement($name, $value = null)
{
    return @str_get_html("<{$name}>{$value}</{$name}>")->first_child();
}
2779
+
2780
/**
 * Create a text node by parsing the value with str_get_html() and
 * returning the last entry of the resulting document's node list.
 *
 * Errors raised by the expression are suppressed with "@".
 *
 * @param string $value Text content
 * @return mixed The last node of the parsed fragment
 */
function createTextNode($value)
{
    return @end(str_get_html($value)->nodes);
}
2784
+
2785
/**
 * Find the first element matching the selector "#<id>".
 *
 * @param string $id Element id
 * @return mixed Result of find() with index 0
 */
function getElementById($id)
{
    $selector = '#' . $id;

    return $this->find($selector, 0);
}
2789
+
2790
/**
 * Find elements matching the selector "#<id>".
 *
 * @param string $id Element id
 * @param int|null $idx Forwarded to find()
 * @return mixed Result of find()
 */
function getElementsById($id, $idx = null)
{
    $selector = '#' . $id;

    return $this->find($selector, $idx);
}
2794
+
2795
/**
 * Find the first element matching the given tag name selector.
 *
 * @param string $name Tag name, used as the selector
 * @return mixed Result of find() with index 0
 */
function getElementByTagName($name)
{
    $firstIndex = 0;

    return $this->find($name, $firstIndex);
}
2799
+
2800
/**
 * Find elements matching the given tag name selector.
 *
 * @param string $name Tag name, used as the selector
 * @param int $idx Forwarded to find()
 * @return mixed Result of find()
 */
function getElementsByTagName($name, $idx = -1)
{
    $matches = $this->find($name, $idx);

    return $matches;
}
2804
+
2805
/**
 * Camel-case alias of load_file().
 *
 * The received arguments are spread onto load_file(), which in turn hands
 * them to file_get_contents(). (Previously the whole argument array was
 * passed as a single argument, so file_get_contents() received an array
 * instead of a file name.)
 *
 * @return false|null The result of load_file(): false if the content could
 * not be read, otherwise null.
 */
function loadFile()
{
    $args = func_get_args();

    return call_user_func_array(array($this, 'load_file'), $args);
}
2810
+ }
wp-retina-2x.php CHANGED
@@ -3,7 +3,7 @@
3
  Plugin Name: WP Retina 2x
4
  Plugin URI: https://meowapps.com
5
  Description: Make your website look beautiful and crisp on modern displays by creating + displaying retina images.
6
- Version: 5.5.1
7
  Author: Jordy Meow
8
  Author URI: https://meowapps.com
9
  Text Domain: wp-retina-2x
@@ -29,7 +29,7 @@ if ( class_exists( 'Meow_WR2X_Core' ) ) {
29
  global $wr2x_picturefill, $wr2x_retinajs, $wr2x_lazysizes,
30
  $wr2x_retina_image, $wr2x_core;
31
 
32
- $wr2x_version = '5.5.1';
33
  $wr2x_retinajs = '2.0.0';
34
  $wr2x_picturefill = '3.0.2';
35
  $wr2x_lazysizes = '4.0.4';
3
  Plugin Name: WP Retina 2x
4
  Plugin URI: https://meowapps.com
5
  Description: Make your website look beautiful and crisp on modern displays by creating + displaying retina images.
6
+ Version: 5.5.2
7
  Author: Jordy Meow
8
  Author URI: https://meowapps.com
9
  Text Domain: wp-retina-2x
29
  global $wr2x_picturefill, $wr2x_retinajs, $wr2x_lazysizes,
30
  $wr2x_retina_image, $wr2x_core;
31
 
32
+ $wr2x_version = '5.5.2';
33
  $wr2x_retinajs = '2.0.0';
34
  $wr2x_picturefill = '3.0.2';
35
  $wr2x_lazysizes = '4.0.4';