TranslatePress – Translate Multilingual sites - Version 1.4.1

Version Description

  • Added PHP 7.3 support
  • Performance improvements
Download this release

Release Info

Developer madalin.ungureanu
Plugin Icon 128x128 TranslatePress – Translate Multilingual sites
Version 1.4.1
Comparing to
See all releases

Code changes from version 1.4.0 to 1.4.1

assets/lib/simplehtmldom/simple_html_dom.php CHANGED
@@ -1,11 +1,12 @@
1
  <?php
2
  /**
3
  * Website: http://sourceforge.net/projects/simplehtmldom/
 
4
  * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
5
  * Contributions by:
6
- * Yousuke Kumakura (Attribute filters)
7
- * Vadim Voituk (Negative indexes supports of "find" method)
8
- * Antcs (Constructor with automatically load contents either text or file/url)
9
  *
10
  * all affected sections have comments starting with "PaperG"
11
  *
@@ -33,72 +34,92 @@
33
  * @author S.C. Chen <me578022@gmail.com>
34
  * @author John Schlick
35
  * @author Rus Carroll
36
- * @version 1.5 ($Rev: 196 $)
37
  * @package PlaceLocalInclude
38
- * @subpackage trp_simple_html_dom
39
  */
40
 
41
  /**
42
  * All of the Defines for the classes below.
43
  * @author S.C. Chen <me578022@gmail.com>
44
  */
 
 
 
 
 
 
 
 
 
 
 
45
// Node types, stored in trp_simple_html_dom_node::$nodetype.
define('TRP_HDOM_TYPE_ELEMENT', 1);
define('TRP_HDOM_TYPE_COMMENT', 2);
define('TRP_HDOM_TYPE_TEXT', 3);
define('TRP_HDOM_TYPE_ENDTAG', 4);
define('TRP_HDOM_TYPE_ROOT', 5);
define('TRP_HDOM_TYPE_UNKNOWN', 6);

// Quoting style recorded per attribute (see TRP_HDOM_INFO_QUOTE).
define('TRP_HDOM_QUOTE_DOUBLE', 0);
define('TRP_HDOM_QUOTE_SINGLE', 1);
define('TRP_HDOM_QUOTE_NO', 3);

// Keys of the per-node "info" array, trp_simple_html_dom_node::$_.
define('TRP_HDOM_INFO_BEGIN', 0);
define('TRP_HDOM_INFO_END', 1);
define('TRP_HDOM_INFO_QUOTE', 2);
define('TRP_HDOM_INFO_SPACE', 3);
define('TRP_HDOM_INFO_TEXT', 4);
define('TRP_HDOM_INFO_INNER', 5);
define('TRP_HDOM_INFO_OUTER', 6);
define('TRP_HDOM_INFO_ENDSPACE', 7);

// Defaults used by the helper functions and the DOM constructor.
define('TRP_DEFAULT_TARGET_CHARSET', 'UTF-8');
define('TRP_DEFAULT_BR_TEXT', "\r\n");
define('TRP_DEFAULT_SPAN_TEXT', " ");

// Inputs longer than this many bytes are refused by the helper functions.
define('TRP_MAX_FILE_SIZE', 100000000);
 
 
 
 
66
  // helper functions
67
  // -----------------------------------------------------------------------------
68
  // get html dom from file
69
  // $maxlen is defined in the code as PHP_STREAM_COPY_ALL which is defined as -1.
70
/**
 * Build a DOM from the contents of a file or URL.
 *
 * @param string        $url              Path or URL handed to file_get_contents().
 * @param bool          $use_include_path Forwarded to file_get_contents().
 * @param resource|null $context          Stream context forwarded to file_get_contents().
 * @param int           $offset           Read offset. The legacy default of -1 means
 *                                        "from the start of the stream".
 * @param int           $maxLen           Maximum number of bytes to read; values <= 0
 *                                        mean "no explicit limit".
 * @param bool          $lowercase        Forwarded to the DOM (lowercase tag/selector matching).
 * @param bool          $forceTagsClosed  Forwarded to the DOM constructor.
 * @param string        $target_charset   Forwarded to the DOM constructor.
 * @param bool          $stripRN          Forwarded to the DOM constructor and load().
 * @param string        $defaultBRText    Forwarded to the DOM constructor.
 * @param string        $defaultSpanText  Forwarded to the DOM constructor.
 * @return trp_simple_html_dom|false The loaded DOM, or false when nothing could be
 *                                   read or the content exceeds TRP_MAX_FILE_SIZE.
 */
function trp_file_get_html($url, $use_include_path = false, $context = null, $offset = -1, $maxLen = -1, $lowercase = true, $forceTagsClosed = true, $target_charset = TRP_DEFAULT_TARGET_CHARSET, $stripRN = true, $defaultBRText = TRP_DEFAULT_BR_TEXT, $defaultSpanText = TRP_DEFAULT_SPAN_TEXT)
{
    // Since PHP 7.1 a negative offset makes file_get_contents() seek from the END
    // of the stream (and fail outright on non-seekable/remote streams). The
    // historical default of -1 was always meant as "no offset", so map it to 0.
    if ((int) $offset === -1) {
        $offset = 0;
    }

    // Only pass a length when the caller asked for one: PHP 8 rejects negative lengths.
    if ($maxLen > 0) {
        $contents = file_get_contents($url, $use_include_path, $context, $offset, $maxLen);
    } else {
        $contents = file_get_contents($url, $use_include_path, $context, $offset);
    }

    if (empty($contents) || strlen($contents) > TRP_MAX_FILE_SIZE) {
        return false;
    }

    // Create the DOM only once there is something to parse, so a failed fetch
    // does not leave a half-initialised object behind.
    // We DO force the tags to be terminated.
    $dom = new trp_simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);

    // The second parameter can force the selectors to all be lowercase.
    $dom->load($contents, $lowercase, $stripRN);

    return $dom;
}
85
 
86
  // get html dom from string
87
/**
 * Build a DOM from an HTML string.
 *
 * @param string $str             Markup to parse.
 * @param bool   $lowercase       Forwarded to the DOM (lowercase tag/selector matching).
 * @param bool   $forceTagsClosed Forwarded to the DOM constructor.
 * @param string $target_charset  Forwarded to the DOM constructor.
 * @param bool   $stripRN         Forwarded to the DOM constructor and load().
 * @param string $defaultBRText   Forwarded to the DOM constructor.
 * @param string $defaultSpanText Forwarded to the DOM constructor.
 * @return trp_simple_html_dom|false The loaded DOM, or false when $str is empty
 *                                   (in the sense of empty()) or longer than
 *                                   TRP_MAX_FILE_SIZE bytes.
 */
function trp_str_get_html($str, $lowercase = true, $forceTagsClosed = true, $target_charset = TRP_DEFAULT_TARGET_CHARSET, $stripRN = true, $defaultBRText = TRP_DEFAULT_BR_TEXT, $defaultSpanText = TRP_DEFAULT_SPAN_TEXT)
{
    // Validate before allocating: rejected input no longer builds a DOM only
    // to clear it again straight away.
    if (empty($str) || strlen($str) > TRP_MAX_FILE_SIZE) {
        return false;
    }

    $dom = new trp_simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);
    $dom->load($str, $lowercase, $stripRN);

    return $dom;
}
97
 
98
  // dump html dom tree
99
/**
 * Print the tree rooted at $node by delegating to the node's dump() method.
 *
 * @param object $node      Node to dump; must expose dump($show_attr, $deep).
 * @param bool   $show_attr Whether attributes are printed next to each tag.
 * @param int    $deep      Starting indentation depth.
 * @return void
 */
function trp_dump_html_tree($node, $show_attr = true, $deep = 0)
{
    // Forward the caller's options. Passing the node itself as the first
    // argument silently discarded both $show_attr and $deep.
    $node->dump($show_attr, $deep);
}
103
 
104
 
@@ -109,922 +130,987 @@ function trp_dump_html_tree($node, $show_attr = true, $deep = 0)
109
  *
110
  * @package PlaceLocalInclude
111
  */
112
class trp_simple_html_dom_node
{
    // One of the TRP_HDOM_TYPE_* constants.
    public $nodetype = TRP_HDOM_TYPE_TEXT;
    // Tag name; 'text' for text nodes, 'root' for the document root.
    public $tag = 'text';
    // Attribute name => value. true marks a value-less attribute,
    // null/false marks a removed one (see makeup()).
    public $attr = array();
    // Child nodes as returned by children().
    public $children = array();
    // Nodes rendered by innertext()/outertext()/text().
    public $nodes = array();
    public $parent = null;
    // The "info" array - see TRP_HDOM_INFO_* for what each element contains.
    public $_ = array();
    public $tag_start = 0;
    // Owning DOM object; released by clear().
    private $dom = null;

    /**
     * Attach the node to its owning DOM and register it in the DOM's node list.
     *
     * @param object $dom Owning DOM; must expose a public $nodes array.
     */
    public function __construct($dom)
    {
        $this->dom = $dom;
        $dom->nodes[] = $this;
    }

    public function __destruct()
    {
        $this->clear();
    }

    public function __toString()
    {
        return $this->outertext();
    }

    // clean up memory due to php5 circular references memory leak...
    public function clear()
    {
        $this->dom = null;
        $this->nodes = null;
        $this->parent = null;
        $this->children = null;
    }

    /**
     * Echo this node's tag (optionally with attributes) and, recursively, its nodes.
     *
     * @param bool $show_attr Whether to print the attributes of each node.
     * @param int  $deep      Current depth, used for the leading indentation.
     * @return void
     */
    public function dump($show_attr = true, $deep = 0)
    {
        $lead = str_repeat(' ', $deep);

        echo $lead . $this->tag;
        if ($show_attr && count($this->attr) > 0) {
            echo '(';
            foreach ($this->attr as $k => $v) {
                echo "[$k]=>\"" . $this->$k . '", ';
            }
            echo ')';
        }
        echo "\n";

        if ($this->nodes) {
            foreach ($this->nodes as $c) {
                $c->dump($show_attr, $deep + 1);
            }
        }
    }

    /**
     * Debugging function to dump a single dom node with a bunch of information about it.
     *
     * @param bool $echo When true the description is echoed, otherwise it is returned.
     * @return string|null The description when $echo is false, null otherwise.
     */
    public function dump_node($echo = true)
    {
        $string = $this->tag;
        if (count($this->attr) > 0) {
            $string .= '(';
            foreach ($this->attr as $k => $v) {
                $string .= "[$k]=>\"" . $this->$k . '", ';
            }
            $string .= ')';
        }
        if (count($this->_) > 0) {
            $string .= ' $_ (';
            foreach ($this->_ as $k => $v) {
                if (is_array($v)) {
                    $string .= "[$k]=>(";
                    foreach ($v as $k2 => $v2) {
                        $string .= "[$k2]=>\"" . $v2 . '", ';
                    }
                    $string .= ")";
                } else {
                    $string .= "[$k]=>\"" . $v . '", ';
                }
            }
            $string .= ")";
        }

        if (isset($this->text)) {
            $string .= " text: (" . $this->text . ")";
        }

        // Read the inner info from this node; the previous code looked at an
        // undefined $node variable and therefore always reported NULL.
        $string .= " HDOM_INNER_INFO: '";
        if (isset($this->_[TRP_HDOM_INFO_INNER])) {
            $string .= $this->_[TRP_HDOM_INFO_INNER] . "'";
        } else {
            $string .= ' NULL ';
        }

        $string .= " children: " . count($this->children);
        $string .= " nodes: " . count($this->nodes);
        $string .= " tag_start: " . $this->tag_start;
        $string .= "\n";

        if ($echo) {
            echo $string;
            return;
        }
        return $string;
    }

    /**
     * Return the parent of the node.
     * If a node is passed in, it becomes the parent of the current node.
     *
     * @param trp_simple_html_dom_node|null $parent New parent, or null to only read.
     * @return trp_simple_html_dom_node|null
     */
    public function parent($parent = null)
    {
        // Known limitation: re-parenting does NOT remove this node from the
        // nodes/children lists of its previous parent.
        if ($parent !== null) {
            $this->parent = $parent;
            $this->parent->nodes[] = $this;
            $this->parent->children[] = $this;
        }

        return $this->parent;
    }

    // verify that node has children
    public function has_child()
    {
        return !empty($this->children);
    }

    /**
     * Return all children, or the child at position $idx.
     *
     * @param int $idx Child index; -1 returns the whole children array.
     * @return array|trp_simple_html_dom_node|null
     */
    public function children($idx = -1)
    {
        if ($idx === -1) {
            return $this->children;
        }
        if (isset($this->children[$idx])) {
            return $this->children[$idx];
        }
        return null;
    }

    // returns the first child of node
    public function first_child()
    {
        if (count($this->children) > 0) {
            return $this->children[0];
        }
        return null;
    }

    // returns the last child of node
    public function last_child()
    {
        if (($count = count($this->children)) > 0) {
            return $this->children[$count - 1];
        }
        return null;
    }

    // returns the next sibling of node
    public function next_sibling()
    {
        if ($this->parent === null) {
            return null;
        }

        $idx = 0;
        $count = count($this->parent->children);
        while ($idx < $count && $this !== $this->parent->children[$idx]) {
            ++$idx;
        }
        if (++$idx >= $count) {
            return null;
        }
        return $this->parent->children[$idx];
    }

    // returns the previous sibling of node
    public function prev_sibling()
    {
        if ($this->parent === null) {
            return null;
        }

        $idx = 0;
        $count = count($this->parent->children);
        while ($idx < $count && $this !== $this->parent->children[$idx]) {
            ++$idx;
        }
        if (--$idx < 0) {
            return null;
        }
        return $this->parent->children[$idx];
    }

    /**
     * Locate a specific ancestor tag in the path to the root.
     * The node itself is included in the comparison.
     *
     * @param string $tag Tag name to look for.
     * @return trp_simple_html_dom_node|null The first match, or null when none exists.
     */
    public function find_ancestor_tag($tag)
    {
        global $debugObject;
        if (is_object($debugObject)) {
            $debugObject->debugLogEntry(1);
        }

        // Start by including ourselves in the comparison.
        $returnDom = $this;

        while (!is_null($returnDom)) {
            if (is_object($debugObject)) {
                $debugObject->debugLog(2, "Current tag is: " . $returnDom->tag);
            }

            if ($returnDom->tag == $tag) {
                break;
            }
            $returnDom = $returnDom->parent;
        }
        return $returnDom;
    }

    // get dom node's inner html
    public function innertext()
    {
        if (isset($this->_[TRP_HDOM_INFO_INNER])) {
            return $this->_[TRP_HDOM_INFO_INNER];
        }
        if (isset($this->_[TRP_HDOM_INFO_TEXT])) {
            return $this->dom->restore_noise($this->_[TRP_HDOM_INFO_TEXT]);
        }

        $ret = '';
        foreach ($this->nodes as $n) {
            $ret .= $n->outertext();
        }
        return $ret;
    }

    // get dom node's outer text (with tag)
    public function outertext()
    {
        global $debugObject;
        if (is_object($debugObject)) {
            $text = '';
            if ($this->tag == 'text') {
                if (!empty($this->text)) {
                    $text = " with text: " . $this->text;
                }
            }
            $debugObject->debugLog(1, 'Innertext of tag: ' . $this->tag . $text);
        }

        if ($this->tag === 'root') {
            return $this->innertext();
        }

        // trigger callback
        if ($this->dom && $this->dom->callback !== null) {
            call_user_func_array($this->dom->callback, array($this));
        }

        if (isset($this->_[TRP_HDOM_INFO_OUTER])) {
            return $this->_[TRP_HDOM_INFO_OUTER];
        }
        if (isset($this->_[TRP_HDOM_INFO_TEXT])) {
            return $this->dom->restore_noise($this->_[TRP_HDOM_INFO_TEXT]);
        }

        // render begin tag; isset() keeps a missing begin index or a cleared
        // DOM from raising notices before falling back to an empty string.
        if (isset($this->_[TRP_HDOM_INFO_BEGIN], $this->dom->nodes[$this->_[TRP_HDOM_INFO_BEGIN]])) {
            $ret = $this->dom->nodes[$this->_[TRP_HDOM_INFO_BEGIN]]->makeup();
        } else {
            $ret = "";
        }

        // render inner text
        if (isset($this->_[TRP_HDOM_INFO_INNER])) {
            // If it's a br tag... don't return the HDOM_INNER_INFO that we may or may not have added.
            if ($this->tag != "br") {
                $ret .= $this->_[TRP_HDOM_INFO_INNER];
            }
        } else {
            if ($this->nodes) {
                foreach ($this->nodes as $n) {
                    $ret .= $this->convert_text($n->outertext());
                }
            }
        }

        // render end tag
        if (isset($this->_[TRP_HDOM_INFO_END]) && $this->_[TRP_HDOM_INFO_END] != 0) {
            $ret .= '</' . $this->tag . '>';
        }
        return $ret;
    }

    // get dom node's plain text
    public function text()
    {
        if (isset($this->_[TRP_HDOM_INFO_INNER])) {
            return $this->_[TRP_HDOM_INFO_INNER];
        }
        switch ($this->nodetype) {
            case TRP_HDOM_TYPE_TEXT:
                return $this->dom->restore_noise($this->_[TRP_HDOM_INFO_TEXT]);
            case TRP_HDOM_TYPE_COMMENT:
                return '';
            case TRP_HDOM_TYPE_UNKNOWN:
                return '';
        }
        if (strcasecmp($this->tag, 'script') === 0) {
            return '';
        }
        if (strcasecmp($this->tag, 'style') === 0) {
            return '';
        }

        $ret = '';
        // In rare cases, (always node type 1 or TRP_HDOM_TYPE_ELEMENT - observed for some span tags, and some p tags) $this->nodes is set to NULL.
        // NOTE: This indicates that there is a problem where it's set to NULL without a clear happening.
        if (!is_null($this->nodes)) {
            foreach ($this->nodes as $n) {
                $ret .= $this->convert_text($n->text());
            }

            // If this node is a span... add a space at the end of it so multiple spans don't run into each other. This is plaintext after all.
            if ($this->tag == "span") {
                $ret .= $this->dom->default_span_text;
            }
        }
        return $ret;
    }

    // inner text with the CDATA markers stripped
    public function xmltext()
    {
        $ret = $this->innertext();
        $ret = str_ireplace('<![CDATA[', '', $ret);
        $ret = str_replace(']]>', '', $ret);
        return $ret;
    }

    // build node's text with tag
    public function makeup()
    {
        // text, comment, unknown
        if (isset($this->_[TRP_HDOM_INFO_TEXT])) {
            return $this->dom->restore_noise($this->_[TRP_HDOM_INFO_TEXT]);
        }

        $ret = '<' . $this->tag;
        $i = -1;

        foreach ($this->attr as $key => $val) {
            // $i tracks the attribute position; the SPACE/QUOTE info arrays are indexed by it.
            ++$i;

            // skip removed attribute
            if ($val === null || $val === false) {
                continue;
            }

            $ret .= $this->_[TRP_HDOM_INFO_SPACE][$i][0];
            //no value attr: nowrap, checked selected...
            if ($val === true) {
                $ret .= $key;
            } else {
                switch ($this->_[TRP_HDOM_INFO_QUOTE][$i]) {
                    case TRP_HDOM_QUOTE_DOUBLE:
                        $quote = '"';
                        break;
                    case TRP_HDOM_QUOTE_SINGLE:
                        $quote = '\'';
                        break;
                    default:
                        $quote = '';
                }
                $ret .= $key . $this->_[TRP_HDOM_INFO_SPACE][$i][1] . '=' . $this->_[TRP_HDOM_INFO_SPACE][$i][2] . $quote . $val . $quote;
            }
        }
        $ret = $this->dom->restore_noise($ret);
        return $ret . $this->_[TRP_HDOM_INFO_ENDSPACE] . '>';
    }

    /**
     * Find elements by css selector.
     * PaperG - added ability for find to lowercase the value of the selector.
     *
     * @param string   $selector  CSS-like selector (comma separated alternatives allowed).
     * @param int|null $idx       null returns every match; otherwise the match at that
     *                            index, negative values counting from the end.
     * @param bool     $lowercase Compare selector values case-insensitively.
     * @return array|trp_simple_html_dom_node|null
     */
    public function find($selector, $idx = null, $lowercase = false)
    {
        $selectors = $this->parse_selector($selector);
        if (($count = count($selectors)) === 0) {
            return array();
        }
        $found_keys = array();

        // find each selector
        for ($c = 0; $c < $count; ++$c) {
            // The change on the below line was documented on the sourceforge code tracker id 2788009
            if (($level = count($selectors[$c])) === 0) {
                return array();
            }
            if (!isset($this->_[TRP_HDOM_INFO_BEGIN])) {
                return array();
            }

            $head = array($this->_[TRP_HDOM_INFO_BEGIN] => 1);

            // handle descendant selectors, no recursive!
            for ($l = 0; $l < $level; ++$l) {
                $ret = array();
                foreach ($head as $k => $v) {
                    $n = ($k === -1) ? $this->dom->root : $this->dom->nodes[$k];
                    //PaperG - Pass this optional parameter on to the seek function.
                    $n->seek($selectors[$c][$l], $ret, $lowercase);
                }
                $head = $ret;
            }

            foreach ($head as $k => $v) {
                if (!isset($found_keys[$k])) {
                    $found_keys[$k] = 1;
                }
            }
        }

        // sort keys
        ksort($found_keys);

        $found = array();
        foreach ($found_keys as $k => $v) {
            $found[] = $this->dom->nodes[$k];
        }

        // return nth-element or array
        if (is_null($idx)) {
            return $found;
        } elseif ($idx < 0) {
            $idx = count($found) + $idx;
        }
        return (isset($found[$idx])) ? $found[$idx] : null;
    }

    /**
     * Seek for given conditions among the nodes below this one.
     * PaperG - added parameter to allow for case insensitive testing of the value of a selector.
     *
     * @param array $selector  One parsed selector: array($tag, $key, $val, $exp, $no_key).
     * @param array $ret       Receives the matching node indexes as keys (by reference).
     * @param bool  $lowercase Compare values case-insensitively.
     * @return void
     */
    protected function seek($selector, &$ret, $lowercase = false)
    {
        global $debugObject;
        if (is_object($debugObject)) {
            $debugObject->debugLogEntry(1);
        }

        list($tag, $key, $val, $exp, $no_key) = $selector;

        // xpath index
        if ($tag && $key && is_numeric($key)) {
            $count = 0;
            foreach ($this->children as $c) {
                if ($tag === '*' || $tag === $c->tag) {
                    if (++$count == $key) {
                        $ret[$c->_[TRP_HDOM_INFO_BEGIN]] = 1;
                        return;
                    }
                }
            }
            return;
        }

        $end = (!empty($this->_[TRP_HDOM_INFO_END])) ? $this->_[TRP_HDOM_INFO_END] : 0;
        if ($end == 0) {
            // Walk up to the nearest ancestor that knows where it ends. The null
            // check has to come first, and the final read must be guarded as
            // well, otherwise a null parent is dereferenced.
            $parent = $this->parent;
            while ($parent !== null && !isset($parent->_[TRP_HDOM_INFO_END])) {
                $end -= 1;
                $parent = $parent->parent;
            }
            if ($parent !== null) {
                $end += $parent->_[TRP_HDOM_INFO_END];
            }
        }

        for ($i = $this->_[TRP_HDOM_INFO_BEGIN] + 1; $i < $end; ++$i) {
            $node = $this->dom->nodes[$i];

            $pass = true;

            if ($tag === '*' && !$key) {
                if (in_array($node, $this->children, true)) {
                    $ret[$i] = 1;
                }
                continue;
            }

            // compare tag
            if ($tag && $tag != $node->tag && $tag !== '*') {
                $pass = false;
            }
            // compare key
            if ($pass && $key) {
                if ($no_key) {
                    if (isset($node->attr[$key])) {
                        $pass = false;
                    }
                } else {
                    if (($key != "plaintext") && !isset($node->attr[$key])) {
                        $pass = false;
                    }
                }
            }
            // compare value
            if ($pass && $key && $val && $val !== '*') {
                // If they have told us that this is a "plaintext" search then we want the plaintext of the node - right?
                if ($key == "plaintext") {
                    // $node->plaintext actually returns $node->text();
                    $nodeKeyValue = $node->text();
                } else {
                    // this is a normal search, we want the value of that attribute of the tag.
                    $nodeKeyValue = $node->attr[$key];
                }
                if (is_object($debugObject)) {
                    $debugObject->debugLog(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);
                }

                //PaperG - If lowercase is set, do a case insensitive test of the value of the selector.
                if ($lowercase) {
                    $check = $this->match($exp, strtolower($val), strtolower($nodeKeyValue));
                } else {
                    $check = $this->match($exp, $val, $nodeKeyValue);
                }
                if (is_object($debugObject)) {
                    $debugObject->debugLog(2, "after match: " . ($check ? "true" : "false"));
                }

                // handle multiple class
                if (!$check && strcasecmp($key, 'class') === 0) {
                    foreach (explode(' ', $node->attr[$key]) as $k) {
                        // Without this, there were cases where leading, trailing, or double spaces lead to our comparing blanks - bad form.
                        if (!empty($k)) {
                            if ($lowercase) {
                                $check = $this->match($exp, strtolower($val), strtolower($k));
                            } else {
                                $check = $this->match($exp, $val, $k);
                            }
                            if ($check) {
                                break;
                            }
                        }
                    }
                }
                if (!$check) {
                    $pass = false;
                }
            }
            if ($pass) {
                $ret[$i] = 1;
            }
            unset($node);
        }
        // It's passed by reference so this is actually what this function returns.
        if (is_object($debugObject)) {
            $debugObject->debugLog(1, "EXIT - ret: ", $ret);
        }
    }

    /**
     * Test a node value against a selector value using the selector's operator.
     *
     * @param string $exp     One of '=', '!=', '^=', '$=', '*='.
     * @param string $pattern Value taken from the selector.
     * @param string $value   Value taken from the node.
     * @return bool|int Truthy on match (preg_match() results are returned as-is).
     */
    protected function match($exp, $pattern, $value)
    {
        global $debugObject;
        if (is_object($debugObject)) {
            $debugObject->debugLogEntry(1);
        }

        switch ($exp) {
            case '=':
                return ($value === $pattern);
            case '!=':
                return ($value !== $pattern);
            case '^=':
                return preg_match("/^" . preg_quote($pattern, '/') . "/", $value);
            case '$=':
                return preg_match("/" . preg_quote($pattern, '/') . "$/", $value);
            case '*=':
                // A pattern starting with "/" is treated as a ready-made regex.
                // isset() avoids an offset warning on an empty pattern.
                if (isset($pattern[0]) && $pattern[0] == '/') {
                    return preg_match($pattern, $value);
                }
                return preg_match("/" . $pattern . "/i", $value);
        }
        return false;
    }

    /**
     * Split a selector string into its parsed parts.
     *
     * @param string $selector_string Raw selector as passed to find().
     * @return array List of alternatives (split on ','), each a list of
     *               array($tag, $key, $val, $exp, $no_key) steps.
     */
    protected function parse_selector($selector_string)
    {
        global $debugObject;
        if (is_object($debugObject)) {
            $debugObject->debugLogEntry(1);
        }

        // pattern of CSS selectors, modified from mootools
        // Paperg: Add the colon to the attribute, so that it properly finds <tag attr:ibute="something" > like google does.
        // Note: if you try to look at this attribute, you MUST use getAttribute since $dom->x:y will fail the php syntax check.
        // Notice the \[ starting the attribute? and the @? following? This implies that an html attribute specifier
        // may start with an @ sign that is NOT captured by the expression.
        // Further study is required to determine if this should be documented or removed.
        //
        // The hyphen sits LAST in each character class. Written as [\w-:\*] or [\w-:]
        // it reads as a range starting at \w, which PCRE2 (PHP 7.3+) rejects at
        // compile time, so no selector would ever match.
        $pattern = "/([\w:\*-]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w:-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
        preg_match_all($pattern, trim($selector_string) . ' ', $matches, PREG_SET_ORDER);
        if (is_object($debugObject)) {
            $debugObject->debugLog(2, "Matches Array: ", $matches);
        }

        $selectors = array();
        $result = array();

        foreach ($matches as $m) {
            $m[0] = trim($m[0]);
            if ($m[0] === '' || $m[0] === '/' || $m[0] === '//') {
                continue;
            }
            // for browser generated xpath
            if ($m[1] === 'tbody') {
                continue;
            }

            list($tag, $key, $val, $exp, $no_key) = array($m[1], null, null, '=', false);
            if (!empty($m[2])) {
                $key = 'id';
                $val = $m[2];
            }
            if (!empty($m[3])) {
                $key = 'class';
                $val = $m[3];
            }
            if (!empty($m[4])) {
                $key = $m[4];
            }
            if (!empty($m[5])) {
                $exp = $m[5];
            }
            if (!empty($m[6])) {
                $val = $m[6];
            }

            // convert to lowercase
            if ($this->dom->lowercase) {
                $tag = strtolower($tag);
                // The cast keeps the historical result ('' for "no key") without
                // handing null to strtolower(), which newer PHP versions deprecate.
                $key = strtolower((string) $key);
            }
            //elements that do NOT have the specified attribute
            if (isset($key[0]) && $key[0] === '!') {
                $key = substr($key, 1);
                $no_key = true;
            }

            $result[] = array($tag, $key, $val, $exp, $no_key);
            if (trim($m[7]) === ',') {
                $selectors[] = $result;
                $result = array();
            }
        }
        if (count($result) > 0) {
            $selectors[] = $result;
        }
        return $selectors;
    }

    /**
     * Magic read access: attributes first, then the virtual text properties.
     *
     * @param string $name Attribute name, or outertext/innertext/plaintext/xmltext.
     * @return mixed The converted attribute value, the requested text, or a bool
     *               telling whether the attribute key exists at all.
     */
    public function __get($name)
    {
        if (isset($this->attr[$name])) {
            return $this->convert_text($this->attr[$name]);
        }
        switch ($name) {
            case 'outertext':
                return $this->outertext();
            case 'innertext':
                return $this->innertext();
            case 'plaintext':
                return $this->text();
            case 'xmltext':
                return $this->xmltext();
            default:
                return array_key_exists($name, $this->attr);
        }
    }

    /**
     * Magic write access: overrides outer/inner text or sets an attribute.
     *
     * @param string $name  'outertext', 'innertext' or an attribute name.
     * @param mixed  $value New value.
     */
    public function __set($name, $value)
    {
        switch ($name) {
            case 'outertext':
                return $this->_[TRP_HDOM_INFO_OUTER] = $value;
            case 'innertext':
                if (isset($this->_[TRP_HDOM_INFO_TEXT])) {
                    return $this->_[TRP_HDOM_INFO_TEXT] = $value;
                }
                return $this->_[TRP_HDOM_INFO_INNER] = $value;
        }
        // A new attribute needs matching spacing/quote info so makeup() can render it.
        if (!isset($this->attr[$name])) {
            $this->_[TRP_HDOM_INFO_SPACE][] = array(' ', '', '');
            $this->_[TRP_HDOM_INFO_QUOTE][] = TRP_HDOM_QUOTE_DOUBLE;
        }
        $this->attr[$name] = $value;
    }

    public function __isset($name)
    {
        switch ($name) {
            case 'outertext':
                return true;
            case 'innertext':
                return true;
            case 'plaintext':
                return true;
        }
        //no value attr: nowrap, checked selected...
        return (array_key_exists($name, $this->attr)) ? true : isset($this->attr[$name]);
    }

    public function __unset($name)
    {
        if (isset($this->attr[$name])) {
            unset($this->attr[$name]);
        }
    }

    /**
     * PaperG - Convert the text from the DOM's source charset to its target
     * charset if the two are not the same, and strip a UTF-8 BOM from either end.
     *
     * @param string $text Text to convert.
     * @return string|false Converted text (whatever iconv() returned when a conversion ran).
     */
    public function convert_text($text)
    {
        global $debugObject;
        if (is_object($debugObject)) {
            $debugObject->debugLogEntry(1);
        }

        $converted_text = $text;

        $sourceCharset = "";
        $targetCharset = "";

        if ($this->dom) {
            $sourceCharset = strtoupper($this->dom->_charset);
            $targetCharset = strtoupper($this->dom->_target_charset);
        }
        if (is_object($debugObject)) {
            $debugObject->debugLog(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);
        }

        if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0)) {
            // Check if the reported encoding could have been incorrect and the text is actually already UTF-8
            if ((strcasecmp($targetCharset, 'UTF-8') == 0) && (static::is_utf8($text))) {
                $converted_text = $text;
            } else {
                $converted_text = iconv($sourceCharset, $targetCharset, $text);
            }
        }

        // Lets make sure that we don't have that silly BOM issue with any of the utf-8 text we output.
        if ($targetCharset == 'UTF-8') {
            if (substr($converted_text, 0, 3) == "\xef\xbb\xbf") {
                $converted_text = substr($converted_text, 3);
            }
            if (substr($converted_text, -3) == "\xef\xbb\xbf") {
                $converted_text = substr($converted_text, 0, -3);
            }
        }

        return $converted_text;
    }

    /**
     * Returns true if $str is valid UTF-8 and false otherwise.
     *
     * @param mixed $str String to be tested
     * @return boolean
     */
    public static function is_utf8($str)
    {
        $len = strlen($str);
        for ($i = 0; $i < $len; $i++) {
            $c = ord($str[$i]);
            // Every byte with the high bit set must start or continue a multi-byte
            // sequence. That includes 0x80 itself: with "> 128" a stray 0x80 byte
            // was accepted as valid.
            if ($c >= 128) {
                if ($c >= 254) {
                    return false;
                } elseif ($c >= 252) {
                    $bits = 6;
                } elseif ($c >= 248) {
                    $bits = 5;
                } elseif ($c >= 240) {
                    $bits = 4;
                } elseif ($c >= 224) {
                    $bits = 3;
                } elseif ($c >= 192) {
                    $bits = 2;
                } else {
                    // continuation byte without a lead byte
                    return false;
                }
                if (($i + $bits) > $len) {
                    return false;
                }
                while ($bits > 1) {
                    $i++;
                    $b = ord($str[$i]);
                    if ($b < 128 || $b > 191) {
                        return false;
                    }
                    $bits--;
                }
            }
        }
        return true;
    }

    /**
     * Function to try a few tricks to determine the displayed size of an img on the page.
     * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
     *
     * @author John Schlick
     * @version April 19 2012
     * @return array|false an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it out.
     */
    public function get_display_size()
    {
        $width = -1;
        $height = -1;

        if ($this->tag !== 'img') {
            return false;
        }

        // See if there is a height or width attribute in the tag itself.
        if (isset($this->attr['width'])) {
            $width = $this->attr['width'];
        }

        if (isset($this->attr['height'])) {
            $height = $this->attr['height'];
        }

        // Now look for an inline style.
        if (isset($this->attr['style'])) {
            // Thanks to user gnarf from stackoverflow for this regular expression.
            $attributes = array();
            preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER);
            foreach ($matches as $match) {
                $attributes[$match[1]] = $match[2];
            }

            // If there is a width in the style attributes:
            if (isset($attributes['width']) && $width == -1) {
                // check that the last two characters are px (pixels)
                if (strtolower(substr($attributes['width'], -2)) == 'px') {
                    $proposed_width = substr($attributes['width'], 0, -2);
                    // Make sure it is an integer. Compare against false explicitly:
                    // filter_var() returns int 0 for "0", which is a valid size.
                    if (filter_var($proposed_width, FILTER_VALIDATE_INT) !== false) {
                        $width = $proposed_width;
                    }
                }
            }

            // If there is a height in the style attributes:
            if (isset($attributes['height']) && $height == -1) {
                // check that the last two characters are px (pixels)
                if (strtolower(substr($attributes['height'], -2)) == 'px') {
                    $proposed_height = substr($attributes['height'], 0, -2);
                    if (filter_var($proposed_height, FILTER_VALIDATE_INT) !== false) {
                        $height = $proposed_height;
                    }
                }
            }
        }

        // Future enhancement:
        // Look in the tag to see if there is a class or id specified that has a height or width attribute to it.

        // Far future enhancement
        // Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width
        // Note that in this case, the class or id will have the img subselector for it to apply to the image.

        // ridiculously far future development
        // If the class or id is specified in a SEPARATE css file thats not on the page, go get it and do what we were just doing for the ones on the page.

        return array(
            'height' => $height,
            'width' => $width,
        );
    }

    // camel naming conventions
    public function getAllAttributes()
    {
        return $this->attr;
    }

    public function getAttribute($name)
    {
        return $this->__get($name);
    }

    public function setAttribute($name, $value)
    {
        $this->__set($name, $value);
    }

    public function hasAttribute($name)
    {
        return $this->__isset($name);
    }

    public function removeAttribute($name)
    {
        // A null value marks the attribute as removed; makeup() skips it.
        $this->__set($name, null);
    }

    public function getElementById($id)
    {
        return $this->find("#$id", 0);
    }

    public function getElementsById($id, $idx = null)
    {
        return $this->find("#$id", $idx);
    }

    public function getElementByTagName($name)
    {
        return $this->find($name, 0);
    }

    public function getElementsByTagName($name, $idx = null)
    {
        return $this->find($name, $idx);
    }

    public function parentNode()
    {
        return $this->parent();
    }

    public function childNodes($idx = -1)
    {
        return $this->children($idx);
    }

    public function firstChild()
    {
        return $this->first_child();
    }

    public function lastChild()
    {
        return $this->last_child();
    }

    public function nextSibling()
    {
        return $this->next_sibling();
    }

    public function previousSibling()
    {
        return $this->prev_sibling();
    }

    public function hasChildNodes()
    {
        return $this->has_child();
    }

    public function nodeName()
    {
        return $this->tag;
    }

    public function appendChild($node)
    {
        $node->parent($this);
        return $node;
    }

}
1030
 
@@ -1036,779 +1122,1062 @@ class trp_simple_html_dom_node
1036
  *
1037
  * @package PlaceLocalInclude
1038
  */
1039
- class trp_simple_html_dom
1040
  {
1041
- public $root = null;
1042
- public $nodes = array();
1043
- public $callback = null;
1044
- public $lowercase = false;
1045
- // Used to keep track of how large the text was when we started.
1046
- public $original_size;
1047
- public $size;
1048
- protected $pos;
1049
- protected $doc;
1050
- protected $char;
1051
- protected $cursor;
1052
- protected $parent;
1053
- protected $noise = array();
1054
- protected $token_blank = " \t\r\n";
1055
- protected $token_equal = ' =/>';
1056
- protected $token_slash = " />\r\n\t";
1057
- protected $token_attr = ' >';
1058
- // Note that this is referenced by a child node, and so it needs to be public for that node to see this information.
1059
- public $_charset = '';
1060
- public $_target_charset = '';
1061
- protected $default_br_text = "";
1062
- public $default_span_text = "";
1063
-
1064
- // use isset instead of in_array, performance boost about 30%...
1065
- protected $self_closing_tags = array('img' => 1, 'br' => 1, 'input' => 1, 'meta' => 1, 'link' => 1, 'hr' => 1, 'base' => 1, 'embed' => 1, 'spacer' => 1);
1066
- protected $block_tags = array('root' => 1, 'body' => 1, 'form' => 1, 'div' => 1, 'span' => 1, 'table' => 1);
1067
- // Known sourceforge issue #2977341
1068
- // B tags that are not closed cause us to return everything to the end of the document.
1069
- protected $optional_closing_tags = array(
1070
- 'tr' => array('tr' => 1, 'td' => 1, 'th' => 1),
1071
- 'th' => array('th' => 1),
1072
- 'td' => array('td' => 1),
1073
- 'li' => array('li' => 1),
1074
- 'dt' => array('dt' => 1, 'dd' => 1),
1075
- 'dd' => array('dd' => 1, 'dt' => 1),
1076
- 'dl' => array('dd' => 1, 'dt' => 1),
1077
- 'p' => array('p' => 1),
1078
- 'nobr' => array('nobr' => 1),
1079
- 'b' => array('b' => 1),
1080
- 'option' => array('option' => 1),
1081
- );
1082
-
1083
- function __construct($str = null, $lowercase = true, $forceTagsClosed = true, $target_charset = TRP_DEFAULT_TARGET_CHARSET, $stripRN = true, $defaultBRText = TRP_DEFAULT_BR_TEXT, $defaultSpanText = TRP_DEFAULT_SPAN_TEXT)
1084
- {
1085
- if ($str) {
1086
- if (preg_match("/^http:\/\//i", $str) || is_file($str)) {
1087
- $this->load_file($str);
1088
- } else {
1089
- $this->load($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText);
1090
- }
1091
- }
1092
- // Forcing tags to be closed implies that we don't trust the html, but it can lead to parsing errors if we SHOULD trust the html.
1093
- if (!$forceTagsClosed) {
1094
- $this->optional_closing_array = array();
1095
- }
1096
- $this->_target_charset = $target_charset;
1097
- }
1098
-
1099
- function __destruct()
1100
- {
1101
- $this->clear();
1102
- }
1103
-
1104
- // load html from string
1105
- function load($str, $lowercase = true, $stripRN = true, $defaultBRText = TRP_DEFAULT_BR_TEXT, $defaultSpanText = TRP_DEFAULT_SPAN_TEXT)
1106
- {
1107
- global $debugObject;
1108
-
1109
- // prepare
1110
- $this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText);
1111
- // strip out comments
1112
- $this->remove_noise("'<!--(.*?)-->'is");
1113
- // strip out cdata
1114
- $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
1115
- // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
1116
- // Script tags removal now preceeds style tag removal.
1117
- // strip out <script> tags
1118
- $this->remove_noise("'<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>'is");
1119
- $this->remove_noise("'<\s*script\s*>(.*?)<\s*/\s*script\s*>'is");
1120
- // strip out <style> tags
1121
- $this->remove_noise("'<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>'is");
1122
- $this->remove_noise("'<\s*style\s*>(.*?)<\s*/\s*style\s*>'is");
1123
- // strip out preformatted tags
1124
- $this->remove_noise("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is");
1125
- // strip out server side scripts
1126
- $this->remove_noise("'(<\?)(.*?)(\?>)'s", true);
1127
- // strip smarty scripts
1128
- $this->remove_noise("'(\{\w)(.*?)(\})'s", true);
1129
-
1130
- // parsing
1131
- while ($this->parse()) ;
1132
- // end
1133
- $this->root->_[TRP_HDOM_INFO_END] = $this->cursor;
1134
- $this->parse_charset();
1135
-
1136
- // make load function chainable
1137
- return $this;
1138
-
1139
- }
1140
-
1141
- // load html from file
1142
- function load_file()
1143
- {
1144
- $args = func_get_args();
1145
- $this->load(call_user_func_array('file_get_contents', $args), true);
1146
- // Throw an error if we can't properly load the dom.
1147
- if (($error = error_get_last()) !== null) {
1148
- $this->clear();
1149
- return false;
1150
- }
1151
- }
1152
-
1153
- // set callback function
1154
- function set_callback($function_name)
1155
- {
1156
- $this->callback = $function_name;
1157
- }
1158
-
1159
- // remove callback function
1160
- function remove_callback()
1161
- {
1162
- $this->callback = null;
1163
- }
1164
-
1165
- // save dom as string
1166
- function save($filepath = '')
1167
- {
1168
- $ret = $this->root->innertext();
1169
- if ($filepath !== '') file_put_contents($filepath, $ret, LOCK_EX);
1170
- return $ret;
1171
- }
1172
-
1173
- // find dom node by css selector
1174
- // Paperg - allow us to specify that we want case insensitive testing of the value of the selector.
1175
- function find($selector, $idx = null, $lowercase = false)
1176
- {
1177
- return $this->root->find($selector, $idx, $lowercase);
1178
- }
1179
-
1180
- // clean up memory due to php5 circular references memory leak...
1181
- function clear()
1182
- {
1183
- foreach ($this->nodes as $n) {
1184
- $n->clear();
1185
- $n = null;
1186
- }
1187
- // This add next line is documented in the sourceforge repository. 2977248 as a fix for ongoing memory leaks that occur even with the use of clear.
1188
- if (isset($this->children)) foreach ($this->children as $n) {
1189
- $n->clear();
1190
- $n = null;
1191
- }
1192
- if (isset($this->parent)) {
1193
- $this->parent->clear();
1194
- unset($this->parent);
1195
- }
1196
- if (isset($this->root)) {
1197
- $this->root->clear();
1198
- unset($this->root);
1199
- }
1200
- unset($this->doc);
1201
- unset($this->noise);
1202
- }
1203
-
1204
- function dump($show_attr = true)
1205
- {
1206
- $this->root->dump($show_attr);
1207
- }
1208
-
1209
- // prepare HTML data and init everything
1210
- protected function prepare($str, $lowercase = true, $stripRN = true, $defaultBRText = TRP_DEFAULT_BR_TEXT, $defaultSpanText = TRP_DEFAULT_SPAN_TEXT)
1211
- {
1212
- $this->clear();
1213
-
1214
- // set the length of content before we do anything to it.
1215
- $this->size = strlen($str);
1216
- // Save the original size of the html that we got in. It might be useful to someone.
1217
- $this->original_size = $this->size;
1218
-
1219
- //before we save the string as the doc... strip out the \r \n's if we are told to.
1220
- if ($stripRN) {
1221
- $str = str_replace("\r", " ", $str);
1222
- $str = str_replace("\n", " ", $str);
1223
-
1224
- // set the length of content since we have changed it.
1225
- $this->size = strlen($str);
1226
- }
1227
-
1228
- $this->doc = $str;
1229
- $this->pos = 0;
1230
- $this->cursor = 1;
1231
- $this->noise = array();
1232
- $this->nodes = array();
1233
- $this->lowercase = $lowercase;
1234
- $this->default_br_text = $defaultBRText;
1235
- $this->default_span_text = $defaultSpanText;
1236
- $this->root = new trp_simple_html_dom_node($this);
1237
- $this->root->tag = 'root';
1238
- $this->root->_[TRP_HDOM_INFO_BEGIN] = -1;
1239
- $this->root->nodetype = TRP_HDOM_TYPE_ROOT;
1240
- $this->parent = $this->root;
1241
- if ($this->size > 0) $this->char = $this->doc[0];
1242
- }
1243
-
1244
- // parse html content
1245
- protected function parse()
1246
- {
1247
- if (($s = $this->copy_until_char('<')) === '') {
1248
- return $this->read_tag();
1249
- }
1250
-
1251
- // text
1252
- $node = new trp_simple_html_dom_node($this);
1253
- ++$this->cursor;
1254
- $node->_[TRP_HDOM_INFO_TEXT] = $s;
1255
- $this->link_nodes($node, false);
1256
- return true;
1257
- }
1258
-
1259
- // PAPERG - dkchou - added this to try to identify the character set of the page we have just parsed so we know better how to spit it out later.
1260
- // NOTE: IF you provide a routine called get_last_retrieve_url_contents_content_type which returns the CURLINFO_CONTENT_TYPE from the last curl_exec
1261
- // (or the content_type header from the last transfer), we will parse THAT, and if a charset is specified, we will use it over any other mechanism.
1262
- protected function parse_charset()
1263
- {
1264
- global $debugObject;
1265
-
1266
- $charset = null;
1267
-
1268
- if (function_exists('get_last_retrieve_url_contents_content_type')) {
1269
- $contentTypeHeader = get_last_retrieve_url_contents_content_type();
1270
- $success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches);
1271
- if ($success) {
1272
- $charset = $matches[1];
1273
- if (is_object($debugObject)) {
1274
- $debugObject->debugLog(2, 'header content-type found charset of: ' . $charset);
1275
- }
1276
- }
1277
-
1278
- }
1279
-
1280
- if (empty($charset)) {
1281
- $el = $this->root->find('meta[http-equiv=Content-Type]', 0);
1282
- if (!empty($el)) {
1283
- $fullvalue = $el->content;
1284
- if (is_object($debugObject)) {
1285
- $debugObject->debugLog(2, 'meta content-type tag found' . $fullvalue);
1286
- }
1287
-
1288
- if (!empty($fullvalue)) {
1289
- $success = preg_match('/charset=(.+)/', $fullvalue, $matches);
1290
- if ($success) {
1291
- $charset = $matches[1];
1292
- } else {
1293
- // If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1
1294
- if (is_object($debugObject)) {
1295
- $debugObject->debugLog(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');
1296
- }
1297
- $charset = 'ISO-8859-1';
1298
- }
1299
- }
1300
- }
1301
- }
1302
-
1303
- // If we couldn't find a charset above, then lets try to detect one based on the text we got...
1304
- if (empty($charset)) {
1305
- // Have php try to detect the encoding from the text given to us.
1306
- $charset = mb_detect_encoding($this->root->plaintext . "ascii", $encoding_list = array("UTF-8", "CP1252"));
1307
- if (is_object($debugObject)) {
1308
- $debugObject->debugLog(2, 'mb_detect found: ' . $charset);
1309
- }
1310
-
1311
- // and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need...
1312
- if ($charset === false) {
1313
- if (is_object($debugObject)) {
1314
- $debugObject->debugLog(2, 'since mb_detect failed - using default of utf-8');
1315
- }
1316
- $charset = 'UTF-8';
1317
- }
1318
- }
1319
-
1320
- // Since CP1252 is a superset, if we get one of it's subsets, we want it instead.
1321
- if ((strtolower($charset) == strtolower('ISO-8859-1')) || (strtolower($charset) == strtolower('Latin1')) || (strtolower($charset) == strtolower('Latin-1'))) {
1322
- if (is_object($debugObject)) {
1323
- $debugObject->debugLog(2, 'replacing ' . $charset . ' with CP1252 as its a superset');
1324
- }
1325
- $charset = 'CP1252';
1326
- }
1327
-
1328
- if (is_object($debugObject)) {
1329
- $debugObject->debugLog(1, 'EXIT - ' . $charset);
1330
- }
1331
-
1332
- return $this->_charset = $charset;
1333
- }
1334
-
1335
- // read tag info
1336
- protected function read_tag()
1337
- {
1338
- if ($this->char !== '<') {
1339
- $this->root->_[TRP_HDOM_INFO_END] = $this->cursor;
1340
- return false;
1341
- }
1342
- $begin_tag_pos = $this->pos;
1343
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1344
-
1345
- // end tag
1346
- if ($this->char === '/') {
1347
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1348
- // This represents the change in the trp_simple_html_dom trunk from revision 180 to 181.
1349
- // $this->skip($this->token_blank_t);
1350
- $this->skip($this->token_blank);
1351
- $tag = $this->copy_until_char('>');
1352
-
1353
- // skip attributes in end tag
1354
- if (($pos = strpos($tag, ' ')) !== false)
1355
- $tag = substr($tag, 0, $pos);
1356
-
1357
- $parent_lower = strtolower($this->parent->tag);
1358
- $tag_lower = strtolower($tag);
1359
-
1360
- if ($parent_lower !== $tag_lower) {
1361
- if (isset($this->optional_closing_tags[$parent_lower]) && isset($this->block_tags[$tag_lower])) {
1362
- $this->parent->_[TRP_HDOM_INFO_END] = 0;
1363
- $org_parent = $this->parent;
1364
-
1365
- while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower)
1366
- $this->parent = $this->parent->parent;
1367
-
1368
- if (strtolower($this->parent->tag) !== $tag_lower) {
1369
- $this->parent = $org_parent; // restore origonal parent
1370
- if ($this->parent->parent) $this->parent = $this->parent->parent;
1371
- $this->parent->_[TRP_HDOM_INFO_END] = $this->cursor;
1372
- return $this->as_text_node($tag);
1373
- }
1374
- } else if (($this->parent->parent) && isset($this->block_tags[$tag_lower])) {
1375
- $this->parent->_[TRP_HDOM_INFO_END] = 0;
1376
- $org_parent = $this->parent;
1377
-
1378
- while (($this->parent->parent) && strtolower($this->parent->tag) !== $tag_lower)
1379
- $this->parent = $this->parent->parent;
1380
-
1381
- if (strtolower($this->parent->tag) !== $tag_lower) {
1382
- $this->parent = $org_parent; // restore origonal parent
1383
- $this->parent->_[TRP_HDOM_INFO_END] = $this->cursor;
1384
- return $this->as_text_node($tag);
1385
- }
1386
- } else if (($this->parent->parent) && strtolower($this->parent->parent->tag) === $tag_lower) {
1387
- $this->parent->_[TRP_HDOM_INFO_END] = 0;
1388
- $this->parent = $this->parent->parent;
1389
- } else
1390
- return $this->as_text_node($tag);
1391
- }
1392
-
1393
- $this->parent->_[TRP_HDOM_INFO_END] = $this->cursor;
1394
- if ($this->parent->parent) $this->parent = $this->parent->parent;
1395
-
1396
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1397
- return true;
1398
- }
1399
-
1400
- $node = new trp_simple_html_dom_node($this);
1401
- $node->_[TRP_HDOM_INFO_BEGIN] = $this->cursor;
1402
- ++$this->cursor;
1403
- $tag = $this->copy_until($this->token_slash);
1404
- $node->tag_start = $begin_tag_pos;
1405
-
1406
- // doctype, cdata & comments...
1407
- if (isset($tag[0]) && $tag[0] === '!') {
1408
- $node->_[TRP_HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');
1409
-
1410
- if (isset($tag[2]) && $tag[1] === '-' && $tag[2] === '-') {
1411
- $node->nodetype = TRP_HDOM_TYPE_COMMENT;
1412
- $node->tag = 'comment';
1413
- } else {
1414
- $node->nodetype = TRP_HDOM_TYPE_UNKNOWN;
1415
- $node->tag = 'unknown';
1416
- }
1417
- if ($this->char === '>') $node->_[TRP_HDOM_INFO_TEXT] .= '>';
1418
- $this->link_nodes($node, true);
1419
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1420
- return true;
1421
- }
1422
-
1423
- // text
1424
- if ($pos = strpos($tag, '<') !== false) {
1425
- $tag = '<' . substr($tag, 0, -1);
1426
- $node->_[TRP_HDOM_INFO_TEXT] = $tag;
1427
- $this->link_nodes($node, false);
1428
- $this->char = $this->doc[--$this->pos]; // prev
1429
- return true;
1430
- }
1431
-
1432
- if (!preg_match("/^[\w-:]+$/", $tag)) {
1433
- $node->_[TRP_HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');
1434
- if ($this->char === '<') {
1435
- $this->link_nodes($node, false);
1436
- return true;
1437
- }
1438
-
1439
- if ($this->char === '>') $node->_[TRP_HDOM_INFO_TEXT] .= '>';
1440
- $this->link_nodes($node, false);
1441
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1442
- return true;
1443
- }
1444
-
1445
- // begin tag
1446
- $node->nodetype = TRP_HDOM_TYPE_ELEMENT;
1447
- $tag_lower = strtolower($tag);
1448
- $node->tag = ($this->lowercase) ? $tag_lower : $tag;
1449
-
1450
- // handle optional closing tags
1451
- if (isset($this->optional_closing_tags[$tag_lower])) {
1452
- while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)])) {
1453
- $this->parent->_[TRP_HDOM_INFO_END] = 0;
1454
- $this->parent = $this->parent->parent;
1455
- }
1456
- $node->parent = $this->parent;
1457
- }
1458
-
1459
- $guard = 0; // prevent infinity loop
1460
- $space = array($this->copy_skip($this->token_blank), '', '');
1461
-
1462
- // attributes
1463
- do {
1464
- if ($this->char !== null && $space[0] === '') {
1465
- break;
1466
- }
1467
- $name = $this->copy_until($this->token_equal);
1468
- if ($guard === $this->pos) {
1469
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1470
- continue;
1471
- }
1472
- $guard = $this->pos;
1473
-
1474
- // handle endless '<'
1475
- if ($this->pos >= $this->size - 1 && $this->char !== '>') {
1476
- $node->nodetype = TRP_HDOM_TYPE_TEXT;
1477
- $node->_[TRP_HDOM_INFO_END] = 0;
1478
- $node->_[TRP_HDOM_INFO_TEXT] = '<' . $tag . $space[0] . $name;
1479
- $node->tag = 'text';
1480
- $this->link_nodes($node, false);
1481
- return true;
1482
- }
1483
-
1484
- // handle mismatch '<'
1485
- if ($this->doc[$this->pos - 1] == '<') {
1486
- $node->nodetype = TRP_HDOM_TYPE_TEXT;
1487
- $node->tag = 'text';
1488
- $node->attr = array();
1489
- $node->_[TRP_HDOM_INFO_END] = 0;
1490
- $node->_[TRP_HDOM_INFO_TEXT] = substr($this->doc, $begin_tag_pos, $this->pos - $begin_tag_pos - 1);
1491
- $this->pos -= 2;
1492
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1493
- $this->link_nodes($node, false);
1494
- return true;
1495
- }
1496
-
1497
- if ($name !== '/' && $name !== '') {
1498
- $space[1] = $this->copy_skip($this->token_blank);
1499
- $name = $this->restore_noise($name);
1500
- if ($this->lowercase) $name = strtolower($name);
1501
- if ($this->char === '=') {
1502
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1503
- $this->parse_attr($node, $name, $space);
1504
- } else {
1505
- //no value attr: nowrap, checked selected...
1506
- $node->_[TRP_HDOM_INFO_QUOTE][] = TRP_HDOM_QUOTE_NO;
1507
- $node->attr[$name] = true;
1508
- if ($this->char != '>') $this->char = $this->doc[--$this->pos]; // prev
1509
- }
1510
- $node->_[TRP_HDOM_INFO_SPACE][] = $space;
1511
- $space = array($this->copy_skip($this->token_blank), '', '');
1512
- } else
1513
- break;
1514
- } while ($this->char !== '>' && $this->char !== '/');
1515
-
1516
- $this->link_nodes($node, true);
1517
- $node->_[TRP_HDOM_INFO_ENDSPACE] = $space[0];
1518
-
1519
- // check self closing
1520
- if ($this->copy_until_char_escape('>') === '/') {
1521
- $node->_[TRP_HDOM_INFO_ENDSPACE] .= '/';
1522
- $node->_[TRP_HDOM_INFO_END] = 0;
1523
- } else {
1524
- // reset parent
1525
- if (!isset($this->self_closing_tags[strtolower($node->tag)])) $this->parent = $node;
1526
- }
1527
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1528
-
1529
- // If it's a BR tag, we need to set it's text to the default text.
1530
- // This way when we see it in plaintext, we can generate formatting that the user wants.
1531
- // since a br tag never has sub nodes, this works well.
1532
- if ($node->tag == "br") {
1533
- $node->_[TRP_HDOM_INFO_INNER] = $this->default_br_text;
1534
- }
1535
-
1536
- return true;
1537
- }
1538
-
1539
- // parse attributes
1540
- protected function parse_attr($node, $name, &$space)
1541
- {
1542
- // Per sourceforge: http://sourceforge.net/tracker/?func=detail&aid=3061408&group_id=218559&atid=1044037
1543
- // If the attribute is already defined inside a tag, only pay atetntion to the first one as opposed to the last one.
1544
- if (isset($node->attr[$name])) {
1545
- return;
1546
- }
1547
-
1548
- $space[2] = $this->copy_skip($this->token_blank);
1549
- switch ($this->char) {
1550
- case '"':
1551
- $node->_[TRP_HDOM_INFO_QUOTE][] = TRP_HDOM_QUOTE_DOUBLE;
1552
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1553
- $node->attr[$name] = $this->restore_noise($this->copy_until_char_escape('"'));
1554
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1555
- break;
1556
- case '\'':
1557
- $node->_[TRP_HDOM_INFO_QUOTE][] = TRP_HDOM_QUOTE_SINGLE;
1558
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1559
- $node->attr[$name] = $this->restore_noise($this->copy_until_char_escape('\''));
1560
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1561
- break;
1562
- default:
1563
- $node->_[TRP_HDOM_INFO_QUOTE][] = TRP_HDOM_QUOTE_NO;
1564
- $node->attr[$name] = $this->restore_noise($this->copy_until($this->token_attr));
1565
- }
1566
- // PaperG: Attributes should not have \r or \n in them, that counts as html whitespace.
1567
- $node->attr[$name] = str_replace("\r", "", $node->attr[$name]);
1568
- $node->attr[$name] = str_replace("\n", "", $node->attr[$name]);
1569
- // PaperG: If this is a "class" selector, lets get rid of the preceeding and trailing space since some people leave it in the multi class case.
1570
- if ($name == "class") {
1571
- $node->attr[$name] = trim($node->attr[$name]);
1572
- }
1573
- }
1574
-
1575
- // link node's parent
1576
- protected function link_nodes(&$node, $is_child)
1577
- {
1578
- $node->parent = $this->parent;
1579
- $this->parent->nodes[] = $node;
1580
- if ($is_child) {
1581
- $this->parent->children[] = $node;
1582
- }
1583
- }
1584
-
1585
- // as a text node
1586
- protected function as_text_node($tag)
1587
- {
1588
- $node = new trp_simple_html_dom_node($this);
1589
- ++$this->cursor;
1590
- $node->_[TRP_HDOM_INFO_TEXT] = '</' . $tag . '>';
1591
- $this->link_nodes($node, false);
1592
- $this->char = (++$this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1593
- return true;
1594
- }
1595
-
1596
- protected function skip($chars)
1597
- {
1598
- $this->pos += strspn($this->doc, $chars, $this->pos);
1599
- $this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1600
- }
1601
-
1602
- protected function copy_skip($chars)
1603
- {
1604
- $pos = $this->pos;
1605
- $len = strspn($this->doc, $chars, $pos);
1606
- $this->pos += $len;
1607
- $this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1608
- if ($len === 0) return '';
1609
- return substr($this->doc, $pos, $len);
1610
- }
1611
-
1612
- protected function copy_until($chars)
1613
- {
1614
- $pos = $this->pos;
1615
- $len = strcspn($this->doc, $chars, $pos);
1616
- $this->pos += $len;
1617
- $this->char = ($this->pos < $this->size) ? $this->doc[$this->pos] : null; // next
1618
- return substr($this->doc, $pos, $len);
1619
- }
1620
-
1621
- protected function copy_until_char($char)
1622
- {
1623
- if ($this->char === null) return '';
1624
-
1625
- if (($pos = strpos($this->doc, $char, $this->pos)) === false) {
1626
- $ret = substr($this->doc, $this->pos, $this->size - $this->pos);
1627
- $this->char = null;
1628
- $this->pos = $this->size;
1629
- return $ret;
1630
- }
1631
-
1632
- if ($pos === $this->pos) return '';
1633
- $pos_old = $this->pos;
1634
- $this->char = $this->doc[$pos];
1635
- $this->pos = $pos;
1636
- return substr($this->doc, $pos_old, $pos - $pos_old);
1637
- }
1638
-
1639
- protected function copy_until_char_escape($char)
1640
- {
1641
- if ($this->char === null) return '';
1642
-
1643
- $start = $this->pos;
1644
- while (1) {
1645
- if (($pos = strpos($this->doc, $char, $start)) === false) {
1646
- $ret = substr($this->doc, $this->pos, $this->size - $this->pos);
1647
- $this->char = null;
1648
- $this->pos = $this->size;
1649
- return $ret;
1650
- }
1651
-
1652
- if ($pos === $this->pos) return '';
1653
-
1654
- if ($this->doc[$pos - 1] === '\\') {
1655
- $start = $pos + 1;
1656
- continue;
1657
- }
1658
-
1659
- $pos_old = $this->pos;
1660
- $this->char = $this->doc[$pos];
1661
- $this->pos = $pos;
1662
- return substr($this->doc, $pos_old, $pos - $pos_old);
1663
- }
1664
- }
1665
-
1666
- // remove noise from html content
1667
- // save the noise in the $this->noise array.
1668
- protected function remove_noise($pattern, $remove_tag = false)
1669
- {
1670
- global $debugObject;
1671
- if (is_object($debugObject)) {
1672
- $debugObject->debugLogEntry(1);
1673
- }
1674
-
1675
- $count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
1676
-
1677
- for ($i = $count - 1; $i > -1; --$i) {
1678
- $key = '___noise___' . sprintf('% 5d', count($this->noise) + 1000);
1679
- if (is_object($debugObject)) {
1680
- $debugObject->debugLog(2, 'key is: ' . $key);
1681
- }
1682
- $idx = ($remove_tag) ? 0 : 1;
1683
- $this->noise[$key] = $matches[$i][$idx][0];
1684
- $this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
1685
- }
1686
-
1687
- // reset the length of content
1688
- $this->size = strlen($this->doc);
1689
- if ($this->size > 0) {
1690
- $this->char = $this->doc[0];
1691
- }
1692
- }
1693
-
1694
- // restore noise to html content
1695
- function restore_noise($text)
1696
- {
1697
- global $debugObject;
1698
- if (is_object($debugObject)) {
1699
- $debugObject->debugLogEntry(1);
1700
- }
1701
-
1702
- while (($pos = strpos($text, '___noise___')) !== false) {
1703
- // Sometimes there is a broken piece of markup, and we don't GET the pos+11 etc... token which indicates a problem outside of us...
1704
- if (strlen($text) > $pos + 15) {
1705
- $key = '___noise___' . $text[$pos + 11] . $text[$pos + 12] . $text[$pos + 13] . $text[$pos + 14] . $text[$pos + 15];
1706
- if (is_object($debugObject)) {
1707
- $debugObject->debugLog(2, 'located key of: ' . $key);
1708
- }
1709
-
1710
- if (isset($this->noise[$key])) {
1711
- $text = substr($text, 0, $pos) . $this->noise[$key] . substr($text, $pos + 16);
1712
- } else {
1713
- // do this to prevent an infinite loop.
1714
- $text = substr($text, 0, $pos) . 'UNDEFINED NOISE FOR KEY: ' . $key . substr($text, $pos + 16);
1715
- }
1716
- } else {
1717
- // There is no valid key being given back to us... We must get rid of the ___noise___ or we will have a problem.
1718
- $text = substr($text, 0, $pos) . 'NO NUMERIC NOISE KEY' . substr($text, $pos + 11);
1719
- }
1720
- }
1721
- return $text;
1722
- }
1723
-
1724
- // Sometimes we NEED one of the noise elements.
1725
- function search_noise($text)
1726
- {
1727
- global $debugObject;
1728
- if (is_object($debugObject)) {
1729
- $debugObject->debugLogEntry(1);
1730
- }
1731
-
1732
- foreach ($this->noise as $noiseElement) {
1733
- if (strpos($noiseElement, $text) !== false) {
1734
- return $noiseElement;
1735
- }
1736
- }
1737
- }
1738
-
1739
- function __toString()
1740
- {
1741
- return $this->root->innertext();
1742
- }
1743
-
1744
- function __get($name)
1745
- {
1746
- switch ($name) {
1747
- case 'outertext':
1748
- return $this->root->innertext();
1749
- case 'innertext':
1750
- return $this->root->innertext();
1751
- case 'plaintext':
1752
- return $this->root->text();
1753
- case 'charset':
1754
- return $this->_charset;
1755
- case 'target_charset':
1756
- return $this->_target_charset;
1757
- }
1758
- }
1759
-
1760
- // camel naming conventions
1761
- function childNodes($idx = -1)
1762
- {
1763
- return $this->root->childNodes($idx);
1764
- }
1765
-
1766
- function firstChild()
1767
- {
1768
- return $this->root->first_child();
1769
- }
1770
-
1771
- function lastChild()
1772
- {
1773
- return $this->root->last_child();
1774
- }
1775
-
1776
- function createElement($name, $value = null)
1777
- {
1778
- return @trp_str_get_html("<$name>$value</$name>")->first_child();
1779
- }
1780
-
1781
- function createTextNode($value)
1782
- {
1783
- return @end(trp_str_get_html($value)->nodes);
1784
- }
1785
-
1786
- function getElementById($id)
1787
- {
1788
- return $this->find("#$id", 0);
1789
- }
1790
-
1791
- function getElementsById($id, $idx = null)
1792
- {
1793
- return $this->find("#$id", $idx);
1794
- }
1795
-
1796
- function getElementByTagName($name)
1797
- {
1798
- return $this->find($name, 0);
1799
- }
1800
-
1801
- function getElementsByTagName($name, $idx = -1)
1802
- {
1803
- return $this->find($name, $idx);
1804
- }
1805
-
1806
- function loadFile()
1807
- {
1808
- $args = func_get_args();
1809
- $this->load_file($args);
1810
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1811
  }
1812
 
1813
-
1814
  ?>
1
  <?php
2
  /**
3
  * Website: http://sourceforge.net/projects/simplehtmldom/
4
+ * Additional projects that may be used: http://sourceforge.net/projects/debugobject/
5
  * Acknowledge: Jose Solorzano (https://sourceforge.net/projects/php-html/)
6
  * Contributions by:
7
+ * Yousuke Kumakura (Attribute filters)
8
+ * Vadim Voituk (Negative indexes supports of "find" method)
9
+ * Antcs (Constructor with automatically load contents either text or file/url)
10
  *
11
  * all affected sections have comments starting with "PaperG"
12
  *
34
  * @author S.C. Chen <me578022@gmail.com>
35
  * @author John Schlick
36
  * @author Rus Carroll
37
+ * @version Rev. 1.7 (214)
38
  * @package PlaceLocalInclude
39
+ * @subpackage simple_html_dom
40
  */
41
 
42
  /**
43
  * All of the Defines for the classes below.
44
  * @author S.C. Chen <me578022@gmail.com>
45
  */
46
+
47
+ namespace TranslatePress;
48
+ /*
49
+ * TranslatePress modifications
50
+ *
51
+ * "TRP_" prefix was appended to all the define constants.
52
+ * namespace TranslatePress was added.
53
+ * MAX_FILE_SIZE has been modified from value 600000 to new value: define( 'TRP_MAX_FILE_SIZE', 100000000 );
54
+ *
55
+ */
56
+
57
  define('TRP_HDOM_TYPE_ELEMENT', 1);
58
  define('TRP_HDOM_TYPE_COMMENT', 2);
59
+ define('TRP_HDOM_TYPE_TEXT', 3);
60
+ define('TRP_HDOM_TYPE_ENDTAG', 4);
61
+ define('TRP_HDOM_TYPE_ROOT', 5);
62
  define('TRP_HDOM_TYPE_UNKNOWN', 6);
63
  define('TRP_HDOM_QUOTE_DOUBLE', 0);
64
  define('TRP_HDOM_QUOTE_SINGLE', 1);
65
+ define('TRP_HDOM_QUOTE_NO', 3);
66
+ define('TRP_HDOM_INFO_BEGIN', 0);
67
+ define('TRP_HDOM_INFO_END', 1);
68
+ define('TRP_HDOM_INFO_QUOTE', 2);
69
+ define('TRP_HDOM_INFO_SPACE', 3);
70
+ define('TRP_HDOM_INFO_TEXT', 4);
71
+ define('TRP_HDOM_INFO_INNER', 5);
72
+ define('TRP_HDOM_INFO_OUTER', 6);
73
+ define('TRP_HDOM_INFO_ENDSPACE',7);
74
  define('TRP_DEFAULT_TARGET_CHARSET', 'UTF-8');
75
  define('TRP_DEFAULT_BR_TEXT', "\r\n");
76
  define('TRP_DEFAULT_SPAN_TEXT', " ");
77
+ define( 'TRP_MAX_FILE_SIZE', 100000000 );
78
+
79
+ /** Contents between curly braces "{" and "}" are interpreted as text */
80
+ define('TRP_HDOM_SMARTY_AS_TEXT', 1);
81
+
82
  // helper functions
83
  // -----------------------------------------------------------------------------
84
  // get html dom from file
85
  // $maxlen is defined in the code as PHP_STREAM_COPY_ALL which is defined as -1.
86
/**
 * Read a document with file_get_contents() and parse it into a DOM object.
 *
 * The first five arguments are forwarded to file_get_contents(); the rest
 * are forwarded to the simple_html_dom constructor / load().
 *
 * @param int $maxLen Maximum number of bytes to read; values <= 0 fall back
 *                    to TRP_MAX_FILE_SIZE.
 * @return simple_html_dom|false The loaded DOM, or false when nothing could
 *                               be read or the content exceeds $maxLen.
 */
function file_get_html($url, $use_include_path = false, $context=null, $offset = 0, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = TRP_DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=TRP_DEFAULT_BR_TEXT, $defaultSpanText=TRP_DEFAULT_SPAN_TEXT)
{
    // Ensure maximum length is greater than zero
    if ($maxLen <= 0) {
        $maxLen = TRP_MAX_FILE_SIZE;
    }

    // We DO force the tags to be terminated.
    $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);

    $contents = file_get_contents($url, $use_include_path, $context, $offset, $maxLen);

    if (empty($contents) || strlen($contents) > $maxLen) {
        // Release the unused parser, exactly as str_get_html() does on its
        // failure path, so no half-initialised DOM is left behind.
        $dom->clear();
        return false;
    }

    // The second parameter can force the selectors to all be lowercase.
    $dom->load($contents, $lowercase, $stripRN);
    return $dom;
}
105
 
106
  // get html dom from string
107
/**
 * Parse an HTML string into a DOM object.
 *
 * @param string $str Markup to parse.
 * @return simple_html_dom|false The loaded DOM, or false when $str is empty
 *                               or longer than TRP_MAX_FILE_SIZE bytes.
 */
function str_get_html($str, $lowercase=true, $forceTagsClosed=true, $target_charset = TRP_DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=TRP_DEFAULT_BR_TEXT, $defaultSpanText=TRP_DEFAULT_SPAN_TEXT)
{
    $parser = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText);

    $usable = !empty($str) && strlen($str) <= TRP_MAX_FILE_SIZE;
    if (!$usable) {
        // Nothing to parse (or too much): drop the parser and report failure.
        $parser->clear();
        return false;
    }

    $parser->load($str, $lowercase, $stripRN);
    return $parser;
}
118
 
119
  // dump html dom tree
120
/**
 * Print a node and its descendants by delegating to the node's dump().
 *
 * @param object $node      Node exposing dump($show_attr, $deep).
 * @param bool   $show_attr Whether attributes are printed.
 * @param int    $deep      Starting indentation depth.
 */
function dump_html_tree($node, $show_attr=true, $deep=0)
{
    // Forward the caller's options; previously the node itself was passed
    // as $show_attr and $deep was dropped.
    $node->dump($show_attr, $deep);
}
124
 
125
 
130
  *
131
  * @package PlaceLocalInclude
132
  */
133
+ class simple_html_dom_node
134
  {
135
+ /**
136
+ * Node type
137
+ *
138
+ * Default is {@see TRP_HDOM_TYPE_TEXT}
139
+ *
140
+ * @var int
141
+ */
142
+ public $nodetype = TRP_HDOM_TYPE_TEXT;
143
+
144
+ /**
145
+ * Tag name
146
+ *
147
+ * Default is 'text'
148
+ *
149
+ * @var string
150
+ */
151
+ public $tag = 'text';
152
+
153
+ /**
154
+ * List of attributes
155
+ *
156
+ * @var array
157
+ */
158
+ public $attr = array();
159
+
160
+ /**
161
+ * List of child node objects
162
+ *
163
+ * @var array
164
+ */
165
+ public $children = array();
166
+ public $nodes = array();
167
+
168
+ /**
169
+ * The parent node object
170
+ *
171
+ * @var object|null
172
+ */
173
+ public $parent = null;
174
+
175
+ // The "info" array - see TRP_HDOM_INFO_... for what each element contains.
176
+ public $_ = array();
177
+
178
+ /**
179
+ * Start position of the tag in the document
180
+ *
181
+ * @var int
182
+ */
183
+ public $tag_start = 0;
184
+
185
+ /**
186
+ * The DOM object
187
+ *
188
+ * @var object|null
189
+ */
190
+ private $dom = null;
191
+
192
+ /**
193
+ * Construct new node object
194
+ *
195
+ * Adds itself to the list of DOM Nodes {@see simple_html_dom::$nodes}
196
+ */
197
function __construct($dom)
{
    // Register with the owning DOM first, then keep the back-reference.
    $dom->nodes[] = $this;
    $this->dom = $dom;
}
202
+
203
/**
 * Drop all object references when the node is destroyed.
 */
function __destruct()
{
    $this->clear();
}
207
+
208
/**
 * String conversion yields the node's outer text.
 *
 * @return string
 */
function __toString()
{
    $rendered = $this->outertext();
    return $rendered;
}
212
+
213
+ // clean up memory due to php5 circular references memory leak...
214
/**
 * Null out every reference this node holds to other objects
 * (children, nodes, parent and the owning DOM).
 */
function clear()
{
    $this->children = null;
    $this->parent   = null;
    $this->nodes    = null;
    $this->dom      = null;
}
221
+
222
+ // dump node's tree
223
/**
 * Echo this node's tag (optionally with attributes) and recurse into
 * its child nodes, one line per node.
 *
 * @param bool $show_attr Print the attribute list after the tag.
 * @param int  $deep      Nesting depth; one leading space per level.
 */
function dump($show_attr=true, $deep=0)
{
    echo str_repeat(' ', $deep), $this->tag;

    if ($show_attr && count($this->attr) > 0) {
        echo '(';
        foreach (array_keys($this->attr) as $attrName) {
            // Reading the value through the magic getter keeps charset conversion.
            echo "[$attrName]=>\"" . $this->$attrName . '", ';
        }
        echo ')';
    }
    echo "\n";

    if (!$this->nodes) {
        return;
    }
    foreach ($this->nodes as $child) {
        $child->dump($show_attr, $deep + 1);
    }
}
245
+
246
+
247
+ // Debugging function to dump a single dom node with a bunch of information about it.
248
/**
 * Build a one-line debug description of this node: tag, attributes,
 * the info array, inner info, child/node counts and tag start.
 *
 * @param bool $echo When true the line is echoed and nothing is returned;
 *                   when false the line is returned.
 * @return string|void
 */
function dump_node($echo=true)
{
    $string = $this->tag;

    if (count($this->attr) > 0) {
        $string .= '(';
        foreach ($this->attr as $k => $v) {
            $string .= "[$k]=>\"" . $this->$k . '", ';
        }
        $string .= ')';
    }

    if (count($this->_) > 0) {
        $string .= ' $_ (';
        foreach ($this->_ as $k => $v) {
            if (is_array($v)) {
                $string .= "[$k]=>(";
                foreach ($v as $k2 => $v2) {
                    $string .= "[$k2]=>\"" . $v2 . '", ';
                }
                $string .= ")";
            } else {
                $string .= "[$k]=>\"" . $v . '", ';
            }
        }
        $string .= ")";
    }

    if (isset($this->text)) {
        $string .= " text: (" . $this->text . ")";
    }

    $string .= " TRP_HDOM_INNER_INFO: '";
    // Inspect this node's own info array; the previous code read an
    // undefined $node variable, so this branch could never be taken.
    if (isset($this->_[TRP_HDOM_INFO_INNER])) {
        $string .= $this->_[TRP_HDOM_INFO_INNER] . "'";
    } else {
        $string .= ' NULL ';
    }

    $string .= " children: " . count($this->children);
    $string .= " nodes: " . count($this->nodes);
    $string .= " tag_start: " . $this->tag_start;
    $string .= "\n";

    if ($echo) {
        echo $string;
        return;
    }
    return $string;
}
311
+
312
+ /**
313
+ * Return or set parent node
314
+ *
315
+ * @param object|null $parent (optional) The parent node, `null` to return
316
+ * the current parent node.
317
+ * @return object|null The parent node
318
+ */
319
function parent($parent=null)
{
    // Getter mode: no argument supplied.
    if ($parent === null) {
        return $this->parent;
    }

    // Setter mode. NOTE: the node is not detached from any previous
    // parent's nodes/children lists; it is only appended to the new one.
    $this->parent = $parent;
    $parent->nodes[] = $this;
    $parent->children[] = $this;

    return $this->parent;
}
332
+
333
+ /**
334
+ * @return bool True if the node has at least one child node
335
+ */
336
function has_child()
{
    // A non-empty children list is truthy; null or array() is not.
    return (bool) $this->children;
}
340
+
341
+ /**
342
+ * Get child node at specified index
343
+ *
344
+ * @param int $idx The index of the child node to return, `-1` to return all
345
+ * child nodes.
346
+ * @return object|array|null The child node at the specified index, all child
347
+ * nodes or null if the index is invalid.
348
+ */
349
function children($idx=-1)
{
    // -1 is the "give me everything" sentinel.
    if ($idx === -1) {
        return $this->children;
    }

    return isset($this->children[$idx]) ? $this->children[$idx] : null;
}
361
+
362
/**
 * Get first child node
 *
 * @return object|null The first child node or null if the current node has
 * no child nodes (including after {@see clear()} has nulled the list).
 */
function first_child()
{
    // empty() is safe when clear() has set $children to null, where
    // count() would warn (PHP 7.2+) or throw (PHP 8).
    if (empty($this->children)) {
        return null;
    }
    return $this->children[0];
}
379
+
380
+ /**
381
+ * Get last child node
382
+ *
383
+ * @return object|null The last child node or null if the current node has
384
+ * no child nodes.
385
+ *
386
+ * @todo Use `end()` to slightly improve performance on large arrays.
387
+ */
388
function last_child()
{
    // Guard with empty() so a list nulled by clear() does not reach
    // count(), which warns (PHP 7.2+) or throws (PHP 8) on null.
    if (empty($this->children)) {
        return null;
    }
    return $this->children[count($this->children) - 1];
}
396
+
397
+ /**
398
+ * Get next sibling node
399
+ *
400
+ * @return object|null The sibling node or null if the current node has no
401
+ * sibling nodes.
402
+ */
403
function next_sibling()
{
    if ($this->parent === null) {
        return null;
    }

    $siblings = $this->parent->children;
    $total = count($siblings);

    // Locate this node; $position ends at $total when it is not listed.
    for ($position = 0; $position < $total; ++$position) {
        if ($siblings[$position] === $this) {
            break;
        }
    }

    $following = $position + 1;
    return ($following < $total) ? $siblings[$following] : null;
}
422
+
423
+ /**
424
+ * Get previous sibling node
425
+ *
426
+ * @return object|null The sibling node or null if the current node has no
427
+ * sibling nodes.
428
+ */
429
function prev_sibling()
{
    if ($this->parent === null) {
        return null;
    }

    $siblings = $this->parent->children;
    if (empty($siblings)) {
        return null;
    }

    // Strict search compares object identity. A node that is not listed in
    // the parent's children has no previous sibling; the old linear scan
    // fell off the end and returned the parent's last child in that case.
    $idx = array_search($this, $siblings, true);
    if ($idx === false || $idx < 1) {
        return null;
    }
    return $siblings[$idx - 1];
}
439
+
440
+ /**
441
+ * Traverse ancestors to the first matching tag.
442
+ *
443
+ * @param string $tag Tag to find
444
+ * @return object|null First matching node in the DOM tree or null if no
445
+ * match was found.
446
+ *
447
+ * @todo Null is returned implicitly by calling ->parent on the root node.
448
+ * This behaviour could change at any time, rendering this function invalid.
449
+ */
450
function find_ancestor_tag($tag)
{
    global $debug_object;
    $logging = is_object($debug_object);
    if ($logging) { $debug_object->debug_log_entry(1); }

    // Walk upwards starting with this node itself; the walk ends when a
    // node's parent is null.
    for ($candidate = $this; !is_null($candidate); $candidate = $candidate->parent) {
        if ($logging) { $debug_object->debug_log(2, "Current tag is: " . $candidate->tag); }

        if ($candidate->tag == $tag) {
            return $candidate;
        }
    }

    return null;
}
470
+
471
+ /**
472
+ * Get node's inner text (everything inside the opening and closing tags)
473
+ *
474
+ * @return string
475
+ */
476
function innertext()
{
    // An explicitly stored inner text wins.
    if (isset($this->_[TRP_HDOM_INFO_INNER])) {
        return $this->_[TRP_HDOM_INFO_INNER];
    }
    // Text-like nodes carry their content in the TEXT slot.
    if (isset($this->_[TRP_HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[TRP_HDOM_INFO_TEXT]);
    }

    // Otherwise the inner text is the concatenated outer text of all nodes.
    $pieces = '';
    foreach ($this->nodes as $inner) {
        $pieces = $pieces . $inner->outertext();
    }
    return $pieces;
}
486
+
487
+ /**
488
+ * Get node's outer text (everything including the opening and closing tags)
489
+ *
490
+ * @return string
491
+ */
492
function outertext()
{
    global $debug_object;
    if (is_object($debug_object)) {
        $suffix = '';
        if ($this->tag == 'text' && !empty($this->text)) {
            $suffix = " with text: " . $this->text;
        }
        $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $suffix);
    }

    // The root has no markup of its own.
    if ($this->tag === 'root') {
        return $this->innertext();
    }

    // Give the DOM-level callback a chance to modify this node first.
    if ($this->dom && $this->dom->callback !== null) {
        call_user_func_array($this->dom->callback, array($this));
    }

    // Explicit outer text, then raw text content, take precedence.
    if (isset($this->_[TRP_HDOM_INFO_OUTER])) {
        return $this->_[TRP_HDOM_INFO_OUTER];
    }
    if (isset($this->_[TRP_HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[TRP_HDOM_INFO_TEXT]);
    }

    // Opening tag.
    $markup = "";
    if ($this->dom && $this->dom->nodes[$this->_[TRP_HDOM_INFO_BEGIN]]) {
        $markup = $this->dom->nodes[$this->_[TRP_HDOM_INFO_BEGIN]]->makeup();
    }

    // Content.
    if (!isset($this->_[TRP_HDOM_INFO_INNER])) {
        if ($this->nodes) {
            foreach ($this->nodes as $inner) {
                $markup .= $this->convert_text($inner->outertext());
            }
        }
    } elseif ($this->tag != "br") {
        // For <br> the stored inner info (which may have been added by the
        // parser) is deliberately not rendered.
        $markup .= $this->_[TRP_HDOM_INFO_INNER];
    }

    // Closing tag, only when an end position was recorded.
    if (isset($this->_[TRP_HDOM_INFO_END]) && $this->_[TRP_HDOM_INFO_END] != 0) {
        $markup .= '</' . $this->tag . '>';
    }

    return $markup;
}
550
+
551
+ /**
552
+ * Get node's plain text (everything excluding all tags)
553
+ *
554
+ * @return string
555
+ */
556
function text()
{
    if (isset($this->_[TRP_HDOM_INFO_INNER])) {
        return $this->_[TRP_HDOM_INFO_INNER];
    }

    $type = $this->nodetype;
    if ($type == TRP_HDOM_TYPE_TEXT) {
        return $this->dom->restore_noise($this->_[TRP_HDOM_INFO_TEXT]);
    }
    if ($type == TRP_HDOM_TYPE_COMMENT || $type == TRP_HDOM_TYPE_UNKNOWN) {
        return '';
    }

    // Script and style bodies are never plain text.
    if (strcasecmp($this->tag, 'script') === 0 || strcasecmp($this->tag, 'style') === 0) {
        return '';
    }

    $plain = '';
    // $this->nodes has been observed to be NULL for some element nodes
    // without clear() having been called, hence the guard.
    if (!is_null($this->nodes)) {
        foreach ($this->nodes as $inner) {
            // Start paragraph after a blank line
            if ($inner->tag == 'p') {
                $plain .= "\n\n";
            }

            $plain .= $this->convert_text($inner->text());

            // Separate consecutive spans so their text does not run together.
            if ($inner->tag == "span") {
                $plain .= $this->dom->default_span_text;
            }
        }
    }

    return trim($plain);
}
593
+
594
+ /**
595
+ * Get node's xml text (inner text as a CDATA section)
596
+ *
597
+ * @return string
598
+ */
599
function xmltext()
{
    // Strip the CDATA opener (case-insensitively), then the closer.
    $withoutOpener = str_ireplace('<![CDATA[', '', $this->innertext());
    return str_replace(']]>', '', $withoutOpener);
}
606
+
607
+ // build node's text with tag
608
/**
 * Render this node's opening tag, including its attributes with the
 * spacing and quoting recorded in the info array.
 *
 * @return string
 */
function makeup()
{
    // text, comment, unknown
    if (isset($this->_[TRP_HDOM_INFO_TEXT])) {
        return $this->dom->restore_noise($this->_[TRP_HDOM_INFO_TEXT]);
    }

    $markup = '<' . $this->tag;

    // Spacing/quote info is stored positionally, so track each attribute's
    // ordinal position alongside its name.
    foreach (array_keys($this->attr) as $position => $name) {
        $value = $this->attr[$name];

        // skip removed attribute
        if ($value === null || $value === false) {
            continue;
        }

        $markup .= $this->_[TRP_HDOM_INFO_SPACE][$position][0];

        // no value attr: nowrap, checked selected...
        if ($value === true) {
            $markup .= $name;
            continue;
        }

        $quoteStyle = $this->_[TRP_HDOM_INFO_QUOTE][$position];
        if ($quoteStyle == TRP_HDOM_QUOTE_DOUBLE) {
            $quote = '"';
        } elseif ($quoteStyle == TRP_HDOM_QUOTE_SINGLE) {
            $quote = '\'';
        } else {
            $quote = '';
        }

        $markup .= $name
            . $this->_[TRP_HDOM_INFO_SPACE][$position][1]
            . '='
            . $this->_[TRP_HDOM_INFO_SPACE][$position][2]
            . $quote . $value . $quote;
    }

    $markup = $this->dom->restore_noise($markup);
    return $markup . $this->_[TRP_HDOM_INFO_ENDSPACE] . '>';
}
641
+
642
+ // find elements by css selector
643
+ //PaperG - added ability for find to lowercase the value of the selector.
644
/**
 * Find nodes below this one that match a CSS selector.
 *
 * @param string   $selector  Selector string; comma-separated groups are
 *                            searched independently and merged.
 * @param int|null $idx       null returns every match; otherwise the match
 *                            at that index (negative counts from the end).
 * @param bool     $lowercase Passed to seek() for case-insensitive value tests.
 * @return array|object|null
 */
function find($selector, $idx=null, $lowercase=false)
{
    $groups = $this->parse_selector($selector);
    if (count($groups) === 0) {
        return array();
    }

    $found_keys = array();

    foreach ($groups as $chain) {
        if (count($chain) === 0) {
            return array();
        }
        if (!isset($this->_[TRP_HDOM_INFO_BEGIN])) {
            return array();
        }

        // Descendant selectors are resolved level by level, without recursion:
        // each step searches below every node found by the previous step.
        $frontier = array($this->_[TRP_HDOM_INFO_BEGIN] => 1);
        foreach ($chain as $step) {
            $next = array();
            foreach (array_keys($frontier) as $nodeIndex) {
                $start = ($nodeIndex === -1) ? $this->dom->root : $this->dom->nodes[$nodeIndex];
                $start->seek($step, $next, $lowercase);
            }
            $frontier = $next;
        }

        // Merge, keeping entries already found by an earlier group.
        $found_keys += $frontier;
    }

    // Document order.
    ksort($found_keys);

    $found = array();
    foreach (array_keys($found_keys) as $nodeIndex) {
        $found[] = $this->dom->nodes[$nodeIndex];
    }

    if (is_null($idx)) {
        return $found;
    }
    if ($idx < 0) {
        $idx = count($found) + $idx;
    }
    return isset($found[$idx]) ? $found[$idx] : null;
}
694
+
695
+ // seek for given conditions
696
+ // PaperG - added parameter to allow for case insensitive testing of the value of a selector.
697
/**
 * Collect the nodes below this one that satisfy a single parsed selector.
 *
 * @param array $selector  Tuple (tag, key, val, exp, no_key) as produced by
 *                         parse_selector().
 * @param array $ret       Receives matches as node-index => 1; this is the
 *                         method's real output.
 * @param bool  $lowercase Compare selector values case-insensitively.
 */
protected function seek($selector, &$ret, $lowercase=false)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    list($tag, $key, $val, $exp, $no_key) = $selector;

    // xpath index: pick the n-th child with the given tag
    if ($tag && $key && is_numeric($key)) {
        $count = 0;
        foreach ($this->children as $c) {
            if ($tag === '*' || $tag === $c->tag) {
                if (++$count == $key) {
                    $ret[$c->_[TRP_HDOM_INFO_BEGIN]] = 1;
                    return;
                }
            }
        }
        return;
    }

    // Determine the index just past this node's last descendant. When the
    // node has no end position of its own, borrow the nearest ancestor's.
    $end = (!empty($this->_[TRP_HDOM_INFO_END])) ? $this->_[TRP_HDOM_INFO_END] : 0;
    if ($end == 0) {
        $parent = $this->parent;
        // Test for null before touching the ancestor.
        while ($parent !== null && !isset($parent->_[TRP_HDOM_INFO_END])) {
            $end -= 1;
            $parent = $parent->parent;
        }
        // No ancestor with an end position: leave $end as computed instead
        // of reading a property on null.
        if ($parent !== null) {
            $end += $parent->_[TRP_HDOM_INFO_END];
        }
    }

    for ($i = $this->_[TRP_HDOM_INFO_BEGIN] + 1; $i < $end; ++$i) {
        $node = $this->dom->nodes[$i];

        $pass = true;

        // Bare "*" matches direct children only.
        if ($tag === '*' && !$key) {
            if (in_array($node, $this->children, true)) {
                $ret[$i] = 1;
            }
            continue;
        }

        // compare tag
        if ($tag && $tag != $node->tag && $tag !== '*') { $pass = false; }

        // compare key
        if ($pass && $key) {
            if ($no_key) {
                if (isset($node->attr[$key])) { $pass = false; }
            } else {
                if (($key != "plaintext") && !isset($node->attr[$key])) { $pass = false; }
            }
        }

        // compare value
        if ($pass && $key && $val && $val !== '*') {
            // A "plaintext" key tests the node's plain text rather than an attribute.
            if ($key == "plaintext") {
                $nodeKeyValue = $node->text();
            } else {
                $nodeKeyValue = $node->attr[$key];
            }
            if (is_object($debug_object)) { $debug_object->debug_log(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue); }

            // PaperG - If lowercase is set, do a case insensitive test of the value of the selector.
            if ($lowercase) {
                $check = $this->match($exp, strtolower($val), strtolower($nodeKeyValue));
            } else {
                $check = $this->match($exp, $val, $nodeKeyValue);
            }
            if (is_object($debug_object)) { $debug_object->debug_log(2, "after match: " . ($check ? "true" : "false")); }

            // handle multiple class: retry against each space-separated token
            if (!$check && strcasecmp($key, 'class') === 0) {
                foreach (explode(' ', $node->attr[$key]) as $k) {
                    // Skip blanks produced by leading, trailing or doubled spaces.
                    if (!empty($k)) {
                        if ($lowercase) {
                            $check = $this->match($exp, strtolower($val), strtolower($k));
                        } else {
                            $check = $this->match($exp, $val, $k);
                        }
                        if ($check) { break; }
                    }
                }
            }
            if (!$check) { $pass = false; }
        }

        if ($pass) { $ret[$i] = 1; }
        unset($node);
    }

    // It's passed by reference so this is actually what this function returns.
    if (is_object($debug_object)) { $debug_object->debug_log(1, "EXIT - ret: ", $ret); }
}
793
+
794
/**
 * Test a value against a selector pattern using the given operator.
 *
 * @param string $exp     One of "=", "!=", "^=", "$=", "*=".
 * @param string $pattern Value taken from the selector.
 * @param string $value   Value taken from the node.
 * @return bool|int|false Comparison result; the regex-based operators
 *                        return preg_match()'s result. Unknown operators
 *                        yield false.
 */
protected function match($exp, $pattern, $value) {
    global $debug_object;
    if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}

    if ($exp == '=') {
        return ($value === $pattern);
    }
    if ($exp == '!=') {
        return ($value !== $pattern);
    }
    if ($exp == '^=') {
        return preg_match("/^".preg_quote($pattern,'/')."/", $value);
    }
    if ($exp == '$=') {
        return preg_match("/".preg_quote($pattern,'/')."$/", $value);
    }
    if ($exp == '*=') {
        // A pattern that already starts with "/" is used as a complete regex;
        // anything else is wrapped as a case-insensitive regex.
        $regex = ($pattern[0]=='/') ? $pattern : "/".$pattern."/i";
        return preg_match($regex, $value);
    }

    return false;
}
815
+
816
/**
 * Split a selector string into groups of (tag, key, val, exp, no_key) tuples.
 *
 * @param string $selector_string CSS-like selector; "," separates groups.
 * @return array List of groups, each a list of tuples, one per
 *               descendant step.
 */
protected function parse_selector($selector_string) {
    global $debug_object;
    if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}

    // Pattern of CSS selectors, modified from mootools.
    // Paperg: the colon is allowed in tag and attribute names so that
    // <tag attr:ibute="something"> is matched; such attributes must be read
    // with getAttribute() because $node->x:y is not valid PHP.
    // The "\[@?" part lets an attribute specifier start with an "@" that is
    // not captured; whether that should be documented or removed is undecided.
    // The "-" sits last in each character class (it used to be "[\w-:\*]").
    $pattern = "/([\w:\*-]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w:-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
    preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER);
    if (is_object($debug_object)) {$debug_object->debug_log(2, "Matches Array: ", $matches);}

    $selectors = array();
    $result = array();

    foreach ($matches as $m) {
        $m[0] = trim($m[0]);
        if (in_array($m[0], array('', '/', '//'), true)) {
            continue;
        }
        // for browser generated xpath
        if ($m[1] === 'tbody') {
            continue;
        }

        $tag = $m[1];
        $key = null;
        $val = null;
        $exp = '=';
        $no_key = false;

        if (!empty($m[2])) { $key = 'id';    $val = $m[2]; }
        if (!empty($m[3])) { $key = 'class'; $val = $m[3]; }
        if (!empty($m[4])) { $key = $m[4]; }
        if (!empty($m[5])) { $exp = $m[5]; }
        if (!empty($m[6])) { $val = $m[6]; }

        // convert to lowercase
        if ($this->dom->lowercase) {
            $tag = strtolower($tag);
            $key = strtolower($key);
        }

        // elements that do NOT have the specified attribute
        if (isset($key[0]) && $key[0] === '!') {
            $key = substr($key, 1);
            $no_key = true;
        }

        $result[] = array($tag, $key, $val, $exp, $no_key);

        // A comma closes the current group.
        if (trim($m[7]) === ',') {
            $selectors[] = $result;
            $result = array();
        }
    }

    if (count($result) > 0) {
        $selectors[] = $result;
    }
    return $selectors;
}
863
+
864
/**
 * Magic getter: attributes first, then the virtual text properties.
 *
 * @param string $name Attribute name or one of outertext, innertext,
 *                     plaintext, xmltext.
 * @return mixed The charset-converted attribute value, the requested text,
 *               or otherwise whether the attribute key exists.
 */
function __get($name)
{
    if (isset($this->attr[$name])) {
        return $this->convert_text($this->attr[$name]);
    }

    if ($name == 'outertext') {
        return $this->outertext();
    }
    if ($name == 'innertext') {
        return $this->innertext();
    }
    if ($name == 'plaintext') {
        return $this->text();
    }
    if ($name == 'xmltext') {
        return $this->xmltext();
    }

    return array_key_exists($name, $this->attr);
}
879
+
880
/**
 * Magic setter: outertext/innertext write to the info array, anything
 * else is stored as an attribute.
 *
 * @param string $name
 * @param mixed  $value
 * @return mixed The assigned value for outertext/innertext, nothing otherwise.
 */
function __set($name, $value)
{
    global $debug_object;
    if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}

    if ($name == 'outertext') {
        return $this->_[TRP_HDOM_INFO_OUTER] = $value;
    }
    if ($name == 'innertext') {
        // Text-like nodes keep their content in the TEXT slot.
        if (isset($this->_[TRP_HDOM_INFO_TEXT])) {
            return $this->_[TRP_HDOM_INFO_TEXT] = $value;
        }
        return $this->_[TRP_HDOM_INFO_INNER] = $value;
    }

    // New attribute: record default spacing and double quotes for makeup().
    if (!isset($this->attr[$name])) {
        $this->_[TRP_HDOM_INFO_SPACE][] = array(' ', '', '');
        $this->_[TRP_HDOM_INFO_QUOTE][] = TRP_HDOM_QUOTE_DOUBLE;
    }
    $this->attr[$name] = $value;
}
899
+
900
/**
 * Magic isset: the virtual text properties always exist; attributes
 * exist when their key is present, even with a null value.
 *
 * @param string $name
 * @return bool
 */
function __isset($name)
{
    if ($name == 'outertext' || $name == 'innertext' || $name == 'plaintext') {
        return true;
    }

    // Key presence covers value-less attributes (nowrap, checked, selected...).
    return array_key_exists($name, $this->attr);
}
911
+
912
/**
 * Magic unset: remove an attribute that currently has a non-null value.
 *
 * @param string $name
 */
function __unset($name) {
    if (!isset($this->attr[$name])) {
        return;
    }
    unset($this->attr[$name]);
}
916
+
917
+ // PaperG - Function to convert the text from one character set to another if the two sets are not the same.
918
/**
 * Convert text from the DOM's source charset to its target charset and
 * strip a UTF-8 byte order mark from either end of the result.
 *
 * @param string $text
 * @return string|false The converted text (iconv()'s result when a
 *                      conversion was performed).
 */
function convert_text($text)
{
    global $debug_object;
    if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}

    $sourceCharset = "";
    $targetCharset = "";
    if ($this->dom) {
        $sourceCharset = strtoupper($this->dom->_charset);
        $targetCharset = strtoupper($this->dom->_target_charset);
    }
    if (is_object($debug_object)) {$debug_object->debug_log(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);}

    $converted_text = $text;

    $charsetsDiffer = !empty($sourceCharset)
        && !empty($targetCharset)
        && (strcasecmp($sourceCharset, $targetCharset) != 0);

    if ($charsetsDiffer) {
        // The declared encoding may be wrong: text that already is UTF-8
        // is passed through untouched when UTF-8 is the target.
        $alreadyUtf8 = (strcasecmp($targetCharset, 'UTF-8') == 0) && ($this->is_utf8($text));
        if (!$alreadyUtf8) {
            $converted_text = iconv($sourceCharset, $targetCharset, $text);
        }
    }

    // Never emit a BOM with UTF-8 output.
    if ($targetCharset == 'UTF-8') {
        $bom = "\xef\xbb\xbf";
        if (substr($converted_text, 0, 3) == $bom) {
            $converted_text = substr($converted_text, 3);
        }
        if (substr($converted_text, -3) == $bom) {
            $converted_text = substr($converted_text, 0, -3);
        }
    }

    return $converted_text;
}
963
+
964
+ /**
965
+ * Returns true if $string is valid UTF-8 and false otherwise.
966
+ *
967
+ * @param mixed $str String to be tested
968
+ * @return boolean
969
+ */
970
static function is_utf8($str)
{
    $len = strlen($str);
    for ($i = 0; $i < $len; $i++) {
        $c = ord($str[$i]);

        // Bytes below 0x80 are single-byte characters. 0x80 itself is a
        // continuation byte and must not start a sequence, hence ">=".
        if ($c >= 128) {
            // Derive the sequence length from the lead byte.
            if ($c >= 254) return false;
            elseif ($c >= 252) $bits = 6;
            elseif ($c >= 248) $bits = 5;
            elseif ($c >= 240) $bits = 4;
            elseif ($c >= 224) $bits = 3;
            elseif ($c >= 192) $bits = 2;
            else return false; // stray continuation byte (0x80-0xBF)

            // The sequence must fit in the remaining input.
            if (($i + $bits) > $len) return false;

            // Every following byte must be a continuation byte (0x80-0xBF).
            while ($bits > 1) {
                $i++;
                $b = ord($str[$i]);
                if ($b < 128 || $b > 191) return false;
                $bits--;
            }
        }
    }
    return true;
}
999
+ /*
1000
+ function is_utf8($string)
1001
+ {
1002
+ //this is buggy
1003
+ return (utf8_encode(utf8_decode($string)) == $string);
1004
+ }
1005
+ */
1006
+
1007
+ /**
1008
+ * Function to try a few tricks to determine the displayed size of an img on the page.
1009
+ * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types.
1010
+ *
1011
+ * @author John Schlick
1012
+ * @version April 19 2012
1013
+ * @return array an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it out.
1014
+ */
1015
function get_display_size()
{
    if ($this->tag !== 'img') {
        return false;
    }

    // -1 means "unknown" for either dimension.
    $sizes = array('width' => -1, 'height' => -1);

    // Explicit width/height attributes on the tag take precedence.
    foreach (array_keys($sizes) as $dimension) {
        if (isset($this->attr[$dimension])) {
            $sizes[$dimension] = $this->attr[$dimension];
        }
    }

    // Now look for an inline style.
    if (isset($this->attr['style'])) {
        // Thanks to user gnarf from stackoverflow for this regular expression.
        $attributes = array();
        preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER);
        foreach ($matches as $match) {
            // The value group is greedy and swallows whitespace before ";",
            // so trim it; otherwise "100px ;" fails the "px" suffix test.
            $attributes[$match[1]] = trim($match[2]);
        }

        foreach ($sizes as $dimension => $current) {
            // Only fill in dimensions the tag attributes did not provide.
            if ($current != -1 || !isset($attributes[$dimension])) {
                continue;
            }
            // Only pixel values are understood.
            if (strtolower(substr($attributes[$dimension], -2)) != 'px') {
                continue;
            }
            $proposed = substr($attributes[$dimension], 0, -2);
            // Compare against false explicitly: filter_var() returns the
            // integer, and a valid 0 must not be mistaken for failure.
            if (filter_var($proposed, FILTER_VALIDATE_INT) !== false) {
                $sizes[$dimension] = $proposed;
            }
        }
    }

    // Future enhancement:
    // Look in the tag to see if there is a class or id specified that has a height or width attribute to it.

    // Far future enhancement
    // Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width
    // Note that in this case, the class or id will have the img subselector for it to apply to the image.

    // ridiculously far future development
    // If the class or id is specified in a SEPARATE css file thats not on the page, go get it and do what we were just doing for the ones on the page.

    return array(
        'height' => $sizes['height'],
        'width'  => $sizes['width'],
    );
}
1094
+
1095
+ // camel naming conventions
1096
/** @return array The raw attribute list. */
function getAllAttributes()
{
    return $this->attr;
}

/** Alias of the magic getter. */
function getAttribute($name)
{
    return $this->__get($name);
}

/** Alias of the magic setter. */
function setAttribute($name, $value)
{
    $this->__set($name, $value);
}

/** Alias of the magic isset. */
function hasAttribute($name)
{
    return $this->__isset($name);
}

/** Marks the attribute as removed by setting its value to null. */
function removeAttribute($name)
{
    $this->__set($name, null);
}

/** First node matching "#$id". */
function getElementById($id)
{
    return $this->find("#$id", 0);
}

/** All nodes matching "#$id", or the one at $idx. */
function getElementsById($id, $idx=null)
{
    return $this->find("#$id", $idx);
}

/** First node matching the tag selector. */
function getElementByTagName($name)
{
    return $this->find($name, 0);
}

/** All nodes matching the tag selector, or the one at $idx. */
function getElementsByTagName($name, $idx=null)
{
    return $this->find($name, $idx);
}

/** Alias of parent() in getter mode. */
function parentNode()
{
    return $this->parent();
}

/** Alias of children(). */
function childNodes($idx=-1)
{
    return $this->children($idx);
}

/** Alias of first_child(). */
function firstChild()
{
    return $this->first_child();
}

/** Alias of last_child(). */
function lastChild()
{
    return $this->last_child();
}

/** Alias of next_sibling(). */
function nextSibling()
{
    return $this->next_sibling();
}

/** Alias of prev_sibling(). */
function previousSibling()
{
    return $this->prev_sibling();
}

/** Alias of has_child(). */
function hasChildNodes()
{
    return $this->has_child();
}

/** @return string The tag name. */
function nodeName()
{
    return $this->tag;
}

/** Attach $node below this node and hand it back. */
function appendChild($node)
{
    $node->parent($this);
    return $node;
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1114
 
1115
  }
1116
 
1122
  *
1123
  * @package PlaceLocalInclude
1124
  */
1125
+ class simple_html_dom
1126
  {
1127
+ /**
1128
+ * The root node of the document
1129
+ *
1130
+ * @var object
1131
+ */
1132
+ public $root = null;
1133
+
1134
+ /**
1135
+ * List of nodes in the current DOM
1136
+ *
1137
+ * @var array
1138
+ */
1139
+ public $nodes = array();
1140
+
1141
+ /**
1142
+ * Callback function to run for each element in the DOM.
1143
+ *
1144
+ * @var callable|null
1145
+ */
1146
+ public $callback = null;
1147
+
1148
+ /**
1149
+ * Indicates how tags and attributes are matched
1150
+ *
1151
+ * @var bool When set to **true** tags and attributes will be converted to
1152
+ * lowercase before matching.
1153
+ */
1154
+ public $lowercase = false;
1155
+
1156
+ /**
1157
+ * Original document size
1158
+ *
1159
+ * Holds the original document size.
1160
+ *
1161
+ * @var int
1162
+ */
1163
+ public $original_size;
1164
+
1165
+ /**
1166
+ * Current document size
1167
+ *
1168
+ * Holds the current document size. The document size is determined by the
1169
+ * string length of ({@see simple_html_dom::$doc}).
1170
+ *
1171
+ * _Note_: Using this variable is more efficient than calling `strlen($doc)`
1172
+ *
1173
+ * @var int
1174
+ * */
1175
+ public $size;
1176
+
1177
+ /**
1178
+ * Current position in the document
1179
+ *
1180
+ * @var int
1181
+ */
1182
+ protected $pos;
1183
+
1184
+ /**
1185
+ * The document
1186
+ *
1187
+ * @var string
1188
+ */
1189
+ protected $doc;
1190
+
1191
+ /**
1192
+ * Current character
1193
+ *
1194
+ * Holds the current character at position {@see simple_html_dom::$pos} in
1195
+ * the document {@see simple_html_dom::$doc}
1196
+ *
1197
+ * _Note_: Using this variable is more efficient than calling `substr($doc, $pos, 1)`
1198
+ *
1199
+ * @var string
1200
+ */
1201
+ protected $char;
1202
+
1203
+ protected $cursor;
1204
+
1205
+ /**
1206
+ * Parent node of the next node detected by the parser
1207
+ *
1208
+ * @var object
1209
+ */
1210
+ protected $parent;
1211
+ protected $noise = array();
1212
+
1213
+ /**
1214
+ * Tokens considered blank in HTML
1215
+ *
1216
+ * @var string
1217
+ */
1218
+ protected $token_blank = " \t\r\n";
1219
+
1220
+ /**
1221
+ * Tokens to identify the equal sign for attributes, stopping either at the
1222
+ * closing tag ("/" i.e. "<html />") or the end of an opening tag (">" i.e.
1223
+ * "<html>")
1224
+ *
1225
+ * @var string
1226
+ */
1227
+ protected $token_equal = ' =/>';
1228
+
1229
+ /**
1230
+ * Tokens to identify the end of a tag name. A tag name either ends on the
1231
+ * ending slash ("/" i.e. "<html/>") or whitespace ("\s\r\n\t")
1232
+ *
1233
+ * @var string
1234
+ */
1235
+ protected $token_slash = " />\r\n\t";
1236
+
1237
+ /**
1238
+ * Tokens to identify the end of an attribute
1239
+ *
1240
+ * @var string
1241
+ */
1242
+ protected $token_attr = ' >';
1243
+
1244
+ // Note that this is referenced by a child node, and so it needs to be public for that node to see this information.
1245
+ public $_charset = '';
1246
+ public $_target_charset = '';
1247
+
1248
+ /**
1249
+ * Innertext for <br> elements
1250
+ *
1251
+ * @var string
1252
+ */
1253
+ protected $default_br_text = "";
1254
+
1255
+ /**
1256
+ * Suffix for <span> elements
1257
+ *
1258
+ * @var string
1259
+ */
1260
+ public $default_span_text = "";
1261
+
1262
+ /**
1263
+ * Defines a list of self-closing tags (Void elements) according to the HTML
1264
+ * Specification
1265
+ *
1266
+ * _Remarks_:
1267
+ * - Use `isset()` instead of `in_array()` on array elements to boost
1268
+ * performance about 30%
1269
+ * - Sort elements by name for better readability!
1270
+ *
1271
+ * @link https://www.w3.org/TR/html HTML Specification
1272
+ * @link https://www.w3.org/TR/html/syntax.html#void-elements Void elements
1273
+ */
1274
+ protected $self_closing_tags = array(
1275
+ 'area'=>1,
1276
+ 'base'=>1,
1277
+ 'br'=>1,
1278
+ 'col'=>1,
1279
+ 'embed'=>1,
1280
+ 'hr'=>1,
1281
+ 'img'=>1,
1282
+ 'input'=>1,
1283
+ 'link'=>1,
1284
+ 'meta'=>1,
1285
+ 'param'=>1,
1286
+ 'source'=>1,
1287
+ 'track'=>1,
1288
+ 'wbr'=>1
1289
+ );
1290
+
1291
+ /**
1292
+ * Defines a list of tags which - if closed - close all optional closing
1293
+ * elements within if they haven't been closed yet. (So, an element where
1294
+ * neither opening nor closing tag is omissible consistently closes every
1295
+ * optional closing element within)
1296
+ *
1297
+ * _Remarks_:
1298
+ * - Use `isset()` instead of `in_array()` on array elements to boost
1299
+ * performance about 30%
1300
+ * - Sort elements by name for better readability!
1301
+ */
1302
+ protected $block_tags = array(
1303
+ 'body'=>1,
1304
+ 'div'=>1,
1305
+ 'form'=>1,
1306
+ 'root'=>1,
1307
+ 'span'=>1,
1308
+ 'table'=>1
1309
+ );
1310
+
1311
+ /**
1312
+ * Defines elements whose end tag is omissible.
1313
+ *
1314
+ * * key = Name of an element whose end tag is omissible.
1315
+ * * value = Names of elements whose end tag is omissible, that are closed
1316
+ * by the current element.
1317
+ *
1318
+ * _Remarks_:
1319
+ * - Use `isset()` instead of `in_array()` on array elements to boost
1320
+ * performance about 30%
1321
+ * - Sort elements by name for better readability!
1322
+ *
1323
+ * **Example**
1324
+ *
1325
+ * An `li` element’s end tag may be omitted if the `li` element is immediately
1326
+ * followed by another `li` element. To do that, add following element to the
1327
+ * array:
1328
+ *
1329
+ * ```php
1330
+ * 'li' => array('li'),
1331
+ * ```
1332
+ *
1333
+ * With this, the following two examples are considered equal. Note that the
1334
+ * second example is missing the closing tags on `li` elements.
1335
+ *
1336
+ * ```html
1337
+ * <ul><li>First Item</li><li>Second Item</li></ul>
1338
+ * ```
1339
+ *
1340
+ * <ul><li>First Item</li><li>Second Item</li></ul>
1341
+ *
1342
+ * ```html
1343
+ * <ul><li>First Item<li>Second Item</ul>
1344
+ * ```
1345
+ *
1346
+ * <ul><li>First Item<li>Second Item</ul>
1347
+ *
1348
+ * @var array A two-dimensional array where the key is the name of an
1349
+ * element whose end tag is omissible and the value is an array of elements
1350
+ * whose end tag is omissible, that are closed by the current element.
1351
+ *
1352
+ * @link https://www.w3.org/TR/html/syntax.html#optional-tags Optional tags
1353
+ *
1354
+ * @todo The implementation of optional closing tags doesn't work in all cases
1355
+ * because it only considers elements that close other optional closing
1356
+ * tags, not taking into account that some (non-blocking) tags should close
1357
+ * these optional closing tags. For example, the end tag for "p" is omissible
1358
+ * and can be closed by an "address" element, whose end tag is NOT omissible.
1359
+ * Currently a "p" element without closing tag stops at the next "p" element
1360
+ * or blocking tag, even if it contains other elements.
1361
+ *
1362
+ * @todo Known sourceforge issue #2977341
1363
+ * B tags that are not closed cause us to return everything to the end of
1364
+ * the document.
1365
+ */
1366
+ protected $optional_closing_tags = array(
1367
+ 'b'=>array('b'=>1), // Not optional, see https://www.w3.org/TR/html/textlevel-semantics.html#the-b-element
1368
+ 'dd'=>array('dd'=>1, 'dt'=>1),
1369
+ 'dl'=>array('dd'=>1, 'dt'=>1), // Not optional, see https://www.w3.org/TR/html/grouping-content.html#the-dl-element
1370
+ 'dt'=>array('dd'=>1, 'dt'=>1),
1371
+ 'li'=>array('li'=>1),
1372
+ 'optgroup'=>array('optgroup'=>1, 'option'=>1),
1373
+ 'option'=>array('optgroup'=>1, 'option'=>1),
1374
+ 'p'=>array('p'=>1),
1375
+ 'rp'=>array('rp'=>1, 'rt'=>1),
1376
+ 'rt'=>array('rp'=>1, 'rt'=>1),
1377
+ 'td'=>array('td'=>1, 'th'=>1),
1378
+ 'th'=>array('td'=>1, 'th'=>1),
1379
+ 'tr'=>array('td'=>1, 'th'=>1, 'tr'=>1),
1380
+ );
1381
+
1382
+ function __construct($str=null, $lowercase=true, $forceTagsClosed=true, $target_charset=TRP_DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=TRP_DEFAULT_BR_TEXT, $defaultSpanText=TRP_DEFAULT_SPAN_TEXT, $options=0)
1383
+ {
1384
+ if ($str)
1385
+ {
1386
+ if (preg_match("/^http:\/\//i",$str) || is_file($str))
1387
+ {
1388
+ $this->load_file($str);
1389
+ }
1390
+ else
1391
+ {
1392
+ $this->load($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText, $options);
1393
+ }
1394
+ }
1395
+ // Forcing tags to be closed implies that we don't trust the html, but it can lead to parsing errors if we SHOULD trust the html.
1396
+ if (!$forceTagsClosed) {
1397
+ $this->optional_closing_array=array();
1398
+ }
1399
+ $this->_target_charset = $target_charset;
1400
+ }
1401
+
1402
+ function __destruct()
1403
+ {
1404
+ $this->clear();
1405
+ }
1406
+
1407
+ // load html from string
1408
+ function load($str, $lowercase=true, $stripRN=true, $defaultBRText=TRP_DEFAULT_BR_TEXT, $defaultSpanText=TRP_DEFAULT_SPAN_TEXT, $options=0)
1409
+ {
1410
+ global $debug_object;
1411
+
1412
+ // prepare
1413
+ $this->prepare($str, $lowercase, $defaultBRText, $defaultSpanText);
1414
+
1415
+ // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
1416
+ // Script tags removal now preceeds style tag removal.
1417
+ // strip out <script> tags
1418
+ $this->remove_noise("'<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>'is");
1419
+ $this->remove_noise("'<\s*script\s*>(.*?)<\s*/\s*script\s*>'is");
1420
+
1421
+ // strip out the \r \n's if we are told to.
1422
+ if ($stripRN) {
1423
+ $this->doc = str_replace("\r", " ", $this->doc);
1424
+ $this->doc = str_replace("\n", " ", $this->doc);
1425
+
1426
+ // set the length of content since we have changed it.
1427
+ $this->size = strlen($this->doc);
1428
+ }
1429
+
1430
+ // strip out cdata
1431
+ $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
1432
+ // strip out comments
1433
+ $this->remove_noise("'<!--(.*?)-->'is");
1434
+ // strip out <style> tags
1435
+ $this->remove_noise("'<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>'is");
1436
+ $this->remove_noise("'<\s*style\s*>(.*?)<\s*/\s*style\s*>'is");
1437
+ // strip out preformatted tags
1438
+ $this->remove_noise("'<\s*(?:code)[^>]*>(.*?)<\s*/\s*(?:code)\s*>'is");
1439
+ // strip out server side scripts
1440
+ $this->remove_noise("'(<\?)(.*?)(\?>)'s", true);
1441
+
1442
+ if($options & TRP_HDOM_SMARTY_AS_TEXT) { // Strip Smarty scripts
1443
+ $this->remove_noise("'(\{\w)(.*?)(\})'s", true);
1444
+ }
1445
+
1446
+ // parsing
1447
+ $this->parse();
1448
+ // end
1449
+ $this->root->_[TRP_HDOM_INFO_END] = $this->cursor;
1450
+ $this->parse_charset();
1451
+
1452
+ // make load function chainable
1453
+ return $this;
1454
+
1455
+ }
1456
+
1457
+ // load html from file
1458
+ function load_file()
1459
+ {
1460
+ $args = func_get_args();
1461
+
1462
+ if($doc = call_user_func_array('file_get_contents', $args) !== false) {
1463
+ $this->load($doc, true);
1464
+ } else {
1465
+ return false;
1466
+ }
1467
+ }
1468
+
1469
+ /**
1470
+ * Set the callback function
1471
+ *
1472
+ * @param callable $function_name Callback function to run for each element
1473
+ * in the DOM.
1474
+ * @return void
1475
+ */
1476
+ function set_callback($function_name)
1477
+ {
1478
+ $this->callback = $function_name;
1479
+ }
1480
+
1481
+ /**
1482
+ * Remove callback function
1483
+ *
1484
+ * @return void
1485
+ */
1486
+ function remove_callback()
1487
+ {
1488
+ $this->callback = null;
1489
+ }
1490
+
1491
+ // save dom as string
1492
+ function save($filepath='')
1493
+ {
1494
+ $ret = $this->root->innertext();
1495
+ if ($filepath!=='') file_put_contents($filepath, $ret, LOCK_EX);
1496
+ return $ret;
1497
+ }
1498
+
1499
+ // find dom node by css selector
1500
+ // Paperg - allow us to specify that we want case insensitive testing of the value of the selector.
1501
+ function find($selector, $idx=null, $lowercase=false)
1502
+ {
1503
+ return $this->root->find($selector, $idx, $lowercase);
1504
+ }
1505
+
1506
+ // clean up memory due to php5 circular references memory leak...
1507
+ function clear()
1508
+ {
1509
+ foreach ($this->nodes as $n) {$n->clear(); $n = null;}
1510
+ // This add next line is documented in the sourceforge repository. 2977248 as a fix for ongoing memory leaks that occur even with the use of clear.
1511
+ if (isset($this->children)) foreach ($this->children as $n) {$n->clear(); $n = null;}
1512
+ if (isset($this->parent)) {$this->parent->clear(); unset($this->parent);}
1513
+ if (isset($this->root)) {$this->root->clear(); unset($this->root);}
1514
+ unset($this->doc);
1515
+ unset($this->noise);
1516
+ }
1517
+
1518
+ function dump($show_attr=true)
1519
+ {
1520
+ $this->root->dump($show_attr);
1521
+ }
1522
+
1523
+ // prepare HTML data and init everything
1524
+ protected function prepare($str, $lowercase=true, $defaultBRText=TRP_DEFAULT_BR_TEXT, $defaultSpanText=TRP_DEFAULT_SPAN_TEXT)
1525
+ {
1526
+ $this->clear();
1527
+
1528
+ $this->doc = trim($str);
1529
+ $this->size = strlen($this->doc);
1530
+ $this->original_size = $this->size; // Save the original size of the html that we got in. It might be useful to someone.
1531
+ $this->pos = 0;
1532
+ $this->cursor = 1;
1533
+ $this->noise = array();
1534
+ $this->nodes = array();
1535
+ $this->lowercase = $lowercase;
1536
+ $this->default_br_text = $defaultBRText;
1537
+ $this->default_span_text = $defaultSpanText;
1538
+ $this->root = new simple_html_dom_node($this);
1539
+ $this->root->tag = 'root';
1540
+ $this->root->_[TRP_HDOM_INFO_BEGIN] = -1;
1541
+ $this->root->nodetype = TRP_HDOM_TYPE_ROOT;
1542
+ $this->parent = $this->root;
1543
+ if ($this->size>0) $this->char = $this->doc[0];
1544
+ }
1545
+
1546
+ /**
1547
+ * Parse HTML content
1548
+ *
1549
+ * @return bool True on success
1550
+ */
1551
+ protected function parse()
1552
+ {
1553
+ while (true) {
1554
+ // Read next tag if there is no text between current position and the
1555
+ // next opening tag.
1556
+ if (($s = $this->copy_until_char('<'))==='')
1557
+ {
1558
+ if($this->read_tag()) {
1559
+ continue;
1560
+ } else {
1561
+ return true;
1562
+ }
1563
+ }
1564
+
1565
+ // Add a text node for text between tags
1566
+ $node = new simple_html_dom_node($this);
1567
+ ++$this->cursor;
1568
+ $node->_[TRP_HDOM_INFO_TEXT] = $s;
1569
+ $this->link_nodes($node, false);
1570
+ }
1571
+ }
1572
+
1573
+ // PAPERG - dkchou - added this to try to identify the character set of the page we have just parsed so we know better how to spit it out later.
1574
+ // NOTE: IF you provide a routine called get_last_retrieve_url_contents_content_type which returns the CURLINFO_CONTENT_TYPE from the last curl_exec
1575
+ // (or the content_type header from the last transfer), we will parse THAT, and if a charset is specified, we will use it over any other mechanism.
1576
+ protected function parse_charset()
1577
+ {
1578
+ global $debug_object;
1579
+
1580
+ $charset = null;
1581
+
1582
+ if (function_exists('get_last_retrieve_url_contents_content_type'))
1583
+ {
1584
+ $contentTypeHeader = get_last_retrieve_url_contents_content_type();
1585
+ $success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches);
1586
+ if ($success)
1587
+ {
1588
+ $charset = $matches[1];
1589
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'header content-type found charset of: ' . $charset);}
1590
+ }
1591
+
1592
+ }
1593
+
1594
+ if (empty($charset))
1595
+ {
1596
+ $el = $this->root->find('meta[http-equiv=Content-Type]',0, true);
1597
+ if (!empty($el))
1598
+ {
1599
+ $fullvalue = $el->content;
1600
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag found' . $fullvalue);}
1601
+
1602
+ if (!empty($fullvalue))
1603
+ {
1604
+ $success = preg_match('/charset=(.+)/i', $fullvalue, $matches);
1605
+ if ($success)
1606
+ {
1607
+ $charset = $matches[1];
1608
+ }
1609
+ else
1610
+ {
1611
+ // If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1
1612
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');}
1613
+ $charset = 'ISO-8859-1';
1614
+ }
1615
+ }
1616
+ }
1617
+ }
1618
+
1619
+ // If we couldn't find a charset above, then lets try to detect one based on the text we got...
1620
+ if (empty($charset))
1621
+ {
1622
+ // Use this in case mb_detect_charset isn't installed/loaded on this machine.
1623
+ $charset = false;
1624
+ if (function_exists('mb_detect_encoding'))
1625
+ {
1626
+ // Have php try to detect the encoding from the text given to us.
1627
+ $charset = mb_detect_encoding($this->doc . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) );
1628
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'mb_detect found: ' . $charset);}
1629
+ }
1630
+
1631
+ // and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need...
1632
+ if ($charset === false)
1633
+ {
1634
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'since mb_detect failed - using default of utf-8');}
1635
+ $charset = 'UTF-8';
1636
+ }
1637
+ }
1638
+
1639
+ // Since CP1252 is a superset, if we get one of it's subsets, we want it instead.
1640
+ if ((strtolower($charset) == strtolower('ISO-8859-1')) || (strtolower($charset) == strtolower('Latin1')) || (strtolower($charset) == strtolower('Latin-1')))
1641
+ {
1642
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'replacing ' . $charset . ' with CP1252 as its a superset');}
1643
+ $charset = 'CP1252';
1644
+ }
1645
+
1646
+ if (is_object($debug_object)) {$debug_object->debug_log(1, 'EXIT - ' . $charset);}
1647
+
1648
+ return $this->_charset = $charset;
1649
+ }
1650
+
1651
+ /**
1652
+ * Parse tag from current document position.
1653
+ *
1654
+ * @return bool True if a tag was found, false otherwise
1655
+ */
1656
+ protected function read_tag()
1657
+ {
1658
+ // Set end position if no further tags found
1659
+ if ($this->char!=='<')
1660
+ {
1661
+ $this->root->_[TRP_HDOM_INFO_END] = $this->cursor;
1662
+ return false;
1663
+ }
1664
+ $begin_tag_pos = $this->pos;
1665
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1666
+
1667
+ // end tag
1668
+ if ($this->char==='/')
1669
+ {
1670
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1671
+
1672
+ // Skip whitespace in end tags (i.e. in "</ html>")
1673
+ $this->skip($this->token_blank);
1674
+ $tag = $this->copy_until_char('>');
1675
+
1676
+ // Skip attributes in end tags
1677
+ if (($pos = strpos($tag, ' '))!==false)
1678
+ $tag = substr($tag, 0, $pos);
1679
+
1680
+ $parent_lower = strtolower($this->parent->tag);
1681
+ $tag_lower = strtolower($tag);
1682
+
1683
+ // The end tag is supposed to close the parent tag. Handle situations
1684
+ // when it doesn't
1685
+ if ($parent_lower!==$tag_lower)
1686
+ {
1687
+ // Parent tag does not have to be closed necessarily (optional closing tag)
1688
+ // Current tag is a block tag, so it may close an ancestor
1689
+ if (isset($this->optional_closing_tags[$parent_lower]) && isset($this->block_tags[$tag_lower]))
1690
+ {
1691
+ $this->parent->_[TRP_HDOM_INFO_END] = 0;
1692
+ $org_parent = $this->parent;
1693
+
1694
+ // Traverse ancestors to find a matching opening tag
1695
+ // Stop at root node
1696
+ while (($this->parent->parent) && strtolower($this->parent->tag)!==$tag_lower)
1697
+ $this->parent = $this->parent->parent;
1698
+
1699
+ // If we don't have a match add current tag as text node
1700
+ if (strtolower($this->parent->tag)!==$tag_lower) {
1701
+ $this->parent = $org_parent; // restore origonal parent
1702
+ if ($this->parent->parent) $this->parent = $this->parent->parent;
1703
+ $this->parent->_[TRP_HDOM_INFO_END] = $this->cursor;
1704
+ return $this->as_text_node($tag);
1705
+ }
1706
+ }
1707
+ // Grandparent exists and current tag is a block tag, so our parent doesn't have an end tag
1708
+ else if (($this->parent->parent) && isset($this->block_tags[$tag_lower]))
1709
+ {
1710
+ $this->parent->_[TRP_HDOM_INFO_END] = 0; // No end tag
1711
+ $org_parent = $this->parent;
1712
+
1713
+ // Traverse ancestors to find a matching opening tag
1714
+ // Stop at root node
1715
+ while (($this->parent->parent) && strtolower($this->parent->tag)!==$tag_lower)
1716
+ $this->parent = $this->parent->parent;
1717
+
1718
+ // If we don't have a match add current tag as text node
1719
+ if (strtolower($this->parent->tag)!==$tag_lower)
1720
+ {
1721
+ $this->parent = $org_parent; // restore origonal parent
1722
+ $this->parent->_[TRP_HDOM_INFO_END] = $this->cursor;
1723
+ return $this->as_text_node($tag);
1724
+ }
1725
+ }
1726
+ // Grandparent exists and current tag closes it
1727
+ else if (($this->parent->parent) && strtolower($this->parent->parent->tag)===$tag_lower)
1728
+ {
1729
+ $this->parent->_[TRP_HDOM_INFO_END] = 0;
1730
+ $this->parent = $this->parent->parent;
1731
+ }
1732
+ else // Random tag, add as text node
1733
+ return $this->as_text_node($tag);
1734
+ }
1735
+
1736
+ // Set end position of parent tag to current cursor position
1737
+ $this->parent->_[TRP_HDOM_INFO_END] = $this->cursor;
1738
+ if ($this->parent->parent) $this->parent = $this->parent->parent;
1739
+
1740
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1741
+ return true;
1742
+ }
1743
+
1744
+ // start tag
1745
+ $node = new simple_html_dom_node($this);
1746
+ $node->_[TRP_HDOM_INFO_BEGIN] = $this->cursor;
1747
+ ++$this->cursor;
1748
+ $tag = $this->copy_until($this->token_slash); // Get tag name
1749
+ $node->tag_start = $begin_tag_pos;
1750
+
1751
+ // doctype, cdata & comments...
1752
+ // <!DOCTYPE html>
1753
+ // <![CDATA[ ... ]]>
1754
+ // <!-- Comment -->
1755
+ if (isset($tag[0]) && $tag[0]==='!') {
1756
+ $node->_[TRP_HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until_char('>');
1757
+
1758
+ if (isset($tag[2]) && $tag[1]==='-' && $tag[2]==='-') { // Comment ("<!--")
1759
+ $node->nodetype = TRP_HDOM_TYPE_COMMENT;
1760
+ $node->tag = 'comment';
1761
+ } else { // Could be doctype or CDATA but we don't care
1762
+ $node->nodetype = TRP_HDOM_TYPE_UNKNOWN;
1763
+ $node->tag = 'unknown';
1764
+ }
1765
+ if ($this->char==='>') $node->_[TRP_HDOM_INFO_TEXT].='>';
1766
+ $this->link_nodes($node, true);
1767
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1768
+ return true;
1769
+ }
1770
+
1771
+ // The start tag cannot contain another start tag, if so add as text
1772
+ // i.e. "<<html>"
1773
+ if ($pos=strpos($tag, '<')!==false) {
1774
+ $tag = '<' . substr($tag, 0, -1);
1775
+ $node->_[TRP_HDOM_INFO_TEXT] = $tag;
1776
+ $this->link_nodes($node, false);
1777
+ $this->char = $this->doc[--$this->pos]; // prev
1778
+ return true;
1779
+ }
1780
+
1781
+ // Handle invalid tag names (i.e. "<html#doc>")
1782
+ if (!preg_match("/^\w[\w:-]*$/", $tag)) {
1783
+ $node->_[TRP_HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>');
1784
+
1785
+ // Next char is the beginning of a new tag, don't touch it.
1786
+ if ($this->char==='<') {
1787
+ $this->link_nodes($node, false);
1788
+ return true;
1789
+ }
1790
+
1791
+ // Next char closes current tag, add and be done with it.
1792
+ if ($this->char==='>') $node->_[TRP_HDOM_INFO_TEXT].='>';
1793
+ $this->link_nodes($node, false);
1794
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1795
+ return true;
1796
+ }
1797
+
1798
+ // begin tag, add new node
1799
+ $node->nodetype = TRP_HDOM_TYPE_ELEMENT;
1800
+ $tag_lower = strtolower($tag);
1801
+ $node->tag = ($this->lowercase) ? $tag_lower : $tag;
1802
+
1803
+ // handle optional closing tags
1804
+ if (isset($this->optional_closing_tags[$tag_lower]) )
1805
+ {
1806
+ // Traverse ancestors to close all optional closing tags
1807
+ while (isset($this->optional_closing_tags[$tag_lower][strtolower($this->parent->tag)]))
1808
+ {
1809
+ $this->parent->_[TRP_HDOM_INFO_END] = 0;
1810
+ $this->parent = $this->parent->parent;
1811
+ }
1812
+ $node->parent = $this->parent;
1813
+ }
1814
+
1815
+ $guard = 0; // prevent infinity loop
1816
+ $space = array($this->copy_skip($this->token_blank), '', ''); // [0] Space between tag and first attribute
1817
+
1818
+ // attributes
1819
+ do
1820
+ {
1821
+ // Everything until the first equal sign should be the attribute name
1822
+ $name = $this->copy_until($this->token_equal);
1823
+
1824
+ if ($name==='' && $this->char!==null && $space[0]==='')
1825
+ {
1826
+ break;
1827
+ }
1828
+
1829
+ if ($guard===$this->pos) // Escape infinite loop
1830
+ {
1831
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1832
+ continue;
1833
+ }
1834
+ $guard = $this->pos;
1835
+
1836
+ // handle endless '<'
1837
+ if ($this->pos>=$this->size-1 && $this->char!=='>') { // Out of bounds before the tag ended
1838
+ $node->nodetype = TRP_HDOM_TYPE_TEXT;
1839
+ $node->_[TRP_HDOM_INFO_END] = 0;
1840
+ $node->_[TRP_HDOM_INFO_TEXT] = '<'.$tag . $space[0] . $name;
1841
+ $node->tag = 'text';
1842
+ $this->link_nodes($node, false);
1843
+ return true;
1844
+ }
1845
+
1846
+ // handle mismatch '<'
1847
+ if ($this->doc[$this->pos-1]=='<') { // Attributes cannot start after opening tag
1848
+ $node->nodetype = TRP_HDOM_TYPE_TEXT;
1849
+ $node->tag = 'text';
1850
+ $node->attr = array();
1851
+ $node->_[TRP_HDOM_INFO_END] = 0;
1852
+ $node->_[TRP_HDOM_INFO_TEXT] = substr($this->doc, $begin_tag_pos, $this->pos-$begin_tag_pos-1);
1853
+ $this->pos -= 2;
1854
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1855
+ $this->link_nodes($node, false);
1856
+ return true;
1857
+ }
1858
+
1859
+ if ($name!=='/' && $name!=='') { // this is a attribute name
1860
+ $space[1] = $this->copy_skip($this->token_blank); // [1] Whitespace after attribute name
1861
+ $name = $this->restore_noise($name); // might be a noisy name
1862
+ if ($this->lowercase) $name = strtolower($name);
1863
+ if ($this->char==='=') { // attribute with value
1864
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1865
+ $this->parse_attr($node, $name, $space); // get attribute value
1866
+ }
1867
+ else {
1868
+ //no value attr: nowrap, checked selected...
1869
+ $node->_[TRP_HDOM_INFO_QUOTE][] = TRP_HDOM_QUOTE_NO;
1870
+ $node->attr[$name] = true;
1871
+ if ($this->char!='>') $this->char = $this->doc[--$this->pos]; // prev
1872
+ }
1873
+ $node->_[TRP_HDOM_INFO_SPACE][] = $space;
1874
+ $space = array($this->copy_skip($this->token_blank), '', ''); // prepare for next attribute
1875
+ }
1876
+ else // no more attributes
1877
+ break;
1878
+ } while ($this->char!=='>' && $this->char!=='/'); // go until the tag ended
1879
+
1880
+ $this->link_nodes($node, true);
1881
+ $node->_[TRP_HDOM_INFO_ENDSPACE] = $space[0];
1882
+
1883
+ // handle empty tags (i.e. "<div/>")
1884
+ if ($this->copy_until_char('>')==='/')
1885
+ {
1886
+ $node->_[TRP_HDOM_INFO_ENDSPACE] .= '/';
1887
+ $node->_[TRP_HDOM_INFO_END] = 0;
1888
+ }
1889
+ else
1890
+ {
1891
+ // reset parent
1892
+ if (!isset($this->self_closing_tags[strtolower($node->tag)])) $this->parent = $node;
1893
+ }
1894
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1895
+
1896
+ // If it's a BR tag, we need to set it's text to the default text.
1897
+ // This way when we see it in plaintext, we can generate formatting that the user wants.
1898
+ // since a br tag never has sub nodes, this works well.
1899
+ if ($node->tag == "br")
1900
+ {
1901
+ $node->_[TRP_HDOM_INFO_INNER] = $this->default_br_text;
1902
+ }
1903
+
1904
+ return true;
1905
+ }
1906
+
1907
+ /**
1908
+ * Parse attribute from current document position
1909
+ *
1910
+ * @param object $node Node for the attributes
1911
+ * @param string $name Name of the current attribute
1912
+ * @param array $space Array for spacing information
1913
+ * @return void
1914
+ */
1915
+ protected function parse_attr($node, $name, &$space)
1916
+ {
1917
+ // Per sourceforge: http://sourceforge.net/tracker/?func=detail&aid=3061408&group_id=218559&atid=1044037
1918
+ // If the attribute is already defined inside a tag, only pay attention to the first one as opposed to the last one.
1919
+ // https://stackoverflow.com/a/26341866
1920
+ if (isset($node->attr[$name]))
1921
+ {
1922
+ return;
1923
+ }
1924
+
1925
+ $space[2] = $this->copy_skip($this->token_blank); // [2] Whitespace between "=" and the value
1926
+ switch ($this->char) {
1927
+ case '"': // value is anything between double quotes
1928
+ $node->_[TRP_HDOM_INFO_QUOTE][] = TRP_HDOM_QUOTE_DOUBLE;
1929
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1930
+ $node->attr[$name] = $this->restore_noise($this->copy_until_char('"'));
1931
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1932
+ break;
1933
+ case '\'': // value is anything between single quotes
1934
+ $node->_[TRP_HDOM_INFO_QUOTE][] = TRP_HDOM_QUOTE_SINGLE;
1935
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1936
+ $node->attr[$name] = $this->restore_noise($this->copy_until_char('\''));
1937
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1938
+ break;
1939
+ default: // value is anything until the first space or end tag
1940
+ $node->_[TRP_HDOM_INFO_QUOTE][] = TRP_HDOM_QUOTE_NO;
1941
+ $node->attr[$name] = $this->restore_noise($this->copy_until($this->token_attr));
1942
+ }
1943
+ // PaperG: Attributes should not have \r or \n in them, that counts as html whitespace.
1944
+ $node->attr[$name] = str_replace("\r", "", $node->attr[$name]);
1945
+ $node->attr[$name] = str_replace("\n", "", $node->attr[$name]);
1946
+ // PaperG: If this is a "class" selector, lets get rid of the preceeding and trailing space since some people leave it in the multi class case.
1947
+ if ($name == "class") {
1948
+ $node->attr[$name] = trim($node->attr[$name]);
1949
+ }
1950
+ }
1951
+
1952
+ /**
1953
+ * Link node to parent node
1954
+ *
1955
+ * @param object $node Node to link to parent
1956
+ * @param bool $is_child True if the node is a child of parent
1957
+ * @return void
1958
+ */
1959
+ // link node's parent
1960
+ protected function link_nodes(&$node, $is_child)
1961
+ {
1962
+ $node->parent = $this->parent;
1963
+ $this->parent->nodes[] = $node;
1964
+ if ($is_child)
1965
+ {
1966
+ $this->parent->children[] = $node;
1967
+ }
1968
+ }
1969
+
1970
+ /**
1971
+ * Add tag as text node to current node
1972
+ *
1973
+ * @param string $tag Tag name
1974
+ * @return bool True on success
1975
+ */
1976
+ protected function as_text_node($tag)
1977
+ {
1978
+ $node = new simple_html_dom_node($this);
1979
+ ++$this->cursor;
1980
+ $node->_[TRP_HDOM_INFO_TEXT] = '</' . $tag . '>';
1981
+ $this->link_nodes($node, false);
1982
+ $this->char = (++$this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1983
+ return true;
1984
+ }
1985
+
1986
+ /**
1987
+ * Seek from the current document position to the first occurrence of a
1988
+ * character not defined by the provided string. Update the current document
1989
+ * position to the new position.
1990
+ *
1991
+ * @param string $chars A string containing every allowed character.
1992
+ * @return void
1993
+ */
1994
+ protected function skip($chars)
1995
+ {
1996
+ $this->pos += strspn($this->doc, $chars, $this->pos);
1997
+ $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
1998
+ }
1999
+
2000
+ /**
2001
+ * Copy substring from the current document position to the first occurrence
2002
+ * of a character not defined by the provided string.
2003
+ *
2004
+ * @param string $chars A string containing every allowed character.
2005
+ * @return string Substring from the current document position to the first
2006
+ * occurrence of a character not defined by the provided string.
2007
+ */
2008
+ protected function copy_skip($chars)
2009
+ {
2010
+ $pos = $this->pos;
2011
+ $len = strspn($this->doc, $chars, $pos);
2012
+ $this->pos += $len;
2013
+ $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
2014
+ if ($len===0) return '';
2015
+ return substr($this->doc, $pos, $len);
2016
+ }
2017
+
2018
+ /**
2019
+ * Copy substring from the current document position to the first occurrence
2020
+ * of any of the provided characters.
2021
+ *
2022
+ * @param string $chars A string containing every character to stop at.
2023
+ * @return string Substring from the current document position to the first
2024
+ * occurrence of any of the provided characters.
2025
+ */
2026
+ protected function copy_until($chars)
2027
+ {
2028
+ $pos = $this->pos;
2029
+ $len = strcspn($this->doc, $chars, $pos);
2030
+ $this->pos += $len;
2031
+ $this->char = ($this->pos<$this->size) ? $this->doc[$this->pos] : null; // next
2032
+ return substr($this->doc, $pos, $len);
2033
+ }
2034
+
2035
/**
 * Consume everything from the current document position up to (but not
 * including) the first occurrence of the given string, and return it.
 *
 * When the string does not occur in the rest of the document, the whole
 * remainder is consumed: the cursor is moved to $this->size and $this->char
 * is set to null.
 *
 * @param string $char The string to stop at.
 * @return string The consumed substring; an empty string when there is no
 * current character or when the stop string starts at the cursor.
 */
protected function copy_until_char($char)
{
    // No current character: nothing to copy.
    if ($this->char === null) {
        return '';
    }

    $start = $this->pos;
    $found = strpos($this->doc, $char, $start);

    if ($found === false) {
        // Stop string not present: hand back the rest of the document and
        // park the cursor at the end.
        $rest = substr($this->doc, $start, $this->size - $start);
        $this->char = null;
        $this->pos  = $this->size;
        return $rest;
    }

    // Stop string begins right at the cursor: nothing to consume, and the
    // cursor state is left untouched.
    if ($found === $start) {
        return '';
    }

    $this->char = $this->doc[$found];
    $this->pos  = $found;
    return substr($this->doc, $start, $found - $start);
}
2060
+
2061
/**
 * Replace every match of a pattern in the document with a placeholder key
 * and remember the removed text.
 *
 * Each removed fragment is stored in $this->noise under a key of the form
 * '___noise___' followed by a five-character, space-padded number
 * (count($this->noise) + 1000 at the time of insertion). The same key is
 * written into $this->doc in place of the fragment.
 *
 * @param string $pattern The regex pattern used for finding noise.
 * @param bool $remove_tag True to replace the entire match; false (default)
 * to replace only the first captured group.
 * @return void
 */
protected function remove_noise($pattern, $remove_tag=false)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    $count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE);

    // 0 = entire match, 1 = first captured group
    $group = $remove_tag ? 0 : 1;

    // Process matches last-to-first: replacing a later match does not shift
    // the byte offsets recorded for the earlier ones.
    $i = $count - 1;
    while ($i >= 0)
    {
        $key = '___noise___'.sprintf('% 5d', count($this->noise)+1000);
        if (is_object($debug_object)) { $debug_object->debug_log(2, 'key is: ' . $key); }

        $fragment = $matches[$i][$group][0];
        $offset   = $matches[$i][$group][1];

        $this->noise[$key] = $fragment;
        $this->doc = substr_replace($this->doc, $key, $offset, strlen($fragment));

        --$i;
    }

    // The document length changed; refresh the cached size and, when the
    // document is not empty, the cached current character.
    $this->size = strlen($this->doc);
    if ($this->size > 0)
    {
        $this->char = $this->doc[0];
    }
}
2093
+
2094
/**
 * Restore noise to HTML content
 *
 * Every '___noise___' placeholder followed by a five-character key suffix
 * is replaced with the text stored for it in {@see simple_html_dom::$noise}.
 * Restored text is scanned again, so placeholders contained in a stored
 * fragment are restored as well.
 *
 * Placeholders that cannot be resolved are replaced by a diagnostic message:
 * - 'UNDEFINED NOISE FOR KEY: <key>' when no noise is stored for the key;
 * - 'NO NUMERIC NOISE KEY' when fewer than five characters follow the marker.
 *
 * The 'UNDEFINED NOISE FOR KEY' message itself contains the marker. The
 * search therefore resumes *after* the inserted message; searching from the
 * start of the text again (as this method used to) would find the same
 * unknown key forever and never terminate.
 *
 * NOTE(review): a stored fragment that contains its own placeholder key
 * would still be expanded repeatedly; that case is not handled here.
 *
 * @param string $text A subset of HTML containing noise
 * @return string The same content with noise restored
 */
function restore_noise($text)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    $marker     = '___noise___';
    $marker_len = strlen($marker);   // 11
    $key_len    = $marker_len + 5;   // marker plus five-character suffix
    $offset     = 0;                 // where the next marker search starts

    while (($pos = strpos($text, $marker, $offset)) !== false)
    {
        // Sometimes there is a broken piece of markup, and the five key
        // characters after the marker are not all present.
        if (strlen($text) > $pos + 15)
        {
            $key = substr($text, $pos, $key_len);
            if (is_object($debug_object)) { $debug_object->debug_log(2, 'located key of: ' . $key); }

            if (isset($this->noise[$key]))
            {
                $text = substr($text, 0, $pos).$this->noise[$key].substr($text, $pos + $key_len);
                // Rescan from the same position so placeholders inside the
                // restored fragment are resolved too.
                $offset = $pos;
            }
            else
            {
                $message = 'UNDEFINED NOISE FOR KEY: '.$key;
                $text = substr($text, 0, $pos).$message.substr($text, $pos + $key_len);
                // Skip the message: it contains the marker and would be
                // matched again otherwise.
                $offset = $pos + strlen($message);
            }
        }
        else
        {
            // There is no complete key after the marker. Remove the marker
            // itself so it is not matched again.
            $text = substr($text, 0, $pos).'NO NUMERIC NOISE KEY'.substr($text, $pos + $marker_len);
            $offset = $pos;
        }
    }
    return $text;
}
2133
+
2134
/**
 * Find a stored noise fragment that contains the given text.
 *
 * Sometimes we NEED one of the noise elements.
 *
 * @param string $text The text to look for inside the stored fragments.
 * @return string|null The first fragment of $this->noise (in iteration
 * order) containing $text, or null when none does.
 */
function search_noise($text)
{
    global $debug_object;
    if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }

    foreach ($this->noise as $fragment)
    {
        if (strpos($fragment, $text) === false)
        {
            continue;
        }
        return $fragment;
    }

    return null;
}
2148
/**
 * String conversion: yields the inner text of the root node.
 *
 * @return string Whatever $this->root->innertext() returns.
 */
function __toString()
{
    $markup = $this->root->innertext();
    return $markup;
}
2152
+
2153
/**
 * Magic read access for a handful of virtual properties.
 *
 * - 'outertext' and 'innertext' both map to $this->root->innertext()
 * - 'plaintext' maps to $this->root->text()
 * - 'charset' maps to $this->_charset
 * - 'target_charset' maps to $this->_target_charset
 *
 * @param string $name Name of the property being read.
 * @return mixed The mapped value, or null for any other name.
 */
function __get($name)
{
    switch ($name)
    {
        case 'outertext':
        case 'innertext':
            return $this->root->innertext();

        case 'plaintext':
            return $this->root->text();

        case 'charset':
            return $this->_charset;

        case 'target_charset':
            return $this->_target_charset;

        default:
            return null;
    }
}
2169
+
2170
// camel naming conventions
//
// Thin camelCase aliases. Each one only forwards to the call shown in its
// body; what those targets do is defined outside this section.

/** Forwards to $this->root->childNodes($idx). */
function childNodes($idx=-1)
{
    return $this->root->childNodes($idx);
}

/** Forwards to $this->root->first_child(). */
function firstChild()
{
    return $this->root->first_child();
}

/** Forwards to $this->root->last_child(). */
function lastChild()
{
    return $this->root->last_child();
}

/**
 * Parses "<$name>$value</$name>" with str_get_html() and returns the first
 * child of the result. Errors raised by the expression are silenced with @.
 */
function createElement($name, $value=null)
{
    return @str_get_html("<$name>$value</$name>")->first_child();
}

/**
 * Parses $value with str_get_html() and returns the last entry of the
 * result's nodes list. Errors raised by the expression are silenced with @.
 */
function createTextNode($value)
{
    return @end(str_get_html($value)->nodes);
}

/** Forwards to $this->find("#$id", 0). */
function getElementById($id)
{
    return $this->find("#$id", 0);
}

/** Forwards to $this->find("#$id", $idx). */
function getElementsById($id, $idx=null)
{
    return $this->find("#$id", $idx);
}

/** Forwards to $this->find($name, 0). */
function getElementByTagName($name)
{
    return $this->find($name, 0);
}

/** Forwards to $this->find($name, $idx). */
function getElementsByTagName($name, $idx=-1)
{
    return $this->find($name, $idx);
}

/**
 * Collects all call arguments and passes them to load_file() as ONE array
 * argument. Nothing is returned.
 *
 * NOTE(review): load_file() is not visible here; if it expects the
 * arguments individually, this forwarding is wrong - confirm against its
 * definition.
 */
function loadFile()
{
    $args = func_get_args();
    $this->load_file($args);
}
2181
  }
2182
 
 
2183
  ?>
class-translate-press.php CHANGED
@@ -41,7 +41,7 @@ class TRP_Translate_Press{
41
  define( 'TRP_PLUGIN_URL', plugin_dir_url( __FILE__ ) );
42
  define( 'TRP_PLUGIN_BASE', plugin_basename( __DIR__ . '/index.php' ) );
43
  define( 'TRP_PLUGIN_SLUG', 'translatepress-multilingual' );
44
- define( 'TRP_PLUGIN_VERSION', '1.4.0' );
45
 
46
  wp_cache_add_non_persistent_groups(array('trp'));
47
 
41
  define( 'TRP_PLUGIN_URL', plugin_dir_url( __FILE__ ) );
42
  define( 'TRP_PLUGIN_BASE', plugin_basename( __DIR__ . '/index.php' ) );
43
  define( 'TRP_PLUGIN_SLUG', 'translatepress-multilingual' );
44
+ define( 'TRP_PLUGIN_VERSION', '1.4.1' );
45
 
46
  wp_cache_add_non_persistent_groups(array('trp'));
47
 
includes/class-translation-render.php CHANGED
@@ -355,7 +355,7 @@ class TRP_Translation_Render{
355
  }
356
  $merge_rules = $this->translation_manager->get_merge_rules();
357
  }
358
- $html = trp_str_get_html($output, true, true, TRP_DEFAULT_TARGET_CHARSET, false, TRP_DEFAULT_BR_TEXT, TRP_DEFAULT_SPAN_TEXT);
359
 
360
  /**
361
  * When we are in the translation editor: Intercept the trp-gettext that was wrapped around all the gettext texts, grab the attribute data-trpgettextoriginal
@@ -444,7 +444,7 @@ class TRP_Translation_Render{
444
  }
445
 
446
  // convert to a node
447
- $node_from_value = trp_str_get_html(html_entity_decode(htmlspecialchars_decode($attr_value, ENT_QUOTES)));
448
  foreach ($node_from_value->find('trp-gettext') as $nfv_row) {
449
  $nfv_row->outertext = $nfv_row->innertext();
450
  $row->setAttribute($attr_name, $node_from_value->save() );
@@ -473,7 +473,7 @@ class TRP_Translation_Render{
473
  return $trpremoved;
474
  }
475
 
476
- $html = trp_str_get_html($trpremoved, true, true, TRP_DEFAULT_TARGET_CHARSET, false, TRP_DEFAULT_BR_TEXT, TRP_DEFAULT_SPAN_TEXT);
477
 
478
  $no_translate_selectors = apply_filters( 'trp_no_translate_selectors', array( '#wpadminbar' ), $TRP_LANGUAGE );
479
 
@@ -663,7 +663,7 @@ class TRP_Translation_Render{
663
 
664
  // We need to save here in order to access the translated links too.
665
  $html = $html->save();
666
- $html = trp_str_get_html($html, true, true, TRP_DEFAULT_TARGET_CHARSET, false, TRP_DEFAULT_BR_TEXT, TRP_DEFAULT_SPAN_TEXT);
667
 
668
  // force custom links to have the correct language
669
  foreach( $html->find('a[href!="#"]') as $a_href) {
355
  }
356
  $merge_rules = $this->translation_manager->get_merge_rules();
357
  }
358
+ $html = TranslatePress\str_get_html($output, true, true, TRP_DEFAULT_TARGET_CHARSET, false, TRP_DEFAULT_BR_TEXT, TRP_DEFAULT_SPAN_TEXT);
359
 
360
  /**
361
  * When we are in the translation editor: Intercept the trp-gettext that was wrapped around all the gettext texts, grab the attribute data-trpgettextoriginal
444
  }
445
 
446
  // convert to a node
447
+ $node_from_value = TranslatePress\str_get_html(html_entity_decode(htmlspecialchars_decode($attr_value, ENT_QUOTES)), true, true, TRP_DEFAULT_TARGET_CHARSET, false, TRP_DEFAULT_BR_TEXT, TRP_DEFAULT_SPAN_TEXT);
448
  foreach ($node_from_value->find('trp-gettext') as $nfv_row) {
449
  $nfv_row->outertext = $nfv_row->innertext();
450
  $row->setAttribute($attr_name, $node_from_value->save() );
473
  return $trpremoved;
474
  }
475
 
476
+ $html = TranslatePress\str_get_html($trpremoved, true, true, TRP_DEFAULT_TARGET_CHARSET, false, TRP_DEFAULT_BR_TEXT, TRP_DEFAULT_SPAN_TEXT);
477
 
478
  $no_translate_selectors = apply_filters( 'trp_no_translate_selectors', array( '#wpadminbar' ), $TRP_LANGUAGE );
479
 
663
 
664
  // We need to save here in order to access the translated links too.
665
  $html = $html->save();
666
+ $html = TranslatePress\str_get_html($html, true, true, TRP_DEFAULT_TARGET_CHARSET, false, TRP_DEFAULT_BR_TEXT, TRP_DEFAULT_SPAN_TEXT);
667
 
668
  // force custom links to have the correct language
669
  foreach( $html->find('a[href!="#"]') as $a_href) {
includes/class-upgrade.php CHANGED
@@ -41,7 +41,11 @@ class TRP_Upgrade {
41
  if( empty($stored_database_version) || version_compare( TRP_PLUGIN_VERSION, $stored_database_version, '>' ) ){
42
  $this->check_if_gettext_tables_exist();
43
  $this->trp_query->check_for_block_type_column();
44
- $this->check_for_full_trim_originals( $stored_database_version );
 
 
 
 
45
  }
46
 
47
  update_option( 'trp_plugin_version', TRP_PLUGIN_VERSION );
@@ -62,14 +66,6 @@ class TRP_Upgrade {
62
  }
63
  }
64
 
65
- /**
66
- * Sets an option to know that an upgrade is needed
67
- */
68
- public function check_for_full_trim_originals( $stored_database_version ){
69
- if ( version_compare( '1.4.0', $stored_database_version, '>' ) ){
70
- update_option( 'trp_updated_database_full_trim_originals_140', 'no' );
71
- }
72
- }
73
 
74
  /**
75
  * Show admin notice about updating database
41
  if( empty($stored_database_version) || version_compare( TRP_PLUGIN_VERSION, $stored_database_version, '>' ) ){
42
  $this->check_if_gettext_tables_exist();
43
  $this->trp_query->check_for_block_type_column();
44
+ }
45
+ if( !empty( $stored_database_version ) ) {
46
+ if ( version_compare( '1.4.0', $stored_database_version, '>' ) ) {
47
+ update_option( 'trp_updated_database_full_trim_originals_140', 'no' );
48
+ }
49
  }
50
 
51
  update_option( 'trp_plugin_version', TRP_PLUGIN_VERSION );
66
  }
67
  }
68
 
 
 
 
 
 
 
 
 
69
 
70
  /**
71
  * Show admin notice about updating database
index.php CHANGED
@@ -3,7 +3,7 @@
3
  Plugin Name: TranslatePress - Multilingual
4
  Plugin URI: https://translatepress.com/
5
  Description: Experience a better way of translating your WordPress site, with full support for WooCommerce and site builders.
6
- Version: 1.4.0
7
  Author: Cozmoslabs, Razvan Mocanu, Madalin Ungureanu, Cristophor Hurduban
8
  Author URI: https://cozmoslabs.com/
9
  Text Domain: translatepress-multilingual
3
  Plugin Name: TranslatePress - Multilingual
4
  Plugin URI: https://translatepress.com/
5
  Description: Experience a better way of translating your WordPress site, with full support for WooCommerce and site builders.
6
+ Version: 1.4.1
7
  Author: Cozmoslabs, Razvan Mocanu, Madalin Ungureanu, Cristophor Hurduban
8
  Author URI: https://cozmoslabs.com/
9
  Text Domain: translatepress-multilingual
languages/translatepress-multilingual.pot CHANGED
@@ -17,63 +17,63 @@ msgstr ""
17
  msgid "Error! Duplicate Url slug values."
18
  msgstr ""
19
 
20
- #: ../tp-add-on-navigation-based-on-language/class-navigation-based-on-language.php:86
21
  msgid "Limit this menu item to the following languages"
22
  msgstr ""
23
 
24
- #: ../tp-add-on-navigation-based-on-language/class-navigation-based-on-language.php:92, ../tp-add-on-extra-languages/partials/language-selector-pro.php:2, ../translatepress/partials/main-settings-language-selector.php:2
25
  msgid "All Languages"
26
  msgstr ""
27
 
28
- #: ../tp-add-on-seo-pack/class-seo-pack.php:160
29
  msgid "The Yoast SEO Sitemaps will now contain the default language slug: example.com/en/sitemap_index.xml <br/> This works perfectly, just take it into account when you submit the sitemap to Google."
30
  msgstr ""
31
 
32
- #: includes/class-ald-settings.php:37
33
  msgid "First by browser language, then IP address (recommended)"
34
  msgstr ""
35
 
36
- #: includes/class-ald-settings.php:38
37
  msgid "First by IP address, then browser language"
38
  msgstr ""
39
 
40
- #: includes/class-ald-settings.php:39
41
  msgid "Only by browser language"
42
  msgstr ""
43
 
44
- #: includes/class-ald-settings.php:40
45
  msgid "Only by IP address"
46
  msgstr ""
47
 
48
- #: includes/class-ald-settings.php:110
49
  msgid "<div class=\"warning\">WARNING. Cannot determine your language preference based on your current IP.<br>This is most likely because the website is on a local environment.</div>"
50
  msgstr ""
51
 
52
- #: partials/license-settings-page.php:4, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:4, ../tp-add-on-extra-languages/partials/license-settings-page.php:4, ../tp-add-on-navigation-based-on-language/partials/license-settings-page.php:4, ../tp-add-on-seo-pack/partials/license-settings-page.php:4, ../translatepress/partials/addons-settings-page.php:3, ../translatepress/partials/main-settings-page.php:5, ../translatepress/partials/test-google-key-settings-page.php:10, ../translatepress/partials/trp-remove-duplicate-rows.php:3, ../translatepress/partials/trp-update-database.php:3, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:4
53
  msgid "TranslatePress Settings"
54
  msgstr ""
55
 
56
- #: partials/license-settings-page.php:10, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:10, ../tp-add-on-extra-languages/partials/license-settings-page.php:10, ../tp-add-on-navigation-based-on-language/partials/license-settings-page.php:10, ../tp-add-on-seo-pack/partials/license-settings-page.php:10, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:10
57
  msgid "License Key"
58
  msgstr ""
59
 
60
- #: partials/license-settings-page.php:15, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:15, ../tp-add-on-extra-languages/partials/license-settings-page.php:15, ../tp-add-on-navigation-based-on-language/partials/license-settings-page.php:15, ../tp-add-on-seo-pack/partials/license-settings-page.php:15, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:15
61
  msgid "Enter your license key."
62
  msgstr ""
63
 
64
- #: partials/license-settings-page.php:22, partials/license-settings-page.php:31, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:22, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:31, ../tp-add-on-extra-languages/partials/license-settings-page.php:22, ../tp-add-on-extra-languages/partials/license-settings-page.php:31, ../tp-add-on-navigation-based-on-language/partials/license-settings-page.php:22, ../tp-add-on-navigation-based-on-language/partials/license-settings-page.php:31, ../tp-add-on-seo-pack/partials/license-settings-page.php:22, ../tp-add-on-seo-pack/partials/license-settings-page.php:31, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:22, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:31
65
  msgid "Activate License"
66
  msgstr ""
67
 
68
- #: partials/license-settings-page.php:28, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:28, ../tp-add-on-extra-languages/partials/license-settings-page.php:28, ../tp-add-on-navigation-based-on-language/partials/license-settings-page.php:28, ../tp-add-on-seo-pack/partials/license-settings-page.php:28, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:28
69
  msgid "Deactivate License"
70
  msgstr ""
71
 
72
- #: partials/settings-option.php:2
73
  msgid "Method of language detection"
74
  msgstr ""
75
 
76
- #: partials/settings-option.php:14
77
  msgid "Select how the language should be detected for first time visitors.<br>The visitor's last displayed language will be remembered through cookies."
78
  msgstr ""
79
 
@@ -249,51 +249,51 @@ msgstr ""
249
  msgid "Page Title"
250
  msgstr ""
251
 
252
- #: ../translatepress/includes/class-upgrade.php:95
253
  msgid "TranslatePress data update"
254
  msgstr ""
255
 
256
- #: ../translatepress/includes/class-upgrade.php:95
257
  msgid "We need to update your translations database to the latest version."
258
  msgstr ""
259
 
260
- #: ../translatepress/includes/class-upgrade.php:96
261
  msgid "Run the updater"
262
  msgstr ""
263
 
264
- #: ../translatepress/includes/class-upgrade.php:125
265
  msgid "<p><strong>Successfully updated database!</strong></p>"
266
  msgstr ""
267
 
268
- #: ../translatepress/includes/class-upgrade.php:125, ../translatepress/includes/class-upgrade.php:130, ../translatepress/includes/class-upgrade.php:290, ../translatepress/includes/class-upgrade.php:295
269
  msgid "Back to TranslatePress Settings page"
270
  msgstr ""
271
 
272
- #: ../translatepress/includes/class-upgrade.php:130, ../translatepress/includes/class-upgrade.php:295
273
  msgid "Invalid nonce."
274
  msgstr ""
275
 
276
- #: ../translatepress/includes/class-upgrade.php:206, ../translatepress/includes/class-upgrade.php:375
277
  msgid "If the page does not redirect automatically"
278
  msgstr ""
279
 
280
- #: ../translatepress/includes/class-upgrade.php:206, ../translatepress/includes/class-upgrade.php:375
281
  msgid "click here"
282
  msgstr ""
283
 
284
- #: ../translatepress/includes/class-upgrade.php:228
285
  msgid "Querying table <strong>%s</strong>... "
286
  msgstr ""
287
 
288
- #: ../translatepress/includes/class-upgrade.php:230, ../translatepress/includes/class-upgrade.php:290
289
  msgid "Done."
290
  msgstr ""
291
 
292
- #: ../translatepress/includes/class-upgrade.php:315
293
  msgid "Querying table <strong>%s</strong>"
294
  msgstr ""
295
 
296
- #: ../translatepress/includes/class-upgrade.php:345
297
  msgid "%s duplicates removed"
298
  msgstr ""
299
 
17
  msgid "Error! Duplicate Url slug values."
18
  msgstr ""
19
 
20
+ #: class-navigation-based-on-language.php:86
21
  msgid "Limit this menu item to the following languages"
22
  msgstr ""
23
 
24
+ #: class-navigation-based-on-language.php:92, ../tp-add-on-extra-languages/partials/language-selector-pro.php:2, ../translatepress/partials/main-settings-language-selector.php:2
25
  msgid "All Languages"
26
  msgstr ""
27
 
28
+ #: ../tp-add-on-seo-pack/class-seo-pack.php:167
29
  msgid "The Yoast SEO Sitemaps will now contain the default language slug: example.com/en/sitemap_index.xml <br/> This works perfectly, just take it into account when you submit the sitemap to Google."
30
  msgstr ""
31
 
32
+ #: ../tp-add-on-automatic-language-detection/includes/class-ald-settings.php:37
33
  msgid "First by browser language, then IP address (recommended)"
34
  msgstr ""
35
 
36
+ #: ../tp-add-on-automatic-language-detection/includes/class-ald-settings.php:38
37
  msgid "First by IP address, then browser language"
38
  msgstr ""
39
 
40
+ #: ../tp-add-on-automatic-language-detection/includes/class-ald-settings.php:39
41
  msgid "Only by browser language"
42
  msgstr ""
43
 
44
+ #: ../tp-add-on-automatic-language-detection/includes/class-ald-settings.php:40
45
  msgid "Only by IP address"
46
  msgstr ""
47
 
48
+ #: ../tp-add-on-automatic-language-detection/includes/class-ald-settings.php:110
49
  msgid "<div class=\"warning\">WARNING. Cannot determine your language preference based on your current IP.<br>This is most likely because the website is on a local environment.</div>"
50
  msgstr ""
51
 
52
+ #: ../tp-add-on-automatic-language-detection/partials/license-settings-page.php:4, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:4, ../tp-add-on-extra-languages/partials/license-settings-page.php:4, partials/license-settings-page.php:4, ../tp-add-on-seo-pack/partials/license-settings-page.php:4, ../translatepress/partials/addons-settings-page.php:3, ../translatepress/partials/main-settings-page.php:5, ../translatepress/partials/test-google-key-settings-page.php:10, ../translatepress/partials/trp-remove-duplicate-rows.php:3, ../translatepress/partials/trp-update-database.php:3, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:4
53
  msgid "TranslatePress Settings"
54
  msgstr ""
55
 
56
+ #: ../tp-add-on-automatic-language-detection/partials/license-settings-page.php:10, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:10, ../tp-add-on-extra-languages/partials/license-settings-page.php:10, partials/license-settings-page.php:10, ../tp-add-on-seo-pack/partials/license-settings-page.php:10, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:10
57
  msgid "License Key"
58
  msgstr ""
59
 
60
+ #: ../tp-add-on-automatic-language-detection/partials/license-settings-page.php:15, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:15, ../tp-add-on-extra-languages/partials/license-settings-page.php:15, partials/license-settings-page.php:15, ../tp-add-on-seo-pack/partials/license-settings-page.php:15, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:15
61
  msgid "Enter your license key."
62
  msgstr ""
63
 
64
+ #: ../tp-add-on-automatic-language-detection/partials/license-settings-page.php:22, ../tp-add-on-automatic-language-detection/partials/license-settings-page.php:31, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:22, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:31, ../tp-add-on-extra-languages/partials/license-settings-page.php:22, ../tp-add-on-extra-languages/partials/license-settings-page.php:31, partials/license-settings-page.php:22, partials/license-settings-page.php:31, ../tp-add-on-seo-pack/partials/license-settings-page.php:22, ../tp-add-on-seo-pack/partials/license-settings-page.php:31, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:22, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:31
65
  msgid "Activate License"
66
  msgstr ""
67
 
68
+ #: ../tp-add-on-automatic-language-detection/partials/license-settings-page.php:28, ../tp-add-on-browse-as-other-roles/partials/license-settings-page.php:28, ../tp-add-on-extra-languages/partials/license-settings-page.php:28, partials/license-settings-page.php:28, ../tp-add-on-seo-pack/partials/license-settings-page.php:28, ../trp-add-on-translator-accounts-add-on/partials/license-settings-page.php:28
69
  msgid "Deactivate License"
70
  msgstr ""
71
 
72
+ #: ../tp-add-on-automatic-language-detection/partials/settings-option.php:2
73
  msgid "Method of language detection"
74
  msgstr ""
75
 
76
+ #: ../tp-add-on-automatic-language-detection/partials/settings-option.php:14
77
  msgid "Select how the language should be detected for first time visitors.<br>The visitor's last displayed language will be remembered through cookies."
78
  msgstr ""
79
 
249
  msgid "Page Title"
250
  msgstr ""
251
 
252
+ #: ../translatepress/includes/class-upgrade.php:91
253
  msgid "TranslatePress data update"
254
  msgstr ""
255
 
256
+ #: ../translatepress/includes/class-upgrade.php:91
257
  msgid "We need to update your translations database to the latest version."
258
  msgstr ""
259
 
260
+ #: ../translatepress/includes/class-upgrade.php:92
261
  msgid "Run the updater"
262
  msgstr ""
263
 
264
+ #: ../translatepress/includes/class-upgrade.php:121
265
  msgid "<p><strong>Successfully updated database!</strong></p>"
266
  msgstr ""
267
 
268
+ #: ../translatepress/includes/class-upgrade.php:121, ../translatepress/includes/class-upgrade.php:126, ../translatepress/includes/class-upgrade.php:286, ../translatepress/includes/class-upgrade.php:291
269
  msgid "Back to TranslatePress Settings page"
270
  msgstr ""
271
 
272
+ #: ../translatepress/includes/class-upgrade.php:126, ../translatepress/includes/class-upgrade.php:291
273
  msgid "Invalid nonce."
274
  msgstr ""
275
 
276
+ #: ../translatepress/includes/class-upgrade.php:202, ../translatepress/includes/class-upgrade.php:371
277
  msgid "If the page does not redirect automatically"
278
  msgstr ""
279
 
280
+ #: ../translatepress/includes/class-upgrade.php:202, ../translatepress/includes/class-upgrade.php:371
281
  msgid "click here"
282
  msgstr ""
283
 
284
+ #: ../translatepress/includes/class-upgrade.php:224
285
  msgid "Querying table <strong>%s</strong>... "
286
  msgstr ""
287
 
288
+ #: ../translatepress/includes/class-upgrade.php:226, ../translatepress/includes/class-upgrade.php:286
289
  msgid "Done."
290
  msgstr ""
291
 
292
+ #: ../translatepress/includes/class-upgrade.php:311
293
  msgid "Querying table <strong>%s</strong>"
294
  msgstr ""
295
 
296
+ #: ../translatepress/includes/class-upgrade.php:341
297
  msgid "%s duplicates removed"
298
  msgstr ""
299
 
readme.txt CHANGED
@@ -4,7 +4,7 @@ Donate link: https://www.cozmoslabs.com/
4
  Tags: translate, translation, multilingual, automatic translation, bilingual, front-end translation, google translate, language
5
  Requires at least: 3.1.0
6
  Tested up to: 5.0.3
7
- Stable tag: 1.4.0
8
  License: GPLv2 or later
9
  License URI: http://www.gnu.org/licenses/gpl-2.0.html
10
 
@@ -37,6 +37,7 @@ https://www.youtube.com/watch?v=pUlYisvBm8g
37
  * Conditional display content shortcode based on language [trp_language language="en_US"] English content only [/trp_language]
38
  * Possibility to edit gettext strings from themes and plugins from english to english, without adding another language. Basically a string-replace functionality.
39
  * Translation Block feature in which you can translate multiple html elements together
 
40
 
41
  Note: this WordPress translation plugin uses the Google Translation API to translate the strings on your site. This feature can be enabled or disabled according to your preferences.
42
 
@@ -124,6 +125,10 @@ For more information please check out [TranslatePress - Multilingual plugin docu
124
  6. Menu Language Switcher
125
 
126
  == Changelog ==
 
 
 
 
127
  = 1.4.0 =
128
  * Added Enfold compatibility by increasing the template_include hook priority
129
  * Add the costa rica flag
4
  Tags: translate, translation, multilingual, automatic translation, bilingual, front-end translation, google translate, language
5
  Requires at least: 3.1.0
6
  Tested up to: 5.0.3
7
+ Stable tag: 1.4.1
8
  License: GPLv2 or later
9
  License URI: http://www.gnu.org/licenses/gpl-2.0.html
10
 
37
  * Conditional display content shortcode based on language [trp_language language="en_US"] English content only [/trp_language]
38
  * Possibility to edit gettext strings from themes and plugins from english to english, without adding another language. Basically a string-replace functionality.
39
  * Translation Block feature in which you can translate multiple html elements together
40
+ * Native **Gutenberg** support
41
 
42
  Note: this WordPress translation plugin uses the Google Translation API to translate the strings on your site. This feature can be enabled or disabled according to your preferences.
43
 
125
  6. Menu Language Switcher
126
 
127
  == Changelog ==
128
+ = 1.4.1 =
129
+ * Added PHP 7.3 support
130
+ * Performance improvements
131
+
132
  = 1.4.0 =
133
  * Added Enfold compatibility by increasing the template_include hook priority
134
  * Add the costa rica flag