Postie - Version 1.6.5

Version Description

1.6.5 (2014.10.22)
* Fixed charset encoding bug when there wasn't a content-transfer-encoding header
* Upgraded simple_html_dom

Download this release

Release Info

Developer WayneAllen
Plugin Icon 128x128 Postie
Version 1.6.5
Comparing to
See all releases

Code changes from version 1.6.4 to 1.6.5

Files changed (6)
  1. docs/Changes.txt +4 -0
  2. docs/Postie.txt +1 -1
  3. postie-functions.php +60 -48
  4. postie.php +3 -3
  5. readme.txt +6 -2
  6. simple_html_dom.php +59 -39
docs/Changes.txt CHANGED
@@ -27,6 +27,10 @@ All script, style and body tags are stripped from html emails.
27
  Attachments are now processed in the order they were attached.
28
 
29
  == CHANGELOG ==
 
 
 
 
30
  = 1.6.4 (2014.10.21) =
31
  * Provide post url in success email
32
 
27
  Attachments are now processed in the order they were attached.
28
 
29
  == CHANGELOG ==
30
+ = 1.6.5 (2014.10.22) =
31
+ * Fixed charset encoding bug when there wasn't a content-transfer-encoding header
32
+ * Upgraded simple_html_dom
33
+
34
  = 1.6.4 (2014.10.21) =
35
  * Provide post url in success email
36
 
docs/Postie.txt CHANGED
@@ -6,7 +6,7 @@ Plugin URI: http://PostiePlugin.com/
6
  Tags: e-mail, email, post-by-email
7
  Requires at least: 3.0
8
  Tested up to: 4.0
9
- Stable tag: 1.6.4
10
  License: GPLv2 or later
11
  License URI: http://www.gnu.org/licenses/gpl-2.0.html
12
 
6
  Tags: e-mail, email, post-by-email
7
  Requires at least: 3.0
8
  Tested up to: 4.0
9
+ Stable tag: 1.6.5
10
  License: GPLv2 or later
11
  License URI: http://www.gnu.org/licenses/gpl-2.0.html
12
 
postie-functions.php CHANGED
@@ -1,6 +1,6 @@
1
  <?php
2
  /*
3
- $Id: postie-functions.php 1011691 2014-10-21 19:08:46Z WayneAllen $
4
  */
5
 
6
  //to turn on debug output add the following line to wp-config.php
@@ -170,7 +170,7 @@ function tag_Date(&$content, $message_date) {
170
  $es = $html->find('text');
171
  DebugEcho("tag_Date: html " . count($es));
172
  foreach ($es as $e) {
173
- DebugEcho(trim($e->plaintext));
174
  $matches = array();
175
  if (1 === preg_match("/^date:\s?(.*)$/im", trim($e->plaintext), $matches)) {
176
  DebugEcho("tag_Date: found date tag $matches[1]");
@@ -320,10 +320,13 @@ function CreatePost($poster, $mimeDecodedEmail, $post_id, &$is_reply, $config, $
320
 
321
  $id = GetParentPostForReply($subject);
322
  if (empty($id)) {
 
323
  $id = $post_id;
324
  $is_reply = false;
325
  if ($config['add_meta'] == 'yes') {
 
326
  if ($config['wrap_pre'] == 'yes') {
 
327
  $content = $postAuthorDetails['content'] . "<pre>\n" . $content . "</pre>\n";
328
  $content = "<pre>\n" . $content . "</pre>\n";
329
  } else {
@@ -332,6 +335,7 @@ function CreatePost($poster, $mimeDecodedEmail, $post_id, &$is_reply, $config, $
332
  }
333
  } else {
334
  if ($config['wrap_pre'] == 'yes') {
 
335
  $content = "<pre>\n" . $content . "</pre>\n";
336
  }
337
  }
@@ -943,8 +947,9 @@ function PostToDB($details, $isReply, $customImageField, $postmodifiers) {
943
  * @return boolean
944
  */
945
  function isBannedFileName($filename, $bannedFiles) {
946
- if (empty($filename) || empty($bannedFiles))
947
  return false;
 
948
  foreach ($bannedFiles as $bannedFile) {
949
  if (fnmatch($bannedFile, $filename)) {
950
  EchoInfo("Ignoring attachment: $filename - it is on the banned files list.");
@@ -957,7 +962,7 @@ function isBannedFileName($filename, $bannedFiles) {
957
  function GetContent($part, &$attachments, $post_id, $poster, $config) {
958
  extract($config);
959
  //global $charset, $encoding;
960
- DebugEcho('----');
961
  $meta_return = '';
962
  if (property_exists($part, "ctype_primary")) {
963
  DebugEcho("GetContent: primary= " . $part->ctype_primary . ", secondary = " . $part->ctype_secondary);
@@ -969,6 +974,7 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
969
  //look for banned file names
970
  if (property_exists($part, 'ctype_parameters') && is_array($part->ctype_parameters) && array_key_exists('name', $part->ctype_parameters))
971
  if (isBannedFileName($part->ctype_parameters['name'], $banned_files_list)) {
 
972
  return NULL;
973
  }
974
 
@@ -986,7 +992,7 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
986
  }
987
 
988
  if (property_exists($part, "ctype_primary") && $part->ctype_primary == "multipart" && $part->ctype_secondary == "appledouble") {
989
- DebugEcho("multipart appledouble");
990
  $mimeDecodedEmail = DecodeMIMEMail("Content-Type: multipart/mixed; boundary=" . $part->ctype_parameters["boundary"] . "\n" . $part->body);
991
  filter_PreferedText($mimeDecodedEmail, $prefer_text_type);
992
  filter_AppleFile($mimeDecodedEmail);
@@ -1009,10 +1015,12 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
1009
  $mimetype_primary = "";
1010
  $mimetype_secondary = "";
1011
 
1012
- if (property_exists($part, "ctype_primary"))
1013
  $mimetype_primary = strtolower($part->ctype_primary);
1014
- if (property_exists($part, "ctype_secondary"))
 
1015
  $mimetype_secondary = strtolower($part->ctype_secondary);
 
1016
 
1017
  $typeinfo = wp_check_filetype($filename);
1018
  //DebugDump($typeinfo);
@@ -1040,7 +1048,7 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
1040
 
1041
  switch ($mimetype_primary) {
1042
  case 'multipart':
1043
- DebugEcho("multipart: " . count($part->parts));
1044
  //DebugDump($part);
1045
  filter_PreferedText($part, $prefer_text_type);
1046
  foreach ($part->parts as $section) {
@@ -1050,22 +1058,22 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
1050
  break;
1051
 
1052
  case 'text':
1053
- DebugEcho("ctype_primary: text");
1054
  //DebugDump($part);
1055
 
1056
  $charset = "";
1057
  if (property_exists($part, 'ctype_parameters') && array_key_exists('charset', $part->ctype_parameters) && !empty($part->ctype_parameters['charset'])) {
1058
  $charset = $part->ctype_parameters['charset'];
1059
- DebugEcho("charset: $charset");
1060
  }
1061
 
1062
  $encoding = "";
1063
  if (array_key_exists('content-transfer-encoding', $part->headers) && !empty($part->headers['content-transfer-encoding'])) {
1064
  $encoding = $part->headers['content-transfer-encoding'];
1065
- DebugEcho("encoding: $encoding");
1066
  }
1067
 
1068
- if (array_key_exists('content-transfer-encoding', $part->headers)) {
1069
  //DebugDump($part);
1070
  $part->body = HandleMessageEncoding($encoding, $charset, $part->body, $message_encoding, $message_dequote);
1071
  if (!empty($charset)) {
@@ -1074,48 +1082,48 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
1074
  //DebugDump($part);
1075
  }
1076
  if (array_key_exists('disposition', $part) && $part->disposition == 'attachment') {
1077
- DebugEcho("text Attachement: $filename");
1078
  if (!preg_match('/ATT\d\d\d\d\d.txt/i', $filename)) {
1079
  $file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
1080
  if (!is_wp_error($file_id)) {
1081
  $file = wp_get_attachment_url($file_id);
1082
  $icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
1083
  $attachments["html"][$filename] = "<a href='$file'>" . $icon . $filename . '</a>' . "\n";
1084
- DebugEcho("text attachment: adding '$filename'");
1085
  } else {
1086
  LogInfo($file_id->get_error_message());
1087
  }
1088
  } else {
1089
- DebugEcho("text attachment: skipping '$filename'");
1090
  }
1091
  } else {
1092
 
1093
  //go through each sub-section
1094
  if ($mimetype_secondary == 'enriched') {
1095
  //convert enriched text to HTML
1096
- DebugEcho("enriched");
1097
  $meta_return .= filter_Etf2HTML($part->body) . "\n";
1098
  } elseif ($mimetype_secondary == 'html') {
1099
  //strip excess HTML
1100
- DebugEcho("html");
1101
  $meta_return .= filter_CleanHtml($part->body) . "\n";
1102
  } elseif ($mimetype_secondary == 'plain') {
1103
- DebugEcho("plain text");
1104
  //DebugDump($part);
1105
 
1106
- DebugEcho("body text");
1107
  if ($allow_html_in_body) {
1108
- DebugEcho("html allowed");
1109
  $meta_return .= $part->body;
1110
  //$meta_return = "<div>$meta_return</div>\n";
1111
  } else {
1112
- DebugEcho("html not allowed (htmlentities)");
1113
  $meta_return .= htmlentities($part->body);
1114
  }
1115
  $meta_return = filter_StripPGP($meta_return);
1116
  //DebugEcho("meta return: $meta_return");
1117
  } else {
1118
- DebugEcho("text Attachement wo disposition: $filename");
1119
  $file_id = postie_media_handle_upload($part, $post_id, $poster);
1120
  if (!is_wp_error($file_id)) {
1121
  $file = wp_get_attachment_url($file_id);
@@ -1129,30 +1137,30 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
1129
  break;
1130
 
1131
  case 'image':
1132
- DebugEcho("image Attachement: $filename");
1133
  $file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
1134
  if (!is_wp_error($file_id)) {
1135
  //featured image logic
1136
  //set the first image we come across as the featured image
1137
- DebugEcho("has_post_thumbnail: " . has_post_thumbnail($post_id));
1138
  //DebugEcho("get_the_post_thumbnail: " .get_the_post_thumbnail($post_id));
1139
 
1140
  if ($featured_image && !has_post_thumbnail($post_id)) {
1141
- DebugEcho("featured image: $file_id");
1142
  set_post_thumbnail($post_id, $file_id);
1143
  }
1144
  $file = wp_get_attachment_url($file_id);
1145
  $cid = "";
1146
  if (array_key_exists('content-id', $part->headers)) {
1147
  $cid = trim($part->headers["content-id"], "<>");
1148
- DebugEcho("found cid: $cid");
1149
  }
1150
 
1151
  $the_post = get_post($file_id);
1152
  $attachments["html"][$filename] = parseTemplate($file_id, $mimetype_primary, $imagetemplate, $filename);
1153
  if ($cid) {
1154
  $attachments["cids"][$cid] = array($file, count($attachments["html"]) - 1);
1155
- DebugEcho("CID Attachement: $cid");
1156
  }
1157
  } else {
1158
  LogInfo("image error: " . $file_id->get_error_message());
@@ -1161,20 +1169,20 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
1161
 
1162
  case 'audio':
1163
  //DebugDump($part->headers);
1164
- DebugEcho("audio Attachement: $filename");
1165
  $file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
1166
  if (!is_wp_error($file_id)) {
1167
  $file = wp_get_attachment_url($file_id);
1168
  $cid = "";
1169
  if (array_key_exists('content-id', $part->headers)) {
1170
  $cid = trim($part->headers["content-id"], "<>");
1171
- DebugEcho("audio Attachement cid: $cid");
1172
  }
1173
  if (in_array($fileext, $audiotypes)) {
1174
- DebugEcho("using audio template: $mimetype_secondary");
1175
  $audioTemplate = $audiotemplate;
1176
  } else {
1177
- DebugEcho("using default audio template: $mimetype_secondary");
1178
  $icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
1179
  $audioTemplate = '<a href="{FILELINK}">' . $icon . '{FILENAME}</a>';
1180
  }
@@ -1185,24 +1193,24 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
1185
  break;
1186
 
1187
  case 'video':
1188
- DebugEcho("video Attachement: $filename");
1189
  $file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
1190
  if (!is_wp_error($file_id)) {
1191
  $file = wp_get_attachment_url($file_id);
1192
  $cid = "";
1193
  if (array_key_exists('content-id', $part->headers)) {
1194
  $cid = trim($part->headers["content-id"], "<>");
1195
- DebugEcho("video Attachement cid: $cid");
1196
  }
1197
  //DebugDump($part);
1198
  if (in_array($fileext, $video1types)) {
1199
- DebugEcho("using video1 template: $fileext");
1200
  $videoTemplate = $video1template;
1201
  } elseif (in_array($fileext, $video2types)) {
1202
- DebugEcho("using video2 template: $fileext");
1203
  $videoTemplate = $video2template;
1204
  } else {
1205
- DebugEcho("using default template: $fileext");
1206
  $icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
1207
  $videoTemplate = '<a href="{FILELINK}">' . $icon . '{FILENAME}</a>';
1208
  }
@@ -1214,19 +1222,19 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
1214
  break;
1215
 
1216
  default:
1217
- DebugEcho("found file type: " . $mimetype_primary);
1218
  if (in_array($mimetype_primary, $supported_file_types)) {
1219
  //pgp signature - then forget it
1220
  if ($mimetype_secondary == 'pgp-signature') {
1221
- DebugEcho("found pgp-signature - done");
1222
  break;
1223
  }
1224
  $file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
1225
  if (!is_wp_error($file_id)) {
1226
  $file = wp_get_attachment_url($file_id);
1227
- DebugEcho("uploaded $file_id ($file)");
1228
  $icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
1229
- DebugEcho("default: $icon $filename");
1230
  $attachments["html"][$filename] = parseTemplate($file_id, $mimetype_primary, $generaltemplate, $filename, $icon);
1231
  if (array_key_exists('content-id', $part->headers)) {
1232
  $cid = trim($part->headers["content-id"], "<>");
@@ -1234,20 +1242,20 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
1234
  $attachments["cids"][$cid] = array($file, count($attachments["html"]) - 1);
1235
  }
1236
  } else {
1237
- DebugEcho("No content-id");
1238
  }
1239
  } else {
1240
  LogInfo($file_id->get_error_message());
1241
  }
1242
  } else {
1243
- DebugEcho("Not in supported filetype list");
1244
  DebugDump($supported_file_types);
1245
  }
1246
  break;
1247
  }
1248
  }
1249
- DebugEcho("meta_return: " . substr($meta_return, 0, 500));
1250
- DebugEcho("====");
1251
  return $meta_return;
1252
  }
1253
 
@@ -1327,19 +1335,22 @@ function filter_Etf2HTML($content) {
1327
  function filter_CleanHtml($content) {
1328
  $html = str_get_html($content);
1329
  if ($html) {
1330
- DebugEcho("Looking for invalid tags");
1331
  foreach ($html->find('script, style, head') as $node) {
1332
- DebugEcho("Removing: " . $node->outertext);
1333
  $node->outertext = '';
1334
  }
 
 
1335
  $html->load($html->save());
1336
 
1337
  $b = $html->find('body');
1338
  if ($b) {
 
1339
  $content = "<div>" . $b[0]->innertext . "</div>\n";
1340
  }
1341
  } else {
1342
- DebugEcho("No HTML found");
1343
  }
1344
  return $content;
1345
  }
@@ -1561,7 +1572,7 @@ function filter_End(&$content, $config) {
1561
  //filter content for new lines
1562
  function filter_Newlines(&$content, $config) {
1563
  if ($config['filternewlines']) {
1564
-
1565
  $search = array(
1566
  "/\r\n/",
1567
  "/\n\n/",
@@ -1579,6 +1590,7 @@ function filter_Newlines(&$content, $config) {
1579
 
1580
  $result = preg_replace($search, $replace, $content);
1581
 
 
1582
  if ($config['convertnewline']) {
1583
  $content = preg_replace('/(LINEBREAK)/', "<br />\n", $result);
1584
  } else {
1
  <?php
2
  /*
3
+ $Id: postie-functions.php 1012303 2014-10-22 18:57:48Z WayneAllen $
4
  */
5
 
6
  //to turn on debug output add the following line to wp-config.php
170
  $es = $html->find('text');
171
  DebugEcho("tag_Date: html " . count($es));
172
  foreach ($es as $e) {
173
+ //DebugEcho("tag_Date: ".trim($e->plaintext));
174
  $matches = array();
175
  if (1 === preg_match("/^date:\s?(.*)$/im", trim($e->plaintext), $matches)) {
176
  DebugEcho("tag_Date: found date tag $matches[1]");
320
 
321
  $id = GetParentPostForReply($subject);
322
  if (empty($id)) {
323
+ DebugEcho("Not a reply");
324
  $id = $post_id;
325
  $is_reply = false;
326
  if ($config['add_meta'] == 'yes') {
327
+ DebugEcho("Adding meta");
328
  if ($config['wrap_pre'] == 'yes') {
329
+ DebugEcho("Adding <pre>");
330
  $content = $postAuthorDetails['content'] . "<pre>\n" . $content . "</pre>\n";
331
  $content = "<pre>\n" . $content . "</pre>\n";
332
  } else {
335
  }
336
  } else {
337
  if ($config['wrap_pre'] == 'yes') {
338
+ DebugEcho("Adding <pre>");
339
  $content = "<pre>\n" . $content . "</pre>\n";
340
  }
341
  }
947
  * @return boolean
948
  */
949
  function isBannedFileName($filename, $bannedFiles) {
950
+ if (empty($filename) || empty($bannedFiles)) {
951
  return false;
952
+ }
953
  foreach ($bannedFiles as $bannedFile) {
954
  if (fnmatch($bannedFile, $filename)) {
955
  EchoInfo("Ignoring attachment: $filename - it is on the banned files list.");
962
  function GetContent($part, &$attachments, $post_id, $poster, $config) {
963
  extract($config);
964
  //global $charset, $encoding;
965
+ DebugEcho('GetContent: ---- start');
966
  $meta_return = '';
967
  if (property_exists($part, "ctype_primary")) {
968
  DebugEcho("GetContent: primary= " . $part->ctype_primary . ", secondary = " . $part->ctype_secondary);
974
  //look for banned file names
975
  if (property_exists($part, 'ctype_parameters') && is_array($part->ctype_parameters) && array_key_exists('name', $part->ctype_parameters))
976
  if (isBannedFileName($part->ctype_parameters['name'], $banned_files_list)) {
977
+ DebugEcho("GetContent: found banned filename");
978
  return NULL;
979
  }
980
 
992
  }
993
 
994
  if (property_exists($part, "ctype_primary") && $part->ctype_primary == "multipart" && $part->ctype_secondary == "appledouble") {
995
+ DebugEcho("GetContent: multipart appledouble");
996
  $mimeDecodedEmail = DecodeMIMEMail("Content-Type: multipart/mixed; boundary=" . $part->ctype_parameters["boundary"] . "\n" . $part->body);
997
  filter_PreferedText($mimeDecodedEmail, $prefer_text_type);
998
  filter_AppleFile($mimeDecodedEmail);
1015
  $mimetype_primary = "";
1016
  $mimetype_secondary = "";
1017
 
1018
+ if (property_exists($part, "ctype_primary")) {
1019
  $mimetype_primary = strtolower($part->ctype_primary);
1020
+ }
1021
+ if (property_exists($part, "ctype_secondary")) {
1022
  $mimetype_secondary = strtolower($part->ctype_secondary);
1023
+ }
1024
 
1025
  $typeinfo = wp_check_filetype($filename);
1026
  //DebugDump($typeinfo);
1048
 
1049
  switch ($mimetype_primary) {
1050
  case 'multipart':
1051
+ DebugEcho("GetContent: multipart: " . count($part->parts));
1052
  //DebugDump($part);
1053
  filter_PreferedText($part, $prefer_text_type);
1054
  foreach ($part->parts as $section) {
1058
  break;
1059
 
1060
  case 'text':
1061
+ DebugEcho("GetContent: ctype_primary: text");
1062
  //DebugDump($part);
1063
 
1064
  $charset = "";
1065
  if (property_exists($part, 'ctype_parameters') && array_key_exists('charset', $part->ctype_parameters) && !empty($part->ctype_parameters['charset'])) {
1066
  $charset = $part->ctype_parameters['charset'];
1067
+ DebugEcho("GetContent: text charset: $charset");
1068
  }
1069
 
1070
  $encoding = "";
1071
  if (array_key_exists('content-transfer-encoding', $part->headers) && !empty($part->headers['content-transfer-encoding'])) {
1072
  $encoding = $part->headers['content-transfer-encoding'];
1073
+ DebugEcho("GetContent: text encoding: $encoding");
1074
  }
1075
 
1076
+ if ($charset !== '' || $encoding !== '') {
1077
  //DebugDump($part);
1078
  $part->body = HandleMessageEncoding($encoding, $charset, $part->body, $message_encoding, $message_dequote);
1079
  if (!empty($charset)) {
1082
  //DebugDump($part);
1083
  }
1084
  if (array_key_exists('disposition', $part) && $part->disposition == 'attachment') {
1085
+ DebugEcho("GetContent: text Attachement: $filename");
1086
  if (!preg_match('/ATT\d\d\d\d\d.txt/i', $filename)) {
1087
  $file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
1088
  if (!is_wp_error($file_id)) {
1089
  $file = wp_get_attachment_url($file_id);
1090
  $icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
1091
  $attachments["html"][$filename] = "<a href='$file'>" . $icon . $filename . '</a>' . "\n";
1092
+ DebugEcho("GetContent: text attachment: adding '$filename'");
1093
  } else {
1094
  LogInfo($file_id->get_error_message());
1095
  }
1096
  } else {
1097
+ DebugEcho("GetContent: text attachment: skipping '$filename'");
1098
  }
1099
  } else {
1100
 
1101
  //go through each sub-section
1102
  if ($mimetype_secondary == 'enriched') {
1103
  //convert enriched text to HTML
1104
+ DebugEcho("GetContent: enriched");
1105
  $meta_return .= filter_Etf2HTML($part->body) . "\n";
1106
  } elseif ($mimetype_secondary == 'html') {
1107
  //strip excess HTML
1108
+ DebugEcho("GetContent: html");
1109
  $meta_return .= filter_CleanHtml($part->body) . "\n";
1110
  } elseif ($mimetype_secondary == 'plain') {
1111
+ DebugEcho("GetContent: plain text");
1112
  //DebugDump($part);
1113
 
1114
+ DebugEcho("GetContent: body text");
1115
  if ($allow_html_in_body) {
1116
+ DebugEcho("GetContent: html allowed");
1117
  $meta_return .= $part->body;
1118
  //$meta_return = "<div>$meta_return</div>\n";
1119
  } else {
1120
+ DebugEcho("GetContent: html not allowed (htmlentities)");
1121
  $meta_return .= htmlentities($part->body);
1122
  }
1123
  $meta_return = filter_StripPGP($meta_return);
1124
  //DebugEcho("meta return: $meta_return");
1125
  } else {
1126
+ DebugEcho("GetContent: text Attachement wo disposition: $filename");
1127
  $file_id = postie_media_handle_upload($part, $post_id, $poster);
1128
  if (!is_wp_error($file_id)) {
1129
  $file = wp_get_attachment_url($file_id);
1137
  break;
1138
 
1139
  case 'image':
1140
+ DebugEcho("GetContent: image Attachement: $filename");
1141
  $file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
1142
  if (!is_wp_error($file_id)) {
1143
  //featured image logic
1144
  //set the first image we come across as the featured image
1145
+ DebugEcho("GetContent: has_post_thumbnail: " . has_post_thumbnail($post_id));
1146
  //DebugEcho("get_the_post_thumbnail: " .get_the_post_thumbnail($post_id));
1147
 
1148
  if ($featured_image && !has_post_thumbnail($post_id)) {
1149
+ DebugEcho("GetContent: featured image: $file_id");
1150
  set_post_thumbnail($post_id, $file_id);
1151
  }
1152
  $file = wp_get_attachment_url($file_id);
1153
  $cid = "";
1154
  if (array_key_exists('content-id', $part->headers)) {
1155
  $cid = trim($part->headers["content-id"], "<>");
1156
+ DebugEcho("GetContent: found cid: $cid");
1157
  }
1158
 
1159
  $the_post = get_post($file_id);
1160
  $attachments["html"][$filename] = parseTemplate($file_id, $mimetype_primary, $imagetemplate, $filename);
1161
  if ($cid) {
1162
  $attachments["cids"][$cid] = array($file, count($attachments["html"]) - 1);
1163
+ DebugEcho("GetContent: CID Attachement: $cid");
1164
  }
1165
  } else {
1166
  LogInfo("image error: " . $file_id->get_error_message());
1169
 
1170
  case 'audio':
1171
  //DebugDump($part->headers);
1172
+ DebugEcho("GetContent: audio Attachement: $filename");
1173
  $file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
1174
  if (!is_wp_error($file_id)) {
1175
  $file = wp_get_attachment_url($file_id);
1176
  $cid = "";
1177
  if (array_key_exists('content-id', $part->headers)) {
1178
  $cid = trim($part->headers["content-id"], "<>");
1179
+ DebugEcho("GetContent: audio Attachement cid: $cid");
1180
  }
1181
  if (in_array($fileext, $audiotypes)) {
1182
+ DebugEcho("GetContent: using audio template: $mimetype_secondary");
1183
  $audioTemplate = $audiotemplate;
1184
  } else {
1185
+ DebugEcho("GetContent: using default audio template: $mimetype_secondary");
1186
  $icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
1187
  $audioTemplate = '<a href="{FILELINK}">' . $icon . '{FILENAME}</a>';
1188
  }
1193
  break;
1194
 
1195
  case 'video':
1196
+ DebugEcho("GetContent: video Attachement: $filename");
1197
  $file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
1198
  if (!is_wp_error($file_id)) {
1199
  $file = wp_get_attachment_url($file_id);
1200
  $cid = "";
1201
  if (array_key_exists('content-id', $part->headers)) {
1202
  $cid = trim($part->headers["content-id"], "<>");
1203
+ DebugEcho("GetContent: video Attachement cid: $cid");
1204
  }
1205
  //DebugDump($part);
1206
  if (in_array($fileext, $video1types)) {
1207
+ DebugEcho("GetContent: using video1 template: $fileext");
1208
  $videoTemplate = $video1template;
1209
  } elseif (in_array($fileext, $video2types)) {
1210
+ DebugEcho("GetContent: using video2 template: $fileext");
1211
  $videoTemplate = $video2template;
1212
  } else {
1213
+ DebugEcho("GetContent: using default template: $fileext");
1214
  $icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
1215
  $videoTemplate = '<a href="{FILELINK}">' . $icon . '{FILENAME}</a>';
1216
  }
1222
  break;
1223
 
1224
  default:
1225
+ DebugEcho("GetContent: found file type: " . $mimetype_primary);
1226
  if (in_array($mimetype_primary, $supported_file_types)) {
1227
  //pgp signature - then forget it
1228
  if ($mimetype_secondary == 'pgp-signature') {
1229
+ DebugEcho("GetContent: found pgp-signature - done");
1230
  break;
1231
  }
1232
  $file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
1233
  if (!is_wp_error($file_id)) {
1234
  $file = wp_get_attachment_url($file_id);
1235
+ DebugEcho("GetContent: uploaded $file_id ($file)");
1236
  $icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
1237
+ DebugEcho("GetContent: default: $icon $filename");
1238
  $attachments["html"][$filename] = parseTemplate($file_id, $mimetype_primary, $generaltemplate, $filename, $icon);
1239
  if (array_key_exists('content-id', $part->headers)) {
1240
  $cid = trim($part->headers["content-id"], "<>");
1242
  $attachments["cids"][$cid] = array($file, count($attachments["html"]) - 1);
1243
  }
1244
  } else {
1245
+ DebugEcho("GetContent: No content-id");
1246
  }
1247
  } else {
1248
  LogInfo($file_id->get_error_message());
1249
  }
1250
  } else {
1251
+ DebugEcho("GetContent: Not in supported filetype list");
1252
  DebugDump($supported_file_types);
1253
  }
1254
  break;
1255
  }
1256
  }
1257
+ DebugEcho("GetContent: meta_return: " . $meta_return);
1258
+ DebugEcho("GetContent: ==== end");
1259
  return $meta_return;
1260
  }
1261
 
1335
  function filter_CleanHtml($content) {
1336
  $html = str_get_html($content);
1337
  if ($html) {
1338
+ DebugEcho("filter_CleanHtml: Looking for invalid tags");
1339
  foreach ($html->find('script, style, head') as $node) {
1340
+ DebugEcho("filter_CleanHtml: Removing: " . $node->outertext);
1341
  $node->outertext = '';
1342
  }
1343
+ DebugEcho("filter_CleanHtml: " . $html->save());
1344
+
1345
  $html->load($html->save());
1346
 
1347
  $b = $html->find('body');
1348
  if ($b) {
1349
+ DebugEcho("filter_CleanHtml: replacing body with div");
1350
  $content = "<div>" . $b[0]->innertext . "</div>\n";
1351
  }
1352
  } else {
1353
+ DebugEcho("filter_CleanHtml: No HTML found");
1354
  }
1355
  return $content;
1356
  }
1572
  //filter content for new lines
1573
  function filter_Newlines(&$content, $config) {
1574
  if ($config['filternewlines']) {
1575
+ DebugEcho("filter_Newlines: filternewlines");
1576
  $search = array(
1577
  "/\r\n/",
1578
  "/\n\n/",
1590
 
1591
  $result = preg_replace($search, $replace, $content);
1592
 
1593
+ DebugEcho("filter_Newlines: convertnewline: " . $config['convertnewline']);
1594
  if ($config['convertnewline']) {
1595
  $content = preg_replace('/(LINEBREAK)/', "<br />\n", $result);
1596
  } else {
postie.php CHANGED
@@ -4,7 +4,7 @@
4
  Plugin Name: Postie
5
  Plugin URI: http://PostiePlugin.com/
6
  Description: Create posts via email. Signifigantly upgrades the Post by Email features of Word Press.
7
- Version: 1.6.4
8
  Author: Wayne Allen
9
  Author URI: http://allens-home.com/
10
  License: GPL2
@@ -27,11 +27,11 @@
27
  */
28
 
29
  /*
30
- $Id: postie.php 1011696 2014-10-21 19:11:48Z WayneAllen $
31
  */
32
  require_once(dirname(__FILE__) . DIRECTORY_SEPARATOR . "postie-functions.php");
33
 
34
- define('POSTIE_VERSION', '1.6.4');
35
  define("POSTIE_ROOT", dirname(__FILE__));
36
  define("POSTIE_URL", WP_PLUGIN_URL . '/' . basename(dirname(__FILE__)));
37
 
4
  Plugin Name: Postie
5
  Plugin URI: http://PostiePlugin.com/
6
  Description: Create posts via email. Signifigantly upgrades the Post by Email features of Word Press.
7
+ Version: 1.6.5
8
  Author: Wayne Allen
9
  Author URI: http://allens-home.com/
10
  License: GPL2
27
  */
28
 
29
  /*
30
+ $Id: postie.php 1012303 2014-10-22 18:57:48Z WayneAllen $
31
  */
32
  require_once(dirname(__FILE__) . DIRECTORY_SEPARATOR . "postie-functions.php");
33
 
34
+ define('POSTIE_VERSION', '1.6.5');
35
  define("POSTIE_ROOT", dirname(__FILE__));
36
  define("POSTIE_URL", WP_PLUGIN_URL . '/' . basename(dirname(__FILE__)));
37
 
readme.txt CHANGED
@@ -6,7 +6,7 @@ Plugin URI: http://PostiePlugin.com/
6
  Tags: e-mail, email, post-by-email
7
  Requires at least: 3.0
8
  Tested up to: 4.0
9
- Stable tag: 1.6.4
10
  License: GPLv2 or later
11
  License URI: http://www.gnu.org/licenses/gpl-2.0.html
12
 
@@ -238,7 +238,11 @@ All script, style and body tags are stripped from html emails.
238
  Attachments are now processed in the order they were attached.
239
 
240
  == CHANGELOG ==
241
- = 1.6.4 (future) =
 
 
 
 
242
  * Provide post url in success email
243
 
244
  = 1.6.3 (2014.10.03) =
6
  Tags: e-mail, email, post-by-email
7
  Requires at least: 3.0
8
  Tested up to: 4.0
9
+ Stable tag: 1.6.5
10
  License: GPLv2 or later
11
  License URI: http://www.gnu.org/licenses/gpl-2.0.html
12
 
238
  Attachments are now processed in the order they were attached.
239
 
240
  == CHANGELOG ==
241
+ = 1.6.5 (2014.10.22) =
242
+ * Fixed charset encoding bug when there wasn't a content-transfer-encoding header
243
+ * Upgraded simple_html_dom
244
+
245
+ = 1.6.4 (2014.10.21) =
246
  * Provide post url in success email
247
 
248
  = 1.6.3 (2014.10.03) =
simple_html_dom.php CHANGED
@@ -34,7 +34,7 @@
34
  * @author S.C. Chen <me578022@gmail.com>
35
  * @author John Schlick
36
  * @author Rus Carroll
37
- * @version 1.5 ($Rev: 202 $)
38
  * @package PlaceLocalInclude
39
  * @subpackage simple_html_dom
40
  */
@@ -269,7 +269,10 @@ class simple_html_dom_node
269
  {
270
  return $this->children;
271
  }
272
- if (isset($this->children[$idx])) return $this->children[$idx];
 
 
 
273
  return null;
274
  }
275
 
@@ -330,14 +333,14 @@ class simple_html_dom_node
330
  function find_ancestor_tag($tag)
331
  {
332
  global $debug_object;
333
- if (is_object($debug_object)) { $debug_object->debugLogEntry(1); }
334
 
335
  // Start by including ourselves in the comparison.
336
  $returnDom = $this;
337
 
338
  while (!is_null($returnDom))
339
  {
340
- if (is_object($debug_object)) { $debug_object->debugLog(2, "Current tag is: " . $returnDom->tag); }
341
 
342
  if ($returnDom->tag == $tag)
343
  {
@@ -374,7 +377,7 @@ class simple_html_dom_node
374
  $text = " with text: " . $this->text;
375
  }
376
  }
377
- $debug_object->debugLog(1, 'Innertext of tag: ' . $this->tag . $text);
378
  }
379
 
380
  if ($this->tag==='root') return $this->innertext();
@@ -532,7 +535,9 @@ class simple_html_dom_node
532
  foreach ($head as $k=>$v)
533
  {
534
  if (!isset($found_keys[$k]))
 
535
  $found_keys[$k] = 1;
 
536
  }
537
  }
538
 
@@ -554,7 +559,7 @@ class simple_html_dom_node
554
  protected function seek($selector, &$ret, $lowercase=false)
555
  {
556
  global $debug_object;
557
- if (is_object($debug_object)) { $debug_object->debugLogEntry(1); }
558
 
559
  list($tag, $key, $val, $exp, $no_key) = $selector;
560
 
@@ -615,7 +620,7 @@ class simple_html_dom_node
615
  // this is a normal search, we want the value of that attribute of the tag.
616
  $nodeKeyValue = $node->attr[$key];
617
  }
618
- if (is_object($debug_object)) {$debug_object->debugLog(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);}
619
 
620
  //PaperG - If lowercase is set, do a case insensitive test of the value of the selector.
621
  if ($lowercase) {
@@ -623,7 +628,7 @@ class simple_html_dom_node
623
  } else {
624
  $check = $this->match($exp, $val, $nodeKeyValue);
625
  }
626
- if (is_object($debug_object)) {$debug_object->debugLog(2, "after match: " . ($check ? "true" : "false"));}
627
 
628
  // handle multiple class
629
  if (!$check && strcasecmp($key, 'class')===0) {
@@ -645,12 +650,12 @@ class simple_html_dom_node
645
  unset($node);
646
  }
647
  // It's passed by reference so this is actually what this function returns.
648
- if (is_object($debug_object)) {$debug_object->debugLog(1, "EXIT - ret: ", $ret);}
649
  }
650
 
651
  protected function match($exp, $pattern, $value) {
652
  global $debug_object;
653
- if (is_object($debug_object)) {$debug_object->debugLogEntry(1);}
654
 
655
  switch ($exp) {
656
  case '=':
@@ -672,7 +677,7 @@ class simple_html_dom_node
672
 
673
  protected function parse_selector($selector_string) {
674
  global $debug_object;
675
- if (is_object($debug_object)) {$debug_object->debugLogEntry(1);}
676
 
677
  // pattern of CSS selectors, modified from mootools
678
  // Paperg: Add the colon to the attribute, so that it properly finds <tag attr:ibute="something" > like google does.
@@ -683,7 +688,7 @@ class simple_html_dom_node
683
  // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
684
  $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
685
  preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER);
686
- if (is_object($debug_object)) {$debug_object->debugLog(2, "Matches Array: ", $matches);}
687
 
688
  $selectors = array();
689
  $result = array();
@@ -718,12 +723,14 @@ class simple_html_dom_node
718
  return $selectors;
719
  }
720
 
721
- function __get($name) {
 
722
  if (isset($this->attr[$name]))
723
  {
724
  return $this->convert_text($this->attr[$name]);
725
  }
726
- switch ($name) {
 
727
  case 'outertext': return $this->outertext();
728
  case 'innertext': return $this->innertext();
729
  case 'plaintext': return $this->text();
@@ -732,22 +739,30 @@ class simple_html_dom_node
732
  }
733
  }
734
 
735
- function __set($name, $value) {
736
- switch ($name) {
 
 
 
 
 
737
  case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
738
  case 'innertext':
739
  if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value;
740
  return $this->_[HDOM_INFO_INNER] = $value;
741
  }
742
- if (!isset($this->attr[$name])) {
 
743
  $this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
744
  $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
745
  }
746
  $this->attr[$name] = $value;
747
  }
748
 
749
- function __isset($name) {
750
- switch ($name) {
 
 
751
  case 'outertext': return true;
752
  case 'innertext': return true;
753
  case 'plaintext': return true;
@@ -765,7 +780,7 @@ class simple_html_dom_node
765
  function convert_text($text)
766
  {
767
  global $debug_object;
768
- if (is_object($debug_object)) {$debug_object->debugLogEntry(1);}
769
 
770
  $converted_text = $text;
771
 
@@ -777,7 +792,7 @@ class simple_html_dom_node
777
  $sourceCharset = strtoupper($this->dom->_charset);
778
  $targetCharset = strtoupper($this->dom->_target_charset);
779
  }
780
- if (is_object($debug_object)) {$debug_object->debugLog(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);}
781
 
782
  if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0))
783
  {
@@ -1045,10 +1060,10 @@ class simple_html_dom
1045
 
1046
  // prepare
1047
  $this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText);
1048
- // strip out comments
1049
- $this->remove_noise("'<!--(.*?)-->'is");
1050
  // strip out cdata
1051
  $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
 
 
1052
  // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
1053
  // Script tags removal now precedes style tag removal.
1054
  // strip out <script> tags
@@ -1198,22 +1213,22 @@ class simple_html_dom
1198
  if ($success)
1199
  {
1200
  $charset = $matches[1];
1201
- if (is_object($debug_object)) {$debug_object->debugLog(2, 'header content-type found charset of: ' . $charset);}
1202
  }
1203
 
1204
  }
1205
 
1206
  if (empty($charset))
1207
  {
1208
- $el = $this->root->find('meta[http-equiv=Content-Type]',0);
1209
  if (!empty($el))
1210
  {
1211
  $fullvalue = $el->content;
1212
- if (is_object($debug_object)) {$debug_object->debugLog(2, 'meta content-type tag found' . $fullvalue);}
1213
 
1214
  if (!empty($fullvalue))
1215
  {
1216
- $success = preg_match('/charset=(.+)/', $fullvalue, $matches);
1217
  if ($success)
1218
  {
1219
  $charset = $matches[1];
@@ -1221,7 +1236,7 @@ class simple_html_dom
1221
  else
1222
  {
1223
  // If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1
1224
- if (is_object($debug_object)) {$debug_object->debugLog(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');}
1225
  $charset = 'ISO-8859-1';
1226
  }
1227
  }
@@ -1231,14 +1246,19 @@ class simple_html_dom
1231
  // If we couldn't find a charset above, then lets try to detect one based on the text we got...
1232
  if (empty($charset))
1233
  {
1234
- // Have php try to detect the encoding from the text given to us.
1235
- $charset = mb_detect_encoding($this->root->plaintext . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) );
1236
- if (is_object($debug_object)) {$debug_object->debugLog(2, 'mb_detect found: ' . $charset);}
 
 
 
 
 
1237
 
1238
  // and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need...
1239
  if ($charset === false)
1240
  {
1241
- if (is_object($debug_object)) {$debug_object->debugLog(2, 'since mb_detect failed - using default of utf-8');}
1242
  $charset = 'UTF-8';
1243
  }
1244
  }
@@ -1246,11 +1266,11 @@ class simple_html_dom
1246
  // Since CP1252 is a superset, if we get one of its subsets, we want it instead.
1247
  if ((strtolower($charset) == strtolower('ISO-8859-1')) || (strtolower($charset) == strtolower('Latin1')) || (strtolower($charset) == strtolower('Latin-1')))
1248
  {
1249
- if (is_object($debug_object)) {$debug_object->debugLog(2, 'replacing ' . $charset . ' with CP1252 as its a superset');}
1250
  $charset = 'CP1252';
1251
  }
1252
 
1253
- if (is_object($debug_object)) {$debug_object->debugLog(1, 'EXIT - ' . $charset);}
1254
 
1255
  return $this->_charset = $charset;
1256
  }
@@ -1616,14 +1636,14 @@ class simple_html_dom
1616
  protected function remove_noise($pattern, $remove_tag=false)
1617
  {
1618
  global $debug_object;
1619
- if (is_object($debug_object)) { $debug_object->debugLogEntry(1); }
1620
 
1621
  $count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE);
1622
 
1623
  for ($i=$count-1; $i>-1; --$i)
1624
  {
1625
  $key = '___noise___'.sprintf('% 5d', count($this->noise)+1000);
1626
- if (is_object($debug_object)) { $debug_object->debugLog(2, 'key is: ' . $key); }
1627
  $idx = ($remove_tag) ? 0 : 1;
1628
  $this->noise[$key] = $matches[$i][$idx][0];
1629
  $this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
@@ -1641,7 +1661,7 @@ class simple_html_dom
1641
  function restore_noise($text)
1642
  {
1643
  global $debug_object;
1644
- if (is_object($debug_object)) { $debug_object->debugLogEntry(1); }
1645
 
1646
  while (($pos=strpos($text, '___noise___'))!==false)
1647
  {
@@ -1649,7 +1669,7 @@ class simple_html_dom
1649
  if (strlen($text) > $pos+15)
1650
  {
1651
  $key = '___noise___'.$text[$pos+11].$text[$pos+12].$text[$pos+13].$text[$pos+14].$text[$pos+15];
1652
- if (is_object($debug_object)) { $debug_object->debugLog(2, 'located key of: ' . $key); }
1653
 
1654
  if (isset($this->noise[$key]))
1655
  {
@@ -1674,7 +1694,7 @@ class simple_html_dom
1674
  function search_noise($text)
1675
  {
1676
  global $debug_object;
1677
- if (is_object($debug_object)) { $debug_object->debugLogEntry(1); }
1678
 
1679
  foreach($this->noise as $noiseElement)
1680
  {
34
  * @author S.C. Chen <me578022@gmail.com>
35
  * @author John Schlick
36
  * @author Rus Carroll
37
+ * @version 1.5 ($Rev: 210 $)
38
  * @package PlaceLocalInclude
39
  * @subpackage simple_html_dom
40
  */
269
  {
270
  return $this->children;
271
  }
272
+ if (isset($this->children[$idx]))
273
+ {
274
+ return $this->children[$idx];
275
+ }
276
  return null;
277
  }
278
 
333
  function find_ancestor_tag($tag)
334
  {
335
  global $debug_object;
336
+ if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
337
 
338
  // Start by including ourselves in the comparison.
339
  $returnDom = $this;
340
 
341
  while (!is_null($returnDom))
342
  {
343
+ if (is_object($debug_object)) { $debug_object->debug_log(2, "Current tag is: " . $returnDom->tag); }
344
 
345
  if ($returnDom->tag == $tag)
346
  {
377
  $text = " with text: " . $this->text;
378
  }
379
  }
380
+ $debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text);
381
  }
382
 
383
  if ($this->tag==='root') return $this->innertext();
535
  foreach ($head as $k=>$v)
536
  {
537
  if (!isset($found_keys[$k]))
538
+ {
539
  $found_keys[$k] = 1;
540
+ }
541
  }
542
  }
543
 
559
  protected function seek($selector, &$ret, $lowercase=false)
560
  {
561
  global $debug_object;
562
+ if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
563
 
564
  list($tag, $key, $val, $exp, $no_key) = $selector;
565
 
620
  // this is a normal search, we want the value of that attribute of the tag.
621
  $nodeKeyValue = $node->attr[$key];
622
  }
623
+ if (is_object($debug_object)) {$debug_object->debug_log(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);}
624
 
625
  //PaperG - If lowercase is set, do a case insensitive test of the value of the selector.
626
  if ($lowercase) {
628
  } else {
629
  $check = $this->match($exp, $val, $nodeKeyValue);
630
  }
631
+ if (is_object($debug_object)) {$debug_object->debug_log(2, "after match: " . ($check ? "true" : "false"));}
632
 
633
  // handle multiple class
634
  if (!$check && strcasecmp($key, 'class')===0) {
650
  unset($node);
651
  }
652
  // It's passed by reference so this is actually what this function returns.
653
+ if (is_object($debug_object)) {$debug_object->debug_log(1, "EXIT - ret: ", $ret);}
654
  }
655
 
656
  protected function match($exp, $pattern, $value) {
657
  global $debug_object;
658
+ if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
659
 
660
  switch ($exp) {
661
  case '=':
677
 
678
  protected function parse_selector($selector_string) {
679
  global $debug_object;
680
+ if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
681
 
682
  // pattern of CSS selectors, modified from mootools
683
  // Paperg: Add the colon to the attribute, so that it properly finds <tag attr:ibute="something" > like google does.
688
  // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
689
  $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
690
  preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER);
691
+ if (is_object($debug_object)) {$debug_object->debug_log(2, "Matches Array: ", $matches);}
692
 
693
  $selectors = array();
694
  $result = array();
723
  return $selectors;
724
  }
725
 
726
+ function __get($name)
727
+ {
728
  if (isset($this->attr[$name]))
729
  {
730
  return $this->convert_text($this->attr[$name]);
731
  }
732
+ switch ($name)
733
+ {
734
  case 'outertext': return $this->outertext();
735
  case 'innertext': return $this->innertext();
736
  case 'plaintext': return $this->text();
739
  }
740
  }
741
 
742
+ function __set($name, $value)
743
+ {
744
+ global $debug_object;
745
+ if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
746
+
747
+ switch ($name)
748
+ {
749
  case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
750
  case 'innertext':
751
  if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value;
752
  return $this->_[HDOM_INFO_INNER] = $value;
753
  }
754
+ if (!isset($this->attr[$name]))
755
+ {
756
  $this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
757
  $this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
758
  }
759
  $this->attr[$name] = $value;
760
  }
761
 
762
+ function __isset($name)
763
+ {
764
+ switch ($name)
765
+ {
766
  case 'outertext': return true;
767
  case 'innertext': return true;
768
  case 'plaintext': return true;
780
  function convert_text($text)
781
  {
782
  global $debug_object;
783
+ if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
784
 
785
  $converted_text = $text;
786
 
792
  $sourceCharset = strtoupper($this->dom->_charset);
793
  $targetCharset = strtoupper($this->dom->_target_charset);
794
  }
795
+ if (is_object($debug_object)) {$debug_object->debug_log(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);}
796
 
797
  if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0))
798
  {
1060
 
1061
  // prepare
1062
  $this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText);
 
 
1063
  // strip out cdata
1064
  $this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
1065
+ // strip out comments
1066
+ $this->remove_noise("'<!--(.*?)-->'is");
1067
  // Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
1068
  // Script tags removal now precedes style tag removal.
1069
  // strip out <script> tags
1213
  if ($success)
1214
  {
1215
  $charset = $matches[1];
1216
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'header content-type found charset of: ' . $charset);}
1217
  }
1218
 
1219
  }
1220
 
1221
  if (empty($charset))
1222
  {
1223
+ $el = $this->root->find('meta[http-equiv=Content-Type]',0, true);
1224
  if (!empty($el))
1225
  {
1226
  $fullvalue = $el->content;
1227
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag found' . $fullvalue);}
1228
 
1229
  if (!empty($fullvalue))
1230
  {
1231
+ $success = preg_match('/charset=(.+)/i', $fullvalue, $matches);
1232
  if ($success)
1233
  {
1234
  $charset = $matches[1];
1236
  else
1237
  {
1238
  // If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1
1239
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');}
1240
  $charset = 'ISO-8859-1';
1241
  }
1242
  }
1246
  // If we couldn't find a charset above, then lets try to detect one based on the text we got...
1247
  if (empty($charset))
1248
  {
1249
+ // Use this in case mb_detect_encoding isn't installed/loaded on this machine.
1250
+ $charset = false;
1251
+ if (function_exists('mb_detect_encoding'))
1252
+ {
1253
+ // Have php try to detect the encoding from the text given to us.
1254
+ $charset = mb_detect_encoding($this->root->plaintext . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) );
1255
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'mb_detect found: ' . $charset);}
1256
+ }
1257
 
1258
  // and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need...
1259
  if ($charset === false)
1260
  {
1261
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'since mb_detect failed - using default of utf-8');}
1262
  $charset = 'UTF-8';
1263
  }
1264
  }
1266
  // Since CP1252 is a superset, if we get one of its subsets, we want it instead.
1267
  if ((strtolower($charset) == strtolower('ISO-8859-1')) || (strtolower($charset) == strtolower('Latin1')) || (strtolower($charset) == strtolower('Latin-1')))
1268
  {
1269
+ if (is_object($debug_object)) {$debug_object->debug_log(2, 'replacing ' . $charset . ' with CP1252 as its a superset');}
1270
  $charset = 'CP1252';
1271
  }
1272
 
1273
+ if (is_object($debug_object)) {$debug_object->debug_log(1, 'EXIT - ' . $charset);}
1274
 
1275
  return $this->_charset = $charset;
1276
  }
1636
  protected function remove_noise($pattern, $remove_tag=false)
1637
  {
1638
  global $debug_object;
1639
+ if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
1640
 
1641
  $count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE);
1642
 
1643
  for ($i=$count-1; $i>-1; --$i)
1644
  {
1645
  $key = '___noise___'.sprintf('% 5d', count($this->noise)+1000);
1646
+ if (is_object($debug_object)) { $debug_object->debug_log(2, 'key is: ' . $key); }
1647
  $idx = ($remove_tag) ? 0 : 1;
1648
  $this->noise[$key] = $matches[$i][$idx][0];
1649
  $this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
1661
  function restore_noise($text)
1662
  {
1663
  global $debug_object;
1664
+ if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
1665
 
1666
  while (($pos=strpos($text, '___noise___'))!==false)
1667
  {
1669
  if (strlen($text) > $pos+15)
1670
  {
1671
  $key = '___noise___'.$text[$pos+11].$text[$pos+12].$text[$pos+13].$text[$pos+14].$text[$pos+15];
1672
+ if (is_object($debug_object)) { $debug_object->debug_log(2, 'located key of: ' . $key); }
1673
 
1674
  if (isset($this->noise[$key]))
1675
  {
1694
  function search_noise($text)
1695
  {
1696
  global $debug_object;
1697
+ if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
1698
 
1699
  foreach($this->noise as $noiseElement)
1700
  {