Version Description
= 1.6.5 (2014.10.22) = * Fixed charset encoding bug when there wasn't a content-transfer-encoding header * Upgraded simple_html_dom
Download this release
Release Info
Developer | WayneAllen |
Plugin | Postie |
Version | 1.6.5 |
Comparing to | |
See all releases |
Code changes from version 1.6.4 to 1.6.5
- docs/Changes.txt +4 -0
- docs/Postie.txt +1 -1
- postie-functions.php +60 -48
- postie.php +3 -3
- readme.txt +6 -2
- simple_html_dom.php +59 -39
docs/Changes.txt
CHANGED
@@ -27,6 +27,10 @@ All script, style and body tags are stripped from html emails.
|
|
27 |
Attachments are now processed in the order they were attached.
|
28 |
|
29 |
== CHANGELOG ==
|
|
|
|
|
|
|
|
|
30 |
= 1.6.4 (2014.10.21) =
|
31 |
* Provide post url in success email
|
32 |
|
27 |
Attachments are now processed in the order they were attached.
|
28 |
|
29 |
== CHANGELOG ==
|
30 |
+
= 1.6.5 (2014.10.22) =
|
31 |
+
* Fixed charset encoding bug when there wasn't a content-transfer-encoding header
|
32 |
+
* Upgraded simple_html_dom
|
33 |
+
|
34 |
= 1.6.4 (2014.10.21) =
|
35 |
* Provide post url in success email
|
36 |
|
docs/Postie.txt
CHANGED
@@ -6,7 +6,7 @@ Plugin URI: http://PostiePlugin.com/
|
|
6 |
Tags: e-mail, email, post-by-email
|
7 |
Requires at least: 3.0
|
8 |
Tested up to: 4.0
|
9 |
-
Stable tag: 1.6.4
|
10 |
License: GPLv2 or later
|
11 |
License URI: http://www.gnu.org/licenses/gpl-2.0.html
|
12 |
|
6 |
Tags: e-mail, email, post-by-email
|
7 |
Requires at least: 3.0
|
8 |
Tested up to: 4.0
|
9 |
+
Stable tag: 1.6.5
|
10 |
License: GPLv2 or later
|
11 |
License URI: http://www.gnu.org/licenses/gpl-2.0.html
|
12 |
|
postie-functions.php
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
<?php
|
2 |
/*
|
3 |
-
$Id: postie-functions.php
|
4 |
*/
|
5 |
|
6 |
//to turn on debug output add the following line to wp-config.php
|
@@ -170,7 +170,7 @@ function tag_Date(&$content, $message_date) {
|
|
170 |
$es = $html->find('text');
|
171 |
DebugEcho("tag_Date: html " . count($es));
|
172 |
foreach ($es as $e) {
|
173 |
-
DebugEcho(trim($e->plaintext));
|
174 |
$matches = array();
|
175 |
if (1 === preg_match("/^date:\s?(.*)$/im", trim($e->plaintext), $matches)) {
|
176 |
DebugEcho("tag_Date: found date tag $matches[1]");
|
@@ -320,10 +320,13 @@ function CreatePost($poster, $mimeDecodedEmail, $post_id, &$is_reply, $config, $
|
|
320 |
|
321 |
$id = GetParentPostForReply($subject);
|
322 |
if (empty($id)) {
|
|
|
323 |
$id = $post_id;
|
324 |
$is_reply = false;
|
325 |
if ($config['add_meta'] == 'yes') {
|
|
|
326 |
if ($config['wrap_pre'] == 'yes') {
|
|
|
327 |
$content = $postAuthorDetails['content'] . "<pre>\n" . $content . "</pre>\n";
|
328 |
$content = "<pre>\n" . $content . "</pre>\n";
|
329 |
} else {
|
@@ -332,6 +335,7 @@ function CreatePost($poster, $mimeDecodedEmail, $post_id, &$is_reply, $config, $
|
|
332 |
}
|
333 |
} else {
|
334 |
if ($config['wrap_pre'] == 'yes') {
|
|
|
335 |
$content = "<pre>\n" . $content . "</pre>\n";
|
336 |
}
|
337 |
}
|
@@ -943,8 +947,9 @@ function PostToDB($details, $isReply, $customImageField, $postmodifiers) {
|
|
943 |
* @return boolean
|
944 |
*/
|
945 |
function isBannedFileName($filename, $bannedFiles) {
|
946 |
-
if (empty($filename) || empty($bannedFiles))
|
947 |
return false;
|
|
|
948 |
foreach ($bannedFiles as $bannedFile) {
|
949 |
if (fnmatch($bannedFile, $filename)) {
|
950 |
EchoInfo("Ignoring attachment: $filename - it is on the banned files list.");
|
@@ -957,7 +962,7 @@ function isBannedFileName($filename, $bannedFiles) {
|
|
957 |
function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
958 |
extract($config);
|
959 |
//global $charset, $encoding;
|
960 |
-
DebugEcho('----');
|
961 |
$meta_return = '';
|
962 |
if (property_exists($part, "ctype_primary")) {
|
963 |
DebugEcho("GetContent: primary= " . $part->ctype_primary . ", secondary = " . $part->ctype_secondary);
|
@@ -969,6 +974,7 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
|
969 |
//look for banned file names
|
970 |
if (property_exists($part, 'ctype_parameters') && is_array($part->ctype_parameters) && array_key_exists('name', $part->ctype_parameters))
|
971 |
if (isBannedFileName($part->ctype_parameters['name'], $banned_files_list)) {
|
|
|
972 |
return NULL;
|
973 |
}
|
974 |
|
@@ -986,7 +992,7 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
|
986 |
}
|
987 |
|
988 |
if (property_exists($part, "ctype_primary") && $part->ctype_primary == "multipart" && $part->ctype_secondary == "appledouble") {
|
989 |
-
DebugEcho("multipart appledouble");
|
990 |
$mimeDecodedEmail = DecodeMIMEMail("Content-Type: multipart/mixed; boundary=" . $part->ctype_parameters["boundary"] . "\n" . $part->body);
|
991 |
filter_PreferedText($mimeDecodedEmail, $prefer_text_type);
|
992 |
filter_AppleFile($mimeDecodedEmail);
|
@@ -1009,10 +1015,12 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
|
1009 |
$mimetype_primary = "";
|
1010 |
$mimetype_secondary = "";
|
1011 |
|
1012 |
-
if (property_exists($part, "ctype_primary"))
|
1013 |
$mimetype_primary = strtolower($part->ctype_primary);
|
1014 |
-
|
|
|
1015 |
$mimetype_secondary = strtolower($part->ctype_secondary);
|
|
|
1016 |
|
1017 |
$typeinfo = wp_check_filetype($filename);
|
1018 |
//DebugDump($typeinfo);
|
@@ -1040,7 +1048,7 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
|
1040 |
|
1041 |
switch ($mimetype_primary) {
|
1042 |
case 'multipart':
|
1043 |
-
DebugEcho("multipart: " . count($part->parts));
|
1044 |
//DebugDump($part);
|
1045 |
filter_PreferedText($part, $prefer_text_type);
|
1046 |
foreach ($part->parts as $section) {
|
@@ -1050,22 +1058,22 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
|
1050 |
break;
|
1051 |
|
1052 |
case 'text':
|
1053 |
-
DebugEcho("ctype_primary: text");
|
1054 |
//DebugDump($part);
|
1055 |
|
1056 |
$charset = "";
|
1057 |
if (property_exists($part, 'ctype_parameters') && array_key_exists('charset', $part->ctype_parameters) && !empty($part->ctype_parameters['charset'])) {
|
1058 |
$charset = $part->ctype_parameters['charset'];
|
1059 |
-
DebugEcho("charset: $charset");
|
1060 |
}
|
1061 |
|
1062 |
$encoding = "";
|
1063 |
if (array_key_exists('content-transfer-encoding', $part->headers) && !empty($part->headers['content-transfer-encoding'])) {
|
1064 |
$encoding = $part->headers['content-transfer-encoding'];
|
1065 |
-
DebugEcho("encoding: $encoding");
|
1066 |
}
|
1067 |
|
1068 |
-
if (
|
1069 |
//DebugDump($part);
|
1070 |
$part->body = HandleMessageEncoding($encoding, $charset, $part->body, $message_encoding, $message_dequote);
|
1071 |
if (!empty($charset)) {
|
@@ -1074,48 +1082,48 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
|
1074 |
//DebugDump($part);
|
1075 |
}
|
1076 |
if (array_key_exists('disposition', $part) && $part->disposition == 'attachment') {
|
1077 |
-
DebugEcho("text Attachement: $filename");
|
1078 |
if (!preg_match('/ATT\d\d\d\d\d.txt/i', $filename)) {
|
1079 |
$file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
|
1080 |
if (!is_wp_error($file_id)) {
|
1081 |
$file = wp_get_attachment_url($file_id);
|
1082 |
$icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
|
1083 |
$attachments["html"][$filename] = "<a href='$file'>" . $icon . $filename . '</a>' . "\n";
|
1084 |
-
DebugEcho("text attachment: adding '$filename'");
|
1085 |
} else {
|
1086 |
LogInfo($file_id->get_error_message());
|
1087 |
}
|
1088 |
} else {
|
1089 |
-
DebugEcho("text attachment: skipping '$filename'");
|
1090 |
}
|
1091 |
} else {
|
1092 |
|
1093 |
//go through each sub-section
|
1094 |
if ($mimetype_secondary == 'enriched') {
|
1095 |
//convert enriched text to HTML
|
1096 |
-
DebugEcho("enriched");
|
1097 |
$meta_return .= filter_Etf2HTML($part->body) . "\n";
|
1098 |
} elseif ($mimetype_secondary == 'html') {
|
1099 |
//strip excess HTML
|
1100 |
-
DebugEcho("html");
|
1101 |
$meta_return .= filter_CleanHtml($part->body) . "\n";
|
1102 |
} elseif ($mimetype_secondary == 'plain') {
|
1103 |
-
DebugEcho("plain text");
|
1104 |
//DebugDump($part);
|
1105 |
|
1106 |
-
DebugEcho("body text");
|
1107 |
if ($allow_html_in_body) {
|
1108 |
-
DebugEcho("html allowed");
|
1109 |
$meta_return .= $part->body;
|
1110 |
//$meta_return = "<div>$meta_return</div>\n";
|
1111 |
} else {
|
1112 |
-
DebugEcho("html not allowed (htmlentities)");
|
1113 |
$meta_return .= htmlentities($part->body);
|
1114 |
}
|
1115 |
$meta_return = filter_StripPGP($meta_return);
|
1116 |
//DebugEcho("meta return: $meta_return");
|
1117 |
} else {
|
1118 |
-
DebugEcho("text Attachement wo disposition: $filename");
|
1119 |
$file_id = postie_media_handle_upload($part, $post_id, $poster);
|
1120 |
if (!is_wp_error($file_id)) {
|
1121 |
$file = wp_get_attachment_url($file_id);
|
@@ -1129,30 +1137,30 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
|
1129 |
break;
|
1130 |
|
1131 |
case 'image':
|
1132 |
-
DebugEcho("image Attachement: $filename");
|
1133 |
$file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
|
1134 |
if (!is_wp_error($file_id)) {
|
1135 |
//featured image logic
|
1136 |
//set the first image we come across as the featured image
|
1137 |
-
DebugEcho("has_post_thumbnail: " . has_post_thumbnail($post_id));
|
1138 |
//DebugEcho("get_the_post_thumbnail: " .get_the_post_thumbnail($post_id));
|
1139 |
|
1140 |
if ($featured_image && !has_post_thumbnail($post_id)) {
|
1141 |
-
DebugEcho("featured image: $file_id");
|
1142 |
set_post_thumbnail($post_id, $file_id);
|
1143 |
}
|
1144 |
$file = wp_get_attachment_url($file_id);
|
1145 |
$cid = "";
|
1146 |
if (array_key_exists('content-id', $part->headers)) {
|
1147 |
$cid = trim($part->headers["content-id"], "<>");
|
1148 |
-
DebugEcho("found cid: $cid");
|
1149 |
}
|
1150 |
|
1151 |
$the_post = get_post($file_id);
|
1152 |
$attachments["html"][$filename] = parseTemplate($file_id, $mimetype_primary, $imagetemplate, $filename);
|
1153 |
if ($cid) {
|
1154 |
$attachments["cids"][$cid] = array($file, count($attachments["html"]) - 1);
|
1155 |
-
DebugEcho("CID Attachement: $cid");
|
1156 |
}
|
1157 |
} else {
|
1158 |
LogInfo("image error: " . $file_id->get_error_message());
|
@@ -1161,20 +1169,20 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
|
1161 |
|
1162 |
case 'audio':
|
1163 |
//DebugDump($part->headers);
|
1164 |
-
DebugEcho("audio Attachement: $filename");
|
1165 |
$file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
|
1166 |
if (!is_wp_error($file_id)) {
|
1167 |
$file = wp_get_attachment_url($file_id);
|
1168 |
$cid = "";
|
1169 |
if (array_key_exists('content-id', $part->headers)) {
|
1170 |
$cid = trim($part->headers["content-id"], "<>");
|
1171 |
-
DebugEcho("audio Attachement cid: $cid");
|
1172 |
}
|
1173 |
if (in_array($fileext, $audiotypes)) {
|
1174 |
-
DebugEcho("using audio template: $mimetype_secondary");
|
1175 |
$audioTemplate = $audiotemplate;
|
1176 |
} else {
|
1177 |
-
DebugEcho("using default audio template: $mimetype_secondary");
|
1178 |
$icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
|
1179 |
$audioTemplate = '<a href="{FILELINK}">' . $icon . '{FILENAME}</a>';
|
1180 |
}
|
@@ -1185,24 +1193,24 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
|
1185 |
break;
|
1186 |
|
1187 |
case 'video':
|
1188 |
-
DebugEcho("video Attachement: $filename");
|
1189 |
$file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
|
1190 |
if (!is_wp_error($file_id)) {
|
1191 |
$file = wp_get_attachment_url($file_id);
|
1192 |
$cid = "";
|
1193 |
if (array_key_exists('content-id', $part->headers)) {
|
1194 |
$cid = trim($part->headers["content-id"], "<>");
|
1195 |
-
DebugEcho("video Attachement cid: $cid");
|
1196 |
}
|
1197 |
//DebugDump($part);
|
1198 |
if (in_array($fileext, $video1types)) {
|
1199 |
-
DebugEcho("using video1 template: $fileext");
|
1200 |
$videoTemplate = $video1template;
|
1201 |
} elseif (in_array($fileext, $video2types)) {
|
1202 |
-
DebugEcho("using video2 template: $fileext");
|
1203 |
$videoTemplate = $video2template;
|
1204 |
} else {
|
1205 |
-
DebugEcho("using default template: $fileext");
|
1206 |
$icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
|
1207 |
$videoTemplate = '<a href="{FILELINK}">' . $icon . '{FILENAME}</a>';
|
1208 |
}
|
@@ -1214,19 +1222,19 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
|
1214 |
break;
|
1215 |
|
1216 |
default:
|
1217 |
-
DebugEcho("found file type: " . $mimetype_primary);
|
1218 |
if (in_array($mimetype_primary, $supported_file_types)) {
|
1219 |
//pgp signature - then forget it
|
1220 |
if ($mimetype_secondary == 'pgp-signature') {
|
1221 |
-
DebugEcho("found pgp-signature - done");
|
1222 |
break;
|
1223 |
}
|
1224 |
$file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
|
1225 |
if (!is_wp_error($file_id)) {
|
1226 |
$file = wp_get_attachment_url($file_id);
|
1227 |
-
DebugEcho("uploaded $file_id ($file)");
|
1228 |
$icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
|
1229 |
-
DebugEcho("default: $icon $filename");
|
1230 |
$attachments["html"][$filename] = parseTemplate($file_id, $mimetype_primary, $generaltemplate, $filename, $icon);
|
1231 |
if (array_key_exists('content-id', $part->headers)) {
|
1232 |
$cid = trim($part->headers["content-id"], "<>");
|
@@ -1234,20 +1242,20 @@ function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
|
1234 |
$attachments["cids"][$cid] = array($file, count($attachments["html"]) - 1);
|
1235 |
}
|
1236 |
} else {
|
1237 |
-
DebugEcho("No content-id");
|
1238 |
}
|
1239 |
} else {
|
1240 |
LogInfo($file_id->get_error_message());
|
1241 |
}
|
1242 |
} else {
|
1243 |
-
DebugEcho("Not in supported filetype list");
|
1244 |
DebugDump($supported_file_types);
|
1245 |
}
|
1246 |
break;
|
1247 |
}
|
1248 |
}
|
1249 |
-
DebugEcho("meta_return: " .
|
1250 |
-
DebugEcho("====");
|
1251 |
return $meta_return;
|
1252 |
}
|
1253 |
|
@@ -1327,19 +1335,22 @@ function filter_Etf2HTML($content) {
|
|
1327 |
function filter_CleanHtml($content) {
|
1328 |
$html = str_get_html($content);
|
1329 |
if ($html) {
|
1330 |
-
DebugEcho("Looking for invalid tags");
|
1331 |
foreach ($html->find('script, style, head') as $node) {
|
1332 |
-
DebugEcho("Removing: " . $node->outertext);
|
1333 |
$node->outertext = '';
|
1334 |
}
|
|
|
|
|
1335 |
$html->load($html->save());
|
1336 |
|
1337 |
$b = $html->find('body');
|
1338 |
if ($b) {
|
|
|
1339 |
$content = "<div>" . $b[0]->innertext . "</div>\n";
|
1340 |
}
|
1341 |
} else {
|
1342 |
-
DebugEcho("No HTML found");
|
1343 |
}
|
1344 |
return $content;
|
1345 |
}
|
@@ -1561,7 +1572,7 @@ function filter_End(&$content, $config) {
|
|
1561 |
//filter content for new lines
|
1562 |
function filter_Newlines(&$content, $config) {
|
1563 |
if ($config['filternewlines']) {
|
1564 |
-
|
1565 |
$search = array(
|
1566 |
"/\r\n/",
|
1567 |
"/\n\n/",
|
@@ -1579,6 +1590,7 @@ function filter_Newlines(&$content, $config) {
|
|
1579 |
|
1580 |
$result = preg_replace($search, $replace, $content);
|
1581 |
|
|
|
1582 |
if ($config['convertnewline']) {
|
1583 |
$content = preg_replace('/(LINEBREAK)/', "<br />\n", $result);
|
1584 |
} else {
|
1 |
<?php
|
2 |
/*
|
3 |
+
$Id: postie-functions.php 1012303 2014-10-22 18:57:48Z WayneAllen $
|
4 |
*/
|
5 |
|
6 |
//to turn on debug output add the following line to wp-config.php
|
170 |
$es = $html->find('text');
|
171 |
DebugEcho("tag_Date: html " . count($es));
|
172 |
foreach ($es as $e) {
|
173 |
+
//DebugEcho("tag_Date: ".trim($e->plaintext));
|
174 |
$matches = array();
|
175 |
if (1 === preg_match("/^date:\s?(.*)$/im", trim($e->plaintext), $matches)) {
|
176 |
DebugEcho("tag_Date: found date tag $matches[1]");
|
320 |
|
321 |
$id = GetParentPostForReply($subject);
|
322 |
if (empty($id)) {
|
323 |
+
DebugEcho("Not a reply");
|
324 |
$id = $post_id;
|
325 |
$is_reply = false;
|
326 |
if ($config['add_meta'] == 'yes') {
|
327 |
+
DebugEcho("Adding meta");
|
328 |
if ($config['wrap_pre'] == 'yes') {
|
329 |
+
DebugEcho("Adding <pre>");
|
330 |
$content = $postAuthorDetails['content'] . "<pre>\n" . $content . "</pre>\n";
|
331 |
$content = "<pre>\n" . $content . "</pre>\n";
|
332 |
} else {
|
335 |
}
|
336 |
} else {
|
337 |
if ($config['wrap_pre'] == 'yes') {
|
338 |
+
DebugEcho("Adding <pre>");
|
339 |
$content = "<pre>\n" . $content . "</pre>\n";
|
340 |
}
|
341 |
}
|
947 |
* @return boolean
|
948 |
*/
|
949 |
function isBannedFileName($filename, $bannedFiles) {
|
950 |
+
if (empty($filename) || empty($bannedFiles)) {
|
951 |
return false;
|
952 |
+
}
|
953 |
foreach ($bannedFiles as $bannedFile) {
|
954 |
if (fnmatch($bannedFile, $filename)) {
|
955 |
EchoInfo("Ignoring attachment: $filename - it is on the banned files list.");
|
962 |
function GetContent($part, &$attachments, $post_id, $poster, $config) {
|
963 |
extract($config);
|
964 |
//global $charset, $encoding;
|
965 |
+
DebugEcho('GetContent: ---- start');
|
966 |
$meta_return = '';
|
967 |
if (property_exists($part, "ctype_primary")) {
|
968 |
DebugEcho("GetContent: primary= " . $part->ctype_primary . ", secondary = " . $part->ctype_secondary);
|
974 |
//look for banned file names
|
975 |
if (property_exists($part, 'ctype_parameters') && is_array($part->ctype_parameters) && array_key_exists('name', $part->ctype_parameters))
|
976 |
if (isBannedFileName($part->ctype_parameters['name'], $banned_files_list)) {
|
977 |
+
DebugEcho("GetContent: found banned filename");
|
978 |
return NULL;
|
979 |
}
|
980 |
|
992 |
}
|
993 |
|
994 |
if (property_exists($part, "ctype_primary") && $part->ctype_primary == "multipart" && $part->ctype_secondary == "appledouble") {
|
995 |
+
DebugEcho("GetContent: multipart appledouble");
|
996 |
$mimeDecodedEmail = DecodeMIMEMail("Content-Type: multipart/mixed; boundary=" . $part->ctype_parameters["boundary"] . "\n" . $part->body);
|
997 |
filter_PreferedText($mimeDecodedEmail, $prefer_text_type);
|
998 |
filter_AppleFile($mimeDecodedEmail);
|
1015 |
$mimetype_primary = "";
|
1016 |
$mimetype_secondary = "";
|
1017 |
|
1018 |
+
if (property_exists($part, "ctype_primary")) {
|
1019 |
$mimetype_primary = strtolower($part->ctype_primary);
|
1020 |
+
}
|
1021 |
+
if (property_exists($part, "ctype_secondary")) {
|
1022 |
$mimetype_secondary = strtolower($part->ctype_secondary);
|
1023 |
+
}
|
1024 |
|
1025 |
$typeinfo = wp_check_filetype($filename);
|
1026 |
//DebugDump($typeinfo);
|
1048 |
|
1049 |
switch ($mimetype_primary) {
|
1050 |
case 'multipart':
|
1051 |
+
DebugEcho("GetContent: multipart: " . count($part->parts));
|
1052 |
//DebugDump($part);
|
1053 |
filter_PreferedText($part, $prefer_text_type);
|
1054 |
foreach ($part->parts as $section) {
|
1058 |
break;
|
1059 |
|
1060 |
case 'text':
|
1061 |
+
DebugEcho("GetContent: ctype_primary: text");
|
1062 |
//DebugDump($part);
|
1063 |
|
1064 |
$charset = "";
|
1065 |
if (property_exists($part, 'ctype_parameters') && array_key_exists('charset', $part->ctype_parameters) && !empty($part->ctype_parameters['charset'])) {
|
1066 |
$charset = $part->ctype_parameters['charset'];
|
1067 |
+
DebugEcho("GetContent: text charset: $charset");
|
1068 |
}
|
1069 |
|
1070 |
$encoding = "";
|
1071 |
if (array_key_exists('content-transfer-encoding', $part->headers) && !empty($part->headers['content-transfer-encoding'])) {
|
1072 |
$encoding = $part->headers['content-transfer-encoding'];
|
1073 |
+
DebugEcho("GetContent: text encoding: $encoding");
|
1074 |
}
|
1075 |
|
1076 |
+
if ($charset !== '' || $encoding !== '') {
|
1077 |
//DebugDump($part);
|
1078 |
$part->body = HandleMessageEncoding($encoding, $charset, $part->body, $message_encoding, $message_dequote);
|
1079 |
if (!empty($charset)) {
|
1082 |
//DebugDump($part);
|
1083 |
}
|
1084 |
if (array_key_exists('disposition', $part) && $part->disposition == 'attachment') {
|
1085 |
+
DebugEcho("GetContent: text Attachement: $filename");
|
1086 |
if (!preg_match('/ATT\d\d\d\d\d.txt/i', $filename)) {
|
1087 |
$file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
|
1088 |
if (!is_wp_error($file_id)) {
|
1089 |
$file = wp_get_attachment_url($file_id);
|
1090 |
$icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
|
1091 |
$attachments["html"][$filename] = "<a href='$file'>" . $icon . $filename . '</a>' . "\n";
|
1092 |
+
DebugEcho("GetContent: text attachment: adding '$filename'");
|
1093 |
} else {
|
1094 |
LogInfo($file_id->get_error_message());
|
1095 |
}
|
1096 |
} else {
|
1097 |
+
DebugEcho("GetContent: text attachment: skipping '$filename'");
|
1098 |
}
|
1099 |
} else {
|
1100 |
|
1101 |
//go through each sub-section
|
1102 |
if ($mimetype_secondary == 'enriched') {
|
1103 |
//convert enriched text to HTML
|
1104 |
+
DebugEcho("GetContent: enriched");
|
1105 |
$meta_return .= filter_Etf2HTML($part->body) . "\n";
|
1106 |
} elseif ($mimetype_secondary == 'html') {
|
1107 |
//strip excess HTML
|
1108 |
+
DebugEcho("GetContent: html");
|
1109 |
$meta_return .= filter_CleanHtml($part->body) . "\n";
|
1110 |
} elseif ($mimetype_secondary == 'plain') {
|
1111 |
+
DebugEcho("GetContent: plain text");
|
1112 |
//DebugDump($part);
|
1113 |
|
1114 |
+
DebugEcho("GetContent: body text");
|
1115 |
if ($allow_html_in_body) {
|
1116 |
+
DebugEcho("GetContent: html allowed");
|
1117 |
$meta_return .= $part->body;
|
1118 |
//$meta_return = "<div>$meta_return</div>\n";
|
1119 |
} else {
|
1120 |
+
DebugEcho("GetContent: html not allowed (htmlentities)");
|
1121 |
$meta_return .= htmlentities($part->body);
|
1122 |
}
|
1123 |
$meta_return = filter_StripPGP($meta_return);
|
1124 |
//DebugEcho("meta return: $meta_return");
|
1125 |
} else {
|
1126 |
+
DebugEcho("GetContent: text Attachement wo disposition: $filename");
|
1127 |
$file_id = postie_media_handle_upload($part, $post_id, $poster);
|
1128 |
if (!is_wp_error($file_id)) {
|
1129 |
$file = wp_get_attachment_url($file_id);
|
1137 |
break;
|
1138 |
|
1139 |
case 'image':
|
1140 |
+
DebugEcho("GetContent: image Attachement: $filename");
|
1141 |
$file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
|
1142 |
if (!is_wp_error($file_id)) {
|
1143 |
//featured image logic
|
1144 |
//set the first image we come across as the featured image
|
1145 |
+
DebugEcho("GetContent: has_post_thumbnail: " . has_post_thumbnail($post_id));
|
1146 |
//DebugEcho("get_the_post_thumbnail: " .get_the_post_thumbnail($post_id));
|
1147 |
|
1148 |
if ($featured_image && !has_post_thumbnail($post_id)) {
|
1149 |
+
DebugEcho("GetContent: featured image: $file_id");
|
1150 |
set_post_thumbnail($post_id, $file_id);
|
1151 |
}
|
1152 |
$file = wp_get_attachment_url($file_id);
|
1153 |
$cid = "";
|
1154 |
if (array_key_exists('content-id', $part->headers)) {
|
1155 |
$cid = trim($part->headers["content-id"], "<>");
|
1156 |
+
DebugEcho("GetContent: found cid: $cid");
|
1157 |
}
|
1158 |
|
1159 |
$the_post = get_post($file_id);
|
1160 |
$attachments["html"][$filename] = parseTemplate($file_id, $mimetype_primary, $imagetemplate, $filename);
|
1161 |
if ($cid) {
|
1162 |
$attachments["cids"][$cid] = array($file, count($attachments["html"]) - 1);
|
1163 |
+
DebugEcho("GetContent: CID Attachement: $cid");
|
1164 |
}
|
1165 |
} else {
|
1166 |
LogInfo("image error: " . $file_id->get_error_message());
|
1169 |
|
1170 |
case 'audio':
|
1171 |
//DebugDump($part->headers);
|
1172 |
+
DebugEcho("GetContent: audio Attachement: $filename");
|
1173 |
$file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
|
1174 |
if (!is_wp_error($file_id)) {
|
1175 |
$file = wp_get_attachment_url($file_id);
|
1176 |
$cid = "";
|
1177 |
if (array_key_exists('content-id', $part->headers)) {
|
1178 |
$cid = trim($part->headers["content-id"], "<>");
|
1179 |
+
DebugEcho("GetContent: audio Attachement cid: $cid");
|
1180 |
}
|
1181 |
if (in_array($fileext, $audiotypes)) {
|
1182 |
+
DebugEcho("GetContent: using audio template: $mimetype_secondary");
|
1183 |
$audioTemplate = $audiotemplate;
|
1184 |
} else {
|
1185 |
+
DebugEcho("GetContent: using default audio template: $mimetype_secondary");
|
1186 |
$icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
|
1187 |
$audioTemplate = '<a href="{FILELINK}">' . $icon . '{FILENAME}</a>';
|
1188 |
}
|
1193 |
break;
|
1194 |
|
1195 |
case 'video':
|
1196 |
+
DebugEcho("GetContent: video Attachement: $filename");
|
1197 |
$file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
|
1198 |
if (!is_wp_error($file_id)) {
|
1199 |
$file = wp_get_attachment_url($file_id);
|
1200 |
$cid = "";
|
1201 |
if (array_key_exists('content-id', $part->headers)) {
|
1202 |
$cid = trim($part->headers["content-id"], "<>");
|
1203 |
+
DebugEcho("GetContent: video Attachement cid: $cid");
|
1204 |
}
|
1205 |
//DebugDump($part);
|
1206 |
if (in_array($fileext, $video1types)) {
|
1207 |
+
DebugEcho("GetContent: using video1 template: $fileext");
|
1208 |
$videoTemplate = $video1template;
|
1209 |
} elseif (in_array($fileext, $video2types)) {
|
1210 |
+
DebugEcho("GetContent: using video2 template: $fileext");
|
1211 |
$videoTemplate = $video2template;
|
1212 |
} else {
|
1213 |
+
DebugEcho("GetContent: using default template: $fileext");
|
1214 |
$icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
|
1215 |
$videoTemplate = '<a href="{FILELINK}">' . $icon . '{FILENAME}</a>';
|
1216 |
}
|
1222 |
break;
|
1223 |
|
1224 |
default:
|
1225 |
+
DebugEcho("GetContent: found file type: " . $mimetype_primary);
|
1226 |
if (in_array($mimetype_primary, $supported_file_types)) {
|
1227 |
//pgp signature - then forget it
|
1228 |
if ($mimetype_secondary == 'pgp-signature') {
|
1229 |
+
DebugEcho("GetContent: found pgp-signature - done");
|
1230 |
break;
|
1231 |
}
|
1232 |
$file_id = postie_media_handle_upload($part, $post_id, $poster, $generate_thumbnails);
|
1233 |
if (!is_wp_error($file_id)) {
|
1234 |
$file = wp_get_attachment_url($file_id);
|
1235 |
+
DebugEcho("GetContent: uploaded $file_id ($file)");
|
1236 |
$icon = chooseAttachmentIcon($file, $mimetype_primary, $mimetype_secondary, $icon_set, $icon_size);
|
1237 |
+
DebugEcho("GetContent: default: $icon $filename");
|
1238 |
$attachments["html"][$filename] = parseTemplate($file_id, $mimetype_primary, $generaltemplate, $filename, $icon);
|
1239 |
if (array_key_exists('content-id', $part->headers)) {
|
1240 |
$cid = trim($part->headers["content-id"], "<>");
|
1242 |
$attachments["cids"][$cid] = array($file, count($attachments["html"]) - 1);
|
1243 |
}
|
1244 |
} else {
|
1245 |
+
DebugEcho("GetContent: No content-id");
|
1246 |
}
|
1247 |
} else {
|
1248 |
LogInfo($file_id->get_error_message());
|
1249 |
}
|
1250 |
} else {
|
1251 |
+
DebugEcho("GetContent: Not in supported filetype list");
|
1252 |
DebugDump($supported_file_types);
|
1253 |
}
|
1254 |
break;
|
1255 |
}
|
1256 |
}
|
1257 |
+
DebugEcho("GetContent: meta_return: " . $meta_return);
|
1258 |
+
DebugEcho("GetContent: ==== end");
|
1259 |
return $meta_return;
|
1260 |
}
|
1261 |
|
1335 |
function filter_CleanHtml($content) {
|
1336 |
$html = str_get_html($content);
|
1337 |
if ($html) {
|
1338 |
+
DebugEcho("filter_CleanHtml: Looking for invalid tags");
|
1339 |
foreach ($html->find('script, style, head') as $node) {
|
1340 |
+
DebugEcho("filter_CleanHtml: Removing: " . $node->outertext);
|
1341 |
$node->outertext = '';
|
1342 |
}
|
1343 |
+
DebugEcho("filter_CleanHtml: " . $html->save());
|
1344 |
+
|
1345 |
$html->load($html->save());
|
1346 |
|
1347 |
$b = $html->find('body');
|
1348 |
if ($b) {
|
1349 |
+
DebugEcho("filter_CleanHtml: replacing body with div");
|
1350 |
$content = "<div>" . $b[0]->innertext . "</div>\n";
|
1351 |
}
|
1352 |
} else {
|
1353 |
+
DebugEcho("filter_CleanHtml: No HTML found");
|
1354 |
}
|
1355 |
return $content;
|
1356 |
}
|
1572 |
//filter content for new lines
|
1573 |
function filter_Newlines(&$content, $config) {
|
1574 |
if ($config['filternewlines']) {
|
1575 |
+
DebugEcho("filter_Newlines: filternewlines");
|
1576 |
$search = array(
|
1577 |
"/\r\n/",
|
1578 |
"/\n\n/",
|
1590 |
|
1591 |
$result = preg_replace($search, $replace, $content);
|
1592 |
|
1593 |
+
DebugEcho("filter_Newlines: convertnewline: " . $config['convertnewline']);
|
1594 |
if ($config['convertnewline']) {
|
1595 |
$content = preg_replace('/(LINEBREAK)/', "<br />\n", $result);
|
1596 |
} else {
|
postie.php
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
Plugin Name: Postie
|
5 |
Plugin URI: http://PostiePlugin.com/
|
6 |
Description: Create posts via email. Signifigantly upgrades the Post by Email features of Word Press.
|
7 |
-
Version: 1.6.4
|
8 |
Author: Wayne Allen
|
9 |
Author URI: http://allens-home.com/
|
10 |
License: GPL2
|
@@ -27,11 +27,11 @@
|
|
27 |
*/
|
28 |
|
29 |
/*
|
30 |
-
$Id: postie.php
|
31 |
*/
|
32 |
require_once(dirname(__FILE__) . DIRECTORY_SEPARATOR . "postie-functions.php");
|
33 |
|
34 |
-
define('POSTIE_VERSION', '1.6.4');
|
35 |
define("POSTIE_ROOT", dirname(__FILE__));
|
36 |
define("POSTIE_URL", WP_PLUGIN_URL . '/' . basename(dirname(__FILE__)));
|
37 |
|
4 |
Plugin Name: Postie
|
5 |
Plugin URI: http://PostiePlugin.com/
|
6 |
Description: Create posts via email. Signifigantly upgrades the Post by Email features of Word Press.
|
7 |
+
Version: 1.6.5
|
8 |
Author: Wayne Allen
|
9 |
Author URI: http://allens-home.com/
|
10 |
License: GPL2
|
27 |
*/
|
28 |
|
29 |
/*
|
30 |
+
$Id: postie.php 1012303 2014-10-22 18:57:48Z WayneAllen $
|
31 |
*/
|
32 |
require_once(dirname(__FILE__) . DIRECTORY_SEPARATOR . "postie-functions.php");
|
33 |
|
34 |
+
define('POSTIE_VERSION', '1.6.5');
|
35 |
define("POSTIE_ROOT", dirname(__FILE__));
|
36 |
define("POSTIE_URL", WP_PLUGIN_URL . '/' . basename(dirname(__FILE__)));
|
37 |
|
readme.txt
CHANGED
@@ -6,7 +6,7 @@ Plugin URI: http://PostiePlugin.com/
|
|
6 |
Tags: e-mail, email, post-by-email
|
7 |
Requires at least: 3.0
|
8 |
Tested up to: 4.0
|
9 |
-
Stable tag: 1.6.4
|
10 |
License: GPLv2 or later
|
11 |
License URI: http://www.gnu.org/licenses/gpl-2.0.html
|
12 |
|
@@ -238,7 +238,11 @@ All script, style and body tags are stripped from html emails.
|
|
238 |
Attachments are now processed in the order they were attached.
|
239 |
|
240 |
== CHANGELOG ==
|
241 |
-
= 1.6.4 (2014.10.21) =
|
|
|
|
|
|
|
|
|
242 |
* Provide post url in success email
|
243 |
|
244 |
= 1.6.3 (2014.10.03) =
|
6 |
Tags: e-mail, email, post-by-email
|
7 |
Requires at least: 3.0
|
8 |
Tested up to: 4.0
|
9 |
+
Stable tag: 1.6.5
|
10 |
License: GPLv2 or later
|
11 |
License URI: http://www.gnu.org/licenses/gpl-2.0.html
|
12 |
|
238 |
Attachments are now processed in the order they were attached.
|
239 |
|
240 |
== CHANGELOG ==
|
241 |
+
= 1.6.5 (2014.10.22) =
|
242 |
+
* Fixed charset encoding bug when there wasn't a content-transfer-encoding header
|
243 |
+
* Upgraded simple_html_dom
|
244 |
+
|
245 |
+
= 1.6.4 (2014.10.21) =
|
246 |
* Provide post url in success email
|
247 |
|
248 |
= 1.6.3 (2014.10.03) =
|
simple_html_dom.php
CHANGED
@@ -34,7 +34,7 @@
|
|
34 |
* @author S.C. Chen <me578022@gmail.com>
|
35 |
* @author John Schlick
|
36 |
* @author Rus Carroll
|
37 |
-
* @version 1.5 ($Rev:
|
38 |
* @package PlaceLocalInclude
|
39 |
* @subpackage simple_html_dom
|
40 |
*/
|
@@ -269,7 +269,10 @@ class simple_html_dom_node
|
|
269 |
{
|
270 |
return $this->children;
|
271 |
}
|
272 |
-
if (isset($this->children[$idx]))
|
|
|
|
|
|
|
273 |
return null;
|
274 |
}
|
275 |
|
@@ -330,14 +333,14 @@ class simple_html_dom_node
|
|
330 |
function find_ancestor_tag($tag)
|
331 |
{
|
332 |
global $debug_object;
|
333 |
-
if (is_object($debug_object)) { $debug_object->
|
334 |
|
335 |
// Start by including ourselves in the comparison.
|
336 |
$returnDom = $this;
|
337 |
|
338 |
while (!is_null($returnDom))
|
339 |
{
|
340 |
-
if (is_object($debug_object)) { $debug_object->
|
341 |
|
342 |
if ($returnDom->tag == $tag)
|
343 |
{
|
@@ -374,7 +377,7 @@ class simple_html_dom_node
|
|
374 |
$text = " with text: " . $this->text;
|
375 |
}
|
376 |
}
|
377 |
-
$debug_object->
|
378 |
}
|
379 |
|
380 |
if ($this->tag==='root') return $this->innertext();
|
@@ -532,7 +535,9 @@ class simple_html_dom_node
|
|
532 |
foreach ($head as $k=>$v)
|
533 |
{
|
534 |
if (!isset($found_keys[$k]))
|
|
|
535 |
$found_keys[$k] = 1;
|
|
|
536 |
}
|
537 |
}
|
538 |
|
@@ -554,7 +559,7 @@ class simple_html_dom_node
|
|
554 |
protected function seek($selector, &$ret, $lowercase=false)
|
555 |
{
|
556 |
global $debug_object;
|
557 |
-
if (is_object($debug_object)) { $debug_object->
|
558 |
|
559 |
list($tag, $key, $val, $exp, $no_key) = $selector;
|
560 |
|
@@ -615,7 +620,7 @@ class simple_html_dom_node
|
|
615 |
// this is a normal search, we want the value of that attribute of the tag.
|
616 |
$nodeKeyValue = $node->attr[$key];
|
617 |
}
|
618 |
-
if (is_object($debug_object)) {$debug_object->
|
619 |
|
620 |
//PaperG - If lowercase is set, do a case insensitive test of the value of the selector.
|
621 |
if ($lowercase) {
|
@@ -623,7 +628,7 @@ class simple_html_dom_node
|
|
623 |
} else {
|
624 |
$check = $this->match($exp, $val, $nodeKeyValue);
|
625 |
}
|
626 |
-
if (is_object($debug_object)) {$debug_object->
|
627 |
|
628 |
// handle multiple class
|
629 |
if (!$check && strcasecmp($key, 'class')===0) {
|
@@ -645,12 +650,12 @@ class simple_html_dom_node
|
|
645 |
unset($node);
|
646 |
}
|
647 |
// It's passed by reference so this is actually what this function returns.
|
648 |
-
if (is_object($debug_object)) {$debug_object->
|
649 |
}
|
650 |
|
651 |
protected function match($exp, $pattern, $value) {
|
652 |
global $debug_object;
|
653 |
-
if (is_object($debug_object)) {$debug_object->
|
654 |
|
655 |
switch ($exp) {
|
656 |
case '=':
|
@@ -672,7 +677,7 @@ class simple_html_dom_node
|
|
672 |
|
673 |
protected function parse_selector($selector_string) {
|
674 |
global $debug_object;
|
675 |
-
if (is_object($debug_object)) {$debug_object->
|
676 |
|
677 |
// pattern of CSS selectors, modified from mootools
|
678 |
// Paperg: Add the colon to the attrbute, so that it properly finds <tag attr:ibute="something" > like google does.
|
@@ -683,7 +688,7 @@ class simple_html_dom_node
|
|
683 |
// $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
|
684 |
$pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
|
685 |
preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER);
|
686 |
-
if (is_object($debug_object)) {$debug_object->
|
687 |
|
688 |
$selectors = array();
|
689 |
$result = array();
|
@@ -718,12 +723,14 @@ class simple_html_dom_node
|
|
718 |
return $selectors;
|
719 |
}
|
720 |
|
721 |
-
function __get($name)
|
|
|
722 |
if (isset($this->attr[$name]))
|
723 |
{
|
724 |
return $this->convert_text($this->attr[$name]);
|
725 |
}
|
726 |
-
switch ($name)
|
|
|
727 |
case 'outertext': return $this->outertext();
|
728 |
case 'innertext': return $this->innertext();
|
729 |
case 'plaintext': return $this->text();
|
@@ -732,22 +739,30 @@ class simple_html_dom_node
|
|
732 |
}
|
733 |
}
|
734 |
|
735 |
-
function __set($name, $value)
|
736 |
-
|
|
|
|
|
|
|
|
|
|
|
737 |
case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
|
738 |
case 'innertext':
|
739 |
if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value;
|
740 |
return $this->_[HDOM_INFO_INNER] = $value;
|
741 |
}
|
742 |
-
if (!isset($this->attr[$name]))
|
|
|
743 |
$this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
|
744 |
$this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
|
745 |
}
|
746 |
$this->attr[$name] = $value;
|
747 |
}
|
748 |
|
749 |
-
function __isset($name)
|
750 |
-
|
|
|
|
|
751 |
case 'outertext': return true;
|
752 |
case 'innertext': return true;
|
753 |
case 'plaintext': return true;
|
@@ -765,7 +780,7 @@ class simple_html_dom_node
|
|
765 |
function convert_text($text)
|
766 |
{
|
767 |
global $debug_object;
|
768 |
-
if (is_object($debug_object)) {$debug_object->
|
769 |
|
770 |
$converted_text = $text;
|
771 |
|
@@ -777,7 +792,7 @@ class simple_html_dom_node
|
|
777 |
$sourceCharset = strtoupper($this->dom->_charset);
|
778 |
$targetCharset = strtoupper($this->dom->_target_charset);
|
779 |
}
|
780 |
-
if (is_object($debug_object)) {$debug_object->
|
781 |
|
782 |
if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0))
|
783 |
{
|
@@ -1045,10 +1060,10 @@ class simple_html_dom
|
|
1045 |
|
1046 |
// prepare
|
1047 |
$this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText);
|
1048 |
-
// strip out comments
|
1049 |
-
$this->remove_noise("'<!--(.*?)-->'is");
|
1050 |
// strip out cdata
|
1051 |
$this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
|
|
|
|
|
1052 |
// Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
|
1053 |
// Script tags removal now preceeds style tag removal.
|
1054 |
// strip out <script> tags
|
@@ -1198,22 +1213,22 @@ class simple_html_dom
|
|
1198 |
if ($success)
|
1199 |
{
|
1200 |
$charset = $matches[1];
|
1201 |
-
if (is_object($debug_object)) {$debug_object->
|
1202 |
}
|
1203 |
|
1204 |
}
|
1205 |
|
1206 |
if (empty($charset))
|
1207 |
{
|
1208 |
-
$el = $this->root->find('meta[http-equiv=Content-Type]',0);
|
1209 |
if (!empty($el))
|
1210 |
{
|
1211 |
$fullvalue = $el->content;
|
1212 |
-
if (is_object($debug_object)) {$debug_object->
|
1213 |
|
1214 |
if (!empty($fullvalue))
|
1215 |
{
|
1216 |
-
$success = preg_match('/charset=(.+)/', $fullvalue, $matches);
|
1217 |
if ($success)
|
1218 |
{
|
1219 |
$charset = $matches[1];
|
@@ -1221,7 +1236,7 @@ class simple_html_dom
|
|
1221 |
else
|
1222 |
{
|
1223 |
// If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1
|
1224 |
-
if (is_object($debug_object)) {$debug_object->
|
1225 |
$charset = 'ISO-8859-1';
|
1226 |
}
|
1227 |
}
|
@@ -1231,14 +1246,19 @@ class simple_html_dom
|
|
1231 |
// If we couldn't find a charset above, then lets try to detect one based on the text we got...
|
1232 |
if (empty($charset))
|
1233 |
{
|
1234 |
-
//
|
1235 |
-
$charset =
|
1236 |
-
if (
|
|
|
|
|
|
|
|
|
|
|
1237 |
|
1238 |
// and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need...
|
1239 |
if ($charset === false)
|
1240 |
{
|
1241 |
-
if (is_object($debug_object)) {$debug_object->
|
1242 |
$charset = 'UTF-8';
|
1243 |
}
|
1244 |
}
|
@@ -1246,11 +1266,11 @@ class simple_html_dom
|
|
1246 |
// Since CP1252 is a superset, if we get one of it's subsets, we want it instead.
|
1247 |
if ((strtolower($charset) == strtolower('ISO-8859-1')) || (strtolower($charset) == strtolower('Latin1')) || (strtolower($charset) == strtolower('Latin-1')))
|
1248 |
{
|
1249 |
-
if (is_object($debug_object)) {$debug_object->
|
1250 |
$charset = 'CP1252';
|
1251 |
}
|
1252 |
|
1253 |
-
if (is_object($debug_object)) {$debug_object->
|
1254 |
|
1255 |
return $this->_charset = $charset;
|
1256 |
}
|
@@ -1616,14 +1636,14 @@ class simple_html_dom
|
|
1616 |
protected function remove_noise($pattern, $remove_tag=false)
|
1617 |
{
|
1618 |
global $debug_object;
|
1619 |
-
if (is_object($debug_object)) { $debug_object->
|
1620 |
|
1621 |
$count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE);
|
1622 |
|
1623 |
for ($i=$count-1; $i>-1; --$i)
|
1624 |
{
|
1625 |
$key = '___noise___'.sprintf('% 5d', count($this->noise)+1000);
|
1626 |
-
if (is_object($debug_object)) { $debug_object->
|
1627 |
$idx = ($remove_tag) ? 0 : 1;
|
1628 |
$this->noise[$key] = $matches[$i][$idx][0];
|
1629 |
$this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
|
@@ -1641,7 +1661,7 @@ class simple_html_dom
|
|
1641 |
function restore_noise($text)
|
1642 |
{
|
1643 |
global $debug_object;
|
1644 |
-
if (is_object($debug_object)) { $debug_object->
|
1645 |
|
1646 |
while (($pos=strpos($text, '___noise___'))!==false)
|
1647 |
{
|
@@ -1649,7 +1669,7 @@ class simple_html_dom
|
|
1649 |
if (strlen($text) > $pos+15)
|
1650 |
{
|
1651 |
$key = '___noise___'.$text[$pos+11].$text[$pos+12].$text[$pos+13].$text[$pos+14].$text[$pos+15];
|
1652 |
-
if (is_object($debug_object)) { $debug_object->
|
1653 |
|
1654 |
if (isset($this->noise[$key]))
|
1655 |
{
|
@@ -1674,7 +1694,7 @@ class simple_html_dom
|
|
1674 |
function search_noise($text)
|
1675 |
{
|
1676 |
global $debug_object;
|
1677 |
-
if (is_object($debug_object)) { $debug_object->
|
1678 |
|
1679 |
foreach($this->noise as $noiseElement)
|
1680 |
{
|
34 |
* @author S.C. Chen <me578022@gmail.com>
|
35 |
* @author John Schlick
|
36 |
* @author Rus Carroll
|
37 |
+
* @version 1.5 ($Rev: 210 $)
|
38 |
* @package PlaceLocalInclude
|
39 |
* @subpackage simple_html_dom
|
40 |
*/
|
269 |
{
|
270 |
return $this->children;
|
271 |
}
|
272 |
+
if (isset($this->children[$idx]))
|
273 |
+
{
|
274 |
+
return $this->children[$idx];
|
275 |
+
}
|
276 |
return null;
|
277 |
}
|
278 |
|
333 |
function find_ancestor_tag($tag)
|
334 |
{
|
335 |
global $debug_object;
|
336 |
+
if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
|
337 |
|
338 |
// Start by including ourselves in the comparison.
|
339 |
$returnDom = $this;
|
340 |
|
341 |
while (!is_null($returnDom))
|
342 |
{
|
343 |
+
if (is_object($debug_object)) { $debug_object->debug_log(2, "Current tag is: " . $returnDom->tag); }
|
344 |
|
345 |
if ($returnDom->tag == $tag)
|
346 |
{
|
377 |
$text = " with text: " . $this->text;
|
378 |
}
|
379 |
}
|
380 |
+
$debug_object->debug_log(1, 'Innertext of tag: ' . $this->tag . $text);
|
381 |
}
|
382 |
|
383 |
if ($this->tag==='root') return $this->innertext();
|
535 |
foreach ($head as $k=>$v)
|
536 |
{
|
537 |
if (!isset($found_keys[$k]))
|
538 |
+
{
|
539 |
$found_keys[$k] = 1;
|
540 |
+
}
|
541 |
}
|
542 |
}
|
543 |
|
559 |
protected function seek($selector, &$ret, $lowercase=false)
|
560 |
{
|
561 |
global $debug_object;
|
562 |
+
if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
|
563 |
|
564 |
list($tag, $key, $val, $exp, $no_key) = $selector;
|
565 |
|
620 |
// this is a normal search, we want the value of that attribute of the tag.
|
621 |
$nodeKeyValue = $node->attr[$key];
|
622 |
}
|
623 |
+
if (is_object($debug_object)) {$debug_object->debug_log(2, "testing node: " . $node->tag . " for attribute: " . $key . $exp . $val . " where nodes value is: " . $nodeKeyValue);}
|
624 |
|
625 |
//PaperG - If lowercase is set, do a case insensitive test of the value of the selector.
|
626 |
if ($lowercase) {
|
628 |
} else {
|
629 |
$check = $this->match($exp, $val, $nodeKeyValue);
|
630 |
}
|
631 |
+
if (is_object($debug_object)) {$debug_object->debug_log(2, "after match: " . ($check ? "true" : "false"));}
|
632 |
|
633 |
// handle multiple class
|
634 |
if (!$check && strcasecmp($key, 'class')===0) {
|
650 |
unset($node);
|
651 |
}
|
652 |
// It's passed by reference so this is actually what this function returns.
|
653 |
+
if (is_object($debug_object)) {$debug_object->debug_log(1, "EXIT - ret: ", $ret);}
|
654 |
}
|
655 |
|
656 |
protected function match($exp, $pattern, $value) {
|
657 |
global $debug_object;
|
658 |
+
if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
|
659 |
|
660 |
switch ($exp) {
|
661 |
case '=':
|
677 |
|
678 |
protected function parse_selector($selector_string) {
|
679 |
global $debug_object;
|
680 |
+
if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
|
681 |
|
682 |
// pattern of CSS selectors, modified from mootools
|
683 |
// Paperg: Add the colon to the attrbute, so that it properly finds <tag attr:ibute="something" > like google does.
|
688 |
// $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
|
689 |
$pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is";
|
690 |
preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER);
|
691 |
+
if (is_object($debug_object)) {$debug_object->debug_log(2, "Matches Array: ", $matches);}
|
692 |
|
693 |
$selectors = array();
|
694 |
$result = array();
|
723 |
return $selectors;
|
724 |
}
|
725 |
|
726 |
+
function __get($name)
|
727 |
+
{
|
728 |
if (isset($this->attr[$name]))
|
729 |
{
|
730 |
return $this->convert_text($this->attr[$name]);
|
731 |
}
|
732 |
+
switch ($name)
|
733 |
+
{
|
734 |
case 'outertext': return $this->outertext();
|
735 |
case 'innertext': return $this->innertext();
|
736 |
case 'plaintext': return $this->text();
|
739 |
}
|
740 |
}
|
741 |
|
742 |
+
function __set($name, $value)
|
743 |
+
{
|
744 |
+
global $debug_object;
|
745 |
+
if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
|
746 |
+
|
747 |
+
switch ($name)
|
748 |
+
{
|
749 |
case 'outertext': return $this->_[HDOM_INFO_OUTER] = $value;
|
750 |
case 'innertext':
|
751 |
if (isset($this->_[HDOM_INFO_TEXT])) return $this->_[HDOM_INFO_TEXT] = $value;
|
752 |
return $this->_[HDOM_INFO_INNER] = $value;
|
753 |
}
|
754 |
+
if (!isset($this->attr[$name]))
|
755 |
+
{
|
756 |
$this->_[HDOM_INFO_SPACE][] = array(' ', '', '');
|
757 |
$this->_[HDOM_INFO_QUOTE][] = HDOM_QUOTE_DOUBLE;
|
758 |
}
|
759 |
$this->attr[$name] = $value;
|
760 |
}
|
761 |
|
762 |
+
function __isset($name)
|
763 |
+
{
|
764 |
+
switch ($name)
|
765 |
+
{
|
766 |
case 'outertext': return true;
|
767 |
case 'innertext': return true;
|
768 |
case 'plaintext': return true;
|
780 |
function convert_text($text)
|
781 |
{
|
782 |
global $debug_object;
|
783 |
+
if (is_object($debug_object)) {$debug_object->debug_log_entry(1);}
|
784 |
|
785 |
$converted_text = $text;
|
786 |
|
792 |
$sourceCharset = strtoupper($this->dom->_charset);
|
793 |
$targetCharset = strtoupper($this->dom->_target_charset);
|
794 |
}
|
795 |
+
if (is_object($debug_object)) {$debug_object->debug_log(3, "source charset: " . $sourceCharset . " target charaset: " . $targetCharset);}
|
796 |
|
797 |
if (!empty($sourceCharset) && !empty($targetCharset) && (strcasecmp($sourceCharset, $targetCharset) != 0))
|
798 |
{
|
1060 |
|
1061 |
// prepare
|
1062 |
$this->prepare($str, $lowercase, $stripRN, $defaultBRText, $defaultSpanText);
|
|
|
|
|
1063 |
// strip out cdata
|
1064 |
$this->remove_noise("'<!\[CDATA\[(.*?)\]\]>'is", true);
|
1065 |
+
// strip out comments
|
1066 |
+
$this->remove_noise("'<!--(.*?)-->'is");
|
1067 |
// Per sourceforge http://sourceforge.net/tracker/?func=detail&aid=2949097&group_id=218559&atid=1044037
|
1068 |
// Script tags removal now preceeds style tag removal.
|
1069 |
// strip out <script> tags
|
1213 |
if ($success)
|
1214 |
{
|
1215 |
$charset = $matches[1];
|
1216 |
+
if (is_object($debug_object)) {$debug_object->debug_log(2, 'header content-type found charset of: ' . $charset);}
|
1217 |
}
|
1218 |
|
1219 |
}
|
1220 |
|
1221 |
if (empty($charset))
|
1222 |
{
|
1223 |
+
$el = $this->root->find('meta[http-equiv=Content-Type]',0, true);
|
1224 |
if (!empty($el))
|
1225 |
{
|
1226 |
$fullvalue = $el->content;
|
1227 |
+
if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag found' . $fullvalue);}
|
1228 |
|
1229 |
if (!empty($fullvalue))
|
1230 |
{
|
1231 |
+
$success = preg_match('/charset=(.+)/i', $fullvalue, $matches);
|
1232 |
if ($success)
|
1233 |
{
|
1234 |
$charset = $matches[1];
|
1236 |
else
|
1237 |
{
|
1238 |
// If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1
|
1239 |
+
if (is_object($debug_object)) {$debug_object->debug_log(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.');}
|
1240 |
$charset = 'ISO-8859-1';
|
1241 |
}
|
1242 |
}
|
1246 |
// If we couldn't find a charset above, then lets try to detect one based on the text we got...
|
1247 |
if (empty($charset))
|
1248 |
{
|
1249 |
+
// Use this in case mb_detect_charset isn't installed/loaded on this machine.
|
1250 |
+
$charset = false;
|
1251 |
+
if (function_exists('mb_detect_encoding'))
|
1252 |
+
{
|
1253 |
+
// Have php try to detect the encoding from the text given to us.
|
1254 |
+
$charset = mb_detect_encoding($this->root->plaintext . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) );
|
1255 |
+
if (is_object($debug_object)) {$debug_object->debug_log(2, 'mb_detect found: ' . $charset);}
|
1256 |
+
}
|
1257 |
|
1258 |
// and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need...
|
1259 |
if ($charset === false)
|
1260 |
{
|
1261 |
+
if (is_object($debug_object)) {$debug_object->debug_log(2, 'since mb_detect failed - using default of utf-8');}
|
1262 |
$charset = 'UTF-8';
|
1263 |
}
|
1264 |
}
|
1266 |
// Since CP1252 is a superset, if we get one of it's subsets, we want it instead.
|
1267 |
if ((strtolower($charset) == strtolower('ISO-8859-1')) || (strtolower($charset) == strtolower('Latin1')) || (strtolower($charset) == strtolower('Latin-1')))
|
1268 |
{
|
1269 |
+
if (is_object($debug_object)) {$debug_object->debug_log(2, 'replacing ' . $charset . ' with CP1252 as its a superset');}
|
1270 |
$charset = 'CP1252';
|
1271 |
}
|
1272 |
|
1273 |
+
if (is_object($debug_object)) {$debug_object->debug_log(1, 'EXIT - ' . $charset);}
|
1274 |
|
1275 |
return $this->_charset = $charset;
|
1276 |
}
|
1636 |
protected function remove_noise($pattern, $remove_tag=false)
|
1637 |
{
|
1638 |
global $debug_object;
|
1639 |
+
if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
|
1640 |
|
1641 |
$count = preg_match_all($pattern, $this->doc, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE);
|
1642 |
|
1643 |
for ($i=$count-1; $i>-1; --$i)
|
1644 |
{
|
1645 |
$key = '___noise___'.sprintf('% 5d', count($this->noise)+1000);
|
1646 |
+
if (is_object($debug_object)) { $debug_object->debug_log(2, 'key is: ' . $key); }
|
1647 |
$idx = ($remove_tag) ? 0 : 1;
|
1648 |
$this->noise[$key] = $matches[$i][$idx][0];
|
1649 |
$this->doc = substr_replace($this->doc, $key, $matches[$i][$idx][1], strlen($matches[$i][$idx][0]));
|
1661 |
function restore_noise($text)
|
1662 |
{
|
1663 |
global $debug_object;
|
1664 |
+
if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
|
1665 |
|
1666 |
while (($pos=strpos($text, '___noise___'))!==false)
|
1667 |
{
|
1669 |
if (strlen($text) > $pos+15)
|
1670 |
{
|
1671 |
$key = '___noise___'.$text[$pos+11].$text[$pos+12].$text[$pos+13].$text[$pos+14].$text[$pos+15];
|
1672 |
+
if (is_object($debug_object)) { $debug_object->debug_log(2, 'located key of: ' . $key); }
|
1673 |
|
1674 |
if (isset($this->noise[$key]))
|
1675 |
{
|
1694 |
function search_noise($text)
|
1695 |
{
|
1696 |
global $debug_object;
|
1697 |
+
if (is_object($debug_object)) { $debug_object->debug_log_entry(1); }
|
1698 |
|
1699 |
foreach($this->noise as $noiseElement)
|
1700 |
{
|