Broken Link Checker - Version 0.2

Version Description

Download this release

Release Info

Developer whiteshadow
Plugin Icon 128x128 Broken Link Checker
Version 0.2
Comparing to
See all releases

Code changes from version 0.1 to 0.2

Files changed (3) hide show
  1. broken-link-checker.php +12 -10
  2. readme.txt +12 -6
  3. wsblc_ajax.php +59 -15
broken-link-checker.php CHANGED
@@ -1,15 +1,15 @@
1
  <?php
2
  /*
3
  Plugin Name: Broken Link Checker
4
- Plugin URI: http://w-shadow.com/blog/2007/08/05/broken-link-checker-for-wordpress/
5
- Description: Checks your posts for broken links in background and notifies you on the dashboard if any are found.
6
- Version: 0.1
7
  Author: Janis Elsts
8
  Author URI: http://w-shadow.com/blog/
9
  */
10
 
11
  /*
12
- Copyright 2007 Janis Elsts (email : whiteshadow@w-shadow.com)
13
  */
14
 
15
  if (!class_exists('ws_broken_link_checker')) {
@@ -19,7 +19,7 @@ class ws_broken_link_checker {
19
  var $options_name='wsblc_options';
20
  var $postdata_name;
21
  var $linkdata_name;
22
- var $version='0.1';
23
  var $myfile='';
24
  var $myfolder='';
25
  var $mybasename='';
@@ -135,18 +135,21 @@ class ws_broken_link_checker {
135
  update_option($this->options_name, $this->options);
136
  };
137
 
138
- if($wpdb->get_var("show tables like '".($this->postdata_name)."'") != $this->postdata_name) {
 
 
 
139
  $sql="CREATE TABLE ".$this->postdata_name." (
140
  post_id BIGINT( 20 ) NOT NULL ,
141
  last_check DATETIME NOT NULL ,
142
  UNIQUE KEY post_id (post_id)
143
  );";
144
 
145
- require_once(ABSPATH . 'wp-admin/upgrade-functions.php');
146
  dbDelta($sql);
147
  }
148
 
149
- if($wpdb->get_var("show tables like '".($this->linkdata_name)."'") != $this->linkdata_name) {
 
150
  $sql="CREATE TABLE ".$this->linkdata_name." (
151
  id BIGINT( 20 ) UNSIGNED NOT NULL AUTO_INCREMENT ,
152
  post_id BIGINT( 20 ) NOT NULL ,
@@ -154,11 +157,10 @@ class ws_broken_link_checker {
154
  link_text VARCHAR( 50 ) NOT NULL ,
155
  broken TINYINT( 1 ) UNSIGNED DEFAULT '0' NOT NULL,
156
  last_check DATETIME NOT NULL ,
157
- hidden TINYINT( 1 ) UNSIGNED DEFAULT '0' NOT NULL,
158
  PRIMARY KEY id (id)
159
  );";
160
 
161
- require_once(ABSPATH . 'wp-admin/upgrade-functions.php');
162
  dbDelta($sql);
163
  }
164
 
1
  <?php
2
  /*
3
  Plugin Name: Broken Link Checker
4
+ Plugin URI: http://wordpress.org/extend/plugins/broken-link-checker/
5
+ Description: Checks your posts for broken links and missing images and notifies you on the dashboard if any are found.
6
+ Version: 0.2
7
  Author: Janis Elsts
8
  Author URI: http://w-shadow.com/blog/
9
  */
10
 
11
  /*
12
+ Created by Janis Elsts (email : whiteshadow@w-shadow.com)
13
  */
14
 
15
  if (!class_exists('ws_broken_link_checker')) {
19
  var $options_name='wsblc_options';
20
  var $postdata_name;
21
  var $linkdata_name;
22
+ var $version='0.2';
23
  var $myfile='';
24
  var $myfolder='';
25
  var $mybasename='';
135
  update_option($this->options_name, $this->options);
136
  };
137
 
138
+ require_once(ABSPATH . 'wp-admin/upgrade-functions.php');
139
+
140
+ if (($wpdb->get_var("show tables like '".($this->postdata_name)."'") != $this->postdata_name)
141
+ || ($this->options['version'] != $this->version ) ) {
142
  $sql="CREATE TABLE ".$this->postdata_name." (
143
  post_id BIGINT( 20 ) NOT NULL ,
144
  last_check DATETIME NOT NULL ,
145
  UNIQUE KEY post_id (post_id)
146
  );";
147
 
 
148
  dbDelta($sql);
149
  }
150
 
151
+ if (($wpdb->get_var("show tables like '".($this->linkdata_name)."'") != $this->linkdata_name)
152
+ || ($this->options['version'] != $this->version ) ) {
153
  $sql="CREATE TABLE ".$this->linkdata_name." (
154
  id BIGINT( 20 ) UNSIGNED NOT NULL AUTO_INCREMENT ,
155
  post_id BIGINT( 20 ) NOT NULL ,
157
  link_text VARCHAR( 50 ) NOT NULL ,
158
  broken TINYINT( 1 ) UNSIGNED DEFAULT '0' NOT NULL,
159
  last_check DATETIME NOT NULL ,
160
+ check_count TINYINT( 2 ) UNSIGNED DEFAULT '0' NOT NULL,
161
  PRIMARY KEY id (id)
162
  );";
163
 
 
164
  dbDelta($sql);
165
  }
166
 
readme.txt CHANGED
@@ -3,19 +3,25 @@ Contributors: whiteshadow
3
  Tags: links, broken, maintenance
4
  Requires at least: 2.0.2
5
  Tested up to: 2.3
6
- Stable tag: 0.1
7
 
8
- This plugin will check your posts for broken links in background and notify you on the dashboard if any are found. It runs while any page of WP admin panel is open.
9
 
10
  == Description ==
 
11
 
12
- Sometimes, links get broken. A page is deleted, a subdirectory forgotten, a site moved to a different domain. Most likely some of your blog posts contain links. It is almost inevitable that over time some of them will start giving the 404 Not Found error. Obviously you don't want your readers to be annoyed by clicking a link that leads nowhere. You can check the links yourself but that might be quite a task if you have a lot of posts. You could use your webserver's stats but that only works for local links. So I've made a plugin for WordPress that will check your posts (and pages) in the background, looking for broken links, and let you know if any are found.
 
 
 
 
13
 
14
- The broken links, if any are found, will show up in a new tab of WP admin panel - Manage -> Broken Links. There are several buttons for each broken link - "View" and "Edit Post" do exactly what they say and "Discard" will remove the message about a broken link, but not the link itself (so it will show up again later unless you fix it).
 
15
 
16
- You can modify the few available options at Options -> Link Checker. You can see the current checking status there, too - e.g. how many posts need to be checked and how many links are in the queue.
17
 
18
- The plugin runs while you have any page of the WordPress admin panel open.
19
 
20
 
21
  == Installation ==
3
  Tags: links, broken, maintenance
4
  Requires at least: 2.0.2
5
  Tested up to: 2.3
6
+ Stable tag: 0.2
7
 
8
+ This plugin will check your posts for broken links and missing images in the background and notify you on the dashboard if any are found.
9
 
10
  == Description ==
11
+ This plugin will monitor your blog, looking for broken links, and let you know if any are found.
12
 
13
+ * Checks your posts (and pages) in the background.
14
+ * Detects links that don't work and missing images.
15
+ * Notifies you on the Dashboard if any are found.
16
+ * Link checking intervals can be configured.
17
+ * New/modified posts are checked ASAP.
18
 
19
+ **How To Use It**
20
+ The broken links, if any are found, will show up in a new tab of WP admin panel - Manage -> Broken Links. A notification will also appear on the Dashboard.
21
 
22
+ There are several buttons for each broken link - "View" and "Edit Post" do exactly what they say and "Discard" will remove the message about a broken link, but not the link itself (so it will show up again later unless you fix it). If references to missing images are found, they will be listed along with the links, with "[image]" in place of link text.
23
 
24
+ You can modify the few available options at Options -> Link Checker. You can also see the current checking status there - e.g. how many posts need to be checked and how many links are in the queue. The plugin runs while you have any page of the WordPress admin panel open.
25
 
26
 
27
  == Installation ==
wsblc_ajax.php CHANGED
@@ -22,17 +22,18 @@
22
  $postdata_name=$wpdb->prefix . "blc_postdata";
23
  $linkdata_name=$wpdb->prefix . "blc_linkdata";
24
 
25
- $options=get_option('wsblc_options');
26
  $siteurl=get_option('siteurl');
27
  $max_execution_time=isset($options['max_work_session'])?intval($options['max_work_session']):27;
28
 
29
  $check_treshold=date('Y-m-d H:i:s', strtotime('-'.$options['check_treshold'].' hours'));
 
30
 
31
  $action=isset($_GET['action'])?$_GET['action']:'run_check';
32
 
33
  if($action=='dashboard_status'){
34
  /* displays a notification if broken links have been found */
35
- $sql="SELECT count(*) FROM $linkdata_name WHERE broken=1 AND hidden=0";
36
  $broken_links=$wpdb->get_var($sql);
37
  if($broken_links>0){
38
  echo "<div>
@@ -47,10 +48,10 @@
47
  $sql="SELECT count(*) FROM $postdata_name WHERE last_check<'$check_treshold'";
48
  $posts_unchecked=$wpdb->get_var($sql);
49
 
50
- $sql="SELECT count(*) FROM $linkdata_name WHERE last_check<'$check_treshold' AND hidden=0";
51
  $links_unchecked=$wpdb->get_var($sql);
52
 
53
- $sql="SELECT count(*) FROM $linkdata_name WHERE broken=1 AND hidden=0";
54
  $broken_links=$wpdb->get_var($sql);
55
 
56
  if($broken_links>0){
@@ -92,7 +93,11 @@
92
  }
93
 
94
  /* check the queue and process any links unchecked */
95
- $sql="SELECT * FROM $linkdata_name WHERE last_check<'$check_treshold' AND hidden=0 LIMIT 100";
 
 
 
 
96
  $links=$wpdb->get_results($sql, OBJECT);
97
  if($links && (count($links)>0)){
98
  //some unchecked links found
@@ -102,7 +107,8 @@
102
  //link OK, remove from queue
103
  $wpdb->query("DELETE FROM $linkdata_name WHERE id=$link->id");
104
  } else {
105
- $wpdb->query("UPDATE $linkdata_name SET broken=1, last_check=NOW() WHERE id=$link->id");
 
106
  };
107
 
108
 
@@ -150,7 +156,38 @@
150
  return true;
151
  }
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  function gather_and_save_links($content, $post_id){
 
154
  $url_pattern='/(<a[\s]+[^>]*href\s*=\s*[\"\']?)([^\'\" >]+)([\'\"]+[^<>]*>)((?sU).*)(<\/a>)/i';
155
 
156
  if(preg_match_all($url_pattern, $content, $matches, PREG_SET_ORDER)){
@@ -159,6 +196,15 @@
159
  }
160
  };
161
 
 
 
 
 
 
 
 
 
 
162
  return $content;
163
  }
164
 
@@ -174,11 +220,12 @@
174
  curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
175
 
176
  curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
 
 
177
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
178
- curl_setopt($ch, CURLOPT_TIMEOUT, 20);
179
 
180
- //curl_setopt($ch, CURLOPT_FAILONERROR, true);
181
- curl_setopt($ch, CURLOPT_HEADER, true);
182
 
183
  if($parts['scheme']=='https'){
184
  curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
@@ -186,16 +233,13 @@
186
  } else {
187
  curl_setopt($ch, CURLOPT_NOBODY, true);
188
  }
 
189
 
190
  $response = curl_exec($ch);
 
 
191
  curl_close($ch);
192
 
193
- if(preg_match('/HTTP\/1\.\d+\s+(\d+)/', $response, $matches)){
194
- $code=intval($matches[1]);
195
- } else {
196
- return false;
197
- };
198
-
199
  return (($code>=200) && ($code<400));
200
  }
201
 
22
  $postdata_name=$wpdb->prefix . "blc_postdata";
23
  $linkdata_name=$wpdb->prefix . "blc_linkdata";
24
 
25
+ $options=$ws_link_checker->options; //get_option('wsblc_options');
26
  $siteurl=get_option('siteurl');
27
  $max_execution_time=isset($options['max_work_session'])?intval($options['max_work_session']):27;
28
 
29
  $check_treshold=date('Y-m-d H:i:s', strtotime('-'.$options['check_treshold'].' hours'));
30
+ $recheck_treshold=date('Y-m-d H:i:s', strtotime('-20 minutes'));
31
 
32
  $action=isset($_GET['action'])?$_GET['action']:'run_check';
33
 
34
  if($action=='dashboard_status'){
35
  /* displays a notification if broken links have been found */
36
+ $sql="SELECT count(*) FROM $linkdata_name WHERE broken=1";
37
  $broken_links=$wpdb->get_var($sql);
38
  if($broken_links>0){
39
  echo "<div>
48
  $sql="SELECT count(*) FROM $postdata_name WHERE last_check<'$check_treshold'";
49
  $posts_unchecked=$wpdb->get_var($sql);
50
 
51
+ $sql="SELECT count(*) FROM $linkdata_name WHERE last_check<'$check_treshold'";
52
  $links_unchecked=$wpdb->get_var($sql);
53
 
54
+ $sql="SELECT count(*) FROM $linkdata_name WHERE broken=1";
55
  $broken_links=$wpdb->get_var($sql);
56
 
57
  if($broken_links>0){
93
  }
94
 
95
  /* check the queue and process any links unchecked */
96
+ $sql="SELECT * FROM $linkdata_name WHERE ".
97
+ " ((last_check<'$check_treshold') OR ".
98
+ " (broken=1 AND check_count<5 AND last_check<'$recheck_treshold')) ".
99
+ " LIMIT 100";
100
+
101
  $links=$wpdb->get_results($sql, OBJECT);
102
  if($links && (count($links)>0)){
103
  //some unchecked links found
107
  //link OK, remove from queue
108
  $wpdb->query("DELETE FROM $linkdata_name WHERE id=$link->id");
109
  } else {
110
+ $wpdb->query("UPDATE $linkdata_name SET broken=1, ".
111
+ " last_check=NOW(), check_count=check_count+1 WHERE id=$link->id");
112
  };
113
 
114
 
156
  return true;
157
  }
158
 
159
+ function parse_image($matches, $post_id){
160
+ global $wpdb, $siteurl, $linkdata_name;
161
+
162
+ $url=$matches[2];
163
+
164
+ $parts=@parse_url($url);
165
+
166
+ if(!$parts) return false;
167
+
168
+ $url=preg_replace(
169
+ array('/([\?&]PHPSESSID=\w+)$/i','/(#[^\/]*)$/i', '/&amp;/','/^(javascript:.*)/i','/([\?&]sid=\w+)$/i'),
170
+ array('','','&','',''),
171
+ $url);
172
+
173
+ $url=trim($url);
174
+ if($url=='') return false;
175
+
176
+ // turn relative URLs into absolute URLs
177
+ $url = relative2absolute($siteurl, $url);
178
+
179
+ if(strlen($url)>3){
180
+ $wpdb->query(
181
+ "INSERT INTO $linkdata_name(post_id, url, link_text)
182
+ VALUES($post_id, '".$wpdb->escape($url)."', '[image]')"
183
+ );
184
+ };
185
+
186
+ return true;
187
+ }
188
+
189
  function gather_and_save_links($content, $post_id){
190
+ //gather links (<a href=...>)
191
  $url_pattern='/(<a[\s]+[^>]*href\s*=\s*[\"\']?)([^\'\" >]+)([\'\"]+[^<>]*>)((?sU).*)(<\/a>)/i';
192
 
193
  if(preg_match_all($url_pattern, $content, $matches, PREG_SET_ORDER)){
196
  }
197
  };
198
 
199
+ //gather images (<img src=...>)
200
+ $url_pattern='/(<img[\s]+[^>]*src\s*=\s*[\"\']?)([^\'\" >]+)([\'\"]+[^<>]*>)/i';
201
+
202
+ if(preg_match_all($url_pattern, $content, $matches, PREG_SET_ORDER)){
203
+ foreach($matches as $img){
204
+ parse_image($img, $post_id);
205
+ }
206
+ };
207
+
208
  return $content;
209
  }
210
 
220
  curl_setopt($ch, CURLOPT_RETURNTRANSFER,1);
221
 
222
  curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
223
+ curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
224
+
225
  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 15);
226
+ curl_setopt($ch, CURLOPT_TIMEOUT, 25);
227
 
228
+ curl_setopt($ch, CURLOPT_FAILONERROR, false);
 
229
 
230
  if($parts['scheme']=='https'){
231
  curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
233
  } else {
234
  curl_setopt($ch, CURLOPT_NOBODY, true);
235
  }
236
+ curl_setopt($ch, CURLOPT_HEADER, true);
237
 
238
  $response = curl_exec($ch);
239
+ $code=intval(curl_getinfo($ch, CURLINFO_HTTP_CODE));
240
+
241
  curl_close($ch);
242
 
 
 
 
 
 
 
243
  return (($code>=200) && ($code<400));
244
  }
245