Version Description
Download this release
Release Info
Developer | RobMarsh |
Plugin | Similar Posts – Best Related Posts Plugin for WordPress |
Version | 2.6.0.0 |
Comparing to | |
See all releases |
Version 2.6.0.0
- languages/de/stemmer.php +315 -0
- languages/de/stemmer.php.bak +315 -0
- languages/de/stopwords.php +4 -0
- languages/en/stemmer.php +335 -0
- languages/en/stopwords.php +4 -0
- languages/es/stemmer.php +381 -0
- languages/es/stopwords.php +4 -0
- languages/fr/stemmer.php +513 -0
- languages/fr/stopwords.php +4 -0
- languages/it/stemmer.php +341 -0
- languages/it/stopwords.php +4 -0
- readme.txt +114 -0
- similar-posts-admin.php +702 -0
- similar-posts.php +584 -0
languages/de/stemmer.php
ADDED
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
/*
|
3 |
+
Adapted from a drupal module -- see details below
|
4 |
+
*/
|
5 |
+
|
6 |
+
/*
|
7 |
+
Content:
|
8 |
+
Drupal module to improve searching in german texts (Porter stemmer)
|
9 |
+
Algorithm based on http://snowball.tartarus.org/algorithms/german/stemmer.html
|
10 |
+
Author:
|
11 |
+
Reiner Miericke 10.10.2007
|
12 |
+
References:
|
13 |
+
Algorithm:
|
14 |
+
http://www.clef-campaign.org/workshop2002/WN/3.pdf
|
15 |
+
http://w3.ub.uni-konstanz.de/v13/volltexte/2003/996//pdf/scherer.pdf
|
16 |
+
http://kontext.fraunhofer.de/haenelt/kurs/Referate/Kowatschew_Lang/stemming.pdf
|
17 |
+
http://www.cis.uni-muenchen.de/people/Schulz/SeminarSoSe2001IR/FilzmayerMargetic/referat.html
|
18 |
+
http://www.ifi.unizh.ch/CL/broder/mue1/porter/stemming/node1.html
|
19 |
+
For lists of stopwords see
|
20 |
+
http://members.unine.ch/jacques.savoy/clef/index.html
|
21 |
+
Small parts were stolen from dutchstemmer.module
|
22 |
+
*/
|
23 |
+
|
24 |
+
|
25 |
+
// Vowels recognised by the stemmer, including the German umlauts.
define("DE_STEMMER_VOKALE", "aeiouyäöü");

// Detect the encoding in which the literals of this file are stored and make
// it the mbstring internal encoding, so the mb_* calls below agree with them.
$enc = mb_detect_encoding('a-zA-ZÄÖÜßäëïöüáéíóúè');
if ($enc !== false) {
    // mb_detect_encoding() returns false when no encoding matches; handing
    // that to mb_internal_encoding() would raise an error.
    mb_internal_encoding($enc);
}
|
29 |
+
|
30 |
+
/**
 * Split a text into its words, discarding everything that is not a letter.
 *
 * @param string $text Text to split (taken by reference, not modified here).
 * @return array|false List of non-empty words, or false if preg_split() fails.
 */
function _de_stemmer_split_text(&$text) {
    // Any run of characters outside this letter set counts as noise between words.
    return preg_split('/([^a-zA-ZÄÖÜßäëïöüáéíóúè]+)/u', $text, -1, PREG_SPLIT_NO_EMPTY);
}
|
34 |
+
|
35 |
+
|
36 |
+
/**
 * Implementation of hook_search_preprocess.
 *
 * Replaces every word of $text by its stem while keeping the separating
 * noise (punctuation, digits, whitespace) exactly as it was.
 *
 * @param string $text Text to process (taken by reference, not modified here).
 * @return string The text with each word replaced by its stem.
 */
function de_stemmer_search_preprocess(&$text) {
    // With PREG_SPLIT_DELIM_CAPTURE the result alternates word, noise, word, ...
    $words = preg_split('/([^a-zA-ZÄÖÜßäëïöüáéíóúè]+)/u', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($words === false) {
        // preg_split() failed (e.g. invalid UTF-8 input): return the text unchanged.
        return $text;
    }

    // Stem only the word entries, i.e. every other element starting with the first.
    $odd = true;
    foreach ($words as $k => $word) {
        if ($odd) {
            $words[$k] = _de_stemmer_wortstamm($word);
        }
        $odd = !$odd;
    }

    // Put it all back together
    return implode('', $words);
}
|
69 |
+
|
70 |
+
|
71 |
+
/**
 * Implementation of hook_help().
 *
 * @param string $section Help path being requested.
 * @return string|null The module description for 'admin/modules#description',
 *                     null for every other section.
 */
function de_stemmer_help($section = 'admin/help#search') {
    switch ($section) {
        case 'admin/modules#description':
            $description = 'Implements a German stemming algorithm (Porter) to improve searching.';
            // t() is not defined in this file (it comes from Drupal); fall back to
            // the untranslated text instead of dying with "undefined function".
            return function_exists('t') ? t($description) : $description;
    }
}
|
80 |
+
|
81 |
+
|
82 |
+
/*
 * Builds a lookup table from each word of a text to its stem.
 *
 * The text is split into words, every word is stemmed, and the pair is
 * stored as word => stem. The table can be used to identify words that
 * reduce to the same stem.
 * For details please compare 'search.module-stem.patch'
 */
function de_stemmer_stem_list($text) {
    $lookup = array();
    foreach (_de_stemmer_split_text($text) as $token) {
        $lookup[$token] = _de_stemmer_wortstamm($token);
    }
    return $lookup;
}
|
100 |
+
|
101 |
+
|
102 |
+
/*
 * Returns the byte offset just past the first non-vowel that follows the
 * first run of vowels in $wort. The result may exceed the length of $wort
 * when no such non-vowel exists.
 */
function _de_stemmer_region_n($wort) {
    // Bytes before the first vowel.
    $leading = strcspn($wort, DE_STEMMER_VOKALE);
    // Length of the vowel run that starts there.
    $vowel_run = strspn($wort, DE_STEMMER_VOKALE, $leading);
    // Skip one more byte: the non-vowel that ends the run.
    return $leading + $vowel_run + 1;
}
|
106 |
+
|
107 |
+
/**
 * Normalise a word before stemming.
 *
 * Lower-cases the word, replaces ß by ss, and puts u and y between vowels
 * into upper case so that later steps do not treat them as vowels.
 *
 * @param string $wort Raw word.
 * @return string Normalised word.
 */
function de_stemmer_preprocess($wort) {
    $wort = mb_strtolower($wort);
    // One pass is sufficient; the former second '/ß/' replacement could never match.
    $wort = str_replace("ß", "ss", $wort);

    $wort = preg_replace( array( '/(?<=['. DE_STEMMER_VOKALE .'])u(?=['. DE_STEMMER_VOKALE .'])/u',
                                 '/(?<=['. DE_STEMMER_VOKALE .'])y(?=['. DE_STEMMER_VOKALE .'])/u'
                               ),
                          array( 'U', 'Y' ),
                          $wort
                        );
    return $wort;
}
|
121 |
+
|
122 |
+
|
123 |
+
/**
 * Finish a stem: lower-case it and either apply a listed exception or strip
 * the diacritics from its vowels.
 *
 * @param string $wort Stem produced by the suffix-stripping steps.
 * @return string Final stem.
 */
function _de_stemmer_postprocess($wort) {
    // Undo the upper-case U/Y markers set during preprocessing.
    $wort = mb_strtolower($wort);

    // _de_stemmer_ausnahme() takes $wort by reference and overwrites it when
    // the stem is a listed exception; only other stems lose their diacritics.
    if (!_de_stemmer_ausnahme($wort))
    {
        $wort = strtr($wort, array('ä' => 'a', 'á' => 'a',
                                   'ë' => 'e', 'é' => 'e',
                                   'ï' => 'i', 'í' => 'i',
                                   'ö' => 'o', 'ó' => 'o',
                                   'ü' => "u", 'ú' => 'u'
                     ));
    }
    return $wort;
}
|
137 |
+
|
138 |
+
|
139 |
+
/**
 * Reduce a single German word to its stem (steps 1-3 of the Snowball German
 * algorithm, followed by post-processing).
 *
 * The region offsets $r1/$r2 come from strcspn()/strspn(), and the match
 * positions come from preg_match() with PREG_OFFSET_CAPTURE; both are BYTE
 * offsets. The stem is therefore cut with substr(). Using mb_substr() here
 * treats those byte offsets as character counts and strips too little
 * whenever a multi-byte letter (e.g. an umlaut) precedes the suffix.
 *
 * @param string $wort Word to stem.
 * @return string The stem.
 */
function _de_stemmer_wortstamm($wort) {
    $stamm = de_stemmer_preprocess($wort);

    /*
      * R1 is the region after the first non-vowel following a vowel,
        or is the null region at the end of the word if there is no such non-vowel.
      * R2 is the region after the first non-vowel following a vowel in R1,
        or is the null region at the end of the word if there is no such non-vowel.
    */

    $l = strlen($stamm);
    $r1 = _de_stemmer_region_n($stamm);
    // _de_stemmer_region_n() can return a value past the end of the word; in
    // that case R1 is empty and R2 is empty as well.
    $r2 = $r1 >= $l ? $r1 : $r1 + _de_stemmer_region_n(substr($stamm, $r1));
    // Interpretation of the rule
    // "then R1 is ADJUSTED so that the region before it contains at least 3 letters".
    // NOTE(review): the minimum is applied in bytes, not letters - confirm this is acceptable.
    if ($r1 < 3) {
        $r1 = 3;
    }

    /*  Step 1
        Search for the longest among the following suffixes,
            (a) e   em   en   ern   er   es
            (b) s (preceded by a valid s-ending)
        and delete if in R1.
        (Of course the letter of the valid s-ending is not necessarily in R1)
    */

    if (preg_match('/(e|em|en|ern|er|es)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<=(b|d|f|g|h|k|l|m|n|r|t))s$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }


    /*
        Step 2
        Search for the longest among the following suffixes,
            (a) en   er   est
            (b) st (preceded by a valid st-ending, itself preceded by at least 3 letters)
        and delete if in R1.
        NOTE(review): the "at least 3 letters" condition of (b) is not checked below.
    */

    if (preg_match('/(en|er|est)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<=(b|d|f|g|h|k|l|m|n|t))st$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }


    /*
        Step 3: d-suffixes ( see http://snowball.tartarus.org/texts/glossary.html )
        Search for the longest among the following suffixes, and perform the action indicated.
            end   ung
                delete if in R2
                if preceded by ig, delete if in R2 and not preceded by e
            ig   ik   isch
                delete if in R2 and not preceded by e
            lich   heit
                delete if in R2
                if preceded by er or en, delete if in R1
            keit
                delete if in R2
                if preceded by lich or ig, delete if in R2
                                                        ^ means R1 ?
    */

    if (preg_match('/(?<=eig)(end|ung)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r2)) {
        // "eig" + end/ung: deliberately left untouched.
        ;
    }
    elseif (preg_match('/(end|ung)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r2)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<![e])(ig|ik|isch)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r2)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<=(er|en))(lich|heit)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(lich|heit)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r2)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<=lich)keit$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<=ig)keit$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/keit$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r2)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }


    /* What about
        chen, lein, bar, schaft, ... ?
    */
    return _de_stemmer_postprocess($stamm);
}
|
238 |
+
|
239 |
+
|
240 |
+
/**
 * Tell whether a word is a German stop word.
 *
 * @param string $wort Lower-cased word to look up.
 * @return bool True if the word is in the stop word list.
 */
function _de_stemmer_stoppwort($wort) {

    static $stoppworte = array(
        'ab', 'aber', 'aber', 'ach', 'acht', 'achte', 'achten', 'achter', 'achtes', 'ag', 'alle', 'allein', 'allem', 'allen', 'aller', 'allerdings', 'alles', 'allgemeinen', 'als', 'als', 'also', 'am', 'an', 'andere', 'anderen', 'andern', 'anders', 'au', 'auch', 'auch', 'auf', 'aus', 'ausser', 'außer', 'ausserdem', 'außerdem',
        'bald', 'bei', 'beide', 'beiden', 'beim', 'bekannt', 'bereits', 'besonders', 'besser', 'besten', 'bin', 'bis', 'bisher', 'bist',
        'da', 'dabei', 'dadurch', 'dafür', 'dagegen', 'daher', 'dahin', 'dahinter', 'damals', 'damit', 'danach', 'daneben', 'dank', 'dann', 'daran', 'darauf', 'daraus', 'darf', 'darfst', 'darin', 'darüber', 'darum', 'darunter', 'das', 'das', 'dasein', 'daselbst', 'dass', 'daß', 'dasselbe', 'davon', 'davor', 'dazu', 'dazwischen', 'dein', 'deine', 'deinem', 'deiner', 'dem', 'dementsprechend', 'demgegenüber', 'demgemäss', 'demgemäß', 'demselben', 'demzufolge', 'den', 'denen', 'denn', 'denn', 'denselben', 'der', 'deren', 'derjenige', 'derjenigen', 'dermassen', 'dermaßen', 'derselbe', 'derselben', 'des', 'deshalb', 'desselben', 'dessen', 'deswegen', 'd.h', 'dich', 'die', 'diejenige', 'diejenigen', 'dies', 'diese', 'dieselbe', 'dieselben', 'diesem', 'diesen', 'dieser', 'dieses', 'dir', 'doch', 'dort', 'drei', 'drin', 'dritte', 'dritten', 'dritter', 'drittes', 'du', 'durch', 'durchaus',
        'eben', 'ebenso', 'eigen', 'eigene', 'eigenen', 'eigener', 'eigenes', 'ein', 'einander', 'eine', 'einem', 'einen', 'einer', 'eines', 'einige', 'einigen', 'einiger', 'einiges', 'einmal', 'einmal', 'eins', 'elf', 'en', 'ende', 'endlich', 'entweder', 'entweder', 'er', 'ernst', 'erst', 'erste', 'ersten', 'erster', 'erstes', 'es', 'etwa', 'etwas', 'euch',
        'früher', 'fünf', 'fünfte', 'fünften', 'fünfter', 'fünftes', 'für',
        'gab', 'ganz', 'ganze', 'ganzen', 'ganzer', 'ganzes', 'gar', 'gedurft', 'gegen', 'gegenüber', 'gehabt', 'gehen', 'geht', 'gekannt', 'gekonnt', 'gemacht', 'gemocht', 'gemusst', 'genug', 'gerade', 'gern', 'gesagt', 'gesagt', 'geschweige', 'gewesen', 'gewollt', 'geworden', 'gibt', 'ging', 'gleich', 'gott', 'gross', 'groß', 'grosse', 'große', 'grossen', 'großen', 'grosser', 'großer', 'grosses', 'großes', 'gut', 'gute', 'guter', 'gutes',
        'habe', 'haben', 'habt', 'hast', 'hat', 'hatte', 'hätte', 'hatten', 'hätten', 'heisst', 'her', 'heute', 'hier', 'hin', 'hinter', 'hoch',
        'ich', 'ihm', 'ihn', 'ihnen', 'ihr', 'ihre', 'ihrem', 'ihren', 'ihrer', 'ihres', 'im', 'im', 'immer', 'in', 'in', 'indem', 'infolgedessen', 'ins', 'irgend', 'ist',
        'ja', 'ja', 'jahr', 'jahre', 'jahren', 'je', 'jede', 'jedem', 'jeden', 'jeder', 'jedermann', 'jedermanns', 'jedoch', 'jemand', 'jemandem', 'jemanden', 'jene', 'jenem', 'jenen', 'jener', 'jenes', 'jetzt',
        'kam', 'kann', 'kannst', 'kaum', 'kein', 'keine', 'keinem', 'keinen', 'keiner', 'kleine', 'kleinen', 'kleiner', 'kleines', 'kommen', 'kommt', 'können', 'könnt', 'konnte', 'könnte', 'konnten', 'kurz',
        'lang', 'lange', 'lange', 'leicht', 'leide', 'lieber', 'los',
        'machen', 'macht', 'machte', 'mag', 'magst', 'mahn', 'man', 'manche', 'manchem', 'manchen', 'mancher', 'manches', 'mann', 'mehr', 'mein', 'meine', 'meinem', 'meinen', 'meiner', 'meines', 'mich', 'mir', 'mit', 'mittel', 'mochte', 'möchte', 'mochten', 'mögen', 'möglich', 'mögt', 'morgen', 'muss', 'muß', 'müssen', 'musst', 'müsst', 'musste', 'mussten',
        'na', 'nach', 'nachdem', 'nahm', 'natürlich', 'neben', 'nein', 'neue', 'neuen', 'neun', 'neunte', 'neunten', 'neunter', 'neuntes', 'nicht', 'nicht', 'nichts', 'nie', 'niemand', 'niemandem', 'niemanden', 'noch', 'nun', 'nun', 'nur',
        'ob', 'oben', 'oder', 'oder', 'offen', 'oft', 'oft', 'ohne',
        'recht', 'rechte', 'rechten', 'rechter', 'rechtes', 'richtig', 'rund',
        'sa', 'sache', 'sagt', 'sagte', 'sah', 'satt', 'schon', 'sechs', 'sechste', 'sechsten', 'sechster', 'sechstes', 'sehr', 'sei', 'sei', 'seid', 'seien', 'sein', 'seine', 'seinem', 'seinen', 'seiner', 'seines', 'seit', 'seitdem', 'selbst', 'selbst', 'sich', 'sie', 'sieben', 'siebente', 'siebenten', 'siebenter', 'siebentes', 'sind', 'so', 'solang', 'solche', 'solchem', 'solchen', 'solcher', 'solches', 'soll', 'sollen', 'sollte', 'sollten', 'sondern', 'sonst', 'sowie', 'später', 'statt',
        'tat', 'teil', 'tel', 'tritt', 'trotzdem', 'tun',
        'über', 'überhaupt', 'übrigens', 'uhr', 'um', 'und', 'und?', 'uns', 'unser', 'unsere', 'unserer', 'unter',
        'vergangenen', 'viel', 'viele', 'vielem', 'vielen', 'vielleicht', 'vier', 'vierte', 'vierten', 'vierter', 'viertes', 'vom', 'von', 'vor',
        'wahr?', 'während', 'währenddem', 'währenddessen', 'wann', 'war', 'wäre', 'waren', 'wart', 'warum', 'was', 'wegen', 'weil', 'weit', 'weiter', 'weitere', 'weiteren', 'weiteres', 'welche', 'welchem', 'welchen', 'welcher', 'welches', 'wem', 'wen', 'wenig', 'wenig', 'wenige', 'weniger', 'weniges', 'wenigstens', 'wenn', 'wenn', 'wer', 'werde', 'werden', 'werdet', 'wessen', 'wie', 'wie', 'wieder', 'will', 'willst', 'wir', 'wird', 'wirklich', 'wirst', 'wo', 'wohl', 'wollen', 'wollt', 'wollte', 'wollten', 'worden', 'wurde', 'würde', 'wurden', 'würden',
        'z.b', 'zehn', 'zehnte', 'zehnten', 'zehnter', 'zehntes', 'zeit', 'zu', 'zuerst', 'zugleich', 'zum', 'zum', 'zunächst', 'zur', 'zurück', 'zusammen', 'zwanzig', 'zwar', 'zwar', 'zwei', 'zweite', 'zweiten', 'zweiter', 'zweites', 'zwischen', 'zwölf'
    );

    // Strict comparison: only an identical string counts as a stop word.
    return in_array($wort, $stoppworte, true);
}
|
268 |
+
|
269 |
+
|
270 |
+
/**
 * Apply the list of stemming exceptions.
 *
 * If $wort is a listed stem it is replaced, in place, by the mapped value.
 * Entries that map a stem to itself keep an umlaut that would otherwise be
 * stripped (e.g. "schön" must not become "schon").
 *
 * @param string $wort Stem to check; overwritten when an exception applies.
 * @return bool TRUE if an exception was applied, FALSE otherwise.
 */
function _de_stemmer_ausnahme(&$wort)
{ static $de_stemmer_ausnahmen = array (
    'schön' => 'schön',   // not "schon"
    'blüt' => 'blüt',     // Blüte (NOT Blut)
    'kannt' => 'kenn',
    'küch' => 'küch',     // Küchen (NOT Kuchen)
    'mög' => 'mög',
    'mocht' => 'mög',
    'mag' => 'mög',
    'ging' => 'geh',
    'lief' => 'lauf',
    'änd' => 'änd'        // ändern (NOT andern)
  );

  if ( array_key_exists($wort, $de_stemmer_ausnahmen) )
  { $wort = $de_stemmer_ausnahmen[$wort];
    return TRUE;
  }
  else
    return FALSE;
}
|
295 |
+
|
296 |
+
/*
|
297 |
+
Stem caching added by Rob Marsh, SJ
|
298 |
+
http://rmarsh.com
|
299 |
+
*/
|
300 |
+
|
301 |
+
// Memo of stems already computed, keyed by the word as passed to stem().
$StemCache = array();

/*
 * Returns the stem of $word, running the stemmer only for words that are
 * not yet in $StemCache.
 */
function stem($word) {
    global $StemCache;
    if (isset($StemCache[$word])) {
        return $StemCache[$word];
    }
    $fresh = _de_stemmer_wortstamm($word);
    $StemCache[$word] = $fresh;
    return $fresh;
}
|
314 |
+
|
315 |
+
?>
|
languages/de/stemmer.php.bak
ADDED
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
/*
|
3 |
+
Adapted from a drupal module -- see details below
|
4 |
+
*/
|
5 |
+
|
6 |
+
/*
|
7 |
+
Content:
|
8 |
+
Drupal module to improve searching in german texts (Porter stemmer)
|
9 |
+
Algorithm based on http://snowball.tartarus.org/algorithms/german/stemmer.html
|
10 |
+
Author:
|
11 |
+
Reiner Miericke 10.10.2007
|
12 |
+
References:
|
13 |
+
Algorithm:
|
14 |
+
http://www.clef-campaign.org/workshop2002/WN/3.pdf
|
15 |
+
http://w3.ub.uni-konstanz.de/v13/volltexte/2003/996//pdf/scherer.pdf
|
16 |
+
http://kontext.fraunhofer.de/haenelt/kurs/Referate/Kowatschew_Lang/stemming.pdf
|
17 |
+
http://www.cis.uni-muenchen.de/people/Schulz/SeminarSoSe2001IR/FilzmayerMargetic/referat.html
|
18 |
+
http://www.ifi.unizh.ch/CL/broder/mue1/porter/stemming/node1.html
|
19 |
+
For lists of stopwords see
|
20 |
+
http://members.unine.ch/jacques.savoy/clef/index.html
|
21 |
+
Small parts were stolen from dutchstemmer.module
|
22 |
+
*/
|
23 |
+
|
24 |
+
|
25 |
+
// Vowels recognised by the stemmer, including the German umlauts.
define("DE_STEMMER_VOKALE", "aeiouyäöü");

// Detect the encoding in which the literals of this file are stored and make
// it the mbstring internal encoding, so the mb_* calls below agree with them.
$enc = mb_detect_encoding('a-zA-ZÄÖÜßäëïöüáéíóúè');
if ($enc !== false) {
    // mb_detect_encoding() returns false when no encoding matches; handing
    // that to mb_internal_encoding() would raise an error.
    mb_internal_encoding($enc);
}
|
29 |
+
|
30 |
+
/*
 * Splits a text into its words; every run of non-letter characters is
 * treated as a separator and empty pieces are dropped.
 */
function _de_stemmer_split_text(&$text) {
    $noise = '/([^a-zA-ZÄÖÜßäëïöüáéíóúè]+)/u';
    $tokens = preg_split($noise, $text, -1, PREG_SPLIT_NO_EMPTY);
    return $tokens;
}
|
34 |
+
|
35 |
+
|
36 |
+
/**
 * Implementation of hook_search_preprocess
 *
 * Stems every word of the text and leaves the separators between the
 * words as they are.
 */
function de_stemmer_search_preprocess(&$text) {
    // Capturing the delimiters yields an alternating list: word, noise, word, ...
    $pieces = preg_split('/([^a-zA-ZÄÖÜßäëïöüáéíóúè]+)/u', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    // Words sit at the even positions (0, 2, 4, ...); only those are stemmed.
    foreach ($pieces as $position => $piece) {
        if ($position % 2 === 0) {
            $pieces[$position] = _de_stemmer_wortstamm($piece);
        }
    }

    // Re-join words and separators in their original order.
    return implode('', $pieces);
}
|
69 |
+
|
70 |
+
|
71 |
+
/**
 * Implementation of hook_help().
 *
 * @param string $section Help path being requested.
 * @return string|null The module description for 'admin/modules#description',
 *                     null for every other section.
 */
function de_stemmer_help($section = 'admin/help#search') {
    switch ($section) {
        case 'admin/modules#description':
            $description = 'Implements a German stemming algorithm (Porter) to improve searching.';
            // t() is not defined in this file (it comes from Drupal); fall back to
            // the untranslated text instead of dying with "undefined function".
            return function_exists('t') ? t($description) : $description;
    }
}
|
80 |
+
|
81 |
+
|
82 |
+
/*
 * Builds a lookup table from each word of a text to its stem.
 *
 * The text is split into words, every word is stemmed, and the pair is
 * stored as word => stem. The table can be used to identify words that
 * reduce to the same stem.
 * For details please compare 'search.module-stem.patch'
 */
function de_stemmer_stem_list($text) {
    $lookup = array();
    foreach (_de_stemmer_split_text($text) as $token) {
        $lookup[$token] = _de_stemmer_wortstamm($token);
    }
    return $lookup;
}
|
100 |
+
|
101 |
+
|
102 |
+
/*
 * Returns the byte offset just past the first non-vowel that follows the
 * first run of vowels in $wort. The result may exceed the length of $wort
 * when no such non-vowel exists.
 */
function _de_stemmer_region_n($wort) {
    // Bytes before the first vowel.
    $leading = strcspn($wort, DE_STEMMER_VOKALE);
    // Length of the vowel run that starts there.
    $vowel_run = strspn($wort, DE_STEMMER_VOKALE, $leading);
    // Skip one more byte: the non-vowel that ends the run.
    return $leading + $vowel_run + 1;
}
|
106 |
+
|
107 |
+
/*
 * Normalises a word before stemming: lower-cases it, replaces ß by ss, and
 * puts u and y between vowels into upper case.
 */
function de_stemmer_preprocess($wort) {
    $lowered = str_replace("ß", "ss", mb_strtolower($wort));

    // pattern => replacement, applied in this order
    $rules = array(
        '/ß/' => 'ss',
        '/(?<=['. DE_STEMMER_VOKALE .'])u(?=['. DE_STEMMER_VOKALE .'])/u' => 'U',
        '/(?<=['. DE_STEMMER_VOKALE .'])y(?=['. DE_STEMMER_VOKALE .'])/u' => 'Y'
    );

    return preg_replace(array_keys($rules), array_values($rules), $lowered);
}
|
121 |
+
|
122 |
+
|
123 |
+
/*
 * Finishes a stem: lower-cases it, then either applies a listed exception
 * or strips the diacritics from its vowels.
 */
function _de_stemmer_postprocess($wort) {
    static $plain_vowels = array(
        'ä' => 'a', 'á' => 'a',
        'ë' => 'e', 'é' => 'e',
        'ï' => 'i', 'í' => 'i',
        'ö' => 'o', 'ó' => 'o',
        'ü' => "u", 'ú' => 'u'
    );

    $stem = mb_strtolower($wort);

    // The exception check rewrites $stem in place when it applies.
    if (_de_stemmer_ausnahme($stem)) {
        return $stem;
    }
    return strtr($stem, $plain_vowels);
}
|
137 |
+
|
138 |
+
|
139 |
+
/**
 * Reduce a single German word to its stem (steps 1-3 of the Snowball German
 * algorithm, followed by post-processing).
 *
 * The region offsets $r1/$r2 come from strcspn()/strspn(), and the match
 * positions come from preg_match() with PREG_OFFSET_CAPTURE; both are BYTE
 * offsets. The stem is therefore cut with substr(). Using mb_substr() here
 * treats those byte offsets as character counts and strips too little
 * whenever a multi-byte letter (e.g. an umlaut) precedes the suffix.
 *
 * @param string $wort Word to stem.
 * @return string The stem.
 */
function _de_stemmer_wortstamm($wort) {
    $stamm = de_stemmer_preprocess($wort);

    /*
      * R1 is the region after the first non-vowel following a vowel,
        or is the null region at the end of the word if there is no such non-vowel.
      * R2 is the region after the first non-vowel following a vowel in R1,
        or is the null region at the end of the word if there is no such non-vowel.
    */

    $l = strlen($stamm);
    $r1 = _de_stemmer_region_n($stamm);
    // _de_stemmer_region_n() can return a value past the end of the word; in
    // that case R1 is empty and R2 is empty as well.
    $r2 = $r1 >= $l ? $r1 : $r1 + _de_stemmer_region_n(substr($stamm, $r1));
    // Interpretation of the rule
    // "then R1 is ADJUSTED so that the region before it contains at least 3 letters".
    // NOTE(review): the minimum is applied in bytes, not letters - confirm this is acceptable.
    if ($r1 < 3) {
        $r1 = 3;
    }

    /*  Step 1
        Search for the longest among the following suffixes,
            (a) e   em   en   ern   er   es
            (b) s (preceded by a valid s-ending)
        and delete if in R1.
        (Of course the letter of the valid s-ending is not necessarily in R1)
    */

    if (preg_match('/(e|em|en|ern|er|es)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<=(b|d|f|g|h|k|l|m|n|r|t))s$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }


    /*
        Step 2
        Search for the longest among the following suffixes,
            (a) en   er   est
            (b) st (preceded by a valid st-ending, itself preceded by at least 3 letters)
        and delete if in R1.
        NOTE(review): the "at least 3 letters" condition of (b) is not checked below.
    */

    if (preg_match('/(en|er|est)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<=(b|d|f|g|h|k|l|m|n|t))st$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }


    /*
        Step 3: d-suffixes ( see http://snowball.tartarus.org/texts/glossary.html )
        Search for the longest among the following suffixes, and perform the action indicated.
            end   ung
                delete if in R2
                if preceded by ig, delete if in R2 and not preceded by e
            ig   ik   isch
                delete if in R2 and not preceded by e
            lich   heit
                delete if in R2
                if preceded by er or en, delete if in R1
            keit
                delete if in R2
                if preceded by lich or ig, delete if in R2
                                                        ^ means R1 ?
    */

    if (preg_match('/(?<=eig)(end|ung)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r2)) {
        // "eig" + end/ung: deliberately left untouched.
        ;
    }
    elseif (preg_match('/(end|ung)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r2)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<![e])(ig|ik|isch)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r2)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<=(er|en))(lich|heit)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(lich|heit)$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r2)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<=lich)keit$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/(?<=ig)keit$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r1)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }
    elseif (preg_match('/keit$/u', $stamm, $hits, PREG_OFFSET_CAPTURE, $r2)) {
        $stamm = substr($stamm, 0, $hits[0][1]);
    }


    /* What about
        chen, lein, bar, schaft, ... ?
    */
    return _de_stemmer_postprocess($stamm);
}
|
238 |
+
|
239 |
+
|
240 |
+
/*
 * Reports whether $wort appears in the built-in list of German stop words.
 */
function _de_stemmer_stoppwort($wort) {
    // One row per initial letter.
    static $stop_words = array(
        'ab', 'aber', 'aber', 'ach', 'acht', 'achte', 'achten', 'achter', 'achtes', 'ag', 'alle', 'allein', 'allem', 'allen', 'aller', 'allerdings', 'alles', 'allgemeinen', 'als', 'als', 'also', 'am', 'an', 'andere', 'anderen', 'andern', 'anders', 'au', 'auch', 'auch', 'auf', 'aus', 'ausser', 'außer', 'ausserdem', 'außerdem',
        'bald', 'bei', 'beide', 'beiden', 'beim', 'bekannt', 'bereits', 'besonders', 'besser', 'besten', 'bin', 'bis', 'bisher', 'bist',
        'da', 'dabei', 'dadurch', 'dafür', 'dagegen', 'daher', 'dahin', 'dahinter', 'damals', 'damit', 'danach', 'daneben', 'dank', 'dann', 'daran', 'darauf', 'daraus', 'darf', 'darfst', 'darin', 'darüber', 'darum', 'darunter', 'das', 'das', 'dasein', 'daselbst', 'dass', 'daß', 'dasselbe', 'davon', 'davor', 'dazu', 'dazwischen', 'dein', 'deine', 'deinem', 'deiner', 'dem', 'dementsprechend', 'demgegenüber', 'demgemäss', 'demgemäß', 'demselben', 'demzufolge', 'den', 'denen', 'denn', 'denn', 'denselben', 'der', 'deren', 'derjenige', 'derjenigen', 'dermassen', 'dermaßen', 'derselbe', 'derselben', 'des', 'deshalb', 'desselben', 'dessen', 'deswegen', 'd.h', 'dich', 'die', 'diejenige', 'diejenigen', 'dies', 'diese', 'dieselbe', 'dieselben', 'diesem', 'diesen', 'dieser', 'dieses', 'dir', 'doch', 'dort', 'drei', 'drin', 'dritte', 'dritten', 'dritter', 'drittes', 'du', 'durch', 'durchaus',
        'eben', 'ebenso', 'eigen', 'eigene', 'eigenen', 'eigener', 'eigenes', 'ein', 'einander', 'eine', 'einem', 'einen', 'einer', 'eines', 'einige', 'einigen', 'einiger', 'einiges', 'einmal', 'einmal', 'eins', 'elf', 'en', 'ende', 'endlich', 'entweder', 'entweder', 'er', 'ernst', 'erst', 'erste', 'ersten', 'erster', 'erstes', 'es', 'etwa', 'etwas', 'euch',
        'früher', 'fünf', 'fünfte', 'fünften', 'fünfter', 'fünftes', 'für',
        'gab', 'ganz', 'ganze', 'ganzen', 'ganzer', 'ganzes', 'gar', 'gedurft', 'gegen', 'gegenüber', 'gehabt', 'gehen', 'geht', 'gekannt', 'gekonnt', 'gemacht', 'gemocht', 'gemusst', 'genug', 'gerade', 'gern', 'gesagt', 'gesagt', 'geschweige', 'gewesen', 'gewollt', 'geworden', 'gibt', 'ging', 'gleich', 'gott', 'gross', 'groß', 'grosse', 'große', 'grossen', 'großen', 'grosser', 'großer', 'grosses', 'großes', 'gut', 'gute', 'guter', 'gutes',
        'habe', 'haben', 'habt', 'hast', 'hat', 'hatte', 'hätte', 'hatten', 'hätten', 'heisst', 'her', 'heute', 'hier', 'hin', 'hinter', 'hoch',
        'ich', 'ihm', 'ihn', 'ihnen', 'ihr', 'ihre', 'ihrem', 'ihren', 'ihrer', 'ihres', 'im', 'im', 'immer', 'in', 'in', 'indem', 'infolgedessen', 'ins', 'irgend', 'ist',
        'ja', 'ja', 'jahr', 'jahre', 'jahren', 'je', 'jede', 'jedem', 'jeden', 'jeder', 'jedermann', 'jedermanns', 'jedoch', 'jemand', 'jemandem', 'jemanden', 'jene', 'jenem', 'jenen', 'jener', 'jenes', 'jetzt',
        'kam', 'kann', 'kannst', 'kaum', 'kein', 'keine', 'keinem', 'keinen', 'keiner', 'kleine', 'kleinen', 'kleiner', 'kleines', 'kommen', 'kommt', 'können', 'könnt', 'konnte', 'könnte', 'konnten', 'kurz',
        'lang', 'lange', 'lange', 'leicht', 'leide', 'lieber', 'los',
        'machen', 'macht', 'machte', 'mag', 'magst', 'mahn', 'man', 'manche', 'manchem', 'manchen', 'mancher', 'manches', 'mann', 'mehr', 'mein', 'meine', 'meinem', 'meinen', 'meiner', 'meines', 'mich', 'mir', 'mit', 'mittel', 'mochte', 'möchte', 'mochten', 'mögen', 'möglich', 'mögt', 'morgen', 'muss', 'muß', 'müssen', 'musst', 'müsst', 'musste', 'mussten',
        'na', 'nach', 'nachdem', 'nahm', 'natürlich', 'neben', 'nein', 'neue', 'neuen', 'neun', 'neunte', 'neunten', 'neunter', 'neuntes', 'nicht', 'nicht', 'nichts', 'nie', 'niemand', 'niemandem', 'niemanden', 'noch', 'nun', 'nun', 'nur',
        'ob', 'oben', 'oder', 'oder', 'offen', 'oft', 'oft', 'ohne',
        'recht', 'rechte', 'rechten', 'rechter', 'rechtes', 'richtig', 'rund',
        'sa', 'sache', 'sagt', 'sagte', 'sah', 'satt', 'schon', 'sechs', 'sechste', 'sechsten', 'sechster', 'sechstes', 'sehr', 'sei', 'sei', 'seid', 'seien', 'sein', 'seine', 'seinem', 'seinen', 'seiner', 'seines', 'seit', 'seitdem', 'selbst', 'selbst', 'sich', 'sie', 'sieben', 'siebente', 'siebenten', 'siebenter', 'siebentes', 'sind', 'so', 'solang', 'solche', 'solchem', 'solchen', 'solcher', 'solches', 'soll', 'sollen', 'sollte', 'sollten', 'sondern', 'sonst', 'sowie', 'später', 'statt',
        'tat', 'teil', 'tel', 'tritt', 'trotzdem', 'tun',
        'über', 'überhaupt', 'übrigens', 'uhr', 'um', 'und', 'und?', 'uns', 'unser', 'unsere', 'unserer', 'unter',
        'vergangenen', 'viel', 'viele', 'vielem', 'vielen', 'vielleicht', 'vier', 'vierte', 'vierten', 'vierter', 'viertes', 'vom', 'von', 'vor',
        'wahr?', 'während', 'währenddem', 'währenddessen', 'wann', 'war', 'wäre', 'waren', 'wart', 'warum', 'was', 'wegen', 'weil', 'weit', 'weiter', 'weitere', 'weiteren', 'weiteres', 'welche', 'welchem', 'welchen', 'welcher', 'welches', 'wem', 'wen', 'wenig', 'wenig', 'wenige', 'weniger', 'weniges', 'wenigstens', 'wenn', 'wenn', 'wer', 'werde', 'werden', 'werdet', 'wessen', 'wie', 'wie', 'wieder', 'will', 'willst', 'wir', 'wird', 'wirklich', 'wirst', 'wo', 'wohl', 'wollen', 'wollt', 'wollte', 'wollten', 'worden', 'wurde', 'würde', 'wurden', 'würden',
        'z.b', 'zehn', 'zehnte', 'zehnten', 'zehnter', 'zehntes', 'zeit', 'zu', 'zuerst', 'zugleich', 'zum', 'zum', 'zunächst', 'zur', 'zurück', 'zusammen', 'zwanzig', 'zwar', 'zwar', 'zwei', 'zweite', 'zweiten', 'zweiter', 'zweites', 'zwischen', 'zwölf'
    );

    $found_at = array_search($wort, $stop_words);
    return $found_at !== false;
}
|
268 |
+
|
269 |
+
|
270 |
+
/*
|
271 |
+
first try to set up a list of exceptions
|
272 |
+
*/
|
273 |
+
/**
 * Look a word up in the hand-maintained exception table and, on a hit,
 * overwrite it with the stem recorded there.
 *
 * @param string $wort Word to check; replaced in place (by reference) when
 *                     the table has an entry for it, left untouched otherwise.
 * @return bool TRUE if the word was found in the table, FALSE if not.
 */
function _de_stemmer_ausnahme(&$wort)
{
    // Built once per request; maps a word form to the stem that must be used for it.
    static $ausnahmen = array(
        'schön' => 'schön', // not "schon"
        'blüt'  => 'blüt',  // Blüte (NOT Blut)
        'kannt' => 'kenn',
        'küch'  => 'küch',  // Küchen (NOT Kuchen)
        'mög'   => 'mög',
        'mocht' => 'mög',
        'mag'   => 'mög',
        'ging'  => 'geh',
        'lief'  => 'lauf',
        'änd'   => 'änd'    // ändern (NOT andern)
    );

    if (!array_key_exists($wort, $ausnahmen)) {
        return FALSE;
    }

    $wort = $ausnahmen[$wort];
    return TRUE;
}
|
295 |
+
|
296 |
+
/*
|
297 |
+
Stem caching added by Rob Marsh, SJ
|
298 |
+
http://rmarsh.com
|
299 |
+
*/
|
300 |
+
|
301 |
+
$StemCache = array();
|
302 |
+
|
303 |
+
/**
 * Return the stem of a word, memoising results in the global $StemCache.
 *
 * A word is passed to _de_stemmer_wortstamm() only when no cache entry is
 * set for it; later calls return the stored value.
 *
 * @param string $word Word to stem.
 * @return mixed Whatever _de_stemmer_wortstamm() produced for this word.
 */
function stem($word) {
    global $StemCache;

    // Cache hit: hand back the stored stem without recomputing.
    if (isset($StemCache[$word])) {
        return $StemCache[$word];
    }

    $result = _de_stemmer_wortstamm($word);
    $StemCache[$word] = $result;
    return $result;
}
|
314 |
+
|
315 |
+
?>
|
languages/de/stopwords.php
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
// The list of common words we want to ignore. NB: anything shorter than 4 characters is already discarded by the plugin and does not need to appear here.
|
3 |
+
// German stop words. The entries containing umlauts (können, könnte, über, während, würde, würden)
// are stored as UTF-8; they previously held an undecodable byte in place of the umlaut and could
// therefore never match a word taken from UTF-8 post text.
$overusedwords = array("aber", "alle", "allem", "allen", "aller", "alles", "also", "ander", "andere", "anderem", "anderen", "anderer", "anderes", "anderm", "andern", "anderr", "anders", "auch", "bist", "damit", "dann", "derselbe", "derselben", "denselben", "desselben", "demselben", "dieselbe", "dieselben", "dasselbe", "dazu", "dein", "deine", "deinem", "deinen", "deiner", "deines", "denn", "derer", "dessen", "dich", "dies", "diese", "diesem", "diesen", "dieser", "dieses", "doch", "dort", "durch", "eine", "einem", "einen", "einer", "eines", "einig", "einige", "einigem", "einigen", "einiger", "einiges", "einmal", "etwas", "euer", "eure", "eurem", "euren", "eurer", "eures", "gegen", "gewesen", "habe", "haben", "hatte", "hatten", "hier", "hinter", "mich", "ihre", "ihrem", "ihren", "ihrer", "ihres", "euch", "indem", "jede", "jedem", "jeden", "jeder", "jedes", "jene", "jenem", "jenen", "jener", "jenes", "jetzt", "kann", "kein", "keine", "keinem", "keinen", "keiner", "keines", "können", "könnte", "machen", "manche", "manchem", "manchen", "mancher", "manches", "mein", "meine", "meinem", "meinen", "meiner", "meines", "muss", "musste", "nach", "nicht", "nichts", "noch", "oder", "ohne", "sehr", "sein", "seine", "seinem", "seinen", "seiner", "seines", "selbst", "sich", "ihnen", "sind", "solche", "solchem", "solchen", "solcher", "solches", "soll", "sollte", "sondern", "sonst", "über", "unse", "unsem", "unsen", "unser", "unses", "unter", "viel", "während", "waren", "warst", "weil", "weiter", "welche", "welchem", "welchen", "welcher", "welches", "wenn", "werde", "werden", "wieder", "will", "wird", "wirst", "wollen", "wollte", "würde", "würden", "zwar", "zwischen");
|
4 |
+
?>
|
languages/en/stemmer.php
ADDED
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
/*
|
3 |
+
Creado por Cesar Rodas para el proyecto Saddor.com
|
4 |
+
Este Stemmer esta basado en el argoritmo de Snowball Stemmer.
|
5 |
+
saddor@gmail.com
|
6 |
+
Este programa esta bajo licencia GNU
|
7 |
+
*/
|
8 |
+
if (!defined("ENGLISHSTEMMER"))
{
    define("ENGLISHSTEMMER", 1);

    /**
     * Suffix-stripping stemmer for English words.
     *
     * Stem() passes the word through step1ab, step1c, step2, step3, step4 and
     * step5 in that order; each step removes or rewrites one family of
     * suffixes. The character classes below only recognise lower-case ASCII
     * letters, so callers are expected to pass lower-cased words.
     */
    class EnglishStemmer
    {
        /** Regex fragment for one consonant; "y" counts as a consonant at the start of the string or directly after a vowel. */
        var $regex_consonant = '(?:[bcdfghjklmnpqrstvwxz]|(?<=[aeiou])y|^y)';

        /** Regex fragment for one vowel; "y" counts as a vowel when it is not directly preceded by a vowel. */
        var $regex_vowel = '(?:[aeiou]|(?<![aeiou])y)';

        /**
         * Stem a single word.
         *
         * @param string $word Word to stem.
         * @return string The stem; words of two bytes or fewer are returned unchanged.
         */
        function Stem($word)
        {
            if (strlen($word) <= 2) {
                return $word;
            }

            $word = $this->step1ab($word);
            $word = $this->step1c($word);
            $word = $this->step2($word);
            $word = $this->step3($word);
            $word = $this->step4($word);
            $word = $this->step5($word);

            // Added by Cesar Rodas: do not leave an apostrophe at the end of the stem.
            if (substr($word, -1, 1) == "'")
                $word = substr($word, 0, strlen($word) - 1);

            return $word;
        }

        /**
         * Steps 1a and 1b: plural endings, then "eed" / "ed" / "ing".
         *
         * @param string $word
         * @return string
         */
        function step1ab($word)
        {
            // 1a: the OR chain stops at the first suffix that matches.
            if (substr($word, -1) == 's') {
                $this->replace($word, 'sses', 'ss')
                OR $this->replace($word, 'ies', 'i')
                OR $this->replace($word, 'ss', 'ss')
                OR $this->replace($word, 's', '');
            }

            // 1b: a word ending in "eed" is handled by replace() alone (rewritten to "ee"
            // only when the remaining stem has measure > 0); everything else tries "ing" / "ed".
            if (substr($word, -2, 1) != 'e' OR !$this->replace($word, 'eed', 'ee', 0)) {
                $v = $this->regex_vowel;

                // "ing" / "ed" are removed only when the part in front of them contains a vowel.
                if (   preg_match("#$v+#", substr($word, 0, -3)) && $this->replace($word, 'ing', '')
                    OR preg_match("#$v+#", substr($word, 0, -2)) && $this->replace($word, 'ed', '')) {

                    // Tidy up the stem left behind: restore a final "e" after at/bl/iz ...
                    if (    !$this->replace($word, 'at', 'ate')
                        AND !$this->replace($word, 'bl', 'ble')
                        AND !$this->replace($word, 'iz', 'ize')) {

                        // ... or undouble a final consonant (except l, s, z) ...
                        if (    $this->doubleConsonant($word)
                            AND substr($word, -2) != 'll'
                            AND substr($word, -2) != 'ss'
                            AND substr($word, -2) != 'zz') {

                            $word = substr($word, 0, -1);

                        // ... or append "e" to a measure-1 stem ending consonant-vowel-consonant.
                        } else if ($this->m($word) == 1 AND $this->cvc($word)) {
                            $word .= 'e';
                        }
                    }
                }
            }

            return $word;
        }

        /**
         * Step 1c: turn a final "y" into "i" when the rest of the word contains a vowel.
         *
         * @param string $word
         * @return string
         */
        function step1c($word)
        {
            $v = $this->regex_vowel;

            if (substr($word, -1) == 'y' && preg_match("#$v+#", substr($word, 0, -1))) {
                $this->replace($word, 'y', 'i');
            }

            return $word;
        }

        /**
         * Step 2: rewrite longer suffixes (e.g. "ational" -> "ate") when the
         * remaining stem has measure > 0. The switch on the penultimate letter
         * only narrows down which suffixes need to be tried.
         *
         * @param string $word
         * @return string
         */
        function step2($word)
        {
            switch (substr($word, -2, 1)) {
                case 'a':
                    $this->replace($word, 'ational', 'ate', 0)
                    OR $this->replace($word, 'tional', 'tion', 0);
                    break;

                case 'c':
                    $this->replace($word, 'enci', 'ence', 0)
                    OR $this->replace($word, 'anci', 'ance', 0);
                    break;

                case 'e':
                    $this->replace($word, 'izer', 'ize', 0);
                    break;

                case 'g':
                    $this->replace($word, 'logi', 'log', 0);
                    break;

                case 'l':
                    $this->replace($word, 'entli', 'ent', 0)
                    OR $this->replace($word, 'ousli', 'ous', 0)
                    OR $this->replace($word, 'alli', 'al', 0)
                    OR $this->replace($word, 'bli', 'ble', 0)
                    OR $this->replace($word, 'eli', 'e', 0);
                    break;

                case 'o':
                    $this->replace($word, 'ization', 'ize', 0)
                    OR $this->replace($word, 'ation', 'ate', 0)
                    OR $this->replace($word, 'ator', 'ate', 0);
                    break;

                case 's':
                    $this->replace($word, 'iveness', 'ive', 0)
                    OR $this->replace($word, 'fulness', 'ful', 0)
                    OR $this->replace($word, 'ousness', 'ous', 0)
                    OR $this->replace($word, 'alism', 'al', 0);
                    break;

                case 't':
                    $this->replace($word, 'biliti', 'ble', 0)
                    OR $this->replace($word, 'aliti', 'al', 0)
                    OR $this->replace($word, 'iviti', 'ive', 0);
                    break;
            }

            return $word;
        }

        /**
         * Step 3: shorten or drop a further set of suffixes ("ical" -> "ic",
         * "ness" -> "", ...) when the remaining stem has measure > 0.
         *
         * @param string $word
         * @return string
         */
        function step3($word)
        {
            switch (substr($word, -2, 1)) {
                case 'a':
                    $this->replace($word, 'ical', 'ic', 0);
                    break;

                case 's':
                    $this->replace($word, 'ness', '', 0);
                    break;

                case 't':
                    $this->replace($word, 'icate', 'ic', 0)
                    OR $this->replace($word, 'iciti', 'ic', 0);
                    break;

                case 'u':
                    $this->replace($word, 'ful', '', 0);
                    break;

                case 'v':
                    $this->replace($word, 'ative', '', 0);
                    break;

                case 'z':
                    $this->replace($word, 'alize', 'al', 0);
                    break;
            }

            return $word;
        }

        /**
         * Step 4: delete a final suffix outright when the remaining stem has measure > 1.
         *
         * @param string $word
         * @return string
         */
        function step4($word)
        {
            switch (substr($word, -2, 1)) {
                case 'a':
                    $this->replace($word, 'al', '', 1);
                    break;

                case 'c':
                    $this->replace($word, 'ance', '', 1)
                    OR $this->replace($word, 'ence', '', 1);
                    break;

                case 'e':
                    $this->replace($word, 'er', '', 1);
                    break;

                case 'i':
                    $this->replace($word, 'ic', '', 1);
                    break;

                case 'l':
                    $this->replace($word, 'able', '', 1)
                    OR $this->replace($word, 'ible', '', 1);
                    break;

                case 'n':
                    $this->replace($word, 'ant', '', 1)
                    OR $this->replace($word, 'ement', '', 1)
                    OR $this->replace($word, 'ment', '', 1)
                    OR $this->replace($word, 'ent', '', 1);
                    break;

                case 'o':
                    // "ion" is only removed after "t" or "s".
                    if (substr($word, -4) == 'tion' OR substr($word, -4) == 'sion') {
                        $this->replace($word, 'ion', '', 1);
                    } else {
                        $this->replace($word, 'ou', '', 1);
                    }
                    break;

                case 's':
                    $this->replace($word, 'ism', '', 1);
                    break;

                case 't':
                    $this->replace($word, 'ate', '', 1)
                    OR $this->replace($word, 'iti', '', 1);
                    break;

                case 'u':
                    $this->replace($word, 'ous', '', 1);
                    break;

                case 'v':
                    $this->replace($word, 'ive', '', 1);
                    break;

                case 'z':
                    $this->replace($word, 'ize', '', 1);
                    break;
            }

            return $word;
        }

        /**
         * Step 5: drop a final "e" and reduce a final "ll" on long stems.
         *
         * @param string $word
         * @return string
         */
        function step5($word)
        {
            // Part a: remove the final "e" when the stem before it has measure > 1,
            // or measure 1 and does not end consonant-vowel-consonant.
            if (substr($word, -1) == 'e') {
                if ($this->m(substr($word, 0, -1)) > 1) {
                    $this->replace($word, 'e', '');

                } else if ($this->m(substr($word, 0, -1)) == 1) {

                    if (!$this->cvc(substr($word, 0, -1))) {
                        $this->replace($word, 'e', '');
                    }
                }
            }

            // Part b: "ll" -> "l" when the measure is > 1.
            if ($this->m($word) > 1 AND $this->doubleConsonant($word) AND substr($word, -1) == 'l') {
                $word = substr($word, 0, -1);
            }

            return $word;
        }

        /**
         * Replace a suffix of $str, optionally subject to a measure condition.
         *
         * @param string   $str   Word to modify in place (by reference).
         * @param string   $check Suffix to look for at the end of $str.
         * @param string   $repl  Replacement for the suffix.
         * @param int|null $m     When not null, the substitution is made only if the
         *                        measure of the part before the suffix is greater than $m.
         * @return bool TRUE whenever $str ends with $check - even if the measure
         *              condition prevented the substitution - so that callers chaining
         *              attempts with OR stop at the first matching suffix; FALSE otherwise.
         */
        function replace(&$str, $check, $repl, $m = null)
        {
            $len = 0 - strlen($check);

            if (substr($str, $len) == $check) {
                $substr = substr($str, 0, $len);
                if (is_null($m) OR $this->m($substr) > $m) {
                    $str = $substr . $repl;
                }

                return true;
            }

            return false;
        }

        /**
         * Measure of a string: the number of "vowel run followed by consonant run"
         * groups left after leading consonants and trailing vowels are stripped.
         *
         * @param string $str
         * @return int
         */
        function m($str)
        {
            $c = $this->regex_consonant;
            $v = $this->regex_vowel;

            $str = preg_replace("#^$c+#", '', $str);
            $str = preg_replace("#$v+$#", '', $str);

            preg_match_all("#($v+$c+)#", $str, $matches);

            return count($matches[1]);
        }

        /**
         * Whether the string ends in the same consonant twice.
         *
         * @param string $str
         * @return bool
         */
        function doubleConsonant($str)
        {
            $c = $this->regex_consonant;

            // Square-bracket offsets: the curly-brace form ($s{0}) was removed in PHP 8.0.
            return preg_match('#' . $c . '{2}$#', $str, $matches) AND $matches[0][0] == $matches[0][1];
        }

        /**
         * Whether the string ends consonant-vowel-consonant with the final
         * consonant not being w, x or y.
         *
         * @param string $str
         * @return bool
         */
        function cvc($str)
        {
            $c = $this->regex_consonant;
            $v = $this->regex_vowel;

            // Square-bracket offsets: the curly-brace form ($s{2}) was removed in PHP 8.0.
            return preg_match("#($c$v$c)$#", $str, $matches)
                AND strlen($matches[1]) == 3
                AND $matches[1][2] != 'w'
                AND $matches[1][2] != 'x'
                AND $matches[1][2] != 'y';
        }
    }
}
|
314 |
+
|
315 |
+
/*
|
316 |
+
Stem caching added by Rob Marsh, SJ
|
317 |
+
http://rmarsh.com
|
318 |
+
*/
|
319 |
+
|
320 |
+
$Stemmer = new EnglishStemmer();
|
321 |
+
$StemCache = array();
|
322 |
+
|
323 |
+
/**
 * Stem a word with the shared global $Stemmer, caching results in the
 * global $StemCache so each distinct word is stemmed at most once while
 * its cache entry is set.
 *
 * @param string $word Word to stem.
 * @return mixed The value returned by $Stemmer->Stem() for this word.
 */
function stem($word) {
    global $Stemmer, $StemCache;

    // Serve repeated words straight from the cache.
    if (isset($StemCache[$word])) {
        return $StemCache[$word];
    }

    $computed = $Stemmer->Stem($word);
    $StemCache[$word] = $computed;

    return $computed;
}
|
334 |
+
|
335 |
+
?>
|
languages/en/stopwords.php
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
// The list of common words we want to ignore. NB: anything shorter than 4 characters is already discarded by the plugin and does not need to appear here.
|
3 |
+
// English stop words. "consequently" and "quite" had lost their "q" (they read "conse'uently" and "'uite").
$overusedwords = array("able", "about", "above", "according", "accordingly", "across", "actually", "after", "afterwards", "again", "against", "ain't", "allow", "allows", "almost", "alone", "along", "already", "also", "although", "always", "among", "amongst", "another", "anybody", "anyhow", "anyone", "anything", "anyway", "anyways", "anywhere", "apart", "appear", "appreciate", "appropriate", "aren't", "around", "aside", "asking", "associated", "available", "away", "awfully", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "both", "brief", "came", "cannot", "can't", "cause", "causes", "certain", "certainly", "changes", "clearly", "come", "comes", "concerning", "consequently", "consider", "considering", "contain", "containing", "contains", "corresponding", "could", "couldn't", "course", "currently", "definitely", "described", "despite", "didn't", "different", "does", "doesn't", "doing", "done", "don't", "down", "downwards", "during", "each", "eight", "either", "else", "elsewhere", "enough", "entirely", "especially", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "exactly", "example", "except", "fifth", "first", "five", "followed", "following", "follows", "former", "formerly", "forth", "four", "from", "further", "furthermore", "gets", "getting", "given", "gives", "goes", "going", "gone", "gotten", "greetings", "hadn't", "happens", "hardly", "hasn't", "have", "haven't", "having", "hello", "help", "hence", "here", "hereafter", "hereby", "herein", "hereupon", "he's", "hers", "herself", "himself", "hither", "hopefully", "howbeit", "however", "ignored", "i'll", "it'd", "it's", "i've", "immediate", "inasmuch", "indeed", "indicate", "indicated", "indicates", "inner", "insofar", "instead", "into", "inward", "isn't", "itself", "just", "keep", "keeps", "kept", "know", "known", "knows", "last", "lately", "later",
"latter", "latterly", "least", "less", "lest", "like", "liked", "likely", "little", "look", "looking", "looks", "mainly", "many", "maybe", "mean", "meanwhile", "merely", "might", "more", "moreover", "most", "mostly", "much", "must", "mustn't", "myself", "name", "namely", "near", "nearly", "necessary", "need", "needs", "neither", "never", "nevertheless", "next", "nine", "nobody", "none", "noone", "normally", "nothing", "novel", "nowhere", "obviously", "often", "okay", "once", "ones", "one's", "only", "onto", "other", "others", "otherwise", "ought", "ours", "ourselves", "outside", "over", "overall", "particular", "particularly", "perhaps", "placed", "please", "plus", "possible", "presumably", "probably", "provides", "quite", "rather", "really", "reasonably", "regarding", "regardless", "regards", "relatively", "respectively", "right", "said", "same", "saying", "says", "second", "secondly", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "should", "shouldn't", "since", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry", "specified", "specify", "specifying", "still", "such", "sure", "take", "taken", "tell", "tends", "than", "thank", "thanks", "that", "that's", "their", "theirs", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "there's", "thereupon", "these", "they", "think", "third", "this", "thorough", "thoroughly", "those", "though", "three", "through", "throughout", "thru", "thus", "together", "took", "toward", "towards", "tried", "tries", "truly", "trying", "twice", "under", "unfortunately", "unless", "unlikely", "until", "unto", "upon", "used", "useful", "uses", "using", "usually", "value", "various", "very", "want", "wants", "wasn't", "welcome", "we'd", "well", "went", "were", "weren't", "what", "whatever", "when", "whence", "whenever", "where",
"whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "whoever", "whole", "whom", "whose", "will", "willing", "wish", "with", "within", "without", "wonder", "would", "wouldn't", "your", "yours", "yourself", "yourselves", "zero");
|
4 |
+
?>
|
languages/es/stemmer.php
ADDED
@@ -0,0 +1,381 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
/*
|
3 |
+
Creado por Cesar Rodas para el proyecto Saddor.com
|
4 |
+
Este Stemmer esta basado en el argoritmo de Snowball Stemmer.
|
5 |
+
saddor@gmail.com
|
6 |
+
Este programa esta bajo licencia GNU
|
7 |
+
*/
|
8 |
+
if (!defined("SPANISHSTEMMER"))
{
    // Character classes returned by PorterStemmer::char_is().
    define("vocal", 1);
    define("consonante", 2);
    define("SPANISHSTEMMER", 1);

    /**
     * Suffix-stripping stemmer for Spanish words.
     *
     * Stem() stores the word in $this->word and runs step_0 .. step_3 over it.
     * Every step first calls splitword(), which recomputes the three substrings
     * R1, R2 and RV that the suffix tests are made against.
     *
     * NOTE(review): several string literals in this class contain the U+FFFD
     * replacement character where an accented letter is expected (e.g. the first
     * five entries of $prefix in step_0, whose unaccented forms sit five positions
     * later). They look mis-encoded in this copy and are reproduced unchanged;
     * confirm them against the intended file encoding. All string handling here
     * is byte-based (strlen/substr/[]).
     */
    class PorterStemmer
    {
        var $R1;
        var $R2;
        var $RV;
        var $word;

        /**
         * Stem a single word.
         *
         * @param string $word Word to stem.
         * @return string The stem. Words shorter than two bytes are returned
         *                unchanged (previously this path returned null).
         */
        function Stem($word)
        {
            $this->word = $word;
            if (strlen($word) < 2)
                return $word;

            $this->step_0();
            // step_1 reports whether it removed something; repeat until it no longer does.
            while ($this->step_1());
            $this->step_2();
            $this->step_3();
            return $this->word;
        }

        /**
         * Step 0: remove an attached pronoun that follows a gerund/infinitive ending.
         *
         * @return bool TRUE if any pronoun was removed.
         */
        function step_0()
        {
            $this->splitword();
            $search = array(
                "me","se","sela","selo","selas","selos","la","le","lo","les",
                "los","nos"
            );

            $prefix = array(
                "i�ndo","�ndo","�r","�r","�r",  /* first case  (indexes 0-4)  */
                "iendo","ando","ar","er","ir",  /* second case (indexes 5-9)  */
                "yendo"                         /* third case  (index 10)     */
            );

            $return = false;
            foreach ($prefix as $id => $pref)
            {
                if ( (strstr($this->RV,$pref) != NULL) or
                    /* special case for "yendo" */
                    ($pref == "yendo" && strstr($this->word,"uyendo")) )
                {
                    /* The ending was found; now look for the pronoun to delete. */
                    foreach ($search as $pronoun)
                    {
                        $len = strlen($pronoun);

                        if ($pronoun == substr($this->RV,-1 * $len,$len))
                        {
                            $this->word = substr($this->word,0, strlen($this->word) - $len);

                            // First case only: swap the ending for the entry five positions
                            // later in $prefix. An explicit test is used because
                            // `switch ($id) { case $id < 5: ... }` compares $id loosely with
                            // the boolean and sends index 0 to the wrong branch.
                            if ($id < 5)
                            {
                                $this->word = str_replace($prefix[$id],$prefix[$id+5],$this->word);
                            }
                            $return = true;
                        }
                    }
                }
            }
            return $return;
        }

        /**
         * Step 1: remove one standard suffix found at the end of R2, or rewrite
         * one of a few endings (e.g. "uciones" -> "u").
         *
         * @return bool TRUE if a suffix from either list matched at the end of R2.
         */
        function step_1()
        {
            $return = false;
            $this->splitword();

            /* suffixes deleted when found at the end of R2 (longest first) */
            $search = array(
                "abilidades","iblemente","icaciones","ablemente","antemente","ivamente","atamente",
                "amientos","icadoras","icadores","icancias","imientos","icamente",
                "osamente","abilidad","icidades","ividades","adamente","icantes",
                "icancia","imiemto","icadora","icaci�n","amiento","imiento","aciones",
                "ativos","ativas","ividad","idades","icidad","icante",
                "icador","adoras","adores","ancias","mente","ables",
                "ismos","anzas","ativa","ativo","istas","ibles",
                "aci�n","antes","adora","ancia","ismo","anza",
                "icos","ivas","osos","ivos","ante","osas",
                "ador","ible","ista","idad","able","ico",
                "osa","oso","iva","ica","ica","ivo",
            );

            for ($i = 0; $i < count($search); $i++)
                if (substr($this->R2,strlen($search[$i]) * (-1),strlen($search[$i])) == $search[$i])
                {
                    $this->word = substr($this->word,0,strlen($this->word) - strlen($search[$i]) );
                    $return = true;
                    break;
                }

            /* Original author's note: I think this is wrong; I think the search should be in R1. */
            if ($this->R1 == "amente")
            {
                $this->word = str_replace("amente","",$this->word);
            }

            /* endings that are rewritten rather than deleted; $replace is parallel to $search */
            $search = array
            (
                "log�a","log�as",/**/"uci�n","uciones",/**/"encia","encias"
            );
            $replace = array
            (
                "log","log","u","u","entre","entre"
            );
            for ($i = 0; $i < count($search); $i++)
                if (substr($this->R2,strlen($search[$i]) * (-1),strlen($search[$i])) == $search[$i])
                {
                    $this->word = str_replace($search[$i],$replace[$i],$this->word);
                    $return = true;
                    break;
                }

            return $return;
        }

        /**
         * Step 2: remove a "y..." verb ending that closes RV and is preceded by "u"
         * in the word (the "u" is removed too); if nothing was removed, run step_2b().
         */
        function step_2()
        {
            $this->splitword();
            $return = false;
            $search = array(
                "ya","ye","yan","yen","yeron","yendo","yo","y�","yas","yes","yais","yamos"
            );
            foreach ($search as $suffix)
            {
                if (substr($this->RV,strlen($suffix) * (-1),strlen($suffix)) == $suffix)
                    if (substr($this->word,-1*(strlen($suffix) + 1), strlen($suffix) + 1) == "u".$suffix)
                    {
                        $this->word = substr($this->word,0, strlen($this->word) -(strlen($suffix) + 1));
                        $return = true;
                    }
            }

            if ($return == false)
                $this->step_2b();
        }

        /**
         * Step 2b: remove other verb endings found at the end of RV.
         */
        function step_2b()
        {
            $this->splitword();
            $search = array(
                "en","es","�is","emos"
            );

            foreach ($search as $suffix)
            {
                if (substr($this->RV,strlen($suffix) * (-1),strlen($suffix)) == $suffix)
                {
                    if (substr($this->word,(-1)*(strlen($suffix) + 2), strlen($suffix) + 2) == "gu".$suffix)
                    {
                        // Preceded by "gu": drop the suffix and the "u".
                        $this->word = substr($this->word,0, strlen($this->word) -(strlen($suffix) + 1) );
                    }
                    /*
                        This branch was a fix by Diego Enrique Finol <dfinol at cantv dot net>.
                        His message, translated: "Thanks for the Spanish stemmer class; I had seen
                        the same thing in Snowball but you saved me the work of converting it to
                        PHP. I only noticed that in the places where a suffix has to be deleted
                        and, in addition, the 'u' deleted when it is preceded by 'gu', the suffix
                        is not deleted when it is not preceded by 'gu'. The suffix has to be
                        deleted in both cases, and the 'u' as well when preceded by 'gu'; the
                        algorithm only did so when preceded by 'gu' and otherwise deleted nothing."
                    */
                    else
                    {
                        $this->word = substr($this->word,0, strlen($this->word) -(strlen($suffix)) );
                    }
                    /* end of Diego's fix */
                }
            }

            $search = array(
                "i�ramos","ar�amos","ir�amos","i�semos","er�amos","er�ais","eremos",
                "isteis","ir�ais","ierais","iremos","�bamos","ieseis",
                "asteis","�ramos","�semos","aremos","ar�ais","abais",
                "�amos","arais","ieses","ar�an","iesen","ieron",
                "iendo","ieras","ir�is","ar�as","er�as","aseis",
                "er�is","er�an","ir�an","ar�is","ir�as","ieran",
                "ando","amos","aron","asen","aras","ados",
                "�ais","ases","imos","adas","idas","abas",
                "iste","ir�n","er�n","ar�a","er�a","iera",
                "ir�s","ir�a","aran","ar�s","er�s","aste",
                "iese","aban","ar�n","�is","ada","ir�",
                "�an","ir�","er�","aba","ara","ido",
                "ar�","ar�","ado","er�","ase","�as",
                "ida","�a","er","ar","i�","an",
                "ir","as","ad","ed","id","�s",
            );

            foreach ($search as $suffix)
                if (substr($this->RV,strlen($suffix) * (-1),strlen($suffix)) == $suffix)
                {
                    $this->word = substr($this->word,0, strlen($this->word) -(strlen($suffix)));
                    // Recompute RV so later entries are tested against the shortened word.
                    $this->splitword();
                }
        }

        /**
         * Step 3: remove a residual final vowel/"os" found at the end of RV, then
         * replace accented vowels in the result with their plain forms.
         *
         * @return bool TRUE if a residual suffix was removed.
         */
        function step_3()
        {
            $this->splitword();
            $return = false;
            $search = array(
                "os","a","o","�","�","�"
            );

            foreach ($search as $suffix)
                if (substr($this->RV,strlen($suffix) * (-1),strlen($suffix)) == $suffix)
                {
                    $this->word = substr($this->word,0, strlen($this->word) -(strlen($suffix)));
                    $return = true;
                }

            $search = array(
                "e","�"
            );

            foreach ($search as $suffix)
            {
                if (substr($this->RV,strlen($suffix) * (-1),strlen($suffix)) == $suffix)
                {
                    if (substr($this->RV,-1*(strlen($suffix) + 2), strlen($suffix) + 2) == "gu".$suffix)
                    {
                        // Preceded by "gu": drop the suffix and the "u".
                        $this->word = substr($this->word,0, strlen($this->word) -(strlen($suffix) + 1) );
                        $return = true;
                    }
                    else
                    {
                        $this->word = substr($this->word,0, strlen($this->word) -(strlen($suffix)) );
                        $return = true;
                    }
                }
            }

            $this->word = str_replace("�","a",$this->word);
            $this->word = str_replace("�","e",$this->word);
            $this->word = str_replace("�","i",$this->word);
            $this->word = str_replace("�","o",$this->word);
            $this->word = str_replace("�","u",$this->word);
            $this->word = str_replace("�","u",$this->word);
            return $return;
        }

        /* helper functions */

        /**
         * Comparison callback ordering strings by byte length, longest first.
         *
         * @param string $a
         * @param string $b
         * @return int 0 for equal lengths, 1 if $a is shorter than $b, -1 otherwise.
         */
        function saddorsort($a, $b)
        {
            if (strlen($a) == strlen($b)) {
                return 0;
            }
            return (strlen($a) < strlen($b)) ? 1 : -1;
        }

        /**
         * Recompute $this->R1, $this->R2 and $this->RV from $this->word.
         *
         * Scanning from the second byte: R1 collects everything after the first
         * consonant that is directly followed by a vowel; R2 collects everything
         * after the next such consonant once R1 has started. RV is a tail of the
         * word whose start depends on whether the first two letters are
         * vowels or consonants.
         */
        function splitword()
        {
            $flag1=false;
            $flag2=false;
            $this->R1="";
            $this->R2="";
            $this->RV="";

            // With fewer than two bytes nothing below can match; all three regions stay
            // empty, and returning here avoids reading the non-existent offset 1.
            if (strlen($this->word) < 2)
                return;

            for ($i = 1; $i < strlen($this->word); $i++)
            {
                if ($flag1)
                    $this->R1.=$this->word[$i];
                if ($flag2)
                    $this->R2.=$this->word[$i];

                if ($i+1 >= strlen($this->word))
                    break;

                // Must be tested before $flag1 is raised below so that the same
                // position cannot start both R1 and R2.
                if ($this->char_is($this->word[$i]) == consonante &&
                    $this->char_is($this->word[$i+1]) == vocal &&
                    $flag1 == true && $flag2 == false)
                    $flag2=true;

                if ($this->char_is($this->word[$i]) == consonante &&
                    $this->char_is($this->word[$i+1]) == vocal &&
                    $flag1 == false)
                    $flag1=true;
            }

            /* locate RV */
            if ($this->char_is($this->word[1]) == consonante)
            {
                // Second letter is a consonant: RV starts after the next vowel.
                for ($i = 2; $i < strlen($this->word); $i++)
                    if ($this->char_is($this->word[$i]) == vocal)
                        break;
                $i++;
                $this->RV = substr($this->word,$i);
            }
            else if ($this->char_is($this->word[1]) == vocal && $this->char_is($this->word[0]) == vocal)
            {
                // First two letters are vowels: RV starts after the next consonant.
                for ($i = 2; $i < strlen($this->word); $i++)
                    if ($this->char_is($this->word[$i]) == consonante)
                        break;
                $i++;
                $this->RV = substr($this->word,$i);
            }
            else if (strlen($this->word) > 2)
                $this->RV = substr($this->word,3);
        }

        /**
         * Classify a single character.
         *
         * @param string $char
         * @return int|null vocal, consonante, or null when $char is empty or in neither list.
         */
        function char_is($char)
        {
            $char = strtolower($char);
            if ($char == "")
                return;
            $vowel = "aeiou������";
            $consonant = "bcdfghijklmn�opqrsvtxwyz";
            // The vowel list is checked first, so the "i" that also appears in
            // $consonant is still classified as a vowel.
            if (strstr($vowel,$char))
                return vocal;
            if (strstr($consonant,$char))
                return consonante;
        }
    }
}
|
360 |
+
|
361 |
+
/*
|
362 |
+
Stem caching added by Rob Marsh, SJ
|
363 |
+
http://rmarsh.com
|
364 |
+
*/
|
365 |
+
|
366 |
+
$Stemmer = new PorterStemmer();
|
367 |
+
$StemCache = array();
|
368 |
+
|
369 |
+
/**
 * Returns the stem of a word, memoised in the global $StemCache.
 *
 * On a cache miss the word is passed to the Stem() method of the global
 * $Stemmer object and the result is stored for later calls.
 *
 * @param string $word the word to stem
 * @return mixed the cached or freshly computed stem
 */
function stem($word) {
    global $Stemmer, $StemCache;

    // cache hit: reuse the stem computed earlier
    if (isset($StemCache[$word])) {
        return $StemCache[$word];
    }

    // cache miss: compute once and remember the result
    $computed = $Stemmer->Stem($word);
    $StemCache[$word] = $computed;
    return $computed;
}
|
380 |
+
|
381 |
+
?>
|
languages/es/stopwords.php
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
// the list of common words we want to ignore. NB: anything shorter than 4 characters is already dropped by the plugin and doesn't need to be listed here
|
3 |
+
$overusedwords = array("algo", "alguna", "algunas", "alguno", "algunos", "alg�n", "ambos", "ampleamos", "ante", "antes", "aquel", "aquellas", "aquellos", "aqui", "arriba", "atras", "bajo", "bastante", "bien", "cada", "cierta", "ciertas", "ciertos", "como", "conseguimos", "conseguir", "consigo", "consigue", "consiguen", "consigues", "contra", "cual", "cuando", "dentro", "desde", "donde", "durante", "ella", "ellas", "ellos", "empleais", "emplean", "emplear", "empleas", "empleo", "encima", "entonces", "entre", "erais", "eramos", "eran", "eras", "eres", "esas", "esos", "esta", "estaba", "estabais", "estaban", "estabas", "estad", "estada", "estadas", "estado", "estados", "estais", "estamos", "estan", "estando", "estar", "estaremos", "estar�", "estar�n", "estar�s", "estar�", "estar�is", "estar�a", "estar�ais", "estar�amos", "estar�an", "estar�as", "estas", "este", "estemos", "esto", "estos", "estoy", "estuve", "estuviera", "estuvierais", "estuvieran", "estuvieras", "estuvieron", "estuviese", "estuvieseis", "estuviesen", "estuvieses", "estuvimos", "estuviste", "estuvisteis", "estuvi�ramos", "estuvi�semos", "estuvo", "est�", "est�bamos", "est�is", "est�n", "est�s", "est�", "est�is", "est�n", "est�s", "fuera", "fuerais", "fueran", "fueras", "fueron", "fuese", "fueseis", "fuesen", "fueses", "fuimos", "fuiste", "fuisteis", "fu�ramos", "fu�semos", "gueno", "habida", "habidas", "habido", "habidos", "habiendo", "habremos", "habr�", "habr�n", "habr�s", "habr�", "habr�is", "habr�a", "habr�ais", "habr�amos", "habr�an", "habr�as", "hab�is", "hab�a", "hab�ais", "hab�amos", "hab�an", "hab�as", "hace", "haceis", "hacemos", "hacen", "hacer", "haces", "hago", "hasta", "haya", "hayamos", "hayan", "hayas", "hay�is", "hemos", "hube", "hubiera", "hubierais", "hubieran", "hubieras", "hubieron", "hubiese", "hubieseis", "hubiesen", "hubieses", "hubimos", "hubiste", "hubisteis", "hubi�ramos", "hubi�semos", "hubo", "incluso", "intenta", "intentais", "intentamos", "intentan", "intentar", 
"intentas", "intento", "largo", "mientras", "modo", "mucho", "muchos", "m�as", "m�os", "nada", "nosotras", "nosotros", "nuestra", "nuestras", "nuestro", "nuestros", "otra", "otras", "otro", "otros", "para", "pero", "poco", "podeis", "podemos", "poder", "podria", "podriais", "podriamos", "podrian", "podrias", "porque", "primero desde", "puede", "pueden", "puedo", "quien", "quienes", "sabe", "sabeis", "sabemos", "saben", "saber", "sabes", "seamos", "sean", "seas", "sentid", "sentida", "sentidas", "sentido", "sentidos", "seremos", "ser�", "ser�n", "ser�s", "ser�", "ser�is", "ser�a", "ser�ais", "ser�amos", "ser�an", "ser�as", "se�is", "siendo", "siente", "sintiendo", "sobre", "sois", "solamente", "solo", "somos", "suya", "suyas", "suyo", "suyos", "tambi�n", "tanto", "tendremos", "tendr�", "tendr�n", "tendr�s", "tendr�", "tendr�is", "tendr�a", "tendr�ais", "tendr�amos", "tendr�an", "tendr�as", "tened", "teneis", "tenemos", "tener", "tenga", "tengamos", "tengan", "tengas", "tengo", "teng�is", "tenida", "tenidas", "tenido", "tenidos", "teniendo", "ten�is", "ten�a", "ten�ais", "ten�amos", "ten�an", "ten�as", "tiempo", "tiene", "tienen", "tienes", "todo", "todos", "trabaja", "trabajais", "trabajamos", "trabajan", "trabajar", "trabajas", "trabajo", "tras", "tuve", "tuviera", "tuvierais", "tuvieran", "tuvieras", "tuvieron", "tuviese", "tuvieseis", "tuviesen", "tuvieses", "tuvimos", "tuviste", "tuvisteis", "tuvi�ramos", "tuvi�semos", "tuvo", "tuya", "tuyas", "tuyo", "tuyos", "ultimo", "unas", "unos", "usais", "usamos", "usan", "usar", "usas", "vais", "valor", "vamos", "vaya", "verdad", "verdadera cierto", "verdadero", "vosostras", "vosostros", "vosotras", "vosotros", "vuestra", "vuestras", "vuestro", "vuestros", "�ramos");
|
4 |
+
?>
|
languages/fr/stemmer.php
ADDED
@@ -0,0 +1,513 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
|
3 |
+
/*
|
4 |
+
*
|
5 |
+
* implements a Paice/Husk Stemmer written in PHP by Alexis Ulrich (http://alx2002.free.fr)
|
6 |
+
*
|
7 |
+
* This code is in the public domain.
|
8 |
+
*
|
9 |
+
*/
|
10 |
+
|
11 |
+
|
12 |
+
// the rule patterns include all accented forms for a given language
|
13 |
+
$rule_pattern = "/^([a-z������������]*)(\*){0,1}(\d)([a-z������������]*)([.|>])/";
|
14 |
+
|
15 |
+
$PaiceHuskStemmerRules_fr = array(
|
16 |
+
'esre1>', # { -erse > -ers }
|
17 |
+
'esio1>', # { -oise > -ois }
|
18 |
+
'siol1.', # { -lois > -loi }
|
19 |
+
'siof0.', # { -fois > -fois }
|
20 |
+
'sioe0.', # { -eois > -eois }
|
21 |
+
'sio3>', # { -ois > - }
|
22 |
+
'st1>', # { -ts > -t }
|
23 |
+
'sf1>', # { -fs > -f }
|
24 |
+
'sle1>', # { -els > -el }
|
25 |
+
'slo1>', # { -ols > -ol }
|
26 |
+
's�1>', # { -�s > -� }
|
27 |
+
'�tuae5.', # { -eaut� > - }
|
28 |
+
'�tuae2.', # { -eaut� > -eau }
|
29 |
+
'tnia0.', # { -aint > -aint }
|
30 |
+
'tniv1.', # { -vint > -vin }
|
31 |
+
'tni3>', # { -int > - }
|
32 |
+
'suor1.', # { -rous > -ou }
|
33 |
+
'suo0.', # { -ous > -ous }
|
34 |
+
'sdrail5.', # { -liards > -l }
|
35 |
+
'sdrai4.', # { -iards > -i }
|
36 |
+
'er�i1>', # { -i�re > -ier }
|
37 |
+
'sesue3x>', # { -euses > -euse }
|
38 |
+
'esuey5i.', # { -yeuse > -i }
|
39 |
+
'esue2x>', # { -euse > -eux }
|
40 |
+
'se1>', # { -es > -e }
|
41 |
+
'er�g3.', # { -g�re > -g }
|
42 |
+
'eca1>', # { -ace > -ac }
|
43 |
+
'esiah0.', # { -haise > - }
|
44 |
+
'esi1>', # { -ise > -is }
|
45 |
+
'siss2.', # { -ssis > -ss }
|
46 |
+
'sir2>', # { -ris > -r }
|
47 |
+
'sit2>', # { -tis > -t }
|
48 |
+
'egan�1.', # { -�nage > -�nag }
|
49 |
+
'egalli6>', # { -illage > - }
|
50 |
+
'egass1.', # { -ssage > -sag }
|
51 |
+
'egas0.', # { -sage > - }
|
52 |
+
'egat3.', # { -tage > - }
|
53 |
+
'ega3>', # { -age > - }
|
54 |
+
'ette4>', # { -ette > - }
|
55 |
+
'ett2>', # { -tte > -t }
|
56 |
+
'etio1.', # { -oite > -oit }
|
57 |
+
'tio�4c.', # { -�oit > -c }
|
58 |
+
'tio0.', # { -oit > -oit }
|
59 |
+
'et1>', # { -te > -t }
|
60 |
+
'eb1>', # { -be > -b }
|
61 |
+
'snia1>', # { -ains > -ain }
|
62 |
+
'eniatnau8>', # { -uantaine > - }
|
63 |
+
'eniatn4.', # { -ntaine > -nt }
|
64 |
+
'enia1>', # { -aine > -ain }
|
65 |
+
'niatnio3.', # { -ointain > -oint }
|
66 |
+
'niatg3.', # { -gtain > -gt }
|
67 |
+
'e�1>', # { -�e > -� }
|
68 |
+
'�hcat1.', # { -tach� > -tach }
|
69 |
+
'�hca4.', # { -ach� > - }
|
70 |
+
'�tila5>', # { -alit� > - }
|
71 |
+
'�tici5.', # { -icit� > - }
|
72 |
+
'�tir1.', # { -rit� > -rit }
|
73 |
+
'�ti3>', # { -it� > - }
|
74 |
+
'�gan1.', # { -nag� > -nag }
|
75 |
+
'�ga3>', # { -ag� > - }
|
76 |
+
'�tehc1.', # { -chet� > -chet }
|
77 |
+
'�te3>', # { -et� > - }
|
78 |
+
'�it0.', # { -ti� > -ti� }
|
79 |
+
'�1>', # { -� > - }
|
80 |
+
'eire4.', # { -erie > - }
|
81 |
+
'eirue5.', # { -eurie > - }
|
82 |
+
'eio1.', # { -oie > -oi }
|
83 |
+
'eia1.', # { -aie > -ai }
|
84 |
+
'ei1>', # { -ie > -i }
|
85 |
+
'eng1.', # { -gne > -gn }
|
86 |
+
'xuaessi7.', # { -isseaux > - }
|
87 |
+
'xuae1>', # { -eaux > -eau }
|
88 |
+
'uaes0.', # { -seau > -seau }
|
89 |
+
'uae3.', # { -eau > - }
|
90 |
+
'xuave2l.', # { -evaux > -eval }
|
91 |
+
'xuav2li>', # { -vaux > -vail }
|
92 |
+
'xua3la>', # { -aux > -al }
|
93 |
+
'ela1>', # { -ale > -al }
|
94 |
+
'lart2.', # { -tral > -tr }
|
95 |
+
'lani2>', # { -inal > -in }
|
96 |
+
'la�2>', # { -�al > -� }
|
97 |
+
'siay4i.', # { -yais > -i }
|
98 |
+
'siassia7.', # { -aissais > - }
|
99 |
+
'siarv1*.', # { -vrais > -vrai if intact }
|
100 |
+
'sia1>', # { -ais > -ai }
|
101 |
+
'tneiayo6i.', # { -oyaient > -oi }
|
102 |
+
'tneiay6i.', # { -yaient > -i }
|
103 |
+
'tneiassia9.', # { -aissaient > - }
|
104 |
+
'tneiareio7.', # { -oieraient > -oi }
|
105 |
+
'tneia5>', # { -aient > - }
|
106 |
+
'tneia4>', # { -aient > -a }
|
107 |
+
'tiario4.', # { -oirait > -oi }
|
108 |
+
'tiarim3.', # { -mirait > -mir }
|
109 |
+
'tiaria3.', # { -airait > -air }
|
110 |
+
'tiaris3.', # { -sirait > -sir }
|
111 |
+
'tiari5.', # { -irait > - }
|
112 |
+
'tiarve6>', # { -evrait > - }
|
113 |
+
'tiare5>', # { -erait > - }
|
114 |
+
'iare4>', # { -erai > - }
|
115 |
+
'are3>', # { -era > - }
|
116 |
+
'tiay4i.', # { -yait > -i }
|
117 |
+
'tia3>', # { -ait > - }
|
118 |
+
'tnay4i.', # { -yant > -i }
|
119 |
+
'em�iu5>', # { -ui�me > - }
|
120 |
+
'em�i4>', # { -i�me > - }
|
121 |
+
'tnaun3.', # { -nuant > -nu }
|
122 |
+
'tnauqo3.', # { -oquant > -oqu }
|
123 |
+
'tnau4>', # { -uant > - }
|
124 |
+
'tnaf0.', # { -fant > -fant }
|
125 |
+
'tnat�2>', # { -�tant > -�t }
|
126 |
+
'tna3>', # { -ant > - }
|
127 |
+
'tno3>', # { -ont > - }
|
128 |
+
'zeiy4i.', # { -yiez > -i }
|
129 |
+
'zey3i.', # { -yez > -i }
|
130 |
+
'zeire5>', # { -eriez > - }
|
131 |
+
'zeird4.', # { -driez > -d }
|
132 |
+
'zeirio4.', # { -oiriez > -oi }
|
133 |
+
'ze2>', # { -ez > - }
|
134 |
+
'ssiab0.', # { -baiss > - }
|
135 |
+
'ssia4.', # { -aiss > - }
|
136 |
+
'ssi3.', # { -iss > - }
|
137 |
+
'tnemma6>', # { -amment > - }
|
138 |
+
'tnemesuey9i.', # { -yeusement > -i }
|
139 |
+
'tnemesue8>', # { -eusement > - }
|
140 |
+
'tnemevi7.', # { -ivement > - }
|
141 |
+
'tnemessia5.', # { -aissement > -aiss }
|
142 |
+
'tnemessi8.', # { -issement > - }
|
143 |
+
'tneme5>', # { -ement > - }
|
144 |
+
'tnemia4.', # { -aiment > -ai }
|
145 |
+
'tnem�5>', # { -�ment > - }
|
146 |
+
'el2l>', # { -le > -l }
|
147 |
+
'lle3le>', # { -ell > -el }
|
148 |
+
'let�0.', # { -�tel > -�tel }
|
149 |
+
'lepp0.', # { -ppel > -ppel }
|
150 |
+
'le2>', # { -el > - }
|
151 |
+
'srei1>', # { -iers > -ier }
|
152 |
+
'reit3.', # { -tier > -t }
|
153 |
+
'reila2.', # { -alier > -ali }
|
154 |
+
'rei3>', # { -ier > - }
|
155 |
+
'ert�e5.', # { -e�tre > - }
|
156 |
+
'ert��1.', # { -��tre > -��tr }
|
157 |
+
'ert�4.', # { -�tre > - }
|
158 |
+
'drai4.', # { -iard > - }
|
159 |
+
'erdro0.', # { -ordre > -ordre }
|
160 |
+
'erute5.', # { -eture > - }
|
161 |
+
'ruta0.', # { -atur > -atur }
|
162 |
+
'eruta1.', # { -ature > -atur }
|
163 |
+
'erutiov1.', # { -voiture > -voitur }
|
164 |
+
'erub3.', # { -bure > -b }
|
165 |
+
'eruh3.', # { -hure > -h }
|
166 |
+
'erul3.', # { -lure > -l }
|
167 |
+
'er2r>', # { -re > -r }
|
168 |
+
'nn1>', # { -nn > -n }
|
169 |
+
'r�i3.', # { -i�r > - }
|
170 |
+
'srev0.', # { -vers > -vers }
|
171 |
+
'sr1>', # { -rs > -r }
|
172 |
+
'rid2>', # { -dir > -d }
|
173 |
+
're2>', # { -er > - }
|
174 |
+
'xuei4.', # { -ieux > - }
|
175 |
+
'esuei5.', # { -ieuse > - }
|
176 |
+
'lbati3.', # { -itabl > -it }
|
177 |
+
'lba3>', # { -abl > - }
|
178 |
+
'rueis0.', # { -sieur > - }
|
179 |
+
'ruehcn4.', # { -ncheur > -nc }
|
180 |
+
'ecirta6.', # { -atrice > - }
|
181 |
+
'ruetai6.', # { -iateur > - }
|
182 |
+
'rueta5.', # { -ateur > - }
|
183 |
+
'rueir0.', # { -rieur > - }
|
184 |
+
'rue3>', # { -eur > - }
|
185 |
+
'esseti6.', # { -itesse > - }
|
186 |
+
'essere6>', # { -eresse > - }
|
187 |
+
'esserd1.', # { -dresse > -dress }
|
188 |
+
'esse4>', # { -esse > - }
|
189 |
+
'essiab1.', # { -baisse > -baiss }
|
190 |
+
'essia5.', # { -aisse > - }
|
191 |
+
'essio1.', # { -oisse > -oiss }
|
192 |
+
'essi4.', # { -isse > - }
|
193 |
+
'essal4.', # { -lasse > -l }
|
194 |
+
'essa1>', # { -asse > -ass }
|
195 |
+
'ssab1.', # { -bass > -bas }
|
196 |
+
'essurp1.', # { -prusse > -uss }
|
197 |
+
'essu4.', # { -usse > - }
|
198 |
+
'essi1.', # { -isse > -ss }
|
199 |
+
'ssor1.', # { -ross > -ros }
|
200 |
+
'essor2.', # { -rosse > -ros }
|
201 |
+
'esso1>', # { -osse > -oss }
|
202 |
+
'ess2>', # { -sse > -s }
|
203 |
+
'tio3.', # { -oit > - }
|
204 |
+
'r�s2re.', # { -s�r > -ser }
|
205 |
+
'r�0e.', # { -�r > -�re }
|
206 |
+
'esn1.', # { -nse > -�ns }
|
207 |
+
'eu1>', # { -ue > -u }
|
208 |
+
'sua0.', # { -aus > -aus }
|
209 |
+
'su1>', # { -us > -u }
|
210 |
+
'utt1>', # { -utt > -tt }
|
211 |
+
'tu�3c.', # { -�ut > -c }
|
212 |
+
'u�2c.', # { -�u > -c }
|
213 |
+
'ur1.', # { -ru > -r }
|
214 |
+
'ehcn2>', # { -nche > -nc }
|
215 |
+
'ehcu1>', # { -uche > -uch }
|
216 |
+
'snorr3.', # { -rrons > -rr }
|
217 |
+
'snoru3.', # { -urons > -ur }
|
218 |
+
'snorua3.', # { -aurons > -aur }
|
219 |
+
'snorv3.', # { -vrons > -vr }
|
220 |
+
'snorio4.', # { -oirons > -oi }
|
221 |
+
'snori5.', # { -irons > - }
|
222 |
+
'snore5>', # { -erons > - }
|
223 |
+
'snortt4>', # { -ttrons > -tt }
|
224 |
+
'snort�a7.', # { -a�trons > - }
|
225 |
+
'snort3.', # { -trons > -tr }
|
226 |
+
'snor4.', # { -rons > - }
|
227 |
+
'snossi6.', # { -issons > - }
|
228 |
+
'snoire6.', # { -erions > - }
|
229 |
+
'snoird5.', # { -drions > -d }
|
230 |
+
'snoitai7.', # { -iations > - }
|
231 |
+
'snoita6.', # { -ations > - }
|
232 |
+
'snoits1>', # { -stions > -stion }
|
233 |
+
'noits0.', # { -stion > -stion }
|
234 |
+
'snoi4>', # { -ions > - }
|
235 |
+
'noitaci7>', # { -ication > - }
|
236 |
+
'noitai6.', # { -iation > - }
|
237 |
+
'noita5.', # { -ation > - }
|
238 |
+
'noitu4.', # { -ution > -u }
|
239 |
+
'noi3>', # { -ion > - }
|
240 |
+
'snoya0.', # { -ayons > -ayons }
|
241 |
+
'snoy4i.', # { -yons > -i }
|
242 |
+
'sno�a1.', # { -a�ons > -a�on }
|
243 |
+
'sno�r1.', # { -r�ons > -r�on }
|
244 |
+
'snoe4.', # { -eons > - }
|
245 |
+
'snosiar1>', # { -raisons > - }
|
246 |
+
'snola1.', # { -alons > -alon }
|
247 |
+
'sno3>', # { -ons > - }
|
248 |
+
'sno1>', # { -ons > -on }
|
249 |
+
'noll2.', # { -llon > -ll }
|
250 |
+
'tnennei4.', # { -iennent > -ien }
|
251 |
+
'ennei2>', # { -ienne > -ien }
|
252 |
+
'snei1>', # { -iens > -ien }
|
253 |
+
'sne�1>', # { -�ens > -�en }
|
254 |
+
'enne�5e.', # { -�enne > -e }
|
255 |
+
'ne�3e.', # { -�en > -e }
|
256 |
+
'neic0.', # { -cien > -cien }
|
257 |
+
'neiv0.', # { -vien > -vien }
|
258 |
+
'nei3.', # { -ien > - }
|
259 |
+
'sc1.', # { -cs > -c }
|
260 |
+
'sd1.', # { -ds > -d }
|
261 |
+
'sg1.', # { -gs > -g }
|
262 |
+
'sni1.', # { -ins > -in }
|
263 |
+
'tiu0.', # { -uit > - }
|
264 |
+
'ti2.', # { -it > - }
|
265 |
+
'sp1>', # { -ps > -p }
|
266 |
+
'sna1>', # { -ans > -an }
|
267 |
+
'sue1.', # { -eus > -eu }
|
268 |
+
'enn2>', # { -nne > -n }
|
269 |
+
'nong2.', # { -gnon > -gn }
|
270 |
+
'noss2.', # { -sson > -ss }
|
271 |
+
'rioe4.', # { -eoir > - }
|
272 |
+
'riot0.', # { -toir > -toir }
|
273 |
+
'riorc1.', # { -croir > -croi }
|
274 |
+
'riovec5.', # { -cevoir > -c }
|
275 |
+
'rio3.', # { -oir > - }
|
276 |
+
'ric2.', # { -cir > -l }
|
277 |
+
'ril2.', # { -lir > -l }
|
278 |
+
'tnerim3.', # { -mirent > -mir }
|
279 |
+
'tneris3>', # { -sirent > -sir }
|
280 |
+
'tneri5.', # { -irent > - }
|
281 |
+
't�a3.', # { -a�t > - }
|
282 |
+
'riss2.', # { -ssir > -ss }
|
283 |
+
't�2.', # { -�t > - }
|
284 |
+
't�2>', # { -�t > - }
|
285 |
+
'ario2.', # { -oira > -oi }
|
286 |
+
'arim1.', # { -mira > -m }
|
287 |
+
'ara1.', # { -ara > -ar }
|
288 |
+
'aris1.', # { -sira > -sir }
|
289 |
+
'ari3.', # { -ira > - }
|
290 |
+
'art1>', # { -tra > -tr }
|
291 |
+
'ardn2.', # { -ndra > -nd }
|
292 |
+
'arr1.', # { -rra > -rr }
|
293 |
+
'arua1.', # { -aura > -aur }
|
294 |
+
'aro1.', # { -ora > -or }
|
295 |
+
'arv1.', # { -vra > -vr }
|
296 |
+
'aru1.', # { -ura > -ur }
|
297 |
+
'ar2.', # { -ra > - }
|
298 |
+
'rd1.', # { -dr > -d }
|
299 |
+
'ud1.', # { -du > - }
|
300 |
+
'ul1.', # { -lu > -l }
|
301 |
+
'ini1.', # { -ini > -in }
|
302 |
+
'rin2.', # { -nir > - }
|
303 |
+
'tnessiab3.', # { -baissent > -baiss }
|
304 |
+
'tnessia7.', # { -aissent > - }
|
305 |
+
'tnessi6.', # { -issent > - }
|
306 |
+
'tnessni4.', # { -inssent > -ins }
|
307 |
+
'sini2.', # { -inis > -in }
|
308 |
+
'sl1.', # { -ls > -l }
|
309 |
+
'iard3.', # { -drai > -d }
|
310 |
+
'iario3.', # { -oirai > -oi }
|
311 |
+
'ia2>', # { -ai > - }
|
312 |
+
'io0.', # { -oi > -oi }
|
313 |
+
'iule2.', # { -elui > -el }
|
314 |
+
'i1>', # { -i > - }
|
315 |
+
'sid2.', # { -dis > -d }
|
316 |
+
'sic2.', # { -cis > -c }
|
317 |
+
'esoi4.', # { -iose > - }
|
318 |
+
'ed1.', # { -de > -d }
|
319 |
+
'ai2>', # { -ia > - }
|
320 |
+
'a1>', # { -a > - }
|
321 |
+
'adr1.', # { -rda > -rd }
|
322 |
+
'tner�5>', # { -�rent > - }
|
323 |
+
'evir1.', # { -rive > -riv }
|
324 |
+
'evio4>', # { -oive > - }
|
325 |
+
'evi3.', # { -ive > - }
|
326 |
+
'fita4.', # { -atif > - }
|
327 |
+
'fi2>', # { -if > - }
|
328 |
+
'enie1.', # { -eine > -ein }
|
329 |
+
'sare4>', # { -eras > - }
|
330 |
+
'sari4>', # { -iras > - }
|
331 |
+
'sard3.', # { -dras > -d }
|
332 |
+
'sart2>', # { -tras > -tr }
|
333 |
+
'sa2.', # { -as > - }
|
334 |
+
'tnessa6>', # { -assent > - }
|
335 |
+
'tnessu6>', # { -ussent > - }
|
336 |
+
'tnegna3.', # { -angent > -ang }
|
337 |
+
'tnegi3.', # { -igent > -ig }
|
338 |
+
'tneg0.', # { -gent > -gent }
|
339 |
+
'tneru5>', # { -urent > - }
|
340 |
+
'tnemg0.', # { -gment > -gment }
|
341 |
+
'tnerni4.', # { -inrent > -in }
|
342 |
+
'tneiv1.', # { -vient > -vien }
|
343 |
+
'tne3>', # { -ent > - }
|
344 |
+
'une1.', # { -enu > -en }
|
345 |
+
'en1>', # { -ne > -n }
|
346 |
+
'nitn2.', # { -ntin > - }
|
347 |
+
'ecnay5i.', # { -yance > -i }
|
348 |
+
'ecnal1.', # { -lance > -lanc }
|
349 |
+
'ecna4.', # { -ance > - }
|
350 |
+
'ec1>', # { -ce > -c }
|
351 |
+
'nn1.', # { -nn > -n }
|
352 |
+
'rit2>', # { -tir > - }
|
353 |
+
'rut2>', # { -tur > -t }
|
354 |
+
'rud2.', # { -dur > -d }
|
355 |
+
'ugn1>', # { -ngu > -ng }
|
356 |
+
'eg1>', # { -ge > -g }
|
357 |
+
'tuo0.', # { -out > -out }
|
358 |
+
'tul2>', # { -lut > -l }
|
359 |
+
't�2>', # { -�t > - }
|
360 |
+
'ev1>', # { -ve > -v }
|
361 |
+
'v�2ve>', # { -�v > -ev }
|
362 |
+
'rtt1>', # { -ttr > -tt }
|
363 |
+
'emissi6.', # { -issime > - }
|
364 |
+
'em1.', # { -me > -m }
|
365 |
+
'ehc1.', # { -che > -ch }
|
366 |
+
'c�i2c�.', # { -i�c > -i�c }
|
367 |
+
'libi2l.', # { -ibil > -ibl }
|
368 |
+
'llie1.', # { -eill > -eil }
|
369 |
+
'liei4i.', # { -ieil > -i }
|
370 |
+
'xuev1.', # { -veux > -veu }
|
371 |
+
'xuey4i.', # { -yeux > -i }
|
372 |
+
'xueni5>', # { -ineux > - }
|
373 |
+
'xuell4.', # { -lleux > -l }
|
374 |
+
'xuere5.', # { -ereux > - }
|
375 |
+
'xue3>', # { -eux > - }
|
376 |
+
'rb�3rb�.', # { -�br > -�br }
|
377 |
+
'tur2.', # { -rut > -r }
|
378 |
+
'rir�4re.', # { -�rir > -er }
|
379 |
+
'rir2.', # { -rir > -r }
|
380 |
+
'c�2ca.', # { -�c > -ac }
|
381 |
+
'snu1.', # { -uns > -un }
|
382 |
+
'rt�a4.', # { -a�tr > - }
|
383 |
+
'long2.', # { -gnol > -gn }
|
384 |
+
'vec2.', # { -cev > -c }
|
385 |
+
'�1c>', # { -� > -c }
|
386 |
+
'ssilp3.', # { -pliss > -pl }
|
387 |
+
'silp2.', # { -plis > -pl }
|
388 |
+
't�hc2te.', # { -ch�t > -chet }
|
389 |
+
'n�m2ne.', # { -m�n > -men }
|
390 |
+
'llepp1.', # { -ppell > -ppel }
|
391 |
+
'tan2.', # { -nat > -n }
|
392 |
+
'rv�3rve.', # { -�vr > -evr }
|
393 |
+
'rv�3rve.', # { -�vr > -evr }
|
394 |
+
'r�2re.', # { -�r > -er }
|
395 |
+
'r�2re.', # { -�r > -er }
|
396 |
+
't�2te.', # { -�t > -et }
|
397 |
+
't�2te.', # { -�t > -et }
|
398 |
+
'epp1.', # { -ppe > -pp }
|
399 |
+
'eya2i.', # { -aye > -ai }
|
400 |
+
'ya1i.', # { -ay > -ai }
|
401 |
+
'yo1i.', # { -oy > -oi }
|
402 |
+
'esu1.', # { -use > -us }
|
403 |
+
'ugi1.', # { -igu > -g }
|
404 |
+
'tt1.', # { -tt > -t }
|
405 |
+
|
406 |
+
# end rule: the stem has already been found
|
407 |
+
'end0.'
|
408 |
+
);
|
409 |
+
|
410 |
+
/**
 * Finds the first rule, at or after a given index, that can be applied to
 * a reversed word form.
 *
 * A rule is a candidate when its leading letters (the first capture group
 * of $rule_pattern) match the start of the reversed form, ignoring case.
 * Reads the globals $PaiceHuskStemmerRules_fr and $rule_pattern.
 *
 * @param string $reversed_form the word being stemmed, reversed
 * @param int    $rule_number   index of the first rule to consider
 * @return int index of the first matching rule, or -1 if no rule can be
 *             applied, i.e. the stem has been found
 */
function getFirstRule($reversed_form, $rule_number) {
    global $PaiceHuskStemmerRules_fr, $rule_pattern;

    $candidate = $rule_number;
    $total = count($PaiceHuskStemmerRules_fr);
    while ($candidate < $total) {
        // keep only the letters of the current rule
        $letters = preg_replace($rule_pattern, "\\1", $PaiceHuskStemmerRules_fr[$candidate]);
        // compare just as many bytes as the rule has letters
        if (strncasecmp($letters, $reversed_form, strlen($letters)) == 0) {
            return $candidate;
        }
        $candidate++;
    }
    return -1;
}
|
426 |
+
|
427 |
+
|
428 |
+
/**
 * Checks the acceptability of a stem.
 *
 * A stem whose strlen() is 2 or less is always rejected. A longer stem is
 * accepted outright when its reversed form ends with a vowel or "y" (i.e.
 * the word itself starts with one); otherwise it must contain at least one
 * vowel or "y" somewhere.
 *
 * @param string $reversed_stem the stem to check, in reverse form
 * @return bool|int a boolean when the length test decides the outcome,
 *                  otherwise the preg_match() result of the vowel search
 */
function checkAcceptability($reversed_stem) {
    $long_enough = strlen($reversed_stem) > 2;

    // reversed stem ends with a vowel => the form starts with a vowel
    if (preg_match("/[a��e����i��o�u��y]$/", $reversed_stem)) {
        return $long_enough;
    }

    // the form starts with a consonant: too short is rejected right away
    if (!$long_enough) {
        return False;
    }

    // and at least one of the remaining letters must be a vowel or "y"
    return preg_match("/[a��e����i��o�u��y]/", $reversed_stem);
}
|
449 |
+
|
450 |
+
|
451 |
+
/**
 * The actual Paice/Husk stemmer, which returns a stem for the given form.
 *
 * The word is decoded with utf8_decode() and reversed, then rules from the
 * global $PaiceHuskStemmerRules_fr table are applied one after another.
 * Each rule is parsed with the global $rule_pattern into: leading letters,
 * an optional '*' flag (apply only to a still-intact word), the number of
 * characters to remove, the text to put in their place, and a terminator
 * ('.' stops, '>' continues with the following rules). A rule is only
 * committed when checkAcceptability() accepts the resulting stem.
 *
 * @param string $form the word for which we want the stem (UTF-8)
 * @return string the stem, re-encoded with utf8_encode()
 */
function PaiceHuskStemmer($form) {
    global $PaiceHuskStemmerRules_fr;
    global $rule_pattern;

    // stays True only until a rule has actually modified the word;
    // rules carrying the '*' flag must not fire once it is False
    $intact = True;
    $reversed_form = strrev(utf8_decode($form));
    $rule_number = 0;

    // go through the rules until an ending one (terminated by '.') has been
    // applied or no further rule matches
    while (True) {
        $rule_number = getFirstRule($reversed_form, $rule_number);
        if ($rule_number == -1) {
            // no other rule can be applied => the stem has been found
            break;
        }

        $rule = $PaiceHuskStemmerRules_fr[$rule_number];
        if (!preg_match($rule_pattern, $rule, $matches)) {
            // a rule that cannot be parsed is skipped instead of being retried forever
            $rule_number++;
            continue;
        }

        if (($matches[2] != '*') || ($intact)) {
            $removed = (int) $matches[3];
            // NOTE(review): the replacement text is used untranscoded, exactly as
            // getFirstRule() and checkAcceptability() use the rule and regex text;
            // this assumes the file is stored in the same single-byte encoding as
            // the decoded word - confirm the file encoding.
            $reversed_stem = $matches[4] . substr($reversed_form, $removed);
            if (checkAcceptability($reversed_stem)) {
                $reversed_form = $reversed_stem;
                // the word has now been altered, so '*' rules no longer apply
                $intact = False;
                if ($matches[5] == '.') break;
                // '>' rule: keep going from the same rule index
                continue;
            }
        }

        // rule not applicable or stem not acceptable: go to another rule
        $rule_number++;
    }

    return utf8_encode(strrev($reversed_form));
}
|
493 |
+
|
494 |
+
/*
|
495 |
+
Stem caching added by Rob Marsh, SJ
|
496 |
+
http://rmarsh.com
|
497 |
+
*/
|
498 |
+
|
499 |
+
$StemCache = array();
|
500 |
+
|
501 |
+
/**
 * Returns the stem of a word, memoised in the global $StemCache.
 *
 * The first request for a word runs PaiceHuskStemmer() and stores the
 * result; later requests for the same word are served from the cache.
 *
 * @param string $word the word to stem
 * @return mixed the cached or freshly computed stem
 */
function stem($word) {
    global $StemCache;

    // already stemmed once: answer from the cache
    if (isset($StemCache[$word])) {
        return $StemCache[$word];
    }

    // first time we see this word: stem it and remember the answer
    $fresh = PaiceHuskStemmer($word);
    $StemCache[$word] = $fresh;
    return $fresh;
}
|
512 |
+
|
513 |
+
?>
|
languages/fr/stopwords.php
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
// the list of common words we want to ignore. NB: anything shorter than 4 characters is already dropped by the plugin and doesn't need to be listed here
|
3 |
+
$overusedwords = array("afin", "aient", "aies", "ailleurs", "ainsi", "alentour", "alias", "allaient", "allais", "allait", "allez", "allons", "alors", "apr�s", "apr�s-demain", "arri�re", "assez", "attendu", "au-dedans", "au-dehors", "au-del�", "au-dessous", "au-dessus", "au-devant", "aucun", "aucune", "audit", "aujourd'", "aujourd'hui", "auparavant", "aupr�s", "auquel", "aura", "aurai", "auraient", "aurais", "aurait", "auras", "aurez", "auriez", "aurions", "aurons", "auront", "aussi", "aussit�t", "autant", "autour", "autre", "autrefois", "autres", "autrui", "auxdites", "auxdits", "auxquelles", "auxquels", "avaient", "avais", "avait", "avant", "avant-hier", "avec", "avez", "aviez", "avions", "avoir", "avons", "ayant", "ayante", "ayantes", "ayants", "ayez", "ayons", "banco", "beaucoup", "bien", "bient�t", "c'est-�-dire", "c.-�-d.", "cahin-caha", "ceci", "cela", "celle", "celle-ci", "celle-l�", "celles", "celles-ci", "celles-l�", "celui", "celui-ci", "celui-l�", "cent", "cents", "cependant", "certain", "certaine", "certaines", "certains", "certes", "cette", "ceux", "ceux-ci", "ceux-l�", "chacun", "chacune", "chaque", "cher", "chez", "chose", "ci-apr�s", "ci-dessous", "ci-dessus", "cinq", "cinquante", "cinquante-cinq", "cinquante-deux", "cinquante-et-un", "cinquante-huit", "cinquante-neuf", "cinquante-quatre", "cinquante-sept", "cinquante-six", "cinquante-trois", "combien", "comme", "comment", "contrario", "contre", "cours", "crescendo", "c�ans", "d'abord", "d'accord", "d'affil�e", "d'ailleurs", "d'apr�s", "d'arrache-pied", "d'embl�e", "d'un", "d'une", "dans", "davantage", "debout", "dedans", "dehors", "del�", "demain", "depuis", "derechef", "derri�re", "desdites", "desdits", "desquelles", "desquels", "dessous", "dessus", "deux", "devant", "devers", "de��", "diff�rentes", "diff�rents", "dire", "disent", "dito", "divers", "diverses", "dix-huit", "dix-neuf", "dix-sept", "donc", "dont", "dor�navant", "douze", "dudit", "duquel", "durant", "d�j�", "d�pit", "d�sormais", 
"elle", "elles", "en-dehors", "encore", "enfin", "ensemble", "ensuite", "entre", "entre-temps", "envers", "environ", "et/ou", "eues", "eurent", "eusse", "eussent", "eusses", "eussiez", "eussions", "expr�s", "extenso", "extremis", "e�mes", "e�tes", "facto", "faire", "fais", "faisaient", "faisais", "faisait", "faisons", "fait", "faites", "fallait", "faudrait", "faut", "faveur", "flac", "fors", "fort", "forte", "fortiori", "frais", "furent", "fusse", "fussent", "fusses", "fussiez", "fussions", "f�mes", "f�tes", "grand-chose", "grosso", "gr�ce", "gu�re", "haut", "hein", "hier", "hol�", "hormis", "hors", "huit", "ibidem", "ici-bas", "idem", "illico", "ipso", "item", "jadis", "jamais", "jusqu'", "jusqu'au", "jusqu'aux", "jusqu'�", "jusque", "juste", "l'autre", "l'encontre", "l'instar", "l'insu", "l'issue", "l'occasion", "l'on", "l'un", "l'une", "l'�gard", "ladite", "laquelle", "lequel", "lesquelles", "lesquels", "leur", "leurs", "loin", "longtemps", "lors", "lorsqu'", "lorsque", "l�-bas", "l�-dedans", "l�-dehors", "l�-derri�re", "l�-dessous", "l�-dessus", "l�-devant", "l�-haut", "maint", "mainte", "maintenant", "maintes", "maints", "mais", "malgr�", "marge", "mati�re", "mien", "mienne", "miennes", "miens", "mieux", "mille", "milliards", "millions", "minima", "modo", "moins", "moult", "moyennant", "m�me", "m�mes", "nagu�re", "neuf", "nonante", "nonobstant", "notre", "nous", "nulle", "n�anmoins", "n�tre", "n�tres", "octante", "onze", "ouais", "outre", "par-ci", "par-del�", "par-derri�re", "par-dessous", "par-dessus", "par-devant", "par-l�", "parbleu", "parce", "parfois", "parmi", "part", "partir", "partout", "passim", "pass�", "pendant", "personne", "petto", "peur", "peut", "peut-�tre", "peuvent", "peux", "plus", "plusieurs", "plut�t", "point", "posteriori", "pour", "pourquoi", "pourtant", "pourvu", "presqu'", "presque", "primo", "priori", "prou", "pr�s", "pr�alable", "puis", "puisqu'", "puisque", "quand", "quant", "quarante", "quarante-cinq", "quarante-deux", 
"quarante-et-un", "quarante-huit", "quarante-neuf", "quarante-quatre", "quarante-sept", "quarante-six", "quarante-trois", "quasi", "quatorze", "quatre", "quatre-vingt", "quatre-vingt-cinq", "quatre-vingt-deux", "quatre-vingt-dix", "quatre-vingt-dix-huit", "quatre-vingt-dix-neuf", "quatre-vingt-dix-sept", "quatre-vingt-douze", "quatre-vingt-huit", "quatre-vingt-neuf", "quatre-vingt-onze", "quatre-vingt-quatorze", "quatre-vingt-quatre", "quatre-vingt-quinze", "quatre-vingt-seize", "quatre-vingt-sept", "quatre-vingt-six", "quatre-vingt-treize", "quatre-vingt-trois", "quatre-vingt-un", "quatre-vingt-une", "quatre-vingts", "quel", "quelle", "quelles", "quelqu'", "quelqu'un", "quelqu'une", "quelque", "quelquefois", "quelques", "quelques-unes", "quelques-uns", "quels", "quiconque", "quinze", "quoi", "quoiqu'", "quoique", "raison", "rapport", "regard", "revoici", "revoil�", "rien", "sans", "sauf", "secundo", "sein", "seize", "selon", "sensu", "sept", "septante", "sera", "serai", "seraient", "serais", "serait", "seras", "serez", "seriez", "serions", "serons", "seront", "sien", "sienne", "siennes", "siens", "sine", "sinon", "situ", "sit�t", "soient", "sois", "soit", "soixante", "soixante-cinq", "soixante-deux", "soixante-dix", "soixante-dix-huit", "soixante-dix-neuf", "soixante-dix-sept", "soixante-douze", "soixante-et-onze", "soixante-et-un", "soixante-et-une", "soixante-huit", "soixante-neuf", "soixante-quatorze", "soixante-quatre", "soixante-quinze", "soixante-seize", "soixante-sept", "soixante-six", "soixante-treize", "soixante-trois", "sommes", "sont", "soudain", "sous", "souvent", "soyez", "soyons", "stricto", "suis", "suite", "sujet", "sur-le-champ", "surtout", "tacatac", "tandis", "tant", "tant�t", "tard", "telle", "telles", "tels", "tien", "tienne", "tiennes", "tiens", "toujours", "tous", "tout", "toute", "toutefois", "toutes", "travers", "treize", "trente", "trente-cinq", "trente-deux", "trente-et-un", "trente-huit", "trente-neuf", "trente-quatre", "trente-sept", 
"trente-six", "trente-trois", "trois", "trop", "tr�s", "unes", "vais", "vers", "vertu", "veut", "veux", "vice-versa", "vingt", "vingt-cinq", "vingt-deux", "vingt-huit", "vingt-neuf", "vingt-quatre", "vingt-sept", "vingt-six", "vingt-trois", "vis-�-vis", "vite", "vitro", "vivo", "voici", "voil�", "voire", "volontiers", "votre", "vous", "v�tre", "v�tres", "z�ro", "�gard", "�taient", "�tais", "�tait", "�tant", "�tante", "�tantes", "�tants", "�tiez", "�tions", "�t�e", "�t�es", "�t�s", "�tes", "�tre");
|
4 |
+
?>
|
languages/it/stemmer.php
ADDED
@@ -0,0 +1,341 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
|
3 |
+
/*
|
4 |
+
|
5 |
+
Converted to PHP 4 by Rob Marsh, SJ
|
6 |
+
|
7 |
+
*/
|
8 |
+
|
9 |
+
|
10 |
+
/*
|
11 |
+
*
|
12 |
+
* This script has been written by Roberto Mirizzi (rob4you at vodafone dot it) in February 2007.
|
13 |
+
*
|
14 |
+
* It is the PHP5 implementation of Martin Porter's stemming algorithm for Italian language.
|
15 |
+
*
|
16 |
+
* This algorithm can be found at address: http://snowball.tartarus.org/algorithms/italian/stemmer.html.
|
17 |
+
*
|
18 |
+
* Use the code freely. I'm not responsible for any problems.
|
19 |
+
*
|
20 |
+
* Usage:
|
21 |
+
*
|
22 |
+
* $stemmer = new ItalianStemmer();
|
23 |
+
* $stemmed_word = $stemmer->stem($word);
|
24 |
+
*
|
25 |
+
* All Italian characters are (originally) in latin1 (ISO-8859-1).
|
26 |
+
*
|
27 |
+
*/
|
28 |
+
/**
 * Porter/Snowball stemmer for Italian.
 *
 * The stemmer works byte-wise on ISO-8859-1 (latin1) text: every accented
 * letter is expected to be a single byte. Accented literals are therefore
 * written as explicit byte escapes so that the encoding of this source file
 * cannot change them.
 *
 * NOTE(review): the accented literals were unreadable in the reviewed copy of
 * this file and have been reconstructed from the Snowball Italian
 * specification (a-grave = \xE0, e-grave = \xE8, i-grave = \xEC,
 * o-grave = \xF2, u-grave = \xF9 and the matching acute forms) -- confirm
 * against the original file.
 */
class ItalianStemmer {
	var $vocali = array('a','e','i','o','u',"\xE0","\xE8","\xEC","\xF2","\xF9");
	// 'I' and 'U' are the upper-cased markers produced by the prelude steps
	// and are treated as consonants.
	var $consonanti = array('b','c','d','f','g','h','j','k','l','m','n','p','q','r','s','t','v','w','x','y','z','I','U');
	var $accenti_acuti = array("\xE1","\xE9","\xED","\xF3","\xFA");
	var $accenti_gravi = array("\xE0","\xE8","\xEC","\xF2","\xF9");

	var $suffissi_step_0 = array('ci','gli','la','le','li','lo','mi','ne','si','ti','vi','sene','gliela','gliele','glieli','glielo','gliene','mela','mele','meli','melo','mene','tela','tele','teli','telo','tene','cela','cele','celi','celo','cene','vela','vele','veli','velo','vene');

	var $suffissi_step_1_a = array('anza','anze','ico','ici','ica','ice','iche','ichi','ismo','ismi','abile','abili','ibile','ibili','ista','iste','isti',"ist\xE0","ist\xE8","ist\xEC",'oso','osi','osa','ose','mente','atrice','atrici','ante','anti');
	var $suffissi_step_1_b = array('azione','azioni','atore','atori');
	var $suffissi_step_1_c = array('logia','logie');
	var $suffissi_step_1_d = array('uzione','uzioni','usione','usioni');
	var $suffissi_step_1_e = array('enza','enze');
	var $suffissi_step_1_f = array('amento','amenti','imento','imenti');
	var $suffissi_step_1_g = array('amente');
	var $suffissi_step_1_h = array("it\xE0");
	var $suffissi_step_1_i = array('ivo','ivi','iva','ive');

	var $suffissi_step_2 = array('ammo','ando','ano','are','arono','asse','assero','assi','assimo','ata','ate','ati','ato','ava','avamo','avano','avate','avi','avo','emmo','enda','ende','endi','endo',"er\xE0",'erai','eranno','ere','erebbe','erebbero','erei','eremmo','eremo','ereste','eresti','erete',"er\xF2",'erono','essero','ete','eva','evamo','evano','evate','evi','evo','Yamo','iamo','immo',"ir\xE0",'irai','iranno','ire','irebbe','irebbero','irei','iremmo','iremo','ireste','iresti','irete',"ir\xF2",'irono','isca','iscano','isce','isci','isco','iscono','issero','ita','ite','iti','ito','iva','ivamo','ivano','ivate','ivi','ivo','ono','uta','ute','uti','uto','ar','ir');

	var $ante_suff_a = array('ando','endo');
	var $ante_suff_b = array('ar','er','ir');

	/**
	 * Orders the suffix lists longest-first so that the first suffix found
	 * at the end of a word is always the longest one.
	 */
	function __construct() {
		// create_function() no longer exists in PHP 8; a method callback
		// works on every PHP version.
		usort($this->suffissi_step_0, array($this, 'compare_length_desc'));
		usort($this->suffissi_step_1_a, array($this, 'compare_length_desc'));
		usort($this->suffissi_step_2, array($this, 'compare_length_desc'));
	}

	/** usort() comparator: longer strings sort first. */
	function compare_length_desc($a, $b) {
		return strlen($b) - strlen($a);
	}

	/** True when $haystack ends with the (non-empty) string $needle. */
	function ends_with($haystack, $needle) {
		$haystack = (string) $haystack;
		$needle_len = strlen($needle);
		if ($needle_len === 0 || $needle_len > strlen($haystack)) return false;
		return substr($haystack, -$needle_len) === $needle;
	}

	/** Strips surrounding whitespace. */
	function trim($str) {
		return trim($str);
	}

	/** Lower-cases the word (byte-wise, via strtolower). */
	function to_lower($str) {
		return strtolower($str);
	}

	/** Replaces every acute-accented vowel with its grave-accented form. */
	function replace_acc_acuti($str) {
		return str_replace($this->accenti_acuti, $this->accenti_gravi, $str);
	}

	/** Marks the 'u' of every "qu" as a consonant by upper-casing it. */
	function put_u_after_q_to_upper($str) {
		return str_replace("qu", "qU", $str);
	}

	/**
	 * Upper-cases an 'i' or 'u' standing between two vowels so that later
	 * steps treat it as a consonant.
	 */
	function i_u_between_vow_to_upper($str) {
		// The /e modifier originally used here was removed in PHP 7.
		// NOTE(review): the vowel class is rebuilt from $this->vocali; the
		// original literal class was unreadable but appears to list the same
		// ten vowels -- confirm.
		$vowels = implode($this->vocali);
		$pattern = '/(['.$vowels.'])([iu])(['.$vowels.'])/';
		return preg_replace_callback($pattern, array($this, 'uppercase_middle_letter'), $str);
	}

	/** preg_replace_callback() helper: upper-cases the second capture group. */
	function uppercase_middle_letter($m) {
		return $m[1].strtoupper($m[2]).$m[3];
	}

	/**
	 * Returns the RV region of a word.
	 *
	 * If the second letter is a consonant, RV is the region after the next
	 * following vowel; if the first two letters are vowels, RV is the region
	 * after the next consonant; otherwise (consonant-vowel case) RV is the
	 * region after the third letter. RV is the empty string when these
	 * positions cannot be found.
	 *
	 * @return string always a string (never null or false)
	 */
	function return_RV($str) {
		if (strlen($str) < 2) return '';

		$first = $str[0];
		$second = $str[1];

		if (in_array($second, $this->consonanti, true)) {
			$tail = strpbrk((string) substr($str, 2), implode($this->vocali));
			return $tail === false ? '' : (string) substr($tail, 1);
		}
		if (in_array($first, $this->vocali, true) && in_array($second, $this->vocali, true)) {
			$tail = strpbrk($str, implode($this->consonanti));
			return $tail === false ? '' : (string) substr($tail, 1);
		}
		if (in_array($first, $this->consonanti, true) && in_array($second, $this->vocali, true)) {
			return (string) substr($str, 3);
		}

		// Words starting with characters that are neither vowels nor
		// consonants (digits, punctuation) have no RV region.
		return '';
	}

	/**
	 * Returns everything after the first consonant that follows a run of
	 * vowels, or '' when there is no such consonant.
	 */
	function region_after_vowel_consonant($str) {
		$pattern = '/['.implode($this->vocali).']+'.'['.implode($this->consonanti).']'.'(.*)/';
		if (preg_match($pattern, (string) $str, $matches)) return $matches[1];
		return '';
	}

	/**
	 * R1 is the region after the first non-vowel following a vowel, or the
	 * empty string if there is no such non-vowel.
	 * e.g. beautiful [iful], beauty [y], beau [], animadversion [imadversion]
	 */
	function return_R1($str){
		return $this->region_after_vowel_consonant($str);
	}

	/**
	 * R2 is the region after the first non-vowel following a vowel in R1, or
	 * the empty string if there is no such non-vowel.
	 * e.g. beautiful [ul], beauty [], animadversion [adversion], eucharist [ist]
	 */
	function return_R2($str) {
		return $this->region_after_vowel_consonant($this->return_R1($str));
	}

	/**
	 * Step 0: attached pronoun (always performed).
	 *
	 * Finds the longest pronoun suffix inside RV. It is deleted when preceded
	 * by 'ando'/'endo', replaced by 'e' when preceded by 'ar'/'er'/'ir', and
	 * otherwise the word is returned unchanged.
	 */
	function step_0($str) {
		$rv = $this->return_RV($str);

		foreach ($this->suffissi_step_0 as $suff) {
			if (!$this->ends_with($rv, $suff)) continue;

			$ante_suff = (string) substr($rv, 0, strlen($rv) - strlen($suff));
			$without_pronoun = substr($str, 0, strlen($str) - strlen($suff));

			foreach ($this->ante_suff_a as $ante_a) {
				if ($this->ends_with($ante_suff, $ante_a)) return $without_pronoun;
			}
			foreach ($this->ante_suff_b as $ante_b) {
				if ($this->ends_with($ante_suff, $ante_b)) return $without_pronoun.'e';
			}
			// Only the longest pronoun is considered.
			return $str;
		}

		return $str;
	}

	/**
	 * Removes the first suffix of $arr_suff found at the end of a region.
	 *
	 * @param array  $arr_suff candidate suffixes, tried in order
	 * @param string $str      the word
	 * @param int    $str_len  strlen($str), as supplied by the caller
	 * @param string $where    region to test: 'r1', 'r2' or 'rv'
	 * @param bool   $ovunque  when true the first suffix ending the whole
	 *                         word is selected and removed only if it also
	 *                         lies inside the region; no other suffix is
	 *                         tried after it
	 * @return string|null the word without the suffix, or null when nothing
	 *                     was removed
	 */
	function delete_suff($arr_suff,$str,$str_len,$where,$ovunque=false) {
		$r = '';
		if ($where === 'r2') $r = $this->return_R2($str);
		else if ($where === 'rv') $r = $this->return_RV($str);
		else if ($where === 'r1') $r = $this->return_R1($str);

		$r_len = strlen($r);

		if ($ovunque) {
			foreach ($arr_suff as $suff) {
				$offset = $str_len - strlen($suff);
				if ($offset < 0) continue;
				$pos = strpos($str, $suff, $offset);
				if ($pos === false) continue;

				if ($this->ends_with($r, $suff)) {
					$ret_str = substr($str, 0, $pos);
					if ($ret_str !== '') return $ret_str;
				}
				return null;
			}
			return null;
		}

		foreach ($arr_suff as $suff) {
			$offset = $r_len - strlen($suff);
			if ($offset < 0) continue;
			$pos = strpos($r, $suff, $offset);
			if ($pos !== false) return substr($str, 0, $pos + $str_len - $r_len);
		}
		return null;
	}

	/**
	 * Step 1: standard suffix removal (always performed).
	 *
	 * delete_suff() returns null when nothing was removed; results are
	 * compared with null because count() on a string or null fails on PHP 8.
	 */
	function step_1($str) {
		$str_len = strlen($str);

		// 'amente': delete if in R1; if preceded by 'iv', delete if in R2 (and if further preceded by 'at', delete if in R2); otherwise, if preceded by 'os', 'ic' or 'abil', delete if in R2
		if (($ret_str = $this->delete_suff($this->suffissi_step_1_g,$str,$str_len,'r1')) !== null) {
			if (($ret_str1 = $this->delete_suff(array('iv'),$ret_str,strlen($ret_str),'r2')) !== null) {
				if (($ret_str2 = $this->delete_suff(array('at'),$ret_str1,strlen($ret_str1),'r2')) !== null) return $ret_str2;
				return $ret_str1;
			}
			if (($ret_str1 = $this->delete_suff(array('os','ic','abil'),$ret_str,strlen($ret_str),'r2')) !== null) {
				return $ret_str1;
			}
			return $ret_str;
		}
		// delete if in R2
		if (($ret_str = $this->delete_suff($this->suffissi_step_1_a,$str,$str_len,'r2',true)) !== null) return $ret_str;
		// delete if in R2; if preceded by 'ic', delete if in R2
		if (($ret_str = $this->delete_suff($this->suffissi_step_1_b,$str,$str_len,'r2')) !== null) {
			if (($ret_str1 = $this->delete_suff(array('ic'),$ret_str,strlen($ret_str),'r2')) !== null) {
				return $ret_str1;
			}
			return $ret_str;
		}
		// replace with 'log' if in R2
		if (($ret_str = $this->delete_suff($this->suffissi_step_1_c,$str,$str_len,'r2')) !== null) return $ret_str.'log';
		// replace with 'u' if in R2
		if (($ret_str = $this->delete_suff($this->suffissi_step_1_d,$str,$str_len,'r2')) !== null) return $ret_str.'u';
		// replace with 'ente' if in R2
		if (($ret_str = $this->delete_suff($this->suffissi_step_1_e,$str,$str_len,'r2')) !== null) return $ret_str.'ente';
		// delete if in RV
		if (($ret_str = $this->delete_suff($this->suffissi_step_1_f,$str,$str_len,'rv')) !== null) return $ret_str;
		// delete if in R2; if preceded by 'abil', 'ic' or 'iv', delete if in R2
		if (($ret_str = $this->delete_suff($this->suffissi_step_1_h,$str,$str_len,'r2')) !== null) {
			if (($ret_str1 = $this->delete_suff(array('abil','ic','iv'),$ret_str,strlen($ret_str),'r2')) !== null) {
				return $ret_str1;
			}
			return $ret_str;
		}
		// delete if in R2; if preceded by 'at', delete if in R2 (and if further preceded by 'ic', delete if in R2)
		if (($ret_str = $this->delete_suff($this->suffissi_step_1_i,$str,$str_len,'r2')) !== null) {
			if (($ret_str1 = $this->delete_suff(array('at'),$ret_str,strlen($ret_str),'r2')) !== null) {
				if (($ret_str2 = $this->delete_suff(array('ic'),$ret_str1,strlen($ret_str1),'r2')) !== null) return $ret_str2;
				return $ret_str1;
			}
			return $ret_str;
		}

		return $str;
	}

	/**
	 * Step 2: verb suffixes, performed only if step 1 removed nothing.
	 *
	 * @param string $str        the word before step 1
	 * @param string $str_step_1 the word after step 1
	 */
	function step_2($str,$str_step_1) {
		if ($str !== $str_step_1) return $str_step_1;

		$ret_str = $this->delete_suff($this->suffissi_step_2, $str, strlen($str), 'rv');
		return $ret_str !== null ? $ret_str : $str;
	}

	/**
	 * Step 3a (always performed): delete a final 'a', 'e', 'i', 'o' or their
	 * grave-accented forms if in RV, and a preceding 'i' if it is in RV
	 * ('crocchi' -> 'crocch', 'crocchio' -> 'crocch').
	 */
	function step_3a($str) {
		$vocale_finale = array('a','e','i','o',"\xE0","\xE8","\xEC","\xF2");

		$ret_str = $this->delete_suff($vocale_finale, $str, strlen($str), 'rv');
		if ($ret_str === null) return $str;

		$ret_str1 = $this->delete_suff(array('i'), $ret_str, strlen($ret_str), 'rv');
		return $ret_str1 !== null ? $ret_str1 : $ret_str;
	}

	/**
	 * Step 3b (always performed): replace a final 'ch' (or 'gh') with 'c'
	 * (or 'g') if in RV ('crocch' -> 'crocc').
	 */
	function step_3b($str) {
		$rv = $this->return_RV($str);
		$head = substr($str, 0, strlen($str) - strlen($rv));

		return $head.preg_replace('/([cg])h$/', '${1}', $rv);
	}

	/** Step 4: finally, turn the I and U markers back into lower case. */
	function step_4($str) {
		return strtolower($str);
	}

	/**
	 * Stems one Italian word.
	 *
	 * @param string $str the word (expected in ISO-8859-1)
	 * @return string the stem
	 */
	function stem($str){
		$str = $this->trim($str);
		$str = $this->to_lower($str);
		$str = $this->replace_acc_acuti($str);
		$str = $this->put_u_after_q_to_upper($str);
		$str = $this->i_u_between_vow_to_upper($str);

		$step0 = $this->step_0($str);
		$step1 = $this->step_1($step0);
		$step2 = $this->step_2($step0, $step1);
		$step3a = $this->step_3a($step2);
		$step3b = $this->step_3b($step3a);

		return $this->step_4($step3b);
	}

}
|
319 |
+
|
320 |
+
|
321 |
+
/*
|
322 |
+
Stem caching added by Rob Marsh, SJ
|
323 |
+
http://rmarsh.com
|
324 |
+
*/
|
325 |
+
|
326 |
+
// One shared stemmer instance plus a per-request memo of words already stemmed.
$Stemmer = new ItalianStemmer();
$StemCache = array();

/**
 * Returns the stem of $word, consulting the $StemCache global first so that
 * each distinct word is run through the stemmer only once.
 */
function stem($word) {
	global $Stemmer, $StemCache;

	if (isset($StemCache[$word])) {
		return $StemCache[$word];
	}

	$result = $Stemmer->Stem($word);
	$StemCache[$word] = $result;

	return $result;
}
|
340 |
+
|
341 |
+
?>
|
languages/it/stopwords.php
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
// The list of common words we want to ignore. NB: anything shorter than 4 characters is dropped by the plugin and doesn't need to appear here.
|
3 |
+
$overusedwords = array("abbia", "abbiamo", "abbiano", "abbiate", "agli", "alla", "alle", "allo", "anche", "avemmo", "avendo", "avesse", "avessero", "avessi", "avessimo", "aveste", "avesti", "avete", "aveva", "avevamo", "avevano", "avevate", "avevi", "avevo", "avrai", "avranno", "avrebbe", "avrebbero", "avrei", "avremmo", "avremo", "avreste", "avresti", "avrete", "avr�", "avr�", "avuta", "avute", "avuti", "avuto", "come", "contro", "dagl", "dagli", "dall", "dalla", "dalle", "dallo", "degl", "degli", "dell", "della", "delle", "dello", "dove", "ebbe", "ebbero", "ebbi", "erano", "eravamo", "eravate", "essendo", "faccia", "facciamo", "facciano", "facciate", "faccio", "facemmo", "facendo", "facesse", "facessero", "facessi", "facessimo", "faceste", "facesti", "faceva", "facevamo", "facevano", "facevate", "facevi", "facevo", "fanno", "farai", "faranno", "farebbe", "farebbero", "farei", "faremmo", "faremo", "fareste", "faresti", "farete", "far�", "far�", "fece", "fecero", "feci", "fosse", "fossero", "fossi", "fossimo", "foste", "fosti", "fummo", "furono", "hanno", "loro", "miei", "negl", "negli", "nell", "nella", "nelle", "nello", "nostra", "nostre", "nostri", "nostro", "perch�", "quale", "quanta", "quante", "quanti", "quanto", "quella", "quelle", "quelli", "quello", "questa", "queste", "questi", "questo", "sarai", "saranno", "sarebbe", "sarebbero", "sarei", "saremmo", "saremo", "sareste", "saresti", "sarete", "sar�", "sar�", "siamo", "siano", "siate", "siete", "sono", "stai", "stando", "stanno", "starai", "staranno", "starebbe", "starebbero", "starei", "staremmo", "staremo", "stareste", "staresti", "starete", "star�", "star�", "stava", "stavamo", "stavano", "stavate", "stavi", "stavo", "stemmo", "stesse", "stessero", "stessi", "stessimo", "steste", "stesti", "stette", "stettero", "stetti", "stia", "stiamo", "stiano", "stiate", "sugl", "sugli", "sull", "sulla", "sulle", "sullo", "suoi", "tuoi", "tutti", "tutto", "vostra", "vostre", "vostri", "vostro");
|
4 |
+
?>
|
readme.txt
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
=== Similar Posts ===
|
2 |
+
Contributors: RobMarsh
|
3 |
+
Donate link: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=donate%40rmarsh%2ecom&item_name=Rob%20Marsh%27s%20WordPress%20Plugins&item_number=Similar%20Posts&no_shipping=1&cn=Any%20Comments&tax=0&currency_code=GBP&bn=PP%2dDonationsBF&charset=UTF%2d8
|
4 |
+
Tags: posts, related, similar, related posts, similar posts, tags, post-plugins
|
5 |
+
Requires at least: 1.5
|
6 |
+
Tested up to: 2.6.0
|
7 |
+
Stable tag: 2.6.0.0
|
8 |
+
Displays a list of posts similar to the current one based on content, title and/or tags.
|
9 |
+
|
10 |
+
== Description ==
|
11 |
+
|
12 |
+
Similar Posts displays a list of posts that are similar or related to the current post. The list can be customised in *many* ways. Similarity is judged according to a post's title, content, and tags, and you can adjust the balance of factors to fit your own blog.
|
13 |
+
|
14 |
+
This plugin **requires** the latest version of the *Post-Plugin Library:* [download it now](http://downloads.wordpress.org/plugin/post-plugin-library.zip).
|
15 |
+
|
16 |
+
== Installation ==
|
17 |
+
|
18 |
+
1. IMPORTANT! If you are upgrading from a previous version first deactivate the plugin, then delete the plugin folder from your server.
|
19 |
+
|
20 |
+
1. If you have the *Similar Posts Feed* plugin installed you must deactivate it before installing Similar Posts (which now does the same job).
|
21 |
+
|
22 |
+
1. Upload the plugin folder to your /wp-content/plugins/ folder. If you haven't already, you should also install the [Post-Plugin Library](http://wordpress.org/extend/plugins/post-plugin-library/).
|
23 |
+
|
24 |
+
1. Go to the **Plugins** page and activate the plugin.
|
25 |
+
|
26 |
+
1. Put `<?php similar_posts(); ?>` at the place in your template where you want the list of related posts to appear or use the plugin as a widget.
|
27 |
+
|
28 |
+
1. Use the **Options/Settings** page to adjust the behaviour of the plugin.
|
29 |
+
|
30 |
+
[My web site](http://rmarsh.com/) has [full instructions](http://rmarsh.com/plugins/similar-posts/) and [information on customisation](http://rmarsh.com/plugins/post-options/).
|
31 |
+
|
32 |
+
== Version History ==
|
33 |
+
|
34 |
+
* 2.6.0.0
|
35 |
+
* version bump to indicate compatibility with WP 2.6
|
36 |
+
* fix to really include attachments
|
37 |
+
* new parameter for {imagesrc} to append a suffix to the image name, e.g. to get the thumbnail for attachments
|
38 |
+
* 2.5.0.11
|
39 |
+
* new option to include attachments
|
40 |
+
* {php} tag now accepts nested tags
|
41 |
+
* new output tag {authorurl} -- permalink to archive of author's posts
|
42 |
+
* fix for numeric locale issue
|
43 |
+
* 2.5.0.10
|
44 |
+
* new option to select algorithm for term extraction
|
45 |
+
* new manual links option
|
46 |
+
* fix for page selection in old versions of WP
|
47 |
+
* fix for faulty tags in Cyrillic
|
48 |
+
* 2.5.0.9
|
49 |
+
* new option to match the current post's author
|
50 |
+
* extended options for snippet and excerpt output tags
|
51 |
+
* 2.5.0.7
|
52 |
+
* new option to show by status, i.e., published/private/draft/future
|
53 |
+
* {categorynames} and {categorylinks} apply 'single_cat_name' filter
|
54 |
+
* fixes bug in WP pre-2.2 causing installation to fail
|
55 |
+
* 2.5.0
|
56 |
+
* CJK digrams
|
57 |
+
* {image} has new post, link, and default parameters
|
58 |
+
* new {imagesrc} tag
|
59 |
+
* fix to empty category bug
|
60 |
+
* excluded posts bug fix
|
61 |
+
* fix for intermittent bug with 'omit current post' option
|
62 |
+
* 2.5b28
|
63 |
+
* improvements to Similar Posts matching
|
64 |
+
* experiment with Chinese/Korean/Japanese matching
|
65 |
+
* 2.5b27
|
66 |
+
* fixed bug with bulk indexing of tags
|
67 |
+
* 2.5b26
|
68 |
+
* reverted thumbnail serving (speed)
|
69 |
+
* fix current post after extra query
|
70 |
+
* 2.5b25
|
71 |
+
* option to sort output, group templates
|
72 |
+
* removed 'trim_before' option added more logical 'divider'
|
73 |
+
* {date:raw}, {commentdate:raw}, etc.
|
74 |
+
* fix for {image} resizing when <img > and not <img />
|
75 |
+
* {image} now serves real thumbnails
|
76 |
+
* 2.5b24
|
77 |
+
* fix for recursive replacement by content filter
|
78 |
+
* fix to {gravatar} to allow for 'identicon' etc.
|
79 |
+
* fix to {commenter} to allow trimming
|
80 |
+
* fix a warning in safe mode
|
81 |
+
* fix for unsanitised WP tags
|
82 |
+
* 2.5b23
|
83 |
+
* new option to filter on custom fields
|
84 |
+
* nested braces in {if}; condition now taggable
|
85 |
+
* improved bug report feature
|
86 |
+
* better way to omit user comments
|
87 |
+
* 2.5b22
|
88 |
+
* restored automatic indexing on installation
|
89 |
+
* moved indexing menu under settings
|
90 |
+
* show_pages option can now show only pages
|
91 |
+
* fix for upgraders who had utf8 selected but no mbstring
|
92 |
+
* 2.5b20
|
93 |
+
* optimised indexing for speed and memory use
|
94 |
+
* 2.5b19
|
95 |
+
* fixing some extended character issues
|
96 |
+
* 2.5b18
|
97 |
+
* fix output filter bug
|
98 |
+
* add conditional tag {if:condition:yes:no}
|
99 |
+
* 2.5b16
|
100 |
+
* fix for {php}
|
101 |
+
* 2.5b15
|
102 |
+
* fix more or less obscure bugs, add 'include posts' setting
|
103 |
+
* 2.5b14
|
104 |
+
* fix file-encoding, installation error, etc.
|
105 |
+
* 2.5b12
|
106 |
+
* fix serious bug for WP < 2.3
|
107 |
+
* 2.5b11
|
108 |
+
* some widget fixes
|
109 |
+
* 2.5b10
|
110 |
+
* fix for non-creation of table
|
111 |
+
* 2.5b9
|
112 |
+
* clarifying installation instructions
|
113 |
+
|
114 |
+
* [previous versions](http://rmarsh.com/plugins/similar-posts/)
|
similar-posts-admin.php
ADDED
@@ -0,0 +1,702 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
|
3 |
+
// Admin stuff for Similar Posts Plugin, Version 2.6.0.0
|
4 |
+
|
5 |
+
/**
 * Registers the "Similar Posts" page in the admin options menu.
 */
function similar_posts_option_menu() {
	$page_title = __('Similar Posts Options', 'similar_posts');
	$menu_title = __('Similar Posts', 'similar_posts');

	// NOTE(review): 8 is a numeric user level rather than a capability name -- confirm it is still the intended access check.
	add_options_page($page_title, $menu_title, 8, 'similar-posts', 'similar_posts_options_page');
}
|
8 |
+
|
9 |
+
add_action('admin_menu', 'similar_posts_option_menu', 1);
|
10 |
+
|
11 |
+
/**
 * Registers the "Similar Posts Feed" page in the admin options menu.
 */
function similar_posts_for_feed_option_menu() {
	$page_title = __('Similar Posts Feed Options', 'similar_posts');
	$menu_title = __('Similar Posts Feed', 'similar_posts');

	// NOTE(review): 8 is a numeric user level rather than a capability name -- confirm it is still the intended access check.
	add_options_page($page_title, $menu_title, 8, 'similar-posts-feed', 'similar_posts_for_feed_options_page');
}
|
14 |
+
|
15 |
+
// this sneaky piece of work lets the similar posts feed menu appear and disappear
|
16 |
+
/**
 * Adds or removes the feed options menu hook depending on whether the feed
 * feature is active.
 *
 * A 'feed_active' value in the current POST takes precedence over the stored
 * 'similar-posts' option, so the menu reflects a change in the same request
 * that submits it.
 */
function juggle_similar_posts_menus() {
	if (isset($_POST['feed_active'])) {
		$active = ($_POST['feed_active'] === 'true');
	} else {
		$options = get_option('similar-posts');
		// The option may not exist yet, or may lack the key.
		$active = is_array($options)
			&& isset($options['feed_active'])
			&& $options['feed_active'] === 'true';
	}

	if ($active) {
		add_action('admin_menu', 'similar_posts_for_feed_option_menu', 2);
	} else {
		// The priority must match the one used in add_action() above,
		// otherwise the hook is never found.
		remove_action('admin_menu', 'similar_posts_for_feed_option_menu', 2);
	}
}
|
29 |
+
|
30 |
+
add_action('plugins_loaded', 'juggle_similar_posts_menus');
|
31 |
+
|
32 |
+
/**
 * Renders the Similar Posts options page: a heading with a help link,
 * followed by the tabbed sub-pages.
 *
 * Nothing beyond the heading is shown when
 * SimilarPosts::check_post_plugin_library() reports failure.
 */
function similar_posts_options_page(){
	echo '<div class="wrap"><h2>';
	_e('Similar Posts ', 'similar_posts');
	echo '<a href="http://rmarsh.com/plugins/post-options/" style="font-size: 0.8em;">';
	// Use the plugin's text domain, as every other string on this page does.
	_e('help and instructions', 'similar_posts');
	echo '</a></h2></div>';

	$install_notice = '<h1>'.sprintf(
		__('Please install the %sPost Plugin Library%s plugin.', 'similar_posts'),
		'<a href="http://downloads.wordpress.org/plugin/post-plugin-library.zip">',
		'</a>'
	).'</h1>';
	if (!SimilarPosts::check_post_plugin_library($install_notice)) return;

	$m = new admin_subpages();
	$m->add_subpage('General', 'general', 'similar_posts_general_options_subpage');
	$m->add_subpage('Output', 'output', 'similar_posts_output_options_subpage');
	$m->add_subpage('Filter', 'filter', 'similar_posts_filter_options_subpage');
	$m->add_subpage('Other', 'other', 'similar_posts_other_options_subpage');
	$m->add_subpage('Manage the Index', 'index', 'similar_posts_index_options_subpage');
	$m->add_subpage('Report a Bug', 'bug', 'similar_posts_bug_subpage');
	$m->add_subpage('Remove this Plugin', 'remove', 'similar_posts_remove_subpage');
	$m->display();
}
|
49 |
+
|
50 |
+
/**
 * "General" sub-page: saves the general settings when the form has been
 * submitted, then renders the settings form.
 */
function similar_posts_general_options_subpage(){
	$settings = get_option('similar-posts');

	if (isset($_POST['update_options'])) {
		check_admin_referer('similar-posts-update-options');
		if (defined('POC_CACHE_4')) poc_cache_flush();

		// Copy the submitted values for this sub-page's fields into the stored options.
		$fields = array('limit', 'skip', 'show_private', 'show_pages', 'show_attachments', 'status', 'age', 'omit_current_post', 'match_cat', 'match_tags', 'match_author');
		$settings = ppl_options_from_post($settings, $fields);
		update_option('similar-posts', $settings);

		// Tell the user something happened.
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}

	$save_label = __('Save General Settings', 'similar_posts');
	// From here on the form itself is plain HTML.
	?>
<div class="wrap">
<h2><?php _e('General Settings', 'similar_posts'); ?></h2>
<form method="post" action="">
<div class="submit"><input type="submit" name="update_options" value="<?php echo $save_label ?>" /></div>
<table class="optiontable form-table">
<?php
	ppl_display_limit($settings['limit']);
	ppl_display_skip($settings['skip']);
	ppl_display_show_private($settings['show_private']);
	ppl_display_show_pages($settings['show_pages']);
	ppl_display_show_attachments($settings['show_attachments']);
	ppl_display_status($settings['status']);
	ppl_display_age($settings['age']);
	ppl_display_omit_current_post($settings['omit_current_post']);
	ppl_display_match_cat($settings['match_cat']);
	ppl_display_match_tags($settings['match_tags']);
	ppl_display_match_author($settings['match_author']);
?>
</table>
<div class="submit"><input type="submit" name="update_options" value="<?php echo $save_label ?>" /></div>
<?php if (function_exists('wp_nonce_field')) wp_nonce_field('similar-posts-update-options'); ?>
</form>
</div>
<?php
}
|
89 |
+
|
90 |
+
/**
 * Renders the "Output Settings" subpage for the 'similar-posts' option.
 *
 * When the form has been posted (the 'update_options' field is present) the
 * nonce is checked, the output-related fields are merged into the stored
 * options via ppl_options_from_post(), and the option is saved before the
 * form is printed again with the current values.
 */
function similar_posts_output_options_subpage(){
	global $wpdb, $wp_version;
	$option_name = 'similar-posts';
	$nonce_action = 'similar-posts-update-options';
	$options = get_option($option_name);
	if (isset($_POST['update_options'])) {
		check_admin_referer($nonce_action);
		if (defined('POC_CACHE_4')) {
			poc_cache_flush();
		}
		// Only the fields shown on this subpage are taken from the request.
		$posted_fields = array(
			'output_template',
			'prefix',
			'suffix',
			'none_text',
			'no_text',
			'divider',
			'sort',
			'group_template',
		);
		$options = ppl_options_from_post($options, $posted_fields);
		update_option($option_name, $options);
		// Confirmation notice
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	// From here on the form markup is emitted directly.
	?>
	<div class="wrap">
	<h2><?php _e('Output Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Output Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<tr>
	<td>
	<table>
	<?php
		ppl_display_output_template($options['output_template']);
		ppl_display_prefix($options['prefix']);
		ppl_display_suffix($options['suffix']);
		ppl_display_none_text($options['none_text']);
		ppl_display_no_text($options['no_text']);
		ppl_display_divider($options['divider']);
		ppl_display_sort($options['sort']);
		ppl_display_group_template($options['group_template']);
	?>
	</table>
	</td>
	<td>
	<?php ppl_display_available_tags('similar-posts'); ?>
	</td></tr>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Output Settings', 'similar_posts') ?>" /></div>
	<?php if (function_exists('wp_nonce_field')) wp_nonce_field($nonce_action); ?>
	</form>
	</div>
	<?php
}
|
134 |
+
|
135 |
+
/**
 * Renders the "Filter Settings" subpage for the 'similar-posts' option.
 *
 * A posted form ('update_options' present) is nonce-checked, its filter
 * fields are merged into the stored options via ppl_options_from_post(),
 * and the result is saved before the form is shown with current values.
 */
function similar_posts_filter_options_subpage(){
	global $wpdb, $wp_version;
	$option_name = 'similar-posts';
	$nonce_action = 'similar-posts-update-options';
	$options = get_option($option_name);
	if (isset($_POST['update_options'])) {
		check_admin_referer($nonce_action);
		if (defined('POC_CACHE_4')) {
			poc_cache_flush();
		}
		// Only the fields shown on this subpage are taken from the request.
		$posted_fields = array(
			'excluded_posts',
			'included_posts',
			'excluded_authors',
			'included_authors',
			'excluded_cats',
			'included_cats',
			'tag_str',
			'custom',
		);
		$options = ppl_options_from_post($options, $posted_fields);
		update_option($option_name, $options);
		// Confirmation notice
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	// From here on the form markup is emitted directly.
	?>
	<div class="wrap">
	<h2><?php _e('Filter Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Filter Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<?php
		ppl_display_excluded_posts($options['excluded_posts']);
		ppl_display_included_posts($options['included_posts']);
		// Authors and categories each render their excluded/included pair together.
		ppl_display_authors($options['excluded_authors'], $options['included_authors']);
		ppl_display_cats($options['excluded_cats'], $options['included_cats']);
		ppl_display_tag_str($options['tag_str']);
		ppl_display_custom($options['custom']);
	?>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Filter Settings', 'similar_posts') ?>" /></div>
	<?php if (function_exists('wp_nonce_field')) wp_nonce_field($nonce_action); ?>
	</form>
	</div>
	<?php
}
|
169 |
+
|
170 |
+
/**
 * Renders the "Other Settings" subpage for the 'similar-posts' option.
 *
 * On submit ('update_options' present) the nonce is checked, the posted
 * fields are merged into the stored options, and the three weights
 * (content, title, tags) are rescaled so that they sum to 1 before saving.
 */
function similar_posts_other_options_subpage(){
	global $wpdb, $wp_version;
	$options = get_option('similar-posts');
	if (isset($_POST['update_options'])) {
		check_admin_referer('similar-posts-update-options');
		if (defined('POC_CACHE_4')) poc_cache_flush();
		// Fill up the options with the values chosen...
		$options = ppl_options_from_post($options, array('content_filter', 'stripcodes', 'feed_active', 'term_extraction', 'num_terms', 'weight_title', 'weight_content', 'weight_tags', 'hand_links'));
		// Normalise the weights. Each value is coerced to a float first (a blank
		// or non-numeric string would otherwise raise a TypeError in arithmetic
		// on PHP 8) and clamped at zero so the divisor can never reach zero.
		// The small epsilon keeps the division defined when every weight is 0.
		// NOTE(review): ppl_options_from_post() may already sanitise these — confirm.
		$weights = array();
		foreach (array('weight_content', 'weight_title', 'weight_tags') as $weight_key) {
			$raw_weight = isset($options[$weight_key]) ? (float) $options[$weight_key] : 0.0;
			$weights[$weight_key] = max(0.0, $raw_weight) + 0.0001;
		}
		$wcombined = array_sum($weights);
		foreach ($weights as $weight_key => $weight) {
			$options[$weight_key] = $weight / $wcombined;
		}
		update_option('similar-posts', $options);
		// Show a message to say we've done something
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	//now we drop into html to display the option page form
	?>
	<div class="wrap">
	<h2><?php _e('Other Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Other Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<?php
		ppl_display_weights($options);
		ppl_display_num_terms($options['num_terms']);
		ppl_display_term_extraction($options['term_extraction']);
		ppl_display_feed_active($options['feed_active']);
		ppl_display_hand_links($options['hand_links']);
		ppl_display_content_filter($options['content_filter']);
		ppl_display_stripcodes($options['stripcodes']);
	?>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Other Settings', 'similar_posts') ?>" /></div>
	<?php if (function_exists('wp_nonce_field')) wp_nonce_field('similar-posts-update-options'); ?>
	</form>
	</div>
	<?php
}
|
212 |
+
|
213 |
+
/**
 * Renders the "Manage Index" subpage and, when the "Recreate Index" button
 * was pressed, rebuilds the index with the posted settings.
 *
 * The posted utf8 / cjk / use_stemmer flags are stored as the strings
 * 'true' or 'false'; the batch size is validated with ppl_check_cardinal()
 * and falls back to 100. The index is rebuilt through save_index_entries()
 * and the number of indexed posts is reported.
 */
function similar_posts_index_options_subpage(){
	if (isset($_POST['reindex_all'])) {
		check_admin_referer('similar-posts-manage-update-options');
		if (defined('POC_CACHE_4')) poc_cache_flush();
		$options = get_option('similar-posts');
		// A disabled <select> is not submitted by the browser, so each flag may be
		// absent from the request; anything other than the literal 'true' is
		// stored as 'false'.
		foreach (array('utf8', 'cjk', 'use_stemmer') as $flag) {
			$options[$flag] = (isset($_POST[$flag]) && $_POST[$flag] === 'true') ? 'true' : 'false';
		}
		// Multibyte handling is only possible when the mbstring functions exist.
		if (!function_exists('mb_split')) {
			$options['utf8'] = 'false';
		}
		if (!function_exists('mb_internal_encoding')) {
			$options['cjk'] = 'false';
		}
		// CJK handling implies UTF-8 handling.
		if ($options['cjk'] === 'true') $options['utf8'] = 'true';
		$options['batch'] = isset($_POST['batch']) ? ppl_check_cardinal($_POST['batch']) : 0;
		if ($options['batch'] === 0) $options['batch'] = 100;
		flush();
		$termcount = save_index_entries (($options['utf8']==='true'), ($options['use_stemmer']==='true'), $options['batch'], ($options['cjk']==='true'));
		update_option('similar-posts', $options);
		//show a message
		printf('<div class="updated fade"><p>'.__('Indexed %d posts.', 'similar_posts').'</p></div>', $termcount);
	} else {
		$options = get_option('similar-posts');
	}
	// The batch size is printed into an attribute, so force it to an integer.
	$batch_value = isset($options['batch']) ? (int) $options['batch'] : 100;
	?>
	<div class="wrap">
	<?php
	echo '<h2>'.__('Manage Index', 'similar_posts').'</h2>';
	echo '<p>'.__('Similar Posts maintains a special index to help search for related posts. The index is created when the plugin is activated and then kept up-to-date automatically when posts are added, edited, or deleted.', 'similar_posts').'</p>';
	echo '<p>'.__('The options that affect the index can be set below.', 'similar_posts').'</p>';
	echo '<p>'.__('If you are using a language other than english you may find that the plugin mangles some characters since PHP is normally blind to multibyte characters. You can force the plugin to interpret extended characters as UTF-8 at the expense of a little speed but this facility is only available if your installation of PHP supports the mbstring functions.', 'similar_posts').'</p>';
	echo '<p>'.__('Languages like Chinese, Korean and Japanese pose a special difficulty for the full-text search algorithm. As an experiment I have introduced an option below to work around some of these issues. The text must be encoded as UTF-8. I would be very grateful for feedback from any users knowledgeable in these languages.', 'similar_posts').'</p>';
	echo '<p>'.__('Some related word forms should really be counted together, e.g., "follow", "follows", and "following". The plugin can use a "stemming" algorithm to reduce related forms to their root stem. It is worth experimenting to see if this improves the similarity of posts in your particular circumstances. Stemming algorithms are provided for english, german, spanish, french and italian but stemmers for other languages can be created: see the help for instructions. Note: stemming slows down the indexing more than a little.', 'similar_posts').'</p>';
	echo '<p>'.__('The indexing routine processes posts in batches of 100 by default. If you run into problems with limited memory you can opt to make the batches smaller.', 'similar_posts').'</p>';
	echo '<p>'.__('Note: the process of indexing may take a little while. On my modest machine 500 posts take between 5 seconds and 20 seconds (with stemming and utf-8 support). Don\'t worry if the screen fails to update until finished.', 'similar_posts').'</p>';
	?>
	<form method="post" action="">
	<table class="optiontable form-table">
	<tr valign="top">
	<th scope="row"><?php _e('Handle extended characters?', 'similar_posts') ?></th>
	<td>
	<select name="utf8" id="utf8" <?php if (!function_exists('mb_split')) echo 'disabled="true"'; ?> >
	<option <?php if($options['utf8'] == 'false') { echo 'selected="selected"'; } ?> value="false">No</option>
	<option <?php if($options['utf8'] == 'true') { echo 'selected="selected"'; } ?> value="true">Yes</option>
	</select>
	</td>
	</tr>
	<tr valign="top">
	<th scope="row"><?php _e('Treat as Chinese, Korean, or Japanese?', 'similar_posts') ?></th>
	<td>
	<select name="cjk" id="cjk" <?php if (!function_exists('mb_split')) echo 'disabled="true"'; ?> >
	<option <?php if($options['cjk'] == 'false') { echo 'selected="selected"'; } ?> value="false">No</option>
	<option <?php if($options['cjk'] == 'true') { echo 'selected="selected"'; } ?> value="true">Yes</option>
	</select>
	</td>
	</tr>
	<tr valign="top">
	<th scope="row"><?php _e('Use a stemming algorithm?', 'similar_posts') ?></th>
	<td>
	<select name="use_stemmer" id="use_stemmer">
	<option <?php if($options['use_stemmer'] == 'false') { echo 'selected="selected"'; } ?> value="false">No</option>
	<option <?php if($options['use_stemmer'] == 'true') { echo 'selected="selected"'; } ?> value="true">Yes</option>
	</select>
	</td>
	</tr>
	<tr valign="top">
	<th scope="row"><?php _e('Batch size:', 'similar_posts') ?></th>
	<td><input name="batch" type="text" id="batch" value="<?php echo $batch_value; ?>" size="3" /></td>
	</tr>
	</table>
	<div class="submit">
	<input type="submit" name="reindex_all" value="<?php _e('Recreate Index', 'similar_posts') ?>" />
	<?php if (function_exists('wp_nonce_field')) wp_nonce_field('similar-posts-manage-update-options'); ?>
	</div>
	</form>
	</div>
	<?php
}
|
292 |
+
|
293 |
+
|
294 |
+
/**
 * Renders the "Report a Bug" subpage by delegating to ppl_bug_form()
 * with the 'similar-posts' identifier.
 */
function similar_posts_bug_subpage(){
	$plugin_id = 'similar-posts';
	ppl_bug_form($plugin_id);
}
|
297 |
+
|
298 |
+
/**
 * Renders the "Remove this Plugin" subpage.
 *
 * Declares the clean-up routine and passes its name, together with the path
 * of the main plugin file (this file's path with '-admin' removed), to
 * ppl_plugin_eradicate_form().
 */
function similar_posts_remove_subpage(){
	// A named function declared inside another function becomes global as soon
	// as the outer function runs. The declaration is therefore guarded and
	// given a plugin-specific name: the generic name 'eradicate' is also
	// declared by the feed remove subpage, and a second declaration in the
	// same request is a fatal "cannot redeclare" error.
	if (!function_exists('similar_posts_eradicate')) {
		/**
		 * Deletes the plugin's options and widget settings and drops its index table.
		 */
		function similar_posts_eradicate() {
			global $wpdb, $table_prefix;
			delete_option('similar-posts');
			delete_option('similar-posts-feed');
			delete_option('widget_rrm_similar_posts');
			$table_name = $table_prefix . 'similar_posts';
			// IF EXISTS keeps the removal from failing when the table is already gone.
			$wpdb->query("DROP TABLE IF EXISTS `$table_name`");
		}
	}
	// NOTE(review): assumes ppl_plugin_eradicate_form() invokes its first
	// argument as a callback name — confirm against the Post-Plugin Library.
	ppl_plugin_eradicate_form('similar_posts_eradicate', str_replace('-admin', '', __FILE__));
}
|
309 |
+
|
310 |
+
/**
 * Renders the top-level "Similar Posts Feed" settings page: a heading with
 * a help link, followed by the subpage navigation built with admin_subpages.
 */
function similar_posts_for_feed_options_page(){
	echo '<div class="wrap"><h2>';
	_e('Similar Posts Feed ', 'similar_posts');
	echo '<a href="http://rmarsh.com/plugins/post-options/" style="font-size: 0.8em;">';
	// Look the link text up in the plugin's own text domain, like the heading above.
	_e('help and instructions', 'similar_posts');
	echo '</a></h2></div>';
	// Each entry: label, slug, and the name of the function that renders the subpage.
	$subpages = array(
		array('General', 'general', 'similar_posts_feed_general_options_subpage'),
		array('Output', 'output', 'similar_posts_feed_output_options_subpage'),
		array('Filter', 'filter', 'similar_posts_feed_filter_options_subpage'),
		array('Other', 'other', 'similar_posts_feed_other_options_subpage'),
		array('Report a Bug', 'bug', 'similar_posts_feed_bug_subpage'),
		array('Remove this Plugin', 'remove', 'similar_posts_feed_remove_subpage'),
	);
	$m = new admin_subpages();
	foreach ($subpages as $subpage) {
		$m->add_subpage($subpage[0], $subpage[1], $subpage[2]);
	}
	$m->display();
}
|
325 |
+
|
326 |
+
/**
 * Renders the "General Settings" subpage for the 'similar-posts-feed' option.
 *
 * A posted form ('update_options' present) is nonce-checked, its general
 * fields are merged into the stored options via ppl_options_from_post(),
 * and the result is saved before the form is shown with current values.
 */
function similar_posts_feed_general_options_subpage(){
	global $wpdb, $wp_version;
	$option_name = 'similar-posts-feed';
	$nonce_action = 'similar-posts-feed-update-options';
	$options = get_option($option_name);
	if (isset($_POST['update_options'])) {
		check_admin_referer($nonce_action);
		if (defined('POC_CACHE_4')) {
			poc_cache_flush();
		}
		// Only the fields shown on this subpage are taken from the request.
		$posted_fields = array(
			'limit',
			'skip',
			'show_private',
			'show_pages',
			'show_attachments',
			'status',
			'age',
			'omit_current_post',
			'match_cat',
			'match_tags',
			'match_author',
		);
		$options = ppl_options_from_post($options, $posted_fields);
		update_option($option_name, $options);
		// Confirmation notice
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	// From here on the form markup is emitted directly.
	?>
	<div class="wrap">
	<h2><?php _e('General Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save General Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<?php
		ppl_display_limit($options['limit']);
		ppl_display_skip($options['skip']);
		ppl_display_show_private($options['show_private']);
		ppl_display_show_pages($options['show_pages']);
		ppl_display_show_attachments($options['show_attachments']);
		ppl_display_status($options['status']);
		ppl_display_age($options['age']);
		ppl_display_omit_current_post($options['omit_current_post']);
		ppl_display_match_cat($options['match_cat']);
		ppl_display_match_tags($options['match_tags']);
		ppl_display_match_author($options['match_author']);
	?>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save General Settings', 'similar_posts') ?>" /></div>
	<?php if (function_exists('wp_nonce_field')) wp_nonce_field($nonce_action); ?>
	</form>
	</div>
	<?php
}
|
365 |
+
|
366 |
+
/**
 * Renders the "Output Settings" subpage for the 'similar-posts-feed' option.
 *
 * A posted form ('update_options' present) is nonce-checked, its output
 * fields are merged into the stored options via ppl_options_from_post(),
 * and the result is saved before the form is shown with current values.
 */
function similar_posts_feed_output_options_subpage(){
	global $wpdb, $wp_version;
	$option_name = 'similar-posts-feed';
	$nonce_action = 'similar-posts-feed-update-options';
	$options = get_option($option_name);
	if (isset($_POST['update_options'])) {
		check_admin_referer($nonce_action);
		if (defined('POC_CACHE_4')) {
			poc_cache_flush();
		}
		// Only the fields shown on this subpage are taken from the request.
		$posted_fields = array(
			'output_template',
			'prefix',
			'suffix',
			'none_text',
			'no_text',
			'divider',
			'sort',
			'group_template',
		);
		$options = ppl_options_from_post($options, $posted_fields);
		update_option($option_name, $options);
		// Confirmation notice
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	// From here on the form markup is emitted directly.
	?>
	<div class="wrap">
	<h2><?php _e('Output Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Output Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<tr>
	<td>
	<table>
	<?php
		ppl_display_output_template($options['output_template']);
		ppl_display_prefix($options['prefix']);
		ppl_display_suffix($options['suffix']);
		ppl_display_none_text($options['none_text']);
		ppl_display_no_text($options['no_text']);
		ppl_display_divider($options['divider']);
		ppl_display_sort($options['sort']);
		ppl_display_group_template($options['group_template']);
	?>
	</table>
	</td>
	<td>
	<?php
		// NOTE(review): this passes 'similar-posts' although the page edits the
		// 'similar-posts-feed' option — confirm whether that is intentional.
		ppl_display_available_tags('similar-posts');
	?>
	</td></tr>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Output Settings', 'similar_posts') ?>" /></div>
	<?php if (function_exists('wp_nonce_field')) wp_nonce_field($nonce_action); ?>
	</form>
	</div>
	<?php
}
|
410 |
+
|
411 |
+
/**
 * Renders the "Filter Settings" subpage for the 'similar-posts-feed' option.
 *
 * A posted form ('update_options' present) is nonce-checked, its filter
 * fields are merged into the stored options via ppl_options_from_post(),
 * and the result is saved before the form is shown with current values.
 */
function similar_posts_feed_filter_options_subpage(){
	global $wpdb, $wp_version;
	$option_name = 'similar-posts-feed';
	$nonce_action = 'similar-posts-feed-update-options';
	$options = get_option($option_name);
	if (isset($_POST['update_options'])) {
		check_admin_referer($nonce_action);
		if (defined('POC_CACHE_4')) {
			poc_cache_flush();
		}
		// Only the fields shown on this subpage are taken from the request.
		$posted_fields = array(
			'excluded_posts',
			'included_posts',
			'excluded_authors',
			'included_authors',
			'excluded_cats',
			'included_cats',
			'tag_str',
			'custom',
		);
		$options = ppl_options_from_post($options, $posted_fields);
		update_option($option_name, $options);
		// Confirmation notice
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	// From here on the form markup is emitted directly.
	?>
	<div class="wrap">
	<h2><?php _e('Filter Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Filter Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<?php
		ppl_display_excluded_posts($options['excluded_posts']);
		ppl_display_included_posts($options['included_posts']);
		// Authors and categories each render their excluded/included pair together.
		ppl_display_authors($options['excluded_authors'], $options['included_authors']);
		ppl_display_cats($options['excluded_cats'], $options['included_cats']);
		ppl_display_tag_str($options['tag_str']);
		ppl_display_custom($options['custom']);
	?>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Filter Settings', 'similar_posts') ?>" /></div>
	<?php if (function_exists('wp_nonce_field')) wp_nonce_field($nonce_action); ?>
	</form>
	</div>
	<?php
}
|
445 |
+
|
446 |
+
/**
 * Renders the "Other Settings" subpage for the 'similar-posts-feed' option.
 *
 * On submit ('update_options' present) the nonce is checked, the posted
 * fields are merged into the stored options, and the three weights
 * (content, title, tags) are rescaled so that they sum to 1 before saving.
 */
function similar_posts_feed_other_options_subpage(){
	global $wpdb, $wp_version;
	$options = get_option('similar-posts-feed');
	if (isset($_POST['update_options'])) {
		check_admin_referer('similar-posts-feed-update-options');
		if (defined('POC_CACHE_4')) poc_cache_flush();
		// Fill up the options with the values chosen...
		$options = ppl_options_from_post($options, array('stripcodes', 'term_extraction', 'num_terms', 'weight_title', 'weight_content', 'weight_tags', 'hand_links'));
		// Normalise the weights. Each value is coerced to a float first (a blank
		// or non-numeric string would otherwise raise a TypeError in arithmetic
		// on PHP 8) and clamped at zero so the divisor can never reach zero.
		// The small epsilon keeps the division defined when every weight is 0.
		// NOTE(review): ppl_options_from_post() may already sanitise these — confirm.
		$weights = array();
		foreach (array('weight_content', 'weight_title', 'weight_tags') as $weight_key) {
			$raw_weight = isset($options[$weight_key]) ? (float) $options[$weight_key] : 0.0;
			$weights[$weight_key] = max(0.0, $raw_weight) + 0.0001;
		}
		$wcombined = array_sum($weights);
		foreach ($weights as $weight_key => $weight) {
			$options[$weight_key] = $weight / $wcombined;
		}
		update_option('similar-posts-feed', $options);
		// Show a message to say we've done something
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	//now we drop into html to display the option page form
	?>
	<div class="wrap">
	<h2><?php _e('Other Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Other Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<?php
		ppl_display_weights($options);
		ppl_display_num_terms($options['num_terms']);
		ppl_display_term_extraction($options['term_extraction']);
		ppl_display_hand_links($options['hand_links']);
		ppl_display_stripcodes($options['stripcodes']);
	?>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Other Settings', 'similar_posts') ?>" /></div>
	<?php if (function_exists('wp_nonce_field')) wp_nonce_field('similar-posts-feed-update-options'); ?>
	</form>
	</div>
	<?php
}
|
486 |
+
|
487 |
+
/**
 * Renders the "Report a Bug" subpage of the feed settings by delegating to
 * ppl_bug_form() with the 'similar-posts-feed' identifier.
 */
function similar_posts_feed_bug_subpage(){
	$plugin_id = 'similar-posts-feed';
	ppl_bug_form($plugin_id);
}
|
490 |
+
|
491 |
+
/**
 * Renders the "Remove this Plugin" subpage of the feed settings.
 *
 * Declares the clean-up routine and passes its name, together with the path
 * of the main plugin file (this file's path with '-admin' removed), to
 * ppl_plugin_eradicate_form().
 */
function similar_posts_feed_remove_subpage(){
	// A named function declared inside another function becomes global as soon
	// as the outer function runs. The declaration is therefore guarded and
	// given a plugin-specific name: the generic name 'eradicate' is also
	// declared by the main remove subpage, and a second declaration in the
	// same request is a fatal "cannot redeclare" error.
	if (!function_exists('similar_posts_feed_eradicate')) {
		/**
		 * Deletes both plugin options and drops the feed table.
		 */
		function similar_posts_feed_eradicate() {
			global $wpdb, $table_prefix;
			delete_option('similar-posts');
			delete_option('similar-posts-feed');
			// NOTE(review): the install code visible in this file only creates
			// `{prefix}similar_posts`; confirm a `similar_posts_feed` table exists.
			$table_name = $table_prefix . 'similar_posts_feed';
			// IF EXISTS keeps the removal from failing when the table is absent.
			$wpdb->query("DROP TABLE IF EXISTS `$table_name`");
		}
	}
	// NOTE(review): assumes ppl_plugin_eradicate_form() invokes its first
	// argument as a callback name — confirm against the Post-Plugin Library.
	ppl_plugin_eradicate_form('similar_posts_feed_eradicate', str_replace('-admin', '', __FILE__));
}
|
501 |
+
|
502 |
+
/**
 * Rebuilds the similar-posts index for the whole blog.
 *
 * Empties the `{prefix}similar_posts` table, then reads $wpdb->posts in
 * batches, skips revisions, and inserts one row per post holding the terms
 * returned by sp_get_post_terms(), sp_get_title_terms() and sp_get_tag_terms().
 *
 * @param bool $utf8        passed through to the term-extraction helpers
 * @param bool $use_stemmer passed through to the content/title helpers
 * @param int  $batch       number of rows fetched per query (values below 1 fall back to 100)
 * @param bool $cjk         passed through to the content/title helpers
 * @return int number of posts written to the index
 */
function save_index_entries ($utf8=false, $use_stemmer=false, $batch=100, $cjk=false) {
	global $wpdb, $table_prefix;
	$table_name = $table_prefix.'similar_posts';
	$wpdb->query("TRUNCATE `$table_name`");
	$termcount = 0;
	$start = 0;
	// The batch size is interpolated into the LIMIT clause, so force an integer.
	$batch = (int) $batch;
	if ($batch < 1) $batch = 100;
	// in batches to conserve memory; ORDER BY keeps the paging stable so that
	// consecutive LIMIT windows neither overlap nor skip rows
	while ($posts = $wpdb->get_results("SELECT `ID`, `post_title`, `post_content`, `post_type` FROM $wpdb->posts ORDER BY `ID` LIMIT $start, $batch", ARRAY_A)) {
		// foreach replaces the each()/list() idiom, which was removed in PHP 8
		foreach ($posts as $post) {
			if ($post['post_type'] === 'revision') continue;
			$content = sp_get_post_terms($post['post_content'], $utf8, $use_stemmer, $cjk);
			$title = sp_get_title_terms($post['post_title'], $utf8, $use_stemmer, $cjk);
			$postID = (int) $post['ID'];
			$tags = sp_get_tag_terms($postID, $utf8);
			// NOTE(review): the term strings are interpolated as-is; this assumes
			// the sp_get_*_terms helpers return text that is safe inside a
			// double-quoted SQL literal — confirm before changing the escaping.
			$wpdb->query("INSERT INTO `$table_name` (pID, content, title, tags) VALUES ($postID, \"$content\", \"$title\", \"$tags\")");
			$termcount = $termcount + 1;
		}
		$start += $batch;
		// give each batch a fresh time allowance where the host permits it
		if (!ini_get('safe_mode')) set_time_limit(30);
	}
	unset($posts);
	return $termcount;
}
|
529 |
+
|
530 |
+
// this function gets called when the plugin is installed to set up the index and default options
|
531 |
+
function similar_posts_install() {
|
532 |
+
global $wpdb, $table_prefix;
|
533 |
+
|
534 |
+
$table_name = $table_prefix . 'similar_posts';
|
535 |
+
$errorlevel = error_reporting(0);
|
536 |
+
$suppress = $wpdb->hide_errors();
|
537 |
+
$sql = "CREATE TABLE IF NOT EXISTS `$table_name` (
|
538 |
+
`pID` bigint( 20 ) unsigned NOT NULL ,
|
539 |
+
`content` longtext NOT NULL ,
|
540 |
+
`title` text NOT NULL ,
|
541 |
+
`tags` text NOT NULL ,
|
542 |
+
FULLTEXT KEY `title` ( `title` ) ,
|
543 |
+
FULLTEXT KEY `content` ( `content` ) ,
|
544 |
+
FULLTEXT KEY `tags` ( `tags` )
|
545 |
+
) ENGINE = MyISAM CHARSET = utf8;";
|
546 |
+
$wpdb->query($sql);
|
547 |
+
// MySQL before 4.1 doesn't recognise the character set properly, so if there's an error we can try without
|
548 |
+
if ($wpdb->last_error !== '') {
|
549 |
+
$sql = "CREATE TABLE IF NOT EXISTS `$table_name` (
|
550 |
+
`pID` bigint( 20 ) unsigned NOT NULL ,
|
551 |
+
`content` longtext NOT NULL ,
|
552 |
+
`title` text NOT NULL ,
|
553 |
+
`tags` text NOT NULL ,
|
554 |
+
FULLTEXT KEY `title` ( `title` ) ,
|
555 |
+
FULLTEXT KEY `content` ( `content` ) ,
|
556 |
+
FULLTEXT KEY `tags` ( `tags` )
|
557 |
+
) ENGINE = MyISAM;";
|
558 |
+
$wpdb->query($sql);
|
559 |
+
}
|
560 |
+
$options = (array) get_option('similar-posts-feed');
|
561 |
+
// check each of the option values and, if empty, assign a default (doing it this long way
|
562 |
+
// lets us add new options in later versions)
|
563 |
+
if (!isset($options['limit'])) $options['limit'] = 5;
|
564 |
+
if (!isset($options['skip'])) $options['skip'] = 0;
|
565 |
+
if (!isset($options['age'])) {$options['age']['direction'] = 'none'; $options['age']['length'] = '0'; $options['age']['duration'] = 'month';}
|
566 |
+
if (!isset($options['divider'])) $options['divider'] = '';
|
567 |
+
if (!isset($options['omit_current_post'])) $options['omit_current_post'] = 'true';
|
568 |
+
if (!isset($options['show_private'])) $options['show_private'] = 'false';
|
569 |
+
if (!isset($options['show_pages'])) $options['show_pages'] = 'false';
|
570 |
+
if (!isset($options['show_attachments'])) $options['show_attachments'] = 'false';
|
571 |
+
// show_static is now show_pages
|
572 |
+
if ( isset($options['show_static'])) {$options['show_pages'] = $options['show_static']; unset($options['show_static']);};
|
573 |
+
if (!isset($options['none_text'])) $options['none_text'] = __('None Found', 'similar_posts');
|
574 |
+
if (!isset($options['no_text'])) $options['no_text'] = 'false';
|
575 |
+
if (!isset($options['tag_str'])) $options['tag_str'] = '';
|
576 |
+
if (!isset($options['excluded_cats'])) $options['excluded_cats'] = '';
|
577 |
+
if ($options['excluded_cats'] === '9999') $options['excluded_cats'] = '';
|
578 |
+
if (!isset($options['included_cats'])) $options['included_cats'] = '';
|
579 |
+
if ($options['included_cats'] === '9999') $options['included_cats'] = '';
|
580 |
+
if (!isset($options['excluded_authors'])) $options['excluded_authors'] = '';
|
581 |
+
if ($options['excluded_authors'] === '9999') $options['excluded_authors'] = '';
|
582 |
+
if (!isset($options['included_authors'])) $options['included_authors'] = '';
|
583 |
+
if ($options['included_authors'] === '9999') $options['included_authors'] = '';
|
584 |
+
if (!isset($options['included_posts'])) $options['included_posts'] = '';
|
585 |
+
if (!isset($options['excluded_posts'])) $options['excluded_posts'] = '';
|
586 |
+
if ($options['excluded_posts'] === '9999') $options['excluded_posts'] = '';
|
587 |
+
if (!isset($options['stripcodes'])) $options['stripcodes'] = array(array());
|
588 |
+
if (!isset($options['prefix'])) $options['prefix'] = 'Similar Posts:<ul>';
|
589 |
+
if (!isset($options['suffix'])) $options['suffix'] = '</ul>';
|
590 |
+
if (!isset($options['output_template'])) $options['output_template'] = '<li>{link}</li>';
|
591 |
+
if (!isset($options['match_cat'])) $options['match_cat'] = 'false';
|
592 |
+
if (!isset($options['match_tags'])) $options['match_tags'] = 'false';
|
593 |
+
if (!isset($options['match_author'])) $options['match_author'] = 'false';
|
594 |
+
if (!isset($options['custom'])) {$options['custom']['key'] = ''; $options['custom']['op'] = '='; $options['custom']['value'] = '';}
|
595 |
+
if (!isset($options['sort'])) {$options['sort']['by1'] = ''; $options['sort']['order1'] = SORT_ASC; $options['sort']['case1'] = 'false';$options['sort']['by2'] = ''; $options['sort']['order2'] = SORT_ASC; $options['sort']['case2'] = 'false';}
|
596 |
+
if (!isset($options['status'])) {$options['status']['publish'] = 'true'; $options['status']['private'] = 'false'; $options['status']['draft'] = 'false'; $options['status']['future'] = 'false';}
|
597 |
+
if (!isset($options['group_template'])) $options['group_template'] = '';
|
598 |
+
if (!isset($options['weight_content'])) $options['weight_content'] = 0.9;
|
599 |
+
if (!isset($options['weight_title'])) $options['weight_title'] = 0.1;
|
600 |
+
if (!isset($options['weight_tags'])) $options['weight_tags'] = 0.0;
|
601 |
+
if (!isset($options['num_terms'])) $options['num_terms'] = 20;
|
602 |
+
if (!isset($options['term_extraction'])) $options['term_extraction'] = 'frequency';
|
603 |
+
if (!isset($options['hand_links'])) $options['hand_links'] = 'false';
|
604 |
+
update_option('similar-posts-feed', $options);
|
605 |
+
|
606 |
+
$options = (array) get_option('similar-posts');
|
607 |
+
// check each of the option values and, if empty, assign a default (doing it this long way
|
608 |
+
// lets us add new options in later versions)
|
609 |
+
if (!isset($options['feed_active'])) $options['feed_active'] = 'false';
|
610 |
+
if (!isset($options['limit'])) $options['limit'] = 5;
|
611 |
+
if (!isset($options['skip'])) $options['skip'] = 0;
|
612 |
+
if (!isset($options['age'])) {$options['age']['direction'] = 'none'; $options['age']['length'] = '0'; $options['age']['duration'] = 'month';}
|
613 |
+
if (!isset($options['divider'])) $options['divider'] = '';
|
614 |
+
if (!isset($options['omit_current_post'])) $options['omit_current_post'] = 'true';
|
615 |
+
if (!isset($options['show_private'])) $options['show_private'] = 'false';
|
616 |
+
if (!isset($options['show_pages'])) $options['show_pages'] = 'false';
|
617 |
+
if (!isset($options['show_attachments'])) $options['show_attachments'] = 'false';
|
618 |
+
// show_static is now show_pages
|
619 |
+
if ( isset($options['show_static'])) {$options['show_pages'] = $options['show_static']; unset($options['show_static']);};
|
620 |
+
if (!isset($options['none_text'])) $options['none_text'] = __('None Found', 'similar_posts');
|
621 |
+
if (!isset($options['no_text'])) $options['no_text'] = 'false';
|
622 |
+
if (!isset($options['tag_str'])) $options['tag_str'] = '';
|
623 |
+
if (!isset($options['excluded_cats'])) $options['excluded_cats'] = '';
|
624 |
+
if ($options['excluded_cats'] === '9999') $options['excluded_cats'] = '';
|
625 |
+
if (!isset($options['included_cats'])) $options['included_cats'] = '';
|
626 |
+
if ($options['included_cats'] === '9999') $options['included_cats'] = '';
|
627 |
+
if (!isset($options['excluded_authors'])) $options['excluded_authors'] = '';
|
628 |
+
if ($options['excluded_authors'] === '9999') $options['excluded_authors'] = '';
|
629 |
+
if (!isset($options['included_authors'])) $options['included_authors'] = '';
|
630 |
+
if ($options['included_authors'] === '9999') $options['included_authors'] = '';
|
631 |
+
if (!isset($options['included_posts'])) $options['included_posts'] = '';
|
632 |
+
if (!isset($options['excluded_posts'])) $options['excluded_posts'] = '';
|
633 |
+
if ($options['excluded_posts'] === '9999') $options['excluded_posts'] = '';
|
634 |
+
if (!isset($options['stripcodes'])) $options['stripcodes'] = array(array());
|
635 |
+
if (!isset($options['prefix'])) $options['prefix'] = '<ul>';
|
636 |
+
if (!isset($options['suffix'])) $options['suffix'] = '</ul>';
|
637 |
+
if (!isset($options['output_template'])) $options['output_template'] = '<li>{link}</li>';
|
638 |
+
if (!isset($options['match_cat'])) $options['match_cat'] = 'false';
|
639 |
+
if (!isset($options['match_tags'])) $options['match_tags'] = 'false';
|
640 |
+
if (!isset($options['match_author'])) $options['match_author'] = 'false';
|
641 |
+
if (!isset($options['content_filter'])) $options['content_filter'] = 'false';
|
642 |
+
if (!isset($options['custom'])) {$options['custom']['key'] = ''; $options['custom']['op'] = '='; $options['custom']['value'] = '';}
|
643 |
+
if (!isset($options['sort'])) {$options['sort']['by1'] = ''; $options['sort']['order1'] = SORT_ASC; $options['sort']['case1'] = 'false';$options['sort']['by2'] = ''; $options['sort']['order2'] = SORT_ASC; $options['sort']['case2'] = 'false';}
|
644 |
+
if (!isset($options['status'])) {$options['status']['publish'] = 'true'; $options['status']['private'] = 'false'; $options['status']['draft'] = 'false'; $options['status']['future'] = 'false';}
|
645 |
+
if (!isset($options['group_template'])) $options['group_template'] = '';
|
646 |
+
if (!isset($options['weight_content'])) $options['weight_content'] = 0.9;
|
647 |
+
if (!isset($options['weight_title'])) $options['weight_title'] = 0.1;
|
648 |
+
if (!isset($options['weight_tags'])) $options['weight_tags'] = 0.0;
|
649 |
+
if (!isset($options['num_terms'])) $options['num_terms'] = 20;
|
650 |
+
if (!isset($options['term_extraction'])) $options['term_extraction'] = 'frequency';
|
651 |
+
if (!isset($options['hand_links'])) $options['hand_links'] = 'false';
|
652 |
+
if (!isset($options['utf8'])) $options['utf8'] = 'false';
|
653 |
+
if (!function_exists('mb_internal_encoding')) $options['utf8'] = 'false';
|
654 |
+
if (!isset($options['cjk'])) $options['cjk'] = 'false';
|
655 |
+
if (!function_exists('mb_internal_encoding')) $options['cjk'] = 'false';
|
656 |
+
if (!isset($options['use_stemmer'])) $options['use_stemmer'] = 'false';
|
657 |
+
if (!isset($options['batch'])) $options['batch'] = '100';
|
658 |
+
|
659 |
+
update_option('similar-posts', $options);
|
660 |
+
|
661 |
+
// initial creation of the index, if the table is empty
|
662 |
+
$num_index_posts = $wpdb->get_var("SELECT COUNT(*) FROM `$table_name`");
|
663 |
+
if ($num_index_posts == 0) save_index_entries (($options['utf8'] === 'true'), false, $options['batch'], ($options['cjk'] === 'true'));
|
664 |
+
|
665 |
+
// deactivate legacy Similar Posts Feed if present
|
666 |
+
$current = get_option('active_plugins');
|
667 |
+
if (in_array('Similar_Posts_Feed/similar-posts-feed.php', $current)) {
|
668 |
+
array_splice($current, array_search('Similar_Posts_Feed/similar-posts-feed.php', $current), 1);
|
669 |
+
update_option('active_plugins', $current);
|
670 |
+
}
|
671 |
+
unset($current);
|
672 |
+
|
673 |
+
// clear legacy custom fields
|
674 |
+
$wpdb->query("DELETE FROM $wpdb->postmeta WHERE meta_key = 'similarterms'");
|
675 |
+
|
676 |
+
// clear legacy index
|
677 |
+
$indices = $wpdb->get_results("SHOW INDEX FROM $wpdb->posts", ARRAY_A);
|
678 |
+
foreach ($indices as $index) {
|
679 |
+
if ($index['Key_name'] === 'post_similar') {
|
680 |
+
$wpdb->query("ALTER TABLE $wpdb->posts DROP INDEX post_similar");
|
681 |
+
break;
|
682 |
+
}
|
683 |
+
}
|
684 |
+
|
685 |
+
$wpdb->show_errors($suppress);
|
686 |
+
error_reporting($errorlevel);
|
687 |
+
}
|
688 |
+
|
689 |
+
if (!function_exists('ppl_plugin_basename')) {
	/**
	 * Returns the path of a plugin file relative to the plugins directory.
	 *
	 * Both the given path and WP_PLUGIN_DIR are normalised first: backslashes
	 * become forward slashes and runs of slashes collapse to a single one.
	 * If the normalised path does not start with the normalised plugins
	 * directory it is returned normalised but otherwise untouched.
	 *
	 * @param string $file Path to a plugin file.
	 * @return string Normalised path with the leading plugins directory removed.
	 */
	function ppl_plugin_basename($file) {
		// Normalise separators (Win32 installs) and squeeze duplicate slashes.
		$path = preg_replace('|/+|', '/', strtr($file, '\\', '/'));
		$root = preg_replace('|/+|', '/', strtr(WP_PLUGIN_DIR, '\\', '/'));
		// Strip "<plugins dir>/" from the front to leave the relative path.
		$prefix_pattern = '|^' . preg_quote($root, '|') . '/|';
		return preg_replace($prefix_pattern, '', $path);
	}
}
|
699 |
+
|
700 |
+
// Register similar_posts_install on the 'activate_<plugin>' hook; the hook name is this file's
// plugin-relative path with '-admin' removed, i.e. the path of the main plugin file.
add_action('activate_'.str_replace('-admin', '', ppl_plugin_basename(__FILE__)), 'similar_posts_install');
|
701 |
+
|
702 |
+
?>
|
similar-posts.php
ADDED
@@ -0,0 +1,584 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?php
|
2 |
+
/*
|
3 |
+
Plugin Name:Similar Posts
|
4 |
+
Plugin URI: http://rmarsh.com/plugins/similar-posts/
|
5 |
+
Description: Displays a <a href="options-general.php?page=similar-posts.php">highly configurable</a> list of related posts. Similarity can be based on any combination of word usage in the content, title, or tags. Don't be disturbed if it takes a few moments to complete the installation -- the plugin is indexing your posts. <a href="http://rmarsh.com/plugins/post-options/">Instructions and help online</a>. Requires the latest version of the <a href="http://wordpress.org/extend/plugins/post-plugin-library/">Post-Plugin Library</a> to be installed.
|
6 |
+
Version: 2.6.0.0
|
7 |
+
Author: Rob Marsh, SJ
|
8 |
+
Author URI: http://rmarsh.com/
|
9 |
+
*/
|
10 |
+
|
11 |
+
/*
|
12 |
+
Copyright 2008 Rob Marsh, SJ (http://rmarsh.com)
|
13 |
+
|
14 |
+
This program is free software; you can redistribute it and/or modify
|
15 |
+
it under the terms of the GNU General Public License as published by
|
16 |
+
the Free Software Foundation; either version 2 of the License, or
|
17 |
+
(at your option) any later version.
|
18 |
+
|
19 |
+
This program is distributed in the hope that it will be useful,
|
20 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
21 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
22 |
+
GNU General Public License for more details: http://www.gnu.org/licenses/gpl.txt
|
23 |
+
*/
|
24 |
+
|
25 |
+
// Both version variables are set to the same release string.
$similar_posts_version = $similar_posts_feed_version= '2.6.0.0';
|
26 |
+
|
27 |
+
/*
|
28 |
+
Template Tag: Displays the posts most similar to the current post.
|
29 |
+
e.g.: <?php similar_posts(); ?>
|
30 |
+
Full help and instructions at http://rmarsh.com/plugins/post-options/
|
31 |
+
*/
|
32 |
+
|
33 |
+
/**
 * Template tag: prints the list produced by SimilarPosts::execute().
 *
 * @param string $args Optional arguments forwarded unchanged to SimilarPosts::execute().
 * @return void The generated markup is echoed, not returned.
 */
function similar_posts($args = '') {
	$markup = SimilarPosts::execute($args);
	echo $markup;
}
|
36 |
+
|
37 |
+
/*
|
38 |
+
|
39 |
+
'innards'
|
40 |
+
|
41 |
+
*/
|
42 |
+
|
43 |
+
// Define DSEP as a short alias for DIRECTORY_SEPARATOR unless something else already defined it.
if (!defined('DSEP')) define('DSEP', DIRECTORY_SEPARATOR);
|
44 |
+
// If the POST_PLUGIN_LIBRARY constant is not defined, try to activate the Post-Plugin Library plugin.
if (!defined('POST_PLUGIN_LIBRARY')) SimilarPosts::install_post_plugin_library();
|
45 |
+
|
46 |
+
/**
 * Namespace-style container for the plugin's entry points.
 *
 * Every method is invoked statically (SimilarPosts::execute(), ...), so the
 * methods are declared static; calling a non-static method statically is a
 * fatal error on PHP 8.
 */
class SimilarPosts {

	/**
	 * Builds the markup listing the posts most similar to the current post.
	 *
	 * Stored options (read via ppl_set_options under $option_key) are combined
	 * with any overrides in $args, a scoring query is assembled from the
	 * where_* / score_* helpers and run, and each row is expanded through the
	 * output template. When POC_CACHE_4 is defined the result is fetched
	 * from / stored in that cache.
	 *
	 * @param string $args                    Option overrides, parsed by ppl_parse_args().
	 * @param string $default_output_template Template handed to ppl_set_options() as the default.
	 * @param string $option_key              Name of the stored option set to use.
	 * @return string Markup followed by an HTML timing comment, or '' when the
	 *                Post-Plugin Library is not loaded.
	 */
	public static function execute($args='', $default_output_template='<li>{link}</li>', $option_key='similar-posts'){
		if (!SimilarPosts::check_post_plugin_library('<a href="http://downloads.wordpress.org/plugin/post-plugin-library.zip">'.__('Post-Plugin Library missing').'</a>')) return '';
		global $table_prefix, $wpdb, $wp_version;
		$start_time = ppl_microtime();
		$postid = ppl_current_post_id();
		if (defined('POC_CACHE_4')) {
			$cache_key = $option_key.$postid.$args;
			$result = poc_cache_fetch($cache_key);
			if ($result !== false) return $result . sprintf("<!-- Similar Posts took %.3f ms (cached) -->", 1000 * (ppl_microtime() - $start_time));
		}
		$table_name = $table_prefix . 'similar_posts';
		// First we process any arguments to see if any defaults have been overridden
		$options = ppl_parse_args($args);
		// Next we retrieve the stored options and use them unless a value has been overridden via the arguments
		$options = ppl_set_options($option_key, $options, $default_output_template);
		if (0 < $options['limit']) {
			// Tag features are gated on WordPress 2.3+. version_compare() is used because a
			// numeric comparison of the version string treats e.g. "2.10" as 2.1.
			$has_tags = version_compare($wp_version, '2.3', '>=');
			$match_tags = ($options['match_tags'] !== 'false' && $has_tags);
			$exclude_cats = ($options['excluded_cats'] !== '');
			$include_cats = ($options['included_cats'] !== '');
			$exclude_authors = ($options['excluded_authors'] !== '');
			$include_authors = ($options['included_authors'] !== '');
			$exclude_posts = (trim($options['excluded_posts']) !== '');
			$include_posts = (trim($options['included_posts']) !== '');
			$match_category = ($options['match_cat'] === 'true');
			$match_author = ($options['match_author'] === 'true');
			$use_tag_str = ('' != trim($options['tag_str']) && $has_tags);
			$omit_current_post = ($options['omit_current_post'] !== 'false');
			$hide_pass = ($options['show_private'] === 'false');
			$check_age = ('none' !== $options['age']['direction']);
			$check_custom = (trim($options['custom']['key']) !== '');
			// Both LIMIT operands can come from template-tag arguments; force them to integers
			// before they are interpolated into the SQL.
			$limit = max(0, (int) $options['skip']) . ', ' . (int) $options['limit'];

			//get the terms to do the matching
			if ($options['term_extraction'] === 'pagerank') {
				list( $contentterms, $titleterms, $tagterms) = sp_terms_by_textrank($postid, $options['num_terms']);
			} else {
				list( $contentterms, $titleterms, $tagterms) = sp_terms_by_freq($postid, $options['num_terms']);
			}
			// these should add up to 1.0
			$weight_content = $options['weight_content'];
			$weight_title = $options['weight_title'];
			$weight_tags = $options['weight_tags'];
			// below a threshold we ignore the weight completely and save some effort
			if ($weight_content < 0.001) $weight_content = (int) 0;
			if ($weight_title < 0.001) $weight_title = (int) 0;
			if ($weight_tags < 0.001) $weight_tags = (int) 0;

			// Scale each weight by a fixed factor and divide by the number of terms in its list.
			$count_content = substr_count((string) $contentterms, ' ') + 1;
			$count_title = substr_count((string) $titleterms, ' ') + 1;
			$count_tags = substr_count((string) $tagterms, ' ') + 1;
			if ($weight_content) $weight_content = 57.0 * $weight_content / $count_content;
			if ($weight_title) $weight_title = 18.0 * $weight_title / $count_title;
			if ($weight_tags) $weight_tags = 24.0 * $weight_tags / $count_tags;
			// Stays null unless hand-picked links are enabled (the original passed an undefined variable here).
			$forced_ids = null;
			if ($options['hand_links'] === 'true') {
				// check custom field for manual links
				$forced_ids = $wpdb->get_var("SELECT meta_value FROM $wpdb->postmeta WHERE post_id = " . (int) $postid . " AND meta_key = 'sp_similar' ") ;
			}
			// the workhorse...
			$sql = "SELECT *, ";
			$sql .= score_fulltext_match($table_name, $weight_title, $titleterms, $weight_content, $contentterms, $weight_tags, $tagterms, $forced_ids);

			if ($check_custom) $sql .= "LEFT JOIN $wpdb->postmeta ON post_id = ID ";

			// build the 'WHERE' clause
			$where = array();
			$where[] = where_fulltext_match($weight_title, $titleterms, $weight_content, $contentterms, $weight_tags, $tagterms);
			if (!function_exists('get_post_type')) {
				$where[] = where_hide_future();
			} else {
				$where[] = where_show_status($options['status'], $options['show_attachments']);
			}
			if ($match_category) $where[] = where_match_category();
			if ($match_tags) $where[] = where_match_tags($options['match_tags']);
			if ($match_author) $where[] = where_match_author();
			$where[] = where_show_pages($options['show_pages'], $options['show_attachments']);
			if ($include_cats) $where[] = where_included_cats($options['included_cats']);
			if ($exclude_cats) $where[] = where_excluded_cats($options['excluded_cats']);
			if ($exclude_authors) $where[] = where_excluded_authors($options['excluded_authors']);
			if ($include_authors) $where[] = where_included_authors($options['included_authors']);
			if ($exclude_posts) $where[] = where_excluded_posts(trim($options['excluded_posts']));
			if ($include_posts) $where[] = where_included_posts(trim($options['included_posts']));
			if ($use_tag_str) $where[] = where_tag_str($options['tag_str']);
			if ($omit_current_post) $where[] = where_omit_post();
			if ($hide_pass) $where[] = where_hide_pass();
			if ($check_age) $where[] = where_check_age($options['age']['direction'], $options['age']['length'], $options['age']['duration']);
			if ($check_custom) $where[] = where_check_custom($options['custom']['key'], $options['custom']['op'], $options['custom']['value']);
			$sql .= "WHERE ".implode(' AND ', $where);
			if ($check_custom) $sql .= " GROUP BY $wpdb->posts.ID";
			$sql .= " ORDER BY score DESC LIMIT $limit";
			$results = $wpdb->get_results($sql);
		} else {
			$results = false;
		}
		if ($results) {
			$translations = ppl_prepare_template($options['output_template']);
			$items = array();
			foreach ($results as $result) {
				$items[] = ppl_expand_template($result, $options['output_template'], $translations, $option_key);
			}
			if ($options['sort']['by1'] !== '') $items = ppl_sort_items($options['sort'], $results, $option_key, $options['group_template'], $items);
			$output = implode(($options['divider']) ? $options['divider'] : "\n", $items);
			$output = $options['prefix'] . $output . $options['suffix'];
		} else {
			// if we reach here our query has produced no output ... so what next?
			if ($options['no_text'] !== 'false') {
				$output = ''; // we display nothing at all
			} else {
				// we display the blank message, with tags expanded if necessary
				$translations = ppl_prepare_template($options['none_text']);
				$output = $options['prefix'] . ppl_expand_template(array(), $options['none_text'], $translations, $option_key) . $options['suffix'];
			}
		}
		if (defined('POC_CACHE_4')) poc_cache_store($cache_key, $output);
		return $output . sprintf("<!-- Similar Posts took %.3f ms -->", 1000 * (ppl_microtime() - $start_time));
	}

	/**
	 * Tries to activate the Post-Plugin Library by adding its plugin path to
	 * the 'active_plugins' option and firing its activation hook.
	 *
	 * Does nothing when the path is already listed.
	 *
	 * @return void
	 */
	public static function install_post_plugin_library() {
		$plugin_path = 'post-plugin-library/post-plugin-library.php';
		$current = get_option('active_plugins');
		// A non-array value (e.g. false when the option is missing) would make in_array() fail.
		if (!is_array($current)) $current = array();
		if (!in_array($plugin_path, $current)) {
			$current[] = $plugin_path;
			update_option('active_plugins', $current);
			do_action('activate_'.$plugin_path);
		}
	}

	/**
	 * Reports whether the Post-Plugin Library is loaded, detected by the
	 * presence of its ppl_microtime() function.
	 *
	 * @param string $msg Message echoed when the library is missing.
	 * @return bool True when ppl_microtime() exists, false otherwise.
	 */
	public static function check_post_plugin_library($msg) {
		$exists = function_exists('ppl_microtime');
		if (!$exists) echo $msg;
		return $exists;
	}

}
|
181 |
+
|
182 |
+
/**
 * Picks the most frequent words of a post's indexed content.
 *
 * Reads the post's row from the `<prefix>similar_posts` index table, counts
 * how often each space-separated word occurs in its content and keeps the
 * $num_terms most frequent ones.
 *
 * @param int $ID        Post ID whose index row is read.
 * @param int $num_terms Number of words to keep; values below 1 are treated as 1.
 * @return array Three strings: the chosen words (each preceded by a space),
 *               the indexed title terms and the indexed tag terms. All three
 *               are '' when $ID is empty or the post has no index row.
 */
function sp_terms_by_freq($ID, $num_terms = 20) {
	$ID = (int) $ID; // the ID is interpolated into SQL below
	if (!$ID) return array('', '', '');
	global $wpdb, $table_prefix;
	$table_name = $table_prefix . 'similar_posts';
	$results = $wpdb->get_results("SELECT title, content, tags FROM $table_name WHERE pID=$ID LIMIT 1", ARRAY_A);
	// Always hand back three elements so callers can list() the result safely.
	if (!$results) return array('', '', '');

	$wordtable = array();
	$word = strtok((string) $results[0]['content'], ' ');
	while ($word !== false) {
		if (isset($wordtable[$word])) {
			$wordtable[$word] += 1;
		} else {
			$wordtable[$word] = 1;
		}
		$word = strtok(' ');
	}
	arsort($wordtable);
	if ($num_terms < 1) $num_terms = 1;
	// preserve_keys must be true: PHP stores all-digit words such as "2008" under integer keys
	// and array_slice() would otherwise renumber them, turning the word into "0", "1", ...
	$wordtable = array_slice($wordtable, 0, $num_terms, true);

	$terms = '';
	foreach ($wordtable as $word => $count) {
		$terms .= ' ' . $word;
	}

	return array($terms, $results[0]['title'], $results[0]['tags']);
}
|
210 |
+
|
211 |
+
|
212 |
+
// adapted PageRank algorithm see http://www.cs.unt.edu/~rada/papers/mihalcea.emnlp04.pdf
|
213 |
+
// and the weighted version http://www.cs.unt.edu/~rada/papers/hassan.ieee07.pdf
|
214 |
+
/**
 * Picks the highest-ranked words of a post's indexed content using an
 * edge-weighted PageRank over the word sequence.
 *
 * Each distinct word is a vertex; an edge from the preceding word to the
 * current one is weighted by how many times that pair occurs. Ranks are
 * iterated until the summed change drops to 0.005 per vertex or less.
 *
 * @param int $ID        Post ID whose index row is read.
 * @param int $num_terms Number of words to keep; values below 1 are treated as 1.
 * @return array Three strings: the chosen words (each preceded by a space),
 *               the indexed title terms and the indexed tag terms. All three
 *               are '' when $ID is empty or the post has no index row.
 */
function sp_terms_by_textrank($ID, $num_terms = 20) {
	global $wpdb, $table_prefix;
	$ID = (int) $ID; // the ID is interpolated into SQL below
	if (!$ID) return array('', '', '');
	$table_name = $table_prefix . 'similar_posts';
	$results = $wpdb->get_results("SELECT title, content, tags FROM $table_name WHERE pID=$ID LIMIT 1", ARRAY_A);
	// Always hand back three elements so callers can list() the result safely.
	if (!$results) return array('', '', '');

	// build a directed graph with words as vertices and, as edges, the words which precede them
	$prev_word = 'aaaaa'; // placeholder predecessor for the very first word
	$graph = array();
	$out_edges = array();
	$word = strtok((string) $results[0]['content'], ' ');
	while ($word !== false) {
		// list the incoming words and keep a tally of how many times words co-occur
		if (!isset($graph[$word][$prev_word])) $graph[$word][$prev_word] = 0;
		$graph[$word][$prev_word] += 1;
		// count how many times each word is followed by another word
		if (!isset($out_edges[$prev_word])) $out_edges[$prev_word] = 0;
		$out_edges[$prev_word] += 1;
		$prev_word = $word;
		$word = strtok(' ');
	}

	$terms = '';
	$n = count($graph);
	// Empty content yields an empty graph; skip the ranking instead of dividing by zero.
	if ($n > 0) {
		// initialise the list of PageRanks-- one for each unique word
		$oldrank = array();
		foreach ($graph as $vertex => $in_edges) {
			$oldrank[$vertex] = 0.25;
		}
		$base = 0.15 / $n;
		$error_margin = $n * 0.005;
		do {
			$error = 0.0;
			$rank = array();
			// the edge-weighted PageRank calculation
			foreach ($graph as $vertex => $in_edges) {
				$r = 0;
				foreach ($in_edges as $edge => $weight) {
					// The placeholder predecessor is not a vertex unless it occurs in the text,
					// so it may have no rank; it then contributes nothing.
					$edge_rank = isset($oldrank[$edge]) ? $oldrank[$edge] : 0;
					$r += ($weight * $edge_rank) / $out_edges[$edge];
				}
				// NOTE(review): a 0.15 base normally pairs with a damping factor of 0.85;
				// 0.95 is kept exactly as in the original -- confirm it is intended.
				$rank[$vertex] = $base + 0.95 * $r;
				$error += abs($rank[$vertex] - $oldrank[$vertex]);
			}
			$oldrank = $rank;
		} while ($error > $error_margin);
		arsort($rank);
		if ($num_terms < 1) $num_terms = 1;
		// preserve_keys must be true: all-digit words are stored under integer keys and
		// array_slice() would otherwise renumber them.
		$rank = array_slice($rank, 0, $num_terms, true);
		foreach ($rank as $vertex => $score) {
			$terms .= ' ' . $vertex;
		}
	}

	return array($terms, $results[0]['title'], $results[0]['tags']);
}
|
266 |
+
|
267 |
+
// do not try and use this function directly -- it is automatically installed when the option is set to show similar posts in feeds
|
268 |
+
/**
 * Content filter: appends the similar-posts list to $content when the
 * current request is a feed, using the 'similar-posts-feed' option set.
 *
 * @param string $content Post content passed through the filter.
 * @return string The content, with the list appended only for feeds.
 */
function similar_posts_for_feed($content) {
	if (!is_feed()) {
		return $content;
	}
	return $content . SimilarPosts::execute('', '<li>{link}</li>', 'similar-posts-feed');
}
|
271 |
+
|
272 |
+
/**
 * Creates or refreshes the index row for one post.
 *
 * Extracts content, title and tag terms for the post (honouring the utf8,
 * cjk and use_stemmer settings of the 'similar-posts' option) and inserts
 * or updates its row in the `<prefix>similar_posts` table. Revisions and
 * posts that cannot be loaded are left alone.
 *
 * @param int $postID ID of the post to index.
 * @return int The $postID that was passed in.
 */
function sp_save_index_entry($postID) {
	global $wpdb, $table_prefix;
	$table_name = $table_prefix . 'similar_posts';
	$post_id = (int) $postID; // integer copy for interpolation into the SQL below
	$post = $wpdb->get_row("SELECT post_content, post_title, post_type FROM $wpdb->posts WHERE ID = $post_id", ARRAY_A);
	// Nothing to index when the post is missing. Revisions are skipped too; the original
	// returned the undefined variable $postid here instead of $postID.
	if (!$post || $post['post_type'] === 'revision') return $postID;
	//extract its terms
	$options = get_option('similar-posts');
	$utf8 = ($options['utf8'] === 'true');
	$cjk = ($options['cjk'] === 'true');
	$use_stemmer = ($options['use_stemmer'] === 'true');
	$content = sp_get_post_terms($post['post_content'], $utf8, $use_stemmer, $cjk);
	$title = sp_get_title_terms($post['post_title'], $utf8, $use_stemmer, $cjk);
	$tags = sp_get_tag_terms($post_id, $utf8);
	// NOTE(review): the term strings are placed in double-quoted SQL literals without escaping;
	// tag terms only have '"' replaced, so a backslash in a tag name could break the statement.
	// Consider escaping through the $wpdb API if the supported WordPress versions allow it.
	//check to see if the field is set
	$pid = $wpdb->get_var("SELECT pID FROM $table_name WHERE pID=$post_id limit 1");
	//then insert if empty
	if (is_null($pid)) {
		$wpdb->query("INSERT INTO $table_name (pID, content, title, tags) VALUES ($post_id, \"$content\", \"$title\", \"$tags\")");
	} else {
		$wpdb->query("UPDATE $table_name SET content=\"$content\", title=\"$title\", tags=\"$tags\" WHERE pID=$post_id" );
	}
	return $postID;
}
|
295 |
+
|
296 |
+
/**
 * Removes a post's row from the `<prefix>similar_posts` index table.
 *
 * @param int $postID ID of the post whose index row is deleted.
 * @return int The $postID that was passed in.
 */
function sp_delete_index_entry($postID) {
	global $wpdb, $table_prefix;
	$index_table = $table_prefix . 'similar_posts';
	$sql = "DELETE FROM " . $index_table . " WHERE pID = " . $postID . " ";
	$wpdb->query($sql);
	return $postID;
}
|
302 |
+
|
303 |
+
/**
 * Prepares text for word extraction (byte-oriented variant).
 *
 * Strips markup, lower-cases the text, turns the typographic apostrophe into
 * a plain one, then blanks out [bracketed] spans, HTML entities, typographic
 * quotes/dashes/ellipses and any apostrophe followed by a non-word character.
 *
 * @param string $text Raw text.
 * @return string Cleaned text; each removed span is replaced by a single space.
 */
function sp_clean_words($text) {
	$plain = strtolower(strip_tags($text));
	$plain = str_replace("’", "'", $plain); // convert MSWord apostrophe
	$noise = array(
		'/\[(.*?)\]/',       // anything in [..]
		'/&[^\s;]+;/',       // entities
		'/‘|’|—|“|”|–|…/',   // MS Word droppings
		"/'\W/",             // apostrophe not followed by a word character
	);
	return preg_replace($noise, ' ', $plain);
}
|
310 |
+
|
311 |
+
/**
 * Prepares text for word extraction (UTF-8 aware variant).
 *
 * Sets the mbstring regex and internal encodings to UTF-8, strips markup,
 * lower-cases with mb_strtolower(), turns the typographic apostrophe into a
 * plain one, then blanks out [bracketed] spans, HTML entities, typographic
 * quotes/dashes/ellipses and any apostrophe followed by a non-word character.
 *
 * @param string $text Raw UTF-8 text.
 * @return string Cleaned text; each removed span is replaced by a single space.
 */
function sp_mb_clean_words($text) {
	mb_regex_encoding('UTF-8');
	mb_internal_encoding('UTF-8');
	$plain = mb_strtolower(strip_tags($text));
	$plain = str_replace("’", "'", $plain); // convert MSWord apostrophe
	$noise = array(
		'/\[(.*?)\]/u',       // anything in [..]
		'/&[^\s;]+;/u',       // entities
		'/‘|’|—|“|”|–|…/u',   // typographic punctuation
		"/'\W/u",             // apostrophe not followed by a word character
	);
	return preg_replace($noise, ' ', $plain);
}
|
320 |
+
|
321 |
+
/**
 * Right-pads a UTF-8 string to $n characters (not bytes).
 *
 * Empty strings and strings already $n characters or longer are returned
 * unchanged.
 *
 * @param string $text Text to pad.
 * @param int    $n    Target length in characters.
 * @param string $c    Padding string, repeated once per missing character.
 * @return string The padded text.
 */
function sp_mb_str_pad($text, $n, $c) {
	mb_internal_encoding('UTF-8');
	$length = mb_strlen($text);
	$needs_padding = ($length > 0 && $length < $n);
	return $needs_padding ? $text . str_repeat($c, $n - $length) : $text;
}
|
329 |
+
|
330 |
+
/**
 * Splits a UTF-8 string into overlapping two-character terms for multi-byte
 * characters while keeping runs of single-byte characters together.
 *
 * Walking the string one character at a time:
 *  - single-byte characters accumulate into the current run;
 *  - a multi-byte character that ends a run flushes the run as one piece;
 *  - any other multi-byte character emits "<previous multi-byte char><char>"
 *    padded to four characters with '_'.
 * Pieces are joined with single spaces.
 *
 * @param string $string UTF-8 text.
 * @return string The space-joined pieces.
 */
function sp_cjk_digrams($string) {
	mb_internal_encoding("UTF-8");
	$pieces = array();
	$run = '';       // pending single-byte characters
	$last_wide = ''; // most recent multi-byte character seen
	$total = mb_strlen($string);
	for ($pos = 0; $pos < $total; $pos++) {
		$char = mb_substr($string, $pos, 1);
		// single-byte chars get combined
		if (strlen($char) <= 1) {
			$run .= $char;
			continue;
		}
		if ($run) {
			$pieces[] = $run;
			$run = '';
		} else {
			$pieces[] = sp_mb_str_pad($last_wide . $char, 4, '_');
		}
		$last_wide = $char;
	}
	if ($run) {
		$pieces[] = $run;
	}
	return implode(' ', $pieces);
}
|
355 |
+
|
356 |
+
/**
 * Turns post content into the space-separated term string stored in the index.
 *
 * The text is cleaned and split into words; words of three characters or
 * fewer are dropped, the rest are optionally stemmed (and padded to four
 * characters with '_'), and anything listed in the global $overusedwords
 * stop list is skipped. Each kept term is followed by a space.
 *
 * The four original utf8/stemmer branches differed only in which string
 * helpers they called, so they are folded into one loop here. The loop uses
 * foreach because each(), used by the original, no longer exists in PHP 8.
 *
 * @param string $text        Raw post content.
 * @param bool   $utf8        Use the multibyte (mb_*) helpers instead of the byte-oriented ones.
 * @param bool   $use_stemmer Reduce words with stem() -- defined outside this block, presumably
 *                            by the loaded language stemmer file.
 * @param bool   $cjk         Post-process the result with sp_cjk_digrams().
 * @return string The term string.
 */
function sp_get_post_terms($text, $utf8, $use_stemmer, $cjk) {
	global $overusedwords;
	if ($utf8) {
		mb_regex_encoding('UTF-8');
		mb_internal_encoding('UTF-8');
		$wordlist = mb_split("\W+", sp_mb_clean_words($text));
	} else {
		$wordlist = str_word_count(sp_clean_words($text), 1);
	}
	// mb_split() returns false on failure; treat that as "no words".
	if (!is_array($wordlist)) $wordlist = array();

	$words = '';
	foreach ($wordlist as $word) {
		$length = $utf8 ? mb_strlen($word) : strlen($word);
		if ($length <= 3) continue;
		if ($use_stemmer) {
			// the stop-word check is made against the padded stem, as in the original
			$word = $utf8 ? sp_mb_str_pad(stem($word), 4, '_') : str_pad(stem($word), 4, '_');
		}
		if (!isset($overusedwords[$word])) {
			$words .= $word . ' ';
		}
	}
	if ($cjk) $words = sp_cjk_digrams($words);
	return $words;
}
|
412 |
+
|
413 |
+
// Short words that sp_get_title_terms() leaves out of the title terms (looked up by key).
$tinywords = array('the' => 1, 'and' => 1, 'of' => 1, 'a' => 1, 'for' => 1, 'on' => 1);
|
414 |
+
|
415 |
+
/**
 * Turns a post title into the space-separated term string stored in the index.
 *
 * The title is cleaned and split into words; words listed in the global
 * $tinywords are skipped, the rest are optionally stemmed and then padded
 * to four characters with '_'. Each kept term is followed by a space.
 *
 * @param string $text        Raw post title.
 * @param bool   $utf8        Use the multibyte (mb_*) helpers instead of the byte-oriented ones.
 * @param bool   $use_stemmer Reduce each word with stem() before padding.
 * @param bool   $cjk         Post-process the result with sp_cjk_digrams().
 * @return string The term string.
 */
function sp_get_title_terms($text, $utf8, $use_stemmer, $cjk) {
	global $tinywords;
	if ($utf8) {
		mb_regex_encoding('UTF-8');
		mb_internal_encoding('UTF-8');
		$candidates = mb_split("\W+", sp_mb_clean_words($text));
	} else {
		$candidates = str_word_count(sp_clean_words($text), 1);
	}
	$terms = '';
	foreach ($candidates as $candidate) {
		if (isset($tinywords[$candidate])) {
			continue;
		}
		$term = $use_stemmer ? stem($candidate) : $candidate;
		$padded = $utf8 ? sp_mb_str_pad($term, 4, '_') : str_pad($term, 4, '_');
		$terms .= $padded . ' ';
	}
	if ($cjk) {
		$terms = sp_cjk_digrams($terms);
	}
	return $terms;
}
|
461 |
+
|
462 |
+
/**
 * Builds the space-separated tag-term string for a post.
 *
 * Reads the names of the post's 'post_tag' terms, replaces double quotes
 * with single quotes, lower-cases and pads each name to four characters
 * with '_', and turns spaces inside a name into underscores so every tag is
 * a single term.
 *
 * @param int  $ID   Post ID.
 * @param bool $utf8 Use the multibyte (mb_*) helpers instead of the byte-oriented ones.
 * @return string The tag terms, or '' when get_object_term_cache() does not
 *                exist or the post has no tags.
 */
function sp_get_tag_terms($ID, $utf8) {
	global $wpdb;
	if (!function_exists('get_object_term_cache')) return '';
	$query = "SELECT t.name FROM $wpdb->terms AS t INNER JOIN $wpdb->term_taxonomy AS tt ON tt.term_id = t.term_id INNER JOIN $wpdb->term_relationships AS tr ON tr.term_taxonomy_id = tt.term_taxonomy_id WHERE tt.taxonomy = 'post_tag' AND tr.object_id = '$ID'";
	$names = $wpdb->get_col($query);
	if (empty($names)) {
		return '';
	}
	if ($utf8) {
		mb_internal_encoding('UTF-8');
	}
	$padded = array();
	foreach ($names as $name) {
		$name = str_replace('"', "'", $name);
		$padded[] = $utf8
			? sp_mb_str_pad(mb_strtolower($name), 4, '_')
			: str_pad(strtolower($name), 4, '_');
	}
	// multi-word tags become one term each
	return implode(' ', str_replace(' ', '_', $padded));
}
|
486 |
+
|
487 |
+
// The settings screens are loaded only when is_admin() reports an admin request.
if (is_admin()) {
	require dirname(__FILE__) . '/similar-posts-admin.php';
}
|
490 |
+
|
491 |
+
/**
 * Registers the "Similar Posts +" sidebar widget and its control form.
 *
 * Returns immediately when register_sidebar_widget() is not available.
 * The two callbacks are declared inside this function, so they only exist
 * once it has run.
 */
function widget_rrm_similar_posts_init() {
    if (! function_exists("register_sidebar_widget")) {
        return;
    }

    /**
     * Prints the widget: wrapper markup, title and the similar_posts() list.
     *
     * @param array $args Sidebar markup; the keys before_widget, after_widget,
     *                    before_title and after_title are used.
     */
    function widget_rrm_similar_posts($args) {
        // Read just the four markup keys instead of extract()ing whatever
        // the caller passed into the local scope.
        $before_widget = isset($args['before_widget']) ? $args['before_widget'] : '';
        $after_widget  = isset($args['after_widget']) ? $args['after_widget'] : '';
        $before_title  = isset($args['before_title']) ? $args['before_title'] : '';
        $after_title   = isset($args['after_title']) ? $args['after_title'] : '';

        // The option is not an array until the control form has been saved;
        // normalise it so the lookups below never hit a missing index.
        $options = get_option('widget_rrm_similar_posts');
        if (!is_array($options)) {
            $options = array();
        }

        // Build "return <condition> || is_admin();" from the saved condition,
        // defaulting to an always-true condition.
        $condition = empty($options['condition']) ? 'true' : $options['condition'];
        $condition = (stristr($condition, "return")) ? $condition : "return ".$condition;
        $condition = rtrim($condition, '; ') . ' || is_admin();';

        // NOTE(review): $condition is PHP source taken from the stored widget
        // option and executed verbatim. Anyone able to save this widget can
        // run arbitrary PHP, and a syntax error in the condition breaks the
        // page. Consider replacing eval() with a whitelist of conditional tags.
        if (eval($condition)) {
            $title = empty($options['title']) ? __('Similar Posts', 'similar_posts') : $options['title'];

            // Unset/zero means "use the default of 10"; otherwise clamp to 1..15.
            $number = isset($options['number']) ? (int) $options['number'] : 0;
            if (!$number) {
                $number = 10;
            } else if ($number < 1) {
                $number = 1;
            } else if ($number > 15) {
                $number = 15;
            }

            echo $before_widget;
            echo $before_title.$title.$after_title;
            similar_posts('limit='.$number);
            echo $after_widget;
        }
    }

    /**
     * Saves the widget settings when the control form was submitted, then
     * prints the control form (title, number of posts, display condition).
     */
    function widget_rrm_similar_posts_control() {
        if (!empty($_POST['widget_rrm_similar_posts_submit'])) {
            // Missing fields fall back to empty values instead of raising
            // undefined-index notices.
            $options = array();
            $options['title'] = isset($_POST['widget_rrm_similar_posts_title'])
                ? strip_tags(stripslashes($_POST['widget_rrm_similar_posts_title']))
                : '';
            $options['number'] = isset($_POST["widget_rrm_similar_posts_number"])
                ? (int) $_POST["widget_rrm_similar_posts_number"]
                : 0;
            $options['condition'] = isset($_POST["widget_rrm_similar_posts_condition"])
                ? stripslashes(trim($_POST["widget_rrm_similar_posts_condition"], '; '))
                : '';
            update_option("widget_rrm_similar_posts", $options);
        } else {
            $options = get_option('widget_rrm_similar_posts');
            if (!is_array($options)) {
                $options = array();
            }
        }

        $title = attribute_escape(isset($options['title']) ? $options['title'] : '');
        // The form shows 5 when no number has been stored yet.
        $number = isset($options['number']) ? (int) $options['number'] : 0;
        if (!$number) {
            $number = 5;
        }
        $condition = attribute_escape(isset($options['condition']) ? $options['condition'] : '');
        ?>
        <p><label for="widget_rrm_similar_posts_title"> <?php _e('Title:', 'similar_posts'); ?> <input style="width: 200px;" id="widget_rrm_similar_posts_title" name="widget_rrm_similar_posts_title" type="text" value="<?php echo $title; ?>" /></label></p>
        <p><label for="widget_rrm_similar_posts_number"> <?php _e('Number of posts to show:', 'similar_posts'); ?> <input style="width: 25px; text-align: center;" id="widget_rrm_similar_posts_number" name="widget_rrm_similar_posts_number" type="text" value="<?php echo $number; ?>" /></label> <?php _e('(at most 15)', 'similar_posts'); ?> </p>
        <p><label for="widget_rrm_similar_posts_condition"> <?php echo sprintf(__('Show only if page: (e.g., %sis_single()%s)', 'similar_posts'), '<a href="http://codex.wordpress.org/Conditional_Tags" title="help">', '</a>'); ?> <input style="width: 200px;" id="widget_rrm_similar_posts_condition" name="widget_rrm_similar_posts_condition" type="text" value="<?php echo $condition; ?>" /></label></p>
        <input type="hidden" id="widget_rrm_similar_posts_submit" name="widget_rrm_similar_posts_submit" value="1" />
        There are many more <a href="options-general.php?page=similar-posts.php">options</a> available.
        <?php
    }

    register_sidebar_widget(__('Similar Posts +', 'similar_posts'), 'widget_rrm_similar_posts');
    register_widget_control(__('Similar Posts +', 'similar_posts'), 'widget_rrm_similar_posts_control', 300, 100);
}
|
539 |
+
|
540 |
+
// Register the sidebar widget once plugins have been loaded.
add_action('plugins_loaded', 'widget_rrm_similar_posts_init');

/*
    Language-specific setup: pick the stemmer and stop-word list that match
    the language WordPress is configured for.
*/

// The first two characters of WPLANG are used as the language code.
// WPLANG may not be defined at all, so guard the lookup.
$language = defined('WPLANG') ? substr(WPLANG, 0, 2) : '';
// If no language is specified fall back to the default, 'en'
// (substr() can yield false rather than '' for an empty WPLANG).
if (!is_string($language) || $language === '') {
    $language = 'en';
}
// Directory holding this language's files, with a trailing separator.
// The trailing constant is DSEP, matching the fallback path below; the
// previous 'DEP' produced a path that never exists, so every language
// silently fell back to English.
$languagedir = dirname(__FILE__).DSEP.'languages'.DSEP.$language.DSEP;
// See if the directory exists and if not revert to the default English dir.
if (!file_exists($languagedir)) {
    $languagedir = dirname(__FILE__).DSEP.'languages'.DSEP.'en'.DSEP;
}

// Import the stemming algorithm ... a single function called 'stem'.
require_once($languagedir.'stemmer.php');
// NOTE(review): stopwords.php presumably defines $overusedwords as a list of
// words -- confirm against the language files.
require_once($languagedir.'stopwords.php');
global $overusedwords;
// Flip the list so that each stop word becomes an array key.
$overusedwords = array_flip($overusedwords);
|
564 |
+
|
565 |
+
/**
 * Plugin start-up: loads the text domain, enables the optional content and
 * feed filters according to the 'similar-posts' option, and installs the
 * actions that keep the index up to date.
 */
function similar_posts_init () {
    global $wp_db_version;
    load_plugin_textdomain('similar_posts');

    // The option is not an array until settings have been saved; normalise
    // it so the flag checks below never read a missing index.
    $options = get_option('similar-posts');
    if (!is_array($options)) {
        $options = array();
    }
    // Both flags are stored as the string 'true'.
    $content_filter = isset($options['content_filter']) && $options['content_filter'] === 'true';
    $feed_active = isset($options['feed_active']) && $options['feed_active'] === 'true';

    if ($content_filter && function_exists('ppl_register_content_filter')) {
        ppl_register_content_filter('SimilarPosts');
    }
    if ($feed_active) {
        add_filter('the_content', 'similar_posts_for_feed');
    }

    // Install the actions to keep the index up to date.
    add_action('save_post', 'sp_save_index_entry', 1);
    add_action('delete_post', 'sp_delete_index_entry', 1);
    // Database versions below 3308 additionally get the edit/publish hooks.
    if ($wp_db_version < 3308 ) {
        add_action('edit_post', 'sp_save_index_entry', 1);
        add_action('publish_post', 'sp_save_index_entry', 1);
    }
}
|
581 |
+
|
582 |
+
// Run similar_posts_init() on the 'init' action, registered with priority 1.
add_action ('init', 'similar_posts_init', 1);
|
583 |
+
|
584 |
+
?>
|