Similar Posts – Best Related Posts Plugin for WordPress

Version Description

Release Info

Developer	RobMarsh
Plugin	Similar Posts – Best Related Posts Plugin for WordPress
Version	2.6.0.0
Comparing to
See all releases

Version 2.6.0.0

Files changed (14) hide show

languages/de/stemmer.php +315 -0
languages/de/stemmer.php.bak +315 -0
languages/de/stopwords.php +4 -0
languages/en/stemmer.php +335 -0
languages/en/stopwords.php +4 -0
languages/es/stemmer.php +381 -0
languages/es/stopwords.php +4 -0
languages/fr/stemmer.php +513 -0
languages/fr/stopwords.php +4 -0
languages/it/stemmer.php +341 -0
languages/it/stopwords.php +4 -0
readme.txt +114 -0
similar-posts-admin.php +702 -0
similar-posts.php +584 -0

languages/de/stemmer.php ADDED Viewed

	@@ -0,0 +1,315 @@


1	+ <?php
2	+ /*
3	+ Adapted from a drupal module -- see details below
4	+ */
5	+
6	+ /*
7	+ Content:
8	+ Drupal module to improve searching in german texts (Porter stemmer)
9	+ Algorithm based on http://snowball.tartarus.org/algorithms/german/stemmer.html
10	+ Author:
11	+ Reiner Miericke 10.10.2007
12	+ References:
13	+ Algorithm:
14	+ http://www.clef-campaign.org/workshop2002/WN/3.pdf
15	+ http://w3.ub.uni-konstanz.de/v13/volltexte/2003/996//pdf/scherer.pdf
16	+ http://kontext.fraunhofer.de/haenelt/kurs/Referate/Kowatschew_Lang/stemming.pdf
17	+ http://www.cis.uni-muenchen.de/people/Schulz/SeminarSoSe2001IR/FilzmayerMargetic/referat.html
18	+ http://www.ifi.unizh.ch/CL/broder/mue1/porter/stemming/node1.html
19	+ For lists of stopwords see
20	+ http://members.unine.ch/jacques.savoy/clef/index.html
21	+ Small parts were stolen from dutchstemmer.module
22	+ */
23	+
24	+
// Vowels of the German stemmer (plain vowels, y and the three umlauts).
// The literal had been destroyed by a bad re-encoding; restored as UTF-8.
define("DE_STEMMER_VOKALE", "aeiouyäöü");

// Detect the encoding of this file's own literals and make it the mbstring
// internal encoding, so the mb_* calls below read text the same way.
$enc = mb_detect_encoding('a-zA-ZÄÖÜßäëïöüáéíóúè');
mb_internal_encoding($enc);
29	+
/**
 * Split a text into its words.
 *
 * Every run of characters that is neither an ASCII letter nor one of the
 * accented letters listed in the character class is a separator; empty
 * pieces are dropped. The accented letters in the class had been destroyed
 * by a bad re-encoding (so words were cut at every umlaut) and are restored.
 *
 * @param string $text Text to split (taken by reference, but not modified).
 * @return array|false List of words, or false if preg_split() fails.
 */
function _de_stemmer_split_text(&$text) {
    return preg_split('/([^a-zA-ZÄÖÜßäëïöüáéíóúè]+)/u', $text, -1, PREG_SPLIT_NO_EMPTY);
}
34	+
35	+
/**
 * Implementation of hook_search_preprocess: stem every word of a text.
 *
 * The text is split on runs of non-letter characters with the separators
 * kept, so the pieces alternate word, separator, word, ... Each word (even
 * index) is replaced by its stem and the pieces are glued back together,
 * leaving the separators untouched.
 *
 * The accented letters in the character class had been destroyed by a bad
 * re-encoding and are restored here.
 *
 * @param string $text Text to process (taken by reference, but not modified).
 * @return string The text with every word replaced by its stem.
 */
function de_stemmer_search_preprocess(&$text) {
    $teile = preg_split('/([^a-zA-ZÄÖÜßäëïöüáéíóúè]+)/u', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    foreach ($teile as $i => $teil) {
        // Even positions hold words, odd positions hold the captured separators.
        if ($i % 2 == 0) {
            $teile[$i] = _de_stemmer_wortstamm($teil);
        }
    }

    return implode('', $teile);
}
69	+
70	+
/**
 * Implementation of hook_help().
 *
 * @param string $section Help section being requested.
 * @return string|null The module description for 'admin/modules#description',
 *                     null for every other section.
 */
function de_stemmer_help($section = 'admin/help#search') {
    switch ($section) {
        case 'admin/modules#description':
            $text = 'Implements a German stemming algorithm (Porter) to improve searching.';
            // t() is Drupal's translation helper. This file was adapted from a
            // Drupal module, so outside Drupal the function does not exist and
            // calling it would be a fatal error; fall back to the plain text.
            return function_exists('t') ? t($text) : $text;
    }
    return null;
}
80	+
81	+
/**
 * Build a word => stem lookup table for a text.
 *
 * The text is split into words, each word is stemmed, and the result is
 * returned as an array keyed by the original word. Words that share a value
 * in this table reduce to the same stem.
 * For details please compare 'search.module-stem.patch'.
 *
 * @param string $text Text to analyse.
 * @return array Map of word => stem.
 */
function de_stemmer_stem_list($text) {
    $stem_list = array();

    // A word occurring several times simply overwrites its own entry.
    foreach (_de_stemmer_split_text($text) as $wort) {
        $stem_list[$wort] = _de_stemmer_wortstamm($wort);
    }

    return $stem_list;
}
100	+
101	+
/**
 * Compute where the stemmer's next region starts in a word.
 *
 * Counts the leading bytes that are not in DE_STEMMER_VOKALE, then the run of
 * vowel bytes that follows, and adds one for the byte after that run. The
 * result is a byte offset and may exceed the length of the word when nothing
 * follows the vowel run.
 *
 * @param string $wort Word (or rest of a word) to inspect.
 * @return int Byte offset of the region start, relative to $wort.
 */
function _de_stemmer_region_n($wort) {
    $vor_vokal = strcspn($wort, DE_STEMMER_VOKALE);
    $vokal_lauf = strspn($wort, DE_STEMMER_VOKALE, $vor_vokal);

    return $vor_vokal + $vokal_lauf + 1;
}
106	+
/**
 * Normalise a word before stemming.
 *
 * Lower-cases the word, replaces ß by ss, and puts u and y into upper case
 * when they stand between two vowels, which marks them as non-vowels for the
 * later steps. The ß literals had been destroyed by a bad re-encoding (so ß
 * was never replaced) and are restored here.
 *
 * @param string $wort Word to normalise.
 * @return string|null Normalised word, or null if preg_replace() fails.
 */
function de_stemmer_preprocess($wort) {
    $v = DE_STEMMER_VOKALE;

    $wort = mb_strtolower($wort);
    $wort = str_replace("ß", "ss", $wort);

    // The patterns are applied in order; the first one repeats the ß
    // replacement and is kept from the original code.
    $muster = array(
        '/ß/',
        '/(?<=[' . $v . '])u(?=[' . $v . '])/u',
        '/(?<=[' . $v . '])y(?=[' . $v . '])/u',
    );
    $ersatz = array('ss', 'U', 'Y');

    return preg_replace($muster, $ersatz, $wort);
}
121	+
122	+
/**
 * Finish a stem: lower-case it and strip diacritics.
 *
 * The stem is lower-cased (which also undoes the U/Y markers set during
 * preprocessing). If it is listed as an exception, the exception's
 * replacement is returned as is; otherwise the accented vowels are replaced
 * by their plain counterparts. The accented keys had been destroyed by a bad
 * re-encoding (all ten collapsed into one garbage key) and are restored.
 *
 * @param string $wort Stem produced by the suffix-stripping steps.
 * @return string Final stem.
 */
function _de_stemmer_postprocess($wort) {
    $wort = mb_strtolower($wort);

    // _de_stemmer_ausnahme() rewrites $wort by reference when it matches.
    if (_de_stemmer_ausnahme($wort)) {
        return $wort;
    }

    return strtr($wort, array(
        'ä' => 'a', 'á' => 'a',
        'ë' => 'e', 'é' => 'e',
        'ï' => 'i', 'í' => 'i',
        'ö' => 'o', 'ó' => 'o',
        'ü' => "u", 'ú' => 'u',
    ));
}
137	+
138	+
/**
 * Find the byte offset at which a suffix pattern matches inside a region.
 *
 * @param string $stamm  Current stem.
 * @param string $muster PCRE pattern anchored at the end of the word.
 * @param int    $ab     Byte offset where the region (R1 or R2) starts.
 * @return int|false Byte offset of the match, or false if there is none.
 */
function _de_stemmer_suffix_pos($stamm, $muster, $ab) {
    $laenge = strlen($stamm);

    // The stem gets shorter from step to step, so a region start computed on
    // the full word may lie behind its end by now.
    $ab = min($ab, $laenge);

    // Under /u PCRE rejects an offset that points into the middle of a UTF-8
    // sequence; move forward to the next character boundary.
    while ($ab < $laenge && (ord($stamm[$ab]) & 0xC0) === 0x80) {
        $ab++;
    }

    if (preg_match($muster, $stamm, $hits, PREG_OFFSET_CAPTURE, $ab)) {
        return $hits[0][1];
    }
    return false;
}

/**
 * Reduce a German word to its stem.
 *
 * Fixes over the previous version:
 *  - the suffix alternations were written with an escaped pipe, which PCRE
 *    treats as a literal "|" character, so no rule could ever match;
 *  - region offsets and PREG_OFFSET_CAPTURE positions are byte offsets, but
 *    the stem was cut with mb_substr(), which counts characters, so words
 *    containing umlauts were cut in the wrong place. Everything is byte
 *    based now (strlen/substr).
 *
 * @param string $wort Word to stem.
 * @return string The stem, lower-cased and without diacritics.
 */
function _de_stemmer_wortstamm($wort) {
    $stamm = de_stemmer_preprocess($wort);

    /*
     * R1 is the region after the first non-vowel following a vowel,
     *    or is the null region at the end of the word if there is no such non-vowel.
     * R2 is the region after the first non-vowel following a vowel in R1,
     *    or is the null region at the end of the word if there is no such non-vowel.
     * Both are kept as byte offsets into $stamm.
     */
    $l = strlen($stamm);
    $r1 = _de_stemmer_region_n($stamm);
    $r2 = $r1 >= $l ? $r1 : $r1 + _de_stemmer_region_n(substr($stamm, $r1));
    // Original author was unsure how to read the following rule:
    // "then R1 is ADJUSTED so that the region before it contains at least 3 letters"
    if ($r1 < 3) {
        $r1 = 3;
    }

    /*
     * Step 1
     * Search for the longest among the following suffixes,
     *   (a) e em en ern er es
     *   (b) s (preceded by a valid s-ending)
     * and delete if in R1.
     * (The letter of the valid s-ending is not necessarily in R1; the
     * lookbehind may look in front of the region start.)
     * Matching scans left to right, so the first hit is the longest suffix.
     */
    $pos = _de_stemmer_suffix_pos($stamm, '/(e|em|en|ern|er|es)$/u', $r1);
    if ($pos === false) {
        $pos = _de_stemmer_suffix_pos($stamm, '/(?<=(b|d|f|g|h|k|l|m|n|r|t))s$/u', $r1);
    }
    if ($pos !== false) {
        $stamm = substr($stamm, 0, $pos);
    }

    /*
     * Step 2
     * Search for the longest among the following suffixes,
     *   (a) en er est
     *   (b) st (preceded by a valid st-ending, itself preceded by at least 3 letters)
     * and delete if in R1.
     * NOTE(review): the "at least 3 letters" condition of (b) is not checked
     * by the pattern below; left as in the original code.
     */
    $pos = _de_stemmer_suffix_pos($stamm, '/(en|er|est)$/u', $r1);
    if ($pos === false) {
        $pos = _de_stemmer_suffix_pos($stamm, '/(?<=(b|d|f|g|h|k|l|m|n|t))st$/u', $r1);
    }
    if ($pos !== false) {
        $stamm = substr($stamm, 0, $pos);
    }

    /*
     * Step 3: d-suffixes ( see http://snowball.tartarus.org/texts/glossary.html )
     * The rules are tried in order and the first one that matches wins.
     * Each entry is: pattern, region start, whether the match is deleted.
     * NOTE(review): the referenced algorithm also deletes a preceding
     * ig/lich/er/en in some cases; this code only deletes the suffix itself.
     */
    $regeln = array(
        array('/(?<=eig)(end|ung)$/u',         $r2, false), // end/ung after "eig": keep the word
        array('/(end|ung)$/u',                 $r2, true),
        array('/(?<![e])(ig|ik|isch)$/u',      $r2, true),  // not preceded by e
        array('/(?<=(er|en))(lich|heit)$/u',   $r1, true),
        array('/(lich|heit)$/u',               $r2, true),
        array('/(?<=lich)keit$/u',             $r1, true),
        array('/(?<=ig)keit$/u',               $r1, true),
        array('/keit$/u',                      $r2, true),
    );
    foreach ($regeln as $regel) {
        $pos = _de_stemmer_suffix_pos($stamm, $regel[0], $regel[1]);
        if ($pos !== false) {
            if ($regel[2]) {
                $stamm = substr($stamm, 0, $pos);
            }
            break;
        }
    }

    // Open question from the original author: what about
    // chen, lein, bar, schaft, ... ?
    return _de_stemmer_postprocess($stamm);
}
238	+
239	+
/**
 * Tell whether a (lower-case) word is a German stop word.
 *
 * Changes over the previous version:
 *  - entries containing umlauts or ß had been destroyed by a bad re-encoding
 *    and could never match; they are restored as UTF-8;
 *  - the comparison is exact: the former loose in_array() reported e.g.
 *    boolean true as a stop word;
 *  - the list is flipped once into a key lookup instead of being scanned on
 *    every call.
 *
 * @param mixed $wort Word to test; anything that is not a string is no stop word.
 * @return bool True if $wort is in the list.
 */
function _de_stemmer_stoppwort($wort) {

    static $stoppworte = null;

    if ($stoppworte === null) {
        // Duplicates in the list are harmless: flipping keeps one key per word.
        $stoppworte = array_flip(array(
            'ab', 'aber', 'aber', 'ach', 'acht', 'achte', 'achten', 'achter', 'achtes', 'ag', 'alle', 'allein', 'allem', 'allen', 'aller', 'allerdings', 'alles', 'allgemeinen', 'als', 'als', 'also', 'am', 'an', 'andere', 'anderen', 'andern', 'anders', 'au', 'auch', 'auch', 'auf', 'aus', 'ausser', 'außer', 'ausserdem', 'außerdem',
            'bald', 'bei', 'beide', 'beiden', 'beim', 'bekannt', 'bereits', 'besonders', 'besser', 'besten', 'bin', 'bis', 'bisher', 'bist',
            'da', 'dabei', 'dadurch', 'dafür', 'dagegen', 'daher', 'dahin', 'dahinter', 'damals', 'damit', 'danach', 'daneben', 'dank', 'dann', 'daran', 'darauf', 'daraus', 'darf', 'darfst', 'darin', 'darüber', 'darum', 'darunter', 'das', 'das', 'dasein', 'daselbst', 'dass', 'daß', 'dasselbe', 'davon', 'davor', 'dazu', 'dazwischen', 'dein', 'deine', 'deinem', 'deiner', 'dem', 'dementsprechend', 'demgegenüber', 'demgemäss', 'demgemäß', 'demselben', 'demzufolge', 'den', 'denen', 'denn', 'denn', 'denselben', 'der', 'deren', 'derjenige', 'derjenigen', 'dermassen', 'dermaßen', 'derselbe', 'derselben', 'des', 'deshalb', 'desselben', 'dessen', 'deswegen', 'd.h', 'dich', 'die', 'diejenige', 'diejenigen', 'dies', 'diese', 'dieselbe', 'dieselben', 'diesem', 'diesen', 'dieser', 'dieses', 'dir', 'doch', 'dort', 'drei', 'drin', 'dritte', 'dritten', 'dritter', 'drittes', 'du', 'durch', 'durchaus',
            'eben', 'ebenso', 'eigen', 'eigene', 'eigenen', 'eigener', 'eigenes', 'ein', 'einander', 'eine', 'einem', 'einen', 'einer', 'eines', 'einige', 'einigen', 'einiger', 'einiges', 'einmal', 'einmal', 'eins', 'elf', 'en', 'ende', 'endlich', 'entweder', 'entweder', 'er', 'ernst', 'erst', 'erste', 'ersten', 'erster', 'erstes', 'es', 'etwa', 'etwas', 'euch',
            'früher', 'fünf', 'fünfte', 'fünften', 'fünfter', 'fünftes', 'für',
            'gab', 'ganz', 'ganze', 'ganzen', 'ganzer', 'ganzes', 'gar', 'gedurft', 'gegen', 'gegenüber', 'gehabt', 'gehen', 'geht', 'gekannt', 'gekonnt', 'gemacht', 'gemocht', 'gemusst', 'genug', 'gerade', 'gern', 'gesagt', 'gesagt', 'geschweige', 'gewesen', 'gewollt', 'geworden', 'gibt', 'ging', 'gleich', 'gott', 'gross', 'groß', 'grosse', 'große', 'grossen', 'großen', 'grosser', 'großer', 'grosses', 'großes', 'gut', 'gute', 'guter', 'gutes',
            'habe', 'haben', 'habt', 'hast', 'hat', 'hatte', 'hätte', 'hatten', 'hätten', 'heisst', 'her', 'heute', 'hier', 'hin', 'hinter', 'hoch',
            'ich', 'ihm', 'ihn', 'ihnen', 'ihr', 'ihre', 'ihrem', 'ihren', 'ihrer', 'ihres', 'im', 'im', 'immer', 'in', 'in', 'indem', 'infolgedessen', 'ins', 'irgend', 'ist',
            'ja', 'ja', 'jahr', 'jahre', 'jahren', 'je', 'jede', 'jedem', 'jeden', 'jeder', 'jedermann', 'jedermanns', 'jedoch', 'jemand', 'jemandem', 'jemanden', 'jene', 'jenem', 'jenen', 'jener', 'jenes', 'jetzt',
            'kam', 'kann', 'kannst', 'kaum', 'kein', 'keine', 'keinem', 'keinen', 'keiner', 'kleine', 'kleinen', 'kleiner', 'kleines', 'kommen', 'kommt', 'können', 'könnt', 'konnte', 'könnte', 'konnten', 'kurz',
            'lang', 'lange', 'lange', 'leicht', 'leide', 'lieber', 'los',
            'machen', 'macht', 'machte', 'mag', 'magst', 'mahn', 'man', 'manche', 'manchem', 'manchen', 'mancher', 'manches', 'mann', 'mehr', 'mein', 'meine', 'meinem', 'meinen', 'meiner', 'meines', 'mich', 'mir', 'mit', 'mittel', 'mochte', 'möchte', 'mochten', 'mögen', 'möglich', 'mögt', 'morgen', 'muss', 'muß', 'müssen', 'musst', 'müsst', 'musste', 'mussten',
            'na', 'nach', 'nachdem', 'nahm', 'natürlich', 'neben', 'nein', 'neue', 'neuen', 'neun', 'neunte', 'neunten', 'neunter', 'neuntes', 'nicht', 'nicht', 'nichts', 'nie', 'niemand', 'niemandem', 'niemanden', 'noch', 'nun', 'nun', 'nur',
            'ob', 'oben', 'oder', 'oder', 'offen', 'oft', 'oft', 'ohne',
            'recht', 'rechte', 'rechten', 'rechter', 'rechtes', 'richtig', 'rund',
            'sa', 'sache', 'sagt', 'sagte', 'sah', 'satt', 'schon', 'sechs', 'sechste', 'sechsten', 'sechster', 'sechstes', 'sehr', 'sei', 'sei', 'seid', 'seien', 'sein', 'seine', 'seinem', 'seinen', 'seiner', 'seines', 'seit', 'seitdem', 'selbst', 'selbst', 'sich', 'sie', 'sieben', 'siebente', 'siebenten', 'siebenter', 'siebentes', 'sind', 'so', 'solang', 'solche', 'solchem', 'solchen', 'solcher', 'solches', 'soll', 'sollen', 'sollte', 'sollten', 'sondern', 'sonst', 'sowie', 'später', 'statt',
            'tat', 'teil', 'tel', 'tritt', 'trotzdem', 'tun',
            'über', 'überhaupt', 'übrigens', 'uhr', 'um', 'und', 'und?', 'uns', 'unser', 'unsere', 'unserer', 'unter',
            'vergangenen', 'viel', 'viele', 'vielem', 'vielen', 'vielleicht', 'vier', 'vierte', 'vierten', 'vierter', 'viertes', 'vom', 'von', 'vor',
            'wahr?', 'während', 'währenddem', 'währenddessen', 'wann', 'war', 'wäre', 'waren', 'wart', 'warum', 'was', 'wegen', 'weil', 'weit', 'weiter', 'weitere', 'weiteren', 'weiteres', 'welche', 'welchem', 'welchen', 'welcher', 'welches', 'wem', 'wen', 'wenig', 'wenig', 'wenige', 'weniger', 'weniges', 'wenigstens', 'wenn', 'wenn', 'wer', 'werde', 'werden', 'werdet', 'wessen', 'wie', 'wie', 'wieder', 'will', 'willst', 'wir', 'wird', 'wirklich', 'wirst', 'wo', 'wohl', 'wollen', 'wollt', 'wollte', 'wollten', 'worden', 'wurde', 'würde', 'wurden', 'würden',
            'z.b', 'zehn', 'zehnte', 'zehnten', 'zehnter', 'zehntes', 'zeit', 'zu', 'zuerst', 'zugleich', 'zum', 'zum', 'zunächst', 'zur', 'zurück', 'zusammen', 'zwanzig', 'zwar', 'zwar', 'zwei', 'zweite', 'zweiten', 'zweiter', 'zweites', 'zwischen', 'zwölf'
        ));
    }

    return is_string($wort) && isset($stoppworte[$wort]);
}
268	+
269	+
/**
 * First attempt at a list of exceptions to the final stem clean-up.
 *
 * If the stem is a key of the table, it is replaced (by reference) with the
 * table's value and true is returned; otherwise the stem is left alone and
 * false is returned. The umlauts in keys and values had been destroyed by a
 * bad re-encoding and are restored here.
 *
 * @param string $wort Stem to look up; overwritten on a match.
 * @return bool True if $wort was an exception.
 */
function _de_stemmer_ausnahme(&$wort)
{
    static $de_stemmer_ausnahmen = array (
        'schön' => 'schön', // not "schon"
        'blüt'  => 'blüt',  // Blüte (not Blut)
        'kannt' => 'kenn',
        'küch'  => 'küch',  // Küchen (not Kuchen)
        'mög'   => 'mög',
        'mocht' => 'mög',
        'mag'   => 'mög',
        'ging'  => 'geh',
        'lief'  => 'lauf',
        'änd'   => 'änd'    // ändern (not andern)
    );

    if ( array_key_exists($wort, $de_stemmer_ausnahmen) ) {
        $wort = $de_stemmer_ausnahmen[$wort];
        return TRUE;
    }
    return FALSE;
}
295	+
/*
Stem caching added by Rob Marsh, SJ
http://rmarsh.com
*/

// Stems computed so far, keyed by the word they were computed for.
$StemCache = array();

/**
 * Return the stem of a word, computing it only on first use.
 *
 * @param string $word Word to stem.
 * @return string Stem of $word.
 */
function stem($word) {
    global $StemCache;

    // Cache hit: hand back the stored stem.
    if (isset($StemCache[$word])) {
        return $StemCache[$word];
    }

    // Cache miss: stem the word and remember the result.
    $stemmedword = _de_stemmer_wortstamm($word);
    $StemCache[$word] = $stemmedword;
    return $stemmedword;
}
314	+
315	+ ?>

languages/de/stemmer.php.bak ADDED Viewed

	@@ -0,0 +1,315 @@


1	+ <?php
2	+ /*
3	+ Adapted from a drupal module -- see details below
4	+ */
5	+
6	+ /*
7	+ Content:
8	+ Drupal module to improve searching in german texts (Porter stemmer)
9	+ Algorithm based on http://snowball.tartarus.org/algorithms/german/stemmer.html
10	+ Author:
11	+ Reiner Miericke 10.10.2007
12	+ References:
13	+ Algorithm:
14	+ http://www.clef-campaign.org/workshop2002/WN/3.pdf
15	+ http://w3.ub.uni-konstanz.de/v13/volltexte/2003/996//pdf/scherer.pdf
16	+ http://kontext.fraunhofer.de/haenelt/kurs/Referate/Kowatschew_Lang/stemming.pdf
17	+ http://www.cis.uni-muenchen.de/people/Schulz/SeminarSoSe2001IR/FilzmayerMargetic/referat.html
18	+ http://www.ifi.unizh.ch/CL/broder/mue1/porter/stemming/node1.html
19	+ For lists of stopwords see
20	+ http://members.unine.ch/jacques.savoy/clef/index.html
21	+ Small parts were stolen from dutchstemmer.module
22	+ */
23	+
24	+
// Vowels of the German stemmer (plain vowels, y and the three umlauts).
define('DE_STEMMER_VOKALE', 'aeiouyäöü');

// Detect the encoding of this file's own literals and make it the mbstring
// internal encoding, so the mb_* calls below read text the same way.
$enc = mb_detect_encoding('a-zA-ZÄÖÜßäëïöüáéíóúè');
mb_internal_encoding($enc);
29	+
/**
 * Split a text into its words.
 *
 * Every run of characters that is neither an ASCII letter nor one of the
 * accented letters listed in the character class is a separator; empty
 * pieces are dropped.
 *
 * @param string $text Text to split (taken by reference, but not modified).
 * @return array|false List of words, or false if preg_split() fails.
 */
function _de_stemmer_split_text(&$text) {
    $trenner = '/([^a-zA-ZÄÖÜßäëïöüáéíóúè]+)/u';
    $woerter = preg_split($trenner, $text, -1, PREG_SPLIT_NO_EMPTY);

    return $woerter;
}
34	+
35	+
/**
 * Implementation of hook_search_preprocess: stem every word of a text.
 *
 * The text is split on runs of non-letter characters with the separators
 * kept, so the pieces alternate word, separator, word, ... Each word (even
 * index) is replaced by its stem and the pieces are glued back together,
 * leaving the separators untouched.
 *
 * @param string $text Text to process (taken by reference, but not modified).
 * @return string The text with every word replaced by its stem.
 */
function de_stemmer_search_preprocess(&$text) {
    $teile = preg_split('/([^a-zA-ZÄÖÜßäëïöüáéíóúè]+)/u', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    foreach ($teile as $i => $teil) {
        // Even positions hold words, odd positions hold the captured separators.
        if ($i % 2 == 0) {
            $teile[$i] = _de_stemmer_wortstamm($teil);
        }
    }

    return implode('', $teile);
}
69	+
70	+
/**
 * Implementation of hook_help().
 *
 * @param string $section Help section being requested.
 * @return string|null The module description for 'admin/modules#description',
 *                     null for every other section.
 */
function de_stemmer_help($section = 'admin/help#search') {
    switch ($section) {
        case 'admin/modules#description':
            $text = 'Implements a German stemming algorithm (Porter) to improve searching.';
            // t() is Drupal's translation helper. This file was adapted from a
            // Drupal module, so outside Drupal the function does not exist and
            // calling it would be a fatal error; fall back to the plain text.
            return function_exists('t') ? t($text) : $text;
    }
    return null;
}
80	+
81	+
/**
 * Build a word => stem lookup table for a text.
 *
 * The text is split into words, each word is stemmed, and the result is
 * returned as an array keyed by the original word. Words that share a value
 * in this table reduce to the same stem.
 * For details please compare 'search.module-stem.patch'.
 *
 * @param string $text Text to analyse.
 * @return array Map of word => stem.
 */
function de_stemmer_stem_list($text) {
    $stem_list = array();

    // A word occurring several times simply overwrites its own entry.
    foreach (_de_stemmer_split_text($text) as $wort) {
        $stem_list[$wort] = _de_stemmer_wortstamm($wort);
    }

    return $stem_list;
}
100	+
101	+
/**
 * Compute where the stemmer's next region starts in a word.
 *
 * Counts the leading bytes that are not in DE_STEMMER_VOKALE, then the run of
 * vowel bytes that follows, and adds one for the byte after that run. The
 * result is a byte offset and may exceed the length of the word when nothing
 * follows the vowel run.
 *
 * @param string $wort Word (or rest of a word) to inspect.
 * @return int Byte offset of the region start, relative to $wort.
 */
function _de_stemmer_region_n($wort) {
    $vor_vokal = strcspn($wort, DE_STEMMER_VOKALE);
    $vokal_lauf = strspn($wort, DE_STEMMER_VOKALE, $vor_vokal);

    return $vor_vokal + $vokal_lauf + 1;
}
106	+
/**
 * Normalise a word before stemming.
 *
 * Lower-cases the word, replaces ß by ss, and puts u and y into upper case
 * when they stand between two vowels, which marks them as non-vowels for the
 * later steps.
 *
 * @param string $wort Word to normalise.
 * @return string|null Normalised word, or null if preg_replace() fails.
 */
function de_stemmer_preprocess($wort) {
    $v = DE_STEMMER_VOKALE;

    $klein = str_replace("ß", "ss", mb_strtolower($wort));

    // The patterns are applied in order; the first one repeats the ß
    // replacement that str_replace() has already done.
    $muster = array(
        '/ß/',
        '/(?<=[' . $v . '])u(?=[' . $v . '])/u',
        '/(?<=[' . $v . '])y(?=[' . $v . '])/u',
    );
    $ersatz = array('ss', 'U', 'Y');

    return preg_replace($muster, $ersatz, $klein);
}
121	+
122	+
/**
 * Finish a stem: lower-case it and strip diacritics.
 *
 * The stem is lower-cased (which also undoes the U/Y markers set during
 * preprocessing). If it is listed as an exception, the exception's
 * replacement is returned as is; otherwise the accented vowels are replaced
 * by their plain counterparts.
 *
 * @param string $wort Stem produced by the suffix-stripping steps.
 * @return string Final stem.
 */
function _de_stemmer_postprocess($wort) {
    $wort = mb_strtolower($wort);

    // _de_stemmer_ausnahme() rewrites $wort by reference when it matches.
    if (_de_stemmer_ausnahme($wort)) {
        return $wort;
    }

    $ohne_akzent = array(
        'ä' => 'a', 'á' => 'a',
        'ë' => 'e', 'é' => 'e',
        'ï' => 'i', 'í' => 'i',
        'ö' => 'o', 'ó' => 'o',
        'ü' => "u", 'ú' => 'u',
    );

    return strtr($wort, $ohne_akzent);
}
137	+
138	+
/**
 * Find the byte offset at which a suffix pattern matches inside a region.
 *
 * @param string $stamm  Current stem.
 * @param string $muster PCRE pattern anchored at the end of the word.
 * @param int    $ab     Byte offset where the region (R1 or R2) starts.
 * @return int|false Byte offset of the match, or false if there is none.
 */
function _de_stemmer_suffix_pos($stamm, $muster, $ab) {
    $laenge = strlen($stamm);

    // The stem gets shorter from step to step, so a region start computed on
    // the full word may lie behind its end by now.
    $ab = min($ab, $laenge);

    // Under /u PCRE rejects an offset that points into the middle of a UTF-8
    // sequence; move forward to the next character boundary.
    while ($ab < $laenge && (ord($stamm[$ab]) & 0xC0) === 0x80) {
        $ab++;
    }

    if (preg_match($muster, $stamm, $hits, PREG_OFFSET_CAPTURE, $ab)) {
        return $hits[0][1];
    }
    return false;
}

/**
 * Reduce a German word to its stem.
 *
 * Fixes over the previous version:
 *  - the suffix alternations were written with an escaped pipe, which PCRE
 *    treats as a literal "|" character, so no rule could ever match;
 *  - region offsets and PREG_OFFSET_CAPTURE positions are byte offsets, but
 *    the stem was cut with mb_substr(), which counts characters, so words
 *    containing umlauts were cut in the wrong place. Everything is byte
 *    based now (strlen/substr).
 *
 * @param string $wort Word to stem.
 * @return string The stem, lower-cased and without diacritics.
 */
function _de_stemmer_wortstamm($wort) {
    $stamm = de_stemmer_preprocess($wort);

    /*
     * R1 is the region after the first non-vowel following a vowel,
     *    or is the null region at the end of the word if there is no such non-vowel.
     * R2 is the region after the first non-vowel following a vowel in R1,
     *    or is the null region at the end of the word if there is no such non-vowel.
     * Both are kept as byte offsets into $stamm.
     */
    $l = strlen($stamm);
    $r1 = _de_stemmer_region_n($stamm);
    $r2 = $r1 >= $l ? $r1 : $r1 + _de_stemmer_region_n(substr($stamm, $r1));
    // Original author was unsure how to read the following rule:
    // "then R1 is ADJUSTED so that the region before it contains at least 3 letters"
    if ($r1 < 3) {
        $r1 = 3;
    }

    /*
     * Step 1
     * Search for the longest among the following suffixes,
     *   (a) e em en ern er es
     *   (b) s (preceded by a valid s-ending)
     * and delete if in R1.
     * (The letter of the valid s-ending is not necessarily in R1; the
     * lookbehind may look in front of the region start.)
     * Matching scans left to right, so the first hit is the longest suffix.
     */
    $pos = _de_stemmer_suffix_pos($stamm, '/(e|em|en|ern|er|es)$/u', $r1);
    if ($pos === false) {
        $pos = _de_stemmer_suffix_pos($stamm, '/(?<=(b|d|f|g|h|k|l|m|n|r|t))s$/u', $r1);
    }
    if ($pos !== false) {
        $stamm = substr($stamm, 0, $pos);
    }

    /*
     * Step 2
     * Search for the longest among the following suffixes,
     *   (a) en er est
     *   (b) st (preceded by a valid st-ending, itself preceded by at least 3 letters)
     * and delete if in R1.
     * NOTE(review): the "at least 3 letters" condition of (b) is not checked
     * by the pattern below; left as in the original code.
     */
    $pos = _de_stemmer_suffix_pos($stamm, '/(en|er|est)$/u', $r1);
    if ($pos === false) {
        $pos = _de_stemmer_suffix_pos($stamm, '/(?<=(b|d|f|g|h|k|l|m|n|t))st$/u', $r1);
    }
    if ($pos !== false) {
        $stamm = substr($stamm, 0, $pos);
    }

    /*
     * Step 3: d-suffixes ( see http://snowball.tartarus.org/texts/glossary.html )
     * The rules are tried in order and the first one that matches wins.
     * Each entry is: pattern, region start, whether the match is deleted.
     * NOTE(review): the referenced algorithm also deletes a preceding
     * ig/lich/er/en in some cases; this code only deletes the suffix itself.
     */
    $regeln = array(
        array('/(?<=eig)(end|ung)$/u',         $r2, false), // end/ung after "eig": keep the word
        array('/(end|ung)$/u',                 $r2, true),
        array('/(?<![e])(ig|ik|isch)$/u',      $r2, true),  // not preceded by e
        array('/(?<=(er|en))(lich|heit)$/u',   $r1, true),
        array('/(lich|heit)$/u',               $r2, true),
        array('/(?<=lich)keit$/u',             $r1, true),
        array('/(?<=ig)keit$/u',               $r1, true),
        array('/keit$/u',                      $r2, true),
    );
    foreach ($regeln as $regel) {
        $pos = _de_stemmer_suffix_pos($stamm, $regel[0], $regel[1]);
        if ($pos !== false) {
            if ($regel[2]) {
                $stamm = substr($stamm, 0, $pos);
            }
            break;
        }
    }

    // Open question from the original author: what about
    // chen, lein, bar, schaft, ... ?
    return _de_stemmer_postprocess($stamm);
}
238	+
239	+
/**
 * Tell whether a (lower-case) word is a German stop word.
 *
 * Changes over the previous version:
 *  - the comparison is exact: the former loose in_array() reported e.g.
 *    boolean true as a stop word;
 *  - the list is flipped once into a key lookup instead of being scanned on
 *    every call.
 *
 * @param mixed $wort Word to test; anything that is not a string is no stop word.
 * @return bool True if $wort is in the list.
 */
function _de_stemmer_stoppwort($wort) {

    static $stoppworte = null;

    if ($stoppworte === null) {
        // Duplicates in the list are harmless: flipping keeps one key per word.
        $stoppworte = array_flip(array(
            'ab', 'aber', 'aber', 'ach', 'acht', 'achte', 'achten', 'achter', 'achtes', 'ag', 'alle', 'allein', 'allem', 'allen', 'aller', 'allerdings', 'alles', 'allgemeinen', 'als', 'als', 'also', 'am', 'an', 'andere', 'anderen', 'andern', 'anders', 'au', 'auch', 'auch', 'auf', 'aus', 'ausser', 'außer', 'ausserdem', 'außerdem',
            'bald', 'bei', 'beide', 'beiden', 'beim', 'bekannt', 'bereits', 'besonders', 'besser', 'besten', 'bin', 'bis', 'bisher', 'bist',
            'da', 'dabei', 'dadurch', 'dafür', 'dagegen', 'daher', 'dahin', 'dahinter', 'damals', 'damit', 'danach', 'daneben', 'dank', 'dann', 'daran', 'darauf', 'daraus', 'darf', 'darfst', 'darin', 'darüber', 'darum', 'darunter', 'das', 'das', 'dasein', 'daselbst', 'dass', 'daß', 'dasselbe', 'davon', 'davor', 'dazu', 'dazwischen', 'dein', 'deine', 'deinem', 'deiner', 'dem', 'dementsprechend', 'demgegenüber', 'demgemäss', 'demgemäß', 'demselben', 'demzufolge', 'den', 'denen', 'denn', 'denn', 'denselben', 'der', 'deren', 'derjenige', 'derjenigen', 'dermassen', 'dermaßen', 'derselbe', 'derselben', 'des', 'deshalb', 'desselben', 'dessen', 'deswegen', 'd.h', 'dich', 'die', 'diejenige', 'diejenigen', 'dies', 'diese', 'dieselbe', 'dieselben', 'diesem', 'diesen', 'dieser', 'dieses', 'dir', 'doch', 'dort', 'drei', 'drin', 'dritte', 'dritten', 'dritter', 'drittes', 'du', 'durch', 'durchaus',
            'eben', 'ebenso', 'eigen', 'eigene', 'eigenen', 'eigener', 'eigenes', 'ein', 'einander', 'eine', 'einem', 'einen', 'einer', 'eines', 'einige', 'einigen', 'einiger', 'einiges', 'einmal', 'einmal', 'eins', 'elf', 'en', 'ende', 'endlich', 'entweder', 'entweder', 'er', 'ernst', 'erst', 'erste', 'ersten', 'erster', 'erstes', 'es', 'etwa', 'etwas', 'euch',
            'früher', 'fünf', 'fünfte', 'fünften', 'fünfter', 'fünftes', 'für',
            'gab', 'ganz', 'ganze', 'ganzen', 'ganzer', 'ganzes', 'gar', 'gedurft', 'gegen', 'gegenüber', 'gehabt', 'gehen', 'geht', 'gekannt', 'gekonnt', 'gemacht', 'gemocht', 'gemusst', 'genug', 'gerade', 'gern', 'gesagt', 'gesagt', 'geschweige', 'gewesen', 'gewollt', 'geworden', 'gibt', 'ging', 'gleich', 'gott', 'gross', 'groß', 'grosse', 'große', 'grossen', 'großen', 'grosser', 'großer', 'grosses', 'großes', 'gut', 'gute', 'guter', 'gutes',
            'habe', 'haben', 'habt', 'hast', 'hat', 'hatte', 'hätte', 'hatten', 'hätten', 'heisst', 'her', 'heute', 'hier', 'hin', 'hinter', 'hoch',
            'ich', 'ihm', 'ihn', 'ihnen', 'ihr', 'ihre', 'ihrem', 'ihren', 'ihrer', 'ihres', 'im', 'im', 'immer', 'in', 'in', 'indem', 'infolgedessen', 'ins', 'irgend', 'ist',
            'ja', 'ja', 'jahr', 'jahre', 'jahren', 'je', 'jede', 'jedem', 'jeden', 'jeder', 'jedermann', 'jedermanns', 'jedoch', 'jemand', 'jemandem', 'jemanden', 'jene', 'jenem', 'jenen', 'jener', 'jenes', 'jetzt',
            'kam', 'kann', 'kannst', 'kaum', 'kein', 'keine', 'keinem', 'keinen', 'keiner', 'kleine', 'kleinen', 'kleiner', 'kleines', 'kommen', 'kommt', 'können', 'könnt', 'konnte', 'könnte', 'konnten', 'kurz',
            'lang', 'lange', 'lange', 'leicht', 'leide', 'lieber', 'los',
            'machen', 'macht', 'machte', 'mag', 'magst', 'mahn', 'man', 'manche', 'manchem', 'manchen', 'mancher', 'manches', 'mann', 'mehr', 'mein', 'meine', 'meinem', 'meinen', 'meiner', 'meines', 'mich', 'mir', 'mit', 'mittel', 'mochte', 'möchte', 'mochten', 'mögen', 'möglich', 'mögt', 'morgen', 'muss', 'muß', 'müssen', 'musst', 'müsst', 'musste', 'mussten',
            'na', 'nach', 'nachdem', 'nahm', 'natürlich', 'neben', 'nein', 'neue', 'neuen', 'neun', 'neunte', 'neunten', 'neunter', 'neuntes', 'nicht', 'nicht', 'nichts', 'nie', 'niemand', 'niemandem', 'niemanden', 'noch', 'nun', 'nun', 'nur',
            'ob', 'oben', 'oder', 'oder', 'offen', 'oft', 'oft', 'ohne',
            'recht', 'rechte', 'rechten', 'rechter', 'rechtes', 'richtig', 'rund',
            'sa', 'sache', 'sagt', 'sagte', 'sah', 'satt', 'schon', 'sechs', 'sechste', 'sechsten', 'sechster', 'sechstes', 'sehr', 'sei', 'sei', 'seid', 'seien', 'sein', 'seine', 'seinem', 'seinen', 'seiner', 'seines', 'seit', 'seitdem', 'selbst', 'selbst', 'sich', 'sie', 'sieben', 'siebente', 'siebenten', 'siebenter', 'siebentes', 'sind', 'so', 'solang', 'solche', 'solchem', 'solchen', 'solcher', 'solches', 'soll', 'sollen', 'sollte', 'sollten', 'sondern', 'sonst', 'sowie', 'später', 'statt',
            'tat', 'teil', 'tel', 'tritt', 'trotzdem', 'tun',
            'über', 'überhaupt', 'übrigens', 'uhr', 'um', 'und', 'und?', 'uns', 'unser', 'unsere', 'unserer', 'unter',
            'vergangenen', 'viel', 'viele', 'vielem', 'vielen', 'vielleicht', 'vier', 'vierte', 'vierten', 'vierter', 'viertes', 'vom', 'von', 'vor',
            'wahr?', 'während', 'währenddem', 'währenddessen', 'wann', 'war', 'wäre', 'waren', 'wart', 'warum', 'was', 'wegen', 'weil', 'weit', 'weiter', 'weitere', 'weiteren', 'weiteres', 'welche', 'welchem', 'welchen', 'welcher', 'welches', 'wem', 'wen', 'wenig', 'wenig', 'wenige', 'weniger', 'weniges', 'wenigstens', 'wenn', 'wenn', 'wer', 'werde', 'werden', 'werdet', 'wessen', 'wie', 'wie', 'wieder', 'will', 'willst', 'wir', 'wird', 'wirklich', 'wirst', 'wo', 'wohl', 'wollen', 'wollt', 'wollte', 'wollten', 'worden', 'wurde', 'würde', 'wurden', 'würden',
            'z.b', 'zehn', 'zehnte', 'zehnten', 'zehnter', 'zehntes', 'zeit', 'zu', 'zuerst', 'zugleich', 'zum', 'zum', 'zunächst', 'zur', 'zurück', 'zusammen', 'zwanzig', 'zwar', 'zwar', 'zwei', 'zweite', 'zweiten', 'zweiter', 'zweites', 'zwischen', 'zwölf'
        ));
    }

    return is_string($wort) && isset($stoppworte[$wort]);
}
268	+
269	+
/**
 * First attempt at a list of exceptions to the final stem clean-up.
 *
 * If the stem is a key of the table, it is replaced (by reference) with the
 * table's value and true is returned; otherwise the stem is left alone and
 * false is returned.
 *
 * @param string $wort Stem to look up; overwritten on a match.
 * @return bool True if $wort was an exception.
 */
function _de_stemmer_ausnahme(&$wort)
{
    static $de_stemmer_ausnahmen = array (
        'schön' => 'schön', // not "schon"
        'blüt'  => 'blüt',  // Blüte (not Blut)
        'kannt' => 'kenn',
        'küch'  => 'küch',  // Küchen (not Kuchen)
        'mög'   => 'mög',
        'mocht' => 'mög',
        'mag'   => 'mög',
        'ging'  => 'geh',
        'lief'  => 'lauf',
        'änd'   => 'änd'    // ändern (not andern)
    );

    $ist_ausnahme = array_key_exists($wort, $de_stemmer_ausnahmen);
    if ($ist_ausnahme) {
        $wort = $de_stemmer_ausnahmen[$wort];
    }

    return $ist_ausnahme;
}
295	+
/*
Stem caching added by Rob Marsh, SJ
http://rmarsh.com
*/

// Stems computed so far, keyed by the word they were computed for.
$StemCache = array();

/**
 * Return the stem of a word, computing it only on first use.
 *
 * @param string $word Word to stem.
 * @return string Stem of $word.
 */
function stem($word) {
    global $StemCache;

    // Cache hit: hand back the stored stem.
    if (isset($StemCache[$word])) {
        return $StemCache[$word];
    }

    // Cache miss: stem the word and remember the result.
    $stemmedword = _de_stemmer_wortstamm($word);
    $StemCache[$word] = $stemmedword;
    return $stemmedword;
}
314	+
315	+ ?>

languages/de/stopwords.php ADDED Viewed

	@@ -0,0 +1,4 @@


1	+ <?php
// The list of common words we want to ignore. NB: anything shorter than 4 characters
// is dropped by the plugin anyway and does not need to appear here.
// Entries with umlauts had been destroyed by a bad re-encoding and are restored as UTF-8.
$overusedwords = array(
    "aber", "alle", "allem", "allen", "aller", "alles", "also", "ander", "andere", "anderem", "anderen", "anderer", "anderes", "anderm", "andern", "anderr", "anders", "auch",
    "bist",
    "damit", "dann", "derselbe", "derselben", "denselben", "desselben", "demselben", "dieselbe", "dieselben", "dasselbe", "dazu", "dein", "deine", "deinem", "deinen", "deiner", "deines", "denn", "derer", "dessen", "dich", "dies", "diese", "diesem", "diesen", "dieser", "dieses", "doch", "dort", "durch",
    "eine", "einem", "einen", "einer", "eines", "einig", "einige", "einigem", "einigen", "einiger", "einiges", "einmal", "etwas", "euer", "eure", "eurem", "euren", "eurer", "eures",
    "gegen", "gewesen",
    "habe", "haben", "hatte", "hatten", "hier", "hinter",
    "mich",
    "ihre", "ihrem", "ihren", "ihrer", "ihres", "euch", "indem",
    "jede", "jedem", "jeden", "jeder", "jedes", "jene", "jenem", "jenen", "jener", "jenes", "jetzt",
    "kann", "kein", "keine", "keinem", "keinen", "keiner", "keines", "können", "könnte",
    "machen", "manche", "manchem", "manchen", "mancher", "manches", "mein", "meine", "meinem", "meinen", "meiner", "meines", "muss", "musste",
    "nach", "nicht", "nichts", "noch",
    "oder", "ohne",
    "sehr", "sein", "seine", "seinem", "seinen", "seiner", "seines", "selbst", "sich", "ihnen", "sind", "solche", "solchem", "solchen", "solcher", "solches", "soll", "sollte", "sondern", "sonst",
    "über", "unse", "unsem", "unsen", "unser", "unses", "unter",
    "viel",
    "während", "waren", "warst", "weil", "weiter", "welche", "welchem", "welchen", "welcher", "welches", "wenn", "werde", "werden", "wieder", "will", "wird", "wirst", "wollen", "wollte", "würde", "würden",
    "zwar", "zwischen"
);
4	+ ?>

languages/en/stemmer.php ADDED Viewed

	@@ -0,0 +1,335 @@


1	+ <?php
2	+ /*
3	+ Creado por Cesar Rodas para el proyecto Saddor.com
4	+ Este Stemmer esta basado en el argoritmo de Snowball Stemmer.
5	+ saddor@gmail.com
6	+ Este programa esta bajo licencia GNU
7	+ */
if (!defined("ENGLISHSTEMMER"))
{
    // Guard constant: the class is only declared the first time this file is included.
    define("ENGLISHSTEMMER", 1);

    /**
     * Rule-based suffix stripper for English words (the file header credits
     * the Snowball stemmer algorithm).
     *
     * Stem() runs the word through step1ab, step1c, step2, step3, step4 and
     * step5 in that order. The regex fragments below only cover lowercase
     * a-z, so callers presumably pass lowercased words -- TODO confirm.
     */
    class EnglishStemmer
    {
        /** Regex fragment for one consonant: a consonant letter, a 'y' that follows a vowel, or a leading 'y'. */
        var $regex_consonant = '(?:[bcdfghjklmnpqrstvwxz]|(?<=[aeiou])y|^y)';

        /** Regex fragment for one vowel: a, e, i, o, u, or a 'y' that does not follow a vowel. */
        var $regex_vowel = '(?:[aeiou]|(?<![aeiou])y)';

        /**
         * Stems a single word.
         *
         * @param string $word Word to stem.
         * @return string The stem; words of two bytes or fewer are returned unchanged.
         */
        function Stem($word)
        {
            if (strlen($word) <= 2) {
                return $word;
            }

            $word = $this->step1ab($word);
            $word = $this->step1c($word);
            $word = $this->step2($word);
            $word = $this->step3($word);
            $word = $this->step4($word);
            $word = $this->step5($word);

            // Added by Cesar Rodas: do not leave a trailing apostrophe on the stem.
            if (substr($word, -1, 1) == "'") {
                $word = substr($word, 0, strlen($word) - 1);
            }

            return $word;
        }

        /**
         * Handles plural endings (-sses, -ies, -s) and the -eed / -ed / -ing endings.
         *
         * @param string $word
         * @return string
         */
        function step1ab($word)
        {
            // Part a: plurals. The first rule whose suffix matches wins.
            if (substr($word, -1) == 's') {
                $this->replace($word, 'sses', 'ss')
                OR $this->replace($word, 'ies', 'i')
                OR $this->replace($word, 'ss', 'ss')
                OR $this->replace($word, 's', '');
            }

            // Part b: a word ending in -eed is handled by the 'eed' rule alone;
            // anything else may lose -ing / -ed if the remaining stem has a vowel.
            if (substr($word, -2, 1) != 'e' OR !$this->replace($word, 'eed', 'ee', 0)) {
                $v = $this->regex_vowel;

                // Note the mix of && and OR: && binds tighter, giving (A && B) OR (C && D).
                if (   preg_match("#$v+#", substr($word, 0, -3)) && $this->replace($word, 'ing', '')
                    OR preg_match("#$v+#", substr($word, 0, -2)) && $this->replace($word, 'ed', '')) {

                    // Tidy the stem: restore a final 'e', undouble a consonant, or add an 'e'.
                    if (    !$this->replace($word, 'at', 'ate')
                        AND !$this->replace($word, 'bl', 'ble')
                        AND !$this->replace($word, 'iz', 'ize')) {

                        if (    $this->doubleConsonant($word)
                            AND substr($word, -2) != 'll'
                            AND substr($word, -2) != 'ss'
                            AND substr($word, -2) != 'zz') {

                            $word = substr($word, 0, -1);

                        } else if ($this->m($word) == 1 AND $this->cvc($word)) {
                            $word .= 'e';
                        }
                    }
                }
            }

            return $word;
        }

        /**
         * Turns a final 'y' into 'i' when the rest of the word contains a vowel.
         *
         * @param string $word
         * @return string
         */
        function step1c($word)
        {
            $v = $this->regex_vowel;

            if (substr($word, -1) == 'y' && preg_match("#$v+#", substr($word, 0, -1))) {
                $this->replace($word, 'y', 'i');
            }

            return $word;
        }

        /**
         * Maps double suffixes to single ones (e.g. -ational to -ate) when the
         * stem before the suffix has a measure greater than 0. Rules are
         * selected by the second-to-last letter of the word.
         *
         * @param string $word
         * @return string
         */
        function step2($word)
        {
            switch (substr($word, -2, 1)) {
                case 'a':
                    $this->replace($word, 'ational', 'ate', 0)
                    OR $this->replace($word, 'tional', 'tion', 0);
                    break;

                case 'c':
                    $this->replace($word, 'enci', 'ence', 0)
                    OR $this->replace($word, 'anci', 'ance', 0);
                    break;

                case 'e':
                    $this->replace($word, 'izer', 'ize', 0);
                    break;

                case 'g':
                    $this->replace($word, 'logi', 'log', 0);
                    break;

                case 'l':
                    $this->replace($word, 'entli', 'ent', 0)
                    OR $this->replace($word, 'ousli', 'ous', 0)
                    OR $this->replace($word, 'alli', 'al', 0)
                    OR $this->replace($word, 'bli', 'ble', 0)
                    OR $this->replace($word, 'eli', 'e', 0);
                    break;

                case 'o':
                    $this->replace($word, 'ization', 'ize', 0)
                    OR $this->replace($word, 'ation', 'ate', 0)
                    OR $this->replace($word, 'ator', 'ate', 0);
                    break;

                case 's':
                    $this->replace($word, 'iveness', 'ive', 0)
                    OR $this->replace($word, 'fulness', 'ful', 0)
                    OR $this->replace($word, 'ousness', 'ous', 0)
                    OR $this->replace($word, 'alism', 'al', 0);
                    break;

                case 't':
                    $this->replace($word, 'biliti', 'ble', 0)
                    OR $this->replace($word, 'aliti', 'al', 0)
                    OR $this->replace($word, 'iviti', 'ive', 0);
                    break;
            }

            return $word;
        }

        /**
         * Shortens or removes -ical, -ness, -icate, -iciti, -ful, -ative and
         * -alize when the stem before the suffix has a measure greater than 0.
         *
         * @param string $word
         * @return string
         */
        function step3($word)
        {
            switch (substr($word, -2, 1)) {
                case 'a':
                    $this->replace($word, 'ical', 'ic', 0);
                    break;

                case 's':
                    $this->replace($word, 'ness', '', 0);
                    break;

                case 't':
                    $this->replace($word, 'icate', 'ic', 0)
                    OR $this->replace($word, 'iciti', 'ic', 0);
                    break;

                case 'u':
                    $this->replace($word, 'ful', '', 0);
                    break;

                case 'v':
                    $this->replace($word, 'ative', '', 0);
                    break;

                case 'z':
                    $this->replace($word, 'alize', 'al', 0);
                    break;
            }

            return $word;
        }

        /**
         * Removes a final suffix (-al, -ance, -er, -ment, ...) when the stem
         * before the suffix has a measure greater than 1.
         *
         * @param string $word
         * @return string
         */
        function step4($word)
        {
            switch (substr($word, -2, 1)) {
                case 'a':
                    $this->replace($word, 'al', '', 1);
                    break;

                case 'c':
                    $this->replace($word, 'ance', '', 1)
                    OR $this->replace($word, 'ence', '', 1);
                    break;

                case 'e':
                    $this->replace($word, 'er', '', 1);
                    break;

                case 'i':
                    $this->replace($word, 'ic', '', 1);
                    break;

                case 'l':
                    $this->replace($word, 'able', '', 1)
                    OR $this->replace($word, 'ible', '', 1);
                    break;

                case 'n':
                    $this->replace($word, 'ant', '', 1)
                    OR $this->replace($word, 'ement', '', 1)
                    OR $this->replace($word, 'ment', '', 1)
                    OR $this->replace($word, 'ent', '', 1);
                    break;

                case 'o':
                    // -ion is only removed after 's' or 't'.
                    if (substr($word, -4) == 'tion' OR substr($word, -4) == 'sion') {
                        $this->replace($word, 'ion', '', 1);
                    } else {
                        $this->replace($word, 'ou', '', 1);
                    }
                    break;

                case 's':
                    $this->replace($word, 'ism', '', 1);
                    break;

                case 't':
                    $this->replace($word, 'ate', '', 1)
                    OR $this->replace($word, 'iti', '', 1);
                    break;

                case 'u':
                    $this->replace($word, 'ous', '', 1);
                    break;

                case 'v':
                    $this->replace($word, 'ive', '', 1);
                    break;

                case 'z':
                    $this->replace($word, 'ize', '', 1);
                    break;
            }

            return $word;
        }

        /**
         * Final clean-up: drops a trailing 'e' and undoubles a final 'll'.
         *
         * @param string $word
         * @return string
         */
        function step5($word)
        {
            // Part a: remove a final 'e' when the stem is long enough, or when
            // its measure is exactly 1 and it does not end consonant-vowel-consonant.
            if (substr($word, -1) == 'e') {
                $stem    = substr($word, 0, -1);
                $measure = $this->m($stem);

                if ($measure > 1) {
                    $this->replace($word, 'e', '');

                } else if ($measure == 1) {

                    if (!$this->cvc($stem)) {
                        $this->replace($word, 'e', '');
                    }
                }
            }

            // Part b: -ll becomes -l when the measure is greater than 1.
            if ($this->m($word) > 1 AND $this->doubleConsonant($word) AND substr($word, -1) == 'l') {
                $word = substr($word, 0, -1);
            }

            return $word;
        }

        /**
         * Replaces the suffix $check of $str with $repl.
         *
         * @param string   $str   Word, modified in place when the rule applies.
         * @param string   $check Suffix to look for.
         * @param string   $repl  Replacement suffix.
         * @param int|null $m     When not null, the replacement only happens if
         *                        the measure of the stem before the suffix is
         *                        greater than this value.
         * @return bool True whenever $str ends in $check, even if the measure
         *              condition prevented the replacement; callers rely on
         *              this to stop trying further rules.
         */
        function replace(&$str, $check, $repl, $m = null)
        {
            $len = 0 - strlen($check);

            if (substr($str, $len) == $check) {
                $substr = substr($str, 0, $len);
                if (is_null($m) OR $this->m($substr) > $m) {
                    $str = $substr . $repl;
                }

                return true;
            }

            return false;
        }

        /**
         * Computes the measure of a string: the number of vowel-run +
         * consonant-run pairs left after stripping leading consonants and
         * trailing vowels.
         *
         * @param string $str
         * @return int
         */
        function m($str)
        {
            $c = $this->regex_consonant;
            $v = $this->regex_vowel;

            $str = preg_replace("#^$c+#", '', $str);
            $str = preg_replace("#$v+$#", '', $str);

            preg_match_all("#($v+$c+)#", $str, $matches);

            return count($matches[1]);
        }

        /**
         * Tells whether the string ends in the same consonant twice.
         *
         * @param string $str
         * @return bool
         */
        function doubleConsonant($str)
        {
            $c = $this->regex_consonant;

            // Square-bracket offsets: the curly-brace form is a fatal error on PHP 8.
            return preg_match('#' . $c . '{2}$#', $str, $matches)
                && $matches[0][0] == $matches[0][1];
        }

        /**
         * Tells whether the string ends consonant-vowel-consonant, where the
         * final consonant is not w, x or y.
         *
         * @param string $str
         * @return bool
         */
        function cvc($str)
        {
            $c = $this->regex_consonant;
            $v = $this->regex_vowel;

            return preg_match("#($c$v$c)$#", $str, $matches)
                && strlen($matches[1]) == 3
                && $matches[1][2] != 'w'
                && $matches[1][2] != 'x'
                && $matches[1][2] != 'y';
        }
    }
}
314	+
315	+ /*
316	+ Stem caching added by Rob Marsh, SJ
317	+ http://rmarsh.com
318	+ */
319	+
// Shared stemmer instance and a per-request memo of word => stem.
$Stemmer = new EnglishStemmer();
$StemCache = array();

/**
 * Returns the stem of $word, computing it at most once per distinct word.
 *
 * @param string $word
 * @return string
 */
function stem($word) {
    global $Stemmer, $StemCache;

    // Cache hit: hand back the stored stem.
    if (isset($StemCache[$word])) {
        return $StemCache[$word];
    }

    // Cache miss: stem the word and remember the result.
    $result = $Stemmer->Stem($word);
    $StemCache[$word] = $result;

    return $result;
}
334	+
335	+ ?>

languages/en/stopwords.php ADDED Viewed

	@@ -0,0 +1,4 @@


1	+ <?php
// English stopwords: common words that are ignored when comparing posts.
// NB anything shorter than 4 characters is dropped by the plugin itself and
// does not need to be listed here.
$overusedwords = array(
    "able", "about", "above", "according", "accordingly", "across", "actually", "after", "afterwards",
    "again", "against", "ain't", "allow", "allows", "almost", "alone", "along", "already", "also",
    "although", "always", "among", "amongst", "another", "anybody", "anyhow", "anyone", "anything",
    "anyway", "anyways", "anywhere", "apart", "appear", "appreciate", "appropriate", "aren't", "around",
    "aside", "asking", "associated", "available", "away", "awfully", "became", "because", "become",
    "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "believe", "below",
    "beside", "besides", "best", "better", "between", "beyond", "both", "brief", "came", "cannot",
    "can't", "cause", "causes", "certain", "certainly", "changes", "clearly", "come", "comes",
    "concerning", "consequently", "consider", "considering", "contain", "containing", "contains",
    "corresponding", "could", "couldn't", "course", "currently", "definitely", "described", "despite",
    "didn't", "different", "does", "doesn't", "doing", "done", "don't", "down", "downwards", "during",
    "each", "eight", "either", "else", "elsewhere", "enough", "entirely", "especially", "even", "ever",
    "every", "everybody", "everyone", "everything", "everywhere", "exactly", "example", "except",
    "fifth", "first", "five", "followed", "following", "follows", "former", "formerly", "forth", "four",
    "from", "further", "furthermore", "gets", "getting", "given", "gives", "goes", "going", "gone",
    "gotten", "greetings", "hadn't", "happens", "hardly", "hasn't", "have", "haven't", "having", "hello",
    "help", "hence", "here", "hereafter", "hereby", "herein", "hereupon", "he's", "hers", "herself",
    "himself", "hither", "hopefully", "howbeit", "however", "ignored", "i'll", "it'd", "it's", "i've",
    "immediate", "inasmuch", "indeed", "indicate", "indicated", "indicates", "inner", "insofar",
    "instead", "into", "inward", "isn't", "itself", "just", "keep", "keeps", "kept", "know", "known",
    "knows", "last", "lately",
    "later", "latter", "latterly", "least", "less", "lest", "like", "liked", "likely", "little", "look",
    "looking", "looks", "mainly", "many", "maybe", "mean", "meanwhile", "merely", "might", "more",
    "moreover", "most", "mostly", "much", "must", "mustn't", "myself", "name", "namely", "near",
    "nearly", "necessary", "need", "needs", "neither", "never", "nevertheless", "next", "nine", "nobody",
    "none", "noone", "normally", "nothing", "novel", "nowhere", "obviously", "often", "okay", "once",
    "ones", "one's", "only", "onto", "other", "others", "otherwise", "ought", "ours", "ourselves",
    "outside", "over", "overall", "particular", "particularly", "perhaps", "placed", "please", "plus",
    "possible", "presumably", "probably", "provides", "quite", "rather", "really", "reasonably",
    "regarding", "regardless", "regards", "relatively", "respectively", "right", "said", "same",
    "saying", "says", "second", "secondly", "seeing", "seem", "seemed", "seeming", "seems", "seen",
    "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "should",
    "shouldn't", "since", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes",
    "somewhat", "somewhere", "soon", "sorry", "specified", "specify", "specifying", "still", "such",
    "sure", "take", "taken", "tell", "tends", "than", "thank", "thanks", "that", "that's", "their",
    "theirs", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore",
    "therein", "there's", "thereupon", "these", "they", "think", "third", "this", "thorough",
    "thoroughly", "those", "though", "three", "through", "throughout", "thru", "thus", "together",
    "took", "toward", "towards", "tried", "tries", "truly", "trying", "twice", "under", "unfortunately",
    "unless", "unlikely", "until", "unto", "upon", "used", "useful", "uses", "using", "usually", "value",
    "various", "very", "want", "wants", "wasn't", "welcome", "we'd", "well", "went", "were", "weren't",
    "what", "whatever", "when", "whence", "whenever",
    "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which",
    "while", "whither", "whoever", "whole", "whom", "whose", "will", "willing", "wish", "with", "within",
    "without", "wonder", "would", "wouldn't", "your", "yours", "yourself", "yourselves", "zero"
);
4	+ ?>

languages/es/stemmer.php ADDED Viewed

	@@ -0,0 +1,381 @@


1	+ <?php
2	+ /*
3	+ Creado por Cesar Rodas para el proyecto Saddor.com
4	+ Este Stemmer esta basado en el argoritmo de Snowball Stemmer.
5	+ saddor@gmail.com
6	+ Este programa esta bajo licencia GNU
7	+ */
if (!defined("SPANISHSTEMMER"))
{
    // Character classes returned by PorterStemmer::char_is().
    define("vocal", 1);
    define("consonante", 2);
    // Guard constant: the class is only declared the first time this file is included.
    define("SPANISHSTEMMER", 1);

    /**
     * Spanish suffix stripper (the file header credits the Snowball stemmer
     * algorithm).
     *
     * All string handling is byte-wise (strlen/substr/$str[$i]).
     * NOTE(review): the accented literals in this class were unreadable in the
     * reviewed copy and have been restored from the Snowball Spanish suffix
     * lists, written as UTF-8 -- confirm against the upstream file, and confirm
     * that words passed to Stem() use the same encoding as this file.
     */
    class PorterStemmer
    {
        /** Region R1 of the current word, as computed by splitword(). */
        var $R1;
        /** Region R2 of the current word, as computed by splitword(). */
        var $R2;
        /** Region RV of the current word, as computed by splitword(). */
        var $RV;
        /** The word being stemmed; every step edits it in place. */
        var $word;

        /**
         * Stems a single word.
         *
         * @param string $word Word to stem.
         * @return string The stem; words shorter than two bytes are returned unchanged.
         */
        function Stem($word)
        {
            $this->word = $word;
            if (strlen($word) < 2) {
                return $word;
            }

            $this->step_0();
            // step_1() strips one suffix per call; repeat until nothing more is removed.
            while ($this->step_1());
            $this->step_2();
            $this->step_3();
            return $this->word;
        }

        /**
         * Step 0: removes an attached pronoun (me, se, lo, ...) when RV
         * contains one of the gerund/infinitive endings; for the accented
         * endings the accent is removed as well.
         *
         * @return bool True if at least one pronoun was removed.
         */
        function step_0()
        {
            $this->splitword();
            $pronouns = array(
                "me","se","sela","selo","selas","selos","la","le","lo","les",
                "los","nos"
            );

            $prefix = array(
                "iéndo","ándo","ár","ér","ír", /* first case: accented endings (ids 0-4) */
                "iendo","ando","ar","er","ir", /* second case: plain endings (ids 5-9) */
                "yendo"                        /* third case (id 10) */
            );

            $return = false;
            foreach ($prefix as $id => $pref)
            {
                // "yendo" also counts when the word contains "uyendo".
                if ( (strstr($this->RV,$pref) != NULL) or
                     ($pref == "yendo" && strstr($this->word,"uyendo")) )
                {
                    // The ending was found; now look for a pronoun to delete.
                    foreach ($pronouns as $pronoun)
                    {
                        $len = strlen($pronoun);
                        if ($pronoun != substr($this->RV, -1 * $len, $len)) {
                            continue;
                        }

                        $this->word = substr($this->word, 0, strlen($this->word) - $len);
                        if ($id < 5) {
                            // Accented ending: swap it for its plain counterpart.
                            $this->word = str_replace($prefix[$id], $prefix[$id + 5], $this->word);
                        }
                        $return = true;
                    }
                }
            }
            return $return;
        }

        /**
         * Step 1: removes one standard suffix found at the end of R2, and
         * rewrites -logía / -logías to -log.
         *
         * @return bool True if a suffix was removed or rewritten.
         */
        function step_1()
        {
            $return = false;
            $this->splitword();

            /* suffixes deleted when found at the end of R2 (longest first) */
            $search = array(
                "abilidades","iblemente","icaciones","ablemente","antemente","ivamente","atamente",
                "amientos","icadoras","icadores","icancias","imientos","icamente",
                "osamente","abilidad","icidades","ividades","adamente","icantes",
                "icancia","imiemto","icadora","icación","amiento","imiento","aciones",
                "ativos","ativas","ividad","idades","icidad","icante",
                "icador","adoras","adores","ancias","mente","ables",
                "ismos","anzas","ativa","ativo","istas","ibles",
                "ación","antes","adora","ancia","ismo","anza",
                "icos","ivas","osos","ivos","ante","osas",
                "ador","ible","ista","idad","able","ico",
                "osa","oso","iva","ica","ica","ivo",
            );

            for ($i = 0; $i < count($search); $i++)
                if (substr($this->R2,strlen($search[$i]) * (-1),strlen($search[$i])) == $search[$i])
                {
                    $this->word = substr($this->word,0,strlen($this->word) - strlen($search[$i]) );
                    $return = true;
                    break;
                }

            /* original author's note: this is probably wrong, R1 should be searched instead */
            if ($this->R1 == "amente")
            {
                $this->word = str_replace("amente","",$this->word);
            }

            $search = array("logía","logías");
            $replace = array("log","log");
            for ($i = 0; $i < count($search); $i++)
                if (substr($this->R2,strlen($search[$i]) * (-1),strlen($search[$i])) == $search[$i])
                {
                    $this->word = str_replace($search[$i],$replace[$i],$this->word);
                    $return = true;
                    break;
                }
            return $return;
        }

        /**
         * Step 2: removes verb suffixes that start with 'y' when they are
         * preceded by 'u' (the 'u' is removed too); if none applied, falls
         * through to step_2b().
         */
        function step_2()
        {
            $this->splitword();
            $removed = false;
            $search = array(
                "ya","ye","yan","yen","yeron","yendo","yo","yó","yas","yes","yais","yamos"
            );
            foreach ($search as $suffix)
            {
                $len = strlen($suffix);
                if (substr($this->RV, $len * (-1), $len) == $suffix)
                    if (substr($this->word, -1 * ($len + 1), $len + 1) == "u".$suffix)
                    {
                        $this->word = substr($this->word, 0, strlen($this->word) - ($len + 1));
                        $removed = true;
                    }
            }

            if ($removed == false)
                $this->step_2b();
        }

        /**
         * Step 2b: removes the remaining verb suffixes found at the end of RV.
         */
        function step_2b()
        {
            $this->splitword();
            $search = array(
                "en","es","éis","emos"
            );

            foreach ($search as $suffix)
            {
                $len = strlen($suffix);
                if (substr($this->RV, $len * (-1), $len) == $suffix)
                {
                    /*
                       Fix contributed by Diego Enrique Finol <dfinol at cantv dot net>:
                       the suffix has to be removed in both cases; when it is
                       preceded by "gu" the "u" is removed as well.
                    */
                    if (substr($this->word, (-1) * ($len + 2), $len + 2) == "gu".$suffix)
                    {
                        $this->word = substr($this->word, 0, strlen($this->word) - ($len + 1));
                    }
                    else
                    {
                        $this->word = substr($this->word, 0, strlen($this->word) - $len);
                    }
                    /* End of Diego fix */
                }
            }

            // NOTE(review): within the pairs ará/aré, erá/eré and irá/iré the
            // original order could not be recovered from the reviewed copy.
            $search = array(
                "iéramos","aríamos","iríamos","iésemos","eríamos","eríais","eremos",
                "isteis","iríais","ierais","iremos","ábamos","ieseis",
                "asteis","áramos","ásemos","aremos","aríais","abais",
                "íamos","arais","ieses","arían","iesen","ieron",
                "iendo","ieras","iréis","arías","erías","aseis",
                "eréis","erían","irían","aréis","irías","ieran",
                "ando","amos","aron","asen","aras","ados",
                "íais","ases","imos","adas","idas","abas",
                "iste","irán","erán","aría","ería","iera",
                "irás","iría","aran","arás","erás","aste",
                "iese","aban","arán","áis","ada","irá",
                "ían","iré","erá","aba","ara","ido",
                "ará","aré","ado","eré","ase","ías",
                "ida","ía","er","ar","ió","an",
                "ir","as","ad","ed","id","ís",
            );

            foreach ($search as $suffix)
                if (substr($this->RV,strlen($suffix) * (-1),strlen($suffix)) == $suffix)
                {
                    $this->word = substr($this->word,0, strlen($this->word) -(strlen($suffix)));
                    // Regions are recomputed so later suffixes are tested against the shortened word.
                    $this->splitword();
                }
        }

        /**
         * Step 3: removes residual vowel suffixes found at the end of RV and
         * then strips the accents left in the word.
         *
         * @return bool True if a residual suffix was removed.
         */
        function step_3()
        {
            $this->splitword();
            $return = false;
            $search = array(
                "os","a","o","á","í","ó"
            );

            foreach ($search as $suffix)
                if (substr($this->RV,strlen($suffix) * (-1),strlen($suffix)) == $suffix)
                {
                    $this->word = substr($this->word,0, strlen($this->word) -(strlen($suffix)));
                    $return = true;
                }

            $search = array(
                "e","é"
            );

            foreach ($search as $suffix)
            {
                $len = strlen($suffix);
                if (substr($this->RV, $len * (-1), $len) == $suffix)
                {
                    // When RV ends in "gu" + suffix the "u" is removed as well.
                    if (substr($this->RV, -1 * ($len + 2), $len + 2) == "gu".$suffix)
                    {
                        $this->word = substr($this->word, 0, strlen($this->word) - ($len + 1));
                    }
                    else
                    {
                        $this->word = substr($this->word, 0, strlen($this->word) - $len);
                    }
                    $return = true;
                }
            }

            // Finally remove the accents that remain in the word.
            $this->word = str_replace("á","a",$this->word);
            $this->word = str_replace("é","e",$this->word);
            $this->word = str_replace("í","i",$this->word);
            $this->word = str_replace("ó","o",$this->word);
            $this->word = str_replace("ú","u",$this->word);
            $this->word = str_replace("ü","u",$this->word);
            return $return;
        }

        /* helper functions */

        /**
         * Comparison callback that orders strings by descending byte length.
         *
         * @param string $a
         * @param string $b
         * @return int 0 when equally long, 1 when $a is shorter, -1 otherwise.
         */
        function saddorsort($a, $b)
        {
            if (strlen($a) == strlen($b)) {
                return 0;
            }
            return (strlen($a) < strlen($b)) ? 1 : -1;
        }

        /**
         * Recomputes the R1, R2 and RV regions of $this->word.
         */
        function splitword()
        {
            $flag1 = false;
            $flag2 = false;
            $this->R1 = "";
            $this->R2 = "";
            $this->RV = "";

            $length = strlen($this->word);
            // A word this short has no regions; returning here also avoids
            // reading the non-existent offset 1 below.
            if ($length < 2) {
                return;
            }

            for ($i = 1; $i < $length; $i++)
            {
                if ($flag1)
                    $this->R1 .= $this->word[$i];
                if ($flag2)
                    $this->R2 .= $this->word[$i];

                if ($i + 1 >= $length)
                    break;

                $consonantThenVowel =
                    $this->char_is($this->word[$i]) == consonante &&
                    $this->char_is($this->word[$i + 1]) == vocal;

                // The R2 check runs before the R1 check, so one
                // consonant-vowel pair cannot switch on both regions.
                if ($consonantThenVowel && $flag1 == true && $flag2 == false)
                    $flag2 = true;

                if ($consonantThenVowel && $flag1 == false)
                    $flag1 = true;
            }

            /* locate RV */
            if ($this->char_is($this->word[1]) == consonante)
            {
                // Second letter is a consonant: RV starts after the next vowel.
                for ($i = 2; $i < $length; $i++)
                    if ($this->char_is($this->word[$i]) == vocal)
                        break;
                $i++;
                $this->RV = substr($this->word, $i);
            }
            else if ($this->char_is($this->word[1]) == vocal && $this->char_is($this->word[0]) == vocal)
            {
                // First two letters are vowels: RV starts after the next consonant.
                for ($i = 2; $i < $length; $i++)
                    if ($this->char_is($this->word[$i]) == consonante)
                        break;
                $i++;
                $this->RV = substr($this->word, $i);
            }
            else if ($length > 2)
                $this->RV = substr($this->word, 3);
        }

        /**
         * Classifies a single character.
         *
         * @param string $char
         * @return int|null vocal, consonante, or null when the character is
         *                  empty or appears in neither list.
         */
        function char_is($char)
        {
            $char = strtolower($char);
            if ($char == "")
                return;
            $vowel = "aeiouáéíóúü";
            $consonant = "bcdfghijklmnñopqrsvtxwyz";
            // Vowels are tested first, so the 'i' in the consonant list never wins.
            if (strstr($vowel,$char))
                return vocal;
            if (strstr($consonant,$char))
                return consonante;
        }
    }
}
360	+
361	+ /*
362	+ Stem caching added by Rob Marsh, SJ
363	+ http://rmarsh.com
364	+ */
365	+
// Shared stemmer instance and a per-request memo of word => stem.
$Stemmer = new PorterStemmer();
$StemCache = array();

/**
 * Returns the stem of $word, reusing the cached stem when one is set.
 *
 * @param string $word
 * @return mixed Whatever PorterStemmer::Stem() returned for this word.
 */
function stem($word) {
    global $Stemmer, $StemCache;

    // Cache hit: hand back the stored stem.
    if (isset($StemCache[$word])) {
        return $StemCache[$word];
    }

    // Cache miss: stem the word and remember the result.
    $result = $Stemmer->Stem($word);
    $StemCache[$word] = $result;

    return $result;
}
380	+
381	+ ?>

languages/es/stopwords.php ADDED Viewed

	@@ -0,0 +1,4 @@


1	+ <?php
// Spanish stopwords: common words that are ignored when comparing posts.
// NB anything shorter than 4 characters is dropped by the plugin itself and
// does not need to be listed here.
// NOTE(review): the accented entries were unreadable in the reviewed copy and
// have been restored as UTF-8 -- confirm this matches the encoding of the
// words the plugin compares them against.
$overusedwords = array(
    "algo", "alguna", "algunas", "alguno", "algunos", "algún", "ambos", "ampleamos", "ante", "antes",
    "aquel", "aquellas", "aquellos", "aqui", "arriba", "atras", "bajo", "bastante", "bien", "cada",
    "cierta", "ciertas", "ciertos", "como", "conseguimos", "conseguir", "consigo", "consigue",
    "consiguen", "consigues", "contra", "cual", "cuando", "dentro", "desde", "donde", "durante",
    "ella", "ellas", "ellos", "empleais", "emplean", "emplear", "empleas", "empleo", "encima",
    "entonces", "entre", "erais", "eramos", "eran", "eras", "eres", "esas", "esos", "esta",
    "estaba", "estabais", "estaban", "estabas", "estad", "estada", "estadas", "estado", "estados",
    "estais", "estamos", "estan", "estando", "estar", "estaremos", "estará", "estarán", "estarás",
    "estaré", "estaréis", "estaría", "estaríais", "estaríamos", "estarían", "estarías", "estas",
    "este", "estemos", "esto", "estos", "estoy", "estuve", "estuviera", "estuvierais", "estuvieran",
    "estuvieras", "estuvieron", "estuviese", "estuvieseis", "estuviesen", "estuvieses", "estuvimos",
    "estuviste", "estuvisteis", "estuviéramos", "estuviésemos", "estuvo", "está", "estábamos",
    "estáis", "están", "estás", "esté", "estéis", "estén", "estés", "fuera", "fuerais", "fueran",
    "fueras", "fueron", "fuese", "fueseis", "fuesen", "fueses", "fuimos", "fuiste", "fuisteis",
    "fuéramos", "fuésemos", "gueno", "habida", "habidas", "habido", "habidos", "habiendo",
    "habremos", "habrá", "habrán", "habrás", "habré", "habréis", "habría", "habríais", "habríamos",
    "habrían", "habrías", "habéis", "había", "habíais", "habíamos", "habían", "habías", "hace",
    "haceis", "hacemos", "hacen", "hacer", "haces", "hago", "hasta", "haya", "hayamos", "hayan",
    "hayas", "hayáis", "hemos", "hube", "hubiera", "hubierais", "hubieran", "hubieras", "hubieron",
    "hubiese", "hubieseis", "hubiesen", "hubieses", "hubimos", "hubiste", "hubisteis", "hubiéramos",
    "hubiésemos", "hubo", "incluso", "intenta", "intentais", "intentamos", "intentan", "intentar",
    "intentas", "intento", "largo", "mientras", "modo", "mucho", "muchos", "mías", "míos", "nada",
    "nosotras", "nosotros", "nuestra", "nuestras", "nuestro", "nuestros", "otra", "otras", "otro",
    "otros", "para", "pero", "poco", "podeis", "podemos", "poder", "podria", "podriais",
    "podriamos", "podrian", "podrias", "porque",
    // Was the single entry "primero desde"; "desde" is already listed above.
    "primero",
    "puede", "pueden", "puedo", "quien", "quienes", "sabe", "sabeis", "sabemos", "saben", "saber",
    "sabes", "seamos", "sean", "seas", "sentid", "sentida", "sentidas", "sentido", "sentidos",
    "seremos", "será", "serán", "serás", "seré", "seréis", "sería", "seríais", "seríamos", "serían",
    "serías", "seáis", "siendo", "siente", "sintiendo", "sobre", "sois", "solamente", "solo",
    "somos", "suya", "suyas", "suyo", "suyos", "también", "tanto", "tendremos", "tendrá", "tendrán",
    "tendrás", "tendré", "tendréis", "tendría", "tendríais", "tendríamos", "tendrían", "tendrías",
    "tened", "teneis", "tenemos", "tener", "tenga", "tengamos", "tengan", "tengas", "tengo",
    "tengáis", "tenida", "tenidas", "tenido", "tenidos", "teniendo", "tenéis", "tenía", "teníais",
    "teníamos", "tenían", "tenías", "tiempo", "tiene", "tienen", "tienes", "todo", "todos",
    "trabaja", "trabajais", "trabajamos", "trabajan", "trabajar", "trabajas", "trabajo", "tras",
    "tuve", "tuviera", "tuvierais", "tuvieran", "tuvieras", "tuvieron", "tuviese", "tuvieseis",
    "tuviesen", "tuvieses", "tuvimos", "tuviste", "tuvisteis", "tuviéramos", "tuviésemos", "tuvo",
    "tuya", "tuyas", "tuyo", "tuyos", "ultimo", "unas", "unos", "usais", "usamos", "usan", "usar",
    "usas", "vais", "valor", "vamos", "vaya", "verdad",
    // Was the single entry "verdadera cierto".
    "verdadera", "cierto",
    "verdadero", "vosostras", "vosostros", "vosotras", "vosotros", "vuestra", "vuestras",
    "vuestro", "vuestros", "éramos"
);
4	+ ?>

languages/fr/stemmer.php ADDED Viewed

	@@ -0,0 +1,513 @@


1	+ <?php
2	+
3	+ /*
4	+ *
5	+ * implements a Paice/Husk Stemmer written in PHP by Alexis Ulrich (http://alx2002.free.fr)
6	+ *
7	+ * This code is in the public domain.
8	+ *
9	+ */
10	+
11	+
// Pattern used to take one stemming rule apart. The letter classes must list
// every accented letter the language's rules use.
// Capture groups: 1 = the rule's leading letters, 2 = optional '*' flag,
// 3 = one digit, 4 = optional letters after the digit, 5 = terminator ('.' or '>').
// NOTE(review): the accented letters were unreadable in the reviewed copy; the
// set below is the usual French one -- confirm against the upstream file and
// make sure it uses the same encoding as the rule strings.
$rule_pattern = "/^([a-zàâèéêëîïôûùç]*)(\*){0,1}(\d)([a-zàâèéêëîïôûùç]*)([.|>])/";
14	+
15	+ $PaiceHuskStemmerRules_fr = array(
16	+ 'esre1>', # { -erse > -ers }
17	+ 'esio1>', # { -oise > -ois }
18	+ 'siol1.', # { -lois > -loi }
19	+ 'siof0.', # { -fois > -fois }
20	+ 'sioe0.', # { -eois > -eois }
21	+ 'sio3>', # { -ois > - }
22	+ 'st1>', # { -ts > -t }
23	+ 'sf1>', # { -fs > -f }
24	+ 'sle1>', # { -els > -el }
25	+ 'slo1>', # { -ols > -ol }
26	+ 's�1>', # { -�s > -� }
27	+ '�tuae5.', # { -eaut� > - }
28	+ '�tuae2.', # { -eaut� > -eau }
29	+ 'tnia0.', # { -aint > -aint }
30	+ 'tniv1.', # { -vint > -vin }
31	+ 'tni3>', # { -int > - }
32	+ 'suor1.', # { -rous > -ou }
33	+ 'suo0.', # { -ous > -ous }
34	+ 'sdrail5.', # { -liards > -l }
35	+ 'sdrai4.', # { -iards > -i }
36	+ 'er�i1>', # { -i�re > -ier }
37	+ 'sesue3x>', # { -euses > -euse }
38	+ 'esuey5i.', # { -yeuse > -i }
39	+ 'esue2x>', # { -euse > -eux }
40	+ 'se1>', # { -es > -e }
41	+ 'er�g3.', # { -g�re > -g }
42	+ 'eca1>', # { -ace > -ac }
43	+ 'esiah0.', # { -haise > - }
44	+ 'esi1>', # { -ise > -is }
45	+ 'siss2.', # { -ssis > -ss }
46	+ 'sir2>', # { -ris > -r }
47	+ 'sit2>', # { -tis > -t }
48	+ 'egan�1.', # { -�nage > -�nag }
49	+ 'egalli6>', # { -illage > - }
50	+ 'egass1.', # { -ssage > -sag }
51	+ 'egas0.', # { -sage > - }
52	+ 'egat3.', # { -tage > - }
53	+ 'ega3>', # { -age > - }
54	+ 'ette4>', # { -ette > - }
55	+ 'ett2>', # { -tte > -t }
56	+ 'etio1.', # { -oite > -oit }
57	+ 'tio�4c.', # { -�oit > -c }
58	+ 'tio0.', # { -oit > -oit }
59	+ 'et1>', # { -te > -t }
60	+ 'eb1>', # { -be > -b }
61	+ 'snia1>', # { -ains > -ain }
62	+ 'eniatnau8>', # { -uantaine > - }
63	+ 'eniatn4.', # { -ntaine > -nt }
64	+ 'enia1>', # { -aine > -ain }
65	+ 'niatnio3.', # { -ointain > -oint }
66	+ 'niatg3.', # { -gtain > -gt }
67	+ 'e�1>', # { -�e > -� }
68	+ '�hcat1.', # { -tach� > -tach }
69	+ '�hca4.', # { -ach� > - }
70	+ '�tila5>', # { -alit� > - }
71	+ '�tici5.', # { -icit� > - }
72	+ '�tir1.', # { -rit� > -rit }
73	+ '�ti3>', # { -it� > - }
74	+ '�gan1.', # { -nag� > -nag }
75	+ '�ga3>', # { -ag� > - }
76	+ '�tehc1.', # { -chet� > -chet }
77	+ '�te3>', # { -et� > - }
78	+ '�it0.', # { -ti� > -ti� }
79	+ '�1>', # { -� > - }
80	+ 'eire4.', # { -erie > - }
81	+ 'eirue5.', # { -eurie > - }
82	+ 'eio1.', # { -oie > -oi }
83	+ 'eia1.', # { -aie > -ai }
84	+ 'ei1>', # { -ie > -i }
85	+ 'eng1.', # { -gne > -gn }
86	+ 'xuaessi7.', # { -isseaux > - }
87	+ 'xuae1>', # { -eaux > -eau }
88	+ 'uaes0.', # { -seau > -seau }
89	+ 'uae3.', # { -eau > - }
90	+ 'xuave2l.', # { -evaux > -eval }
91	+ 'xuav2li>', # { -vaux > -vail }
92	+ 'xua3la>', # { -aux > -al }
93	+ 'ela1>', # { -ale > -al }
94	+ 'lart2.', # { -tral > -tr }
95	+ 'lani2>', # { -inal > -in }
96	+ 'la�2>', # { -�al > -� }
97	+ 'siay4i.', # { -yais > -i }
98	+ 'siassia7.', # { -aissais > - }
99	+ 'siarv1*.', # { -vrais > -vrai if intact }
100	+ 'sia1>', # { -ais > -ai }
101	+ 'tneiayo6i.', # { -oyaient > -oi }
102	+ 'tneiay6i.', # { -yaient > -i }
103	+ 'tneiassia9.', # { -aissaient > - }
104	+ 'tneiareio7.', # { -oieraient > -oi }
105	+ 'tneia5>', # { -aient > - }
106	+ 'tneia4>', # { -aient > -a }
107	+ 'tiario4.', # { -oirait > -oi }
108	+ 'tiarim3.', # { -mirait > -mir }
109	+ 'tiaria3.', # { -airait > -air }
110	+ 'tiaris3.', # { -sirait > -sir }
111	+ 'tiari5.', # { -irait > - }
112	+ 'tiarve6>', # { -evrait > - }
113	+ 'tiare5>', # { -erait > - }
114	+ 'iare4>', # { -erai > - }
115	+ 'are3>', # { -era > - }
116	+ 'tiay4i.', # { -yait > -i }
117	+ 'tia3>', # { -ait > - }
118	+ 'tnay4i.', # { -yant > -i }
119	+ 'em�iu5>', # { -ui�me > - }
120	+ 'em�i4>', # { -i�me > - }
121	+ 'tnaun3.', # { -nuant > -nu }
122	+ 'tnauqo3.', # { -oquant > -oqu }
123	+ 'tnau4>', # { -uant > - }
124	+ 'tnaf0.', # { -fant > -fant }
125	+ 'tnat�2>', # { -�tant > -�t }
126	+ 'tna3>', # { -ant > - }
127	+ 'tno3>', # { -ont > - }
128	+ 'zeiy4i.', # { -yiez > -i }
129	+ 'zey3i.', # { -yez > -i }
130	+ 'zeire5>', # { -eriez > - }
131	+ 'zeird4.', # { -driez > -d }
132	+ 'zeirio4.', # { -oiriez > -oi }
133	+ 'ze2>', # { -ez > - }
134	+ 'ssiab0.', # { -baiss > - }
135	+ 'ssia4.', # { -aiss > - }
136	+ 'ssi3.', # { -iss > - }
137	+ 'tnemma6>', # { -amment > - }
138	+ 'tnemesuey9i.', # { -yeusement > -i }
139	+ 'tnemesue8>', # { -eusement > - }
140	+ 'tnemevi7.', # { -ivement > - }
141	+ 'tnemessia5.', # { -aissement > -aiss }
142	+ 'tnemessi8.', # { -issement > - }
143	+ 'tneme5>', # { -ement > - }
144	+ 'tnemia4.', # { -aiment > -ai }
145	+ 'tnem�5>', # { -�ment > - }
146	+ 'el2l>', # { -le > -l }
147	+ 'lle3le>', # { -ell > -el }
148	+ 'let�0.', # { -�tel > -�tel }
149	+ 'lepp0.', # { -ppel > -ppel }
150	+ 'le2>', # { -el > - }
151	+ 'srei1>', # { -iers > -ier }
152	+ 'reit3.', # { -tier > -t }
153	+ 'reila2.', # { -alier > -ali }
154	+ 'rei3>', # { -ier > - }
155	+ 'ert�e5.', # { -e�tre > - }
156	+ 'ert��1.', # { -��tre > -��tr }
157	+ 'ert�4.', # { -�tre > - }
158	+ 'drai4.', # { -iard > - }
159	+ 'erdro0.', # { -ordre > -ordre }
160	+ 'erute5.', # { -eture > - }
161	+ 'ruta0.', # { -atur > -atur }
162	+ 'eruta1.', # { -ature > -atur }
163	+ 'erutiov1.', # { -voiture > -voitur }
164	+ 'erub3.', # { -bure > -b }
165	+ 'eruh3.', # { -hure > -h }
166	+ 'erul3.', # { -lure > -l }
167	+ 'er2r>', # { -re > -r }
168	+ 'nn1>', # { -nn > -n }
169	+ 'r�i3.', # { -i�r > - }
170	+ 'srev0.', # { -vers > -vers }
171	+ 'sr1>', # { -rs > -r }
172	+ 'rid2>', # { -dir > -d }
173	+ 're2>', # { -er > - }
174	+ 'xuei4.', # { -ieux > - }
175	+ 'esuei5.', # { -ieuse > - }
176	+ 'lbati3.', # { -itabl > -it }
177	+ 'lba3>', # { -abl > - }
178	+ 'rueis0.', # { -sieur > - }
179	+ 'ruehcn4.', # { -ncheur > -nc }
180	+ 'ecirta6.', # { -atrice > - }
181	+ 'ruetai6.', # { -iateur > - }
182	+ 'rueta5.', # { -ateur > - }
183	+ 'rueir0.', # { -rieur > - }
184	+ 'rue3>', # { -eur > - }
185	+ 'esseti6.', # { -itesse > - }
186	+ 'essere6>', # { -eresse > - }
187	+ 'esserd1.', # { -dresse > -dress }
188	+ 'esse4>', # { -esse > - }
189	+ 'essiab1.', # { -baisse > -baiss }
190	+ 'essia5.', # { -aisse > - }
191	+ 'essio1.', # { -oisse > -oiss }
192	+ 'essi4.', # { -isse > - }
193	+ 'essal4.', # { -lasse > -l }
194	+ 'essa1>', # { -asse > -ass }
195	+ 'ssab1.', # { -bass > -bas }
196	+ 'essurp1.', # { -prusse > -uss }
197	+ 'essu4.', # { -usse > - }
198	+ 'essi1.', # { -isse > -ss }
199	+ 'ssor1.', # { -ross > -ros }
200	+ 'essor2.', # { -rosse > -ros }
201	+ 'esso1>', # { -osse > -oss }
202	+ 'ess2>', # { -sse > -s }
203	+ 'tio3.', # { -oit > - }
204	+ 'r�s2re.', # { -s�r > -ser }
205	+ 'r�0e.', # { -�r > -�re }
206	+ 'esn1.', # { -nse > -�ns }
207	+ 'eu1>', # { -ue > -u }
208	+ 'sua0.', # { -aus > -aus }
209	+ 'su1>', # { -us > -u }
210	+ 'utt1>', # { -utt > -tt }
211	+ 'tu�3c.', # { -�ut > -c }
212	+ 'u�2c.', # { -�u > -c }
213	+ 'ur1.', # { -ru > -r }
214	+ 'ehcn2>', # { -nche > -nc }
215	+ 'ehcu1>', # { -uche > -uch }
216	+ 'snorr3.', # { -rrons > -rr }
217	+ 'snoru3.', # { -urons > -ur }
218	+ 'snorua3.', # { -aurons > -aur }
219	+ 'snorv3.', # { -vrons > -vr }
220	+ 'snorio4.', # { -oirons > -oi }
221	+ 'snori5.', # { -irons > - }
222	+ 'snore5>', # { -erons > - }
223	+ 'snortt4>', # { -ttrons > -tt }
224	+ 'snort�a7.', # { -a�trons > - }
225	+ 'snort3.', # { -trons > -tr }
226	+ 'snor4.', # { -rons > - }
227	+ 'snossi6.', # { -issons > - }
228	+ 'snoire6.', # { -erions > - }
229	+ 'snoird5.', # { -drions > -d }
230	+ 'snoitai7.', # { -iations > - }
231	+ 'snoita6.', # { -ations > - }
232	+ 'snoits1>', # { -stions > -stion }
233	+ 'noits0.', # { -stion > -stion }
234	+ 'snoi4>', # { -ions > - }
235	+ 'noitaci7>', # { -ication > - }
236	+ 'noitai6.', # { -iation > - }
237	+ 'noita5.', # { -ation > - }
238	+ 'noitu4.', # { -ution > -u }
239	+ 'noi3>', # { -ion > - }
240	+ 'snoya0.', # { -ayons > -ayons }
241	+ 'snoy4i.', # { -yons > -i }
242	+ 'sno�a1.', # { -a�ons > -a�on }
243	+ 'sno�r1.', # { -r�ons > -r�on }
244	+ 'snoe4.', # { -eons > - }
245	+ 'snosiar1>', # { -raisons > - }
246	+ 'snola1.', # { -alons > -alon }
247	+ 'sno3>', # { -ons > - }
248	+ 'sno1>', # { -ons > -on }
249	+ 'noll2.', # { -llon > -ll }
250	+ 'tnennei4.', # { -iennent > -ien }
251	+ 'ennei2>', # { -ienne > -ien }
252	+ 'snei1>', # { -iens > -ien }
253	+ 'sne�1>', # { -�ens > -�en }
254	+ 'enne�5e.', # { -�enne > -e }
255	+ 'ne�3e.', # { -�en > -e }
256	+ 'neic0.', # { -cien > -cien }
257	+ 'neiv0.', # { -vien > -vien }
258	+ 'nei3.', # { -ien > - }
259	+ 'sc1.', # { -cs > -c }
260	+ 'sd1.', # { -ds > -d }
261	+ 'sg1.', # { -gs > -g }
262	+ 'sni1.', # { -ins > -in }
263	+ 'tiu0.', # { -uit > - }
264	+ 'ti2.', # { -it > - }
265	+ 'sp1>', # { -ps > -p }
266	+ 'sna1>', # { -ans > -an }
267	+ 'sue1.', # { -eus > -eu }
268	+ 'enn2>', # { -nne > -n }
269	+ 'nong2.', # { -gnon > -gn }
270	+ 'noss2.', # { -sson > -ss }
271	+ 'rioe4.', # { -eoir > - }
272	+ 'riot0.', # { -toir > -toir }
273	+ 'riorc1.', # { -croir > -croi }
274	+ 'riovec5.', # { -cevoir > -c }
275	+ 'rio3.', # { -oir > - }
276	+ 'ric2.', # { -cir > -l }
277	+ 'ril2.', # { -lir > -l }
278	+ 'tnerim3.', # { -mirent > -mir }
279	+ 'tneris3>', # { -sirent > -sir }
280	+ 'tneri5.', # { -irent > - }
281	+ 't�a3.', # { -a�t > - }
282	+ 'riss2.', # { -ssir > -ss }
283	+ 't�2.', # { -�t > - }
284	+ 't�2>', # { -�t > - }
285	+ 'ario2.', # { -oira > -oi }
286	+ 'arim1.', # { -mira > -m }
287	+ 'ara1.', # { -ara > -ar }
288	+ 'aris1.', # { -sira > -sir }
289	+ 'ari3.', # { -ira > - }
290	+ 'art1>', # { -tra > -tr }
291	+ 'ardn2.', # { -ndra > -nd }
292	+ 'arr1.', # { -rra > -rr }
293	+ 'arua1.', # { -aura > -aur }
294	+ 'aro1.', # { -ora > -or }
295	+ 'arv1.', # { -vra > -vr }
296	+ 'aru1.', # { -ura > -ur }
297	+ 'ar2.', # { -ra > - }
298	+ 'rd1.', # { -dr > -d }
299	+ 'ud1.', # { -du > - }
300	+ 'ul1.', # { -lu > -l }
301	+ 'ini1.', # { -ini > -in }
302	+ 'rin2.', # { -nir > - }
303	+ 'tnessiab3.', # { -baissent > -baiss }
304	+ 'tnessia7.', # { -aissent > - }
305	+ 'tnessi6.', # { -issent > - }
306	+ 'tnessni4.', # { -inssent > -ins }
307	+ 'sini2.', # { -inis > -in }
308	+ 'sl1.', # { -ls > -l }
309	+ 'iard3.', # { -drai > -d }
310	+ 'iario3.', # { -oirai > -oi }
311	+ 'ia2>', # { -ai > - }
312	+ 'io0.', # { -oi > -oi }
313	+ 'iule2.', # { -elui > -el }
314	+ 'i1>', # { -i > - }
315	+ 'sid2.', # { -dis > -d }
316	+ 'sic2.', # { -cis > -c }
317	+ 'esoi4.', # { -iose > - }
318	+ 'ed1.', # { -de > -d }
319	+ 'ai2>', # { -ia > - }
320	+ 'a1>', # { -a > - }
321	+ 'adr1.', # { -rda > -rd }
322	+ 'tner�5>', # { -�rent > - }
323	+ 'evir1.', # { -rive > -riv }
324	+ 'evio4>', # { -oive > - }
325	+ 'evi3.', # { -ive > - }
326	+ 'fita4.', # { -atif > - }
327	+ 'fi2>', # { -if > - }
328	+ 'enie1.', # { -eine > -ein }
329	+ 'sare4>', # { -eras > - }
330	+ 'sari4>', # { -iras > - }
331	+ 'sard3.', # { -dras > -d }
332	+ 'sart2>', # { -tras > -tr }
333	+ 'sa2.', # { -as > - }
334	+ 'tnessa6>', # { -assent > - }
335	+ 'tnessu6>', # { -ussent > - }
336	+ 'tnegna3.', # { -angent > -ang }
337	+ 'tnegi3.', # { -igent > -ig }
338	+ 'tneg0.', # { -gent > -gent }
339	+ 'tneru5>', # { -urent > - }
340	+ 'tnemg0.', # { -gment > -gment }
341	+ 'tnerni4.', # { -inrent > -in }
342	+ 'tneiv1.', # { -vient > -vien }
343	+ 'tne3>', # { -ent > - }
344	+ 'une1.', # { -enu > -en }
345	+ 'en1>', # { -ne > -n }
346	+ 'nitn2.', # { -ntin > - }
347	+ 'ecnay5i.', # { -yance > -i }
348	+ 'ecnal1.', # { -lance > -lanc }
349	+ 'ecna4.', # { -ance > - }
350	+ 'ec1>', # { -ce > -c }
351	+ 'nn1.', # { -nn > -n }
352	+ 'rit2>', # { -tir > - }
353	+ 'rut2>', # { -tur > -t }
354	+ 'rud2.', # { -dur > -d }
355	+ 'ugn1>', # { -ngu > -ng }
356	+ 'eg1>', # { -ge > -g }
357	+ 'tuo0.', # { -out > -out }
358	+ 'tul2>', # { -lut > -l }
359	+ 't�2>', # { -�t > - }
360	+ 'ev1>', # { -ve > -v }
361	+ 'v�2ve>', # { -�v > -ev }
362	+ 'rtt1>', # { -ttr > -tt }
363	+ 'emissi6.', # { -issime > - }
364	+ 'em1.', # { -me > -m }
365	+ 'ehc1.', # { -che > -ch }
366	+ 'c�i2c�.', # { -i�c > -i�c }
367	+ 'libi2l.', # { -ibil > -ibl }
368	+ 'llie1.', # { -eill > -eil }
369	+ 'liei4i.', # { -ieil > -i }
370	+ 'xuev1.', # { -veux > -veu }
371	+ 'xuey4i.', # { -yeux > -i }
372	+ 'xueni5>', # { -ineux > - }
373	+ 'xuell4.', # { -lleux > -l }
374	+ 'xuere5.', # { -ereux > - }
375	+ 'xue3>', # { -eux > - }
376	+ 'rb�3rb�.', # { -�br > -�br }
377	+ 'tur2.', # { -rut > -r }
378	+ 'rir�4re.', # { -�rir > -er }
379	+ 'rir2.', # { -rir > -r }
380	+ 'c�2ca.', # { -�c > -ac }
381	+ 'snu1.', # { -uns > -un }
382	+ 'rt�a4.', # { -a�tr > - }
383	+ 'long2.', # { -gnol > -gn }
384	+ 'vec2.', # { -cev > -c }
385	+ '�1c>', # { -� > -c }
386	+ 'ssilp3.', # { -pliss > -pl }
387	+ 'silp2.', # { -plis > -pl }
388	+ 't�hc2te.', # { -ch�t > -chet }
389	+ 'n�m2ne.', # { -m�n > -men }
390	+ 'llepp1.', # { -ppell > -ppel }
391	+ 'tan2.', # { -nat > -n }
392	+ 'rv�3rve.', # { -�vr > -evr }
393	+ 'rv�3rve.', # { -�vr > -evr }
394	+ 'r�2re.', # { -�r > -er }
395	+ 'r�2re.', # { -�r > -er }
396	+ 't�2te.', # { -�t > -et }
397	+ 't�2te.', # { -�t > -et }
398	+ 'epp1.', # { -ppe > -pp }
399	+ 'eya2i.', # { -aye > -ai }
400	+ 'ya1i.', # { -ay > -ai }
401	+ 'yo1i.', # { -oy > -oi }
402	+ 'esu1.', # { -use > -us }
403	+ 'ugi1.', # { -igu > -g }
404	+ 'tt1.', # { -tt > -t }
405	+
406	+ # end rule: the stem has already been found
407	+ 'end0.'
408	+ );
409	+
/**
 * Finds the next stemming rule that can be applied to a reversed word form.
 *
 * Rules are scanned in order, beginning at index $rule_number of the global
 * $PaiceHuskStemmerRules_fr table. For each rule, the part matched by the
 * global $rule_pattern is replaced by its first capture group (presumably the
 * rule's reversed suffix letters -- $rule_pattern is defined outside this
 * block), and the result is compared case-insensitively against the start of
 * $reversed_form.
 *
 * @param string $reversed_form the word being stemmed, reversed
 * @param int    $rule_number   index of the first rule to try
 * @return int index of the first applicable rule, or -1 if there is none
 *             (i.e. the stem has been found)
 */
function getFirstRule($reversed_form, $rule_number) {
    global $PaiceHuskStemmerRules_fr;
    global $rule_pattern;

    $total_rules = sizeOf($PaiceHuskStemmerRules_fr);
    $candidate = $rule_number;

    while ($candidate < $total_rules) {
        // keep only the letters of the candidate rule
        $letters = preg_replace($rule_pattern, "\\1", $PaiceHuskStemmerRules_fr[$candidate]);

        // the rule applies when its letters are a prefix of the reversed form
        $is_prefix = (strncasecmp($letters, $reversed_form, strlen($letters)) == 0);
        if ($is_prefix) {
            return $candidate;
        }

        $candidate++;
    }

    return -1;
}
426	+
427	+
/**
 * Checks whether a candidate stem is acceptable.
 *
 * The stem is given reversed, so the "$"-anchored test below looks at the
 * first letter of the original word.
 *
 *  - word starting with a vowel (or "y"): accepted when the stem is longer
 *    than 2 bytes;
 *  - word starting with anything else: the stem must be longer than 2 bytes
 *    and contain at least one vowel or "y".
 *
 * NOTE(review): the original comments spoke of "at least two letters" while
 * the code requires strlen() > 2 -- confirm the intended threshold.
 * NOTE(review): the accented vowels inside the character classes appear as
 * replacement characters in this copy -- verify the file encoding.
 *
 * @param string $reversed_stem the stem to check, in reverse form
 * @return bool|int truthy when the stem is acceptable (the last branch
 *                  returns the raw preg_match() result)
 */
function checkAcceptability($reversed_stem) {
    $stem_length = strlen($reversed_stem);
    $starts_with_vowel = preg_match("/[a��e��i��o�u��y]$/", $reversed_stem);

    if ($starts_with_vowel) {
        return ($stem_length > 2);
    }

    // consonant-initial word: too short means rejected outright
    if ($stem_length <= 2) {
        return False;
    }

    // ... and at least one of the remaining letters must be a vowel or "y"
    return (preg_match("/[a��e��i��o�u��y]/", $reversed_stem));
}
449	+
450	+
/**
 * The actual Paice/Husk stemmer: returns a stem for the given form.
 *
 * The word is converted with utf8_decode(), reversed, and then matched
 * against the global rule table. Each rule is split with the global
 * $rule_pattern; as used here the capture groups are:
 *   [2] '*' when the rule may only be applied to an intact word,
 *   [3] number of leading bytes to drop from the reversed form,
 *   [4] letters to put in their place,
 *   [5] '.' when the rule is terminal (otherwise stemming continues).
 * A rule is only applied if the resulting stem passes checkAcceptability().
 *
 * @param string $form the word for which we want the stem (UTF-8)
 * @return string the stem, re-encoded with utf8_encode()
 */
function PaiceHuskStemmer($form) {
    global $PaiceHuskStemmerRules_fr;
    global $rule_pattern;

    // True until a rule has actually been applied to the word;
    // rules flagged with '*' are only allowed while this is still True
    $intact = True;
    $reversed_form = strrev(utf8_decode($form));
    $rule_number = 0;

    // goes through the rules until a terminal one (ending with '.') has been
    // applied or no further rule matches
    while (True) {
        $rule_number = getFirstRule($reversed_form, $rule_number);
        if ($rule_number == -1) {
            // no other rule can be applied => the stem has been found
            break;
        }

        $rule = $PaiceHuskStemmerRules_fr[$rule_number];
        if (!preg_match($rule_pattern, $rule, $matches)) {
            // malformed rule: ignore it instead of reading undefined groups
            $rule_number++;
            continue;
        }

        if (($matches[2] == '*') && !$intact) {
            // "intact only" rule, but the word has already been modified
            $rule_number++;
            continue;
        }

        $reversed_stem = utf8_decode($matches[4]) . substr($reversed_form, (int) $matches[3]);
        if (!checkAcceptability($reversed_stem)) {
            // go to another rule
            $rule_number++;
            continue;
        }

        $reversed_form = $reversed_stem;
        $intact = False;
        if ($matches[5] == '.') {
            break;
        }
    }

    return utf8_encode(strrev($reversed_form));
}
493	+
/*
Stem caching added by Rob Marsh, SJ
http://rmarsh.com
*/

// word => stem, filled lazily by stem()
$StemCache = array();

/**
 * Returns the stem of a word, computing it with PaiceHuskStemmer() on first
 * use and serving it from the global $StemCache afterwards.
 *
 * @param string $word the word to stem
 * @return string the stemmed word
 */
function stem($word) {
    global $StemCache;

    if (isset($StemCache[$word])) {
        return $StemCache[$word];
    }

    $stemmedword = PaiceHuskStemmer($word);
    $StemCache[$word] = $stemmedword;
    return $stemmedword;
}
512	+
513	+ ?>

languages/fr/stopwords.php ADDED Viewed

	@@ -0,0 +1,4 @@


1	+ <?php
2	+ // The list of common words we want to ignore. NB: anything shorter than 4 characters is already discarded by the plugin and doesn't need to appear here.
3	+ $overusedwords = array("afin", "aient", "aies", "ailleurs", "ainsi", "alentour", "alias", "allaient", "allais", "allait", "allez", "allons", "alors", "apr�s", "apr�s-demain", "arri�re", "assez", "attendu", "au-dedans", "au-dehors", "au-del�", "au-dessous", "au-dessus", "au-devant", "aucun", "aucune", "audit", "aujourd'", "aujourd'hui", "auparavant", "aupr�s", "auquel", "aura", "aurai", "auraient", "aurais", "aurait", "auras", "aurez", "auriez", "aurions", "aurons", "auront", "aussi", "aussit�t", "autant", "autour", "autre", "autrefois", "autres", "autrui", "auxdites", "auxdits", "auxquelles", "auxquels", "avaient", "avais", "avait", "avant", "avant-hier", "avec", "avez", "aviez", "avions", "avoir", "avons", "ayant", "ayante", "ayantes", "ayants", "ayez", "ayons", "banco", "beaucoup", "bien", "bient�t", "c'est-�-dire", "c.-�-d.", "cahin-caha", "ceci", "cela", "celle", "celle-ci", "celle-l�", "celles", "celles-ci", "celles-l�", "celui", "celui-ci", "celui-l�", "cent", "cents", "cependant", "certain", "certaine", "certaines", "certains", "certes", "cette", "ceux", "ceux-ci", "ceux-l�", "chacun", "chacune", "chaque", "cher", "chez", "chose", "ci-apr�s", "ci-dessous", "ci-dessus", "cinq", "cinquante", "cinquante-cinq", "cinquante-deux", "cinquante-et-un", "cinquante-huit", "cinquante-neuf", "cinquante-quatre", "cinquante-sept", "cinquante-six", "cinquante-trois", "combien", "comme", "comment", "contrario", "contre", "cours", "crescendo", "c�ans", "d'abord", "d'accord", "d'affil�e", "d'ailleurs", "d'apr�s", "d'arrache-pied", "d'embl�e", "d'un", "d'une", "dans", "davantage", "debout", "dedans", "dehors", "del�", "demain", "depuis", "derechef", "derri�re", "desdites", "desdits", "desquelles", "desquels", "dessous", "dessus", "deux", "devant", "devers", "de��", "diff�rentes", "diff�rents", "dire", "disent", "dito", "divers", "diverses", "dix-huit", "dix-neuf", "dix-sept", "donc", "dont", "dor�navant", "douze", "dudit", "duquel", "durant", "d�j�", "d�pit", "d�sormais", 
"elle", "elles", "en-dehors", "encore", "enfin", "ensemble", "ensuite", "entre", "entre-temps", "envers", "environ", "et/ou", "eues", "eurent", "eusse", "eussent", "eusses", "eussiez", "eussions", "expr�s", "extenso", "extremis", "e�mes", "e�tes", "facto", "faire", "fais", "faisaient", "faisais", "faisait", "faisons", "fait", "faites", "fallait", "faudrait", "faut", "faveur", "flac", "fors", "fort", "forte", "fortiori", "frais", "furent", "fusse", "fussent", "fusses", "fussiez", "fussions", "f�mes", "f�tes", "grand-chose", "grosso", "gr�ce", "gu�re", "haut", "hein", "hier", "hol�", "hormis", "hors", "huit", "ibidem", "ici-bas", "idem", "illico", "ipso", "item", "jadis", "jamais", "jusqu'", "jusqu'au", "jusqu'aux", "jusqu'�", "jusque", "juste", "l'autre", "l'encontre", "l'instar", "l'insu", "l'issue", "l'occasion", "l'on", "l'un", "l'une", "l'�gard", "ladite", "laquelle", "lequel", "lesquelles", "lesquels", "leur", "leurs", "loin", "longtemps", "lors", "lorsqu'", "lorsque", "l�-bas", "l�-dedans", "l�-dehors", "l�-derri�re", "l�-dessous", "l�-dessus", "l�-devant", "l�-haut", "maint", "mainte", "maintenant", "maintes", "maints", "mais", "malgr�", "marge", "mati�re", "mien", "mienne", "miennes", "miens", "mieux", "mille", "milliards", "millions", "minima", "modo", "moins", "moult", "moyennant", "m�me", "m�mes", "nagu�re", "neuf", "nonante", "nonobstant", "notre", "nous", "nulle", "n�anmoins", "n�tre", "n�tres", "octante", "onze", "ouais", "outre", "par-ci", "par-del�", "par-derri�re", "par-dessous", "par-dessus", "par-devant", "par-l�", "parbleu", "parce", "parfois", "parmi", "part", "partir", "partout", "passim", "pass�", "pendant", "personne", "petto", "peur", "peut", "peut-�tre", "peuvent", "peux", "plus", "plusieurs", "plut�t", "point", "posteriori", "pour", "pourquoi", "pourtant", "pourvu", "presqu'", "presque", "primo", "priori", "prou", "pr�s", "pr�alable", "puis", "puisqu'", "puisque", "quand", "quant", "quarante", "quarante-cinq", "quarante-deux", 
"quarante-et-un", "quarante-huit", "quarante-neuf", "quarante-quatre", "quarante-sept", "quarante-six", "quarante-trois", "quasi", "quatorze", "quatre", "quatre-vingt", "quatre-vingt-cinq", "quatre-vingt-deux", "quatre-vingt-dix", "quatre-vingt-dix-huit", "quatre-vingt-dix-neuf", "quatre-vingt-dix-sept", "quatre-vingt-douze", "quatre-vingt-huit", "quatre-vingt-neuf", "quatre-vingt-onze", "quatre-vingt-quatorze", "quatre-vingt-quatre", "quatre-vingt-quinze", "quatre-vingt-seize", "quatre-vingt-sept", "quatre-vingt-six", "quatre-vingt-treize", "quatre-vingt-trois", "quatre-vingt-un", "quatre-vingt-une", "quatre-vingts", "quel", "quelle", "quelles", "quelqu'", "quelqu'un", "quelqu'une", "quelque", "quelquefois", "quelques", "quelques-unes", "quelques-uns", "quels", "quiconque", "quinze", "quoi", "quoiqu'", "quoique", "raison", "rapport", "regard", "revoici", "revoil�", "rien", "sans", "sauf", "secundo", "sein", "seize", "selon", "sensu", "sept", "septante", "sera", "serai", "seraient", "serais", "serait", "seras", "serez", "seriez", "serions", "serons", "seront", "sien", "sienne", "siennes", "siens", "sine", "sinon", "situ", "sit�t", "soient", "sois", "soit", "soixante", "soixante-cinq", "soixante-deux", "soixante-dix", "soixante-dix-huit", "soixante-dix-neuf", "soixante-dix-sept", "soixante-douze", "soixante-et-onze", "soixante-et-un", "soixante-et-une", "soixante-huit", "soixante-neuf", "soixante-quatorze", "soixante-quatre", "soixante-quinze", "soixante-seize", "soixante-sept", "soixante-six", "soixante-treize", "soixante-trois", "sommes", "sont", "soudain", "sous", "souvent", "soyez", "soyons", "stricto", "suis", "suite", "sujet", "sur-le-champ", "surtout", "tacatac", "tandis", "tant", "tant�t", "tard", "telle", "telles", "tels", "tien", "tienne", "tiennes", "tiens", "toujours", "tous", "tout", "toute", "toutefois", "toutes", "travers", "treize", "trente", "trente-cinq", "trente-deux", "trente-et-un", "trente-huit", "trente-neuf", "trente-quatre", "trente-sept", 
"trente-six", "trente-trois", "trois", "trop", "tr�s", "unes", "vais", "vers", "vertu", "veut", "veux", "vice-versa", "vingt", "vingt-cinq", "vingt-deux", "vingt-huit", "vingt-neuf", "vingt-quatre", "vingt-sept", "vingt-six", "vingt-trois", "vis-�-vis", "vite", "vitro", "vivo", "voici", "voil�", "voire", "volontiers", "votre", "vous", "v�tre", "v�tres", "z�ro", "�gard", "�taient", "�tais", "�tait", "�tant", "�tante", "�tantes", "�tants", "�tiez", "�tions", "�t�e", "�t�es", "�t�s", "�tes", "�tre");
4	+ ?>

languages/it/stemmer.php ADDED Viewed

	@@ -0,0 +1,341 @@


1	+ <?php
2	+
3	+ /*
4	+
5	+ Converted to PHP 4 by Rob Marsh, SJ
6	+
7	+ */
8	+
9	+
10	+ /*
11	+ *
12	+ * This script has been written by Roberto Mirizzi (rob4you at vodafone dot it) in February 2007.
13	+ *
14	+ * It is the PHP5 implementation of Martin Porter's stemming algorithm for Italian language.
15	+ *
16	+ * This algorithm can be found at address: http://snowball.tartarus.org/algorithms/italian/stemmer.html.
17	+ *
18	+ * Use the code freely. I'm not responsible for any problems.
19	+ *
20	+ * Usage:
21	+ *
22	+ * $stemmer = new ItalianStemmer();
23	+ * $stemmed_word = $stemmer->stem($word);
24	+ *
25	+ * All Italian characters are (originally) in latin1 (ISO-8859-1).
26	+ *
27	+ */
/**
 * Porter (Snowball) stemmer for Italian.
 *
 * Usage:
 *   $stemmer = new ItalianStemmer();
 *   $stemmed_word = $stemmer->stem($word);
 *
 * All string handling is byte-based (strlen/substr/strpos).
 * NOTE(review): the accented-vowel literals below appear as replacement
 * characters in this copy; the file header says they were originally
 * ISO-8859-1 -- verify the file encoding before relying on them.
 */
class ItalianStemmer {
    var $vocali = array('a','e','i','o','u','�','�','�','�','�');
    // 'I' and 'U' are the upper-cased markers produced by put_u_after_q_to_upper()
    // and i_u_between_vow_to_upper(); they are treated as consonants
    var $consonanti = array('b','c','d','f','g','h','j','k','l','m','n','p','q','r','s','t','v','w','x','y','z','I','U');
    var $accenti_acuti = array('�','�','�','�','�');
    var $accenti_gravi = array('�','�','�','�','�');

    var $suffissi_step_0 = array('ci','gli','la','le','li','lo','mi','ne','si','ti','vi','sene','gliela','gliele','glieli','glielo','gliene','mela','mele','meli','melo','mene','tela','tele','teli','telo','tene','cela','cele','celi','celo','cene','vela','vele','veli','velo','vene');

    var $suffissi_step_1_a = array('anza','anze','ico','ici','ica','ice','iche','ichi','ismo','ismi','abile','abili','ibile','ibili','ista','iste','isti','ist�','ist�','ist�','oso','osi','osa','ose','mente','atrice','atrici','ante','anti');
    var $suffissi_step_1_b = array('azione','azioni','atore','atori');
    var $suffissi_step_1_c = array('logia','logie');
    var $suffissi_step_1_d = array('uzione','uzioni','usione','usioni');
    var $suffissi_step_1_e = array('enza','enze');
    var $suffissi_step_1_f = array('amento','amenti','imento','imenti');
    var $suffissi_step_1_g = array('amente');
    var $suffissi_step_1_h = array('it�');
    var $suffissi_step_1_i = array('ivo','ivi','iva','ive');

    var $suffissi_step_2 = array('ammo','ando','ano','are','arono','asse','assero','assi','assimo','ata','ate','ati','ato','ava','avamo','avano','avate','avi','avo','emmo','enda','ende','endi','endo','er�','erai','eranno','ere','erebbe','erebbero','erei','eremmo','eremo','ereste','eresti','erete','er�','erono','essero','ete','eva','evamo','evano','evate','evi','evo','Yamo','iamo','immo','ir�','irai','iranno','ire','irebbe','irebbero','irei','iremmo','iremo','ireste','iresti','irete','ir�','irono','isca','iscano','isce','isci','isco','iscono','issero','ita','ite','iti','ito','iva','ivamo','ivano','ivate','ivi','ivo','ono','uta','ute','uti','uto','ar','ir');

    var $ante_suff_a = array('ando','endo');
    var $ante_suff_b = array('ar','er','ir');

    /**
     * Sorts the suffix lists that are searched "first match wins" so that the
     * longest suffixes are tried first.
     */
    function __construct() {
        // array callables instead of create_function(), which no longer exists in PHP 8
        usort($this->suffissi_step_0, array($this, 'cmp_longest_first'));
        usort($this->suffissi_step_1_a, array($this, 'cmp_longest_first'));
        usort($this->suffissi_step_2, array($this, 'cmp_longest_first'));
    }

    /**
     * usort() comparator: longer strings first. The relative order of strings
     * with the same length does not matter to the callers, because two
     * different suffixes of equal length can never both end the same word.
     */
    function cmp_longest_first($a, $b) {
        return strlen($b) - strlen($a);
    }

    /**
     * Byte offset at which $suffix ends $haystack, or false when $haystack
     * does not end with $suffix.
     */
    function suffix_pos($haystack, $suffix) {
        $start = strlen($haystack) - strlen($suffix);
        if ($start < 0) {
            return false;
        }
        return strpos($haystack, $suffix, $start);
    }

    function trim($str) {
        return trim($str);
    }

    function to_lower($str) {
        return strtolower($str);
    }

    /** Replaces each entry of $accenti_acuti by the matching entry of $accenti_gravi. */
    function replace_acc_acuti($str) {
        return str_replace($this->accenti_acuti, $this->accenti_gravi, $str);
    }

    /** Marks the 'u' of every "qu" by upper-casing it. */
    function put_u_after_q_to_upper($str) {
        return str_replace("qu", "qU", $str);
    }

    /** Upper-cases an 'i' or 'u' that stands between two vowels. */
    function i_u_between_vow_to_upper($str) {
        // preg_replace_callback() replaces the former /e modifier, removed in PHP 7
        $pattern = '/([aeiou��])([iu])([aeiou��])/';
        return preg_replace_callback($pattern, array($this, 'upper_middle_letter'), $str);
    }

    /** Callback for i_u_between_vow_to_upper(): upper-cases capture group 2. */
    function upper_middle_letter($m) {
        return $m[1].strtoupper($m[2]).$m[3];
    }

    /**
     * Returns the RV region of a word.
     *
     * If the second letter is a consonant, RV is the region after the next
     * following vowel; if the first two letters are vowels, RV is the region
     * after the next consonant; otherwise (consonant-vowel case) RV is the
     * region after the third letter. RV is empty when these positions cannot
     * be found.
     *
     * @param string $str the word
     * @return string the RV region, '' when there is none
     */
    function return_RV($str) {
        if (strlen($str) < 2) return '';

        $first = $str[0];
        $second = $str[1];

        if (in_array($second, $this->consonanti, true)) {
            $from_vowel = strpbrk((string) substr($str, 2), implode($this->vocali));
            return ($from_vowel === false) ? '' : (string) substr($from_vowel, 1);
        }
        if (in_array($first, $this->vocali, true) && in_array($second, $this->vocali, true)) {
            $from_consonant = strpbrk($str, implode($this->consonanti));
            return ($from_consonant === false) ? '' : (string) substr($from_consonant, 1);
        }
        if (in_array($first, $this->consonanti, true) && in_array($second, $this->vocali, true)) {
            return (string) substr($str, 3);
        }

        // second byte is neither a listed vowel nor a listed consonant
        return '';
    }

    /**
     * Returns what follows the first "vowel(s) + consonant" sequence of $str,
     * or '' when $str contains no such sequence.
     */
    function after_vowel_consonant($str) {
        $pattern = '/['.implode($this->vocali).']+'.'['.implode($this->consonanti).']'.'(.*)/';
        preg_match($pattern, (string) $str, $matches);
        return isset($matches[1]) ? $matches[1] : '';
    }

    /**
     * R1 is the region after the first non-vowel following a vowel, or the
     * null region at the end of the word if there is no such non-vowel.
     */
    function return_R1($str) {
        return $this->after_vowel_consonant($str);
    }

    /**
     * R2 is the region after the first non-vowel following a vowel in R1, or
     * the null region at the end of the word if there is no such non-vowel.
     */
    function return_R2($str) {
        return $this->after_vowel_consonant($this->return_R1($str));
    }

    /**
     * Step 0: attached pronoun (always done).
     *
     * Looks for one of $suffissi_step_0 at the end of RV. The pronoun is
     * removed when preceded by 'ando'/'endo', and replaced by 'e' when
     * preceded by 'ar'/'er'/'ir'; otherwise the word is returned unchanged.
     */
    function step_0($str) {
        $str_len = strlen($str);
        $rv = $this->return_RV($str);
        $rv_len = strlen($rv);

        $pos = false;
        foreach ($this->suffissi_step_0 as $suff) {
            $pos = $this->suffix_pos($rv, $suff);
            if ($pos !== false) break;
        }
        if ($pos === false) return $str;

        // what precedes the pronoun inside RV
        $ante_suff = (string) substr($rv, 0, $pos);
        // offset of the pronoun inside the whole word
        $cut = $pos + $str_len - $rv_len;

        foreach ($this->ante_suff_a as $ante_a) {
            if ($this->suffix_pos($ante_suff, $ante_a) !== false) {
                return substr($str, 0, $cut);
            }
        }

        foreach ($this->ante_suff_b as $ante_b) {
            if ($this->suffix_pos($ante_suff, $ante_b) !== false) {
                return substr($str, 0, $cut).'e';
            }
        }

        return $str;
    }

    /**
     * Removes the first suffix of $arr_suff found at the end of a region.
     *
     * @param array  $arr_suff candidate suffixes, tried in array order
     * @param string $str      the word
     * @param int    $str_len  strlen($str)
     * @param string $where    region to look in: 'r1', 'r2' or 'rv'
     * @param bool   $ovunque  when true, the first suffix that ends the whole
     *                         word is selected, and it is removed only if it
     *                         also ends the region; no other suffix is tried
     * @return string|null the word without the suffix, or null when nothing
     *                     was removed
     */
    function delete_suff($arr_suff,$str,$str_len,$where,$ovunque=false) {
        if ($where === 'r2') $r = $this->return_R2($str);
        else if ($where === 'rv') $r = $this->return_RV($str);
        else if ($where === 'r1') $r = $this->return_R1($str);
        else $r = '';

        $r_len = strlen($r);

        if ($ovunque) {
            foreach ($arr_suff as $suff) {
                if ($str_len - strlen($suff) < 0) continue;
                $pos = strpos($str, $suff, $str_len - strlen($suff));
                if ($pos === false) continue;

                // the suffix ends the word; strip it only if it lies inside the region
                if ($this->suffix_pos($r, $suff) !== false) {
                    $ret_str = substr($str, 0, $pos);
                    if ($ret_str !== '') return $ret_str;
                }
                break;
            }
            return null;
        }

        foreach ($arr_suff as $suff) {
            $pos = $this->suffix_pos($r, $suff);
            if ($pos !== false) return substr($str, 0, $pos + $str_len - $r_len);
        }
        return null;
    }

    /**
     * Step 1: standard suffix removal (always done).
     *
     * Results of delete_suff() are tested against null rather than with
     * count(), which throws a TypeError on strings and null in PHP 8.
     */
    function step_1($str) {
        $str_len = strlen($str);

        // 'amente': delete if in R1; if preceded by 'iv', delete if in R2 (and if further preceded by 'at',
        // delete if in R2), otherwise, if preceded by 'os', 'ic' or 'abil', delete if in R2
        $ret_str = $this->delete_suff($this->suffissi_step_1_g, $str, $str_len, 'r1');
        if ($ret_str !== null) {
            $ret_str1 = $this->delete_suff(array('iv'), $ret_str, strlen($ret_str), 'r2');
            if ($ret_str1 !== null) {
                $ret_str2 = $this->delete_suff(array('at'), $ret_str1, strlen($ret_str1), 'r2');
                return ($ret_str2 !== null) ? $ret_str2 : $ret_str1;
            }
            $ret_str1 = $this->delete_suff(array('os','ic','abil'), $ret_str, strlen($ret_str), 'r2');
            return ($ret_str1 !== null) ? $ret_str1 : $ret_str;
        }

        // delete if in R2
        $ret_str = $this->delete_suff($this->suffissi_step_1_a, $str, $str_len, 'r2', true);
        if ($ret_str !== null) return $ret_str;

        // delete if in R2; if preceded by 'ic', delete if in R2
        $ret_str = $this->delete_suff($this->suffissi_step_1_b, $str, $str_len, 'r2');
        if ($ret_str !== null) {
            $ret_str1 = $this->delete_suff(array('ic'), $ret_str, strlen($ret_str), 'r2');
            return ($ret_str1 !== null) ? $ret_str1 : $ret_str;
        }

        // replace with 'log' if in R2
        $ret_str = $this->delete_suff($this->suffissi_step_1_c, $str, $str_len, 'r2');
        if ($ret_str !== null) return $ret_str.'log';

        // replace with 'u' if in R2
        $ret_str = $this->delete_suff($this->suffissi_step_1_d, $str, $str_len, 'r2');
        if ($ret_str !== null) return $ret_str.'u';

        // replace with 'ente' if in R2
        $ret_str = $this->delete_suff($this->suffissi_step_1_e, $str, $str_len, 'r2');
        if ($ret_str !== null) return $ret_str.'ente';

        // delete if in RV
        $ret_str = $this->delete_suff($this->suffissi_step_1_f, $str, $str_len, 'rv');
        if ($ret_str !== null) return $ret_str;

        // delete if in R2; if preceded by 'abil', 'ic' or 'iv', delete if in R2
        $ret_str = $this->delete_suff($this->suffissi_step_1_h, $str, $str_len, 'r2');
        if ($ret_str !== null) {
            $ret_str1 = $this->delete_suff(array('abil','ic','iv'), $ret_str, strlen($ret_str), 'r2');
            return ($ret_str1 !== null) ? $ret_str1 : $ret_str;
        }

        // delete if in R2; if preceded by 'at', delete if in R2 (and if further preceded by 'ic', delete if in R2)
        $ret_str = $this->delete_suff($this->suffissi_step_1_i, $str, $str_len, 'r2');
        if ($ret_str !== null) {
            $ret_str1 = $this->delete_suff(array('at'), $ret_str, strlen($ret_str), 'r2');
            if ($ret_str1 !== null) {
                $ret_str2 = $this->delete_suff(array('ic'), $ret_str1, strlen($ret_str1), 'r2');
                return ($ret_str2 !== null) ? $ret_str2 : $ret_str1;
            }
            return $ret_str;
        }

        return $str;
    }

    /**
     * Step 2: verb suffixes. Only done if no ending was removed by step 1.
     *
     * @param string $str        the word as it entered step 1
     * @param string $str_step_1 the word as returned by step 1
     */
    function step_2($str,$str_step_1) {
        if ($str != $str_step_1) return $str_step_1;

        $ret_str = $this->delete_suff($this->suffissi_step_2, $str, strlen($str), 'rv');
        return ($ret_str !== null) ? $ret_str : $str;
    }

    /**
     * Step 3a (always done): delete a final vowel listed in $vocale_finale if
     * it is in RV, and a preceding 'i' if it is in RV
     * ('crocchi' -> 'crocch', 'crocchio' -> 'crocch').
     */
    function step_3a($str) {
        $vocale_finale = array('a','e','i','o','�','�','�','�');

        $ret_str = $this->delete_suff($vocale_finale, $str, strlen($str), 'rv');
        if ($ret_str === null) return $str;

        $ret_str1 = $this->delete_suff(array('i'), $ret_str, strlen($ret_str), 'rv');
        return ($ret_str1 !== null) ? $ret_str1 : $ret_str;
    }

    /**
     * Step 3b (always done): replace final 'ch' (or 'gh') with 'c' (or 'g')
     * if in RV ('crocch' -> 'crocc').
     */
    function step_3b($str) {
        $rv = $this->return_RV($str);

        $pattern = '/([cg])h$/';
        $replacement = '${1}';
        return substr($str,0,strlen($str)-strlen($rv)).preg_replace($pattern,$replacement,$rv);
    }

    /** Step 4: finally, turn I and U back into lower case. */
    function step_4($str) {
        return strtolower($str);
    }

    /**
     * Stems one word.
     *
     * @param string $str the word
     * @return string the stem
     */
    function stem($str){
        $str = $this->trim($str);
        $str = $this->to_lower($str);
        $str = $this->replace_acc_acuti($str);
        $str = $this->put_u_after_q_to_upper($str);
        $str = $this->i_u_between_vow_to_upper($str);
        $step0 = $this->step_0($str);
        $step1 = $this->step_1($step0);
        $step2 = $this->step_2($step0,$step1);
        $step3a = $this->step_3a($step2);
        $step3b = $this->step_3b($step3a);
        $step4 = $this->step_4($step3b);

        return $step4;
    }

}
319	+
320	+
/*
Stem caching added by Rob Marsh, SJ
http://rmarsh.com
*/

// shared stemmer instance and its word => stem cache, both used by stem()
$Stemmer = new ItalianStemmer();
$StemCache = array();

/**
 * Returns the stem of a word, computing it with the global ItalianStemmer
 * instance on first use and serving it from the global $StemCache afterwards.
 *
 * @param string $word the word to stem
 * @return string the stemmed word
 */
function stem($word) {
    global $Stemmer, $StemCache;

    if (isset($StemCache[$word])) {
        return $StemCache[$word];
    }

    $stemmedword = $Stemmer->stem($word);
    $StemCache[$word] = $stemmedword;
    return $stemmedword;
}
340	+
341	+ ?>

languages/it/stopwords.php ADDED Viewed

	@@ -0,0 +1,4 @@


1	+ <?php
// The list of common Italian words we want to ignore. NB anything shorter than 4 characters
// is knocked out by the plugin and doesn't need to figure here.
// The accented entries (avrà, avrò, farà, farò, perché, sarà, sarò, starà, starò) had been
// corrupted into U+FFFD replacement characters and are restored here.
// NOTE(review): written as UTF-8 literals -- confirm this matches how the plugin compares terms.
$overusedwords = array(
	"abbia", "abbiamo", "abbiano", "abbiate", "agli", "alla", "alle", "allo", "anche",
	"avemmo", "avendo", "avesse", "avessero", "avessi", "avessimo", "aveste", "avesti", "avete",
	"aveva", "avevamo", "avevano", "avevate", "avevi", "avevo", "avrai", "avranno", "avrebbe",
	"avrebbero", "avrei", "avremmo", "avremo", "avreste", "avresti", "avrete", "avrà", "avrò",
	"avuta", "avute", "avuti", "avuto", "come", "contro", "dagl", "dagli", "dall", "dalla",
	"dalle", "dallo", "degl", "degli", "dell", "della", "delle", "dello", "dove", "ebbe",
	"ebbero", "ebbi", "erano", "eravamo", "eravate", "essendo", "faccia", "facciamo", "facciano",
	"facciate", "faccio", "facemmo", "facendo", "facesse", "facessero", "facessi", "facessimo",
	"faceste", "facesti", "faceva", "facevamo", "facevano", "facevate", "facevi", "facevo",
	"fanno", "farai", "faranno", "farebbe", "farebbero", "farei", "faremmo", "faremo", "fareste",
	"faresti", "farete", "farà", "farò", "fece", "fecero", "feci", "fosse", "fossero", "fossi",
	"fossimo", "foste", "fosti", "fummo", "furono", "hanno", "loro", "miei", "negl", "negli",
	"nell", "nella", "nelle", "nello", "nostra", "nostre", "nostri", "nostro", "perché", "quale",
	"quanta", "quante", "quanti", "quanto", "quella", "quelle", "quelli", "quello", "questa",
	"queste", "questi", "questo", "sarai", "saranno", "sarebbe", "sarebbero", "sarei", "saremmo",
	"saremo", "sareste", "saresti", "sarete", "sarà", "sarò", "siamo", "siano", "siate", "siete",
	"sono", "stai", "stando", "stanno", "starai", "staranno", "starebbe", "starebbero", "starei",
	"staremmo", "staremo", "stareste", "staresti", "starete", "starà", "starò", "stava",
	"stavamo", "stavano", "stavate", "stavi", "stavo", "stemmo", "stesse", "stessero", "stessi",
	"stessimo", "steste", "stesti", "stette", "stettero", "stetti", "stia", "stiamo", "stiano",
	"stiate", "sugl", "sugli", "sull", "sulla", "sulle", "sullo", "suoi", "tuoi", "tutti",
	"tutto", "vostra", "vostre", "vostri", "vostro"
);
4	+ ?>

readme.txt ADDED Viewed

	@@ -0,0 +1,114 @@


1	+ === Similar Posts ===
2	+ Contributors: RobMarsh
3	+ Donate link: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=donate%40rmarsh%2ecom&item_name=Rob%20Marsh%27s%20WordPress%20Plugins&item_number=Similar%20Posts&no_shipping=1&cn=Any%20Comments&tax=0&currency_code=GBP&bn=PP%2dDonationsBF&charset=UTF%2d8
4	+ Tags: posts, related, similar, related posts, similar posts, tags, post-plugins
5	+ Requires at least: 1.5
6	+ Tested up to: 2.6.0
7	+ Stable tag: 2.6.0.0
8	+ Displays a list of posts similar to the current one based on content, title and/or tags.
9	+
10	+ == Description ==
11	+
12	+ Similar Posts displays a list of posts that are similar or related to the current post. The list can be customised in many ways. Similarity is judged according to a post's title, content, and tags and you can adjust the balance of factors to fit your own blog.
13	+
14	+ This plugin requires the latest version of the Post-Plugin Library: [download it now](http://downloads.wordpress.org/plugin/post-plugin-library.zip).
15	+
16	+ == Installation ==
17	+
18	+ 1. IMPORTANT! If you are upgrading from a previous version first deactivate the plugin, then delete the plugin folder from your server.
19	+
20	+ 1. If you have the Similar Posts Feed plugin installed you must deactivate it before installing Similar Posts (which now does the same job).
21	+
22	+ 1. Upload the plugin folder to your /wp-content/plugins/ folder. If you haven't already, you should also install the [Post-Plugin Library](http://wordpress.org/extend/plugins/post-plugin-library/).
23	+
24	+ 1. Go to the Plugins page and activate the plugin.
25	+
26	+ 1. Put `<?php similar_posts(); ?>` at the place in your template where you want the list of related posts to appear or use the plugin as a widget.
27	+
28	+ 1. Use the Options/Settings page to adjust the behaviour of the plugin.
29	+
30	+ [My web site](http://rmarsh.com/) has [full instructions](http://rmarsh.com/plugins/similar-posts/) and [information on customisation](http://rmarsh.com/plugins/post-options/).
31	+
32	+ == Version History ==
33	+
34	+ * 2.6.0.0
35	+ * version bump to indicate compatibility with WP 2.6
36	+ * fix to really include attachments
37	+ * new parameter for {imagesrc} to append a suffix to the image name, e.g. to get the thumbnail for attachments
38	+ * 2.5.0.11
39	+ * new option to include attachments
40	+ * {php} tag now accepts nested tags
41	+ * new output tag {authorurl} -- permalink to archive of author's posts
42	+ * fix for numeric locale issue
43	+ * 2.5.0.10
44	+ * new option to select algorithm for term extraction
45	+ * new manual links option
46	+ * fix for page selection in old versions of WP
47	+ * fix for faulty tags in Cyrillic
48	+ * 2.5.0.9
49	+ * new option to match the current post's author
50	+ * extended options for snippet and excerpt output tags
51	+ * 2.5.0.7
52	+ * new option to show by status, i.e., published/private/draft/future
53	+ * {categorynames} and {categorylinks} apply 'single_cat_name' filter
54	+ * fixes bug in WP pre-2.2 causing installation to fail
55	+ * 2.5.0
56	+ * CJK digrams
57	+ * {image} has new post, link, and default parameters
58	+ * new {imagesrc} tag
59	+ * fix to empty category bug
60	+ * excluded posts bug fix
61	+ * fix for intermittent bug with 'omit current post' option
62	+ * 2.5b28
63	+ * improvements to Similar Posts matching
64	+ * experiment with Chinese/Korean/Japanese matching
65	+ * 2.5b27
66	+ * fixed bug with bulk indexing of tags
67	+ * 2.5b26
68	+ * reverted thumbnail serving (speed)
69	+ * fix current post after extra query
70	+ * 2.5b25
71	+ * option to sort output, group templates
72	+ * removed 'trim_before' option added more logical 'divider'
73	+ * {date:raw}, {commentdate:raw}, etc.
74	+ * fix for {image} resizing when <img > and not <img />
75	+ * {image} now serves real thumbnails
76	+ * 2.5b24
77	+ * fix for recursive replacement by content filter
78	+ * fix to {gravatar} to allow for 'identicon' etc.
79	+ * fix to {commenter} to allow trimming
80	+ * fix a warning in safe mode
81	+ * fix for unsanitised WP tags
82	+ * 2.5b23
83	+ * new option to filter on custom fields
84	+ * nested braces in {if}; condition now taggable
85	+ * improved bug report feature
86	+ * better way to omit user comments
87	+ * 2.5b22
88	+ * restored automatic indexing on installation
89	+ * moved indexing menu under settings
90	+ * show_pages option can now show only pages
91	+ * fix for upgraders who had utf8 selected but no mbstring
92	+ * 2.5b20
93	+ * optimised indexing for speed and memory use
94	+ * 2.5b19
95	+ * fixing some extended character issues
96	+ * 2.5b18
97	+ * fix output filter bug
98	+ * add conditional tag {if:condition:yes:no}
99	+ * 2.5b16
100	+ * fix for {php}
101	+ * 2.5b15
102	+ * fix more or less obscure bugs, add 'include posts' setting
103	+ * 2.5b14
104	+ * fix file-encoding, installation error, etc.
105	+ * 2.5b12
106	+ * fix serious bug for WP < 2.3
107	+ * 2.5b11
108	+ * some widget fixes
109	+ * 2.5b10
110	+ * fix for non-creation of table
111	+ * 2.5b9
112	+ * clarifying installation instructions
113	+
114	+ * [previous versions](http://rmarsh.com/plugins/similar-posts/)

similar-posts-admin.php ADDED Viewed

	@@ -0,0 +1,702 @@


1	+ <?php
2	+
3	+ // Admin stuff for Similar Posts Plugin, Version 2.6.0.0
4	+
/**
 * Register the "Similar Posts" entry on the options menu.
 * The page is rendered by similar_posts_options_page().
 */
function similar_posts_option_menu() {
	$page_title = __('Similar Posts Options', 'similar_posts');
	$menu_title = __('Similar Posts', 'similar_posts');
	add_options_page($page_title, $menu_title, 8, 'similar-posts', 'similar_posts_options_page');
}
8	+
// hook the main settings page into the admin menu (priority 1)
add_action('admin_menu', 'similar_posts_option_menu', 1);
10	+
/**
 * Register the "Similar Posts Feed" entry on the options menu.
 * The page is rendered by similar_posts_for_feed_options_page().
 */
function similar_posts_for_feed_option_menu() {
	$page_title = __('Similar Posts Feed Options', 'similar_posts');
	$menu_title = __('Similar Posts Feed', 'similar_posts');
	add_options_page($page_title, $menu_title, 8, 'similar-posts-feed', 'similar_posts_for_feed_options_page');
}
14	+
/**
 * Make the "Similar Posts Feed" settings menu appear and disappear.
 *
 * The state is taken from the submitted form when POST carries 'feed_active'
 * (so the menu follows the value being saved in this same request), otherwise
 * from the stored 'similar-posts' option. Only the literal string 'true'
 * counts as active.
 */
function juggle_similar_posts_menus() {
	if (isset($_POST['feed_active'])) {
		$active = ($_POST['feed_active'] === 'true');
	} else {
		$options = get_option('similar-posts');
		// the option may be missing or incomplete (e.g. a non-array result);
		// treat that as "inactive" instead of indexing into it
		$active = (is_array($options) && isset($options['feed_active']) && $options['feed_active'] === 'true');
	}
	if ($active) {
		add_action('admin_menu', 'similar_posts_for_feed_option_menu', 2);
	} else {
		remove_action('admin_menu', 'similar_posts_for_feed_option_menu');
	}
}
29	+
// decide on the feed menu once all plugins are loaded
add_action('plugins_loaded', 'juggle_similar_posts_menus');
31	+
/**
 * Render the Similar Posts settings page: a heading with a help link followed
 * by the tabbed subpages.
 *
 * Returns early, after SimilarPosts::check_post_plugin_library() has been given
 * the "please install" message, when that check fails.
 */
function similar_posts_options_page(){
	echo '<div class="wrap"><h2>';
	_e('Similar Posts ', 'similar_posts');
	echo '<a href="http://rmarsh.com/plugins/post-options/" style="font-size: 0.8em;">';
	// text domain added so this string is looked up in the same catalogue as the rest of the file
	_e('help and instructions', 'similar_posts');
	echo '</a></h2></div>';

	$install_notice = '<h1>'.sprintf(__('Please install the %sPost Plugin Library%s plugin.', 'similar_posts'), '<a href="http://downloads.wordpress.org/plugin/post-plugin-library.zip">', '</a>').'</h1>';
	if (!SimilarPosts::check_post_plugin_library($install_notice)) return;

	// one tab per settings group; the third argument names the rendering function
	$m = new admin_subpages();
	$m->add_subpage('General', 'general', 'similar_posts_general_options_subpage');
	$m->add_subpage('Output', 'output', 'similar_posts_output_options_subpage');
	$m->add_subpage('Filter', 'filter', 'similar_posts_filter_options_subpage');
	$m->add_subpage('Other', 'other', 'similar_posts_other_options_subpage');
	$m->add_subpage('Manage the Index', 'index', 'similar_posts_index_options_subpage');
	$m->add_subpage('Report a Bug', 'bug', 'similar_posts_bug_subpage');
	$m->add_subpage('Remove this Plugin', 'remove', 'similar_posts_remove_subpage');
	$m->display();
}
49	+
/**
 * "General" tab of the Similar Posts settings page.
 *
 * When the form has been submitted (POST field 'update_options') the nonce is
 * checked, poc_cache_flush() is called if POC_CACHE_4 is defined, the listed
 * fields are merged into the 'similar-posts' option through
 * ppl_options_from_post() and a confirmation is printed. The form itself is
 * printed in every case.
 */
function similar_posts_general_options_subpage(){
	global $wpdb, $wp_version;
	$options = get_option('similar-posts');
	if (isset($_POST['update_options'])) {
		check_admin_referer('similar-posts-update-options');
		if (defined('POC_CACHE_4')) {
			poc_cache_flush();
		}
		// the fields this tab is allowed to overwrite
		$fields = array('limit', 'skip', 'show_private', 'show_pages', 'show_attachments', 'status', 'age', 'omit_current_post', 'match_cat', 'match_tags', 'match_author');
		$options = ppl_options_from_post($options, $fields);
		update_option('similar-posts', $options);
		// tell the user something happened
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	// from here on: the form, one ppl_display_* call per setting
	?>
	<div class="wrap">
	<h2><?php _e('General Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save General Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<?php
		ppl_display_limit($options['limit']);
		ppl_display_skip($options['skip']);
		ppl_display_show_private($options['show_private']);
		ppl_display_show_pages($options['show_pages']);
		ppl_display_show_attachments($options['show_attachments']);
		ppl_display_status($options['status']);
		ppl_display_age($options['age']);
		ppl_display_omit_current_post($options['omit_current_post']);
		ppl_display_match_cat($options['match_cat']);
		ppl_display_match_tags($options['match_tags']);
		ppl_display_match_author($options['match_author']);
	?>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save General Settings', 'similar_posts') ?>" /></div>
	<?php
		if (function_exists('wp_nonce_field')) {
			wp_nonce_field('similar-posts-update-options');
		}
	?>
	</form>
	</div>
	<?php
}
89	+
/**
 * "Output" tab of the Similar Posts settings page.
 *
 * Saves the template-related fields into the 'similar-posts' option when the
 * form was submitted (after the nonce check), then prints the form with the
 * settings on the left and the list of available tags on the right.
 */
function similar_posts_output_options_subpage(){
	global $wpdb, $wp_version;
	$options = get_option('similar-posts');
	if (isset($_POST['update_options'])) {
		check_admin_referer('similar-posts-update-options');
		if (defined('POC_CACHE_4')) {
			poc_cache_flush();
		}
		// the fields this tab is allowed to overwrite
		$fields = array('output_template', 'prefix', 'suffix', 'none_text', 'no_text', 'divider', 'sort', 'group_template');
		$options = ppl_options_from_post($options, $fields);
		update_option('similar-posts', $options);
		// tell the user something happened
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	// from here on: the form
	?>
	<div class="wrap">
	<h2><?php _e('Output Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Output Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<tr>
	<td>
	<table>
	<?php
		ppl_display_output_template($options['output_template']);
		ppl_display_prefix($options['prefix']);
		ppl_display_suffix($options['suffix']);
		ppl_display_none_text($options['none_text']);
		ppl_display_no_text($options['no_text']);
		ppl_display_divider($options['divider']);
		ppl_display_sort($options['sort']);
		ppl_display_group_template($options['group_template']);
	?>
	</table>
	</td>
	<td>
	<?php ppl_display_available_tags('similar-posts'); ?>
	</td></tr>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Output Settings', 'similar_posts') ?>" /></div>
	<?php
		if (function_exists('wp_nonce_field')) {
			wp_nonce_field('similar-posts-update-options');
		}
	?>
	</form>
	</div>
	<?php
}
134	+
/**
 * "Filter" tab of the Similar Posts settings page.
 *
 * Saves the include/exclude fields into the 'similar-posts' option when the
 * form was submitted (after the nonce check), then prints the form.
 */
function similar_posts_filter_options_subpage(){
	global $wpdb, $wp_version;
	$options = get_option('similar-posts');
	if (isset($_POST['update_options'])) {
		check_admin_referer('similar-posts-update-options');
		if (defined('POC_CACHE_4')) {
			poc_cache_flush();
		}
		// the fields this tab is allowed to overwrite
		$fields = array('excluded_posts', 'included_posts', 'excluded_authors', 'included_authors', 'excluded_cats', 'included_cats', 'tag_str', 'custom');
		$options = ppl_options_from_post($options, $fields);
		update_option('similar-posts', $options);
		// tell the user something happened
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	// from here on: the form
	?>
	<div class="wrap">
	<h2><?php _e('Filter Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Filter Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<?php
		ppl_display_excluded_posts($options['excluded_posts']);
		ppl_display_included_posts($options['included_posts']);
		// authors and categories each take their exclude and include lists together
		ppl_display_authors($options['excluded_authors'], $options['included_authors']);
		ppl_display_cats($options['excluded_cats'], $options['included_cats']);
		ppl_display_tag_str($options['tag_str']);
		ppl_display_custom($options['custom']);
	?>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Filter Settings', 'similar_posts') ?>" /></div>
	<?php
		if (function_exists('wp_nonce_field')) {
			wp_nonce_field('similar-posts-update-options');
		}
	?>
	</form>
	</div>
	<?php
}
169	+
/**
 * "Other" tab of the Similar Posts settings page.
 *
 * On submission (after the nonce check) the listed fields are merged into the
 * 'similar-posts' option and the three weights are rescaled so that they sum
 * to 1. The form is printed in every case.
 */
function similar_posts_other_options_subpage(){
	global $wpdb, $wp_version;
	$options = get_option('similar-posts');
	if (isset($_POST['update_options'])) {
		check_admin_referer('similar-posts-update-options');
		if (defined('POC_CACHE_4')) poc_cache_flush();
		// Fill up the options with the values chosen...
		$options = ppl_options_from_post($options, array('content_filter', 'stripcodes', 'feed_active', 'term_extraction', 'num_terms', 'weight_title', 'weight_content', 'weight_tags', 'hand_links'));
		// Explicit float casts: the weights may still be strings at this point, and
		// adding a float to a non-numeric string is a TypeError on PHP 8.
		// The small offset keeps the sum non-zero when all weights are 0.
		$wcontent = (float) $options['weight_content'] + 0.0001;
		$wtitle = (float) $options['weight_title'] + 0.0001;
		$wtags = (float) $options['weight_tags'] + 0.0001;
		$wcombined = $wcontent + $wtitle + $wtags;
		// normalise so the three weights add up to 1
		$options['weight_content'] = $wcontent / $wcombined;
		$options['weight_title'] = $wtitle / $wcombined;
		$options['weight_tags'] = $wtags / $wcombined;
		update_option('similar-posts', $options);
		// Show a message to say we've done something
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	//now we drop into html to display the option page form
	?>
	<div class="wrap">
	<h2><?php _e('Other Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Other Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<?php
		ppl_display_weights($options);
		ppl_display_num_terms($options['num_terms']);
		ppl_display_term_extraction($options['term_extraction']);
		ppl_display_feed_active($options['feed_active']);
		ppl_display_hand_links($options['hand_links']);
		ppl_display_content_filter($options['content_filter']);
		ppl_display_stripcodes($options['stripcodes']);
	?>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Other Settings', 'similar_posts') ?>" /></div>
	<?php if (function_exists('wp_nonce_field')) wp_nonce_field('similar-posts-update-options'); ?>
	</form>
	</div>
	<?php
}
212	+
/**
 * "Manage the Index" tab of the Similar Posts settings page.
 *
 * When 'reindex_all' is posted (and the nonce checks out) the indexing options
 * are read from the form, the index is rebuilt with save_index_entries() and
 * the options are stored. The explanatory text and the form are printed in
 * every case.
 */
function similar_posts_index_options_subpage(){
	if (isset($_POST['reindex_all'])) {
		check_admin_referer('similar-posts-manage-update-options');
		if (defined('POC_CACHE_4')) poc_cache_flush();
		$options = get_option('similar-posts');
		if (!is_array($options)) $options = array();
		// A disabled <select> is not submitted, so a flag may be missing from POST.
		// Each flag is stored as the string 'true' or 'false' and nothing else.
		$options['utf8'] = (isset($_POST['utf8']) && $_POST['utf8'] === 'true') ? 'true' : 'false';
		if (!function_exists('mb_split')) {
			$options['utf8'] = 'false';
		}
		$options['cjk'] = (isset($_POST['cjk']) && $_POST['cjk'] === 'true') ? 'true' : 'false';
		if (!function_exists('mb_internal_encoding')) {
			$options['cjk'] = 'false';
		}
		// CJK handling turns UTF-8 handling on as well
		if ($options['cjk'] === 'true') $options['utf8'] = 'true';
		$options['use_stemmer'] = (isset($_POST['use_stemmer']) && $_POST['use_stemmer'] === 'true') ? 'true' : 'false';
		$options['batch'] = ppl_check_cardinal(isset($_POST['batch']) ? $_POST['batch'] : null);
		// fall back to the default batch size
		if ($options['batch'] === 0) $options['batch'] = 100;
		flush();
		$termcount = save_index_entries (($options['utf8']==='true'), ($options['use_stemmer']==='true'), $options['batch'], ($options['cjk']==='true'));
		update_option('similar-posts', $options);
		//show a message
		printf('<div class="updated fade"><p>'.__('Indexed %d posts.', 'similar_posts').'</p></div>', $termcount);
	} else {
		$options = get_option('similar-posts');
	}
	?>
	<div class="wrap">
	<?php
		echo '<h2>'.__('Manage Index', 'similar_posts').'</h2>';
		echo '<p>'.__('Similar Posts maintains a special index to help search for related posts. The index is created when the plugin is activated and then kept up-to-date automatically when posts are added, edited, or deleted.', 'similar_posts').'</p>';
		echo '<p>'.__('The options that affect the index can be set below.', 'similar_posts').'</p>';
		echo '<p>'.__('If you are using a language other than english you may find that the plugin mangles some characters since PHP is normally blind to multibyte characters. You can force the plugin to interpret extended characters as UTF-8 at the expense of a little speed but this facility is only available if your installation of PHP supports the mbstring functions.', 'similar_posts').'</p>';
		echo '<p>'.__('Languages like Chinese, Korean and Japanese pose a special difficulty for the full-text search algorithm. As an experiment I have introduced an option below to work around some of these issues. The text must be encoded as UTF-8. I would be very grateful for feedback from any users knowledgeable in these languages.', 'similar_posts').'</p>';
		echo '<p>'.__('Some related word forms should really be counted together, e.g., "follow", "follows", and "following". The plugin can use a "stemming" algorithm to reduce related forms to their root stem. It is worth experimenting to see if this improves the similarity of posts in your particular circumstances. Stemming algorithms are provided for english, german, spanish, french and italian but stemmers for other languages can be created: see the help for instructions. Note: stemming slows down the indexing more than a little.', 'similar_posts').'</p>';
		echo '<p>'.__('The indexing routine processes posts in batches of 100 by default. If you run into problems with limited memory you can opt to make the batches smaller.', 'similar_posts').'</p>';
		echo '<p>'.__('Note: the process of indexing may take a little while. On my modest machine 500 posts take between 5 seconds and 20 seconds (with stemming and utf-8 support). Don\'t worry if the screen fails to update until finished.', 'similar_posts').'</p>';
	?>
	<form method="post" action="">
	<table class="optiontable form-table">
	<tr valign="top">
	<th scope="row"><?php _e('Handle extended characters?', 'similar_posts') ?></th>
	<td>
	<select name="utf8" id="utf8" <?php if (!function_exists('mb_split')) echo 'disabled="true"'; ?> >
	<option <?php if($options['utf8'] == 'false') { echo 'selected="selected"'; } ?> value="false">No</option>
	<option <?php if($options['utf8'] == 'true') { echo 'selected="selected"'; } ?> value="true">Yes</option>
	</select>
	</td>
	</tr>
	<tr valign="top">
	<th scope="row"><?php _e('Treat as Chinese, Korean, or Japanese?', 'similar_posts') ?></th>
	<td>
	<select name="cjk" id="cjk" <?php if (!function_exists('mb_split')) echo 'disabled="true"'; ?> >
	<option <?php if($options['cjk'] == 'false') { echo 'selected="selected"'; } ?> value="false">No</option>
	<option <?php if($options['cjk'] == 'true') { echo 'selected="selected"'; } ?> value="true">Yes</option>
	</select>
	</td>
	</tr>
	<tr valign="top">
	<th scope="row"><?php _e('Use a stemming algorithm?', 'similar_posts') ?></th>
	<td>
	<select name="use_stemmer" id="use_stemmer">
	<option <?php if($options['use_stemmer'] == 'false') { echo 'selected="selected"'; } ?> value="false">No</option>
	<option <?php if($options['use_stemmer'] == 'true') { echo 'selected="selected"'; } ?> value="true">Yes</option>
	</select>
	</td>
	</tr>
	<tr valign="top">
	<th scope="row"><?php _e('Batch size:', 'similar_posts') ?></th>
	<td><input name="batch" type="text" id="batch" value="<?php echo $options['batch']; ?>" size="3" /></td>
	</tr>
	</table>
	<div class="submit">
	<input type="submit" name="reindex_all" value="<?php _e('Recreate Index', 'similar_posts') ?>" />
	<?php if (function_exists('wp_nonce_field')) wp_nonce_field('similar-posts-manage-update-options'); ?>
	</div>
	</form>
	</div>
	<?php
}
292	+
293	+
/**
 * "Report a Bug" tab: hands over to ppl_bug_form() for this plugin.
 */
function similar_posts_bug_subpage(){
	$plugin_key = 'similar-posts';
	ppl_bug_form($plugin_key);
}
297	+
/**
 * "Remove this Plugin" tab.
 *
 * Declares the clean-up routine eradicate() and passes its name, together with
 * the path of the main plugin file (this file's path without '-admin'), to
 * ppl_plugin_eradicate_form().
 *
 * NOTE(review): eradicate() is declared when this function runs, so it must not
 * run twice, or together with the feed variant, in the same request.
 */
function similar_posts_remove_subpage(){
	// deletes the plugin's options and drops its index table
	function eradicate() {
		global $wpdb, $table_prefix;
		delete_option('similar-posts');
		delete_option('similar-posts-feed');
		delete_option('widget_rrm_similar_posts');
		$table_name = $table_prefix . 'similar_posts';
		// IF EXISTS: removal must not raise a database error when the table is already gone
		$wpdb->query("DROP TABLE IF EXISTS `$table_name`");
	}
	ppl_plugin_eradicate_form('eradicate', str_replace('-admin', '', __FILE__));
}
309	+
/**
 * Render the Similar Posts Feed settings page: a heading with a help link
 * followed by the tabbed subpages for the feed options.
 */
function similar_posts_for_feed_options_page(){
	echo '<div class="wrap"><h2>';
	_e('Similar Posts Feed ', 'similar_posts');
	echo '<a href="http://rmarsh.com/plugins/post-options/" style="font-size: 0.8em;">';
	// text domain added so this string is looked up in the same catalogue as the rest of the file
	_e('help and instructions', 'similar_posts');
	echo '</a></h2></div>';

	// one tab per settings group; the third argument names the rendering function
	$m = new admin_subpages();
	$m->add_subpage('General', 'general', 'similar_posts_feed_general_options_subpage');
	$m->add_subpage('Output', 'output', 'similar_posts_feed_output_options_subpage');
	$m->add_subpage('Filter', 'filter', 'similar_posts_feed_filter_options_subpage');
	$m->add_subpage('Other', 'other', 'similar_posts_feed_other_options_subpage');
	$m->add_subpage('Report a Bug', 'bug', 'similar_posts_feed_bug_subpage');
	$m->add_subpage('Remove this Plugin', 'remove', 'similar_posts_feed_remove_subpage');
	$m->display();
}
325	+
/**
 * "General" tab of the Similar Posts Feed settings page.
 *
 * Same flow as the main plugin's General tab, but reading and writing the
 * 'similar-posts-feed' option and using the feed nonce action.
 */
function similar_posts_feed_general_options_subpage(){
	global $wpdb, $wp_version;
	$options = get_option('similar-posts-feed');
	if (isset($_POST['update_options'])) {
		check_admin_referer('similar-posts-feed-update-options');
		if (defined('POC_CACHE_4')) {
			poc_cache_flush();
		}
		// the fields this tab is allowed to overwrite
		$fields = array('limit', 'skip', 'show_private', 'show_pages', 'show_attachments', 'status', 'age', 'omit_current_post', 'match_cat', 'match_tags', 'match_author');
		$options = ppl_options_from_post($options, $fields);
		update_option('similar-posts-feed', $options);
		// tell the user something happened
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	// from here on: the form, one ppl_display_* call per setting
	?>
	<div class="wrap">
	<h2><?php _e('General Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save General Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<?php
		ppl_display_limit($options['limit']);
		ppl_display_skip($options['skip']);
		ppl_display_show_private($options['show_private']);
		ppl_display_show_pages($options['show_pages']);
		ppl_display_show_attachments($options['show_attachments']);
		ppl_display_status($options['status']);
		ppl_display_age($options['age']);
		ppl_display_omit_current_post($options['omit_current_post']);
		ppl_display_match_cat($options['match_cat']);
		ppl_display_match_tags($options['match_tags']);
		ppl_display_match_author($options['match_author']);
	?>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save General Settings', 'similar_posts') ?>" /></div>
	<?php
		if (function_exists('wp_nonce_field')) {
			wp_nonce_field('similar-posts-feed-update-options');
		}
	?>
	</form>
	</div>
	<?php
}
365	+
/**
 * "Output" tab of the Similar Posts Feed settings page.
 *
 * Saves the template-related fields into the 'similar-posts-feed' option when
 * the form was submitted (after the nonce check), then prints the form with
 * the settings on the left and the list of available tags on the right.
 */
function similar_posts_feed_output_options_subpage(){
	global $wpdb, $wp_version;
	$options = get_option('similar-posts-feed');
	if (isset($_POST['update_options'])) {
		check_admin_referer('similar-posts-feed-update-options');
		if (defined('POC_CACHE_4')) {
			poc_cache_flush();
		}
		// the fields this tab is allowed to overwrite
		$fields = array('output_template', 'prefix', 'suffix', 'none_text', 'no_text', 'divider', 'sort', 'group_template');
		$options = ppl_options_from_post($options, $fields);
		update_option('similar-posts-feed', $options);
		// tell the user something happened
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	// from here on: the form
	?>
	<div class="wrap">
	<h2><?php _e('Output Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Output Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<tr>
	<td>
	<table>
	<?php
		ppl_display_output_template($options['output_template']);
		ppl_display_prefix($options['prefix']);
		ppl_display_suffix($options['suffix']);
		ppl_display_none_text($options['none_text']);
		ppl_display_no_text($options['no_text']);
		ppl_display_divider($options['divider']);
		ppl_display_sort($options['sort']);
		ppl_display_group_template($options['group_template']);
	?>
	</table>
	</td>
	<td>
	<?php
		// NOTE(review): passes 'similar-posts', not 'similar-posts-feed' -- confirm this is intended
		ppl_display_available_tags('similar-posts');
	?>
	</td></tr>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Output Settings', 'similar_posts') ?>" /></div>
	<?php
		if (function_exists('wp_nonce_field')) {
			wp_nonce_field('similar-posts-feed-update-options');
		}
	?>
	</form>
	</div>
	<?php
}
410	+
/**
 * "Filter" tab of the Similar Posts Feed settings page.
 *
 * Saves the include/exclude fields into the 'similar-posts-feed' option when
 * the form was submitted (after the nonce check), then prints the form.
 */
function similar_posts_feed_filter_options_subpage(){
	global $wpdb, $wp_version;
	$options = get_option('similar-posts-feed');
	if (isset($_POST['update_options'])) {
		check_admin_referer('similar-posts-feed-update-options');
		if (defined('POC_CACHE_4')) {
			poc_cache_flush();
		}
		// the fields this tab is allowed to overwrite
		$fields = array('excluded_posts', 'included_posts', 'excluded_authors', 'included_authors', 'excluded_cats', 'included_cats', 'tag_str', 'custom');
		$options = ppl_options_from_post($options, $fields);
		update_option('similar-posts-feed', $options);
		// tell the user something happened
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	// from here on: the form
	?>
	<div class="wrap">
	<h2><?php _e('Filter Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Filter Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<?php
		ppl_display_excluded_posts($options['excluded_posts']);
		ppl_display_included_posts($options['included_posts']);
		// authors and categories each take their exclude and include lists together
		ppl_display_authors($options['excluded_authors'], $options['included_authors']);
		ppl_display_cats($options['excluded_cats'], $options['included_cats']);
		ppl_display_tag_str($options['tag_str']);
		ppl_display_custom($options['custom']);
	?>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Filter Settings', 'similar_posts') ?>" /></div>
	<?php
		if (function_exists('wp_nonce_field')) {
			wp_nonce_field('similar-posts-feed-update-options');
		}
	?>
	</form>
	</div>
	<?php
}
445	+
/**
 * "Other" tab of the Similar Posts Feed settings page.
 *
 * On submission (after the nonce check) the listed fields are merged into the
 * 'similar-posts-feed' option and the three weights are rescaled so that they
 * sum to 1. The form is printed in every case.
 */
function similar_posts_feed_other_options_subpage(){
	global $wpdb, $wp_version;
	$options = get_option('similar-posts-feed');
	if (isset($_POST['update_options'])) {
		check_admin_referer('similar-posts-feed-update-options');
		if (defined('POC_CACHE_4')) poc_cache_flush();
		// Fill up the options with the values chosen...
		$options = ppl_options_from_post($options, array('stripcodes', 'term_extraction', 'num_terms', 'weight_title', 'weight_content', 'weight_tags', 'hand_links'));
		// Explicit float casts: the weights may still be strings at this point, and
		// adding a float to a non-numeric string is a TypeError on PHP 8.
		// The small offset keeps the sum non-zero when all weights are 0.
		$wcontent = (float) $options['weight_content'] + 0.0001;
		$wtitle = (float) $options['weight_title'] + 0.0001;
		$wtags = (float) $options['weight_tags'] + 0.0001;
		$wcombined = $wcontent + $wtitle + $wtags;
		// normalise so the three weights add up to 1
		$options['weight_content'] = $wcontent / $wcombined;
		$options['weight_title'] = $wtitle / $wcombined;
		$options['weight_tags'] = $wtags / $wcombined;
		update_option('similar-posts-feed', $options);
		// Show a message to say we've done something
		echo '<div class="updated fade"><p>' . __('Options saved', 'similar_posts') . '</p></div>';
	}
	//now we drop into html to display the option page form
	?>
	<div class="wrap">
	<h2><?php _e('Other Settings', 'similar_posts'); ?></h2>
	<form method="post" action="">
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Other Settings', 'similar_posts') ?>" /></div>
	<table class="optiontable form-table">
	<?php
		ppl_display_weights($options);
		ppl_display_num_terms($options['num_terms']);
		ppl_display_term_extraction($options['term_extraction']);
		ppl_display_hand_links($options['hand_links']);
		ppl_display_stripcodes($options['stripcodes']);
	?>
	</table>
	<div class="submit"><input type="submit" name="update_options" value="<?php _e('Save Other Settings', 'similar_posts') ?>" /></div>
	<?php if (function_exists('wp_nonce_field')) wp_nonce_field('similar-posts-feed-update-options'); ?>
	</form>
	</div>
	<?php
}
486	+
/**
 * Admin subpage callback: hands the feed option key to the shared
 * bug-report form helper.
 */
function similar_posts_feed_bug_subpage() {
	$option_key = 'similar-posts-feed';
	ppl_bug_form($option_key);
}
490	+
/**
 * Admin subpage callback: shows the "remove this plugin" form.
 *
 * The nested eradicate() callback is passed by name to the shared form helper;
 * it deletes both stored option sets and drops the feed index table.
 */
function similar_posts_feed_remove_subpage(){
	function eradicate() {
		global $wpdb, $table_prefix;
		delete_option('similar-posts');
		delete_option('similar-posts-feed');
		$table_name = $table_prefix . 'similar_posts_feed';
		// NOTE(review): the installer visible in this file only creates
		// "{prefix}similar_posts"; confirm whether this table name is intended.
		// IF EXISTS keeps the clean-up from raising a DB error when it is absent.
		$wpdb->query("DROP TABLE IF EXISTS `$table_name`");
	}
	ppl_plugin_eradicate_form('eradicate', str_replace('-admin', '', __FILE__));
}
501	+
502	+ // sets up the index for the blog
/**
 * Rebuilds the whole similarity index from scratch.
 *
 * The index table is truncated and every non-revision row of the posts table
 * is re-indexed (content terms, title terms, tag terms).
 *
 * @param bool $utf8        Use the multibyte-aware term extraction.
 * @param bool $use_stemmer Stem words before storing them.
 * @param int  $batch       Number of posts fetched per query (to conserve memory).
 * @param bool $cjk         Convert the extracted terms to CJK digrams.
 * @return int Number of posts written to the index.
 */
function save_index_entries ($utf8=false, $use_stemmer=false, $batch=100, $cjk=false) {
	global $wpdb, $table_prefix;
	$table_name = $table_prefix.'similar_posts';
	// a batch size below 1 would make the first query return nothing
	$batch = max(1, (int) $batch);
	$wpdb->query("TRUNCATE `$table_name`");
	$termcount = 0;
	$start = 0;
	// in batches to conserve memory; ORDER BY keeps the LIMIT windows stable
	// so that no post is skipped or indexed twice between batches
	while ($posts = $wpdb->get_results("SELECT `ID`, `post_title`, `post_content`, `post_type` FROM $wpdb->posts ORDER BY `ID` LIMIT $start, $batch", ARRAY_A)) {
		foreach ($posts as $post) {
			if ($post['post_type'] === 'revision') continue;
			$postID = (int) $post['ID'];
			$content = sp_get_post_terms($post['post_content'], $utf8, $use_stemmer, $cjk);
			$title = sp_get_title_terms($post['post_title'], $utf8, $use_stemmer, $cjk);
			$tags = sp_get_tag_terms($postID, $utf8);
			// let wpdb quote the values instead of splicing them into the SQL text
			$wpdb->query($wpdb->prepare("INSERT INTO `$table_name` (pID, content, title, tags) VALUES (%d, %s, %s, %s)", $postID, $content, $title, $tags));
			$termcount = $termcount + 1;
		}
		$start += $batch;
		// give each batch a fresh time allowance
		if (!ini_get('safe_mode')) set_time_limit(30);
	}
	return $termcount;
}
529	+
530	+ // this function gets called when the plugin is installed to set up the index and default options
/**
 * Default values shared by the 'similar-posts' and 'similar-posts-feed' options.
 *
 * @param string $prefix Default text emitted before the list of links.
 * @return array Option name => default value.
 */
function sp_install_common_defaults($prefix) {
	return array(
		'limit' => 5,
		'skip' => 0,
		'age' => array('direction' => 'none', 'length' => '0', 'duration' => 'month'),
		'divider' => '',
		'omit_current_post' => 'true',
		'show_private' => 'false',
		'show_pages' => 'false',
		'show_attachments' => 'false',
		'none_text' => __('None Found', 'similar_posts'),
		'no_text' => 'false',
		'tag_str' => '',
		'excluded_cats' => '',
		'included_cats' => '',
		'excluded_authors' => '',
		'included_authors' => '',
		'included_posts' => '',
		'excluded_posts' => '',
		'stripcodes' => array(array()),
		'prefix' => $prefix,
		'suffix' => '</ul>',
		'output_template' => '<li>{link}</li>',
		'match_cat' => 'false',
		'match_tags' => 'false',
		'match_author' => 'false',
		'custom' => array('key' => '', 'op' => '=', 'value' => ''),
		'sort' => array('by1' => '', 'order1' => SORT_ASC, 'case1' => 'false', 'by2' => '', 'order2' => SORT_ASC, 'case2' => 'false'),
		'status' => array('publish' => 'true', 'private' => 'false', 'draft' => 'false', 'future' => 'false'),
		'group_template' => '',
		'weight_content' => 0.9,
		'weight_title' => 0.1,
		'weight_tags' => 0.0,
		'num_terms' => 20,
		'term_extraction' => 'frequency',
		'hand_links' => 'false',
	);
}

/**
 * Fills every missing option with its default and migrates legacy values.
 *
 * Only keys that are not yet set are assigned, which lets later versions add
 * new options without overwriting what the user already saved.
 *
 * @param mixed $options  Stored option value (anything but an array is treated as empty).
 * @param array $defaults Option name => default value.
 * @return array The completed option set.
 */
function sp_install_fill_options($options, $defaults) {
	// a missing option comes back as a non-array; start from an empty set
	// rather than casting it into a stray numeric entry
	if (!is_array($options)) $options = array();
	foreach ($defaults as $key => $value) {
		if (!isset($options[$key])) $options[$key] = $value;
	}
	// show_static is now show_pages
	if (isset($options['show_static'])) {
		$options['show_pages'] = $options['show_static'];
		unset($options['show_static']);
	}
	// '9999' was the legacy marker for "nothing selected"
	foreach (array('excluded_cats', 'included_cats', 'excluded_authors', 'included_authors', 'excluded_posts') as $key) {
		if ($options[$key] === '9999') $options[$key] = '';
	}
	return $options;
}

// this function gets called when the plugin is installed to set up the index and default options
function similar_posts_install() {
	global $wpdb, $table_prefix;

	$table_name = $table_prefix . 'similar_posts';
	// stay quiet during installation; both settings are restored at the end
	$errorlevel = error_reporting(0);
	$suppress = $wpdb->hide_errors();

	$create = "CREATE TABLE IF NOT EXISTS `$table_name` (
		`pID` bigint( 20 ) unsigned NOT NULL ,
		`content` longtext NOT NULL ,
		`title` text NOT NULL ,
		`tags` text NOT NULL ,
		FULLTEXT KEY `title` ( `title` ) ,
		FULLTEXT KEY `content` ( `content` ) ,
		FULLTEXT KEY `tags` ( `tags` )
		) ENGINE = MyISAM";
	$wpdb->query($create . " CHARSET = utf8;");
	// MySQL before 4.1 doesn't recognise the character set properly, so if there's an error we can try without
	if ($wpdb->last_error !== '') {
		$wpdb->query($create . ";");
	}

	// options for the feed variant
	$options = sp_install_fill_options(get_option('similar-posts-feed'), sp_install_common_defaults('Similar Posts:<ul>'));
	update_option('similar-posts-feed', $options);

	// options for the main plugin: the shared defaults plus a few of its own
	$defaults = sp_install_common_defaults('<ul>');
	$defaults['feed_active'] = 'false';
	$defaults['content_filter'] = 'false';
	$defaults['utf8'] = 'false';
	$defaults['cjk'] = 'false';
	$defaults['use_stemmer'] = 'false';
	$defaults['batch'] = '100';
	$options = sp_install_fill_options(get_option('similar-posts'), $defaults);
	// the multibyte modes cannot work without the mbstring extension
	if (!function_exists('mb_internal_encoding')) {
		$options['utf8'] = 'false';
		$options['cjk'] = 'false';
	}
	update_option('similar-posts', $options);

	// initial creation of the index, if the table is empty
	$num_index_posts = $wpdb->get_var("SELECT COUNT(*) FROM `$table_name`");
	if ($num_index_posts == 0) save_index_entries (($options['utf8'] === 'true'), false, $options['batch'], ($options['cjk'] === 'true'));

	// deactivate legacy Similar Posts Feed if present
	$current = get_option('active_plugins');
	if (is_array($current)) {
		$legacy = array_search('Similar_Posts_Feed/similar-posts-feed.php', $current);
		if ($legacy !== false) {
			unset($current[$legacy]);
			update_option('active_plugins', array_values($current));
		}
	}
	unset($current);

	// clear legacy custom fields
	$wpdb->query("DELETE FROM $wpdb->postmeta WHERE meta_key = 'similarterms'");

	// clear legacy index
	$indices = $wpdb->get_results("SHOW INDEX FROM $wpdb->posts", ARRAY_A);
	if (is_array($indices)) {
		foreach ($indices as $index) {
			if ($index['Key_name'] === 'post_similar') {
				$wpdb->query("ALTER TABLE $wpdb->posts DROP INDEX post_similar");
				break;
			}
		}
	}

	$wpdb->show_errors($suppress);
	error_reporting($errorlevel);
}
688	+
if (!function_exists('ppl_plugin_basename')) {
	/**
	 * Returns a plugin file's path relative to the plugins directory.
	 *
	 * Backslashes are converted to forward slashes and runs of slashes are
	 * collapsed in both the file path and WP_PLUGIN_DIR before the directory
	 * prefix is stripped. Paths outside the plugins directory are returned
	 * normalised but otherwise unchanged.
	 *
	 * @param string $file Absolute path of a plugin file.
	 * @return string Path relative to WP_PLUGIN_DIR.
	 */
	function ppl_plugin_basename($file) {
		$file = str_replace('\\','/',$file); // sanitize for Win32 installs
		$file = preg_replace('|/+|','/', $file); // remove any duplicate slash
		$plugin_dir = str_replace('\\','/',WP_PLUGIN_DIR); // sanitize for Win32 installs
		$plugin_dir = preg_replace('|/+|','/', $plugin_dir); // remove any duplicate slash
		$file = preg_replace('|^' . preg_quote($plugin_dir, '|') . '/|','',$file); // get relative path from plugins dir
		return $file;
	}
}
699	+
// Run the installer when the main plugin file is activated; its hook name is
// this admin file's plugin-relative path with the '-admin' suffix removed.
add_action(
	sprintf('activate_%s', str_replace('-admin', '', ppl_plugin_basename(__FILE__))),
	'similar_posts_install'
);
701	+
702	+ ?>

similar-posts.php ADDED Viewed

	@@ -0,0 +1,584 @@


1	+ <?php
2	+ /*
3	+ Plugin Name:Similar Posts
4	+ Plugin URI: http://rmarsh.com/plugins/similar-posts/
5	+ Description: Displays a <a href="options-general.php?page=similar-posts.php">highly configurable</a> list of related posts. Similarity can be based on any combination of word usage in the content, title, or tags. Don't be disturbed if it takes a few moments to complete the installation -- the plugin is indexing your posts. <a href="http://rmarsh.com/plugins/post-options/">Instructions and help online</a>. Requires the latest version of the <a href="http://wordpress.org/extend/plugins/post-plugin-library/">Post-Plugin Library</a> to be installed.
6	+ Version: 2.6.0.0
7	+ Author: Rob Marsh, SJ
8	+ Author URI: http://rmarsh.com/
9	+ */
10	+
11	+ /*
12	+ Copyright 2008 Rob Marsh, SJ (http://rmarsh.com)
13	+
14	+ This program is free software; you can redistribute it and/or modify
15	+ it under the terms of the GNU General Public License as published by
16	+ the Free Software Foundation; either version 2 of the License, or
17	+ (at your option) any later version.
18	+
19	+ This program is distributed in the hope that it will be useful,
20	+ but WITHOUT ANY WARRANTY; without even the implied warranty of
21	+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22	+ GNU General Public License for more details: http://www.gnu.org/licenses/gpl.txt
23	+ */
24	+
// The main plugin and its feed variant share one release number.
$similar_posts_feed_version = '2.6.0.0';
$similar_posts_version = $similar_posts_feed_version;
26	+
27	+ /*
28	+ Template Tag: Displays the posts most similar to the current post.
29	+ e.g.: <?php similar_posts(); ?>
30	+ Full help and instructions at http://rmarsh.com/plugins/post-options/
31	+ */
32	+
/**
 * Template tag: prints the list produced by SimilarPosts::execute().
 *
 * @param mixed $args Optional overrides, passed through to SimilarPosts::execute().
 */
function similar_posts($args = '') {
	$html = SimilarPosts::execute($args);
	echo $html;
}
36	+
37	+ /*
38	+
39	+ 'innards'
40	+
41	+ */
42	+
// Short alias for the directory separator, unless something else defined it already.
if (!defined('DSEP')) {
	define('DSEP', DIRECTORY_SEPARATOR);
}
// Without the Post-Plugin Library loaded, try to activate it.
if (!defined('POST_PLUGIN_LIBRARY')) {
	SimilarPosts::install_post_plugin_library();
}
45	+
/**
 * Namespace-style container for the plugin's entry points.
 *
 * Every method is invoked statically (SimilarPosts::execute() etc.), so they
 * are declared static; calling a non-static method statically is an error in
 * current PHP versions.
 */
class SimilarPosts {

	/**
	 * Builds the HTML list of posts similar to the current post.
	 *
	 * @param mixed  $args                    Overrides for the stored options.
	 * @param string $default_output_template Template used when none is configured.
	 * @param string $option_key              Name of the stored option set to use.
	 * @return string The generated HTML followed by a timing comment, or '' when
	 *                the Post-Plugin Library is not available.
	 */
	public static function execute($args='', $default_output_template='<li>{link}</li>', $option_key='similar-posts'){
		if (!SimilarPosts::check_post_plugin_library('<a href="http://downloads.wordpress.org/plugin/post-plugin-library.zip">'.__('Post-Plugin Library missing').'</a>')) return '';
		global $table_prefix, $wpdb, $wp_version;
		$start_time = ppl_microtime();
		$postid = ppl_current_post_id();
		if (defined('POC_CACHE_4')) {
			// non-scalar arguments cannot be concatenated, so hash them instead
			$args_key = (is_array($args) || is_object($args)) ? md5(serialize($args)) : $args;
			$cache_key = $option_key.$postid.$args_key;
			$result = poc_cache_fetch($cache_key);
			if ($result !== false) return $result . sprintf("<!-- Similar Posts took %.3f ms (cached) -->", 1000 * (ppl_microtime() - $start_time));
		}
		$table_name = $table_prefix . 'similar_posts';
		// First we process any arguments to see if any defaults have been overridden
		$options = ppl_parse_args($args);
		// Next we retrieve the stored options and use them unless a value has been overridden via the arguments
		$options = ppl_set_options($option_key, $options, $default_output_template);
		if (0 < $options['limit']) {
			// tags only exist from WordPress 2.3 on; compare as versions, not as floats
			$has_tags = version_compare($wp_version, '2.3', '>=');
			$match_tags = ($options['match_tags'] !== 'false' && $has_tags);
			$exclude_cats = ($options['excluded_cats'] !== '');
			$include_cats = ($options['included_cats'] !== '');
			$exclude_authors = ($options['excluded_authors'] !== '');
			$include_authors = ($options['included_authors'] !== '');
			$exclude_posts = (trim($options['excluded_posts']) !== '');
			$include_posts = (trim($options['included_posts']) !== '');
			$match_category = ($options['match_cat'] === 'true');
			$match_author = ($options['match_author'] === 'true');
			$use_tag_str = ('' != trim($options['tag_str']) && $has_tags);
			$omit_current_post = ($options['omit_current_post'] !== 'false');
			$hide_pass = ($options['show_private'] === 'false');
			$check_age = ('none' !== $options['age']['direction']);
			$check_custom = (trim($options['custom']['key']) !== '');
			// both numbers end up verbatim in the SQL, so force them to integers
			$limit = max(0, (int) $options['skip']).', '.(int) $options['limit'];

			//get the terms to do the matching
			if ($options['term_extraction'] === 'pagerank') {
				list( $contentterms, $titleterms, $tagterms) = sp_terms_by_textrank($postid, $options['num_terms']);
			} else {
				list( $contentterms, $titleterms, $tagterms) = sp_terms_by_freq($postid, $options['num_terms']);
			}
			// these should add up to 1.0
			$weight_content = $options['weight_content'];
			$weight_title = $options['weight_title'];
			$weight_tags = $options['weight_tags'];
			// below a threshold we ignore the weight completely and save some effort
			if ($weight_content < 0.001) $weight_content = (int) 0;
			if ($weight_title < 0.001) $weight_title = (int) 0;
			if ($weight_tags < 0.001) $weight_tags = (int) 0;

			$count_content = substr_count((string) $contentterms, ' ') + 1;
			$count_title = substr_count((string) $titleterms, ' ') + 1;
			$count_tags = substr_count((string) $tagterms, ' ') + 1;
			if ($weight_content) $weight_content = 57.0 * $weight_content / $count_content;
			if ($weight_title) $weight_title = 18.0 * $weight_title / $count_title;
			if ($weight_tags) $weight_tags = 24.0 * $weight_tags / $count_tags;
			// stays null unless manual links are enabled
			$forced_ids = null;
			if ($options['hand_links'] === 'true') {
				// check custom field for manual links
				$forced_ids = $wpdb->get_var("SELECT meta_value FROM $wpdb->postmeta WHERE post_id = ".(int) $postid." AND meta_key = 'sp_similar' ") ;
			}
			// the workhorse...
			$sql = "SELECT *, ";
			$sql .= score_fulltext_match($table_name, $weight_title, $titleterms, $weight_content, $contentterms, $weight_tags, $tagterms, $forced_ids);

			if ($check_custom) $sql .= "LEFT JOIN $wpdb->postmeta ON post_id = ID ";

			// build the 'WHERE' clause
			$where = array();
			$where[] = where_fulltext_match($weight_title, $titleterms, $weight_content, $contentterms, $weight_tags, $tagterms);
			if (!function_exists('get_post_type')) {
				$where[] = where_hide_future();
			} else {
				$where[] = where_show_status($options['status'], $options['show_attachments']);
			}
			if ($match_category) $where[] = where_match_category();
			if ($match_tags) $where[] = where_match_tags($options['match_tags']);
			if ($match_author) $where[] = where_match_author();
			$where[] = where_show_pages($options['show_pages'], $options['show_attachments']);
			if ($include_cats) $where[] = where_included_cats($options['included_cats']);
			if ($exclude_cats) $where[] = where_excluded_cats($options['excluded_cats']);
			if ($exclude_authors) $where[] = where_excluded_authors($options['excluded_authors']);
			if ($include_authors) $where[] = where_included_authors($options['included_authors']);
			if ($exclude_posts) $where[] = where_excluded_posts(trim($options['excluded_posts']));
			if ($include_posts) $where[] = where_included_posts(trim($options['included_posts']));
			if ($use_tag_str) $where[] = where_tag_str($options['tag_str']);
			if ($omit_current_post) $where[] = where_omit_post();
			if ($hide_pass) $where[] = where_hide_pass();
			if ($check_age) $where[] = where_check_age($options['age']['direction'], $options['age']['length'], $options['age']['duration']);
			if ($check_custom) $where[] = where_check_custom($options['custom']['key'], $options['custom']['op'], $options['custom']['value']);
			$sql .= "WHERE ".implode(' AND ', $where);
			if ($check_custom) $sql .= " GROUP BY $wpdb->posts.ID";
			$sql .= " ORDER BY score DESC LIMIT $limit";
			$results = $wpdb->get_results($sql);
		} else {
			$results = false;
		}
		if ($results) {
			$translations = ppl_prepare_template($options['output_template']);
			$items = array();
			foreach ($results as $result) {
				$items[] = ppl_expand_template($result, $options['output_template'], $translations, $option_key);
			}
			if ($options['sort']['by1'] !== '') $items = ppl_sort_items($options['sort'], $results, $option_key, $options['group_template'], $items);
			$output = implode(($options['divider']) ? $options['divider'] : "\n", $items);
			$output = $options['prefix'] . $output . $options['suffix'];
		} else {
			// if we reach here our query has produced no output ... so what next?
			if ($options['no_text'] !== 'false') {
				$output = ''; // we display nothing at all
			} else {
				// we display the blank message, with tags expanded if necessary
				$translations = ppl_prepare_template($options['none_text']);
				$output = $options['prefix'] . ppl_expand_template(array(), $options['none_text'], $translations, $option_key) . $options['suffix'];
			}
		}
		if (defined('POC_CACHE_4')) poc_cache_store($cache_key, $output);
		return $output . sprintf("<!-- Similar Posts took %.3f ms -->", 1000 * (ppl_microtime() - $start_time));
	}

	/**
	 * Tries to activate the post-plugin-library plugin by adding it to the
	 * list of active plugins and firing its activation hook.
	 */
	public static function install_post_plugin_library() {
		$plugin_path = 'post-plugin-library/post-plugin-library.php';
		$current = get_option('active_plugins');
		// the option may be missing or malformed; start from an empty list then
		if (!is_array($current)) $current = array();
		if (!in_array($plugin_path, $current)) {
			$current[] = $plugin_path;
			update_option('active_plugins', $current);
			do_action('activate_'.$plugin_path);
		}
	}

	/**
	 * Reports whether the Post-Plugin Library is loaded.
	 *
	 * @param string $msg Text echoed when the library is missing.
	 * @return bool True when ppl_microtime() is defined.
	 */
	public static function check_post_plugin_library($msg) {
		$exists = function_exists('ppl_microtime');
		if (!$exists) echo $msg;
		return $exists;
	}

}
181	+
/**
 * Picks the most frequent indexed content words of a post.
 *
 * @param int $ID        Post ID to look up in the index table.
 * @param int $num_terms Maximum number of content terms to return (at least 1).
 * @return array array(content terms, title terms, tag terms). The content
 *               terms are space separated with a leading space. All three are
 *               '' when the ID is empty or the post is not in the index.
 */
function sp_terms_by_freq($ID, $num_terms = 20) {
	global $wpdb, $table_prefix;
	// the ID goes verbatim into the SQL below
	$ID = (int) $ID;
	if (!$ID) return array('', '', '');
	$table_name = $table_prefix . 'similar_posts';
	$results = $wpdb->get_results("SELECT title, content, tags FROM $table_name WHERE pID=$ID LIMIT 1", ARRAY_A);
	if (!$results) return array('', '', '');

	// tally how often each word occurs
	$wordtable = array();
	$word = strtok($results[0]['content'], ' ');
	while ($word !== false) {
		if (isset($wordtable[$word])) {
			$wordtable[$word] += 1;
		} else {
			$wordtable[$word] = 1;
		}
		$word = strtok(' ');
	}
	arsort($wordtable);
	if ($num_terms < 1) $num_terms = 1;
	// preserve keys: purely numeric words are stored as integer keys and
	// would otherwise be renumbered 0, 1, 2, ... by array_slice
	$wordtable = array_slice($wordtable, 0, $num_terms, true);

	$terms = '';
	foreach ($wordtable as $word => $count) {
		$terms .= ' ' . $word;
	}

	return array($terms, $results[0]['title'], $results[0]['tags']);
}
210	+
211	+
212	+ // adapted PageRank algorithm see http://www.cs.unt.edu/~rada/papers/mihalcea.emnlp04.pdf
213	+ // and the weighted version http://www.cs.unt.edu/~rada/papers/hassan.ieee07.pdf
/**
 * Picks the highest-ranked indexed content words of a post using an
 * edge-weighted PageRank over the word-succession graph.
 *
 * @param int $ID        Post ID to look up in the index table.
 * @param int $num_terms Maximum number of content terms to return (at least 1).
 * @return array array(content terms, title terms, tag terms). The content
 *               terms are space separated with a leading space. All three are
 *               '' when the ID is empty or the post is not in the index.
 */
function sp_terms_by_textrank($ID, $num_terms = 20) {
	global $wpdb, $table_prefix;
	// the ID goes verbatim into the SQL below
	$ID = (int) $ID;
	if (!$ID) return array('', '', '');
	$table_name = $table_prefix . 'similar_posts';
	$results = $wpdb->get_results("SELECT title, content, tags FROM $table_name WHERE pID=$ID LIMIT 1", ARRAY_A);
	if (!$results) return array('', '', '');

	// build a directed graph with words as vertices and, as edges, the words which precede them
	$prev_word = 'aaaaa'; // placeholder predecessor for the very first word
	$graph = array();
	$out_edges = array();
	$word = strtok($results[0]['content'], ' ');
	while ($word !== false) {
		// list the incoming words and keep a tally of how many times words co-occur
		if (isset($graph[$word][$prev_word])) {
			$graph[$word][$prev_word] += 1;
		} else {
			$graph[$word][$prev_word] = 1;
		}
		// count how many times each word is followed by another
		if (isset($out_edges[$prev_word])) {
			$out_edges[$prev_word] += 1;
		} else {
			$out_edges[$prev_word] = 1;
		}
		$prev_word = $word;
		$word = strtok(' ');
	}

	$terms = '';
	$n = count($graph);
	// an empty content field gives an empty graph: nothing to rank, and the
	// base rank below would divide by zero
	if ($n > 0) {
		// initialise the list of PageRanks -- one for each unique word
		$oldrank = array_fill_keys(array_keys($graph), 0.25);
		$base = 0.15 / $n;
		$error_margin = $n * 0.005;
		do {
			$error = 0.0;
			$rank = array();
			// the edge-weighted PageRank calculation
			foreach ($graph as $vertex => $in_edges) {
				$r = 0;
				foreach ($in_edges as $edge => $weight) {
					// the placeholder predecessor has no rank of its own
					$edge_rank = isset($oldrank[$edge]) ? $oldrank[$edge] : 0;
					$r += ($weight * $edge_rank) / $out_edges[$edge];
				}
				$rank[$vertex] = $base + 0.95 * $r;
				$error += abs($rank[$vertex] - $oldrank[$vertex]);
			}
			$oldrank = $rank;
		} while ($error > $error_margin);
		arsort($rank);
		if ($num_terms < 1) $num_terms = 1;
		// preserve keys: purely numeric words are stored as integer keys and
		// would otherwise be renumbered 0, 1, 2, ... by array_slice
		$rank = array_slice($rank, 0, $num_terms, true);
		foreach ($rank as $vertex => $score) {
			$terms .= ' ' . $vertex;
		}
	}

	return array($terms, $results[0]['title'], $results[0]['tags']);
}
266	+
267	+ // do not try and use this function directly -- it is automatically installed when the option is set to show similar posts in feeds
/**
 * Content filter: appends the similar-posts list when a feed is being rendered.
 * Not meant to be called directly.
 *
 * @param string $content The content being filtered.
 * @return string The content, extended with the list in feeds only.
 */
function similar_posts_for_feed($content) {
	if (!is_feed()) {
		return $content;
	}
	return $content . SimilarPosts::execute('', '<li>{link}</li>', 'similar-posts-feed');
}
271	+
/**
 * Creates or refreshes the index row of a single post.
 *
 * Revisions and IDs without a posts row are left alone.
 *
 * @param int $postID ID of the post.
 * @return mixed The $postID argument, unchanged.
 */
function sp_save_index_entry($postID) {
	global $wpdb, $table_prefix;
	$table_name = $table_prefix . 'similar_posts';
	// integer copy for use inside SQL; the original value is what gets returned
	$id = (int) $postID;
	$post = $wpdb->get_row("SELECT post_content, post_title, post_type FROM $wpdb->posts WHERE ID = $id", ARRAY_A);
	if (!$post || $post['post_type'] === 'revision') return $postID;
	//extract its terms
	$options = get_option('similar-posts');
	$utf8 = ($options['utf8'] === 'true');
	$cjk = ($options['cjk'] === 'true');
	$use_stemmer = ($options['use_stemmer'] === 'true');
	$content = sp_get_post_terms($post['post_content'], $utf8, $use_stemmer, $cjk);
	$title = sp_get_title_terms($post['post_title'], $utf8, $use_stemmer, $cjk);
	$tags = sp_get_tag_terms($postID, $utf8);
	//check to see if the field is set
	$pid = $wpdb->get_var("SELECT pID FROM $table_name WHERE pID=$id limit 1");
	//then insert if empty; wpdb quotes the values for us
	if (is_null($pid)) {
		$wpdb->query($wpdb->prepare("INSERT INTO $table_name (pID, content, title, tags) VALUES (%d, %s, %s, %s)", $id, $content, $title, $tags));
	} else {
		$wpdb->query($wpdb->prepare("UPDATE $table_name SET content=%s, title=%s, tags=%s WHERE pID=%d", $content, $title, $tags, $id));
	}
	return $postID;
}
295	+
/**
 * Removes a post's row from the index table.
 *
 * @param int $postID ID of the post.
 * @return mixed The $postID argument, unchanged.
 */
function sp_delete_index_entry($postID) {
	global $wpdb, $table_prefix;
	$table_name = $table_prefix . 'similar_posts';
	// cast so that only a number can ever reach the SQL text
	$wpdb->query("DELETE FROM $table_name WHERE pID = " . (int) $postID);
	return $postID;
}
302	+
/**
 * Reduces text to lower-case plain words (single-byte variant).
 *
 * Tags are stripped, the text is lower-cased, and anything in [..], HTML
 * entities, MS Word punctuation and apostrophes followed by a non-word
 * character are each replaced by a space.
 *
 * @param string $text Raw post text.
 * @return string The cleaned text.
 */
function sp_clean_words($text) {
	$text = strip_tags($text);
	$text = strtolower($text);
	$text = str_replace("’", "'", $text); // convert MSWord apostrophe
	$patterns = array(
		'/\[(.*?)\]/',        // anything in [..]
		'/&[^\s;]+;/',        // any entities
		'/‘|’|—|“|”|–|…/',    // MS Word droppings (plain alternation)
		"/'\W/",              // apostrophe that does not continue a word
	);
	$text = preg_replace($patterns, ' ', $text);
	return $text;
}
310	+
/**
 * Reduces text to lower-case plain words (UTF-8 variant).
 *
 * Sets the mbstring regex and internal encodings to UTF-8, strips tags,
 * lower-cases with mb_strtolower, and replaces anything in [..], HTML
 * entities, MS Word punctuation and apostrophes followed by a non-word
 * character by a space each.
 *
 * @param string $text Raw post text (UTF-8).
 * @return string The cleaned text.
 */
function sp_mb_clean_words($text) {
	mb_regex_encoding('UTF-8');
	mb_internal_encoding('UTF-8');
	$text = strip_tags($text);
	$text = mb_strtolower($text);
	$text = str_replace("’", "'", $text); // convert MSWord apostrophe
	$patterns = array(
		'/\[(.*?)\]/u',       // anything in [..]
		'/&[^\s;]+;/u',       // any entities
		'/‘|’|—|“|”|–|…/u',   // MS Word droppings (plain alternation)
		"/'\W/u",             // apostrophe that does not continue a word
	);
	$text = preg_replace($patterns, ' ', $text);
	return $text;
}
320	+
/**
 * Right-pads a non-empty UTF-8 string to a minimum length in characters.
 *
 * @param string $text Text to pad; empty text is returned untouched.
 * @param int    $n    Target length in characters.
 * @param string $c    Padding string, repeated once per missing character.
 * @return string The padded text.
 */
function sp_mb_str_pad($text, $n, $c) {
	mb_internal_encoding('UTF-8');
	$length = mb_strlen($text);
	// nothing to do for empty text or text that is already long enough
	if ($length <= 0 || $length >= $n) {
		return $text;
	}
	return $text . str_repeat($c, $n - $length);
}
329	+
/**
 * Splits text into space-separated tokens: runs of single-byte characters are
 * kept together, consecutive multi-byte characters are emitted as overlapping
 * pairs padded to four characters with '_'.
 *
 * A multi-byte character that directly follows a single-byte run only closes
 * that run; it is remembered as the first half of the next pair.
 *
 * @param string $string UTF-8 text.
 * @return string The tokens joined by single spaces.
 */
function sp_cjk_digrams($string) {
	mb_internal_encoding("UTF-8");
	$pieces = array();
	$run = '';   // pending single-byte characters
	$last = '';  // previous multi-byte character
	$total = mb_strlen($string);
	$pos = 0;
	while ($pos < $total) {
		$char = mb_substr($string, $pos, 1);
		$pos++;
		// single-byte chars get combined
		if (strlen($char) <= 1) {
			$run .= $char;
			continue;
		}
		if ($run) {
			$pieces[] = $run;
			$run = '';
		} else {
			$pieces[] = sp_mb_str_pad($last . $char, 4, '_');
		}
		$last = $char;
	}
	if ($run) {
		$pieces[] = $run;
	}
	return implode(' ', $pieces);
}
355	+
/**
 * Extracts the index terms from a post's content.
 *
 * Words of three characters or fewer are dropped. With stemming enabled each
 * word is stemmed and padded to four characters with '_'. Words (or stems)
 * present as keys in the global $overusedwords are skipped.
 *
 * @param string $text        Raw post content.
 * @param bool   $utf8        Use the multibyte-aware cleaning, splitting and length functions.
 * @param bool   $use_stemmer Stem each word with stem().
 * @param bool   $cjk         Convert the result to CJK digrams.
 * @return string The terms, each followed by a space (before any digram conversion).
 */
function sp_get_post_terms($text, $utf8, $use_stemmer, $cjk) {
	global $overusedwords;
	if ($utf8) {
		mb_regex_encoding('UTF-8');
		mb_internal_encoding('UTF-8');
		$wordlist = mb_split("\W+", sp_mb_clean_words($text));
	} else {
		$wordlist = str_word_count(sp_clean_words($text), 1);
	}
	// mb_split() returns false on failure
	if (!is_array($wordlist)) $wordlist = array();
	$words = '';
	foreach ($wordlist as $word) {
		$length = $utf8 ? mb_strlen($word) : strlen($word);
		if ($length <= 3) continue;
		if ($use_stemmer) {
			// stems can come out shorter than four characters; pad them back
			$stem = stem($word);
			$word = $utf8 ? sp_mb_str_pad($stem, 4, '_') : str_pad($stem, 4, '_');
		}
		if (!isset($overusedwords[$word])) {
			$words .= $word . ' ';
		}
	}
	if ($cjk) $words = sp_cjk_digrams($words);
	return $words;
}
412	+
// Declared global explicitly so sp_get_title_terms() can still reach the list
// if this file is ever included from inside a function rather than at global scope.
global $tinywords;
// Short filler words that are left out of title terms (looked up by key).
$tinywords = array('the' => 1, 'and' => 1, 'of' => 1, 'a' => 1, 'for' => 1, 'on' => 1);

/**
 * Builds the space-separated list of terms for a post title.
 *
 * The title is cleaned and split into words. Words that are keys of the
 * global $tinywords array are dropped; the others are optionally stemmed and
 * then padded to at least four characters with '_'. Unlike
 * sp_get_post_terms(), no minimum word length is applied here. Every kept
 * term is followed by one space.
 *
 * @param string $text        Title text.
 * @param bool   $utf8        Use the multibyte (UTF-8) helpers instead of the byte-wise ones.
 * @param bool   $use_stemmer Replace each word with stem($word) before padding.
 * @param bool   $cjk         Pass the finished term list through sp_cjk_digrams().
 * @return string Space-separated terms.
 */
function sp_get_title_terms($text, $utf8, $use_stemmer, $cjk) {
	global $tinywords;

	if ($utf8) {
		mb_regex_encoding('UTF-8');
		mb_internal_encoding('UTF-8');
		$wordlist = mb_split("\W+", sp_mb_clean_words($text));
	} else {
		$wordlist = str_word_count(sp_clean_words($text), 1);
	}

	$words = '';
	foreach ($wordlist as $word) {
		// mb_split() returns empty strings when the text is empty or starts/ends
		// with a separator; skip them so they do not turn into bogus '____' terms.
		if ($word === '' || isset($tinywords[$word])) {
			continue;
		}
		if ($use_stemmer) {
			$word = stem($word);
		}
		$words .= ($utf8 ? sp_mb_str_pad($word, 4, '_') : str_pad($word, 4, '_')) . ' ';
	}

	if ($cjk) $words = sp_cjk_digrams($words);
	return $words;
}
461	+
/**
 * Returns the 'post_tag' terms attached to an object as one space-separated string.
 *
 * Each tag name has double quotes replaced by single quotes, is lower-cased,
 * padded to at least four characters with '_', and has its inner spaces
 * replaced by '_' so that one tag is always one token.
 *
 * @param int  $ID   ID of the object (post) whose tags are read.
 * @param bool $utf8 Use the multibyte (UTF-8) helpers instead of the byte-wise ones.
 * @return string Space-separated tag tokens, or '' when there are none.
 */
function sp_get_tag_terms($ID, $utf8) {
	global $wpdb;
	// NOTE(review): presumably a probe for a WordPress version with tag support -- confirm.
	if (!function_exists('get_object_term_cache')) return '';

	// The ID is placed directly into the SQL text, so force it to an integer first.
	$object_id = (int) $ID;
	$query = "SELECT t.name FROM $wpdb->terms AS t INNER JOIN $wpdb->term_taxonomy AS tt ON tt.term_id = t.term_id INNER JOIN $wpdb->term_relationships AS tr ON tr.term_taxonomy_id = tt.term_taxonomy_id WHERE tt.taxonomy = 'post_tag' AND tr.object_id = '$object_id'";
	$tags = $wpdb->get_col($query);
	if (empty($tags)) {
		return '';
	}

	if ($utf8) {
		mb_internal_encoding('UTF-8');
	}
	$newtags = array();
	foreach ($tags as $tag) {
		$tag = str_replace('"', "'", $tag);
		$newtags[] = $utf8
			? sp_mb_str_pad(mb_strtolower($tag), 4, '_')
			: str_pad(strtolower($tag), 4, '_');
	}
	// Spaces inside a tag would split it into several tokens once joined.
	$newtags = str_replace(' ', '_', $newtags);
	return implode(' ', $newtags);
}
486	+
// The settings screens are only loaded for admin requests.
if (is_admin()) {
	$sp_admin_file = dirname(__FILE__) . '/similar-posts-admin.php';
	require($sp_admin_file);
}
490	+
/**
 * Registers the "Similar Posts +" sidebar widget and its control form.
 *
 * Does nothing when register_sidebar_widget() is not available. The two
 * callbacks are declared inside this function, so they only come into
 * existence once it has run.
 */
function widget_rrm_similar_posts_init() {
	if (! function_exists("register_sidebar_widget")) {
		return;
	}

	/**
	 * Outputs the widget when its stored display condition evaluates to true.
	 *
	 * @param array $args Sidebar arguments; extract() turns them into the
	 *                    $before_widget, $before_title, $after_title and
	 *                    $after_widget variables used below.
	 */
	function widget_rrm_similar_posts($args) {
		extract($args);
		$options = get_option('widget_rrm_similar_posts');
		if (!is_array($options)) {
			$options = array();
		}
		$condition = empty($options['condition']) ? 'true' : $options['condition'];
		$condition = (stristr($condition, "return")) ? $condition : "return ".$condition;
		// The suffix must be a plain '||': backslashes inside a single-quoted
		// string are literal and would make the evaluated code a parse error.
		$condition = rtrim($condition, '; ') . ' || is_admin();';
		// NOTE(review): this evaluates PHP text saved from the widget control form.
		// Anyone able to save widget settings can run arbitrary code here.
		if (eval($condition)) {
			$title = empty($options['title']) ? __('Similar Posts', 'similar_posts') : $options['title'];
			$number = isset($options['number']) ? (int) $options['number'] : 0;
			if ( !$number )
				$number = 10;
			else if ( $number < 1 )
				$number = 1;
			else if ( $number > 15 )
				$number = 15;
			echo $before_widget;
			echo $before_title.$title.$after_title;
			similar_posts('limit='.$number);
			echo $after_widget;
		}
	}

	/**
	 * Saves the submitted widget settings (if any) and prints the settings form.
	 */
	function widget_rrm_similar_posts_control() {
		if ( !empty($_POST['widget_rrm_similar_posts_submit']) ) {
			// Missing fields are treated as empty instead of raising notices.
			$posted_title = isset($_POST['widget_rrm_similar_posts_title']) ? $_POST['widget_rrm_similar_posts_title'] : '';
			$posted_number = isset($_POST["widget_rrm_similar_posts_number"]) ? $_POST["widget_rrm_similar_posts_number"] : 0;
			$posted_condition = isset($_POST["widget_rrm_similar_posts_condition"]) ? $_POST["widget_rrm_similar_posts_condition"] : '';
			$options = array();
			$options['title'] = strip_tags(stripslashes($posted_title));
			$options['number'] = (int) $posted_number;
			$options['condition'] = stripslashes(trim($posted_condition, '; '));
			update_option("widget_rrm_similar_posts", $options);
		} else {
			$options = get_option('widget_rrm_similar_posts');
			if (!is_array($options)) {
				$options = array();
			}
		}
		$title = attribute_escape(isset($options['title']) ? $options['title'] : '');
		$number = isset($options['number']) ? (int) $options['number'] : 0;
		if ( !$number )
			$number = 5;
		$condition = attribute_escape(isset($options['condition']) ? $options['condition'] : '');
		?>
		<p><label for="widget_rrm_similar_posts_title"> <?php _e('Title:', 'similar_posts'); ?> <input style="width: 200px;" id="widget_rrm_similar_posts_title" name="widget_rrm_similar_posts_title" type="text" value="<?php echo $title; ?>" /></label></p>
		<p><label for="widget_rrm_similar_posts_number"> <?php _e('Number of posts to show:', 'similar_posts'); ?> <input style="width: 25px; text-align: center;" id="widget_rrm_similar_posts_number" name="widget_rrm_similar_posts_number" type="text" value="<?php echo $number; ?>" /></label> <?php _e('(at most 15)', 'similar_posts'); ?> </p>
		<p><label for="widget_rrm_similar_posts_condition"> <?php echo sprintf(__('Show only if page: (e.g., %sis_single()%s)', 'similar_posts'), '<a href="http://codex.wordpress.org/Conditional_Tags" title="help">', '</a>'); ?> <input style="width: 200px;" id="widget_rrm_similar_posts_condition" name="widget_rrm_similar_posts_condition" type="text" value="<?php echo $condition; ?>" /></label></p>
		<input type="hidden" id="widget_rrm_similar_posts_submit" name="widget_rrm_similar_posts_submit" value="1" />
		There are many more <a href="options-general.php?page=similar-posts.php">options</a> available.
		<?php
	}

	register_sidebar_widget(__('Similar Posts +', 'similar_posts'), 'widget_rrm_similar_posts');
	register_widget_control(__('Similar Posts +', 'similar_posts'), 'widget_rrm_similar_posts_control', 300, 100);
}

add_action('plugins_loaded', 'widget_rrm_similar_posts_init');
541	+
542	+
/*
	Language-specific setup: choose the directory that supplies the stemmer
	and the stop-word list, then load both.
*/

// The first two characters of the WordPress locale name the language directory.
// WPLANG is used when it is defined and non-empty; otherwise get_locale() is asked.
if (defined('WPLANG') && WPLANG !== '') {
	$language = substr(WPLANG, 0, 2);
} elseif (function_exists('get_locale')) {
	$language = substr(get_locale(), 0, 2);
} else {
	$language = '';
}
//if no language is specified make it the default which is 'en'
if ($language === false || $language === '') {
	$language = 'en';
}
// The trailing separator is DSEP (the original line used the undefined constant DEP).
$languagedir = dirname(__FILE__).DSEP.'languages'.DSEP.$language.DSEP;
//see if the directory exists and if not revert to the default English dir
if (!file_exists($languagedir)) {
	$languagedir = dirname(__FILE__).DSEP.'languages'.DSEP.'en'.DSEP;
}

// Declared before the includes so that the list assigned by stopwords.php lands in
// the global variable even if this file is itself included from inside a function.
// NOTE(review): assumes stopwords.php assigns $overusedwords -- confirm.
global $overusedwords;
// import the stemming algorithm ... a single function called 'stem'
require_once($languagedir.'stemmer.php');
require_once($languagedir.'stopwords.php');
// Flip the list so words become keys and can be tested with isset().
$overusedwords = is_array($overusedwords) ? array_flip($overusedwords) : array();
564	+
/**
 * Plugin start-up, run on the 'init' action with priority 1.
 *
 * Loads the translations, registers the optional content/feed filters
 * according to the stored 'similar-posts' options, and hooks the functions
 * that keep the index up to date when posts are saved or deleted.
 */
function similar_posts_init () {
	global $wp_db_version;
	load_plugin_textdomain('similar_posts');

	// get_option() may return false or an array without these keys,
	// so read each flag defensively. The flags are stored as the string 'true'.
	$options = get_option('similar-posts');
	if (!is_array($options)) {
		$options = array();
	}
	$content_filter = isset($options['content_filter']) && $options['content_filter'] === 'true';
	$feed_active = isset($options['feed_active']) && $options['feed_active'] === 'true';

	if ($content_filter && function_exists('ppl_register_content_filter')) ppl_register_content_filter('SimilarPosts');
	if ($feed_active) add_filter('the_content', 'similar_posts_for_feed');

	//install the actions to keep the index up to date
	add_action('save_post', 'sp_save_index_entry', 1);
	add_action('delete_post', 'sp_delete_index_entry', 1);
	// Databases older than version 3308 get two extra hooks.
	if ($wp_db_version < 3308 ) {
		add_action('edit_post', 'sp_save_index_entry', 1);
		add_action('publish_post', 'sp_save_index_entry', 1);
	}
}

add_action ('init', 'similar_posts_init', 1);
583	+
584	+ ?>

Similar Posts – Best Related Posts Plugin for WordPress - Version 2.6.0.0

Version Description

Release Info

Version 2.6.0.0