]> git.wh0rd.org - tt-rss.git/blob - plugins/af_sort_bayes/lib/class.naivebayesianstorage.php
add some more bayes stuff
[tt-rss.git] / plugins / af_sort_bayes / lib / class.naivebayesianstorage.php
1 <?php
2 /*
3 ***** BEGIN LICENSE BLOCK *****
4 This file is part of PHP Naive Bayesian Filter.
5
6 The Initial Developer of the Original Code is
7 Loic d'Anterroches [loic_at_xhtml.net].
8 Portions created by the Initial Developer are Copyright (C) 2003
9 the Initial Developer. All Rights Reserved.
10
11 Contributor(s):
12
13 PHP Naive Bayesian Filter is free software; you can redistribute it
14 and/or modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of
16 the License, or (at your option) any later version.
17
18 PHP Naive Bayesian Filter is distributed in the hope that it will
19 be useful, but WITHOUT ANY WARRANTY; without even the implied
20 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
21 See the GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with Foobar; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26
27 Alternatively, the contents of this file may be used under the terms of
28 the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 in which case the provisions of the LGPL are applicable instead
30 of those above.
31
32 ***** END LICENSE BLOCK *****
33 */
34
/** Access to the storage of the data for the filter.

    To avoid a dependency on any particular database, this class handles all
    access to the data storage. You can provide your own class as long as all
    the methods are available. This implementation sends its SQL through the
    connection object returned by Db::get() and restricts every query on the
    ttrss_plugin_af_sort_bayes_* tables to a single owner (owner_uid).

    methods:
    - array getCategories()
    - mixed getCategoryByName(string $category)
    - bool  wordExists(string $word)
    - array getWord(string $word, string $categoryid)
    - mixed updateWord(string $word, int $count, string $categoryid)
    - mixed removeWord(string $word, int $count, string $categoryid)
    - bool  updateProbabilities()
    - mixed saveReference(string $doc_id, string $categoryid, string $content)
    - array getReference(string $doc_id, bool $include_content = true)
    - mixed removeReference(string $doc_id)

*/
class NaiveBayesianStorage {
    /** connection object obtained from Db::get() */
    public $con = null;

    /** id of the user whose data is read and written (stored as int) */
    public $owner_uid = null;

    /** create a storage bound to one owner.

        Declared as __construct() because a method named after the class is
        no longer treated as a constructor by PHP 8.

        @param int owner_uid id of the owning user; cast to int because it is
               embedded in SQL text
    */
    public function __construct($owner_uid) {
        $this->con = Db::get();
        $this->owner_uid = (int) $owner_uid;
    }

    /** owner id as an integer that is safe to embed in SQL.

        The property is public and may have been reassigned after
        construction, so it is cast again on every use.

        @return int
    */
    private function ownerId() {
        return (int) $this->owner_uid;
    }

    /** build the WHERE clause selecting one word of one category of this owner.

        @return string SQL fragment starting with " WHERE"
        @param string word
        @param string category id
    */
    private function wordFilter($word, $category_id) {
        return " WHERE word='" . $this->con->escape_string($word) .
            "' AND category_id=" . (int) $category_id .
            " AND owner_uid = " . $this->ownerId();
    }

    /** get the list of categories with basic data.

        @return array key = category ids, values = array(keys = 'probability', 'category', 'word_count')
    */
    public function getCategories() {
        $categories = array();
        $rs = $this->con->query('SELECT * FROM ttrss_plugin_af_sort_bayes_categories WHERE owner_uid = ' .
            $this->ownerId());

        while ($line = $this->con->fetch_assoc($rs)) {
            $categories[$line['id']] = array(
                'probability' => $line['probability'],
                'category' => $line['category'],
                'word_count' => $line['word_count']
            );
        }

        return $categories;
    }

    /** look up the id of one of this owner's categories by its name.

        @return mixed the category id, or false when no such category exists
        @param string category name
    */
    public function getCategoryByName($category) {
        $rs = $this->con->query("SELECT id FROM ttrss_plugin_af_sort_bayes_categories WHERE category = '" .
            $this->con->escape_string($category) . "' AND owner_uid = " . $this->ownerId());

        if ($this->con->num_rows($rs) != 0) {
            return $this->con->fetch_result($rs, 0, "id");
        }

        return false;
    }

    /** see if the word is an already learnt word (in any category of this owner).

        @return bool
        @param string word
    */
    public function wordExists($word) {
        $rs = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE word='" .
            $this->con->escape_string($word) . "' AND owner_uid = " . $this->ownerId());

        return $this->con->num_rows($rs) != 0;
    }

    /** get details of a word in a category.

        @return array ('count' => count), count is 0 when the word is not stored
        @param string word
        @param string category id
    */
    public function getWord($word, $category_id) {
        $details = array();

        $rs = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_wordfreqs" .
            $this->wordFilter($word, $category_id));

        if ($this->con->num_rows($rs) == 0) {
            $details['count'] = 0;
        } else {
            $details['count'] = $this->con->fetch_result($rs, 0, "count");
        }

        return $details;
    }

    /** update a word in a category.
        If the word is new in this category it is added, else only the count is updated.

        @return mixed whatever the connection's query() returns for the statement
        @param string word
        @param int count
        @param string category id
    */
    public function updateWord($word, $count, $category_id) {
        $count = (int) $count;
        $category_id = (int) $category_id;
        $oldword = $this->getWord($word, $category_id);

        if (0 == $oldword['count']) {
            return $this->con->query("INSERT INTO ttrss_plugin_af_sort_bayes_wordfreqs (word, category_id, count, owner_uid)
                VALUES ('" . $this->con->escape_string($word) . "', " .
                $category_id . ", " .
                $count . ", " .
                $this->ownerId() . ")");
        }

        return $this->con->query("UPDATE ttrss_plugin_af_sort_bayes_wordfreqs SET count = count + " . $count .
            $this->wordFilter($word, $category_id));
    }

    /** remove a word from a category.
        The row is deleted when the stored count would drop to zero or below,
        otherwise the count is decreased.

        @return mixed whatever the connection's query() returns for the statement
        @param string word
        @param int count
        @param string category id
    */
    public function removeWord($word, $count, $category_id) {
        $count = (int) $count;
        $category_id = (int) $category_id;
        $oldword = $this->getWord($word, $category_id);
        $filter = $this->wordFilter($word, $category_id);

        if (0 != $oldword['count'] && 0 >= ($oldword['count'] - $count)) {
            return $this->con->query("DELETE FROM ttrss_plugin_af_sort_bayes_wordfreqs" . $filter);
        }

        // When the word is not stored this UPDATE simply matches no row.
        return $this->con->query("UPDATE ttrss_plugin_af_sort_bayes_wordfreqs SET count = count - " .
            $count . $filter);
    }

    /** update the probabilities of the categories and word count.
        This function must be run after a set of training

        @return bool success
    */
    public function updateProbabilities() {
        $uid = $this->ownerId();

        // first get the total word count of this owner
        $rs = $this->con->query("SELECT SUM(count) AS total FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE owner_uid = " . $uid);

        $total_words = (int) $this->con->fetch_result($rs, 0, "total");

        if ($total_words == 0) {
            // nothing learnt: reset all categories and avoid a division by zero below
            $this->con->query("UPDATE ttrss_plugin_af_sort_bayes_categories SET word_count=0, probability=0 WHERE owner_uid = " . $uid);
            return true;
        }

        $rs = $this->con->query("SELECT tc.id AS category_id, SUM(count) AS total FROM ttrss_plugin_af_sort_bayes_categories AS tc
            LEFT JOIN ttrss_plugin_af_sort_bayes_wordfreqs AS tw ON (tc.id = tw.category_id) WHERE tc.owner_uid = " . $uid . " GROUP BY tc.id");

        while ($line = $this->con->fetch_assoc($rs)) {
            // the LEFT JOIN yields a NULL total for categories without words; (int) turns it into 0
            $category_total = (int) $line['total'];

            // %F ignores the locale, so the decimal separator is always "."
            // and the value stays valid SQL whatever setlocale() was called with
            $proba = sprintf('%.10F', $category_total / $total_words);

            $this->con->query("UPDATE ttrss_plugin_af_sort_bayes_categories SET word_count=" . $category_total .
                ", probability=" . $proba . " WHERE id = " . (int) $line['category_id']);
        }

        return true;
    }

    /** save a reference in the database.

        @return mixed whatever the connection's query() returns for the statement
        @param string reference id, must be unique
        @param string category id
        @param string content of the reference; accepted for interface
               compatibility only, the INSERT below does not store it
    */
    public function saveReference($doc_id, $category_id, $content) {
        return $this->con->query("INSERT INTO ttrss_plugin_af_sort_bayes_references (document_id, category_id, owner_uid) VALUES
            ('" . $this->con->escape_string($doc_id) . "', '" .
            (int) $category_id . "', " .
            $this->ownerId() . ")");
    }

    /** get a reference from the database.

        @return array empty when the reference is unknown, else
                ('category_id' => ..., 'id' => ..., 'document_id' => ...) plus
                'content' (lower-cased title and tag-stripped content of the
                ttrss_entries row whose guid equals the document id) when
                requested and such a row exists
        @param string id
        @param bool include_content whether to look up the entry text
    */
    public function getReference($doc_id, $include_content = true) {
        $ref = array();
        $rs = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_references WHERE document_id='" .
            $this->con->escape_string($doc_id) . "' AND owner_uid = " . $this->ownerId());

        if ($this->con->num_rows($rs) == 0) {
            return $ref;
        }

        $ref['category_id'] = $this->con->fetch_result($rs, 0, 'category_id');
        $ref['id'] = $this->con->fetch_result($rs, 0, 'id');
        $ref['document_id'] = $this->con->fetch_result($rs, 0, 'document_id');

        if ($include_content) {
            $rs = $this->con->query("SELECT content, title FROM ttrss_entries WHERE guid = '" .
                $this->con->escape_string($ref['document_id']) . "'");

            if ($this->con->num_rows($rs) != 0) {
                $title = $this->con->fetch_result($rs, 0, 'title');
                $body = strip_tags($this->con->fetch_result($rs, 0, 'content'));

                $ref['content'] = mb_strtolower($title . ' ' . $body);
            }
        }

        return $ref;
    }

    /** remove a reference from the database

        @return mixed whatever the connection's query() returns for the statement
        @param string reference id
    */
    public function removeReference($doc_id) {
        return $this->con->query("DELETE FROM ttrss_plugin_af_sort_bayes_references WHERE document_id='" .
            $this->con->escape_string($doc_id) . "' AND owner_uid = " . $this->ownerId());
    }

}