]> git.wh0rd.org - tt-rss.git/blob - plugins/af_sort_bayes/lib/class.naivebayesianstorage.php
fccdcaf06eb7c643a1d098e558d5df0240256063
[tt-rss.git] / plugins / af_sort_bayes / lib / class.naivebayesianstorage.php
1 <?php
2 /*
3 ***** BEGIN LICENSE BLOCK *****
4 This file is part of PHP Naive Bayesian Filter.
5
6 The Initial Developer of the Original Code is
7 Loic d'Anterroches [loic_at_xhtml.net].
8 Portions created by the Initial Developer are Copyright (C) 2003
9 the Initial Developer. All Rights Reserved.
10
11 Contributor(s):
12
13 PHP Naive Bayesian Filter is free software; you can redistribute it
14 and/or modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of
16 the License, or (at your option) any later version.
17
18 PHP Naive Bayesian Filter is distributed in the hope that it will
19 be useful, but WITHOUT ANY WARRANTY; without even the implied
20 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
21 See the GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with Foobar; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26
27 Alternatively, the contents of this file may be used under the terms of
28 the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 in which case the provisions of the LGPL are applicable instead
30 of those above.
31
32 ***** END LICENSE BLOCK *****
33 */
34
/**
 * Database-backed storage for the naive Bayesian filter.
 *
 * The filter never talks to the database directly: every read and write
 * goes through this class, so another backend can be swapped in by
 * providing a class that exposes the same methods. This implementation
 * uses the shared connection returned by Db::get().
 *
 * Methods:
 * - array getCategories()
 * - bool  wordExists(string $word)
 * - array getWord(string $word, string $category_id)
 * - bool  updateWord(string $word, int $count, string $category_id)
 * - bool  removeWord(string $word, int $count, string $category_id)
 * - bool  updateProbabilities()
 * - bool  saveReference(string $doc_id, string $category_id, string $content)
 * - array getReference(string $doc_id)
 * - bool  removeReference(string $doc_id)
 *
 * NOTE(review): $owner_uid is stored by the constructor but no query in
 * this class filters on it.
 * NOTE(review): write statements are issued through $this->con->execute();
 * confirm that the Db wrapper actually provides that method.
 */
class NaiveBayesianStorage {
    /** Connection wrapper obtained from Db::get(). */
    public $con = null;

    /** Id of the user this storage instance was created for. */
    public $owner_uid = null;

    /**
     * Bind the storage to the shared database connection.
     *
     * @param mixed $owner_uid id of the owning user
     */
    function __construct($owner_uid) {
        $this->con = Db::get();
        $this->owner_uid = $owner_uid;
    }

    /**
     * Legacy PHP 4 style entry point, kept so existing explicit calls
     * (e.g. parent::NaiveBayesianStorage()) keep working. It must stay
     * declared after __construct().
     *
     * @param mixed $owner_uid id of the owning user
     * @return bool always true
     */
    function NaiveBayesianStorage($owner_uid) {
        $this->__construct($owner_uid);

        return true;
    }

    /**
     * Get the list of categories with their basic statistics.
     *
     * @return array key = category id, value = array('probability' => ..., 'word_count' => ...)
     */
    function getCategories() {
        $categories = array();
        $rs = $this->con->query('SELECT * FROM ttrss_plugin_af_sort_bayes_categories');

        // The fetched row (not the result handle) carries the column values.
        while ($row = $this->con->fetch_assoc($rs)) {
            $categories[$row['category_id']] = array(
                'probability' => $row['probability'],
                'word_count' => $row['word_count']
            );
        }

        return $categories;
    }

    /**
     * Tell whether the word has already been learnt in any category.
     *
     * @param string $word
     * @return bool
     */
    function wordExists($word) {
        $rs = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE word='" . $this->con->escape_string($word) . "'");

        return $this->con->num_rows($rs) != 0;
    }

    /**
     * Get the details of a word in a category.
     *
     * @param string $word
     * @param string $category_id
     * @return array array('count' => int); count is 0 when the word is unknown in that category
     */
    function getWord($word, $category_id) {
        $rs = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE word='" . $this->con->escape_string($word) . "' AND category_id='" . $this->con->escape_string($category_id) . "'");

        // An empty result yields no row, which maps to a count of zero.
        $row = $this->con->fetch_assoc($rs);

        return array('count' => $row ? (int) $row['count'] : 0);
    }

    /**
     * Update a word in a category.
     * If the word is new in this category it is inserted, otherwise the
     * given count is added to the stored one.
     *
     * @param string $word
     * @param int $count
     * @param string $category_id
     * @return bool success, as reported by the connection's execute()
     */
    function updateWord($word, $count, $category_id) {
        $count = (int) $count;
        $oldword = $this->getWord($word, $category_id);

        if (0 === $oldword['count']) {
            return $this->con->execute("INSERT INTO ttrss_plugin_af_sort_bayes_wordfreqs (word, category_id, count) VALUES ('" . $this->con->escape_string($word) . "', '" . $this->con->escape_string($category_id) . "', '" . $count . "')");
        }

        return $this->con->execute("UPDATE ttrss_plugin_af_sort_bayes_wordfreqs SET count = count + " . $count . " WHERE category_id = '" . $this->con->escape_string($category_id) . "' AND word = '" . $this->con->escape_string($word) . "'");
    }

    /**
     * Remove occurrences of a word from a category.
     * The row is deleted once its count would drop to zero or below;
     * otherwise the stored count is decremented.
     *
     * @param string $word
     * @param int $count
     * @param string $category_id
     * @return bool success, as reported by the connection's execute()
     */
    function removeWord($word, $count, $category_id) {
        $count = (int) $count;
        $oldword = $this->getWord($word, $category_id);

        if (0 !== $oldword['count'] && 0 >= ($oldword['count'] - $count)) {
            return $this->con->execute("DELETE FROM ttrss_plugin_af_sort_bayes_wordfreqs WHERE word='" . $this->con->escape_string($word) . "' AND category_id='" . $this->con->escape_string($category_id) . "'");
        }

        return $this->con->execute("UPDATE ttrss_plugin_af_sort_bayes_wordfreqs SET count = count - " . $count . " WHERE category_id = '" . $this->con->escape_string($category_id) . "' AND word = '" . $this->con->escape_string($word) . "'");
    }

    /**
     * Recompute the word count and probability of every category.
     * This must be run after a set of training operations.
     *
     * @return bool always true
     */
    function updateProbabilities() {
        // No WHERE clause: a literal "WHERE 1" is redundant and is rejected
        // by databases that require a boolean condition.
        $rs = $this->con->query("SELECT category_id, SUM(count) AS total FROM ttrss_plugin_af_sort_bayes_wordfreqs GROUP BY category_id");

        // Buffer the rows so the result set only has to be read once.
        $rows = array();
        $total_words = 0;

        while ($row = $this->con->fetch_assoc($rs)) {
            $row['total'] = (int) $row['total'];
            $total_words += $row['total'];
            $rows[] = $row;
        }

        if ($total_words == 0) {
            $this->con->execute("UPDATE ttrss_plugin_af_sort_bayes_categories SET word_count=0, probability=0");

            return true;
        }

        foreach ($rows as $row) {
            // %F is not locale aware, so the decimal separator is always "."
            $proba = sprintf('%.10F', $row['total'] / $total_words);
            $this->con->execute("UPDATE ttrss_plugin_af_sort_bayes_categories SET word_count=" . $row['total'] . ", probability=" . $proba . " WHERE category_id = '" . $this->con->escape_string($row['category_id']) . "'");
        }

        return true;
    }

    /**
     * Save a reference document in the database.
     *
     * @param string $doc_id reference id, must be unique
     * @param string $category_id
     * @param string $content content of the reference
     * @return bool success, as reported by the connection's execute()
     */
    function saveReference($doc_id, $category_id, $content) {
        return $this->con->execute("INSERT INTO ttrss_plugin_af_sort_bayes_references (id, category_id, content) VALUES ('" . $this->con->escape_string($doc_id) . "', '" . $this->con->escape_string($category_id) . "', '" . $this->con->escape_string($content) . "')");
    }

    /**
     * Get a reference document from the database.
     *
     * @param string $doc_id
     * @return array array('category_id' => ..., 'content' => ..., 'id' => ...), or an empty array when the id is unknown
     */
    function getReference($doc_id) {
        $rs = $this->con->query("SELECT * FROM ttrss_plugin_af_sort_bayes_references WHERE id='" . $this->con->escape_string($doc_id) . "'");

        $row = $this->con->fetch_assoc($rs);

        if (!$row) {
            return array();
        }

        return array(
            'category_id' => $row['category_id'],
            'content' => $row['content'],
            'id' => $row['id']
        );
    }

    /**
     * Remove a reference document from the database.
     *
     * @param string $doc_id reference id
     * @return bool success, as reported by the connection's execute()
     */
    function removeReference($doc_id) {
        return $this->con->execute("DELETE FROM ttrss_plugin_af_sort_bayes_references WHERE id='" . $this->con->escape_string($doc_id) . "'");
    }
}