]> git.wh0rd.org Git - tt-rss.git/blob - plugins/af_sort_bayes/init.php
bayes: properly reset score when going good -> ugly
[tt-rss.git] / plugins / af_sort_bayes / init.php
1 <?php
2
/**
 * Bayesian article classifier plugin for tt-rss (work in progress).
 *
 * Articles are sorted into three per-user categories: GOOD, BAD and UGLY.
 * UGLY is the neutral default. GOOD articles get their score raised by
 * $score_modifier, BAD articles get it lowered by the same amount.
 *
 * Classification data lives in three plugin-owned tables (categories,
 * references, wordfreqs) that share the $sql_prefix name prefix.
 */
class Af_Sort_Bayes extends Plugin {

        // Plugin host handed to init().
        private $host;

        // Not referenced anywhere in this class at the moment.
        private $filters = array();

        // Database handle obtained from Db::get() in init().
        private $dbh;

        // Absolute score delta: added for GOOD, subtracted for BAD.
        private $score_modifier = 50;

        // Common name prefix of the plugin's own tables.
        private $sql_prefix = "ttrss_plugin_af_sort_bayes";

        /**
         * Plugin metadata.
         *
         * @return array version, description, author
         */
        function about() {
                return array(1.0,
                        "Bayesian classifier for tt-rss (WIP)",
                        "fox");
        }

        /**
         * Loads the bundled classifier library, makes sure the plugin tables
         * exist and registers the article-filter, prefs-tab and article-button
         * hooks.
         *
         * @param object $host plugin host; must expose add_hook() and the HOOK_* constants
         */
        function init($host) {
                require_once __DIR__ . "/lib/class.naivebayesian.php";
                require_once __DIR__ . "/lib/class.naivebayesian_ngram.php";
                require_once __DIR__ . "/lib/class.naivebayesianstorage.php";

                $this->host = $host;
                $this->dbh = Db::get();

                $this->init_database();

                $host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
                $host->add_hook($host::HOOK_PREFS_TAB, $this);
                $host->add_hook($host::HOOK_ARTICLE_BUTTON, $this);
        }

        /**
         * Manually (re)trains a single article one step up or down.
         *
         * Reads "article_id" and "train_up" from $_REQUEST and works on behalf
         * of $_SESSION["uid"]. Transitions:
         *
         *   train up:   BAD -> UGLY (score 0),  UGLY -> GOOD (score +modifier)
         *   train down: GOOD -> UGLY (score 0), UGLY -> BAD  (score -modifier)
         *
         * An article already at the end of the scale keeps its category and
         * score. Prints "<article_id> :: <category> :: <score>" as the response.
         */
        function trainArticle() {
                $article_id = (int) $_REQUEST["article_id"];
                $train_up = sql_bool_to_bool($_REQUEST["train_up"]);
                $owner_uid = (int) $_SESSION["uid"];

                $dst_category = "UGLY";

                // Stays empty when the article is not found, so the final print
                // emits the same text as before without touching an undefined
                // variable.
                $score = "";

                $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
                $nb = new NaiveBayesian($nbs);

                $result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
                        $article_id . " AND owner_uid = " . $owner_uid);

                if ($this->dbh->num_rows($result) != 0) {
                        $guid = $this->dbh->fetch_result($result, 0, "guid");
                        $title = $this->dbh->fetch_result($result, 0, "title");
                        $content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content")));
                        $score = (int) $this->dbh->fetch_result($result, 0, "score");

                        $this->dbh->query("BEGIN");

                        $ref = $nbs->getReference($guid, false);

                        // Articles without a stored reference are treated as neutral.
                        if (isset($ref['category_id'])) {
                                $current_category = $nbs->getCategoryById($ref['category_id']);
                        } else {
                                $current_category = "UGLY";
                        }

                        // set score to fixed value for now

                        if ($train_up) {
                                switch ($current_category) {
                                        case "UGLY":
                                                $dst_category = "GOOD";
                                                $score = $this->score_modifier;
                                                break;
                                        case "BAD":
                                                $dst_category = "UGLY";
                                                $score = 0;
                                                break;
                                        case "GOOD":
                                                $dst_category = "GOOD";
                                                break;
                                }
                        } else {
                                switch ($current_category) {
                                        case "UGLY":
                                                $dst_category = "BAD";
                                                // BAD lowers the score, mirroring what
                                                // hook_article_filter() does for BAD.
                                                $score = -$this->score_modifier;
                                                break;
                                        case "BAD":
                                                $dst_category = "BAD";
                                                break;
                                        case "GOOD":
                                                $dst_category = "UGLY";
                                                $score = 0;
                                                break;
                                }
                        }

                        // Drop the previous training for this document before
                        // training it into the destination category.
                        $nb->untrain($guid, $content);
                        $nb->train($guid, $nbs->getCategoryByName($dst_category), $content);

                        $this->dbh->query("UPDATE ttrss_user_entries SET score = '$score' WHERE ref_id = $article_id AND owner_uid = " . $owner_uid);

                        $nb->updateProbabilities();

                        $this->dbh->query("COMMIT");
                }

                print "$article_id :: $dst_category :: $score";
        }

        /**
         * @return string contents of the plugin's init.js
         */
        function get_js() {
                return file_get_contents(__DIR__ . "/init.js");
        }

        /**
         * @return string contents of the plugin's init.js (same script as get_js())
         */
        function get_prefs_js() {
                return file_get_contents(__DIR__ . "/init.js");
        }

        /**
         * Renders the thumbs-up / thumbs-down buttons for one article. Each
         * button calls the JavaScript function bayesTrain(id, up).
         *
         * @param array $line article row; only "id" is used
         * @return string HTML for both buttons
         */
        function hook_article_button($line) {
                // The id is embedded into inline JavaScript, so force it numeric.
                $article_id = (int) $line["id"];

                return "<img src=\"plugins/af_sort_bayes/thumb_up.png\"
                        style=\"cursor : pointer\"
                        onclick=\"bayesTrain(" . $article_id . ", true)\"
                        class='tagsPic' title='".__('+1')."'>" .
                "<img src=\"plugins/af_sort_bayes/thumb_down.png\"
                        style=\"cursor : pointer\"
                        onclick=\"bayesTrain(" . $article_id . ", false)\"
                        class='tagsPic' title='".__('-1')."'>";
        }

        /**
         * Creates the plugin tables when they do not exist yet and, for a
         * logged-in user without any category rows, inserts the GOOD, BAD and
         * UGLY categories. Called from init().
         */
        function init_database() {
                $prefix = $this->sql_prefix;

                // TODO there probably should be a way for plugins to determine their schema version to upgrade tables

                /*$this->dbh->query("DROP TABLE IF EXISTS {$prefix}_wordfreqs", false);
                $this->dbh->query("DROP TABLE IF EXISTS {$prefix}_references", false);
                $this->dbh->query("DROP TABLE IF EXISTS {$prefix}_categories", false);*/

                $this->dbh->query("BEGIN");

                // Separate DDL per backend: MySQL when DB_TYPE says so,
                // PostgreSQL-style (SERIAL, inline REFERENCES) otherwise.

                if (DB_TYPE == "mysql") {

                        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
                                id INTEGER NOT NULL PRIMARY KEY auto_increment,
                                category varchar(100) NOT NULL DEFAULT '',
                                probability DOUBLE NOT NULL DEFAULT '0',
                                owner_uid INTEGER NOT NULL,
                                FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
                                word_count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

                        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
                                id INTEGER NOT NULL PRIMARY KEY auto_increment,
                                document_id VARCHAR(255) NOT NULL,
                                category_id INTEGER NOT NULL,
                                FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
                                owner_uid INTEGER NOT NULL,
                                FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE) ENGINE=InnoDB");

                        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
                                word varchar(100) NOT NULL DEFAULT '',
                                category_id INTEGER NOT NULL,
                                FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
                                owner_uid INTEGER NOT NULL,
                                FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
                                count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

                } else {
                        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
                                id SERIAL NOT NULL PRIMARY KEY,
                                category varchar(100) NOT NULL DEFAULT '',
                                probability DOUBLE PRECISION NOT NULL DEFAULT '0',
                                owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
                                word_count BIGINT NOT NULL DEFAULT '0')");

                        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
                                id SERIAL NOT NULL PRIMARY KEY,
                                document_id VARCHAR(255) NOT NULL,
                                category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
                                owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE)");

                        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
                                word varchar(100) NOT NULL DEFAULT '',
                                category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
                                owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
                                count BIGINT NOT NULL DEFAULT '0')");
                }

                // There may be no session uid at all; in that case only the
                // tables are created and no categories are seeded.
                $owner_uid = isset($_SESSION["uid"]) ? (int) $_SESSION["uid"] : 0;

                if ($owner_uid) {
                        $result = $this->dbh->query("SELECT id FROM {$prefix}_categories WHERE owner_uid = $owner_uid LIMIT 1");

                        if ($this->dbh->num_rows($result) == 0) {
                                $this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('GOOD', $owner_uid)");
                                $this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('BAD', $owner_uid)");
                                $this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('UGLY', $owner_uid)");
                        }
                }

                $this->dbh->query("COMMIT");
        }

        /**
         * Prints the preferences pane body for $_SESSION["uid"]: a table of
         * per-category statistics, the 20 most recent classified articles, and
         * the "Refresh" / "Clear database" buttons.
         */
        function renderPrefsUI() {
                $owner_uid = (int) $_SESSION["uid"];

                $result = $this->dbh->query("SELECT category, probability, word_count,
                        (SELECT COUNT(id) FROM {$this->sql_prefix}_references WHERE
                                category_id = {$this->sql_prefix}_categories.id) as doc_count
                        FROM {$this->sql_prefix}_categories WHERE owner_uid = " . $owner_uid);

                print "<h3>" . __("Statistics") . "</h3>";

                print "<table>";
                print "<tr><th>Category</th><th>Probability</th><th>Words</th><th>Articles</th></tr>";

                while ($line = $this->dbh->fetch_assoc($result)) {
                        print "<tr>";
                        foreach ($line as $k => $v) {
                                if ($k == "probability") $v = sprintf("%.3f", $v);

                                print "<td>" . htmlspecialchars($v) . "</td>";
                        }
                        print "</tr>";
                }

                print "</table>";

                print "<h3>" . __("Last matched articles") . "</h3>";

                // Restrict both the user entries and the classifier references to
                // the current user; otherwise other users' articles are listed too.
                $result = $this->dbh->query("SELECT te.title, category, tf.title AS feed_title
                        FROM ttrss_entries AS te, ttrss_user_entries AS tu, ttrss_feeds AS tf, {$this->sql_prefix}_references AS tr, {$this->sql_prefix}_categories AS tc
                        WHERE tf.id = tu.feed_id AND tu.ref_id = te.id AND tc.id = tr.category_id AND tr.document_id = te.guid
                        AND tu.owner_uid = $owner_uid AND tr.owner_uid = $owner_uid
                        ORDER BY te.id DESC LIMIT 20");

                print "<ul class=\"browseFeedList\" style=\"border-width : 1px\">";

                while ($line = $this->dbh->fetch_assoc($result)) {
                        // Article and feed titles come from the feeds themselves,
                        // so escape them before putting them into the page.
                        print "<li>" . htmlspecialchars($line["category"]) . ": " .
                                htmlspecialchars($line["title"]) .
                                " (" . htmlspecialchars($line["feed_title"]) . ")</li>";
                }

                print "</ul>";

                print "<button dojoType=\"dijit.form.Button\" onclick=\"return bayesUpdateUI()\">".
                        __('Refresh')."</button> ";

                print "<button dojoType=\"dijit.form.Button\" onclick=\"return bayesClearDatabase()\">".
                        __('Clear database')."</button> ";
        }

        /**
         * Prints the plugin's accordion pane, but only for the "prefPrefs" tab.
         *
         * @param string $args identifier of the preferences tab being rendered
         */
        function hook_prefs_tab($args) {
                if ($args != "prefPrefs") return;

                print "<div id=\"af_sort_bayes_prefs\" dojoType=\"dijit.layout.AccordionPane\" title=\"".__('Bayesian classifier (af_sort_bayes)')."\">";

                $this->renderPrefsUI();

                print "</div>";
        }

        /**
         * Classifies an incoming article and trains the classifier with it.
         *
         * Every article is trained as UGLY until the UGLY category has a word
         * count of at least 10000. From then on the article is categorized
         * first: a GOOD or BAD probability above 0.90 selects that category and
         * raises or lowers $article["score_modifier"] by $score_modifier.
         *
         * @param array $article article data; reads "owner_uid", "title",
         *                       "content", "guid_hashed", updates "score_modifier"
         * @return array the (possibly modified) article
         */
        function hook_article_filter($article) {
                $owner_uid = $article["owner_uid"];

                $nbs = new NaiveBayesianStorage($owner_uid);
                $nb = new NaiveBayesian($nbs);

                $categories = $nbs->getCategories();

                if (count($categories) > 0) {

                        $count_neutral = 0;

                        $id_good = 0;
                        $id_ugly = 0;
                        $id_bad = 0;

                        // Map the category names to their ids for this user.
                        foreach ($categories as $id => $cat) {
                                if ($cat["category"] == "GOOD") {
                                        $id_good = $id;
                                } else if ($cat["category"] == "UGLY") {
                                        $id_ugly = $id;
                                        $count_neutral += $cat["word_count"];
                                } else if ($cat["category"] == "BAD") {
                                        $id_bad = $id;
                                }
                        }

                        $dst_category = $id_ugly;

                        $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));

                        if ($count_neutral >= 10000) {
                                // enable automatic categorization

                                $result = $nb->categorize($bayes_content);

                                //print_r($result);

                                // Only act when a result for all three categories
                                // came back.
                                if (count($result) == 3) {
                                        $prob_good = $result[$id_good];
                                        $prob_bad = $result[$id_bad];

                                        if ($prob_good > 0.90) {
                                                $dst_category = $id_good;
                                                $article["score_modifier"] += $this->score_modifier;
                                        } else if ($prob_bad > 0.90) {
                                                $dst_category = $id_bad;
                                                $article["score_modifier"] -= $this->score_modifier;
                                        }
                                }

                                _debug("bayes, dst category: $dst_category");
                        }

                        $nb->train($article["guid_hashed"], $dst_category, $bayes_content);

                        $nb->updateProbabilities();
                }

                return $article;
        }

        /**
         * Deletes all references and word frequencies of $_SESSION["uid"] and
         * recomputes the category probabilities. The category rows themselves
         * are kept.
         */
        function clearDatabase() {
                $prefix = $this->sql_prefix;
                $owner_uid = (int) $_SESSION["uid"];

                $this->dbh->query("BEGIN");
                $this->dbh->query("DELETE FROM {$prefix}_references WHERE owner_uid = " . $owner_uid);
                $this->dbh->query("DELETE FROM {$prefix}_wordfreqs WHERE owner_uid = " . $owner_uid);
                $this->dbh->query("COMMIT");

                $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
                $nb = new NaiveBayesian($nbs);
                $nb->updateProbabilities();
        }

        /**
         * @return int plugin API version this plugin is written against
         */
        function api_version() {
                return 2;
        }

}
341 ?>