]> git.wh0rd.org - tt-rss.git/blame - plugins/af_sort_bayes/init.php
bayes: better prefs ui/stats
[tt-rss.git] / plugins / af_sort_bayes / init.php
CommitLineData
853cc128
AD
1<?php
2
3class Af_Sort_Bayes extends Plugin {
4
5 private $host;
6 private $filters = array();
7 private $dbh;
59e83455 8 private $score_modifier = 50;
3c43def9 9 private $sql_prefix = "ttrss_plugin_af_sort_bayes";
853cc128
AD
10
/**
 * Returns the plugin's descriptive metadata.
 *
 * @return array three elements: a numeric version, a description string and
 *               a short name (presumably the author - confirm against the plugin host)
 */
function about() {
	$version = 1.0;
	$description = "Bayesian classifier for tt-rss (WIP)";
	$author = "fox";

	return array($version, $description, $author);
}
16
/**
 * Loads the bundled classifier libraries, remembers the host and database
 * handles, makes sure the plugin tables exist and registers this plugin's hooks.
 *
 * @param object $host plugin host; must provide add_hook() and the HOOK_* constants used below
 */
function init($host) {
	// Load order is kept exactly as before.
	$libraries = array(
		"class.naivebayesian.php",
		"class.naivebayesian_ngram.php",
		"class.naivebayesianstorage.php");

	foreach ($libraries as $library) {
		require_once __DIR__ . "/lib/" . $library;
	}

	$this->host = $host;
	$this->dbh = Db::get();

	$this->init_database();

	$hooks = array(
		$host::HOOK_ARTICLE_FILTER,
		$host::HOOK_PREFS_TAB,
		$host::HOOK_ARTICLE_BUTTON);

	foreach ($hooks as $hook) {
		$host->add_hook($hook, $this);
	}
}
32
/**
 * Handles a manual up/down vote on a single article.
 *
 * Reads "article_id" and "train_up" from $_REQUEST. If the article belongs to
 * the session user, it is moved one step between the BAD / UGLY / GOOD
 * categories, the classifier is retrained with the article's title and
 * tag-stripped content, and the resulting score is written back to
 * ttrss_user_entries. Always prints "<article_id> :: <category> :: <score>".
 */
function trainArticle() {
	$article_id = (int) $_REQUEST["article_id"];
	$train_up = sql_bool_to_bool($_REQUEST["train_up"]);

	// The uid is interpolated into SQL below, so force it to an integer first.
	$owner_uid = (int) $_SESSION["uid"];

	$dst_category = "UGLY";

	// Defined up front so the final print does not read an undefined variable
	// when the article is not found; null prints as an empty string.
	$score = null;

	$nbs = new NaiveBayesianStorage($_SESSION["uid"]);
	$nb = new NaiveBayesian($nbs);

	$result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
		$article_id . " AND owner_uid = " . $owner_uid);

	if ($this->dbh->num_rows($result) != 0) {
		$guid = $this->dbh->fetch_result($result, 0, "guid");
		$title = $this->dbh->fetch_result($result, 0, "title");
		$content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content")));
		$score = $this->dbh->fetch_result($result, 0, "score");

		$this->dbh->query("BEGIN");

		$ref = $nbs->getReference($guid, false);

		// Articles without a stored reference are treated as neutral (UGLY).
		if (isset($ref['category_id'])) {
			$current_category = $nbs->getCategoryById($ref['category_id']);
		} else {
			$current_category = "UGLY";
		}

		// set score to fixed value for now

		if ($train_up) {
			switch ($current_category) {
				case "UGLY":
					$dst_category = "GOOD";
					$score = $this->score_modifier;
					break;
				case "BAD":
					$dst_category = "UGLY";
					$score = 0;
					break;
				case "GOOD":
					// Already at the top: keep the stored score.
					$dst_category = "GOOD";
					break;
			}
		} else {
			switch ($current_category) {
				case "UGLY":
					$dst_category = "BAD";
					$score = -$this->score_modifier;
					break;
				case "BAD":
					// Already at the bottom: keep the stored score.
					$dst_category = "BAD";
					break;
				case "GOOD":
					// NOTE(review): BAD -> UGLY above resets the score to 0, but
					// GOOD -> UGLY assigns a negative score. Possibly unintended;
					// behaviour is left unchanged here - confirm.
					$dst_category = "UGLY";
					$score = -$this->score_modifier;
					break;
			}
		}

		// Drop any previous training for this document before training it again.
		$nb->untrain($guid, $content);
		$nb->train($guid, $nbs->getCategoryByName($dst_category), $content);

		$this->dbh->query("UPDATE ttrss_user_entries SET score = '$score' WHERE ref_id = $article_id AND owner_uid = " . $owner_uid);

		$nb->updateProbabilities();

		$this->dbh->query("COMMIT");
	}

	print "$article_id :: $dst_category :: $score";
}
107
/**
 * Returns the contents of init.js from this plugin's directory.
 *
 * @return string|false the file contents, or false if file_get_contents() fails
 */
function get_js() {
	$script_path = __DIR__ . "/init.js";

	return file_get_contents($script_path);
}
111
3c43def9
AD
/**
 * Returns the contents of init.js from this plugin's directory
 * (the same file that get_js() reads).
 *
 * @return string|false the file contents, or false if file_get_contents() fails
 */
function get_prefs_js() {
	$script_path = __DIR__ . "/init.js";

	return file_get_contents($script_path);
}
115
853cc128
AD
/**
 * Builds the thumbs-up / thumbs-down training buttons for one article.
 *
 * Each button calls the JavaScript function bayesTrain(<id>, true|false).
 *
 * @param array $line article row; only its "id" element is read
 * @return string HTML for both <img> buttons, with nothing between them
 */
function hook_article_button($line) {
	// The id is placed inside an inline onclick handler, so force it to an integer.
	$article_id = (int) $line["id"];

	// image file, JS boolean literal passed to bayesTrain(), tooltip
	$buttons = array(
		array("thumb_up.png", "true", __('+1')),
		array("thumb_down.png", "false", __('-1')));

	$html = "";

	foreach ($buttons as $button) {
		list($image, $train_up, $title) = $button;

		// One style attribute only: repeating an attribute on a tag is invalid HTML.
		$html .= "<img src=\"plugins/af_sort_bayes/$image\"" .
			" style=\"cursor : pointer\"" .
			" onclick=\"bayesTrain($article_id, $train_up)\"" .
			" class='tagsPic' title='$title'>";
	}

	return $html;
}
127
/**
 * Creates the plugin's tables (categories, references, wordfreqs) if they do
 * not exist yet and, when $_SESSION["uid"] holds a non-zero user id that has
 * no categories, inserts the GOOD, BAD and UGLY categories for that user.
 */
function init_database() {
	$prefix = $this->sql_prefix;

	// TODO there probably should be a way for plugins to determine their schema version to upgrade tables

	// Development aid, normally disabled:
	// $this->dbh->query("DROP TABLE IF EXISTS {$prefix}_wordfreqs", false);
	// $this->dbh->query("DROP TABLE IF EXISTS {$prefix}_references", false);
	// $this->dbh->query("DROP TABLE IF EXISTS {$prefix}_categories", false);

	$this->dbh->query("BEGIN");

	// MySQL and everything else (PostgreSQL syntax) need different DDL:
	// auto_increment vs SERIAL, DOUBLE vs DOUBLE PRECISION, explicit
	// FOREIGN KEY clauses and ENGINE=InnoDB on MySQL.

	if (DB_TYPE == "mysql") {

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
			id INTEGER NOT NULL PRIMARY KEY auto_increment,
			category varchar(100) NOT NULL DEFAULT '',
			probability DOUBLE NOT NULL DEFAULT '0',
			owner_uid INTEGER NOT NULL,
			FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
			word_count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
			id INTEGER NOT NULL PRIMARY KEY auto_increment,
			document_id VARCHAR(255) NOT NULL,
			category_id INTEGER NOT NULL,
			FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL,
			FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE) ENGINE=InnoDB");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
			word varchar(100) NOT NULL DEFAULT '',
			category_id INTEGER NOT NULL,
			FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL,
			FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
			count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

	} else {

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
			id SERIAL NOT NULL PRIMARY KEY,
			category varchar(100) NOT NULL DEFAULT '',
			probability DOUBLE PRECISION NOT NULL DEFAULT '0',
			owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
			word_count BIGINT NOT NULL DEFAULT '0')");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
			id SERIAL NOT NULL PRIMARY KEY,
			document_id VARCHAR(255) NOT NULL,
			category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE)");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
			word varchar(100) NOT NULL DEFAULT '',
			category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
			count BIGINT NOT NULL DEFAULT '0')");
	}

	// There may be no session uid when this runs; check explicitly instead of
	// suppressing the notice with @, and force an integer because the value
	// is interpolated into SQL.
	$owner_uid = isset($_SESSION["uid"]) ? (int) $_SESSION["uid"] : 0;

	if ($owner_uid) {
		$result = $this->dbh->query("SELECT id FROM {$prefix}_categories WHERE owner_uid = $owner_uid LIMIT 1");

		// Seed the three fixed categories the first time this user is seen.
		if ($this->dbh->num_rows($result) == 0) {
			foreach (array('GOOD', 'BAD', 'UGLY') as $category) {
				$this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('$category', $owner_uid)");
			}
		}
	}

	$this->dbh->query("COMMIT");
}
203
/**
 * Prints the plugin's preferences pane body: a per-category statistics table,
 * a list of up to 20 recently classified articles, and the "Refresh" and
 * "Clear database" buttons.
 *
 * All data shown is restricted to the session user, and values read from the
 * database are HTML-escaped before printing.
 */
function renderPrefsUI() {
	$prefix = $this->sql_prefix;

	// The uid is interpolated into SQL below, so force it to an integer first.
	$owner_uid = (int) $_SESSION["uid"];

	$result = $this->dbh->query("SELECT category, probability, word_count,
		(SELECT COUNT(id) FROM {$prefix}_references WHERE
			category_id = {$prefix}_categories.id) as doc_count
		FROM {$prefix}_categories WHERE owner_uid = " . $owner_uid);

	print "<h3>" . __("Statistics") . "</h3>";

	print "<table>";
	print "<tr><th>Category</th><th>Probability</th><th>Words</th><th>Articles</th></tr>";

	while ($line = $this->dbh->fetch_assoc($result)) {
		print "<tr>";
		foreach ($line as $k => $v) {
			if ($k === "probability") $v = sprintf("%.3f", $v);

			print "<td>" . htmlspecialchars($v) . "</td>";
		}
		print "</tr>";
	}

	print "</table>";

	print "<h3>" . __("Last matched articles") . "</h3>";

	// Both the user entry and the classifier reference must belong to the
	// current user; without these filters other users' articles are listed.
	$result = $this->dbh->query("SELECT te.title, category, tf.title AS feed_title
		FROM ttrss_entries AS te, ttrss_user_entries AS tu, ttrss_feeds AS tf, {$prefix}_references AS tr, {$prefix}_categories AS tc
		WHERE tf.id = tu.feed_id AND tu.ref_id = te.id AND tc.id = tr.category_id AND tr.document_id = te.guid
			AND tu.owner_uid = $owner_uid AND tr.owner_uid = $owner_uid
		ORDER BY te.id DESC LIMIT 20");

	print "<ul class=\"browseFeedList\" style=\"border-width : 1px\">";

	while ($line = $this->dbh->fetch_assoc($result)) {
		// Titles come from feeds, i.e. untrusted input: escape before output.
		print "<li>" . htmlspecialchars($line["category"]) . ": " .
			htmlspecialchars($line["title"]) .
			" (" . htmlspecialchars($line["feed_title"]) . ")</li>";
	}

	print "</ul>";

	print "<button dojoType=\"dijit.form.Button\" onclick=\"return bayesUpdateUI()\">".
		__('Refresh')."</button> ";

	print "<button dojoType=\"dijit.form.Button\" onclick=\"return bayesClearDatabase()\">".
		__('Clear database')."</button> ";
}
249
/**
 * Prints this plugin's accordion pane, but only when called for the
 * "prefPrefs" tab; for any other argument nothing is printed.
 *
 * @param mixed $args identifier of the tab being rendered
 */
function hook_prefs_tab($args) {
	if ($args == "prefPrefs") {
		$pane_title = __('Bayesian classifier (af_sort_bayes)');

		print "<div id=\"af_sort_bayes_prefs\" dojoType=\"dijit.layout.AccordionPane\" title=\"" . $pane_title . "\">";

		$this->renderPrefsUI();

		print "</div>";
	}
}
259
/**
 * Trains the classifier with an incoming article and, once enough neutral
 * words have been collected, classifies it and adjusts its score modifier.
 *
 * The article is trained as UGLY unless automatic categorization is active
 * (UGLY word count of at least 10000) and the classifier returns a
 * probability above 0.90 for GOOD or BAD.
 *
 * @param array $article reads "owner_uid", "title", "content" and "guid_hashed";
 *                       "score_modifier" is raised or lowered on a confident match
 * @return array the article, possibly with an adjusted "score_modifier"
 */
function hook_article_filter($article) {
	$storage = new NaiveBayesianStorage($article["owner_uid"]);
	$classifier = new NaiveBayesian($storage);

	$categories = $storage->getCategories();

	// Nothing to train against when the owner has no categories.
	if (count($categories) < 1) {
		return $article;
	}

	$id_good = 0;
	$id_ugly = 0;
	$id_bad = 0;
	$neutral_words = 0;

	foreach ($categories as $cat_id => $cat) {
		switch ($cat["category"]) {
			case "GOOD":
				$id_good = $cat_id;
				break;
			case "UGLY":
				$id_ugly = $cat_id;
				$neutral_words += $cat["word_count"];
				break;
			case "BAD":
				$id_bad = $cat_id;
				break;
		}
	}

	// Neutral unless the classifier is confident otherwise.
	$dst_category = $id_ugly;

	$bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));

	if ($neutral_words >= 10000) {
		// enable automatic categorization

		$probabilities = $classifier->categorize($bayes_content);

		if (count($probabilities) == 3) {
			$prob_good = $probabilities[$id_good];
			$prob_bad = $probabilities[$id_bad];

			if ($prob_good > 0.90) {
				$dst_category = $id_good;
				$article["score_modifier"] += $this->score_modifier;
			} else if ($prob_bad > 0.90) {
				$dst_category = $id_bad;
				$article["score_modifier"] -= $this->score_modifier;
			}
		}

		_debug("bayes, dst category: $dst_category");
	}

	$classifier->train($article["guid_hashed"], $dst_category, $bayes_content);

	$classifier->updateProbabilities();

	return $article;
}
322
3c43def9
AD
/**
 * Deletes the session user's rows from the references and wordfreqs tables
 * in one transaction, then asks the classifier to update its probabilities.
 * Rows in the categories table are not deleted.
 */
function clearDatabase() {
	$prefix = $this->sql_prefix;

	// The uid is interpolated into SQL below, so force it to an integer first.
	$owner_uid = (int) $_SESSION["uid"];

	$this->dbh->query("BEGIN");

	foreach (array("references", "wordfreqs") as $table) {
		$this->dbh->query("DELETE FROM {$prefix}_{$table} WHERE owner_uid = $owner_uid");
	}

	$this->dbh->query("COMMIT");

	$nbs = new NaiveBayesianStorage($_SESSION["uid"]);
	$nb = new NaiveBayesian($nbs);
	$nb->updateProbabilities();
}
335
853cc128
AD
/**
 * Returns the API version number this plugin reports.
 *
 * @return int always 2
 */
function api_version() {
	$version = 2;

	return $version;
}
339
340}
341?>