3 class Af_Sort_Bayes
extends Plugin
{
// Not read or written by any method visible in this extract.
6 private $filters = array();
// Score delta: added when an article is classified GOOD, subtracted when BAD
// (see trainArticle() and hook_article_filter()).
8 private $score_modifier = 50;
// Prefix shared by the plugin's own tables (_categories, _references, _wordfreqs).
9 private $sql_prefix = "ttrss_plugin_af_sort_bayes";
// Word count the UGLY category must reach before hook_article_filter()
// starts categorizing articles automatically.
10 private $auto_categorize_threshold = 10000;
14 "Bayesian classifier for tt-rss (WIP)",
/**
 * Prepares the plugin: loads the classifier library, obtains the database
 * handle, makes sure the plugin tables exist and registers the hooks.
 *
 * @param object $host Plugin host; supplies the HOOK_* constants and add_hook().
 */
function init($host) {
	// class.naivebayesian_ngram.php is deliberately left out, as in the original:
	//require_once __DIR__ . "/lib/class.naivebayesian_ngram.php";
	$libraries = array(
		"class.naivebayesian.php",
		"class.naivebayesianstorage.php",
	);

	foreach ($libraries as $library) {
		require_once __DIR__ . "/lib/" . $library;
	}

	$this->dbh = Db::get();

	// Needs $this->dbh, so it has to run after the handle is assigned.
	$this->init_database();

	$hooks = array(
		$host::HOOK_ARTICLE_FILTER,
		$host::HOOK_PREFS_TAB,
		$host::HOOK_ARTICLE_BUTTON,
	);

	foreach ($hooks as $hook) {
		$host->add_hook($hook, $this);
	}
}
/**
 * Re-trains the classifier for a single article and stores its new score.
 *
 * Reads "article_id" and "train_up" from $_REQUEST, loads the article for the
 * session user, picks a destination category from the article's current one,
 * re-trains the classifier under that category, writes the score to
 * ttrss_user_entries and prints "<article id> :: <category> :: <score>".
 *
 * NOTE(review): this extract lacks the `case` labels and the if/else lines
 * around the two switch statements (the embedded numbering skips them), so the
 * exact current-category => destination mapping cannot be read off here.
 * Confirm against the complete file before changing this method.
 */
34 function trainArticle() {
// Cast to int: the value is concatenated into SQL below.
35 $article_id = (int) $_REQUEST["article_id"];
36 $train_up = sql_bool_to_bool($_REQUEST["train_up"]);
38 //$category = $train_up ? "GOOD" : "UGLY";
// Default destination; the switch statements below may replace it.
39 $dst_category = "UGLY";
41 $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
42 $nb = new NaiveBayesian($nbs);
// Restricting to the session user's owner_uid keeps other users' entries out of reach.
44 $result = $this->dbh
->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
45 $article_id . " AND owner_uid = " . $_SESSION["uid"]);
47 if ($this->dbh
->num_rows($result) != 0) {
48 $guid = $this->dbh
->fetch_result($result, 0, "guid");
49 $title = $this->dbh
->fetch_result($result, 0, "title");
// Classifier input: lower-cased title plus the content with HTML tags removed.
50 $content = mb_strtolower($title . " " . strip_tags($this->dbh
->fetch_result($result, 0, "content")));
51 $score = $this->dbh
->fetch_result($result, 0, "score");
53 $this->dbh
->query("BEGIN");
55 $ref = $nbs->getReference($guid, false);
57 if (isset($ref['category_id'])) {
58 $current_category = $nbs->getCategoryById($ref['category_id']);
// Presumably the else branch: an article without a stored reference counts as UGLY.
60 $current_category = "UGLY";
63 // set score to fixed value for now
// Presumably the $train_up branch: destinations here are GOOD / UGLY / GOOD,
// and the score becomes +score_modifier on the first case.
66 switch ($current_category) {
68 $dst_category = "GOOD";
69 $score = $this->score_modifier
;
72 $dst_category = "UGLY";
76 $dst_category = "GOOD";
// Presumably the train-down branch: destinations here are BAD / BAD / UGLY,
// and the score becomes -score_modifier on the first case.
80 switch ($current_category) {
82 $dst_category = "BAD";
83 $score = -$this->score_modifier
;
86 $dst_category = "BAD";
89 $dst_category = "UGLY";
// untrain() is called before train() for the same guid, presumably so the
// article is not counted under both the old and the new category.
95 $nb->untrain($guid, $content);
96 $nb->train($guid, $nbs->getCategoryByName($dst_category), $content);
98 $this->dbh
->query("UPDATE ttrss_user_entries SET score = '$score' WHERE ref_id = $article_id AND owner_uid = " . $_SESSION["uid"]);
100 $nb->updateProbabilities();
102 $this->dbh
->query("COMMIT");
// Plain-text summary for the caller: "<article id> :: <category> :: <score>".
106 print "$article_id :: $dst_category :: $score";
110 return file_get_contents(__DIR__
. "/init.js");
/**
 * Returns the contents of init.js, which sits next to this file.
 *
 * @return string|false The script text, or false when file_get_contents() fails.
 */
function get_prefs_js() {
	$script_path = __DIR__ . "/init.js";

	return file_get_contents($script_path);
}
/**
 * Builds the three per-article buttons: thumbs up, thumbs down and
 * "Show classifier info".
 *
 * Each <img> previously carried the same `style` attribute twice; an element
 * may not repeat an attribute name, so only one copy is emitted now.
 *
 * @param array $line Article row; only $line["id"] is read.
 * @return string HTML for the three buttons.
 */
function hook_article_button($line) {
	// The id is spliced into inline JavaScript, so force it to an integer
	// rather than trusting whatever the row contains.
	$article_id = (int) $line["id"];

	return "<img src=\"plugins/af_sort_bayes/thumb_up.png\"
		style=\"cursor : pointer\"
		onclick=\"bayesTrain($article_id, true, event)\"
		class='tagsPic' title='".__('+1')."'>" .
	"<img src=\"plugins/af_sort_bayes/thumb_down.png\"
		style=\"cursor : pointer\"
		onclick=\"bayesTrain($article_id, false, event)\"
		class='tagsPic' title='".__('-1')."'>" .
	"<img src=\"plugins/af_sort_bayes/chart_bar.png\"
		style=\"cursor : pointer\"
		onclick=\"bayesShow($article_id)\"
		class='tagsPic' title='".__('Show classifier info')."'>";
}
/**
 * Creates the plugin's tables when they do not exist yet and, for a logged-in
 * user with no categories, seeds the GOOD, BAD and UGLY categories.
 *
 * Everything runs between BEGIN and COMMIT on $this->dbh.
 */
function init_database() {
	$prefix = $this->sql_prefix;

	// TODO there probably should be a way for plugins to determine their schema version to upgrade tables

	/*$this->dbh->query("DROP TABLE IF EXISTS {$prefix}_wordfreqs", false);
	$this->dbh->query("DROP TABLE IF EXISTS {$prefix}_references", false);
	$this->dbh->query("DROP TABLE IF EXISTS {$prefix}_categories", false);*/

	$this->dbh->query("BEGIN");

	// MySQL gets auto_increment ids, separate FOREIGN KEY clauses and InnoDB;
	// every other DB_TYPE gets the SERIAL / inline REFERENCES variant.
	if (DB_TYPE == "mysql") {

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
			id INTEGER NOT NULL PRIMARY KEY auto_increment,
			category varchar(100) NOT NULL DEFAULT '',
			probability DOUBLE NOT NULL DEFAULT '0',
			owner_uid INTEGER NOT NULL,
			FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
			word_count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
			id INTEGER NOT NULL PRIMARY KEY auto_increment,
			document_id VARCHAR(255) NOT NULL,
			category_id INTEGER NOT NULL,
			FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL,
			FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE) ENGINE=InnoDB");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
			word varchar(100) NOT NULL DEFAULT '',
			category_id INTEGER NOT NULL,
			FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL,
			FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
			count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

	} else {

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
			id SERIAL NOT NULL PRIMARY KEY,
			category varchar(100) NOT NULL DEFAULT '',
			probability DOUBLE PRECISION NOT NULL DEFAULT '0',
			owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
			word_count BIGINT NOT NULL DEFAULT '0')");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
			id SERIAL NOT NULL PRIMARY KEY,
			document_id VARCHAR(255) NOT NULL,
			category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE)");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
			word varchar(100) NOT NULL DEFAULT '',
			category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
			count BIGINT NOT NULL DEFAULT '0')");
	}

	// The session may carry no uid. Check explicitly instead of silencing the
	// notice with @, and cast because the value is interpolated into SQL.
	$owner_uid = isset($_SESSION["uid"]) ? (int) $_SESSION["uid"] : 0;

	// Without a user there is nothing to seed, and "owner_uid = " followed by
	// an empty value would be invalid SQL.
	if ($owner_uid) {
		$result = $this->dbh->query("SELECT id FROM {$prefix}_categories WHERE owner_uid = $owner_uid LIMIT 1");

		if ($this->dbh->num_rows($result) == 0) {
			// Insertion order kept: GOOD, BAD, UGLY.
			foreach (array('GOOD', 'BAD', 'UGLY') as $category) {
				$this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('$category', $owner_uid)");
			}
		}
	}

	$this->dbh->query("COMMIT");
}
/**
 * Prints the plugin's preferences panel: per-category statistics for the
 * session user, a "Last matched articles" list (at most 20 rows) and the
 * Refresh / Clear database buttons.
 *
 * NOTE(review): the table, row and list open/close markup is not visible in
 * this extract (the embedded numbering skips those lines); confirm against the
 * complete file before editing the output.
 */
209 function renderPrefsUI() {
// One row per category of the session user; doc_count is the number of
// references stored under that category.
210 $result = $this->dbh
->query("SELECT category, probability, word_count,
211 (SELECT COUNT(id) FROM {$this->sql_prefix}_references WHERE
212 category_id = {$this->sql_prefix}_categories.id) as doc_count
213 FROM {$this->sql_prefix}_categories WHERE owner_uid = " . $_SESSION["uid"]);
215 print "<h3>" . __("Statistics") . "</h3>";
217 print "<p>".T_sprintf("Required UGLY word count for automatic matching: %d", $this->auto_categorize_threshold
)."</p>";
220 print "<tr><th>Category</th><th>Probability</th><th>Words</th><th>Articles</th></tr>";
222 while ($line = $this->dbh
->fetch_assoc($result)) {
224 foreach ($line as $k => $v) {
// Probabilities are shown with three decimals; other columns are left as fetched.
225 if ($k == "probability") $v = sprintf("%.3f", $v);
234 print "<h3>" . __("Last matched articles") . "</h3>";
// NOTE(review): no owner_uid condition is visible in this query - confirm it
// cannot list articles or categories belonging to other users.
236 $result = $this->dbh
->query("SELECT te.title, category, tf.title AS feed_title
237 FROM ttrss_entries AS te, ttrss_user_entries AS tu, ttrss_feeds AS tf, {$this->sql_prefix}_references AS tr, {$this->sql_prefix}_categories AS tc
238 WHERE tf.id = tu.feed_id AND tu.ref_id = te.id AND tc.id = tr.category_id AND tr.document_id = te.guid ORDER BY te.id DESC LIMIT 20");
240 print "<ul class=\"browseFeedList\" style=\"border-width : 1px\">";
242 while ($line = $this->dbh
->fetch_assoc($result)) {
// NOTE(review): article and feed titles are printed without HTML escaping.
243 print "<li>" . $line["category"] . ": " . $line["title"] . " (" . $line["feed_title"] . ")</li>";
// Both buttons call JavaScript functions that are not defined in this file.
248 print "<button dojoType=\"dijit.form.Button\" onclick=\"return bayesUpdateUI()\">".
249 __('Refresh')."</button> ";
251 print "<button dojoType=\"dijit.form.Button\" onclick=\"return bayesClearDatabase()\">".
252 __('Clear database')."</button> ";
/**
 * Prints the plugin's accordion pane, wrapping the output of renderPrefsUI().
 *
 * @param string $args Value passed by the plugin host; output is produced
 *                     only when it equals "prefPrefs".
 */
function hook_prefs_tab($args) {
	if ($args != "prefPrefs") return;

	print "<div id=\"af_sort_bayes_prefs\" dojoType=\"dijit.layout.AccordionPane\" title=\"".__('Bayesian classifier (af_sort_bayes)')."\">";

	$this->renderPrefsUI();

	// Close the pane opened above so markup printed afterwards is not nested inside it.
	print "</div>";
}
/**
 * Article filter hook: trains the classifier with an article that has no
 * stored reference yet and, once enough UGLY words have been collected,
 * categorizes it automatically and adjusts $article["score_modifier"].
 *
 * Reads $article["owner_uid"], ["guid_hashed"], ["title"] and ["content"].
 *
 * NOTE(review): the initialisation and assignment of $count_neutral,
 * $id_good, $id_bad and $id_ugly, and any return statements, are not visible
 * in this extract (the embedded numbering skips those lines); confirm against
 * the complete file before changing this method.
 */
267 function hook_article_filter($article) {
268 $owner_uid = $article["owner_uid"];
270 // guid already includes owner_uid so we don't need to include it
271 $result = $this->dbh
->query("SELECT id FROM {$this->sql_prefix}_references WHERE
272 document_id = '" . $this->dbh
->escape_string($article['guid_hashed']) . "'");
// NOTE(review): db_num_rows() is used here, while the other methods in this
// class call $this->dbh->num_rows().
274 if (db_num_rows($result) != 0) {
275 _debug("bayes: article already categorized");
279 $nbs = new NaiveBayesianStorage($owner_uid);
280 $nb = new NaiveBayesian($nbs);
282 $categories = $nbs->getCategories();
284 if (count($categories) > 0) {
// Walk the categories by name; only the UGLY branch has a visible body, which
// accumulates that category's word count.
292 foreach ($categories as $id => $cat) {
293 if ($cat["category"] == "GOOD") {
295 } else if ($cat["category"] == "UGLY") {
297 $count_neutral +
= $cat["word_count"];
298 } else if ($cat["category"] == "BAD") {
// UGLY is the default destination unless the classifier is confident below.
303 $dst_category = $id_ugly;
// Classifier input: lower-cased title plus the content with HTML tags removed.
305 $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));
307 if ($count_neutral >= $this->auto_categorize_threshold
) {
308 // enable automatic categorization
310 $result = $nb->categorize($bayes_content);
// The result is only used when it has exactly three entries; it is indexed by
// category id below.
314 if (count($result) == 3) {
315 $prob_good = $result[$id_good];
316 $prob_bad = $result[$id_bad];
// A category wins only above 0.90; NaN probabilities are ignored.
318 if (!is_nan($prob_good) && $prob_good > 0.90) {
319 $dst_category = $id_good;
320 $article["score_modifier"] +
= $this->score_modifier
;
321 } else if (!is_nan($prob_bad) && $prob_bad > 0.90) {
322 $dst_category = $id_bad;
323 $article["score_modifier"] -= $this->score_modifier
;
327 _debug("bayes, dst category: $dst_category");
// Train under the chosen category, keyed by the hashed guid.
330 $nb->train($article["guid_hashed"], $dst_category, $bayes_content);
332 $nb->updateProbabilities();
/**
 * Deletes the session user's stored references and word frequencies, then
 * asks the classifier to update its probabilities.
 *
 * The rows in the categories table are left in place.
 */
function clearDatabase() {
	$prefix = $this->sql_prefix;

	// Cast once: the value is interpolated into both DELETE statements.
	$owner_uid = (int) $_SESSION["uid"];

	$this->dbh->query("BEGIN");
	$this->dbh->query("DELETE FROM {$prefix}_references WHERE owner_uid = $owner_uid");
	$this->dbh->query("DELETE FROM {$prefix}_wordfreqs WHERE owner_uid = $owner_uid");
	$this->dbh->query("COMMIT");

	// The storage object receives the session value unchanged, as before.
	$nbs = new NaiveBayesianStorage($_SESSION["uid"]);
	$nb = new NaiveBayesian($nbs);
	$nb->updateProbabilities();
}
/**
 * Prints classifier details for one article: its title, the category it is
 * currently stored under ("N/A" when there is no reference) and the
 * probability the classifier returns for each category, followed by a Close
 * button. Prints "Article not found" via print_error() when the session user
 * has no such article.
 *
 * Reads "article_id" from $_REQUEST.
 *
 * NOTE(review): the table and row markup and the else line are not visible in
 * this extract (the embedded numbering skips them); confirm against the
 * complete file before editing the output.
 */
352 function showArticleStats() {
// Cast to int: the value is concatenated into SQL below.
353 $article_id = (int) $_REQUEST["article_id"];
355 $result = $this->dbh
->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
356 $article_id . " AND owner_uid = " . $_SESSION["uid"]);
358 if ($this->dbh
->num_rows($result) != 0) {
359 $guid = $this->dbh
->fetch_result($result, 0, "guid");
360 $title = $this->dbh
->fetch_result($result, 0, "title");
// Same input preparation as trainArticle(): lower-cased title plus tag-stripped content.
361 $content = mb_strtolower($title . " " . strip_tags($this->dbh
->fetch_result($result, 0, "content")));
// NOTE(review): the title is printed without HTML escaping.
363 print "<h2>" . $title . "</h2>";
365 $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
366 $nb = new NaiveBayesian($nbs);
// $categories is indexed by category id below.
368 $categories = $nbs->getCategories();
370 $ref = $nbs->getReference($guid, false);
372 $current_cat = isset($ref["category_id"]) ?
$categories[$ref["category_id"]]["category"] : "N/A";
374 print "<p>" . T_sprintf("Currently stored as: %s", $current_cat) . "</p>";
// $result is reused: from here on it holds the classifier output, keyed by category id.
376 $result = $nb->categorize($content);
378 print "<h3>" . __("Classifier result") . "</h3>";
381 print "<tr><th>Category</th><th>Probability</th></tr>";
383 foreach ($result as $k => $v) {
385 print "<td>" . $categories[$k]["category"] . "</td>";
386 print "<td>" . $v . "</td>";
// Presumably the else branch of the num_rows() check above.
394 print_error("Article not found");
397 print "<div align='center'>";
// Hides the dialog with id 'bayesShowDlg' through dijit.
399 print "<button dojoType=\"dijit.form.Button\" onclick=\"return dijit.byId('bayesShowDlg').hide()\">".
400 __('Close this window')."</button>";
406 function api_version() {