]>
git.wh0rd.org - tt-rss.git/blob - plugins/af_sort_bayes/init.php
79d287158483287d067b092955b1d66b7deb737b
3 class Af_Sort_Bayes
extends Plugin
{
6 private $filters = array();
8 private $score_modifier = 50;
12 "Bayesian classifier for tt-rss (WIP)",
/**
 * Plugin entry point: loads the classifier library, prepares the database
 * and registers this plugin for the hooks it implements.
 *
 * NOTE(review): this listing was captured from a web source viewer; the
 * embedded line numbers skip (19-20, 28-30), so statements and the closing
 * brace of this method may be missing here. Confirm against the repository.
 *
 * @param object $host Plugin host; must expose add_hook() and the HOOK_* constants used below.
 */
16 function init($host) {
// Pull in the bundled naive Bayes classifier and its storage backend
// from the plugin's own lib/ directory.
17 require_once __DIR__
. "/lib/class.naivebayesian.php";
18 require_once __DIR__
. "/lib/class.naivebayesianstorage.php";
// Keep a database handle on the instance; every other method queries through it.
21 $this->dbh
= Db
::get();
// Create the plugin tables (and seed categories) before any hook can fire.
23 $this->init_database();
// Register for: filtering incoming articles, the preferences tab,
// and the per-article button row.
25 $host->add_hook($host::HOOK_ARTICLE_FILTER
, $this);
26 $host->add_hook($host::HOOK_PREFS_TAB
, $this);
27 $host->add_hook($host::HOOK_ARTICLE_BUTTON
, $this);
/**
 * Request handler: (re)trains the classifier on a single article according
 * to the user's vote and adjusts the article's score to match.
 *
 * Reads from $_REQUEST:
 *   - article_id: id of the article to train on (coerced to int)
 *   - train_up:   SQL-style boolean; true files the article as "GOOD",
 *                 anything else as "NEUTRAL"
 *
 * The article is looked up for the current session user only. When found,
 * the untrain/train/score-update/probability-update sequence runs inside one
 * BEGIN/COMMIT pair. Always prints "<article_id> :: <category>".
 */
function trainArticle() {
    $article_id = (int) $_REQUEST["article_id"];
    $train_up = sql_bool_to_bool($_REQUEST["train_up"]);

    // Coerce once so the value is safe to interpolate into the SQL below.
    $owner_uid = (int) $_SESSION["uid"];

    $category = $train_up ? "GOOD" : "NEUTRAL";

    $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
    $nb = new NaiveBayesian($nbs);

    $result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
        $article_id . " AND owner_uid = " . $owner_uid);

    if ($this->dbh->num_rows($result) != 0) {
        $guid = $this->dbh->fetch_result($result, 0, "guid");
        $title = $this->dbh->fetch_result($result, 0, "title");

        // Same normalisation as hook_article_filter(): lowercased title + tag-stripped body.
        $content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content")));

        // Integer arithmetic only; also keeps the value safe for the UPDATE below.
        $score = (int) $this->dbh->fetch_result($result, 0, "score");

        $this->dbh->query("BEGIN");

        // Presumably untrain() is truthy only when the article had been trained
        // before; in that case take back the bonus, never dropping below zero.
        if ($nb->untrain($guid, $content)) {
            if ($score >= $this->score_modifier) {
                $score -= $this->score_modifier;
            }
        }

        $nb->train($guid, $nbs->getCategoryByName($category), $content);

        if ($category == "GOOD") {
            $score += $this->score_modifier;
        }

        $this->dbh->query("UPDATE ttrss_user_entries SET score = '$score' WHERE ref_id = $article_id AND owner_uid = " . $owner_uid);

        $nb->updateProbabilities();

        $this->dbh->query("COMMIT");
    }

    print "$article_id :: $category";
}
71 return file_get_contents(__DIR__
. "/init.js");
/**
 * Renders the thumbs-up / thumbs-down training buttons for one article.
 *
 * Each image calls the client-side bayesTrain(articleId, trainUp) function
 * when clicked.
 *
 * @param array $line Article row; only $line["id"] is read.
 * @return string HTML for the two <img> buttons.
 */
function hook_article_button($line) {
    // The id lands inside an inline JS call, so force it to an integer.
    $article_id = (int) $line["id"];

    // Translated titles go into single-quoted attributes; escape quotes too.
    $title_up = htmlspecialchars(__('+1'), ENT_QUOTES, 'UTF-8');
    $title_down = htmlspecialchars(__('-1'), ENT_QUOTES, 'UTF-8');

    // One style attribute per tag (the original emitted it twice, which is
    // invalid HTML).
    return "<img src=\"plugins/af_sort_bayes/thumb_up.png\"" .
        " style=\"cursor : pointer\"" .
        " onclick=\"bayesTrain($article_id, true)\"" .
        " class='tagsPic' title='$title_up'>" .
        "<img src=\"plugins/af_sort_bayes/thumb_down.png\"" .
        " style=\"cursor : pointer\"" .
        " onclick=\"bayesTrain($article_id, false)\"" .
        " class='tagsPic' title='$title_down'>";
}
/**
 * Creates the plugin's tables if they do not exist and seeds the two
 * classifier categories ("GOOD", "NEUTRAL") for the logged-in user.
 *
 * Schema is issued in MySQL dialect when DB_TYPE is "mysql" and in
 * PostgreSQL dialect otherwise. Everything runs inside one BEGIN/COMMIT pair.
 * Category seeding is skipped when there is no session user.
 */
function init_database() {
    $prefix = "ttrss_plugin_af_sort_bayes";

    // TODO there probably should be a way for plugins to determine their schema version to upgrade tables

    /*$this->dbh->query("DROP TABLE IF EXISTS {$prefix}_wordfreqs", false);
    $this->dbh->query("DROP TABLE IF EXISTS {$prefix}_references", false);
    $this->dbh->query("DROP TABLE IF EXISTS {$prefix}_categories", false);*/

    $this->dbh->query("BEGIN");

    if (DB_TYPE == "mysql") {

        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
            id INTEGER NOT NULL PRIMARY KEY auto_increment,
            category varchar(100) NOT NULL DEFAULT '',
            probability DOUBLE NOT NULL DEFAULT '0',
            owner_uid INTEGER NOT NULL,
            FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
            word_count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
            id INTEGER NOT NULL PRIMARY KEY auto_increment,
            document_id VARCHAR(255) NOT NULL,
            category_id INTEGER NOT NULL,
            FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
            owner_uid INTEGER NOT NULL,
            FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
            content text NOT NULL) ENGINE=InnoDB");

        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
            word varchar(100) NOT NULL DEFAULT '',
            category_id INTEGER NOT NULL,
            FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
            owner_uid INTEGER NOT NULL,
            FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
            count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

    } else {

        // PostgreSQL has no bare DOUBLE type; the floating-point column must
        // be declared DOUBLE PRECISION or the CREATE TABLE fails.
        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
            id SERIAL NOT NULL PRIMARY KEY,
            category varchar(100) NOT NULL DEFAULT '',
            probability DOUBLE PRECISION NOT NULL DEFAULT '0',
            owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
            word_count BIGINT NOT NULL DEFAULT '0')");

        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
            id SERIAL NOT NULL PRIMARY KEY,
            document_id VARCHAR(255) NOT NULL,
            category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
            owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
            content text NOT NULL)");

        $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
            word varchar(100) NOT NULL DEFAULT '',
            category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
            owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
            count BIGINT NOT NULL DEFAULT '0')");
    }

    // There may be no session user at this point; without one there is
    // nobody to seed categories for, and an empty value would produce
    // malformed SQL below.
    $owner_uid = isset($_SESSION["uid"]) ? (int) $_SESSION["uid"] : 0;

    if ($owner_uid) {
        $result = $this->dbh->query("SELECT id FROM {$prefix}_categories WHERE owner_uid = $owner_uid LIMIT 1");

        // First run for this user: create the two categories the classifier sorts into.
        if ($this->dbh->num_rows($result) == 0) {
            $this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('GOOD', $owner_uid)");
            $this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('NEUTRAL', $owner_uid)");
        }
    }

    $this->dbh->query("COMMIT");
}
/**
 * Preferences-tab hook: emits this plugin's accordion pane.
 *
 * NOTE(review): this listing is truncated (embedded line numbers 167-171 are
 * absent), so the pane's content and closing markup are not visible here.
 *
 * @param string $args Identifier of the preferences tab being rendered.
 */
163 function hook_prefs_tab($args) {
// Only contribute to the "prefPrefs" tab; stay silent on every other tab.
164 if ($args != "prefPrefs") return;
// Open a Dojo accordion pane whose title is the translated plugin name.
166 print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"".__('af_sort_bayes')."\">";
/**
 * Article-filter hook: trains the classifier on every incoming article and,
 * once enough training data exists, boosts the score of articles that are
 * classified as "GOOD".
 *
 * Reads $article["owner_uid"], ["title"], ["content"] and ["guid_hashed"];
 * may increase $article["score_modifier"] by $this->score_modifier.
 *
 * @param array $article Article being imported.
 * @return array The article, with score_modifier possibly increased.
 */
function hook_article_filter($article) {
    $owner_uid = $article["owner_uid"];

    $nbs = new NaiveBayesianStorage($owner_uid);
    $nb = new NaiveBayesian($nbs);

    $categories = $nbs->getCategories();

    if (count($categories) > 0) {

        // Start from known values so the accumulation and lookups below
        // never touch undefined variables.
        $count_neutral = 0;
        $count_good = 0;
        $id_neutral = null;
        $id_good = null;

        // NOTE(review): assumes getCategories() is keyed by category id and
        // that categorize() keys its result the same way - confirm in
        // NaiveBayesianStorage / NaiveBayesian.
        foreach ($categories as $id => $cat) {
            if ($cat["category"] == "GOOD") {
                $id_good = $id;
                $count_good += $cat["word_count"];
            } else if ($cat["category"] == "NEUTRAL") {
                $id_neutral = $id;
                $count_neutral += $cat["word_count"];
            }
        }

        // Unless the classifier says otherwise, new articles are filed as neutral.
        $dst_category = $id_neutral;

        $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));

        if ($count_neutral >= 3000 && $count_good >= 1000) {
            // enable automatic categorization

            $result = $nb->categorize($bayes_content);

            if (count($result) == 2 && isset($result[$id_good]) && isset($result[$id_neutral])) {
                $prob_good = $result[$id_good];
                $prob_neutral = $result[$id_neutral];

                if ($prob_good > 0.90 && $prob_good > $prob_neutral) {
                    $dst_category = $id_good; // should we autofile as good or not? idk

                    if (!isset($article["score_modifier"])) {
                        $article["score_modifier"] = 0;
                    }

                    $article["score_modifier"] += $this->score_modifier;
                }
            }
        }

        // Without a destination category there is nothing to train against.
        if ($dst_category !== null) {
            $nb->train($article["guid_hashed"], $dst_category, $bayes_content);

            $nb->updateProbabilities();
        }
    }

    // $article is passed by value, so the score change only reaches the
    // caller through the return value.
    return $article;
}
227 function api_version() {