]>
git.wh0rd.org - tt-rss.git/blob - plugins/af_sort_bayes/init.php
dc90352e24cea8f3bb6205b4a98e72b7c367b7fd
// Bayesian article classifier plugin for tt-rss; extends the host Plugin base class.
// NOTE(review): this listing skips some original line numbers, so further
// property declarations may exist that are not visible here.
3 class Af_Sort_Bayes
extends Plugin
{
// Not referenced by any of the methods visible in this listing.
6 private $filters = array();
// Score magnitude used by this plugin: assigned as +/- this value in
// trainArticle() and added to / subtracted from $article["score_modifier"]
// in hook_article_filter().
8 private $score_modifier = 50;
// Common prefix of the plugin tables: <prefix>_categories,
// <prefix>_references and <prefix>_wordfreqs.
9 private $sql_prefix = "ttrss_plugin_af_sort_bayes";
13 "Bayesian classifier for tt-rss (WIP)",
// Plugin entry point: loads the bundled naive-Bayes library files, obtains the
// database handle, runs init_database(), and registers this object for the
// article-filter, prefs-tab and article-button hooks on $host.
// NOTE(review): original lines 21-22, 24, 26 and 30-32 are absent from this
// listing, so additional statements and the closing brace are not visible.
17 function init($host) {
// Classifier, n-gram variant and storage classes shipped in the plugin lib/ directory.
18 require_once __DIR__
. "/lib/class.naivebayesian.php";
19 require_once __DIR__
. "/lib/class.naivebayesian_ngram.php";
20 require_once __DIR__
. "/lib/class.naivebayesianstorage.php";
// Database handle used by every query in this class.
23 $this->dbh
= Db
::get();
// Create the plugin tables (and seed categories) before any hook can fire.
25 $this->init_database();
// Hook registrations; the handlers are the hook_* methods of this class.
27 $host->add_hook($host::HOOK_ARTICLE_FILTER
, $this);
28 $host->add_hook($host::HOOK_PREFS_TAB
, $this);
29 $host->add_hook($host::HOOK_ARTICLE_BUTTON
, $this);
// Trains the classifier on a single article chosen by the current user and
// stores the resulting score on that article.
// Input: $_REQUEST["article_id"] and $_REQUEST["train_up"]; the user comes from $_SESSION["uid"].
// Output: prints "<article_id> :: <destination category> :: <score>".
// NOTE(review): many original lines are absent from this listing (the else
// branch, the if/else around the two switch statements, every case label and
// all closing braces), so the exact category transitions cannot be read from here.
33 function trainArticle() {
// Cast to int so the id can be concatenated into the SQL below.
34 $article_id = (int) $_REQUEST["article_id"];
// Presumably selects between the two switch blocks below - the branching line is not visible.
35 $train_up = sql_bool_to_bool($_REQUEST["train_up"]);
37 //$category = $train_up ? "GOOD" : "UGLY";
// Default destination category; overwritten inside the switch blocks.
38 $dst_category = "UGLY";
// Per-user storage backend and the n-gram classifier working on top of it.
40 $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
41 $nb = new NaiveBayesianNgram($nbs);
// Load the article, restricted to entries owned by the current user.
43 $result = $this->dbh
->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
44 $article_id . " AND owner_uid = " . $_SESSION["uid"]);
// Nothing is trained when the article does not exist for this user.
46 if ($this->dbh
->num_rows($result) != 0) {
47 $guid = $this->dbh
->fetch_result($result, 0, "guid");
48 $title = $this->dbh
->fetch_result($result, 0, "title");
// Training text: title plus tag-stripped content, lowercased.
49 $content = mb_strtolower($title . " " . strip_tags($this->dbh
->fetch_result($result, 0, "content")));
50 $score = $this->dbh
->fetch_result($result, 0, "score");
// Retraining and the score update are wrapped in one transaction.
52 $this->dbh
->query("BEGIN");
// Existing training record for this article, keyed by guid.
54 $ref = $nbs->getReference($guid, false);
// Use the recorded category when there is one; "UGLY" is presumably the
// fallback of an else branch whose line is not visible here.
56 if (isset($ref['category_id'])) {
57 $current_category = $nbs->getCategoryById($ref['category_id']);
59 $current_category = "UGLY";
62 // set score to fixed value for now
// First switch: yields GOOD or UGLY, with a positive score where one is set.
65 switch ($current_category) {
67 $dst_category = "GOOD";
68 $score = $this->score_modifier
;
71 $dst_category = "UGLY";
75 $dst_category = "GOOD";
// Second switch: yields BAD or UGLY, with a negative score where one is set.
79 switch ($current_category) {
81 $dst_category = "BAD";
82 $score = -$this->score_modifier
;
85 $dst_category = "BAD";
88 $dst_category = "UGLY";
89 $score = -$this->score_modifier
;
// Drop the previous training for this guid, then train it into the destination category.
94 $nb->untrain($guid, $content);
95 $nb->train($guid, $nbs->getCategoryByName($dst_category), $content);
// Persist the new score on the user entry.
97 $this->dbh
->query("UPDATE ttrss_user_entries SET score = '$score' WHERE ref_id = $article_id AND owner_uid = " . $_SESSION["uid"]);
99 $nb->updateProbabilities();
101 $this->dbh
->query("COMMIT");
// Plain-text result for the caller.
105 print "$article_id :: $dst_category :: $score";
109 return file_get_contents(__DIR__
. "/init.js");
// Returns the contents of init.js, located in the same directory as this file.
function get_prefs_js() {
	$script_path = __DIR__ . "/init.js";

	return file_get_contents($script_path);
}
// Builds the thumbs-up / thumbs-down training buttons for one article.
//
// $line is the article row; only $line["id"] is read. The id is handed to the
// client-side bayesTrain() handler together with a true (up) / false (down) flag.
// Returns the HTML for both <img> buttons as a single string.
function hook_article_button($line) {
	// Article ids are integers (trainArticle() casts the incoming id to int);
	// casting here keeps arbitrary text out of the inline onclick handler.
	$article_id = (int) $line["id"];

	// Each <img> carries exactly one style attribute; a repeated attribute on
	// the same element is invalid HTML.
	return "<img src=\"plugins/af_sort_bayes/thumb_up.png\"
		style=\"cursor : pointer\"
		onclick=\"bayesTrain(" . $article_id . ", true)\"
		class='tagsPic' title='" . __('+1') . "'>" .
		"<img src=\"plugins/af_sort_bayes/thumb_down.png\"
		style=\"cursor : pointer\"
		onclick=\"bayesTrain(" . $article_id . ", false)\"
		class='tagsPic' title='" . __('-1') . "'>";
}
// Creates the three plugin tables if they do not exist yet and seeds the
// GOOD / BAD / UGLY categories for the session user when that user has none.
// Everything runs between BEGIN and COMMIT.
// NOTE(review): several original lines are absent from this listing (among
// them the else line between the two sets of CREATE TABLE statements and
// whatever sits on lines 192-193 and 200-202), so guards may exist that are
// not visible here.
128 function init_database() {
129 $prefix = $this->sql_prefix
;
131 // TODO there probably should be a way for plugins to determine their schema version to upgrade tables
133 /*$this->dbh->query("DROP TABLE IF EXISTS ${prefix}_wordfreqs", false);
134 $this->dbh->query("DROP TABLE IF EXISTS ${prefix}_references", false);
135 $this->dbh->query("DROP TABLE IF EXISTS ${prefix}_categories", false);*/
137 $this->dbh
->query("BEGIN");
139 // PG only for the time being
// NOTE(review): the comment above says PG only, yet a MySQL branch follows - one of the two is stale.
141 if (DB_TYPE
== "mysql") {
// MySQL variant: auto_increment keys, separate FOREIGN KEY clauses, InnoDB engine.
// Categories: one row per (user, category) with its probability and word count.
143 $this->dbh
->query("CREATE TABLE IF NOT EXISTS ${prefix}_categories (
144 id INTEGER NOT NULL PRIMARY KEY auto_increment,
145 category varchar(100) NOT NULL DEFAULT '',
146 probability DOUBLE NOT NULL DEFAULT '0',
147 owner_uid INTEGER NOT NULL,
148 FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
149 word_count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");
// References: trained documents, each tied to a category and an owner.
151 $this->dbh
->query("CREATE TABLE IF NOT EXISTS ${prefix}_references (
152 id INTEGER NOT NULL PRIMARY KEY auto_increment,
153 document_id VARCHAR(255) NOT NULL,
154 category_id INTEGER NOT NULL,
155 FOREIGN KEY (category_id) REFERENCES ${prefix}_categories(id) ON DELETE CASCADE,
156 owner_uid INTEGER NOT NULL,
157 FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
158 content text NOT NULL) ENGINE=InnoDB");
// Word frequencies: per-category, per-owner word counts.
160 $this->dbh
->query("CREATE TABLE IF NOT EXISTS ${prefix}_wordfreqs (
161 word varchar(100) NOT NULL DEFAULT '',
162 category_id INTEGER NOT NULL,
163 FOREIGN KEY (category_id) REFERENCES ${prefix}_categories(id) ON DELETE CASCADE,
164 owner_uid INTEGER NOT NULL,
165 FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
166 count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");
// Same three tables with SERIAL keys and inline REFERENCES - presumably the
// non-MySQL (PostgreSQL) branch; the else line is not visible in this listing.
// NOTE(review): PostgreSQL names its 8-byte float type DOUBLE PRECISION, so the
// plain DOUBLE column below is likely rejected there - confirm.
170 $this->dbh
->query("CREATE TABLE IF NOT EXISTS ${prefix}_categories (
171 id SERIAL NOT NULL PRIMARY KEY,
172 category varchar(100) NOT NULL DEFAULT '',
173 probability DOUBLE NOT NULL DEFAULT '0',
174 owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
175 word_count BIGINT NOT NULL DEFAULT '0')");
177 $this->dbh
->query("CREATE TABLE IF NOT EXISTS ${prefix}_references (
178 id SERIAL NOT NULL PRIMARY KEY,
179 document_id VARCHAR(255) NOT NULL,
180 category_id INTEGER NOT NULL REFERENCES ${prefix}_categories(id) ON DELETE CASCADE,
181 owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
182 content text NOT NULL)");
184 $this->dbh
->query("CREATE TABLE IF NOT EXISTS ${prefix}_wordfreqs (
185 word varchar(100) NOT NULL DEFAULT '',
186 category_id INTEGER NOT NULL REFERENCES ${prefix}_categories(id) ON DELETE CASCADE,
187 owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
188 count BIGINT NOT NULL DEFAULT '0')");
// The @ silences the notice raised when the session carries no uid.
// NOTE(review): $owner_uid is interpolated unquoted into the queries below, so
// an empty value would produce invalid SQL; a guard is presumably on one of
// the lines missing from this listing - verify.
191 $owner_uid = @$_SESSION["uid"];
// Does this user already have any category rows?
194 $result = $this->dbh
->query("SELECT id FROM ${prefix}_categories WHERE owner_uid = $owner_uid LIMIT 1");
// First use for this user: seed the three categories the rest of the class refers to by name.
196 if ($this->dbh
->num_rows($result) == 0) {
197 $this->dbh
->query("INSERT INTO ${prefix}_categories (category, owner_uid) VALUES ('GOOD', $owner_uid)");
198 $this->dbh
->query("INSERT INTO ${prefix}_categories (category, owner_uid) VALUES ('BAD', $owner_uid)");
199 $this->dbh
->query("INSERT INTO ${prefix}_categories (category, owner_uid) VALUES ('UGLY', $owner_uid)");
203 $this->dbh
->query("COMMIT");
// Prints the plugin pane of the preferences screen: per-category statistics
// for the session user and a "Clear database" button.
// $args names the tab being rendered; anything other than "prefPrefs" is ignored.
// NOTE(review): the lines that print the table markup and cells, and the
// closing braces, are absent from this listing.
206 function hook_prefs_tab($args) {
207 if ($args != "prefPrefs") return;
// Accordion pane wrapping the plugin output.
209 print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"".__('Bayesian classifier (af_sort_bayes)')."\">";
// One row per category of the current user; doc_count is the number of
// reference rows pointing at that category.
211 $result = $this->dbh
->query("SELECT category, probability, word_count,
212 (SELECT COUNT(id) FROM {$this->sql_prefix}_references WHERE
213 category_id = {$this->sql_prefix}_categories.id) as doc_count
214 FROM {$this->sql_prefix}_categories WHERE owner_uid = " . $_SESSION["uid"]);
// Header row; column order matches the SELECT list above.
217 print "<tr><th>Category</th><th>Probability</th><th>Word count</th><th>Article count</th></tr>";
219 while ($line = $this->dbh
->fetch_assoc($result)) {
221 foreach ($line as $k => $v) {
// Probability is formatted with three decimals; other columns are left as-is.
222 if ($k == "probability") $v = sprintf("%.3f", $v);
// Button wired to the client-side bayesClearDatabase() handler.
231 print "<button dojoType=\"dijit.form.Button\" onclick=\"return bayesClearDatabase()\">".
232 __('Clear database')."</button> ";
// Article filter hook: trains the classifier on each incoming article and,
// once enough words have been collected, adjusts $article["score_modifier"]
// according to the classifier verdict.
// $article is the article array; the keys read here are owner_uid, title,
// content, guid_hashed and score_modifier.
// NOTE(review): many original lines are absent from this listing (the
// initialisation of $count_neutral and the $id_* variables, the bodies of the
// GOOD and BAD branches, closing braces and the return statement).
239 function hook_article_filter($article) {
240 $owner_uid = $article["owner_uid"];
// Per-user storage backend and the n-gram classifier working on top of it.
242 $nbs = new NaiveBayesianStorage($owner_uid);
243 $nb = new NaiveBayesianNgram($nbs);
245 $categories = $nbs->getCategories();
// Nothing to do for users without category rows.
247 if (count($categories) > 0) {
// Walk the categories by name; presumably the GOOD and BAD branches record
// $id_good / $id_bad on lines not visible here.
255 foreach ($categories as $id => $cat) {
256 if ($cat["category"] == "GOOD") {
258 } else if ($cat["category"] == "UGLY") {
// Word count of the UGLY category, compared against the threshold below.
260 $count_neutral +
= $cat["word_count"];
261 } else if ($cat["category"] == "BAD") {
// Default: file the article under UGLY unless the classifier decides otherwise.
266 $dst_category = $id_ugly;
// Classifier input: title plus tag-stripped content, lowercased.
268 $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));
// Automatic categorisation only starts once the UGLY word count reaches 5000.
270 if ($count_neutral >= 5000) {
271 // enable automatic categorization
273 $result = $nb->categorize($bayes_content);
// Only act when a value came back for all three categories.
275 if (count($result) == 3) {
276 $prob_good = $result[$id_good];
277 $prob_bad = $result[$id_bad];
// A value above 0.90 moves the article into that category and shifts the
// score modifier by $this->score_modifier in the matching direction.
279 if ($prob_good > 0.90) {
280 $dst_category = $id_good; // should we autofile as good or not? idk
281 $article["score_modifier"] +
= $this->score_modifier
;
282 } else if ($prob_bad > 0.90) {
283 $dst_category = $id_bad; // should we autofile as good or not? idk
284 $article["score_modifier"] -= $this->score_modifier
;
// Record the article under the chosen category, then refresh probabilities.
289 $nb->train($article["guid_hashed"], $dst_category, $bayes_content);
291 $nb->updateProbabilities();
// Deletes all training data of the session user (reference and word-frequency
// rows) and then recomputes the probabilities. Category rows are not deleted.
// NOTE(review): original lines 300, 305 and the closing brace are absent from this listing.
298 function clearDatabase() {
299 $prefix = $this->sql_prefix
;
// Both deletes run inside one transaction.
301 $this->dbh
->query("BEGIN");
302 $this->dbh
->query("DELETE FROM ${prefix}_references WHERE owner_uid = " . $_SESSION["uid"]);
303 $this->dbh
->query("DELETE FROM ${prefix}_wordfreqs WHERE owner_uid = " . $_SESSION["uid"]);
304 $this->dbh
->query("COMMIT");
// Refresh the probabilities now that the training data is gone.
306 $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
307 $nb = new NaiveBayesianNgram($nbs);
308 $nb->updateProbabilities();
311 function api_version() {