]> git.wh0rd.org - tt-rss.git/blob - plugins/af_sort_bayes/init.php
add button to clear bayes database
[tt-rss.git] / plugins / af_sort_bayes / init.php
1 <?php
2
/**
 * Bayesian article classifier plugin for tt-rss (work in progress).
 *
 * Registers article-filter, preferences-tab and article-button hooks.
 * Training data lives in three tables named "<sql_prefix>_categories",
 * "<sql_prefix>_references" and "<sql_prefix>_wordfreqs"; every row is
 * scoped to a user through its owner_uid column. Articles classified as
 * GOOD get their score raised by $score_modifier.
 */
class Af_Sort_Bayes extends Plugin {

    /** Plugin host object handed to init(). */
    private $host;

    /** Not referenced anywhere else in this class. */
    private $filters = array();

    /** Database handle obtained from Db::get() in init(). */
    private $dbh;

    /** Score bonus applied to articles trained or auto-classified as GOOD. */
    private $score_modifier = 50;

    /** Common name prefix of the plugin's tables. */
    private $sql_prefix = "ttrss_plugin_af_sort_bayes";

    /**
     * Describes the plugin.
     *
     * @return array version number, description, author
     */
    function about() {
        return array(1.0,
            "Bayesian classifier for tt-rss (WIP)",
            "fox");
    }

    /**
     * Loads the classifier library, makes sure the plugin tables exist and
     * registers the hooks implemented by this class.
     *
     * @param object $host plugin host; must provide add_hook() and the
     *                     HOOK_ARTICLE_FILTER / HOOK_PREFS_TAB /
     *                     HOOK_ARTICLE_BUTTON constants
     */
    function init($host) {
        require_once __DIR__ . "/lib/class.naivebayesian.php";
        require_once __DIR__ . "/lib/class.naivebayesianstorage.php";

        $this->host = $host;
        $this->dbh = Db::get();

        $this->init_database();

        $host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
        $host->add_hook($host::HOOK_PREFS_TAB, $this);
        $host->add_hook($host::HOOK_ARTICLE_BUTTON, $this);
    }

    /**
     * Returns the session user id as an integer, or 0 when no user id is
     * stored in the session. The integer cast makes the value safe to embed
     * in the SQL statements built by this class.
     *
     * @return int
     */
    private function current_owner_uid() {
        return isset($_SESSION["uid"]) ? (int) $_SESSION["uid"] : 0;
    }

    /**
     * Request handler: (re)trains the classifier on a single article.
     *
     * Reads "article_id" and "train_up" from $_REQUEST. A true "train_up"
     * files the article as GOOD and adds $score_modifier to its score;
     * anything else files it as NEUTRAL. Prints "<article_id> :: <category>".
     */
    function trainArticle() {
        // Missing request parameters fall back to 0 / false instead of
        // raising undefined-index notices.
        $article_id = isset($_REQUEST["article_id"]) ? (int) $_REQUEST["article_id"] : 0;
        $train_up = isset($_REQUEST["train_up"]) && sql_bool_to_bool($_REQUEST["train_up"]);
        $owner_uid = $this->current_owner_uid();

        $category = $train_up ? "GOOD" : "NEUTRAL";

        $nbs = new NaiveBayesianStorage($owner_uid);
        $nb = new NaiveBayesian($nbs);

        $result = $this->dbh->query("SELECT score, guid, title, content
            FROM ttrss_entries, ttrss_user_entries
            WHERE ref_id = id AND id = $article_id AND owner_uid = $owner_uid");

        if ($this->dbh->num_rows($result) != 0) {
            $guid = $this->dbh->fetch_result($result, 0, "guid");
            $title = $this->dbh->fetch_result($result, 0, "title");
            $content = mb_strtolower($title . " " .
                strip_tags($this->dbh->fetch_result($result, 0, "content")));

            // Cast so the value written back by the UPDATE below is numeric.
            $score = (int) $this->dbh->fetch_result($result, 0, "score");

            $this->dbh->query("BEGIN");

            // Undo earlier training of this document before training again.
            // NOTE(review): the bonus is removed whenever untrain() succeeds,
            // whichever category the document was filed under before -
            // confirm this is intended.
            if ($nb->untrain($guid, $content)) {
                if ($score >= $this->score_modifier) $score -= $this->score_modifier;
            }

            $nb->train($guid, $nbs->getCategoryByName($category), $content);

            if ($train_up) $score += $this->score_modifier;

            $this->dbh->query("UPDATE ttrss_user_entries SET score = $score
                WHERE ref_id = $article_id AND owner_uid = $owner_uid");

            $nb->updateProbabilities();

            $this->dbh->query("COMMIT");
        }

        print "$article_id :: $category";
    }

    /**
     * @return string|false contents of init.js located next to this file
     */
    function get_js() {
        return file_get_contents(__DIR__ . "/init.js");
    }

    /**
     * @return string|false contents of init.js located next to this file
     */
    function get_prefs_js() {
        return file_get_contents(__DIR__ . "/init.js");
    }

    /**
     * Builds the thumbs-up / thumbs-down training buttons for an article.
     *
     * @param array $line article row; only $line["id"] is used
     * @return string HTML for the two <img> buttons
     */
    function hook_article_button($line) {
        // The id ends up inside an inline JavaScript call, so force it to an
        // integer; the translated titles go into a quoted attribute.
        $id = (int) $line["id"];
        $title_up = htmlspecialchars(__('+1'), ENT_QUOTES, "UTF-8");
        $title_down = htmlspecialchars(__('-1'), ENT_QUOTES, "UTF-8");

        return "<img src=\"plugins/af_sort_bayes/thumb_up.png\" " .
            "style=\"cursor : pointer\" " .
            "onclick=\"bayesTrain($id, true)\" " .
            "class='tagsPic' title='$title_up'>" .
            "<img src=\"plugins/af_sort_bayes/thumb_down.png\" " .
            "style=\"cursor : pointer\" " .
            "onclick=\"bayesTrain($id, false)\" " .
            "class='tagsPic' title='$title_down'>";
    }

    /**
     * Creates the plugin tables when they do not exist yet and, for a
     * logged-in user without any category rows, inserts the GOOD and
     * NEUTRAL categories.
     */
    function init_database() {
        $prefix = $this->sql_prefix;

        // TODO there probably should be a way for plugins to determine their schema version to upgrade tables

        /*$this->dbh->query("DROP TABLE IF EXISTS {$prefix}_wordfreqs", false);
        $this->dbh->query("DROP TABLE IF EXISTS {$prefix}_references", false);
        $this->dbh->query("DROP TABLE IF EXISTS {$prefix}_categories", false);*/

        $this->dbh->query("BEGIN");

        // One schema for MySQL; the other branch uses PostgreSQL syntax.

        if (DB_TYPE == "mysql") {

            $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
                id INTEGER NOT NULL PRIMARY KEY auto_increment,
                category varchar(100) NOT NULL DEFAULT '',
                probability DOUBLE NOT NULL DEFAULT '0',
                owner_uid INTEGER NOT NULL,
                FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
                word_count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

            $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
                id INTEGER NOT NULL PRIMARY KEY auto_increment,
                document_id VARCHAR(255) NOT NULL,
                category_id INTEGER NOT NULL,
                FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
                owner_uid INTEGER NOT NULL,
                FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
                content text NOT NULL) ENGINE=InnoDB");

            $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
                word varchar(100) NOT NULL DEFAULT '',
                category_id INTEGER NOT NULL,
                FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
                owner_uid INTEGER NOT NULL,
                FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
                count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

        } else {

            // PostgreSQL has no bare DOUBLE type; the floating point column
            // must be declared DOUBLE PRECISION.
            $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
                id SERIAL NOT NULL PRIMARY KEY,
                category varchar(100) NOT NULL DEFAULT '',
                probability DOUBLE PRECISION NOT NULL DEFAULT '0',
                owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
                word_count BIGINT NOT NULL DEFAULT '0')");

            $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
                id SERIAL NOT NULL PRIMARY KEY,
                document_id VARCHAR(255) NOT NULL,
                category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
                owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
                content text NOT NULL)");

            $this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
                word varchar(100) NOT NULL DEFAULT '',
                category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
                owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
                count BIGINT NOT NULL DEFAULT '0')");
        }

        // init() may run without a logged-in user; seed categories only
        // when a user id is available.
        $owner_uid = $this->current_owner_uid();

        if ($owner_uid) {
            $result = $this->dbh->query("SELECT id FROM {$prefix}_categories WHERE owner_uid = $owner_uid LIMIT 1");

            if ($this->dbh->num_rows($result) == 0) {
                $this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('GOOD', $owner_uid)");
                $this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('NEUTRAL', $owner_uid)");
            }
        }

        $this->dbh->query("COMMIT");
    }

    /**
     * Prints the plugin's pane on the "prefPrefs" preferences tab: a table
     * of the user's categories (name, probability, word count) followed by
     * a "Clear database" button.
     *
     * @param string $args identifier of the tab being rendered; anything
     *                     other than "prefPrefs" is ignored
     */
    function hook_prefs_tab($args) {
        if ($args != "prefPrefs") return;

        print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"".__('Bayesian classifier (af_sort_bayes)')."\">";

        $result = $this->dbh->query("SELECT category, probability, word_count FROM {$this->sql_prefix}_categories WHERE owner_uid = " . $this->current_owner_uid());

        print "<table>";
        print "<tr><th>Category</th><th>Probability</th><th>Word count</th></tr>";

        while ($line = $this->dbh->fetch_assoc($result)) {
            print "<tr>";
            foreach ($line as $k => $v) {
                // Strict comparison: a loose == between an integer key and a
                // string is true on PHP versions before 8.
                if ($k === "probability") $v = sprintf("%.3f", $v);

                // Values come from the database; escape them for HTML.
                print "<td>" . htmlspecialchars($v, ENT_QUOTES, "UTF-8") . "</td>";
            }
            print "</tr>";
        }

        print "</table>";

        print "<button dojoType=\"dijit.form.Button\" onclick=\"return bayesClearDatabase()\">".
            __('Clear database')."</button> ";

        print "</div>";
    }

    /**
     * Article filter hook: trains the classifier on the incoming article
     * and, once enough words have been learned, boosts articles that are
     * classified as GOOD.
     *
     * Automatic classification is enabled only when the NEUTRAL category
     * holds at least 3000 words and the GOOD category at least 1000.
     *
     * @param array $article article data; reads "owner_uid", "title",
     *                       "content" and "guid_hashed", may add to
     *                       "score_modifier"
     * @return array the (possibly modified) article
     */
    function hook_article_filter($article) {
        $owner_uid = $article["owner_uid"];

        $nbs = new NaiveBayesianStorage($owner_uid);
        $nb = new NaiveBayesian($nbs);

        $categories = $nbs->getCategories();

        if (count($categories) > 0) {

            $count_neutral = 0;
            $count_good = 0;
            $id_good = 0;
            $id_neutral = 0;

            foreach ($categories as $id => $cat) {
                if ($cat["category"] == "GOOD") {
                    $id_good = $id;
                    $count_good += $cat["word_count"];
                } else if ($cat["category"] == "NEUTRAL") {
                    $id_neutral = $id;
                    $count_neutral += $cat["word_count"];
                }
            }

            // Articles are filed as NEUTRAL unless the classifier is
            // confident they are GOOD.
            $dst_category = $id_neutral;

            $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));

            if ($count_neutral >= 3000 && $count_good >= 1000) {
                // enable automatic categorization

                $result = $nb->categorize($bayes_content);

                // Both probabilities must be present before they are read.
                if (is_array($result) && count($result) == 2 &&
                        isset($result[$id_good], $result[$id_neutral])) {

                    $prob_good = $result[$id_good];
                    $prob_neutral = $result[$id_neutral];

                    if ($prob_good > 0.90 && $prob_good > $prob_neutral) {
                        $dst_category = $id_good; // should we autofile as good or not? idk

                        // Nothing in this class initializes the key, so
                        // start from zero if the caller has not set it.
                        if (!isset($article["score_modifier"])) $article["score_modifier"] = 0;

                        $article["score_modifier"] += $this->score_modifier;
                    }
                }
            }

            $nb->train($article["guid_hashed"], $dst_category, $bayes_content);

            $nb->updateProbabilities();
        }

        return $article;
    }

    /**
     * Request handler: deletes the session user's trained documents and
     * word frequencies, then recomputes the category probabilities.
     */
    function clearDatabase() {
        $prefix = $this->sql_prefix;
        $owner_uid = $this->current_owner_uid();

        $this->dbh->query("BEGIN");
        $this->dbh->query("DELETE FROM {$prefix}_references WHERE owner_uid = $owner_uid");
        $this->dbh->query("DELETE FROM {$prefix}_wordfreqs WHERE owner_uid = $owner_uid");
        $this->dbh->query("COMMIT");

        $nbs = new NaiveBayesianStorage($owner_uid);
        $nb = new NaiveBayesian($nbs);
        $nb->updateProbabilities();
    }

    /**
     * @return int plugin API version implemented by this class
     */
    function api_version() {
        return 2;
    }

}
270 ?>