]>
Commit | Line | Data |
---|---|---|
853cc128 AD |
1 | <?php |
2 | ||
/**
 * Bayesian article classifier plugin (work in progress).
 *
 * Articles are sorted into three per-user categories stored in the plugin's
 * own tables: "GOOD", "BAD" and "UGLY" (the neutral/default bucket). Users
 * train the classifier with thumb up/down buttons; incoming articles are
 * trained into the neutral bucket until enough neutral words have been
 * collected, after which automatic categorization adjusts the article score.
 */
class Af_Sort_Bayes extends Plugin {

	private $host;
	private $filters = array();
	private $dbh;

	// Absolute score applied to (or removed from) an article when it is
	// classified as GOOD or BAD.
	private $score_modifier = 50;

	// Common name prefix of the plugin's tables (_categories, _references,
	// _wordfreqs).
	private $sql_prefix = "ttrss_plugin_af_sort_bayes";

	/**
	 * Plugin descriptor.
	 *
	 * @return array version number, description, author
	 */
	function about() {
		return array(1.0,
			"Bayesian classifier for tt-rss (WIP)",
			"fox");
	}

	/**
	 * Loads the classifier library, makes sure the plugin tables exist and
	 * registers the article filter, preferences tab and article button hooks.
	 *
	 * @param object $host plugin host the hooks are registered with
	 */
	function init($host) {
		require_once __DIR__ . "/lib/class.naivebayesian.php";
		require_once __DIR__ . "/lib/class.naivebayesian_ngram.php";
		require_once __DIR__ . "/lib/class.naivebayesianstorage.php";

		$this->host = $host;
		$this->dbh = Db::get();

		$this->init_database();

		$host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
		$host->add_hook($host::HOOK_PREFS_TAB, $this);
		$host->add_hook($host::HOOK_ARTICLE_BUTTON, $this);
	}

	/**
	 * Request handler: moves an article one step up or down the
	 * BAD <-> UGLY <-> GOOD ladder, retrains the classifier accordingly and
	 * stores the resulting score.
	 *
	 * Reads "article_id" and "train_up" from $_REQUEST and prints
	 * "<article id> :: <destination category> :: <score>".
	 */
	function trainArticle() {
		$article_id = (int) $_REQUEST["article_id"];
		$train_up = sql_bool_to_bool($_REQUEST["train_up"]);

		// only ever concatenated into SQL as an integer
		$owner_uid = (int) $_SESSION["uid"];

		$dst_category = "UGLY";

		// Stays empty when the article is not found, so the final print no
		// longer reads an undefined variable (the printed text is unchanged).
		$score = "";

		$nbs = new NaiveBayesianStorage($_SESSION["uid"]);
		$nb = new NaiveBayesian($nbs);

		$result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
			$article_id . " AND owner_uid = " . $owner_uid);

		if ($this->dbh->num_rows($result) != 0) {
			$guid = $this->dbh->fetch_result($result, 0, "guid");
			$title = $this->dbh->fetch_result($result, 0, "title");
			$content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content")));
			$score = $this->dbh->fetch_result($result, 0, "score");

			$this->dbh->query("BEGIN");

			$ref = $nbs->getReference($guid, false);

			// articles the classifier has no reference for count as neutral
			if (isset($ref['category_id'])) {
				$current_category = $nbs->getCategoryById($ref['category_id']);
			} else {
				$current_category = "UGLY";
			}

			// set score to fixed value for now

			if ($train_up) {
				switch ($current_category) {
				case "UGLY":
					$dst_category = "GOOD";
					$score = $this->score_modifier;
					break;
				case "BAD":
					$dst_category = "UGLY";
					$score = 0;
					break;
				case "GOOD":
					// already at the top: category and score stay as they are
					$dst_category = "GOOD";
					break;
				}
			} else {
				switch ($current_category) {
				case "UGLY":
					$dst_category = "BAD";
					$score = -$this->score_modifier;
					break;
				case "BAD":
					// already at the bottom: category and score stay as they are
					$dst_category = "BAD";
					break;
				case "GOOD":
					// NOTE(review): BAD -> UGLY resets the score to 0 while
					// GOOD -> UGLY applies the negative modifier; confirm
					// this asymmetry is intended.
					$dst_category = "UGLY";
					$score = -$this->score_modifier;
					break;
				}
			}

			// drop the previous training for this document before
			// re-training it under the destination category
			$nb->untrain($guid, $content);
			$nb->train($guid, $nbs->getCategoryByName($dst_category), $content);

			$this->dbh->query("UPDATE ttrss_user_entries SET score = '$score' WHERE ref_id = $article_id AND owner_uid = " . $owner_uid);

			$nb->updateProbabilities();

			$this->dbh->query("COMMIT");
		}

		print "$article_id :: $dst_category :: $score";
	}

	/**
	 * @return string contents of the plugin's init.js
	 */
	function get_js() {
		return file_get_contents(__DIR__ . "/init.js");
	}

	/**
	 * @return string contents of the plugin's init.js (same script as get_js())
	 */
	function get_prefs_js() {
		return file_get_contents(__DIR__ . "/init.js");
	}

	/**
	 * Renders the thumb up / thumb down training buttons for an article.
	 *
	 * @param array $line article row; only "id" is used
	 * @return string HTML for both buttons
	 */
	function hook_article_button($line) {
		// the id ends up inside an inline JS call, so force it to an integer
		$article_id = (int) $line["id"];

		return "<img src=\"plugins/af_sort_bayes/thumb_up.png\"
			style=\"cursor : pointer\"
			onclick=\"bayesTrain(" . $article_id . ", true)\"
			class='tagsPic' title='" . __('+1') . "'>" .
			"<img src=\"plugins/af_sort_bayes/thumb_down.png\"
			style=\"cursor : pointer\"
			onclick=\"bayesTrain(" . $article_id . ", false)\"
			class='tagsPic' title='" . __('-1') . "'>";
	}

	/**
	 * Creates the plugin tables when they do not exist yet and, when a user
	 * session is present, seeds that user's GOOD / BAD / UGLY categories.
	 */
	function init_database() {
		$prefix = $this->sql_prefix;

		// TODO there probably should be a way for plugins to determine their schema version to upgrade tables

		$this->dbh->query("BEGIN");

		if (DB_TYPE == "mysql") {

			$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
				id INTEGER NOT NULL PRIMARY KEY auto_increment,
				category varchar(100) NOT NULL DEFAULT '',
				probability DOUBLE NOT NULL DEFAULT '0',
				owner_uid INTEGER NOT NULL,
				FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
				word_count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

			$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
				id INTEGER NOT NULL PRIMARY KEY auto_increment,
				document_id VARCHAR(255) NOT NULL,
				category_id INTEGER NOT NULL,
				FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
				owner_uid INTEGER NOT NULL,
				FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
				content text NOT NULL) ENGINE=InnoDB");

			$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
				word varchar(100) NOT NULL DEFAULT '',
				category_id INTEGER NOT NULL,
				FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
				owner_uid INTEGER NOT NULL,
				FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
				count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

		} else {

			// PostgreSQL has no bare DOUBLE type; the floating point column
			// has to be declared as DOUBLE PRECISION.
			$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
				id SERIAL NOT NULL PRIMARY KEY,
				category varchar(100) NOT NULL DEFAULT '',
				probability DOUBLE PRECISION NOT NULL DEFAULT '0',
				owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
				word_count BIGINT NOT NULL DEFAULT '0')");

			$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
				id SERIAL NOT NULL PRIMARY KEY,
				document_id VARCHAR(255) NOT NULL,
				category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
				owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
				content text NOT NULL)");

			$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
				word varchar(100) NOT NULL DEFAULT '',
				category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
				owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
				count BIGINT NOT NULL DEFAULT '0')");
		}

		// No uid in the session means there is nobody to seed categories for.
		$owner_uid = isset($_SESSION["uid"]) ? (int) $_SESSION["uid"] : 0;

		if ($owner_uid) {
			$result = $this->dbh->query("SELECT id FROM {$prefix}_categories WHERE owner_uid = $owner_uid LIMIT 1");

			if ($this->dbh->num_rows($result) == 0) {
				$this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('GOOD', $owner_uid)");
				$this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('BAD', $owner_uid)");
				$this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('UGLY', $owner_uid)");
			}
		}

		$this->dbh->query("COMMIT");
	}

	/**
	 * Prints the classifier pane of the preferences screen: a per-category
	 * statistics table and a "Clear database" button.
	 *
	 * @param string $args tab identifier; anything but "prefPrefs" is ignored
	 */
	function hook_prefs_tab($args) {
		if ($args != "prefPrefs") return;

		print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"".__('Bayesian classifier (af_sort_bayes)')."\">";

		$result = $this->dbh->query("SELECT category, probability, word_count,
			(SELECT COUNT(id) FROM {$this->sql_prefix}_references WHERE
				category_id = {$this->sql_prefix}_categories.id) as doc_count
			FROM {$this->sql_prefix}_categories WHERE owner_uid = " . (int) $_SESSION["uid"]);

		print "<table>";
		print "<tr><th>Category</th><th>Probability</th><th>Word count</th><th>Article count</th></tr>";

		while ($line = $this->dbh->fetch_assoc($result)) {
			print "<tr>";
			foreach ($line as $k => $v) {
				if ($k == "probability") $v = sprintf("%.3f", $v);

				// values come from the database; escape them for HTML output
				print "<td>" . htmlspecialchars($v) . "</td>";
			}
			print "</tr>";
		}

		print "</table>";

		print "<button dojoType=\"dijit.form.Button\" onclick=\"return bayesClearDatabase()\">".
			__('Clear database')."</button> ";

		print "</div>";
	}

	/**
	 * Article filter hook: trains the classifier with the incoming article
	 * and, once at least 10000 words have been collected in the neutral
	 * (UGLY) category, adjusts "score_modifier" when the article is
	 * classified as GOOD or BAD with a probability above 0.90.
	 *
	 * @param array $article article data; reads "owner_uid", "title",
	 *                       "content", "guid_hashed", updates "score_modifier"
	 * @return array the (possibly modified) article
	 */
	function hook_article_filter($article) {
		$owner_uid = $article["owner_uid"];

		$nbs = new NaiveBayesianStorage($owner_uid);
		$nb = new NaiveBayesian($nbs);

		$categories = $nbs->getCategories();

		if (count($categories) > 0) {

			$count_neutral = 0;

			$id_good = 0;
			$id_ugly = 0;
			$id_bad = 0;

			foreach ($categories as $id => $cat) {
				if ($cat["category"] == "GOOD") {
					$id_good = $id;
				} else if ($cat["category"] == "UGLY") {
					$id_ugly = $id;
					$count_neutral += $cat["word_count"];
				} else if ($cat["category"] == "BAD") {
					$id_bad = $id;
				}
			}

			// neutral unless the classifier is confident enough (below)
			$dst_category = $id_ugly;

			$bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));

			if ($count_neutral >= 10000) {
				// enable automatic categorization

				$result = $nb->categorize($bayes_content);

				// Diagnostic dump goes through the debug helper instead of
				// being printed straight into the output stream.
				_debug("bayes, categorize result: " . print_r($result, true));

				if (count($result) == 3) {
					$prob_good = $result[$id_good];
					$prob_bad = $result[$id_bad];

					if ($prob_good > 0.90) {
						$dst_category = $id_good;
						$article["score_modifier"] += $this->score_modifier;
					} else if ($prob_bad > 0.90) {
						$dst_category = $id_bad;
						$article["score_modifier"] -= $this->score_modifier;
					}
				}

				_debug("bayes, dst category: $dst_category");
			}

			$nb->train($article["guid_hashed"], $dst_category, $bayes_content);

			$nb->updateProbabilities();
		}

		return $article;
	}

	/**
	 * Deletes the current user's document references and word frequencies,
	 * then recalculates the category probabilities.
	 */
	function clearDatabase() {
		$prefix = $this->sql_prefix;
		$owner_uid = (int) $_SESSION["uid"];

		$this->dbh->query("BEGIN");
		$this->dbh->query("DELETE FROM {$prefix}_references WHERE owner_uid = " . $owner_uid);
		$this->dbh->query("DELETE FROM {$prefix}_wordfreqs WHERE owner_uid = " . $owner_uid);
		$this->dbh->query("COMMIT");

		$nbs = new NaiveBayesianStorage($_SESSION["uid"]);
		$nb = new NaiveBayesian($nbs);
		$nb->updateProbabilities();
	}

	/**
	 * @return int plugin API version implemented by this plugin
	 */
	function api_version() {
		return 2;
	}

}
320 | ?> |