]> git.wh0rd.org Git - tt-rss.git/blob - plugins/af_sort_bayes/init.php
bayes: add mysql script
[tt-rss.git] / plugins / af_sort_bayes / init.php
1 <?php
2
/**
 * Bayesian article classifier plugin for tt-rss (work in progress).
 *
 * Articles are trained into one of two per-user categories, "GOOD" or
 * "NEUTRAL". Users train articles explicitly through the thumb up/down
 * buttons; incoming articles are trained automatically by the article
 * filter hook, which also raises the article score when the classifier
 * rates the article as GOOD with high enough probability.
 */
class Af_Sort_Bayes extends Plugin {

	/** Plugin host passed to init(). */
	private $host;

	/** Currently unused by this class. */
	private $filters = array();

	/** Database handle obtained from Db::get(). */
	private $dbh;

	/** Score bonus applied to articles classified or trained as GOOD. */
	private $score_modifier = 50;

	/** Minimum trained NEUTRAL word count before automatic categorization is enabled. */
	private $min_neutral_words = 3000;

	/** Minimum trained GOOD word count before automatic categorization is enabled. */
	private $min_good_words = 1000;

	/** GOOD probability an article has to exceed to be auto-filed as GOOD. */
	private $good_probability_threshold = 0.90;

	/**
	 * Plugin metadata.
	 *
	 * @return array presumably (version, description, author), as expected by the plugin host
	 */
	function about() {
		return array(1.0,
			"Bayesian classifier for tt-rss (WIP)",
			"fox");
	}

	/**
	 * Loads the classifier library, makes sure the plugin tables exist and
	 * registers the article filter, preferences tab and article button hooks.
	 *
	 * @param object $host plugin host the hooks are registered on
	 */
	function init($host) {
		require_once __DIR__ . "/lib/class.naivebayesian.php";
		require_once __DIR__ . "/lib/class.naivebayesianstorage.php";

		$this->host = $host;
		$this->dbh = Db::get();

		$this->init_database();

		$host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
		$host->add_hook($host::HOOK_PREFS_TAB, $this);
		$host->add_hook($host::HOOK_ARTICLE_BUTTON, $this);
	}

	/**
	 * Request handler: (re)trains one article of the current user.
	 *
	 * Reads "article_id" and "train_up" from the request. The article is
	 * untrained first, then trained as GOOD (train_up) or NEUTRAL, and its
	 * score is adjusted by the score modifier accordingly. Always prints
	 * "<article_id> :: <category>", even when the article was not found.
	 */
	function trainArticle() {
		$article_id = isset($_REQUEST["article_id"]) ? (int) $_REQUEST["article_id"] : 0;
		$train_up = isset($_REQUEST["train_up"]) ? sql_bool_to_bool($_REQUEST["train_up"]) : false;

		// cast once so the value is safe to embed in the queries below
		$owner_uid = (int) $_SESSION["uid"];

		$category = $train_up ? "GOOD" : "NEUTRAL";

		$nbs = new NaiveBayesianStorage($_SESSION["uid"]);
		$nb = new NaiveBayesian($nbs);

		$result = $this->dbh->query("SELECT score, guid, title, content
			FROM ttrss_entries, ttrss_user_entries
			WHERE ref_id = id AND id = $article_id AND owner_uid = $owner_uid");

		if ($this->dbh->num_rows($result) != 0) {
			$guid = $this->dbh->fetch_result($result, 0, "guid");
			$title = $this->dbh->fetch_result($result, 0, "title");
			$content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content")));
			$score = (int) $this->dbh->fetch_result($result, 0, "score");

			$this->dbh->query("BEGIN");

			// NOTE(review): the bonus is rolled back whenever untrain() succeeds,
			// regardless of which category the article was previously trained
			// as -- confirm this is intended.
			if ($nb->untrain($guid, $content)) {
				if ($score >= $this->score_modifier) $score -= $this->score_modifier;
			}

			$nb->train($guid, $nbs->getCategoryByName($category), $content);

			if ($category == "GOOD") $score += $this->score_modifier;

			$this->dbh->query("UPDATE ttrss_user_entries SET score = $score
				WHERE ref_id = $article_id AND owner_uid = $owner_uid");

			$nb->updateProbabilities();

			$this->dbh->query("COMMIT");
		}

		print "$article_id :: $category";
	}

	/**
	 * @return string|false contents of the plugin's init.js (false if it cannot be read)
	 */
	function get_js() {
		return file_get_contents(__DIR__ . "/init.js");
	}

	/**
	 * Renders the thumb up / thumb down training buttons for an article.
	 *
	 * @param array $line article row; only $line["id"] is used
	 * @return string HTML for both buttons
	 */
	function hook_article_button($line) {
		// the id ends up inside inline JavaScript, so force it to an integer
		$article_id = (int) $line["id"];

		return $this->train_button($article_id, "thumb_up.png", "true", __('+1')) .
			$this->train_button($article_id, "thumb_down.png", "false", __('-1'));
	}

	/**
	 * Builds a single training button.
	 *
	 * @param int $article_id article the button trains
	 * @param string $icon image file name inside the plugin directory
	 * @param string $train_up JavaScript boolean literal passed to bayesTrain()
	 * @param string $title tooltip text
	 * @return string HTML <img> element
	 */
	private function train_button($article_id, $icon, $train_up, $title) {
		return "<img src=\"plugins/af_sort_bayes/$icon\" " .
			"style=\"cursor : pointer\" " .
			"onclick=\"bayesTrain($article_id, $train_up)\" " .
			"class='tagsPic' title='$title'>";
	}

	/**
	 * Creates the plugin tables if they do not exist yet and, when a user is
	 * logged in, makes sure that user has the GOOD and NEUTRAL categories.
	 */
	function init_database() {
		$prefix = "ttrss_plugin_af_sort_bayes";

		// TODO there probably should be a way for plugins to determine their schema version to upgrade tables

		// To start over during development, drop the {prefix}_wordfreqs,
		// {prefix}_references and {prefix}_categories tables (in that order).

		$this->dbh->query("BEGIN");

		// MySQL gets its own DDL; every other DB_TYPE is treated as PostgreSQL
		if (DB_TYPE == "mysql") {
			$this->create_mysql_tables($prefix);
		} else {
			$this->create_pgsql_tables($prefix);
		}

		$owner_uid = isset($_SESSION["uid"]) ? (int) $_SESSION["uid"] : 0;

		if ($owner_uid) {
			$this->ensure_default_categories($prefix, $owner_uid);
		}

		$this->dbh->query("COMMIT");
	}

	/**
	 * Creates the categories, references and wordfreqs tables (MySQL / InnoDB).
	 *
	 * @param string $prefix table name prefix
	 */
	private function create_mysql_tables($prefix) {
		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
			id INTEGER NOT NULL PRIMARY KEY auto_increment,
			category varchar(100) NOT NULL DEFAULT '',
			probability DOUBLE NOT NULL DEFAULT '0',
			owner_uid INTEGER NOT NULL,
			FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
			word_count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
			id INTEGER NOT NULL PRIMARY KEY auto_increment,
			document_id VARCHAR(255) NOT NULL,
			category_id INTEGER NOT NULL,
			FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL,
			FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
			content text NOT NULL) ENGINE=InnoDB");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
			word varchar(100) NOT NULL DEFAULT '',
			category_id INTEGER NOT NULL,
			FOREIGN KEY (category_id) REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL,
			FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE,
			count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB");
	}

	/**
	 * Creates the categories, references and wordfreqs tables (PostgreSQL).
	 *
	 * @param string $prefix table name prefix
	 */
	private function create_pgsql_tables($prefix) {
		// PostgreSQL has no plain DOUBLE type; the floating point type is DOUBLE PRECISION
		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
			id SERIAL NOT NULL PRIMARY KEY,
			category varchar(100) NOT NULL DEFAULT '',
			probability DOUBLE PRECISION NOT NULL DEFAULT '0',
			owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
			word_count BIGINT NOT NULL DEFAULT '0')");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
			id SERIAL NOT NULL PRIMARY KEY,
			document_id VARCHAR(255) NOT NULL,
			category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
			content text NOT NULL)");

		$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
			word varchar(100) NOT NULL DEFAULT '',
			category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
			owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
			count BIGINT NOT NULL DEFAULT '0')");
	}

	/**
	 * Inserts the GOOD and NEUTRAL categories for a user that has none yet.
	 *
	 * @param string $prefix table name prefix
	 * @param int $owner_uid user id, already cast to int by the caller
	 */
	private function ensure_default_categories($prefix, $owner_uid) {
		$result = $this->dbh->query("SELECT id FROM {$prefix}_categories WHERE owner_uid = $owner_uid LIMIT 1");

		if ($this->dbh->num_rows($result) == 0) {
			$this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('GOOD', $owner_uid)");
			$this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('NEUTRAL', $owner_uid)");
		}
	}

	/**
	 * Prints the plugin's (currently empty) accordion pane on the "prefPrefs" tab.
	 *
	 * @param string $args identifier of the preferences tab being rendered
	 */
	function hook_prefs_tab($args) {
		if ($args != "prefPrefs") return;

		print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"".__('af_sort_bayes')."\">";

		//

		print "</div>";
	}

	/**
	 * Trains the classifier on an incoming article and, once enough words
	 * have been trained in both categories, bumps the score of articles the
	 * classifier considers GOOD.
	 *
	 * @param array $article article data; reads "owner_uid", "title", "content"
	 *                       and "guid_hashed", may increase "score_modifier"
	 * @return array the (possibly modified) article
	 */
	function hook_article_filter($article) {
		$owner_uid = $article["owner_uid"];

		$nbs = new NaiveBayesianStorage($owner_uid);
		$nb = new NaiveBayesian($nbs);

		$categories = $nbs->getCategories();

		if (count($categories) == 0) return $article;

		$count_neutral = 0;
		$count_good = 0;
		$id_good = 0;
		$id_neutral = 0;

		foreach ($categories as $id => $cat) {
			if ($cat["category"] == "GOOD") {
				$id_good = $id;
				$count_good += $cat["word_count"];
			} else if ($cat["category"] == "NEUTRAL") {
				$id_neutral = $id;
				$count_neutral += $cat["word_count"];
			}
		}

		// articles are filed as NEUTRAL unless the classifier says otherwise
		$dst_category = $id_neutral;

		$bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));

		if ($count_neutral >= $this->min_neutral_words && $count_good >= $this->min_good_words) {
			// enable automatic categorization

			$result = $nb->categorize($bayes_content);

			if (count($result) == 2 && isset($result[$id_good]) && isset($result[$id_neutral])) {
				$prob_good = $result[$id_good];
				$prob_neutral = $result[$id_neutral];

				if ($prob_good > $this->good_probability_threshold && $prob_good > $prob_neutral) {
					$dst_category = $id_good; // should we autofile as good or not? idk

					if (!isset($article["score_modifier"])) $article["score_modifier"] = 0;

					$article["score_modifier"] += $this->score_modifier;
				}
			}
		}

		$nb->train($article["guid_hashed"], $dst_category, $bayes_content);

		$nb->updateProbabilities();

		return $article;
	}

	/**
	 * @return int plugin API version implemented by this plugin
	 */
	function api_version() {
		return 2;
	}

}
232 ?>