]>
Commit | Line | Data |
---|---|---|
853cc128 AD |
1 | <?php |
2 | ||
3 | class Af_Sort_Bayes extends Plugin { | |
4 | ||
5 | private $host; | |
6 | private $filters = array(); | |
7 | private $dbh; | |
59e83455 | 8 | private $score_modifier = 50; |
853cc128 AD |
9 | |
/**
 * Describes this plugin to the plugin host.
 *
 * @return array three-element list; presumably (version, description,
 *               author) as the host expects - confirm against the Plugin
 *               base class.
 */
function about() {
	$version = 1.0;
	$description = "Bayesian classifier for tt-rss (WIP)";
	$author = "fox";

	return array($version, $description, $author);
}
15 | ||
/**
 * Plugin entry point: loads the bundled classifier libraries, stores the
 * host and database handles, makes sure the plugin tables exist and
 * subscribes to the hooks this plugin implements.
 *
 * @param object $host plugin host; must expose add_hook() and the HOOK_*
 *                     constants used below.
 */
function init($host) {
	// Classifier implementation shipped alongside the plugin.
	$libraries = array(
		"/lib/class.naivebayesian.php",
		"/lib/class.naivebayesianstorage.php",
	);

	foreach ($libraries as $library) {
		require_once __DIR__ . $library;
	}

	$this->host = $host;
	$this->dbh = Db::get();

	// Tables must exist before any hook can fire.
	$this->init_database();

	$hooks = array(
		$host::HOOK_ARTICLE_FILTER,
		$host::HOOK_PREFS_TAB,
		$host::HOOK_ARTICLE_BUTTON,
	);

	foreach ($hooks as $hook) {
		$host->add_hook($hook, $this);
	}
}
30 | ||
/**
 * Request handler: (re)trains the classifier on a single stored article
 * and adjusts the article score accordingly.
 *
 * Reads "article_id" and "train_up" from $_REQUEST and the current user
 * from $_SESSION["uid"]. A truthy "train_up" files the article under
 * "GOOD", anything else under "NEUTRAL". Prints "<article_id> :: <category>"
 * whether or not the article was found.
 *
 * NOTE(review): a successful untrain() lowers the score even when the
 * earlier training may not have raised it - confirm this is intended.
 */
function trainArticle() {
	$article_id = (int) $_REQUEST["article_id"];
	$train_up = sql_bool_to_bool($_REQUEST["train_up"]);
	$uid = $_SESSION["uid"];

	if ($train_up) {
		$category = "GOOD";
	} else {
		$category = "NEUTRAL";
	}

	$report = "$article_id :: $category";

	$storage = new NaiveBayesianStorage($uid);
	$classifier = new NaiveBayesian($storage);

	$rows = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = $article_id AND owner_uid = $uid");

	// Nothing to train on: the article does not exist for this user.
	if ($this->dbh->num_rows($rows) == 0) {
		print $report;
		return;
	}

	$guid = $this->dbh->fetch_result($rows, 0, "guid");
	$title = $this->dbh->fetch_result($rows, 0, "title");
	$body = strip_tags($this->dbh->fetch_result($rows, 0, "content"));
	$score = $this->dbh->fetch_result($rows, 0, "score");

	// The classifier is fed the lower-cased title plus tag-stripped content.
	$text = mb_strtolower($title . " " . $body);
	$boost = $this->score_modifier;

	$this->dbh->query("BEGIN");

	// Drop any earlier training for this document; when one existed,
	// take the boost back (never pushing the score below zero).
	if ($classifier->untrain($guid, $text) && $score >= $boost) {
		$score -= $boost;
	}

	$classifier->train($guid, $storage->getCategoryByName($category), $text);

	if ($category == "GOOD") {
		$score += $boost;
	}

	$this->dbh->query("UPDATE ttrss_user_entries SET score = '$score' WHERE ref_id = $article_id AND owner_uid = $uid");

	$classifier->updateProbabilities();

	$this->dbh->query("COMMIT");

	print $report;
}
69 | ||
/**
 * Returns the contents of the init.js file located next to this plugin.
 *
 * @return string|false file contents, or false when the file cannot be read
 */
function get_js() {
	$script_path = __DIR__ . "/init.js";

	return file_get_contents($script_path);
}
73 | ||
/**
 * Renders the thumbs-up / thumbs-down training buttons for one article.
 *
 * Each button calls the client-side bayesTrain(<article id>, <bool>)
 * function when clicked.
 *
 * @param array $line article row; only the "id" key is read.
 * @return string HTML markup containing two <img> elements.
 */
function hook_article_button($line) {
	// The id is embedded in inline JavaScript, so force it to an integer.
	$article_id = (int) $line["id"];

	// icon file, JS boolean literal passed to bayesTrain(), tooltip
	$buttons = array(
		array("thumb_up.png", "true", __('+1')),
		array("thumb_down.png", "false", __('-1')),
	);

	$html = "";

	foreach ($buttons as $button) {
		list($icon, $train_up, $title) = $button;

		// A single style attribute per element: repeating an attribute on
		// one tag is invalid HTML. The title sits inside single quotes, so
		// quotes in a translation must be escaped.
		$html .= "<img src=\"plugins/af_sort_bayes/$icon\"" .
			" style=\"cursor : pointer\"" .
			" onclick=\"bayesTrain($article_id, $train_up)\"" .
			" class='tagsPic' title='" . htmlspecialchars($title, ENT_QUOTES) . "'>";
	}

	return $html;
}
85 | ||
/**
 * Creates the plugin tables when they do not exist yet and, for a
 * logged-in user that has no categories, seeds the "GOOD" and "NEUTRAL"
 * categories. Everything runs inside one transaction.
 *
 * The DDL uses SERIAL columns, i.e. PostgreSQL only for the time being.
 */
function init_database() {
	$prefix = "ttrss_plugin_af_sort_bayes";

	// TODO there probably should be a way for plugins to determine their schema version to upgrade tables

	/*$this->dbh->query("DROP TABLE IF EXISTS {$prefix}_wordfreqs", false);
	$this->dbh->query("DROP TABLE IF EXISTS {$prefix}_references", false);
	$this->dbh->query("DROP TABLE IF EXISTS {$prefix}_categories", false);*/

	$this->dbh->query("BEGIN");

	// PG only for the time being

	// "{$var}" is used instead of "${var}": the latter form of string
	// interpolation is deprecated as of PHP 8.2.
	$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_categories (
		id SERIAL NOT NULL PRIMARY KEY,
		category varchar(100) NOT NULL DEFAULT '',
		probability DOUBLE PRECISION NOT NULL DEFAULT '0',
		owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
		word_count BIGINT NOT NULL DEFAULT '0')");

	$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_references (
		id SERIAL NOT NULL PRIMARY KEY,
		document_id VARCHAR(255) NOT NULL,
		category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
		owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
		content text NOT NULL)");

	$this->dbh->query("CREATE TABLE IF NOT EXISTS {$prefix}_wordfreqs (
		word varchar(100) NOT NULL DEFAULT '',
		category_id INTEGER NOT NULL REFERENCES {$prefix}_categories(id) ON DELETE CASCADE,
		owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE,
		count BIGINT NOT NULL DEFAULT '0')");

	// There may be no session user at all; test for the key explicitly
	// rather than suppressing the notice, and force the value to an
	// integer because it is interpolated into SQL below.
	$owner_uid = isset($_SESSION["uid"]) ? (int) $_SESSION["uid"] : 0;

	if ($owner_uid) {
		$result = $this->dbh->query("SELECT id FROM {$prefix}_categories WHERE owner_uid = $owner_uid LIMIT 1");

		// First run for this user: create the two categories.
		if ($this->dbh->num_rows($result) == 0) {
			$this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('GOOD', $owner_uid)");
			$this->dbh->query("INSERT INTO {$prefix}_categories (category, owner_uid) VALUES ('NEUTRAL', $owner_uid)");
		}
	}

	$this->dbh->query("COMMIT");
}
132 | ||
/**
 * Prints this plugin's (currently empty) accordion pane on the
 * preferences tab; any other tab produces no output.
 *
 * @param string $args identifier of the tab being rendered; only
 *                     "prefPrefs" is handled.
 */
function hook_prefs_tab($args) {
	if ($args == "prefPrefs") {
		$pane_title = __('af_sort_bayes');

		print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"" . $pane_title . "\">";

		// no pane content yet

		print "</div>";
	}
}
142 | ||
/**
 * Article filter hook: scores an article with the owner's classifier and
 * then trains the classifier on it.
 *
 * Classification only runs once the "NEUTRAL" category holds at least
 * 3000 words and the "GOOD" category at least 1000. When the "GOOD"
 * probability is above 0.90 and above the "NEUTRAL" one, the article's
 * "score_modifier" is raised by $this->score_modifier. The article is
 * always trained under the "NEUTRAL" category afterwards.
 *
 * @param array $article article data; reads "owner_uid", "title",
 *                       "content", "guid_hashed" and updates
 *                       "score_modifier".
 * @return array the (possibly modified) article
 */
function hook_article_filter($article) {
	$storage = new NaiveBayesianStorage($article["owner_uid"]);
	$classifier = new NaiveBayesian($storage);

	$known = $storage->getCategories();

	// No categories for this user: leave the article untouched.
	if (count($known) == 0) {
		return $article;
	}

	$good_id = 0;
	$good_words = 0;
	$neutral_id = 0;
	$neutral_words = 0;

	foreach ($known as $cat_id => $row) {
		switch ($row["category"]) {
			case "GOOD":
				$good_id = $cat_id;
				$good_words += $row["word_count"];
				break;

			case "NEUTRAL":
				$neutral_id = $cat_id;
				$neutral_words += $row["word_count"];
				break;
		}
	}

	// Lower-cased title plus tag-stripped content.
	$text = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));

	// Automatic categorization is enabled only with enough training data.
	$enough_data = $neutral_words >= 3000 && $good_words >= 1000;

	if ($enough_data) {
		$probabilities = $classifier->categorize($text);

		if (count($probabilities) == 2) {
			$p_good = $probabilities[$good_id];
			$p_neutral = $probabilities[$neutral_id];

			$looks_good = $p_good > 0.90 && $p_good > $p_neutral;

			if ($looks_good) {
				// The article is deliberately not filed as GOOD; only
				// its score is raised.
				$article["score_modifier"] += $this->score_modifier;
			}
		}
	}

	$classifier->train($article["guid_hashed"], $neutral_id, $text);
	$classifier->updateProbabilities();

	return $article;
}
196 | ||
/**
 * Reports the API version number for this plugin.
 *
 * @return int always 2
 */
function api_version() {
	$supported_api = 2;

	return $supported_api;
}
200 | ||
201 | } | |
202 | ?> |