// Amount added to (or subtracted from) an article's score_modifier when it is auto-categorized as good (or bad).
private $score_modifier = 50;
// Prefix for this plugin's database table names (e.g. "{prefix}_references").
private $sql_prefix = "ttrss_plugin_af_sort_bayes";
// Automatic categorization is enabled only once $count_neutral reaches this value.
private $auto_categorize_threshold = 10000;
// Article text is truncated to this many characters (via mb_substr) before it is handed to the classifier.
+ private $max_document_length = 3000; // classifier can't rescale output for very long strings apparently
function about() {
return array(1.0,
function init($host) {
require_once __DIR__ . "/lib/class.naivebayesian.php";
- require_once __DIR__ . "/lib/class.naivebayesian_ngram.php";
+ //require_once __DIR__ . "/lib/class.naivebayesian_ngram.php";
require_once __DIR__ . "/lib/class.naivebayesianstorage.php";
$this->host = $host;
if ($this->dbh->num_rows($result) != 0) {
$guid = $this->dbh->fetch_result($result, 0, "guid");
$title = $this->dbh->fetch_result($result, 0, "title");
- $content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content")));
+ $content = mb_substr(mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))), 0, $this->max_document_length);
$score = $this->dbh->fetch_result($result, 0, "score");
$this->dbh->query("BEGIN");
// Builds the HTML for the per-article classifier buttons and returns it as a string.
// Emits a thumbs-up and a thumbs-down image whose onclick handlers call
// bayesTrain(<id>, true|false, event), plus a chart image whose onclick calls bayesShow(<id>).
// Only $line["id"] is read from the $line argument.
// NOTE(review): every <img> carries the style attribute twice; the duplicate looks unintentional.
// NOTE(review): $line["id"] is interpolated into the onclick handlers without a cast or
// escaping — confirm the caller always supplies an integer id.
function hook_article_button($line) {
return "<img src=\"plugins/af_sort_bayes/thumb_up.png\"
style=\"cursor : pointer\" style=\"cursor : pointer\"
- onclick=\"bayesTrain(".$line["id"].", true)\"
+ onclick=\"bayesTrain(".$line["id"].", true, event)\"
class='tagsPic' title='".__('+1')."'>" .
"<img src=\"plugins/af_sort_bayes/thumb_down.png\"
style=\"cursor : pointer\" style=\"cursor : pointer\"
- onclick=\"bayesTrain(".$line["id"].", false)\"
- class='tagsPic' title='".__('-1')."'>";
+ onclick=\"bayesTrain(".$line["id"].", false, event)\"
+ class='tagsPic' title='".__('-1')."'>" .
+ "<img src=\"plugins/af_sort_bayes/chart_bar.png\"
+ style=\"cursor : pointer\" style=\"cursor : pointer\"
+ onclick=\"bayesShow(".$line["id"].")\"
+ class='tagsPic' title='".__('Show classifier info')."'>";
}
// Article filter hook. An article whose hashed guid already has a row in the
// "{sql_prefix}_references" table is returned untouched; otherwise its text is
// run through the classifier and its score_modifier may be adjusted.
// NOTE(review): parts of this function are not visible here, so only the steps
// that can be seen are described.
function hook_article_filter($article) {
$owner_uid = $article["owner_uid"];
- $nbs = new NaiveBayesianStorage($owner_uid);
- $nb = new NaiveBayesian($nbs);
+ // guid already includes owner_uid so we don't need to include it
+ $result = $this->dbh->query("SELECT id FROM {$this->sql_prefix}_references WHERE
+ document_id = '" . $this->dbh->escape_string($article['guid_hashed']) . "'");
- $ref = $nbs->getReference($article["guid"], false);
// NOTE(review): uses the global db_num_rows() here while other methods call
// $this->dbh->num_rows() — confirm the two are equivalent.
+ if (db_num_rows($result) != 0) {
+ _debug("bayes: article already categorized");
+ return $article;
+ }
- if (isset($ref["category_id"])) return $article; // already categorized
// The classifier objects are only constructed after the early return above.
+ $nbs = new NaiveBayesianStorage($owner_uid);
+ $nb = new NaiveBayesian($nbs);
$categories = $nbs->getCategories();
$dst_category = $id_ugly;
// Classifier input: lowercased title + tag-stripped content, cut to max_document_length characters.
- $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));
+ $bayes_content = mb_substr(mb_strtolower($article["title"] . " " . strip_tags($article["content"])), 0, $this->max_document_length);
if ($count_neutral >= $this->auto_categorize_threshold) {
// enable automatic categorization
$prob_good = $result[$id_good];
$prob_bad = $result[$id_bad];
// A category is chosen only when its value is above 0.90; NaN values are explicitly rejected.
- if ($prob_good > 0.90) {
+ if (!is_nan($prob_good) && $prob_good > 0.90) {
$dst_category = $id_good;
$article["score_modifier"] += $this->score_modifier;
- } else if ($prob_bad > 0.90) {
+ } else if (!is_nan($prob_bad) && $prob_bad > 0.90) {
$dst_category = $id_bad;
$article["score_modifier"] -= $this->score_modifier;
}
$nb->updateProbabilities();
}
+ // Prints an HTML fragment with classifier details for a single article.
+ //
+ // Reads the article id from $_REQUEST["article_id"] and loads that article for
+ // the user in $_SESSION["uid"]. On a match it prints the title, the category
+ // currently stored for the article's guid (or "N/A"), and a table of the values
+ // returned by NaiveBayesian::categorize() for the article text. When no row
+ // matches, print_error() is called instead. A "Close this window" button is
+ // printed in both cases. Nothing is returned.
+ //
+ // Title and category names are read from storage, so they are HTML-escaped
+ // before being written into the page.
+ function showArticleStats() {
+ 	// A missing parameter becomes 0, which matches no article and yields the error branch.
+ 	$article_id = isset($_REQUEST["article_id"]) ? (int) $_REQUEST["article_id"] : 0;
+ 	// Cast so that only an integer is ever concatenated into the SQL below.
+ 	$owner_uid = (int) $_SESSION["uid"];
+
+ 	$result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
+ 		$article_id . " AND owner_uid = " . $owner_uid);
+
+ 	if ($this->dbh->num_rows($result) != 0) {
+ 		$guid = $this->dbh->fetch_result($result, 0, "guid");
+ 		$title = $this->dbh->fetch_result($result, 0, "title");
+
+ 		// Same preprocessing as the other classifier entry points: lowercase,
+ 		// strip tags, truncate to max_document_length characters.
+ 		$content = mb_substr(mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))), 0, $this->max_document_length);
+
+ 		print "<h2>" . htmlspecialchars($title, ENT_QUOTES, 'UTF-8') . "</h2>";
+
+ 		$nbs = new NaiveBayesianStorage($owner_uid);
+ 		$nb = new NaiveBayesian($nbs);
+
+ 		$categories = $nbs->getCategories();
+
+ 		$ref = $nbs->getReference($guid, false);
+
+ 		$current_cat = isset($ref["category_id"]) ? $categories[$ref["category_id"]]["category"] : "N/A";
+
+ 		print "<p>" . T_sprintf("Currently stored as: %s", htmlspecialchars($current_cat, ENT_QUOTES, 'UTF-8')) . "</p>";
+
+ 		// Kept in its own variable so the DB result handle above is not overwritten.
+ 		$probabilities = $nb->categorize($content);
+
+ 		print "<h3>" . __("Classifier result") . "</h3>";
+
+ 		print "<table>";
+ 		print "<tr><th>Category</th><th>Probability</th></tr>";
+
+ 		foreach ($probabilities as $k => $v) {
+ 			// Fall back to the raw key if the classifier reports a category id
+ 			// that is not present in the category list.
+ 			$cat_name = isset($categories[$k]["category"]) ? $categories[$k]["category"] : $k;
+
+ 			print "<tr>";
+ 			print "<td>" . htmlspecialchars($cat_name, ENT_QUOTES, 'UTF-8') . "</td>";
+ 			print "<td>" . $v . "</td>";
+
+ 			print "</tr>";
+ 		}
+
+ 		print "</table>";
+
+ 	} else {
+ 		print_error("Article not found");
+ 	}
+
+ 	print "<div align='center'>";
+
+ 	print "<button dojoType=\"dijit.form.Button\" onclick=\"return dijit.byId('bayesShowDlg').hide()\">".
+ 		__('Close this window')."</button>";
+
+ 	print "</div>";
+
+ }
+
// Returns the API version number declared by this plugin (2).
// NOTE(review): presumably this is the plugin-host API level — confirm against the host.
function api_version() {
	$declared_version = 2;

	return $declared_version;
}