git.wh0rd.org - tt-rss.git/blobdiff - plugins/af_sort_bayes/init.php
add basic classifier information window
[tt-rss.git] / plugins / af_sort_bayes / init.php
index 5daaca6dc7e9a33788a9a6793eac76e763b2a6d6..2293ea4905789548a639e0e26cbd50ad8db653a8 100644 (file)
@@ -7,6 +7,7 @@ class Af_Sort_Bayes extends Plugin {
        private $dbh;
        private $score_modifier = 50;
        private $sql_prefix = "ttrss_plugin_af_sort_bayes";
+       private $auto_categorize_threshold = 10000;
 
        function about() {
                return array(1.0,
@@ -16,7 +17,7 @@ class Af_Sort_Bayes extends Plugin {
 
        function init($host) {
                require_once __DIR__ . "/lib/class.naivebayesian.php";
-               require_once __DIR__ . "/lib/class.naivebayesian_ngram.php";
+               //require_once __DIR__ . "/lib/class.naivebayesian_ngram.php";
                require_once __DIR__ . "/lib/class.naivebayesianstorage.php";
 
                $this->host = $host;
@@ -86,7 +87,7 @@ class Af_Sort_Bayes extends Plugin {
                                                break;
                                        case "GOOD":
                                                $dst_category = "UGLY";
-                                               $score = -$this->score_modifier;
+                                               $score = 0;
                                                break;
                                }
                        }
@@ -116,12 +117,16 @@ class Af_Sort_Bayes extends Plugin {
        function hook_article_button($line) {
                return "<img src=\"plugins/af_sort_bayes/thumb_up.png\"
                        style=\"cursor : pointer\" style=\"cursor : pointer\"
-                       onclick=\"bayesTrain(".$line["id"].", true)\"
+                       onclick=\"bayesTrain(".$line["id"].", true, event)\"
                        class='tagsPic' title='".__('+1')."'>" .
                "<img src=\"plugins/af_sort_bayes/thumb_down.png\"
                        style=\"cursor : pointer\" style=\"cursor : pointer\"
-                       onclick=\"bayesTrain(".$line["id"].", false)\"
-                       class='tagsPic' title='".__('-1')."'>";
+                       onclick=\"bayesTrain(".$line["id"].", false, event)\"
+                       class='tagsPic' title='".__('-1')."'>" .
+               "<img src=\"plugins/af_sort_bayes/chart_bar.png\"
+                       style=\"cursor : pointer\" style=\"cursor : pointer\"
+                       onclick=\"bayesShow(".$line["id"].")\"
+                       class='tagsPic' title='".__('Show classifier info')."'>";
 
        }
 
@@ -201,18 +206,18 @@ class Af_Sort_Bayes extends Plugin {
                $this->dbh->query("COMMIT");
        }
 
-       function hook_prefs_tab($args) {
-               if ($args != "prefPrefs") return;
-
-               print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"".__('Bayesian classifier (af_sort_bayes)')."\">";
-
+       function renderPrefsUI() {
                $result = $this->dbh->query("SELECT category, probability, word_count,
                        (SELECT COUNT(id) FROM {$this->sql_prefix}_references WHERE
                                category_id = {$this->sql_prefix}_categories.id) as doc_count
                        FROM {$this->sql_prefix}_categories WHERE owner_uid = " . $_SESSION["uid"]);
 
+               print "<h3>" . __("Statistics") . "</h3>";
+
+               print "<p>".T_sprintf("Required UGLY word count for automatic matching: %d", $this->auto_categorize_threshold)."</p>";
+
                print "<table>";
-               print "<tr><th>Category</th><th>Probability</th><th>Word count</th><th>Article count</th></tr>";
+               print "<tr><th>Category</th><th>Probability</th><th>Words</th><th>Articles</th></tr>";
 
                while ($line = $this->dbh->fetch_assoc($result)) {
                        print "<tr>";
@@ -226,10 +231,35 @@ class Af_Sort_Bayes extends Plugin {
 
                print "</table>";
 
+               print "<h3>" . __("Last matched articles") . "</h3>";
+
+               $result = $this->dbh->query("SELECT te.title, category, tf.title AS feed_title
+                       FROM ttrss_entries AS te, ttrss_user_entries AS tu, ttrss_feeds AS tf, {$this->sql_prefix}_references AS tr, {$this->sql_prefix}_categories AS tc
+                       WHERE tf.id = tu.feed_id AND tu.ref_id = te.id AND tc.id = tr.category_id AND tr.document_id = te.guid ORDER BY te.id DESC LIMIT 20");
+
+               print "<ul class=\"browseFeedList\" style=\"border-width : 1px\">";
+
+               while ($line = $this->dbh->fetch_assoc($result)) {
+                       print "<li>" . $line["category"] . ": " . $line["title"] . " (" . $line["feed_title"] . ")</li>";
+               }
+
+               print "</ul>";
+
+               print "<button dojoType=\"dijit.form.Button\" onclick=\"return bayesUpdateUI()\">".
+                       __('Refresh')."</button> ";
+
                print "<button dojoType=\"dijit.form.Button\" onclick=\"return bayesClearDatabase()\">".
                        __('Clear database')."</button> ";
 
                //
+       }
+
+       function hook_prefs_tab($args) {
+               if ($args != "prefPrefs") return;
+
+               print "<div id=\"af_sort_bayes_prefs\" dojoType=\"dijit.layout.AccordionPane\" title=\"".__('Bayesian classifier (af_sort_bayes)')."\">";
+
+               $this->renderPrefsUI();
 
                print "</div>";
        }
@@ -237,6 +267,15 @@ class Af_Sort_Bayes extends Plugin {
        function hook_article_filter($article) {
                $owner_uid = $article["owner_uid"];
 
+               // guid already includes owner_uid so we don't need to include it
+               $result = $this->dbh->query("SELECT id FROM {$this->sql_prefix}_references WHERE
+                       document_id = '" . $this->dbh->escape_string($article['guid_hashed']) . "'");
+
+               if (db_num_rows($result) != 0) {
+                       _debug("bayes: article already categorized");
+                       return $article;
+               }
+
                $nbs = new NaiveBayesianStorage($owner_uid);
                $nb = new NaiveBayesian($nbs);
 
@@ -265,12 +304,12 @@ class Af_Sort_Bayes extends Plugin {
 
                        $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));
 
-                       if ($count_neutral >= 10000) {
+                       if ($count_neutral >= $this->auto_categorize_threshold) {
                                // enable automatic categorization
 
                                $result = $nb->categorize($bayes_content);
 
-                               print_r($result);
+                               //print_r($result);
 
                                if (count($result) == 3) {
                                        $prob_good = $result[$id_good];
@@ -310,6 +349,60 @@ class Af_Sort_Bayes extends Plugin {
                $nb->updateProbabilities();
        }
 
+       function showArticleStats() {
+               $article_id = (int) $_REQUEST["article_id"];
+
+               $result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " .
+                       $article_id . " AND owner_uid = " . $_SESSION["uid"]);
+
+               if ($this->dbh->num_rows($result) != 0) {
+                       $guid = $this->dbh->fetch_result($result, 0, "guid");
+                       $title = $this->dbh->fetch_result($result, 0, "title");
+                       $content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content")));
+
+                       print "<h2>" . $title . "</h2>";
+
+                       $nbs = new NaiveBayesianStorage($_SESSION["uid"]);
+                       $nb = new NaiveBayesian($nbs);
+
+                       $categories = $nbs->getCategories();
+
+                       $ref = $nbs->getReference($guid, false);
+
+                       $current_cat = isset($ref["category_id"]) ? $categories[$ref["category_id"]]["category"] : "N/A";
+
+                       print "<p>" . T_sprintf("Currently stored as: %s", $current_cat) . "</p>";
+
+                       $result = $nb->categorize($content);
+
+                       print "<h3>" . __("Classifier result") . "</h3>";
+
+                       print "<table>";
+                       print "<tr><th>Category</th><th>Probability</th></tr>";
+
+                       foreach ($result as $k => $v) {
+                               print "<tr>";
+                               print "<td>" . $categories[$k]["category"] . "</td>";
+                               print "<td>" . $v . "</td>";
+
+                               print "</tr>";
+                       }
+
+                       print "</table>";
+
+               } else {
+                       print_error("Article not found");
+               }
+
+               print "<div align='center'>";
+
+               print "<button dojoType=\"dijit.form.Button\" onclick=\"return dijit.byId('bayesShowDlg').hide()\">".
+                       __('Close this window')."</button>";
+
+               print "</div>";
+
+       }
+
        function api_version() {
                return 2;
        }