From: Andrew Dolgov Date: Wed, 17 Jun 2015 15:15:41 +0000 (+0300) Subject: use ngram tokens instead of whole words for matching X-Git-Tag: 16.3~300 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=3dcd00e4c42989ec1834c77157314b315377b1a9;p=tt-rss.git use ngram tokens instead of whole words for matching --- diff --git a/plugins/af_sort_bayes/init.php b/plugins/af_sort_bayes/init.php index 7699d2c4..5419a8f6 100644 --- a/plugins/af_sort_bayes/init.php +++ b/plugins/af_sort_bayes/init.php @@ -16,6 +16,7 @@ class Af_Sort_Bayes extends Plugin { function init($host) { require_once __DIR__ . "/lib/class.naivebayesian.php"; + require_once __DIR__ . "/lib/class.naivebayesian_ngram.php"; require_once __DIR__ . "/lib/class.naivebayesianstorage.php"; $this->host = $host; @@ -36,7 +37,7 @@ class Af_Sort_Bayes extends Plugin { $category = $train_up ? "GOOD" : "NEUTRAL"; $nbs = new NaiveBayesianStorage($_SESSION["uid"]); - $nb = new NaiveBayesian($nbs); + $nb = new NaiveBayesianNgram($nbs); $result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " . $article_id . " AND owner_uid = " . $_SESSION["uid"]); @@ -202,7 +203,7 @@ class Af_Sort_Bayes extends Plugin { $owner_uid = $article["owner_uid"]; $nbs = new NaiveBayesianStorage($owner_uid); - $nb = new NaiveBayesian($nbs); + $nb = new NaiveBayesianNgram($nbs); $categories = $nbs->getCategories(); @@ -227,7 +228,7 @@ class Af_Sort_Bayes extends Plugin { $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"])); - if ($count_neutral >= 3000 && $count_good >= 1000) { + if ($count_neutral >= 20000 && $count_good >= 10000) { // enable automatic categorization $result = $nb->categorize($bayes_content); @@ -261,7 +262,7 @@ class Af_Sort_Bayes extends Plugin { $this->dbh->query("COMMIT"); $nbs = new NaiveBayesianStorage($_SESSION["uid"]); - $nb = new NaiveBayesian($nbs); + $nb = new NaiveBayesianNgram($nbs); $nb->updateProbabilities(); }