From: Andrew Dolgov Date: Thu, 18 Jun 2015 16:02:39 +0000 (+0300) Subject: limit maximum data length for bayesian processing X-Git-Tag: 16.3~275 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=132e42a9a7390eb1490f958db686d8655f171782;p=tt-rss.git limit maximum data length for bayesian processing --- diff --git a/plugins/af_sort_bayes/init.php b/plugins/af_sort_bayes/init.php index 51867f08..6599baa0 100644 --- a/plugins/af_sort_bayes/init.php +++ b/plugins/af_sort_bayes/init.php @@ -8,6 +8,7 @@ class Af_Sort_Bayes extends Plugin { private $score_modifier = 50; private $sql_prefix = "ttrss_plugin_af_sort_bayes"; private $auto_categorize_threshold = 10000; + private $max_document_length = 3000; // classifier can't rescale output for very long strings apparently function about() { return array(1.0, @@ -47,7 +48,7 @@ class Af_Sort_Bayes extends Plugin { if ($this->dbh->num_rows($result) != 0) { $guid = $this->dbh->fetch_result($result, 0, "guid"); $title = $this->dbh->fetch_result($result, 0, "title"); - $content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))); + $content = mb_substr(mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))), 0, $this->max_document_length); $score = $this->dbh->fetch_result($result, 0, "score"); $this->dbh->query("BEGIN"); @@ -302,7 +303,7 @@ class Af_Sort_Bayes extends Plugin { $dst_category = $id_ugly; - $bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"])); + $bayes_content = mb_substr(mb_strtolower($article["title"] . " " . strip_tags($article["content"])), 0, $this->max_document_length); if ($count_neutral >= $this->auto_categorize_threshold) { // enable automatic categorization @@ -358,7 +359,8 @@ class Af_Sort_Bayes extends Plugin { if ($this->dbh->num_rows($result) != 0) { $guid = $this->dbh->fetch_result($result, 0, "guid"); $title = $this->dbh->fetch_result($result, 0, "title"); - $content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))); + + $content = mb_substr(mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))), 0, $this->max_document_length); print "

" . $title . "

"; diff --git a/plugins/af_sort_bayes/lib/class.naivebayesianstorage.php b/plugins/af_sort_bayes/lib/class.naivebayesianstorage.php index 73c1ee4c..99db1fc7 100644 --- a/plugins/af_sort_bayes/lib/class.naivebayesianstorage.php +++ b/plugins/af_sort_bayes/lib/class.naivebayesianstorage.php @@ -47,6 +47,7 @@ class NaiveBayesianStorage { var $con = null; var $owner_uid = null; + var $max_document_length = 3000; // classifier can't rescale output for very long strings apparently function NaiveBayesianStorage($owner_uid) { $this->con = Db::get(); @@ -239,7 +240,8 @@ $this->con->escape_string($ref['document_id']) . "'"); if ($this->con->num_rows($rs) != 0) { - $ref['content'] = mb_strtolower($this->con->fetch_result($rs, 0, 'title') . ' ' . strip_tags($this->con->fetch_result($rs, 0, 'content'))); + $ref['content'] = mb_substr(mb_strtolower($this->con->fetch_result($rs, 0, 'title') . ' ' . strip_tags($this->con->fetch_result($rs, 0, 'content'))), 0, + $this->max_document_length); } }