From 132e42a9a7390eb1490f958db686d8655f171782 Mon Sep 17 00:00:00 2001
From: Andrew Dolgov <noreply@fakecake.org>
Date: Thu, 18 Jun 2015 19:02:39 +0300
Subject: [PATCH] limit maximum data length for bayesian processing

---
 plugins/af_sort_bayes/init.php                           | 8 +++++---
 plugins/af_sort_bayes/lib/class.naivebayesianstorage.php | 4 +++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/plugins/af_sort_bayes/init.php b/plugins/af_sort_bayes/init.php
index 51867f08..6599baa0 100644
--- a/plugins/af_sort_bayes/init.php
+++ b/plugins/af_sort_bayes/init.php
@@ -8,6 +8,7 @@ class Af_Sort_Bayes extends Plugin {
 	private $score_modifier = 50;
 	private $sql_prefix = "ttrss_plugin_af_sort_bayes";
 	private $auto_categorize_threshold = 10000;
+	private $max_document_length = 3000; // classifier can't rescale output for very long strings apparently
 
 	function about() {
 		return array(1.0,
@@ -47,7 +48,7 @@ class Af_Sort_Bayes extends Plugin {
 		if ($this->dbh->num_rows($result) != 0) {
 			$guid = $this->dbh->fetch_result($result, 0, "guid");
 			$title = $this->dbh->fetch_result($result, 0, "title");
-			$content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content")));
+			$content = mb_substr(mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))), 0, $this->max_document_length);
 			$score = $this->dbh->fetch_result($result, 0, "score");
 
 			$this->dbh->query("BEGIN");
@@ -302,7 +303,7 @@ class Af_Sort_Bayes extends Plugin {
 
 			$dst_category = $id_ugly;
 
-			$bayes_content = mb_strtolower($article["title"] . " " . strip_tags($article["content"]));
+			$bayes_content = mb_substr(mb_strtolower($article["title"] . " " . strip_tags($article["content"])), 0, $this->max_document_length);
 
 			if ($count_neutral >= $this->auto_categorize_threshold) {
 				// enable automatic categorization
@@ -358,7 +359,8 @@ class Af_Sort_Bayes extends Plugin {
 		if ($this->dbh->num_rows($result) != 0) {
 			$guid = $this->dbh->fetch_result($result, 0, "guid");
 			$title = $this->dbh->fetch_result($result, 0, "title");
-			$content = mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content")));
+
+			$content = mb_substr(mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))), 0, $this->max_document_length);
 
 			print "<h2>" . $title . "</h2>";
 
diff --git a/plugins/af_sort_bayes/lib/class.naivebayesianstorage.php b/plugins/af_sort_bayes/lib/class.naivebayesianstorage.php
index 73c1ee4c..99db1fc7 100644
--- a/plugins/af_sort_bayes/lib/class.naivebayesianstorage.php
+++ b/plugins/af_sort_bayes/lib/class.naivebayesianstorage.php
@@ -47,6 +47,7 @@
 	class NaiveBayesianStorage {
 		var $con = null;
 		var $owner_uid = null;
+		var $max_document_length = 3000; // classifier can't rescale output for very long strings apparently
 
 		function NaiveBayesianStorage($owner_uid) {
 			$this->con = Db::get();
@@ -239,7 +240,8 @@
 					$this->con->escape_string($ref['document_id']) . "'");
 
 				if ($this->con->num_rows($rs) != 0) {
-					$ref['content'] = mb_strtolower($this->con->fetch_result($rs, 0, 'title') . ' ' . strip_tags($this->con->fetch_result($rs, 0, 'content')));
+					$ref['content'] = mb_substr(mb_strtolower($this->con->fetch_result($rs, 0, 'title') . ' ' . strip_tags($this->con->fetch_result($rs, 0, 'content'))), 0,
+					$this->max_document_length);
 				}
 			}
 
-- 
2.39.5