From 3318d324105ee222a54afc94076878c12b588c24 Mon Sep 17 00:00:00 2001
From: Andrew Dolgov <noreply@fakecake.org>
Date: Fri, 19 Jun 2015 10:12:47 +0300
Subject: [PATCH] move language detection to a plugin, remove config.php
 constant

---
 config.php-dist                               |  6 ---
 include/functions.php                         |  2 -
 include/rssfuncs.php                          | 29 ++----------
 include/sanity_config.php                     |  4 +-
 plugins/af_lang_detect/init.php               | 46 +++++++++++++++++++
 .../languagedetect/LanguageDetect.php         |  0
 .../Text/LanguageDetect/Exception.php         |  0
 .../Text/LanguageDetect/ISO639.php            |  0
 .../Text/LanguageDetect/Parser.php            |  0
 .../languagedetect/data/lang.dat              |  0
 .../languagedetect/data/unicode_blocks.dat    |  0
 11 files changed, 53 insertions(+), 34 deletions(-)
 create mode 100644 plugins/af_lang_detect/init.php
 rename {lib => plugins/af_lang_detect}/languagedetect/LanguageDetect.php (100%)
 rename {lib => plugins/af_lang_detect}/languagedetect/Text/LanguageDetect/Exception.php (100%)
 rename {lib => plugins/af_lang_detect}/languagedetect/Text/LanguageDetect/ISO639.php (100%)
 rename {lib => plugins/af_lang_detect}/languagedetect/Text/LanguageDetect/Parser.php (100%)
 rename {lib => plugins/af_lang_detect}/languagedetect/data/lang.dat (100%)
 rename {lib => plugins/af_lang_detect}/languagedetect/data/unicode_blocks.dat (100%)

diff --git a/config.php-dist b/config.php-dist
index c0729b61..311b94df 100644
--- a/config.php-dist
+++ b/config.php-dist
@@ -180,12 +180,6 @@
 	define('CHECK_FOR_UPDATES', true);
 	// Check for updates automatically if running Git version
  
-	define('DETECT_ARTICLE_LANGUAGE', false);
-	// Detect article language when updating feeds, presently this is only
-	// used for hyphenation. This may increase amount of CPU time used by 
-	// update processes, disable if necessary (i.e. you are being billed
-	// for CPU time).
-
 	define('ENABLE_GZIP_OUTPUT', false);
 	// Selectively gzip output to improve wire performance. This requires
 	// PHP Zlib extension on the server.
diff --git a/include/functions.php b/include/functions.php
index edc196f3..5c10ac6a 100644
--- a/include/functions.php
+++ b/include/functions.php
@@ -99,8 +99,6 @@
 	require_once "lib/accept-to-gettext.php";
 	require_once "lib/gettext/gettext.inc";
 
-	require_once "lib/languagedetect/LanguageDetect.php";
-
 	function startup_gettext() {
 
 		# Get locale from Accept-Language header
diff --git a/include/rssfuncs.php b/include/rssfuncs.php
index 4efc843c..7a3ea740 100644
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@@ -443,13 +443,6 @@
 			$rss->init();
 		}
 
-		if (DETECT_ARTICLE_LANGUAGE) {
-			require_once "lib/languagedetect/LanguageDetect.php";
-
-			$lang = new Text_LanguageDetect();
-			$lang->setNameMode(2);
-		}
-
 //		print_r($rss);
 
 		$feed = db_escape_string($feed);
@@ -645,21 +638,6 @@
 					print "\n";
 				}
 
-				$entry_language = "";
-
-				if (DETECT_ARTICLE_LANGUAGE) {
-					$entry_language = $lang->detect($entry_title . " " . $entry_content, 1);
-
-					if (count($entry_language) > 0) {
-						$possible = array_keys($entry_language);
-						$entry_language = $possible[0];
-
-						_debug("detected language: $entry_language", $debug_enabled);
-					} else {
-						$entry_language = "";
-					}
-				}
-
 				$entry_comments = $item->get_comments_url();
 				$entry_author = $item->get_author();
 
@@ -695,17 +673,19 @@
 
 				_debug("done collecting data.", $debug_enabled);
 
-				$result = db_query("SELECT id, content_hash FROM ttrss_entries
+				$result = db_query("SELECT id, content_hash, lang FROM ttrss_entries
 					WHERE guid = '".db_escape_string($entry_guid)."' OR guid = '$entry_guid_hashed'");
 
 				if (db_num_rows($result) != 0) {
 					$base_entry_id = db_fetch_result($result, 0, "id");
 					$entry_stored_hash = db_fetch_result($result, 0, "content_hash");
 					$article_labels = get_article_labels($base_entry_id, $owner_uid);
+					$entry_language = db_fetch_result($result, 0, "lang");
 				} else {
 					$base_entry_id = false;
 					$entry_stored_hash = "";
 					$article_labels = array();
+					$entry_language = "";
 				}
 
 				$article = array("owner_uid" => $owner_uid, // read only
@@ -719,7 +699,7 @@
 					"author" => $entry_author,
 					"force_catchup" => false, // ugly hack for the time being
 					"score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
-					"language" => $entry_language, // read only
+					"language" => $entry_language,
 					"feed" => array("id" => $feed,
 						"fetch_url" => $fetch_url,
 						"site_url" => $site_url)
@@ -783,6 +763,7 @@
 				$entry_force_catchup = $article["force_catchup"];
 				$article_labels = $article["labels"];
 				$entry_score_modifier = (int) $article["score_modifier"];
+				$entry_language = db_escape_string($article["language"]);
 
 				if ($debug_enabled) {
 					_debug("article labels:", $debug_enabled);
diff --git a/include/sanity_config.php b/include/sanity_config.php
index 6120e5fe..eaa1668b 100644
--- a/include/sanity_config.php
+++ b/include/sanity_config.php
@@ -1,3 +1,3 @@
-<?php # This file has been generated at:  Tue Feb 3 14:45:46 MSK 2015
+<?php # This file has been generated at:  Fri, Jun 19, 2015 10:11:43 AM
 define('GENERATED_CONFIG_CHECK', 26);
-$requred_defines = array( 'DB_TYPE', 'DB_HOST', 'DB_USER', 'DB_NAME', 'DB_PASS', 'MYSQL_CHARSET', 'SELF_URL_PATH', 'FEED_CRYPT_KEY', 'SINGLE_USER_MODE', 'SIMPLE_UPDATE_MODE', 'PHP_EXECUTABLE', 'LOCK_DIRECTORY', 'CACHE_DIR', 'ICONS_DIR', 'ICONS_URL', 'AUTH_AUTO_CREATE', 'AUTH_AUTO_LOGIN', 'FORCE_ARTICLE_PURGE', 'PUBSUBHUBBUB_HUB', 'PUBSUBHUBBUB_ENABLED', 'SPHINX_SERVER', 'SPHINX_INDEX', 'ENABLE_REGISTRATION', 'REG_NOTIFY_ADDRESS', 'REG_MAX_USERS', 'SESSION_COOKIE_LIFETIME', 'SESSION_CHECK_ADDRESS', 'SMTP_FROM_NAME', 'SMTP_FROM_ADDRESS', 'DIGEST_SUBJECT', 'SMTP_SERVER', 'SMTP_LOGIN', 'SMTP_PASSWORD', 'SMTP_SECURE', 'CHECK_FOR_UPDATES', 'DETECT_ARTICLE_LANGUAGE', 'ENABLE_GZIP_OUTPUT', 'PLUGINS', 'LOG_DESTINATION', 'CONFIG_VERSION'); ?>
+$requred_defines = array( 'DB_TYPE', 'DB_HOST', 'DB_USER', 'DB_NAME', 'DB_PASS', 'MYSQL_CHARSET', 'SELF_URL_PATH', 'FEED_CRYPT_KEY', 'SINGLE_USER_MODE', 'SIMPLE_UPDATE_MODE', 'PHP_EXECUTABLE', 'LOCK_DIRECTORY', 'CACHE_DIR', 'ICONS_DIR', 'ICONS_URL', 'AUTH_AUTO_CREATE', 'AUTH_AUTO_LOGIN', 'FORCE_ARTICLE_PURGE', 'PUBSUBHUBBUB_HUB', 'PUBSUBHUBBUB_ENABLED', 'SPHINX_SERVER', 'SPHINX_INDEX', 'ENABLE_REGISTRATION', 'REG_NOTIFY_ADDRESS', 'REG_MAX_USERS', 'SESSION_COOKIE_LIFETIME', 'SESSION_CHECK_ADDRESS', 'SMTP_FROM_NAME', 'SMTP_FROM_ADDRESS', 'DIGEST_SUBJECT', 'SMTP_SERVER', 'SMTP_LOGIN', 'SMTP_PASSWORD', 'SMTP_SECURE', 'CHECK_FOR_UPDATES', 'ENABLE_GZIP_OUTPUT', 'PLUGINS', 'LOG_DESTINATION', 'CONFIG_VERSION'); ?>
diff --git a/plugins/af_lang_detect/init.php b/plugins/af_lang_detect/init.php
new file mode 100644
index 00000000..3f2eb29f
--- /dev/null
+++ b/plugins/af_lang_detect/init.php
@@ -0,0 +1,46 @@
+<?php
+class Af_Lang_Detect extends Plugin {
+	private $host;
+	private $lang; // Text_LanguageDetect instance, created in init()
+
+	// Plugin metadata; presumably version, description and author.
+	function about() {
+		return array(1.0, "Detect article language", "fox");
+	}
+
+	// Registers the article filter hook and sets up the bundled detector.
+	function init($host) {
+		$this->host = $host;
+		$host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
+
+		require_once __DIR__ . "/languagedetect/LanguageDetect.php";
+		$this->lang = new Text_LanguageDetect();
+		$this->lang->setNameMode(2);
+	}
+
+	// Stores the top detect() result for title + content in $article["language"];
+	// the article is returned untouched when nothing usable was detected.
+	function hook_article_filter($article) {
+		if (!$this->lang) return $article;
+		try {
+			$scores = $this->lang->detect($article['title'] . " " . $article['content'], 1);
+		} catch (Exception $e) {
+			// best-effort: a detector failure should not abort article processing
+			_debug("language detection failed: " . $e->getMessage());
+			return $article;
+		}
+
+		// is_array() keeps count()/array_keys() away from a non-array result
+		if (is_array($scores) && count($scores) > 0) {
+			$possible = array_keys($scores);
+			_debug("detected language: " . $possible[0]);
+			$article["language"] = $possible[0];
+		}
+		return $article;
+	}
+
+	function api_version() {
+		return 2;
+	}
+}
+?>
diff --git a/lib/languagedetect/LanguageDetect.php b/plugins/af_lang_detect/languagedetect/LanguageDetect.php
similarity index 100%
rename from lib/languagedetect/LanguageDetect.php
rename to plugins/af_lang_detect/languagedetect/LanguageDetect.php
diff --git a/lib/languagedetect/Text/LanguageDetect/Exception.php b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Exception.php
similarity index 100%
rename from lib/languagedetect/Text/LanguageDetect/Exception.php
rename to plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Exception.php
diff --git a/lib/languagedetect/Text/LanguageDetect/ISO639.php b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/ISO639.php
similarity index 100%
rename from lib/languagedetect/Text/LanguageDetect/ISO639.php
rename to plugins/af_lang_detect/languagedetect/Text/LanguageDetect/ISO639.php
diff --git a/lib/languagedetect/Text/LanguageDetect/Parser.php b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Parser.php
similarity index 100%
rename from lib/languagedetect/Text/LanguageDetect/Parser.php
rename to plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Parser.php
diff --git a/lib/languagedetect/data/lang.dat b/plugins/af_lang_detect/languagedetect/data/lang.dat
similarity index 100%
rename from lib/languagedetect/data/lang.dat
rename to plugins/af_lang_detect/languagedetect/data/lang.dat
diff --git a/lib/languagedetect/data/unicode_blocks.dat b/plugins/af_lang_detect/languagedetect/data/unicode_blocks.dat
similarity index 100%
rename from lib/languagedetect/data/unicode_blocks.dat
rename to plugins/af_lang_detect/languagedetect/data/unicode_blocks.dat
-- 
2.39.5