git.wh0rd.org - tt-rss.git/commitdiff
implement per-feed stemming language setting
author Andrew Dolgov <noreply@madoka.volgo-balt.ru>
Tue, 4 Aug 2015 10:32:52 +0000 (13:32 +0300)
committer Andrew Dolgov <noreply@madoka.volgo-balt.ru>
Tue, 4 Aug 2015 10:32:52 +0000 (13:32 +0300)
classes/pref/feeds.php
include/functions2.php
include/rssfuncs.php
schema/ttrss_schema_mysql.sql
schema/ttrss_schema_pgsql.sql
schema/versions/mysql/128.sql
schema/versions/pgsql/128.sql
update.php

index efa2c2af9b4b11bcaae94b1011cd92dc5d7fd924..01197d92c1e6062b6c6549bcdde29dff4147f9fb 100644 (file)
@@ -1,5 +1,7 @@
 <?php
 class Pref_Feeds extends Handler_Protected {
+       public static $feed_languages = array("English", "Danish", "Dutch", "Finnish", "French", "German", "Hungarian", "Italian", "Norwegian",
+               "Portuguese", "Russian", "Spanish", "Swedish", "Turkish", "Simple");
 
        function csrf_ignore($method) {
                $csrf_ignored = array("index", "getfeedtree", "add", "editcats", "editfeed",
@@ -593,6 +595,18 @@ class Pref_Feeds extends Handler_Protected {
                                'dojoType="dijit.form.Select"');
                }
 
+               /* FTS Stemming Language */
+
+               if (DB_TYPE == "pgsql") {
+                       $feed_language = $this->dbh->fetch_result($result, 0, "feed_language");
+
+                       print "<hr/>";
+
+                       print __('Language:') . " ";
+                       print_select("feed_language", $feed_language, $this::$feed_languages,
+                               'dojoType="dijit.form.Select"');
+               }
+
                print "</div>";
 
                print "<div class=\"dlgSec\">".__("Update")."</div>";
@@ -807,6 +821,18 @@ class Pref_Feeds extends Handler_Protected {
 
                }
 
+               /* FTS Stemming Language */
+
+               if (DB_TYPE == "pgsql") {
+                       print "<hr/>";
+
+                       print __('Language:') . " ";
+                       print_select("feed_language", "", $this::$feed_languages,
+                               'disabled="1" dojoType="dijit.form.Select"');
+
+                       $this->batch_edit_cbox("feed_language");
+               }
+
                print "</div>";
 
                print "<div class=\"dlgSec\">".__("Update")."</div>";
@@ -938,6 +964,8 @@ class Pref_Feeds extends Handler_Protected {
                $mark_unread_on_update = checkbox_to_sql_bool(
                        $this->dbh->escape_string($_POST["mark_unread_on_update"]));
 
+               $feed_language = $this->dbh->escape_string(trim($_POST["feed_language"]));
+
                if (strlen(FEED_CRYPT_KEY) > 0) {
                        require_once "crypt.php";
                        $auth_pass = substr(encrypt_string($auth_pass), 0, 250);
@@ -976,7 +1004,8 @@ class Pref_Feeds extends Handler_Protected {
                                hide_images = $hide_images,
                                include_in_digest = $include_in_digest,
                                always_display_enclosures = $always_display_enclosures,
-                               mark_unread_on_update = $mark_unread_on_update
+                               mark_unread_on_update = $mark_unread_on_update,
+                               feed_language = '$feed_language'
                        WHERE id = '$feed_id' AND owner_uid = " . $_SESSION["uid"]);
 
                        PluginHost::getInstance()->run_hooks(PluginHost::HOOK_PREFS_SAVE_FEED,
@@ -1051,6 +1080,10 @@ class Pref_Feeds extends Handler_Protected {
                                                $qpart = $category_qpart_nocomma;
                                                break;
 
+                                       case "feed_language":
+                                               $qpart = "feed_language = '$feed_language'";
+                                               break;
+
                                }
 
                                if ($qpart) {
index a9bb49df83cda9fef8782bf900a60f4989a79dcc..551d55d730838aee30c6c67df7111ef384957587 100644 (file)
 
                        if (DB_TYPE == "pgsql") {
                                array_push($query_keywords,
-                                       "(tsvector_combined @@ '$search_query_leftover'::tsquery)");
+                                       "(tsvector_combined @@ to_tsquery('english', '$search_query_leftover'))");
                        }
 
                }
index 5ebddf9eff4ef609ef46ae685e89ec29b4beda3f..c8e2ce28fb74c39683abb0ee8f6afaf0a4f6afc9 100644 (file)
                        feed_url,auth_pass,cache_images,
                        mark_unread_on_update, owner_uid,
                        pubsub_state, auth_pass_encrypted,
+                       feed_language,
                        (SELECT max(date_entered) FROM
                                ttrss_entries, ttrss_user_entries where ref_id = id AND feed_id = '$feed') AS last_article_timestamp
                        FROM ttrss_feeds WHERE id = '$feed'");
 
                $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
                $fetch_url = db_fetch_result($result, 0, "feed_url");
+               $feed_language = db_escape_string(mb_strtolower(db_fetch_result($result, 0, "feed_language")));
+               if (!$feed_language) $feed_language = 'english';
 
                $feed = db_escape_string($feed);
 
                        // We use local pluginhost here because we need to load different per-user feed plugins
                        $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
 
+                       _debug("language: $feed_language", $debug_enabled);
                        _debug("processing feed data...", $debug_enabled);
 
 //                     db_query("BEGIN");
                                                $tsvector_combined = db_escape_string(mb_substr($entry_title . ' ' . strip_tags($entry_content),
                                                        0, 1000000));
 
-                                               $tsvector_qpart = "tsvector_combined = to_tsvector('simple', '$tsvector_combined'),";
+                                               $tsvector_qpart = "tsvector_combined = to_tsvector('$feed_language', '$tsvector_combined'),";
 
                                        } else {
                                                $tsvector_qpart = "";
index 09234369e36c17d4ab5ebab5e20d7667c5988efc..8a6f7d681a375d934640ee979b52b9039928526d 100644 (file)
@@ -127,6 +127,7 @@ create table ttrss_feeds (id integer not null auto_increment primary key,
        view_settings varchar(250) not null default '',
        pubsub_state integer not null default 0,
        favicon_last_checked datetime default null,
+       feed_language varchar(100) not null default '',
        foreign key (owner_uid) references ttrss_users(id) ON DELETE CASCADE,
        foreign key (cat_id) references ttrss_feed_categories(id) ON DELETE SET NULL,
        foreign key (parent_feed) references ttrss_feeds(id) ON DELETE SET NULL) ENGINE=InnoDB DEFAULT CHARSET=UTF8;
index 4cdc15f9a8c3cc57082f135ca3b9924e0e34146b..9dafa693e11c409f9170fb789c0099c0cd75081b 100644 (file)
@@ -96,6 +96,7 @@ create table ttrss_feeds (id serial not null primary key,
        view_settings varchar(250) not null default '',
        pubsub_state integer not null default 0,
        favicon_last_checked timestamp default null,
+       feed_language varchar(100) not null default '',
        auth_pass_encrypted boolean not null default false);
 
 create index ttrss_feeds_owner_uid_index on ttrss_feeds(owner_uid);
index 0545cb3aeebce73b28e5b60801ebd1945e5d0794..0a4d7ab7c0a1ff4f1aa91b2c6ed0aeacc4198048 100644 (file)
@@ -1,5 +1,10 @@
 BEGIN;
 
+alter table ttrss_feeds add column feed_language varchar(100);
+update ttrss_feeds set feed_language = '';
+alter table ttrss_feeds change feed_language feed_language varchar(100) not null;
+alter table ttrss_feeds alter column feed_language set default '';
+
 UPDATE ttrss_version SET schema_version = 128;
 
 COMMIT;
index d85ce7fe93fa8b8a8b7d63443fbbe62587283906..3aba672201b949468104bef3667ca50c1f8e484b 100644 (file)
@@ -3,6 +3,11 @@ BEGIN;
 alter table ttrss_entries add column tsvector_combined tsvector;
 create index ttrss_entries_tsvector_combined_idx on ttrss_entries using gin(tsvector_combined);
 
+alter table ttrss_feeds add column feed_language varchar(100);
+update ttrss_feeds set feed_language = '';
+alter table ttrss_feeds alter column feed_language set not null;
+alter table ttrss_feeds alter column feed_language set default '';
+
 UPDATE ttrss_version SET schema_version = 128;
 
 COMMIT;
index 06578aaa4860421da3a1845694da6a96bf5c49d9..8fc28973f6dd3ce2dd60a741e47aa4b68f554480 100755 (executable)
@@ -33,7 +33,7 @@
                        "update-schema",
                        "convert-filters",
                        "force-update",
-                       "update-search-idx",
+                       "gen-search-idx",
                        "list-plugins",
                        "help");
 
@@ -81,7 +81,7 @@
                print "  --log FILE           - log messages to FILE\n";
                print "  --indexes            - recreate missing schema indexes\n";
                print "  --update-schema      - update database schema\n";
-               print "  --update-search-idx  - update PostgreSQL fulltext search index\n";
+               print "  --gen-search-idx     - generate basic PostgreSQL fulltext search index\n";
                print "  --convert-filters    - convert type1 filters to type2\n";
                print "  --force-update       - force update of all feeds\n";
                print "  --list-plugins       - list all available plugins\n";
 
        }
 
-       if (isset($options["update-search-idx"])) {
-               echo "Generating search index...\n";
+       if (isset($options["gen-search-idx"])) {
+               echo "Generating search index (stemming set to English)...\n";
 
                $result = db_query("SELECT COUNT(id) AS count FROM ttrss_entries");
                $count = db_fetch_result($result, 0, "count");
                                        $tsvector_combined = db_escape_string(mb_substr($line['title'] . ' ' . strip_tags($line['content']),
                                                0, 1000000));
 
-                                       db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('simple', '$tsvector_combined') WHERE id = " . $line["id"]);
+                                       db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('english', '$tsvector_combined') WHERE id = " . $line["id"]);
                                }
 
                                $offset += $limit;