]> git.wh0rd.org - tt-rss.git/commitdiff
more fts stuff for simple index
authorAndrew Dolgov <noreply@madoka.volgo-balt.ru>
Tue, 4 Aug 2015 09:52:49 +0000 (12:52 +0300)
committerAndrew Dolgov <noreply@madoka.volgo-balt.ru>
Tue, 4 Aug 2015 09:52:49 +0000 (12:52 +0300)
include/functions2.php
include/rssfuncs.php
schema/ttrss_schema_pgsql.sql
schema/versions/mysql/128.sql [new file with mode: 0644]
schema/versions/pgsql/128.sql [new file with mode: 0644]
update.php

index 206673b7d2af8c470de43abf865a6c9418ceeb11..a9bb49df83cda9fef8782bf900a60f4989a79dcc 100644 (file)
 
        function search_to_sql($search) {
 
-               /*if (DB_TYPE == "pgsql") {
-                       $search_escaped = db_escape_string($search);
-
-                       return array("(to_tsvector('english', SUBSTR(ttrss_entries.title, 0, 200) || ' ' || SUBSTR(content, 0, 800))
-                               @@ to_tsquery('$search_escaped'))", explode(" ", $search));
-               }*/
-
                $keywords = str_getcsv($search, " ");
                $query_keywords = array();
                $search_words = array();
-               $search_query_leftover = "";
+               $search_query_leftover = array();
 
                foreach ($keywords as $k) {
                        if (strpos($k, "-") === 0) {
 
                                        array_push($query_keywords, "(".SUBSTRING_FOR_DATE."(updated,1,LENGTH('$k')) $not = '$k')");
                                } else {
-                                       $search_query_leftover .= $k . " ";
 
-                                       if (!$not) array_push($search_words, $k);
+                                       if (DB_TYPE == "pgsql") {
+                                               $k = mb_strtolower($k);
+                                               array_push($search_query_leftover, $not ? "!$k" : $k);
+                                       } else {
+                                               array_push($query_keywords, "(UPPER(ttrss_entries.title) $not LIKE UPPER('%$k%')
+                                                       OR UPPER(ttrss_entries.content) $not LIKE UPPER('%$k%'))");
+                                       }
 
-                                       /*array_push($query_keywords, "(UPPER(ttrss_entries.title) $not LIKE UPPER('%$k%')
-                                               OR UPPER(ttrss_entries.content) $not LIKE UPPER('%$k%'))");
-                                       if (!$not) array_push($search_words, $k);*/
+                                       if (!$not) array_push($search_words, $k);
                                }
                        }
                }
 
-               if ($search_query_leftover) {
-                       $search_query_leftover = db_escape_string($search_query_leftover);
+               if (count($search_query_leftover) > 0) {
+                       $search_query_leftover = db_escape_string(implode(" & ", $search_query_leftover));
 
-                       array_push($query_keywords,
-                               "(to_tsvector('simple', SUBSTR(ttrss_entries.title, 0, 200) || ' ' || SUBSTR(content, 0, 800))
-                               @@ to_tsquery('$search_query_leftover'))");
+                       if (DB_TYPE == "pgsql") {
+                               array_push($query_keywords,
+                                       "(tsvector_combined @@ '$search_query_leftover'::tsquery)");
+                       }
 
                }
 
                                if ($feed == -3)
                                        $first_id_query_strategy_part = "true";
 
-                               // if previous topmost article id changed that means our current pagination is no longer valid
-                               $query = "SELECT DISTINCT
-                                               ttrss_feeds.title,
-                                               date_entered,
-                                               guid,
-                                               ttrss_entries.id,
-                                               ttrss_entries.title,
-                                               updated,
-                                               score,
-                                               marked,
-                                               published,
-                                               last_marked,
-                                               last_published
-                                       FROM
-                                               $from_qpart
-                                       WHERE
-                                       $feed_check_qpart
-                                       ttrss_user_entries.ref_id = ttrss_entries.id AND
-                                       ttrss_user_entries.owner_uid = '$owner_uid' AND
-                                       $search_query_part
-                                       $start_ts_query_part
-                                       $since_id_part
-                                       $first_id_query_strategy_part ORDER BY $order_by LIMIT 1";
+                               if (!$search) {
+                                       // if previous topmost article id changed that means our current pagination is no longer valid
+                                       $query = "SELECT DISTINCT
+                                                       ttrss_feeds.title,
+                                                       date_entered,
+                                                       guid,
+                                                       ttrss_entries.id,
+                                                       ttrss_entries.title,
+                                                       updated,
+                                                       score,
+                                                       marked,
+                                                       published,
+                                                       last_marked,
+                                                       last_published
+                                               FROM
+                                                       $from_qpart
+                                               WHERE
+                                               $feed_check_qpart
+                                               ttrss_user_entries.ref_id = ttrss_entries.id AND
+                                               ttrss_user_entries.owner_uid = '$owner_uid' AND
+                                               $search_query_part
+                                               $start_ts_query_part
+                                               $since_id_part
+                                               $first_id_query_strategy_part ORDER BY $order_by LIMIT 1";
 
                                        if ($_REQUEST["debug"]) {
                                                print $query;
 
                                        $result = db_query($query);
                                        if ($result && db_num_rows($result) > 0) {
-                                               $first_id = (int) db_fetch_result($result, 0, "id");
+                                               $first_id = (int)db_fetch_result($result, 0, "id");
 
                                                if ($offset > 0 && $first_id && $check_first_id && $first_id != $check_first_id) {
                                                        return array(-1, $feed_title, $feed_site_url, $last_error, $last_updated, $search_words, $first_id);
                                                }
                                        }
+                               }
 
                                $query = "SELECT DISTINCT
                                                date_entered,
index 6eb4e6d98017925779edf56d9a792d90201adf5c..5ebddf9eff4ef609ef46ae685e89ec29b4beda3f 100644 (file)
 
                                        _debug("RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
 
+                                       if (DB_TYPE == "pgsql") {
+                                               $tsvector_combined = db_escape_string(mb_substr($entry_title . ' ' . strip_tags($entry_content),
+                                                       0, 1000000));
+
+                                               $tsvector_qpart = "tsvector_combined = to_tsvector('simple', '$tsvector_combined'),";
+
+                                       } else {
+                                               $tsvector_qpart = "";
+                                       }
+
                                        db_query("UPDATE ttrss_entries
                                                SET title = '$entry_title',
                                                        content = '$entry_content',
                                                        content_hash = '$entry_current_hash',
                                                        updated = '$entry_timestamp_fmt',
+                                                       $tsvector_qpart
                                                        num_comments = '$num_comments',
                                                        plugin_data = '$entry_plugin_data',
                                                        author = '$entry_author',
index 2b37c95fb6bf82b1510d717bc044dddad311597a..4cdc15f9a8c3cc57082f135ca3b9924e0e34146b 100644 (file)
@@ -142,12 +142,14 @@ create table ttrss_entries (id serial not null primary key,
        num_comments integer not null default 0,
        comments varchar(250) not null default '',
        plugin_data text,
+       tsvector_combined tsvector,
        lang varchar(2),
        author varchar(250) not null default '');
 
 -- create index ttrss_entries_title_index on ttrss_entries(title);
 create index ttrss_entries_date_entered_index on ttrss_entries(date_entered);
 create index ttrss_entries_updated_idx on ttrss_entries(updated);
+create index ttrss_entries_tsvector_combined_idx on ttrss_entries using gin(tsvector_combined);
 
 create table ttrss_user_entries (
        int_id serial not null primary key,
diff --git a/schema/versions/mysql/128.sql b/schema/versions/mysql/128.sql
new file mode 100644 (file)
index 0000000..0545cb3
--- /dev/null
@@ -0,0 +1,5 @@
+BEGIN;
+
+UPDATE ttrss_version SET schema_version = 128;
+
+COMMIT;
diff --git a/schema/versions/pgsql/128.sql b/schema/versions/pgsql/128.sql
new file mode 100644 (file)
index 0000000..d85ce7f
--- /dev/null
@@ -0,0 +1,8 @@
+BEGIN;
+
+alter table ttrss_entries add column tsvector_combined tsvector;
+create index ttrss_entries_tsvector_combined_idx on ttrss_entries using gin(tsvector_combined);
+
+UPDATE ttrss_version SET schema_version = 128;
+
+COMMIT;
index 521b956ad33b7d60311928690ff0da1716864042..06578aaa4860421da3a1845694da6a96bf5c49d9 100755 (executable)
@@ -33,6 +33,7 @@
                        "update-schema",
                        "convert-filters",
                        "force-update",
+                       "update-search-idx",
                        "list-plugins",
                        "help");
 
@@ -80,6 +81,7 @@
                print "  --log FILE           - log messages to FILE\n";
                print "  --indexes            - recreate missing schema indexes\n";
                print "  --update-schema      - update database schema\n";
+               print "  --update-search-idx  - update PostgreSQL fulltext search index\n";
                print "  --convert-filters    - convert type1 filters to type2\n";
                print "  --force-update       - force update of all feeds\n";
                print "  --list-plugins       - list all available plugins\n";
 
        }
 
+       if (isset($options["update-search-idx"])) {
+               echo "Generating search index...\n";
+
+               $result = db_query("SELECT COUNT(id) AS count FROM ttrss_entries");
+               $count = db_fetch_result($result, 0, "count");
+
+               print "Total entries: $count.\n";
+
+               $offset = 0;
+               $limit = 1000;
+
+               while (true) {
+                       $result = db_query("SELECT id, title, content FROM ttrss_entries WHERE tsvector_combined IS NULL ORDER BY id LIMIT $limit OFFSET $offset");
+
+                       if (db_num_rows($result) != 0) {
+                               echo "Offset $offset...\n";
+
+                               while ($line = db_fetch_assoc($result)) {
+                                       $tsvector_combined = db_escape_string(mb_substr($line['title'] . ' ' . strip_tags($line['content']),
+                                               0, 1000000));
+
+                                       db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('simple', '$tsvector_combined') WHERE id = " . $line["id"]);
+                               }
+
+                               $offset += $limit;
+                       } else {
+                               echo "All done.\n";
+                               break;
+                       }
+
+               }
+
+       }
+
        if (isset($options["list-plugins"])) {
                $tmppluginhost = new PluginHost();
                $tmppluginhost->load_all($tmppluginhost::KIND_ALL);