git.wh0rd.org - tt-rss.git/blobdiff - update.php
gen-search-idx: do not rely on offsets
[tt-rss.git] / update.php
index 06578aaa4860421da3a1845694da6a96bf5c49d9..8a00cdd55b087d7453f4708be2d875fae0bbeacd 100755 (executable)
@@ -33,7 +33,7 @@
                        "update-schema",
                        "convert-filters",
                        "force-update",
-                       "update-search-idx",
+                       "gen-search-idx",
                        "list-plugins",
                        "help");
 
@@ -81,7 +81,7 @@
                print "  --log FILE           - log messages to FILE\n";
                print "  --indexes            - recreate missing schema indexes\n";
                print "  --update-schema      - update database schema\n";
-               print "  --update-search-idx  - update PostgreSQL fulltext search index\n";
+               print "  --gen-search-idx     - generate basic PostgreSQL fulltext search index\n";
                print "  --convert-filters    - convert type1 filters to type2\n";
                print "  --force-update       - force update of all feeds\n";
                print "  --list-plugins       - list all available plugins\n";
 
        }
 
-       if (isset($options["update-search-idx"])) {
-               echo "Generating search index...\n";
+       if (isset($options["gen-search-idx"])) {
+               echo "Generating search index (stemming set to English)...\n";
 
-               $result = db_query("SELECT COUNT(id) AS count FROM ttrss_entries");
+               $result = db_query("SELECT COUNT(id) AS count FROM ttrss_entries WHERE tsvector_combined IS NULL");
                $count = db_fetch_result($result, 0, "count");
 
-               print "Total entries: $count.\n";
+               print "Articles to process: $count.\n";
 
-               $offset = 0;
-               $limit = 1000;
+               $limit = 500;
+               $processed = 0;
 
                while (true) {
-                       $result = db_query("SELECT id, title, content FROM ttrss_entries WHERE tsvector_combined IS NULL ORDER BY id LIMIT $limit OFFSET $offset");
+                       $result = db_query("SELECT id, title, content FROM ttrss_entries WHERE tsvector_combined IS NULL ORDER BY id LIMIT $limit");
 
                        if (db_num_rows($result) != 0) {
-                               echo "Offset $offset...\n";
-
                                while ($line = db_fetch_assoc($result)) {
                                        $tsvector_combined = db_escape_string(mb_substr($line['title'] . ' ' . strip_tags($line['content']),
                                                0, 1000000));
 
-                                       db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('simple', '$tsvector_combined') WHERE id = " . $line["id"]);
+                                       db_query("UPDATE ttrss_entries SET tsvector_combined = to_tsvector('english', '$tsvector_combined') WHERE id = " . $line["id"]);
                                }
 
-                               $offset += $limit;
+                               $processed += db_num_rows($result);
+                               print "Processed $processed articles...\n";
+
                        } else {
                                echo "All done.\n";
                                break;