git.wh0rd.org Git - tt-rss.git/commitdiff
implement proof of concept postgresql pg_trgm N-gram based marking of similar article...
author Andrew Dolgov <fox@madoka.volgo-balt.ru>
Mon, 9 Jul 2012 15:49:48 +0000 (19:49 +0400)
committer Andrew Dolgov <fox@madoka.volgo-balt.ru>
Mon, 9 Jul 2012 15:49:48 +0000 (19:49 +0400)
include/functions.php
include/rssfuncs.php

index 1e527e70db0f735c627ce3ab0ad13932a34ddb4e..b14515bdd322ab2b50a11b8677f6aab67b9a8293 100644 (file)
                                        </head><body>";
                        }
 
+                       $rv['title'] = $line['title'];
+
                        $rv['content'] .= "<div id=\"PTITLE-$id\" style=\"display : none\">" .
                                truncate_string(strip_tags($line['title']), 15) . "</div>";
 
index 12c4a57d21a9090150c7d74efb8b20cb9dc98e55..59fa3d54781776d70d05d4ff6758b1455940195d 100644 (file)
                                                        $published = 'false';
                                                }
 
+                                               // N-grams: optional title-similarity check. Runs only when DB_TYPE is "pgsql" and the _NGRAM_TITLE_DUPLICATE_THRESHOLD constant is defined.
+                                               // The query counts this owner's entries updated within the last 7 days whose similarity(title, $entry_title) is at or above the threshold.
+                                               if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) {
+                                                       // NOTE(review): $entry_title and $owner_uid are interpolated directly into the SQL text; confirm they are escaped/validated upstream. similarity() is presumably the pg_trgm function named in the commit title; verify the extension is installed.
+                                                       $result = db_query($link, "SELECT COUNT(*) AS similar FROM
+                                                                       ttrss_entries,ttrss_user_entries
+                                                               WHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day'
+                                                                       AND similarity(title, '$entry_title') >= "._NGRAM_TITLE_DUPLICATE_THRESHOLD."
+                                                                       AND owner_uid = $owner_uid");
+                                                       // Read the "similar" column (the COUNT(*) alias) from row 0 of the result.
+                                                       $ngram_similar = db_fetch_result($result, 0, "similar");
+                                                       // Log the match count only when debugging is enabled.
+                                                       if ($debug_enabled) {
+                                                               _debug("update_rss_feed: N-gram similar results: $ngram_similar");
+                                                       }
+                                                       // At least one similar title exists: set $unread to the string 'false' (presumably consumed by the ttrss_user_entries INSERT that follows; confirm).
+                                                       if ($ngram_similar > 0) {
+                                                               $unread = 'false';
+                                                       }
+                                               }
+
                                                $result = db_query($link,
                                                        "INSERT INTO ttrss_user_entries
                                                                (ref_id, owner_uid, feed_id, unread, last_read, marked,