]> git.wh0rd.org Git - tt-rss.git/commitdiff
use transactions in feed update process, better handle feeds without entry dates...
authorAndrew Dolgov <fox@madoka.spb.ru>
Tue, 23 Aug 2005 05:46:48 +0000 (06:46 +0100)
committerAndrew Dolgov <fox@madoka.spb.ru>
Tue, 23 Aug 2005 05:46:48 +0000 (06:46 +0100)
backend.php
functions.php
ttrss_schema.sql

index 72c61a8f5f82dcca27c7eb06e0464065ce1cb368..012396182eab6dd298ea1b824b7d264be468a1f6 100644 (file)
        if ($op == "rpc") {
 
                $subop = $_GET["subop"];
+
+               if ($subop == "forceUpdateAll") {
+                       update_all_feeds($link, true);
+               }
+
+               if ($subop == "updateAll") {
+                       update_all_feeds($link, false);
+               }
                
                if ($subop == "catchupPage") {
 
index ef3bfa98e78b3fd5c99579a5111d2c547d288ed3..36f191e83f933d40b719f58be6d9bf78cfe4e47e 100644 (file)
@@ -3,6 +3,8 @@
 
        function update_all_feeds($link, $fetch) {
 
+               pg_query("BEGIN");
+
                if (!$fetch) {
 
                        $result = pg_query($link, "SELECT feed_url,id FROM ttrss_feeds WHERE
@@ -21,6 +23,8 @@
                        $num_unread += update_rss_feed($link, $line["feed_url"], $line["id"]);
                }
 
+               pg_query("COMMIT");
+
        }
 
        function update_rss_feed($link, $feed_url, $feed) {
                $num_unread = 0;
        
                if ($rss) {
-       
+
+                       pg_query("BEGIN");
+
                        $result = pg_query("SELECT title FROM ttrss_feeds WHERE id = '$feed'");
 
                        $registered_title = pg_fetch_result($result, 0, "title");
 
                        if (!$registered_title) {
-
                                $feed_title = $rss->channel["title"];
                                pg_query("UPDATE ttrss_feeds SET title = '$feed_title' WHERE id = '$feed'");
                        }
                                if (!$entry_guid) $entry_guid = $item["link"];
        
                                $entry_timestamp = "";
-                               
+
                                $rss_2_date = $item['pubdate'];
                                $rss_1_date = $item['dc']['date'];
                                $atom_date = $item['issued'];
                        
+                               $no_orig_date = 'false';
+                       
                                if ($atom_date != "") $entry_timestamp = parse_w3cdtf($atom_date);
                                if ($rss_1_date != "") $entry_timestamp = parse_w3cdtf($rss_1_date);
                                if ($rss_2_date != "") $entry_timestamp = strtotime($rss_2_date);
-                               if ($entry_timestamp == "") $entry_timestamp = 0;
+//                             if ($rss_3_date != "") $entry_timestamp = strtotime($rss_3_date);
+                               
+                               if ($entry_timestamp == "") {
+                                       $entry_timestamp = time();
+                                       $no_orig_date = 'true';
+                               }
 
                                if (!$entry_timestamp) continue;
 
@@ -77,7 +89,7 @@
        
                                $result = pg_query($link, "
                                        SELECT 
-                                               id,unread,md5_hash,last_read,
+                                               id,unread,md5_hash,last_read,no_orig_date,title,
                                                EXTRACT(EPOCH FROM updated) as updated_timestamp
                                        FROM
                                                ttrss_entries 
                                        $entry_timestamp = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
        
                                        $query = "INSERT INTO ttrss_entries 
-                                                       (title, guid, link, updated, content, feed_id, md5_hash) 
+                                                       (title, guid, link, updated, content, feed_id, 
+                                                               md5_hash, no_orig_date) 
                                                VALUES
                                                        ('$entry_title', '$entry_guid', '$entry_link', 
                                                                '$entry_timestamp', '$entry_content', '$feed', 
-                                                               '$content_md5')";
+                                                               '$content_md5', $no_orig_date)";
        
                                        $result = pg_query($link, $query);
 
        
                                        $unread = pg_fetch_result($result, 0, "unread");
                                        $md5_hash = pg_fetch_result($result, 0, "md5_hash");
-                                       
-//                                     if ($md5_hash != $content_md5 && CONTENT_CHECK_MD5) 
-//                                             $unread = "true";
+                                       $no_orig_date = pg_fetch_result($result, 0, "no_orig_date");
+                                       $orig_title = pg_fetch_result($result, 0, "title");
 
-                                       if (!$last_read || $md5_hash != $content_md5) {
-                                               $last_read = 'null';
+                                       // disable update detection for posts which didn't have correct
+                                       // publishment date, because they will always register as updated
+                                       // sadly this doesn't catch feed generators which input current date 
+                                       // in posts all the time (some planets do this)
+
+                                       if ($no_orig_date != 't' && (!$last_read || $md5_hash != $content_md5)) {
+                                               $last_read_qpart = 'last_read = null,';
                                        } else {
-                                               $last_read = "'$last_read'";
+                                               $last_read_qpart = '';
                                        }
 
-//                                     if ($unread || !CONTENT_CHECK_MD5) {
-//                                             $updated_query_part = "updated = '$entry_timestamp',";
-//                                     }
+                                       // mark post as updated on title change
+                                       // maybe we should mark it as unread instead?
 
-//                                     if ($updated_timestamp > $entry_timestamp) {
-//                                             $unread = "true";
-//                                             print "$updated_timestamp : $entry_timestamp<br>";
-//                                     }                       
+                                       if ($orig_title != $entry_title) {
+                                               $last_read_qpart = 'last_read = null,';
+                                       }
+
+                                       // don't bother updating timestamps on posts with broken pubDate
+                                       
+                                       if ($no_orig_date != 't') {
+                                               $update_timestamp_qpart = "updated = '$entry_timestamp_fmt',";
+                                       }
 
                                        $query = "UPDATE ttrss_entries 
                                                SET 
                                                        title ='$entry_title', 
                                                        link = '$entry_link', 
-                                                       updated = '$entry_timestamp_fmt',
+                                                       $update_timestamp_qpart
+                                                       $last_read_qpart
                                                        content = '$entry_content',
                                                        md5_hash = '$content_md5',
-                                                       last_read = $last_read,
                                                        unread = '$unread'
                                                WHERE
                                                        id = '$entry_id'";
                                $result = pg_query($link, "UPDATE ttrss_feeds SET last_updated = NOW()");
                        }
 
+                       pg_query("COMMIT");
+
                }
 
        }
index 20237ce7efb94ee856e9bb813e4cb13194e5ec25..fec7e4cfe4f4b65534505dd543943dc7de578af8 100644 (file)
@@ -34,5 +34,6 @@ create table ttrss_entries (id serial not null primary key,
        md5_hash varchar(200) not null unique,
        content text not null,
        last_read timestamp,
-       unread boolean default true);
+       no_orig_date boolean not null default false,
+       unread boolean not null default true);