git.wh0rd.org Git - tt-rss.git/commitdiff
implement ttrss_feeds.cache_content
author Andrew Dolgov <fox@madoka.volgo-balt.ru>
Fri, 23 Nov 2012 09:22:34 +0000 (13:22 +0400)
committer Andrew Dolgov <fox@madoka.volgo-balt.ru>
Fri, 23 Nov 2012 09:22:34 +0000 (13:22 +0400)
classes/api.php
classes/feeds.php
classes/pref/feeds.php
db-updater.php
include/functions.php
include/rssfuncs.php
schema/ttrss_schema_mysql.sql
schema/ttrss_schema_pgsql.sql
schema/versions/mysql/99.sql [new file with mode: 0644]
schema/versions/pgsql/99.sql [new file with mode: 0644]

index 15576c7c0dbf0d443732eeaa450f0528bed5e754..e3dce0c174078c61ed0593c8466f2eb5905f2a93 100644 (file)
@@ -280,7 +280,7 @@ class API extends Handler {
 
                $article_id = join(",", array_filter(explode(",", db_escape_string($_REQUEST["article_id"])), is_numeric));
 
-               $query = "SELECT id,title,link,content,feed_id,comments,int_id,
+               $query = "SELECT id,title,link,content,cached_content,feed_id,comments,int_id,
                        marked,unread,published,
                        ".SUBSTRING_FOR_DATE."(updated,1,16) as updated,
                        author
@@ -309,7 +309,7 @@ class API extends Handler {
                                        "comments" => $line["comments"],
                                        "author" => $line["author"],
                                        "updated" => strtotime($line["updated"]),
-                                       "content" => $line["content"],
+                                       "content" => $line["cached_content"] != "" ? $line["cached_content"] : $line["content"],
                                        "feed_id" => $line["feed_id"],
                                        "attachments" => $attachments
                                );
index 9a74130b0e21841e35883f7ab2a8b0b29129845a..49adf38795b8381d0182ab43d08e383a051971f7 100644 (file)
@@ -158,16 +158,20 @@ class Feeds extends Handler_Protected {
                        // Update the feed if required with some basic flood control\r
 \r
                        $result = db_query($this->link,\r
-                               "SELECT cache_images,".SUBSTRING_FOR_DATE."(last_updated,1,19) AS last_updated\r
+                               "SELECT cache_images,cache_content,".SUBSTRING_FOR_DATE."(last_updated,1,19) AS last_updated\r
                                        FROM ttrss_feeds WHERE id = '$feed'");\r
 \r
                                if (db_num_rows($result) != 0) {\r
                                        $last_updated = strtotime(db_fetch_result($result, 0, "last_updated"));\r
                                        $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));\r
+                                       $cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content"));\r
 \r
-                                       if (!$cache_images && time() - $last_updated > 120 || isset($_REQUEST['DevForceUpdate'])) {\r
+                                       if (!$cache_images && !$cache_content && time() - $last_updated > 120 || isset($_REQUEST['DevForceUpdate'])) {\r
                                                include "rssfuncs.php";\r
                                                update_rss_feed($this->link, $feed, true, true);\r
+                                       } else {\r
+                                               db_query($this->link, "UPDATE ttrss_feeds SET last_updated = '1970-01-01', last_update_started = '1970-01-01'\r
+                                                       WHERE id = '$feed'");\r
                                        }\r
                                }\r
                }\r
@@ -234,6 +238,7 @@ class Feeds extends Handler_Protected {
                $feed_title = $qfh_ret[1];\r
                $feed_site_url = $qfh_ret[2];\r
                $last_error = $qfh_ret[3];\r
+               $cache_content = true;\r
 \r
                $vgroup_last_feed = $vgr_last_feed;\r
 \r
@@ -627,6 +632,10 @@ class Feeds extends Handler_Protected {
 \r
                                        $feed_site_url = $line["site_url"];\r
 \r
+                                       if ($cache_content && $line["cached_content"] != "") {\r
+                                               $line["content_preview"] =& $line["cached_content"];\r
+                                       }\r
+\r
                                        $article_content = sanitize($this->link, $line["content_preview"],\r
                                                        false, false, $feed_site_url);\r
 \r
index 4de3576c88db2e9f412ff80e1d119b053cc7dd6c..c51174b07647d5d392f0200c48c5df147397bfdb 100644 (file)
@@ -653,6 +653,19 @@ class Pref_Feeds extends Handler_Protected {
                        $checked>&nbsp;<label for=\"cache_images\">".
                __('Cache images locally')."</label>";
 
+               $cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content"));
+
+               if ($cache_content) {
+                       $checked = "checked=\"1\"";
+               } else {
+                       $checked = "";
+               }
+
+               print "<hr/><input dojoType=\"dijit.form.CheckBox\" type=\"checkbox\" id=\"cache_content\"
+               name=\"cache_content\"
+                       $checked>&nbsp;<label for=\"cache_content\">".
+               __('Cache content locally')."</label>";
+
                $mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result, 0, "mark_unread_on_update"));
 
                if ($mark_unread_on_update) {
@@ -914,6 +927,8 @@ class Pref_Feeds extends Handler_Protected {
                        db_escape_string($_POST["include_in_digest"]));
                $cache_images = checkbox_to_sql_bool(
                        db_escape_string($_POST["cache_images"]));
+               $cache_content = checkbox_to_sql_bool(
+                       db_escape_string($_POST["cache_content"]));
                $update_method = (int) db_escape_string($_POST["update_method"]);
 
                $always_display_enclosures = checkbox_to_sql_bool(
@@ -938,8 +953,6 @@ class Pref_Feeds extends Handler_Protected {
                        $category_qpart_nocomma = "";
                }
 
-               $cache_images_qpart = "cache_images = $cache_images,";
-
                if (!$batch) {
 
                        $result = db_query($this->link, "UPDATE ttrss_feeds SET
@@ -951,7 +964,8 @@ class Pref_Feeds extends Handler_Protected {
                                auth_pass = '$auth_pass',
                                private = $private,
                                rtl_content = $rtl_content,
-                               $cache_images_qpart
+                               cache_images = $cache_images,
+                               cache_content = $cache_content,
                                include_in_digest = $include_in_digest,
                                always_display_enclosures = $always_display_enclosures,
                                mark_unread_on_update = $mark_unread_on_update,
@@ -1023,6 +1037,10 @@ class Pref_Feeds extends Handler_Protected {
                                                $qpart = "cache_images = $cache_images";
                                                break;
 
+                                       case "cache_content":
+                                               $qpart = "cache_content = $cache_content";
+                                               break;
+
                                        case "rtl_content":
                                                $qpart = "rtl_content = $rtl_content";
                                                break;
index e0900828ce4d47bdd0ae99f63e48a747e316ac3e..216986f39e79413d9a60a8c9dd2c0951fd585049 100644 (file)
@@ -1,5 +1,5 @@
 <?php
-       set_include_path(get_include_path() . PATH_SEPARATOR . 
+       set_include_path(get_include_path() . PATH_SEPARATOR .
                dirname(__FILE__) . "/include");
 
        require_once "functions.php";
@@ -125,6 +125,7 @@ function confirmOP() {
                foreach (array_keys($update_versions) as $v) {
                        if ($v == $version + 1) {
                                print "<p>".T_sprintf("Updating to version %d...", $v)."</p>";
+                               db_query($link, "BEGIN");
                                $fp = fopen($update_versions[$v], "r");
                                if ($fp) {
                                        while (!feof($fp)) {
@@ -136,6 +137,7 @@ function confirmOP() {
                                        }
                                }
                                fclose($fp);
+                               db_query($link, "COMMIT");
 
                                print "<p>".__("Checking version... ");
 
index 821e314f6ce4b4555f83b856df36cd4362857d33..55333ccd667e265649c6e2738234a39755dc3ac6 100644 (file)
@@ -1,6 +1,6 @@
 <?php
        define('EXPECTED_CONFIG_VERSION', 26);
-       define('SCHEMA_VERSION', 98);
+       define('SCHEMA_VERSION', 99);
 
        $fetch_last_error = false;
 
                                }
                        }
 
-                       $content_query_part = "content as content_preview,";
+                       $content_query_part = "content as content_preview, cached_content, ";
 
                        if (is_numeric($feed)) {
 
 
                //if (!$zoom_mode) { print "<article id='$id'><![CDATA["; };
 
-               $result = db_query($link, "SELECT rtl_content, always_display_enclosures FROM ttrss_feeds
+               $result = db_query($link, "SELECT rtl_content, always_display_enclosures, cache_content FROM ttrss_feeds
                        WHERE id = '$feed_id' AND owner_uid = $owner_uid");
 
                if (db_num_rows($result) == 1) {
                        $rtl_content = sql_bool_to_bool(db_fetch_result($result, 0, "rtl_content"));
                        $always_display_enclosures = sql_bool_to_bool(db_fetch_result($result, 0, "always_display_enclosures"));
+                       $cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content"));
                } else {
                        $rtl_content = false;
                        $always_display_enclosures = false;
+                       $cache_content = false;
                }
 
                if ($rtl_content) {
                        tag_cache,
                        author,
                        orig_feed_id,
-                       note
+                       note,
+                       cached_content
                        FROM ttrss_entries,ttrss_user_entries
                        WHERE   id = '$id' AND ref_id = id AND owner_uid = $owner_uid");
 
                                }
                        }
 
+                       if ($cache_content && $line["cached_content"] != "") {
+                               $line["content"] =& $line["cached_content"];
+                       }
+
                        $article_content = sanitize($link, $line["content"], false, $owner_uid,
                                $feed_site_url);
 
                                }
 
                                if ($show_content) {
+
+                                       if ($line["cached_content"] != "") {
+                                               $line["content_preview"] =& $line["cached_content"];
+                                       }
+
                                        if ($sanitize_content) {
                                                $headline_row["content"] = sanitize($link,
                                                        $line["content_preview"], false, false, $line["site_url"]);
index d1e9e6e01612ab7398f02a7426951cd8b81c5747..b26495f67623671833c48804328a4b0f21337d4e 100644 (file)
 
        } // function update_daemon_common
 
+       // ignore_daemon is not used
        function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false,
                $override_url = false) {
 
 
                $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug'];
 
-               if (!$_REQUEST["daemon"] && !$ignore_daemon) {
-                       return false;
-               }
-
                if ($debug_enabled) {
                        _debug("update_rss_feed: start");
                }
 
-               if (!$ignore_daemon) {
-
-                       if (DB_TYPE == "pgsql") {
-                                       $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')";
-                               } else {
-                                       $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))";
-                               }
-
-                       $result = db_query($link, "SELECT id,update_interval,auth_login,
-                               auth_pass,cache_images,update_method,last_updated
-                               FROM ttrss_feeds WHERE id = '$feed' AND $updstart_thresh_qpart");
-
-               } else {
-
-                       $result = db_query($link, "SELECT id,update_interval,auth_login,
-                               feed_url,auth_pass,cache_images,update_method,last_updated,
-                               mark_unread_on_update, owner_uid, update_on_checksum_change,
-                               pubsub_state
-                               FROM ttrss_feeds WHERE id = '$feed'");
-
-               }
+               $result = db_query($link, "SELECT id,update_interval,auth_login,
+                       feed_url,auth_pass,cache_images,update_method,last_updated,cache_content,
+                       mark_unread_on_update, owner_uid, update_on_checksum_change,
+                       pubsub_state
+                       FROM ttrss_feeds WHERE id = '$feed'");
 
                if (db_num_rows($result) == 0) {
                        if ($debug_enabled) {
                }
 
                $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
+               $cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content"));
                $fetch_url = db_fetch_result($result, 0, "feed_url");
 
                $feed = db_escape_string($feed);
                                }
 
                                $entry_content_unescaped = $entry_content;
+                               $entry_cached_content = "";
 
                                if ($use_simplepie) {
                                        $entry_comments = strip_tags($item->data["comments"]);
                                                _debug("update_rss_feed: base guid not found");
                                        }
 
+                                       if ($cache_content) {
+                                               if ($debug_enabled) {
+                                                       _debug("update_rss_feed: caching content...");
+                                               }
+
+                                               $entry_cached_content = cache_content($link, $entry_link, $auth_login, $auth_pass);
+
+                                               if ($cache_images && is_writable(CACHE_DIR . '/images'))
+                                                       $entry_cached_content = cache_images($entry_cached_content, $site_url, $debug_enabled);
+
+                                               $entry_cached_content = db_escape_string($entry_cached_content, false);
+
+                                       }
+
                                        // base post entry does not exist, create it
 
                                        $result = db_query($link,
                                                        updated,
                                                        content,
                                                        content_hash,
+                                                       cached_content,
                                                        no_orig_date,
                                                        date_updated,
                                                        date_entered,
                                                        '$entry_link',
                                                        '$entry_timestamp_fmt',
                                                        '$entry_content',
+                                                       '$entry_cached_content',
                                                        '$content_hash',
                                                        $no_orig_date,
                                                        NOW(),
                                        if ($content_hash != $orig_content_hash) {
                                                $post_needs_update = true;
                                                $update_insignificant = false;
+
+                                               if ($cache_content) {
+                                                       if ($debug_enabled) {
+                                                               _debug("update_rss_feed: caching content because original checksum changed...");
+                                                       }
+
+                                                       $entry_cached_content = cache_content($link, $entry_link, $auth_login, $auth_pass);
+
+                                                       if ($cache_images && is_writable(CACHE_DIR . '/images'))
+                                                               $entry_cached_content = cache_images($entry_cached_content, $site_url, $debug_enabled);
+
+                                                       $entry_cached_content = db_escape_string($entry_cached_content, false);
+                                               }
                                        }
 
                                        if (db_escape_string($orig_title) != $entry_title) {
                                                db_query($link, "UPDATE ttrss_entries
                                                        SET title = '$entry_title', content = '$entry_content',
                                                                content_hash = '$content_hash',
+                                                               cached_content = '$entry_cached_content',
                                                                updated = '$entry_timestamp_fmt',
                                                                num_comments = '$num_comments'
                                                        WHERE id = '$ref_id'");
                        }
                }
        }
+
+       /**
+        * Download the page at $url and return the serialized <body> element.
+        *
+        * @param mixed  $link  Database link; not used by this function.
+        * @param string $url   Address handed to fetch_file_contents().
+        * @param string $login Login handed to fetch_file_contents().
+        * @param string $pass  Password handed to fetch_file_contents().
+        * @return string Markup of the first <body> element, or "" when the
+        *                fetch yields nothing, no <body> element is found, or
+        *                serialization fails.
+        */
+       function cache_content($link, $url, $login, $pass) {
+
+               $content = fetch_file_contents($url, $login, $pass);
+
+               if (!$content) {
+                       return "";
+               }
+
+               // Fetched pages are rarely well-formed; have libxml collect its
+               // diagnostics internally instead of raising PHP warnings, then
+               // restore whatever error mode the caller had.
+               $previous = libxml_use_internal_errors(true);
+
+               $doc = new DOMDocument();
+               $doc->loadHTML($content);
+
+               libxml_clear_errors();
+               libxml_use_internal_errors($previous);
+
+               $node = $doc->getElementsByTagName('body')->item(0);
+
+               if (!$node) {
+                       return "";
+               }
+
+               $markup = $doc->saveXML($node, LIBXML_NOEMPTYTAG);
+
+               // saveXML() returns false on failure; callers pass the result on
+               // as a string, so normalise that case to "".
+               return $markup !== false ? $markup : "";
+       }
 ?>
index 4b2a3369db23cf772b9ee7ca0860870a0c6111f9..2877894260efd9a54e1eb89d43869652be7a7246 100644 (file)
@@ -116,6 +116,7 @@ create table ttrss_feeds (id integer not null auto_increment primary key,
        hidden bool not null default false,
        include_in_digest boolean not null default true,
        cache_images boolean not null default false,
+       cache_content boolean not null default false,
        auth_pass_encrypted boolean not null default false,
        last_viewed datetime default null,
        last_update_started datetime default null,
@@ -150,6 +151,7 @@ create table ttrss_entries (id integer not null primary key auto_increment,
        updated datetime not null,
        content longtext not null,
        content_hash varchar(250) not null,
+       cached_content longtext,
        no_orig_date bool not null default 0,
        date_entered datetime not null,
        date_updated datetime not null,
@@ -306,7 +308,7 @@ create table ttrss_tags (id integer primary key auto_increment,
 
 create table ttrss_version (schema_version int not null) ENGINE=InnoDB DEFAULT CHARSET=UTF8;
 
-insert into ttrss_version values (98);
+insert into ttrss_version values (99);
 
 create table ttrss_enclosures (id integer primary key auto_increment,
        content_url text not null,
index a8a769315c81aa4355491ea23e0319ac18e2bc7b..432ebf88dc9c0a8a72df86c2ce93559af77133f5 100644 (file)
@@ -78,6 +78,7 @@ create table ttrss_feeds (id serial not null primary key,
        include_in_digest boolean not null default true,
        rtl_content boolean not null default false,
        cache_images boolean not null default false,
+       cache_content boolean not null default false,
        last_viewed timestamp default null,
        last_update_started timestamp default null,
        update_method integer not null default 0,
@@ -130,6 +131,7 @@ create table ttrss_entries (id serial not null primary key,
        updated timestamp not null,
        content text not null,
        content_hash varchar(250) not null,
+       cached_content text,
        no_orig_date boolean not null default false,
        date_entered timestamp not null,
        date_updated timestamp not null,
@@ -254,7 +256,7 @@ create index ttrss_tags_post_int_id_idx on ttrss_tags(post_int_id);
 
 create table ttrss_version (schema_version int not null);
 
-insert into ttrss_version values (98);
+insert into ttrss_version values (99);
 
 create table ttrss_enclosures (id serial not null primary key,
        content_url text not null,
diff --git a/schema/versions/mysql/99.sql b/schema/versions/mysql/99.sql
new file mode 100644 (file)
index 0000000..d7f9e02
--- /dev/null
@@ -0,0 +1,12 @@
+begin;
+
+alter table ttrss_feeds add column cache_content bool;
+update ttrss_feeds set cache_content = false;
+alter table ttrss_feeds change cache_content cache_content bool not null;
+alter table ttrss_feeds alter column cache_content set default false;
+
+alter table ttrss_entries add column cached_content longtext;
+
+update ttrss_version set schema_version = 99;
+
+commit;
diff --git a/schema/versions/pgsql/99.sql b/schema/versions/pgsql/99.sql
new file mode 100644 (file)
index 0000000..846056c
--- /dev/null
@@ -0,0 +1,12 @@
+begin;
+
+alter table ttrss_feeds add column cache_content boolean;
+update ttrss_feeds set cache_content = false;
+alter table ttrss_feeds alter column cache_content set not null;
+alter table ttrss_feeds alter column cache_content set default false;
+
+alter table ttrss_entries add column cached_content text;
+
+update ttrss_version set schema_version = 99;
+
+commit;