git.wh0rd.org - tt-rss.git/blobdiff - classes/rssutils.php
Merge branch 'master' of git.fakecake.org:tt-rss into pdo-experimental
[tt-rss.git] / classes / rssutils.php
index 84d6941da60b14a7e4e3db165387d87fda10ef54..f6326720de07c6110e881d93e3fb19279e463a15 100644 (file)
@@ -1,9 +1,4 @@
 <?php
-define_default('DAEMON_UPDATE_LOGIN_LIMIT', 30);
-define_default('DAEMON_FEED_LIMIT', 500);
-define_default('DAEMON_SLEEP_INTERVAL', 120);
-define_default('_MIN_CACHE_FILE_SIZE', 1024);
-
 class RSSUtils {
        static function calculate_article_hash($article, $pluginhost) {
                $tmp = "";
@@ -62,17 +57,12 @@ class RSSUtils {
        }
 
        static function update_daemon_common($limit = DAEMON_FEED_LIMIT, $debug = true) {
-               // Process all other feeds using last_updated and interval parameters
-
                $schema_version = get_schema_version();
 
                if ($schema_version != SCHEMA_VERSION) {
                        die("Schema version is wrong, please upgrade the database.\n");
                }
 
-               define('PREFS_NO_CACHE', true);
-
-               // Test if the user has loggued in recently. If not, it does not update its feeds.
                if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
                        if (DB_TYPE == "pgsql") {
                                $login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
@@ -83,7 +73,6 @@ class RSSUtils {
                        $login_thresh_qpart = "";
                }
 
-               // Test if the feed need a update (update interval exceeded).
                if (DB_TYPE == "pgsql") {
                        $update_limit_qpart = "AND ((
                                        ttrss_feeds.update_interval = 0
@@ -117,9 +106,7 @@ class RSSUtils {
                        $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 10 MINUTE))";
                }
 
-               // Test if there is a limit to number of updated feeds
-               $query_limit = "";
-               if($limit) $query_limit = sprintf("LIMIT %d", $limit);
+               $query_limit = $limit ? sprintf("LIMIT %d", $limit) : "";
 
                // Update the least recently updated feeds first
                $query_order = "ORDER BY last_updated";
@@ -137,27 +124,19 @@ class RSSUtils {
                                $updstart_thresh_qpart
                                $query_order $query_limit";
 
-               // We search for feed needing update.
                $result = db_query($query);
 
-               if($debug) _debug(sprintf("Scheduled %d feeds to update...", db_num_rows($result)));
+               if ($debug) _debug(sprintf("Scheduled %d feeds to update...", db_num_rows($result)));
 
-               // Here is a little cache magic in order to minimize risk of double feed updates.
                $feeds_to_update = array();
                while ($line = db_fetch_assoc($result)) {
-                       array_push($feeds_to_update, db_escape_string($line['feed_url']));
+                       array_push($feeds_to_update, $line['feed_url']);
                }
 
-               // We update the feed last update started date before anything else.
-               // There is no lag due to feed contents downloads
-               // It prevent an other process to update the same feed.
-
-               if(count($feeds_to_update) > 0) {
-                       $feeds_quoted = array();
-
-                       foreach ($feeds_to_update as $feed) {
-                               array_push($feeds_quoted, "'" . db_escape_string($feed) . "'");
-                       }
+               // Update last_update_started before actually starting the batch
+               // in order to minimize collision risk for parallel daemon tasks
+               if (count($feeds_to_update) > 0) {
+                       $feeds_quoted = array_map(function ($s) { return "'" . db_escape_string($s) . "'"; }, $feeds_to_update);
 
                        db_query(sprintf("UPDATE ttrss_feeds SET last_update_started = NOW()
                                WHERE feed_url IN (%s)", implode(',', $feeds_quoted)));
@@ -168,14 +147,12 @@ class RSSUtils {
 
                $batch_owners = array();
 
-               // For each feed, we call the feed update function.
                foreach ($feeds_to_update as $feed) {
                        if($debug) _debug("Base feed: $feed");
 
                        //update_rss_feed($line["id"], true);
 
                        // since we have the data cached, we can deal with other feeds with the same url
-
                        $tmp_result = db_query("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
                        FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
                                ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
@@ -189,7 +166,7 @@ class RSSUtils {
 
                        if (db_num_rows($tmp_result) > 0) {
                                while ($tline = db_fetch_assoc($tmp_result)) {
-                                       if($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);
+                                       if ($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);
 
                                        if (array_search($tline["owner_uid"], $batch_owners) === FALSE)
                                                array_push($batch_owners, $tline["owner_uid"]);
@@ -228,9 +205,11 @@ class RSSUtils {
 
                $feed = db_escape_string($feed);
 
-               $result = db_query("SELECT feed_url,auth_pass,auth_login,auth_pass_encrypted
+               $result = db_query("SELECT owner_uid,feed_url,auth_pass,auth_login,auth_pass_encrypted
                                        FROM ttrss_feeds WHERE id = '$feed'");
 
+               $owner_uid = db_fetch_result($result, 0, "owner_uid");
+
                $auth_pass_encrypted = sql_bool_to_bool(db_fetch_result($result,
                        0, "auth_pass_encrypted"));
 
@@ -244,42 +223,59 @@ class RSSUtils {
 
                $fetch_url = db_fetch_result($result, 0, "feed_url");
 
-               $feed_data = fetch_file_contents($fetch_url, false,
-                       $auth_login, $auth_pass, false,
-                       FEED_FETCH_TIMEOUT,
-                       0);
-
-               global $fetch_curl_used;
+               $pluginhost = new PluginHost();
+               $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
 
-               if (!$fetch_curl_used) {
-                       $tmp = @gzdecode($feed_data);
+               $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
+               $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
+               $pluginhost->load_data();
 
-                       if ($tmp) $feed_data = $tmp;
+               $basic_info = array();
+               foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
+                       $basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
                }
 
-               $feed_data = trim($feed_data);
+               if (!$basic_info) {
+                       $feed_data = fetch_file_contents($fetch_url, false,
+                               $auth_login, $auth_pass, false,
+                               FEED_FETCH_TIMEOUT,
+                               0);
 
-               $rss = new FeedParser($feed_data);
-               $rss->init();
+                       global $fetch_curl_used;
 
-               if (!$rss->error()) {
+                       if (!$fetch_curl_used) {
+                               $tmp = @gzdecode($feed_data);
+
+                               if ($tmp) $feed_data = $tmp;
+                       }
+
+                       $feed_data = trim($feed_data);
+
+                       $rss = new FeedParser($feed_data);
+                       $rss->init();
 
+                       if (!$rss->error()) {
+                               $basic_info = array(
+                                       'title' => db_escape_string(mb_substr($rss->get_title(), 0, 199)),
+                                       'site_url' => db_escape_string(mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245))
+                               );
+                       }
+               }
+
+               if ($basic_info && is_array($basic_info)) {
                        $result = db_query("SELECT title, site_url FROM ttrss_feeds WHERE id = '$feed'");
 
                        $registered_title = db_fetch_result($result, 0, "title");
                        $orig_site_url = db_fetch_result($result, 0, "site_url");
 
-                       $site_url = db_escape_string(mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245));
-                       $feed_title = db_escape_string(mb_substr($rss->get_title(), 0, 199));
-
-                       if ($feed_title && (!$registered_title || $registered_title == "[Unknown]")) {
+                       if ($basic_info['title'] && (!$registered_title || $registered_title == "[Unknown]")) {
                                db_query("UPDATE ttrss_feeds SET
-                                       title = '$feed_title' WHERE id = '$feed'");
+                                       title = '${basic_info['title']}' WHERE id = '$feed'");
                        }
 
-                       if ($site_url && $orig_site_url != $site_url) {
+                       if ($basic_info['site_url'] && $orig_site_url != $basic_info['site_url']) {
                                db_query("UPDATE ttrss_feeds SET
-                                                       site_url = '$site_url' WHERE id = '$feed'");
+                                                       site_url = '${basic_info['site_url']}' WHERE id = '$feed'");
                        }
                }
        }
@@ -315,14 +311,14 @@ class RSSUtils {
                $result = db_query("SELECT id,update_interval,auth_login,
                        feed_url,auth_pass,cache_images,
                        mark_unread_on_update, owner_uid,
-                       pubsub_state, auth_pass_encrypted,
-                       feed_language
+                       auth_pass_encrypted, feed_language, 
+                       last_modified, 
+                       ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional                 
                        FROM ttrss_feeds WHERE id = '$feed'");
 
                $owner_uid = db_fetch_result($result, 0, "owner_uid");
                $mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result,
                        0, "mark_unread_on_update"));
-               $pubsub_state = db_fetch_result($result, 0, "pubsub_state");
                $auth_pass_encrypted = sql_bool_to_bool(db_fetch_result($result,
                        0, "auth_pass_encrypted"));
 
@@ -337,6 +333,8 @@ class RSSUtils {
                        $auth_pass = decrypt_string($auth_pass);
                }
 
+               $stored_last_modified = db_fetch_result($result, 0, "last_modified");
+        $last_unconditional = db_fetch_result($result, 0, "last_unconditional");
                $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
                $fetch_url = db_fetch_result($result, 0, "feed_url");
                $feed_language = db_escape_string(mb_strtolower(db_fetch_result($result, 0, "feed_language")));
@@ -384,18 +382,33 @@ class RSSUtils {
                        _debug("local cache will not be used for this feed", $debug_enabled);
                }
 
+               global $fetch_last_modified;
+
                // fetch feed from source
                if (!$feed_data) {
-                       _debug("fetching [$fetch_url]...", $debug_enabled);
+                       _debug("last unconditional update request: $last_unconditional");
 
                        if (ini_get("open_basedir") && function_exists("curl_init")) {
                                _debug("not using CURL due to open_basedir restrictions");
                        }
 
-                       $feed_data = fetch_file_contents($fetch_url, false,
-                               $auth_login, $auth_pass, false,
-                               $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
-                               0);
+            if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
+                _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
+
+                $force_refetch = true;
+            } else {
+                _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
+            }
+
+            _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
+
+                       $feed_data = fetch_file_contents([
+                               "url" => $fetch_url,
+                               "login" => $auth_login,
+                               "pass" => $auth_pass,
+                               "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
+                               "last_modified" => $force_refetch ? "" : $stored_last_modified
+                       ]);
 
                        global $fetch_curl_used;
 
@@ -408,6 +421,14 @@ class RSSUtils {
                        $feed_data = trim($feed_data);
 
                        _debug("fetch done.", $debug_enabled);
+                       _debug("source last modified: " . $fetch_last_modified, $debug_enabled);
+
+                       if ($feed_data && $fetch_last_modified != $stored_last_modified) {
+                               $last_modified_escaped = db_escape_string(substr($fetch_last_modified, 0, 245));
+
+                               db_query("UPDATE ttrss_feeds SET last_modified = '$last_modified_escaped' WHERE id = '$feed'");
+
+                       }
 
                        // cache vanilla feed data for re-use
                        if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/simplepie")) {
@@ -522,6 +543,10 @@ class RSSUtils {
 
                        $filters = load_filters($feed, $owner_uid);
 
+                       if ($debug_enabled) {
+                           print_r($filters);
+            }
+
                        _debug("" . count($filters) . " filters loaded.", $debug_enabled);
 
                        $items = $rss->get_items();
@@ -530,60 +555,11 @@ class RSSUtils {
                                _debug("no articles found.", $debug_enabled);
 
                                db_query("UPDATE ttrss_feeds
-                                       SET last_updated = NOW(), last_error = '' WHERE id = '$feed'");
+                                       SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = '$feed'");
 
                                return; // no articles
                        }
 
-                       if ($pubsub_state != 2 && PUBSUBHUBBUB_ENABLED) {
-
-                               _debug("checking for PUSH hub...", $debug_enabled);
-
-                               $feed_hub_url = false;
-
-                               $links = $rss->get_links('hub');
-
-                               if ($links && is_array($links)) {
-                                       foreach ($links as $l) {
-                                               $feed_hub_url = $l;
-                                               break;
-                                       }
-                               }
-
-                               _debug("feed hub url: $feed_hub_url", $debug_enabled);
-
-                               $feed_self_url = $fetch_url;
-
-                               $links = $rss->get_links('self');
-
-                               if ($links && is_array($links)) {
-                                       foreach ($links as $l) {
-                                               $feed_self_url = $l;
-                                               break;
-                                       }
-                               }
-
-                               _debug("feed self url = $feed_self_url");
-
-                               if ($feed_hub_url && $feed_self_url && function_exists('curl_init') &&
-                                       !ini_get("open_basedir")) {
-
-                                       require_once 'lib/pubsubhubbub/Subscriber.php';
-
-                                       $callback_url = get_self_url_prefix() .
-                                               "/public.php?op=pubsub&id=$feed";
-
-                                       $s = new Pubsubhubbub\Subscriber\Subscriber($feed_hub_url, $callback_url);
-
-                                       $rc = $s->subscribe($feed_self_url);
-
-                                       _debug("feed hub url found, subscribe request sent. [rc=$rc]", $debug_enabled);
-
-                                       db_query("UPDATE ttrss_feeds SET pubsub_state = 1
-                                               WHERE id = '$feed'");
-                               }
-                       }
-
                        _debug("processing articles...", $debug_enabled);
 
                        $tstart = time();
@@ -705,6 +681,7 @@ class RSSUtils {
                                        "force_catchup" => false, // ugly hack for the time being
                                        "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
                                        "language" => $entry_language,
+                                       "num_comments" => $num_comments, // read only
                                        "feed" => array("id" => $feed,
                                                "fetch_url" => $fetch_url,
                                                "site_url" => $site_url,
@@ -965,16 +942,6 @@ class RSSUtils {
                                                                $last_read_qpart, $marked, $published, '$score', '', '',
                                                                '', $last_marked, $last_published)");
 
-                                               if (PUBSUBHUBBUB_HUB && $published == 'true') {
-                                                       $rss_link = get_self_url_prefix() .
-                                                               "/public.php?op=rss&id=-2&key=" .
-                                                               get_feed_access_key(-2, false, $owner_uid);
-
-                                                       $p = new pubsubhubbub\publisher\Publisher(PUBSUBHUBBUB_HUB);
-
-                                                       /* $pubsub_result = */ $p->publish_update($rss_link);
-                                               }
-
                                                $result = db_query(
                                                        "SELECT int_id FROM ttrss_user_entries WHERE
                                                                ref_id = '$ref_id' AND owner_uid = '$owner_uid' AND
@@ -1175,7 +1142,7 @@ class RSSUtils {
                        purge_feed($feed, 0, $debug_enabled);
 
                        db_query("UPDATE ttrss_feeds
-                               SET last_updated = NOW(), last_error = '' WHERE id = '$feed'");
+                               SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = '$feed'");
 
 //                     db_query("COMMIT");
 
@@ -1193,7 +1160,7 @@ class RSSUtils {
 
                        db_query(
                                "UPDATE ttrss_feeds SET last_error = '$error_msg',
-                               last_updated = NOW() WHERE id = '$feed'");
+                               last_updated = NOW(), last_unconditional = NOW()   WHERE id = '$feed'");
 
                        unset($rss);
                        return;
@@ -1218,7 +1185,7 @@ class RSSUtils {
                                if (!file_exists($local_filename)) {
                                        $file_content = fetch_file_contents($src);
 
-                                       if ($file_content && strlen($file_content) > _MIN_CACHE_FILE_SIZE) {
+                                       if ($file_content && strlen($file_content) > MIN_CACHE_FILE_SIZE) {
                                                file_put_contents($local_filename, $file_content);
                                        }
                                } else {
@@ -1252,7 +1219,7 @@ class RSSUtils {
                                if (!file_exists($local_filename)) {
                                        $file_content = fetch_file_contents($src);
 
-                                       if ($file_content && strlen($file_content) > _MIN_CACHE_FILE_SIZE) {
+                                       if ($file_content && strlen($file_content) > MIN_CACHE_FILE_SIZE) {
                                                file_put_contents($local_filename, $file_content);
                                        }
                                } else {
@@ -1309,7 +1276,7 @@ class RSSUtils {
 
                                if ($files) {
                                        foreach ($files as $file) {
-                                               if (time() - filemtime($file) > 86400*7) {
+                                               if (time() - filemtime($file) > 86400*CACHE_MAX_DAYS) {
                                                        unlink($file);
 
                                                        ++$num_deleted;
@@ -1552,6 +1519,10 @@ class RSSUtils {
                                                // 0       beshort         0xffd8          JPEG image data
                                                //error_log("check_feed_favicon: favicon_url=$favicon_url isa JPG image");
                                        }
+                                       elseif (preg_match('/^BM/', $contents)) {
+                                               // 0    string          BM      PC bitmap (OS2, Windows BMP files)
+                                               //error_log("check_feed_favicon, favicon_url=$favicon_url isa BMP image");
+                                       }
                                        else {
                                                //error_log("check_feed_favicon: favicon_url=$favicon_url isa UNKNOWN type");
                                                $contents = "";
@@ -1574,4 +1545,4 @@ class RSSUtils {
 
 
 
-}
\ No newline at end of file
+}