<?php
-define_default('DAEMON_UPDATE_LOGIN_LIMIT', 30);
-define_default('DAEMON_FEED_LIMIT', 500);
-define_default('DAEMON_SLEEP_INTERVAL', 120);
-define_default('_MIN_CACHE_FILE_SIZE', 1024);
-
class RSSUtils {
static function calculate_article_hash($article, $pluginhost) {
$tmp = "";
}
static function update_daemon_common($limit = DAEMON_FEED_LIMIT, $debug = true) {
- // Process all other feeds using last_updated and interval parameters
-
$schema_version = get_schema_version();
if ($schema_version != SCHEMA_VERSION) {
die("Schema version is wrong, please upgrade the database.\n");
}
- define('PREFS_NO_CACHE', true);
-
- // Test if the user has loggued in recently. If not, it does not update its feeds.
if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
if (DB_TYPE == "pgsql") {
$login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
$login_thresh_qpart = "";
}
- // Test if the feed need a update (update interval exceeded).
if (DB_TYPE == "pgsql") {
$update_limit_qpart = "AND ((
ttrss_feeds.update_interval = 0
$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 10 MINUTE))";
}
- // Test if there is a limit to number of updated feeds
- $query_limit = "";
- if($limit) $query_limit = sprintf("LIMIT %d", $limit);
+ $query_limit = $limit ? sprintf("LIMIT %d", $limit) : "";
// Update the least recently updated feeds first
$query_order = "ORDER BY last_updated";
$updstart_thresh_qpart
$query_order $query_limit";
- // We search for feed needing update.
$result = db_query($query);
- if($debug) _debug(sprintf("Scheduled %d feeds to update...", db_num_rows($result)));
+ if ($debug) _debug(sprintf("Scheduled %d feeds to update...", db_num_rows($result)));
- // Here is a little cache magic in order to minimize risk of double feed updates.
$feeds_to_update = array();
while ($line = db_fetch_assoc($result)) {
- array_push($feeds_to_update, db_escape_string($line['feed_url']));
+ array_push($feeds_to_update, $line['feed_url']);
}
- // We update the feed last update started date before anything else.
- // There is no lag due to feed contents downloads
- // It prevent an other process to update the same feed.
-
- if(count($feeds_to_update) > 0) {
- $feeds_quoted = array();
-
- foreach ($feeds_to_update as $feed) {
- array_push($feeds_quoted, "'" . db_escape_string($feed) . "'");
- }
+ // Update last_update_started before actually starting the batch
+ // in order to minimize collision risk for parallel daemon tasks
+ if (count($feeds_to_update) > 0) {
+ $feeds_quoted = array_map(function ($s) { return "'" . db_escape_string($s) . "'"; }, $feeds_to_update);
db_query(sprintf("UPDATE ttrss_feeds SET last_update_started = NOW()
WHERE feed_url IN (%s)", implode(',', $feeds_quoted)));
$batch_owners = array();
- // For each feed, we call the feed update function.
foreach ($feeds_to_update as $feed) {
if($debug) _debug("Base feed: $feed");
//update_rss_feed($line["id"], true);
// since we have the data cached, we can deal with other feeds with the same url
-
$tmp_result = db_query("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
if (db_num_rows($tmp_result) > 0) {
while ($tline = db_fetch_assoc($tmp_result)) {
- if($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);
+ if ($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);
if (array_search($tline["owner_uid"], $batch_owners) === FALSE)
array_push($batch_owners, $tline["owner_uid"]);
$feed = db_escape_string($feed);
- $result = db_query("SELECT feed_url,auth_pass,auth_login,auth_pass_encrypted
+ $result = db_query("SELECT owner_uid,feed_url,auth_pass,auth_login,auth_pass_encrypted
FROM ttrss_feeds WHERE id = '$feed'");
+ $owner_uid = db_fetch_result($result, 0, "owner_uid");
+
$auth_pass_encrypted = sql_bool_to_bool(db_fetch_result($result,
0, "auth_pass_encrypted"));
$fetch_url = db_fetch_result($result, 0, "feed_url");
- $feed_data = fetch_file_contents($fetch_url, false,
- $auth_login, $auth_pass, false,
- FEED_FETCH_TIMEOUT,
- 0);
-
- global $fetch_curl_used;
+ $pluginhost = new PluginHost();
+ $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
- if (!$fetch_curl_used) {
- $tmp = @gzdecode($feed_data);
+ $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
+ $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
+ $pluginhost->load_data();
- if ($tmp) $feed_data = $tmp;
+ $basic_info = array();
+ foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
+ $basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
}
- $feed_data = trim($feed_data);
+ if (!$basic_info) {
+ $feed_data = fetch_file_contents($fetch_url, false,
+ $auth_login, $auth_pass, false,
+ FEED_FETCH_TIMEOUT,
+ 0);
- $rss = new FeedParser($feed_data);
- $rss->init();
+ global $fetch_curl_used;
- if (!$rss->error()) {
+ if (!$fetch_curl_used) {
+ $tmp = @gzdecode($feed_data);
+
+ if ($tmp) $feed_data = $tmp;
+ }
+
+ $feed_data = trim($feed_data);
+
+ $rss = new FeedParser($feed_data);
+ $rss->init();
+ if (!$rss->error()) {
+ $basic_info = array(
+ 'title' => db_escape_string(mb_substr($rss->get_title(), 0, 199)),
+ 'site_url' => db_escape_string(mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245))
+ );
+ }
+ }
+
+ if ($basic_info && is_array($basic_info)) {
$result = db_query("SELECT title, site_url FROM ttrss_feeds WHERE id = '$feed'");
$registered_title = db_fetch_result($result, 0, "title");
$orig_site_url = db_fetch_result($result, 0, "site_url");
- $site_url = db_escape_string(mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245));
- $feed_title = db_escape_string(mb_substr($rss->get_title(), 0, 199));
-
- if ($feed_title && (!$registered_title || $registered_title == "[Unknown]")) {
+ if ($basic_info['title'] && (!$registered_title || $registered_title == "[Unknown]")) {
db_query("UPDATE ttrss_feeds SET
- title = '$feed_title' WHERE id = '$feed'");
+ title = '${basic_info['title']}' WHERE id = '$feed'");
}
- if ($site_url && $orig_site_url != $site_url) {
+ if ($basic_info['site_url'] && $orig_site_url != $basic_info['site_url']) {
db_query("UPDATE ttrss_feeds SET
- site_url = '$site_url' WHERE id = '$feed'");
+ site_url = '${basic_info['site_url']}' WHERE id = '$feed'");
}
}
}
$result = db_query("SELECT id,update_interval,auth_login,
feed_url,auth_pass,cache_images,
mark_unread_on_update, owner_uid,
- pubsub_state, auth_pass_encrypted,
- feed_language
+ auth_pass_encrypted, feed_language,
+ last_modified,
+ ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
FROM ttrss_feeds WHERE id = '$feed'");
$owner_uid = db_fetch_result($result, 0, "owner_uid");
$mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result,
0, "mark_unread_on_update"));
- $pubsub_state = db_fetch_result($result, 0, "pubsub_state");
$auth_pass_encrypted = sql_bool_to_bool(db_fetch_result($result,
0, "auth_pass_encrypted"));
$auth_pass = decrypt_string($auth_pass);
}
+ $stored_last_modified = db_fetch_result($result, 0, "last_modified");
+ $last_unconditional = db_fetch_result($result, 0, "last_unconditional");
$cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
$fetch_url = db_fetch_result($result, 0, "feed_url");
$feed_language = db_escape_string(mb_strtolower(db_fetch_result($result, 0, "feed_language")));
_debug("local cache will not be used for this feed", $debug_enabled);
}
+ global $fetch_last_modified;
+
// fetch feed from source
if (!$feed_data) {
- _debug("fetching [$fetch_url]...", $debug_enabled);
+ _debug("last unconditional update request: $last_unconditional");
if (ini_get("open_basedir") && function_exists("curl_init")) {
_debug("not using CURL due to open_basedir restrictions");
}
- $feed_data = fetch_file_contents($fetch_url, false,
- $auth_login, $auth_pass, false,
- $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
- 0);
+ if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
+ _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
+
+ $force_refetch = true;
+ } else {
+ _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
+ }
+
+ _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
+
+ $feed_data = fetch_file_contents([
+ "url" => $fetch_url,
+ "login" => $auth_login,
+ "pass" => $auth_pass,
+ "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
+ "last_modified" => $force_refetch ? "" : $stored_last_modified
+ ]);
global $fetch_curl_used;
$feed_data = trim($feed_data);
_debug("fetch done.", $debug_enabled);
+ _debug("source last modified: " . $fetch_last_modified, $debug_enabled);
+
+ if ($feed_data && $fetch_last_modified != $stored_last_modified) {
+ $last_modified_escaped = db_escape_string(substr($fetch_last_modified, 0, 245));
+
+ db_query("UPDATE ttrss_feeds SET last_modified = '$last_modified_escaped' WHERE id = '$feed'");
+
+ }
// cache vanilla feed data for re-use
if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/simplepie")) {
$filters = load_filters($feed, $owner_uid);
+ if ($debug_enabled) {
+ print_r($filters);
+ }
+
_debug("" . count($filters) . " filters loaded.", $debug_enabled);
$items = $rss->get_items();
_debug("no articles found.", $debug_enabled);
db_query("UPDATE ttrss_feeds
- SET last_updated = NOW(), last_error = '' WHERE id = '$feed'");
+ SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = '$feed'");
return; // no articles
}
- if ($pubsub_state != 2 && PUBSUBHUBBUB_ENABLED) {
-
- _debug("checking for PUSH hub...", $debug_enabled);
-
- $feed_hub_url = false;
-
- $links = $rss->get_links('hub');
-
- if ($links && is_array($links)) {
- foreach ($links as $l) {
- $feed_hub_url = $l;
- break;
- }
- }
-
- _debug("feed hub url: $feed_hub_url", $debug_enabled);
-
- $feed_self_url = $fetch_url;
-
- $links = $rss->get_links('self');
-
- if ($links && is_array($links)) {
- foreach ($links as $l) {
- $feed_self_url = $l;
- break;
- }
- }
-
- _debug("feed self url = $feed_self_url");
-
- if ($feed_hub_url && $feed_self_url && function_exists('curl_init') &&
- !ini_get("open_basedir")) {
-
- require_once 'lib/pubsubhubbub/Subscriber.php';
-
- $callback_url = get_self_url_prefix() .
- "/public.php?op=pubsub&id=$feed";
-
- $s = new Pubsubhubbub\Subscriber\Subscriber($feed_hub_url, $callback_url);
-
- $rc = $s->subscribe($feed_self_url);
-
- _debug("feed hub url found, subscribe request sent. [rc=$rc]", $debug_enabled);
-
- db_query("UPDATE ttrss_feeds SET pubsub_state = 1
- WHERE id = '$feed'");
- }
- }
-
_debug("processing articles...", $debug_enabled);
$tstart = time();
"force_catchup" => false, // ugly hack for the time being
"score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
"language" => $entry_language,
+ "num_comments" => $num_comments, // read only
"feed" => array("id" => $feed,
"fetch_url" => $fetch_url,
"site_url" => $site_url,
$last_read_qpart, $marked, $published, '$score', '', '',
'', $last_marked, $last_published)");
- if (PUBSUBHUBBUB_HUB && $published == 'true') {
- $rss_link = get_self_url_prefix() .
- "/public.php?op=rss&id=-2&key=" .
- get_feed_access_key(-2, false, $owner_uid);
-
- $p = new pubsubhubbub\publisher\Publisher(PUBSUBHUBBUB_HUB);
-
- /* $pubsub_result = */ $p->publish_update($rss_link);
- }
-
$result = db_query(
"SELECT int_id FROM ttrss_user_entries WHERE
ref_id = '$ref_id' AND owner_uid = '$owner_uid' AND
purge_feed($feed, 0, $debug_enabled);
db_query("UPDATE ttrss_feeds
- SET last_updated = NOW(), last_error = '' WHERE id = '$feed'");
+ SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = '$feed'");
// db_query("COMMIT");
db_query(
"UPDATE ttrss_feeds SET last_error = '$error_msg',
- last_updated = NOW() WHERE id = '$feed'");
+ last_updated = NOW(), last_unconditional = NOW() WHERE id = '$feed'");
unset($rss);
return;
if (!file_exists($local_filename)) {
$file_content = fetch_file_contents($src);
- if ($file_content && strlen($file_content) > _MIN_CACHE_FILE_SIZE) {
+ if ($file_content && strlen($file_content) > MIN_CACHE_FILE_SIZE) {
file_put_contents($local_filename, $file_content);
}
} else {
if (!file_exists($local_filename)) {
$file_content = fetch_file_contents($src);
- if ($file_content && strlen($file_content) > _MIN_CACHE_FILE_SIZE) {
+ if ($file_content && strlen($file_content) > MIN_CACHE_FILE_SIZE) {
file_put_contents($local_filename, $file_content);
}
} else {
if ($files) {
foreach ($files as $file) {
- if (time() - filemtime($file) > 86400*7) {
+ if (time() - filemtime($file) > 86400*CACHE_MAX_DAYS) {
unlink($file);
++$num_deleted;
// 0 beshort 0xffd8 JPEG image data
//error_log("check_feed_favicon: favicon_url=$favicon_url isa JPG image");
}
+ elseif (preg_match('/^BM/', $contents)) {
+ // 0 string BM PC bitmap (OS2, Windows BMP files)
+ //error_log("check_feed_favicon: favicon_url=$favicon_url isa BMP image");
+ }
else {
//error_log("check_feed_favicon: favicon_url=$favicon_url isa UNKNOWN type");
$contents = "";
-}
\ No newline at end of file
+}