<?php
define('DAEMON_UPDATE_LOGIN_LIMIT', 30);
define('DAEMON_FEED_LIMIT', 100);
- define('DAEMON_SLEEP_INTERVAL', 120);
+ define('DAEMON_SLEEP_INTERVAL', 60);
function update_feedbrowser_cache($link) {
function update_daemon_common($link, $limit = DAEMON_FEED_LIMIT, $from_http = false, $debug = true) {
// Process all other feeds using last_updated and interval parameters
+ define('PREFS_NO_CACHE', true);
+
// Test if the user has logged in recently. If not, their feeds are not updated.
if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
if (DB_TYPE == "pgsql") {
) OR (
ttrss_feeds.update_interval > 0
AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)
- ) OR ttrss_feeds.last_updated IS NULL)";
+ ) OR ttrss_feeds.last_updated IS NULL
+ OR last_updated = '1970-01-01 00:00:00')";
} else {
$update_limit_qpart = "AND ((
ttrss_feeds.update_interval = 0
) OR (
ttrss_feeds.update_interval > 0
AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)
- ) OR ttrss_feeds.last_updated IS NULL)";
+ ) OR ttrss_feeds.last_updated IS NULL
+ OR last_updated = '1970-01-01 00:00:00')";
}
// Test if feed is currently being updated by another process.
}
// Send feed digests by email if needed.
- send_headlines_digests($link, 100, $debug);
+ send_headlines_digests($link, $debug);
} // function update_daemon_common
- function fetch_twitter_rss($link, $url, $owner_uid) {
-
- require_once 'lib/tmhoauth/tmhOAuth.php';
-
- $result = db_query($link, "SELECT twitter_oauth FROM ttrss_users
- WHERE id = $owner_uid");
-
- $access_token = json_decode(db_fetch_result($result, 0, 'twitter_oauth'), true);
- $url_escaped = db_escape_string($url);
-
- if ($access_token) {
-
- $tmhOAuth = new tmhOAuth(array(
- 'consumer_key' => CONSUMER_KEY,
- 'consumer_secret' => CONSUMER_SECRET,
- 'user_token' => $access_token['oauth_token'],
- 'user_secret' => $access_token['oauth_token_secret'],
- ));
-
- $code = $tmhOAuth->request('GET', $url);
-
- if ($code == 200) {
-
- $content = $tmhOAuth->response['response'];
-
- define('MAGPIE_CACHE_ON', false);
-
- $rss = new MagpieRSS($content, MAGPIE_OUTPUT_ENCODING,
- MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );
-
- return $rss;
-
- } else {
-
- db_query($link, "UPDATE ttrss_feeds
- SET last_error = 'OAuth authorization failed ($code).'
- WHERE feed_url = '$url_escaped' AND owner_uid = $owner_uid");
- }
-
- } else {
-
- db_query($link, "UPDATE ttrss_feeds
- SET last_error = 'OAuth information not found.'
- WHERE feed_url = '$url_escaped' AND owner_uid = $owner_uid");
-
- return false;
- }
- }
-
- function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false) {
-
- global $memcache;
-
- /* Update all feeds with the same URL to utilize memcache */
-
- if ($memcache) {
- $result = db_query($link, "SELECT f1.id
- FROM ttrss_feeds AS f1, ttrss_feeds AS f2
- WHERE f2.feed_url = f1.feed_url AND f2.id = '$feed'");
-
- while ($line = db_fetch_assoc($result)) {
- update_rss_feed_real($link, $line["id"], $ignore_daemon, $no_cache);
- }
- } else {
- update_rss_feed_real($link, $feed, $ignore_daemon, $no_cache);
- }
- }
-
- function update_rss_feed_real($link, $feed, $ignore_daemon = false, $no_cache = false,
+ function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false,
$override_url = false) {
require_once "lib/simplepie/simplepie.inc";
require_once "lib/magpierss/rss_fetch.inc";
require_once 'lib/magpierss/rss_utils.inc';
- global $memcache;
-
$debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug'];
if (!$_REQUEST["daemon"] && !$ignore_daemon) {
}
$result = db_query($link, "SELECT id,update_interval,auth_login,
- auth_pass,cache_images,update_method
+ auth_pass,cache_images,update_method,last_updated
FROM ttrss_feeds WHERE id = '$feed' AND $updstart_thresh_qpart");
} else {
$auth_pass = urlencode($auth_pass);
}
- $update_interval = db_fetch_result($result, 0, "update_interval");
$cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
$fetch_url = db_fetch_result($result, 0, "feed_url");
- if ($update_interval < 0) { return false; }
-
$feed = db_escape_string($feed);
if ($auth_login && $auth_pass ){
_debug("update_rss_feed: fetching [$fetch_url]...");
}
- $obj_id = md5("FDATA:$use_simplepie:$fetch_url");
-
- if ($memcache && $obj = $memcache->get($obj_id)) {
+ // Ignore cache if new feed or manual update.
+ $cache_age = (is_null($last_updated) || $last_updated == '1970-01-01 00:00:00') ?
+ -1 : get_feed_update_interval($link, $feed) * 60;
- if ($debug_enabled) {
- _debug("update_rss_feed: data found in memcache.");
- }
+ if ($update_method == 1) {
- $rss = $obj;
+ define('MAGPIE_CACHE_AGE', $cache_age);
+ define('MAGPIE_CACHE_ON', !$no_cache);
+ define('MAGPIE_FETCH_TIME_OUT', $no_cache ? 15 : 60);
+ define('MAGPIE_CACHE_DIR', CACHE_DIR . "/magpie");
+ $rss = @fetch_rss($fetch_url);
} else {
+ $simplepie_cache_dir = CACHE_DIR . "/simplepie";
- if ($update_method == 3) {
- $rss = fetch_twitter_rss($link, $fetch_url, $owner_uid);
- } else if ($update_method == 1) {
-
- define('MAGPIE_CACHE_AGE', get_feed_update_interval($link, $feed) * 60);
- define('MAGPIE_CACHE_ON', !$no_cache);
- define('MAGPIE_FETCH_TIME_OUT', 60);
- define('MAGPIE_CACHE_DIR', CACHE_DIR . "/magpie");
-
- $rss = @fetch_rss($fetch_url);
- } else {
- $simplepie_cache_dir = CACHE_DIR . "/simplepie";
-
- if (!is_dir($simplepie_cache_dir)) {
- mkdir($simplepie_cache_dir);
- }
-
- $rss = new SimplePie();
- $rss->set_useragent(SELF_USER_AGENT);
- # $rss->set_timeout(10);
- $rss->set_feed_url($fetch_url);
- $rss->set_output_encoding('UTF-8');
- $rss->force_feed(true);
+ if (!is_dir($simplepie_cache_dir)) {
+ mkdir($simplepie_cache_dir);
+ }
- if ($debug_enabled) {
- _debug("feed update interval (sec): " .
- get_feed_update_interval($link, $feed)*60);
- }
+ $rss = new SimplePie();
+ $rss->set_useragent(SELF_USER_AGENT);
+ $rss->set_timeout($no_cache ? 15 : 60);
+ $rss->set_feed_url($fetch_url);
+ $rss->set_output_encoding('UTF-8');
+ //$rss->force_feed(true);
- $rss->enable_cache(!$no_cache);
+ if ($debug_enabled) {
+ _debug("feed update interval (sec): " .
+ get_feed_update_interval($link, $feed)*60);
+ }
- if (!$no_cache) {
- $rss->set_cache_location($simplepie_cache_dir);
- $rss->set_cache_duration(get_feed_update_interval($link, $feed) * 60);
- }
+ $rss->enable_cache(!$no_cache);
- $rss->init();
+ if (!$no_cache) {
+ $rss->set_cache_location($simplepie_cache_dir);
+ $rss->set_cache_duration($cache_age);
}
- if ($memcache && $rss) $memcache->add($obj_id, $rss, 0, 300);
+ $rss->init();
}
// print_r($rss);
// db_query($link, "BEGIN");
- $result = db_query($link, "SELECT title,icon_url,site_url,owner_uid
+ if (DB_TYPE == "pgsql") {
+ $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
+ } else {
+ $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
+ }
+
+ $result = db_query($link, "SELECT title,icon_url,site_url,owner_uid,
+ (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
+ favicon_needs_check
FROM ttrss_feeds WHERE id = '$feed'");
$registered_title = db_fetch_result($result, 0, "title");
$orig_icon_url = db_fetch_result($result, 0, "icon_url");
$orig_site_url = db_fetch_result($result, 0, "site_url");
+ $favicon_needs_check = sql_bool_to_bool(db_fetch_result($result, 0,
+ "favicon_needs_check"));
$owner_uid = db_fetch_result($result, 0, "owner_uid");
if ($use_simplepie) {
- $site_url = $rss->get_link();
+ $site_url = db_escape_string(trim($rss->get_link()));
} else {
- $site_url = $rss->channel["link"];
+ $site_url = db_escape_string(trim($rss->channel["link"]));
+ }
+
+ // weird, weird Magpie
+ if (!$use_simplepie) {
+ if (!$site_url) $site_url = db_escape_string($rss->channel["link_"]);
}
$site_url = rewrite_relative_url($fetch_url, $site_url);
+ $site_url = substr($site_url, 0, 250);
if ($debug_enabled) {
_debug("update_rss_feed: checking favicon...");
}
- check_feed_favicon($site_url, $feed, $link);
+ if ($favicon_needs_check) {
+ check_feed_favicon($site_url, $feed, $link);
+
+ db_query($link, "UPDATE ttrss_feeds SET favicon_last_checked = NOW()
+ WHERE id = '$feed'");
+ }
if (!$registered_title || $registered_title == "[Unknown]") {
title = '$feed_title' WHERE id = '$feed'");
}
- // weird, weird Magpie
- if (!$use_simplepie) {
- if (!$site_url) $site_url = db_escape_string($rss->channel["link_"]);
- }
-
- if ($site_url && $orig_site_url != db_escape_string($site_url)) {
+ if ($site_url && $orig_site_url != $site_url) {
db_query($link, "UPDATE ttrss_feeds SET
site_url = '$site_url' WHERE id = '$feed'");
}
// print "I: " . $rss->channel["image"]["url"];
if (!$use_simplepie) {
- $icon_url = db_escape_string($rss->image["url"]);
+ $icon_url = db_escape_string(trim($rss->image["url"]));
} else {
- $icon_url = db_escape_string($rss->get_image_url());
+ $icon_url = db_escape_string(trim($rss->get_image_url()));
}
+ $icon_url = rewrite_relative_url($fetch_url, $icon_url);
$icon_url = substr($icon_url, 0, 250);
if ($icon_url && $orig_icon_url != $icon_url) {
$filters = load_filters($link, $feed, $owner_uid);
-// if ($debug_enabled) {
-// print_r($filters);
-// }
+ if ($debug_enabled) {
+ //print_r($filters);
+ _debug("update_rss_feed: " . count($filters) . " filters loaded.");
+ }
if ($use_simplepie) {
$iterator = $rss->get_items();
}
foreach ($iterator as $item) {
-
if ($_REQUEST['xdebug'] == 2) {
print_r($item);
}
$entry_content = $item["content:escaped"];
if (!$entry_content) $entry_content = $item["content:encoded"];
- if (!$entry_content) $entry_content = $item["content"]["encoded"];
+ if (!$entry_content && is_array($entry_content)) $entry_content = $item["content"]["encoded"];
if (!$entry_content) $entry_content = $item["content"];
if (is_array($entry_content)) $entry_content = $entry_content[0];
}
}
- if ($cache_images)
+ if ($cache_images && is_writable(CACHE_DIR . '/images'))
$entry_content = cache_images($entry_content, $site_url, $debug_enabled);
if ($_REQUEST["xdebug"] == 2) {
print "update_rss_feed: content: ";
- print_r(htmlspecialchars($entry_content));
+ print $entry_content;
print "\n";
}
$entry_content = db_escape_string($entry_content, false);
- $content_hash = "SHA1:x" . sha1(strip_tags($entry_content));
+ $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
$entry_title = db_escape_string($entry_title);
$entry_link = db_escape_string($entry_link);
$entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
if ($debug_enabled) {
- _debug("update_rss_feed: unfiltered tags found:");
- print_r($entry_tags);
+ //_debug("update_rss_feed: unfiltered tags found:");
+ //print_r($entry_tags);
}
# sanitize content
$published = 'false';
}
+ // N-grams
+
+ if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) {
+
+ $result = db_query($link, "SELECT COUNT(*) AS similar FROM
+ ttrss_entries,ttrss_user_entries
+ WHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day'
+ AND similarity(title, '$entry_title') >= "._NGRAM_TITLE_DUPLICATE_THRESHOLD."
+ AND owner_uid = $owner_uid");
+
+ $ngram_similar = db_fetch_result($result, 0, "similar");
+
+ if ($debug_enabled) {
+ _debug("update_rss_feed: N-gram similar results: $ngram_similar");
+ }
+
+ if ($ngram_similar > 0) {
+ $unread = 'false';
+ }
+ }
+
$result = db_query($link,
"INSERT INTO ttrss_user_entries
(ref_id, owner_uid, feed_id, unread, last_read, marked,
// check for manual tags (we have to do it here since they're loaded from filters)
foreach ($article_filters as $f) {
- if ($f[0] == "tag") {
+ if ($f["type"] == "tag") {
- $manual_tags = trim_array(explode(",", $f[1]));
+ $manual_tags = trim_array(explode(",", $f["param"]));
foreach ($manual_tags as $tag) {
if (tag_is_valid($tag)) {
if (!file_exists($local_filename)) {
$file_content = fetch_file_contents($src);
- if ($file_content) {
+ if ($file_content && strlen($file_content) > 1024) {
file_put_contents($local_filename, $file_content);
}
}
if (file_exists($local_filename)) {
$entry->setAttribute('src', SELF_URL_PATH . '/image.php?url=' .
- htmlspecialchars($src));
+ base64_encode($src));
}
}
}
$node = $doc->getElementsByTagName('body')->item(0);
- return $doc->saveXML($node);
+ return $doc->saveXML($node, LIBXML_NOEMPTYTAG);
}
function expire_cached_files($debug) {
- foreach (array("magpie", "simplepie", "images") as $dir) {
+ foreach (array("magpie", "simplepie", "images", "export") as $dir) {
$cache_dir = CACHE_DIR . "/$dir";
if ($debug) _debug("Expiring $cache_dir");
if (is_writable($cache_dir)) {
$files = glob("$cache_dir/*");
- foreach ($files as $file) {
- if (time() - filemtime($file) > 86400*7) {
- unlink($file);
+ if ($files)
+ foreach ($files as $file) {
+ if (time() - filemtime($file) > 86400*7) {
+ unlink($file);
- ++$num_deleted;
+ ++$num_deleted;
+ }
}
}
- }
if ($debug) _debug("Removed $num_deleted files.");
}
}
/**
 * Splits a URL query string into an associative array of its parameters.
 *
 * Adapted from http://www.php.net/manual/en/function.parse-url.php#104527
 *
 * Keys and values are returned exactly as they appear in $query; no URL
 * decoding is performed. If the same key occurs more than once, the last
 * occurrence wins.
 *
 * @param string $query query string without the leading "?", e.g. "a=1&b=2"
 * @return array map of parameter name => value; a parameter given without
 *               "=" maps to null
 */
function convertUrlQuery($query) {
	$params = array();

	foreach (explode('&', (string) $query) as $param) {
		// Skip empty segments ("", "a=1&&b=2", trailing "&") instead of
		// registering a bogus '' key.
		if ($param === '') continue;

		// A limit of 2 keeps any further "=" inside the value ("k=a=b").
		$item = explode('=', $param, 2);

		// A bare "flag" has no value part; do not read a missing offset.
		$params[$item[0]] = isset($item[1]) ? $item[1] : null;
	}

	return $params;
}
?>