} // function update_daemon_common
+ // $ignore_daemon is no longer used by this function; the parameter only remains in the signature
function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false,
$override_url = false) {
$debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug'];
- if (!$_REQUEST["daemon"] && !$ignore_daemon) {
- return false;
- }
-
if ($debug_enabled) {
_debug("update_rss_feed: start");
}
- if (!$ignore_daemon) {
-
- if (DB_TYPE == "pgsql") {
- $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')";
- } else {
- $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))";
- }
-
- $result = db_query($link, "SELECT id,update_interval,auth_login,
- auth_pass,cache_images,update_method,last_updated
- FROM ttrss_feeds WHERE id = '$feed' AND $updstart_thresh_qpart");
-
- } else {
-
- $result = db_query($link, "SELECT id,update_interval,auth_login,
- feed_url,auth_pass,cache_images,update_method,last_updated,
- mark_unread_on_update, owner_uid, update_on_checksum_change,
- pubsub_state
- FROM ttrss_feeds WHERE id = '$feed'");
-
- }
+ $result = db_query($link, "SELECT id,update_interval,auth_login,
+ feed_url,auth_pass,cache_images,update_method,last_updated,cache_content,
+ mark_unread_on_update, owner_uid, update_on_checksum_change,
+ pubsub_state
+ FROM ttrss_feeds WHERE id = '$feed'");
if (db_num_rows($result) == 0) {
if ($debug_enabled) {
}
$cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
+ $cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content"));
$fetch_url = db_fetch_result($result, 0, "feed_url");
$feed = db_escape_string($feed);
}
if ($debug_enabled) {
- _debug("update_rss_feed: loading filters...");
+ _debug("update_rss_feed: loading filters & labels...");
}
$filters = load_filters($link, $feed, $owner_uid);
+ $labels = get_all_labels($link, $owner_uid);
if ($debug_enabled) {
//print_r($filters);
_debug("update_rss_feed: " . count($filters) . " filters loaded.");
}
+ $filter_plugins = array();
+
+ if (defined('_ARTICLE_FILTER_PLUGINS')) {
+ foreach (explode(",", _ARTICLE_FILTER_PLUGINS) as $p) {
+ $pclass = "filter_" . trim($p);
+
+ if (class_exists($pclass)) {
+ $plugin = new $pclass($link);
+ array_push($filter_plugins, $plugin);
+ }
+ }
+ }
+
+ if ($debug_enabled) {
+ _debug("update_rss_feed: " . count($filter_plugins) . " filter plugins loaded.");
+ }
+
if ($use_simplepie) {
$iterator = $rss->get_items();
} else {
if (!$entry_guid) $entry_guid = make_guid_from_title($item["title"]);
}
+ if ($cache_content) {
+ $entry_guid = "ccache:$entry_guid";
+ }
+
+ if ($auth_login || $auth_pass) {
+ $entry_guid = "auth,$owner_uid:$entry_guid";
+ }
+
if ($debug_enabled) {
_debug("update_rss_feed: guid $entry_guid");
}
}
$entry_content_unescaped = $entry_content;
+ $entry_cached_content = "";
if ($use_simplepie) {
$entry_comments = strip_tags($item->data["comments"]);
$entry_content = db_escape_string($entry_content, false);
- $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
-
$entry_title = db_escape_string($entry_title);
$entry_link = db_escape_string($entry_link);
$entry_comments = mb_substr(db_escape_string($entry_comments), 0, 250);
_debug("update_rss_feed: done collecting data [TITLE:$entry_title]");
}
+ // TODO: less memory-hungry implementation
+ if (count($filter_plugins) > 0) {
+ if ($debug_enabled) {
+ _debug("update_rss_feed: applying plugin filters...");
+ }
+
+ $article = array("owner_uid" => $owner_uid,
+ "title" => $entry_title,
+ "content" => $entry_content,
+ "link" => $entry_link,
+ "tags" => $entry_tags,
+ "author" => $entry_author);
+
+ foreach ($filter_plugins as $plugin) {
+ $article = $plugin->filter_article($article);
+ }
+
+ $entry_title = $article["title"];
+ $entry_content = $article["content"];
+ $entry_tags = $article["tags"];
+ $entry_author = $article["author"];
+ }
+
+ $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
+
db_query($link, "BEGIN");
if (db_num_rows($result) == 0) {
_debug("update_rss_feed: base guid not found");
}
+ if ($cache_content) {
+ if ($debug_enabled) {
+ _debug("update_rss_feed: caching content (initial)...");
+ }
+
+ $entry_cached_content = cache_content($link, $entry_link, $auth_login, $auth_pass);
+
+ if ($cache_images && is_writable(CACHE_DIR . '/images'))
+ $entry_cached_content = cache_images($entry_cached_content, $site_url, $debug_enabled);
+
+ $entry_cached_content = db_escape_string($entry_cached_content, false);
+ }
+
// base post entry does not exist, create it
$result = db_query($link,
updated,
content,
content_hash,
+ cached_content,
no_orig_date,
date_updated,
date_entered,
'$entry_timestamp_fmt',
'$entry_content',
'$content_hash',
+ '$entry_cached_content',
$no_orig_date,
NOW(),
NOW(),
'$entry_comments',
'$num_comments',
'$entry_author')");
+
+ $article_labels = array();
+
} else {
// we keep encountering the entry in feeds, so we need to
// update date_updated column so that we don't get horrible
db_query($link, "UPDATE ttrss_entries SET date_updated = NOW()
WHERE id = '$base_entry_id'");
+
+ $article_labels = get_article_labels($link, $base_entry_id, $owner_uid);
}
// now it should exist, if not - bad luck then
id,content_hash,no_orig_date,title,
".SUBSTRING_FOR_DATE."(date_updated,1,19) as date_updated,
".SUBSTRING_FOR_DATE."(updated,1,19) as updated,
- num_comments
+ num_comments, cached_content
FROM
ttrss_entries
WHERE guid = '$entry_guid'");
$orig_content_hash = db_fetch_result($result, 0, "content_hash");
$orig_title = db_fetch_result($result, 0, "title");
$orig_num_comments = db_fetch_result($result, 0, "num_comments");
+ $orig_cached_content = trim(db_fetch_result($result, 0, "cached_content"));
$orig_date_updated = strtotime(db_fetch_result($result,
0, "date_updated"));
$post_needs_update = false;
$update_insignificant = false;
+ $cached_content_needs_update = false;
if ($orig_num_comments != $num_comments) {
$post_needs_update = true;
if ($content_hash != $orig_content_hash) {
$post_needs_update = true;
$update_insignificant = false;
+ $cached_content_needs_update = true;
+ }
+
+ if ($cache_content) {
+ if ($debug_enabled) {
+ _debug("update_rss_feed: caching content because original checksum changed...");
+ }
+
+ $entry_cached_content = cache_content($link, $entry_link, $auth_login, $auth_pass);
+
+ if ($entry_cached_content) {
+ if ($cache_images && is_writable(CACHE_DIR . '/images'))
+ $entry_cached_content = cache_images($entry_cached_content, $site_url, $debug_enabled);
+
+ $entry_cached_content = db_escape_string($entry_cached_content, false);
+ $post_needs_update = true;
+ } else {
+ $entry_cached_content = db_escape_string($orig_cached_content);
+ }
+ } else {
+ $entry_cached_content = db_escape_string($orig_cached_content);
}
if (db_escape_string($orig_title) != $entry_title) {
db_query($link, "UPDATE ttrss_entries
SET title = '$entry_title', content = '$entry_content',
content_hash = '$content_hash',
+ cached_content = '$entry_cached_content',
updated = '$entry_timestamp_fmt',
num_comments = '$num_comments'
WHERE id = '$ref_id'");
_debug("update_rss_feed: assigning labels...");
}
- assign_article_to_labels($link, $entry_ref_id, $article_filters,
- $owner_uid);
+ assign_article_to_label_filters($link, $entry_ref_id, $article_filters,
+ $owner_uid, $article_labels);
if ($debug_enabled) {
_debug("update_rss_feed: looking for enclosures...");
db_query($link, "COMMIT");
}
+ if (get_pref($link, "AUTO_ASSIGN_LABELS", $owner_uid, false)) {
+ if ($debug_enabled) {
+ _debug("update_rss_feed: auto-assigning labels...");
+ }
+
+ foreach ($labels as $label) {
+ $caption = $label["caption"];
+
+ if (preg_match("/\b$caption\b/i", "$tags_str " . strip_tags($entry_content) . " $entry_title")) {
+ if (!labels_contains_caption($article_labels, $caption)) {
+ label_add_article($link, $entry_ref_id, $caption, $owner_uid);
+ }
+ }
+ }
+ }
+
if ($debug_enabled) {
_debug("update_rss_feed: article processed");
}
return $params;
}
+
+ /**
+  * Collects the actions of every filter whose rules match the given article.
+  *
+  * Each filter is an array with "match_any_rule", "rules" and "actions" keys.
+  * Each rule has a "type" (title, content, both, link, author or tag) and a
+  * "reg_exp" that is matched case-insensitively against the corresponding
+  * article field. Rules with an empty reg_exp are skipped. When match_any_rule
+  * is set a single matching rule is enough; otherwise every evaluated rule has
+  * to match.
+  *
+  * @param array  $filters   filter definitions as described above
+  * @param string $title     article title
+  * @param string $content   article content; line breaks and tabs are removed before matching
+  * @param string $link      article URL
+  * @param mixed  $timestamp accepted but not consulted by any rule type
+  * @param string $author    article author
+  * @param array  $tags      article tags; joined with "," for "tag" rules
+  * @return array flat list of the actions of all matching filters, in filter order
+  */
+ function get_article_filters($filters, $title, $content, $link, $timestamp, $author, $tags) {
+     $matches = array();
+
+     // Derived subjects are built on first use and then shared by all rules.
+     $flat_content = null;
+     $tag_string = null;
+
+     foreach ($filters as $filter) {
+         $match_any_rule = $filter["match_any_rule"];
+         $filter_match = false;
+
+         foreach ($filter["rules"] as $rule) {
+             $match = false;
+             $reg_exp = $rule["reg_exp"];
+
+             if (!$reg_exp)
+                 continue;
+
+             // The expression is wrapped in "/" delimiters, so a bare "/" inside it
+             // (common in link rules) would end the pattern early and the rule would
+             // silently never match. Escape every "/" that is not already escaped,
+             // i.e. not preceded by an odd number of backslashes.
+             $escaped = preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~', '$1\\\\/', $reg_exp);
+
+             if ($escaped === null) {
+                 // escaping itself failed; fall back to the expression as entered
+                 $escaped = $reg_exp;
+             }
+
+             // Invalid user-supplied expressions are tolerated: @ hides the warning
+             // and the rule simply counts as not matching.
+             $pattern = "/$escaped/i";
+
+             switch ($rule["type"]) {
+             case "title":
+                 $match = @preg_match($pattern, $title);
+                 break;
+             case "content":
+             case "both":
+                 // we don't need to deal with multiline regexps
+                 if ($flat_content === null) {
+                     $flat_content = preg_replace("/[\r\n\t]/", "", $content);
+                 }
+
+                 if ($rule["type"] == "both") {
+                     $match = (@preg_match($pattern, $title) || @preg_match($pattern, $flat_content));
+                 } else {
+                     $match = @preg_match($pattern, $flat_content);
+                 }
+                 break;
+             case "link":
+                 $match = @preg_match($pattern, $link);
+                 break;
+             case "author":
+                 $match = @preg_match($pattern, $author);
+                 break;
+             case "tag":
+                 if ($tag_string === null) {
+                     // the cast keeps a missing (null) tag list from breaking join()
+                     $tag_string = join(",", (array) $tags);
+                 }
+
+                 $match = @preg_match($pattern, $tag_string);
+                 break;
+             }
+
+             if ($match_any_rule) {
+                 if ($match) {
+                     $filter_match = true;
+                     break;
+                 }
+             } else {
+                 $filter_match = $match;
+                 if (!$match) {
+                     break;
+                 }
+             }
+         }
+
+         if ($filter_match) {
+             foreach ($filter["actions"] as $action) {
+                 array_push($matches, $action);
+             }
+         }
+     }
+
+     return $matches;
+ }
+
+ /**
+  * Returns the first entry of $filters whose "type" equals $filter_name.
+  *
+  * @param array  $filters     list of arrays, each carrying a "type" key
+  * @param string $filter_name type to look for (loose comparison)
+  * @return array|false the first matching entry, or false when nothing matches
+  */
+ function find_article_filter($filters, $filter_name) {
+     foreach ($filters as $candidate) {
+         if ($candidate["type"] != $filter_name) {
+             continue;
+         }
+
+         return $candidate;
+     }
+
+     return false;
+ }
+
+ /**
+  * Returns every entry of $filters whose "type" equals $filter_name.
+  *
+  * @param array  $filters     list of arrays, each carrying a "type" key
+  * @param string $filter_name type to look for (loose comparison)
+  * @return array matching entries in their original order, re-indexed from 0;
+  *               empty when nothing matches
+  */
+ function find_article_filters($filters, $filter_name) {
+     $found = array();
+
+     foreach ($filters as $candidate) {
+         if ($candidate["type"] != $filter_name) {
+             continue;
+         }
+
+         $found[] = $candidate;
+     }
+
+     return $found;
+ }
+
+ /**
+  * Adds up the "param" values of all entries whose "type" is "score".
+  *
+  * @param array $filters list of arrays with "type" and "param" keys
+  * @return int|float sum of the score params; 0 when there are none
+  */
+ function calculate_article_score($filters) {
+     $total = 0;
+
+     foreach ($filters as $entry) {
+         if ($entry["type"] != "score") {
+             continue;
+         }
+
+         $total = $total + $entry["param"];
+     }
+
+     return $total;
+ }
+
+ /**
+  * Tells whether any entry of $labels has $caption at index 1.
+  *
+  * @param array  $labels  list of arrays whose element at index 1 is compared
+  * @param string $caption caption to look for (loose comparison)
+  * @return bool true on the first hit, false otherwise
+  */
+ function labels_contains_caption($labels, $caption) {
+     $found = false;
+
+     foreach ($labels as $entry) {
+         if ($entry[1] == $caption) {
+             $found = true;
+             break;
+         }
+     }
+
+     return $found;
+ }
+
+ /**
+  * Attaches to an article every label named by a "label" filter action,
+  * skipping captions that are already listed in $article_labels.
+  *
+  * @param mixed $link           database link, handed on to label_add_article()
+  * @param mixed $id             article id, handed on to label_add_article()
+  * @param array $filters        actions with "type" and "param" keys; only type "label" is used
+  * @param mixed $owner_uid      owner id, handed on to label_add_article()
+  * @param array $article_labels labels already on the article, checked via labels_contains_caption()
+  */
+ function assign_article_to_label_filters($link, $id, $filters, $owner_uid, $article_labels) {
+     foreach ($filters as $action) {
+         if ($action["type"] != "label") {
+             continue;
+         }
+
+         $caption = $action["param"];
+
+         if (labels_contains_caption($article_labels, $caption)) {
+             continue;
+         }
+
+         label_add_article($link, $id, $caption, $owner_uid);
+     }
+ }
+
+ /**
+  * Fetches the document at $url and returns the markup of its <body> element.
+  *
+  * @param mixed  $link  database link; not used by this function
+  * @param string $url   address handed to fetch_file_contents()
+  * @param string $login login handed to fetch_file_contents()
+  * @param string $pass  password handed to fetch_file_contents()
+  * @return string the serialized <body> node, or "" when the fetch yields
+  *                nothing or the parsed document has no body element
+  */
+ function cache_content($link, $url, $login, $pass) {
+
+     $content = fetch_file_contents($url, $login, $pass);
+
+     if ($content) {
+         $doc = new DOMDocument();
+         // @ silences the warnings loadHTML raises for malformed markup
+         @$doc->loadHTML($content);
+
+         $node = $doc->getElementsByTagName('body')->item(0);
+
+         if ($node) {
+             // LIBXML_NOEMPTYTAG writes empty elements as <tag></tag> instead of <tag/>
+             return $doc->saveXML($node, LIBXML_NOEMPTYTAG);
+         }
+     }
+
+     return "";
+ }
?>