define('MAGPIE_CACHE_AGE', $cache_age);
define('MAGPIE_CACHE_ON', !$no_cache);
- define('MAGPIE_FETCH_TIME_OUT', 60);
+ define('MAGPIE_FETCH_TIME_OUT', $no_cache ? 15 : 60);
define('MAGPIE_CACHE_DIR', CACHE_DIR . "/magpie");
$rss = @fetch_rss($fetch_url);
$rss = new SimplePie();
$rss->set_useragent(SELF_USER_AGENT);
-# $rss->set_timeout(10);
+ $rss->set_timeout($no_cache ? 15 : 60);
$rss->set_feed_url($fetch_url);
$rss->set_output_encoding('UTF-8');
//$rss->force_feed(true);
}
if ($debug_enabled) {
- _debug("update_rss_feed: loading filters...");
+ _debug("update_rss_feed: loading filters & labels...");
}
$filters = load_filters($link, $feed, $owner_uid);
+ $labels = get_all_labels($link, $owner_uid);
-// if ($debug_enabled) {
-// print_r($filters);
-// }
+ if ($debug_enabled) {
+ //print_r($filters);
+ _debug("update_rss_feed: " . count($filters) . " filters loaded.");
+ }
if ($use_simplepie) {
$iterator = $rss->get_items();
//print_r($entry_tags);
}
- # sanitize content
-
- $entry_content = sanitize_article_content($entry_content);
- $entry_title = sanitize_article_content($entry_title);
-
if ($debug_enabled) {
_debug("update_rss_feed: done collecting data [TITLE:$entry_title]");
}
'$entry_comments',
'$num_comments',
'$entry_author')");
+
+ $article_labels = array();
+
} else {
// we keep encountering the entry in feeds, so we need to
// update date_updated column so that we don't get horrible
db_query($link, "UPDATE ttrss_entries SET date_updated = NOW()
WHERE id = '$base_entry_id'");
+
+ $article_labels = get_article_labels($link, $base_entry_id, $owner_uid);
}
// now it should exist, if not - bad luck then
_debug("update_rss_feed: assigning labels...");
}
- assign_article_to_labels($link, $entry_ref_id, $article_filters,
- $owner_uid);
+ assign_article_to_label_filters($link, $entry_ref_id, $article_filters,
+ $owner_uid, $article_labels);
if ($debug_enabled) {
_debug("update_rss_feed: looking for enclosures...");
db_query($link, "COMMIT");
}
+ if (get_pref($link, "AUTO_ASSIGN_LABELS", $owner_uid, false)) {
+ if ($debug_enabled) {
+ _debug("update_rss_feed: auto-assigning labels...");
+ }
+
+ foreach ($labels as $label) {
+ $caption = $label["caption"];
+
+ if (preg_match("/\b$caption\b/i", "$tags_str " . strip_tags($entry_content) . " $entry_title")) {
+ if (!labels_contains_caption($article_labels, $caption)) {
+ label_add_article($link, $entry_ref_id, $caption, $owner_uid);
+ }
+ }
+ }
+ }
+
if ($debug_enabled) {
_debug("update_rss_feed: article processed");
}
return $params;
}
+
+ // Evaluate every filter against one article and collect the actions of the
+ // filters that match.
+ //
+ // $filters   - list of filters; each entry carries "match_any_rule", "rules"
+ //              (arrays with "type" and "reg_exp") and "actions"
+ // $title, $content, $link, $author - article fields tested by the "title",
+ //              "content" / "both", "link" and "author" rule types
+ // $timestamp - accepted for interface compatibility; no rule type reads it
+ // $tags      - list of tag strings, joined with "," for "tag" rules
+ //
+ // A filter with "match_any_rule" set matches as soon as one rule matches;
+ // otherwise every rule with a non-empty expression has to match.
+ // Returns a flat array holding the "actions" entries of all matching filters.
+ function get_article_filters($filters, $title, $content, $link, $timestamp, $author, $tags) {
+     $matches = array();
+
+     // Derived subjects are built lazily, at most once per call.
+     $flat_content = null;
+     $tag_string = null;
+
+     foreach ($filters as $filter) {
+         $match_any_rule = $filter["match_any_rule"];
+         $filter_match = false;
+
+         foreach ($filter["rules"] as $rule) {
+             $match = false;
+             $reg_exp = $rule["reg_exp"];
+
+             if (!$reg_exp)
+                 continue;
+
+             // The expression gets wrapped in "/" delimiters, so every "/" that
+             // is not already escaped (i.e. is preceded by an even number of
+             // backslashes) has to be escaped here. Otherwise the pattern does
+             // not compile and the rule silently never matches.
+             $pattern = "/" . preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~', '$1\\\\/', $reg_exp) . "/i";
+
+             // "@" stays on purpose: expressions are user-supplied and a broken
+             // one must count as "no match" instead of spamming warnings.
+             switch ($rule["type"]) {
+             case "title":
+                 $match = @preg_match($pattern, $title);
+                 break;
+             case "content":
+             case "both":
+                 // we don't need to deal with multiline regexps
+                 if ($flat_content === null)
+                     $flat_content = preg_replace("/[\r\n\t]/", "", $content);
+
+                 if ($rule["type"] == "both") {
+                     $match = (@preg_match($pattern, $title) || @preg_match($pattern, $flat_content));
+                 } else {
+                     $match = @preg_match($pattern, $flat_content);
+                 }
+                 break;
+             case "link":
+                 $match = @preg_match($pattern, $link);
+                 break;
+             case "author":
+                 $match = @preg_match($pattern, $author);
+                 break;
+             case "tag":
+                 // The cast tolerates a missing (null) tag list.
+                 if ($tag_string === null)
+                     $tag_string = join(",", (array) $tags);
+
+                 $match = @preg_match($pattern, $tag_string);
+                 break;
+             }
+
+             if ($match_any_rule) {
+                 if ($match) {
+                     $filter_match = true;
+                     break;
+                 }
+             } else {
+                 $filter_match = $match;
+                 if (!$match) {
+                     break;
+                 }
+             }
+         }
+
+         if ($filter_match) {
+             foreach ($filter["actions"] as $action) {
+                 array_push($matches, $action);
+             }
+         }
+     }
+
+     return $matches;
+ }
+
+ // Return the first entry of $filters whose "type" equals $filter_name
+ // (loose comparison), or false when no entry qualifies.
+ function find_article_filter($filters, $filter_name) {
+     $found = false;
+
+     foreach ($filters as $candidate) {
+         if ($candidate["type"] != $filter_name) {
+             continue;
+         }
+
+         // Only the first hit is of interest.
+         $found = $candidate;
+         break;
+     }
+
+     return $found;
+ }
+
+ // Collect, in their original order, all entries of $filters whose "type"
+ // equals $filter_name (loose comparison). Returns a possibly empty list.
+ function find_article_filters($filters, $filter_name) {
+     $selected = array();
+
+     foreach ($filters as $candidate) {
+         if ($candidate["type"] != $filter_name) {
+             continue;
+         }
+
+         $selected[] = $candidate;
+     }
+
+     return $selected;
+ }
+
+ // Sum the "param" values of all entries in $filters whose "type" is "score".
+ // Entries of any other type are ignored; the result is 0 when none apply.
+ function calculate_article_score($filters) {
+     $total = 0;
+
+     foreach ($filters as $entry) {
+         if ($entry["type"] != "score") {
+             continue;
+         }
+
+         // "param" relies on PHP's numeric-string coercion, as before.
+         $total = $total + $entry["param"];
+     }
+
+     return $total;
+ }
+
+ // Tell whether $labels already holds a label with the given caption.
+ //
+ // $labels  - list of label rows; the caption is read from index 1 of each row
+ // $caption - caption to look for
+ //
+ // Returns true on the first row whose caption is the same string, else false.
+ function labels_contains_caption($labels, $caption) {
+     // Compare as strings: a loose "==" treats numeric-looking captions such as
+     // "10" and "1e1" (or "7" and "07") as equal, which would make a different
+     // label look already assigned.
+     $wanted = (string) $caption;
+
+     foreach ($labels as $label) {
+         if ((string) $label[1] === $wanted) {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ // Attach article $id to the label named by every "label"-type entry of
+ // $filters, skipping captions that are already present in $article_labels.
+ //
+ // $link and $owner_uid are passed through unchanged to label_add_article().
+ function assign_article_to_label_filters($link, $id, $filters, $owner_uid, $article_labels) {
+     foreach ($filters as $action) {
+         if ($action["type"] != "label") {
+             continue;
+         }
+
+         $caption = $action["param"];
+
+         if (labels_contains_caption($article_labels, $caption)) {
+             continue;
+         }
+
+         label_add_article($link, $id, $caption, $owner_uid);
+     }
+ }
?>