<?php
define('DAEMON_UPDATE_LOGIN_LIMIT', 30);
define('DAEMON_FEED_LIMIT', 100);
- define('DAEMON_SLEEP_INTERVAL', 120);
+ define('DAEMON_SLEEP_INTERVAL', 60);
function update_feedbrowser_cache($link) {
} // function update_daemon_common
- function fetch_twitter_rss($link, $url, $owner_uid) {
-
- require_once 'lib/tmhoauth/tmhOAuth.php';
- require_once "lib/magpierss/rss_fetch.inc";
- require_once 'lib/magpierss/rss_utils.inc';
-
- $result = db_query($link, "SELECT twitter_oauth FROM ttrss_users
- WHERE id = $owner_uid");
-
- $access_token = json_decode(db_fetch_result($result, 0, 'twitter_oauth'), true);
- $url_escaped = db_escape_string($url);
-
- if ($access_token) {
-
- $tmhOAuth = new tmhOAuth(array(
- 'consumer_key' => CONSUMER_KEY,
- 'consumer_secret' => CONSUMER_SECRET,
- 'user_token' => $access_token['oauth_token'],
- 'user_secret' => $access_token['oauth_token_secret'],
- ));
-
- $code = $tmhOAuth->request('GET', $url,
- convertUrlQuery(parse_url($url, PHP_URL_QUERY)));
-
- if ($code == 200) {
-
- $content = $tmhOAuth->response['response'];
-
- define('MAGPIE_CACHE_ON', false);
-
- $rss = new MagpieRSS($content, MAGPIE_OUTPUT_ENCODING,
- MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );
-
- return $rss;
-
- } else {
-
- db_query($link, "UPDATE ttrss_feeds
- SET last_error = 'OAuth authorization failed ($code).'
- WHERE feed_url = '$url_escaped' AND owner_uid = $owner_uid");
- }
-
- } else {
-
- db_query($link, "UPDATE ttrss_feeds
- SET last_error = 'OAuth information not found.'
- WHERE feed_url = '$url_escaped' AND owner_uid = $owner_uid");
-
- return false;
- }
- }
-
function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false,
$override_url = false) {
$cache_age = (is_null($last_updated) || $last_updated == '1970-01-01 00:00:00') ?
-1 : get_feed_update_interval($link, $feed) * 60;
- if ($update_method == 3) {
- $rss = fetch_twitter_rss($link, $fetch_url, $owner_uid);
- } else if ($update_method == 1) {
+ if ($update_method == 1) {
define('MAGPIE_CACHE_AGE', $cache_age);
define('MAGPIE_CACHE_ON', !$no_cache);
- define('MAGPIE_FETCH_TIME_OUT', 60);
+ define('MAGPIE_FETCH_TIME_OUT', $no_cache ? 15 : 60);
define('MAGPIE_CACHE_DIR', CACHE_DIR . "/magpie");
$rss = @fetch_rss($fetch_url);
$rss = new SimplePie();
$rss->set_useragent(SELF_USER_AGENT);
-# $rss->set_timeout(10);
+ $rss->set_timeout($no_cache ? 15 : 60);
$rss->set_feed_url($fetch_url);
$rss->set_output_encoding('UTF-8');
//$rss->force_feed(true);
}
if ($debug_enabled) {
- _debug("update_rss_feed: loading filters...");
+ _debug("update_rss_feed: loading filters & labels...");
}
$filters = load_filters($link, $feed, $owner_uid);
+ $labels = get_all_labels($link, $owner_uid);
-// if ($debug_enabled) {
-// print_r($filters);
-// }
+ if ($debug_enabled) {
+ //print_r($filters);
+ _debug("update_rss_feed: " . count($filters) . " filters loaded.");
+ }
if ($use_simplepie) {
$iterator = $rss->get_items();
}
foreach ($iterator as $item) {
-
if ($_REQUEST['xdebug'] == 2) {
print_r($item);
}
$entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
if ($debug_enabled) {
- _debug("update_rss_feed: unfiltered tags found:");
- print_r($entry_tags);
+ //_debug("update_rss_feed: unfiltered tags found:");
+ //print_r($entry_tags);
}
- # sanitize content
-
- $entry_content = sanitize_article_content($entry_content);
- $entry_title = sanitize_article_content($entry_title);
-
if ($debug_enabled) {
_debug("update_rss_feed: done collecting data [TITLE:$entry_title]");
}
'$entry_comments',
'$num_comments',
'$entry_author')");
+
+ $article_labels = array();
+
} else {
// we keep encountering the entry in feeds, so we need to
// update date_updated column so that we don't get horrible
db_query($link, "UPDATE ttrss_entries SET date_updated = NOW()
WHERE id = '$base_entry_id'");
+
+ $article_labels = get_article_labels($link, $base_entry_id, $owner_uid);
}
// now it should exist, if not - bad luck then
$published = 'false';
}
+ // N-grams
+
+ if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) {
+
+ $result = db_query($link, "SELECT COUNT(*) AS similar FROM
+ ttrss_entries,ttrss_user_entries
+ WHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day'
+ AND similarity(title, '$entry_title') >= "._NGRAM_TITLE_DUPLICATE_THRESHOLD."
+ AND owner_uid = $owner_uid");
+
+ $ngram_similar = db_fetch_result($result, 0, "similar");
+
+ if ($debug_enabled) {
+ _debug("update_rss_feed: N-gram similar results: $ngram_similar");
+ }
+
+ if ($ngram_similar > 0) {
+ $unread = 'false';
+ }
+ }
+
$result = db_query($link,
"INSERT INTO ttrss_user_entries
(ref_id, owner_uid, feed_id, unread, last_read, marked,
_debug("update_rss_feed: assigning labels...");
}
- assign_article_to_labels($link, $entry_ref_id, $article_filters,
- $owner_uid);
+ assign_article_to_label_filters($link, $entry_ref_id, $article_filters,
+ $owner_uid, $article_labels);
if ($debug_enabled) {
_debug("update_rss_feed: looking for enclosures...");
// check for manual tags (we have to do it here since they're loaded from filters)
foreach ($article_filters as $f) {
- if ($f[0] == "tag") {
+ if ($f["type"] == "tag") {
- $manual_tags = trim_array(explode(",", $f[1]));
+ $manual_tags = trim_array(explode(",", $f["param"]));
foreach ($manual_tags as $tag) {
if (tag_is_valid($tag)) {
db_query($link, "COMMIT");
}
+ if (get_pref($link, "AUTO_ASSIGN_LABELS", $owner_uid, false)) {
+ if ($debug_enabled) {
+ _debug("update_rss_feed: auto-assigning labels...");
+ }
+
+ foreach ($labels as $label) {
+ $caption = $label["caption"];
+
+ if (preg_match("/\b$caption\b/i", "$tags_str " . strip_tags($entry_content) . " $entry_title")) {
+ if (!labels_contains_caption($article_labels, $caption)) {
+ label_add_article($link, $entry_ref_id, $caption, $owner_uid);
+ }
+ }
+ }
+ }
+
if ($debug_enabled) {
_debug("update_rss_feed: article processed");
}
return $params;
}
+
+ /**
+  * Collects the actions of every filter whose rules match an article.
+  *
+  * Each filter is an array with "match_any_rule" (truthy: one matching rule
+  * is enough; falsy: every non-empty rule must match), "rules" (each with
+  * "type" and "reg_exp") and "actions". Rules with an empty reg_exp are
+  * skipped. Matching is case-insensitive.
+  *
+  * @param array  $filters   filter definitions as described above
+  * @param string $title     article title
+  * @param string $content   article content; CR/LF/TAB are stripped before matching
+  * @param string $link      article link
+  * @param mixed  $timestamp not used by this function
+  * @param string $author    article author
+  * @param array  $tags      article tags; matched as one comma-joined string
+  * @return array flat list of the actions of all matching filters, in filter order
+  */
+ function get_article_filters($filters, $title, $content, $link, $timestamp, $author, $tags) {
+     $matches = array();
+
+     // Loop-invariant, but only needed by some rule types: computed on first use.
+     $flat_content = null;
+     $tag_string = null;
+
+     foreach ($filters as $filter) {
+         $match_any_rule = $filter["match_any_rule"];
+         $filter_match = false;
+
+         foreach ($filter["rules"] as $rule) {
+             $match = false;
+             $reg_exp = $rule["reg_exp"];
+
+             if (!$reg_exp)
+                 continue;
+
+             // The user expression is wrapped in "/" delimiters, so a bare "/"
+             // inside it (typical for link rules) would end the pattern early
+             // and make it invalid. Escape every "/" that is not already
+             // escaped, i.e. preceded by an even number of backslashes.
+             $pattern = "/" .
+                 preg_replace('#(?<!\\\\)((?:\\\\\\\\)*)/#', '${1}\\\\/', $reg_exp) .
+                 "/i";
+
+             // "@" is kept on purpose: a malformed user regex must simply not
+             // match instead of emitting warnings for every article.
+             switch ($rule["type"]) {
+             case "title":
+                 $match = @preg_match($pattern, $title);
+                 break;
+             case "content":
+             case "both":
+                 // we don't need to deal with multiline regexps
+                 if ($flat_content === null)
+                     $flat_content = preg_replace("/[\r\n\t]/", "", $content);
+
+                 if ($rule["type"] == "content") {
+                     $match = @preg_match($pattern, $flat_content);
+                 } else {
+                     $match = (@preg_match($pattern, $title) ||
+                         @preg_match($pattern, $flat_content));
+                 }
+                 break;
+             case "link":
+                 $match = @preg_match($pattern, $link);
+                 break;
+             case "author":
+                 $match = @preg_match($pattern, $author);
+                 break;
+             case "tag":
+                 if ($tag_string === null)
+                     $tag_string = join(",", $tags);
+
+                 $match = @preg_match($pattern, $tag_string);
+                 break;
+             }
+
+             if ($match_any_rule) {
+                 // any-rule mode: the first hit decides
+                 if ($match) {
+                     $filter_match = true;
+                     break;
+                 }
+             } else {
+                 // all-rules mode: the first miss decides
+                 $filter_match = $match;
+                 if (!$match) {
+                     break;
+                 }
+             }
+         }
+
+         if ($filter_match) {
+             foreach ($filter["actions"] as $action) {
+                 array_push($matches, $action);
+             }
+         }
+     }
+
+     return $matches;
+ }
+
+ /**
+  * Returns the first entry of $filters whose "type" equals $filter_name
+  * (loose comparison), or false when there is none.
+  *
+  * @param array $filters     list of arrays that each carry a "type" key
+  * @param mixed $filter_name type to look for
+  * @return array|false the first matching entry, or false
+  */
+ function find_article_filter($filters, $filter_name) {
+     $found = false;
+
+     foreach ($filters as $candidate) {
+         if ($candidate["type"] != $filter_name)
+             continue;
+
+         // only the first hit is of interest
+         $found = $candidate;
+         break;
+     }
+
+     return $found;
+ }
+
+ /**
+  * Returns every entry of $filters whose "type" equals $filter_name
+  * (loose comparison), in their original order and re-indexed from 0.
+  *
+  * @param array $filters     list of arrays that each carry a "type" key
+  * @param mixed $filter_name type to look for
+  * @return array matching entries; empty when nothing matches
+  */
+ function find_article_filters($filters, $filter_name) {
+     $selected = array();
+
+     foreach ($filters as $candidate) {
+         if ($candidate["type"] != $filter_name)
+             continue;
+
+         $selected[] = $candidate;
+     }
+
+     return $selected;
+ }
+
+ /**
+  * Sums the "param" values of all entries whose "type" is "score".
+  *
+  * @param array $filters list of arrays with "type" and "param" keys
+  * @return int|float total of the score params; 0 when there are none
+  */
+ function calculate_article_score($filters) {
+     $total = 0;
+
+     foreach ($filters as $action) {
+         if ($action["type"] != "score")
+             continue;
+
+         $total = $total + $action["param"];
+     }
+
+     return $total;
+ }
+
+ /**
+  * Tells whether any entry of $labels has $caption at index 1
+  * (loose comparison).
+  *
+  * NOTE(review): presumably $labels is the list built by
+  * get_article_labels(), with the caption stored at index 1 - confirm.
+  *
+  * @param array $labels  list of numerically indexed label arrays
+  * @param mixed $caption caption to look for
+  * @return bool true when a label with that caption is present
+  */
+ function labels_contains_caption($labels, $caption) {
+     $present = false;
+
+     foreach ($labels as $entry) {
+         if ($entry[1] != $caption)
+             continue;
+
+         $present = true;
+         break;
+     }
+
+     return $present;
+ }
+
+ /**
+  * For every "label" action in $filters, calls label_add_article() with the
+  * action's "param" as caption, unless labels_contains_caption() reports that
+  * $article_labels already contains that caption.
+  *
+  * @param mixed $link           passed through to label_add_article()
+  * @param mixed $id             passed through to label_add_article()
+  * @param array $filters        list of arrays with "type" and "param" keys
+  * @param mixed $owner_uid      passed through to label_add_article()
+  * @param array $article_labels labels checked via labels_contains_caption()
+  */
+ function assign_article_to_label_filters($link, $id, $filters, $owner_uid, $article_labels) {
+     foreach ($filters as $action) {
+         if ($action["type"] != "label")
+             continue;
+
+         $caption = $action["param"];
+
+         // skip captions the article already carries
+         if (labels_contains_caption($article_labels, $caption))
+             continue;
+
+         label_add_article($link, $id, $caption, $owner_uid);
+     }
+ }
?>