]> git.wh0rd.org - tt-rss.git/blobdiff - include/rssfuncs.php
modify include path order (closes #514)
[tt-rss.git] / include / rssfuncs.php
index 2aaff67cf90fe58a8362872421f2a0f76d48611a..b5949d720dc82cb860d310df5fd8bc9074d1a650 100644 (file)
 
        } // function update_daemon_common
 
+       // $ignore_daemon is no longer used by this function; the parameter remains in the signature
        function update_rss_feed($link, $feed, $ignore_daemon = false, $no_cache = false,
                $override_url = false) {
 
 
                $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug'];
 
-               if (!$_REQUEST["daemon"] && !$ignore_daemon) {
-                       return false;
-               }
-
                if ($debug_enabled) {
                        _debug("update_rss_feed: start");
                }
 
-               if (!$ignore_daemon) {
-
-                       if (DB_TYPE == "pgsql") {
-                                       $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')";
-                               } else {
-                                       $updstart_thresh_qpart = "(ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))";
-                               }
-
-                       $result = db_query($link, "SELECT id,update_interval,auth_login,
-                               auth_pass,cache_images,update_method,last_updated
-                               FROM ttrss_feeds WHERE id = '$feed' AND $updstart_thresh_qpart");
-
-               } else {
-
-                       $result = db_query($link, "SELECT id,update_interval,auth_login,
-                               feed_url,auth_pass,cache_images,update_method,last_updated,
-                               mark_unread_on_update, owner_uid, update_on_checksum_change,
-                               pubsub_state
-                               FROM ttrss_feeds WHERE id = '$feed'");
-
-               }
+               $result = db_query($link, "SELECT id,update_interval,auth_login,
+                       feed_url,auth_pass,cache_images,update_method,last_updated,cache_content,
+                       mark_unread_on_update, owner_uid, update_on_checksum_change,
+                       pubsub_state
+                       FROM ttrss_feeds WHERE id = '$feed'");
 
                if (db_num_rows($result) == 0) {
                        if ($debug_enabled) {
                }
 
                $cache_images = sql_bool_to_bool(db_fetch_result($result, 0, "cache_images"));
+               $cache_content = sql_bool_to_bool(db_fetch_result($result, 0, "cache_content"));
                $fetch_url = db_fetch_result($result, 0, "feed_url");
 
                $feed = db_escape_string($feed);
                        }
 
                        if ($debug_enabled) {
-                               _debug("update_rss_feed: loading filters...");
+                               _debug("update_rss_feed: loading filters & labels...");
                        }
 
                        $filters = load_filters($link, $feed, $owner_uid);
+                       $labels = get_all_labels($link, $owner_uid);
 
                        if ($debug_enabled) {
                                //print_r($filters);
                                _debug("update_rss_feed: " . count($filters) . " filters loaded.");
                        }
 
+                       $filter_plugins = array();
+
+                       if (defined('_ARTICLE_FILTER_PLUGINS')) {
+                               foreach (explode(",", _ARTICLE_FILTER_PLUGINS) as $p) {
+                                       $pclass = "filter_" . trim($p);
+
+                                       if (class_exists($pclass)) {
+                                               $plugin = new $pclass($link);
+                                               array_push($filter_plugins, $plugin);
+                                       }
+                               }
+                       }
+
+                       if ($debug_enabled) {
+                               _debug("update_rss_feed: " . count($filter_plugins) . " filter plugins loaded.");
+                       }
+
                        if ($use_simplepie) {
                                $iterator = $rss->get_items();
                        } else {
                                        if (!$entry_guid) $entry_guid = make_guid_from_title($item["title"]);
                                }
 
+                               if ($cache_content) {
+                                       $entry_guid = "ccache:$entry_guid";
+                               }
+
+                               if ($auth_login || $auth_pass) {
+                                       $entry_guid = "auth,$owner_uid:$entry_guid";
+                               }
+
                                if ($debug_enabled) {
                                        _debug("update_rss_feed: guid $entry_guid");
                                }
                                }
 
                                $entry_content_unescaped = $entry_content;
+                               $entry_cached_content = "";
 
                                if ($use_simplepie) {
                                        $entry_comments = strip_tags($item->data["comments"]);
 
                                $entry_content = db_escape_string($entry_content, false);
 
-                               $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
-
                                $entry_title = db_escape_string($entry_title);
                                $entry_link = db_escape_string($entry_link);
                                $entry_comments = mb_substr(db_escape_string($entry_comments), 0, 250);
                                        _debug("update_rss_feed: done collecting data [TITLE:$entry_title]");
                                }
 
+                               // TODO: less memory-hungry implementation
+                               if (count($filter_plugins) > 0) {
+                                       if ($debug_enabled) {
+                                               _debug("update_rss_feed: applying plugin filters...");
+                                       }
+
+                                       $article = array("owner_uid" => $owner_uid,
+                                               "title" => $entry_title,
+                                               "content" => $entry_content,
+                                               "link" => $entry_link,
+                                               "tags" => $entry_tags,
+                                               "author" => $entry_author);
+
+                                       foreach ($filter_plugins as $plugin) {
+                                               $article = $plugin->filter_article($article);
+                                       }
+
+                                       $entry_title = $article["title"];
+                                       $entry_content = $article["content"];
+                                       $entry_tags = $article["tags"];
+                                       $entry_author = $article["author"];
+                               }
+
+                               $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
+
                                db_query($link, "BEGIN");
 
                                if (db_num_rows($result) == 0) {
                                                _debug("update_rss_feed: base guid not found");
                                        }
 
+                                       if ($cache_content) {
+                                               if ($debug_enabled) {
+                                                       _debug("update_rss_feed: caching content (initial)...");
+                                               }
+
+                                               $entry_cached_content = cache_content($link, $entry_link, $auth_login, $auth_pass);
+
+                                               if ($cache_images && is_writable(CACHE_DIR . '/images'))
+                                                       $entry_cached_content = cache_images($entry_cached_content, $site_url, $debug_enabled);
+
+                                               $entry_cached_content = db_escape_string($entry_cached_content, false);
+                                       }
+
                                        // base post entry does not exist, create it
 
                                        $result = db_query($link,
                                                        updated,
                                                        content,
                                                        content_hash,
+                                                       cached_content,
                                                        no_orig_date,
                                                        date_updated,
                                                        date_entered,
                                                        '$entry_timestamp_fmt',
                                                        '$entry_content',
                                                        '$content_hash',
+                                                       '$entry_cached_content',
                                                        $no_orig_date,
                                                        NOW(),
                                                        NOW(),
                                                        '$entry_comments',
                                                        '$num_comments',
                                                        '$entry_author')");
+
+                                       $article_labels = array();
+
                                } else {
                                        // we keep encountering the entry in feeds, so we need to
                                        // update date_updated column so that we don't get horrible
 
                                        db_query($link, "UPDATE ttrss_entries SET date_updated = NOW()
                                                WHERE id = '$base_entry_id'");
+
+                                       $article_labels = get_article_labels($link, $base_entry_id, $owner_uid);
                                }
 
                                // now it should exist, if not - bad luck then
                                                id,content_hash,no_orig_date,title,
                                                ".SUBSTRING_FOR_DATE."(date_updated,1,19) as date_updated,
                                                ".SUBSTRING_FOR_DATE."(updated,1,19) as updated,
-                                               num_comments
+                                               num_comments, cached_content
                                        FROM
                                                ttrss_entries
                                        WHERE guid = '$entry_guid'");
                                        $orig_content_hash = db_fetch_result($result, 0, "content_hash");
                                        $orig_title = db_fetch_result($result, 0, "title");
                                        $orig_num_comments = db_fetch_result($result, 0, "num_comments");
+                                       $orig_cached_content = trim(db_fetch_result($result, 0, "cached_content"));
                                        $orig_date_updated = strtotime(db_fetch_result($result,
                                                0, "date_updated"));
 
 
                                        $post_needs_update = false;
                                        $update_insignificant = false;
+                                       $cached_content_needs_update = false;
 
                                        if ($orig_num_comments != $num_comments) {
                                                $post_needs_update = true;
                                        if ($content_hash != $orig_content_hash) {
                                                $post_needs_update = true;
                                                $update_insignificant = false;
+                                               $cached_content_needs_update = true;
+                                       }
+
+                                       if ($cache_content) {
+                                               if ($debug_enabled) {
+                                                       _debug("update_rss_feed: caching content because original checksum changed...");
+                                               }
+
+                                               $entry_cached_content = cache_content($link, $entry_link, $auth_login, $auth_pass);
+
+                                               if ($entry_cached_content) {
+                                                       if ($cache_images && is_writable(CACHE_DIR . '/images'))
+                                                               $entry_cached_content = cache_images($entry_cached_content, $site_url, $debug_enabled);
+
+                                                       $entry_cached_content = db_escape_string($entry_cached_content, false);
+                                                       $post_needs_update = true;
+                                               } else {
+                                                       $entry_cached_content = db_escape_string($orig_cached_content);
+                                               }
+                                       } else {
+                                               $entry_cached_content = db_escape_string($orig_cached_content);
                                        }
 
                                        if (db_escape_string($orig_title) != $entry_title) {
                                                db_query($link, "UPDATE ttrss_entries
                                                        SET title = '$entry_title', content = '$entry_content',
                                                                content_hash = '$content_hash',
+                                                               cached_content = '$entry_cached_content',
                                                                updated = '$entry_timestamp_fmt',
                                                                num_comments = '$num_comments'
                                                        WHERE id = '$ref_id'");
                                        _debug("update_rss_feed: assigning labels...");
                                }
 
-                               assign_article_to_labels($link, $entry_ref_id, $article_filters,
-                                       $owner_uid);
+                               assign_article_to_label_filters($link, $entry_ref_id, $article_filters,
+                                       $owner_uid, $article_labels);
 
                                if ($debug_enabled) {
                                        _debug("update_rss_feed: looking for enclosures...");
                                        db_query($link, "COMMIT");
                                }
 
+                               if (get_pref($link, "AUTO_ASSIGN_LABELS", $owner_uid, false)) {
+                                       if ($debug_enabled) {
+                                               _debug("update_rss_feed: auto-assigning labels...");
+                                       }
+
+                                       foreach ($labels as $label) {
+                                               $caption = $label["caption"];
+
+                                               if (preg_match("/\b$caption\b/i", "$tags_str " . strip_tags($entry_content) . " $entry_title")) {
+                                                       if (!labels_contains_caption($article_labels, $caption)) {
+                                                               label_add_article($link, $entry_ref_id, $caption, $owner_uid);
+                                                       }
+                                               }
+                                       }
+                               }
+
                                if ($debug_enabled) {
                                        _debug("update_rss_feed: article processed");
                                }
 
                return $params;
        }
+
+       // Collects the actions of every filter whose rules match the article.
+       //
+       // $filters   - list of filters; each has "match_any_rule", "rules"
+       //              (entries with "reg_exp" and "type") and "actions"
+       // $title, $content, $link, $author - article fields tested by rules
+       //              of type title / content / both / link / author
+       // $timestamp - accepted for interface compatibility; not used here
+       // $tags      - article tags; joined with "," for rules of type "tag"
+       //
+       // Returns a flat array with the actions of all matching filters, in
+       // filter order. With match_any_rule set one matching rule is enough,
+       // otherwise every rule that has a non-empty reg_exp must match.
+       // Rules with an empty reg_exp are skipped; a pattern that fails to
+       // compile counts as "no match".
+       function get_article_filters($filters, $title, $content, $link, $timestamp, $author, $tags) {
+               $matches = array();
+
+               // content with line breaks and tabs removed, computed on first use
+               $flat_content = null;
+
+               foreach ($filters as $filter) {
+                       $match_any_rule = $filter["match_any_rule"];
+                       $filter_match = false;
+
+                       foreach ($filter["rules"] as $rule) {
+                               $match = false;
+
+                               if (!$rule["reg_exp"])
+                                       continue;
+
+                               // The expression is wrapped in "/" delimiters below, so a bare
+                               // "/" (e.g. in a URL) would end the pattern early. Escape bare
+                               // slashes only: backslash-escaped pairs are consumed as a unit,
+                               // which keeps an already written "\/" intact.
+                               $reg_exp = preg_replace('~\G((?:[^\\\\/]|\\\\.)*)/~s', '$1\\\\/', $rule["reg_exp"]);
+
+                               // never fall through to an empty (match-everything) pattern
+                               if ($reg_exp === null)
+                                       $reg_exp = $rule["reg_exp"];
+
+                               $pattern = "/$reg_exp/i";
+                               $type = $rule["type"];
+
+                               // we don't need to deal with multiline regexps
+                               if (($type == "content" || $type == "both") && $flat_content === null) {
+                                       $flat_content = preg_replace("/[\r\n\t]/", "", $content);
+                               }
+
+                               // @: user supplied expressions may still be invalid
+                               switch ($type) {
+                               case "title":
+                                       $match = @preg_match($pattern, $title);
+                                       break;
+                               case "content":
+                                       $match = @preg_match($pattern, $flat_content);
+                                       break;
+                               case "both":
+                                       $match = (@preg_match($pattern, $title) || @preg_match($pattern, $flat_content));
+                                       break;
+                               case "link":
+                                       $match = @preg_match($pattern, $link);
+                                       break;
+                               case "author":
+                                       $match = @preg_match($pattern, $author);
+                                       break;
+                               case "tag":
+                                       // tolerate callers that pass tags as a plain string
+                                       $tag_string = is_array($tags) ? join(",", $tags) : (string) $tags;
+                                       $match = @preg_match($pattern, $tag_string);
+                                       break;
+                               }
+
+                               if ($match_any_rule) {
+                                       // "any" mode: the first hit settles it
+                                       if ($match) {
+                                               $filter_match = true;
+                                               break;
+                                       }
+                               } else {
+                                       // "all" mode: the first miss settles it
+                                       $filter_match = $match;
+                                       if (!$match) {
+                                               break;
+                                       }
+                               }
+                       }
+
+                       if ($filter_match) {
+                               foreach ($filter["actions"] AS $action) {
+                                       array_push($matches, $action);
+                               }
+                       }
+               }
+
+               return $matches;
+       }
+
+       // Returns the first entry of $filters whose "type" equals $filter_name
+       // (loose comparison), or false when there is no such entry.
+       function find_article_filter($filters, $filter_name) {
+               $found = false;
+
+               foreach ($filters as $candidate) {
+                       if ($candidate["type"] != $filter_name)
+                               continue;
+
+                       // only the first hit is of interest
+                       $found = $candidate;
+                       break;
+               }
+
+               return $found;
+       }
+
+       // Returns all entries of $filters whose "type" equals $filter_name
+       // (loose comparison), in their original order and re-indexed from 0.
+       function find_article_filters($filters, $filter_name) {
+               $selected = array();
+
+               foreach ($filters as $candidate) {
+                       if ($candidate["type"] != $filter_name)
+                               continue;
+
+                       $selected[] = $candidate;
+               }
+
+               return $selected;
+       }
+
+       // Adds up the "param" values of all entries of $filters whose "type"
+       // is "score" and returns the sum (0 when there are none).
+       function calculate_article_score($filters) {
+               $total = 0;
+
+               foreach ($filters as $action) {
+                       // every other action type has no effect on the score
+                       if ($action["type"] != "score")
+                               continue;
+
+                       $total = $total + $action["param"];
+               }
+
+               return $total;
+       }
+
+       // Tells whether $labels already contains a label captioned $caption.
+       //
+       // $labels  - list of label rows; the caption is read from index 1
+       // $caption - caption to look for
+       //
+       // Returns true on the first row with exactly that caption, else false.
+       function labels_contains_caption($labels, $caption) {
+               $wanted = (string) $caption;
+
+               foreach ($labels as $label) {
+                       // Compare as strings: a loose == treats numeric looking captions
+                       // such as "1e3" and "1000" as equal, which would report a
+                       // different label as already present.
+                       if ((string) $label[1] === $wanted) {
+                               return true;
+                       }
+               }
+
+               return false;
+       }
+
+       // Attaches article $id to the label named by each "label" action in
+       // $filters, using label_add_article(). Labels whose caption is already
+       // listed in $article_labels (per labels_contains_caption()) are left out.
+       function assign_article_to_label_filters($link, $id, $filters, $owner_uid, $article_labels) {
+               foreach ($filters as $action) {
+                       // only label actions are relevant here
+                       if ($action["type"] != "label")
+                               continue;
+
+                       if (labels_contains_caption($article_labels, $action["param"]))
+                               continue;
+
+                       label_add_article($link, $id, $action["param"], $owner_uid);
+               }
+       }
+
+       // Downloads the document at $url and returns its <body> element
+       // serialized as XML (empty elements written with explicit end tags).
+       //
+       // $link         - not used by this function
+       // $url          - address passed to fetch_file_contents()
+       // $login, $pass - credentials passed to fetch_file_contents()
+       //
+       // Returns the serialized body, or "" when nothing was fetched, the
+       // document has no body element or serialization fails.
+       function cache_content($link, $url, $login, $pass) {
+
+               $content = fetch_file_contents($url, $login, $pass);
+
+               if (!$content) {
+                       return "";
+               }
+
+               $doc = new DOMDocument();
+
+               // parser warnings are suppressed on purpose
+               @$doc->loadHTML($content);
+
+               $node = $doc->getElementsByTagName('body')->item(0);
+
+               if (!$node) {
+                       return "";
+               }
+
+               $body = $doc->saveXML($node, LIBXML_NOEMPTYTAG);
+
+               // saveXML() signals failure with false; keep the "" result used above
+               return ($body === false) ? "" : $body;
+       }
 ?>