X-Git-Url: https://git.wh0rd.org/?a=blobdiff_plain;f=include%2Frssfuncs.php;h=33b6ff6e4018f1df17948ce158ecb36c2c37ec85;hb=65af3b2cbba06901612cf721359aea792037cc5a;hp=bf603567729b791c33ef8a23f6651bf97c8efc32;hpb=0edf1d0dc0bce5c1473fce3feda1087784e49e22;p=tt-rss.git diff --git a/include/rssfuncs.php b/include/rssfuncs.php old mode 100755 new mode 100644 index bf603567..33b6ff6e --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -2,7 +2,9 @@ define_default('DAEMON_UPDATE_LOGIN_LIMIT', 30); define_default('DAEMON_FEED_LIMIT', 500); define_default('DAEMON_SLEEP_INTERVAL', 120); - define_default('_MIN_CACHE_IMAGE_SIZE', 1024); + define_default('_MIN_CACHE_FILE_SIZE', 1024); + + // TODO: this needs to be removed from global namespace into classes/RSS.php or something function calculate_article_hash($article, $pluginhost) { $tmp = ""; @@ -23,9 +25,8 @@ function update_feedbrowser_cache() { $result = db_query("SELECT feed_url, site_url, title, COUNT(id) AS subscribers - FROM ttrss_feeds WHERE (SELECT COUNT(id) = 0 FROM ttrss_feeds AS tf - WHERE tf.feed_url = ttrss_feeds.feed_url - AND (private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%')) + FROM ttrss_feeds WHERE feed_url NOT IN (SELECT feed_url FROM ttrss_feeds + WHERE private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%') GROUP BY feed_url, site_url, title ORDER BY subscribers DESC LIMIT 1000"); db_query("BEGIN"); @@ -61,20 +62,7 @@ } - - /** - * Update a feed batch. - * Used by daemons to update n feeds by run. - * Only update feed needing a update, and not being processed - * by another process. - * - * @param mixed $link Database link - * @param integer $limit Maximum number of feeds in update batch. Default to DAEMON_FEED_LIMIT. - * @param boolean $from_http Set to true if you call this function from http to disable cli specific code. - * @param boolean $debug Set to false to disable debug output. Default to true. - * @return void - */ - function update_daemon_common($limit = DAEMON_FEED_LIMIT, $from_http = false, $debug = true) { + function update_daemon_common($limit = DAEMON_FEED_LIMIT, $debug = true) { // Process all other feeds using last_updated and interval parameters $schema_version = get_schema_version(); @@ -179,6 +167,8 @@ $nf = 0; $bstarted = microtime(true); + $batch_owners = array(); + // For each feed, we call the feed update function. foreach ($feeds_to_update as $feed) { if($debug) _debug("Base feed: $feed"); @@ -193,20 +183,20 @@ ttrss_users.id = ttrss_user_prefs.owner_uid AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL' AND ttrss_user_prefs.profile IS NULL AND - feed_url = '".db_escape_string($feed)."' AND - (ttrss_feeds.update_interval > 0 OR - ttrss_user_prefs.value != '-1') + feed_url = '".db_escape_string($feed)."' + $update_limit_qpart $login_thresh_qpart ORDER BY ttrss_feeds.id $query_limit"); if (db_num_rows($tmp_result) > 0) { - $rss = false; - while ($tline = db_fetch_assoc($tmp_result)) { if($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]); + if (array_search($tline["owner_uid"], $batch_owners) === FALSE) + array_push($batch_owners, $tline["owner_uid"]); + $fstarted = microtime(true); - $rss = update_rss_feed($tline["id"], true, false); + update_rss_feed($tline["id"], true, false); _debug_suppress(false); _debug(sprintf(" %.4f (sec)", microtime(true) - $fstarted)); @@ -221,14 +211,18 @@ microtime(true) - $bstarted, (microtime(true) - $bstarted) / $nf)); } - require_once "digest.php"; + foreach ($batch_owners as $owner_uid) { + _debug("Running housekeeping tasks for user $owner_uid..."); + + housekeeping_user($owner_uid); + } // Send feed digests by email if needed. - send_headlines_digests($debug); + Digest::send_headlines_digests($debug); return $nf; - } // function update_daemon_common + } // this is used when subscribing function set_basic_feed_info($feed) { @@ -244,7 +238,7 @@ $auth_login = db_fetch_result($result, 0, "auth_login"); $auth_pass = db_fetch_result($result, 0, "auth_pass"); - if ($auth_pass_encrypted) { + if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) { require_once "crypt.php"; $auth_pass = decrypt_string($auth_pass); } @@ -253,7 +247,7 @@ $feed_data = fetch_file_contents($fetch_url, false, $auth_login, $auth_pass, false, - FEED_FETCH_TIMEOUT_TIMEOUT, + FEED_FETCH_TIMEOUT, 0); global $fetch_curl_used; @@ -291,8 +285,10 @@ } } - // ignore_daemon is not used - function update_rss_feed($feed, $ignore_daemon = false, $no_cache = false, $rss = false) { + /** + * @SuppressWarnings(PHPMD.UnusedFormalParameter) + */ + function update_rss_feed($feed, $no_cache = false) { $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug']; @@ -301,6 +297,13 @@ $result = db_query("SELECT title FROM ttrss_feeds WHERE id = '$feed'"); + + if (db_num_rows($result) == 0) { + _debug("feed $feed NOT FOUND/SKIPPED", $debug_enabled); + user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING); + return false; + } + $title = db_fetch_result($result, 0, "title"); // feed was batch-subscribed or something, we need to get basic info @@ -317,11 +320,6 @@ feed_language FROM ttrss_feeds WHERE id = '$feed'"); - if (db_num_rows($result) == 0) { - _debug("feed $feed NOT FOUND/SKIPPED", $debug_enabled); - return false; - } - $owner_uid = db_fetch_result($result, 0, "owner_uid"); $mark_unread_on_update = sql_bool_to_bool(db_fetch_result($result, 0, "mark_unread_on_update")); @@ -335,7 +333,7 @@ $auth_login = db_fetch_result($result, 0, "auth_login"); $auth_pass = db_fetch_result($result, 0, "auth_pass"); - if ($auth_pass_encrypted) { + if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) { require_once "crypt.php"; $auth_pass = decrypt_string($auth_pass); } @@ -359,110 +357,98 @@ $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid); $pluginhost->load_data(); - if ($rss && is_object($rss) && get_class($rss) == "FeedParser") { - _debug("using previously initialized parser object"); - } else { - $rss_hash = false; - - $force_refetch = isset($_REQUEST["force_refetch"]); - $feed_data = ""; + $rss_hash = false; - foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) { - $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass); - } + $force_refetch = isset($_REQUEST["force_refetch"]); + $feed_data = ""; - // try cache - if (!$feed_data && - file_exists($cache_filename) && - is_readable($cache_filename) && - !$auth_login && !$auth_pass && - filemtime($cache_filename) > time() - 30) { + foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) { + $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass); + } - _debug("using local cache [$cache_filename].", $debug_enabled); + // try cache + if (!$feed_data && + file_exists($cache_filename) && + is_readable($cache_filename) && + !$auth_login && !$auth_pass && + filemtime($cache_filename) > time() - 30) { - @$feed_data = file_get_contents($cache_filename); + _debug("using local cache [$cache_filename].", $debug_enabled); - if ($feed_data) { - $rss_hash = sha1($feed_data); - } + @$feed_data = file_get_contents($cache_filename); - } else { - _debug("local cache will not be used for this feed", $debug_enabled); + if ($feed_data) { + $rss_hash = sha1($feed_data); } - // fetch feed from source - if (!$feed_data) { - _debug("fetching [$fetch_url]...", $debug_enabled); + } else { + _debug("local cache will not be used for this feed", $debug_enabled); + } - if (ini_get("open_basedir") && function_exists("curl_init")) { - _debug("not using CURL due to open_basedir restrictions"); - } + // fetch feed from source + if (!$feed_data) { + _debug("fetching [$fetch_url]...", $debug_enabled); - $feed_data = fetch_file_contents($fetch_url, false, - $auth_login, $auth_pass, false, - $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT, - 0); + if (ini_get("open_basedir") && function_exists("curl_init")) { + _debug("not using CURL due to open_basedir restrictions"); + } - global $fetch_curl_used; + $feed_data = fetch_file_contents($fetch_url, false, + $auth_login, $auth_pass, false, + $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT, + 0); - if (!$fetch_curl_used) { - $tmp = @gzdecode($feed_data); + global $fetch_curl_used; - if ($tmp) $feed_data = $tmp; - } + if (!$fetch_curl_used) { + $tmp = @gzdecode($feed_data); - $feed_data = trim($feed_data); + if ($tmp) $feed_data = $tmp; + } - _debug("fetch done.", $debug_enabled); + $feed_data = trim($feed_data); - // cache vanilla feed data for re-use - if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/simplepie")) { - $new_rss_hash = sha1($feed_data); + _debug("fetch done.", $debug_enabled); - if ($new_rss_hash != $rss_hash) { - _debug("saving $cache_filename", $debug_enabled); - @file_put_contents($cache_filename, $feed_data); - } + // cache vanilla feed data for re-use + if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/simplepie")) { + $new_rss_hash = sha1($feed_data); + + if ($new_rss_hash != $rss_hash) { + _debug("saving $cache_filename", $debug_enabled); + @file_put_contents($cache_filename, $feed_data); } } + } - if (!$feed_data) { - global $fetch_last_error; - global $fetch_last_error_code; + if (!$feed_data) { + global $fetch_last_error; + global $fetch_last_error_code; - _debug("unable to fetch: $fetch_last_error [$fetch_last_error_code]", $debug_enabled); + _debug("unable to fetch: $fetch_last_error [$fetch_last_error_code]", $debug_enabled); - $error_escaped = ''; + $error_escaped = ''; - // If-Modified-Since - if ($fetch_last_error_code != 304) { - $error_escaped = db_escape_string($fetch_last_error); - } else { - _debug("source claims data not modified, nothing to do.", $debug_enabled); - } + // If-Modified-Since + if ($fetch_last_error_code != 304) { + $error_escaped = db_escape_string($fetch_last_error); + } else { + _debug("source claims data not modified, nothing to do.", $debug_enabled); + } - db_query( - "UPDATE ttrss_feeds SET last_error = '$error_escaped', - last_updated = NOW() WHERE id = '$feed'"); + db_query( + "UPDATE ttrss_feeds SET last_error = '$error_escaped', + last_updated = NOW() WHERE id = '$feed'"); - return; - } + return; } foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) { $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed); } - // set last update to now so if anything *simplepie* crashes later we won't be - // continuously failing on the same feed - //db_query("UPDATE ttrss_feeds SET last_updated = NOW() WHERE id = '$feed'"); - - if (!$rss) { - $rss = new FeedParser($feed_data); - $rss->init(); - } - -// print_r($rss); + $rss = new FeedParser($feed_data); + $rss->init(); $feed = db_escape_string($feed); @@ -583,12 +569,12 @@ if ($feed_hub_url && $feed_self_url && function_exists('curl_init') && !ini_get("open_basedir")) { - require_once 'lib/pubsubhubbub/subscriber.php'; + require_once 'lib/pubsubhubbub/Subscriber.php'; $callback_url = get_self_url_prefix() . "/public.php?op=pubsub&id=$feed"; - $s = new Subscriber($feed_hub_url, $callback_url); + $s = new Pubsubhubbub\Subscriber\Subscriber($feed_hub_url, $callback_url); $rc = $s->subscribe($feed_self_url); @@ -658,16 +644,12 @@ print "\n"; } - $entry_comments = $item->get_comments_url(); - $entry_author = $item->get_author(); + $entry_comments = db_escape_string(mb_substr($item->get_comments_url(), 0, 245)); + $num_comments = (int) $item->get_comments_count(); + $entry_author = $item->get_author(); // escaped later $entry_guid = db_escape_string(mb_substr($entry_guid, 0, 245)); - $entry_comments = db_escape_string(mb_substr(trim($entry_comments), 0, 245)); - $entry_author = db_escape_string(mb_substr(trim($entry_author), 0, 245)); - - $num_comments = (int) $item->get_comments_count(); - _debug("author $entry_author", $debug_enabled); _debug("num_comments: $num_comments", $debug_enabled); _debug("looking for tags...", $debug_enabled); @@ -699,10 +681,10 @@ if (db_num_rows($result) != 0) { $base_entry_id = db_fetch_result($result, 0, "id"); $entry_stored_hash = db_fetch_result($result, 0, "content_hash"); - $article_labels = get_article_labels($base_entry_id, $owner_uid); + $article_labels = Article::get_article_labels($base_entry_id, $owner_uid); $entry_language = db_fetch_result($result, 0, "lang"); - $existing_tags = get_article_tags($base_entry_id, $owner_uid); + $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid); $entry_tags = array_unique(array_merge($entry_tags, $existing_tags)); } else { @@ -726,7 +708,8 @@ "language" => $entry_language, "feed" => array("id" => $feed, "fetch_url" => $fetch_url, - "site_url" => $site_url) + "site_url" => $site_url, + "cache_images" => $cache_images) ); $entry_plugin_data = ""; @@ -778,7 +761,7 @@ foreach ($article as $k => $v) { // i guess we'll have to take the risk of 4byte unicode labels & tags here - if (!is_array($article[$k])) { + if (is_string($article[$k])) { $article[$k] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $v); } } @@ -786,12 +769,21 @@ /* Collect article tags here so we could filter by them: */ + $matched_rules = array(); + $article_filters = get_article_filters($filters, $article["title"], - $article["content"], $article["link"], 0, $article["author"], - $article["tags"]); + $article["content"], $article["link"], $article["author"], + $article["tags"], $matched_rules); if ($debug_enabled) { - _debug("article filters: ", $debug_enabled); + _debug("matched filter rules: ", $debug_enabled); + + if (count($matched_rules) != 0) { + print_r($matched_rules); + } + + _debug("filter actions: ", $debug_enabled); + if (count($article_filters) != 0) { print_r($article_filters); } @@ -828,7 +820,7 @@ $entry_tags = $article["tags"]; $entry_guid = db_escape_string($entry_guid); $entry_title = db_escape_string($article["title"]); - $entry_author = db_escape_string($article["author"]); + $entry_author = db_escape_string(mb_substr($article["author"], 0, 245)); $entry_link = db_escape_string($article["link"]); $entry_content = $article["content"]; // escaped below $entry_force_catchup = $article["force_catchup"]; @@ -838,13 +830,16 @@ if ($debug_enabled) { _debug("article labels:", $debug_enabled); - print_r($article_labels); + + if (count($article_labels) != 0) { + print_r($article_labels); + } } _debug("force catchup: $entry_force_catchup"); if ($cache_images && is_writable(CACHE_DIR . '/images')) - cache_images($entry_content, $site_url, $debug_enabled); + cache_media($entry_content, $site_url, $debug_enabled); $entry_content = db_escape_string($entry_content, false); @@ -855,11 +850,11 @@ if (db_num_rows($result) == 0) { - _debug("base guid [$entry_guid] not found", $debug_enabled); + _debug("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", $debug_enabled); // base post entry does not exist, create it - $result = db_query( + db_query( "INSERT INTO ttrss_entries (title, guid, @@ -891,8 +886,6 @@ '$entry_language', '$entry_author')"); - } else { - $base_entry_id = db_fetch_result($result, 0, "id"); } // now it should exist, if not - bad luck then @@ -961,25 +954,6 @@ $published = 'false'; } - // N-grams - - /* if (DB_TYPE == "pgsql" and defined('_NGRAM_TITLE_DUPLICATE_THRESHOLD')) { - - $result = db_query("SELECT COUNT(*) AS similar FROM - ttrss_entries,ttrss_user_entries - WHERE ref_id = id AND updated >= NOW() - INTERVAL '7 day' - AND similarity(title, '$entry_title') >= "._NGRAM_TITLE_DUPLICATE_THRESHOLD." - AND owner_uid = $owner_uid"); - - $ngram_similar = db_fetch_result($result, 0, "similar"); - - _debug("N-gram similar results: $ngram_similar", $debug_enabled); - - if ($ngram_similar > 0) { - $unread = 'false'; - } - } */ - $last_marked = ($marked == 'true') ? 'NOW()' : 'NULL'; $last_published = ($published == 'true') ? 'NOW()' : 'NULL'; @@ -997,7 +971,7 @@ "/public.php?op=rss&id=-2&key=" . get_feed_access_key(-2, false, $owner_uid); - $p = new Publisher(PUBSUBHUBBUB_HUB); + $p = new pubsubhubbub\publisher\Publisher(PUBSUBHUBBUB_HUB); /* $pubsub_result = */ $p->publish_update($rss_link); } @@ -1058,7 +1032,7 @@ _debug("assigning labels [other]...", $debug_enabled); foreach ($article_labels as $label) { - label_add_article($entry_ref_id, $label[1], $owner_uid); + Labels::add_article($entry_ref_id, $label[1], $owner_uid); } _debug("assigning labels [filters]...", $debug_enabled); @@ -1077,11 +1051,15 @@ if (is_array($encs)) { foreach ($encs as $e) { $e_item = array( - $e->link, $e->type, $e->length, $e->title, $e->width, $e->height); + rewrite_relative_url($site_url, $e->link), + $e->type, $e->length, $e->title, $e->width, $e->height); array_push($enclosures, $e_item); } } + if ($cache_images && is_writable(CACHE_DIR . '/images')) + cache_enclosures($enclosures, $site_url, $debug_enabled); + if ($debug_enabled) { _debug("article enclosures:", $debug_enabled); print_r($enclosures); @@ -1219,14 +1197,39 @@ last_updated = NOW() WHERE id = '$feed'"); unset($rss); + return; } _debug("done", $debug_enabled); - return $rss; + return true; + } + + function cache_enclosures($enclosures, $site_url, $debug) { + foreach ($enclosures as $enc) { + + if (preg_match("/(image|audio|video)/", $enc[1])) { + + $src = rewrite_relative_url($site_url, $enc[0]); + + $local_filename = CACHE_DIR . "/images/" . sha1($src); + + if ($debug) _debug("cache_enclosures: downloading: $src to $local_filename"); + + if (!file_exists($local_filename)) { + $file_content = fetch_file_contents($src); + + if ($file_content && strlen($file_content) > _MIN_CACHE_FILE_SIZE) { + file_put_contents($local_filename, $file_content); + } + } else { + touch($local_filename); + } + } + } } - function cache_images($html, $site_url, $debug) { + function cache_media($html, $site_url, $debug) { libxml_use_internal_errors(true); $charset_hack = ' @@ -1237,20 +1240,20 @@ $doc->loadHTML($charset_hack . $html); $xpath = new DOMXPath($doc); - $entries = $xpath->query('(//img[@src])'); + $entries = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])'); foreach ($entries as $entry) { - if ($entry->hasAttribute('src')) { + if ($entry->hasAttribute('src') && strpos($entry->getAttribute('src'), "data:") !== 0) { $src = rewrite_relative_url($site_url, $entry->getAttribute('src')); - $local_filename = CACHE_DIR . "/images/" . sha1($src) . ".png"; + $local_filename = CACHE_DIR . "/images/" . sha1($src); - if ($debug) _debug("cache_images: downloading: $src to $local_filename"); + if ($debug) _debug("cache_media: downloading: $src to $local_filename"); if (!file_exists($local_filename)) { $file_content = fetch_file_contents($src); - if ($file_content && strlen($file_content) > _MIN_CACHE_IMAGE_SIZE) { + if ($file_content && strlen($file_content) > _MIN_CACHE_FILE_SIZE) { file_put_contents($local_filename, $file_content); } } else { @@ -1340,7 +1343,7 @@ return $params; } - function get_article_filters($filters, $title, $content, $link, $timestamp, $author, $tags) { + function get_article_filters($filters, $title, $content, $link, $author, $tags, &$matched_rules = false) { $matches = array(); foreach ($filters as $filter) { @@ -1358,29 +1361,29 @@ switch ($rule["type"]) { case "title": - $match = @preg_match("/$reg_exp/i", $title); + $match = @preg_match("/$reg_exp/iu", $title); break; case "content": // we don't need to deal with multiline regexps $content = preg_replace("/[\r\n\t]/", "", $content); - $match = @preg_match("/$reg_exp/i", $content); + $match = @preg_match("/$reg_exp/iu", $content); break; case "both": // we don't need to deal with multiline regexps $content = preg_replace("/[\r\n\t]/", "", $content); - $match = (@preg_match("/$reg_exp/i", $title) || @preg_match("/$reg_exp/i", $content)); + $match = (@preg_match("/$reg_exp/iu", $title) || @preg_match("/$reg_exp/iu", $content)); break; case "link": - $match = @preg_match("/$reg_exp/i", $link); + $match = @preg_match("/$reg_exp/iu", $link); break; case "author": - $match = @preg_match("/$reg_exp/i", $author); + $match = @preg_match("/$reg_exp/iu", $author); break; case "tag": foreach ($tags as $tag) { - if (@preg_match("/$reg_exp/i", $tag)) { + if (@preg_match("/$reg_exp/iu", $tag)) { $match = true; break; } @@ -1406,6 +1409,8 @@ if ($inverse) $filter_match = !$filter_match; if ($filter_match) { + if (is_array($matched_rules)) array_push($matched_rules, $rule); + foreach ($filter["actions"] AS $action) { array_push($matches, $action); @@ -1463,7 +1468,7 @@ foreach ($filters as $f) { if ($f["type"] == "label") { if (!labels_contains_caption($article_labels, $f["param"])) { - label_add_article($id, $f["param"], $owner_uid); + Labels::add_article($id, $f["param"], $owner_uid); } } } @@ -1474,15 +1479,6 @@ mb_strtolower(strip_tags($title), 'utf-8')); } - /* function verify_feed_xml($feed_data) { - libxml_use_internal_errors(true); - $doc = new DOMDocument(); - $doc->loadXML($feed_data); - $error = libxml_get_last_error(); - libxml_clear_errors(); - return $error; - } */ - function cleanup_counters_cache($debug) { $result = db_query("DELETE FROM ttrss_counters_cache WHERE feed_id > 0 AND @@ -1498,7 +1494,15 @@ ttrss_cat_counters_cache.owner_uid = ttrss_feed_categories.owner_uid) = 0"); $crows = db_affected_rows($result); - _debug("Removed $frows (feeds) $crows (cats) orphaned counter cache entries."); + if ($debug) _debug("Removed $frows (feeds) $crows (cats) orphaned counter cache entries."); + } + + function housekeeping_user($owner_uid) { + $tmph = new PluginHost(); + + load_user_plugins($owner_uid, $tmph); + + $tmph->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", ""); } function housekeeping_common($debug) { @@ -1509,13 +1513,62 @@ $count = update_feedbrowser_cache(); _debug("Feedbrowser updated, $count feeds processed."); - purge_orphans( true); + Article::purge_orphans( true); cleanup_counters_cache($debug); //$rc = cleanup_tags( 14, 50000); //_debug("Cleaned $rc cached tags."); PluginHost::getInstance()->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", ""); + } + + function check_feed_favicon($site_url, $feed) { + # print "FAVICON [$site_url]: $favicon_url\n"; + + $icon_file = ICONS_DIR . "/$feed.ico"; + if (!file_exists($icon_file)) { + $favicon_url = get_favicon_url($site_url); + + if ($favicon_url) { + // Limiting to "image" type misses those served with text/plain + $contents = fetch_file_contents($favicon_url); // , "image"); + + if ($contents) { + // Crude image type matching. + // Patterns gleaned from the file(1) source code. + if (preg_match('/^\x00\x00\x01\x00/', $contents)) { + // 0 string \000\000\001\000 MS Windows icon resource + //error_log("check_feed_favicon: favicon_url=$favicon_url isa MS Windows icon resource"); + } + elseif (preg_match('/^GIF8/', $contents)) { + // 0 string GIF8 GIF image data + //error_log("check_feed_favicon: favicon_url=$favicon_url isa GIF image"); + } + elseif (preg_match('/^\x89PNG\x0d\x0a\x1a\x0a/', $contents)) { + // 0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data + //error_log("check_feed_favicon: favicon_url=$favicon_url isa PNG image"); + } + elseif (preg_match('/^\xff\xd8/', $contents)) { + // 0 beshort 0xffd8 JPEG image data + //error_log("check_feed_favicon: favicon_url=$favicon_url isa JPG image"); + } + else { + //error_log("check_feed_favicon: favicon_url=$favicon_url isa UNKNOWN type"); + $contents = ""; + } + } + + if ($contents) { + $fp = @fopen($icon_file, "w"); + + if ($fp) { + fwrite($fp, $contents); + fclose($fp); + chmod($icon_file, 0644); + } + } + } + return $icon_file; + } } -?>