git.wh0rd.org - tt-rss.git/blobdiff - classes/rssutils.php
remove cache/simplepie
[tt-rss.git] / classes / rssutils.php
old mode 100644 (file)
new mode 100755 (executable)
index b6a00d3..e76dd3e
@@ -16,6 +16,11 @@ class RSSUtils {
                return sha1(implode(",", $pluginhost->get_plugin_names()) . $tmp);
        }
 
+       // Replaces (not removes) every 4-byte UTF-8 character (U+10000..U+10FFFF, e.g. emoji) in $str with U+FFFD, for MySQL setups not using utf8mb4
+       static function strip_utf8mb4($str) {
+               return preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $str); // NOTE(review): preg_replace() can return null on PCRE error (e.g. malformed UTF-8 under /u) -- confirm callers tolerate null
+       }
+
        static function update_feedbrowser_cache() {
 
                $pdo = Db::pdo();
@@ -45,8 +50,8 @@ class RSSUtils {
                        if (!$tmph->fetch()) {
 
                                $tmph = $pdo->prepare("INSERT INTO ttrss_feedbrowser_cache
-                                       (feed_url, site_url, title, subscribers) 
-                                       VALUES 
+                                       (feed_url, site_url, title, subscribers)
+                                       VALUES
                                        (?, ?, ?, ?)");
 
                                $tmph->execute([$feed_url, $site_url, $title, $subscribers]);
@@ -182,7 +187,19 @@ class RSSUtils {
                                        array_push($batch_owners, $tline["owner_uid"]);
 
                                $fstarted = microtime(true);
-                               RSSUtils::update_rss_feed($tline["id"], true, false);
+
+                               try {
+                                       RSSUtils::update_rss_feed($tline["id"], true, false);
+                               } catch (PDOException $e) {
+                                       Logger::get()->log_error(E_USER_NOTICE, $e->getMessage(), $e->getFile(), $e->getLine(), $e->getTraceAsString());
+
+                                       try {
+                                               $pdo->rollback();
+                                       } catch (PDOException $e) {
+                                               // it doesn't matter if there wasn't actually anything to rollback, PDO Exception can be
+                                               // thrown outside of an active transaction during feed update
+                                       }
+                               }
                                _debug_suppress(false);
 
                                _debug(sprintf("    %.4f (sec)", microtime(true) - $fstarted));
@@ -213,24 +230,15 @@ class RSSUtils {
 
                $pdo = Db::pdo();
 
-               $sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login,auth_pass_encrypted
+               $sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login
                                FROM ttrss_feeds WHERE id = ?");
                $sth->execute([$feed]);
 
                if ($row = $sth->fetch()) {
 
                        $owner_uid = $row["owner_uid"];
-
-                       $auth_pass_encrypted = $row["auth_pass_encrypted"];
-
                        $auth_login = $row["auth_login"];
                        $auth_pass = $row["auth_pass"];
-
-                       if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) {
-                               require_once "crypt.php";
-                               $auth_pass = decrypt_string($auth_pass);
-                       }
-
                        $fetch_url = $row["feed_url"];
 
                        $pluginhost = new PluginHost();
@@ -304,7 +312,7 @@ class RSSUtils {
         */
        static function update_rss_feed($feed, $no_cache = false) {
 
-               $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug'];
+               $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || clean($_REQUEST['xdebug']);
 
                _debug_suppress(!$debug_enabled);
                _debug("start", $debug_enabled);
@@ -332,17 +340,16 @@ class RSSUtils {
                $sth = $pdo->prepare("SELECT id,update_interval,auth_login,
                        feed_url,auth_pass,cache_images,
                        mark_unread_on_update, owner_uid,
-                       auth_pass_encrypted, feed_language, 
-                       last_modified, 
-                       ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional                 
+                       auth_pass_encrypted, feed_language,
+                       last_modified,
+                       ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
                        FROM ttrss_feeds WHERE id = ?");
                $sth->execute([$feed]);
-               
+
                if ($row = $sth->fetch()) {
 
                        $owner_uid = $row["owner_uid"];
                        $mark_unread_on_update = $row["mark_unread_on_update"];
-                       $auth_pass_encrypted = $row["auth_pass_encrypted"];
 
                        $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
                                WHERE id = ?");
@@ -350,16 +357,11 @@ class RSSUtils {
 
                        $auth_login = $row["auth_login"];
                        $auth_pass = $row["auth_pass"];
-
-                       if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) {
-                               require_once "crypt.php";
-                               $auth_pass = decrypt_string($auth_pass);
-                       }
-
                        $stored_last_modified = $row["last_modified"];
                        $last_unconditional = $row["last_unconditional"];
                        $cache_images = $row["cache_images"];
                        $fetch_url = $row["feed_url"];
+
                        $feed_language = mb_strtolower($row["feed_language"]);
                        if (!$feed_language) $feed_language = 'english';
 
@@ -369,7 +371,7 @@ class RSSUtils {
 
                $date_feed_processed = date('Y-m-d H:i');
 
-               $cache_filename = CACHE_DIR . "/simplepie/" . sha1($fetch_url) . ".xml";
+               $cache_filename = CACHE_DIR . "/feeds/" . sha1($fetch_url) . ".xml";
 
                $pluginhost = new PluginHost();
                $pluginhost->set_debug($debug_enabled);
@@ -417,15 +419,15 @@ class RSSUtils {
                                _debug("not using CURL due to open_basedir restrictions");
                        }
 
-            if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
-                _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
+                       if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
+                               _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
 
-                $force_refetch = true;
-            } else {
-                _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
-            }
+                               $force_refetch = true;
+                       } else {
+                               _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
+                       }
 
-            _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
+                       _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
 
                        $feed_data = fetch_file_contents([
                                "url" => $fetch_url,
@@ -454,7 +456,7 @@ class RSSUtils {
                        }
 
                        // cache vanilla feed data for re-use
-                       if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/simplepie")) {
+                       if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/feeds")) {
                                $new_rss_hash = sha1($feed_data);
 
                                if ($new_rss_hash != $rss_hash) {
@@ -492,8 +494,6 @@ class RSSUtils {
                $rss = new FeedParser($feed_data);
                $rss->init();
 
-               $feed = $feed;
-
                if (!$rss->error()) {
 
                        // We use local pluginhost here because we need to load different per-user feed plugins
@@ -569,8 +569,8 @@ class RSSUtils {
                        $filters = load_filters($feed, $owner_uid);
 
                        if ($debug_enabled) {
-                           print_r($filters);
-            }
+                               print_r($filters);
+                       }
 
                        _debug("" . count($filters) . " filters loaded.", $debug_enabled);
 
@@ -593,7 +593,7 @@ class RSSUtils {
                        foreach ($items as $item) {
                                $pdo->beginTransaction();
 
-                               if ($_REQUEST['xdebug'] == 3) {
+                               if (clean($_REQUEST['xdebug']) == 3) {
                                        print_r($item);
                                }
 
@@ -634,15 +634,18 @@ class RSSUtils {
 
                                $entry_link = rewrite_relative_url($site_url, $item->get_link());
 
+                               $entry_language = mb_substr(trim($item->get_language()), 0, 2);
+
                                _debug("title $entry_title", $debug_enabled);
                                _debug("link $entry_link", $debug_enabled);
+                               _debug("language $entry_language", $debug_enabled);
 
                                if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
 
                                $entry_content = $item->get_content();
                                if (!$entry_content) $entry_content = $item->get_description();
 
-                               if ($_REQUEST["xdebug"] == 2) {
+                               if (clean($_REQUEST["xdebug"]) == 2) {
                                        print "content: ";
                                        print htmlspecialchars($entry_content);
                                        print "\n";
@@ -672,9 +675,13 @@ class RSSUtils {
 
                                $entry_tags = array_unique($additional_tags);
 
-                               for ($i = 0; $i < count($entry_tags); $i++)
+                               for ($i = 0; $i < count($entry_tags); $i++) {
                                        $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
 
+                                       // we don't support numeric tags, let's prefix them
+                                       if (is_numeric($entry_tags[$i])) $entry_tags[$i] = 't:' . $entry_tags[$i];
+                               }
+
                                _debug("tags found: " . join(",", $entry_tags), $debug_enabled);
 
                                _debug("done collecting data.", $debug_enabled);
@@ -687,7 +694,6 @@ class RSSUtils {
                                        $base_entry_id = $row["id"];
                                        $entry_stored_hash = $row["content_hash"];
                                        $article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
-                                       $entry_language = $row["lang"];
 
                                        $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
                                        $entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
@@ -695,7 +701,6 @@ class RSSUtils {
                                        $base_entry_id = false;
                                        $entry_stored_hash = "";
                                        $article_labels = array();
-                                       $entry_language = "";
                                }
 
                                $article = array("owner_uid" => $owner_uid, // read only
@@ -751,7 +756,7 @@ class RSSUtils {
                                        $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ",";
                                }
 
-                               if ($_REQUEST["xdebug"] == 2) {
+                               if (clean($_REQUEST["xdebug"]) == 2) {
                                        print "processed content: ";
                                        print htmlspecialchars($article["content"]);
                                        print "\n";
@@ -760,12 +765,11 @@ class RSSUtils {
                                _debug("plugin data: $entry_plugin_data", $debug_enabled);
 
                                // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
-                               if (DB_TYPE == "mysql") {
+                               if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
                                        foreach ($article as $k => $v) {
-
                                                // i guess we'll have to take the risk of 4byte unicode labels & tags here
                                                if (is_string($article[$k])) {
-                                                       $article[$k] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $v);
+                                                       $article[$k] = RSSUtils::strip_utf8mb4($v);
                                                }
                                        }
                                }
@@ -855,7 +859,7 @@ class RSSUtils {
 
                                        $usth = $pdo->prepare(
                                                "INSERT INTO ttrss_entries
-                                                       (title, 
+                                                       (title,
                                                        guid,
                                                        link,
                                                        updated,
@@ -924,7 +928,7 @@ class RSSUtils {
 
                                                _debug("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
                                        } else {
-                                               
+
                                                _debug("user record not found, creating...", $debug_enabled);
 
                                                if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) {
@@ -932,7 +936,7 @@ class RSSUtils {
                                                        $last_read_qpart = null;
                                                } else {
                                                        $unread = 0;
-                                                       $last_read_qpart = 'NOW()';
+                                                       $last_read_qpart = date("Y-m-d H:i"); // we can't use NOW() here because it gets quoted
                                                }
 
                                                if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
@@ -947,18 +951,18 @@ class RSSUtils {
                                                        $published = 0;
                                                }
 
-                                               $last_marked = ($marked == 'true') ? 'NOW()' : null;
-                                               $last_published = ($published == 'true') ? 'NOW()' : null;
+                                               $last_marked = ($marked == 1) ? 'NOW()' : 'NULL';
+                                               $last_published = ($published == 1) ? 'NOW()' : 'NULL';
 
                                                $sth = $pdo->prepare(
                                                        "INSERT INTO ttrss_user_entries
                                                                (ref_id, owner_uid, feed_id, unread, last_read, marked,
                                                                published, score, tag_cache, label_cache, uuid,
                                                                last_marked, last_published)
-                                                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ?, ?)");
+                                                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")");
 
                                                $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
-                                                       $published, $score, $last_marked, $last_published]);
+                                                       $published, $score]);
 
                                                $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
                                                                ref_id = ? AND owner_uid = ? AND
@@ -972,18 +976,10 @@ class RSSUtils {
 
                                        _debug("resulting RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
 
-                                       if (DB_TYPE == "pgsql") {
-                                               $tsvector_combined = mb_substr($entry_title . ' ' .
-                                                       preg_replace('/[<\?\:]/', ' ', strip_tags($entry_content)),
-                                                       0, 1000000);
-
-                                               $tsvector_qpart = "tsvector_combined = to_tsvector(".$pdo->quote($feed_language).", ".$pdo->quote($tsvector_combined)."),";
-
-                                       } else {
+                                       if (DB_TYPE == "pgsql")
+                                               $tsvector_qpart = "tsvector_combined = to_tsvector(:ts_lang, :ts_content),";
+                                       else
                                                $tsvector_qpart = "";
-                                       }
-
-                                       //_debug($tsvector_qpart);
 
                                        $sth = $pdo->prepare("UPDATE ttrss_entries
                                                SET title = :title,
@@ -991,13 +987,14 @@ class RSSUtils {
                                                        content = :content,
                                                        content_hash = :content_hash,
                                                        updated = :updated,
+                                                       date_updated = NOW(),
                                                        num_comments = :num_comments,
                                                        plugin_data = :plugin_data,
                                                        author = :author,
-                                                       lang = :lang                                                                                                            
+                                                       lang = :lang
                                                WHERE id = :id");
 
-                                       $sth->execute([":title" => $entry_title,
+                                       $params = [":title" => $entry_title,
                                                ":content" => "$entry_content",
                                                ":content_hash" => $entry_current_hash,
                                                ":updated" => $entry_timestamp_fmt,
@@ -1005,7 +1002,14 @@ class RSSUtils {
                                                ":plugin_data" => $entry_plugin_data,
                                                ":author" => "$entry_author",
                                                ":lang" => $entry_language,
-                                               ":id" => $ref_id]);
+                                               ":id" => $ref_id];
+
+                                       if (DB_TYPE == "pgsql") {
+                                               $params[":ts_lang"] = $feed_language;
+                                               $params[":ts_content"] = mb_substr(strip_tags($entry_title . " " . $entry_content), 0, 900000);
+                                       }
+
+                                       $sth->execute($params);
 
                                        // update aux data
                                        $sth = $pdo->prepare("UPDATE ttrss_user_entries
@@ -1045,6 +1049,16 @@ class RSSUtils {
                                                $e_item = array(
                                                        rewrite_relative_url($site_url, $e->link),
                                                        $e->type, $e->length, $e->title, $e->width, $e->height);
+
+                                               // Yet another episode of "mysql utf8_general_ci is gimped"
+                                               if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
+                                                       for ($i = 0; $i < count($e_item); $i++) {
+                                                               if (is_string($e_item[$i])) {
+                                                                       $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
+                                                               }
+                                                       }
+                                               }
+
                                                array_push($enclosures, $e_item);
                                        }
                                }
@@ -1058,7 +1072,7 @@ class RSSUtils {
                                }
 
                                $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures
-                                               WHERE content_url = ? AND post_id = ?");
+                                               WHERE content_url = ? AND content_type = ? AND post_id = ?");
 
                                $usth = $pdo->prepare("INSERT INTO ttrss_enclosures
                                                        (content_url, content_type, title, duration, post_id, width, height) VALUES
@@ -1072,7 +1086,7 @@ class RSSUtils {
                                        $enc_width = intval($enc[4]);
                                        $enc_height = intval($enc[5]);
 
-                                       $esth->execute([$enc_url, $entry_ref_id]);
+                                       $esth->execute([$enc_url, $enc_type, $entry_ref_id]);
 
                                        if (!$esth->fetch()) {
                                                $usth->execute([$enc_url, $enc_type, (string)$enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]);
@@ -1211,7 +1225,7 @@ class RSSUtils {
                                        if ($file_content && strlen($file_content) > MIN_CACHE_FILE_SIZE) {
                                                file_put_contents($local_filename, $file_content);
                                        }
-                               } else {
+                               } else if (is_writable($local_filename)) {
                                        touch($local_filename);
                                }
                        }
@@ -1237,15 +1251,17 @@ class RSSUtils {
 
                                $local_filename = CACHE_DIR . "/images/" . sha1($src);
 
-                               if ($debug) _debug("cache_media: downloading: $src to $local_filename");
+                               if ($debug) _debug("cache_media: checking $src");
 
                                if (!file_exists($local_filename)) {
+                                       if ($debug) _debug("cache_media: downloading: $src to $local_filename");
+
                                        $file_content = fetch_file_contents($src);
 
                                        if ($file_content && strlen($file_content) > MIN_CACHE_FILE_SIZE) {
                                                file_put_contents($local_filename, $file_content);
                                        }
-                               } else {
+                               } else if (is_writable($local_filename)) {
                                        touch($local_filename);
                                }
                        }
@@ -1288,7 +1304,7 @@ class RSSUtils {
        }
 
        static function expire_cached_files($debug) {
-               foreach (array("simplepie", "images", "export", "upload") as $dir) {
+               foreach (array("feeds", "images", "export", "upload") as $dir) {
                        $cache_dir = CACHE_DIR . "/$dir";
 
 //                     if ($debug) _debug("Expiring $cache_dir");