]> git.wh0rd.org - tt-rss.git/blobdiff - classes/rssutils.php
add a workaround to support numeric tags
[tt-rss.git] / classes / rssutils.php
old mode 100644 (file)
new mode 100755 (executable)
index 349f859..d125a50
@@ -16,6 +16,11 @@ class RSSUtils {
                return sha1(implode(",", $pluginhost->get_plugin_names()) . $tmp);
        }
 
+       // Replaces 4-byte UTF-8 characters (e.g. emoji) with U+FFFD, because MySQL's utf8 charset cannot store them
+       static function strip_utf8mb4($str) {
+               return preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $str);
+       }
+
        static function update_feedbrowser_cache() {
 
                $pdo = Db::pdo();
@@ -221,7 +226,7 @@ class RSSUtils {
 
                        $owner_uid = $row["owner_uid"];
 
-                       $auth_pass_encrypted = sql_bool_to_bool($row["auth_pass_encrypted"]);
+                       $auth_pass_encrypted = $row["auth_pass_encrypted"];
 
                        $auth_login = $row["auth_login"];
                        $auth_pass = $row["auth_pass"];
@@ -304,7 +309,7 @@ class RSSUtils {
         */
        static function update_rss_feed($feed, $no_cache = false) {
 
-               $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug'];
+               $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || clean($_REQUEST['xdebug']);
 
                _debug_suppress(!$debug_enabled);
                _debug("start", $debug_enabled);
@@ -341,8 +346,8 @@ class RSSUtils {
                if ($row = $sth->fetch()) {
 
                        $owner_uid = $row["owner_uid"];
-                       $mark_unread_on_update = sql_bool_to_bool($row["mark_unread_on_update"]);
-                       $auth_pass_encrypted = sql_bool_to_bool($row["auth_pass_encrypted"]);
+                       $mark_unread_on_update = $row["mark_unread_on_update"];
+                       $auth_pass_encrypted = $row["auth_pass_encrypted"];
 
                        $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
                                WHERE id = ?");
@@ -358,7 +363,7 @@ class RSSUtils {
 
                        $stored_last_modified = $row["last_modified"];
                        $last_unconditional = $row["last_unconditional"];
-                       $cache_images = sql_bool_to_bool($row["cache_images"]);
+                       $cache_images = $row["cache_images"];
                        $fetch_url = $row["feed_url"];
                        $feed_language = mb_strtolower($row["feed_language"]);
                        if (!$feed_language) $feed_language = 'english';
@@ -369,7 +374,7 @@ class RSSUtils {
 
                $date_feed_processed = date('Y-m-d H:i');
 
-               $cache_filename = CACHE_DIR . "/simplepie/" . sha1($fetch_url) . ".xml";
+               $cache_filename = CACHE_DIR . "/feeds/" . sha1($fetch_url) . ".xml";
 
                $pluginhost = new PluginHost();
                $pluginhost->set_debug($debug_enabled);
@@ -417,15 +422,15 @@ class RSSUtils {
                                _debug("not using CURL due to open_basedir restrictions");
                        }
 
-            if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
-                _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
+                       if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
+                               _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
 
-                $force_refetch = true;
-            } else {
-                _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
-            }
+                               $force_refetch = true;
+                       } else {
+                               _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
+                       }
 
-            _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
+                       _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
 
                        $feed_data = fetch_file_contents([
                                "url" => $fetch_url,
@@ -454,7 +459,7 @@ class RSSUtils {
                        }
 
                        // cache vanilla feed data for re-use
-                       if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/simplepie")) {
+                       if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/feeds")) {
                                $new_rss_hash = sha1($feed_data);
 
                                if ($new_rss_hash != $rss_hash) {
@@ -492,8 +497,6 @@ class RSSUtils {
                $rss = new FeedParser($feed_data);
                $rss->init();
 
-               $feed = $feed;
-
                if (!$rss->error()) {
 
                        // We use local pluginhost here because we need to load different per-user feed plugins
@@ -515,7 +518,7 @@ class RSSUtils {
                        $sth->execute([$feed]);
 
                        if ($row = $sth->fetch()) {
-                               $favicon_needs_check = sql_bool_to_bool($row["favicon_needs_check"]);
+                               $favicon_needs_check = $row["favicon_needs_check"];
                                $favicon_avg_color = $row["favicon_avg_color"];
                                $owner_uid = $row["owner_uid"];
                        } else {
@@ -569,8 +572,8 @@ class RSSUtils {
                        $filters = load_filters($feed, $owner_uid);
 
                        if ($debug_enabled) {
-                           print_r($filters);
-            }
+                               print_r($filters);
+                       }
 
                        _debug("" . count($filters) . " filters loaded.", $debug_enabled);
 
@@ -591,19 +594,26 @@ class RSSUtils {
                        $tstart = time();
 
                        foreach ($items as $item) {
-                               if ($_REQUEST['xdebug'] == 3) {
+                               $pdo->beginTransaction();
+
+                               if (clean($_REQUEST['xdebug']) == 3) {
                                        print_r($item);
                                }
 
                                if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) {
                                        _debug("looks like there's too many articles to process at once, breaking out", $debug_enabled);
+                                       $pdo->commit();
                                        break;
                                }
 
                                $entry_guid = strip_tags($item->get_id());
                                if (!$entry_guid) $entry_guid = strip_tags($item->get_link());
                                if (!$entry_guid) $entry_guid = RSSUtils::make_guid_from_title($item->get_title());
-                               if (!$entry_guid) continue;
+
+                               if (!$entry_guid) {
+                                       $pdo->commit();
+                                       continue;
+                               }
 
                                $entry_guid = "$owner_uid,$entry_guid";
 
@@ -635,7 +645,7 @@ class RSSUtils {
                                $entry_content = $item->get_content();
                                if (!$entry_content) $entry_content = $item->get_description();
 
-                               if ($_REQUEST["xdebug"] == 2) {
+                               if (clean($_REQUEST["xdebug"]) == 2) {
                                        print "content: ";
                                        print htmlspecialchars($entry_content);
                                        print "\n";
@@ -665,9 +675,13 @@ class RSSUtils {
 
                                $entry_tags = array_unique($additional_tags);
 
-                               for ($i = 0; $i < count($entry_tags); $i++)
+                               for ($i = 0; $i < count($entry_tags); $i++) {
                                        $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
 
+                                       // numeric tags are not supported, so prefix them with 't:' to make them non-numeric
+                                       if (is_numeric($entry_tags[$i])) $entry_tags[$i] = 't:' . $entry_tags[$i];
+                               }
+
                                _debug("tags found: " . join(",", $entry_tags), $debug_enabled);
 
                                _debug("done collecting data.", $debug_enabled);
@@ -727,6 +741,7 @@ class RSSUtils {
                                                WHERE id = ?");
                                        $sth->execute([$base_entry_id]);
 
+                                       $pdo->commit();
                                        continue;
                                }
 
@@ -743,7 +758,7 @@ class RSSUtils {
                                        $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ",";
                                }
 
-                               if ($_REQUEST["xdebug"] == 2) {
+                               if (clean($_REQUEST["xdebug"]) == 2) {
                                        print "processed content: ";
                                        print htmlspecialchars($article["content"]);
                                        print "\n";
@@ -754,10 +769,9 @@ class RSSUtils {
                                // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
                                if (DB_TYPE == "mysql") {
                                        foreach ($article as $k => $v) {
-
                                                // i guess we'll have to take the risk of 4byte unicode labels & tags here
                                                if (is_string($article[$k])) {
-                                                       $article[$k] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $v);
+                                                       $article[$k] = RSSUtils::strip_utf8mb4($v);
                                                }
                                        }
                                }
@@ -871,14 +885,14 @@ class RSSUtils {
                                                        $entry_guid_hashed,
                                                        $entry_link,
                                                        $entry_timestamp_fmt,
-                                                       $entry_content,
+                                                       "$entry_content",
                                                        $entry_current_hash,
                                                        $date_feed_processed,
                                                        $entry_comments,
-                                                       $num_comments,
+                                                       (int)$num_comments,
                                                        $entry_plugin_data,
-                                                       $entry_language,
-                                                       $entry_author]);
+                                                       "$entry_language",
+                                                       "$entry_author"]);
 
                                }
 
@@ -895,6 +909,7 @@ class RSSUtils {
                                        $entry_ref_id = $ref_id;
 
                                        if (RSSUtils::find_article_filter($article_filters, "filter")) {
+                                               $pdo->commit();
                                                continue;
                                        }
 
@@ -923,7 +938,7 @@ class RSSUtils {
                                                        $last_read_qpart = null;
                                                } else {
                                                        $unread = 0;
-                                                       $last_read_qpart = 'NOW()';
+                                                       $last_read_qpart = date("Y-m-d H:i"); // we can't use NOW() here because it gets quoted
                                                }
 
                                                if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
@@ -938,18 +953,18 @@ class RSSUtils {
                                                        $published = 0;
                                                }
 
-                                               $last_marked = ($marked == 'true') ? 'NOW()' : null;
-                                               $last_published = ($published == 'true') ? 'NOW()' : null;
+                                               $last_marked = ($marked == 1) ? 'NOW()' : 'NULL';
+                                               $last_published = ($published == 1) ? 'NOW()' : 'NULL';
 
                                                $sth = $pdo->prepare(
                                                        "INSERT INTO ttrss_user_entries
                                                                (ref_id, owner_uid, feed_id, unread, last_read, marked,
                                                                published, score, tag_cache, label_cache, uuid,
                                                                last_marked, last_published)
-                                                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ?, ?)");
+                                                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")");
 
                                                $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
-                                                       $published, $score, $last_marked, $last_published]);
+                                                       $published, $score]);
 
                                                $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
                                                                ref_id = ? AND owner_uid = ? AND
@@ -964,29 +979,39 @@ class RSSUtils {
                                        _debug("resulting RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
 
                                        if (DB_TYPE == "pgsql") {
-                                               $tsvector_combined = mb_substr($entry_title . ' ' . strip_tags(str_replace('<', ' <', $entry_content)),
+                                               $tsvector_combined = mb_substr($entry_title . ' ' .
+                                                       preg_replace('/[<\?\:]/', ' ', strip_tags($entry_content)),
                                                        0, 1000000);
 
-                                               $tsvector_qpart = "tsvector_combined = to_tsvector('$feed_language', ".$pdo->quote($tsvector_combined)."),";
+                                               $tsvector_qpart = "tsvector_combined = to_tsvector(".$pdo->quote($feed_language).", ".$pdo->quote($tsvector_combined)."),";
 
                                        } else {
                                                $tsvector_qpart = "";
                                        }
 
+                                       //_debug($tsvector_qpart);
+
                                        $sth = $pdo->prepare("UPDATE ttrss_entries
-                                               SET title = ?,
-                                                       content = ?,
-                                                       content_hash = ?,
-                                                       updated = ?,
+                                               SET title = :title,
                                                        $tsvector_qpart
-                                                       num_comments = ?,
-                                                       plugin_data = ?,
-                                                       author = ?,
-                                                       lang = ?
-                                               WHERE id = ?");
-
-                                       $sth->execute([$entry_title, $entry_content, $entry_current_hash, $entry_timestamp_fmt,
-                                               $num_comments, $entry_plugin_data, $entry_author, $entry_language, $ref_id]);
+                                                       content = :content,
+                                                       content_hash = :content_hash,
+                                                       updated = :updated,
+                                                       num_comments = :num_comments,
+                                                       plugin_data = :plugin_data,
+                                                       author = :author,
+                                                       lang = :lang                                                                                                            
+                                               WHERE id = :id");
+
+                                       $sth->execute([":title" => $entry_title,
+                                               ":content" => "$entry_content",
+                                               ":content_hash" => $entry_current_hash,
+                                               ":updated" => $entry_timestamp_fmt,
+                                               ":num_comments" => (int)$num_comments,
+                                               ":plugin_data" => $entry_plugin_data,
+                                               ":author" => "$entry_author",
+                                               ":lang" => $entry_language,
+                                               ":id" => $ref_id]);
 
                                        // update aux data
                                        $sth = $pdo->prepare("UPDATE ttrss_user_entries
@@ -1026,6 +1051,16 @@ class RSSUtils {
                                                $e_item = array(
                                                        rewrite_relative_url($site_url, $e->link),
                                                        $e->type, $e->length, $e->title, $e->width, $e->height);
+
+                                               // MySQL's utf8 charset cannot store 4-byte characters, so replace them in the enclosure fields as well
+                                               if (DB_TYPE == "mysql") {
+                                                       for ($i = 0; $i < count($e_item); $i++) {
+                                                               if (is_string($e_item[$i])) {
+                                                                       $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
+                                                               }
+                                                       }
+                                               }
+
                                                array_push($enclosures, $e_item);
                                        }
                                }
@@ -1048,7 +1083,7 @@ class RSSUtils {
                                foreach ($enclosures as $enc) {
                                        $enc_url = $enc[0];
                                        $enc_type = $enc[1];
-                                       $enc_dur = $enc[2];
+                                       $enc_dur = (int)$enc[2];
                                        $enc_title = $enc[3];
                                        $enc_width = intval($enc[4]);
                                        $enc_height = intval($enc[5]);
@@ -1138,6 +1173,8 @@ class RSSUtils {
                                }
 
                                _debug("article processed", $debug_enabled);
+
+                               $pdo->commit();
                        }
 
                        _debug("purging feed...", $debug_enabled);
@@ -1267,7 +1304,7 @@ class RSSUtils {
        }
 
        static function expire_cached_files($debug) {
-               foreach (array("simplepie", "images", "export", "upload") as $dir) {
+               foreach (array("simplepie", "feeds", "images", "export", "upload") as $dir) {
                        $cache_dir = CACHE_DIR . "/$dir";
 
 //                     if ($debug) _debug("Expiring $cache_dir");