X-Git-Url: https://git.wh0rd.org/?a=blobdiff_plain;f=classes%2Frssutils.php;h=e76dd3e78b4e94c3f14be79bc5d1d30ec59a20cd;hb=2d54eb1a87ca5832b3eb1118f0dcf89005fd94ac;hp=e659f096beed35364d8ddbc5e9a2134a3c079aea;hpb=26ad257de5aba4a6aa7b2cbf793518aa3a987555;p=tt-rss.git diff --git a/classes/rssutils.php b/classes/rssutils.php old mode 100644 new mode 100755 index e659f096..e76dd3e7 --- a/classes/rssutils.php +++ b/classes/rssutils.php @@ -16,6 +16,11 @@ class RSSUtils { return sha1(implode(",", $pluginhost->get_plugin_names()) . $tmp); } + // Strips utf8mb4 characters (i.e. emoji) for mysql + static function strip_utf8mb4($str) { + return preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $str); + } + static function update_feedbrowser_cache() { $pdo = Db::pdo(); @@ -45,8 +50,8 @@ class RSSUtils { if (!$tmph->fetch()) { $tmph = $pdo->prepare("INSERT INTO ttrss_feedbrowser_cache - (feed_url, site_url, title, subscribers) - VALUES + (feed_url, site_url, title, subscribers) + VALUES (?, ?, ?, ?)"); $tmph->execute([$feed_url, $site_url, $title, $subscribers]); @@ -182,7 +187,19 @@ class RSSUtils { array_push($batch_owners, $tline["owner_uid"]); $fstarted = microtime(true); - RSSUtils::update_rss_feed($tline["id"], true, false); + + try { + RSSUtils::update_rss_feed($tline["id"], true, false); + } catch (PDOException $e) { + Logger::get()->log_error(E_USER_NOTICE, $e->getMessage(), $e->getFile(), $e->getLine(), $e->getTraceAsString()); + + try { + $pdo->rollback(); + } catch (PDOException $e) { + // it doesn't matter if there wasn't actually anything to rollback, PDO Exception can be + // thrown outside of an active transaction during feed update + } + } _debug_suppress(false); _debug(sprintf(" %.4f (sec)", microtime(true) - $fstarted)); @@ -213,24 +230,15 @@ class RSSUtils { $pdo = Db::pdo(); - $sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login,auth_pass_encrypted + $sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login FROM ttrss_feeds WHERE id = ?"); $sth->execute([$feed]); if ($row = $sth->fetch()) { $owner_uid = $row["owner_uid"]; - - $auth_pass_encrypted = $row["auth_pass_encrypted"]; - $auth_login = $row["auth_login"]; $auth_pass = $row["auth_pass"]; - - if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) { - require_once "crypt.php"; - $auth_pass = decrypt_string($auth_pass); - } - $fetch_url = $row["feed_url"]; $pluginhost = new PluginHost(); @@ -332,17 +340,16 @@ class RSSUtils { $sth = $pdo->prepare("SELECT id,update_interval,auth_login, feed_url,auth_pass,cache_images, mark_unread_on_update, owner_uid, - auth_pass_encrypted, feed_language, - last_modified, - ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional + auth_pass_encrypted, feed_language, + last_modified, + ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional FROM ttrss_feeds WHERE id = ?"); $sth->execute([$feed]); - + if ($row = $sth->fetch()) { $owner_uid = $row["owner_uid"]; $mark_unread_on_update = $row["mark_unread_on_update"]; - $auth_pass_encrypted = $row["auth_pass_encrypted"]; $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW() WHERE id = ?"); @@ -350,16 +357,11 @@ class RSSUtils { $auth_login = $row["auth_login"]; $auth_pass = $row["auth_pass"]; - - if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) { - require_once "crypt.php"; - $auth_pass = decrypt_string($auth_pass); - } - $stored_last_modified = $row["last_modified"]; $last_unconditional = $row["last_unconditional"]; $cache_images = $row["cache_images"]; $fetch_url = $row["feed_url"]; + $feed_language = mb_strtolower($row["feed_language"]); if (!$feed_language) $feed_language = 'english'; @@ -417,15 +419,15 @@ class RSSUtils { _debug("not using CURL due to open_basedir restrictions"); } - if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) { - _debug("maximum allowed interval for conditional requests exceeded, forcing refetch"); + if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) { + _debug("maximum allowed interval for conditional requests exceeded, forcing refetch"); - $force_refetch = true; - } else { - _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled); - } + $force_refetch = true; + } else { + _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled); + } - _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled); + _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled); $feed_data = fetch_file_contents([ "url" => $fetch_url, @@ -567,8 +569,8 @@ class RSSUtils { $filters = load_filters($feed, $owner_uid); if ($debug_enabled) { - print_r($filters); - } + print_r($filters); + } _debug("" . count($filters) . " filters loaded.", $debug_enabled); @@ -632,8 +634,11 @@ class RSSUtils { $entry_link = rewrite_relative_url($site_url, $item->get_link()); + $entry_language = mb_substr(trim($item->get_language()), 0, 2); + _debug("title $entry_title", $debug_enabled); _debug("link $entry_link", $debug_enabled); + _debug("language $entry_language", $debug_enabled); if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);; @@ -670,9 +675,13 @@ class RSSUtils { $entry_tags = array_unique($additional_tags); - for ($i = 0; $i < count($entry_tags); $i++) + for ($i = 0; $i < count($entry_tags); $i++) { $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8'); + // we don't support numeric tags, let's prefix them + if (is_numeric($entry_tags[$i])) $entry_tags[$i] = 't:' . $entry_tags[$i]; + } + _debug("tags found: " . join(",", $entry_tags), $debug_enabled); _debug("done collecting data.", $debug_enabled); @@ -685,7 +694,6 @@ class RSSUtils { $base_entry_id = $row["id"]; $entry_stored_hash = $row["content_hash"]; $article_labels = Article::get_article_labels($base_entry_id, $owner_uid); - $entry_language = $row["lang"]; $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid); $entry_tags = array_unique(array_merge($entry_tags, $existing_tags)); @@ -693,7 +701,6 @@ class RSSUtils { $base_entry_id = false; $entry_stored_hash = ""; $article_labels = array(); - $entry_language = ""; } $article = array("owner_uid" => $owner_uid, // read only @@ -758,12 +765,11 @@ class RSSUtils { _debug("plugin data: $entry_plugin_data", $debug_enabled); // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077 - if (DB_TYPE == "mysql") { + if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") { foreach ($article as $k => $v) { - // i guess we'll have to take the risk of 4byte unicode labels & tags here if (is_string($article[$k])) { - $article[$k] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $v); + $article[$k] = RSSUtils::strip_utf8mb4($v); } } } @@ -853,7 +859,7 @@ class RSSUtils { $usth = $pdo->prepare( "INSERT INTO ttrss_entries - (title, + (title, guid, link, updated, @@ -922,7 +928,7 @@ class RSSUtils { _debug("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled); } else { - + _debug("user record not found, creating...", $debug_enabled); if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) { @@ -970,18 +976,10 @@ class RSSUtils { _debug("resulting RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled); - if (DB_TYPE == "pgsql") { - $tsvector_combined = mb_substr($entry_title . ' ' . - preg_replace('/[<\?\:]/', ' ', strip_tags($entry_content)), - 0, 1000000); - - $tsvector_qpart = "tsvector_combined = to_tsvector(".$pdo->quote($feed_language).", ".$pdo->quote($tsvector_combined)."),"; - - } else { + if (DB_TYPE == "pgsql") + $tsvector_qpart = "tsvector_combined = to_tsvector(:ts_lang, :ts_content),"; + else $tsvector_qpart = ""; - } - - //_debug($tsvector_qpart); $sth = $pdo->prepare("UPDATE ttrss_entries SET title = :title, @@ -989,13 +987,14 @@ class RSSUtils { content = :content, content_hash = :content_hash, updated = :updated, + date_updated = NOW(), num_comments = :num_comments, plugin_data = :plugin_data, author = :author, - lang = :lang + lang = :lang WHERE id = :id"); - $sth->execute([":title" => $entry_title, + $params = [":title" => $entry_title, ":content" => "$entry_content", ":content_hash" => $entry_current_hash, ":updated" => $entry_timestamp_fmt, @@ -1003,7 +1002,14 @@ class RSSUtils { ":plugin_data" => $entry_plugin_data, ":author" => "$entry_author", ":lang" => $entry_language, - ":id" => $ref_id]); + ":id" => $ref_id]; + + if (DB_TYPE == "pgsql") { + $params[":ts_lang"] = $feed_language; + $params[":ts_content"] = mb_substr(strip_tags($entry_title . " " . $entry_content), 0, 900000); + } + + $sth->execute($params); // update aux data $sth = $pdo->prepare("UPDATE ttrss_user_entries @@ -1043,6 +1049,16 @@ class RSSUtils { $e_item = array( rewrite_relative_url($site_url, $e->link), $e->type, $e->length, $e->title, $e->width, $e->height); + + // Yet another episode of "mysql utf8_general_ci is gimped" + if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") { + for ($i = 0; $i < count($e_item); $i++) { + if (is_string($e_item[$i])) { + $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]); + } + } + } + array_push($enclosures, $e_item); } } @@ -1056,7 +1072,7 @@ class RSSUtils { } $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures - WHERE content_url = ? AND post_id = ?"); + WHERE content_url = ? AND content_type = ? AND post_id = ?"); $usth = $pdo->prepare("INSERT INTO ttrss_enclosures (content_url, content_type, title, duration, post_id, width, height) VALUES @@ -1070,7 +1086,7 @@ class RSSUtils { $enc_width = intval($enc[4]); $enc_height = intval($enc[5]); - $esth->execute([$enc_url, $entry_ref_id]); + $esth->execute([$enc_url, $enc_type, $entry_ref_id]); if (!$esth->fetch()) { $usth->execute([$enc_url, $enc_type, (string)$enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]); @@ -1209,7 +1225,7 @@ class RSSUtils { if ($file_content && strlen($file_content) > MIN_CACHE_FILE_SIZE) { file_put_contents($local_filename, $file_content); } - } else { + } else if (is_writable($local_filename)) { touch($local_filename); } } @@ -1235,15 +1251,17 @@ class RSSUtils { $local_filename = CACHE_DIR . "/images/" . sha1($src); - if ($debug) _debug("cache_media: downloading: $src to $local_filename"); + if ($debug) _debug("cache_media: checking $src"); if (!file_exists($local_filename)) { + if ($debug) _debug("cache_media: downloading: $src to $local_filename"); + $file_content = fetch_file_contents($src); if ($file_content && strlen($file_content) > MIN_CACHE_FILE_SIZE) { file_put_contents($local_filename, $file_content); } - } else { + } else if (is_writable($local_filename)) { touch($local_filename); } } @@ -1286,7 +1304,7 @@ class RSSUtils { } static function expire_cached_files($debug) { - foreach (array("simplepie", "feeds", "images", "export", "upload") as $dir) { + foreach (array("feeds", "images", "export", "upload") as $dir) { $cache_dir = CACHE_DIR . "/$dir"; // if ($debug) _debug("Expiring $cache_dir");