]> git.wh0rd.org Git - tt-rss.git/commitdiff
strip utf8mb4 characters in enclosures on mysql
authorAndrew Dolgov <noreply@fakecake.org>
Mon, 29 Jan 2018 20:24:11 +0000 (23:24 +0300)
committerAndrew Dolgov <noreply@fakecake.org>
Mon, 29 Jan 2018 20:24:11 +0000 (23:24 +0300)
classes/rssutils.php [changed mode: 0644->0755]

old mode 100644 (file)
new mode 100755 (executable)
index e659f09..52ac680
@@ -16,6 +16,11 @@ class RSSUtils {
                return sha1(implode(",", $pluginhost->get_plugin_names()) . $tmp);
        }
 
+       // Strips utf8mb4 characters (i.e. emoji) for mysql
+       static function strip_utf8mb4($str) {
+        return preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $str);
+    }
+
        static function update_feedbrowser_cache() {
 
                $pdo = Db::pdo();
@@ -760,10 +765,9 @@ class RSSUtils {
                                // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
                                if (DB_TYPE == "mysql") {
                                        foreach ($article as $k => $v) {
-
                                                // i guess we'll have to take the risk of 4byte unicode labels & tags here
                                                if (is_string($article[$k])) {
-                                                       $article[$k] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $v);
+                                                       $article[$k] = RSSUtils::strip_utf8mb4($v);
                                                }
                                        }
                                }
@@ -1043,7 +1047,17 @@ class RSSUtils {
                                                $e_item = array(
                                                        rewrite_relative_url($site_url, $e->link),
                                                        $e->type, $e->length, $e->title, $e->width, $e->height);
-                                               array_push($enclosures, $e_item);
+
+                                               // Yet another episode of "mysql utf8_general_ci is gimped"
+                                               if (DB_TYPE == "mysql") {
+                            for ($i = 0; $i < count($e_item); $i++) {
+                                if (is_string($e_item[$i])) {
+                                    $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
+                                }
+                            }
+                                               }
+
+                        array_push($enclosures, $e_item);
                                        }
                                }