]> git.wh0rd.org - tt-rss.git/commitdiff
experimental: decode numerical utf entities on import in entry title
author Andrew Dolgov <fox@madoka.volgo-balt.ru>
Mon, 29 Apr 2013 12:59:36 +0000 (16:59 +0400)
committer Andrew Dolgov <fox@madoka.volgo-balt.ru>
Mon, 29 Apr 2013 12:59:36 +0000 (16:59 +0400)
include/rssfuncs.php

index 0ecab6a257c8419b354c73be1c2c1538ee8fcb9a..a5d3898ce12dc605d56ee8bf8d9cd61d9bb038c6 100644 (file)
                                _debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
 
                                $entry_title = html_entity_decode($item->get_title(), ENT_COMPAT, 'UTF-8');
+                               $entry_title = decode_numeric_entities($entry_title);
 
                                $entry_link = rewrite_relative_url($site_url, $item->get_link());
 
 
                _debug("Cleaned $rc cached tags.");
        }
+
+       /**
+        * Decode numeric character references (e.g. "&#8364;") found in a string
+        * into the UTF-8 characters they denote.
+        *
+        * @param string $entity text containing zero or more numeric references
+        * @return string the text with every reference inside the conversion map decoded
+        */
+       function utf8_entity_decode($entity){
+               // convmap layout: (first code point, last code point, offset, mask).
+               // Cover the whole Unicode range (up to U+10FFFF) so characters outside
+               // the Basic Multilingual Plane, such as emoji, are decoded as well.
+               $convmap = array(0x0, 0x10FFFF, 0, 0x1FFFFF);
+               return mb_decode_numericentity($entity, $convmap, 'UTF-8');
+       }
+
+       /**
+        * Replace decimal ("&#8217;") and hexadecimal ("&#x2019;") numeric character
+        * references in $body by passing them to utf8_entity_decode().
+        *
+        * References with a single digit are left untouched, as are hexadecimal
+        * references whose value lies beyond U+10FFFF.
+        *
+        * @param string $body text that may contain numeric character references
+        * @return string the processed text; if PCRE rejects the input (for example
+        *                because it is not valid UTF-8) the text is returned unchanged
+        */
+       function decode_numeric_entities($body) {
+               // Callbacks replace the former /e (eval) modifier, which is deprecated
+               // since PHP 5.5 and no longer supported in PHP 7.
+               // Seven decimal digits are enough to express U+10FFFF (1114111).
+               $decoded = preg_replace_callback('/&#\d{2,7};/u',
+                       function ($match) {
+                               return utf8_entity_decode($match[0]);
+                       }, $body);
+
+               // preg_replace_callback() yields NULL on error; keep the input rather
+               // than silently dropping the whole string.
+               if ($decoded !== null) {
+                       $body = $decoded;
+               }
+
+               // Hexadecimal references: accept the full 0-9 digit range and both
+               // "&#x" and "&#X" prefixes, then hand them on in decimal form.
+               $decoded = preg_replace_callback('/&#[xX]([a-fA-F0-9]{2,8});/u',
+                       function ($match) {
+                               $codepoint = hexdec($match[1]);
+
+                               // Not a Unicode code point: leave the reference as written.
+                               if ($codepoint > 0x10FFFF) {
+                                       return $match[0];
+                               }
+
+                               return utf8_entity_decode('&#' . $codepoint . ';');
+                       }, $body);
+
+               if ($decoded !== null) {
+                       $body = $decoded;
+               }
+
+               return $body;
+       }
 ?>