]> git.wh0rd.org - tt-rss.git/commitdiff
parser: add charset recoding hack for systems where libxml is built without support...
author Andrew Dolgov <fox@madoka.volgo-balt.ru>
Thu, 11 Jul 2013 11:40:09 +0000 (15:40 +0400)
committer Andrew Dolgov <fox@madoka.volgo-balt.ru>
Thu, 11 Jul 2013 11:40:09 +0000 (15:40 +0400)
classes/feedparser.php

index eb8606de912339a5c8ad8d3f9cc278c830b06373..53f6c52a95c51cf348a8f3efaf94b2a505247229 100644 (file)
@@ -20,6 +20,24 @@ class FeedParser {
 
                $error = libxml_get_last_error();
 
+               // libxml compiled without iconv?
+               if ($error && $error->code == 32) {
+                       if (preg_match('/^(<\\?xml .*?)encoding="(.+?)"(.*?\\?>)/', $data, $matches) === 1) {
+                               libxml_clear_errors();
+
+                               $enc = $matches[2];
+
+                $data = iconv($enc, 'UTF-8//IGNORE', $data);
+                $data = preg_replace('/^<\\?xml .*?\\?>/', $matches[1] . $matches[3] , $data);
+
+                               $this->doc = new DOMDocument();
+                               $this->doc->loadXML($data);
+
+                               $error = libxml_get_last_error();
+                  }
+               }
+
+               // some terrible invalid unicode entity?
                if ($error && $error->code == 9) {
                        libxml_clear_errors();