From: Andrew Dolgov Date: Thu, 11 Jul 2013 11:40:09 +0000 (+0400) Subject: parser: add charset recoding hack for systems where libxml is built without support... X-Git-Tag: 1.9~41 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=4f00f55ca2ecd2e5a75c2c4ef37ca0e1143a7ac7;p=tt-rss.git parser: add charset recoding hack for systems where libxml is built without support for iconv (handles libxml error 32) --- diff --git a/classes/feedparser.php b/classes/feedparser.php index eb8606de..53f6c52a 100644 --- a/classes/feedparser.php +++ b/classes/feedparser.php @@ -20,6 +20,24 @@ class FeedParser { $error = libxml_get_last_error(); + // libxml compiled without iconv? + if ($error && $error->code == 32) { + if (preg_match('/^(<\\?xml .*?)encoding="(.+?)"(.*?\\?>)/', $data, $matches) === 1) { + libxml_clear_errors(); + + $enc = $matches[2]; + + $data = iconv($enc, 'UTF-8//IGNORE', $data); + $data = preg_replace('/^<\\?xml .*?\\?>/', $matches[1] . $matches[3] , $data); + + $this->doc = new DOMDocument(); + $this->doc->loadXML($data); + + $error = libxml_get_last_error(); + } + } + + // some terrible invalid unicode entity? if ($error && $error->code == 9) { libxml_clear_errors();