From: Andrew Dolgov Date: Tue, 7 Jul 2015 07:15:08 +0000 (+0300) Subject: af_readability: add a workaround for meta charset html pages X-Git-Tag: 16.3~244 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=b7d1306b197bc7ae60df706f81d1f5665ee04bed;p=tt-rss.git af_readability: add a workaround for meta charset html pages --- diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php index 6cdd28fa..15b88d32 100644 --- a/plugins/af_readability/init.php +++ b/plugins/af_readability/init.php @@ -101,6 +101,19 @@ class Af_Readability extends Plugin { $tmp = fetch_file_contents($article["link"]); if ($tmp) { + $tmpdoc = new DOMDocument("1.0", "UTF-8"); + $tmpdoc->loadHTML($tmp); + + if ($tmpdoc->encoding != 'UTF-8') { + $tmpxpath = new DOMXPath($tmpdoc); + + foreach ($tmpxpath->query("//meta") as $elem) { + $elem->parentNode->removeChild($elem); + } + + $tmp = $tmpdoc->saveHTML(); + } + $r = new Readability($tmp, $article["link"]); if ($r->init()) {