From b7d1306b197bc7ae60df706f81d1f5665ee04bed Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 7 Jul 2015 10:15:08 +0300 Subject: [PATCH] af_readability: add a workaround for meta charset html pages --- plugins/af_readability/init.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php index 6cdd28fa..15b88d32 100644 --- a/plugins/af_readability/init.php +++ b/plugins/af_readability/init.php @@ -101,6 +101,19 @@ class Af_Readability extends Plugin { $tmp = fetch_file_contents($article["link"]); if ($tmp) { + $tmpdoc = new DOMDocument("1.0", "UTF-8"); + $tmpdoc->loadHTML($tmp); + + if ($tmpdoc->encoding != 'UTF-8') { + $tmpxpath = new DOMXPath($tmpdoc); + + foreach ($tmpxpath->query("//meta") as $elem) { + $elem->parentNode->removeChild($elem); + } + + $tmp = $tmpdoc->saveHTML(); + } + $r = new Readability($tmp, $article["link"]); if ($r->init()) { -- 2.39.5