From: Andrew Dolgov Date: Wed, 8 Jul 2015 07:35:19 +0000 (+0300) Subject: ad_readability: also check for content-type if possible X-Git-Tag: 16.3~242 X-Git-Url: https://git.wh0rd.org/?a=commitdiff_plain;h=831129f6a4c63530674c8bc73550fc83998971f3;p=tt-rss.git ad_readability: also check for content-type if possible --- diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php index 15b88d32..b58be43d 100644 --- a/plugins/af_readability/init.php +++ b/plugins/af_readability/init.php @@ -98,11 +98,30 @@ class Af_Readability extends Plugin { if (!class_exists("Readability")) require_once(__DIR__ . "/classes/Readability.php"); + if (function_exists("curl_init")) { + $ch = curl_init($article["link"]); + curl_setopt($ch, CURLOPT_TIMEOUT, 5); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($ch, CURLOPT_HEADER, true); + curl_setopt($ch, CURLOPT_NOBODY, true); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, + !ini_get("safe_mode") && !ini_get("open_basedir")); + curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT); + + @$result = curl_exec($ch); + $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE); + + if (strpos($content_type, "text/html") === FALSE) + return $article; + } + $tmp = fetch_file_contents($article["link"]); if ($tmp) { $tmpdoc = new DOMDocument("1.0", "UTF-8"); - $tmpdoc->loadHTML($tmp); + + if (!$tmpdoc->loadHTML($tmp)) + return $article; if ($tmpdoc->encoding != 'UTF-8') { $tmpxpath = new DOMXPath($tmpdoc);