git.wh0rd.org Git - tt-rss.git/commitdiff
af_readability: also check for content-type if possible
author Andrew Dolgov <noreply@fakecake.org>
Wed, 8 Jul 2015 07:35:19 +0000 (10:35 +0300)
committer Andrew Dolgov <noreply@fakecake.org>
Wed, 8 Jul 2015 07:35:19 +0000 (10:35 +0300)
plugins/af_readability/init.php

index 15b88d32c7a51c0cfbf696e277098358b6822c00..b58be43d2f44df234f0eefc59d1bd197e6516820 100644 (file)
@@ -98,11 +98,30 @@ class Af_Readability extends Plugin {
 
                if (!class_exists("Readability")) require_once(__DIR__ . "/classes/Readability.php");
 
+               if (function_exists("curl_init")) {
+                       $ch = curl_init($article["link"]);
+                       curl_setopt($ch, CURLOPT_TIMEOUT, 5);
+                       curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
+                       curl_setopt($ch, CURLOPT_HEADER, true);
+                       curl_setopt($ch, CURLOPT_NOBODY, true);
+                       curl_setopt($ch, CURLOPT_FOLLOWLOCATION,
+                               !ini_get("safe_mode") && !ini_get("open_basedir"));
+                       curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT);
+
+                       @$result = curl_exec($ch);
+                       $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
+
+                       if (strpos($content_type, "text/html") === FALSE)
+                               return $article;
+               }
+
                $tmp = fetch_file_contents($article["link"]);
 
                if ($tmp) {
                        $tmpdoc = new DOMDocument("1.0", "UTF-8");
-                       $tmpdoc->loadHTML($tmp);
+
+                       if (!$tmpdoc->loadHTML($tmp))
+                               return $article;
 
                        if ($tmpdoc->encoding != 'UTF-8') {
                                $tmpxpath = new DOMXPath($tmpdoc);