af_readability: use a catch-all exception handler around Readability parsing

[tt-rss.git] / plugins / af_readability / init.php
diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php

index 2d08a621c6857f3630e58352bb7302d793fe03d2..cccdf8af7da2b484b009eaaaa484f20ed3807ec1 100755 (executable)
--- a/plugins/af_readability/init.php
+++ b/plugins/af_readability/init.php
@@ -1,4 +1,7 @@
  <?php
+use andreskrey\Readability\Readability;
+use andreskrey\Readability\Configuration;
+
  class Af_Readability extends Plugin {
  
         /* @var PluginHost $host */
@@ -26,6 +29,10 @@ class Af_Readability extends Plugin {
         {
                 $this->host = $host;
  
+               if (version_compare(PHP_VERSION, '5.6.0', '<')) {
+                       return;
+               }
+
                 $host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
                 $host->add_hook($host::HOOK_PREFS_TAB, $this);
                 $host->add_hook($host::HOOK_PREFS_EDIT_FEED, $this);
@@ -39,6 +46,10 @@ class Af_Readability extends Plugin {
  
                 print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"".__('Readability settings (af_readability)')."\">";
  
+               if (version_compare(PHP_VERSION, '5.6.0', '<')) {
+                       print_error("This plugin requires PHP version 5.6.");
+               }
+
                 print_notice("Enable the plugin for specific feeds in the feed editor.");
  
                 print "<form dojoType=\"dijit.form.Form\">";
@@ -139,8 +150,6 @@ class Af_Readability extends Plugin {
         public function extract_content($url) {
                 global $fetch_effective_url;
  
-               if (!class_exists("Readability")) require_once(dirname(dirname(__DIR__)). "/lib/readability/Readability.php");
-
                 $tmp = fetch_file_contents([
                         "url" => $url,
                         "http_accept" => "text/*",
@@ -162,30 +171,35 @@ class Af_Readability extends Plugin {
                                 $tmp = $tmpdoc->saveHTML();
                         }
  
-                       $r = new Readability($tmp, $fetch_effective_url);
+                       $r = new Readability(new Configuration());
  
-                       if ($r->init()) {
-                               $tmpxpath = new DOMXPath($r->dom);
+                       try {
+                               if ($r->parse($tmp)) {
  
-                               $entries = $tmpxpath->query('(//a[@href]|//img[@src])');
+                                       $tmpxpath = new DOMXPath($r->getDOMDOcument());
+                                       $entries = $tmpxpath->query('(//a[@href]|//img[@src])');
  
-                               foreach ($entries as $entry) {
-                                       if ($entry->hasAttribute("href")) {
-                                               $entry->setAttribute("href",
-                                                               rewrite_relative_url($fetch_effective_url, $entry->getAttribute("href")));
+                                       foreach ($entries as $entry) {
+                                               if ($entry->hasAttribute("href")) {
+                                                       $entry->setAttribute("href",
+                                                                       rewrite_relative_url($fetch_effective_url, $entry->getAttribute("href")));
  
-                                       }
+                                               }
  
-                                       if ($entry->hasAttribute("src")) {
-                                               $entry->setAttribute("src",
-                                                               rewrite_relative_url($fetch_effective_url, $entry->getAttribute("src")));
+                                               if ($entry->hasAttribute("src")) {
+                                                       $entry->setAttribute("src",
+                                                                       rewrite_relative_url($fetch_effective_url, $entry->getAttribute("src")));
  
+                                               }
                                         }
  
+                                       return $r->getContent();
                                 }
  
-                               return $r->articleContent->innerHTML;
+                       } catch (Exception $e) {
+                               return false;
                         }
+
                 }
  
                 return false;
@@ -195,7 +209,10 @@ class Af_Readability extends Plugin {
  
                 $extracted_content = $this->extract_content($article["link"]);
  
-               if ($extracted_content) {
+               # let's see if there's anything of value in there
+               $content_test = trim(strip_tags(sanitize($extracted_content)));
+
+               if ($content_test) {
                         $article["content"] = $extracted_content;
                 }