git.wh0rd.org - tt-rss.git/blobdiff - plugins/af_readability/init.php
use catchall exception handler for readability
[tt-rss.git] / plugins / af_readability / init.php
index 97acb4375391a251dd98f549a3f0a5c2fe3cc5df..cccdf8af7da2b484b009eaaaa484f20ed3807ec1 100755 (executable)
@@ -1,6 +1,10 @@
 <?php
+use andreskrey\Readability\Readability;
+use andreskrey\Readability\Configuration;
+
 class Af_Readability extends Plugin {
 
+       /* @var PluginHost $host */
        private $host;
 
        function about() {
@@ -9,14 +13,26 @@ class Af_Readability extends Plugin {
                        "fox");
        }
 
+       function flags() {
+               return array("needs_curl" => true);
+       }
+
        function save() {
-               //
+               $enable_share_anything = checkbox_to_sql_bool($_POST["enable_share_anything"]);
+
+               $this->host->set($this, "enable_share_anything", $enable_share_anything);
+
+               echo __("Data saved.");
        }
 
        function init($host)
        {
                $this->host = $host;
 
+               if (version_compare(PHP_VERSION, '5.6.0', '<')) {
+                       return;
+               }
+
                $host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
                $host->add_hook($host::HOOK_PREFS_TAB, $this);
                $host->add_hook($host::HOOK_PREFS_EDIT_FEED, $this);
@@ -28,16 +44,46 @@ class Af_Readability extends Plugin {
        function hook_prefs_tab($args) {
                if ($args != "prefFeeds") return;
 
-               print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"".__('af_readability settings')."\">";
+               print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"".__('Readability settings (af_readability)')."\">";
+
+               if (version_compare(PHP_VERSION, '5.6.0', '<')) {
+                       print_error("This plugin requires PHP version 5.6.");
+               }
 
                print_notice("Enable the plugin for specific feeds in the feed editor.");
 
+               print "<form dojoType=\"dijit.form.Form\">";
+
+               print "<script type=\"dojo/method\" event=\"onSubmit\" args=\"evt\">
+                       evt.preventDefault();
+                       if (this.validate()) {
+                               console.log(dojo.objectToQuery(this.getValues()));
+                               new Ajax.Request('backend.php', {
+                                       parameters: dojo.objectToQuery(this.getValues()),
+                                       onComplete: function(transport) {
+                                               notify_info(transport.responseText);
+                                       }
+                               });
+                               //this.reset();
+                       }
+                       </script>";
+
+               print_hidden("op", "pluginhandler");
+               print_hidden("method", "save");
+               print_hidden("plugin", "af_readability");
+
+               $enable_share_anything = $this->host->get($this, "enable_share_anything");
+
+               print_checkbox("enable_share_anything", $enable_share_anything);
+               print "&nbsp;<label for=\"enable_share_anything\">" . __("Use Readability for pages shared via bookmarklet.") . "</label>";
+
+               print "<p>"; print_button("submit", __("Save"));
+               print "</form>";
+
                $enabled_feeds = $this->host->get($this, "enabled_feeds");
-               if (!array($enabled_feeds)) {
-                       $enabled_feeds = array();
-               } else {
-                       $enabled_feeds = $this->filter_unknown_feeds($enabled_feeds);
-               }
+               if (!is_array($enabled_feeds)) $enabled_feeds = array();
+
+               $enabled_feeds = $this->filter_unknown_feeds($enabled_feeds);
                $this->host->set($this, "enabled_feeds", $enabled_feeds);
 
                if (count($enabled_feeds) > 0) {
@@ -49,7 +95,7 @@ class Af_Readability extends Plugin {
                                        "<img src='images/pub_set.png'
                                                style='vertical-align : middle'> <a href='#'
                                                onclick='editFeed($f)'>".
-                                       getFeedTitle($f) . "</a></li>";
+                                       Feeds::getFeedTitle($f) . "</a></li>";
                        }
                        print "</ul>";
                }
@@ -62,7 +108,7 @@ class Af_Readability extends Plugin {
                print "<div class=\"dlgSecCont\">";
 
                $enabled_feeds = $this->host->get($this, "enabled_feeds");
-               if (!array($enabled_feeds)) $enabled_feeds = array();
+               if (!is_array($enabled_feeds)) $enabled_feeds = array();
 
                $key = array_search($feed_id, $enabled_feeds);
                $checked = $key !== FALSE ? "checked" : "";
@@ -78,7 +124,7 @@ class Af_Readability extends Plugin {
                $enabled_feeds = $this->host->get($this, "enabled_feeds");
                if (!is_array($enabled_feeds)) $enabled_feeds = array();
 
-               $enable = checkbox_to_sql_bool($_POST["af_readability_enabled"]) == 'true';
+               $enable = checkbox_to_sql_bool($_POST["af_readability_enabled"]);
                $key = array_search($feed_id, $enabled_feeds);
 
                if ($enable) {
@@ -94,39 +140,26 @@ class Af_Readability extends Plugin {
                $this->host->set($this, "enabled_feeds", $enabled_feeds);
        }
 
+       /**
+        * @SuppressWarnings(PHPMD.UnusedFormalParameter)
+        */
        function hook_article_filter_action($article, $action) {
                return $this->process_article($article);
        }
 
-       function process_article($article) {
-
-               if (!class_exists("Readability")) require_once(dirname(dirname(__DIR__)). "/lib/readability/Readability.php");
-
-               if (!defined('NO_CURL') && function_exists('curl_init') && !ini_get("open_basedir")) {
+       public function extract_content($url) {
+               global $fetch_effective_url;
 
-                       $ch = curl_init($article["link"]);
+               $tmp = fetch_file_contents([
+                       "url" => $url,
+                       "http_accept" => "text/*",
+                       "type" => "text/html"]);
 
-                       curl_setopt($ch, CURLOPT_TIMEOUT, 5);
-                       curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
-                       curl_setopt($ch, CURLOPT_HEADER, true);
-                       curl_setopt($ch, CURLOPT_NOBODY, true);
-                       curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
-                       curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT);
-
-                       @$result = curl_exec($ch);
-                       $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
-
-                       if (strpos($content_type, "text/html") === FALSE)
-                               return $article;
-               }
-
-               $tmp = fetch_file_contents($article["link"]);
-
-               if ($tmp && mb_strlen($tmp) < 65535 * 4) {
+               if ($tmp && mb_strlen($tmp) < 1024 * 500) {
                        $tmpdoc = new DOMDocument("1.0", "UTF-8");
 
-                       if (!$tmpdoc->loadHTML($tmp))
-                               return $article;
+                       if (!$tmpdoc->loadHTML('<?xml encoding="utf-8" ?>\n' . $tmp))
+                               return false;
 
                        if (strtolower($tmpdoc->encoding) != 'utf-8') {
                                $tmpxpath = new DOMXPath($tmpdoc);
@@ -138,31 +171,49 @@ class Af_Readability extends Plugin {
                                $tmp = $tmpdoc->saveHTML();
                        }
 
-                       $r = new Readability($tmp, $article["link"]);
+                       $r = new Readability(new Configuration());
 
-                       if ($r->init()) {
+                       try {
+                               if ($r->parse($tmp)) {
 
-                               $tmpxpath = new DOMXPath($r->dom);
+                                       $tmpxpath = new DOMXPath($r->getDOMDOcument());
+                                       $entries = $tmpxpath->query('(//a[@href]|//img[@src])');
 
-                               $entries = $tmpxpath->query('(//a[@href]|//img[@src])');
+                                       foreach ($entries as $entry) {
+                                               if ($entry->hasAttribute("href")) {
+                                                       $entry->setAttribute("href",
+                                                                       rewrite_relative_url($fetch_effective_url, $entry->getAttribute("href")));
 
-                               foreach ($entries as $entry) {
-                                       if ($entry->hasAttribute("href")) {
-                                               $entry->setAttribute("href",
-                                                       rewrite_relative_url($article["link"], $entry->getAttribute("href")));
+                                               }
 
-                                       }
-
-                                       if ($entry->hasAttribute("src")) {
-                                               $entry->setAttribute("src",
-                                                       rewrite_relative_url($article["link"], $entry->getAttribute("src")));
+                                               if ($entry->hasAttribute("src")) {
+                                                       $entry->setAttribute("src",
+                                                                       rewrite_relative_url($fetch_effective_url, $entry->getAttribute("src")));
 
+                                               }
                                        }
 
+                                       return $r->getContent();
                                }
 
-                               $article["content"] = $r->articleContent->innerHTML;
+                       } catch (Exception $e) {
+                               return false;
                        }
+
+               }
+
+               return false;
+       }
+
+       function process_article($article) {
+
+               $extracted_content = $this->extract_content($article["link"]);
+
+               # let's see if there's anything of value in there
+               $content_test = trim(strip_tags(sanitize($extracted_content)));
+
+               if ($content_test) {
+                       $article["content"] = $extracted_content;
                }
 
                return $article;
@@ -171,6 +222,8 @@ class Af_Readability extends Plugin {
        function hook_article_filter($article) {
 
                $enabled_feeds = $this->host->get($this, "enabled_feeds");
+               if (!is_array($enabled_feeds)) return $article;
+
                $key = array_search($article["feed"]["id"], $enabled_feeds);
                if ($key === FALSE) return $article;
 
@@ -185,14 +238,13 @@ class Af_Readability extends Plugin {
        private function filter_unknown_feeds($enabled_feeds) {
                $tmp = array();
 
-               if (!empty($enabled_feeds)) {
-                       foreach ($enabled_feeds as $feed) {
+               foreach ($enabled_feeds as $feed) {
 
-                               $result = db_query("SELECT id FROM ttrss_feeds WHERE id = '$feed' AND owner_uid = " . $_SESSION["uid"]);
+                       $sth = $this->pdo->prepare("SELECT id FROM ttrss_feeds WHERE id = ? AND owner_uid = ?");
+                       $sth->execute([$feed, $_SESSION['uid']]);
 
-                               if (db_num_rows($result) != 0) {
-                                       array_push($tmp, $feed);
-                               }
+                       if ($row = $sth->fetch()) {
+                               array_push($tmp, $feed);
                        }
                }
 
@@ -200,4 +252,3 @@ class Af_Readability extends Plugin {
        }
 
 }
-?>