<?php
+use andreskrey\Readability\Readability;
+use andreskrey\Readability\Configuration;
+
// Plugin class that uses the andreskrey Readability library imported at the top of the file.
// NOTE(review): this view is a flattened diff; method headers between the
// fragments below are not visible, so comments describe only the lines shown.
class Af_Readability extends Plugin {
/* @var PluginHost $host */
{
// NOTE(review): presumably the body of the plugin's init method, which
// receives the plugin host as $host — confirm against the full file.
$this->host = $host;
// On PHP older than 5.6.0, return before any hook is registered, so none of
// the three hooks below are attached on such installations.
+ if (version_compare(PHP_VERSION, '5.6.0', '<')) {
+ return;
+ }
+
$host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
$host->add_hook($host::HOOK_PREFS_TAB, $this);
$host->add_hook($host::HOOK_PREFS_EDIT_FEED, $this);
// Start of the settings pane output (the enclosing method header is elided).
print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"".__('Readability settings (af_readability)')."\">";
// Show the PHP version requirement inside the settings pane.
// NOTE(review): if this pane is only rendered through HOOK_PREFS_TAB, the
// early return above skips that registration on PHP < 5.6 and this message
// may never be displayed — verify how this output is reached.
+ if (version_compare(PHP_VERSION, '5.6.0', '<')) {
+ print_error("This plugin requires PHP version 5.6.");
+ }
+
print_notice("Enable the plugin for specific feeds in the feed editor.");
print "<form dojoType=\"dijit.form.Form\">";
// Fetches $url and runs the result through Readability.
// Returns the string produced by $r->getContent() on success, or false when
// an Exception is caught or the final fallthrough is reached.
// NOTE(review): lines between the fetch_file_contents() call and the
// saveHTML() line are elided in this view, as is the closing brace of the
// method; the comments below cover only the visible statements.
public function extract_content($url) {
global $fetch_effective_url;
- if (!class_exists("Readability")) require_once(dirname(dirname(__DIR__)). "/lib/readability/Readability.php");
-
$tmp = fetch_file_contents([
"url" => $url,
"http_accept" => "text/*",
$tmp = $tmpdoc->saveHTML();
}
- $r = new Readability($tmp, $fetch_effective_url);
// The new library is constructed from a Configuration object only; the
// markup in $tmp is handed over later through parse().
+ $r = new Readability(new Configuration());
- if ($r->init()) {
- $tmpxpath = new DOMXPath($r->dom);
// Any Exception thrown inside this try block results in a false return
// (see the catch further down).
+ try {
+ if ($r->parse($tmp)) {
- $entries = $tmpxpath->query('(//a[@href]|//img[@src])');
// NOTE(review): "getDOMDOcument" has unusual capitalization. PHP resolves
// method names case-insensitively, so the call works, but "getDOMDocument"
// is presumably the intended spelling — confirm against the library.
+ $tmpxpath = new DOMXPath($r->getDOMDOcument());
// Select every <a> that has an href and every <img> that has a src.
+ $entries = $tmpxpath->query('(//a[@href]|//img[@src])');
- foreach ($entries as $entry) {
- if ($entry->hasAttribute("href")) {
- $entry->setAttribute("href",
- rewrite_relative_url($fetch_effective_url, $entry->getAttribute("href")));
// Pass each href/src value through rewrite_relative_url() together with
// $fetch_effective_url and store the result back on the node.
+ foreach ($entries as $entry) {
+ if ($entry->hasAttribute("href")) {
+ $entry->setAttribute("href",
+ rewrite_relative_url($fetch_effective_url, $entry->getAttribute("href")));
- }
+ }
- if ($entry->hasAttribute("src")) {
- $entry->setAttribute("src",
- rewrite_relative_url($fetch_effective_url, $entry->getAttribute("src")));
+ if ($entry->hasAttribute("src")) {
+ $entry->setAttribute("src",
+ rewrite_relative_url($fetch_effective_url, $entry->getAttribute("src")));
+ }
}
// NOTE(review): the attribute rewrites above are applied to the document
// returned by getDOMDOcument(), while the value returned here comes from
// getContent(). Confirm that getContent() reflects those rewrites.
+ return $r->getContent();
}
- return $r->articleContent->innerHTML;
+ } catch (Exception $e) {
// Failure inside the try block is reported as false; $e is not inspected.
+ return false;
}
+
}
return false;
// NOTE(review): the enclosing method header is outside this view; presumably
// this is the article filter hook operating on $article — confirm.
$extracted_content = $this->extract_content($article["link"]);
- if ($extracted_content) {
// Instead of a plain truthiness check, require that some text remains after
// sanitize(), strip_tags() and trim() before replacing the article content.
// NOTE(review): extract_content() returns false on failure, so sanitize()
// is also called with false here — confirm it tolerates that input.
+ # let's see if there's anything of value in there
+ $content_test = trim(strip_tags(sanitize($extracted_content)));
+
+ if ($content_test) {
$article["content"] = $extracted_content;
}