git.wh0rd.org - tt-rss.git/blob - plugins/af_readability/init.php
af_readability: check if sanitized content has anything before replacing
[tt-rss.git] / plugins / af_readability / init.php
1 <?php
2 use andreskrey\Readability\Readability;
3 use andreskrey\Readability\Configuration;
4
/**
 * tt-rss plugin that replaces an article's content with the main content
 * extracted from the linked page by the Readability library.
 *
 * The plugin is switched on per feed (the "enabled_feeds" list kept in the
 * plugin host's storage) and is also offered as a filter action
 * ("action_inline").
 */
class Af_Readability extends Plugin {

	/** @var PluginHost $host plugin host handed to init(); used for hooks and plugin storage */
	private $host;

	/**
	 * Plugin metadata.
	 *
	 * @return array presumably (version, description, author) - confirm against Plugin::about()
	 */
	function about() {
		return array(1.0,
			"Try to inline article content using Readability",
			"fox");
	}

	/**
	 * Plugin flags.
	 *
	 * @return array map of flag name to value; declares "needs_curl"
	 */
	function flags() {
		return array("needs_curl" => true);
	}

	/**
	 * Store the "enable_share_anything" preference submitted by the form
	 * rendered in hook_prefs_tab() and print a confirmation message.
	 */
	function save() {
		// An unchecked checkbox may be missing from the request altogether;
		// read it defensively instead of triggering an undefined index notice.
		$submitted = isset($_POST["enable_share_anything"]) ? $_POST["enable_share_anything"] : null;

		$enable_share_anything = checkbox_to_sql_bool($submitted);

		$this->host->set($this, "enable_share_anything", $enable_share_anything);

		echo __("Data saved.");
	}

	/**
	 * Remember the plugin host and register hooks and the filter action.
	 *
	 * On PHP older than 5.6 nothing is registered, i.e. the plugin stays inert.
	 *
	 * @param PluginHost $host
	 */
	function init($host)
	{
		$this->host = $host;

		if (version_compare(PHP_VERSION, '5.6.0', '<')) {
			return;
		}

		$host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
		$host->add_hook($host::HOOK_PREFS_TAB, $this);
		$host->add_hook($host::HOOK_PREFS_EDIT_FEED, $this);
		$host->add_hook($host::HOOK_PREFS_SAVE_FEED, $this);

		$host->add_filter_action($this, "action_inline", __("Inline content"));
	}

	/**
	 * Render the plugin's settings pane: the "enable_share_anything" checkbox
	 * form plus the list of feeds the plugin is currently enabled for.
	 *
	 * As a side effect the stored "enabled_feeds" list is rewritten without
	 * feeds that filter_unknown_feeds() rejects.
	 *
	 * @param string $args name of the preferences tab being rendered; only "prefFeeds" is handled
	 */
	function hook_prefs_tab($args) {
		if ($args != "prefFeeds") return;

		print "<div dojoType=\"dijit.layout.AccordionPane\" title=\"".__('Readability settings (af_readability)')."\">";

		// NOTE(review): init() returns before registering this hook on PHP < 5.6,
		// so this message is only shown if the hook is invoked some other way.
		if (version_compare(PHP_VERSION, '5.6.0', '<')) {
			print_error("This plugin requires PHP version 5.6.");
		}

		print_notice("Enable the plugin for specific feeds in the feed editor.");

		print "<form dojoType=\"dijit.form.Form\">";

		print "<script type=\"dojo/method\" event=\"onSubmit\" args=\"evt\">
			evt.preventDefault();
			if (this.validate()) {
				console.log(dojo.objectToQuery(this.getValues()));
				new Ajax.Request('backend.php', {
					parameters: dojo.objectToQuery(this.getValues()),
					onComplete: function(transport) {
						notify_info(transport.responseText);
					}
				});
				//this.reset();
			}
			</script>";

		// These hidden fields route the submission to this plugin's save() method.
		print_hidden("op", "pluginhandler");
		print_hidden("method", "save");
		print_hidden("plugin", "af_readability");

		$enable_share_anything = $this->host->get($this, "enable_share_anything");

		print_checkbox("enable_share_anything", $enable_share_anything);
		print "&nbsp;<label for=\"enable_share_anything\">" . __("Use Readability for pages shared via bookmarklet.") . "</label>";

		print "<p>"; print_button("submit", __("Save"));
		print "</form>";

		$enabled_feeds = $this->host->get($this, "enabled_feeds");
		if (!is_array($enabled_feeds)) $enabled_feeds = array();

		// Drop stale entries and persist the cleaned-up list.
		$enabled_feeds = $this->filter_unknown_feeds($enabled_feeds);
		$this->host->set($this, "enabled_feeds", $enabled_feeds);

		if (count($enabled_feeds) > 0) {
			print "<h3>" . __("Currently enabled for (click to edit):") . "</h3>";

			print "<ul class=\"browseFeedList\" style=\"border-width : 1px\">";
			foreach ($enabled_feeds as $f) {
				// The id ends up inside an inline JS handler and the title inside
				// HTML; feed titles originate from remote feeds, so neither is
				// emitted raw. double_encode is off in case the title already
				// contains entities.
				$feed_id = (int) $f;
				$feed_title = htmlspecialchars(Feeds::getFeedTitle($f), ENT_QUOTES, 'UTF-8', false);

				print "<li>" .
					"<img src='images/pub_set.png'
					style='vertical-align : middle'> <a href='#'
					onclick='editFeed($feed_id)'>".
					$feed_title . "</a></li>";
			}
			print "</ul>";
		}

		print "</div>";
	}

	/**
	 * Render the "Inline article content" checkbox in the feed editor,
	 * pre-checked when the feed is in the stored "enabled_feeds" list.
	 *
	 * @param mixed $feed_id id of the feed being edited
	 */
	function hook_prefs_edit_feed($feed_id) {
		print "<div class=\"dlgSec\">".__("Readability")."</div>";
		print "<div class=\"dlgSecCont\">";

		$enabled_feeds = $this->host->get($this, "enabled_feeds");
		if (!is_array($enabled_feeds)) $enabled_feeds = array();

		// Loose comparison on purpose: the id may be a string here and an
		// integer in storage (or the other way round).
		$key = array_search($feed_id, $enabled_feeds);
		$checked = $key !== FALSE ? "checked" : "";

		print "<hr/><input dojoType=\"dijit.form.CheckBox\" type=\"checkbox\" id=\"af_readability_enabled\"
			name=\"af_readability_enabled\"
			$checked>&nbsp;<label for=\"af_readability_enabled\">".__('Inline article content')."</label>";

		print "</div>";
	}

	/**
	 * Add the feed to, or remove it from, the stored "enabled_feeds" list
	 * according to the "af_readability_enabled" checkbox of the feed editor.
	 *
	 * @param mixed $feed_id id of the feed being saved
	 */
	function hook_prefs_save_feed($feed_id) {
		$enabled_feeds = $this->host->get($this, "enabled_feeds");
		if (!is_array($enabled_feeds)) $enabled_feeds = array();

		// An unchecked checkbox may be missing from the request altogether.
		$submitted = isset($_POST["af_readability_enabled"]) ? $_POST["af_readability_enabled"] : null;

		$enable = checkbox_to_sql_bool($submitted);
		$key = array_search($feed_id, $enabled_feeds);

		if ($enable) {
			if ($key === FALSE) {
				array_push($enabled_feeds, $feed_id);
			}
		} else {
			if ($key !== FALSE) {
				unset($enabled_feeds[$key]);
			}
		}

		$this->host->set($this, "enabled_feeds", $enabled_feeds);
	}

	/**
	 * Filter action entry point; the action name is ignored because the
	 * plugin registers a single action.
	 *
	 * @param array $article
	 * @param string $action
	 * @return array the (possibly modified) article
	 *
	 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
	 */
	function hook_article_filter_action($article, $action) {
		return $this->process_article($article);
	}

	/**
	 * Fetch a page and extract its main content with Readability.
	 *
	 * Links and image sources in the extracted content are rewritten against
	 * the global $fetch_effective_url (presumably set by fetch_file_contents()
	 * to the final URL after redirects - confirm).
	 *
	 * @param string $url page to fetch
	 * @return string|false extracted content, or false when the page could not
	 *                      be fetched, is empty, is 500 * 1024 characters or
	 *                      longer, or could not be parsed
	 */
	public function extract_content($url) {
		global $fetch_effective_url;

		$tmp = fetch_file_contents([
			"url" => $url,
			"http_accept" => "text/*",
			"type" => "text/html"]);

		if ($tmp && mb_strlen($tmp) < 1024 * 500) {
			$tmpdoc = new DOMDocument("1.0", "UTF-8");

			// The XML declaration makes libxml read the markup as UTF-8. The
			// string must be double-quoted: in single quotes "\n" would be two
			// literal characters prepended to the page.
			if (!$tmpdoc->loadHTML("<?xml encoding=\"utf-8\" ?>\n" . $tmp))
				return false;

			if (strtolower($tmpdoc->encoding) != 'utf-8') {
				// Document claims a different encoding: strip every <meta>
				// element (presumably to get rid of the charset declaration)
				// and hand the re-serialized markup to Readability instead.
				$tmpxpath = new DOMXPath($tmpdoc);

				foreach ($tmpxpath->query("//meta") as $elem) {
					$elem->parentNode->removeChild($elem);
				}

				$tmp = $tmpdoc->saveHTML();
			}

			$r = new Readability(new Configuration());

			try {
				if ($r->parse($tmp)) {

					$tmpxpath = new DOMXPath($r->getDOMDocument());
					$entries = $tmpxpath->query('(//a[@href]|//img[@src])');

					foreach ($entries as $entry) {
						if ($entry->hasAttribute("href")) {
							$entry->setAttribute("href",
								rewrite_relative_url($fetch_effective_url, $entry->getAttribute("href")));

						}

						if ($entry->hasAttribute("src")) {
							$entry->setAttribute("src",
								rewrite_relative_url($fetch_effective_url, $entry->getAttribute("src")));

						}
					}

					return $r->getContent();
				}

			// Fully qualified: this file has no namespace and does not import
			// ParseException, so the bare name would never match the exception
			// thrown by the library.
			} catch (\andreskrey\Readability\ParseException $e) {
				return false;
			}

		}

		return false;
	}

	/**
	 * Replace the article's content with the content extracted from its link,
	 * provided the extraction produced some visible text.
	 *
	 * @param array $article article with at least a "link" entry
	 * @return array the article, with "content" replaced on success
	 */
	function process_article($article) {

		$extracted_content = $this->extract_content($article["link"]);

		// extraction failed or returned nothing: leave the article untouched
		if (!$extracted_content) return $article;

		# let's see if there's anything of value in there
		$content_test = trim(strip_tags(sanitize($extracted_content)));

		if ($content_test) {
			$article["content"] = $extracted_content;
		}

		return $article;
	}

	/**
	 * Article filter hook: process the article only when its feed is in the
	 * stored "enabled_feeds" list.
	 *
	 * @param array $article article with a "feed" => ["id" => ...] entry
	 * @return array the (possibly modified) article
	 */
	function hook_article_filter($article) {

		$enabled_feeds = $this->host->get($this, "enabled_feeds");
		if (!is_array($enabled_feeds)) return $article;

		$key = array_search($article["feed"]["id"], $enabled_feeds);
		if ($key === FALSE) return $article;

		return $this->process_article($article);

	}

	/**
	 * @return int plugin API version implemented by this plugin
	 */
	function api_version() {
		return 2;
	}

	/**
	 * Keep only the feed ids that exist in ttrss_feeds and belong to the
	 * current session user.
	 *
	 * @param array $enabled_feeds feed ids to check
	 * @return array the ids that passed the check, re-indexed from 0
	 */
	private function filter_unknown_feeds($enabled_feeds) {
		$tmp = array();

		// Prepared once and executed per feed.
		$sth = $this->pdo->prepare("SELECT id FROM ttrss_feeds WHERE id = ? AND owner_uid = ?");

		foreach ($enabled_feeds as $feed) {

			$sth->execute([$feed, $_SESSION['uid']]);

			if ($sth->fetch()) {
				array_push($tmp, $feed);
			}
		}

		return $tmp;
	}

}