]> git.wh0rd.org - tt-rss.git/blame - classes/rssutils.php
pluginhost: always return an array in get_all()
[tt-rss.git] / classes / rssutils.php
CommitLineData
2c08214a 1<?php
e6c886bf
AD
2class RSSUtils {
/**
 * Computes a SHA1 fingerprint of an article array.
 *
 * Every non-null element except the one keyed "feed" contributes
 * sha1("<key>:" . sha1(<value>)), where the value has its tags stripped
 * and array values are first joined with commas. The names returned by
 * $pluginhost->get_plugin_names() are joined with commas and prefixed to
 * the per-field digests before the final hash, so the result also depends
 * on that list.
 *
 * @param array $article article fields keyed by name
 * @param object $pluginhost object exposing get_plugin_names()
 * @return string 40-character hexadecimal SHA1 digest
 */
static function calculate_article_hash($article, $pluginhost) {
	$field_digests = array();

	foreach ($article as $field => $value) {
		// the "feed" element and unset (null) values never affect the hash
		if ($field == "feed" || !isset($value)) {
			continue;
		}

		$flattened = is_array($value) ? implode(",", $value) : $value;

		$field_digests[] = sha1($field . ":" . sha1(strip_tags($flattened)));
	}

	$plugin_names = implode(",", $pluginhost->get_plugin_names());

	return sha1($plugin_names . implode("", $field_digests));
}
18
/**
 * Rebuilds ttrss_feedbrowser_cache from the most subscribed feeds.
 *
 * Selects up to 1000 (feed_url, site_url, title) groups ordered by
 * subscriber count, leaving out every feed URL that has at least one
 * subscription which is private, carries a login or password, or matches
 * the '%:%@%/%' pattern. The cache table is emptied and refilled inside a
 * single transaction; only the first row seen for each feed_url is kept.
 *
 * If any statement throws, the transaction is rolled back (so the old
 * cache contents are not lost) and the exception is re-thrown.
 *
 * @return int number of rows inserted into the cache
 */
static function update_feedbrowser_cache() {

	$pdo = Db::pdo();

	$sth = $pdo->query("SELECT feed_url, site_url, title, COUNT(id) AS subscribers
		FROM ttrss_feeds WHERE feed_url NOT IN (SELECT feed_url FROM ttrss_feeds
			WHERE private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%')
		GROUP BY feed_url, site_url, title ORDER BY subscribers DESC LIMIT 1000");

	$count = 0;

	$pdo->beginTransaction();

	try {
		$pdo->query("DELETE FROM ttrss_feedbrowser_cache");

		// both statements are loop-invariant: prepare them once, execute per row
		$exists_sth = $pdo->prepare("SELECT subscribers FROM
			ttrss_feedbrowser_cache WHERE feed_url = ?");

		$insert_sth = $pdo->prepare("INSERT INTO ttrss_feedbrowser_cache
			(feed_url, site_url, title, subscribers)
			VALUES
			(?, ?, ?, ?)");

		while ($line = $sth->fetch()) {

			$exists_sth->execute([$line["feed_url"]]);
			$already_cached = $exists_sth->fetch();

			// release the result set so the statement can be executed again
			$exists_sth->closeCursor();

			// the same feed_url may appear with several title/site_url
			// variants; rows are ordered by subscribers DESC so the first wins
			if (!$already_cached) {
				$insert_sth->execute([
					$line["feed_url"],
					$line["site_url"],
					$line["title"],
					$line["subscribers"]]);

				++$count;
			}
		}

		$pdo->commit();

	} catch (Exception $e) {
		// do not leave the DELETE pending in an open transaction
		if ($pdo->inTransaction()) {
			$pdo->rollBack();
		}

		throw $e;
	}

	return $count;

}
65
/**
 * Runs one batch of scheduled feed updates.
 *
 * Terminates the script via die() when the database schema version does
 * not match SCHEMA_VERSION. Otherwise it selects distinct feed URLs that
 * are due for an update (per-feed interval, or the owner's
 * DEFAULT_UPDATE_INTERVAL preference when the feed interval is 0), whose
 * last_update_started is NULL or older than 10 minutes, optionally
 * restricted to owners who logged in within DAEMON_UPDATE_LOGIN_LIMIT
 * days, least recently updated first.
 *
 * The selected URLs are stamped with last_update_started = NOW() before
 * any fetching begins. For each URL the first matching feed row is then
 * passed to update_rss_feed(). Afterwards housekeeping_user() runs once
 * for every owner touched and Digest::send_headlines_digests() is called.
 *
 * @param int $limit maximum number of rows per query; a falsy value omits the LIMIT clause
 * @param bool $debug gates the scheduling/base-feed debug messages and is passed to the digest sender
 * @return int number of feeds passed to update_rss_feed()
 */
static function update_daemon_common($limit = DAEMON_FEED_LIMIT, $debug = true) {
	if (get_schema_version() != SCHEMA_VERSION) {
		die("Schema version is wrong, please upgrade the database.\n");
	}

	$pdo = Db::pdo();

	$is_pgsql = (DB_TYPE == "pgsql");

	// only consider owners who logged in recently (multi-user mode only)
	$login_thresh_qpart = "";

	if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
		$login_thresh_qpart = $is_pgsql
			? "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'"
			: "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
	}

	// the two backends differ only in how the cutoff timestamps are spelled
	if ($is_pgsql) {
		$pref_cutoff = "NOW() - CAST((ttrss_user_prefs.value || ' minutes') AS INTERVAL)";
		$feed_cutoff = "NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)";
		$started_cutoff = "NOW() - INTERVAL '10 minutes'";
	} else {
		$pref_cutoff = "DATE_SUB(NOW(), INTERVAL CONVERT(ttrss_user_prefs.value, SIGNED INTEGER) MINUTE)";
		$feed_cutoff = "DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)";
		$started_cutoff = "DATE_SUB(NOW(), INTERVAL 10 MINUTE)";
	}

	$update_limit_qpart = "AND ((
			ttrss_feeds.update_interval = 0
			AND ttrss_user_prefs.value != '-1'
			AND ttrss_feeds.last_updated < $pref_cutoff
		) OR (
			ttrss_feeds.update_interval > 0
			AND ttrss_feeds.last_updated < $feed_cutoff
		) OR (ttrss_feeds.last_updated IS NULL
			AND ttrss_user_prefs.value != '-1')
		OR (last_updated = '1970-01-01 00:00:00'
			AND ttrss_user_prefs.value != '-1'))";

	// Test if feed is currently being updated by another process.
	$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < $started_cutoff)";

	if ($limit) {
		$query_limit = sprintf("LIMIT %d", $limit);
	} else {
		$query_limit = "";
	}

	// Update the least recently updated feeds first
	$query_order = "ORDER BY last_updated" . ($is_pgsql ? " NULLS FIRST" : "");

	$query = "SELECT DISTINCT ttrss_feeds.feed_url, ttrss_feeds.last_updated
		FROM
			ttrss_feeds, ttrss_users, ttrss_user_prefs
		WHERE
			ttrss_feeds.owner_uid = ttrss_users.id
			AND ttrss_user_prefs.profile IS NULL
			AND ttrss_users.id = ttrss_user_prefs.owner_uid
			AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL'
			$login_thresh_qpart $update_limit_qpart
			$updstart_thresh_qpart
			$query_order $query_limit";

	$res = $pdo->query($query);

	$feeds_to_update = array();

	while ($scheduled = $res->fetch()) {
		$feeds_to_update[] = $scheduled['feed_url'];
	}

	if ($debug) {
		_debug(sprintf("Scheduled %d feeds to update...", count($feeds_to_update)));
	}

	// Update last_update_started before actually starting the batch
	// in order to minimize collision risk for parallel daemon tasks
	if (count($feeds_to_update) > 0) {
		$feeds_qmarks = arr_qmarks($feeds_to_update);

		$mark_sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
			WHERE feed_url IN ($feeds_qmarks)");
		$mark_sth->execute($feeds_to_update);
	}

	$nf = 0;
	$batch_owners = array();
	$bstarted = microtime(true);

	// since we have the data cached, we can deal with other feeds with the same url
	$usth = $pdo->prepare("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
		FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
			ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
			ttrss_users.id = ttrss_user_prefs.owner_uid AND
			ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL' AND
			ttrss_user_prefs.profile IS NULL AND
			feed_url = ?
		$update_limit_qpart
		$login_thresh_qpart
		ORDER BY ttrss_feeds.id $query_limit");

	foreach ($feeds_to_update as $feed) {
		if ($debug) {
			_debug("Base feed: $feed");
		}

		$usth->execute([$feed]);

		// only the first matching row for this URL is processed
		$tline = $usth->fetch();

		if (!$tline) {
			continue;
		}

		if ($debug) {
			_debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);
		}

		// remember each owner once so housekeeping runs a single time per user
		if (!in_array($tline["owner_uid"], $batch_owners)) {
			$batch_owners[] = $tline["owner_uid"];
		}

		$fstarted = microtime(true);
		RSSUtils::update_rss_feed($tline["id"], true, false);

		// update_rss_feed() may have suppressed debug output; re-enable it
		_debug_suppress(false);

		_debug(sprintf(" %.4f (sec)", microtime(true) - $fstarted));

		++$nf;
	}

	if ($nf > 0) {
		_debug(sprintf("Processed %d feeds in %.4f (sec), %.4f (sec/feed avg)", $nf,
			microtime(true) - $bstarted, (microtime(true) - $bstarted) / $nf));
	}

	foreach ($batch_owners as $owner_uid) {
		_debug("Running housekeeping tasks for user $owner_uid...");

		RSSUtils::housekeeping_user($owner_uid);
	}

	// Send feed digests by email if needed.
	Digest::send_headlines_digests($debug);

	return $nf;
}
2c08214a 210
6022776d 211 // this is used when subscribing
/**
 * Fills in the stored title and site URL of a feed.
 *
 * Loads the feed row, decrypts the stored password when it is flagged as
 * encrypted and mcrypt is available, then loads the system and per-user
 * plugins and lets HOOK_FEED_BASIC_INFO handlers supply the info. When no
 * plugin provides anything, the feed is fetched and parsed directly.
 *
 * The title is written only when the stored one is empty or "[Unknown]";
 * the site URL is written whenever it differs from the stored one.
 * Nothing happens if the feed id is unknown or no info could be obtained.
 *
 * @param int $feed id of the row in ttrss_feeds
 * @return void
 */
static function set_basic_feed_info($feed) {

	$pdo = Db::pdo();

	$sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login,auth_pass_encrypted
		FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	if (!$row) return;

	$owner_uid = $row["owner_uid"];
	$fetch_url = $row["feed_url"];
	$auth_login = $row["auth_login"];
	$auth_pass = $row["auth_pass"];

	if ($row["auth_pass_encrypted"] && function_exists("mcrypt_decrypt")) {
		require_once "crypt.php";
		$auth_pass = decrypt_string($auth_pass);
	}

	// a private plugin host is used so the feed owner's plugins get loaded
	$pluginhost = new PluginHost();
	$user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);

	$pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
	$pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
	$pluginhost->load_data();

	$basic_info = array();

	foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
		$basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
	}

	// no plugin supplied anything: fetch and parse the feed ourselves
	if (!$basic_info) {
		$feed_data = fetch_file_contents($fetch_url, false,
			$auth_login, $auth_pass, false,
			FEED_FETCH_TIMEOUT,
			0);

		global $fetch_curl_used;

		if (!$fetch_curl_used) {
			// best effort: keep the raw data when it is not gzip-encoded
			$tmp = @gzdecode($feed_data);

			if ($tmp) $feed_data = $tmp;
		}

		$feed_data = trim($feed_data);

		$rss = new FeedParser($feed_data);
		$rss->init();

		if (!$rss->error()) {
			$basic_info = array(
				'title' => mb_substr($rss->get_title(), 0, 199),
				'site_url' => mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245)
			);
		}
	}

	if (!$basic_info || !is_array($basic_info)) return;

	$sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	if (!$row) return;

	$registered_title = $row["title"];
	$orig_site_url = $row["site_url"];

	// a plugin may return only some of the keys, so guard each lookup
	// instead of raising an undefined index notice
	if (!empty($basic_info['title']) && (!$registered_title || $registered_title == "[Unknown]")) {

		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			title = ? WHERE id = ?");
		$sth->execute([$basic_info['title'], $feed]);
	}

	if (!empty($basic_info['site_url']) && $orig_site_url != $basic_info['site_url']) {
		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			site_url = ? WHERE id = ?");
		$sth->execute([$basic_info['site_url'], $feed]);
	}
}
301
7b55001e 302 /**
e6c886bf
AD
303 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
304 */
305 static function update_rss_feed($feed, $no_cache = false) {
2c08214a 306
e6532439 307 $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || clean($_REQUEST['xdebug']);
2c08214a 308
4f71d743 309 _debug_suppress(!$debug_enabled);
68cccafc 310 _debug("start", $debug_enabled);
2c08214a 311
0567016b
AD
312 $pdo = Db::pdo();
313
314 $sth = $pdo->prepare("SELECT title FROM ttrss_feeds WHERE id = ?");
315 $sth->execute([$feed]);
bfe1eb4e 316
0567016b 317 if (!$row = $sth->fetch()) {
bfe1eb4e
AD
318 _debug("feed $feed NOT FOUND/SKIPPED", $debug_enabled);
319 user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING);
320 return false;
321 }
322
0567016b 323 $title = $row["title"];
6bb96beb
AD
324
325 // feed was batch-subscribed or something, we need to get basic info
326 // this is not optimal currently as it fetches stuff separately TODO: optimize
327 if ($title == "[Unknown]") {
328 _debug("setting basic feed info for $feed...");
e6c886bf 329 RSSUtils::set_basic_feed_info($feed);
6bb96beb
AD
330 }
331
0567016b 332 $sth = $pdo->prepare("SELECT id,update_interval,auth_login,
5ba1ddd4 333 feed_url,auth_pass,cache_images,
5321e775 334 mark_unread_on_update, owner_uid,
153cb6d3 335 auth_pass_encrypted, feed_language,
e50c8eaa
AD
336 last_modified,
337 ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
0567016b
AD
338 FROM ttrss_feeds WHERE id = ?");
339 $sth->execute([$feed]);
340
341 if ($row = $sth->fetch()) {
2c08214a 342
0567016b 343 $owner_uid = $row["owner_uid"];
187abfe7
AD
344 $mark_unread_on_update = $row["mark_unread_on_update"];
345 $auth_pass_encrypted = $row["auth_pass_encrypted"];
2c08214a 346
0567016b
AD
347 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
348 WHERE id = ?");
349 $sth->execute([$feed]);
2c08214a 350
0567016b
AD
351 $auth_login = $row["auth_login"];
352 $auth_pass = $row["auth_pass"];
2c08214a 353
0567016b
AD
354 if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) {
355 require_once "crypt.php";
356 $auth_pass = decrypt_string($auth_pass);
357 }
044cff2d 358
0567016b
AD
359 $stored_last_modified = $row["last_modified"];
360 $last_unconditional = $row["last_unconditional"];
187abfe7 361 $cache_images = $row["cache_images"];
0567016b
AD
362 $fetch_url = $row["feed_url"];
363 $feed_language = mb_strtolower($row["feed_language"]);
364 if (!$feed_language) $feed_language = 'english';
2c08214a 365
0567016b
AD
366 } else {
367 return false;
368 }
2c08214a 369
f074ffe9 370 $date_feed_processed = date('Y-m-d H:i');
2c08214a 371
342e8a9e 372 $cache_filename = CACHE_DIR . "/feeds/" . sha1($fetch_url) . ".xml";
f074ffe9 373
ee65bef4
AD
374 $pluginhost = new PluginHost();
375 $pluginhost->set_debug($debug_enabled);
376 $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
377
378 $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
379 $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
380 $pluginhost->load_data();
381
7b55001e 382 $rss_hash = false;
4f9cbdff 383
7b55001e
AD
384 $force_refetch = isset($_REQUEST["force_refetch"]);
385 $feed_data = "";
687a4f59 386
7b55001e
AD
387 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) {
388 $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass);
389 }
2c08214a 390
7b55001e
AD
391 // try cache
392 if (!$feed_data &&
393 file_exists($cache_filename) &&
394 is_readable($cache_filename) &&
395 !$auth_login && !$auth_pass &&
396 filemtime($cache_filename) > time() - 30) {
be574731 397
7b55001e 398 _debug("using local cache [$cache_filename].", $debug_enabled);
52637d3b 399
7b55001e 400 @$feed_data = file_get_contents($cache_filename);
f074ffe9 401
7b55001e
AD
402 if ($feed_data) {
403 $rss_hash = sha1($feed_data);
88edaa93 404 }
ee65bef4 405
7b55001e
AD
406 } else {
407 _debug("local cache will not be used for this feed", $debug_enabled);
408 }
312742db 409
153cb6d3
AD
410 global $fetch_last_modified;
411
7b55001e
AD
412 // fetch feed from source
413 if (!$feed_data) {
e50c8eaa 414 _debug("last unconditional update request: $last_unconditional");
312742db 415
7b55001e
AD
416 if (ini_get("open_basedir") && function_exists("curl_init")) {
417 _debug("not using CURL due to open_basedir restrictions");
418 }
3f6f0857 419
e50c8eaa
AD
420 if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
421 _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
422
423 $force_refetch = true;
424 } else {
425 _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
426 }
153cb6d3 427
e50c8eaa 428 _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
153cb6d3
AD
429
430 $feed_data = fetch_file_contents([
431 "url" => $fetch_url,
432 "login" => $auth_login,
433 "pass" => $auth_pass,
434 "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
435 "last_modified" => $force_refetch ? "" : $stored_last_modified
436 ]);
3f6f0857 437
7b55001e 438 global $fetch_curl_used;
3f6f0857 439
7b55001e
AD
440 if (!$fetch_curl_used) {
441 $tmp = @gzdecode($feed_data);
1367bc3f 442
7b55001e
AD
443 if ($tmp) $feed_data = $tmp;
444 }
017401dd 445
7b55001e 446 $feed_data = trim($feed_data);
fd687300 447
7b55001e 448 _debug("fetch done.", $debug_enabled);
9d930af9 449 _debug("source last modified: " . $fetch_last_modified, $debug_enabled);
153cb6d3
AD
450
451 if ($feed_data && $fetch_last_modified != $stored_last_modified) {
0567016b
AD
452 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_modified = ? WHERE id = ?");
453 $sth->execute([substr($fetch_last_modified, 0, 245), $feed]);
153cb6d3 454 }
95beaa14 455
7b55001e 456 // cache vanilla feed data for re-use
342e8a9e 457 if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/feeds")) {
7b55001e
AD
458 $new_rss_hash = sha1($feed_data);
459
460 if ($new_rss_hash != $rss_hash) {
461 _debug("saving $cache_filename", $debug_enabled);
462 @file_put_contents($cache_filename, $feed_data);
95beaa14 463 }
4f9cbdff 464 }
7b55001e 465 }
017401dd 466
7b55001e
AD
467 if (!$feed_data) {
468 global $fetch_last_error;
469 global $fetch_last_error_code;
f074ffe9 470
7b55001e 471 _debug("unable to fetch: $fetch_last_error [$fetch_last_error_code]", $debug_enabled);
f074ffe9 472
7b55001e
AD
473 // If-Modified-Since
474 if ($fetch_last_error_code != 304) {
0567016b 475 $error_message = $fetch_last_error;
7b55001e
AD
476 } else {
477 _debug("source claims data not modified, nothing to do.", $debug_enabled);
0567016b 478 $error_message = "";
7b55001e 479 }
4f9cbdff 480
0567016b
AD
481 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
482 last_updated = NOW() WHERE id = ?");
483 $sth->execute([$error_message, $feed]);
4f9cbdff 484
7b55001e 485 return;
f074ffe9
AD
486 }
487
1ffe3391 488 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) {
6791af0c 489 $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed);
017401dd
AD
490 }
491
07d3431e
AD
492 $rss = new FeedParser($feed_data);
493 $rss->init();
2c08214a 494
19b3992b 495 if (!$rss->error()) {
2c08214a 496
d2a421e3 497 // We use local pluginhost here because we need to load different per-user feed plugins
1ffe3391 498 $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
4412b877 499
df659891 500 _debug("language: $feed_language", $debug_enabled);
68cccafc 501 _debug("processing feed data...", $debug_enabled);
2c08214a 502
382268c6
AD
503 if (DB_TYPE == "pgsql") {
504 $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
505 } else {
506 $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
507 }
508
0567016b 509 $sth = $pdo->prepare("SELECT owner_uid,favicon_avg_color,
382268c6
AD
510 (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
511 favicon_needs_check
0567016b
AD
512 FROM ttrss_feeds WHERE id = ?");
513 $sth->execute([$feed]);
2c08214a 514
0567016b 515 if ($row = $sth->fetch()) {
187abfe7 516 $favicon_needs_check = $row["favicon_needs_check"];
0567016b
AD
517 $favicon_avg_color = $row["favicon_avg_color"];
518 $owner_uid = $row["owner_uid"];
519 } else {
520 return false;
521 }
2c08214a 522
0567016b 523 $site_url = mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245);
2c08214a 524
cd07592c
AD
525 _debug("site_url: $site_url", $debug_enabled);
526 _debug("feed_title: " . $rss->get_title(), $debug_enabled);
527
687a4f59 528 if ($favicon_needs_check || $force_refetch) {
36490f11
AD
529
530 /* terrible hack: if we crash on floicon shit here, we won't check
560cbd8c 531 * the icon avgcolor again (unless the icon got updated) */
36490f11 532
560cbd8c
AD
533 $favicon_file = ICONS_DIR . "/$feed.ico";
534 $favicon_modified = @filemtime($favicon_file);
535
68cccafc 536 _debug("checking favicon...", $debug_enabled);
687a4f59 537
e6c886bf 538 RSSUtils::check_feed_favicon($site_url, $feed);
560cbd8c
AD
539 $favicon_modified_new = @filemtime($favicon_file);
540
541 if ($favicon_modified_new > $favicon_modified)
542 $favicon_avg_color = '';
687a4f59 543
0567016b 544 $favicon_colorstring = "";
6ee0d4b0 545 if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') {
e6c886bf 546 require_once "colors.php";
687a4f59 547
0567016b
AD
548 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE
549 id = ?");
550 $sth->execute([$feed]);
aafd55ba 551
0567016b
AD
552 $favicon_color = calculate_avg_color($favicon_file);
553
554 $favicon_colorstring = ",favicon_avg_color = " . $pdo->quote($favicon_color);
63c323f7 555
36490f11 556 } else if ($favicon_avg_color == 'fail') {
84ceb6bd 557 _debug("floicon failed on this file, not trying to recalculate avg color", $debug_enabled);
6ac722d5 558 }
687a4f59 559
0567016b
AD
560 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_last_checked = NOW()
561 $favicon_colorstring WHERE id = ?");
562 $sth->execute([$feed]);
f2798eb6 563 }
2c08214a 564
68cccafc 565 _debug("loading filters & labels...", $debug_enabled);
2c08214a 566
a42c55f0 567 $filters = load_filters($feed, $owner_uid);
2c08214a 568
02f3992a
AD
569 if ($debug_enabled) {
570 print_r($filters);
571 }
572
68cccafc 573 _debug("" . count($filters) . " filters loaded.", $debug_enabled);
2c08214a 574
19b3992b 575 $items = $rss->get_items();
2c08214a 576
19b3992b 577 if (!is_array($items)) {
68cccafc 578 _debug("no articles found.", $debug_enabled);
2c08214a 579
0567016b
AD
580 $sth = $pdo->prepare("UPDATE ttrss_feeds
581 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
582 $sth->execute([$feed]);
2c08214a 583
0567016b 584 return true; // no articles
2c08214a
AD
585 }
586
68cccafc 587 _debug("processing articles...", $debug_enabled);
2c08214a 588
6c9f3d4a
AD
589 $tstart = time();
590
19b3992b 591 foreach ($items as $item) {
0500e14c
AD
592 $pdo->beginTransaction();
593
e6532439 594 if (clean($_REQUEST['xdebug']) == 3) {
2c08214a
AD
595 print_r($item);
596 }
597
6c9f3d4a
AD
598 if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) {
599 _debug("looks like there's too many articles to process at once, breaking out", $debug_enabled);
0500e14c 600 $pdo->commit();
6c9f3d4a
AD
601 break;
602 }
603
0567016b
AD
604 $entry_guid = strip_tags($item->get_id());
605 if (!$entry_guid) $entry_guid = strip_tags($item->get_link());
e6c886bf 606 if (!$entry_guid) $entry_guid = RSSUtils::make_guid_from_title($item->get_title());
0500e14c
AD
607
608 if (!$entry_guid) {
609 $pdo->commit();
610 continue;
611 }
2c08214a 612
3a4c8973
AD
613 $entry_guid = "$owner_uid,$entry_guid";
614
0567016b 615 $entry_guid_hashed = 'SHA1:' . sha1($entry_guid);
5e3d5480 616
68cccafc 617 _debug("guid $entry_guid / $entry_guid_hashed", $debug_enabled);
5e3d5480 618
0567016b 619 $entry_timestamp = strip_tags($item->get_date());
04d2f9c8
AD
620
621 _debug("orig date: " . $item->get_date(), $debug_enabled);
2c08214a 622
30123fe6 623 if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) {
2c08214a 624 $entry_timestamp = time();
2c08214a
AD
625 }
626
627 $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
628
68cccafc 629 _debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
2c08214a 630
0567016b 631 $entry_title = strip_tags($item->get_title());
1b35d30c 632
5d56d100 633 $entry_link = rewrite_relative_url($site_url, $item->get_link());
2c08214a 634
68cccafc
AD
635 _debug("title $entry_title", $debug_enabled);
636 _debug("link $entry_link", $debug_enabled);
2c08214a
AD
637
638 if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
639
19b3992b
AD
640 $entry_content = $item->get_content();
641 if (!$entry_content) $entry_content = $item->get_description();
2c08214a 642
e6532439 643 if (clean($_REQUEST["xdebug"]) == 2) {
9ec10352 644 print "content: ";
0bc503ff 645 print htmlspecialchars($entry_content);
3c696512 646 print "\n";
2c08214a
AD
647 }
648
0567016b 649 $entry_comments = mb_substr(strip_tags($item->get_comments_url()), 0, 245);
12ff230b 650 $num_comments = (int) $item->get_comments_count();
2c08214a 651
0567016b
AD
652 $entry_author = strip_tags($item->get_author());
653 $entry_guid = mb_substr($entry_guid, 0, 245);
2c08214a 654
68cccafc
AD
655 _debug("author $entry_author", $debug_enabled);
656 _debug("num_comments: $num_comments", $debug_enabled);
ee78f81c 657 _debug("looking for tags...", $debug_enabled);
2c08214a
AD
658
659 // parse <category> entries into tags
660
661 $additional_tags = array();
662
19b3992b 663 $additional_tags_src = $item->get_categories();
2c08214a 664
19b3992b
AD
665 if (is_array($additional_tags_src)) {
666 foreach ($additional_tags_src as $tobj) {
cd07592c 667 array_push($additional_tags, $tobj);
2c08214a 668 }
19b3992b 669 }
2c08214a 670
fa6fbd36 671 $entry_tags = array_unique($additional_tags);
2c08214a
AD
672
673 for ($i = 0; $i < count($entry_tags); $i++)
674 $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
675
ee78f81c
AD
676 _debug("tags found: " . join(",", $entry_tags), $debug_enabled);
677
68cccafc 678 _debug("done collecting data.", $debug_enabled);
2c08214a 679
0567016b
AD
680 $sth = $pdo->prepare("SELECT id, content_hash, lang FROM ttrss_entries
681 WHERE guid = ? OR guid = ?");
682 $sth->execute([$entry_guid, $entry_guid_hashed]);
b30abdad 683
0567016b
AD
684 if ($row = $sth->fetch()) {
685 $base_entry_id = $row["id"];
686 $entry_stored_hash = $row["content_hash"];
4a0da0e5 687 $article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
0567016b 688 $entry_language = $row["lang"];
a8ac7661 689
2ed0d6c4 690 $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
a8ac7661 691 $entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
b30abdad 692 } else {
b1840673
AD
693 $base_entry_id = false;
694 $entry_stored_hash = "";
a29fe121 695 $article_labels = array();
3318d324 696 $entry_language = "";
b30abdad
AD
697 }
698
455b1401 699 $article = array("owner_uid" => $owner_uid, // read only
b30abdad 700 "guid" => $entry_guid, // read only
59e83455 701 "guid_hashed" => $entry_guid_hashed, // read only
19b3992b
AD
702 "title" => $entry_title,
703 "content" => $entry_content,
704 "link" => $entry_link,
a29fe121 705 "labels" => $article_labels, // current limitation: can add labels to article, can't remove them
19b3992b 706 "tags" => $entry_tags,
e02555c1 707 "author" => $entry_author,
c9299c28 708 "force_catchup" => false, // ugly hack for the time being
6de3a1be 709 "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
3318d324 710 "language" => $entry_language,
20d2195f 711 "num_comments" => $num_comments, // read only
f73e03e0
AD
712 "feed" => array("id" => $feed,
713 "fetch_url" => $fetch_url,
babfadbf
J
714 "site_url" => $site_url,
715 "cache_images" => $cache_images)
e6c886bf 716 );
cc85704f 717
b1840673 718 $entry_plugin_data = "";
e6c886bf 719 $entry_current_hash = RSSUtils::calculate_article_hash($article, $pluginhost);
b1840673
AD
720
721 _debug("article hash: $entry_current_hash [stored=$entry_stored_hash]", $debug_enabled);
722
522e8b35 723 if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) {
b1840673
AD
724 _debug("stored article seems up to date [IID: $base_entry_id], updating timestamp only", $debug_enabled);
725
726 // we keep encountering the entry in feeds, so we need to
727 // update date_updated column so that we don't get horrible
728 // dupes when the entry gets purged and reinserted again e.g.
729 // in the case of SLOW SLOW OMG SLOW updating feeds
730
0567016b
AD
731 $sth = $pdo->prepare("UPDATE ttrss_entries SET date_updated = NOW()
732 WHERE id = ?");
733 $sth->execute([$base_entry_id]);
b1840673 734
0500e14c 735 $pdo->commit();
5bdcb8fd 736 continue;
b1840673
AD
737 }
738
739 _debug("hash differs, applying plugin filters:", $debug_enabled);
740
1ffe3391 741 foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) {
b1840673
AD
742 _debug("... " . get_class($plugin), $debug_enabled);
743
744 $start = microtime(true);
19b3992b 745 $article = $plugin->hook_article_filter($article);
0084f0d1 746
b1840673
AD
747 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
748
749 $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ",";
cc85704f
AD
750 }
751
e6532439 752 if (clean($_REQUEST["xdebug"]) == 2) {
0bc503ff
AD
753 print "processed content: ";
754 print htmlspecialchars($article["content"]);
755 print "\n";
756 }
757
b1840673
AD
758 _debug("plugin data: $entry_plugin_data", $debug_enabled);
759
35c12dc4
AD
760 // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
761 if (DB_TYPE == "mysql") {
762 foreach ($article as $k => $v) {
35c37354
AD
763
764 // i guess we'll have to take the risk of 4byte unicode labels & tags here
dae16f72 765 if (is_string($article[$k])) {
35c37354
AD
766 $article[$k] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $v);
767 }
35c12dc4
AD
768 }
769 }
770
b8774453
AD
771 /* Collect article tags here so we could filter by them: */
772
557d86fe
AD
773 $matched_rules = array();
774
e6c886bf 775 $article_filters = RSSUtils::get_article_filters($filters, $article["title"],
7b55001e 776 $article["content"], $article["link"], $article["author"],
557d86fe 777 $article["tags"], $matched_rules);
b8774453
AD
778
779 if ($debug_enabled) {
557d86fe
AD
780 _debug("matched filter rules: ", $debug_enabled);
781
782 if (count($matched_rules) != 0) {
783 print_r($matched_rules);
784 }
785
786 _debug("filter actions: ", $debug_enabled);
787
b8774453
AD
788 if (count($article_filters) != 0) {
789 print_r($article_filters);
790 }
791 }
792
e6c886bf 793 $plugin_filter_names = RSSUtils::find_article_filters($article_filters, "plugin");
b8774453
AD
794 $plugin_filter_actions = $pluginhost->get_filter_actions();
795
796 if (count($plugin_filter_names) > 0) {
797 _debug("applying plugin filter actions...", $debug_enabled);
798
799 foreach ($plugin_filter_names as $pfn) {
800 list($pfclass,$pfaction) = explode(":", $pfn["param"]);
801
802 if (isset($plugin_filter_actions[$pfclass])) {
803 $plugin = $pluginhost->get_plugin($pfclass);
804
805 _debug("... $pfclass: $pfaction", $debug_enabled);
806
807 if ($plugin) {
808 $start = microtime(true);
809 $article = $plugin->hook_article_filter_action($article, $pfaction);
810
811 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
812 } else {
813 _debug("??? $pfclass: plugin object not found.");
814 }
815 } else {
816 _debug("??? $pfclass: filter plugin not registered.");
817 }
818 }
819 }
820
19b3992b 821 $entry_tags = $article["tags"];
0567016b
AD
822 $entry_title = strip_tags($article["title"]);
823 $entry_author = mb_substr(strip_tags($article["author"]), 0, 245);
824 $entry_link = strip_tags($article["link"]);
f935d98e 825 $entry_content = $article["content"]; // escaped below
c9299c28 826 $entry_force_catchup = $article["force_catchup"];
a29fe121 827 $article_labels = $article["labels"];
6de3a1be 828 $entry_score_modifier = (int) $article["score_modifier"];
0567016b 829 $entry_language = $article["language"];
a29fe121
AD
830
831 if ($debug_enabled) {
832 _debug("article labels:", $debug_enabled);
557d86fe
AD
833
834 if (count($article_labels) != 0) {
835 print_r($article_labels);
836 }
a29fe121 837 }
c9299c28
AD
838
839 _debug("force catchup: $entry_force_catchup");
f935d98e 840
0a3fd79b 841 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 842 RSSUtils::cache_media($entry_content, $site_url, $debug_enabled);
0a3fd79b 843
0567016b
AD
844 $csth = $pdo->prepare("SELECT id FROM ttrss_entries
845 WHERE guid = ? OR guid = ?");
846 $csth->execute([$entry_guid, $entry_guid_hashed]);
9e222305 847
0567016b 848 if (!$row = $csth->fetch()) {
2c08214a 849
07d3431e 850 _debug("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", $debug_enabled);
2c08214a
AD
851
852 // base post entry does not exist, create it
853
0567016b 854 $usth = $pdo->prepare(
2c08214a 855 "INSERT INTO ttrss_entries
0567016b 856 (title,
2c08214a
AD
857 guid,
858 link,
859 updated,
860 content,
861 content_hash,
862 no_orig_date,
863 date_updated,
864 date_entered,
865 comments,
866 num_comments,
b30abdad 867 plugin_data,
6b461797 868 lang,
2c08214a
AD
869 author)
870 VALUES
0567016b 871 (?, ?, ?, ?, ?, ?,
5ba1ddd4 872 false,
2c08214a 873 NOW(),
0567016b
AD
874 ?, ?, ?, ?, ?, ?)");
875
876 $usth->execute([$entry_title,
877 $entry_guid_hashed,
878 $entry_link,
879 $entry_timestamp_fmt,
93e70e36 880 "$entry_content",
0567016b
AD
881 $entry_current_hash,
882 $date_feed_processed,
883 $entry_comments,
187abfe7 884 (int)$num_comments,
0567016b 885 $entry_plugin_data,
93e70e36
AD
886 "$entry_language",
887 "$entry_author"]);
e8291805 888
2c08214a
AD
889 }
890
0567016b 891 $csth->execute([$entry_guid, $entry_guid_hashed]);
2c08214a
AD
892
893 $entry_ref_id = 0;
894 $entry_int_id = 0;
895
0567016b 896 if ($row = $csth->fetch()) {
2c08214a 897
68cccafc 898 _debug("base guid found, checking for user record", $debug_enabled);
2c08214a 899
0567016b 900 $ref_id = $row['id'];
2c08214a
AD
901 $entry_ref_id = $ref_id;
902
e6c886bf 903 if (RSSUtils::find_article_filter($article_filters, "filter")) {
0500e14c 904 $pdo->commit();
2c08214a
AD
905 continue;
906 }
907
e6c886bf 908 $score = RSSUtils::calculate_article_score($article_filters) + $entry_score_modifier;
2c08214a 909
6de3a1be 910 _debug("initial score: $score [including plugin modifier: $entry_score_modifier]", $debug_enabled);
2c08214a 911
4f186b1f
AD
912 // check for user post link to main table
913
0567016b
AD
914 $sth = $pdo->prepare("SELECT ref_id, int_id FROM ttrss_user_entries WHERE
915 ref_id = ? AND owner_uid = ?");
916 $sth->execute([$ref_id, $owner_uid]);
2c08214a
AD
917
918 // okay it doesn't exist - create user entry
0567016b
AD
919 if ($row = $sth->fetch()) {
920 $entry_ref_id = $row["ref_id"];
921 $entry_int_id = $row["int_id"];
2c08214a 922
0567016b
AD
923 _debug("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
924 } else {
925
68cccafc 926 _debug("user record not found, creating...", $debug_enabled);
2c08214a 927
e6c886bf 928 if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) {
0567016b
AD
929 $unread = 1;
930 $last_read_qpart = null;
2c08214a 931 } else {
0567016b 932 $unread = 0;
2c08214a
AD
933 $last_read_qpart = 'NOW()';
934 }
935
e6c886bf 936 if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
0567016b 937 $marked = 1;
2c08214a 938 } else {
0567016b 939 $marked = 0;
2c08214a
AD
940 }
941
e6c886bf 942 if (RSSUtils::find_article_filter($article_filters, 'publish')) {
0567016b 943 $published = 1;
2c08214a 944 } else {
0567016b 945 $published = 0;
2c08214a
AD
946 }
947
0567016b
AD
948 $last_marked = ($marked == 'true') ? 'NOW()' : null;
949 $last_published = ($published == 'true') ? 'NOW()' : null;
7873d588 950
0567016b 951 $sth = $pdo->prepare(
2c08214a
AD
952 "INSERT INTO ttrss_user_entries
953 (ref_id, owner_uid, feed_id, unread, last_read, marked,
7873d588
AD
954 published, score, tag_cache, label_cache, uuid,
955 last_marked, last_published)
aa16334f 956 VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")");
2c08214a 957
0567016b 958 $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
aa16334f 959 $published, $score]);
2c08214a 960
0567016b
AD
961 $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
962 ref_id = ? AND owner_uid = ? AND
963 feed_id = ? LIMIT 1");
2c08214a 964
0567016b
AD
965 $sth->execute([$ref_id, $owner_uid, $feed]);
966
967 if ($row = $sth->fetch())
968 $entry_int_id = $row['int_id'];
2c08214a
AD
969 }
970
0567016b 971 _debug("resulting RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
2c08214a 972
e854442e 973 if (DB_TYPE == "pgsql") {
49a888ec
AD
974 $tsvector_combined = mb_substr($entry_title . ' ' .
975 preg_replace('/[<\?\:]/', ' ', strip_tags($entry_content)),
0567016b 976 0, 1000000);
e854442e 977
49a888ec 978 $tsvector_qpart = "tsvector_combined = to_tsvector(".$pdo->quote($feed_language).", ".$pdo->quote($tsvector_combined)."),";
e854442e
AD
979
980 } else {
981 $tsvector_qpart = "";
982 }
983
49a888ec
AD
984 //_debug($tsvector_qpart);
985
0567016b 986 $sth = $pdo->prepare("UPDATE ttrss_entries
49a888ec 987 SET title = :title,
e854442e 988 $tsvector_qpart
49a888ec
AD
989 content = :content,
990 content_hash = :content_hash,
991 updated = :updated,
992 num_comments = :num_comments,
993 plugin_data = :plugin_data,
994 author = :author,
995 lang = :lang
996 WHERE id = :id");
997
998 $sth->execute([":title" => $entry_title,
93e70e36 999 ":content" => "$entry_content",
49a888ec
AD
1000 ":content_hash" => $entry_current_hash,
1001 ":updated" => $entry_timestamp_fmt,
1002 ":num_comments" => (int)$num_comments,
1003 ":plugin_data" => $entry_plugin_data,
93e70e36 1004 ":author" => "$entry_author",
49a888ec
AD
1005 ":lang" => $entry_language,
1006 ":id" => $ref_id]);
b1840673 1007
59e83455 1008 // update aux data
0567016b
AD
1009 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1010 SET score = ? WHERE ref_id = ?");
1011 $sth->execute([$score, $ref_id]);
59e83455 1012
b1840673 1013 if ($mark_unread_on_update) {
24e6ff5d
AD
1014 _debug("article updated, marking unread as requested.", $debug_enabled);
1015
0567016b
AD
1016 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1017 SET last_read = null, unread = true WHERE ref_id = ?");
1018 $sth->execute([$ref_id]);
2c08214a
AD
1019 }
1020 }
1021
a29fe121
AD
1022 _debug("assigning labels [other]...", $debug_enabled);
1023
1024 foreach ($article_labels as $label) {
7c9b5a3f 1025 Labels::add_article($entry_ref_id, $label[1], $owner_uid);
a29fe121
AD
1026 }
1027
1028 _debug("assigning labels [filters]...", $debug_enabled);
2c08214a 1029
e6c886bf 1030 RSSUtils::assign_article_to_label_filters($entry_ref_id, $article_filters,
b24504b1 1031 $owner_uid, $article_labels);
2c08214a 1032
68cccafc 1033 _debug("looking for enclosures...", $debug_enabled);
2c08214a
AD
1034
1035 // enclosures
1036
1037 $enclosures = array();
1038
19b3992b 1039 $encs = $item->get_enclosures();
2c08214a 1040
19b3992b
AD
1041 if (is_array($encs)) {
1042 foreach ($encs as $e) {
1043 $e_item = array(
86e53429
AD
1044 rewrite_relative_url($site_url, $e->link),
1045 $e->type, $e->length, $e->title, $e->width, $e->height);
2c08214a 1046 array_push($enclosures, $e_item);
2c08214a
AD
1047 }
1048 }
1049
388d4dfa 1050 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 1051 RSSUtils::cache_enclosures($enclosures, $site_url, $debug_enabled);
388d4dfa 1052
2c08214a 1053 if ($debug_enabled) {
68cccafc 1054 _debug("article enclosures:", $debug_enabled);
2c08214a
AD
1055 print_r($enclosures);
1056 }
1057
0567016b
AD
1058 $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures
1059 WHERE content_url = ? AND post_id = ?");
2c08214a 1060
0567016b
AD
1061 $usth = $pdo->prepare("INSERT INTO ttrss_enclosures
1062 (content_url, content_type, title, duration, post_id, width, height) VALUES
1063 (?, ?, ?, ?, ?, ?, ?)");
5c54e683 1064
2c08214a 1065 foreach ($enclosures as $enc) {
0567016b
AD
1066 $enc_url = $enc[0];
1067 $enc_type = $enc[1];
0500e14c 1068 $enc_dur = (int)$enc[2];
0567016b 1069 $enc_title = $enc[3];
523bd90b
FE
1070 $enc_width = intval($enc[4]);
1071 $enc_height = intval($enc[5]);
2c08214a 1072
0567016b 1073 $esth->execute([$enc_url, $entry_ref_id]);
2c08214a 1074
0567016b
AD
1075 if (!$esth->fetch()) {
1076 $usth->execute([$enc_url, $enc_type, (string)$enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]);
2c08214a
AD
1077 }
1078 }
1079
2c08214a
AD
1080 // check for manual tags (we have to do it here since they're loaded from filters)
1081
1082 foreach ($article_filters as $f) {
6aff7845 1083 if ($f["type"] == "tag") {
2c08214a 1084
6aff7845 1085 $manual_tags = trim_array(explode(",", $f["param"]));
2c08214a
AD
1086
1087 foreach ($manual_tags as $tag) {
1088 if (tag_is_valid($tag)) {
1089 array_push($entry_tags, $tag);
1090 }
1091 }
1092 }
1093 }
1094
1095 // Skip boring tags
1096
6322ac79 1097 $boring_tags = trim_array(explode(",", mb_strtolower(get_pref(
2c08214a
AD
1098 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
1099
1100 $filtered_tags = array();
1101 $tags_to_cache = array();
1102
1103 if ($entry_tags && is_array($entry_tags)) {
1104 foreach ($entry_tags as $tag) {
1105 if (array_search($tag, $boring_tags) === false) {
1106 array_push($filtered_tags, $tag);
1107 }
1108 }
1109 }
1110
1111 $filtered_tags = array_unique($filtered_tags);
1112
1113 if ($debug_enabled) {
68cccafc 1114 _debug("filtered article tags:", $debug_enabled);
2c08214a
AD
1115 print_r($filtered_tags);
1116 }
1117
1118 // Save article tags in the database
1119
1120 if (count($filtered_tags) > 0) {
1121
0567016b
AD
1122 $tsth = $pdo->prepare("SELECT id FROM ttrss_tags
1123 WHERE tag_name = ? AND post_int_id = ? AND
1124 owner_uid = ? LIMIT 1");
1125
1126 $usth = $pdo->prepare("INSERT INTO ttrss_tags
1127 (owner_uid,tag_name,post_int_id)
1128 VALUES (?, ?, ?)");
2c08214a
AD
1129
1130 foreach ($filtered_tags as $tag) {
1131
1132 $tag = sanitize_tag($tag);
2c08214a
AD
1133
1134 if (!tag_is_valid($tag)) continue;
1135
0567016b 1136 $tsth->execute([$tag, $entry_int_id, $owner_uid]);
2c08214a 1137
0567016b
AD
1138 if (!$tsth->fetch()) {
1139 $usth->execute([$owner_uid, $tag, $entry_int_id]);
e6c886bf 1140 }
2c08214a
AD
1141
1142 array_push($tags_to_cache, $tag);
1143 }
1144
1145 /* update the cache */
1146
1147 $tags_to_cache = array_unique($tags_to_cache);
1148
0567016b 1149 $tags_str = join(",", $tags_to_cache);
2c08214a 1150
0567016b
AD
1151 $tsth = $pdo->prepare("UPDATE ttrss_user_entries
1152 SET tag_cache = ? WHERE ref_id = ?
1153 AND owner_uid = ?");
1154 $tsth->execute([$tags_str, $entry_ref_id, $owner_uid]);
2c08214a
AD
1155 }
1156
68cccafc 1157 _debug("article processed", $debug_enabled);
0500e14c
AD
1158
1159 $pdo->commit();
2c08214a
AD
1160 }
1161
68cccafc 1162 _debug("purging feed...", $debug_enabled);
2c08214a 1163
a42c55f0 1164 purge_feed($feed, 0, $debug_enabled);
2c08214a 1165
0567016b
AD
1166 $sth = $pdo->prepare("UPDATE ttrss_feeds
1167 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
1168 $sth->execute([$feed]);
2c08214a
AD
1169
1170 } else {
1171
0567016b 1172 $error_msg = mb_substr($rss->error(), 0, 245);
2c08214a 1173
4ad04ee2
AD
1174 _debug("fetch error: $error_msg", $debug_enabled);
1175
1176 if (count($rss->errors()) > 1) {
1177 foreach ($rss->errors() as $error) {
1178 _debug("+ $error");
1179 }
1180 }
2c08214a 1181
0567016b
AD
1182 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
1183 last_updated = NOW(), last_unconditional = NOW() WHERE id = ?");
1184 $sth->execute([$error_msg, $feed]);
2c08214a 1185
88edaa93 1186 unset($rss);
0567016b 1187 return false;
88edaa93 1188 }
2c08214a 1189
68cccafc 1190 _debug("done", $debug_enabled);
88edaa93 1191
7b55001e 1192 return true;
2c08214a
AD
1193 }
1194
/**
 * Stores image/audio/video enclosures in the local media cache.
 *
 * A cached copy lives at CACHE_DIR/images/<sha1 of the absolute URL>.
 * Entries that are already present are not downloaded again; they are
 * only touch()ed, which refreshes their modification time.
 *
 * @param array $enclosures enclosure tuples; index 0 is the URL, index 1 the content type
 * @param string $site_url base URL used to make enclosure links absolute
 * @param bool $debug when true, every processed enclosure is logged via _debug()
 */
static function cache_enclosures($enclosures, $site_url, $debug) {
	foreach ($enclosures as $enclosure) {

		// anything that is not image/audio/video is ignored
		if (!preg_match("/(image|audio|video)/", $enclosure[1]))
			continue;

		$media_url = rewrite_relative_url($site_url, $enclosure[0]);
		$cache_path = CACHE_DIR . "/images/" . sha1($media_url);

		// note: logged for already-cached files too
		if ($debug) _debug("cache_enclosures: downloading: $media_url to $cache_path");

		if (file_exists($cache_path)) {
			touch($cache_path);
			continue;
		}

		$payload = fetch_file_contents($media_url);

		// empty or too-small responses are not stored
		if ($payload && strlen($payload) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cache_path, $payload);
		}
	}
}
1218
/**
 * Caches media referenced from an HTML fragment.
 *
 * Looks at <img src>, <video><source src> and <audio><source src>
 * elements, skips data: URIs, and stores each remaining resource at
 * CACHE_DIR/images/<sha1 of the absolute URL>. Resources that are
 * already cached are only touch()ed.
 *
 * @param string $html HTML fragment to scan
 * @param string $site_url base URL used to make src attributes absolute
 * @param bool $debug when true, every processed URL is logged via _debug()
 */
static function cache_media($html, $site_url, $debug) {
	libxml_use_internal_errors(true);

	// prepended so that DOMDocument parses the fragment as UTF-8
	$charset_hack = '<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
	</head>';

	$document = new DOMDocument();
	$document->loadHTML($charset_hack . $html);

	$finder = new DOMXPath($document);
	$media_nodes = $finder->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])');

	foreach ($media_nodes as $node) {

		// inline data: URIs have nothing to download
		if (!$node->hasAttribute('src') || strpos($node->getAttribute('src'), "data:") === 0)
			continue;

		$media_url = rewrite_relative_url($site_url, $node->getAttribute('src'));
		$cache_path = CACHE_DIR . "/images/" . sha1($media_url);

		if ($debug) _debug("cache_media: downloading: $media_url to $cache_path");

		if (file_exists($cache_path)) {
			touch($cache_path);
			continue;
		}

		$payload = fetch_file_contents($media_url);

		if ($payload && strlen($payload) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cache_path, $payload);
		}
	}
}
1252
/**
 * Deletes ttrss_error_log rows created more than seven days ago.
 *
 * @param bool $debug when true, a progress line is logged via _debug()
 */
static function expire_error_log($debug) {
	if ($debug) _debug("Removing old error log entries...");

	// date arithmetic is spelled differently for pgsql and for the other backend
	$purge_query = (DB_TYPE == "pgsql") ?
		"DELETE FROM ttrss_error_log
			WHERE created_at < NOW() - INTERVAL '7 days'" :
		"DELETE FROM ttrss_error_log
			WHERE created_at < DATE_SUB(NOW(), INTERVAL 7 DAY)";

	Db::pdo()->query($purge_query);
}
1266
/**
 * Removes stale *.lock files from LOCK_DIRECTORY.
 *
 * A lock file is removed when file_is_locked() reports it as not locked
 * and its modification time is more than two days in the past. Nothing
 * is done when LOCK_DIRECTORY is not writable.
 *
 * @param bool $debug when true, the number of removed files is logged via _debug()
 */
static function expire_lock_files($debug) {
	$max_age = 86400*2; // seconds
	$removed = 0;

	$lock_files = is_writable(LOCK_DIRECTORY) ? glob(LOCK_DIRECTORY . "/*.lock") : false;

	if ($lock_files) {
		foreach ($lock_files as $lock_file) {
			$is_stale = !file_is_locked(basename($lock_file)) &&
				time() - filemtime($lock_file) > $max_age;

			if ($is_stale) {
				unlink($lock_file);
				++$removed;
			}
		}
	}

	if ($debug) _debug("Removed $removed old lock files.");
}
1287
/**
 * Purges old entries from the cache subdirectories.
 *
 * For each of simplepie, feeds, images, export and upload under CACHE_DIR,
 * every entry whose modification time is older than CACHE_MAX_DAYS days is
 * unlinked. Subdirectories that are not writable are left alone.
 *
 * @param bool $debug when true, a per-directory summary is logged via _debug()
 */
static function expire_cached_files($debug) {
	$subdirs = array("simplepie", "feeds", "images", "export", "upload");

	foreach ($subdirs as $subdir) {
		$cache_dir = CACHE_DIR . "/$subdir";
		$removed = 0;

		$candidates = is_writable($cache_dir) ? glob("$cache_dir/*") : false;

		if ($candidates) {
			foreach ($candidates as $path) {
				$age = time() - filemtime($path);

				if ($age > 86400*CACHE_MAX_DAYS) {
					unlink($path);
					++$removed;
				}
			}
		}

		if ($debug) _debug("$cache_dir: removed $removed files.");
	}
}
2c08214a 1313
/**
 * Source: http://www.php.net/manual/en/function.parse-url.php#104527
 * Returns the url query as associative array
 *
 * Each "&"-separated segment is split on its first "=" only, so a value
 * that itself contains "=" is kept whole. A segment without "=" maps to
 * null. Empty segments (an empty query, or "a=1&&b=2") are skipped.
 * Keys and values are returned exactly as they appear; no URL decoding
 * is performed here.
 *
 * @param string query
 * @return array params
 */
static function convertUrlQuery($query) {
	$params = array();

	foreach (explode('&', (string)$query) as $param) {

		// nothing to record for "", "&&" and similar
		if ($param === '')
			continue;

		// limit of 2: everything after the first "=" belongs to the value
		$item = explode('=', $param, 2);

		$params[$item[0]] = isset($item[1]) ? $item[1] : null;
	}

	return $params;
}
92c14e9d 1333
/**
 * Collects the actions of every filter that matches an article.
 *
 * Each filter carries "rules", "actions", a "match_any_rule" flag and an
 * "inverse" flag. Rules are case-insensitive UTF-8 regular expressions
 * applied to the title, content, both, link, author or tags; a rule may
 * be inverted on its own. With match_any_rule the first matching rule
 * decides the filter; otherwise the first non-matching rule rejects it.
 * Rules with an empty expression are skipped.
 *
 * @param array $filters filter definitions as described above
 * @param string $title article title
 * @param string $content article content (CR, LF and TAB are stripped before matching)
 * @param string $link article link
 * @param string $author article author
 * @param array $tags article tags
 * @param array|bool $matched_rules when an array is passed, one rule per matching filter is appended to it
 * @return array actions of all matching filters, in order; collection ends right after a "stop" action
 */
static function get_article_filters($filters, $title, $content, $link, $author, $tags, &$matched_rules = false) {
	$actions = array();

	foreach ($filters as $filter) {
		$any_rule_suffices = $filter["match_any_rule"];
		$filter_inverse = $filter["inverse"];
		$filter_match = false;

		foreach ($filter["rules"] as $rule) {
			$reg_exp = str_replace('/', '\/', $rule["reg_exp"]);
			$rule_inverse = $rule["inverse"];

			if (!$reg_exp)
				continue;

			// work out which strings this rule is tested against
			switch ($rule["type"]) {
				case "title":
					$subjects = array($title);
					break;
				case "content":
					// we don't need to deal with multiline regexps
					$content = preg_replace("/[\r\n\t]/", "", $content);
					$subjects = array($content);
					break;
				case "both":
					// we don't need to deal with multiline regexps
					$content = preg_replace("/[\r\n\t]/", "", $content);
					$subjects = array($title, $content);
					break;
				case "link":
					$subjects = array($link);
					break;
				case "author":
					$subjects = array($author);
					break;
				case "tag":
					$subjects = $tags;
					break;
				default:
					$subjects = array();
			}

			// the rule matches as soon as one subject matches
			$match = false;

			foreach ($subjects as $subject) {
				if (@preg_match("/$reg_exp/iu", $subject)) {
					$match = true;
					break;
				}
			}

			if ($rule_inverse) $match = !$match;

			if ($any_rule_suffices) {
				// "any" mode: the first hit settles it, misses are ignored
				if ($match) {
					$filter_match = true;
					break;
				}
				continue;
			}

			// "all" mode: the first miss settles it
			$filter_match = $match;

			if (!$match)
				break;
		}

		if ($filter_inverse) $filter_match = !$filter_match;

		if (!$filter_match)
			continue;

		// $rule still holds whatever the rule loop above ended on
		if (is_array($matched_rules)) array_push($matched_rules, $rule);

		foreach ($filter["actions"] as $action) {
			array_push($actions, $action);

			// if Stop action encountered, perform no further processing
			if (isset($action["type"]) && $action["type"] == "stop") return $actions;
		}
	}

	return $actions;
}
1413
/**
 * Returns the first filter action of the given type.
 *
 * @param array $filters filter actions, each with a "type" key
 * @param string $filter_name action type to look for (loose comparison)
 * @return array|bool the first matching action, or false when there is none
 */
static function find_article_filter($filters, $filter_name) {
	foreach ($filters as $action) {
		if ($action["type"] != $filter_name)
			continue;

		return $action;
	}

	return false;
}
1422
/**
 * Returns every filter action of the given type.
 *
 * @param array $filters filter actions, each with a "type" key
 * @param string $filter_name action type to look for (loose comparison)
 * @return array matching actions in their original order, reindexed from 0
 */
static function find_article_filters($filters, $filter_name) {
	$found = array();

	foreach ($filters as $action) {
		if ($action["type"] != $filter_name)
			continue;

		$found[] = $action;
	}

	return $found;
}
1433
/**
 * Adds up the "param" values of all "score" filter actions.
 *
 * @param array $filters filter actions, each with "type" and "param" keys
 * @return int|float sum of the score parameters; 0 when there are none
 */
static function calculate_article_score($filters) {
	$total = 0;

	foreach ($filters as $action) {
		if ($action["type"] != "score")
			continue;

		$total = $total + $action["param"];
	}

	return $total;
}
1444
/**
 * Tells whether a label list already contains the given caption.
 *
 * @param array $labels label tuples; the caption is read from index 1
 * @param string $caption caption to look for (loose comparison)
 * @return bool true when some label carries that caption
 */
static function labels_contains_caption($labels, $caption) {
	$found = false;

	foreach ($labels as $entry) {
		if ($entry[1] == $caption) {
			$found = true;
			break;
		}
	}

	return $found;
}
1454
/**
 * Applies "label" filter actions to an article.
 *
 * For every action of type "label", Labels::add_article() is called with
 * the action's "param" as caption, unless $article_labels already holds
 * a label with that caption.
 *
 * @param int $id article id handed to Labels::add_article()
 * @param array $filters filter actions, each with "type" and "param" keys
 * @param int $owner_uid owner handed to Labels::add_article()
 * @param array $article_labels label tuples already attached to the article (caption at index 1)
 */
static function assign_article_to_label_filters($id, $filters, $owner_uid, $article_labels) {
	foreach ($filters as $action) {
		if ($action["type"] != "label")
			continue;

		$caption = $action["param"];

		// this caption is already among the article's labels
		if (RSSUtils::labels_contains_caption($article_labels, $caption))
			continue;

		Labels::add_article($id, $caption, $owner_uid);
	}
}
87764a50 1464
/**
 * Derives a GUID-like string from an article title.
 *
 * Markup is stripped, the text is lowercased as UTF-8, and each space,
 * double quote, single quote, comma, period, colon and semicolon is
 * replaced with a dash.
 *
 * @param string $title article title, may contain HTML
 * @return string the normalized title
 */
static function make_guid_from_title($title) {
	$plain = strip_tags($title);
	$lowered = mb_strtolower($plain, 'utf-8');

	return preg_replace("/[ \"\',.:;]/", "-", $lowered);
}
1469
/**
 * Deletes counter cache rows that no longer have an owner-matched parent.
 *
 * Rows of ttrss_counters_cache with feed_id > 0 go when no ttrss_feeds row
 * has that id for the same owner; rows of ttrss_cat_counters_cache go when
 * no ttrss_feed_categories row does.
 *
 * @param bool $debug when true, the number of deleted rows is logged via _debug()
 */
static function cleanup_counters_cache($debug) {
	$pdo = Db::pdo();

	// feed counters whose feed is gone
	$orphaned_feeds = $pdo->query("DELETE FROM ttrss_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feeds WHERE
			id = feed_id AND
			ttrss_counters_cache.owner_uid = ttrss_feeds.owner_uid) = 0")->rowCount();

	// category counters whose category is gone
	$orphaned_cats = $pdo->query("DELETE FROM ttrss_cat_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feed_categories WHERE
			id = feed_id AND
			ttrss_cat_counters_cache.owner_uid = ttrss_feed_categories.owner_uid) = 0")->rowCount();

	if ($debug) _debug("Removed $orphaned_feeds (feeds) $orphaned_cats (cats) orphaned counter cache entries.");
}
1491
/**
 * Runs the house keeping hook of one user's plugins.
 *
 * A new PluginHost is created, the user's plugins are loaded into it via
 * load_user_plugins(), and HOOK_HOUSE_KEEPING is run on that host.
 *
 * @param int $owner_uid user whose plugins are loaded
 */
static function housekeeping_user($owner_uid) {
	$user_plugin_host = new PluginHost();

	load_user_plugins($owner_uid, $user_plugin_host);

	$user_plugin_host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
1499
e6c886bf
AD
/**
 * Runs the maintenance tasks that are not tied to a single user.
 *
 * In order: expires cached files, lock files and error log entries,
 * rebuilds the feedbrowser cache, purges orphaned articles, cleans the
 * counter caches, then runs HOOK_HOUSE_KEEPING on the shared PluginHost.
 *
 * @param bool $debug passed through to the individual tasks
 */
static function housekeeping_common($debug) {
	// expiry tasks
	RSSUtils::expire_cached_files($debug);
	RSSUtils::expire_lock_files($debug);
	RSSUtils::expire_error_log($debug);

	// this line is logged regardless of $debug
	$feeds_processed = RSSUtils::update_feedbrowser_cache();
	_debug("Feedbrowser updated, $feeds_processed feeds processed.");

	// orphan cleanup
	Article::purge_orphans( true);
	RSSUtils::cleanup_counters_cache($debug);

	PluginHost::getInstance()->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
ea79a0e0 1516
e6c886bf
AD
/**
 * Makes sure a favicon file is present for a feed, fetching it if needed.
 *
 * When ICONS_DIR/<feed>.ico does not exist yet, the favicon URL is looked
 * up with get_favicon_url(), downloaded, and written to that path if its
 * first bytes identify it as ICO, GIF, PNG, JPEG or BMP data. Content of
 * any other kind is discarded.
 *
 * The icon path is returned on every code path, including when the icon
 * was already present.
 *
 * @param string $site_url site URL handed to get_favicon_url()
 * @param int|string $feed feed id, used as the icon file's base name
 * @return string path of the icon file; the file itself may still be
 *   missing when no usable favicon could be fetched or written
 */
static function check_feed_favicon($site_url, $feed) {
	$icon_file = ICONS_DIR . "/$feed.ico";

	// already there: nothing to fetch
	if (file_exists($icon_file))
		return $icon_file;

	$favicon_url = get_favicon_url($site_url);

	if ($favicon_url) {
		// Limiting to "image" type misses those served with text/plain
		$contents = fetch_file_contents($favicon_url); // , "image");

		// Crude image type matching.
		// Patterns gleaned from the file(1) source code.
		$magic_prefixes = array(
			"\x00\x00\x01\x00",         // MS Windows icon resource
			"GIF8",                     // GIF image data
			"\x89PNG\x0d\x0a\x1a\x0a",  // PNG image data
			"\xff\xd8",                 // JPEG image data
			"BM");                      // PC bitmap (OS2, Windows BMP files)

		$is_image = false;

		if ($contents) {
			foreach ($magic_prefixes as $prefix) {
				if (strncmp($contents, $prefix, strlen($prefix)) === 0) {
					$is_image = true;
					break;
				}
			}
		}

		if ($is_image) {
			// failure to open is tolerated: the icon is simply not stored
			$fp = @fopen($icon_file, "w");

			if ($fp) {
				fwrite($fp, $contents);
				fclose($fp);
				chmod($icon_file, 0644);
			}
		}
	}

	return $icon_file;
}
e6c886bf
AD
1571
1572
1573
bec5ba93 1574}