]> git.wh0rd.org - tt-rss.git/blame - classes/rssutils.php
PDO: switch error reporting to exceptions
[tt-rss.git] / classes / rssutils.php
CommitLineData
2c08214a 1<?php
e6c886bf
AD
2class RSSUtils {
/**
 * Computes a fingerprint of an article's fields.
 *
 * Every set field except "feed" contributes
 * sha1("<key>:" . sha1(<tag-stripped value>)); array values are joined
 * with commas before tags are stripped. The comma-joined plugin names
 * reported by $pluginhost are prepended to the concatenated field digests
 * and the whole string is hashed once more.
 *
 * @param array $article article fields keyed by field name
 * @param object $pluginhost object providing get_plugin_names()
 * @return string hexadecimal SHA-1 digest
 */
static function calculate_article_hash($article, $pluginhost) {
	$field_digests = "";

	foreach ($article as $name => $value) {
		// the "feed" entry and unset (null) fields do not take part in the hash
		if ($name == "feed" || !isset($value)) continue;

		$flattened = is_array($value) ? implode(",", $value) : $value;

		$field_digests .= sha1("$name:" . sha1(strip_tags($flattened)));
	}

	$plugin_names = implode(",", $pluginhost->get_plugin_names());

	return sha1($plugin_names . $field_digests);
}
18
102a0135
AD
/**
 * Strips utf8mb4 characters (i.e. emoji) for mysql.
 *
 * Every code point in the range U+10000..U+10FFFF is replaced with
 * U+FFFD (bytes EF BF BD).
 *
 * preg_replace() with the /u modifier returns null when the subject is
 * not valid UTF-8. For string input that case is handled by substituting
 * the invalid byte sequences first and retrying, so a string argument
 * never comes back as null.
 *
 * @param string $str text to clean
 * @return string cleaned text
 */
static function strip_utf8mb4($str) {
	$pattern = '/[\x{10000}-\x{10FFFF}]/u';
	$replacement = "\xEF\xBF\xBD";

	$result = preg_replace($pattern, $replacement, $str);

	if ($result === null && is_string($str)) {
		// malformed UTF-8: re-encode to substitute invalid sequences, then retry
		$scrubbed = mb_convert_encoding($str, 'UTF-8', 'UTF-8');

		$result = preg_replace($pattern, $replacement, $scrubbed);

		// still failing (e.g. PCRE limits): better the scrubbed text than null
		if ($result === null) $result = $scrubbed;
	}

	return $result;
}
102a0135 23
/**
 * Rebuilds ttrss_feedbrowser_cache from ttrss_feeds.
 *
 * Selects up to 1000 (feed_url, site_url, title) groups ordered by
 * subscriber count, leaving out any feed_url that has a row which is
 * private, has auth_login/auth_pass set, or matches '%:%@%/%'. The cache
 * table is emptied and refilled inside one transaction; a feed_url that
 * is already present in the cache is not inserted a second time.
 *
 * If any statement throws, the transaction is rolled back and the
 * exception is rethrown.
 *
 * @return int number of rows inserted into the cache
 */
static function update_feedbrowser_cache() {

	$pdo = Db::pdo();

	$sth = $pdo->query("SELECT feed_url, site_url, title, COUNT(id) AS subscribers
		FROM ttrss_feeds WHERE feed_url NOT IN (SELECT feed_url FROM ttrss_feeds
			WHERE private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%')
		GROUP BY feed_url, site_url, title ORDER BY subscribers DESC LIMIT 1000");

	// both statements are loop-invariant, so they are prepared once up front
	$exists_sth = $pdo->prepare("SELECT subscribers FROM
		ttrss_feedbrowser_cache WHERE feed_url = ?");

	$insert_sth = $pdo->prepare("INSERT INTO ttrss_feedbrowser_cache
		(feed_url, site_url, title, subscribers)
		VALUES
		(?, ?, ?, ?)");

	$count = 0;

	$pdo->beginTransaction();

	try {
		$pdo->query("DELETE FROM ttrss_feedbrowser_cache");

		while ($line = $sth->fetch()) {

			$subscribers = $line["subscribers"];
			$feed_url = $line["feed_url"];
			$title = $line["title"];
			$site_url = $line["site_url"];

			$exists_sth->execute([$feed_url]);
			$already_cached = (bool) $exists_sth->fetch();

			// release the result set before the statement is executed again
			$exists_sth->closeCursor();

			if (!$already_cached) {
				$insert_sth->execute([$feed_url, $site_url, $title, $subscribers]);

				++$count;
			}
		}

		$pdo->commit();

	} catch (Exception $e) {
		// do not leave a half-rebuilt cache inside an open transaction
		if ($pdo->inTransaction()) $pdo->rollBack();

		throw $e;
	}

	return $count;
}
70
/**
 * Runs one batch of feed updates for the update daemon.
 *
 * Steps, in order:
 *  - terminates the script if the database schema version differs from
 *    SCHEMA_VERSION;
 *  - selects distinct feed URLs which are due for an update: the owner has
 *    logged in within DAEMON_UPDATE_LOGIN_LIMIT days (when that limit
 *    applies), the feed or default update interval has elapsed or the feed
 *    was never updated, and no update was started during the last
 *    10 minutes;
 *  - stamps last_update_started on those URLs before doing any work;
 *  - for every URL, takes the first matching feed row and passes its id to
 *    RSSUtils::update_rss_feed();
 *  - runs RSSUtils::housekeeping_user() for every owner seen in the batch;
 *  - calls Digest::send_headlines_digests().
 *
 * @param int $limit maximum number of feed URLs per batch; a falsy value
 *                   omits the LIMIT clause
 * @param bool $debug whether the scheduling and per-feed messages are logged
 * @return int number of feeds passed to update_rss_feed()
 */
static function update_daemon_common($limit = DAEMON_FEED_LIMIT, $debug = true) {
	if (get_schema_version() != SCHEMA_VERSION) {
		die("Schema version is wrong, please upgrade the database.\n");
	}

	$pdo = Db::pdo();

	$is_pgsql = DB_TYPE == "pgsql";

	// only consider feeds of users who logged in recently enough
	$login_thresh_qpart = "";

	if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
		$login_thresh_qpart = $is_pgsql
			? "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'"
			: "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
	}

	// the two dialects differ only in how "now minus N minutes" is spelled
	if ($is_pgsql) {
		$pref_cutoff = "NOW() - CAST((ttrss_user_prefs.value || ' minutes') AS INTERVAL)";
		$feed_cutoff = "NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)";
		$started_cutoff = "NOW() - INTERVAL '10 minutes'";
	} else {
		$pref_cutoff = "DATE_SUB(NOW(), INTERVAL CONVERT(ttrss_user_prefs.value, SIGNED INTEGER) MINUTE)";
		$feed_cutoff = "DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)";
		$started_cutoff = "DATE_SUB(NOW(), INTERVAL 10 MINUTE)";
	}

	$update_limit_qpart = "AND ((
			ttrss_feeds.update_interval = 0
			AND ttrss_user_prefs.value != '-1'
			AND ttrss_feeds.last_updated < $pref_cutoff
		) OR (
			ttrss_feeds.update_interval > 0
			AND ttrss_feeds.last_updated < $feed_cutoff
		) OR (ttrss_feeds.last_updated IS NULL
			AND ttrss_user_prefs.value != '-1')
		OR (last_updated = '1970-01-01 00:00:00'
			AND ttrss_user_prefs.value != '-1'))";

	// Test if feed is currently being updated by another process.
	$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < $started_cutoff)";

	$query_limit = $limit ? sprintf("LIMIT %d", $limit) : "";

	// Update the least recently updated feeds first
	$query_order = "ORDER BY last_updated" . ($is_pgsql ? " NULLS FIRST" : "");

	$query = "SELECT DISTINCT ttrss_feeds.feed_url, ttrss_feeds.last_updated
		FROM
			ttrss_feeds, ttrss_users, ttrss_user_prefs
		WHERE
			ttrss_feeds.owner_uid = ttrss_users.id
			AND ttrss_user_prefs.profile IS NULL
			AND ttrss_users.id = ttrss_user_prefs.owner_uid
			AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL'
			$login_thresh_qpart $update_limit_qpart
			$updstart_thresh_qpart
			$query_order $query_limit";

	$res = $pdo->query($query);

	$feeds_to_update = array();

	while ($line = $res->fetch()) {
		$feeds_to_update[] = $line['feed_url'];
	}

	if ($debug) _debug(sprintf("Scheduled %d feeds to update...", count($feeds_to_update)));

	// Update last_update_started before actually starting the batch
	// in order to minimize collision risk for parallel daemon tasks
	if (count($feeds_to_update) > 0) {
		$feeds_qmarks = arr_qmarks($feeds_to_update);

		$mark_sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
			WHERE feed_url IN ($feeds_qmarks)");
		$mark_sth->execute($feeds_to_update);
	}

	$nf = 0;
	$bstarted = microtime(true);

	$batch_owners = array();

	// since we have the data cached, we can deal with other feeds with the same url
	$usth = $pdo->prepare("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
		FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
			ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
			ttrss_users.id = ttrss_user_prefs.owner_uid AND
			ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL' AND
			ttrss_user_prefs.profile IS NULL AND
			feed_url = ?
		$update_limit_qpart
		$login_thresh_qpart
		ORDER BY ttrss_feeds.id $query_limit");

	foreach ($feeds_to_update as $feed) {
		if ($debug) _debug("Base feed: $feed");

		$usth->execute([$feed]);

		$tline = $usth->fetch();

		// only the first matching row is processed for each url
		if (!$tline) continue;

		if ($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);

		// remember each owner once for the housekeeping pass below
		if (!in_array($tline["owner_uid"], $batch_owners)) {
			$batch_owners[] = $tline["owner_uid"];
		}

		$fstarted = microtime(true);
		RSSUtils::update_rss_feed($tline["id"], true, false);
		_debug_suppress(false);

		_debug(sprintf(" %.4f (sec)", microtime(true) - $fstarted));

		++$nf;
	}

	if ($nf > 0) {
		_debug(sprintf("Processed %d feeds in %.4f (sec), %.4f (sec/feed avg)", $nf,
			microtime(true) - $bstarted, (microtime(true) - $bstarted) / $nf));
	}

	foreach ($batch_owners as $owner_uid) {
		_debug("Running housekeeping tasks for user $owner_uid...");

		RSSUtils::housekeeping_user($owner_uid);
	}

	// Send feed digests by email if needed.
	Digest::send_headlines_digests($debug);

	return $nf;
}
2c08214a 215
/**
 * Fills in the title and site URL of a feed; this is used when subscribing.
 *
 * The feed's owner, URL and credentials are read from ttrss_feeds. Plugins
 * hooked on HOOK_FEED_BASIC_INFO are asked first; when they yield nothing,
 * the feed is downloaded and parsed, and title/site_url are taken from it
 * (cut to 199 and 245 characters respectively).
 *
 * The stored title is replaced only when it is empty or "[Unknown]"; the
 * stored site_url is replaced whenever the new one is non-empty and
 * differs from it.
 *
 * Does nothing when the feed row is missing or no info could be obtained.
 * A plugin may return only some of the keys: 'title' and 'site_url' are
 * each read only if present.
 *
 * @param int $feed id of the ttrss_feeds row
 * @return void
 */
static function set_basic_feed_info($feed) {

	$pdo = Db::pdo();

	$feed_sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login
		FROM ttrss_feeds WHERE id = ?");
	$feed_sth->execute([$feed]);

	$feed_row = $feed_sth->fetch();

	if (!$feed_row) return;

	$owner_uid = $feed_row["owner_uid"];
	$auth_login = $feed_row["auth_login"];
	$auth_pass = $feed_row["auth_pass"];
	$fetch_url = $feed_row["feed_url"];

	// a private plugin host, loaded with the plugins of the feed owner
	$pluginhost = new PluginHost();
	$user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);

	$pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
	$pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
	$pluginhost->load_data();

	$basic_info = array();
	foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
		$basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
	}

	if (!$basic_info) {
		// no plugin supplied anything: fetch and parse the feed itself
		$feed_data = fetch_file_contents($fetch_url, false,
			$auth_login, $auth_pass, false,
			FEED_FETCH_TIMEOUT,
			0);

		global $fetch_curl_used;

		if (!$fetch_curl_used) {
			// try to gunzip the body; keep it as is when that fails
			$tmp = @gzdecode($feed_data);

			if ($tmp) $feed_data = $tmp;
		}

		$feed_data = trim($feed_data);

		$rss = new FeedParser($feed_data);
		$rss->init();

		if (!$rss->error()) {
			$basic_info = array(
				'title' => mb_substr($rss->get_title(), 0, 199),
				'site_url' => mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245)
			);
		}
	}

	if (!$basic_info || !is_array($basic_info)) return;

	// plugins are free to return a partial array, so read each key defensively
	$new_title = isset($basic_info['title']) ? $basic_info['title'] : false;
	$new_site_url = isset($basic_info['site_url']) ? $basic_info['site_url'] : false;

	$current_sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?");
	$current_sth->execute([$feed]);

	$current = $current_sth->fetch();

	if (!$current) return;

	$registered_title = $current["title"];
	$orig_site_url = $current["site_url"];

	// never overwrite a title which has already been set to something real
	if ($new_title && (!$registered_title || $registered_title == "[Unknown]")) {
		$title_sth = $pdo->prepare("UPDATE ttrss_feeds SET
			title = ? WHERE id = ?");
		$title_sth->execute([$new_title, $feed]);
	}

	if ($new_site_url && $orig_site_url != $new_site_url) {
		$url_sth = $pdo->prepare("UPDATE ttrss_feeds SET
			site_url = ? WHERE id = ?");
		$url_sth->execute([$new_site_url, $feed]);
	}
}
297
7b55001e 298 /**
e6c886bf
AD
299 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
300 */
301 static function update_rss_feed($feed, $no_cache = false) {
2c08214a 302
e6532439 303 $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || clean($_REQUEST['xdebug']);
2c08214a 304
4f71d743 305 _debug_suppress(!$debug_enabled);
68cccafc 306 _debug("start", $debug_enabled);
2c08214a 307
0567016b
AD
308 $pdo = Db::pdo();
309
310 $sth = $pdo->prepare("SELECT title FROM ttrss_feeds WHERE id = ?");
311 $sth->execute([$feed]);
bfe1eb4e 312
0567016b 313 if (!$row = $sth->fetch()) {
bfe1eb4e
AD
314 _debug("feed $feed NOT FOUND/SKIPPED", $debug_enabled);
315 user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING);
316 return false;
317 }
318
0567016b 319 $title = $row["title"];
6bb96beb
AD
320
321 // feed was batch-subscribed or something, we need to get basic info
322 // this is not optimal currently as it fetches stuff separately TODO: optimize
323 if ($title == "[Unknown]") {
324 _debug("setting basic feed info for $feed...");
e6c886bf 325 RSSUtils::set_basic_feed_info($feed);
6bb96beb
AD
326 }
327
0567016b 328 $sth = $pdo->prepare("SELECT id,update_interval,auth_login,
5ba1ddd4 329 feed_url,auth_pass,cache_images,
5321e775 330 mark_unread_on_update, owner_uid,
153cb6d3 331 auth_pass_encrypted, feed_language,
e50c8eaa
AD
332 last_modified,
333 ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
0567016b
AD
334 FROM ttrss_feeds WHERE id = ?");
335 $sth->execute([$feed]);
336
337 if ($row = $sth->fetch()) {
2c08214a 338
0567016b 339 $owner_uid = $row["owner_uid"];
187abfe7 340 $mark_unread_on_update = $row["mark_unread_on_update"];
2c08214a 341
0567016b
AD
342 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
343 WHERE id = ?");
344 $sth->execute([$feed]);
2c08214a 345
0567016b
AD
346 $auth_login = $row["auth_login"];
347 $auth_pass = $row["auth_pass"];
0567016b
AD
348 $stored_last_modified = $row["last_modified"];
349 $last_unconditional = $row["last_unconditional"];
187abfe7 350 $cache_images = $row["cache_images"];
0567016b 351 $fetch_url = $row["feed_url"];
069aea59 352
0567016b
AD
353 $feed_language = mb_strtolower($row["feed_language"]);
354 if (!$feed_language) $feed_language = 'english';
2c08214a 355
0567016b
AD
356 } else {
357 return false;
358 }
2c08214a 359
f074ffe9 360 $date_feed_processed = date('Y-m-d H:i');
2c08214a 361
342e8a9e 362 $cache_filename = CACHE_DIR . "/feeds/" . sha1($fetch_url) . ".xml";
f074ffe9 363
ee65bef4
AD
364 $pluginhost = new PluginHost();
365 $pluginhost->set_debug($debug_enabled);
366 $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
367
368 $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
369 $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
370 $pluginhost->load_data();
371
7b55001e 372 $rss_hash = false;
4f9cbdff 373
7b55001e
AD
374 $force_refetch = isset($_REQUEST["force_refetch"]);
375 $feed_data = "";
687a4f59 376
7b55001e
AD
377 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) {
378 $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass);
379 }
2c08214a 380
7b55001e
AD
381 // try cache
382 if (!$feed_data &&
383 file_exists($cache_filename) &&
384 is_readable($cache_filename) &&
385 !$auth_login && !$auth_pass &&
386 filemtime($cache_filename) > time() - 30) {
be574731 387
7b55001e 388 _debug("using local cache [$cache_filename].", $debug_enabled);
52637d3b 389
7b55001e 390 @$feed_data = file_get_contents($cache_filename);
f074ffe9 391
7b55001e
AD
392 if ($feed_data) {
393 $rss_hash = sha1($feed_data);
88edaa93 394 }
ee65bef4 395
7b55001e
AD
396 } else {
397 _debug("local cache will not be used for this feed", $debug_enabled);
398 }
312742db 399
153cb6d3
AD
400 global $fetch_last_modified;
401
7b55001e
AD
402 // fetch feed from source
403 if (!$feed_data) {
e50c8eaa 404 _debug("last unconditional update request: $last_unconditional");
312742db 405
7b55001e
AD
406 if (ini_get("open_basedir") && function_exists("curl_init")) {
407 _debug("not using CURL due to open_basedir restrictions");
408 }
3f6f0857 409
7f4a4045
AD
410 if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
411 _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
e50c8eaa 412
7f4a4045
AD
413 $force_refetch = true;
414 } else {
415 _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
416 }
153cb6d3 417
7f4a4045 418 _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
153cb6d3
AD
419
420 $feed_data = fetch_file_contents([
421 "url" => $fetch_url,
422 "login" => $auth_login,
423 "pass" => $auth_pass,
424 "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
425 "last_modified" => $force_refetch ? "" : $stored_last_modified
426 ]);
3f6f0857 427
7b55001e 428 global $fetch_curl_used;
3f6f0857 429
7b55001e
AD
430 if (!$fetch_curl_used) {
431 $tmp = @gzdecode($feed_data);
1367bc3f 432
7b55001e
AD
433 if ($tmp) $feed_data = $tmp;
434 }
017401dd 435
7b55001e 436 $feed_data = trim($feed_data);
fd687300 437
7b55001e 438 _debug("fetch done.", $debug_enabled);
9d930af9 439 _debug("source last modified: " . $fetch_last_modified, $debug_enabled);
153cb6d3
AD
440
441 if ($feed_data && $fetch_last_modified != $stored_last_modified) {
0567016b
AD
442 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_modified = ? WHERE id = ?");
443 $sth->execute([substr($fetch_last_modified, 0, 245), $feed]);
153cb6d3 444 }
95beaa14 445
7b55001e 446 // cache vanilla feed data for re-use
342e8a9e 447 if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/feeds")) {
7b55001e
AD
448 $new_rss_hash = sha1($feed_data);
449
450 if ($new_rss_hash != $rss_hash) {
451 _debug("saving $cache_filename", $debug_enabled);
452 @file_put_contents($cache_filename, $feed_data);
95beaa14 453 }
4f9cbdff 454 }
7b55001e 455 }
017401dd 456
7b55001e
AD
457 if (!$feed_data) {
458 global $fetch_last_error;
459 global $fetch_last_error_code;
f074ffe9 460
7b55001e 461 _debug("unable to fetch: $fetch_last_error [$fetch_last_error_code]", $debug_enabled);
f074ffe9 462
7b55001e
AD
463 // If-Modified-Since
464 if ($fetch_last_error_code != 304) {
0567016b 465 $error_message = $fetch_last_error;
7b55001e
AD
466 } else {
467 _debug("source claims data not modified, nothing to do.", $debug_enabled);
0567016b 468 $error_message = "";
7b55001e 469 }
4f9cbdff 470
0567016b
AD
471 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
472 last_updated = NOW() WHERE id = ?");
473 $sth->execute([$error_message, $feed]);
4f9cbdff 474
7b55001e 475 return;
f074ffe9
AD
476 }
477
1ffe3391 478 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) {
6791af0c 479 $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed);
017401dd
AD
480 }
481
07d3431e
AD
482 $rss = new FeedParser($feed_data);
483 $rss->init();
2c08214a 484
19b3992b 485 if (!$rss->error()) {
2c08214a 486
d2a421e3 487 // We use local pluginhost here because we need to load different per-user feed plugins
1ffe3391 488 $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
4412b877 489
df659891 490 _debug("language: $feed_language", $debug_enabled);
68cccafc 491 _debug("processing feed data...", $debug_enabled);
2c08214a 492
382268c6
AD
493 if (DB_TYPE == "pgsql") {
494 $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
495 } else {
496 $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
497 }
498
0567016b 499 $sth = $pdo->prepare("SELECT owner_uid,favicon_avg_color,
382268c6
AD
500 (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
501 favicon_needs_check
0567016b
AD
502 FROM ttrss_feeds WHERE id = ?");
503 $sth->execute([$feed]);
2c08214a 504
0567016b 505 if ($row = $sth->fetch()) {
187abfe7 506 $favicon_needs_check = $row["favicon_needs_check"];
0567016b
AD
507 $favicon_avg_color = $row["favicon_avg_color"];
508 $owner_uid = $row["owner_uid"];
509 } else {
510 return false;
511 }
2c08214a 512
0567016b 513 $site_url = mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245);
2c08214a 514
cd07592c
AD
515 _debug("site_url: $site_url", $debug_enabled);
516 _debug("feed_title: " . $rss->get_title(), $debug_enabled);
517
687a4f59 518 if ($favicon_needs_check || $force_refetch) {
36490f11
AD
519
520 /* terrible hack: if we crash on floicon shit here, we won't check
560cbd8c 521 * the icon avgcolor again (unless the icon got updated) */
36490f11 522
560cbd8c
AD
523 $favicon_file = ICONS_DIR . "/$feed.ico";
524 $favicon_modified = @filemtime($favicon_file);
525
68cccafc 526 _debug("checking favicon...", $debug_enabled);
687a4f59 527
e6c886bf 528 RSSUtils::check_feed_favicon($site_url, $feed);
560cbd8c
AD
529 $favicon_modified_new = @filemtime($favicon_file);
530
531 if ($favicon_modified_new > $favicon_modified)
532 $favicon_avg_color = '';
687a4f59 533
0567016b 534 $favicon_colorstring = "";
6ee0d4b0 535 if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') {
e6c886bf 536 require_once "colors.php";
687a4f59 537
0567016b
AD
538 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE
539 id = ?");
540 $sth->execute([$feed]);
aafd55ba 541
0567016b
AD
542 $favicon_color = calculate_avg_color($favicon_file);
543
544 $favicon_colorstring = ",favicon_avg_color = " . $pdo->quote($favicon_color);
63c323f7 545
36490f11 546 } else if ($favicon_avg_color == 'fail') {
84ceb6bd 547 _debug("floicon failed on this file, not trying to recalculate avg color", $debug_enabled);
6ac722d5 548 }
687a4f59 549
0567016b
AD
550 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_last_checked = NOW()
551 $favicon_colorstring WHERE id = ?");
552 $sth->execute([$feed]);
f2798eb6 553 }
2c08214a 554
68cccafc 555 _debug("loading filters & labels...", $debug_enabled);
2c08214a 556
a42c55f0 557 $filters = load_filters($feed, $owner_uid);
2c08214a 558
02f3992a 559 if ($debug_enabled) {
7f4a4045
AD
560 print_r($filters);
561 }
02f3992a 562
68cccafc 563 _debug("" . count($filters) . " filters loaded.", $debug_enabled);
2c08214a 564
19b3992b 565 $items = $rss->get_items();
2c08214a 566
19b3992b 567 if (!is_array($items)) {
68cccafc 568 _debug("no articles found.", $debug_enabled);
2c08214a 569
0567016b
AD
570 $sth = $pdo->prepare("UPDATE ttrss_feeds
571 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
572 $sth->execute([$feed]);
2c08214a 573
0567016b 574 return true; // no articles
2c08214a
AD
575 }
576
68cccafc 577 _debug("processing articles...", $debug_enabled);
2c08214a 578
6c9f3d4a
AD
579 $tstart = time();
580
19b3992b 581 foreach ($items as $item) {
0500e14c
AD
582 $pdo->beginTransaction();
583
e6532439 584 if (clean($_REQUEST['xdebug']) == 3) {
2c08214a
AD
585 print_r($item);
586 }
587
6c9f3d4a
AD
588 if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) {
589 _debug("looks like there's too many articles to process at once, breaking out", $debug_enabled);
0500e14c 590 $pdo->commit();
6c9f3d4a
AD
591 break;
592 }
593
0567016b
AD
594 $entry_guid = strip_tags($item->get_id());
595 if (!$entry_guid) $entry_guid = strip_tags($item->get_link());
e6c886bf 596 if (!$entry_guid) $entry_guid = RSSUtils::make_guid_from_title($item->get_title());
0500e14c
AD
597
598 if (!$entry_guid) {
599 $pdo->commit();
600 continue;
601 }
2c08214a 602
3a4c8973
AD
603 $entry_guid = "$owner_uid,$entry_guid";
604
0567016b 605 $entry_guid_hashed = 'SHA1:' . sha1($entry_guid);
5e3d5480 606
68cccafc 607 _debug("guid $entry_guid / $entry_guid_hashed", $debug_enabled);
5e3d5480 608
0567016b 609 $entry_timestamp = strip_tags($item->get_date());
04d2f9c8
AD
610
611 _debug("orig date: " . $item->get_date(), $debug_enabled);
2c08214a 612
30123fe6 613 if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) {
2c08214a 614 $entry_timestamp = time();
2c08214a
AD
615 }
616
617 $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
618
68cccafc 619 _debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
2c08214a 620
0567016b 621 $entry_title = strip_tags($item->get_title());
1b35d30c 622
5d56d100 623 $entry_link = rewrite_relative_url($site_url, $item->get_link());
2c08214a 624
3bbaf902 625 $entry_language = mb_substr(trim($item->get_language()), 0, 2);
22a866ed 626
68cccafc
AD
627 _debug("title $entry_title", $debug_enabled);
628 _debug("link $entry_link", $debug_enabled);
22a866ed 629 _debug("language $entry_language", $debug_enabled);
2c08214a
AD
630
631 if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
632
19b3992b
AD
633 $entry_content = $item->get_content();
634 if (!$entry_content) $entry_content = $item->get_description();
2c08214a 635
e6532439 636 if (clean($_REQUEST["xdebug"]) == 2) {
9ec10352 637 print "content: ";
0bc503ff 638 print htmlspecialchars($entry_content);
3c696512 639 print "\n";
2c08214a
AD
640 }
641
0567016b 642 $entry_comments = mb_substr(strip_tags($item->get_comments_url()), 0, 245);
12ff230b 643 $num_comments = (int) $item->get_comments_count();
2c08214a 644
0567016b
AD
645 $entry_author = strip_tags($item->get_author());
646 $entry_guid = mb_substr($entry_guid, 0, 245);
2c08214a 647
68cccafc
AD
648 _debug("author $entry_author", $debug_enabled);
649 _debug("num_comments: $num_comments", $debug_enabled);
ee78f81c 650 _debug("looking for tags...", $debug_enabled);
2c08214a
AD
651
652 // parse <category> entries into tags
653
654 $additional_tags = array();
655
19b3992b 656 $additional_tags_src = $item->get_categories();
2c08214a 657
19b3992b
AD
658 if (is_array($additional_tags_src)) {
659 foreach ($additional_tags_src as $tobj) {
cd07592c 660 array_push($additional_tags, $tobj);
2c08214a 661 }
19b3992b 662 }
2c08214a 663
fa6fbd36 664 $entry_tags = array_unique($additional_tags);
2c08214a 665
5edf4b73 666 for ($i = 0; $i < count($entry_tags); $i++) {
2c08214a
AD
667 $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
668
5edf4b73
AD
669 // we don't support numeric tags, let's prefix them
670 if (is_numeric($entry_tags[$i])) $entry_tags[$i] = 't:' . $entry_tags[$i];
671 }
672
ee78f81c
AD
673 _debug("tags found: " . join(",", $entry_tags), $debug_enabled);
674
68cccafc 675 _debug("done collecting data.", $debug_enabled);
2c08214a 676
0567016b
AD
677 $sth = $pdo->prepare("SELECT id, content_hash, lang FROM ttrss_entries
678 WHERE guid = ? OR guid = ?");
679 $sth->execute([$entry_guid, $entry_guid_hashed]);
b30abdad 680
0567016b
AD
681 if ($row = $sth->fetch()) {
682 $base_entry_id = $row["id"];
683 $entry_stored_hash = $row["content_hash"];
4a0da0e5 684 $article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
a8ac7661 685
2ed0d6c4 686 $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
a8ac7661 687 $entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
b30abdad 688 } else {
b1840673
AD
689 $base_entry_id = false;
690 $entry_stored_hash = "";
a29fe121 691 $article_labels = array();
b30abdad
AD
692 }
693
455b1401 694 $article = array("owner_uid" => $owner_uid, // read only
b30abdad 695 "guid" => $entry_guid, // read only
59e83455 696 "guid_hashed" => $entry_guid_hashed, // read only
19b3992b
AD
697 "title" => $entry_title,
698 "content" => $entry_content,
699 "link" => $entry_link,
a29fe121 700 "labels" => $article_labels, // current limitation: can add labels to article, can't remove them
19b3992b 701 "tags" => $entry_tags,
e02555c1 702 "author" => $entry_author,
c9299c28 703 "force_catchup" => false, // ugly hack for the time being
6de3a1be 704 "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
3318d324 705 "language" => $entry_language,
20d2195f 706 "num_comments" => $num_comments, // read only
f73e03e0
AD
707 "feed" => array("id" => $feed,
708 "fetch_url" => $fetch_url,
babfadbf
J
709 "site_url" => $site_url,
710 "cache_images" => $cache_images)
e6c886bf 711 );
cc85704f 712
b1840673 713 $entry_plugin_data = "";
e6c886bf 714 $entry_current_hash = RSSUtils::calculate_article_hash($article, $pluginhost);
b1840673
AD
715
716 _debug("article hash: $entry_current_hash [stored=$entry_stored_hash]", $debug_enabled);
717
522e8b35 718 if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) {
b1840673
AD
719 _debug("stored article seems up to date [IID: $base_entry_id], updating timestamp only", $debug_enabled);
720
721 // we keep encountering the entry in feeds, so we need to
722 // update date_updated column so that we don't get horrible
723 // dupes when the entry gets purged and reinserted again e.g.
724 // in the case of SLOW SLOW OMG SLOW updating feeds
725
0567016b
AD
726 $sth = $pdo->prepare("UPDATE ttrss_entries SET date_updated = NOW()
727 WHERE id = ?");
728 $sth->execute([$base_entry_id]);
b1840673 729
0500e14c 730 $pdo->commit();
5bdcb8fd 731 continue;
b1840673
AD
732 }
733
734 _debug("hash differs, applying plugin filters:", $debug_enabled);
735
1ffe3391 736 foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) {
b1840673
AD
737 _debug("... " . get_class($plugin), $debug_enabled);
738
739 $start = microtime(true);
19b3992b 740 $article = $plugin->hook_article_filter($article);
0084f0d1 741
b1840673
AD
742 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
743
744 $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ",";
cc85704f
AD
745 }
746
e6532439 747 if (clean($_REQUEST["xdebug"]) == 2) {
0bc503ff
AD
748 print "processed content: ";
749 print htmlspecialchars($article["content"]);
750 print "\n";
751 }
752
b1840673
AD
753 _debug("plugin data: $entry_plugin_data", $debug_enabled);
754
35c12dc4 755 // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
2b8afd49 756 if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
35c12dc4 757 foreach ($article as $k => $v) {
35c37354 758 // i guess we'll have to take the risk of 4byte unicode labels & tags here
dae16f72 759 if (is_string($article[$k])) {
102a0135 760 $article[$k] = RSSUtils::strip_utf8mb4($v);
35c37354 761 }
35c12dc4
AD
762 }
763 }
764
b8774453
AD
765 /* Collect article tags here so we could filter by them: */
766
557d86fe
AD
767 $matched_rules = array();
768
e6c886bf 769 $article_filters = RSSUtils::get_article_filters($filters, $article["title"],
7b55001e 770 $article["content"], $article["link"], $article["author"],
557d86fe 771 $article["tags"], $matched_rules);
b8774453
AD
772
773 if ($debug_enabled) {
557d86fe
AD
774 _debug("matched filter rules: ", $debug_enabled);
775
776 if (count($matched_rules) != 0) {
777 print_r($matched_rules);
778 }
779
780 _debug("filter actions: ", $debug_enabled);
781
b8774453
AD
782 if (count($article_filters) != 0) {
783 print_r($article_filters);
784 }
785 }
786
e6c886bf 787 $plugin_filter_names = RSSUtils::find_article_filters($article_filters, "plugin");
b8774453
AD
788 $plugin_filter_actions = $pluginhost->get_filter_actions();
789
790 if (count($plugin_filter_names) > 0) {
791 _debug("applying plugin filter actions...", $debug_enabled);
792
793 foreach ($plugin_filter_names as $pfn) {
794 list($pfclass,$pfaction) = explode(":", $pfn["param"]);
795
796 if (isset($plugin_filter_actions[$pfclass])) {
797 $plugin = $pluginhost->get_plugin($pfclass);
798
799 _debug("... $pfclass: $pfaction", $debug_enabled);
800
801 if ($plugin) {
802 $start = microtime(true);
803 $article = $plugin->hook_article_filter_action($article, $pfaction);
804
805 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
806 } else {
807 _debug("??? $pfclass: plugin object not found.");
808 }
809 } else {
810 _debug("??? $pfclass: filter plugin not registered.");
811 }
812 }
813 }
814
19b3992b 815 $entry_tags = $article["tags"];
0567016b
AD
816 $entry_title = strip_tags($article["title"]);
817 $entry_author = mb_substr(strip_tags($article["author"]), 0, 245);
818 $entry_link = strip_tags($article["link"]);
f935d98e 819 $entry_content = $article["content"]; // escaped below
c9299c28 820 $entry_force_catchup = $article["force_catchup"];
a29fe121 821 $article_labels = $article["labels"];
6de3a1be 822 $entry_score_modifier = (int) $article["score_modifier"];
0567016b 823 $entry_language = $article["language"];
a29fe121
AD
824
825 if ($debug_enabled) {
826 _debug("article labels:", $debug_enabled);
557d86fe
AD
827
828 if (count($article_labels) != 0) {
829 print_r($article_labels);
830 }
a29fe121 831 }
c9299c28
AD
832
833 _debug("force catchup: $entry_force_catchup");
f935d98e 834
0a3fd79b 835 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 836 RSSUtils::cache_media($entry_content, $site_url, $debug_enabled);
0a3fd79b 837
0567016b
AD
838 $csth = $pdo->prepare("SELECT id FROM ttrss_entries
839 WHERE guid = ? OR guid = ?");
840 $csth->execute([$entry_guid, $entry_guid_hashed]);
9e222305 841
0567016b 842 if (!$row = $csth->fetch()) {
2c08214a 843
07d3431e 844 _debug("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", $debug_enabled);
2c08214a
AD
845
846 // base post entry does not exist, create it
847
0567016b 848 $usth = $pdo->prepare(
2c08214a 849 "INSERT INTO ttrss_entries
0567016b 850 (title,
2c08214a
AD
851 guid,
852 link,
853 updated,
854 content,
855 content_hash,
856 no_orig_date,
857 date_updated,
858 date_entered,
859 comments,
860 num_comments,
b30abdad 861 plugin_data,
6b461797 862 lang,
2c08214a
AD
863 author)
864 VALUES
0567016b 865 (?, ?, ?, ?, ?, ?,
5ba1ddd4 866 false,
2c08214a 867 NOW(),
0567016b
AD
868 ?, ?, ?, ?, ?, ?)");
869
870 $usth->execute([$entry_title,
871 $entry_guid_hashed,
872 $entry_link,
873 $entry_timestamp_fmt,
93e70e36 874 "$entry_content",
0567016b
AD
875 $entry_current_hash,
876 $date_feed_processed,
877 $entry_comments,
187abfe7 878 (int)$num_comments,
0567016b 879 $entry_plugin_data,
93e70e36
AD
880 "$entry_language",
881 "$entry_author"]);
e8291805 882
2c08214a
AD
883 }
884
0567016b 885 $csth->execute([$entry_guid, $entry_guid_hashed]);
2c08214a
AD
886
887 $entry_ref_id = 0;
888 $entry_int_id = 0;
889
0567016b 890 if ($row = $csth->fetch()) {
2c08214a 891
68cccafc 892 _debug("base guid found, checking for user record", $debug_enabled);
2c08214a 893
0567016b 894 $ref_id = $row['id'];
2c08214a
AD
895 $entry_ref_id = $ref_id;
896
e6c886bf 897 if (RSSUtils::find_article_filter($article_filters, "filter")) {
0500e14c 898 $pdo->commit();
2c08214a
AD
899 continue;
900 }
901
e6c886bf 902 $score = RSSUtils::calculate_article_score($article_filters) + $entry_score_modifier;
2c08214a 903
6de3a1be 904 _debug("initial score: $score [including plugin modifier: $entry_score_modifier]", $debug_enabled);
2c08214a 905
4f186b1f
AD
906 // check for user post link to main table
907
0567016b
AD
908 $sth = $pdo->prepare("SELECT ref_id, int_id FROM ttrss_user_entries WHERE
909 ref_id = ? AND owner_uid = ?");
910 $sth->execute([$ref_id, $owner_uid]);
2c08214a
AD
911
912 // okay it doesn't exist - create user entry
0567016b
AD
913 if ($row = $sth->fetch()) {
914 $entry_ref_id = $row["ref_id"];
915 $entry_int_id = $row["int_id"];
2c08214a 916
0567016b
AD
917 _debug("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
918 } else {
919
68cccafc 920 _debug("user record not found, creating...", $debug_enabled);
2c08214a 921
e6c886bf 922 if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) {
0567016b
AD
923 $unread = 1;
924 $last_read_qpart = null;
2c08214a 925 } else {
0567016b 926 $unread = 0;
d4c05d0b 927 $last_read_qpart = date("Y-m-d H:i"); // we can't use NOW() here because it gets quoted
2c08214a
AD
928 }
929
e6c886bf 930 if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
0567016b 931 $marked = 1;
2c08214a 932 } else {
0567016b 933 $marked = 0;
2c08214a
AD
934 }
935
e6c886bf 936 if (RSSUtils::find_article_filter($article_filters, 'publish')) {
0567016b 937 $published = 1;
2c08214a 938 } else {
0567016b 939 $published = 0;
2c08214a
AD
940 }
941
26ad257d 942 $last_marked = ($marked == 1) ? 'NOW()' : 'NULL';
943 $last_published = ($published == 1) ? 'NOW()' : 'NULL';
7873d588 944
0567016b 945 $sth = $pdo->prepare(
2c08214a
AD
946 "INSERT INTO ttrss_user_entries
947 (ref_id, owner_uid, feed_id, unread, last_read, marked,
7873d588
AD
948 published, score, tag_cache, label_cache, uuid,
949 last_marked, last_published)
aa16334f 950 VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")");
2c08214a 951
0567016b 952 $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
aa16334f 953 $published, $score]);
2c08214a 954
0567016b
AD
955 $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
956 ref_id = ? AND owner_uid = ? AND
957 feed_id = ? LIMIT 1");
2c08214a 958
0567016b
AD
959 $sth->execute([$ref_id, $owner_uid, $feed]);
960
961 if ($row = $sth->fetch())
962 $entry_int_id = $row['int_id'];
2c08214a
AD
963 }
964
0567016b 965 _debug("resulting RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
2c08214a 966
963c2264
AD
967 if (DB_TYPE == "pgsql")
968 $tsvector_qpart = "tsvector_combined = to_tsvector(:ts_lang, :ts_content),";
969 else
e854442e 970 $tsvector_qpart = "";
49a888ec 971
0567016b 972 $sth = $pdo->prepare("UPDATE ttrss_entries
49a888ec 973 SET title = :title,
e854442e 974 $tsvector_qpart
49a888ec
AD
975 content = :content,
976 content_hash = :content_hash,
977 updated = :updated,
66fe33e7 978 date_updated = NOW(),
49a888ec
AD
979 num_comments = :num_comments,
980 plugin_data = :plugin_data,
981 author = :author,
982 lang = :lang
983 WHERE id = :id");
984
963c2264 985 $params = [":title" => $entry_title,
93e70e36 986 ":content" => "$entry_content",
49a888ec
AD
987 ":content_hash" => $entry_current_hash,
988 ":updated" => $entry_timestamp_fmt,
989 ":num_comments" => (int)$num_comments,
990 ":plugin_data" => $entry_plugin_data,
93e70e36 991 ":author" => "$entry_author",
49a888ec 992 ":lang" => $entry_language,
963c2264
AD
993 ":id" => $ref_id];
994
995 if (DB_TYPE == "pgsql") {
996 $params[":ts_lang"] = $feed_language;
6e6c3a87 997 $params[":ts_content"] = mb_substr(strip_tags($entry_title . " " . $entry_content), 0, 900000);
963c2264
AD
998 }
999
1000 $sth->execute($params);
b1840673 1001
59e83455 1002 // update aux data
0567016b
AD
1003 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1004 SET score = ? WHERE ref_id = ?");
1005 $sth->execute([$score, $ref_id]);
59e83455 1006
b1840673 1007 if ($mark_unread_on_update) {
24e6ff5d
AD
1008 _debug("article updated, marking unread as requested.", $debug_enabled);
1009
0567016b
AD
1010 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1011 SET last_read = null, unread = true WHERE ref_id = ?");
1012 $sth->execute([$ref_id]);
2c08214a
AD
1013 }
1014 }
1015
a29fe121
AD
1016 _debug("assigning labels [other]...", $debug_enabled);
1017
1018 foreach ($article_labels as $label) {
7c9b5a3f 1019 Labels::add_article($entry_ref_id, $label[1], $owner_uid);
a29fe121
AD
1020 }
1021
1022 _debug("assigning labels [filters]...", $debug_enabled);
2c08214a 1023
e6c886bf 1024 RSSUtils::assign_article_to_label_filters($entry_ref_id, $article_filters,
b24504b1 1025 $owner_uid, $article_labels);
2c08214a 1026
68cccafc 1027 _debug("looking for enclosures...", $debug_enabled);
2c08214a
AD
1028
1029 // enclosures
1030
1031 $enclosures = array();
1032
19b3992b 1033 $encs = $item->get_enclosures();
2c08214a 1034
19b3992b
AD
1035 if (is_array($encs)) {
1036 foreach ($encs as $e) {
1037 $e_item = array(
86e53429
AD
1038 rewrite_relative_url($site_url, $e->link),
1039 $e->type, $e->length, $e->title, $e->width, $e->height);
102a0135
AD
1040
1041 // Yet another episode of "mysql utf8_general_ci is gimped"
2b8afd49 1042 if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
7f4a4045
AD
1043 for ($i = 0; $i < count($e_item); $i++) {
1044 if (is_string($e_item[$i])) {
1045 $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
1046 }
1047 }
102a0135
AD
1048 }
1049
7f4a4045 1050 array_push($enclosures, $e_item);
2c08214a
AD
1051 }
1052 }
1053
388d4dfa 1054 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 1055 RSSUtils::cache_enclosures($enclosures, $site_url, $debug_enabled);
388d4dfa 1056
2c08214a 1057 if ($debug_enabled) {
68cccafc 1058 _debug("article enclosures:", $debug_enabled);
2c08214a
AD
1059 print_r($enclosures);
1060 }
1061
0567016b 1062 $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures
ac8a0e7d 1063 WHERE content_url = ? AND content_type = ? AND post_id = ?");
2c08214a 1064
0567016b
AD
1065 $usth = $pdo->prepare("INSERT INTO ttrss_enclosures
1066 (content_url, content_type, title, duration, post_id, width, height) VALUES
1067 (?, ?, ?, ?, ?, ?, ?)");
5c54e683 1068
2c08214a 1069 foreach ($enclosures as $enc) {
0567016b
AD
1070 $enc_url = $enc[0];
1071 $enc_type = $enc[1];
0500e14c 1072 $enc_dur = (int)$enc[2];
0567016b 1073 $enc_title = $enc[3];
523bd90b
FE
1074 $enc_width = intval($enc[4]);
1075 $enc_height = intval($enc[5]);
2c08214a 1076
ac8a0e7d 1077 $esth->execute([$enc_url, $enc_type, $entry_ref_id]);
2c08214a 1078
0567016b
AD
1079 if (!$esth->fetch()) {
1080 $usth->execute([$enc_url, $enc_type, (string)$enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]);
2c08214a
AD
1081 }
1082 }
1083
2c08214a
AD
1084 // check for manual tags (we have to do it here since they're loaded from filters)
1085
1086 foreach ($article_filters as $f) {
6aff7845 1087 if ($f["type"] == "tag") {
2c08214a 1088
6aff7845 1089 $manual_tags = trim_array(explode(",", $f["param"]));
2c08214a
AD
1090
1091 foreach ($manual_tags as $tag) {
1092 if (tag_is_valid($tag)) {
1093 array_push($entry_tags, $tag);
1094 }
1095 }
1096 }
1097 }
1098
1099 // Skip boring tags
1100
6322ac79 1101 $boring_tags = trim_array(explode(",", mb_strtolower(get_pref(
2c08214a
AD
1102 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
1103
1104 $filtered_tags = array();
1105 $tags_to_cache = array();
1106
1107 if ($entry_tags && is_array($entry_tags)) {
1108 foreach ($entry_tags as $tag) {
1109 if (array_search($tag, $boring_tags) === false) {
1110 array_push($filtered_tags, $tag);
1111 }
1112 }
1113 }
1114
1115 $filtered_tags = array_unique($filtered_tags);
1116
1117 if ($debug_enabled) {
68cccafc 1118 _debug("filtered article tags:", $debug_enabled);
2c08214a
AD
1119 print_r($filtered_tags);
1120 }
1121
1122 // Save article tags in the database
1123
1124 if (count($filtered_tags) > 0) {
1125
0567016b
AD
1126 $tsth = $pdo->prepare("SELECT id FROM ttrss_tags
1127 WHERE tag_name = ? AND post_int_id = ? AND
1128 owner_uid = ? LIMIT 1");
1129
1130 $usth = $pdo->prepare("INSERT INTO ttrss_tags
1131 (owner_uid,tag_name,post_int_id)
1132 VALUES (?, ?, ?)");
2c08214a
AD
1133
1134 foreach ($filtered_tags as $tag) {
1135
1136 $tag = sanitize_tag($tag);
2c08214a
AD
1137
1138 if (!tag_is_valid($tag)) continue;
1139
0567016b 1140 $tsth->execute([$tag, $entry_int_id, $owner_uid]);
2c08214a 1141
0567016b
AD
1142 if (!$tsth->fetch()) {
1143 $usth->execute([$owner_uid, $tag, $entry_int_id]);
e6c886bf 1144 }
2c08214a
AD
1145
1146 array_push($tags_to_cache, $tag);
1147 }
1148
1149 /* update the cache */
1150
1151 $tags_to_cache = array_unique($tags_to_cache);
1152
0567016b 1153 $tags_str = join(",", $tags_to_cache);
2c08214a 1154
0567016b
AD
1155 $tsth = $pdo->prepare("UPDATE ttrss_user_entries
1156 SET tag_cache = ? WHERE ref_id = ?
1157 AND owner_uid = ?");
1158 $tsth->execute([$tags_str, $entry_ref_id, $owner_uid]);
2c08214a
AD
1159 }
1160
68cccafc 1161 _debug("article processed", $debug_enabled);
0500e14c
AD
1162
1163 $pdo->commit();
2c08214a
AD
1164 }
1165
68cccafc 1166 _debug("purging feed...", $debug_enabled);
2c08214a 1167
a42c55f0 1168 purge_feed($feed, 0, $debug_enabled);
2c08214a 1169
0567016b
AD
1170 $sth = $pdo->prepare("UPDATE ttrss_feeds
1171 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
1172 $sth->execute([$feed]);
2c08214a
AD
1173
1174 } else {
1175
0567016b 1176 $error_msg = mb_substr($rss->error(), 0, 245);
2c08214a 1177
4ad04ee2
AD
1178 _debug("fetch error: $error_msg", $debug_enabled);
1179
1180 if (count($rss->errors()) > 1) {
1181 foreach ($rss->errors() as $error) {
1182 _debug("+ $error");
1183 }
1184 }
2c08214a 1185
0567016b
AD
1186 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
1187 last_updated = NOW(), last_unconditional = NOW() WHERE id = ?");
1188 $sth->execute([$error_msg, $feed]);
2c08214a 1189
88edaa93 1190 unset($rss);
0567016b 1191 return false;
88edaa93 1192 }
2c08214a 1193
68cccafc 1194 _debug("done", $debug_enabled);
88edaa93 1195
7b55001e 1196 return true;
2c08214a
AD
1197 }
1198
/**
 * Stores image/audio/video enclosures in the local image cache directory.
 *
 * Each enclosure is an array holding the content URL at index 0 and the
 * content type at index 1. Enclosures whose type does not contain "image",
 * "audio" or "video" are skipped.
 *
 * @param array $enclosures list of enclosure arrays (url at [0], type at [1])
 * @param string $site_url base URL used to resolve relative enclosure URLs
 * @param bool $debug when truthy, every processed URL is reported via _debug()
 */
static function cache_enclosures($enclosures, $site_url, $debug) {
	foreach ($enclosures as $enclosure) {
		$is_media = preg_match("/(image|audio|video)/", $enclosure[1]);

		if (!$is_media) continue;

		$url = rewrite_relative_url($site_url, $enclosure[0]);

		// cache file name is derived from the absolute URL
		$cache_path = CACHE_DIR . "/images/" . sha1($url);

		if ($debug) _debug("cache_enclosures: downloading: $url to $cache_path");

		if (file_exists($cache_path)) {
			// already cached: only refresh the modification time
			touch($cache_path);
			continue;
		}

		$data = fetch_file_contents($url);

		// empty downloads and files not larger than MIN_CACHE_FILE_SIZE are not stored
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cache_path, $data);
		}
	}
}
1222
/**
 * Caches media referenced from an HTML fragment in the local image cache.
 *
 * The fragment is parsed with DOMDocument and every <img src>,
 * <video><source src> and <audio><source src> element is considered.
 * Sources starting with "data:" are skipped.
 *
 * @param string $html HTML fragment to scan
 * @param string $site_url base URL used to resolve relative src values
 * @param bool $debug when truthy, progress is reported via _debug()
 */
static function cache_media($html, $site_url, $debug) {
	// collect libxml parse errors internally instead of emitting warnings
	libxml_use_internal_errors(true);

	// prepended so that the fragment is parsed as UTF-8
	$charset_hack = '<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
	</head>';

	$doc = new DOMDocument();
	$doc->loadHTML($charset_hack . $html);

	$xpath = new DOMXPath($doc);
	$nodes = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])');

	foreach ($nodes as $node) {
		if (!$node->hasAttribute('src') || strpos($node->getAttribute('src'), "data:") === 0) {
			continue;
		}

		$url = rewrite_relative_url($site_url, $node->getAttribute('src'));
		$cache_path = CACHE_DIR . "/images/" . sha1($url);

		if ($debug) _debug("cache_media: checking $url");

		if (file_exists($cache_path)) {
			// already cached: only refresh the modification time
			touch($cache_path);
			continue;
		}

		if ($debug) _debug("cache_media: downloading: $url to $cache_path");

		$data = fetch_file_contents($url);

		// empty downloads and files not larger than MIN_CACHE_FILE_SIZE are not stored
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cache_path, $data);
		}
	}
}
1258
/**
 * Deletes rows from ttrss_error_log whose created_at is older than seven days.
 *
 * @param bool $debug when truthy, a notice is written via _debug() before deleting
 */
static function expire_error_log($debug) {
	if ($debug) _debug("Removing old error log entries...");

	// the interval arithmetic is spelled differently for pgsql and everything else
	$cutoff_expr = (DB_TYPE == "pgsql")
		? "NOW() - INTERVAL '7 days'"
		: "DATE_SUB(NOW(), INTERVAL 7 DAY)";

	Db::pdo()->query("DELETE FROM ttrss_error_log
		WHERE created_at < $cutoff_expr");
}
1272
/**
 * Removes stale *.lock files from LOCK_DIRECTORY.
 *
 * A lock file is deleted when file_is_locked() reports it as not locked and
 * its modification time is more than two days (86400*2 seconds) in the past.
 * Nothing is deleted when LOCK_DIRECTORY is not writable.
 *
 * @param bool $debug when truthy, the number of removed files is reported via _debug()
 */
static function expire_lock_files($debug) {
	$removed = 0;

	$candidates = is_writable(LOCK_DIRECTORY) ? glob(LOCK_DIRECTORY . "/*.lock") : false;

	if ($candidates) {
		foreach ($candidates as $path) {
			if (file_is_locked(basename($path))) continue;

			$age = time() - filemtime($path);

			if ($age > 86400*2) {
				unlink($path);
				$removed++;
			}
		}
	}

	if ($debug) _debug("Removed $removed old lock files.");
}
1293
/**
 * Deletes outdated files from the known cache subdirectories of CACHE_DIR.
 *
 * For each of simplepie, feeds, images, export and upload, every entry whose
 * modification time is more than CACHE_MAX_DAYS days old is unlinked.
 * Subdirectories that are not writable are left untouched.
 *
 * @param bool $debug when truthy, a per-directory summary is reported via _debug()
 */
static function expire_cached_files($debug) {
	$subdirs = array("simplepie", "feeds", "images", "export", "upload");

	foreach ($subdirs as $dir) {
		$cache_dir = CACHE_DIR . "/$dir";
		$num_deleted = 0;

		$entries = is_writable($cache_dir) ? glob("$cache_dir/*") : false;

		if ($entries) {
			foreach ($entries as $entry) {
				$age = time() - filemtime($entry);

				if ($age > 86400*CACHE_MAX_DAYS) {
					unlink($entry);
					$num_deleted++;
				}
			}
		}

		// the summary is printed even for directories that were skipped
		if ($debug) _debug("$cache_dir: removed $num_deleted files.");
	}
}
2c08214a 1319
/**
 * Source: http://www.php.net/manual/en/function.parse-url.php#104527
 * Returns the url query as associative array
 *
 * Pairs are separated by "&" and split on the first "=" only, so values
 * containing "=" are kept intact. A key given without "=" maps to null.
 * Empty segments (including an entirely empty query) are ignored.
 * Keys and values are returned as-is, without URL decoding.
 *
 * @param string $query query string without the leading "?"
 * @return array map of parameter name to value (null when no value was given)
 */
static function convertUrlQuery($query) {
	$params = array();

	foreach (explode('&', (string)$query) as $pair) {
		// "a=1&&b=2" or an empty query would otherwise create a bogus '' key
		if ($pair === '') continue;

		// limit 2: only the first "=" separates the name from the value
		$parts = explode('=', $pair, 2);

		$params[$parts[0]] = isset($parts[1]) ? $parts[1] : null;
	}

	return $params;
}
92c14e9d 1339
/**
 * Evaluates filters against an article and collects the actions of every
 * filter that matches.
 *
 * Each filter is an array with the keys "match_any_rule", "inverse", "rules"
 * and "actions". Each rule has "reg_exp", "type" (title, content, both, link,
 * author or tag) and "inverse". Regular expressions are matched
 * case-insensitively in UTF-8 mode; rules with an empty expression are
 * skipped. With "match_any_rule" the first matching rule decides, otherwise
 * evaluation stops at the first rule that fails.
 *
 * Processing ends immediately once an action of type "stop" is collected.
 *
 * @param array $filters filter definitions as described above
 * @param string $title article title
 * @param string $content article content (CR, LF and TAB are removed before matching)
 * @param string $link article link
 * @param string $author article author
 * @param array $tags article tags
 * @param array|false $matched_rules when an array is passed, the rule at which
 *        evaluation of each matching filter ended is appended to it
 * @return array actions of all matching filters, in filter order
 */
static function get_article_filters($filters, $title, $content, $link, $author, $tags, &$matched_rules = false) {
	$matches = array();

	foreach ($filters as $filter) {
		$match_any_rule = $filter["match_any_rule"];
		$inverse = $filter["inverse"];
		$filter_match = false;

		// Reset per filter: a filter without rules must not report a rule
		// left over from a previously evaluated filter.
		$last_rule = null;

		foreach ($filter["rules"] as $rule) {
			$last_rule = $rule;
			$match = false;

			// "/" is the pattern delimiter below, so escape it inside the expression
			$reg_exp = str_replace('/', '\/', $rule["reg_exp"]);
			$rule_inverse = $rule["inverse"];

			if (!$reg_exp)
				continue;

			// @: an invalid user-supplied expression simply counts as "no match"
			switch ($rule["type"]) {
				case "title":
					$match = @preg_match("/$reg_exp/iu", $title);
					break;
				case "content":
					// we don't need to deal with multiline regexps
					$content = preg_replace("/[\r\n\t]/", "", $content);

					$match = @preg_match("/$reg_exp/iu", $content);
					break;
				case "both":
					// we don't need to deal with multiline regexps
					$content = preg_replace("/[\r\n\t]/", "", $content);

					$match = (@preg_match("/$reg_exp/iu", $title) || @preg_match("/$reg_exp/iu", $content));
					break;
				case "link":
					$match = @preg_match("/$reg_exp/iu", $link);
					break;
				case "author":
					$match = @preg_match("/$reg_exp/iu", $author);
					break;
				case "tag":
					foreach ($tags as $tag) {
						if (@preg_match("/$reg_exp/iu", $tag)) {
							$match = true;
							break;
						}
					}
					break;
			}

			if ($rule_inverse) $match = !$match;

			if ($match_any_rule) {
				if ($match) {
					$filter_match = true;
					break;
				}
			} else {
				$filter_match = $match;
				if (!$match) {
					break;
				}
			}
		}

		if ($inverse) $filter_match = !$filter_match;

		if ($filter_match) {
			if (is_array($matched_rules) && $last_rule !== null) {
				array_push($matched_rules, $last_rule);
			}

			foreach ($filter["actions"] as $action) {
				array_push($matches, $action);

				// if Stop action encountered, perform no further processing
				if (isset($action["type"]) && $action["type"] == "stop") return $matches;
			}
		}
	}

	return $matches;
}
1419
/**
 * Returns the first filter action whose "type" equals the given name.
 *
 * @param array $filters list of filter actions (arrays with a "type" key)
 * @param string $filter_name action type to look for
 * @return array|false the first matching action, or false when none matches
 */
static function find_article_filter($filters, $filter_name) {
	$found = false;

	foreach ($filters as $candidate) {
		if ($candidate["type"] == $filter_name) {
			$found = $candidate;
			break;
		}
	}

	return $found;
}
1428
/**
 * Returns every filter action whose "type" equals the given name.
 *
 * @param array $filters list of filter actions (arrays with a "type" key)
 * @param string $filter_name action type to look for
 * @return array matching actions in their original order (empty when none match)
 */
static function find_article_filters($filters, $filter_name) {
	$selected = array();

	foreach ($filters as $candidate) {
		if ($candidate["type"] != $filter_name) continue;

		$selected[] = $candidate;
	}

	return $selected;
}
1439
/**
 * Sums the "param" values of all filter actions of type "score".
 *
 * @param array $filters list of filter actions (arrays with "type" and "param")
 * @return int|float accumulated score, 0 when there are no score actions
 */
static function calculate_article_score($filters) {
	$total = 0;

	foreach ($filters as $action) {
		if ($action["type"] != "score") continue;

		$total += $action["param"];
	}

	return $total;
}
1450
/**
 * Tells whether any label in the list carries the given caption.
 *
 * @param array $labels list of label arrays; the caption is read from index 1
 * @param string $caption caption to look for
 * @return bool true when at least one label has that caption
 */
static function labels_contains_caption($labels, $caption) {
	$present = false;

	foreach ($labels as $entry) {
		if ($entry[1] == $caption) {
			$present = true;
			break;
		}
	}

	return $present;
}
1460
/**
 * Applies the "label" filter actions to an article.
 *
 * For every action of type "label", Labels::add_article() is called with the
 * action's "param" as caption, unless $article_labels already contains a
 * label with that caption.
 *
 * @param int $id article id handed to Labels::add_article()
 * @param array $filters list of filter actions (arrays with "type" and "param")
 * @param int $owner_uid owner handed to Labels::add_article()
 * @param array $article_labels labels already set on the article (caption at index 1)
 */
static function assign_article_to_label_filters($id, $filters, $owner_uid, $article_labels) {
	foreach ($filters as $action) {
		if ($action["type"] != "label") continue;

		$caption = $action["param"];

		if (self::labels_contains_caption($article_labels, $caption)) continue;

		Labels::add_article($id, $caption, $owner_uid);
	}
}
87764a50 1470
/**
 * Derives a GUID-like string from an article title.
 *
 * Tags are stripped, the text is lowercased as UTF-8, and every space,
 * double quote, single quote, comma, dot, colon and semicolon is replaced
 * with "-".
 *
 * @param string $title article title
 * @return string normalized title
 */
static function make_guid_from_title($title) {
	$plain = strip_tags($title);
	$lowered = mb_strtolower($plain, 'utf-8');

	return preg_replace("/[ \"\',.:;]/", "-", $lowered);
}
1475
/**
 * Deletes counter cache rows that no longer have a matching owner record.
 *
 * Rows with feed_id > 0 are removed from ttrss_counters_cache when no row in
 * ttrss_feeds has the same id and owner_uid, and from ttrss_cat_counters_cache
 * when no row in ttrss_feed_categories has the same id and owner_uid.
 *
 * @param bool $debug when truthy, the number of removed rows is reported via _debug()
 */
static function cleanup_counters_cache($debug) {
	$pdo = Db::pdo();

	$feed_rows = $pdo->query("DELETE FROM ttrss_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feeds WHERE
			id = feed_id AND
			ttrss_counters_cache.owner_uid = ttrss_feeds.owner_uid) = 0")->rowCount();

	$cat_rows = $pdo->query("DELETE FROM ttrss_cat_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feed_categories WHERE
			id = feed_id AND
			ttrss_cat_counters_cache.owner_uid = ttrss_feed_categories.owner_uid) = 0")->rowCount();

	if ($debug) _debug("Removed $feed_rows (feeds) $cat_rows (cats) orphaned counter cache entries.");
}
1497
/**
 * Runs the house keeping hook of one user's plugins.
 *
 * A fresh PluginHost is created, populated through load_user_plugins() and
 * asked to run HOOK_HOUSE_KEEPING.
 *
 * @param int $owner_uid user whose plugins are loaded
 */
static function housekeeping_user($owner_uid) {
	$user_plugin_host = new PluginHost();

	load_user_plugins($owner_uid, $user_plugin_host);

	$user_plugin_host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
1505
e6c886bf
AD
/**
 * Runs the maintenance tasks that are not tied to a single user.
 *
 * In order: expires cached files, lock files and error log entries, rebuilds
 * the feedbrowser cache, purges orphaned articles, cleans up the counter
 * caches and finally runs HOOK_HOUSE_KEEPING on the global PluginHost.
 *
 * @param bool $debug passed through to the individual cleanup steps
 */
static function housekeeping_common($debug) {
	self::expire_cached_files($debug);
	self::expire_lock_files($debug);
	self::expire_error_log($debug);

	$count = self::update_feedbrowser_cache();
	_debug("Feedbrowser updated, $count feeds processed.");

	Article::purge_orphans(true);
	self::cleanup_counters_cache($debug);

	// cached tag cleanup is currently disabled:
	//$rc = cleanup_tags( 14, 50000);
	//_debug("Cleaned $rc cached tags.");

	$plugin_host = PluginHost::getInstance();
	$plugin_host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
ea79a0e0 1522
e6c886bf
AD
/**
 * Fetches and stores the favicon of a feed unless one is already present.
 *
 * The icon is stored as ICONS_DIR/<feed>.ico. Downloaded data is only written
 * when it starts with a known image signature (ICO, GIF, PNG, JPEG or BMP).
 *
 * NOTE: nothing is returned (null) when the icon file already exists; the
 * path is only returned when the file was missing on entry, whether or not
 * the download succeeded.
 *
 * @param string $site_url site URL handed to get_favicon_url()
 * @param int $feed feed id used to build the icon file name
 * @return string|null icon file path, or null when the icon already existed
 */
static function check_feed_favicon($site_url, $feed) {
	$icon_file = ICONS_DIR . "/$feed.ico";

	if (file_exists($icon_file)) {
		return;
	}

	$favicon_url = get_favicon_url($site_url);

	if ($favicon_url) {
		// Limiting to "image" type misses those served with text/plain
		$contents = fetch_file_contents($favicon_url); // , "image");

		if ($contents) {
			// Crude image type matching.
			// Patterns gleaned from the file(1) source code.
			$known_signatures = array(
				'/^\x00\x00\x01\x00/',        // MS Windows icon resource
				'/^GIF8/',                    // GIF image data
				'/^\x89PNG\x0d\x0a\x1a\x0a/', // PNG image data
				'/^\xff\xd8/',                // JPEG image data
				'/^BM/',                      // PC bitmap (OS2, Windows BMP files)
			);

			$recognized = false;

			foreach ($known_signatures as $signature) {
				if (preg_match($signature, $contents)) {
					$recognized = true;
					break;
				}
			}

			// unknown type: discard the download
			if (!$recognized) $contents = "";
		}

		if ($contents) {
			// @: failure to open the file is handled by the check below
			$fp = @fopen($icon_file, "w");

			if ($fp) {
				fwrite($fp, $contents);
				fclose($fp);
				chmod($icon_file, 0644);
			}
		}
	}

	return $icon_file;
}
e6c886bf
AD
1577
1578
1579
bec5ba93 1580}