]> git.wh0rd.org - tt-rss.git/blame - classes/rssutils.php
add .editorconfig
[tt-rss.git] / classes / rssutils.php
CommitLineData
2c08214a 1<?php
e6c886bf
AD
2class RSSUtils {
/**
 * Builds a SHA1 fingerprint of an article.
 *
 * Every set (non-null) element of $article except the "feed" entry
 * contributes sha1("<key>:" . sha1(<tag-stripped value>)); array values are
 * joined with commas first. The comma-joined plugin name list is prepended,
 * so the fingerprint depends on the plugin names reported by $pluginhost.
 *
 * @param array $article article fields keyed by name
 * @param object $pluginhost object providing get_plugin_names()
 * @return string 40-character hex SHA1 digest
 */
static function calculate_article_hash($article, $pluginhost) {
	$field_digests = array();

	foreach ($article as $field => $value) {
		// feed metadata and unset fields do not take part in the hash
		if ($field == "feed" || !isset($value)) continue;

		$flattened = is_array($value) ? implode(",", $value) : $value;
		$stripped = strip_tags($flattened);

		$field_digests[] = sha1("$field:" . sha1($stripped));
	}

	$plugin_names = implode(",", $pluginhost->get_plugin_names());

	return sha1($plugin_names . implode("", $field_digests));
}
18
102a0135
AD
/**
 * Strips utf8mb4 characters (i.e. emoji) for mysql.
 *
 * Every code point in the range U+10000..U+10FFFF is replaced with
 * U+FFFD (REPLACEMENT CHARACTER, bytes EF BF BD).
 *
 * preg_replace() with the /u modifier returns NULL when the subject is not
 * valid UTF-8. Instead of handing that NULL back (which would wipe the
 * value), a string subject is re-encoded to drop the invalid byte sequences
 * and the replacement is attempted once more.
 *
 * @param string $str text to clean
 * @return string|null cleaned text; NULL only if the replacement still fails
 */
static function strip_utf8mb4($str) {
	$pattern = '/[\x{10000}-\x{10FFFF}]/u';
	$replacement = "\xEF\xBF\xBD";

	$result = preg_replace($pattern, $replacement, $str);

	if ($result === null && is_string($str)) {
		// malformed UTF-8: scrub invalid sequences, then retry
		$scrubbed = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
		$result = preg_replace($pattern, $replacement, $scrubbed);
	}

	return $result;
}
23
/**
 * Rebuilds ttrss_feedbrowser_cache from ttrss_feeds.
 *
 * Selects up to 1000 (feed_url, site_url, title) groups ordered by
 * subscriber count, skipping every feed_url that has at least one
 * subscription which is private, uses authentication, or embeds
 * credentials in the URL. The cache table is emptied and refilled inside a
 * single transaction; only the first row seen for a given feed_url is
 * inserted.
 *
 * If an exception is raised while refilling, the transaction is rolled
 * back and the exception is rethrown.
 *
 * @return int number of rows inserted into the cache
 */
static function update_feedbrowser_cache() {

	$pdo = Db::pdo();

	$sth = $pdo->query("SELECT feed_url, site_url, title, COUNT(id) AS subscribers
		FROM ttrss_feeds WHERE feed_url NOT IN (SELECT feed_url FROM ttrss_feeds
			WHERE private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%')
		GROUP BY feed_url, site_url, title ORDER BY subscribers DESC LIMIT 1000");

	$pdo->beginTransaction();

	try {
		$pdo->query("DELETE FROM ttrss_feedbrowser_cache");

		// both statements are loop-invariant, prepare them once
		$check_sth = $pdo->prepare("SELECT subscribers FROM
			ttrss_feedbrowser_cache WHERE feed_url = ?");

		$insert_sth = $pdo->prepare("INSERT INTO ttrss_feedbrowser_cache
			(feed_url, site_url, title, subscribers)
			VALUES
			(?, ?, ?, ?)");

		$count = 0;

		while ($line = $sth->fetch()) {

			$subscribers = $line["subscribers"];
			$feed_url = $line["feed_url"];
			$title = $line["title"];
			$site_url = $line["site_url"];

			$check_sth->execute([$feed_url]);
			$already_cached = $check_sth->fetch();

			// release the result set so the statement can be executed again
			$check_sth->closeCursor();

			if (!$already_cached) {
				$insert_sth->execute([$feed_url, $site_url, $title, $subscribers]);

				++$count;
			}
		}

		$pdo->commit();

	} catch (Exception $e) {
		// do not leave a half-filled cache behind an open transaction
		if ($pdo->inTransaction()) $pdo->rollBack();

		throw $e;
	}

	return $count;

}
70
/**
 * Runs one update batch: selects feed URLs that are due, updates the
 * matching feeds, runs per-user housekeeping and sends digests.
 *
 * Steps, in order:
 *  - terminates the script if the database schema version differs from
 *    SCHEMA_VERSION;
 *  - selects distinct feed URLs that are due according to the per-feed
 *    update_interval or the owner's DEFAULT_UPDATE_INTERVAL preference, that
 *    are not marked as started within the last 10 minutes, and (outside
 *    single user mode, when DAEMON_UPDATE_LOGIN_LIMIT > 0) whose owner logged
 *    in recently enough; least recently updated first;
 *  - stamps last_update_started on all feeds with those URLs before doing
 *    any work, to minimize collisions between parallel daemon tasks;
 *  - for every selected URL, updates every due feed subscribed to that URL
 *    (not only the first one);
 *  - runs housekeeping once per owner touched by the batch and sends
 *    headline digests.
 *
 * @param int $limit maximum number of feed URLs to schedule; a falsy value
 *   disables the LIMIT clause
 * @param bool $debug emit scheduling debug messages
 * @return int number of feeds processed
 */
static function update_daemon_common($limit = DAEMON_FEED_LIMIT, $debug = true) {
	$schema_version = get_schema_version();

	if ($schema_version != SCHEMA_VERSION) {
		die("Schema version is wrong, please upgrade the database.\n");
	}

	$pdo = Db::pdo();

	if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
		if (DB_TYPE == "pgsql") {
			$login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
		} else {
			$login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
		}
	} else {
		$login_thresh_qpart = "";
	}

	if (DB_TYPE == "pgsql") {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_user_prefs.value || ' minutes') AS INTERVAL)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	} else {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL CONVERT(ttrss_user_prefs.value, SIGNED INTEGER) MINUTE)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	}

	// Test if feed is currently being updated by another process.
	if (DB_TYPE == "pgsql") {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '10 minutes')";
	} else {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 10 MINUTE))";
	}

	$query_limit = $limit ? sprintf("LIMIT %d", $limit) : "";

	// Update the least recently updated feeds first
	$query_order = "ORDER BY last_updated";
	if (DB_TYPE == "pgsql") $query_order .= " NULLS FIRST";

	$query = "SELECT DISTINCT ttrss_feeds.feed_url, ttrss_feeds.last_updated
		FROM
			ttrss_feeds, ttrss_users, ttrss_user_prefs
		WHERE
			ttrss_feeds.owner_uid = ttrss_users.id
			AND ttrss_user_prefs.profile IS NULL
			AND ttrss_users.id = ttrss_user_prefs.owner_uid
			AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL'
			$login_thresh_qpart $update_limit_qpart
			$updstart_thresh_qpart
			$query_order $query_limit";

	$res = $pdo->query($query);

	$feeds_to_update = array();
	while ($line = $res->fetch()) {
		array_push($feeds_to_update, $line['feed_url']);
	}

	if ($debug) _debug(sprintf("Scheduled %d feeds to update...", count($feeds_to_update)));

	// Update last_update_started before actually starting the batch
	// in order to minimize collision risk for parallel daemon tasks
	if (count($feeds_to_update) > 0) {
		$feeds_qmarks = arr_qmarks($feeds_to_update);

		$tmph = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
			WHERE feed_url IN ($feeds_qmarks)");
		$tmph->execute($feeds_to_update);
	}

	$nf = 0;
	$bstarted = microtime(true);

	$batch_owners = array();

	// since we have the data cached, we can deal with other feeds with the same url
	$usth = $pdo->prepare("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
		FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
			ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
			ttrss_users.id = ttrss_user_prefs.owner_uid AND
			ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL' AND
			ttrss_user_prefs.profile IS NULL AND
			feed_url = ?
			$update_limit_qpart
			$login_thresh_qpart
		ORDER BY ttrss_feeds.id $query_limit");

	foreach ($feeds_to_update as $feed) {
		if ($debug) _debug("Base feed: $feed");

		$usth->execute([$feed]);

		// Read the whole result set up front: every due feed sharing this URL
		// must be updated, and update_rss_feed() runs its own queries on the
		// same connection while we iterate.
		$due_feeds = $usth->fetchAll();

		foreach ($due_feeds as $tline) {
			if ($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);

			if (!in_array($tline["owner_uid"], $batch_owners, true))
				array_push($batch_owners, $tline["owner_uid"]);

			$fstarted = microtime(true);
			RSSUtils::update_rss_feed($tline["id"], true);

			// update_rss_feed() may have switched debug output off, turn it back on
			_debug_suppress(false);

			_debug(sprintf(" %.4f (sec)", microtime(true) - $fstarted));

			++$nf;
		}
	}

	if ($nf > 0) {
		_debug(sprintf("Processed %d feeds in %.4f (sec), %.4f (sec/feed avg)", $nf,
			microtime(true) - $bstarted, (microtime(true) - $bstarted) / $nf));
	}

	foreach ($batch_owners as $owner_uid) {
		_debug("Running housekeeping tasks for user $owner_uid...");

		RSSUtils::housekeeping_user($owner_uid);
	}

	// Send feed digests by email if needed.
	Digest::send_headlines_digests($debug);

	return $nf;
}
2c08214a 215
/**
 * Fills in the title and site URL of a feed; this is used when subscribing.
 *
 * The feed's owner, URL and credentials are read from ttrss_feeds. Plugins
 * hooked on HOOK_FEED_BASIC_INFO get the first chance to supply the data.
 * If they return nothing, the feed is downloaded and parsed, and the title
 * and link are taken from the parsed document.
 *
 * The stored title is only replaced when it is empty or "[Unknown]". The
 * stored site URL is replaced whenever a different non-empty one was found.
 * Nothing happens when the feed id is unknown or no basic info could be
 * determined.
 *
 * @param int $feed id of the feed in ttrss_feeds
 * @return void
 */
static function set_basic_feed_info($feed) {

	$pdo = Db::pdo();

	$sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login,auth_pass_encrypted
		FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	if (!$row) return;

	$owner_uid = $row["owner_uid"];

	$auth_pass_encrypted = $row["auth_pass_encrypted"];

	$auth_login = $row["auth_login"];
	$auth_pass = $row["auth_pass"];

	if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) {
		require_once "crypt.php";
		$auth_pass = decrypt_string($auth_pass);
	}

	$fetch_url = $row["feed_url"];

	// a separate plugin host is built for the feed owner so that the
	// owner's user plugins take part in the hook below
	$pluginhost = new PluginHost();
	$user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);

	$pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
	$pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
	$pluginhost->load_data();

	$basic_info = array();
	foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
		$basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
	}

	if (!$basic_info) {
		// no plugin provided the data: fetch and parse the feed ourselves
		$feed_data = fetch_file_contents($fetch_url, false,
			$auth_login, $auth_pass, false,
			FEED_FETCH_TIMEOUT,
			0);

		global $fetch_curl_used;

		if (!$fetch_curl_used) {
			// try to gunzip the payload when the fetch did not go through curl
			$tmp = @gzdecode($feed_data);

			if ($tmp) $feed_data = $tmp;
		}

		$feed_data = trim($feed_data);

		$rss = new FeedParser($feed_data);
		$rss->init();

		if (!$rss->error()) {
			$basic_info = array(
				'title' => mb_substr($rss->get_title(), 0, 199),
				'site_url' => mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245)
			);
		}
	}

	if (!$basic_info || !is_array($basic_info)) return;

	$sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	if (!$row) return;

	$registered_title = $row["title"];
	$orig_site_url = $row["site_url"];

	// a plugin may return only some of the keys, so do not assume both exist
	$new_title = isset($basic_info['title']) ? $basic_info['title'] : false;
	$new_site_url = isset($basic_info['site_url']) ? $basic_info['site_url'] : false;

	if ($new_title && (!$registered_title || $registered_title == "[Unknown]")) {

		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			title = ? WHERE id = ?");
		$sth->execute([$new_title, $feed]);
	}

	if ($new_site_url && $orig_site_url != $new_site_url) {
		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			site_url = ? WHERE id = ?");
		$sth->execute([$new_site_url, $feed]);
	}
}
306
7b55001e 307 /**
e6c886bf
AD
308 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
309 */
310 static function update_rss_feed($feed, $no_cache = false) {
2c08214a 311
e6532439 312 $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || clean($_REQUEST['xdebug']);
2c08214a 313
4f71d743 314 _debug_suppress(!$debug_enabled);
68cccafc 315 _debug("start", $debug_enabled);
2c08214a 316
0567016b
AD
317 $pdo = Db::pdo();
318
319 $sth = $pdo->prepare("SELECT title FROM ttrss_feeds WHERE id = ?");
320 $sth->execute([$feed]);
bfe1eb4e 321
0567016b 322 if (!$row = $sth->fetch()) {
bfe1eb4e
AD
323 _debug("feed $feed NOT FOUND/SKIPPED", $debug_enabled);
324 user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING);
325 return false;
326 }
327
0567016b 328 $title = $row["title"];
6bb96beb
AD
329
330 // feed was batch-subscribed or something, we need to get basic info
331 // this is not optimal currently as it fetches stuff separately TODO: optimize
332 if ($title == "[Unknown]") {
333 _debug("setting basic feed info for $feed...");
e6c886bf 334 RSSUtils::set_basic_feed_info($feed);
6bb96beb
AD
335 }
336
0567016b 337 $sth = $pdo->prepare("SELECT id,update_interval,auth_login,
5ba1ddd4 338 feed_url,auth_pass,cache_images,
5321e775 339 mark_unread_on_update, owner_uid,
153cb6d3 340 auth_pass_encrypted, feed_language,
e50c8eaa
AD
341 last_modified,
342 ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
0567016b
AD
343 FROM ttrss_feeds WHERE id = ?");
344 $sth->execute([$feed]);
345
346 if ($row = $sth->fetch()) {
2c08214a 347
0567016b 348 $owner_uid = $row["owner_uid"];
187abfe7
AD
349 $mark_unread_on_update = $row["mark_unread_on_update"];
350 $auth_pass_encrypted = $row["auth_pass_encrypted"];
2c08214a 351
0567016b
AD
352 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
353 WHERE id = ?");
354 $sth->execute([$feed]);
2c08214a 355
0567016b
AD
356 $auth_login = $row["auth_login"];
357 $auth_pass = $row["auth_pass"];
2c08214a 358
0567016b
AD
359 if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) {
360 require_once "crypt.php";
361 $auth_pass = decrypt_string($auth_pass);
362 }
044cff2d 363
0567016b
AD
364 $stored_last_modified = $row["last_modified"];
365 $last_unconditional = $row["last_unconditional"];
187abfe7 366 $cache_images = $row["cache_images"];
0567016b
AD
367 $fetch_url = $row["feed_url"];
368 $feed_language = mb_strtolower($row["feed_language"]);
369 if (!$feed_language) $feed_language = 'english';
2c08214a 370
0567016b
AD
371 } else {
372 return false;
373 }
2c08214a 374
f074ffe9 375 $date_feed_processed = date('Y-m-d H:i');
2c08214a 376
342e8a9e 377 $cache_filename = CACHE_DIR . "/feeds/" . sha1($fetch_url) . ".xml";
f074ffe9 378
ee65bef4
AD
379 $pluginhost = new PluginHost();
380 $pluginhost->set_debug($debug_enabled);
381 $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
382
383 $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
384 $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
385 $pluginhost->load_data();
386
7b55001e 387 $rss_hash = false;
4f9cbdff 388
7b55001e
AD
389 $force_refetch = isset($_REQUEST["force_refetch"]);
390 $feed_data = "";
687a4f59 391
7b55001e
AD
392 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) {
393 $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass);
394 }
2c08214a 395
7b55001e
AD
396 // try cache
397 if (!$feed_data &&
398 file_exists($cache_filename) &&
399 is_readable($cache_filename) &&
400 !$auth_login && !$auth_pass &&
401 filemtime($cache_filename) > time() - 30) {
be574731 402
7b55001e 403 _debug("using local cache [$cache_filename].", $debug_enabled);
52637d3b 404
7b55001e 405 @$feed_data = file_get_contents($cache_filename);
f074ffe9 406
7b55001e
AD
407 if ($feed_data) {
408 $rss_hash = sha1($feed_data);
88edaa93 409 }
ee65bef4 410
7b55001e
AD
411 } else {
412 _debug("local cache will not be used for this feed", $debug_enabled);
413 }
312742db 414
153cb6d3
AD
415 global $fetch_last_modified;
416
7b55001e
AD
417 // fetch feed from source
418 if (!$feed_data) {
e50c8eaa 419 _debug("last unconditional update request: $last_unconditional");
312742db 420
7b55001e
AD
421 if (ini_get("open_basedir") && function_exists("curl_init")) {
422 _debug("not using CURL due to open_basedir restrictions");
423 }
3f6f0857 424
e50c8eaa
AD
425 if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
426 _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
427
428 $force_refetch = true;
429 } else {
430 _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
431 }
153cb6d3 432
e50c8eaa 433 _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
153cb6d3
AD
434
435 $feed_data = fetch_file_contents([
436 "url" => $fetch_url,
437 "login" => $auth_login,
438 "pass" => $auth_pass,
439 "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
440 "last_modified" => $force_refetch ? "" : $stored_last_modified
441 ]);
3f6f0857 442
7b55001e 443 global $fetch_curl_used;
3f6f0857 444
7b55001e
AD
445 if (!$fetch_curl_used) {
446 $tmp = @gzdecode($feed_data);
1367bc3f 447
7b55001e
AD
448 if ($tmp) $feed_data = $tmp;
449 }
017401dd 450
7b55001e 451 $feed_data = trim($feed_data);
fd687300 452
7b55001e 453 _debug("fetch done.", $debug_enabled);
9d930af9 454 _debug("source last modified: " . $fetch_last_modified, $debug_enabled);
153cb6d3
AD
455
456 if ($feed_data && $fetch_last_modified != $stored_last_modified) {
0567016b
AD
457 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_modified = ? WHERE id = ?");
458 $sth->execute([substr($fetch_last_modified, 0, 245), $feed]);
153cb6d3 459 }
95beaa14 460
7b55001e 461 // cache vanilla feed data for re-use
342e8a9e 462 if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/feeds")) {
7b55001e
AD
463 $new_rss_hash = sha1($feed_data);
464
465 if ($new_rss_hash != $rss_hash) {
466 _debug("saving $cache_filename", $debug_enabled);
467 @file_put_contents($cache_filename, $feed_data);
95beaa14 468 }
4f9cbdff 469 }
7b55001e 470 }
017401dd 471
7b55001e
AD
472 if (!$feed_data) {
473 global $fetch_last_error;
474 global $fetch_last_error_code;
f074ffe9 475
7b55001e 476 _debug("unable to fetch: $fetch_last_error [$fetch_last_error_code]", $debug_enabled);
f074ffe9 477
7b55001e
AD
478 // If-Modified-Since
479 if ($fetch_last_error_code != 304) {
0567016b 480 $error_message = $fetch_last_error;
7b55001e
AD
481 } else {
482 _debug("source claims data not modified, nothing to do.", $debug_enabled);
0567016b 483 $error_message = "";
7b55001e 484 }
4f9cbdff 485
0567016b
AD
486 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
487 last_updated = NOW() WHERE id = ?");
488 $sth->execute([$error_message, $feed]);
4f9cbdff 489
7b55001e 490 return;
f074ffe9
AD
491 }
492
1ffe3391 493 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) {
6791af0c 494 $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed);
017401dd
AD
495 }
496
07d3431e
AD
497 $rss = new FeedParser($feed_data);
498 $rss->init();
2c08214a 499
19b3992b 500 if (!$rss->error()) {
2c08214a 501
d2a421e3 502 // We use local pluginhost here because we need to load different per-user feed plugins
1ffe3391 503 $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
4412b877 504
df659891 505 _debug("language: $feed_language", $debug_enabled);
68cccafc 506 _debug("processing feed data...", $debug_enabled);
2c08214a 507
382268c6
AD
508 if (DB_TYPE == "pgsql") {
509 $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
510 } else {
511 $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
512 }
513
0567016b 514 $sth = $pdo->prepare("SELECT owner_uid,favicon_avg_color,
382268c6
AD
515 (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
516 favicon_needs_check
0567016b
AD
517 FROM ttrss_feeds WHERE id = ?");
518 $sth->execute([$feed]);
2c08214a 519
0567016b 520 if ($row = $sth->fetch()) {
187abfe7 521 $favicon_needs_check = $row["favicon_needs_check"];
0567016b
AD
522 $favicon_avg_color = $row["favicon_avg_color"];
523 $owner_uid = $row["owner_uid"];
524 } else {
525 return false;
526 }
2c08214a 527
0567016b 528 $site_url = mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245);
2c08214a 529
cd07592c
AD
530 _debug("site_url: $site_url", $debug_enabled);
531 _debug("feed_title: " . $rss->get_title(), $debug_enabled);
532
687a4f59 533 if ($favicon_needs_check || $force_refetch) {
36490f11
AD
534
535 /* terrible hack: if we crash on floicon shit here, we won't check
560cbd8c 536 * the icon avgcolor again (unless the icon got updated) */
36490f11 537
560cbd8c
AD
538 $favicon_file = ICONS_DIR . "/$feed.ico";
539 $favicon_modified = @filemtime($favicon_file);
540
68cccafc 541 _debug("checking favicon...", $debug_enabled);
687a4f59 542
e6c886bf 543 RSSUtils::check_feed_favicon($site_url, $feed);
560cbd8c
AD
544 $favicon_modified_new = @filemtime($favicon_file);
545
546 if ($favicon_modified_new > $favicon_modified)
547 $favicon_avg_color = '';
687a4f59 548
0567016b 549 $favicon_colorstring = "";
6ee0d4b0 550 if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') {
e6c886bf 551 require_once "colors.php";
687a4f59 552
0567016b
AD
553 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE
554 id = ?");
555 $sth->execute([$feed]);
aafd55ba 556
0567016b
AD
557 $favicon_color = calculate_avg_color($favicon_file);
558
559 $favicon_colorstring = ",favicon_avg_color = " . $pdo->quote($favicon_color);
63c323f7 560
36490f11 561 } else if ($favicon_avg_color == 'fail') {
84ceb6bd 562 _debug("floicon failed on this file, not trying to recalculate avg color", $debug_enabled);
6ac722d5 563 }
687a4f59 564
0567016b
AD
565 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_last_checked = NOW()
566 $favicon_colorstring WHERE id = ?");
567 $sth->execute([$feed]);
f2798eb6 568 }
2c08214a 569
68cccafc 570 _debug("loading filters & labels...", $debug_enabled);
2c08214a 571
a42c55f0 572 $filters = load_filters($feed, $owner_uid);
2c08214a 573
02f3992a
AD
574 if ($debug_enabled) {
575 print_r($filters);
576 }
577
68cccafc 578 _debug("" . count($filters) . " filters loaded.", $debug_enabled);
2c08214a 579
19b3992b 580 $items = $rss->get_items();
2c08214a 581
19b3992b 582 if (!is_array($items)) {
68cccafc 583 _debug("no articles found.", $debug_enabled);
2c08214a 584
0567016b
AD
585 $sth = $pdo->prepare("UPDATE ttrss_feeds
586 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
587 $sth->execute([$feed]);
2c08214a 588
0567016b 589 return true; // no articles
2c08214a
AD
590 }
591
68cccafc 592 _debug("processing articles...", $debug_enabled);
2c08214a 593
6c9f3d4a
AD
594 $tstart = time();
595
19b3992b 596 foreach ($items as $item) {
0500e14c
AD
597 $pdo->beginTransaction();
598
e6532439 599 if (clean($_REQUEST['xdebug']) == 3) {
2c08214a
AD
600 print_r($item);
601 }
602
6c9f3d4a
AD
603 if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) {
604 _debug("looks like there's too many articles to process at once, breaking out", $debug_enabled);
0500e14c 605 $pdo->commit();
6c9f3d4a
AD
606 break;
607 }
608
0567016b
AD
609 $entry_guid = strip_tags($item->get_id());
610 if (!$entry_guid) $entry_guid = strip_tags($item->get_link());
e6c886bf 611 if (!$entry_guid) $entry_guid = RSSUtils::make_guid_from_title($item->get_title());
0500e14c
AD
612
613 if (!$entry_guid) {
614 $pdo->commit();
615 continue;
616 }
2c08214a 617
3a4c8973
AD
618 $entry_guid = "$owner_uid,$entry_guid";
619
0567016b 620 $entry_guid_hashed = 'SHA1:' . sha1($entry_guid);
5e3d5480 621
68cccafc 622 _debug("guid $entry_guid / $entry_guid_hashed", $debug_enabled);
5e3d5480 623
0567016b 624 $entry_timestamp = strip_tags($item->get_date());
04d2f9c8
AD
625
626 _debug("orig date: " . $item->get_date(), $debug_enabled);
2c08214a 627
30123fe6 628 if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) {
2c08214a 629 $entry_timestamp = time();
2c08214a
AD
630 }
631
632 $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
633
68cccafc 634 _debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
2c08214a 635
0567016b 636 $entry_title = strip_tags($item->get_title());
1b35d30c 637
5d56d100 638 $entry_link = rewrite_relative_url($site_url, $item->get_link());
2c08214a 639
68cccafc
AD
640 _debug("title $entry_title", $debug_enabled);
641 _debug("link $entry_link", $debug_enabled);
2c08214a
AD
642
643 if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
644
19b3992b
AD
645 $entry_content = $item->get_content();
646 if (!$entry_content) $entry_content = $item->get_description();
2c08214a 647
e6532439 648 if (clean($_REQUEST["xdebug"]) == 2) {
9ec10352 649 print "content: ";
0bc503ff 650 print htmlspecialchars($entry_content);
3c696512 651 print "\n";
2c08214a
AD
652 }
653
0567016b 654 $entry_comments = mb_substr(strip_tags($item->get_comments_url()), 0, 245);
12ff230b 655 $num_comments = (int) $item->get_comments_count();
2c08214a 656
0567016b
AD
657 $entry_author = strip_tags($item->get_author());
658 $entry_guid = mb_substr($entry_guid, 0, 245);
2c08214a 659
68cccafc
AD
660 _debug("author $entry_author", $debug_enabled);
661 _debug("num_comments: $num_comments", $debug_enabled);
ee78f81c 662 _debug("looking for tags...", $debug_enabled);
2c08214a
AD
663
664 // parse <category> entries into tags
665
666 $additional_tags = array();
667
19b3992b 668 $additional_tags_src = $item->get_categories();
2c08214a 669
19b3992b
AD
670 if (is_array($additional_tags_src)) {
671 foreach ($additional_tags_src as $tobj) {
cd07592c 672 array_push($additional_tags, $tobj);
2c08214a 673 }
19b3992b 674 }
2c08214a 675
fa6fbd36 676 $entry_tags = array_unique($additional_tags);
2c08214a
AD
677
678 for ($i = 0; $i < count($entry_tags); $i++)
679 $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
680
ee78f81c
AD
681 _debug("tags found: " . join(",", $entry_tags), $debug_enabled);
682
68cccafc 683 _debug("done collecting data.", $debug_enabled);
2c08214a 684
0567016b
AD
685 $sth = $pdo->prepare("SELECT id, content_hash, lang FROM ttrss_entries
686 WHERE guid = ? OR guid = ?");
687 $sth->execute([$entry_guid, $entry_guid_hashed]);
b30abdad 688
0567016b
AD
689 if ($row = $sth->fetch()) {
690 $base_entry_id = $row["id"];
691 $entry_stored_hash = $row["content_hash"];
4a0da0e5 692 $article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
0567016b 693 $entry_language = $row["lang"];
a8ac7661 694
2ed0d6c4 695 $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
a8ac7661 696 $entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
b30abdad 697 } else {
b1840673
AD
698 $base_entry_id = false;
699 $entry_stored_hash = "";
a29fe121 700 $article_labels = array();
3318d324 701 $entry_language = "";
b30abdad
AD
702 }
703
455b1401 704 $article = array("owner_uid" => $owner_uid, // read only
b30abdad 705 "guid" => $entry_guid, // read only
59e83455 706 "guid_hashed" => $entry_guid_hashed, // read only
19b3992b
AD
707 "title" => $entry_title,
708 "content" => $entry_content,
709 "link" => $entry_link,
a29fe121 710 "labels" => $article_labels, // current limitation: can add labels to article, can't remove them
19b3992b 711 "tags" => $entry_tags,
e02555c1 712 "author" => $entry_author,
c9299c28 713 "force_catchup" => false, // ugly hack for the time being
6de3a1be 714 "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
3318d324 715 "language" => $entry_language,
20d2195f 716 "num_comments" => $num_comments, // read only
f73e03e0
AD
717 "feed" => array("id" => $feed,
718 "fetch_url" => $fetch_url,
babfadbf
J
719 "site_url" => $site_url,
720 "cache_images" => $cache_images)
e6c886bf 721 );
cc85704f 722
b1840673 723 $entry_plugin_data = "";
e6c886bf 724 $entry_current_hash = RSSUtils::calculate_article_hash($article, $pluginhost);
b1840673
AD
725
726 _debug("article hash: $entry_current_hash [stored=$entry_stored_hash]", $debug_enabled);
727
522e8b35 728 if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) {
b1840673
AD
729 _debug("stored article seems up to date [IID: $base_entry_id], updating timestamp only", $debug_enabled);
730
731 // we keep encountering the entry in feeds, so we need to
732 // update date_updated column so that we don't get horrible
733 // dupes when the entry gets purged and reinserted again e.g.
734 // in the case of SLOW SLOW OMG SLOW updating feeds
735
0567016b
AD
736 $sth = $pdo->prepare("UPDATE ttrss_entries SET date_updated = NOW()
737 WHERE id = ?");
738 $sth->execute([$base_entry_id]);
b1840673 739
0500e14c 740 $pdo->commit();
5bdcb8fd 741 continue;
b1840673
AD
742 }
743
744 _debug("hash differs, applying plugin filters:", $debug_enabled);
745
1ffe3391 746 foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) {
b1840673
AD
747 _debug("... " . get_class($plugin), $debug_enabled);
748
749 $start = microtime(true);
19b3992b 750 $article = $plugin->hook_article_filter($article);
0084f0d1 751
b1840673
AD
752 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
753
754 $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ",";
cc85704f
AD
755 }
756
e6532439 757 if (clean($_REQUEST["xdebug"]) == 2) {
0bc503ff
AD
758 print "processed content: ";
759 print htmlspecialchars($article["content"]);
760 print "\n";
761 }
762
b1840673
AD
763 _debug("plugin data: $entry_plugin_data", $debug_enabled);
764
35c12dc4
AD
765 // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
766 if (DB_TYPE == "mysql") {
767 foreach ($article as $k => $v) {
35c37354 768 // i guess we'll have to take the risk of 4byte unicode labels & tags here
dae16f72 769 if (is_string($article[$k])) {
102a0135 770 $article[$k] = RSSUtils::strip_utf8mb4($v);
35c37354 771 }
35c12dc4
AD
772 }
773 }
774
b8774453
AD
775 /* Collect article tags here so we could filter by them: */
776
557d86fe
AD
777 $matched_rules = array();
778
e6c886bf 779 $article_filters = RSSUtils::get_article_filters($filters, $article["title"],
7b55001e 780 $article["content"], $article["link"], $article["author"],
557d86fe 781 $article["tags"], $matched_rules);
b8774453
AD
782
783 if ($debug_enabled) {
557d86fe
AD
784 _debug("matched filter rules: ", $debug_enabled);
785
786 if (count($matched_rules) != 0) {
787 print_r($matched_rules);
788 }
789
790 _debug("filter actions: ", $debug_enabled);
791
b8774453
AD
792 if (count($article_filters) != 0) {
793 print_r($article_filters);
794 }
795 }
796
e6c886bf 797 $plugin_filter_names = RSSUtils::find_article_filters($article_filters, "plugin");
b8774453
AD
798 $plugin_filter_actions = $pluginhost->get_filter_actions();
799
800 if (count($plugin_filter_names) > 0) {
801 _debug("applying plugin filter actions...", $debug_enabled);
802
803 foreach ($plugin_filter_names as $pfn) {
804 list($pfclass,$pfaction) = explode(":", $pfn["param"]);
805
806 if (isset($plugin_filter_actions[$pfclass])) {
807 $plugin = $pluginhost->get_plugin($pfclass);
808
809 _debug("... $pfclass: $pfaction", $debug_enabled);
810
811 if ($plugin) {
812 $start = microtime(true);
813 $article = $plugin->hook_article_filter_action($article, $pfaction);
814
815 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
816 } else {
817 _debug("??? $pfclass: plugin object not found.");
818 }
819 } else {
820 _debug("??? $pfclass: filter plugin not registered.");
821 }
822 }
823 }
824
19b3992b 825 $entry_tags = $article["tags"];
0567016b
AD
826 $entry_title = strip_tags($article["title"]);
827 $entry_author = mb_substr(strip_tags($article["author"]), 0, 245);
828 $entry_link = strip_tags($article["link"]);
f935d98e 829 $entry_content = $article["content"]; // escaped below
c9299c28 830 $entry_force_catchup = $article["force_catchup"];
a29fe121 831 $article_labels = $article["labels"];
6de3a1be 832 $entry_score_modifier = (int) $article["score_modifier"];
0567016b 833 $entry_language = $article["language"];
a29fe121
AD
834
835 if ($debug_enabled) {
836 _debug("article labels:", $debug_enabled);
557d86fe
AD
837
838 if (count($article_labels) != 0) {
839 print_r($article_labels);
840 }
a29fe121 841 }
c9299c28
AD
842
843 _debug("force catchup: $entry_force_catchup");
f935d98e 844
0a3fd79b 845 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 846 RSSUtils::cache_media($entry_content, $site_url, $debug_enabled);
0a3fd79b 847
0567016b
AD
848 $csth = $pdo->prepare("SELECT id FROM ttrss_entries
849 WHERE guid = ? OR guid = ?");
850 $csth->execute([$entry_guid, $entry_guid_hashed]);
9e222305 851
0567016b 852 if (!$row = $csth->fetch()) {
2c08214a 853
07d3431e 854 _debug("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", $debug_enabled);
2c08214a
AD
855
856 // base post entry does not exist, create it
857
0567016b 858 $usth = $pdo->prepare(
2c08214a 859 "INSERT INTO ttrss_entries
0567016b 860 (title,
2c08214a
AD
861 guid,
862 link,
863 updated,
864 content,
865 content_hash,
866 no_orig_date,
867 date_updated,
868 date_entered,
869 comments,
870 num_comments,
b30abdad 871 plugin_data,
6b461797 872 lang,
2c08214a
AD
873 author)
874 VALUES
0567016b 875 (?, ?, ?, ?, ?, ?,
5ba1ddd4 876 false,
2c08214a 877 NOW(),
0567016b
AD
878 ?, ?, ?, ?, ?, ?)");
879
880 $usth->execute([$entry_title,
881 $entry_guid_hashed,
882 $entry_link,
883 $entry_timestamp_fmt,
93e70e36 884 "$entry_content",
0567016b
AD
885 $entry_current_hash,
886 $date_feed_processed,
887 $entry_comments,
187abfe7 888 (int)$num_comments,
0567016b 889 $entry_plugin_data,
93e70e36
AD
890 "$entry_language",
891 "$entry_author"]);
e8291805 892
2c08214a
AD
893 }
894
0567016b 895 $csth->execute([$entry_guid, $entry_guid_hashed]);
2c08214a
AD
896
897 $entry_ref_id = 0;
898 $entry_int_id = 0;
899
0567016b 900 if ($row = $csth->fetch()) {
2c08214a 901
68cccafc 902 _debug("base guid found, checking for user record", $debug_enabled);
2c08214a 903
0567016b 904 $ref_id = $row['id'];
2c08214a
AD
905 $entry_ref_id = $ref_id;
906
e6c886bf 907 if (RSSUtils::find_article_filter($article_filters, "filter")) {
0500e14c 908 $pdo->commit();
2c08214a
AD
909 continue;
910 }
911
e6c886bf 912 $score = RSSUtils::calculate_article_score($article_filters) + $entry_score_modifier;
2c08214a 913
6de3a1be 914 _debug("initial score: $score [including plugin modifier: $entry_score_modifier]", $debug_enabled);
2c08214a 915
4f186b1f
AD
916 // check for user post link to main table
917
0567016b
AD
918 $sth = $pdo->prepare("SELECT ref_id, int_id FROM ttrss_user_entries WHERE
919 ref_id = ? AND owner_uid = ?");
920 $sth->execute([$ref_id, $owner_uid]);
2c08214a
AD
921
922 // okay it doesn't exist - create user entry
0567016b
AD
923 if ($row = $sth->fetch()) {
924 $entry_ref_id = $row["ref_id"];
925 $entry_int_id = $row["int_id"];
2c08214a 926
0567016b
AD
927 _debug("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
928 } else {
929
68cccafc 930 _debug("user record not found, creating...", $debug_enabled);
2c08214a 931
e6c886bf 932 if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) {
0567016b
AD
933 $unread = 1;
934 $last_read_qpart = null;
2c08214a 935 } else {
0567016b 936 $unread = 0;
d4c05d0b 937 $last_read_qpart = date("Y-m-d H:i"); // we can't use NOW() here because it gets quoted
2c08214a
AD
938 }
939
e6c886bf 940 if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
0567016b 941 $marked = 1;
2c08214a 942 } else {
0567016b 943 $marked = 0;
2c08214a
AD
944 }
945
e6c886bf 946 if (RSSUtils::find_article_filter($article_filters, 'publish')) {
0567016b 947 $published = 1;
2c08214a 948 } else {
0567016b 949 $published = 0;
2c08214a
AD
950 }
951
26ad257d 952 $last_marked = ($marked == 1) ? 'NOW()' : 'NULL';
953 $last_published = ($published == 1) ? 'NOW()' : 'NULL';
7873d588 954
0567016b 955 $sth = $pdo->prepare(
2c08214a
AD
956 "INSERT INTO ttrss_user_entries
957 (ref_id, owner_uid, feed_id, unread, last_read, marked,
7873d588
AD
958 published, score, tag_cache, label_cache, uuid,
959 last_marked, last_published)
aa16334f 960 VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")");
2c08214a 961
0567016b 962 $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
aa16334f 963 $published, $score]);
2c08214a 964
0567016b
AD
965 $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
966 ref_id = ? AND owner_uid = ? AND
967 feed_id = ? LIMIT 1");
2c08214a 968
0567016b
AD
969 $sth->execute([$ref_id, $owner_uid, $feed]);
970
971 if ($row = $sth->fetch())
972 $entry_int_id = $row['int_id'];
2c08214a
AD
973 }
974
0567016b 975 _debug("resulting RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
2c08214a 976
e854442e 977 if (DB_TYPE == "pgsql") {
49a888ec
AD
978 $tsvector_combined = mb_substr($entry_title . ' ' .
979 preg_replace('/[<\?\:]/', ' ', strip_tags($entry_content)),
0567016b 980 0, 1000000);
e854442e 981
49a888ec 982 $tsvector_qpart = "tsvector_combined = to_tsvector(".$pdo->quote($feed_language).", ".$pdo->quote($tsvector_combined)."),";
e854442e
AD
983
984 } else {
985 $tsvector_qpart = "";
986 }
987
49a888ec
AD
988 //_debug($tsvector_qpart);
989
0567016b 990 $sth = $pdo->prepare("UPDATE ttrss_entries
49a888ec 991 SET title = :title,
e854442e 992 $tsvector_qpart
49a888ec
AD
993 content = :content,
994 content_hash = :content_hash,
995 updated = :updated,
996 num_comments = :num_comments,
997 plugin_data = :plugin_data,
998 author = :author,
999 lang = :lang
1000 WHERE id = :id");
1001
1002 $sth->execute([":title" => $entry_title,
93e70e36 1003 ":content" => "$entry_content",
49a888ec
AD
1004 ":content_hash" => $entry_current_hash,
1005 ":updated" => $entry_timestamp_fmt,
1006 ":num_comments" => (int)$num_comments,
1007 ":plugin_data" => $entry_plugin_data,
93e70e36 1008 ":author" => "$entry_author",
49a888ec
AD
1009 ":lang" => $entry_language,
1010 ":id" => $ref_id]);
b1840673 1011
59e83455 1012 // update aux data
0567016b
AD
1013 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1014 SET score = ? WHERE ref_id = ?");
1015 $sth->execute([$score, $ref_id]);
59e83455 1016
b1840673 1017 if ($mark_unread_on_update) {
24e6ff5d
AD
1018 _debug("article updated, marking unread as requested.", $debug_enabled);
1019
0567016b
AD
1020 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1021 SET last_read = null, unread = true WHERE ref_id = ?");
1022 $sth->execute([$ref_id]);
2c08214a
AD
1023 }
1024 }
1025
a29fe121
AD
1026 _debug("assigning labels [other]...", $debug_enabled);
1027
1028 foreach ($article_labels as $label) {
7c9b5a3f 1029 Labels::add_article($entry_ref_id, $label[1], $owner_uid);
a29fe121
AD
1030 }
1031
1032 _debug("assigning labels [filters]...", $debug_enabled);
2c08214a 1033
e6c886bf 1034 RSSUtils::assign_article_to_label_filters($entry_ref_id, $article_filters,
b24504b1 1035 $owner_uid, $article_labels);
2c08214a 1036
68cccafc 1037 _debug("looking for enclosures...", $debug_enabled);
2c08214a
AD
1038
1039 // enclosures
1040
1041 $enclosures = array();
1042
19b3992b 1043 $encs = $item->get_enclosures();
2c08214a 1044
19b3992b
AD
1045 if (is_array($encs)) {
1046 foreach ($encs as $e) {
1047 $e_item = array(
86e53429
AD
1048 rewrite_relative_url($site_url, $e->link),
1049 $e->type, $e->length, $e->title, $e->width, $e->height);
102a0135
AD
1050
1051 // Yet another episode of "mysql utf8_general_ci is gimped"
1052 if (DB_TYPE == "mysql") {
1053 for ($i = 0; $i < count($e_item); $i++) {
1054 if (is_string($e_item[$i])) {
1055 $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
1056 }
1057 }
1058 }
1059
1060 array_push($enclosures, $e_item);
2c08214a
AD
1061 }
1062 }
1063
388d4dfa 1064 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 1065 RSSUtils::cache_enclosures($enclosures, $site_url, $debug_enabled);
388d4dfa 1066
2c08214a 1067 if ($debug_enabled) {
68cccafc 1068 _debug("article enclosures:", $debug_enabled);
2c08214a
AD
1069 print_r($enclosures);
1070 }
1071
0567016b
AD
1072 $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures
1073 WHERE content_url = ? AND post_id = ?");
2c08214a 1074
0567016b
AD
1075 $usth = $pdo->prepare("INSERT INTO ttrss_enclosures
1076 (content_url, content_type, title, duration, post_id, width, height) VALUES
1077 (?, ?, ?, ?, ?, ?, ?)");
5c54e683 1078
2c08214a 1079 foreach ($enclosures as $enc) {
0567016b
AD
1080 $enc_url = $enc[0];
1081 $enc_type = $enc[1];
0500e14c 1082 $enc_dur = (int)$enc[2];
0567016b 1083 $enc_title = $enc[3];
523bd90b
FE
1084 $enc_width = intval($enc[4]);
1085 $enc_height = intval($enc[5]);
2c08214a 1086
0567016b 1087 $esth->execute([$enc_url, $entry_ref_id]);
2c08214a 1088
0567016b
AD
1089 if (!$esth->fetch()) {
1090 $usth->execute([$enc_url, $enc_type, (string)$enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]);
2c08214a
AD
1091 }
1092 }
1093
2c08214a
AD
1094 // check for manual tags (we have to do it here since they're loaded from filters)
1095
1096 foreach ($article_filters as $f) {
6aff7845 1097 if ($f["type"] == "tag") {
2c08214a 1098
6aff7845 1099 $manual_tags = trim_array(explode(",", $f["param"]));
2c08214a
AD
1100
1101 foreach ($manual_tags as $tag) {
1102 if (tag_is_valid($tag)) {
1103 array_push($entry_tags, $tag);
1104 }
1105 }
1106 }
1107 }
1108
1109 // Skip boring tags
1110
6322ac79 1111 $boring_tags = trim_array(explode(",", mb_strtolower(get_pref(
2c08214a
AD
1112 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
1113
1114 $filtered_tags = array();
1115 $tags_to_cache = array();
1116
1117 if ($entry_tags && is_array($entry_tags)) {
1118 foreach ($entry_tags as $tag) {
1119 if (array_search($tag, $boring_tags) === false) {
1120 array_push($filtered_tags, $tag);
1121 }
1122 }
1123 }
1124
1125 $filtered_tags = array_unique($filtered_tags);
1126
1127 if ($debug_enabled) {
68cccafc 1128 _debug("filtered article tags:", $debug_enabled);
2c08214a
AD
1129 print_r($filtered_tags);
1130 }
1131
1132 // Save article tags in the database
1133
1134 if (count($filtered_tags) > 0) {
1135
0567016b
AD
1136 $tsth = $pdo->prepare("SELECT id FROM ttrss_tags
1137 WHERE tag_name = ? AND post_int_id = ? AND
1138 owner_uid = ? LIMIT 1");
1139
1140 $usth = $pdo->prepare("INSERT INTO ttrss_tags
1141 (owner_uid,tag_name,post_int_id)
1142 VALUES (?, ?, ?)");
2c08214a
AD
1143
1144 foreach ($filtered_tags as $tag) {
1145
1146 $tag = sanitize_tag($tag);
2c08214a
AD
1147
1148 if (!tag_is_valid($tag)) continue;
1149
0567016b 1150 $tsth->execute([$tag, $entry_int_id, $owner_uid]);
2c08214a 1151
0567016b
AD
1152 if (!$tsth->fetch()) {
1153 $usth->execute([$owner_uid, $tag, $entry_int_id]);
e6c886bf 1154 }
2c08214a
AD
1155
1156 array_push($tags_to_cache, $tag);
1157 }
1158
1159 /* update the cache */
1160
1161 $tags_to_cache = array_unique($tags_to_cache);
1162
0567016b 1163 $tags_str = join(",", $tags_to_cache);
2c08214a 1164
0567016b
AD
1165 $tsth = $pdo->prepare("UPDATE ttrss_user_entries
1166 SET tag_cache = ? WHERE ref_id = ?
1167 AND owner_uid = ?");
1168 $tsth->execute([$tags_str, $entry_ref_id, $owner_uid]);
2c08214a
AD
1169 }
1170
68cccafc 1171 _debug("article processed", $debug_enabled);
0500e14c
AD
1172
1173 $pdo->commit();
2c08214a
AD
1174 }
1175
68cccafc 1176 _debug("purging feed...", $debug_enabled);
2c08214a 1177
a42c55f0 1178 purge_feed($feed, 0, $debug_enabled);
2c08214a 1179
0567016b
AD
1180 $sth = $pdo->prepare("UPDATE ttrss_feeds
1181 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
1182 $sth->execute([$feed]);
2c08214a
AD
1183
1184 } else {
1185
0567016b 1186 $error_msg = mb_substr($rss->error(), 0, 245);
2c08214a 1187
4ad04ee2
AD
1188 _debug("fetch error: $error_msg", $debug_enabled);
1189
1190 if (count($rss->errors()) > 1) {
1191 foreach ($rss->errors() as $error) {
1192 _debug("+ $error");
1193 }
1194 }
2c08214a 1195
0567016b
AD
1196 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
1197 last_updated = NOW(), last_unconditional = NOW() WHERE id = ?");
1198 $sth->execute([$error_msg, $feed]);
2c08214a 1199
88edaa93 1200 unset($rss);
0567016b 1201 return false;
88edaa93 1202 }
2c08214a 1203
68cccafc 1204 _debug("done", $debug_enabled);
88edaa93 1205
7b55001e 1206 return true;
2c08214a
AD
1207 }
1208
/**
 * Downloads media enclosures into the local image cache directory.
 *
 * Each enclosure is an indexed array: element 0 is the URL and element 1 is
 * the content type. Only types matching image, audio or video are cached.
 * The cache file name is the SHA-1 of the absolute enclosure URL.
 *
 * @param array $enclosures list of enclosure arrays
 * @param string $site_url base URL used to resolve relative enclosure links
 * @param bool $debug when true, log each processed enclosure
 */
static function cache_enclosures($enclosures, $site_url, $debug) {
	foreach ($enclosures as $enclosure) {

		// anything that is not image/audio/video is ignored
		if (!preg_match("/(image|audio|video)/", $enclosure[1])) {
			continue;
		}

		$url = rewrite_relative_url($site_url, $enclosure[0]);
		$cache_path = CACHE_DIR . "/images/" . sha1($url);

		if ($debug) _debug("cache_enclosures: downloading: $url to $cache_path");

		if (file_exists($cache_path)) {
			// already cached: only refresh the modification time
			touch($cache_path);
			continue;
		}

		$data = fetch_file_contents($url);

		// very small (or failed) downloads are not stored
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cache_path, $data);
		}
	}
}
1232
/**
 * Caches media files referenced from an article's HTML.
 *
 * Looks at the src attribute of <img> elements and of <source> elements
 * nested in <video>/<audio>, skipping inline "data:" URIs. The cache file
 * name is the SHA-1 of the absolute media URL.
 *
 * @param string $html article content
 * @param string $site_url base URL used to resolve relative src values
 * @param bool $debug when true, log each processed URL
 */
static function cache_media($html, $site_url, $debug) {
	// collect libxml parse errors internally instead of emitting warnings
	libxml_use_internal_errors(true);

	// prepended so DOMDocument parses the fragment as UTF-8
	$charset_hack = '<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
	</head>';

	$doc = new DOMDocument();
	$doc->loadHTML($charset_hack . $html);
	$xpath = new DOMXPath($doc);

	$nodes = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])');

	foreach ($nodes as $node) {
		if (!$node->hasAttribute('src')) {
			continue;
		}

		// inline data: URIs have nothing to download
		if (strpos($node->getAttribute('src'), "data:") === 0) {
			continue;
		}

		$url = rewrite_relative_url($site_url, $node->getAttribute('src'));
		$cache_path = CACHE_DIR . "/images/" . sha1($url);

		if ($debug) _debug("cache_media: downloading: $url to $cache_path");

		if (file_exists($cache_path)) {
			// already cached: only refresh the modification time
			touch($cache_path);
			continue;
		}

		$data = fetch_file_contents($url);

		// very small (or failed) downloads are not stored
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cache_path, $data);
		}
	}
}
1266
/**
 * Deletes rows from ttrss_error_log that were created more than 7 days ago.
 *
 * @param bool $debug when true, log that the cleanup is running
 */
static function expire_error_log($debug) {
	if ($debug) _debug("Removing old error log entries...");

	$pdo = Db::pdo();

	// the date arithmetic syntax depends on the database type
	$query = (DB_TYPE == "pgsql") ?
		"DELETE FROM ttrss_error_log
			WHERE created_at < NOW() - INTERVAL '7 days'" :
		"DELETE FROM ttrss_error_log
			WHERE created_at < DATE_SUB(NOW(), INTERVAL 7 DAY)";

	$pdo->query($query);
}
1280
/**
 * Removes *.lock files in LOCK_DIRECTORY that are not currently locked and
 * were last modified more than two days ago.
 *
 * @param bool $debug when true, log how many files were removed
 */
static function expire_lock_files($debug) {
	$removed = 0;

	$candidates = is_writable(LOCK_DIRECTORY) ? glob(LOCK_DIRECTORY . "/*.lock") : false;

	if ($candidates) {
		foreach ($candidates as $path) {
			// a lock that is still held is never touched
			if (file_is_locked(basename($path))) {
				continue;
			}

			// keep anything modified within the last two days
			if (time() - filemtime($path) <= 86400*2) {
				continue;
			}

			unlink($path);
			++$removed;
		}
	}

	if ($debug) _debug("Removed $removed old lock files.");
}
1301
/**
 * Removes stale files from the cache subdirectories.
 *
 * For each of the simplepie, feeds, images, export and upload directories
 * under CACHE_DIR, regular files whose modification time is older than
 * CACHE_MAX_DAYS days are deleted. Directories that are not writable are
 * skipped.
 *
 * @param bool $debug when true, log the number of files removed per directory
 */
static function expire_cached_files($debug) {
	$max_age = 86400*CACHE_MAX_DAYS;

	foreach (array("simplepie", "feeds", "images", "export", "upload") as $dir) {
		$cache_dir = CACHE_DIR . "/$dir";

		$num_deleted = 0;

		if (is_writable($cache_dir)) {
			$files = glob("$cache_dir/*");

			if ($files) {
				foreach ($files as $file) {
					// glob() also returns subdirectories, which unlink() cannot remove
					if (!is_file($file)) {
						continue;
					}

					// count only the files that were actually deleted
					if (time() - filemtime($file) > $max_age && unlink($file)) {
						++$num_deleted;
					}
				}
			}
		}

		if ($debug) _debug("$cache_dir: removed $num_deleted files.");
	}
}
2c08214a 1327
/**
 * Source: http://www.php.net/manual/en/function.parse-url.php#104527
 * Returns the url query as associative array
 *
 * Names and values are returned as-is (no URL decoding is performed).
 * A parameter without "=" maps to null, only the first "=" separates the
 * name from the value, and empty segments (e.g. from "a=1&&b=2" or an
 * empty query) are ignored.
 *
 * @param string query
 * @return array params
 */
static function convertUrlQuery($query) {
	$params = array();

	foreach (explode('&', (string)$query) as $param) {
		// skip empty segments instead of creating a "" key
		if ($param === '') {
			continue;
		}

		// limit of 2 keeps any further "=" as part of the value
		$item = explode('=', $param, 2);

		$params[$item[0]] = isset($item[1]) ? $item[1] : null;
	}

	return $params;
}
92c14e9d 1347
/**
 * Evaluates filters against an article and collects the actions of every
 * matching filter.
 *
 * Each filter is an array with the keys "match_any_rule", "inverse",
 * "rules" and "actions". Each rule has "reg_exp", "inverse" and "type"
 * (title, content, both, link, author or tag). Rules are matched as
 * case-insensitive UTF-8 regular expressions; rules with an empty
 * expression are skipped. With match_any_rule the first matching rule
 * decides, otherwise evaluation stops at the first rule that fails.
 *
 * Processing stops as soon as an action of type "stop" is collected.
 *
 * @param array $filters filters to evaluate
 * @param string $title article title
 * @param string $content article content (line breaks and tabs are removed before matching)
 * @param string $link article link
 * @param string $author article author
 * @param array $tags article tags
 * @param array|false $matched_rules when an array is passed, the last rule
 *   evaluated for each matching filter is appended to it
 * @return array list of actions from matching filters
 */
static function get_article_filters($filters, $title, $content, $link, $author, $tags, &$matched_rules = false) {
	$matches = array();

	// we don't need to deal with multiline regexps; flattened once, on first use
	$flat_content = null;

	foreach ($filters as $filter) {
		$match_any_rule = $filter["match_any_rule"];
		$inverse = $filter["inverse"];
		$filter_match = false;

		// reset per filter so a rule from a previous filter is never reported
		$last_rule = null;

		foreach ($filter["rules"] as $rule) {
			$last_rule = $rule;

			$match = false;
			$reg_exp = str_replace('/', '\/', $rule["reg_exp"]);
			$rule_inverse = $rule["inverse"];

			if (!$reg_exp)
				continue;

			switch ($rule["type"]) {
				case "title":
					$match = @preg_match("/$reg_exp/iu", $title);
					break;
				case "content":
					if ($flat_content === null) {
						$flat_content = preg_replace("/[\r\n\t]/", "", $content);
					}

					$match = @preg_match("/$reg_exp/iu", $flat_content);
					break;
				case "both":
					if ($flat_content === null) {
						$flat_content = preg_replace("/[\r\n\t]/", "", $content);
					}

					$match = (@preg_match("/$reg_exp/iu", $title) || @preg_match("/$reg_exp/iu", $flat_content));
					break;
				case "link":
					$match = @preg_match("/$reg_exp/iu", $link);
					break;
				case "author":
					$match = @preg_match("/$reg_exp/iu", $author);
					break;
				case "tag":
					// articles without tags simply never match a tag rule
					if (is_array($tags)) {
						foreach ($tags as $tag) {
							if (@preg_match("/$reg_exp/iu", $tag)) {
								$match = true;
								break;
							}
						}
					}
					break;
			}

			if ($rule_inverse) $match = !$match;

			if ($match_any_rule) {
				if ($match) {
					$filter_match = true;
					break;
				}
			} else {
				$filter_match = $match;
				if (!$match) {
					break;
				}
			}
		}

		if ($inverse) $filter_match = !$filter_match;

		if ($filter_match) {
			// an inverse filter without rules matches but has no rule to report
			if (is_array($matched_rules) && $last_rule !== null) {
				array_push($matched_rules, $last_rule);
			}

			foreach ($filter["actions"] AS $action) {
				array_push($matches, $action);

				// if Stop action encountered, perform no further processing
				if (isset($action["type"]) && $action["type"] == "stop") return $matches;
			}
		}
	}

	return $matches;
}
1427
/**
 * Returns the first filter action whose "type" equals the given name.
 *
 * @param array $filters list of filter actions
 * @param string $filter_name action type to look for
 * @return array|false the matching action, or false when there is none
 */
static function find_article_filter($filters, $filter_name) {
	foreach ($filters as $action) {
		if ($action["type"] != $filter_name) {
			continue;
		}

		return $action;
	}

	return false;
}
1436
/**
 * Returns every filter action whose "type" equals the given name, in their
 * original order.
 *
 * @param array $filters list of filter actions
 * @param string $filter_name action type to look for
 * @return array matching actions (empty when there are none)
 */
static function find_article_filters($filters, $filter_name) {
	$found = array();

	foreach ($filters as $action) {
		if ($action["type"] != $filter_name) {
			continue;
		}

		$found[] = $action;
	}

	return $found;
}
1447
/**
 * Sums the "param" values of all filter actions of type "score".
 *
 * @param array $filters list of filter actions
 * @return int|float total score (0 when no score action is present)
 */
static function calculate_article_score($filters) {
	$total = 0;

	foreach ($filters as $action) {
		if ($action["type"] != "score") {
			continue;
		}

		$total += $action["param"];
	}

	return $total;
}
1458
/**
 * Tells whether any label in the list has the given caption.
 *
 * The caption is read from index 1 of each label entry.
 *
 * @param array $labels list of label entries
 * @param string $caption caption to look for
 * @return bool true when a label with that caption exists
 */
static function labels_contains_caption($labels, $caption) {
	$found = false;

	foreach ($labels as $entry) {
		if ($entry[1] == $caption) {
			$found = true;
			break;
		}
	}

	return $found;
}
1468
/**
 * Applies "label" filter actions to an article.
 *
 * For every action of type "label", the label named by the action's
 * "param" is added to the article unless $article_labels already contains
 * a label with that caption.
 *
 * @param int $id article id passed to Labels::add_article()
 * @param array $filters list of filter actions
 * @param int $owner_uid owner of the article
 * @param array $article_labels labels the article already has
 */
static function assign_article_to_label_filters($id, $filters, $owner_uid, $article_labels) {
	foreach ($filters as $action) {
		if ($action["type"] != "label") {
			continue;
		}

		// already assigned: nothing to do
		if (RSSUtils::labels_contains_caption($article_labels, $action["param"])) {
			continue;
		}

		Labels::add_article($id, $action["param"], $owner_uid);
	}
}
87764a50 1478
/**
 * Builds a GUID-like string from an article title.
 *
 * HTML tags are stripped, the text is lowercased (UTF-8), and spaces,
 * quotes, backslashes and the punctuation characters , . : ; are each
 * replaced with a dash.
 *
 * @param string $title article title
 * @return string derived identifier
 */
static function make_guid_from_title($title) {
	$plain = strip_tags($title);
	$lowered = mb_strtolower($plain, 'utf-8');

	return preg_replace("/[ \"\',.:;]/", "-", $lowered);
}
1483
/**
 * Deletes counter cache rows that no longer have a matching feed or feed
 * category for the same owner.
 *
 * @param bool $debug when true, log how many rows were removed from each cache table
 */
static function cleanup_counters_cache($debug) {
	$pdo = Db::pdo();

	// feed counters without a corresponding ttrss_feeds row
	$feed_result = $pdo->query("DELETE FROM ttrss_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feeds WHERE
			id = feed_id AND
			ttrss_counters_cache.owner_uid = ttrss_feeds.owner_uid) = 0");

	$frows = $feed_result->rowCount();

	// category counters without a corresponding ttrss_feed_categories row
	$cat_result = $pdo->query("DELETE FROM ttrss_cat_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feed_categories WHERE
			id = feed_id AND
			ttrss_cat_counters_cache.owner_uid = ttrss_feed_categories.owner_uid) = 0");

	$crows = $cat_result->rowCount();

	if ($debug) _debug("Removed $frows (feeds) $crows (cats) orphaned counter cache entries.");
}
1505
/**
 * Runs the house keeping hook of a single user's plugins.
 *
 * A fresh PluginHost is created, the user's plugins are loaded into it and
 * HOOK_HOUSE_KEEPING is run on that host.
 *
 * @param int $owner_uid user whose plugins are loaded
 */
static function housekeeping_user($owner_uid) {
	$user_host = new PluginHost();

	load_user_plugins($owner_uid, $user_host);

	$user_host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
1513
e6c886bf
AD
1514 static function housekeeping_common($debug) {
1515 RSSUtils::expire_cached_files($debug);
1516 RSSUtils::expire_lock_files($debug);
1517 RSSUtils::expire_error_log($debug);
e2cf81e2 1518
e6c886bf 1519 $count = RSSUtils::update_feedbrowser_cache();
e2cf81e2
AD
1520 _debug("Feedbrowser updated, $count feeds processed.");
1521
a230bf88 1522 Article::purge_orphans( true);
e6c886bf 1523 RSSUtils::cleanup_counters_cache($debug);
e2cf81e2 1524
9b736a20
AD
1525 //$rc = cleanup_tags( 14, 50000);
1526 //_debug("Cleaned $rc cached tags.");
8e470220 1527
00f22824 1528 PluginHost::getInstance()->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
e2cf81e2 1529 }
ea79a0e0 1530
e6c886bf
AD
1531 static function check_feed_favicon($site_url, $feed) {
1532 # print "FAVICON [$site_url]: $favicon_url\n";
a230bf88
AD
1533
1534 $icon_file = ICONS_DIR . "/$feed.ico";
1535
1536 if (!file_exists($icon_file)) {
1537 $favicon_url = get_favicon_url($site_url);
1538
1539 if ($favicon_url) {
1540 // Limiting to "image" type misses those served with text/plain
1541 $contents = fetch_file_contents($favicon_url); // , "image");
1542
1543 if ($contents) {
1544 // Crude image type matching.
1545 // Patterns gleaned from the file(1) source code.
1546 if (preg_match('/^\x00\x00\x01\x00/', $contents)) {
1547 // 0 string \000\000\001\000 MS Windows icon resource
1548 //error_log("check_feed_favicon: favicon_url=$favicon_url isa MS Windows icon resource");
1549 }
1550 elseif (preg_match('/^GIF8/', $contents)) {
1551 // 0 string GIF8 GIF image data
1552 //error_log("check_feed_favicon: favicon_url=$favicon_url isa GIF image");
1553 }
1554 elseif (preg_match('/^\x89PNG\x0d\x0a\x1a\x0a/', $contents)) {
1555 // 0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data
1556 //error_log("check_feed_favicon: favicon_url=$favicon_url isa PNG image");
1557 }
1558 elseif (preg_match('/^\xff\xd8/', $contents)) {
1559 // 0 beshort 0xffd8 JPEG image data
1560 //error_log("check_feed_favicon: favicon_url=$favicon_url isa JPG image");
1561 }
f9ad33c2
GG
1562 elseif (preg_match('/^BM/', $contents)) {
1563 // 0 string BM PC bitmap (OS2, Windows BMP files)
1564 //error_log("check_feed_favicon, favicon_url=$favicon_url isa BMP image");
1565 }
a230bf88
AD
1566 else {
1567 //error_log("check_feed_favicon: favicon_url=$favicon_url isa UNKNOWN type");
1568 $contents = "";
1569 }
1570 }
1571
1572 if ($contents) {
1573 $fp = @fopen($icon_file, "w");
1574
1575 if ($fp) {
1576 fwrite($fp, $contents);
1577 fclose($fp);
1578 chmod($icon_file, 0644);
1579 }
1580 }
1581 }
1582 return $icon_file;
1583 }
1584 }
e6c886bf
AD
1585
1586
1587
bec5ba93 1588}