]> git.wh0rd.org - tt-rss.git/blame - classes/rssutils.php
fix session write handler always assuming that database entry exists and failing...
[tt-rss.git] / classes / rssutils.php
CommitLineData
2c08214a 1<?php
e6c886bf
AD
2class RSSUtils {
/**
 * Computes a SHA-1 fingerprint of an article.
 *
 * Every element of $article except the one stored under the "feed" key
 * contributes to the result; null values are skipped. Array values are
 * joined with commas, markup is removed with strip_tags(), and the
 * per-field digests are concatenated in iteration order. The plugin names
 * reported by $pluginhost are mixed in as well, so a different set of
 * names yields a different hash for the same article.
 *
 * @param array $article article fields keyed by field name
 * @param object $pluginhost object providing get_plugin_names()
 * @return string 40-character hexadecimal SHA-1 digest
 */
static function calculate_article_hash($article, $pluginhost) {
	$tmp = "";

	foreach ($article as $k => $v) {
		// Strict comparison on purpose: on PHP < 8 the loose form
		// `0 != "feed"` evaluates to false, which would silently drop an
		// element stored under integer key 0 from the hash.
		if ($k !== "feed" && isset($v)) {
			$x = strip_tags(is_array($v) ? implode(",", $v) : $v);

			$tmp .= sha1("$k:" . sha1($x));
		}
	}

	return sha1(implode(",", $pluginhost->get_plugin_names()) . $tmp);
}
18
102a0135
AD
/**
 * Strips utf8mb4 characters (i.e. emoji) for mysql.
 *
 * Every code point in the range U+10000..U+10FFFF is replaced with
 * U+FFFD (the bytes EF BF BD).
 *
 * @param string $str text to process
 * @return string processed text; the input is returned unchanged when the
 *                replacement itself fails
 */
static function strip_utf8mb4($str) {
	$stripped = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $str);

	// preg_replace() returns null on failure (for instance when the subject
	// is not valid UTF-8 while the /u modifier is set). Handing that null
	// back would wipe the caller's field, so keep the original text instead.
	return $stripped === null ? $str : $stripped;
}
102a0135 23
/**
 * Rebuilds the ttrss_feedbrowser_cache table.
 *
 * Selects up to 1000 (feed_url, site_url, title) groups from ttrss_feeds,
 * ordered by subscriber count, leaving out every feed URL that has at
 * least one private or credential-bearing subscription. The cache table
 * is then emptied and refilled inside a single transaction; a feed URL
 * already written during this run is not inserted a second time.
 *
 * @return int number of rows inserted into the cache
 * @throws PDOException rethrown after the transaction has been rolled back
 */
static function update_feedbrowser_cache() {

	$pdo = Db::pdo();

	$sth = $pdo->query("SELECT feed_url, site_url, title, COUNT(id) AS subscribers
		FROM ttrss_feeds WHERE feed_url NOT IN (SELECT feed_url FROM ttrss_feeds
			WHERE private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%')
		GROUP BY feed_url, site_url, title ORDER BY subscribers DESC LIMIT 1000");

	$pdo->beginTransaction();

	try {
		$pdo->query("DELETE FROM ttrss_feedbrowser_cache");

		// Both statements are loop-invariant: prepare them once and
		// re-execute them for every row.
		$exists_sth = $pdo->prepare("SELECT subscribers FROM
			ttrss_feedbrowser_cache WHERE feed_url = ?");

		$insert_sth = $pdo->prepare("INSERT INTO ttrss_feedbrowser_cache
			(feed_url, site_url, title, subscribers)
			VALUES
			(?, ?, ?, ?)");

		$count = 0;

		while ($line = $sth->fetch()) {

			// The source query groups by (feed_url, site_url, title), so
			// the same feed_url can show up more than once; only the first
			// occurrence is stored.
			$exists_sth->execute([$line["feed_url"]]);
			$already_cached = $exists_sth->fetch();
			$exists_sth->closeCursor();

			if (!$already_cached) {
				$insert_sth->execute([$line["feed_url"], $line["site_url"],
					$line["title"], $line["subscribers"]]);

				++$count;
			}
		}

		$pdo->commit();

	} catch (PDOException $e) {
		// do not leave a half-rebuilt cache inside an open transaction
		if ($pdo->inTransaction()) $pdo->rollBack();

		throw $e;
	}

	return $count;
}
70
/**
 * Runs one batch of scheduled feed updates.
 *
 * Steps, in order:
 *  - terminates the script when the database schema version does not match
 *    SCHEMA_VERSION;
 *  - selects the distinct feed URLs that are due for an update and are not
 *    marked as being updated within the last 10 minutes, least recently
 *    updated first;
 *  - stamps last_update_started on those URLs;
 *  - for every selected URL, updates each due feed subscribed to that URL;
 *  - runs housekeeping for every owner whose feed was processed;
 *  - sends headline digests.
 *
 * @param int $limit value used for the SQL LIMIT clauses; a falsy value
 *                   removes the clause
 * @param bool $debug enables part of the progress messages and is passed
 *                    on to Digest::send_headlines_digests()
 * @return int number of feeds processed
 */
static function update_daemon_common($limit = DAEMON_FEED_LIMIT, $debug = true) {
	$schema_version = get_schema_version();

	if ($schema_version != SCHEMA_VERSION) {
		die("Schema version is wrong, please upgrade the database.\n");
	}

	$pdo = Db::pdo();

	// Optionally skip feeds of users who have not logged in recently.
	if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
		if (DB_TYPE == "pgsql") {
			$login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
		} else {
			$login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
		}
	} else {
		$login_thresh_qpart = "";
	}

	// A feed is due when its own interval (or, for interval 0, the owner's
	// DEFAULT_UPDATE_INTERVAL preference) has elapsed, or when it has never
	// been updated. A preference value of -1 excludes the feed.
	if (DB_TYPE == "pgsql") {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_user_prefs.value || ' minutes') AS INTERVAL)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	} else {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL CONVERT(ttrss_user_prefs.value, SIGNED INTEGER) MINUTE)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	}

	// Test if feed is currently being updated by another process.
	if (DB_TYPE == "pgsql") {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '10 minutes')";
	} else {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 10 MINUTE))";
	}

	$query_limit = $limit ? sprintf("LIMIT %d", $limit) : "";

	// Update the least recently updated feeds first
	$query_order = "ORDER BY last_updated";
	if (DB_TYPE == "pgsql") $query_order .= " NULLS FIRST";

	$query = "SELECT DISTINCT ttrss_feeds.feed_url, ttrss_feeds.last_updated
		FROM
			ttrss_feeds, ttrss_users, ttrss_user_prefs
		WHERE
			ttrss_feeds.owner_uid = ttrss_users.id
			AND ttrss_user_prefs.profile IS NULL
			AND ttrss_users.id = ttrss_user_prefs.owner_uid
			AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL'
			$login_thresh_qpart $update_limit_qpart
			$updstart_thresh_qpart
			$query_order $query_limit";

	$res = $pdo->query($query);

	$feeds_to_update = array();
	while ($line = $res->fetch()) {
		array_push($feeds_to_update, $line['feed_url']);
	}

	if ($debug) _debug(sprintf("Scheduled %d feeds to update...", count($feeds_to_update)));

	// Update last_update_started before actually starting the batch
	// in order to minimize collision risk for parallel daemon tasks
	if (count($feeds_to_update) > 0) {
		$feeds_qmarks = arr_qmarks($feeds_to_update);

		$tmph = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
			WHERE feed_url IN ($feeds_qmarks)");
		$tmph->execute($feeds_to_update);
	}

	$nf = 0;
	$bstarted = microtime(true);

	$batch_owners = array();

	// since we have the data cached, we can deal with other feeds with the same url
	$usth = $pdo->prepare("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
		FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
			ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
			ttrss_users.id = ttrss_user_prefs.owner_uid AND
			ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL' AND
			ttrss_user_prefs.profile IS NULL AND
			feed_url = ?
			$update_limit_qpart
			$login_thresh_qpart
		ORDER BY ttrss_feeds.id $query_limit");

	foreach ($feeds_to_update as $feed) {
		if ($debug) _debug("Base feed: $feed");

		$usth->execute([$feed]);

		// Process every due feed that shares this URL, not just the first
		// row: the cached copy written by update_rss_feed() is only reused
		// for a short time, so the siblings have to follow immediately.
		// The rows are read up front so that no result set stays open on
		// the connection while update_rss_feed() issues its own queries.
		foreach ($usth->fetchAll() as $tline) {
			if ($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);

			if (!in_array($tline["owner_uid"], $batch_owners, true))
				array_push($batch_owners, $tline["owner_uid"]);

			$fstarted = microtime(true);

			try {
				RSSUtils::update_rss_feed($tline["id"], true);
			} catch (PDOException $e) {
				// a database failure on one feed must not abort the batch
				Logger::get()->log_error(E_USER_NOTICE, $e->getMessage(), $e->getFile(), $e->getLine(), $e->getTraceAsString());
			}

			// update_rss_feed() may have switched debug output off
			_debug_suppress(false);

			_debug(sprintf(" %.4f (sec)", microtime(true) - $fstarted));

			++$nf;
		}
	}

	if ($nf > 0) {
		_debug(sprintf("Processed %d feeds in %.4f (sec), %.4f (sec/feed avg)", $nf,
			microtime(true) - $bstarted, (microtime(true) - $bstarted) / $nf));
	}

	foreach ($batch_owners as $owner_uid) {
		_debug("Running housekeeping tasks for user $owner_uid...");

		RSSUtils::housekeeping_user($owner_uid);
	}

	// Send feed digests by email if needed.
	Digest::send_headlines_digests($debug);

	return $nf;
}
2c08214a 220
/**
 * Fills in the title and site URL of a feed; this is used when subscribing.
 *
 * The information is taken from the HOOK_FEED_BASIC_INFO plugins of the
 * feed owner. When they return nothing, the feed is downloaded and parsed
 * instead. The stored title is only replaced while it is empty or
 * "[Unknown]"; the stored site URL is replaced whenever a different,
 * non-empty one was found.
 *
 * @param int $feed id of the row in ttrss_feeds
 * @return void
 */
static function set_basic_feed_info($feed) {

	$pdo = Db::pdo();

	$sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login
		FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	// unknown feed id: nothing to do
	if (!$row) return;

	$owner_uid = $row["owner_uid"];
	$auth_login = $row["auth_login"];
	$auth_pass = $row["auth_pass"];
	$fetch_url = $row["feed_url"];

	// A private plugin host is used so that the plugins enabled by the
	// feed owner are loaded in addition to the system ones.
	$pluginhost = new PluginHost();
	$user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);

	$pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
	$pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
	$pluginhost->load_data();

	$basic_info = array();
	foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
		$basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
	}

	if (!$basic_info) {
		$feed_data = fetch_file_contents($fetch_url, false,
			$auth_login, $auth_pass, false,
			FEED_FETCH_TIMEOUT,
			0);

		global $fetch_curl_used;

		// try to decompress the payload ourselves when curl was not used
		if (!$fetch_curl_used) {
			$tmp = @gzdecode($feed_data);

			if ($tmp) $feed_data = $tmp;
		}

		$feed_data = trim($feed_data);

		// Do not hand an empty document to the parser when the download
		// failed; update_rss_feed() likewise stops before parsing in that case.
		if ($feed_data !== "") {
			$rss = new FeedParser($feed_data);
			$rss->init();

			if (!$rss->error()) {
				$basic_info = array(
					'title' => mb_substr($rss->get_title(), 0, 199),
					'site_url' => mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245)
				);
			}
		}
	}

	if (!$basic_info || !is_array($basic_info)) return;

	// read the current values again right before deciding what to overwrite
	$sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	if (!$row) return;

	$registered_title = $row["title"];
	$orig_site_url = $row["site_url"];

	// !empty() instead of a bare index read: a plugin may return an array
	// that lacks one of the keys, which would otherwise raise a notice.
	if (!empty($basic_info['title']) && (!$registered_title || $registered_title == "[Unknown]")) {

		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			title = ? WHERE id = ?");
		$sth->execute([$basic_info['title'], $feed]);
	}

	if (!empty($basic_info['site_url']) && $orig_site_url != $basic_info['site_url']) {
		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			site_url = ? WHERE id = ?");
		$sth->execute([$basic_info['site_url'], $feed]);
	}
}
302
7b55001e 303 /**
e6c886bf
AD
304 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
305 */
306 static function update_rss_feed($feed, $no_cache = false) {
2c08214a 307
e6532439 308 $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || clean($_REQUEST['xdebug']);
2c08214a 309
4f71d743 310 _debug_suppress(!$debug_enabled);
68cccafc 311 _debug("start", $debug_enabled);
2c08214a 312
0567016b
AD
313 $pdo = Db::pdo();
314
315 $sth = $pdo->prepare("SELECT title FROM ttrss_feeds WHERE id = ?");
316 $sth->execute([$feed]);
bfe1eb4e 317
0567016b 318 if (!$row = $sth->fetch()) {
bfe1eb4e
AD
319 _debug("feed $feed NOT FOUND/SKIPPED", $debug_enabled);
320 user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING);
321 return false;
322 }
323
0567016b 324 $title = $row["title"];
6bb96beb
AD
325
326 // feed was batch-subscribed or something, we need to get basic info
327 // this is not optimal currently as it fetches stuff separately TODO: optimize
328 if ($title == "[Unknown]") {
329 _debug("setting basic feed info for $feed...");
e6c886bf 330 RSSUtils::set_basic_feed_info($feed);
6bb96beb
AD
331 }
332
0567016b 333 $sth = $pdo->prepare("SELECT id,update_interval,auth_login,
5ba1ddd4 334 feed_url,auth_pass,cache_images,
5321e775 335 mark_unread_on_update, owner_uid,
153cb6d3 336 auth_pass_encrypted, feed_language,
e50c8eaa
AD
337 last_modified,
338 ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
0567016b
AD
339 FROM ttrss_feeds WHERE id = ?");
340 $sth->execute([$feed]);
341
342 if ($row = $sth->fetch()) {
2c08214a 343
0567016b 344 $owner_uid = $row["owner_uid"];
187abfe7 345 $mark_unread_on_update = $row["mark_unread_on_update"];
2c08214a 346
0567016b
AD
347 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
348 WHERE id = ?");
349 $sth->execute([$feed]);
2c08214a 350
0567016b
AD
351 $auth_login = $row["auth_login"];
352 $auth_pass = $row["auth_pass"];
0567016b
AD
353 $stored_last_modified = $row["last_modified"];
354 $last_unconditional = $row["last_unconditional"];
187abfe7 355 $cache_images = $row["cache_images"];
0567016b 356 $fetch_url = $row["feed_url"];
069aea59 357
0567016b
AD
358 $feed_language = mb_strtolower($row["feed_language"]);
359 if (!$feed_language) $feed_language = 'english';
2c08214a 360
0567016b
AD
361 } else {
362 return false;
363 }
2c08214a 364
f074ffe9 365 $date_feed_processed = date('Y-m-d H:i');
2c08214a 366
342e8a9e 367 $cache_filename = CACHE_DIR . "/feeds/" . sha1($fetch_url) . ".xml";
f074ffe9 368
ee65bef4
AD
369 $pluginhost = new PluginHost();
370 $pluginhost->set_debug($debug_enabled);
371 $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
372
373 $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
374 $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
375 $pluginhost->load_data();
376
7b55001e 377 $rss_hash = false;
4f9cbdff 378
7b55001e
AD
379 $force_refetch = isset($_REQUEST["force_refetch"]);
380 $feed_data = "";
687a4f59 381
7b55001e
AD
382 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) {
383 $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass);
384 }
2c08214a 385
7b55001e
AD
386 // try cache
387 if (!$feed_data &&
388 file_exists($cache_filename) &&
389 is_readable($cache_filename) &&
390 !$auth_login && !$auth_pass &&
391 filemtime($cache_filename) > time() - 30) {
be574731 392
7b55001e 393 _debug("using local cache [$cache_filename].", $debug_enabled);
52637d3b 394
7b55001e 395 @$feed_data = file_get_contents($cache_filename);
f074ffe9 396
7b55001e
AD
397 if ($feed_data) {
398 $rss_hash = sha1($feed_data);
88edaa93 399 }
ee65bef4 400
7b55001e
AD
401 } else {
402 _debug("local cache will not be used for this feed", $debug_enabled);
403 }
312742db 404
153cb6d3
AD
405 global $fetch_last_modified;
406
7b55001e
AD
407 // fetch feed from source
408 if (!$feed_data) {
e50c8eaa 409 _debug("last unconditional update request: $last_unconditional");
312742db 410
7b55001e
AD
411 if (ini_get("open_basedir") && function_exists("curl_init")) {
412 _debug("not using CURL due to open_basedir restrictions");
413 }
3f6f0857 414
7f4a4045
AD
415 if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
416 _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
e50c8eaa 417
7f4a4045
AD
418 $force_refetch = true;
419 } else {
420 _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
421 }
153cb6d3 422
7f4a4045 423 _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
153cb6d3
AD
424
425 $feed_data = fetch_file_contents([
426 "url" => $fetch_url,
427 "login" => $auth_login,
428 "pass" => $auth_pass,
429 "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
430 "last_modified" => $force_refetch ? "" : $stored_last_modified
431 ]);
3f6f0857 432
7b55001e 433 global $fetch_curl_used;
3f6f0857 434
7b55001e
AD
435 if (!$fetch_curl_used) {
436 $tmp = @gzdecode($feed_data);
1367bc3f 437
7b55001e
AD
438 if ($tmp) $feed_data = $tmp;
439 }
017401dd 440
7b55001e 441 $feed_data = trim($feed_data);
fd687300 442
7b55001e 443 _debug("fetch done.", $debug_enabled);
9d930af9 444 _debug("source last modified: " . $fetch_last_modified, $debug_enabled);
153cb6d3
AD
445
446 if ($feed_data && $fetch_last_modified != $stored_last_modified) {
0567016b
AD
447 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_modified = ? WHERE id = ?");
448 $sth->execute([substr($fetch_last_modified, 0, 245), $feed]);
153cb6d3 449 }
95beaa14 450
7b55001e 451 // cache vanilla feed data for re-use
342e8a9e 452 if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/feeds")) {
7b55001e
AD
453 $new_rss_hash = sha1($feed_data);
454
455 if ($new_rss_hash != $rss_hash) {
456 _debug("saving $cache_filename", $debug_enabled);
457 @file_put_contents($cache_filename, $feed_data);
95beaa14 458 }
4f9cbdff 459 }
7b55001e 460 }
017401dd 461
7b55001e
AD
462 if (!$feed_data) {
463 global $fetch_last_error;
464 global $fetch_last_error_code;
f074ffe9 465
7b55001e 466 _debug("unable to fetch: $fetch_last_error [$fetch_last_error_code]", $debug_enabled);
f074ffe9 467
7b55001e
AD
468 // If-Modified-Since
469 if ($fetch_last_error_code != 304) {
0567016b 470 $error_message = $fetch_last_error;
7b55001e
AD
471 } else {
472 _debug("source claims data not modified, nothing to do.", $debug_enabled);
0567016b 473 $error_message = "";
7b55001e 474 }
4f9cbdff 475
0567016b
AD
476 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
477 last_updated = NOW() WHERE id = ?");
478 $sth->execute([$error_message, $feed]);
4f9cbdff 479
7b55001e 480 return;
f074ffe9
AD
481 }
482
1ffe3391 483 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) {
6791af0c 484 $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed);
017401dd
AD
485 }
486
07d3431e
AD
487 $rss = new FeedParser($feed_data);
488 $rss->init();
2c08214a 489
19b3992b 490 if (!$rss->error()) {
2c08214a 491
d2a421e3 492 // We use local pluginhost here because we need to load different per-user feed plugins
1ffe3391 493 $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
4412b877 494
df659891 495 _debug("language: $feed_language", $debug_enabled);
68cccafc 496 _debug("processing feed data...", $debug_enabled);
2c08214a 497
382268c6
AD
498 if (DB_TYPE == "pgsql") {
499 $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
500 } else {
501 $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
502 }
503
0567016b 504 $sth = $pdo->prepare("SELECT owner_uid,favicon_avg_color,
382268c6
AD
505 (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
506 favicon_needs_check
0567016b
AD
507 FROM ttrss_feeds WHERE id = ?");
508 $sth->execute([$feed]);
2c08214a 509
0567016b 510 if ($row = $sth->fetch()) {
187abfe7 511 $favicon_needs_check = $row["favicon_needs_check"];
0567016b
AD
512 $favicon_avg_color = $row["favicon_avg_color"];
513 $owner_uid = $row["owner_uid"];
514 } else {
515 return false;
516 }
2c08214a 517
0567016b 518 $site_url = mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245);
2c08214a 519
cd07592c
AD
520 _debug("site_url: $site_url", $debug_enabled);
521 _debug("feed_title: " . $rss->get_title(), $debug_enabled);
522
687a4f59 523 if ($favicon_needs_check || $force_refetch) {
36490f11
AD
524
525 /* terrible hack: if we crash on floicon shit here, we won't check
560cbd8c 526 * the icon avgcolor again (unless the icon got updated) */
36490f11 527
560cbd8c
AD
528 $favicon_file = ICONS_DIR . "/$feed.ico";
529 $favicon_modified = @filemtime($favicon_file);
530
68cccafc 531 _debug("checking favicon...", $debug_enabled);
687a4f59 532
e6c886bf 533 RSSUtils::check_feed_favicon($site_url, $feed);
560cbd8c
AD
534 $favicon_modified_new = @filemtime($favicon_file);
535
536 if ($favicon_modified_new > $favicon_modified)
537 $favicon_avg_color = '';
687a4f59 538
0567016b 539 $favicon_colorstring = "";
6ee0d4b0 540 if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') {
e6c886bf 541 require_once "colors.php";
687a4f59 542
0567016b
AD
543 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE
544 id = ?");
545 $sth->execute([$feed]);
aafd55ba 546
0567016b
AD
547 $favicon_color = calculate_avg_color($favicon_file);
548
549 $favicon_colorstring = ",favicon_avg_color = " . $pdo->quote($favicon_color);
63c323f7 550
36490f11 551 } else if ($favicon_avg_color == 'fail') {
84ceb6bd 552 _debug("floicon failed on this file, not trying to recalculate avg color", $debug_enabled);
6ac722d5 553 }
687a4f59 554
0567016b
AD
555 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_last_checked = NOW()
556 $favicon_colorstring WHERE id = ?");
557 $sth->execute([$feed]);
f2798eb6 558 }
2c08214a 559
68cccafc 560 _debug("loading filters & labels...", $debug_enabled);
2c08214a 561
a42c55f0 562 $filters = load_filters($feed, $owner_uid);
2c08214a 563
02f3992a 564 if ($debug_enabled) {
7f4a4045
AD
565 print_r($filters);
566 }
02f3992a 567
68cccafc 568 _debug("" . count($filters) . " filters loaded.", $debug_enabled);
2c08214a 569
19b3992b 570 $items = $rss->get_items();
2c08214a 571
19b3992b 572 if (!is_array($items)) {
68cccafc 573 _debug("no articles found.", $debug_enabled);
2c08214a 574
0567016b
AD
575 $sth = $pdo->prepare("UPDATE ttrss_feeds
576 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
577 $sth->execute([$feed]);
2c08214a 578
0567016b 579 return true; // no articles
2c08214a
AD
580 }
581
68cccafc 582 _debug("processing articles...", $debug_enabled);
2c08214a 583
6c9f3d4a
AD
584 $tstart = time();
585
19b3992b 586 foreach ($items as $item) {
0500e14c
AD
587 $pdo->beginTransaction();
588
e6532439 589 if (clean($_REQUEST['xdebug']) == 3) {
2c08214a
AD
590 print_r($item);
591 }
592
6c9f3d4a
AD
593 if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) {
594 _debug("looks like there's too many articles to process at once, breaking out", $debug_enabled);
0500e14c 595 $pdo->commit();
6c9f3d4a
AD
596 break;
597 }
598
0567016b
AD
599 $entry_guid = strip_tags($item->get_id());
600 if (!$entry_guid) $entry_guid = strip_tags($item->get_link());
e6c886bf 601 if (!$entry_guid) $entry_guid = RSSUtils::make_guid_from_title($item->get_title());
0500e14c
AD
602
603 if (!$entry_guid) {
604 $pdo->commit();
605 continue;
606 }
2c08214a 607
3a4c8973
AD
608 $entry_guid = "$owner_uid,$entry_guid";
609
0567016b 610 $entry_guid_hashed = 'SHA1:' . sha1($entry_guid);
5e3d5480 611
68cccafc 612 _debug("guid $entry_guid / $entry_guid_hashed", $debug_enabled);
5e3d5480 613
0567016b 614 $entry_timestamp = strip_tags($item->get_date());
04d2f9c8
AD
615
616 _debug("orig date: " . $item->get_date(), $debug_enabled);
2c08214a 617
30123fe6 618 if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) {
2c08214a 619 $entry_timestamp = time();
2c08214a
AD
620 }
621
622 $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
623
68cccafc 624 _debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
2c08214a 625
0567016b 626 $entry_title = strip_tags($item->get_title());
1b35d30c 627
5d56d100 628 $entry_link = rewrite_relative_url($site_url, $item->get_link());
2c08214a 629
3bbaf902 630 $entry_language = mb_substr(trim($item->get_language()), 0, 2);
22a866ed 631
68cccafc
AD
632 _debug("title $entry_title", $debug_enabled);
633 _debug("link $entry_link", $debug_enabled);
22a866ed 634 _debug("language $entry_language", $debug_enabled);
2c08214a
AD
635
636 if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
637
19b3992b
AD
638 $entry_content = $item->get_content();
639 if (!$entry_content) $entry_content = $item->get_description();
2c08214a 640
e6532439 641 if (clean($_REQUEST["xdebug"]) == 2) {
9ec10352 642 print "content: ";
0bc503ff 643 print htmlspecialchars($entry_content);
3c696512 644 print "\n";
2c08214a
AD
645 }
646
0567016b 647 $entry_comments = mb_substr(strip_tags($item->get_comments_url()), 0, 245);
12ff230b 648 $num_comments = (int) $item->get_comments_count();
2c08214a 649
0567016b
AD
650 $entry_author = strip_tags($item->get_author());
651 $entry_guid = mb_substr($entry_guid, 0, 245);
2c08214a 652
68cccafc
AD
653 _debug("author $entry_author", $debug_enabled);
654 _debug("num_comments: $num_comments", $debug_enabled);
ee78f81c 655 _debug("looking for tags...", $debug_enabled);
2c08214a
AD
656
657 // parse <category> entries into tags
658
659 $additional_tags = array();
660
19b3992b 661 $additional_tags_src = $item->get_categories();
2c08214a 662
19b3992b
AD
663 if (is_array($additional_tags_src)) {
664 foreach ($additional_tags_src as $tobj) {
cd07592c 665 array_push($additional_tags, $tobj);
2c08214a 666 }
19b3992b 667 }
2c08214a 668
fa6fbd36 669 $entry_tags = array_unique($additional_tags);
2c08214a 670
5edf4b73 671 for ($i = 0; $i < count($entry_tags); $i++) {
2c08214a
AD
672 $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
673
5edf4b73
AD
674 // we don't support numeric tags, let's prefix them
675 if (is_numeric($entry_tags[$i])) $entry_tags[$i] = 't:' . $entry_tags[$i];
676 }
677
ee78f81c
AD
678 _debug("tags found: " . join(",", $entry_tags), $debug_enabled);
679
68cccafc 680 _debug("done collecting data.", $debug_enabled);
2c08214a 681
0567016b
AD
682 $sth = $pdo->prepare("SELECT id, content_hash, lang FROM ttrss_entries
683 WHERE guid = ? OR guid = ?");
684 $sth->execute([$entry_guid, $entry_guid_hashed]);
b30abdad 685
0567016b
AD
686 if ($row = $sth->fetch()) {
687 $base_entry_id = $row["id"];
688 $entry_stored_hash = $row["content_hash"];
4a0da0e5 689 $article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
a8ac7661 690
2ed0d6c4 691 $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
a8ac7661 692 $entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
b30abdad 693 } else {
b1840673
AD
694 $base_entry_id = false;
695 $entry_stored_hash = "";
a29fe121 696 $article_labels = array();
b30abdad
AD
697 }
698
455b1401 699 $article = array("owner_uid" => $owner_uid, // read only
b30abdad 700 "guid" => $entry_guid, // read only
59e83455 701 "guid_hashed" => $entry_guid_hashed, // read only
19b3992b
AD
702 "title" => $entry_title,
703 "content" => $entry_content,
704 "link" => $entry_link,
a29fe121 705 "labels" => $article_labels, // current limitation: can add labels to article, can't remove them
19b3992b 706 "tags" => $entry_tags,
e02555c1 707 "author" => $entry_author,
c9299c28 708 "force_catchup" => false, // ugly hack for the time being
6de3a1be 709 "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
3318d324 710 "language" => $entry_language,
20d2195f 711 "num_comments" => $num_comments, // read only
f73e03e0
AD
712 "feed" => array("id" => $feed,
713 "fetch_url" => $fetch_url,
babfadbf
J
714 "site_url" => $site_url,
715 "cache_images" => $cache_images)
e6c886bf 716 );
cc85704f 717
b1840673 718 $entry_plugin_data = "";
e6c886bf 719 $entry_current_hash = RSSUtils::calculate_article_hash($article, $pluginhost);
b1840673
AD
720
721 _debug("article hash: $entry_current_hash [stored=$entry_stored_hash]", $debug_enabled);
722
522e8b35 723 if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) {
b1840673
AD
724 _debug("stored article seems up to date [IID: $base_entry_id], updating timestamp only", $debug_enabled);
725
726 // we keep encountering the entry in feeds, so we need to
727 // update date_updated column so that we don't get horrible
728 // dupes when the entry gets purged and reinserted again e.g.
729 // in the case of SLOW SLOW OMG SLOW updating feeds
730
0567016b
AD
731 $sth = $pdo->prepare("UPDATE ttrss_entries SET date_updated = NOW()
732 WHERE id = ?");
733 $sth->execute([$base_entry_id]);
b1840673 734
0500e14c 735 $pdo->commit();
5bdcb8fd 736 continue;
b1840673
AD
737 }
738
739 _debug("hash differs, applying plugin filters:", $debug_enabled);
740
1ffe3391 741 foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) {
b1840673
AD
742 _debug("... " . get_class($plugin), $debug_enabled);
743
744 $start = microtime(true);
19b3992b 745 $article = $plugin->hook_article_filter($article);
0084f0d1 746
b1840673
AD
747 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
748
749 $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ",";
cc85704f
AD
750 }
751
e6532439 752 if (clean($_REQUEST["xdebug"]) == 2) {
0bc503ff
AD
753 print "processed content: ";
754 print htmlspecialchars($article["content"]);
755 print "\n";
756 }
757
b1840673
AD
758 _debug("plugin data: $entry_plugin_data", $debug_enabled);
759
35c12dc4 760 // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
2b8afd49 761 if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
35c12dc4 762 foreach ($article as $k => $v) {
35c37354 763 // i guess we'll have to take the risk of 4byte unicode labels & tags here
dae16f72 764 if (is_string($article[$k])) {
102a0135 765 $article[$k] = RSSUtils::strip_utf8mb4($v);
35c37354 766 }
35c12dc4
AD
767 }
768 }
769
b8774453
AD
770 /* Collect article tags here so we could filter by them: */
771
557d86fe
AD
772 $matched_rules = array();
773
e6c886bf 774 $article_filters = RSSUtils::get_article_filters($filters, $article["title"],
7b55001e 775 $article["content"], $article["link"], $article["author"],
557d86fe 776 $article["tags"], $matched_rules);
b8774453
AD
777
778 if ($debug_enabled) {
557d86fe
AD
779 _debug("matched filter rules: ", $debug_enabled);
780
781 if (count($matched_rules) != 0) {
782 print_r($matched_rules);
783 }
784
785 _debug("filter actions: ", $debug_enabled);
786
b8774453
AD
787 if (count($article_filters) != 0) {
788 print_r($article_filters);
789 }
790 }
791
e6c886bf 792 $plugin_filter_names = RSSUtils::find_article_filters($article_filters, "plugin");
b8774453
AD
793 $plugin_filter_actions = $pluginhost->get_filter_actions();
794
795 if (count($plugin_filter_names) > 0) {
796 _debug("applying plugin filter actions...", $debug_enabled);
797
798 foreach ($plugin_filter_names as $pfn) {
799 list($pfclass,$pfaction) = explode(":", $pfn["param"]);
800
801 if (isset($plugin_filter_actions[$pfclass])) {
802 $plugin = $pluginhost->get_plugin($pfclass);
803
804 _debug("... $pfclass: $pfaction", $debug_enabled);
805
806 if ($plugin) {
807 $start = microtime(true);
808 $article = $plugin->hook_article_filter_action($article, $pfaction);
809
810 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
811 } else {
812 _debug("??? $pfclass: plugin object not found.");
813 }
814 } else {
815 _debug("??? $pfclass: filter plugin not registered.");
816 }
817 }
818 }
819
19b3992b 820 $entry_tags = $article["tags"];
0567016b
AD
821 $entry_title = strip_tags($article["title"]);
822 $entry_author = mb_substr(strip_tags($article["author"]), 0, 245);
823 $entry_link = strip_tags($article["link"]);
f935d98e 824 $entry_content = $article["content"]; // escaped below
c9299c28 825 $entry_force_catchup = $article["force_catchup"];
a29fe121 826 $article_labels = $article["labels"];
6de3a1be 827 $entry_score_modifier = (int) $article["score_modifier"];
0567016b 828 $entry_language = $article["language"];
a29fe121
AD
829
830 if ($debug_enabled) {
831 _debug("article labels:", $debug_enabled);
557d86fe
AD
832
833 if (count($article_labels) != 0) {
834 print_r($article_labels);
835 }
a29fe121 836 }
c9299c28
AD
837
838 _debug("force catchup: $entry_force_catchup");
f935d98e 839
0a3fd79b 840 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 841 RSSUtils::cache_media($entry_content, $site_url, $debug_enabled);
0a3fd79b 842
0567016b
AD
843 $csth = $pdo->prepare("SELECT id FROM ttrss_entries
844 WHERE guid = ? OR guid = ?");
845 $csth->execute([$entry_guid, $entry_guid_hashed]);
9e222305 846
0567016b 847 if (!$row = $csth->fetch()) {
2c08214a 848
07d3431e 849 _debug("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", $debug_enabled);
2c08214a
AD
850
851 // base post entry does not exist, create it
852
0567016b 853 $usth = $pdo->prepare(
2c08214a 854 "INSERT INTO ttrss_entries
0567016b 855 (title,
2c08214a
AD
856 guid,
857 link,
858 updated,
859 content,
860 content_hash,
861 no_orig_date,
862 date_updated,
863 date_entered,
864 comments,
865 num_comments,
b30abdad 866 plugin_data,
6b461797 867 lang,
2c08214a
AD
868 author)
869 VALUES
0567016b 870 (?, ?, ?, ?, ?, ?,
5ba1ddd4 871 false,
2c08214a 872 NOW(),
0567016b
AD
873 ?, ?, ?, ?, ?, ?)");
874
875 $usth->execute([$entry_title,
876 $entry_guid_hashed,
877 $entry_link,
878 $entry_timestamp_fmt,
93e70e36 879 "$entry_content",
0567016b
AD
880 $entry_current_hash,
881 $date_feed_processed,
882 $entry_comments,
187abfe7 883 (int)$num_comments,
0567016b 884 $entry_plugin_data,
93e70e36
AD
885 "$entry_language",
886 "$entry_author"]);
e8291805 887
2c08214a
AD
888 }
889
0567016b 890 $csth->execute([$entry_guid, $entry_guid_hashed]);
2c08214a
AD
891
892 $entry_ref_id = 0;
893 $entry_int_id = 0;
894
0567016b 895 if ($row = $csth->fetch()) {
2c08214a 896
68cccafc 897 _debug("base guid found, checking for user record", $debug_enabled);
2c08214a 898
0567016b 899 $ref_id = $row['id'];
2c08214a
AD
900 $entry_ref_id = $ref_id;
901
e6c886bf 902 if (RSSUtils::find_article_filter($article_filters, "filter")) {
0500e14c 903 $pdo->commit();
2c08214a
AD
904 continue;
905 }
906
e6c886bf 907 $score = RSSUtils::calculate_article_score($article_filters) + $entry_score_modifier;
2c08214a 908
6de3a1be 909 _debug("initial score: $score [including plugin modifier: $entry_score_modifier]", $debug_enabled);
2c08214a 910
4f186b1f
AD
911 // check for user post link to main table
912
0567016b
AD
913 $sth = $pdo->prepare("SELECT ref_id, int_id FROM ttrss_user_entries WHERE
914 ref_id = ? AND owner_uid = ?");
915 $sth->execute([$ref_id, $owner_uid]);
2c08214a
AD
916
917 // okay it doesn't exist - create user entry
0567016b
AD
918 if ($row = $sth->fetch()) {
919 $entry_ref_id = $row["ref_id"];
920 $entry_int_id = $row["int_id"];
2c08214a 921
0567016b
AD
922 _debug("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
923 } else {
924
68cccafc 925 _debug("user record not found, creating...", $debug_enabled);
2c08214a 926
e6c886bf 927 if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) {
0567016b
AD
928 $unread = 1;
929 $last_read_qpart = null;
2c08214a 930 } else {
0567016b 931 $unread = 0;
d4c05d0b 932 $last_read_qpart = date("Y-m-d H:i"); // we can't use NOW() here because it gets quoted
2c08214a
AD
933 }
934
e6c886bf 935 if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
0567016b 936 $marked = 1;
2c08214a 937 } else {
0567016b 938 $marked = 0;
2c08214a
AD
939 }
940
e6c886bf 941 if (RSSUtils::find_article_filter($article_filters, 'publish')) {
0567016b 942 $published = 1;
2c08214a 943 } else {
0567016b 944 $published = 0;
2c08214a
AD
945 }
946
26ad257d 947 $last_marked = ($marked == 1) ? 'NOW()' : 'NULL';
948 $last_published = ($published == 1) ? 'NOW()' : 'NULL';
7873d588 949
0567016b 950 $sth = $pdo->prepare(
2c08214a
AD
951 "INSERT INTO ttrss_user_entries
952 (ref_id, owner_uid, feed_id, unread, last_read, marked,
7873d588
AD
953 published, score, tag_cache, label_cache, uuid,
954 last_marked, last_published)
aa16334f 955 VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")");
2c08214a 956
0567016b 957 $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
aa16334f 958 $published, $score]);
2c08214a 959
0567016b
AD
960 $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
961 ref_id = ? AND owner_uid = ? AND
962 feed_id = ? LIMIT 1");
2c08214a 963
0567016b
AD
964 $sth->execute([$ref_id, $owner_uid, $feed]);
965
966 if ($row = $sth->fetch())
967 $entry_int_id = $row['int_id'];
2c08214a
AD
968 }
969
0567016b 970 _debug("resulting RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
2c08214a 971
963c2264
AD
972 if (DB_TYPE == "pgsql")
973 $tsvector_qpart = "tsvector_combined = to_tsvector(:ts_lang, :ts_content),";
974 else
e854442e 975 $tsvector_qpart = "";
49a888ec 976
0567016b 977 $sth = $pdo->prepare("UPDATE ttrss_entries
49a888ec 978 SET title = :title,
e854442e 979 $tsvector_qpart
49a888ec
AD
980 content = :content,
981 content_hash = :content_hash,
982 updated = :updated,
66fe33e7 983 date_updated = NOW(),
49a888ec
AD
984 num_comments = :num_comments,
985 plugin_data = :plugin_data,
986 author = :author,
987 lang = :lang
988 WHERE id = :id");
989
963c2264 990 $params = [":title" => $entry_title,
93e70e36 991 ":content" => "$entry_content",
49a888ec
AD
992 ":content_hash" => $entry_current_hash,
993 ":updated" => $entry_timestamp_fmt,
994 ":num_comments" => (int)$num_comments,
995 ":plugin_data" => $entry_plugin_data,
93e70e36 996 ":author" => "$entry_author",
49a888ec 997 ":lang" => $entry_language,
963c2264
AD
998 ":id" => $ref_id];
999
1000 if (DB_TYPE == "pgsql") {
1001 $params[":ts_lang"] = $feed_language;
6e6c3a87 1002 $params[":ts_content"] = mb_substr(strip_tags($entry_title . " " . $entry_content), 0, 900000);
963c2264
AD
1003 }
1004
1005 $sth->execute($params);
b1840673 1006
59e83455 1007 // update aux data
0567016b
AD
1008 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1009 SET score = ? WHERE ref_id = ?");
1010 $sth->execute([$score, $ref_id]);
59e83455 1011
b1840673 1012 if ($mark_unread_on_update) {
24e6ff5d
AD
1013 _debug("article updated, marking unread as requested.", $debug_enabled);
1014
0567016b
AD
1015 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1016 SET last_read = null, unread = true WHERE ref_id = ?");
1017 $sth->execute([$ref_id]);
2c08214a
AD
1018 }
1019 }
1020
a29fe121
AD
1021 _debug("assigning labels [other]...", $debug_enabled);
1022
1023 foreach ($article_labels as $label) {
7c9b5a3f 1024 Labels::add_article($entry_ref_id, $label[1], $owner_uid);
a29fe121
AD
1025 }
1026
1027 _debug("assigning labels [filters]...", $debug_enabled);
2c08214a 1028
e6c886bf 1029 RSSUtils::assign_article_to_label_filters($entry_ref_id, $article_filters,
b24504b1 1030 $owner_uid, $article_labels);
2c08214a 1031
68cccafc 1032 _debug("looking for enclosures...", $debug_enabled);
2c08214a
AD
1033
1034 // enclosures
1035
1036 $enclosures = array();
1037
19b3992b 1038 $encs = $item->get_enclosures();
2c08214a 1039
19b3992b
AD
1040 if (is_array($encs)) {
1041 foreach ($encs as $e) {
1042 $e_item = array(
86e53429
AD
1043 rewrite_relative_url($site_url, $e->link),
1044 $e->type, $e->length, $e->title, $e->width, $e->height);
102a0135
AD
1045
1046 // Yet another episode of "mysql utf8_general_ci is gimped"
2b8afd49 1047 if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
7f4a4045
AD
1048 for ($i = 0; $i < count($e_item); $i++) {
1049 if (is_string($e_item[$i])) {
1050 $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
1051 }
1052 }
102a0135
AD
1053 }
1054
7f4a4045 1055 array_push($enclosures, $e_item);
2c08214a
AD
1056 }
1057 }
1058
388d4dfa 1059 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 1060 RSSUtils::cache_enclosures($enclosures, $site_url, $debug_enabled);
388d4dfa 1061
2c08214a 1062 if ($debug_enabled) {
68cccafc 1063 _debug("article enclosures:", $debug_enabled);
2c08214a
AD
1064 print_r($enclosures);
1065 }
1066
0567016b 1067 $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures
ac8a0e7d 1068 WHERE content_url = ? AND content_type = ? AND post_id = ?");
2c08214a 1069
0567016b
AD
1070 $usth = $pdo->prepare("INSERT INTO ttrss_enclosures
1071 (content_url, content_type, title, duration, post_id, width, height) VALUES
1072 (?, ?, ?, ?, ?, ?, ?)");
5c54e683 1073
2c08214a 1074 foreach ($enclosures as $enc) {
0567016b
AD
1075 $enc_url = $enc[0];
1076 $enc_type = $enc[1];
0500e14c 1077 $enc_dur = (int)$enc[2];
0567016b 1078 $enc_title = $enc[3];
523bd90b
FE
1079 $enc_width = intval($enc[4]);
1080 $enc_height = intval($enc[5]);
2c08214a 1081
ac8a0e7d 1082 $esth->execute([$enc_url, $enc_type, $entry_ref_id]);
2c08214a 1083
0567016b
AD
1084 if (!$esth->fetch()) {
1085 $usth->execute([$enc_url, $enc_type, (string)$enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]);
2c08214a
AD
1086 }
1087 }
1088
2c08214a
AD
1089 // check for manual tags (we have to do it here since they're loaded from filters)
1090
1091 foreach ($article_filters as $f) {
6aff7845 1092 if ($f["type"] == "tag") {
2c08214a 1093
6aff7845 1094 $manual_tags = trim_array(explode(",", $f["param"]));
2c08214a
AD
1095
1096 foreach ($manual_tags as $tag) {
1097 if (tag_is_valid($tag)) {
1098 array_push($entry_tags, $tag);
1099 }
1100 }
1101 }
1102 }
1103
1104 // Skip boring tags
1105
6322ac79 1106 $boring_tags = trim_array(explode(",", mb_strtolower(get_pref(
2c08214a
AD
1107 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
1108
1109 $filtered_tags = array();
1110 $tags_to_cache = array();
1111
1112 if ($entry_tags && is_array($entry_tags)) {
1113 foreach ($entry_tags as $tag) {
1114 if (array_search($tag, $boring_tags) === false) {
1115 array_push($filtered_tags, $tag);
1116 }
1117 }
1118 }
1119
1120 $filtered_tags = array_unique($filtered_tags);
1121
1122 if ($debug_enabled) {
68cccafc 1123 _debug("filtered article tags:", $debug_enabled);
2c08214a
AD
1124 print_r($filtered_tags);
1125 }
1126
1127 // Save article tags in the database
1128
1129 if (count($filtered_tags) > 0) {
1130
0567016b
AD
1131 $tsth = $pdo->prepare("SELECT id FROM ttrss_tags
1132 WHERE tag_name = ? AND post_int_id = ? AND
1133 owner_uid = ? LIMIT 1");
1134
1135 $usth = $pdo->prepare("INSERT INTO ttrss_tags
1136 (owner_uid,tag_name,post_int_id)
1137 VALUES (?, ?, ?)");
2c08214a
AD
1138
1139 foreach ($filtered_tags as $tag) {
1140
1141 $tag = sanitize_tag($tag);
2c08214a
AD
1142
1143 if (!tag_is_valid($tag)) continue;
1144
0567016b 1145 $tsth->execute([$tag, $entry_int_id, $owner_uid]);
2c08214a 1146
0567016b
AD
1147 if (!$tsth->fetch()) {
1148 $usth->execute([$owner_uid, $tag, $entry_int_id]);
e6c886bf 1149 }
2c08214a
AD
1150
1151 array_push($tags_to_cache, $tag);
1152 }
1153
1154 /* update the cache */
1155
1156 $tags_to_cache = array_unique($tags_to_cache);
1157
0567016b 1158 $tags_str = join(",", $tags_to_cache);
2c08214a 1159
0567016b
AD
1160 $tsth = $pdo->prepare("UPDATE ttrss_user_entries
1161 SET tag_cache = ? WHERE ref_id = ?
1162 AND owner_uid = ?");
1163 $tsth->execute([$tags_str, $entry_ref_id, $owner_uid]);
2c08214a
AD
1164 }
1165
68cccafc 1166 _debug("article processed", $debug_enabled);
0500e14c
AD
1167
1168 $pdo->commit();
2c08214a
AD
1169 }
1170
68cccafc 1171 _debug("purging feed...", $debug_enabled);
2c08214a 1172
a42c55f0 1173 purge_feed($feed, 0, $debug_enabled);
2c08214a 1174
0567016b
AD
1175 $sth = $pdo->prepare("UPDATE ttrss_feeds
1176 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
1177 $sth->execute([$feed]);
2c08214a
AD
1178
1179 } else {
1180
0567016b 1181 $error_msg = mb_substr($rss->error(), 0, 245);
2c08214a 1182
4ad04ee2
AD
1183 _debug("fetch error: $error_msg", $debug_enabled);
1184
1185 if (count($rss->errors()) > 1) {
1186 foreach ($rss->errors() as $error) {
1187 _debug("+ $error");
1188 }
1189 }
2c08214a 1190
0567016b
AD
1191 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
1192 last_updated = NOW(), last_unconditional = NOW() WHERE id = ?");
1193 $sth->execute([$error_msg, $feed]);
2c08214a 1194
88edaa93 1195 unset($rss);
0567016b 1196 return false;
88edaa93 1197 }
2c08214a 1198
68cccafc 1199 _debug("done", $debug_enabled);
88edaa93 1200
7b55001e 1201 return true;
2c08214a
AD
1202 }
1203
/**
 * Downloads image/audio/video enclosures into the local media cache.
 *
 * Each enclosure is an indexed array where element 0 is the URL and
 * element 1 is the content type. Cache files live in CACHE_DIR/images
 * and are named after the SHA1 of the absolute enclosure URL. A file
 * that is already cached is only touched, which refreshes its
 * modification time.
 *
 * @param array $enclosures list of enclosure arrays (0 => url, 1 => content type)
 * @param string $site_url base URL used to resolve relative enclosure links
 * @param bool $debug when true, progress is reported through _debug()
 */
static function cache_enclosures($enclosures, $site_url, $debug) {
	foreach ($enclosures as $enclosure) {

		// only media enclosures are worth caching
		if (!preg_match("/(image|audio|video)/", $enclosure[1])) {
			continue;
		}

		$url = rewrite_relative_url($site_url, $enclosure[0]);
		$cached_path = CACHE_DIR . "/images/" . sha1($url);

		if ($debug) {
			_debug("cache_enclosures: downloading: $url to $cached_path");
		}

		if (file_exists($cached_path)) {
			// already cached: just bump the modification time
			if (is_writable($cached_path)) {
				touch($cached_path);
			}

			continue;
		}

		$data = fetch_file_contents($url);

		// downloads at or below the minimum size are not stored
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cached_path, $data);
		}
	}
}
1227
/**
 * Caches media referenced from an HTML fragment.
 *
 * The fragment is parsed with DOMDocument (a UTF-8 meta header is
 * prepended so the parser reads it as UTF-8) and every img, video/source
 * and audio/source element with a src attribute is considered. Inline
 * "data:" sources are skipped. Cache files live in CACHE_DIR/images and
 * are named after the SHA1 of the absolute source URL; files that are
 * already cached are only touched.
 *
 * @param string $html article content to scan
 * @param string $site_url base URL used to resolve relative sources
 * @param bool $debug when true, progress is reported through _debug()
 */
static function cache_media($html, $site_url, $debug) {
	libxml_use_internal_errors(true);

	$charset_hack = '<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
	</head>';

	$document = new DOMDocument();
	$document->loadHTML($charset_hack . $html);

	$finder = new DOMXPath($document);
	$nodes = $finder->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])');

	foreach ($nodes as $node) {

		// nothing to download for missing or inline (data:) sources
		if (!$node->hasAttribute('src') || strpos($node->getAttribute('src'), "data:") === 0) {
			continue;
		}

		$url = rewrite_relative_url($site_url, $node->getAttribute('src'));
		$cached_path = CACHE_DIR . "/images/" . sha1($url);

		if ($debug) {
			_debug("cache_media: checking $url");
		}

		if (file_exists($cached_path)) {
			// already cached: just bump the modification time
			if (is_writable($cached_path)) {
				touch($cached_path);
			}

			continue;
		}

		if ($debug) {
			_debug("cache_media: downloading: $url to $cached_path");
		}

		$data = fetch_file_contents($url);

		// downloads at or below the minimum size are not stored
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cached_path, $data);
		}
	}
}
1263
/**
 * Deletes error log rows created more than seven days ago.
 *
 * The date arithmetic differs between PostgreSQL and the other
 * supported backend, so the statement is picked based on DB_TYPE.
 *
 * @param bool $debug when true, a progress message is sent to _debug()
 */
static function expire_error_log($debug) {
	if ($debug) {
		_debug("Removing old error log entries...");
	}

	$pdo = Db::pdo();

	$sql = (DB_TYPE == "pgsql")
		? "DELETE FROM ttrss_error_log
				WHERE created_at < NOW() - INTERVAL '7 days'"
		: "DELETE FROM ttrss_error_log
				WHERE created_at < DATE_SUB(NOW(), INTERVAL 7 DAY)";

	$pdo->query($sql);
}
1277
/**
 * Removes stale *.lock files from LOCK_DIRECTORY.
 *
 * A lock file is removed when file_is_locked() reports it as not locked
 * and its modification time is more than two days in the past. Nothing
 * is done when LOCK_DIRECTORY is not writable.
 *
 * Only files that were actually deleted are counted: a file that has
 * disappeared in the meantime, or whose removal fails, is skipped
 * instead of being reported as removed.
 *
 * @param bool $debug when true, the number of removed files is sent to _debug()
 */
static function expire_lock_files($debug) {
	$num_deleted = 0;
	$max_age = 86400 * 2;

	if (is_writable(LOCK_DIRECTORY)) {
		$files = glob(LOCK_DIRECTORY . "/*.lock");

		if ($files) {
			foreach ($files as $file) {
				if (file_is_locked(basename($file))) {
					continue;
				}

				// the file may be gone by now; filemtime() would then yield
				// false, which must not be mistaken for "very old"
				$mtime = file_exists($file) ? filemtime($file) : false;

				if ($mtime === false || time() - $mtime <= $max_age) {
					continue;
				}

				if (unlink($file)) {
					++$num_deleted;
				}
			}
		}
	}

	if ($debug) _debug("Removed $num_deleted old lock files.");
}
1298
/**
 * Removes expired files from the cache subdirectories.
 *
 * The simplepie, feeds, images, export and upload directories below
 * CACHE_DIR are scanned (non-recursively); regular files whose
 * modification time is more than CACHE_MAX_DAYS days in the past are
 * deleted. Directories that are not writable are left alone.
 *
 * Entries that are not regular files (e.g. subdirectories) are skipped
 * rather than handed to unlink(), and only successful deletions are
 * counted.
 *
 * @param bool $debug when true, a per-directory summary is sent to _debug()
 */
static function expire_cached_files($debug) {
	$max_age = 86400 * CACHE_MAX_DAYS;

	foreach (array("simplepie", "feeds", "images", "export", "upload") as $dir) {
		$cache_dir = CACHE_DIR . "/$dir";

		$num_deleted = 0;

		if (is_writable($cache_dir)) {
			$files = glob("$cache_dir/*");

			if ($files) {
				foreach ($files as $file) {
					// unlink() cannot remove directories; skip anything that
					// is not a regular file
					if (!is_file($file)) {
						continue;
					}

					$mtime = filemtime($file);

					if ($mtime === false || time() - $mtime <= $max_age) {
						continue;
					}

					if (unlink($file)) {
						++$num_deleted;
					}
				}
			}
		}

		if ($debug) _debug("$cache_dir: removed $num_deleted files.");
	}
}
2c08214a 1324
/**
 * Source: http://www.php.net/manual/en/function.parse-url.php#104527
 * Returns the url query as associative array.
 *
 * The query is split on "&" and each part on its first "=" only, so a
 * value may itself contain "=" (e.g. base64 padding). A parameter
 * without "=" maps to an empty string, and empty segments (including an
 * entirely empty query) produce no entry. Names and values are returned
 * as found in the query; no URL-decoding is performed.
 *
 * @param string query
 * @return array params
 */
static function convertUrlQuery($query) {
	$params = array();

	foreach (explode('&', (string) $query) as $param) {
		// "a=1&&b=2", a trailing "&" or an empty query yield empty segments
		if ($param === '') {
			continue;
		}

		// limit 2: everything after the first "=" belongs to the value
		$item = explode('=', $param, 2);

		$params[$item[0]] = isset($item[1]) ? $item[1] : '';
	}

	return $params;
}
92c14e9d 1344
/**
 * Runs the given filters against an article and collects the actions of
 * every filter that matches.
 *
 * Each filter is an array with the keys "match_any_rule", "inverse",
 * "rules" and "actions". Each rule has "reg_exp", "type" (title, content,
 * both, link, author or tag) and "inverse". Rules are matched as
 * case-insensitive UTF-8 regular expressions; rules with an empty
 * expression are ignored. With "match_any_rule" a single matching rule
 * is enough, otherwise evaluation stops at the first rule that does not
 * match. "inverse" negates the result of a rule or of the whole filter.
 *
 * Collection stops as soon as an action of type "stop" has been added.
 *
 * @param array $filters filters to evaluate
 * @param string $title article title
 * @param string $content article content (line breaks and tabs are removed before matching)
 * @param string $link article link
 * @param string $author article author
 * @param array $tags article tags
 * @param array|bool $matched_rules when an array is passed, the last rule
 *        examined for each matching filter is appended to it
 * @return array actions of all matching filters, in filter order
 */
static function get_article_filters($filters, $title, $content, $link, $author, $tags, &$matched_rules = false) {
	$matches = array();

	foreach ($filters as $filter) {
		$match_any_rule = $filter["match_any_rule"];
		$inverse = $filter["inverse"];
		$filter_match = false;

		// Tracked per filter: without the reset, a filter that has no rules
		// would report a rule left over from the previous filter.
		$last_rule = null;

		foreach ($filter["rules"] as $rule) {
			$last_rule = $rule;
			$match = false;

			// "/" is the pattern delimiter below, so it has to be escaped
			$reg_exp = str_replace('/', '\/', $rule["reg_exp"]);
			$rule_inverse = $rule["inverse"];

			if (!$reg_exp)
				continue;

			// errors from invalid user-supplied expressions are suppressed;
			// such a rule simply does not match
			switch ($rule["type"]) {
				case "title":
					$match = @preg_match("/$reg_exp/iu", $title);
					break;
				case "content":
					// we don't need to deal with multiline regexps
					$content = preg_replace("/[\r\n\t]/", "", $content);

					$match = @preg_match("/$reg_exp/iu", $content);
					break;
				case "both":
					// we don't need to deal with multiline regexps
					$content = preg_replace("/[\r\n\t]/", "", $content);

					$match = (@preg_match("/$reg_exp/iu", $title) || @preg_match("/$reg_exp/iu", $content));
					break;
				case "link":
					$match = @preg_match("/$reg_exp/iu", $link);
					break;
				case "author":
					$match = @preg_match("/$reg_exp/iu", $author);
					break;
				case "tag":
					foreach ($tags as $tag) {
						if (@preg_match("/$reg_exp/iu", $tag)) {
							$match = true;
							break;
						}
					}
					break;
			}

			if ($rule_inverse) $match = !$match;

			if ($match_any_rule) {
				if ($match) {
					$filter_match = true;
					break;
				}
			} else {
				$filter_match = $match;
				if (!$match) {
					break;
				}
			}
		}

		if ($inverse) $filter_match = !$filter_match;

		if ($filter_match) {
			if (is_array($matched_rules) && $last_rule !== null) {
				array_push($matched_rules, $last_rule);
			}

			foreach ($filter["actions"] as $action) {
				array_push($matches, $action);

				// if Stop action encountered, perform no further processing
				if (isset($action["type"]) && $action["type"] == "stop") return $matches;
			}
		}
	}

	return $matches;
}
1424
/**
 * Returns the first filter action of the given type.
 *
 * @param array $filters filter actions, each with a "type" key
 * @param string $filter_name action type to look for
 * @return array|bool the first action whose type equals $filter_name, false when there is none
 */
static function find_article_filter($filters, $filter_name) {
	foreach ($filters as $candidate) {
		if ($candidate["type"] != $filter_name) {
			continue;
		}

		return $candidate;
	}

	return false;
}
1433
/**
 * Returns every filter action of the given type.
 *
 * @param array $filters filter actions, each with a "type" key
 * @param string $filter_name action type to look for
 * @return array matching actions in their original order (empty when none match)
 */
static function find_article_filters($filters, $filter_name) {
	$found = array();

	foreach ($filters as $candidate) {
		if ($candidate["type"] != $filter_name) {
			continue;
		}

		$found[] = $candidate;
	}

	return $found;
}
1444
/**
 * Sums the "param" values of all actions of type "score".
 *
 * @param array $filters filter actions, each with "type" and "param" keys
 * @return int|float total score modifier, 0 when there are no score actions
 */
static function calculate_article_score($filters) {
	$total = 0;

	foreach ($filters as $action) {
		if ($action["type"] != "score") {
			continue;
		}

		$total += $action["param"];
	}

	return $total;
}
1455
/**
 * Tells whether a label list contains the given caption.
 *
 * @param array $labels list of label arrays; element 1 of each is compared against $caption
 * @param string $caption caption to look for
 * @return bool true when at least one label has this caption
 */
static function labels_contains_caption($labels, $caption) {
	foreach ($labels as $entry) {
		if ($entry[1] != $caption) {
			continue;
		}

		return true;
	}

	return false;
}
1465
/**
 * Applies the "label" actions of matched filters to an article.
 *
 * A label is added through Labels::add_article() unless its caption is
 * already present in $article_labels.
 *
 * @param int $id article id handed to Labels::add_article()
 * @param array $filters filter actions, each with "type" and "param" keys
 * @param int $owner_uid owner of the article
 * @param array $article_labels labels already known for the article (caption at index 1)
 */
static function assign_article_to_label_filters($id, $filters, $owner_uid, $article_labels) {
	foreach ($filters as $action) {
		if ($action["type"] != "label") {
			continue;
		}

		$caption = $action["param"];

		if (self::labels_contains_caption($article_labels, $caption)) {
			continue;
		}

		Labels::add_article($id, $caption, $owner_uid);
	}
}
87764a50 1475
/**
 * Derives a GUID-like string from an article title.
 *
 * HTML tags are stripped, the text is lower-cased as UTF-8, and every
 * space, quote, comma, dot, colon and semicolon is replaced with "-".
 *
 * @param string $title article title
 * @return string normalized title
 */
static function make_guid_from_title($title) {
	$plain = strip_tags($title);
	$lowered = mb_strtolower($plain, 'utf-8');

	return preg_replace("/[ \"\',.:;]/", "-", $lowered);
}
1480
/**
 * Deletes counter cache rows that no longer have a matching owner row.
 *
 * Rows with feed_id > 0 are removed from ttrss_counters_cache when no
 * feed with that id and owner exists, and from ttrss_cat_counters_cache
 * when no feed category with that id and owner exists.
 *
 * @param bool $debug when true, the number of removed rows is sent to _debug()
 */
static function cleanup_counters_cache($debug) {
	$pdo = Db::pdo();

	$feed_result = $pdo->query("DELETE FROM ttrss_counters_cache
			WHERE feed_id > 0 AND
			(SELECT COUNT(id) FROM ttrss_feeds WHERE
				id = feed_id AND
				ttrss_counters_cache.owner_uid = ttrss_feeds.owner_uid) = 0");

	$feed_rows = $feed_result->rowCount();

	$cat_result = $pdo->query("DELETE FROM ttrss_cat_counters_cache
			WHERE feed_id > 0 AND
			(SELECT COUNT(id) FROM ttrss_feed_categories WHERE
				id = feed_id AND
				ttrss_cat_counters_cache.owner_uid = ttrss_feed_categories.owner_uid) = 0");

	$cat_rows = $cat_result->rowCount();

	if ($debug) {
		_debug("Removed $feed_rows (feeds) $cat_rows (cats) orphaned counter cache entries.");
	}
}
1502
/**
 * Runs the housekeeping hook of one user's plugins.
 *
 * A separate PluginHost is created, the user's plugins are loaded into
 * it, and HOOK_HOUSE_KEEPING is run on that host.
 *
 * @param int $owner_uid user whose plugins are loaded
 */
static function housekeeping_user($owner_uid) {
	$user_host = new PluginHost();

	load_user_plugins($owner_uid, $user_host);

	$user_host->run_hooks(
		PluginHost::HOOK_HOUSE_KEEPING,
		"hook_house_keeping",
		"");
}
1510
e6c886bf
AD
/**
 * Runs the housekeeping tasks that are not tied to a single user.
 *
 * In order: expire cached files, lock files and error log entries,
 * rebuild the feed browser cache, purge orphaned articles, clean up the
 * counter caches, and finally run HOOK_HOUSE_KEEPING on the global
 * plugin host.
 *
 * @param bool $debug passed on to the individual tasks to enable their debug output
 */
static function housekeeping_common($debug) {
	self::expire_cached_files($debug);
	self::expire_lock_files($debug);
	self::expire_error_log($debug);

	$processed = self::update_feedbrowser_cache();
	_debug("Feedbrowser updated, $processed feeds processed.");

	Article::purge_orphans(true);
	self::cleanup_counters_cache($debug);

	// cached tag cleanup (cleanup_tags) is currently not run here

	$host = PluginHost::getInstance();
	$host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
ea79a0e0 1527
e6c886bf
AD
/**
 * Makes sure a favicon is cached for a feed.
 *
 * The icon is stored as ICONS_DIR/<feed>.ico. When that file does not
 * exist yet, the favicon URL of the site is looked up and downloaded,
 * and the data is written only if it starts with a known image
 * signature (ICO, GIF, PNG, JPEG or BMP).
 *
 * @param string $site_url site whose favicon should be fetched
 * @param int $feed feed id, used as the icon file name
 * @return string path of the icon file; returned for an already cached
 *         icon as well, and also when nothing could be downloaded, in
 *         which case the file may not exist
 */
static function check_feed_favicon($site_url, $feed) {
	$icon_file = ICONS_DIR . "/$feed.ico";

	// already cached: nothing to fetch
	if (file_exists($icon_file)) {
		return $icon_file;
	}

	$favicon_url = get_favicon_url($site_url);

	if ($favicon_url) {
		// Limiting to "image" type misses those served with text/plain
		$contents = fetch_file_contents($favicon_url); // , "image");

		if ($contents) {
			// Crude image type matching.
			// Patterns gleaned from the file(1) source code.
			$signatures = array(
				'/^\x00\x00\x01\x00/',         // MS Windows icon resource
				'/^GIF8/',                     // GIF image data
				'/^\x89PNG\x0d\x0a\x1a\x0a/',  // PNG image data
				'/^\xff\xd8/',                 // JPEG image data
				'/^BM/');                      // PC bitmap (OS2, Windows BMP files)

			$recognized = false;

			foreach ($signatures as $signature) {
				if (preg_match($signature, $contents)) {
					$recognized = true;
					break;
				}
			}

			// unknown type: do not store it
			if (!$recognized) {
				$contents = "";
			}
		}

		if ($contents) {
			// best effort: a failure to open the file is silently ignored
			$fp = @fopen($icon_file, "w");

			if ($fp) {
				fwrite($fp, $contents);
				fclose($fp);
				chmod($icon_file, 0644);
			}
		}
	}

	return $icon_file;
}
e6c886bf
AD
1582
1583
1584
bec5ba93 1585}