]> git.wh0rd.org - tt-rss.git/blame - classes/rssutils.php
remove cache/simplepie
[tt-rss.git] / classes / rssutils.php
CommitLineData
2c08214a 1<?php
e6c886bf
AD
2class RSSUtils {
/**
 * Computes a SHA-1 fingerprint of an article array.
 *
 * Every non-null field except "feed" contributes sha1("<key>:" . sha1(<value>)),
 * where array values are comma-joined and all values are passed through
 * strip_tags() first. The per-field digests are concatenated in iteration
 * order and prefixed with the comma-joined plugin names before the final
 * sha1(), so a change in the loaded plugin set changes the hash.
 *
 * @param array $article article fields keyed by name
 * @param object $pluginhost object exposing get_plugin_names()
 * @return string 40-character hexadecimal SHA-1 digest
 */
static function calculate_article_hash($article, $pluginhost) {
	$field_digests = "";

	foreach ($article as $field => $value) {
		// the "feed" entry and unset (null) fields do not take part in the hash
		if ($field == "feed" || !isset($value)) continue;

		$flattened = is_array($value) ? implode(",", $value) : $value;

		$field_digests .= sha1("$field:" . sha1(strip_tags($flattened)));
	}

	$plugin_names = implode(",", $pluginhost->get_plugin_names());

	return sha1($plugin_names . $field_digests);
}
18
102a0135
AD
19 // Strips utf8mb4 characters (i.e. emoji) for mysql
/**
 * Replaces every character in the range U+10000..U+10FFFF (4-byte UTF-8
 * sequences, e.g. emoji) with U+FFFD REPLACEMENT CHARACTER.
 *
 * preg_replace() with the /u modifier returns null when the subject is not
 * valid UTF-8. Previously that null was returned as-is, discarding the whole
 * string. For string input, invalid byte sequences are now scrubbed first and
 * the replacement is retried, so the rest of the text survives.
 *
 * @param string $str text to process
 * @return string text without 4-byte UTF-8 characters
 */
static function strip_utf8mb4($str) {
	$pattern = '/[\x{10000}-\x{10FFFF}]/u';
	$replacement = "\xEF\xBF\xBD"; // U+FFFD encoded as UTF-8

	$result = preg_replace($pattern, $replacement, $str);

	if ($result === null && is_string($str)) {
		// malformed UTF-8: converting UTF-8 to UTF-8 substitutes the invalid
		// sequences, after which the pattern can be applied
		$scrubbed = mb_convert_encoding($str, 'UTF-8', 'UTF-8');

		$result = preg_replace($pattern, $replacement, $scrubbed);

		if ($result === null) $result = $scrubbed;
	}

	return $result;
}
102a0135 23
/**
 * Rebuilds ttrss_feedbrowser_cache from the most subscribed feeds.
 *
 * Selects up to 1000 (feed_url, site_url, title) groups ordered by subscriber
 * count, excluding any feed URL that has a private or credentialed
 * subscription, then replaces the cache table contents inside a single
 * transaction. Because grouping includes site_url and title, one feed_url may
 * appear in several groups; only the first (most subscribed) one is stored.
 *
 * If anything fails after the transaction was opened, it is rolled back and
 * the exception is rethrown, so the previous cache contents are kept.
 *
 * @return int number of rows inserted into the cache
 */
static function update_feedbrowser_cache() {

	$pdo = Db::pdo();

	$sth = $pdo->query("SELECT feed_url, site_url, title, COUNT(id) AS subscribers
		FROM ttrss_feeds WHERE feed_url NOT IN (SELECT feed_url FROM ttrss_feeds
			WHERE private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%')
		GROUP BY feed_url, site_url, title ORDER BY subscribers DESC LIMIT 1000");

	$pdo->beginTransaction();

	try {
		$pdo->query("DELETE FROM ttrss_feedbrowser_cache");

		// both statements are loop-invariant, so prepare them only once
		$check_sth = $pdo->prepare("SELECT subscribers FROM
			ttrss_feedbrowser_cache WHERE feed_url = ?");

		$insert_sth = $pdo->prepare("INSERT INTO ttrss_feedbrowser_cache
			(feed_url, site_url, title, subscribers)
			VALUES
			(?, ?, ?, ?)");

		$count = 0;

		while ($line = $sth->fetch()) {

			$check_sth->execute([$line["feed_url"]]);
			$already_cached = (bool) $check_sth->fetch();

			// release the result set before this statement is executed again
			$check_sth->closeCursor();

			if (!$already_cached) {
				$insert_sth->execute([$line["feed_url"], $line["site_url"],
					$line["title"], $line["subscribers"]]);

				++$count;
			}
		}

		$pdo->commit();

	} catch (Exception $e) {
		// do not leave a half-rebuilt cache inside an open transaction
		if ($pdo->inTransaction()) $pdo->rollBack();

		throw $e;
	}

	return $count;

}
70
/**
 * Runs one feed update batch for the update daemon.
 *
 * Steps, in order:
 *  - aborts via die() if the database schema version differs from SCHEMA_VERSION;
 *  - selects distinct feed URLs that are due for an update (per-feed interval,
 *    or the owner's DEFAULT_UPDATE_INTERVAL preference when the feed interval
 *    is 0) and were not started by another process in the last 10 minutes,
 *    least recently updated first;
 *  - stamps last_update_started on those URLs before doing any work;
 *  - for each URL, updates the first matching due feed through
 *    RSSUtils::update_rss_feed();
 *  - runs RSSUtils::housekeeping_user() for every owner touched by the batch
 *    and finally Digest::send_headlines_digests().
 *
 * @param int $limit maximum number of rows per query; a falsy value removes the LIMIT clause
 * @param bool $debug whether the scheduling and per-feed messages guarded by this flag are printed
 * @return int number of feeds processed
 */
static function update_daemon_common($limit = DAEMON_FEED_LIMIT, $debug = true) {
	$schema_version = get_schema_version();

	if ($schema_version != SCHEMA_VERSION) {
		die("Schema version is wrong, please upgrade the database.\n");
	}

	$pdo = Db::pdo();

	$is_pgsql = DB_TYPE == "pgsql";

	// optionally restrict the batch to users who logged in recently
	if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
		if ($is_pgsql) {
			$login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
		} else {
			$login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
		}
	} else {
		$login_thresh_qpart = "";
	}

	// the two backends differ only in how a point in the past is expressed
	if ($is_pgsql) {
		$pref_due_before = "NOW() - CAST((ttrss_user_prefs.value || ' minutes') AS INTERVAL)";
		$feed_due_before = "NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)";
		$started_before = "NOW() - INTERVAL '10 minutes'";
	} else {
		$pref_due_before = "DATE_SUB(NOW(), INTERVAL CONVERT(ttrss_user_prefs.value, SIGNED INTEGER) MINUTE)";
		$feed_due_before = "DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)";
		$started_before = "DATE_SUB(NOW(), INTERVAL 10 MINUTE)";
	}

	$update_limit_qpart = "AND ((
			ttrss_feeds.update_interval = 0
			AND ttrss_user_prefs.value != '-1'
			AND ttrss_feeds.last_updated < $pref_due_before
		) OR (
			ttrss_feeds.update_interval > 0
			AND ttrss_feeds.last_updated < $feed_due_before
		) OR (ttrss_feeds.last_updated IS NULL
			AND ttrss_user_prefs.value != '-1')
		OR (last_updated = '1970-01-01 00:00:00'
			AND ttrss_user_prefs.value != '-1'))";

	// Test if feed is currently being updated by another process.
	$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < $started_before)";

	$query_limit = $limit ? sprintf("LIMIT %d", $limit) : "";

	// Update the least recently updated feeds first
	$query_order = "ORDER BY last_updated";
	if ($is_pgsql) $query_order .= " NULLS FIRST";

	$query = "SELECT DISTINCT ttrss_feeds.feed_url, ttrss_feeds.last_updated
		FROM
			ttrss_feeds, ttrss_users, ttrss_user_prefs
		WHERE
			ttrss_feeds.owner_uid = ttrss_users.id
			AND ttrss_user_prefs.profile IS NULL
			AND ttrss_users.id = ttrss_user_prefs.owner_uid
			AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL'
			$login_thresh_qpart $update_limit_qpart
			$updstart_thresh_qpart
			$query_order $query_limit";

	$res = $pdo->query($query);

	$feeds_to_update = array();
	while ($line = $res->fetch()) {
		array_push($feeds_to_update, $line['feed_url']);
	}

	if ($debug) _debug(sprintf("Scheduled %d feeds to update...", count($feeds_to_update)));

	// Update last_update_started before actually starting the batch
	// in order to minimize collision risk for parallel daemon tasks
	if (count($feeds_to_update) > 0) {
		$feeds_qmarks = arr_qmarks($feeds_to_update);

		$tmph = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
			WHERE feed_url IN ($feeds_qmarks)");
		$tmph->execute($feeds_to_update);
	}

	$nf = 0;
	$bstarted = microtime(true);

	$batch_owners = array();

	// since we have the data cached, we can deal with other feeds with the same url
	$usth = $pdo->prepare("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
		FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
			ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
			ttrss_users.id = ttrss_user_prefs.owner_uid AND
			ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL' AND
			ttrss_user_prefs.profile IS NULL AND
			feed_url = ?
			$update_limit_qpart
			$login_thresh_qpart
		ORDER BY ttrss_feeds.id $query_limit");

	foreach ($feeds_to_update as $feed) {
		if ($debug) _debug("Base feed: $feed");

		$usth->execute([$feed]);

		$tline = $usth->fetch();

		// Only the first row is used; release the remaining rows so the
		// connection is free for the queries issued while updating the feed
		// and so the statement can be executed again on the next iteration.
		$usth->closeCursor();

		if (!$tline) continue;

		if ($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);

		if (!in_array($tline["owner_uid"], $batch_owners))
			array_push($batch_owners, $tline["owner_uid"]);

		$fstarted = microtime(true);

		try {
			RSSUtils::update_rss_feed($tline["id"], true, false);
		} catch (PDOException $e) {
			Logger::get()->log_error(E_USER_NOTICE, $e->getMessage(), $e->getFile(), $e->getLine(), $e->getTraceAsString());

			try {
				$pdo->rollback();
			} catch (PDOException $rollback_error) {
				// it doesn't matter if there wasn't actually anything to rollback, PDO Exception can be
				// thrown outside of an active transaction during feed update
			}
		}

		// update_rss_feed() changes the debug suppression state; switch output back on
		_debug_suppress(false);

		_debug(sprintf(" %.4f (sec)", microtime(true) - $fstarted));

		++$nf;
	}

	if ($nf > 0) {
		$elapsed = microtime(true) - $bstarted;

		_debug(sprintf("Processed %d feeds in %.4f (sec), %.4f (sec/feed avg)", $nf,
			$elapsed, $elapsed / $nf));
	}

	foreach ($batch_owners as $owner_uid) {
		_debug("Running housekeeping tasks for user $owner_uid...");

		RSSUtils::housekeeping_user($owner_uid);
	}

	// Send feed digests by email if needed.
	Digest::send_headlines_digests($debug);

	return $nf;
}
2c08214a 227
6022776d 228 // this is used when subscribing
/**
 * Fills in the title and site URL of a feed row.
 *
 * The values are first requested from plugins registered for
 * HOOK_FEED_BASIC_INFO. If no plugin supplies anything, the feed is fetched
 * and parsed, and the title (cut to 199 characters) and site link (cut to 245
 * characters) are taken from it. The stored title is replaced only when it is
 * empty or "[Unknown]"; the stored site URL is replaced whenever a different
 * non-empty one was found.
 *
 * A plugin may return an array without the "title" or "site_url" key; missing
 * keys are treated as empty instead of raising undefined-index notices.
 *
 * @param int $feed id of the row in ttrss_feeds
 * @return void
 */
static function set_basic_feed_info($feed) {

	$pdo = Db::pdo();

	$sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login
		FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	if (!$row) return; // unknown feed id, nothing to do

	$owner_uid = $row["owner_uid"];
	$auth_login = $row["auth_login"];
	$auth_pass = $row["auth_pass"];
	$fetch_url = $row["feed_url"];

	// a private plugin host is used so that the feed owner's plugins get loaded
	$pluginhost = new PluginHost();
	$user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);

	$pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
	$pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
	$pluginhost->load_data();

	$basic_info = array();
	foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
		$basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
	}

	// no plugin provided the data: download and parse the feed ourselves
	if (!$basic_info) {
		$feed_data = fetch_file_contents($fetch_url, false,
			$auth_login, $auth_pass, false,
			FEED_FETCH_TIMEOUT,
			0);

		global $fetch_curl_used;

		if (!$fetch_curl_used) {
			$tmp = @gzdecode($feed_data);

			if ($tmp) $feed_data = $tmp;
		}

		$feed_data = trim($feed_data);

		$rss = new FeedParser($feed_data);
		$rss->init();

		if (!$rss->error()) {
			$basic_info = array(
				'title' => mb_substr($rss->get_title(), 0, 199),
				'site_url' => mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245)
			);
		}
	}

	if (!$basic_info || !is_array($basic_info)) return;

	$sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	if (!$row) return;

	$registered_title = $row["title"];
	$orig_site_url = $row["site_url"];

	// plugin-supplied arrays are not guaranteed to carry both keys
	$new_title = isset($basic_info['title']) ? $basic_info['title'] : '';
	$new_site_url = isset($basic_info['site_url']) ? $basic_info['site_url'] : '';

	if ($new_title && (!$registered_title || $registered_title == "[Unknown]")) {

		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			title = ? WHERE id = ?");
		$sth->execute([$new_title, $feed]);
	}

	if ($new_site_url && $orig_site_url != $new_site_url) {
		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			site_url = ? WHERE id = ?");
		$sth->execute([$new_site_url, $feed]);
	}
}
309
7b55001e 310 /**
e6c886bf
AD
311 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
312 */
313 static function update_rss_feed($feed, $no_cache = false) {
2c08214a 314
e6532439 315 $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || clean($_REQUEST['xdebug']);
2c08214a 316
4f71d743 317 _debug_suppress(!$debug_enabled);
68cccafc 318 _debug("start", $debug_enabled);
2c08214a 319
0567016b
AD
320 $pdo = Db::pdo();
321
322 $sth = $pdo->prepare("SELECT title FROM ttrss_feeds WHERE id = ?");
323 $sth->execute([$feed]);
bfe1eb4e 324
0567016b 325 if (!$row = $sth->fetch()) {
bfe1eb4e
AD
326 _debug("feed $feed NOT FOUND/SKIPPED", $debug_enabled);
327 user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING);
328 return false;
329 }
330
0567016b 331 $title = $row["title"];
6bb96beb
AD
332
333 // feed was batch-subscribed or something, we need to get basic info
334 // this is not optimal currently as it fetches stuff separately TODO: optimize
335 if ($title == "[Unknown]") {
336 _debug("setting basic feed info for $feed...");
e6c886bf 337 RSSUtils::set_basic_feed_info($feed);
6bb96beb
AD
338 }
339
0567016b 340 $sth = $pdo->prepare("SELECT id,update_interval,auth_login,
5ba1ddd4 341 feed_url,auth_pass,cache_images,
5321e775 342 mark_unread_on_update, owner_uid,
2c940c48
AD
343 auth_pass_encrypted, feed_language,
344 last_modified,
345 ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
0567016b
AD
346 FROM ttrss_feeds WHERE id = ?");
347 $sth->execute([$feed]);
2c940c48 348
0567016b 349 if ($row = $sth->fetch()) {
2c08214a 350
0567016b 351 $owner_uid = $row["owner_uid"];
187abfe7 352 $mark_unread_on_update = $row["mark_unread_on_update"];
2c08214a 353
0567016b
AD
354 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
355 WHERE id = ?");
356 $sth->execute([$feed]);
2c08214a 357
0567016b
AD
358 $auth_login = $row["auth_login"];
359 $auth_pass = $row["auth_pass"];
0567016b
AD
360 $stored_last_modified = $row["last_modified"];
361 $last_unconditional = $row["last_unconditional"];
187abfe7 362 $cache_images = $row["cache_images"];
0567016b 363 $fetch_url = $row["feed_url"];
069aea59 364
0567016b
AD
365 $feed_language = mb_strtolower($row["feed_language"]);
366 if (!$feed_language) $feed_language = 'english';
2c08214a 367
0567016b
AD
368 } else {
369 return false;
370 }
2c08214a 371
f074ffe9 372 $date_feed_processed = date('Y-m-d H:i');
2c08214a 373
342e8a9e 374 $cache_filename = CACHE_DIR . "/feeds/" . sha1($fetch_url) . ".xml";
f074ffe9 375
ee65bef4
AD
376 $pluginhost = new PluginHost();
377 $pluginhost->set_debug($debug_enabled);
378 $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
379
380 $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
381 $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
382 $pluginhost->load_data();
383
7b55001e 384 $rss_hash = false;
4f9cbdff 385
7b55001e
AD
386 $force_refetch = isset($_REQUEST["force_refetch"]);
387 $feed_data = "";
687a4f59 388
7b55001e
AD
389 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) {
390 $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass);
391 }
2c08214a 392
7b55001e
AD
393 // try cache
394 if (!$feed_data &&
395 file_exists($cache_filename) &&
396 is_readable($cache_filename) &&
397 !$auth_login && !$auth_pass &&
398 filemtime($cache_filename) > time() - 30) {
be574731 399
7b55001e 400 _debug("using local cache [$cache_filename].", $debug_enabled);
52637d3b 401
7b55001e 402 @$feed_data = file_get_contents($cache_filename);
f074ffe9 403
7b55001e
AD
404 if ($feed_data) {
405 $rss_hash = sha1($feed_data);
88edaa93 406 }
ee65bef4 407
7b55001e
AD
408 } else {
409 _debug("local cache will not be used for this feed", $debug_enabled);
410 }
312742db 411
153cb6d3
AD
412 global $fetch_last_modified;
413
7b55001e
AD
414 // fetch feed from source
415 if (!$feed_data) {
e50c8eaa 416 _debug("last unconditional update request: $last_unconditional");
312742db 417
7b55001e
AD
418 if (ini_get("open_basedir") && function_exists("curl_init")) {
419 _debug("not using CURL due to open_basedir restrictions");
420 }
3f6f0857 421
7f4a4045
AD
422 if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
423 _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
e50c8eaa 424
7f4a4045
AD
425 $force_refetch = true;
426 } else {
427 _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
428 }
153cb6d3 429
7f4a4045 430 _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
153cb6d3
AD
431
432 $feed_data = fetch_file_contents([
433 "url" => $fetch_url,
434 "login" => $auth_login,
435 "pass" => $auth_pass,
436 "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
437 "last_modified" => $force_refetch ? "" : $stored_last_modified
438 ]);
3f6f0857 439
7b55001e 440 global $fetch_curl_used;
3f6f0857 441
7b55001e
AD
442 if (!$fetch_curl_used) {
443 $tmp = @gzdecode($feed_data);
1367bc3f 444
7b55001e
AD
445 if ($tmp) $feed_data = $tmp;
446 }
017401dd 447
7b55001e 448 $feed_data = trim($feed_data);
fd687300 449
7b55001e 450 _debug("fetch done.", $debug_enabled);
9d930af9 451 _debug("source last modified: " . $fetch_last_modified, $debug_enabled);
153cb6d3
AD
452
453 if ($feed_data && $fetch_last_modified != $stored_last_modified) {
0567016b
AD
454 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_modified = ? WHERE id = ?");
455 $sth->execute([substr($fetch_last_modified, 0, 245), $feed]);
153cb6d3 456 }
95beaa14 457
7b55001e 458 // cache vanilla feed data for re-use
342e8a9e 459 if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/feeds")) {
7b55001e
AD
460 $new_rss_hash = sha1($feed_data);
461
462 if ($new_rss_hash != $rss_hash) {
463 _debug("saving $cache_filename", $debug_enabled);
464 @file_put_contents($cache_filename, $feed_data);
95beaa14 465 }
4f9cbdff 466 }
7b55001e 467 }
017401dd 468
7b55001e
AD
469 if (!$feed_data) {
470 global $fetch_last_error;
471 global $fetch_last_error_code;
f074ffe9 472
7b55001e 473 _debug("unable to fetch: $fetch_last_error [$fetch_last_error_code]", $debug_enabled);
f074ffe9 474
7b55001e
AD
475 // If-Modified-Since
476 if ($fetch_last_error_code != 304) {
0567016b 477 $error_message = $fetch_last_error;
7b55001e
AD
478 } else {
479 _debug("source claims data not modified, nothing to do.", $debug_enabled);
0567016b 480 $error_message = "";
7b55001e 481 }
4f9cbdff 482
0567016b
AD
483 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
484 last_updated = NOW() WHERE id = ?");
485 $sth->execute([$error_message, $feed]);
4f9cbdff 486
7b55001e 487 return;
f074ffe9
AD
488 }
489
1ffe3391 490 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) {
6791af0c 491 $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed);
017401dd
AD
492 }
493
07d3431e
AD
494 $rss = new FeedParser($feed_data);
495 $rss->init();
2c08214a 496
19b3992b 497 if (!$rss->error()) {
2c08214a 498
d2a421e3 499 // We use local pluginhost here because we need to load different per-user feed plugins
1ffe3391 500 $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
4412b877 501
df659891 502 _debug("language: $feed_language", $debug_enabled);
68cccafc 503 _debug("processing feed data...", $debug_enabled);
2c08214a 504
382268c6
AD
505 if (DB_TYPE == "pgsql") {
506 $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
507 } else {
508 $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
509 }
510
0567016b 511 $sth = $pdo->prepare("SELECT owner_uid,favicon_avg_color,
382268c6
AD
512 (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
513 favicon_needs_check
0567016b
AD
514 FROM ttrss_feeds WHERE id = ?");
515 $sth->execute([$feed]);
2c08214a 516
0567016b 517 if ($row = $sth->fetch()) {
187abfe7 518 $favicon_needs_check = $row["favicon_needs_check"];
0567016b
AD
519 $favicon_avg_color = $row["favicon_avg_color"];
520 $owner_uid = $row["owner_uid"];
521 } else {
522 return false;
523 }
2c08214a 524
0567016b 525 $site_url = mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245);
2c08214a 526
cd07592c
AD
527 _debug("site_url: $site_url", $debug_enabled);
528 _debug("feed_title: " . $rss->get_title(), $debug_enabled);
529
687a4f59 530 if ($favicon_needs_check || $force_refetch) {
36490f11
AD
531
532 /* terrible hack: if we crash on floicon shit here, we won't check
560cbd8c 533 * the icon avgcolor again (unless the icon got updated) */
36490f11 534
560cbd8c
AD
535 $favicon_file = ICONS_DIR . "/$feed.ico";
536 $favicon_modified = @filemtime($favicon_file);
537
68cccafc 538 _debug("checking favicon...", $debug_enabled);
687a4f59 539
e6c886bf 540 RSSUtils::check_feed_favicon($site_url, $feed);
560cbd8c
AD
541 $favicon_modified_new = @filemtime($favicon_file);
542
543 if ($favicon_modified_new > $favicon_modified)
544 $favicon_avg_color = '';
687a4f59 545
0567016b 546 $favicon_colorstring = "";
6ee0d4b0 547 if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') {
e6c886bf 548 require_once "colors.php";
687a4f59 549
0567016b
AD
550 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE
551 id = ?");
552 $sth->execute([$feed]);
aafd55ba 553
0567016b
AD
554 $favicon_color = calculate_avg_color($favicon_file);
555
556 $favicon_colorstring = ",favicon_avg_color = " . $pdo->quote($favicon_color);
63c323f7 557
36490f11 558 } else if ($favicon_avg_color == 'fail') {
84ceb6bd 559 _debug("floicon failed on this file, not trying to recalculate avg color", $debug_enabled);
6ac722d5 560 }
687a4f59 561
0567016b
AD
562 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_last_checked = NOW()
563 $favicon_colorstring WHERE id = ?");
564 $sth->execute([$feed]);
f2798eb6 565 }
2c08214a 566
68cccafc 567 _debug("loading filters & labels...", $debug_enabled);
2c08214a 568
a42c55f0 569 $filters = load_filters($feed, $owner_uid);
2c08214a 570
02f3992a 571 if ($debug_enabled) {
7f4a4045
AD
572 print_r($filters);
573 }
02f3992a 574
68cccafc 575 _debug("" . count($filters) . " filters loaded.", $debug_enabled);
2c08214a 576
19b3992b 577 $items = $rss->get_items();
2c08214a 578
19b3992b 579 if (!is_array($items)) {
68cccafc 580 _debug("no articles found.", $debug_enabled);
2c08214a 581
0567016b
AD
582 $sth = $pdo->prepare("UPDATE ttrss_feeds
583 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
584 $sth->execute([$feed]);
2c08214a 585
0567016b 586 return true; // no articles
2c08214a
AD
587 }
588
68cccafc 589 _debug("processing articles...", $debug_enabled);
2c08214a 590
6c9f3d4a
AD
591 $tstart = time();
592
19b3992b 593 foreach ($items as $item) {
0500e14c
AD
594 $pdo->beginTransaction();
595
e6532439 596 if (clean($_REQUEST['xdebug']) == 3) {
2c08214a
AD
597 print_r($item);
598 }
599
6c9f3d4a
AD
600 if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) {
601 _debug("looks like there's too many articles to process at once, breaking out", $debug_enabled);
0500e14c 602 $pdo->commit();
6c9f3d4a
AD
603 break;
604 }
605
0567016b
AD
606 $entry_guid = strip_tags($item->get_id());
607 if (!$entry_guid) $entry_guid = strip_tags($item->get_link());
e6c886bf 608 if (!$entry_guid) $entry_guid = RSSUtils::make_guid_from_title($item->get_title());
0500e14c
AD
609
610 if (!$entry_guid) {
611 $pdo->commit();
612 continue;
613 }
2c08214a 614
3a4c8973
AD
615 $entry_guid = "$owner_uid,$entry_guid";
616
0567016b 617 $entry_guid_hashed = 'SHA1:' . sha1($entry_guid);
5e3d5480 618
68cccafc 619 _debug("guid $entry_guid / $entry_guid_hashed", $debug_enabled);
5e3d5480 620
0567016b 621 $entry_timestamp = strip_tags($item->get_date());
04d2f9c8
AD
622
623 _debug("orig date: " . $item->get_date(), $debug_enabled);
2c08214a 624
30123fe6 625 if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) {
2c08214a 626 $entry_timestamp = time();
2c08214a
AD
627 }
628
629 $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
630
68cccafc 631 _debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
2c08214a 632
0567016b 633 $entry_title = strip_tags($item->get_title());
1b35d30c 634
5d56d100 635 $entry_link = rewrite_relative_url($site_url, $item->get_link());
2c08214a 636
3bbaf902 637 $entry_language = mb_substr(trim($item->get_language()), 0, 2);
22a866ed 638
68cccafc
AD
639 _debug("title $entry_title", $debug_enabled);
640 _debug("link $entry_link", $debug_enabled);
22a866ed 641 _debug("language $entry_language", $debug_enabled);
2c08214a
AD
642
643 if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
644
19b3992b
AD
645 $entry_content = $item->get_content();
646 if (!$entry_content) $entry_content = $item->get_description();
2c08214a 647
e6532439 648 if (clean($_REQUEST["xdebug"]) == 2) {
9ec10352 649 print "content: ";
0bc503ff 650 print htmlspecialchars($entry_content);
3c696512 651 print "\n";
2c08214a
AD
652 }
653
0567016b 654 $entry_comments = mb_substr(strip_tags($item->get_comments_url()), 0, 245);
12ff230b 655 $num_comments = (int) $item->get_comments_count();
2c08214a 656
0567016b
AD
657 $entry_author = strip_tags($item->get_author());
658 $entry_guid = mb_substr($entry_guid, 0, 245);
2c08214a 659
68cccafc
AD
660 _debug("author $entry_author", $debug_enabled);
661 _debug("num_comments: $num_comments", $debug_enabled);
ee78f81c 662 _debug("looking for tags...", $debug_enabled);
2c08214a
AD
663
664 // parse <category> entries into tags
665
666 $additional_tags = array();
667
19b3992b 668 $additional_tags_src = $item->get_categories();
2c08214a 669
19b3992b
AD
670 if (is_array($additional_tags_src)) {
671 foreach ($additional_tags_src as $tobj) {
cd07592c 672 array_push($additional_tags, $tobj);
2c08214a 673 }
19b3992b 674 }
2c08214a 675
fa6fbd36 676 $entry_tags = array_unique($additional_tags);
2c08214a 677
5edf4b73 678 for ($i = 0; $i < count($entry_tags); $i++) {
2c08214a
AD
679 $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
680
5edf4b73
AD
681 // we don't support numeric tags, let's prefix them
682 if (is_numeric($entry_tags[$i])) $entry_tags[$i] = 't:' . $entry_tags[$i];
683 }
684
ee78f81c
AD
685 _debug("tags found: " . join(",", $entry_tags), $debug_enabled);
686
68cccafc 687 _debug("done collecting data.", $debug_enabled);
2c08214a 688
0567016b
AD
689 $sth = $pdo->prepare("SELECT id, content_hash, lang FROM ttrss_entries
690 WHERE guid = ? OR guid = ?");
691 $sth->execute([$entry_guid, $entry_guid_hashed]);
b30abdad 692
0567016b
AD
693 if ($row = $sth->fetch()) {
694 $base_entry_id = $row["id"];
695 $entry_stored_hash = $row["content_hash"];
4a0da0e5 696 $article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
a8ac7661 697
2ed0d6c4 698 $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
a8ac7661 699 $entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
b30abdad 700 } else {
b1840673
AD
701 $base_entry_id = false;
702 $entry_stored_hash = "";
a29fe121 703 $article_labels = array();
b30abdad
AD
704 }
705
455b1401 706 $article = array("owner_uid" => $owner_uid, // read only
b30abdad 707 "guid" => $entry_guid, // read only
59e83455 708 "guid_hashed" => $entry_guid_hashed, // read only
19b3992b
AD
709 "title" => $entry_title,
710 "content" => $entry_content,
711 "link" => $entry_link,
a29fe121 712 "labels" => $article_labels, // current limitation: can add labels to article, can't remove them
19b3992b 713 "tags" => $entry_tags,
e02555c1 714 "author" => $entry_author,
c9299c28 715 "force_catchup" => false, // ugly hack for the time being
6de3a1be 716 "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
3318d324 717 "language" => $entry_language,
20d2195f 718 "num_comments" => $num_comments, // read only
f73e03e0
AD
719 "feed" => array("id" => $feed,
720 "fetch_url" => $fetch_url,
babfadbf
J
721 "site_url" => $site_url,
722 "cache_images" => $cache_images)
e6c886bf 723 );
cc85704f 724
b1840673 725 $entry_plugin_data = "";
e6c886bf 726 $entry_current_hash = RSSUtils::calculate_article_hash($article, $pluginhost);
b1840673
AD
727
728 _debug("article hash: $entry_current_hash [stored=$entry_stored_hash]", $debug_enabled);
729
522e8b35 730 if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) {
b1840673
AD
731 _debug("stored article seems up to date [IID: $base_entry_id], updating timestamp only", $debug_enabled);
732
733 // we keep encountering the entry in feeds, so we need to
734 // update date_updated column so that we don't get horrible
735 // dupes when the entry gets purged and reinserted again e.g.
736 // in the case of SLOW SLOW OMG SLOW updating feeds
737
0567016b
AD
738 $sth = $pdo->prepare("UPDATE ttrss_entries SET date_updated = NOW()
739 WHERE id = ?");
740 $sth->execute([$base_entry_id]);
b1840673 741
0500e14c 742 $pdo->commit();
5bdcb8fd 743 continue;
b1840673
AD
744 }
745
746 _debug("hash differs, applying plugin filters:", $debug_enabled);
747
1ffe3391 748 foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) {
b1840673
AD
749 _debug("... " . get_class($plugin), $debug_enabled);
750
751 $start = microtime(true);
19b3992b 752 $article = $plugin->hook_article_filter($article);
0084f0d1 753
b1840673
AD
754 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
755
756 $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ",";
cc85704f
AD
757 }
758
e6532439 759 if (clean($_REQUEST["xdebug"]) == 2) {
0bc503ff
AD
760 print "processed content: ";
761 print htmlspecialchars($article["content"]);
762 print "\n";
763 }
764
b1840673
AD
765 _debug("plugin data: $entry_plugin_data", $debug_enabled);
766
35c12dc4 767 // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
2b8afd49 768 if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
35c12dc4 769 foreach ($article as $k => $v) {
35c37354 770 // i guess we'll have to take the risk of 4byte unicode labels & tags here
dae16f72 771 if (is_string($article[$k])) {
102a0135 772 $article[$k] = RSSUtils::strip_utf8mb4($v);
35c37354 773 }
35c12dc4
AD
774 }
775 }
776
b8774453
AD
777 /* Collect article tags here so we could filter by them: */
778
557d86fe
AD
779 $matched_rules = array();
780
e6c886bf 781 $article_filters = RSSUtils::get_article_filters($filters, $article["title"],
7b55001e 782 $article["content"], $article["link"], $article["author"],
557d86fe 783 $article["tags"], $matched_rules);
b8774453
AD
784
785 if ($debug_enabled) {
557d86fe
AD
786 _debug("matched filter rules: ", $debug_enabled);
787
788 if (count($matched_rules) != 0) {
789 print_r($matched_rules);
790 }
791
792 _debug("filter actions: ", $debug_enabled);
793
b8774453
AD
794 if (count($article_filters) != 0) {
795 print_r($article_filters);
796 }
797 }
798
e6c886bf 799 $plugin_filter_names = RSSUtils::find_article_filters($article_filters, "plugin");
b8774453
AD
800 $plugin_filter_actions = $pluginhost->get_filter_actions();
801
802 if (count($plugin_filter_names) > 0) {
803 _debug("applying plugin filter actions...", $debug_enabled);
804
805 foreach ($plugin_filter_names as $pfn) {
806 list($pfclass,$pfaction) = explode(":", $pfn["param"]);
807
808 if (isset($plugin_filter_actions[$pfclass])) {
809 $plugin = $pluginhost->get_plugin($pfclass);
810
811 _debug("... $pfclass: $pfaction", $debug_enabled);
812
813 if ($plugin) {
814 $start = microtime(true);
815 $article = $plugin->hook_article_filter_action($article, $pfaction);
816
817 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
818 } else {
819 _debug("??? $pfclass: plugin object not found.");
820 }
821 } else {
822 _debug("??? $pfclass: filter plugin not registered.");
823 }
824 }
825 }
826
19b3992b 827 $entry_tags = $article["tags"];
0567016b
AD
828 $entry_title = strip_tags($article["title"]);
829 $entry_author = mb_substr(strip_tags($article["author"]), 0, 245);
830 $entry_link = strip_tags($article["link"]);
f935d98e 831 $entry_content = $article["content"]; // escaped below
c9299c28 832 $entry_force_catchup = $article["force_catchup"];
a29fe121 833 $article_labels = $article["labels"];
6de3a1be 834 $entry_score_modifier = (int) $article["score_modifier"];
0567016b 835 $entry_language = $article["language"];
a29fe121
AD
836
837 if ($debug_enabled) {
838 _debug("article labels:", $debug_enabled);
557d86fe
AD
839
840 if (count($article_labels) != 0) {
841 print_r($article_labels);
842 }
a29fe121 843 }
c9299c28
AD
844
845 _debug("force catchup: $entry_force_catchup");
f935d98e 846
0a3fd79b 847 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 848 RSSUtils::cache_media($entry_content, $site_url, $debug_enabled);
0a3fd79b 849
0567016b
AD
850 $csth = $pdo->prepare("SELECT id FROM ttrss_entries
851 WHERE guid = ? OR guid = ?");
852 $csth->execute([$entry_guid, $entry_guid_hashed]);
9e222305 853
0567016b 854 if (!$row = $csth->fetch()) {
2c08214a 855
07d3431e 856 _debug("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", $debug_enabled);
2c08214a
AD
857
858 // base post entry does not exist, create it
859
0567016b 860 $usth = $pdo->prepare(
2c08214a 861 "INSERT INTO ttrss_entries
2c940c48 862 (title,
2c08214a
AD
863 guid,
864 link,
865 updated,
866 content,
867 content_hash,
868 no_orig_date,
869 date_updated,
870 date_entered,
871 comments,
872 num_comments,
b30abdad 873 plugin_data,
6b461797 874 lang,
2c08214a
AD
875 author)
876 VALUES
0567016b 877 (?, ?, ?, ?, ?, ?,
5ba1ddd4 878 false,
2c08214a 879 NOW(),
0567016b
AD
880 ?, ?, ?, ?, ?, ?)");
881
882 $usth->execute([$entry_title,
883 $entry_guid_hashed,
884 $entry_link,
885 $entry_timestamp_fmt,
93e70e36 886 "$entry_content",
0567016b
AD
887 $entry_current_hash,
888 $date_feed_processed,
889 $entry_comments,
187abfe7 890 (int)$num_comments,
0567016b 891 $entry_plugin_data,
93e70e36
AD
892 "$entry_language",
893 "$entry_author"]);
e8291805 894
2c08214a
AD
895 }
896
0567016b 897 $csth->execute([$entry_guid, $entry_guid_hashed]);
2c08214a
AD
898
899 $entry_ref_id = 0;
900 $entry_int_id = 0;
901
0567016b 902 if ($row = $csth->fetch()) {
2c08214a 903
68cccafc 904 _debug("base guid found, checking for user record", $debug_enabled);
2c08214a 905
0567016b 906 $ref_id = $row['id'];
2c08214a
AD
907 $entry_ref_id = $ref_id;
908
e6c886bf 909 if (RSSUtils::find_article_filter($article_filters, "filter")) {
0500e14c 910 $pdo->commit();
2c08214a
AD
911 continue;
912 }
913
e6c886bf 914 $score = RSSUtils::calculate_article_score($article_filters) + $entry_score_modifier;
2c08214a 915
6de3a1be 916 _debug("initial score: $score [including plugin modifier: $entry_score_modifier]", $debug_enabled);
2c08214a 917
4f186b1f
AD
918 // check for user post link to main table
919
0567016b
AD
920 $sth = $pdo->prepare("SELECT ref_id, int_id FROM ttrss_user_entries WHERE
921 ref_id = ? AND owner_uid = ?");
922 $sth->execute([$ref_id, $owner_uid]);
2c08214a
AD
923
924 // okay it doesn't exist - create user entry
0567016b
AD
925 if ($row = $sth->fetch()) {
926 $entry_ref_id = $row["ref_id"];
927 $entry_int_id = $row["int_id"];
2c08214a 928
0567016b
AD
929 _debug("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
930 } else {
2c940c48 931
68cccafc 932 _debug("user record not found, creating...", $debug_enabled);
2c08214a 933
e6c886bf 934 if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) {
0567016b
AD
935 $unread = 1;
936 $last_read_qpart = null;
2c08214a 937 } else {
0567016b 938 $unread = 0;
d4c05d0b 939 $last_read_qpart = date("Y-m-d H:i"); // we can't use NOW() here because it gets quoted
2c08214a
AD
940 }
941
e6c886bf 942 if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
0567016b 943 $marked = 1;
2c08214a 944 } else {
0567016b 945 $marked = 0;
2c08214a
AD
946 }
947
e6c886bf 948 if (RSSUtils::find_article_filter($article_filters, 'publish')) {
0567016b 949 $published = 1;
2c08214a 950 } else {
0567016b 951 $published = 0;
2c08214a
AD
952 }
953
26ad257d 954 $last_marked = ($marked == 1) ? 'NOW()' : 'NULL';
955 $last_published = ($published == 1) ? 'NOW()' : 'NULL';
7873d588 956
0567016b 957 $sth = $pdo->prepare(
2c08214a
AD
958 "INSERT INTO ttrss_user_entries
959 (ref_id, owner_uid, feed_id, unread, last_read, marked,
7873d588
AD
960 published, score, tag_cache, label_cache, uuid,
961 last_marked, last_published)
aa16334f 962 VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")");
2c08214a 963
0567016b 964 $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
aa16334f 965 $published, $score]);
2c08214a 966
0567016b
AD
967 $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
968 ref_id = ? AND owner_uid = ? AND
969 feed_id = ? LIMIT 1");
2c08214a 970
0567016b
AD
971 $sth->execute([$ref_id, $owner_uid, $feed]);
972
973 if ($row = $sth->fetch())
974 $entry_int_id = $row['int_id'];
2c08214a
AD
975 }
976
0567016b 977 _debug("resulting RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
2c08214a 978
963c2264
AD
979 if (DB_TYPE == "pgsql")
980 $tsvector_qpart = "tsvector_combined = to_tsvector(:ts_lang, :ts_content),";
981 else
e854442e 982 $tsvector_qpart = "";
49a888ec 983
0567016b 984 $sth = $pdo->prepare("UPDATE ttrss_entries
49a888ec 985 SET title = :title,
e854442e 986 $tsvector_qpart
49a888ec
AD
987 content = :content,
988 content_hash = :content_hash,
989 updated = :updated,
66fe33e7 990 date_updated = NOW(),
49a888ec
AD
991 num_comments = :num_comments,
992 plugin_data = :plugin_data,
993 author = :author,
2c940c48 994 lang = :lang
49a888ec
AD
995 WHERE id = :id");
996
963c2264 997 $params = [":title" => $entry_title,
93e70e36 998 ":content" => "$entry_content",
49a888ec
AD
999 ":content_hash" => $entry_current_hash,
1000 ":updated" => $entry_timestamp_fmt,
1001 ":num_comments" => (int)$num_comments,
1002 ":plugin_data" => $entry_plugin_data,
93e70e36 1003 ":author" => "$entry_author",
49a888ec 1004 ":lang" => $entry_language,
963c2264
AD
1005 ":id" => $ref_id];
1006
1007 if (DB_TYPE == "pgsql") {
1008 $params[":ts_lang"] = $feed_language;
6e6c3a87 1009 $params[":ts_content"] = mb_substr(strip_tags($entry_title . " " . $entry_content), 0, 900000);
963c2264
AD
1010 }
1011
1012 $sth->execute($params);
b1840673 1013
59e83455 1014 // update aux data
0567016b
AD
1015 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1016 SET score = ? WHERE ref_id = ?");
1017 $sth->execute([$score, $ref_id]);
59e83455 1018
b1840673 1019 if ($mark_unread_on_update) {
24e6ff5d
AD
1020 _debug("article updated, marking unread as requested.", $debug_enabled);
1021
0567016b
AD
1022 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1023 SET last_read = null, unread = true WHERE ref_id = ?");
1024 $sth->execute([$ref_id]);
2c08214a
AD
1025 }
1026 }
1027
a29fe121
AD
1028 _debug("assigning labels [other]...", $debug_enabled);
1029
1030 foreach ($article_labels as $label) {
7c9b5a3f 1031 Labels::add_article($entry_ref_id, $label[1], $owner_uid);
a29fe121
AD
1032 }
1033
1034 _debug("assigning labels [filters]...", $debug_enabled);
2c08214a 1035
e6c886bf 1036 RSSUtils::assign_article_to_label_filters($entry_ref_id, $article_filters,
b24504b1 1037 $owner_uid, $article_labels);
2c08214a 1038
68cccafc 1039 _debug("looking for enclosures...", $debug_enabled);
2c08214a
AD
1040
1041 // enclosures
1042
1043 $enclosures = array();
1044
19b3992b 1045 $encs = $item->get_enclosures();
2c08214a 1046
19b3992b
AD
1047 if (is_array($encs)) {
1048 foreach ($encs as $e) {
1049 $e_item = array(
86e53429
AD
1050 rewrite_relative_url($site_url, $e->link),
1051 $e->type, $e->length, $e->title, $e->width, $e->height);
102a0135
AD
1052
1053 // Yet another episode of "mysql utf8_general_ci is gimped"
2b8afd49 1054 if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
7f4a4045
AD
1055 for ($i = 0; $i < count($e_item); $i++) {
1056 if (is_string($e_item[$i])) {
1057 $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
1058 }
1059 }
102a0135
AD
1060 }
1061
7f4a4045 1062 array_push($enclosures, $e_item);
2c08214a
AD
1063 }
1064 }
1065
388d4dfa 1066 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 1067 RSSUtils::cache_enclosures($enclosures, $site_url, $debug_enabled);
388d4dfa 1068
2c08214a 1069 if ($debug_enabled) {
68cccafc 1070 _debug("article enclosures:", $debug_enabled);
2c08214a
AD
1071 print_r($enclosures);
1072 }
1073
0567016b 1074 $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures
ac8a0e7d 1075 WHERE content_url = ? AND content_type = ? AND post_id = ?");
2c08214a 1076
0567016b
AD
1077 $usth = $pdo->prepare("INSERT INTO ttrss_enclosures
1078 (content_url, content_type, title, duration, post_id, width, height) VALUES
1079 (?, ?, ?, ?, ?, ?, ?)");
5c54e683 1080
2c08214a 1081 foreach ($enclosures as $enc) {
0567016b
AD
1082 $enc_url = $enc[0];
1083 $enc_type = $enc[1];
0500e14c 1084 $enc_dur = (int)$enc[2];
0567016b 1085 $enc_title = $enc[3];
523bd90b
FE
1086 $enc_width = intval($enc[4]);
1087 $enc_height = intval($enc[5]);
2c08214a 1088
ac8a0e7d 1089 $esth->execute([$enc_url, $enc_type, $entry_ref_id]);
2c08214a 1090
0567016b
AD
1091 if (!$esth->fetch()) {
1092 $usth->execute([$enc_url, $enc_type, (string)$enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]);
2c08214a
AD
1093 }
1094 }
1095
2c08214a
AD
1096 // check for manual tags (we have to do it here since they're loaded from filters)
1097
1098 foreach ($article_filters as $f) {
6aff7845 1099 if ($f["type"] == "tag") {
2c08214a 1100
6aff7845 1101 $manual_tags = trim_array(explode(",", $f["param"]));
2c08214a
AD
1102
1103 foreach ($manual_tags as $tag) {
1104 if (tag_is_valid($tag)) {
1105 array_push($entry_tags, $tag);
1106 }
1107 }
1108 }
1109 }
1110
1111 // Skip boring tags
1112
6322ac79 1113 $boring_tags = trim_array(explode(",", mb_strtolower(get_pref(
2c08214a
AD
1114 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
1115
1116 $filtered_tags = array();
1117 $tags_to_cache = array();
1118
1119 if ($entry_tags && is_array($entry_tags)) {
1120 foreach ($entry_tags as $tag) {
1121 if (array_search($tag, $boring_tags) === false) {
1122 array_push($filtered_tags, $tag);
1123 }
1124 }
1125 }
1126
1127 $filtered_tags = array_unique($filtered_tags);
1128
1129 if ($debug_enabled) {
68cccafc 1130 _debug("filtered article tags:", $debug_enabled);
2c08214a
AD
1131 print_r($filtered_tags);
1132 }
1133
1134 // Save article tags in the database
1135
1136 if (count($filtered_tags) > 0) {
1137
0567016b
AD
1138 $tsth = $pdo->prepare("SELECT id FROM ttrss_tags
1139 WHERE tag_name = ? AND post_int_id = ? AND
1140 owner_uid = ? LIMIT 1");
1141
1142 $usth = $pdo->prepare("INSERT INTO ttrss_tags
1143 (owner_uid,tag_name,post_int_id)
1144 VALUES (?, ?, ?)");
2c08214a
AD
1145
1146 foreach ($filtered_tags as $tag) {
1147
1148 $tag = sanitize_tag($tag);
2c08214a
AD
1149
1150 if (!tag_is_valid($tag)) continue;
1151
0567016b 1152 $tsth->execute([$tag, $entry_int_id, $owner_uid]);
2c08214a 1153
0567016b
AD
1154 if (!$tsth->fetch()) {
1155 $usth->execute([$owner_uid, $tag, $entry_int_id]);
e6c886bf 1156 }
2c08214a
AD
1157
1158 array_push($tags_to_cache, $tag);
1159 }
1160
1161 /* update the cache */
1162
1163 $tags_to_cache = array_unique($tags_to_cache);
1164
0567016b 1165 $tags_str = join(",", $tags_to_cache);
2c08214a 1166
0567016b
AD
1167 $tsth = $pdo->prepare("UPDATE ttrss_user_entries
1168 SET tag_cache = ? WHERE ref_id = ?
1169 AND owner_uid = ?");
1170 $tsth->execute([$tags_str, $entry_ref_id, $owner_uid]);
2c08214a
AD
1171 }
1172
68cccafc 1173 _debug("article processed", $debug_enabled);
0500e14c
AD
1174
1175 $pdo->commit();
2c08214a
AD
1176 }
1177
68cccafc 1178 _debug("purging feed...", $debug_enabled);
2c08214a 1179
a42c55f0 1180 purge_feed($feed, 0, $debug_enabled);
2c08214a 1181
0567016b
AD
1182 $sth = $pdo->prepare("UPDATE ttrss_feeds
1183 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
1184 $sth->execute([$feed]);
2c08214a
AD
1185
1186 } else {
1187
0567016b 1188 $error_msg = mb_substr($rss->error(), 0, 245);
2c08214a 1189
4ad04ee2
AD
1190 _debug("fetch error: $error_msg", $debug_enabled);
1191
1192 if (count($rss->errors()) > 1) {
1193 foreach ($rss->errors() as $error) {
1194 _debug("+ $error");
1195 }
1196 }
2c08214a 1197
0567016b
AD
1198 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
1199 last_updated = NOW(), last_unconditional = NOW() WHERE id = ?");
1200 $sth->execute([$error_msg, $feed]);
2c08214a 1201
88edaa93 1202 unset($rss);
0567016b 1203 return false;
88edaa93 1204 }
2c08214a 1205
68cccafc 1206 _debug("done", $debug_enabled);
88edaa93 1207
7b55001e 1208 return true;
2c08214a
AD
1209 }
1210
/**
 * Downloads image/audio/video enclosures into CACHE_DIR/images.
 *
 * @param array $enclosures list of positional arrays; index 0 is the
 *        enclosure URL and index 1 its content type
 * @param string $site_url base URL used to resolve relative enclosure URLs
 * @param bool $debug when true, progress is reported through _debug()
 */
static function cache_enclosures($enclosures, $site_url, $debug) {
	foreach ($enclosures as $enc) {

		// content type may be absent; never hand a non-string to preg_match()
		if (!preg_match("/(image|audio|video)/", (string) $enc[1]))
			continue;

		$src = rewrite_relative_url($site_url, $enc[0]);

		// cache file name is derived from the absolute URL
		$local_filename = CACHE_DIR . "/images/" . sha1($src);

		if ($debug) _debug("cache_enclosures: checking $src");

		if (!file_exists($local_filename)) {
			// only announce a download when one is actually attempted
			if ($debug) _debug("cache_enclosures: downloading: $src to $local_filename");

			$file_content = fetch_file_contents($src);

			// responses at or below MIN_CACHE_FILE_SIZE are not stored
			if ($file_content && strlen($file_content) > MIN_CACHE_FILE_SIZE) {
				file_put_contents($local_filename, $file_content);
			}
		} else if (is_writable($local_filename)) {
			// refresh mtime of an already cached file
			touch($local_filename);
		}
	}
}
1234
/**
 * Caches media referenced from an HTML fragment into CACHE_DIR/images.
 *
 * Looks at <img src>, <video><source src> and <audio><source src> elements;
 * data: URIs are ignored.
 *
 * @param string $html article markup to scan
 * @param string $site_url base URL used to resolve relative src values
 * @param bool $debug when true, progress is reported through _debug()
 */
static function cache_media($html, $site_url, $debug) {
	libxml_use_internal_errors(true);

	// prepended so that the fragment is parsed as utf-8
	$charset_hack = '<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
	</head>';

	$doc = new DOMDocument();
	$doc->loadHTML($charset_hack . $html);

	$finder = new DOMXPath($doc);
	$nodes = $finder->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])');

	foreach ($nodes as $node) {
		if (!$node->hasAttribute('src'))
			continue;

		// inline data: URIs have nothing to download
		if (strpos($node->getAttribute('src'), "data:") === 0)
			continue;

		$src = rewrite_relative_url($site_url, $node->getAttribute('src'));
		$local_filename = CACHE_DIR . "/images/" . sha1($src);

		if ($debug) _debug("cache_media: checking $src");

		if (file_exists($local_filename)) {
			// already cached: refresh mtime when possible
			if (is_writable($local_filename)) {
				touch($local_filename);
			}
			continue;
		}

		if ($debug) _debug("cache_media: downloading: $src to $local_filename");

		$file_content = fetch_file_contents($src);

		// responses at or below MIN_CACHE_FILE_SIZE are not stored
		if ($file_content && strlen($file_content) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($local_filename, $file_content);
		}
	}
}
1270
/**
 * Deletes ttrss_error_log rows older than seven days.
 *
 * @param bool $debug when true, a progress line is written through _debug()
 */
static function expire_error_log($debug) {
	if ($debug) _debug("Removing old error log entries...");

	// interval syntax differs between PostgreSQL and the other backend
	if (DB_TYPE == "pgsql") {
		$query = "DELETE FROM ttrss_error_log
			WHERE created_at < NOW() - INTERVAL '7 days'";
	} else {
		$query = "DELETE FROM ttrss_error_log
			WHERE created_at < DATE_SUB(NOW(), INTERVAL 7 DAY)";
	}

	Db::pdo()->query($query);
}
1284
/**
 * Removes *.lock files in LOCK_DIRECTORY that are not currently locked and
 * have not been modified for more than two days.
 *
 * @param bool $debug when true, the number of removed files is reported
 *        through _debug()
 */
static function expire_lock_files($debug) {
	$num_deleted = 0;

	if (is_writable(LOCK_DIRECTORY)) {
		$files = glob(LOCK_DIRECTORY . "/*.lock");

		if ($files) {
			$now = time();

			foreach ($files as $file) {
				if (file_is_locked(basename($file)))
					continue;

				$mtime = filemtime($file);

				// filemtime() failure must not be mistaken for "very old"
				if ($mtime === false)
					continue;

				// count a file only when it was actually removed
				if ($now - $mtime > 86400*2 && unlink($file)) {
					++$num_deleted;
				}
			}
		}
	}

	if ($debug) _debug("Removed $num_deleted old lock files.");
}
1305
/**
 * Removes entries older than CACHE_MAX_DAYS days from the "feeds", "images",
 * "export" and "upload" subdirectories of CACHE_DIR.
 *
 * @param bool $debug when true, the per-directory number of removed files is
 *        reported through _debug()
 */
static function expire_cached_files($debug) {
	foreach (array("feeds", "images", "export", "upload") as $dir) {
		$cache_dir = CACHE_DIR . "/$dir";

		$num_deleted = 0;

		if (is_writable($cache_dir)) {
			$files = glob("$cache_dir/*");

			if ($files) {
				$now = time();

				foreach ($files as $file) {
					$mtime = filemtime($file);

					// filemtime() failure must not be mistaken for "very old"
					if ($mtime === false)
						continue;

					// count an entry only when it was actually removed
					if ($now - $mtime > 86400*CACHE_MAX_DAYS && unlink($file)) {
						++$num_deleted;
					}
				}
			}
		}

		if ($debug) _debug("$cache_dir: removed $num_deleted files.");
	}
}
2c08214a 1331
/**
 * Source: http://www.php.net/manual/en/function.parse-url.php#104527
 * Returns the url query as associative array
 *
 * Segments are split on the first "=" only, so a value may itself contain
 * "=". A segment without "=" maps to an empty string; empty segments (and
 * therefore an empty query) contribute nothing. Names and values are not
 * url-decoded.
 *
 * @param string query
 * @return array params
 */
static function convertUrlQuery($query) {
	$params = array();

	foreach (explode('&', (string) $query) as $param) {
		if ($param === '')
			continue;

		// limit 2: keep everything after the first "=" as the value
		$item = explode('=', $param, 2);

		$params[$item[0]] = isset($item[1]) ? $item[1] : '';
	}

	return $params;
}
92c14e9d 1351
/**
 * Evaluates filters against article fields and collects the actions of
 * every filter that matches.
 *
 * Each filter is read through the keys "match_any_rule", "inverse", "rules"
 * and "actions"; each rule through "reg_exp", "type" and "inverse". Rule
 * expressions are applied case-insensitively in unicode mode; rules with an
 * empty expression are skipped. Processing ends as soon as an action of type
 * "stop" has been collected.
 *
 * @param array $filters filters to evaluate
 * @param string $title article title
 * @param string $content article content (line breaks and tabs are removed
 *        before matching)
 * @param string $link article link
 * @param string $author article author
 * @param array $tags article tags
 * @param array|false $matched_rules when an array is passed, the last rule
 *        evaluated for every matching filter is appended to it
 * @return array collected actions, in filter order
 */
static function get_article_filters($filters, $title, $content, $link, $author, $tags, &$matched_rules = false) {
	$matches = array();

	// content only needs to be flattened once, and only if a rule needs it
	$content_flattened = false;

	foreach ($filters as $filter) {
		$match_any_rule = $filter["match_any_rule"];
		$inverse = $filter["inverse"];
		$filter_match = false;

		// tracked per filter so a rule of a previous filter is never reported
		$last_rule = null;

		foreach ($filter["rules"] as $rule) {
			$last_rule = $rule;

			$match = false;
			$reg_exp = str_replace('/', '\/', $rule["reg_exp"]);
			$rule_inverse = $rule["inverse"];

			if (!$reg_exp)
				continue;

			if (!$content_flattened && ($rule["type"] == "content" || $rule["type"] == "both")) {
				// we don't need to deal with multiline regexps
				$content = preg_replace("/[\r\n\t]/", "", $content);
				$content_flattened = true;
			}

			// errors are suppressed: an invalid user-supplied expression
			// simply does not match
			switch ($rule["type"]) {
				case "title":
					$match = @preg_match("/$reg_exp/iu", $title);
					break;
				case "content":
					$match = @preg_match("/$reg_exp/iu", $content);
					break;
				case "both":
					$match = (@preg_match("/$reg_exp/iu", $title) || @preg_match("/$reg_exp/iu", $content));
					break;
				case "link":
					$match = @preg_match("/$reg_exp/iu", $link);
					break;
				case "author":
					$match = @preg_match("/$reg_exp/iu", $author);
					break;
				case "tag":
					if (is_array($tags)) {
						foreach ($tags as $tag) {
							if (@preg_match("/$reg_exp/iu", $tag)) {
								$match = true;
								break;
							}
						}
					}
					break;
			}

			if ($rule_inverse) $match = !$match;

			if ($match_any_rule) {
				// any-rule mode: first hit decides
				if ($match) {
					$filter_match = true;
					break;
				}
			} else {
				// all-rules mode: first miss decides
				$filter_match = $match;
				if (!$match) {
					break;
				}
			}
		}

		if ($inverse) $filter_match = !$filter_match;

		if ($filter_match) {
			if (is_array($matched_rules) && $last_rule !== null) array_push($matched_rules, $last_rule);

			foreach ($filter["actions"] AS $action) {
				array_push($matches, $action);

				// if Stop action encountered, perform no further processing
				if (isset($action["type"]) && $action["type"] == "stop") return $matches;
			}
		}
	}

	return $matches;
}
1431
/**
 * Returns the first filter action whose "type" equals $filter_name.
 *
 * @param array $filters filter actions to search
 * @param string $filter_name action type to look for
 * @return array|false the matching action, or false when there is none
 */
static function find_article_filter($filters, $filter_name) {
	foreach ($filters as $candidate) {
		if ($candidate["type"] != $filter_name)
			continue;

		return $candidate;
	}

	return false;
}
1440
/**
 * Returns every filter action whose "type" equals $filter_name.
 *
 * @param array $filters filter actions to search
 * @param string $filter_name action type to look for
 * @return array matching actions in their original order (possibly empty)
 */
static function find_article_filters($filters, $filter_name) {
	$found = array();

	foreach ($filters as $candidate) {
		if ($candidate["type"] != $filter_name)
			continue;

		$found[] = $candidate;
	}

	return $found;
}
1451
/**
 * Adds up the "param" values of all filter actions of type "score".
 *
 * @param array $filters filter actions
 * @return int|float accumulated score; 0 when there is no score action
 */
static function calculate_article_score($filters) {
	$total = 0;

	foreach ($filters as $action) {
		if ($action["type"] != "score")
			continue;

		$total = $total + $action["param"];
	}

	return $total;
}
1462
/**
 * Tells whether any label in the list carries the given caption.
 *
 * @param array $labels positional label arrays; index 1 is compared
 *        against $caption
 * @param string $caption caption to look for
 * @return bool true when a label with that caption is present
 */
static function labels_contains_caption($labels, $caption) {
	$found = false;

	foreach ($labels as $entry) {
		if ($entry[1] == $caption) {
			$found = true;
			break;
		}
	}

	return $found;
}
1472
/**
 * Applies "label" filter actions to an article.
 *
 * A label named by an action is added through Labels::add_article() unless
 * $article_labels already contains a label with that caption.
 *
 * @param int $id article id passed on to Labels::add_article()
 * @param array $filters filter actions; only those of type "label" are used
 * @param int $owner_uid user id passed on to Labels::add_article()
 * @param array $article_labels labels already present on the article
 */
static function assign_article_to_label_filters($id, $filters, $owner_uid, $article_labels) {
	foreach ($filters as $action) {
		if ($action["type"] != "label")
			continue;

		$already_present = RSSUtils::labels_contains_caption($article_labels, $action["param"]);

		if (!$already_present) {
			Labels::add_article($id, $action["param"], $owner_uid);
		}
	}
}
87764a50 1482
/**
 * Builds a GUID-like string from an article title: markup is stripped, the
 * text is lowercased and spaces, quotes, commas, dots, colons and semicolons
 * are each replaced with "-".
 *
 * @param string $title article title
 * @return string derived identifier
 */
static function make_guid_from_title($title) {
	$plain = strip_tags($title);
	$lowered = mb_strtolower($plain, 'utf-8');

	return preg_replace("/[ \"\',.:;]/", "-", $lowered);
}
1487
/**
 * Deletes counter cache rows (feeds and categories) that no longer have a
 * matching feed / category row for the same owner.
 *
 * @param bool $debug when true, the number of removed rows is reported
 *        through _debug()
 */
static function cleanup_counters_cache($debug) {
	$pdo = Db::pdo();

	// feed counters without a corresponding feed
	$feed_result = $pdo->query("DELETE FROM ttrss_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feeds WHERE
			id = feed_id AND
			ttrss_counters_cache.owner_uid = ttrss_feeds.owner_uid) = 0");

	$frows = $feed_result->rowCount();

	// category counters without a corresponding category
	$cat_result = $pdo->query("DELETE FROM ttrss_cat_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feed_categories WHERE
			id = feed_id AND
			ttrss_cat_counters_cache.owner_uid = ttrss_feed_categories.owner_uid) = 0");

	$crows = $cat_result->rowCount();

	if ($debug) _debug("Removed $frows (feeds) $crows (cats) orphaned counter cache entries.");
}
1509
/**
 * Runs the HOOK_HOUSE_KEEPING hook on a fresh PluginHost that has the given
 * user's plugins loaded.
 *
 * @param int $owner_uid user whose plugins are loaded
 */
static function housekeeping_user($owner_uid) {
	$user_plugin_host = new PluginHost();

	load_user_plugins($owner_uid, $user_plugin_host);

	$user_plugin_host->run_hooks(
		PluginHost::HOOK_HOUSE_KEEPING,
		"hook_house_keeping",
		"");
}
1517
e6c886bf
AD
/**
 * Runs the maintenance tasks: expires cached files, lock files and error
 * log entries, rebuilds the feedbrowser cache, purges orphaned articles and
 * counter cache rows, then runs HOOK_HOUSE_KEEPING on the shared PluginHost
 * instance.
 *
 * @param bool $debug passed on to the individual tasks
 */
static function housekeeping_common($debug) {
	// expiry of files and log rows
	RSSUtils::expire_cached_files($debug);
	RSSUtils::expire_lock_files($debug);
	RSSUtils::expire_error_log($debug);

	// this message is written regardless of $debug
	$count = RSSUtils::update_feedbrowser_cache();
	_debug("Feedbrowser updated, $count feeds processed.");

	Article::purge_orphans(true);
	RSSUtils::cleanup_counters_cache($debug);

	$plugin_host = PluginHost::getInstance();
	$plugin_host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
ea79a0e0 1534
e6c886bf
AD
/**
 * Fetches and stores a feed's favicon when ICONS_DIR/<feed>.ico is missing.
 *
 * Downloaded data is written only when it starts with a known image
 * signature (ICO, GIF, PNG, JPEG or BMP).
 *
 * @param string $site_url site whose favicon is looked up
 * @param int $feed feed id, used to build the icon file name
 * @return string|null icon file path when the icon did not exist on entry
 *         (whether or not it could be written); null when it already existed
 */
static function check_feed_favicon($site_url, $feed) {
	$icon_file = ICONS_DIR . "/$feed.ico";

	// nothing to do (and nothing returned) when the icon is already there
	if (file_exists($icon_file))
		return;

	$favicon_url = get_favicon_url($site_url);

	if ($favicon_url) {
		// Limiting to "image" type misses those served with text/plain
		$contents = fetch_file_contents($favicon_url);

		if ($contents) {
			// Crude image type matching.
			// Patterns gleaned from the file(1) source code.
			$signatures = array(
				'/^\x00\x00\x01\x00/',          // MS Windows icon resource
				'/^GIF8/',                      // GIF image data
				'/^\x89PNG\x0d\x0a\x1a\x0a/',   // PNG image data
				'/^\xff\xd8/',                  // JPEG image data
				'/^BM/');                       // PC bitmap (OS2, Windows BMP files)

			$recognized = false;

			foreach ($signatures as $signature) {
				if (preg_match($signature, $contents)) {
					$recognized = true;
					break;
				}
			}

			// unknown type: discard the download
			if (!$recognized) {
				$contents = "";
			}
		}

		if ($contents) {
			$fp = @fopen($icon_file, "w");

			if ($fp) {
				fwrite($fp, $contents);
				fclose($fp);
				chmod($icon_file, 0644);
			}
		}
	}

	return $icon_file;
}
e6c886bf
AD
1589
1590
1591
bec5ba93 1592}