]> git.wh0rd.org - tt-rss.git/blame - classes/rssutils.php
fix previous to not crash if document first child is not a DOMElement or whatever
[tt-rss.git] / classes / rssutils.php
CommitLineData
2c08214a 1<?php
e6c886bf
AD
2class RSSUtils {
/**
 * Computes the content hash that is compared against the stored
 * content_hash of an entry to decide whether an article has changed.
 *
 * Every non-null element of $article except the "feed" entry contributes
 * sha1("<key>:" . sha1(<value with tags stripped>)); array values are joined
 * with "," first. The concatenated digests are prefixed with the
 * comma-separated plugin names reported by $pluginhost and hashed once more,
 * so a change in the plugin list also changes the hash.
 *
 * @param array $article article fields keyed by field name
 * @param object $pluginhost object exposing get_plugin_names()
 * @return string hexadecimal SHA-1 digest
 */
static function calculate_article_hash($article, $pluginhost) {
	$tmp = "";

	foreach ($article as $k => $v) {
		// strict comparison on purpose: before PHP 8 the loose form
		// (0 != "feed") is false, which silently skipped integer key 0
		if ($k !== "feed" && isset($v)) {
			$x = strip_tags(is_array($v) ? implode(",", $v) : $v);

			$tmp .= sha1("$k:" . sha1($x));
		}
	}

	return sha1(implode(",", $pluginhost->get_plugin_names()) . $tmp);
}
18
102a0135
AD
19 // Strips utf8mb4 characters (i.e. emoji) for mysql
/**
 * Replaces every code point in the range U+10000..U+10FFFF (the characters
 * that need 4 bytes in UTF-8) with U+FFFD REPLACEMENT CHARACTER.
 *
 * @param string $str text to clean, expected to be UTF-8
 * @return string cleaned text; $str unchanged when it cannot be processed
 */
static function strip_utf8mb4($str) {
	$clean = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $str);

	// preg_replace() returns NULL on error, e.g. when the subject is not
	// valid UTF-8 under the /u modifier; keep the input instead of letting
	// the caller overwrite the field with NULL
	return $clean === null ? $str : $clean;
}
102a0135 23
/**
 * Rebuilds the ttrss_feedbrowser_cache table.
 *
 * Reads up to 1000 (feed_url, site_url, title) groups from ttrss_feeds,
 * ordered by subscriber count, leaving out every feed URL that has a private
 * subscription, stored credentials or credentials embedded in the URL. The
 * cache table is then emptied and refilled inside a single transaction; a
 * feed URL is inserted only once even if it appears in several groups.
 *
 * @return int number of rows inserted into the cache
 */
static function update_feedbrowser_cache() {

	$pdo = Db::pdo();

	$sth = $pdo->query("SELECT feed_url, site_url, title, COUNT(id) AS subscribers
		FROM ttrss_feeds WHERE feed_url NOT IN (SELECT feed_url FROM ttrss_feeds
			WHERE private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%')
		GROUP BY feed_url, site_url, title ORDER BY subscribers DESC LIMIT 1000");

	// prepared once and re-executed per row instead of re-preparing in the loop
	$exists_sth = $pdo->prepare("SELECT subscribers FROM
		ttrss_feedbrowser_cache WHERE feed_url = ?");

	$insert_sth = $pdo->prepare("INSERT INTO ttrss_feedbrowser_cache
		(feed_url, site_url, title, subscribers)
		VALUES
		(?, ?, ?, ?)");

	$count = 0;

	$pdo->beginTransaction();

	try {
		$pdo->query("DELETE FROM ttrss_feedbrowser_cache");

		while ($line = $sth->fetch()) {

			$exists_sth->execute([$line["feed_url"]]);
			$already_cached = $exists_sth->fetch();
			$exists_sth->closeCursor();

			if (!$already_cached) {
				$insert_sth->execute([
					$line["feed_url"],
					$line["site_url"],
					$line["title"],
					$line["subscribers"]]);

				++$count;
			}
		}

		$pdo->commit();

	} catch (Exception $e) {
		// do not leave the connection inside a half-finished transaction
		$pdo->rollBack();
		throw $e;
	}

	return $count;
}
70
/**
 * Runs one feed update batch.
 *
 * Steps, in order:
 *  1. abort if the database schema version does not match SCHEMA_VERSION;
 *  2. select the distinct feed URLs that are due for an update (per-feed
 *     interval or the owner's DEFAULT_UPDATE_INTERVAL preference), skipping
 *     feeds whose last_update_started is less than 10 minutes old;
 *  3. stamp last_update_started on those URLs;
 *  4. call update_rss_feed() for every due feed that uses one of those URLs;
 *  5. run housekeeping_user() for each owner touched and send digests.
 *
 * @param int $limit value for the LIMIT clause of the scheduling queries;
 *                   a falsy value removes the clause
 * @param bool $debug whether scheduling progress is logged through _debug()
 * @return int number of feeds passed to update_rss_feed()
 */
static function update_daemon_common($limit = DAEMON_FEED_LIMIT, $debug = true) {
	$schema_version = get_schema_version();

	if ($schema_version != SCHEMA_VERSION) {
		die("Schema version is wrong, please upgrade the database.\n");
	}

	$pdo = Db::pdo();

	// optionally restrict the batch to users who logged in recently
	if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
		if (DB_TYPE == "pgsql") {
			$login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
		} else {
			$login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
		}
	} else {
		$login_thresh_qpart = "";
	}

	// a feed is due when its own interval (or the user default when the feed
	// interval is 0) has elapsed, or when it has never been updated; a user
	// default of '-1' excludes the feed
	if (DB_TYPE == "pgsql") {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_user_prefs.value || ' minutes') AS INTERVAL)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	} else {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL CONVERT(ttrss_user_prefs.value, SIGNED INTEGER) MINUTE)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	}

	// Test if feed is currently being updated by another process.
	if (DB_TYPE == "pgsql") {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '10 minutes')";
	} else {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 10 MINUTE))";
	}

	$query_limit = $limit ? sprintf("LIMIT %d", $limit) : "";

	// Update the least recently updated feeds first
	$query_order = "ORDER BY last_updated";
	if (DB_TYPE == "pgsql") $query_order .= " NULLS FIRST";

	$query = "SELECT DISTINCT ttrss_feeds.feed_url, ttrss_feeds.last_updated
		FROM
			ttrss_feeds, ttrss_users, ttrss_user_prefs
		WHERE
			ttrss_feeds.owner_uid = ttrss_users.id
			AND ttrss_user_prefs.profile IS NULL
			AND ttrss_users.id = ttrss_user_prefs.owner_uid
			AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL'
			$login_thresh_qpart $update_limit_qpart
			$updstart_thresh_qpart
			$query_order $query_limit";

	$res = $pdo->query($query);

	$feeds_to_update = array();
	while ($line = $res->fetch()) {
		array_push($feeds_to_update, $line['feed_url']);
	}

	if ($debug) _debug(sprintf("Scheduled %d feeds to update...", count($feeds_to_update)));

	// Update last_update_started before actually starting the batch
	// in order to minimize collision risk for parallel daemon tasks
	if (count($feeds_to_update) > 0) {
		$feeds_qmarks = arr_qmarks($feeds_to_update);

		$tmph = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
			WHERE feed_url IN ($feeds_qmarks)");
		$tmph->execute($feeds_to_update);
	}

	$nf = 0;
	$bstarted = microtime(true);

	$batch_owners = array();

	// since we have the data cached, we can deal with other feeds with the same url
	$usth = $pdo->prepare("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
		FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
			ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
			ttrss_users.id = ttrss_user_prefs.owner_uid AND
			ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL' AND
			ttrss_user_prefs.profile IS NULL AND
			feed_url = ?
			$update_limit_qpart
			$login_thresh_qpart
		ORDER BY ttrss_feeds.id $query_limit");

	foreach ($feeds_to_update as $feed) {
		if ($debug) _debug("Base feed: $feed");

		$usth->execute([$feed]);

		// Walk over every feed that shares this URL, not just the first
		// row. The rows are read up front because update_rss_feed() also
		// obtains its handle from Db::pdo() and runs further queries while
		// we iterate.
		foreach ($usth->fetchAll() as $tline) {
			if ($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);

			// remember each owner once for the housekeeping pass below
			if (!in_array($tline["owner_uid"], $batch_owners))
				array_push($batch_owners, $tline["owner_uid"]);

			$fstarted = microtime(true);
			RSSUtils::update_rss_feed($tline["id"], true);
			// update_rss_feed() may have switched debug output off
			_debug_suppress(false);

			_debug(sprintf(" %.4f (sec)", microtime(true) - $fstarted));

			++$nf;
		}
	}

	if ($nf > 0) {
		_debug(sprintf("Processed %d feeds in %.4f (sec), %.4f (sec/feed avg)", $nf,
			microtime(true) - $bstarted, (microtime(true) - $bstarted) / $nf));
	}

	foreach ($batch_owners as $owner_uid) {
		_debug("Running housekeeping tasks for user $owner_uid...");

		RSSUtils::housekeeping_user($owner_uid);
	}

	// Send feed digests by email if needed.
	Digest::send_headlines_digests($debug);

	return $nf;
}
2c08214a 215
6022776d 216 // this is used when subscribing
/**
 * Fills in title and site URL of a feed row.
 *
 * The information is taken from the HOOK_FEED_BASIC_INFO plugin hooks of the
 * feed owner; when no hook supplies anything, the feed is downloaded and
 * parsed instead. The title is written only when the stored one is empty or
 * "[Unknown]"; the site URL is written whenever it differs from the stored
 * value. Nothing happens when the feed id is unknown or no information could
 * be obtained.
 *
 * @param int $feed id of the ttrss_feeds row
 * @return void
 */
static function set_basic_feed_info($feed) {

	$pdo = Db::pdo();

	$sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login,auth_pass_encrypted
		FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	if (!$row) return;

	$owner_uid = $row["owner_uid"];
	$auth_pass_encrypted = $row["auth_pass_encrypted"];
	$auth_login = $row["auth_login"];
	$auth_pass = $row["auth_pass"];
	$fetch_url = $row["feed_url"];

	if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) {
		require_once "crypt.php";
		$auth_pass = decrypt_string($auth_pass);
	}

	// a private plugin host, loaded with the plugins of the feed owner
	$pluginhost = new PluginHost();
	$user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);

	$pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
	$pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
	$pluginhost->load_data();

	$basic_info = array();
	foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
		$basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
	}

	// no plugin provided anything: download and parse the feed ourselves
	if (!$basic_info) {
		$feed_data = fetch_file_contents($fetch_url, false,
			$auth_login, $auth_pass, false,
			FEED_FETCH_TIMEOUT,
			0);

		global $fetch_curl_used;

		if (!$fetch_curl_used) {
			// the data may still be gzip-compressed; gzdecode() warns on
			// plain data, hence the suppression
			$tmp = @gzdecode($feed_data);

			if ($tmp) $feed_data = $tmp;
		}

		$feed_data = trim($feed_data);

		$rss = new FeedParser($feed_data);
		$rss->init();

		if (!$rss->error()) {
			$basic_info = array(
				'title' => $rss->get_title(),
				'site_url' => rewrite_relative_url($fetch_url, $rss->get_link())
			);
		}
	}

	if (!$basic_info || !is_array($basic_info)) return;

	// A plugin may return only some of the keys; treat missing ones as empty.
	// The length limits are applied to plugin and parser data alike.
	$new_title = !empty($basic_info['title']) ?
		mb_substr($basic_info['title'], 0, 199) : "";
	$new_site_url = !empty($basic_info['site_url']) ?
		mb_substr($basic_info['site_url'], 0, 245) : "";

	$sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	if (!$row) return;

	$registered_title = $row["title"];
	$orig_site_url = $row["site_url"];

	if ($new_title && (!$registered_title || $registered_title == "[Unknown]")) {
		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			title = ? WHERE id = ?");
		$sth->execute([$new_title, $feed]);
	}

	if ($new_site_url && $orig_site_url != $new_site_url) {
		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			site_url = ? WHERE id = ?");
		$sth->execute([$new_site_url, $feed]);
	}
}
306
7b55001e 307 /**
e6c886bf
AD
308 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
309 */
310 static function update_rss_feed($feed, $no_cache = false) {
2c08214a 311
e6532439 312 $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || clean($_REQUEST['xdebug']);
2c08214a 313
4f71d743 314 _debug_suppress(!$debug_enabled);
68cccafc 315 _debug("start", $debug_enabled);
2c08214a 316
0567016b
AD
317 $pdo = Db::pdo();
318
319 $sth = $pdo->prepare("SELECT title FROM ttrss_feeds WHERE id = ?");
320 $sth->execute([$feed]);
bfe1eb4e 321
0567016b 322 if (!$row = $sth->fetch()) {
bfe1eb4e
AD
323 _debug("feed $feed NOT FOUND/SKIPPED", $debug_enabled);
324 user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING);
325 return false;
326 }
327
0567016b 328 $title = $row["title"];
6bb96beb
AD
329
330 // feed was batch-subscribed or something, we need to get basic info
331 // this is not optimal currently as it fetches stuff separately TODO: optimize
332 if ($title == "[Unknown]") {
333 _debug("setting basic feed info for $feed...");
e6c886bf 334 RSSUtils::set_basic_feed_info($feed);
6bb96beb
AD
335 }
336
0567016b 337 $sth = $pdo->prepare("SELECT id,update_interval,auth_login,
5ba1ddd4 338 feed_url,auth_pass,cache_images,
5321e775 339 mark_unread_on_update, owner_uid,
153cb6d3 340 auth_pass_encrypted, feed_language,
e50c8eaa
AD
341 last_modified,
342 ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
0567016b
AD
343 FROM ttrss_feeds WHERE id = ?");
344 $sth->execute([$feed]);
345
346 if ($row = $sth->fetch()) {
2c08214a 347
0567016b 348 $owner_uid = $row["owner_uid"];
187abfe7
AD
349 $mark_unread_on_update = $row["mark_unread_on_update"];
350 $auth_pass_encrypted = $row["auth_pass_encrypted"];
2c08214a 351
0567016b
AD
352 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
353 WHERE id = ?");
354 $sth->execute([$feed]);
2c08214a 355
0567016b
AD
356 $auth_login = $row["auth_login"];
357 $auth_pass = $row["auth_pass"];
2c08214a 358
0567016b
AD
359 if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) {
360 require_once "crypt.php";
361 $auth_pass = decrypt_string($auth_pass);
362 }
044cff2d 363
0567016b
AD
364 $stored_last_modified = $row["last_modified"];
365 $last_unconditional = $row["last_unconditional"];
187abfe7 366 $cache_images = $row["cache_images"];
0567016b
AD
367 $fetch_url = $row["feed_url"];
368 $feed_language = mb_strtolower($row["feed_language"]);
369 if (!$feed_language) $feed_language = 'english';
2c08214a 370
0567016b
AD
371 } else {
372 return false;
373 }
2c08214a 374
f074ffe9 375 $date_feed_processed = date('Y-m-d H:i');
2c08214a 376
342e8a9e 377 $cache_filename = CACHE_DIR . "/feeds/" . sha1($fetch_url) . ".xml";
f074ffe9 378
ee65bef4
AD
379 $pluginhost = new PluginHost();
380 $pluginhost->set_debug($debug_enabled);
381 $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
382
383 $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
384 $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
385 $pluginhost->load_data();
386
7b55001e 387 $rss_hash = false;
4f9cbdff 388
7b55001e
AD
389 $force_refetch = isset($_REQUEST["force_refetch"]);
390 $feed_data = "";
687a4f59 391
7b55001e
AD
392 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) {
393 $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass);
394 }
2c08214a 395
7b55001e
AD
396 // try cache
397 if (!$feed_data &&
398 file_exists($cache_filename) &&
399 is_readable($cache_filename) &&
400 !$auth_login && !$auth_pass &&
401 filemtime($cache_filename) > time() - 30) {
be574731 402
7b55001e 403 _debug("using local cache [$cache_filename].", $debug_enabled);
52637d3b 404
7b55001e 405 @$feed_data = file_get_contents($cache_filename);
f074ffe9 406
7b55001e
AD
407 if ($feed_data) {
408 $rss_hash = sha1($feed_data);
88edaa93 409 }
ee65bef4 410
7b55001e
AD
411 } else {
412 _debug("local cache will not be used for this feed", $debug_enabled);
413 }
312742db 414
153cb6d3
AD
415 global $fetch_last_modified;
416
7b55001e
AD
417 // fetch feed from source
418 if (!$feed_data) {
e50c8eaa 419 _debug("last unconditional update request: $last_unconditional");
312742db 420
7b55001e
AD
421 if (ini_get("open_basedir") && function_exists("curl_init")) {
422 _debug("not using CURL due to open_basedir restrictions");
423 }
3f6f0857 424
7f4a4045
AD
425 if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
426 _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
e50c8eaa 427
7f4a4045
AD
428 $force_refetch = true;
429 } else {
430 _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
431 }
153cb6d3 432
7f4a4045 433 _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
153cb6d3
AD
434
435 $feed_data = fetch_file_contents([
436 "url" => $fetch_url,
437 "login" => $auth_login,
438 "pass" => $auth_pass,
439 "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
440 "last_modified" => $force_refetch ? "" : $stored_last_modified
441 ]);
3f6f0857 442
7b55001e 443 global $fetch_curl_used;
3f6f0857 444
7b55001e
AD
445 if (!$fetch_curl_used) {
446 $tmp = @gzdecode($feed_data);
1367bc3f 447
7b55001e
AD
448 if ($tmp) $feed_data = $tmp;
449 }
017401dd 450
7b55001e 451 $feed_data = trim($feed_data);
fd687300 452
7b55001e 453 _debug("fetch done.", $debug_enabled);
9d930af9 454 _debug("source last modified: " . $fetch_last_modified, $debug_enabled);
153cb6d3
AD
455
456 if ($feed_data && $fetch_last_modified != $stored_last_modified) {
0567016b
AD
457 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_modified = ? WHERE id = ?");
458 $sth->execute([substr($fetch_last_modified, 0, 245), $feed]);
153cb6d3 459 }
95beaa14 460
7b55001e 461 // cache vanilla feed data for re-use
342e8a9e 462 if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/feeds")) {
7b55001e
AD
463 $new_rss_hash = sha1($feed_data);
464
465 if ($new_rss_hash != $rss_hash) {
466 _debug("saving $cache_filename", $debug_enabled);
467 @file_put_contents($cache_filename, $feed_data);
95beaa14 468 }
4f9cbdff 469 }
7b55001e 470 }
017401dd 471
7b55001e
AD
472 if (!$feed_data) {
473 global $fetch_last_error;
474 global $fetch_last_error_code;
f074ffe9 475
7b55001e 476 _debug("unable to fetch: $fetch_last_error [$fetch_last_error_code]", $debug_enabled);
f074ffe9 477
7b55001e
AD
478 // If-Modified-Since
479 if ($fetch_last_error_code != 304) {
0567016b 480 $error_message = $fetch_last_error;
7b55001e
AD
481 } else {
482 _debug("source claims data not modified, nothing to do.", $debug_enabled);
0567016b 483 $error_message = "";
7b55001e 484 }
4f9cbdff 485
0567016b
AD
486 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
487 last_updated = NOW() WHERE id = ?");
488 $sth->execute([$error_message, $feed]);
4f9cbdff 489
7b55001e 490 return;
f074ffe9
AD
491 }
492
1ffe3391 493 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) {
6791af0c 494 $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed);
017401dd
AD
495 }
496
07d3431e
AD
497 $rss = new FeedParser($feed_data);
498 $rss->init();
2c08214a 499
19b3992b 500 if (!$rss->error()) {
2c08214a 501
d2a421e3 502 // We use local pluginhost here because we need to load different per-user feed plugins
1ffe3391 503 $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
4412b877 504
df659891 505 _debug("language: $feed_language", $debug_enabled);
68cccafc 506 _debug("processing feed data...", $debug_enabled);
2c08214a 507
382268c6
AD
508 if (DB_TYPE == "pgsql") {
509 $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
510 } else {
511 $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
512 }
513
0567016b 514 $sth = $pdo->prepare("SELECT owner_uid,favicon_avg_color,
382268c6
AD
515 (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
516 favicon_needs_check
0567016b
AD
517 FROM ttrss_feeds WHERE id = ?");
518 $sth->execute([$feed]);
2c08214a 519
0567016b 520 if ($row = $sth->fetch()) {
187abfe7 521 $favicon_needs_check = $row["favicon_needs_check"];
0567016b
AD
522 $favicon_avg_color = $row["favicon_avg_color"];
523 $owner_uid = $row["owner_uid"];
524 } else {
525 return false;
526 }
2c08214a 527
0567016b 528 $site_url = mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245);
2c08214a 529
cd07592c
AD
530 _debug("site_url: $site_url", $debug_enabled);
531 _debug("feed_title: " . $rss->get_title(), $debug_enabled);
532
687a4f59 533 if ($favicon_needs_check || $force_refetch) {
36490f11
AD
534
535 /* terrible hack: if we crash on floicon shit here, we won't check
560cbd8c 536 * the icon avgcolor again (unless the icon got updated) */
36490f11 537
560cbd8c
AD
538 $favicon_file = ICONS_DIR . "/$feed.ico";
539 $favicon_modified = @filemtime($favicon_file);
540
68cccafc 541 _debug("checking favicon...", $debug_enabled);
687a4f59 542
e6c886bf 543 RSSUtils::check_feed_favicon($site_url, $feed);
560cbd8c
AD
544 $favicon_modified_new = @filemtime($favicon_file);
545
546 if ($favicon_modified_new > $favicon_modified)
547 $favicon_avg_color = '';
687a4f59 548
0567016b 549 $favicon_colorstring = "";
6ee0d4b0 550 if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') {
e6c886bf 551 require_once "colors.php";
687a4f59 552
0567016b
AD
553 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE
554 id = ?");
555 $sth->execute([$feed]);
aafd55ba 556
0567016b
AD
557 $favicon_color = calculate_avg_color($favicon_file);
558
559 $favicon_colorstring = ",favicon_avg_color = " . $pdo->quote($favicon_color);
63c323f7 560
36490f11 561 } else if ($favicon_avg_color == 'fail') {
84ceb6bd 562 _debug("floicon failed on this file, not trying to recalculate avg color", $debug_enabled);
6ac722d5 563 }
687a4f59 564
0567016b
AD
565 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_last_checked = NOW()
566 $favicon_colorstring WHERE id = ?");
567 $sth->execute([$feed]);
f2798eb6 568 }
2c08214a 569
68cccafc 570 _debug("loading filters & labels...", $debug_enabled);
2c08214a 571
a42c55f0 572 $filters = load_filters($feed, $owner_uid);
2c08214a 573
02f3992a 574 if ($debug_enabled) {
7f4a4045
AD
575 print_r($filters);
576 }
02f3992a 577
68cccafc 578 _debug("" . count($filters) . " filters loaded.", $debug_enabled);
2c08214a 579
19b3992b 580 $items = $rss->get_items();
2c08214a 581
19b3992b 582 if (!is_array($items)) {
68cccafc 583 _debug("no articles found.", $debug_enabled);
2c08214a 584
0567016b
AD
585 $sth = $pdo->prepare("UPDATE ttrss_feeds
586 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
587 $sth->execute([$feed]);
2c08214a 588
0567016b 589 return true; // no articles
2c08214a
AD
590 }
591
68cccafc 592 _debug("processing articles...", $debug_enabled);
2c08214a 593
6c9f3d4a
AD
594 $tstart = time();
595
19b3992b 596 foreach ($items as $item) {
0500e14c
AD
597 $pdo->beginTransaction();
598
e6532439 599 if (clean($_REQUEST['xdebug']) == 3) {
2c08214a
AD
600 print_r($item);
601 }
602
6c9f3d4a
AD
603 if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) {
604 _debug("looks like there's too many articles to process at once, breaking out", $debug_enabled);
0500e14c 605 $pdo->commit();
6c9f3d4a
AD
606 break;
607 }
608
0567016b
AD
609 $entry_guid = strip_tags($item->get_id());
610 if (!$entry_guid) $entry_guid = strip_tags($item->get_link());
e6c886bf 611 if (!$entry_guid) $entry_guid = RSSUtils::make_guid_from_title($item->get_title());
0500e14c
AD
612
613 if (!$entry_guid) {
614 $pdo->commit();
615 continue;
616 }
2c08214a 617
3a4c8973
AD
618 $entry_guid = "$owner_uid,$entry_guid";
619
0567016b 620 $entry_guid_hashed = 'SHA1:' . sha1($entry_guid);
5e3d5480 621
68cccafc 622 _debug("guid $entry_guid / $entry_guid_hashed", $debug_enabled);
5e3d5480 623
0567016b 624 $entry_timestamp = strip_tags($item->get_date());
04d2f9c8
AD
625
626 _debug("orig date: " . $item->get_date(), $debug_enabled);
2c08214a 627
30123fe6 628 if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) {
2c08214a 629 $entry_timestamp = time();
2c08214a
AD
630 }
631
632 $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
633
68cccafc 634 _debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
2c08214a 635
0567016b 636 $entry_title = strip_tags($item->get_title());
1b35d30c 637
5d56d100 638 $entry_link = rewrite_relative_url($site_url, $item->get_link());
2c08214a 639
3bbaf902 640 $entry_language = mb_substr(trim($item->get_language()), 0, 2);
22a866ed 641
68cccafc
AD
642 _debug("title $entry_title", $debug_enabled);
643 _debug("link $entry_link", $debug_enabled);
22a866ed 644 _debug("language $entry_language", $debug_enabled);
2c08214a
AD
645
646 if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
647
19b3992b
AD
648 $entry_content = $item->get_content();
649 if (!$entry_content) $entry_content = $item->get_description();
2c08214a 650
e6532439 651 if (clean($_REQUEST["xdebug"]) == 2) {
9ec10352 652 print "content: ";
0bc503ff 653 print htmlspecialchars($entry_content);
3c696512 654 print "\n";
2c08214a
AD
655 }
656
0567016b 657 $entry_comments = mb_substr(strip_tags($item->get_comments_url()), 0, 245);
12ff230b 658 $num_comments = (int) $item->get_comments_count();
2c08214a 659
0567016b
AD
660 $entry_author = strip_tags($item->get_author());
661 $entry_guid = mb_substr($entry_guid, 0, 245);
2c08214a 662
68cccafc
AD
663 _debug("author $entry_author", $debug_enabled);
664 _debug("num_comments: $num_comments", $debug_enabled);
ee78f81c 665 _debug("looking for tags...", $debug_enabled);
2c08214a
AD
666
667 // parse <category> entries into tags
668
669 $additional_tags = array();
670
19b3992b 671 $additional_tags_src = $item->get_categories();
2c08214a 672
19b3992b
AD
673 if (is_array($additional_tags_src)) {
674 foreach ($additional_tags_src as $tobj) {
cd07592c 675 array_push($additional_tags, $tobj);
2c08214a 676 }
19b3992b 677 }
2c08214a 678
fa6fbd36 679 $entry_tags = array_unique($additional_tags);
2c08214a 680
5edf4b73 681 for ($i = 0; $i < count($entry_tags); $i++) {
2c08214a
AD
682 $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
683
5edf4b73
AD
684 // we don't support numeric tags, let's prefix them
685 if (is_numeric($entry_tags[$i])) $entry_tags[$i] = 't:' . $entry_tags[$i];
686 }
687
ee78f81c
AD
688 _debug("tags found: " . join(",", $entry_tags), $debug_enabled);
689
68cccafc 690 _debug("done collecting data.", $debug_enabled);
2c08214a 691
0567016b
AD
692 $sth = $pdo->prepare("SELECT id, content_hash, lang FROM ttrss_entries
693 WHERE guid = ? OR guid = ?");
694 $sth->execute([$entry_guid, $entry_guid_hashed]);
b30abdad 695
0567016b
AD
696 if ($row = $sth->fetch()) {
697 $base_entry_id = $row["id"];
698 $entry_stored_hash = $row["content_hash"];
4a0da0e5 699 $article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
a8ac7661 700
2ed0d6c4 701 $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
a8ac7661 702 $entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
b30abdad 703 } else {
b1840673
AD
704 $base_entry_id = false;
705 $entry_stored_hash = "";
a29fe121 706 $article_labels = array();
b30abdad
AD
707 }
708
455b1401 709 $article = array("owner_uid" => $owner_uid, // read only
b30abdad 710 "guid" => $entry_guid, // read only
59e83455 711 "guid_hashed" => $entry_guid_hashed, // read only
19b3992b
AD
712 "title" => $entry_title,
713 "content" => $entry_content,
714 "link" => $entry_link,
a29fe121 715 "labels" => $article_labels, // current limitation: can add labels to article, can't remove them
19b3992b 716 "tags" => $entry_tags,
e02555c1 717 "author" => $entry_author,
c9299c28 718 "force_catchup" => false, // ugly hack for the time being
6de3a1be 719 "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
3318d324 720 "language" => $entry_language,
20d2195f 721 "num_comments" => $num_comments, // read only
f73e03e0
AD
722 "feed" => array("id" => $feed,
723 "fetch_url" => $fetch_url,
babfadbf
J
724 "site_url" => $site_url,
725 "cache_images" => $cache_images)
e6c886bf 726 );
cc85704f 727
b1840673 728 $entry_plugin_data = "";
e6c886bf 729 $entry_current_hash = RSSUtils::calculate_article_hash($article, $pluginhost);
b1840673
AD
730
731 _debug("article hash: $entry_current_hash [stored=$entry_stored_hash]", $debug_enabled);
732
522e8b35 733 if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) {
b1840673
AD
734 _debug("stored article seems up to date [IID: $base_entry_id], updating timestamp only", $debug_enabled);
735
736 // we keep encountering the entry in feeds, so we need to
737 // update date_updated column so that we don't get horrible
738 // dupes when the entry gets purged and reinserted again e.g.
739 // in the case of SLOW SLOW OMG SLOW updating feeds
740
0567016b
AD
741 $sth = $pdo->prepare("UPDATE ttrss_entries SET date_updated = NOW()
742 WHERE id = ?");
743 $sth->execute([$base_entry_id]);
b1840673 744
0500e14c 745 $pdo->commit();
5bdcb8fd 746 continue;
b1840673
AD
747 }
748
749 _debug("hash differs, applying plugin filters:", $debug_enabled);
750
1ffe3391 751 foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) {
b1840673
AD
752 _debug("... " . get_class($plugin), $debug_enabled);
753
754 $start = microtime(true);
19b3992b 755 $article = $plugin->hook_article_filter($article);
0084f0d1 756
b1840673
AD
757 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
758
759 $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ",";
cc85704f
AD
760 }
761
e6532439 762 if (clean($_REQUEST["xdebug"]) == 2) {
0bc503ff
AD
763 print "processed content: ";
764 print htmlspecialchars($article["content"]);
765 print "\n";
766 }
767
b1840673
AD
768 _debug("plugin data: $entry_plugin_data", $debug_enabled);
769
35c12dc4 770 // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
2b8afd49 771 if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
35c12dc4 772 foreach ($article as $k => $v) {
35c37354 773 // i guess we'll have to take the risk of 4byte unicode labels & tags here
dae16f72 774 if (is_string($article[$k])) {
102a0135 775 $article[$k] = RSSUtils::strip_utf8mb4($v);
35c37354 776 }
35c12dc4
AD
777 }
778 }
779
b8774453
AD
780 /* Collect article tags here so we could filter by them: */
781
557d86fe
AD
782 $matched_rules = array();
783
e6c886bf 784 $article_filters = RSSUtils::get_article_filters($filters, $article["title"],
7b55001e 785 $article["content"], $article["link"], $article["author"],
557d86fe 786 $article["tags"], $matched_rules);
b8774453
AD
787
788 if ($debug_enabled) {
557d86fe
AD
789 _debug("matched filter rules: ", $debug_enabled);
790
791 if (count($matched_rules) != 0) {
792 print_r($matched_rules);
793 }
794
795 _debug("filter actions: ", $debug_enabled);
796
b8774453
AD
797 if (count($article_filters) != 0) {
798 print_r($article_filters);
799 }
800 }
801
e6c886bf 802 $plugin_filter_names = RSSUtils::find_article_filters($article_filters, "plugin");
b8774453
AD
803 $plugin_filter_actions = $pluginhost->get_filter_actions();
804
805 if (count($plugin_filter_names) > 0) {
806 _debug("applying plugin filter actions...", $debug_enabled);
807
808 foreach ($plugin_filter_names as $pfn) {
809 list($pfclass,$pfaction) = explode(":", $pfn["param"]);
810
811 if (isset($plugin_filter_actions[$pfclass])) {
812 $plugin = $pluginhost->get_plugin($pfclass);
813
814 _debug("... $pfclass: $pfaction", $debug_enabled);
815
816 if ($plugin) {
817 $start = microtime(true);
818 $article = $plugin->hook_article_filter_action($article, $pfaction);
819
820 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
821 } else {
822 _debug("??? $pfclass: plugin object not found.");
823 }
824 } else {
825 _debug("??? $pfclass: filter plugin not registered.");
826 }
827 }
828 }
829
19b3992b 830 $entry_tags = $article["tags"];
0567016b
AD
831 $entry_title = strip_tags($article["title"]);
832 $entry_author = mb_substr(strip_tags($article["author"]), 0, 245);
833 $entry_link = strip_tags($article["link"]);
f935d98e 834 $entry_content = $article["content"]; // escaped below
c9299c28 835 $entry_force_catchup = $article["force_catchup"];
a29fe121 836 $article_labels = $article["labels"];
6de3a1be 837 $entry_score_modifier = (int) $article["score_modifier"];
0567016b 838 $entry_language = $article["language"];
a29fe121
AD
839
840 if ($debug_enabled) {
841 _debug("article labels:", $debug_enabled);
557d86fe
AD
842
843 if (count($article_labels) != 0) {
844 print_r($article_labels);
845 }
a29fe121 846 }
c9299c28
AD
847
848 _debug("force catchup: $entry_force_catchup");
f935d98e 849
0a3fd79b 850 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 851 RSSUtils::cache_media($entry_content, $site_url, $debug_enabled);
0a3fd79b 852
0567016b
AD
853 $csth = $pdo->prepare("SELECT id FROM ttrss_entries
854 WHERE guid = ? OR guid = ?");
855 $csth->execute([$entry_guid, $entry_guid_hashed]);
9e222305 856
0567016b 857 if (!$row = $csth->fetch()) {
2c08214a 858
07d3431e 859 _debug("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", $debug_enabled);
2c08214a
AD
860
861 // base post entry does not exist, create it
862
0567016b 863 $usth = $pdo->prepare(
2c08214a 864 "INSERT INTO ttrss_entries
0567016b 865 (title,
2c08214a
AD
866 guid,
867 link,
868 updated,
869 content,
870 content_hash,
871 no_orig_date,
872 date_updated,
873 date_entered,
874 comments,
875 num_comments,
b30abdad 876 plugin_data,
6b461797 877 lang,
2c08214a
AD
878 author)
879 VALUES
0567016b 880 (?, ?, ?, ?, ?, ?,
5ba1ddd4 881 false,
2c08214a 882 NOW(),
0567016b
AD
883 ?, ?, ?, ?, ?, ?)");
884
885 $usth->execute([$entry_title,
886 $entry_guid_hashed,
887 $entry_link,
888 $entry_timestamp_fmt,
93e70e36 889 "$entry_content",
0567016b
AD
890 $entry_current_hash,
891 $date_feed_processed,
892 $entry_comments,
187abfe7 893 (int)$num_comments,
0567016b 894 $entry_plugin_data,
93e70e36
AD
895 "$entry_language",
896 "$entry_author"]);
e8291805 897
2c08214a
AD
898 }
899
0567016b 900 $csth->execute([$entry_guid, $entry_guid_hashed]);
2c08214a
AD
901
902 $entry_ref_id = 0;
903 $entry_int_id = 0;
904
0567016b 905 if ($row = $csth->fetch()) {
2c08214a 906
68cccafc 907 _debug("base guid found, checking for user record", $debug_enabled);
2c08214a 908
0567016b 909 $ref_id = $row['id'];
2c08214a
AD
910 $entry_ref_id = $ref_id;
911
e6c886bf 912 if (RSSUtils::find_article_filter($article_filters, "filter")) {
0500e14c 913 $pdo->commit();
2c08214a
AD
914 continue;
915 }
916
e6c886bf 917 $score = RSSUtils::calculate_article_score($article_filters) + $entry_score_modifier;
2c08214a 918
6de3a1be 919 _debug("initial score: $score [including plugin modifier: $entry_score_modifier]", $debug_enabled);
2c08214a 920
4f186b1f
AD
921 // check for user post link to main table
922
0567016b
AD
923 $sth = $pdo->prepare("SELECT ref_id, int_id FROM ttrss_user_entries WHERE
924 ref_id = ? AND owner_uid = ?");
925 $sth->execute([$ref_id, $owner_uid]);
2c08214a
AD
926
927 // okay it doesn't exist - create user entry
0567016b
AD
928 if ($row = $sth->fetch()) {
929 $entry_ref_id = $row["ref_id"];
930 $entry_int_id = $row["int_id"];
2c08214a 931
0567016b
AD
932 _debug("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
933 } else {
934
68cccafc 935 _debug("user record not found, creating...", $debug_enabled);
2c08214a 936
e6c886bf 937 if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) {
0567016b
AD
938 $unread = 1;
939 $last_read_qpart = null;
2c08214a 940 } else {
0567016b 941 $unread = 0;
d4c05d0b 942 $last_read_qpart = date("Y-m-d H:i"); // we can't use NOW() here because it gets quoted
2c08214a
AD
943 }
944
e6c886bf 945 if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
0567016b 946 $marked = 1;
2c08214a 947 } else {
0567016b 948 $marked = 0;
2c08214a
AD
949 }
950
e6c886bf 951 if (RSSUtils::find_article_filter($article_filters, 'publish')) {
0567016b 952 $published = 1;
2c08214a 953 } else {
0567016b 954 $published = 0;
2c08214a
AD
955 }
956
26ad257d 957 $last_marked = ($marked == 1) ? 'NOW()' : 'NULL';
958 $last_published = ($published == 1) ? 'NOW()' : 'NULL';
7873d588 959
0567016b 960 $sth = $pdo->prepare(
2c08214a
AD
961 "INSERT INTO ttrss_user_entries
962 (ref_id, owner_uid, feed_id, unread, last_read, marked,
7873d588
AD
963 published, score, tag_cache, label_cache, uuid,
964 last_marked, last_published)
aa16334f 965 VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")");
2c08214a 966
0567016b 967 $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
aa16334f 968 $published, $score]);
2c08214a 969
0567016b
AD
970 $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
971 ref_id = ? AND owner_uid = ? AND
972 feed_id = ? LIMIT 1");
2c08214a 973
0567016b
AD
974 $sth->execute([$ref_id, $owner_uid, $feed]);
975
976 if ($row = $sth->fetch())
977 $entry_int_id = $row['int_id'];
2c08214a
AD
978 }
979
0567016b 980 _debug("resulting RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
2c08214a 981
963c2264
AD
982 if (DB_TYPE == "pgsql")
983 $tsvector_qpart = "tsvector_combined = to_tsvector(:ts_lang, :ts_content),";
984 else
e854442e 985 $tsvector_qpart = "";
49a888ec 986
0567016b 987 $sth = $pdo->prepare("UPDATE ttrss_entries
49a888ec 988 SET title = :title,
e854442e 989 $tsvector_qpart
49a888ec
AD
990 content = :content,
991 content_hash = :content_hash,
992 updated = :updated,
66fe33e7 993 date_updated = NOW(),
49a888ec
AD
994 num_comments = :num_comments,
995 plugin_data = :plugin_data,
996 author = :author,
997 lang = :lang
998 WHERE id = :id");
999
963c2264 1000 $params = [":title" => $entry_title,
93e70e36 1001 ":content" => "$entry_content",
49a888ec
AD
1002 ":content_hash" => $entry_current_hash,
1003 ":updated" => $entry_timestamp_fmt,
1004 ":num_comments" => (int)$num_comments,
1005 ":plugin_data" => $entry_plugin_data,
93e70e36 1006 ":author" => "$entry_author",
49a888ec 1007 ":lang" => $entry_language,
963c2264
AD
1008 ":id" => $ref_id];
1009
1010 if (DB_TYPE == "pgsql") {
1011 $params[":ts_lang"] = $feed_language;
6e6c3a87 1012 $params[":ts_content"] = mb_substr(strip_tags($entry_title . " " . $entry_content), 0, 900000);
963c2264
AD
1013 }
1014
1015 $sth->execute($params);
b1840673 1016
59e83455 1017 // update aux data
0567016b
AD
1018 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1019 SET score = ? WHERE ref_id = ?");
1020 $sth->execute([$score, $ref_id]);
59e83455 1021
b1840673 1022 if ($mark_unread_on_update) {
24e6ff5d
AD
1023 _debug("article updated, marking unread as requested.", $debug_enabled);
1024
0567016b
AD
1025 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1026 SET last_read = null, unread = true WHERE ref_id = ?");
1027 $sth->execute([$ref_id]);
2c08214a
AD
1028 }
1029 }
1030
a29fe121
AD
1031 _debug("assigning labels [other]...", $debug_enabled);
1032
1033 foreach ($article_labels as $label) {
7c9b5a3f 1034 Labels::add_article($entry_ref_id, $label[1], $owner_uid);
a29fe121
AD
1035 }
1036
1037 _debug("assigning labels [filters]...", $debug_enabled);
2c08214a 1038
e6c886bf 1039 RSSUtils::assign_article_to_label_filters($entry_ref_id, $article_filters,
b24504b1 1040 $owner_uid, $article_labels);
2c08214a 1041
68cccafc 1042 _debug("looking for enclosures...", $debug_enabled);
2c08214a
AD
1043
1044 // enclosures
1045
1046 $enclosures = array();
1047
19b3992b 1048 $encs = $item->get_enclosures();
2c08214a 1049
19b3992b
AD
1050 if (is_array($encs)) {
1051 foreach ($encs as $e) {
1052 $e_item = array(
86e53429
AD
1053 rewrite_relative_url($site_url, $e->link),
1054 $e->type, $e->length, $e->title, $e->width, $e->height);
102a0135
AD
1055
1056 // Yet another episode of "mysql utf8_general_ci is gimped"
2b8afd49 1057 if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
7f4a4045
AD
1058 for ($i = 0; $i < count($e_item); $i++) {
1059 if (is_string($e_item[$i])) {
1060 $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
1061 }
1062 }
102a0135
AD
1063 }
1064
7f4a4045 1065 array_push($enclosures, $e_item);
2c08214a
AD
1066 }
1067 }
1068
388d4dfa 1069 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 1070 RSSUtils::cache_enclosures($enclosures, $site_url, $debug_enabled);
388d4dfa 1071
2c08214a 1072 if ($debug_enabled) {
68cccafc 1073 _debug("article enclosures:", $debug_enabled);
2c08214a
AD
1074 print_r($enclosures);
1075 }
1076
0567016b
AD
1077 $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures
1078 WHERE content_url = ? AND post_id = ?");
2c08214a 1079
0567016b
AD
1080 $usth = $pdo->prepare("INSERT INTO ttrss_enclosures
1081 (content_url, content_type, title, duration, post_id, width, height) VALUES
1082 (?, ?, ?, ?, ?, ?, ?)");
5c54e683 1083
2c08214a 1084 foreach ($enclosures as $enc) {
0567016b
AD
1085 $enc_url = $enc[0];
1086 $enc_type = $enc[1];
0500e14c 1087 $enc_dur = (int)$enc[2];
0567016b 1088 $enc_title = $enc[3];
523bd90b
FE
1089 $enc_width = intval($enc[4]);
1090 $enc_height = intval($enc[5]);
2c08214a 1091
0567016b 1092 $esth->execute([$enc_url, $entry_ref_id]);
2c08214a 1093
0567016b
AD
1094 if (!$esth->fetch()) {
1095 $usth->execute([$enc_url, $enc_type, (string)$enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]);
2c08214a
AD
1096 }
1097 }
1098
2c08214a
AD
1099 // check for manual tags (we have to do it here since they're loaded from filters)
1100
1101 foreach ($article_filters as $f) {
6aff7845 1102 if ($f["type"] == "tag") {
2c08214a 1103
6aff7845 1104 $manual_tags = trim_array(explode(",", $f["param"]));
2c08214a
AD
1105
1106 foreach ($manual_tags as $tag) {
1107 if (tag_is_valid($tag)) {
1108 array_push($entry_tags, $tag);
1109 }
1110 }
1111 }
1112 }
1113
1114 // Skip boring tags
1115
6322ac79 1116 $boring_tags = trim_array(explode(",", mb_strtolower(get_pref(
2c08214a
AD
1117 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
1118
1119 $filtered_tags = array();
1120 $tags_to_cache = array();
1121
1122 if ($entry_tags && is_array($entry_tags)) {
1123 foreach ($entry_tags as $tag) {
1124 if (array_search($tag, $boring_tags) === false) {
1125 array_push($filtered_tags, $tag);
1126 }
1127 }
1128 }
1129
1130 $filtered_tags = array_unique($filtered_tags);
1131
1132 if ($debug_enabled) {
68cccafc 1133 _debug("filtered article tags:", $debug_enabled);
2c08214a
AD
1134 print_r($filtered_tags);
1135 }
1136
1137 // Save article tags in the database
1138
1139 if (count($filtered_tags) > 0) {
1140
0567016b
AD
1141 $tsth = $pdo->prepare("SELECT id FROM ttrss_tags
1142 WHERE tag_name = ? AND post_int_id = ? AND
1143 owner_uid = ? LIMIT 1");
1144
1145 $usth = $pdo->prepare("INSERT INTO ttrss_tags
1146 (owner_uid,tag_name,post_int_id)
1147 VALUES (?, ?, ?)");
2c08214a
AD
1148
1149 foreach ($filtered_tags as $tag) {
1150
1151 $tag = sanitize_tag($tag);
2c08214a
AD
1152
1153 if (!tag_is_valid($tag)) continue;
1154
0567016b 1155 $tsth->execute([$tag, $entry_int_id, $owner_uid]);
2c08214a 1156
0567016b
AD
1157 if (!$tsth->fetch()) {
1158 $usth->execute([$owner_uid, $tag, $entry_int_id]);
e6c886bf 1159 }
2c08214a
AD
1160
1161 array_push($tags_to_cache, $tag);
1162 }
1163
1164 /* update the cache */
1165
1166 $tags_to_cache = array_unique($tags_to_cache);
1167
0567016b 1168 $tags_str = join(",", $tags_to_cache);
2c08214a 1169
0567016b
AD
1170 $tsth = $pdo->prepare("UPDATE ttrss_user_entries
1171 SET tag_cache = ? WHERE ref_id = ?
1172 AND owner_uid = ?");
1173 $tsth->execute([$tags_str, $entry_ref_id, $owner_uid]);
2c08214a
AD
1174 }
1175
68cccafc 1176 _debug("article processed", $debug_enabled);
0500e14c
AD
1177
1178 $pdo->commit();
2c08214a
AD
1179 }
1180
68cccafc 1181 _debug("purging feed...", $debug_enabled);
2c08214a 1182
a42c55f0 1183 purge_feed($feed, 0, $debug_enabled);
2c08214a 1184
0567016b
AD
1185 $sth = $pdo->prepare("UPDATE ttrss_feeds
1186 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
1187 $sth->execute([$feed]);
2c08214a
AD
1188
1189 } else {
1190
0567016b 1191 $error_msg = mb_substr($rss->error(), 0, 245);
2c08214a 1192
4ad04ee2
AD
1193 _debug("fetch error: $error_msg", $debug_enabled);
1194
1195 if (count($rss->errors()) > 1) {
1196 foreach ($rss->errors() as $error) {
1197 _debug("+ $error");
1198 }
1199 }
2c08214a 1200
0567016b
AD
1201 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
1202 last_updated = NOW(), last_unconditional = NOW() WHERE id = ?");
1203 $sth->execute([$error_msg, $feed]);
2c08214a 1204
88edaa93 1205 unset($rss);
0567016b 1206 return false;
88edaa93 1207 }
2c08214a 1208
68cccafc 1209 _debug("done", $debug_enabled);
88edaa93 1210
7b55001e 1211 return true;
2c08214a
AD
1212 }
1213
/**
 * Stores media enclosures in the local image cache directory.
 *
 * Only enclosures whose content type mentions image, audio or video are
 * considered. A file that is already cached is touched (refreshing its
 * modification time) instead of being downloaded again.
 *
 * @param array $enclosures list of enclosure tuples; index 0 is the URL, index 1 the content type
 * @param string $site_url base URL used to resolve relative enclosure URLs
 * @param bool $debug when true, logs every processed URL through _debug()
 */
static function cache_enclosures($enclosures, $site_url, $debug) {
	foreach ($enclosures as $enclosure) {

		// anything that is not image/audio/video is left alone
		if (!preg_match("/(image|audio|video)/", $enclosure[1])) continue;

		$url = rewrite_relative_url($site_url, $enclosure[0]);

		// cache file name is derived from the absolute URL
		$cached_path = CACHE_DIR . "/images/" . sha1($url);

		if ($debug) _debug("cache_enclosures: downloading: $url to $cached_path");

		if (file_exists($cached_path)) {
			// already cached: just bump the mtime
			touch($cached_path);
			continue;
		}

		$data = fetch_file_contents($url);

		// empty or too small downloads are not written to the cache
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cached_path, $data);
		}
	}
}
1237
/**
 * Caches media referenced from an HTML fragment.
 *
 * Looks at every <img src>, <video><source src> and <audio><source src>
 * element, skips data: URIs, and stores each remote file in the local
 * image cache directory. Files that are already cached are touched
 * instead of being fetched again.
 *
 * @param string $html HTML fragment (article content)
 * @param string $site_url base URL used to resolve relative src values
 * @param bool $debug when true, logs every processed URL through _debug()
 */
static function cache_media($html, $site_url, $debug) {
	// keep libxml parse problems out of the output
	libxml_use_internal_errors(true);

	// prepended so that DOMDocument reads the fragment as UTF-8
	$charset_hack = '<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
	</head>';

	$doc = new DOMDocument();
	$doc->loadHTML($charset_hack . $html);

	$finder = new DOMXPath($doc);
	$nodes = $finder->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])');

	foreach ($nodes as $node) {
		if (!$node->hasAttribute('src')) continue;

		// inline data: URIs have nothing to download
		if (strpos($node->getAttribute('src'), "data:") === 0) continue;

		$url = rewrite_relative_url($site_url, $node->getAttribute('src'));
		$cached_path = CACHE_DIR . "/images/" . sha1($url);

		if ($debug) _debug("cache_media: downloading: $url to $cached_path");

		if (file_exists($cached_path)) {
			// already cached: just bump the mtime
			touch($cached_path);
			continue;
		}

		$data = fetch_file_contents($url);

		// empty or too small downloads are not written to the cache
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cached_path, $data);
		}
	}
}
1271
/**
 * Deletes rows older than seven days from ttrss_error_log.
 *
 * The date arithmetic is written per database backend, selected by DB_TYPE.
 *
 * @param bool $debug when true, announces the cleanup through _debug()
 */
static function expire_error_log($debug) {
	if ($debug) _debug("Removing old error log entries...");

	$pdo = Db::pdo();

	if (DB_TYPE == "pgsql") {
		$query = "DELETE FROM ttrss_error_log
			WHERE created_at < NOW() - INTERVAL '7 days'";
	} else {
		$query = "DELETE FROM ttrss_error_log
			WHERE created_at < DATE_SUB(NOW(), INTERVAL 7 DAY)";
	}

	$pdo->query($query);
}
1285
/**
 * Removes stale *.lock files from LOCK_DIRECTORY.
 *
 * A lock file is removed when file_is_locked() reports it as not locked
 * and it was last modified more than two days ago. Nothing happens when
 * the directory is not writable.
 *
 * @param bool $debug when true, reports the number of removed files through _debug()
 */
static function expire_lock_files($debug) {
	$removed = 0;

	$lock_files = false;

	if (is_writable(LOCK_DIRECTORY)) {
		$lock_files = glob(LOCK_DIRECTORY . "/*.lock");
	}

	if ($lock_files) {
		foreach ($lock_files as $path) {
			// a lock that is still held is never touched
			if (file_is_locked(basename($path))) continue;

			// keep anything modified within the last two days
			if (time() - filemtime($path) <= 86400*2) continue;

			unlink($path);
			++$removed;
		}
	}

	if ($debug) _debug("Removed $removed old lock files.");
}
1306
/**
 * Removes expired files from the cache subdirectories.
 *
 * For each of the simplepie, feeds, images, export and upload
 * directories under CACHE_DIR, deletes every entry whose modification
 * time is more than CACHE_MAX_DAYS days in the past. Directories that are
 * not writable are left untouched (and reported with a count of zero).
 *
 * @param bool $debug when true, reports the per-directory count through _debug()
 */
static function expire_cached_files($debug) {
	$max_age = 86400*CACHE_MAX_DAYS;

	foreach (array("simplepie", "feeds", "images", "export", "upload") as $dir) {
		$cache_dir = CACHE_DIR . "/$dir";

		$num_deleted = 0;

		if (is_writable($cache_dir)) {
			$files = glob("$cache_dir/*");

			if ($files) {
				foreach ($files as $file) {
					// glob() also returns subdirectories; unlink() cannot remove
					// those and would only emit a warning
					if (is_dir($file) && !is_link($file)) continue;

					if (time() - filemtime($file) > $max_age) {
						// count only what was actually removed so that the
						// reported number is accurate
						if (unlink($file)) {
							++$num_deleted;
						}
					}
				}
			}
		}

		if ($debug) _debug("$cache_dir: removed $num_deleted files.");
	}
}
2c08214a 1332
/**
 * Source: http://www.php.net/manual/en/function.parse-url.php#104527
 * Returns the url query as associative array
 *
 * Segments are separated by '&' and split into name and value on the
 * first '=' only, so a value may itself contain '='. A segment without
 * '=' maps its name to null. Empty segments (including an empty query
 * string) are ignored. Names and values are not URL-decoded.
 *
 * @param string query
 * @return array params
 */
static function convertUrlQuery($query) {
	$params = array();

	foreach (explode('&', $query) as $param) {
		// "" or "a=1&&b=2" produce empty segments which carry no parameter
		if ($param === '') continue;

		// limit of 2 keeps any further '=' inside the value
		$item = explode('=', $param, 2);

		$params[$item[0]] = isset($item[1]) ? $item[1] : null;
	}

	return $params;
}
92c14e9d 1352
/**
 * Evaluates filters against an article and collects the actions of every
 * filter that matches.
 *
 * Each filter carries "rules", "actions", a "match_any_rule" flag and an
 * "inverse" flag. A rule is a regular expression ("reg_exp") applied
 * case-insensitively in UTF-8 mode to the field selected by its "type"
 * (title, content, both, link, author, tag); a rule may be inverted on
 * its own. Rules with an empty expression are skipped. With
 * match_any_rule the first matching rule decides; otherwise the first
 * failing rule ends evaluation of that filter.
 *
 * Processing ends early when a matched filter yields an action of type
 * "stop" (the stop action itself is part of the result).
 *
 * @param array $filters filter definitions
 * @param string $title article title
 * @param string $content article content; CR, LF and TAB are stripped before content matching
 * @param string $link article link
 * @param string $author article author
 * @param array $tags article tags
 * @param array|bool $matched_rules when an array is passed, receives the rule that was
 *        evaluated last for every matched filter
 * @return array collected actions
 */
static function get_article_filters($filters, $title, $content, $link, $author, $tags, &$matched_rules = false) {
	$collected = array();

	foreach ($filters as $filter) {
		$any_rule_suffices = $filter["match_any_rule"];
		$filter_inverted = $filter["inverse"];
		$filter_match = false;

		foreach ($filter["rules"] as $rule) {
			$match = false;
			$escaped = str_replace('/', '\/', $rule["reg_exp"]);
			$rule_inverted = $rule["inverse"];

			if (!$escaped) continue;

			$pattern = "/$escaped/iu";
			$rule_type = $rule["type"];

			// invalid expressions are silenced and simply count as "no match"
			if ($rule_type == "title") {
				$match = @preg_match($pattern, $title);

			} else if ($rule_type == "content") {
				// we don't need to deal with multiline regexps
				$content = preg_replace("/[\r\n\t]/", "", $content);

				$match = @preg_match($pattern, $content);

			} else if ($rule_type == "both") {
				// we don't need to deal with multiline regexps
				$content = preg_replace("/[\r\n\t]/", "", $content);

				$match = (@preg_match($pattern, $title) || @preg_match($pattern, $content));

			} else if ($rule_type == "link") {
				$match = @preg_match($pattern, $link);

			} else if ($rule_type == "author") {
				$match = @preg_match($pattern, $author);

			} else if ($rule_type == "tag") {
				// a single matching tag is enough
				foreach ($tags as $tag) {
					if (@preg_match($pattern, $tag)) {
						$match = true;
						break;
					}
				}
			}

			if ($rule_inverted) $match = !$match;

			if ($any_rule_suffices) {
				// keep looking until one rule matches
				if (!$match) continue;

				$filter_match = true;
				break;
			}

			// all rules are required: the first failure settles it
			$filter_match = $match;

			if (!$match) break;
		}

		if ($filter_inverted) $filter_match = !$filter_match;

		if (!$filter_match) continue;

		// $rule is whatever the rule loop looked at last
		if (is_array($matched_rules)) array_push($matched_rules, $rule);

		foreach ($filter["actions"] AS $action) {
			array_push($collected, $action);

			// if Stop action encountered, perform no further processing
			if (isset($action["type"]) && $action["type"] == "stop") return $collected;
		}
	}

	return $collected;
}
1432
/**
 * Returns the first filter action of the given type.
 *
 * @param array $filters filter actions, each with a "type" key
 * @param string $filter_name action type to look for
 * @return array|bool the first action whose type equals $filter_name, or false when there is none
 */
static function find_article_filter($filters, $filter_name) {
	foreach ($filters as $action) {
		if ($action["type"] != $filter_name) continue;

		return $action;
	}

	return false;
}
1441
/**
 * Returns every filter action of the given type, in original order.
 *
 * @param array $filters filter actions, each with a "type" key
 * @param string $filter_name action type to look for
 * @return array matching actions, re-indexed from zero (empty when none match)
 */
static function find_article_filters($filters, $filter_name) {
	$selected = array_filter($filters, function ($action) use ($filter_name) {
		return $action["type"] == $filter_name;
	});

	// array_filter keeps the source keys; callers get a plain list
	return array_values($selected);
}
1452
/**
 * Sums the "param" values of all filter actions of type "score".
 *
 * @param array $filters filter actions, each with a "type" key
 * @return int|float total score; 0 when there are no score actions
 */
static function calculate_article_score($filters) {
	$total = 0;

	foreach ($filters as $action) {
		if ($action["type"] != "score") continue;

		$total += $action["param"];
	}

	return $total;
}
1463
/**
 * Tells whether a list of labels contains one with the given caption.
 *
 * @param array $labels label tuples; the caption is read from index 1
 * @param string $caption caption to look for
 * @return bool true when a label with that caption is present
 */
static function labels_contains_caption($labels, $caption) {
	$found = false;

	foreach ($labels as $entry) {
		if ($entry[1] == $caption) {
			$found = true;
			break;
		}
	}

	return $found;
}
1473
/**
 * Applies the "label" actions of matched filters to an article.
 *
 * Every action of type "label" names a caption in its "param". The label
 * is added through Labels::add_article() unless a label with the same
 * caption is already listed in $article_labels.
 *
 * @param int $id article id handed to Labels::add_article()
 * @param array $filters filter actions, each with a "type" key
 * @param int $owner_uid owner of the article
 * @param array $article_labels labels the article already carries (caption at index 1)
 */
static function assign_article_to_label_filters($id, $filters, $owner_uid, $article_labels) {
	foreach ($filters as $action) {
		if ($action["type"] != "label") continue;

		// skip captions the article already has
		if (RSSUtils::labels_contains_caption($article_labels, $action["param"])) continue;

		Labels::add_article($id, $action["param"], $owner_uid);
	}
}
87764a50 1483
/**
 * Derives a GUID-like string from an article title.
 *
 * HTML tags are stripped, the text is lower-cased as UTF-8, and every
 * space, double quote, single quote, comma, period, colon and semicolon
 * is replaced with a dash.
 *
 * @param string $title article title
 * @return string normalized title
 */
static function make_guid_from_title($title) {
	$plain = strip_tags($title);
	$lowered = mb_strtolower($plain, 'utf-8');

	return preg_replace("/[ \"\',.:;]/", "-", $lowered);
}
1488
/**
 * Removes counter cache rows that no longer have a matching owner row.
 *
 * Deletes ttrss_counters_cache rows (feed_id > 0) for which no
 * ttrss_feeds row with the same id and owner exists, then does the same
 * for ttrss_cat_counters_cache against ttrss_feed_categories.
 *
 * @param bool $debug when true, reports both deleted-row counts through _debug()
 */
static function cleanup_counters_cache($debug) {
	$pdo = Db::pdo();

	// feed counters first
	$feed_stmt = $pdo->query("DELETE FROM ttrss_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feeds WHERE
			id = feed_id AND
			ttrss_counters_cache.owner_uid = ttrss_feeds.owner_uid) = 0");

	$frows = $feed_stmt->rowCount();

	// then category counters
	$cat_stmt = $pdo->query("DELETE FROM ttrss_cat_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feed_categories WHERE
			id = feed_id AND
			ttrss_cat_counters_cache.owner_uid = ttrss_feed_categories.owner_uid) = 0");

	$crows = $cat_stmt->rowCount();

	if ($debug) _debug("Removed $frows (feeds) $crows (cats) orphaned counter cache entries.");
}
1510
/**
 * Runs the house keeping hook of one user's plugins.
 *
 * A separate PluginHost is created, the user's plugins are loaded into
 * it, and HOOK_HOUSE_KEEPING is run on that host.
 *
 * @param int $owner_uid user whose plugins are loaded
 */
static function housekeeping_user($owner_uid) {
	$user_host = new PluginHost();

	load_user_plugins($owner_uid, $user_host);

	$user_host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
1518
e6c886bf
AD
/**
 * Runs the maintenance tasks that are not tied to a particular user.
 *
 * In order: expires cached files, lock files and error log entries,
 * rebuilds the feedbrowser cache, purges orphaned articles, cleans the
 * counters cache, and finally runs HOOK_HOUSE_KEEPING on the shared
 * PluginHost instance.
 *
 * @param bool $debug passed on to the individual cleanup steps
 */
static function housekeeping_common($debug) {
	// expiry of on-disk and logged data
	self::expire_cached_files($debug);
	self::expire_lock_files($debug);
	self::expire_error_log($debug);

	$feeds_processed = self::update_feedbrowser_cache();
	_debug("Feedbrowser updated, $feeds_processed feeds processed.");

	Article::purge_orphans(true);
	self::cleanup_counters_cache($debug);

	$host = PluginHost::getInstance();
	$host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
ea79a0e0 1535
e6c886bf
AD
/**
 * Makes sure a favicon file exists for a feed, downloading it if needed.
 *
 * The icon lives at ICONS_DIR/<feed>.ico. When that file is missing, the
 * favicon URL is looked up through get_favicon_url(), fetched, and
 * written to disk only if its leading bytes identify a known image
 * format (ICO, GIF, PNG, JPEG or BMP).
 *
 * The icon path is returned on every code path, including when the icon
 * was already present; nothing guarantees that the file exists
 * afterwards, since the download or the write may have failed.
 *
 * @param string $site_url site URL used to discover the favicon
 * @param int $feed feed id, used as the icon file name
 * @return string path of the icon file
 */
static function check_feed_favicon($site_url, $feed) {
	$icon_file = ICONS_DIR . "/$feed.ico";

	// nothing to do when the icon is already on disk
	if (file_exists($icon_file)) return $icon_file;

	$favicon_url = get_favicon_url($site_url);

	if (!$favicon_url) return $icon_file;

	// Limiting to "image" type misses those served with text/plain
	$contents = fetch_file_contents($favicon_url);

	if (!$contents) return $icon_file;

	// Crude image type matching.
	// Patterns gleaned from the file(1) source code.
	$signatures = array(
		'/^\x00\x00\x01\x00/',         // MS Windows icon resource
		'/^GIF8/',                     // GIF image data
		'/^\x89PNG\x0d\x0a\x1a\x0a/',  // PNG image data
		'/^\xff\xd8/',                 // JPEG image data
		'/^BM/',                       // PC bitmap (OS2, Windows BMP files)
	);

	$recognized = false;

	foreach ($signatures as $signature) {
		if (preg_match($signature, $contents)) {
			$recognized = true;
			break;
		}
	}

	// unknown content is discarded rather than stored as an icon
	if ($recognized) {
		$fp = @fopen($icon_file, "w");

		if ($fp) {
			fwrite($fp, $contents);
			fclose($fp);
			chmod($icon_file, 0644);
		}
	}

	return $icon_file;
}
e6c886bf
AD
1590
1591
1592
bec5ba93 1593}