]> git.wh0rd.org - tt-rss.git/blame - classes/rssutils.php
rebase translations
[tt-rss.git] / classes / rssutils.php
CommitLineData
2c08214a 1<?php
e6c886bf
AD
2class RSSUtils {
/**
 * Computes a SHA-1 fingerprint of an article together with the active plugin set.
 *
 * Every non-null element of $article except the "feed" entry is tag-stripped,
 * hashed and folded into the result, prefixed by its key. The names returned by
 * $pluginhost->get_plugin_names() are mixed in as well, so a change in the
 * plugin list changes the resulting hash.
 *
 * @param array  $article    article fields keyed by name; array values are joined with ","
 * @param object $pluginhost object exposing get_plugin_names()
 * @return string 40-character hexadecimal SHA-1 digest
 */
static function calculate_article_hash($article, $pluginhost) {
	$tmp = "";

	foreach ($article as $k => $v) {
		// Strict comparison: a loose != treats integer key 0 as equal to
		// "feed" on PHP < 8 and would silently leave that element out.
		if ($k !== "feed" && isset($v)) {
			// NOTE(review): implode() renders nested arrays as the literal
			// "Array"; left unchanged so previously stored hashes stay comparable.
			$x = strip_tags(is_array($v) ? implode(",", $v) : (string) $v);

			//_debug("$k:" . sha1($x) . ":" . htmlspecialchars($x), true);

			$tmp .= sha1("$k:" . sha1($x));
		}
	}

	return sha1(implode(",", $pluginhost->get_plugin_names()) . $tmp);
}
18
102a0135
AD
19 // Strips utf8mb4 characters (i.e. emoji) for mysql
/**
 * Replaces every code point outside the Basic Multilingual Plane
 * (U+10000..U+10FFFF, e.g. emoji) with U+FFFD REPLACEMENT CHARACTER.
 *
 * @param string $str UTF-8 text
 * @return string the filtered text; the input is returned unchanged when the
 *                regular expression engine rejects it (e.g. malformed UTF-8)
 */
static function strip_utf8mb4($str) {
	$stripped = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $str);

	// preg_replace() yields null on failure (the /u modifier rejects invalid
	// UTF-8); returning that null would wipe the caller's field entirely.
	return $stripped === null ? $str : $stripped;
}
102a0135 23
/**
 * Rebuilds ttrss_feedbrowser_cache from ttrss_feeds.
 *
 * Selects up to 1000 (feed_url, site_url, title) groups ordered by subscriber
 * count, excluding any feed_url that has a private or credential-bearing
 * subscription, then replaces the cache table contents inside one transaction.
 * Only the first row seen for a given feed_url is inserted.
 *
 * @return int number of rows inserted into the cache
 */
static function update_feedbrowser_cache() {

	$pdo = Db::pdo();

	$sth = $pdo->query("SELECT feed_url, site_url, title, COUNT(id) AS subscribers
		FROM ttrss_feeds WHERE feed_url NOT IN (SELECT feed_url FROM ttrss_feeds
			WHERE private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%')
		GROUP BY feed_url, site_url, title ORDER BY subscribers DESC LIMIT 1000");

	$pdo->beginTransaction();

	try {
		$pdo->query("DELETE FROM ttrss_feedbrowser_cache");

		// Prepared once and re-executed per row instead of re-preparing in the loop.
		$check_sth = $pdo->prepare("SELECT subscribers FROM
			ttrss_feedbrowser_cache WHERE feed_url = ?");

		$insert_sth = $pdo->prepare("INSERT INTO ttrss_feedbrowser_cache
			(feed_url, site_url, title, subscribers)
			VALUES
			(?, ?, ?, ?)");

		$count = 0;

		while ($line = $sth->fetch()) {

			$subscribers = $line["subscribers"];
			$feed_url = $line["feed_url"];
			$title = $line["title"];
			$site_url = $line["site_url"];

			$check_sth->execute([$feed_url]);
			$already_cached = (bool) $check_sth->fetch();
			// Release the result set before the statement is executed again.
			$check_sth->closeCursor();

			if (!$already_cached) {
				$insert_sth->execute([$feed_url, $site_url, $title, $subscribers]);

				++$count;
			}
		}

		$pdo->commit();
	} catch (Exception $e) {
		// Do not leave the connection inside a half-finished transaction.
		$pdo->rollBack();
		throw $e;
	}

	return $count;

}
70
/**
 * Runs one update batch: selects feed URLs that are due, updates one feed per
 * URL, runs per-user housekeeping and sends headline digests.
 *
 * Terminates the script via die() when the stored schema version differs from
 * SCHEMA_VERSION.
 *
 * @param int  $limit maximum number of feed URLs scheduled per batch; a falsy
 *                    value disables the LIMIT clause
 * @param bool $debug when true, emits scheduling and per-feed debug messages
 * @return int number of feeds passed to update_rss_feed()
 */
static function update_daemon_common($limit = DAEMON_FEED_LIMIT, $debug = true) {
	$schema_version = get_schema_version();

	if ($schema_version != SCHEMA_VERSION) {
		die("Schema version is wrong, please upgrade the database.\n");
	}

	$pdo = Db::pdo();

	// Optionally restrict the batch to users who logged in recently.
	if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
		if (DB_TYPE == "pgsql") {
			$login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
		} else {
			$login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
		}
	} else {
		$login_thresh_qpart = "";
	}

	// A feed is due when its own interval (or, for interval 0, the user's
	// DEFAULT_UPDATE_INTERVAL preference) has elapsed, or it was never updated.
	// A preference value of '-1' excludes the feed.
	if (DB_TYPE == "pgsql") {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_user_prefs.value || ' minutes') AS INTERVAL)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	} else {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL CONVERT(ttrss_user_prefs.value, SIGNED INTEGER) MINUTE)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	}

	// Test if feed is currently being updated by another process.
	if (DB_TYPE == "pgsql") {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '10 minutes')";
	} else {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 10 MINUTE))";
	}

	$query_limit = $limit ? sprintf("LIMIT %d", $limit) : "";

	// Update the least recently updated feeds first
	$query_order = "ORDER BY last_updated";
	if (DB_TYPE == "pgsql") $query_order .= " NULLS FIRST";

	$query = "SELECT DISTINCT ttrss_feeds.feed_url, ttrss_feeds.last_updated
		FROM
			ttrss_feeds, ttrss_users, ttrss_user_prefs
		WHERE
			ttrss_feeds.owner_uid = ttrss_users.id
			AND ttrss_user_prefs.profile IS NULL
			AND ttrss_users.id = ttrss_user_prefs.owner_uid
			AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL'
			$login_thresh_qpart $update_limit_qpart
			$updstart_thresh_qpart
			$query_order $query_limit";

	$res = $pdo->query($query);

	$feeds_to_update = array();
	while ($line = $res->fetch()) {
		array_push($feeds_to_update, $line['feed_url']);
	}

	if ($debug) _debug(sprintf("Scheduled %d feeds to update...", count($feeds_to_update)));

	// Update last_update_started before actually starting the batch
	// in order to minimize collision risk for parallel daemon tasks
	if (count($feeds_to_update) > 0) {
		$feeds_qmarks = arr_qmarks($feeds_to_update);

		$tmph = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
			WHERE feed_url IN ($feeds_qmarks)");
		$tmph->execute($feeds_to_update);
	}

	$nf = 0;
	$bstarted = microtime(true);

	$batch_owners = array();

	// since we have the data cached, we can deal with other feeds with the same url
	$usth = $pdo->prepare("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
		FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
			ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
			ttrss_users.id = ttrss_user_prefs.owner_uid AND
			ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL' AND
			ttrss_user_prefs.profile IS NULL AND
			feed_url = ?
			$update_limit_qpart
			$login_thresh_qpart
		ORDER BY ttrss_feeds.id $query_limit");

	foreach ($feeds_to_update as $feed) {
		if($debug) _debug("Base feed: $feed");

		$usth->execute([$feed]);
		//update_rss_feed($line["id"], true);

		// NOTE(review): only the first matching row is processed per URL even
		// though the query may return several feeds sharing it — confirm that
		// this is intended given the "same url" comment above.
		if ($tline = $usth->fetch()) {
			if ($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);

			// Remember each owner once so housekeeping runs a single time per user.
			if (!in_array($tline["owner_uid"], $batch_owners))
				array_push($batch_owners, $tline["owner_uid"]);

			$fstarted = microtime(true);
			RSSUtils::update_rss_feed($tline["id"], true, false);
			// update_rss_feed() may have turned debug suppression on; re-enable output.
			_debug_suppress(false);

			_debug(sprintf(" %.4f (sec)", microtime(true) - $fstarted));

			++$nf;
		}
	}

	if ($nf > 0) {
		// Measure once so the total and the per-feed average agree.
		$elapsed = microtime(true) - $bstarted;

		_debug(sprintf("Processed %d feeds in %.4f (sec), %.4f (sec/feed avg)", $nf,
			$elapsed, $elapsed / $nf));
	}

	foreach ($batch_owners as $owner_uid) {
		_debug("Running housekeeping tasks for user $owner_uid...");

		RSSUtils::housekeeping_user($owner_uid);
	}

	// Send feed digests by email if needed.
	Digest::send_headlines_digests($debug);

	return $nf;
}
2c08214a 215
6022776d 216 // this is used when subscribing
/**
 * Fills in the title and site URL of a feed.
 *
 * Plugins hooked on HOOK_FEED_BASIC_INFO are asked first; when none of them
 * supplies data, the feed is fetched and parsed directly. The stored title is
 * only replaced when it is empty or "[Unknown]"; the stored site URL is
 * replaced whenever a different non-empty one was obtained.
 *
 * @param int $feed id of the row in ttrss_feeds
 * @return void
 */
static function set_basic_feed_info($feed) {

	$pdo = Db::pdo();

	$sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login,auth_pass_encrypted
		FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	// Unknown feed id: nothing to update.
	if (!$row) return;

	$owner_uid = $row["owner_uid"];

	$auth_pass_encrypted = $row["auth_pass_encrypted"];

	$auth_login = $row["auth_login"];
	$auth_pass = $row["auth_pass"];

	if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) {
		require_once "crypt.php";
		$auth_pass = decrypt_string($auth_pass);
	}

	$fetch_url = $row["feed_url"];

	// A private plugin host is used so the feed owner's plugins get loaded.
	$pluginhost = new PluginHost();
	$user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);

	$pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
	$pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
	$pluginhost->load_data();

	$basic_info = array();
	foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
		$basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
	}

	if (!$basic_info) {
		$feed_data = fetch_file_contents($fetch_url, false,
			$auth_login, $auth_pass, false,
			FEED_FETCH_TIMEOUT,
			0);

		global $fetch_curl_used;

		// When the fetch did not go through curl, try to gunzip the payload.
		if (!$fetch_curl_used) {
			$tmp = @gzdecode($feed_data);

			if ($tmp) $feed_data = $tmp;
		}

		$feed_data = trim($feed_data);

		$rss = new FeedParser($feed_data);
		$rss->init();

		if (!$rss->error()) {
			$basic_info = array(
				'title' => mb_substr($rss->get_title(), 0, 199),
				'site_url' => mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245)
			);
		}
	}

	if (!$basic_info || !is_array($basic_info)) return;

	$sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	if (!$row) return;

	$registered_title = $row["title"];
	$orig_site_url = $row["site_url"];

	// !empty() instead of a bare index: a plugin hook may return an array
	// that lacks one of the keys, which must not raise an undefined-index notice.
	if (!empty($basic_info['title']) && (!$registered_title || $registered_title == "[Unknown]")) {

		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			title = ? WHERE id = ?");
		$sth->execute([$basic_info['title'], $feed]);
	}

	if (!empty($basic_info['site_url']) && $orig_site_url != $basic_info['site_url']) {
		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			site_url = ? WHERE id = ?");
		$sth->execute([$basic_info['site_url'], $feed]);
	}
}
306
7b55001e 307 /**
e6c886bf
AD
308 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
309 */
310 static function update_rss_feed($feed, $no_cache = false) {
2c08214a 311
e6532439 312 $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || clean($_REQUEST['xdebug']);
2c08214a 313
4f71d743 314 _debug_suppress(!$debug_enabled);
68cccafc 315 _debug("start", $debug_enabled);
2c08214a 316
0567016b
AD
317 $pdo = Db::pdo();
318
319 $sth = $pdo->prepare("SELECT title FROM ttrss_feeds WHERE id = ?");
320 $sth->execute([$feed]);
bfe1eb4e 321
0567016b 322 if (!$row = $sth->fetch()) {
bfe1eb4e
AD
323 _debug("feed $feed NOT FOUND/SKIPPED", $debug_enabled);
324 user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING);
325 return false;
326 }
327
0567016b 328 $title = $row["title"];
6bb96beb
AD
329
330 // feed was batch-subscribed or something, we need to get basic info
331 // this is not optimal currently as it fetches stuff separately TODO: optimize
332 if ($title == "[Unknown]") {
333 _debug("setting basic feed info for $feed...");
e6c886bf 334 RSSUtils::set_basic_feed_info($feed);
6bb96beb
AD
335 }
336
0567016b 337 $sth = $pdo->prepare("SELECT id,update_interval,auth_login,
5ba1ddd4 338 feed_url,auth_pass,cache_images,
5321e775 339 mark_unread_on_update, owner_uid,
153cb6d3 340 auth_pass_encrypted, feed_language,
e50c8eaa
AD
341 last_modified,
342 ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
0567016b
AD
343 FROM ttrss_feeds WHERE id = ?");
344 $sth->execute([$feed]);
345
346 if ($row = $sth->fetch()) {
2c08214a 347
0567016b 348 $owner_uid = $row["owner_uid"];
187abfe7
AD
349 $mark_unread_on_update = $row["mark_unread_on_update"];
350 $auth_pass_encrypted = $row["auth_pass_encrypted"];
2c08214a 351
0567016b
AD
352 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
353 WHERE id = ?");
354 $sth->execute([$feed]);
2c08214a 355
0567016b
AD
356 $auth_login = $row["auth_login"];
357 $auth_pass = $row["auth_pass"];
2c08214a 358
0567016b
AD
359 if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) {
360 require_once "crypt.php";
361 $auth_pass = decrypt_string($auth_pass);
362 }
044cff2d 363
0567016b
AD
364 $stored_last_modified = $row["last_modified"];
365 $last_unconditional = $row["last_unconditional"];
187abfe7 366 $cache_images = $row["cache_images"];
0567016b
AD
367 $fetch_url = $row["feed_url"];
368 $feed_language = mb_strtolower($row["feed_language"]);
369 if (!$feed_language) $feed_language = 'english';
2c08214a 370
0567016b
AD
371 } else {
372 return false;
373 }
2c08214a 374
f074ffe9 375 $date_feed_processed = date('Y-m-d H:i');
2c08214a 376
342e8a9e 377 $cache_filename = CACHE_DIR . "/feeds/" . sha1($fetch_url) . ".xml";
f074ffe9 378
ee65bef4
AD
379 $pluginhost = new PluginHost();
380 $pluginhost->set_debug($debug_enabled);
381 $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
382
383 $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
384 $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
385 $pluginhost->load_data();
386
7b55001e 387 $rss_hash = false;
4f9cbdff 388
7b55001e
AD
389 $force_refetch = isset($_REQUEST["force_refetch"]);
390 $feed_data = "";
687a4f59 391
7b55001e
AD
392 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) {
393 $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass);
394 }
2c08214a 395
7b55001e
AD
396 // try cache
397 if (!$feed_data &&
398 file_exists($cache_filename) &&
399 is_readable($cache_filename) &&
400 !$auth_login && !$auth_pass &&
401 filemtime($cache_filename) > time() - 30) {
be574731 402
7b55001e 403 _debug("using local cache [$cache_filename].", $debug_enabled);
52637d3b 404
7b55001e 405 @$feed_data = file_get_contents($cache_filename);
f074ffe9 406
7b55001e
AD
407 if ($feed_data) {
408 $rss_hash = sha1($feed_data);
88edaa93 409 }
ee65bef4 410
7b55001e
AD
411 } else {
412 _debug("local cache will not be used for this feed", $debug_enabled);
413 }
312742db 414
153cb6d3
AD
415 global $fetch_last_modified;
416
7b55001e
AD
417 // fetch feed from source
418 if (!$feed_data) {
e50c8eaa 419 _debug("last unconditional update request: $last_unconditional");
312742db 420
7b55001e
AD
421 if (ini_get("open_basedir") && function_exists("curl_init")) {
422 _debug("not using CURL due to open_basedir restrictions");
423 }
3f6f0857 424
7f4a4045
AD
425 if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
426 _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
e50c8eaa 427
7f4a4045
AD
428 $force_refetch = true;
429 } else {
430 _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
431 }
153cb6d3 432
7f4a4045 433 _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
153cb6d3
AD
434
435 $feed_data = fetch_file_contents([
436 "url" => $fetch_url,
437 "login" => $auth_login,
438 "pass" => $auth_pass,
439 "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
440 "last_modified" => $force_refetch ? "" : $stored_last_modified
441 ]);
3f6f0857 442
7b55001e 443 global $fetch_curl_used;
3f6f0857 444
7b55001e
AD
445 if (!$fetch_curl_used) {
446 $tmp = @gzdecode($feed_data);
1367bc3f 447
7b55001e
AD
448 if ($tmp) $feed_data = $tmp;
449 }
017401dd 450
7b55001e 451 $feed_data = trim($feed_data);
fd687300 452
7b55001e 453 _debug("fetch done.", $debug_enabled);
9d930af9 454 _debug("source last modified: " . $fetch_last_modified, $debug_enabled);
153cb6d3
AD
455
456 if ($feed_data && $fetch_last_modified != $stored_last_modified) {
0567016b
AD
457 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_modified = ? WHERE id = ?");
458 $sth->execute([substr($fetch_last_modified, 0, 245), $feed]);
153cb6d3 459 }
95beaa14 460
7b55001e 461 // cache vanilla feed data for re-use
342e8a9e 462 if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/feeds")) {
7b55001e
AD
463 $new_rss_hash = sha1($feed_data);
464
465 if ($new_rss_hash != $rss_hash) {
466 _debug("saving $cache_filename", $debug_enabled);
467 @file_put_contents($cache_filename, $feed_data);
95beaa14 468 }
4f9cbdff 469 }
7b55001e 470 }
017401dd 471
7b55001e
AD
472 if (!$feed_data) {
473 global $fetch_last_error;
474 global $fetch_last_error_code;
f074ffe9 475
7b55001e 476 _debug("unable to fetch: $fetch_last_error [$fetch_last_error_code]", $debug_enabled);
f074ffe9 477
7b55001e
AD
478 // If-Modified-Since
479 if ($fetch_last_error_code != 304) {
0567016b 480 $error_message = $fetch_last_error;
7b55001e
AD
481 } else {
482 _debug("source claims data not modified, nothing to do.", $debug_enabled);
0567016b 483 $error_message = "";
7b55001e 484 }
4f9cbdff 485
0567016b
AD
486 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
487 last_updated = NOW() WHERE id = ?");
488 $sth->execute([$error_message, $feed]);
4f9cbdff 489
7b55001e 490 return;
f074ffe9
AD
491 }
492
1ffe3391 493 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) {
6791af0c 494 $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed);
017401dd
AD
495 }
496
07d3431e
AD
497 $rss = new FeedParser($feed_data);
498 $rss->init();
2c08214a 499
19b3992b 500 if (!$rss->error()) {
2c08214a 501
d2a421e3 502 // We use local pluginhost here because we need to load different per-user feed plugins
1ffe3391 503 $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
4412b877 504
df659891 505 _debug("language: $feed_language", $debug_enabled);
68cccafc 506 _debug("processing feed data...", $debug_enabled);
2c08214a 507
382268c6
AD
508 if (DB_TYPE == "pgsql") {
509 $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
510 } else {
511 $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
512 }
513
0567016b 514 $sth = $pdo->prepare("SELECT owner_uid,favicon_avg_color,
382268c6
AD
515 (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
516 favicon_needs_check
0567016b
AD
517 FROM ttrss_feeds WHERE id = ?");
518 $sth->execute([$feed]);
2c08214a 519
0567016b 520 if ($row = $sth->fetch()) {
187abfe7 521 $favicon_needs_check = $row["favicon_needs_check"];
0567016b
AD
522 $favicon_avg_color = $row["favicon_avg_color"];
523 $owner_uid = $row["owner_uid"];
524 } else {
525 return false;
526 }
2c08214a 527
0567016b 528 $site_url = mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245);
2c08214a 529
cd07592c
AD
530 _debug("site_url: $site_url", $debug_enabled);
531 _debug("feed_title: " . $rss->get_title(), $debug_enabled);
532
687a4f59 533 if ($favicon_needs_check || $force_refetch) {
36490f11
AD
534
535 /* terrible hack: if we crash on floicon shit here, we won't check
560cbd8c 536 * the icon avgcolor again (unless the icon got updated) */
36490f11 537
560cbd8c
AD
538 $favicon_file = ICONS_DIR . "/$feed.ico";
539 $favicon_modified = @filemtime($favicon_file);
540
68cccafc 541 _debug("checking favicon...", $debug_enabled);
687a4f59 542
e6c886bf 543 RSSUtils::check_feed_favicon($site_url, $feed);
560cbd8c
AD
544 $favicon_modified_new = @filemtime($favicon_file);
545
546 if ($favicon_modified_new > $favicon_modified)
547 $favicon_avg_color = '';
687a4f59 548
0567016b 549 $favicon_colorstring = "";
6ee0d4b0 550 if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') {
e6c886bf 551 require_once "colors.php";
687a4f59 552
0567016b
AD
553 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE
554 id = ?");
555 $sth->execute([$feed]);
aafd55ba 556
0567016b
AD
557 $favicon_color = calculate_avg_color($favicon_file);
558
559 $favicon_colorstring = ",favicon_avg_color = " . $pdo->quote($favicon_color);
63c323f7 560
36490f11 561 } else if ($favicon_avg_color == 'fail') {
84ceb6bd 562 _debug("floicon failed on this file, not trying to recalculate avg color", $debug_enabled);
6ac722d5 563 }
687a4f59 564
0567016b
AD
565 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_last_checked = NOW()
566 $favicon_colorstring WHERE id = ?");
567 $sth->execute([$feed]);
f2798eb6 568 }
2c08214a 569
68cccafc 570 _debug("loading filters & labels...", $debug_enabled);
2c08214a 571
a42c55f0 572 $filters = load_filters($feed, $owner_uid);
2c08214a 573
02f3992a 574 if ($debug_enabled) {
7f4a4045
AD
575 print_r($filters);
576 }
02f3992a 577
68cccafc 578 _debug("" . count($filters) . " filters loaded.", $debug_enabled);
2c08214a 579
19b3992b 580 $items = $rss->get_items();
2c08214a 581
19b3992b 582 if (!is_array($items)) {
68cccafc 583 _debug("no articles found.", $debug_enabled);
2c08214a 584
0567016b
AD
585 $sth = $pdo->prepare("UPDATE ttrss_feeds
586 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
587 $sth->execute([$feed]);
2c08214a 588
0567016b 589 return true; // no articles
2c08214a
AD
590 }
591
68cccafc 592 _debug("processing articles...", $debug_enabled);
2c08214a 593
6c9f3d4a
AD
594 $tstart = time();
595
19b3992b 596 foreach ($items as $item) {
0500e14c
AD
597 $pdo->beginTransaction();
598
e6532439 599 if (clean($_REQUEST['xdebug']) == 3) {
2c08214a
AD
600 print_r($item);
601 }
602
6c9f3d4a
AD
603 if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) {
604 _debug("looks like there's too many articles to process at once, breaking out", $debug_enabled);
0500e14c 605 $pdo->commit();
6c9f3d4a
AD
606 break;
607 }
608
0567016b
AD
609 $entry_guid = strip_tags($item->get_id());
610 if (!$entry_guid) $entry_guid = strip_tags($item->get_link());
e6c886bf 611 if (!$entry_guid) $entry_guid = RSSUtils::make_guid_from_title($item->get_title());
0500e14c
AD
612
613 if (!$entry_guid) {
614 $pdo->commit();
615 continue;
616 }
2c08214a 617
3a4c8973
AD
618 $entry_guid = "$owner_uid,$entry_guid";
619
0567016b 620 $entry_guid_hashed = 'SHA1:' . sha1($entry_guid);
5e3d5480 621
68cccafc 622 _debug("guid $entry_guid / $entry_guid_hashed", $debug_enabled);
5e3d5480 623
0567016b 624 $entry_timestamp = strip_tags($item->get_date());
04d2f9c8
AD
625
626 _debug("orig date: " . $item->get_date(), $debug_enabled);
2c08214a 627
30123fe6 628 if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) {
2c08214a 629 $entry_timestamp = time();
2c08214a
AD
630 }
631
632 $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
633
68cccafc 634 _debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
2c08214a 635
0567016b 636 $entry_title = strip_tags($item->get_title());
1b35d30c 637
5d56d100 638 $entry_link = rewrite_relative_url($site_url, $item->get_link());
2c08214a 639
68cccafc
AD
640 _debug("title $entry_title", $debug_enabled);
641 _debug("link $entry_link", $debug_enabled);
2c08214a
AD
642
643 if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
644
19b3992b
AD
645 $entry_content = $item->get_content();
646 if (!$entry_content) $entry_content = $item->get_description();
2c08214a 647
e6532439 648 if (clean($_REQUEST["xdebug"]) == 2) {
9ec10352 649 print "content: ";
0bc503ff 650 print htmlspecialchars($entry_content);
3c696512 651 print "\n";
2c08214a
AD
652 }
653
0567016b 654 $entry_comments = mb_substr(strip_tags($item->get_comments_url()), 0, 245);
12ff230b 655 $num_comments = (int) $item->get_comments_count();
2c08214a 656
0567016b
AD
657 $entry_author = strip_tags($item->get_author());
658 $entry_guid = mb_substr($entry_guid, 0, 245);
2c08214a 659
68cccafc
AD
660 _debug("author $entry_author", $debug_enabled);
661 _debug("num_comments: $num_comments", $debug_enabled);
ee78f81c 662 _debug("looking for tags...", $debug_enabled);
2c08214a
AD
663
664 // parse <category> entries into tags
665
666 $additional_tags = array();
667
19b3992b 668 $additional_tags_src = $item->get_categories();
2c08214a 669
19b3992b
AD
670 if (is_array($additional_tags_src)) {
671 foreach ($additional_tags_src as $tobj) {
cd07592c 672 array_push($additional_tags, $tobj);
2c08214a 673 }
19b3992b 674 }
2c08214a 675
fa6fbd36 676 $entry_tags = array_unique($additional_tags);
2c08214a 677
5edf4b73 678 for ($i = 0; $i < count($entry_tags); $i++) {
2c08214a
AD
679 $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
680
5edf4b73
AD
681 // we don't support numeric tags, let's prefix them
682 if (is_numeric($entry_tags[$i])) $entry_tags[$i] = 't:' . $entry_tags[$i];
683 }
684
ee78f81c
AD
685 _debug("tags found: " . join(",", $entry_tags), $debug_enabled);
686
68cccafc 687 _debug("done collecting data.", $debug_enabled);
2c08214a 688
0567016b
AD
689 $sth = $pdo->prepare("SELECT id, content_hash, lang FROM ttrss_entries
690 WHERE guid = ? OR guid = ?");
691 $sth->execute([$entry_guid, $entry_guid_hashed]);
b30abdad 692
0567016b
AD
693 if ($row = $sth->fetch()) {
694 $base_entry_id = $row["id"];
695 $entry_stored_hash = $row["content_hash"];
4a0da0e5 696 $article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
0567016b 697 $entry_language = $row["lang"];
a8ac7661 698
2ed0d6c4 699 $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
a8ac7661 700 $entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
b30abdad 701 } else {
b1840673
AD
702 $base_entry_id = false;
703 $entry_stored_hash = "";
a29fe121 704 $article_labels = array();
3318d324 705 $entry_language = "";
b30abdad
AD
706 }
707
455b1401 708 $article = array("owner_uid" => $owner_uid, // read only
b30abdad 709 "guid" => $entry_guid, // read only
59e83455 710 "guid_hashed" => $entry_guid_hashed, // read only
19b3992b
AD
711 "title" => $entry_title,
712 "content" => $entry_content,
713 "link" => $entry_link,
a29fe121 714 "labels" => $article_labels, // current limitation: can add labels to article, can't remove them
19b3992b 715 "tags" => $entry_tags,
e02555c1 716 "author" => $entry_author,
c9299c28 717 "force_catchup" => false, // ugly hack for the time being
6de3a1be 718 "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
3318d324 719 "language" => $entry_language,
20d2195f 720 "num_comments" => $num_comments, // read only
f73e03e0
AD
721 "feed" => array("id" => $feed,
722 "fetch_url" => $fetch_url,
babfadbf
J
723 "site_url" => $site_url,
724 "cache_images" => $cache_images)
e6c886bf 725 );
cc85704f 726
b1840673 727 $entry_plugin_data = "";
e6c886bf 728 $entry_current_hash = RSSUtils::calculate_article_hash($article, $pluginhost);
b1840673
AD
729
730 _debug("article hash: $entry_current_hash [stored=$entry_stored_hash]", $debug_enabled);
731
522e8b35 732 if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) {
b1840673
AD
733 _debug("stored article seems up to date [IID: $base_entry_id], updating timestamp only", $debug_enabled);
734
735 // we keep encountering the entry in feeds, so we need to
736 // update date_updated column so that we don't get horrible
737 // dupes when the entry gets purged and reinserted again e.g.
738 // in the case of SLOW SLOW OMG SLOW updating feeds
739
0567016b
AD
740 $sth = $pdo->prepare("UPDATE ttrss_entries SET date_updated = NOW()
741 WHERE id = ?");
742 $sth->execute([$base_entry_id]);
b1840673 743
0500e14c 744 $pdo->commit();
5bdcb8fd 745 continue;
b1840673
AD
746 }
747
748 _debug("hash differs, applying plugin filters:", $debug_enabled);
749
1ffe3391 750 foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) {
b1840673
AD
751 _debug("... " . get_class($plugin), $debug_enabled);
752
753 $start = microtime(true);
19b3992b 754 $article = $plugin->hook_article_filter($article);
0084f0d1 755
b1840673
AD
756 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
757
758 $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ",";
cc85704f
AD
759 }
760
e6532439 761 if (clean($_REQUEST["xdebug"]) == 2) {
0bc503ff
AD
762 print "processed content: ";
763 print htmlspecialchars($article["content"]);
764 print "\n";
765 }
766
b1840673
AD
767 _debug("plugin data: $entry_plugin_data", $debug_enabled);
768
35c12dc4
AD
769 // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
770 if (DB_TYPE == "mysql") {
771 foreach ($article as $k => $v) {
35c37354 772 // i guess we'll have to take the risk of 4byte unicode labels & tags here
dae16f72 773 if (is_string($article[$k])) {
102a0135 774 $article[$k] = RSSUtils::strip_utf8mb4($v);
35c37354 775 }
35c12dc4
AD
776 }
777 }
778
b8774453
AD
779 /* Collect article tags here so we could filter by them: */
780
557d86fe
AD
781 $matched_rules = array();
782
e6c886bf 783 $article_filters = RSSUtils::get_article_filters($filters, $article["title"],
7b55001e 784 $article["content"], $article["link"], $article["author"],
557d86fe 785 $article["tags"], $matched_rules);
b8774453
AD
786
787 if ($debug_enabled) {
557d86fe
AD
788 _debug("matched filter rules: ", $debug_enabled);
789
790 if (count($matched_rules) != 0) {
791 print_r($matched_rules);
792 }
793
794 _debug("filter actions: ", $debug_enabled);
795
b8774453
AD
796 if (count($article_filters) != 0) {
797 print_r($article_filters);
798 }
799 }
800
e6c886bf 801 $plugin_filter_names = RSSUtils::find_article_filters($article_filters, "plugin");
b8774453
AD
802 $plugin_filter_actions = $pluginhost->get_filter_actions();
803
804 if (count($plugin_filter_names) > 0) {
805 _debug("applying plugin filter actions...", $debug_enabled);
806
807 foreach ($plugin_filter_names as $pfn) {
808 list($pfclass,$pfaction) = explode(":", $pfn["param"]);
809
810 if (isset($plugin_filter_actions[$pfclass])) {
811 $plugin = $pluginhost->get_plugin($pfclass);
812
813 _debug("... $pfclass: $pfaction", $debug_enabled);
814
815 if ($plugin) {
816 $start = microtime(true);
817 $article = $plugin->hook_article_filter_action($article, $pfaction);
818
819 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
820 } else {
821 _debug("??? $pfclass: plugin object not found.");
822 }
823 } else {
824 _debug("??? $pfclass: filter plugin not registered.");
825 }
826 }
827 }
828
19b3992b 829 $entry_tags = $article["tags"];
0567016b
AD
830 $entry_title = strip_tags($article["title"]);
831 $entry_author = mb_substr(strip_tags($article["author"]), 0, 245);
832 $entry_link = strip_tags($article["link"]);
f935d98e 833 $entry_content = $article["content"]; // escaped below
c9299c28 834 $entry_force_catchup = $article["force_catchup"];
a29fe121 835 $article_labels = $article["labels"];
6de3a1be 836 $entry_score_modifier = (int) $article["score_modifier"];
0567016b 837 $entry_language = $article["language"];
a29fe121
AD
838
839 if ($debug_enabled) {
840 _debug("article labels:", $debug_enabled);
557d86fe
AD
841
842 if (count($article_labels) != 0) {
843 print_r($article_labels);
844 }
a29fe121 845 }
c9299c28
AD
846
847 _debug("force catchup: $entry_force_catchup");
f935d98e 848
0a3fd79b 849 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 850 RSSUtils::cache_media($entry_content, $site_url, $debug_enabled);
0a3fd79b 851
0567016b
AD
852 $csth = $pdo->prepare("SELECT id FROM ttrss_entries
853 WHERE guid = ? OR guid = ?");
854 $csth->execute([$entry_guid, $entry_guid_hashed]);
9e222305 855
0567016b 856 if (!$row = $csth->fetch()) {
2c08214a 857
07d3431e 858 _debug("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", $debug_enabled);
2c08214a
AD
859
860 // base post entry does not exist, create it
861
0567016b 862 $usth = $pdo->prepare(
2c08214a 863 "INSERT INTO ttrss_entries
0567016b 864 (title,
2c08214a
AD
865 guid,
866 link,
867 updated,
868 content,
869 content_hash,
870 no_orig_date,
871 date_updated,
872 date_entered,
873 comments,
874 num_comments,
b30abdad 875 plugin_data,
6b461797 876 lang,
2c08214a
AD
877 author)
878 VALUES
0567016b 879 (?, ?, ?, ?, ?, ?,
5ba1ddd4 880 false,
2c08214a 881 NOW(),
0567016b
AD
882 ?, ?, ?, ?, ?, ?)");
883
884 $usth->execute([$entry_title,
885 $entry_guid_hashed,
886 $entry_link,
887 $entry_timestamp_fmt,
93e70e36 888 "$entry_content",
0567016b
AD
889 $entry_current_hash,
890 $date_feed_processed,
891 $entry_comments,
187abfe7 892 (int)$num_comments,
0567016b 893 $entry_plugin_data,
93e70e36
AD
894 "$entry_language",
895 "$entry_author"]);
e8291805 896
2c08214a
AD
897 }
898
0567016b 899 $csth->execute([$entry_guid, $entry_guid_hashed]);
2c08214a
AD
900
901 $entry_ref_id = 0;
902 $entry_int_id = 0;
903
0567016b 904 if ($row = $csth->fetch()) {
2c08214a 905
68cccafc 906 _debug("base guid found, checking for user record", $debug_enabled);
2c08214a 907
0567016b 908 $ref_id = $row['id'];
2c08214a
AD
909 $entry_ref_id = $ref_id;
910
e6c886bf 911 if (RSSUtils::find_article_filter($article_filters, "filter")) {
0500e14c 912 $pdo->commit();
2c08214a
AD
913 continue;
914 }
915
e6c886bf 916 $score = RSSUtils::calculate_article_score($article_filters) + $entry_score_modifier;
2c08214a 917
6de3a1be 918 _debug("initial score: $score [including plugin modifier: $entry_score_modifier]", $debug_enabled);
2c08214a 919
4f186b1f
AD
920 // check for user post link to main table
921
0567016b
AD
922 $sth = $pdo->prepare("SELECT ref_id, int_id FROM ttrss_user_entries WHERE
923 ref_id = ? AND owner_uid = ?");
924 $sth->execute([$ref_id, $owner_uid]);
2c08214a
AD
925
926 // okay it doesn't exist - create user entry
0567016b
AD
927 if ($row = $sth->fetch()) {
928 $entry_ref_id = $row["ref_id"];
929 $entry_int_id = $row["int_id"];
2c08214a 930
0567016b
AD
931 _debug("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
932 } else {
933
68cccafc 934 _debug("user record not found, creating...", $debug_enabled);
2c08214a 935
e6c886bf 936 if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) {
0567016b
AD
937 $unread = 1;
938 $last_read_qpart = null;
2c08214a 939 } else {
0567016b 940 $unread = 0;
d4c05d0b 941 $last_read_qpart = date("Y-m-d H:i"); // we can't use NOW() here because it gets quoted
2c08214a
AD
942 }
943
e6c886bf 944 if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
0567016b 945 $marked = 1;
2c08214a 946 } else {
0567016b 947 $marked = 0;
2c08214a
AD
948 }
949
e6c886bf 950 if (RSSUtils::find_article_filter($article_filters, 'publish')) {
0567016b 951 $published = 1;
2c08214a 952 } else {
0567016b 953 $published = 0;
2c08214a
AD
954 }
955
26ad257d 956 $last_marked = ($marked == 1) ? 'NOW()' : 'NULL';
957 $last_published = ($published == 1) ? 'NOW()' : 'NULL';
7873d588 958
0567016b 959 $sth = $pdo->prepare(
2c08214a
AD
960 "INSERT INTO ttrss_user_entries
961 (ref_id, owner_uid, feed_id, unread, last_read, marked,
7873d588
AD
962 published, score, tag_cache, label_cache, uuid,
963 last_marked, last_published)
aa16334f 964 VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")");
2c08214a 965
0567016b 966 $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
aa16334f 967 $published, $score]);
2c08214a 968
0567016b
AD
969 $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
970 ref_id = ? AND owner_uid = ? AND
971 feed_id = ? LIMIT 1");
2c08214a 972
0567016b
AD
973 $sth->execute([$ref_id, $owner_uid, $feed]);
974
975 if ($row = $sth->fetch())
976 $entry_int_id = $row['int_id'];
2c08214a
AD
977 }
978
0567016b 979 _debug("resulting RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
2c08214a 980
963c2264
AD
981 if (DB_TYPE == "pgsql")
982 $tsvector_qpart = "tsvector_combined = to_tsvector(:ts_lang, :ts_content),";
983 else
e854442e 984 $tsvector_qpart = "";
49a888ec 985
0567016b 986 $sth = $pdo->prepare("UPDATE ttrss_entries
49a888ec 987 SET title = :title,
e854442e 988 $tsvector_qpart
49a888ec
AD
989 content = :content,
990 content_hash = :content_hash,
991 updated = :updated,
992 num_comments = :num_comments,
993 plugin_data = :plugin_data,
994 author = :author,
995 lang = :lang
996 WHERE id = :id");
997
963c2264 998 $params = [":title" => $entry_title,
93e70e36 999 ":content" => "$entry_content",
49a888ec
AD
1000 ":content_hash" => $entry_current_hash,
1001 ":updated" => $entry_timestamp_fmt,
1002 ":num_comments" => (int)$num_comments,
1003 ":plugin_data" => $entry_plugin_data,
93e70e36 1004 ":author" => "$entry_author",
49a888ec 1005 ":lang" => $entry_language,
963c2264
AD
1006 ":id" => $ref_id];
1007
1008 if (DB_TYPE == "pgsql") {
1009 $params[":ts_lang"] = $feed_language;
1010 $params[":ts_content"] = mb_substr(strip_tags($entry_title . " " . $entry_content), 0, 1000000);
1011 }
1012
1013 $sth->execute($params);
b1840673 1014
59e83455 1015 // update aux data
0567016b
AD
1016 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1017 SET score = ? WHERE ref_id = ?");
1018 $sth->execute([$score, $ref_id]);
59e83455 1019
b1840673 1020 if ($mark_unread_on_update) {
24e6ff5d
AD
1021 _debug("article updated, marking unread as requested.", $debug_enabled);
1022
0567016b
AD
1023 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1024 SET last_read = null, unread = true WHERE ref_id = ?");
1025 $sth->execute([$ref_id]);
2c08214a
AD
1026 }
1027 }
1028
a29fe121
AD
1029 _debug("assigning labels [other]...", $debug_enabled);
1030
1031 foreach ($article_labels as $label) {
7c9b5a3f 1032 Labels::add_article($entry_ref_id, $label[1], $owner_uid);
a29fe121
AD
1033 }
1034
1035 _debug("assigning labels [filters]...", $debug_enabled);
2c08214a 1036
e6c886bf 1037 RSSUtils::assign_article_to_label_filters($entry_ref_id, $article_filters,
b24504b1 1038 $owner_uid, $article_labels);
2c08214a 1039
68cccafc 1040 _debug("looking for enclosures...", $debug_enabled);
2c08214a
AD
1041
1042 // enclosures
1043
1044 $enclosures = array();
1045
19b3992b 1046 $encs = $item->get_enclosures();
2c08214a 1047
19b3992b
AD
1048 if (is_array($encs)) {
1049 foreach ($encs as $e) {
1050 $e_item = array(
86e53429
AD
1051 rewrite_relative_url($site_url, $e->link),
1052 $e->type, $e->length, $e->title, $e->width, $e->height);
102a0135
AD
1053
1054 // Yet another episode of "mysql utf8_general_ci is gimped"
1055 if (DB_TYPE == "mysql") {
7f4a4045
AD
1056 for ($i = 0; $i < count($e_item); $i++) {
1057 if (is_string($e_item[$i])) {
1058 $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
1059 }
1060 }
102a0135
AD
1061 }
1062
7f4a4045 1063 array_push($enclosures, $e_item);
2c08214a
AD
1064 }
1065 }
1066
388d4dfa 1067 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 1068 RSSUtils::cache_enclosures($enclosures, $site_url, $debug_enabled);
388d4dfa 1069
2c08214a 1070 if ($debug_enabled) {
68cccafc 1071 _debug("article enclosures:", $debug_enabled);
2c08214a
AD
1072 print_r($enclosures);
1073 }
1074
0567016b
AD
1075 $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures
1076 WHERE content_url = ? AND post_id = ?");
2c08214a 1077
0567016b
AD
1078 $usth = $pdo->prepare("INSERT INTO ttrss_enclosures
1079 (content_url, content_type, title, duration, post_id, width, height) VALUES
1080 (?, ?, ?, ?, ?, ?, ?)");
5c54e683 1081
2c08214a 1082 foreach ($enclosures as $enc) {
0567016b
AD
1083 $enc_url = $enc[0];
1084 $enc_type = $enc[1];
0500e14c 1085 $enc_dur = (int)$enc[2];
0567016b 1086 $enc_title = $enc[3];
523bd90b
FE
1087 $enc_width = intval($enc[4]);
1088 $enc_height = intval($enc[5]);
2c08214a 1089
0567016b 1090 $esth->execute([$enc_url, $entry_ref_id]);
2c08214a 1091
0567016b
AD
1092 if (!$esth->fetch()) {
1093 $usth->execute([$enc_url, $enc_type, (string)$enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]);
2c08214a
AD
1094 }
1095 }
1096
2c08214a
AD
1097 // check for manual tags (we have to do it here since they're loaded from filters)
1098
1099 foreach ($article_filters as $f) {
6aff7845 1100 if ($f["type"] == "tag") {
2c08214a 1101
6aff7845 1102 $manual_tags = trim_array(explode(",", $f["param"]));
2c08214a
AD
1103
1104 foreach ($manual_tags as $tag) {
1105 if (tag_is_valid($tag)) {
1106 array_push($entry_tags, $tag);
1107 }
1108 }
1109 }
1110 }
1111
1112 // Skip boring tags
1113
6322ac79 1114 $boring_tags = trim_array(explode(",", mb_strtolower(get_pref(
2c08214a
AD
1115 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
1116
1117 $filtered_tags = array();
1118 $tags_to_cache = array();
1119
1120 if ($entry_tags && is_array($entry_tags)) {
1121 foreach ($entry_tags as $tag) {
1122 if (array_search($tag, $boring_tags) === false) {
1123 array_push($filtered_tags, $tag);
1124 }
1125 }
1126 }
1127
1128 $filtered_tags = array_unique($filtered_tags);
1129
1130 if ($debug_enabled) {
68cccafc 1131 _debug("filtered article tags:", $debug_enabled);
2c08214a
AD
1132 print_r($filtered_tags);
1133 }
1134
1135 // Save article tags in the database
1136
1137 if (count($filtered_tags) > 0) {
1138
0567016b
AD
1139 $tsth = $pdo->prepare("SELECT id FROM ttrss_tags
1140 WHERE tag_name = ? AND post_int_id = ? AND
1141 owner_uid = ? LIMIT 1");
1142
1143 $usth = $pdo->prepare("INSERT INTO ttrss_tags
1144 (owner_uid,tag_name,post_int_id)
1145 VALUES (?, ?, ?)");
2c08214a
AD
1146
1147 foreach ($filtered_tags as $tag) {
1148
1149 $tag = sanitize_tag($tag);
2c08214a
AD
1150
1151 if (!tag_is_valid($tag)) continue;
1152
0567016b 1153 $tsth->execute([$tag, $entry_int_id, $owner_uid]);
2c08214a 1154
0567016b
AD
1155 if (!$tsth->fetch()) {
1156 $usth->execute([$owner_uid, $tag, $entry_int_id]);
e6c886bf 1157 }
2c08214a
AD
1158
1159 array_push($tags_to_cache, $tag);
1160 }
1161
1162 /* update the cache */
1163
1164 $tags_to_cache = array_unique($tags_to_cache);
1165
0567016b 1166 $tags_str = join(",", $tags_to_cache);
2c08214a 1167
0567016b
AD
1168 $tsth = $pdo->prepare("UPDATE ttrss_user_entries
1169 SET tag_cache = ? WHERE ref_id = ?
1170 AND owner_uid = ?");
1171 $tsth->execute([$tags_str, $entry_ref_id, $owner_uid]);
2c08214a
AD
1172 }
1173
68cccafc 1174 _debug("article processed", $debug_enabled);
0500e14c
AD
1175
1176 $pdo->commit();
2c08214a
AD
1177 }
1178
68cccafc 1179 _debug("purging feed...", $debug_enabled);
2c08214a 1180
a42c55f0 1181 purge_feed($feed, 0, $debug_enabled);
2c08214a 1182
0567016b
AD
1183 $sth = $pdo->prepare("UPDATE ttrss_feeds
1184 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
1185 $sth->execute([$feed]);
2c08214a
AD
1186
1187 } else {
1188
0567016b 1189 $error_msg = mb_substr($rss->error(), 0, 245);
2c08214a 1190
4ad04ee2
AD
1191 _debug("fetch error: $error_msg", $debug_enabled);
1192
1193 if (count($rss->errors()) > 1) {
1194 foreach ($rss->errors() as $error) {
1195 _debug("+ $error");
1196 }
1197 }
2c08214a 1198
0567016b
AD
1199 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
1200 last_updated = NOW(), last_unconditional = NOW() WHERE id = ?");
1201 $sth->execute([$error_msg, $feed]);
2c08214a 1202
88edaa93 1203 unset($rss);
0567016b 1204 return false;
88edaa93 1205 }
2c08214a 1206
68cccafc 1207 _debug("done", $debug_enabled);
88edaa93 1208
7b55001e 1209 return true;
2c08214a
AD
1210 }
1211
/**
 * Store image/audio/video enclosures in the local cache directory.
 *
 * Each enclosure is read as a list where element 0 is the URL and
 * element 1 is the content type. Files are saved under
 * CACHE_DIR/images using the SHA1 of the absolute URL as the file name.
 *
 * @param array $enclosures enclosure tuples (URL at index 0, type at index 1)
 * @param string $site_url base URL handed to rewrite_relative_url()
 * @param bool $debug log every processed enclosure when truthy
 */
static function cache_enclosures($enclosures, $site_url, $debug) {
	foreach ($enclosures as $enclosure) {

		// anything that is not image/audio/video is left alone
		if (!preg_match("/(image|audio|video)/", $enclosure[1])) {
			continue;
		}

		$url = rewrite_relative_url($site_url, $enclosure[0]);
		$cached_path = CACHE_DIR . "/images/" . sha1($url);

		if ($debug) _debug("cache_enclosures: downloading: $url to $cached_path");

		if (file_exists($cached_path)) {
			// already cached: only refresh the modification time
			touch($cached_path);
			continue;
		}

		$data = fetch_file_contents($url);

		// skip failed downloads and files not larger than the configured minimum
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cached_path, $data);
		}
	}
}
1235
/**
 * Store media referenced by an HTML fragment in the local cache directory.
 *
 * The fragment is parsed with DOMDocument and every <img src>,
 * <video><source src> and <audio><source src> is downloaded to
 * CACHE_DIR/images (file name: SHA1 of the absolute URL), unless the
 * source is an inline "data:" URI.
 *
 * @param string $html HTML fragment to scan
 * @param string $site_url base URL handed to rewrite_relative_url()
 * @param bool $debug log every processed URL when truthy
 */
static function cache_media($html, $site_url, $debug) {
	// collect libxml parse errors internally instead of emitting warnings
	libxml_use_internal_errors(true);

	// prepended so the fragment is parsed as UTF-8
	$charset_hack = '<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
	</head>';

	$doc = new DOMDocument();
	$doc->loadHTML($charset_hack . $html);

	$finder = new DOMXPath($doc);
	$nodes = $finder->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])');

	foreach ($nodes as $node) {
		if (!$node->hasAttribute('src')) {
			continue;
		}

		// inline data: URIs carry their own payload, nothing to download
		if (strpos($node->getAttribute('src'), "data:") === 0) {
			continue;
		}

		$url = rewrite_relative_url($site_url, $node->getAttribute('src'));
		$cached_path = CACHE_DIR . "/images/" . sha1($url);

		if ($debug) _debug("cache_media: downloading: $url to $cached_path");

		if (file_exists($cached_path)) {
			// already cached: only refresh the modification time
			touch($cached_path);
			continue;
		}

		$data = fetch_file_contents($url);

		// skip failed downloads and files not larger than the configured minimum
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cached_path, $data);
		}
	}
}
1269
/**
 * Delete rows of ttrss_error_log that were created more than 7 days ago.
 *
 * @param bool $debug log a progress message when truthy
 */
static function expire_error_log($debug) {
	if ($debug) _debug("Removing old error log entries...");

	$pdo = Db::pdo();

	// date arithmetic syntax differs between PostgreSQL and the other backend
	if (DB_TYPE == "pgsql") {
		$sql = "DELETE FROM ttrss_error_log
			WHERE created_at < NOW() - INTERVAL '7 days'";
	} else {
		$sql = "DELETE FROM ttrss_error_log
			WHERE created_at < DATE_SUB(NOW(), INTERVAL 7 DAY)";
	}

	$pdo->query($sql);
}
1283
/**
 * Remove *.lock files from LOCK_DIRECTORY that are not currently locked
 * and have not been modified for more than two days.
 *
 * @param bool $debug log the number of removed files when truthy
 */
static function expire_lock_files($debug) {
	$removed = 0;

	// nothing can be deleted from a read-only directory
	$candidates = is_writable(LOCK_DIRECTORY) ? glob(LOCK_DIRECTORY . "/*.lock") : false;

	if ($candidates) {
		foreach ($candidates as $lock_path) {
			if (file_is_locked(basename($lock_path))) {
				continue;
			}

			// keep anything touched within the last 2 days
			if (time() - filemtime($lock_path) <= 86400*2) {
				continue;
			}

			unlink($lock_path);
			$removed++;
		}
	}

	if ($debug) _debug("Removed $removed old lock files.");
}
1304
/**
 * Delete cache files older than CACHE_MAX_DAYS days.
 *
 * Walks the "simplepie", "feeds", "images", "export" and "upload"
 * sub-directories of CACHE_DIR. Only regular files are considered, so a
 * sub-directory matched by glob() is never passed to unlink(), and a
 * file is counted as removed only if unlink() actually succeeded.
 *
 * @param bool $debug log the per-directory removal count when truthy
 */
static function expire_cached_files($debug) {
	foreach (array("simplepie", "feeds", "images", "export", "upload") as $dir) {
		$cache_dir = CACHE_DIR . "/$dir";

		$num_deleted = 0;

		if (is_writable($cache_dir)) {
			$files = glob("$cache_dir/*");

			if ($files) {
				foreach ($files as $file) {
					// glob() also returns directories; unlink() cannot remove those
					if (!is_file($file)) {
						continue;
					}

					if (time() - filemtime($file) > 86400*CACHE_MAX_DAYS) {
						// count only deletions that really happened
						if (unlink($file)) {
							++$num_deleted;
						}
					}
				}
			}
		}

		if ($debug) _debug("$cache_dir: removed $num_deleted files.");
	}
}
2c08214a 1330
/**
 * Source: http://www.php.net/manual/en/function.parse-url.php#104527
 * Returns the url query as associative array
 *
 * The query is split on "&" and each part on its first "=" only, so a
 * value may itself contain "=" characters. A part without "=" yields the
 * key with a null value. Keys and values are returned verbatim; no
 * URL-decoding is performed.
 *
 * @param string query
 * @return array params
 */
static function convertUrlQuery($query) {
	$params = array();

	foreach (explode('&', $query) as $param) {
		// limit 2: everything after the first "=" belongs to the value
		$item = explode('=', $param, 2);

		// key-only parameter ("flag" in "a=1&flag") has no value part
		$params[$item[0]] = isset($item[1]) ? $item[1] : null;
	}

	return $params;
}
92c14e9d 1350
/**
 * Collect the actions of every filter that matches the given article data.
 *
 * Each filter is read as an array with the keys "match_any_rule",
 * "inverse", "rules" and "actions"; each rule with the keys "reg_exp",
 * "type" and "inverse". Rule patterns are applied case-insensitively in
 * UTF-8 mode; an invalid pattern simply counts as "no match" because
 * preg_match() warnings are suppressed. Rules with an empty pattern are
 * skipped.
 *
 * With "match_any_rule" the filter matches as soon as one rule matches;
 * otherwise every evaluated rule has to match. "inverse" flips the result
 * on rule and on filter level.
 *
 * @param array $filters filter definitions
 * @param string $title article title
 * @param string $content article content (CR, LF and TAB are stripped before matching)
 * @param string $link article link
 * @param string $author article author
 * @param array $tags article tags
 * @param array|false $matched_rules when an array is passed, the last rule
 *   visited of every matching filter is appended to it; a matching filter
 *   that has no rules appends nothing
 * @return array actions of all matching filters, in order; collection stops
 *   right after an action of type "stop"
 */
static function get_article_filters($filters, $title, $content, $link, $author, $tags, &$matched_rules = false) {
	$matches = array();

	// single-line copy of $content, built once on first use by a content/both rule
	$flat_content = null;

	foreach ($filters as $filter) {
		$match_any_rule = $filter["match_any_rule"];
		$inverse = $filter["inverse"];
		$filter_match = false;

		// reset per filter so a rule of a previous filter can never be reported for this one
		$last_rule = null;

		foreach ($filter["rules"] as $rule) {
			$last_rule = $rule;

			$match = false;
			$reg_exp = str_replace('/', '\/', $rule["reg_exp"]);
			$rule_inverse = $rule["inverse"];

			if (!$reg_exp)
				continue;

			$pattern = "/$reg_exp/iu";

			if ($flat_content === null && ($rule["type"] == "content" || $rule["type"] == "both")) {
				// we don't need to deal with multiline regexps
				$flat_content = preg_replace("/[\r\n\t]/", "", $content);
			}

			switch ($rule["type"]) {
				case "title":
					$match = @preg_match($pattern, $title);
					break;
				case "content":
					$match = @preg_match($pattern, $flat_content);
					break;
				case "both":
					$match = (@preg_match($pattern, $title) || @preg_match($pattern, $flat_content));
					break;
				case "link":
					$match = @preg_match($pattern, $link);
					break;
				case "author":
					$match = @preg_match($pattern, $author);
					break;
				case "tag":
					if (is_array($tags)) {
						foreach ($tags as $tag) {
							if (@preg_match($pattern, $tag)) {
								$match = true;
								break;
							}
						}
					}
					break;
			}

			if ($rule_inverse) $match = !$match;

			if ($match_any_rule) {
				// any-mode: the first hit decides
				if ($match) {
					$filter_match = true;
					break;
				}
			} else {
				// all-mode: the first miss decides
				$filter_match = $match;
				if (!$match) {
					break;
				}
			}
		}

		if ($inverse) $filter_match = !$filter_match;

		if ($filter_match) {
			if (is_array($matched_rules) && $last_rule !== null) {
				array_push($matched_rules, $last_rule);
			}

			foreach ($filter["actions"] as $action) {
				array_push($matches, $action);

				// if Stop action encountered, perform no further processing
				if (isset($action["type"]) && $action["type"] == "stop") return $matches;
			}
		}
	}

	return $matches;
}
1430
/**
 * Return the first filter action whose "type" equals $filter_name.
 *
 * @param array $filters list of filter actions (arrays with a "type" key)
 * @param string $filter_name action type to look for
 * @return array|false the first matching action, false when there is none
 */
static function find_article_filter($filters, $filter_name) {
	foreach ($filters as $action) {
		if ($action["type"] != $filter_name) {
			continue;
		}

		return $action;
	}

	return false;
}
1439
/**
 * Return every filter action whose "type" equals $filter_name.
 *
 * @param array $filters list of filter actions (arrays with a "type" key)
 * @param string $filter_name action type to look for
 * @return array matching actions in their original order, re-indexed from 0
 */
static function find_article_filters($filters, $filter_name) {
	$found = array();

	foreach ($filters as $action) {
		if ($action["type"] != $filter_name) {
			continue;
		}

		$found[] = $action;
	}

	return $found;
}
1450
/**
 * Sum the "param" values of all filter actions of type "score".
 *
 * @param array $filters list of filter actions (arrays with "type" and "param" keys)
 * @return int|float accumulated score, 0 when there is no score action
 */
static function calculate_article_score($filters) {
	$total = 0;

	foreach ($filters as $action) {
		if ($action["type"] != "score") {
			continue;
		}

		$total = $total + $action["param"];
	}

	return $total;
}
1461
/**
 * Tell whether a label list already holds the given caption.
 *
 * Every label is read as a list whose element 1 is the caption.
 *
 * @param array $labels label tuples (caption at index 1)
 * @param string $caption caption to look for
 * @return bool true when any label carries that caption
 */
static function labels_contains_caption($labels, $caption) {
	$found = false;

	foreach ($labels as $entry) {
		if ($entry[1] == $caption) {
			$found = true;
			break;
		}
	}

	return $found;
}
1471
/**
 * Attach the labels requested by "label" filter actions to an article.
 *
 * A label whose caption is already present in $article_labels is not
 * added again.
 *
 * @param int $id article id handed to Labels::add_article()
 * @param array $filters list of filter actions (arrays with "type" and "param" keys)
 * @param int $owner_uid owner handed to Labels::add_article()
 * @param array $article_labels labels already assigned (caption at index 1)
 */
static function assign_article_to_label_filters($id, $filters, $owner_uid, $article_labels) {
	foreach ($filters as $action) {
		if ($action["type"] != "label") {
			continue;
		}

		// caption already assigned to the article: nothing to do
		if (self::labels_contains_caption($article_labels, $action["param"])) {
			continue;
		}

		Labels::add_article($id, $action["param"], $owner_uid);
	}
}
87764a50 1481
/**
 * Derive a GUID-like string from an article title.
 *
 * HTML tags are stripped, the text is lower-cased as UTF-8, and every
 * space, quote, apostrophe, comma, dot, colon and semicolon becomes "-".
 *
 * @param string $title article title
 * @return string the normalized title
 */
static function make_guid_from_title($title) {
	$plain = strip_tags($title);
	$lowered = mb_strtolower($plain, 'utf-8');

	return preg_replace("/[ \"\',.:;]/", "-", $lowered);
}
1486
/**
 * Delete counter cache rows whose feed or category no longer exists
 * for the same owner.
 *
 * @param bool $debug log the number of removed rows when truthy
 */
static function cleanup_counters_cache($debug) {
	$pdo = Db::pdo();

	// feed counters without a matching row in ttrss_feeds
	$removed_feeds = $pdo->query("DELETE FROM ttrss_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feeds WHERE
			id = feed_id AND
			ttrss_counters_cache.owner_uid = ttrss_feeds.owner_uid) = 0")->rowCount();

	// category counters without a matching row in ttrss_feed_categories
	$removed_cats = $pdo->query("DELETE FROM ttrss_cat_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feed_categories WHERE
			id = feed_id AND
			ttrss_cat_counters_cache.owner_uid = ttrss_feed_categories.owner_uid) = 0")->rowCount();

	if ($debug) _debug("Removed $removed_feeds (feeds) $removed_cats (cats) orphaned counter cache entries.");
}
1508
/**
 * Run the HOOK_HOUSE_KEEPING hooks of one user's plugins.
 *
 * A fresh PluginHost is created for the call and populated through
 * load_user_plugins().
 *
 * @param int $owner_uid user whose plugins are loaded
 */
static function housekeeping_user($owner_uid) {
	$user_host = new PluginHost();

	load_user_plugins($owner_uid, $user_host);

	$user_host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
1516
e6c886bf
AD
/**
 * Run the housekeeping tasks that are not tied to a single user:
 * expire cached files, lock files and error log entries, rebuild the
 * feed browser cache, purge orphaned articles, clean up the counters
 * cache and finally run the HOOK_HOUSE_KEEPING hooks of the global
 * plugin host.
 *
 * @param bool $debug passed on to the individual tasks
 */
static function housekeeping_common($debug) {
	self::expire_cached_files($debug);
	self::expire_lock_files($debug);
	self::expire_error_log($debug);

	$feeds_processed = self::update_feedbrowser_cache();
	_debug("Feedbrowser updated, $feeds_processed feeds processed.");

	Article::purge_orphans(true);
	self::cleanup_counters_cache($debug);

	$host = PluginHost::getInstance();
	$host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
ea79a0e0 1533
e6c886bf
AD
/**
 * Download the favicon of a site into ICONS_DIR/<feed>.ico if that file
 * does not exist yet.
 *
 * The downloaded data is written only when it starts with a known image
 * signature (ICO, GIF, PNG, JPEG or BMP).
 *
 * @param string $site_url site to look up via get_favicon_url()
 * @param int $feed feed id used for the icon file name
 * @return string|null path of the icon file when a download was attempted
 *   (whether or not a file was written); nothing is returned when the
 *   icon file already existed
 */
static function check_feed_favicon($site_url, $feed) {
	$icon_file = ICONS_DIR . "/$feed.ico";

	// icon already on disk: nothing to do, and no value is returned
	if (file_exists($icon_file)) {
		return;
	}

	$favicon_url = get_favicon_url($site_url);

	if ($favicon_url) {
		// Limiting to "image" type misses those served with text/plain
		$contents = fetch_file_contents($favicon_url);

		if ($contents) {
			// Crude image type matching.
			// Patterns gleaned from the file(1) source code.
			$signatures = array(
				'/^\x00\x00\x01\x00/',          // MS Windows icon resource
				'/^GIF8/',                      // GIF image data
				'/^\x89PNG\x0d\x0a\x1a\x0a/',   // PNG image data
				'/^\xff\xd8/',                  // JPEG image data
				'/^BM/',                        // PC bitmap (OS2, Windows BMP files)
			);

			$recognized = false;

			foreach ($signatures as $signature) {
				if (preg_match($signature, $contents)) {
					$recognized = true;
					break;
				}
			}

			// unknown type: discard the data so nothing gets written
			if (!$recognized) {
				$contents = "";
			}
		}

		if ($contents) {
			$fp = @fopen($icon_file, "w");

			if ($fp) {
				fwrite($fp, $contents);
				fclose($fp);
				chmod($icon_file, 0644);
			}
		}
	}

	return $icon_file;
}
e6c886bf
AD
1588
1589
1590
bec5ba93 1591}