]> git.wh0rd.org - tt-rss.git/blame - classes/rssutils.php
upd default.css
[tt-rss.git] / classes / rssutils.php
CommitLineData
2c08214a 1<?php
e6c886bf
AD
2class RSSUtils {
/**
 * Builds a SHA-1 fingerprint of an article array.
 *
 * Every element except the one keyed "feed" and those whose value is null
 * contributes sha1("<key>:" . sha1(<value with tags stripped>)); array
 * values are joined with commas before stripping. The comma-joined result
 * of $pluginhost->get_plugin_names() is prepended before the final hash,
 * so the fingerprint also changes when that name list changes.
 *
 * @param array $article article data keyed by field name
 * @param object $pluginhost object exposing get_plugin_names()
 * @return string hexadecimal SHA-1 digest
 */
static function calculate_article_hash($article, $pluginhost) {
	$digest = "";

	foreach ($article as $field => $value) {
		// the nested feed descriptor and unset fields take no part in the hash
		if ($field == "feed" || !isset($value)) continue;

		$flat = is_array($value) ? implode(",", $value) : $value;

		$digest .= sha1("$field:" . sha1(strip_tags($flat)));
	}

	$plugin_names = implode(",", $pluginhost->get_plugin_names());

	return sha1($plugin_names . $digest);
}
18
/**
 * Rebuilds the ttrss_feedbrowser_cache table from ttrss_feeds.
 *
 * Selects at most 1000 (feed_url, site_url, title) groups ordered by
 * subscriber count, leaving out every feed URL for which some subscription
 * is private, has a stored login or password, or matches the pattern
 * '%:%@%/%'. The cache table is emptied and refilled inside a single
 * transaction; if anything throws while the transaction is open it is
 * rolled back and the exception is rethrown.
 *
 * @return int number of rows inserted into the cache
 */
static function update_feedbrowser_cache() {

	$pdo = Db::pdo();

	$sth = $pdo->query("SELECT feed_url, site_url, title, COUNT(id) AS subscribers
		FROM ttrss_feeds WHERE feed_url NOT IN (SELECT feed_url FROM ttrss_feeds
			WHERE private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%')
		GROUP BY feed_url, site_url, title ORDER BY subscribers DESC LIMIT 1000");

	$pdo->beginTransaction();

	try {
		$pdo->query("DELETE FROM ttrss_feedbrowser_cache");

		// both statements are prepared once and re-executed for every row
		$lookup_sth = $pdo->prepare("SELECT subscribers FROM
			ttrss_feedbrowser_cache WHERE feed_url = ?");

		$insert_sth = $pdo->prepare("INSERT INTO ttrss_feedbrowser_cache
			(feed_url, site_url, title, subscribers)
			VALUES
			(?, ?, ?, ?)");

		$count = 0;

		while ($line = $sth->fetch()) {

			$subscribers = $line["subscribers"];
			$feed_url = $line["feed_url"];
			$title = $line["title"];
			$site_url = $line["site_url"];

			// grouping also covers site_url and title, so one feed_url may
			// come back several times; only its first row is cached
			$lookup_sth->execute([$feed_url]);
			$already_cached = $lookup_sth->fetch();
			$lookup_sth->closeCursor();

			if (!$already_cached) {
				$insert_sth->execute([$feed_url, $site_url, $title, $subscribers]);

				++$count;
			}
		}

		$pdo->commit();

	} catch (Exception $e) {
		// do not leave a half-finished transaction open on the connection
		if ($pdo->inTransaction()) $pdo->rollBack();

		throw $e;
	}

	return $count;

}
65
/**
 * Runs one batch of scheduled feed updates.
 *
 * Terminates the script through die() when get_schema_version() differs
 * from SCHEMA_VERSION. Otherwise it:
 *  - selects distinct feed URLs that are due according to the per-feed
 *    update_interval or the owner's DEFAULT_UPDATE_INTERVAL preference,
 *    whose last_update_started is NULL or older than ten minutes, and
 *    (unless SINGLE_USER_MODE is set or DAEMON_UPDATE_LOGIN_LIMIT is not
 *    positive) whose owner logged in within DAEMON_UPDATE_LOGIN_LIMIT days;
 *  - stamps last_update_started on those URLs before doing any work;
 *  - calls RSSUtils::update_rss_feed() for the first matching subscription
 *    of each URL;
 *  - runs RSSUtils::housekeeping_user() for every owner touched;
 *  - calls Digest::send_headlines_digests().
 *
 * @param int $limit maximum number of rows per query; a falsy value omits the LIMIT clause
 * @param bool $debug whether scheduling details are passed to _debug()
 * @return int number of feeds for which update_rss_feed() was invoked
 */
static function update_daemon_common($limit = DAEMON_FEED_LIMIT, $debug = true) {
	$schema_version = get_schema_version();

	if ($schema_version != SCHEMA_VERSION) {
		die("Schema version is wrong, please upgrade the database.\n");
	}

	$pdo = Db::pdo();

	if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
		if (DB_TYPE == "pgsql") {
			$login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
		} else {
			$login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
		}
	} else {
		$login_thresh_qpart = "";
	}

	// A user preference value of '-1' excludes feeds that rely on the
	// default interval; feeds with their own positive update_interval are
	// not subject to that check.
	if (DB_TYPE == "pgsql") {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_user_prefs.value || ' minutes') AS INTERVAL)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	} else {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL CONVERT(ttrss_user_prefs.value, SIGNED INTEGER) MINUTE)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	}

	// Test if feed is currently being updated by another process.
	if (DB_TYPE == "pgsql") {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '10 minutes')";
	} else {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 10 MINUTE))";
	}

	$query_limit = $limit ? sprintf("LIMIT %d", $limit) : "";

	// Update the least recently updated feeds first
	$query_order = "ORDER BY last_updated";
	if (DB_TYPE == "pgsql") $query_order .= " NULLS FIRST";

	$query = "SELECT DISTINCT ttrss_feeds.feed_url, ttrss_feeds.last_updated
		FROM
			ttrss_feeds, ttrss_users, ttrss_user_prefs
		WHERE
			ttrss_feeds.owner_uid = ttrss_users.id
			AND ttrss_user_prefs.profile IS NULL
			AND ttrss_users.id = ttrss_user_prefs.owner_uid
			AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL'
			$login_thresh_qpart $update_limit_qpart
			$updstart_thresh_qpart
			$query_order $query_limit";

	$res = $pdo->query($query);

	$feeds_to_update = array();
	while ($line = $res->fetch()) {
		array_push($feeds_to_update, $line['feed_url']);
	}

	if ($debug) _debug(sprintf("Scheduled %d feeds to update...", count($feeds_to_update)));

	// Update last_update_started before actually starting the batch
	// in order to minimize collision risk for parallel daemon tasks
	if (count($feeds_to_update) > 0) {
		$feeds_qmarks = arr_qmarks($feeds_to_update);

		$tmph = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
			WHERE feed_url IN ($feeds_qmarks)");
		$tmph->execute($feeds_to_update);
	}

	$nf = 0;
	$bstarted = microtime(true);

	$batch_owners = array();

	// since we have the data cached, we can deal with other feeds with the same url
	$usth = $pdo->prepare("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
		FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
			ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
			ttrss_users.id = ttrss_user_prefs.owner_uid AND
			ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL' AND
			ttrss_user_prefs.profile IS NULL AND
			feed_url = ?
			$update_limit_qpart
			$login_thresh_qpart
		ORDER BY ttrss_feeds.id $query_limit");

	foreach ($feeds_to_update as $feed) {
		if ($debug) _debug("Base feed: $feed");

		$usth->execute([$feed]);

		// NOTE(review): only the first subscription (lowest id) returned for
		// this URL is updated, even though the query may return more rows.
		$tline = $usth->fetch();

		// discard unread rows so the statement can be executed again cleanly
		$usth->closeCursor();

		if ($tline) {
			if ($debug) _debug(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);

			if (!in_array($tline["owner_uid"], $batch_owners))
				array_push($batch_owners, $tline["owner_uid"]);

			$fstarted = microtime(true);
			RSSUtils::update_rss_feed($tline["id"], true);

			// update_rss_feed() adjusts debug suppression; switch output back on
			_debug_suppress(false);

			_debug(sprintf(" %.4f (sec)", microtime(true) - $fstarted));

			++$nf;
		}
	}

	if ($nf > 0) {
		_debug(sprintf("Processed %d feeds in %.4f (sec), %.4f (sec/feed avg)", $nf,
			microtime(true) - $bstarted, (microtime(true) - $bstarted) / $nf));
	}

	foreach ($batch_owners as $owner_uid) {
		_debug("Running housekeeping tasks for user $owner_uid...");

		RSSUtils::housekeeping_user($owner_uid);
	}

	// Send feed digests by email if needed.
	Digest::send_headlines_digests($debug);

	return $nf;
}
2c08214a 210
/**
 * Fills in the stored title and site URL of a feed; used when subscribing.
 *
 * Plugins registered for PluginHost::HOOK_FEED_BASIC_INFO are asked first.
 * When they yield nothing, the feed is fetched and parsed with FeedParser.
 * The stored title is replaced only when it is empty or "[Unknown]"; the
 * stored site URL is replaced whenever a non-empty discovered URL differs
 * from it. Nothing happens when the feed id is unknown or no basic info
 * could be obtained.
 *
 * @param mixed $feed id of the ttrss_feeds row to update
 * @return void
 */
static function set_basic_feed_info($feed) {

	$pdo = Db::pdo();

	$sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login,auth_pass_encrypted
		FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	// unknown feed id: nothing to do
	if (!$row) return;

	$owner_uid = $row["owner_uid"];
	$auth_login = $row["auth_login"];
	$auth_pass = $row["auth_pass"];
	$fetch_url = $row["feed_url"];

	if ($row["auth_pass_encrypted"] && function_exists("mcrypt_decrypt")) {
		require_once "crypt.php";
		$auth_pass = decrypt_string($auth_pass);
	}

	// a dedicated plugin host is built so the feed owner's plugins are loaded
	$pluginhost = new PluginHost();
	$user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);

	$pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
	$pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
	$pluginhost->load_data();

	$basic_info = array();
	foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
		$basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
	}

	// no plugin supplied anything: fetch and parse the feed ourselves
	if (!$basic_info) {
		$feed_data = fetch_file_contents($fetch_url, false,
			$auth_login, $auth_pass, false,
			FEED_FETCH_TIMEOUT,
			0);

		global $fetch_curl_used;

		// presumably the non-curl fetch path can hand back gzip-compressed
		// data; use the decoded form whenever decoding succeeds
		if (!$fetch_curl_used) {
			$tmp = @gzdecode($feed_data);

			if ($tmp) $feed_data = $tmp;
		}

		$feed_data = trim($feed_data);

		$rss = new FeedParser($feed_data);
		$rss->init();

		if (!$rss->error()) {
			$basic_info = array(
				'title' => mb_substr($rss->get_title(), 0, 199),
				'site_url' => mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245)
			);
		}
	}

	if (!$basic_info || !is_array($basic_info)) return;

	$sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	$row = $sth->fetch();

	if (!$row) return;

	$registered_title = $row["title"];
	$orig_site_url = $row["site_url"];

	// empty() keeps the original truthiness test but tolerates a plugin
	// result that lacks the key altogether
	if (!empty($basic_info['title']) && (!$registered_title || $registered_title == "[Unknown]")) {

		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			title = ? WHERE id = ?");
		$sth->execute([$basic_info['title'], $feed]);
	}

	if (!empty($basic_info['site_url']) && $orig_site_url != $basic_info['site_url']) {
		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			site_url = ? WHERE id = ?");
		$sth->execute([$basic_info['site_url'], $feed]);
	}
}
301
7b55001e 302 /**
e6c886bf
AD
303 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
304 */
305 static function update_rss_feed($feed, $no_cache = false) {
2c08214a 306
2c08214a
AD
307 $debug_enabled = defined('DAEMON_EXTENDED_DEBUG') || $_REQUEST['xdebug'];
308
4f71d743 309 _debug_suppress(!$debug_enabled);
68cccafc 310 _debug("start", $debug_enabled);
2c08214a 311
0567016b
AD
312 $pdo = Db::pdo();
313
314 $sth = $pdo->prepare("SELECT title FROM ttrss_feeds WHERE id = ?");
315 $sth->execute([$feed]);
bfe1eb4e 316
0567016b 317 if (!$row = $sth->fetch()) {
bfe1eb4e
AD
318 _debug("feed $feed NOT FOUND/SKIPPED", $debug_enabled);
319 user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING);
320 return false;
321 }
322
0567016b 323 $title = $row["title"];
6bb96beb
AD
324
325 // feed was batch-subscribed or something, we need to get basic info
326 // this is not optimal currently as it fetches stuff separately TODO: optimize
327 if ($title == "[Unknown]") {
328 _debug("setting basic feed info for $feed...");
e6c886bf 329 RSSUtils::set_basic_feed_info($feed);
6bb96beb
AD
330 }
331
0567016b 332 $sth = $pdo->prepare("SELECT id,update_interval,auth_login,
5ba1ddd4 333 feed_url,auth_pass,cache_images,
5321e775 334 mark_unread_on_update, owner_uid,
153cb6d3 335 auth_pass_encrypted, feed_language,
e50c8eaa
AD
336 last_modified,
337 ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
0567016b
AD
338 FROM ttrss_feeds WHERE id = ?");
339 $sth->execute([$feed]);
340
341 if ($row = $sth->fetch()) {
2c08214a 342
0567016b 343 $owner_uid = $row["owner_uid"];
187abfe7
AD
344 $mark_unread_on_update = $row["mark_unread_on_update"];
345 $auth_pass_encrypted = $row["auth_pass_encrypted"];
2c08214a 346
0567016b
AD
347 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
348 WHERE id = ?");
349 $sth->execute([$feed]);
2c08214a 350
0567016b
AD
351 $auth_login = $row["auth_login"];
352 $auth_pass = $row["auth_pass"];
2c08214a 353
0567016b
AD
354 if ($auth_pass_encrypted && function_exists("mcrypt_decrypt")) {
355 require_once "crypt.php";
356 $auth_pass = decrypt_string($auth_pass);
357 }
044cff2d 358
0567016b
AD
359 $stored_last_modified = $row["last_modified"];
360 $last_unconditional = $row["last_unconditional"];
187abfe7 361 $cache_images = $row["cache_images"];
0567016b
AD
362 $fetch_url = $row["feed_url"];
363 $feed_language = mb_strtolower($row["feed_language"]);
364 if (!$feed_language) $feed_language = 'english';
2c08214a 365
0567016b
AD
366 } else {
367 return false;
368 }
2c08214a 369
f074ffe9 370 $date_feed_processed = date('Y-m-d H:i');
2c08214a 371
865a3ed6 372 $cache_filename = CACHE_DIR . "/simplepie/" . sha1($fetch_url) . ".xml";
f074ffe9 373
ee65bef4
AD
374 $pluginhost = new PluginHost();
375 $pluginhost->set_debug($debug_enabled);
376 $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
377
378 $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
379 $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
380 $pluginhost->load_data();
381
7b55001e 382 $rss_hash = false;
4f9cbdff 383
7b55001e
AD
384 $force_refetch = isset($_REQUEST["force_refetch"]);
385 $feed_data = "";
687a4f59 386
7b55001e
AD
387 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) {
388 $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass);
389 }
2c08214a 390
7b55001e
AD
391 // try cache
392 if (!$feed_data &&
393 file_exists($cache_filename) &&
394 is_readable($cache_filename) &&
395 !$auth_login && !$auth_pass &&
396 filemtime($cache_filename) > time() - 30) {
be574731 397
7b55001e 398 _debug("using local cache [$cache_filename].", $debug_enabled);
52637d3b 399
7b55001e 400 @$feed_data = file_get_contents($cache_filename);
f074ffe9 401
7b55001e
AD
402 if ($feed_data) {
403 $rss_hash = sha1($feed_data);
88edaa93 404 }
ee65bef4 405
7b55001e
AD
406 } else {
407 _debug("local cache will not be used for this feed", $debug_enabled);
408 }
312742db 409
153cb6d3
AD
410 global $fetch_last_modified;
411
7b55001e
AD
412 // fetch feed from source
413 if (!$feed_data) {
e50c8eaa 414 _debug("last unconditional update request: $last_unconditional");
312742db 415
7b55001e
AD
416 if (ini_get("open_basedir") && function_exists("curl_init")) {
417 _debug("not using CURL due to open_basedir restrictions");
418 }
3f6f0857 419
e50c8eaa
AD
420 if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
421 _debug("maximum allowed interval for conditional requests exceeded, forcing refetch");
422
423 $force_refetch = true;
424 } else {
425 _debug("stored last modified for conditional request: $stored_last_modified", $debug_enabled);
426 }
153cb6d3 427
e50c8eaa 428 _debug("fetching [$fetch_url] (force_refetch: $force_refetch)...", $debug_enabled);
153cb6d3
AD
429
430 $feed_data = fetch_file_contents([
431 "url" => $fetch_url,
432 "login" => $auth_login,
433 "pass" => $auth_pass,
434 "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
435 "last_modified" => $force_refetch ? "" : $stored_last_modified
436 ]);
3f6f0857 437
7b55001e 438 global $fetch_curl_used;
3f6f0857 439
7b55001e
AD
440 if (!$fetch_curl_used) {
441 $tmp = @gzdecode($feed_data);
1367bc3f 442
7b55001e
AD
443 if ($tmp) $feed_data = $tmp;
444 }
017401dd 445
7b55001e 446 $feed_data = trim($feed_data);
fd687300 447
7b55001e 448 _debug("fetch done.", $debug_enabled);
9d930af9 449 _debug("source last modified: " . $fetch_last_modified, $debug_enabled);
153cb6d3
AD
450
451 if ($feed_data && $fetch_last_modified != $stored_last_modified) {
0567016b
AD
452 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_modified = ? WHERE id = ?");
453 $sth->execute([substr($fetch_last_modified, 0, 245), $feed]);
153cb6d3 454 }
95beaa14 455
7b55001e
AD
456 // cache vanilla feed data for re-use
457 if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/simplepie")) {
458 $new_rss_hash = sha1($feed_data);
459
460 if ($new_rss_hash != $rss_hash) {
461 _debug("saving $cache_filename", $debug_enabled);
462 @file_put_contents($cache_filename, $feed_data);
95beaa14 463 }
4f9cbdff 464 }
7b55001e 465 }
017401dd 466
7b55001e
AD
467 if (!$feed_data) {
468 global $fetch_last_error;
469 global $fetch_last_error_code;
f074ffe9 470
7b55001e 471 _debug("unable to fetch: $fetch_last_error [$fetch_last_error_code]", $debug_enabled);
f074ffe9 472
7b55001e
AD
473 // If-Modified-Since
474 if ($fetch_last_error_code != 304) {
0567016b 475 $error_message = $fetch_last_error;
7b55001e
AD
476 } else {
477 _debug("source claims data not modified, nothing to do.", $debug_enabled);
0567016b 478 $error_message = "";
7b55001e 479 }
4f9cbdff 480
0567016b
AD
481 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
482 last_updated = NOW() WHERE id = ?");
483 $sth->execute([$error_message, $feed]);
4f9cbdff 484
7b55001e 485 return;
f074ffe9
AD
486 }
487
1ffe3391 488 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) {
6791af0c 489 $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed);
017401dd
AD
490 }
491
07d3431e
AD
492 $rss = new FeedParser($feed_data);
493 $rss->init();
2c08214a 494
0567016b 495 $feed = $feed;
2c08214a 496
19b3992b 497 if (!$rss->error()) {
2c08214a 498
d2a421e3 499 // We use local pluginhost here because we need to load different per-user feed plugins
1ffe3391 500 $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
4412b877 501
df659891 502 _debug("language: $feed_language", $debug_enabled);
68cccafc 503 _debug("processing feed data...", $debug_enabled);
2c08214a 504
382268c6
AD
505 if (DB_TYPE == "pgsql") {
506 $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
507 } else {
508 $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
509 }
510
0567016b 511 $sth = $pdo->prepare("SELECT owner_uid,favicon_avg_color,
382268c6
AD
512 (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
513 favicon_needs_check
0567016b
AD
514 FROM ttrss_feeds WHERE id = ?");
515 $sth->execute([$feed]);
2c08214a 516
0567016b 517 if ($row = $sth->fetch()) {
187abfe7 518 $favicon_needs_check = $row["favicon_needs_check"];
0567016b
AD
519 $favicon_avg_color = $row["favicon_avg_color"];
520 $owner_uid = $row["owner_uid"];
521 } else {
522 return false;
523 }
2c08214a 524
0567016b 525 $site_url = mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245);
2c08214a 526
cd07592c
AD
527 _debug("site_url: $site_url", $debug_enabled);
528 _debug("feed_title: " . $rss->get_title(), $debug_enabled);
529
687a4f59 530 if ($favicon_needs_check || $force_refetch) {
36490f11
AD
531
532 /* terrible hack: if we crash on floicon shit here, we won't check
560cbd8c 533 * the icon avgcolor again (unless the icon got updated) */
36490f11 534
560cbd8c
AD
535 $favicon_file = ICONS_DIR . "/$feed.ico";
536 $favicon_modified = @filemtime($favicon_file);
537
68cccafc 538 _debug("checking favicon...", $debug_enabled);
687a4f59 539
e6c886bf 540 RSSUtils::check_feed_favicon($site_url, $feed);
560cbd8c
AD
541 $favicon_modified_new = @filemtime($favicon_file);
542
543 if ($favicon_modified_new > $favicon_modified)
544 $favicon_avg_color = '';
687a4f59 545
0567016b 546 $favicon_colorstring = "";
6ee0d4b0 547 if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') {
e6c886bf 548 require_once "colors.php";
687a4f59 549
0567016b
AD
550 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE
551 id = ?");
552 $sth->execute([$feed]);
aafd55ba 553
0567016b
AD
554 $favicon_color = calculate_avg_color($favicon_file);
555
556 $favicon_colorstring = ",favicon_avg_color = " . $pdo->quote($favicon_color);
63c323f7 557
36490f11 558 } else if ($favicon_avg_color == 'fail') {
84ceb6bd 559 _debug("floicon failed on this file, not trying to recalculate avg color", $debug_enabled);
6ac722d5 560 }
687a4f59 561
0567016b
AD
562 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_last_checked = NOW()
563 $favicon_colorstring WHERE id = ?");
564 $sth->execute([$feed]);
f2798eb6 565 }
2c08214a 566
68cccafc 567 _debug("loading filters & labels...", $debug_enabled);
2c08214a 568
a42c55f0 569 $filters = load_filters($feed, $owner_uid);
2c08214a 570
02f3992a
AD
571 if ($debug_enabled) {
572 print_r($filters);
573 }
574
68cccafc 575 _debug("" . count($filters) . " filters loaded.", $debug_enabled);
2c08214a 576
19b3992b 577 $items = $rss->get_items();
2c08214a 578
19b3992b 579 if (!is_array($items)) {
68cccafc 580 _debug("no articles found.", $debug_enabled);
2c08214a 581
0567016b
AD
582 $sth = $pdo->prepare("UPDATE ttrss_feeds
583 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
584 $sth->execute([$feed]);
2c08214a 585
0567016b 586 return true; // no articles
2c08214a
AD
587 }
588
68cccafc 589 _debug("processing articles...", $debug_enabled);
2c08214a 590
6c9f3d4a
AD
591 $tstart = time();
592
19b3992b 593 foreach ($items as $item) {
0500e14c
AD
594 $pdo->beginTransaction();
595
5d56d100 596 if ($_REQUEST['xdebug'] == 3) {
2c08214a
AD
597 print_r($item);
598 }
599
6c9f3d4a
AD
600 if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) {
601 _debug("looks like there's too many articles to process at once, breaking out", $debug_enabled);
0500e14c 602 $pdo->commit();
6c9f3d4a
AD
603 break;
604 }
605
0567016b
AD
606 $entry_guid = strip_tags($item->get_id());
607 if (!$entry_guid) $entry_guid = strip_tags($item->get_link());
e6c886bf 608 if (!$entry_guid) $entry_guid = RSSUtils::make_guid_from_title($item->get_title());
0500e14c
AD
609
610 if (!$entry_guid) {
611 $pdo->commit();
612 continue;
613 }
2c08214a 614
3a4c8973
AD
615 $entry_guid = "$owner_uid,$entry_guid";
616
0567016b 617 $entry_guid_hashed = 'SHA1:' . sha1($entry_guid);
5e3d5480 618
68cccafc 619 _debug("guid $entry_guid / $entry_guid_hashed", $debug_enabled);
5e3d5480 620
0567016b 621 $entry_timestamp = strip_tags($item->get_date());
04d2f9c8
AD
622
623 _debug("orig date: " . $item->get_date(), $debug_enabled);
2c08214a 624
30123fe6 625 if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) {
2c08214a 626 $entry_timestamp = time();
2c08214a
AD
627 }
628
629 $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
630
68cccafc 631 _debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
2c08214a 632
0567016b 633 $entry_title = strip_tags($item->get_title());
1b35d30c 634
5d56d100 635 $entry_link = rewrite_relative_url($site_url, $item->get_link());
2c08214a 636
68cccafc
AD
637 _debug("title $entry_title", $debug_enabled);
638 _debug("link $entry_link", $debug_enabled);
2c08214a
AD
639
640 if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
641
19b3992b
AD
642 $entry_content = $item->get_content();
643 if (!$entry_content) $entry_content = $item->get_description();
2c08214a
AD
644
645 if ($_REQUEST["xdebug"] == 2) {
9ec10352 646 print "content: ";
0bc503ff 647 print htmlspecialchars($entry_content);
3c696512 648 print "\n";
2c08214a
AD
649 }
650
0567016b 651 $entry_comments = mb_substr(strip_tags($item->get_comments_url()), 0, 245);
12ff230b 652 $num_comments = (int) $item->get_comments_count();
2c08214a 653
0567016b
AD
654 $entry_author = strip_tags($item->get_author());
655 $entry_guid = mb_substr($entry_guid, 0, 245);
2c08214a 656
68cccafc
AD
657 _debug("author $entry_author", $debug_enabled);
658 _debug("num_comments: $num_comments", $debug_enabled);
ee78f81c 659 _debug("looking for tags...", $debug_enabled);
2c08214a
AD
660
661 // parse <category> entries into tags
662
663 $additional_tags = array();
664
19b3992b 665 $additional_tags_src = $item->get_categories();
2c08214a 666
19b3992b
AD
667 if (is_array($additional_tags_src)) {
668 foreach ($additional_tags_src as $tobj) {
cd07592c 669 array_push($additional_tags, $tobj);
2c08214a 670 }
19b3992b 671 }
2c08214a 672
fa6fbd36 673 $entry_tags = array_unique($additional_tags);
2c08214a
AD
674
675 for ($i = 0; $i < count($entry_tags); $i++)
676 $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
677
ee78f81c
AD
678 _debug("tags found: " . join(",", $entry_tags), $debug_enabled);
679
68cccafc 680 _debug("done collecting data.", $debug_enabled);
2c08214a 681
0567016b
AD
682 $sth = $pdo->prepare("SELECT id, content_hash, lang FROM ttrss_entries
683 WHERE guid = ? OR guid = ?");
684 $sth->execute([$entry_guid, $entry_guid_hashed]);
b30abdad 685
0567016b
AD
686 if ($row = $sth->fetch()) {
687 $base_entry_id = $row["id"];
688 $entry_stored_hash = $row["content_hash"];
4a0da0e5 689 $article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
0567016b 690 $entry_language = $row["lang"];
a8ac7661 691
2ed0d6c4 692 $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
a8ac7661 693 $entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
b30abdad 694 } else {
b1840673
AD
695 $base_entry_id = false;
696 $entry_stored_hash = "";
a29fe121 697 $article_labels = array();
3318d324 698 $entry_language = "";
b30abdad
AD
699 }
700
455b1401 701 $article = array("owner_uid" => $owner_uid, // read only
b30abdad 702 "guid" => $entry_guid, // read only
59e83455 703 "guid_hashed" => $entry_guid_hashed, // read only
19b3992b
AD
704 "title" => $entry_title,
705 "content" => $entry_content,
706 "link" => $entry_link,
a29fe121 707 "labels" => $article_labels, // current limitation: can add labels to article, can't remove them
19b3992b 708 "tags" => $entry_tags,
e02555c1 709 "author" => $entry_author,
c9299c28 710 "force_catchup" => false, // ugly hack for the time being
6de3a1be 711 "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
3318d324 712 "language" => $entry_language,
20d2195f 713 "num_comments" => $num_comments, // read only
f73e03e0
AD
714 "feed" => array("id" => $feed,
715 "fetch_url" => $fetch_url,
babfadbf
J
716 "site_url" => $site_url,
717 "cache_images" => $cache_images)
e6c886bf 718 );
cc85704f 719
b1840673 720 $entry_plugin_data = "";
e6c886bf 721 $entry_current_hash = RSSUtils::calculate_article_hash($article, $pluginhost);
b1840673
AD
722
723 _debug("article hash: $entry_current_hash [stored=$entry_stored_hash]", $debug_enabled);
724
522e8b35 725 if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) {
b1840673
AD
726 _debug("stored article seems up to date [IID: $base_entry_id], updating timestamp only", $debug_enabled);
727
728 // we keep encountering the entry in feeds, so we need to
729 // update date_updated column so that we don't get horrible
730 // dupes when the entry gets purged and reinserted again e.g.
731 // in the case of SLOW SLOW OMG SLOW updating feeds
732
0567016b
AD
733 $sth = $pdo->prepare("UPDATE ttrss_entries SET date_updated = NOW()
734 WHERE id = ?");
735 $sth->execute([$base_entry_id]);
b1840673 736
0500e14c 737 $pdo->commit();
5bdcb8fd 738 continue;
b1840673
AD
739 }
740
741 _debug("hash differs, applying plugin filters:", $debug_enabled);
742
1ffe3391 743 foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) {
b1840673
AD
744 _debug("... " . get_class($plugin), $debug_enabled);
745
746 $start = microtime(true);
19b3992b 747 $article = $plugin->hook_article_filter($article);
0084f0d1 748
b1840673
AD
749 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
750
751 $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ",";
cc85704f
AD
752 }
753
0bc503ff
AD
754 if ($_REQUEST["xdebug"] == 2) {
755 print "processed content: ";
756 print htmlspecialchars($article["content"]);
757 print "\n";
758 }
759
b1840673
AD
760 _debug("plugin data: $entry_plugin_data", $debug_enabled);
761
35c12dc4
AD
762 // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
763 if (DB_TYPE == "mysql") {
764 foreach ($article as $k => $v) {
35c37354
AD
765
766 // i guess we'll have to take the risk of 4byte unicode labels & tags here
dae16f72 767 if (is_string($article[$k])) {
35c37354
AD
768 $article[$k] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $v);
769 }
35c12dc4
AD
770 }
771 }
772
b8774453
AD
773 /* Collect article tags here so we could filter by them: */
774
557d86fe
AD
775 $matched_rules = array();
776
e6c886bf 777 $article_filters = RSSUtils::get_article_filters($filters, $article["title"],
7b55001e 778 $article["content"], $article["link"], $article["author"],
557d86fe 779 $article["tags"], $matched_rules);
b8774453
AD
780
781 if ($debug_enabled) {
557d86fe
AD
782 _debug("matched filter rules: ", $debug_enabled);
783
784 if (count($matched_rules) != 0) {
785 print_r($matched_rules);
786 }
787
788 _debug("filter actions: ", $debug_enabled);
789
b8774453
AD
790 if (count($article_filters) != 0) {
791 print_r($article_filters);
792 }
793 }
794
e6c886bf 795 $plugin_filter_names = RSSUtils::find_article_filters($article_filters, "plugin");
b8774453
AD
796 $plugin_filter_actions = $pluginhost->get_filter_actions();
797
798 if (count($plugin_filter_names) > 0) {
799 _debug("applying plugin filter actions...", $debug_enabled);
800
801 foreach ($plugin_filter_names as $pfn) {
802 list($pfclass,$pfaction) = explode(":", $pfn["param"]);
803
804 if (isset($plugin_filter_actions[$pfclass])) {
805 $plugin = $pluginhost->get_plugin($pfclass);
806
807 _debug("... $pfclass: $pfaction", $debug_enabled);
808
809 if ($plugin) {
810 $start = microtime(true);
811 $article = $plugin->hook_article_filter_action($article, $pfaction);
812
813 _debug("=== " . sprintf("%.4f (sec)", microtime(true) - $start), $debug_enabled);
814 } else {
815 _debug("??? $pfclass: plugin object not found.");
816 }
817 } else {
818 _debug("??? $pfclass: filter plugin not registered.");
819 }
820 }
821 }
822
19b3992b 823 $entry_tags = $article["tags"];
0567016b
AD
824 $entry_title = strip_tags($article["title"]);
825 $entry_author = mb_substr(strip_tags($article["author"]), 0, 245);
826 $entry_link = strip_tags($article["link"]);
f935d98e 827 $entry_content = $article["content"]; // escaped below
c9299c28 828 $entry_force_catchup = $article["force_catchup"];
a29fe121 829 $article_labels = $article["labels"];
6de3a1be 830 $entry_score_modifier = (int) $article["score_modifier"];
0567016b 831 $entry_language = $article["language"];
a29fe121
AD
832
833 if ($debug_enabled) {
834 _debug("article labels:", $debug_enabled);
557d86fe
AD
835
836 if (count($article_labels) != 0) {
837 print_r($article_labels);
838 }
a29fe121 839 }
c9299c28
AD
840
841 _debug("force catchup: $entry_force_catchup");
f935d98e 842
0a3fd79b 843 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 844 RSSUtils::cache_media($entry_content, $site_url, $debug_enabled);
0a3fd79b 845
0567016b
AD
846 $csth = $pdo->prepare("SELECT id FROM ttrss_entries
847 WHERE guid = ? OR guid = ?");
848 $csth->execute([$entry_guid, $entry_guid_hashed]);
9e222305 849
0567016b 850 if (!$row = $csth->fetch()) {
2c08214a 851
07d3431e 852 _debug("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", $debug_enabled);
2c08214a
AD
853
854 // base post entry does not exist, create it
855
0567016b 856 $usth = $pdo->prepare(
2c08214a 857 "INSERT INTO ttrss_entries
0567016b 858 (title,
2c08214a
AD
859 guid,
860 link,
861 updated,
862 content,
863 content_hash,
864 no_orig_date,
865 date_updated,
866 date_entered,
867 comments,
868 num_comments,
b30abdad 869 plugin_data,
6b461797 870 lang,
2c08214a
AD
871 author)
872 VALUES
0567016b 873 (?, ?, ?, ?, ?, ?,
5ba1ddd4 874 false,
2c08214a 875 NOW(),
0567016b
AD
876 ?, ?, ?, ?, ?, ?)");
877
878 $usth->execute([$entry_title,
879 $entry_guid_hashed,
880 $entry_link,
881 $entry_timestamp_fmt,
882 $entry_content,
883 $entry_current_hash,
884 $date_feed_processed,
885 $entry_comments,
187abfe7 886 (int)$num_comments,
0567016b
AD
887 $entry_plugin_data,
888 $entry_language,
889 $entry_author]);
e8291805 890
2c08214a
AD
891 }
892
0567016b 893 $csth->execute([$entry_guid, $entry_guid_hashed]);
2c08214a
AD
894
895 $entry_ref_id = 0;
896 $entry_int_id = 0;
897
0567016b 898 if ($row = $csth->fetch()) {
2c08214a 899
68cccafc 900 _debug("base guid found, checking for user record", $debug_enabled);
2c08214a 901
0567016b 902 $ref_id = $row['id'];
2c08214a
AD
903 $entry_ref_id = $ref_id;
904
e6c886bf 905 if (RSSUtils::find_article_filter($article_filters, "filter")) {
0500e14c 906 $pdo->commit();
2c08214a
AD
907 continue;
908 }
909
e6c886bf 910 $score = RSSUtils::calculate_article_score($article_filters) + $entry_score_modifier;
2c08214a 911
6de3a1be 912 _debug("initial score: $score [including plugin modifier: $entry_score_modifier]", $debug_enabled);
2c08214a 913
4f186b1f
AD
914 // check for user post link to main table
915
0567016b
AD
916 $sth = $pdo->prepare("SELECT ref_id, int_id FROM ttrss_user_entries WHERE
917 ref_id = ? AND owner_uid = ?");
918 $sth->execute([$ref_id, $owner_uid]);
2c08214a
AD
919
920 // okay it doesn't exist - create user entry
0567016b
AD
921 if ($row = $sth->fetch()) {
922 $entry_ref_id = $row["ref_id"];
923 $entry_int_id = $row["int_id"];
2c08214a 924
0567016b
AD
925 _debug("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
926 } else {
927
68cccafc 928 _debug("user record not found, creating...", $debug_enabled);
2c08214a 929
e6c886bf 930 if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) {
0567016b
AD
931 $unread = 1;
932 $last_read_qpart = null;
2c08214a 933 } else {
0567016b 934 $unread = 0;
2c08214a
AD
935 $last_read_qpart = 'NOW()';
936 }
937
e6c886bf 938 if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
0567016b 939 $marked = 1;
2c08214a 940 } else {
0567016b 941 $marked = 0;
2c08214a
AD
942 }
943
e6c886bf 944 if (RSSUtils::find_article_filter($article_filters, 'publish')) {
0567016b 945 $published = 1;
2c08214a 946 } else {
0567016b 947 $published = 0;
2c08214a
AD
948 }
949
0567016b
AD
950 $last_marked = ($marked == 'true') ? 'NOW()' : null;
951 $last_published = ($published == 'true') ? 'NOW()' : null;
7873d588 952
0567016b 953 $sth = $pdo->prepare(
2c08214a
AD
954 "INSERT INTO ttrss_user_entries
955 (ref_id, owner_uid, feed_id, unread, last_read, marked,
7873d588
AD
956 published, score, tag_cache, label_cache, uuid,
957 last_marked, last_published)
0567016b 958 VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ?, ?)");
2c08214a 959
0567016b
AD
960 $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
961 $published, $score, $last_marked, $last_published]);
2c08214a 962
0567016b
AD
963 $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
964 ref_id = ? AND owner_uid = ? AND
965 feed_id = ? LIMIT 1");
2c08214a 966
0567016b
AD
967 $sth->execute([$ref_id, $owner_uid, $feed]);
968
969 if ($row = $sth->fetch())
970 $entry_int_id = $row['int_id'];
2c08214a
AD
971 }
972
0567016b 973 _debug("resulting RID: $entry_ref_id, IID: $entry_int_id", $debug_enabled);
2c08214a 974
e854442e 975 if (DB_TYPE == "pgsql") {
49a888ec
AD
976 $tsvector_combined = mb_substr($entry_title . ' ' .
977 preg_replace('/[<\?\:]/', ' ', strip_tags($entry_content)),
0567016b 978 0, 1000000);
e854442e 979
49a888ec 980 $tsvector_qpart = "tsvector_combined = to_tsvector(".$pdo->quote($feed_language).", ".$pdo->quote($tsvector_combined)."),";
e854442e
AD
981
982 } else {
983 $tsvector_qpart = "";
984 }
985
49a888ec
AD
986 //_debug($tsvector_qpart);
987
0567016b 988 $sth = $pdo->prepare("UPDATE ttrss_entries
49a888ec 989 SET title = :title,
e854442e 990 $tsvector_qpart
49a888ec
AD
991 content = :content,
992 content_hash = :content_hash,
993 updated = :updated,
994 num_comments = :num_comments,
995 plugin_data = :plugin_data,
996 author = :author,
997 lang = :lang
998 WHERE id = :id");
999
1000 $sth->execute([":title" => $entry_title,
1001 ":content" => $entry_content,
1002 ":content_hash" => $entry_current_hash,
1003 ":updated" => $entry_timestamp_fmt,
1004 ":num_comments" => (int)$num_comments,
1005 ":plugin_data" => $entry_plugin_data,
1006 ":author" => $entry_author,
1007 ":lang" => $entry_language,
1008 ":id" => $ref_id]);
b1840673 1009
59e83455 1010 // update aux data
0567016b
AD
1011 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1012 SET score = ? WHERE ref_id = ?");
1013 $sth->execute([$score, $ref_id]);
59e83455 1014
b1840673 1015 if ($mark_unread_on_update) {
24e6ff5d
AD
1016 _debug("article updated, marking unread as requested.", $debug_enabled);
1017
0567016b
AD
1018 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1019 SET last_read = null, unread = true WHERE ref_id = ?");
1020 $sth->execute([$ref_id]);
2c08214a
AD
1021 }
1022 }
1023
a29fe121
AD
1024 _debug("assigning labels [other]...", $debug_enabled);
1025
1026 foreach ($article_labels as $label) {
7c9b5a3f 1027 Labels::add_article($entry_ref_id, $label[1], $owner_uid);
a29fe121
AD
1028 }
1029
1030 _debug("assigning labels [filters]...", $debug_enabled);
2c08214a 1031
e6c886bf 1032 RSSUtils::assign_article_to_label_filters($entry_ref_id, $article_filters,
b24504b1 1033 $owner_uid, $article_labels);
2c08214a 1034
68cccafc 1035 _debug("looking for enclosures...", $debug_enabled);
2c08214a
AD
1036
1037 // enclosures
1038
1039 $enclosures = array();
1040
19b3992b 1041 $encs = $item->get_enclosures();
2c08214a 1042
19b3992b
AD
1043 if (is_array($encs)) {
1044 foreach ($encs as $e) {
1045 $e_item = array(
86e53429
AD
1046 rewrite_relative_url($site_url, $e->link),
1047 $e->type, $e->length, $e->title, $e->width, $e->height);
2c08214a 1048 array_push($enclosures, $e_item);
2c08214a
AD
1049 }
1050 }
1051
388d4dfa 1052 if ($cache_images && is_writable(CACHE_DIR . '/images'))
e6c886bf 1053 RSSUtils::cache_enclosures($enclosures, $site_url, $debug_enabled);
388d4dfa 1054
2c08214a 1055 if ($debug_enabled) {
68cccafc 1056 _debug("article enclosures:", $debug_enabled);
2c08214a
AD
1057 print_r($enclosures);
1058 }
1059
0567016b
AD
1060 $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures
1061 WHERE content_url = ? AND post_id = ?");
2c08214a 1062
0567016b
AD
1063 $usth = $pdo->prepare("INSERT INTO ttrss_enclosures
1064 (content_url, content_type, title, duration, post_id, width, height) VALUES
1065 (?, ?, ?, ?, ?, ?, ?)");
5c54e683 1066
2c08214a 1067 foreach ($enclosures as $enc) {
0567016b
AD
1068 $enc_url = $enc[0];
1069 $enc_type = $enc[1];
0500e14c 1070 $enc_dur = (int)$enc[2];
0567016b 1071 $enc_title = $enc[3];
523bd90b
FE
1072 $enc_width = intval($enc[4]);
1073 $enc_height = intval($enc[5]);
2c08214a 1074
0567016b 1075 $esth->execute([$enc_url, $entry_ref_id]);
2c08214a 1076
0567016b
AD
1077 if (!$esth->fetch()) {
1078 $usth->execute([$enc_url, $enc_type, (string)$enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]);
2c08214a
AD
1079 }
1080 }
1081
2c08214a
AD
1082 // check for manual tags (we have to do it here since they're loaded from filters)
1083
1084 foreach ($article_filters as $f) {
6aff7845 1085 if ($f["type"] == "tag") {
2c08214a 1086
6aff7845 1087 $manual_tags = trim_array(explode(",", $f["param"]));
2c08214a
AD
1088
1089 foreach ($manual_tags as $tag) {
1090 if (tag_is_valid($tag)) {
1091 array_push($entry_tags, $tag);
1092 }
1093 }
1094 }
1095 }
1096
1097 // Skip boring tags
1098
6322ac79 1099 $boring_tags = trim_array(explode(",", mb_strtolower(get_pref(
2c08214a
AD
1100 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
1101
1102 $filtered_tags = array();
1103 $tags_to_cache = array();
1104
1105 if ($entry_tags && is_array($entry_tags)) {
1106 foreach ($entry_tags as $tag) {
1107 if (array_search($tag, $boring_tags) === false) {
1108 array_push($filtered_tags, $tag);
1109 }
1110 }
1111 }
1112
1113 $filtered_tags = array_unique($filtered_tags);
1114
1115 if ($debug_enabled) {
68cccafc 1116 _debug("filtered article tags:", $debug_enabled);
2c08214a
AD
1117 print_r($filtered_tags);
1118 }
1119
1120 // Save article tags in the database
1121
1122 if (count($filtered_tags) > 0) {
1123
0567016b
AD
1124 $tsth = $pdo->prepare("SELECT id FROM ttrss_tags
1125 WHERE tag_name = ? AND post_int_id = ? AND
1126 owner_uid = ? LIMIT 1");
1127
1128 $usth = $pdo->prepare("INSERT INTO ttrss_tags
1129 (owner_uid,tag_name,post_int_id)
1130 VALUES (?, ?, ?)");
2c08214a
AD
1131
1132 foreach ($filtered_tags as $tag) {
1133
1134 $tag = sanitize_tag($tag);
2c08214a
AD
1135
1136 if (!tag_is_valid($tag)) continue;
1137
0567016b 1138 $tsth->execute([$tag, $entry_int_id, $owner_uid]);
2c08214a 1139
0567016b
AD
1140 if (!$tsth->fetch()) {
1141 $usth->execute([$owner_uid, $tag, $entry_int_id]);
e6c886bf 1142 }
2c08214a
AD
1143
1144 array_push($tags_to_cache, $tag);
1145 }
1146
1147 /* update the cache */
1148
1149 $tags_to_cache = array_unique($tags_to_cache);
1150
0567016b 1151 $tags_str = join(",", $tags_to_cache);
2c08214a 1152
0567016b
AD
1153 $tsth = $pdo->prepare("UPDATE ttrss_user_entries
1154 SET tag_cache = ? WHERE ref_id = ?
1155 AND owner_uid = ?");
1156 $tsth->execute([$tags_str, $entry_ref_id, $owner_uid]);
2c08214a
AD
1157 }
1158
68cccafc 1159 _debug("article processed", $debug_enabled);
0500e14c
AD
1160
1161 $pdo->commit();
2c08214a
AD
1162 }
1163
68cccafc 1164 _debug("purging feed...", $debug_enabled);
2c08214a 1165
a42c55f0 1166 purge_feed($feed, 0, $debug_enabled);
2c08214a 1167
0567016b
AD
1168 $sth = $pdo->prepare("UPDATE ttrss_feeds
1169 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
1170 $sth->execute([$feed]);
2c08214a
AD
1171
1172 } else {
1173
0567016b 1174 $error_msg = mb_substr($rss->error(), 0, 245);
2c08214a 1175
4ad04ee2
AD
1176 _debug("fetch error: $error_msg", $debug_enabled);
1177
1178 if (count($rss->errors()) > 1) {
1179 foreach ($rss->errors() as $error) {
1180 _debug("+ $error");
1181 }
1182 }
2c08214a 1183
0567016b
AD
1184 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
1185 last_updated = NOW(), last_unconditional = NOW() WHERE id = ?");
1186 $sth->execute([$error_msg, $feed]);
2c08214a 1187
88edaa93 1188 unset($rss);
0567016b 1189 return false;
88edaa93 1190 }
2c08214a 1191
68cccafc 1192 _debug("done", $debug_enabled);
88edaa93 1193
7b55001e 1194 return true;
2c08214a
AD
1195 }
1196
/**
 * Download image/audio/video enclosures into the local media cache.
 *
 * Every enclosure is an array whose element 0 is the URL and element 1 the
 * content type. The target file is CACHE_DIR/images/<sha1 of the absolute URL>.
 * A file that is already cached is not downloaded again, only touched.
 *
 * @param array $enclosures list of enclosure arrays
 * @param string $site_url base URL used to resolve relative enclosure links
 * @param bool $debug log every processed enclosure via _debug() when true
 */
static function cache_enclosures($enclosures, $site_url, $debug) {
	foreach ($enclosures as $enclosure) {

		// anything that is not media is left alone
		if (!preg_match("/(image|audio|video)/", $enclosure[1]))
			continue;

		$url = rewrite_relative_url($site_url, $enclosure[0]);
		$cached_path = CACHE_DIR . "/images/" . sha1($url);

		if ($debug) _debug("cache_enclosures: downloading: $url to $cached_path");

		if (file_exists($cached_path)) {
			// already cached: just refresh the modification time
			touch($cached_path);
			continue;
		}

		$data = fetch_file_contents($url);

		// skip failed downloads and responses that are too small to keep
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cached_path, $data);
		}
	}
}
1220
/**
 * Download images and audio/video sources referenced by an HTML fragment
 * into the local media cache.
 *
 * Looks at <img src>, <video><source src> and <audio><source src> elements,
 * ignoring data: URIs. The target file is CACHE_DIR/images/<sha1 of the
 * absolute URL>; a file that is already cached is only touched.
 *
 * @param string $html article markup to scan
 * @param string $site_url base URL used to resolve relative src attributes
 * @param bool $debug log every processed URL via _debug() when true
 */
static function cache_media($html, $site_url, $debug) {
	// feed markup is routinely malformed: buffer parser errors instead of printing them
	libxml_use_internal_errors(true);

	// force the parser to treat the fragment as UTF-8
	$charset_hack = '<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
	</head>';

	$doc = new DOMDocument();
	$doc->loadHTML($charset_hack . $html);

	// nobody reads the buffered errors; without clearing them the buffer
	// keeps growing with every article handled by the same process
	libxml_clear_errors();

	$xpath = new DOMXPath($doc);

	$entries = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])');

	foreach ($entries as $entry) {
		if ($entry->hasAttribute('src') && strpos($entry->getAttribute('src'), "data:") !== 0) {
			$src = rewrite_relative_url($site_url, $entry->getAttribute('src'));

			$local_filename = CACHE_DIR . "/images/" . sha1($src);

			if ($debug) _debug("cache_media: downloading: $src to $local_filename");

			if (!file_exists($local_filename)) {
				$file_content = fetch_file_contents($src);

				// skip failed downloads and responses that are too small to keep
				if ($file_content && strlen($file_content) > MIN_CACHE_FILE_SIZE) {
					file_put_contents($local_filename, $file_content);
				}
			} else {
				// already cached: just refresh the modification time
				touch($local_filename);
			}
		}
	}
}
1254
/**
 * Delete rows of ttrss_error_log whose created_at is more than seven days old.
 *
 * @param bool $debug announce the cleanup via _debug() when true
 */
static function expire_error_log($debug) {
	if ($debug) _debug("Removing old error log entries...");

	$pdo = Db::pdo();

	// the interval syntax depends on the database backend
	$query = (DB_TYPE == "pgsql") ?
		"DELETE FROM ttrss_error_log
			WHERE created_at < NOW() - INTERVAL '7 days'" :
		"DELETE FROM ttrss_error_log
			WHERE created_at < DATE_SUB(NOW(), INTERVAL 7 DAY)";

	$pdo->query($query);
}
1268
/**
 * Remove *.lock files from LOCK_DIRECTORY that are not currently locked and
 * have not been modified for more than two days.
 *
 * @param bool $debug report the number of removed files via _debug() when true
 */
static function expire_lock_files($debug) {
	$removed = 0;

	$lock_files = is_writable(LOCK_DIRECTORY) ? glob(LOCK_DIRECTORY . "/*.lock") : false;

	if ($lock_files) {
		foreach ($lock_files as $lock_file) {
			// a lock that is still held must not be removed
			if (file_is_locked(basename($lock_file)))
				continue;

			// keep anything modified within the last two days
			if (time() - filemtime($lock_file) <= 86400*2)
				continue;

			unlink($lock_file);
			++$removed;
		}
	}

	if ($debug) _debug("Removed $removed old lock files.");
}
1289
/**
 * Remove files older than CACHE_MAX_DAYS days from the simplepie, images,
 * export and upload subdirectories of CACHE_DIR.
 *
 * Only regular files are removed: glob() may also return subdirectories,
 * which unlink() cannot delete. The per-directory counter reflects files
 * that were actually removed.
 *
 * @param bool $debug report the number of removed files per directory via _debug() when true
 */
static function expire_cached_files($debug) {
	foreach (array("simplepie", "images", "export", "upload") as $dir) {
		$cache_dir = CACHE_DIR . "/$dir";

		$num_deleted = 0;

		if (is_writable($cache_dir)) {
			$files = glob("$cache_dir/*");

			if ($files) {
				// anything last modified before this timestamp is expired
				$expire_before = time() - 86400*CACHE_MAX_DAYS;

				foreach ($files as $file) {
					if (!is_file($file))
						continue;

					$mtime = filemtime($file);

					// count only what was really deleted
					if ($mtime !== false && $mtime < $expire_before && unlink($file)) {
						++$num_deleted;
					}
				}
			}
		}

		if ($debug) _debug("$cache_dir: removed $num_deleted files.");
	}
}
2c08214a 1315
/**
 * Source: http://www.php.net/manual/en/function.parse-url.php#104527
 * Returns the url query as associative array
 *
 * Keys and values are returned as they appear in the query (no URL
 * decoding). Empty segments (an empty query, "a=1&&b=2", trailing "&") are
 * skipped. A parameter without "=" maps to null. Only the first "=" separates
 * key and value, so "=" characters inside a value are kept.
 *
 * @param string query
 * @return array params
 */
static function convertUrlQuery($query) {
	$params = array();

	foreach (explode('&', $query) as $param) {
		if ($param === '')
			continue;

		// limit 2: split on the first "=" only
		$item = explode('=', $param, 2);

		$params[$item[0]] = isset($item[1]) ? $item[1] : null;
	}

	return $params;
}
92c14e9d 1335
/**
 * Evaluate filters against an article and collect the actions of every
 * filter that matches.
 *
 * Each filter is an array with the keys "rules", "actions" and optionally
 * "match_any_rule" and "inverse". Each rule has "reg_exp", "type" (title,
 * content, both, link, author or tag) and optionally "inverse". Rules with
 * an empty expression are skipped. Expressions are matched case-insensitively
 * in UTF-8 mode; content is matched with CR, LF and TAB characters removed.
 *
 * With match_any_rule the filter matches as soon as one rule matches,
 * otherwise every evaluated rule has to match. "inverse" negates the rule or
 * filter result. Processing stops at the first action of type "stop"; that
 * action is included in the result.
 *
 * @param array $filters filters to evaluate
 * @param string $title article title
 * @param string $content article content
 * @param string $link article link
 * @param string $author article author
 * @param array $tags article tags
 * @param array|bool $matched_rules when an array is passed, the last rule
 *        evaluated for every matching filter is appended to it
 * @return array actions of all matching filters, in filter order
 */
static function get_article_filters($filters, $title, $content, $link, $author, $tags, &$matched_rules = false) {
	$matches = array();

	// content with line breaks and tabs removed; built once, on first use
	$flat_content = null;

	foreach ($filters as $filter) {
		$match_any_rule = !empty($filter["match_any_rule"]);
		$inverse = !empty($filter["inverse"]);
		$filter_match = false;

		// rule that decided the outcome of this filter; stays null when no
		// rule was evaluated so that a rule of a previous filter is never reported
		$last_rule = null;

		foreach ($filter["rules"] as $rule) {
			$match = false;
			$reg_exp = str_replace('/', '\/', $rule["reg_exp"]);
			$rule_inverse = !empty($rule["inverse"]);

			if (!$reg_exp)
				continue;

			$last_rule = $rule;

			// errors of user supplied expressions are suppressed on purpose:
			// an invalid expression simply does not match
			switch ($rule["type"]) {
				case "title":
					$match = @preg_match("/$reg_exp/iu", $title);
					break;
				case "content":
					// we don't need to deal with multiline regexps
					if ($flat_content === null)
						$flat_content = preg_replace("/[\r\n\t]/", "", $content);

					$match = @preg_match("/$reg_exp/iu", $flat_content);
					break;
				case "both":
					// we don't need to deal with multiline regexps
					if ($flat_content === null)
						$flat_content = preg_replace("/[\r\n\t]/", "", $content);

					$match = (@preg_match("/$reg_exp/iu", $title) || @preg_match("/$reg_exp/iu", $flat_content));
					break;
				case "link":
					$match = @preg_match("/$reg_exp/iu", $link);
					break;
				case "author":
					$match = @preg_match("/$reg_exp/iu", $author);
					break;
				case "tag":
					foreach ($tags as $tag) {
						if (@preg_match("/$reg_exp/iu", $tag)) {
							$match = true;
							break;
						}
					}
					break;
			}

			if ($rule_inverse) $match = !$match;

			if ($match_any_rule) {
				// one matching rule is enough
				if ($match) {
					$filter_match = true;
					break;
				}
			} else {
				// every rule has to match: give up on the first failure
				$filter_match = $match;
				if (!$match) {
					break;
				}
			}
		}

		if ($inverse) $filter_match = !$filter_match;

		if ($filter_match) {
			if (is_array($matched_rules) && $last_rule !== null)
				array_push($matched_rules, $last_rule);

			foreach ($filter["actions"] AS $action) {
				array_push($matches, $action);

				// if Stop action encountered, perform no further processing
				if (isset($action["type"]) && $action["type"] == "stop") return $matches;
			}
		}
	}

	return $matches;
}
1415
/**
 * Return the first filter action whose "type" equals $filter_name.
 *
 * @param array $filters filter actions, each an array with a "type" key
 * @param string $filter_name action type to look for
 * @return array|bool the matching action, or false when there is none
 */
static function find_article_filter($filters, $filter_name) {
	foreach ($filters as $candidate) {
		if ($candidate["type"] != $filter_name)
			continue;

		return $candidate;
	}

	return false;
}
1424
/**
 * Return every filter action whose "type" equals $filter_name.
 *
 * @param array $filters filter actions, each an array with a "type" key
 * @param string $filter_name action type to look for
 * @return array matching actions in their original order, reindexed from 0
 */
static function find_article_filters($filters, $filter_name) {
	$found = array();

	foreach ($filters as $candidate) {
		if ($candidate["type"] != $filter_name)
			continue;

		$found[] = $candidate;
	}

	return $found;
}
1435
/**
 * Sum the "param" values of all filter actions of type "score".
 *
 * @param array $filters filter actions, each an array with "type" and "param" keys
 * @return int|float total score, 0 when there is no score action
 */
static function calculate_article_score($filters) {
	$total = 0;

	foreach ($filters as $action) {
		if ($action["type"] != "score")
			continue;

		$total += $action["param"];
	}

	return $total;
}
1446
/**
 * Tell whether one of the given labels carries the given caption.
 *
 * @param array $labels label arrays; element 1 of each is compared to the caption
 * @param string $caption caption to look for
 * @return bool true when a label with this caption is present
 */
static function labels_contains_caption($labels, $caption) {
	foreach ($labels as $entry) {
		if ($entry[1] != $caption)
			continue;

		return true;
	}

	return false;
}
1456
/**
 * Apply "label" filter actions to an article.
 *
 * For every action of type "label" whose "param" is not already among the
 * captions of $article_labels, Labels::add_article() is called with the
 * article id, that caption and the owner.
 *
 * @param int $id id passed on to Labels::add_article()
 * @param array $filters filter actions, each an array with "type" and "param" keys
 * @param int $owner_uid owner of the article
 * @param array $article_labels labels already present; element 1 of each is the caption
 */
static function assign_article_to_label_filters($id, $filters, $owner_uid, $article_labels) {
	foreach ($filters as $action) {
		if ($action["type"] != "label")
			continue;

		// the article already carries this label
		if (RSSUtils::labels_contains_caption($article_labels, $action["param"]))
			continue;

		Labels::add_article($id, $action["param"], $owner_uid);
	}
}
87764a50 1466
/**
 * Derive a GUID string from an article title: tags are stripped, the text is
 * lowercased (UTF-8) and every space, quote, comma, dot, colon and semicolon
 * is replaced by a dash.
 *
 * @param string $title article title
 * @return string GUID built from the title
 */
static function make_guid_from_title($title) {
	$plain = strip_tags($title);
	$lowered = mb_strtolower($plain, 'utf-8');

	return preg_replace("/[ \"\',.:;]/", "-", $lowered);
}
1471
/**
 * Delete orphaned counter cache rows: entries of ttrss_counters_cache and
 * ttrss_cat_counters_cache with feed_id > 0 for which the same owner has no
 * feed (respectively feed category) with that id.
 *
 * @param bool $debug report the number of removed rows via _debug() when true
 */
static function cleanup_counters_cache($debug) {
	$pdo = Db::pdo();

	// feed counters without a matching feed
	$feed_rows = $pdo->query("DELETE FROM ttrss_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feeds WHERE
			id = feed_id AND
			ttrss_counters_cache.owner_uid = ttrss_feeds.owner_uid) = 0")->rowCount();

	// category counters without a matching category
	$cat_rows = $pdo->query("DELETE FROM ttrss_cat_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feed_categories WHERE
			id = feed_id AND
			ttrss_cat_counters_cache.owner_uid = ttrss_feed_categories.owner_uid) = 0")->rowCount();

	if ($debug) _debug("Removed $feed_rows (feeds) $cat_rows (cats) orphaned counter cache entries.");
}
1493
/**
 * Run the HOOK_HOUSE_KEEPING hooks on a plugin host created for one user.
 *
 * A separate PluginHost instance is created and handed to
 * load_user_plugins() (presumably this registers the plugins enabled for
 * that user; verify against its definition) before the hooks are run.
 *
 * @param int $owner_uid user whose plugins are loaded
 */
static function housekeeping_user($owner_uid) {
	$user_host = new PluginHost();

	load_user_plugins($owner_uid, $user_host);

	$user_host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
1501
e6c886bf
AD
/**
 * Run the housekeeping tasks that are not tied to a single user: expire
 * cached files, lock files and error log entries, rebuild the feedbrowser
 * cache, purge orphaned articles, clean up the counter caches and finally
 * run the HOOK_HOUSE_KEEPING hooks of the global plugin host.
 *
 * @param bool $debug passed on to the individual cleanup steps
 */
static function housekeeping_common($debug) {
	self::expire_cached_files($debug);
	self::expire_lock_files($debug);
	self::expire_error_log($debug);

	$feeds_processed = self::update_feedbrowser_cache();
	_debug("Feedbrowser updated, $feeds_processed feeds processed.");

	Article::purge_orphans(true);
	self::cleanup_counters_cache($debug);

	//$rc = cleanup_tags( 14, 50000);
	//_debug("Cleaned $rc cached tags.");

	PluginHost::getInstance()->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
ea79a0e0 1518
e6c886bf
AD
/**
 * Fetch and store the favicon of a feed unless ICONS_DIR/<feed>.ico exists.
 *
 * The favicon URL is looked up with get_favicon_url(). Downloaded data is
 * stored only when it starts with a known image signature (ICO, GIF, PNG,
 * JPEG or BMP).
 *
 * @param string $site_url site whose favicon is looked up
 * @param int $feed feed id, used as the icon file name
 * @return string|null path of the icon file when a fetch was attempted
 *         (whether or not a file was written), null when the icon file
 *         already existed
 */
static function check_feed_favicon($site_url, $feed) {
	$icon_file = ICONS_DIR . "/$feed.ico";

	// icon is already stored: nothing to do
	if (file_exists($icon_file))
		return;

	$favicon_url = get_favicon_url($site_url);

	if ($favicon_url) {
		// Limiting to "image" type misses those served with text/plain
		$contents = fetch_file_contents($favicon_url); // , "image");

		if ($contents) {
			// Crude image type matching.
			// Patterns gleaned from the file(1) source code.
			$signatures = array(
				'/^\x00\x00\x01\x00/',          // MS Windows icon resource
				'/^GIF8/',                      // GIF image data
				'/^\x89PNG\x0d\x0a\x1a\x0a/',   // PNG image data
				'/^\xff\xd8/',                  // JPEG image data
				'/^BM/');                       // PC bitmap (OS2, Windows BMP files)

			$recognized = false;

			foreach ($signatures as $signature) {
				if (preg_match($signature, $contents)) {
					$recognized = true;
					break;
				}
			}

			// unknown type: do not store it
			if (!$recognized)
				$contents = "";
		}

		if ($contents) {
			$fp = @fopen($icon_file, "w");

			if ($fp) {
				fwrite($fp, $contents);
				fclose($fp);
				chmod($icon_file, 0644);
			}
		}
	}

	return $icon_file;
}
e6c886bf
AD
1573
1574
1575
bec5ba93 1576}