]> git.wh0rd.org - tt-rss.git/blob - classes/rssutils.php
support disabling of e-mail digests entirely
[tt-rss.git] / classes / rssutils.php
1 <?php
2 class RSSUtils {
/**
 * Computes a SHA1 fingerprint of an article array.
 *
 * Every set element except the one keyed "feed" contributes
 * sha1("<key>:" . sha1(<tag-stripped value>)); array values are joined with
 * commas before tags are stripped. The comma-joined list returned by
 * $pluginhost->get_plugin_names() is prepended before the final SHA1.
 *
 * @param array $article article fields keyed by name
 * @param object $pluginhost object providing get_plugin_names()
 * @return string 40-character hexadecimal SHA1 digest
 */
static function calculate_article_hash($article, $pluginhost) {
	$field_digests = "";

	foreach ($article as $field => $value) {
		// the "feed" element and unset (null) values never take part in the hash
		if ($field == "feed" || !isset($value)) continue;

		if (is_array($value)) {
			$flattened = implode(",", $value);
		} else {
			$flattened = $value;
		}

		$field_digests .= sha1("$field:" . sha1(strip_tags($flattened)));
	}

	$plugin_names = implode(",", $pluginhost->get_plugin_names());

	return sha1($plugin_names . $field_digests);
}
16
/**
 * Strips utf8mb4 characters (i.e. emoji) for mysql.
 *
 * Every code point in U+10000..U+10FFFF is replaced with U+FFFD.
 *
 * preg_replace() with the /u modifier returns null when the subject is not
 * valid UTF-8. Returning that null would silently discard the whole string,
 * so in that case invalid byte sequences are scrubbed first and the
 * replacement is retried.
 *
 * @param string $str text to clean
 * @return string cleaned text; for non-string input the raw preg_replace() result
 */
static function strip_utf8mb4($str) {
	$pattern = '/[\x{10000}-\x{10FFFF}]/u';
	$replacement = "\xEF\xBF\xBD"; // U+FFFD REPLACEMENT CHARACTER, UTF-8 encoded

	$result = preg_replace($pattern, $replacement, $str);

	// common case, or a non-string subject whose handling is left untouched
	if ($result !== null || !is_string($str)) return $result;

	// malformed UTF-8: re-encoding UTF-8 -> UTF-8 substitutes the invalid sequences
	$scrubbed = mb_convert_encoding($str, 'UTF-8', 'UTF-8');
	$result = preg_replace($pattern, $replacement, $scrubbed);

	return $result !== null ? $result : $scrubbed;
}
21
/**
 * Rebuilds ttrss_feedbrowser_cache from ttrss_feeds.
 *
 * Selects up to 1000 (feed_url, site_url, title) groups ordered by subscriber
 * count, skipping every feed_url that has a row which is private, carries
 * auth_login/auth_pass, or matches the LIKE pattern '%:%@%/%'. The cache
 * table is then emptied and refilled inside one transaction; a feed_url that
 * is already in the cache is not inserted a second time.
 *
 * If a PDOException is raised while the transaction is open, the transaction
 * is rolled back before the exception is re-thrown, so the connection is not
 * left with a dangling transaction.
 * NOTE(review): this only takes effect when the connection returned by
 * Db::pdo() uses exception error mode - confirm.
 *
 * @return int number of rows inserted into the cache
 */
static function update_feedbrowser_cache() {

	$pdo = Db::pdo();

	$sth = $pdo->query("SELECT feed_url, site_url, title, COUNT(id) AS subscribers
		FROM ttrss_feeds WHERE feed_url NOT IN (SELECT feed_url FROM ttrss_feeds
			WHERE private IS true OR auth_login != '' OR auth_pass != '' OR feed_url LIKE '%:%@%/%')
		GROUP BY feed_url, site_url, title ORDER BY subscribers DESC LIMIT 1000");

	// both statements are prepared once and re-executed for every row
	$exists_sth = $pdo->prepare("SELECT subscribers FROM
		ttrss_feedbrowser_cache WHERE feed_url = ?");

	$insert_sth = $pdo->prepare("INSERT INTO ttrss_feedbrowser_cache
		(feed_url, site_url, title, subscribers)
		VALUES
		(?, ?, ?, ?)");

	$count = 0;

	$pdo->beginTransaction();

	try {
		$pdo->query("DELETE FROM ttrss_feedbrowser_cache");

		while ($line = $sth->fetch()) {

			$subscribers = $line["subscribers"];
			$feed_url = $line["feed_url"];
			$title = $line["title"];
			$site_url = $line["site_url"];

			$exists_sth->execute([$feed_url]);
			$already_cached = (bool) $exists_sth->fetch();

			// release the result set so the statement can be executed again
			$exists_sth->closeCursor();

			if (!$already_cached) {
				$insert_sth->execute([$feed_url, $site_url, $title, $subscribers]);

				++$count;
			}
		}

		$pdo->commit();

	} catch (PDOException $e) {
		if ($pdo->inTransaction()) $pdo->rollBack();

		throw $e;
	}

	return $count;

}
68
/**
 * Runs one batch of the feed update daemon.
 *
 * Steps, in order:
 *  - terminates the process via die() if get_schema_version() differs from
 *    SCHEMA_VERSION;
 *  - selects the distinct feed URLs that are due for an update (per-feed
 *    update_interval or the owner's DEFAULT_UPDATE_INTERVAL preference, an
 *    optional last-login threshold, and no update started within the last
 *    10 minutes), least recently updated first;
 *  - stamps last_update_started on those URLs before doing any work, to
 *    reduce collisions between parallel daemon tasks;
 *  - for every URL, takes the first matching feed row and calls
 *    RSSUtils::update_rss_feed() on it; a PDOException is logged and followed
 *    by a best-effort rollback instead of aborting the batch;
 *  - runs RSSUtils::housekeeping_user() for every owner seen in the batch;
 *  - sends e-mail digests unless DIGEST_SUBJECT is exactly false.
 *
 * @param int $limit used as SQL LIMIT for the scheduling query (and also
 *        appended to the per-URL lookup query); a falsy value means no LIMIT
 * @return int number of feeds an update was attempted for, including those
 *         whose update raised a PDOException
 */
static function update_daemon_common($limit = DAEMON_FEED_LIMIT) {
	$schema_version = get_schema_version();

	if ($schema_version != SCHEMA_VERSION) {
		die("Schema version is wrong, please upgrade the database.\n");
	}

	$pdo = Db::pdo();

	// only consider feeds whose owner logged in recently enough (multi-user mode only)
	if (!SINGLE_USER_MODE && DAEMON_UPDATE_LOGIN_LIMIT > 0) {
		if (DB_TYPE == "pgsql") {
			$login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
		} else {
			$login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
		}
	} else {
		$login_thresh_qpart = "";
	}

	// a feed is due when its own interval (or the user default when the feed
	// interval is 0) has elapsed, or when it has never been updated; a user
	// preference value of '-1' excludes the feed in all branches but the
	// explicit per-feed interval one
	if (DB_TYPE == "pgsql") {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_user_prefs.value || ' minutes') AS INTERVAL)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < NOW() - CAST((ttrss_feeds.update_interval || ' minutes') AS INTERVAL)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	} else {
		$update_limit_qpart = "AND ((
				ttrss_feeds.update_interval = 0
				AND ttrss_user_prefs.value != '-1'
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL CONVERT(ttrss_user_prefs.value, SIGNED INTEGER) MINUTE)
			) OR (
				ttrss_feeds.update_interval > 0
				AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ttrss_feeds.update_interval MINUTE)
			) OR (ttrss_feeds.last_updated IS NULL
				AND ttrss_user_prefs.value != '-1')
			OR (last_updated = '1970-01-01 00:00:00'
				AND ttrss_user_prefs.value != '-1'))";
	}

	// Test if feed is currently being updated by another process.
	if (DB_TYPE == "pgsql") {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '10 minutes')";
	} else {
		$updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 10 MINUTE))";
	}

	$query_limit = $limit ? sprintf("LIMIT %d", $limit) : "";

	// Update the least recently updated feeds first
	$query_order = "ORDER BY last_updated";
	if (DB_TYPE == "pgsql") $query_order .= " NULLS FIRST";

	$query = "SELECT DISTINCT ttrss_feeds.feed_url, ttrss_feeds.last_updated
		FROM
			ttrss_feeds, ttrss_users, ttrss_user_prefs
		WHERE
			ttrss_feeds.owner_uid = ttrss_users.id
			AND ttrss_user_prefs.profile IS NULL
			AND ttrss_users.id = ttrss_user_prefs.owner_uid
			AND ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL'
			$login_thresh_qpart $update_limit_qpart
			$updstart_thresh_qpart
			$query_order $query_limit";

	$res = $pdo->query($query);

	$feeds_to_update = array();
	while ($line = $res->fetch()) {
		array_push($feeds_to_update, $line['feed_url']);
	}

	Debug::log(sprintf("Scheduled %d feeds to update...", count($feeds_to_update)));

	// Update last_update_started before actually starting the batch
	// in order to minimize collision risk for parallel daemon tasks
	if (count($feeds_to_update) > 0) {
		$feeds_qmarks = arr_qmarks($feeds_to_update);

		$tmph = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
			WHERE feed_url IN ($feeds_qmarks)");
		$tmph->execute($feeds_to_update);
	}

	$nf = 0;
	$bstarted = microtime(true);

	$batch_owners = array();

	// since we have the data cached, we can deal with other feeds with the same url
	$usth = $pdo->prepare("SELECT DISTINCT ttrss_feeds.id,last_updated,ttrss_feeds.owner_uid
		FROM ttrss_feeds, ttrss_users, ttrss_user_prefs WHERE
			ttrss_user_prefs.owner_uid = ttrss_feeds.owner_uid AND
			ttrss_users.id = ttrss_user_prefs.owner_uid AND
			ttrss_user_prefs.pref_name = 'DEFAULT_UPDATE_INTERVAL' AND
			ttrss_user_prefs.profile IS NULL AND
			feed_url = ?
			$update_limit_qpart
			$login_thresh_qpart
		ORDER BY ttrss_feeds.id $query_limit");

	foreach ($feeds_to_update as $feed) {
		Debug::log("Base feed: $feed");

		$usth->execute([$feed]);

		// only the first matching row is processed for each base URL
		if ($tline = $usth->fetch()) {
			Debug::log(" => " . $tline["last_updated"] . ", " . $tline["id"] . " " . $tline["owner_uid"]);

			// remember each owner once; housekeeping runs per owner after the batch
			if (!in_array($tline["owner_uid"], $batch_owners, true))
				array_push($batch_owners, $tline["owner_uid"]);

			$fstarted = microtime(true);

			try {
				// update_rss_feed($feed, $no_cache) takes two parameters
				RSSUtils::update_rss_feed($tline["id"], true);
			} catch (PDOException $e) {
				Logger::get()->log_error(E_USER_NOTICE, $e->getMessage(), $e->getFile(), $e->getLine(), $e->getTraceAsString());

				try {
					$pdo->rollback();
				} catch (PDOException $rollback_e) {
					// it doesn't matter if there wasn't actually anything to rollback, PDO Exception can be
					// thrown outside of an active transaction during feed update
				}
			}

			Debug::log(sprintf(" %.4f (sec)", microtime(true) - $fstarted));

			++$nf;
		}
	}

	if ($nf > 0) {
		Debug::log(sprintf("Processed %d feeds in %.4f (sec), %.4f (sec/feed avg)", $nf,
			microtime(true) - $bstarted, (microtime(true) - $bstarted) / $nf));
	}

	foreach ($batch_owners as $owner_uid) {
		Debug::log("Running housekeeping tasks for user $owner_uid...");

		RSSUtils::housekeeping_user($owner_uid);
	}

	// Send feed digests by email if needed.
	if (DIGEST_SUBJECT !== false)
		Digest::send_headlines_digests();

	return $nf;
}
225
// this is used when subscribing
/**
 * Fills in the title and site URL of a feed row.
 *
 * HOOK_FEED_BASIC_INFO plugins (global plus the owner's enabled plugins) are
 * asked first. If they yield nothing, the feed is fetched and parsed, and the
 * title (cut to 199 characters) and site URL (cut to 245 characters) are
 * taken from the parsed feed.
 *
 * The stored title is overwritten only when it is empty or "[Unknown]"; the
 * stored site_url is overwritten whenever a different non-empty value was
 * obtained. Nothing is written when the feed id does not exist or no basic
 * info could be determined.
 *
 * @param int $feed id of the ttrss_feeds row
 * @return void
 */
static function set_basic_feed_info($feed) {

	$pdo = Db::pdo();

	$sth = $pdo->prepare("SELECT owner_uid,feed_url,auth_pass,auth_login
		FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	// unknown feed id: nothing to do
	if (!$row = $sth->fetch()) return;

	$owner_uid = $row["owner_uid"];
	$auth_login = $row["auth_login"];
	$auth_pass = $row["auth_pass"];
	$fetch_url = $row["feed_url"];

	// a separate plugin host is built so the feed owner's plugins get loaded
	$pluginhost = new PluginHost();
	$user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);

	$pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
	$pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
	$pluginhost->load_data();

	$basic_info = array();
	foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_BASIC_INFO) as $plugin) {
		$basic_info = $plugin->hook_feed_basic_info($basic_info, $fetch_url, $owner_uid, $feed, $auth_login, $auth_pass);
	}

	// no plugin supplied anything: fetch and parse the feed ourselves
	if (!$basic_info) {
		$feed_data = fetch_file_contents($fetch_url, false,
			$auth_login, $auth_pass, false,
			FEED_FETCH_TIMEOUT,
			0);

		global $fetch_curl_used;

		// when the fetch did not go through curl, try to gunzip the payload;
		// the decoded data is used only if decoding produced something
		if (!$fetch_curl_used) {
			$tmp = @gzdecode($feed_data);

			if ($tmp) $feed_data = $tmp;
		}

		$feed_data = trim($feed_data);

		$rss = new FeedParser($feed_data);
		$rss->init();

		if (!$rss->error()) {
			$basic_info = array(
				'title' => mb_substr($rss->get_title(), 0, 199),
				'site_url' => mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245)
			);
		}
	}

	if (!$basic_info || !is_array($basic_info)) return;

	$sth = $pdo->prepare("SELECT title, site_url FROM ttrss_feeds WHERE id = ?");
	$sth->execute([$feed]);

	if (!$row = $sth->fetch()) return;

	$registered_title = $row["title"];
	$orig_site_url = $row["site_url"];

	// plugin-supplied info may lack either key, hence !empty() rather than a
	// direct read that would raise an undefined-index notice/warning
	if (!empty($basic_info['title']) && (!$registered_title || $registered_title == "[Unknown]")) {

		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			title = ? WHERE id = ?");
		$sth->execute([$basic_info['title'], $feed]);
	}

	if (!empty($basic_info['site_url']) && $orig_site_url != $basic_info['site_url']) {
		$sth = $pdo->prepare("UPDATE ttrss_feeds SET
			site_url = ? WHERE id = ?");
		$sth->execute([$basic_info['site_url'], $feed]);
	}
}
307
308 /**
309 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
310 */
311 static function update_rss_feed($feed, $no_cache = false) {
312
313 Debug::log("start", Debug::$LOG_VERBOSE);
314
315 $pdo = Db::pdo();
316
317 $sth = $pdo->prepare("SELECT title FROM ttrss_feeds WHERE id = ?");
318 $sth->execute([$feed]);
319
320 if (!$row = $sth->fetch()) {
321 Debug::log("feed $feed not found, skipping.");
322 user_error("Attempt to update unknown/invalid feed $feed", E_USER_WARNING);
323 return false;
324 }
325
326 $title = $row["title"];
327
328 // feed was batch-subscribed or something, we need to get basic info
329 // this is not optimal currently as it fetches stuff separately TODO: optimize
330 if ($title == "[Unknown]") {
331 Debug::log("setting basic feed info for $feed...");
332 RSSUtils::set_basic_feed_info($feed);
333 }
334
335 $sth = $pdo->prepare("SELECT id,update_interval,auth_login,
336 feed_url,auth_pass,cache_images,
337 mark_unread_on_update, owner_uid,
338 auth_pass_encrypted, feed_language,
339 last_modified,
340 ".SUBSTRING_FOR_DATE."(last_unconditional, 1, 19) AS last_unconditional
341 FROM ttrss_feeds WHERE id = ?");
342 $sth->execute([$feed]);
343
344 if ($row = $sth->fetch()) {
345
346 $owner_uid = $row["owner_uid"];
347 $mark_unread_on_update = $row["mark_unread_on_update"];
348
349 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_update_started = NOW()
350 WHERE id = ?");
351 $sth->execute([$feed]);
352
353 $auth_login = $row["auth_login"];
354 $auth_pass = $row["auth_pass"];
355 $stored_last_modified = $row["last_modified"];
356 $last_unconditional = $row["last_unconditional"];
357 $cache_images = $row["cache_images"];
358 $fetch_url = $row["feed_url"];
359
360 $feed_language = mb_strtolower($row["feed_language"]);
361 if (!$feed_language) $feed_language = 'english';
362
363 } else {
364 return false;
365 }
366
367 $date_feed_processed = date('Y-m-d H:i');
368
369 $cache_filename = CACHE_DIR . "/feeds/" . sha1($fetch_url) . ".xml";
370
371 $pluginhost = new PluginHost();
372 $user_plugins = get_pref("_ENABLED_PLUGINS", $owner_uid);
373
374 $pluginhost->load(PLUGINS, PluginHost::KIND_ALL);
375 $pluginhost->load($user_plugins, PluginHost::KIND_USER, $owner_uid);
376 $pluginhost->load_data();
377
378 $rss_hash = false;
379
380 $force_refetch = isset($_REQUEST["force_refetch"]);
381 $feed_data = "";
382
383 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FETCH_FEED) as $plugin) {
384 $feed_data = $plugin->hook_fetch_feed($feed_data, $fetch_url, $owner_uid, $feed, 0, $auth_login, $auth_pass);
385 }
386
387 // try cache
388 if (!$feed_data &&
389 file_exists($cache_filename) &&
390 is_readable($cache_filename) &&
391 !$auth_login && !$auth_pass &&
392 filemtime($cache_filename) > time() - 30) {
393
394 Debug::log("using local cache [$cache_filename].", Debug::$LOG_VERBOSE);
395
396 @$feed_data = file_get_contents($cache_filename);
397
398 if ($feed_data) {
399 $rss_hash = sha1($feed_data);
400 }
401
402 } else {
403 Debug::log("local cache will not be used for this feed", Debug::$LOG_VERBOSE);
404 }
405
406 global $fetch_last_modified;
407
408 // fetch feed from source
409 if (!$feed_data) {
410 Debug::log("last unconditional update request: $last_unconditional", Debug::$LOG_VERBOSE);
411
412 if (ini_get("open_basedir") && function_exists("curl_init")) {
413 Debug::log("not using CURL due to open_basedir restrictions", Debug::$LOG_VERBOSE);
414 }
415
416 if (time() - strtotime($last_unconditional) > MAX_CONDITIONAL_INTERVAL) {
417 Debug::log("maximum allowed interval for conditional requests exceeded, forcing refetch", Debug::$LOG_VERBOSE);
418
419 $force_refetch = true;
420 } else {
421 Debug::log("stored last modified for conditional request: $stored_last_modified", Debug::$LOG_VERBOSE);
422 }
423
424 Debug::log("fetching [$fetch_url] (force_refetch: $force_refetch)...", Debug::$LOG_VERBOSE);
425
426 $feed_data = fetch_file_contents([
427 "url" => $fetch_url,
428 "login" => $auth_login,
429 "pass" => $auth_pass,
430 "timeout" => $no_cache ? FEED_FETCH_NO_CACHE_TIMEOUT : FEED_FETCH_TIMEOUT,
431 "last_modified" => $force_refetch ? "" : $stored_last_modified
432 ]);
433
434 global $fetch_curl_used;
435
436 if (!$fetch_curl_used) {
437 $tmp = @gzdecode($feed_data);
438
439 if ($tmp) $feed_data = $tmp;
440 }
441
442 $feed_data = trim($feed_data);
443
444 Debug::log("fetch done.", Debug::$LOG_VERBOSE);
445 Debug::log("source last modified: " . $fetch_last_modified, Debug::$LOG_VERBOSE);
446
447 if ($feed_data && $fetch_last_modified != $stored_last_modified) {
448 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_modified = ? WHERE id = ?");
449 $sth->execute([substr($fetch_last_modified, 0, 245), $feed]);
450 }
451
452 // cache vanilla feed data for re-use
453 if ($feed_data && !$auth_pass && !$auth_login && is_writable(CACHE_DIR . "/feeds")) {
454 $new_rss_hash = sha1($feed_data);
455
456 if ($new_rss_hash != $rss_hash) {
457 Debug::log("saving $cache_filename", Debug::$LOG_VERBOSE);
458 @file_put_contents($cache_filename, $feed_data);
459 }
460 }
461 }
462
463 if (!$feed_data) {
464 global $fetch_last_error;
465 global $fetch_last_error_code;
466
467 Debug::log("unable to fetch: $fetch_last_error [$fetch_last_error_code]", Debug::$LOG_VERBOSE);
468
469 // If-Modified-Since
470 if ($fetch_last_error_code != 304) {
471 $error_message = $fetch_last_error;
472 } else {
473 Debug::log("source claims data not modified, nothing to do.", Debug::$LOG_VERBOSE);
474 $error_message = "";
475 }
476
477 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
478 last_updated = NOW() WHERE id = ?");
479 $sth->execute([$error_message, $feed]);
480
481 return;
482 }
483
484 foreach ($pluginhost->get_hooks(PluginHost::HOOK_FEED_FETCHED) as $plugin) {
485 $feed_data = $plugin->hook_feed_fetched($feed_data, $fetch_url, $owner_uid, $feed);
486 }
487
488 $rss = new FeedParser($feed_data);
489 $rss->init();
490
491 if (!$rss->error()) {
492
493 // We use local pluginhost here because we need to load different per-user feed plugins
494 $pluginhost->run_hooks(PluginHost::HOOK_FEED_PARSED, "hook_feed_parsed", $rss);
495
496 Debug::log("language: $feed_language", Debug::$LOG_VERBOSE);
497 Debug::log("processing feed data...", Debug::$LOG_VERBOSE);
498
499 if (DB_TYPE == "pgsql") {
500 $favicon_interval_qpart = "favicon_last_checked < NOW() - INTERVAL '12 hour'";
501 } else {
502 $favicon_interval_qpart = "favicon_last_checked < DATE_SUB(NOW(), INTERVAL 12 HOUR)";
503 }
504
505 $sth = $pdo->prepare("SELECT owner_uid,favicon_avg_color,
506 (favicon_last_checked IS NULL OR $favicon_interval_qpart) AS
507 favicon_needs_check
508 FROM ttrss_feeds WHERE id = ?");
509 $sth->execute([$feed]);
510
511 if ($row = $sth->fetch()) {
512 $favicon_needs_check = $row["favicon_needs_check"];
513 $favicon_avg_color = $row["favicon_avg_color"];
514 $owner_uid = $row["owner_uid"];
515 } else {
516 return false;
517 }
518
519 $site_url = mb_substr(rewrite_relative_url($fetch_url, $rss->get_link()), 0, 245);
520
521 Debug::log("site_url: $site_url", Debug::$LOG_VERBOSE);
522 Debug::log("feed_title: " . $rss->get_title(), Debug::$LOG_VERBOSE);
523
524 if ($favicon_needs_check || $force_refetch) {
525
526 /* terrible hack: if we crash on floicon shit here, we won't check
527 * the icon avgcolor again (unless the icon got updated) */
528
529 $favicon_file = ICONS_DIR . "/$feed.ico";
530 $favicon_modified = @filemtime($favicon_file);
531
532 Debug::log("checking favicon...", Debug::$LOG_VERBOSE);
533
534 RSSUtils::check_feed_favicon($site_url, $feed);
535 $favicon_modified_new = @filemtime($favicon_file);
536
537 if ($favicon_modified_new > $favicon_modified)
538 $favicon_avg_color = '';
539
540 $favicon_colorstring = "";
541 if (file_exists($favicon_file) && function_exists("imagecreatefromstring") && $favicon_avg_color == '') {
542 require_once "colors.php";
543
544 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_avg_color = 'fail' WHERE
545 id = ?");
546 $sth->execute([$feed]);
547
548 $favicon_color = calculate_avg_color($favicon_file);
549
550 $favicon_colorstring = ",favicon_avg_color = " . $pdo->quote($favicon_color);
551
552 } else if ($favicon_avg_color == 'fail') {
553 Debug::log("floicon failed on this file, not trying to recalculate avg color", Debug::$LOG_VERBOSE);
554 }
555
556 $sth = $pdo->prepare("UPDATE ttrss_feeds SET favicon_last_checked = NOW()
557 $favicon_colorstring WHERE id = ?");
558 $sth->execute([$feed]);
559 }
560
561 Debug::log("loading filters & labels...", Debug::$LOG_VERBOSE);
562
563 $filters = load_filters($feed, $owner_uid);
564
565 if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) {
566 print_r($filters);
567 }
568
569 Debug::log("" . count($filters) . " filters loaded.", Debug::$LOG_VERBOSE);
570
571 $items = $rss->get_items();
572
573 if (!is_array($items)) {
574 Debug::log("no articles found.", Debug::$LOG_VERBOSE);
575
576 $sth = $pdo->prepare("UPDATE ttrss_feeds
577 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
578 $sth->execute([$feed]);
579
580 return true; // no articles
581 }
582
583 Debug::log("processing articles...", Debug::$LOG_VERBOSE);
584
585 $tstart = time();
586
587 foreach ($items as $item) {
588 $pdo->beginTransaction();
589
590 if (Debug::get_loglevel() >= 3) {
591 print_r($item);
592 }
593
594 if (ini_get("max_execution_time") > 0 && time() - $tstart >= ini_get("max_execution_time") * 0.7) {
595 Debug::log("looks like there's too many articles to process at once, breaking out", Debug::$LOG_VERBOSE);
596 $pdo->commit();
597 break;
598 }
599
600 $entry_guid = strip_tags($item->get_id());
601 if (!$entry_guid) $entry_guid = strip_tags($item->get_link());
602 if (!$entry_guid) $entry_guid = RSSUtils::make_guid_from_title($item->get_title());
603
604 if (!$entry_guid) {
605 $pdo->commit();
606 continue;
607 }
608
609 $entry_guid = "$owner_uid,$entry_guid";
610
611 $entry_guid_hashed = 'SHA1:' . sha1($entry_guid);
612
613 Debug::log("guid $entry_guid / $entry_guid_hashed", Debug::$LOG_VERBOSE);
614
615 $entry_timestamp = strip_tags($item->get_date());
616
617 Debug::log("orig date: " . $item->get_date(), Debug::$LOG_VERBOSE);
618
619 if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) {
620 $entry_timestamp = time();
621 }
622
623 $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp);
624
625 Debug::log("date $entry_timestamp [$entry_timestamp_fmt]", Debug::$LOG_VERBOSE);
626
627 $entry_title = strip_tags($item->get_title());
628
629 $entry_link = rewrite_relative_url($site_url, $item->get_link());
630
631 $entry_language = mb_substr(trim($item->get_language()), 0, 2);
632
633 Debug::log("title $entry_title", Debug::$LOG_VERBOSE);
634 Debug::log("link $entry_link", Debug::$LOG_VERBOSE);
635 Debug::log("language $entry_language", Debug::$LOG_VERBOSE);
636
637 if (!$entry_title) $entry_title = date("Y-m-d H:i:s", $entry_timestamp);;
638
639 $entry_content = $item->get_content();
640 if (!$entry_content) $entry_content = $item->get_description();
641
642 if (Debug::get_loglevel() >= 3) {
643 print "content: ";
644 print htmlspecialchars($entry_content);
645 print "\n";
646 }
647
648 $entry_comments = mb_substr(strip_tags($item->get_comments_url()), 0, 245);
649 $num_comments = (int) $item->get_comments_count();
650
651 $entry_author = strip_tags($item->get_author());
652 $entry_guid = mb_substr($entry_guid, 0, 245);
653
654 Debug::log("author $entry_author", Debug::$LOG_VERBOSE);
655 Debug::log("num_comments: $num_comments", Debug::$LOG_VERBOSE);
656 Debug::log("looking for tags...", Debug::$LOG_VERBOSE);
657
658 // parse <category> entries into tags
659
660 $additional_tags = array();
661
662 $additional_tags_src = $item->get_categories();
663
664 if (is_array($additional_tags_src)) {
665 foreach ($additional_tags_src as $tobj) {
666 array_push($additional_tags, $tobj);
667 }
668 }
669
670 $entry_tags = array_unique($additional_tags);
671
672 for ($i = 0; $i < count($entry_tags); $i++) {
673 $entry_tags[$i] = mb_strtolower($entry_tags[$i], 'utf-8');
674
675 // we don't support numeric tags, let's prefix them
676 if (is_numeric($entry_tags[$i])) $entry_tags[$i] = 't:' . $entry_tags[$i];
677 }
678
679 Debug::log("tags found: " . join(",", $entry_tags), Debug::$LOG_VERBOSE);
680
681 Debug::log("done collecting data.", Debug::$LOG_VERBOSE);
682
683 $sth = $pdo->prepare("SELECT id, content_hash, lang FROM ttrss_entries
684 WHERE guid = ? OR guid = ?");
685 $sth->execute([$entry_guid, $entry_guid_hashed]);
686
687 if ($row = $sth->fetch()) {
688 $base_entry_id = $row["id"];
689 $entry_stored_hash = $row["content_hash"];
690 $article_labels = Article::get_article_labels($base_entry_id, $owner_uid);
691
692 $existing_tags = Article::get_article_tags($base_entry_id, $owner_uid);
693 $entry_tags = array_unique(array_merge($entry_tags, $existing_tags));
694 } else {
695 $base_entry_id = false;
696 $entry_stored_hash = "";
697 $article_labels = array();
698 }
699
700 $article = array("owner_uid" => $owner_uid, // read only
701 "guid" => $entry_guid, // read only
702 "guid_hashed" => $entry_guid_hashed, // read only
703 "title" => $entry_title,
704 "content" => $entry_content,
705 "link" => $entry_link,
706 "labels" => $article_labels, // current limitation: can add labels to article, can't remove them
707 "tags" => $entry_tags,
708 "author" => $entry_author,
709 "force_catchup" => false, // ugly hack for the time being
710 "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
711 "language" => $entry_language,
712 "num_comments" => $num_comments, // read only
713 "feed" => array("id" => $feed,
714 "fetch_url" => $fetch_url,
715 "site_url" => $site_url,
716 "cache_images" => $cache_images)
717 );
718
719 $entry_plugin_data = "";
720 $entry_current_hash = RSSUtils::calculate_article_hash($article, $pluginhost);
721
722 Debug::log("article hash: $entry_current_hash [stored=$entry_stored_hash]", Debug::$LOG_VERBOSE);
723
724 if ($entry_current_hash == $entry_stored_hash && !isset($_REQUEST["force_rehash"])) {
725 Debug::log("stored article seems up to date [IID: $base_entry_id], updating timestamp only", Debug::$LOG_VERBOSE);
726
727 // we keep encountering the entry in feeds, so we need to
728 // update date_updated column so that we don't get horrible
729 // dupes when the entry gets purged and reinserted again e.g.
730 // in the case of SLOW SLOW OMG SLOW updating feeds
731
732 $sth = $pdo->prepare("UPDATE ttrss_entries SET date_updated = NOW()
733 WHERE id = ?");
734 $sth->execute([$base_entry_id]);
735
736 $pdo->commit();
737 continue;
738 }
739
740 Debug::log("hash differs, applying plugin filters:", Debug::$LOG_VERBOSE);
741
742 foreach ($pluginhost->get_hooks(PluginHost::HOOK_ARTICLE_FILTER) as $plugin) {
743 Debug::log("... " . get_class($plugin), Debug::$LOG_VERBOSE);
744
745 $start = microtime(true);
746 $article = $plugin->hook_article_filter($article);
747
748 Debug::log(sprintf("=== %.4f (sec)", microtime(true) - $start), Debug::$LOG_VERBOSE);
749
750 $entry_plugin_data .= mb_strtolower(get_class($plugin)) . ",";
751 }
752
753 if (Debug::get_loglevel() >= 3) {
754 print "processed content: ";
755 print htmlspecialchars($article["content"]);
756 print "\n";
757 }
758
759 Debug::log("plugin data: $entry_plugin_data", Debug::$LOG_VERBOSE);
760
761 // Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
762 if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
763 foreach ($article as $k => $v) {
764 // i guess we'll have to take the risk of 4byte unicode labels & tags here
765 if (is_string($article[$k])) {
766 $article[$k] = RSSUtils::strip_utf8mb4($v);
767 }
768 }
769 }
770
771 /* Collect article tags here so we could filter by them: */
772
773 $matched_rules = array();
774
775 $article_filters = RSSUtils::get_article_filters($filters, $article["title"],
776 $article["content"], $article["link"], $article["author"],
777 $article["tags"], $matched_rules);
778
779 if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) {
780 Debug::log("matched filter rules: ", Debug::$LOG_VERBOSE);
781
782 if (count($matched_rules) != 0) {
783 print_r($matched_rules);
784 }
785
786 Debug::log("filter actions: ", Debug::$LOG_VERBOSE);
787
788 if (count($article_filters) != 0) {
789 print_r($article_filters);
790 }
791 }
792
793 $plugin_filter_names = RSSUtils::find_article_filters($article_filters, "plugin");
794 $plugin_filter_actions = $pluginhost->get_filter_actions();
795
796 if (count($plugin_filter_names) > 0) {
797 Debug::log("applying plugin filter actions...", Debug::$LOG_VERBOSE);
798
799 foreach ($plugin_filter_names as $pfn) {
800 list($pfclass,$pfaction) = explode(":", $pfn["param"]);
801
802 if (isset($plugin_filter_actions[$pfclass])) {
803 $plugin = $pluginhost->get_plugin($pfclass);
804
805 Debug::log("... $pfclass: $pfaction", Debug::$LOG_VERBOSE);
806
807 if ($plugin) {
808 $start = microtime(true);
809 $article = $plugin->hook_article_filter_action($article, $pfaction);
810
811 Debug::log(sprintf("=== %.4f (sec)"), Debug::$LOG_VERBOSE);
812 } else {
813 Debug::log("??? $pfclass: plugin object not found.", Debug::$LOG_VERBOSE);
814 }
815 } else {
816 Debug::log("??? $pfclass: filter plugin not registered.", Debug::$LOG_VERBOSE);
817 }
818 }
819 }
820
821 $entry_tags = $article["tags"];
822 $entry_title = strip_tags($article["title"]);
823 $entry_author = mb_substr(strip_tags($article["author"]), 0, 245);
824 $entry_link = strip_tags($article["link"]);
825 $entry_content = $article["content"]; // escaped below
826 $entry_force_catchup = $article["force_catchup"];
827 $article_labels = $article["labels"];
828 $entry_score_modifier = (int) $article["score_modifier"];
829 $entry_language = $article["language"];
830
831 if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) {
832 Debug::log("article labels:", Debug::$LOG_VERBOSE);
833
834 if (count($article_labels) != 0) {
835 print_r($article_labels);
836 }
837 }
838
839 Debug::log("force catchup: $entry_force_catchup", Debug::$LOG_VERBOSE);
840
841 if ($cache_images && is_writable(CACHE_DIR . '/images'))
842 RSSUtils::cache_media($entry_content, $site_url);
843
844 $csth = $pdo->prepare("SELECT id FROM ttrss_entries
845 WHERE guid = ? OR guid = ?");
846 $csth->execute([$entry_guid, $entry_guid_hashed]);
847
848 if (!$row = $csth->fetch()) {
849
850 Debug::log("base guid [$entry_guid or $entry_guid_hashed] not found, creating...", Debug::$LOG_VERBOSE);
851
852 // base post entry does not exist, create it
853
854 $usth = $pdo->prepare(
855 "INSERT INTO ttrss_entries
856 (title,
857 guid,
858 link,
859 updated,
860 content,
861 content_hash,
862 no_orig_date,
863 date_updated,
864 date_entered,
865 comments,
866 num_comments,
867 plugin_data,
868 lang,
869 author)
870 VALUES
871 (?, ?, ?, ?, ?, ?,
872 false,
873 NOW(),
874 ?, ?, ?, ?, ?, ?)");
875
876 $usth->execute([$entry_title,
877 $entry_guid_hashed,
878 $entry_link,
879 $entry_timestamp_fmt,
880 "$entry_content",
881 $entry_current_hash,
882 $date_feed_processed,
883 $entry_comments,
884 (int)$num_comments,
885 $entry_plugin_data,
886 "$entry_language",
887 "$entry_author"]);
888
889 }
890
891 $csth->execute([$entry_guid, $entry_guid_hashed]);
892
893 $entry_ref_id = 0;
894 $entry_int_id = 0;
895
896 if ($row = $csth->fetch()) {
897
898 Debug::log("base guid found, checking for user record", Debug::$LOG_VERBOSE);
899
900 $ref_id = $row['id'];
901 $entry_ref_id = $ref_id;
902
903 if (RSSUtils::find_article_filter($article_filters, "filter")) {
904 $pdo->commit();
905 continue;
906 }
907
908 $score = RSSUtils::calculate_article_score($article_filters) + $entry_score_modifier;
909
910 Debug::log("initial score: $score [including plugin modifier: $entry_score_modifier]", Debug::$LOG_VERBOSE);
911
912 // check for user post link to main table
913
914 $sth = $pdo->prepare("SELECT ref_id, int_id FROM ttrss_user_entries WHERE
915 ref_id = ? AND owner_uid = ?");
916 $sth->execute([$ref_id, $owner_uid]);
917
918 // okay it doesn't exist - create user entry
919 if ($row = $sth->fetch()) {
920 $entry_ref_id = $row["ref_id"];
921 $entry_int_id = $row["int_id"];
922
923 Debug::log("user record FOUND: RID: $entry_ref_id, IID: $entry_int_id", Debug::$LOG_VERBOSE);
924 } else {
925
926 Debug::log("user record not found, creating...", Debug::$LOG_VERBOSE);
927
928 if ($score >= -500 && !RSSUtils::find_article_filter($article_filters, 'catchup') && !$entry_force_catchup) {
929 $unread = 1;
930 $last_read_qpart = null;
931 } else {
932 $unread = 0;
933 $last_read_qpart = date("Y-m-d H:i"); // we can't use NOW() here because it gets quoted
934 }
935
936 if (RSSUtils::find_article_filter($article_filters, 'mark') || $score > 1000) {
937 $marked = 1;
938 } else {
939 $marked = 0;
940 }
941
942 if (RSSUtils::find_article_filter($article_filters, 'publish')) {
943 $published = 1;
944 } else {
945 $published = 0;
946 }
947
948 $last_marked = ($marked == 1) ? 'NOW()' : 'NULL';
949 $last_published = ($published == 1) ? 'NOW()' : 'NULL';
950
951 $sth = $pdo->prepare(
952 "INSERT INTO ttrss_user_entries
953 (ref_id, owner_uid, feed_id, unread, last_read, marked,
954 published, score, tag_cache, label_cache, uuid,
955 last_marked, last_published)
956 VALUES (?, ?, ?, ?, ?, ?, ?, ?, '', '', '', ".$last_marked.", ".$last_published.")");
957
958 $sth->execute([$ref_id, $owner_uid, $feed, $unread, $last_read_qpart, $marked,
959 $published, $score]);
960
961 $sth = $pdo->prepare("SELECT int_id FROM ttrss_user_entries WHERE
962 ref_id = ? AND owner_uid = ? AND
963 feed_id = ? LIMIT 1");
964
965 $sth->execute([$ref_id, $owner_uid, $feed]);
966
967 if ($row = $sth->fetch())
968 $entry_int_id = $row['int_id'];
969 }
970
971 Debug::log("resulting RID: $entry_ref_id, IID: $entry_int_id", Debug::$LOG_VERBOSE);
972
973 if (DB_TYPE == "pgsql")
974 $tsvector_qpart = "tsvector_combined = to_tsvector(:ts_lang, :ts_content),";
975 else
976 $tsvector_qpart = "";
977
978 $sth = $pdo->prepare("UPDATE ttrss_entries
979 SET title = :title,
980 $tsvector_qpart
981 content = :content,
982 content_hash = :content_hash,
983 updated = :updated,
984 date_updated = NOW(),
985 num_comments = :num_comments,
986 plugin_data = :plugin_data,
987 author = :author,
988 lang = :lang
989 WHERE id = :id");
990
991 $params = [":title" => $entry_title,
992 ":content" => "$entry_content",
993 ":content_hash" => $entry_current_hash,
994 ":updated" => $entry_timestamp_fmt,
995 ":num_comments" => (int)$num_comments,
996 ":plugin_data" => $entry_plugin_data,
997 ":author" => "$entry_author",
998 ":lang" => $entry_language,
999 ":id" => $ref_id];
1000
1001 if (DB_TYPE == "pgsql") {
1002 $params[":ts_lang"] = $feed_language;
1003 $params[":ts_content"] = mb_substr(strip_tags($entry_title . " " . $entry_content), 0, 900000);
1004 }
1005
1006 $sth->execute($params);
1007
1008 // update aux data
1009 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1010 SET score = ? WHERE ref_id = ?");
1011 $sth->execute([$score, $ref_id]);
1012
1013 if ($mark_unread_on_update) {
1014 Debug::log("article updated, marking unread as requested.", Debug::$LOG_VERBOSE);
1015
1016 $sth = $pdo->prepare("UPDATE ttrss_user_entries
1017 SET last_read = null, unread = true WHERE ref_id = ?");
1018 $sth->execute([$ref_id]);
1019 }
1020 }
1021
1022 Debug::log("assigning labels [other]...", Debug::$LOG_VERBOSE);
1023
1024 foreach ($article_labels as $label) {
1025 Labels::add_article($entry_ref_id, $label[1], $owner_uid);
1026 }
1027
1028 Debug::log("assigning labels [filters]...", Debug::$LOG_VERBOSE);
1029
1030 RSSUtils::assign_article_to_label_filters($entry_ref_id, $article_filters,
1031 $owner_uid, $article_labels);
1032
1033 Debug::log("looking for enclosures...", Debug::$LOG_VERBOSE);
1034
1035 // enclosures
1036
1037 $enclosures = array();
1038
1039 $encs = $item->get_enclosures();
1040
1041 if (is_array($encs)) {
1042 foreach ($encs as $e) {
1043 $e_item = array(
1044 rewrite_relative_url($site_url, $e->link),
1045 $e->type, $e->length, $e->title, $e->width, $e->height);
1046
1047 // Yet another episode of "mysql utf8_general_ci is gimped"
1048 if (DB_TYPE == "mysql" && MYSQL_CHARSET != "UTF8MB4") {
1049 for ($i = 0; $i < count($e_item); $i++) {
1050 if (is_string($e_item[$i])) {
1051 $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
1052 }
1053 }
1054 }
1055
1056 array_push($enclosures, $e_item);
1057 }
1058 }
1059
1060 if ($cache_images && is_writable(CACHE_DIR . '/images'))
1061 RSSUtils::cache_enclosures($enclosures, $site_url);
1062
1063 if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) {
1064 Debug::log("article enclosures:", Debug::$LOG_VERBOSE);
1065 print_r($enclosures);
1066 }
1067
1068 $esth = $pdo->prepare("SELECT id FROM ttrss_enclosures
1069 WHERE content_url = ? AND content_type = ? AND post_id = ?");
1070
1071 $usth = $pdo->prepare("INSERT INTO ttrss_enclosures
1072 (content_url, content_type, title, duration, post_id, width, height) VALUES
1073 (?, ?, ?, ?, ?, ?, ?)");
1074
1075 foreach ($enclosures as $enc) {
1076 $enc_url = $enc[0];
1077 $enc_type = $enc[1];
1078 $enc_dur = (int)$enc[2];
1079 $enc_title = $enc[3];
1080 $enc_width = intval($enc[4]);
1081 $enc_height = intval($enc[5]);
1082
1083 $esth->execute([$enc_url, $enc_type, $entry_ref_id]);
1084
1085 if (!$esth->fetch()) {
1086 $usth->execute([$enc_url, $enc_type, (string)$enc_title, $enc_dur, $entry_ref_id, $enc_width, $enc_height]);
1087 }
1088 }
1089
1090 // check for manual tags (we have to do it here since they're loaded from filters)
1091
1092 foreach ($article_filters as $f) {
1093 if ($f["type"] == "tag") {
1094
1095 $manual_tags = trim_array(explode(",", $f["param"]));
1096
1097 foreach ($manual_tags as $tag) {
1098 if (tag_is_valid($tag)) {
1099 array_push($entry_tags, $tag);
1100 }
1101 }
1102 }
1103 }
1104
1105 // Skip boring tags
1106
1107 $boring_tags = trim_array(explode(",", mb_strtolower(get_pref(
1108 'BLACKLISTED_TAGS', $owner_uid, ''), 'utf-8')));
1109
1110 $filtered_tags = array();
1111 $tags_to_cache = array();
1112
1113 if ($entry_tags && is_array($entry_tags)) {
1114 foreach ($entry_tags as $tag) {
1115 if (array_search($tag, $boring_tags) === false) {
1116 array_push($filtered_tags, $tag);
1117 }
1118 }
1119 }
1120
1121 $filtered_tags = array_unique($filtered_tags);
1122
1123 if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) {
1124 Debug::log("filtered article tags:", Debug::$LOG_VERBOSE);
1125 print_r($filtered_tags);
1126 }
1127
1128 // Save article tags in the database
1129
1130 if (count($filtered_tags) > 0) {
1131
1132 $tsth = $pdo->prepare("SELECT id FROM ttrss_tags
1133 WHERE tag_name = ? AND post_int_id = ? AND
1134 owner_uid = ? LIMIT 1");
1135
1136 $usth = $pdo->prepare("INSERT INTO ttrss_tags
1137 (owner_uid,tag_name,post_int_id)
1138 VALUES (?, ?, ?)");
1139
1140 foreach ($filtered_tags as $tag) {
1141
1142 $tag = sanitize_tag($tag);
1143
1144 if (!tag_is_valid($tag)) continue;
1145
1146 $tsth->execute([$tag, $entry_int_id, $owner_uid]);
1147
1148 if (!$tsth->fetch()) {
1149 $usth->execute([$owner_uid, $tag, $entry_int_id]);
1150 }
1151
1152 array_push($tags_to_cache, $tag);
1153 }
1154
1155 /* update the cache */
1156
1157 $tags_to_cache = array_unique($tags_to_cache);
1158
1159 $tags_str = join(",", $tags_to_cache);
1160
1161 $tsth = $pdo->prepare("UPDATE ttrss_user_entries
1162 SET tag_cache = ? WHERE ref_id = ?
1163 AND owner_uid = ?");
1164 $tsth->execute([$tags_str, $entry_ref_id, $owner_uid]);
1165 }
1166
1167 Debug::log("article processed", Debug::$LOG_VERBOSE);
1168
1169 $pdo->commit();
1170 }
1171
1172 Debug::log("purging feed...", Debug::$LOG_VERBOSE);
1173
1174 purge_feed($feed, 0);
1175
1176 $sth = $pdo->prepare("UPDATE ttrss_feeds
1177 SET last_updated = NOW(), last_unconditional = NOW(), last_error = '' WHERE id = ?");
1178 $sth->execute([$feed]);
1179
1180 } else {
1181
1182 $error_msg = mb_substr($rss->error(), 0, 245);
1183
1184 Debug::log("fetch error: $error_msg", Debug::$LOG_VERBOSE);
1185
1186 if (count($rss->errors()) > 1) {
1187 foreach ($rss->errors() as $error) {
1188 Debug::log("+ $error", Debug::$LOG_VERBOSE);
1189 }
1190 }
1191
1192 $sth = $pdo->prepare("UPDATE ttrss_feeds SET last_error = ?,
1193 last_updated = NOW(), last_unconditional = NOW() WHERE id = ?");
1194 $sth->execute([$error_msg, $feed]);
1195
1196 unset($rss);
1197 return false;
1198 }
1199
1200 Debug::log("done", Debug::$LOG_VERBOSE);
1201
1202 return true;
1203 }
1204
/**
 * Stores media enclosures in the local cache directory.
 *
 * Every enclosure whose content type mentions image, audio or video is
 * saved under CACHE_DIR/images using the SHA-1 of its absolute URL as
 * the file name. Files that are already cached only get their
 * modification time refreshed.
 *
 * @param array $enclosures list of enclosure tuples; index 0 is the URL, index 1 the content type
 * @param string $site_url base URL used to resolve relative enclosure URLs
 */
static function cache_enclosures($enclosures, $site_url) {
	foreach ($enclosures as $enclosure) {

		// anything that is not image/audio/video is left alone
		if (!preg_match("/(image|audio|video)/", $enclosure[1])) {
			continue;
		}

		$url = rewrite_relative_url($site_url, $enclosure[0]);
		$cache_path = CACHE_DIR . "/images/" . sha1($url);

		Debug::log("cache_enclosures: downloading: $url to $cache_path", Debug::$LOG_VERBOSE);

		if (file_exists($cache_path)) {
			// already cached: only refresh the modification time
			if (is_writable($cache_path)) {
				touch($cache_path);
			}

			continue;
		}

		$data = fetch_file_contents($url);

		// downloads that are empty or not larger than MIN_CACHE_FILE_SIZE are not stored
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cache_path, $data);
		}
	}
}
1228
/**
 * Caches media referenced from an HTML fragment.
 *
 * The fragment is parsed as UTF-8 HTML and every <img>, <video><source>
 * and <audio><source> element with a src attribute is looked at. Sources
 * that are not data: URIs are downloaded to CACHE_DIR/images (file name is
 * the SHA-1 of the absolute URL) unless a cached copy exists, in which
 * case only its modification time is refreshed.
 *
 * @param string $html HTML fragment to scan
 * @param string $site_url base URL used to resolve relative src values
 */
static function cache_media($html, $site_url) {
	libxml_use_internal_errors(true);

	// prepended so that DOMDocument treats the fragment as UTF-8
	$charset_hack = '<head>
		<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
	</head>';

	$doc = new DOMDocument();
	$doc->loadHTML($charset_hack . $html);

	$xpath = new DOMXPath($doc);
	$nodes = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])');

	foreach ($nodes as $node) {
		if (!$node->hasAttribute('src')) {
			continue;
		}

		// inline data: URIs have nothing to download
		if (strpos($node->getAttribute('src'), "data:") === 0) {
			continue;
		}

		$url = rewrite_relative_url($site_url, $node->getAttribute('src'));
		$cache_path = CACHE_DIR . "/images/" . sha1($url);

		Debug::log("cache_media: checking $url", Debug::$LOG_VERBOSE);

		if (file_exists($cache_path)) {
			// already cached: only refresh the modification time
			if (is_writable($cache_path)) {
				touch($cache_path);
			}

			continue;
		}

		Debug::log("cache_media: downloading: $url to $cache_path", Debug::$LOG_VERBOSE);

		$data = fetch_file_contents($url);

		// downloads that are empty or not larger than MIN_CACHE_FILE_SIZE are not stored
		if ($data && strlen($data) > MIN_CACHE_FILE_SIZE) {
			file_put_contents($cache_path, $data);
		}
	}
}
1264
/**
 * Deletes rows of ttrss_error_log that were created more than seven
 * days ago, using the interval syntax of the configured database type.
 */
static function expire_error_log() {
	Debug::log("Removing old error log entries...");

	$pdo = Db::pdo();

	// PostgreSQL and the other supported backend spell date arithmetic differently
	$query = (DB_TYPE == "pgsql")
		? "DELETE FROM ttrss_error_log
			WHERE created_at < NOW() - INTERVAL '7 days'"
		: "DELETE FROM ttrss_error_log
			WHERE created_at < DATE_SUB(NOW(), INTERVAL 7 DAY)";

	$pdo->query($query);
}
1278
/**
 * Removes stale *.lock files from LOCK_DIRECTORY.
 *
 * A lock file is removed when file_is_locked() reports it as not locked
 * and it was last modified more than two days ago. Nothing is attempted
 * when the directory is not writable.
 */
static function expire_lock_files() {
	Debug::log("Removing old lock files...", Debug::$LOG_VERBOSE);

	$num_deleted = 0;

	$candidates = is_writable(LOCK_DIRECTORY) ? glob(LOCK_DIRECTORY . "/*.lock") : false;

	if ($candidates) {
		foreach ($candidates as $path) {
			// a lock that is still held is never removed
			if (file_is_locked(basename($path))) {
				continue;
			}

			// 86400*2 seconds = two days
			if (time() - filemtime($path) <= 86400*2) {
				continue;
			}

			unlink($path);
			++$num_deleted;
		}
	}

	Debug::log("Removed $num_deleted old lock files.");
}
1299
/**
 * Removes expired files from the cache subdirectories.
 *
 * For each of CACHE_DIR/feeds, /images, /export and /upload that is
 * writable, every regular file last modified more than CACHE_MAX_DAYS
 * days ago is deleted. Entries that are not regular files (for example
 * subdirectories returned by glob) and files whose modification time
 * cannot be read are skipped. The logged count only includes files
 * that were actually removed.
 */
static function expire_cached_files() {
	foreach (array("feeds", "images", "export", "upload") as $dir) {
		$cache_dir = CACHE_DIR . "/$dir";

		Debug::log("Expiring $cache_dir", Debug::$LOG_VERBOSE);

		$num_deleted = 0;

		if (is_writable($cache_dir)) {
			$files = glob("$cache_dir/*");

			if ($files) {
				foreach ($files as $file) {
					// unlink() cannot remove directories; leave them alone
					if (!is_file($file)) {
						continue;
					}

					$mtime = filemtime($file);

					// without a readable mtime the age is unknown, so keep the file
					if ($mtime === false) {
						continue;
					}

					// count the file only when the deletion succeeded
					if (time() - $mtime > 86400*CACHE_MAX_DAYS && unlink($file)) {
						++$num_deleted;
					}
				}
			}
		}

		Debug::log("$cache_dir: removed $num_deleted files.");
	}
}
1325
/**
 * Source: http://www.php.net/manual/en/function.parse-url.php#104527
 * Returns the url query as associative array
 *
 * The query is split on "&" and every segment is split on its first "="
 * only, so values that themselves contain "=" are kept whole. A segment
 * without "=" is stored with a null value. Empty segments (an empty
 * query string, or a doubled "&") are ignored. Keys and values are not
 * URL-decoded.
 *
 * @param string query
 * @return array params
 */
static function convertUrlQuery($query) {
	$params = array();

	foreach (explode('&', $query) as $param) {
		// nothing between two separators, or an empty query string
		if ($param === '') {
			continue;
		}

		// limit of 2 keeps any further "=" characters inside the value
		$item = explode('=', $param, 2);

		$params[$item[0]] = isset($item[1]) ? $item[1] : null;
	}

	return $params;
}
1345
/**
 * Evaluates filters against an article and collects the actions of every
 * filter that matches.
 *
 * Each filter is an array with the keys "match_any_rule", "inverse",
 * "rules" and "actions". Each rule is an array with the keys "reg_exp",
 * "type" and "inverse"; rules with an empty "reg_exp" are skipped. The
 * regular expression is applied case-insensitively in UTF-8 mode to the
 * field selected by "type" (title, content, both, link, author or tag).
 * A pattern that fails to compile counts as "no match" before the rule's
 * "inverse" flag is applied.
 *
 * With "match_any_rule" set the filter matches as soon as one rule
 * matches; otherwise evaluation stops at the first rule that does not
 * match and the filter takes the result of the last evaluated rule. The
 * filter's "inverse" flag negates the final result.
 *
 * @param array $filters filters to evaluate, in order
 * @param string $title article title
 * @param string $content article content; line breaks and tabs are removed before matching
 * @param string $link article link
 * @param string $author article author
 * @param array $tags article tags
 * @param array|bool $matched_rules when an array is passed, the last rule
 *        visited in each matching filter is appended to it; filters that
 *        matched without visiting any rule append nothing
 * @return array actions of all matching filters, cut off after the first "stop" action
 */
static function get_article_filters($filters, $title, $content, $link, $author, $tags, &$matched_rules = false) {
	$matches = array();

	// content without \r, \n and \t; prepared once, on first use
	$flat_content = null;

	foreach ($filters as $filter) {
		$match_any_rule = $filter["match_any_rule"];
		$inverse = $filter["inverse"];
		$filter_match = false;

		// reset per filter so a rule of a previous filter is never reported for this one
		$last_rule = null;

		foreach ($filter["rules"] as $rule) {
			$last_rule = $rule;

			$match = false;
			$reg_exp = str_replace('/', '\/', $rule["reg_exp"]);
			$rule_inverse = $rule["inverse"];

			if (!$reg_exp)
				continue;

			switch ($rule["type"]) {
				case "title":
					$match = @preg_match("/$reg_exp/iu", $title);
					break;
				case "content":
				case "both":
					// we don't need to deal with multiline regexps
					if ($flat_content === null) {
						$flat_content = preg_replace("/[\r\n\t]/", "", $content);
					}

					if ($rule["type"] == "both") {
						$match = (@preg_match("/$reg_exp/iu", $title) || @preg_match("/$reg_exp/iu", $flat_content));
					} else {
						$match = @preg_match("/$reg_exp/iu", $flat_content);
					}
					break;
				case "link":
					$match = @preg_match("/$reg_exp/iu", $link);
					break;
				case "author":
					$match = @preg_match("/$reg_exp/iu", $author);
					break;
				case "tag":
					foreach ($tags as $tag) {
						if (@preg_match("/$reg_exp/iu", $tag)) {
							$match = true;
							break;
						}
					}
					break;
			}

			if ($rule_inverse) $match = !$match;

			if ($match_any_rule) {
				if ($match) {
					$filter_match = true;
					break;
				}
			} else {
				$filter_match = $match;
				if (!$match) {
					break;
				}
			}
		}

		if ($inverse) $filter_match = !$filter_match;

		if ($filter_match) {
			if (is_array($matched_rules) && $last_rule !== null) {
				array_push($matched_rules, $last_rule);
			}

			foreach ($filter["actions"] as $action) {
				array_push($matches, $action);

				// if Stop action encountered, perform no further processing
				if (isset($action["type"]) && $action["type"] == "stop") return $matches;
			}
		}
	}

	return $matches;
}
1425
/**
 * Looks up the first filter action of a given type.
 *
 * @param array $filters list of action arrays, each carrying a "type" key
 * @param string $filter_name action type to look for
 * @return array|bool the first action whose "type" equals $filter_name, or false when there is none
 */
static function find_article_filter($filters, $filter_name) {
	$found = false;

	foreach ($filters as $candidate) {
		if ($candidate["type"] != $filter_name) {
			continue;
		}

		$found = $candidate;
		break;
	}

	return $found;
}
1434
/**
 * Collects all filter actions of a given type.
 *
 * @param array $filters list of action arrays, each carrying a "type" key
 * @param string $filter_name action type to look for
 * @return array actions whose "type" equals $filter_name, in their original order
 */
static function find_article_filters($filters, $filter_name) {
	$selected = array();

	foreach ($filters as $candidate) {
		if ($candidate["type"] != $filter_name) {
			continue;
		}

		$selected[] = $candidate;
	}

	return $selected;
}
1445
/**
 * Adds up the "param" values of all actions of type "score".
 *
 * @param array $filters list of action arrays with "type" and "param" keys
 * @return int|float sum of the score parameters, 0 when there are none
 */
static function calculate_article_score($filters) {
	$total = 0;

	foreach ($filters as $action) {
		if ($action["type"] != "score") {
			continue;
		}

		$total = $total + $action["param"];
	}

	return $total;
}
1456
/**
 * Tells whether a label list already holds a given caption.
 *
 * @param array $labels list of label tuples; the caption is read from index 1
 * @param string $caption caption to look for
 * @return bool true when any label's caption equals $caption
 */
static function labels_contains_caption($labels, $caption) {
	$present = false;

	foreach ($labels as $entry) {
		if ($entry[1] == $caption) {
			$present = true;
			break;
		}
	}

	return $present;
}
1466
/**
 * Applies "label" filter actions to an article.
 *
 * For every action of type "label" whose caption ("param") is not already
 * among $article_labels, the label is added through Labels::add_article().
 *
 * @param int $id identifier of the article passed on to Labels::add_article()
 * @param array $filters list of action arrays with "type" and "param" keys
 * @param int $owner_uid owner passed on to Labels::add_article()
 * @param array $article_labels label tuples already assigned; captions are compared at index 1
 */
static function assign_article_to_label_filters($id, $filters, $owner_uid, $article_labels) {
	foreach ($filters as $action) {
		if ($action["type"] != "label") {
			continue;
		}

		$caption = $action["param"];

		// captions that are already assigned are not added again
		if (RSSUtils::labels_contains_caption($article_labels, $caption)) {
			continue;
		}

		Labels::add_article($id, $caption, $owner_uid);
	}
}
1476
/**
 * Derives a GUID-like string from an article title.
 *
 * Tags are stripped, the text is lower-cased as UTF-8 and every space,
 * quote, apostrophe, comma, period, colon and semicolon is replaced by
 * a dash.
 *
 * @param string $title article title
 * @return string the transformed title
 */
static function make_guid_from_title($title) {
	$plain = strip_tags($title);
	$lowered = mb_strtolower($plain, 'utf-8');

	return preg_replace("/[ \"\',.:;]/", "-", $lowered);
}
1481
/**
 * Deletes orphaned rows from the counter caches.
 *
 * Rows of ttrss_counters_cache with a positive feed_id are deleted when
 * no feed with that id and the same owner exists; rows of
 * ttrss_cat_counters_cache are deleted the same way against
 * ttrss_feed_categories. The number of rows removed from each table is
 * logged.
 */
static function cleanup_counters_cache() {
	$pdo = Db::pdo();

	$feed_cleanup = $pdo->query("DELETE FROM ttrss_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feeds WHERE
			id = feed_id AND
			ttrss_counters_cache.owner_uid = ttrss_feeds.owner_uid) = 0");

	$frows = $feed_cleanup->rowCount();

	$cat_cleanup = $pdo->query("DELETE FROM ttrss_cat_counters_cache
		WHERE feed_id > 0 AND
		(SELECT COUNT(id) FROM ttrss_feed_categories WHERE
			id = feed_id AND
			ttrss_cat_counters_cache.owner_uid = ttrss_feed_categories.owner_uid) = 0");

	$crows = $cat_cleanup->rowCount();

	Debug::log("Removed $frows (feeds) $crows (cats) orphaned counter cache entries.");
}
1503
/**
 * Runs the housekeeping hooks of one user's plugins.
 *
 * A fresh PluginHost is created, the user's plugins are loaded into it
 * and HOOK_HOUSE_KEEPING is run on that host.
 *
 * @param int $owner_uid user whose plugins are loaded
 */
static function housekeeping_user($owner_uid) {
	$plugin_host = new PluginHost();

	load_user_plugins($owner_uid, $plugin_host);

	$plugin_host->run_hooks(
		PluginHost::HOOK_HOUSE_KEEPING,
		"hook_house_keeping",
		"");
}
1511
/**
 * Runs the shared maintenance tasks, in this order: expire cached files,
 * lock files and error log entries; rebuild the feed browser cache;
 * purge orphaned articles; clean up the counter caches; finally run
 * HOOK_HOUSE_KEEPING on the global plugin host.
 */
static function housekeeping_common() {
	// expiration of files and log entries
	RSSUtils::expire_cached_files();
	RSSUtils::expire_lock_files();
	RSSUtils::expire_error_log();

	// feed browser cache
	$feeds_processed = RSSUtils::update_feedbrowser_cache();
	Debug::log("Feedbrowser updated, $feeds_processed feeds processed.");

	// orphaned data
	Article::purge_orphans();
	RSSUtils::cleanup_counters_cache();

	// plugins get their turn last
	$plugin_host = PluginHost::getInstance();
	$plugin_host->run_hooks(PluginHost::HOOK_HOUSE_KEEPING, "hook_house_keeping", "");
}
1525
/**
 * Downloads a feed's favicon when no icon file exists for it yet.
 *
 * The icon is stored as ICONS_DIR/<feed>.ico. When that file already
 * exists nothing is done and nothing is returned. Otherwise the favicon
 * URL is looked up for $site_url, fetched, and written to the icon file
 * if its leading bytes identify it as ICO, GIF, PNG, JPEG or BMP data.
 *
 * @param string $site_url site whose favicon is looked up
 * @param int $feed feed identifier used to build the icon file name
 * @return string|null path of the icon file when it did not exist on entry
 *         (whether or not a download succeeded); null when it already existed
 */
static function check_feed_favicon($site_url, $feed) {
	$icon_file = ICONS_DIR . "/$feed.ico";

	if (file_exists($icon_file)) {
		return;
	}

	$favicon_url = get_favicon_url($site_url);

	if (!$favicon_url) {
		return $icon_file;
	}

	// Limiting to "image" type misses those served with text/plain
	$contents = fetch_file_contents($favicon_url);

	if ($contents) {
		// Crude image type matching.
		// Patterns gleaned from the file(1) source code.
		$signatures = array(
			'/^\x00\x00\x01\x00/',           // MS Windows icon resource
			'/^GIF8/',                       // GIF image data
			'/^\x89PNG\x0d\x0a\x1a\x0a/',    // PNG image data
			'/^\xff\xd8/',                   // JPEG image data
			'/^BM/',                         // PC bitmap (OS2, Windows BMP files)
		);

		$recognized = false;

		foreach ($signatures as $signature) {
			if (preg_match($signature, $contents)) {
				$recognized = true;
				break;
			}
		}

		// unknown type: discard the download
		if (!$recognized) {
			$contents = "";
		}
	}

	if ($contents) {
		$fp = @fopen($icon_file, "w");

		if ($fp) {
			fwrite($fp, $contents);
			fclose($fp);
			chmod($icon_file, 0644);
		}
	}

	return $icon_file;
}
1580
1581
1582
1583 }