-#!/usr/bin/php
+#!/usr/bin/env php
<?php
- // This is an experimental multiprocess update daemon.
- // Some configurable variable may be found below.
-
- // define('DEFAULT_ERROR_LEVEL', E_ALL);
- define('DEFAULT_ERROR_LEVEL', E_ERROR | E_WARNING | E_PARSE);
+ set_include_path(dirname(__FILE__) ."/include" . PATH_SEPARATOR .
+ get_include_path());
declare(ticks = 1);
+ chdir(dirname(__FILE__));
- define('MAGPIE_CACHE_DIR', '/var/tmp/magpie-ttrss-cache-daemon');
- define('SIMPLEPIE_CACHE_DIR', '/var/tmp/simplepie-ttrss-cache-daemon');
define('DISABLE_SESSIONS', true);
- define('MAX_JOBS', 2);
-
require_once "version.php";
- if (strpos(VERSION, ".99") !== false) {
+ if (strpos(VERSION, ".99") !== false || getenv('DAEMON_XDEBUG')) {
define('DAEMON_EXTENDED_DEBUG', true);
}
- define('PURGE_INTERVAL', 3600); // seconds
-
+ require_once "functions.php";
+ require_once "rssfuncs.php";
require_once "sanity_check.php";
require_once "config.php";
+ require_once "db.php";
+ require_once "db-prefs.php";
- define('SPAWN_INTERVAL', DAEMON_SLEEP_INTERVAL);
+ // defaults
+ define('PURGE_INTERVAL', 3600); // seconds
+ define('MAX_CHILD_RUNTIME', 600); // seconds
+ define('MAX_JOBS', 2);
+ define('SPAWN_INTERVAL', DAEMON_SLEEP_INTERVAL); // seconds
- if (!ENABLE_UPDATE_DAEMON) {
- die("Please enable option ENABLE_UPDATE_DAEMON in config.php\n");
+ if (!function_exists('pcntl_fork')) {
+ die("error: This script requires PHP compiled with PCNTL module.\n");
}
-
- require_once "db.php";
- require_once "db-prefs.php";
- require_once "functions.php";
- require_once "magpierss/rss_fetch.inc";
- error_reporting(DEFAULT_ERROR_LEVEL);
+ $master_handlers_installed = false;
+
+ $children = array();
+ $ctimes = array();
- $running_jobs = 0;
$last_checkpoint = -1;
- function sigalrm_handler() {
- die("received SIGALRM, hang in feed update?\n");
+ // Prune the global $children list. A PID stays tracked only when
+ // pcntl_waitpid() did not return it (i.e. it was not reaped) AND its
+ // per-child lockfile "update_daemon-$pid.lock" is still locked. Every PID
+ // that is dropped also has its start time removed from $ctimes.
+ // Returns the number of PIDs still tracked after pruning.
+ function reap_children() {
+ global $children;
+ global $ctimes;
+
+ $tmp = array();
+
+ foreach ($children as $pid) {
+ // WNOHANG: poll this child without blocking the caller.
+ if (pcntl_waitpid($pid, $status, WNOHANG) != $pid) {
+
+ if (file_is_locked("update_daemon-$pid.lock")) {
+ array_push($tmp, $pid);
+ } else {
+ // Not reaped, yet the lock is gone: stop tracking the PID anyway.
+ _debug("[reap_children] child $pid seems active but lockfile is unlocked.");
+ unset($ctimes[$pid]);
+
+ }
+ } else {
+ _debug("[reap_children] child $pid reaped.");
+ unset($ctimes[$pid]);
+ }
+ }
+
+ $children = $tmp;
+
+ return count($tmp);
+ }
+
+ // Send SIGKILL to every child whose start time recorded in $ctimes is more
+ // than MAX_CHILD_RUNTIME seconds in the past. Entries are not removed from
+ // $ctimes here; only reap_children() unsets them.
+ function check_ctimes() {
+ global $ctimes;
+
+ foreach (array_keys($ctimes) as $pid) {
+ $started = $ctimes[$pid];
+
+ if (time() - $started > MAX_CHILD_RUNTIME) {
+ _debug("[MASTER] child process $pid seems to be stuck, aborting...");
+ posix_kill($pid, SIGKILL);
+ }
+ }
}
+ // SIGCHLD handler: re-runs reap_children(), logs how many children are
+ // still tracked, then polls once more (non-blocking) for any exited child.
function sigchld_handler($signal) {
- global $running_jobs;
- if ($running_jobs > 0) $running_jobs--;
- print posix_getpid() . ": SIGCHLD received, jobs left: $running_jobs\n";
+ $running_jobs = reap_children();
+
+ _debug("[SIGCHLD] jobs left: $running_jobs");
+
pcntl_waitpid(-1, $status, WNOHANG);
}
+ // Remove the master lockfile (LOCK_DIRECTORY/update_daemon.lock) if it
+ // exists, but only when the calling process is the one identified by
+ // $caller_pid; any other process does nothing.
+ // NOTE(review): presumably this guard keeps forked children, which inherit
+ // the registered shutdown function, from deleting the master's lock - confirm.
+ function shutdown($caller_pid) {
+ if ($caller_pid == posix_getpid()) {
+ if (file_exists(LOCK_DIRECTORY . "/update_daemon.lock")) {
+ _debug("removing lockfile (master)...");
+ unlink(LOCK_DIRECTORY . "/update_daemon.lock");
+ }
+ }
+ }
+
+ // Remove the calling process's own lockfile
+ // (LOCK_DIRECTORY/update_daemon-<pid>.lock) if it still exists.
+ function task_shutdown() {
+ $pid = posix_getpid();
+
+ if (file_exists(LOCK_DIRECTORY . "/update_daemon-$pid.lock")) {
+ _debug("removing lockfile ($pid)...");
+ unlink(LOCK_DIRECTORY . "/update_daemon-$pid.lock");
+ }
+ }
+
+ // SIGINT handler tagged [MASTER]: logs the signal, calls shutdown() with
+ // the current PID (which removes the master lockfile), then ends the script.
function sigint_handler() {
- unlink(LOCK_DIRECTORY . "/update_daemon.lock");
- die("Received SIGINT. Exiting.\n");
+ _debug("[MASTER] SIG_INT received.\n");
+ shutdown(posix_getpid());
+ die;
+ }
+
+ // SIGINT handler tagged [TASK]: logs the signal, removes this process's
+ // own lockfile via task_shutdown(), then ends the script.
+ function task_sigint_handler() {
+ _debug("[TASK] SIG_INT received.\n");
+ task_shutdown();
+ die;
}
- pcntl_signal(SIGALRM, 'sigalrm_handler');
pcntl_signal(SIGCHLD, 'sigchld_handler');
- pcntl_signal(SIGINT, 'sigint_handler');
- if (file_is_locked("update_daemon.lock")) {
- die("error: Can't create lockfile. ".
- "Maybe another daemon is already running.\n");
+ // Long options accepted on the command line. A trailing ":" marks an
+ // option that requires a value (see getopt()); names without it are flags.
+ // "interval" needs the ":" or the N in "--interval N" is never captured.
+ $longopts = array("log:",
+ "tasks:",
+ "interval:",
+ "quiet",
+ "help");
+
+ $options = getopt("", $longopts);
+
+ // --help: print usage and stop before any lockfile or fork is created.
+ if (isset($options["help"]) ) {
+ print "Tiny Tiny RSS update daemon.\n\n";
+ print "Options:\n";
+ print " --log FILE - log messages to FILE\n";
+ print " --tasks N - amount of update tasks to spawn\n";
+ print " default: " . MAX_JOBS . "\n";
+ print " --interval N - task spawn interval\n";
+ print " default: " . SPAWN_INTERVAL . " seconds.\n";
+ print " --quiet - don't output messages to stdout\n";
+ return;
+ }
+
+ define('QUIET', isset($options['quiet']));
+
+ // Number of concurrent update tasks: --tasks overrides MAX_JOBS.
+ if (isset($options["tasks"])) {
+ _debug("Set to spawn " . $options["tasks"] . " children.");
+ $max_jobs = $options["tasks"];
+ } else {
+ $max_jobs = MAX_JOBS;
+ }
+
+ // Seconds between spawn rounds: --interval overrides SPAWN_INTERVAL.
+ if (isset($options["interval"])) {
+ _debug("Spawn interval: " . $options["interval"] . " seconds.");
+ $spawn_interval = $options["interval"];
+ } else {
+ $spawn_interval = SPAWN_INTERVAL;
+ }
+
+ if (isset($options["log"])) {
+ _debug("Logging to " . $options["log"]);
+ define('LOGFILE', $options["log"]);
}
+ // Refuse to start when the master lockfile is already locked.
if (file_is_locked("update_daemon.lock")) {
die("error: Can't create lockfile. ".
"Maybe another daemon is already running.\n");
}
- if (!pcntl_fork()) {
- $lock_handle = make_lockfile("update_daemon.lock");
+ // Try to lock a file in order to avoid concurrent update.
+ $lock_handle = make_lockfile("update_daemon.lock");
- if (!$lock_handle) {
- die("error: Can't create lockfile. ".
- "Maybe another daemon is already running.\n");
- }
-
- while (true) { sleep(100); }
+ if (!$lock_handle) {
+ die("error: Can't create lockfile. ".
+ "Maybe another daemon is already running.\n");
}
// Testing database connection.
// It is unnecessary to start the fork loop if database is not ok.
- $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME);
+ $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME);
- if (!$link) {
- if (DB_TYPE == "mysql") {
- print mysql_error();
- }
- // PG seems to display its own errors just fine by default.
- return;
- }
+ if (!init_connection($link)) return;
db_close($link);
-
while (true) {
- $next_spawn = $last_checkpoint + SPAWN_INTERVAL - time();
+ // Since sleep is interrupted by SIGCHLD, we need another way to
+ // respect the spawn interval
+ $next_spawn = $last_checkpoint + $spawn_interval - time();
- if ($next_spawn % 10 == 0) {
- print "[MASTER] active jobs: $running_jobs, next spawn at $next_spawn sec\n";
+ if ($next_spawn % 60 == 0) {
+ $running_jobs = count($children);
+ _debug("[MASTER] active jobs: $running_jobs, next spawn at $next_spawn sec.");
}
- if ($last_checkpoint + SPAWN_INTERVAL < time()) {
+ if ($last_checkpoint + $spawn_interval < time()) {
+
+ check_ctimes();
+ reap_children();
- for ($j = $running_jobs; $j < MAX_JOBS; $j++) {
- print "[MASTER] spawning client $j...";
+ for ($j = count($children); $j < $max_jobs; $j++) {
$pid = pcntl_fork();
if ($pid == -1) {
die("fork failed!\n");
} else if ($pid) {
- $running_jobs++;
- print "OK [$running_jobs]\n";
+
+ if (!$master_handlers_installed) {
+ _debug("[MASTER] installing shutdown handlers");
+ pcntl_signal(SIGINT, 'sigint_handler');
+ register_shutdown_function('shutdown', posix_getpid());
+ $master_handlers_installed = true;
+ }
+
+ _debug("[MASTER] spawned client $j [PID:$pid]...");
+ array_push($children, $pid);
+ $ctimes[$pid] = time();
} else {
pcntl_signal(SIGCHLD, SIG_IGN);
- pcntl_signal(SIGINT, SIG_DFL);
+ pcntl_signal(SIGINT, 'task_sigint_handler');
+
+ register_shutdown_function('task_shutdown');
+
+ $my_pid = posix_getpid();
+ $lock_filename = "update_daemon-$my_pid.lock";
+
+ $lock_handle = make_lockfile($lock_filename);
+
+ if (!$lock_handle) {
+ die("error: Can't create lockfile ($lock_filename). ".
+ "Maybe another daemon is already running.\n");
+ }
// ****** Updating RSS code *******
// Only run in fork process.
$start_timestamp = time();
- $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME);
-
- if (!$link) {
- if (DB_TYPE == "mysql") {
- print mysql_error();
- }
- // PG seems to display its own errors just fine by default.
- return;
- }
+ $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME);
- if (DB_TYPE == "pgsql") {
- pg_query("set client_encoding = 'utf-8'");
- pg_set_client_encoding("UNICODE");
- } else {
- if (defined('MYSQL_CHARSET') && MYSQL_CHARSET) {
- db_query($link, "SET NAMES " . MYSQL_CHARSET);
- // db_query($link, "SET CHARACTER SET " . MYSQL_CHARSET);
- }
- }
+ if (!init_connection($link)) return;
// We disable stamp file, since it is of no use in a multiprocess update.
// not really, tho for the time being -fox
if (!make_stampfile('update_daemon.stamp')) {
- print "warning: unable to create stampfile";
- }
-
- // $last_purge = 0;
-
- // if (time() - $last_purge > PURGE_INTERVAL) {
-
- // FIXME : $last_purge is of no use in a multiprocess update.
- // FIXME : We ALWAYS purge old posts.
- _debug("Purging old posts (random 30 feeds)...");
- global_purge_old_posts($link, true, 30);
-
- // $last_purge = time();
- // }
-
- // Process all other feeds using last_updated and interval parameters
-
- $random_qpart = sql_random_function();
-
- if (DAEMON_UPDATE_LOGIN_LIMIT > 0) {
- if (DB_TYPE == "pgsql") {
- $login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
- } else {
- $login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
- }
- } else {
- $login_thresh_qpart = "";
- }
-
- if (DB_TYPE == "pgsql") {
- $update_limit_qpart = "AND ttrss_feeds.last_updated < NOW() - INTERVAL '".(DAEMON_SLEEP_INTERVAL*2)." seconds'";
- } else {
- $update_limit_qpart = "AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ".(DAEMON_SLEEP_INTERVAL*2)." SECOND)";
- }
-
- if (DB_TYPE == "pgsql") {
- $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')";
- } else {
- $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))";
- }
-
- $result = db_query($link, "SELECT feed_url,ttrss_feeds.id,owner_uid,
- SUBSTRING(last_updated,1,19) AS last_updated,
- update_interval
- FROM
- ttrss_feeds,ttrss_users
- WHERE
- ttrss_users.id = owner_uid $login_thresh_qpart $update_limit_qpart
- $updstart_thresh_qpart
- ORDER BY $random_qpart DESC LIMIT " . DAEMON_FEED_LIMIT);
-
- $user_prefs_cache = array();
-
- _debug(sprintf("Scheduled %d feeds to update...\n", db_num_rows($result)));
-
- // Here is a little cache magic in order to minimize risk of double feed updates.
- $feeds_to_update = array();
- while ($line = db_fetch_assoc($result)) {
- $feeds_to_update[$line['id']] = $line;
+ die("error: unable to create stampfile\n");
}
- // We update the feed last update started date before anything else.
- // There is no lag due to feed contents downloads
- // It prevent an other process to update the same feed.
- $feed_ids = array_keys($feeds_to_update);
- if($feed_ids) {
- db_query($link, sprintf("UPDATE ttrss_feeds SET last_update_started = NOW()
- WHERE id IN (%s)", implode(',', $feed_ids)));
- }
-
- while ($line = array_pop($feeds_to_update)) {
-
- $upd_intl = $line["update_interval"];
- $user_id = $line["owner_uid"];
-
- if (!$upd_intl || $upd_intl == 0) {
- if (!$user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL']) {
- $upd_intl = get_pref($link, 'DEFAULT_UPDATE_INTERVAL', $user_id);
- $user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL'] = $upd_intl;
- } else {
- $upd_intl = $user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL'];
- }
- }
-
- if ($upd_intl < 0) {
- # print "Updates disabled.\n";
- continue;
- }
-
- _debug("Feed: " . $line["feed_url"] . ", " . $line["last_updated"]);
+ // Call to the feed batch update function
+ // or regenerate feedbrowser cache
- // _debug(sprintf("\tLU: %d, INTL: %d, UID: %d) ",
- // time() - strtotime($line["last_updated"]), $upd_intl*60, $user_id));
-
- if (!$line["last_updated"] ||
- time() - strtotime($line["last_updated"]) > ($upd_intl * 60)) {
-
- _debug("Updating...");
+ if (rand(0,100) > 30) {
+ _debug("Waiting before update..");
+ sleep(rand(5,15));
+ update_daemon_common($link);
+ } else {
+ $count = update_feedbrowser_cache($link);
+ _debug("Feedbrowser updated, $count feeds processed.");
- pcntl_alarm(300);
+ purge_orphans($link, true);
- update_rss_feed($link, $line["feed_url"], $line["id"], true);
+ $rc = cleanup_tags($link, 14, 50000);
- pcntl_alarm(0);
+ _debug("Cleaned $rc cached tags.");
- sleep(1); // prevent flood (FIXME make this an option?)
- } else {
- _debug("Update not needed.");
- }
+ global $pluginhost;
+ $pluginhost->run_hooks($pluginhost::HOOK_UPDATE_TASK, "hook_update_task", $op);
}
- if (DAEMON_SENDS_DIGESTS) send_headlines_digests($link);
-
- print "Elapsed time: " . (time() - $start_timestamp) . " second(s)\n";
+ _debug("Elapsed time: " . (time() - $start_timestamp) . " second(s)");
db_close($link);
// We are in a fork.
// We wait a little before exiting to avoid to be faster than our parent process.
sleep(1);
+
+ unlink(LOCK_DIRECTORY . "/$lock_filename");
+
// We exit in order to avoid fork bombing.
exit(0);
}