X-Git-Url: https://git.wh0rd.org/?a=blobdiff_plain;f=update_daemon2.php;h=6ccdbcb100e5a7fc2afd614563a1098159b92e76;hb=9b27cec8c01ff231085bc2045d196ace85e299d9;hp=7e06cf9a9b64bc775fc182a34a3b3cab84bb781a;hpb=e93384052309f1119c205b6f21ae21f06fc03e88;p=tt-rss.git diff --git a/update_daemon2.php b/update_daemon2.php old mode 100644 new mode 100755 index 7e06cf9a..6ccdbcb1 --- a/update_daemon2.php +++ b/update_daemon2.php @@ -3,17 +3,11 @@ // This is an experimental multiprocess update daemon. // Some configurable variable may be found below. - // define('DEFAULT_ERROR_LEVEL', E_ALL); - define('DEFAULT_ERROR_LEVEL', E_ERROR | E_WARNING | E_PARSE); - declare(ticks = 1); + chdir(dirname(__FILE__)); - define('MAGPIE_CACHE_DIR', '/var/tmp/magpie-ttrss-cache-daemon'); - define('SIMPLEPIE_CACHE_DIR', '/var/tmp/simplepie-ttrss-cache-daemon'); define('DISABLE_SESSIONS', true); - define('MAX_JOBS', 2); - require_once "version.php"; if (strpos(VERSION, ".99") !== false || getenv('DAEMON_XDEBUG')) { @@ -21,72 +15,108 @@ } define('PURGE_INTERVAL', 3600); // seconds + define('MAX_CHILD_RUNTIME', 600); // seconds require_once "sanity_check.php"; require_once "config.php"; + define('MAX_JOBS', 2); define('SPAWN_INTERVAL', DAEMON_SLEEP_INTERVAL); - if (!ENABLE_UPDATE_DAEMON) { - die("Please enable option ENABLE_UPDATE_DAEMON in config.php\n"); + if (!function_exists('pcntl_fork')) { + die("error: This script requires PHP compiled with PCNTL module.\n"); } - + require_once "db.php"; require_once "db-prefs.php"; require_once "functions.php"; - require_once "magpierss/rss_fetch.inc"; - - error_reporting(DEFAULT_ERROR_LEVEL); + require_once "lib/magpierss/rss_fetch.inc"; $children = array(); + $ctimes = array(); $last_checkpoint = -1; - function sigalrm_handler() { - die("received SIGALRM, hang in feed update?\n"); - } - - function sigchld_handler($signal) { + function reap_children() { global $children; + global $ctimes; $tmp = array(); foreach ($children as $pid) { if 
(pcntl_waitpid($pid, $status, WNOHANG) != $pid) { - array_push($tmp, $pid); + + if (file_is_locked("update_daemon-$pid.lock")) { + array_push($tmp, $pid); + } else { + _debug("[reap_children] child $pid seems active but lockfile is unlocked."); + } } else { - _debug("[SIGCHLD] child $pid reaped."); + _debug("[reap_children] child $pid reaped."); + unset($ctimes[$pid]); } } $children = $tmp; - $running_jobs = count($children); + return count($tmp); + } + + function check_ctimes() { + global $ctimes; + + foreach (array_keys($ctimes) as $pid) { + $started = $ctimes[$pid]; + + if (time() - $started > MAX_CHILD_RUNTIME) { + _debug("[MASTER] child process $pid seems to be stuck, aborting..."); + posix_kill($pid, SIGKILL); + } + } + } + + function sigchld_handler($signal) { + $running_jobs = reap_children(); _debug("[SIGCHLD] jobs left: $running_jobs"); + pcntl_waitpid(-1, $status, WNOHANG); } - function sigint_handler() { - unlink(LOCK_DIRECTORY . "/update_daemon.lock"); - die("Received SIGINT. Exiting.\n"); + function shutdown() { + if (file_exists(LOCK_DIRECTORY . "/update_daemon.lock")) + unlink(LOCK_DIRECTORY . "/update_daemon.lock"); } - pcntl_signal(SIGALRM, 'sigalrm_handler'); - pcntl_signal(SIGCHLD, 'sigchld_handler'); - pcntl_signal(SIGINT, 'sigint_handler'); + function task_shutdown() { + $pid = posix_getpid(); - if (file_is_locked("update_daemon.lock")) { - die("error: Can't create lockfile. ". - "Maybe another daemon is already running.\n"); + if (file_exists(LOCK_DIRECTORY . "/update_daemon-$pid.lock")) + unlink(LOCK_DIRECTORY . "/update_daemon-$pid.lock"); + } + + function sigint_handler() { + shutdown(); + die("[SIGINT] removing lockfile and exiting.\n"); } + function task_sigint_handler() { + task_shutdown(); + die("[SIGINT] removing lockfile and exiting.\n"); + } + + pcntl_signal(SIGCHLD, 'sigchld_handler'); + if (file_is_locked("update_daemon.lock")) { die("error: Can't create lockfile. ".
"Maybe another daemon is already running.\n"); } if (!pcntl_fork()) { + pcntl_signal(SIGINT, 'sigint_handler'); + register_shutdown_function('shutdown'); + + // Try to lock a file in order to avoid concurrent update. $lock_handle = make_lockfile("update_daemon.lock"); if (!$lock_handle) { @@ -99,21 +129,22 @@ // Testing database connection. // It is unnecessary to start the fork loop if database is not ok. - $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME); + $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME); if (!$link) { if (DB_TYPE == "mysql") { print mysql_error(); } - // PG seems to display its own errors just fine by default. + // PG seems to display its own errors just fine by default. return; } db_close($link); - while (true) { + // Since sleep is interrupted by SIGCHLD, we need another way to + // respect the SPAWN_INTERVAL $next_spawn = $last_checkpoint + SPAWN_INTERVAL - time(); if ($next_spawn % 10 == 0) { @@ -123,6 +154,9 @@ if ($last_checkpoint + SPAWN_INTERVAL < time()) { + check_ctimes(); + reap_children(); + for ($j = count($children); $j < MAX_JOBS; $j++) { $pid = pcntl_fork(); if ($pid == -1) { @@ -130,161 +164,80 @@ } else if ($pid) { _debug("[MASTER] spawned client $j [PID:$pid]..."); array_push($children, $pid); + $ctimes[$pid] = time(); } else { pcntl_signal(SIGCHLD, SIG_IGN); - pcntl_signal(SIGINT, SIG_DFL); + pcntl_signal(SIGINT, 'task_sigint_handler'); + + register_shutdown_function('task_shutdown'); + + $my_pid = posix_getpid(); + $lock_filename = "update_daemon-$my_pid.lock"; + + $lock_handle = make_lockfile($lock_filename); + + if (!$lock_handle) { + die("error: Can't create lockfile ($lock_filename). ". + "Maybe another daemon is already running.\n"); + } // ****** Updating RSS code ******* // Only run in fork process.
$start_timestamp = time(); - $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME); + $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME); if (!$link) { if (DB_TYPE == "mysql") { print mysql_error(); } - // PG seems to display its own errors just fine by default. + // PG seems to display its own errors just fine by default. return; } - if (DB_TYPE == "pgsql") { - pg_query("set client_encoding = 'utf-8'"); - pg_set_client_encoding("UNICODE"); - } else { - if (defined('MYSQL_CHARSET') && MYSQL_CHARSET) { - db_query($link, "SET NAMES " . MYSQL_CHARSET); - // db_query($link, "SET CHARACTER SET " . MYSQL_CHARSET); - } - } + init_connection($link); // We disable stamp file, since it is of no use in a multiprocess update. // not really, tho for the time being -fox if (!make_stampfile('update_daemon.stamp')) { print "warning: unable to create stampfile"; - } - - // $last_purge = 0; - - // if (time() - $last_purge > PURGE_INTERVAL) { - - // FIXME : $last_purge is of no use in a multiprocess update. - // FIXME : We ALWAYS purge old posts. - _debug("Purging old posts (random 30 feeds)..."); - global_purge_old_posts($link, true, 30); - - // $last_purge = time(); - // } - - // Process all other feeds using last_updated and interval parameters - - $random_qpart = sql_random_function(); - - if (DAEMON_UPDATE_LOGIN_LIMIT > 0) { - if (DB_TYPE == "pgsql") { - $login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'"; - } else { - $login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)"; - } - } else { - $login_thresh_qpart = ""; - } - - if (DB_TYPE == "pgsql") { - $update_limit_qpart = "AND ttrss_feeds.last_updated < NOW() - INTERVAL '".(DAEMON_SLEEP_INTERVAL*2)." seconds'"; - } else { - $update_limit_qpart = "AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ".(DAEMON_SLEEP_INTERVAL*2)." 
SECOND)"; } - if (DB_TYPE == "pgsql") { - $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')"; - } else { - $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))"; - } - - $result = db_query($link, "SELECT feed_url,ttrss_feeds.id,owner_uid, - SUBSTRING(last_updated,1,19) AS last_updated, - update_interval - FROM - ttrss_feeds,ttrss_users - WHERE - ttrss_users.id = owner_uid $login_thresh_qpart $update_limit_qpart - $updstart_thresh_qpart - ORDER BY $random_qpart DESC LIMIT " . DAEMON_FEED_LIMIT); - - $user_prefs_cache = array(); - - _debug(sprintf("Scheduled %d feeds to update...\n", db_num_rows($result))); - - // Here is a little cache magic in order to minimize risk of double feed updates. - $feeds_to_update = array(); - while ($line = db_fetch_assoc($result)) { - $feeds_to_update[$line['id']] = $line; - } - - // We update the feed last update started date before anything else. - // There is no lag due to feed contents downloads - // It prevent an other process to update the same feed. 
- $feed_ids = array_keys($feeds_to_update); - if($feed_ids) { - db_query($link, sprintf("UPDATE ttrss_feeds SET last_update_started = NOW() - WHERE id IN (%s)", implode(',', $feed_ids))); - } - - while ($line = array_pop($feeds_to_update)) { - - $upd_intl = $line["update_interval"]; - $user_id = $line["owner_uid"]; - - if (!$upd_intl || $upd_intl == 0) { - if (!$user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL']) { - $upd_intl = get_pref($link, 'DEFAULT_UPDATE_INTERVAL', $user_id); - $user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL'] = $upd_intl; - } else { - $upd_intl = $user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL']; - } - } - - if ($upd_intl < 0) { - # print "Updates disabled.\n"; - continue; - } + // Call to the feed batch update function + // or regenerate feedbrowser cache - _debug("Feed: " . $line["feed_url"] . ", " . $line["last_updated"]); - - // _debug(sprintf("\tLU: %d, INTL: %d, UID: %d) ", - // time() - strtotime($line["last_updated"]), $upd_intl*60, $user_id)); - - if (!$line["last_updated"] || - time() - strtotime($line["last_updated"]) > ($upd_intl * 60)) { - - _debug("Updating..."); + if (rand(0,100) > 30) { + update_daemon_common($link); + } else { + $count = update_feedbrowser_cache($link); + _debug("Feedbrowser updated, $count feeds processed."); - pcntl_alarm(300); + purge_orphans($link, true); - update_rss_feed($link, $line["feed_url"], $line["id"], true); + $rc = cleanup_tags($link, 14, 50000); - pcntl_alarm(0); + _debug("Cleaned $rc cached tags."); - sleep(1); // prevent flood (FIXME make this an option?) - } else { - _debug("Update not needed."); - } } - if (DAEMON_SENDS_DIGESTS) send_headlines_digests($link); - - print "Elapsed time: " . (time() - $start_timestamp) . " second(s)\n"; + _debug("Elapsed time: " . (time() - $start_timestamp) . " second(s)"); db_close($link); // We are in a fork. // We wait a little before exiting to avoid to be faster than our parent process. sleep(1); + + unlink(LOCK_DIRECTORY . 
"/$lock_filename"); + // We exit in order to avoid fork bombing. exit(0); } + + // We wait a little time before the next fork, in order to let the first fork + // mark the feeds it updates: + sleep(1); } $last_checkpoint = time(); }