]> git.wh0rd.org - tt-rss.git/blobdiff - update_daemon2.php
first stage of headline element handling refactoring
[tt-rss.git] / update_daemon2.php
old mode 100644 (file)
new mode 100755 (executable)
index 326b69e..007d628
@@ -1,57 +1,71 @@
-#!/usr/bin/php
+#!/usr/bin/env php
 <?php
-       // This is an experimental multiprocess update daemon.
-       // Some configurable variable may be found below.
-
-       // define('DEFAULT_ERROR_LEVEL', E_ALL);
-       define('DEFAULT_ERROR_LEVEL', E_ERROR | E_WARNING | E_PARSE);
+       set_include_path(dirname(__FILE__) ."/include" . PATH_SEPARATOR .
+               get_include_path());
 
        declare(ticks = 1);
+       chdir(dirname(__FILE__));
 
-       define('MAGPIE_CACHE_DIR', '/var/tmp/magpie-ttrss-cache-daemon');
-       define('SIMPLEPIE_CACHE_DIR',   '/var/tmp/simplepie-ttrss-cache-daemon');
        define('DISABLE_SESSIONS', true);
 
-       define('MAX_JOBS', 2);
-
        require_once "version.php";
+       require_once "autoload.php";
+       require_once "functions.php";
+       require_once "config.php";
 
-       if (strpos(VERSION, ".99") !== false || getenv('DAEMON_XDEBUG')) {
-               define('DAEMON_EXTENDED_DEBUG', true);
-       }
-
-       define('PURGE_INTERVAL', 3600); // seconds
+       // defaults
+       define_default('PURGE_INTERVAL', 3600); // seconds
+       define_default('MAX_CHILD_RUNTIME', 1800); // seconds
+       define_default('MAX_JOBS', 2);
+       define_default('SPAWN_INTERVAL', DAEMON_SLEEP_INTERVAL); // seconds
 
        require_once "sanity_check.php";
-       require_once "config.php";
+       require_once "db.php";
+       require_once "db-prefs.php";
 
-       define('SPAWN_INTERVAL', DAEMON_SLEEP_INTERVAL);
+       if (!function_exists('pcntl_fork')) {
+               die("error: This script requires PHP compiled with PCNTL module.\n");
+       }
+
+       $options = getopt("");
 
-       if (!ENABLE_UPDATE_DAEMON) {
-               die("Please enable option ENABLE_UPDATE_DAEMON in config.php\n");
+       if (!is_array($options)) {
+               die("error: getopt() failed. ".
+                       "Most probably you are using PHP CGI to run this script ".
+                       "instead of required PHP CLI. Check tt-rss wiki page on updating feeds for ".
+                       "additional information.\n");
        }
-       
-       require_once "db.php";
-       require_once "db-prefs.php";
-       require_once "functions.php";
-       require_once "magpierss/rss_fetch.inc";
 
-       error_reporting(DEFAULT_ERROR_LEVEL);
+
+       $master_handlers_installed = false;
 
        $children = array();
+       $ctimes = array();
 
        $last_checkpoint = -1;
 
+       /**
+        * @SuppressWarnings(unused)
+        */
        function reap_children() {
                global $children;
+               global $ctimes;
 
                $tmp = array();
 
                foreach ($children as $pid) {
                        if (pcntl_waitpid($pid, $status, WNOHANG) != $pid) {
-                               array_push($tmp, $pid);
+
+                               if (file_is_locked("update_daemon-$pid.lock")) {
+                                       array_push($tmp, $pid);
+                               } else {
+                                       Debug::log("[reap_children] child $pid seems active but lockfile is unlocked.");
+                                       unset($ctimes[$pid]);
+
+                               }
                        } else {
-                               _debug("[SIGCHLD] child $pid reaped.");
+                               Debug::log("[reap_children] child $pid reaped.");
+                               unset($ctimes[$pid]);
                        }
                }
 
                return count($tmp);
        }
 
-       function sigalrm_handler() {
-               die("received SIGALRM, hang in feed update?\n");
+       /**
+        * Send SIGKILL to every tracked child whose runtime exceeds MAX_CHILD_RUNTIME seconds.
+        *
+        * Start timestamps are read from the global $ctimes map (PID => time() at spawn).
+        * Entries are not removed here.
+        */
+       function check_ctimes() {
+               global $ctimes;
+
+               foreach ($ctimes as $pid => $started) {
+                       $runtime = time() - $started;
+
+                       if ($runtime <= MAX_CHILD_RUNTIME) {
+                               continue;
+                       }
+
+                       Debug::log("[MASTER] child process $pid seems to be stuck, aborting...");
+                       posix_kill($pid, SIGKILL);
+               }
        }
 
+       /**
+       * @SuppressWarnings(unused)
+       */
        function sigchld_handler($signal) {
                $running_jobs = reap_children();
 
-               _debug("[SIGCHLD] jobs left: $running_jobs");
+               Debug::log("[SIGCHLD] jobs left: $running_jobs");
 
                pcntl_waitpid(-1, $status, WNOHANG);
        }
 
+       /**
+        * Remove the master lockfile, but only when running in the process identified
+        * by $caller_pid.
+        *
+        * @param int $caller_pid PID to compare against the current process; when they
+        *                        differ nothing is removed (presumably so that forked
+        *                        children leave the master lockfile alone - confirm).
+        */
+       function shutdown($caller_pid) {
+               if ($caller_pid != posix_getpid()) {
+                       return;
+               }
+
+               $lockfile = LOCK_DIRECTORY . "/update_daemon.lock";
+
+               if (file_exists($lockfile)) {
+                       Debug::log("removing lockfile (master)...");
+                       unlink($lockfile);
+               }
+       }
+
+       /**
+        * Remove the per-task lockfile (update_daemon-PID.lock) that belongs to the
+        * current process, if it exists.
+        */
+       function task_shutdown() {
+               $pid = posix_getpid();
+               $lockfile = LOCK_DIRECTORY . "/update_daemon-$pid.lock";
+
+               if (!file_exists($lockfile)) {
+                       return;
+               }
+
+               Debug::log("removing lockfile ($pid)...");
+               unlink($lockfile);
+       }
+
        function sigint_handler() {
-               unlink(LOCK_DIRECTORY . "/update_daemon.lock");
-               die("Received SIGINT. Exiting.\n");
+               Debug::log("[MASTER] SIG_INT received.\n");
+               shutdown(posix_getpid());
+               die;
+       }
+
+       function task_sigint_handler() {
+               Debug::log("[TASK] SIG_INT received.\n");
+               task_shutdown();
+               die;
        }
 
-       pcntl_signal(SIGALRM, 'sigalrm_handler');
        pcntl_signal(SIGCHLD, 'sigchld_handler');
-       pcntl_signal(SIGINT, 'sigint_handler');
 
-       if (file_is_locked("update_daemon.lock")) {
-               die("error: Can't create lockfile. ".
-                       "Maybe another daemon is already running.\n");
+       $longopts = array("log:",
+                       "log-level:",
+                       "tasks:",
+                       "interval:",
+                       "quiet",
+                       "help");
+
+       $options = getopt("", $longopts);
+
+       if (isset($options["help"]) ) {
+               print "Tiny Tiny RSS update daemon.\n\n";
+               print "Options:\n";
+               print "  --log FILE           - log messages to FILE\n";
+        print "  --log-level N        - log verbosity level\n";
+               print "  --tasks N            - amount of update tasks to spawn\n";
+               print "                         default: " . MAX_JOBS . "\n";
+               print "  --interval N         - task spawn interval\n";
+               print "                         default: " . SPAWN_INTERVAL . " seconds.\n";
+               print "  --quiet              - don't output messages to stdout\n";
+               return;
        }
 
+    Debug::set_enabled(true);
+    Debug::set_quiet(isset($options['quiet']));
+
+    if (isset($options["log-level"])) {
+        Debug::set_loglevel((int)$options["log-level"]);
+    }
+
+    if (isset($options["log"])) {
+        Debug::set_logfile($options["log"]);
+        Debug::log("Logging to " . $options["log"]);
+    }
+
+       // getopt() returns option values as strings. Normalize them to integers before use:
+       // on PHP 8 a non-numeric string is compared as a string, so "$j < $max_jobs" in the
+       // spawn loop would never become false (unbounded forking) and max(60, ...) would
+       // return the string itself, breaking the interval arithmetic further down.
+       if (isset($options["tasks"])) {
+               $max_jobs = (int) $options["tasks"];
+
+               if ($max_jobs < 1) {
+                       Debug::log("Invalid --tasks value, using default: " . MAX_JOBS);
+                       $max_jobs = (int) MAX_JOBS;
+               } else {
+                       Debug::log("Set to spawn " . $max_jobs . " children.");
+               }
+       } else {
+               $max_jobs = (int) MAX_JOBS;
+       }
+
+       if (isset($options["interval"])) {
+               $spawn_interval = (int) $options["interval"];
+               Debug::log("Spawn interval: " . $spawn_interval . " seconds.");
+       } else {
+               $spawn_interval = (int) SPAWN_INTERVAL;
+       }
+
+       // Enforce a minimum spawn interval so as not to fork-bomb the host.
+       $spawn_interval = max(60, $spawn_interval);
+       Debug::log("Spawn interval: $spawn_interval sec");
+
        if (file_is_locked("update_daemon.lock")) {
                die("error: Can't create lockfile. ".
                        "Maybe another daemon is already running.\n");
        }
 
-       if (!pcntl_fork()) {
-               $lock_handle = make_lockfile("update_daemon.lock");
+       // Try to lock a file in order to avoid concurrent update.
+       $lock_handle = make_lockfile("update_daemon.lock");
 
-               if (!$lock_handle) {
-                       die("error: Can't create lockfile. ".
-                               "Maybe another daemon is already running.\n");
-               }
-
-               while (true) { sleep(100); }
+       if (!$lock_handle) {
+               die("error: Can't create lockfile. ".
+                       "Maybe another daemon is already running.\n");
        }
 
-       // Testing database connection.
-       // It is unnecessary to start the fork loop if database is not ok.
-       $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME); 
+       $schema_version = get_schema_version();
 
-       if (!$link) {
-               if (DB_TYPE == "mysql") {
-                       print mysql_error();
-               }
-               // PG seems to display its own errors just fine by default.             
-               return;
+       if ($schema_version != SCHEMA_VERSION) {
+               die("Schema version is wrong, please upgrade the database.\n");
        }
 
-       db_close($link);
-
+       // Note: children close the shared database handle when they terminate, so it is
+       // a bad idea to do any database work in the main process from now on.
 
        while (true) {
 
-               $next_spawn = $last_checkpoint + SPAWN_INTERVAL - time();
+               // Since sleep is interrupted by SIGCHLD, we need another way to
+               // respect the spawn interval.
+               $next_spawn = $last_checkpoint + $spawn_interval - time();
 
-               if ($next_spawn % 10 == 0) {
+               if ($next_spawn % 60 == 0) {
                        $running_jobs = count($children);
-                       _debug("[MASTER] active jobs: $running_jobs, next spawn at $next_spawn sec.");
+                       Debug::log("[MASTER] active jobs: $running_jobs, next spawn at $next_spawn sec.");
                }
 
-               if ($last_checkpoint + SPAWN_INTERVAL < time()) {
-
+               if ($last_checkpoint + $spawn_interval < time()) {
+                       check_ctimes();
                        reap_children();
 
-                       for ($j = count($children); $j < MAX_JOBS; $j++) {
+                       for ($j = count($children); $j < $max_jobs; $j++) {
                                $pid = pcntl_fork();
                                if ($pid == -1) {
                                        die("fork failed!\n");
                                } else if ($pid) {
-                                       _debug("[MASTER] spawned client $j [PID:$pid]...");
-                                       array_push($children, $pid);
-                               } else {
-                                       pcntl_signal(SIGCHLD, SIG_IGN);
-                                       pcntl_signal(SIGINT, SIG_DFL);
-
-                                       // ****** Updating RSS code *******
-                                       // Only run in fork process.
 
-                                       $start_timestamp = time();
-
-                                       $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME); 
-
-                                       if (!$link) {
-                                               if (DB_TYPE == "mysql") {
-                                                       print mysql_error();
-                                               }
-                                               // PG seems to display its own errors just fine by default.             
-                                               return;
-                                       }
-
-                                       if (DB_TYPE == "pgsql") {
-                                               pg_query("set client_encoding = 'utf-8'");
-                                               pg_set_client_encoding("UNICODE");
-                                       } else {
-                                               if (defined('MYSQL_CHARSET') && MYSQL_CHARSET) {
-                                                       db_query($link, "SET NAMES " . MYSQL_CHARSET);
-                                                       // db_query($link, "SET CHARACTER SET " . MYSQL_CHARSET);
-                                               }
+                                       if (!$master_handlers_installed) {
+                                               Debug::log("[MASTER] installing shutdown handlers");
+                                               pcntl_signal(SIGINT, 'sigint_handler');
+                                               pcntl_signal(SIGTERM, 'sigint_handler');
+                                               register_shutdown_function('shutdown', posix_getpid());
+                                               $master_handlers_installed = true;
                                        }
 
-                                       // We disable stamp file, since it is of no use in a multiprocess update.
-                                       // not really, tho for the time being -fox
-                                       if (!make_stampfile('update_daemon.stamp')) {
-                                               print "warning: unable to create stampfile";
-                                       }       
-
-                                       // $last_purge = 0;
-
-                                       // if (time() - $last_purge > PURGE_INTERVAL) {
-
-                                       // FIXME : $last_purge is of no use in a multiprocess update.
-                                       // FIXME : We ALWAYS purge old posts.
-                                       _debug("Purging old posts (random 30 feeds)...");
-                                       global_purge_old_posts($link, true, 30);
-
-                                       //      $last_purge = time();
-                                       // }
-
-                                       // Process all other feeds using last_updated and interval parameters
-
-                                       $random_qpart = sql_random_function();
-                                               
-                                       if (DAEMON_UPDATE_LOGIN_LIMIT > 0) {
-                                               if (DB_TYPE == "pgsql") {
-                                                       $login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
-                                               } else {
-                                                       $login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
-                                               }                       
-                                       } else {
-                                               $login_thresh_qpart = "";
-                                       }
-
-                                       if (DB_TYPE == "pgsql") {
-                                               $update_limit_qpart = "AND ttrss_feeds.last_updated < NOW() - INTERVAL '".(DAEMON_SLEEP_INTERVAL*2)." seconds'";
-                                       } else {
-                                               $update_limit_qpart = "AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ".(DAEMON_SLEEP_INTERVAL*2)." SECOND)";
-                                       }
-
-                                       if (DB_TYPE == "pgsql") {
-                                                       $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')";
-                                               } else {
-                                                       $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))";
-                                               }                       
-
-                                       $result = db_query($link, "SELECT feed_url,ttrss_feeds.id,owner_uid,
-                                                       SUBSTRING(last_updated,1,19) AS last_updated,
-                                                       update_interval 
-                                               FROM 
-                                                       ttrss_feeds,ttrss_users 
-                                               WHERE 
-                                                       ttrss_users.id = owner_uid $login_thresh_qpart $update_limit_qpart 
-                                                       $updstart_thresh_qpart
-                                               ORDER BY $random_qpart DESC LIMIT " . DAEMON_FEED_LIMIT);
-
-                                       $user_prefs_cache = array();
-
-                                       _debug(sprintf("Scheduled %d feeds to update...\n", db_num_rows($result)));
-
-                                       // Here is a little cache magic in order to minimize risk of double feed updates.
-                                       $feeds_to_update = array();
-                                       while ($line = db_fetch_assoc($result)) {
-                                               $feeds_to_update[$line['id']] = $line;
-                                       }
-
-                                       // We update the feed last update started date before anything else.
-                                       // There is no lag due to feed contents downloads
-                                       // It prevent an other process to update the same feed.
-                                       $feed_ids = array_keys($feeds_to_update);
-                                       if($feed_ids) {
-                                               db_query($link, sprintf("UPDATE ttrss_feeds SET last_update_started = NOW()
-                                                       WHERE id IN (%s)", implode(',', $feed_ids)));
-                                       }
-
-                                       while ($line = array_pop($feeds_to_update)) {
-
-                                               $upd_intl = $line["update_interval"];
-                                               $user_id = $line["owner_uid"];
-
-                                               if (!$upd_intl || $upd_intl == 0) {
-                                                       if (!$user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL']) {                  
-                                                               $upd_intl = get_pref($link, 'DEFAULT_UPDATE_INTERVAL', $user_id);
-                                                               $user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL'] = $upd_intl;
-                                                       } else {
-                                                               $upd_intl = $user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL'];
-                                                       }
-                                               }
-
-                                               if ($upd_intl < 0) { 
-                               #                               print "Updates disabled.\n";
-                                                       continue; 
-                                               }
-
-                                               _debug("Feed: " . $line["feed_url"] . ", " . $line["last_updated"]);
-
-                               //                      _debug(sprintf("\tLU: %d, INTL: %d, UID: %d) ", 
-                               //                              time() - strtotime($line["last_updated"]), $upd_intl*60, $user_id));
-
-                                               if (!$line["last_updated"] || 
-                                                       time() - strtotime($line["last_updated"]) > ($upd_intl * 60)) {
-
-                                                       _debug("Updating...");
-
-                                                       pcntl_alarm(300);
-
-                                                       update_rss_feed($link, $line["feed_url"], $line["id"], true);   
-
-                                                       pcntl_alarm(0);
+                                       Debug::log("[MASTER] spawned client $j [PID:$pid]...");
+                                       array_push($children, $pid);
+                                       $ctimes[$pid] = time();
+                               } else {
+                                       pcntl_signal(SIGCHLD, SIG_IGN);
+                                       pcntl_signal(SIGINT, 'task_sigint_handler');
 
-                                                       sleep(1); // prevent flood (FIXME make this an option?)
-                                               } else {
-                                                       _debug("Update not needed.");
-                                               }
-                                       }
+                                       register_shutdown_function('task_shutdown');
 
-                                       if (DAEMON_SENDS_DIGESTS) send_headlines_digests($link);
+                                       $quiet = (isset($options["quiet"])) ? "--quiet" : "";
+
+                                       // The log path comes straight from the command line; quote it so that
+                                       // spaces or shell metacharacters cannot break or alter the child command.
+                                       $log = function_exists("flock") && isset($options['log']) && is_string($options['log'])
+                                               ? '--log ' . escapeshellarg($options['log'])
+                                               : '';
 
-                                       print "Elapsed time: " . (time() - $start_timestamp) . " second(s)\n";
+                                       $my_pid = posix_getpid();
 
-                                       db_close($link);
+                                       passthru(PHP_EXECUTABLE . " update.php --daemon-loop $quiet $log --task $j --pidlock $my_pid");
 
-                                       // We are in a fork.
-                                       // We wait a little before exiting to avoid to be faster than our parent process.
                                        sleep(1);
+
                                        // We exit in order to avoid fork bombing.
                                        exit(0);
                                }