git.wh0rd.org - tt-rss.git/blobdiff - update_daemon2.php
db updates, remove init_connection()
[tt-rss.git] / update_daemon2.php
old mode 100644 (file)
new mode 100755 (executable)
index 326b69e..e8a56ee
@@ -1,57 +1,63 @@
-#!/usr/bin/php
+#!/usr/bin/env php
 <?php
-       // This is an experimental multiprocess update daemon.
-       // Some configurable variable may be found below.
-
-       // define('DEFAULT_ERROR_LEVEL', E_ALL);
-       define('DEFAULT_ERROR_LEVEL', E_ERROR | E_WARNING | E_PARSE);
+       set_include_path(dirname(__FILE__) ."/include" . PATH_SEPARATOR .
+               get_include_path());
 
        declare(ticks = 1);
+       chdir(dirname(__FILE__));
 
-       define('MAGPIE_CACHE_DIR', '/var/tmp/magpie-ttrss-cache-daemon');
-       define('SIMPLEPIE_CACHE_DIR',   '/var/tmp/simplepie-ttrss-cache-daemon');
        define('DISABLE_SESSIONS', true);
 
-       define('MAX_JOBS', 2);
-
        require_once "version.php";
 
        if (strpos(VERSION, ".99") !== false || getenv('DAEMON_XDEBUG')) {
                define('DAEMON_EXTENDED_DEBUG', true);
        }
 
-       define('PURGE_INTERVAL', 3600); // seconds
-
+       require_once "functions.php";
+       require_once "rssfuncs.php";
        require_once "sanity_check.php";
        require_once "config.php";
+       require_once "db.php";
+       require_once "db-prefs.php";
+       require_once "errorhandler.php";
 
-       define('SPAWN_INTERVAL', DAEMON_SLEEP_INTERVAL);
+       // defaults
+       define('PURGE_INTERVAL', 3600); // seconds
+       define('MAX_CHILD_RUNTIME', 600); // seconds
+       define('MAX_JOBS', 2);
+       define('SPAWN_INTERVAL', DAEMON_SLEEP_INTERVAL); // seconds
 
-       if (!ENABLE_UPDATE_DAEMON) {
-               die("Please enable option ENABLE_UPDATE_DAEMON in config.php\n");
+       if (!function_exists('pcntl_fork')) {
+               die("error: This script requires PHP compiled with PCNTL module.\n");
        }
-       
-       require_once "db.php";
-       require_once "db-prefs.php";
-       require_once "functions.php";
-       require_once "magpierss/rss_fetch.inc";
 
-       error_reporting(DEFAULT_ERROR_LEVEL);
+       $master_handlers_installed = false;
 
        $children = array();
+       $ctimes = array();
 
        $last_checkpoint = -1;
 
        function reap_children() {
                global $children;
+               global $ctimes;
 
                $tmp = array();
 
                foreach ($children as $pid) {
                        if (pcntl_waitpid($pid, $status, WNOHANG) != $pid) {
-                               array_push($tmp, $pid);
+
+                               if (file_is_locked("update_daemon-$pid.lock")) {
+                                       array_push($tmp, $pid);
+                               } else {
+                                       _debug("[reap_children] child $pid seems active but lockfile is unlocked.");
+                                       unset($ctimes[$pid]);
+
+                               }
                        } else {
-                               _debug("[SIGCHLD] child $pid reaped.");
+                               _debug("[reap_children] child $pid reaped.");
+                               unset($ctimes[$pid]);
                        }
                }
 
                return count($tmp);
        }
 
-       function sigalrm_handler() {
-               die("received SIGALRM, hang in feed update?\n");
+       function check_ctimes() {
+               global $ctimes;
+
+               foreach (array_keys($ctimes) as $pid) {
+                       $started = $ctimes[$pid];
+
+                       if (time() - $started > MAX_CHILD_RUNTIME) {
+                               _debug("[MASTER] child process $pid seems to be stuck, aborting...");
+                               posix_kill($pid, SIGKILL);
+                       }
+               }
        }
 
        function sigchld_handler($signal) {
                pcntl_waitpid(-1, $status, WNOHANG);
        }
 
+       function shutdown($caller_pid) {
+               if ($caller_pid == posix_getpid()) {
+                       if (file_exists(LOCK_DIRECTORY . "/update_daemon.lock")) {
+                               _debug("removing lockfile (master)...");
+                               unlink(LOCK_DIRECTORY . "/update_daemon.lock");
+                       }
+               }
+       }
+
+       function task_shutdown() {
+               $pid = posix_getpid();
+
+               if (file_exists(LOCK_DIRECTORY . "/update_daemon-$pid.lock")) {
+                       _debug("removing lockfile ($pid)...");
+                       unlink(LOCK_DIRECTORY . "/update_daemon-$pid.lock");
+               }
+       }
+
        function sigint_handler() {
-               unlink(LOCK_DIRECTORY . "/update_daemon.lock");
-               die("Received SIGINT. Exiting.\n");
+               _debug("[MASTER] SIG_INT received.\n");
+               shutdown(posix_getpid());
+               die;
+       }
+
+       function task_sigint_handler() {
+               _debug("[TASK] SIG_INT received.\n");
+               task_shutdown();
+               die;
        }
 
-       pcntl_signal(SIGALRM, 'sigalrm_handler');
        pcntl_signal(SIGCHLD, 'sigchld_handler');
-       pcntl_signal(SIGINT, 'sigint_handler');
 
-       if (file_is_locked("update_daemon.lock")) {
-               die("error: Can't create lockfile. ".
-                       "Maybe another daemon is already running.\n");
+       $longopts = array("log:",
+                       "tasks:",
+                       "interval:",
+                       "quiet",
+                       "help");
+
+       $options = getopt("", $longopts);
+
+       if (isset($options["help"]) ) {
+               print "Tiny Tiny RSS update daemon.\n\n";
+               print "Options:\n";
+               print "  --log FILE           - log messages to FILE\n";
+               print "  --tasks N            - amount of update tasks to spawn\n";
+               print "                         default: " . MAX_JOBS . "\n";
+               print "  --interval N         - task spawn interval\n";
+               print "                         default: " . SPAWN_INTERVAL . " seconds.\n";
+               print "  --quiet              - don't output messages to stdout\n";
+               return;
+       }
+
+       define('QUIET', isset($options['quiet']));
+
+       if (isset($options["tasks"])) {
+               _debug("Set to spawn " . $options["tasks"] . " children.");
+               $max_jobs = $options["tasks"];
+       } else {
+               $max_jobs = MAX_JOBS;
+       }
+
+       if (isset($options["interval"])) {
+               _debug("Spawn interval: " . $options["interval"] . " seconds.");
+               $spawn_interval = $options["interval"];
+       } else {
+               $spawn_interval = SPAWN_INTERVAL;
+       }
+
+       if (isset($options["log"])) {
+               _debug("Logging to " . $options["log"]);
+               define('LOGFILE', $options["log"]);
        }
 
        if (file_is_locked("update_daemon.lock")) {
                        "Maybe another daemon is already running.\n");
        }
 
-       if (!pcntl_fork()) {
-               $lock_handle = make_lockfile("update_daemon.lock");
-
-               if (!$lock_handle) {
-                       die("error: Can't create lockfile. ".
-                               "Maybe another daemon is already running.\n");
-               }
+       // Try to lock a file in order to avoid concurrent update.
+       $lock_handle = make_lockfile("update_daemon.lock");
 
-               while (true) { sleep(100); }
+       if (!$lock_handle) {
+               die("error: Can't create lockfile. ".
+                       "Maybe another daemon is already running.\n");
        }
 
        // Testing database connection.
        // It is unnecessary to start the fork loop if database is not ok.
-       $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME); 
+       $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME);
 
-       if (!$link) {
-               if (DB_TYPE == "mysql") {
-                       print mysql_error();
-               }
-               // PG seems to display its own errors just fine by default.             
-               return;
-       }
+       if (!init_plugins($link)) die("Can't initialize db connection.\n");
+
+       $schema_version = get_schema_version($link);
 
        db_close($link);
 
+       if ($schema_version != SCHEMA_VERSION) {
+               die("Schema version is wrong, please upgrade the database.\n");
+       }
 
        while (true) {
 
-               $next_spawn = $last_checkpoint + SPAWN_INTERVAL - time();
+               // Since sleep is interrupted by SIGCHLD, we need another way to
+               // respect the spawn interval
+               $next_spawn = $last_checkpoint + $spawn_interval - time();
 
-               if ($next_spawn % 10 == 0) {
+               if ($next_spawn % 60 == 0) {
                        $running_jobs = count($children);
                        _debug("[MASTER] active jobs: $running_jobs, next spawn at $next_spawn sec.");
                }
 
-               if ($last_checkpoint + SPAWN_INTERVAL < time()) {
+               if ($last_checkpoint + $spawn_interval < time()) {
+
+                       /* Check if schema version changed */
+
+                       $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME);
+                       if (!init_plugins($link)) die("Can't initialize db connection.\n");
+                       $test_schema_version = get_schema_version($link);
+                       db_close($link);
+
+                       if ($test_schema_version != $schema_version) {
+                               echo "Expected schema version: $schema_version, got: $test_schema_version\n";
+                               echo "Schema version changed while we were running, bailing out\n";
+                               exit(100);
+                       }
 
+                       check_ctimes();
                        reap_children();
 
-                       for ($j = count($children); $j < MAX_JOBS; $j++) {
+                       for ($j = count($children); $j < $max_jobs; $j++) {
                                $pid = pcntl_fork();
                                if ($pid == -1) {
                                        die("fork failed!\n");
                                } else if ($pid) {
+
+                                       if (!$master_handlers_installed) {
+                                               _debug("[MASTER] installing shutdown handlers");
+                                               pcntl_signal(SIGINT, 'sigint_handler');
+                                               register_shutdown_function('shutdown', posix_getpid());
+                                               $master_handlers_installed = true;
+                                       }
+
                                        _debug("[MASTER] spawned client $j [PID:$pid]...");
                                        array_push($children, $pid);
+                                       $ctimes[$pid] = time();
                                } else {
                                        pcntl_signal(SIGCHLD, SIG_IGN);
-                                       pcntl_signal(SIGINT, SIG_DFL);
+                                       pcntl_signal(SIGINT, 'task_sigint_handler');
+
+                                       register_shutdown_function('task_shutdown');
+
+                                       $my_pid = posix_getpid();
+                                       $lock_filename = "update_daemon-$my_pid.lock";
+
+                                       $lock_handle = make_lockfile($lock_filename);
+
+                                       if (!$lock_handle) {
+                                               die("error: Can't create lockfile ($lock_filename). ".
+                                               "Maybe another daemon is already running.\n");
+                                       }
 
                                        // ****** Updating RSS code *******
                                        // Only run in fork process.
 
                                        $start_timestamp = time();
 
-                                       $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME); 
-
-                                       if (!$link) {
-                                               if (DB_TYPE == "mysql") {
-                                                       print mysql_error();
-                                               }
-                                               // PG seems to display its own errors just fine by default.             
-                                               return;
-                                       }
+                                       $link = db_connect(DB_HOST, DB_USER, DB_PASS, DB_NAME);
 
-                                       if (DB_TYPE == "pgsql") {
-                                               pg_query("set client_encoding = 'utf-8'");
-                                               pg_set_client_encoding("UNICODE");
-                                       } else {
-                                               if (defined('MYSQL_CHARSET') && MYSQL_CHARSET) {
-                                                       db_query($link, "SET NAMES " . MYSQL_CHARSET);
-                                                       // db_query($link, "SET CHARACTER SET " . MYSQL_CHARSET);
-                                               }
-                                       }
+                                       if (!init_plugins($link)) return;
 
                                        // We disable stamp file, since it is of no use in a multiprocess update.
                                        // not really, tho for the time being -fox
                                        if (!make_stampfile('update_daemon.stamp')) {
-                                               print "warning: unable to create stampfile";
-                                       }       
-
-                                       // $last_purge = 0;
-
-                                       // if (time() - $last_purge > PURGE_INTERVAL) {
-
-                                       // FIXME : $last_purge is of no use in a multiprocess update.
-                                       // FIXME : We ALWAYS purge old posts.
-                                       _debug("Purging old posts (random 30 feeds)...");
-                                       global_purge_old_posts($link, true, 30);
-
-                                       //      $last_purge = time();
-                                       // }
-
-                                       // Process all other feeds using last_updated and interval parameters
-
-                                       $random_qpart = sql_random_function();
-                                               
-                                       if (DAEMON_UPDATE_LOGIN_LIMIT > 0) {
-                                               if (DB_TYPE == "pgsql") {
-                                                       $login_thresh_qpart = "AND ttrss_users.last_login >= NOW() - INTERVAL '".DAEMON_UPDATE_LOGIN_LIMIT." days'";
-                                               } else {
-                                                       $login_thresh_qpart = "AND ttrss_users.last_login >= DATE_SUB(NOW(), INTERVAL ".DAEMON_UPDATE_LOGIN_LIMIT." DAY)";
-                                               }                       
-                                       } else {
-                                               $login_thresh_qpart = "";
-                                       }
-
-                                       if (DB_TYPE == "pgsql") {
-                                               $update_limit_qpart = "AND ttrss_feeds.last_updated < NOW() - INTERVAL '".(DAEMON_SLEEP_INTERVAL*2)." seconds'";
-                                       } else {
-                                               $update_limit_qpart = "AND ttrss_feeds.last_updated < DATE_SUB(NOW(), INTERVAL ".(DAEMON_SLEEP_INTERVAL*2)." SECOND)";
-                                       }
-
-                                       if (DB_TYPE == "pgsql") {
-                                                       $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < NOW() - INTERVAL '120 seconds')";
-                                               } else {
-                                                       $updstart_thresh_qpart = "AND (ttrss_feeds.last_update_started IS NULL OR ttrss_feeds.last_update_started < DATE_SUB(NOW(), INTERVAL 120 SECOND))";
-                                               }                       
-
-                                       $result = db_query($link, "SELECT feed_url,ttrss_feeds.id,owner_uid,
-                                                       SUBSTRING(last_updated,1,19) AS last_updated,
-                                                       update_interval 
-                                               FROM 
-                                                       ttrss_feeds,ttrss_users 
-                                               WHERE 
-                                                       ttrss_users.id = owner_uid $login_thresh_qpart $update_limit_qpart 
-                                                       $updstart_thresh_qpart
-                                               ORDER BY $random_qpart DESC LIMIT " . DAEMON_FEED_LIMIT);
-
-                                       $user_prefs_cache = array();
-
-                                       _debug(sprintf("Scheduled %d feeds to update...\n", db_num_rows($result)));
-
-                                       // Here is a little cache magic in order to minimize risk of double feed updates.
-                                       $feeds_to_update = array();
-                                       while ($line = db_fetch_assoc($result)) {
-                                               $feeds_to_update[$line['id']] = $line;
-                                       }
-
-                                       // We update the feed last update started date before anything else.
-                                       // There is no lag due to feed contents downloads
-                                       // It prevent an other process to update the same feed.
-                                       $feed_ids = array_keys($feeds_to_update);
-                                       if($feed_ids) {
-                                               db_query($link, sprintf("UPDATE ttrss_feeds SET last_update_started = NOW()
-                                                       WHERE id IN (%s)", implode(',', $feed_ids)));
+                                               _debug("warning: unable to create stampfile\n");
                                        }
 
-                                       while ($line = array_pop($feeds_to_update)) {
+                                       // Call to the feed batch update function
+                                       // and maybe regenerate feedbrowser cache
 
-                                               $upd_intl = $line["update_interval"];
-                                               $user_id = $line["owner_uid"];
+                                       $nf = 0;
 
-                                               if (!$upd_intl || $upd_intl == 0) {
-                                                       if (!$user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL']) {                  
-                                                               $upd_intl = get_pref($link, 'DEFAULT_UPDATE_INTERVAL', $user_id);
-                                                               $user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL'] = $upd_intl;
-                                                       } else {
-                                                               $upd_intl = $user_prefs_cache[$user_id]['DEFAULT_UPDATE_INTERVAL'];
-                                                       }
-                                               }
-
-                                               if ($upd_intl < 0) { 
-                               #                               print "Updates disabled.\n";
-                                                       continue; 
-                                               }
+                                       _debug("Waiting before update [$j]..");
+                                       sleep($j*5);
+                                       $nf = update_daemon_common($link);
 
-                                               _debug("Feed: " . $line["feed_url"] . ", " . $line["last_updated"]);
+                                       if (rand(0,100) > 50) {
+                                               $count = update_feedbrowser_cache($link);
+                                               _debug("Feedbrowser updated, $count feeds processed.");
 
-                               //                      _debug(sprintf("\tLU: %d, INTL: %d, UID: %d) ", 
-                               //                              time() - strtotime($line["last_updated"]), $upd_intl*60, $user_id));
+                                               purge_orphans($link, true);
 
-                                               if (!$line["last_updated"] || 
-                                                       time() - strtotime($line["last_updated"]) > ($upd_intl * 60)) {
+                                               $rc = cleanup_tags($link, 14, 50000);
 
-                                                       _debug("Updating...");
+                                               _debug("Cleaned $rc cached tags.");
 
-                                                       pcntl_alarm(300);
+                                               global $pluginhost;
+                                               $pluginhost->run_hooks($pluginhost::HOOK_UPDATE_TASK, "hook_update_task", $op);
+                                       }
 
-                                                       update_rss_feed($link, $line["feed_url"], $line["id"], true);   
+                                       _debug("Elapsed time: " . (time() - $start_timestamp) . " second(s)");
 
-                                                       pcntl_alarm(0);
+                                       if ($nf > 0) {
+                                               _debug("Feeds processed: $nf");
 
-                                                       sleep(1); // prevent flood (FIXME make this an option?)
-                                               } else {
-                                                       _debug("Update not needed.");
+                                               if (time() - $start_timestamp > 0) {
+                                                       _debug("Feeds/minute: " . sprintf("%.2d", $nf/((time()-$start_timestamp)/60)));
                                                }
                                        }
 
-                                       if (DAEMON_SENDS_DIGESTS) send_headlines_digests($link);
-
-                                       print "Elapsed time: " . (time() - $start_timestamp) . " second(s)\n";
-
                                        db_close($link);
 
                                        // We are in a fork.
                                         // We wait a little before exiting to avoid being faster than our parent process.
                                        sleep(1);
+
+                                       unlink(LOCK_DIRECTORY . "/$lock_filename");
+
                                        // We exit in order to avoid fork bombing.
                                        exit(0);
                                }