#!/usr/bin/perl
#
# keepalive.pl   Author: Rich West
#                        rwest@primavera.com
#
# This program has been written in a modular way in order to manage, monitor,
# and attempt to keep alive any set of processes.  The processes that are to
# be watched are grabbed from configuration files in a separate folder.  Those
# configuration files contain the actual process to watch, how to check the
# status on that process, and the functions necessary to restart the process.
#
# At some point in time, a notification module will be written, but currently
# the system does not provide any notification when a process cannot be
# revived.  Other utilities (Big Brother, etc) would have to manage that.
#
# This is a work in progress.  All RCS change logs are kept at the end of this
# file.
#
# $Header: /var/www/html/wesmo.com/keepalive/release/keepalive-1.2/RCS/keepalive,v 1.22 2004/08/23 18:27:06 root Exp root $
#
################################################################################

use strict;
use warnings;

package keepalive;

use FindBin;

# Settings read as $keepalive::NAME by the rest of this script.  They are
# expected to be assigned by config.pl; declaring them here only keeps
# "used only once" warnings quiet.
our ($HOME_DIR, $LOG_DIR, $DAEMON_DIR, $INTERVAL, $VERSION);

# Log level.  0 disables logging; higher values are more verbose.
my ($DEBUG) = 0;

################################################################################
#
# Determine where we are running in order to grab our config file properly.
#
################################################################################

# FindBin resolves the script's directory for absolute and relative
# invocations alike.  Splitting $0 by hand produced a path rooted at "/" for
# relative invocations (e.g. "./keepalive" became "/./config.pl").
my $path = "$FindBin::Bin/";

require $path . "config.pl";

################################################################################
#
# Determine our log level
#
################################################################################

# Accepts "-D<level>", "-D <level>" or a bare "-D" (level 1).
if (defined $ARGV[0] && $ARGV[0] =~ /^-D/) {
    $DEBUG = (split(/D/, $ARGV[0]))[1];
    $DEBUG = $ARGV[1] if (!$DEBUG);
    # The level is compared numerically everywhere else, so anything that is
    # missing or not a plain non-negative integer falls back to level 1.
    $DEBUG = 1 if (!$DEBUG || $DEBUG !~ /\A\d+\z/);
}

################################################################################
#
# Set up our signal handling.
#
################################################################################

use File::Spec;

# Absolute path of this script, captured before daemonize() changes the
# working directory, so that a HUP can re-exec the script even when it was
# started through a relative path.
my $SELF = File::Spec->rel2abs($0);

# INT: log the shutdown, close whichever logs are open, and exit.
$SIG{INT} = sub {
    print DEBUG_LOG localtime() . " Exiting...\n" if ($DEBUG);
    close(DEBUG_LOG) if defined fileno(DEBUG_LOG);
    close(ERROR_LOG) if defined fileno(ERROR_LOG);
    exit;
};

# HUP: re-exec ourselves with the original arguments.  The logs are closed
# first because exec does not return on success, so nothing placed after it
# would ever run.
$SIG{HUP} = sub {
    print DEBUG_LOG localtime() . " Restarting...\n" if ($DEBUG);
    close(DEBUG_LOG) if defined fileno(DEBUG_LOG);
    close(ERROR_LOG) if defined fileno(ERROR_LOG);
    exec($SELF, @ARGV) or die "Could not restart: $!\n";
};

################################################################################
#
# Become a background (daemon) process.
#
################################################################################
daemonize($keepalive::HOME_DIR, $keepalive::LOG_DIR);

################################################################################
#
# Start monitoring processes!
#
################################################################################
main($keepalive::DAEMON_DIR, $keepalive::INTERVAL, $keepalive::LOG_DIR, $keepalive::VERSION);

################################################################################
#
# A standard routine to turn ourselves into a daemon.
#
# Arguments:
#   $HOME_DIR - directory to chdir into before detaching
#   $LOG_DIR  - directory holding the "errors" file that receives STDOUT/STDERR
#
# Dies if the chdir, any of the redirections, the fork, or setsid fails.
# The parent process exits after the fork; only the child returns.
#
################################################################################
sub daemonize {
    my ($HOME_DIR, $LOG_DIR) = @_;

    use POSIX qw(setsid);

    chdir $HOME_DIR or die "Can't chdir to $HOME_DIR: $!";

    # Detach from the terminal: no input, and all output appended to the
    # errors file.
    open(STDIN, '<', '/dev/null')
        or die "Can't read /dev/null: $!";
    open(STDOUT, '>>', "$LOG_DIR/errors")
        or die "Can't write to $LOG_DIR/errors: $!";
    open(STDERR, '>>', "$LOG_DIR/errors")
        or die "Can't write to $LOG_DIR/errors: $!";

    defined(my $pid = fork) or die "Can't fork: $!";
    exit if $pid;    # parent is done; the child carries on

    setsid() or die "Can't start a new session: $!";
    umask 0;
    return;
}

################################################################################
#
# Grab a list of the processes we need to watch.
#
################################################################################
#
# Arguments:
#   $DAEMON_DIR  - directory containing the per-daemon control scripts
#   $my_hostname - host name, used only to tag log lines
#
# Returns the names of the entries in $DAEMON_DIR that start with "S".
# Dies if the directory cannot be read.
#
sub get_daemon_list {
    my ($DAEMON_DIR, $my_hostname) = @_;
    my (@daemon_list);

    opendir(my $dir, $DAEMON_DIR)
        or die "Could not list the contents of $DAEMON_DIR.\n";
    # Sorted and without dot files, which is what a plain `ls` would list.
    my @entries = sort grep { !/^\./ } readdir($dir);
    closedir($dir);

    foreach my $entry (@entries) {
        print DEBUG_LOG localtime() . " $my_hostname Found $entry.\n" if ($DEBUG >= 5);
        next if ($entry !~ /^S/);
        print DEBUG_LOG localtime() . " $my_hostname Using $entry.\n" if ($DEBUG >= 5);
        push @daemon_list, $entry;
    }

    return (@daemon_list);
}

################################################################################
#
# The daemon status check.  We grab all of the down daemons here.
#
# Arguments:
#   $DAEMON_DIR  - directory containing the per-daemon control scripts
#   $my_hostname - host name, used only to tag log lines
#   @daemons     - names of the control scripts to check
#
# Returns the subset of @daemons whose status is 0 (down).
#
################################################################################
sub status_check {
    my ($DAEMON_DIR, $my_hostname, @daemons) = @_;
    my (@failed);

    foreach my $daemon (@daemons) {
        my $status = daemon_status($DAEMON_DIR, $daemon, $my_hostname);

        if ($status == 0) {
            push @failed, $daemon;
            print DEBUG_LOG localtime() . " $my_hostname $daemon is down.\n" if ($DEBUG);
        }
        elsif ($status == 1) {
            print DEBUG_LOG localtime() . " $my_hostname $daemon is up.\n" if ($DEBUG >= 5);
        }
    }

    return (@failed);
}

################################################################################
#
# This simply checks a single daemon's status.
#
# Runs "$DAEMON_DIR/$daemon [-D<level>] status" and returns the numeric value
# of the first line it prints (stdout and stderr combined).  No output, or a
# first line that is not a number, yields 0.  The value is returned as a
# number rather than as the raw line because a raw "0\n" is true in boolean
# context.
#
################################################################################
sub daemon_status {
    my ($DAEMON_DIR, $daemon, $my_hostname) = @_;

    print DEBUG_LOG localtime() . " $my_hostname Checking the status of $daemon.\n" if ($DEBUG >= 5);

    # The debug level is only handed down to the script at level 3 and above.
    my $options = ($DEBUG >= 3) ? "-D" . $DEBUG : "";

    open(my $pipe, '-|', "$DAEMON_DIR/$daemon $options status 2>&1")
        or die "Could not execute $DAEMON_DIR/$daemon status.\n";
    # Read everything, not just the first line, so the script is not hit by
    # SIGPIPE if it prints more after its status line.
    my @lines = <$pipe>;
    close($pipe);

    my $status = $lines[0];
    return 0 unless defined $status;
    chomp $status;

    no warnings 'numeric';
    return $status + 0;
}

################################################################################
#
# The restart process.  We attempt to restart the downed daemons here.
#
################################################################################
#
# Arguments:
#   $DAEMON_DIR  - directory containing the per-daemon control scripts
#   $my_hostname - host name, used to tag log lines and notifications
#   @daemons     - names of the control scripts whose daemons are down
#
# For each daemon: runs its "restart" action, re-checks its status, and runs
# its "notify" action with a message describing the outcome.
#
# Returns the daemons that are still down after the restart attempt.
# Dies if one of the control-script pipes cannot be started.
#
sub restart {
    my ($DAEMON_DIR, $my_hostname, @daemons) = @_;
    my (@failed);

    # The debug level is only handed down to the script at level 3 and above.
    my $options = ($DEBUG >= 3) ? "-D" . $DEBUG : "";

    foreach my $daemon (@daemons) {
        print DEBUG_LOG localtime() . " $my_hostname Attempting to restart $daemon.\n" if ($DEBUG);
        print DEBUG_LOG localtime() . " $my_hostname Executing $DAEMON_DIR/$daemon restart.\n" if ($DEBUG);

        open(my $restart_pipe, '-|', "$DAEMON_DIR/$daemon $options restart 2>&1")
            or die "Could not execute $DAEMON_DIR/$daemon restart.\n";
        # Read all of the output before closing: closing after the first line
        # can kill the restart script with SIGPIPE on its next write.
        my $output = join('', <$restart_pipe>);
        close($restart_pipe);
        print DEBUG_LOG $output if ($DEBUG >= 5);

        # Compare numerically: the status may arrive as the string "0\n",
        # which is true in boolean context even though it means "down".
        my $status = daemon_status($DAEMON_DIR, $daemon, $my_hostname);
        my $still_down;
        {
            no warnings qw(numeric uninitialized);
            $still_down = ($status == 0);
        }

        my $notification;
        if ($still_down) {
            push @failed, $daemon;
            print DEBUG_LOG localtime() . " $my_hostname $daemon is still down.\n" if ($DEBUG);
            $notification = localtime() . " $my_hostname $daemon has been detected as down.  All attempts to automatically restart $daemon have failed.\n";
        }
        else {
            print DEBUG_LOG localtime() . " $my_hostname $daemon has been restarted successfully.\n" if ($DEBUG);
            $notification = localtime() . " $my_hostname $daemon has been detected as down.  $daemon has been restarted successfully.\n";
        }

        open(my $notify_pipe, '-|', qq#$DAEMON_DIR/$daemon notify "$notification" 2>&1#)
            or die "Could not execute $DAEMON_DIR/$daemon notify.\n";
        $output = join('', <$notify_pipe>);
        close($notify_pipe);

        # ERROR_LOG is only opened when debugging is enabled, so only write
        # to it in that case.
        if ($DEBUG) {
            print ERROR_LOG localtime() . "\n";
            print ERROR_LOG $output;
        }
    }

    return (@failed);
}

################################################################################
#
# The main guts.
#
################################################################################
#
# Arguments:
#   $DAEMON_DIR - directory containing the per-daemon control scripts
#   $INTERVAL   - seconds to sleep between status sweeps
#   $LOG_DIR    - directory holding the "debug" and "errors" log files
#   $VERSION    - version string, used only in the start-up log line
#
# Reads the daemon list once, then loops: check every daemon, try to restart
# the ones that are down, sleep, repeat.  Returns only when there is nothing
# to watch.  Dies if a log file cannot be opened while debugging is enabled.
#
sub main {
    my ($DAEMON_DIR, $INTERVAL, $LOG_DIR, $VERSION) = @_;

    require Sys::Hostname;
    my $my_hostname = Sys::Hostname::hostname();

    # Open the logs before anything tries to write to them (the daemon list
    # scan logs what it finds at level 5 and above).
    if ($DEBUG) {
        open(DEBUG_LOG, '>>', "$LOG_DIR/debug")
            or die "Could not write log file.\n";
        open(ERROR_LOG, '>>', "$LOG_DIR/errors")
            or die "Could not write log file.\n";

        # Unbuffered, so log lines show up as they happen rather than when
        # the buffer eventually fills.
        my $previous = select(DEBUG_LOG);
        $| = 1;
        select(ERROR_LOG);
        $| = 1;
        select($previous);

        print DEBUG_LOG localtime() . " $my_hostname Starting keepalive v$VERSION log at level $DEBUG.\n";
    }

    my @daemon_list = get_daemon_list($DAEMON_DIR, $my_hostname);

    # STDERR is normally pointed at the errors file by daemonize().
    warn localtime() . " $my_hostname No daemon scripts found in $DAEMON_DIR; nothing to watch.\n"
        if (!@daemon_list);

    # The list is read only once, so an empty list means there will never be
    # anything to do.
    while (@daemon_list) {
        my @down_daemons = status_check($DAEMON_DIR, $my_hostname, @daemon_list);
        restart($DAEMON_DIR, $my_hostname, @down_daemons);
        print DEBUG_LOG localtime() . " $my_hostname Sleeping for $INTERVAL seconds.\n" if ($DEBUG >= 5);
        sleep $INTERVAL;
    }

    if ($DEBUG) {
        close(DEBUG_LOG);
        close(ERROR_LOG);
    }
    return;
}

################################################################################
# $Log: keepalive,v $
# Revision 1.22 2004/08/23 18:27:06 root
# Update for perl location.
#
# Revision 1.21 2004/08/23 18:25:56 root
# Updates throughout. Mainly to handle additional logging.
#
# Revision 1.20 2003/08/01 18:08:40 rwest
# Automatically determine just where our config file is.
#
# Revision 1.19 2003/08/01 17:30:30 rwest
# Typo fix.
#
# Revision 1.18 2003/08/01 17:28:04 rwest
# Inserted a routine to properly turn this in to a daemon.
#
# Revision 1.17 2003/08/01 16:33:53 rwest
# More code cleanup.
#
# Revision 1.16 2003/08/01 16:26:53 rwest
# Added version information.
#
#
# Revision 1.15 2003/08/01 16:22:07 rwest
# typo fixes.
#
# Revision 1.14 2003/08/01 15:55:56 rwest
# Fixed notification variable.
#
# Revision 1.13 2003/07/31 18:00:56 rwest
# Added notification ability.
#
# Revision 1.12 2003/07/31 17:40:54 rwest
# More interrupt handling and better logging.
#
# Revision 1.11 2003/07/31 17:35:04 rwest
# Added in signal handling.
#
# Revision 1.10 2003/07/31 17:29:05 rwest
# More major updating. Moved all config variables into the config.pl file.
#
# Revision 1.9 2003/07/31 16:19:08 rwest
# Enabled two different levels of debugging. Anything >= 5 will be very
# verbose. Anything less than that will only log down daemons and restart
# attempts.
# Also changed the flag from -v to -D.
# More major updates. The scripts themselves and this daemon are more solid
# now.
#
# Revision 1.8 2003/07/29 14:36:05 rwest
# added a log file for the debug output.
#
# Revision 1.7 2003/07/29 14:21:48 rwest
# Minor update
#
# Revision 1.6 2003/07/09 20:31:34 rwest
# Enabled the toggling of the DEBUG flag via the command line argument of '-v'
#
# Revision 1.5 2003/07/09 20:26:06 rwest
# Broke apart the individual status check from the check of all of the daemons
# to allow for it to be integrated into the restart process.
#
# Revision 1.4 2003/07/09 18:21:27 rwest
# It works now. :)
#
# Revision 1.3 2003/07/09 18:07:27 rwest
# Cleanup
#
# Revision 1.2 2003/07/09 18:05:28 rwest
# Ok.. working now.
#
# Revision 1.1 2003/07/09 17:56:57 rwest
# Initial revision