#!/usr/bin/perl
#
# keepalive.pl Author: Rich West
# rwest@primavera.com
#
# This program has been written in a modular way in order to manage, monitor,
# and attempt to keep alive any set of processes. The processes that are to
# be watched are grabbed from configuration files in a separate folder. Those
# configuration files contain the actual process to watch, how to check the
# status on that process, and the functions necessary to restart the process.
#
# At some point in time, a notification module will be written, but currently
# the system does not provide any notification when a process cannot be
# revived. Other utilities (Big Brother, etc) would have to manage that.
#
# This is a work in progress. All RCS change logs are kept at the end of this
# file.
#
# $Header: /var/www/html/wesmo.com/keepalive/release/keepalive-1.2/RCS/keepalive,v 1.22 2004/08/23 18:27:06 root Exp root $
#
################################################################################
use strict;
# Everything below is compiled into the keepalive package; the $keepalive::*
# settings handed to daemonize() and main() further down are package variables
# of this namespace (presumably populated by config.pl -- confirm there).
package keepalive;
# File-scoped lexicals reserved for the script-location step below; $path ends
# up holding the directory prefix used to load config.pl.
my (@path_elements, $element, $path);
# Debug verbosity shared by every sub in this file. 0 disables debug logging;
# higher values enable progressively more output (see the -D handling below).
my ($DEBUG) = 0;
################################################################################
#
# Determine where we are running in order to grab our config file properly.
#
################################################################################
# Work out the directory this script was started from and load config.pl from
# that same directory.
#
# dirname() keeps a relative invocation ("./keepalive", "bin/keepalive")
# relative, and rel2abs() then anchors it to the current working directory.
# The previous hand-rolled join always prefixed "/", which turned relative
# invocations into bogus absolute paths such as "/./" or "/bin/".
use File::Basename ();
use File::Spec ();
$path = File::Spec->rel2abs(File::Basename::dirname($0)) . "/";
require $path . "config.pl";
################################################################################
#
# Determine our log level
#
################################################################################
# Accepted forms: "-D<level>" (attached), "-D <level>" (next argument) and a
# bare "-D", which means level 1.
if (defined $ARGV[0] && $ARGV[0] =~ /^-D/)
{
    # Attached form: whatever follows the first "D".
    $DEBUG = (split(/D/, $ARGV[0]))[1];
    # Detached form: the level is the next argument.
    $DEBUG = $ARGV[1] if (! $DEBUG);
    # The level is compared numerically and interpolated into the command
    # lines of the daemon scripts, so anything that is not a plain
    # non-negative integer falls back to level 1.
    $DEBUG = 1 if (! $DEBUG || $DEBUG !~ /\A\d+\z/);
}
################################################################################
#
# Set up our signal handling.
#
################################################################################
# SIGINT: note the shutdown in the debug log (when debugging is on), close both
# log handles and exit.
$SIG{INT} = sub
{
    print DEBUG_LOG localtime() . " Exiting...\n" if ($DEBUG);
    close (DEBUG_LOG);
    close (ERROR_LOG);
    exit;
};
# SIGHUP: re-execute this program with its original arguments.
$SIG{HUP} = sub
{
    print DEBUG_LOG localtime() . " Restarting...\n" if ($DEBUG);
    # Close the logs *before* exec: exec only returns on failure, so any
    # cleanup placed after it can never run on the success path.
    close (DEBUG_LOG);
    close (ERROR_LOG);
    exec ($0, @ARGV) or die "Could not restart: $!\n";
};
################################################################################
#
# Become a background (daemon) process.
#
################################################################################
# Detach first: daemonize() forks and the parent exits, so everything after
# this call runs in the background child.
daemonize(
    $keepalive::HOME_DIR,
    $keepalive::LOG_DIR,
);
################################################################################
#
# Start monitoring processes!
#
################################################################################
main(
    $keepalive::DAEMON_DIR,
    $keepalive::INTERVAL,
    $keepalive::LOG_DIR,
    $keepalive::VERSION,
);
################################################################################
#
# A standard routine to turn ourselves into a daemon.
#
################################################################################
sub daemonize {
    # Detach the process from its terminal and continue in the background.
    #
    # Arguments:
    #   $HOME_DIR - directory to chdir into before detaching
    #   $LOG_DIR  - directory whose "errors" file receives STDOUT and STDERR
    #
    # Dies with a descriptive message if any step fails. The parent process
    # exits after a successful fork; only the child returns to the caller.
    my ($HOME_DIR,$LOG_DIR) = @_;
    use POSIX qw(setsid);
    chdir $HOME_DIR or die "Can't chdir to $HOME_DIR: $!";
    # Three-argument open keeps the mode separate from the (configurable)
    # path, so odd characters in $LOG_DIR cannot change how the file is opened.
    open (STDIN, '<', '/dev/null') or die "Can't read /dev/null: $!";
    open (STDOUT, '>>', "$LOG_DIR/errors") or die "Can't write to $LOG_DIR/errors: $!";
    open (STDERR, '>>', "$LOG_DIR/errors") or die "Can't write to $LOG_DIR/errors: $!";
    defined(my $pid = fork) or die "Can't fork: $!";
    # Parent: nothing more to do.
    exit if $pid;
    # Child: become leader of a new session.
    setsid or die "Can't start a new session: $!";
    # NOTE(review): umask 0 means files created from here on (including by
    # scripts this daemon launches) have no permission bits masked off --
    # confirm that this is intended.
    umask 0;
}
################################################################################
#
# Grab a list of the processes we need to watch.
#
################################################################################
sub get_daemon_list
{
    # Return the names of the control scripts to watch.
    #
    # Arguments:
    #   $DAEMON_DIR  - directory containing the control scripts
    #   $my_hostname - host name used as a prefix in debug log lines
    #
    # Returns the directory entries whose names start with "S", sorted by
    # name. Dies if $DAEMON_DIR cannot be read.
    my ($DAEMON_DIR, $my_hostname) = @_;
    my (@daemon_list);

    # Read the directory directly instead of piping from `ls`: no shell is
    # involved, an unreadable directory is actually detected, and the handle
    # is closed again.
    opendir (my $dir_handle, $DAEMON_DIR)
        or die "Could not list the contents of $DAEMON_DIR.\n";
    # Hidden entries are skipped and the rest sorted, matching what a plain
    # `ls` listing provides.
    my @entries = sort grep { ! /^\./ } readdir ($dir_handle);
    closedir ($dir_handle);

    foreach my $entry (@entries)
    {
        print DEBUG_LOG localtime() . " $my_hostname Found $entry.\n" if ($DEBUG >= 5);
        # Only entries starting with "S" are watched.
        next if ($entry !~ /^S/);
        print DEBUG_LOG localtime() . " $my_hostname Using $entry.\n" if ($DEBUG >= 5);
        push @daemon_list, $entry;
    }
    return (@daemon_list);
}
################################################################################
#
# The daemon status check. We grab all of the down daemons here.
#
################################################################################
sub status_check
{
    # Ask every daemon in @daemons for its status and return the names of
    # those reporting 0 (down). The first two arguments are the script
    # directory and the host name used in log lines.
    my ($DAEMON_DIR, $my_hostname, @daemons) = @_;
    my @failed;

    for my $daemon (@daemons)
    {
        my $status = daemon_status($DAEMON_DIR, $daemon, $my_hostname);

        if ($status == 0)
        {
            # Down: remember it, and log at any debug level.
            push @failed, $daemon;
            print DEBUG_LOG localtime() . " $my_hostname $daemon is down.\n" if ($DEBUG);
        }
        elsif ($status == 1)
        {
            # Up: only worth a log line when running verbosely.
            print DEBUG_LOG localtime() . " $my_hostname $daemon is up.\n" if ($DEBUG >= 5);
        }
    }

    return (@failed);
}
################################################################################
#
# This simply checks a single daemon's status.
#
################################################################################
sub daemon_status
{
    # Run "<DAEMON_DIR>/<daemon> [-D<level>] status" and return what it says.
    #
    # Arguments:
    #   $DAEMON_DIR  - directory containing the control scripts
    #   $daemon      - name of the control script to query
    #   $my_hostname - host name used as a prefix in debug log lines
    #
    # Returns the first line the script prints (stderr included), with the
    # trailing newline removed, or undef if it prints nothing. The callers in
    # this file treat 0 as "down" and 1 as "up".
    # Dies if the command cannot be started.
    my ($DAEMON_DIR, $daemon, $my_hostname) = @_;
    # Empty unless debugging is verbose enough to pass -D on to the script.
    my $options = "";
    print DEBUG_LOG localtime() . " $my_hostname Checking the status of $daemon.\n" if ($DEBUG >= 5);
    $options = "-D" . $DEBUG if ($DEBUG >= 3);
    # The command goes through the shell because of the 2>&1 redirection.
    open (my $pipe, '-|', "$DAEMON_DIR/$daemon $options status 2>&1")
        or die "Could not execute $DAEMON_DIR/$daemon status.\n";
    # Only the first line of output is the status.
    my $status = <$pipe>;
    close ($pipe);
    # Strip the newline so "0\n" is not mistaken for a true value when the
    # result is used as a boolean.
    chomp ($status) if (defined $status);
    return ($status);
}
################################################################################
#
# The restart process. We attempt to restart the downed daemons here.
#
################################################################################
sub restart
{
    # Try to restart every daemon in @daemons and send a notification about
    # the outcome through the daemon's own control script.
    #
    # Arguments (positional):
    #   $DAEMON_DIR  - directory containing the control scripts
    #   $my_hostname - host name used in log lines and notifications
    #   @daemons     - names of the control scripts to restart
    #
    # Returns the names of the daemons that are still down after the restart
    # attempt. Dies if one of the control-script commands cannot be started.
    my ($DAEMON_DIR) = shift;
    my ($my_hostname) = shift;
    my (@daemons) = @_;
    my (@failed);

    foreach my $daemon (@daemons)
    {
        my $script = "$DAEMON_DIR/$daemon";
        # Pass the debug level on to the script only when running verbosely.
        my $options = ($DEBUG >= 3) ? "-D" . $DEBUG : "";
        my ($output, $notification);

        print DEBUG_LOG localtime() . " $my_hostname Attempting to restart $daemon.\n" if ($DEBUG);
        print DEBUG_LOG localtime() . " $my_hostname Executing $DAEMON_DIR/$daemon restart.\n" if ($DEBUG);
        $output = _first_output_line("$script $options restart 2>&1", "$script restart");
        print DEBUG_LOG $output if ($DEBUG >= 5);

        # Same numeric test as status_check(): a plain boolean test would
        # treat a status line of "0\n" as true, i.e. as "up".
        my $status = daemon_status($DAEMON_DIR, $daemon, $my_hostname);
        if (! defined $status || $status == 0)
        {
            push @failed, $daemon;
            print DEBUG_LOG localtime() . " $my_hostname $daemon is still down.\n" if ($DEBUG);
            $notification = localtime() . " $my_hostname $daemon has been detected as down. All attempts to automatically restart $daemon have failed.\n";
            $output = _first_output_line(qq#$script notify "$notification" 2>&1#, "$script notify");
        }
        else
        {
            print DEBUG_LOG localtime() . " $my_hostname $daemon has been restarted successfully.\n" if ($DEBUG);
            $notification = localtime() . " $my_hostname $daemon has been detected as down. $daemon has been restarted successfully.\n";
            $output = _first_output_line(qq#$script notify "$notification"#, "$script notify");
        }

        # Either way, record when the notification was sent and, when
        # debugging, what the notify command answered.
        print ERROR_LOG localtime() . "\n";
        print ERROR_LOG $output if ($DEBUG);
    }
    return (@failed);
}

# Run $command through the shell and return the first line it prints (undef if
# it prints nothing). $description names the command in the error message.
sub _first_output_line
{
    my ($command, $description) = @_;
    open (my $pipe, '-|', $command) or die "Could not execute $description.\n";
    my $line = <$pipe>;
    close ($pipe);
    return $line;
}
################################################################################
#
# The main guts.
#
################################################################################
sub main
{
    # The monitoring loop: find the daemons to watch, then check and restart
    # them every $INTERVAL seconds.
    #
    # Arguments:
    #   $DAEMON_DIR - directory containing the control scripts
    #   $INTERVAL   - seconds to sleep between two rounds of checks
    #   $LOG_DIR    - directory holding the "debug" and "errors" log files
    #   $VERSION    - version string written to the debug log at start-up
    #
    # Returns only when there is nothing to watch. Dies if a log file cannot
    # be opened or the host name cannot be determined.
    my($DAEMON_DIR,$INTERVAL,$LOG_DIR,$VERSION) = @_;
    my(@daemon_list, @down_daemons, $my_hostname);

    # Core module: does not depend on where the hostname binary is installed.
    use Sys::Hostname;
    $my_hostname = hostname();

    # Open the logs before anything that wants to write to them; the daemon
    # list used to be read first, which lost its debug output.
    if ($DEBUG)
    {
        open (DEBUG_LOG, '>>', "$LOG_DIR/debug") or die "Could not write log file.\n";
        open (ERROR_LOG, '>>', "$LOG_DIR/errors") or die "Could not write log file.\n";
    }
    print DEBUG_LOG localtime() . " $my_hostname Starting keepalive v$VERSION log at level $DEBUG.\n" if ($DEBUG);

    @daemon_list = get_daemon_list($DAEMON_DIR,$my_hostname);
    print DEBUG_LOG localtime() . " $my_hostname No daemons to watch in $DAEMON_DIR.\n" if ($DEBUG && ! @daemon_list);

    # The list is read once, so an empty list can never become non-empty:
    # loop only while there is something to watch. (The previous test,
    # $#daemon_list >= -1, was true even for an empty list.)
    while (@daemon_list)
    {
        @down_daemons = status_check($DAEMON_DIR,$my_hostname,@daemon_list);
        restart($DAEMON_DIR,$my_hostname,@down_daemons);
        print DEBUG_LOG localtime() . " $my_hostname Sleeping for $INTERVAL seconds.\n" if ($DEBUG >= 5);
        sleep $INTERVAL;
    }
    close (DEBUG_LOG);
    close (ERROR_LOG);
}
################################################################################
# $Log: keepalive,v $
# Revision 1.22 2004/08/23 18:27:06 root
# Update for perl location.
#
# Revision 1.21 2004/08/23 18:25:56 root
# Updates throughout. Mainly to handle additional logging.
#
# Revision 1.20 2003/08/01 18:08:40 rwest
# Automatically determine just where our config file is.
#
# Revision 1.19 2003/08/01 17:30:30 rwest
# Typo fix.
#
# Revision 1.18 2003/08/01 17:28:04 rwest
# Inserted a routine to properly turn this in to a daemon.
#
# Revision 1.17 2003/08/01 16:33:53 rwest
# More code cleanup.
#
# Revision 1.16 2003/08/01 16:26:53 rwest
# Added version information.
#
#
# Revision 1.15 2003/08/01 16:22:07 rwest
# typo fixes.
#
# Revision 1.14 2003/08/01 15:55:56 rwest
# Fixed notification variable.
#
# Revision 1.13 2003/07/31 18:00:56 rwest
# Added notification ability.
#
# Revision 1.12 2003/07/31 17:40:54 rwest
# More interrupt handling and better logging.
#
# Revision 1.11 2003/07/31 17:35:04 rwest
# Added in signal handling.
#
# Revision 1.10 2003/07/31 17:29:05 rwest
# More major updating. Moved all config variables into the config.pl file.
#
# Revision 1.9 2003/07/31 16:19:08 rwest
# Enabled two different levels of debugging. Anything >= 5 will be very
# verbose. Anything less than that will only log down daemons and restart
# attempts.
# Also changed the flag from -v to -D.
# More major updates. The scripts themselves and this daemon are more solid
# now.
#
# Revision 1.8 2003/07/29 14:36:05 rwest
# added a log file for the debug output.
#
# Revision 1.7 2003/07/29 14:21:48 rwest
# Minor update
#
# Revision 1.6 2003/07/09 20:31:34 rwest
# Enabled the toggling of the DEBUG flag via the command line argument of '-v'
#
# Revision 1.5 2003/07/09 20:26:06 rwest
# Broke apart the individual status check from the check of all of the daemons
# to allow for it to be integrated into the restart process.
#
# Revision 1.4 2003/07/09 18:21:27 rwest
# It works now. :)
#
# Revision 1.3 2003/07/09 18:07:27 rwest
# Cleanup
#
# Revision 1.2 2003/07/09 18:05:28 rwest
# Ok.. working now.
#
# Revision 1.1 2003/07/09 17:56:57 rwest
# Initial revision