Next Previous Contents

3. File procautostart.cpp

// Save this file from your browser as a plain-text file named 'procautostart.cpp'.


// Author: Al Dev alavoor@yahoo.com
//
// Program to monitor the unix processes
// and automatically re-start them if they die
//
//************************************************************************
//  NOTE: This program uses Al Dev's String class library. Download the String
//        class from  http://www.linuxdoc.org/HOWTO/C++Programming-HOWTO.html
//************************************************************************


#include <stdio.h>
#include <strings.h> // C strings
#include <unistd.h> // for getopt
#include <alloc.h> // for free

#include <errno.h> // for kill() - error numbers command
extern int errno;

#ifdef Linux
#include <asm/errno.h> // for kill() - error numbers command
#endif

#include <sys/types.h> // for kill() command
#include <signal.h> // for kill() command
#include <sys/wait.h> // for wait()
#include <stdlib.h> // for setenv()
#include <time.h> // for strftime()
#include <libgen.h> // for basename()

//#include <syslog.h> // for logging 

#include "debug.h"
#include "String.h"
#include "StringTokenizer.h"

// Sizes (in bytes) of the small and large scratch character buffers.
#define BUFF_HUN        100
#define BUFF_THOU       1024
// Initial value of main()'s child-pid variable before any process is started.
#define PR_INIT_VAL     -10
#define WAIT_FOR_SYS    5  // seconds to sleep after (re)starting the process
#define DEF_SL_SECS     6  // default polling interval in seconds (overridden by -n)
#define SAFE_MEM        10  // spare, zero-filled argv slots calloc'ed beyond the token count

// Readable names for the three boolean arguments of error_msg().
#define LOG_NO          false  // do not output to logfile
#define LOG_YES         true  // do output to logfile
#define STD_ERR_NO      false  // do not print to std err
#define STD_ERR_YES     true  // do print to std err
#define DATE_NO         false  // do not print date
#define DATE_YES        true  // do print date

// Forward declarations; the definitions follow main().
int start_process(char *commandline, char *args[], char **envp, pid_t proc_pid);
int fork2(pid_t parent_pid, unsigned long tsecs);
inline void error_msg(char *mesg_out, char *lg_file, bool pr_lg, bool std_err, bool pr_dt);

//////////////////////////////////////////////
// To test this program use --
// procautostart -n 5 -c 'monitor_test dummy1 -a dummy2 -b dummy3  ' &
//////////////////////////////////////////////

// Prints the option summary and an example invocation on standard output,
// then terminates the program with exit status -1.  Never returns.
//
//   argv - the program's argument vector; only argv[0] (the program name)
//          is read.
void usage(char **argv)
{
        const char *prog_name = argv[0];

        printf("%s:\n", prog_name);

        // Fixed help text: none of it needs formatting, so emit it in one go.
        fputs("\ninterval specification:\n"
              " -n # -- seconds\n"
              " -m # -- microseconds\n"
              " -o # -- nanoseconds\n"
              "\nprocess specification:\n"
              " -c 'cmdline'\n"
              " -p pidfile -- if specified reads process pid from this file\n"
              "\n",
              stdout);

        printf("\nUsage : %s -n <seconds> -m <microsecond> -o <nanosecond> -c '<command>'\n", prog_name);
        fputs("\nExample: procautostart -n 5 -c 'monitor_test dummy1 -a dummy2 -b dummy3  ' \n", stdout);

        exit(-1);
}

// Program entry point.
//
// Parses the command-line options, detaches from the invoking shell by
// forking, launches the command given with -c through start_process(), and
// then loops forever: every sleep_sec seconds it probes the child with
// kill(pid, 0) and relaunches the command when the probe fails.
//
//   -n <secs>   polling interval in seconds (default DEF_SL_SECS)
//   -m <usecs>  parsed and stored, but not used by the loop below
//   -o <nsecs>  parsed and stored, but not used by the loop below
//   -c <cmd>    command line to run and keep alive (required)
//   -p <file>   pid file; parsed and stored, but not used below
//   -h          print usage and exit
//
// Log entries go to mon/<name><pid>.log, where <name> is the first 20
// characters of the command's basename and <pid> is the PID this program
// had before it detached.
//
// Never returns normally; fatal start-up errors end in exit(-1).
int main(int argc, char **argv, char **envp)
{
        unsigned long   sleep_sec, sleep_micro, sleep_nano;
        int     ch;
        pid_t   proc_pid;
        pid_t   detach_pid;
        int pr_no = PR_INIT_VAL;
        char mon_log[40];
        char print_log[200];
        char *pr_name = NULL, **cmdargs = NULL;
        String cmdline;
        char *pidfile = NULL;

        // you can turn on debug by editing Makefile and put -DDEBUG_PRT in gcc
        debug_("test debug", "this line");
        debug_("argc", argc);

        // PID of this process *before* detaching; it is only used to build
        // unique file names under mon/.
        proc_pid = getpid();
        debug_("PID proc_pid", (int) proc_pid);

        // Create directory to hold log, temp files
        system("mkdir mon 1>/dev/null 2>/dev/null");

        sleep_sec = DEF_SL_SECS ; // default sleep time
        sleep_micro = 0; // default micro-sleep time
        sleep_nano = 0; // default nano-sleep time
        optarg = NULL;
        // A letter followed by ':' takes an argument.  'p' is listed so that
        // the -p option advertised by usage() reaches its case below, and
        // 'h' takes no argument.
        while ((ch = getopt(argc, argv, "n:m:o:c:p:h")) != -1)
        {
                switch (ch)
                {
                        case 'n':
                                debug_("scanned option n ", optarg);
                                sleep_sec = atoi(optarg);
                                debug_("sleep_sec", sleep_sec);
                                break;
                        case 'm':
                                debug_("scanned option m ", optarg);
                                sleep_micro = atoi(optarg);
                                debug_("sleep_micro", sleep_micro);
                                break;
                        case 'o':
                                debug_("scanned option o ", optarg);
                                sleep_nano = atoi(optarg);
                                debug_("sleep_nano", sleep_nano);
                                break;
                        case 'c':
                                debug_("scanned option c ", optarg);
                                cmdline = optarg;
                                debug_("cmdline", cmdline.val());
                                break;
                        case 'p':
                                pidfile = strdup(optarg);
                                break;
                        case 'h':
                                debug_("scanned option", "h");
                                usage(argv);
                                break;
                        default:
                                debug_("ch", "default");
                                usage(argv);
                                break;
                }
        }

        if (cmdline.length() == 0)
                usage(argv);

        // Detach from the invoking process: the parent exits and the child
        // carries on.  A failed fork must not be mistaken for "parent".
        detach_pid = fork();
        if (detach_pid < 0)
        {
                fprintf(stderr, "\nFatal Error: Unable to detach from the parent process\n");
                exit(-1);
        }
        if (detach_pid > 0)
                exit(0);

        debug_("cmdline", cmdline.val());

        // Start the process
        {
                // Split the command line into an argv-style vector.  calloc
                // zero-fills the SAFE_MEM spare slots, which also provides
                // the terminating NULL entry execve() requires.
                StringTokenizer strtokens(cmdline.val());  // string tokenizer is borrowed from Java
                cmdargs = (char **) calloc(strtokens.countTokens() + SAFE_MEM, sizeof(char *));
                if (cmdargs == NULL)
                {
                        fprintf(stderr, "\nFatal Error: Out of memory\n");
                        exit(-1);
                }
                debug_("countTokens()", strtokens.countTokens());
                for (int tmpii = 0; strtokens.hasMoreTokens(); tmpii++)
                {
                        cmdargs[tmpii] = strdup(strtokens.nextToken().val());
                        debug_("tmpii", tmpii);
                        debug_("cmdargs[tmpii]", (char *) cmdargs[tmpii]);
                }
                if (cmdargs[0] == NULL)
                {
                        // e.g. -c '   ' : nothing to execute
                        fprintf(stderr, "\nFatal Error: Empty command line\n");
                        exit(-1);
                }

                // Process name = basename of the first word.  basename() is
                // allowed to modify its argument, so hand it a scratch copy
                // and keep cmdargs[0] intact for every later restart.
                {
                        char *path_copy = strdup(cmdargs[0]);
                        if (path_copy != NULL)
                        {
                                pr_name = strdup(basename(path_copy));
                                free(path_copy);
                        }
                }
                if (pr_name == NULL)
                {
                        fprintf(stderr, "\nFatal Error: Out of memory\n");
                        exit(-1);
                }

                // In case execve you MUST NOT have trailing ampersand & in the command line!!
                pr_no = start_process(cmdargs[0], & cmdargs[0], envp, proc_pid); // Using execve ....

                debug_("The child pid", pr_no);
                if (pr_no < 0)
                {
                        fprintf(stderr, "\nFatal Error: Failed to start the process\n");
                        exit(-1);
                }
                sleep(WAIT_FOR_SYS); // wait for the process to come up
        }

        // Log file name: unique combination of (at most 20 characters of)
        // the process name and this program's pre-detach PID.
        snprintf(mon_log, sizeof(mon_log), "mon/%.20s%d.log", pr_name, (int) proc_pid);

        // Print out pid to log file
        if (pr_no > 0)
        {
                snprintf(print_log, sizeof(print_log), "Process ID of %s is %d", pr_name, pr_no);
                error_msg(print_log, mon_log, LOG_YES, STD_ERR_NO, DATE_YES);
        }

        // Monitor the process and restart it when it is gone.
        while (1)
        {
                // kill() with signal 0 sends nothing; it returns 0 when the
                // target exists and -1 (with errno set) otherwise.  A pid
                // <= 0 is never probed: kill() would then address a process
                // group or every process and could report success although
                // our child was never started.
                if (pr_no <= 0 || kill(pr_no, 0) != 0)
                {
                        snprintf(print_log, sizeof(print_log),
                                "Error ESRCH: No process or process group can be found for %d", pr_no);
                        error_msg(print_log, mon_log, LOG_YES, STD_ERR_YES, DATE_YES);

                        pr_no = start_process(cmdargs[0], & cmdargs[0], envp, proc_pid); // Using execve ....

                        snprintf(print_log, sizeof(print_log), "Starting process %s", pr_name);
                        error_msg(print_log, mon_log, LOG_YES, STD_ERR_NO, DATE_NO);
                        sleep(WAIT_FOR_SYS); // wait for the process to come up
                }

                snprintf(print_log, sizeof(print_log), "Process ID of %s is %d", pr_name, pr_no);
                error_msg(print_log, mon_log, LOG_YES, STD_ERR_NO, DATE_NO);
                sleep(sleep_sec);

                // For finer-grained polling, sleep_micro / sleep_nano could be
                // fed to usleep() or nanosleep() here (see 'man 3 usleep',
                // 'man 2 nanosleep'); where neither exists, select() or poll()
                // with no file descriptors and a timeout does the same job.
        }
}

// Reports a message to the log file and/or to standard error.
//
//   mesg_out - message text
//   lg_file  - path of the log file; entries are appended
//   pr_lg    - when true, append the message to lg_file
//   std_err  - when true, also print the message on standard error
//   pr_dt    - when true (and pr_lg is true), write a date line in front of
//              the log entry
//
// The log file is written directly rather than by running "date" and "echo"
// through system(): a message or file name that contains a quote character
// can no longer break or alter a shell command, no fixed-size command buffer
// can overflow, and no shell is spawned per log entry.  The entry layout is
// kept: an optional date line, a blank line, the message, a blank line.
inline void error_msg(char *mesg_out, char *lg_file, bool pr_lg, bool std_err, bool pr_dt)
{
        if (pr_lg) // output to log file
        {
                FILE *log_fp = fopen(lg_file, "a");
                if (log_fp == NULL)
                {
                        fprintf(stderr, "\nWarning: cannot open log file %s\n", lg_file);
                }
                else
                {
                        if (pr_dt)
                        {
                                // Same fields as the default output of date(1)
                                // in the POSIX locale.
                                char stamp[64];
                                time_t now = time(NULL);
                                struct tm *now_tm = localtime(&now);
                                if (now_tm != NULL
                                        && strftime(stamp, sizeof(stamp), "%a %b %e %H:%M:%S %Z %Y", now_tm) > 0)
                                        fprintf(log_fp, "%s\n", stamp);
                        }
                        fprintf(log_fp, "\n%s\n\n", mesg_out);
                        fclose(log_fp);
                }
        }

        if (std_err) // output to standard error
                fprintf(stderr, "\n%s\n", mesg_out);
}

// start a process and returns PID or -ve value if error
// The main() function has envp arg as in - main(int argc, char *argv[], char **envp)
int start_process(char *commandline, char *args[], char **envp, pid_t parent_pid)
{
        int ff;
        unsigned long  tsecs;

        tsecs = time(NULL); // time in secs since Epoch 1 Jan 1970
        debug_("Time tsecs", tsecs); 

        // Use fork2() instead of fork to avoid zombie child processes
        switch (ff = fork2(parent_pid, tsecs))  // fork creates 2 process each executing the following lines
        {
        case -1:
                fprintf(stderr, "\nFatal Error: start_process() - Unable to fork process\n");
                _exit(errno);
                break;
        case 0:  // child process
                debug_("\nStarting the start child process\n", " ");
                // For child process to ignore the interrupts (i.e. to put
                // child process in "background" mode.
                // Signals are sent to all processes started from a 
                // particular terminal. Accordingly, when a program is to be run non-interactively
                // (started by &), the shell arranges that the program will ignore interrupts, so
                // it won't be stopped by interrupts intended for foreground processes.
                // Hence if previous value of signal is not IGN than set it to IGN.

                // Note: Signal handlers cannot be set for SIGKILL, SIGSTOP 
                if (signal(SIGINT, SIG_IGN) == SIG_ERR)
                        fprintf(stderr, "\nSignal Error: Not able to set signal to SIGINT\n");
                else
                if (signal(SIGINT, SIG_IGN) != SIG_IGN) // program already run in background
                        signal(SIGINT, SIG_IGN);  // ignore interrupts

                if (signal(SIGHUP, SIG_IGN) == SIG_ERR)
                        fprintf(stderr, "\nSignal Error: Not able to set signal to SIGHUP\n");
                else
                if (signal(SIGHUP, SIG_IGN) != SIG_IGN) // program already run in background
                        signal(SIGHUP, SIG_IGN);  // ignore hangups

                if (signal(SIGQUIT, SIG_IGN) == SIG_ERR)
                        fprintf(stderr, "\nSignal Error: Not able to set signal to SIGQUIT\n");
                else
                if (signal(SIGQUIT, SIG_IGN) != SIG_IGN) // program already run in background
                        signal(SIGQUIT, SIG_IGN);  // ignore Quit

                if (signal(SIGABRT, SIG_IGN) == SIG_ERR)
                        fprintf(stderr, "\nSignal Error: Not able to set signal to SIGABRT\n");
                else
                if (signal(SIGABRT, SIG_IGN) != SIG_IGN) // program already run in background
                        signal(SIGABRT, SIG_IGN);  // ignore ABRT

                if (signal(SIGTERM, SIG_IGN) == SIG_ERR)
                        fprintf(stderr, "\nSignal Error: Not able to set signal to SIGTERM\n");
                else
                if (signal(SIGTERM, SIG_IGN) != SIG_IGN) // program already run in background
                        signal(SIGTERM, SIG_IGN);  // ignore TERM

                // sigtstp - Stop typed at tty. Ignore this so that parent process 
                // be put in background with CTRL+Z or with SIGSTOP
                if (signal(SIGTSTP, SIG_IGN) == SIG_ERR)
                        fprintf(stderr, "\nSignal Error: Not able to set signal to SIGTSTP\n");
                else
                if (signal(SIGTSTP, SIG_IGN) != SIG_IGN) // program already run in background
                        signal(SIGTSTP, SIG_IGN);  // ignore TSTP

                // You can use debug_ generously because they do NOT increase program size!
                debug_("before execve commandline", commandline);
                debug_("before execve args[0]", args[0]);
                debug_("before execve args[1]", args[1]);
                debug_("before execve args[2]", args[2]);
                debug_("before execve args[3]", args[3]);
                debug_("before execve args[4]", args[4]);
                debug_("before execve args[5]", args[5]);
                debug_("before execve args[6]", args[6]);
                debug_("before execve args[7]", args[7]);
                debug_("before execve args[8]", args[8]);
                debug_("before execve args[9]", args[9]);
                execve(commandline, args, envp);

                // execlp, execvp does not provide expansion of metacharacters
                // like <, >, *, quotes, etc., in argument list. Invoke
                // the shell /bin/sh which then does all the work. Construct
                // a string 'commandline' that contains the complete command
                //execlp("/bin/sh", "sh", "-c", commandline, (char *) 0);  // if success than NEVER returns !!

                // If execlp returns than there is some serious error !! And
                // executes the following lines below...
                fprintf(stderr, "\nFatal Error: Unable to start child process\n");
                ff = -2;
                exit(127);
                break;
        default: // parent process
                // child pid is ff;
                if (ff < 0)
                        fprintf(stderr, "\nFatal Error: Problem while starting child process\n");

                {
                        char    buff[BUFF_HUN];
                        FILE    *fp1;
                        sprintf(buff, "mon/%d%lu.out", (int) parent_pid, tsecs); // tsecs is unsigned long
                        fp1 = fopen(buff, "r");
                        if (fp1 != NULL)
                        {
                                buff[0] = '\0';
                                fgets(buff, BUFF_HUN, fp1);
                                ff = atoi(buff);
                        }
                        fclose(fp1);
                        debug_("start process(): ff - ", ff);
#ifndef DEBUG_PRT
                        sprintf(buff, "rm -f mon/%d%lu.out", (int) parent_pid, tsecs);
                        system(buff);
#endif // DEBUG_PRT
                }

                // define wait() to put child process in foreground or else put in background
                //waitpid(ff, & status, WNOHANG || WUNTRACED);
                //waitpid(ff, & status, WUNTRACED);
                //wait(& status); 

                break;
        }
        return ff;
}

/* fork2() -- double fork: like fork(), but the new process is immediately
 *            orphaned, so it will not leave a zombie when it exits.
 *
 * The caller forks an intermediate process, which forks the final process.
 * The intermediate process records the final process's PID in the file
 * mon/<parent_pid><tsecs>.out and exits; the caller waits for it.
 *
 * Returns 0 in the new (final) process.
 * Returns 1 in the caller on success -- NOT a meaningful pid; read the pid
 * from the file named above.
 * Returns -1 in the caller on failure, with errno set.
 *
 * The caller cannot wait() for the new process (it is not its child).
 * This version assumes that SIGCHLD has NOT been caught or ignored; if it
 * has, plain fork() should be used instead.
 */
int fork2(pid_t parent_pid, unsigned long tsecs)
{
        pid_t   mainpid, child_pid = -10;
        int     status;
        char    buff[BUFF_HUN];

        mainpid = fork();
        if (mainpid == 0)
        {
                /* Intermediate process. */
                child_pid = fork();

                if (child_pid == 0)
                        return 0;        /* final process: back to the caller's code */

                if (child_pid == -1)
                        _exit(errno);    /* assumes all errnos are <256 */

                /* Publish the final process's pid, then disappear. */
                debug_("fork2 child_pid : ", (int) child_pid);
                sprintf(buff, "echo %d > mon/%d%lu.out", (int) child_pid, (int) parent_pid, tsecs);
                system(buff);
                _exit(0);
        }

        /* Original caller: reap the intermediate process. */
        if (mainpid < 0 || waitpid(mainpid, &status, 0) < 0)
        {
                return -1;
        }

        if (!WIFEXITED(status))
        {
                errno = EINTR;  /* well, sort of :-) */
                return -1;
        }

        if (WEXITSTATUS(status) == 0)
        {
                return 1;
        }

        /* The intermediate process passed its errno back as exit status. */
        errno = WEXITSTATUS(status);
        return -1;
}

//
// char respawn[1024];
// strcpy(respawn, cmdline);
// For "C" program use kill(pid_t process, int signal) function. 
// #include <signal.h> // See 'man 2 kill'  
// Returns 0 on success and -1 with errno set.
//              kill -0 $pid 2>/dev/null || respawn
// To see the exit status, run --
//              kill -0 $pid 2>/dev/null; echo $?
// Return value 0 is success and others mean failure
// Sending 0 does not do anything to target process, but it tests
// whether the process exists. The kill command will set its exit
// status based on this process.
//
// Alternatively, you can use
//              ps -p $pid >/dev/null 2>&1 || respawn
// To see the exit status, run --
//              ps -p $pid >/dev/null 2>&1; echo $?
// Return value 0 is success and others mean failure


//***********************************************************************
//      You can use pidfile to get the process id
//**********************************************************************
/* 
void poll_pidfile(char *pidfile)
{
    struct stat buf;
    struct timeval interval =       
    {
        tv_sec:0, tv_usec:(long int) 1e4
    }; // 10 miliseconds

    while (stat(pidfile, & buf) ? errno == ENOENT : buf.st_size == 0)
    {
        struct timeval i = interval;
        select(1, NULL, NULL, NULL, & i);
    }
}

int start_command(char **args, char **envp, char *pidfile)
{
    pid_t cmdpid;

    if (pidfile != NULL)
    {
        switch (unlink(pidfile))
        {
            case ENOENT:
            case 0:
                break;

            default:
                return -1;
        }
    }

    switch (cmdpid = fork2())
    {
        case 0: // child
            execve(args[0], args, envp);
            exit(-1);

        case -1: // error
            return -1;

        default: // parent
            break;
    }

    if (pidfile != NULL)
    {
        FILE *pf;

        poll_pidfile(pidfile);

        if ((pf = fopen(pidfile, "r")) == NULL)
        {
            syslog(LOG_ERR, "failed to read pidfile, using fork2 pid");
        }
        else
        {
            char textpid[1024];
            pid_t pid;

            fgets(textpid, sizeof(textpid), pf);
            if ((pid = atoi(textpid)) != -1)
            {
                cmdpid = pid;
            } else
            syslog(LOG_ERR,
                    "failed to find pid in pidfile, using fork2 pid");
            fclose(pf);
        }
    }

    return cmdpid;
}
*/


Next Previous Contents