/* Source: check_load.c Author: Adam G. Bowen (agbowen@bealenet.com) $Revision: 1.7 $ $Date: 1999/05/17 14:35:26 $ * Program: System load plugin for NetSaint * License: GPL * Copyright (c) 1999 Adam G. Bowen (agbowen@bealenet.com) * * Description: * * This plugin will check the system load on the remote host at ipaddress and * generate an alert if average is above one of the threshold values. * * Other errors result in a STATE_UNKNOWN return. * * Command line: * * check_load ipaddress sys_type * check_load ipaddress sys_type <w1> <c1> <w5> <c5> <w15> <c15> * * Required input: * * ipaddress = The ipaddress of the remote system to run the check on. * sys_type = The remote system type. * * Optional input: * * <w1> = 1 min load average necessary to result in a WARNING state. * <c1> = 1 min load average necessary to result in a CRITICAL state. * <w5> = 5 min load average necessary to result in a WARNING state. * <c5> = 5 min load average necessary to result in a CRITICAL state. * <w15> = 15 min load average necessary to result in a WARNING state. * <c15> = 15 min load average necessary to result in a CRITICAL state. * * Notes: * * If <w1>, <c1>, <w5>, <c5>, <w15> and <c15> are not passed on the command * line, they will be set to the default values in the check_load config file. * * sys_type is used to determine which config file to use to generate the * remote command. 
* * The configuration file /usr/local/netsaint/config/check_load/local * contains the following values: * * RSH_COMMAND|<location of rsh command on netsaint system>| * WARN_LOAD_1|<default 1 min warn average>| * CRIT_LOAD_1|<default 1 min crit average>| * WARN_LOAD_5|<default 5 min warn average>| * CRIT_LOAD_5|<default 5 min crit average>| * WARN_LOAD_15|<default 15 min warn average>| * CRIT_LOAD_15|<default 15 min crit average>| * * The configuration file /usr/local/netsaint/config/check_load/<sys_type> * contains the following values: * * UPTIME_COMMAND|<location of uptime command on system sys_type>| * * $Log: check_load.c,v $ * Revision 1.7 1999/05/17 14:35:26 netsaint * Changed the plugin to use a separate config directory. This directory is the * CONFIG_DIR/command_name directory. * * Revision 1.6 1999/05/14 03:01:17 netsaint * Added the following integer variable: * socket_name * Changed the call check_net to open_socket. Added a call to recv_socket and * close_socket. The check_net subroutine was changed to provide more * flexibility. * Added a call to the subroutine get_command_name. * Changed the error checking routines to ensure that any error causes the * program to terminate. * * Revision 1.5 1999/05/07 15:30:26 netsaint * Removed the char variable error_buffer and the FILE *error_fp variable. * These variables are no longer needed since the printing of the error file is * handled in a subroutine. * Added a call to the check_output_file subroutine. This routine checks the * status of the output file. Also removed the struct stat file_stat variable. * Added a call to the check_consistency subroutine. This subroutine checks * that the warn value is less than the critical value. * * Revision 1.4 1999/05/03 14:48:17 netsaint * Changed the config_file_prefix to config_file_fs_prefix. 
Added the following * character variables: * config_file_net_prefix * config_file_net * expected * protocol * Added the following integer variables: * result * telnet_port * Added signal handler for the SIGALRM signal. All these changes were to * prevent the plugin from hanging when attempting to perform a check on a remote * system that is down or not working properly. Prior to issuing the rsh * command, the plugin will attempt to establish a telnet session to the * remote system. If that session cannot be established, the * plugin will not issue the rsh command. Prior to establishing the telnet * connection, an alarm is set. If the telnet connection does not return * control to the plugin before the timer expires, a SIGALRM signal will be * sent to the process which will cause the plugin to exit with a * STATE_CRITICAL error. * Added struct stat file_stat. * This is used to get the size of the out_put_file. If the size is zero, * the plugin exits with a STATE_UNKNOWN. Also changed the error_file printing * to a subroutine in the plugins.h file. * * Revision 1.3 1999/04/28 15:16:27 netsaint * Added a </dev/null redirect of the input to the rsh command. This was to * prevent the netsaint program from stopping when running this plugin. Also * changed the strcat of CONFIG_DIR to the local and remote config_files to * strcpy. This was to ensure that no leading characters were left in the * variable before adding the CONFIG_DIR location. * * Revision 1.2 1999/04/27 12:03:32 netsaint * Removed the awk_command and awk_options variables. Changed the program to * look for the string load average: from the output returned from the uptime * command. This will prevent the program from crashing when checking the * load of a system that has been up for less than a day. 
* * Revision 1.1 1999/04/23 20:54:23 netsaint * Initial revision * * */ #include "/usr/local/src/netsaint/include/plugins.h" int main(int argc, char *argv[]) { char expected[MAX_CHARS]=""; char token_sep[] = ":,"; char command_line[MAX_CHARS]; char command_name[MAX_CHARS]; char config_file_local[MAX_CHARS]; char config_file_remote[MAX_CHARS]; char config_file_net[MAX_CHARS]; char error_file[MAX_CHARS]; char input_buffer[MAX_CHARS]; char ip_address[MAX_CHARS]; char load_crit_1[MAX_CHARS]; char load_crit_5[MAX_CHARS]; char load_crit_15[MAX_CHARS]; char load_warn_1[MAX_CHARS]; char load_warn_5[MAX_CHARS]; char load_warn_15[MAX_CHARS]; char out_put_file[MAX_CHARS]; char protocol[MAX_CHARS]; char port_telnet[MAX_CHARS]; char rsh_command[MAX_CHARS]; char sub_string[MAX_CHARS]; char system_name[MAX_CHARS]; char temp_value[MAX_CHARS]; char uptime_command[MAX_CHARS]; FILE *out_put_fp; float crit_load_1; float crit_load_5; float crit_load_15; float load_avrg_1; float load_avrg_5; float load_avrg_15; float warn_load_1; float warn_load_5; float warn_load_15; int get_defaults; int result; int return_value; int socket_name; int telnet_port; /* Initialize alarm signal handling */ signal(SIGALRM,alarm_signal); strcpy(command_name,get_command_name(argv[0])); if(!((argc==3) || (argc==9))) { printf("\n"); printf(" Incorrect number of arguments supplied\n"); printf("\n"); printf(" System load plugin for NetSaint\n"); printf(" Copyright (c) 1999 Adam G. 
Bowen (agbowen@bealenet.com)\n"); printf(" $Revision: 1.7 $\n"); printf(" Last Modified $Date: 1999/05/17 14:35:26 $\n"); printf(" License: GPL\n"); printf("\n"); printf(" Description:\n"); printf("\n"); printf(" This plugin will check the system load on the remote host at ipaddress and\n"); printf(" generate an alert if average is above one of the threshold values.\n"); printf("\n"); printf(" Usage: %s ipaddress sys_type\n",command_name); printf(" Usage: %s ipaddress sys_type <w1> <c1> <w5> <c5> <w15> <c15>\n",command_name); printf("\n"); printf(" Required input:\n"); printf("\n"); printf(" ipaddress = The ipaddress of the remote system to run the check on.\n"); printf(" sys_type = The remote system type.\n"); printf("\n"); printf(" Optional input:\n"); printf("\n"); printf(" <w1> = 1 min load average necessary to result in a WARNING state.\n"); printf(" <c1> = 1 min load average necessary to result in a CRITICAL state.\n"); printf(" <w5> = 5 min load average necessary to result in a WARNING state.\n"); printf(" <c5> = 5 min load average necessary to result in a CRITICAL state.\n"); printf(" <w15> = 15 min load average necessary to result in a WARNING state.\n"); printf(" <c15> = 15 min load average necessary to result in a CRITICAL state.\n"); printf("\n"); printf(" If <w1>, <c1>, <w5>, <c5>, <w15> and <c15> are not passed on the command line,\n"); printf(" they will be set to the default values in the %s config file.\n", command_name); printf("\n"); printf(" sys_type is used to determine which config file to use to generate the\n"); printf(" remote command.\n"); printf("\n"); return_value = STATE_UNKNOWN; } else { /* Set up config files and get the command line information */ strcpy(ip_address,argv[1]); strcpy(system_name,argv[2]); strcpy(config_file_local,CONFIG_DIR); strcpy(config_file_remote,CONFIG_DIR); strcpy(config_file_net,CONFIG_DIR); strcat(config_file_local,command_name); strcat(config_file_remote,command_name); strcat(config_file_net,CHECK_TELNET); 
strcat(config_file_local,"/local"); strcat(config_file_remote,"/"); strcat(config_file_net,"/"); strcat(config_file_remote,system_name); strcat(config_file_net,system_name); if(argc == 3) { get_defaults = TRUE; } else { get_defaults = FALSE; strcpy(load_warn_1,argv[3]); strcpy(load_crit_1,argv[4]); strcpy(load_warn_5,argv[5]); strcpy(load_crit_5,argv[6]); strcpy(load_warn_15,argv[7]); strcpy(load_crit_15,argv[8]); } /* Check if config files exist */ if (access(config_file_local, EXISTS) != 0 ) { printf("Config file %s does not exist!\n",config_file_local); return_value = STATE_UNKNOWN; } else if (access(config_file_remote, EXISTS) != 0 ) { printf("Config file %s does not exist!\n",config_file_remote); return_value = STATE_UNKNOWN; } else if (access(config_file_net, EXISTS) != 0 ) { printf("Config file %s does not exist!\n",config_file_net); return_value = STATE_UNKNOWN; } else { /* Local config file variables */ if((get_defaults == TRUE) && ((return_value=get_var("WARN_LOAD_1", config_file_local, load_warn_1)) != STATE_OK)) { printf("WARN_LOAD_1 entry not found in config file %s!\n",config_file_local); } else if((get_defaults == TRUE) && ((return_value=get_var("CRIT_LOAD_1", config_file_local, load_crit_1)) != STATE_OK)) { printf("CRIT_LOAD_1 entry not found in config file %s!\n",config_file_local); } else if((get_defaults == TRUE) && ((return_value=get_var("WARN_LOAD_5", config_file_local, load_warn_5)) != STATE_OK)) { printf("WARN_LOAD_5 entry not found in config file %s!\n",config_file_local); } else if((get_defaults == TRUE) && ((return_value=get_var("CRIT_LOAD_5", config_file_local, load_crit_5)) != STATE_OK)) { printf("CRIT_LOAD_5 entry not found in config file %s!\n",config_file_local); } else if((get_defaults == TRUE) && ((return_value=get_var("WARN_LOAD_15", config_file_local, load_warn_15)) != STATE_OK)) { printf("WARN_LOAD_15 entry not found in config file %s!\n",config_file_local); } else if((get_defaults == TRUE) && 
((return_value=get_var("CRIT_LOAD_15", config_file_local, load_crit_15)) != STATE_OK)) { printf("CRIT_LOAD_15 entry not found in config file %s!\n",config_file_local); } else if((return_value=get_var("RSH_COMMAND", config_file_local, rsh_command)) != STATE_OK) { printf("RSH_COMMAND entry not found in config file %s!\n", config_file_local); } /* Remote config file variables */ else if((return_value=get_var("UPTIME_COMMAND", config_file_remote, uptime_command)) != STATE_OK) { printf("UPTIME_COMMAND entry not found in config file %s!\n", config_file_remote); } /* Network config file variables */ else if((return_value=get_var("TELNET_PORT", config_file_net, port_telnet)) != STATE_OK) { printf("TELNET_PORT entry not found in config file %s!\n",config_file_net); } else if((return_value=get_var("TELNET_PROTO", config_file_net, protocol)) != STATE_OK) { printf("TELNET_PROTO entry not found in config file %s!\n",config_file_net); } else { /* Check alert level consistency */ warn_load_1=atof(load_warn_1); crit_load_1=atof(load_crit_1); warn_load_5=atof(load_warn_5); crit_load_5=atof(load_crit_5); warn_load_15=atof(load_warn_15); crit_load_15=atof(load_crit_15); if((result=check_consistency(warn_load_1, crit_load_1)) != STATE_OK) { return_value = result; } else if((result=check_consistency(warn_load_5, crit_load_5)) != STATE_OK) { return_value = result; } else if((result=check_consistency(warn_load_15, crit_load_15)) != STATE_OK) { return_value = result; } else { /* Check the network */ telnet_port=atoi(port_telnet); alarm(TIME_OUT); if((result=open_socket(&socket_name, ip_address, telnet_port, protocol)) != STATE_OK) { return_value=exit_error(result,ip_address,protocol,telnet_port); } else if((result=recv_socket(&socket_name, expected)) != STATE_OK) { return_value=exit_error(result,ip_address,protocol,telnet_port); } else if((result=close_socket(&socket_name)) != STATE_OK) { return_value=exit_error(result,ip_address,protocol,telnet_port); } else { alarm(0); /* Generate 
out_put and error file names */ strcpy(out_put_file, tmpnam(NULL)); strcpy(error_file, tmpnam(NULL)); /* Set the command line and arguments to use for the check */ sprintf(command_line,"%s %s %s </dev/null >%s 2>%s",rsh_command, ip_address, uptime_command, out_put_file, error_file); /* Run the command */ system(command_line); return_value=check_output_file(out_put_file); if (return_value != STATE_OK) { print_error(error_file); } else { out_put_fp=fopen(out_put_file,"r"); /* Retrive single line from output file */ fgets(input_buffer,MAX_CHARS-1,out_put_fp); /* Populate variables */ strcpy(sub_string,strstr(input_buffer,"load average:")); strcpy(temp_value,strtok(sub_string,token_sep)); load_avrg_1 = atof(strcpy(temp_value,strtok(NULL,token_sep))); load_avrg_5 = atof(strcpy(temp_value,strtok(NULL,token_sep))); load_avrg_15 = atof(strcpy(temp_value,strtok(NULL,token_sep))); /* Close output file */ fclose(out_put_fp); /* Check the system load against warning and critical levels */ if((load_avrg_1 >= crit_load_1)||(load_avrg_5 >= crit_load_5)||(load_avrg_15 >= crit_load_15)) { return_value=STATE_CRITICAL; } else if((load_avrg_1 >= warn_load_1)||(load_avrg_5 >= warn_load_5)||(load_avrg_15 >= warn_load_15)) { return_value=STATE_WARNING; } if(return_value==STATE_OK) { printf("System load ok - load average: %.2f, %.2f, %.2f\n", load_avrg_1,load_avrg_5,load_avrg_15); } else { printf("System load error - load average: %.2f, %.2f, %.2f\n", load_avrg_1,load_avrg_5,load_avrg_15); } } /* Remove the output and error files */ remove(out_put_file); remove(error_file); } } } } } return return_value; }