/* * Read network log files (satlinkstats) and create a list of failures and duration. * * $Id: net-failures.c,v 1.14 2019/10/17 05:28:07 grog Exp grog $ */ #include #include #include #include #include #include #include #include "timefuncs.h" #define SMALLBUF 20 #define DAY 86400 time_t start_timestamp; /* time to start evaluation */ # define END_OF_TIME 0x7ff00000 time_t end_timestamp = END_OF_TIME; /* time to end evaluation */ time_t midnight; /* timestamp of midnight today */ time_t start_outage; /* timestamp current outage started */ time_t last_outage_end; /* timestamp previous outage ended */ time_t uptime; /* time up between last outage and this one */ time_t first_sample; /* first entry in input */ int outages; /* total number of outages */ int outage_time; /* and total time */ int outage_time_daytime; /* total time during working hours */ int longest_outage = 0; /* time of longest outage */ /* these copies over entire time */ int total_outages; /* total number of outages */ int total_outage_time; /* and total time */ char todays_date [SMALLBUF]; /* YYYYMMDD date */ char last_date [SMALLBUF]; /* YYYYMMDD date */ /* Info from input */ time_t now; /* current entry timestamp */ float linkstat; /* link status */ int systems; /* number of systems available */ char comments [1024]; char now_date [SMALLBUF]; /* YYYYMMDD from last input */ int link_state = 1; /* current state of link */ char start_time [80]; /* start time for printing */ char end_time [80]; /* end time for printing */ char longest_outage_start [80]; /* and times to save */ char longest_outage_end [80]; int all_disconnects; /* set to count only link dropouts */ int summarystats; /* set if we want stats per day instead of per incident */ int daytime; /* print summary stats for daytime only */ int summary; /* for non-summary only: one line summary over total period */ /* * Verbosity. 1 is -v (more stuff, but not now), and -1 is "ignore days with * perfect record" */ int verbose = 0; void usage (char *me) { fprintf (stderr, "Usage:\n" "\t%s [-c] [-e end-date] [-s start-date] [-S] [-q] [-v]\n" "\t-c:\toutput count and duration of outages per day\n" "\t-d\tprint summary stats for daytime only\n" "\t-e\tend at date\n" "\t-s\tstart at date\n" "\t-S\tproduce one-line summary\n" "\t-q\tbrief output (ignore days with perfect uptime)\n" "\t-v:\tverbose output\n" "\tOtherwise output details of each outage\n", me ); exit (1); } /* * Read in a line and parse to global locations. * Return 1 for success, 0 for failure (presumably EOF). */ int getinfo () { char line [1024]; if (fgets (line, 1024, stdin) == NULL) return 0; if (sscanf (line, "%d %f %d %s\n", (int *) &now, &linkstat, &systems, comments) != 4) { puts (line); return 0; } if ((all_disconnects == 0) && (systems > 0)) linkstat = 1.0; /* no fake link down messages */ strftime (now_date, 8, "%Y%m%d", localtime (&now)); return 1; } char *HMS (int seconds, char *result) { int minutes = seconds / 60; int hours = seconds / 3600; minutes -= hours* 60; seconds = seconds % 60; if (hours > 23) { int days = hours / 24; hours %= 24; sprintf (result, "%d days, %02d:%02d:%02d", days, hours, minutes, seconds); } else sprintf (result, "%02d:%02d:%02d", hours, minutes, seconds); return result; } /* * Print statistics for a day when using -c option. */ void print_stats () { time_t now = time (NULL); int duration; /* default to a day at a time */ time_t start; /* start of this measurement */ int duration_daytime; /* default to a day at a time */ time_t start_daytime; /* start of this measurement */ /* Whole day */ start = midnight; duration = DAY; /* Working hours */ start_daytime = midnight + 32400; /* 9:00 */ duration_daytime = 46800; /* 13 hours to 22:00 */ if ((now - start) < duration) /* today, duration is less than a whole day */ duration = now - start; if ((now - start_daytime) < duration_daytime) /* today, duration is less than a whole day */ duration_daytime = now - start_daytime; if (link_state == 0) /* still down, */ { outage_time += start + duration - (start_outage > start? start_outage : start); /* * We're making the implicit assumption here that our data are * complete. If we miss a day, it means that we've been down all * that time. But I don't check this when I print things out, at * least not at the moment, so we could end up with outages * lasting more than a day (correct) and availability < 0 * (incorrect).. */ start_outage = start + duration; /* for next time */ outage_time_daytime += start_daytime + duration_daytime - (start_outage > start_daytime? start_outage : start_daytime); } /* * Convert to readable format. * * output format: * number of outages * duration of outages * number of outages during daytime * duration of outages during daytime * % uptime * % uptime during daytime * # date */ if (outages || (verbose >= 0)) /* no outages, print anyway */ { strftime (start_time, 80, "%e %B %Y", localtime (&start)); printf ("%d\t%d\t%d\t%d\t%d\n", outage_time, outage_time_daytime); printf ("\t%3d\t%6d\t%6.2f%%\t%6.2f%%\t# %s\n", outages, outage_time, (float) (duration - outage_time) * 100 / duration, (float) (duration_daytime - outage_time_daytime) * 100 / duration_daytime, start_time ); total_outage_time += outage_time; if (outage_time > longest_outage) /* we have a new front runner*/ longest_outage = outage_time; outage_time = 0; total_outages += outages; outages = 0; } } int main (int argc, char *argv []) { char total_hms [20]; char average_hms [20]; char duration_hms [20]; char longest_hms [20]; char mydate [80]; time (&now); if (argc > 1) { int i; for (i = 1; i < argc; i++) { if (! strcmp (argv [i], "-a")) all_disconnects = 1; else if (! strcmp (argv [i], "-c")) summarystats = 1; else if (! strcmp (argv [i], "-d")) daytime = 1; else if (! strcmp (argv [i], "-e")) { i++; end_timestamp = get_date (argv, &i, now); } else if (! strcmp (argv [i], "-q")) verbose = -1; else if (! strcmp (argv [i], "-s")) { i++; start_timestamp = get_date (argv, &i, now); } else if (! strcmp (argv [i], "-S")) { summary = 1; verbose = -1; } else if (! strcmp (argv [i], "-v")) verbose = 1; else usage (argv [0]); } } /* Find the beginning of the period we're interested in. */ do getinfo (); while (now < start_timestamp); first_sample = now; /* we now have the first sample */ if (summarystats) /* per day version */ { int sample_duration; printf ("Date Outages Duration Availability\n"); do { strftime (todays_date, 80, "%e %B %Y", localtime (&now)); if (strcmp (todays_date, last_date)) /* new day */ { struct tm midnight_tm; if (*last_date) /* we already had something */ print_stats (); memset (&midnight_tm, 0, sizeof (midnight_tm)); midnight_tm.tm_isdst = -1; /* mktime should make up its own mind about DST */ strptime (todays_date, " %e %B %Y", &midnight_tm); midnight = mktime (&midnight_tm); strcpy (last_date, todays_date); } if (link_state) /* we were up */ { if (linkstat == 0.0) /* we've gone down now */ { outages++; /* another one */ start_outage = now; link_state = 0; } } else { if (linkstat != 0.0) /* we've come up */ { outage_time += now - start_outage; link_state = 1; } } /* XXX does this make any sense? */ if (verbose > 0) printf ("%d %f %d %d %s\n", (int) now, linkstat, link_state, systems, comments); } while (getinfo () && (now < (end_timestamp + DAY))); print_stats (); /* Summary over period */ /* * To calculate our uptime, don't go before start or beyond now */ if (first_sample > start_timestamp) start_timestamp = first_sample; if (time (NULL) < end_timestamp) end_timestamp = time (NULL); sample_duration = end_timestamp - start_timestamp; printf ("Total:\t\t%3d outages, %6d seconds, longest %3d, availability%6.2f%%\n", total_outages, total_outage_time, longest_outage, (float) (sample_duration - total_outage_time) * 100 / sample_duration); exit (0); } /* * Default: print info for each outage. Suppressed by -q flag (sets verbose = -1). */ if (verbose >= 0) printf ("Start time End time\tDuration\tBadness\n"); do { if (verbose > 0) printf ("%d %f %d %s\n", (int) now, linkstat, systems, comments); if (link_state) /* we were up */ { if (linkstat == 0.0) /* we've gone down now */ { outages++; /* another one */ start_outage = now; link_state = 0; } } else { if (linkstat != 0.0) /* we've come up */ { int this_outage = now - start_outage; outage_time += this_outage; if (last_outage_end) uptime = start_outage - last_outage_end; last_outage_end = now; /* convert to readable format */ strftime (start_time, 80, "%e %B %Y %H:%M:%S", localtime (&start_outage)); strftime (end_time, 80, "%e %B %Y %H:%M:%S", localtime (&now)); if (verbose >= 0) if (uptime) printf ("%d %d %6d\t%7.3f\t# %s %s\n", (int) start_outage, (int) now, this_outage, 3600 / (float) uptime, start_time, end_time ); else printf ("%d %d %6d\t\t# %s %s\n", (int) start_outage, (int) now, this_outage, start_time, end_time ); if (longest_outage < this_outage) /* we have a new front-runner */ { longest_outage = this_outage; strcpy (longest_outage_start, start_time); strcpy (longest_outage_end, end_time); } start_outage = 0; /* we don't really need this */ link_state = 1; } } } while (getinfo () && (now < (end_timestamp + DAY))); HMS (outage_time, total_hms); if (outages) { int average_time = (int) (now - first_sample) / outages; int duration = outage_time / outages; float availability = (float) (now - first_sample - outage_time) * 100 / (float) (now - first_sample); if (summary) /* really only one line */ { if (start_timestamp) { strftime (mydate, 80, "%F", localtime (&start_timestamp)); printf ("%s ", mydate); } else printf ("-"); if (end_timestamp < END_OF_TIME) { strftime (mydate, 80, "%F", localtime (&end_timestamp)); printf ("%s ", mydate); } else printf ("-"); /* Format: start date end date count total longest time between duration availability */ printf ("%3d\t%d\t%d\t%d\t%d\t%5.2f\n", outages, outage_time, longest_outage, average_time, duration, 100.0 - availability ); } else { HMS ((now - first_sample) / outages, average_hms); HMS (outage_time / outages, duration_hms); HMS (longest_outage, longest_hms); printf ("\nSummary"); if (start_timestamp) { strftime (mydate, 80, "%e %B %G", localtime (&start_timestamp)); printf (" from %s", mydate); } if (end_timestamp < END_OF_TIME) { strftime (mydate, 80, "%e %B %G", localtime (&end_timestamp)); printf (" to %s", mydate); } printf ("\n" "Total %d outages, total time %d seconds (%s)\n" "Longest outage:\t\t\t%d seconds (%s)\n" " Start:\t\t\t%s\n" " End:\t\t\t\t%s\n" "Average time between outages:\t%d seconds (%s)\n" "Average duration:\t\t%d seconds (%s)\n" "Availability:\t\t\t%5.2f%%\n", outages, outage_time, total_hms, longest_outage, longest_hms, longest_outage_start, longest_outage_end, average_time, average_hms, duration, duration_hms, availability ); } } else if (summary) /* really only one line */ { if (start_timestamp) { strftime (mydate, 80, "%F", localtime (&start_timestamp)); printf ("%s ", mydate); } else printf ("-"); if (end_timestamp < END_OF_TIME) { strftime (mydate, 80, "%F", localtime (&end_timestamp)); printf ("%s ", mydate); } else printf ("-"); printf (" 0\t0\t0\t0\t0\t0\n"); } else /* normal output, no failures */ { printf ("\nSummary"); if (start_timestamp) { strftime (mydate, 80, "%e %B %G", localtime (&start_timestamp)); printf (" from %s", mydate); } if (end_timestamp < END_OF_TIME) { strftime (mydate, 80, "%e %B %G", localtime (&end_timestamp)); printf (" to %s", mydate); } printf ("\nNo outages\n" ); } exit (0); }