diff options
4 files changed, 96 insertions, 16 deletions
diff --git a/contrib/package/freifunk-watchdog/Makefile b/contrib/package/freifunk-watchdog/Makefile index 403fb9c97d..fc1e9d7e05 100644 --- a/contrib/package/freifunk-watchdog/Makefile +++ b/contrib/package/freifunk-watchdog/Makefile @@ -8,7 +8,7 @@ include $(TOPDIR)/rules.mk PKG_NAME:=freifunk-watchdog -PKG_RELEASE:=1 +PKG_RELEASE:=2 PKG_BUILD_DIR := $(BUILD_DIR)/$(PKG_NAME) PKG_BUILD_DEPENDS := uci diff --git a/contrib/package/freifunk-watchdog/files/freifunk-watchdog.init b/contrib/package/freifunk-watchdog/files/freifunk-watchdog.init index 9ca43754d7..1cce46b4af 100755 --- a/contrib/package/freifunk-watchdog/files/freifunk-watchdog.init +++ b/contrib/package/freifunk-watchdog/files/freifunk-watchdog.init @@ -10,6 +10,10 @@ boot() echo "* * * * * $BIN running || /etc/init.d/freifunk-watchdog restart" >> /etc/crontabs/root fi + if lsmod | grep -q softdog; then + mknod /dev/watchdog c 10 130 + fi + start } diff --git a/contrib/package/freifunk-watchdog/src/watchdog.c b/contrib/package/freifunk-watchdog/src/watchdog.c index c269f22d02..5caf6d5e7c 100644 --- a/contrib/package/freifunk-watchdog/src/watchdog.c +++ b/contrib/package/freifunk-watchdog/src/watchdog.c @@ -131,6 +131,24 @@ static int find_process(const char *name) return -1; } +/* Get the 5 minute load average */ +static double find_loadavg(void) +{ + int fd; + char buffer[10]; + double load = 0.00; + + if( (fd = open("/proc/loadavg", O_RDONLY)) > -1 ) + { + if( read(fd, buffer, sizeof(buffer)) == sizeof(buffer) ) + load = atof(&buffer[5]); + + close(fd); + } + + return load; +} + /* Check if given uci file was updated */ static int check_uci_update(const char *config, time_t *mtime) { @@ -185,7 +203,7 @@ static void load_wifi_uci_add_iface(const char *section, struct uci_itr_ctx *itr val++; } } - + if( val == 3 ) { syslog(LOG_INFO, "Monitoring %s: bssid=%s channel=%d", @@ -240,6 +258,8 @@ static wifi_tuple_t * load_wifi_uci(wifi_tuple_t *ifs, time_t *modtime) static int do_daemon(void) { int iwfd; + int wdfd; + int wdtrigger = 1; int channel; char bssid[18]; @@ -248,32 +268,44 @@ static int do_daemon(void) int restart_wifi = 0; int restart_cron = 0; + int restart_sshd = 0; + int loadavg_panic = 0; openlog(SYSLOG_IDENT, 0, LOG_DAEMON); //daemon(1, 1); if( (iwfd = socket(AF_INET, SOCK_DGRAM, 0)) == -1 ) { - perror("Can not open wireless control socket"); + syslog(LOG_ERR, "Can not open wireless control socket: %s", + strerror(errno)); + return 1; } + if( (wdfd = open(WATCH_DEVICE, O_WRONLY)) > -1 ) + { + syslog(LOG_INFO, "Opened %s - polling each %i seconds", + WATCH_DEVICE, INTERVAL); + } + while( 1 ) { - if( (ifs = load_wifi_uci(ifs, &modtime)) == NULL ) - { - printf("Can not load wireless uci. File corrupt?\n"); - return 1; - } + /* Check average load */ + if( find_loadavg() >= LOAD_TRESHOLD ) + loadavg_panic++; + else + loadavg_panic = 0; /* Check crond */ if( find_process("crond") < 0 ) - { - syslog(LOG_WARNING, "The crond process died, restarting"); - restart_cron++; - } + restart_cron++; + + /* Check SSHd */ + if( find_process("dropbear") < 0 ) + restart_sshd++; /* Check wireless interfaces */ + ifs = load_wifi_uci(ifs, &modtime); for( curif = ifs; curif; curif = curif->next ) { /* Get current channel and bssid */ @@ -300,7 +332,7 @@ static int do_daemon(void) } else { - syslog(LOG_WARNING, "Requested interface %s not present", curif->ifname); + syslog(LOG_WARNING, "Requested interface %s not present", curif->ifname); } } @@ -309,7 +341,7 @@ static int do_daemon(void) if( restart_wifi >= HYSTERESIS ) { restart_wifi = 0; - syslog(LOG_WARNING, "Restarting wireless"); + syslog(LOG_WARNING, "Channel or BSSID mismatch on wireless interface, restarting"); EXEC(WIFI_ACTION); } @@ -317,13 +349,44 @@ static int do_daemon(void) if( restart_cron >= HYSTERESIS ) { restart_cron = 0; - syslog(LOG_WARNING, "Restarting crond process"); - EXEC(CRON_ACTION); + syslog(LOG_WARNING, "The cron process died, restarting"); + EXEC(CRON_ACTION); + } + + /* SSHd restart required? */ + if( restart_sshd >= HYSTERESIS ) + { + restart_sshd = 0; + syslog(LOG_WARNING, "The ssh process died, restarting"); + EXEC(SSHD_ACTION); + } + + /* Is there a load problem? */ + if( loadavg_panic >= HYSTERESIS ) + { + syslog(LOG_EMERG, "Critical system load level, triggering reset!"); + + /* Try watchdog, fall back to reboot */ + if( wdfd > -1 ) + ioctl(wdfd, WDIOC_SETTIMEOUT, &wdtrigger); + else + EXEC(LOAD_ACTION); } + /* Reset watchdog timer */ + if( wdfd > -1 ) + write(wdfd, '\0', 1); + sleep(INTERVAL); } + if( wdfd > -1 ) + { + syslog(LOG_INFO, "Stopping watchdog timer"); + write(wdfd, WATCH_SHUTDOWN, 1); + close(wdfd); + } + closelog(); return 0; } diff --git a/contrib/package/freifunk-watchdog/src/watchdog.h b/contrib/package/freifunk-watchdog/src/watchdog.h index 7de24210a6..f0d6209996 100644 --- a/contrib/package/freifunk-watchdog/src/watchdog.h +++ b/contrib/package/freifunk-watchdog/src/watchdog.h @@ -31,6 +31,8 @@ #include <sys/stat.h> #include <sys/ioctl.h> #include <sys/socket.h> +#include <linux/types.h> +#include <linux/watchdog.h> #include "ucix.h" #include "wireless.22.h" @@ -51,6 +53,17 @@ /* Crond error action */ #define CRON_ACTION "/etc/init.d/cron", "/etc/init.d/cron", "restart" +/* SSHd error action */ +#define SSHD_ACTION "/etc/init.d/dropbear", "/etc/init.d/dropbear", "restart" + +/* Watchdog device */ +#define WATCH_DEVICE "/dev/watchdog" +#define WATCH_SHUTDOWN 'V' + +/* System load error action and treshold */ +#define LOAD_TRESHOLD 5.00 +#define LOAD_ACTION "/sbin/reboot" + /* Fallback binary name (passed by makefile) */ #ifndef BINARY #define BINARY "ffwatchd" |