summaryrefslogtreecommitdiff
path: root/sysdep
diff options
context:
space:
mode:
authorOndrej Zajicek <santiago@crfreenet.org>2015-03-02 09:41:14 +0100
committerOndrej Zajicek <santiago@crfreenet.org>2015-03-02 09:41:14 +0100
commit8bcb5fb1e8a0718f88f99cde2f5b5a3bae5c4451 (patch)
treec4ce5adbef88e2b2b002d42e274773f424aa1f19 /sysdep
parent4e63974471c4e2f11781cda163e0dc2ac6fb72f9 (diff)
Implement latency tracking, internal event log and watchdog
Diffstat (limited to 'sysdep')
-rw-r--r--sysdep/unix/config.Y13
-rw-r--r--sysdep/unix/io.c170
-rw-r--r--sysdep/unix/main.c8
-rw-r--r--sysdep/unix/unix.h5
4 files changed, 195 insertions, 1 deletions
diff --git a/sysdep/unix/config.Y b/sysdep/unix/config.Y
index 7fd0ad2d..d6ab8cab 100644
--- a/sysdep/unix/config.Y
+++ b/sysdep/unix/config.Y
@@ -15,6 +15,7 @@ CF_DECLS
CF_KEYWORDS(LOG, SYSLOG, ALL, DEBUG, TRACE, INFO, REMOTE, WARNING, ERROR, AUTH, FATAL, BUG, STDERR, SOFT)
CF_KEYWORDS(TIMEFORMAT, ISO, OLD, SHORT, LONG, BASE, NAME, CONFIRM, UNDO, CHECK, TIMEOUT)
+CF_KEYWORDS(DEBUG, LATENCY, LIMIT, WATCHDOG, WARNING, TIMEOUT)
%type <i> log_mask log_mask_list log_cat cfg_timeout
%type <g> log_file
@@ -83,6 +84,7 @@ mrtdump_base:
}
;
+
CF_ADDTO(conf, timeformat_base)
timeformat_which:
@@ -104,6 +106,17 @@ timeformat_base:
TIMEFORMAT timeformat_spec ';'
;
+
+CF_ADDTO(conf, debug_unix)
+
+debug_unix:
+ DEBUG LATENCY bool { new_config->latency_debug = $3; }
+ | DEBUG LATENCY LIMIT expr_us { new_config->latency_limit = $4; }
+ | WATCHDOG WARNING expr_us { new_config->watchdog_warning = $3; }
+ | WATCHDOG TIMEOUT expr_us { new_config->watchdog_timeout = ($3 + 999999) TO_S; }
+ ;
+
+
/* Unix specific commands */
CF_CLI_HELP(CONFIGURE, ..., [[Reload configuration]])
diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c
index daf9d054..bbb87ca9 100644
--- a/sysdep/unix/io.c
+++ b/sysdep/unix/io.c
@@ -332,6 +332,8 @@ tm_first_shot(void)
return x;
}
+void io_log_event(void *hook, void *data);
+
static void
tm_shot(void)
{
@@ -372,6 +374,7 @@ tm_shot(void)
i = 0;
tm_start(t, i);
}
+ io_log_event(t->hook, t->data);
t->hook(t);
}
}
@@ -1840,6 +1843,162 @@ sk_dump_all(void)
/*
+ * Internal event log and watchdog
+ */
+
+#define EVENT_LOG_LENGTH 32
+
+struct event_log_entry
+{
+ void *hook;
+ void *data;
+ btime timestamp;
+ btime duration;
+};
+
+static struct event_log_entry event_log[EVENT_LOG_LENGTH];
+static struct event_log_entry *event_open;
+static int event_log_pos, event_log_num, watchdog_active;
+static btime last_time;
+static btime loop_time;
+
+static void
+io_update_time(void)
+{
+ struct timespec ts;
+ int rv;
+
+ if (!clock_monotonic_available)
+ return;
+
+ /*
+ * This is third time-tracking procedure (after update_times() above and
+ * times_update() in BFD), dedicated to internal event log and latency
+ * tracking. Hopefully, we consolidate these sometimes.
+ */
+
+ rv = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (rv < 0)
+ die("clock_gettime: %m");
+
+ last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
+
+ if (event_open)
+ {
+ event_open->duration = last_time - event_open->timestamp;
+
+ if (event_open->duration > config->latency_limit)
+ log(L_WARN "Event 0x%p 0x%p took %d ms",
+ event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
+
+ event_open = NULL;
+ }
+}
+
+/**
+ * io_log_event - mark approaching event into event log
+ * @hook: event hook address
+ * @data: event data address
+ *
+ * Store info (hook, data, timestamp) about the following internal event into
+ * a circular event log (@event_log). When latency tracking is enabled, the log
+ * entry is kept open (in @event_open) so the duration can be filled later.
+ */
+void
+io_log_event(void *hook, void *data)
+{
+ if (config->latency_debug)
+ io_update_time();
+
+ struct event_log_entry *en = event_log + event_log_pos;
+
+ en->hook = hook;
+ en->data = data;
+ en->timestamp = last_time;
+ en->duration = 0;
+
+ event_log_num++;
+ event_log_pos++;
+ event_log_pos %= EVENT_LOG_LENGTH;
+
+ event_open = config->latency_debug ? en : NULL;
+}
+
+static inline void
+io_close_event(void)
+{
+ if (event_open)
+ io_update_time();
+}
+
+void
+io_log_dump(void)
+{
+ int i;
+
+ log(L_DEBUG "Event log:");
+ for (i = 0; i < EVENT_LOG_LENGTH; i++)
+ {
+ struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
+ if (en->hook)
+ log(L_DEBUG " Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
+ (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
+ }
+}
+
+void
+watchdog_sigalrm(int sig UNUSED)
+{
+ /* Update last_time and duration, but skip latency check */
+ config->latency_limit = 0xffffffff;
+ io_update_time();
+
+ /* We want core dump */
+ abort();
+}
+
+static inline void
+watchdog_start1(void)
+{
+ io_update_time();
+
+ loop_time = last_time;
+}
+
+static inline void
+watchdog_start(void)
+{
+ io_update_time();
+
+ loop_time = last_time;
+ event_log_num = 0;
+
+ if (config->watchdog_timeout)
+ {
+ alarm(config->watchdog_timeout);
+ watchdog_active = 1;
+ }
+}
+
+static inline void
+watchdog_stop(void)
+{
+ io_update_time();
+
+ if (watchdog_active)
+ {
+ alarm(0);
+ watchdog_active = 0;
+ }
+
+ btime duration = last_time - loop_time;
+ if (duration > config->watchdog_warning)
+ log(L_WARN "I/O loop cycle took %d ms for %d events",
+ (int) (duration TO_MS), event_log_num);
+}
+
+
+/*
* Main I/O Loop
*/
@@ -1873,6 +2032,7 @@ io_loop(void)
sock *s;
node *n;
+ watchdog_start1();
sock_recalc_fdsets_p = 1;
for(;;)
{
@@ -1887,6 +2047,8 @@ io_loop(void)
timo.tv_sec = events ? 0 : MIN(tout - now, 3);
timo.tv_usec = 0;
+ io_close_event();
+
if (sock_recalc_fdsets_p)
{
sock_recalc_fdsets_p = 0;
@@ -1923,25 +2085,30 @@ io_loop(void)
if (async_config_flag)
{
+ io_log_event(async_config, NULL);
async_config();
async_config_flag = 0;
continue;
}
if (async_dump_flag)
{
+ io_log_event(async_dump, NULL);
async_dump();
async_dump_flag = 0;
continue;
}
if (async_shutdown_flag)
{
+ io_log_event(async_shutdown, NULL);
async_shutdown();
async_shutdown_flag = 0;
continue;
}
/* And finally enter select() to find active sockets */
+ watchdog_stop();
hi = select(hi+1, &rd, &wr, NULL, &timo);
+ watchdog_start();
if (hi < 0)
{
@@ -1965,6 +2132,7 @@ io_loop(void)
do
{
steps--;
+ io_log_event(s->rx_hook, s->data);
e = sk_read(s);
if (s != current_sock)
goto next;
@@ -1976,6 +2144,7 @@ io_loop(void)
do
{
steps--;
+ io_log_event(s->tx_hook, s->data);
e = sk_write(s);
if (s != current_sock)
goto next;
@@ -2003,6 +2172,7 @@ io_loop(void)
if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
{
count++;
+ io_log_event(s->rx_hook, s->data);
e = sk_read(s);
if (s != current_sock)
goto next2;
diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c
index 61b306dc..05f7560d 100644
--- a/sysdep/unix/main.c
+++ b/sysdep/unix/main.c
@@ -168,6 +168,9 @@ sysdep_preconfig(struct config *c)
{
init_list(&c->logfiles);
+ c->latency_limit = UNIX_DEFAULT_LATENCY_LIMIT;
+ c->watchdog_warning = UNIX_DEFAULT_WATCHDOG_WARNING;
+
#ifdef PATH_IPROUTE_DIR
read_iproute_table(PATH_IPROUTE_DIR "/rt_protos", "ipp_", 256);
read_iproute_table(PATH_IPROUTE_DIR "/rt_realms", "ipr_", 256);
@@ -585,6 +588,8 @@ handle_sigterm(int sig UNUSED)
async_shutdown_flag = 1;
}
+void watchdog_sigalrm(int sig UNUSED);
+
static void
signal_init(void)
{
@@ -600,6 +605,9 @@ signal_init(void)
sa.sa_handler = handle_sigterm;
sa.sa_flags = SA_RESTART;
sigaction(SIGTERM, &sa, NULL);
+ sa.sa_handler = watchdog_sigalrm;
+ sa.sa_flags = 0;
+ sigaction(SIGALRM, &sa, NULL);
signal(SIGPIPE, SIG_IGN);
}
diff --git a/sysdep/unix/unix.h b/sysdep/unix/unix.h
index 3cee96b4..593978cc 100644
--- a/sysdep/unix/unix.h
+++ b/sysdep/unix/unix.h
@@ -27,8 +27,10 @@ void cmd_reconfig_confirm(void);
void cmd_reconfig_undo(void);
void cmd_shutdown(void);
-#define UNIX_DEFAULT_CONFIGURE_TIMEOUT 300
+#define UNIX_DEFAULT_CONFIGURE_TIMEOUT 300
+#define UNIX_DEFAULT_LATENCY_LIMIT (1 S_)
+#define UNIX_DEFAULT_WATCHDOG_WARNING (5 S_)
/* io.c */
@@ -99,6 +101,7 @@ volatile int async_shutdown_flag;
void io_init(void);
void io_loop(void);
+void io_log_dump(void);
int sk_open_unix(struct birdsock *s, char *name);
void *tracked_fopen(struct pool *, char *name, char *mode);
void test_old_bird(char *path);