summaryrefslogtreecommitdiff
path: root/doc/threads/stats-filter.pl
blob: 51690651f9e93041afa26dd74e5fe555d10e947a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/perl

use common::sense;
use Data::Dump;
use List::Util;

my @GROUP_BY = qw/VERSION PEERS TOTAL_ROUTES/;
my @VALUES = qw/RSS SZ VSZ TIMEDIF/;

my ($FILE, $TYPE) = @ARGV;

### Load data ###
my %data;
open F, "<", $FILE or die $!;
my @header = split /;/, <F>;
chomp @header;

my $line = undef;
while ($line = <F>)
{
  chomp $line;
  my %row;
  @row{@header} = split /;/, $line;
  push @{$data{join ";", @row{@GROUP_BY}}}, { %row } if $row{TYPE} eq $TYPE;
}

### Do statistics ###
sub avg {
  return List::Util::sum(@_) / @_;
}

sub stdev {
  my $avg = shift;
  return 0 if @_ <= 1;
  return sqrt(List::Util::sum(map { ($avg - $_)**2 } @_) / (@_-1));
}

my %output;
my %vers;

STATS:
foreach my $k (keys %data)
{
  my %cols = map { my $vk = $_; $vk => [ map { $_->{$vk} } @{$data{$k}} ]; } @VALUES;

  my %avg = map { $_ => avg(@{$cols{$_}})} @VALUES;
  my %stdev = map { $_ => stdev($avg{$_}, @{$cols{$_}})} @VALUES;

  foreach my $v (@VALUES) {
    next if $stdev{$v} / $avg{$v} < 0.035;

    for (my $i=0; $i<@{$cols{$v}}; $i++)
    {
      my $dif = $cols{$v}[$i] - $avg{$v};
      next if $dif < $stdev{$v} * 2 and $dif > $stdev{$v} * (-2);
=cut
      printf "Removing an outlier for %s/%s: avg=%f, stdev=%f, variance=%.1f%%, val=%f, valratio=%.1f%%\n",
	$k, $v, $avg{$v}, $stdev{$v}, (100 * $stdev{$v} / $avg{$v}), $cols{$v}[$i], (100 * $dif / $stdev{$v});
=cut
      splice @{$data{$k}}, $i, 1, ();
      redo STATS;
    }
  }

  $vers{$data{$k}[0]{VERSION}}++;
  $output{"$data{$k}[0]{PEERS};$data{$k}[0]{TOTAL_ROUTES}"}{$data{$k}[0]{VERSION}} = { %avg };
}

### Export the data ###

say "PEERS;TOTAL_ROUTES;" . join ";", ( map { my $vk = $_; map { "$_/$vk" } keys %vers; } @VALUES );

sub keysort {
  my ($pa, $ta) = split /;/, $_[0];
  my ($pb, $tb) = split /;/, $_[1];

  return (int $ta) <=> (int $tb) if $pa eq $pb;
  return (int $pa) <=> (int $pb);
}
  
foreach my $k (sort { keysort($a, $b); } keys %output)
{
  say "$k;" . join ";", ( map { my $vk = $_; map { $output{$k}{$_}{$vk}; } keys %vers; } @VALUES );
}