From 8c4e52a10153c518c8f3a545bef937ad07154b71 Mon Sep 17 00:00:00 2001 From: Martin Mares <mj@ucw.cz> Date: Thu, 2 Apr 2015 18:29:23 +0200 Subject: [PATCH] Access: Use "perf" instead of "oprofile" for profiling --- access/README | 3 +- access/graph.pl | 146 ++++++++++++++++++++---------------------------- 2 files changed, 64 insertions(+), 85 deletions(-) diff --git a/access/README b/access/README index 1c15d40..d748127 100644 --- a/access/README +++ b/access/README @@ -5,7 +5,8 @@ Jak pouzivat merici utilitku: (2) make -- tim vyzkousite, ze testovaci program jde zkompilovat. (3) ./graph.pl -- ridici skript, ktery testovaci program kompiluje -s ruznymi parametry, spousti ho a loguje vysledky. Take vytvari soubor +s ruznymi parametry, spousti ho a loguje vysledky v adresari "out" +(nebo libovolnem jinem, ktery mu zadate). Take vytvari soubor plot.gp pro GnuPlot. Na zacatku skriptu jsou nastavitelne parametry. (4) gnuplot plot.gp -- vyrobi a-*.png s grafy. diff --git a/access/graph.pl b/access/graph.pl index c8b2c08..8e122aa 100755 --- a/access/graph.pl +++ b/access/graph.pl @@ -6,33 +6,29 @@ my @item_sizes = (16,64,128,1024,4096); # Sizes of items my @randomized = (0,1); # Try randomized accesses? my @modify = (0,1); # Try read-write accesses? my $measure_ms = 1000; # Duration of measurement in ms -my $array = 0; # Items are accessed as an array instead of a list +my $array = 1; # Items are accessed as an array instead of a list my $huge = 0; # Use huge pages (hugetlbfs required) # If you want to include profiling information (cache misses etc.) in detailed -# graphs, you can ask the measurement utility to call oprofile. Please note that -# this requires root privileges and that you need to adjust the profiling events -# below to match your CPU (see `opcontrol --list-events'). Also, it might be -# necessary to increase $measure_ms in order to gather enough samples. -my %oprofile = ( -## AMD K8 Family 10 (4 counters) -# "DATA_CACHE_MISSES" => 10000, -# "L2_CACHE_MISS" => 10000, -# "DRAM_ACCESSES" => "10000:0xff", -# "L1_DTLB_AND_L2_DTLB_MISS" => 10000, -## "L1_DTLB_MISS_AND_L2_DTLB_HIT" => 10000, -## Intel Core (2 counters) -# "DTLB_MISSES" => 10000, -# "BUS_TRAN_MEM" => 10000, # Memory transactions on the bus -## Intel i7 (4 counters) -# "DTLB_MISSES" => 10000, -# "MEM_UNCORE_RETIRED" => "10000:0x30", # Memory transactions (local DRAM) -# "L2_RQSTS" => "10000:0xaa", # L2 misses -# "L1D" => "10000", # Lines brought to L1 -## "LLC_MISSES" => 10000, # Last-Level Cache (L3) misses -## "OFFCORE_REQUESTS" => "10000:0x08", # Off-core read requests +# graphs, ask for specific events here. You must have the "perf" utility installed, +# with a recent enough kernel. The set of available events depends on the exact +# cpu type, see "man perf-list" for further details. Also, it might be necessary +# to increase $measure_ms in order to gather enough samples. + +# Events and their names +my %perf_events = ( + # 'cpu-cycles' => 'CPU cycles', + # 'cache-misses' => 'Cache misses', + 'L1-dcache-load-misses' => 'L1D load misses', + 'LLC-loads' => 'LLC loads', + # 'dTLB-load-misses' => 'DTLB load misses', + 'mem-loads' => 'Memory loads', + 'r412E' => 'LLC misses', ); -my @oprofile_events = sort keys %oprofile; +my @perf_events = sort keys %perf_events; + +# How to call "perf" +my $perf_tool = "perf_3.16"; # Use --graph to disable all calculations and just re-use the log files my $graph_only = 0; @@ -41,14 +37,13 @@ if (@ARGV && $ARGV[0] eq "--graph") { shift @ARGV; } -# Use ./graph.pl <directory> to store results in a separate directory -if (defined $ARGV[0]) { - my $dir = $ARGV[0]; - -d $dir or mkdir $dir or die "Cannot create $dir"; - chdir $dir or die; - for my $f ("access.c", "Makefile", "parse_op") { - -f $f or symlink "../$f", $f or die; - } +# Use ./graph.pl <directory> to store results in a given directory. +# Otherwise, "out" is used. +my $dir = $ARGV[0] // 'out'; +-d $dir or mkdir $dir or die "Cannot create $dir"; +chdir $dir or die; +for my $f ("access.c", "Makefile", "parse_op") { + -f $f or symlink "../$f", $f or die; } ### Get machine name and the description of caches ### @@ -61,6 +56,7 @@ $machine = "($machine)"; our $c; my @caches = (); +my $prev_L = 0; for $c (</sys/devices/system/cpu/cpu0/cache/index*>) { sub rd($) { my ($f) = @_; @@ -76,65 +72,49 @@ for $c (</sys/devices/system/cpu/cpu0/cache/index*>) { my $s = rd("size"); my $t = rd("type"); my $L = rd("coherency_line_size"); + my $line = ($L == $prev_L) ? "" : " (${L}B line)"; $t =~ s/(.).*/$1/; $t =~ s/U//; $t eq "I" and next; - push @caches, "L$l$t $s/$L $w-way"; + push @caches, "L$l$t $s $w-way$line"; + $prev_L = $L; } $machine .= " [" . join(", ", @caches) . "]"; -### Interface to oprofile ### - -sub oprof(@) { - system "opcontrol", @_ and die "opcontrol " . join(" ", @_) . " failed ($?)"; -} +### Profiling ### -sub profile_setup() { - %oprofile or return; - oprof("--shutdown"); - oprof("--setup", map { "--event=" . $_ . ":" . $oprofile{$_} } sort keys %oprofile); - oprof("--start"); -} +my @profile = (); -sub profile_shutdown() { - %oprofile or return; - oprof("--shutdown"); - oprof("--reset"); -} +sub run_profiled(@) { + if (!@perf_events) { + my $cmd = join(" ", @_); + my $out = `$cmd`; + die if $?; + return $out; + } -sub profile_start() { - %oprofile or return; - oprof("--reset"); -} + my @cmd = ($perf_tool, qw(stat -o perf.out -x :), (map +( '-e', $_ ), @perf_events), @_); + my $cmd = join(" ", @cmd); + my $out = `$cmd`; + die if $?; -sub profile_stop() { - %oprofile or return (); - oprof("--dump"); - if (system "opreport", "./access", "--xml", "--output-file=op.xml") { - print STDERR "opreport failed: $?, assuming that no samples were gathered\n"; - return (); - } - open P, "./parse_op op.xml |" or die "parse_op failed"; - my %evt = (); - while (<P>) { + @profile = (); + open my $perf, '<', 'perf.out' or die; + while (<$perf>) { chomp; - my ($k, $v) = split /\s+/; - $evt{$k} = $v; + next if /^$/ || /^#/; + my @fields = split /:/; + push @profile, $fields[0]; } - close P; - my @prof = (); - for my $e (@oprofile_events) { - my $mul = $oprofile{$e}; - $mul =~ s/:.*//; - push @prof, ($evt{$e} || 0) * 1. * $mul; - } - return @prof; + close $perf; + @profile == @perf_events or die "Perf returned wrong number of values"; + + return $out; } ### Measure and create logs ### if (!$graph_only) { - profile_setup(); for my $r (@randomized) { for my $m (@modify) { for my $s (@item_sizes) { @@ -150,14 +130,13 @@ if (!$graph_only) { `make access F="$o"`; die if $?; open D, ">$f" or die; for (my $mem=$min_mem; $mem<=$max_mem; $mem*=2) { - profile_start(); - my $out = `./access $mem`; die if $?; + print "$mem ($o): "; + my $out = run_profiled('./access', $mem); my ($t, $attempts) = split /\s+/, $out; - my @prof = profile_stop(); - @prof = map { $_ / $attempts } @prof; + my @prof = map { $_ / $attempts } @profile; chomp $t; - print "$mem ($o): ", join(" ", $t, - map { $oprofile_events[$_] . ":" . sprintf("%.3f", $prof[$_]) } 0..$#prof), + print join(" ", $t, + map { $perf_events[$_] . ":" . sprintf("%.3f", $prof[$_]) } 0..$#perf_events), "\n"; print D join("\t", $mem, $t, @prof), "\n"; } @@ -165,7 +144,6 @@ if (!$graph_only) { } } } - profile_shutdown(); } ### The plots ### @@ -179,7 +157,7 @@ set key left set xlabel "data set size [KB]" set ylabel "time per access [ns]" set grid -set yrange [1:120] # Change if necessary +set yrange [1:50] # Change if necessary EOF @@ -212,7 +190,7 @@ for my $s (@item_sizes) { } # For each item size and access type, show full profiling information -if (%oprofile) { +if (@perf_events) { for my $s (@item_sizes) { for my $r (@randomized) { for my $m (@modify) { @@ -224,8 +202,8 @@ if (%oprofile) { print GP "set y2range [0:3]\n"; my @a = (); push @a, "'a-$r-$m-$s.log' title 'Time' with linespoints"; - for my $i (0..$#oprofile_events) { - push @a, "'' using 1:" . ($i+3) . " axes x1y2 title '" . $oprofile_events[$i] . "' with linespoints"; + for my $i (0..$#perf_events) { + push @a, "'' using 1:" . ($i+3) . " axes x1y2 title '" . $perf_events{$perf_events[$i]} . "' with linespoints"; } print GP "plot ", join(", ", @a), "\n\n"; } -- GitLab