Commit 8c4e52a1 authored by Martin Mareš

Access: Use "perf" instead of "oprofile" for profiling

parent 5922501a
......@@ -5,7 +5,8 @@ Jak pouzivat merici utilitku:
(2) make -- tim vyzkousite, ze testovaci program jde zkompilovat.
(3) ./graph.pl -- ridici skript, ktery testovaci program kompiluje
s ruznymi parametry, spousti ho a loguje vysledky. Take vytvari soubor
s ruznymi parametry, spousti ho a loguje vysledky v adresari "out"
(nebo libovolnem jinem, ktery mu zadate). Take vytvari soubor
plot.gp pro GnuPlot. Na zacatku skriptu jsou nastavitelne parametry.
(4) gnuplot plot.gp -- vyrobi a-*.png s grafy.
......@@ -6,33 +6,29 @@ my @item_sizes = (16,64,128,1024,4096); # Sizes of items
my @randomized = (0,1); # Try randomized accesses?
my @modify = (0,1); # Try read-write accesses?
my $measure_ms = 1000; # Duration of measurement in ms
my $array = 0; # Items are accessed as an array instead of a list
my $array = 1; # Items are accessed as an array instead of a list
my $huge = 0; # Use huge pages (hugetlbfs required)
# If you want to include profiling information (cache misses etc.) in detailed
# graphs, you can ask the measurement utility to call oprofile. Please note that
# this requires root privileges and that you need to adjust the profiling events
# below to match your CPU (see `opcontrol --list-events'). Also, it might be
# necessary to increase $measure_ms in order to gather enough samples.
my %oprofile = (
## AMD K8 Family 10 (4 counters)
# "DATA_CACHE_MISSES" => 10000,
# "L2_CACHE_MISS" => 10000,
# "DRAM_ACCESSES" => "10000:0xff",
# "L1_DTLB_AND_L2_DTLB_MISS" => 10000,
## "L1_DTLB_MISS_AND_L2_DTLB_HIT" => 10000,
## Intel Core (2 counters)
# "DTLB_MISSES" => 10000,
# "BUS_TRAN_MEM" => 10000, # Memory transactions on the bus
## Intel i7 (4 counters)
# "DTLB_MISSES" => 10000,
# "MEM_UNCORE_RETIRED" => "10000:0x30", # Memory transactions (local DRAM)
# "L2_RQSTS" => "10000:0xaa", # L2 misses
# "L1D" => "10000", # Lines brought to L1
## "LLC_MISSES" => 10000, # Last-Level Cache (L3) misses
## "OFFCORE_REQUESTS" => "10000:0x08", # Off-core read requests
# graphs, ask for specific events here. You must have the "perf" utility installed,
# with a recent enough kernel. The set of available events depends on the exact
# cpu type, see "man perf-list" for further details. Also, it might be necessary
# to increase $measure_ms in order to gather enough samples.
# Events and their names
my %perf_events = (
# 'cpu-cycles' => 'CPU cycles',
# 'cache-misses' => 'Cache misses',
'L1-dcache-load-misses' => 'L1D load misses',
'LLC-loads' => 'LLC loads',
# 'dTLB-load-misses' => 'DTLB load misses',
'mem-loads' => 'Memory loads',
'r412E' => 'LLC misses',
);
my @oprofile_events = sort keys %oprofile;
my @perf_events = sort keys %perf_events;
# How to call "perf"
my $perf_tool = "perf_3.16";
# Use --graph to disable all calculations and just re-use the log files
my $graph_only = 0;
......@@ -41,14 +37,13 @@ if (@ARGV && $ARGV[0] eq "--graph") {
shift @ARGV;
}
# Use ./graph.pl <directory> to store results in a separate directory
if (defined $ARGV[0]) {
my $dir = $ARGV[0];
-d $dir or mkdir $dir or die "Cannot create $dir";
chdir $dir or die;
for my $f ("access.c", "Makefile", "parse_op") {
-f $f or symlink "../$f", $f or die;
}
# Use ./graph.pl <directory> to store results in a given directory.
# Otherwise, "out" is used.
# Pick the results directory: the first command-line argument if present,
# otherwise "out". Create it when missing and make it the working directory.
my $dir = $ARGV[0] // 'out';
-d $dir or mkdir $dir or die "Cannot create $dir: $!";
chdir $dir or die "Cannot chdir to $dir: $!";
# Link the files needed inside the results directory from its parent.
# NOTE(review): "../$f" assumes $dir is a direct subdirectory of the directory
# holding these files -- a nested or absolute path would produce dangling links.
for my $f ("access.c", "Makefile", "parse_op") {
	-f $f or symlink "../$f", $f or die "Cannot symlink ../$f to $f: $!";
}
### Get machine name and the description of caches ###
......@@ -61,6 +56,7 @@ $machine = "($machine)";
our $c;
my @caches = ();
my $prev_L = 0;
for $c (</sys/devices/system/cpu/cpu0/cache/index*>) {
sub rd($) {
my ($f) = @_;
......@@ -76,65 +72,49 @@ for $c (</sys/devices/system/cpu/cpu0/cache/index*>) {
my $s = rd("size");
my $t = rd("type");
my $L = rd("coherency_line_size");
my $line = ($L == $prev_L) ? "" : " (${L}B line)";
$t =~ s/(.).*/$1/;
$t =~ s/U//;
$t eq "I" and next;
push @caches, "L$l$t $s/$L $w-way";
push @caches, "L$l$t $s $w-way$line";
$prev_L = $L;
}
$machine .= " [" . join(", ", @caches) . "]";
### Interface to oprofile ###
sub oprof(@) {
system "opcontrol", @_ and die "opcontrol " . join(" ", @_) . " failed ($?)";
}
### Profiling ###
sub profile_setup() {
%oprofile or return;
oprof("--shutdown");
oprof("--setup", map { "--event=" . $_ . ":" . $oprofile{$_} } sort keys %oprofile);
oprof("--start");
}
my @profile = ();
sub profile_shutdown() {
%oprofile or return;
oprof("--shutdown");
oprof("--reset");
}
sub run_profiled(@) {
if (!@perf_events) {
my $cmd = join(" ", @_);
my $out = `$cmd`;
die if $?;
return $out;
}
sub profile_start() {
%oprofile or return;
oprof("--reset");
}
my @cmd = ($perf_tool, qw(stat -o perf.out -x :), (map +( '-e', $_ ), @perf_events), @_);
my $cmd = join(" ", @cmd);
my $out = `$cmd`;
die if $?;
sub profile_stop() {
%oprofile or return ();
oprof("--dump");
if (system "opreport", "./access", "--xml", "--output-file=op.xml") {
print STDERR "opreport failed: $?, assuming that no samples were gathered\n";
return ();
}
open P, "./parse_op op.xml |" or die "parse_op failed";
my %evt = ();
while (<P>) {
@profile = ();
open my $perf, '<', 'perf.out' or die;
while (<$perf>) {
chomp;
my ($k, $v) = split /\s+/;
$evt{$k} = $v;
next if /^$/ || /^#/;
my @fields = split /:/;
push @profile, $fields[0];
}
close P;
my @prof = ();
for my $e (@oprofile_events) {
my $mul = $oprofile{$e};
$mul =~ s/:.*//;
push @prof, ($evt{$e} || 0) * 1. * $mul;
}
return @prof;
close $perf;
@profile == @perf_events or die "Perf returned wrong number of values";
return $out;
}
### Measure and create logs ###
if (!$graph_only) {
profile_setup();
for my $r (@randomized) {
for my $m (@modify) {
for my $s (@item_sizes) {
......@@ -150,14 +130,13 @@ if (!$graph_only) {
`make access F="$o"`; die if $?;
open D, ">$f" or die;
for (my $mem=$min_mem; $mem<=$max_mem; $mem*=2) {
profile_start();
my $out = `./access $mem`; die if $?;
print "$mem ($o): ";
my $out = run_profiled('./access', $mem);
my ($t, $attempts) = split /\s+/, $out;
my @prof = profile_stop();
@prof = map { $_ / $attempts } @prof;
my @prof = map { $_ / $attempts } @profile;
chomp $t;
print "$mem ($o): ", join(" ", $t,
map { $oprofile_events[$_] . ":" . sprintf("%.3f", $prof[$_]) } 0..$#prof),
print join(" ", $t,
map { $perf_events[$_] . ":" . sprintf("%.3f", $prof[$_]) } 0..$#perf_events),
"\n";
print D join("\t", $mem, $t, @prof), "\n";
}
......@@ -165,7 +144,6 @@ if (!$graph_only) {
}
}
}
profile_shutdown();
}
### The plots ###
......@@ -179,7 +157,7 @@ set key left
set xlabel "data set size [KB]"
set ylabel "time per access [ns]"
set grid
set yrange [1:120] # Change if necessary
set yrange [1:50] # Change if necessary
EOF
......@@ -212,7 +190,7 @@ for my $s (@item_sizes) {
}
# For each item size and access type, show full profiling information
if (%oprofile) {
if (@perf_events) {
for my $s (@item_sizes) {
for my $r (@randomized) {
for my $m (@modify) {
......@@ -224,8 +202,8 @@ if (%oprofile) {
print GP "set y2range [0:3]\n";
my @a = ();
push @a, "'a-$r-$m-$s.log' title 'Time' with linespoints";
for my $i (0..$#oprofile_events) {
push @a, "'' using 1:" . ($i+3) . " axes x1y2 title '" . $oprofile_events[$i] . "' with linespoints";
for my $i (0..$#perf_events) {
push @a, "'' using 1:" . ($i+3) . " axes x1y2 title '" . $perf_events{$perf_events[$i]} . "' with linespoints";
}
print GP "plot ", join(", ", @a), "\n\n";
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment