diff --git a/access/README b/access/README
index 1c15d40e616994eb001525452ca1b7a55d59206f..d74812738dfe2488d07cd82b8e860f64670cfe00 100644
--- a/access/README
+++ b/access/README
@@ -5,7 +5,8 @@ Jak pouzivat merici utilitku:
(2) make -- tim vyzkousite, ze testovaci program jde zkompilovat.
(3) ./graph.pl -- ridici skript, ktery testovaci program kompiluje
-s ruznymi parametry, spousti ho a loguje vysledky. Take vytvari soubor
+s ruznymi parametry, spousti ho a loguje vysledky v adresari "out"
+(nebo libovolnem jinem, ktery mu zadate). Take vytvari soubor
plot.gp pro GnuPlot. Na zacatku skriptu jsou nastavitelne parametry.
(4) gnuplot plot.gp -- vyrobi a-*.png s grafy.
diff --git a/access/graph.pl b/access/graph.pl
index c8b2c088f0cda3838f540e24bfedd0002334d1cd..8e122aadc430cd222fd011e1a6a7202fab6b51c1 100755
--- a/access/graph.pl
+++ b/access/graph.pl
@@ -6,33 +6,29 @@ my @item_sizes = (16,64,128,1024,4096); # Sizes of items
my @randomized = (0,1); # Try randomized accesses?
my @modify = (0,1); # Try read-write accesses?
my $measure_ms = 1000; # Duration of measurement in ms
-my $array = 0; # Items are accessed as an array instead of a list
+my $array = 1; # Items are accessed as an array instead of a list
my $huge = 0; # Use huge pages (hugetlbfs required)
# If you want to include profiling information (cache misses etc.) in detailed
-# graphs, you can ask the measurement utility to call oprofile. Please note that
-# this requires root privileges and that you need to adjust the profiling events
-# below to match your CPU (see `opcontrol --list-events'). Also, it might be
-# necessary to increase $measure_ms in order to gather enough samples.
-my %oprofile = (
-## AMD K8 Family 10 (4 counters)
-# "DATA_CACHE_MISSES" => 10000,
-# "L2_CACHE_MISS" => 10000,
-# "DRAM_ACCESSES" => "10000:0xff",
-# "L1_DTLB_AND_L2_DTLB_MISS" => 10000,
-## "L1_DTLB_MISS_AND_L2_DTLB_HIT" => 10000,
-## Intel Core (2 counters)
-# "DTLB_MISSES" => 10000,
-# "BUS_TRAN_MEM" => 10000, # Memory transactions on the bus
-## Intel i7 (4 counters)
-# "DTLB_MISSES" => 10000,
-# "MEM_UNCORE_RETIRED" => "10000:0x30", # Memory transactions (local DRAM)
-# "L2_RQSTS" => "10000:0xaa", # L2 misses
-# "L1D" => "10000", # Lines brought to L1
-## "LLC_MISSES" => 10000, # Last-Level Cache (L3) misses
-## "OFFCORE_REQUESTS" => "10000:0x08", # Off-core read requests
+# graphs, list the events to record here. This requires the "perf" utility and
+# a sufficiently recent kernel. The set of available events depends on the exact
+# CPU type; see "man perf-list" for further details. Also, it might be necessary
+# to increase $measure_ms in order to gather enough samples.
+
+# Perf events to record, each mapped to the title used for it in the detailed graphs
+my %perf_events = (
+ # 'cpu-cycles' => 'CPU cycles',
+ # 'cache-misses' => 'Cache misses',
+ 'L1-dcache-load-misses' => 'L1D load misses',
+ 'LLC-loads' => 'LLC loads',
+ # 'dTLB-load-misses' => 'DTLB load misses',
+ 'mem-loads' => 'Memory loads',
+ 'r412E' => 'LLC misses',
);
-my @oprofile_events = sort keys %oprofile;
+my @perf_events = sort keys %perf_events;
+
+# Name of the "perf" executable to run (adjust to match your installation)
+my $perf_tool = "perf_3.16";
# Use --graph to disable all calculations and just re-use the log files
my $graph_only = 0;
@@ -41,14 +37,13 @@ if (@ARGV && $ARGV[0] eq "--graph") {
shift @ARGV;
}
-# Use ./graph.pl <directory> to store results in a separate directory
-if (defined $ARGV[0]) {
- my $dir = $ARGV[0];
- -d $dir or mkdir $dir or die "Cannot create $dir";
- chdir $dir or die;
- for my $f ("access.c", "Makefile", "parse_op") {
- -f $f or symlink "../$f", $f or die;
- }
+# Use ./graph.pl <directory> to store results in a given directory.
+# Otherwise, "out" is used.
+my $dir = $ARGV[0] // 'out'; # Results directory: first argument, or "out" when none is given
+-d $dir or mkdir $dir or die "Cannot create $dir: $!";
+chdir $dir or die "Cannot change to $dir: $!";
+for my $f ("access.c", "Makefile", "parse_op") { # Link each missing build input from the parent directory
+ -f $f or symlink "../$f", $f or die "Cannot symlink ../$f: $!";
}
### Get machine name and the description of caches ###
@@ -61,6 +56,7 @@ $machine = "($machine)";
our $c;
my @caches = ();
+my $prev_L = 0;
for $c (</sys/devices/system/cpu/cpu0/cache/index*>) {
sub rd($) {
my ($f) = @_;
@@ -76,65 +72,49 @@ for $c (</sys/devices/system/cpu/cpu0/cache/index*>) {
my $s = rd("size");
my $t = rd("type");
my $L = rd("coherency_line_size");
+ my $line = ($L == $prev_L) ? "" : " (${L}B line)";
$t =~ s/(.).*/$1/;
$t =~ s/U//;
$t eq "I" and next;
- push @caches, "L$l$t $s/$L $w-way";
+ push @caches, "L$l$t $s $w-way$line";
+ $prev_L = $L;
}
$machine .= " [" . join(", ", @caches) . "]";
-### Interface to oprofile ###
-
-sub oprof(@) {
- system "opcontrol", @_ and die "opcontrol " . join(" ", @_) . " failed ($?)";
-}
+### Profiling ###
-sub profile_setup() {
- %oprofile or return;
- oprof("--shutdown");
- oprof("--setup", map { "--event=" . $_ . ":" . $oprofile{$_} } sort keys %oprofile);
- oprof("--start");
-}
+my @profile = ();
-sub profile_shutdown() {
- %oprofile or return;
- oprof("--shutdown");
- oprof("--reset");
-}
+sub run_profiled(@) { # Run command @_ (under "perf stat" when events are configured) and return its stdout; dies on failure
+ if (!@perf_events) { # No events requested: run the command directly, @profile is not touched
+ my $cmd = join(" ", @_);
+ my $out = `$cmd`;
+ die "$cmd failed ($?)" if $?;
+ return $out;
+ }
-sub profile_start() {
- %oprofile or return;
- oprof("--reset");
-}
+ my @cmd = ($perf_tool, qw(stat -o perf.out -x :), (map +( '-e', $_ ), @perf_events), @_); # One -e per event; results are written to perf.out with ":" as the field separator
+ my $cmd = join(" ", @cmd);
+ my $out = `$cmd`;
+ die "$cmd failed ($?)" if $?;
-sub profile_stop() {
- %oprofile or return ();
- oprof("--dump");
- if (system "opreport", "./access", "--xml", "--output-file=op.xml") {
- print STDERR "opreport failed: $?, assuming that no samples were gathered\n";
- return ();
- }
- open P, "./parse_op op.xml |" or die "parse_op failed";
- my %evt = ();
- while (<P>) {
+ @profile = (); # Refilled below with one value per event, assumed to be in the order of @perf_events
+ open my $perf, '<', 'perf.out' or die "Cannot open perf.out: $!";
+ while (<$perf>) {
chomp;
- my ($k, $v) = split /\s+/;
- $evt{$k} = $v;
+ next if /^$/ || /^#/; # Skip empty lines and lines starting with "#"
+ my @fields = split /:/;
+ push @profile, $fields[0]; # The first field is taken as the counter value
}
- close P;
- my @prof = ();
- for my $e (@oprofile_events) {
- my $mul = $oprofile{$e};
- $mul =~ s/:.*//;
- push @prof, ($evt{$e} || 0) * 1. * $mul;
- }
- return @prof;
+ close $perf;
+ @profile == @perf_events or die "Perf returned wrong number of values";
+
+ return $out;
}
### Measure and create logs ###
if (!$graph_only) {
- profile_setup();
for my $r (@randomized) {
for my $m (@modify) {
for my $s (@item_sizes) {
@@ -150,14 +130,13 @@ if (!$graph_only) {
`make access F="$o"`; die if $?;
open D, ">$f" or die;
for (my $mem=$min_mem; $mem<=$max_mem; $mem*=2) {
- profile_start();
- my $out = `./access $mem`; die if $?;
+ print "$mem ($o): ";
+ my $out = run_profiled('./access', $mem);
my ($t, $attempts) = split /\s+/, $out;
- my @prof = profile_stop();
- @prof = map { $_ / $attempts } @prof;
+ my @prof = map { $_ / $attempts } @profile;
chomp $t;
- print "$mem ($o): ", join(" ", $t,
- map { $oprofile_events[$_] . ":" . sprintf("%.3f", $prof[$_]) } 0..$#prof),
+ print join(" ", $t,
+ map { $perf_events[$_] . ":" . sprintf("%.3f", $prof[$_]) } 0..$#perf_events),
"\n";
print D join("\t", $mem, $t, @prof), "\n";
}
@@ -165,7 +144,6 @@ if (!$graph_only) {
}
}
}
- profile_shutdown();
}
### The plots ###
@@ -179,7 +157,7 @@ set key left
set xlabel "data set size [KB]"
set ylabel "time per access [ns]"
set grid
-set yrange [1:120] # Change if necessary
+set yrange [1:50] # Change if necessary
EOF
@@ -212,7 +190,7 @@ for my $s (@item_sizes) {
}
# For each item size and access type, show full profiling information
-if (%oprofile) {
+if (@perf_events) {
for my $s (@item_sizes) {
for my $r (@randomized) {
for my $m (@modify) {
@@ -224,8 +202,8 @@ if (%oprofile) {
print GP "set y2range [0:3]\n";
my @a = ();
push @a, "'a-$r-$m-$s.log' title 'Time' with linespoints";
- for my $i (0..$#oprofile_events) {
- push @a, "'' using 1:" . ($i+3) . " axes x1y2 title '" . $oprofile_events[$i] . "' with linespoints";
+ for my $i (0..$#perf_events) {
+ push @a, "'' using 1:" . ($i+3) . " axes x1y2 title '" . $perf_events{$perf_events[$i]} . "' with linespoints";
}
print GP "plot ", join(", ", @a), "\n\n";
}