From 8c4e52a10153c518c8f3a545bef937ad07154b71 Mon Sep 17 00:00:00 2001
From: Martin Mares <mj@ucw.cz>
Date: Thu, 2 Apr 2015 18:29:23 +0200
Subject: [PATCH] Access: Use "perf" instead of "oprofile" for profiling

---
 access/README   |   3 +-
 access/graph.pl | 146 ++++++++++++++++++++----------------------------
 2 files changed, 64 insertions(+), 85 deletions(-)

diff --git a/access/README b/access/README
index 1c15d40..d748127 100644
--- a/access/README
+++ b/access/README
@@ -5,7 +5,8 @@ Jak pouzivat merici utilitku:
 (2) make -- tim vyzkousite, ze testovaci program jde zkompilovat.
 
 (3) ./graph.pl -- ridici skript, ktery testovaci program kompiluje
-s ruznymi parametry, spousti ho a loguje vysledky. Take vytvari soubor
+s ruznymi parametry, spousti ho a loguje vysledky v adresari "out"
+(nebo libovolnem jinem, ktery mu zadate). Take vytvari soubor
 plot.gp pro GnuPlot. Na zacatku skriptu jsou nastavitelne parametry.
 
 (4) gnuplot plot.gp -- vyrobi a-*.png s grafy.
diff --git a/access/graph.pl b/access/graph.pl
index c8b2c08..8e122aa 100755
--- a/access/graph.pl
+++ b/access/graph.pl
@@ -6,33 +6,29 @@ my @item_sizes = (16,64,128,1024,4096);	# Sizes of items
 my @randomized = (0,1);	# Try randomized accesses?
 my @modify = (0,1);	# Try read-write accesses?
 my $measure_ms = 1000;	# Duration of measurement in ms
-my $array = 0;		# Items are accessed as an array instead of a list
+my $array = 1;		# Items are accessed as an array instead of a list
 my $huge = 0;		# Use huge pages (hugetlbfs required)
 
 # If you want to include profiling information (cache misses etc.) in detailed
-# graphs, you can ask the measurement utility to call oprofile. Please note that
-# this requires root privileges and that you need to adjust the profiling events
-# below to match your CPU (see `opcontrol --list-events'). Also, it might be
-# necessary to increase $measure_ms in order to gather enough samples.
-my %oprofile = (
-## AMD K8 Family 10 (4 counters)
-#	"DATA_CACHE_MISSES" => 10000,
-#	"L2_CACHE_MISS" => 10000,
-#	"DRAM_ACCESSES" => "10000:0xff",
-#	"L1_DTLB_AND_L2_DTLB_MISS" => 10000,
-##	"L1_DTLB_MISS_AND_L2_DTLB_HIT" => 10000,
-## Intel Core (2 counters)
-#	"DTLB_MISSES" => 10000,
-#	"BUS_TRAN_MEM" => 10000,		# Memory transactions on the bus
-## Intel i7 (4 counters)
-#	"DTLB_MISSES" => 10000,
-#	"MEM_UNCORE_RETIRED" => "10000:0x30",	# Memory transactions (local DRAM)
-#	"L2_RQSTS" => "10000:0xaa",		# L2 misses
-#	"L1D" => "10000",			# Lines brought to L1
-##	"LLC_MISSES" => 10000,			# Last-Level Cache (L3) misses
-##	"OFFCORE_REQUESTS" => "10000:0x08",	# Off-core read requests
+# graphs, ask for specific events here. You must have the "perf" utility installed,
+# with a recent enough kernel. The set of available events depends on the exact
+# CPU type, see "man perf-list" for further details. Also, it might be necessary
+# to increase $measure_ms in order to gather enough samples.
+
+# Events (as passed to "perf stat -e") and the names shown for them in the graphs
+my %perf_events = (
+	# 'cpu-cycles' => 'CPU cycles',
+	# 'cache-misses' => 'Cache misses',
+	'L1-dcache-load-misses' => 'L1D load misses',
+	'LLC-loads' => 'LLC loads',
+	# 'dTLB-load-misses' => 'DTLB load misses',
+	'mem-loads' => 'Memory loads',
+	'r412E' => 'LLC misses',	# Raw event code; presumably CPU-specific -- verify for your machine
 );
-my @oprofile_events = sort keys %oprofile;
+my @perf_events = sort keys %perf_events;	# Fixed order used for the perf options, the progress output and the graph columns
+
+# How to call "perf" (adjust to the name of the binary on your system)
+my $perf_tool = "perf_3.16";
 
 # Use --graph to disable all calculations and just re-use the log files
 my $graph_only = 0;
@@ -41,14 +37,13 @@ if (@ARGV && $ARGV[0] eq "--graph") {
 	shift @ARGV;
 }
 
-# Use ./graph.pl <directory> to store results in a separate directory
-if (defined $ARGV[0]) {
-	my $dir = $ARGV[0];
-	-d $dir or mkdir $dir or die "Cannot create $dir";
-	chdir $dir or die;
-	for my $f ("access.c", "Makefile", "parse_op") {
-		-f $f or symlink "../$f", $f or die;
-	}
+# Use ./graph.pl <directory> to store results in a given directory ("out" by default).
+# Files missing there are symlinked from its parent directory ("../<file>").
+my $dir = $ARGV[0] // 'out';
+-d $dir or mkdir $dir or die "Cannot create $dir: $!";
+chdir $dir or die "Cannot chdir to $dir: $!";
+for my $f ("access.c", "Makefile", "parse_op") {
+	-f $f or symlink "../$f", $f or die "Cannot symlink ../$f to $f: $!";
 }
 
 ### Get machine name and the description of caches ###
@@ -61,6 +56,7 @@ $machine = "($machine)";
 
 our $c;
 my @caches = ();
+my $prev_L = 0;	# Line size of the previously listed cache (0 = none listed yet)
 for $c (</sys/devices/system/cpu/cpu0/cache/index*>) {
 	sub rd($) {
 		my ($f) = @_;
@@ -76,65 +72,49 @@ for $c (</sys/devices/system/cpu/cpu0/cache/index*>) {
 	my $s = rd("size");
 	my $t = rd("type");
 	my $L = rd("coherency_line_size");
+	my $line = ($L == $prev_L) ? "" : " (${L}B line)";	# Mention the line size only when it differs from the previously listed cache
 	$t =~ s/(.).*/$1/;
 	$t =~ s/U//;
 	$t eq "I" and next;
-	push @caches, "L$l$t $s/$L $w-way";
+	push @caches, "L$l$t $s $w-way$line";
+	$prev_L = $L;	# Only reached for caches that were listed (type "I" is skipped above)
 }
 }
 $machine .= " [" . join(", ", @caches) . "]";
 
-### Interface to oprofile ###
-
-sub oprof(@) {
-	system "opcontrol", @_ and die "opcontrol " . join(" ", @_) . " failed ($?)";
-}
+### Profiling ###
 
-sub profile_setup() {
-	%oprofile or return;
-	oprof("--shutdown");
-	oprof("--setup", map { "--event=" . $_ . ":" . $oprofile{$_} } sort keys %oprofile);
-	oprof("--start");
-}
+my @profile = ();	# Counter values read by the last profiled run (assumed to be in @perf_events order)
 
-sub profile_shutdown() {
-	%oprofile or return;
-	oprof("--shutdown");
-	oprof("--reset");
-}
+sub run_profiled(@) {	# Run the given command (under "perf stat" if events are configured), return its stdout, fill @profile
+	if (!@perf_events) {
+		my $cmd = join(" ", @_);
+		my $out = `$cmd`;
+		die "Command '$cmd' failed ($?)" if $?;
+		return $out;
+	}
 
-sub profile_start() {
-	%oprofile or return;
-	oprof("--reset");
-}
+	my @cmd = ($perf_tool, qw(stat -o perf.out -x :), (map +( '-e', $_ ), @perf_events), @_);	# ":"-separated report goes to perf.out
+	my $cmd = join(" ", @cmd);
+	my $out = `$cmd`;
+	die "Command '$cmd' failed ($?)" if $?;
 
-sub profile_stop() {
-	%oprofile or return ();
-	oprof("--dump");
-	if (system "opreport", "./access", "--xml", "--output-file=op.xml") {
-		print STDERR "opreport failed: $?, assuming that no samples were gathered\n";
-		return ();
-	}
-	open P, "./parse_op op.xml |" or die "parse_op failed";
-	my %evt = ();
-	while (<P>) {
+	@profile = ();
+	open my $perf, '<', 'perf.out' or die "Cannot read perf.out: $!";
+	while (<$perf>) {
 		chomp;
-		my ($k, $v) = split /\s+/;
-		$evt{$k} = $v;
+		next if /^$/ || /^#/;	# Skip blank lines and "#" comment lines
+		my @fields = split /:/;	# The first field is taken as the counter value
+		push @profile, ($fields[0] =~ /^\d/ ? $fields[0] : die "Perf did not count an event: $_");	# Refuse placeholders such as "<not counted>"
 	}
-	close P;
-	my @prof = ();
-	for my $e (@oprofile_events) {
-		my $mul = $oprofile{$e};
-		$mul =~ s/:.*//;
-		push @prof, ($evt{$e} || 0) * 1. * $mul;
-	}
-	return @prof;
+	close $perf;
+	@profile == @perf_events or die "Perf returned wrong number of values";
+
+	return $out;
 }
 
 ### Measure and create logs ###
 
 if (!$graph_only) {
-	profile_setup();
 	for my $r (@randomized) {
 		for my $m (@modify) {
 			for my $s (@item_sizes) {
@@ -150,14 +130,13 @@ if (!$graph_only) {
 				`make access F="$o"`; die if $?;
 				open D, ">$f" or die;
 				for (my $mem=$min_mem; $mem<=$max_mem; $mem*=2) {
-					profile_start();
-					my $out = `./access $mem`; die if $?;
+					print "$mem ($o): ";	# Start of the progress line; it is completed once the run has finished
+					my $out = run_profiled('./access', $mem);	# Also refreshes @profile
 					my ($t, $attempts) = split /\s+/, $out;
-					my @prof = profile_stop();
-					@prof = map { $_ / $attempts } @prof;
+					my @prof = map { $_ / $attempts } @profile;	# Scale raw counters by the attempt count reported by ./access
 					chomp $t;
-					print "$mem ($o): ", join(" ", $t,
-						map { $oprofile_events[$_] . ":" . sprintf("%.3f", $prof[$_]) } 0..$#prof),
+					print join(" ", $t,	# Time first, then one "event:value" pair per configured event
+						map { $perf_events[$_] . ":" . sprintf("%.3f", $prof[$_]) } 0..$#perf_events),
 						"\n";
 					print D join("\t", $mem, $t, @prof), "\n";
 				}
@@ -165,7 +144,6 @@ if (!$graph_only) {
 			}
 		}
 	}
-	profile_shutdown();
 }
 
 ### The plots ###
@@ -179,7 +157,7 @@ set key left
 set xlabel "data set size [KB]"
 set ylabel "time per access [ns]"
 set grid
-set yrange [1:120]	# Change if necessary
+set yrange [1:50]	# Change if necessary
 
 EOF
 
@@ -212,7 +190,7 @@ for my $s (@item_sizes) {
 }
 
 # For each item size and access type, show full profiling information
-if (%oprofile) {
+if (@perf_events) {
 	for my $s (@item_sizes) {
 		for my $r (@randomized) {
 			for my $m (@modify) {
@@ -224,8 +202,8 @@ if (%oprofile) {
 				print GP "set y2range [0:3]\n";
 				my @a = ();
 				push @a, "'a-$r-$m-$s.log' title 'Time' with linespoints";
-				for my $i (0..$#oprofile_events) {
-					push @a, "'' using 1:" . ($i+3) . " axes x1y2 title '" . $oprofile_events[$i] . "' with linespoints";
+				for my $i (0..$#perf_events) {	# Log columns 1 and 2 hold size and time, so event $i is in column $i+3
+					push @a, "'' using 1:" . ($i+3) . " axes x1y2 title '" . $perf_events{$perf_events[$i]} . "' with linespoints";
 				}
 				print GP "plot ", join(", ", @a), "\n\n";
 			}
-- 
GitLab