Commit feea6da4 authored by Martin Mareš's avatar Martin Mareš

Import utilit z meho soukromeho repozitare

parent 10faecaf
Brum :)
Materialy k predmetu Algoritmy a jejich implementace
Komentare vitany na <aim@ucw.cz>.
# Compiler and linker commands
CC=gcc
LD=gcc
# Optimised build with an extended set of warnings; C99 with GNU extensions
CFLAGS=-O2 -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Wundef -Wredundant-decls -std=gnu99
# Default target: build the measurement program
all: access
access: access.c
# Compile-time switches for access.c are appended to CFLAGS via F
access: CFLAGS+=$(F)
# Default switches: 16-byte items, 1000 ms measurement period
F=-DSIZE=16 -DMS=1000
# Remove editor backups, object files and everything the measurement generates
clean:
rm -f `find . -name "*~" -or -name "*.[oa]" -or -name "\#*\#" -or -name TAGS -or -name core -or -name .depend -or -name .#*`
rm -f access a-* plot.gp
Jak pouzivat merici utilitku:
(1) make clean -- smaze pracovni soubory z pripadnych predchozich behu.
(2) make -- tim vyzkousite, ze testovaci program jde zkompilovat.
(3) ./graph.pl -- ridici skript, ktery testovaci program kompiluje
s ruznymi parametry, spousti ho a loguje vysledky. Take vytvari soubor
plot.gp pro GnuPlot. Na zacatku skriptu jsou nastavitelne parametry.
(4) gnuplot plot.gp -- vyrobi a-*.png s grafy.
/*
* A simple memory access speed test
*
* Written by Martin Mares <mj@ucw.cz> and put into public domain.
*
* Compile-time switches:
* SIZE=xxx Size of a single item in bytes
* RANDOMIZE Define for random access, undef for sequential
* MODIFY Define for read-write accesses, undef for read-only
* MS Measurement period in ms
* ARRAY Access items as array (default: linked list)
* HUGE Allocate huge pages (hugetlbfs required)
*
* Output: time per access in nanoseconds, followed by the total number of item accesses performed during the run.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <time.h>
// One element of the test data set: a link to the next element followed by
// padding sized from the compile-time SIZE switch.
// NOTE(review): assumes SIZE is a multiple of 4 and larger than a pointer,
// otherwise the pad array is empty or its length underflows -- confirm.
struct item {
// Next element of the list (left NULL at the end of the list)
struct item *next;
// Padding in 32-bit words; pad[0] is the word process_item() touches
uint32_t pad[(SIZE-sizeof(struct item *)) / 4];
};
// Start of the memory block holding all items (assigned by alloc_mem())
static struct item *items;
// block_size: size of the block in bytes; n: number of items in the block
// (both are computed from the command line in main())
static unsigned block_size, n;
// Resolution of get_timer(): one tick is one microsecond
#define TICKS_PER_SEC 1000000

/*
 * Return the current time of day (as reported by gettimeofday()) in ticks,
 * i.e. in microseconds.
 */
static int64_t get_timer(void)
{
	struct timeval t;
	gettimeofday(&t, NULL);
	// Widen before multiplying: where tv_sec is a 32-bit type, the product
	// seconds * 1000000 would overflow before being converted to int64_t.
	return (int64_t) t.tv_sec * TICKS_PER_SEC + t.tv_usec;
}
#ifdef HUGE
/*
 * HUGE variant: back the item block by a file on hugetlbfs.
 * Maps block_size bytes, rounded up to whole huge pages, from HUGE_PATH and
 * stores the address in items. Prints a message and exits on failure.
 */
static void alloc_mem(void)
{
#define HUGE_PATH "huge/access"
#define HUGE_PAGE (2*1048576)
	// Remove a file left by a previous run; the result is not checked,
	// so a missing file is not an error.
	unlink(HUGE_PATH);
	int fd = open(HUGE_PATH, O_RDWR | O_CREAT, 0644);
	if (fd < 0)
	{
		fprintf(stderr, "Cannot open hugetlbfs at %s: %m\n", HUGE_PATH);
		exit(1);
	}
	// Round the length up to a multiple of the huge page size; computed
	// in size_t so that the rounding cannot wrap around in unsigned.
	size_t len = ((size_t) block_size + HUGE_PAGE - 1) / HUGE_PAGE * HUGE_PAGE;
	void *pp = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (pp == MAP_FAILED)
	{
		fprintf(stderr, "Cannot mmap hugetlbfs: %m\n");
		exit(1);
	}
	// The mapping stays valid after the descriptor is closed, so do not
	// keep the descriptor open for the rest of the run.
	close(fd);
	items = pp;
}
#else
/*
 * Default variant: allocate block_size bytes aligned to 4096 bytes and store
 * the address in items. Prints a message and exits on failure.
 */
static void alloc_mem(void)
{
	void *pp;
	// posix_memalign() reports failure through its return value (a positive
	// error number) and does not set errno, so neither a "< 0" test nor
	// the %m format can be used here.
	int err = posix_memalign(&pp, 4096, block_size);
	if (err)
	{
		fprintf(stderr, "Cannot allocate memory: %s\n", strerror(err));
		exit(1);
	}
	items = pp;
}
#endif
// Consume x without doing anything with it: the empty asm statement names x
// as a register input operand, so the value has to be computed.
static inline void eat(uint32_t x)
{
// Discard a value, but do not allow optimizing it out
asm volatile ("" : : "r" (x));
}
// Perform the single memory access that is being measured on item x.
// The MODIFY compile-time switch selects a read-write or a read-only access.
static inline void process_item(struct item *x)
{
#ifdef MODIFY
// Read-write access: increment the first padding word in place
x->pad[0]++;
#else
// Read-only access: load the first padding word and pass it to eat()
eat(x->pad[0]);
#endif
}
#ifdef ARRAY
// ARRAY variant: items are reached by index, so no links are needed;
// the whole block is just zeroed.
static void fill(void)
{
memset(items, 0, block_size);
}
// ARRAY variant of one measured pass: call process_item() on the items of the
// array, either in a scattered order (RANDOMIZE) or from first to last.
static void loop(void)
{
#ifdef RANDOMIZE
// The masking with n-1 below only works when n is a power of two
if (n & (n-1))
{
fprintf(stderr, "Block size must be a power of two!\n");
exit(1);
}
unsigned i=0;
do
{
process_item(&items[i]);
// Advance by a fixed odd stride modulo n; an odd stride modulo a power
// of two reaches every index once before coming back to 0
i = (i+259309) & (n-1);
}
// The pass is over when the index returns to the starting item
while (i);
#else
// Sequential pass over items[0] .. items[n-1]
struct item *stop = items + n;
for (struct item *a = items; a < stop; a++)
process_item(a);
#endif
}
#else
/*
 * Linked-list variant: zero the block and chain the items together, starting
 * at items[0]. With RANDOMIZE each item links to the one a fixed stride
 * further (modulo n); otherwise each item links to its successor in memory.
 */
static void fill(void)
{
	// Zeroing leaves every next pointer zero, so an item that gets no
	// link below terminates the walk in loop().
	memset(items, 0, block_size);
#ifdef RANDOMIZE
	for (unsigned i=0; i<n; i++)
	{
		unsigned j=(i+259309) % n;
		// The link back to item 0 is left out, so the chain ends there
		// instead of forming a cycle.
		if (j)
			items[i].next = &items[j];
	}
#else
	// Written as "i+1 < n" rather than "i < n-1": n is unsigned, so for
	// n == 0 the expression n-1 would wrap around and the loop would
	// write far beyond the block.
	for (unsigned i=0; i+1<n; i++)
		items[i].next = &items[i+1];
#endif
}
// Linked-list variant of one measured pass: start at items[0] and follow the
// next pointers, calling process_item() on each item, until a null link.
static void loop(void)
{
for (struct item *a = items; a; a=a->next)
process_item(a);
}
#endif
/*
 * Usage: access <size-in-KB>
 *
 * Allocates and fills a block of the given size, then times passes over it.
 * The number of passes per round starts at 1 and doubles until one round
 * lasts at least MS milliseconds. Prints two numbers: the time per access in
 * nanoseconds (from the last round) and the total number of item accesses
 * performed in all rounds. Returns 0 on success, 1 on a usage error.
 */
int main(int argc, char **argv)
{
	if (argc != 2)
	{
		fprintf(stderr, "Usage: access <size-in-KB>\n");
		return 1;
	}

	// Reject sizes that are not positive numbers or whose byte count does
	// not fit in block_size; such values used to be accepted silently.
	char *end;
	long kb = strtol(argv[1], &end, 10);
	if (end == argv[1] || kb <= 0 || kb > (long) ((unsigned) -1 / 1024))
	{
		fprintf(stderr, "Invalid block size: %s\n", argv[1]);
		return 1;
	}
	block_size = (unsigned) kb * 1024;
	n = block_size / SIZE;
	if (!n)
	{
		// Without at least one item the passes would access memory
		// outside the block and the result would be divided by zero.
		fprintf(stderr, "Block size must be at least %u bytes\n", (unsigned) SIZE);
		return 1;
	}

	alloc_mem();
	fill();

	// 64-bit counter, so that repeated doubling cannot wrap around to 0
	uint64_t attempts = 1;
	for (;;)
	{
		int64_t t0 = get_timer();
		for (uint64_t i=0; i<attempts; i++)
			loop();
		int64_t t1 = get_timer();
		int64_t t = t1-t0;
		if (t >= TICKS_PER_SEC/1000*MS)
		{
			// Rounds of 1, 2, 4, ..., attempts passes were run, i.e.
			// 2*attempts-1 passes of n accesses in total; the sum is
			// formed in floating point to avoid integer overflow.
			printf("%.3f %f\n", (double)t/attempts/n/TICKS_PER_SEC*1e9, (2.0*attempts-1)*n);
			break;
		}
		attempts *= 2;
	}
	return 0;
}
#!/usr/bin/perl
# User-adjustable parameters of the measurement run. The measurement loop
# compiles and runs the test program once for every combination of
# @randomized, @modify and @item_sizes, doubling the block size from
# $min_mem up to $max_mem.
my $min_mem = 8; # Minimum memory block size (KB)
my $max_mem = 65536; # Maximum memory block size (KB), must fit in physical memory
my @item_sizes = (16,64,128,1024,4096); # Sizes of items
my @randomized = (0,1); # Try randomized accesses?
my @modify = (0,1); # Try read-write accesses?
my $measure_ms = 1000; # Duration of measurement in ms
my $array = 0; # Items are accessed as an array instead of a list
my $huge = 0; # Use huge pages (hugetlbfs required)
# If you want to include profiling information (cache misses etc.) in detailed
# graphs, you can ask the measurement utility to call oprofile. Please note that
# this requires root privileges and that you need to adjust the profiling events
# below to match your CPU (see `opcontrol --list-events'). Also, it might be
# necessary to increase $measure_ms in order to gather enough samples.
# Keys are event names; values are the settings passed to opcontrol after
# "--event=NAME:". An empty hash (everything commented out) disables profiling.
my %oprofile = (
## AMD K8 Family 10 (4 counters)
# "DATA_CACHE_MISSES" => 10000,
# "L2_CACHE_MISS" => 10000,
# "DRAM_ACCESSES" => "10000:0xff",
# "L1_DTLB_AND_L2_DTLB_MISS" => 10000,
## "L1_DTLB_MISS_AND_L2_DTLB_HIT" => 10000,
## Intel Core (2 counters)
# "DTLB_MISSES" => 10000,
# "BUS_TRAN_MEM" => 10000, # Memory transactions on the bus
## Intel i7 (4 counters)
# "DTLB_MISSES" => 10000,
# "MEM_UNCORE_RETIRED" => "10000:0x30", # Memory transactions (local DRAM)
# "L2_RQSTS" => "10000:0xaa", # L2 misses
# "L1D" => "10000", # Lines brought to L1
## "LLC_MISSES" => 10000, # Last-Level Cache (L3) misses
## "OFFCORE_REQUESTS" => "10000:0x08", # Off-core read requests
);
# Event names in sorted order; this fixes the order of the profile values
# returned by profile_stop() and of the extra columns in the log files.
my @oprofile_events = sort keys %oprofile;
# Use --graph to disable all calculations and just re-use the log files
my $graph_only = 0;
if (@ARGV && $ARGV[0] eq "--graph") {
	$graph_only = 1;
	shift @ARGV;
}

# Use ./graph.pl <directory> to store results in a separate directory.
# The directory is created when missing and becomes the working directory;
# the files needed for building and profiling are symlinked into it from the
# parent directory. Every failure reports the operating system's error.
if (defined $ARGV[0]) {
	my $dir = $ARGV[0];
	-d $dir or mkdir $dir or die "Cannot create $dir: $!";
	chdir $dir or die "Cannot change directory to $dir: $!";
	for my $f ("access.c", "Makefile", "parse_op") {
		-f $f or symlink "../$f", $f or die "Cannot symlink ../$f into $dir: $!";
	}
}
### Get machine name and the description of caches ###

# Label used in the graph titles: host name plus the special modes in effect
chomp(my $machine = `hostname`);
$machine .= " Array" if $array;
$machine .= " HugePages" if $huge;
$machine = "($machine)";

# Directory of the cache currently examined; a package variable because
# rd() reads it while the loop below iterates over it.
our $c;
my @caches = ();

# Return the first line (without the newline) of attribute file $attr in the
# current cache directory, or "" when the file cannot be opened.
sub rd($) {
	my ($attr) = @_;
	open my $fh, "$c/$attr" or return "";
	my $value = <$fh>;
	close $fh;
	chomp $value;
	return $value;
}

# Describe every cache of CPU 0 except the instruction caches
for $c (glob "/sys/devices/system/cpu/cpu0/cache/index*") {
	my $level = rd("level");
	my $ways = rd("ways_of_associativity");
	my $size = rd("size");
	my $type = rd("type");
	my $line = rd("coherency_line_size");
	# Keep only the initial letter of the type and drop it when it is "U"
	$type =~ s/(.).*/$1/;
	$type =~ s/U//;
	next if $type eq "I";
	push @caches, "L$level$type $size/$line $ways-way";
}
$machine .= " [" . join(", ", @caches) . "]";
### Interface to oprofile ###
# Run opcontrol with the given arguments (no shell involved) and die with the
# full command line and $? when it does not return a zero status.
sub oprof(@) {
	my @args = @_;
	my $status = system("opcontrol", @args);
	die "opcontrol " . join(" ", @args) . " failed ($?)" if $status;
	return $status;
}
# Configure opcontrol for all events listed in %oprofile and start it.
# Does nothing when no events are configured.
sub profile_setup() {
	return unless %oprofile;
	my @event_args = map { "--event=" . join(":", $_, $oprofile{$_}) } sort keys %oprofile;
	oprof("--shutdown");
	oprof("--setup", @event_args);
	oprof("--start");
}
# Issue "opcontrol --shutdown" followed by "opcontrol --reset".
# Does nothing when no events are configured.
sub profile_shutdown() {
	return unless %oprofile;
	oprof($_) for "--shutdown", "--reset";
}
# Issue "opcontrol --reset" before a measurement.
# Does nothing when no events are configured.
sub profile_start() {
	return unless %oprofile;
	oprof("--reset");
}
# Collect the profile of the measurement that has just finished.
#
# Dumps the samples, lets opreport write them for ./access to op.xml and
# converts that file with ./parse_op, which prints "EVENT COUNT" lines.
# Returns one number per entry of @oprofile_events (in that order): the count
# reported for the event (0 when missing) multiplied by the part of its
# %oprofile setting before the first ":".
# Returns an empty list when profiling is disabled or opreport fails;
# dies when parse_op cannot be run or exits unsuccessfully.
sub profile_stop() {
	%oprofile or return ();
	oprof("--dump");
	if (system "opreport", "./access", "--xml", "--output-file=op.xml") {
		print STDERR "opreport failed: $?, assuming that no samples were gathered\n";
		return ();
	}

	# List-form pipe open: the helper is started directly, without a shell
	open my $parser, "-|", "./parse_op", "op.xml" or die "parse_op failed: $!";
	my %evt = ();
	while (my $line = <$parser>) {
		chomp $line;
		my ($k, $v) = split /\s+/, $line;
		defined $k && length $k or next;	# Skip empty lines
		$evt{$k} = $v;
	}
	# Closing a pipe waits for the child and fails when it exited with an
	# error, which would otherwise pass unnoticed as "no samples".
	close $parser or die "parse_op failed" . ($! ? ": $!" : " (status $?)");

	my @prof = ();
	for my $e (@oprofile_events) {
		my $mul = $oprofile{$e};
		$mul =~ s/:.*//;
		push @prof, ($evt{$e} || 0) * 1. * $mul;
	}
	return @prof;
}
### Measure and create logs ###

# For every combination of access pattern and item size, rebuild the test
# program with the matching switches and run it for block sizes $min_mem,
# 2*$min_mem, ... up to $max_mem. Each run adds one tab-separated line to
# a-<randomized>-<modify>-<size>.log: block size, time per access and the
# profile counts per access. Combinations whose log already exists are skipped.
if (!$graph_only) {
	profile_setup();
	for my $r (@randomized) {
		for my $m (@modify) {
			for my $s (@item_sizes) {
				my $f = "a-$r-$m-$s.log";
				next if -f $f;
				my @o = ( "-DSIZE=$s", "-DMS=$measure_ms" );
				push @o, "-DRANDOMIZE" if $r;
				push @o, "-DMODIFY" if $m;
				push @o, "-DARRAY" if $array;
				push @o, "-DHUGE" if $huge;
				my $o = join(" ", @o);

				# Force a rebuild with the new switches
				unlink "access";
				die "Cannot remove the old access binary: $!" if -e "access";
				`make access F="$o"`; die "make access F=\"$o\" failed ($?)" if $?;

				# Write to a temporary file and rename it once the series is
				# complete, so that an aborted run never leaves a partial log
				# which the "next if -f" test above would take for finished.
				my $tmp = "$f.tmp";
				open my $log, ">", $tmp or die "Cannot create $tmp: $!";
				for (my $mem=$min_mem; $mem<=$max_mem; $mem*=2) {
					profile_start();
					my $out = `./access $mem`; die "./access $mem failed ($?)" if $?;
					# The program prints the time per access and the total
					# number of accesses it has performed
					my ($t, $accesses) = split /\s+/, $out;
					defined $accesses && $accesses > 0
						or die "Unexpected output of ./access $mem: $out";
					my @prof = profile_stop();
					@prof = map { $_ / $accesses } @prof;
					print "$mem ($o): ", join(" ", $t,
						map { $oprofile_events[$_] . ":" . sprintf("%.3f", $prof[$_]) } 0..$#prof),
						"\n";
					print $log join("\t", $mem, $t, @prof), "\n";
				}
				close $log or die "Cannot write $tmp: $!";
				rename $tmp, $f or die "Cannot rename $tmp to $f: $!";
			}
		}
	}
	profile_shutdown();
}
### The plots ###

# Write plot.gp, a GnuPlot script which draws the log files into PNG images:
#   a-<randomized>-<modify>.png            all item sizes for one access type
#   a-size-<size>.png                      all access types for one item size
#   a-prof-<size>-<randomized>-<modify>.png  time and profile events (only
#                                          when profiling events are configured)

# Generic header
open my $gp, ">", "plot.gp" or die "Cannot create plot.gp: $!";
print $gp <<EOF ;
set terminal png large size 1024,768
set logscale x 2
set key left
set xlabel "data set size [KB]"
set ylabel "time per access [ns]"
set grid
set yrange [1:120] # Change if necessary
EOF

# For each access type, show different sizes
for my $r (@randomized) {
	for my $m (@modify) {
		my @plots = ();
		for my $s (@item_sizes) {
			push @plots, "'a-$r-$m-$s.log' title '${s}B items' with linespoints";
		}
		print $gp "set output 'a-$r-$m.png'\n";
		my $title = ($r ? "Random" : "Sequential") . " " . ($m ? "Read-Write" : "Read-Only") . " Accesses $machine";
		print $gp "set title '$title'\n";
		print $gp "plot ", join(", ", @plots), "\n\n";
	}
}

# For each item size, show all access types
for my $s (@item_sizes) {
	print $gp "set output 'a-size-$s.png'\n";
	print $gp "set title 'Item size $s $machine'\n";
	my @a = ();
	for my $r (@randomized) {
		for my $m (@modify) {
			my $t = ($r ? "Random" : "Sequential") . " " . ($m ? "R+W" : "R");
			push @a, "'a-$r-$m-$s.log' title '$t' with linespoints";
		}
	}
	print $gp "plot ", join(", ", @a), "\n\n";
}

# For each item size and access type, show full profiling information
if (%oprofile) {
	for my $s (@item_sizes) {
		for my $r (@randomized) {
			for my $m (@modify) {
				print $gp "set output 'a-prof-$s-$r-$m.png'\n";
				my $title = "Profile for " . ($r ? "Random" : "Sequential") . " " . ($m ? "Read-Write" : "Read-Only") . " $s bytes $machine";
				print $gp "set title '$title'\n";
				print $gp "set y2label 'number of events'\n";
				print $gp "set y2tics\n";
				print $gp "set y2range [0:3]\n";
				my @a = ();
				push @a, "'a-$r-$m-$s.log' title 'Time' with linespoints";
				# Log columns: 1 = block size, 2 = time, 3.. = profile events
				for my $i (0..$#oprofile_events) {
					push @a, "'' using 1:" . ($i+3) . " axes x1y2 title '" . $oprofile_events[$i] . "' with linespoints";
				}
				print $gp "plot ", join(", ", @a), "\n\n";
			}
		}
	}
}

# Buffered write errors only show up when the file is closed
close $gp or die "Cannot write plot.gp: $!";
#!/usr/bin/perl
# Parse XML output of opreport
use strict;
use warnings;
use XML::Simple;
# use Data::Dumper;
# Load the report named on the command line; ForceArray wraps every nested
# element in an array reference, even when it occurs only once.
my $report = XMLin($ARGV[0], ForceArray => 1);
my $events = $report->{'setup'}->[0]->{'eventsetup'};
my $classes = $report->{'classes'}->[0]->{'class'};

# Translate the name of each class to the name of the event it refers to
my %class_to_event;
for my $class_name (keys %$classes) {
	my $event_id = $classes->{$class_name}->{'event'};
	$class_to_event{$class_name} = $events->{$event_id}->{'eventname'};
}

# Only one of the binaries in the report is examined
my $binaries = $report->{'binary'};
my ($thisbin) = values %{$binaries};

# Print one "EVENT COUNT" line per count. A count is either a hash carrying a
# class name, or a bare number, which is attributed to the event with key 0.
for my $count (@{$thisbin->{'count'}}) {
	my ($event, $samples) = ref($count) eq "HASH"
		? ($class_to_event{$count->{'class'}}, $count->{'content'})
		: ($events->{0}->{'eventname'}, $count);
	print $event, " ", $samples+0, "\n";
}
#!/usr/bin/perl
# A simple script to show processors, cores and NUMA nodes
# (c) 2010 Martin Mares <mj@ucw.cz>
use strict;
use warnings;
# Set to a true value to print every CPU, cache and node as it is discovered
my $debug = 0;
# Root of the sysfs tree that is scanned
our $sys = "/sys/devices/system";
# Directory in which rd() looks up attribute files; set before each group of reads
our $spath;
# Convert a CPU mask written as comma-separated hexadecimal words of 32 bits
# each, most significant word first, to a set: a reference to a hash whose
# keys are the numbers of the bits that are set.
sub map_parse($) {
	my ($mask) = @_;
	my $bits_per_word = 32;
	my %members = ();
	# Reversed, so that index 0 is the least significant word
	my @words = reverse split /,/, $mask;
	for my $w (0 .. $#words) {
		my $value = hex $words[$w];
		for my $bit (0 .. $bits_per_word - 1) {
			$members{$w * $bits_per_word + $bit} = 1 if $value & (1 << $bit);
		}
	}
	return \%members;
}
# Format a set (hash reference) as its members in ascending numeric order,
# separated by commas.
sub set_format($) {
	my ($set) = @_;
	my @members = sort { $a <=> $b } keys %$set;
	return join ",", @members;
}
# Return a new set holding those members of the first set which have a true
# value in the second set.
sub set_intersect($$) {
	my ($first, $second) = @_;
	my %common = ();
	for my $member (keys %$first) {
		$common{$member} = 1 if $second->{$member};
	}
	return \%common;
}
# Return true when the set (hash reference) has no members.
sub set_empty($) {
	my ($set) = @_;
	return keys(%$set) == 0;
}
# Read the first line of the attribute file named by the first argument in the
# directory $spath and return it without the trailing newline.
# The second argument is returned instead when the file cannot be opened or
# when nothing can be read from it.
sub rd($$) {
	my ($name, $default) = @_;
	open my $fh, "<", "$spath/$name" or return $default;
	my $line = <$fh>;
	close $fh;
	# An empty file or a failed read yields undef; fall back to the default
	# rather than returning undef and triggering warnings in the callers.
	defined $line or return $default;
	chomp $line;
	return $line;
}
# Tables filled by the scan of the CPU directories below
my %cpu = ();		# package id -> core id -> CPU id -> 1
my %cache = ();		# formatted set of sharing CPUs -> cache name -> attributes
my %levels = ();	# cache name -> 1, for every cache name encountered
my %cpu_ids = ();	# CPU id -> "package/core"

for my $cpu_dir (glob "$sys/cpu/cpu[0-9]*") {
	my ($id) = ($cpu_dir =~ /cpu(\d+)$/) or die;

	# Position of the CPU; both ids default to 0 when they cannot be read
	$spath = "$cpu_dir/topology";
	my $package = rd("physical_package_id", 0);
	my $core = rd("core_id", 0);
	$cpu{$package}{$core}{$id} = 1;
	$cpu_ids{$id} = "$package/$core";
	print "CPU: $package/$core/$id\n" if $debug;

	# Caches of the CPU, recorded under the set of CPUs which share them
	for my $cache_dir (glob "$cpu_dir/cache/index[0-9]*") {
		$spath = $cache_dir;
		my $level = rd("level", "?");
		my $sharers = map_parse(rd("shared_cpu_map", ""));
		$sharers->{$id} = 1;	# The CPU itself always counts as a user
		my $type = rd("type", "?");
		$type =~ s/(.).*/$1/;	# Keep only the first character of the type
		my $name = "L$level$type";
		my $shared_by = set_format($sharers);
		print "\t$name cpus=", $shared_by, "\n" if $debug;
		$levels{$name} = 1;
		$cache{$shared_by}{$name} = {
			"level" => $name,
			"line" => rd("coherency_line_size", "?"),
			"size" => rd("size", "?"),
			"ways" => rd("ways_of_associativity", "?"),
		};
	}
}
# Tables filled by the scan of the NUMA node directories below
my %nodes = ();		# node id -> set of CPUs parsed from the node's cpumap
my %node_mem = ();	# node id -> text after "MemTotal:" in meminfo, "?" if unknown

# $sys is already an absolute path, so no extra leading slash is needed
# (it used to produce "//sys/..." paths, unlike the scan of the CPUs).
for my $n (<$sys/node/node[0-9]*>) {
	$spath = $n;
	my ($id) = ($n =~ /node(\d+)$/) or die "Unexpected name of node directory: $n";
	my $c = map_parse(rd("cpumap", ""));
	my $mem = "?";
	# A missing or unreadable meminfo is not an error; the size stays "?"
	if (open my $x, "<", "$spath/meminfo") {
		while (<$x>) {
			/^Node \d+ MemTotal:\s*(.*)/ and $mem = $1;
		}
		close $x;
	}
	print "NODE $id: ", set_format($c), " ($mem)\n" if $debug;
	$nodes{$id} = $c;
	$node_mem{$id} = $mem;
}
sub prcpu($$) {
my ($desc, $set) = @_;
if (scalar keys %$set == 0) {
return;
} elsif (scalar keys %$set == 1) {
printf "%2d", (keys %$set)[0];
} else {
print " ";
}
printf(" %-20s", $desc);
my $fset = set_format($set);
for my $l (sort keys %levels) {
my $c;
if ($c = $cache{$fset}{$l}) {