Skip to content
Snippets Groups Projects
Commit feea6da4 authored by Martin Mareš's avatar Martin Mareš
Browse files

Import utilit z meho soukromeho repozitare

parent 10faecaf
No related branches found
No related tags found
No related merge requests found
Showing
with 446 additions and 1 deletion
Brum :)
Materialy k predmetu Algoritmy a jejich implementace
Komentare vitany na <aim@ucw.cz>.
# Toolchain and warning/optimization flags used to build the test program
CC=gcc
LD=gcc
CFLAGS=-O2 -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Wundef -Wredundant-decls -std=gnu99
# Default target: build the measurement binary
all: access
access: access.c
# The compile-time switches of access.c are passed in through F,
# e.g. make access F="-DSIZE=64 -DMS=1000 -DRANDOMIZE"
access: CFLAGS+=$(F)
# Switches used when F is not given on the command line
F=-DSIZE=16 -DMS=1000
# Remove editor backups, object files, the test binary, and the a-* and plot.gp outputs
clean:
rm -f `find . -name "*~" -or -name "*.[oa]" -or -name "\#*\#" -or -name TAGS -or -name core -or -name .depend -or -name .#*`
rm -f access a-* plot.gp
Jak pouzivat merici utilitku:
(1) make clean -- smaze pracovni soubory z pripadnych predchozich behu.
(2) make -- tim vyzkousite, ze testovaci program jde zkompilovat.
(3) ./graph.pl -- ridici skript, ktery testovaci program kompiluje
s ruznymi parametry, spousti ho a loguje vysledky. Take vytvari soubor
plot.gp pro GnuPlot. Na zacatku skriptu jsou nastavitelne parametry.
(4) gnuplot plot.gp -- vyrobi a-*.png s grafy.
/*
* A simple memory access speed test
*
* Written by Martin Mares <mj@ucw.cz> and put into public domain.
*
* Compile-time switches:
* SIZE=xxx Size of a single item in bytes
* RANDOMIZE Define for random access, undef for sequential
* MODIFY Define for read-write accesses, undef for read-only
* MS Measurement period in ms
* ARRAY Access items as array (default: linked list)
* HUGE Allocate huge pages (hugetlbfs required)
*
* Output: time per access in nanoseconds, followed by the total number
* of item accesses performed during the whole run.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <time.h>
/*
 * One element of the measured data set. SIZE (a compile-time switch) is the
 * requested size of an element in bytes: the pointer takes the first part
 * and pad[] fills the rest with 32-bit words.
 */
struct item {
struct item *next;	/* Successor; set up by fill() in the linked-list variant */
uint32_t pad[(SIZE-sizeof(struct item *)) / 4];	/* pad[0] is the word touched by process_item() */
};
static struct item *items;
static unsigned block_size, n;
/* Resolution of get_timer(): ticks are microseconds */
#define TICKS_PER_SEC 1000000

/*
 * Return the current wall-clock time in ticks (TICKS_PER_SEC per second),
 * as reported by gettimeofday().
 *
 * tv_sec is widened to 64 bits before it is scaled: multiplying in the
 * native width of time_t overflows on platforms where time_t is 32 bits.
 */
static int64_t get_timer(void)
{
	struct timeval t;
	gettimeofday(&t, NULL);
	return (int64_t) t.tv_sec * TICKS_PER_SEC + t.tv_usec;
}
#ifdef HUGE
/*
 * HUGE variant: allocate the data set by mapping a file created at
 * HUGE_PATH, which is expected to live on a hugetlbfs mount. The length
 * of the mapping is block_size rounded up to a multiple of HUGE_PAGE.
 * On success, items points to the mapping; on failure, the program exits.
 */
static void alloc_mem(void)
{
#define HUGE_PATH "huge/access"
#define HUGE_PAGE (2*1048576)
	void *pp;

	// Start from a fresh file, a stale one might be left from a previous run
	unlink(HUGE_PATH);
	int fd = open(HUGE_PATH, O_RDWR | O_CREAT, 0644);
	if (fd < 0)
	{
		fprintf(stderr, "Cannot open hugetlbfs at %s: %m\n", HUGE_PATH);
		exit(1);
	}

	// Round up in size_t, so that the sum cannot wrap around in 32 bits
	size_t len = ((size_t) block_size + HUGE_PAGE - 1) / HUGE_PAGE * HUGE_PAGE;
	if ((pp = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED)
	{
		fprintf(stderr, "Cannot mmap hugetlbfs: %m\n");
		exit(1);
	}

	// The mapping keeps its own reference to the file, the descriptor
	// is not needed any longer
	close(fd);
	items = pp;
}
#else
/*
 * Default variant: allocate block_size bytes aligned to 4096 bytes and
 * store the result in items. Exits the program on failure.
 */
static void alloc_mem(void)
{
	void *pp;

	// posix_memalign() reports failure by returning a (positive) error
	// number; it does not set errno, so %m cannot be used here.
	int err = posix_memalign(&pp, 4096, block_size);
	if (err)
	{
		fprintf(stderr, "Cannot allocate memory: %s\n", strerror(err));
		exit(1);
	}
	items = pp;
}
#endif
/*
 * Consume a value: used by process_item() for read-only accesses, so that
 * the load of the value cannot be removed by the compiler.
 */
static inline void eat(uint32_t x)
{
// Discard a value, but do not allow optimizing it out: the empty asm
// statement is volatile and takes x as a register input.
asm volatile ("" : : "r" (x));
}
/*
 * Perform one measured access to the item: either read its first padding
 * word, or (when MODIFY is defined) increment it in place.
 */
static inline void process_item(struct item *x)
{
#ifndef MODIFY
	/* Read-only access */
	eat(x->pad[0]);
#else
	/* Read-write access */
	x->pad[0]++;
#endif
}
#ifdef ARRAY
/*
 * ARRAY variant: the items are addressed by index, so no links are needed
 * and the whole block is just zeroed.
 */
static void fill(void)
{
memset(items, 0, block_size);
}
/*
 * ARRAY variant: one pass over the data set.
 *
 * With RANDOMIZE, the index advances by 259309 modulo n (computed by
 * masking, hence n must be a power of two) until it returns to 0.
 * Otherwise, the items are visited in increasing address order.
 */
static void loop(void)
{
#ifdef RANDOMIZE
	if ((n & (n-1)) != 0)
	{
		fprintf(stderr, "Block size must be a power of two!\n");
		exit(1);
	}

	unsigned pos = 0;
	for (;;)
	{
		process_item(items + pos);
		pos = (pos + 259309) & (n-1);
		if (!pos)
			break;
	}
#else
	struct item *cur = items;
	struct item *end = items + n;
	while (cur < end)
		process_item(cur++);
#endif
}
#else
/*
 * Linked-list variant: zero the block and chain the items together,
 * starting at items[0] and ending with a NULL next pointer.
 *
 * With RANDOMIZE, item i is followed by item (i+259309) mod n, except that
 * the link back to item 0 is left NULL to terminate the list.
 * NOTE(review): the list reaches all n items only if 259309 and n are
 * coprime (true for any power-of-two n, since the step is odd).
 * Otherwise, item i is followed by item i+1.
 */
static void fill(void)
{
	memset(items, 0, block_size);
#ifdef RANDOMIZE
	for (unsigned i=0; i<n; i++)
	{
		unsigned j=(i+259309) % n;
		if (j)
			items[i].next = &items[j];
	}
#else
	// i+1<n instead of i<n-1: n is unsigned, so n-1 would wrap around
	// for n == 0 and the loop would write far beyond the block.
	for (unsigned i=0; i+1<n; i++)
		items[i].next = &items[i+1];
#endif
}
/*
 * Linked-list variant: one pass over the data set, following the next
 * pointers from items[0] until the terminating NULL.
 */
static void loop(void)
{
	struct item *cur = items;
	while (cur)
	{
		process_item(cur);
		cur = cur->next;
	}
}
#endif
/*
 * Usage: access <size-in-KB>
 *
 * Allocates and initializes a data set of the given size and then runs
 * loop() over it 1, 2, 4, ... times, until a single round takes at least
 * MS milliseconds. Prints the time per access in nanoseconds measured in
 * that last round, followed by the total number of accesses performed in
 * all rounds.
 */
int main(int argc, char **argv)
{
	if (argc != 2)
	{
		fprintf(stderr, "Usage: access <size-in-KB>\n");
		return 1;
	}

	// Reject garbage, zero and sizes which do not fit in block_size
	char *end;
	unsigned long kb = strtoul(argv[1], &end, 10);
	if (end == argv[1] || *end || !kb || kb > ~0U / 1024)
	{
		fprintf(stderr, "Invalid block size: %s\n", argv[1]);
		return 1;
	}
	block_size = kb * 1024;
	n = block_size / SIZE;
	if (!n)
	{
		fprintf(stderr, "Block size is smaller than a single item\n");
		return 1;
	}

	alloc_mem();
	fill();

	unsigned attempts = 1;
	for (;;)
	{
		int64_t t0 = get_timer();
		for (unsigned i=0; i<attempts; i++)
			loop();
		int64_t t1 = get_timer();
		int64_t t = t1-t0;
		if (t >= TICKS_PER_SEC/1000*MS)
		{
			// Rounds of 1+2+...+attempts passes were run in total, i.e.,
			// 2*attempts-1 passes; computed in double to avoid wrap-around.
			printf("%.3f %f\n", (double)t/attempts/n/TICKS_PER_SEC*1e9, (2.0*attempts-1)*n);
			break;
		}
		attempts *= 2;
	}
	return 0;
}
#!/usr/bin/perl
# Driver of the memory access measurements: compiles ./access with various
# compile-time switches, runs it for a range of block sizes, logs the results
# and generates plot.gp for GnuPlot.

use strict;
use warnings;

### Settings ###
my $min_mem = 8; # Minimum memory block size (KB)
my $max_mem = 65536; # Maximum memory block size (KB), must fit in physical memory
my @item_sizes = (16,64,128,1024,4096); # Sizes of items
my @randomized = (0,1); # Try randomized accesses?
my @modify = (0,1); # Try read-write accesses?
my $measure_ms = 1000; # Duration of measurement in ms
my $array = 0; # Items are accessed as an array instead of a list
my $huge = 0; # Use huge pages (hugetlbfs required)
# If you want to include profiling information (cache misses etc.) in detailed
# graphs, you can ask the measurement utility to call oprofile. Please note that
# this requires root privileges and that you need to adjust the profiling events
# below to match your CPU (see `opcontrol --list-events'). Also, it might be
# necessary to increase $measure_ms in order to gather enough samples.
#
# Each entry maps an event name to "COUNT" or "COUNT:EXTRA"; the value is
# passed to opcontrol as --event=NAME:VALUE (EXTRA is presumably the unit
# mask -- check the opcontrol documentation). The COUNT part is also used to
# multiply the reported numbers when the results are processed.
# Entries commented out by "##" are alternatives which did not fit in the
# number of counters stated for the given CPU (TODO confirm).
my %oprofile = (
## AMD K8 Family 10 (4 counters)
# "DATA_CACHE_MISSES" => 10000,
# "L2_CACHE_MISS" => 10000,
# "DRAM_ACCESSES" => "10000:0xff",
# "L1_DTLB_AND_L2_DTLB_MISS" => 10000,
## "L1_DTLB_MISS_AND_L2_DTLB_HIT" => 10000,
## Intel Core (2 counters)
# "DTLB_MISSES" => 10000,
# "BUS_TRAN_MEM" => 10000, # Memory transactions on the bus
## Intel i7 (4 counters)
# "DTLB_MISSES" => 10000,
# "MEM_UNCORE_RETIRED" => "10000:0x30", # Memory transactions (local DRAM)
# "L2_RQSTS" => "10000:0xaa", # L2 misses
# "L1D" => "10000", # Lines brought to L1
## "LLC_MISSES" => 10000, # Last-Level Cache (L3) misses
## "OFFCORE_REQUESTS" => "10000:0x08", # Off-core read requests
);
# Event names in a fixed (sorted) order: this is the order of the extra
# columns in the log files and of the curves in the profile graphs.
my @oprofile_events = sort keys %oprofile;
# Use --graph to disable all calculations and just re-use the log files
my $graph_only = 0;
if (@ARGV && $ARGV[0] eq "--graph") {
    $graph_only = 1;
    shift @ARGV;
}

# Use ./graph.pl <directory> to store results in a separate directory.
# The directory is created if needed and the files required for the
# measurement are symlinked into it from its parent directory.
if (defined $ARGV[0]) {
    my $dir = $ARGV[0];
    -d $dir or mkdir $dir or die "Cannot create $dir: $!";
    chdir $dir or die "Cannot change directory to $dir: $!";
    for my $f ("access.c", "Makefile", "parse_op") {
        -f $f or symlink "../$f", $f or die "Cannot symlink ../$f to $dir/$f: $!";
    }
}
### Get machine name and the description of caches ###

# The description is used in titles of all graphs: the host name with the
# measurement mode, followed by the list of data caches of CPU 0.
my $machine = `hostname`;
chomp $machine;
$machine .= " Array" if $array;
$machine .= " HugePages" if $huge;
$machine = "($machine)";

# sysfs directory of the cache which is currently being described;
# set by the loop below and read by rd()
our $c;

# rd($name): return the first line of the attribute file $name in the
# directory $c (without the trailing newline), or an empty string if the
# file cannot be opened or is empty.
sub rd($) {
    my ($f) = @_;
    open my $fh, '<', "$c/$f" or return "";
    my $x = <$fh>;
    close $fh;
    defined $x or return "";
    chomp $x;
    return $x;
}

my @caches = ();
for $c (</sys/devices/system/cpu/cpu0/cache/index*>) {
    my $level = rd("level");
    my $ways = rd("ways_of_associativity");
    my $size = rd("size");
    my $type = rd("type");
    my $line_size = rd("coherency_line_size");

    # Keep only the first letter of the type and drop it completely if it
    # is "U", so that the caches are labelled e.g. L1D and L2
    $type =~ s/(.).*/$1/;
    $type =~ s/U//;

    # Instruction caches are not interesting for data accesses
    $type eq "I" and next;

    push @caches, "L$level$type $size/$line_size $ways-way";
}
$machine .= " [" . join(", ", @caches) . "]";
### Interface to oprofile ###
# oprof(@args): run opcontrol with the given arguments (without involving
# the shell) and die if it does not finish successfully.
sub oprof(@) {
    my @args = @_;
    my $status = system "opcontrol", @args;
    die "opcontrol " . join(" ", @args) . " failed ($?)" if $status;
    return $status;
}
# Configure oprofile for all events listed in %oprofile and start it.
# Does nothing when no events are configured.
sub profile_setup() {
    return unless %oprofile;
    oprof("--shutdown");
    my @event_opts;
    for my $name (sort keys %oprofile) {
        push @event_opts, "--event=" . $name . ":" . $oprofile{$name};
    }
    oprof("--setup", @event_opts);
    oprof("--start");
}
# Shut oprofile down and reset it once all measurements are done.
# Does nothing when no events are configured.
sub profile_shutdown() {
    return unless %oprofile;
    oprof("--shutdown");
    oprof("--reset");
}
# Called right before a measurement: reset oprofile, so that only the run
# which follows is accounted. Does nothing when no events are configured.
sub profile_start() {
    return unless %oprofile;
    oprof("--reset");
}
# Called right after a measurement: collect the profiling data of ./access.
#
# Returns a list with one number per event in @oprofile_events (in the same
# order): the value reported by parse_op for the event (0 if not reported)
# multiplied by the count configured for the event in %oprofile.
# Returns an empty list when no events are configured or when opreport
# fails. Dies if opcontrol or parse_op fails.
sub profile_stop() {
    %oprofile or return ();
    oprof("--dump");
    if (system "opreport", "./access", "--xml", "--output-file=op.xml") {
        print STDERR "opreport failed: $?, assuming that no samples were gathered\n";
        return ();
    }

    # List form of pipe open: no shell is involved and a failure to execute
    # parse_op is reported when the pipe is closed.
    open my $parser, '-|', './parse_op', 'op.xml' or die "parse_op failed: $!";
    my %evt = ();
    while (my $line = <$parser>) {
        # Lines are expected to contain "<event> <value>";
        # split ' ' also copes with leading whitespace and blank lines
        my ($k, $v) = split ' ', $line;
        defined $v or next;
        $evt{$k} = $v;
    }
    close $parser or die "parse_op failed ($?)";

    my @prof = ();
    for my $e (@oprofile_events) {
        # The setting is either "COUNT" or "COUNT:...", use just the count
        my $mul = $oprofile{$e};
        $mul =~ s/:.*//;
        push @prof, ($evt{$e} || 0) * 1. * $mul;
    }
    return @prof;
}
### Measure and create logs ###

# For each combination of access type and item size, ./access is rebuilt
# with the corresponding switches and run for all block sizes. Each run
# produces one line of a-<randomized>-<modify>-<size>.log with tab-separated
# columns: block size in KB, time per access in ns and (when profiling) the
# number of events per access for each event in @oprofile_events.
# Combinations whose log file already exists are skipped.
if (!$graph_only) {
    profile_setup();
    for my $r (@randomized) {
        for my $m (@modify) {
            for my $s (@item_sizes) {
                my $f = "a-$r-$m-$s.log";
                next if -f $f;

                my @o = ( "-DSIZE=$s", "-DMS=$measure_ms" );
                push @o, "-DRANDOMIZE" if $r;
                push @o, "-DMODIFY" if $m;
                push @o, "-DARRAY" if $array;
                push @o, "-DHUGE" if $huge;
                my $o = join(" ", @o);
                `rm -f access`; die "Cannot remove the old ./access ($?)" if $?;
                `make access F="$o"`; die "make access F=\"$o\" failed ($?)" if $?;

                # Write to a temporary file and rename it when the log is
                # complete: a partial log left by an interrupted run would
                # otherwise be silently re-used by the next run.
                my $tmp = "$f.tmp";
                open my $log, '>', $tmp or die "Cannot create $tmp: $!";
                for (my $mem=$min_mem; $mem<=$max_mem; $mem*=2) {
                    profile_start();
                    my $out = `./access $mem`; die "./access $mem failed ($?)" if $?;
                    # Output: time per access and the total number of accesses
                    my ($t, $attempts) = split ' ', $out;
                    defined $attempts && $attempts > 0
                        or die "Unexpected output of ./access $mem: $out";
                    my @prof = profile_stop();
                    # Normalize event counts to events per access
                    @prof = map { $_ / $attempts } @prof;
                    print "$mem ($o): ", join(" ", $t,
                        map { $oprofile_events[$_] . ":" . sprintf("%.3f", $prof[$_]) } 0..$#prof),
                        "\n";
                    print $log join("\t", $mem, $t, @prof), "\n";
                }
                close $log or die "Cannot write $tmp: $!";
                rename $tmp, $f or die "Cannot rename $tmp to $f: $!";
            }
        }
    }
    profile_shutdown();
}
### The plots ###

# Generate plot.gp, a GnuPlot script which draws the following graphs from
# the log files:
#   a-<randomized>-<modify>.png          one access type, all item sizes
#   a-size-<size>.png                    one item size, all access types
#   a-prof-<size>-<randomized>-<modify>.png
#                                        time and profiling events of a single
#                                        log (only if profiling is configured)

# Generic header
open my $gp, '>', 'plot.gp' or die "Cannot create plot.gp: $!";
print $gp <<'EOF';
set terminal png large size 1024,768
set logscale x 2
set key left
set xlabel "data set size [KB]"
set ylabel "time per access [ns]"
set grid
set yrange [1:120] # Change if necessary
EOF

# For each access type, show different sizes
for my $r (@randomized) {
    for my $m (@modify) {
        my @plots = map { "'a-$r-$m-$_.log' title '${_}B items' with linespoints" } @item_sizes;
        my $title = ($r ? "Random" : "Sequential") . " " . ($m ? "Read-Write" : "Read-Only") . " Accesses $machine";
        print $gp "set output 'a-$r-$m.png'\n";
        print $gp "set title '$title'\n";
        print $gp "plot ", join(", ", @plots), "\n\n";
    }
}

# For each item size, show all access types
for my $s (@item_sizes) {
    print $gp "set output 'a-size-$s.png'\n";
    print $gp "set title 'Item size $s $machine'\n";
    my @plots = ();
    for my $r (@randomized) {
        for my $m (@modify) {
            my $label = ($r ? "Random" : "Sequential") . " " . ($m ? "R+W" : "R");
            push @plots, "'a-$r-$m-$s.log' title '$label' with linespoints";
        }
    }
    print $gp "plot ", join(", ", @plots), "\n\n";
}

# For each item size and access type, show full profiling information
if (%oprofile) {
    for my $s (@item_sizes) {
        for my $r (@randomized) {
            for my $m (@modify) {
                my $title = "Profile for " . ($r ? "Random" : "Sequential") . " " . ($m ? "Read-Write" : "Read-Only") . " $s bytes $machine";
                print $gp "set output 'a-prof-$s-$r-$m.png'\n";
                print $gp "set title '$title'\n";
                print $gp "set y2label 'number of events'\n";
                print $gp "set y2tics\n";
                print $gp "set y2range [0:3]\n";
                my @plots = ("'a-$r-$m-$s.log' title 'Time' with linespoints");
                for my $i (0..$#oprofile_events) {
                    # Columns 1 and 2 of the log are the block size and the
                    # time, the events start at column 3
                    push @plots, "'' using 1:" . ($i+3) . " axes x1y2 title '" . $oprofile_events[$i] . "' with linespoints";
                }
                print $gp "plot ", join(", ", @plots), "\n\n";
            }
        }
    }
}

# Errors of buffered writes show up when the file is closed
close $gp or die "Cannot write plot.gp: $!";
access/lecture/00-seq-rd.png

11.8 KiB

access/lecture/05-sr128.png

11 KiB

access/lecture/10-sw128.png

11.5 KiB

access/lecture/15-sr64.png

10.1 KiB

access/lecture/20-sr1024.png

11.6 KiB

access/lecture/25-sr4096.png

12.3 KiB

access/lecture/30-seq-rd-again.png

11.8 KiB

access/lecture/35-seq-rw.png

12.2 KiB

access/lecture/40-rnd-rd.png

12.9 KiB

access/lecture/45-rnd-rw.png

12.8 KiB

access/lecture/50-size128.png

11.4 KiB

access/lecture/55-list-vs-array-seq.png

11.5 KiB

access/lecture/57-list-vs-array-rand.png

12.8 KiB

access/lecture/60-sr1024-again.png

11.6 KiB

access/lecture/65-sr1024-array.png

11.3 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment