diff --git a/aim-kit/Makefile b/aim-kit/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..01760512a46f76ab1ed81b3cad5766f053ce6237
--- /dev/null
+++ b/aim-kit/Makefile
@@ -0,0 +1,60 @@
+# Compilation settings:
+CFLAGS=-Wall -std=gnu99 -I/usr/include/netpbm $(MYCFLAGS)
+LDFLAGS=-lm -lnetpbm $(MYLDFLAGS)
+
+OPTCFLAGS=$(CFLAGS) -O3 -march=native -DNDEBUG
+DBGCFLAGS=$(CFLAGS) -ggdb3
+
+# Benchmarking settings:
+RUNS=4
+ITERS=20
+
+
+######
+
+
+.PHONY: all clean benchmark evaluate asm
+all: aim-opt aim-dbg
+
+
+# Optimized and debugging builds of the same two source files.
+aim-opt: aim-run.c exercise.c
+	$(CC) $(OPTCFLAGS) -o $@ $^ $(LDFLAGS)
+
+aim-dbg: aim-run.c exercise.c
+	$(CC) $(DBGCFLAGS) -o $@ $^ $(LDFLAGS)
+
+
+# Annotated assembly listings of exercise.c; `make asm` produces both.
+exercise-opt.s: exercise.c
+	$(CC) $(OPTCFLAGS) -fverbose-asm -S $< -o $@
+
+exercise-dbg.s: exercise.c
+	$(CC) $(DBGCFLAGS) -fverbose-asm -S $< -o $@
+
+asm: exercise-dbg.s exercise-opt.s
+
+
+clean:
+	rm -f *.o *.s aim-opt aim-dbg output.pbm
+
+
+# Print the first five lines of `top` batch output, then time $(RUNS) runs
+# of $(ITERS) iterations each and pipe the per-run times to statistics.awk.
+benchmark: aim-opt
+	@{ echo; \
+	  top -b -n 5 | head -n 5; \
+	  echo; \
+	  echo '>>> Will compute time needed for $(ITERS) iterations averaged over $(RUNS) runs.'; \
+	  echo '>>> The measured mean time, its probable lower and upper bounds and S.D.'; \
+	  echo '>>> are printed out. See the header of statistics.awk for details.'; } >&2
+	@(for i in `seq 1 $(RUNS)`; do \
+		./aim-opt $(ITERS) test16384.pbm output.pbm | tee /dev/stderr; \
+	done) | awk -f statistics.awk
+
+# Run two small images once, then benchmark; the whole statistics line goes
+# to stderr and only its second field (mean minus one S.D.) to stdout.
+evaluate: aim-opt
+	for s in 64 1024; do ./aim-opt 1 test$$s.pbm output.pbm; done >/dev/null
+	# Consider the optimistic estimate
+	$(MAKE) -s benchmark | { read m o p s; echo $$m $$o $$p $$s >&2; echo $$o; }
diff --git a/aim-kit/README b/aim-kit/README
new file mode 100644
index 0000000000000000000000000000000000000000..20798a1f4fb29d1a7ef7bc2fc038175674c5945d
--- /dev/null
+++ b/aim-kit/README
@@ -0,0 +1,28 @@
+This is a problem solution kit for the lab exercises of the MFF CUNI
+course on "Algoritmy a jejich implementace".
+
+The test images are not part of the repository; download them separately from
+	http://kam.mff.cuni.cz/~aim/aim-testfiles.tar.gz
+
+The most interesting file is exercise.c, which is to provide the exercise()
+routine that performs the appropriate image transformation. When submitting
+your solution, send ONLY THE FILE exercise.c to <aim@ucw.cz>.
+
+Run `make` to build the binary executable: this will create two files:
+
+	aim-opt - optimized version suitable for production use
+	aim-dbg - unoptimized version suitable for debugging
+
+Run the binary as:
+	./aim-opt 1 test1024.pbm output.pbm
+	display output.pbm
+
+If you want to pass custom compiler flags, run it e.g. as:
+	make MYCFLAGS="-DLALALA ..."
+
+To get annotated assembly source, run `make asm` (creates exercise-{opt,dbg}.s).
+
+
+In order to benchmark your code the same way we will, run `make benchmark`.
+This will internally run your subroutine multiple times in a row on the same
+image, however you must not take advantage of this in your code.
diff --git a/aim-kit/aim-run.c b/aim-kit/aim-run.c
new file mode 100644
index 0000000000000000000000000000000000000000..32eabf731df33e76e463b33a8e66fc892789c3f2
--- /dev/null
+++ b/aim-kit/aim-run.c
@@ -0,0 +1,137 @@
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pbm.h>
+
+#include "aim.h"
+
+
+#if CHAR_BIT != 8
+#error char size larger than 8 is not supported
+#endif
+
+
+/* Allocate an image of the given dimensions; the bitmap contents are
+ * left uninitialized. Exits the program when out of memory. */
+static struct image *
+image_init(int cols, int rows)
+{
+	struct image *img = malloc(sizeof(*img));
+	if (!img) { perror("malloc"); exit(EXIT_FAILURE); }
+	img->cols = cols, img->rows = rows;
+
+	/* Multiply in size_t so that the byte count of a huge image does
+	 * not wrap around in unsigned int arithmetic. */
+	size_t size = (size_t) img->rows * image_rowbytes(img);
+	img->bitmap = malloc(size);
+	if (!img->bitmap && size > 0) { perror("malloc"); exit(EXIT_FAILURE); }
+	return img;
+}
+
+/* Read the PBM file `filename` into a newly allocated image. Exits the
+ * program if the file cannot be opened. */
+static struct image *
+image_load(char *filename)
+{
+	FILE *f = fopen(filename, "rb");
+	if (!f) { perror("load"); exit(EXIT_FAILURE); }
+
+	int cols, rows, fmt;
+	pbm_readpbminit(f, &cols, &rows, &fmt);
+	struct image *img = image_init(cols, rows);
+
+	size_t rowbytes = image_rowbytes(img);
+	for (unsigned int i = 0; i < img->rows; i++)
+		pbm_readpbmrow_packed(f, img->bitmap + i * rowbytes,
+				img->cols, fmt);
+
+	fclose(f);
+	return img;
+}
+
+/* Write `img` to `filename` as a PBM file. Exits the program if the
+ * file cannot be opened or if closing it reports a write error. */
+static void
+image_save(char *filename, struct image *img)
+{
+	FILE *f = fopen(filename, "wb");
+	if (!f) { perror("save"); exit(EXIT_FAILURE); }
+
+	pbm_writepbminit(f, img->cols, img->rows, 0);
+	size_t rowbytes = image_rowbytes(img);
+	for (unsigned int i = 0; i < img->rows; i++)
+		pbm_writepbmrow_packed(f, img->bitmap + i * rowbytes,
+				img->cols, 0);
+
+	/* fclose() flushes buffered data, so its result must be checked
+	 * to notice an incompletely written file. */
+	if (fclose(f) != 0) { perror("save"); exit(EXIT_FAILURE); }
+}
+
+/* Release an image obtained from image_init() or image_load(). */
+static void
+image_free(struct image *img)
+{
+	free(img->bitmap);
+	free(img);
+}
+
+
+typedef int_fast64_t timestamp_t;
+
+/* Current time of day in microseconds, as reported by gettimeofday(). */
+static timestamp_t
+get_timer(void)
+{
+	struct timeval t;
+	gettimeofday(&t, NULL);
+	/* Widen before multiplying; time_t may be narrower than 64 bits. */
+	return (timestamp_t) 1000000 * t.tv_sec + t.tv_usec;
+}
+
+
+/* Usage: aim ITERATIONS SRCIMAGE.pbm DSTIMAGE.pbm
+ * Runs exercise() ITERATIONS times on the source image, prints the
+ * elapsed time in seconds on stdout and saves the output image. */
+int
+main(int argc, char *argv[])
+{
+	if (argc != 4) {
+		fprintf(stderr, "%s ITERATIONS SRCIMAGE.pbm DSTIMAGE.pbm\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	/* Validate the iteration count up front; atoi() would silently
+	 * turn a malformed argument into 0. */
+	char *end;
+	errno = 0;
+	long iters = strtol(argv[1], &end, 10);
+	if (end == argv[1] || *end != '\0' || errno == ERANGE || iters < 0) {
+		fprintf(stderr, "%s: invalid iteration count '%s'\n", argv[0], argv[1]);
+		return EXIT_FAILURE;
+	}
+
+	struct image *img_in = image_load(argv[2]);
+	struct image *img_out = image_init(img_in->cols, img_in->rows);
+	timestamp_t t0 = get_timer();
+
+	/* Pre-warm the CPU! Necessary for benchmarking w/ dynamic
+	 * cpufreq policy. */
+	while (get_timer() - t0 < 500000);
+	t0 = get_timer();
+
+	for (long j = 0; j < iters; j++) {
+		exercise(img_in, img_out);
+	}
+
+	t0 = get_timer() - t0;
+	// time spent:
+	printf("%.3f\n", (double) t0/1e6);
+	image_save(argv[3], img_out);
+	image_free(img_in);
+	image_free(img_out);
+	return EXIT_SUCCESS;
+}
diff --git a/aim-kit/aim.h b/aim-kit/aim.h
new file mode 100644
index 0000000000000000000000000000000000000000..d3da6a53c5229a73213bf719705a8fa7ad740a3d
--- /dev/null
+++ b/aim-kit/aim.h
@@ -0,0 +1,45 @@
+#ifndef AIM__AIM_H
+#define AIM__AIM_H
+
+#include <stdbool.h>
+
+struct image {
+	unsigned int cols, rows;
+	/* Black-white image, one _BIT_ per pixel. Rows-oriented,
+	 * rows are rounded to whole bytes. */
+	unsigned char *bitmap;
+};
+
+static unsigned int image_rowbytes(struct image *img);
+static bool image_getpixel(struct image *img, unsigned int x, unsigned int y);
+static void image_putpixel(struct image *img, unsigned int x, unsigned int y, bool pixel);
+
+void exercise(struct image * restrict in, struct image * restrict out);
+
+
+/** Implementation: */
+
+/* Number of bytes occupied by one bitmap row. */
+static inline unsigned int
+image_rowbytes(struct image *img)
+{
+	return (img->cols + 7) / 8;
+}
+
+/* Value of the pixel at column x, row y; the leftmost pixel of each
+ * byte is stored in its most significant bit. */
+static inline bool
+image_getpixel(struct image *img, unsigned int x, unsigned int y)
+{
+	return (img->bitmap[image_rowbytes(img) * y + x / 8] >> (7 - x % 8)) & 1;
+}
+
+/* Set the pixel at column x, row y to `pixel`. */
+static inline void
+image_putpixel(struct image *img, unsigned int x, unsigned int y, bool pixel)
+{
+	(img->bitmap[image_rowbytes(img) * y + x / 8] &= ~(1 << (7 - x % 8)));
+	(img->bitmap[image_rowbytes(img) * y + x / 8] |= pixel << (7 - x % 8));
+}
+
+#endif
diff --git a/aim-kit/exercise.c b/aim-kit/exercise.c
new file mode 100644
index 0000000000000000000000000000000000000000..ef563691618cb2fcbe6e44a519e35c5ac2177bac
--- /dev/null
+++ b/aim-kit/exercise.c
@@ -0,0 +1,18 @@
+#include <stdio.h>
+#include "aim.h"
+
+void
+exercise(struct image * restrict in, struct image * restrict out)
+{
+	/* Do your image transformation here. Delete the code below
+	 * first, especially when testing with large images! Modify
+	 * the image, not just the printing code. */
+	for (unsigned int y = 0; y < in->rows; y++) {
+		for (unsigned int x = 0; x < in->cols; x++) {
+			bool p = image_getpixel(in, x, y);
+			putchar(".#"[p]);
+			image_putpixel(out, x, y, p);
+		}
+		putchar('\n');
+	}
+}
diff --git a/aim-kit/statistics.awk b/aim-kit/statistics.awk
new file mode 100644
index 0000000000000000000000000000000000000000..27719f6a7067956bda564eb2a317a52513ea9a90
--- /dev/null
+++ b/aim-kit/statistics.awk
@@ -0,0 +1,29 @@
+# We will compute the mean and standard deviation of the numbers on stdin.
+# We can interpret the standard deviation [*] like this:
+#
+#	With probability 68%, the actual mean time M is within one S.D. s
+#	around the measured mean m: M \in (m-s,m+s)
+#
+# [*] We assume the numbers (time taken) are approximately normally
+# distributed around the measured mean.
+
+BEGIN {
+	sum = 0
+	sqsum = 0
+}
+
+{
+	sum = sum + $1
+	sqsum = sqsum + $1*$1
+}
+
+END {
+	n = NR
+	mean = sum / n
+	# Exercise: This method of variance computation might be
+	# sub-optimal. Try to find out why and implement a better
+	# method.
+	var = (n > 1) ? (n * sqsum - sum * sum) / (n * (n-1)) : 0	# one sample: avoid dividing by n-1 == 0
+	sd = (var > 0) ? sqrt(var) : 0	# rounding may leave var slightly negative
+	print mean, mean - sd, mean + sd, sd
+}