diff --git a/aim-kit/Makefile b/aim-kit/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..01760512a46f76ab1ed81b3cad5766f053ce6237
--- /dev/null
+++ b/aim-kit/Makefile
@@ -0,0 +1,65 @@
+# Compilation settings. MYCFLAGS and MYLDFLAGS are hooks for extra flags
+# given on the command line, e.g. `make MYCFLAGS="-DFOO"`.
+CFLAGS=-Wall -std=gnu99 -I/usr/include/netpbm $(MYCFLAGS)
+LDFLAGS=-lm -lnetpbm $(MYLDFLAGS)
+
+OPTCFLAGS=$(CFLAGS) -O3 -march=native -DNDEBUG
+DBGCFLAGS=$(CFLAGS) -ggdb3
+
+# Benchmarking settings: `make benchmark` starts aim-opt RUNS times and
+# asks each run for ITERS iterations.
+RUNS=4
+ITERS=20
+
+
+######
+
+
+.PHONY: all clean benchmark evaluate asm
+all: aim-opt aim-dbg
+
+
+# aim.h is listed as a prerequisite so that editing it triggers a rebuild;
+# only the .c files are passed to the compiler.
+aim-opt: aim-run.c exercise.c aim.h
+	$(CC) $(OPTCFLAGS) -o $@ $(filter %.c,$^) $(LDFLAGS)
+
+aim-dbg: aim-run.c exercise.c aim.h
+	$(CC) $(DBGCFLAGS) -o $@ $(filter %.c,$^) $(LDFLAGS)
+
+
+# Annotated assembly listings of exercise.c, built by `make asm`.
+exercise-opt.s: exercise.c aim.h
+	$(CC) $(OPTCFLAGS) -fverbose-asm -S $< -o $@
+
+exercise-dbg.s: exercise.c aim.h
+	$(CC) $(DBGCFLAGS) -fverbose-asm -S $< -o $@
+
+asm: exercise-dbg.s exercise-opt.s
+
+
+clean:
+	rm -f *.o *.s aim-opt aim-dbg output.pbm
+
+
+# Print the head of `top` output and an explanation on stderr, then run
+# aim-opt RUNS times. Each run's output is echoed to stderr and piped into
+# statistics.awk, whose summary line is the only thing left on stdout.
+benchmark: aim-opt
+	@{ echo; \
+		top -b -n 5 | head -n 5; \
+		echo; \
+		echo '>>> Will compute time needed for $(ITERS) iterations averaged over $(RUNS) runs.'; \
+		echo '>>> The measured mean time, its probable lower and upper bounds and S.D.'; \
+		echo '>>> are printed out. See the header of statistics.awk for details.'; } >&2
+	@(for i in `seq 1 $(RUNS)`; do \
+		./aim-opt $(ITERS) test16384.pbm output.pbm | tee /dev/stderr; \
+	done) | awk -f statistics.awk
+
+# Smoke-test on two small images, then benchmark. The full statistics line
+# goes to stderr; only its second field (mean - S.D.) is printed on stdout.
+# $(MAKE), not a literal `make`, so that the same make program is re-invoked.
+evaluate: aim-opt
+	for s in 64 1024; do ./aim-opt 1 test$$s.pbm output.pbm; done >/dev/null
+	# Consider the optimistic estimate
+	$(MAKE) -s benchmark | { read m o p s; echo $$m $$o $$p $$s >&2; echo $$o; }
diff --git a/aim-kit/README b/aim-kit/README
new file mode 100644
index 0000000000000000000000000000000000000000..20798a1f4fb29d1a7ef7bc2fc038175674c5945d
--- /dev/null
+++ b/aim-kit/README
@@ -0,0 +1,28 @@
+This is a problem solution kit for the lab exercises of the MFF CUNI
+course on "Algoritmy a jejich implementace".
+
+The test images are not part of the repository, download them separately at
+	http://kam.mff.cuni.cz/~aim/aim-testfiles.tar.gz
+
+The most interesting file is exercise.c, which is to provide the exercise()
+routine that performs the appropriate image transformation.  When submitting
+your solution, send ONLY THE FILE exercise.c to <aim@ucw.cz>.
+
+Run `make` to build the binary executables; this will create two files:
+
+	aim-opt - optimized version suitable for production use
+	aim-dbg - unoptimized version suitable for debugging
+
+Run the binary as:
+	./aim-opt 1 test1024.pbm output.pbm
+	display output.pbm
+
+If you want to pass custom compiler flags, run it e.g. as:
+	make MYCFLAGS="-DLALALA ..."
+
+To get annotated assembly source (exercise-dbg.s, exercise-opt.s), run `make asm`.
+
+
+In order to benchmark your code the same way we will, run `make benchmark`.
+This will internally run your subroutine multiple times in a row on the same
+image; however, you must not take advantage of this in your code.
diff --git a/aim-kit/aim-run.c b/aim-kit/aim-run.c
new file mode 100644
index 0000000000000000000000000000000000000000..32eabf731df33e76e463b33a8e66fc892789c3f2
--- /dev/null
+++ b/aim-kit/aim-run.c
@@ -0,0 +1,143 @@
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pbm.h>
+
+#include "aim.h"
+
+
+#if CHAR_BIT != 8
+#error char size larger than 8 is not supported
+#endif
+
+
+/* Allocate an image of the given dimensions; the pixel data is left
+ * uninitialized. Terminates the program when out of memory. */
+static struct image *
+image_init(int cols, int rows)
+{
+	struct image *img = malloc(sizeof(*img));
+	if (!img) { perror("init"); exit(EXIT_FAILURE); }
+	img->cols = cols, img->rows = rows;
+
+	/* Size the bitmap in size_t so that rows * rowbytes cannot wrap
+	 * around in unsigned int arithmetic. */
+	size_t size = (size_t) img->rows * image_rowbytes(img);
+	img->bitmap = malloc(size);
+	/* malloc(0) is allowed to return NULL; that is not a failure. */
+	if (!img->bitmap && size > 0) { perror("init"); exit(EXIT_FAILURE); }
+	return img;
+}
+
+/* Read the PBM file `filename` into a newly allocated image.
+ * Terminates the program if the file cannot be opened. */
+static struct image *
+image_load(char *filename)
+{
+	FILE *f = fopen(filename, "rb");
+	if (!f) { perror("load"); exit(EXIT_FAILURE); }
+
+	int cols, rows, fmt;
+	pbm_readpbminit(f, &cols, &rows, &fmt);
+	struct image *img = image_init(cols, rows);
+
+	size_t rowbytes = image_rowbytes(img);
+	for (unsigned int i = 0; i < img->rows; i++)
+		pbm_readpbmrow_packed(f, img->bitmap + i * rowbytes,
+				img->cols, fmt);
+
+	fclose(f);
+	return img;
+}
+
+/* Write `img` to `filename` as a raw (binary) PBM file. Terminates the
+ * program if the file cannot be opened or closed cleanly. */
+static void
+image_save(char *filename, struct image *img)
+{
+	FILE *f = fopen(filename, "wb");
+	if (!f) { perror("save"); exit(EXIT_FAILURE); }
+
+	size_t rowbytes = image_rowbytes(img);
+	pbm_writepbminit(f, img->cols, img->rows, 0);
+	for (unsigned int i = 0; i < img->rows; i++)
+		pbm_writepbmrow_packed(f, img->bitmap + i * rowbytes,
+				img->cols, 0);
+
+	/* fclose() flushes buffered data, so this is where a write error
+	 * (e.g. a full disk) may finally surface. */
+	if (fclose(f) != 0) { perror("save"); exit(EXIT_FAILURE); }
+}
+
+/* Release an image obtained from image_init() or image_load(). */
+static void
+image_free(struct image *img)
+{
+	free(img->bitmap);
+	free(img);
+}
+
+
+/* Wall-clock time in microseconds. */
+typedef int_fast64_t timestamp_t;
+
+/* Current wall-clock time as reported by gettimeofday(). */
+static timestamp_t
+get_timer(void)
+{
+	struct timeval t;
+	gettimeofday(&t, NULL);
+	/* Widen before multiplying: where tv_sec is a 32-bit type,
+	 * 1000000*tv_sec would overflow. */
+	return (timestamp_t) t.tv_sec * 1000000 + t.tv_usec;
+}
+
+
+/* Usage: PROGRAM ITERATIONS SRCIMAGE.pbm DSTIMAGE.pbm
+ * Loads SRCIMAGE, runs exercise() on it ITERATIONS times, prints the
+ * total time of those calls in seconds to stdout and saves the output
+ * image to DSTIMAGE. */
+int
+main(int argc, char *argv[])
+{
+	if (argc != 4) {
+		fprintf(stderr, "%s ITERATIONS SRCIMAGE.pbm DSTIMAGE.pbm\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	/* Parse ITERATIONS strictly; atoi() would silently turn a typo
+	 * into zero iterations. */
+	char *end;
+	errno = 0;
+	long iters = strtol(argv[1], &end, 10);
+	if (end == argv[1] || *end != '\0' || errno == ERANGE
+	    || iters < 0 || iters > INT_MAX) {
+		fprintf(stderr, "%s: invalid iteration count '%s'\n", argv[0], argv[1]);
+		return EXIT_FAILURE;
+	}
+
+	struct image *img_in = image_load(argv[2]);
+	struct image *img_out = image_init(img_in->cols, img_in->rows);
+	timestamp_t t0 = get_timer();
+
+	/* Pre-warm the CPU! Necessary for benchmarking w/ dynamic
+	 * cpufreq policy. */
+	while (get_timer() - t0 < 500000)
+		;
+	t0 = get_timer();
+
+	for (long j = 0; j < iters; j++) {
+		exercise(img_in, img_out);
+	}
+
+	t0 = get_timer() - t0;
+	// time spent:
+	printf("%.3f\n", (double) t0/1e6);
+	image_save(argv[3], img_out);
+	image_free(img_in);
+	image_free(img_out);
+	return EXIT_SUCCESS;
+}
diff --git a/aim-kit/aim.h b/aim-kit/aim.h
new file mode 100644
index 0000000000000000000000000000000000000000..d3da6a53c5229a73213bf719705a8fa7ad740a3d
--- /dev/null
+++ b/aim-kit/aim.h
@@ -0,0 +1,53 @@
+#ifndef AIM__AIM_H
+#define AIM__AIM_H
+
+#include <stdbool.h>
+
+/* Black-white image, one _BIT_ per pixel. Stored row by row; every row
+ * is padded to a whole number of bytes. Within a byte, the pixel with
+ * the lowest x occupies the most significant bit. */
+struct image {
+	unsigned int cols, rows;
+	unsigned char *bitmap;
+};
+
+/* Number of bytes taken by one row of the bitmap. */
+static unsigned int image_rowbytes(struct image *img);
+/* Value of the pixel in column x of row y. */
+static bool image_getpixel(struct image *img, unsigned int x, unsigned int y);
+/* Store `pixel` at column x of row y. */
+static void image_putpixel(struct image *img, unsigned int x, unsigned int y, bool pixel);
+
+/* The image transformation routine, called with an input and an output image. */
+void exercise(struct image * restrict in, struct image * restrict out);
+
+
+/** Implementation: */
+
+static inline unsigned int
+image_rowbytes(struct image *img)
+{
+	/* Ceiling of cols / 8. */
+	unsigned int padded_cols = img->cols + 7;
+	return padded_cols / 8;
+}
+
+static inline bool
+image_getpixel(struct image *img, unsigned int x, unsigned int y)
+{
+	unsigned int byte_idx = image_rowbytes(img) * y + x / 8;
+	unsigned int shift = 7 - x % 8;
+	return (img->bitmap[byte_idx] >> shift) & 1;
+}
+
+static inline void
+image_putpixel(struct image *img, unsigned int x, unsigned int y, bool pixel)
+{
+	unsigned char *cell = &img->bitmap[image_rowbytes(img) * y + x / 8];
+	unsigned int shift = 7 - x % 8;
+	/* Clear the target bit first, then OR in the new value. */
+	*cell &= ~(1 << shift);
+	*cell |= pixel << shift;
+}
+
+#endif
diff --git a/aim-kit/exercise.c b/aim-kit/exercise.c
new file mode 100644
index 0000000000000000000000000000000000000000..ef563691618cb2fcbe6e44a519e35c5ac2177bac
--- /dev/null
+++ b/aim-kit/exercise.c
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include "aim.h"
+
+/* Transform the image `in` into `out`. This placeholder copies every
+ * pixel unchanged and also dumps the image to stdout as ASCII art,
+ * '.' for a 0 pixel and '#' for a 1 pixel, one text line per row. */
+void
+exercise(struct image * restrict in, struct image * restrict out)
+{
+	/* Do your image transformation here. Delete the code below
+	 * first, especially when testing with large images! Modify
+	 * the image, not just the printing code. */
+	/* Coordinates are unsigned to match in->rows/in->cols and the
+	 * pixel accessors. */
+	for (unsigned int y = 0; y < in->rows; y++) {
+		for (unsigned int x = 0; x < in->cols; x++) {
+			bool p = image_getpixel(in, x, y);
+			putchar(".#"[p]);
+			image_putpixel(out, x, y, p);
+		}
+		putchar('\n');
+	}
+}
diff --git a/aim-kit/statistics.awk b/aim-kit/statistics.awk
new file mode 100644
index 0000000000000000000000000000000000000000..27719f6a7067956bda564eb2a317a52513ea9a90
--- /dev/null
+++ b/aim-kit/statistics.awk
@@ -0,0 +1,43 @@
+# We will compute the mean and standard deviation of the numbers on stdin.
+# We can interpret the standard deviation [*] like this:
+#
+# 	With probability 68%, the actual mean time M is within one S.D. s
+#	around the measured mean m: M \in (m-s,m+s)
+#
+# [*] We assume the numbers (time taken) are approximately normally
+# distributed around the measured mean.
+#
+# Output: one line with the mean, mean - S.D., mean + S.D. and the S.D.
+
+BEGIN {
+	sum = 0
+	sqsum = 0
+}
+
+# Accumulate the first field of every input line and its square.
+{
+	sum = sum + $1
+	sqsum = sqsum + $1*$1
+}
+
+END {
+	n = NR
+	# Without any input there is no mean to report.
+	if (n == 0) {
+		print "statistics.awk: no input" > "/dev/stderr"
+		exit 1
+	}
+	mean = sum / n
+	# The sample variance below divides by n-1, so a single sample
+	# has no defined deviation; report it with zero spread.
+	if (n == 1) {
+		print mean, mean, mean, 0
+		exit 0
+	}
+	# Exercise: This method of variance computation might be
+	# sub-optimal. Try to find out why and implement a better
+	# method.
+	var = (n * sqsum - sum * sum) / (n * (n-1))
+	sd = sqrt(var)
+	print mean, mean - sd, mean + sd, sd
+}