From dd14ff8da1bb87fe38a819877415b5f7443a3c41 Mon Sep 17 00:00:00 2001 From: Petr Chmel <petr@chmel.net> Date: Sun, 6 Apr 2025 23:01:37 +0200 Subject: [PATCH] Publish matrix experiment --- 07-matrix_experiment/cpp/Makefile | 26 ++++++ .../cpp/matrix_experiment_real.cpp | 90 +++++++++++++++++++ .../cpp/matrix_experiment_sim.cpp | 80 +++++++++++++++++ 07-matrix_experiment/cpp/matrix_tests.h | 1 + 07-matrix_experiment/python/Makefile | 17 ++++ .../python/matrix_experiment_sim.py | 43 +++++++++ 07-matrix_experiment/python/matrix_tests.py | 1 + 07-matrix_experiment/task.md | 90 +++++++++++++++++++ 8 files changed, 348 insertions(+) create mode 100644 07-matrix_experiment/cpp/Makefile create mode 100644 07-matrix_experiment/cpp/matrix_experiment_real.cpp create mode 100644 07-matrix_experiment/cpp/matrix_experiment_sim.cpp create mode 100644 07-matrix_experiment/cpp/matrix_tests.h create mode 100644 07-matrix_experiment/python/Makefile create mode 100644 07-matrix_experiment/python/matrix_experiment_sim.py create mode 100644 07-matrix_experiment/python/matrix_tests.py create mode 100644 07-matrix_experiment/task.md diff --git a/07-matrix_experiment/cpp/Makefile b/07-matrix_experiment/cpp/Makefile new file mode 100644 index 0000000..3b99332 --- /dev/null +++ b/07-matrix_experiment/cpp/Makefile @@ -0,0 +1,26 @@ +.PHONY: test +test: matrix_experiment_sim matrix_experiment_real + @rm -rf out && mkdir out + @for exp in m1024-b16 m8192-b64 m65536-b256 m65536-b4096 ; do \ + for impl in smart naive ; do \ + echo "t-sim-$$exp-$$impl" ; \ + ./matrix_experiment_sim $$exp $$impl >out/t-sim-$$exp-$$impl ; \ + done ; \ + done + @for impl in smart naive ; do \ + echo "t-real-$$impl" ; \ + ./matrix_experiment_real $$impl >out/t-real-$$impl ; \ + done + +CXXFLAGS=-std=c++23 -O3 -Wall -Wextra -g -Wno-sign-compare + +matrix_experiment_sim: matrix_transpose.h matrix_tests.h matrix_experiment_sim.cpp + $(CXX) $(CPPFLAGS) $(CXXFLAGS) matrix_experiment_sim.cpp -o $@ + +matrix_experiment_real: 
matrix_transpose.h matrix_tests.h matrix_experiment_real.cpp + $(CXX) $(CPPFLAGS) $(CXXFLAGS) matrix_experiment_real.cpp -o $@ + +.PHONY: clean +clean:: + rm -f matrix_experiment_sim matrix_experiment_real + rm -rf out diff --git a/07-matrix_experiment/cpp/matrix_experiment_real.cpp b/07-matrix_experiment/cpp/matrix_experiment_real.cpp new file mode 100644 index 0000000..f46c38a --- /dev/null +++ b/07-matrix_experiment/cpp/matrix_experiment_real.cpp @@ -0,0 +1,90 @@ +#include <functional> +#include <string> +#include <vector> +#include <cstdio> +#include <cmath> +#include <iostream> + +#include <time.h> + +using namespace std; + +// If the condition is not true, report an error and halt. +#define EXPECT(condition, message) do { if (!(condition)) expect_failed(message); } while (0) + +void expect_failed(const string& message) { + cerr << "Test error: " << message << endl; + exit(1); +} + +class Matrix { + vector<unsigned> items; + unsigned &item(unsigned i, unsigned j) { return items[i*N + j]; } + public: + unsigned N; + Matrix(unsigned N) { this->N = N; items.resize(N*N, 0); } + + void swap(unsigned i1, unsigned j1, unsigned i2, unsigned j2) + { + // EXPECT(i1 < N && j1 < N && i2 < N && j2 < N, "Swap out of range: " + coord_string(i1, j1) + " with " + coord_string(i2, j2) + "."); + std::swap(item(i1, j1), item(i2, j2)); + } + + void naive_transpose() + { + for (unsigned i=0; i<N; i++) + for (unsigned j=0; j<i; j++) + swap(i, j, j, i); + } + +#include "matrix_transpose.h" +}; + +void real_test(bool naive) +{ + for (int e=40; e<=112; e++) { + unsigned N = (unsigned) pow(2, e/8.); + Matrix m(N); + + clock_t start_time, stop_time; + unsigned tries = 1; + do { + start_time = clock(); + for (unsigned t=0; t < tries; t++) { + if (naive) + m.naive_transpose(); + else + m.transpose(); + } + stop_time = clock(); + tries *= 2; + } while (stop_time - start_time < CLOCKS_PER_SEC/10); + // It is guaranteed that the total number of tries is odd :) + + double ns_per_item = 
(double)(stop_time - start_time) / CLOCKS_PER_SEC / (N*(N-1)) / tries * 1e9; + printf("%d\t%.6f\n", N, ns_per_item); + } +} + +int main(int argc, char **argv) +{ + if (argc != 2) { + fprintf(stderr, "Usage: %s (smart|naive)\n", argv[0]); + return 1; + } + + std::string mode = argv[1]; + + bool naive; + if (mode == "smart") + naive = false; + else if (mode == "naive") + naive = true; + else { + fprintf(stderr, "The argument must be either 'smart' or 'naive'\n"); + return 1; + } + + real_test(naive); + return 0; +} diff --git a/07-matrix_experiment/cpp/matrix_experiment_sim.cpp b/07-matrix_experiment/cpp/matrix_experiment_sim.cpp new file mode 100644 index 0000000..316db52 --- /dev/null +++ b/07-matrix_experiment/cpp/matrix_experiment_sim.cpp @@ -0,0 +1,80 @@ +#include <functional> +#include <string> +#include <vector> +#include <cstdio> +#include <cmath> +#include <string> +#include <iostream> + +#include <time.h> + +using namespace std; + +// If the condition is not true, report an error and halt. 
+#define EXPECT(condition, message) do { if (!(condition)) expect_failed(message); } while (0) + +void expect_failed(const string& message) { + cerr << "Test error: " << message << endl; + exit(1); +} + +#include "matrix_tests.h" + +void simulated_test(unsigned M, unsigned B, bool naive) +{ + for (int e=20; e<=52; e++) { + unsigned N = (unsigned) pow(2, e/4.); + TestMatrix m(N, M, B, 0); + m.fill_matrix(); + m.reset_stats(); + if (naive) + m.naive_transpose(); + else + m.transpose(); + + double misses_per_item = (double) m.stat_cache_misses / (N*(N-1)); + printf("%d\t%.6f\n", N, misses_per_item); + + m.check_result(); + } +} + +vector<pair<string, function<void(bool n)>>> tests = { +// M B + { "m1024-b16", [](bool n) { simulated_test( 1024, 16, n); } }, + { "m8192-b64", [](bool n) { simulated_test( 8192, 64, n); } }, + { "m65536-b256", [](bool n) { simulated_test(65536, 256, n); } }, + { "m65536-b4096", [](bool n) { simulated_test(65536, 4096, n); } }, +}; + +int main(int argc, char **argv) +{ + if (argc != 3) { + fprintf(stderr, "Usage: %s <test> (smart|naive)\n", argv[0]); + return 1; + } + + std::string which_test = argv[1]; + std::string mode = argv[2]; + + bool naive; + if (mode == "smart") + naive = false; + else if (mode == "naive") + naive = true; + else + { + fprintf(stderr, "Last argument must be either 'smart' or 'naive'\n"); + return 1; + } + + for (const auto& test : tests) { + if (test.first == which_test) { + test.second(naive); + return 0; + } + } + + fprintf(stderr, "Unknown test %s\n", which_test.c_str()); + return 1; +} diff --git a/07-matrix_experiment/cpp/matrix_tests.h b/07-matrix_experiment/cpp/matrix_tests.h new file mode 100644 index 0000000..06e6952 --- /dev/null +++ b/07-matrix_experiment/cpp/matrix_tests.h @@ -0,0 +1 @@ +../../06-matrix_transpose/cpp/matrix_tests.h \ No newline at end of file diff --git a/07-matrix_experiment/python/Makefile b/07-matrix_experiment/python/Makefile new file mode 100644 index 0000000..26bbb01 --- /dev/null 
+++ b/07-matrix_experiment/python/Makefile @@ -0,0 +1,17 @@ +TESTS=m1024-b16 m8192-b64 m65536-b256 m65536-b4096 +TESTFILES=$(addprefix out/t-sim-,$(TESTS)) + +.PHONY: test +test: $(addsuffix -smart,$(TESTFILES)) $(addsuffix -naive,$(TESTFILES)) + +out/t-sim-%-naive: + @mkdir -p out + ./matrix_experiment_sim.py $* naive >$@ + +out/t-sim-%-smart: + @mkdir -p out + ./matrix_experiment_sim.py $* smart >$@ + +.PHONY: clean +clean:: + rm -rf out __pycache__ diff --git a/07-matrix_experiment/python/matrix_experiment_sim.py b/07-matrix_experiment/python/matrix_experiment_sim.py new file mode 100644 index 0000000..d9cbab7 --- /dev/null +++ b/07-matrix_experiment/python/matrix_experiment_sim.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +import sys + +from matrix_tests import TestMatrix + +def simulated_test(M, B, naive): + for e in range(10, 25): + N = int(2 ** (e/2)) + print(" ", N, M, B, file=sys.stderr) + m = TestMatrix(N, M, B, 0) + m.fill_matrix() + m.reset_stats() + if naive: + m.naive_transpose() + else: + m.transpose() + misses_per_item = m.stat_cache_misses / (N*(N-1)) + print(N, misses_per_item, flush=True) + m.check_result() + + +tests = { +# M B + "m1024-b16": lambda n: simulated_test( 1024, 16, n), + "m8192-b64": lambda n: simulated_test( 8192, 64, n), + "m65536-b256": lambda n: simulated_test(65536, 256, n), + "m65536-b4096": lambda n: simulated_test(65536, 4096, n), +} + +if len(sys.argv) == 3: + test = sys.argv[1] + if sys.argv[2] == "smart": + naive = False + elif sys.argv[2] == "naive": + naive = True + else: + raise ValueError("Last argument must be either 'smart' or 'naive'") + if test in tests: + tests[test](naive) + else: + raise ValueError("Unknown test {}".format(test)) +else: + raise ValueError("Usage: {} <test> (smart|naive)".format(sys.argv[0])) diff --git a/07-matrix_experiment/python/matrix_tests.py b/07-matrix_experiment/python/matrix_tests.py new file mode 100644 index 0000000..bca928c --- /dev/null +++ 
b/07-matrix_experiment/python/matrix_tests.py @@ -0,0 +1 @@ +../../06-matrix_transpose/python/matrix_tests.py diff --git a/07-matrix_experiment/task.md b/07-matrix_experiment/task.md new file mode 100644 index 0000000..2368111 --- /dev/null +++ b/07-matrix_experiment/task.md @@ -0,0 +1,90 @@ +## Goal + +The goal of this assignment is to evaluate your implementation of cache-oblivious +matrix transposition experimentally and to compare it with the trivial algorithm +which transposes by definition. + +You are given a test program (`matrix_experiment_sim`) which evaluates your +implementation from the previous assignment on different simulated caches and +matrices of different sizes. For each experiment, the average number of cache +misses per item is reported (the diagonal items which do not move are not +counted). The program also evaluates the performance of the trivial transposition algorithm. +The simulated cache is fully associative and uses the LRU replacement strategy. + +You should run these experiments and write a report, which contains one plot of +the measured data for each cache type, showing the dependency of the average number of +misses per item on the matrix size. There should be two curves in each plot: one for your +algorithm, another for the trivial one. + +The report should discuss the experimental results and try to explain the observed +behavior (including any strange anomalies) using theory from the lectures. +If you want, you can carry out further experiments to gain a better understanding +of the algorithm and include these in the report. + +You should submit a PDF file with the report (and no source code). +You will get 1 temporary point upon submission if the file is syntactically correct; +proper points will be assigned later. 
+ +## Test program + +The test program is given two arguments: +- Cache type: + - `m1024-b16` – cache of 1024 items organized in 16-item blocks + - `m8192-b64` – cache of 8192 items organized in 64-item blocks + - `m65536-b256` – cache of 65536 items organized in 256-item blocks + - `m65536-b4096` – cache of 65536 items organized in 4096-item blocks +- The implementation to test (`smart` or `naive`). + +The output of the program contains one line per experiment, which consists of +the matrix size and the average number of cache misses per item. + +*Warning:* The Python tests are slow, even though they use only a subset of the +matrix sizes. They can take about one hour to complete. +If your machine has multiple processors or cores, you can try `make -j` +to run the tests in parallel. + +## Optional: Tests on real hardware (for 5 extra points) + +You can also test your transposition algorithm on real hardware +using the `matrix_experiment_real` program. The matrix is stored in row-major +order, each item takes 4 bytes. + +The program takes one parameter, the implementation to test: `smart` or `naive`. +Its output contains one line per experiment, which consists of the matrix size +and the average time per item in nanoseconds. + +However, the program is available only for C++, because the general slowness of +Python completely hides all cache effects. + +Again, your report should show a plot of the measured data and discuss the observed +effects. You should also include the configuration of caches in your machine. +(If you are using Linux, you can try the `machinfo` script from +[this repository](https://gitlab.kam.mff.cuni.cz/mj/aim.git).) + +## Hints + +The following tools can be useful for producing nice plots: +- [pandas](https://pandas.pydata.org/) +- [matplotlib](https://matplotlib.org/) +- [gnuplot](http://www.gnuplot.info/) + +A quick checklist for plots: +- Is there a caption explaining what is plotted? +- Are the axes clearly labelled? 
Do they have value ranges and units? +- Have you mentioned that this axis has a logarithmic scale? (Logarithmic graphs + are more fitting in some cases, but you should say so.) +- Is it clear which curve means what? +- Is it clear which are the measured points and which is an interpolated + curve between them? +- Are there any overlaps? (E.g., the most interesting part of the curve + hidden underneath a label?) +- **Is the graph distorted by compression artifacts?** (No, you shouldn't use JPEG for plots!) + +In your discussion, please distinguish the following kinds of claims. +It should always be clear which is which: +- Experimental results (i.e., the raw data you obtained from the experiments) +- Theoretical facts (i.e., claims we have proved mathematically) +- Your hypotheses (e.g., when you claim that the graph looks like something is true, + but you are not able to prove rigorously that it always holds) + +Source code templates can be found in [git](https://gitlab.kam.mff.cuni.cz/datovky/assignments/-/tree/master). -- GitLab