Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
datovky
assignments
Commits
95a3a136
Commit
95a3a136
authored
Mar 23, 2022
by
David Mareček
Browse files
matrix-transpose homework
parent
21f7f710
Changes
9
Hide whitespace changes
Inline
Side-by-side
06-matrix_transpose/cpp/Makefile
0 → 100644
View file @
95a3a136
test
:
matrix_transpose_test
./
$<
CXXFLAGS
=
-std
=
c++11
-O2
-Wall
-Wextra
-g
-Wno-sign-compare
matrix_transpose_test
:
matrix_transpose_test.cpp matrix_transpose.h matrix_tests.h test_main.cpp
$(CXX)
$(CXXFLAGS)
$(
filter-out
%.h,
$^
)
-o
$@
clean
:
rm
-f
matrix_transpose_test
.PHONY
:
clean test
06-matrix_transpose/cpp/matrix_tests.h
0 → 100644
View file @
95a3a136
#include <string>
#include <vector>
#include <iostream>
#include <algorithm>
/* A matrix stored in a simulated cache */
class
CachedMatrix
{
unsigned
B
;
// Block size
unsigned
mem_blocks
;
// Memory size in blocks
unsigned
cache_blocks
;
// Cache size in blocks
unsigned
cache_used
;
// How many blocks of cache we already used
// We store the matrix as a one-dimensional array
vector
<
unsigned
>
items
;
unsigned
pos
(
unsigned
i
,
unsigned
j
)
{
return
i
*
N
+
j
;
}
/*
* For each memory block, we keep the following structure.
* If the block is currently cached, we set cached == true
* and lru_prev/next point to neighboring blocks in the cyclic LRU list.
* Otherwise, cached == false and the block is not in the LRU.
*/
class
MemoryBlock
{
public:
unsigned
lru_prev
,
lru_next
;
bool
cached
;
MemoryBlock
()
{
lru_prev
=
lru_next
=
0
;
cached
=
false
;
}
};
vector
<
MemoryBlock
>
blocks
;
// One block at the end of "blocks" serves as a head of the LRU list.
unsigned
lru_head
;
public:
// Number of rows and columns of the matrix
unsigned
N
;
int
debug_level
;
// Verbosity
CachedMatrix
(
unsigned
N
,
unsigned
M
,
unsigned
B
,
int
debug_level
=
0
)
{
EXPECT
(
N
>
0
,
"CachedMatrix must be non-empty."
);
EXPECT
(
B
>
0
,
"Blocks must be non-empty."
);
EXPECT
(
!
(
M
%
B
),
"Cache size must be divisible by block size."
);
EXPECT
(
M
>=
2
*
B
,
"Cache must have at least 2 blocks."
);
unsigned
NN
=
N
*
N
;
items
.
resize
(
NN
,
0
);
this
->
N
=
N
;
this
->
B
=
B
;
this
->
debug_level
=
debug_level
;
mem_blocks
=
(
NN
+
B
-
1
)
/
B
;
cache_blocks
=
M
/
B
;
cache_used
=
0
;
// Initialize the LRU list
blocks
.
resize
(
mem_blocks
+
1
);
lru_head
=
mem_blocks
;
blocks
[
lru_head
].
lru_prev
=
lru_head
;
blocks
[
lru_head
].
lru_next
=
lru_head
;
if
(
debug_level
>
0
)
cout
<<
"
\t
Memory: "
<<
mem_blocks
<<
" blocks of "
<<
B
<<
" items, "
<<
cache_blocks
<<
" cached
\n
"
;
}
// Read value at position (i,j), used only in testing code
unsigned
read
(
unsigned
i
,
unsigned
j
)
{
EXPECT
(
i
<
N
&&
j
<
N
,
"Read out of range: "
+
coord_string
(
i
,
j
)
+
"."
);
unsigned
addr
=
pos
(
i
,
j
);
access
(
addr
);
return
items
[
addr
];
}
// Write value at position (i,j), used only in testing code
void
write
(
unsigned
i
,
unsigned
j
,
unsigned
data
)
{
EXPECT
(
i
<
N
&&
j
<
N
,
"Write out of range: "
+
coord_string
(
i
,
j
)
+
"."
);
unsigned
addr
=
pos
(
i
,
j
);
access
(
addr
);
items
[
addr
]
=
data
;
}
// Swap items (i1,j1) and (i2,j2)
void
swap
(
unsigned
i1
,
unsigned
j1
,
unsigned
i2
,
unsigned
j2
)
{
EXPECT
(
i1
<
N
&&
j1
<
N
&&
i2
<
N
&&
j2
<
N
,
"Swap out of range: "
+
coord_string
(
i1
,
j1
)
+
" with "
+
coord_string
(
i2
,
j2
)
+
"."
);
if
(
debug_level
>
1
)
cout
<<
"
\t
Swap "
<<
coord_string
(
i1
,
j1
)
<<
" "
<<
coord_string
(
i2
,
j2
)
<<
endl
;
unsigned
addr1
=
pos
(
i1
,
j1
),
addr2
=
pos
(
i2
,
j2
);
access
(
addr1
);
access
(
addr2
);
std
::
swap
(
items
[
addr1
],
items
[
addr2
]);
}
unsigned
stat_cache_misses
;
unsigned
stat_accesses
;
// Reset statistic counters.
void
reset_stats
()
{
stat_cache_misses
=
0
;
stat_accesses
=
0
;
}
static
string
coord_string
(
unsigned
i
,
unsigned
j
)
{
return
"("
+
to_string
(
i
)
+
","
+
to_string
(
j
)
+
")"
;
}
#include "matrix_transpose.h"
private:
// Bring the given address to the cache.
void
access
(
unsigned
addr
)
{
int
i
=
addr
/
B
;
// Which block to bring
if
(
blocks
[
i
].
cached
)
{
lru_remove
(
i
);
}
else
{
if
(
cache_used
<
cache_blocks
)
{
// We still have room in the cache.
cache_used
++
;
if
(
debug_level
>
1
)
cout
<<
"
\t\t
Loading block "
<<
i
<<
endl
;
}
else
{
// We need to evict the least-recently used block to make space.
unsigned
replace
=
blocks
[
lru_head
].
lru_prev
;
lru_remove
(
replace
);
EXPECT
(
blocks
[
replace
].
cached
,
"Internal error: Buggy LRU list."
);
blocks
[
replace
].
cached
=
false
;
if
(
debug_level
>
1
)
cout
<<
"
\t\t
Loading block "
<<
i
<<
", replacing "
<<
replace
<<
endl
;
}
blocks
[
i
].
cached
=
true
;
stat_cache_misses
++
;
}
lru_add_after
(
i
,
lru_head
);
stat_accesses
++
;
}
// Remove block from the LRU list.
void
lru_remove
(
unsigned
i
)
{
unsigned
prev
=
blocks
[
i
].
lru_prev
;
unsigned
next
=
blocks
[
i
].
lru_next
;
blocks
[
prev
].
lru_next
=
next
;
blocks
[
next
].
lru_prev
=
prev
;
}
// Add block at the given position in the LRU list.
void
lru_add_after
(
unsigned
i
,
unsigned
after
)
{
unsigned
next
=
blocks
[
after
].
lru_next
;
blocks
[
next
].
lru_prev
=
i
;
blocks
[
after
].
lru_next
=
i
;
blocks
[
i
].
lru_next
=
next
;
blocks
[
i
].
lru_prev
=
after
;
}
};
/* A cached matrix extended by methods for testing */
class
TestMatrix
:
public
CachedMatrix
{
public:
TestMatrix
(
unsigned
N
,
unsigned
M
,
unsigned
B
,
int
debug_level
=
0
)
:
CachedMatrix
(
N
,
M
,
B
,
debug_level
)
{
}
// Fill matrix with a testing pattern.
void
fill_matrix
()
{
if
(
debug_level
>
1
)
cout
<<
"
\t
Initializing
\n
"
;
for
(
unsigned
i
=
0
;
i
<
N
;
i
++
)
for
(
unsigned
j
=
0
;
j
<
N
;
j
++
)
write
(
i
,
j
,
i
*
N
+
j
);
}
// Check that the pattern corresponds to the properly transposed matrix.
void
check_result
()
{
if
(
debug_level
>
1
)
cout
<<
"
\t
Checking
\n
"
;
for
(
unsigned
i
=
0
;
i
<
N
;
i
++
)
{
for
(
unsigned
j
=
0
;
j
<
N
;
j
++
)
{
unsigned
want
=
j
*
N
+
i
;
unsigned
found
=
read
(
i
,
j
);
unsigned
found_i
=
found
/
N
;
unsigned
found_j
=
found
%
N
;
EXPECT
(
found
==
want
,
"Mismatch at position "
+
coord_string
(
i
,
j
)
+
": expected element from "
+
coord_string
(
j
,
i
)
+
", found element from "
+
coord_string
(
found_i
,
found_j
)
+
"."
);
}
}
}
// Transpose the matrix naively.
void
naive_transpose
()
{
for
(
unsigned
i
=
0
;
i
<
N
;
i
++
)
for
(
unsigned
j
=
0
;
j
<
i
;
j
++
)
swap
(
i
,
j
,
j
,
i
);
}
};
06-matrix_transpose/cpp/matrix_transpose.h
0 → 100644
View file @
95a3a136
/*
* This file is #include'd inside the definition of a matrix class
* like this:
*
* class ClassName {
* // Number of rows and columns of the matrix
* unsigned N;
*
* // Swap elements (i1,j1) and (i2,j2)
* void swap(unsigned i1, unsigned j1, unsigned i2, unsigned j2);
*
* // Your code
* #include "matrix_transpose.h"
* }
*/
void
transpose
()
{
// TODO: Implement this efficiently
for
(
unsigned
i
=
0
;
i
<
N
;
i
++
)
for
(
unsigned
j
=
0
;
j
<
i
;
j
++
)
swap
(
i
,
j
,
j
,
i
);
}
06-matrix_transpose/cpp/matrix_transpose_test.cpp
0 → 100644
View file @
95a3a136
#include <functional>
#include <vector>
#include <iostream>
#include <iomanip>
using
namespace
std
;
// If the condition is not true, report an error and halt.
#define EXPECT(condition, message) do { if (!(condition)) expect_failed(message); } while (0)
void
expect_failed
(
const
string
&
message
);
#include "matrix_tests.h"
void
generic_test
(
unsigned
N
,
unsigned
M
,
unsigned
B
,
double
max_ratio
,
int
debug_level
)
{
TestMatrix
m
(
N
,
M
,
B
,
debug_level
);
m
.
fill_matrix
();
m
.
reset_stats
();
m
.
transpose
();
cout
<<
"
\t
"
<<
m
.
stat_cache_misses
<<
" misses in "
<<
m
.
stat_accesses
<<
" accesses
\n
"
;
if
(
m
.
stat_accesses
)
{
double
mpa
=
(
double
)
m
.
stat_cache_misses
/
m
.
stat_accesses
;
double
lb
=
1.
/
B
;
double
ratio
=
mpa
/
lb
;
cout
<<
"
\t
"
<<
std
::
fixed
<<
std
::
setprecision
(
6
)
<<
mpa
<<
" misses/access (lower bound is "
<<
lb
<<
" => ratio "
<<
ratio
<<
", need "
<<
max_ratio
<<
")
\n
"
;
EXPECT
(
ratio
<=
max_ratio
,
"Algorithm did too many I/O operations."
);
}
m
.
check_result
();
}
/*** A list of all tests ***/
vector
<
pair
<
string
,
function
<
void
()
>>>
tests
=
{
// name N M B max_ratio debug_level
{
"small2k"
,
[]
{
generic_test
(
8
,
32
,
8
,
8
,
2
);
}
},
{
"small"
,
[]
{
generic_test
(
13
,
64
,
8
,
4
,
2
);
}
},
{
"n100b16"
,
[]
{
generic_test
(
100
,
1024
,
16
,
3
,
1
);
}
},
{
"n1000b16"
,
[]
{
generic_test
(
1000
,
1024
,
16
,
3
,
1
);
}
},
{
"n1000b64"
,
[]
{
generic_test
(
1000
,
8192
,
64
,
3
,
1
);
}
},
{
"n1000b256"
,
[]
{
generic_test
(
1000
,
65536
,
256
,
5
,
1
);
}
},
{
"n1000b4096"
,
[]
{
generic_test
(
1000
,
65536
,
4096
,
50
,
1
);
}
},
};
06-matrix_transpose/cpp/test_main.cpp
0 → 100644
View file @
95a3a136
#include <cstdlib>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using
namespace
std
;
extern
vector
<
pair
<
string
,
function
<
void
()
>>>
tests
;
void
expect_failed
(
const
string
&
message
)
{
cerr
<<
"Test error: "
<<
message
<<
endl
;
exit
(
1
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
vector
<
string
>
required_tests
;
if
(
argc
>
1
)
{
required_tests
.
assign
(
argv
+
1
,
argv
+
argc
);
}
else
{
for
(
const
auto
&
test
:
tests
)
required_tests
.
push_back
(
test
.
first
);
}
for
(
const
auto
&
required_test
:
required_tests
)
{
bool
found
=
false
;
for
(
const
auto
&
test
:
tests
)
if
(
required_test
==
test
.
first
)
{
cerr
<<
"Running test "
<<
required_test
<<
endl
;
test
.
second
();
found
=
true
;
break
;
}
if
(
!
found
)
{
cerr
<<
"Unknown test "
<<
required_test
<<
endl
;
return
1
;
}
}
return
0
;
}
06-matrix_transpose/python/matrix_tests.py
0 → 100644
View file @
95a3a136
from
matrix_transpose
import
Matrix
import
numpy
# Description of memory blocks is stored in an array blocks[block_index][B_xxx].
# If the block is cached, B_CACHED is 1 and B_LRU_NEXT and B_LRU_PREV point to
# neighboring blocks in the cyclic LRU list. Otherwise, B_CACHED is 0 and the block
# is not in the LRU.
B_CACHED
=
0
B_LRU_NEXT
=
1
B_LRU_PREV
=
2
class
CachedMatrix
(
Matrix
):
"""A matrix stored in simulated cache"""
def
__init__
(
self
,
N
,
M
,
B
,
debug_level
=
0
):
assert
N
>
0
,
"CachedMatrix must be non-empty."
assert
B
>
0
,
"Blocks must be non-empty."
assert
M
%
B
==
0
,
"Cache size must be divisible by block size."
assert
M
>=
2
*
B
,
"Cache must have at least 2 blocks."
Matrix
.
__init__
(
self
,
N
)
NN
=
N
*
N
self
.
items
=
numpy
.
zeros
(
shape
=
NN
,
dtype
=
numpy
.
int32
,
order
=
"C"
)
self
.
B
=
B
self
.
M
=
M
self
.
debug_level
=
debug_level
self
.
mem_blocks
=
(
NN
+
B
-
1
)
//
B
self
.
cache_blocks
=
M
//
B
self
.
cache_used
=
0
# Initialize the LRU list. There is a virtual block right after the last real block,
# which serves as a head of the cyclic LRU list.
self
.
blocks
=
numpy
.
zeros
(
shape
=
(
self
.
mem_blocks
+
1
,
3
),
dtype
=
numpy
.
int32
,
order
=
"C"
)
self
.
lru_head
=
self
.
mem_blocks
self
.
blocks
[
self
.
lru_head
,
B_LRU_NEXT
]
=
self
.
lru_head
self
.
blocks
[
self
.
lru_head
,
B_LRU_PREV
]
=
self
.
lru_head
self
.
reset_stats
()
if
debug_level
>
0
:
print
(
"
\t
Memory: {} blocks of {} items, {} cached"
.
format
(
self
.
mem_blocks
,
self
.
B
,
self
.
cache_blocks
))
def
_pos
(
self
,
i
,
j
):
"""Convert position in matrix to linear address in simulated memory."""
return
i
*
self
.
N
+
j
def
read
(
self
,
i
,
j
):
"""Read value at position (i,j), used only in testing code."""
assert
i
>=
0
and
i
<
self
.
N
and
j
>=
0
and
j
<
self
.
N
,
"Read out of range: ({},{})"
.
format
(
i
,
j
)
addr
=
self
.
_pos
(
i
,
j
)
self
.
_access
(
addr
)
return
self
.
items
[
addr
]
def
write
(
self
,
i
,
j
,
value
):
"""Write value at position (i,j), used only in testing code."""
assert
i
>=
0
and
i
<
self
.
N
and
j
>=
0
and
j
<
self
.
N
,
"Write out of range: ({},{})"
.
format
(
i
,
j
)
addr
=
self
.
_pos
(
i
,
j
)
self
.
_access
(
addr
)
self
.
items
[
addr
]
=
value
def
swap
(
self
,
i1
,
j1
,
i2
,
j2
):
"""Swap items (i1,j1) and (i2,j2)."""
assert
i1
>=
0
and
i1
<
self
.
N
and
j1
>=
0
and
j1
<
self
.
N
and
\
i2
>=
0
and
i2
<
self
.
N
and
j2
>=
0
and
j2
<
self
.
N
,
\
"Swap out of range: ({},{}) with ({},{})"
.
format
(
i1
,
j1
,
i2
,
j2
)
if
self
.
debug_level
>
1
:
print
(
"
\t
Swap ({},{}) ({},{})"
.
format
(
i1
,
j1
,
i2
,
j2
))
addr1
=
self
.
_pos
(
i1
,
j1
)
addr2
=
self
.
_pos
(
i2
,
j2
)
self
.
_access
(
addr1
)
self
.
_access
(
addr2
)
items
=
self
.
items
items
[
addr1
],
items
[
addr2
]
=
items
[
addr2
],
items
[
addr1
]
def
reset_stats
(
self
):
"""Reset statistic counters."""
self
.
stat_cache_misses
=
0
self
.
stat_accesses
=
0
def
_access
(
self
,
addr
):
"""Bring the given address to the cache."""
blocks
=
self
.
blocks
i
=
addr
//
self
.
B
# Which block to bring
if
blocks
[
i
,
B_CACHED
]
>
0
:
self
.
_lru_remove
(
i
)
else
:
if
self
.
cache_used
<
self
.
cache_blocks
:
# We still have room in the cache.
self
.
cache_used
+=
1
if
self
.
debug_level
>
1
:
print
(
"
\t\t
Loading block {}"
.
format
(
i
))
else
:
# We need to evict the least-recently used block to make space.
replace
=
blocks
[
self
.
lru_head
,
B_LRU_PREV
]
self
.
_lru_remove
(
replace
)
assert
blocks
[
replace
,
B_CACHED
]
>
0
,
"Internal error: Buggy LRU list"
blocks
[
replace
,
B_CACHED
]
=
0
if
self
.
debug_level
>
1
:
print
(
"
\t\t
Loading block {}, replacing {}"
.
format
(
i
,
replace
))
blocks
[
i
,
B_CACHED
]
=
1
self
.
stat_cache_misses
+=
1
self
.
_lru_add_after
(
i
,
self
.
lru_head
)
self
.
stat_accesses
+=
1
def
_lru_remove
(
self
,
i
):
"""Remove block from the LRU list."""
blocks
=
self
.
blocks
prev
,
next
=
blocks
[
i
,
B_LRU_PREV
],
blocks
[
i
,
B_LRU_NEXT
]
blocks
[
prev
,
B_LRU_NEXT
]
=
next
blocks
[
next
,
B_LRU_PREV
]
=
prev
def
_lru_add_after
(
self
,
i
,
after
):
"""Add block at the given position in the LRU list."""
blocks
=
self
.
blocks
next
=
blocks
[
after
,
B_LRU_NEXT
]
blocks
[
after
,
B_LRU_NEXT
]
=
i
blocks
[
next
,
B_LRU_PREV
]
=
i
blocks
[
i
,
B_LRU_NEXT
]
=
next
blocks
[
i
,
B_LRU_PREV
]
=
after
class
TestMatrix
(
CachedMatrix
):
"""A cached matrix extended by methods for testing."""
# Constructor is inherited
def
fill_matrix
(
self
):
"""Fill matrix with a testing pattern."""
if
self
.
debug_level
>
1
:
print
(
"
\t
Initializing"
)
N
=
self
.
N
for
i
in
range
(
N
):
for
j
in
range
(
N
):
self
.
write
(
i
,
j
,
i
*
N
+
j
)
def
check_result
(
self
):
"""Check that the pattern corresponds to the properly transposed matrix."""
if
self
.
debug_level
>
1
:
print
(
"
\t
Checking"
)
N
=
self
.
N
for
i
in
range
(
N
):
for
j
in
range
(
N
):
want
=
j
*
N
+
i
have
=
self
.
read
(
i
,
j
)
have_i
=
have
//
N
have_j
=
have
%
N
assert
have
==
want
,
"Mismatch at position ({},{}): expected element from ({},{}), found element from ({},{})"
.
format
(
i
,
j
,
j
,
i
,
have_i
,
have_j
)
def
naive_transpose
(
self
):
"""Transpose the matrix naively."""
for
i
in
range
(
self
.
N
):
for
j
in
range
(
i
):
self
.
swap
(
i
,
j
,
j
,
i
)
06-matrix_transpose/python/matrix_transpose.py
0 → 100644
View file @
95a3a136
class
Matrix
:
"""Interface of a matrix.
This class provides only the matrix size N and a method for swapping
two items. The actual storage of the matrix in memory is provided by
subclasses in testing code.
"""
def
__init__
(
self
,
N
):
self
.
N
=
N
def
swap
(
self
,
i1
,
j1
,
i2
,
j2
):
"""Swap elements (i1,j1) and (i2,j2)."""
# Overridden in subclasses
raise
NotImplementedError
def
transpose
(
self
):
"""Transpose the matrix."""
# TODO: Implement more efficiently
for
i
in
range
(
self
.
N
):
for
j
in
range
(
i
):
self
.
swap
(
i
,
j
,
j
,
i
)
06-matrix_transpose/python/matrix_transpose_test.py
0 → 100644
View file @
95a3a136
#!/usr/bin/env python3
import
math
import
sys
from
matrix_tests
import
TestMatrix
def
generic_test
(
N
,
M
,
B
,
max_ratio
,
debug_level
):
m
=
TestMatrix
(
N
,
M
,
B
,
debug_level
)
m
.
fill_matrix
()
m
.
reset_stats
()
m
.
transpose
()
print
(
"
\t
{} misses in {} accesses"
.
format
(
m
.
stat_cache_misses
,
m
.
stat_accesses
))
if
m
.
stat_accesses
:
mpa
=
m
.
stat_cache_misses
/
m
.
stat_accesses
lb
=
1
/
B
ratio
=
mpa
/
lb
print
(
"
\t
{:.6f} misses/access (lower bound is {:.6f} => ratio {:.6f}, need {:.6f})"
.
format
(
mpa
,
lb
,
ratio
,
max_ratio
))
assert
ratio
<=
max_ratio
,
"Algorithm did too many I/O operations."
m
.
check_result
()
# A list of all tests
tests
=
[
# name N M B max_ratio debug_level
(
"small2k"
,
lambda
:
generic_test
(
8
,
32
,
8
,
8
,
2
)),
(
"small"
,
lambda
:
generic_test
(
13
,
64
,
8
,
4
,
2
)),
(
"n100b16"
,
lambda
:
generic_test
(
100
,
1024
,
16
,
3
,
1
)),
(
"n1000b16"
,
lambda
:
generic_test
(
1000
,
1024
,
16
,
3
,
1
)),
(
"n1000b64"
,
lambda
:
generic_test
(
1000
,
8192
,
64
,
3
,
1
)),
(
"n1000b256"
,
lambda
:
generic_test
(
1000
,
65536
,
256
,
5
,
1
)),
(
"n1000b4096"
,
lambda
:
generic_test
(
1000
,
65536
,
4096
,
50
,
1
)),
]
if
__name__
==
"__main__"
:
for
required_test
in
sys
.
argv
[
1
:]
or
[
name
for
name
,
_
in
tests
]:
for
name
,
test
in
tests
:
if
name
==
required_test
:
print
(
"Running test {}"
.
format
(
name
),
file
=
sys
.
stderr
)
test
()
break
else
:
raise
ValueError
(
"Unknown test {}"
.
format
(
name
))
06-matrix_transpose/task.md
0 → 100644
View file @
95a3a136
Implement cache-oblivious transposition of square matrices.
The Python version requires the NumPy module for storing the matrix
in memory efficiently.
Source code templates can be found in
[
git
](
https://gitlab.kam.mff.cuni.cz/datovky/assignments/-/tree/master
)
.
Write
Preview