/*
 *	A simple memory access speed test
 *
 *	Written by Martin Mares <mj@ucw.cz> and put into public domain.
 *
 *	Compile-time switches:
 *	  SIZE=xxx	Size of a single item in bytes
 *	  RANDOMIZE	Define for random access, undef for sequential
 *	  MODIFY	Define for read-write accesses, undef for read-only
 *	  MS		Measurement period in ms
 *	  ARRAY		Access items as array (default: linked list)
 *	  HUGE		Allocate huge pages (hugetlbfs required)
 *
 *	Output is in nanoseconds per access.
 */

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <time.h>

struct item {
  struct item *next;
  uint32_t pad[(SIZE-sizeof(struct item *)) / 4];
};

static struct item *items;
static unsigned block_size, n;

#define TICKS_PER_SEC 1000000
static int64_t get_timer(void)
{
  struct timeval t;
  gettimeofday(&t, NULL);
  return 1000000*t.tv_sec + t.tv_usec;
}

#ifdef HUGE

static void alloc_mem(void)
{
#define HUGE_PATH "huge/access"
#define HUGE_PAGE (2*1048576)
  void *pp;
  unlink(HUGE_PATH);
  int fd = open(HUGE_PATH, O_RDWR | O_CREAT, 0644);
  if (fd < 0)
    {
      fprintf(stderr, "Cannot open hugetlbfs at %s: %m\n", HUGE_PATH);
      exit(1);
    }
  if ((pp = mmap(NULL, ((block_size+HUGE_PAGE-1)/HUGE_PAGE)*HUGE_PAGE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED)
    {
      fprintf(stderr, "Cannot mmap hugetlbfs: %m\n");
      exit(1);
    }
  items = pp;
}

#else

static void alloc_mem(void)
{
  void *pp;
  if (posix_memalign(&pp, 4096, block_size) < 0)
    {
      fprintf(stderr, "Cannot allocate memory: %m\n");
      exit(1);
    }
  items = pp;
}

#endif

static inline void eat(uint32_t x)
{
  // Discard a value, but do not allow optimizing it out
  asm volatile ("" : : "r" (x));
}

static inline void process_item(struct item *x)
{
#ifdef MODIFY
  x->pad[0]++;
#else
  eat(x->pad[0]);
#endif
}

#ifdef ARRAY

static void fill(void)
{
  memset(items, 0, block_size);
}

static void loop(void)
{
#ifdef RANDOMIZE
  if (n & (n-1))
    {
      fprintf(stderr, "Block size must be a power of two!\n");
      exit(1);
    }
  unsigned i=0;
  do
    {
      process_item(&items[i]);
      i = (i+259309) & (n-1);
    }
  while (i);
#else
  struct item *stop = items + n;
  for (struct item *a = items; a < stop; a++)
    process_item(a);
#endif
}

#else

static void fill(void)
{
  memset(items, 0, block_size);
#ifdef RANDOMIZE
  for (unsigned i=0; i<n; i++)
    {
      unsigned j=(i+259309) % n;
      if (j)
	items[i].next = &items[j];
    }
#else
  for (unsigned i=0; i<n-1; i++)
    items[i].next = &items[i+1];
#endif
}

static void loop(void)
{
  for (struct item *a = items; a; a=a->next)
    process_item(a);
}

#endif

int main(int argc, char **argv)
{
  if (argc != 2)
    {
      fprintf(stderr, "Usage: access <size-in-KB>\n");
      return 1;
    }
  block_size = atoi(argv[1]) * 1024;
  n = block_size / SIZE;

  alloc_mem();
  fill();

  unsigned attempts = 1;
  for (;;)
    {
      int64_t t0 = get_timer();

      for (unsigned i=0; i<attempts; i++)
	loop();

      int64_t t1 = get_timer();
      int64_t t = t1-t0;
      if (t >= TICKS_PER_SEC/1000*MS)
	{
	  printf("%.3f %f\n", (double)t/attempts/n/TICKS_PER_SEC*1e9, (double)(2*attempts-1)*n);
	  break;
	}
      attempts *= 2;
    }

  return 0;
}