-
Notifications
You must be signed in to change notification settings - Fork 0
/
example.cpp
104 lines (78 loc) · 3.13 KB
/
example.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include "fast_rng.hpp"
#include "rng_helpers.hpp"
#include <cassert>
#include <iostream>
#include <random>
#include <vector>
using namespace std;
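// Each of the following three functions fills a 2-d array of floats, 'out', with random
// numbers. The array has 'ny' rows (one per frequency) of 'nx' floats each, and the start
// of each row is separated from the next by 'stride' floats, i.e. element (ifreq, it)
// lives at out[ifreq*stride + it].
//
// Each function prints its time per generated float, so that std::mt19937, the scalar
// xorshift_plus generator, and the vectorized vec_xorshift_plus generator can be compared.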
// Example using vec_xorshift_plus
// Note that we must store the random floats generated using the _mm256_storeu_ps()
// intrinsic
static void time_vec_xorshift_plus(float *out, int niter, int ny, int nx, int stride)
{
assert(nx % 8 == 0);
fast_rng::vec_xorshift_plus x;
// Start timing
struct timeval tv0 = rng_helpers::get_time();
for (int iter = 0; iter < niter; iter++)
for (int ifreq = 0; ifreq < ny; ifreq++)
for (int it = 0; it < nx; it += 8)
_mm256_storeu_ps(&out[ifreq*stride + it], x.gen_floats());
// Stop timing
struct timeval tv1 = rng_helpers::get_time();
double dt = rng_helpers::time_diff(tv0, tv1);
double noutputs = double(niter) * double(ny) * double(nx);
double ns_per_output = 1.0e9 * dt / noutputs;
cout << "time_vec_xorshift_plus: ns_per_output = " << ns_per_output << endl;
}
static void time_xorshift_plus(float *out, int niter, int ny, int nx, int stride)
{
assert(nx % 8 == 0);
rng_helpers::xorshift_plus x;
// Start timing
struct timeval tv0 = rng_helpers::get_time();
for (int iter = 0; iter < niter; iter++)
for (int ifreq = 0; ifreq < ny; ifreq++)
for (int it = 0; it < nx; it += 8)
x.gen_floats(out + ifreq * stride + it);
// Stop timing
struct timeval tv1 = rng_helpers::get_time();
double dt = rng_helpers::time_diff(tv0, tv1);
double noutputs = double(niter) * double(ny) * double(nx);
double ns_per_output = 1.0e9 * dt / noutputs;
cout << "time_xorshift_plus: ns_per_output = " << ns_per_output << endl;
}
// Benchmarks std::mt19937 combined with std::uniform_real_distribution, as a baseline
// for the xorshift generators.
//
// Fills an (ny x nx) region of 'out' 'niter' times over, one float per step, where
// element (ifreq, it) lives at out[ifreq*stride + it]. Prints the average time per
// generated float in nanoseconds to stdout.
//
// Unlike the xorshift timing functions, 'nx' need not be a multiple of 8.
static void time_mt19937(float *out, int niter, int ny, int nx, int stride)
{
    std::random_device rd;
    std::mt19937 rng(rd());

    // The single-argument constructor dist(1.0) sets only the lower bound; the upper
    // bound defaults to 1.0, which gives the degenerate interval [1.0, 1.0). Spell out
    // both bounds so that the baseline actually draws varying samples.
    // NOTE(review): [0, 1) is assumed here; confirm it matches the range produced by
    // the xorshift generators if the outputs are ever compared.
    std::uniform_real_distribution<> dist(0.0, 1.0);

    // Start timing
    struct timeval tv0 = rng_helpers::get_time();

    for (int iter = 0; iter < niter; iter++) {
        for (int ifreq = 0; ifreq < ny; ifreq++) {
            float *row = out + ifreq*stride;

            // The distribution yields doubles; the narrowing to float is intentional.
            for (int it = 0; it < nx; it++)
                row[it] = static_cast<float>(dist(rng));
        }
    }

    // Stop timing
    struct timeval tv1 = rng_helpers::get_time();

    double dt = rng_helpers::time_diff(tv0, tv1);
    double noutputs = double(niter) * double(ny) * double(nx);
    double ns_per_output = 1.0e9 * dt / noutputs;

    std::cout << "time_mt19937: ns_per_output = " << ns_per_output << std::endl;
}
// Runs the three timing functions on a shared buffer of 'ny' rows, each 'nx' floats
// wide, with consecutive rows 'stride' floats apart, and lets each print its result.
int main()
{
    const int ny = 16384;
    const int nx = 1024;
    const int stride = 4096;
    const int niter = 10;

    // Comparison of mt19937, xorshift_plus, and vec_xorshift_plus
    //
    // std::vector owns the buffer, so it is released on return (the previous raw
    // new[] was never freed). Its elements are value-initialized to 0.0f, which
    // replaces the explicit memset(). The element count is computed in size_t so
    // that larger ny/stride values cannot overflow 'int'.
    std::vector<float> buf(static_cast<std::size_t>(ny) * static_cast<std::size_t>(stride));

    time_mt19937(buf.data(), niter, ny, nx, stride);
    time_xorshift_plus(buf.data(), niter, ny, nx, stride);
    time_vec_xorshift_plus(buf.data(), niter, ny, nx, stride);

    return 0;
}