-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcreate_layers.py
executable file
·248 lines (206 loc) · 9.23 KB
/
create_layers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#!/usr/bin/env python3
import argparse
import numpy as np
## For longdouble Error Function ####
import pyximport; pyximport.install()
from erfl import erf
#####################################
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="""Creates the 4 pre-computed lookup tables for both the exponential and normal PRNG:
X = length of the ziggurat layers,
Y = f(X),
map = probability table (for Alias Sampling),
ipmf = Alias Table.
Values are calculated to longdouble precision and then rounded to double/single precision. Tables output to a C header file.""")
parser.add_argument('--type', default='exponential', choices={'exponential', 'normal'}, help="PRNG type.")
parser.add_argument('--size', default=256, type=int, help="Number of Ziggurat layers.")
parser.add_argument('--precision', default='double', choices={'float', 'double'}, help='Precision of tables')
args = parser.parse_args()
if args.precision == 'double':
max_int = 0x7fffffffffffffff
max_uint = 0xffffffffffffffff
to_int = np.int64
c_int_type = 'int64_t'
elif args.precision == 'float':
max_int = 0x7fffffff
max_uint = 0xffffffff
to_int = np.int32
c_int_type = 'int32_t'
# redefine Transcendental functions for proper precision
exp = lambda x: np.exp(x, dtype=np.longdouble)
sqrt = lambda x: np.sqrt(x, dtype=np.longdouble)
power = lambda x, y: np.power(x, y, dtype=np.longdouble)
def check_equal(x,y):
"""check_equal(x, y) -> raise Assertion Error if values in x & y differ by more than double precision"""
assert (np.abs(x - y,dtype=np.longdouble) < np.finfo(np.double).eps).all(), "{:} {:}".format(x,y)
def fsolve(f, a, b, xtol=1e-19, iterations=100, start_safe=False):
""" fsolve(f, a, b xtol=1e-19) -> x: f(x) = 0; implemented to longdouble precision
Tries Secant, then uses Bisection Method to find root.
Requires a <= x < b.
"""
assert a < b, "a must be lower bound, b must be higher bound"
x = [b, a]
F = list(map(f, x))
if np.sign(F[0]) == np.sign(F[1]):
raise ValueError("there is no root within [a, b]")
if not start_safe:
for n in range(2, iterations):
x.append(x[n-1] - F[n-1]*(x[n-1] - x[n-2])/(F[n-1] - F[n-2]))
if np.abs(x[n] - x[n-1]) < xtol:
return x[n]
F.append(f(x[n]))
length = x[0] - x[1]
iterations = int(np.ceil(np.log2(length/xtol)))
for iteration in range(iterations):
c = x[1] + length
F_c = f(c)
if F_c == 0.:
break
i = int(np.sign(F_c) == np.sign(F[1]))
x[i] = c
F[i] = F_c
length /= 2
return c
def realign(pmf):
"""realign(pmf) -> X, A: X & A allow random sampling from pmf in O(1) time.
pmf: defines any arbitrary probability mass function on the domain [0, len(pmf)); needn't be normalized.
A discrete random variable r, defined by pmf, is drawn from X, A as follows:
r = numpy.random.randint(len(pmf))
if X[r] > numpy.random.rand():
r = A[r]
See http://scorevoting.net/WarrenSmithPages/homepage/sampling.abs for a description of this method. This is an implementation of the method to longdouble precision.
"""
pmf = np.copy(pmf)
pmf /= pmf.mean()
L = len(pmf)
A = np.arange(L)
B = np.arange(L+1)
X = np.r_[pmf, np.longdouble(2)] # X[L] = sentinel.
i, j = 0, L
while True:
while X[B[i]]< 1: # In ascending order, find x_(b_i) > 1.
i += 1
while X[B[j]] >= 1: # In descending order, find x_(b_j) < 1.
j -= 1
if i >= j: # If ascent passes descent, end
break
B[i], B[j] = B[j], B[i] # Swap b_i, b_j
i = j
j += 1
# At this point, X[B][:j] is < 1 and X[B][j:] is > 1
# Also, X[B[i]] is < 1.
while i>=0:
while X[B[j]] <= 1: # Find x_(b_j) that needs probability mass
j += 1
if j > L-1: # Nobody needs probability mass, Done
break
X[B[j]] -= 1 - X[B[i]] # Send all of x_(b_i) excess probability mass to x_(b_j) (x_(b_i) is now done).
A[B[i]] = B[j]
if X[B[j]] < 1: # If x_(b_j) now has too much probability mass,
B[i], B[j] = B[j], B[i] # Swap, b_i, b_j, it becomes a donor.
j += 1
else: # Otherwise, leave it as an acceptor
i -= 1
new_pmf = np.copy(X[:-1])
for a_i, pmf_i in zip(A, X[:-1]):
new_pmf[a_i] += 1 - pmf_i
check_equal(new_pmf, pmf)
return X[:-1], A
if args.type == 'exponential':
volume = 1/np.longdouble(args.size)
f = lambda x: exp(-x)
CDF = lambda x: 1 - f(x)
elif args.type == 'normal':
oneHalf = 1/np.longdouble(2)
pi = sum([np.longdouble(pi_digit)*power(10, -i) for i, pi_digit in enumerate('314159265358979323846')])
alpha = sqrt(oneHalf*pi)
volume = sqrt(2*pi)/np.longdouble(2*args.size)
f = lambda x: exp(-oneHalf*x*x)
CDF = np.vectorize( lambda x: alpha*erf(sqrt(oneHalf)*x) )
def generate_X_i(vol=volume, last_Y_i=0):
lower_bound = 1
upper_bound = 10 if args.type == 'exponential' else 4
while lower_bound > vol: # Obtaining another layer is clearly hopeless after this point
# There are two solutions for X_i (a tall-skinny box and a long-flat box). We want the latter,
# so lower_bound is reduced gradually to avoid solving for the tall-skinny box.
try:
X_i = fsolve(lambda x: x*(f(x) - last_Y_i) - vol, lower_bound, upper_bound)
yield X_i
last_Y_i = f(X_i)
lower_bound = 0.9*X_i
upper_bound = X_i
except ValueError:
lower_bound *= 0.9
X = np.array(list(generate_X_i(vol=volume)) + [0])
Y = np.vectorize(f)(X)
dX = -np.diff(X) # dx_i = x_i-1 - x_i; x_-1 = x: f(x) = 0 = inf
dY = np.diff(Y)
check_equal(X[1:-1]*dY[:-1], volume)
V = -np.diff(CDF(np.r_[np.inf, X]))
V[1:] -= Y[:-1]*dX
V_tail = V[0]/V.sum()
print("Fraction of remainder in tail: {:.4%}".format(V_tail))
V = np.r_[V, np.zeros(args.size - len(V))]
layers = len(X) - 1
check_equal(args.size - layers, V.sum()/volume)
print("Mean V: {:g}".format((np.arange(len(V))*V).sum()/V.sum()))
V /= V.mean()
pmf, Map = realign(V)
print("static {:} X_0 = {:};".format(args.precision, X[0]))
ipmf = to_int(pmf*max_uint - max_int) # WDS sampler uses first 8 bits of random uint to sample an 8-bit integer
ipmf[pmf >= 1] = max_int
__relevant_equalities__ = """
dY_i = Y_i+1 - Y_i
dX_i = X_i - X_i+1
-m_i = Y_i - Y_i+1 / X_i - X_i+1
e_i = -m_i-1(x - X_i) + F_i - e^-x
0 = -m_i-1 + e^-x
x_max = -log(m_i-1)
y_max = Y_i(x_max) - f(x_max)
de_max = y_max / dY
de_max = (+m_i-1*(+log(m_i-1) + X_i) + F_i - e^log(m_i-1) / dY
de_max = { m_i-1 * (log[m_i-1] + X_i) + F_i - m_i-1 } /dY
"""
m = dY/dX
if args.type == 'exponential':
assert (m > Y[:-1]).all(), 'tangent line must be steeper than initial derivative'
assert (m < Y[1: ]).all(), 'tangent line must be shallower than final derivative'
epsilon = (Y[1:]-m*(1-X[1:]-np.log(m)))
E = epsilon/dY
print(f'Maximum deflection of f(x) relative to tangent line is {E.max()} and occurs above layer {E.argmax()}')
iE_max = to_int(E.max()*max_int)
print(f'static {c_int_type} iE_max = {iE_max};')
X /= max_int
Y /= max_int
elif args.type == 'normal':
print('__NORM_TAIL_BEGIN__', X[0])
inflection_point = 1
i_inflection = (X > inflection_point).sum()
assert X[i_inflection] < inflection_point, "Inflection point lies exactly on boundary between two layers"
print('static uint_fast8_t i_inflection = ', i_inflection)
E = np.zeros_like(X)
for i in range(1, len(X)):
delta_i = lambda x: x*f(x) - m[i-1]
X_max = fsolve(delta_i, X[i], X[i-1] if i != i_inflection else inflection_point) # X_max[i_inflection] should be max_x f(x) - Y_i_inflection(x) (not max_x abs(f(x) - Y_i_inflection(x)) ), because we want to sample points in the concave region.
assert X_max != 0, "Failed to find epsion_i for i = {:}".format(i)
Y_i = lambda x: Y[i] - m[i-1]*(x - X[i])
E[i] = np.abs( Y_i(X_max) - f(X_max) ) / dY[i-1]
iE = to_int(np.ceil(E*max_int))
print('static {:} iE_max = {:}, iE_min = {:};'.format(c_int_type, iE[i_inflection:].max(), iE[:i_inflection].max()))
X /= max_int
Y /= max_int
######### OUTPUT
output = open(args.type+"_layers.h", 'w')
short_type = 'exp' if args.type == 'exponential' else 'norm'
output.write("\n#define __{:}_LAYERS__ {:}\n".format(short_type.upper(), layers))
Arrays = dict(X=X, Y=Y, map=to_int(Map), ipmf=ipmf)
if args.type == 'normal':
Arrays['iE'] = iE
for name, data in Arrays.items():
output.write('static {dtype} __{short_type}_{name}__[{length}] = {{ {data} }};\n'.format(
dtype=args.precision if data.dtype == np.float128 else (str(data.dtype) + '_t'),
short_type=short_type,
name=name,
length=len(data),
data=', '.join(map(str,data))))
output.close()