-
Notifications
You must be signed in to change notification settings - Fork 1
/
cprimes.pyx
346 lines (285 loc) · 11.1 KB
/
cprimes.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
#!/usr/bin/env python3
''' This is for experiments with different version of pure Python3, Numpy and
Cython improvements - especially latest Cython 0.27+.
Comparing different takes on 2-3 selected sieves.
It seems that instead of improving slow algorithm with numpy and especially
cythonization it is more way more effective to simply choose faster algo.
'''
import math
import cython
import Cython.Compiler.Options; Cython.Compiler.Options.annotate = True
cimport numpy as np
# Add CFLAG in virtualenv to link to brew numpy
# export CFLAGS=-I/usr/local/Cellar/numpy/1.13.3/lib/python3.6/site-packages/numpy/core/include
import numpy as np
# from libc.math cimport sqrt
from cpython cimport array
import array
from libc.stdlib cimport malloc, free
from libc.string cimport memset
cdef extern from "stdbool.h":
ctypedef bint bool
cdef extern from "math.h":
double sqrt(double m)
# cdef extern from "math.h":
# int div(int x, int y)
cdef unsigned long long divx(unsigned long long x, unsigned long long y):
return x/y
cdef bool * getPrimes1(unsigned long long n):
# like primesfrom3to
cdef unsigned long long i, j, half, sq
half = divx(n, 2)
sq = int(n**0.5) + 1
cdef bool *primes = <bool*>malloc(half * sizeof(bool))
memset(primes, 1, half * sizeof(bool))
# There is big gain here from changing for loops to while
i = 3
while i < sq:
if primes[divx(i, 2)]:
j = divx(i * i, 2)
while j < half:
primes[j] = False
j += i
i += 2
return primes;
def cprimes_primesfrom3to(unsigned long long n):
res = <bool[:n // 2]> (getPrimes1(n))
return 2 * np.nonzero(res)[0][1::] + 1
cdef bool * getPrimes2(unsigned long long n):
# like ajs_primes3a
cdef bool *primes = <bool*>malloc(n * sizeof(bool))
memset(primes, 1, n * sizeof(bool))
cdef unsigned long long i, j, half, sq
half = divx(n, 2)
sq = int(sqrt(n)) + 1
primes[0] = False
primes[1] = False
primes[2] = False
i = 4
# There is big gain from changing for loops to while
while i < n:
primes[i] = False
i += 2
i = 3
while i < sq:
j = i * 2
while j < n:
primes[j] = False
j += i
i += 2
return primes;
def cprimes_ajs(unsigned long long n):
res = np.asarray(<bool[:n]> (getPrimes2(n)))
return np.where(res)[0]
def sundaram3_1(n):
# pure python3 implementation - basic case
numbers = list(range(3, n + 1, 2))
half = n // 2
initial = 4
for step in range(3, n + 1, 2):
for i in range(initial, half, step):
numbers[i - 1] = 0
initial += 2 * (step + 1)
if initial > half:
return [2] + list(filter(None, numbers))
def sundaram3_2(n):
# Using numpy arrays
numbers = np.arange(3, n + 1, 2)
half = (n) // 2
initial = 4
for step in range(3, n + 1, 2):
for i in range(initial, half, step):
numbers[i - 1] = 0
initial += 2 * (step + 1)
if initial > half:
res = np.asarray(numbers)
return res[res>0]
def sundaram3_2_1(n):
# Using numpy arrays
# Declaring numbers as np.uint64 improved slightly
# (uint64 0 to 18446744073709551615)
numbers = np.arange(3, n + 1, 2, dtype=np.uint64)
half = (n) // 2
initial = 4
for step in range(3, n + 1, 2):
for i in range(initial, half, step):
numbers[i - 1] = 0
initial += 2 * (step + 1)
if initial > half:
res = np.asarray(numbers)
return res[res>0]
def sundaram3_3_1(n):
# Cython arrays from pure python3 using modern Cython arrays and views
# http://cython.readthedocs.io/en/latest/src/tutorial/array.html
cdef array.array numbers = array.array('i', range(3, n + 1, 2))
cdef int[:] numbers_view = numbers
half = (n) // 2
initial = 4
for step in range(3, n + 1, 2):
for i in range(initial, half, step):
numbers_view[i - 1] = 0 # warning - index should be typed for
# more effective access
initial += 2 * (step + 1)
if initial > half:
# return [2] + list(filter(None, numbers_view)) - is slower
return [2] + list(filter(None, numbers))
def sundaram3_3_2(unsigned long long n):
# Cython arrays from pure python3 like 3_3_1 but with declared index types
# Not declaring index types we get Cython warning
# http://cython.readthedocs.io/en/latest/src/tutorial/array.html
# It is best solution so far
cdef unsigned long long step, i
cdef array.array numbers = array.array('L', range(3, n + 1, 2))
cdef unsigned long long[:] numbers_view = numbers
half = (n) // 2
initial = 4
for step in range(3, n + 1, 2):
for i in range(initial, half, step):
numbers_view[i - 1] = 0
initial += 2 * (step + 1)
if initial > half:
return [2] + list(filter(None, numbers))
@cython.cdivision(True) # turn division by zero checking off - C division
@cython.boundscheck(False) # turn array bounds check off
@cython.wraparound(False) # turn negative indexing off
def sundaram3_3_3(unsigned long long n):
# Cython arrays from pure python3 like 3_3_2 but with checks off
# http://cython.readthedocs.io/en/latest/src/tutorial/array.html
# No performance gain compared to 3_3_2...
cdef unsigned long long step, half, initial, i
cdef array.array numbers = array.array('L', range(3, n + 1, 2))
cdef unsigned long long[:] numbers_view = numbers
half = (n) / 2 # here we could use / in place of // (switching cdivision)
initial = 4
for step in range(3, n + 1, 2):
for i in range(initial, half, step):
numbers_view[i - 1] = 0
initial += 2 * (step + 1)
if initial > half:
return [2] + list(filter(None, numbers))
def sundaram3_4(unsigned long long n):
# Mixing numpy arrays and cython array views
# Seems like consistently the worst solution
#
cdef unsigned long long s, i
# cdef np.ndarray[np.longlong_t, ndim=1] numbers = np.arange(3, n + 1, 2, np.longlong)
cdef unsigned long long [:] numbers_view = np.arange(3, n + 1, 2, np.ulonglong)
cdef unsigned long long half = (n) // 2
cdef unsigned long long initial = 4
cdef unsigned long long [:] s_view = np.arange(3, n + 1, 2, np.ulonglong)
cdef unsigned long long [:] i_view
s = 3
while s in s_view:
# for s in s_view:
i_view = np.arange(initial, half, s, np.ulonglong)
i = initial
while i in i_view:
#for i in i_view:
numbers_view[i - 1] = 0
i += s
initial += 2 * (s + 1)
if initial > half:
numbers = np.asarray(numbers_view)
return numbers[numbers>0]
i += 2
def ambi_sieve_1(unsigned long long n):
# Cythonized from pure python - very bad! Big performance penalty!
cdef unsigned long long m
cdef array.array s = array.array('L', range(3, n, 2))
cdef unsigned long long[:] s_view = s
cdef array.array em = array.array('L', range(3, int(sqrt(n)) + 1, 2))
cdef unsigned long long[:] m_view = em
for m in m_view:
if s_view[(m - 3) // 2]:
s_view[(m * m - 3) // 2::m] = 0
return list(filter(None, s))
# @cython.cdivision(True) # turn division by zero checking off
# @cython.boundscheck(False) # turn array bounds check off
# @cython.wraparound(False) # turn negative indexing off
def ambi_sieve_2(unsigned long long n):
# Mixing numpy and Cython. No practical improvement.
# C division gives zero gain here
cdef unsigned long long m
cdef np.ndarray[np.longlong_t, ndim=1] s = np.arange(3, n, 2, np.longlong)
cdef np.ndarray[np.longlong_t, ndim=1] em = np.arange(3, int(sqrt(n)) + 1, 2, np.longlong)
for m in em:
if s[(m - 3) // 2]:
s[(m * m - 3) // 2::m] = 0
return s[s>0]
# @cython.cdivision(True) # turn division by zero checking off
# @cython.boundscheck(False) # turn array bounds check off
# @cython.wraparound(False) # turn negative indexing off
def primesfrom3to_1(unsigned long long n):
# Python array.array to cython view but leaving numpy zeros array sieve
# No effect
cdef unsigned long long i
cdef sieve = np.ones(n // 2, dtype=np.bool)
cdef array.array iv = array.array('L', range(3, int(sqrt(n)) + 1, 2))
cdef unsigned long long[:] i_view = iv
for i in i_view:
if sieve[i // 2]:
sieve[i * i // 2::i] = False
return 2 * np.nonzero(sieve)[0][1::] + 1
# @cython.cdivision(True) # turn division by zero checking off
# @cython.boundscheck(False) # turn array bounds check off
# @cython.wraparound(False) # turn negative indexing off
def primesfrom3to_2(unsigned long long n):
# Cythonizing single array but leaving main sieve in numpy
# No effect
cdef unsigned long long i
cdef sieve = np.ones(n // 2, dtype=np.bool)
cdef unsigned long long [:] i_view = np.arange(3, int(sqrt(n)) + 1, 2, np.ulonglong)
for i in i_view:
if sieve[i // 2]:
sieve[i * i // 2::i] = False
return 2 * np.nonzero(sieve)[0][1::] + 1
def primesfrom3to_3(unsigned long long n):
# Cythonizing numpy zeros array
cdef unsigned long long i
cdef np.ndarray[np.uint8_t, ndim=1] sieve = np.ones(n // 2, dtype=np.uint8)
for i in range(3, int(n**0.5) + 1, 2):
if sieve[i // 2]:
sieve[i * i // 2::i] = False
return 2 * np.nonzero(sieve)[0][1::] + 1
def primesfrom3to_4(unsigned long long n):
# Cythonizing both numpy arrays.
cdef unsigned long long i, half, i2, k
half = n // 2
cdef sieve = np.ones(half, dtype=np.uint8)
cdef unsigned char [:] sieve_view = sieve
cdef unsigned long long [:] i_view = np.arange(3, int(sqrt(n)) + 1, 2, np.ulonglong)
for i in i_view:
i2 = i // 2
if sieve_view[i2]:
k = i * i // 2
sieve_view[k::i] = False
return 2 * np.nonzero(sieve_view)[0][1::] + 1
def primesfrom3to_5(unsigned long long n):
# Terrible idea
cdef unsigned long long i, j, half
half = divx(n, 2)
cdef array.array sieve = array.array('B', [1] * (half))
cdef unsigned char [:] sieve_view = sieve
for i in range(3, int(sqrt(n)) + 1, 2):
if sieve[divx(i, 2)]:
for j in range(divx(i*i, 2), half, i):
sieve[j] = False
return 2 * np.nonzero(sieve_view)[0][1::] + 1
# return np.asarray(sieve_view)
def prime6(unsigned long long n):
cdef unsigned long long factor
cdef np.ndarray[np.longlong_t, ndim=1] primes = np.arange(3, n + 1, 2)
cdef isprime = np.ones((n - 1) // 2, dtype=np.bool)
for factor in primes[:int(sqrt(n))]:
if isprime[(factor - 2) // 2]:
isprime[(factor * 3 - 2) // 2:(n - 1) // 2:factor] = False
return np.insert(primes[isprime], 0, 2)
def ajs_primes3a(unsigned long long n):
cdef unsigned long long idx
cdef sieve = np.ones((n), dtype=np.bool)
sieve[0] = False
sieve[1] = False
sieve[4::2] = False
for idx in np.arange(3, int(sqrt(n)) + 1, 2):
sieve[idx * 2::idx] = False
return np.where(sieve)[0]