-
Notifications
You must be signed in to change notification settings - Fork 0
/
letterFrequencies.py
executable file
·52 lines (49 loc) · 1.17 KB
/
letterFrequencies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Approximate frequencies of letters in English. From Wikipedia.
# This includes an entry for space, and the sums with and without it.
# Divide as needed.
#
# sjd, from my general utils.
#
# Relative weight of each character in English text, most frequent first.
# The 26 letter weights add up to 99999, so each one reads as roughly
# "occurrences per 100,000 letters"; the space weight is an estimate.
letterFreqs = [
    (' ', 17000),  # Estimate for non-word chars
    ('e', 12702),
    ('t', 9056),
    ('a', 8167),
    ('o', 7507),
    ('i', 6966),
    ('n', 6749),
    ('s', 6327),
    ('h', 6094),
    ('r', 5987),
    ('d', 4253),
    ('l', 4025),
    ('c', 2782),
    ('u', 2758),
    ('m', 2406),
    ('w', 2360),
    ('f', 2228),
    ('g', 2015),
    ('y', 1974),
    ('p', 1929),
    ('b', 1492),
    ('v', 978),
    ('k', 772),
    ('j', 153),
    ('x', 150),
    ('q', 95),
    ('z', 74),
]

# Totals to divide a weight by to turn it into a probability:
# one counting the space entry, one counting letters only.
sumWithSpace = sum(weight for _char, weight in letterFreqs)
sumWithoutSpace = sum(weight for char, weight in letterFreqs if char != ' ')

# Running totals of the weights in letterFreqs order: partialSums[i] is the
# combined weight of entries 0..i (space included). Stored as floats.
partialSums = []
ps = 0.0
for _char, weight in letterFreqs:
    ps += weight
    partialSums.append(ps)
def randomWeightedLetter() -> str:
    """Return one random character, chosen in proportion to its weight.

    Draws an integer r uniformly from 1..sumWithSpace and returns the
    character of the first letterFreqs entry whose running total in
    partialSums reaches r. An entry with weight w is therefore picked
    with probability w / sumWithSpace.

    The draw uses the module-level generator of the random module, so
    calling random.seed() beforehand makes the sequence reproducible.

    Returns:
        The character (first element) of the selected letterFreqs entry.
    """
    import bisect
    import random

    r = random.randint(1, sumWithSpace)
    # partialSums never decreases, so a binary search finds the first
    # running total that is >= r; that index is the selected entry.
    i = bisect.bisect_left(partialSums, r)
    if i < len(letterFreqs):
        return letterFreqs[i][0]
    # Only reachable if the tables disagree with sumWithSpace.
    return ' '