-
Notifications
You must be signed in to change notification settings - Fork 0
/
pentropy_single_f.gss
261 lines (203 loc) · 6.68 KB
/
pentropy_single_f.gss
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
@ This is a Gauss code for the Computation of a Single PE point estimate of a univariate time series and associated Relative Frequencies @
@ This code accompanies the Aptech blog Permutation entropy that I wrote at https://www.aptech.com/blog/permutation-entropy/ @
@ This Gauss program requires a working copy of GAUSS 18+ @
@ Suggested Citation:
Clower, E. and M. Henry, 2018. "petropy: Gauss module to compute a Single Permutation Entropy point estimate,"
@
NEW;
CLS;
et = hsec ;
/*
** Format: H = petropy(x, D, tau, method, accu)
**
** Input:
** x Column vector, univariate time series data
**
** D Scalar, embedding dimension
**
** tau Scalar, embedding time delay that determines the time separation between time series values
**
** method String treatment of equal values
** 'noise' - add small noise
** 'equal' - allow same rank for equal values
** 'order' - consider order of appearance (first occurence --> lower rank)
**
** accu Scalar, parameter describing accuracy of the values in X
** by number of decimal places
**
** Outputs
** H Permutation Entropy
** Hnorm Normalized Permutation Entropy
** refreq Relative Frequencies with its plot
*/
// 1901 - 2016
row_range={1,31519};
x = csvReadM("DJIA_1901_to_2016_red.csv",row_range,2);
x = packr(x);
// Continuously compounded returns
y = ln(x[2:rows(x)]) - ln(x[1:rows(x)-1]);
x=y;
//embedding dimension
D = 4;
//embedding time delay
tau = 1;
// PE
print;
H = pentropy(x, D, tau, "noise");
print "Permutation Entropy point estimate =" H[1];
print;
// RELATIVE FREQUENCIES
rem = {1};
RelFreq = delRow(H, rem);
print "Relative Frequencies" RelFreq;
numrows = rows(RelFreq);
xx = seqa(1, 1, numrows);
plotXY(xx, RelFreq);
#include dynargs.dec
proc (1) = pentropy(x, n, tau, ...);
local n_dynargs, accu, method, M, equal, tmp, shift_map,
k, j, shift_mat_ind, tmp2, tmp3, shift_mat, ind_mat, ind_vec, sort_ind_mat, ia, ic,
tmp_sort, permpat_num, refreq, size,tmp0, denom,Hnorm,factor,OPt,mlt,OP_mlt,OP_id,unique_ids,
matches,id_tot,id_pct,head,body,tbl;
n_dynargs = COUNT_DYNARGS;
//Set accuracy of the values in X
if n_dynargs < 2;
accu = 4; // default
else;
accu = sysstate(GET_ONE_DYNARG, 2);
endif;
//Set method
if n_dynargs < 1;
method = "order"; // default
else;
method = sysstate(GET_ONE_DYNARG, 1);
endif;
//Get length of x
M = rows(x);
equal = 0;
if n*log(n)>15;
errorlogat "Permutation dimension too high";
end;
endif;
if (rows(tau)) > 1 and (rows(tau) != n-1);
errorlogat "Time lag vector has to have n-1 entries";
end;
endif;
if ((n-1)*minc(tau) >=M) or maxc(tau) >= M;
errorlogat "Too few data points for desired dimension and lags";
endif;
if lower(method) == "noise";
print "Method: Add small noise.";
x = x + rndn(M,1)*10^(-accu-1);
elseif lower(method) == "equal";
print "Method: Allow equal ranks.";
equal = 1;
elseif lower(method) == "order";
print "Method: Consider order of occurrence.";
else;
errorlogat "Unknown method. Default method 'order' used";
endif;
print;
if rows(tau) > 1;
tau = reshape(tau, rows(tau), 1);
tmp = 0|tau;
tau = sortc(tmp);
// build n x (M-tau(n))-matrix from shifted values in x
shift_mat = zeros(n, M-tau[n]);
for ii(1, n, 1);
k = tau[ii] + 1;
j = M - tau[n] + tau[ii];
shift_mat[ii, .] = x[k:j];
endfor;
else;
shift_mat_ind = zeros((M-(n-1)*tau)*n, 1);
tmp0 = seqa(1, tau, n); // Column vector
shift_mat_ind[1:n] = tmp0;
for i(1, (M-(n-1)*tau)-1, 1);
shift_mat_ind[(i*n+1):((i+1)*n)] = tmp0 + i;
endfor;
shift_mat = reshape(x[shift_mat_ind], (M-(n-1)*tau), n)';
endif;
if equal;
ind_mat = zeros(size(shift_mat));
for ii(1, cols(ind_mat), 1);
tmp = unique(shift_mat[., ii]);
ind_mat[.,ii] = indnv(shift_mat[., ii], tmp);
endfor;
else;
// INDEX ORDERS of smallest to largest
sort_ind_mat = sortmatind(shift_mat);
ind_mat = zeros(rows(sort_ind_mat), cols(sort_ind_mat));
// Filling the zero matrix with RANK ORDERS
for ii(1, cols(ind_mat), 1);
ind_mat[sort_ind_mat[., ii], ii] = seqa(1, 1, n);
endfor;
endif;
// Identifying each of the Permutations
OPt = (ind_mat-1)';
mlt = {1000 100 10 1}; /* n = 4 */
OP_mlt = OPt .* mlt;
OP_id = sumr(OP_mlt);
unique_ids = unique(OP_id);
//print unique_ids;
matches = OP_id .== unique_ids';
id_tot = sumc(matches);
id_pct = id_tot ./ rows(OP_id);
//print id_pct;
head = "ID" $~ "Count" $~ "%";
body = ntos(unique_ids ~ id_tot ~ id_pct);
tbl = head $| body;
print tbl;
print;
print;
// assign unique number to each pattern (base-n number system)
ind_vec = n.^(seqa(0, 1, n))' * (ind_mat-1);
tmp_sort = sortc(ind_vec',1);
tmp2 = unique(tmp_sort);
ia = indnv(tmp2, tmp_sort);
tmp = ia|(cols(ind_vec)+1);
// Number of admissible Permutation Patterns
permpat_num = diff(tmp);
// RELATIVE FREQUENCIES of PERMUTATIONS p[i]
refreq = permpat_num/sumc(permpat_num);
// PE of order D
H = -sumc(refreq .* log2(refreq));
factor = n!;
Hnorm = (1/log2(factor)) * H;
retp(Hnorm|refreq);
endp;
proc (1) = log2(x);
retp( log(x)/log(2));
endp;
proc (1) = diff(x);
local tmp;
tmp = x[2:rows(x)] - x[1:rows(x)-1];
retp(tmp);
endp;
proc (1) = sortmatind(x_mat);
local sort_ind_mat;
sort_ind_mat = zeros(rows(x_mat), cols(x_mat));
for i(1, cols(x_mat), 1);
sort_ind_mat[.,i] = sortind(x_mat[.,i]);
endfor;
retp(sort_ind_mat);
endp;
proc (1) = delRow(x, remove);
local mask, xout;
mask = zeros(rows(x), 1);
mask[remove] = ones(rows(remove), 1);
xout = delif(x, mask);
retp(xout);
endp;
proc (1) = getDateRange(X, range);
local ndigits, idx, X_range;
ndigits = floor(log(range)) + 1;
range = range .* 10^(14-ndigits);
idx = sumc((x[.,1] .< range[1]) ~ (x[.,1] .< range[2])) + 1;
X_range = X[idx[1]:idx[2],.];
retp(X_range);
endp;
print;
etp = hsec ;
print;
(hsec-et)/100 "seconds";