-
Notifications
You must be signed in to change notification settings - Fork 193
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Restore type qualifiers dropped by b8cd84b
Spotted by scipy/scipy#21877 I've added the test case to our test suite so that we no longer regress on this one.
- Loading branch information
1 parent
b8cd84b
commit 3a75f3e
Showing
2 changed files
with
215 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,211 @@ | ||
import numpy as np | ||
|
||
|
||
#pythran export _Aij(float[:,:], int, int) | ||
#pythran export _Aij(int[:,:], int, int) | ||
def _Aij(A, i, j): | ||
"""Sum of upper-left and lower right blocks of contingency table.""" | ||
# See `somersd` References [2] bottom of page 309 | ||
return A[:i, :j].sum() + A[i+1:, j+1:].sum() | ||
|
||
|
||
#pythran export _Dij(float[:,:], int, int) | ||
#pythran export _Dij(int[:,:], int, int) | ||
def _Dij(A, i, j): | ||
"""Sum of lower-left and upper-right blocks of contingency table.""" | ||
# See `somersd` References [2] bottom of page 309 | ||
return A[i+1:, :j].sum() + A[:i, j+1:].sum() | ||
|
||
|
||
# pythran export _concordant_pairs(float[:,:]) | ||
# pythran export _concordant_pairs(int[:,:]) | ||
def _concordant_pairs(A): | ||
"""Twice the number of concordant pairs, excluding ties.""" | ||
# See `somersd` References [2] bottom of page 309 | ||
m, n = A.shape | ||
count = 0 | ||
for i in range(m): | ||
for j in range(n): | ||
count += A[i, j]*_Aij(A, i, j) | ||
return count | ||
|
||
|
||
# pythran export _discordant_pairs(float[:,:]) | ||
# pythran export _discordant_pairs(int[:,:]) | ||
def _discordant_pairs(A): | ||
"""Twice the number of discordant pairs, excluding ties.""" | ||
# See `somersd` References [2] bottom of page 309 | ||
m, n = A.shape | ||
count = 0 | ||
for i in range(m): | ||
for j in range(n): | ||
count += A[i, j]*_Dij(A, i, j) | ||
return count | ||
|
||
|
||
#pythran export _a_ij_Aij_Dij2(float[:,:]) | ||
#pythran export _a_ij_Aij_Dij2(int[:,:]) | ||
def _a_ij_Aij_Dij2(A): | ||
"""A term that appears in the ASE of Kendall's tau and Somers' D.""" | ||
# See `somersd` References [2] section 4: | ||
# Modified ASEs to test the null hypothesis... | ||
m, n = A.shape | ||
count = 0 | ||
for i in range(m): | ||
for j in range(n): | ||
count += A[i, j]*(_Aij(A, i, j) - _Dij(A, i, j))**2 | ||
return count | ||
|
||
|
||
#pythran export _compute_outer_prob_inside_method(int64, int64, int64, int64) | ||
def _compute_outer_prob_inside_method(m, n, g, h): | ||
""" | ||
Count the proportion of paths that do not stay strictly inside two | ||
diagonal lines. | ||
Parameters | ||
---------- | ||
m : integer | ||
m > 0 | ||
n : integer | ||
n > 0 | ||
g : integer | ||
g is greatest common divisor of m and n | ||
h : integer | ||
0 <= h <= lcm(m,n) | ||
Returns | ||
------- | ||
p : float | ||
The proportion of paths that do not stay inside the two lines. | ||
The classical algorithm counts the integer lattice paths from (0, 0) | ||
to (m, n) which satisfy |x/m - y/n| < h / lcm(m, n). | ||
The paths make steps of size +1 in either positive x or positive y | ||
directions. | ||
We are, however, interested in 1 - proportion to computes p-values, | ||
so we change the recursion to compute 1 - p directly while staying | ||
within the "inside method" a described by Hodges. | ||
We generally follow Hodges' treatment of Drion/Gnedenko/Korolyuk. | ||
Hodges, J.L. Jr., | ||
"The Significance Probability of the Smirnov Two-Sample Test," | ||
Arkiv fiur Matematik, 3, No. 43 (1958), 469-86. | ||
For the recursion for 1-p see | ||
Viehmann, T.: "Numerically more stable computation of the p-values | ||
for the two-sample Kolmogorov-Smirnov test," arXiv: 2102.08037 | ||
""" | ||
# Probability is symmetrical in m, n. Computation below uses m >= n. | ||
if m < n: | ||
m, n = n, m | ||
mg = m // g | ||
ng = n // g | ||
|
||
# Count the integer lattice paths from (0, 0) to (m, n) which satisfy | ||
# |nx/g - my/g| < h. | ||
# Compute matrix A such that: | ||
# A(x, 0) = A(0, y) = 1 | ||
# A(x, y) = A(x, y-1) + A(x-1, y), for x,y>=1, except that | ||
# A(x, y) = 0 if |x/m - y/n|>= h | ||
# Probability is A(m, n)/binom(m+n, n) | ||
# Optimizations exist for m==n, m==n*p. | ||
# Only need to preserve a single column of A, and only a | ||
# sliding window of it. | ||
# minj keeps track of the slide. | ||
minj, maxj = 0, min(int(np.ceil(h / mg)), n + 1) | ||
curlen = maxj - minj | ||
# Make a vector long enough to hold maximum window needed. | ||
lenA = min(2 * maxj + 2, n + 1) | ||
# This is an integer calculation, but the entries are essentially | ||
# binomial coefficients, hence grow quickly. | ||
# Scaling after each column is computed avoids dividing by a | ||
# large binomial coefficient at the end, but is not sufficient to avoid | ||
# the large dynamic range which appears during the calculation. | ||
# Instead we rescale based on the magnitude of the right most term in | ||
# the column and keep track of an exponent separately and apply | ||
# it at the end of the calculation. Similarly when multiplying by | ||
# the binomial coefficient | ||
dtype = np.float64 | ||
A = np.ones(lenA, dtype=dtype) | ||
# Initialize the first column | ||
A[minj:maxj] = 0.0 | ||
for i in range(1, m + 1): | ||
# Generate the next column. | ||
# First calculate the sliding window | ||
lastminj, lastlen = minj, curlen | ||
minj = max(int(np.floor((ng * i - h) / mg)) + 1, 0) | ||
minj = min(minj, n) | ||
maxj = min(int(np.ceil((ng * i + h) / mg)), n + 1) | ||
if maxj <= minj: | ||
return 1.0 | ||
# Now fill in the values. We cannot use cumsum, unfortunately. | ||
val = 0.0 if minj == 0 else 1.0 | ||
for jj in range(maxj - minj): | ||
j = jj + minj | ||
val = (A[jj + minj - lastminj] * i + val * j) / (i + j) | ||
A[jj] = val | ||
curlen = maxj - minj | ||
if lastlen > curlen: | ||
# Set some carried-over elements to 1 | ||
A[maxj - minj:maxj - minj + (lastlen - curlen)] = 1 | ||
|
||
return A[maxj - minj - 1] | ||
|
||
|
||
# pythran export siegelslopes(float32[:], float32[:], str) | ||
# pythran export siegelslopes(float64[:], float64[:], str) | ||
def siegelslopes(y, x, method): | ||
deltax = np.expand_dims(x, 1) - x | ||
deltay = np.expand_dims(y, 1) - y | ||
slopes, intercepts = [], [] | ||
|
||
for j in range(len(x)): | ||
id_nonzero, = np.nonzero(deltax[j, :]) | ||
slopes_j = deltay[j, id_nonzero] / deltax[j, id_nonzero] | ||
medslope_j = np.median(slopes_j) | ||
slopes.append(medslope_j) | ||
if method == 'separate': | ||
z = y*x[j] - y[j]*x | ||
medintercept_j = np.median(z[id_nonzero] / deltax[j, id_nonzero]) | ||
intercepts.append(medintercept_j) | ||
|
||
medslope = np.median(np.asarray(slopes)) | ||
if method == "separate": | ||
medinter = np.median(np.asarray(intercepts)) | ||
else: | ||
medinter = np.median(y - medslope*x) | ||
|
||
return medslope, medinter | ||
|
||
|
||
# pythran export _poisson_binom_pmf(float64[:]) | ||
def _poisson_binom_pmf(p): | ||
# implemented from poisson_binom [2] Equation 2 | ||
n = p.shape[0] | ||
pmf = np.zeros(n + 1, dtype=np.float64) | ||
pmf[:2] = 1 - p[0], p[0] | ||
for i in range(1, n): | ||
tmp = pmf[:i+1] * p[i] | ||
pmf[:i+1] *= (1 - p[i]) | ||
pmf[1:i+2] += tmp | ||
return pmf | ||
|
||
|
||
# pythran export _poisson_binom(int64[:], float64[:, :], str) | ||
def _poisson_binom(k, args, tp): | ||
# PDF/CDF of Poisson binomial distribution | ||
# k - arguments, shape (m,) | ||
# args - shape parameters, shape (n, m) | ||
# kind - {'pdf', 'cdf'} | ||
n, m = args.shape # number of shapes, batch size | ||
cache = {} | ||
out = np.zeros(m, dtype=np.float64) | ||
for i in range(m): | ||
p = tuple(args[:, i]) | ||
if p not in cache: | ||
pmf = _poisson_binom_pmf(args[:, i]) | ||
cache[p] = np.cumsum(pmf) if tp=='cdf' else pmf | ||
out[i] = cache[p][k[i]] | ||
return out |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters