Skip to content

Commit

Permalink
Restore type qualifiers dropped by b8cd84b
Browse files Browse the repository at this point in the history
Spotted by scipy/scipy#21877
I've added the test case to our test suite so that we no longer regress
on this one.
  • Loading branch information
serge-sans-paille committed Nov 13, 2024
1 parent b8cd84b commit 3a75f3e
Show file tree
Hide file tree
Showing 2 changed files with 215 additions and 0 deletions.
211 changes: 211 additions & 0 deletions pythran/tests/scipy/_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
import numpy as np


#pythran export _Aij(float[:,:], int, int)
#pythran export _Aij(int[:,:], int, int)
def _Aij(A, i, j):
"""Sum of upper-left and lower right blocks of contingency table."""
# See `somersd` References [2] bottom of page 309
return A[:i, :j].sum() + A[i+1:, j+1:].sum()


#pythran export _Dij(float[:,:], int, int)
#pythran export _Dij(int[:,:], int, int)
def _Dij(A, i, j):
"""Sum of lower-left and upper-right blocks of contingency table."""
# See `somersd` References [2] bottom of page 309
return A[i+1:, :j].sum() + A[:i, j+1:].sum()


# pythran export _concordant_pairs(float[:,:])
# pythran export _concordant_pairs(int[:,:])
def _concordant_pairs(A):
"""Twice the number of concordant pairs, excluding ties."""
# See `somersd` References [2] bottom of page 309
m, n = A.shape
count = 0
for i in range(m):
for j in range(n):
count += A[i, j]*_Aij(A, i, j)
return count


# pythran export _discordant_pairs(float[:,:])
# pythran export _discordant_pairs(int[:,:])
def _discordant_pairs(A):
"""Twice the number of discordant pairs, excluding ties."""
# See `somersd` References [2] bottom of page 309
m, n = A.shape
count = 0
for i in range(m):
for j in range(n):
count += A[i, j]*_Dij(A, i, j)
return count


#pythran export _a_ij_Aij_Dij2(float[:,:])
#pythran export _a_ij_Aij_Dij2(int[:,:])
def _a_ij_Aij_Dij2(A):
"""A term that appears in the ASE of Kendall's tau and Somers' D."""
# See `somersd` References [2] section 4:
# Modified ASEs to test the null hypothesis...
m, n = A.shape
count = 0
for i in range(m):
for j in range(n):
count += A[i, j]*(_Aij(A, i, j) - _Dij(A, i, j))**2
return count


#pythran export _compute_outer_prob_inside_method(int64, int64, int64, int64)
def _compute_outer_prob_inside_method(m, n, g, h):
"""
Count the proportion of paths that do not stay strictly inside two
diagonal lines.
Parameters
----------
m : integer
m > 0
n : integer
n > 0
g : integer
g is greatest common divisor of m and n
h : integer
0 <= h <= lcm(m,n)
Returns
-------
p : float
The proportion of paths that do not stay inside the two lines.
The classical algorithm counts the integer lattice paths from (0, 0)
to (m, n) which satisfy |x/m - y/n| < h / lcm(m, n).
The paths make steps of size +1 in either positive x or positive y
directions.
We are, however, interested in 1 - proportion to computes p-values,
so we change the recursion to compute 1 - p directly while staying
within the "inside method" a described by Hodges.
We generally follow Hodges' treatment of Drion/Gnedenko/Korolyuk.
Hodges, J.L. Jr.,
"The Significance Probability of the Smirnov Two-Sample Test,"
Arkiv fiur Matematik, 3, No. 43 (1958), 469-86.
For the recursion for 1-p see
Viehmann, T.: "Numerically more stable computation of the p-values
for the two-sample Kolmogorov-Smirnov test," arXiv: 2102.08037
"""
# Probability is symmetrical in m, n. Computation below uses m >= n.
if m < n:
m, n = n, m
mg = m // g
ng = n // g

# Count the integer lattice paths from (0, 0) to (m, n) which satisfy
# |nx/g - my/g| < h.
# Compute matrix A such that:
# A(x, 0) = A(0, y) = 1
# A(x, y) = A(x, y-1) + A(x-1, y), for x,y>=1, except that
# A(x, y) = 0 if |x/m - y/n|>= h
# Probability is A(m, n)/binom(m+n, n)
# Optimizations exist for m==n, m==n*p.
# Only need to preserve a single column of A, and only a
# sliding window of it.
# minj keeps track of the slide.
minj, maxj = 0, min(int(np.ceil(h / mg)), n + 1)
curlen = maxj - minj
# Make a vector long enough to hold maximum window needed.
lenA = min(2 * maxj + 2, n + 1)
# This is an integer calculation, but the entries are essentially
# binomial coefficients, hence grow quickly.
# Scaling after each column is computed avoids dividing by a
# large binomial coefficient at the end, but is not sufficient to avoid
# the large dynamic range which appears during the calculation.
# Instead we rescale based on the magnitude of the right most term in
# the column and keep track of an exponent separately and apply
# it at the end of the calculation. Similarly when multiplying by
# the binomial coefficient
dtype = np.float64
A = np.ones(lenA, dtype=dtype)
# Initialize the first column
A[minj:maxj] = 0.0
for i in range(1, m + 1):
# Generate the next column.
# First calculate the sliding window
lastminj, lastlen = minj, curlen
minj = max(int(np.floor((ng * i - h) / mg)) + 1, 0)
minj = min(minj, n)
maxj = min(int(np.ceil((ng * i + h) / mg)), n + 1)
if maxj <= minj:
return 1.0
# Now fill in the values. We cannot use cumsum, unfortunately.
val = 0.0 if minj == 0 else 1.0
for jj in range(maxj - minj):
j = jj + minj
val = (A[jj + minj - lastminj] * i + val * j) / (i + j)
A[jj] = val
curlen = maxj - minj
if lastlen > curlen:
# Set some carried-over elements to 1
A[maxj - minj:maxj - minj + (lastlen - curlen)] = 1

return A[maxj - minj - 1]


# pythran export siegelslopes(float32[:], float32[:], str)
# pythran export siegelslopes(float64[:], float64[:], str)
def siegelslopes(y, x, method):
deltax = np.expand_dims(x, 1) - x
deltay = np.expand_dims(y, 1) - y
slopes, intercepts = [], []

for j in range(len(x)):
id_nonzero, = np.nonzero(deltax[j, :])
slopes_j = deltay[j, id_nonzero] / deltax[j, id_nonzero]
medslope_j = np.median(slopes_j)
slopes.append(medslope_j)
if method == 'separate':
z = y*x[j] - y[j]*x
medintercept_j = np.median(z[id_nonzero] / deltax[j, id_nonzero])
intercepts.append(medintercept_j)

medslope = np.median(np.asarray(slopes))
if method == "separate":
medinter = np.median(np.asarray(intercepts))
else:
medinter = np.median(y - medslope*x)

return medslope, medinter


# pythran export _poisson_binom_pmf(float64[:])
def _poisson_binom_pmf(p):
# implemented from poisson_binom [2] Equation 2
n = p.shape[0]
pmf = np.zeros(n + 1, dtype=np.float64)
pmf[:2] = 1 - p[0], p[0]
for i in range(1, n):
tmp = pmf[:i+1] * p[i]
pmf[:i+1] *= (1 - p[i])
pmf[1:i+2] += tmp
return pmf


# pythran export _poisson_binom(int64[:], float64[:, :], str)
def _poisson_binom(k, args, tp):
# PDF/CDF of Poisson binomial distribution
# k - arguments, shape (m,)
# args - shape parameters, shape (n, m)
# kind - {'pdf', 'cdf'}
n, m = args.shape # number of shapes, batch size
cache = {}
out = np.zeros(m, dtype=np.float64)
for i in range(m):
p = tuple(args[:, i])
if p not in cache:
pmf = _poisson_binom_pmf(args[:, i])
cache[p] = np.cumsum(pmf) if tp=='cdf' else pmf
out[i] = cache[p][k[i]]
return out
4 changes: 4 additions & 0 deletions pythran/types/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,8 @@ def visit_Assign(self, node):
self.visit(node.value)
for t in node.targets:
self.combine(t, None, node.value)
if t in self.curr_locals_declaration:
self.result[t] = self.get_qualifier(t)(self.result[t])
if isinstance(t, ast.Subscript):
if self.visit_AssignedSubscript(t):
for alias in self.strict_aliases[t.value]:
Expand All @@ -474,6 +476,8 @@ def visit_AnnAssign(self, node):
self.combine(t, None, t)
else:
self.combine(t, None, node.value)
if t in self.curr_locals_declaration:
self.result[t] = self.get_qualifier(t)(self.result[t])

if isinstance(t, ast.Subscript):
if self.visit_AssignedSubscript(t):
Expand Down

0 comments on commit 3a75f3e

Please sign in to comment.