Skip to content

Commit

Permalink
Merge pull request #66 from DynamicsAndNeuralSystems/jmoo2880-pypi-pu…
Browse files Browse the repository at this point in the history
…blish

Add new SPI (GWTau) and benchmark generation script + workflow.
  • Loading branch information
joshuabmoore authored Apr 10, 2024
2 parents 6c8d9e0 + c337bdf commit e2fe31d
Show file tree
Hide file tree
Showing 10 changed files with 171 additions and 19 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/run_dataset_generation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: Generate benchmarking dataset tables

on:
workflow_dispatch:

jobs:
test-ubuntu:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9"]
steps:
- uses: actions/checkout@v4
- name: Setup python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install octave
run: |
sudo apt-get update
sudo apt-get install -y build-essential octave
- name: Install pyspi dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install .
- name: Run data generation
run: |
python tests/generate_benchmark_tables.py
- name: Upload artifact
uses: actions/upload-artifact@v3
with:
name: benchmark-tables
path: tests/CML7_benchmark_tables_new.pkl
4 changes: 0 additions & 4 deletions .github/workflows/run_unit_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,3 @@ jobs:
- name: Run pyspi SPI unit tests
run: |
pytest -v ./tests/test_SPIs.py


2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "pyspi"
version = "1.0.1"
version = "1.0.2"
authors = [
{ name ="Oliver M. Cliff", email="oliver.m.cliff@gmail.com"},
]
Expand Down
11 changes: 11 additions & 0 deletions pyspi/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,17 @@
statistic: max
squared: True

# Gromov-Wasserstain Distance
GromovWasserstainTau:
labels:
- unsigned
- distance
- unordered
- nonlinear
- undirected
dependencies:
configs:

.statistics.causal:
# Additive noise model
AdditiveNoiseModel:
Expand Down
11 changes: 11 additions & 0 deletions pyspi/fast_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,17 @@
- mode: euclidean
statistic: max
squared: True

# Gromov-Wasserstain Distance
GromovWasserstainTau:
labels:
- unsigned
- distance
- unordered
- nonlinear
- undirected
dependencies:
configs:

.statistics.causal:
# Additive noise model
Expand Down
59 changes: 59 additions & 0 deletions pyspi/statistics/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,3 +259,62 @@ def bivariate(self, data, i=None, j=None):
data.barycenter[self._mode][(j, i)] = data.barycenter[self._mode][(i, j)]

return self._statfn(bc)


class GromovWasserstainTau(Undirected, Unsigned):
"""Gromov-Wasserstain distance (GWTau)"""

name = "Gromov-Wasserstain Distance"
identifier = "gwtau"
labels = ["unsigned", "distance", "unordered", "nonlinear", "undirected"]

@staticmethod
def vec_geo_dist(x):
diffs = np.diff(x, axis=0)
distances = np.linalg.norm(diffs, axis=1)
return np.cumsum(distances)

@staticmethod
def wass_sorted(x1, x2):
x1 = np.sort(x1)[::-1] # sort in descending order
x2 = np.sort(x2)[::-1]

if len(x1) == len(x2):
res = np.sqrt(np.mean((x1 - x2) ** 2))
else:
N, M = len(x1), len(x2)
i_ratios = np.arange(1, N + 1) / N
j_ratios = np.arange(1, M + 1) / M


min_values = np.minimum.outer(i_ratios, j_ratios)
max_values = np.maximum.outer(i_ratios - 1/N, j_ratios - 1/M)

lam = np.where(min_values > max_values, min_values - max_values, 0)

diffs_squared = (x1[:, None] - x2) ** 2
my_sum = np.sum(lam * diffs_squared)

res = np.sqrt(my_sum)

return res

@staticmethod
def gwtau(xi, xj):
timei = np.arange(len(xi))
timej = np.arange(len(xj))
traji = np.column_stack([timei, xi])
trajj = np.column_stack([timej, xj])

vi = GromovWasserstainTau.vec_geo_dist(traji)
vj = GromovWasserstainTau.vec_geo_dist(trajj)
gw = GromovWasserstainTau.wass_sorted(vi, vj)

return gw

@parse_bivariate
def bivariate(self, data, i=None, j=None):
x, y = data.to_numpy()[[i, j]]
# insert compute SPI code here (computes on x and y)
stat = self.gwtau(x, y)
return stat
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
'data/standard_normal.npy',
'data/cml7.npy']},
include_package_data=True,
version='1.0.1',
version='1.0.2',
description='Library for pairwise analysis of time series data.',
author='Oliver M. Cliff',
author_email='oliver.m.cliff@gmail.com',
Expand Down
Binary file modified tests/CML7_benchmark_tables.pkl
Binary file not shown.
40 changes: 40 additions & 0 deletions tests/generate_benchmark_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import numpy as np
import dill
from pyspi.calculator import Calculator

""""Script to generate benchmarking dataset"""
def get_benchmark_tables(calc_list):
# get the spis from the first calculator
spis = list(calc_list[1].spis.keys())
num_procs = calc_list[1].dataset.n_processes
# create a dict to store the mean and std for each spi
benchmarks = {key : {'mean': None, 'std': None} for key in spis}
num_trials = len(calc_list)
for spi in spis:
mpi_tensor = np.zeros(shape=(num_trials, num_procs, num_procs))
for (index, calc) in enumerate(calc_list):
mpi_tensor[index, :, :] = calc.table[spi].to_numpy()
mean_matrix = np.mean(mpi_tensor, axis=0) # compute element-wise mean across the first dimension
std_matrix = np.std(mpi_tensor, axis=0) # compute element-wise std across the first dimension
benchmarks[spi]['mean'] = mean_matrix
benchmarks[spi]['std'] = std_matrix

return benchmarks

# load and transpose dataset
dataset = np.load("pyspi/data/cml7.npy").T

# create list to store the calculator objects
store_calcs = list()

for i in range(75):
np.random.seed(42)
calc = Calculator(dataset=dataset)
calc.compute()
store_calcs.append(calc)

mpi_benchmarks = get_benchmark_tables(store_calcs)

# save data
with open("tests/CML7_benchmark_tables_new.pkl", "wb") as f:
dill.dump(mpi_benchmarks, f)
26 changes: 13 additions & 13 deletions tests/test_SPIs.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,9 @@ def test_mpi(est, est_ob, mpi_benchmark, mpi_new, spi_warning_logger):
mean_table = mpi_benchmark['mean']
std_table = mpi_benchmark['std']

# check std stable for zeros and impute with smallest non-zero value
min_nonzero_std = np.nanmin(std_table[std_table > 0])
std_table[std_table == 0] = min_nonzero_std


# check std table for zeros and impute with smallest non-zero value
std_table = np.where(std_table == 0, 1e-10, std_table)

# check that the shapes are equal
assert mean_table.shape == mpi_new.shape, f"SPI: {est}| Different table shapes. "

Expand All @@ -79,24 +77,26 @@ def test_mpi(est, est_ob, mpi_benchmark, mpi_new, spi_warning_logger):
# get the module name for easy reference
module_name = est_ob.__module__.split(".")[-1]

if (mpi_new == mpi_mean).all() == False:
if not np.allclose(mpi_new, mpi_mean):
# tables are not equivalent, quantify the difference by z-scoring.
diff = abs(mpi_new - mpi_mean)
zscores = diff/std_table

idxs_greater_than_thresh = np.argwhere(zscores > zscore_threshold)

if len(idxs_greater_than_thresh) > 0:
sigs = list()
for idx in idxs_greater_than_thresh:
sigs.append(zscores[idx[0], idx[1]])
sigs = zscores[idxs_greater_than_thresh[:, 0], idxs_greater_than_thresh[:, 1]]
# get the max
max_z = max(sigs)

# number of interactions
num_iteractions = (mpi_new.shape[0] * mpi_new.shape[1]) - mpi_new.shape[0]
num_interactions = mpi_new.size - mpi_new.shape[0]
# count exceedances
num_exceed = len(sigs)

if isSymmetric:
# number of unique exceedences is half
num_exceed /= 2
num_iteractions /= 2
num_exceed //= 2
num_interactions //= 2

spi_warning_logger(est, module_name, max_z, int(num_exceed), int(num_iteractions))
spi_warning_logger(est, module_name, max_z, int(num_exceed), int(num_interactions))

0 comments on commit e2fe31d

Please sign in to comment.