From 936aa48caaea8e0956627313fb5f8f86baaa6b2d Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 10 Apr 2024 10:06:29 -0700 Subject: [PATCH] deprecate TableCollection.fs * remove scipy as a dependency Closes #1271 --- doc/_toc.yml | 2 +- doc/misc/changelog.md | 2 +- fwdpy11/_types/table_collection.py | 4 +- pyproject.toml | 1 - requirements/minimal_install_requirements.txt | 1 - tests/test_TableCollection_fs.py | 223 ------------------ 6 files changed, 5 insertions(+), 228 deletions(-) delete mode 100644 tests/test_TableCollection_fs.py diff --git a/doc/_toc.yml b/doc/_toc.yml index 138dc318b..5e66ea78a 100644 --- a/doc/_toc.yml +++ b/doc/_toc.yml @@ -72,7 +72,7 @@ parts: - file: pages/mvdes - file: pages/tsoverview - file: pages/tstypes - - file: pages/tablefs + # - file: pages/tablefs - file: pages/recorders - caption: Technical details chapters: diff --git a/doc/misc/changelog.md b/doc/misc/changelog.md index 8c1a6b453..c9dde7fc2 100644 --- a/doc/misc/changelog.md +++ b/doc/misc/changelog.md @@ -1494,7 +1494,7 @@ release candidates (see below) plus the following: {issue}`389` {issue}`390` {issue}`392` -* {func}`fwdpy11.TableCollection.fs` added. See {ref}`tablefs`. +* {func}`fwdpy11.TableCollection.fs` added. See `tablefs`. PR {pr}`387` PR {pr}`399` * Creating populations from `msprime` input improved. diff --git a/fwdpy11/_types/table_collection.py b/fwdpy11/_types/table_collection.py index 1da7e0020..f8d444a40 100644 --- a/fwdpy11/_types/table_collection.py +++ b/fwdpy11/_types/table_collection.py @@ -1,7 +1,7 @@ +from deprecated import deprecated from typing import Iterable import numpy as np -import scipy.sparse # type: ignore from .._fwdpy11 import Edge, MutationRecord, Node, Site, ll_TableCollection @@ -181,6 +181,7 @@ def _ndfs( later. """ from . import TreeIterator + import scipy.sparse # type: ignore shapes = tuple(len(i) + 1 for i in samples) dok_JFS = [scipy.sparse.dok_matrix(shapes, dtype=np.int32) for i in windows] @@ -240,6 +241,7 @@ def _fs_implementation(self, samples, windows, include_function, simplify): simplify, ) + @deprecated(reason="dependency on scikit is a problem") def fs( self, samples, diff --git a/pyproject.toml b/pyproject.toml index 9113a18ae..150537962 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ dependencies = [ "attrs >= 20.3.0", "black", "numpy < 2.0", - "scipy", "tskit >= 0.5.6", "demes ~= 0.2.2", "Deprecated", diff --git a/requirements/minimal_install_requirements.txt b/requirements/minimal_install_requirements.txt index d4b7cadb7..401e1d5af 100644 --- a/requirements/minimal_install_requirements.txt +++ b/requirements/minimal_install_requirements.txt @@ -3,5 +3,4 @@ attrs >= 20.3.0 demes == 0.2.2 numpy < 2 tskit >= 0.5.6 -scipy Deprecated diff --git a/tests/test_TableCollection_fs.py b/tests/test_TableCollection_fs.py deleted file mode 100644 index 572b85592..000000000 --- a/tests/test_TableCollection_fs.py +++ /dev/null @@ -1,223 +0,0 @@ -# -# Copyright (C) 2020 Kevin Thornton -# -# This file is part of fwdpy11. -# -# fwdpy11 is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# fwdpy11 is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with fwdpy11. If not, see . -# - -import unittest - -import demes -import msprime -import numpy as np - -import fwdpy11 - -# NOTE: these tests can all be improved in the future -# by having a conversion from msprime/tskit that lifts -# over mutations assuming that they are neutral. -# With that in place, we can compare to the msprime/tskit -# genotype matrix, which makes for a more independent test -# of correctness. - - -def fs_from_ndarray(gm): - gm_rc = np.sum(gm, axis=1) - gm_uc = np.unique(gm_rc, return_counts=True) - gm_fs = np.zeros(gm.shape[1] + 1, dtype=np.int32) - gm_fs[gm_uc[0]] += gm_uc[1] - return gm_fs - - -class TestSingleDemeCase(unittest.TestCase): - @classmethod - def setUpClass(self): - Ne = 1000 - Nr = 100.0 - L = 1 - ts = msprime.sim_ancestry( - 2 * Ne, - population_size=Ne, - recombination_rate=Nr / Ne / L, - sequence_length=L, - random_seed=666, - discrete_genome=False, - ) - self.pop = fwdpy11.DiploidPopulation.create_from_tskit(ts) - rng = fwdpy11.GSLrng(12343) - fwdpy11.infinite_sites(rng, self.pop, Nr / Ne) - - def test_comparison_to_genotype_matrix(self): - dm = fwdpy11.data_matrix_from_tables( - self.pop.tables, - self.pop.alive_nodes, - record_neutral=True, - record_selected=False, - ) - gm = np.array(dm.neutral, copy=False) - gm_fs = fs_from_ndarray(gm) - tc_fs = self.pop.tables.fs([self.pop.alive_nodes]) - self.assertTrue(np.array_equal(gm_fs[1:-1], tc_fs.data[1:-1])) - self.assertTrue(np.array_equal(gm_fs, tc_fs.data)) - - def test_comparison_to_genotype_matrix_for_sample(self): - A, B = 103, 210 - dm = fwdpy11.data_matrix_from_tables( - self.pop.tables, - self.pop.alive_nodes[A:B], - record_neutral=True, - record_selected=False, - ) - gm = np.array(dm.neutral, copy=False) - gm_fs = fs_from_ndarray(gm) - tc_fs = self.pop.tables.fs([self.pop.alive_nodes[A:B]]) - self.assertTrue(np.array_equal(gm_fs[1:-1], tc_fs.data[1:-1])) - - def test_skipping_neutral_variants(self): - tc_fs = self.pop.tables.fs([self.pop.alive_nodes], include_neutral=False) - self.assertEqual(tc_fs.sum(), 0) - - def test_empty_samples_list(self): - with self.assertRaises(ValueError): - self.pop.tables.fs([]) - - def test_nodes_out_of_range(self): - with self.assertRaises(ValueError): - samples = np.array([len(self.pop.tables.nodes)], dtype=np.int32) - self.pop.tables.fs([samples]) - - def test_two_overlapping_windows(self): - tc_fs = self.pop.tables.fs([self.pop.alive_nodes]) - wfs = self.pop.tables.fs( - [self.pop.alive_nodes], windows=[(0, 1.0 / 3.0), (1.0 / 3.0, 1.0)] - ) - self.assertTrue(np.array_equal(wfs, tc_fs)) - - def test_three_overlapping_windows(self): - tc_fs = self.pop.tables.fs([self.pop.alive_nodes]) - wfs = self.pop.tables.fs( - [self.pop.alive_nodes], - windows=[(0, 1.0 / 3.0), (1.0 / 3.0, 2.0 / 3.0), (2.0 / 3.0, 1)], - ) - self.assertTrue(np.array_equal(wfs, tc_fs)) - - def test_random_number_of_windows(self): - nw = np.random.randint(100, 300) - lefts = np.arange(nw) / (nw - 1) - windows = [(lefts[i], lefts[i + 1]) for i in range(len(lefts) - 1)] - tc_fs = self.pop.tables.fs([self.pop.alive_nodes]) - wfs = self.pop.tables.fs([self.pop.alive_nodes], windows=windows) - self.assertTrue(np.array_equal(wfs, tc_fs)) - - def test_separated_windows(self): - windows = [(0, 0.25), (0.66, 0.9)] - dm = fwdpy11.data_matrix_from_tables( - self.pop.tables, - self.pop.alive_nodes, - record_neutral=True, - record_selected=False, - ) - gm = np.array(dm.neutral, copy=False) - gm_pos = np.array([self.pop.mutations[k].pos for k in dm.neutral_keys]) - gm_pos_in_windows = np.where( - (gm_pos < 0.25) | ((gm_pos >= 0.66) & (gm_pos < 0.9)) - )[0] - gm = gm[gm_pos_in_windows, :] - gm_fs = fs_from_ndarray(gm) - tc_fs = self.pop.tables.fs([self.pop.alive_nodes], windows=windows) - self.assertTrue(np.array_equal(gm_fs, tc_fs)) - - -class TestTwoDemeCase(unittest.TestCase): - @classmethod - def setUpClass(self): - Ne = 1000 - Nr = 100.0 - yaml = f""" - time_units: generations - demes: - - name: deme0 - epochs: - - start_size: {Ne/2} - - name: deme1 - epochs: - - start_size: {Ne/2} - pulses: - - sources: [deme1] - dest: deme0 - proportions: [1.0] - time: {Ne} - """ - demography = msprime.Demography.from_demes(demes.loads(yaml)) - ts = msprime.sim_ancestry( - samples={0: Ne / 2, 1: Ne / 2}, - demography=demography, - recombination_rate=Nr / Ne, - sequence_length=1.0, - discrete_genome=False, - random_seed=666, - ) - self.pop = fwdpy11.DiploidPopulation.create_from_tskit(ts) - rng = fwdpy11.GSLrng(12343) - fwdpy11.infinite_sites(rng, self.pop, Nr / Ne) - - def test_marginal_deme_fs(self): - a = self.pop.alive_nodes - nodes = np.array(self.pop.tables.nodes, copy=False) - d0 = a[np.where(nodes["deme"][a] == 0)[0]] - d1 = a[np.where(nodes["deme"][a] == 1)[0]] - - for samples in (d0, d1): - tc_fs = self.pop.tables.fs([samples]) - dm = fwdpy11.data_matrix_from_tables( - self.pop.tables, samples, record_neutral=True, record_selected=True - ) - gm = np.array(dm.neutral, copy=False) - gm_fs = fs_from_ndarray(gm) - self.assertTrue(np.array_equal(gm_fs[1:-1], tc_fs.data[1:-1])) - - def test_joint_deme_fs(self): - a = self.pop.alive_nodes - nodes = np.array(self.pop.tables.nodes, copy=False) - d0 = a[np.where(nodes["deme"][a] == 0)[0]] - d1 = a[np.where(nodes["deme"][a] == 1)[0]] - - tc_fs = self.pop.tables.fs([d0, d1]) - tc_fs0 = np.asarray(tc_fs.sum(axis=1).flatten())[0] - tc_fs1 = np.asarray(tc_fs.sum(axis=0).flatten())[0] - for i, j in zip((tc_fs0, tc_fs1), (d0, d1)): - dm = fwdpy11.data_matrix_from_tables( - self.pop.tables, j, record_neutral=True, record_selected=True - ) - gm = np.array(dm.neutral, copy=False) - gm_fs = fs_from_ndarray(gm) - self.assertTrue(np.array_equal(gm_fs[1:-1], i.data[1:-1])) - - def test_joint_deme_fs_marginalize(self): - a = self.pop.alive_nodes - nodes = np.array(self.pop.tables.nodes, copy=False) - d0 = a[np.where(nodes["deme"][a] == 0)[0]] - d1 = a[np.where(nodes["deme"][a] == 1)[0]] - tc_fs = self.pop.tables.fs([d0, d1]) - tc_fs_deme0 = np.asarray(tc_fs.sum(axis=1).flatten())[0] - tc_fs_deme1 = np.asarray(tc_fs.sum(axis=0).flatten())[0] - - tc_fs2 = self.pop.tables.fs([d0, d1], marginalize=True) - self.assertTrue(np.array_equal(tc_fs_deme0, tc_fs2[0])) - self.assertTrue(np.array_equal(tc_fs_deme1, tc_fs2[1])) - - -if __name__ == "__main__": - unittest.main()