Merge pull request #51 from rmnldwg/release-1.0.0.a1

Release 1.0.0.a1
rmnldwg · Aug 30, 2023 · 5b2a397 · 5b2a397
2 parents 82a8608 + 74271b3
commit 5b2a397
Show file tree

Hide file tree

Showing 7 changed files with 471 additions and 13 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,31 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+
 <a name="unreleased"></a>
 ## [Unreleased]
 
+
+<a name="1.0.0.a1"></a>
+## [1.0.0.a1] - 2023-08-30
+
+Second alpha release, aimed at testing the all-new implementation. See these [issues](https://github.com/rmnldwg/lymph/milestone/1) for an idea of what this release tries to address.
+
+### Bug Fixes
+- (**matrix**) Wrong shape of observation matrix for trinary model
+
+### Documentation
+- Fix wrong python version in rtd config file
+- Remove outdated sampling tutorial
+- Remove deprecated read-the-docs config
+- Tell read-the-docs to install extra requirements
+- Execute quickstart notebook
+
+### Testing
+- Check correct shapes for trinary model matrices
+
+
 <a name="1.0.0.a0"></a>
 ## [1.0.0.a0] - 2023-08-15
 
@@ -63,7 +88,8 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the
 - add pre-commit hook to check commit msg
 
 
-[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a0...HEAD
+[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a1...HEAD
+[1.0.0.a1]: https://github.com/rmnldwg/lymph/compare/1.0.0.a0...1.0.0.a1
 [1.0.0.a0]: https://github.com/rmnldwg/lymph/compare/0.4.3...1.0.0.a0
 [0.4.3]: https://github.com/rmnldwg/lymph/compare/0.4.2...0.4.3
 [0.4.2]: https://github.com/rmnldwg/lymph/compare/0.4.1...0.4.2

diff --git a/lymph/descriptors/matrix.py b/lymph/descriptors/matrix.py
@@ -148,13 +148,14 @@ class Observation(AbstractMatrixDescriptor):
     @staticmethod
     def generate(instance: models.Unilateral) -> np.ndarray:
         """Generate the observation matrix of the lymph model."""
-        num_lnls = len(instance.graph._lnls)
-        shape = (2**num_lnls, 1)
+        num_lnls = len(instance.graph.lnls)
+        base = 2 if instance.graph.is_binary else 3
+        shape = (base ** num_lnls, 1)
         observation_matrix = np.ones(shape=shape)
 
         for modality in instance.modalities.values():
             mod_obs_matrix = np.ones(shape=(1,1))
-            for _ in instance.graph._lnls:
+            for _ in instance.graph.lnls:
                 mod_obs_matrix = np.kron(mod_obs_matrix, modality.confusion_matrix)
 
             observation_matrix = row_wise_kron(observation_matrix, mod_obs_matrix)
@@ -174,7 +175,7 @@ def compute_encoding(
     LNL is considered to be unknown.
     """
     num_lnls = len(lnls)
-    encoding = np.ones(shape=2**num_lnls, dtype=bool)
+    encoding = np.ones(shape=2 ** num_lnls, dtype=bool)
 
     for j, lnl in enumerate(lnls):
         if lnl not in pattern or pd.isna(pattern[lnl]):
@@ -183,8 +184,8 @@ def compute_encoding(
             encoding,
             tile_and_repeat(
                 mat=np.array([not pattern[lnl], pattern[lnl]]),
-                tile=(1, 2**j),
-                repeat=(1, 2**(num_lnls - j - 1)),
+                tile=(1, 2 ** j),
+                repeat=(1, 2 ** (num_lnls - j - 1)),
             )[0],
         )
     return encoding
@@ -284,9 +285,6 @@ def __setitem__(self, __key, __value) -> None:
 
     def __missing__(self, t_stage: str) -> np.ndarray:
         """If the matrix for a ``t_stage`` is missing, try to generate it lazily."""
-        if t_stage not in self.model.data_matrices:
-            raise KeyError(f"Data matrix for T-stage {t_stage} is missing.")
-
         self.data[t_stage] = (
             self.model.observation_matrix @ self.model.data_matrices[t_stage]
         )

diff --git a/lymph/models/unilateral.py b/lymph/models/unilateral.py
@@ -3,6 +3,7 @@
 import itertools
 import warnings
 from itertools import product
+from typing import Generator
 
 import numpy as np
 import pandas as pd
@@ -451,8 +452,8 @@ def delete_observation_matrix(self):
 
 
     @property
-    def t_stages(self) -> set[str | int]:
-        """Set of all valid T-stages in the model.
+    def t_stages(self) -> Generator[str, None, None]:
+        """Generator of all valid T-stages in the model.
 
         This is the intersection of the unique T-stages found in the (mapped) data
         and the T-stages defined in the distributions over diagnose times.
@@ -669,6 +670,7 @@ def likelihood(
         self,
         data: pd.DataFrame | None = None,
         given_params: list[float] | np.ndarray | dict[str, float] | None = None,
+        load_data_kwargs: dict | None = None,
         log: bool = True,
         mode: str = "HMM"
     ) -> float:
@@ -678,12 +680,17 @@ def likelihood(
         the likelihood for the stored :py:attr:`~patient_data`,
         :py:attr:`~edge_params`, and the stored :py:attr:`~diag_time_dists`.
 
+        One may specify additional ``load_data_kwargs`` to pass to the method
+        :py:meth:`~load_patient_data` when loading the data.
+
         Returns the log-likelihood if ``log`` is set to ``True``. The ``mode`` parameter
         determines whether the likelihood is computed for the hidden Markov model
         (``"HMM"``) or the Bayesian network (``"BN"``).
         """
         if data is not None:
-            self.patient_data = data
+            if load_data_kwargs is None:
+                load_data_kwargs = {}
+            self.load_patient_data(data, **load_data_kwargs)
 
         if given_params is None:
             return self._likelihood(mode, log)

diff --git a/pyproject.toml b/pyproject.toml
@@ -84,3 +84,80 @@ ensure_newline_before_comments = true
 
 [tool.pycln]
 all = true
+
+
+# git-cliff ~ default configuration file
+# https://git-cliff.org/docs/configuration
+#
+# Lines starting with "#" are comments.
+# Configuration options are organized into tables and keys.
+# See documentation for more information on available options.
+
+[tool.git-cliff.changelog]
+# changelog header
+header = """
+# Changelog\n
+All notable changes to this project will be documented in this file.\n
+"""
+# template for the changelog body
+# https://tera.netlify.app/docs
+body = """
+{% if version %}\
+    ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
+{% else %}\
+    ## [unreleased]
+{% endif %}\
+{% for group, commits in commits | group_by(attribute="group") %}
+    ### {{ group | upper_first }}
+    {% for commit in commits %}
+        - {% if commit.breaking %}⚠ **BREAKING** {% endif %}{% if commit.scope %}(**{{ commit.scope }}**) {% endif %}{{ commit.message | upper_first }}\
+    {% endfor %}
+{% endfor %}\n
+"""
+# remove the leading and trailing whitespace from the template
+trim = true
+# changelog footer
+footer = """
+<!-- generated by git-cliff -->
+"""
+
+[tool.git-cliff.git]
+# parse the commits based on https://www.conventionalcommits.org
+conventional_commits = true
+# filter out the commits that are not conventional
+filter_unconventional = true
+# process each line of a commit as an individual commit
+split_commits = false
+# regex for preprocessing the commit messages
+commit_preprocessors = [
+  # { pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](https://github.com/orhun/git-cliff/issues/${2}))"}, # replace issue numbers
+]
+# regex for parsing and grouping commits
+commit_parsers = [
+  { message = "^feat", group = "Features" },
+  { message = "^fix", group = "Bug Fixes" },
+  { message = "^doc", group = "Documentation" },
+  { message = "^perf", group = "Performance" },
+  { message = "^refactor", group = "Refactor" },
+  { message = "^style", group = "Styling" },
+  { message = "^test", group = "Testing" },
+  { message = "^chore\\(release\\): prepare for", skip = true },
+  { message = "^chore", group = "Miscellaneous Tasks" },
+  { body = ".*security", group = "Security" },
+]
+# protect breaking changes from being skipped due to matching a skipping commit_parser
+protect_breaking_commits = false
+# filter out the commits that are not matched by commit parsers
+filter_commits = false
+# glob pattern for matching git tags
+tag_pattern = "[0-9]*"
+# regex for skipping tags
+skip_tags = "v0.1.0-beta.1"
+# regex for ignoring tags
+ignore_tags = ""
+# sort the tags topologically
+topo_order = false
+# sort the commits inside sections by oldest/newest order
+sort_commits = "oldest"
+# limit the number of commits included in the changelog.
+# limit_commits = 42
diff --git a/tests/binary_unilateral_test.py b/tests/binary_unilateral_test.py
@@ -225,6 +225,17 @@ def setUp(self):
         warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)
         self.model.load_patient_data(self.patient_data, side="ipsi")
 
+        # Initialize some fixed diagnose time distributions
+        self.init_diag_time_dists(["early", "late", "foo"])
+
+    def init_diag_time_dists(self, t_stages: list[str], seed: int = 42) -> None:
+        """Initialize some fixed diagnose time distributions."""
+        rng = np.random.default_rng(seed)
+        for t_stage in t_stages:
+            self.model.diag_time_dists[t_stage] = rng.uniform(
+                low=0., high=1., size=self.model.max_time + 1
+            )
+
 
 class PatientDataTestCase(LoadDataFixtureMixin, unittest.TestCase):
     """Test loading the patient data."""
@@ -236,6 +247,23 @@ def test_load_patient_data(self):
             ValueError, self.model.load_patient_data, self.patient_data, side="foo"
         )
 
+    def test_t_stages(self):
+        """Make sure all T-stages are present."""
+        t_stages_in_data = self.model.patient_data["_model", "#" ,"t_stage"].unique()
+        t_stages_in_diag_time_dists = self.model.diag_time_dists.keys()
+        t_stages_in_model = list(self.model.t_stages)
+        t_stages_intersection = set(t_stages_in_data).intersection(t_stages_in_diag_time_dists)
+
+        self.assertNotIn("foo", t_stages_in_model)
+        self.assertEqual(len(t_stages_in_diag_time_dists), 3)
+        self.assertEqual(len(t_stages_intersection), 2)
+        self.assertEqual(len(t_stages_intersection), len(t_stages_in_model))
+
+        for t_stage in t_stages_in_model:
+            self.assertIn(t_stage, t_stages_in_data)
+            self.assertIn(t_stage, t_stages_in_diag_time_dists)
+
+
     def test_data_matrices(self):
         """Make sure the data matrices are generated correctly."""
         for t_stage in ["early", "late"]: