Skip to content

Commit

Permalink
Merge pull request #51 from rmnldwg/release-1.0.0.a1
Browse files Browse the repository at this point in the history
Release 1.0.0.a1
  • Loading branch information
rmnldwg authored Aug 30, 2023
2 parents 82a8608 + 74271b3 commit 5b2a397
Show file tree
Hide file tree
Showing 7 changed files with 471 additions and 13 deletions.
28 changes: 27 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,31 @@
# Changelog

All notable changes to this project will be documented in this file.


<a name="unreleased"></a>
## [Unreleased]


<a name="1.0.0.a1"></a>
## [1.0.0.a1] - 2023-08-30

Second alpha release, aimed at testing the all-new implementation. See these [issues](https://github.com/rmnldwg/lymph/milestone/1) for an idea of what this release tries to address.

### Bug Fixes
- (**matrix**) Wrong shape of observation matrix for trinary model

### Documentation
- Fix wrong python version in rtd config file
- Remove outdated sampling tutorial
- Remove deprecated read-the-docs config
- Tell read-the-docs to install extra requirements
- Execute quickstart notebook

### Testing
- Check correct shapes for trinary model matrices


<a name="1.0.0.a0"></a>
## [1.0.0.a0] - 2023-08-15

Expand Down Expand Up @@ -63,7 +88,8 @@ Almost the entire API has changed. I'd therefore recommend to have a look at the
- add pre-commit hook to check commit msg


[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a0...HEAD
[Unreleased]: https://github.com/rmnldwg/lymph/compare/1.0.0.a1...HEAD
[1.0.0.a1]: https://github.com/rmnldwg/lymph/compare/1.0.0.a0...1.0.0.a1
[1.0.0.a0]: https://github.com/rmnldwg/lymph/compare/0.4.3...1.0.0.a0
[0.4.3]: https://github.com/rmnldwg/lymph/compare/0.4.2...0.4.3
[0.4.2]: https://github.com/rmnldwg/lymph/compare/0.4.1...0.4.2
Expand Down
16 changes: 7 additions & 9 deletions lymph/descriptors/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,13 +148,14 @@ class Observation(AbstractMatrixDescriptor):
@staticmethod
def generate(instance: models.Unilateral) -> np.ndarray:
"""Generate the observation matrix of the lymph model."""
num_lnls = len(instance.graph._lnls)
shape = (2**num_lnls, 1)
num_lnls = len(instance.graph.lnls)
base = 2 if instance.graph.is_binary else 3
shape = (base ** num_lnls, 1)
observation_matrix = np.ones(shape=shape)

for modality in instance.modalities.values():
mod_obs_matrix = np.ones(shape=(1,1))
for _ in instance.graph._lnls:
for _ in instance.graph.lnls:
mod_obs_matrix = np.kron(mod_obs_matrix, modality.confusion_matrix)

observation_matrix = row_wise_kron(observation_matrix, mod_obs_matrix)
Expand All @@ -174,7 +175,7 @@ def compute_encoding(
LNL is considered to be unknown.
"""
num_lnls = len(lnls)
encoding = np.ones(shape=2**num_lnls, dtype=bool)
encoding = np.ones(shape=2 ** num_lnls, dtype=bool)

for j, lnl in enumerate(lnls):
if lnl not in pattern or pd.isna(pattern[lnl]):
Expand All @@ -183,8 +184,8 @@ def compute_encoding(
encoding,
tile_and_repeat(
mat=np.array([not pattern[lnl], pattern[lnl]]),
tile=(1, 2**j),
repeat=(1, 2**(num_lnls - j - 1)),
tile=(1, 2 ** j),
repeat=(1, 2 ** (num_lnls - j - 1)),
)[0],
)
return encoding
Expand Down Expand Up @@ -284,9 +285,6 @@ def __setitem__(self, __key, __value) -> None:

def __missing__(self, t_stage: str) -> np.ndarray:
"""If the matrix for a ``t_stage`` is missing, try to generate it lazily."""
if t_stage not in self.model.data_matrices:
raise KeyError(f"Data matrix for T-stage {t_stage} is missing.")

self.data[t_stage] = (
self.model.observation_matrix @ self.model.data_matrices[t_stage]
)
Expand Down
13 changes: 10 additions & 3 deletions lymph/models/unilateral.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import itertools
import warnings
from itertools import product
from typing import Generator

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -451,8 +452,8 @@ def delete_observation_matrix(self):


@property
def t_stages(self) -> set[str | int]:
"""Set of all valid T-stages in the model.
def t_stages(self) -> Generator[str, None, None]:
"""Generator of all valid T-stages in the model.
This is the intersection of the unique T-stages found in the (mapped) data
and the T-stages defined in the distributions over diagnose times.
Expand Down Expand Up @@ -669,6 +670,7 @@ def likelihood(
self,
data: pd.DataFrame | None = None,
given_params: list[float] | np.ndarray | dict[str, float] | None = None,
load_data_kwargs: dict | None = None,
log: bool = True,
mode: str = "HMM"
) -> float:
Expand All @@ -678,12 +680,17 @@ def likelihood(
the likelihood for the stored :py:attr:`~patient_data`,
:py:attr:`~edge_params`, and the stored :py:attr:`~diag_time_dists`.
One may specify additional ``load_data_kwargs`` to pass to the method
:py:meth:`~load_patient_data` when loading the data.
Returns the log-likelihood if ``log`` is set to ``True``. The ``mode`` parameter
determines whether the likelihood is computed for the hidden Markov model
(``"HMM"``) or the Bayesian network (``"BN"``).
"""
if data is not None:
self.patient_data = data
if load_data_kwargs is None:
load_data_kwargs = {}
self.load_patient_data(data, **load_data_kwargs)

if given_params is None:
return self._likelihood(mode, log)
Expand Down
77 changes: 77 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,80 @@ ensure_newline_before_comments = true

[tool.pycln]
all = true


# git-cliff ~ default configuration file
# https://git-cliff.org/docs/configuration
#
# Lines starting with "#" are comments.
# Configuration options are organized into tables and keys.
# See documentation for more information on available options.

[tool.git-cliff.changelog]
# changelog header
header = """
# Changelog\n
All notable changes to this project will be documented in this file.\n
"""
# template for the changelog body
# https://tera.netlify.app/docs
body = """
{% if version %}\
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else %}\
## [unreleased]
{% endif %}\
{% for group, commits in commits | group_by(attribute="group") %}
### {{ group | upper_first }}
{% for commit in commits %}
- {% if commit.breaking %}⚠ **BREAKING** {% endif %}{% if commit.scope %}(**{{ commit.scope }}**) {% endif %}{{ commit.message | upper_first }}\
{% endfor %}
{% endfor %}\n
"""
# remove the leading and trailing whitespace from the template
trim = true
# changelog footer
footer = """
<!-- generated by git-cliff -->
"""

[tool.git-cliff.git]
# parse the commits based on https://www.conventionalcommits.org
conventional_commits = true
# filter out the commits that are not conventional
filter_unconventional = true
# process each line of a commit as an individual commit
split_commits = false
# regex for preprocessing the commit messages
commit_preprocessors = [
# { pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](https://github.com/orhun/git-cliff/issues/${2}))"}, # replace issue numbers
]
# regex for parsing and grouping commits
commit_parsers = [
{ message = "^feat", group = "Features" },
{ message = "^fix", group = "Bug Fixes" },
{ message = "^doc", group = "Documentation" },
{ message = "^perf", group = "Performance" },
{ message = "^refactor", group = "Refactor" },
{ message = "^style", group = "Styling" },
{ message = "^test", group = "Testing" },
{ message = "^chore\\(release\\): prepare for", skip = true },
{ message = "^chore", group = "Miscellaneous Tasks" },
{ body = ".*security", group = "Security" },
]
# protect breaking changes from being skipped due to matching a skipping commit_parser
protect_breaking_commits = false
# filter out the commits that are not matched by commit parsers
filter_commits = false
# glob pattern for matching git tags
tag_pattern = "[0-9]*"
# regex for skipping tags
skip_tags = "v0.1.0-beta.1"
# regex for ignoring tags
ignore_tags = ""
# sort the tags topologically
topo_order = false
# sort the commits inside sections by oldest/newest order
sort_commits = "oldest"
# limit the number of commits included in the changelog.
# limit_commits = 42
28 changes: 28 additions & 0 deletions tests/binary_unilateral_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,17 @@ def setUp(self):
warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)
self.model.load_patient_data(self.patient_data, side="ipsi")

# Initialize some fixed diagnose time distributions
self.init_diag_time_dists(["early", "late", "foo"])

def init_diag_time_dists(self, t_stages: list[str], seed: int = 42) -> None:
"""Initialize some fixed diagnose time distributions."""
rng = np.random.default_rng(seed)
for t_stage in t_stages:
self.model.diag_time_dists[t_stage] = rng.uniform(
low=0., high=1., size=self.model.max_time + 1
)


class PatientDataTestCase(LoadDataFixtureMixin, unittest.TestCase):
"""Test loading the patient data."""
Expand All @@ -236,6 +247,23 @@ def test_load_patient_data(self):
ValueError, self.model.load_patient_data, self.patient_data, side="foo"
)

def test_t_stages(self):
"""Make sure all T-stages are present."""
t_stages_in_data = self.model.patient_data["_model", "#" ,"t_stage"].unique()
t_stages_in_diag_time_dists = self.model.diag_time_dists.keys()
t_stages_in_model = list(self.model.t_stages)
t_stages_intersection = set(t_stages_in_data).intersection(t_stages_in_diag_time_dists)

self.assertNotIn("foo", t_stages_in_model)
self.assertEqual(len(t_stages_in_diag_time_dists), 3)
self.assertEqual(len(t_stages_intersection), 2)
self.assertEqual(len(t_stages_intersection), len(t_stages_in_model))

for t_stage in t_stages_in_model:
self.assertIn(t_stage, t_stages_in_data)
self.assertIn(t_stage, t_stages_in_diag_time_dists)


def test_data_matrices(self):
"""Make sure the data matrices are generated correctly."""
for t_stage in ["early", "late"]:
Expand Down
Loading

0 comments on commit 5b2a397

Please sign in to comment.