diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 22b04337..89c36517 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,6 +14,10 @@ Changelog - Added a new function, :func:`tabmat.from_polars`, to convert a :class:`polars.DataFrame` into a :class:`tabmat.SplitMatrix`. +**Bug fix:** + +- Fixed a bug in :meth:`tabmat.CategoricalMatrix.standardize` that sometimes returned ``nan`` values for the standard deviation when using ``np.float32`` precision, because precision errors could make the computed variance slightly negative. + 4.0.1 - 2024-06-25 ------------------ diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py index e8200ab8..a0d0c2fa 100644 --- a/src/tabmat/categorical_matrix.py +++ b/src/tabmat/categorical_matrix.py @@ -672,7 +672,9 @@ def _get_col_stds(self, weights: np.ndarray, col_means: np.ndarray) -> np.ndarra # but because X_ij is either {0, 1} # we don't actually need to square. mean = self.transpose_matvec(weights) - return np.sqrt(mean - col_means**2) + vars = mean - col_means**2 + # If using float32, we can get negative values due to precision errors + return np.sqrt(np.maximum(vars, 0)) def __getitem__(self, item): row, col = _check_indexer(item)