Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Archipelago - dict transformer for vectorizing persistence diagrams #1017

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
de408e9
add archipelago class
Nov 28, 2023
927c789
give default quantiser sklearn.KMeans to Atol method
Dec 19, 2023
3e95419
homology_dimensions and settlers for Archipelago class
Dec 19, 2023
3bc232a
n_init parameter for sklearn KMeans
Dec 20, 2023
431a692
archipelago island
Dec 20, 2023
aaef59f
mature version compatible with gudhi.representations.vector_methods
Jan 4, 2024
adc8668
Atol try/catch
Jan 4, 2024
4685748
fix docstrings
Jan 4, 2024
2cf60e4
typo
Jan 4, 2024
6524320
Merge branch 'GUDHI:master' into archipelago
martinroyer Jan 4, 2024
5561d09
docstring correct
Jan 4, 2024
5367ea1
refactor removing input preprocessing, instead we take raw dgm format…
Jan 4, 2024
95bd156
prints
Jan 4, 2024
b4de687
Revert try/catch optimizer fit in Atol
martinroyer Jan 5, 2024
89be488
fix set_output from sklearn so as to return pandas without importing …
Jan 5, 2024
f5dc92d
default KMeans parameter
Jan 5, 2024
6bfb164
change confusing Atol __call__ function
Jan 5, 2024
23a5e47
define get_feature_names_out for Atol
Jan 5, 2024
a59af1b
test fixes
Jan 5, 2024
9fadd61
hopefully fix atol test following `n_init="auto"` in KMeans
Jan 5, 2024
0b1c6b9
revert value changes to doc
Jan 8, 2024
aa0d3cb
updated docstring
Jan 8, 2024
4afb0ef
tentative change n_init value for test compatibility 3.7
Jan 12, 2024
2df14d9
remove try except
Jan 12, 2024
f01ac19
call vectorizer get_feature_names_out if exists
Jan 12, 2024
c43b190
more sklearn logic
Jan 12, 2024
b74dcae
atol fixes:
Jun 7, 2024
26eef83
add test for representations interface fit/transform/...
Jun 7, 2024
5d8af99
remove archipelago
Jun 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 39 additions & 11 deletions src/python/gudhi/representations/vector_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from sklearn.exceptions import NotFittedError
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
from sklearn.metrics import pairwise
from sklearn.cluster import KMeans

try:
# New location since 1.0
from sklearn.metrics import DistanceMetric
Expand Down Expand Up @@ -719,21 +721,26 @@ class Atol(BaseEstimator, TransformerMixin):
>>> a = np.array([[1, 2, 4], [1, 4, 0], [1, 0, 4]])
>>> b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
>>> c = np.array([[3, 2, -1], [1, 2, -1]])
>>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
>>> atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006, n_init=10))
Comment on lines -722 to +724
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@VincentRouvreau I believe this is what you need part1

>>> atol_vectoriser.fit(X=[a, b, c]).centers
array([[ 2.6 , 2.8 , -0.4 ],
[ 2. , 0.66666667, 3.33333333]])
>>> atol_vectoriser(a)
>>> atol_vectoriser._transform(a)
array([0.42375966, 1.18168665])
>>> atol_vectoriser(c)
>>> atol_vectoriser._transform(c)
array([1.25157463, 0.02062512])
>>> atol_vectoriser.transform(X=[a, b, c])
array([[0.42375966, 1.18168665],
[1.06330156, 0.29861028],
[1.25157463, 0.02062512]])
"""
# Note the example above must be up to date with the one in tests called test_atol_doc
def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
def __init__(
self,
quantiser=KMeans(n_clusters=2, random_state=202312, n_init=10),
weighting_method="cloud",
contrast="gaussian"
):
"""
Constructor for the Atol measure vectorisation class.

Expand All @@ -751,6 +758,7 @@ def __init__(self, quantiser, weighting_method="cloud", contrast="gaussian"):
self.quantiser = quantiser
self.contrast = contrast
self.weighting_method = weighting_method
self._running_transform_names = ""

def get_contrast(self):
return {
Expand Down Expand Up @@ -780,18 +788,34 @@ def fit(self, X, y=None, sample_weight=None):
self
"""
if not hasattr(self.quantiser, 'fit'):
raise TypeError("quantiser %s has no `fit` attribute." % (self.quantiser))

# In fitting we remove infinite death time points so that every center is finite
X = [dgm[~np.isinf(dgm).any(axis=1), :] for dgm in X]
raise TypeError(f"quantiser {self.quantiser} has no `fit` attribute.")
n_clusters = self.quantiser.n_clusters

if sample_weight is None:
sample_weight = [self.get_weighting_method()(measure) for measure in X]

measures_concat = np.concatenate(X)
weights_concat = np.concatenate(sample_weight)
self.quantiser.fit(X=measures_concat, sample_weight=weights_concat)
# In fitting we remove infinite birth/death time points so that every center is finite
filtered_measures_concat = measures_concat[~np.isinf(measures_concat).any(axis=1), :] if len(measures_concat) else measures_concat
filtered_weights_concat = weights_concat[~np.isinf(measures_concat).any(axis=1)] if len(measures_concat) else weights_concat

if not len(filtered_measures_concat) or len(filtered_measures_concat) < n_clusters:
# If no point to fit, let's arbitrarily put centers in [0, 1)
print(f" [Atol] had {len(filtered_measures_concat)} points to fit {n_clusters} clusters,"
f" adding random points in [0, 1)^2")
filtered_weights_concat = np.concatenate((
filtered_weights_concat,
np.ones(shape=(n_clusters - len(filtered_measures_concat)))
))
filtered_measures_concat = np.concatenate((
filtered_measures_concat,
np.random.random((n_clusters - len(filtered_measures_concat), 2))
))

self.quantiser.fit(X=filtered_measures_concat, sample_weight=filtered_weights_concat)
self.centers = self.quantiser.cluster_centers_

# Hack, but some people are unhappy if the order depends on the version of sklearn
self.centers = self.centers[np.lexsort(self.centers.T)]
if self.quantiser.n_clusters == 1:
Expand All @@ -805,7 +829,7 @@ def fit(self, X, y=None, sample_weight=None):
self.inertias = np.min(dist_centers, axis=0)/2
return self

def __call__(self, measure, sample_weight=None):
def _transform(self, measure, sample_weight=None):
"""
Apply measure vectorisation on a single measure. Only available after `fit` has been called.

Expand Down Expand Up @@ -834,4 +858,8 @@ def transform(self, X, sample_weight=None):
"""
if sample_weight is None:
sample_weight = [self.get_weighting_method()(measure) for measure in X]
return np.stack([self(measure, sample_weight=weight) for measure, weight in zip(X, sample_weight)])
self._running_transform_names = [f"Atol Center {i + 1}" for i in range(self.quantiser.n_clusters)]
return np.stack([self._transform(measure, sample_weight=weight) for measure, weight in zip(X, sample_weight)])

def get_feature_names_out(self):
return self._running_transform_names
16 changes: 8 additions & 8 deletions src/python/test/test_representations.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,23 +118,23 @@ def test_atol_doc():
b = np.array([[4, 2, 0], [4, 4, 0], [4, 0, 2]])
c = np.array([[3, 2, -1], [1, 2, -1]])

atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006))
atol_vectoriser = Atol(quantiser=KMeans(n_clusters=2, random_state=202006, n_init=10))
# Atol will do
# X = np.concatenate([a,b,c])
# kmeans = KMeans(n_clusters=2, random_state=202006).fit(X)
# kmeans = KMeans(n_clusters=2, random_state=202006, n_init=10).fit(X)
# kmeans.labels_ will be : array([1, 0, 1, 0, 0, 1, 0, 0])
first_cluster = np.asarray([a[0], a[2], b[2]])
second_cluster = np.asarray([a[1], b[0], b[2], c[0], c[1]])
second_cluster = np.asarray([a[1], b[0], b[1], c[0], c[1]])
Comment on lines -121 to +127
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@VincentRouvreau I believe this is what you need part2


# Check the center of the first_cluster and second_cluster are in Atol centers
centers = atol_vectoriser.fit(X=[a, b, c]).centers
np.isclose(centers, first_cluster.mean(axis=0)).all(1).any()
np.isclose(centers, second_cluster.mean(axis=0)).all(1).any()

vectorization = atol_vectoriser.transform(X=[a, b, c])
assert np.allclose(vectorization[0], atol_vectoriser(a))
assert np.allclose(vectorization[1], atol_vectoriser(b))
assert np.allclose(vectorization[2], atol_vectoriser(c))
assert np.allclose(vectorization[0], atol_vectoriser._transform(a))
assert np.allclose(vectorization[1], atol_vectoriser._transform(b))
assert np.allclose(vectorization[2], atol_vectoriser._transform(c))


def test_dummy_atol():
Expand All @@ -145,12 +145,12 @@ def test_dummy_atol():
for weighting_method in ["cloud", "iidproba"]:
for contrast in ["gaussian", "laplacian", "indicator"]:
atol_vectoriser = Atol(
quantiser=KMeans(n_clusters=1, random_state=202006),
quantiser=KMeans(n_clusters=1, random_state=202006, n_init=10),
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@VincentRouvreau I believe this is what you need part3/3

weighting_method=weighting_method,
contrast=contrast,
)
atol_vectoriser.fit([a, b, c])
atol_vectoriser(a)
atol_vectoriser._transform(a)
atol_vectoriser.transform(X=[a, b, c])


Expand Down
87 changes: 87 additions & 0 deletions src/python/test/test_representations_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from copy import deepcopy
import numpy as np

from sklearn.cluster import KMeans

from gudhi.representations import (Atol, Landscape, Silhouette, BettiCurve, ComplexPolynomial, \
TopologicalVector, PersistenceImage, Entropy)

# Vectorizers exercised by every test below, keyed by a short display name.
# NOTE(review): "betti" is commented out — presumably pending sklearn-interface
# compliance for BettiCurve; confirm before re-enabling.
vectorizers = {
    "atol": Atol(quantiser=KMeans(n_clusters=2, random_state=202312, n_init="auto")),
    # "betti": BettiCurve(),
}

# Toy persistence diagrams: each list entry is an (n, 2) array of
# (birth, death) pairs — one array per homology dimension, presumably.
# diag1 and diag2 contain points with infinite death time; diag1 also has an
# entirely empty dimension.
diag1 = [np.array([[0., np.inf],
                   [0., 8.94427191],
                   [0., 7.28010989],
                   [0., 6.08276253],
                   [0., 5.83095189],
                   [0., 5.38516481],
                   [0., 5.]]),
         np.array([[11., np.inf],
                   [6.32455532, 6.70820393]]),
         np.empty(shape=[0, 2])]

diag2 = [np.array([[0., np.inf],
                   [0., 8.94427191],
                   [0., 7.28010989],
                   [0., 6.08276253],
                   [0., 5.83095189],
                   [0., 5.38516481],
                   [0., 5.]]),
         np.array([[11., np.inf],
                   [6.32455532, 6.70820393]]),
         np.array([[0., np.inf],
                   [0., 1]])]

# Degenerate input: a single homology dimension with zero points.
diag3 = [np.empty(shape=[0, 2])]


def test_fit():
    """Check that every registered vectorizer can `fit` on non-empty diagrams."""
    # Plain string: the original used an f-string with no placeholders (F541).
    print(" > Testing `fit`.")
    for name, vectorizer in vectorizers.items():
        print(f" >> Testing {name}")
        # deepcopy so fitted state never leaks between tests sharing `vectorizers`.
        deepcopy(vectorizer).fit(X=[diag1[0], diag2[0]])


def test_fit_empty():
    """Check that `fit` tolerates diagrams containing no points at all."""
    # Plain string: the original used an f-string with no placeholders (F541).
    print(" > Testing `fit_empty`.")
    for name, vectorizer in vectorizers.items():
        print(f" >> Testing {name}")
        # deepcopy so fitted state never leaks between tests sharing `vectorizers`.
        deepcopy(vectorizer).fit(X=[diag3[0], diag3[0]])


def test_transform():
    """Check `fit_transform` on a mix of regular and empty diagrams."""
    # Plain string: the original used an f-string with no placeholders (F541).
    print(" > Testing `transform`.")
    for name, vectorizer in vectorizers.items():
        print(f" >> Testing {name}")
        # deepcopy so fitted state never leaks between tests sharing `vectorizers`.
        deepcopy(vectorizer).fit_transform(X=[diag1[0], diag2[0], diag3[0]])


def test_transform_empty():
    """Check that a vectorizer fitted on real data can transform empty diagrams."""
    # Plain string: the original used an f-string with no placeholders (F541).
    print(" > Testing `transform_empty`.")
    for name, vectorizer in vectorizers.items():
        print(f" >> Testing {name}")
        # Fit on non-empty input, then transform only empty diagrams.
        copy_vec = deepcopy(vectorizer).fit(X=[diag1[0], diag2[0]])
        copy_vec.transform(X=[diag3[0], diag3[0]])


def test_set_output():
    """Check `set_output(transform="pandas")` is accepted (sklearn set_output API)."""
    # Plain string: the original used an f-string with no placeholders (F541).
    print(" > Testing `set_output`.")
    # Imported without an alias (the original bound an unused `pd`, F401):
    # kept so the test fails fast with a clear error when pandas is absent,
    # since sklearn's pandas output mode needs it at transform time.
    import pandas  # noqa: F401
    for name, vectorizer in vectorizers.items():
        print(f" >> Testing {name}")
        deepcopy(vectorizer).set_output(transform="pandas")


def test_compose():
    """Check vectorizers compose with `sklearn.compose.ColumnTransformer`."""
    # Plain string: the original used an f-string with no placeholders (F541).
    print(" > Testing composition with `sklearn.compose.ColumnTransformer`.")
    from sklearn.compose import ColumnTransformer
    for name, vectorizer in vectorizers.items():
        print(f" >> Testing {name}")
        # One independent transformer per diagram entry (columns 0..2),
        # matching the three dimensions present in diag1/diag2.
        ct = ColumnTransformer([
            (f"{name}-0", deepcopy(vectorizer), 0),
            (f"{name}-1", deepcopy(vectorizer), 1),
            (f"{name}-2", deepcopy(vectorizer), 2)]
        )
        ct.fit_transform(X=[diag1, diag2])
Loading