[ENH] Add sphinx event to add capability table to estimators' docs individually (#2468)

* feat: add sphinx event to add capability table to estimators
  - add `html-page-context` callback event to add capabilities table individually to each estimator's docs
* fix: remove manually added tables in ad modules
* fix: move context check at the top for better readability and speedup
* fix: remove function _insert_html_table for better readability

---------

Co-authored-by: Sebastian Schmidl <10573700+SebastianSchmidl@users.noreply.github.com>
aeon-toolkit · Jan 14, 2025 · f17762e · f17762e
1 parent 0c864d4
commit f17762e
Show file tree

Hide file tree

Showing 13 changed files with 99 additions and 126 deletions.
diff --git a/aeon/anomaly_detection/_cblof.py b/aeon/anomaly_detection/_cblof.py
@@ -19,16 +19,6 @@ class CBLOF(PyODAdapter):
     the PyOD model ``CBLOF`` except for `window_size` and `stride`, which are used to
     construct the sliding windows.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-
-       * - Input data format
-         - univariate and multivariate
-       * - Output data format
-         - anomaly scores
-       * - Learning Type
-         - unsupervised or semi-supervised
-
     The documentation for parameters has been adapted from the
     [PyOD documentation](https://pyod.readthedocs.io/en/latest/pyod.models.html#id117).
     Here, `X` refers to the set of sliding windows extracted from the time series

diff --git a/aeon/anomaly_detection/_copod.py b/aeon/anomaly_detection/_copod.py
@@ -18,15 +18,6 @@ class COPOD(PyODAdapter):
     The parameter `n_jobs` is passed to COPOD model from PyOD, `window_size` and
     `stride` are used to construct the sliding windows.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-       * - Input data format
-         - univariate and multivariate
-       * - Output data format
-         - anomaly scores
-       * - Learning Type
-         - unsupervised or semi-supervised
-
     Parameters
     ----------
     n_jobs : int, default=1

diff --git a/aeon/anomaly_detection/_dwt_mlead.py b/aeon/anomaly_detection/_dwt_mlead.py
@@ -43,16 +43,6 @@ class DWT_MLEAD(BaseAnomalyDetector):
     subsequently clusters the anomalies to determine the anomaly centers. This step is
     not implemented in this version.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-
-       * - Input data format
-         - univariate
-       * - Output data format
-         - anomaly scores
-       * - Learning Type
-         - unsupervised
-
     Parameters
     ----------
     start_level : int, default=3

diff --git a/aeon/anomaly_detection/_iforest.py b/aeon/anomaly_detection/_iforest.py
@@ -19,16 +19,6 @@ class IsolationForest(PyODAdapter):
     the PyOD model ``IForest`` except for `window_size` and `stride`, which are used to
     construct the sliding windows.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-
-       * - Input data format
-         - univariate and multivariate
-       * - Output data format
-         - anomaly scores
-       * - Learning Type
-         - unsupervised or semi-supervised
-
     The documentation for parameters has been adapted from the
     [PyOD documentation](https://pyod.readthedocs.io/en/latest/pyod.models.html#id405).
     Here, `X` refers to the set of sliding windows extracted from the time series

diff --git a/aeon/anomaly_detection/_kmeans.py b/aeon/anomaly_detection/_kmeans.py
@@ -25,16 +25,6 @@ class KMeansAD(BaseAnomalyDetector):
     fitted on a clean reference time series and used to detect anomalies in a different
     target time series with the same number of dimensions.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-
-       * - Input data format
-         - univariate and multivariate
-       * - Output data format
-         - anomaly scores
-       * - Learning Type
-         - unsupervised or semi-superivsed
-
     Parameters
     ----------
     n_clusters : int, default=20

diff --git a/aeon/anomaly_detection/_left_stampi.py b/aeon/anomaly_detection/_left_stampi.py
@@ -22,16 +22,6 @@ class LeftSTAMPi(BaseAnomalyDetector):
 
     LeftSTAMPi supports univariate time series only.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-
-       * - Input data format
-         - univariate
-       * - Output data format
-         - anomaly scores
-       * - Learning Type
-         - unsupervised
-
 
     Parameters
     ----------

diff --git a/aeon/anomaly_detection/_lof.py b/aeon/anomaly_detection/_lof.py
@@ -17,20 +17,6 @@ class LOF(PyODAdapter):
     This class implement metrics-based outlier detection algorithms using the
     Local Outlier Factor (LOF) algorithm from PyOD.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-
-       * - Input data format
-         - univariate or multivariate
-       * - Output data format
-         - anomaly scores
-       * - missing_values
-         - False
-       * - Learning Type
-         - unsupervised or semi-supervised
-       * - python_dependencies
-         - ["pyod"]
-
     The documentation for parameters has been adapted from the
     [PyOD documentation](https://pyod.readthedocs.io/en/latest/pyod.models.html#id586).
     Here, `X` refers to the set of sliding windows extracted from the time series

diff --git a/aeon/anomaly_detection/_merlin.py b/aeon/anomaly_detection/_merlin.py
@@ -21,16 +21,6 @@ class MERLIN(BaseAnomalyDetector):
     most anomalous subsequence in a time series [1]_. The algorithm is based on the
     Euclidean distance between subsequences of the time series.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-
-       * - Input data format
-         - univariate
-       * - Output data format
-         - binary classification
-       * - Learning Type
-         - unsupervised
-
     Parameters
     ----------
     min_length : int, default=5

diff --git a/aeon/anomaly_detection/_one_class_svm.py b/aeon/anomaly_detection/_one_class_svm.py
@@ -19,16 +19,6 @@ class OneClassSVM(BaseAnomalyDetector):
     the sklearn ``OneClassSVM`` except for `window_size` and `stride`, which are used to
     construct the sliding windows.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-
-       * - Input data format
-         - univariate and multivariate
-       * - Output data format
-         - anomaly scores
-       * - Learning Type
-         - semi-supervised
-
     The documentation for parameters has been adapted from
     (https://scikit-learn.org/dev/modules/generated/sklearn.svm.OneClassSVM.html).
     Here, `X` refers to the set of sliding windows extracted from the time series
@@ -169,7 +159,6 @@ def _inner_fit(self, X: np.ndarray) -> None:
         self.estimator_.fit(X)
 
     def _predict(self, X) -> np.ndarray:
-
         _X, padding = sliding_windows(
             X, window_size=self.window_size, stride=self.stride, axis=0
         )
@@ -188,7 +177,6 @@ def _fit_predict(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndar
         return point_anomaly_scores
 
     def _inner_predict(self, X: np.ndarray, padding: int) -> np.ndarray:
-
         anomaly_scores = self.estimator_.score_samples(X)
 
         point_anomaly_scores = reverse_windowing(

diff --git a/aeon/anomaly_detection/_pyodadapter.py b/aeon/anomaly_detection/_pyodadapter.py
@@ -45,16 +45,6 @@ class PyODAdapter(BaseAnomalyDetector):
     For unsupervised anomaly detection, use `fit_predict` directly on the target time
     series.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-
-       * - Input data format
-         - univariate and multivariate
-       * - Output data format
-         - anomaly scores
-       * - Learning Type
-         - unsupervised or semi-supervised
-
 
     Parameters
     ----------

diff --git a/aeon/anomaly_detection/_stomp.py b/aeon/anomaly_detection/_stomp.py
@@ -21,16 +21,6 @@ class STOMP(BaseAnomalyDetector):
 
     STOMP supports univariate time series only.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-
-       * - Input data format
-         - univariate
-       * - Output data format
-         - anomaly scores
-       * - Learning Type
-         - unsupervised
-
 
     Parameters
     ----------

diff --git a/aeon/anomaly_detection/_stray.py b/aeon/anomaly_detection/_stray.py
@@ -23,16 +23,6 @@ class STRAY(BaseAnomalyDetector):
     HDoutliers that uses extreme value theory for the anomolous threshold
     calculation, to deal with data streams that exhibit non-stationary behavior.
 
-    .. list-table:: Capabilities
-       :stub-columns: 1
-
-       * - Input data format
-         - univariate and multivariate
-       * - Output data format
-         - binary classification
-       * - Learning Type
-         - unsupervised
-
     Parameters
     ----------
     alpha : float, default=0.01

diff --git a/docs/conf.py b/docs/conf.py
@@ -389,7 +389,7 @@ def _does_not_start_with_underscore(input_string):
 
             # For case where tag is not included output as not supported.
             if not _val or _val is None:
-                data[f"Supports {_str}"].append("\u274C")
+                data[f"Supports {_str}"].append("\u274c")
             else:
                 data[f"Supports {_str}"].append("\u2705")
 
@@ -401,6 +401,103 @@ def _does_not_start_with_underscore(input_string):
         file.write(df_str)
 
 
+def _add_estimator_capabilities_table(app, pagename, templatename, context, doctree):
+    """Add estimator capabilities table to HTML page.
+
+    Callback for the sphinx ``html-page-context`` event. If the page title is an
+    aeon estimator name, a table of its ``capability:*`` class tags is inserted
+    into ``context["body"]``: after an existing "Notes" rubric, else in a new
+    "Notes" section before the first References/Examples rubric or method.
+
+    Parameters
+    ----------
+    app, pagename, templatename, doctree
+        Unused; required by the signature of the sphinx event.
+    context : dict
+        Template context; ``"title"`` is read, ``"body"`` is read and updated.
+    """
+    if "title" not in context or "body" not in context:
+        return
+
+    body = context["body"]
+    if '<span class="caption-text">Capabilities</span>' in body:
+        return
+
+    # The event fires for every page: discover estimators once, cache name -> class
+    lookup = getattr(_add_estimator_capabilities_table, "_estimators", None)
+    if lookup is None:
+        from aeon.utils.discovery import all_estimators
+
+        lookup = {}
+        for name, cls in all_estimators(include_sklearn=False):
+            lookup.setdefault(name, cls)
+        _add_estimator_capabilities_table._estimators = lookup
+
+    estimator_class = lookup.get(context["title"])
+    if estimator_class is None:
+        return
+
+    prefix = "capability:"
+    capabilities = {
+        key[len(prefix) :]: value
+        for key, value in estimator_class.get_class_tags().items()
+        if key.startswith(prefix)
+    }
+    if not capabilities:
+        return
+
+    from html import escape
+
+    rows = []
+    for idx, (key, value) in enumerate(capabilities.items()):
+        row_class = "row-odd" if idx % 2 == 0 else "row-even"
+        formatted_key = escape(key.replace("_", " ").title())
+        if value is True:
+            formatted_value = "Yes"
+        elif value is False:
+            formatted_value = "No"
+        elif value is None:
+            formatted_value = "Not Set"
+        else:
+            formatted_value = escape(str(value))
+        rows.append(
+            f'<tr class="{row_class}">'
+            f'<th class="stub"><p>{formatted_key}</p></th>'
+            f"<td><p>{formatted_value}</p></td></tr>"
+        )
+
+    # A dedicated id avoids clashing with the ``idN`` ids docutils generates.
+    table_id = "estimator-capabilities"
+    html_output = (
+        '<div class="table-wrapper docutils container">\n'
+        f'<table class="docutils align-default" id="{table_id}">\n'
+        '<caption><span class="caption-text">Capabilities</span>'
+        f'<a class="headerlink" href="#{table_id}" title="Link to this table">¶</a>'
+        "</caption>\n<tbody>\n" + "\n".join(rows) + "\n</tbody>\n</table>\n</div>"
+    )
+
+    # str.find returns -1 on a miss; use the end of the body as fallback instead
+    start_methods = body.find('<dl class="py method">')
+    if start_methods == -1:
+        start_methods = len(body)
+    section_before_methods = body[:start_methods]
+
+    notes_heading = '<p class="rubric">Notes</p>'
+    notes_pos = section_before_methods.find(notes_heading)
+    if notes_pos != -1:
+        # Notes exists, insert table directly after its heading
+        insert_pos = notes_pos + len(notes_heading)
+        new_section = f"\n{html_output}"
+    else:
+        # No Notes yet: create it before References, Examples or the methods
+        rubrics = ('<p class="rubric">References</p>', '<p class="rubric">Examples</p>')
+        hits = [section_before_methods.find(rubric) for rubric in rubrics]
+        insert_pos = min([pos for pos in hits if pos != -1], default=start_methods)
+        new_section = f'\n<p class="rubric">Notes</p>\n{html_output}\n'
+
+    context["body"] = body[:insert_pos] + new_section + body[insert_pos:]
+
+
 def setup(app):
     """Set up sphinx builder.
 
@@ -409,6 +506,7 @@ def setup(app):
     app : Sphinx application object
     """
     app.connect("builder-inited", _make_estimator_overview)
+    app.connect("html-page-context", _add_estimator_capabilities_table)
 
 
 # -- Extension configuration -------------------------------------------------