Finalize documentation

ORNL · Sep 25, 2023 · 454b187 · 454b187
1 parent 7b574a9
commit 454b187
Show file tree

Hide file tree

Showing 10 changed files with 96 additions and 47 deletions.
diff --git a/icat/__init__.py b/icat/__init__.py
@@ -64,7 +64,7 @@ def initialize(offline: bool = False):
     for the stylesheets to correctly apply to some of the ipyvuetify datatables.
 
     Args:
-        offline (bool): If set to true, will configure panel to draw js/css resources from
+        offline (bool): If set to true, will configure panel to draw js/css resources from \
             local packages rather than hitting a CDN for them.
     """
     pn.extension("vega", inline=offline)

diff --git a/icat/anchorlist.py b/icat/anchorlist.py
@@ -247,12 +247,12 @@ class AnchorList(pn.viewable.Viewer):
     Args:
         model: The parent model.
         table_width (int): Static width of the visual component table.
-        table_height (int): Static height of the visual component table.
+        table_height (int): Static height of the visual component table. \
             (Currently unused)
-        anchor_types (list[type | dict[str, any]]): The anchor types to start the
-            interface with. This list can contain a combination of types and
-            dictionaries with keys ``ref`` (containing the type), ``name`` (display
-            name) and ``color`` (the color to render anchors of this type with.)
+        anchor_types (list[type | dict[str, any]]): The anchor types to start the \
+            interface with. This list can contain a combination of types and \
+            dictionaries with keys ``ref`` (containing the type), ``name`` (display \
+            name) and ``color`` (the color to render anchors of this type with.) \
             If left None (the default), will add ``DictionaryAnchor`` and ``TFIDFAnchor``.
     """
 
@@ -585,8 +585,8 @@ def fire_on_anchor_changed(self, name: str, key: str, value):
         from the anchor's individual on_anchor_changed events
 
         Args:
-            name (str): the *internal name* that panel is using, which we're using as
-            the anchor id.
+            name (str): the *internal name* that panel is using, which we're using as \
+                the anchor id.
             key (str): the name of the property being changed.
             value (any): The value the property was changed to.
         """
@@ -886,9 +886,9 @@ def add_anchor_types(self, anchor_types: list[type | dict[str, any]]):
         """Add multiple anchor types to the UI.
 
         Args:
-            anchor_types (list[type|dict[str, any]]): the list of anchor types
-                to add, this can consist of a combination of straight class types
-                and dictionaries containing ``ref`` (the class type), ``name``,
+            anchor_types (list[type|dict[str, any]]): the list of anchor types \
+                to add, this can consist of a combination of straight class types \
+                and dictionaries containing ``ref`` (the class type), ``name``, \
                 and ``color``.
 
         Example:
@@ -916,7 +916,7 @@ def add_anchor_type(
         Args:
             anchor_type (type): The class type (of subclass ``Anchor``) to register.
             name (str): The name to associate with the type in the UI.
-            color (str): The hex color to use in the CSS for rows in the anchorlist
+            color (str): The hex color to use in the CSS for rows in the anchorlist \
                 and anchors in anchorviz for this anchor type.
         """
         if name is not None:
@@ -1146,9 +1146,9 @@ def set_coverage(self, coverage_info: dict[str, dict[str, int | float]]):
         """Set the anchor coverage data, to be updated and displayed in the table.
 
         Args:
-            coverage_info (dict[str, dict[str, Union[int, float]]]): Dictionary (keys
-                being the anchor panel ids) and the value being a dictionary with the
-                "row" of data to display in the table.  Keys expected: "total", "pos",
+            coverage_info (dict[str, dict[str, Union[int, float]]]): Dictionary (keys \
+                being the anchor panel ids) and the value being a dictionary with the \
+                "row" of data to display in the table.  Keys expected: "total", "pos", \
                 "neg", "total_pct", "pos_pct", "neg_pct"
         """
         self.coverage_info = coverage_info

diff --git a/icat/anchors.py b/icat/anchors.py
@@ -32,11 +32,11 @@ class Anchor(param.Parameterized):
     """The main parent anchor class, this should be subclassed, not directly used.
 
     Args:
-        container (AnchorList): The anchor list that this anchor is a part of. If
-            you are creating the anchor manually, leave this None, it will get
+        container (AnchorList): The anchor list that this anchor is a part of. If \
+            you are creating the anchor manually, leave this None, it will get \
             populated automatically.
         anchor_name (str): The label to show for this anchor.
-        weight (float): Scalar multiple to apply to the output feature, this modifies
+        weight (float): Scalar multiple to apply to the output feature, this modifies \
             how strongly a particular feature is likely to influence the model.
         in_view (bool): Whether to show this anchor in anchorviz.
         in_model (bool): Whether to include this feature in the training process.
@@ -239,9 +239,9 @@ class DictionaryAnchor(Anchor):
     number of occurrences of each word in the given keywords.
 
     Args:
-        container (AnchorList): The containing anchor list parent instance.
-            This can usually be left ``None`` if defining an anchor manually,
-            any time ``add_anchor`` is called, the anchor list will take care
+        container (AnchorList): The containing anchor list parent instance. \
+            This can usually be left ``None`` if defining an anchor manually, \
+            any time ``add_anchor`` is called, the anchor list will take care \
             of setting this on all children anchors.
 
     Example:

diff --git a/icat/data.py b/icat/data.py
@@ -674,8 +674,8 @@ def apply_label(self, index: int | list[int], label: int | list[int]):
 
         Args:
             index (int | list[int]): Either a single index, or a list of indices.
-            label (int | list[int]): Either the single label to apply or a list of corresponding labels
-                for the provided indices. 1 is "interesting", 0 is "uninteresting". If a -1 is provided,
+            label (int | list[int]): Either the single label to apply or a list of corresponding labels \
+                for the provided indices. 1 is "interesting", 0 is "uninteresting". If a -1 is provided, \
                 this resets or "unlabels", removing it from the container model's training set.
         """
         self._handle_label_changed(index, label)

diff --git a/icat/item.py b/icat/item.py
@@ -27,7 +27,14 @@ def _template(self):
 
 class ItemViewer(pn.viewable.Viewer):
     """Interface for viewing and labeling a single item, useful for looking
-    at more than just a snippet of the full text."""
+    at more than just a snippet of the full text.
+
+    Args:
+        index (int): The row index of the entry in the dataframe to view.
+        width (int): The width of the rendered card.
+        height (int): The height of the rendered card.
+        data (DataManager): The parent data manager to pull the item from.
+    """
 
     def __init__(
         self,
@@ -153,7 +160,12 @@ def __panel__(self):
     def populate(self, index: int):
         """Fill or update all of the fields for the given index. This
         should be called anytime the model updates, or when the user
-        clicks/requests to view a new instance."""
+        clicks/requests to view a new instance.
+
+        Args:
+            index (int): The row index of the item to display from parent \
+                DataManager's active_data.
+        """
         if self.data.active_data is None:
             return
 

diff --git a/icat/model.py b/icat/model.py
@@ -29,7 +29,10 @@ class Model:
     Args:
         data (pd.DataFrame): The data to explore with.
         text_col (str): The name of the text column in the passed data.
-        default_sample_size (int): The initial number of points to sample for the
+        anchor_types (list[type | dict[str, any]]): The list of class types of \
+            anchors to initially include in the interface. (This can be modified \
+            after initialization through the ``anchor_list``.)
+        default_sample_size (int): The initial number of points to sample for the \
             visualizations.
     """
 
@@ -52,19 +55,25 @@ def __init__(
         self.training_data: pd.DataFrame = None
         """The rows (and only those rows) of the original data explicitly used for training."""
         self.text_col = text_col
+        """The column in the dataframe with the text to explore."""
 
         self.classifier: LogisticRegression = LogisticRegression(
             class_weight="balanced"
         )
+        """The underlying machine learning algorithm that learns based on the training data."""
 
         self.anchor_list: AnchorList = AnchorList(model=self, anchor_types=anchor_types)
+        """The ``AnchorList`` instance that manages all features/featurization necessary for
+        the classifier."""
         self.data: DataManager = DataManager(
             data=data,
             text_col=text_col,
             model=self,
             default_sample_size=default_sample_size,
         )
+        """The ``DataManager`` instance that handles all labeling tasks and data filtering/sampling."""
         self.view: InteractiveView = InteractiveView(model=self)
+        """The ``InteractiveView`` or dashboard widget that glues together the various visual components."""
 
         # set up necessary behind-the-scenes glue for anchors and data
         self.anchor_list.on_anchor_added(self._on_anchor_add)
@@ -204,8 +213,8 @@ def compute_coverage(self) -> dict[str, dict[str, float | int]]:
 
         Returns:
             A dictionary where each key is the panel id of the anchor, and the value
-            is a dictionary with the statistics: 'total', 'pos', 'neg', 'total_pct',
-            'pos_pct', and 'neg_pct'
+            is a dictionary with the statistics: ``'total'``, ``'pos'``, ``'neg'``,
+            ``'total_pct'``, ``'pos_pct'``, and ``'neg_pct'``
         """
         features = self.data.active_data.loc[:, self.feature_names()].values
         predictions = (
@@ -252,7 +261,7 @@ def is_seeded(self) -> bool:
 
         Returns:
             False if the label column doesn't exist, there's fewer than 10 labeled points,
-                or there's only one class of label.
+            or there's only one class of label.
         """
         if self.training_data is None or self.data.label_col not in self.training_data:
             # no labels!
@@ -276,13 +285,13 @@ def featurize(
         normalize_reference: pd.DataFrame | None = None,
     ) -> pd.DataFrame:
         """Run the anchors - calculates the output features for each anchor and adds the corresponding "weights" column
-        to the dataframe. These are the values that the classifier uses.
+        to the dataframe. These are the values that the classifier uses to make its predictions.
 
         Args:
             data (pd.DataFrame): The data to apply the anchors to. Uses the exploration data if not specified.
             normalize (bool): Whether to apply l1 normalization to the output values.
-            normalize_reference (Optional[pd.DataFrame]): A different dataframe whose features to sum for the L1 norm, this
-                is used with the model's separate training data versus full dataset, since the normed values of just the
+            normalize_reference (Optional[pd.DataFrame]): A different dataframe whose features to sum for the L1 norm, this \
+                is used with the model's separate training data versus full dataset, since the normed values of just the \
                 training data would be vastly different than within the full set.
 
         Returns:
@@ -334,7 +343,7 @@ def feature_names(self, in_model_only: bool = False) -> list[str]:
         """Provides a list of the feature column names in use in the data manager.
 
         Args:
-            in_model_only (bool): Only include anchors whose ``in_model`` value is
+            in_model_only (bool): Only include anchors whose ``in_model`` value is \
                 ``True``.
         """
         # TODO: conditional for if it's enabled in model?
@@ -360,7 +369,7 @@ def predict(
             to be re-fit multiple times.)
 
         Args:
-            data (Optional[pd.DataFrame]): If not specified, use the previously set training data,
+            data (Optional[pd.DataFrame]): If not specified, use the previously set training data, \
                 otherwise predict on this data.
             inplace (bool): Whether to operate directly on the passed data or create a copy of it.
 
@@ -453,7 +462,7 @@ def load(path: str) -> "Model":
             int(i) for i in model_information["icat_version"].split(".")
         )
         major, minor, patch = (int(i) for i in icat.__version__.split("."))
-        if major != saved_major or saved_minor < 7:
+        if major != saved_major or (saved_minor < 7 and saved_major < 1):
             print("ERROR - Model was saved with incompatible version of icat")
             return None
 

diff --git a/icat/table.py b/icat/table.py
@@ -11,6 +11,10 @@
 
 
 class TableContentsTemplate(v.VuetifyTemplate):
+    """The primary table used in the ``DataManager`` class. This is a heavily modified vuetify
+    DataTable, largely necessary for us to be able to put a more comprehensive set of action
+    buttons in each row."""
+
     items = traitlets.List(traitlets.Dict()).tag(sync=True)
     headers = traitlets.List(traitlets.Dict()).tag(
         sync=True
@@ -36,13 +40,6 @@ def __init__(self, *args, **kwargs):
 
         super().__init__(*args, **kwargs)
 
-        # self.on_msg(lambda widget, content, buffers: print(content))
-
-    #     self._add_selected_text_callbacks: list[Callable]
-    #     self._select_point_callbacks: list[Callable]
-    #     self._apply_label_callbacks: list[Callable]
-    #     self._update_options_callbacks: list[Callable]
-
     def on_apply_label(self, callback: callable):
         """Expect a point id and a label value (0 or 1)"""
         self._apply_label_callbacks.append(callback)

diff --git a/icat/utils.py b/icat/utils.py
@@ -41,7 +41,7 @@ def add_highlights(text: str, regex: str, color: str = "yellow") -> str:
 
     Args:
         text (str): The text to add highlights to.
-        regex (str): The regular expression to search for and sub in the text. Note that
+        regex (str): The regular expression to search for and sub in the text. Note that \
             this needs to have one capture group, so the regex should be wrapped in '()'.
         color (str): The background color to highlight the text with.
 

diff --git a/icat/view.py b/icat/view.py
@@ -108,10 +108,6 @@ def _add_list_anchor_to_viz(self, anchor: Anchor):
             anchor.theta if hasattr(anchor, "theta") else random.uniform(0, 2 * 3.14)
         )
         anchor_dict = dict(id=anchor.name, name=anchor.anchor_name, theta=theta)
-        # if type(anchor) == TFIDFAnchor:
-        #     anchor_dict["color"] = "#8e24aa"
-        # elif type(anchor) == SimilarityFunctionAnchor:
-        #     anchor_dict["color"] = "#248eaa"
         anchor_dict["color"] = self.model.anchor_list.get_anchor_type_config(
             type(anchor)
         )["color"]
@@ -281,6 +277,7 @@ def on_selected_points_change(self, callback: Callable):
         self._selected_points_change_callbacks.append(callback)
 
     def refresh_data(self):
+        """Refresh all components with the latest active_data from parent model's ``DataManager``."""
         self.anchorviz.set_points(self._serialize_data_to_dicts())
         self.model.data.item_viewer.populate(self.model.data.item_viewer.index)
         self.histograms.refresh_data(self.model.data)

diff --git a/sphinx/source/user/concepts.rst b/sphinx/source/user/concepts.rst
@@ -22,6 +22,16 @@ indicates what is "interesting" versus "uninteresting". In practice, models can
 be combined to achieve multi class predictions, or models can be used in sequence
 to allow creating a chain of filters.
 
+An ICAT model can be initialized by passing the dataset as a pandas dataframe
+and the name of the column with the text to featurize on:
+
+.. code-block:: python
+
+    import icat
+    icat.initialize()
+
+    model = icat.Model(my_data_df, "text_col")
+
 Anchors
 =======
 
@@ -44,6 +54,18 @@ configure it, which show up in the :ref:`Anchor list`
 ICAT comes with several pre-defined anchors (the ``DictionaryAnchor`` and
 ``TFIDFAnchor`` as defined above).
 
+Anchors can be added in the interface by clicking on the associated anchor
+type button in the anchorlist, or by programmatic definition:
+
+.. code-block:: python
+
+    import icat
+    icat.initialize()
+    model = icat.Model(my_data_df, "text_col")
+
+    some_anchor = icat.DictionaryAnchor(anchor_name="news", keywords=["news"])
+    model.add_anchor(some_anchor)
+
 Labelling
 =========
 
@@ -60,3 +82,15 @@ reflect this - orange indicates "interesting" and blue indicates "uninteresting"
 
 Once a model has been seeded, all labelling and anchor modifications the user
 makes retrain the model from scratch and update the corresponding predictions.
+
+Labelling can be done either with the available buttons in the data manager/item
+viewer, or programmatically:
+
+.. code-block:: python
+
+    import icat
+    icat.initialize()
+    model = icat.Model(my_data_df, "text_col")
+
+    model.data.apply_label(42, 1)  # label index 42 as "interesting"
+    model.data.apply_label(13, 0)  # label index 13 as "uninteresting"