diff --git a/pynapple/core/interval_set.py b/pynapple/core/interval_set.py index ceab66b1..9b57edc8 100644 --- a/pynapple/core/interval_set.py +++ b/pynapple/core/interval_set.py @@ -219,9 +219,9 @@ def __init__( _MetadataMixin.__init__(self) self._class_attributes = self.__dir__() # get list of all attributes self._class_attributes.append("_class_attributes") # add this property + self._initialized = True if drop_meta is False: self.set_info(metadata) - self._initialized = True def __repr__(self): # Start by determining how many columns and rows. diff --git a/pynapple/core/metadata_class.py b/pynapple/core/metadata_class.py index da67d7c8..65499b19 100644 --- a/pynapple/core/metadata_class.py +++ b/pynapple/core/metadata_class.py @@ -8,19 +8,14 @@ class _MetadataMixin: """ An object containing metadata functionality for TsGroup, IntervalSet, or TsdFrame objects. + This is a private mixin that is not meant to be instantiated on its own. """ - def __init__(self, metadata=None): + def __init__(self): """ - Metadata initializer - - Parameters - ---------- - *args : list - List of pandas.DataFrame - **kwargs : dict - Dictionary containing metadata information + Metadata initializer. This sets the metadata index using properties of the inheriting class and initialized an empty metadata DataFrame. + Metadata can be set using the `set_info()` method. """ if self.__class__.__name__ == "TsdFrame": # metadata index is the same as the columns for TsdFrame @@ -31,8 +26,6 @@ def __init__(self, metadata=None): self._metadata = pd.DataFrame(index=self.metadata_index) - self.set_info(metadata) - def __dir__(self): """ Adds metadata columns to the list of attributes. @@ -41,7 +34,8 @@ def __dir__(self): def __setattr__(self, name, value): """ - Add metadata as an attribute assignment + Allow metadata to be added via attribute assignment after initialization, assuming the attribute is not already defined. + Sets metadata using the `set_info()` method. """ # self._initialized must be defined in the class that inherits _MetadataMixin # and it must be set to True after metadata is initialized @@ -52,22 +46,7 @@ def __setattr__(self, name, value): def __getattr__(self, name): """ - Allows dynamic access to metadata columns as properties. - - Parameters - ---------- - name : str - The name of the metadata column to access. - - Returns - ------- - pandas.Series - The series of values for the requested metadata column. - - Raises - ------ - AttributeError - If the requested attribute is not a metadata column. + Allows access to metadata columns as properties if the metadata name does not conflict with existing attributes. """ # avoid infinite recursion when pickling due to # self._metadata.column having attributes '__reduce__', '__reduce_ex__' @@ -83,17 +62,23 @@ def __getattr__(self, name): ) def __setitem__(self, key, value): + """ + Allows metadata to be set using key indexing. Sets metadata using the `set_info()` method. + """ if not isinstance(key, str): raise TypeError("Metadata keys must be strings!") self.set_info(**{key: value}) def __getitem__(self, key): + """ + Allows metadata to be accessed using key indexing. Calls the `get_info()` method. + """ return self.get_info(key) @property def metadata(self): """ - Returns metadata DataFrame + Returns a read-only version (copy) of the _metadata DataFrame """ # return copy so that metadata cannot modified in place return self._metadata.copy() @@ -106,6 +91,10 @@ def metadata_columns(self): return list(self._metadata.columns) def _raise_invalid_metadata_column_name(self, name): + """ + Check if metadata name is valid and raise warnings if it cannot be accessed as an attribute or key. + Prevents metadata names for TsdFrame to match data column names, and prevents "rate" from being changed for TsGroup. + """ if not isinstance(name, str): raise TypeError( f"Invalid metadata type {type(name)}. Metadata column names must be strings!" @@ -151,9 +140,8 @@ def _raise_invalid_metadata_column_name(self, name): def _check_metadata_column_names(self, metadata=None, **kwargs): """ - Throw warnings when metadata names cannot be accessed as attributes or keys. + Throw warnings when metadata names cannot be accessed as attributes or keys. Wrapper for _raise_invalid_metadata_column_name. """ - if metadata is not None: if isinstance(metadata, pd.DataFrame): [ @@ -171,25 +159,30 @@ def _check_metadata_column_names(self, metadata=None, **kwargs): def set_info(self, metadata=None, **kwargs): """ - Add metadata information about the object. - Metadata are saved as a DataFrame. + Add metadata information about the object. Metadata are saved as a pandas.DataFrame. Parameters ---------- - metadata : pandas.DataFrame or dict - metadata information - **kwargs - Can be either pandas.Series, numpy.ndarray, list or tuple + metadata : pandas.DataFrame or dict or pandas.Series, optional + Object containing metadata information, where metadata names are extracted from column names (pandas.DataFrame), key names (dict), or index (pandas.DataFrame). + - If a pandas.DataFrame is passed, the index must match the metadata index. + - If a dictionary is passed, the length of each value must match the metadata index length. + - A pandas.Series can only be passed if the object has a single interval. + **kwargs : optional + Key-word arguments for setting metadata. Values can be either pandas.Series, numpy.ndarray, list or tuple, and must have the same length as the metadata index. + If pandas.Series, the index must match the metadata index. + If the object only has one index, non-iterable values are also accepted. Raises ------ + ValueError + - If metadata index does not match input index (pandas.DataFrame, pandas.Series) + - If input array length does not match metadata length (numpy.ndarray, list, tuple) RuntimeError - Raise an error if - no column labels are found when passing simple arguments, - indexes are not equals for a pandas series,+ - not the same length when passing numpy array. + If the metadata argument is passed as a pandas.Series (for more than one metadata index), numpy.ndarray, list or tuple. TypeError - If some of the provided metadata could not be set. + If key-word arguments are not of type `pandas.Series`, `tuple`, `list`, or `numpy.ndarray` and cannot be set. + Examples -------- @@ -207,26 +200,35 @@ def set_info(self, metadata=None, **kwargs): >>> structs = pd.DataFrame(index = [0,1,2], data=['pfc','pfc','ca1'], columns=['struct']) >>> tsgroup.set_info(structs) >>> tsgroup - Index Freq. (Hz) struct - ------- ------------ -------- - 0 1 pfc - 1 2 pfc - 2 4 ca1 + Index rate struct + ------- ------- -------- + 0 0.66722 pfc + 1 1.33445 pfc + 2 4.00334 ca1 + + To add metadata with a dictionary: + + >>> coords = {"coords": [[0,0],[0,1],[1,0]]} + >>> tsgroup.set_info(coords) + >>> tsgroup + Index rate struct coords + ------- ------- -------- -------- + 0 0.66722 pfc [0, 0] + 1 1.33445 pfc [0, 1] + 2 4.00334 ca1 [1, 0] - To add metadata with a pd.Series, numpy.ndarray, list or tuple: + To add metadata with a keyword arument (pd.Series, numpy.ndarray, list or tuple): >>> hd = pd.Series(index = [0,1,2], data = [0,1,1]) >>> tsgroup.set_info(hd=hd) >>> tsgroup - Index Freq. (Hz) struct hd - ------- ------------ -------- ---- - 0 1 pfc 0 - 1 2 pfc 1 - 2 4 ca1 1 - + Index rate struct coords hd + ------- ------- -------- -------- ---- + 0 0.66722 pfc [0, 0] 0 + 1 1.33445 pfc [0, 1] 1 + 2 4.00334 ca1 [1, 0] 1 """ - # check for duplicate names, otherwise "self.metadata_name" - # syntax would behave unexpectedly. + # check for duplicate names and/or formatted names that cannot be accessed as attributes or keys self._check_metadata_column_names(metadata, **kwargs) not_set = [] if metadata is not None: @@ -283,23 +285,88 @@ def set_info(self, metadata=None, **kwargs): def get_info(self, key): """ - Returns the metainfo located in one column. + Returns metadata based on metadata column name or index. Parameters ---------- - key : str - One of the metainfo columns name + key : + - str: metadata column name or metadata index (for TsdFrame with string column names) + - list of str: multiple metadata column names or metadata indices (for TsdFrame with string column names) + - Number: metadata index (for TsGroup and IntervalSet) + - list, np.ndarray, pd.Series: metadata index (for TsGroup and IntervalSet) + - tuple: metadata index and column name (for TsGroup and IntervalSet) Returns ------- - pandas.Series - The metainfo + pandas.Series or pandas.DataFrame or Any (for single location) + The metadata information based on the key provided. + + Raises + ------ + IndexError + If the metadata index is not found. + + Examples + -------- + >>> import pynapple as nap + >>> import numpy as np + >>> tmp = { 0:nap.Ts(t=np.arange(0,200), time_units='s'), + 1:nap.Ts(t=np.arange(0,200,0.5), time_units='s'), + 2:nap.Ts(t=np.arange(0,300,0.25), time_units='s'), + } + >>> metadata = {"l1": [1, 2, 3], "l2": ["x", "x", "y"]} + >>> tsgroup = nap.TsGroup(tmp,metadata=metadata) + + To access a single metadata column: + + >>> tsgroup.get_info("l1") + 0 1 + 1 2 + 2 3 + Name: l1, dtype: int64 + + To access multiple metadata columns: + + >>> tsgroup.get_info(["l1", "l2"]) + l1 l2 + 0 1 x + 1 2 x + 2 3 y + + To access metadata of a single index: + + >>> tsgroup.get_info(0) + rate 0.667223 + l1 1 + l2 x + Name: 0, dtype: object + + To access metadata of multiple indices: + + >>> tsgroup.get_info([0, 1]) + rate l1 l2 + 0 0.667223 1 x + 1 1.334445 2 x + + To access metadata of a single index and column: + + >>> tsgroup.get_info((0, "l1")) + np.int64(1) + """ + # string indexing of one or more metadata columns if isinstance(key, str) or ( isinstance(key, list) and all([isinstance(k, str) for k in key]) ): - # metadata[str] or metadata[[*str]] - return self._metadata[key] + if ( + isinstance(key, list) and all([k in self.metadata_index for k in key]) + ) or (isinstance(key, str) and (key in self.metadata_index)): + # metadata index is a string + return self._metadata.loc[key] + else: + # metadata[str] or metadata[[*str]] + return self._metadata[key] + elif isinstance(key, (Number, list, np.ndarray, pd.Series)) or ( isinstance(key, tuple) and ( @@ -310,12 +377,15 @@ def get_info(self, key): ) ) ): + # assume key is index, or tupe of index and column name # metadata[Number], metadata[array_like], metadata[Any, str], or metadata[Any, [*str]] return self._metadata.loc[key] + elif isinstance(key, slice): + # assume key as index slice # DataFrame's `loc` treats slices differently (inclusive of stop) than numpy # `iloc` exludes the stop index, like numpy return self._metadata.iloc[key] else: - # we don't allow indexing columns with numbers + # we don't allow indexing columns with numbers, e.g. metadata[0,0] raise IndexError(f"Unknown metadata index {key}") diff --git a/pynapple/core/time_series.py b/pynapple/core/time_series.py index 389432b4..b3e81b5b 100644 --- a/pynapple/core/time_series.py +++ b/pynapple/core/time_series.py @@ -956,8 +956,8 @@ def __init__( self._class_attributes = self.__dir__() self._class_attributes.append("_class_attributes") # set metadata - self.set_info(metadata) self._initialized = True + self.set_info(metadata) @property def loc(self): diff --git a/tests/test_metadata.py b/tests/test_metadata.py index acde65ee..b3d11481 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -445,8 +445,8 @@ def test_tsdframe_metadata_slicing(tsdframe_meta): pytest.raises(ValueError, match="Invalid metadata name"), # shape mismatch with setitem pytest.raises(ValueError), - # key error with get_info - pytest.raises(KeyError), + # assertion error with get_info + pytest.raises(AssertionError), # attribute should raise error pytest.raises(AttributeError), # key should not match metadata