Skip to content

Commit

Permalink
update docstrings in metadata_class.py and add more examples in set_i…
Browse files Browse the repository at this point in the history
…nfo and get_info. allow getting with metadata index when index is string (for TsdFrame)
  • Loading branch information
sjvenditto committed Nov 14, 2024
1 parent 41da12f commit 2904388
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 68 deletions.
2 changes: 1 addition & 1 deletion pynapple/core/interval_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,9 @@ def __init__(
_MetadataMixin.__init__(self)
self._class_attributes = self.__dir__() # get list of all attributes
self._class_attributes.append("_class_attributes") # add this property
self._initialized = True
if drop_meta is False:
self.set_info(metadata)
self._initialized = True

def __repr__(self):
# Start by determining how many columns and rows.
Expand Down
198 changes: 134 additions & 64 deletions pynapple/core/metadata_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,14 @@
class _MetadataMixin:
"""
An object containing metadata functionality for TsGroup, IntervalSet, or TsdFrame objects.
This is a private mixin that is not meant to be instantiated on its own.
"""

def __init__(self, metadata=None):
def __init__(self):
"""
Metadata initializer
Parameters
----------
*args : list
List of pandas.DataFrame
**kwargs : dict
Dictionary containing metadata information
Metadata initializer. This sets the metadata index using properties of the inheriting class and initialized an empty metadata DataFrame.
Metadata can be set using the `set_info()` method.
"""
if self.__class__.__name__ == "TsdFrame":
# metadata index is the same as the columns for TsdFrame
Expand All @@ -31,8 +26,6 @@ def __init__(self, metadata=None):

self._metadata = pd.DataFrame(index=self.metadata_index)

self.set_info(metadata)

def __dir__(self):
"""
Adds metadata columns to the list of attributes.
Expand All @@ -41,7 +34,8 @@ def __dir__(self):

def __setattr__(self, name, value):
"""
Add metadata as an attribute assignment
Allow metadata to be added via attribute assignment after initialization, assuming the attribute is not already defined.
Sets metadata using the `set_info()` method.
"""
# self._initialized must be defined in the class that inherits _MetadataMixin
# and it must be set to True after metadata is initialized
Expand All @@ -52,22 +46,7 @@ def __setattr__(self, name, value):

def __getattr__(self, name):
"""
Allows dynamic access to metadata columns as properties.
Parameters
----------
name : str
The name of the metadata column to access.
Returns
-------
pandas.Series
The series of values for the requested metadata column.
Raises
------
AttributeError
If the requested attribute is not a metadata column.
Allows access to metadata columns as properties if the metadata name does not conflict with existing attributes.
"""
# avoid infinite recursion when pickling due to
# self._metadata.column having attributes '__reduce__', '__reduce_ex__'
Expand All @@ -83,17 +62,23 @@ def __getattr__(self, name):
)

def __setitem__(self, key, value):
"""
Allows metadata to be set using key indexing. Sets metadata using the `set_info()` method.
"""
if not isinstance(key, str):
raise TypeError("Metadata keys must be strings!")
self.set_info(**{key: value})

def __getitem__(self, key):
"""
Allows metadata to be accessed using key indexing. Calls the `get_info()` method.
"""
return self.get_info(key)

@property
def metadata(self):
"""
Returns metadata DataFrame
Returns a read-only version (copy) of the _metadata DataFrame
"""
# return copy so that metadata cannot modified in place
return self._metadata.copy()
Expand All @@ -106,6 +91,10 @@ def metadata_columns(self):
return list(self._metadata.columns)

def _raise_invalid_metadata_column_name(self, name):
"""
Check if metadata name is valid and raise warnings if it cannot be accessed as an attribute or key.
Prevents metadata names for TsdFrame to match data column names, and prevents "rate" from being changed for TsGroup.
"""
if not isinstance(name, str):
raise TypeError(
f"Invalid metadata type {type(name)}. Metadata column names must be strings!"
Expand Down Expand Up @@ -151,9 +140,8 @@ def _raise_invalid_metadata_column_name(self, name):

def _check_metadata_column_names(self, metadata=None, **kwargs):
"""
Throw warnings when metadata names cannot be accessed as attributes or keys.
Throw warnings when metadata names cannot be accessed as attributes or keys. Wrapper for _raise_invalid_metadata_column_name.
"""

if metadata is not None:
if isinstance(metadata, pd.DataFrame):
[
Expand All @@ -171,25 +159,30 @@ def _check_metadata_column_names(self, metadata=None, **kwargs):

def set_info(self, metadata=None, **kwargs):
"""
Add metadata information about the object.
Metadata are saved as a DataFrame.
Add metadata information about the object. Metadata are saved as a pandas.DataFrame.
Parameters
----------
metadata : pandas.DataFrame or dict
metadata information
**kwargs
Can be either pandas.Series, numpy.ndarray, list or tuple
metadata : pandas.DataFrame or dict or pandas.Series, optional
Object containing metadata information, where metadata names are extracted from column names (pandas.DataFrame), key names (dict), or index (pandas.DataFrame).
- If a pandas.DataFrame is passed, the index must match the metadata index.
- If a dictionary is passed, the length of each value must match the metadata index length.
- A pandas.Series can only be passed if the object has a single interval.
**kwargs : optional
Key-word arguments for setting metadata. Values can be either pandas.Series, numpy.ndarray, list or tuple, and must have the same length as the metadata index.
If pandas.Series, the index must match the metadata index.
If the object only has one index, non-iterable values are also accepted.
Raises
------
ValueError
- If metadata index does not match input index (pandas.DataFrame, pandas.Series)
- If input array length does not match metadata length (numpy.ndarray, list, tuple)
RuntimeError
Raise an error if
no column labels are found when passing simple arguments,
indexes are not equals for a pandas series,+
not the same length when passing numpy array.
If the metadata argument is passed as a pandas.Series (for more than one metadata index), numpy.ndarray, list or tuple.
TypeError
If some of the provided metadata could not be set.
If key-word arguments are not of type `pandas.Series`, `tuple`, `list`, or `numpy.ndarray` and cannot be set.
Examples
--------
Expand All @@ -207,26 +200,35 @@ def set_info(self, metadata=None, **kwargs):
>>> structs = pd.DataFrame(index = [0,1,2], data=['pfc','pfc','ca1'], columns=['struct'])
>>> tsgroup.set_info(structs)
>>> tsgroup
Index Freq. (Hz) struct
------- ------------ --------
0 1 pfc
1 2 pfc
2 4 ca1
Index rate struct
------- ------- --------
0 0.66722 pfc
1 1.33445 pfc
2 4.00334 ca1
To add metadata with a dictionary:
>>> coords = {"coords": [[0,0],[0,1],[1,0]]}
>>> tsgroup.set_info(coords)
>>> tsgroup
Index rate struct coords
------- ------- -------- --------
0 0.66722 pfc [0, 0]
1 1.33445 pfc [0, 1]
2 4.00334 ca1 [1, 0]
To add metadata with a pd.Series, numpy.ndarray, list or tuple:
To add metadata with a keyword arument (pd.Series, numpy.ndarray, list or tuple):
>>> hd = pd.Series(index = [0,1,2], data = [0,1,1])
>>> tsgroup.set_info(hd=hd)
>>> tsgroup
Index Freq. (Hz) struct hd
------- ------------ -------- ----
0 1 pfc 0
1 2 pfc 1
2 4 ca1 1
Index rate struct coords hd
------- ------- -------- -------- ----
0 0.66722 pfc [0, 0] 0
1 1.33445 pfc [0, 1] 1
2 4.00334 ca1 [1, 0] 1
"""
# check for duplicate names, otherwise "self.metadata_name"
# syntax would behave unexpectedly.
# check for duplicate names and/or formatted names that cannot be accessed as attributes or keys
self._check_metadata_column_names(metadata, **kwargs)
not_set = []
if metadata is not None:
Expand Down Expand Up @@ -283,23 +285,88 @@ def set_info(self, metadata=None, **kwargs):

def get_info(self, key):
"""
Returns the metainfo located in one column.
Returns metadata based on metadata column name or index.
Parameters
----------
key : str
One of the metainfo columns name
key :
- str: metadata column name or metadata index (for TsdFrame with string column names)
- list of str: multiple metadata column names or metadata indices (for TsdFrame with string column names)
- Number: metadata index (for TsGroup and IntervalSet)
- list, np.ndarray, pd.Series: metadata index (for TsGroup and IntervalSet)
- tuple: metadata index and column name (for TsGroup and IntervalSet)
Returns
-------
pandas.Series
The metainfo
pandas.Series or pandas.DataFrame or Any (for single location)
The metadata information based on the key provided.
Raises
------
IndexError
If the metadata index is not found.
Examples
--------
>>> import pynapple as nap
>>> import numpy as np
>>> tmp = { 0:nap.Ts(t=np.arange(0,200), time_units='s'),
1:nap.Ts(t=np.arange(0,200,0.5), time_units='s'),
2:nap.Ts(t=np.arange(0,300,0.25), time_units='s'),
}
>>> metadata = {"l1": [1, 2, 3], "l2": ["x", "x", "y"]}
>>> tsgroup = nap.TsGroup(tmp,metadata=metadata)
To access a single metadata column:
>>> tsgroup.get_info("l1")
0 1
1 2
2 3
Name: l1, dtype: int64
To access multiple metadata columns:
>>> tsgroup.get_info(["l1", "l2"])
l1 l2
0 1 x
1 2 x
2 3 y
To access metadata of a single index:
>>> tsgroup.get_info(0)
rate 0.667223
l1 1
l2 x
Name: 0, dtype: object
To access metadata of multiple indices:
>>> tsgroup.get_info([0, 1])
rate l1 l2
0 0.667223 1 x
1 1.334445 2 x
To access metadata of a single index and column:
>>> tsgroup.get_info((0, "l1"))
np.int64(1)
"""
# string indexing of one or more metadata columns
if isinstance(key, str) or (
isinstance(key, list) and all([isinstance(k, str) for k in key])
):
# metadata[str] or metadata[[*str]]
return self._metadata[key]
if (
isinstance(key, list) and all([k in self.metadata_index for k in key])
) or (isinstance(key, str) and (key in self.metadata_index)):
# metadata index is a string
return self._metadata.loc[key]
else:
# metadata[str] or metadata[[*str]]
return self._metadata[key]

elif isinstance(key, (Number, list, np.ndarray, pd.Series)) or (
isinstance(key, tuple)
and (
Expand All @@ -310,12 +377,15 @@ def get_info(self, key):
)
)
):
# assume key is index, or tupe of index and column name
# metadata[Number], metadata[array_like], metadata[Any, str], or metadata[Any, [*str]]
return self._metadata.loc[key]

elif isinstance(key, slice):
# assume key as index slice
# DataFrame's `loc` treats slices differently (inclusive of stop) than numpy
# `iloc` exludes the stop index, like numpy
return self._metadata.iloc[key]
else:
# we don't allow indexing columns with numbers
# we don't allow indexing columns with numbers, e.g. metadata[0,0]
raise IndexError(f"Unknown metadata index {key}")
2 changes: 1 addition & 1 deletion pynapple/core/time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -956,8 +956,8 @@ def __init__(
self._class_attributes = self.__dir__()
self._class_attributes.append("_class_attributes")
# set metadata
self.set_info(metadata)
self._initialized = True
self.set_info(metadata)

@property
def loc(self):
Expand Down
4 changes: 2 additions & 2 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,8 +445,8 @@ def test_tsdframe_metadata_slicing(tsdframe_meta):
pytest.raises(ValueError, match="Invalid metadata name"),
# shape mismatch with setitem
pytest.raises(ValueError),
# key error with get_info
pytest.raises(KeyError),
# assertion error with get_info
pytest.raises(AssertionError),
# attribute should raise error
pytest.raises(AttributeError),
# key should not match metadata
Expand Down

0 comments on commit 2904388

Please sign in to comment.