Skip to content

Commit

Permalink
Merge branch 'master' into release
Browse files Browse the repository at this point in the history
  • Loading branch information
venaturum committed Jan 29, 2022
2 parents 43f44e4 + 4e62fd3 commit d8b9b44
Show file tree
Hide file tree
Showing 8 changed files with 868 additions and 579 deletions.
9 changes: 9 additions & 0 deletions docs/release_notes/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ Release notes
========================


**v0.8.0 2022-01-29**

- Added `bins` parameter to :func:`piso.coverage` and :meth:`ArrayAccessor.coverage() <piso.accessor.ArrayAccessor.coverage>`
- Added `how` parameter to :func:`piso.coverage` and :meth:`ArrayAccessor.coverage() <piso.accessor.ArrayAccessor.coverage>`
- Added `result` parameter to :func:`piso.contains` and :meth:`ArrayAccessor.contains() <piso.accessor.ArrayAccessor.contains>`
- Added `how` parameter to :func:`piso.contains` and :meth:`ArrayAccessor.contains() <piso.accessor.ArrayAccessor.contains>`


**v0.7.0 2021-11-20**

Added the following methods
Expand All @@ -28,6 +36,7 @@ The following methods were extended to accommodate intervals with *closed = "bot
- :func:`piso.lookup`
- :func:`piso.isdisjoint` (and :meth:`ArrayAccessor.isdisjoint() <piso.accessor.ArrayAccessor.isdisjoint>`)


**v0.5.0 2021-11-02**

Added the following methods
Expand Down
8 changes: 6 additions & 2 deletions piso/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,12 @@ def issubset(self, *interval_arrays, squeeze=False):
)

@Appender(docstrings.coverage_docstring, join="\n", indents=1)
def coverage(self, domain=None):
def coverage(self, domain=None, bins=False, how="fraction"):
return intervalarray.coverage(
self._interval_array,
domain,
bins,
how,
)

@Appender(docstrings.complement_docstring, join="\n", indents=1)
Expand All @@ -156,11 +158,13 @@ def complement(self, domain=None):
)

@Appender(docstrings.contains_docstring, join="\n", indents=1)
def contains(self, x, include_index=True):
def contains(self, x, include_index=True, result="cartesian", how="any"):
return intervalarray.contains(
self._interval_array,
x,
include_index,
result,
how,
)

@Appender(docstrings.split_docstring, join="\n", indents=1)
Expand Down
80 changes: 68 additions & 12 deletions piso/docstrings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,11 +591,13 @@ def join_params(list_of_param_strings):


coverage_docstring = """
Calculates the fraction of a domain covered by a collection of intervals.
Calculates the size of a domain (or possibly multiple domains) covered by a collection of intervals.
The intervals are contained in the array object the accessor belongs to.
The (possibly overlapping) intervals may cover none, part, or all of the domain.
Calculation over multiple domains is only possible when *bins* = True.
Parameters
----------
domain : :py:class:`tuple`, :class:`pandas.Interval`, :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`, optional
Expand All @@ -604,11 +606,19 @@ def join_params(list_of_param_strings):
If *domain* is a tuple then it should specify lower and upper bounds, and be equivalent to a
:class:`pandas.Interval`. If *domain* is a :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
then the intervals it contains define a possibly disconnected domain.
If *bins* = True then *domain* must be :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` with disjoint intervals.
bins : boolean, default False
If False, then the *domain* is interpreted as a single domain and one value is returned.
If True, then *domain* is interpreted as a set of disjoint bins, and coverage is calculated separately for each bin.
how : {"fraction", "sum"}, default "fraction"
If *how* = "fraction" then the result is a fraction of the size of the domain.
If *how* = "sum" then the result is the length of the domain covered.
.. versionadded:: 0.8.0
Returns
----------
float
a number between 0 and 1, representing the fraction of the domain covered.
float or :class:`pandas.Series`
Examples
-----------
Expand All @@ -630,11 +640,24 @@ def join_params(list_of_param_strings):
>>> arr1.piso.coverage(pd.Interval(-10, 10))
0.3
>>> arr1.piso.coverage(pd.Interval(-10, 10), how="sum")
6
>>> domain = pd.arrays.IntervalArray.from_tuples(
... [(4,6), (7, 9)],
... [(4,6), (7, 10)],
... )
>>> arr1.piso.coverage(domain)
0.5
0.4
>>> arr1.piso.coverage(domain, bins=True)
(4, 6] 0.500000
(7, 10] 0.333333
dtype: float64
>>> arr1.piso.coverage(domain, bins=True, how="sum")
(4, 6] 1.0
(7, 10] 1.0
dtype: float64
"""

complement_docstring = """
Expand Down Expand Up @@ -736,25 +759,39 @@ def join_params(list_of_param_strings):


contains_docstring = """
Check pair-wise if a set of intervals, belonging to the object the accessor belongs to, contains a set of values.
Evaluates the intersection of a set of intervals with a set of points.
Returns a 2-dimensional boolean mask *M* of shape *(m,n)* where *m* is the number of intervals, and
*n* is the number of points. The element in the i-th row and j-th column is True if
The format of the result is dependent on the *result* parameter. If *result = "cartesian"* then the
function returns a 2-dimensional boolean mask *M* of shape *(m,n)* where *m* is the number of
intervals, and *n* is the number of points. The element in the i-th row and j-th column is True if
the i-th interval contains the j-th point.
If *result = "points"* then the result is a 1-dimensional boolean mask of length *n*.
If *result = "intervals"* then the result is a 1-dimensional boolean mask of length *m*.
Parameters
----------
interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
Contains the intervals. May be left-closed, right-closed, both, or neither.
x : scalar, or array-like of scalars
Values in *x* should belong to the same domain as the intervals in *interval_array*.
May be left-closed, right-closed, both, or neither.
Values in *x* should belong to the same domain as the intervals contained by the object the
accessor belongs to.
include_index : boolean, default True
Indicates whether to return a :class:`numpy.ndarray` or :class:`pandas.DataFrame` indexed
by *interval_array* and column names equal to *x*
result : {"cartesian", "points", "intervals"}, default "cartesian"
If *result* = "cartesian" then the result will be two dimensional, otherwise it will be
one dimensional.
how : {"any", "all"}, default "any"
Only relevant if *result* is not "cartesian". This parameter indicates either:
- if *result* = "intervals", a True value means any or all points are contained within that interval, or
- if *result* = "points", a True value means any or all intervals contain that point.
The interpretation that applies is determined by the *result* parameter.
Returns
----------
:class:`numpy.ndarray` or :class:`pandas.DataFrame`
Two dimensional and boolean valued. Return type dependent on *include_index*.
:class:`numpy.ndarray`, :class:`pandas.DataFrame` or :class:`pandas.Series`
One, or two, dimensional and boolean valued. Return type dependent on *include_index* and *result*.
Examples
-----------
Expand All @@ -781,6 +818,25 @@ def join_params(list_of_param_strings):
array([[False, True, True, True],
[False, False, True, True]])
>>> arr.piso.contains([0, 1, 3, 4], result="points")
0 False
1 True
3 True
4 True
dtype: bool
>>> arr.piso.contains([0, 1, 3, 4], result="points", how="all")
0 False
1 False
3 True
4 True
dtype: bool
>>> arr.piso.contains([0, 1, 3, 4], result="intervals")
(0, 4] True
(2, 5] True
dtype: bool
>>> pd.IntervalIndex.from_tuples([(0,2)]).piso.contains(1, include_index=False)
array([[ True]])
"""
Expand Down
75 changes: 65 additions & 10 deletions piso/docstrings/intervalarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,9 @@ def join_params(list_of_param_strings):


coverage_docstring = """
Calculates the fraction of a domain covered by a collection of intervals.
Calculates the fraction of a domain (or possibly multiple domains) covered by a collection of intervals.
Calculation over multiple domains is only possible when *bins* = True.
Parameters
----------
Expand All @@ -609,11 +611,19 @@ def join_params(list_of_param_strings):
If *domain* is a tuple then it should specify lower and upper bounds, and be equivalent to a
:class:`pandas.Interval`. If *domain* is a :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
then the intervals it contains define a possibly disconnected domain.
If *bins* = True then *domain* must be :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray` with disjoint intervals.
bins : boolean, default False
If False, then the *domain* is interpreted as a single domain and one value is returned.
If True, then *domain* is interpreted as a set of disjoint bins, and coverage is calculated separately for each bin.
how : {"fraction", "sum"}, default "fraction"
If *how* = "fraction" then the result is a fraction of the size of the domain.
If *how* = "sum" then the result is the length of the domain covered.
.. versionadded:: 0.8.0
Returns
----------
float
a number between 0 and 1, representing the fraction of the domain covered.
float or :class:`pandas.Series`
Examples
-----------
Expand All @@ -634,11 +644,24 @@ def join_params(list_of_param_strings):
>>> piso.coverage(arr1, pd.Interval(-10, 10))
0.3
>>> piso.coverage(arr1, pd.Interval(-10, 10), how="sum")
6
>>> domain = pd.arrays.IntervalArray.from_tuples(
... [(4,6), (7, 9)],
... [(4,6), (7, 10)],
... )
>>> piso.coverage(arr1, domain)
0.5
0.4
>>> piso.coverage(arr1, domain, bins=True)
(4, 6] 0.500000
(7, 10] 0.333333
dtype: float64
>>> piso.coverage(arr1, domain, bins=True, how="sum")
(4, 6] 1.0
(7, 10] 1.0
dtype: float64
"""


Expand Down Expand Up @@ -743,12 +766,16 @@ def join_params(list_of_param_strings):


contains_docstring = """
Check pair-wise if a set of intervals contains a set of values
Evaluates the intersection of a set of intervals with a set of points.
Returns a 2-dimensional boolean mask *M* of shape *(m,n)* where *m* is the number of intervals, and
*n* is the number of points. The element in the i-th row and j-th column is True if
The format of the result is dependent on the *result* parameter. If *result = "cartesian"* then the
function returns a 2-dimensional boolean mask *M* of shape *(m,n)* where *m* is the number of
intervals, and *n* is the number of points. The element in the i-th row and j-th column is True if
the i-th interval contains the j-th point.
If *result = "points"* then the result is a 1-dimensional boolean mask of length *n*.
If *result = "intervals"* then the result is a 1-dimensional boolean mask of length *m*.
Parameters
----------
interval_array : :class:`pandas.IntervalIndex` or :class:`pandas.arrays.IntervalArray`
Expand All @@ -758,11 +785,20 @@ def join_params(list_of_param_strings):
include_index : boolean, default True
Indicates whether to return a :class:`numpy.ndarray` or :class:`pandas.DataFrame` indexed
by *interval_array* and column names equal to *x*
result : {"cartesian", "points", "intervals"}, default "cartesian"
If *result* = "cartesian" then the result will be two dimensional, otherwise it will be
one dimensional.
how : {"any", "all"}, default "any"
Only relevant if *result* is not "cartesian". This parameter indicates either:
- if *result* = "intervals", a True value means any or all points are contained within that interval, or
- if *result* = "points", a True value means any or all intervals contain that point.
The interpretation that applies is determined by the *result* parameter.
Returns
----------
:class:`numpy.ndarray` or :class:`pandas.DataFrame`
Two dimensional and boolean valued. Return type dependent on *include_index*.
:class:`numpy.ndarray`, :class:`pandas.DataFrame` or :class:`pandas.Series`
One, or two, dimensional and boolean valued. Return type dependent on *include_index* and *result*.
Examples
-----------
Expand All @@ -788,6 +824,25 @@ def join_params(list_of_param_strings):
array([[False, True, True, True],
[False, False, True, True]])
>>> piso.contains(arr, [0, 1, 3, 4], result="points")
0 False
1 True
3 True
4 True
dtype: bool
>>> piso.contains(arr, [0, 1, 3, 4], result="points", how="all")
0 False
1 False
3 True
4 True
dtype: bool
>>> piso.contains(arr, [0, 1, 3, 4], result="intervals")
(0, 4] True
(2, 5] True
dtype: bool
>>> piso.contains(pd.IntervalIndex.from_tuples([(0,2)]), 1, include_index=False)
array([[ True]])
"""
Expand Down
43 changes: 34 additions & 9 deletions piso/intervalarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,15 +178,30 @@ def _get_domain_tuple(interval_array, domain):


@Appender(docstrings.coverage_docstring, join="\n", indents=1)
def coverage(interval_array, domain=None):
def coverage(interval_array, domain=None, bins=False, how="fraction"):
assert how in ("fraction", "sum")

def _validate_domain():
if not isinstance(domain, (pd.IntervalIndex, pd.arrays.IntervalArray)):
raise ValueError(
"If bins argument is true then domain parameter must be a pandas IntervalIndex or IntervalArray."
)
if not isdisjoint(domain):
raise ValueError(
"If bins argument is true then domain parameter must represent disjoint intervals."
)

stepfunction = _interval_x_to_stairs(interval_array).make_boolean()
if isinstance(domain, (pd.IntervalIndex, pd.arrays.IntervalArray)):
if bins:
_validate_domain()
adjusted_domain = stepfunction.slice(pd.IntervalIndex(domain))
elif isinstance(domain, (pd.IntervalIndex, pd.arrays.IntervalArray)):
domain = _interval_x_to_stairs(domain)
result = stepfunction.where(domain).mean()
adjusted_domain = stepfunction.where(domain)
else:
domain = _get_domain_tuple(interval_array, domain)
result = stepfunction.clip(*domain).mean()
return result
adjusted_domain = stepfunction.clip(*domain)
return adjusted_domain.mean() if how == "fraction" else adjusted_domain.integral()


@Appender(docstrings.complement_docstring, join="\n", indents=1)
Expand All @@ -203,7 +218,9 @@ def complement(interval_array, domain=None):


@Appender(docstrings.contains_docstring, join="\n", indents=1)
def contains(interval_array, x, include_index=True):
def contains(interval_array, x, include_index=True, result="cartesian", how="any"):
assert result in ("cartesian", "intervals", "points")
assert how in ("any", "all")
starts = interval_array.left.values
ends = interval_array.right.values
x = pd.Series(x).values
Expand All @@ -213,10 +230,18 @@ def contains(interval_array, x, include_index=True):
left_compare = (
np.greater_equal if interval_array.closed in ("left", "both") else np.greater
)
result = (right_compare.outer(x, ends) & left_compare.outer(x, starts)).transpose()
calc = (right_compare.outer(x, ends) & left_compare.outer(x, starts)).transpose()
if result != "cartesian":
logical_method = np.logical_or if how == "any" else np.logical_and
axis = 0 if result == "points" else 1
calc = logical_method.reduce(calc, axis=axis)
if include_index:
return pd.DataFrame(result, index=interval_array, columns=x)
return result
if result == "cartesian":
calc = pd.DataFrame(calc, index=interval_array, columns=x)
else:
index = x if result == "points" else interval_array
calc = pd.Series(calc, index=index)
return calc


@Appender(docstrings.split_docstring, join="\n", indents=1)
Expand Down
Loading

0 comments on commit d8b9b44

Please sign in to comment.