Skip to content

Commit

Permalink
Support datetime, date decimal in hash_datas
Browse files Browse the repository at this point in the history
  • Loading branch information
Erotemic committed Sep 8, 2024
1 parent e4b6eaf commit db77869
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ This project (loosely) adheres to [Semantic Versioning](https://semver.org/spec/

### Changed
* Added module name printout to `schedule_deprecation`
* `ub.hash_data` now supports `datetime.datetime`, `datetime.date`, and `decimal.Decimal` objects.


## Version 1.3.6 - Released 2024-06-08
Expand Down
57 changes: 55 additions & 2 deletions tests/test_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,9 +319,7 @@ def test_numpy_random_state():
if np is None:
pytest.skip('requires numpy')
data = np.random.RandomState(0)
# assert ub.hash_data(data).startswith('ujsidscotcycsqwnkxgbsxkcedplzvytmfmr')
assert ub.hash_data(data, hasher='sha512', types=True, base='abc').startswith('snkngbxghabesvowzalqtvdvjtvslmxve')
# _hashable_sequence(data)


def test_uuid():
Expand All @@ -335,6 +333,61 @@ def test_uuid():
'the hash should be equal when ignoring types')


def test_decimal():
    """
    Check that ``decimal.Decimal`` values are hashable.

    Pins the typed byte sequence and a hash prefix, and records the known
    quirk that the untyped serialization of a Decimal matches the typed
    serialization of its ``as_tuple()`` form.
    """
    from decimal import Decimal
    value = Decimal('3.1415')
    value_tuple = value.as_tuple()

    # The typed serialization is pinned byte-for-byte.
    typed_bytes = b''.join(_hashable_sequence(value, types=True))
    expected_bytes = b'DECIMAL_[_INT\x00_,__[_INT\x03_,_INT\x01_,_INT\x04_,_INT\x01_,_INT\x05_,__]_INT\xfc_,__]_'
    assert typed_bytes == expected_bytes

    hash_prefix = 'oquwtvtrsytm'
    assert ub.hash_data(value, types=True, base='abc', hasher='sha512').startswith(hash_prefix)

    # Compare the hash of the Decimal against the hash of its tuple form.
    typed_tuple_hash = ub.hash_data(value_tuple, types=True)
    assert typed_tuple_hash != ub.hash_data(value, types=True), (
        'the fact that it is a Decimal should reflect in the hash')
    assert typed_tuple_hash == ub.hash_data(value, types=False), (
        'it is a quirk of our hashable extensions that an a typed decimal '
        'tuple will be the same as an untyped decimal. '
        'It is ok to break this test if we refactor to fix issues in '
        'hashable extensions'
    )

    # The same quirk, checked on the raw byte sequences.
    decimal_typed = b''.join(_hashable_sequence(value, types=True))
    decimal_untyped = b''.join(_hashable_sequence(value, types=False))
    tuple_typed = b''.join(_hashable_sequence(value_tuple, types=True))
    tuple_untyped = b''.join(_hashable_sequence(value_tuple, types=False))
    assert decimal_typed != decimal_untyped, 'quirky test'
    assert decimal_untyped == tuple_typed, 'quirky test'
    assert tuple_untyped != tuple_typed, 'quirky test'


def test_datetime():
    """
    Check that ``datetime.datetime`` objects are hashable.

    Pins the typed byte sequence and a hash prefix, and records the known
    quirk that an untyped datetime hashes the same as its typed
    ``timetuple()``.
    """
    import datetime as datetime_mod
    data = datetime_mod.datetime(2101, 1, 1)
    sequence = b''.join(_hashable_sequence(data, types=True))
    assert sequence == b'DATETIME_[_INT\x085_,_INT\x01_,_INT\x01_,_INT\x00_,_INT\x00_,_INT\x00_,_INT\x05_,_INT\x01_,_INT\xff_,__]_'
    assert ub.hash_data(data, types=True, base='abc', hasher='sha512').startswith('fwjyfdtgcdasv')
    # The failure messages name the type actually under test (datetime);
    # they previously said "Decimal", copied from the Decimal test.
    assert ub.hash_data(data.timetuple(), types=True) != ub.hash_data(data, types=True), (
        'the fact that it is a datetime should reflect in the hash')
    assert ub.hash_data(data.timetuple(), types=True) == ub.hash_data(data, types=False), (
        'it is a quirk of our hashable extensions that a typed datetime '
        'tuple will be the same as an untyped datetime. '
        'It is ok to break this test if we refactor to fix issues in '
        'hashable extensions'
    )


def test_date():
    """
    Check that ``datetime.date`` objects are hashable.

    Pins the typed byte sequence and a hash prefix, and records the known
    quirk that an untyped date hashes the same as its typed
    ``timetuple()``.
    """
    import datetime as datetime_mod
    data = datetime_mod.date(2101, 1, 1)
    sequence = b''.join(_hashable_sequence(data, types=True))
    assert sequence == b'DATE_[_INT\x085_,_INT\x01_,_INT\x01_,_INT\x00_,_INT\x00_,_INT\x00_,_INT\x05_,_INT\x01_,_INT\xff_,__]_'
    assert ub.hash_data(data, types=True, base='abc', hasher='sha512').startswith('dlahlcoqypecc')
    # The failure messages name the type actually under test (date); they
    # previously said "Decimal" and were missing a space between the
    # concatenated string pieces ("typed datetuple").
    assert ub.hash_data(data.timetuple(), types=True) != ub.hash_data(data, types=True), (
        'the fact that it is a date should reflect in the hash')
    assert ub.hash_data(data.timetuple(), types=True) == ub.hash_data(data, types=False), (
        'it is a quirk of our hashable extensions that a typed date '
        'tuple will be the same as an untyped date. '
        'It is ok to break this test if we refactor to fix issues in '
        'hashable extensions'
    )


def test_hash_data_custom_base():
data = 1
# A larger base means the string can be shorter
Expand Down
50 changes: 47 additions & 3 deletions ubelt/util_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ def b(s):
DEFAULT_HASHER = hashlib.sha512 # type: Callable


# This controls whether types are used when generating hashable sequences for
# more complex objects. Currently there is no way for the user to control
# this, and that might need to be addressed, but it will require some thought.
_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT = True


Expand Down Expand Up @@ -558,9 +561,12 @@ def lookup(self, data):
# of strictly using this registry.
hash_func = self._hash_dispatch.dispatch(query_hash_type)
if getattr(hash_func, '__is_base__', False):
raise TypeError(
'No registered hash func for hashable type={!r}'.format(
query_hash_type))
base_msg = f'No registered hash func for hashable type={query_hash_type!r}'
try:
msg = f'{base_msg} with mro: {query_hash_type.__mro__}'
except AttributeError:
msg = base_msg
raise TypeError(msg)
return hash_func

def add_iterable_check(self, func):
Expand Down Expand Up @@ -726,9 +732,14 @@ def _register_builtin_class_extensions(self):
cc21b9fa
bd1cabd0
"""
# TODO: can we register a stdlib class only when it is actually needed?
# Some of these stdlib modules don't otherwise need to be imported, and
# importing them here adds import-time overhead.
import uuid
import pathlib
import numbers
import decimal
import datetime as datetime_mod

@self.register(numbers.Integral)
def _convert_numpy_int(data):
Expand All @@ -738,6 +749,39 @@ def _convert_numpy_int(data):
def _convert_numpy_float(data):
return _convert_to_hashable(float(data), extensions=self)

@self.register(decimal.Decimal)
def _convert_decimal(data):
    """
    Convert a ``decimal.Decimal`` into a ``(prefix, hashable)`` pair.

    The value is decomposed with ``Decimal.as_tuple()`` and that tuple is
    serialized with ``_hashable_sequence``.

    Args:
        data (decimal.Decimal): the value to convert

    Returns:
        Tuple[bytes, bytes]: the ``b'DECIMAL'`` prefix and the joined
        bytes of the serialized decimal tuple.

    Note:
        ``as_tuple()`` keeps trailing zeros, so ``Decimal('1.0')`` and
        ``Decimal('1.00')`` serialize differently even though they
        compare equal.
    """
    seq = _hashable_sequence(
        data.as_tuple(),
        extensions=self,
        types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT)
    hashable = b''.join(seq)
    prefix = b'DECIMAL'
    return prefix, hashable

@self.register(datetime_mod.date)
def _convert_date(data):
    """
    Convert a ``datetime.date`` into a ``(prefix, hashable)`` pair.

    The date is decomposed with ``date.timetuple()`` and that tuple is
    serialized with ``_hashable_sequence``.

    Args:
        data (datetime.date): the value to convert

    Returns:
        Tuple[bytes, bytes]: the ``b'DATE'`` prefix and the joined bytes
        of the serialized time tuple.
    """
    seq = _hashable_sequence(
        data.timetuple(),
        extensions=self,
        types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT)
    hashable = b''.join(seq)
    prefix = b'DATE'
    return prefix, hashable

@self.register(datetime_mod.datetime)
def _convert_datetime(data):
    """
    Convert a ``datetime.datetime`` into a ``(prefix, hashable)`` pair.

    The datetime is decomposed with ``datetime.timetuple()`` and that
    tuple is serialized with ``_hashable_sequence``.

    Args:
        data (datetime.datetime): the value to convert

    Returns:
        Tuple[bytes, bytes]: the ``b'DATETIME'`` prefix and the joined
        bytes of the serialized time tuple.
    """
    # NOTE(review): ``timetuple()`` carries no microsecond field and no
    # UTC offset, so datetimes that differ only in those will serialize
    # identically. Changing this would alter existing hashes -- confirm
    # whether that is acceptable before extending the serialized fields.
    seq = _hashable_sequence(
        data.timetuple(),
        extensions=self,
        types=_COMPATIBLE_HASHABLE_SEQUENCE_TYPES_DEFAULT)
    hashable = b''.join(seq)
    prefix = b'DATETIME'
    return prefix, hashable

@self.register(uuid.UUID)
def _convert_uuid(data):
hashable = data.bytes
Expand Down

0 comments on commit db77869

Please sign in to comment.