Skip to content

Commit

Permalink
ingester progress tracking work
Browse files (browse the repository at this point in the history)
  • Loading branch information
dmichaels-harvard committed Mar 30, 2024
1 parent 9e78b28 commit 499c05a
Showing 1 changed file with 26 additions and 26 deletions.
52 changes: 26 additions & 26 deletions dcicutils/structured_data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -192,8 +192,8 @@ def get_counts() -> int:
diffs = {}
if callable(progress):
ntypes, nobjects = get_counts()
progress({PROGRESS.ANALYZE_START.value: PROGRESS.NOW(),
PROGRESS.ANALYZE_COUNT_TYPES.value: ntypes, PROGRESS.ANALYZE_COUNT_ITEMS.value: nobjects})
progress({PROGRESS.ANALYZE_START: PROGRESS.NOW(),
PROGRESS.ANALYZE_COUNT_TYPES: ntypes, PROGRESS.ANALYZE_COUNT_ITEMS: nobjects})
if self.data or self.portal: # TODO: what is this OR biz?
refs = self.resolved_refs_with_uuids
# TODO: Need feedback/progress tracking mechanism here.
Expand All @@ -212,20 +212,20 @@ def get_counts() -> int:
uuid=existing_object.uuid,
diffs=object_diffs or None))
if callable(progress):
progress({PROGRESS.ANALYZE_UPDATE.value: True,
PROGRESS.ANALYZE_COUNT_LOOKUP.value: nlookups + nlookups_compare})
progress({PROGRESS.ANALYZE_UPDATE: True,
PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups + nlookups_compare})
elif identifying_path:
# If there is no existing object we still create a record for this object
# but with no uuid which will be the indication that it does not exist.
diffs[type_name].append(create_readonly_object(path=identifying_path, uuid=None, diffs=None))
if callable(progress):
progress({PROGRESS.ANALYZE_CREATE.value: True,
PROGRESS.ANALYZE_COUNT_LOOKUP.value: nlookups})
progress({PROGRESS.ANALYZE_CREATE: True,
PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
else:
if callable(progress):
progress({PROGRESS.ANALYZE_COUNT_LOOKUP.value: nlookups})
progress({PROGRESS.ANALYZE_COUNT_LOOKUP: nlookups})
if callable(progress):
progress({PROGRESS.ANALYZE_DONE.value: PROGRESS.NOW()})
progress({PROGRESS.ANALYZE_DONE: PROGRESS.NOW()})
return diffs

def load_file(self, file: str) -> None:
Expand Down Expand Up @@ -272,8 +272,8 @@ def get_counts() -> Tuple[int, int]:
return nrows, len(excel.sheet_names)
if self._progress: # TODO: Move to _load_reader
nrows, nsheets = get_counts()
self._progress({PROGRESS.LOAD_START.value: PROGRESS.NOW(),
PROGRESS.LOAD_COUNT_SHEETS.value: nsheets, PROGRESS.LOAD_COUNT_ROWS.value: nrows})
self._progress({PROGRESS.LOAD_START: PROGRESS.NOW(),
PROGRESS.LOAD_COUNT_SHEETS: nsheets, PROGRESS.LOAD_COUNT_ROWS: nrows})
excel = Excel(file) # Order the sheet names by any specified ordering (e.g. ala snovault.loadxl).
order = {Schema.type_name(key): index for index, key in enumerate(self._order)} if self._order else {}
for sheet_name in sorted(excel.sheet_names, key=lambda key: order.get(Schema.type_name(key), sys.maxsize)):
Expand All @@ -298,14 +298,14 @@ def get_counts() -> Tuple[int, int]:
del self._errors["ref"]
if self._progress:
self._progress({ # TODO: Refactor with same thing below in _load_reader.
PROGRESS.LOAD_DONE.value: PROGRESS.NOW(),
PROGRESS.LOAD_COUNT_REFS.value: self.ref_total_count,
PROGRESS.LOAD_COUNT_REFS_FOUND.value: self.ref_total_found_count,
PROGRESS.LOAD_COUNT_REFS_NOT_FOUND.value: self.ref_total_notfound_count,
PROGRESS.LOAD_COUNT_REFS_LOOKUP.value: self.ref_lookup_count,
PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT.value: self.ref_lookup_cache_hit_count,
PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT.value: self.ref_exists_cache_hit_count,
PROGRESS.LOAD_COUNT_REFS_INVALID.value: self.ref_invalid_identifying_property_count
PROGRESS.LOAD_DONE: PROGRESS.NOW(),
PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
})

def _load_json_file(self, file: str) -> None:
Expand Down Expand Up @@ -335,14 +335,14 @@ def _load_reader(self, reader: RowReader, type_name: str) -> None:
self._add(type_name, structured_row)
if self._progress:
self._progress({
PROGRESS.LOAD_ITEM.value: True,
PROGRESS.LOAD_COUNT_REFS.value: self.ref_total_count,
PROGRESS.LOAD_COUNT_REFS_FOUND.value: self.ref_total_found_count,
PROGRESS.LOAD_COUNT_REFS_NOT_FOUND.value: self.ref_total_notfound_count,
PROGRESS.LOAD_COUNT_REFS_LOOKUP.value: self.ref_lookup_count,
PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT.value: self.ref_lookup_cache_hit_count,
PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT.value: self.ref_exists_cache_hit_count,
PROGRESS.LOAD_COUNT_REFS_INVALID.value: self.ref_invalid_identifying_property_count
PROGRESS.LOAD_ITEM: True,
PROGRESS.LOAD_COUNT_REFS: self.ref_total_count,
PROGRESS.LOAD_COUNT_REFS_FOUND: self.ref_total_found_count,
PROGRESS.LOAD_COUNT_REFS_NOT_FOUND: self.ref_total_notfound_count,
PROGRESS.LOAD_COUNT_REFS_LOOKUP: self.ref_lookup_count,
PROGRESS.LOAD_COUNT_REFS_LOOKUP_CACHE_HIT: self.ref_lookup_cache_hit_count,
PROGRESS.LOAD_COUNT_REFS_EXISTS_CACHE_HIT: self.ref_exists_cache_hit_count,
PROGRESS.LOAD_COUNT_REFS_INVALID: self.ref_invalid_identifying_property_count
})
self._note_warning(reader.warnings, "reader")
if schema:
Expand Down

0 comments on commit 499c05a

Please sign in to comment.