Decompress data, fix logic when in reprocessing mode

softboiler committed on Feb 1, 2024
1 parent 73dbbf5
commit 2b2c2bc
Show file tree

Hide file tree

Showing 5 changed files with 34 additions and 31 deletions.
diff --git a/dvc.lock b/dvc.lock
@@ -47,13 +47,13 @@ stages:
       nfiles: 321
     - path: src/boilercv/stages/preview/preview_binarized.py
       hash: md5
-      md5: 81189ca19c9c07cd8c80178611716f88
-      size: 835
+      md5: 3a85ed446eed5b2a8bf5be2ec6d16d08
+      size: 860
     outs:
     - path: data/previews/binarized.nc
       hash: md5
-      md5: a254f85beabe041bf6c4b0820024465d
-      size: 3446443
+      md5: 89d743d2211d82197a19ef1c0b752748
+      size: 3494758
   contours:
     cmd: python -m boilercv.stages.contours
     deps:
@@ -84,8 +84,8 @@ stages:
       nfiles: 321
     - path: src/boilercv/stages/fill.py
       hash: md5
-      md5: ba2f34dbad23ecf56873a5841627cb0b
-      size: 1518
+      md5: 1a6b4931f599ed8046e1c66a70543159
+      size: 1547
     outs:
     - path: data/filled
       hash: md5
@@ -236,8 +236,8 @@ stages:
       nfiles: 321
     - path: src/boilercv/stages/find_contours.py
       hash: md5
-      md5: 26bf1bf4419d40f1c90d771a6aaea0fa
-      size: 2966
+      md5: 33a3d4f607e2f0f2d5cf281847d51daf
+      size: 3041
     outs:
     - path: data/contours
       hash: md5
@@ -259,13 +259,13 @@ stages:
       nfiles: 321
     - path: src/boilercv/stages/preview/preview_filled.py
       hash: md5
-      md5: a798bfac9a99b5336d32d2c78511979d
-      size: 740
+      md5: f6176731c20cb880252d783590ff4bb3
+      size: 792
     outs:
     - path: data/previews/filled.nc
       hash: md5
-      md5: f3596d4b5c08d8701b7630f1136c45aa
-      size: 1661392
+      md5: d10a4641316a66e0fbd3a07101b46f81
+      size: 1669805
   preview_gray:
     cmd: python src/boilercv/stages/preview/preview_gray.py
     deps:
@@ -276,13 +276,13 @@ stages:
       nfiles: 321
     - path: src/boilercv/stages/preview/preview_gray.py
       hash: md5
-      md5: 951d853ba52d4659c073adb3121fbf10
-      size: 741
+      md5: 6e5f3f5933638d3293c066a7eb419668
+      size: 793
     outs:
     - path: data/previews/gray.nc
       hash: md5
-      md5: bca01dd9d72e5a7c25db766c5522652e
-      size: 18686615
+      md5: 95e0dfeb6743c8a477b3f9624a1b9b96
+      size: 18888256
   find_unobstructed:
     cmd: python src/boilercv/stages/find_unobstructed.py
     deps:
@@ -293,8 +293,8 @@ stages:
       nfiles: 321
     - path: src/boilercv/stages/find_unobstructed.py
       hash: md5
-      md5: ba3378329afb30f7be3ca3c56a4b0bb2
-      size: 225
+      md5: 4f3a6655839d40e92d84847236d1b487
+      size: 236
     outs:
     - path: data/unobstructed
       hash: md5
@@ -311,8 +311,8 @@ stages:
       nfiles: 1
     - path: src/boilercv/stages/find_tracks.py
       hash: md5
-      md5: 2d497c6b2161f8056a0eb2ccf4c19fe8
-      size: 163
+      md5: cf733a0a3c212066f287840ff5051d71
+      size: 174
     outs:
     - path: data/tracks
       hash: md5
@@ -329,12 +329,12 @@ stages:
       nfiles: 1
     - path: src/boilercv/correlations.py
       hash: md5
-      md5: d66b32fc1a05ef32a6eb51c9182548e3
-      size: 2856
+      md5: 425fdb56946ddc30b7cb692cecb31b17
+      size: 2949
     - path: src/boilercv/stages/compare_theory.py
       hash: md5
-      md5: ac742c69d72e2960d65ac1636ae13ac1
-      size: 208
+      md5: d0f384e7aff32751d19f9b7a4999982c
+      size: 219
     outs:
     - path: data/lifetimes
       hash: md5
@@ -380,6 +380,6 @@ stages:
     deps:
     - path: src/boilercv/stages/experiments
       hash: md5
-      md5: f5ddea1afcef324293572aeaf7442707.dir
-      size: 64376
-      nfiles: 10
+      md5: 543e2aa21e63fc2805fda403e9cef8e7.dir
+      size: 91098
+      nfiles: 11
diff --git a/src/boilercv/stages/fill.py b/src/boilercv/stages/fill.py
@@ -2,6 +2,7 @@
 
 import xarray as xr
 from loguru import logger
+from tqdm import tqdm
 
 from boilercv.data import ROI, VIDEO
 from boilercv.data.packing import pack
@@ -15,7 +16,7 @@
 def main():
     destination = PARAMS.paths.filled
     with process_datasets(destination) as videos_to_process:
-        for name in videos_to_process:
+        for name in tqdm(videos_to_process):
             df = get_contours_df(name)
             source_ds = get_dataset(name)
             ds = xr.zeros_like(source_ds, dtype=source_ds[VIDEO].dtype)

diff --git a/src/boilercv/stages/preview/__init__.py b/src/boilercv/stages/preview/__init__.py
@@ -45,7 +45,7 @@ def new_videos_to_preview(
         received_previews = list(received_previews.values())
         new_ds = get_preview_ds(received_video_names, received_previews)
 
-        if destination.exists():
+        if not reprocess and destination.exists():
             with xr.open_dataset(destination) as existing_ds:
                 if new_ds[VIDEO].shape == existing_ds[VIDEO].shape:
                     # Combine datasets if they're the same shape

diff --git a/src/boilercv/stages/preview/preview_filled.py b/src/boilercv/stages/preview/preview_filled.py
@@ -1,6 +1,7 @@
 """Update previews for the filled contours stage."""
 
 from loguru import logger
+from tqdm import tqdm
 
 from boilercv.data import FRAME, VIDEO
 from boilercv.data.sets import get_dataset
@@ -12,7 +13,7 @@ def main():
     stage = "filled"
     destination = PARAMS.paths.filled_preview
     with new_videos_to_preview(destination) as videos_to_preview:
-        for video_name in videos_to_preview:
+        for video_name in tqdm(videos_to_preview):
             ds = get_dataset(video_name, stage=stage, num_frames=1)
             videos_to_preview[video_name] = ds[VIDEO].isel({FRAME: 0}).values
 

diff --git a/src/boilercv/stages/preview/preview_gray.py b/src/boilercv/stages/preview/preview_gray.py
@@ -1,6 +1,7 @@
 """Update previews for grayscale videos."""
 
 from loguru import logger
+from tqdm import tqdm
 
 from boilercv.data import FRAME, VIDEO
 from boilercv.data.sets import get_dataset
@@ -12,7 +13,7 @@ def main():
     stage = "large_sources"
     destination = PARAMS.paths.gray_preview
     with new_videos_to_preview(destination) as videos_to_preview:
-        for video_name in videos_to_preview:
+        for video_name in tqdm(videos_to_preview):
             if ds := get_dataset(video_name, stage=stage, num_frames=1):
                 videos_to_preview[video_name] = ds[VIDEO].isel({FRAME: 0}).values