From ce32ad5c12b52e0dfb2a29af8bc85fab3fdfadd3 Mon Sep 17 00:00:00 2001 From: David Pruitt Date: Thu, 14 Nov 2024 12:32:51 -0800 Subject: [PATCH 1/8] Add constant scaling to healpix dataloader --- .../datapipes/healpix/timeseries_dataset.py | 62 ++++++++++++++----- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/modulus/datapipes/healpix/timeseries_dataset.py b/modulus/datapipes/healpix/timeseries_dataset.py index fed380256f..0a4cb330b9 100644 --- a/modulus/datapipes/healpix/timeseries_dataset.py +++ b/modulus/datapipes/healpix/timeseries_dataset.py @@ -19,6 +19,7 @@ import warnings from dataclasses import dataclass from typing import Optional, Sequence, Union +import warnings import numpy as np import pandas as pd @@ -189,9 +190,28 @@ def __init__( self.input_scaling = None self.target_scaling = None + self.constant_scaling = None + self.constants = None if self.scaling: self._get_scaling_da() + # setup constants + if "constants" in self.ds.data_vars: + # extract from ds: + const = self.ds.constants.values + + # rescale if needed + if self.constant_scaling: + const = ( + const - self.constant_scaling["mean"] + ) / self.constant_scaling["std"] + + # transpose to match new format: + # [C, F, H, W] -> [F, C, H, W] + self.constants = np.transpose(const[0], axes=(1, 0, 2, 3)) + + self.get_constants() + def get_constants(self): """Returns the constants used in this dataset @@ -199,14 +219,7 @@ def get_constants(self): ------- np.ndarray: The list of constants, None if there are no constants """ - # extract from ds: - const = self.ds.constants.values - - # transpose to match new format: - # [C, F, H, W] -> [F, C, H, W] - const = np.transpose(const, axes=(1, 0, 2, 3)) - - return const + return self.constants @staticmethod def _convert_time_step(dt): # pylint: disable=invalid-name @@ -244,9 +257,9 @@ def _get_scaling_da(self): ), } except (ValueError, KeyError): + missing = [m for m in self.ds.channel_in.values if m not in list(self.scaling.keys())] raise KeyError( - f"one or more of the input data variables f{list(self.ds.channel_in)} not found in the " - f"scaling config dict data.scaling ({list(self.scaling.keys())})" + f"Input channels {missing} not found in the scaling config dict data.scaling ({list(self.scaling.keys())})" ) try: self.target_scaling = scaling_da.sel( @@ -261,11 +274,32 @@ def _get_scaling_da(self): ), } except (ValueError, KeyError): + missing = [m for m in self.ds.channel_out.values if m not in list(self.scaling.keys())] raise KeyError( - f"one or more of the target data variables f{list(self.ds.channel_out)} not found in the " - f"scaling config dict data.scaling ({list(self.scaling.keys())})" + f"Target channels {missing} not found in the scaling config dict data.scaling ({list(self.scaling.keys())})" ) + try: + self.constant_scaling = scaling_da.sel( + index=self.ds.channel_c.values + ).rename({"index": "channel_out"}) + self.constant_scaling = { + "mean": np.expand_dims( + self.constant_scaling["mean"].to_numpy(), (0, 2, 3, 4) + ), + "std": np.expand_dims( + self.constant_scaling["std"].to_numpy(), (0, 2, 3, 4) + ), + } + except (ValueError, KeyError): + missing = [m for m in self.ds.channel_c.values if m not in list(self.scaling.keys())] + # only trigger if constant scale values are provided + if len(missing) != len(self.ds.channel_c.values): + raise KeyError( + f"Constant channels {missing} not found in the scaling config dict data.scaling ({list(self.scaling.keys())})" + ) + + def __len__(self): """Get number of samples in the dataset Returns @@ -428,9 +462,9 @@ def __getitem__(self, item): np.transpose(x, axes=(0, 3, 1, 2, 4, 5)) for x in inputs_result ] - if "constants" in self.ds.data_vars: + if self.constants is not None: # Add the constants as [F, C, H, W] - inputs_result.append(np.swapaxes(self.ds.constants.values, 0, 1)) + inputs_result.append(self.constants) logger.log(5, "computed batch in %0.2f s", time.time() - compute_time) torch.cuda.nvtx.range_pop() From 12c13da9c231fa5fbf31fda46746a923b6aabc74 Mon Sep 17 00:00:00 2001 From: David Pruitt Date: Thu, 14 Nov 2024 12:44:02 -0800 Subject: [PATCH 2/8] Add constant scaling to coupled hpx dataset --- modulus/datapipes/healpix/coupledtimeseries_dataset.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modulus/datapipes/healpix/coupledtimeseries_dataset.py b/modulus/datapipes/healpix/coupledtimeseries_dataset.py index 99d0343adc..732d947e04 100644 --- a/modulus/datapipes/healpix/coupledtimeseries_dataset.py +++ b/modulus/datapipes/healpix/coupledtimeseries_dataset.py @@ -305,11 +305,10 @@ def __getitem__(self, item): np.transpose(x, axes=(0, 3, 1, 2, 4, 5)) for x in inputs_result ] - if "constants" in self.ds.data_vars: + + if self.constants is not None: # Add the constants as [F, C, H, W] - inputs_result.append(np.swapaxes(self.ds.constants.values, 0, 1)) - # inputs_result.append(self.ds.constants.values) - logger.log(5, "computed batch in %0.2f s", time.time() - compute_time) + inputs_result.append(self.constants) # append integrated couplings inputs_result.append(integrated_couplings) From 078e288df9698161a01780b7a81d48ab7b6f6f5e Mon Sep 17 00:00:00 2001 From: David Pruitt Date: Thu, 21 Nov 2024 05:35:58 +0000 Subject: [PATCH 3/8] update tests for constant scaling --- test/datapipes/test_healpix.py | 15 ++++++++--- test/datapipes/test_healpix_couple.py | 38 +++++++++++++++------------ 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/test/datapipes/test_healpix.py b/test/datapipes/test_healpix.py index 73216be186..9054d8a0aa 100644 --- a/test/datapipes/test_healpix.py +++ b/test/datapipes/test_healpix.py @@ -72,7 +72,10 @@ def scaling_dict(): "z1000": {"mean": 952.1435546875, "std": 895.7516479492188}, "z250": {"mean": 101186.28125, "std": 5551.77978515625}, "z500": {"mean": 55625.9609375, "std": 2681.712890625}, + "lsm": {"mean": 0, "std": 1}, + "z": {"mean": 0, "std": 1}, "tp6": {"mean": 1, "std": 0, "log_epsilon": 1e-6}, + "extra": {"mean": 1, "std": 0}, } return DictConfig(scaling) @@ -88,6 +91,9 @@ def scaling_double_dict(): "z250": {"mean": 0, "std": 2}, "z500": {"mean": 0, "std": 2}, "tp6": {"mean": 0, "std": 2, "log_epsilon": 1e-6}, + "lsm": {"mean": 0, "std": 2}, + "z": {"mean": 0, "std": 2}, + "extra": {"mean": 0, "std": 2}, } return DictConfig(scaling) @@ -212,7 +218,7 @@ def test_TimeSeriesDataset_initialization( "bogosity": {"mean": 0, "std": 42}, } ) - with pytest.raises(KeyError, match=("one or more of the input data variables")): + with pytest.raises(KeyError, match=("Input channels ")): timeseries_ds = TimeSeriesDataset( dataset=zarr_ds, data_time_step="3h", @@ -472,7 +478,7 @@ def test_TimeSeriesDataModule_initialization( scaling=scaling_double_dict, ) assert isinstance(timeseries_dm, TimeSeriesDataModule) - + print(f"trying to create {create_path} {dataset_name}") # without the prebuilt dataset timeseries_dm = TimeSeriesDataModule( src_directory=create_path, @@ -550,7 +556,8 @@ def test_TimeSeriesDataModule_get_constants( # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) - expected = np.transpose(zarr_ds.constants.values, axes=(1, 0, 2, 3)) + # dividing by 2 due to scaling + expected = np.transpose(zarr_ds.constants.values, axes=(1, 0, 2, 3)) / 2 assert np.array_equal( timeseries_dm.get_constants(), @@ -617,7 +624,7 @@ def test_TimeSeriesDataModule_get_dataloaders( test_dataloader, test_sampler = timeseries_dm.test_dataloader(num_shards=1) assert test_sampler is None assert isinstance(test_dataloader, DataLoader) - print(f"dataset lenght {len}") + # with >1 shard should be distributed sampler train_dataloader, train_sampler = timeseries_dm.train_dataloader(num_shards=2) assert isinstance(train_sampler, DistributedSampler) diff --git a/test/datapipes/test_healpix_couple.py b/test/datapipes/test_healpix_couple.py index 833827c9ef..957175d29c 100644 --- a/test/datapipes/test_healpix_couple.py +++ b/test/datapipes/test_healpix_couple.py @@ -71,6 +71,8 @@ def scaling_dict(): "z1000": {"mean": 952.1435546875, "std": 895.7516479492188}, "z250": {"mean": 101186.28125, "std": 5551.77978515625}, "z500": {"mean": 55625.9609375, "std": 2681.712890625}, + "lsm": {"mean": 0, "std": 1}, + "z": {"mean": 0, "std": 1}, "tp6": {"mean": 1, "std": 0, "log_epsilon": 1e-6}, } return DictConfig(scaling) @@ -86,6 +88,8 @@ def scaling_double_dict(): "z1000": {"mean": 0, "std": 2}, "z250": {"mean": 0, "std": 2}, "z500": {"mean": 0, "std": 2}, + "lsm": {"mean": 0, "std": 2}, + "z": {"mean": 0, "std": 2}, "tp6": {"mean": 0, "std": 2, "log_epsilon": 1e-6}, } return DictConfig(scaling) @@ -94,7 +98,7 @@ def scaling_double_dict(): @nfsdata_or_fail def test_ConstantCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): variables = ["z500", "z1000"] - input_times = ["0H"] + input_times = ["0h"] input_time_dim = 1 output_time_dim = 1 presteps = 0 @@ -116,7 +120,7 @@ def test_ConstantCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): assert isinstance(coupler, ConstantCoupler) interval = 2 - data_time_step = "3H" + data_time_step = "3h" coupler.compute_coupled_indices(interval, data_time_step) coupled_integration_dim = presteps + max(output_time_dim // input_time_dim, 1) expected = np.empty([batch_size, coupled_integration_dim, len(input_times)]) @@ -144,12 +148,12 @@ def test_ConstantCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): @nfsdata_or_fail def test_TrailingAverageCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): variables = ["z500", "z1000"] - input_times = ["6H", "12H"] + input_times = ["6h", "12h"] input_time_dim = 2 output_time_dim = 2 presteps = 0 batch_size = 2 - averaging_window = "6H" + averaging_window = "6h" # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -167,7 +171,7 @@ def test_TrailingAverageCoupler(data_dir, dataset_name, scaling_dict, pytestconf assert isinstance(coupler, TrailingAverageCoupler) interval = 2 - data_time_step = "3H" + data_time_step = "3h" coupler.compute_coupled_indices(interval, data_time_step) coupled_integration_dim = presteps + max(output_time_dim // input_time_dim, 1) expected = np.empty([batch_size, coupled_integration_dim, len(input_times)]) @@ -239,7 +243,7 @@ def test_CoupledTimeSeriesDataset_initialization( "bogosity": {"mean": 0, "std": 42}, } ) - with pytest.raises(KeyError, match=("one or more of the input data variables")): + with pytest.raises(KeyError, match=("Input channels ")): timeseries_ds = CoupledTimeSeriesDataset( dataset=zarr_ds, input_variables=variables, @@ -363,7 +367,7 @@ def test_CoupledTimeSeriesDataset_get_constants( "params": { "batch_size": 1, "variables": ["z250"], - "input_times": ["0H"], + "input_times": ["0"], "input_time_dim": 1, "output_time_dim": 1, "presteps": 0, @@ -405,7 +409,7 @@ def test_CoupledTimeSeriesDataset_len( "params": { "batch_size": 1, "variables": ["z250"], - "input_times": ["0H"], + "input_times": ["0h"], "input_time_dim": 1, "output_time_dim": 1, "presteps": 0, @@ -431,7 +435,7 @@ def test_CoupledTimeSeriesDataset_len( "params": { "batch_size": 2, "variables": ["z250"], - "input_times": ["0H"], + "input_times": ["0h"], "input_time_dim": 1, "output_time_dim": 1, "presteps": 0, @@ -486,7 +490,7 @@ def test_CoupledTimeSeriesDataset_get( "params": { "batch_size": batch_size, "variables": ["z250"], - "input_times": ["0H"], + "input_times": ["0h"], "input_time_dim": 1, "output_time_dim": 1, "presteps": 0, @@ -624,7 +628,7 @@ def test_CoupledTimeSeriesDataModule_initialization( "params": { "batch_size": 1, "variables": ["z250"], - "input_times": ["0H"], + "input_times": ["0h"], "input_time_dim": 1, "output_time_dim": 1, "presteps": 0, @@ -718,7 +722,7 @@ def test_CoupledTimeSeriesDataModule_get_constants( "params": { "batch_size": 1, "variables": ["z250"], - "input_times": ["0H"], + "input_times": ["0h"], "input_time_dim": 1, "output_time_dim": 1, "presteps": 0, @@ -759,7 +763,7 @@ def test_CoupledTimeSeriesDataModule_get_constants( # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) - expected = np.transpose(zarr_ds.constants.values, axes=(1, 0, 2, 3)) + expected = np.transpose(zarr_ds.constants.values, axes=(1, 0, 2, 3)) / 2.0 assert np.array_equal( timeseries_dm.get_constants(), @@ -807,7 +811,7 @@ def test_CoupledTimeSeriesDataModule_get_dataloaders( "params": { "batch_size": 1, "variables": ["z250"], - "input_times": ["0H"], + "input_times": ["0h"], "input_time_dim": 1, "output_time_dim": 1, "presteps": 0, @@ -870,7 +874,7 @@ def test_CoupledTimeSeriesDataModule_get_coupled_vars( "params": { "batch_size": 1, "variables": ["z250"], - "input_times": ["0H"], + "input_times": ["0h"], "input_time_dim": 1, "output_time_dim": 1, "presteps": 0, @@ -905,8 +909,8 @@ def test_CoupledTimeSeriesDataModule_get_coupled_vars( "params": { "batch_size": 1, "variables": ["z250"], - "input_times": ["6H"], - "averaging_window": "6H", + "input_times": ["6h"], + "averaging_window": "6h", "input_time_dim": 1, "output_time_dim": 1, "presteps": 0, From 9bee1c5d2837289047e0b28696833086c7096ba1 Mon Sep 17 00:00:00 2001 From: David Pruitt Date: Thu, 21 Nov 2024 05:37:11 +0000 Subject: [PATCH 4/8] Make zarr chunking automatic --- modulus/datapipes/healpix/data_modules.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/modulus/datapipes/healpix/data_modules.py b/modulus/datapipes/healpix/data_modules.py index 50f7284ba5..24af918e6a 100644 --- a/modulus/datapipes/healpix/data_modules.py +++ b/modulus/datapipes/healpix/data_modules.py @@ -119,7 +119,7 @@ def open_time_series_dataset_classic_on_the_fly( file_name = _get_file_name(directory, prefix, variable, suffix) logger.debug("open nc dataset %s", file_name) - ds = xr.open_dataset(file_name, chunks={"sample": batch_size}, autoclose=True) + ds = xr.open_dataset(file_name, autoclose=True) if "LL" in prefix: ds = ds.rename({"lat": "height", "lon": "width"}) @@ -212,7 +212,7 @@ def open_time_series_dataset_classic_prebuilt( if not ds_path.exists(): raise FileNotFoundError(f"Dataset doesn't appear to exist at {ds_path}") - result = xr.open_zarr(ds_path, chunks={"time": batch_size}) + result = xr.open_zarr(ds_path) return result @@ -285,12 +285,10 @@ def create_time_series_dataset_classic( for variable in all_variables: file_name = _get_file_name(src_directory, prefix, variable, suffix) logger.debug("open nc dataset %s", file_name) - if "sample" in list(xr.open_dataset(file_name).dims.keys()): - ds = xr.open_dataset(file_name, chunks={"sample": batch_size}).rename( - {"sample": "time"} - ) + if "sample" in list(xr.open_dataset(file_name).sizes.keys()): + ds = xr.open_dataset(file_name).rename({"sample": "time"}) else: - ds = xr.open_dataset(file_name, chunks={"time": batch_size}) + ds = xr.open_dataset(file_name) if "varlev" in ds.dims: ds = ds.isel(varlev=0) From 8d88c8ece8cd551457b6ef5dca15e6410f1c2671 Mon Sep 17 00:00:00 2001 From: David Pruitt Date: Thu, 21 Nov 2024 05:38:19 +0000 Subject: [PATCH 5/8] Fix constant scaling, remove duplicated code --- .../healpix/coupledtimeseries_dataset.py | 40 +---------- .../datapipes/healpix/timeseries_dataset.py | 67 +++++++++++-------- 2 files changed, 40 insertions(+), 67 deletions(-) diff --git a/modulus/datapipes/healpix/coupledtimeseries_dataset.py b/modulus/datapipes/healpix/coupledtimeseries_dataset.py index 732d947e04..70db9757f3 100644 --- a/modulus/datapipes/healpix/coupledtimeseries_dataset.py +++ b/modulus/datapipes/healpix/coupledtimeseries_dataset.py @@ -157,43 +157,10 @@ def _get_scaling_da(self): scaling_df.loc["zeros"] = {"mean": 0.0, "std": 1.0} scaling_da = scaling_df.to_xarray().astype("float32") + # only thing we do different here is get the scaling for the coupled values for c in self.couplings: c.set_scaling(scaling_da) - # REMARK: we remove the xarray overhead from these - try: - self.input_scaling = scaling_da.sel(index=self.input_variables).rename( - {"index": "channel_in"} - ) - self.input_scaling = { - "mean": np.expand_dims( - self.input_scaling["mean"].to_numpy(), (0, 2, 3, 4) - ), - "std": np.expand_dims( - self.input_scaling["std"].to_numpy(), (0, 2, 3, 4) - ), - } - except (ValueError, KeyError): - raise KeyError( - f"one or more of the input data variables f{list(self.ds.channel_in)} not found in the " - f"scaling config dict data.scaling ({list(self.scaling.keys())})" - ) - try: - self.target_scaling = scaling_da.sel(index=self.input_variables).rename( - {"index": "channel_out"} - ) - self.target_scaling = { - "mean": np.expand_dims( - self.target_scaling["mean"].to_numpy(), (0, 2, 3, 4) - ), - "std": np.expand_dims( - self.target_scaling["std"].to_numpy(), (0, 2, 3, 4) - ), - } - except (ValueError, KeyError): - raise KeyError( - f"one or more of the target data variables f{list(self.ds.channel_out)} not found in the " - f"scaling config dict data.scaling ({list(self.scaling.keys())})" - ) + super()._get_scaling_da() def __getitem__(self, item): # start range @@ -251,7 +218,6 @@ def __getitem__(self, item): torch.cuda.nvtx.range_pop() torch.cuda.nvtx.range_push("CoupledTimeSeriesDataset:__getitem__:process_batch") - compute_time = time.time() # Insolation if self.add_insolation: sol = insolation( @@ -305,7 +271,6 @@ def __getitem__(self, item): np.transpose(x, axes=(0, 3, 1, 2, 4, 5)) for x in inputs_result ] - if self.constants is not None: # Add the constants as [F, C, H, W] inputs_result.append(self.constants) @@ -327,7 +292,6 @@ def __getitem__(self, item): return inputs_result, targets def next_integration(self, model_outputs, constants): - inputs_result = [] # grab last few model outputs for re-initialization diff --git a/modulus/datapipes/healpix/timeseries_dataset.py b/modulus/datapipes/healpix/timeseries_dataset.py index 0a4cb330b9..fd395b660f 100644 --- a/modulus/datapipes/healpix/timeseries_dataset.py +++ b/modulus/datapipes/healpix/timeseries_dataset.py @@ -19,7 +19,6 @@ import warnings from dataclasses import dataclass from typing import Optional, Sequence, Union -import warnings import numpy as np import pandas as pd @@ -200,15 +199,14 @@ def __init__( # extract from ds: const = self.ds.constants.values - # rescale if needed - if self.constant_scaling: - const = ( - const - self.constant_scaling["mean"] - ) / self.constant_scaling["std"] + if self.scaling: + const = (const - self.constant_scaling["mean"]) / self.constant_scaling[ + "std" + ] # transpose to match new format: # [C, F, H, W] -> [F, C, H, W] - self.constants = np.transpose(const[0], axes=(1, 0, 2, 3)) + self.constants = np.transpose(const, axes=(1, 0, 2, 3)) self.get_constants() @@ -257,7 +255,11 @@ def _get_scaling_da(self): ), } except (ValueError, KeyError): - missing = [m for m in self.ds.channel_in.values if m not in list(self.scaling.keys())] + missing = [ + m + for m in self.ds.channel_in.values + if m not in list(self.scaling.keys()) + ] raise KeyError( f"Input channels {missing} not found in the scaling config dict data.scaling ({list(self.scaling.keys())})" ) @@ -274,31 +276,38 @@ def _get_scaling_da(self): ), } except (ValueError, KeyError): - missing = [m for m in self.ds.channel_out.values if m not in list(self.scaling.keys())] + missing = [ + m + for m in self.ds.channel_out.values + if m not in list(self.scaling.keys()) + ] raise KeyError( f"Target channels {missing} not found in the scaling config dict data.scaling ({list(self.scaling.keys())})" ) try: - self.constant_scaling = scaling_da.sel( - index=self.ds.channel_c.values - ).rename({"index": "channel_out"}) - self.constant_scaling = { - "mean": np.expand_dims( - self.constant_scaling["mean"].to_numpy(), (0, 2, 3, 4) - ), - "std": np.expand_dims( - self.constant_scaling["std"].to_numpy(), (0, 2, 3, 4) - ), - } + # not all datasets will have constants + if "constants" in self.ds.data_vars: + self.constant_scaling = scaling_da.sel( + index=self.ds.channel_c.values + ).rename({"index": "channel_out"}) + self.constant_scaling = { + "mean": np.expand_dims( + self.constant_scaling["mean"].to_numpy(), (1, 2, 3) + ), + "std": np.expand_dims( + self.constant_scaling["std"].to_numpy(), (1, 2, 3) + ), + } except (ValueError, KeyError): - missing = [m for m in self.ds.channel_c.values if m not in list(self.scaling.keys())] - # only trigger if constant scale values are provided - if len(missing) != len(self.ds.channel_c.values): - raise KeyError( - f"Constant channels {missing} not found in the scaling config dict data.scaling ({list(self.scaling.keys())})" - ) - + missing = [ + m + for m in self.ds.channel_c.values + if m not in list(self.scaling.keys()) + ] + raise KeyError( + f"Constant channels {missing} not found in the scaling config dict data.scaling ({list(self.scaling.keys())})" + ) def __len__(self): """Get number of samples in the dataset @@ -337,8 +346,8 @@ def _get_time_index(self, item): if self.forecast_mode else (item + 1) * self.batch_size + self._window_length ) - if not self.drop_last and max_index > self.ds.dims["time"]: - batch_size = self.batch_size - (max_index - self.ds.dims["time"]) + if not self.drop_last and max_index > self.ds.sizes["time"]: + batch_size = self.batch_size - (max_index - self.ds.sizes["time"]) else: batch_size = self.batch_size return (start_index, max_index), batch_size From 3da4398d6de5feda4f0412423bcccabe8f2f7efb Mon Sep 17 00:00:00 2001 From: David Pruitt Date: Wed, 20 Nov 2024 21:42:46 -0800 Subject: [PATCH 6/8] Remove debug print statement --- test/datapipes/test_healpix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/datapipes/test_healpix.py b/test/datapipes/test_healpix.py index 9054d8a0aa..7db4af274b 100644 --- a/test/datapipes/test_healpix.py +++ b/test/datapipes/test_healpix.py @@ -478,7 +478,7 @@ def test_TimeSeriesDataModule_initialization( scaling=scaling_double_dict, ) assert isinstance(timeseries_dm, TimeSeriesDataModule) - print(f"trying to create {create_path} {dataset_name}") + # without the prebuilt dataset timeseries_dm = TimeSeriesDataModule( src_directory=create_path, From c19573fb7e2d1788e1365966f0c3d82b10a4811e Mon Sep 17 00:00:00 2001 From: David Pruitt Date: Tue, 3 Dec 2024 20:29:32 +0000 Subject: [PATCH 7/8] remove spurious constant call --- modulus/datapipes/healpix/timeseries_dataset.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/modulus/datapipes/healpix/timeseries_dataset.py b/modulus/datapipes/healpix/timeseries_dataset.py index fd395b660f..a4580c9424 100644 --- a/modulus/datapipes/healpix/timeseries_dataset.py +++ b/modulus/datapipes/healpix/timeseries_dataset.py @@ -208,8 +208,6 @@ def __init__( # [C, F, H, W] -> [F, C, H, W] self.constants = np.transpose(const, axes=(1, 0, 2, 3)) - self.get_constants() - def get_constants(self): """Returns the constants used in this dataset From 6b06f7cb45ae5bbc8b30b708e6a9d030c2246758 Mon Sep 17 00:00:00 2001 From: David Pruitt Date: Tue, 3 Dec 2024 23:25:03 +0000 Subject: [PATCH 8/8] fix variable name --- modulus/datapipes/healpix/timeseries_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modulus/datapipes/healpix/timeseries_dataset.py b/modulus/datapipes/healpix/timeseries_dataset.py index a4580c9424..21b7c31e5b 100644 --- a/modulus/datapipes/healpix/timeseries_dataset.py +++ b/modulus/datapipes/healpix/timeseries_dataset.py @@ -199,7 +199,7 @@ def __init__( # extract from ds: const = self.ds.constants.values - if self.scaling: + if self.constant_scaling: const = (const - self.constant_scaling["mean"]) / self.constant_scaling[ "std" ]