From 34909cbbd42335c6a7b8258b43cabad0d260b12f Mon Sep 17 00:00:00 2001 From: Simon W Date: Wed, 13 Dec 2023 09:37:22 -0800 Subject: [PATCH 01/17] minimal pytest --- tests/test_integration.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_integration.py b/tests/test_integration.py index e60b3a871..438f75c5c 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1705,3 +1705,14 @@ def test_unused_future_regressors(): m.add_future_regressor("price") m.add_lagged_regressor("cost") m.fit(df, freq="D") + +def test_on_the_fly_sampling(): + start_date = "2019-01-01" + end_date = "2019-03-01" + date_range = pd.date_range(start=start_date, end=end_date, freq="H") + y = np.random.randint(0, 1000, size=(len(date_range),)) + df = pd.DataFrame({"ds": date_range, "y": y}) + + m = NeuralProphet(epochs=1) + m.fit(df, freq='H') + m.predict(df) From 687c08559ee282da4eec06ee89613db2a117d51f Mon Sep 17 00:00:00 2001 From: Simon W Date: Wed, 13 Dec 2023 10:02:49 -0800 Subject: [PATCH 02/17] move_func_getitem --- neuralprophet/time_dataset.py | 86 +++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index dca97da79..7a889508d 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -63,10 +63,54 @@ def __init__(self, df, name, **kwargs): "events", "regressors", ] - inputs, targets, drop_missing = tabularize_univariate_datetime(df, **kwargs) + + self.df = df + self.kwargs = kwargs + #inputs, targets, drop_missing = tabularize_univariate_datetime(df, **kwargs) + #self.init_after_tabularized(inputs, targets) + #self.filter_samples_after_init(kwargs["prediction_frequency"]) + #self.drop_nan_after_init(df, kwargs["predict_steps"], drop_missing) + + def __getitem__(self, index): + """Overrides parent class method to get an item at index. + Parameters + ---------- + index : int + Sample location in dataset + Returns + ------- + OrderedDict + Model inputs, each of len(df) but with varying dimensions + Note + ---- + Contains the following data: + Model Inputs + * ``time`` (np.array, float), dims: (num_samples, 1) + * ``seasonalities`` (OrderedDict), named seasonalities + each with features (np.array, float) - dims: (num_samples, n_features[name]) + * ``lags`` (np.array, float), dims: (num_samples, n_lags) + * ``covariates`` (OrderedDict), named covariates, + each with features (np.array, float) of dims: (num_samples, n_lags) + * ``events`` (OrderedDict), events, + each with features (np.array, float) of dims: (num_samples, n_lags) + * ``regressors`` (OrderedDict), regressors, + each with features (np.array, float) of dims: (num_samples, n_lags) + np.array, float + Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) + """ + inputs, targets, drop_missing = tabularize_univariate_datetime(self.df, **self.kwargs) self.init_after_tabularized(inputs, targets) - self.filter_samples_after_init(kwargs["prediction_frequency"]) - self.drop_nan_after_init(df, kwargs["predict_steps"], drop_missing) + self.filter_samples_after_init(self.kwargs["prediction_frequency"]) + self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], drop_missing) + + sample = self.samples[index] + targets = self.targets[index] + meta = self.meta + return sample, targets, meta + + def __len__(self): + """Overrides Parent class method to get data length.""" + return self.length def drop_nan_after_init(self, df, predict_steps, drop_missing): """Checks if inputs/targets contain any NaN values and drops them, if user opts to. @@ -223,42 +267,6 @@ def filter_samples_after_init( sample.pop("timestamps") self.length = len(self.samples) - def __getitem__(self, index): - """Overrides parent class method to get an item at index. - Parameters - ---------- - index : int - Sample location in dataset - Returns - ------- - OrderedDict - Model inputs, each of len(df) but with varying dimensions - Note - ---- - Contains the following data: - Model Inputs - * ``time`` (np.array, float), dims: (num_samples, 1) - * ``seasonalities`` (OrderedDict), named seasonalities - each with features (np.array, float) - dims: (num_samples, n_features[name]) - * ``lags`` (np.array, float), dims: (num_samples, n_lags) - * ``covariates`` (OrderedDict), named covariates, - each with features (np.array, float) of dims: (num_samples, n_lags) - * ``events`` (OrderedDict), events, - each with features (np.array, float) of dims: (num_samples, n_lags) - * ``regressors`` (OrderedDict), regressors, - each with features (np.array, float) of dims: (num_samples, n_lags) - np.array, float - Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) - """ - sample = self.samples[index] - targets = self.targets[index] - meta = self.meta - return sample, targets, meta - - def __len__(self): - """Overrides Parent class method to get data length.""" - return self.length - def tabularize_univariate_datetime( df, From 5215340aa51f2c8ef5d71f7e1ce07db9d7b30433 Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 14 Dec 2023 19:26:26 -0800 Subject: [PATCH 03/17] slicing --- neuralprophet/time_dataset.py | 8 +++++++- tests/test_integration.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 7a889508d..afa69beab 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -98,7 +98,13 @@ def __getitem__(self, index): np.array, float Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ - inputs, targets, drop_missing = tabularize_univariate_datetime(self.df, **self.kwargs) + start_idx = index + #end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') - 1 #correct? + end_idx = start_idx + 1 + df_slice = self.df.iloc[start_idx:end_idx] + + # Functions + inputs, targets, drop_missing = tabularize_univariate_datetime(df_slice, **self.kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(self.kwargs["prediction_frequency"]) self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], drop_missing) diff --git a/tests/test_integration.py b/tests/test_integration.py index 438f75c5c..601b9dff9 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1708,7 +1708,7 @@ def test_unused_future_regressors(): def test_on_the_fly_sampling(): start_date = "2019-01-01" - end_date = "2019-03-01" + end_date = "2019-01-04" date_range = pd.date_range(start=start_date, end=end_date, freq="H") y = np.random.randint(0, 1000, size=(len(date_range),)) df = pd.DataFrame({"ds": date_range, "y": y}) From c70fae292623001d66ae1a0efddc7ff96162ab9a Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 14 Dec 2023 19:48:03 -0800 Subject: [PATCH 04/17] predict_mode --- neuralprophet/time_dataset.py | 11 +++++++---- tests/test_integration.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index afa69beab..885a06165 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -98,10 +98,13 @@ def __getitem__(self, index): np.array, float Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ - start_idx = index - #end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') - 1 #correct? - end_idx = start_idx + 1 - df_slice = self.df.iloc[start_idx:end_idx] + if self.kwargs['predict_mode']: + df_slice = self.df + else: + start_idx = index + #end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') - 1 #correct? + end_idx = start_idx + 1 + df_slice = self.df.iloc[start_idx:end_idx] # Functions inputs, targets, drop_missing = tabularize_univariate_datetime(df_slice, **self.kwargs) diff --git a/tests/test_integration.py b/tests/test_integration.py index 601b9dff9..76517b084 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1713,6 +1713,6 @@ def test_on_the_fly_sampling(): y = np.random.randint(0, 1000, size=(len(date_range),)) df = pd.DataFrame({"ds": date_range, "y": y}) - m = NeuralProphet(epochs=1) + m = NeuralProphet(epochs=1, learning_rate=0.01) m.fit(df, freq='H') m.predict(df) From b78d5e021552d95daa800b36572d62b56ab47244 Mon Sep 17 00:00:00 2001 From: Simon W Date: Mon, 18 Dec 2023 13:20:43 -0800 Subject: [PATCH 05/17] typos --- neuralprophet/forecaster.py | 4 ++-- neuralprophet/time_dataset.py | 2 ++ tests/test_integration.py | 15 ++++++++++----- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index 852fc297b..d81712388 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -2684,7 +2684,7 @@ def _train( val_dataloaders=val_loader, **self.config_train.lr_finder_args, ) - # Estimate the optimat learning rate from the loss curve + # Estimate the optimal learning rate from the loss curve assert lr_finder is not None _, _, lr_suggestion = utils.smooth_loss_and_suggest(lr_finder.results) self.model.learning_rate = lr_suggestion @@ -2706,7 +2706,7 @@ def _train( **self.config_train.lr_finder_args, ) assert lr_finder is not None - # Estimate the optimat learning rate from the loss curve + # Estimate the optimal learning rate from the loss curve _, _, lr_suggestion = utils.smooth_loss_and_suggest(lr_finder.results) self.model.learning_rate = lr_suggestion start = time.time() diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 885a06165..ef1d7baa5 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -106,6 +106,8 @@ def __getitem__(self, index): end_idx = start_idx + 1 df_slice = self.df.iloc[start_idx:end_idx] + #df_slice = self.df + # Functions inputs, targets, drop_missing = tabularize_univariate_datetime(df_slice, **self.kwargs) self.init_after_tabularized(inputs, targets) diff --git a/tests/test_integration.py b/tests/test_integration.py index 76517b084..ead9a17a7 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1710,9 +1710,14 @@ def test_on_the_fly_sampling(): start_date = "2019-01-01" end_date = "2019-01-04" date_range = pd.date_range(start=start_date, end=end_date, freq="H") - y = np.random.randint(0, 1000, size=(len(date_range),)) - df = pd.DataFrame({"ds": date_range, "y": y}) - - m = NeuralProphet(epochs=1, learning_rate=0.01) + #y = np.random.randint(0, 1000, size=(len(date_range),)) + #df = pd.DataFrame({"ds": date_range, "y": y}) + df = pd.DataFrame( + { + "ds": {0: "2022-10-16 00:00:00", 1: "2022-10-17 00:00:00", 2: "2022-10-18 00:00:00", 3: "2022-10-19 00:00:00", 4: "2022-10-20 00:00:00",}, + "y": {0: 17, 1: 18, 2: 10, 3: 8, 4: 5}, + } + ) + m = NeuralProphet(epochs=1) #, learning_rate=0.01) m.fit(df, freq='H') - m.predict(df) + metrics = m.predict(df) From beae5bb21ce8d870ff0fd212f70ac2c462c7ee2d Mon Sep 17 00:00:00 2001 From: Simon W Date: Mon, 18 Dec 2023 17:56:49 -0800 Subject: [PATCH 06/17] lr-finder --- neuralprophet/data/process.py | 1 + neuralprophet/time_dataset.py | 12 +++++++----- tests/test_integration.py | 19 +++++++------------ 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/neuralprophet/data/process.py b/neuralprophet/data/process.py index 9f8861016..c9190f21a 100644 --- a/neuralprophet/data/process.py +++ b/neuralprophet/data/process.py @@ -623,4 +623,5 @@ def _create_dataset(model, df, predict_mode, prediction_frequency=None): config_regressors=model.config_regressors, config_missing=model.config_missing, prediction_frequency=prediction_frequency, + config_train=model.config_train ) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index ef1d7baa5..fdfaf7503 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -98,16 +98,17 @@ def __getitem__(self, index): np.array, float Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ - if self.kwargs['predict_mode']: + learning_rate = self.kwargs['config_train'].learning_rate + # TODO: Drop config_train from self! + + if self.kwargs['predict_mode'] or (learning_rate is None): df_slice = self.df else: start_idx = index - #end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') - 1 #correct? - end_idx = start_idx + 1 + end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') + #end_idx = start_idx + 1 df_slice = self.df.iloc[start_idx:end_idx] - #df_slice = self.df - # Functions inputs, targets, drop_missing = tabularize_univariate_datetime(df_slice, **self.kwargs) self.init_after_tabularized(inputs, targets) @@ -291,6 +292,7 @@ def tabularize_univariate_datetime( config_lagged_regressors: Optional[configure.ConfigLaggedRegressors] = None, config_regressors: Optional[configure.ConfigFutureRegressors] = None, config_missing=None, + config_train=None, prediction_frequency=None, ): """Create a tabular dataset from univariate timeseries for supervised forecasting. diff --git a/tests/test_integration.py b/tests/test_integration.py index ead9a17a7..5a9cf80b9 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1707,17 +1707,12 @@ def test_unused_future_regressors(): m.fit(df, freq="D") def test_on_the_fly_sampling(): - start_date = "2019-01-01" - end_date = "2019-01-04" - date_range = pd.date_range(start=start_date, end=end_date, freq="H") - #y = np.random.randint(0, 1000, size=(len(date_range),)) - #df = pd.DataFrame({"ds": date_range, "y": y}) - df = pd.DataFrame( - { - "ds": {0: "2022-10-16 00:00:00", 1: "2022-10-17 00:00:00", 2: "2022-10-18 00:00:00", 3: "2022-10-19 00:00:00", 4: "2022-10-20 00:00:00",}, - "y": {0: 17, 1: 18, 2: 10, 3: 8, 4: 5}, - } - ) - m = NeuralProphet(epochs=1) #, learning_rate=0.01) + start_date = "2022-10-16 00:00:00" + end_date = "2022-12-30 00:00:00" + date_range = pd.date_range(start=start_date, end=end_date, freq="D") + y = np.random.randint(0, 20, size=(len(date_range),)) + df = pd.DataFrame({"ds": date_range, "y": y}) + + m = NeuralProphet(epochs=1, learning_rate=0.01) m.fit(df, freq='H') metrics = m.predict(df) From 8427ffc46da4aad65f017d937eff801b0f7f1642 Mon Sep 17 00:00:00 2001 From: Simon W Date: Tue, 19 Dec 2023 12:05:35 -0800 Subject: [PATCH 07/17] drop_missing --- neuralprophet/time_dataset.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index fdfaf7503..5f090400c 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -106,14 +106,13 @@ def __getitem__(self, index): else: start_idx = index end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') - #end_idx = start_idx + 1 df_slice = self.df.iloc[start_idx:end_idx] # Functions - inputs, targets, drop_missing = tabularize_univariate_datetime(df_slice, **self.kwargs) + inputs, targets = tabularize_univariate_datetime(df_slice, **self.kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(self.kwargs["prediction_frequency"]) - self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], drop_missing) + self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) sample = self.samples[index] targets = self.targets[index] @@ -502,7 +501,7 @@ def _stride_timestamps_for_forecasts(x): tabularized_input_shapes_str += f" {key} {value.shape} \n" log.debug(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}") - return inputs, targets, config_missing.drop_missing + return inputs, targets def fourier_series(dates, period, series_order): From ff05b2a179f672f3b8cf86db6defe3211b0a814c Mon Sep 17 00:00:00 2001 From: Simon W Date: Tue, 19 Dec 2023 13:54:23 -0800 Subject: [PATCH 08/17] predict_v2 --- neuralprophet/forecaster.py | 1 + neuralprophet/time_dataset.py | 52 +++++++++++++++++++++++------------ tests/test_integration.py | 1 + 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index d81712388..72640cbe0 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -1774,6 +1774,7 @@ def predict_seasonal_components(self, df: pd.DataFrame, quantile: float = 0.5): predict_mode=True, config_missing=self.config_missing, prediction_frequency=self.prediction_frequency, + config_train=self.config_train, ) loader = DataLoader(dataset, batch_size=min(4096, len(df)), shuffle=False, drop_last=False) predicted = {} diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 5f090400c..0a7910c40 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -63,13 +63,16 @@ def __init__(self, df, name, **kwargs): "events", "regressors", ] - - self.df = df self.kwargs = kwargs - #inputs, targets, drop_missing = tabularize_univariate_datetime(df, **kwargs) - #self.init_after_tabularized(inputs, targets) - #self.filter_samples_after_init(kwargs["prediction_frequency"]) - #self.drop_nan_after_init(df, kwargs["predict_steps"], drop_missing) + + learning_rate = kwargs['config_train'].learning_rate + if kwargs['predict_mode'] or (learning_rate is None): + inputs, targets = tabularize_univariate_datetime(df, **kwargs) + self.init_after_tabularized(inputs, targets) + self.filter_samples_after_init(kwargs["prediction_frequency"]) + self.drop_nan_after_init(df, kwargs["predict_steps"], kwargs["config_missing"].drop_missing) + else: + self.df = df def __getitem__(self, index): """Overrides parent class method to get an item at index. @@ -98,31 +101,44 @@ def __getitem__(self, index): np.array, float Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ - learning_rate = self.kwargs['config_train'].learning_rate # TODO: Drop config_train from self! - + learning_rate = self.kwargs['config_train'].learning_rate if self.kwargs['predict_mode'] or (learning_rate is None): - df_slice = self.df + sample = self.samples[index] + targets = self.targets[index] + meta = self.meta + return sample, targets, meta else: start_idx = index end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') df_slice = self.df.iloc[start_idx:end_idx] - # Functions - inputs, targets = tabularize_univariate_datetime(df_slice, **self.kwargs) - self.init_after_tabularized(inputs, targets) - self.filter_samples_after_init(self.kwargs["prediction_frequency"]) - self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) + # Functions + inputs, targets = tabularize_univariate_datetime(df_slice, **self.kwargs) + self.init_after_tabularized(inputs, targets) + self.filter_samples_after_init(self.kwargs["prediction_frequency"]) + self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) - sample = self.samples[index] - targets = self.targets[index] - meta = self.meta - return sample, targets, meta + sample = self.samples[index] + targets = self.targets[index] + meta = self.meta + return sample, targets, meta def __len__(self): """Overrides Parent class method to get data length.""" return self.length + def drop_nan_init(self, drop_missing): + """Checks if inputs/targets contain any NaN values and drops them, if user opts to. + Parameters + ---------- + drop_missing : bool + whether to automatically drop missing samples from the data + predict_steps : int + number of steps to predict + """ + + def drop_nan_after_init(self, df, predict_steps, drop_missing): """Checks if inputs/targets contain any NaN values and drops them, if user opts to. Parameters diff --git a/tests/test_integration.py b/tests/test_integration.py index 5a9cf80b9..6d1799f64 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1712,6 +1712,7 @@ def test_on_the_fly_sampling(): date_range = pd.date_range(start=start_date, end=end_date, freq="D") y = np.random.randint(0, 20, size=(len(date_range),)) df = pd.DataFrame({"ds": date_range, "y": y}) + df.loc[3, "y"] = np.nan m = NeuralProphet(epochs=1, learning_rate=0.01) m.fit(df, freq='H') From c408e950095b83c7711c807eedec57df8a65bb1e Mon Sep 17 00:00:00 2001 From: Simon W Date: Tue, 19 Dec 2023 15:16:14 -0800 Subject: [PATCH 09/17] predict_v3 --- tests/test_unit.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/test_unit.py b/tests/test_unit.py index 7600f8c3d..c07c98527 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -82,7 +82,7 @@ def test_time_dataset(): local_data_params, global_data_params = df_utils.init_data_params(df=df, normalize="minmax") df = df.drop("ID", axis=1) df = df_utils.normalize(df, global_data_params) - inputs, targets, _ = time_dataset.tabularize_univariate_datetime( + inputs, targets = time_dataset.tabularize_univariate_datetime( df, n_lags=n_lags, n_forecasts=n_forecasts, config_missing=config_missing ) log.debug( @@ -806,6 +806,13 @@ def test_too_many_NaN(): config_missing = configure.MissingDataHandling( impute_missing=True, impute_linear=5, impute_rolling=5, drop_missing=False ) + config_train = configure.Train( + learning_rate=LR, + epochs=EPOCHS, + batch_size=BATCH_SIZE, + loss_func="SmoothL1Loss", + optimizer="AdamW", + ) length = 100 days = pd.date_range(start="2017-01-01", periods=length) y = np.ones(length) @@ -825,7 +832,7 @@ def test_too_many_NaN(): df["ID"] = "__df__" # Check if ValueError is thrown, if NaN values remain after auto-imputing with pytest.raises(ValueError): - time_dataset.TimeDataset(df, "name", config_missing=config_missing, predict_steps=1, prediction_frequency=None) + time_dataset.TimeDataset(df, "name", predict_mode=False, config_missing=config_missing, config_train=config_train, predict_steps=1, prediction_frequency=None) def test_future_df_with_nan(): From df29f33fbbbd32815d3405acca9e9c59cf99cb29 Mon Sep 17 00:00:00 2001 From: Simon W Date: Wed, 20 Dec 2023 11:01:41 -0800 Subject: [PATCH 10/17] samples --- neuralprophet/time_dataset.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 0a7910c40..db5727448 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -361,6 +361,11 @@ def tabularize_univariate_datetime( """ max_lags = get_max_num_lags(config_lagged_regressors, n_lags) n_samples = len(df) - max_lags + 1 - n_forecasts + #TODO + #n_samples = max_lags + n_forecasts + #if n_samples < 0: + # n_samples = max_lags + n_forecasts + # data is stored in OrderedDict inputs = OrderedDict({}) From 29fe999148ff1c6e8a23c701f0248ed859314e7e Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 11:10:22 -0800 Subject: [PATCH 11/17] lagged regressor n_lags --- neuralprophet/time_dataset.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index db5727448..8ea20ebd2 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -66,7 +66,7 @@ def __init__(self, df, name, **kwargs): self.kwargs = kwargs learning_rate = kwargs['config_train'].learning_rate - if kwargs['predict_mode'] or (learning_rate is None): + if kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors']: inputs, targets = tabularize_univariate_datetime(df, **kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(kwargs["prediction_frequency"]) @@ -103,7 +103,7 @@ def __getitem__(self, index): """ # TODO: Drop config_train from self! learning_rate = self.kwargs['config_train'].learning_rate - if self.kwargs['predict_mode'] or (learning_rate is None): + if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors']: sample = self.samples[index] targets = self.targets[index] meta = self.meta @@ -111,6 +111,7 @@ def __getitem__(self, index): else: start_idx = index end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') + df_slice = self.df.iloc[start_idx:end_idx] # Functions @@ -360,11 +361,13 @@ def tabularize_univariate_datetime( Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ max_lags = get_max_num_lags(config_lagged_regressors, n_lags) - n_samples = len(df) - max_lags + 1 - n_forecasts + #n_samples = len(df) - max_lags + 1 - n_forecasts #TODO - #n_samples = max_lags + n_forecasts - #if n_samples < 0: - # n_samples = max_lags + n_forecasts + learning_rate = config_train.learning_rate + if predict_mode or (learning_rate is None): + n_samples = len(df) - max_lags + 1 - n_forecasts + else: + n_samples=1 # data is stored in OrderedDict inputs = OrderedDict({}) From 2f584c23a66c99ed83ea33f7fce73ccda3b8dc7a Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 15:31:58 -0800 Subject: [PATCH 12/17] preliminary: events, holidays --- neuralprophet/time_dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 8ea20ebd2..26b822990 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -66,7 +66,7 @@ def __init__(self, df, name, **kwargs): self.kwargs = kwargs learning_rate = kwargs['config_train'].learning_rate - if kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors']: + if kwargs['predict_mode'] or (learning_rate is None) or kwargs['config_lagged_regressors'] or kwargs['config_country_holidays'] or kwargs['config_events']: inputs, targets = tabularize_univariate_datetime(df, **kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(kwargs["prediction_frequency"]) @@ -103,7 +103,7 @@ def __getitem__(self, index): """ # TODO: Drop config_train from self! learning_rate = self.kwargs['config_train'].learning_rate - if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors']: + if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors'] or self.kwargs['config_country_holidays'] or self.kwargs['config_events']: sample = self.samples[index] targets = self.targets[index] meta = self.meta @@ -364,7 +364,7 @@ def tabularize_univariate_datetime( #n_samples = len(df) - max_lags + 1 - n_forecasts #TODO learning_rate = config_train.learning_rate - if predict_mode or (learning_rate is None): + if predict_mode or (learning_rate is None) or config_lagged_regressors or config_country_holidays or config_events: n_samples = len(df) - max_lags + 1 - n_forecasts else: n_samples=1 From fca7adff3d035ab5e47e44f90faa4e3bbd83ef3e Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 15:47:15 -0800 Subject: [PATCH 13/17] adjustes pytests --- tests/test_unit.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_unit.py b/tests/test_unit.py index c07c98527..b8d6a26d8 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -76,6 +76,13 @@ def test_time_dataset(): n_forecasts = 1 valid_p = 0.2 config_missing = configure.MissingDataHandling() + config_train = configure.Train( + learning_rate=LR, + epochs=EPOCHS, + batch_size=BATCH_SIZE, + loss_func="SmoothL1Loss", + optimizer="AdamW", + ) df_train, df_val = df_utils.split_df(df_in, n_lags, n_forecasts, valid_p) # create a tabularized dataset from time series df, _, _ = df_utils.check_dataframe(df_train) @@ -83,7 +90,7 @@ def test_time_dataset(): df = df.drop("ID", axis=1) df = df_utils.normalize(df, global_data_params) inputs, targets = time_dataset.tabularize_univariate_datetime( - df, n_lags=n_lags, n_forecasts=n_forecasts, config_missing=config_missing + df, n_lags=n_lags, n_forecasts=n_forecasts, config_missing=config_missing, config_train=config_train ) log.debug( "tabularized inputs: {}".format( From 139a97f908564175c73cbab6ecd5e9d6787afdbd Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 16:12:28 -0800 Subject: [PATCH 14/17] selective forecasting --- neuralprophet/time_dataset.py | 6 +++--- tests/test_unit.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 26b822990..7642eb06f 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -66,7 +66,7 @@ def __init__(self, df, name, **kwargs): self.kwargs = kwargs learning_rate = kwargs['config_train'].learning_rate - if kwargs['predict_mode'] or (learning_rate is None) or kwargs['config_lagged_regressors'] or kwargs['config_country_holidays'] or kwargs['config_events']: + if kwargs['predict_mode'] or (learning_rate is None) or kwargs['config_lagged_regressors'] or kwargs['config_country_holidays'] or kwargs['config_events'] or kwargs['prediction_frequency']: inputs, targets = tabularize_univariate_datetime(df, **kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(kwargs["prediction_frequency"]) @@ -103,7 +103,7 @@ def __getitem__(self, index): """ # TODO: Drop config_train from self! learning_rate = self.kwargs['config_train'].learning_rate - if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors'] or self.kwargs['config_country_holidays'] or self.kwargs['config_events']: + if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors'] or self.kwargs['config_country_holidays'] or self.kwargs['config_events'] or self.kwargs['prediction_frequency']: sample = self.samples[index] targets = self.targets[index] meta = self.meta @@ -364,7 +364,7 @@ def tabularize_univariate_datetime( #n_samples = len(df) - max_lags + 1 - n_forecasts #TODO learning_rate = config_train.learning_rate - if predict_mode or (learning_rate is None) or config_lagged_regressors or config_country_holidays or config_events: + if predict_mode or (learning_rate is None) or config_lagged_regressors or config_country_holidays or config_events or prediction_frequency: n_samples = len(df) - max_lags + 1 - n_forecasts else: n_samples=1 diff --git a/tests/test_unit.py b/tests/test_unit.py index b8d6a26d8..6a3df35bb 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -814,7 +814,7 @@ def test_too_many_NaN(): impute_missing=True, impute_linear=5, impute_rolling=5, drop_missing=False ) config_train = configure.Train( - learning_rate=LR, + learning_rate=None, epochs=EPOCHS, batch_size=BATCH_SIZE, loss_func="SmoothL1Loss", @@ -839,7 +839,7 @@ def test_too_many_NaN(): df["ID"] = "__df__" # Check if ValueError is thrown, if NaN values remain after auto-imputing with pytest.raises(ValueError): - time_dataset.TimeDataset(df, "name", predict_mode=False, config_missing=config_missing, config_train=config_train, predict_steps=1, prediction_frequency=None) + time_dataset.TimeDataset(df, "name", predict_mode=False, config_missing=config_missing, config_lagged_regressors=None, config_country_holidays=None, config_events=None, config_train=config_train, predict_steps=1, prediction_frequency=None) def test_future_df_with_nan(): From 30aa303449de2dae12d8e44d884157010d252777 Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 16:26:19 -0800 Subject: [PATCH 15/17] black --- neuralprophet/data/process.py | 2 +- neuralprophet/time_dataset.py | 40 ++++++++++++++++++++++++++--------- tests/test_integration.py | 3 ++- tests/test_unit.py | 13 +++++++++++- 4 files changed, 45 insertions(+), 13 deletions(-) diff --git a/neuralprophet/data/process.py b/neuralprophet/data/process.py index c9190f21a..f3e44f9bb 100644 --- a/neuralprophet/data/process.py +++ b/neuralprophet/data/process.py @@ -623,5 +623,5 @@ def _create_dataset(model, df, predict_mode, prediction_frequency=None): config_regressors=model.config_regressors, config_missing=model.config_missing, prediction_frequency=prediction_frequency, - config_train=model.config_train + config_train=model.config_train, ) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 7642eb06f..f93e4e7a3 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -65,8 +65,15 @@ def __init__(self, df, name, **kwargs): ] self.kwargs = kwargs - learning_rate = kwargs['config_train'].learning_rate - if kwargs['predict_mode'] or (learning_rate is None) or kwargs['config_lagged_regressors'] or kwargs['config_country_holidays'] or kwargs['config_events'] or kwargs['prediction_frequency']: + learning_rate = kwargs["config_train"].learning_rate + if ( + kwargs["predict_mode"] + or (learning_rate is None) + or kwargs["config_lagged_regressors"] + or kwargs["config_country_holidays"] + or kwargs["config_events"] + or kwargs["prediction_frequency"] + ): inputs, targets = tabularize_univariate_datetime(df, **kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(kwargs["prediction_frequency"]) @@ -102,15 +109,22 @@ def __getitem__(self, index): Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ # TODO: Drop config_train from self! - learning_rate = self.kwargs['config_train'].learning_rate - if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors'] or self.kwargs['config_country_holidays'] or self.kwargs['config_events'] or self.kwargs['prediction_frequency']: + learning_rate = self.kwargs["config_train"].learning_rate + if ( + self.kwargs["predict_mode"] + or (learning_rate is None) + or self.kwargs["config_lagged_regressors"] + or self.kwargs["config_country_holidays"] + or self.kwargs["config_events"] + or self.kwargs["prediction_frequency"] + ): sample = self.samples[index] targets = self.targets[index] meta = self.meta return sample, targets, meta else: start_idx = index - end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') + end_idx = start_idx + self.kwargs.get("n_lags") + self.kwargs.get("n_forecasts") df_slice = self.df.iloc[start_idx:end_idx] @@ -139,7 +153,6 @@ def drop_nan_init(self, drop_missing): number of steps to predict """ - def drop_nan_after_init(self, df, predict_steps, drop_missing): """Checks if inputs/targets contain any NaN values and drops them, if user opts to. Parameters @@ -361,13 +374,20 @@ def tabularize_univariate_datetime( Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ max_lags = get_max_num_lags(config_lagged_regressors, n_lags) - #n_samples = len(df) - max_lags + 1 - n_forecasts - #TODO + # n_samples = len(df) - max_lags + 1 - n_forecasts + # TODO learning_rate = config_train.learning_rate - if predict_mode or (learning_rate is None) or config_lagged_regressors or config_country_holidays or config_events or prediction_frequency: + if ( + predict_mode + or (learning_rate is None) + or config_lagged_regressors + or config_country_holidays + or config_events + or prediction_frequency + ): n_samples = len(df) - max_lags + 1 - n_forecasts else: - n_samples=1 + n_samples = 1 # data is stored in OrderedDict inputs = OrderedDict({}) diff --git a/tests/test_integration.py b/tests/test_integration.py index 6d1799f64..cdb3eebda 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1706,6 +1706,7 @@ def test_unused_future_regressors(): m.add_lagged_regressor("cost") m.fit(df, freq="D") + def test_on_the_fly_sampling(): start_date = "2022-10-16 00:00:00" end_date = "2022-12-30 00:00:00" @@ -1715,5 +1716,5 @@ def test_on_the_fly_sampling(): df.loc[3, "y"] = np.nan m = NeuralProphet(epochs=1, learning_rate=0.01) - m.fit(df, freq='H') + m.fit(df, freq="H") metrics = m.predict(df) diff --git a/tests/test_unit.py b/tests/test_unit.py index 6a3df35bb..be4d7d55a 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -839,7 +839,18 @@ def test_too_many_NaN(): df["ID"] = "__df__" # Check if ValueError is thrown, if NaN values remain after auto-imputing with pytest.raises(ValueError): - time_dataset.TimeDataset(df, "name", predict_mode=False, config_missing=config_missing, config_lagged_regressors=None, config_country_holidays=None, config_events=None, config_train=config_train, predict_steps=1, prediction_frequency=None) + time_dataset.TimeDataset( + df, + "name", + predict_mode=False, + config_missing=config_missing, + config_lagged_regressors=None, + config_country_holidays=None, + config_events=None, + config_train=config_train, + predict_steps=1, + prediction_frequency=None, + ) def test_future_df_with_nan(): From 381c9129d1ac3b57857e8b5cf10f9857e0f7e897 Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 16:28:30 -0800 Subject: [PATCH 16/17] ruff --- tests/test_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_integration.py b/tests/test_integration.py index cdb3eebda..4876f502a 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1717,4 +1717,4 @@ def test_on_the_fly_sampling(): m = NeuralProphet(epochs=1, learning_rate=0.01) m.fit(df, freq="H") - metrics = m.predict(df) + _ = m.predict(df) From 660934c0696806a81ea9da73fd44c2d5840b9161 Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 4 Jan 2024 12:29:37 +0100 Subject: [PATCH 17/17] lagged_regressors --- neuralprophet/time_dataset.py | 14 +++++++++++++- tests/test_integration.py | 1 - 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index f93e4e7a3..333bc5d9c 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -124,7 +124,19 @@ def __getitem__(self, index): return sample, targets, meta else: start_idx = index - end_idx = start_idx + self.kwargs.get("n_lags") + self.kwargs.get("n_forecasts") + + # Lagged Regressors + if self.kwargs["config_lagged_regressors"]: + n_lagged_regressor_list = [] + for dict_name, nested_dict in self.kwargs["config_lagged_regressors"].items(): + name_of_nested_dict = dict_name + n_lagged_regressor = self.kwargs["config_lagged_regressors"][name_of_nested_dict].n_lags + n_lagged_regressor_list.append(n_lagged_regressor) + max_lag = max(self.kwargs["n_lags"], *n_lagged_regressor_list) + end_idx = start_idx + max_lag + self.kwargs.get("n_forecasts") + + else: + end_idx = start_idx + self.kwargs.get("n_lags") + self.kwargs.get("n_forecasts") df_slice = self.df.iloc[start_idx:end_idx] diff --git a/tests/test_integration.py b/tests/test_integration.py index 4876f502a..730493828 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1713,7 +1713,6 @@ def test_on_the_fly_sampling(): date_range = pd.date_range(start=start_date, end=end_date, freq="D") y = np.random.randint(0, 20, size=(len(date_range),)) df = pd.DataFrame({"ds": date_range, "y": y}) - df.loc[3, "y"] = np.nan m = NeuralProphet(epochs=1, learning_rate=0.01) m.fit(df, freq="H")