From 8330bab900cbfdb5f781a4625212bfd1bfa20d66 Mon Sep 17 00:00:00 2001
From: Oskar Triebe
Date: Wed, 14 Aug 2024 15:57:54 -0700
Subject: [PATCH] [Major] simplify learning rate finder and use log-mean of default and smooth suggestion (#1630)

* simplify learning rate finder
* set learning rate to logarithmic mean of smooth and default suggestions
* move logic to util

---
 neuralprophet/configure.py  |  4 ++--
 neuralprophet/forecaster.py |  8 +++-----
 neuralprophet/utils.py      | 18 +++++++++++++++---
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/neuralprophet/configure.py b/neuralprophet/configure.py
index 17bdaac25..bc2b004fc 100644
--- a/neuralprophet/configure.py
+++ b/neuralprophet/configure.py
@@ -208,7 +208,7 @@ def set_lr_finder_args(self, dataset_size, num_batches):
         Set the lr_finder_args.
         This is the range of learning rates to test.
         """
-        num_training = 150 + int(np.log10(100 + dataset_size) * 25)
+        num_training = 100 + int(np.log10(dataset_size) * 20)
         if num_batches < num_training:
             log.warning(
                 f"Learning rate finder: The number of batches ({num_batches}) is smaller than the required number \
@@ -217,7 +217,7 @@ def set_lr_finder_args(self, dataset_size, num_batches):
             # num_training = num_batches
         self.lr_finder_args.update(
             {
-                "min_lr": 1e-6,
+                "min_lr": 1e-7,
                 "max_lr": 10,
                 "num_training": num_training,
                 "early_stop_threshold": None,
diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py
index f35f1c7f5..85939955e 100644
--- a/neuralprophet/forecaster.py
+++ b/neuralprophet/forecaster.py
@@ -2805,13 +2805,12 @@ def _train(
             lr_finder = tuner.lr_find(
                 model=self.model,
                 train_dataloaders=train_loader,
-                val_dataloaders=val_loader,
+                # val_dataloaders=val_loader,  # not used, but may lead to a Lightning bug if not provided
                 **self.config_train.lr_finder_args,
             )
             # Estimate the optimal learning rate from the loss curve
             assert lr_finder is not None
-            _, _, lr_suggestion = utils.smooth_loss_and_suggest(lr_finder.results)
-            self.model.learning_rate = lr_suggestion
+            _, _, self.model.learning_rate = utils.smooth_loss_and_suggest(lr_finder)
         start = time.time()
         self.trainer.fit(
             self.model,
@@ -2832,8 +2831,7 @@ def _train(
             )
             assert lr_finder is not None
             # Estimate the optimal learning rate from the loss curve
-            _, _, lr_suggestion = utils.smooth_loss_and_suggest(lr_finder.results)
-            self.model.learning_rate = lr_suggestion
+            _, _, self.model.learning_rate = utils.smooth_loss_and_suggest(lr_finder)
         start = time.time()
         self.trainer.fit(
             self.model,
diff --git a/neuralprophet/utils.py b/neuralprophet/utils.py
index 2613c927b..62b9e7481 100644
--- a/neuralprophet/utils.py
+++ b/neuralprophet/utils.py
@@ -751,7 +751,7 @@ def set_log_level(log_level: str = "INFO", include_handlers: bool = False):
     set_logger_level(logging.getLogger("NP"), log_level, include_handlers)


-def smooth_loss_and_suggest(lr_finder_results, window=10):
+def smooth_loss_and_suggest(lr_finder, window=10):
     """
     Smooth loss using a Hamming filter.

@@ -769,10 +769,12 @@ def smooth_loss_and_suggest(lr_finder_results, window=10):
     suggested_lr: float
         Suggested learning rate based on gradient
     """
+    lr_finder_results = lr_finder.results
     lr = lr_finder_results["lr"]
     loss = lr_finder_results["loss"]
     # Derive window size from num lr searches, ensure window is divisible by 2
-    half_window = math.ceil(round(len(loss) * 0.1) / 2)
+    # half_window = math.ceil(round(len(loss) * 0.1) / 2)
+    half_window = math.ceil(window / 2)
     # Pad sequence and initialize hamming filter
     loss = np.pad(np.array(loss), pad_width=half_window, mode="edge")
     window = np.hamming(half_window * 2)
@@ -798,7 +800,17 @@ def smooth_loss_and_suggest(lr_finder, window=10):
             "samples or manually set the learning rate."
         )
         raise
-    return (loss, lr, suggestion)
+    suggestion_default = lr_finder.suggestion(skip_begin=10, skip_end=3)
+    if suggestion is not None and suggestion_default is not None:
+        log_suggestion_smooth = np.log(suggestion)
+        log_suggestion_default = np.log(suggestion_default)
+        lr_suggestion = np.exp((log_suggestion_smooth + log_suggestion_default) / 2)
+    elif suggestion is None and suggestion_default is None:
+        log.error("Automatic learning rate test failed. Please set the learning rate manually.")
+        raise ValueError("Learning rate finder returned no suggestion.")
+    else:
+        lr_suggestion = suggestion if suggestion is not None else suggestion_default
+    return (loss, lr, lr_suggestion)


 def _smooth_loss(loss, beta=0.9):
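
Reviewer note: the new suggestion rule above takes the logarithmic mean of the smoothed-loss suggestion and Lightning's default suggestion, which is equivalent to the geometric mean of the two learning rates. A minimal standalone sketch of that combination, for illustration only (the helper name combine_lr_suggestions is hypothetical and not part of the patch):

    import numpy as np

    def combine_lr_suggestions(lr_smooth: float, lr_default: float) -> float:
        # Averaging in log space is the geometric mean:
        # exp((ln a + ln b) / 2) == sqrt(a * b)
        return float(np.exp((np.log(lr_smooth) + np.log(lr_default)) / 2))

    # Example: suggestions of 1e-3 and 1e-1 combine to 1e-2,
    # the midpoint between them on a log10 scale.
    assert abs(combine_lr_suggestions(1e-3, 1e-1) - 1e-2) < 1e-12

Averaging on a log scale keeps the final learning rate between the two candidates in order-of-magnitude terms, so neither an overly aggressive nor an overly conservative suggestion dominates the result.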