From 49048087b18d8f46b8c086b83b375862de10306a Mon Sep 17 00:00:00 2001
From: Oskar Triebe
Date: Thu, 12 Sep 2024 17:24:06 -0700
Subject: [PATCH] [Minor] Split prep_or_copy_df into copy and check_multiple_series_id (#1647)

* separate copy from check multiple ID
* fix remaining references
* clean up
* add copy to tests
* fix tests
* update tests
* fixes
* finish fixes
---
 .../feature-guides/global_local_trend.ipynb | 23 +-------
 neuralprophet/data/process.py               |  6 +-
 neuralprophet/data/transform.py             |  3 +-
 neuralprophet/df_utils.py                   | 47 ++++++++-------
 neuralprophet/forecaster.py                 | 55 +++++++++++-------
 neuralprophet/time_dataset.py               |  8 +--
 tests/test_integration.py                   |  4 +-
 tests/test_regularization.py                |  3 +
 tests/test_unit.py                          | 57 ++++++++++++++-----
 tests/utils/benchmark_time_dataset.py       |  4 +-
 10 files changed, 125 insertions(+), 85 deletions(-)

diff --git a/docs/source/how-to-guides/feature-guides/global_local_trend.ipynb b/docs/source/how-to-guides/feature-guides/global_local_trend.ipynb
index ca7fd3eff..1fada83e4 100644
--- a/docs/source/how-to-guides/feature-guides/global_local_trend.ipynb
+++ b/docs/source/how-to-guides/feature-guides/global_local_trend.ipynb
@@ -1263,28 +1263,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "ValueError",
-     "evalue": "Invalid frequency: NaT",
-     "output_type": "error",
-     "traceback": [ ... stale escaped cell output elided: the removed traceback shows "ValueError: Invalid frequency: NaT" raised by m.predict(future) via NeuralProphet._maybe_extend_df -> df_utils.infer_frequency -> _infer_frequency -> convert_num_to_str_freq -> pd.date_range ... ]
-    }
-   ],
+   "outputs": [],
    "source": [
    "future = m.make_future_dataframe(df_test)\n",
    "forecast = m.predict(future)\n",
diff --git a/neuralprophet/data/process.py b/neuralprophet/data/process.py
index 549c747cd..3e099c83c 100644
---
a/neuralprophet/data/process.py +++ b/neuralprophet/data/process.py @@ -399,7 +399,8 @@ def _check_dataframe( "Dataframe has less than n_forecasts + n_lags rows. " "Forecasting not possible. Please either use a larger dataset, or adjust the model parameters." ) - df, _, _, _ = df_utils.prep_or_copy_df(df) + # df = df.copy(deep=True) + # df, _, _, _ = df_utils.check_multiple_series_id(df) df, regressors_to_remove, lag_regressors_to_remove = df_utils.check_dataframe( df=df, check_y=check_y, @@ -474,7 +475,8 @@ def _handle_missing_data( The pre-processed DataFrame, including imputed missing data, if applicable. """ - df, _, _, _ = df_utils.prep_or_copy_df(df) + # df = df.copy(deep=True) + # df, _, _, _ = df_utils.check_multiple_series_id(df) if n_lags == 0 and not predicting: # drop rows with NaNs in y and count them diff --git a/neuralprophet/data/transform.py b/neuralprophet/data/transform.py index a7a772f37..e79518af3 100644 --- a/neuralprophet/data/transform.py +++ b/neuralprophet/data/transform.py @@ -24,7 +24,8 @@ def _normalize(df: pd.DataFrame, config_normalization: Normalization) -> pd.Data ------- df: pd.DataFrame, normalized """ - df, _, _, _ = df_utils.prep_or_copy_df(df) + # df = df.copy(deep=True) + # df, _, _, _ = df_utils.check_multiple_series_id(df) df_norm = pd.DataFrame() for df_name, df_i in df.groupby("ID"): data_params = config_normalization.get_data_params(df_name) diff --git a/neuralprophet/df_utils.py b/neuralprophet/df_utils.py index 4bcb558cc..0252fdf72 100644 --- a/neuralprophet/df_utils.py +++ b/neuralprophet/df_utils.py @@ -22,7 +22,7 @@ class ShiftScale: scale: float = 1.0 -def prep_or_copy_df(df: pd.DataFrame) -> tuple[pd.DataFrame, bool, bool, list[str]]: +def check_multiple_series_id(df: pd.DataFrame) -> tuple[pd.DataFrame, bool, bool, list[str]]: """Copy df if it contains the ID column. Creates ID column with '__df__' if it is a df with a single time series. 
Parameters ---------- @@ -42,26 +42,23 @@ def prep_or_copy_df(df: pd.DataFrame) -> tuple[pd.DataFrame, bool, bool, list[st if not isinstance(df, pd.DataFrame): raise ValueError("Provided DataFrame (df) must be of pd.DataFrame type.") - # Create a copy of the dataframe - df_copy = df.copy(deep=True) - - df_has_id_column = "ID" in df_copy.columns + df_has_id_column = "ID" in df.columns # If there is no ID column, then add one with a single value if not df_has_id_column: log.debug("Provided DataFrame (df) contains a single time series.") - df_copy["ID"] = "__df__" - return df_copy, df_has_id_column, True, ["__df__"] + df["ID"] = "__df__" + return df, df_has_id_column, True, ["__df__"] # Create a list of unique ID values - unique_id_values = list(df_copy["ID"].unique()) + unique_id_values = list(df["ID"].unique()) # Check if there is only one unique ID value df_has_single_time_series = len(unique_id_values) == 1 + num_time_series_id = len(unique_id_values) - single_or_multiple_message = "a single" if df_has_single_time_series else "multiple" - log.debug(f"Provided DataFrame (df) has an ID column and contains {single_or_multiple_message} time series.") + log.debug(f"Provided DataFrame (df) has an ID column and contains {num_time_series_id} time series.") - return df_copy, df_has_id_column, df_has_single_time_series, unique_id_values + return df, df_has_id_column, df_has_single_time_series, unique_id_values def return_df_in_original_format(df, received_ID_col=False, received_single_time_series=True): @@ -285,7 +282,8 @@ def init_data_params( ShiftScale entries containing ``shift`` and ``scale`` parameters for each column """ # Compute Global data params - df, _, _, _ = prep_or_copy_df(df) + # df = df.copy(deep=True) + # df, _, _, _ = check_multiple_series_id(df) df_merged = df.copy(deep=True).drop("ID", axis=1) global_data_params = data_params_definition( df_merged, normalize, config_lagged_regressors, config_regressors, config_events, config_seasonality @@ -382,6 +380,8 @@ def normalize(df, data_params): """ df = df.copy(deep=True) for name in df.columns: + if name == "ID": + continue if name not in data_params.keys(): raise ValueError(f"Unexpected column {name} in data") new_name = name @@ -428,7 +428,8 @@ def check_dataframe( pd.DataFrame or dict checked dataframe """ - df, _, _, _ = prep_or_copy_df(df) + # df = df.copy(deep=True) + # df, _, _, _ = check_multiple_series_id(df) if df.groupby("ID").size().min() < 1: raise ValueError("Dataframe has no rows.") if "ds" not in df: @@ -642,7 +643,9 @@ def _crossvalidation_with_time_threshold(df, n_lags, n_forecasts, k, fold_pct, f min_train = total_samples - samples_fold - (k - 1) * (samples_fold - samples_overlap) assert min_train >= samples_fold folds = [] - df_fold, _, _, _ = prep_or_copy_df(df) + df_fold = df + # df_fold = df.copy(deep=True) + # df_fold, _, _, _ = check_multiple_series_id(df_fold) for i in range(k, 0, -1): threshold_time_stamp = find_time_threshold(df_fold, n_lags, n_forecasts, samples_fold, inputs_overbleed=True) df_train, df_val = split_considering_timestamp( @@ -704,7 +707,8 @@ def crossvalidation_split_df( validation data """ - df, _, _, _ = prep_or_copy_df(df) + # df = df.copy(deep=True) + df, _, _, _ = check_multiple_series_id(df) folds = [] if len(df["ID"].unique()) == 1: for df_name, df_i in df.groupby("ID"): @@ -764,7 +768,8 @@ def double_crossvalidation_split_df(df, n_lags, n_forecasts, k, valid_pct, test_ tuple of k tuples [(folds_val, folds_test), …] elements same as :meth:`crossvalidation_split_df` returns """ - df, 
_, _, _ = prep_or_copy_df(df) + # df = df.copy(deep=True) + # df, _, _, _ = check_multiple_series_id(df) if len(df["ID"].unique()) > 1: raise NotImplementedError("double_crossvalidation_split_df not implemented for df with many time series") fold_pct_test = float(test_pct) / k @@ -885,7 +890,8 @@ def split_df( pd.DataFrame, dict validation data """ - df, _, _, _ = prep_or_copy_df(df) + # df = df.copy(deep=True) + # df, _, _, _ = check_multiple_series_id(df) df_train = pd.DataFrame() df_val = pd.DataFrame() if local_split: @@ -1367,7 +1373,8 @@ def infer_frequency(df, freq, n_lags, min_freq_percentage=0.7): Valid frequency tag according to major frequency. """ - df, _, _, _ = prep_or_copy_df(df) + # df = df.copy(deep=True) + # df, _, _, _ = check_multiple_series_id(df) freq_df = list() for df_name, df_i in df.groupby("ID"): freq_df.append(_infer_frequency(df_i, freq, min_freq_percentage)) @@ -1410,8 +1417,8 @@ def create_dict_for_events_or_regressors( if other_df is None: # if other_df is None, create dictionary with None for each ID return {df_name: None for df_name in df_names} - - other_df, received_ID_col, _, _ = prep_or_copy_df(other_df) + other_df = other_df.copy(deep=True) + other_df, received_ID_col, _, _ = check_multiple_series_id(other_df) # if other_df does not contain ID, create dictionary with original ID with the same other_df for each ID if not received_ID_col: other_df = other_df.drop("ID", axis=1) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index 6f14e2684..e95a4bcaa 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -615,7 +615,7 @@ def _create_dataset(self, df, predict_mode, components_stacker=None): ------- TimeDataset """ - df, _, _, _ = df_utils.prep_or_copy_df(df) + # df, _, _, _ = df_utils.check_multiple_series_id(df) return time_dataset.GlobalTimeDataset( df, predict_mode=predict_mode, @@ -1071,7 +1071,8 @@ def fit( ) # Copy df and save list of unique time series IDs (the latter for global-local modelling if enabled) - df, _, _, self.id_list = df_utils.prep_or_copy_df(df) + df = df.copy(deep=True) + df, _, _, self.id_list = df_utils.check_multiple_series_id(df) df = _check_dataframe(self, df, check_y=True, exogenous=True) # Infer frequency from data @@ -1187,6 +1188,7 @@ def fit( if not self.fitted: if self.config_trend.changepoints is not None: df_aux = pd.DataFrame({"ds": pd.Series(self.config_trend.changepoints)}) + df_aux["ID"] = "__df__" df_aux = _normalize(df=df_aux, config_normalization=self.config_normalization) self.config_trend.changepoints = df_aux["t"].values @@ -1228,8 +1230,8 @@ def fit( # Set up DataLoaders: Validation validation_enabled = validation_df is not None and isinstance(validation_df, pd.DataFrame) if validation_enabled: - df_val = validation_df - df_val, _, _, _ = df_utils.prep_or_copy_df(df_val) + df_val = validation_df.copy(deep=True) + df_val, _, _, _ = df_utils.check_multiple_series_id(df_val) df_val = _check_dataframe(self, df_val, check_y=False, exogenous=False) df_val = _handle_missing_data( df=df_val, @@ -1243,7 +1245,8 @@ def fit( config_seasonality=self.config_seasonality, predicting=False, ) - # df_val, _, _, _ = df_utils.prep_or_copy_df(df_val) + # df_val = df_val.copy(deep=True) + # df_val, _, _, _ = df_utils.check_multiple_series_id(df_val) df_val = _normalize(df=df_val, config_normalization=self.config_normalization) val_components_stacker = utils_time_dataset.ComponentStacker( n_lags=self.config_ar.n_lags, @@ -1399,7 +1402,8 @@ def predict(self, df: pd.DataFrame, 
decompose: bool = True, raw: bool = False, a log.warning("Raw forecasts are incompatible with plotting utilities") if self.fitted is False: raise ValueError("Model has not been fitted. Predictions will be random.") - df, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(df) + df = df.copy(deep=True) + df, received_ID_col, received_single_time_series, _ = df_utils.check_multiple_series_id(df) # to get all forecasteable values with df given, maybe extend into future: df, periods_added = _maybe_extend_df( df=df, @@ -1463,9 +1467,10 @@ def test(self, df: pd.DataFrame, verbose: bool = True): pd.DataFrame evaluation metrics """ - df, _, _, _ = df_utils.prep_or_copy_df(df) if self.fitted is False: log.warning("Model has not been fitted. Test results will be random.") + df = df.copy(deep=True) + df, _, _, _ = df_utils.check_multiple_series_id(df) df = _check_dataframe(self, df, check_y=True, exogenous=True) freq = df_utils.infer_frequency(df, n_lags=self.config_model.max_lags, freq=self.data_freq) df = _handle_missing_data( @@ -1480,7 +1485,7 @@ def test(self, df: pd.DataFrame, verbose: bool = True): config_seasonality=self.config_seasonality, predicting=False, ) - df, _, _, _ = df_utils.prep_or_copy_df(df) + df, _, _, _ = df_utils.check_multiple_series_id(df) df = _normalize(df=df, config_normalization=self.config_normalization) components_stacker = utils_time_dataset.ComponentStacker( n_lags=self.config_ar.n_lags, @@ -1616,7 +1621,8 @@ def split_df(self, df: pd.DataFrame, freq: str = "auto", valid_p: float = 0.2, l 1 2022-12-13 8.02 data2 2 2022-12-13 8.30 data3 """ - df, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(df) + df = df.copy(deep=True) + df, received_ID_col, received_single_time_series, _ = df_utils.check_multiple_series_id(df) df = _check_dataframe(self, df, check_y=False, exogenous=False) freq = df_utils.infer_frequency(df, n_lags=self.config_model.max_lags, freq=freq) df = _handle_missing_data( @@ -1805,7 +1811,8 @@ def crossvalidation_split_df( 1 2022-12-10 8.25 data2 2 2022-12-10 7.55 data3 """ - df, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(df) + df = df.copy(deep=True) + df, received_ID_col, received_single_time_series, _ = df_utils.check_multiple_series_id(df) df = _check_dataframe(self, df, check_y=False, exogenous=False) freq = df_utils.infer_frequency(df, n_lags=self.config_model.max_lags, freq=freq) df = _handle_missing_data( @@ -1869,7 +1876,8 @@ def double_crossvalidation_split_df( tuple of k tuples [(folds_val, folds_test), …] elements same as :meth:`crossvalidation_split_df` returns """ - df, _, _, _ = df_utils.prep_or_copy_df(df) + df = df.copy(deep=True) + df, _, _, _ = df_utils.check_multiple_series_id(df) df = _check_dataframe(self, df, check_y=False, exogenous=False) freq = df_utils.infer_frequency(df, n_lags=self.config_model.max_lags, freq=freq) df = _handle_missing_data( @@ -1915,7 +1923,8 @@ def create_df_with_events(self, df: pd.DataFrame, events_df: pd.DataFrame): "The events configs should be added to the NeuralProphet object (add_events fn)" "before creating the data with events features" ) - df, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(df) + df = df.copy(deep=True) + df, received_ID_col, received_single_time_series, _ = df_utils.check_multiple_series_id(df) df = _check_dataframe(self, df, check_y=True, exogenous=False) df_dict_events = df_utils.create_dict_for_events_or_regressors(df, events_df, "events") df_created = pd.DataFrame() @@ 
-1991,7 +2000,8 @@ def make_future_dataframe( >>> forecast = m.predict(df=future) """ - df, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(df) + df = df.copy(deep=True) + df, received_ID_col, received_single_time_series, _ = df_utils.check_multiple_series_id(df) events_dict = df_utils.create_dict_for_events_or_regressors(df, events_df, "events") regressors_dict = df_utils.create_dict_for_events_or_regressors(df, regressors_df, "regressors") @@ -2073,8 +2083,8 @@ def predict_trend(self, df: pd.DataFrame, quantile: float = 0.5): """ if quantile is not None and not (0 < quantile < 1): raise ValueError("The quantile specified need to be a float in-between (0,1)") - - df, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(df) + df = df.copy(deep=True) + df, received_ID_col, received_single_time_series, _ = df_utils.check_multiple_series_id(df) df = _check_dataframe(self, df, check_y=False, exogenous=False) df = _normalize(df=df, config_normalization=self.config_normalization) df_trend = pd.DataFrame() @@ -2130,7 +2140,8 @@ def predict_seasonal_components(self, df: pd.DataFrame, quantile: float = 0.5): self.config_ar.n_lags = 0 self.config_model.n_forecasts = 1 - df, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(df) + df = df.copy(deep=True) + df, received_ID_col, received_single_time_series, _ = df_utils.check_multiple_series_id(df) df = _check_dataframe(self, df, check_y=False, exogenous=False) df = _normalize(df=df, config_normalization=self.config_normalization) for df_name, df_i in df.groupby("ID"): @@ -2298,7 +2309,8 @@ def plot( ---- None (default): plot self.highlight_forecast_step_n by default """ - fcst, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(fcst) + fcst = fcst.copy(deep=True) + fcst, received_ID_col, received_single_time_series, _ = df_utils.check_multiple_series_id(fcst) if not received_single_time_series: if df_name not in fcst["ID"].unique(): assert len(fcst["ID"].unique()) > 1 @@ -2413,7 +2425,8 @@ def get_latest_forecast( """ if self.config_model.max_lags == 0: raise ValueError("Use the standard plot function for models without lags.") - fcst, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(fcst) + fcst = fcst.copy(deep=True) + fcst, received_ID_col, received_single_time_series, _ = df_utils.check_multiple_series_id(fcst) if not received_single_time_series: if df_name not in fcst["ID"].unique(): assert len(fcst["ID"].unique()) > 1 @@ -2489,7 +2502,8 @@ def plot_latest_forecast( """ if self.config_model.max_lags == 0: raise ValueError("Use the standard plot function for models without lags.") - fcst, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(fcst) + fcst = fcst.copy(deep=True) + fcst, received_ID_col, received_single_time_series, _ = df_utils.check_multiple_series_id(fcst) if not received_single_time_series: if df_name not in fcst["ID"].unique(): assert len(fcst["ID"].unique()) > 1 @@ -2625,7 +2639,8 @@ def plot_components( matplotlib.axes.Axes plot of NeuralProphet components """ - fcst, received_ID_col, received_single_time_series, _ = df_utils.prep_or_copy_df(fcst) + fcst = fcst.copy(deep=True) + fcst, received_ID_col, received_single_time_series, _ = df_utils.check_multiple_series_id(fcst) if not received_single_time_series: if df_name not in fcst["ID"].unique(): assert len(fcst["ID"].unique()) > 1 diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 1044d63eb..85116040f 
100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -45,10 +45,10 @@ def __init__( # Context Notes # Currently done to df before it arrives here: - # -> fit calls prep_or_copy_df, _check_dataframe, and _handle_missing_data, passes to _train - # -> _train calls prep_or_copy_df, then passes to init_train_loader, which returns the train_loader - # -> init_train_loader calls prep_or_copy_df, _normalize, _create_dataset (returns TimeDataset), returns dataset wrapped in DataLoader - # ->_create_dataset calls prep_or_copy_df, then returns GlobalTimeDataset + # -> fit calls copy, check_multiple_series_id, _check_dataframe, and _handle_missing_data, passes to _train + # -> _train calls passes to init_train_loader, which returns the train_loader + # -> init_train_loader calls _normalize, _create_dataset (returns TimeDataset), returns dataset wrapped in DataLoader + # ->_create_dataset returns GlobalTimeDataset # Future TODO: integrate some of these preprocessing steps happening outside? self.df = df.reset_index(drop=True) # Needed for index based operations in __getitem__ diff --git a/tests/test_integration.py b/tests/test_integration.py index 8185f5d2e..0abe17bd3 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -55,6 +55,7 @@ def test_train_eval_test(): learning_rate=LR, ) df = pd.read_csv(PEYTON_FILE, nrows=95) + df, _, _, _ = df_utils.check_multiple_series_id(df) df, _, _ = df_utils.check_dataframe(df, check_y=False) _handle_missing_data( df=df, @@ -78,10 +79,11 @@ def test_train_eval_test(): def test_df_utils_func(): log.info("testing: df_utils Test") df = pd.read_csv(PEYTON_FILE, nrows=95) + # df = df.copy(deep=True) + df, _, _, _ = df_utils.check_multiple_series_id(df) df, _, _ = df_utils.check_dataframe(df, check_y=False) # test find_time_threshold - df, _, _, _ = df_utils.prep_or_copy_df(df) time_threshold = df_utils.find_time_threshold(df, n_lags=2, n_forecasts=2, valid_p=0.2, inputs_overbleed=True) df_train, df_val = df_utils.split_considering_timestamp( df, n_lags=2, n_forecasts=2, inputs_overbleed=True, threshold_time_stamp=time_threshold diff --git a/tests/test_regularization.py b/tests/test_regularization.py index 6d75e0b40..d96464c3b 100644 --- a/tests/test_regularization.py +++ b/tests/test_regularization.py @@ -57,6 +57,7 @@ def test_reg_func_abs(): def test_regularization_holidays(): log.info("testing: regularization of holidays") df = generate_holiday_dataset(y_holidays_override=Y_HOLIDAYS_OVERRIDE) + df, _, _, _ = df_utils.check_multiple_series_id(df) df, _, _ = df_utils.check_dataframe(df, check_y=False) m = NeuralProphet( @@ -92,6 +93,7 @@ def test_regularization_holidays(): def test_regularization_events(): log.info("testing: regularization of events") df, events = generate_event_dataset(y_events_override=Y_EVENTS_OVERRIDE) + df, _, _, id_list = df_utils.check_multiple_series_id(df) df, _, _ = df_utils.check_dataframe(df, check_y=False) m = NeuralProphet( @@ -147,6 +149,7 @@ def test_regularization_lagged_regressor(): """ log.info("testing: regularization lagged regressors") df, lagged_regressors = generate_lagged_regressor_dataset(periods=100) + df, _, _, id_list = df_utils.check_multiple_series_id(df) df, _, _ = df_utils.check_dataframe(df, check_y=False) m = NeuralProphet( diff --git a/tests/test_unit.py b/tests/test_unit.py index a273f0acd..7f42b1495 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -79,9 +79,11 @@ def test_timedataset_minimal(): config_model.set_max_num_lags(n_lags) config_missing = 
configure.MissingDataHandling() # config_train = configure.Train() + df_in, _, _, _ = df_utils.check_multiple_series_id(df_in) df, df_val = df_utils.split_df(df_in, n_lags, n_forecasts, valid_p) # create a tabularized dataset from time series - df, _, _, _ = df_utils.prep_or_copy_df(df) + # df = df.copy(deep=True) + # df, _, _, _ = df_utils.check_multiple_series_id(df) df, _, _ = df_utils.check_dataframe(df) df = _handle_missing_data( df, @@ -135,10 +137,7 @@ def test_timedataset_minimal(): def test_normalize(): length = 100 days = pd.date_range(start="2017-01-01", periods=length) - y = np.ones(length) - y[1] = 0 - y[2] = 2 - y[3] = 3.3 + y = np.arange(length) df = pd.DataFrame({"ds": days, "y": y}) m = NeuralProphet( epochs=EPOCHS, @@ -146,7 +145,8 @@ def test_normalize(): learning_rate=LR, normalize="soft", ) - df, _, _, _ = df_utils.prep_or_copy_df(df) + df = df.copy(deep=True) + df, _, _, _ = df_utils.check_multiple_series_id(df) # with config m.config_normalization.init_data_params(df, m.config_lagged_regressors, m.config_regressors, m.config_events) @@ -155,11 +155,28 @@ def test_normalize(): m.config_normalization.unknown_data_normalization = True _normalize(df=df, config_normalization=m.config_normalization) m.config_normalization.unknown_data_normalization = False + # using config for utils df = df.drop("ID", axis=1) df_utils.normalize(df, m.config_normalization.global_data_params) df_utils.normalize(df, m.config_normalization.local_data_params["__df__"]) + +def test_normalize_utils(): + length = 100 + days = pd.date_range(start="2017-01-01", periods=length) + y = np.arange(length) + df = pd.DataFrame({"ds": days, "y": y}) + m = NeuralProphet( + epochs=EPOCHS, + batch_size=BATCH_SIZE, + learning_rate=LR, + normalize="soft", + ) + df, _, _, _ = df_utils.check_multiple_series_id(df) + + # m.config_normalization.unknown_data_normalization = True + # with utils local_data_params, global_data_params = df_utils.init_data_params( df=df, @@ -170,8 +187,10 @@ def test_normalize(): global_normalization=m.config_normalization.global_normalization, global_time_normalization=m.config_normalization.global_time_normalization, ) - df_utils.normalize(df, global_data_params) - df_utils.normalize(df, local_data_params["__df__"]) + log.error(local_data_params) + log.error(global_data_params) + df_utils.normalize(df.copy(deep=True), global_data_params) + df_utils.normalize(df.copy(deep=True), local_data_params["__df__"]) def test_add_lagged_regressors(): @@ -250,6 +269,7 @@ def check_split(df_in, df_len_expected, n_lags, n_forecasts, freq, p=0.1): n_lags=n_lags, n_forecasts=n_forecasts, ) + df_in, _, _, _ = df_utils.check_multiple_series_id(df_in) df_in, _, _ = df_utils.check_dataframe(df_in, check_y=False) df_in = _handle_missing_data( df=df_in, @@ -297,6 +317,7 @@ def check_split(df_in, df_len_expected, n_lags, n_forecasts, freq, p=0.1): def test_cv(): def check_folds(df, n_lags, n_forecasts, valid_fold_num, valid_fold_pct, fold_overlap_pct): + df, _, _, _ = df_utils.check_multiple_series_id(df) folds = df_utils.crossvalidation_split_df( df, n_lags, n_forecasts, valid_fold_num, valid_fold_pct, fold_overlap_pct ) @@ -318,8 +339,9 @@ def check_folds(df, n_lags, n_forecasts, valid_fold_num, valid_fold_pct, fold_ov assert all([x == y for (x, y) in zip(train_folds_samples, train_folds_should)]) len_df = 100 + df = pd.DataFrame({"ds": pd.date_range(start="2017-01-01", periods=len_df), "y": np.arange(len_df)}) check_folds( - df=pd.DataFrame({"ds": pd.date_range(start="2017-01-01", periods=len_df), "y": 
np.arange(len_df)}), + df=df, n_lags=0, n_forecasts=1, valid_fold_num=3, @@ -327,8 +349,9 @@ def check_folds(df, n_lags, n_forecasts, valid_fold_num, valid_fold_pct, fold_ov fold_overlap_pct=0.0, ) len_df = 1000 + df = pd.DataFrame({"ds": pd.date_range(start="2017-01-01", periods=len_df), "y": np.arange(len_df)}) check_folds( - df=pd.DataFrame({"ds": pd.date_range(start="2017-01-01", periods=len_df), "y": np.arange(len_df)}), + df=df, n_lags=50, n_forecasts=10, valid_fold_num=10, @@ -342,6 +365,7 @@ def check_folds_dict( df, n_lags, n_forecasts, valid_fold_num, valid_fold_pct, fold_overlap_pct, global_model_cv_type="local" ): "Does not work with global_model_cv_type == global-time or global_model_cv_type is None" + df, _, _, _ = df_utils.check_multiple_series_id(df) folds = df_utils.crossvalidation_split_df( df, n_lags, @@ -502,8 +526,9 @@ def test_reg_delay(): def test_double_crossvalidation(): len_df = 100 + df = pd.DataFrame({"ds": pd.date_range(start="2017-01-01", periods=len_df), "y": np.arange(len_df), "ID": "__df__"}) folds_val, folds_test = df_utils.double_crossvalidation_split_df( - df=pd.DataFrame({"ds": pd.date_range(start="2017-01-01", periods=len_df), "y": np.arange(len_df)}), + df=df, n_lags=0, n_forecasts=1, k=3, @@ -531,8 +556,10 @@ def test_double_crossvalidation(): learning_rate=LR, n_lags=2, ) + len_df = 100 + df = pd.DataFrame({"ds": pd.date_range(start="2017-01-01", periods=len_df), "y": np.arange(len_df), "ID": "__df__"}) folds_val, folds_test = m.double_crossvalidation_split_df( - df=pd.DataFrame({"ds": pd.date_range(start="2017-01-01", periods=len_df), "y": np.arange(len_df)}), + df=df, k=3, valid_pct=0.3, test_pct=0.15, @@ -554,7 +581,10 @@ def test_double_crossvalidation(): # Raise not implemented error as double_crossvalidation is not compatible with many time series with pytest.raises(NotImplementedError): - df = pd.DataFrame({"ds": pd.date_range(start="2017-01-01", periods=len_df), "y": np.arange(len_df)}) + len_df = 100 + df = pd.DataFrame( + {"ds": pd.date_range(start="2017-01-01", periods=len_df), "y": np.arange(len_df), "ID": "__df__"} + ) df1 = df.copy(deep=True) df1["ID"] = "df1" df2 = df.copy(deep=True) @@ -891,6 +921,7 @@ def test_too_many_NaN(): limit_linear=config_missing.impute_linear, rolling=config_missing.impute_rolling, ) + df, _, _, id_list = df_utils.check_multiple_series_id(df) df, _, _ = df_utils.check_dataframe(df) local_data_params, global_data_params = df_utils.init_data_params(df=df, normalize="minmax") df = df.drop("ID", axis=1) diff --git a/tests/utils/benchmark_time_dataset.py b/tests/utils/benchmark_time_dataset.py index af2dda090..f20f30e7e 100644 --- a/tests/utils/benchmark_time_dataset.py +++ b/tests/utils/benchmark_time_dataset.py @@ -70,8 +70,8 @@ def load(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True, iterations=1 ) # Mimick m.fit(df) behavior - - df, _, _, m.id_list = df_utils.prep_or_copy_df(df) + df = df.copy(deep=True) + df, _, _, m.id_list = df_utils.check_multiple_series_id(df) df = _check_dataframe(m, df, check_y=True, exogenous=True) m.data_freq = df_utils.infer_frequency(df, n_lags=m.config_model.max_lags, freq=freq) df = _handle_missing_data(
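
The net effect of the refactor: callers that previously relied on `prep_or_copy_df` to both deep-copy the dataframe and normalize its `ID` column now make the copy explicit at the public API boundary and then call `check_multiple_series_id`, which only validates the frame and adds `ID = "__df__"` for a single series. Below is a minimal sketch of the new calling pattern, assuming the usual `from neuralprophet import df_utils` import used in the tests above; the sample dataframe is purely illustrative.

```python
import pandas as pd

from neuralprophet import df_utils  # assumes the package layout used by the tests in this patch

# Illustrative single-series dataframe without an "ID" column.
df = pd.DataFrame({
    "ds": pd.date_range("2024-01-01", periods=10, freq="D"),
    "y": range(10),
})

# Old pattern (pre-patch): one helper both deep-copied df and handled the "ID" column.
# df, received_id_col, single_series, id_list = df_utils.prep_or_copy_df(df)

# New pattern (post-patch): copy explicitly once at the entry point, then run the
# pure check, which adds ID="__df__" for a single series and returns the same
# (df, received_ID_col, received_single_time_series, id_list) tuple as before.
df = df.copy(deep=True)
df, received_id_col, single_series, id_list = df_utils.check_multiple_series_id(df)

assert received_id_col is False   # no "ID" column was passed in
assert single_series is True      # exactly one time series detected
assert id_list == ["__df__"]      # placeholder ID created by the check
```

Internal helpers (`_normalize`, `check_dataframe`, `infer_frequency`, the cross-validation splitters) now receive a frame that already carries the `ID` column, which is why their former `prep_or_copy_df` calls are commented out or replaced by `check_multiple_series_id` in the hunks above.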