Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[minor] Support data without datestamps #1188

Closed
wants to merge 30 commits into from
Closed
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
e0e8966
Start
SimonWittner Mar 2, 2023
5061930
preliminary
SimonWittner Mar 14, 2023
ccd090d
helper function to create random datestamps for equidistant data
SimonWittner Mar 14, 2023
6bab826
helper function for equidistant data without datestamps
SimonWittner Mar 14, 2023
7f4b3d0
fixed blacked & isort
SimonWittner Mar 14, 2023
22daacc
added Pytest
SimonWittner Mar 15, 2023
fcb6dac
Merge branch 'ourownstory:main' into equidistant_data
SimonWittner Mar 16, 2023
ebb1ae8
Merge branch 'ourownstory:main' into equidistant_data
SimonWittner Mar 18, 2023
9b5e6c8
Merge remote-tracking branch 'origin' into equidistant_data
SimonWittner Apr 25, 2023
84b852f
changes Oskar
SimonWittner Apr 25, 2023
7c9af08
black
SimonWittner Apr 25, 2023
99227a9
adjusted
SimonWittner Apr 25, 2023
b5021eb
changed encoder
SimonWittner Apr 26, 2023
44602d4
Merge branch 'main' into equidistant_data
SimonWittner Apr 26, 2023
72e3a7f
Merge branch 'main' into equidistant_data
ourownstory Apr 26, 2023
e7b1739
changed name back to IDs
leoniewgnr Apr 27, 2023
8ccbc1b
Merge branch 'main' into equidistant_data
leoniewgnr Apr 27, 2023
def74dd
Merge remote-tracking branch 'origin/main' into equidistant_data
SimonWittner Apr 29, 2023
6c30c39
adjustment model passing
SimonWittner Apr 29, 2023
7423206
Merge remote-tracking branch 'origin/main' into equidistant_data
SimonWittner May 4, 2023
01018c5
WIP - optimizing handling of model during checking of df
SimonWittner May 6, 2023
a338c39
updated process.py
SimonWittner May 6, 2023
5c0db1f
improved model handling
SimonWittner May 6, 2023
11c6fa3
improved model handling_v2
SimonWittner May 7, 2023
467fee9
improved model handling_v3
SimonWittner May 7, 2023
773dc97
changed location of dummy_ds function call
SimonWittner May 9, 2023
6a0b437
changed location of dummy_ds function call_v2
SimonWittner May 11, 2023
517d278
Merge branch 'main' into equidistant_data
ourownstory May 12, 2023
509c618
Merge branch 'main' into equidistant_data
ourownstory Jun 2, 2023
82ba128
Merge branch 'main' into equidistant_data
ourownstory Jun 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 33 additions & 3 deletions neuralprophet/df_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
if TYPE_CHECKING:
from neuralprophet.configure import ConfigEvents, ConfigLaggedRegressors, ConfigSeasonality


log = logging.getLogger("NP.df_utils")


Expand All @@ -37,11 +36,15 @@ def prep_or_copy_df(df: pd.DataFrame) -> tuple[pd.DataFrame, bool, bool, list[st
bool
whether it is a single time series
list
list of IDs
list of IDs
SimonWittner marked this conversation as resolved.
Show resolved Hide resolved
"""
if not isinstance(df, pd.DataFrame):
raise ValueError("Provided DataFrame (df) must be of pd.DataFrame type.")

if "ds" not in df and "y" in df:
df = create_random_datestamps(df)
log.info(
"Dataframe has no column 'ds' - random equidistant datestamps added. Consider calling 'df_utils.create_random_datestamps' to adjust ds."
)
# Create a copy of the dataframe
df_copy = df.copy(deep=True)

Expand Down Expand Up @@ -502,6 +505,33 @@ def check_single_dataframe(df, check_y, covariates, regressors, events, seasonal
return df


def create_random_datestamps(
    df, freq="D", startyear=1970, startmonth=1, startday=1, starthour=0, startminute=0, startsecond=0
):
    """
    Helper function to prepend a synthetic, equidistant ``ds`` column to data that has no datestamps.

    Despite the name, the datestamps are deterministic: they start at the given
    start date and advance by ``freq`` for each row of ``df``.

    Parameters
    ----------
        df : pd.DataFrame
            without column ``ds``
        freq : str
            Frequency of data recording, any valid frequency for pd.date_range, such as ``D`` or ``M``
        startyear, startmonth, startday, starthour, startminute, startsecond : int
            Defines the first datestamp

    Returns
    -------
        pd.DataFrame
            new dataframe with the string-typed column ``ds`` first, followed by the columns of ``df``;
            the row order and index of ``df`` are preserved
    """
    startdate = pd.Timestamp(
        year=startyear, month=startmonth, day=startday, hour=starthour, minute=startminute, second=startsecond
    )
    ds = pd.date_range(startdate, periods=len(df), freq=freq).to_frame(index=False, name="ds").astype(str)
    # pd.concat(axis=1) aligns on index labels. Give the datestamps the same index as df so that
    # rows are paired positionally, even when df has a non-default index (e.g. after filtering).
    ds.index = df.index
    return pd.concat([ds, df], axis=1)


def check_dataframe(
df: pd.DataFrame, check_y: bool = True, covariates=None, regressors=None, events=None, seasonalities=None
) -> Tuple[pd.DataFrame, List]:
Expand Down
11 changes: 11 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@
PLOT = False


def test_create_random_datestamps():
    """Smoke test: fit, extend and predict on a dataframe that has no ``ds`` column."""
    # Remove the datestamps so the model has to work without them.
    df_without_ds = pd.read_csv(PEYTON_FILE).drop(columns="ds")

    model = NeuralProphet(quantiles=[0.02, 0.98], epochs=10)
    model.fit(df_without_ds, freq="D")

    # The test passes as long as none of these calls raises.
    future_df = model.make_future_dataframe(df_without_ds, periods=365, n_historic_predictions=True)
    model.predict(future_df)


def test_save_load():
df = pd.read_csv(PEYTON_FILE, nrows=NROWS)
m = NeuralProphet(
Expand Down