import warnings
from copy import deepcopy
from datetime import datetime
from typing import Dict
from typing import Iterable
from typing import List
from typing import Optional
from typing import Sequence
from typing import Set
from typing import Union
import pandas as pd
from etna import SETTINGS
from etna.distributions import BaseDistribution
from etna.distributions import CategoricalDistribution
from etna.distributions import FloatDistribution
from etna.models.base import BaseAdapter
from etna.models.base import PredictionIntervalContextIgnorantAbstractModel
from etna.models.mixins import PerSegmentModelMixin
from etna.models.mixins import PredictionIntervalContextIgnorantModelMixin
if SETTINGS.prophet_required:
from prophet import Prophet
from prophet.serialize import model_from_dict
from prophet.serialize import model_to_dict
[docs]class _ProphetAdapter(BaseAdapter):
"""Class for holding Prophet model."""
predefined_regressors_names = ("floor", "cap")
def __init__(
self,
growth: str = "linear",
changepoints: Optional[List[datetime]] = None,
n_changepoints: int = 25,
changepoint_range: float = 0.8,
yearly_seasonality: Union[str, bool] = "auto",
weekly_seasonality: Union[str, bool] = "auto",
daily_seasonality: Union[str, bool] = "auto",
holidays: Optional[pd.DataFrame] = None,
seasonality_mode: str = "additive",
seasonality_prior_scale: float = 10.0,
holidays_prior_scale: float = 10.0,
changepoint_prior_scale: float = 0.05,
mcmc_samples: int = 0,
interval_width: float = 0.8,
uncertainty_samples: Union[int, bool] = 1000,
stan_backend: Optional[str] = None,
additional_seasonality_params: Iterable[Dict[str, Union[str, float, int]]] = (),
):
self.growth = growth
self.n_changepoints = n_changepoints
self.changepoints = changepoints
self.changepoint_range = changepoint_range
self.yearly_seasonality = yearly_seasonality
self.weekly_seasonality = weekly_seasonality
self.daily_seasonality = daily_seasonality
self.holidays = holidays
self.seasonality_mode = seasonality_mode
self.seasonality_prior_scale = seasonality_prior_scale
self.holidays_prior_scale = holidays_prior_scale
self.changepoint_prior_scale = changepoint_prior_scale
self.mcmc_samples = mcmc_samples
self.interval_width = interval_width
self.uncertainty_samples = uncertainty_samples
self.stan_backend = stan_backend
self.additional_seasonality_params = additional_seasonality_params
self.model = self._create_model()
self.regressor_columns: Optional[List[str]] = None
def _create_model(self) -> "Prophet":
model = Prophet(
growth=self.growth,
changepoints=self.changepoints,
n_changepoints=self.n_changepoints,
changepoint_range=self.changepoint_range,
yearly_seasonality=self.yearly_seasonality,
weekly_seasonality=self.weekly_seasonality,
daily_seasonality=self.daily_seasonality,
holidays=self.holidays,
seasonality_mode=self.seasonality_mode,
seasonality_prior_scale=self.seasonality_prior_scale,
holidays_prior_scale=self.holidays_prior_scale,
changepoint_prior_scale=self.changepoint_prior_scale,
mcmc_samples=self.mcmc_samples,
interval_width=self.interval_width,
uncertainty_samples=self.uncertainty_samples,
stan_backend=self.stan_backend,
)
for seasonality_params in self.additional_seasonality_params:
model.add_seasonality(**seasonality_params)
return model
def _check_not_used_columns(self, df: pd.DataFrame):
if self.regressor_columns is None:
raise ValueError("Something went wrong, regressor_columns is None!")
columns_not_used = [col for col in df.columns if col not in ["target", "timestamp"] + self.regressor_columns]
if columns_not_used:
warnings.warn(
message=f"This model doesn't work with exogenous features unknown in future. "
f"Columns {columns_not_used} won't be used."
)
def _select_regressors(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
"""Select data with regressors.
During fit there can't be regressors with NaNs, they are removed at higher level.
Look at the issue: https://github.com/tinkoff-ai/etna/issues/557
During prediction without validation NaNs in regressors lead to exception from the underlying model.
This model requires data to be in numeric dtype.
"""
if self.regressor_columns is None:
raise ValueError("Something went wrong, regressor_columns is None!")
regressors_with_nans = [regressor for regressor in self.regressor_columns if df[regressor].isna().sum() > 0]
if regressors_with_nans:
raise ValueError(
f"Regressors {regressors_with_nans} contain NaN values. "
"Try to lower horizon value, or drop these regressors."
)
if self.regressor_columns:
try:
result = df[self.regressor_columns].apply(pd.to_numeric)
except ValueError as e:
raise ValueError(f"Only convertible to numeric features are allowed! Error: {str(e)}")
else:
result = None
return result
[docs] def fit(self, df: pd.DataFrame, regressors: List[str]) -> "_ProphetAdapter":
"""
Fits a Prophet model.
Parameters
----------
df:
Features dataframe
regressors:
List of the columns with regressors
"""
self.regressor_columns = regressors
self._check_not_used_columns(df)
prophet_df = self._prepare_prophet_df(df=df)
for regressor in self.regressor_columns:
if regressor not in self.predefined_regressors_names:
self.model.add_regressor(regressor)
self.model.fit(prophet_df)
return self
[docs] def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequence[float]) -> pd.DataFrame:
"""
Compute predictions from a Prophet model.
Parameters
----------
df:
Features dataframe
prediction_interval:
If True returns prediction interval for forecast
quantiles:
Levels of prediction distribution
Returns
-------
:
DataFrame with predictions
"""
prophet_df = self._prepare_prophet_df(df=df)
forecast = self.model.predict(prophet_df)
y_pred = pd.DataFrame(forecast["yhat"])
if prediction_interval:
sim_values = self.model.predictive_samples(prophet_df)
for quantile in quantiles:
percentile = quantile * 100
y_pred[f"yhat_{quantile:.4g}"] = self.model.percentile(sim_values["yhat"], percentile, axis=1)
rename_dict = {
column: column.replace("yhat", "target") for column in y_pred.columns if column.startswith("yhat")
}
y_pred = y_pred.rename(rename_dict, axis=1)
return y_pred
def _prepare_prophet_df(self, df: pd.DataFrame) -> pd.DataFrame:
"""Prepare dataframe for fit and predict."""
if self.regressor_columns is None:
raise ValueError("List of regressor is not set!")
df = df.reset_index()
prophet_df = pd.DataFrame()
prophet_df["y"] = df["target"]
prophet_df["ds"] = df["timestamp"]
regressors_data = self._select_regressors(df)
if regressors_data is not None:
prophet_df[self.regressor_columns] = regressors_data[self.regressor_columns]
return prophet_df
@staticmethod
def _filter_aggregated_components(components: Iterable[str]) -> Set[str]:
"""Filter out aggregated components."""
# aggregation of corresponding model terms, e.g. sum
aggregated_components = {
"additive_terms",
"multiplicative_terms",
"extra_regressors_additive",
"extra_regressors_multiplicative",
}
return set(components) - aggregated_components
def _check_mul_components(self):
"""Raise error if model contains multiplicative components."""
components_modes = self.model.component_modes
if components_modes is None:
raise ValueError("This model is not fitted!")
mul_components = self._filter_aggregated_components(self.model.component_modes["multiplicative"])
if len(mul_components) > 0:
raise ValueError("Forecast decomposition is only supported for additive components!")
def _predict_seasonal_components(self, df: pd.DataFrame) -> pd.DataFrame:
"""Estimate seasonal, holidays and exogenous components."""
model = self.model
seasonal_features, _, component_cols, _ = model.make_all_seasonality_features(df)
holiday_names = set(model.train_holiday_names) if model.train_holiday_names is not None else set()
components_names = list(
filter(lambda v: v not in holiday_names, self._filter_aggregated_components(component_cols.columns))
)
beta_c = model.params["beta"].T * component_cols[components_names].values
comp = seasonal_features.values @ beta_c
# apply rescaling for additive components
comp *= model.y_scale
return pd.DataFrame(data=comp, columns=components_names)
[docs] def predict_components(self, df: pd.DataFrame) -> pd.DataFrame:
"""Estimate prediction components.
Parameters
----------
df:
features dataframe
Returns
-------
:
dataframe with prediction components
"""
self._check_mul_components()
prophet_df = self._prepare_prophet_df(df=df)
prophet_df = self.model.setup_dataframe(prophet_df)
components = self._predict_seasonal_components(df=prophet_df)
components["trend"] = self.model.predict_trend(df=prophet_df)
return components.add_prefix("target_component_")
[docs] def get_model(self) -> Prophet:
"""Get internal prophet.Prophet model that is used inside etna class.
Returns
-------
result:
Internal model
"""
return self.model
def __getstate__(self):
state = self.__dict__.copy()
try:
model_dict = model_to_dict(self.model)
is_fitted = True
except ValueError:
is_fitted = False
model_dict = {}
del state["model"]
state["_is_fitted"] = is_fitted
state["_model_dict"] = model_dict
return state
def __setstate__(self, state):
local_state = deepcopy(state)
is_fitted = local_state["_is_fitted"]
model_dict = local_state["_model_dict"]
del local_state["_is_fitted"]
del local_state["_model_dict"]
self.__dict__.update(local_state)
if is_fitted:
self.model = model_from_dict(model_dict)
else:
self.model = self._create_model()
[docs]class ProphetModel(
PerSegmentModelMixin, PredictionIntervalContextIgnorantModelMixin, PredictionIntervalContextIgnorantAbstractModel
):
"""Class for holding Prophet model.
Notes
-----
Original Prophet can use features 'cap' and 'floor',
they should be added to the known_future list on dataset initialization.
This model supports in-sample and out-of-sample forecast decomposition. The number
of components in the decomposition depends on model parameters. Main components are:
trend, seasonality, holiday and exogenous effects. Seasonal components will be decomposed
down to individual periods if fitted. Holiday and exogenous will be present in decomposition
if fitted.Corresponding components are obtained directly from the model.
Examples
--------
>>> from etna.datasets import generate_periodic_df
>>> from etna.datasets import TSDataset
>>> from etna.models import ProphetModel
>>> classic_df = generate_periodic_df(
... periods=100,
... start_time="2020-01-01",
... n_segments=4,
... period=7,
... sigma=3
... )
>>> df = TSDataset.to_dataset(df=classic_df)
>>> ts = TSDataset(df, freq="D")
>>> future = ts.make_future(7)
>>> model = ProphetModel(growth="flat")
>>> model.fit(ts=ts)
ProphetModel(growth = 'flat', changepoints = None, n_changepoints = 25,
changepoint_range = 0.8, yearly_seasonality = 'auto', weekly_seasonality = 'auto',
daily_seasonality = 'auto', holidays = None, seasonality_mode = 'additive',
seasonality_prior_scale = 10.0, holidays_prior_scale = 10.0, changepoint_prior_scale = 0.05,
mcmc_samples = 0, interval_width = 0.8, uncertainty_samples = 1000, stan_backend = None,
additional_seasonality_params = (), )
>>> forecast = model.forecast(future)
>>> forecast
segment segment_0 segment_1 segment_2 segment_3
feature target target target target
timestamp
2020-04-10 9.00 9.00 4.00 6.00
2020-04-11 5.00 2.00 7.00 9.00
2020-04-12 0.00 4.00 7.00 9.00
2020-04-13 0.00 5.00 9.00 7.00
2020-04-14 1.00 2.00 1.00 6.00
2020-04-15 5.00 7.00 4.00 7.00
2020-04-16 8.00 6.00 2.00 0.00
"""
def __init__(
self,
growth: str = "linear",
changepoints: Optional[List[datetime]] = None,
n_changepoints: int = 25,
changepoint_range: float = 0.8,
yearly_seasonality: Union[str, bool] = "auto",
weekly_seasonality: Union[str, bool] = "auto",
daily_seasonality: Union[str, bool] = "auto",
holidays: Optional[pd.DataFrame] = None,
seasonality_mode: str = "additive",
seasonality_prior_scale: float = 10.0,
holidays_prior_scale: float = 10.0,
changepoint_prior_scale: float = 0.05,
mcmc_samples: int = 0,
interval_width: float = 0.8,
uncertainty_samples: Union[int, bool] = 1000,
stan_backend: Optional[str] = None,
additional_seasonality_params: Iterable[Dict[str, Union[str, float, int]]] = (),
):
"""
Create instance of Prophet model.
Parameters
----------
growth:
Options are ‘linear’ and ‘logistic’. This likely will not be tuned;
if there is a known saturating point and growth towards that point
it will be included and the logistic trend will be used, otherwise
it will be linear.
changepoints:
List of dates at which to include potential changepoints. If
not specified, potential changepoints are selected automatically.
n_changepoints:
Number of potential changepoints to include. Not used
if input ``changepoints`` is supplied. If ``changepoints`` is not supplied,
then ``n_changepoints`` potential changepoints are selected uniformly from
the first ``changepoint_range`` proportion of the history.
changepoint_range:
Proportion of history in which trend changepoints will
be estimated. Defaults to 0.8 for the first 80%. Not used if
``changepoints`` is specified.
yearly_seasonality:
By default (‘auto’) this will turn yearly seasonality on if there is
a year of data, and off otherwise. Options are [‘auto’, True, False].
If there is more than a year of data, rather than trying to turn this
off during HPO, it will likely be more effective to leave it on and
turn down seasonal effects by tuning ``seasonality_prior_scale``.
weekly_seasonality:
Same as for ``yearly_seasonality``.
daily_seasonality:
Same as for ``yearly_seasonality``.
holidays:
``pd.DataFrame`` with columns holiday (string) and ds (date type)
and optionally columns lower_window and upper_window which specify a
range of days around the date to be included as holidays.
``lower_window=-2`` will include 2 days prior to the date as holidays. Also
optionally can have a column ``prior_scale`` specifying the prior scale for
that holiday.
seasonality_mode:
'additive' (default) or 'multiplicative'.
seasonality_prior_scale:
Parameter modulating the strength of the
seasonality model. Larger values allow the model to fit larger seasonal
fluctuations, smaller values dampen the seasonality. Can be specified
for individual seasonalities using ``add_seasonality``.
holidays_prior_scale:
Parameter modulating the strength of the holiday components model, unless overridden
in the holidays input.
changepoint_prior_scale:
Parameter modulating the flexibility of the
automatic changepoint selection. Large values will allow many
changepoints, small values will allow few changepoints.
mcmc_samples:
Integer, if greater than 0, will do full Bayesian inference
with the specified number of MCMC samples. If 0, will do MAP
estimation.
interval_width:
Float, width of the uncertainty intervals provided
for the forecast. If ``mcmc_samples=0``, this will be only the uncertainty
in the trend using the MAP estimate of the extrapolated generative
model. If ``mcmc.samples>0``, this will be integrated over all model
parameters, which will include uncertainty in seasonality.
uncertainty_samples:
Number of simulated draws used to estimate
uncertainty intervals. Settings this value to 0 or False will disable
uncertainty estimation and speed up the calculation.
stan_backend:
as defined in StanBackendEnum default: None - will try to
iterate over all available backends and find the working one
additional_seasonality_params: Iterable[Dict[str, Union[int, float, str]]]
parameters that describe additional (not 'daily', 'weekly', 'yearly') seasonality that should be
added to model; dict with required keys 'name', 'period', 'fourier_order' and optional ones 'prior_scale',
'mode', 'condition_name' will be used for :py:meth:`prophet.Prophet.add_seasonality` method call.
"""
self.growth = growth
self.n_changepoints = n_changepoints
self.changepoints = changepoints
self.changepoint_range = changepoint_range
self.yearly_seasonality = yearly_seasonality
self.weekly_seasonality = weekly_seasonality
self.daily_seasonality = daily_seasonality
self.holidays = holidays
self.seasonality_mode = seasonality_mode
self.seasonality_prior_scale = seasonality_prior_scale
self.holidays_prior_scale = holidays_prior_scale
self.changepoint_prior_scale = changepoint_prior_scale
self.mcmc_samples = mcmc_samples
self.interval_width = interval_width
self.uncertainty_samples = uncertainty_samples
self.stan_backend = stan_backend
self.additional_seasonality_params = additional_seasonality_params
super(ProphetModel, self).__init__(
base_model=_ProphetAdapter(
growth=self.growth,
n_changepoints=self.n_changepoints,
changepoints=self.changepoints,
changepoint_range=self.changepoint_range,
yearly_seasonality=self.yearly_seasonality,
weekly_seasonality=self.weekly_seasonality,
daily_seasonality=self.daily_seasonality,
holidays=self.holidays,
seasonality_mode=self.seasonality_mode,
seasonality_prior_scale=self.seasonality_prior_scale,
holidays_prior_scale=self.holidays_prior_scale,
changepoint_prior_scale=self.changepoint_prior_scale,
mcmc_samples=self.mcmc_samples,
interval_width=self.interval_width,
uncertainty_samples=self.uncertainty_samples,
stan_backend=self.stan_backend,
additional_seasonality_params=self.additional_seasonality_params,
)
)
[docs] def params_to_tune(self) -> Dict[str, BaseDistribution]:
"""Get default grid for tuning hyperparameters.
This grid tunes parameters: ``seasonality_mode``, ``seasonality_prior_scale``, ``changepoint_prior_scale``,
``changepoint_range``, ``holidays_prior_scale``.
Other parameters are expected to be set by the user.
Returns
-------
:
Grid to tune.
"""
return {
"seasonality_mode": CategoricalDistribution(["additive", "multiplicative"]),
"seasonality_prior_scale": FloatDistribution(low=1e-2, high=10, log=True),
"changepoint_prior_scale": FloatDistribution(low=1e-3, high=0.5, log=True),
"changepoint_range": FloatDistribution(low=0.8, high=0.95),
"holidays_prior_scale": FloatDistribution(low=1e-2, high=10, log=True),
}