import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.arima.model import ARIMA
from datetime import datetime, timedelta
register_matplotlib_converters()
5: Generate Some Data#
\(y_t = 50 + 0.4\varepsilon_{t-1} + 0.3\varepsilon_{t-2} + \varepsilon_t\)#
\(\varepsilon_t \sim N(0,1)\)#
#simulate the MA(2) process above: y_t = 50 + 0.4*e_{t-1} + 0.3*e_{t-2} + e_t
errors = np.random.normal(0, 1, 400)
date_index = pd.date_range(start='9/1/2019', end='1/1/2020')
mu = 50
series = []
#start at t=2 so that errors[t-1] and errors[t-2] never wrap around to the end of the array
for t in range(2, len(date_index)+2):
    series.append(mu + 0.4*errors[t-1] + 0.3*errors[t-2] + errors[t])
series = pd.Series(series, date_index)
series = series.asfreq(pd.infer_freq(series.index))
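As a quick sanity check (not in the original notebook), the sample mean of the simulated series should sit near 50 and the sample variance near the theoretical MA(2) variance \((1 + 0.4^2 + 0.3^2)\sigma^2 = 1.25\):
#compare sample moments against the theoretical MA(2) moments
print('Sample mean:', round(series.mean(), 3))
print('Sample variance:', round(series.var(), 3))
print('Theoretical variance:', (1 + 0.4**2 + 0.3**2) * 1.0)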
plt.figure(figsize=(10,4))
plt.plot(series)
plt.axhline(mu, linestyle='--', color='grey')
from scipy.stats import pearsonr
#manual autocorrelation: Pearson correlation between the series and a lagged copy of itself
def calc_corr(series, lag):
    return pearsonr(series[:-lag], series[lag:])[0]
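A minimal usage sketch (illustrative, not from the original output): for this MA(2) the manual correlations at lags 1 and 2 should be clearly positive (roughly 0.42 and 0.24 in theory), while higher lags should hover near zero.
#manual autocorrelations at the first two lags
print('Lag-1 correlation:', round(calc_corr(series, 1), 3))
print('Lag-2 correlation:', round(calc_corr(series, 2), 3))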
ACF#
#sample autocorrelation function (acf_vals[0] is lag 0 and always equals 1)
acf_vals = acf(series)
num_lags = 10
plt.bar(range(num_lags), acf_vals[:num_lags])
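The same information is available from plot_acf (imported above but otherwise unused), which adds confidence bands around the bars; a minimal sketch:
#built-in ACF plot with confidence intervals
fig = plot_acf(series, lags=num_lags)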
PACF#
#sample partial autocorrelation function
pacf_vals = pacf(series)
num_lags = 20
plt.bar(range(num_lags), pacf_vals[:num_lags])
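Likewise, plot_pacf draws the partial autocorrelations with confidence bands; a minimal sketch:
#built-in PACF plot with confidence intervals
fig = plot_pacf(series, lags=num_lags)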
Get training and testing sets#
#training data runs through 2019-12-30; the final two days are held out for testing
train_end = datetime(2019,12,30)
test_end = datetime(2020,1,1)
train_data = series[:train_end]
test_data = series[train_end + timedelta(days=1):test_end]
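A quick, illustrative check of the split: the training set should cover September 1 through December 30 (121 observations, matching the model summary below), leaving the final two days for testing.
#confirm the date ranges and sizes of the two sets
print('Train:', train_data.index[0].date(), '-', train_data.index[-1].date(), '|', len(train_data), 'obs')
print('Test: ', test_data.index[0].date(), '-', test_data.index[-1].date(), '|', len(test_data), 'obs')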
Fit ARIMA Model#
#view the ARIMA class docstring (IPython help)
ARIMA?
#create the model
model = ARIMA(train_data, order=(0,0,2))
#fit the model
model_fit = model.fit()
#summary of the model
print(model_fit.summary())
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 121
Model: ARIMA(0, 0, 2) Log Likelihood -177.787
Date: Wed, 22 Mar 2023 AIC 363.575
Time: 12:58:21 BIC 374.758
Sample: 09-01-2019 HQIC 368.117
- 12-30-2019
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 49.8920 0.171 292.359 0.000 49.557 50.226
ma.L1 0.3305 0.094 3.498 0.000 0.145 0.516
ma.L2 0.3725 0.088 4.220 0.000 0.199 0.546
sigma2 1.1027 0.143 7.704 0.000 0.822 1.383
===================================================================================
Ljung-Box (L1) (Q): 0.94 Jarque-Bera (JB): 4.17
Prob(Q): 0.33 Prob(JB): 0.12
Heteroskedasticity (H): 1.77 Skew: -0.44
Prob(H) (two-sided): 0.08 Kurtosis: 3.20
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
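The fitted coefficients can also be pulled out programmatically from model_fit.params (a pandas Series indexed by const, ma.L1, ma.L2 and sigma2); a minimal sketch:
#fitted coefficients as a pandas Series
print(model_fit.params.round(3))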
Predicted Model:#
\(\hat{y}_t = 49.89 + 0.33\varepsilon_{t-1} + 0.37\varepsilon_{t-2}\)#
#get prediction start and end dates
pred_start_date = test_data.index[0]
pred_end_date = test_data.index[-1]
#get the predictions and residuals
predictions = model_fit.predict(start=pred_start_date, end=pred_end_date)
residuals = test_data - predictions
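Because the test window starts immediately after the training data, model_fit.forecast gives the same out-of-sample values as predict; a minimal equivalent sketch for the two test days:
#out-of-sample forecast for the length of the test set
forecast = model_fit.forecast(steps=len(test_data))
print(forecast)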
plt.figure(figsize=(10,4))
plt.plot(series[-14:])
plt.plot(predictions)
plt.legend(('Data', 'Predictions'), fontsize=16)
#mean absolute percent error (printed as a fraction; 0.004 corresponds to 0.4%)
print('Mean Absolute Percent Error:', round(np.mean(abs(residuals/test_data)),4))
Mean Absolute Percent Error: 0.004
print('Root Mean Squared Error:', np.sqrt(np.mean(residuals**2)))
Root Mean Squared Error: 0.23016330452025244
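As a final check (not in the original notebook), the in-sample residuals of a well-specified MA(2) fit should behave like white noise; plotting their ACF with plot_acf makes any leftover autocorrelation easy to spot:
#in-sample residuals should show no remaining autocorrelation
fig = plot_acf(model_fit.resid, lags=20)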