from arch.univariate import arch_model
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import pandas as pd
import numpy as np
from random import gauss, seed
seed(10)

import matplotlib.pyplot as plt
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

02- Volatility prediction: Simulated Data#

\[a_t = \varepsilon_t \sigma_t \]
\[\sigma_t^2 = \omega + \alpha_1 a_{t-1}^2 + \alpha_2 a_{t-2}^2 + \beta_1 \sigma_{t-1}^2 + \beta_2 \sigma_{t-2}^2 \]
\[ a_0, a_1 \sim \mathcal{N}(0,1) \]
\[ \sigma_0 =1, \sigma_1 = 1 \]
\[ \varepsilon_t \sim \mathcal{N}(0,1) \]

Simulate the time series#

# create dataset
n = 1000
omega = 0.5

alpha_1 = 0.1
alpha_2 = 0.2

beta_1 = 0.3
beta_2 = 0.4

test_size = int(n*0.1)

series = [gauss(0,1), gauss(0,1)]
vols = [1, 1]

for _ in range(n):
    new_vol = np.sqrt(omega + alpha_1*series[-1]**2 + alpha_2*series[-2]**2 + beta_1*vols[-1]**2 + beta_2*vols[-2]**2)
    new_val = gauss(0,1) * new_vol
    
    vols.append(new_vol)
    series.append(new_val)
plt.figure(figsize=(10,4))
plt.plot(series, label='Series')
plt.plot(vols, color='red', label='Volatility')
plt.axhline(0, ls='--', alpha=0.3, color='k')
plt.legend(ncol=2, loc='best')
plt.title('Simulated GARCH(2,2) Data', fontsize=20)
plt.show()
../../../_images/af4eac3366adfc4e813e34f2f8db7f44fc782d7c34533810bf42152ed83942d1.png

PACF and ACF Plots#

Exercise 1:

  • Plot the PACF of the innovations

  • Plot the PACF of the squared innovations

  • What are the two properties you see in these graphs?

Exercise 2:

  • Do the same for the ACF (a starting sketch is given below)
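
A minimal sketch of how these plots could be produced with the plot_pacf and plot_acf helpers imported at the top of the notebook (the two-panel layout is just an illustrative choice):

# PACF of the simulated innovations and of their squares
fig, axes = plt.subplots(2, 1, figsize=(10, 6))
plot_pacf(np.array(series), ax=axes[0], title='PACF of the innovations')
plot_pacf(np.array(series)**2, ax=axes[1], title='PACF of the squared innovations')
plt.tight_layout()
plt.show()

# Same idea for the ACF
fig, axes = plt.subplots(2, 1, figsize=(10, 6))
plot_acf(np.array(series), ax=axes[0], title='ACF of the innovations')
plot_acf(np.array(series)**2, ax=axes[1], title='ACF of the squared innovations')
plt.tight_layout()
plt.show()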

Fit and Test models#

Separate the training set from the testing set#

train, test = series[:-test_size], series[-test_size:]

Fit the best GARCH model: GARCH(2,2)#

model = arch_model(train, mean='Zero', p=2, o=0, q=2)
model_fit = model.fit(disp='off')
constant_params = model_fit.params
print(model_fit.summary())
                       Zero Mean - GARCH Model Results                        
==============================================================================
Dep. Variable:                      y   R-squared:                       0.000
Mean Model:                 Zero Mean   Adj. R-squared:                  0.001
Vol Model:                      GARCH   Log-Likelihood:               -2833.59
Distribution:                  Normal   AIC:                           5677.18
Method:            Maximum Likelihood   BIC:                           5701.21
                                        No. Observations:                  902
Date:                Tue, Apr 18 2023   Df Residuals:                      902
Time:                        14:12:41   Df Model:                            0
                             Volatility Model                             
==========================================================================
                 coef    std err          t      P>|t|    95.0% Conf. Int.
--------------------------------------------------------------------------
omega          0.5187      0.216      2.396  1.658e-02 [9.437e-02,  0.943]
alpha[1]       0.1373  4.413e-02      3.111  1.863e-03 [5.081e-02,  0.224]
alpha[2]       0.1961  3.813e-02      5.142  2.713e-07   [  0.121,  0.271]
beta[1]    2.7887e-14      0.171  1.635e-13      1.000   [ -0.334,  0.334]
beta[2]        0.6666      0.134      4.981  6.335e-07   [  0.404,  0.929]
==========================================================================

Covariance estimator: robust

fitted_parameters = constant_params.rename('Fitted Parameters').to_frame()
fitted_parameters['true_params'] = [0.5, 0.1, 0.2, 0.3, 0.4]
fitted_parameters

Even when we know the data-generating process, it is difficult to recover the exact parameters of a GARCH model. GARCH estimation is based on maximum likelihood, and the more parameters there are to estimate, the more difficult the estimation becomes.
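
One way to make this concrete is to check whether the true parameters fall inside approximate 95% confidence intervals built from the fitted standard errors. A minimal sketch, using only the params and std_err attributes of the fitted result (the ci name is just for illustration):

# Approximate 95% confidence intervals: estimate +/- 1.96 * standard error
ci = pd.DataFrame(index=model_fit.params.index)
ci['lower'] = model_fit.params - 1.96 * model_fit.std_err
ci['upper'] = model_fit.params + 1.96 * model_fit.std_err
ci['true'] = [0.5, 0.1, 0.2, 0.3, 0.4]
ci['true_inside'] = (ci['true'] >= ci['lower']) & (ci['true'] <= ci['upper'])
ci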

Predict#

test_size
100
predictions = model_fit.forecast(horizon=test_size)

Predict the mean?#

plt.figure(figsize=(10,4))
true, = plt.plot(series[-test_size:], label='True Series')
preds, = plt.plot(predictions.mean.values[-1, :], label='Prediction')

plt.legend(loc='best')
plt.title('Series Prediction', fontsize=20)
plt.show()
../../../_images/61742112d9404d939497287a69867861f0be9f7eaf8b0fcb8ce4d227c7741024.png
plt.figure(figsize=(10,4))
true, = plt.plot(vols[-test_size:], label='True Volatility')
preds, = plt.plot(np.sqrt(predictions.variance.values[-1, :]), label='Predicted Volatility')

plt.legend(loc='best')
plt.title('Volatility Prediction', fontsize=20)
plt.show()
../../../_images/30712a9f42ea28dbcecb894696204fc03b5361f305c0ced6e49a1b334aa157da.png
predictions_long_term = model_fit.forecast(horizon=1000)
plt.figure(figsize=(10,4))
true, = plt.plot(vols[-test_size:], label='True Volatility')
preds, = plt.plot(np.sqrt(predictions_long_term.variance.values[-1, :]), label='Predicted Volatility')
plt.legend(ncol=2)
plt.title('Long Term Volatility Prediction', fontsize=20)
plt.show()
../../../_images/a1ccda2db74bac9a87f3e8fad0e48610d099ae1068e242827f82b99031c3d296.png
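
For reference on how such long-horizon forecasts behave: for horizons h beyond max(p, q), the GARCH variance forecast satisfies the recursion

\[ E_t[\sigma_{t+h}^2] = \omega + \sum_{i} (\alpha_i + \beta_i)\, E_t[\sigma_{t+h-i}^2] \]
\[ E_t[\sigma_{t+h}^2] \xrightarrow[h \to \infty]{} \frac{\omega}{1 - \sum_i \alpha_i - \sum_j \beta_j} \quad \text{if } \sum_i \alpha_i + \sum_j \beta_j < 1 \]

Note that the true coefficients of this simulation sum to exactly one (0.1 + 0.2 + 0.3 + 0.4 = 1), so the data-generating process sits on the integrated-GARCH boundary and has no finite unconditional variance; the fitted persistence above is also very close to one.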

Rolling Forecast Origin#

rolling_predictions = []
params = {}
for i in tqdm(range(test_size)):
    train = series[:-(test_size-i)]
    model = arch_model(train, mean='Zero', p=2, q=2)
    model_fit = model.fit(disp='off')
    params[i] = model_fit.params
    pred = model_fit.forecast(horizon=1)
    rolling_predictions.append(np.sqrt(pred.variance.values[-1,:][0]))
params = pd.concat(params.values(), keys=params.keys(), axis=1).T
100%|██████████| 100/100 [00:03<00:00, 26.55it/s]
plt.figure(figsize=(10,4))
true, = plt.plot(vols[-test_size:], label = 'True Volatility')
preds, = plt.plot(rolling_predictions, label = 'Predicted Volatility')
plt.legend(loc='best')
plt.title('Volatility Prediction - Rolling Forecast', fontsize=20)
plt.show()
../../../_images/a39454e9879f2740cc5e72a9b516487c353ab4fbe8c31681829bdca8803b6c69.png
fig, ax1 = plt.subplots(figsize=(10, 4))

# plot the first plot on the left axis
ax1.plot(vols[-test_size:], label='True Volatility', color='grey')
ax1.set_ylabel('True Volatility', color='grey')
ax1.tick_params(axis='y', labelcolor='grey')

# create a twin axis for the second plot on the right
ax2 = ax1.twinx()

# plot the second plot on the right axis
ax2.plot(params)
ax2.set_ylabel('Parameter Value', color='red')
ax2.tick_params(axis='y', labelcolor='red')

# add a title and legend
plt.title('Parameter change over time', fontsize=20)
plt.legend( list(params.columns), ncol=2, loc='best')

plt.show()
../../../_images/41cdcc57a9c5f75169f2cc034a8b5a0f80ae6722c254f2fb0d9f4eb72737fe64.png

Conclusion#

  • Note that GARCH has no effect on the conditional mean, only on the conditional volatility

  • The best way to use GARCH predictions, as with any time-series model, is to re-estimate the parameters every day and predict one step ahead

  • Re-estimating the parameters lets the model pick up recent, sudden changes in volatility

Exercises 1:#

  1. Now fit an ARCH(3) model on the above time series (a fitting sketch is given below)

  2. Interpret the results
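
A minimal fitting sketch with the arch package, reusing the same training split as before (vol='ARCH' requests a pure ARCH volatility model; the arch3_* names are just for illustration):

# Fit an ARCH(3) with zero conditional mean on the training sample
arch3_model = arch_model(series[:-test_size], mean='Zero', vol='ARCH', p=3)
arch3_fit = arch3_model.fit(disp='off')
print(arch3_fit.summary())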

Exercises 2:#

  1. Now fit a GARCH(3,3) model on the above time series (a fitting sketch is given below)

  2. Interpret the results
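
Similarly, a sketch for the GARCH(3,3) specification (the garch33_* names are again just illustrative):

# Fit a GARCH(3,3) with zero conditional mean on the training sample
garch33_model = arch_model(series[:-test_size], mean='Zero', p=3, o=0, q=3)
garch33_fit = garch33_model.fit(disp='off')
print(garch33_fit.summary())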

def generate_process(omega, alphas, betas, sample_size=1000):
    '''
    Generate a time series following a GARCH(p, q) process
    '''
    n = sample_size
    p = len(alphas)
    q = len(betas)
    # Initializing the process
    series = []
    for _ in range(p):
        series.append(gauss(0,1))
        
    vols = list(np.ones(q))

    # Replicate the GARCH dynamics
    for _ in range(n):
        new_var = omega
        for i in range(p):
            new_var = new_var + alphas[i]*series[-1-i]**2
        for i in range(q):
            new_var = new_var + betas[i]*vols[-1-i]**2
        new_vol = np.sqrt(new_var)
        new_val = gauss(0,1) * new_vol

        vols.append(new_vol)
        series.append(new_val)
        
    return vols, series

def plot_process(vols, series, title='Simulated Data'):
    plt.figure(figsize=(10,4))
    plt.plot(series, label='Series')
    plt.plot(vols, color='red', label='Volatility')
    plt.axhline(0, ls='--', alpha=0.3, color='k')
    plt.legend(ncol=2, loc='best')
    plt.title(title, fontsize=20)
    plt.show()
vols, series = generate_process(.5, [0.1, 0.2], [0.3, 0.4]) 
plot_process(vols, series)
../../../_images/ebb22219b35965d43b1cc355d0803e92d25cf35046fbc87f412827dcddeaa003.png
vols, series = generate_process(.5, [0.1], [0.3]) 
plot_process(vols, series)
../../../_images/e49b18568021c989e33090b1e636d675c83f0caf787bb7ed7c9a100c446aaa87.png