import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
register_matplotlib_converters()

import os
data_folder = '../data/'

1: ACF vs PACF

# Simulate two series of 1000 points from one seeded random stream:
# white noise (stationary) and a random walk, i.e. the cumulative sum of a
# second batch of standard-normal draws (non-stationary).
np.random.seed(1)
stationary_ts = np.random.normal(0, 1, size=1000)
nonstationary_ts = np.cumsum(np.random.normal(0, 1, size=1000))

# Top row: autocorrelation plot of each series; bottom row: the raw series.
# Draw on the axes returned by plt.subplots rather than re-selecting them
# with plt.subplot(22x), which depends on pyplot's implicit "current axes".
lags = 20
fig, axes = plt.subplots(2, 2, figsize=(13, 7))
acf_stationary = plot_acf(stationary_ts, lags=lags, alpha=0.05, ax=axes[0, 0], title='ACF Stationary')
acf_nonstationary = plot_acf(nonstationary_ts, lags=lags, alpha=0.05, ax=axes[0, 1], title='ACF non Stationary')

axes[1, 0].plot(stationary_ts)
axes[1, 0].set_title("Stationary Time Series")

axes[1, 1].plot(nonstationary_ts)
axes[1, 1].set_title("Non-Stationary Time Series")

fig.tight_layout()
plt.show()
../../../_images/7ec2c300eb073444b395aa3fe474ffc53d6dfb9dddcca83bd33e9c57ba27835f.png

Ice Cream Production Data

# Load the raw ice cream production table from the data folder
ice_cream_csv = os.path.join(data_folder, 'ice_cream.csv')
df_ice_cream = pd.read_csv(ice_cream_csv)
# Preview the first rows
df_ice_cream.head()
DATE IPN31152N
0 1972-01-01 59.9622
1 1972-02-01 67.0605
2 1972-03-01 74.2350
3 1972-04-01 78.1120
4 1972-05-01 84.7636
# Give the columns readable names, parse the dates, and index the frame by date
df_ice_cream = df_ice_cream.rename(columns={'DATE': 'date', 'IPN31152N': 'production'})
df_ice_cream['date'] = pd.to_datetime(df_ice_cream['date'])
df_ice_cream = df_ice_cream.set_index('date')
# Keep only the rows from 2010-01-01 onwards
start_date = pd.to_datetime('2010-01-01')
df_ice_cream = df_ice_cream.loc[start_date:]
# Preview the result
df_ice_cream.head()
production
date
2010-01-01 91.2895
2010-02-01 110.4994
2010-03-01 127.0971
2010-04-01 132.6468
2010-05-01 134.5576
# Line plot of the production series
plt.figure(figsize=(10, 4))
plt.plot(df_ice_cream['production'])
plt.title('Ice Cream Production over Time', fontsize=20)
plt.ylabel('Production', fontsize=16)
# Faint dashed vertical guide at 1 January of each year, 2011 through 2020
for year in range(2011, 2021):
    new_year = pd.to_datetime(f'{year}-01-01')
    plt.axvline(new_year, color='k', linestyle='--', alpha=0.2)
../../../_images/2cfe7536adb38514ab25614c72ea25ecbd271a5d14a373a8828f4cc107dc7ee1.png

ACF

# Autocorrelation plot of the production series, with lags=100 requested
production_series = df_ice_cream['production']
acf_plot = plot_acf(production_series, lags=100)
../../../_images/c0b76f2f7be030773a16dc3ff9586b1e9999ca0cffff3141a35a4076d2e5196f.png

Based on the decaying ACF, we are likely dealing with an autoregressive (AR) process

PACF

# Partial autocorrelation plot of the production series.
# method='ywm' is requested explicitly: the legacy default ('yw') raises a
# statsmodels FutureWarning and can produce PACF values outside [-1, 1].
pacf_plot = plot_pacf(df_ice_cream.production, method='ywm')
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/graphics/tsaplots.py:348: FutureWarning: The default method 'yw' can produce PACF values outside of the [-1,1] interval. After 0.13, the default will change tounadjusted Yule-Walker ('ywm'). You can use this method now by setting method='ywm'.
  warnings.warn(
../../../_images/d24f5508977f86424d1ddbe2b3ce6bffb85224298186a216c7b1dc15604b2370.png

Based on the PACF, we should start with an autoregressive (AR) model with lags 1, 2, 3, 10, 13

On stock data

import yfinance as yf

# Ticker symbol of the instrument to analyse
tickerSymbol = 'SPY'
# Handle used to query data for this ticker
tickerData = yf.Ticker(tickerSymbol)
# Daily bars between the given start and end dates. The bar size is the
# `interval` argument; `period` is a look-back length that is redundant (or
# conflicting) once an explicit start/end range is supplied, so it is not
# passed here.
tickerDf = tickerData.history(interval='1d', start='2015-1-1', end='2020-1-1')
# Keep only the closing price
tickerDf = tickerDf[['Close']]
# Preview the data
tickerDf.head()
Close
Date
2015-01-02 00:00:00-05:00 176.788849
2015-01-05 00:00:00-05:00 173.596115
2015-01-06 00:00:00-05:00 171.961029
2015-01-07 00:00:00-05:00 174.103851
2015-01-08 00:00:00-05:00 177.193390
# Line plot of the closing price
plt.figure(figsize=(10, 4))
plt.plot(tickerDf['Close'])
plt.title(f'Stock Price over Time ({tickerSymbol})', fontsize=20)
plt.ylabel('Price', fontsize=16)
# Faint dashed vertical guide at 1 January of each year, 2015 through 2020
for year in range(2015, 2021):
    new_year = pd.to_datetime(f'{year}-01-01')
    plt.axvline(new_year, color='k', linestyle='--', alpha=0.2)
../../../_images/571b12b4cf303b552571e18e7408ab6ecc301b322882365c3a1b21817bfdec0f.png

Stationarity: take the first difference of this series

# Differences between consecutive closing prices: Close[i+1] - Close[i]
close_values = tickerDf['Close'].values
first_diffs = np.diff(close_values)
# Pad with a trailing 0 so the array has one entry per row of the frame
first_diffs = np.append(first_diffs, 0)
# NOTE(review): row i therefore holds the change from row i to row i+1, and
# the last row holds a padded 0 rather than an observed change; confirm this
# alignment is intended.
tickerDf['FirstDifference'] = first_diffs
tickerDf.head()
Close FirstDifference
Date
2015-01-02 00:00:00-05:00 176.788849 -3.192734
2015-01-05 00:00:00-05:00 173.596115 -1.635086
2015-01-06 00:00:00-05:00 171.961029 2.142822
2015-01-07 00:00:00-05:00 174.103851 3.089539
2015-01-08 00:00:00-05:00 177.193390 -1.419983
# Line plot of the first-differenced closing price
plt.figure(figsize=(10, 4))
plt.plot(tickerDf['FirstDifference'])
plt.title(f'First Difference over Time ({tickerSymbol})', fontsize=20)
plt.ylabel('Price Difference', fontsize=16)
# Faint dashed vertical guide at 1 January of each year, 2015 through 2020
for year in range(2015, 2021):
    new_year = pd.to_datetime(f'{year}-01-01')
    plt.axvline(new_year, color='k', linestyle='--', alpha=0.2)
../../../_images/2d6fc773445959fc6d69ce1f56ecd97f19e0d98a367188f6e7c22dac9df2891e.png

ACF

# Autocorrelation plot of the first-differenced closing price
diff_series = tickerDf['FirstDifference']
acf_plot = plot_acf(diff_series)
../../../_images/5771a4f71026afbe6daa50c2eca41fd6eeb668f6cfb1f9c68fc10c40efcacdd8.png

The ACF isn’t that informative

PACF

# Partial autocorrelation plot of the first-differenced closing price.
# method='ywm' is requested explicitly: the legacy default ('yw') raises a
# statsmodels FutureWarning and can produce PACF values outside [-1, 1].
pacf_plot = plot_pacf(tickerDf.FirstDifference, method='ywm')
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/graphics/tsaplots.py:348: FutureWarning: The default method 'yw' can produce PACF values outside of the [-1,1] interval. After 0.13, the default will change tounadjusted Yule-Walker ('ywm'). You can use this method now by setting method='ywm'.
  warnings.warn(
../../../_images/be70487e427265cc4f5f2194df11d48b9b0de810aa221510c1306dda3035a111.png

The PACF also doesn’t tell us much