import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
register_matplotlib_converters()
import os
# Folder holding the CSV inputs, given relative to the current working directory.
data_folder = "../data/"
# 1: ACF vs PACF
# Simulate two series from one seeded generator: Gaussian white noise
# (stationary) and the cumulative sum of a second Gaussian draw, i.e. a
# random walk (non-stationary). The draw order matters for reproducibility.
np.random.seed(1)
stationary_ts = np.random.normal(0, 1, size=1000)
nonstationary_ts = np.cumsum(np.random.normal(0, 1, size=1000))

# Number of lags shown in each autocorrelation plot.
lags = 20

# One 2x2 grid: ACF plots on the top row, the raw series underneath.
fig, axes = plt.subplots(2, 2, figsize=(13, 7))
(ax_acf_noise, ax_acf_walk), (ax_noise, ax_walk) = axes

# alpha=0.05 sets the significance level of the confidence band that is drawn.
acf_stationary = plot_acf(
    stationary_ts, lags=lags, alpha=0.05, ax=ax_acf_noise, title='ACF Stationary'
)
acf_nonstationary = plot_acf(
    nonstationary_ts, lags=lags, alpha=0.05, ax=ax_acf_walk, title= 'ACF non Stationary'
)

# Raw series, each directly below its ACF.
ax_noise.plot(stationary_ts)
ax_noise.set_title("Stationary Time Series")
ax_walk.plot(nonstationary_ts)
ax_walk.set_title("Non-Stationary Time Series")

fig.tight_layout()
plt.show()
# Ice Cream Production Data
# Load the ice-cream production CSV from the data folder and preview it.
ice_cream_csv = os.path.join(data_folder, 'ice_cream.csv')
df_ice_cream = pd.read_csv(ice_cream_csv)
df_ice_cream.head()
| | DATE | IPN31152N |
|---|---|---|
| 0 | 1972-01-01 | 59.9622 |
| 1 | 1972-02-01 | 67.0605 |
| 2 | 1972-03-01 | 74.2350 |
| 3 | 1972-04-01 | 78.1120 |
| 4 | 1972-05-01 | 84.7636 |
# Tidy the raw frame in one chain: friendlier column names, a real datetime
# column, and that column promoted to the index.
df_ice_cream = (
    df_ice_cream
    .rename(columns={'DATE': 'date', 'IPN31152N': 'production'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

# Keep only observations from January 2010 onwards.
start_date = pd.to_datetime('2010-01-01')
df_ice_cream = df_ice_cream.loc[start_date:]

# Preview the result.
df_ice_cream.head()
| date | production |
|---|---|
| 2010-01-01 | 91.2895 |
| 2010-02-01 | 110.4994 |
| 2010-03-01 | 127.0971 |
| 2010-04-01 | 132.6468 |
| 2010-05-01 | 134.5576 |
# Line plot of monthly production with a faint dashed marker at each 1 January.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df_ice_cream['production'])
ax.set_title('Ice Cream Production over Time', fontsize=20)
ax.set_ylabel('Production', fontsize=16)
for year in range(2011, 2021):
    year_start = pd.to_datetime(str(year) + '-01-01')
    ax.axvline(year_start, color='k', linestyle='--', alpha=0.2)
# ACF
# Autocorrelation of the production series out to 100 lags.
production_series = df_ice_cream['production']
acf_plot = plot_acf(production_series, lags=100)
# Based on the decaying ACF, we are likely dealing with an autoregressive (AR) process
# PACF
# Partial autocorrelation of the production series.
# method='ywm' is passed explicitly: the default 'yw' estimator can return PACF
# values outside [-1, 1] and triggers a FutureWarning announcing that the
# default will change to 'ywm'.
pacf_plot = plot_pacf(df_ice_cream.production, method='ywm')
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/graphics/tsaplots.py:348: FutureWarning: The default method 'yw' can produce PACF values outside of the [-1,1] interval. After 0.13, the default will change tounadjusted Yule-Walker ('ywm'). You can use this method now by setting method='ywm'.
  warnings.warn(
# Based on the PACF, we should start with an autoregressive (AR) model with lags 1, 2, 3, 10, 13
# On stock data
import yfinance as yf

# Ticker symbol to download.
tickerSymbol = 'SPY'

# Handle used to query data for this ticker.
tickerData = yf.Ticker(tickerSymbol)

# Daily bars over the requested date range. The range is fixed by start/end,
# so no `period` is passed alongside them; the bar size is set with `interval`.
tickerDf = tickerData.history(interval='1d', start='2015-1-1', end='2020-1-1')

# Keep only the closing price. Take an explicit copy so that a column can be
# added to this frame later without pandas chained-assignment warnings.
tickerDf = tickerDf[['Close']].copy()

# Preview the data.
tickerDf.head()
| Date | Close |
|---|---|
| 2015-01-02 00:00:00-05:00 | 176.788849 |
| 2015-01-05 00:00:00-05:00 | 173.596115 |
| 2015-01-06 00:00:00-05:00 | 171.961029 |
| 2015-01-07 00:00:00-05:00 | 174.103851 |
| 2015-01-08 00:00:00-05:00 | 177.193390 |
# Closing price over time with a faint dashed marker at each 1 January.
price_title = 'Stock Price over Time (%s)' % tickerSymbol
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(tickerDf['Close'])
ax.set_title(price_title, fontsize=20)
ax.set_ylabel('Price', fontsize=16)
for year in range(2015, 2021):
    year_start = pd.to_datetime(str(year) + '-01-01')
    ax.axvline(year_start, color='k', linestyle='--', alpha=0.2)
# Stationarity: take first difference of this series
# First difference of the closing price, aligned to the day on which the
# change is realised: row t holds Close[t] - Close[t-1]. (Storing
# Close[t+1] - Close[t] on row t would attach the next day's move to each date.)
first_diffs = tickerDf['Close'].diff()

# Add the difference as a column. The first row has no previous close, so its
# difference is undefined; drop that row rather than padding with an
# artificial 0, which would be treated as a real observation by ACF/PACF.
tickerDf = tickerDf.assign(FirstDifference=first_diffs).dropna(subset=['FirstDifference'])

tickerDf.head()
| Date | Close | FirstDifference |
|---|---|---|
| 2015-01-02 00:00:00-05:00 | 176.788849 | -3.192734 |
| 2015-01-05 00:00:00-05:00 | 173.596115 | -1.635086 |
| 2015-01-06 00:00:00-05:00 | 171.961029 | 2.142822 |
| 2015-01-07 00:00:00-05:00 | 174.103851 | 3.089539 |
| 2015-01-08 00:00:00-05:00 | 177.193390 | -1.419983 |
# Differenced series over time with a faint dashed marker at each 1 January.
diff_title = 'First Difference over Time (%s)' % tickerSymbol
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(tickerDf['FirstDifference'])
ax.set_title(diff_title, fontsize=20)
ax.set_ylabel('Price Difference', fontsize=16)
for year in range(2015, 2021):
    year_start = pd.to_datetime(str(year) + '-01-01')
    ax.axvline(year_start, color='k', linestyle='--', alpha=0.2)
# ACF
# Autocorrelation of the differenced closing price (default number of lags).
differenced_close = tickerDf['FirstDifference']
acf_plot = plot_acf(differenced_close)
# The ACF isn't that informative
# PACF
# Partial autocorrelation of the differenced closing price.
# method='ywm' is passed explicitly: the default 'yw' estimator can return PACF
# values outside [-1, 1] and triggers a FutureWarning announcing that the
# default will change to 'ywm'.
pacf_plot = plot_pacf(tickerDf.FirstDifference, method='ywm')
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/graphics/tsaplots.py:348: FutureWarning: The default method 'yw' can produce PACF values outside of the [-1,1] interval. After 0.13, the default will change tounadjusted Yule-Walker ('ywm'). You can use this method now by setting method='ywm'.
  warnings.warn(