import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.api import VAR
from scipy.stats import pearsonr
13: VAR Model#
def parser(s):
    return datetime.strptime(s, '%Y-%m')
ice_cream_heater_df = pd.read_csv('../data/ice_cream_vs_heater.csv', parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
/tmp/ipykernel_3777724/10753663.py:1: FutureWarning: The squeeze argument has been deprecated and will be removed in a future version. Append .squeeze("columns") to the call to squeeze.
  ice_cream_heater_df = pd.read_csv('../data/ice_cream_vs_heater.csv', parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
ice_cream_heater_df = ice_cream_heater_df.asfreq(pd.infer_freq(ice_cream_heater_df.index))
plt.figure(figsize=(12,6))
ice_cream, = plt.plot(ice_cream_heater_df['ice cream'])
heater, = plt.plot(ice_cream_heater_df['heater'], color='red')
for year in range(2004, 2021):
    plt.axvline(datetime(year,1,1), linestyle='--', color='k', alpha=0.5)
plt.legend(['Ice Cream', 'Heater'], fontsize=16)
<matplotlib.legend.Legend at 0x7f12641759d0>
Normalize#
avgs = ice_cream_heater_df.mean()
devs = ice_cream_heater_df.std()
for col in ice_cream_heater_df.columns:
    ice_cream_heater_df[col] = (ice_cream_heater_df[col] - avgs.loc[col]) / devs.loc[col]
plt.figure(figsize=(12,6))
ice_cream, = plt.plot(ice_cream_heater_df['ice cream'])
heater, = plt.plot(ice_cream_heater_df['heater'], color='red')
for year in range(2004, 2021):
    plt.axvline(datetime(year,1,1), linestyle='--', color='k', alpha=0.5)
    
plt.axhline(0, linestyle='--', color='k', alpha=0.3)
plt.legend(['Ice Cream', 'Heater'], fontsize=16)
<matplotlib.legend.Legend at 0x7f12620bbbe0>
Take First Difference to Remove Trend#
ice_cream_heater_df = ice_cream_heater_df.diff().dropna()
plt.figure(figsize=(12,6))
ice_cream, = plt.plot(ice_cream_heater_df['ice cream'])
heater, = plt.plot(ice_cream_heater_df['heater'], color='red')
for year in range(2004, 2021):
    plt.axvline(datetime(year,1,1), linestyle='--', color='k', alpha=0.5)
    
plt.axhline(0, linestyle='--', color='k', alpha=0.3)
plt.ylabel('First Difference', fontsize=18)
plt.legend(['Ice Cream', 'Heater'], fontsize=16)
<matplotlib.legend.Legend at 0x7f1261fc3f10>
plt.figure(figsize=(12,6))
ice_cream, = plt.plot(ice_cream_heater_df['ice cream'])
for year in range(2004, 2021):
    plt.axvline(datetime(year,1,1), linestyle='--', color='k', alpha=0.5)
    
plt.axhline(0, linestyle='--', color='k', alpha=0.3)
plt.ylabel('First Difference', fontsize=18)
plt.legend(['Ice Cream'], fontsize=16)
<matplotlib.legend.Legend at 0x7f1261f15520>
Remove Increasing Volatility#
annual_volatility = ice_cream_heater_df.groupby(ice_cream_heater_df.index.year).std()
annual_volatility
| heater | ice cream | |
|---|---|---|
| Month | ||
| 2004 | 0.415481 | 0.184201 | 
| 2005 | 0.474527 | 0.214837 | 
| 2006 | 0.400148 | 0.227698 | 
| 2007 | 0.359839 | 0.198947 | 
| 2008 | 0.396182 | 0.183239 | 
| 2009 | 0.499810 | 0.221038 | 
| 2010 | 0.459566 | 0.211668 | 
| 2011 | 0.443924 | 0.501018 | 
| 2012 | 0.471104 | 0.389711 | 
| 2013 | 0.503587 | 0.335840 | 
| 2014 | 0.855743 | 0.377482 | 
| 2015 | 0.569441 | 0.404340 | 
| 2016 | 0.719843 | 0.593347 | 
| 2017 | 0.830886 | 0.874263 | 
| 2018 | 0.987221 | 0.708785 | 
| 2019 | 0.892991 | 0.843598 | 
| 2020 | 0.426657 | 0.369810 | 
ice_cream_heater_df['ice_cream_annual_vol'] = ice_cream_heater_df.index.map(lambda d: annual_volatility.loc[d.year, 'ice cream'])
ice_cream_heater_df['heater_annual_vol'] = ice_cream_heater_df.index.map(lambda d: annual_volatility.loc[d.year, 'heater'])
ice_cream_heater_df
| heater | ice cream | ice_cream_annual_vol | heater_annual_vol | |
|---|---|---|---|---|
| Month | ||||
| 2004-02-01 | -0.918789 | 0.117140 | 0.184201 | 0.415481 | 
| 2004-03-01 | -0.408351 | 0.058570 | 0.184201 | 0.415481 | 
| 2004-04-01 | -0.102088 | 0.175710 | 0.184201 | 0.415481 | 
| 2004-05-01 | 0.000000 | 0.117140 | 0.184201 | 0.415481 | 
| 2004-06-01 | 0.000000 | 0.175710 | 0.184201 | 0.415481 | 
| ... | ... | ... | ... | ... | 
| 2020-02-01 | -0.714614 | 0.117140 | 0.369810 | 0.426657 | 
| 2020-03-01 | -0.918789 | -0.058570 | 0.369810 | 0.426657 | 
| 2020-04-01 | 0.000000 | 0.527129 | 0.369810 | 0.426657 | 
| 2020-05-01 | 0.204175 | 0.995687 | 0.369810 | 0.426657 | 
| 2020-06-01 | -0.306263 | 0.234279 | 0.369810 | 0.426657 | 
197 rows × 4 columns
ice_cream_heater_df['ice cream'] = ice_cream_heater_df['ice cream'] / ice_cream_heater_df['ice_cream_annual_vol']
ice_cream_heater_df['heater'] = ice_cream_heater_df['heater'] / ice_cream_heater_df['heater_annual_vol']
plt.figure(figsize=(12,6))
ice_cream, = plt.plot(ice_cream_heater_df['ice cream'])
for year in range(2004, 2021):
    plt.axvline(datetime(year,1,1), linestyle='--', color='k', alpha=0.5)
    
plt.axhline(0, linestyle='--', color='k', alpha=0.3)
plt.ylabel('First Difference', fontsize=18)
plt.legend(['Ice Cream'], fontsize=16)
<matplotlib.legend.Legend at 0x7f1261bb7a00>
plt.figure(figsize=(12,6))
ice_cream, = plt.plot(ice_cream_heater_df['ice cream'])
heater, = plt.plot(ice_cream_heater_df['heater'], color='red')
for year in range(2004, 2021):
    plt.axvline(datetime(year,1,1), linestyle='--', color='k', alpha=0.5)
    
plt.axhline(0, linestyle='--', color='k', alpha=0.3)
plt.ylabel('First Difference', fontsize=18)
plt.legend(['Ice Cream', 'Heater'], fontsize=16)
<matplotlib.legend.Legend at 0x7f1261af2af0>
Remove Seasonality#
month_avgs = ice_cream_heater_df.groupby(ice_cream_heater_df.index.month).mean()
print(month_avgs)
         heater  ice cream  ice_cream_annual_vol  heater_annual_vol
Month                                                              
1     -0.428360   0.130345              0.415976           0.580717
2     -1.483129   0.250194              0.402342           0.570997
3     -1.063595   0.433016              0.402342           0.570997
4     -0.442926   0.838085              0.402342           0.570997
5     -0.121663   0.967273              0.402342           0.570997
6     -0.128425   0.987311              0.402342           0.570997
7     -0.107093   0.735052              0.404376           0.580018
8      0.075720  -1.821285              0.404376           0.580018
9      0.544638  -1.307661              0.404376           0.580018
10     1.613170  -0.830889              0.404376           0.580018
11     1.183118  -0.088136              0.404376           0.580018
12     0.491641   0.088998              0.404376           0.580018
ice_cream_heater_df['ice_cream_month_avg'] = ice_cream_heater_df.index.map(lambda d: month_avgs.loc[d.month, 'ice cream'])
ice_cream_heater_df['heater_month_avg'] = ice_cream_heater_df.index.map(lambda d: month_avgs.loc[d.month, 'heater'])
ice_cream_heater_df
| heater | ice cream | ice_cream_annual_vol | heater_annual_vol | ice_cream_month_avg | heater_month_avg | |
|---|---|---|---|---|---|---|
| Month | ||||||
| 2004-02-01 | -2.211387 | 0.635934 | 0.184201 | 0.415481 | 0.250194 | -1.483129 | 
| 2004-03-01 | -0.982838 | 0.317967 | 0.184201 | 0.415481 | 0.433016 | -1.063595 | 
| 2004-04-01 | -0.245710 | 0.953901 | 0.184201 | 0.415481 | 0.838085 | -0.442926 | 
| 2004-05-01 | 0.000000 | 0.635934 | 0.184201 | 0.415481 | 0.967273 | -0.121663 | 
| 2004-06-01 | 0.000000 | 0.953901 | 0.184201 | 0.415481 | 0.987311 | -0.128425 | 
| ... | ... | ... | ... | ... | ... | ... | 
| 2020-02-01 | -1.674916 | 0.316756 | 0.369810 | 0.426657 | 0.250194 | -1.483129 | 
| 2020-03-01 | -2.153463 | -0.158378 | 0.369810 | 0.426657 | 0.433016 | -1.063595 | 
| 2020-04-01 | 0.000000 | 1.425403 | 0.369810 | 0.426657 | 0.838085 | -0.442926 | 
| 2020-05-01 | 0.478547 | 2.692427 | 0.369810 | 0.426657 | 0.967273 | -0.121663 | 
| 2020-06-01 | -0.717821 | 0.633512 | 0.369810 | 0.426657 | 0.987311 | -0.128425 | 
197 rows × 6 columns
ice_cream_heater_df['ice cream'] = ice_cream_heater_df['ice cream'] - ice_cream_heater_df['ice_cream_month_avg']
ice_cream_heater_df['heater'] = ice_cream_heater_df['heater'] - ice_cream_heater_df['heater_month_avg']
ice_cream_heater_df
| heater | ice cream | ice_cream_annual_vol | heater_annual_vol | ice_cream_month_avg | heater_month_avg | |
|---|---|---|---|---|---|---|
| Month | ||||||
| 2004-02-01 | -0.728257 | 0.385740 | 0.184201 | 0.415481 | 0.250194 | -1.483129 | 
| 2004-03-01 | 0.080757 | -0.115049 | 0.184201 | 0.415481 | 0.433016 | -1.063595 | 
| 2004-04-01 | 0.197217 | 0.115816 | 0.184201 | 0.415481 | 0.838085 | -0.442926 | 
| 2004-05-01 | 0.121663 | -0.331339 | 0.184201 | 0.415481 | 0.967273 | -0.121663 | 
| 2004-06-01 | 0.128425 | -0.033411 | 0.184201 | 0.415481 | 0.987311 | -0.128425 | 
| ... | ... | ... | ... | ... | ... | ... | 
| 2020-02-01 | -0.191787 | 0.066562 | 0.369810 | 0.426657 | 0.250194 | -1.483129 | 
| 2020-03-01 | -1.089868 | -0.591394 | 0.369810 | 0.426657 | 0.433016 | -1.063595 | 
| 2020-04-01 | 0.442926 | 0.587318 | 0.369810 | 0.426657 | 0.838085 | -0.442926 | 
| 2020-05-01 | 0.600210 | 1.725154 | 0.369810 | 0.426657 | 0.967273 | -0.121663 | 
| 2020-06-01 | -0.589396 | -0.353799 | 0.369810 | 0.426657 | 0.987311 | -0.128425 | 
197 rows × 6 columns
plt.figure(figsize=(12,6))
ice_cream, = plt.plot(ice_cream_heater_df['ice cream'])
for year in range(2004, 2021):
    plt.axvline(datetime(year,1,1), linestyle='--', color='k', alpha=0.5)
    
plt.axhline(0, linestyle='--', color='k', alpha=0.3)
plt.ylabel('First Difference', fontsize=18)
plt.legend(['Ice Cream'], fontsize=16)
<matplotlib.legend.Legend at 0x7f1260157fd0>
plt.figure(figsize=(12,6))
ice_cream, = plt.plot(ice_cream_heater_df['ice cream'])
heater, = plt.plot(ice_cream_heater_df['heater'], color='red')
for year in range(2004, 2021):
    plt.axvline(datetime(year,1,1), linestyle='--', color='k', alpha=0.5)
    
plt.axhline(0, linestyle='--', color='k', alpha=0.3)
plt.ylabel('First Difference', fontsize=18)
plt.legend(['Ice Cream', 'Heater'], fontsize=16)
<matplotlib.legend.Legend at 0x7f126009e610>
PACF - Heater#
plot_pacf(ice_cream_heater_df['heater'])
plt.show()
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/graphics/tsaplots.py:348: FutureWarning: The default method 'yw' can produce PACF values outside of the [-1,1] interval. After 0.13, the default will change tounadjusted Yule-Walker ('ywm'). You can use this method now by setting method='ywm'.
  warnings.warn(
So consider an AR(2)#
Correlation between “heater” and lagged “ice cream”#
for lag in range(1, 14):
    heater_series = ice_cream_heater_df['heater'].iloc[lag:]
    lagged_ice_cream_series = ice_cream_heater_df['ice cream'].iloc[:-lag]
    print('Lag: %s'%lag)
    print(pearsonr(heater_series, lagged_ice_cream_series))
    print('------')
Lag: 1
PearsonRResult(statistic=-0.031544507425915636, pvalue=0.6607284991749385)
------
Lag: 2
PearsonRResult(statistic=-0.09872703210944635, pvalue=0.16970841905089779)
------
Lag: 3
PearsonRResult(statistic=-0.0017838553789235895, pvalue=0.9803056762960977)
------
Lag: 4
PearsonRResult(statistic=0.060239718598255415, pvalue=0.40529798325286276)
------
Lag: 5
PearsonRResult(statistic=-0.054034864480150276, pvalue=0.45664426275056963)
------
Lag: 6
PearsonRResult(statistic=0.06461183026553836, pvalue=0.3745300043445426)
------
Lag: 7
PearsonRResult(statistic=-0.04949334618415865, pvalue=0.49768766857620883)
------
Lag: 8
PearsonRResult(statistic=0.07890837135977341, pvalue=0.28045489704330623)
------
Lag: 9
PearsonRResult(statistic=-0.0532350169960043, pvalue=0.4681034202228026)
------
Lag: 10
PearsonRResult(statistic=0.029536989060791177, pvalue=0.6882054906333271)
------
Lag: 11
PearsonRResult(statistic=-0.059795363515225686, pvalue=0.41752174204856846)
------
Lag: 12
PearsonRResult(statistic=-0.07513046035981792, pvalue=0.3094409252542678)
------
Lag: 13
PearsonRResult(statistic=0.19808902198429754, pvalue=0.007029107082073456)
------
Fit a VAR Model#
ice_cream_heater_df = ice_cream_heater_df[['ice cream', 'heater']]
model = VAR(ice_cream_heater_df)
model_fit = model.fit(maxlags=13)
model_fit.summary()
  Summary of Regression Results   
==================================
Model:                         VAR
Method:                        OLS
Date:           Wed, 22, Mar, 2023
Time:                     13:23:17
--------------------------------------------------------------------
No. of Equations:         2.00000    BIC:                   -1.92349
Nobs:                     184.000    HQIC:                  -2.48459
Log likelihood:          -204.405    FPE:                  0.0571123
AIC:                     -2.86700    Det(Omega_mle):       0.0434311
--------------------------------------------------------------------
Results for equation ice cream
================================================================================
                   coefficient       std. error           t-stat            prob
--------------------------------------------------------------------------------
const                -0.016054         0.034105           -0.471           0.638
L1.ice cream         -0.287811         0.079633           -3.614           0.000
L1.heater            -0.121251         0.073673           -1.646           0.100
L2.ice cream         -0.217862         0.087025           -2.503           0.012
L2.heater            -0.081992         0.079793           -1.028           0.304
L3.ice cream         -0.122169         0.090772           -1.346           0.178
L3.heater            -0.100743         0.080730           -1.248           0.212
L4.ice cream         -0.167505         0.089113           -1.880           0.060
L4.heater            -0.130541         0.082232           -1.587           0.112
L5.ice cream         -0.206725         0.090337           -2.288           0.022
L5.heater            -0.168606         0.084980           -1.984           0.047
L6.ice cream         -0.138873         0.092630           -1.499           0.134
L6.heater            -0.115771         0.089911           -1.288           0.198
L7.ice cream         -0.180695         0.092245           -1.959           0.050
L7.heater            -0.027255         0.094850           -0.287           0.774
L8.ice cream         -0.111166         0.092679           -1.199           0.230
L8.heater             0.041803         0.094740            0.441           0.659
L9.ice cream         -0.043835         0.091389           -0.480           0.631
L9.heater             0.012616         0.094705            0.133           0.894
L10.ice cream        -0.153345         0.089841           -1.707           0.088
L10.heater            0.059215         0.094546            0.626           0.531
L11.ice cream        -0.004417         0.089771           -0.049           0.961
L11.heater            0.042153         0.093799            0.449           0.653
L12.ice cream        -0.126643         0.090094           -1.406           0.160
L12.heater           -0.021794         0.090097           -0.242           0.809
L13.ice cream        -0.081957         0.087069           -0.941           0.347
L13.heater           -0.018799         0.079199           -0.237           0.812
================================================================================
Results for equation heater
================================================================================
                   coefficient       std. error           t-stat            prob
--------------------------------------------------------------------------------
const                 0.005855         0.036062            0.162           0.871
L1.ice cream         -0.033113         0.084202           -0.393           0.694
L1.heater            -0.405367         0.077900           -5.204           0.000
L2.ice cream         -0.169804         0.092018           -1.845           0.065
L2.heater            -0.193569         0.084371           -2.294           0.022
L3.ice cream         -0.048999         0.095980           -0.511           0.610
L3.heater            -0.016958         0.085362           -0.199           0.843
L4.ice cream         -0.007633         0.094226           -0.081           0.935
L4.heater             0.009474         0.086950            0.109           0.913
L5.ice cream         -0.020253         0.095520           -0.212           0.832
L5.heater            -0.050607         0.089856           -0.563           0.573
L6.ice cream          0.040645         0.097944            0.415           0.678
L6.heater            -0.006504         0.095070           -0.068           0.945
L7.ice cream         -0.053440         0.097538           -0.548           0.584
L7.heater            -0.053795         0.100292           -0.536           0.592
L8.ice cream          0.074170         0.097997            0.757           0.449
L8.heater            -0.059001         0.100176           -0.589           0.556
L9.ice cream         -0.004720         0.096633           -0.049           0.961
L9.heater            -0.000168         0.100139           -0.002           0.999
L10.ice cream        -0.007920         0.094996           -0.083           0.934
L10.heater            0.040322         0.099970            0.403           0.687
L11.ice cream        -0.076564         0.094922           -0.807           0.420
L11.heater            0.022568         0.099181            0.228           0.820
L12.ice cream        -0.111959         0.095263           -1.175           0.240
L12.heater            0.082608         0.095266            0.867           0.386
L13.ice cream         0.203451         0.092065            2.210           0.027
L13.heater           -0.155390         0.083744           -1.856           0.064
================================================================================
Correlation matrix of residuals
             ice cream    heater
ice cream     1.000000  0.064855
heater        0.064855  1.000000
So our final model is:#
\[
\hat{h}_t = - 0.41h_{t-1} - 0.19h_{t-2} + 0.2i_{t-13}
\]