import pandas as pd
from statsmodels.tsa.seasonal import STL
import matplotlib.pyplot as plt
from datetime import datetime
import os
data_folder = '../data/'
3: Seasonal-Trend Decomposition using LOESS (STL)#
Read the Data#
ice_cream_interest = pd.read_csv(os.path.join(data_folder, 'ice_cream_interest.csv'))
ice_cream_interest['month'] = pd.to_datetime(ice_cream_interest.month)
ice_cream_interest.set_index('month', inplace=True)
plt.figure(figsize=(10,4))
plt.plot(ice_cream_interest)
[<matplotlib.lines.Line2D at 0x7faa9cc4a580>]
data:image/s3,"s3://crabby-images/2b822/2b822f6eb04d2b7e97d24bd8806b132e11e1edd4" alt="../../../_images/cb1a003154d416993d75e6315d38b7b5941107c05a333fa47595cbdef66af8bd.png"
plt.figure(figsize=(10,4))
plt.plot(ice_cream_interest)
for year in range(2004,2021):
plt.axvline(datetime(year,1,1), color='k', linestyle='--', alpha=0.5)
data:image/s3,"s3://crabby-images/5cca2/5cca242b830495ee032d4fa9437078dbdc05ada8" alt="../../../_images/4fb5c236ed04915f1d392447b70212b05d90a47a1aa71a36ee5055e384a51694.png"
Visual Inspection: Mid-2011 and Late-2016#
Perform STL Decomp#
stl = STL(ice_cream_interest)
result = stl.fit()
seasonal, trend, resid = result.seasonal, result.trend, result.resid
plt.figure(figsize=(8,6))
plt.subplot(4,1,1)
plt.plot(ice_cream_interest)
plt.title('Original Series', fontsize=16)
plt.subplot(4,1,2)
plt.plot(trend)
plt.title('Trend', fontsize=16)
plt.subplot(4,1,3)
plt.plot(seasonal)
plt.title('Seasonal', fontsize=16)
plt.subplot(4,1,4)
plt.plot(resid)
plt.title('Residual', fontsize=16)
plt.tight_layout()
data:image/s3,"s3://crabby-images/018ee/018eeef49a91842fcbf52fff2e67a4b8f362faa8" alt="../../../_images/eeeed2276866d7d06865660cf6b0fd8ab797168069675432d51a1cd43b010781.png"
estimated = trend + seasonal
plt.figure(figsize=(12,4))
plt.plot(ice_cream_interest)
plt.plot(estimated)
[<matplotlib.lines.Line2D at 0x7faa9cb6a310>]
data:image/s3,"s3://crabby-images/0f4b8/0f4b8175bb2836cd24b7b1567d19f7c9dc7867c6" alt="../../../_images/32a57675172fb57b388a30c00a1be3209f7aece1b3ea909252727f50a0e56439.png"
Anomaly Detection#
resid_mu = resid.mean()
resid_dev = resid.std()
lower = resid_mu - 3*resid_dev
upper = resid_mu + 3*resid_dev
plt.figure(figsize=(10,4))
plt.plot(resid)
plt.fill_between([datetime(2003,1,1), datetime(2021,8,1)], lower, upper, color='g', alpha=0.25, linestyle='--', linewidth=2)
plt.xlim(datetime(2003,9,1), datetime(2020,12,1))
(12296.0, 18597.0)
data:image/s3,"s3://crabby-images/b6302/b6302a66bff5c8c4f5472f0d8edf91e4589c4a1c" alt="../../../_images/4fed803325a67af87efe5174098fd837ec90aa15e3d6c6892e1c23596345c0aa.png"
anomalies = ice_cream_interest[(resid < lower) | (resid > upper)]
plt.figure(figsize=(10,4))
plt.plot(ice_cream_interest)
for year in range(2004,2021):
plt.axvline(datetime(year,1,1), color='k', linestyle='--', alpha=0.5)
plt.scatter(anomalies.index, anomalies.interest, color='r', marker='D')
<matplotlib.collections.PathCollection at 0x7faa9d44e190>
data:image/s3,"s3://crabby-images/1b451/1b4513f89021dccbba3bbb1976b5e15b1a6f4c5d" alt="../../../_images/6d1e689e19fcfdad27e26f26dc6b55a70b45954402cae80ca2738ee7836ddc3a.png"
anomalies
interest | |
---|---|
month | |
2011-04-01 | 45 |
2015-12-01 | 25 |
2016-12-01 | 66 |