±à¼ÍƼö: |
±¾ÎÄÀ´Ô´¸öÈ˲©¿Í£¬±¾ÎĽ«ÒªÌÖÂÛ¹ØÓÚÔ¤²âµÄ·½·¨¡£ÓÐÒ»ÖÖÔ¤²âÊǸúʱ¼äÏà¹ØµÄ£¬¶øÕâÖÖ´¦ÀíÓëʱ¼äÏà¹ØÊý¾ÝµÄ·½·¨½Ð×öʱ¼äÐòÁÐÄ£ÐÍ¡£ |
|
¼ÓÔØÊý¾Ý£¬¹Û²ìÊý¾Ý
%matplotlib
inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from dateutil.relativedelta import relativedelta
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.stattools import acf
from statsmodels.tsa.stattools import pacf
from statsmodels.tsa.seasonal import seasonal_decompose
df = pd.read_csv('portland-oregon-average-monthly.csv',
parse_dates=['month'], index_col='month')
print(df.head())
df['riders'].plot(figsize=(12,8), title= 'Monthly
Ridership', fontsize=14)
plt.savefig('month_ridership.png', bbox_inches='tight') |
·Ö½âÊý¾Ý£º
decomposition = seasonal_decompose(df['riders'],
freq=12)
fig = plt.figure()
fig = decomposition.plot()
fig.set_size_inches(12, 6) |
ʱ¼äÐòÁÐÎȶ¨»¯
²âÊÔ·½·¨£º
from
statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):
#Determing rolling statistics
rolmean = timeseries.rolling(window=12).mean()
rolstd = timeseries.rolling(window=12).std()
#Plot rolling statistics:
fig = plt.figure(figsize=(12, 8))
orig = plt.plot(timeseries, color='blue',label='Original')
mean = plt.plot(rolmean, color='red', label='Rolling
Mean')
std = plt.plot(rolstd, color='black', label
= 'Rolling Std')
plt.legend(loc='best')
plt.title('Rolling Mean & Standard Deviation')
plt.show()
#Perform Dickey-Fuller test:
print('Results of Dickey-Fuller Test:')
dftest = adfuller(timeseries, autolag='AIC')
dfoutput = pd.Series(dftest[0:4], index=['Test
Statistic','p-value','#Lags Used','Number of
Observations Used'])
for key,value in dftest[4].items():
dfoutput['Critical Value (%s)'%key] = value
print(dfoutput) |
²âÊÔÐòÁÐÎȶ¨ÐÔ£º
test_stationarity(df['riders']) |
¿´ÒÔ¿´µ½ÕûÌåµÄÐòÁв¢Ã»Óе½´ïÎȶ¨ÐÔÒªÇó£¬Òª½«Ê±¼äÐòÁÐתΪƽÎÈÐòÁУ¬ÓÐÈçϼ¸ÖÖ·½·¨£º
Deflation by CPI
Logarithmic£¨È¡¶ÔÊý£©
First Difference£¨Ò»½×²î·Ö£©
Seasonal Difference£¨¼¾½Ú²î·Ö£©
Seasonal Adjustment
ÕâÀï»á³¢ÊÔÈ¡¶ÔÊý¡¢Ò»½×²é·Ö¡¢¼¾½Ú²î·ÖÈýÖÖ·½·¨£¬ÏȽøÐÐÒ»½×²î·Ö£¬È¥³ýÔö³¤Ç÷ÊÆºó¼ì²âÎȶ¨ÐÔ£º
df['first_difference']
= df['riders'].
diff(1)
test_stationarity(df['first_difference'].
dropna(inplace=False)) |
¿ÉÒÔ¿´µ½Í¼ÐÎÉÏ¿´ÉÏÈ¥±äÎȶ¨ÁË£¬µ«p-valueµÄ²¢Ã»ÓÐСÓÚ0.05¡£ÔÙÀ´¿´¿´12½×²é·Ö£¨¼´¼¾½Ú²é·Ö£©£¬¿´¿´ÊÇ·ñÎȶ¨£º
df['seasonal_difference']
= df
['riders'].diff(12)
test_stationarity(df['seasonal
_difference'].dropna(inplace=False))
|
´ÓͼÐÎÉÏ£¬±ÈÒ»½×²î·Ö¸ü²»Îȶ¨£¨ËäÈ»¼¾½ÚÖ¸±êÒѾ³öÀ´ÁË£©£¬ÎÒÃÇÔÙÀ´½«Ò»½×²é·ÖºÍ¼¾½Ú²é·ÖºÏ²¢ÆðÀ´£¬ÔÙÀ´¿´Ï£º
df['seasonal_first_difference']
= df
['first_difference'].diff(12)
test_stationarity(df['seasonal_first
_difference'].dropna(inplace=False)) |
¿ÉÒÔ¿´µ½£¬ÔÚÒ»½×²î·ÖºóÔÙ¼ÓÉϼ¾½Ú²î·Ö£¬Êý¾ÝÖÕÓÚÎȶ¨ÏÂÁË¡£P-valueСÓÚÁË0.05£¬²¢ÇÒTest
StatisticµÄֵԶԶСÓÚCritical Value (5%)µÄÖµ¡£
ÔÚ¡¶Analysis of Financial Time Series¡·Ìáµ½£¬¶ÔÔʼÊý¾ÝÈ¡¶ÔÊýÓÐÁ½¸öÓô¦£ºµÚÒ»¸öÊǽ«Ö¸ÊýÔö³¤×ªÎªÏßÐÔÔö³¤£»µÚ¶þ¿ÉÒÔÆ½ÎÈÐòÁеķ½²î¡£ÔÙÀ´¿´¿´È¡¶ÔÊýµÄÊý¾Ý£º
df['riders_log']= np.log(df['riders'])
test_stationarity(df['riders_log']) |
È¡¶ÔÊýºó£¬´ÓͼÐÎÉÏ¿´ÓÐЩÎȶ¨ÁË£¬µ«p-value»¹ÊÇûÓнӽü0.05£¬Ã»ÄÜ´ïÎȶ¨ÐÔÒªÇó¡£ÔÚ´Ë»ù´¡ÉÏÎÒÃÇÔÙÀ´×öÏÂÒ»½×²î·Ö£¬È¥³ýÔö³¤Ç÷ÊÆ£º
df['log_first_difference']
= df
['riders_log'].diff(1)
test_stationarity(df['log_first
_difference'].dropna(inplace=False)) |
»¹ÊÇûÄÜÂú×ãÐèÇó£¬ÎÒÃÇÔÙÀ´¶Ô¶ÔÊý½øÐм¾½ÚÐÔ²î·Ö£º
df['log_seasonal_difference']
= df
['riders_log'].diff(12)
test_stationarity(df['log_seasonal
_difference'].dropna(inplace=False)) |
»¹ÊÇûÄÜÂú×ãÐèÇó£¬ÔÙÀ´ÊÔÊÔÔÚ¶ÔÊý»ù´¡ÉϽøÐÐÒ»½×²î·Ö+¼¾½Ú²î·ÖµÄЧ¹û£º
df['log_seasonal_first_difference']
=
df['log_first_difference'].diff(12)
test_stationarity(df['log_seasonal_first
_difference'].dropna(inplace=False)) |
¿ÉÒÔ¿´µ½Îȶ¨ÁËÂú×ãÐèÇóÁË£¬µ«ÊÇÎȶ¨ÐÔÏà±È²»È¡¶ÔÊýµÄÇé¿öÏ£¬²¢Ã»ÓÐÌáÉý¶øÊÇϽµÁË¡£
»æÖÆACFºÍPACFͼ£¬ÕÒ³ö×î¼Ñ²ÎÊý
fig
= plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(df['seasonal
_first_difference'].iloc[13:],
lags=40, ax=ax1)
#´Ó13¿ªÊ¼ÊÇÒòΪ×ö¼¾½ÚÐÔ²î·ÖʱwindowÊÇ12
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(df['seasonal_
first_difference'].iloc[13:],
lags=40, ax=ax2) |
fig = sm.graphics.tsa.plot_acf (
df [' seasonal _ first _ difference ' ] . iloc [ 13
: ], lags = 40 , ax = ax1 ) #´Ó13¿ªÊ¼ÊÇÒòΪ×ö¼¾½ÚÐÔ²î·ÖʱwindowÊÇ12
Ò»Ö±¸ã²»Çå³þÕâ2ÕÅͼÔõô¿´¡£½ñÌìÒ»ÆðÀ´Ñ§Ï°Ï£º£¨Ã²ËÆÏÂÃæµÄÄÚÈÝÒ²ÊǺýÀïºýÍ¿µÄ£¬ºóÐø»¹Ðè¸üÉîÈëµÄѧϰ£©
ÈçºÎÈ·¶¨²î·Ö½×ÊýºÍ³£ÊýÏ
1.¼ÙÈçʱ¼äÐòÁÐÔںܸߵÄlag£¨10ÒÔÉÏ£©ÉÏÓÐÕýµÄACF£¬ÔòÐèÒªºÜ¸ß½×µÄ²î·Ö
2.¼ÙÈçlag-1µÄACFÊÇ0»ò¸ºÊý»òÕ߷dz£Ð¡£¬Ôò²»ÐèÒªºÜ¸ßµÄ²î·Ö£¬¼ÙÈçlag-1µÄACFÊÇ-0.5»ò¸üС£¬ÐòÁпÉÄÜoverdifferenced¡£
3.×îÓŵIJî·Ö½×ÊýÒ»°ãÔÚ×îÓŽ×Êýϱê×¼²î×îС£¨µ«²¢²»ÊÇ×ÜÊýÈç´Ë£©
4.Ä£ÐͲ»²é·ÖÒâζ×ÅÔÏÈÐòÁÐÊÇÆ½Îȵģ»1½×²î·ÖÒâζ×ÅÔÏÈÐòÁÐÓиö¹Ì¶¨µÄƽ¾ùÇ÷ÊÆ£»¶þ½×²î·ÖÒâζ×ÅÔÏÈÐòÁÐÓиöËæÊ±¼ä±ä»¯µÄÇ÷ÊÆ
5.Ä£ÐÍûÓвî·ÖÒ»°ã¶¼Óг£ÊýÏÓÐÁ½½×²î·ÖÒ»°ãû³£ÊýÏ¼ÙÈç1½×²î·ÖÄ£ÐÍ·ÇÁãµÄƽ¾ùÇ÷ÊÆ£¬ÔòÓг£ÊýÏî
ÈçºÎÈ·¶¨ARºÍMAµÄ½×Êý£º
1.¼ÙÈçPACFÏÔʾ½ØÎ²»òÕßlag-1µÄACFÊÇÕýµÄ£¨´ËʱÐòÁÐÈÔÈ»Óеãunderdifferenced£©£¬ÔòÐèÒª¿¼ÂÇARÏPACFµÄ½ØÎ²Ïî±íÃ÷ARµÄ½×Êý
2.¼ÙÈçACFÏÔʾ½ØÎ²»òÕßlag-1µÄACFÊǸºµÄ£¨´ËʱÐòÁÐÓеãoverdifferenced£©£¬ÔòÐèÒª¼ÓMAÏACFµÄ½ØÎ²Ïî±íÃ÷ARµÄ½×Êý
3.ARºÍMA¿ÉÒÔÏ໥µÖÏû¶Ô·½µÄÓ°Ï죬ËùÒÔ¼ÙÈçÓÃAR-MAÄ£ÐÍÈ¥ÄâºÏÊý¾Ý£¬ÈÔÈ»ÐèÒª¿¼ÂǼÓЩAR»òMAÏî¡£ÓÈÆäÔÚÔÏÈÄ£ÐÍÐèÒª³¬¹ý10´ÎµÄµü´úÈ¥converge¡£
¼ÙÈçÔÚAR²¿·ÖÓиöµ¥Î»¸ù£¨ARϵÊýºÍ´óԼΪ1£©£¬´ËʱӦ¸Ã¼õÉÙÒ»ÏîARÔö¼ÓÒ»´Î²î·Ö
¼ÙÈçÔÚMA²¿·ÖÓиöµ¥Î»¸ù£¨MAϵÊýºÍ´óԼΪ1£©£¬´ËʱӦ¸Ã¼õÉÙÒ»ÏîAR¼õÉÙÒ»´Î²î·Ö
¼ÙÈ糤ÆÚÔ¤²â³öÏÖ²»Îȶ¨£¬Ôò¿ÉÄÜAR¡¢MAϵÊýÓе¥Î»¸ù
ÈçºÎÈ·¶¨¼¾½ÚÐÔ²¿·Ö£º
1.¼ÙÈçÐòÁÐÓÐÏÔÖøÊǼ¾½ÚÐÔģʽ£¬ÔòÐèÒªÓü¾½ÚÐÔ²î·Ö£¬µ«ÊDz»ÒªÊ¹ÓÃÁ½´Î¼¾½ÚÐÔ²î·Ö»ò×ܹý³¬¹ýÁ½´Î²î·Ö£¨¼¾½ÚÐԺͷǼ¾½ÚÐÔ£©
2.¼ÙÈç²î·ÖÐòÁÐÔÚÖͺós´¦ÓÐÕýµÄACF£¬sÊǼ¾½ÚµÄ³¤¶È£¬ÔòÐèÒª¼ÓÈëSARÏ¼ÙÈç²î·ÖÐòÁÐÔÚÖͺós´¦ÓиºµÄACF£¬ÔòÐèÒª¼ÓÈëSMAÏÈç¹ûʹÓÃÁ˼¾½ÚÐÔ²îÒ죬ÔòºóÒ»ÖÖÇé¿öºÜ¿ÉÄÜ·¢Éú£¬Èç¹ûÊý¾Ý¾ßÓÐÎȶ¨ºÍºÏºõÂß¼µÄ¼¾½ÚÐÔģʽ¡£Èç¹ûûÓÐʹÓü¾½ÚÐÔ²îÒ죬ǰÕߺܿÉÄܻᷢÉú£¬Ö»Óе±¼¾½ÚÐÔģʽ²»Îȶ¨Ê±²ÅÊÊÓá£Ó¦¸Ã¾¡Á¿±ÜÃâÔÚͬһģÐÍÖÐʹÓöàÓÚÒ»¸ö»òÁ½¸ö¼¾½ÚÐÔ²ÎÊý£¨SAR
+ SMA£©£¬ÒòΪÕâ¿ÉÄܵ¼Ö¹ý¶ÈÄâºÏÊý¾ÝºÍ/»ò¹ÀËãÖеÄÎÊÌâ¡£
´´½¨Ä£ÐͲ¢½øÐÐÔ¤²â
¾¹ýÉÏÃæµÄ·ÖÎö£¬¿ÉÒÔ¶À´¦ÐèÒªÖÆ×÷µÄÄ£ÐͲÎÊýΪ(0,1,0)x(1,1,1,12)£º£¨ËµÊµ»°ÎÒÒ²²»Çå³þÕâ¸ö²ÎÊýÊÇÔõôȷ¶¨µÄ£¬»¹ÐèÒª¼ÌÐøÑ§Ï°,
SARIMAXÔõôÓÃÒ²»¹Ã»Ñ§Ï°Ã÷°×£©
mod
= sm.tsa.statespace.SARIMAX(df['riders'], trend='n',
order=(0,1,0), seasonal_order=(1,1,1,12))
results = mod.fit()
print(results.summary()) |
Statespace Model Results
Dep. Variable: riders No. Observations: 114
Model: SARIMAX(0, 1, 0)x(1, 1, 1, 12) Log Likelihood
-501.340
Date: Fri, 16 Nov 2018 AIC 1008.680
Time: 14:13:15 BIC 1016.526
Sample: 01-01-1973 HQIC 1011.856
- 06-01-1982
Covariance Type: opg
coef std err z P>|z| [0.025 0.975]
ar.S.L12 0.3236 0.186 1.739 0.082 -0.041 0.688
ma.S.L12 -0.9990 43.079 -0.023 0.981 -85.433
83.435
sigma2 984.7897 4.23e+04 0.023 0.981 -8.19e+04
8.39e+04
Ljung-Box (Q): 36.56 Jarque-Bera (JB): 4.81
Prob(Q): 0.63 Prob(JB): 0.09
Heteroskedasticity (H): 1.48 Skew: 0.38
Prob(H) (two-sided): 0.26 Kurtosis: 3.75
Warnings:
[1] Covariance matrix calculated using the outer
product of gradients
(complex-step).
|
Ô¤²âÖµÓëÕæÊµÖµ±È½Ï£º
df['forecast']
= results.predict(start = 102, end= 114, dynamic=
True)
df[['riders', 'forecast']].plot(figsize=(12,
6))
plt.savefig('ts_df_predict.png', bbox_inches='tight') |

npredict
=df['riders']['1982'].shape[0]
fig, ax = plt.subplots(figsize=(12,6))
npre = 12
ax.set(title='Ridership', xlabel='Date', ylabel='Riders')
ax.plot(df.index[-npredict-npre+1:], df.ix[-npredict-npre+1:,
'riders'], 'o', label='Observed')
ax.plot(df.index[-npredict-npre+1:], df.ix[-npredict-npre+1:,
'forecast'], 'g', label='Dynamic forecast')
legend = ax.legend(loc='lower right')
legend.get_frame().set_facecolor('w')
# plt.savefig('ts_predict_compare.png', bbox_inches='tight') |
ΪÁ˲úÉúÕë¶ÔδÀ´µÄÔ¤²âÖµ£¬ÎÒÃǼÓÈëеÄʱ¼ä£º
start
= datetime.datetime.strptime("1982-07-01",
"%Y-%m-%d")
date_list = [start + relativedelta(months=x)
for x in range(0,12)]
future = pd.DataFrame(index=date_list, columns=
df.columns)
df = pd.concat([df, future]) |
ÔÚмÓÈëµÄʱ¼äÉÏÀ´Ô¤²âδÀ´Öµ£º
df['forecast']
= results.predict(start = 114, end = 125, dynamic=
True)
df[['riders', 'forecast']].ix[-24:].plot(figsize=(12,
8))
#plt.savefig('ts_predict_future.png', bbox_inches='tight') |
|