首页 > 其他 > 详细

时间序列预测入门(二)

时间:2019-03-24 22:54:09      阅读:152      评论:0      收藏:0      [点我收藏+]

ARIMA预测

# -*- coding: utf-8 -*-
"""
Created on Fri Mar 22 21:03:34 2019

@author: Administrator
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta

# Number of daily record files to load (days 01..14 of 2019-01).
num = 14
# Directory holding the per-day CSV files; the trailing separator is part of
# the value so that file names can be appended directly.
basepath = 'D:\\pworkspace\\data\\Metro_train\\'
# One path per day. The day number is zero-padded to two digits, which covers
# both the single-digit and the double-digit days without branching.
filenames = [
    basepath + 'record_2019-01-{:02d}.csv'.format(day)
    for day in range(1, num + 1)
]

# Build the inbound record count per 10-minute slot for station 0.
# Each daily file is reduced on its own and the per-day results are stacked
# with a single concat at the end. The result, ``user_in_all``, is indexed by
# the slot start time (index name 'time') and has a single column 'count'.
# Records with status == 0 (outbound) are not used by the rest of the script,
# so they are not aggregated here.
daily_in_counts = []
for filename in filenames:
    df = pd.read_csv(filename)
    # Keep only status == 1 (inbound) rows of station 0 before parsing the
    # timestamps, so the conversion runs only on the rows that are needed.
    df0 = df[(df['stationID'] == 0) & (df['status'] == 1)].copy()
    del df  # release the full day's records before reading the next file
    # Truncate every timestamp to the start of its 10-minute slot.
    df0['time'] = pd.to_datetime(df0['time']).dt.floor('10min')
    # One row per slot; 'count' is the number of non-null userID values.
    user_in = df0.groupby('time')['userID'].count().to_frame('count')
    daily_in_counts.append(user_in)

user_in_all = pd.concat(daily_in_counts, axis=0)
        

#start = datetime(2019,1,1,0,0,0)
#timelist = [ str(start + timedelta(seconds=600*i)) for i in range(24 * 6 * 2)]

# Expand the inbound counts onto a gap-free 10-minute grid covering all
# ``num`` days, so that slots without any record show up with a count of 0.
startdate = datetime(2019, 1, 1, 0, 0, 0)
# The last slot of the final day starts at 23:50.
enddate = startdate + timedelta(days=num - 1, hours=23, minutes=50)
# '10min' is used instead of '10T': both mean ten minutes, but the 'T' alias
# is deprecated in recent pandas releases.
all_slots = pd.date_range(start=startdate, end=enddate, freq='10min',
                          name='time')

# Sum per slot first so the index is unique (reindex rejects duplicate
# labels), then align to the full grid. Slots absent from the data are filled
# with 0; slots outside the grid are dropped.
user_in_all = (user_in_all[['count']]
               .groupby(level=0).sum()
               .reindex(all_slots, fill_value=0))
user_in_all.plot(figsize=(15, 8))
plt.show()

# Smooth the inbound count series with an exponentially weighted moving
# average (span=60 samples) and plot the smoothed curve.
ts = user_in_all['count']
# to_frame() keeps the result a one-column DataFrame named 'count'.
ts_ewma = ts.to_frame().ewm(span=60).mean()
ts_ewma.plot(figsize=(15, 8))
plt.show()

from statsmodels.tsa.stattools import acf, pacf, adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.arima_model import ARIMA
#import statsmodels.api as sm

# First-order difference of the smoothed series. diff() leaves a NaN in the
# first row, which is dropped; selecting 'count' turns the frame into a Series.
ts_diff_1 = ts_ewma.diff(1).dropna(axis=0, how='any')
ts_diff_1 = ts_diff_1['count']

# ADF stationarity test. The result is kept and printed so that it is visible
# when the file is run as a script rather than in an interactive console.
adf_result = adfuller(ts_diff_1, autolag='AIC')
print('ADF test:', adf_result)
# White-noise (Ljung-Box) test at lag 1.
ljungbox_result = acorr_ljungbox(ts_diff_1, lags=1)
print('Ljung-Box test:', ljungbox_result)

# ACF / PACF up to lag 50.
lag_acf = acf(ts_diff_1, nlags=50)
lag_pacf = pacf(ts_diff_1, nlags=50)
# Approximate 95% significance band, +/- 1.96 / sqrt(n).
# NOTE(review): the original constant was 1.9, presumably a truncated 1.96.
conf_bound = 1.96 / np.sqrt(len(ts_diff_1))

plt.figure(facecolor='white', figsize=(15, 8))
plt.plot(lag_acf)
plt.show()

plt.figure(facecolor='white', figsize=(15, 8))
plt.plot(lag_pacf)
plt.axhline(y=-conf_bound, linestyle='--', color='gray')
plt.axhline(y=conf_bound, linestyle='--', color='gray')
plt.show()

# Fit an ARIMA model of order (6, 0, 0) to the differenced series and compare
# its in-sample predictions with the series itself.
# NOTE(review): ARIMA is imported from statsmodels.tsa.arima_model, which was
# removed in statsmodels 0.13; newer versions provide the replacement class in
# statsmodels.tsa.arima.model. Confirm against the installed version.
model = ARIMA(ts_diff_1, order=(6, 0, 0))
ts_predict = model.fit().predict()

# Root-mean-square error of the in-sample predictions, computed with a
# vectorised sum instead of iterating the values with the built-in sum().
squared_error = (ts_predict - ts_diff_1) ** 2
rmse = np.sqrt(np.sum(squared_error) / ts_diff_1.size)
print('RMSE:', rmse)

plt.figure(facecolor='white', figsize=(15, 8))
plt.plot(ts_predict, lw=0.5, color='blue', label='Predict')
plt.plot(ts_diff_1, lw=0.5, color='red', label='Original')
plt.legend(loc='lower right')
#plt.ylim((-1000, 1000))
plt.show()

运行结果

技术分享图片

技术分享图片

技术分享图片

技术分享图片

技术分享图片

时间序列预测入门(二)

原文:https://www.cnblogs.com/coshaho/p/10590940.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!