時系列データの一次階差、自己相関、偏自己相関、ARIMAおよび予想

GOLDAMGBD228NLBM.csv_GOLD DJIA.csv_DOW
#各種条件設定
FirstDate<-as.Date("2004/9/1")
LastDate<-as.Date("2014/8/1")
period<-"months"
path01<-"C:/Users/username/Desktop/R_Folder/"
path02<-"C:/Users/username/Desktop/"
setwd(path01)
#パッケージ読込
library(urca)
library(tseries)
library(forecast)
#分析開始
AllData01<-data.frame(date=seq(FirstDate,LastDate,by=period),dummy=1)
AllData02<-AllData01
TSeriesDataFileList<-dir(path01)
for(iii in 1:length(TSeriesDataFileList))
{
dataset<-read.table(file=TSeriesDataFileList[iii],header=TRUE,sep=",")
#日付列書式の統一
dataset[,1]<-as.Date(dataset[,1])
colnames(dataset)[1]<-"date"
for(ccc in 2:ncol(dataset))
{
#以下は非数値データ行の削除及びデータの実数化。オリジナルデータ書式、分析目的に合わせて適宜追加、変更。
dataset<-subset(dataset,dataset[,ccc]!=".")
dataset<-subset(dataset,is.na(dataset[,ccc])==F)
dataset<-subset(dataset,FirstDate<=dataset[,1])
dataset<-subset(dataset,LastDate>=dataset[,1])
dataset[,ccc]<-as.double(as.character(dataset[,ccc]))
#単位根検定 H0:単位根有り
p0<-adf.test(dataset[,ccc],k=1)$p.value	
p1<-adf.test(diff(dataset[,ccc]),k=1)$p.value	
#出力
cat("\n","\n","\n","ファイル名:",TSeriesDataFileList[iii],",列名:",colnames(dataset)[ccc],"\n",sep="")
cat("1)単位根検定","\n")
cat("・原系列,p値=",p0,"\n",sep="")
cat("・一次階差,p値=",p1,"\n",sep="")
cat("\n")
cat("2)Summary","\n")
cat("・原系列","\n")
print(summary(dataset[,ccc]))
cat("・一次階差","\n")
print(summary(diff(dataset[,ccc])))
cat("\n")
cat("3)その他基本統計量","\n")
cat("・原系列","\n")
cat("要素数=",length(dataset[,ccc]),",")
cat("不偏分散=",var(dataset[,ccc]),",")
cat("標準偏差=",sd(dataset[,ccc]))
cat("\n")
cat("・一次階差","\n")
cat("要素数=",length(diff(dataset[,ccc])),",")
cat("不偏分散=",var(diff(dataset[,ccc])),",")
cat("標準偏差=",sd(diff(dataset[,ccc])))
cat("\n","\n")
cat("4)ARIMA","\n")
cat("・原系列")
Result.arima<-auto.arima(dataset[,ccc],ic="aic",trace=T,stepwise=T)
print(Result.arima)
cat("\n")
cat("5)ARIMA残差の正規性","\n")
cat("・原系列")
print(shapiro.test(Result.arima$res))#H0:正規分布からのサンプルである。
cat("6)原系列の一部","\n")
print(head(dataset,10))
cat("\n","\n")
#グラフ作成
ymax<-max(dataset[,ccc])
ymin<-min(dataset[,ccc])
ytitle<-colnames(dataset)[ccc]
setwd(path02)
png(file=paste(TSeriesDataFileList[iii],"_",colnames(dataset)[ccc],".png",sep=""),width=900,height=900)
par(mfrow=c(2,3),mar=c(5,5,5,5),ps=22,cex.axis=1,cex.lab=1,cex.main=1,cex.sub=1)
#1
plot(dataset[,ccc],type="l",ylim=c(ymin,ymax),ylab=ytitle)
#2
plot(dataset[,ccc]/ymax*100,type="l",ylim=c(ymin/ymax*100,ymax/ymax*100),ylab=paste("Max=100%:",ytitle,sep=""))
#3
plot(diff(dataset[,ccc]),type="h",ylab=paste("1stDifference:",ytitle,sep=""))
#4
acf(dataset[,ccc],plot=T,main=colnames(dataset)[ccc])
#5
pacf(dataset[,ccc],plot=T,main=colnames(dataset)[ccc])
#6
plot(forecast(Result.arima,level=c(50,95),h=30),ylab=ytitle)
dev.off()
setwd(path01)
}
AllData01<-merge(AllData01,dataset,by="date",all=T,sort=T) #欠損はNAとして処理
AllData02<-merge(AllData02,dataset,by="date",sort=T) #全系列に数値データが存在する日付のみ
}
head(AllData01,4)
head(AllData02,4)

ファイル名:DJIA.csv,列名:DOW
1)単位根検定 
・原系列,p値=0.9090181
・一次階差,p値=0.01

2)Summary 
・原系列 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   7235   10540   11880   12030   13160   16990 
・一次階差 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-1937.0  -135.1   145.1    55.2   303.2   756.6 

3)その他基本統計量 
・原系列 
要素数= 120 ,不偏分散= 4547939 ,標準偏差= 2132.59
・一次階差 
要素数= 119 ,不偏分散= 153837.6 ,標準偏差= 392.2214
 
4)ARIMA 
・原系列
 ARIMA(2,1,2) with drift         : 1752.55
 ARIMA(0,1,0) with drift         : 1750.14
 ARIMA(1,1,0) with drift         : 1747.779
 ARIMA(0,1,1) with drift         : 1747.084
 ARIMA(1,1,1) with drift         : 1749.303
 ARIMA(0,1,2) with drift         : 1749.081
 ARIMA(1,1,2) with drift         : 1751.163
 ARIMA(0,1,1)                    : 1746.68
 ARIMA(1,1,1)                    : 1749.155
 ARIMA(0,1,0)                    : 1750.493
 ARIMA(0,1,2)                    : 1748.673
 ARIMA(1,1,2)                    : 1750.965

 Best model: ARIMA(0,1,1)                    

Series: dataset[, ccc] 
ARIMA(0,1,1)                    

Coefficients:
         ma1
      0.2217
s.e.  0.0895

sigma^2 estimated as 148139:  log likelihood=-877.28
AIC=1758.56   AICc=1758.66   BIC=1764.12

5)ARIMA残差の正規性 
・原系列
        Shapiro-Wilk normality test

data:  Result.arima$res
W = 0.8941, p-value = 9.864e-08

6)原系列の一部 
         date      DOW
1  2004-09-01 10206.48
2  2004-10-01 10001.60
3  2004-11-01 10411.76
4  2004-12-01 10673.38
5  2005-01-01 10539.51
6  2005-02-01 10723.82
7  2005-03-01 10682.09
8  2005-04-01 10283.19
9  2005-05-01 10377.18
10 2005-06-01 10486.68

 



ファイル名:GOLDAMGBD228NLBM.csv,列名:GOLD
1)単位根検定 
・原系列,p値=0.9606881
・一次階差,p値=0.01

2)Summary 
・原系列 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  405.4   662.4   983.7  1047.0  1363.0  1781.0 
・一次階差 
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-112.800  -15.460    7.399    7.492   37.130  191.000 

3)その他基本統計量 
・原系列 
要素数= 120 ,不偏分散= 179399.8 ,標準偏差= 423.5561
・一次階差 
要素数= 119 ,不偏分散= 2118.933 ,標準偏差= 46.03187
 
4)ARIMA 
・原系列
 ARIMA(2,1,2) with drift         : 1248.812
 ARIMA(0,1,0) with drift         : 1244.475
 ARIMA(1,1,0) with drift         : 1244.366
 ARIMA(0,1,1) with drift         : 1243.211
 ARIMA(1,1,1) with drift         : 1244.615
 ARIMA(0,1,2) with drift         : 1245.079
 ARIMA(1,1,2) with drift         : 1246.372
 ARIMA(0,1,1)                    : 1243.516

 Best model: ARIMA(0,1,1) with drift         

Series: dataset[, ccc] 
ARIMA(0,1,1) with drift         

Coefficients:
         ma1   drift
      0.1707  7.4588
s.e.  0.0952  4.8669

sigma^2 estimated as 2062:  log likelihood=-617.19
AIC=1240.39   AICc=1240.6   BIC=1248.73

5)ARIMA残差の正規性 
・原系列
        Shapiro-Wilk normality test

data:  Result.arima$res
W = 0.974, p-value = 0.01995

6)原系列の一部 
          date    GOLD
438 2004-09-01 405.402
439 2004-10-01 420.210
440 2004-11-01 439.059
441 2004-12-01 442.974
442 2005-01-01 424.080
443 2005-02-01 423.430
444 2005-03-01 434.355
445 2005-04-01 429.140
446 2005-05-01 422.903
447 2005-06-01 430.302

 
 警告メッセージ: 
1: In adf.test(diff(dataset[, ccc]), k = 1) :
  p-value smaller than printed p-value
2: In adf.test(diff(dataset[, ccc]), k = 1) :
  p-value smaller than printed p-value
> head(AllData01,4)
        date dummy      DOW    GOLD
1 2004-09-01     1 10206.48 405.402
2 2004-10-01     1 10001.60 420.210
3 2004-11-01     1 10411.76 439.059
4 2004-12-01     1 10673.38 442.974
> head(AllData02,4)
        date dummy      DOW    GOLD
1 2004-09-01     1 10206.48 405.402
2 2004-10-01     1 10001.60 420.210
3 2004-11-01     1 10411.76 439.059
4 2004-12-01     1 10673.38 442.974
> 
>