時系列データの一次階差、自己相関、偏自己相関、ARIMA、予想、共和分検定、一次階差のQQプロット、一次階差のヒストグラム、線形近似

EXJPUS.csv_JPY.USD NIKKEI225.csv_NIKKEI225
#各種条件設定
FirstDate<-as.Date("2009/9/1")
LastDate<-as.Date("2014/8/1")
Forcast<-6 #forcast
period<-"months"
path01<-paste("C:/Users/",username,"/Desktop/R_Folder/",sep="")
path02<-paste("C:/Users/",username,"/Desktop/R_Graph/",sep="")
setwd(path01)
#パッケージ読込
library(urca) #ca.po()
library(tseries)
library(forecast)
library(MASS) #truehist()
#分析開始
AllData01<-data.frame(date=seq(FirstDate,LastDate,by=period),dummy=1)
AllData02<-AllData01
TSeriesDataFileList<-dir(path01)
for(iii in 1:length(TSeriesDataFileList))
{
dataset<-read.table(file=TSeriesDataFileList[iii],header=TRUE,sep=",")
#日付列書式の統一
dataset[,1]<-as.Date(dataset[,1])
colnames(dataset)[1]<-"date" #小文字
dataset<-subset(dataset,FirstDate<=dataset[,1])
dataset<-subset(dataset,LastDate>=dataset[,1])
for(ccc in 2:ncol(dataset))
{
#以下は非数値データ行の削除及びデータの実数化。オリジナルデータ書式、分析目的に合わせて適宜追加、変更。
dataset<-subset(dataset,dataset[,ccc]!=".")
dataset<-subset(dataset,is.na(dataset[,ccc])==F)
dataset[,ccc]<-as.double(as.character(dataset[,ccc]))
#単位根検定 H0:単位根有り
p0<-adf.test(dataset[,ccc],k=1)$p.value	
p1<-adf.test(diff(dataset[,ccc]),k=1)$p.value	
#出力
cat("\n","\n","ファイル名:",TSeriesDataFileList[iii],",列名:",colnames(dataset)[ccc],",対象期間:",format(FirstDate,"%Y/%b/%d"),"-",format(LastDate,"%Y/%b/%d"),"\n",sep="")
cat("1)単位根検定","\n")
cat("・原系列,p値=",p0,"\n",sep="")
cat("・一次階差,p値=",p1,"\n",sep="")
cat("\n")
cat("2)Summary","\n")
cat("・原系列","\n")
print(summary(dataset[,ccc]))
cat("・一次階差","\n")
print(summary(diff(dataset[,ccc])))
cat("\n")
cat("3)その他基本統計量","\n")
cat("・原系列","\n")
cat("要素数=",length(dataset[,ccc]),",")
cat("不偏分散=",var(dataset[,ccc]),",")
cat("標準偏差=",sd(dataset[,ccc]))
cat("\n")
cat("・一次階差","\n")
cat("要素数=",length(diff(dataset[,ccc])),",")
cat("不偏分散=",var(diff(dataset[,ccc])),",")
cat("標準偏差=",sd(diff(dataset[,ccc])))
cat("\n","\n")
cat("4)ARIMA","\n")
cat("・原系列")
Result.arima<-auto.arima(dataset[,ccc],ic="aic",trace=T,stepwise=T)
print(Result.arima)
cat("\n")
cat("5)ARIMA残差の正規性","\n")
cat("・原系列")
print(shapiro.test(Result.arima$res))#H0:正規分布からのサンプルである。
cat("6)原系列の一部","\n")
print(head(dataset,10))
cat("\n","\n")
#グラフ作成
ymax<-max(dataset[,ccc])
ymin<-min(dataset[,ccc])
ytitle<-colnames(dataset)[ccc]
setwd(path02)
png(file=paste(TSeriesDataFileList[iii],"_",colnames(dataset)[ccc],".png",sep=""),width=900,height=900)
par(mfrow=c(3,3),mar=c(5,5,5,5),ps=22,cex.axis=1,cex.lab=1,cex.main=1,cex.sub=1)
Date<-dataset[,1]
#1
plot(Date,dataset[,ccc],type="l",ylim=c(ymin,ymax),ylab=ytitle,xaxt="n")
axis.Date(side=1,at=seq(FirstDate,LastDate,period),format="%Y%m")
#2
plot(Date,dataset[,ccc]/ymax*100,type="l",ylim=c(ymin/ymax*100,ymax/ymax*100),ylab=paste("Max=100%:",ytitle,sep=""),xaxt="n")
axis.Date(side=1,at=seq(FirstDate,LastDate,period),format="%Y%m")
#3
plot(dataset[,ccc],type="l",ylim=c(ymin,ymax),ylab=ytitle,xaxt="n")
abline(lm(dataset[,ccc]~seq(1,length(dataset[,ccc]))),col=2)
#4
plot(diff(dataset[,ccc]),type="h",ylab=paste("1stDifference:",ytitle,sep=""))
#5
#truehist(diff(dataset[,ccc]),xlab="",main=paste("1stDifference:",ytitle,sep="")) 
hist(diff(dataset[,ccc]),xlab="",main=paste("1stDifference:",ytitle,sep="")) 
#6
qqnorm(diff(dataset[,ccc]))
qqline(diff(dataset[,ccc]),col=2)
#7
acf(dataset[,ccc],plot=T,main=colnames(dataset)[ccc])
#8
pacf(dataset[,ccc],plot=T,main=colnames(dataset)[ccc])
#9
plot(forecast(Result.arima,level=c(50,95),h=Forcast),ylab=ytitle)
dev.off()
setwd(path01)
}
AllData01<-merge(AllData01,dataset,by="date",all=T,sort=T) #欠損はNAとして処理
AllData02<-merge(AllData02,dataset,by="date",sort=T) #全系列に数値データが存在する日付のみ
if(iii==length(TSeriesDataFileList)){
if(4<=ncol(AllData02)){
for(iii in 4:ncol(AllData02))
{
co.integ<-ca.po(AllData02[iii:3],demean="trend")#3列目が説明変数、とその他列が応答変数での共和分検定。H0:共和分関係なし(誤差項は非定常)
cat(colnames(AllData02[3]),"(説明変数)と",colnames(AllData02[iii]),"(応答変数)との共和分検定",sep="")
print(summary(co.integ))
}
}
head(AllData01,4)
head(AllData02,4)
#licence()
}
}

ファイル名:EXJPUS.csv,列名:JPY.USD,対象期間:2009/9/01-2014/8/01
1)単位根検定 
・原系列,p値=0.8221737
・一次階差,p値=0.01839341

2)Summary 
・原系列 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  76.64   80.88   88.28   88.63   97.76  103.80 
・一次階差 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-3.6840 -1.0580 -0.1527  0.1978  1.3010  5.2680 

3)その他基本統計量 
・原系列 
要素数= 60 ,不偏分散= 82.10137 ,標準偏差= 9.060981
・一次階差 
要素数= 59 ,不偏分散= 3.524853 ,標準偏差= 1.877459
 
4)ARIMA 
・原系列
 ARIMA(2,2,2)                    : 244.3556
 ARIMA(0,2,0)                    : 262.528
 ARIMA(1,2,0)                    : 248.6144
 ARIMA(0,2,1)                    : 241.3534
 ARIMA(1,2,1)                    : 242.0918
 ARIMA(0,2,2)                    : 242.5031
 ARIMA(1,2,2)                    : 242.8977
 ARIMA(0,2,1)                    : 241.3534

 Best model: ARIMA(0,2,1)                    

Series: dataset[, ccc] 
ARIMA(0,2,1)                    

Coefficients:
          ma1
      -0.8228
s.e.   0.1274

sigma^2 estimated as 3.438:  log likelihood=-118.68
AIC=241.35   AICc=241.57   BIC=245.47

5)ARIMA残差の正規性 
・原系列
        Shapiro-Wilk normality test

data:  Result.arima$res
W = 0.9795, p-value = 0.4085

6)原系列の一部 
          date JPY.USD
465 2009-09-01 91.2748
466 2009-10-01 90.3671
467 2009-11-01 89.2674
468 2009-12-01 89.9509
469 2010-01-01 91.1011
470 2010-02-01 90.1395
471 2010-03-01 90.7161
472 2010-04-01 93.4527
473 2010-05-01 91.9730
474 2010-06-01 90.8059

 


ファイル名:NIKKEI225.csv,列名:NIKKEI225,対象期間:2009/9/01-2014/8/01
1)単位根検定 
・原系列,p値=0.827566
・一次階差,p値=0.01

2)Summary 
・原系列 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   8506    9327   10090   11080   13350   15660 
・一次階差 
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-1426.00  -223.50    77.26    85.69   494.50  1308.00 

3)その他基本統計量 
・原系列 
要素数= 60 ,不偏分散= 5549693 ,標準偏差= 2355.779
・一次階差 
要素数= 59 ,不偏分散= 320615 ,標準偏差= 566.2288
 
4)ARIMA 
・原系列
 ARIMA(2,1,2) with drift         : 909.8415
 ARIMA(0,1,0) with drift         : 902.9292
 ARIMA(1,1,0) with drift         : 904.9245
 ARIMA(0,1,1) with drift         : 904.9255
 ARIMA(1,1,1) with drift         : 906.708
 ARIMA(0,1,0)                    : 917.7868

 Best model: ARIMA(0,1,0) with drift         

Series: dataset[, ccc] 
ARIMA(0,1,0) with drift         

Coefficients:
        drift
      85.6920
s.e.  73.7168

sigma^2 estimated as 320615:  log likelihood=-449.46
AIC=902.93   AICc=903.14   BIC=907.08

5)ARIMA残差の正規性 
・原系列
        Shapiro-Wilk normality test

data:  Result.arima$res
W = 0.9885, p-value = 0.8465

6)原系列の一部 
          date NIKKEI225
725 2009-09-01  10302.87
726 2009-10-01  10066.24
727 2009-11-01   9640.99
728 2009-12-01  10169.01
729 2010-01-01  10661.62
730 2010-02-01  10175.13
731 2010-03-01  10671.49
732 2010-04-01  11139.77
733 2010-05-01  10103.98
734 2010-06-01   9786.05

 
JPY.USD(説明変数)とNIKKEI225(応答変数)との共和分検定
######################################## 
# Phillips and Ouliaris Unit Root Test # 
######################################## 

Test of type Pu 
detrending of series with constant and linear trend 


Call:
lm(formula = z[, 1] ~ z[, -1] + trd)

Residuals:
     Min       1Q   Median       3Q      Max 
-1094.02  -419.96   -28.58   407.44  1279.85 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -8196.162    763.365  -10.74 2.60e-15 ***
z[, -1]       203.815      9.343   21.81  < 2e-16 ***
trd            39.798      4.847    8.21 3.08e-11 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 551.1 on 57 degrees of freedom
Multiple R-squared:  0.9471,    Adjusted R-squared:  0.9453 
F-statistic: 510.6 on 2 and 57 DF,  p-value: < 2.2e-16


Value of test-statistic is: 25.8366 

Critical values of Pu are:
                  10pct    5pct    1pct
critical values 41.2488 48.8439 65.1714

 警告メッセージ: 
In adf.test(diff(dataset[, ccc]), k = 1) :
  p-value smaller than printed p-value
> 
> 

アプリケーション R Core Team (2013). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL http://www.R-project.org/.