時系列データの単位根検定、基本統計量、ARIMA、残差の正規性、グラフ(原系列、正規化、トレンド、一次階差、ヒストグラム(一次階差)、Q-Qプロット(一次階差)、自己相関、偏自己相関、ARIMAによる予想、構造変化、ペリオドグラム、スペクトル分析)

DEXUSEU.csv_EURUSD
ファイル名:DEXUSEU.csv,列名:EURUSD,対象期間:2013/9/06-2014/9/05
1)単位根検定 
・原系列,p値=0.8896247
・一次階差,p値=0.01

2)Summary 
・原系列 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.293   1.352   1.362   1.361   1.375   1.393 
・一次階差 
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-0.021000 -0.002800  0.000000 -0.000084  0.002275  0.017600 

3)その他基本統計量 
・原系列 
要素数= 251 ,不偏分散= 0.0003391923 ,標準偏差= 0.01841717
・一次階差 
要素数= 250 ,不偏分散= 2.305043e-05 ,標準偏差= 0.004801086
 
4)ARIMA 
・原系列
 ARIMA(2,2,2)                    : 1e+20
 ARIMA(0,2,0)                    : -1755.423
 ARIMA(1,2,0)                    : -1822.482
 ARIMA(0,2,1)                    : -1906.643
 ARIMA(1,2,1)                    : -1928.013
 ARIMA(1,2,2)                    : -1926.023
 ARIMA(1,2,1)                    : -1928.013
 ARIMA(2,2,1)                    : -1918.25

 Best model: ARIMA(1,2,1)                    

Series: dataset[, ccc] 
ARIMA(1,2,1)                    

Coefficients:
          ar1      ma1
      -0.0295  -0.9799
s.e.   0.0647   0.0138

sigma^2 estimated as 2.305e-05:  log likelihood=974.45
AIC=-1942.91   AICc=-1942.81   BIC=-1932.35

5)ARIMA残差の正規性 
・原系列
        Shapiro-Wilk normality test

data:  Result.arima$res
W = 0.9606, p-value = 2.286e-06

6)原系列の一部 
           date EURUSD
3830 2013-09-06 1.3166
3831 2013-09-09 1.3260
3832 2013-09-10 1.3260
3833 2013-09-11 1.3301
3834 2013-09-12 1.3315
3835 2013-09-13 1.3276
3836 2013-09-16 1.3350
3837 2013-09-17 1.3357
3838 2013-09-18 1.3351
3839 2013-09-19 1.3527

 
 警告メッセージ: 
In adf.test(diff(dataset[, ccc]), k = 1) :
  p-value smaller than printed p-value
> 
> 

#各種条件設定
username<-Sys.info()['user']
FirstDate<-as.Date("2013/9/6")
LastDate<-as.Date("2014/9/5")
period<-"days"
path01<-paste("C:/Users/",username,"/Desktop/R_Data_Read/",sep="")
path02<-paste("C:/Users/",username,"/Desktop/R_Graph/",sep="")
path03<-paste("C:/Users/",username,"/Desktop/R_Data_Write/",sep="")
setwd(path01)
condition01<-0 #0:共和分検定等あり、1:なし
#パッケージ読込
library(urca) #ca.po()
library(tseries)
library(forecast)
library(MASS) #truehist()
library(lmtest) #dwtest
library(systemfit)
#分析開始
AllData01<-data.frame(date=seq(FirstDate,LastDate,by=period),dummy=1)
AllData02<-AllData01
TSeriesDataFileList<-dir(path01)
for(iii in 1:length(TSeriesDataFileList)) #sepia
{
dataset<-read.table(file=TSeriesDataFileList[iii],header=TRUE,sep=",")
#日付列書式の統一
dataset[,1]<-as.Date(dataset[,1])
colnames(dataset)[1]<-"date" #小文字
dataset<-subset(dataset,FirstDate<=dataset[,1])
dataset<-subset(dataset,LastDate>=dataset[,1])
for(ccc in 2:ncol(dataset)) #red
{
#以下は非数値データ行の削除及びデータの実数化。オリジナルデータ書式、分析目的に合わせて適宜追加、変更。
dataset<-subset(dataset,dataset[,ccc]!=".")
dataset<-subset(dataset,is.na(dataset[,ccc])==F)
dataset[,ccc]<-as.double(as.character(dataset[,ccc]))
#単位根検定 H0:単位根有り
p0<-adf.test(dataset[,ccc],k=1)$p.value	
p1<-adf.test(diff(dataset[,ccc]),k=1)$p.value	
#出力
cat("\n","\n","ファイル名:",TSeriesDataFileList[iii],",列名:",colnames(dataset)[ccc],",対象期間:",format(FirstDate,"%Y/%b/%d"),"-",format(LastDate,"%Y/%b/%d"),"\n",sep="")
cat("1)単位根検定","\n")
cat("・原系列,p値=",p0,"\n",sep="")
cat("・一次階差,p値=",p1,"\n",sep="")
cat("\n")
cat("2)Summary","\n")
cat("・原系列","\n")
print(summary(dataset[,ccc]))
cat("・一次階差","\n")
print(summary(diff(dataset[,ccc])))
cat("\n")
cat("3)その他基本統計量","\n")
cat("・原系列","\n")
cat("要素数=",length(dataset[,ccc]),",")
cat("不偏分散=",var(dataset[,ccc]),",")
cat("標準偏差=",sd(dataset[,ccc]))
cat("\n")
cat("・一次階差","\n")
cat("要素数=",length(diff(dataset[,ccc])),",")
cat("不偏分散=",var(diff(dataset[,ccc])),",")
cat("標準偏差=",sd(diff(dataset[,ccc])))
cat("\n","\n")
cat("4)ARIMA","\n")
cat("・原系列")
Result.arima<-auto.arima(dataset[,ccc],ic="aic",trace=T,stepwise=T)
print(Result.arima)
cat("\n")
cat("5)ARIMA残差の正規性","\n")
cat("・原系列")
print(shapiro.test(Result.arima$res))#H0:正規分布からのサンプルである。
cat("6)原系列の一部","\n")
print(head(dataset,10))
cat("\n","\n")
#グラフ作成
ymax<-max(dataset[,ccc])
ymin<-min(dataset[,ccc])
ytitle<-colnames(dataset)[ccc]
setwd(path02)
png(file=paste(TSeriesDataFileList[iii],"_",colnames(dataset)[ccc],".png",sep=""),width=1200,height=900)
par(mfrow=c(3,4),mar=c(5,5,5,5),ps=22,cex.axis=1,cex.lab=1,cex.main=1,cex.sub=1)
Date<-dataset[,1]
#1
plot(Date,dataset[,ccc],type="l",ylim=c(ymin,ymax),ylab=ytitle,xaxt="n",main=ytitle)
axis.Date(side=1,at=seq(FirstDate,LastDate,period),format="%Y%m")
#2
plot(Date,dataset[,ccc]/ymax*100,type="l",ylim=c(ymin/ymax*100,ymax/ymax*100),
ylab=paste("Max=100%:",ytitle,sep=""),xaxt="n",main=paste(ytitle,"Normalization"))
axis.Date(side=1,at=seq(FirstDate,LastDate,period),format="%Y%m")
#3
plot(dataset[,ccc],type="l",ylim=c(ymin,ymax),ylab=ytitle,xaxt="n",main="Trend")
abline(lm(dataset[,ccc]~seq(1,length(dataset[,ccc]))),col=2)
#4
plot(diff(dataset[,ccc]),type="h",ylab=paste("1stDifference:",ytitle,sep=""),main="1st Difference")
#5
#truehist(diff(dataset[,ccc]),xlab="",main=paste("1stDifference:",ytitle,sep="")) 
hist(diff(dataset[,ccc]),xlab="",main=paste("1st Difference:",ytitle,sep="")) 
#6
qqnorm(diff(dataset[,ccc]),main="1st Diff Normal Q-Q Plot")
qqline(diff(dataset[,ccc]),col=2)
#7
acf(dataset[,ccc],type="correlation",ci=c(0.9,0.95),plot=T,main=colnames(dataset)[ccc],lag=length(floor(dataset[,ccc]/5))) 
#8
pacf(dataset[,ccc],ci=c(0.9,0.95),plot=T,main=colnames(dataset)[ccc],lag=length(floor(dataset[,ccc]/5)))
#9
plot(forecast(Result.arima,level=c(50,95),h=floor(length(dataset[,ccc])/10)),ylab=ytitle)
#10
#構造変化
StChg<-data.frame(id=seq(1:length(dataset[,ccc])),dummy=0,Pr=0)
cnt<-0
for(kkk in 2:length(dataset[,ccc])) #kkk:構造変化時点
{
StChg[,2]<-0
StChg[,2][kkk<=StChg[,1]]<-1
fm0<-lm(seq(1:length(dataset[,ccc]))~dataset[,ccc])
fm1<-lm(seq(1:length(dataset[,ccc]))~dataset[,ccc]+StChg[,2]+StChg[,2]*dataset[,ccc])
StChg[,3][kkk]<-anova(fm0,fm1)$Pr[2]#H0:構造変化なし
if(0.05<=StChg[,3][kkk] && cnt==0){
SCDate<-dataset[,1][kkk]
cnt<-1
}
}
plot(StChg[,1],StChg[,3],type="h",main=paste("Structure Change ",SCDate,sep=""),xlab="Index",ylab="Pr(>F)")
#11
spec.pgram(dataset[,ccc],spans=c(5,5),col=2,main=paste(colnames(dataset)[ccc],"\n","Smoothed Periodogram"))
#12
spectrum(dataset[,ccc],method="ar") #method="pgram"
title(paste("\n",TSeriesDataFileList[iii],",",colnames(dataset)[ccc],",","対象期間:",format(FirstDate,"%Y/%b/%d"),"-",format(LastDate,"%Y/%b/%d"),sep=""),outer=T)
dev.off()
setwd(path01)
} #red
AllData01<-merge(AllData01,dataset,by="date",all=T,sort=T) #欠損はNAとして処理
AllData02<-merge(AllData02,dataset,by="date",sort=T) #全系列に数値データが存在する日付のみ
if(iii==length(TSeriesDataFileList)){ #blue 
AllData01<-subset(AllData01,select=-dummy)
AllData02<-subset(AllData02,select=-dummy)
if(condition01==1){
if(3<=ncol(AllData02)){
for(nnn in 3:ncol(AllData02))
{
#共和分検定
co.integ<-ca.po(AllData02[nnn:2],demean="trend")
#2列目が説明変数、とその他列が応答変数での共和分検定。H0:共和分関係なし(誤差項は非定常)
cat("[",colnames(AllData02[2]),"(説明変数)と",colnames(AllData02[nnn]),"(応答変数)との共和分検定","]",sep="")
print(summary(co.integ))
cat("\n")
#D.W test
cat("[",colnames(AllData02[2]),"(説明変数)と",colnames(AllData02[nnn]),"(応答変数)との線形回帰残差のダービン=ワトソン検定","]",sep="")
#H0:自己相関無し
print(dwtest(lm(AllData02[,nnn]~AllData02[,2],data=AllData02))) 
cat("\n")
#相互相関
cat("[",colnames(AllData02[2]),"(説明変数)と",colnames(AllData02[nnn]),"(応答変数)との相互相関係数","]",sep="")
setwd(path02)
png(file="CCF_AllData02.png",width=900,height=900)
par(mfrow=c(1,2),mar=c(5,5,5,5),ps=22,cex.axis=1,cex.lab=1,cex.main=1,cex.sub=1)
ccf(AllData02[,nnn],AllData02[,2],main=paste("CCF:",colnames(AllData02[2]),"-",colnames(AllData02[nnn])))
ccf(diff(AllData02[,nnn]),diff(AllData02[,2]),main=paste("CCF:",colnames(AllData02[2]),"-",colnames(AllData02[nnn])))
dev.off()
}
}
}
#cat("\n")
#print(head(AllData01,4))
#cat("\n")
#print(head(AllData02,4))
setwd(path03)
write.table(AllData01,file="AllData01.csv",sep=",",quote=FALSE,row.names=FALSE)
write.table(AllData02,file="AllData02.csv",sep=",",quote=FALSE,row.names=FALSE)
#licence()
} #blue
} #sepia