Assignment3last.R
sergazy
Thu Dec 13 02:00:29 2018
#setwd("/Users/sergazy/Downloads/Fall 2018 courses and all files/Eric's class/12:05:2018 Time series")
load("Time_series_data.Rdata")
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
library(car) # to use the qqPlot function
## Loading required package: carData
library(expm)
## Loading required package: Matrix
##
## Attaching package: 'expm'
## The following object is masked from 'package:Matrix':
##
## expm
# Set random seed to make results reproducible:
set.seed(17)
# Assign the data to squares
Y1square=log(Y1^2) # we take the logarithm here to make the MSE smaller
Y2square=log(Y2^2) # we take the logarithm here to make the MSE smaller
Isquare=I^2
# and interactions
Y1interactY2=log(Y1*Y2) # we take the logarithm here to make the MSE smaller
#Y1interactI=Y1*I
#Y2interactI=log(Y2*I)
#test1=Y1[1001:1200]
#test2=Y2[1001:1200]
Y1inc=diff(Y1[1:1200])
Y2inc=diff(Y2[1:1200])
# Assign the data to the covariate sets
covariates=cbind(Y1[1:996],Y1[2:997],Y1[3:998],Y1[4:999],
Y2[1:996],Y2[2:997],Y2[3:998],Y2[4:999],
I[1:996],I[2:997],I[3:998],I[4:999],
Y1square[1:996],Y1square[2:997],Y1square[3:998],Y1square[4:999],
Y2square[1:996],Y2square[2:997],Y2square[3:998],Y2square[4:999],
Isquare[1:996],Isquare[2:997],Isquare[3:998],Isquare[4:999],
Y1interactY2[1:996],Y1interactY2[2:997],Y1interactY2[3:998], Y1interactY2[4:999])
#Y1interactI[1:996],Y1interactI[2:997],Y1interactI[3:998], Y1interactI[4:999],
#Y2interactI[1:996],Y2interactI[2:997],Y2interactI[3:998],Y2interactI[4:999])
# We give column names here. Note Y1t4 means Y1(t-4), Y1t3 means Y1(t-3), etc.; the rest
# follows the same idea. This makes the important variables easy to read when we use the
# varImpPlot function.
colnames(covariates)=c("Y1t4","Y1t3","Y1t2","Y1t1",
"Y2t4","Y2t3","Y2t2","Y2t1",
"It4","It3","It2","It1",
"Y1sqt4","Y1sqt3","Y1sqt2","Y1sqt1",
"Y2sqt4","Y2sqt3","Y2sqt2","Y2sqt1",
"Isqt4","Isqt3","Isqt2","Isqt1",
"Y1Y2t4","Y1Y2t3","Y1Y2t2","Y1Y2t1")
# "Y1It4","Y1It3","Y1It2","Y1It1",# "Y2It4","Y2It3","Y2It2","Y2It1")# Assign the data to the covariate test sets#covariates_test=cbind(Y1[997:1196],Y1[998:1197],Y1[999:1198],Y1[1000:1199],# Y2[997:1196],Y2[998:1197],Y2[999:1198],Y2[1000:1199],# I[997:1196],I[998:1197],I[999:1198],I[1000:1199],# Y1inc[997:1196],Y1inc[998:1197],Y1inc[999:1198],# Y2inc[997:1196],Y2inc[998:1197],Y2inc[999:1198])
covariates_test=cbind(Y1[997:1196],Y1[998:1197],Y1[999:1198],Y1[1000:1199],
Y2[997:1196],Y2[998:1197],Y2[999:1198],Y2[1000:1199],
I[997:1196],I[998:1197],I[999:1198],I[1000:1199],
Y1square[997:1196],Y1square[998:1197],Y1square[999:1198],Y1square[1000:1199],
Y2square[997:1196],Y2square[998:1197],Y2square[999:1198],Y2square[1000:1199],
Isquare[997:1196],Isquare[998:1197],Isquare[999:1198],Isquare[1000:1199],
Y1interactY2[997:1196],Y1interactY2[998:1197],
Y1interactY2[999:1198],Y1interactY2[1000:1199])
# Y1interactI[997:1196],Y1interactI[998:1197],
# Y1interactI[999:1198],Y1interactI[1000:1199],
#Y2interactI[997:1196],Y2interactI[998:1197],
#Y2interactI[999:1198],Y2interactI[1000:1199])
colnames(covariates_test)=c("Y1t4","Y1t3","Y1t2","Y1t1",
"Y2t4","Y2t3","Y2t2","Y2t1",
"It4","It3","It2","It1",
"Y1sqt4","Y1sqt3","Y1sqt2","Y1sqt1",
"Y2sqt4","Y2sqt3","Y2sqt2","Y2sqt1",
"Isqt4","Isqt3","Isqt2","Isqt1",
"Y1Y2t4","Y1Y2t3","Y1Y2t2","Y1Y2t1")
#"Y1It4","Y1It3","Y1It2","Y1It1",#"Y2It4","Y2It3","Y2It2","Y2It1")
############################################ part a
#################################################################################
# First, creating random forests for Y1 and Y2
# Create a Random Forest model for Y1 now with default parameters
model1_a = randomForest(Y1[5:1000] ~ ., data = covariates, importance=TRUE)
varImpPlot(model1_a) # this tells me that the Isquare and Y1-interaction-with-I variables are important.
[Figure: varImpPlot(model1_a), %IncMSE and IncNodePurity panels]
model1_a
# Create a Random Forest model with default parameters for Y2 now
model2_a = randomForest(Y2[5:1000] ~ ., data = covariates, importance=TRUE)
varImpPlot(model2_a) # here we see that Y1Y2(t-1) is the most important variable.
[Figure: varImpPlot(model2_a), %IncMSE and IncNodePurity panels]
model2_a
######################################## Done creating models for Y1 and Y2
####################################################################################
# First, let's check how our model is doing on the training set.
# I will do prediction on the training set for Y1
prediction1_a_train= predict(model1_a,covariates)
# I will check the histogram of residuals and the mean square error.
hist(Y1[5:1000]- prediction1_a_train ) # it looks normal, so that is good.
[Figure: histogram of Y1[5:1000] - prediction1_a_train]
MSE1_a_train= 1/996 * sum((Y1[5:1000]- prediction1_a_train)^2)
MSE1_a_train # this is pretty small, which is expected because we are testing on the training set. This is for Y1.
## [1] 1.097738
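# (A small sketch, not in the original: a helper like this avoids hand-typing the
# 1/996 and 1/200 normalizing factors used in the MSE formulas here and below.)
mse <- function(y, yhat) mean((y - yhat)^2)
# mse(Y1[5:1000], prediction1_a_train)  # reproduces MSE1_a_train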
# Now let us plot and see how it looks.
library("ggplot2")
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
plot1_train_a=ggplot(data=NULL,aes(Time, Y1, colour = prediction))+
  geom_line(aes(x=c(5:1000), y=Y1[5:1000]), color="black")+
  geom_line(aes(x=c(5:1000), y=prediction1_a_train, color="red"))
print(plot1_train_a) # this looks as good as we expected
[Figure: Y1 (black) and training prediction (red) over time, t = 5..1000]
# Now let us do the same calculations for Y2.
# I will do prediction on the training set for Y2
prediction2_a_train= predict(model2_a,covariates)
# I will check the histogram of residuals and the mean square error.
hist(Y2[5:1000]- prediction2_a_train ) # it looks normal, so that is good.
[Figure: histogram of Y2[5:1000] - prediction2_a_train]
MSE2_a_train= 1/996 * sum((Y2[5:1000]- prediction2_a_train)^2)
MSE2_a_train # this is pretty small, which is expected because we are testing on the training set. This is for Y2.
## [1] 122.7025
# Now let us plot and see how it looks.
library("ggplot2")
plot2_train_a=ggplot(data=NULL,aes(Time, Y2, colour = prediction))+
  geom_line(aes(x=c(5:1000), y=Y2[5:1000]), color="black")+
  geom_line(aes(x=c(5:1000), y=prediction2_a_train, color="red"))
print(plot2_train_a) # this looks as good as we expected.
[Figure: Y2 (black) and training prediction (red) over time, t = 5..1000]
########################################## Done checking how our models do on the training set
##################################################################################
# Now it is time to check on the test set.
# I will do prediction on the test set for Y1
prediction1_a_test= predict(model1_a, covariates_test)
# I will check the histogram of residuals and the mean square error for Y1
hist(Y1[1001:1200]- prediction1_a_test ) # it looks normal, so that is good.
[Figure: histogram of Y1[1001:1200] - prediction1_a_test]
MSE1_a_test= 1/200 * sum((Y1[1001:1200]- prediction1_a_test)^2)
MSE1_a_test
## [1] 4.918447
library("ggplot2")
plot1_a_test=ggplot(data=NULL,aes(Time, Y1, colour = prediction))+
  geom_line(aes(x=c(1001:1200), y=Y1[1001:1200]), color="black")+
  geom_line(aes(x=c(1001:1200), y=prediction1_a_test, color="red"))
print(plot1_a_test)
[Figure: Y1 (black) and test prediction (red) over time, t = 1001..1200]
# I will do prediction on the test set for Y2
prediction2_a_test= predict(model2_a, covariates_test)
# I will check the histogram of residuals and the mean square error for Y2
hist(Y2[1001:1200] - prediction2_a_test ) # it looks normal too, which is good.
[Figure: histogram of Y2[1001:1200] - prediction2_a_test]
MSE2_a_test = 1/200 * sum((Y2[1001:1200] - prediction2_a_test )^2)
MSE2_a_test
## [1] 572.9075
plot2_a_test=ggplot(data=NULL,aes(Time, Y2, colour = prediction))+
  geom_line(aes(x=c(1001:1200), y=Y2[1001:1200]), color="black")+
  geom_line(aes(x=c(1001:1200), y=prediction2_a_test, color="red"))
print(plot2_a_test)
[Figure: Y2 (black) and test prediction (red) over time, t = 1001..1200]
########################################## Done testing on the test set
# Our conclusion, as you can see from the MSEs and the plots:
# the prediction for Y1 is far better than the prediction for Y2.
############################################# part b
# Now let's create models on the increments
#################################################################################
# Create a Random Forest model with default parameters for Y1inc now
model1_b = randomForest(Y1inc[4:999] ~ ., data = covariates, importance=TRUE)
varImpPlot(model1_b) #this tells me that I(t-1) is the most important.
[Figure: varImpPlot(model1_b), %IncMSE and IncNodePurity panels]
model1_b
# Create a Random Forest model with default parameters for Y2inc now
model2_b = randomForest(Y2inc[5:1000] ~ ., data = covariates,importance=TRUE)
varImpPlot(model2_b) #this tells me that Y2(t-1) is the most important.
[Figure: varImpPlot(model2_b), %IncMSE and IncNodePurity panels]
model2_b
##################################################################################
# First, let's check how well we are doing on the training set.
# I will do prediction for Y1inc on the training set
prediction1_b_inc = predict(model1_b,covariates)
prediction1_b_train=Y1[5:1000] + prediction1_b_inc # we add the observed series back to get a prediction for Y1
#prediction1_b
head(prediction1_b_train)
## 1 2 3 4 5 6
## 40.42699 37.91030 46.31380 39.63054 38.46564 45.22874
hist(Y1[5:1000] - prediction1_b_train) # it looks normal, that is good.
[Figure: histogram of Y1[5:1000] - prediction1_b_train]
MSE1_b_train= 1/996 * sum((Y1[5:1000] - prediction1_b_train)^2)
MSE1_b_train # this is the mean square error for Y1 when predicting via increments
## [1] 6.173792
plot1_train_b=ggplot(data=NULL,aes(Time, Y1, colour = prediction))+
  geom_line(aes(x=c(5:1000), y=Y1[5:1000]), color="black")+
  geom_line(aes(x=c(5:1000), y=prediction1_b_train, color="red"))
print(plot1_train_b) # this looks as good as we expected
[Figure: Y1 (black) and increment-based training prediction (red), t = 5..1000]
# Prediction for the increments in Y2. I will do prediction for Y2inc on the training set.
prediction2_b_inc = predict(model2_b,covariates)
prediction2_b_train=Y2[5:1000] + prediction2_b_inc # we add the observed series back to get a prediction for Y2
#prediction2_b
head(prediction2_b_train)
## 1 2 3 4 5 6
## 174.2458 111.7770 115.6104 147.6885 167.1237 192.8537
#residuals2_b
hist(Y2[5:1000] - prediction2_b_train) # it looks normal, that is good.
[Figure: histogram of Y2[5:1000] - prediction2_b_train]
MSE2_b_train = 1/996 * sum(( Y2[5:1000] - prediction2_b_train)^2)
MSE2_b_train # this is the mean square error for Y2 when predicting via increments
## [1] 378.267
plot2_train_b=ggplot(data=NULL,aes(Time, Y2, colour = prediction))+
  geom_line(aes(x=c(5:1000), y=Y2[5:1000]), color="black")+
  geom_line(aes(x=c(5:1000), y=prediction2_b_train, color="red"))
print(plot2_train_b) # this looks as good as we expected
[Figure: Y2 (black) and increment-based training prediction (red), t = 5..1000]
#################################################### Done with the training set.
## When we look at the mean square errors on the training sets,
# we see that MSE1_a_train is much better than MSE1_b_train.
# Similarly, MSE2_a_train is much better than MSE2_b_train.
# So we conclude the model in (a) is better than the model in (b) on the training set.
# Let's see what the case is on the test set.
##################################################################################
# Now it is time to check on the test set.
# I will do prediction for Y1inc on the test set
prediction1_b_inc = predict(model1_b, covariates_test)
prediction1_b_test=Y1[1000:1199] + prediction1_b_inc # we add the previous observed values to get a prediction for Y1
#prediction1_b
head(prediction1_b_test)
## 1 2 3 4 5 6
## 46.07365 46.29716 43.73886 37.32133 37.97970 34.60023
hist(Y1[1001:1200] - prediction1_b_test) # it looks normal, that is good.
[Figure: histogram of Y1[1001:1200] - prediction1_b_test]
MSE1_b_test= 1/200 * sum((Y1[1001:1200] - prediction1_b_test)^2)
MSE1_b_test # this is the mean square error for Y1 when predicting via increments
## [1] 5.832033
plot1_b_test=ggplot(data=NULL,aes(Time, Y1, colour = prediction))+
  geom_line(aes(x=c(1001:1200), y=Y1[1001:1200]), color="black")+
  geom_line(aes(x=c(1001:1200), y=prediction1_b_test, color="red"))
print(plot1_b_test)
[Figure: Y1 (black) and increment-based test prediction (red), t = 1001..1200]
# Prediction for the increments in Y2.
# I will do prediction for Y2inc on the test set
prediction2_b_inc = predict(model2_b, covariates_test)
prediction2_b_test=Y2[1000:1199] + prediction2_b_inc # we add the previous observed values to get a prediction for Y2
#prediction2_b
head(prediction2_b_test)
## 1 2 3 4 5 6
## 193.8985 210.0268 146.4600 188.1269 171.1992 171.0874
hist( Y2[1001:1200] - prediction2_b_test) # it looks normal, that is good.
[Figure: histogram of Y2[1001:1200] - prediction2_b_test]
MSE2_b_test= 1/200 * sum(( Y2[1001:1200] - prediction2_b_test)^2)
MSE2_b_test # this is the mean square error for Y2 when predicting via increments
## [1] 682.5334
plot2_b_test=ggplot(data=NULL,aes(Time, Y2, colour = prediction))+
  geom_line(aes(x=c(1001:1200), y=Y2[1001:1200]), color="black")+
  geom_line(aes(x=c(1001:1200), y=prediction2_b_test, color="red"))
print(plot2_b_test)
[Figure: Y2 (black) and increment-based test prediction (red), t = 1001..1200]
#################################################### Done with the test set.
# The prediction for Y1inc is far better than the prediction for Y2inc.
# We compared MSE1_a_test with MSE1_b_test and MSE2_a_test with MSE2_b_test:
# MSE1_b_test for Y1 is close to MSE1_a_test, but MSE2_b_test for Y2 is far worse in model (b),
# so overall we pick the model in (a) as our best model.
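# Summary of the test MSEs reported above:
#            model (a)   model (b)
#   Y1:         4.92        5.83
#   Y2:       572.91      682.53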
######## part c
###############################################################################################
V1=(Y1[5:1000]-predict(model1_a))^2 #variance for Y1
V2=(Y2[5:1000]-predict(model2_a))^2 #variance for Y2
Cov12=sqrt(V1*V2) #covariance for Y1 and Y2
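# (Note added for clarity: calling predict() on a randomForest fit with no newdata
# returns the out-of-bag predictions, so V1 and V2 here are squared out-of-bag
# residuals rather than in-sample fitted errors.)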
####################################################
# Create random forests for the variance and covariance
# create random forest for V1
model1_c=randomForest(V1 ~ ., data = covariates, importance=TRUE)
varImpPlot(model1_c) #this tells me that I(t-1) is the most important.
[Figure: varImpPlot(model1_c), %IncMSE and IncNodePurity panels]
model1_c
# create random forest for V2
model2_c=randomForest(V2 ~ ., data = covariates,importance=TRUE)
varImpPlot(model2_c) #this tells me that Y1(t-2) is the most important.
[Figure: varImpPlot(model2_c), %IncMSE and IncNodePurity panels]
model2_c
# create random forest for V12
model12_c=randomForest(Cov12 ~ ., data = covariates,importance=TRUE)
varImpPlot(model12_c) #this tells me that Y2I(t-1) is the most important.
[Figure: varImpPlot(model12_c), %IncMSE and IncNodePurity panels]
model12_c
####################################################
######################################################################################################
# First we will check on the training set
Y1sd_train = predict(model1_c) # predicting the variance for Y1
Y2sd_train = predict(model2_c) # predicting the variance for Y2
Y1Y2cov_train = predict(model12_c) # predicting the covariance for Y1 and Y2
# we expect this to be standard normal
N1_train=(Y1[5:1000] - predict(model1_a))/(sqrt(Y1sd_train))
hist(N1_train)
[Figure: histogram of N1_train]
qqPlot(N1_train)
[Figure: qqPlot of N1_train vs. normal quantiles]
## [1] 866 503
# same for Y2 on the training set
N2_train=(Y2[5:1000] - predict(model2_a))/(sqrt(Y2sd_train))
hist(N2_train)
[Figure: histogram of N2_train]
qqPlot(N2_train)
[Figure: qqPlot of N2_train vs. normal quantiles]
## [1] 101 100
# They look pretty normal, as we hoped.
# evaluate the 2-d case for the covariance on the training set
M_train=matrix(0,ncol=2,nrow = 996)
N1_wocov_train=Y1[5:1000] - predict(model1_a)
N2_wocov_train=Y2[5:1000] - predict(model2_a)
sum(Y1sd_train*Y2sd_train-Y1Y2cov_train^2 > 0) # count how many estimated 2x2 covariance matrices are positive definite
## [1] 979
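# (Added reasoning step: a symmetric 2x2 matrix with positive diagonal is positive
# definite exactly when its determinant v1*v2 - c12^2 is positive, so 979 of the 996
# estimated matrices are already valid; the loop below caps the implied correlation
# at +/-0.9 for the remaining ones.)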
for(i in 1:996){
  # cap the implied correlation at +/-0.9 so that K stays positive definite
  if((Y1Y2cov_train[i])/sqrt(Y1sd_train[i]*Y2sd_train[i]) > 1){
    Y1Y2cov_train[i]=0.90*sqrt(Y1sd_train[i]*Y2sd_train[i])
  }
  if((Y1Y2cov_train[i])/sqrt(Y1sd_train[i]*Y2sd_train[i]) < -1){
    Y1Y2cov_train[i]=(-0.90)*sqrt(Y1sd_train[i]*Y2sd_train[i])
  }
  K=matrix(c(Y1sd_train[i], Y1Y2cov_train[i],Y1Y2cov_train[i], Y2sd_train[i]),ncol=2)
  # whiten the residual pair: M = K^(-1/2) %*% (N1, N2)'
  M_train[i,]=solve(sqrtm(K))%*%t(as.matrix(cbind(N1_wocov_train[i],N2_wocov_train[i])))
}
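# (A minimal sketch of what the loop above does, written as a hypothetical function;
# it assumes expm::sqrtm, loaded earlier. The residual pair is "whitened" by K^(-1/2),
# so if the fitted means and covariances were exact we would get iid N(0, I) pairs.)
whiten_pair <- function(r1, r2, v1, v2, c12, cap = 0.9) {
  rho <- c12 / sqrt(v1 * v2)
  if (abs(rho) > 1) c12 <- sign(rho) * cap * sqrt(v1 * v2)  # force positive definiteness
  K <- matrix(c(v1, c12, c12, v2), ncol = 2)
  as.numeric(solve(sqrtm(K)) %*% c(r1, r2))
}
# M_train[i, ] equals whiten_pair(N1_wocov_train[i], N2_wocov_train[i],
#                                 Y1sd_train[i], Y2sd_train[i], Y1Y2cov_train[i])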
colMeans(M_train) #this looks very good because it is so close to zero.
## [1] -0.023629716 0.002716292
cov(M_train) # with this we finish part c on the training set; we see that it does not look much like the identity matrix.
## [,1] [,2]
## [1,] 3.737858 -1.289074
## [2,] -1.289074 1.228550
hist(M_train[,1])
[Figure: histogram of M_train[,1]]
hist(M_train[,2])
[Figure: histogram of M_train[,2]]
qqPlot(M_train[,1]) # some parts are slightly off.
[Figure: qqPlot of M_train[,1] vs. normal quantiles]
## [1] 860 25
qqPlot(M_train[,2]) # this looks pretty good
[Figure: qqPlot of M_train[,2] vs. normal quantiles]
## [1] 101 756
############################################################### Here we finish the training set.
############################################################### Start testing on the test set.
Y1sd = predict(model1_c,covariates_test)
Y2sd = predict(model2_c,covariates_test)
Y1Y2cov= predict(model12_c,covariates_test)
N1_test=(Y1[1001:1200] - predict(model1_a, covariates_test))/(sqrt(Y1sd))
hist(N1_test) # looks normal, that is good.
[Figure: histogram of N1_test]
qqPlot(N1_test) #this looks good.
[Figure: qqPlot of N1_test vs. normal quantiles]
## [1] 19 145
N2_test=(Y2[1001:1200] - predict(model2_a, covariates_test))/(sqrt(Y2sd))
hist(N2_test)
[Figure: histogram of N2_test]
qqPlot(N2_test) #this looks good.
[Figure: qqPlot of N2_test vs. normal quantiles]
## [1] 178 186
# evaluate the 2-d case for the covariance on the test set
M_test=matrix(0,ncol=2,nrow = 200)
N1_wocov_test=Y1[1001:1200] - predict(model1_a, covariates_test)
N2_wocov_test=Y2[1001:1200] - predict(model2_a, covariates_test)
sum(Y1sd*Y2sd-Y1Y2cov^2 > 0)
## [1] 198
library(expm)
for(i in 1:200){
  if((Y1Y2cov[i])/sqrt(Y1sd[i]*Y2sd[i]) > 1){
    Y1Y2cov[i]=0.90*sqrt(Y1sd[i]*Y2sd[i])
  }
  if((Y1Y2cov[i])/sqrt(Y1sd[i]*Y2sd[i]) < -1){
    Y1Y2cov[i]=(-0.90)*sqrt(Y1sd[i]*Y2sd[i])
  }
  K=matrix(c(Y1sd[i], Y1Y2cov[i],Y1Y2cov[i], Y2sd[i]),ncol=2)
  M_test[i,]=solve(sqrtm(K))%*%t(as.matrix(cbind(N1_wocov_test[i],N2_wocov_test[i])))
}
colMeans(M_test) # good; we expect this to be close to zero.
## [1] 0.19635984 -0.08372921
cov(M_test) # with this we finish part c; we see that it does not look much like the identity matrix.
## [,1] [,2]
## [1,] 3.474919 -1.233411
## [2,] -1.233411 1.278205
hist(M_test[,1]) # roughly normal, but the mean is not zero
[Figure: histogram of M_test[,1]]
hist(M_test[,2]) # roughly normal, and the mean is close to zero.
[Figure: histogram of M_test[,2]]
qqPlot(M_test[,1])
[Figure: qqPlot of M_test[,1] vs. normal quantiles]
## [1] 145 174
qqPlot(M_test[,2])
[Figure: qqPlot of M_test[,2] vs. normal quantiles]
## [1] 178 186
################# Finished checking the test set
###### part d
# First we take the whole set as the training set. So
covariates_d=cbind(Y1[1:1196],Y1[2:1197],Y1[3:1198],Y1[4:1199],
Y2[1:1196],Y2[2:1197],Y2[3:1198],Y2[4:1199],
I[1:1196],I[2:1197],I[3:1198],I[4:1199],
Y1square[1:1196],Y1square[2:1197],Y1square[3:1198],Y1square[4:1199],
Y2square[1:1196],Y2square[2:1197],Y2square[3:1198],Y2square[4:1199],
Isquare[1:1196],Isquare[2:1197],Isquare[3:1198],Isquare[4:1199],
Y1interactY2[1:1196],Y1interactY2[2:1197],Y1interactY2[3:1198],Y1interactY2[4:1199])
# Y1interactI[1:1196],Y1interactI[2:1197],Y1interactI[3:1198],Y1interactI[4:1199],
#Y2interactI[1:1196],Y2interactI[2:1197],Y2interactI[3:1198],Y2interactI[4:1199])
# We give column names here. Note Y1t4 means Y1(t-4), Y1t3 means Y1(t-3), etc.; the rest
# follows the same idea. This makes the important variables easy to read when we use the
# varImpPlot function.
colnames(covariates_d)=c("Y1t4","Y1t3","Y1t2","Y1t1",
"Y2t4","Y2t3","Y2t2","Y2t1",
"It4","It3","It2","It1",
"Y1sqt4","Y1sqt3","Y1sqt2","Y1sqt1",
"Y2sqt4","Y2sqt3","Y2sqt2","Y2sqt1",
"Isqt4","Isqt3","Isqt2","Isqt1",
"Y1Y2t4","Y1Y2t3","Y1Y2t2","Y1Y2t1")
# "Y1It4","Y1It3","Y1It2","Y1It1",#"Y2It4","Y2It3","Y2It2","Y2It1")# covariates_d_try=cbind(Y1[1:1194],Y1[2:1195],Y1[3:1196],Y1[4:1197],Y1[5:1198],# Y2[1:1194],Y2[2:1195],Y2[3:1196],Y2[4:1197],Y2[5:1198],# I[1:1194],I[2:1195],I[3:1196],I[4:1197],I[5:1198])## colnames(covariates_d_try)=c("Y1t5","Y1t4","Y1t3","Y1t2","Y1t1","Y2t5","Y2t4","Y2t3","Y2t2","Y2t1",
# "It5","It4","It3","It2","It1")# covariates_d_try_test=cbind(Y1[1195],Y1[1196],Y1[1197],Y1[1198],Y1[1199],# Y2[1195],Y2[1196],Y2[1197],Y2[1198],Y2[1199],# I[1195],I[1196],I[1197],I[1198],I[1199])covariates_d_test=cbind(Y1[1197],Y1[1198],Y1[1199],Y1[1200],
Y2[1197],Y2[1198],Y2[1199],Y2[1200],
I[1197],I[1198],I[1199],I[1200],
Y1square[1197],Y1square[1198],Y1square[1199],Y1square[1200],
Y2square[1197],Y2square[1198],Y2square[1199],Y2square[1200],
Isquare[1197],Isquare[1198],Isquare[1199],Isquare[1200],
Y1interactY2[1197],Y1interactY2[1198],Y1interactY2[1199],Y1interactY2[1200])
#Y1interactI[1197],Y1interactI[1198],Y1interactI[1199],Y1interactI[1200],
#Y2interactI[1197],Y2interactI[1198],Y2interactI[1199],Y2interactI[1200])
colnames(covariates_d_test)=c("Y1t4","Y1t3","Y1t2","Y1t1",
"Y2t4","Y2t3","Y2t2","Y2t1",
"It4","It3","It2","It1",
"Y1sqt4","Y1sqt3","Y1sqt2","Y1sqt1",
"Y2sqt4","Y2sqt3","Y2sqt2","Y2sqt1",
"Isqt4","Isqt3","Isqt2","Isqt1",
"Y1Y2t4","Y1Y2t3","Y1Y2t2","Y1Y2t1")
#"Y1It4","Y1It3","Y1It2","Y1It1",#"Y2It4","Y2It3","Y2It2","Y2It1")
########################################## We create the random forests for Y1 and Y2
model_d_Y1=randomForest(Y1[5:1200]~ ., data = covariates_d, importance=TRUE)
varImpPlot(model_d_Y1)
[Figure: varImpPlot(model_d_Y1), %IncMSE and IncNodePurity panels]
model_d_Y1
model_d_Y2=randomForest(Y2[5:1200]~ ., data = covariates_d, importance=TRUE)
varImpPlot(model_d_Y2)
[Figure: varImpPlot(model_d_Y2), %IncMSE and IncNodePurity panels]
model_d_Y2
## as in part c
V1_d=(Y1[5:1200]-predict(model_d_Y1))^2 #variance for Y1
V2_d=(Y2[5:1200]-predict(model_d_Y2))^2 #variance for Y2
Cov12_d=sqrt(V1_d*V2_d) #covariance for Y1 and Y2
########################################## We create the random forests for the variances on the whole set
model_d_V1=randomForest(V1_d~ ., data = covariates_d, importance=TRUE)
model_d_V2=randomForest(V2_d~ ., data = covariates_d, importance=TRUE)
model_d_V12=randomForest(Cov12_d~ ., data = covariates_d, importance=TRUE)
# Predict on the train and test sets.
predict_Y1=predict(model_d_Y1)
hist(Y1[5:1200]-predict_Y1) # looks pretty normal with mean zero.
[Figure: histogram of Y1[5:1200] - predict_Y1]
predict_Y1_test=predict(model_d_Y1,covariates_d_test)
############################################ Now we first do one future scenario
Ilast4=I[1197:1200]
Inew=c(Ilast4,Ifuture)
Isqrnew=Inew^2
Y1last4=Y1[1197:1200]
Y1sqrlast4=Y1square[1197:1200]
Y2last4=Y2[1197:1200]
Y2sqrlast4=Y2square[1197:1200]
Y1Y2last4=Y1interactY2[1197:1200]
Y1last204=rep(0,204)
Y2last204=rep(0,204)
for(i in 1:200){
  k=4+i
  # build the one-row covariate matrix for the current position
  currentposition=matrix(c(Y1last4[(k-4):(k-1)], Y2last4[(k-4):(k-1)], Inew[(k-4):(k-1)],
                           Y1sqrlast4[(k-4):(k-1)], Y2sqrlast4[(k-4):(k-1)], Isqrnew[(k-4):(k-1)],
                           Y1Y2last4[(k-4):(k-1)]),
                         nrow=1)
  colnames(currentposition)=c("Y1t4","Y1t3","Y1t2","Y1t1",
                              "Y2t4","Y2t3","Y2t2","Y2t1",
                              "It4","It3","It2","It1",
                              "Y1sqt4","Y1sqt3","Y1sqt2","Y1sqt1",
                              "Y2sqt4","Y2sqt3","Y2sqt2","Y2sqt1",
                              "Isqt4","Isqt3","Isqt2","Isqt1",
                              "Y1Y2t4","Y1Y2t3","Y1Y2t2","Y1Y2t1")
  prediction_Y=cbind(predict(model_d_Y1,currentposition), predict(model_d_Y2,currentposition))
  Sigma11=predict(model_d_V1,currentposition)
  Sigma22=predict(model_d_V2,currentposition)
  Sigma12=predict(model_d_V12,currentposition)
  # cap the implied correlation at +/-0.9 so K_d stays positive definite
  if((Sigma12)/sqrt(Sigma11*Sigma22) > 1){
    Sigma12=0.9*sqrt(Sigma11*Sigma22)
  }
  if((Sigma12)/sqrt(Sigma11*Sigma22) < -1){
    Sigma12=(-0.9)*sqrt(Sigma11*Sigma22)
  }
  K_d=matrix(c(Sigma11, Sigma12,Sigma12, Sigma22),ncol=2)
  # draw the next point from a bivariate normal with mean prediction_Y and covariance K_d
  newpoint=t(prediction_Y)+sqrtm(K_d)%*%rnorm(2,mean=0,sd=1)
  Y1last4=c(Y1last4,newpoint[1])
  Y2last4=c(Y2last4,newpoint[2])
  Y1sqrlast4=c(Y1sqrlast4,log(newpoint[1]^2))
  Y2sqrlast4=c(Y2sqrlast4,log(newpoint[2]^2))
  Y1Y2last4=c(Y1Y2last4,log(newpoint[1]*newpoint[2]))
}
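# (Added for clarity: each step of the loop above draws the next (Y1, Y2) pair from a
# bivariate normal, using the fact that if z ~ N(0, I) then mu + K^(1/2) z ~ N(mu, K).
# A one-line sketch of that draw, assuming expm::sqrtm:)
draw_bvn <- function(mu, K) as.numeric(mu + sqrtm(K) %*% rnorm(2))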
plot1_d_one=ggplot(data=NULL,aes(Time, Y1))+
  geom_line(aes(x=c(1:1197), y=Y1[c(1:1197)]), color="blue")+
  geom_line(aes(x=c(1197:1400), y=Y1last4), color="green")
print(plot1_d_one)
[Figure: Y1 history (blue) with one simulated future scenario (green)]
plot2_d_one=ggplot(data=NULL,aes(Time, Y2))+
  geom_line(aes(x=c(1:1197), y=Y2[c(1:1197)]), color="blue")+
  geom_line(aes(x=c(1197:1400), y=Y2last4), color="green")
print(plot2_d_one)
[Figure: Y2 history (blue) with one simulated future scenario (green)]
############################################ Done with one scenario.
############################################ Now let's do 100 scenarios
sim100Y1=list()
sim100Y2=list()
for(m in 1:100){
  Ilast4=I[1197:1200]
  Inew=c(Ilast4,Ifuture)
  Isqrnew=Inew^2
  Y1last4=Y1[1197:1200]
  Y1sqrlast4=Y1square[1197:1200]
  Y2last4=Y2[1197:1200]
  Y2sqrlast4=Y2square[1197:1200]
  Y1Y2last4=Y1interactY2[1197:1200]
  for(i in 1:200){
    k=4+i
    currentposition=matrix(c(Y1last4[(k-4):(k-1)], Y2last4[(k-4):(k-1)], Inew[(k-4):(k-1)],
                             Y1sqrlast4[(k-4):(k-1)], Y2sqrlast4[(k-4):(k-1)], Isqrnew[(k-4):(k-1)],
                             Y1Y2last4[(k-4):(k-1)]),
                           nrow=1)
    colnames(currentposition)=c("Y1t4","Y1t3","Y1t2","Y1t1",
                                "Y2t4","Y2t3","Y2t2","Y2t1",
                                "It4","It3","It2","It1",
                                "Y1sqt4","Y1sqt3","Y1sqt2","Y1sqt1",
                                "Y2sqt4","Y2sqt3","Y2sqt2","Y2sqt1",
                                "Isqt4","Isqt3","Isqt2","Isqt1",
                                "Y1Y2t4","Y1Y2t3","Y1Y2t2","Y1Y2t1")
    prediction_Y=cbind(predict(model_d_Y1,currentposition), predict(model_d_Y2,currentposition))
    Sigma11=predict(model_d_V1,currentposition)
    Sigma22=predict(model_d_V2,currentposition)
    Sigma12=predict(model_d_V12,currentposition)
    if((Sigma12)/sqrt(Sigma11*Sigma22) > 1){
      Sigma12=0.9*sqrt(Sigma11*Sigma22)
    }
    if((Sigma12)/sqrt(Sigma11*Sigma22) < -1){
      Sigma12=(-0.9)*sqrt(Sigma11*Sigma22)
    }
    K_d=matrix(c(Sigma11, Sigma12,Sigma12, Sigma22),ncol=2)
    newpoint=t(prediction_Y)+sqrtm(K_d)%*%rnorm(2,mean=0,sd=1)
    Y1last4=c(Y1last4,newpoint[1])
    Y2last4=c(Y2last4,newpoint[2])
    Y1sqrlast4=c(Y1sqrlast4,log(newpoint[1]^2)) # keep the squared covariates on the log scale, matching the one-scenario loop
    Y2sqrlast4=c(Y2sqrlast4,log(newpoint[2]^2))
    Y1Y2last4=c(Y1Y2last4,log(newpoint[1]*newpoint[2]))
  }
  Y1last204=Y1last4
  Y2last204=Y2last4
  sim100Y1[[m]]=Y1last204
  sim100Y2[[m]]=Y2last204
}
plot(sim100Y1[[1]],type="l")
for(i in 2:100){
  lines(sim100Y1[[i]])
}
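# (A sketch, not in the original: pointwise 5%/50%/95% bands across the 100 simulated
# paths summarize the fan of scenarios more readably than the spaghetti plot alone.)
Y1paths <- do.call(rbind, sim100Y1)  # one simulated path per row
Y1bands <- apply(Y1paths, 2, quantile, probs = c(0.05, 0.5, 0.95))
matlines(t(Y1bands), lty = c(2, 1, 2), lwd = 2, col = "red")  # overlay on the plot above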
[Figure: 100 simulated Y1 scenarios overlaid]
plot(sim100Y2[[1]],type="l")
for(i in 2:100){
  lines(sim100Y2[[i]])
}
[Figure: 100 simulated Y2 scenarios overlaid]
############################################################# Done with the 100 simulation scenarios
### I get somewhat poor simulation results with these covariates.
######################## ######################## ######################## ########################
# But as I said earlier, I did 5 steps back with different covariates, so I am adding
# that one here as well. It is basically the same thing, but without interactions and with 5 steps back.
################################################################################################
###### part d alternative simulations
# First we take the whole set as the training set. So
# covariates_d=cbind(Y1[1:1196],Y1[2:1197],Y1[3:1198],Y1[4:1199],
# Y2[1:1196],Y2[2:1197],Y2[3:1198],Y2[4:1199],
# I[1:1196],I[2:1197],I[3:1198],I[4:1199],
# Y1inc[1:1196],Y1inc[2:1197], Y1inc[3:1198],
# Y2inc[1:1196],Y2inc[2:1197], Y2inc[3:1198])
# Here I am trying 5 steps back instead of 4 in order to get a better prediction.
covariates_d_try=cbind(Y1[1:1194],Y1[2:1195],Y1[3:1196],Y1[4:1197],Y1[5:1198],
Y2[1:1194],Y2[2:1195],Y2[3:1196],Y2[4:1197],Y2[5:1198],
I[1:1194],I[2:1195],I[3:1196],I[4:1197],I[5:1198])
# colnames(covariates_d)=c("Y1t4","Y1t3","Y1t2","Y1t1", "Y2t4","Y2t3","Y2t2","Y2t1",
# "It4","It3","It2","It1", "Y1inct3","Y1inct2","Y1inct1",
# "Y2inct3","Y2inct2","Y2inct1")
colnames(covariates_d_try)=c("Y1t5","Y1t4","Y1t3","Y1t2","Y1t1","Y2t5","Y2t4","Y2t3","Y2t2","Y2t1",
"It5","It4","It3","It2","It1")
covariates_d_try_test=cbind(Y1[1195],Y1[1196],Y1[1197],Y1[1198],Y1[1199],
Y2[1195],Y2[1196],Y2[1197],Y2[1198],Y2[1199],
I[1195],I[1196],I[1197],I[1198],I[1199])
colnames(covariates_d_try_test)=c("Y1t5","Y1t4","Y1t3","Y1t2","Y1t1","Y2t5","Y2t4","Y2t3","Y2t2","Y2t1",
"It5","It4","It3","It2","It1")
model_d_Y1=randomForest(Y1[6:1199]~ ., data = covariates_d_try, importance=TRUE)
varImpPlot(model_d_Y1)
[Figure: varImpPlot(model_d_Y1), %IncMSE and IncNodePurity panels]
model_d_Y1
model_d_Y2=randomForest(Y2[6:1199]~ ., data = covariates_d_try, importance=TRUE)
varImpPlot(model_d_Y2)
[Figure: varImpPlot(model_d_Y2), %IncMSE and IncNodePurity panels]
model_d_Y2
V1_d=(Y1[6:1199]-predict(model_d_Y1))^2
V2_d=(Y2[6:1199]-predict(model_d_Y2))^2
Cov12_d=sqrt(V1_d*V2_d)
model_d_V1=randomForest(V1_d~ ., data = covariates_d_try, importance=TRUE)
varImpPlot(model_d_V1)
model_d_V2=randomForest(V2_d~ ., data = covariates_d_try, importance=TRUE)
varImpPlot(model_d_V2)
[Figure: variable importance plot, %IncMSE and IncNodePurity panels]
model_d_V1
model_d_V12=randomForest(Cov12_d~ ., data = covariates_d_try, importance=TRUE)
varImpPlot(model_d_V12)
[Figure: varImpPlot(model_d_V12), %IncMSE and IncNodePurity panels]
model_d_V12
predict_Y1=predict(model_d_Y1,covariates_d_try_test)
# First we check one scenario
Ilast5=I[1196:1200]
Inew=c(Ilast5,Ifuture)
Y1last5=Y1[1196:1200]
Y2last5=Y2[1196:1200]
Y1last205=rep(0,205)
Y2last205=rep(0,205)
for(i in 1:200){
  k=5+i
  currentposition=matrix(c(Y1last5[(k-5):(k-1)],Y2last5[(k-5):(k-1)],Inew[(k-5):(k-1)]),nrow=1)
  colnames(currentposition)=c("Y1t5","Y1t4","Y1t3","Y1t2","Y1t1","Y2t5","Y2t4","Y2t3","Y2t2","Y2t1",
                              "It5","It4","It3","It2","It1")
  prediction_Y=cbind(predict(model_d_Y1,currentposition), predict(model_d_Y2,currentposition))
  Sigma11=predict(model_d_V1,currentposition)
  Sigma22=predict(model_d_V2,currentposition)
  Sigma12=predict(model_d_V12,currentposition)
  if((Sigma12)/sqrt(Sigma11*Sigma22) > 1){
    Sigma12=0.9*sqrt(Sigma11*Sigma22)
  }
  if((Sigma12)/sqrt(Sigma11*Sigma22) < -1){
    Sigma12=(-0.9)*sqrt(Sigma11*Sigma22)
  }
  K_d=matrix(c(Sigma11, Sigma12,Sigma12, Sigma22),ncol=2)
  newpoint=t(prediction_Y)+sqrtm(K_d)%*%rnorm(2,mean=0,sd=1)
  Y1last5=c(Y1last5,newpoint[1])
  Y2last5=c(Y2last5,newpoint[2])
}
plot1_d=ggplot(data=NULL,aes(Time, Y1))+
  geom_line(aes(x=c(1:1195), y=Y1[c(1:1195)]), color="blue")+
  geom_line(aes(x=c(1196:1400), y=Y1last5), color="green")
print(plot1_d)
[Figure: Y1 history (blue) with one simulated future scenario (green)]
plot2_d=ggplot(data=NULL,aes(Time, Y2))+
  geom_line(aes(x=c(1:1195), y=Y2[c(1:1195)]), color="blue")+
  geom_line(aes(x=c(1196:1400), y=Y2last5), color="green")
print(plot2_d)
[Figure: Y2 history (blue) with one simulated future scenario (green)]
# Now let's check 100 scenarios
sim100Y1=list()
sim100Y2=list()
for(m in 1:100){
  Ilast5=I[1196:1200]
  Inew=c(Ilast5,Ifuture)
  Y1last5=Y1[1196:1200]
  Y2last5=Y2[1196:1200]
  for(i in 1:200){
    k=5+i
    currentposition=matrix(c(Y1last5[(k-5):(k-1)],Y2last5[(k-5):(k-1)],Inew[(k-5):(k-1)]),nrow=1)
    colnames(currentposition)=c("Y1t5","Y1t4","Y1t3","Y1t2","Y1t1","Y2t5","Y2t4","Y2t3","Y2t2","Y2t1",
                                "It5","It4","It3","It2","It1")
    prediction_Y1=cbind(predict(model_d_Y1,currentposition), predict(model_d_Y2,currentposition))
    Sigma11=predict(model_d_V1,currentposition)
    Sigma22=predict(model_d_V2,currentposition)
    Sigma12=predict(model_d_V12,currentposition)
    # here we check positive definiteness
    if((Sigma12)/sqrt(Sigma11*Sigma22) > 1){
      Sigma12=0.9*sqrt(Sigma11*Sigma22)
    }
    if((Sigma12)/sqrt(Sigma11*Sigma22) < -1){
      Sigma12=(-0.9)*sqrt(Sigma11*Sigma22)
    }
    K_d=matrix(c(Sigma11, Sigma12,Sigma12, Sigma22),ncol=2)
    newpoint=t(prediction_Y1)+sqrtm(K_d)%*%rnorm(2,mean=0,sd=1)
    Y1last5=c(Y1last5,newpoint[1])
    Y2last5=c(Y2last5,newpoint[2])
  }
  Y1last205=Y1last5
  Y2last205=Y2last5
  sim100Y1[[m]]=Y1last205
  sim100Y2[[m]]=Y2last205
}
plot(sim100Y1[[1]],type="l")
for(i in 2:100){
  lines(sim100Y1[[i]])
}
[Figure: 100 simulated Y1 scenarios overlaid (alternative covariates)]
plot(sim100Y2[[1]],type="l")
for(i in 2:100){
  lines(sim100Y2[[i]])
}
[Figure: 100 simulated Y2 scenarios overlaid (alternative covariates)]
# Thank you so much for a wonderful semester!
# Kindest regards,
# Sergazy