# For the Toluca Company example, p. 19 KNNL 5thED, 
#                                           prepared by Y.L Tseng #

# This is how you scan a data set in your working directory into R #
# Make sure to change the working directory to the appropriate one #
# Also before scan the data set into R, check the data set first to see 
# the number of columns it has, you need it for assigning ncol below.

ch01ta01<-matrix(scan("ch01ta01.txt"),ncol=2, byrow=T);
ch01ta01

dum<-data.frame(x=ch01ta01[,1],y=ch01ta01[,2]) # to name the columns #
dum       # do you see any difference in dum and ch01ta01? #
attach(dum)

# To get Fig. 1.10 on p.20 ......  # 
# windows()
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(x,y,xlab="Lot Size", ylab="Hours", main="Scatter plot");

fm<-lm(y~x,dum)

plot(x,y,xlab="Lot Size", ylab="Hours", main="Fitted Regression Line");
abline(coef(fm))

# To get Table 1.2 on p.22 ....... #
ta12<-data.frame("x"=x,"y"=y,"y_hat"=fitted(fm),"residual"=resid(fm))
ta12

# For Fig. 1.11, p.20 and Fig. 2.2, p.46 ..... #
summary(fm)     # Please compare the outputs of R and those in text
anova(fm)

names(fm)  #See what you can get from fm

# Load Mass package to construct confidence intervals for
# beta's

confint(fm)    # conf. int.'s for reg. coeff.'s  default: 95% #
confint(fm, level=0.9)

# Estimation of mean response, its confidence intervals  Ex. on p. 55, p. 59
# and prediction intervals 

# This is how you  
# construct confidence/prediction  intervals  for the mean response at given predictor x's values

new<- data.frame(x=seq(10,110,10))
new
predict(lm(y~x), new, se.fit = TRUE)
pred.w.plim <- predict(lm(y~x), new, interval="prediction",level=0.9)
pred.w.clim <- predict(lm(y~x), new, interval="confidence",level=0.9)

cbind(new$x,pred.w.plim) # prediction intervals for x at seq(10,110,10)

cbind(new$x,pred.w.clim) # confidence intervals for x at seq(10,110,10)

# Confidence band for the entire reg. line  Ex. on p. 62, Fig. 2.6

width<-function(n,xh,alpha){
sqrt(2*qf(1-alpha,2,n-2)*sum((y-fitted(fm))^2)/(n-2)*(1/n+(xh-mean(x))^2/((n-1)*var(x))))
}
n<-length(y)
u<-coef(fm)[1]+coef(fm)[2]*new$x+width(n,new$x,0.1)
l<-coef(fm)[1]+coef(fm)[2]*new$x-width(n,new$x,0.1)
band<-cbind(l,u)
cbind(new$x,band)

# Graphical display these inervals
# windows()
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
matplot(new$x,pred.w.clim,
        col=c(1,2,2), lty=c(1,2,2),type="l",# ylim=c(70,500),
main="90% Confidence intervals",ylab="Hours Y")
matplot(new$x,pred.w.plim,
        col=c(1,3,3), lty=c(1,3,3),type="l", #ylim=c(70,500),
main="90% Prediction intervals",ylab="Hours Y")
matplot(new$x,band,col=c(5,5),lty=c(5,5),type="l", #ylim=c(70,500),
main=" 90% Confi. band for reg. line",ylab="Hours Y")
abline(coef(fm))

# It is hard to compare among plots......

# Overlay CI, PI, Conf band at those x values
matplot(new$x,cbind(pred.w.clim, pred.w.plim[,-1],band),
        col=c(1,2,2,3,3,5,5), lty=c(1,2,2,3,3,5,5),type="l", main="Overlay display",ylab="Hours Y")

# Before you quit R, clean up the objects, please. #
detach()

rm(list=ls())

# Also try the "Save History" under "File" of the R Console window,
# and you may find out how Yes prepare these handouts.....  :> #