# For the Toluca Company example, p. 19 KNNL 5thED, # prepared by Y.L Tseng # # This is how you scan a data set in your working directory into R # # Make sure to change the working directory to the appropriate one # # Also before scan the data set into R, check the data set first to see # the number of columns it has, you need it for assigning ncol below. ch01ta01<-matrix(scan("ch01ta01.txt"),ncol=2, byrow=T); ch01ta01 dum<-data.frame(x=ch01ta01[,1],y=ch01ta01[,2]) # to name the columns # dum # do you see any difference in dum and ch01ta01? # attach(dum) # To get Fig. 1.10 on p.20 ...... # # windows() opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0)) plot(x,y,xlab="Lot Size", ylab="Hours", main="Scatter plot"); fm<-lm(y~x,dum) plot(x,y,xlab="Lot Size", ylab="Hours", main="Fitted Regression Line"); abline(coef(fm)) # To get Table 1.2 on p.22 ....... # ta12<-data.frame("x"=x,"y"=y,"y_hat"=fitted(fm),"residual"=resid(fm)) ta12 # For Fig. 1.11, p.20 and Fig. 2.2, p.46 ..... # summary(fm) # Please compare the outputs of R and those in text anova(fm) names(fm) #See what you can get from fm # Load Mass package to construct confidence intervals for # beta's confint(fm) # conf. int.'s for reg. coeff.'s default: 95% # confint(fm, level=0.9) # Estimation of mean response, its confidence intervals Ex. on p. 55, p. 59 # and prediction intervals # This is how you # construct confidence/prediction intervals for the mean response at given predictor x's values new<- data.frame(x=seq(10,110,10)) new predict(lm(y~x), new, se.fit = TRUE) pred.w.plim <- predict(lm(y~x), new, interval="prediction",level=0.9) pred.w.clim <- predict(lm(y~x), new, interval="confidence",level=0.9) cbind(new$x,pred.w.plim) # prediction intervals for x at seq(10,110,10) cbind(new$x,pred.w.clim) # confidence intervals for x at seq(10,110,10) # Confidence band for the entire reg. line Ex. on p. 62, Fig. 2.6 width<-function(n,xh,alpha){ sqrt(2*qf(1-alpha,2,n-2)*sum((y-fitted(fm))^2)/(n-2)*(1/n+(xh-mean(x))^2/((n-1)*var(x)))) } n<-length(y) u<-coef(fm)[1]+coef(fm)[2]*new$x+width(n,new$x,0.1) l<-coef(fm)[1]+coef(fm)[2]*new$x-width(n,new$x,0.1) band<-cbind(l,u) cbind(new$x,band) # Graphical display these inervals # windows() opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0)) matplot(new$x,pred.w.clim, col=c(1,2,2), lty=c(1,2,2),type="l",# ylim=c(70,500), main="90% Confidence intervals",ylab="Hours Y") matplot(new$x,pred.w.plim, col=c(1,3,3), lty=c(1,3,3),type="l", #ylim=c(70,500), main="90% Prediction intervals",ylab="Hours Y") matplot(new$x,band,col=c(5,5),lty=c(5,5),type="l", #ylim=c(70,500), main=" 90% Confi. band for reg. line",ylab="Hours Y") abline(coef(fm)) # It is hard to compare among plots...... # Overlay CI, PI, Conf band at those x values matplot(new$x,cbind(pred.w.clim, pred.w.plim[,-1],band), col=c(1,2,2,3,3,5,5), lty=c(1,2,2,3,3,5,5),type="l", main="Overlay display",ylab="Hours Y") # Before you quit R, clean up the objects, please. # detach() rm(list=ls()) # Also try the "Save History" under "File" of the R Console window, # and you may find out how Yes prepare these handouts..... :> #