# MLR Course: Regression # prepared by Yu-Ling Tseng # PART I: some matrix operations that you might find useful x <- 1:5/5; # say, we have these x values # x; var(x); # the sample variance of those x's # x^2; mean(x); # x_bar # sum((x-mean(x))^2); # sum of (x_i-x_bar)^2 i=1,2,..# sum((x-mean(x))^2)/(length(x)-1); # c.f. var(x) # y<-x+rnorm(5) fm<-lm(y~x) X <- model.matrix(fm) # this gives the design matrix in a S-L-R model, [1, x]# X t(X); # transpose of X # A <- solve(t(X)%*%X); # (X^tX)^(-1) # A; H <- X%*%A%*%t(X); H; # the hat matrix # # PART II: Multiple regression -- Dwaine Studios Example ch6fi05 <- matrix(scan("ch06fi05.txt"),ncol=3, byrow=T) ; dum <- data.frame(ch6fi05) names(dum) <- c("TARGTPOP","DISPOINC","SALES") # x1 <- ch6fi05[,1]; # x2 <- ch6fi05[,2]; # y <- ch6fi05[,3]; # dum <- data.frame("SALES"=y,"TARGTPOP"=x1,"DISPOINC"=x2) #Another way to input data into R #dum <-read.table("ch06fi05.txt", header=FALSE,col.names = c("TARGTPOP", "DISPOINC","SALES")) attach(dum) # rm(x1,x2,y) # To get Figure 6.4, p.232 # pairs(dum, main="Dwaine DATA:Scatter Plot Matrix",lwd=3); # (a) Scatter plot matrix cor(dum); # (b) Correlation matrix # Do MLR: regress SALES on those two predictors # fm <- lm(SALES~TARGTPOP+DISPOINC, dum) # To get (b) Basic Data on p237 of the text # fi65b<-data.frame("x1"=TARGTPOP,"x2"=DISPOINC,"y"=SALES,"y_hat"=fitted(fm),"residual"=resid(fm)) fi65b; # To get (a) Multiple Regression Output on p.237 # summary(fm) anova(fm) # Note that the ANOVA table you get from R is a little different from that # on p.237. Think: How to get the ANOVA table on p.237 from R's output? # To get (X^tX)^(-1) , p.237# X<-model.matrix(fm) #the design matrix D<-solve(t(X)%*%X) D; D%*%t(X)%*%SALES #LSE for beta's # Load Mass package to construct confidence intervals for # beta's confint(fm,level=.95) confint(fm,level=.9) # c.f. p. 245 joint conf. int's for beta1 and beta2 # Estimation of mean response, its confidence intervals # and prediction intervals summary(dum) new<- data.frame(TARGTPOP=c(65.4,53.1),DISPOINC=c(17.6, 17.7)) new predict(lm(SALES~TARGTPOP+DISPOINC), new, se.fit = TRUE) pred.w.plim <- predict(lm(SALES~TARGTPOP+DISPOINC), new, interval="prediction",level=.95) pred.w.clim <- predict(lm(SALES~TARGTPOP+DISPOINC), new, interval="confidence",level=.95) pred.w.plim #c.f. p. 247 joint 90% prediction intervals for City A and B; notice that # level is set to be .95 in the command to get this simultaneous .90 prediction # intervals. pred.w.clim # p. 246 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Note: ANOVA tables in R # anova(fittedmodel) of R gives the EXTRA SS's, c.f. chapter 7. # ex. anova(fm) v.s. anova(fmDT) # fmDT <- lm(SALES~DISPOINC+TARGTPOP, dum) # regress SALES on those two predictors # w/ different input order # anova(fm) anova(fmDT) # but, same outputs with summary(fittedmodel) # summary(fm) summary(fmDT) # More Graphical fun..... # Load rgl package for 3D plot that can be spinned library(rgl) plot3d(TARGTPOP,DISPOINC,SALES, col="red", size=3)