# Using Toluca data (CH01TA01.DAT), this sample program shows how to
calculate the LSE,
# fitted value, R-squares, adjutsed R-squares directly from matrix operations.
# Prepared by Andy Tsao
# Refer also to Practical
Regression and ANOVA using R, particularly p24--26.
#
# The program is divided into the following steps
# 1. Input data set into R
# 2. Construct the design matrix X.
# 3. Calculate betahat = (X'X)^{-1}X'Y, yhat = X'betahat
# R^2 = 1- || Y- yhat||^2/||Y||^2, R^2_adj = 1 -
#
# 1. Input Data to R
# Here the data set = CH01TA01.DAT
, refer to s01.html for this part.
t<-matrix(scan("CH01TA01.DAT"),ncol=2,byrow=T)
toluca <-data.frame(lotsize=t[,1],hours=t[,2])
attach(toluca) # Use "toluca" as our working
dataset
#2. Construct the design matrix
x <- cbind(1,toluca[,-c(2)])
y<- toluca$hours
# ( X'X)^{-1}
x<-as.matrix(x)
xtxi <- solve(t(x) %*% x)
xtxi
# Contrast with
g <- lm(hours ~lotsize)
gs<-summary(g)
gs$cov.unscaled
# LSE calculation
xtxi %*% t(x) %*% y
betahat<-solve(t(x) %*% x, t(x) %*% y)
betahat
# Fitted Value X'betahat
t(x) %*% betahat
g$fitted.values
# See other components of R objects then extract residual for calculating
s^2, R^2
names(g); names(gs)
sqrt(sum(g$res^2)/g$df)
1- sum(g$res^2)/sum((y-mean(y))^2)
detach(toluca) # "toluca" is no longer
used
# Then Exit R without saving workpace image.
q()