#  Using Toluca data (CH01TA01.DAT), this sample program shows how to calculate the LSE,
# fitted value, R-squares, adjutsed R-squares directly from matrix operations.
#  Prepared by Andy Tsao
#  Refer also to Practical Regression and ANOVA using R, particularly p24--26.
#
# The program is divided into the following steps
# 1. Input data set into R
# 2. Construct  the design matrix X.
# 3. Calculate betahat = (X'X)^{-1}X'Y, yhat = X'betahat    
#      R^2 = 1- || Y- yhat||^2/||Y||^2,  R^2_adj = 1 -
#
# 1. Input Data to R
# Here the data set = CH01TA01.DAT , refer to s01.html for this part.

t<-matrix(scan("CH01TA01.DAT"),ncol=2,byrow=T)
toluca <-data.frame(lotsize=t[,1],hours=t[,2])
attach(toluca)  # Use "toluca" as our working dataset
#2. Construct the design matrix
x <- cbind(1,toluca[,-c(2)])
y<- toluca$hours
# ( X'X)^{-1}
x<-as.matrix(x)
xtxi <- solve(t(x) %*% x)
xtxi
# Contrast with
g <- lm(hours ~lotsize)
gs<-summary(g)

gs$cov.unscaled

# LSE calculation
xtxi %*% t(x) %*% y
betahat<-solve(t(x) %*% x, t(x) %*% y)

betahat

# Fitted Value X'betahat
t(x) %*% betahat
g$fitted.values


# See other components of R objects then extract residual for calculating s^2, R^2
names(g); names(gs)
sqrt(sum(g$res^2)/g$df)
1- sum(g$res^2)/sum((y-mean(y))^2)

detach(toluca)  # "toluca" is no longer used

# Then Exit R without saving workpace image.
q()