# This sample program shows how to reproduce the
# Table 1.1, 1.2 and Figure 1.10, 1.11 in our textbook using R
# Refer to p 19--26 of KNNL
# Created by C. Andy Tsao, 100318.
# You need to change the working dir from "File|Change dir" to the dir
# where the data is located. Here the data set = CH01TA01.txt
#(Save to your local drive as an ASCII file)

t<-matrix(scan("CH01TA01.txt"),ncol=2,byrow=T) 

# The data has 2 columns and read by row

toluca <-data.frame(lot=t[,1],hours=t[,2]) # Y=Work Hours; X=Lot Size
attach(toluca)

# Preliminary analysis: We are checking
# a) Systematic part: Linear trend, i.e.  E(Y)= \beta_0 + \beta_1 X?
# b) Any extreme observation or other oddity?
# Note that we will need to get residuals to check assumptions regarding
# error component. We will cover that in a moment.

# Numerical Summaries
summary(toluca); var(lot); var(hours);

# Graphical Displays
par(mfrow=c(2,2));    # Setup the graphical device

hist(lot); boxplot(lot); qqnorm(lot);
plot(density(lot));
stem(lot);
windows();
hist(hours); boxplot(hours); qqnorm(hours);
plot(density(hours));
stem(hours);


plot(lot,hours)

m1<-lm(hours~lot)
summary(m1)  # Figure 1.11
par(mfrow=c(2,2)); 
plot(m1);

names(m1)    # List all the objects associated with m1
residualsqr<-
(m1$residual)^2
lot;hours;m1$fitted;m1$residual;residualsqr
sum(lot);sum(m1$resi);

plot(lot,hours)
abline(m1$coef)  # plot fitted line

# Residual Analysis: Check if the iid normal random error assumption is OK.
res<-m1$resi;
windows()

par(mfrow=c(2,2)); 
hist(res); boxplot(res); qqnorm(res);
plot(density(res));