# This sample program shows how to reproduce the
# Table 1.1, 1.2 and Figure 1.10, 1.11 in our textbook using R
# Refer to p 21--23 of NKNW
# Created by C. Andy Tsao, 031012.
# You need to change the working dir from "File|Change dir" to the dir
# where the data is located. Here the data set = CH01TA01.DAT
#(Save to your local drive as an ASCII file)

t<-matrix(scan("CH01TA01.DAT"),ncol=2,byrow=T) 

# The data has 2 columns and read by row

toluca <-data.frame(lot=t[,1],hours=t[,2]) # Y=Work Hours; X=Lot Size
attach(toluca)

# Preliminary analysis: We are checking
# a) Systematic part: Linear trend, i.e.  E(Y)= \beta_0 + \beta_1 X?
# b) Any extreme observation or other oddity?
# Note vhat we will need to get residuals to check assumptions regarding
# error component. We will cover that in a moment.

# Numerical Summaries
summary(toluca); var(lot); var(hours);

# Graphical Displays
par(mfrow=c(2,2));    # Setup the graphical device

hist(lot); boxplot(lot); qqnorm(lot);
plot(density(lot));
stem(lot);
windows();
hist(hours); boxplot(hours); qqnorm(hours);
plot(density(hours));
stem(hours);


plot(lot,hours)

m1<-lm(hours~lot)
summary(m1)  # Figure 1.11
names(m1)    # List all the objects associated with m1
residualsqr<-
(m1$residual)^2
lot;hours;m1$fitted;m1$residual;residualsqr
sum(lot);sum(m1$resi);


plot(lot,hours)
abline(m1$coef)

# Residual Analysis: Check if the iid normal random error assumption is OK.
res<-m1$resi;
windows()
hist(res); boxplot(res); qqnorm(res);
plot(density(res));