# Handout for Regression      prepared  by Yu-Ling Tseng #

# This is to let you get a feeling as how a random sample of size n
# from N(0, 1) would look like in time sequence plots, and in histograms. #
# Input: k: positive integer, n (sample size): positive integer  #

ploty <- function(k,n){
m<- k*k;            
windows() 
opar <- par(mfrow = c(k,k), oma = c(0, 0, 2.7, 0));
total <- m*n;
y <- array(rnorm(total),dim=c(n,m))
for (i in 1:m){
plot(y[,i],ylim=c(-4,4),sub=paste("n=", n));
abline(h=-2); abline(h=2); abline(h=0)
}
windows() 
opar <- par(mfrow = c(k,k), oma = c(0, 0, 2.7, 0));
for (j in 1:m){
hist(y[,j] #,br=c(-5, -4,-2,-1.5,-1,-.5,0,0.5,1,1.5,2,4,5)
,freq=FALSE,main="Histogram of n N(0,1) obs's.", sub=paste("n=", n))
curve(dnorm, col="red", add=TRUE)
abline(v=-2, col="green");abline(v=2, col="green");  abline(v=0,col="blue")
}
windows()
opar <- par(mfrow = c(k,k), oma = c(0, 0, 2.7, 0));
for (i in 1:m){
qqnorm(y[,i]);qqline(y[,i]);
}
}

# Please note that each time you use ft ploty, three graphic windows will be open #

# Try k=2, with n=10, 100, 500, respectively..... e.g. #
ploty(2, 10);  

# Try this out: for (i in (1:3)){ploty(2, 10^i)};

# Now, with simulated regression data.....

# Basic residual-plots for diagnostic in a regression analysis
# Please note that how a violation of certain assumptions made in reg
# model affect the display........

diagnostic <- function(a, b,n){
windows() 
opar <- par(mfrow = c(2,2), oma = c(0, 0, 2.7, 0));
ind <- c(1:n);             # index, 1, ..., n #
e <- rnorm(n)              # pure normal terms
x <- 3*runif(n);           # the predictor's values, say
fx <- a+b*x;               # the true reg. function: a line
y <- fx+e;                 # the reg. model
yvx<- fx+x*e/2;             # What if variances change with x's values.....
yvi <- fx+ind*e/2;         # What if variances change with index (i.e. time)...

# Histograms  #
hist(e,freq=FALSE);
hist(y,freq=FALSE);
hist(yvx,freq=FALSE);      # See any difference here?
hist(yvi,freq=FALSE);      # and here?
windows() 
opar <- par(mfrow = c(2,2), oma = c(0, 0, 2.7, 0));
#time seq. plots    #
plot(ind, e);
plot(ind, y);
plot(ind, yvx);
plot(ind, yvi);      # different from the other three plots? #
windows() 
opar <- par(mfrow = c(2,2), oma = c(0, 0, 2.7, 0));
# Plots v.s. x #
plot(x, e);
plot(x, y);   abline(a, b);
plot(x, yvx);   abline(a, b);      # any difference?
plot(x, yvi); abline(a, b);
windows() 
opar <- par(mfrow = c(2,2), oma = c(0, 0, 2.7, 0));
# Normal prob. plots #
qqnorm(e); qqline(e);
qqnorm(y); qqline(y);
qqnorm(yvx); qqline(yvx);         
qqnorm(yvi); qqline(yvi);     
}


# Try n=10, 100, 1000, 10000 ...  and true reg. line is, say, a+bx   #
diagnostic(2, -1, 10);

# Try this out: for (i in (1:3)){diagnostic(a, b, 50*i)};