# Handout for Regression prepared by Yu-Ling Tseng # # This is to let you get a feeling as how a random sample of size n # from N(0, 1) would look like in time sequence plots, and in histograms. # # Input: k: positive integer, n (sample size): positive integer # ploty <- function(k,n){ m<- k*k; windows() opar <- par(mfrow = c(k,k), oma = c(0, 0, 2.7, 0)); total <- m*n; y <- array(rnorm(total),dim=c(n,m)) for (i in 1:m){ plot(y[,i],ylim=c(-4,4),sub=paste("n=", n)); abline(h=-2); abline(h=2); abline(h=0) } windows() opar <- par(mfrow = c(k,k), oma = c(0, 0, 2.7, 0)); for (j in 1:m){ hist(y[,j] #,br=c(-5, -4,-2,-1.5,-1,-.5,0,0.5,1,1.5,2,4,5) ,freq=FALSE,main="Histogram of n N(0,1) obs's.", sub=paste("n=", n)) curve(dnorm, col="red", add=TRUE) abline(v=-2, col="green");abline(v=2, col="green"); abline(v=0,col="blue") } windows() opar <- par(mfrow = c(k,k), oma = c(0, 0, 2.7, 0)); for (i in 1:m){ qqnorm(y[,i]);qqline(y[,i]); } } # Please note that each time you use ft ploty, three graphic windows will be open # # Try k=2, with n=10, 100, 500, respectively..... e.g. # ploty(2, 10); # Try this out: for (i in (1:3)){ploty(2, 10^i)}; # Now, with simulated regression data..... # Basic residual-plots for diagnostic in a regression analysis # Please note that how a violation of certain assumptions made in reg # model affect the display........ diagnostic <- function(a, b,n){ windows() opar <- par(mfrow = c(2,2), oma = c(0, 0, 2.7, 0)); ind <- c(1:n); # index, 1, ..., n # e <- rnorm(n) # pure normal terms x <- 3*runif(n); # the predictor's values, say fx <- a+b*x; # the true reg. function: a line y <- fx+e; # the reg. model yvx<- fx+x*e/2; # What if variances change with x's values..... yvi <- fx+ind*e/2; # What if variances change with index (i.e. time)... # Histograms # hist(e,freq=FALSE); hist(y,freq=FALSE); hist(yvx,freq=FALSE); # See any difference here? hist(yvi,freq=FALSE); # and here? windows() opar <- par(mfrow = c(2,2), oma = c(0, 0, 2.7, 0)); #time seq. plots # plot(ind, e); plot(ind, y); plot(ind, yvx); plot(ind, yvi); # different from the other three plots? # windows() opar <- par(mfrow = c(2,2), oma = c(0, 0, 2.7, 0)); # Plots v.s. x # plot(x, e); plot(x, y); abline(a, b); plot(x, yvx); abline(a, b); # any difference? plot(x, yvi); abline(a, b); windows() opar <- par(mfrow = c(2,2), oma = c(0, 0, 2.7, 0)); # Normal prob. plots # qqnorm(e); qqline(e); qqnorm(y); qqline(y); qqnorm(yvx); qqline(yvx); qqnorm(yvi); qqline(yvi); } # Try n=10, 100, 1000, 10000 ... and true reg. line is, say, a+bx # diagnostic(2, -1, 10); # Try this out: for (i in (1:3)){diagnostic(a, b, 50*i)};