# Handout for Regression, prepared by Yu-Ling Tseng
#
# Basic residual plots for diagnostics in a regression analysis, using
# simulated regression data.
# Note how a violation of certain assumptions made in the regression model
# affects each display.
#
# residplot(a, b, n)
#   a, b : intercept and slope of the true regression line a + b * x.
#   n    : number of simulated observations (a single number >= 1; a
#          fractional value is truncated to an integer).
#
# Draws four pages of 2 x 2 plots (histograms, time-sequence plots, plots
# against x, and normal probability plots). Each page shows, in order:
#   e   : the pure normal error terms,
#   y   : the regression model with constant error variance,
#   yvx : responses whose error term is scaled by x / 2,
#   yvi : responses whose error term is scaled by index / 2.
# The graphical parameters `mfrow` and `oma` are restored when the function
# exits. Returns NULL invisibly; the function is called for its plots.
residplot <- function(a, b, n) {
  # Fail early with a clear message instead of an obscure plotting error.
  if (!is.numeric(a) || length(a) != 1L || is.na(a) ||
      !is.numeric(b) || length(b) != 1L || is.na(b)) {
    stop("'a' and 'b' must each be a single non-missing number", call. = FALSE)
  }
  if (!is.numeric(n) || length(n) != 1L || is.na(n) || n < 1) {
    stop("'n' must be a single number >= 1", call. = FALSE)
  }
  n <- as.integer(n)

  # Each call starts a fresh 2 x 2 page with room for an outer title.
  new_page <- function() {
    par(mfrow = c(2, 2), oma = c(0, 0, 2.7, 0))
  }

  # Save the caller's settings once and restore them however we exit.
  opar <- new_page()
  on.exit(par(opar), add = TRUE)

  # NOTE: the variable names below are kept short on purpose; hist(), plot()
  # and qqnorm() derive their default titles/axis labels from them.
  ind <- seq_len(n)          # index, 1, ..., n
  e <- rnorm(n)              # pure normal terms
  x <- 3 * runif(n)          # the predictor's values, uniform on (0, 3)
  fx <- a + b * x            # the true regression function: a line
  y <- fx + e                # the regression model
  yvx <- fx + x * e / 2      # what if the variance changes with x?
  yvi <- fx + ind * e / 2    # what if the variance changes with index (time)?

  # Page 1: histograms (the first page was already opened above).
  hist(e, freq = FALSE)
  hist(y, freq = FALSE)
  hist(yvx, freq = FALSE)    # see any difference here?
  hist(yvi, freq = FALSE)    # and here?

  # Page 2: time-sequence plots.
  new_page()
  plot(ind, e)
  plot(ind, y)
  plot(ind, yvx)
  plot(ind, yvi)             # different from the other three plots?

  # Page 3: plots against x, with the true line overlaid on the responses.
  new_page()
  plot(x, e)
  plot(x, y)
  abline(a, b)
  plot(x, yvx)
  abline(a, b)               # any difference?
  plot(x, yvi)
  abline(a, b)

  # Page 4: normal probability plots.
  new_page()
  qqnorm(e)
  qqline(e)
  qqnorm(y)
  qqline(y)
  qqnorm(yvx)
  qqline(yvx)
  qqnorm(yvi)
  qqline(yvi)

  invisible(NULL)
}

# Try n = 10, 100, 1000, 10000, ... with true regression line, say, a + b * x.
residplot(1, -2, 10)