# Handout for APLM prepared by Yu-Ling Tseng # # R for the Case Example -- Plutonium Measurement, p 141 of textbook# # # Background: # Some environmental cleanup work requires that nuclear material, # such as plutonium 238, be located and completely removed from a restoration # site. When plutonium has become mixed with other materials in very small # amounts, detecting its presence can be a difficult task. Even very small # amounts can traced, however, because plutonium emits subatomic particles-- # alpha particles -- that can be detected. Devices that are used to detect # plutonium record the intensity of alpha particles strikes in counts per # second (# per sec, let it be Y). The regression relationship between # alpha counts per second (the response variable) and plutonium activity # (the explanatory variable, let it be x) is then used to to estimate the # activity of plutonium in the material under study. (This use of a regression # relationship involves inverse prediction; see p.168 ~ 170 for details.) # # Task: to estimate the regression relationship between alpha counts per second # and plutonium activity. # Experiment: four standard rods conataining fixed, known level of plutonium # activity:0.0, 5.0, 10.0 and 20.0 picocuries per gram (pCi/g) are exposed to the # detection device from 4 to 10 times, and the rate of alpha strikes (counts per # second) was recorded. # Data set : 24 data points ch3ta10<-matrix(scan("ch03ta10.txt"),ncol=2, byrow=T) ; # x <- ch3ta10[,2]; # y <- ch3ta10[,1]; dum <- data.frame(x=ch3ta10[,2], y=ch3ta10[,1]) attach(dum) # Scatter plot to see if a linear relation suitable.... # windows() plot(x,y,xlab="pCi/g", ylab="# per second", main="Scatter plot"); # Notice that, as expected, the strike rate tends to increase with the plutonium # activity level..... except one strange data point... (which one?) # Note, also, a nonzero strike rate for the standard rod with zero plutonium level # (possible reason?) # dum <- data.frame(x=ch3ta10[,2], y=ch3ta10[,1]) # attach(dum) # rm(x,y) # simple regression: y reg. on x... fm <- lm(y~x, dum) summary(fm) anova(fm) # windows() opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0)) plot(fm, las=1); # Note: R gives four basic plots for diagnostics.... # # case 24 is an outlier. An examination of laboratory record revealed that # the experimental conditions were not properly maintained for that case. # It is decided, then, to discard case 24 in the analysis.... fm0 <- lm(y~x, dum[-24, ]) # remove case 24 from dum summary(fm0) anova(fm0) # windows() opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0)) plot(fm0, las=1); # diagnostics # F-value=229 on 1, 21 df's, p-value =0, so there is a significant regression # relationship. # However, error variance appears to increase with the plutonium activity level. # Normal Q-Q plot suggests nonnormality (heavy tail)... but probably due to # the unequal error variance.... # To get X^2_BP SQe<-resid(fm0)^2 # e^2 dum1<-data.frame(x[-24],SQe) attach(dum1) X2BP<-(sum((fitted(lm(SQe~x[-24],dum1))-mean(SQe) )^2)/2)/(sum(resid(fm0)^2) /(length(x[-24])))^2 X2BP X2BP>qchisq(.95,1) #?, if yes, reject the constant variance null hypothesis #or calculate the p-value of the test and reject if p-value < alpha 1-pchisq(X2BP,1) #Test decision: #Note: X^2_BP= 23.29 > 3.84 (or p-value < 0.05), that is the # Breusch-Pagan test rejects at level 0.05 the equal variance null hypothesis. # Try to transform y, then.... take sqrt(y) be our response variable, say... sfm0 <- update(fm0,sqrt(.)~.) # a very useful R command..... summary(sfm0) anova(sfm0) # windows() opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0)) plot(sfm0, las=1) # diagnostics # Normal Q-Q plot is now roughly a straight line # but residuals v.s. y_hat: not a horizontal band... : suggests a nonlinear relation # of course.... since y and x are related linearly... sqrt(y) and x, then,.... # take sqrt(x) be the predictor for sqrt(y), then... sfm0s <- update(sfm0, .~sqrt(.)) # really take a note on this command.... # summary(sfm0s) anova(sfm0s) # windows() opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0)) plot(sfm0s, las=1); # diagnostics # A satisfactory linear fit results..... ;> # Residual plots does suggest, though, that some nonconstancy of the error # variance may still remain; but if so, it does not appear to be substantial. # since X^2_BP=3.85 w/ p-value=0.05, supporting the conclusion from the residual plot SQe<-resid(sfm0s)^2 # e^2 dum2<-data.frame(sqrt(x)[-24],SQe) attach(dum2) X2BP<-(sum((fitted(lm(SQe~sqrt(x)[-24],dum2))-mean(SQe) )^2)/2)/(sum(resid(sfm0s)^2) /(length(x[-24])))^2 X2BP X2BP>qchisq(.95,1) #?, if yes, reject the constant variance null hypothesis #or calculate the p-value of the test and reject if p-value < alpha 1-pchisq(X2BP,1) # So the nonconstancy of the error variance is not substantial.... say, "satisfactory" then # The final fitted line: sqrt(y) = 0.0730 + 0.0573 sqrt(x) with R^2=0.945 # F-value = 360.9 with p-value=0: fitted line is statistically significant. # With this data set: the estimated regression relationship between # alpha counts per second and plutonium activity is # sqrt(alpha counts per second) = 0.073 + 0.0573 * sqrt(plutonium activity level) # The estimated reg. relationship is statistically significant. # Before you leave your R session detach(); rm(list=ls(all=TRUE)); q()