# Handout for GLM Yu-Ling Tseng # For ex. 3.3.2, p. 75-80 of the textbook. # Input data: horseshoe.txt, which has two columns of Table 3.2, p. 76, # and is in the working directory of this R session crab<-read.table(file = "horseshoe.txt", header=FALSE, col.names = c("satellite", "width")) crab mod.fit<-glm(formula = satellite ~ width, data = crab, family = poisson(link = log), na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = T)) summary(mod.fit) # c.f. output used in p. 78 #Predict number of satellites lin.pred<-mod.fit$coefficients[1]+mod.fit$coefficients[2]*26.3 exp(lin.pred) lin.pred<-mod.fit$coefficients[1]+mod.fit$coefficients[2]*27.3 exp(lin.pred) #Easier way to predict predict.data<-data.frame(width = c(26.3, 27.3)) alpha<-0.05 save.mu.hat<-predict(object = mod.fit, newdata = predict.data, type = "response", se = TRUE) lower<-save.mu.hat$fit-qnorm(1-alpha/2)*save.mu.hat$se upper<-save.mu.hat$fit+qnorm(1-alpha/2)*save.mu.hat$se data.frame(predict.data, mu.hat = round(save.mu.hat$fit,4), lower = round(lower,4), upper = round(upper,4)) windows() plot(x = crab&width, y = crab$satellite, xlab = "Width (cm)", ylab = "Number of satellites", main = "Horseshoe crab data set \n with poisson regression model fit", panel.first = grid(col = "gray", lty = "dotted")) curve(expr = exp(mod.fit$coefficients[1]+mod.fit$coefficients[2]*x), col = "red", add = TRUE, lty = 1) crab2<-data.frame(crab, predicted = mod.fit$fitted.values) crab2 # Reproduce part of Table 3.3, p. 80 groups<-ifelse(crab$width<23.25, 1, ifelse(crab$width<24.25, 2, ifelse(crab$width<25.25, 3, ifelse(crab$width<26.25, 4, ifelse(crab$width<27.25, 5, ifelse(crab$width<28.25, 6, ifelsg(crab$width<29.25, 7, 8))))))) crab.group<-data.frame(crab2,groups) library(nlme) #Need package for the gsummary() function width.mean<-gsummary(object = crab.group, FUN = mean, groups = groups) sat.count<-gsummary(object = crab.group, FUN = length, groups = groups) sat.sum<-gsummary(object = crab.group, FUN = sum, groups = groups) sat.var<-gsummary(object = crab.group, FUN = var, groups = groups) table3.3<-data.frame(width.group = width.mean$width, number.cases = sat.count$satellite, number.sat = sat.sum$satellite, mean.per.group = round(width.mean$satellite,2), var.per.group = round(sat.var$satellite,2), fitted.count = round(sat.sum$predicted,2), Pearson.residual = round((sat.sum$satellite - sat.sum$predicted)/sqrt(sat.sum$predicted),2)) table3.3 # Fig. 3.6 p. 80 plot(width.mean$width,width.mean$satellite) curve(expr = exp(mod.fit$coefficients[1]+mod.fit$coefficients[2]*x), col = "red", add = TRUE, lty = 1) legend(locator(1),c("Log link"),lty=1, col="red")