# GLM_Handout for Model Selection     Y. L. Tseng

crab<-read.table(file = "horseshoe_all.txt", header=FALSE, col.names = c("color", "spine","width","satellite", "weight"))
y<-ifelse(crab$satellite>0, 1,0)
ndark<-ifelse(crab$color<5, 1,0)
crab5<-data.frame("c"=relevel(as.factor(crab$color),ref="5"),"s"=relevel(as.factor(crab$spine),ref="3"),"satellite"=crab$satellite,"y"=y,"w"=crab$width,"wt"=crab$weight/1000,"dark"=ndark)
  
# Table 5.1, p. 139
m0<-glm(formula = y ~ c+s+wt+w, data = crab5, family = binomial(link = logit), 
        na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = T))
summary(m0) 
dG2<-m0$null.deviance-m0$deviance
dfdG2<-m0$df.null-m0$df.residual
pvalue<-1-pchisq(dG2,dfdG2)
cbind(dG2, dfdG2, pvalue)    # For testing if any of the predictors is sig. c.f.  p.138
# This shows extremely strong evidence that at least one predictor has an eefect.                                
# Although this overall test is highly sig., the Table 5.1 results, p. 139, are discouraging.
# --> Small p-value for the overall test yet the lack of sig. for individual effects
# is a warning sign of multicollinearity.
# Width is sig. when consider only color and width as predictors, as shown in last handout;
# controlling for weight, color and spine condition, little evidence remains for a width effect.

cov(crab5$w,crab5$wt)/(var(crab5$w)*var(crab5$wt))^(0.5)  # strong correlation between w and wt

# ==> Further analysis uses w, c and s as predictors, then.

# For stepwise variable selection algorrithms  5.1.3. 
# (1) Forward selection adds terms sequentially until further additions do not improve the fit.
# (2) Backwar elimination begins with a complex model and sequentially removes terms 
# (highest-order terms with largest p-value........), the process stops when any further deletion
# leads to a sig.'ly poorer fit.

# Caution!!!  stat.ly sig. =/= practically sig.   Read p.140, please.

# EX: Backward eliminationfor horseshoe crab data
# To get Table 5.2, p. 141
m1<-glm(formula = y ~ c+s+w+c*s+c*w+s*w, data = crab5, family = binomial(link = logit), 
     na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = T))

m2<-glm(formula = y ~ c+s+w, data = crab5, family = binomial(link = logit), 
     na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = T))
m3a<-glm(formula = y ~ c+s, data = crab5, family = binomial(link = logit), 
     na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = T))
m3b<-glm(formula = y ~ s+w, data = crab5, family = binomial(link = logit), 
     na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = T))
m3c<-glm(formula = y ~ c+w, data = crab5, family = binomial(link = logit), 
     na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = T))
m4a<-glm(formula = y ~ c, data = crab5, family = binomial(link = logit), 
     na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = T))
m4b<-glm(formula = y ~ w, data = crab5, family = binomial(link = logit), 
     na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = T))
m5<-glm(formula = y ~ dark+w, data = crab5, family = binomial(link = logit), 
     na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = T))
m6<-glm(formula = y ~ 1, data = crab5, family = binomial(link = logit), 
     na.action = na.exclude, control = list(epsilon = 0.0001, maxit = 50, trace = T))

G2dfAic<-rbind(c(m1$deviance,m1$df.residual,m1$aic),
+ c(m2$deviance,m2$df.residual,m2$aic),
+ c(m3a$deviance,m3a$df.residual,m3a$aic),
+ c(m3b$deviance,m3b$df.residual,m3b$aic),
+ c(m3c$deviance,m3c$df.residual,m3c$aic),
+ c(m4a$deviance,m4a$df.residual,m4a$aic),
+ c(m4b$deviance,m4b$df.residual,m4b$aic),
+ c(m5$deviance,m5$df.residual,m5$aic),
+ c(m6$deviance,m6$df.residual,m6$aic))

G2dfAic
anova(m2,m1)
anova(m3a,m2)
anova(m3b,m2)
anova(m3c,m2)
anova(m4a,m3c)
anova(m4b,m3c)
anova(m5,m3c)
anova(m6,m5)