# This sample program is a prelim analysis of body fat percentage (bfp) # data. For ease of illustration, I use only bfp, gender, height, weight, resistance. # For the class project, you should try other variables as well. # Note that some of the variables might be redundant. # Created by C. Andy Tsao 100610 @ YesKno's Mhouse # The practice data is available at bfpa.txt. t<-matrix(scan("bfpa.txt"),ncol=5,byrow=T) bfpa <-data.frame(gender=as.factor(t[,1]),height=t[,2],weight=t[,3],resi=t[,4],bfp=t[,5]) # Input gender, height and weight and declare gender as factor attach(bfpa) # Numerical Summary summary(bfpa) pairs(bfpa) # Simultaneous fitting all the indepdent variables ngender<-as.numeric(gender) summary(ngender) result.c<-lm(bfp~gender+height+weight+resi) summary(result.c) result.n<-lm(bfp~ngender+height+weight+resi) summary(result.n) model.matrix(result.c) model.matrix(result.n) # Now suppose based on subject knowledge that weight is an important variable. # We proceed to add the rest of variables one by one mw<-lm(bfp~weight) m1h<-lm(bfp~weight+height); m1g<-lm(bfp~weight+gender); m1r<-lm(bfp~weight+resi); # Make sure you know which regression the residual plots corresponded to summary(mw);par(mfrow=c(2,2));plot(mw);windows(); summary(m1h);par(mfrow=c(2,2));plot(m1h);windows(); summary(m1g);par(mfrow=c(2,2));plot(m1g);windows(); summary(m1r);par(mfrow=c(2,2));plot(m1r);windows(); # You can also try to remove one variable from "full model" also mwor<- lm(bfp~gender+height+weight) # model without resi mwoh<- lm(bfp~gender+weight+resi) # model without height mwog<- lm(bfp~height+weight+resi) # model without gender summary(mwor);par(mfrow=c(2,2));plot(mwor);windows(); summary(mwoh);par(mfrow=c(2,2));plot(mwoh);windows(); summary(mwog);par(mfrow=c(2,2));plot(mwog);windows(); # Transforming weight into various categorical variables cweight<-(weight>54) cweight cweight<-1*(weight>54) cweight cweight<-1*(weight>54)+1*(weight>64)+1*(weight>69) cweight summary(weight) oweight<-48+(54-48)*(weight>54)+(64-54)*(weight>64)+(69-64)*(weight>69) is.factor(weight) is.factor(cweight) cwt<-as.factor(cweight) # glm vs. lm mwgi<-glm(bfp~weight*gender); mwgi2<-lm(bfp~weight*gender); mwgi3<-lm(bfp~oweight*gender); mwgi4<-lm(bfp~cwt*gender); summary(mwgi2);summary(mwgi3); summary(mwgi4) anova(mwgi4); # Check the differences between design matrices model.matrix(mwgi4) model.matrix(mwgi3); # model without resi with various weight variables mwor<- lm(bfp~gender+height+weight) mworcweight<- lm(bfp~gender+height+cweight) mworcwt<- lm(bfp~gender+height+cwt) summary(mwor) summary(mworcweight) summary(mworcwt); # Have fun!