Annotated Output of r_fig605.r


> # This program produces Figure 6.5 on Page 241
> # of our Text.
>
>    t<-matrix(scan("CH06FI05.txt"),ncol=3,byrow=T)
Read 63 items       {63=3x21}
>
> # The data has 3 columns and read by row
>
> dwaine <-data.frame(TARGTPOP=t[,1],DISPOINC=t[,2],SALES=t[,3])
> TXD <- t[,1]*t[,2]              {TXD: interaction TARGPOP*DISPOINC }
> attach(dwaine)
>
> opar <- par(no.readonly = TRUE) # put old par setting
> par(mfrow=c(2,2));
> plot(TARGTPOP,SALES, main="TARGTPOP vs. SALES")
> plot(DISPOINC,SALES, main="DISPOINC vs. SALES")
> plot(TARGTPOP,DISPOINC, main="TARGTPOP vs. DISPOINC")
> plot(TXD,SALES, main="TXD vs. SALES")
> par(opar);
> pairs(dwaine,main="Dwaine DATA") # Prelim graphical displays in pairs.
>
> g<- lm(SALES~TARGTPOP*DISPOINC)
> g2<- lm(SALES~TARGTPOP+DISPOINC)
> gt<- lm(SALES~TARGTPOP)
> gd<- lm(SALES~DISPOINC)
>
> # Summary and ANOVA
> g2s<-summary(g2);g2s;anova(g2);g2s$cov.unscaled

Call:
lm(formula = SALES ~ TARGTPOP + DISPOINC)     
{
E(SALES) = \beta_0 + \beta_1 TARGTPOP + \beta_2 DISPOINC}

Residuals:
     Min       1Q   Median       3Q      Max
-18.4239  -6.2161   0.7449   9.4356  20.2151

Coefficients:
                 Estimate    Std. Error    t value   Pr(>|t|)   
(Intercept)     -68.8571    60.0170  -1.147   0.2663          {H_0: \beta_0=0 vs. H_1: \beta_0 \neq 0}
TARGTPOP   1.4546     0.2118   6.868    2e-06 ***     
{H_0: \beta_1=0 vs. H_1: \beta_1 \neq 0}
DISPOINC      9.3655     4.0640   2.305   0.0333 *        
{H_0: \beta_2=0 vs. H_1: \beta_2 \neq 0}
            (1)     (2)     (3)    (4)
(1)=\hat{\beta_2}(y,x); (2)=sd(
\hat{\beta_2}(Y,x));
(3)=(1)/(2); (4)=P[|T_18|>|t|]                   
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 11.01 on 18 degrees of freedom
                                    (5)          (6)
(5)= SSE/(n-3); (6)=n-3; n=21. p=3
(Here we adopt the notation in Text; total of p variables with \beta_0 included)

Multiple R-Squared: 0.9167,     Adjusted R-squared: 0.9075 {cf. Page 231 (6.42)}
                             (7)                                            (8)
(7)=
=SSR/SSTO= 1- SSE/SSTO;   (8)= 1- [(SSE/(n-p))/(SSTO/(n-1))]
F-statistic:  99.1 on 2 and 18 DF,  p-value: 1.921e-10
         (10)    (11)    (12)                    (13)

Analysis of Variance Table

Response: SALES
                      Df  Sum Sq Mean Sq  F value   Pr(>F)   
TARGTPOP   1   23371.8 23371.8 192.8962 4.64e-11 ***
DISPOINC     1   643.5     643.5     5.3108     0.03332 * 
Residuals        18  2180.9   121.2                     
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
            (Intercept)      TARGTPOP     DISPOINC
(Intercept) 29.72892348  0.0721834719 -1.992553186
TARGTPOP     0.07218347  0.0003701761 -0.005549917
DISPOINC    -1.99255319 -0.0055499173  0.136310637
> par(mfrow=c(2,2));
> plot.lm(g2)     # Checking residuals
>
> summary(gd);anova(gd)

Call:
lm(formula = SALES ~ DISPOINC)     (14)

Residuals:
    Min      1Q  Median      3Q     Max
-41.290 -11.630  -5.203  10.531  42.079

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) -352.493     80.657  -4.370 0.000329 ***
DISPOINC      31.173      4.698   6.636 2.39e-06 ***
                      (15)       (16)         (17)    (18)
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 20.39 on 19 degrees of freedom
                                    (19)        (20)
Multiple R-Squared: 0.6986,     Adjusted R-squared: 0.6827
                                (21)                                       (22)
F-statistic: 44.03 on 1 and 19 DF,  p-value: 2.391e-06
         (23)     (24)     (25)                 (26)
Analysis of Variance Table

Response: SALES
                  Df  Sum Sq Mean Sq F value    Pr(>F)   
DISPOINC   1 18299.8 18299.8  44.032 2.391e-06 ***   
                   (24)  (25)  (26)        (27)     (28)
Residuals     19  7896.4   415.6     
                    (29)  (30)    (31)               

---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
> summary(gt);anova(gt)

Call:
lm(formula = SALES ~ TARGTPOP)

Residuals:
    Min      1Q  Median      3Q     Max
-19.403  -6.121  -0.311   4.228  27.452

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  68.0454     9.4622   7.191 7.86e-07 ***
TARGTPOP      1.8359     0.1464  12.539 1.23e-10 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 12.19 on 19 degrees of freedom
Multiple R-Squared: 0.8922,     Adjusted R-squared: 0.8865
F-statistic: 157.2 on 1 and 19 DF,  p-value: 1.229e-10

Analysis of Variance Table

Response: SALES
          Df  Sum Sq Mean Sq F value    Pr(>F)   
TARGTPOP   1 23371.8 23371.8  157.22 1.229e-10 ***
Residuals 19  2824.4   148.7                     
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1
>
> names(g)
 [1] "coefficients"  "residuals"     "effects"       "rank"        
 [5] "fitted.values" "assign"        "qr"            "df.residual" 
 [9] "xlevels"       "call"          "terms"         "model"       
>
> # Load Mass package to construct confidence intervals for
> # beta's
>
> confint(g)
                        2.5 %      97.5 %
(Intercept)       -493.435814 468.4832817
TARGTPOP            -7.059170   8.1230663
DISPOINC           -22.186550  34.3731541
TARGTPOP:DISPOINC   -0.381423   0.4871856
> confint(g2)
                   2.5 %    97.5 %
(Intercept) -194.9480130 57.233867
TARGTPOP       1.0096226  1.899497
DISPOINC       0.8274411 17.903560
> confint(gt)
                2.5 %    97.5 %
(Intercept) 48.240659 87.850057
TARGTPOP     1.529429  2.142327
> confint(gd)
                 2.5 %     97.5 %
(Intercept) -521.30975 -183.67584
DISPOINC      21.34053   41.00586
>
> # Estimation of mean response, its confidence intervals
> # and prediction intervals
> summary(dwaine)
    TARGTPOP        DISPOINC         SALES     
 Min.   :38.40   Min.   :15.80   Min.   :137.2 
 1st Qu.:47.80   1st Qu.:16.30   1st Qu.:152.8 
 Median :52.30   Median :17.10   Median :166.5 
 Mean   :62.02   Mean   :17.14   Mean   :181.9 
 3rd Qu.:82.70   3rd Qu.:18.10   3rd Qu.:209.7 
 Max.   :91.30   Max.   :19.10   Max.   :244.2 
> new<- data.frame(TARGTPOP=seq(40,90,5))
> predict(lm(SALES~TARGTPOP), new, se.fit = TRUE)
$fit
       1        2        3        4        5        6        7        8
141.4805 150.6599 159.8393 169.0186 178.1980 187.3774 196.5568 205.7362
       9       10       11
214.9156 224.0950 233.2744

$se.fit
       1        2        3        4        5        6        7        8
4.179987 3.645264 3.189902 2.852166 2.676956 2.696145 2.905884 3.269707
       9       10       11
3.742950 4.289548 4.884939

$df
[1] 19

$residual.scale
[1] 12.19233

> pred.w.plim <- predict(lm(SALES~TARGTPOP), new, interval="prediction")
> pred.w.clim <- predict(lm(SALES~TARGTPOP), new, interval="confidence")
>
> pred.w.plim # prediction intervals for TARGTPOP at seq(40,90,5)
        fit      lwr      upr
1  141.4805 114.5036 168.4574
2  150.6599 124.0249 177.2948
3  159.8393 133.4615 186.2170
4  169.0186 142.8109 195.2264
5  178.1980 152.0714 204.3247
6  187.3774 161.2421 213.5128
7  196.5568 170.3232 222.7904
8  205.7362 179.3157 232.1568
9  214.9156 188.2213 241.6099
10 224.0950 197.0429 251.1471
11 233.2744 205.7835 260.7652
> pred.w.clim # confidence intervals for TARGTPOP at seq(40,90,5)
        fit      lwr      upr
1  141.4805 132.7317 150.2293
2  150.6599 143.0302 158.2895
3  159.8393 153.1627 166.5178
4  169.0186 163.0490 174.9883
5  178.1980 172.5951 183.8010
6  187.3774 181.7343 193.0205
7  196.5568 190.4747 202.6389
8  205.7362 198.8926 212.5798
9  214.9156 207.0815 222.7497
10 224.0950 215.1169 233.0731
11 233.2744 223.0501 243.4987
>
> # Overlay CI and PI at the given TARGTPOP values
> matplot(new$TARGTPOP,cbind(pred.w.clim, pred.w.plim[,-1]),
+         lty=c(1,2,2,3,3), type="l", ylab="predicted y")