> # This sample program illustrate simple linear regression and
> # polynomial regression of second order.
>
> # Refer to
> # http://lib.stat.cmu.edu/DASL/Stories/tvads.html
> # for background of the data
>
> # 1. Initiate an R section
> # 2. Change the working dir to the one where the data is located.
>
> # Reading data and assigned it as the working data set
> tvad <- read.table("tvad.txt", header=T)
> attach(tvad)
>
> # Prelim inspection
> tvad
             firm spend yields
1     MILLER_LITE  50.1   32.1
2           PEPSI  74.1   99.6
3         STROH_S  19.3   11.7
4    FEDL_EXPRESS  22.9   21.9
5     BURGER_KING  82.4   60.8
6       COCO-COLA  40.1   78.6
7       MCDONALDS 185.9   92.4
8             MCI  26.9   50.7
9       DIET_COLA  20.4   21.4
10           FORD 166.2   40.1
11          LEVIS  27.0   40.8
12       BUD_LITE  45.6   10.4
13       ATT_BELL 154.9   88.9
14   CALVIN_KLEIN   5.0   12.0
15         WENDYS  49.7   29.2
16       POLAROID  26.9   38.0
17         SHASTA   5.7   10.0
18       MEOW_MIX   7.6   12.3
19    OSCAR_MEYER   9.2   23.4
20          CREST  32.4   71.1
21 KIBBLES_N_BITS   6.1    4.4
> # Initiate graphic devices
> par(mfrow=c(2,2));
>
> hist(spend); boxplot(spend); qqnorm(spend);
> plot(density(spend));

>
> stem(spend) ; summary(spend);

  The decimal point is 2 digit(s) to the right of the |

  0 | 1111122233334
  0 | 55578
  1 |
  1 | 579

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
    5.0    19.3    27.0    50.4    50.1   185.9

> hist(yields); boxplot(yields); qqnorm(yields);
> plot(density(yields));

>
> stem(yields) ; summary(yields);

  The decimal point is 1 digit(s) to the right of the |

  0 | 400222
  2 | 123928
  4 | 011
  6 | 119
  8 | 92
  10 | 0

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
   4.40   12.30   32.10   40.47   60.80   99.60
>
> plot(yields~spend)

>
> fm0 <- lm(yields~1)                  # Summarization by mean only.
> summary(fm0)  # Yields = b_0 + epsilon

Call:
lm(formula = yields ~ 1)

Residuals:
    Min      1Q  Median      3Q     Max
-36.067 -28.167  -8.367  20.333  59.133

Coefficients:
                  Estimate   Std. Error  t value      Pr(>|t|)
(Intercept)   40.467      6.586       6.144     5.28e-06 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 30.18 on 20 degrees of freedom

> # Plot the fitted line overlayed over the y vs. x scatter plot
> plot(spend,yields,xlab="spend", ylab="yields", main="Summary by mean");
> abline(coef(fm0),0)

> # Plots for diagnosis
> par(mfrow=c(2,2));
> plot(fm0)

> fm1 <- lm(yields~spend) # Regress yields on spend.
> summary(fm1)  # Yields = b_0 + b_1 (spend) + epsilon

Call:
lm(formula = yields ~ spend)

Residuals:
    Min      1Q  Median      3Q     Max
-42.422 -12.623  -8.171   8.832  50.526

Coefficients:
                Estimate     Std. Error t value Pr(>|t|)
(Intercept) 22.16269    7.08948   3.126  0.00556 **
spend        0.36317       0.09712   3.739  0.00139 **
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 23.5 on 19 degrees of freedom
Multiple R-Squared: 0.424,      Adjusted R-squared: 0.3936
F-statistic: 13.98 on 1 and 19 DF,  p-value: 0.001389

> names(fm1)  # List variables from the regression output
 [1] "coefficients"  "residuals"     "effects"       "rank"
 [5] "fitted.values" "assign"        "qr"            "df.residual"
 [9] "xlevels"       "call"          "terms"         "model"
> # Plot the fitted line overlayed over the y vs. x scatter plot
> plot(spend,yields,xlab="spend", ylab="yields", main="Simple Linear Regression");
> abline(coef(fm1))

> # Plots for diagnosis
> plot(fm1)

>
> fm2 <- lm(yields~spend+I(spend^2))
> summary(fm2)

Call:
lm(formula = yields ~ spend + I(spend^2))

Residuals:
    Min      1Q  Median      3Q     Max
-37.825  -9.128  -2.773   9.560  34.460

Coefficients:
                 Estimate     Std. Error     t value     Pr(>|t|)
(Intercept)  7.059322   9.986180   0.707       0.4887
spend        1.084708     0.369944   2.932       0.0089 **
I(spend^2)  -0.003990  0.001984  -2.011       0.0595 .
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 21.82 on 18 degrees of freedom
Multiple R-Squared: 0.5296,     Adjusted R-squared: 0.4774
F-statistic: 10.13 on 2 and 18 DF,  p-value: 0.001127

> plot(spend,yields,xlab="spend", ylab="yields", main="Polynomial Regression");
> hat<-predict(fm2)
> points(approx(spend, hat), col = 2, pch = "*")

Warning message:
Collapsing to unique x values in: approx(spend, hat)
>
> par(mfrow=c(2,2));
> plot(fm2)

> detach(tvad)
> q()
> # Exit (You don't need to save workspace image)
>
>