SetUp1: A small sample of using R

Author

Yes

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

a <- 3;
b <- -1.5;
n1 <- 100;
n2 <- 10;

opar <- par(mfrow = c(1,2), oma = c(0, 0, 2.7, 0)); 
x <- seq(-5, 5, len=n1); # n1 equally spaced points in [-5, 5] #
x;
  [1] -5.00000000 -4.89898990 -4.79797980 -4.69696970 -4.59595960 -4.49494949
  [7] -4.39393939 -4.29292929 -4.19191919 -4.09090909 -3.98989899 -3.88888889
 [13] -3.78787879 -3.68686869 -3.58585859 -3.48484848 -3.38383838 -3.28282828
 [19] -3.18181818 -3.08080808 -2.97979798 -2.87878788 -2.77777778 -2.67676768
 [25] -2.57575758 -2.47474747 -2.37373737 -2.27272727 -2.17171717 -2.07070707
 [31] -1.96969697 -1.86868687 -1.76767677 -1.66666667 -1.56565657 -1.46464646
 [37] -1.36363636 -1.26262626 -1.16161616 -1.06060606 -0.95959596 -0.85858586
 [43] -0.75757576 -0.65656566 -0.55555556 -0.45454545 -0.35353535 -0.25252525
 [49] -0.15151515 -0.05050505  0.05050505  0.15151515  0.25252525  0.35353535
 [55]  0.45454545  0.55555556  0.65656566  0.75757576  0.85858586  0.95959596
 [61]  1.06060606  1.16161616  1.26262626  1.36363636  1.46464646  1.56565657
 [67]  1.66666667  1.76767677  1.86868687  1.96969697  2.07070707  2.17171717
 [73]  2.27272727  2.37373737  2.47474747  2.57575758  2.67676768  2.77777778
 [79]  2.87878788  2.97979798  3.08080808  3.18181818  3.28282828  3.38383838
 [85]  3.48484848  3.58585859  3.68686869  3.78787879  3.88888889  3.98989899
 [91]  4.09090909  4.19191919  4.29292929  4.39393939  4.49494949  4.59595960
 [97]  4.69696970  4.79797980  4.89898990  5.00000000
z <- a+b*x;          # a: intercept, b: slope #
plot(x,z,main=paste("sample size=", n1, "(a math. relation)"));
abline(a, b, lty=1,col=1);  # The true regression line #
sd1 <-2
y<-z+rnorm(n1)*sd1;   # rnorm(M) gives you M random numbers from N(0,1) #
plot(x,y, main=paste("sample size=",n1, ",s.d.=", sd1, "(a stat. relation)" ));
abline(a, b, lty=1,col=1);  # The true regression line #

opar <- par(mfrow = c(1,2), oma = c(0, 0, 2.7, 0)); 
hist(z,freq=FALSE, main=paste("sample size=",n1, ",s.d.=", 0))   # Histogram of z's #
hist(y,freq=FALSE, main=paste("sample size=",n1, ",s.d.=", sd1))   # Do you see any difference in these two histograms? #

You can add options to executable code like this

opar <- par(mfrow = c(1,2), oma = c(0, 0, 2.7, 0)); 
y<-z+rnorm(n1)*sd1;
plot(x,y, main=paste("sample size=",n1, ",s.d.=", sd1));
fm <- lm(y~x);       # Simple regression; y regress on x # 
summary(fm);         # Check if estimates for intercept and slope good #

Call:
lm(formula = y ~ x)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.8466 -1.3047 -0.0912  1.5445  5.0028 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  2.92370    0.19170   15.25   <2e-16 ***
x           -1.52351    0.06575  -23.17   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.917 on 98 degrees of freedom
Multiple R-squared:  0.8457,    Adjusted R-squared:  0.8441 
F-statistic:   537 on 1 and 98 DF,  p-value: < 2.2e-16
abline(fm, lty=3,col="red");           # Add the estimated reg. line to the current plot #
abline(a, b, lty=1,col=1);  # The true regression line #


sd2 <- 16;
t<-z+rnorm(n1)*sd2;
plot(x,t,main=paste("sample size=",n1, ",s.d.=", sd2));
abline(a, b, lty=1, col=1);
fm1 <- lm(t~x);
summary(fm1);

Call:
lm(formula = t ~ x)

Residuals:
    Min      1Q  Median      3Q     Max 
-34.930 -11.756   0.275   9.198  42.584 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   5.4956     1.4586   3.768 0.000281 ***
x            -1.2288     0.5002  -2.457 0.015788 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 14.59 on 98 degrees of freedom
Multiple R-squared:  0.058, Adjusted R-squared:  0.04839 
F-statistic: 6.034 on 1 and 98 DF,  p-value: 0.01579
abline(fm1, lty=3,col="blue");

opar <- par(mfrow = c(1,2), oma = c(0, 0, 2.7, 0)); 
hist(y,freq=FALSE, main=paste("sample size=",n1, ",s.d.=", sd1))
hist(t,freq=FALSE, main=paste("sample size=",n1, ",s.d.=", sd2))

opar <- par(mfrow = c(1,3), oma = c(0, 0, 2.7, 0)); 
x1 <- seq(-5, 5, len=n2)
z1 <- a+b*x1;
plot(x1,z1,main=paste("sample size=",n2));
abline(a, b, lty=1,col=1);

y1<-z1+rnorm(n2)*sd1;
plot(x1,y1,main=paste("sample size=",n2, ",s.d.=", sd1),ylim=c(-40,40));
fm2 <- lm(y1~x1);
summary(fm2);

Call:
lm(formula = y1 ~ x1)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.1507 -1.6908  0.2507  1.4595  4.5531 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   4.1965     0.9094   4.615 0.001722 ** 
x1           -1.6394     0.2849  -5.753 0.000427 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.876 on 8 degrees of freedom
Multiple R-squared:  0.8054,    Adjusted R-squared:  0.781 
F-statistic:  33.1 on 1 and 8 DF,  p-value: 0.0004273
abline(fm2,lty=3,col="red")
abline(a, b, lty=1,col=1);


t1<-z1+rnorm(n2)*sd2;
plot(x1,t1,main=paste("sample size=",n2, ",s.d.=", sd2),ylim=c(-40,40));
fm3 <- lm(t1~x1);
abline(fm3,lty=3,col="blue")
abline(a, b, lty=1,col=1);

summary(fm3);

Call:
lm(formula = t1 ~ x1)

Residuals:
    Min      1Q  Median      3Q     Max 
-27.866  -4.299  -0.389   6.030  32.432 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   -8.204      5.202  -1.577    0.153
x1             1.766      1.630   1.083    0.310

Residual standard error: 16.45 on 8 degrees of freedom
Multiple R-squared:  0.128, Adjusted R-squared:  0.01896 
F-statistic: 1.174 on 1 and 8 DF,  p-value: 0.3102
opar <- par(mfrow = c(1,2), oma = c(0, 0, 2.7, 0)); 

hist(y1,freq=FALSE, main=paste("sample size=",n2, ",s.d.=", sd1))
hist(t1,freq=FALSE, main=paste("sample size=",n2, ",s.d.=", sd2))

opar <- par(mfrow = c(1,2), oma = c(0, 0, 2.7, 0)); 
y<-z+rnorm(n1)*sd1;
plot(x,y, main=paste("sample size=",n1, ",s.d.=", sd1),ylim=c(-40,40));
fm <- lm(y~x);       # Simple regression; y regress on x # 
summary(fm);         # Check if estimates for intercept and slope good #

Call:
lm(formula = y ~ x)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.0602 -1.2436 -0.1034  1.4741  5.3235 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.01993    0.19426   15.55   <2e-16 ***
x           -1.43900    0.06662  -21.60   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.943 on 98 degrees of freedom
Multiple R-squared:  0.8264,    Adjusted R-squared:  0.8246 
F-statistic: 466.5 on 1 and 98 DF,  p-value: < 2.2e-16
abline(fm, lty=3,col="red");           # Add the estimated reg. line to the current plot #
abline(a, b, lty=1,col=1);  # The true regression line #
anova(fm);              # Obtain the ANOVA table    #
Analysis of Variance Table

Response: y
          Df  Sum Sq Mean Sq F value    Pr(>F)    
x          1 1760.46 1760.46  466.51 < 2.2e-16 ***
Residuals 98  369.82    3.77                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sd2 <- 16;
t<-z+rnorm(n1)*sd2;
plot(x,t,main=paste("sample size=",n1, ",s.d.=", sd2),ylim=c(-40,40));
abline(a, b, lty=1, col=1);
fm1 <- lm(t~x);
summary(fm1);

Call:
lm(formula = t ~ x)

Residuals:
    Min      1Q  Median      3Q     Max 
-35.900  -8.919  -0.285  12.051  33.830 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)   0.4008     1.5333   0.261   0.7943  
x            -1.0347     0.5259  -1.968   0.0519 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 15.33 on 98 degrees of freedom
Multiple R-squared:  0.038, Adjusted R-squared:  0.02818 
F-statistic: 3.871 on 1 and 98 DF,  p-value: 0.05195
abline(fm1, lty=3,col="blue");

anova(fm1);             #the ANOVA table    #
Analysis of Variance Table

Response: t
          Df  Sum Sq Mean Sq F value  Pr(>F)  
x          1   910.1  910.14  3.8712 0.05195 .
Residuals 98 23040.6  235.11                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
opar <- par(mfrow = c(1,2), oma = c(0, 0, 2.7, 0)); 
y1<-z1+rnorm(n2)*sd1;
plot(x1,y1,main=paste("sample size=",n2, ",s.d.=", sd1),ylim=c(-40,40));
fm2 <- lm(y1~x1);
summary(fm2);

Call:
lm(formula = y1 ~ x1)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.16072 -1.02541  0.05907  1.09046  2.11389 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   2.8860     0.5128   5.628 0.000494 ***
x1           -1.0951     0.1607  -6.815 0.000136 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.622 on 8 degrees of freedom
Multiple R-squared:  0.8531,    Adjusted R-squared:  0.8347 
F-statistic: 46.45 on 1 and 8 DF,  p-value: 0.0001357
abline(fm2, lty=3,col="red")
abline(a, b, lty=1,col=1);
anova(fm2);                #the ANOVA table    #
Analysis of Variance Table

Response: y1
          Df  Sum Sq Mean Sq F value    Pr(>F)    
x1         1 122.144  122.14  46.446 0.0001357 ***
Residuals  8  21.038    2.63                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
t1<-z1+rnorm(n2)*sd2;
plot(x1,t1,main=paste("sample size=",n2, ",s.d.=", sd2),ylim=c(-40,40));
fm3 <- lm(t1~x1);
abline(fm3, lty=3,col="blue")
abline(a, b, lty=1,col=1);

summary(fm3);

Call:
lm(formula = t1 ~ x1)

Residuals:
    Min      1Q  Median      3Q     Max 
-27.072  -7.881   3.840  10.788  21.049 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   8.7635     5.4121   1.619    0.144
x1           -0.2665     1.6958  -0.157    0.879

Residual standard error: 17.11 on 8 degrees of freedom
Multiple R-squared:  0.003077,  Adjusted R-squared:  -0.1215 
F-statistic: 0.02469 on 1 and 8 DF,  p-value: 0.879
anova(fm3);                #the ANOVA table    #
Analysis of Variance Table

Response: t1
          Df  Sum Sq Mean Sq F value Pr(>F)
x1         1    7.23   7.232  0.0247  0.879
Residuals  8 2343.30 292.912               
# Can you see the effect of sample size and standard deviation on the fitted line? #
# Play with R as much as possible this week, you will need to use R 
# for your homework shortly.    

# Always remove all objects before ending your R session # 
# rm(list=ls(all=TRUE));

# q() # To quit R #

# Come to my office if you have any question!    ^_^  Yes.