# Using R for a Linear Regression
# First we create vectors to hold our values for x and for y.
x <- c(0, 1.00, 2.00, 3.00, 4.00, 5.00)
y <- c(0, 0.94, 2.15, 3.19, 3.70, 4.21)
# Next, let's examine the help file for the lm() function.
help(lm)
# As we have done elsewhere, we assign the results of the lm()
# function to an object so that we can access its values; here we
# see that we can use the summary() function to get a statistical
# analysis of the model, the coef() function to extract the
# coefficients, the resid() function to extract the residual
# errors, and the fitted() function to get the predicted values
# of y for each x.
xy.mod <- lm(y ~ x)
xy.mod
summary(xy.mod)
coef(xy.mod)
resid(xy.mod)
fitted(xy.mod)
# Evaluating the results using the alr3 package (which you need
# to install before you can use it, e.g. install.packages("alr3")).
summary(xy.mod)
library("alr3")
# pureErrorAnova() partitions the residual sum of squares into
# lack-of-fit and pure-error components (requires alr3).
pureErrorAnova(xy.mod)
# Plotting the data and the model is essential, as a visual
# examination reveals details that the statistical summary does
# not reveal.
plot(x, resid(xy.mod), type = "l")
plot(x, y, pch = 19, col = "blue")
# abline() overlays the fitted straight line on the current
# scatter plot, so it must follow the plot() call above.
abline(xy.mod, lwd = 2, col = "blue", lty = 2)
# A straight line is not a good model for our data; let's build a
# quadratic (second-order polynomial) model, get a statistical
# summary of the model, and then look at the data and the model visually.
xy.poly <- lm(y ~ x + I(x^2))
summary(xy.poly)
pureErrorAnova(xy.poly)
plot(x, resid(xy.poly), type = "l")
plot(x, y, pch = 19, col = "blue")
# Evaluate the fitted curve on a fine grid; predict() is less
# error-prone than assembling coef() terms by hand and generalizes
# to higher-order models without editing the formula here.
x1 <- seq(0, 5, 0.01)
y1 <- predict(xy.poly, newdata = data.frame(x = x1))
lines(x1, y1, lwd = 2, col = "blue", lty = 2)
# Another thing we can do is to replicate some points; the repeated
# x values provide an estimate of pure error, which allows a formal
# lack-of-fit test of the model.
x2 <- c(0, 1, 2, 2, 3, 3, 4, 4, 5, 6)
y2 <- c(3, 5, 5, 7, 10, 12, 9, 11, 13, 15)
plot(x2, y2, pch = 19, col = "blue", ylim = c(0, 16))
xy.rep <- lm(y2 ~ x2)
summary(xy.rep)
# Overlay the fitted line on the scatter plot drawn above.
abline(xy.rep, lwd = 2, col = "blue", lty = 2)
pureErrorAnova(xy.rep)