# first, we need to enter the data used in SPS06, which we do # using coded values for the factors temp = c(1, 1, 1, 1, -1, -1, -1, -1) ppd = c(-1, 1, 1, -1, -1, 1, 1, -1) ph = c(1, 1, -1, -1, 1, 1, -1, -1) rate = c(6.69, 11.71, 14.79, 8.05, 6.33, 11.11, 14.08, 7.59) # second, we build a model of the response surface; note that the # use of * means that the model will include first-order effects # for each facator and all possible interactions between the # factors; because the number of parameters in the model (8) is # the same as the number of data points, a summary yields not # useful statistical information cer.lm = lm(rate ~ temp * ppd * ph) summary(cer.lm) # if our data is in a data frame, then we need to tell R where to # find the data, which we can do using data = data.frame or by # using data.frame$column name cer = data.frame(temp, ppd, ph, rate) cer.lm2 = lm(rate ~ temp * ppd * ph, data = cer) summary(cer.lm2) cer.lm3 = lm(cer$rate ~ cer$temp * cer$ppd * cer$ph) summary(cer.lm3) # third, we evaluate the model to identify important # coefficients; if the responses are just random noise # independent of the factors, then we expect that the # coefficients should be small in magnitude and distributed # randomly (that is, a normal distribution) around a mean given # by the y-intercept; thus, they should fall along a qqline on a # qqplot qqnorm(coef(cer.lm)[-1]) qqline(coef(cer.lm)[-1]) # fourth, we refine the model by keeping only those parameters # that seem likely to have a real affect on the response; in this # case that is ppd, ph, and their interaction cer.new = lm(rate ~ ppd * ph) summary(cer.new) # one limitation to our approach is that by testing each factor # at only two levels, we are limited to a model that is a # straight-line; if we wish to evalute if this is reasonable, # then we can make replicate measurements at the center of our # experimental design and use a t-test to see if there is a # significant difference between the y-intercept and the mean of # the replciates finding, in this case, that there is no evidence # of a significant difference cer.rep = c(10.06, 9.81, 10.04, 10.16, 10.24) mean(cer.rep) sd(cer.rep) t.test(cer.rep, mu = coef(cer.new)[1], alternative = "two.sided") # fifth, once we have a model we can use it to predict the # response for combinations of factor levels; we do this using # the predict function help(predict.lm) cer.pred = data.frame(temp = -0.600, ppd = -0.291, ph = 0.75) predict(cer.new, cer.pred) # sixth, visualizing a model is always a good idea; a plot of the # response versus two factors requires three-dimensions with the # response on the z-axis and the factors on the x-axis and the # y-axis; we can do this using the plot3D package; first we # create vectors to hold values for ppd and for ph that we can # use to calculate the response ppd.x = seq(-1, 1, 0.1) ph.y = seq(-1, 1, 0.1) # next, we write a function that will take a value for x anf for # y and return the response model = function(x, y) {coef(cer.new)[1] + coef(cer.new)[2]*x + coef(cer.new)[3]*y + coef(cer.new)[4]*x*y} # finally, we use the outer function to calculate the response # for every possible combination of x and of y help(outer) rate.z = outer(ppd.x, ph.y, model) View(rate.z) # now we can plot the response surface using perspective plots or contour plots library(plot3D) persp3D(x = ppd.x, y = ph.y, z = rate.z, phi = 0, theta = 30) persp3D(x = ppd.x, y = ph.y, z = rate.z, phi = 30, theta = 30) persp3D(x = ppd.x, y = ph.y, z = rate.z, phi = 0, theta = 60) contour2D(x = ppd.x, y = ph.y, z = rate.z) persp3D(x = ppd.x, y = ph.y, z = rate.z, phi = 0, theta = 30, contour = TRUE, ticktype = "detailed")