# Using R for an Analysis of Variance setwd("~/Box Sync/p-harvey/Teaching/Chem 351/Class Units/05.Analysis_of_Variance") # examine the help file for the function aov() noting that it # requires a formula to specify the model (more on this in a bit) # and a data frame with the data help(aov) # we beging by creating vectors for each treatment and then pull # them together into a data frame tablet.a = c(5.67, 5.67, 5.55, 5.57) tablet.b = c(5.75, 5.47, 5.43, 5.45) tablet.c = c(4.74, 4.45, 4.65, 4.94) tablets = data.frame(tablet.a, tablet.b, tablet.c); tablets # this is still not the right format as we need the data frame to # have just two columns, one with the data and one with a set of # id codes that identify the source of each entry; we can # accomplish this using the stack() function help(stack) tablets_1 = stack(tablets); tablets_1 unstack(tablets_1) # R creates default names for the data frame's columns, but these # are not particularly informative, so let's change them to # something more meaningful names(tablets_1) names(tablets_1) = c("conc.fe", "tab.id") tablets_1 # one limitation to the method outlined above for creating data # frames is that it will not work if the individual data sets do # not have equal numbers of entries; here is another way to # create a data frame for anova fe = c(tablet.a, tablet.b, tablet.c); fe id = c(rep("tabket.a", times = length(tablet.a)), rep("tablet.b", times = length(tablet.b)), rep("tablet.c", times = length(tablet.c))); id tablets_2 = data.frame(fe, id); tablets_2 # and another way to accomplish this as well tablet.d = c(5.67, 5.67, 5.55, 5.57, 5.65) tablet.e = c(5.75, 5.47, 5.43, 5.45) tablet.f = c(4.74, 4.45, 4.65, 4.94) tablet.e[5] = NA; tablet.e tablet.f[5] = NA; tablet.f tablets_new = data.frame(tablet.d, tablet.e, tablet.f); tablets tablets_4 = stack(tablets_new); tablets_4 tablets_4 = na.omit(tablets_4); tablets_4 # now we are ready to learn about formulas help(formula) boxplot(tablets) boxplot(conc.fe ~ tab.id, data = tablets_1, col = c("blue", "green", "red")) tablets.out = aov(conc.fe ~ tab.id, data = tablets_1) summary(tablets.out) # evaluating sources of difference when a signficant difference # is likely given anova results help(TukeyHSD) TukeyHSD(tablets.out, conf.level = 0.95) tablets.hsd = TukeyHSD(tablets.out, conf.level = 0.95) plot(tablets.hsd) # let's pretty this up a bit; first let's reset the margins for # the plot's bottom, left, top, and right sides old.par = par(mar = c(5, 8, 4, 2)) # and then create the plot using las = 1 to set the y-axis labels # so that they are printed horizontally plot(tablets.hsd, las = 1) # and then reset the original plotting parameters par(old.par) # we can extend anova to more than one independent variable; here # we add a second independent variable tablets acid = rep(c("HCl", "HCl", "HNO3", "HNO3"), times = 3) tablets_5 = data.frame(tablets_1, acid); tablets_5 tablets_5.out = aov(conc.fe ~ tab.id * acid, data = tablets_5) summary(tablets_5.out) tablets_5.hsd = TukeyHSD(tablets_5.out, conf.level = 0.95) tablets_5.hsd old.par = par(mar = c(5, 12, 4, 2)) plot(tablets_5.hsd, las = 1) par(old.par)