# Using R for Confidence Intervals and Signficance Tests setwd("~/Box Sync/p-harvey/Teaching/Chem 351/Class Units/04.Confidence_Intervals") # let's start by viewing the help file for the t-distribution, # which identifies four functions that make use of this # distribution; these are follow the same format that we saw # earlier for the normal distribution; of these, the one of # interest to use is qt, which gives the value of t-crit that # yields a particular probability help(TDist) # let's view the help file for the t-test, which we can use for # any parametric t-test; note providing x only (that is y = NULL) # gives a t-test of x-bar vs. mu, that providing both x and y # allows for a t-test between two means (with mu = 0 being the # default test that means are the same), and that paired is used # for a paired t-test; all tests can be run as two-sided or as # one-sided; note that we enter a confidence level of 1-alpha help(t.test) # and the help file for the F-test, which tests for a defined # ratio and allows for either a two-sided or a one-sided test help(var.test) # SPS03.1 (H0: xbar = mu; HA: xbar ≠ mu; two-tailed test; alpa = # 0.05); this requires a t-test of an experimental mean to a # theoretical mean; as we do not have the results for individual # trials, we cannot use the t.test function and must calculate by # "hand" # first we calculate texp texp = abs((0.513 - 0.520))*sqrt(3)/0.0500 texp # next, we find tcrit using qt; note that we set p to 0.05/2 # because for a two-tailed test half of the proability must lie # at either end of the distribution; if this was a one-tailed # test, we would use p = 0.5; the choice of lower.tail is not # important for a two-tail test as we can just use the absolute value tcrit = abs(qt(p = 0.05/2, df = 2)) tcrit # the texp of 0.243 is less than the tcrit of 4.30 so we retain # the null hypothesis and find no evidence of a determinate error # SPS03.2 (H0: xbar = mu; HA: xbar > mu; one-tailed test; alpha = # 0.05); this requires a one-tailed t-test of an experimental # mean to a defined value (xbar vs. mu) nitrate = c(51.0, 51.3, 51.6, 50.9) t.test(nitrate, mu = 50.0, alternative = "greater", conf.level = 0.95) # here we see that texp is 7.59 but the value of tcrit is not # reported; the p-value, however, gives the probability for which # we can reject the null hypothesis; since this is less than # 0.05, we reject H0 and accept HA; the results also report the # 95% CI for the mean, which has a lower limit greater than mu, # again leading us to reject H0 and accept HA; to check, let's # calculate by hand texp = abs((mean(nitrate) - 50.0))*sqrt(4)/sd(nitrate); texp tcrit = abs(qt(p = 0.05, df = 3, lower.tail = FALSE)); tcrit # the script file xbar-mu.R completes the t-test and presents it # visually source("xbar-mu.R") xbar_mu(nitrate, truemean = 50.0, ha = "greater", alpha = 0.05, xlab = "ppm nitrate") xbar_mu(nitrate, truemean = 51.0, ha = "greater", alpha = 0.05, xlab = "ppm nitrate") # SPS03.3 (H0: xbar1 = xbar2; HA: xbar1 ≠ xbar2; two-tailed test; # alpah = 0.05); this requires a two-tailed unpaired t-test lab1 = c(0.470, 0.448, 0.463, 0.449, 0.482, 0.454, 0.477, 0.409) lab2 = c(0.529, 0.490, 0.489, 0.521, 0.486, 0.502) mean(lab1); sd(lab1) mean(lab2); sd(lab2) # first, we have to complete an F-test var.test(lab1, lab2, ratio = 1, alternative = "two.sided", conf.level = 0.95) # which gives an Fexp of 1.591 and a p-value of 0.6298; thus, we # have no reason to believe that there is significant difference # between the variances and will set var.equal when running the # t-test t.test(lab1, lab2, mu = 0, alternative = "two.sided", paired = FALSE, var.equal = TRUE, conf.level = 0.95) # significance tests for paired data (data from last class) a.w = c(0.430, 0.266, 0.457, 0.531, 0.707, 0.716) s.w = c(0.415, 0.238, 0.390, 0.410, 0.605, 0.609) t.test(a.w, s.w, mu = 0, alternative = "two.sided", paired = TRUE) t.test(a.w - s.w, mu = 0, alternative = "two.sided") xbar_mu(a.w - s.w) # non-parametric tests for means that makes no assumption about # the underlying distribution help(wilcox.test) analyte = c(104, 79, 98, 150, 87, 136, 101); analyte wilcox.test(analyte, mu = 95) s1 = c(9.8, 10.2, 10.7, 9.5, 10.5); s1 s2 = c(7.7, 9.7, 8.0, 9.9, 9.0); s2 wilcox.test(s1, s2, alternative = "two.sided", conf.level = 0.95) # tests for outliers (first need to install the outliers package) library(outliers) help(dixon.test) penny = round(c(rnorm(9, 2.5, 0.04), 3.11), digits = 3); penny dixon.test(penny, two.sided = FALSE) help(grubbs.test) # Gexp = (outlier - mean)/sd grubbs.test(penny)