# Using R for Confidence Intervals and Signficance Tests
setwd("~/Box Sync/p-harvey/Teaching/Chem 351/Class Units/04.Confidence_Intervals")
# let's start by viewing the help file for the t-distribution,
# which identifies four functions that make use of this
# distribution; these are follow the same format that we saw
# earlier for the normal distribution; of these, the one of
# interest to use is qt, which gives the value of t-crit that
# yields a particular probability
help(TDist)
# let's view the help file for the t-test, which we can use for
# any parametric t-test; note providing x only (that is y = NULL)
# gives a t-test of x-bar vs. mu, that providing both x and y
# allows for a t-test between two means (with mu = 0 being the
# default test that means are the same), and that paired is used
# for a paired t-test; all tests can be run as two-sided or as
# one-sided; note that we enter a confidence level of 1-alpha
help(t.test)
# and the help file for the F-test, which tests for a defined
# ratio and allows for either a two-sided or a one-sided test
help(var.test)
# SPS03.1 (H0: xbar = mu; HA: xbar ≠ mu; two-tailed test; alpa =
# 0.05); this requires a t-test of an experimental mean to a
# theoretical mean; as we do not have the results for individual
# trials, we cannot use the t.test function and must calculate by
# "hand"
# first we calculate texp
texp = abs((0.513 - 0.520))*sqrt(3)/0.0500
texp
# next, we find tcrit using qt; note that we set p to 0.05/2
# because for a two-tailed test half of the proability must lie
# at either end of the distribution; if this was a one-tailed
# test, we would use p = 0.5; the choice of lower.tail is not
# important for a two-tail test as we can just use the absolute value
tcrit = abs(qt(p = 0.05/2, df = 2))
tcrit
# the texp of 0.243 is less than the tcrit of 4.30 so we retain
# the null hypothesis and find no evidence of a determinate error
# SPS03.2 (H0: xbar = mu; HA: xbar > mu; one-tailed test; alpha =
# 0.05); this requires a one-tailed t-test of an experimental
# mean to a defined value (xbar vs. mu)
nitrate = c(51.0, 51.3, 51.6, 50.9)
t.test(nitrate, mu = 50.0, alternative = "greater",
conf.level = 0.95)
# here we see that texp is 7.59 but the value of tcrit is not
# reported; the p-value, however, gives the probability for which
# we can reject the null hypothesis; since this is less than
# 0.05, we reject H0 and accept HA; the results also report the
# 95% CI for the mean, which has a lower limit greater than mu,
# again leading us to reject H0 and accept HA; to check, let's
# calculate by hand
texp = abs((mean(nitrate) - 50.0))*sqrt(4)/sd(nitrate); texp
tcrit = abs(qt(p = 0.05, df = 3, lower.tail = FALSE)); tcrit
# the script file xbar-mu.R completes the t-test and presents it
# visually
source("xbar-mu.R")
xbar_mu(nitrate, truemean = 50.0, ha = "greater",
alpha = 0.05, xlab = "ppm nitrate")
xbar_mu(nitrate, truemean = 51.0, ha = "greater",
alpha = 0.05, xlab = "ppm nitrate")
# SPS03.3 (H0: xbar1 = xbar2; HA: xbar1 ≠ xbar2; two-tailed test;
# alpah = 0.05); this requires a two-tailed unpaired t-test
lab1 = c(0.470, 0.448, 0.463, 0.449, 0.482, 0.454, 0.477, 0.409)
lab2 = c(0.529, 0.490, 0.489, 0.521, 0.486, 0.502)
mean(lab1); sd(lab1)
mean(lab2); sd(lab2)
# first, we have to complete an F-test
var.test(lab1, lab2, ratio = 1, alternative = "two.sided",
conf.level = 0.95)
# which gives an Fexp of 1.591 and a p-value of 0.6298; thus, we
# have no reason to believe that there is significant difference
# between the variances and will set var.equal when running the
# t-test
t.test(lab1, lab2, mu = 0, alternative = "two.sided",
paired = FALSE, var.equal = TRUE, conf.level = 0.95)
# significance tests for paired data (data from last class)
a.w = c(0.430, 0.266, 0.457, 0.531, 0.707, 0.716)
s.w = c(0.415, 0.238, 0.390, 0.410, 0.605, 0.609)
t.test(a.w, s.w, mu = 0, alternative = "two.sided",
paired = TRUE)
t.test(a.w - s.w, mu = 0, alternative = "two.sided")
xbar_mu(a.w - s.w)
# non-parametric tests for means that makes no assumption about
# the underlying distribution
help(wilcox.test)
analyte = c(104, 79, 98, 150, 87, 136, 101); analyte
wilcox.test(analyte, mu = 95)
s1 = c(9.8, 10.2, 10.7, 9.5, 10.5); s1
s2 = c(7.7, 9.7, 8.0, 9.9, 9.0); s2
wilcox.test(s1, s2, alternative = "two.sided", conf.level = 0.95)
# tests for outliers (first need to install the outliers package)
library(outliers)
help(dixon.test)
penny = round(c(rnorm(9, 2.5, 0.04), 3.11), digits = 3); penny
dixon.test(penny, two.sided = FALSE)
help(grubbs.test) # Gexp = (outlier - mean)/sd
grubbs.test(penny)