# Using R for Confidence Intervals and Signficance Tests

setwd("~/Box Sync/p-harvey/Teaching/Chem 351/Class Units/04.Confidence_Intervals")

# let's start by viewing the help file for the t-distribution,
# which identifies four functions that make use of this
# distribution; these are follow the same format that we saw
# earlier for the normal distribution; of these, the one of
# interest to use is qt, which gives the value of t-crit that
# yields a particular probability

help(TDist)

# let's view the help file for the t-test, which we can use for
# any parametric t-test; note providing x only (that is y = NULL)
# gives a t-test of x-bar vs. mu, that providing both x and y
# allows for a t-test between two means (with mu = 0 being the
# default test that means are the same), and that paired is used
# for a paired t-test; all tests can be run as two-sided or as
# one-sided; note that we enter a confidence level of 1-alpha

help(t.test)

# and the help file for the F-test, which tests for a defined
# ratio and allows for either a two-sided or a one-sided test

help(var.test)

# SPS03.1 (H0: xbar = mu; HA: xbar ≠ mu; two-tailed test; alpa =
# 0.05); this requires a t-test of an experimental mean to a
# theoretical mean; as we do not have the results for individual
# trials, we cannot use the t.test function and must calculate by
# "hand"

# first we calculate texp

texp = abs((0.513 - 0.520))*sqrt(3)/0.0500
texp

# next, we find tcrit using qt; note that we set p to 0.05/2
# because for a two-tailed test half of the proability must lie
# at either end of the distribution; if this was a one-tailed
# test, we would use p = 0.5; the choice of lower.tail is not
# important for a two-tail test as we can just use the absolute value

tcrit = abs(qt(p = 0.05/2, df = 2))
tcrit

# the texp of 0.243 is less than the tcrit of 4.30 so we retain
# the null hypothesis and find no evidence of a determinate error

# SPS03.2 (H0: xbar = mu; HA: xbar > mu; one-tailed test; alpha =
# 0.05); this requires a one-tailed t-test of an experimental
# mean to a defined value (xbar vs. mu)

nitrate = c(51.0, 51.3, 51.6, 50.9)

t.test(nitrate, mu = 50.0, alternative = "greater", 
       conf.level = 0.95)

# here we see that texp is 7.59 but the value of tcrit is not
# reported; the p-value, however, gives the probability for which
# we can reject the null hypothesis; since this is less than
# 0.05, we reject H0 and accept HA; the results also report the
# 95% CI for the mean, which has a lower limit greater than mu,
# again leading us to reject H0 and accept HA; to check, let's
# calculate by hand

texp = abs((mean(nitrate) - 50.0))*sqrt(4)/sd(nitrate); texp
tcrit = abs(qt(p = 0.05, df = 3, lower.tail = FALSE)); tcrit

# the script file xbar-mu.R completes the t-test and presents it
# visually

source("xbar-mu.R")
xbar_mu(nitrate, truemean = 50.0, ha = "greater", 
        alpha = 0.05, xlab = "ppm nitrate")
        
xbar_mu(nitrate, truemean = 51.0, ha = "greater", 
        alpha = 0.05, xlab = "ppm nitrate")

# SPS03.3 (H0: xbar1 = xbar2; HA: xbar1 ≠ xbar2; two-tailed test;
# alpah = 0.05); this requires a two-tailed unpaired t-test

lab1 = c(0.470, 0.448, 0.463, 0.449, 0.482, 0.454, 0.477, 0.409)
lab2 = c(0.529, 0.490, 0.489, 0.521, 0.486, 0.502)

mean(lab1); sd(lab1)
mean(lab2); sd(lab2)

# first, we have to complete an F-test

var.test(lab1, lab2, ratio = 1, alternative = "two.sided", 
         conf.level = 0.95)

# which gives an Fexp of 1.591 and a p-value of 0.6298; thus, we
# have no reason to believe that there is significant difference
# between the variances and will set var.equal when running the
# t-test

t.test(lab1, lab2, mu = 0, alternative = "two.sided", 
       paired = FALSE, var.equal = TRUE, conf.level = 0.95)

# significance tests for paired data (data from last class)

a.w = c(0.430, 0.266, 0.457, 0.531, 0.707, 0.716)

s.w = c(0.415, 0.238, 0.390, 0.410, 0.605, 0.609)

t.test(a.w, s.w, mu = 0, alternative = "two.sided", 
       paired = TRUE)

t.test(a.w - s.w, mu = 0, alternative = "two.sided")

xbar_mu(a.w - s.w)

# non-parametric tests for means that makes no assumption about
# the underlying distribution

help(wilcox.test)

analyte = c(104, 79, 98, 150, 87, 136, 101); analyte

wilcox.test(analyte, mu = 95)

s1 = c(9.8, 10.2, 10.7, 9.5, 10.5); s1

s2 = c(7.7, 9.7, 8.0, 9.9, 9.0); s2

wilcox.test(s1, s2, alternative = "two.sided", conf.level = 0.95)

# tests for outliers (first need to install the outliers package)

library(outliers)

help(dixon.test)

penny = round(c(rnorm(9, 2.5, 0.04), 3.11), digits = 3); penny

dixon.test(penny, two.sided = FALSE)

help(grubbs.test) # Gexp = (outlier - mean)/sd

grubbs.test(penny)