# Multiple Linear Regression (MLR)

library(readr)
library(plot3D)
library(scatterplot3d)

allSpec <- read_csv("allSpec.csv")
load("co_stds.RData")
load("cr_stds.RData")
load("cu_stds.RData")
load("ni_stds.RData")

source("MLRScript.R")

# plot overlay of spectra for metals
wavelengths = as.numeric(colnames(allSpec[8:642]))
plot(x = wavelengths, y = allSpec[1, 8:642], type = "l", xlab = "wavelengths (nm)", ylab = "absorbance", col = "blue", lwd = 2)
lines(x = wavelengths, y = allSpec[6, 8:642], col = "red", lwd = 2)
lines(x = wavelengths, y = allSpec[11, 8:642], col = "orange", lwd = 2)
lines(x = wavelengths, y = allSpec[16, 8:642], col = "forestgreen", lwd = 2)
legend(x = 820, y = 0.4, legend = c("Cu", "Co", "Cr", "Ni"), lwd = 2, col = c("blue", "red", "orange", "forestgreen"), bty = "n")

# select 16 wavelengths to use (wavelength_ids are column ids)
wavelength_ids = seq(8,642,40)
abline(v = as.numeric(colnames(allSpec[wavelength_ids])), lty = 3, lwd = 1)

# subset data to use for this example (24 samples total) 
ex_one = allSpec[c(1, 6, 11, 21:25, 38:53), ]

# step one: calculate and examine eb values using standards
abs_stds = allSpec[1:15,wavelength_ids]
conc_stds = as.matrix(data.frame(allSpec[1:15, 4], allSpec[1:15,5], allSpec[1:15,6]))
eb_pred = findeb(abs_stds,conc_stds)
eb_pred

# step two: calculate and examine predicted concentrations for samples
abs_samples = allSpec[c(21:25, 38:53), wavelength_ids]
pred_conc = findconc(abs_samples,eb_pred)
pred_conc

# step three: extract and examine real concentrations
real_conc = as.matrix(data.frame(allSpec[c(21:25, 38:53), 4], allSpec[c(21:25,38:53), 5], allSpec[c(21:25,38:53), 6]))
real_conc

# step four: calculate and report errors
#   first find the absolute error for each analyte and sample
conc_error = real_conc - pred_conc
conc_error

#   next, find the mean error for each analyte
help(apply)
means = apply(conc_error, 2, mean)
round(means, digits = 6)

#   then, find the standard deviation for each analyte
sds = apply(conc_error, 2, sd)
round(sds, digits = 6)

#   now we can report the confidence intervals for each analyte
conf.int = abs(qt(0.05/2, 20)) * sds
round(conf.int, digits = 6)

# identify maximum error for each analyte
max(abs(conc_error[,1]))
max(abs(conc_error[,2]))
max(abs(conc_error[,3]))
which.max(abs(conc_error[,1]))
which.max(abs(conc_error[,2]))
which.max(abs(conc_error[,3]))
pred_conc[9,1]
real_conc[9,1]
pred_conc[16,2]
real_conc[16,2]
pred_conc[9,3]
real_conc[9,3]