#---------------------------------------------------#
#--- Plotting, Moment and Normality Testing in R ---#
#---------------------------------------------------#


# Install e1071 (provides skewness() and kurtosis()) only if it is not already
# available, so re-running this script does not re-download the package.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}

# Attach the package; this must be done once in every new R session.
library(e1071)

# Import the data: use .csv format if at all possible.
# The file is looked up relative to the current working directory.
weights <- read.csv("NBS Weight Data.csv")

# Everything below reads weights$Result, so fail here with a clear message
# instead of a cryptic error from a later plotting or statistics call.
if (is.null(weights$Result)) {
  stop("'NBS Weight Data.csv' does not contain a 'Result' column", call. = FALSE)
}

# Create a histogram of the measurements on a density scale.
# `breaks = 12` is only a suggested number of bins; hist() may adjust it to
# obtain round break points.
# NOTE(review): with freq = FALSE the y-axis shows density (bar areas sum to 1),
# which equals relative frequency only when bins are one unit wide -- confirm
# that the y-axis label says what you intend.
hist(
  weights$Result,
  breaks = 12,
  freq = FALSE,  # spelled out: F is an ordinary variable that can be reassigned
  xlab = "Weight - 10g (mg)",
  ylab = "Relative Frequency",
  main = "Histogram of NBS Measured Weights"
)

# Create a box-and-whisker plot.
# The box spans Q1 to Q3 with a line at the median; IQR = Q3 - Q1.
# Upper whisker: the largest observation that is no more than 1.5 * IQR above
#   the box (this is the maximum when there are no high outliers).
# Lower whisker: the smallest observation that is no more than 1.5 * IQR below
#   the box (this is the minimum when there are no low outliers).
# Observations beyond the whiskers are drawn as individual points.
boxplot(
  weights$Result,
  ylab = "Weight - 10g (mg)",
  main = "100 NBS Weight Measurements"
)

# Create a normal probability (Q-Q) plot. qqnorm() invisibly returns the plotted
# coordinates: x = theoretical normal quantiles, y = the observed data.
qqout <- qqnorm(
  weights$Result,
  ylab = "Weight - 10g (mg)",
  main = "100 NBS Weight Measurements"
)

# Add a reference line to the normal probability plot.
qqline(weights$Result)

# Correlation coefficient of the Q-Q plot (close to 1 for near-normal data).
# qqnorm() keeps missing values in place in its output, so drop incomplete
# pairs here; otherwise a single NA in the data makes the result NA.
cor(qqout$x, qqout$y, use = "complete.obs")


# G1: sample skewness of the measurements, ignoring missing values.
# type = 1 is the classical moment estimator; type = 2 is the sample-size
# adjusted estimator G1 whose standard error is computed below.
G1 <- skewness(weights$Result, na.rm = TRUE, type = 2)

# Standard error of G1 under normality.
# n counts only the non-missing observations so that it matches the data
# skewness() actually used (na.rm = TRUE above).
n <- sum(!is.na(weights$Result))
SE_G1 <- sqrt(6 * n * (n - 1) / ((n - 2) * (n + 1) * (n + 3)))

# Two-sided z-test of H0: skewness = 0.
# pnorm(q) is the area under the standard normal curve below q, so
# 2 * pnorm(-|z|) is the total area in both tails. Taking the absolute value
# is essential: without it a negative z1 would yield a "p-value" above 1.
z1 <- G1 / SE_G1
p <- 2 * pnorm(-abs(z1))
p

# G2: sample excess kurtosis of the measurements, ignoring missing values.
# type = 1 is the classical moment estimator; type = 2 is the sample-size
# adjusted estimator G2 whose standard error is computed below.
G2 <- kurtosis(weights$Result, na.rm = TRUE, type = 2)

# Standard error of G2 (the kurtosis estimator) under normality.
# n counts only the non-missing observations, matching na.rm = TRUE above.
# The closed form below is algebraically equal to
#   2 * SE_G1 * sqrt((n^2 - 1) / ((n - 3) * (n + 5)))
# but depends only on n, and it avoids the integer product n * n, which
# overflows to NA for n > 46340.
n <- sum(!is.na(weights$Result))
SE_G2 <- sqrt(24 * n * (n - 1)^2 / ((n - 3) * (n - 2) * (n + 3) * (n + 5)))

# Two-sided z-test of H0: excess kurtosis = 0.
# The absolute value keeps the p-value in [0, 1] when z2 is negative
# (i.e. when the data are flatter than a normal distribution).
z2 <- G2 / SE_G2
p <- 2 * pnorm(-abs(z2))
p

#perform the Shapiro-Wilk test for normality on the measured weights
shapiro.test(weights$Result)
#reject the null hypothesis that the data are normally distributed if the
#p-value is less than your significance level (e.g., 0.05)