#-----------------------------# #--- Factorial Design in R ---# #-----------------------------# #First, some tools for creating designs # install.packages("AlgDesign") library(AlgDesign) # As an example, we'll create full factorial design with three factors (F1, F2, F3) and # with 3, 2, and 3 levels, respectively gen.factorial(levels = c(3,2,3), nVars = 3, center=TRUE, varNames=c("F1", "F2", "F3")) # When center = TRUE, the design is symmetric and the three levels will be signified by -1, 0, and 1 # The common 2^k design (two levels for each factor): dat = gen.factorial(levels = 2, nVars = 3, varNames=c("F1", "F2", "F3")) dat # the output is a data frame containing the factorial design # Factorial Design example from Box, Hunter, and Hunter, Statistics for Experimenters, John Wiley & Sons (1978). yield <- read.csv('FactorialDesign.csv') # Note that this is a two-level, three factor, full factorial design with Yield as the response (no replicates) # remove the "Run" column yield <- yield[-1] # There are data for two catalysts (0, 1); let's convert it into a factor yield$Catalyst = factor(yield$Catalyst) # A few plots can help us see the data plot(yield) par(mfrow=c(2,2)) interaction.plot(yield$Temp,yield$Conc,yield$Yield) interaction.plot(yield$Temp,yield$Catalyst,yield$Yield) interaction.plot(yield$Conc,yield$Catalyst,yield$Yield) par(mfrow=c(1,1)) # install.packages("lattice") library(lattice) xyplot(Yield ~ Temp | Catalyst, data = yield, panel = function(x, y, ...) { panel.xyplot(x, y, ...) panel.lmline(x, y, ...) } ) xyplot(Yield ~ Conc | Catalyst, data = yield, panel = function(x, y, ...) { panel.xyplot(x, y, ...) panel.lmline(x, y, ...) } ) par(mfrow=c(2,2)) boxplot(Yield~Temp, data=yield, main="Yield by Temperature", xlab="Temperature (C)",ylab="Percent Yield") boxplot(Yield~Conc, data=yield, main="Yield by Concentration", xlab="Concentraion (wt%)",ylab="Percent Yield") boxplot(Yield~Catalyst, data=yield, main="Yield by Catalyst", xlab="Catalyst Type",ylab="Percent Yield") par(mfrow=c(1,1)) # Alternately, combine boxpolts into one graph boxplot(Yield ~ Temp*Conc, data=yield, ylab="% Yield", xlab="Temp.Conc") boxplot(Yield ~ Temp*Catalyst, data=yield, ylab="% Yield", xlab="Temp.Catalyst") # If you find the printing of significance stars annoying, you can turn them off (for model and ANOVA summary outputs) options(show.signif.stars=F) # let's model the main effects model1 = lm(Yield ~ ., data = yield) summary(model1) # Now we'll add two-factor interactions model2 = lm(Yield ~ .^2, data = yield) summary(model2) #compare the two models anova(model1, model2) # Finally, the full model including three-factor interactions model3 = lm(Yield ~ .^3, data = yield) summary(model3) # Note that since we have no replicates, we have no way of assessing the significance of any of the coefficients of this full model with all interactions # also note that we could model this with some, but not all of the interactions present model4 = lm(Yield ~ Conc + Temp*Catalyst, data = yield) summary(model4) # Note that Y ~ X1 * X2 is the same as Y ~ X1 + X2 + X1 : X2 # and ":" means 'interaction between' anova(model4,model2) # Another way to model the response as a function of the factors is with an anova table. This requires all variables to be factors. yield.aov = yield yield.aov$Conc = factor(yield.aov$Conc) yield.aov$Temp = factor(yield.aov$Temp) aov1.out = aov(Yield ~ ., data=yield.aov) summary(aov1.out) aov2.out = aov(Yield ~ .^2, data=yield.aov) summary(aov2.out) # This provides all of the means commonly calculated in ANOVA analysis model.tables(aov2.out, type="means", se=T) aov3.out = aov(Yield ~ .^3, data=yield.aov) summary(aov3.out) # Based on these results, I think a Conc + Temp*Catalyst model may be the best aov4.out = aov(Yield ~ Conc + Temp*Catalyst, data=yield.aov) summary(aov4.out) # You can plot the ANOVA results in a couple of ways: plot(aov4.out, 1) # residuals versus fitted values plot(aov4.out, 2) # QQ plot