Multivariate Analysis of Variance (MANOVA) using R

Data

perform.A = c(63.3, 68.3, 86.7, 52.8, 75, 58, 69.5, 32.7, 60.9, 58.2)
contracts.A = c(24, 30, 33, 19, 30, 22, 28, 13, 17, 18)
A.gender = c(1, 1, 1, 0, 0, 0, 1, 0, 0, 0)

perform.B = c(72.8, 88.2, 80.8, 71.3, 81.5, 47.6, 81, 81.4, 83, 76, 74.5)
contracts.B = c(25, 26, 29, 22, 41, 15, 40, 37, 39, 29, 26)
B.gender = c(1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1)

perform.C = c(82.3, 89.7, 81, 85.1, 74.1, 75.9, 74.7, 81.1, 76.4, 81.8)
contracts.C = c(38, 42, 48, 42, 40, 45, 33, 31, 47, 38)
C.gender = c(0, 0, 1, 0, 1, 1, 1, 1, 1, 0)

sales = c(perform.A, perform.B, perform.C)
contracts = c(contracts.A , contracts.B , contracts.C)
gender = c(A.gender, B.gender, C.gender)
group = c(rep(1, 10), rep(2, 11), rep(3, 10))

gender =factor(gender, levels = c(0,1), labels=c("Woman", "Man")) 
group = factor(group, levels = c(1,2,3), labels = c("Α", "Β", "Γ"))

manova.df = data.frame(group, gender, sales, contracts)

1. Scatterplot among contracts and sales
plot(contracts, sales, xlab = "Πωλήσεις", ylab = "Contracts", 
     main = "Correlation between sales and contracts", 
     cex = 1.3, cex.main = 1.3, cex.axis = 1.3, cex.lab = 1.3)

2. Pearson correlation between sales and contracts
cor.test(contracts, sales)

3. Contingency frequency table
table(group, gender)

4. Contingency frequency table (2nd option)
library(expss)
manova.df = apply_labels(manova.df,
                         gender = "Gender",
                         group = "Group",
                         sales = "Sales",
                         contracts = "Contracts")
cro_cases(manova.df$group, manova.df$gender,  total_label = "Total")

5. MANOVA
library(car)
lm.manova = lm(cbind(sales,contracts) ~ group + gender + group*gender, manova.df, contrasts=list(group=contr.sum, gender=contr.sum))
man.res = Manova(lm.manova, type="III", test ="Pillai")
summary(man.res, multivariate=TRUE)

6. Homogeneity of dependent variables between levels of factors
leveneTest(sales ~ group * gender, manova.df, center = mean)
leveneTest(contracts ~ group * gender, manova.df, center = mean)

7. Homogeneity of covariance of dependent variables among all levels of the interaction of the factors
library(magrittr)
group1 = unclass(manova.df$group) %>% as.numeric 
gender1 = unclass(manova.df$gender) %>% as.numeric 
manova.df$interaction = factor(group1 * 10 + gender1)
BoxMTest(manova.df[, c(3,4)], manova.df[, 5])
Remark: Function BoxMTest is available here


8. Stepdown analysis
lm.anova.sales = lm(sales ~ group + gender + group*gender, manova.df, contrasts=list(group=contr.sum, gender=contr.sum))
Anova(lm.anova.sales, type="III") 

lm.anova.contracts = lm(contracts ~ group + gender + group*gender, manova.df, contrasts=list(group=contr.sum, gender=contr.sum))
Anova(lm.anova.contracts, type="III")

lm.post.hoc.ancova = lm(contracts ~ group + gender + group * gender + sales, manova.df, contrasts=list(group=contr.sum, gender=contr.sum))
Anova(lm.post.hoc.ancova, type="III")

Remark: Mahalanobis distance
mahal.manova.df = as.numeric(as.matrix(manova.df[, -c(1, 2)]))

mahal.manova.df = matrix(mahal.manova.df,nrow = 31,ncol = 3)
mahal_dist = mahalanobis(mahal.manova.df,colMeans(mahal.manova.df), cov(mahal.manova.df))
manova.df$MD = round(mahal_dist, 1)

hist(manova.df$MD, main = "Mahalanobis distance", 
     col = c("grey45"),  xlab = "Mahalanobis distance", 
     ylab = "Συχνότητα",  ylim = c(0,25))

# critical value: Any observation with Mahalanobis distance greater than qchisq(0.999, 2, lower.tail=TRUE) is considered to be a multivariate outlier.
manova.df$outlier = "No"
manova.df$outlier[manova.df$MD > qchisq(0.999, 2, lower.tail=TRUE)] = "Yes"

The above example is contained in the paragraph 5.6 of the book "Στατιστική ανάλυση με τη γλώσσα R" (in Greek, ISBN: 978-960-93-9445-1) published in Thessaloniki, 2017.

Comments