#-*- S -*- 

# Chapter 13   Multivariate Analysis

library(MASS)
library(mva)
postscript(file="ch13.ps", width=8, height=6, pointsize=9)
options(width=65, digits=5)

data(swiss)
swiss.x <- as.matrix(swiss[,-1])

# 13.1  Graphical methods

data(iris3)
ir <- rbind(iris3[,,1], iris3[,,2], iris3[,,3])
ir.species <- c(rep("s",50), rep("c",50), rep("v",50))
#if(interactive()) brush(ir)

ir.pca <- princomp(log(ir), cor=T)
ir.pca
summary(ir.pca)
plot(ir.pca)
loadings(ir.pca)
ir.pc <- predict(ir.pca)
eqscplot(ir.pc[,1:2], type="n", 
     xlab="first principal component", 
     ylab = "second principal component")
text(ir.pc[,1:2], labels=ir.species)

ir.scal <- cmdscale(dist(ir), k = 2, eig = T)
ir.scal$points[, 2] <- -ir.scal$points[, 2]
eqscplot(ir.scal$points, type="n")
text(ir.scal$points, labels=ir.species, cex=0.8)
distp <- dist(ir)
dist2 <- dist(ir.scal$points)
sum((distp - dist2)^2)/sum(distp^2)

ir.sam <- sammon(dist(ir[-143,]))
eqscplot(ir.sam$points, type="n")
text(ir.sam$points, labels=ir.species[-143], cex=0.8)

ir.iso <- isoMDS(dist(ir[-143,]))
eqscplot(ir.iso$points, type="n")
text(ir.iso$points, labels=ir.species[-143], cex=0.8)

if(F) {
ir.mst <- mstree(ir)
ir.mst$x <- -ir.mst$x; ir.mst$y <- -ir.mst$y
eqscplot(ir.mst$y, ir.mst$x, type="n")
text(ir.mst$y, ir.mst$x, labels=ir.species, cex=0.8)
}

data(fgl)
fgl.iso <- isoMDS(dist(as.matrix(fgl[-40, -10])))
eqscplot(fgl.iso$points, type="n", xlab="", ylab="")
text(fgl.iso$points, labels=c("F", "N", "V", "C", "T", "H")[fgl$type[-40]], cex=0.6)

data(state)
state <- state.x77[,2:7]
row.names(state) <- state.abb
biplot(princomp(state, cor=T), pc.biplot=T, cex = 0.7, expand=0.8)


# 13.2  Cluster analysis

h <- hclust(dist(swiss.x), method="single")
plot(h) # plclust(h)
#cutree(h, 3)
#plclust( clorder(h, cutree(h, 3) ))

if(F){
h <- hclust(dist(swiss.x), method="average")
initial <- tapply(swiss.x, list(rep(cutree(h, 3), 
   ncol(swiss.x)), col(swiss.x)), mean)
dimnames(initial) <- list(NULL, dimnames(swiss.x)[[2]])
}
km <- kmeans(swiss.x, 3) #initial)
swiss.pca <- princomp(swiss.x)
swiss.pca
swiss.px <- predict(swiss.pca)
dimnames(km$centers)[[2]] <- dimnames(swiss.x)[[2]]
swiss.centers <- predict(swiss.pca, km$centers)
eqscplot(swiss.px[, 1:2], type="n", 
   xlab="first principal component",
   ylab="second principal component")
text(swiss.px[,1:2], labels=km$cluster)
points(swiss.centers[,1:2], pch=3, cex=3)
#identify(swiss.px[, 1:2], cex=0.5)

if(F){
h <- mclust(swiss.x, method = "S*")$tree
plclust( clorder(h, cutree(h, 3) ))
h <- mclust(swiss.x, method = "S*", noise=T)
hclass <- mclass(h, 3)
hclass$class
mreloc(hclass, swiss.x, method = "S*", noise=T)
}

library(cluster)
swiss.pam <- pam(swiss.px, 3)
summary(swiss.pam)
eqscplot(swiss.px[, 1:2], type="n", 
   xlab="first principal component",
   ylab="second principal component")
text(swiss.px[,1:2], labels=swiss.pam$clustering)
points(swiss.pam$medoid[,1:2], pch=3, cex=3)
 
fanny(swiss.px, 3)

#par(mfrow=c(2,1))
#pltree(agnes(swiss.x, method="single"))
#pltree(diana(swiss.x))
#par(mfrow=c(1,1))

# 13.3  Discriminant analysis

ir.lda <- lda(log(ir), ir.species)
ir.lda
ir.ld <- predict(ir.lda, dimen=2)$x
eqscplot(ir.ld, type="n", xlab="first linear discriminant", 
     ylab="second linear discriminant")
text(ir.ld, labels=ir.species)

data(fgl)
par(mfrow=c(1,2))
fgl.lda <- lda(type ~ ., fgl)
fgl.ld <- predict(fgl.lda, dimen=2)$x
eqscplot(fgl.ld[, 2:1], type="n", xlab="LD2", ylab="LD1")
text(fgl.ld[, 2:1], labels=c("F", "N", "V", "C", "T", "H")
   [fgl$type[-40]], cex=0.6)
fgl.rlda <- lda(type ~ ., fgl, method="t")
fgl.rld <- predict(fgl.rlda, dimen=2)$x
eqscplot(fgl.rld[, 2:1], type="n", xlab="LD2", ylab="LD1")
text(fgl.rld[, 2:1], labels=c("F", "N", "V", "C", "T", "H")
   [fgl$type[-40]], cex=0.6)
par(mfrow=c(1,1))

cancor(swiss.x[,c(1,4,5)]/100, swiss.x[,c(2,3)]/100)

#Fisher's data for correspondence analysis:
#       fair red medium dark black
#  blue  326  38    241  110     3
# light  688 116    584  188     4
#medium  343  84    909  412    26
#  dark   98  48    403  681    85

# Note correction: 26 not 25.

#corresp(read.table("Fisher.dat"))
data(caith)
corresp(caith)


# 13.4  An example: Leptograpsus variegatus crabs

data(crabs)
lcrabs <- log(crabs[,4:8])
crabs.grp <- factor(c("B", "b", "O", "o")[rep(1:4, rep(50,4))])
lcrabs.pca <- princomp(lcrabs)
lcrabs.pc <- predict(lcrabs.pca)
dimnames(lcrabs.pc) <- list(NULL, paste("PC", 1:5, sep=""))
lcrabs.pca
loadings(lcrabs.pca)

cr.scale <- 0.5 * log(crabs$CL * crabs$CW)
slcrabs <- lcrabs - cr.scale
cr.means <- matrix(0, 2, 5)
cr.means[1,] <- apply(slcrabs[crabs$sex=="F",], 2, mean)
cr.means[2,] <- apply(slcrabs[crabs$sex=="M",], 2, mean)
dslcrabs <- slcrabs - cr.means[unclass(crabs$sex),]
lcrabs.sam <- sammon(dist(dslcrabs))
eqscplot(lcrabs.sam$points, type="n", xlab="", ylab="")
text(lcrabs.sam$points, labels=crabs.grp)

if(F) {
crabs.h <- cutree(hclust(dist(dslcrabs)),2)
table(crabs$sp, crabs.h)
cr.means[1,] <- apply(dslcrabs[crabs.h==1,], 2, mean)
cr.means[2,] <- apply(dslcrabs[crabs.h==2,], 2, mean)
crabs.km <- kmeans(dslcrabs, cr.means)
table(crabs$sp, crabs.km$cluster)
eqscplot(lcrabs.sam$points, type="n", xlab="", ylab="")
text(lcrabs.sam$points, labels=crabs.km$cluster)
table(crabs$sp, mreloc(crabs.h, dslcrabs))
}

dcrabs.lda <- lda(crabs$sex ~ FL + RW + CL + CW, lcrabs)
dcrabs.lda
dcrabs.pred <- predict(dcrabs.lda)
table(crabs$sex, dcrabs.pred$class)

dcrabs.lda4 <- lda(crabs.grp ~ FL + RW + CL + CW, lcrabs)
dcrabs.pr4 <- predict(dcrabs.lda4, dimen=2)
#
# next line changed as the ordering of levels in R depends on the locale!
dcrabs.pr2 <- dcrabs.pr4$post[, c("B", "O")] %*% c(1,1)
table(crabs$sex, dcrabs.pr2 > 0.5)

cr.t <- dcrabs.pr4$x[,1:2]
eqscplot(cr.t, type="n", xlab="First LD", ylab="Second LD")
text(cr.t, labels=crabs.grp)
perp <- function(x, y) {
    m <- (x+y)/2
    s <- - (x[1] - y[1])/(x[2] - y[2])
    abline(c(m[2] - s*m[1], s))
    invisible()
}
cr.m <- lda(cr.t, crabs$sex)$means
points(cr.m, pch=3, mkh=0.3)
perp(cr.m[1,], cr.m[2,])

cr.lda <- lda(cr.t, crabs.grp)
x <- seq(-6, 6, 0.25)
y <- seq(-2, 2, 0.25)
Xcon <- matrix(c(rep(x,length(y)), 
               rep(y, rep(length(x),length(y)))),,2)
cr.pr <- predict(cr.lda, Xcon)$post[, c("B", "O")] %*% c(1,1)
contour(x, y, matrix(cr.pr, length(x), length(y)),
   levels=0.5, labex=0, add=T, lty=3)

for(i in c("O", "o",  "B", "b")) print(var(lcrabs[crabs.grp==i, ]))


# 13.5  Factor analysis

if(F){
swiss.FA <- factanal(swiss.x, factors=2, method="mle")
swiss.FA
summary(swiss.FA)
factanal(swiss.x, factors=2, method="mle", 
    control=list(iter.max=100, unique.tol=1e-20))$uniq
A <- loadings(swiss.FA) %*% t(loadings(swiss.FA)) + 
            diag(swiss.FA$uniq)
round(cor(swiss.x) - A, 3)
rm(A)
swiss.FA1 <- factanal(swiss.x, method="mle")
swiss.FA1
summary(swiss.FA1)

#ability.cov <- matrix(scan(n=36),6,6)
#24.641   5.991  33.520  6.023  20.755  29.701
# 5.991   6.700  18.137  1.782   4.936   7.204
#33.520  18.137 149.831 19.424  31.430  50.753
# 6.023   1.782  19.424 12.711   4.757   9.075
#20.755   4.936  31.430  4.757  52.604  66.762
#29.701   7.204  50.753  9.075  66.762 135.292

dimnames(ability.cov) <- list(
c("general","picture","blocks","maze","reading","vocab"),
c("general","picture","blocks","maze","reading","vocab"))


ability.cl <- list(cov=ability.cov, center=rep(0,6), n.obs=112)
ability.FA <- factanal(covlist=ability.cl, method="mle")
ability.FA
ability.FA <- update(ability.FA, factors=2)
ability.FA
summary(ability.FA)
rotate(swiss.FA, rotation="promax")
rotate(loadings(swiss.FA), rotation="promax")
loadings(rotate(ability.FA, rotation="oblimin"))
par(pty="s")
L <- loadings(ability.FA)
eqscplot(L, xlim=c(0,1), ylim=c(0,1))
#identify(L, dimnames(L)[[1]])
oblirot <- rotate(loadings(ability.FA), rotation="oblimin")
naxes <- solve(oblirot$tmat)
arrows(rep(0,2), rep(0,2), naxes[,1], naxes[,2])
}

# =========================================

# Chapter 13 Complements


# 13.3

#caith <- read.table("Fisher.dat")
dimnames(caith)[[2]] <- c("F", "R", "M", "D", "B")
par(mfcol=c(1,3))
plot(corresp(caith, nf=2)); title("symmetric")
plot(corresp(caith, nf=2), type="rows"); title("rows")
plot(corresp(caith, nf=2), type="col"); title("columns")

data(farms)
par(mfrow=c(1,1))
farms.mca <- mca(farms, abbrev=T)  # Use levels as names
plot(farms.mca, cex=rep(0.7,2))


# 13.5
if(F){
loadings(rotate(ir.pca, n=2))

A <- loadings(ir.pca) %*% diag(ir.pca$sdev)
dimnames(A)[[2]] <- names(ir.pca$sdev)
B <- rotate(A[, 1:2], normalize=F)$rmat
print.loadings(B)
}

# End of ch13
