#-*- S -*-  

# Chapter 17   Classification

library(MASS)
library(nnet)
options(width=65, digits=5, height=9999)
library(tree)
library(modreg)

postscript(file="ch17.ps", width=8, height=6, pointsize=9)


# 17.2  A simple example

predplot <- function(object, main="", len=50, ...) 
{
   plot(Cushings[,1], Cushings[,2], log="xy", type="n", 
     xlab="Tetrahydrocortisone", ylab = "Pregnanetriol", main=main)
   text(Cushings[1:21,1], Cushings[1:21,2], 
      as.character(tp))
   text(Cushings[22:27,1], Cushings[22:27,2], "u")
   xp <- seq(0.6, 4.0, length=len)
   yp <- seq(-3.25, 2.45, length=len)
   cushT <- expand.grid(Tetrahydrocortisone=xp,
     Pregnanetriol=yp)
   Z <- predict(object, cushT, ...); zp <- unclass(Z$class)
   zp <- Z$post[,3] - pmax(Z$post[,2], Z$post[,1])
   contour(exp(xp), exp(yp), matrix(zp, len),
     add=T, levels=0, labex=0)
   zp <- Z$post[,1] - pmax(Z$post[,2], Z$post[,3])
   contour(exp(xp), exp(yp), matrix(zp, len),
     add=T, levels=0, labex=0)
   invisible()
}
data(Cushings)
cush <- log(as.matrix(Cushings[, -3]))
tp <- factor(Cushings$Type[1:21])
cush.lda <- lda(cush[1:21,], tp); predplot(cush.lda, "LDA")
cush.qda <- qda(cush[1:21,], tp); predplot(cush.qda, "QDA")
predplot(cush.qda, "QDA (predictive)", method="predictive")
predplot(cush.qda, "QDA (debiased)", method="debiased")


Cf <- data.frame(tp = tp, 
   Tetrahydrocortisone = log(Cushings[1:21,1]),
   Pregnanetriol = log(Cushings[1:21,2]) )
cush.multinom <- multinom(tp ~ Tetrahydrocortisone 
   + Pregnanetriol, Cf, maxit=250)
xp <- seq(0.6, 4.0, length=100); np <- length(xp)
yp <- seq(-3.25, 2.45, length=100)
cushT <- expand.grid(Tetrahydrocortisone=xp,
     Pregnanetriol=yp)
Z <- predict(cush.multinom, cushT, type="probs")
plot(Cushings[,1], Cushings[,2], log="xy", type="n", 
  xlab="Tetrahydrocortisone", ylab = "Pregnanetriol")
text(Cushings[1:21,1], Cushings[1:21,2], as.character(tp))
text(Cushings[22:27,1], Cushings[22:27,2], "u")
zp <- Z[,3] - pmax(Z[,2], Z[,1])
contour(exp(xp), exp(yp), matrix(zp, np), 
   add=T, levels=0, labex=0)
zp <- Z[,1] - pmax(Z[,2], Z[,3])
contour(exp(xp), exp(yp), matrix(zp, np),
   add=T, levels=0, labex=0)

cush.tr <- tree(tp ~ Tetrahydrocortisone + Pregnanetriol, Cf)
plot(cush[,1], cush[,2], type="n", 
  xlab="Tetrahydrocortisone", ylab = "Pregnanetriol")
text(cush[1:21,1], cush[1:21,2], as.character(tp))
text(cush[22:27,1], cush[22:27,2], "u")
par(cex=1.5); partition.tree(cush.tr, add=T); par(cex=1)

cush <- cush[1:21,]; tpi <- class.ind(tp)

plt <- function(main, ...) {
   plot(Cushings[,1], Cushings[,2], log="xy", type="n", 
   xlab="Tetrahydrocortisone", ylab = "Pregnanetriol", main=main, ...)
   for(il in 1:4) {
       set <- Cushings$Type==levels(Cushings$Type)[il]
       text(Cushings[set, 1], Cushings[set, 2], 
          as.character(Cushings$Type[set]), col = 2 + il) }
}

plt.bndry <- function(size=0, decay=0, ...) 
{
   cush.nn <- nnet(cush, tpi, skip=T, softmax=T, size=size, 
      decay=decay, maxit=1000)
   invisible(b1(predict(cush.nn, cushT), ...))
}

b1 <- function(Z, ...) 
{
   zp <- Z[,3] - pmax(Z[,2], Z[,1])
   contour(exp(xp), exp(yp), matrix(zp, np),
      add=T, levels=0, labex=0, ...)
   zp <- Z[,1] - pmax(Z[,3], Z[,2])
   contour(exp(xp), exp(yp), matrix(zp, np),
      add=T, levels=0, labex=0, ...)
}
set.seed <- function(x) {}
par(mfrow=c(2,2))
plt("Size = 2")
set.seed(1); plt.bndry(size=2, col=2)
set.seed(3); plt.bndry(size=2, col=3); plt.bndry(size=2, col=4)

plt("Size = 2, lambda = 0.001")
set.seed(1); plt.bndry(size=2, decay=0.001, col=2)
set.seed(2); plt.bndry(size=0, decay=0.001, col=4)

plt("Size = 2, lambda = 0.01")
set.seed(1); plt.bndry(size=2, decay=0.01, col=2)
set.seed(2); plt.bndry(size=2, decay=0.01, col=4)

plt("Size = 5, 20  lambda = 0.01")
set.seed(2); plt.bndry(size=5, decay=0.01, col=1)
set.seed(2); plt.bndry(size=20, decay=0.01, col=2)


plt("Many local maxima")
Z <- matrix(0, nrow(cushT), ncol(tpi))
for(iter in 1:20) {
    set.seed(iter)
    cush.nn <- nnet(cush, tpi,  skip=T, softmax=T, size=3,   
        decay=0.01, maxit=1000, trace=F)
    Z <- Z + predict(cush.nn, cushT)
    cat("final value", format(round(cush.nn$value,3)), "\n")
    b1(predict(cush.nn, cushT), col=2, lwd=0.5)
}
plt("Averaged")
b1(Z, lwd=3)

library(class)
par(pty="s", mfrow=c(1,2))
plot(Cushings[,1], Cushings[,2], log="xy", type="n", 
   xlab="Tetrahydrocortisone", ylab = "Pregnanetriol", main="1-NN")
text(Cushings[1:21,1], Cushings[1:21,2], as.character(tp))
text(Cushings[22:27,1], Cushings[22:27,2], "u")
Z <- knn(scale(cush, F, c(3.4, 5.7)), 
         scale(cushT, F, c(3.4, 5.7)), tp)
contour(exp(xp), exp(yp), matrix(as.numeric(Z=="a"), np),
      add=T, levels=0.5, labex=0)
contour(exp(xp), exp(yp), matrix(as.numeric(Z=="c"), np),
      add=T, levels=0.5, labex=0)
plot(Cushings[,1], Cushings[,2], log="xy", type="n", 
   xlab="Tetrahydrocortisone", ylab = "Pregnanetriol", main="3-NN")
text(Cushings[1:21,1], Cushings[1:21,2], as.character(tp))
text(Cushings[22:27,1], Cushings[22:27,2], "u")
Z <- knn(scale(cush, F, c(3.4, 5.7)), 
         scale(cushT, F, c(3.4, 5.7)), tp, k=3)
contour(exp(xp), exp(yp), matrix(as.numeric(Z=="a"), np),
      add=T, levels=0.5, labex=0)
contour(exp(xp), exp(yp), matrix(as.numeric(Z=="c"), np),
      add=T, levels=0.5, labex=0)


# 17.3  Forensic glass

data(fgl)
set.seed(123); rand <- sample (10, 214, replace=T)
con <- function(x,y)
{
   tab <- table(x,y)
   print(tab)
   diag(tab) <- 0
   cat("error rate = ", round(100*sum(tab)/length(x),2),"%\n")
   invisible()
}
CVtest <- function(fitfn, predfn, ...)
{
  res <- fgl$type
  for (i in sort(unique(rand))) {
     cat("fold ",i,"\n", sep="")
     learn <- fitfn(rand != i, ...)
     res[rand == i] <- predfn(learn, rand==i)
  }
  res
}
res.multinom <- CVtest(
   function(x, ...) multinom(type ~ ., fgl[x,], ...),
   function(obj, x) predict(obj, fgl[x, ],type="class"),
   maxit=1000, trace=F )
con(fgl$type, res.multinom)
res.lda <- CVtest(
   function(x, ...) lda(type ~ ., fgl[x, ], ...),
   function(obj, x) predict(obj, fgl[x, ])$class )
con(fgl$type, res.lda)

library(class)
fgl0 <- fgl[ ,-10] # drop type
{ res <- fgl$type
  for (i in sort(unique(rand))) {
     cat("fold ",i,"\n", sep="")
     sub <- rand == i
     res[sub] <- knn(fgl0[!sub, ], fgl0[sub,], fgl$type[!sub], 
                     k=1)
  }
  res } -> res.knn1
con(fgl$type, res.knn1)

res.lb <- knn(fgl0, fgl0, fgl$type, k=3, prob=T, use.all=F)
table(attr(res.lb, "prob"))

set.seed(123)
res.tree <- CVtest(
   function(x, ...) {
     tr <- tree(type ~ ., fgl[x,], ...)
     assign("x", x, envir=.GlobalEnv)
     r <- cv.tree(tr,, prune.misclass)
     rmin <- max(seq(along=r$dev)[r$dev < min(r$dev)+9])
     cat("size chosen was", r$size[rmin], "\n")
     prune.misclass(tr, k = r$k[rmin])
   },
   function(obj, x) predict(obj, fgl[x, ], type="class")
)
con(fgl$type, res.tree)


CVprobs <- function(fitfn, predfn, ...)
{
  res <- matrix(, 214, 6)
  for (i in sort(unique(rand))) {
     cat("fold ",i,"\n", sep="")
     learn <- fitfn(rand != i, ...)
     res[rand == i,] <- predfn(learn, rand==i)
  }
  res
}

set.seed(123)
probs.multinom <- CVprobs(
   function(x, ...) multinom(type ~ ., fgl[x,], ...),
   function(obj, x) predict(obj, fgl[x, ],type="probs"),
   maxit=1000, trace=F )

probs.yes <- as.vector(class.ind(fgl$type))
probs <- as.vector(probs.multinom)
par(pty="s")
plot(c(0,1), c(0,1), type="n", xlab="predicted probability", 
   ylab="", xaxs="i", yaxs="i", las=1)
rug(probs[probs.yes==0], 0.02, side=1, lwd=0.5)
rug(probs[probs.yes==1], 0.02, side=3, lwd=0.5)
abline(0,1)
newp <- seq(0, 1, length=100)
lines(newp, predict(loess(probs.yes ~ probs, span=1), newp))


# =========================================

# Chapter 17 Complements

# 17.3  Forensic glass

# Neural networks  ---------------------------------------------

library(nnet)
fgl1 <- lapply(fgl[, 1:9], function(x)
               {r <- range(x); (x-r[1])/diff(r)})
fgl1 <- data.frame(fgl1, type=fgl$type)

res.multinom <- CVtest(
   function(x, ...) multinom(type ~ ., fgl1[x,], ...),
   function(obj, x) predict(obj, fgl1[x, ],type="class"),
   maxit=1000, trace=F)
con(fgl$type, res.multinom)

res.mult2 <- CVtest(
   function(x, ...) multinom(type ~ ., fgl1[x,], ...),
   function(obj, x) predict(obj, fgl1[x, ], type="class"),
   maxit=1000, trace=F, decay=1e-3)
con(fgl$type, res.mult2)

res.mult3 <- CVtest(
   function(xsamp, ...) {
     assign("xsamp", xsamp, envir=.GlobalEnv)
     obj <- multinom(type ~ ., fgl1[xsamp,], trace=F, ...)
     stepAIC(obj)
     },
   function(obj, x) predict(obj, fgl1[x, ],type="class"),
   maxit=1000, decay=1e-3)
con(fgl$type, res.mult3)

res.nn <- CVtest(
   function(x, ...) nnet(type ~ ., fgl1[x,], ...),
   function(obj, x) predict(obj, fgl1[x, ], type="class"),
   maxit=1000, size=6, decay=0.01, trace=F )
con(fgl$type, res.nn)

CVnn <- function(nreps=1, ...)
{
  res <-  matrix(0, 214, 6)
  dimnames(res) <- list(NULL, levels(fgl$type))
  for (i in sort(unique(rand))) {
    cat("fold ",i,"\n", sep="")
    for(rep in 1:nreps) {
      learn <- nnet(type ~ ., fgl1[rand !=i,], trace=F, ...)
      res[rand == i,] <- res[rand == i,] + 
        predict(learn, fgl1[rand==i,])
    }
  }
  max.col(res/nreps)
}
res.nn <- CVnn(maxit=1000, size=6, decay=0.01)
con(fgl$type, res.nn)

CVnn2 <- function(formula, data, 
                  size = rep(6,2), lambda = c(0.001, 0.01),
                  nreps = 1, nifold = 5, verbose = 99, ...)
{
  CVnn1 <- function(formula, data, nreps=1, ri, verbose,  ...)
  {
    truth <- data[,deparse(formula[[2]])]
    res <-  matrix(0, nrow(data), length(levels(truth)))
    if(verbose > 20) cat("  inner fold")
    for (i in sort(unique(ri))) {
      if(verbose > 20) cat(" ", i,  sep="")
      for(rep in 1:nreps) {
        learn <- nnet(formula, data[ri !=i,], trace=F, ...)
        res[ri == i,] <- res[ri == i,] + 
          predict(learn, data[ri == i,])
      }
   }
    if(verbose > 20) cat("\n")
    sum(unclass(truth) != max.col(res/nreps))
  }
  truth <- data[,deparse(formula[[2]])]  
  res <-  matrix(0, nrow(data), length(levels(truth)))
  choice <- numeric(length(lambda))
  for (i in sort(unique(rand))) {
    if(verbose > 0) cat("fold ", i,"\n", sep="")
    ri <- sample(nifold, sum(rand!=i), replace=T)
    for(j in seq(along=lambda)) {
      if(verbose > 10)
        cat("  size =", size[j], "decay =", lambda[j], "\n")
      choice[j] <- CVnn1(formula, data[rand != i,], nreps=nreps, 
                          ri=ri, size=size[j], decay=lambda[j],
                          verbose=verbose, ...)
    }
    decay <- lambda[which.is.max(-choice)]
    csize <- size[which.is.max(-choice)]
    if(verbose > 5) cat("  #errors:", choice, "  ")
    if(verbose > 1) cat("chosen size = ", csize,
                        " decay = ", decay, "\n", sep="")
    for(rep in 1:nreps) {
      learn <- nnet(formula, data[rand != i,], trace=F,
                    size=csize, decay=decay, ...)
      res[rand == i,] <- res[rand == i,] + 
          predict(learn, data[rand == i,])
    }
  }
  factor(levels(truth)[max.col(res/nreps)],
         levels = levels(truth))
}

if(F){
# only do these if you have a fast, large machine
  res.nn2 <- CVnn2(type ~ ., fgl1, skip=T, maxit=500, nreps=10)
  con(fgl$type, res.nn2)
}

# Learning vector quantization -----------------------------------

library(class)
cd0 <- lvqinit(fgl0, fgl$type, prior=rep(1,6)/6,k=3)
cd1 <- olvq1(fgl0, fgl$type, cd0)
con(fgl$type, lvqtest(cd1, fgl0))

CV.lvq <- function()
{
  res <- fgl$type
  for(i in sort(unique(rand))) {
    cat("doing fold",i,"\n")
    cd0 <- lvqinit(fgl0[rand != i,], fgl$type[rand != i],
                   prior=rep(1,6)/6, k=3)
    cd1 <- olvq1(fgl0[rand != i,], fgl$type[rand != i], cd0)
    cd1 <- lvq3(fgl0[rand != i,], fgl$type[rand != i], 
                cd1, niter=10000)
    res[rand == i] <- lvqtest(cd1, fgl0[rand == i,])
  }
  res
}
con(fgl$type, CV.lvq())

library(mva)
fgl0 <- scale(princomp(fgl[,-10])$scores)
con(fgl$type, CV.lvq())


# Additive and tensor-spline models -----------------------------

#    Least-squares fitting to indicator functions

if(F){
library(mda); library(nnet)
levs <- levels(fgl$type)
fgl.bruto <- bruto(fgl[, 1:9], class.ind(fgl$type))
bruto.class <- max.col(predict(fgl.bruto, as.matrix(fgl[, 1:9])))
con(fgl$type, factor(levs[bruto.class], levels=levs))


res.bruto <- CVtest(
   function(xsamp, ...)
     bruto(fgl[xsamp, 1:9], class.ind(fgl$type)[xsamp,], ...),
   function(obj, x)
     factor(levs[max.col(predict(obj, 
                 as.matrix(fgl[x, 1:9])))], levels=levs)
)
con(fgl$type, res.bruto)


res.mars <- CVtest(
   function(xsamp, ...)
     mars(as.matrix(fgl[xsamp, 1:9]),
          class.ind(fgl$type)[xsamp,], ...),
   function(obj, x)
     factor(levs[max.col(predict(obj, 
                 as.matrix(fgl[x, 1:9])))], levels=levs)
)
con(fgl$type, res.mars)


res.mars2 <- CVtest(degree=2,
   function(xsamp, ...)
     mars(as.matrix(fgl[xsamp, 1:9]),
          class.ind(fgl$type)[xsamp,], ...),
   function(obj, x)
     factor(levs[max.col(predict(obj, 
                 as.matrix(fgl[x, 1:9])))], levels=levs)
)
con(fgl$type, res.mars2)


res.mars9 <- CVtest(degree=9,
   function(xsamp, ...)
     mars(as.matrix(fgl[xsamp, 1:9]),
          class.ind(fgl$type)[xsamp,], ...),
   function(obj, x)
     factor(levs[max.col(predict(obj, 
                 as.matrix(fgl[x, 1:9])))], levels=levs)
)
con(fgl$type, res.mars9)

#   Prediction followed by linear discriminant analysis

library(mda)
res.bruto.fda <- CVtest(method=bruto,
   function(xsamp, ...)
     fda(type ~ ., data=fgl[xsamp,], ...),
   function(obj, x) predict(obj, fgl[x, ])
)
con(fgl$type, res.bruto.fda)

res.mars.fda <- CVtest(method=mars,
   function(xsamp, ...)
     fda(type ~ ., data=fgl[xsamp,], ...),
   function(obj, x) predict(obj, fgl[x, ])
)
con(fgl$type, res.mars.fda)

res.mars2.fda <- CVtest(method=mars, degree=2,
   function(xsamp, ...)
     fda(type ~ ., data=fgl[xsamp,], ...),
   function(obj, x) predict(obj, fgl[x, ])
)
con(fgl$type, res.mars2.fda)

res.mars9.fda <- CVtest(method=mars, degree=9,
   function(xsamp, ...)
     fda(type ~ ., data=fgl[xsamp,], ...),
   function(obj, x) predict(obj, fgl[x, ])
)
con(fgl$type, res.mars9.fda)
}

#    Non-linear logistic discrimination

if(F){
if(platform()=="WIN386") library(polyclas) else library(polyclass)

res.polycl <- CVtest(maxdim=50,
   function(xsamp, ...)
     poly.fit(unclass(fgl$type)[xsamp],
              as.matrix(fgl[xsamp, 1:9]), ...),
   function(obj, x)
     factor(levs[max.col(ppoly(fit=obj,
                         cov=as.matrix(fgl[x, 1:9])))],
            levels=levs)
)
con(fgl$type, res.polycl)

res.polycl2 <- CVtest(maxdim=50, cv=10, seed=123,
   function(xsamp, ...)
     poly.fit(unclass(fgl$type)[xsamp],
              as.matrix(fgl[xsamp, 1:9]), ...),
   function(obj, x)
     factor(levs[max.col(ppoly(fit=obj,
                         cov=as.matrix(fgl[x, 1:9])))],
            levels=levs)
)
con(fgl$type, res.polycl2)

# Mixture discriminant analysis  ----------------------------------

library(class) # needed for the initialization
res.mda <- CVtest(subclasses=c(6,6,3,3,2,4),
   function(xsamp, ...)
     mda(type ~ ., data=fgl[xsamp,], ...),
   function(obj, x) predict(obj, fgl[x, ])
)
con(fgl$type, res.mda)
}

# End of ch17



