#-*- S -*-

# Chapter 5   Distributions and Data Summaries


# needs R --vsize 30M for the bootstrap runs


# for later use, from section 5.6
test.t.2 <- function(d) {
# ttest is function(x) mean(x)/sqrt(var(x)/length(x))
    binary.v <- function(x, digits) {
       if(missing(digits)) {
           mx <- max(x)
           digits <- if(mx > 0) 1 + floor(log(mx, base = 2)) else 1
       }
       ans <- 0:(digits - 1)
       lx <- length(x)
       x <- rep(x, rep(digits, lx))
       x <- (x %/% 2^ans) %% 2
       dim(x) <- c(digits, lx)
       x
    }
    digits <- length(d)
    n <- 2^digits
    x <- d * 2 * (binary.v(1:n, digits) - 0.5)
    mx <- matrix(1/digits, 1, digits) %*% x
    s <- matrix(1/(digits - 1), 1, digits)
    vx <- s %*% (x - matrix(mx, digits, n, byrow=T))^2
    as.vector(mx/sqrt(vx/digits))
}
library(MASS)
options(width=65, digits=5, height=9999)
postscript(file="ch05.ps", width=8, height=6, pointsize=9)

rm(A, B) # precautionary clear-out
data(shoes)
attach(shoes)
tperm <- test.t.2(B-A)  # see section 5.6
detach()


# 5.1  Probability distributions

x <- rt(250, 9)
qqnorm(x); qqline(x)

#qqmath(~ x, distribution=qnorm, aspect="xy",
#   prepanel = prepanel.qqmathline,
#   panel = function(x, y, ...) {
#      panel.qqmathline(y, distribution=qnorm, ...)
#      panel.qqmath(x, y, ...)
#   },
#   xlab= "Quantiles of Standard Normal"
#)


# 5.2  Generating random data

contam <- rnorm( 100, 0, (1 + 2*rbinom(100, 1, 0.05)) )


# 5.3  Data summaries

data(faithful)
data(chem)
data(abbey)
par(mfrow=c(2,3))
hist.scott(faithful$eruptions, xlab="eruptions")
hist.scott(chem)
hist.scott(tperm)
hist.FD(faithful$eruptions, xlab="eruptions")
hist.FD(chem)
hist.FD(tperm)
par(mfrow=c(1,1))

data(swiss)
swiss.fertility <- swiss[, 1]
stem(swiss.fertility)
stem(chem)
stem(abbey)
stem(abbey, scale=0.4) # different in R

par(mfrow=c(1,2))
boxplot(chem, sub="chem", range=0.5)
boxplot(abbey, sub="abbey")
par(mfrow=c(1,1))
# fgl.df is from Chapter 3
data(fgl)
fgl0 <- fgl[ ,-10] # omit type.
fgl.df <- data.frame(type = rep(fgl$type, 9),
   y = as.vector(as.matrix(fgl0)),
   meas = factor(rep(1:9, rep(214,9)), labels=names(fgl0)))
#bwplot(type ~ y | meas, data=fgl.df, scales=list(x="free"),
#   strip=function(...) strip.default(..., style=1), xlab="")


# 5.4  Classical univariate statistics

library(ctest)
attach(shoes)
t.test(A, mu=10)
t.test(A)$conf.int
wilcox.test(A, mu=10)
var.test(A, B)
t.test(A, B)
t.test(A, B, var.equal=F)
wilcox.test(A, B)
t.test(A, B, paired=T)
wilcox.test(A, B, paired=T)
detach()
detach("package:ctest")

par(mfrow=c(1,2))
truehist(tperm, xlab="diff")
x <- seq(-4,4, 0.1)
lines(x, dt(x,9))
#cdf.compare(tperm, distribution="t", df=9)
sres <- c(sort(tperm), 4)
yres <- (0:1024)/1024
plot(sres, yres, type="S", xlab="diff", ylab="")
lines(x, pt(x,9), lty=3)
legend(-5, 1.05, c("Permutation dsn","t_9 cdf"), lty=c(1,3))
par(mfrow=c(1,1))


# 5.5  Density estimation

attach(faithful)
par(mfrow=c(2,3))
truehist(eruptions, h=0.5, x0=0.0, xlim=c(1, 6), ymax=0.6)
truehist(eruptions, h=0.5, x0=0.1, xlim=c(1, 6), ymax=0.6)
truehist(eruptions, h=0.5, x0=0.2, xlim=c(1, 6), ymax=0.6)
truehist(eruptions, h=0.5, x0=0.3, xlim=c(1, 6), ymax=0.6)
truehist(eruptions, h=0.5, x0=0.4, xlim=c(1, 6), ymax=0.6)

breaks <- seq(1, 6.4, 0.1)
counts <- numeric(length(breaks))
for(i in (0:4)) counts[i+(1:50)] <- counts[i+(1:50)] +
    rep(hist(eruptions, breaks=0.1*i + seq(1, 6, 0.5),
    prob=T, plot=F)$intensities, rep(5,10))
plot(breaks+0.05, counts/5, type="l", xlab="eruptions",
    ylab="averaged", bty="n", xlim=c(1, 6), ylim=c(0, 0.6))
detach()

if(F){
dplot <- function(...)
{
   densityplot(~ tperm, ...,
      ylim = c(0, 0.45), from = -5, to = 5, n = 200,
      panel = function(x, y, ...) {
         ourpanel.densityplot(x, y, ...)
         x1 <- seq(-5, 5, 0.1)
         panel.xyplot(x1, dt(x1, 9), lty=2, type="l")
      }
   )
}

ourpanel.densityplot <- function(x, y, type = "l", n = 50,
   window = "gaussian", width = NULL, from = NULL, to = NULL,
   cut = NULL, ...)
{
    denargs <- list(x = x, n = n, window = window)
    if(!is.null(width)) denargs <- c(denargs, list(width = width))
    if(!is.null(from)) denargs <- c(denargs, list(from = from))
    if(!is.null(to)) denargs <- c(denargs, list(to = to))
    if(!is.null(cut)) denargs <- c(denargs, list(cut = cut))
    d <- do.call("density", denargs)
    panel.xyplot(d$x, d$y, type = type, ...)
    rug(jitter(x))
}
print(dplot(sub=list("default",cex=1.5)),
    split=c(1,2,2,2), more=T)
print(dplot(sub=list("width=0.2",cex=1.5), width=0.2),
    split=c(2,2,2,2), more=T)
print(dplot(sub=list("width=0.5",cex=1.5) ,width=0.5),
    split=c(1,1,2,2), more=T)
print(dplot(sub=list("width=1.5",cex=1.5), width=1.5),
    split=c(2,1,2,2), more=F)
}

attach(faithful)
par(mfrow=c(2,2))
truehist(eruptions, nbins=15, xlim=c(1,6), ymax=0.8)
lines(density(eruptions, n=200))
bandwidth.nrd(eruptions)
lines(density(eruptions, width=1.6, n=200), lty=3)
bandwidth.nrd(tperm)
c( ucv(eruptions),  bcv(eruptions) )
truehist(eruptions, nbins=15, xlim=c(1,6), ymax=0.8)
lines(density(eruptions, width=0.41, n=200))
lines(density(eruptions, width=0.63, n=200), lty=3)
c(width.SJ(eruptions, method="dpi"), width.SJ(eruptions))
c( ucv(tperm), bcv(tperm), width.SJ(tperm) )
par(mfrow=c(1,1))

attach(faithful)
plot(eruptions, waiting, xlim=c(1,6), ylim=c(40,100))
f1 <- kde2d(eruptions, waiting, n=50, lims=c(1,6,40,100))
image(f1, zlim = c(0, 0.045))
#levelplot(z ~ x*y, con2tr(f1),
#   xlab="duration", ylab="waiting",
#   at = seq(0, 0.045, 0.001), colorkey=F,
#   col.regions = rev(trellis.par.get("regions")$col))
f2 <- kde2d(eruptions, waiting, n=50, lims=c(1,6,40,100),
   h = c(width.SJ(eruptions), width.SJ(waiting)) )
#levelplot(z ~ x*y, con2tr(f2),
#   xlab="duration", ylab="waiting",
#   at = seq(0, 0.045, 0.001), colorkey=F,
#   col.regions = rev(trellis.par.get("regions")$col))
#wireframe(z ~ x*y, con2tr(f2),
#   aspect = c(1, 0.5), screen=list(z=20, x=-60), zoom=1.2)
image(f2, zlim = c(0, 0.045))
persp(f2, phi=30, theta=20, d=5)

plot(eruptions[-272], eruptions[-1], xlim=c(1,6), ylim=c(40,100))
f1 <- kde2d(eruptions[-272], eruptions[-1],
   h=rep(1.5, 2), n=50, lims=c(1,6,1,6))
contour(f1 ,xlab="previous duration",
    ylab="duration", levels = c(0.05, 0.1, 0.2, 0.4) )
f1 <- kde2d(eruptions[-272], eruptions[-1],
   h=rep(0.6, 2), n=50, lims=c(1,6,1,6))
contour(f1 ,xlab="previous duration",
    ylab="duration", levels = c(0.05, 0.1, 0.2, 0.4) )
f1 <- kde2d(eruptions[-272], eruptions[-1],
   h=rep(0.4, 2), n=50, lims=c(1,6,1,6))
contour(f1 ,xlab="previous duration",
    ylab="duration", levels = c(0.05, 0.1, 0.2, 0.4) )


# 5.6  Bootstrap and permutation methods

density(eruptions, n=1, from=4, to=4.01, width=0.41)$y
density(eruptions, n=1, from=4, to=4.01, width=0.63)$y
1/(2*sqrt(length(eruptions))*0.415)

#set.seed(101)
m <- 1000
res <- numeric(m)
for (i in 1:m) res[i] <- median(sample(eruptions, replace=T))
bootres <- res # for future use
mean(res - median(eruptions))
sqrt(var(res))
truehist(res, nbins=nclass.FD(res))
lines(density(res, n=200, width=bandwidth.nrd(res)))
c( ucv(res), bcv(res) )
c( width.SJ(res, lower=0.001), width.SJ(res, method="dpi") )
quantile(res, c(0.025, 0.975))

if(F){
if(version$major >= 4) {
  erupt.boot <- bootstrap(eruptions, median, seed=101, B=1000)
  print(summary(erupt.boot))
  print(jack.after.bootstrap(erupt.boot, "Bias"))
  print(jack.after.bootstrap(erupt.boot, "SE"))
}
}

attach(shoes)
d <- B - A
ttest <- function(x) mean(x)/sqrt(var(x)/length(x))
n <- 1000
res <- numeric(n)
for(i in 1:n) res[i] <- ttest(x <- d*sign(runif(10)-0.5))

test.t.1 <- function(d) {
   binary <- function(x, digits = if(x > 0) 1 +
                  floor(log(x, base = 2))    else 1)
   { ans <- 0:(digits - 1); (x %/% 2^ans) %% 2 }
   digits <- length(d)
   n <- 2^digits
   perm.res <- numeric(n)
   for(i in 1:n) {
      x <- d * 2 * (binary(i, digits = digits) - 0.5)
      perm.res[i] <- ttest(x)
   }
   perm.res
}
#tperm <- test.t.1(B-A)


# =========================================

# Chapter 5 Complements


# 5.5  Density estimation

library(logspline)

par(mfrow=c(2,2))
attach(faithful)
faithful.ls <- logspline.fit(eruptions, lbound=0)
x <- seq(1, 6, len=200)
truehist(eruptions, nbins=15, xlim=c(1,6), ymax=1.0)
lines(x, dlogspline(x, faithful.ls))
detach()

par(mfrow=c(1,3), pty="s")

truehist(tperm, xlab="diff")
tperm.ls <- logspline.fit(tperm)
x <- seq(-5, 5, len=200)
lines(x, dlogspline(x, tperm.ls))

sres <- c(sort(tperm), 5); yres <- (0:1024)/1024
plot(sres, yres, type="S", xlab="diff", ylab="cdf")
lines(x, plogspline(x, tperm.ls))

x <- c(0.0005, seq(0.001, 0.999, 0.001), 0.9995)
plot( qt(x, 9), qlogspline(x, tperm.ls),
      xlab="Quantiles of t on 9 df", ylab="Fitted quantiles",
      type="l", xlim=c(-5, 5), ylim=c(-5, 5))
points( qt(ppoints(tperm), 9), sort(tperm) )

par(mfrow=c(1,1), pty="m")
res <- bootres  # retrieve the values
truehist(res, nbins=nclass.FD(res), ymax=20)
x <- seq(3.7, 4.2, len=1000)
res.ls <- logspline.fit(res)
lines(x, dlogspline(x, res.ls))
points(res.ls$knots, dlogspline(res.ls$knots, res.ls))
res.ls <- logspline.fit(res, penalty=2)
lines(x, dlogspline(x, res.ls), lty=3)
points(res.ls$knots, dlogspline(res.ls$knots, res.ls))

data(galaxies)
x <- seq(8000, 35000, 200)
plot(x, dlogspline(x, logspline.fit(galaxies)), type="l",
     xlab="velocity of galaxy", ylab="density")
lines(density(galaxies, n=200, window="gaussian",
      width=width.SJ(galaxies)), lty=3)
detach("package:logspline")

library(locfit)
faithful.lf <- locfit(~ eruptions, data=faithful, flim=c(1,6))
plot(faithful.lf, get.data=T, mpv=200, ylim=c(0,1))

faithful.lf1 <- locfit(~ eruptions, data=faithful, flim=c(1,6),
                        alpha=c(0.15, 0.9))
lines(faithful.lf1, m=200, lty=3)

# defer 2D plot until 1D is completed.

akaike <- function(formula, alpha, pen=2, ...)
{
  m <- nrow(alpha); ll <- numeric(m); vr <- numeric(m)
  for(i in 1:m) {
    fit <- locfit(formula, alpha=alpha[i,], ...)
    ll[i] <- fit$dp["lk"]; vr[i] <- fit$dp["t0"]
  }
  cbind(alpha=alpha, LogLik=ll, df=vr, AIC=-2*ll+pen*vr)
}
attach(faithful)
akaike( ~ eruptions,
       alpha = cbind(0, 2.5 * seq( 0.1, 0.6, by = 0.05)),
       ev = "data", kern = "gauss")
fit <- locfit(~ eruptions, alpha = c(0, 1.2), flim = c(1, 6),
        kern = "gauss", ev = "grid", mg = 200)
lines(fit, m=200, lty=2)

erupt.bin <- data.frame(duration=seq(1.6, 5.1, by=0.05),
    count=hist(eruptions, breaks=seq(1.575, 5.125, by=0.05),
               plot=F)$counts)
fit2 <- locfit(count ~ duration, data=erupt.bin,
               weights=rep(272*0.05, 71),
               alpha=c(0, 0, 2), family="poisson")
lines(fit2, m=200, lty=4)
detach()

plot(locfit(~ eruptions+waiting, data=faithful, alpha=0.25,
     scale=c(1,10)), type="persp")

# galaxies

data(galaxies)
plot(locfit(~galaxies, flim=c(8000, 35000)),
     get.data=T, ylim=c(0, 0.0003), mpv=200)

akaike( ~ galaxies,
       alpha=cbind(seq( 0.15, 0.7, 0.05), 0),
       ev="data", kern="gauss")
fit <- locfit(~ galaxies, alpha=0.45, flim=c(8000, 35000),
        kern="gauss", ev="grid", mg=200)
lines(fit, m=200, lty=2)

galaxies.bin <- data.frame(velocity=seq(8000, 35000, 500),
    count=hist(galaxies, breaks=seq(7750, 35250, 500),
               plot=F)$counts)
fit2 <- locfit(count ~ velocity, data=galaxies.bin,
               weights=rep(82*500, nrow(galaxies.bin)),
               alpha=c(0, 0, 2), family="poisson")
lines(fit2, m=200, lty=3)
detach("package:locfit")


# 5.6  Bootstrap and permutation methods

library(boot)
attach(faithful)
#set.seed(101)
erupt.boot <- boot(eruptions, function(x,i) median(x[i]), R=1000)
erupt.boot
boot.ci(erupt.boot, conf=c(0.90, 0.95),
          type=c("norm","basic","perc","bca"))
detach()

library(nls)
data(stormer)
storm.fm <- nls(Time ~ b*Viscosity/(Wt - c), stormer,
		start = c(b=29.401, c=2.2183))
storm.bf <- function(rs, ind) {
    assign("Tim2", fitted(storm.fm) + rs[ind], envir=.GlobalEnv)
    tmp <- nls(Tim2 ~ (b * Viscosity)/(Wt - c), stormer,
               start = coef(storm.fm))
    tmp$m$getAllPars()
}
rs <- scale(resid(storm.fm), scale = FALSE)
storm.boot <- boot(rs, storm.bf, R = 1000)
storm.boot

boot.ci(storm.boot, index=1,
	type=c("norm", "basic", "perc", "bca"))
boot.ci(storm.boot, index=2,
	type=c("norm", "basic", "perc", "bca"))
detach("package:boot")

#if(platform()!="WIN386") library(bootstrap, first=T)  else
#library(bootstra, first=T)  # In case 4.0 or later is in use

library(bootstrap)

attach(faithful)
#set.seed(101)
erupt.boot <- bootstrap(eruptions, 1000, median)
mean(erupt.boot$thetastar - median(eruptions))
sqrt(var(erupt.boot$thetastar))

#set.seed(101)
erupt.boot2 <- bootstrap(eruptions, 1000, median, func=mean)
#set.seed(101)
boott(eruptions, median, perc=c(0.025, 0.05, 0.95, 0.975))
detach()

storm.bf1 <- function(rs) {
    assign("Tim2", fitted(storm.fm) + rs, envir=.GlobalEnv)
    tmp <- nls(Tim2 ~ (b * Viscosity)/(Wt - c), stormer,
               start = coef(storm.fm))
    tmp$m$getAllPars()[1]
}
storm.bf2 <- function(rs) {
    assign("Tim2", fitted(storm.fm) + rs, envir=.GlobalEnv)
    tmp <- nls(Tim2 ~ (b * Viscosity)/(Wt - c), stormer,
               start = coef(storm.fm))
    tmp$m$getAllPars()[2]
}
#set.seed(101)
boott(rs, storm.bf1, perc=c(0.025, 0.05, 0.95, 0.975))
#set.seed(101)
boott(rs, storm.bf2, perc=c(0.025, 0.05, 0.95, 0.975))
detach("package:bootstrap")

# end of ch05
