% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/adaptive_sample.r
\name{adaptive.sample}
\alias{adaptive.sample}
\title{Spatially adaptive sampling}
\usage{
adaptive.sample(obj1, obj2, pred.var.col = NULL, excd.prob.col = NULL,
  batch.size = 1, delta, criterion, poly = NULL, plotit = TRUE)
}
\arguments{
\item{obj1}{a \code{sf} or \code{sp} object of \bold{locations available for sampling}, where each line contains the coordinates of a spatial location, a \bold{prediction variance} or an \bold{exceedance probability} at that location and, optionally, values of one or more covariates. NOTE that only one of the two quantities (i.e. PV or EP) is required to add samples adaptively. Locations that meet the specified selection criterion are equally likely to be sampled subject to spatial constraints. See \code{criterion} and \bold{Details} for more information.}

\item{obj2}{a \code{sf} or \code{sp} object of \bold{locations previously sampled}. Each line corresponds to one spatial location. It must contain values of 2D coordinates and may also contain the values of one or more covariates. The initial sample locations design can be generated from \code{\link[geosample:random.sample]{random.sample}}, \code{\link[geosample:discrete.inhibit.sample]{discrete.inhibit.sample}}, \code{\link[geosample:contin.inhibit.sample]{contin.inhibit.sample}} or some other design.}

\item{pred.var.col}{a scalar of length one indicating the column number corresponding to prediction variance at each spatial location in \code{obj1}. This is required if \code{criterion =} \code{"predvar"}. See \code{'criterion'} and \bold{Details} for information.}

\item{excd.prob.col}{a scalar of length one indicating the column number corresponding to exceedance probabilities at each spatial location in \code{obj1}. This is required if \code{criterion =} \code{"exceedprob"}. See \code{'criterion'} and \bold{Details} for information.}

\item{batch.size}{a non-negative integer giving the number of adaptively chosen locations to be added to the existing sample (design).}

\item{delta}{minimum permissible distance between any two locations in the sample.}

\item{criterion}{criterion used for choosing new locations \eqn{x^*}. Use \code{"predvar"} for \bold{prediction variance} or \code{"exceedprob"} for \bold{exceedance probability}. See the \bold{Details} section for more information.}

\item{poly}{'optional', a \code{sf} or \code{sp} polygon object in which the design sits. The default is the bounding box of points given by \code{obj1}.}

\item{plotit}{'logical' specifying if graphical output is required. Default is \code{plotit = TRUE}.}
}
\value{
A list with the following four components:

\code{total.size:} the total number of locations, \eqn{n}, sampled.

\code{delta:} the value of \eqn{\delta}.

\code{criterion:} the sample selection criterion used for adaptive sampling.

\code{sample.locs:} a list of objects for sample locations. It has the following components.

\code{curr.sample:} a \code{sf} or \code{sp} object of dimension \eqn{n} by 2 containing all sampled locations, where \eqn{n} is the total sample size (initial plus newly added sample locations).

\code{prev.sample:} a \code{sf} or \code{sp} object of dimension \eqn{n_{i}} by 2 containing \code{initial sample} locations, where \eqn{n_{i} < n}.

\code{added.sample:} a \code{sf} or \code{sp} object of dimension \eqn{n_{a}} by 2 containing \code{additional sample} locations, i.e. adaptively sampled locations, where \eqn{n_a = b}, the batch size.
}
\description{
Draw an additional sample from a set of available locations in a defined geographical region, imposing a minimum distance between any two sampled units and taking into account existing data from previously sampled locations. The algorithm allows the user to specify either a \emph{prediction variance (PV)} criterion or an \emph{exceedance probability (EP)} criterion to choose new sampling locations. The function accepts either \code{sf} or \code{sp} objects.
}
\details{
For the predictive target \eqn{T = S(x)} at a particular location \code{x},
given an initial set of sampling locations \eqn{X_0 = (x_1,\ldots, x_{n_0})}
the available set of additional sampling locations is \eqn{A_0 = {\cal X}^* \setminus X_0}. To mimic spatially continuous sampling, the initial set should be a fine grid to cover the region of interest.

Define the following notation:
\itemize{
   \item \eqn{{\cal X}^*} is the set of all potential sampling locations, with number of elements \eqn{n^*}.
   \item \eqn{X_0} is the initial sample, with number of elements \eqn{n_0}.
   \item \eqn{b} is the batch size.
   \item \eqn{n = n_0 + kb} is the total sample size after \eqn{k} batches have been added.
   \item \eqn{{\cal X}_j, j \ge 1} is the set of locations added in the \eqn{j^{th}}{j^{th}} batch, with number of elements \eqn{b}.
   \item \eqn{A_j = {\cal X}^* \setminus ({\cal X}_0 \cup \ldots \cup {\cal X}_j)} is the set of available locations after addition of the \eqn{j^{th}}{j^{th}} batch.
}

\bold{1. Prediction variance criterion.}

For each \eqn{x \in A_0}, denote by \emph{PV(x)} the prediction variance, \eqn{\mathrm{Var}(T|Y_0)}{Var(T|Y_0)}. The algorithm then proceeds as follows.
\itemize{
   \item{Step 1.} Use a non-adaptive design to determine \eqn{{\cal X}_0}.
   \item{Step 2.} Set \eqn{j = 0}.
   \item{Step 3.} For each \eqn{x \in A_j}, calculate \eqn{PV(x)}.
     \itemize{
        \item{Step 3.(i)}   choose \eqn{x^* = \arg\max_{A_j} PV(x)}{x^* = arg max_{A_j} PV(x)},
        \item{Step 3.(ii)}  if \eqn{||x^* - x_i|| > \delta, \forall i=1,\ldots,n_0 + jb}, add \eqn{x^*} to the design,
     }
    \item{Step 4.} Repeat step 3 until \eqn{b} locations have been added to form the set \eqn{{\cal X}_{j+1}}.
    \item{Step 5.} Set \eqn{A_{j+1} = A_j \setminus {\cal X}_{j+1}} and update \eqn{j} to \eqn{j + 1}.
    \item{Step 6.} Repeat steps 3 to 5 until the total number of sampled locations is \eqn{n} or \eqn{A_j = \emptyset}.
}

\bold{2. Exceedance probability criterion.}

For each \eqn{x \in A_0}, denote by \emph{EP(x)} the absolute deviation of the exceedance probability from 0.5, \eqn{|P\{T(x) > t | y_0\} - 0.5|}, for a specified threshold \emph{t}. The algorithm proceeds as above, with changes only in step 3, as follows.
\itemize{
   \item{Step 3.} For each \eqn{x \in A_j}, calculate \eqn{EP(x)}.
   \itemize{
      \item{Step 3.(i)} choose \eqn{x^* = \arg\min_{A_j} EP(x)}{x^* = arg min_{A_j} EP(x)}.
   }
}
}
\note{
The function can only add a single batch at a time.
}
\examples{
# Example using toy datasets
#1. Candidate sampling locations on a 10 x 10 grid, each with a simulated
#   prediction variance and exceedance probability
set.seed(1234)
xy.all <- expand.grid(x = seq(0, 1, length.out = 10),
                      y = seq(0, 1, length.out = 10))
xy.all$predvar <- runif(100, min = 0, max = 2.5)
xy.all$exceedprob <- runif(100, min = 0, max = 1)
obj1 <- sf::st_as_sf(xy.all, coords = c('x', 'y'))

#2. Initial sample design
set.seed(1234)
xy.sample <- discrete.inhibit.sample(obj = obj1, size = 70,
                                     delta = 0.075, k = 0,
                                     plotit = TRUE)
init.design <- xy.sample$sample.locs

#3. Adaptive sampling designs
#a. Using the prediction variance criterion; pred.var.col points at predvar
adapt.design.pv <- adaptive.sample(obj1 = obj1, obj2 = init.design,
                                   pred.var.col = 1, criterion = "predvar",
                                   delta = 0.1, batch.size = 10,
                                   poly = NULL, plotit = TRUE)

#b. Using the exceedance probability criterion; excd.prob.col points at exceedprob
adapt.design.ep <- adaptive.sample(obj1 = obj1, obj2 = init.design,
                                   excd.prob.col = 2, criterion = "exceedprob",
                                   delta = 0.1, batch.size = 10,
                                   poly = NULL, plotit = TRUE)



\dontrun{
# Worked example: fit a model to an initial design with PrevMap, predict at all
# candidate locations, then add one adaptive batch under each criterion.
data("sim.data")
library("PrevMap")
library("sf")

#1. Generate inhibitory design without close pairs using discrete.inhibit.sample().
set.seed(1234)
xy.sample <- discrete.inhibit.sample(obj = sim.data, size = 100, delta = 0.075,
                                     k = 0, plotit = TRUE)
names(xy.sample)
init.design <- xy.sample$sample.locs

#2. Data analysis
# Knots for the low-rank approximation: a 15 x 15 grid over -0.2 to 1.2 in each coordinate.
knots <- as.matrix(expand.grid(seq(-0.2, 1.2, length.out = 15),
                               seq(-0.2, 1.2, length.out = 15)))
# MCMC settings, reused below for both model fitting and prediction.
lr.mcmc <- control.mcmc.MCML(n.sim = 10000, burnin = 1000, thin = 6)

# Starting parameter values; the third entry is also passed as start.cov.pars.
par0.lr <- c(0.001, 1, 0.4)
fit.MCML.lr <- binomial.logistic.MCML(y ~ 1,
                                      units.m = ~units.m, coords = ~st_coordinates(init.design),
                                      data = init.design, par0 = par0.lr, fixed.rel.nugget = 0,
                                      start.cov.pars = par0.lr[3], control.mcmc = lr.mcmc,
                                      low.rank = TRUE, knots = knots, kappa = 1.5,
                                      method = "nlminb", messages = TRUE,
                                      plot.correlogram = FALSE)

summary(fit.MCML.lr, log.cov.pars = FALSE)

# Note: parameter estimation above can and should be repeated several times
# with updated starting values for the covariance function.

#3. Plug-in prediction using estimated parameters
pred.MCML.lr <- spatial.pred.binomial.MCML(object = fit.MCML.lr,
                                           control.mcmc = lr.mcmc,
                                           grid.pred = st_coordinates(sim.data),
                                           type = "joint", messages = TRUE,
                                           scale.predictions = "prevalence",
                                           standard.errors = TRUE,  thresholds = 0.45,
                                           scale.thresholds = "prevalence")


#4. Visualisation of analysis from initial sample
plot(pred.MCML.lr, type = "prevalence", summary = "predictions",
     zlim = c(0, 1), main = "Prevalence - predictions")
contour(pred.MCML.lr, "prevalence", "predictions",
        zlim = c(0, 1), levels = seq(0.1,0.9, 0.1), add = TRUE)

plot(pred.MCML.lr,  summary = "exceedance.prob",
     zlim = c(0, 1), main = "Prevalence - exceedance probability")
contour(pred.MCML.lr, summary = "exceedance.prob",
        zlim = c(0, 1), levels = seq(0.1,0.3, 0.1), add = TRUE)

plot(pred.MCML.lr, type = "prevalence",  summary = "standard.errors",
     main = "Prevalence - standard errors")

#5. Adaptive sampling
# Create a data frame of inputs for adaptive sampling from the spatial predictions above.
# Squaring the prevalence standard errors gives the pred.var column.
obj1 <- as.data.frame(cbind(pred.MCML.lr$grid,
                            c(pred.MCML.lr$prevalence$standard.errors)^2,
                            pred.MCML.lr$exceedance.prob))
colnames(obj1) <- c("x", "y", "pred.var", "exceed.prob")
obj1 <- sf::st_as_sf(obj1, coords = c('x', 'y'))


# Adaptive sampling using the prediction variance criterion.
adapt.design.pv <- adaptive.sample(obj1 = obj1, obj2 = init.design,
                                   pred.var.col = 1, excd.prob.col = 2,
                                   criterion = "predvar", delta = 0.08,
                                   batch.size = 10, poly = NULL, plotit = TRUE)

# Adaptive sampling using the exceedance probability criterion.
adapt.design.ep <- adaptive.sample(obj1 = obj1, obj2 = init.design,
                                   pred.var.col = 1, excd.prob.col = 2,
                                   criterion = "exceedprob", delta = 0.08,
                                   batch.size = 10, poly = NULL, plotit = TRUE)
}



}
\references{
Chipeta M G, Terlouw D J, Phiri K S and Diggle P J. (2016a). Adaptive geostatistical design and analysis for prevalence surveys, \emph{Spatial Statistics} \bold{15}, pp. 70-84.

Giorgi E and Diggle P J. (2017). PrevMap: an R package for prevalence mapping. \emph{Journal of Statistical Software}. \bold{78}:1-29, doi: 10.18637/jss.v078.i08

Kabaghe A N, Chipeta M G, McCann R S, Phiri K S, Van Vugt M, Takken W, Diggle P J, and Terlouw D J. (2017). Adaptive geostatistical sampling enables efficient identification of malaria hotspots in repeated cross-sectional surveys in rural Malawi, \emph{PLoS One} \bold{12}(2) pp. e0172266
}
\seealso{
\code{\link[geosample:discrete.inhibit.sample]{discrete.inhibit.sample}} and \code{\link[geosample:contin.inhibit.sample]{contin.inhibit.sample}}
}
\author{
Michael G. Chipeta \email{mchipeta@mlw.mw}

Peter J. Diggle \email{p.diggle@lancaster.ac.uk}
}
