\encoding{latin1}
\name{twinstim_simulation}
\alias{simEpidataCS}
\alias{simulate.twinstim}

\title{
  Simulation of a Self-Exciting Spatio-Temporal Point Process
}

\description{
  The function \code{simEpidataCS} simulates events of a self-exciting
  spatio-temporal point process of the \code{"\link{twinstim}"} class.
  Simulation works via Ogata's modified thinning of the conditional
  intensity as described in Meyer et al. (2012).

  The \code{\link{simulate}} method for objects of class
  \code{"\link{twinstim}"} simulates new epidemic data using the model and
  the parameter estimates of the fitted object.
}

\usage{
simEpidataCS(endemic, epidemic, siaf, tiaf, qmatrix, rmarks,
    events, stgrid, tiles, beta0, beta, gamma, siafpars, tiafpars,
    t0 = stgrid$start[1], T = tail(stgrid$stop,1), nEvents = 1e5,
    control.siaf = list(F=list(), Deriv=list()),
    W = NULL, trace = 5, nCircle2Poly = 32, gmax = NULL, .allocate = 500,
    .skipChecks = FALSE, .onlyEvents = FALSE)

\method{simulate}{twinstim}(object, nsim = 1, seed = NULL, data, tiles,
    rmarks = NULL, t0 = NULL, T = NULL, nEvents = 1e5,
    control.siaf = object$control.siaf,
    W = NULL, trace = FALSE, nCircle2Poly = NULL, gmax = NULL,
    .allocate = 500, simplify = TRUE, ...)
}

\arguments{
  \item{endemic}{
    see \code{\link{twinstim}}. Note that type-specific endemic
    intercepts are specified by \code{beta0} here, not by the term
    \code{(1|type)}.
  }
  \item{epidemic}{
    see \code{\link{twinstim}}. Marks appearing in this formula must
    be returned by the generating function \code{rmarks}.
  }
  \item{siaf}{
    see \code{\link{twinstim}}.
    In addition to what is required for fitting with \code{twinstim},
    the \code{siaf} specification must also contain the element
    \code{simulate}, a function which draws random locations following the
    spatial kernel \code{siaf$f}. The first argument of the function is the
    number of points to sample (say \code{n}),
    the second one is the vector of parameters
    \code{siafpars}, the third one is the type indicator (a character string
    matching a type name as specified by \code{dimnames(qmatrix)}). With the
    current implementation there will always be simulated only one
    location at a time, i.e. \code{n=1}.
    The \link[=siaf.constant]{predefined siaf's} all provide simulation.
  } 
  \item{tiaf}{
    e.g. what is returned by the generating function
    \code{\link{tiaf.constant}} or \code{\link{tiaf.exponential}}. See also
    \code{\link{twinstim}}.
  }
  \item{qmatrix}{
    see \code{\link{epidataCS}}. Note that this square
    matrix and its \code{dimnames} determine the number and names of the
    different event types. In the simplest case, there is only a single
    type of event, i.e. \code{qmatrix = diag(1)}.
  }
  \item{rmarks}{
    function of single time (1st arg) and location
    (2nd arg) returning a one-row \code{data.frame} of marks (named
    according to the variables in \code{epidemic}) for an event at this
    point. This must include the columns \code{eps.s} and \code{eps.t}, 
    i.e. the values of the spatial and temporal interaction ranges at this
    point. Only \code{"numeric"} and \code{"factor"} columns are
    allowed. Ensure that factor variables are coded equally
    (same levels and level order) for each new sample.

    For the \code{simulate.twinstim} method, the default (\code{NULL})
    means sampling from the empirical distribution function of the
    (non-missing) marks in \code{data} in (\code{t0};\code{T}].
  }
  \item{events}{
    \code{NULL} or missing (default) in case of an empty prehistory,
    or a \code{\link{SpatialPointsDataFrame}} containing events of the
    prehistory (-Inf;\code{t0}] of the process (required for the
    epidemic to start in case of no endemic component in the model).
    The \code{SpatialPointsDataFrame} must have the same
    \code{proj4string} as \code{tiles} and \code{W}. The attached
    \code{data.frame} (data slot) must contain the typical
    columns as described in \code{\link{as.epidataCS}} (\code{time},
    \code{tile}, \code{eps.t}, \code{eps.s}, and, for type-specific
    models, \code{type}) and all marks appearing in the \code{epidemic}
    specification. Note that some column names are reserved (see
    \code{\link{as.epidataCS}}).  Only events up to
    time \code{t0} are selected and taken as the prehistory.
  }
  \item{stgrid}{
    see \code{\link{as.epidataCS}}. Simulation only works inside the spatial
    and temporal range of \code{stgrid}.
  }
  \item{tiles}{
    object inheriting from \code{SpatialPolygons} with \code{row.names} equal
    to the \code{tile} names in \code{stgrid} and having the same
    \code{proj4string} as \code{events} and \code{W}. This is necessary
    to draw the spatial location of events generated by the endemic component.
  }
  \item{beta0,beta,gamma,siafpars,tiafpars}{
    these are the parameter subvectors of the \code{twinstim}.
    \code{beta} and \code{gamma} must be given in the 
    same order as they appear in \code{endemic} and \code{epidemic},
    respectively. \code{beta0} is either a single endemic intercept or a
    vector of type-specific endemic intercepts in the same order as in
    \code{qmatrix}.
  }
  \item{t0}{
    events having occurred during (-Inf;\code{t0}] are regarded as part of the
    prehistory \eqn{H_0} of the process. The time point \code{t0} must be an element
    of \code{stgrid$start}. For \code{simEpidataCS}, by default, and
    also if \code{t0=NULL}, it is the earliest time point of the
    spatio-temporal grid \code{stgrid}. For the \code{simulate.twinstim}
    method, \code{NULL} means to use the same time range as for the
    fitting of the \code{"twinstim"} \code{object}.
  }
  \item{T, nEvents}{
    simulate a maximum of \code{nEvents} events up to time \code{T},
    then stop. For \code{simEpidataCS}, by default, and also if
    \code{T=NULL}, \code{T} equals the last stop time in \code{stgrid}
    (it cannot be greater) and \code{nEvents} is bounded above by 100000.
    For the \code{simulate.twinstim} method, \code{T=NULL} means to use
    the same time range as for the fitting of the \code{"twinstim"}
    \code{object}. 
  } 
  \item{W}{
    see \code{\link{as.epidataCS}}. Must have the same
    \code{proj4string} as \code{events} and \code{tiles}. If not
    specified (\code{NULL}), \code{W} is generated 
    automatically by calculating the union of the \code{tiles} using
    \code{\link[maptools]{unionSpatialPolygons}}
    from package \pkg{maptools} (if available). This automatic generation might
    take a while so it might be useful to create \code{W} in advance.
    It is important that \code{W} and \code{tiles} cover the same region,
    because on the one hand offspring is sampled in the
    spatial influence region of the parent event (which is the
    intersection of \code{W} and a circle of radius the \code{eps.s} of the
    parent event), and on the other hand the tiles of the sampled coordinates
    are determined by overlay with \code{tiles}.
  }
  \item{trace}{
    logical (or integer) indicating if (or how often) the current
    simulation status should be \code{cat}ed.  For the
    \code{simulate.twinstim} method, \code{trace} currently only applies
    to the first of the \code{nsim} simulations.
  }
  \item{.allocate}{
    number of rows (events) to initially allocate for the event history;
    defaults to 500.  Each time the simulated epidemic exceeds the
    allocated space, the event \code{data.frame} will be enlarged by
    \code{.allocate} rows.
  }
  \item{.skipChecks,.onlyEvents}{
    these logical arguments are not meant to be set by the user.
    They are used by the simulate-method for twinstim objects.
  }
  \item{object}{
    an object of class \code{"\link{twinstim}"}.
  }
  \item{nsim}{
    number of epidemics (i.e. spatio-temporal point patterns inheriting
    from class \code{"epidataCS"}) to simulate.  Defaults to 1 when the
    result is a simple object inheriting from class
    \code{"simEpidataCS"} (as if \code{simEpidataCS} would have been
    called directly).  If \code{nsim > 1}, the result 
    will be a list the structure of which depends on the argument
    \code{simplify}. 
  }
  \item{seed}{
    an object specifying if and how the random number generator should be
    initialized (seeded). By default (\code{NULL}), the state of the random
    generator is not changed and the value of \code{.Random.seed} prior to
    simulation is stored as attribute \code{"seed"} of the result.
  }
  \item{data}{
    an object of class \code{"epidataCS"}, usually the one to which the
    \code{"twinstim"} \code{object} was fitted.
  }
  \item{simplify}{
    logical. It is strongly recommended to set \code{simplify = TRUE}
    (default) if \code{nsim} is large. This saves space and computation time,
    because for each simulated epidemic only the \code{events} component is
    saved. All other components, which do not vary between simulations,
    are only stored from the first run. In this case, the runtime of each
    simulation is stored as an attribute \code{"runtime"} to each simulated
    \code{events}. See also the \dQuote{Value} section below.
  }
  \item{control.siaf}{see \code{\link{twinstim}}.}
  \item{nCircle2Poly}{see \code{\link{as.epidataCS}}. For
    \code{simulate.twinstim}, \code{NULL} means to use the same value as
    for \code{data}.}
  \item{gmax}{
    maximum value the temporal interaction function
    \code{tiaf$g} can attain. If \code{NULL}, then it is assumed as the
    maximum value of the type-specific values at 0, i.e.
    \code{max(tiaf$g(rep.int(0,nTypes), tiafpars, 1:nTypes))}.
  }
  \item{\dots}{unused (arguments of the generic).}
}

\value{
  The function \code{simEpidataCS} returns a simulated epidemic of class
  \code{"simEpidataCS"}, which enhances the class
  \code{"epidataCS"} by the following additional components known from
  objects of class \code{"\link{twinstim}"}:
  \code{timeRange}, \code{formula}, \code{coefficients}, \code{npars},
  \code{call}, \code{runtime}.

  The \code{simulate.twinstim} method has some additional
  \emph{attributes} set on its result:
  \code{call}, \code{seed}, \code{simplified}, and \code{runtime}
  with their obvious meanings. Furthermore, if
  \code{nsim > 1}, it returns an object of class
  \code{"simEpidataCSlist"}, the form of which depends on the value of
  \code{simplify}: if \code{simplify = FALSE}, then the return value is
  just a list of sequential simulations, each of class
  \code{"simEpidataCS"}. However, if \code{simplify = TRUE}, then the
  sequential simulations share all components but the simulated
  \code{events}, i.e. the result is a list with the same components as
  a single object of class \code{"simEpidataCS"}, but with \code{events}
  replaced by an \code{eventsList} containing the \code{events} returned
  by each of the simulations.

  The \code{stgrid} component of the returned \code{"simEpidataCS"}
  will be truncated to the actual end of the simulation, which might
  be \eqn{<T}, if the upper bound \code{nEvents} is reached during
  simulation.
  
  CAVE: Currently, \code{simplify=TRUE} in \code{simulate.twinstim}
  ignores that multiple simulated epidemics
  (\code{nsim > 1}) may have different \code{stgrid} 
  time ranges. In a \code{"simEpidataCSlist"}, the \code{stgrid} shared
  by all of the simulated epidemics is just the \code{stgrid}
  returned by the \emph{first} simulation.
}

\note{
  The more detailed the polygons in \code{tiles} are the slower the
  algorithm is. Often it can be advantageous to sacrifice some detail
  for speed by reducing polygon complexity using, e.g., the
  Douglas and Peucker (1973) reduction method available at
  \url{http://MapShaper.org} (Harrower and Bloch, 2006) or as function
  \code{dp()} in the (slightly outdated) package \pkg{shapefiles}, or by
  passing by \pkg{spatstat}'s \code{\link[spatstat]{simplify.owin}} procedure.
  
  Note also that generation of \code{"epidataCS"} depends on the
  computation of polygon intersections via the \pkg{gpclib} package,
  which has a restricted license. In order to use \code{as.epidataCS},
  this license must be explicitly accepted by setting
  \code{\link{surveillance.options}(gpclib=TRUE)}.
}

\references{
  Douglas, D. H. and Peucker, T. K. (1973):
  Algorithms for the reduction of the number of points required to
  represent a digitized line or its caricature.
  \emph{Cartographica: The International Journal for Geographic
  Information and Geovisualization}, \bold{10}, 112-122.

  Harrower, M. and Bloch, M. (2006):
  MapShaper.org: A Map Generalization Web Service.
  \emph{IEEE Computer Graphics and Applications}, \bold{26}(4), 22-27.\cr
  DOI-Link: \url{http://dx.doi.org/10.1109/MCG.2006.85}

  Meyer, S., Elias, J. and Hoehle, M. (2012):
  A space-time conditional intensity model for invasive meningococcal
  disease occurrence. \emph{Biometrics}, \bold{68}, 607-616.\cr
  DOI-Link: \url{http://dx.doi.org/10.1111/j.1541-0420.2011.01684.x}

  Meyer, S. (2010):
  Spatio-Temporal Infectious Disease Epidemiology based on Point Processes.
  Master's Thesis, Ludwig-Maximilians-Universitaet
  Muenchen.\cr
  Available as \url{http://epub.ub.uni-muenchen.de/11703/}
}

\author{ 
  Sebastian Meyer, with contributions by Michael Hoehle
}

\seealso{
The \code{\link{plot.epidataCS}} and \code{\link{animate.epidataCS}}
methods for plotting and animating continuous-space epidemic data,
respectively, which also work for simulated epidemics (by inheritance).

Function \code{\link{twinstim}} for fitting
spatio-temporal conditional intensity models to epidemic data.
}

\examples{
## example data: 'imdepi' is the observed epidemic (class "epidataCS"),
## 'imdepifit' is a "twinstim" model fitted to it
data(imdepi)
data(imdepifit)

## load borders of Germany's districts (obtained from the
## Bundesamt fuer Kartographie und Geodaesie, Frankfurt am Main, Germany,
## www.geodatenzentrum.de), simplified by the "special" Visvalingam
## algorithm (level=60\%) using www.MapShaper.org:
load(system.file("shapes", "districtsD.RData", package="surveillance"))

## map of Germany's districts
plot(districtsD)
plot(stateD, add=TRUE, border=2, lwd=2)
# 'stateD' is the union of the polygons in 'districtsD'
# (created by maptools::unionSpatialPolygons)

## the simulation needs polygon intersections computed by 'gpclib',
## whose restricted license has to be accepted explicitly
if (require("gpclib")) {
    ## accept the license for this example only (restored at the end)
    oopt <- surveillance.options(gpclib=TRUE)

    ## simulate 2 realizations (during a VERY short period -- for speed)
    ## considering events from data(imdepi) before t=31 as pre-history
    mysims <- simulate(imdepifit, nsim=2, seed=1, data=imdepi,
                       tiles=districtsD, W=stateD, t0=31, T=61, trace=FALSE,
                       nCircle2Poly=16, .allocate=100, simplify=TRUE)

    ## extract the first realization -> object of class simEpidataCS
    mysim1 <- mysims[[1]]
    summary(mysim1)
    plot(mysim1, aggregate="space")

    ## plot both epidemics using the plot-method for simEpidataCSlist's
    plot(mysims, aggregate="time", subset=type=="B")

    ## reset the surveillance options to their previous values
    surveillance.options(oopt)
}
}

\keyword{datagen}
\keyword{models}
