#' Semi-Parametric models for Interval Censored Data
#' 
#' @param formula regression formula. Response must be a \code{Surv} object of type \code{'interval2'}or \code{cbind}. See details.
#' @param data dataset
#' @param model What type of model to fit. Current choices are "\code{ph}" (Cox PH) or "\code{po}" (proportional odds)
#' @param weights Vector of case weights. Not standardized; see details
#' @param bs_samples Number of bootstrap samples used for estimation of standard errors 
#' @param useMCores Should multiple cores be used for bootstrap sample? Does not register cluster (see example)
#' @param B Should intervals be open or closed? See details.
#' @param controls Advanced control options 
#' 
#' @description  	Fits a semi-parametric model for interval censored data. 
#' Can fit either a Cox-PH model or a proportional odds model.  
#'
#' The covariance matrix for the regression coefficients is estimated via bootstrapping. 
#' For large datasets, this can become slow so parallel processing can be used to take advantage of multiple cores via the \code{foreach} package. 
#'
#'@details
#'	Response variable should either be of the form \code{cbind(l, u)} or 
#'	\code{Surv(l, u, type = 'interval2')}, where \code{l} and \code{u} are the lower 
#'	and upper ends of the interval known to contain the event of interest. 
#'	Uncensored data can be included by setting \code{l == u}, right censored data 
#'	can be included by setting \code{u == Inf} or \code{u == NA} and left censored 
#'	data can be included by setting \code{l == 0}.
#'
#' The argument \code{B} determines whether the intervals should be open or closed, 
#' i.e. \code{B = c(0,1)} implies that the event occurs within the interval \code{(l,u]}.
#'  The exception is that if \code{l == u}, it is assumed that the event is uncensored, 
#'  regardless of \code{B}. 
#'
#' In regards to weights, they are not standardized. 
#' This means that if weight[i] = 2, this is the equivalent to having two 
#' observations with the same values as subject i. 
#'
#' The algorithm used is inspired by the extended ICM algorithm from Wei Pan 1999.
#' However, it uses a conditional Newton Raphson step (for the regression parameters) 
#' and an ICM step (for the baseline survival parameters), rather than one single
#' ICM step (for both sets). In addition, a gradient ascent can also be used
#' to update the baseline parameters. This step is necessary if the
#' data contains many uncensored observations, very similar to how 
#' the EM algorithm greatly accelerates the ICM algorithm for the NPMLE 
#' (gradient ascent is used rather than the EM, as the M step is not 
#' in closed form for semi-parametric models). 
#'
#' Earlier versions of icenReg used an active set algorithm, which was not
#'  as fast for large datasets.
#'
#' @examples
#' set.seed(1)
#'
#' sim_data <- simIC_weib(n = 500, inspections = 5, inspectLength = 1)
#' ph_fit <- ic_sp(Surv(l, u, type = 'interval2') ~ x1 + x2, 
#'                 data = sim_data)	
#' # Default fits a Cox-PH model
#' 
#' summary(ph_fit)		
#' # Regression estimates close to true 0.5 and -0.5 values
#'
#'
#' new_data <- data.frame(x1 = c(0,1), x2 = c(1, 1) )
#' rownames(new_data) <- c('group 1', 'group 2')
#' plot(ph_fit, new_data)
#' # plotting the estimated survival curves
#' 
#' po_fit <- ic_sp(Surv(l, u, type = 'interval2') ~ x1 + x2, 
#'                 data = sim_data, model = 'po')
#' # fits a proportional odds model
#' 
#' summary(po_fit)
#'
#' # Not run: how to set up multiple cores
#' # library(doParallel)
#' # myCluster <- makeCluster(2) 
#' # registerDoParallel(myCluster)
#' # fit <- ic_sp(Surv(l, u, type = 'interval2') ~ x1 + x2,
#' #              data = sim_data, useMCores = TRUE
#' #              bs_samples = 500)
#' # stopCluster(myCluster)
#'
#'
#' @author Clifford Anderson-Bergman
#' @references 
#' Pan, W., (1999), Extending the iterative convex minorant algorithm to the Cox model for interval-censored data, \emph{Journal of Computational and Graphical Statistics}, Vol 8(1), pp109-120
#'
#' Wellner, J. A., and Zhan, Y. (1997) A hybrid algorithm for computation of the maximum likelihood estimator from censored data, \emph{Journal of the  American Statistical Association}, Vol 92, pp945-959
#' 
#' Anderson-Bergman, C. (preprint) Revisiting the iterative convex minorant algorithm for interval censored survival regression models
#' @export
ic_sp <- function(formula, data, model = 'ph', weights = NULL, bs_samples = 0, useMCores = F, 
                  B = c(0,1), 
                  controls = makeCtrls_icsp() ){
  recenterCovars = TRUE
  useFullHess = FALSE  
  if(missing(data)) data <- environment(formula)
	cl <- match.call()
	mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "na.action", "offset"), names(mf), 0L)
	mf <- mf[c(1L, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1L]] <- quote(stats::model.frame)
  mf <- eval(mf, parent.frame())
  mt <- attr(mf, "terms")
  y <- model.response(mf, "numeric")
  x <- model.matrix(mt, mf, contrasts)
	if(is.matrix(x))	xNames <- colnames(x)
  else				      xNames <- as.character(formula[[3]])
  if('(Intercept)' %in% colnames(x)){	
    ind = which(colnames(x) == '(Intercept)')
    x <- x[,-ind]
    xNames <- xNames[-ind]
  }
  if(length(xNames) == 0 & bs_samples > 0){
		 cat('no covariates included, so bootstrapping is not useful. Setting bs_samples = 0')
		 bs_samples = 0
	}
  yMat <- makeIntervals(y, mf)
  yMat <- adjustIntervals(B, yMat)
  if(sum(is.na(mf)) > 0)
    stop("NA's not allowed. If this is supposed to be right censored (i.e. [4, NA] was supposed to be right censored at t = 4), replace NA with Inf")
        
  checkMatrix(x)
  
  if(model == 'ph')	callText = 'ic_ph'
	else if(model == 'po')	callText = 'ic_po'
	else stop('invalid choice of model. Current optios are "ph" (cox ph) or "po" (proportional odds)')
	
  weights <- checkWeights(weights, yMat)	
	if(length(x) == 0) recenterCovars = FALSE
	
  if(!is.null(controls$regStart)) regStart <- controls$regStart
  else                            regStart <- rep(0, length(xNames)) 
  if(length(regStart) != length(xNames)){
    stop("length of provided regression parameters wrong length")
  }
    
  other_info <- list(useGA = controls$useGA, maxIter = controls$maxIter, 
                     baselineUpdates = controls$baseUpdates, 
                     useFullHess = useFullHess, 
                     updateCovars = controls$updateReg,
                     recenterCovars = recenterCovars, 
                     regStart = regStart)  

  fitInfo <- fit_ICPH(yMat, x, callText, weights, other_info)
	dataEnv <- list()
	dataEnv[['x']] <- as.matrix(x, nrow = nrow(yMat))
	if(ncol(dataEnv$x) == 1) colnames(dataEnv[['x']]) <- as.character(formula[[3]])
	dataEnv[['y']] <- yMat
	seeds = as.integer( runif(bs_samples, 0, 2^31) )
	bsMat <- numeric()
	if(useMCores) `%mydo%` <- `%dopar%`
	else          `%mydo%` <- `%do%`
	i <- NULL  #this is only to trick R CMD check, 
	           #as it does not recognize the foreach syntax
	if(bs_samples > 0){
	    bsMat <- foreach(i = seeds, .combine = 'rbind') %mydo%{
	        set.seed(i)
	        sampDataEnv <- bs_sampleData(dataEnv, weights)
			    ans <- getBS_coef(sampDataEnv, callText = callText,
				           other_info = other_info)
			    rm(sampDataEnv)
			    return(ans)
	    	}
	 }
	    
	 if(bs_samples > 0){
	   	 names(fitInfo$coefficients) <- xNames
	   	 colnames(bsMat) <- xNames
	   	 incompleteIndicator <- is.na(bsMat[,1])
	   	 numNA <- sum(incompleteIndicator)
	   	 if(numNA > 0){
	    		if(numNA / length(incompleteIndicator) >= 0.1)
	    		cat('warning: ', numNA,
	    		    ' bootstrap samples (out of ', bs_samples, 
	    		    ') were dropped due to singular covariate matrix.',
              'Likely due to very sparse covariate. Be wary of these results.\n', sep = '')
	    		bsMat <- bsMat[!incompleteIndicator,]
	    	}
	      covar <- cov(bsMat)
    }else{ 
        bsMat <- NULL
        covar <- NULL
    }
    names(fitInfo$coefficients) <- xNames
    fitInfo$bsMat <- bsMat
    fitInfo$var <- covar
    fitInfo$call = cl
    fitInfo$formula = formula
    fitInfo$.dataEnv <- new.env()
    if(!missing(data)){ fitInfo$.dataEnv$data = data }
    fitInfo$par = 'semi-parametric'
    fitInfo$model = model
    fitInfo$reg_pars <- fitInfo$coefficients
    fitInfo$terms <- mt
    fitInfo$xlevels <- .getXlevels(mt, mf)
    if(fitInfo$iterations == controls$maxIter){
      warning('Maximum iterations reached in ic_sp.')
    }
   return(fitInfo)
}

#' Control Parameters for ic_sp
#' 
#' @param useGA Should constrained gradient ascent step be used?
#' @param maxIter Maximum iterations
#' @param baseUpdates number of baseline updates (ICM + GA) per iteration
#' @param regStart Initial values for regression parameters
#'
#'  @description
#' Creates the control options for the \code{ic_sp} function. 
#' Defaults not intended to be changed for use in standard analyses.    
#'
#' @details 
#' The constrained gradient step, actived by \code{useGA = T}, 
#' is a step that was added to improve the convergence in a special case. 
#' The option to turn it off is only in place to help demonstrate it's utility. 
#'
#'  \code{regStart} also for seeding of initial value of regression parameters. Intended for use in ``warm start" for bootstrap samples 
#'  and providing fixed regression parameters when calculating fit in qq-plots. 
#'  
#' @author Clifford Anderson-Bergman
#' @export
makeCtrls_icsp <- function(useGA = T, maxIter = 10000, baseUpdates = 5,
               regStart = NULL){
  ans <- list(useGA = useGA, maxIter = maxIter, 
              baseUpdates = baseUpdates, 
              regStart = regStart, updateReg = TRUE)
  return(ans)
}

vcov.icenReg_fit <- function(object,...) object$var

#' Get Estimated Survival Curves from Semi-parametric Model for Interval Censored Data
#' 
#' @param fit model fit with \code{ic_sp} 
#' @param newdata data.frame containing covariates for which the survival curve will be fit to. 
#' Rownames from \code{newdata} will be used to name survival curve. 
#' If left blank, baseline covariates will be used
#' 
#' @description
#' Extracts the estimated survival curve(s) from an ic_sp or ic_np model for interval censored data. 
#' 
#' @details
#' Output will be a list with two elements: the first item will be \code{$Tbull_ints}, 
#' which is the Turnbull intervals. 
#' This is a k x 2 matrix, with the first column being the beginning of the 
#' Turnbull interval and the second being the end. 
#' This is necessary due to the \emph{representational non-uniqueness};
#' any survival curve that lies between the survival curves created from the
#' upper and lower limits of the Turnbull intervals will have equal likelihood. 
#' See example for proper display of this. The second item is \code{$S_curves}, 
#' or the estimated survival probability at each Turnbull interval for individuals 
#' with the covariates provided in \code{newdata}. Note that multiple rows may 
#' be provided to newdata, which will result in multiple S_curves. 
#' @author Clifford Anderson-Bergman
#' @export
getSCurves <- function(fit, newdata = NULL){
	if(inherits(fit, 'impute_par_icph'))	stop('getSCurves currently not supported for imputation model')
	if(inherits(fit, 'ic_par'))				stop('getSCurves does not support ic_par objects. Use getFitEsts() instead. See ?getFitEsts')
	etas <- get_etas(fit, newdata)
	grpNames <- names(etas)
	transFxn <- get_link_fun(fit)
	if(fit$par == 'semi-parametric' | fit$par == 'non-parametric'){
		x_l <- fit$T_bull_Intervals[1,]
		x_u <- fit$T_bull_Intervals[2,]
		x_l <- c(x_l[1], x_l)
		x_u <- c(x_l[1], x_u)
		Tbull_intervals <- cbind(x_l,  x_u)
		colnames(Tbull_intervals) <- c('lower', 'upper')
		s <- 1 - c(0, cumsum(fit$p_hat))
		ans <- list(Tbull_ints = Tbull_intervals, "S_curves" = list())
		
		for(i in 1:length(etas)){
			eta <- etas[i]
			ans[["S_curves"]][[grpNames[i] ]] <- transFxn(s, eta)
		}
		class(ans) <- 'sp_curves'
		return(ans)
	}
	else{
	  	stop('getSCurves only for semi-parametric model. Try getFitEsts')
	}
}


plot.icenReg_fit <- function(x, y, fun = 'surv', 
                             lgdLocation = 'topright', xlab = "time", ...){
	if(inherits(x, 'impute_par_icph'))	stop('plot currently not supported for imputation model')
  argList <- list(...)
  colors <- argList$col
	if(missing(y)) y <- argList$newdata	
	newdata <- y
  nRows <- 1
  if(!is.null(newdata)) nRows <- nrow(newdata)
  if(fun == 'surv'){ s_trans <- function(x){x}; yName = 'S(t)'}
  else if(fun == 'cdf'){ s_trans <- function(x){1-x}; yName = 'F(t)' }
  else stop('"fun" option not recognized. Choices are "surv" or "cdf"')

  addList <- list(xlab = xlab, ylab = yName)
  argList <- addListIfMissing(addList, argList)
  firstPlotList <- argList
  firstPlotList[['type']] <- 'n'
  firstPlotList[['x']] <- 1
  firstPlotList[['y']] <- 1
  
    
	if(x$par == 'semi-parametric' | x$par == 'non-parametric'){
		curveInfo <- getSCurves(x, y)
		allx <- c(curveInfo$Tbull_ints[,1], curveInfo$Tbull_ints[,2])
		dummyx <- range(allx, finite = TRUE)
		dummyy <- c(0,1)
	  firstPlotList[['xlim']] = dummyx
	  firstPlotList[['ylim']] = dummyy
    do.call(plot, firstPlotList)

		x_l <- curveInfo$Tbull_ints[,1]
		x_u <- curveInfo$Tbull_ints[,2]
		k <- length(x_l)
		ss <- curveInfo$S_curves

		if(is.null(colors))  colors <- 1:length(ss)
		if(length(colors) == 1) colors <- rep(colors, length(ss)) 
		for(i in 1:length(ss)){
			lines(x_l, s_trans(ss[[i]]), col = colors[i], type = 's')
			lines(x_u, s_trans(ss[[i]]), col = colors[i], type = 's')
			lines(c(x_l[k], x_u[k]), s_trans(c(ss[[i]][k], ss[[i]][k])), col = colors[i])
		}
		if(length(ss) > 1){
			grpNames <- names(ss)
			legend(lgdLocation, legend = grpNames, lwd = rep(1, length(grpNames) ), col = colors)
		}
	}
	else if(inherits(x, 'par_fit')){
    ranges <- matrix(nrow = nRows, ncol = 2)
		ranges[,1] <- getFitEsts(x, newdata = newdata, p = 0.05 )
    ranges[,2] <- getFitEsts(x, newdata = newdata, p = 0.95 )
    
    addList <- list(xlab = xlab, ylab = yName, 
                    xlim = range(as.numeric(ranges), finite = TRUE), ylim = c(0,1))
    firstPlotList<- addListIfMissing(addList, firstPlotList)
    do.call(plot, firstPlotList)

    ranges[,1] <- getFitEsts(x, newdata = newdata, p = 0.005 )
		ranges[,2] <- getFitEsts(x, newdata = newdata, p = 0.995 )
		if(is.null(colors))  colors <- 1:nRows
		
		for(i in 1:nrow(ranges)){
			grid = ranges[i,1] + 0:100/100 * (ranges[i,2] - ranges[i,1])
			est.s <- 1 - getFitEsts(x, newdata = subsetData_ifNeeded(i, newdata), q = grid)
			lines(grid, s_trans(est.s), col = colors[i])
		}
		if(nrow(ranges) > 1){
			grpNames <- rownames(newdata)
			legend(lgdLocation, legend = grpNames, lwd = rep(1, length(grpNames) ), col = 1:ncol(ranges))
		}
	}
}


lines.icenReg_fit <- function(x, y, fun = 'surv', ...){
  argList <- list(...)
  colors <- argList$col
  if(missing(y)) y <- argList$newdata	
  newdata <- y
  nRows <- 1
  if(!is.null(newdata)) nRows <- nrow(newdata)
  if(fun == 'surv'){ s_trans <- function(x){x}; yName = 'S(t)'}
  else if(fun == 'cdf'){ s_trans <- function(x){1-x}; yName = 'F(t)' }
  else stop('"fun" option not recognized. Choices are "surv" or "cdf"')
  
#  addList <- list(xlab = xlab, ylab = yName)
#  argList <- addListIfMissing(addList, argList)

  if(x$par == 'semi-parametric' | x$par == 'non-parametric'){
    argList <- addIfMissing('s', 'type', argList)
    curveInfo <- getSCurves(x, y)
    allx <- c(curveInfo$Tbull_ints[,1], curveInfo$Tbull_ints[,2])
    dummyx <- range(allx, finite = TRUE)
    dummyy <- c(0,1)
    x_l <- curveInfo$Tbull_ints[,1]
    x_u <- curveInfo$Tbull_ints[,2]
    k <- length(x_l)
    ss <- curveInfo$S_curves
    if(is.null(colors))  colors <- 1:length(ss)
    if(length(colors) == 1) colors <- rep(colors, length(ss)) 
    for(i in 1:length(ss)){
      argList[['x']] <- x_l
      argList[['y']] <- s_trans(ss[[i]])
      argList[['col']] <- colors[i]
      do.call(lines, argList)     
      argList[['x']] <- x_u
      do.call(lines, argList)     
      argList[['x']] <- c(x_l[k], x_u[k])
      argList[['y']] <- s_trans(c(ss[[i]][k], ss[[i]][k]))
      do.call(lines, argList)
    }
  }
  else if(inherits(x, 'par_fit')){
    ranges <- matrix(nrow = nRows, ncol = 2)
    ranges[,1] <- getFitEsts(x, newdata = newdata, p = 0.05 )
    ranges[,2] <- getFitEsts(x, newdata = newdata, p = 0.95 )
    if(is.null(colors))  colors <- 1:nRows
    for(i in 1:nrow(ranges)){
      grid = ranges[i,1] + 0:100/100 * (ranges[i,2] - ranges[i,1])
      est.s <- 1 - getFitEsts(x, newdata = subsetData_ifNeeded(i, newdata), q = grid)
      argList[['x']] <- grid
      argList[['y']] <- s_trans(est.s)
      argList[['col']] <- colors[i]
      do.call(lines, argList)
    }
  }
}


summary.icenReg_fit <- function(object,...)
	new('icenRegSummary', object)
summary.ic_npList <- function(object, ...)
  object

	
#' Simulates interval censored data from regression model with a Weibull baseline
#' 
#' @param n Number of samples simulated
#' @param b1 Value of first regression coefficient
#' @param b2 Value of second regression coefficient
#' @param model Type of regression model. Options are 'po' (prop. odds) and 'ph' (Cox PH)
#' @param shape shape parameter of baseline distribution
#' @param scale scale parameter of baseline distribution
#' @param inspections number of inspections times of censoring process
#' @param inspectLength max length of inspection interval
#' @param rndDigits number of digits to which the inspection time is rounded to, 
#' creating a discrete inspection time. If \code{rndDigits = NULL}, the inspection time is not rounded, 
#' resulting in a continuous inspection time
#' @param prob_cen probability event being censored. If event is uncensored, l == u
#'
#' @description
#' Simulates interval censored data from a regression model with a weibull baseline distribution. Used for demonstration
#' @details 
#' Exact event times are simulated according to regression model: covariate \code{x1} 
#' is distributed \code{rnorm(n)} and covariate \code{x2} is distributed
#' \code{1 - 2 * rbinom(n, 1, 0.5)}. Event times are then censored with a 
#' case II interval censoring mechanism with \code{inspections} different inspection times. 
#' Time between inspections is distributed as \code{runif(min = 0, max = inspectLength)}. 
#' Note that the user should be careful in simulation studies not to simulate data 
#' where nearly all the data is right censored (or more over, all the data with x2 = 1 or -1) 
#' or this can result in degenerate solutions!
#' 
#' @examples 
#' set.seed(1)
#' sim_data <- simIC_weib(n = 500, b1 = .3, b2 = -.3, model = 'ph', 
#'                       shape = 2, scale = 2, inspections = 6, 
#'                       inspectLength = 1)
#' #simulates data from a cox-ph with beta weibull distribution.
#'
#' diag_covar(Surv(l, u, type = 'interval2') ~ x1 + x2, 
#'            data = sim_data, model = 'po')
#' diag_covar(Surv(l, u, type = 'interval2') ~ x1 + x2,
#'            data = sim_data, model = 'ph')
#'
#' #'ph' fit looks better than 'po'; the difference between the transformed survival
#' #function looks more constant
#' @author Clifford Anderson-Bergman
#' @export
simIC_weib <- function(n = 100, b1 = 0.5, b2 = -0.5, model = 'ph', 
					   shape = 2, scale = 2, 
					   inspections = 2, inspectLength = 2.5,
					   rndDigits = NULL, prob_cen = 1){
	rawQ <- runif(n)
    x1 <- runif(n, -1, 1)
    x2 <- 1 - 2 * rbinom(n, 1, 0.5)
    nu <- exp(x1 * b1 + x2 * b2)
    
    if(model == 'ph')		adjFun <- function(x, nu) {1 - x^(1/nu)}
	else if(model == 'po') 	adjFun <- function(x, nu) {1 - x*(1/nu) / (x * 1/nu - x + 1)}
    adjQ <- adjFun(rawQ, nu)
    trueTimes <- qweibull(adjQ, shape = shape, scale = scale)
    
    obsTimes <- runif(n = n, max = inspectLength)
    if(!is.null(rndDigits))
    	obsTimes <- round(obsTimes, rndDigits)
    
    l <- rep(0, n)
    u <- rep(0, n)
    
    caught <- trueTimes < obsTimes
    u[caught] <- obsTimes[caught]
    l[!caught] <- obsTimes[!caught]
    
    if(inspections > 1){
    	for(i in 2:inspections){
		    oldObsTimes <- obsTimes
    		obsTimes <- oldObsTimes + runif(n, max = inspectLength)
		    if(!is.null(rndDigits))
    			obsTimes <- round(obsTimes, rndDigits)
    		caught <- trueTimes >= oldObsTimes  & trueTimes < obsTimes
    		needsCatch <- trueTimes > obsTimes
    		u[caught] <- obsTimes[caught]
    		l[needsCatch] <- obsTimes[needsCatch]
    	}
    }
    else{
    	needsCatch <- !caught	
    }
    u[needsCatch] <- Inf
    
    if(sum(l > u) > 0)	stop('warning: l > u! Bug in code')
    
    isCensored <- rbinom(n = n, size = 1, prob = prob_cen) == 1

    l[!isCensored] <- trueTimes[!isCensored]
    u[!isCensored] <- trueTimes[!isCensored]
    
    if(sum(l == Inf) > 0){
      allTimes <- c(l,u)
      allFiniteTimes <- allTimes[allTimes < Inf]
      maxFiniteTime <- max(allFiniteTimes)
      l[l == Inf] <- maxFiniteTime
    }
    return(data.frame(l = l, u = u, x1 = x1, x2 = x2))
}


simICPO_beta <- function(n = 100, b1 = 1, b2 = -1, inspections = 1, shape1 = 2, shape2 = 2, rndDigits = NULL){
	rawQ <- runif(n)
    x1 <- rnorm(n)
    x2 <- rbinom(n, 1, 0.5) - 0.5
    nu <- exp(x1 * b1 + x2 * b2)
    adjQ <- 1 - rawQ*(1/nu) / (rawQ * 1/nu - rawQ + 1)
    trueTimes <- qbeta(adjQ, shape1 = shape1, shape2 = shape2)
    
    inspectionError = 1 / (inspections + 1)
    obsTimes <- 1 / (inspections + 1) + runif(n, min = -inspectionError, max = inspectionError)
    if(!is.null(rndDigits))
    	obsTimes <- round(obsTimes, rndDigits)
    
    l <- rep(0, n)
    u <- rep(0, n)
    
    caught <- trueTimes < obsTimes
    u[caught] <- obsTimes[caught]
    l[!caught] <- obsTimes[!caught]
    
    if(inspections > 1){
    	for(i in 2:inspections){
		    oldObsTimes <- obsTimes
    		obsTimes <- i / (inspections+1) + runif(n, min = -inspectionError, max = inspectionError)
		    if(!is.null(rndDigits))
    			obsTimes <- round(obsTimes, rndDigits)
    		caught <- trueTimes >= oldObsTimes  & trueTimes < obsTimes
    		needsCatch <- trueTimes > obsTimes
    		u[caught] <- obsTimes[caught]
    		l[needsCatch] <- obsTimes[needsCatch]
    	}
    }
    else{
    	needsCatch <- !caught	
    }
    u[needsCatch] <- 1
    
    if(sum(l > u) > 0)	stop('warning: l > u! Bug in code')
    
    return(data.frame(l = l, u = u, x1 = x1, x2 = x2))
}




#' Evaluate covariate effect for regression model
#' 
#' @param object      Either a formula or a model fit with \code{ic_sp} or \code{ic_par}
#' @param varName     Covariate to split data on. If left blank, will split on each covariate
#' @param data        Data. Unnecessary if \code{object} is a fit
#' @param model       Type of model. Choices are \code{'ph'} or \code{'po'} 
#' @param weights     Case weights
#' @param yType       Type of plot created. See details
#' @param factorSplit Should covariate be split as a factor (i.e. by levels)
#' @param numericCuts If \code{fractorSplit == FALSE}, cut points of covariate to stratify data on
#' @param col         Colors of each subgroup plot. If left blank, will auto pick colors
#' @param xlab        Label of x axis
#' @param ylab        Label of y axis
#' @param main        title of plot
#' @param lgdLocation Where legend should be placed. See details
#' @description Creates plots to diagnosis fit of covariate effect in a regression model. 
#' For a given variable, stratifies the data across different levels of the variable and adjusts 
#' for all the other covariates included in \code{fit} and then plots a given function to help 
#' diagnosis where covariate effect follows model assumption 
#' (i.e. either proportional hazards or proportional odds). See \code{details} for descriptions of the plots. 
#'  
#' If \code{varName} is not provided, will attempt to figure out how to divide up each covariate 
#' and plot all of them, although this may fail. 
#'@details 
#' For the Cox-PH and proportional odds models, there exists a transformation of survival curves 
#' such that the difference should be constant for subjects with different covariates. 
#' In the case of the Cox-PH, this is the log(-log(S(t|X))) transformation, for the proporitonal odds, 
#' this is the log(S(t|X) / (1 - S(t|X))) transformation. 
#'
#' The function diag_covar allows the user to easily use these transformations to diagnosis 
#' whether such a model is appropriate. In particular, it takes a single covariate and 
#' stratifies the data on different levels of that covariate. 
#' Then, it fits the semi-parametric regression model 
#' (adjusting for all other covariates in the data set) on each of these 
#' stratum and extracts the baseline survival function. If the stratified covariate does 
#' follow the regression assumption, the difference between these transformed baseline 
#' survival functions should be approximately constant. 
#'
#' To help diagnosis, the default function plotted is the transformed survival functions, 
#' with the overall means subtracted off. If the assumption holds true, then the mean 
#' removed curves should be approximately parallel lines (with stochastic noise). 
#' Other choices of \code{yType}, the function to plot, are \code{"transform"}, 
#' which is the transformed functions without the means subtracted and \code{"survival"}, 
#' which is the baseline survival distribution is plotted for each strata. 
#'
#' Currently does not support stratifying covariates that are involved in an interaction term. 
#'
#' For variables that are factors, it will create a strata for each level of the covariate, up to 20 levels. 
#' If \code{factorSplit == FALSE}, will divide up numeric covariates according to the cuts provided to numericCuts. 
#'
#' \code{lgdLocation} is an argument placed to \code{legend} dictating where the legend will be placed. 
#' If \code{lgdLocation = NULL}, will use standard placement given \code{yType}. See \code{?legend} for more details. 
#' @author Clifford Anderson-Bergman
#' @export
diag_covar <- function(object, varName, 
           data, model, weights = NULL,
           yType = 'meanRemovedTransform', 
           factorSplit = TRUE, 
           numericCuts, col, 
           xlab, ylab, main, 
           lgdLocation = NULL){
	if(!yType %in% c('survival', 'transform', 'meanRemovedTransform')) stop("yType not recognized. Options = 'survival', 'transform' or 'meanRemovedTransform'")
  if(missing(data)){
    if(!is(object, 'icenReg_fit')) stop("either object must be icenReg_fit, or formula with data supplied")
    data <- object$getRawData()
  }
  max_n_use <- nrow(data) #for backward compability. No longer need max_n_use
  subDataInfo <- subSampleData(data, max_n_use, weights)
	data <- subDataInfo$data
	weights <- subDataInfo$w
	if(is.null(weights)) weights <- rep(1, nrow(data))
	
	fullFormula <- getFormula(object)
	if(missing(model)) model <- object$model
	if(is.null(model)) stop('either object must be a fit, or model must be provided')
	if(missing(data))	data <- getData(object)
	if(missing(varName)){
		allVars <- getVarNames_fromFormula(fullFormula)
		nV <- length(allVars)
		k <- ceiling(sqrt(nV))
		if(k > 1) par(mfrow = c( ceiling(nV/k), k) )
		for(vn in allVars){
			useFactor <- length( unique((data[[vn]])) ) < 5
			diag_covar(object, vn, factorSplit = useFactor, model = model,
			           data = data, yType = yType,
			           weights = weights, lgdLocation = lgdLocation,
			           col = col)
			}
		return(invisible(NULL))
	}

	if(model == 'aft'){
	  stop('diag_covar not supported for aft model. This is because calculating
	       the non-parametric aft model is quite difficult')
	}
	
	if(model == 'ph')				s_trans <- function(x){ isOk <- x > 0 & x < 1
															ans <- numeric()
															ans[isOk] <- log(-log(x[isOk]) )
															ans
															}
	
	else if(model == 'po')		 	s_trans <- function(x){ isOk <- x > 0 & x < 1
															ans <- numeric()
															ans[isOk] <- log(x[isOk]/(1-x[isOk]))
															return(ans)
														   }
															

	allData <- data
	vals <- allData[[varName]]
	if(is.null(vals))	stop(paste('Cannot find variable', varName, 'in original dataset'))
	orgCall <- fullFormula
	reducCall <- removeVarFromCall(orgCall, varName)
	if(identical(orgCall, reducCall))	stop('varName not found in original call')
	
	spltInfo <- NULL
	if(factorSplit){
		factVals <- factor(vals)
		if(length(levels(factVals)) > 20) stop('Attempting to split on factor with over 20 levels. Try using numeric version of covariate and use numericCuts instead')
		spltInfo <- makeFactorSplitInfo(vals, levels(factVals))
	}
	else{
		if(missing(numericCuts))	numericCuts <- median(data[[varName]])
		spltInfo <- makeNumericSplitInfo(vals, numericCuts)
	}
	
	spltFits <- splitAndFit(newcall = reducCall, data = allData, varName = varName, 
							splitInfo = spltInfo, fitFunction = ic_sp, model = model, weights = weights)
		
	allX <- numeric()
	allY <- numeric()
	fitNames <- ls(spltFits)
	for(nm in fitNames){
		allX <- c(allX, as.numeric(spltFits[[nm]]$T_bull_Intervals) )
	}
	
	xlim <- range(allX, na.rm = TRUE, finite = TRUE)
	ylim <- sort( s_trans(c(0.025, 0.975)) )
	
	if(missing(col))
		col <- 1 + 1:length(fitNames)
	if(missing(xlab))	xlab = 't'
	if(missing(main)) 	main = varName
	if(missing(ylab)){
		if(model == 'ph'){
			ylab = 'log(-log(S(t)))'
			lgdLoc <- 'bottomright'
		}
		else if(model == 'po'){
			ylab = 'log(S(t)/(1 - S(t)))'
			lgdLoc <- 'bottomleft'	
		}
		else stop('model not recognized!')
	}
	t_vals <- xlim[1] + 1:999/1000 * (xlim[2] - xlim[1])
	estList <- list()
	meanVals <- 0
	if(yType == 'survival'){
		ylab = 'S(t)'
		lgdLoc <- 'bottomleft'
		s_trans <- function(x) x
		ylim = c(0,1)
	}
	if(yType == 'meanRemovedTransform'){
		ylab = paste('Mean Removed', ylab)
	}
		
	for(i in seq_along(fitNames)){
		if(yType == 'transform' | yType == 'survival'){
			nm <- fitNames[i]
			thisCurve <- getSCurves(spltFits[[nm]])
			theseTbls <- thisCurve$Tbull_ints
			thisS <- thisCurve$S_curves$baseline
			#thisCol <- col[i]
			#lines(theseTbls[,1], s_trans(thisS), col = thisCol, type = 's')
			#lines(theseTbls[,2], s_trans(thisS), col = thisCol, type = 's')
			estList[[nm]] <- list(x = theseTbls, y = s_trans(thisS) )
		}
		else if(yType == 'meanRemovedTransform'){	
			nm <- fitNames[i]
			estList[[nm]] <- s_trans(1 - getFitEsts(spltFits[[nm]], q = t_vals) )
			meanVals <- estList[[nm]] + meanVals
			}
	}

	if(yType == 'meanRemovedTransform'){
		meanVals <- meanVals/length(estList)
		ylim = c(Inf, -Inf)
		for(i in seq_along(estList)){
			theseLims <- range(estList[[i]] - meanVals, finite = TRUE, na.rm = TRUE)
			ylim[1] <- min(theseLims[1], ylim[1])
			ylim[2] <- max(theseLims[2], ylim[2])
		}
		
		yrange <- ylim[2] - ylim[1]
		ylim[2] = ylim[2] + yrange * 0.2
		ylim[1] = ylim[1] - yrange * 0.2
	}
		
	plot(NA, xlab = xlab, ylab = ylab, main = main, xlim = xlim, ylim = ylim)

	
	if(yType == 'meanRemovedTransform'){
		for(i in seq_along(estList)){
			lines(t_vals, estList[[i]] - meanVals, col = col[i])
		}
	}	
	else if(yType == 'transform' | yType == 'survival'){
		for(i in seq_along(estList)){
			xs <- estList[[i]]$x
			y  <- estList[[i]]$y
			lines(xs[,1], y, col = col[i], type = 's')
			lines(xs[,2], y, col = col[i], type = 's')
		}
	}
	if(!is.null(lgdLocation))	lgdLoc <- lgdLocation
	legend(lgdLoc, legend = fitNames, col = col, lwd = 1)
}



#' Parametric Regression  Models for Interval Censored Data
#' 
#' @param formula     Regression formula. Response must be a \code{Surv} object of type
#'  \code{'interval2'} or \code{cbind}. See details.
#' @param data        Dataset
#' @param model       What type of model to fit. Current choices are "\code{ph}" (proportional hazards), 
#' "\code{po}" (proportional odds) or "\code{aft}" (accelerated failure time)
#' @param dist        What baseline parametric distribution to use. See details for current choices
#' @param weights     vector of case weights. Not standardized; see details
#'
#' @description Fits a parametric regression model for interval censored data. 
#' Can fita proportional hazards, proportional odds or accelerated failure time model.  
#'
#' @details Currently supported distributions choices are "exponential", "weibull", "gamma", 
#' "lnorm", "loglogistic" and "generalgamma" (i.e. generalized gamma distribution). 
#'
#' Response variable should either be of the form \code{cbind(l, u)} or \code{Surv(l, u, type = 'interval2')}, 
#' where \code{l} and \code{u} are the lower and upper ends of the interval known to contain the event of interest. 
#' Uncensored data can be included by setting \code{l == u}, right censored data can be included by setting 
#' \code{u == Inf} or \code{u == NA} and left censored data can be included by setting \code{l == 0}.
#'
#' Does not allow uncensored data points at t = 0 (i.e. \code{l == u == 0}), as this will 
#' lead to a degenerate estimator for most parametric families. Unlike the current implementation 
#' of survival's \code{survreg}, does allow left side of intervals of positive length to 0 and 
#' right side to be \code{Inf}. 
#'
#' In regards to weights, they are not standardized. This means that if weight[i] = 2, 
#' this is the equivalent to having two observations with the same values as subject i. 
#' 
#' 
#' For numeric stability, if abs(right - left) < 10^-6, observation are considered 
#' uncensored rather than interval censored with an extremely small interval. 
#' @examples
#' data(miceData)
#'
#' logist_ph_fit <- ic_par(Surv(l, u, type = 'interval2') ~ grp, 
#'                        data = miceData, dist = 'loglogistic')
#' 
#' logist_po_fit <- ic_par(cbind(l, u) ~ grp, 
#'                         data = miceData, dist = 'loglogistic',
#'                        model = 'po')
#'
#' summary(logist_ph_fit)
#' summary(logist_po_fit)
#' @author Clifford Anderson-Bergman
#' @export
ic_par <- function(formula, data, model = 'ph', dist = 'weibull', weights = NULL){
  if(missing(data)) data <- environment(formula)
	cl <- match.call()
	mf <- match.call(expand.dots = FALSE)
#    m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(mf), 0L)
    m <- match(c("formula", "data", "subset", "na.action", "offset"), names(mf), 0L)
    mf <- mf[c(1L, m)]
    mf$drop.unused.levels <- TRUE
    mf[[1L]] <- quote(stats::model.frame)
    mf <- eval(mf, parent.frame())
    
    mt <- attr(mf, "terms")
    y <- model.response(mf, "numeric")
    x <- model.matrix(mt, mf, contrasts)
	if(is.matrix(x))	xNames <- colnames(x)
    else				xNames <- as.character(formula[[3]])
	if('(Intercept)' %in% colnames(x)){	
		ind = which(colnames(x) == '(Intercept)')
		x <- x[,-ind]
		xNames <- xNames[-ind]
	}
		
  yMat <- as.matrix(y)[,1:2]
  
  if(is(y, "Surv")){
    rightCens <- mf[,1][,3] == 0
	  yMat[rightCens,2] <- Inf
	
	  exact <- mf[,1][,3] == 1
	  yMat[exact, 2] = yMat[exact, 1]
  }
    storage.mode(yMat) <- 'double'
    
    if(sum(is.na(mf)) > 0)
    	stop("NA's not allowed. If this is supposed to be right censored (i.e. [4, NA] was supposed to be right censored at t = 4), replace NA with Inf")
        
    testMat <- cbind(x, 1)
    invertResult <- try(diag(solve(t(testMat) %*% testMat )), silent = TRUE)
    if(is(invertResult, 'try-error'))
	    stop('covariate matrix is computationally singular! Make sure not to add intercept to model, also make sure every factor has observations at every level')
	    
	callText <- paste(dist, model)

	if(is.null(weights))	weights = rep(1, nrow(yMat))
	if(length(weights) != nrow(yMat))	stop('weights improper length!')
	if(min(weights) < 0)				stop('negative weights not allowed!')
	if(sum(is.na(weights)) > 0)			stop('cannot have weights = NA')
	if(is.null(ncol(x))) recenterCovar = FALSE
   	fitInfo <- fit_par(yMat, x, parFam = dist, link = model, 
   	                   leftCen = 0, rightCen = Inf, uncenTol = 10^-6, 
   	                   regnames = xNames, weights = weights,
   	                   callText = callText)
	fitInfo$call = cl
	fitInfo$formula = formula
  fitInfo$.dataEnv = new.env()
  if(!missing(data)){ fitInfo$.dataEnv$data = data }
	fitInfo$par = dist
	fitInfo$model = model
  fitInfo$terms <- mt
  fitInfo$xlevels <- .getXlevels(mt, mf)

  return(fitInfo)
}

#' Get Survival Curve Estimates from icenReg Model
#' 
#' @param fit      model fit with \code{ic_par} or \code{ic_sp}
#' @param newdata  \code{data.frame} containing covariates
#' @param p        Percentiles
#' @param q        Quantiles
#' @description 
#' Gets probability or quantile estimates from a \code{ic_par} or \code{ic_sp} object. 
#' Provided estimates conditional on regression parameters found in \code{newdata}.
#' @details
#' If \code{newdata} is left blank, baseline estimates will be returned (i.e. all covariates = 0). 
#' If \code{p} is provided, will return the estimated F^{-1}(p | x). If \code{q} is provided, 
#' will return the estimated F(q | x). If neither \code{p} nor \code{q} are provided, 
#' the estimated conditional median is returned.
#'  
#' In the case of \code{ic_sp}, the MLE of the baseline survival is not necessarily unique, 
#' as probability mass is assigned to disjoint Turnbull intervals, but the likelihood function is 
#' indifferent to how probability mass is assigned within these intervals. In order to have a well 
#' defined estimate returned, we assume probability is assigned uniformly in these intervals. 
#' In otherwords, we return *a* maximum likelihood estimate, but don't attempt to characterize *all* maximum 
#' likelihood estimates with this function. If that is desired, all the information needed can be 
#' extracted with \code{getSCurves}.
#' @examples 
#' simdata <- simIC_weib(n = 500, b1 = .3, b2 = -.3,
#' inspections = 6, inspectLength = 1)
#' fit <- ic_par(Surv(l, u, type = 'interval2') ~ x1 + x2,
#'              data = simdata)
#' new_data <- data.frame(x1 = c(1,2), x2 = c(-1,1))
#' rownames(new_data) <- c('grp1', 'grp2')
#' 
#' estQ <- getFitEsts(fit, new_data, p = c(.25, .75))
#' 
#' estP <- getFitEsts(fit, q = 400)
#' @author Clifford Anderson-Bergman
#' @export
getFitEsts <- function(fit, newdata, p, q){
  if(missing(newdata)) newdata <- NULL
  etas <- get_etas(fit, newdata)
  
  
  if(missing(p))	p <- NULL
  if(missing(q))  q <- NULL
  if(!is.null(q)) {xs <- q; type = 'q'}
  else{ 
    type = 'p'
    if(is.null(p)) xs <- 0.5
    else		   xs <- p
  }
  
  if(length(etas) == 1){etas <- rep(etas, length(xs))}
  if(length(xs) == 1){xs <- rep(xs, length(etas))}
  if(length(etas) != length(xs) ) stop('length of p or q must match nrow(newdata) OR be 1')

  regMod <- fit$model
  
  if(inherits(fit, 'sp_fit'))	{
    scurves <- getSCurves(fit, newdata = NULL)
    baselineInfo <- list(tb_ints = scurves$Tbull_ints, s = scurves$S_curves$baseline)
    baseMod = 'sp'
  }
  if(inherits(fit, 'par_fit')){	
    baseMod <- fit$par
    baselineInfo <- fit$baseline
  }
  
  if(fit$model == 'po' | fit$model == 'ph' | fit$model == 'none'){
    surv_etas <- etas
    scale_etas <- rep(1, length(etas)) 
  }
  
  else if(fit$model == 'aft'){
    scale_etas <- etas
    surv_etas <- rep(1, length(etas)) 
  }
  else stop('model not recognized in getFitEsts')
  
  if(type == 'q'){
    ans <- getSurvProbs(xs, surv_etas, baselineInfo = baselineInfo, regMod = regMod, baseMod = baseMod)
    ans <- ans * scale_etas
    return(ans)
  }
  else if(type == 'p'){
    xs <- xs / scale_etas
    ans <- getSurvTimes(xs, surv_etas, baselineInfo = baselineInfo, regMod = regMod, baseMod = baseMod)
  return(ans)
  }
}


#' Compare parametric baseline distributions with semi-parametric baseline
#' 
#' @param object       Either a formula or a model fit with \code{ic_sp} or \code{ic_par}
#' @param data         Data. Unnecessary if \code{object} is a fit
#' @param model        Type of model. Choices are \code{'ph'} or \code{'po'}
#' @param dists        Parametric baseline fits	
#' @param cols         Colors of baseline distributions
#' @param weights      Case weights
#' @param lgdLocation  Where legend will be placed. See \code{?legend} for more details
#' @param useMidCovars Should the distribution plotted be for covariates = mean values instead of 0
#'
#' @description 
#' Creates plots to diagnosis fit of different choices of parametric baseline model. 
#' Plots the semi paramtric model against different choices of parametric models. 
#' 
#' @details
#' If \code{useMidCovars = T}, then the survival curves plotted are for fits with the mean covariate value, 
#' rather than 0. This is because often the baseline distribution (i.e. with all covariates = 0) will be 
#' far away from the majority of the data.
#' 
#' @examples data(IR_diabetes)
#' fit <- ic_par(cbind(left, right) ~ gender, 
#'              data = IR_diabetes)
#'
#' diag_baseline(fit, lgdLocation = "topright", 
#'              dist = c("exponential", "weibull", "loglogistic"))
#'
#' @author Clifford Anderson-Bergman
#' @export
diag_baseline <- function(object, data, model = 'ph', weights = NULL,
						  dists = c('exponential', 'weibull', 'gamma', 'lnorm', 'loglogistic', 'generalgamma'),
						  cols = NULL, lgdLocation = 'bottomleft',
						  useMidCovars = T){
  if(model == 'aft'){
    stop('diag_baseline not supported for aft model. This is because calculating
	       the non-parametric aft model is quite difficult')
  }  
	newdata = NULL
	if(useMidCovars) newdata <- 'midValues'
	formula <- getFormula(object)
	if(missing(data))	data <- getData(object)
	max_n_use = nrow(data)	#no longer necessary, for backward compatability				  	
	
	subDataInfo <- subSampleData(data, max_n_use, weights)
	sp_data <- subDataInfo$data
	weights <- subDataInfo$w

	sp_fit <- ic_sp(formula, data = sp_data, bs_samples = 0, model = model)
	plot(sp_fit, newdata)
	xrange <- range(getSCurves(sp_fit)$Tbull_ints, finite = TRUE)
	grid <- xrange[1] + 0:100/100 *(xrange[2] - xrange[1])
	if(is.null(cols)) cols <- 1 + 1:length(dists)
	for(i in seq_along(dists)){
		this_dist <- dists[i]
		par_fit <- ic_par(formula, data = data, model = model, dist = this_dist)
		y <- getFitEsts(par_fit, newdata = newdata, q = grid)
		lines(grid, 1 - y, col = cols[i])
	}
	legend(lgdLocation, legend = c('Semi-parametric', dists), col = c('black', cols), lwd = 1)
}

#' Predictions from icenReg Regression Model
#' 
#' @param object   Model fit with \code{ic_par} or \code{ic_sp}
#' @param type     type of prediction. Options include \code{"lp", "response"}
#' @param newdata  \code{data.frame} containing covariates
#' @param ...      other arguments (will be ignored)
#'
#' @description   
#' Gets various estimates from an \code{ic_np}, \code{ic_sp} or \code{ic_par} object.
#' @details 
#' If \code{newdata} is left blank, will provide estimates for original data set. 
#'
#' For the argument \code{type}, there are two options. \code{"lp"} provides the 
#' linear predictor for each subject (i.e. in a proportional hazards model, 
#' this is the log-hazards ratio, in proportional odds, the log proporitonal odds), 
#' \code{"response"} provides the median response value for each subject, 
#' *conditional on that subject's covariates, but ignoring their actual response interval*. 
#' Use \code{imputeCens} to impute the censored values.
#' @examples 
#' simdata <- simIC_weib(n = 500, b1 = .3, b2 = -.3,
#'                       inspections = 6,
#'                       inspectLength = 1)
#'
#' fit <- ic_par(cbind(l, u) ~ x1 + x2,
#'               data = simdata)
#'
#' imputedValues <- predict(fit)
#' @author Clifford Anderson-Bergman
#' @export
predict.icenReg_fit <- function(object, type = 'response',
                                newdata = NULL, ...)
      #imputeOptions = fullSample, fixedParSample, median
  {
  if(is.null(newdata)) newdata <- object$getRawData()
  if(type == 'lp')
    return( log(get_etas(object, newdata = newdata)))
  if(type == 'response')
    return(getFitEsts(fit = object, newdata = newdata))
  stop('"type" not recognized: options are "lp", "response" and "impute"')
}

#' Impute Interval Censored Data from icenReg Regression Model
#' 
#' @param fit         icenReg model fit 
#' @param newdata     \code{data.frame} containing covariates and censored intervals. If blank, will use data from model
#' @param imputeType  type of imputation. See details for options
#' @param numImputes  Number of imputations (ignored if \code{imputeType = "median"}) 
#' 
#' @description
#' Imputes censored responses from data. 
#' @details 	
#'  If \code{newdata} is left blank, will provide estimates for original data set. 
#' 
#'  There are several options for how to impute. \code{imputeType = 'median'} 
#'  imputes the median time, conditional on the response interval, covariates and 
#'  regression parameters at the MLE. To get random imputations without accounting
#'  for error in the estimated parameters \code{imputeType ='fixedParSample'} takes a 
#'  random sample of the response variable, conditional on the response interval, 
#'  covariates and estimated parameters at the MLE. Finally, 
#'  \code{imputeType = 'fullSample'} first takes a random sample of the coefficients,
#'  (assuming asymptotic normality) and then takes a random sample 
#'  of the response variable, conditional on the response interval, 
#'  covariates, and the random sample of the coefficients. 
#'  
#'  @examples 
#' simdata <- simIC_weib(n = 500, b1 = .3, b2 = -.3,
#'                       inspections = 6, inspectLength = 1)
#'
#' fit <- ic_par(cbind(l, u) ~ x1 + x2,
#'               data = simdata)
#'
#' imputedValues <- imputeCens(fit)
#' @author Clifford Anderson-Bergman
#' @export
imputeCens<- function(fit, newdata = NULL, imputeType = 'fullSample', numImputes = 5){
  if(is.null(newdata)) newdata <- fit$getRawData()
  yMat <- expandY(fit$formula, newdata, fit)
  p1 <- getFitEsts(fit, newdata, q = as.numeric(yMat[,1]) ) 
  p2 <- getFitEsts(fit, newdata, q = as.numeric(yMat[,2]) ) 
  ans <- matrix(nrow = length(p1), ncol = numImputes)
  storage.mode(ans) <- 'double'
  if(imputeType == 'median'){
    p_med <- (p1 + p2)/2
    ans <- getFitEsts(fit, newdata, p = p_med)
    isLow <- ans < yMat[,1]
    ans[isLow] <- yMat[isLow,1]
    isHi <- ans > yMat[,2]
    ans[isHi] <- yMat[isHi]
    return()
  }
  if(imputeType == 'fixedParSample'){
    for(i in 1:numImputes){
      p_samp <- runif(length(p1), p2, p1)
      theseImputes <- getFitEsts(fit, newdata, p = p_samp)
      isLow <- theseImputes < yMat[,1]
      theseImputes[isLow] <- yMat[isLow,1]
      isHi <- theseImputes > yMat[,2]
      theseImputes[isHi] <- yMat[isHi,2]
      ans <- fastMatrixInsert(theseImputes, ans, colNum = i)
    }
    return(ans)
  }
  if(imputeType == 'fullSample'){
    isSP <- is(fit, 'sp_fit')
    for(i in 1:numImputes){
      orgCoefs <- getSamplablePars(fit)
      if(!isSP){
        coefVar <- getSamplableVar(fit)
        sampledCoefs <- sampPars(orgCoefs, coefVar)
      }
      else{
        sampledCoefs <- getBSParSample(fit)
      }
      setSamplablePars(fit, sampledCoefs)
      p1 <- getFitEsts(fit, newdata, q = as.numeric(yMat[,1]) ) 
      p2 <- getFitEsts(fit, newdata, q = as.numeric(yMat[,2]) ) 
      p_samp <- runif(length(p1), p1, p2)
      theseImputes <- getFitEsts(fit, newdata, p = p_samp)
      isLow <- theseImputes < yMat[,1]
      theseImputes[isLow] <- yMat[isLow,1]
      isHi <- theseImputes > yMat[,2]
      theseImputes[isHi] <- yMat[isHi,2]
      fastMatrixInsert(theseImputes, ans, colNum = i)
      setSamplablePars(fit, orgCoefs)
    }
    return(ans)
  }
  stop('imputeType type not recognized.')
}

plot.sp_curves <- function(x, sname = 'baseline', xRange = NULL, ...){
  if(is.null(xRange))
    xRange <- range(c(x[[1]][,1], x[[1]][,2]), finite = TRUE)
  dotList <- list(...)
  addList <- list(xlim = xRange, ylim = c(0,1), x = NA)
  dotList <- addListIfMissing(addList, dotList)
  do.call(plot, dotList)
  lines(x, sname = sname, ...)
}

lines.ic_npList <- function(x, fitNames = NULL, ...){
  if(is.null(fitNames)){
    fitNames <- names(x$scurves)
    lines(x, fitNames, ...)
  }
  dotList <- list(...)
  cols <- dotList$col

  for(i in seq_along(fitNames)){
    thisName <- fitNames[i]
    dotList$col <- cols[i]
    dotList$x <- x$scurves[[thisName]]
    do.call(lines, dotList)
  }
}

plot.ic_npList <- function(x, fitNames = NULL, lgdLocation = 'bottomleft', ... ){
  addList <- list(xlim = x$xRange,
                  ylim = c(0,1),
                  xlab = 't', 
                  ylab = 'S(t)', 
                  x = NA)
  dotList <- list(...)
  dotList <- addListIfMissing(addList, dotList)
  do.call(plot, dotList)  
  grpNames <- names(x$fitList)
  cols <- dotList$col
  if(is.null(cols)) cols = 2:(length(grpNames) + 1)
  if(length(cols) != length(grpNames)) 
    stop('length of number of strata not equal to number of colors')
  dotList$col <- cols
  dotList$fitNames = fitNames
  dotList$x <- x
  do.call(lines, dotList)
  legend(lgdLocation, legend = grpNames, col = cols, lty = 1)
}

lines.sp_curves <- function(x, sname = 'baseline',...){
  firstTimeObs <- x[[1]][1,1]
  firstTimeAssume <- firstTimeObs
  if(firstTimeObs > 0)
    firstTimeAssume <- 0
  lines(c(firstTimeAssume, firstTimeObs), c(1,1), ...)
  lines(x[[1]][,1], x[[2]][[sname]], ..., type = 's')
  lines(x[[1]][,2], x[[2]][[sname]],   ..., type = 's')
  lastObs <- nrow(x[[1]])
  lastTimes <- x[[1]][lastObs,]
  if(lastTimes[2] == Inf) lastTimes[2] <- lastTimes[1]
  lastTimes[2] <- lastTimes[2] + (lastTimes[2] - firstTimeObs)
  lines(lastTimes, c(0,0), ... ) 
}

dGeneralGamma <- function(x, mu, s, Q){
  max_n <- getMaxLength(list(x, mu, s, Q) )
  x <- updateDistPars(x, max_n)
  mu <- updateDistPars(mu, max_n)
  s <- updateDistPars(s, max_n)
  Q <- updateDistPars(Q, max_n)
  
  ans <- .Call('dGeneralGamma', x, mu, s, Q)
  return(ans)
}

qGeneralGamma <- function(p, mu, s, Q){
  x <- p
  max_n <- getMaxLength(list(x, mu, s, Q) )
  x <- updateDistPars(x, max_n)
  mu <- updateDistPars(mu, max_n)
  s <- updateDistPars(s, max_n)
  Q <- updateDistPars(Q, max_n)
  
  ans <- .Call('qGeneralGamma', x, mu, s, Q)
  return(ans)
  
}

pGeneralGamma <- function(q, mu, s, Q){
  x <- q
  max_n <- getMaxLength(list(x, mu, s, Q) )
  x <- updateDistPars(x, max_n)
  mu <- updateDistPars(mu, max_n)
  s <- updateDistPars(s, max_n)
  Q <- updateDistPars(Q, max_n)
  
  ans <- .Call('qGeneralGamma', x, mu, s, Q)
  return(ans)
  
}

#' Non-Parametric Estimator for Interval Censored Data
#' 
#' @param formula   Formula for stratification. If only one group, can be left blank and 
#' data must be entered as n x 2 matrix.
#' @param data      A \code{data.frame} or an n x 2 matrix. See details.
#' @param maxIter   Maximum iterations
#' @param tol       Numeric tolerance
#' @param B         Should intervals be open or closed? See details.
#'
#'  @description
#'  Fits the non-parametric maximum likelihood estimator (NPMLE) for univariate interval censored data. 
#'  This is a generalization of the Kaplan-Meier curves that allows for interval censoring. 
#'  Also referred to as the Turnbull estimator.
#'  
#' @details 
#' \code{data} must be an n x 2 matrix or data.frame containing two columns of data representing 
#' left and right sides of the censoring interval, denoted L and R. This allows for left censored 
#' (L == 0), right censored (R == inf), uncensored (L == R) along with general interval censored observations. 
#'
#' The argument \code{B} determines whether the intervals should be open or closed, i.e. 
#' \code{B = c(0,1)} implies that the event occurs within the interval \code{(l,u]}. 
#' The exception is that if \code{l == u}, it is assumed that the event is uncensored, regardless of \code{B}.
#'
#' The NPMLE is fit using an efficient implementation of the EMICM algorithm. 
#' @references 
#' Turnbull, B. (1976) The empricial distribution with arbitrarily grouped and censored data 
#' \emph{Journal of the Royal Statistical Society B}, vol 38 p290-295
#'
#' Wellner, J. A., and Zhan, Y. (1997) A hybrid algorithm for computation of the maximum likelihood estimator 
#' from censored data, \emph{Journal of the  American Statistical Association}, Vol 92, pp945-959
#'
#' Anderson-Bergman, C. (2016) An efficient implementation of the EMICM algorithm for the interval censored NPMLE
#' \emph{Journal of Computational and Graphical Statistics}, \emph{just accepted}
#' 
#' @author Clifford Anderson-Bergman
#' @export
ic_np <- function(formula = NULL, data, maxIter = 1000, tol = 10^-10, B = c(0,1)){
  if(is.null(formula)){ return(ic_npSINGLE(data, maxIter = maxIter, tol = tol, B = B)) }
  if(!inherits(formula, 'formula')) {
    #Covering when user ONLY provides data as first unlabeled argument
    data <- formula
    return(ic_npSINGLE(data, maxIter = maxIter, tol = tol, B = B))
  }
  
  if(missing(data)) data <- environment(formula)
  cl <- match.call()
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("formula", "data", "subset", "na.action", "offset"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  mf$drop.unused.levels <- TRUE
  mf[[1L]] <- quote(stats::model.frame)
  mf <- eval(mf, parent.frame())
  
  mt <- attr(mf, "terms")
  y <- model.response(mf, "numeric")
  yMat <- as.matrix(y)[,1:2]
  if(is(y, 'Surv')){
    rightCens <- mf[,1][,3] == 0
    yMat[rightCens,2] <- Inf
    exact <- mf[,1][,3] == 1
    yMat[exact, 2] = yMat[exact, 1]
  }
  storage.mode(yMat) <- 'double'
#  yMat <- adjustIntervals(B, yMat)
  
  
  formFactor <- formula[[3]]
  if( length(formFactor) != 1 ){ 
    stop('predictor must be either single factor OR 0 for ic_np')
  }
  if(formFactor == 0){ return(ic_npSINGLE(yMat, maxIter = maxIter, tol = tol, B = B)) }
  thisFactor <- data[[ as.character(formFactor) ]]
  if(!is.factor(thisFactor)){ stop('predictor must be factor') }
  
  theseLevels <- levels(thisFactor)
  fitList <- list()
  
  for(thisLevel in theseLevels){
    thisData <- yMat[thisFactor == thisLevel, ]
    if(nrow(thisData) > 0)
      fitList[[thisLevel]] <- ic_npSINGLE(thisData, maxIter = maxIter, tol = tol, B = B)
  }
  ans <- ic_npList(fitList)
  return(ans)
}

ic_npSINGLE <- function(data,  maxIter = 1000, tol = 10^-10, B){
  data <- as.matrix(data)
  if(ncol(data) != 2) stop("data should be an nx2 matrix or data.frame")
  if(any(data[,1] > data[,2]) ) stop(paste0("data[,1] > data[,2].",
                                          "This is impossible for interval censored data") )
  storage.mode(data) <- "double"
  data <- adjustIntervals(B, data)
  mis <- findMaximalIntersections(data[,1], data[,2])
  fit <- .Call("EMICM", mis$l_inds, mis$r_inds, as.integer(maxIter))
  tbulls <- rbind(mis$mi_l, mis$mi_r)
  ans <- new('ic_np')
  #ans <- list(phat = fit[[1]], Tbull_ints = tbulls, llk = fit[[2]], iters = fit[[3]])
  ans$p_hat <- fit[[1]]
  ans$T_bull_Intervals <- tbulls
  ans$coefficients <- numeric()
  ans$llk <- fit[[2]]
  ans$iterations <- fit[[3]]
  ans$par <- 'non-parametric'
  ans$model = 'none'
  ans$var <- matrix(nrow = 0, ncol = 0) 
  dataEnv <- new.env()
  dataEnv[['data']] <- data
  ans[['.dataEnv']] <- dataEnv
  return(ans)
}

icqqplot <- function(par_fit){
 spfit <- makeQQFit(par_fit)
 baseSCurves <- getSCurves(spfit)
 baseS <- baseSCurves$S_curves$baseline
 baseUpper <- baseSCurves$Tbull_ints[,2]
 baseLower <- baseSCurves$Tbull_ints[,1]
 
 parUpperS <- 1 - getFitEsts(fit = par_fit, q = baseUpper)
 parLowerS <- 1 - getFitEsts(fit = par_fit, q = baseLower)
 
 plot(baseS, parUpperS, xlim = c(0,1), ylim = c(0,1), main = 'QQ Plot', xlab = c('Unconstrained Baseline Survival'),
      ylab = 'Parametric Survival', type = 's')
 lines(baseS, parLowerS, type = 's')
 lines(c(0,1), c(0,1), col = 'red')
}