##***********************************************************************
## $Id: select.R 19 2010-08-06 15:06:01Z mariotomo $
##
## this file is part of the R library delftfews.  delftfews is free
## software: you can redistribute it and/or modify it under the terms
## of the GNU General Public License as published by the Free Software
## Foundation, either version 3 of the License, or (at your option)
## any later version.
##
## delftfews is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with the nens libraray.  If not, see
## <http://www.gnu.org/licenses/>.
##
## Purpose    : selecting rows or columns from a timeseries set
##
## initial programmer :  Mario Frasca
## contributors: Mario Frasca, Michèl van Leeuwen, 
##
## initial date       :  20091120
##

timestamp.in.range <- function(data, from, to, by, units, offset, tz="UTC") {
  ## returns array of booleans identifying rows falling in repeating
  ## intervals.  timestamps are first converted to the given units and
  ## then tested.

  ## `data` is a data.frame with a `timestamps` column holding POSIXct values.
  ## `from` and `to` have the obvious meanings.  (`from` is included, `to` is excluded.)
  ## `by` is the length of the repeating interval.
  ## `units` specifies the unit used for `from`, `to`, `by`.
  ## `offset` is the value at EPOCH.

  if (to < from)
    to <- to + by
  window.width <- to - from
  
  ## we want to check the 'numerical' parts only, when the timestamp
  ## is expressed in the timezone specified.  to do so we print the
  ## timestamps in the timezone specified, remove the timezone
  ## information and do all calculations as if we were living in UTC.
  ## it's a cheap trick, you don't need tell me.
  timestamps.as.string <- format(index(data), tz=tz)
  fictive.timestamps <- as.POSIXct(timestamps.as.string, tz="UTC")
  values.from.from <- (as.double(difftime(fictive.timestamps, EPOCH), units=units) + offset - from) %% by
  return (values.from.from < window.width)
}


timestamp.in.range.weekday <- function(data, from, to, tz="CET") {
  timestamp.in.range(data, from, to, 7, 'days', 4, tz=tz)
}


timestamp.in.weekend <- function(data, tz="CET") {
  ## all equivalent:
  ## timestamp.in.range.weekday(data, 6, 8, tz=tz) # Sat->Mon(next week)
  ## timestamp.in.range.weekday(data, 6, 1, tz=tz) # Sat->Mon
  ## !timestamp.in.range.weekday(data, 1, 6, tz=tz) # not Mon->Sat
  ## we use "not from monday (included) to saturday (excluded)"
  !timestamp.in.range.weekday(data, 1, 6, tz=tz) 
}


timestamp.in.range.hour <- function(data, from, to, tz="CET") {
  timestamp.in.range(data, from, to, 24, 'hours', 0, tz=tz)
}


reformat.date <- function(datestring) {
  ## transforms date with some separator to MMDD
  ##

  parts <- strsplit(datestring, '[/-]')[[1]]
  if(length(parts) == 1) {
    if(nchar(datestring) != 4)
      stop("invalid date string")
    return(datestring)
  }
  return(paste(sprintf("%02d", as.numeric(parts)), collapse=''))
}


timestamp.in.range.calendar <- function(data, from, to, tz="CET") {
  ## returns whether the timestamps of a timeseries are between start and end date

  dates <- format.Date(index(data), format="%02m%02d")

  from <- reformat.date(from)
  to <- reformat.date(to)

  if(from < to)
    result <- (dates >= from) & (dates < to)
  else
    result <- (dates >= from) | (dates < to)

  return(result)
}


select.percentiles <- function(input, percentiles, score.function=sum.first, ...) {
  ## assuming 'input' contains some sort of monte carlo realizations
  ## of the same experiment in timeseries format, this function
  ## chooses the percentiles indicated, after the `score.function` function
  ## has applied to each column.

  ## how many columns
  N <- ncol(input)
  ## call the score.function, passing it any extra parameters
  tempdata <- score.function(input, ...)
  ## set unique names so we can find back each individual column after
  ## ordering by score
  names(tempdata) <- 1:N
  ## these are the columns.  
  columns <- as.numeric(names(sort(tempdata)[N * percentiles / 100]))

  ## result has same timestamps as input, but only the chosen columns
  result <- input[, columns]
  ## force result to have the same class as the input
  class(result) <- class(input)
  ## rename columns adding a trailing .percentile
  colnames(result) <- paste(names(input)[columns], percentiles, sep='.')

  ## done
  return(result)
}

"[.delftfews" <- function(x, i, j, drop = FALSE) {
  if (missing(i))
    NextMethod()
  else if (missing(j) && is.character(i))
    do.call("[.zoo", list(x=x, j=i, drop=drop))
  else
    NextMethod()
}

"[<-.delftfews" <- function(x, i, j, value) {
  if (missing(i))
    NextMethod()
  else if (missing(j) && is.character(i)) {
    result <- do.call("$<-.zoo", list(object=x, x=i, value=value))
    if (!("delftfews" %in% class(result)))
      class(result) <- c("delftfews", class(result))
    result
  }
  else
    NextMethod()
}

################################################################################

"[.zoo" <- function(x, i, j, drop = TRUE, ...)
{
  if(!is.zoo(x)) stop("method is only for zoo objects")
  x.index <- index(x)
  rval <- coredata(x)
  if(missing(i)) i <- 1:NROW(rval)

  ## also support that i can be index:
  ## if i is not numeric/integer/logical, it is interpreted to be the index
  if (all(class(i) == "logical"))
    i <- which(i)
  else if (inherits(i, "zoo") && all(class(coredata(i)) == "logical")) {
    i <- which(coredata(merge(zoo(,time(x)), i)))
  } else if(!((all(class(i) == "numeric") || all(class(i) == "integer")))) 
    i <- which(MATCH(x.index, i, nomatch = 0L) > 0L)
  
  if(length(dim(rval)) == 2) {
	drop. <- if (length(i) == 1) FALSE else drop
    rval <- if (missing(j)) rval[i, , drop = drop.] # EDITED
            else rval[i, j, drop = drop.]           # EDITED
	if (drop && length(rval) == 1) rval <- c(rval)
	rval <- zoo(rval, x.index[i])
  } else
	rval <- zoo(rval[i], x.index[i])
  class(rval) <- class(x)

  attr(rval, "oclass") <- attr(x, "oclass")
  attr(rval, "levels") <- attr(x, "levels")
  attr(rval, "frequency") <- attr(x, "frequency")
  if(!is.null(attr(rval, "frequency"))) class(rval) <- c("zooreg", class(rval))

  return(rval)
}

"[<-.zoo" <- function (x, i, j, value) 
{
  ## x[,j] <- value and x[] <- value can be handled by default method
  if(missing(i)) return(NextMethod("[<-"))

  ## otherwise do the necessary processing on i
  x.index <- index(x)
  n <- NROW(coredata(x))
  value2 <- NULL
  
  if (all(class(i) == "logical")) {
    i <- which(i)
  } else if (inherits(i, "zoo") && all(class(coredata(i)) == "logical")) {
    i <- which(coredata(merge(zoo(,time(x)), i)))
  } else if(!((all(class(i) == "numeric") || all(class(i) == "integer")))) {
    ## all time indexes in x.index?
    i.ok <- MATCH(i, x.index, nomatch = 0L) > 0L
    if(any(!i.ok)) {
      if(is.null(dim(value))) {
        value2 <- value[!i.ok]
        value <- value[i.ok]
      } else {
        value2 <- value[!i.ok,, drop = FALSE]
        value <- value[i.ok,, drop = FALSE]      
      }
      i2 <- i[!i.ok]
      i <- i[i.ok]
    }
    i <- which(MATCH(x.index, i, nomatch = 0L) > 0L)
  }
  if(any(i > n) | any(i < 1)) stop("Out-of-range assignment not possible.")
  ## taking shortcut: ([.zoo, x, , , <altered coredata>)
  coredata(x)[i, j] <- value
  ## remainder became superfluous
  return(x)
}

"$<-.zoo" <- function(object, x, value) {
  if(length(dim(object)) != 2) stop("not possible for univariate zoo series")
  if(NCOL(object) > 0 & is.null(colnames(object))) stop("only possible for zoo series with column names")
  wi <- match(x, colnames(object))
  if(is.na(wi)) {
    object <- cbind(object, value)
    if(is.null(dim(object))) dim(object) <- c(length(object), 1)
    colnames(object)[NCOL(object)] <- x  
  } else {
    if(is.null(value)) {
      object <- object[, -wi, drop = FALSE]
    } else {   
      object[, wi] <- value
    }
  }
  object
}
