## drop.tip.R (2009-01-07)

##   Remove Tips in a Phylogenetic Tree

## Copyright 2003-2009 Emmanuel Paradis

## This file is part of the R-package `ape'.
## See the file ../COPYING for licensing issues.

## Extract the clade descending from an internal node as a new tree.
##
## Arguments:
##   phy:       a tree of class "phylo".
##   node:      the number of an internal node (greater than the number
##              of tips), or a character string matched against
##              phy$node.label. Only the first value is used.
##   root.edge: the number of internal branches above 'node' whose
##              lengths are summed to build the root edge of the
##              returned tree (0, the default, leaves phy$root.edge
##              untouched). Ignored if the tree has no branch lengths.
##
## Value: a tree of class "phylo" whose root is 'node'; 'phy' itself if
## 'node' is the root. Tips keep their relative order in phy$tip.label.
extract.clade <- function(phy, node, root.edge = 0)
{
    Ntip <- length(phy$tip.label)
    ROOT <- Ntip + 1
    Nedge <- dim(phy$edge)[1]
    wbl <- !is.null(phy$edge.length)
    if (length(node) > 1) {
        node <- node[1]
        warning("only the first value of 'node' has been considered")
    }
    if (is.character(node)) {
        if (is.null(phy$node.label))
            stop("the tree has no node labels")
        node <- which(phy$node.label %in% node) + Ntip
        ## an unmatched label would otherwise give a zero-length 'node'
        ## and an obscure error in the tests below
        if (!length(node))
            stop("'node' does not match any node label")
        if (length(node) > 1) {
            node <- node[1]
            warning("several node labels match 'node': only the first one has been considered")
        }
    }
    if (node <= Ntip) stop("node number must be greater than the number of tips")
    if (node == ROOT) return(phy)
    phy <- reorder(phy) # insure it is in cladewise order
    root.node <- which(phy$edge[, 2] == node) # the edge subtending 'node'
    if (length(root.node) != 1)
        stop("'node' was not found in the tree")
    anc <- phy$edge[root.node, 1] # the ancestor of 'node'

    ## In cladewise order the edges of the clade are contiguous and
    ## start just after 'root.node'. Walk forward as long as the parent
    ## of the next edge belongs to the clade: this also works when
    ## 'node' is the last child of its ancestor, in which case the next
    ## edge starts from an older node (not from 'anc').
    edge1 <- phy$edge[, 1]
    edge2 <- phy$edge[, 2]
    in.clade <- logical(max(phy$edge))
    in.clade[node] <- TRUE
    last <- root.node
    while (last < Nedge && in.clade[edge1[last + 1L]]) {
        last <- last + 1L
        in.clade[edge2[last]] <- TRUE
    }
    if (last == root.node) stop("'node' has no descendants")
    keep <- (root.node + 1L):last

    ## same condition as in drop.tip(): a root edge can only be built
    ## from branch lengths
    if (root.edge && wbl) {
        NewRootEdge <- phy$edge.length[root.node]
        root.edge <- root.edge - 1
        ## climb towards the root, adding one branch per step
        while (root.edge) {
            if (anc == ROOT) break
            i <- which(phy$edge[, 2] ==  anc)
            NewRootEdge <- NewRootEdge + phy$edge.length[i]
            root.edge <- root.edge - 1
            anc <- phy$edge[i, 1]
        }
        ## the root was reached before using all requested branches:
        ## add the root edge of the original tree if there is one
        if (root.edge && !is.null(phy$root.edge))
            NewRootEdge <- NewRootEdge + phy$root.edge
        phy$root.edge <- NewRootEdge
    }

    phy$edge <- phy$edge[keep, , drop = FALSE]
    if (wbl) phy$edge.length <- phy$edge.length[keep]

    ## renumber the tips by their rank among the tips that are kept, and
    ## keep the labels in their original relative order, so that labels
    ## and numbers agree whatever the order of the tips in 'edge'
    TIPS <- phy$edge[, 2] <= Ntip
    tip <- sort(phy$edge[TIPS, 2])
    phy$edge[TIPS, 2] <- match(phy$edge[TIPS, 2], tip)
    phy$tip.label <- phy$tip.label[tip]

    Ntip.old <- Ntip # node labels are indexed with the original number of tips
    Ntip <- length(phy$tip.label)
    phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L

    ## The block below renumbers the nodes so that they conform
    ## to the "phylo" format -- same as in root().
    ## 'newNb' is indexed by the old node numbers and gives the new ones
    ## (it is extended automatically when an old number is beyond its
    ## initial length).
    newNb <- integer(Ntip + phy$Nnode)
    newNb[node] <- Ntip + 1L
    sndcol <- phy$edge[, 2] > Ntip
    old.nb <- phy$edge[sndcol, 2]
    new.nb <- Ntip + 1L + seq_len(length(old.nb))
    newNb[old.nb] <- new.nb
    phy$edge[sndcol, 2] <- new.nb
    phy$edge[, 1] <- newNb[phy$edge[, 1]]

    ## reorder the node labels following the new node numbers
    if (!is.null(phy$node.label)) {
        old.nodes <- which(newNb > 0)
        new.lab <- character(phy$Nnode)
        new.lab[newNb[old.nodes] - Ntip] <- phy$node.label[old.nodes - Ntip.old]
        phy$node.label <- new.lab
    }
    phy
}

## Remove tips from a phylogenetic tree.
##
## Arguments:
##   phy:           a tree of class "phylo".
##   tip:           the tips to drop, given either as tip numbers or as
##                  a character vector matched against phy$tip.label.
##   trim.internal: if TRUE (the default), internal branches left
##                  without descendants are deleted too; if FALSE, the
##                  corresponding nodes become tips (labelled with
##                  their node label, or "NA" if there are none).
##   subtree:       if TRUE, each dropped group of tips is replaced by a
##                  single tip labelled "[n_tips]" (forces
##                  trim.internal = TRUE).
##   root.edge:     the number of internal branches used to build the
##                  new root edge when the root of the pruned tree is
##                  moved (needs trim.internal = TRUE and branch
##                  lengths); if 0 the root edge is removed.
##
## Value: the result of collapse.singles() applied to the pruned tree.
## 'phy' is returned unchanged if there is no tip to drop.
drop.tip <-
    function(phy, tip, trim.internal = TRUE, subtree = FALSE, root.edge = 0)
{
    if (!inherits(phy, "phylo"))
        stop('object "phy" is not of class "phylo"')
    Ntip <- length(phy$tip.label)
    Ntip.old <- Ntip # node labels are indexed with the original number of tips
    NEWROOT <- ROOT <- Ntip + 1
    Nnode <- phy$Nnode
    Nedge <- dim(phy$edge)[1]
    ## find the tips to drop:
    if (is.character(tip))
        tip <- which(phy$tip.label %in% tip)
    ## nothing to drop: 'tip.label[-tip]' below would otherwise delete
    ## all the tip labels
    if (!length(tip)) return(phy)
    if (subtree) {
        trim.internal <- TRUE
        tr <- reorder(phy, "pruningwise")
        ## the 6th argument of the C routine is returned in N, indexed
        ## by node number; it is used below to build the "[n_tips]" labels
        N <- .C("node_depth", as.integer(Ntip), as.integer(Nnode),
                as.integer(tr$edge[, 1]), as.integer(tr$edge[, 2]),
                as.integer(Nedge), double(Ntip + Nnode),
                DUP = FALSE, PACKAGE = "ape")[[6]]
    }
    wbl <- !is.null(phy$edge.length)
    edge1 <- phy$edge[, 1] # local copies
    edge2 <- phy$edge[, 2] #
    keep <- !logical(Nedge)
    trms <- edge2 <= Ntip
    ## delete the terminal edges given by `tip':
    keep[match(tip, edge2)] <- FALSE

    if (trim.internal) {
        ## delete the internal edges that do not have descendants
        ## anymore (ie, they are in the 2nd column of `edge' but
        ## not in the 1st one)
        repeat {
            sel <- !(edge2 %in% edge1[keep]) & !trms & keep
            if (!sum(sel)) break
            keep[sel] <- FALSE
        }
        if (subtree) {
            ## keep the subtending edge(s):
            subt <- edge1 %in% edge1[keep] & edge1 %in% edge1[!keep]
            keep[subt] <- TRUE
            ## a dropped tip whose siblings are kept has just got its
            ## terminal edge back: it stays in the tree as a one-tip
            ## "subtree" and must not be removed from the tip labels
            kept.back <- tip[which(keep[match(tip, edge2)])]
            if (length(kept.back)) {
                phy$tip.label[kept.back] <- "[1_tips]"
                tip <- tip[!(tip %in% kept.back)]
            }
        }
        if (root.edge && wbl) {
            ## 'keep' is not modified while looking for the new root,
            ## so the degrees are computed only once
            degree <- tabulate(edge1[keep])
            if (degree[ROOT] == 1) {
                j <- integer(0) # will store the indices of the edges below the new root
                repeat {
                    i <- which(edge1 == NEWROOT & keep)
                    j <- c(i, j) # the edges closest to the new root come first
                    NEWROOT <- edge2[i]
                    if (degree[NEWROOT] > 1) break
                }
                keep[j] <- FALSE
                ## use only the 'root.edge' branches closest to the new root
                if (length(j) > root.edge) j <- j[seq_len(root.edge)]
                NewRootEdge <- sum(phy$edge.length[j])
                if (length(j) < root.edge && !is.null(phy$root.edge))
                    NewRootEdge <- NewRootEdge + phy$root.edge
                phy$root.edge <- NewRootEdge
            }
        }
    }

    if (!root.edge) phy$root.edge <- NULL

    ## update the tree; 1) drop the edges and tip labels
    phy$edge <- phy$edge[keep, , drop = FALSE]
    if (wbl) phy$edge.length <- phy$edge.length[keep]
    if (length(tip)) phy$tip.label <- phy$tip.label[-tip]
    ## 2) renumber the remaining tips now: the new number of a tip is
    ## its rank among the remaining tips, which is its position in the
    ## shortened 'tip.label' (order() would give the sorting permutation
    ## instead, and match() keeps the edge matrix of integer type)
    TIPS <- phy$edge[, 2] <= Ntip
    old.tips <- phy$edge[TIPS, 2]
    phy$edge[TIPS, 2] <- match(old.tips, sort(old.tips))
    Ntip <- length(phy$tip.label) # update Ntip

    ## make new tip labels if necessary
    if (subtree || !trim.internal) {
        ## internal nodes (still with their old numbers) without descendants
        new.trms <- !(phy$edge[, 2] %in% phy$edge[, 1]) & phy$edge[, 2] > Ntip
        node2tip <- phy$edge[new.trms, 2]
        if (length(node2tip)) {
            new.lab <-
                if (subtree) paste("[", N[node2tip], "_tips]", sep = "")
                else if (is.null(phy$node.label)) rep("NA", length(node2tip))
                else phy$node.label[node2tip - Ntip.old]
            ## change the #'s in the edge matrix
            new.tip <- Ntip + seq_along(node2tip)
            phy$edge[new.trms, 2] <- new.tip
            phy$tip.label[new.tip] <- new.lab
            Ntip <- length(phy$tip.label)
        }
    }
    phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L # 3) update phy$Nnode

    ## The block below renumbers the nodes so that they conform
    ## to the "phylo" format -- same as in root().
    ## 'newNb' is indexed by the old node numbers and gives the new ones
    ## (it is extended automatically when an old number is beyond its
    ## initial length).
    newNb <- integer(Ntip + phy$Nnode)
    newNb[NEWROOT] <- Ntip + 1L
    sndcol <- phy$edge[, 2] > Ntip
    old.nb <- phy$edge[sndcol, 2]
    new.nb <- Ntip + 1L + seq_len(length(old.nb))
    newNb[old.nb] <- new.nb
    phy$edge[sndcol, 2] <- new.nb
    phy$edge[, 1] <- newNb[phy$edge[, 1]]

    ## keep only the labels of the nodes still in the tree, placed
    ## according to the new node numbers
    if (!is.null(phy$node.label)) {
        old.nodes <- which(newNb > 0)
        node.lab <- character(phy$Nnode)
        node.lab[newNb[old.nodes] - Ntip] <- phy$node.label[old.nodes - Ntip.old]
        phy$node.label <- node.lab
    }

    collapse.singles(phy)
}
