\name{cim}
\encoding{latin1}
\alias{cim}

\title{Clustered Image Maps (CIMs) ("heat maps")}

\description{
This function generates color-coded Clustered Image Maps (CIMs) ("heat maps") to 
represent "high-dimensional" data sets.
}

\usage{
cim(mat,
color = NULL,
row.names = TRUE,
col.names = TRUE,
row.sideColors = NULL,
col.sideColors = NULL,
row.cex = NULL,
col.cex = NULL,
threshold = 0,
cluster = "both",
dist.method = c("euclidean", "euclidean"),
clust.method = c("complete", "complete"),
cut.tree = c(0, 0),
transpose = FALSE,
symkey = TRUE,
keysize = c(1, 1),
zoom = FALSE,
main = NULL,
xlab = NULL,
ylab = NULL,
margins = c(5, 5),
lhei = NULL,
lwid = NULL,
comp=NULL,
center = TRUE,
scale = FALSE,
mapping = "XY",
legend= NULL,
save = NULL,
name.save = NULL)
}

\arguments{
  \item{mat}{numeric matrix of values to be plotted. Alternatively, an object of class inheriting from \code{"pca"}, \code{"spca"}, \code{"ipca"}, \code{"sipca"},   \code{"rcc"}, \code{"pls"}, \code{"spls"}, \code{"plsda"}, \code{"splsda"}, \code{"mlspls"} or \code{"mlsplsda"}.}

  \item{color}{a character vector of colors such as that generated by \code{\link{terrain.colors}},
    \code{\link{topo.colors}}, \code{\link{rainbow}}, \code{\link{color.jet}} or similar functions.}
    
  \item{row.names, col.names}{logical, should the names of rows and/or columns of \code{mat} be shown? If \code{TRUE}
    (default), \code{rownames(mat)} and/or \code{colnames(mat)} are used. Alternatively, character vectors of
    row and/or column labels can be supplied.}
       
    
  \item{row.sideColors}{(optional) character vector of length \code{nrow(mat)} containing the color 
    names for a vertical side bar that may be used to annotate the rows of \code{mat}.}
  	
  \item{col.sideColors}{(optional) character vector of length \code{ncol(mat)} containing 
    the color names for a horizontal side bar that may be used to annotate the columns of \code{mat}.}
    
    
  \item{row.cex, col.cex}{positive numbers, used as \code{cex.axis} for the row or column 
  axis labeling. The defaults currently depend only on the number of rows or columns, respectively.}
  
  \item{mapping}{character string indicating whether to map \code{"X"},
    \code{"Y"} or \code{"XY"}-association matrix. See Details.}
  
  \item{cluster}{character string indicating whether to cluster \code{"none"}, \code{"row"}, 
    \code{"column"} or \code{"both"}. Defaults to \code{"both"}.}
    
  \item{dist.method}{character vector of length two. The distance measure used in
    clustering rows and columns. Possible values are \code{"correlation"} for Pearson
    correlation and all the distances supported by \code{\link{dist}}, such as
    \code{"euclidean"}, etc.}
  
  \item{clust.method}{character vector of length two. The agglomeration method to be used for rows and columns.
    Accepts the same values as in \code{\link{hclust}} such as \code{"ward"}, \code{"complete"}, etc.}
    
  \item{cut.tree}{numeric vector of length two with components in [0,1]. The height proportions where 
    the trees should be cut for rows and columns, if these are clustered.}
  
  \item{comp}{a single positive integer or a vector of positive integers. The components used to 
    account for the data association. For a non-sparse method, the similarity matrix is computed based on the variates and loading vectors of those specified components. For a sparse approach, the similarity matrix is computed based on the variables selected on those specified components. See Examples. Defaults to \code{comp = 1:object$ncomp}.}
    
  \item{transpose}{logical indicating if the matrix should be transposed for plotting.
  Defaults to \code{FALSE}.}
  
  \item{center}{either a logical value or a numeric vector of length equal to the
    number of columns of \code{mat}. See \code{\link{scale}} function.}
  
  \item{scale}{either a logical value or a numeric vector of length equal to the
    number of columns of \code{mat}. See \code{\link{scale}} function.}
    
  \item{threshold}{numeric between 0 and 1. Variables with correlations below this threshold in absolute value are not plotted. Only used when \code{mapping = "XY"}.}
    
  \item{symkey}{boolean indicating whether the color key should be made symmetric about 0. 
	Defaults to \code{TRUE}.}
  
  \item{keysize}{vector of length two, indicating the size of the color key.}	
  
  \item{zoom}{logical. Whether to enable the interactive zoom. See Details.}
  
  \item{main, xlab, ylab}{main, \eqn{x}- and \eqn{y}-axis titles; default to none.}
  
  \item{margins}{numeric vector of length two containing the margins (see \code{\link{par}(mar)}) 
	for column and row names respectively.}
  
  \item{lhei, lwid}{arguments passed to \code{layout} to divide the device up into two 
    (or three if a side color is drawn) rows and two columns, with the row-heights \code{lhei} 
    and the column-widths \code{lwid}.}
    
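    \item{legend}{a list specifying the legend: the label of each group (\code{legend}), the corresponding color vector (\code{col}), the title of the legend (\code{title}) and the text size (\code{cex}). See Examples.}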
    
  \item{save}{should the plot be saved? If so, argument to be set to either  \code{'jpeg'}, \code{'tiff'}, \code{'png'} or \code{'pdf'}.}
  \item{name.save}{character string for the name of the saved file.}
}

\details{ 
One matrix Clustered Image Map (default method) is a 2-dimensional visualization
of a real-valued matrix (basically \code{\link{image}(t(mat))}) with rows and/or
columns reordered according to some hierarchical clustering method to identify
interesting patterns. Generated dendrograms from clustering are added to the left
side and to the top of the image. By default, the clustering method used for rows
and columns is the \emph{complete linkage} method and the distance measure used
is the \emph{Euclidean} distance.

In \code{"pca"}, \code{"spca"}, \code{"ipca"}, \code{"sipca"}, \code{"plsda"},
\code{"splsda"} and \code{"mlsplsda"} methods the \code{mat} matrix is \code{object$X}.

For the remaining methods, if \code{mapping = "X"} or \code{mapping = "Y"} the
\code{mat} matrix is \code{object$X} or \code{object$Y} respectively. If \code{mapping = "XY"}:
\itemize{ 
\item in \code{rcc} method, the matrix \code{mat} is created where element \eqn{(j,k)}
is the scalar product value between every pair of vectors in dimension 
\code{length(comp)} representing the variables \eqn{X_j} and \eqn{Y_k} on the
axis defined by \eqn{Z_i} with \eqn{i} in \code{comp}, where \eqn{Z_i} is the
equiangular vector between the \eqn{i}-th \eqn{X} and \eqn{Y} canonical variate. 

\item in \code{pls}, \code{spls} and \code{mlspls} methods, if \code{object$mode} is
\code{"regression"}, the element \eqn{(j,k)} of the matrix \code{mat} 
is given by the scalar product value between every pair of vectors in dimension
\code{length(comp)} representing the variables \eqn{X_j} and \eqn{Y_k} on the axis
defined by \eqn{U_i} with \eqn{i} in \code{comp}, where \eqn{U_i} is the \eqn{i}-th 
\eqn{X} variate. If \code{object$mode} is \code{"canonical"} then \eqn{X_j} and
\eqn{Y_k} are represented on the axis defined by \eqn{U_i} and \eqn{V_i} respectively.}

By default four components will be displayed in the plot. At the top left is the
color key, top right is the column dendrogram, bottom left is the row dendrogram,
bottom right is the image plot. When \code{sideColors} are provided, an
additional row or column is inserted in the appropriate location. This layout can
be overridden by specifying appropriate values for \code{lwid} and \code{lhei}.
\code{lwid} controls the column width, and \code{lhei} controls the row height. 
See the help page for \code{\link{layout}} for details on how to use these arguments.

For visualization of "high-dimensional" data sets, an interactive zooming tool is provided. 
\code{zoom = TRUE} opens new devices, one for the CIM and one for the zoomed region, and
starts an interactive 'zoom' process: click two points in the image map region by
pressing the first mouse button. A rectangle is then drawn around the selected
region, which is displayed zoomed in the other device. The process can be repeated to zoom
in on other regions of interest. 

The zoom process is terminated by clicking the second button and selecting 'Stop' 
from the menu, or from the 'Stop' menu on the graphics window.
}

\value{
A list containing the following components:
  \item{M}{the mapped matrix used by \code{cim}.}
  \item{rowInd, colInd}{row and column index permutation vectors as returned 
    by \code{\link{order.dendrogram}}.}
  \item{ddr, ddc}{object of class \code{"dendrogram"} which describes the row and 
	column trees produced by \code{cim}.}
  \item{mat.cor}{the correlation matrix used for the heatmap. Available only when \code{mapping = "XY"}.}
  \item{row.names, col.names}{character vectors with row and column labels used.}
  \item{row.sideColors, col.sideColors}{character vector containing the color 
    names for vertical and horizontal side bars used to annotate the rows and columns.}
}

\references{
Eisen, M. B., Spellman, P. T., Brown, P. O. and Botstein, D. (1998). 
Cluster analysis and display of genome-wide expression patterns. 
\emph{Proceedings of the National Academy of Sciences of the USA} \bold{95}, 14863-14868.

Weinstein, J. N., Myers, T. G., O'Connor, P. M., Friend, S. H., Fornace Jr., A. J., 
Kohn, K. W., Fojo, T., Bates, S. E., Rubinstein, L. V., Anderson, N. L., 
Buolamwini, J. K., van Osdol, W. W., Monks, A. P., Scudiero, D. A., Sausville, E. A., 
Zaharevitz, D. W., Bunow, B., Viswanadhan, V. N., Johnson, G. S., Wittes, R. E. 
and Paull, K. D. (1997). An information-intensive approach to the molecular 
pharmacology of cancer. \emph{Science} \bold{275}, 343-349.

Gonzalez I., Le Cao K.A., Davis M.J., Dejean S. (2012). Visualising associations between
paired 'omics' data sets. \emph{BioData Mining}; \bold{5}(1).
}

\author{Ignacio Gonzalez, Francois Bartolo, Kim-Anh Le Cao.}

\seealso{\code{\link{heatmap}}, 
\code{\link{hclust}}, \code{\link{plotVar}}, 
\code{\link{network}} and 

\url{http://mixomics.org/graphics/} for more details on all options available.}

\examples{
## default method: shows the cross-correlation between 2 data sets
#------------------------------------------------------------------
data(nutrimouse)
X <- nutrimouse$lipid
Y <- nutrimouse$gene
  
# cim() is given a plain numeric matrix here (the X-Y correlation matrix);
# cluster = "none" means that neither rows nor columns are clustered
cim(cor(X, Y), cluster = "none")
  
  
## CIM representation for objects of class 'rcc'
#------------------------------------------------------------------
# X (lipids) and Y (genes) are the nutrimouse data sets loaded in the example above
nutri.rcc <- rcc(X, Y, ncomp = 3, lambda1 = 0.064, lambda2 = 0.008)

# mapping is left to its default ("XY"), so the X-Y association matrix is plotted
cim(nutri.rcc, xlab = "genes", ylab = "lipids", margins = c(5, 6))

#-- interactive 'zoom' available as below
#   (wrapped in dontrun because it needs mouse interaction)
\dontrun{

    cim(nutri.rcc, xlab = "genes", ylab = "lipids", margins = c(5, 6),
        zoom = TRUE)
    #-- select the region and "see" the zoom-out region


    #-- cim from X matrix with a side bar to indicate the diet
    # colors are picked from the current palette by the numeric code of the diet
    # (assumes nutrimouse$diet is a factor)
    diet.col <- palette()[as.numeric(nutrimouse$diet)]
    cim(nutri.rcc, mapping = "X", row.names = nutrimouse$diet,
        row.sideColors = diet.col, xlab = "lipids",
        clust.method = c("ward", "ward"), margins = c(6, 4))

    #-- cim from Y matrix with a side bar to indicate the genotype
    # same idea as above, with the mixOmics color.mixo colors
    geno.col = color.mixo(as.numeric(nutrimouse$genotype))
    cim(nutri.rcc, mapping = "Y", row.names = nutrimouse$genotype,
        row.sideColors = geno.col, xlab = "genes",
        clust.method = c("ward", "ward"))

    #-- save the result as a jpeg file
    # open a jpeg device, draw the CIM into it, then close the device
    jpeg(filename = "test.jpeg", res = 600, width = 4000, height = 4000)
    cim(nutri.rcc, xlab = "genes", ylab = "lipids", margins = c(5, 6))
    dev.off()
}
## CIM representation for objects of class 'spca' (also works for sipca)
#------------------------------------------------------------------
data(liver.toxicity)
X <- liver.toxicity$gene

liver.spca <- spca(X, ncomp = 2, keepX = c(30, 30), scale = FALSE)

# one color per sample, indexed by the level code of the third column of the
# treatment table (the dose); dose.col is reused by later examples
dose.col <- color.mixo(as.numeric(as.factor(liver.toxicity$treatment[, 3])))

# side bar colored by dose, rows labelled with the dose, no variable names shown
cim(liver.spca, row.sideColors = dose.col, col.names = FALSE,
    row.names = liver.toxicity$treatment[, 3],
    clust.method = c("ward", "ward"))
    

## CIM representation for objects of class '(s)pls' 
#------------------------------------------------------------------
data(liver.toxicity)

X <- liver.toxicity$gene
Y <- liver.toxicity$clinic
# keepX and keepY have one entry per component (ncomp = 3)
liver.spls <- spls(X, Y, ncomp = 3,
                      keepX = c(20, 50, 50), keepY = c(10, 10, 10))


# default
cim(liver.spls)

\dontrun{
    # transpose matrix, choose clustering method
    cim(liver.spls, transpose = TRUE,   
        clust.method = c("ward", "ward"), margins = c(5, 7))

    # Here we visualise only the X variables selected
    cim(liver.spls, mapping="X")

    # Here we visualise only the Y variables selected
    cim(liver.spls, mapping="Y") 

    # Here we only visualise the similarity matrix between the variables
    # selected by spls, without any clustering
    cim(liver.spls, cluster="none")

    # plotting two data sets with the similarity matrix as input in the function
    # (see our BioData Mining paper for more details)
    # Only the variables selected by the sPLS model in X and Y are represented
    cim(liver.spls, mapping="XY")

    # on the X matrix only, side col var to indicate dose
    dose.col <- color.mixo(as.numeric(as.factor(liver.toxicity$treatment[, 3])))
    cim(liver.spls, mapping = "X", row.sideColors = dose.col, 
        row.names = liver.toxicity$treatment[, 3])

    # CIM default representation includes the total of 120 genes selected
    # (keepX = 20 + 50 + 50); with a sparse method, comp restricts the plot to
    # the variables selected on specific components
    cim(liver.spls, comp = 1)
    cim(liver.spls, comp = 2)
    cim(liver.spls, comp = c(1,2))
    cim(liver.spls, comp = c(1,3))
}

## CIM representation for objects of class '(s)plsda' 
#------------------------------------------------------------------
# Setting up the Y outcome first
# (X, the liver.toxicity gene matrix, and dose.col are reused from the examples above)
Y <- liver.toxicity$treatment[, 3]

liver.splsda <- splsda(X, Y, ncomp = 2, keepX = c(40, 30))

# rows are labelled with the outcome and annotated with the dose colors
cim(liver.splsda, row.sideColors = dose.col, row.names = Y)


## CIM representation for objects of class splsda 'multilevel' 
# with a two level factor (repeated sample and time)
#------------------------------------------------------------------
data(vac18.simulated)
X <- vac18.simulated$genes
design <- data.frame(samp = vac18.simulated$sample,
                     time = vac18.simulated$time,
                     stim = vac18.simulated$stimulation)

res.2level <- multilevel(X, ncomp = 2, design = design,
                         keepX = c(120, 10), method = 'splsda')

# define colors for the levels: stimulation and time
# (indexing by as.numeric() assumes design$stim and design$time are factors)
stim.col <- c("darkblue", "purple", "green4","red3")
stim.col <- stim.col[as.numeric(design$stim)]
time.col <- c("orange", "cyan")[as.numeric(design$time)]


# The row side bar indicates the two levels of the factor, stimulation and time.
# The sample names have been modified on the plot (time and stimulation pasted together).
cim(res.2level, row.sideColors = cbind(stim.col, time.col), 
    row.names = paste(design$time, design$stim, sep = "_"),
    col.names = FALSE,
  # setting up legend: labels are listed in the same order as the colors
  # (time levels first, then stimulation levels)
    legend=list(legend = c(levels(design$time), levels(design$stim)), 
                col = c("orange", "cyan", "darkblue", "purple", "green4","red3"), 
                title = "Condition", cex = 0.7)
)


## CIM representation for objects of class spls 'multilevel'
#------------------------------------------------------------------

data(liver.toxicity)
# individual each sample was measured on (repeated measurements)
repeat.indiv <- c(1, 2, 1, 2, 1, 2, 1, 2, 3, 3, 4, 3, 4, 3, 4, 4, 5, 6, 5, 5,
                  6, 5, 6, 7, 7, 8, 6, 7, 8, 7, 8, 8, 9, 10, 9, 10, 11, 9, 9,
                  10, 11, 12, 12, 10, 11, 12, 11, 12, 13, 14, 13, 14, 13, 14,
                  13, 14, 15, 16, 15, 16, 15, 16, 15, 16)

# sPLS is an unsupervised technique, so the design only indicates the sample
# repetitions (1 factor only here, sample).
# sPLS takes 2 data sets as input (X and Y); keepX and keepY have one entry
# per component (ncomp = 2).
design <- data.frame(sample = repeat.indiv)
res.spls.1level <- multilevel(X = liver.toxicity$gene,
                              Y = liver.toxicity$clinic,
                              design = design,
                              ncomp = 2,
                              keepX = c(50, 50), keepY = c(5, 5),
                              method = 'spls',
                              mode = 'canonical')

# Dose of each sample (third column of the treatment table), as a factor.
# The same factor drives both the side bar colors and the legend labels:
# stim.col[dose] picks the color by level code, and levels(dose) lists the
# labels in that same level order, so label i always goes with stim.col[i]
# whatever the order of the samples in the data.
dose <- factor(liver.toxicity$treatment[, 3])
stim.col <- c("darkblue", "purple", "green4", "red3")

# showing only the Y variables, and only those selected in comp 1
cim(res.spls.1level, mapping = "Y",
    row.sideColors = stim.col[dose], comp = 1,
    # setting up legend:
    legend = list(legend = levels(dose), col = stim.col,
                  title = "Dose", cex = 0.9))

\dontrun{
    # showing only the X variables, for all selected on comp 1 and 2
    cim(res.spls.1level, mapping = "X",
        row.sideColors = stim.col[dose],
        # setting up legend:
        legend = list(legend = levels(dose), col = stim.col,
                      title = "Dose", cex = 0.9))


    # These are the cross correlations between the variables selected in X and Y.
    # The similarity matrix is obtained as in our paper in BioData Mining
    cim(res.spls.1level, mapping = "XY")
}
}

\keyword{multivariate}
\keyword{iplot}
\keyword{hplot}
\keyword{graphs}
\keyword{cluster}

