## ----global_options, include=FALSE-------------------------------------------- knitr::opts_chunk$set(dpi = 100, echo = TRUE, warning = FALSE, message = FALSE) ## ----load_lib----------------------------------------------------------------- ## required python modules: autograd, numpy, scipy, sklearn ## To properly install packages, run: # install.packages("BiocManager") # BiocManager::install("mixOmics") # BiocManager::install("phyloseq") # install.packages("mixKernel") library(mixOmics) library(mixKernel) ## ----input_data--------------------------------------------------------------- data(TARAoceans) # more details with: ?TARAOceans # we check the dimension of the data: lapply(list("phychem" = TARAoceans$phychem, "pro.phylo" = TARAoceans$pro.phylo, "pro.NOGs" = TARAoceans$pro.NOGs), dim) ## ----compute_kernel, echo=TRUE------------------------------------------------ phychem.kernel <- compute.kernel(TARAoceans$phychem, kernel.func = "linear") pro.phylo.kernel <- compute.kernel(TARAoceans$pro.phylo, kernel.func = "abundance") pro.NOGs.kernel <- compute.kernel(TARAoceans$pro.NOGs, kernel.func = "abundance") # check dimensions dim(pro.NOGs.kernel$kernel) ## ----cim_kernel, fig.width=4-------------------------------------------------- cim.kernel(phychem = phychem.kernel, pro.phylo = pro.phylo.kernel, pro.NOGs = pro.NOGs.kernel, method = "square") ## ----meta_kernel-------------------------------------------------------------- meta.kernel <- combine.kernels(phychem = phychem.kernel, pro.phylo = pro.phylo.kernel, pro.NOGs = pro.NOGs.kernel, method = "full-UMKL") ## ----KPCA--------------------------------------------------------------------- kernel.pca.result <- kernel.pca(meta.kernel, ncomp = 10) ## ----plotIndiv_PCA, fig.keep='all'-------------------------------------------- all.depths <- levels(factor(TARAoceans$sample$depth)) depth.pch <- c(20, 17, 4, 3)[match(TARAoceans$sample$depth, all.depths)] plotIndiv(kernel.pca.result, comp = c(1, 2), ind.names = FALSE, legend = TRUE, group = as.vector(TARAoceans$sample$ocean), col = c("#f99943", "#44a7c4", "#05b052", "#2f6395", "#bb5352", "#87c242", "#07080a", "#92bbdb"), pch = TARAoceans$sample$depth, legend.title = "Ocean / Sea", title = "Projection of TARA Oceans stations", size.title = 10, legend.title.pch = "Depth") ## ----tune_pca----------------------------------------------------------------- plot(kernel.pca.result) ## ----permute_kpca------------------------------------------------------------- head(TARAoceans$taxonomy[ ,"Phylum"], 10) head(TARAoceans$GO, 10) # here we set a seed for reproducible results with this tutorial set.seed(17051753) kernel.pca.result <- kernel.pca.permute(kernel.pca.result, ncomp = 1, phychem = colnames(TARAoceans$phychem), pro.phylo = TARAoceans$taxonomy[, "Phylum"], pro.NOGs = TARAoceans$GO) ## ----display_var-------------------------------------------------------------- plotVar.kernel.pca(kernel.pca.result, ndisplay = 10, ncol = 3) ## ----proteobacteria_display, fig.keep='all'----------------------------------- selected <- which(TARAoceans$taxonomy[, "Phylum"] == "Proteobacteria") proteobacteria.per.sample <- apply(TARAoceans$pro.phylo[, selected], 1, sum) / apply(TARAoceans$pro.phylo, 1, sum) colfunc <- colorRampPalette(c("royalblue", "red")) col.proteo <- colfunc(length(proteobacteria.per.sample)) col.proteo <- col.proteo[rank(proteobacteria.per.sample, ties = "first")] plotIndiv(kernel.pca.result, comp = c(1, 2), ind.names = FALSE, legend = FALSE, col = col.proteo, pch = TARAoceans$sample$depth, legend.title = "Ocean / Sea", title = "Representation of Proteobacteria abundance", legend.title.pch = "Depth") ## ----temperature_display, fig.keep='all'-------------------------------------- col.temp <- colfunc(length(TARAoceans$phychem[, 4])) col.temp <- col.temp[rank(TARAoceans$phychem[, 4], ties = "first")] plotIndiv(kernel.pca.result, comp = c(1, 2), ind.names = FALSE, legend = FALSE, col = col.temp, pch = TARAoceans$sample$depth, legend.title = "Ocean / Sea", title = "Representation of mean temperature", legend.title.pch = "Depth") ## ----dependencies------------------------------------------------------------- have_depend <- reticulate::py_module_available("autograd") & reticulate::py_module_available("scipy") & reticulate::py_module_available("numpy") & reticulate::py_module_available("sklearn") ## ----select_ukfs-------------------------------------------------------------- if (have_depend) { ukfs.res <- select.features(TARAoceans$pro.phylo, kx.func = "bray", lambda = 1, keepX = 5, nstep = 1) selected <- sort(ukfs.res, decreasing = TRUE, index.return = TRUE)$ix[1:5] TARAoceans$taxonomy[selected, ] } ## ----comput_correlation_graph, eval=FALSE------------------------------------- # library("MASS") # library("igraph") # library("correlationtree") # # pro.phylo.alist <- data.frame("names" = colnames(TARAoceans$pro.phylo), # t(TARAoceans$pro.phylo)) # L <- mat2list(df2mat(pro.phylo.alist, 1)) # corr.mat <- as.matrix(cross_cor(L, remove = TRUE)) # pro.phylo.graph <- graph_from_adjacency_matrix(corr.mat, # mode = "undirected", # weighted = TRUE) # Lg <- laplacian_matrix(pro.phylo.graph, sparse=TRUE) ## ----select_ukfsg------------------------------------------------------------- if (have_depend) { load(file = file.path(system.file(package = "mixKernel"), "loaddata", "Lg.rda")) ukfsg.res <- select.features(TARAoceans$pro.phylo, kx.func = "bray", lambda = 1, method = "graph", Lg = Lg, keepX = 5, nstep = 1) selected <- sort(ukfsg.res, decreasing = TRUE, index.return = TRUE)$ix[1:5] TARAoceans$taxonomy[selected, ] } ## ----session_information------------------------------------------------------ sessionInfo()