params <- list(my_css = "css/rmdformats.css") ## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(httptest) start_vignette("2") ## ----setup, echo=FALSE, message=FALSE, warning=FALSE-------------------------- if (!library(ctxR, logical.return = TRUE)){ devtools::load_all() } old_options <- options("width") ## ----echo=FALSE, warning=FALSE------------------------------------------------ # Used to visualize data in a variety of plot designs library(ggplot2) library(gridExtra) ## ----setup-print, echo = FALSE------------------------------------------------ # Redefining the knit_print method to truncate character values to 25 characters # in each column and to truncate the columns in the print call to prevent # wrapping tables with several columns. #library(ctxR) knit_print.data.table = function(x, ...) { y <- data.table::copy(x) y <- y[, lapply(.SD, function(t){ if (is.character(t)){ t <- strtrim(t, 25) } return(t) })] print(y, trunc.cols = TRUE) } registerS3method( "knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr") ) ## ----ctxR dtxsid data chemical, message=FALSE, eval=FALSE--------------------- # chemical_details_by_dtxsid <- get_chemical_details(DTXSID = 'DTXSID7020182') ## ----ctxR dtxcid data chemical, message=FALSE, eval=FALSE--------------------- # chemical_details_by_dtxcid <- get_chemical_details(DTXCID = 'DTXCID30182') ## ----ctxR batch data chemical, message=FALSE, eval=FALSE---------------------- # vector_dtxsid<- c("DTXSID7020182", "DTXSID9020112", "DTXSID8021430") # chemical_details_by_batch_dtxsid <- get_chemical_details_batch(DTXSID = vector_dtxsid) # # vector_dtxcid <- c("DTXCID30182", "DTXCID801430", "DTXCID90112") # chemical_details_by_batch_dtxcid <- get_chemical_details_batch(DTXCID = vector_dtxcid) ## ----ctxr dtxsid check, message=FALSE, eval=FALSE----------------------------- # dtxsid_check_true <- check_existence_by_dtxsid(DTXSID = 'DTXSID7020182') # dtxsid_check_false <- check_existence_by_dtxsid(DTXSID = 'DTXSID7020182f') ## ----ctxr dtxsid check batch, message=FALSE, eval=FALSE----------------------- # vector_dtxsid_and_non_dtxsid <- c('DTXSID7020182F', 'DTXSID7020182', 'DTXSID0020232F') # dtxsid_checks <- check_existence_by_dtxsid_batch(DTXSID = vector_dtxsid_and_non_dtxsid) ## ----ctxR property range chemical, message=FALSE, eval=FALSE------------------ # chemical_by_property_range <- get_chemical_by_property_range(start = 1.311, # end = 1.313, # property = 'Density') ## ----ctxR info chemical, message=FALSE, eval=FALSE---------------------------- # chemical_info <- get_chem_info(DTXSID = 'DTXSID7020182') ## ----ctxR fate data chemical, message=FALSE, eval=FALSE----------------------- # fate_by_dtxsid <- get_fate_by_dtxsid(DTXSID = 'DTXSID7020182') ## ----ctxR starting value chemical, message=FALSE, eval=FALSE------------------ # search_starts_with <- chemical_starts_with(word = 'DTXSID70201') ## ----ctxR exact value chemical, message=FALSE, eval=FALSE--------------------- # search_exact <- chemical_equal(word = 'DTXSID7020182') ## ----ctxR substring value chemical, message=FALSE, eval=FALSE----------------- # search_contains <- chemical_contains(word = 'DTXSID702018') ## ----ctxR mass range ms ready chemical, message=FALSE, eval=FALSE------------- # msready_by_mass <- get_msready_by_mass(start = 200.9, # end = 200.95) ## ----ctxR chemical formula ms ready chemical, message=FALSE, eval=FALSE------- # msready_by_formula <- get_msready_by_formula(formula = 'C16H24N2O5S') ## ----ctxR dtxcid ms ready chemical, message=FALSE, eval=FALSE----------------- # msready_by_dtxcid <- get_msready_by_dtxcid(DTXCID = 'DTXCID30182') ## ----ctxR types of chemical lists, message=FALSE, eval=FALSE------------------ # get_all_list_types() ## ----ctxR all list types chemical, message=FALSE, eval=FALSE------------------ # chemical_lists_by_type <- get_chemical_lists_by_type(type = 'federal') ## ----ctxR list by name chemical, message=FALSE, eval=FALSE-------------------- # public_chemical_list_by_name <- get_public_chemical_list_by_name(listname = 'CCL4') ## ----ctxR lists containing chemical, message=FALSE, eval=FALSE---------------- # lists_containing_chemical <- get_lists_containing_chemical(DTXSID = 'DTXSID7020182') ## ----ctxR chemicals-in-list-start, message=FALSE, eval=FALSE------------------ # chemicals_in_ccl4_start <- get_chemicals_in_list_start(list_name = 'CCL4', word = 'Bi') ## ----ctxR chemicals-in-list-exact, message=FALSE, eval=FALSE------------------ # chemicals_in_ccl4_exact <- get_chemicals_in_list_exact(list_name = 'BIOSOLIDS2021', word = 'Bisphenol A') ## ----ctxR chemicals-in-list-contain, message=FALSE, eval=FALSE---------------- # chemicals_in_ccl4_contain <- get_chemicals_in_list_contain(list_name = 'CCL4', word = 'Bis') ## ----ctxR chemical in list chemical, message=FALSE, eval=FALSE---------------- # chemicals_in_list <- get_chemicals_in_list(list_name = 'CCL4') ## ----ctxR mrv by dtxsid dtxcid chemical, message=FALSE, eval=FALSE------------ # chemical_mrv_by_dtxsid <- get_chemical_mrv(DTXSID = 'DTXSID7020182') # chemical_mrv_by_dtxcid <- get_chemical_mrv(DTXCID = 'DTXCID30182') ## ----ctxR mol by dtxsid dtxcid chemical, message=FALSE, eval=FALSE------------ # chemical_mol_by_dtxsid <- get_chemical_mol(DTXSID = 'DTXSID7020182') # chemical_mol_by_dtxcid <- get_chemical_mol(DTXCID = 'DTXCID30182') ## ----ctxR image by dtxsid dtxcid chemical, message=FALSE, eval=FALSE---------- # chemical_image_by_dtxsid <- get_chemical_image(DTXSID = 'DTXSID7020182') # chemical_image_by_dtxcid <- get_chemical_image(DTXCID = 'DTXCID30182') # chemical_image_by_smiles <- get_chemical_image(SMILES = 'CC(C)(C1=CC=C(O)C=C1)C1=CC=C(O)C=C1') # # countcolors::plotArrayAsImage(chemical_image_by_dtxsid) # countcolors::plotArrayAsImage(chemical_image_by_dtxcid) # countcolors::plotArrayAsImage(chemical_image_by_smiles) ## ----ctxR synonym by dtxsid chemical, message=FALSE, eval=FALSE--------------- # chemical_synonym <- get_chemical_synonym(DTXSID = 'DTXSID7020182') ## ------------------------------------------------------------------------------------------------- options(width = 100) ccl4_information <- get_public_chemical_list_by_name('CCL4') print(ccl4_information, trunc.cols = TRUE) natadb_information <- get_public_chemical_list_by_name('NATADB') print(natadb_information, trunc.cols = TRUE) ## ------------------------------------------------------------------------------------------------- ccl4 <- get_chemicals_in_list('ccl4') ccl4 <- data.table::as.data.table(ccl4) natadb <- get_chemicals_in_list('NATADB') natadb <- data.table::as.data.table(natadb) ## ----eval=FALSE----------------------------------------------------------------------------------- # dim(ccl4) # dim(natadb) # colnames(ccl4) # head(ccl4, 1) ## ------------------------------------------------------------------------------------------------- ccl4_phys_chem <- get_chem_info_batch(ccl4$dtxsid) natadb_phys_chem <- get_chem_info_batch(natadb$dtxsid) ## ----eval=FALSE----------------------------------------------------------------------------------- # dim(ccl4_phys_chem) # colnames(ccl4_phys_chem) ## ------------------------------------------------------------------------------------------------- ccl4_phys_chem[, unique(propertyId)] ccl4_phys_chem[, unique(propType)] ## ------------------------------------------------------------------------------------------------- ccl4_phys_chem[propertyId == 'boiling-point', .(Mean = mean(value))] ccl4_phys_chem[propertyId == 'boiling-point', .(Mean = mean(value)), by = .(propType)] ccl4_phys_chem[propertyId == 'melting-point', .(Mean = mean(value))] ccl4_phys_chem[propertyId == 'melting-point', .(Mean = mean(value)), by = .(propType)] ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- head(ccl4_phys_chem[dtxsid == ccl4$dtxsid[[25]], ]) ccl4_phys_chem[dtxsid == ccl4$dtxsid[[25]], .(propType, value, unit), by = .(propertyId)] ccl4_phys_chem[dtxsid == ccl4$dtxsid[[25]], .(value, unit), by = .(propertyId, propType)] ccl4_phys_chem[dtxsid == ccl4$dtxsid[[25]], .(Mean_value = sapply(.SD, mean)), by = .(propertyId, unit), .SDcols = c("value")] ccl4_phys_chem[dtxsid == ccl4$dtxsid[[25]], .(Mean_value = sapply(.SD, mean)), by = .(propertyId, unit, propType), .SDcols = c("value")][order(propertyId)] ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- ccl4_vapor_all <- ccl4_phys_chem[propertyId %in% 'vapor-pressure', .(mean_vapor_pressure = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid)] natadb_vapor_all <- natadb_phys_chem[propertyId %in% 'vapor-pressure', .(mean_vapor_pressure = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid)] ccl4_vapor_grouped <- ccl4_phys_chem[propertyId %in% 'vapor-pressure', .(mean_vapor_pressure = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid, propType)] natadb_vapor_grouped <- natadb_phys_chem[propertyId %in% 'vapor-pressure', .(mean_vapor_pressure = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid, propType)] ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- summary(ccl4_vapor_all) summary(ccl4_vapor_grouped) summary(natadb_vapor_all) summary(natadb_vapor_grouped) ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- ccl4_vapor_all[, log_transform_mean_vapor_pressure := log(mean_vapor_pressure)] ccl4_vapor_grouped[, log_transform_mean_vapor_pressure := log(mean_vapor_pressure)] natadb_vapor_all[, log_transform_mean_vapor_pressure := log(mean_vapor_pressure)] natadb_vapor_grouped[, log_transform_mean_vapor_pressure := log(mean_vapor_pressure)] ## ----fig.align='center', echo=FALSE, eval=FALSE--------------------------------------------------- # ggplot(ccl4_vapor_all, aes(log_transform_mean_vapor_pressure)) + # geom_boxplot() + # coord_flip() # ggplot(ccl4_vapor_grouped, aes(propType, log_transform_mean_vapor_pressure)) + # geom_boxplot() ## ----fig.align='center', echo=FALSE, eval=FALSE--------------------------------------------------- # ggplot(natadb_vapor_all, aes(log_transform_mean_vapor_pressure)) + # geom_boxplot() + coord_flip() # ggplot(natadb_vapor_grouped, aes(propType, log_transform_mean_vapor_pressure)) + # geom_boxplot() ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- ccl4_vapor_grouped[, set := 'CCL4'] natadb_vapor_grouped[, set := 'NATADB'] all_vapor_grouped <- rbind(ccl4_vapor_grouped, natadb_vapor_grouped) vapor_box <- ggplot(all_vapor_grouped, aes(set, log_transform_mean_vapor_pressure)) + geom_boxplot(aes(color = propType)) vapor <- ggplot(all_vapor_grouped, aes(log_transform_mean_vapor_pressure)) + geom_boxplot((aes(color = set))) + coord_flip() ## ----fig.align='center', class.source="scroll-200", echo=FALSE------------------------------------ gridExtra::grid.arrange(vapor_box, vapor, ncol=2) ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- ccl4_hlc_all <- ccl4_phys_chem[propertyId %in% 'henrys-law', .(mean_hlc = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid)] natadb_hlc_all <- natadb_phys_chem[propertyId %in% 'henrys-law', .(mean_hlc = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid)] ccl4_hlc_grouped <- ccl4_phys_chem[propertyId %in% 'henrys-law', .(mean_hlc = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid, propType)] natadb_hlc_grouped <- natadb_phys_chem[propertyId %in% 'henrys-law', .(mean_hlc = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid, propType)] ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- summary(ccl4_hlc_all) summary(ccl4_hlc_grouped) summary(natadb_hlc_all) summary(natadb_hlc_grouped) ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- ccl4_hlc_all[, log_transform_mean_hlc := log(mean_hlc)] ccl4_hlc_grouped[, log_transform_mean_hlc := log(mean_hlc)] natadb_hlc_all[, log_transform_mean_hlc := log(mean_hlc)] natadb_hlc_grouped[, log_transform_mean_hlc := log(mean_hlc)] ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- ccl4_hlc_grouped[, set := 'CCL4'] natadb_hlc_grouped[, set := 'NATADB'] all_hlc_grouped <- rbind(ccl4_hlc_grouped, natadb_hlc_grouped) hlc_box <- ggplot(all_hlc_grouped, aes(set, log_transform_mean_hlc)) + geom_boxplot(aes(color = propType)) hlc <- ggplot(all_hlc_grouped, aes(log_transform_mean_hlc)) + geom_boxplot(aes(color = set)) + coord_flip() ## ----fig.align='center',class.source="scroll-200", echo=FALSE------------------------------------- gridExtra::grid.arrange(hlc_box, hlc, ncol=2) ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- ccl4_boiling_all <- ccl4_phys_chem[propertyId %in% 'boiling-point', .(mean_boiling_point = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid)] natadb_boiling_all <- natadb_phys_chem[propertyId %in% 'boiling-point', .(mean_boiling_point = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid)] ccl4_boiling_grouped <- ccl4_phys_chem[propertyId %in% 'boiling-point', .(mean_boiling_point = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid, propType)] natadb_boiling_grouped <- natadb_phys_chem[propertyId %in% 'boiling-point', .(mean_boiling_point = sapply(.SD, mean)), .SDcols = c('value'), by = .(dtxsid, propType)] ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- summary(ccl4_boiling_all) summary(ccl4_boiling_grouped) summary(natadb_boiling_all) summary(natadb_boiling_grouped) ## ----fig.align='center',class.source="scroll-300",message=FALSE----------------------------------- ccl4_boiling_grouped[, set := 'CCL4'] natadb_boiling_grouped[, set := 'NATADB'] all_boiling_grouped <- rbind(ccl4_boiling_grouped, natadb_boiling_grouped) boiling_box <- ggplot(all_boiling_grouped, aes(set, mean_boiling_point)) + geom_boxplot(aes(color = propType)) boiling <- ggplot(all_boiling_grouped, aes(mean_boiling_point)) + geom_boxplot(aes(color = set)) + coord_flip() ## ----fig.align='center',class.source="scroll-200", echo=FALSE------------------------------------- gridExtra::grid.arrange(boiling_box, boiling, ncol=2) ## ----breakdown, echo = FALSE, results = 'hide'-------------------------------- # This chunk will be hidden in the final product. It serves to undo defining the # custom print function to prevent unexpected behavior after this module during # the final knitting process and restores original option values. knit_print.data.table = knitr::normal_print registerS3method( "knit_print", "data.table", knit_print.data.table, envir = asNamespace("knitr") ) options(old_options) ## ----include=FALSE------------------------------------------------------------ end_vignette()