## ----setup, include=FALSE-----------------------------------------------------
# Global chunk defaults for the document: show the code, but keep package
# messages and warnings out of the rendered output.
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE
)

library(HaDeX2)
library(microbenchmark)
library(dplyr)
library(ggplot2)

## ----echo = FALSE-------------------------------------------------------------
# Availability of each plot type in HaDeX and HaDeX2.
# The three vectors are positional: entry i of `HaDeX` and `HaDeX2` refers to
# entry i of `plot_type`, so all three must keep the same length and order.
knitr::kable(data.frame(
  plot_type = c("comparison", "woods",
                "uptake curve", "diff uptake curve",
                "butterfly", "diff butterfly",
                "chiclet", "diff chiclet",
                "heatmap", "diff heatmap",
                "3D structure",
                "volcano",
                "manhattan",
                "uncertainty",
                "coverage", "coverage heatmap",
                "measurement variability", "mass uptake curve"),
  HaDeX = c(TRUE, TRUE,
            TRUE, FALSE,
            FALSE, FALSE,
            FALSE, FALSE,
            FALSE, FALSE,
            FALSE,
            FALSE,
            FALSE,
            FALSE,
            TRUE, FALSE,
            FALSE, FALSE),
  HaDeX2 = c(TRUE, TRUE,
             TRUE, TRUE,
             TRUE, TRUE,
             TRUE, TRUE,
             TRUE, TRUE,
             TRUE,
             TRUE,
             TRUE,
             TRUE,
             TRUE, TRUE,
             TRUE, TRUE)
))

## ----echo=FALSE---------------------------------------------------------------
# Feature-availability table: one row per option, one logical column per
# package version. The first three options are available in both versions,
# the last two only in HaDeX2.
data.frame(
  option = c(
    "tooltips",
    "helpers",
    "tabular data",
    "times next to each other",
    "export to external tools"
  ),
  HaDeX = rep(c(TRUE, FALSE), times = c(3, 2)),
  HaDeX2 = rep(TRUE, times = 5)
) %>%
  knitr::kable()

## ----echo=FALSE---------------------------------------------------------------
# Function names shown in the HaDeX2 column, in their original listing order.
hadex2_functions <- c(
  "add_stat_dependency",
  "calculate_aggregated_diff_uptake",
  "calculate_aggregated_test_results",
  "calculate_aggregated_uptake",
  "calculate_auc",
  "calculate_back_exchange",
  "calculate_confidence_limit_values",
  "calculate_diff_uptake",
  "calculate_exp_masses",
  "calculate_exp_masses_per_replicate",
  "calculate_kinetics",
  "calculate_MHP",
  "calculate_p_value",
  "calculate_peptide_kinetics",
  "calculate_state_uptake",
  "create_aggregated_diff_uptake_dataset",
  "create_aggregated_uptake_dataset",
  "create_control_dataset",
  "create_diff_uptake_dataset",
  "create_kinetic_dataset",
  "create_overlap_distribution_dataset",
  "create_p_diff_uptake_dataset",
  "create_p_diff_uptake_dataset_with_confidence",
  "quality_control_dataset",
  "create_replicate_dataset",
  "create_state_comparison_dataset",
  "create_state_uptake_dataset",
  "create_uptake_dataset",
  "get_n_replicates",
  "get_peptide_sequence",
  "get_protein_coverage",
  "get_protein_redundancy",
  "get_replicate_list_sd",
  "get_residue_positions",
  "get_structure_color",
  "HaDeX_GUI",
  "HaDeXify",
  "install_GUI",
  "plot_aggregated_differential_uptake",
  "plot_aggregated_uptake",
  "plot_aggregated_uptake_structure",
  "plot_amino_distribution",
  "plot_butterfly",
  "plot_chiclet",
  "plot_coverage",
  "plot_coverage_heatmap",
  "plot_differential",
  "plot_differential_butterfly",
  "plot_differential_chiclet",
  "plot_differential_uptake_curve",
  "plot_manhattan",
  "plot_overlap",
  "plot_overlap_distribution",
  "plot_peptide_charge_measurement",
  "plot_peptide_mass_measurement",
  "plot_position_frequency",
  "plot_quality_control",
  "plot_replicate_histogram",
  "plot_replicate_mass_uptake",
  "plot_state_comparison",
  "plot_uncertainty",
  "plot_uptake_curve",
  "plot_volcano",
  "prepare_hdxviewer_export",
  "read_hdx",
  "reconstruct_sequence",
  "show_aggregated_uptake_data",
  "show_coverage_heatmap_data",
  "show_diff_uptake_data",
  "show_diff_uptake_data_confidence",
  "show_overlap_data",
  "show_p_diff_uptake_data",
  "show_peptide_charge_measurement",
  "show_peptide_mass_measurement",
  "show_quality_control_data",
  "show_replicate_histogram_data",
  "show_summary_data",
  "show_uc_data",
  "show_uptake_data",
  "update_hdexaminer_file"
)

# HaDeX name paired with each HaDeX2 name (HaDeX2 name = HaDeX name).
# HaDeX2 names that are absent here get NA in the HaDeX column.
hadex_counterparts <- c(
  add_stat_dependency = "add_stat_dependency",
  calculate_confidence_limit_values = "calculate_confidence_limit_values",
  calculate_kinetics = "calculate_kinetics",
  create_state_uptake_dataset = "calculate_state_deuteration",
  HaDeX_GUI = "HaDeX_gui",
  plot_coverage = "plot_coverage",
  plot_differential = "woods_plot",
  plot_overlap_distribution = "plot_position_frequency",
  plot_state_comparison = "comparison_plot",
  plot_uptake_curve = "plot_kinetics",
  read_hdx = "read_hdx",
  reconstruct_sequence = "reconstruct_sequence"
)

# unname() keeps the lookup's names from being picked up as row names.
x <- data.frame(
  HaDeX2 = hadex2_functions,
  HaDeX = unname(hadex_counterparts[hadex2_functions])
)

# Sort by the HaDeX name first, then by the HaDeX2 name, and show the HaDeX
# column on the left.
tab_res <- x %>%
  arrange(HaDeX, HaDeX2) %>%
  select(HaDeX, HaDeX2)

knitr::kable(tab_res, longtable = TRUE)


## ----message=FALSE, eval=FALSE, echo=TRUE-------------------------------------
#  library(HaDeX)
#  
#  dat_HaDeX <- HaDeX::read_hdx(system.file(package = "HaDeX2", "HaDeX/data/alpha.csv"))
#  dat_HaDeX2 <- HaDeX2::read_hdx(system.file(package = "HaDeX2", "HaDeX/data/alpha.csv"))
#  
#  version_benchmark <- microbenchmark(
#    list = alist(`HaDeX_1. Read input` = HaDeX::read_hdx(system.file(package = "HaDeX2",
#                                                                     "HaDeX/data/alpha.csv")),
#                 `HaDeX2_1. Read input` = HaDeX2::read_hdx(system.file(package = "HaDeX2",
#                                                                       "HaDeX/data/alpha.csv")),
#                 `HaDeX_2. Plot uptake curve` = {
#                   HaDeX::calculate_kinetics(dat = dat_HaDeX,
#                                             sequence = "GFGDLKSPAGL",
#                                             state = "Alpha_KSCN",
#                                             start = 1, end = 11,
#                                             time_in = 0, time_out = 1440) %>%
#                     HaDeX::plot_kinetics(kin_dat = .)},
#                 `HaDeX2_2. Plot uptake curve` = {
#                   HaDeX2::calculate_peptide_kinetics(dat = dat_HaDeX2,
#                                                      sequence = "GFGDLKSPAGL",
#                                                      state = "Alpha_KSCN",
#                                                      start = 1, end = 11,
#                                                      time_0 = 0, time_100 = 1440) %>%
#                     HaDeX2::plot_uptake_curve(uc_dat = .)},
#                 `HaDeX_3. Plot comparison` = {
#                   HaDeX::prepare_dataset(dat = dat_HaDeX,
#                                          in_state_first = "Alpha_KSCN_0",
#                                          chosen_state_first = "Alpha_KSCN_1",
#                                          out_state_first = "Alpha_KSCN_1440",
#                                          in_state_second = "ALPHA_Gamma_0",
#                                          chosen_state_second = "ALPHA_Gamma_1",
#                                          out_state_second = "ALPHA_Gamma_1440") %>%
#                     HaDeX::comparison_plot(calc_dat = .,
#                                            theoretical = FALSE,
#                                            relative = TRUE,
#                                            state_first = "Alpha_KSCN",
#                                            state_second = "ALPHA_Gamma")},
#                 `HaDeX2_3. Plot comparison` = {
#                   HaDeX2::create_state_comparison_dataset(dat = dat_HaDeX2,
#                                                           states = c("Alpha_KSCN",
#                                                                      "ALPHA_Gamma"),
#                                                           time_0 = 0, time_100 = 1440) %>%
#                     HaDeX2::plot_state_comparison(uptake_dat = .,
#                                                   theoretical = FALSE,
#                                                   fractional = TRUE,
#                                                   time_t = 1)},
#                 `HaDeX_4. Plot Woods` = {
#                   HaDeX::prepare_dataset(dat = dat_HaDeX,
#                                          in_state_first = "Alpha_KSCN_0",
#                                          chosen_state_first = "Alpha_KSCN_1",
#                                          out_state_first = "Alpha_KSCN_1440",
#                                          in_state_second = "ALPHA_Gamma_0",
#                                          chosen_state_second = "ALPHA_Gamma_1",
#                                          out_state_second = "ALPHA_Gamma_1440") %>%
#                     HaDeX::woods_plot(calc_dat = .,
#                                       theoretical = FALSE,
#                                       relative = TRUE,
#                                       confidence_limit = 0.98,
#                                       confidence_limit_2 = 0.98)},
#                 `HaDeX2_4. Plot Woods` = {
#                   HaDeX2::calculate_diff_uptake(dat = dat_HaDeX2,
#                                                 states = c("Alpha_KSCN", "ALPHA_Gamma"),
#                                                 time_t = 1, time_0 = 0, time_100 = 1440) %>%
#                     HaDeX2::plot_differential(diff_uptake_dat = .,
#                                               time_t = 1,
#                                               theoretical = FALSE,
#                                               fractional = TRUE,
#                                               show_houde_interval = TRUE,
#                                               confidence_level = 0.98)},
#                 `HaDeX_5. Calculate confidence limit` = {
#                   HaDeX::prepare_dataset(dat = dat_HaDeX,
#                                          in_state_first = "Alpha_KSCN_0",
#                                          chosen_state_first = "Alpha_KSCN_1",
#                                          out_state_first = "Alpha_KSCN_1440",
#                                          in_state_second = "ALPHA_Gamma_0",
#                                          chosen_state_second = "ALPHA_Gamma_1",
#                                          out_state_second = "ALPHA_Gamma_1440") %>%
#                     HaDeX::calculate_confidence_limit_values(calc_dat = .,
#                                                              confidence_limit = 0.98,
#                                                              theoretical = FALSE,
#                                                              relative = TRUE)},
#                  `HaDeX2_5. Calculate confidence limit` = {
#                   HaDeX2::calculate_diff_uptake(dat = dat_HaDeX2,
#                                                 states = c("Alpha_KSCN", "ALPHA_Gamma"),
#                                                 time_0 = 0, time_100 = 1440, time_t = 1) %>%
#                     HaDeX2::calculate_confidence_limit_values(diff_uptake_dat = .,
#                                                               confidence_level  = 0.98,
#                                                               theoretical = FALSE,
#                                                               fractional = TRUE)},
#                 `HaDeX_6. Reconstruct sequence` = HaDeX::reconstruct_sequence(dat = dat_HaDeX),
#                 `HaDeX2_6. Reconstruct sequence` =  HaDeX2::reconstruct_sequence(dat = dat_HaDeX2)
#  
#                 )
#  )
#  

## ----message = FALSE, echo=FALSE, results='asis', out.width='100%', fig.cap="Benchmark results.", fig.height=6----

# Load the stored benchmark result from disk.
version_benchmark <- readRDS(file = "version_benchmark.rds")

benchmark_dat <- data.frame(version_benchmark)

# Benchmark labels have the form "<tool>_<task>"; split each label once and
# reuse the pieces for both derived columns.
benchmark_label_parts <- strsplit(as.character(benchmark_dat[["expr"]]), "_",
                                  fixed = TRUE)

# One boxplot per tool, one panel per task. vapply() guarantees that `tool`
# and `task` are character vectors with exactly one value per label.
benchmark_dat %>%
  mutate(tool = vapply(benchmark_label_parts, first, character(1)),
         task = vapply(benchmark_label_parts, last, character(1)),
         # microbenchmark stores timings in nanoseconds; convert to ms
         time = time / 1e6) %>%
  ggplot(aes(x = tool, y = time)) +
  geom_boxplot() +
  scale_x_discrete("") +
  scale_y_continuous("Time [ms]", breaks = scales::pretty_breaks(n = 4)) +
  facet_wrap(~ task, scales = "free_y", ncol = 2)

  

## ----echo=FALSE---------------------------------------------------------------
# Per-expression summary statistics of the benchmark, reported in milliseconds.
benchmark_summary <- summary(version_benchmark, unit = "ms")

# Benchmark labels have the form "<tool>_<task>"; split each label once and
# reuse the pieces for both derived columns.
summary_label_parts <- strsplit(as.character(benchmark_summary[["expr"]]), "_",
                                fixed = TRUE)

# One row per task with the median time of each tool side by side, plus the
# HaDeX2 / HaDeX ratio of those medians.
benchmark_summary %>%
  mutate(tool = vapply(summary_label_parts, first, character(1)),
         task = vapply(summary_label_parts, last, character(1))) %>%
  select(tool, task, median) %>%
  tidyr::pivot_wider(names_from = tool, values_from = median) %>%
  mutate(`Runtime ratio` = HaDeX2 / HaDeX) %>%
  knitr::kable(caption = "Median function execution time (in milliseconds).")

