--- title: "Getting familiar with dMrs" author: "Paul Little, Reuben Adatorwovor" date: "Last Updated: `r format(Sys.Date(), '%m/%d/%Y')`" output: html_document: theme: journal highlight: tango toc: true toc_depth: 3 toc_float: collapsed: true smooth_scroll: true number_sections: true vignette: > %\VignetteIndexEntry{Getting familiar with ProjectH} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ```{r global_options,include = FALSE} knitr::opts_chunk$set(comment = NA,warning = FALSE) ``` # Introduction This **dMrs** package is designed to fit survival data to the corresponding manuscript. ```{r load_lib,echo = TRUE,eval = TRUE} req_packs = c("sqldf","relsurv","ggplot2","data.table","dMrs") for(pack in req_packs){ chk_pack = tryCatch(find.package(pack), warning = function(ww){NULL}, error = function(ee){NULL}) if( !is.null(chk_pack) ){ library(pack,character.only = TRUE) next } stop(sprintf("Install %s",pack)) } ``` # Application The code below will load `relsurv`'s working dataset `rdata` and import Slovenia's latest ratetable from HMD. ```{r load_data,echo = TRUE,eval = TRUE} data(rdata) rdata$sex = ifelse(rdata$sex == 1,"male","female") rdata[1:3,] # Slovenia population death tables female_fn = "./slovenia_female.txt" male_fn = "./slovenia_male.txt" slotab = transrate.hmd(female = female_fn,male = male_fn) dimnames(slotab) # Hazards calculated per day within age, year, sex strata slotab[1:3,1:3,] # Subset rdata for years captured by slotab dim(rdata) rdata$datediag_yr = as.Date(rdata$year) rdata$datediag_yr = as.character(rdata$datediag_yr) rdata$datediag_yr = sapply(rdata$datediag_yr, function(xx) strsplit(xx,"-")[[1]][1]) rdata$datediag_yr = rdata$datediag_yr table(rdata$datediag_yr) dimnames(slotab)[[2]] rdata = rdata[which(rdata$datediag_yr %in% dimnames(slotab)[[2]]),] dim(rdata) ``` # Relsurv Approach ```{r ana_relsurv,echo = TRUE,eval = TRUE} test = rs.surv(Surv(time,cens) ~ 1, ratetable = slotab,data = rdata, rmap = list(age = age*365.241)) str(test) COMP = data.frame(Time = test$time / 365.241, SurvEst = test$surv, SurvLow = test$lower, SurvHigh = test$upper) plot(COMP$Time,COMP$SurvEst, xlab = "Time (yrs)",type = "l", ylab = "Net Survival",main = "relsurv method", ylim = c(min(COMP$SurvLow),1)) lines(COMP$Time,COMP$SurvLow,lty = 2) lines(COMP$Time,COMP$SurvHigh,lty = 2) ``` # dMrs Approach Prep wDAT, working dataset's initial fields. ```{r} wDAT = rdata[,c("datediag_yr","time","cens","age","sex")] wDAT$delta = wDAT$cens wDAT$datediag_yr = as.integer(wDAT$datediag_yr) # time in years wDAT$time = wDAT$time / 365.241 wDAT$age = as.integer(wDAT$age) wDAT[1:5,] ``` Prep rDAT, the reference data.frame. ```{r} mm = fread(file = male_fn,data.table = FALSE) ff = fread(file = female_fn,data.table = FALSE) rDAT = rbind(data.frame(sex = "male",mm,stringsAsFactors = FALSE), data.frame(sex = "female",ff,stringsAsFactors = FALSE)) rDAT[1:5,] rDAT = rDAT[,c("Year","Age","sex","qx")] # table(rDAT$Age) rDAT$Age = ifelse(rDAT$Age == "110+",110,rDAT$Age) rDAT$Age = as.integer(rDAT$Age) rDAT[1:5,] ``` Perform matching to calculate log density and log cdf. ```{r} aa = refData_match(wDAT = wDAT,rDAT = rDAT) head(aa) wDAT = cbind(wDAT,aa) wDAT[1:3,] ``` Prep `dMrs` inputs. ```{r prep_dMrs} len1 = 10 len2 = 15 A_range = c(0.4,4) L_range = quantile(wDAT$time,c(0.5,1)) K_range = c(0.1,2) T_range = c(0.1,20) # Less fine grid for alpha/lambda A_ugrid = log(seq(A_range[1],A_range[2],length.out = len1)) L_ugrid = log(seq(L_range[1],L_range[2],length.out = len1)) # Finer grid for kappa/theta K_ugrid = log(seq(K_range[1],K_range[2],length.out = len2)) T_ugrid = log(seq(T_range[1],T_range[2],length.out = len2)) param_grid = list(A = A_ugrid, L = L_ugrid,K = K_ugrid,T = T_ugrid) param_grid ``` Run data fit with `dMrs`'s main function. ```{r opt_data} res = run_analyses(DATA = wDAT, param_grid = param_grid, vec_time = seq(0,100,0.5), ncores = 1, verb = TRUE, PLOT = TRUE) ``` Check `dMrs` output ```{r chk_out} # See all solutions OO = opt_sum(OPT = res) OO # Select best model idx = which(OO$BIC == max(OO$BIC)) idx # MLEs (unconstrained) res[[idx]]$RES$out # MLEs (constrained) res[[idx]]$RES$cout # Log-likelihood res[[idx]]$RES$LL # Gradient res[[idx]]$RES$GRAD # Hessian res[[idx]]$RES$HESS # Covariance matrix res[[idx]]$RES$COVAR ``` # Net-survival ```{r dmrs_surv} # Predicted survivals res[[idx]]$PRED[1:3,] plot_SURVs(run_ANA = res[idx],MULTIPLE = FALSE) ``` Compare `dMrs` vs `relsurv` ```{r comp_surv,echo = TRUE,eval = TRUE} tmp_pred = res[[idx]]$PRED out = sqldf(" select COMP.*, 'Pohar-Perme' as Method from COMP union select DMRS.time as Time, DMRS.surv as SurvEst, DMRS.low_surv2 as SurvLow, DMRS.high_surv2 as SurvHigh, 'dMrs' as Method from tmp_pred as DMRS ") my_themes = theme(text = element_text(size = 28), legend.position = "bottom", plot.title = element_text(hjust = 0.5), panel.background = element_blank(), panel.grid.major = element_line(colour = "grey50", linewidth = 0.5,linetype = "dotted"), panel.border = element_rect(colour = "black", fill = NA,linewidth = 1), legend.key.width = unit(1.5, "cm"), legend.key.size = unit(0.5, "inch"), legend.text = element_text(size = 20)) ggplot(data = out, mapping = aes(x = Time,y = SurvEst,group = Method,fill = Method)) + geom_line(linewidth = 1.25,alpha = 1, aes(color = Method),show.legend = FALSE) + geom_ribbon(mapping = aes(ymin = SurvLow, ymax = SurvHigh),alpha = 0.5) + ylim(c(0.4,1)) + xlim(0,20) + xlab("Time (yrs)") + ylab("Net Survival") + my_themes ``` # Session information ```{r session,echo = FALSE} sessionInfo() ``` # References