% Packages \documentclass[a4paper]{article} \usepackage{a4wide} \setlength{\parskip}{0.7ex plus0.1ex minus0.1ex} \setlength{\parindent}{0em} %\documentclass{article} \usepackage[american]{babel} \usepackage{floatrow} \usepackage{url} \usepackage{amsmath} \usepackage{amssymb} \usepackage{enumitem} \usepackage{authblk} \usepackage{caption} % Page settings \usepackage[margin=1in]{geometry} %\setlength{\textwidth}{15cm} %\setlength{\textheight}{22cm} %\setlength{\oddsidemargin}{1.5cm} %\setlength{\evensidemargin}{1.5cm} %\setlength{\topmargin}{-1.5cm} % References \usepackage[backend=bibtex, sorting=none]{biblatex} \bibliography{references} \usepackage{csquotes} \begin{filecontents*}{references.bib} @article{Richards, title = {A flexible growth function for empirical use.}, author = {FJ, Richards}, year = {1959}, journal={J Exp Bot.}, volume={10}, pages={290--300} } @article{Giraldo, author={J, Giraldo and NM, Vivas and E, Vila and A, Badia}, title={Assessing the (a)symmetry of concentration-effect curves: empirical versus mechanistic models}, year={2002}, journal={Pharmacol Ther.}, volume={95}, number={1}, pages={21--45} } @article{Motulsky, author={HJ, Motulsky and RE, Brown}, title={Assessing the (a)symmetry of concentration-effect curves: empirical versus mechanistic models}, year={2006}, journal={BMC Bioinformatics}, volume={9}, pages={7--123} } @article{Levasseur1998, author = {Levasseur, Laurence M. and Faessel, H\'{e}l\`{e}ne and Slocum, Harry K. and Greco, William R.}, journal = {Journal of Pharmacokinetics and Biopharmaceutics}, month = dec, number = {6}, pages = {717--733}, title = {Implications for Clinical Pharmacodynamic Studies of the Statistical Characterization of an In Vitro Antiproliferation Assay}, volume = {26}, year = {1998} } @article{Willet1988, author = {Willett, John B and Singer, Judith D}, journal = {The American Statistician}, month = dec, number = {3}, pages = {236-238}, title = {Another Cautionary Note about R2: Its Use in Weighted Least-Squares Regression Analysis}, volume = {42}, year = {1988} } @misc{NCI, url = {https://wiki.nci.nih.gov/display/NCIDTPdata/NCI-60+Growth+Inhibition+Data} } \end{filecontents*} \makeatletter \def\blx@maxline{77} \makeatother % Authors \author[1,2]{Frederic Commo} \author[1]{Briant M. Bot} \affil[1]{Sage Bionetworks, Fred Hutchinson Cancer Research Center, Seattle, Washington} \affil[2]{INSERM U981, Institut Gustave Roussy, 114 rue Edouard Vaillant, 94805 Villejuif, France} \renewcommand\Authands{ and } \renewcommand\Affilfont{\fontsize{9}{10.8}\itshape} \begin{document} %\VignetteIndexEntry{R package nplr} %\VignetteEngine{knitr::knitr} \title{R package \emph{nplr} \\ \emph{n-parameter logistic regressions}} \maketitle % Chunk setup <>= # fig.width=5.5, require(knitr) opts_chunk$set(fig.align='center', fig.height=5, warning=FALSE, dev='pdf', prompt=TRUE, comment=NA, highlight=FALSE, tidy=FALSE) @ \section{Introduction} \subsection{Overview} In in-vitro experiments, the aim of drug response analyses is usually to estimate the drug concentration required to reach a given cell line growth inhibition rate - typically the 50\% inhibitory concentration ($IC_{50}$), which inhibits 50\% of the proliferation, compared with an untreated control. This estimation can be achieved by modeling the inhibition rate observed under a range of drug concentrations. Once the model is fitted, the x values (drug concentrations) can be estimated from the y values (inhibition rates) by simply inverting the function. \par The most commonly used model for drug response analysis is the Richards' equation \cite{Richards}, also refered to as a 5-parameter logistic regression \cite{Giraldo}: $$y = B + \dfrac{T - B}{\left [1 + 10^{b(x_{mid} - x)} \right ]^s}$$ \\ where $B$ and $T$ are the bottom and top asymptotes, and $b$, $x_{mid}$ and $s$ are the Hill slope, the x-coordinate at the inflexion point and an asymetric coefficient, respectively. \par \vspace{\baselineskip} The \texttt{nplr} package is based on the full 5-parameter model, where all of the parameters are optimized, simultaneously, using a Newton-Raphson method (\texttt{nlm}, R package \texttt{stats}). The objective function to minimize is a weighted sum of squared errors: $$ sse(Y) = \Sigma_i{w_i.(\hat{y}_i - y_i)^2}, i=1,...,n $$ \\ The weights, $wi$, used in the objective function can be computed using 3 possible methods, as follows: \begin{itemize} \renewcommand\labelitemi{--} \item{residuals weights:} $w_i = \left (\frac{1}{res_i}\right )^p, i=1,...,n\ values$ \item{standard weights:} $w_{ir} = \frac{1}{Var(y_r)}, r=1,...,r\ replicated\ conditions$ \item{general weights:} $w_{i} = \left (\frac{1}{\hat{y}_i}\right )^p, i=1,...,n\ values$ \end{itemize} where $p$ is a tuning parameter. The \texttt{standard weights} and the \texttt{general weights} methods are described in \cite{Motulsky,Levasseur1998}. \texttt{nplr} provides several options in order to compute flexible weighted n-parameter logistic regression: \texttt{npars="all"} can be explicitly specified, from 2 to 5, or set to \texttt{all}. In that case, all the possible models are evaluated, and the optimal one is retunred, with respect to the minimal error returned by \texttt{nlm}. \par The final model performance is estimated by a weighted and non-weighted standard error, as well as the weighted and non-weighted goodness-of-fit: \begin{itemize} \renewcommand\labelitemi{--} \item{standard error:} $ \frac{1}{(n-2)}.\Sigma_i{(\hat{y}_i - y_i)^2}, i=1,...,n $ \item{weighted standard error:} $ \frac{1}{(n-2)}.\Sigma_i{w_i.(\hat{y}_i - y_i)^2}, i=1,...,n $ \item{goodness-of-fit:} $ 1 - \frac{SS_{res}}{SS_{tot}} $ \item{weighted goodness-of-fit:} $ 1 - \frac{\Sigma_i{w_i(\hat{y}_i - y_i)^2}}{\Sigma_i{(y_i - \overline{y})^2}} $ \end{itemize} \subsection{Functions in \texttt{nplr}} The main function is simply \texttt{nplr()}. It requires 2 main arguments: a vector of \texttt{x} and a vector of \texttt{y}. Several other arguments have default values. \par The $npars$ argument allows a user to run specific n-parameter models, n from 2 to 5, while the default value, \texttt{npars="all"}, asks the function to test which model fits the best the data, according to a weighted Goodness-of-Fit estimator.\\ In some situations, the x values may need to be log-transformed, e.g. x is provided as original drug concentrations. In such case, setting \texttt{useLog=TRUE} in \texttt{nplr()} will apply a $Log_{10}$ transformation on the x values. \par The \texttt{nplr()} function has been optimized for fitting curves on y-values passed as proportions of control, between 0 to 1. If data are supplied as original response values, e.g. optic density measurements, the \texttt{convertToProp()} function may be helpful. In drug-response curve fitting, a good practice consists in adjusting the signals on a $T_0$ and a $control$ (Ctrl) values. Providing this values, the proportion values, $y_p$, are computed as: $$y_p = \frac{y - T_0}{Ctrl - T_0}$$ where $y$, $T_0$ and $Ctrl$ are the observed values, the 'time zero' and the 'untreated control', respectively. \par Note that if neither $T_0$ nor $Ctrl$ are provided, \texttt{convertToProp()} will compute the proportions with respect to the $min$ and $max$ of $y$. In that case, the user should be aware that $y = 0.5$ may not correspond to a $IC_{50}$, but rather to a $EC_{50}$ (the half-effect between the maximum and the minimum of the observed effects). \par \vspace{\baselineskip} In a drug-response (or progression) curve fitting context, typical needs are to invert the function in order to estimate the x value, e.g. the $IC_50$, given a $y$ value, e.g. the 0.5 survival rate. To do so, the implemented \texttt{getEstimates()} method takes 2 arguments: the model (an instance of the class nplr), and one (or a vector of) target(s). \texttt{getEstimates()} returns the corresponding x values and their estimated confidence intervals, as specified by \texttt{conf.level}. \section{Examples} The examples below use some samples of the NCI-60 Growth Inhibition Data. The full data can be downloaded at \cite{NCI}. For the purpose of the demonstration, the supplied drug concentrations have been re-exponentiated. \subsection{Example 1} \subsubsection{Fitting a model} <>= require(nplr) @ The first example fits a simple drug-response curve: the PC-3 cell line treated with Thioguanine, 19 points without replicates. <>= path <- system.file("extdata", "pc3.txt", package="nplr") pc3 <- read.delim(path) np1 <- nplr(x=pc3$CONC, y=pc3$GIPROP) @ Calling the object returns the fitting summary for the model. <>= np1 @ \subsubsection{Visualizing the model} A specific \texttt{plot()} function has been implemented in order to visualize the results. <>= plot(np1, cex.main = 1.2, main="PC-3 cell line. Response to Thioguanine") @ <>= @ \vspace{.5in} This function has several predefined graphical parameters, and some of them can be overwritten. However, a convenient way to draw simplest or customized plots is shown in the example below: <>= op <- par(no.readonly=TRUE) par(mfrow=c(1,2)) plot(np1, pcol="grey40", lcol="skyblue1", showEstim=.5, showInfl=TRUE, main="Default 'nplr' plot", cex.main=1.5) x1 <- getX(np1); y1 <- getY(np1) x2 <- getXcurve(np1); y2 <- getYcurve(np1) plot(x1, y1, pch=15, cex=2, col="tan1", xlab=expression(Log[10](conc)), ylab="Prop", main="Custom plot", cex.main=1.5) lines(x2, y2, lwd=5, col="seagreen4") par(op) @ <>= @ \subsubsection{Accessing the performances} Once the model is built, several accessor functions allow to get access to the performances of the model, and its parameters. <>= getGoodness(np1) @ <>= getStdErr(np1) @ <>= getPar(np1) @ Here, the 5-parameter model have been chosen as it showed better performances, according to the goodness-of-fit (\texttt{npar=5}). The optimal values for the parameters are reported in \texttt{params}. \subsubsection{Estimating the drug concentrations} The purpose of such fitting is to estimate the response to the drug. To do so, \texttt{nplr} provides 2 estimates: the area under the curve (AUC), and the drug concentration for a given response to reach. \par The \texttt{getAUC()} function returns the area under the curve (AUC) estimated by the trapezoid rule and the Simpson's rule, while \texttt{getEstimates()} invert the function and returns the estimated concentration for a given response. If no target is specified, the default output is a table of the x values corresponding to responses from 0.9 to 0.1. <>= getAUC(np1) @ <>= getEstimates(np1) @ A single value (a target), or a vector of values, can be passed to \texttt{getEstimates()}, and a confidence level can be specified (by default, conf.level is set to .95). <>= getEstimates(np1, .5) @ <>= getEstimates(np1, c(.25, .5, .75), conf.level=.90) @ \subsection{Example 2} The next example analyses a drug-response experiment with replicated drug concentrations: the MCF-7 cell line treated with Irinotecan. <>= path <- system.file("extdata", "mcf7.txt", package="nplr") mcf7 <- read.delim(path) np2 <- nplr(x=mcf7$CONC, y=mcf7$GIPROP) @ <>= plot(np2, showSDerr = TRUE, lwd = 4 , cex.main=1.25, main="Cell line MCF-7. Response to Irinotecan") @ \vspace{.5in} As there are replicates, we can compare the effect of the different weighted methods: the default method is \texttt{residuals weights, "res"}. A \texttt{no-weight} condition can be tested by setting the \texttt{LPweight} argument to 0: The vector of weights is then just a vector of 1's. <>= x <- mcf7$CONC y <- mcf7$GIPROP noweight <- nplr(x, y, LPweight=0, silent=TRUE) sdw <- nplr(x, y, method="sdw", silent=TRUE) gw <- nplr(x, y, method="sdw", LPweight=1.5, silent=TRUE) @ <>= par(mfrow=c(2,2)) plot(np2, showEstim=.5, main="residuals weights") plot(noweight, showEstim=.5, main="No weight") plot(sdw, showEstim=.5, main="Stdev weights") plot(noweight, showEstim=.5, main="general weights") par(op) @ \vspace{.5in} Note that the curves do not seem to change dramatically. However, the different weights can give different performances. \subsection{Example 3} \subsubsection{Fitting a Progression/Time model} This last example illustrates a Progression/Time experiment: these are simulated data.\\ <>= path <- system.file("extdata", "prog.txt", package="nplr") prog <- read.delim(path) @ Here, the progression values are given in some unknown unit, and the x values are \texttt{Time} in hours. So we don't need to use a $Log_{10}$ transformation. Let us assume that we have access to the $T_0$ and the $control$ values. We can use \texttt{convertToProp()} in order to convert the y values to proportions. <>= x <- prog$time yp <- convertToProp(prog$prog, T0 = 5, Ctrl = 102) np3 <- nplr(x, yp, useLog=FALSE) @ When progression is at stake, it may be interesting to get the coordinates of the inflexion point, as it corresponds to the point where the slope (the progression) is maximal. <>= getInflexion(np3) @ <>= plot(np3, showInfl=TRUE, xlab="Time (hrs)", cex.main=1.5, cex.lab=1.2, ylab="Prop. of control", main="Progression") @ \subsubsection{Evaluating the number of parameters} When a 5-p logistic regression is used, and because of the asymetric parameter, the curve is no longer symetrical around its inflexion point. Here is an illustration of the impact of the number of parameters on the fitting. <>= plot(x, yp, pch=19, col="grey" , cex.main=1.5, cex.lab=1.2, main="The n-parameter effect", xlab="Time", ylab="Progression") le <- c() for(i in 2:5){ test <- nplr(x, yp, npars = i, useLog = FALSE) lines(getXcurve(test), getYcurve(test), lwd = 2, col = i) goodness <- getGoodness(test) gof <- goodness$gof le <- c(le, sprintf("%s-P: GOF=%s", i, round(gof, 4))) } legend("bottomright", legend=le, lwd=2, col=2:5, bty="n") @ Note that the 5-P model may not be always the best choice. \subsection{Superimposing multiple curves} When multiple cell lines, or multiple compounds are compared, it can be useful to superimpose the response curves on a single plot. <>= path <- system.file("extdata", "multicell.tsv", package="nplr") multicell <- read.delim(path) # Computing models (to be stored in a list) cellsList <- split(multicell, multicell$cell) Models <- lapply(cellsList, function(tmp){ nplr(tmp$conc, tmp$resp, silent = TRUE) }) # Visualizing overlay(Models, xlab = expression(Log[10](Conc.)), ylab = "Resp.", main="Superimposing multiple curves", cex.main=1.5, lwd = 3) @ \subsection{Web-server version} \texttt{nplr} is available as a free online application at\\ \url{https://fredcommo.shinyapps.io/curveFitter}\\ This application is built on top of \texttt{shiny} and \texttt{nplr}, and allows a user to upload a file as the same form as those provided as examples in the \texttt{nplr} package. A \texttt{multicell.tsv} example file is also available for download through the App (click on \texttt{Example file}).\\ Plot and results can be exported after the models have been built. \begin{center} \makebox[\textwidth]{\includegraphics[width=\textwidth]{nplrApp.pdf}} \captionof{figure}{\texttt{nplr} at https://fredcommo.shinyapps.io/curveFitter} \end{center} \section{Accessing R code} \texttt{nplr} R code is available on github: \url{https://github.com/fredcommo/nplr} \printbibliography \end{document}