## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.dim = c(7, 4) ) ## ----setup-------------------------------------------------------------------- library(LMMsolver) library(ggplot2) ## ----linearFun---------------------------------------------------------------- f1 <- function(x) { 0.6 + 0.7*x} ## ----linearFunSim------------------------------------------------------------- set.seed(2016) n <- 25 x <- seq(0, 1, length = n) sigma2e <- 0.04 y <- f1(x) + rnorm(n, sd = sqrt(sigma2e)) dat1 <- data.frame(x = x, y = y) ## ----fit1--------------------------------------------------------------------- obj1 <- LMMsolve(fixed = y ~ x, data = dat1) ## ----predictlin--------------------------------------------------------------- newdat <- data.frame(x = seq(0, 1, length = 300)) pred1 <- predict(obj1, newdata = newdat, se.fit = TRUE) # adding the true values for comparison pred1$y_true <- f1(pred1$x) ## ----ggplotLinear------------------------------------------------------------- ggplot(data = dat1, aes(x = x, y = y)) + geom_point(col = "black", size = 1.5) + geom_line(data = pred1, aes(y=y_true), color = "red", linewidth = 1, linetype = "dashed") + geom_line(data = pred1, aes(y = ypred), color = "blue", linewidth = 1) + geom_ribbon(data = pred1, aes(x=x,ymin = ypred-2*se, ymax = ypred+2*se), alpha = 0.2, inherit.aes = FALSE) + theme_bw() ## ----nonlinearFun------------------------------------------------------------- f2 <- function(x) { 0.3 + 0.4*x + 0.2*sin(20*x) } ## ----simDat2------------------------------------------------------------------ set.seed(12) n <- 150 x <- seq(0, 1, length = n) sigma2e <- 0.04 y <- f2(x) + rnorm(n, sd = sqrt(sigma2e)) dat2 <- data.frame(x, y) ## ----nonlinearFit------------------------------------------------------------- obj2 <- LMMsolve(fixed = y ~ 1, spline = ~spl1D(x, nseg = 50), data = dat2) ## ----summary_nonlinear-------------------------------------------------------- summary(obj2) ## ----predict_non-------------------------------------------------------------- newdat <- data.frame(x = seq(0, 1, length = 300)) pred2 <- predict(obj2, newdata = newdat, se.fit = TRUE) pred2$y_true <- f2(pred2$x) ggplot(data = dat2, aes(x = x, y = y)) + geom_point(col = "black", size = 1.5) + geom_line(data = pred2, aes(y = y_true), color = "red", linewidth = 1, linetype ="dashed") + geom_line(data = pred2, aes(y = ypred), color = "blue", linewidth = 1) + geom_ribbon(data= pred2, aes(x=x, ymin = ypred-2*se, ymax = ypred+2*se), alpha=0.2, inherit.aes = FALSE) + theme_bw() ## ----twoExperiments----------------------------------------------------------- set.seed(1234) nA <- 50 nB <- 100 mu_A <- 0.10 mu_B <- -0.10 sigma2e_A <- 0.04 sigma2e_B <- 0.10 x1 <- runif(n = nA) x2 <- runif(n = nB) y1 <- f2(x1) + rnorm(nA, sd = sqrt(sigma2e_A)) + mu_A y2 <- f2(x2) + rnorm(nB, sd = sqrt(sigma2e_B)) + mu_B Experiment <- as.factor(c(rep("A", nA), rep("B", nB))) dat4 <- data.frame(x = c(x1, x2), y = c(y1,y2), Experiment = Experiment) ## ----boxplot------------------------------------------------------------------ ggplot(dat4, aes(x = Experiment, y = y, fill = Experiment)) + geom_boxplot() + geom_point(position = position_jitterdodge(), alpha = 0.3) ## ----twoExpSolve-------------------------------------------------------------- obj4 <- LMMsolve(fixed= y ~ 1, spline = ~spl1D(x, nseg = 50, xlim = c(0,1)), random = ~Experiment, residual = ~Experiment, data = dat4) ## ----summaryExp--------------------------------------------------------------- summary(obj4) ## ----twoExpPredict------------------------------------------------------------ newdat <- data.frame(x=seq(0, 1, length = 300)) pred4 <- predict(obj4, newdata = newdat, se.fit = TRUE) pred4$y_true <- f2(pred4$x) ggplot(data = dat4, aes(x = x, y = y, colour = Experiment)) + geom_point(size = 1.5) + geom_line(data = pred4, aes(y = y_true), color="red", linewidth = 1, linetype = "dashed") + geom_line(data = pred4, aes(y = ypred), color = "blue", linewidth = 1) + geom_ribbon(data = pred4, aes(x = x,ymin = ypred-2*se, ymax = ypred+2*se), alpha = 0.2, inherit.aes = FALSE) + theme_bw() ## ----coefExp------------------------------------------------------------------ coef(obj4)$Experiment ## ----USprecip data------------------------------------------------------------ ## Get precipitation data from spam data(USprecip, package = "spam") ## Only use observed data USprecip <- as.data.frame(USprecip) USprecip <- USprecip[USprecip$infill == 1, ] ## ----runobj3------------------------------------------------------------------ obj5 <- LMMsolve(fixed = anomaly ~ 1, spline = ~spl2D(x1 = lon, x2 = lat, nseg = c(41, 41)), data = USprecip) ## ----ED_USprecip-------------------------------------------------------------- summary(obj5) ## ----pred_USprecip------------------------------------------------------------ lon_range <- range(USprecip$lon) lat_range <- range(USprecip$lat) newdat <- expand.grid(lon = seq(lon_range[1], lon_range[2], length = 200), lat = seq(lat_range[1], lat_range[2], length = 300)) plotDat5 <- predict(obj5, newdata = newdat) ## ----Plot_USprecip------------------------------------------------------------ plotDat5 <- sf::st_as_sf(plotDat5, coords = c("lon", "lat")) usa <- sf::st_as_sf(maps::map("usa", regions = "main", plot = FALSE)) sf::st_crs(usa) <- sf::st_crs(plotDat5) intersection <- sf::st_intersects(plotDat5, usa) plotDat5 <- plotDat5[!is.na(as.numeric(intersection)), ] ggplot(usa) + geom_sf(color = NA) + geom_tile(data = plotDat5, mapping = aes(geometry = geometry, fill = ypred), linewidth = 0, stat = "sf_coordinates") + scale_fill_gradientn(colors = topo.colors(100))+ labs(title = "Precipitation (anomaly)", x = "Longitude", y = "Latitude") + coord_sf() + theme(panel.grid = element_blank()) ## ----SeaSurfaceTemp----------------------------------------------------------- data(SeaSurfaceTemp) head(SeaSurfaceTemp, 5) table(SeaSurfaceTemp$type) ## ----convert_and_split-------------------------------------------------------- # convert from Kelvin to Celsius df <- SeaSurfaceTemp df$sst <- df$sst - 273.15 ### split in training and test set df_train <- df[df$type == "train", ] df_test <- df[df$type == "test", ] ## ----SST_raw_data------------------------------------------------------------- nasa_palette <- c( "#03006d","#02008f","#0000b6","#0001ef","#0000f6","#0428f6","#0b53f7", "#0f81f3","#18b1f5","#1ff0f7","#27fada","#3efaa3","#5dfc7b","#85fd4e", "#aefc2a","#e9fc0d","#f6da0c","#f5a009","#f6780a","#f34a09","#f2210a", "#f50008","#d90009","#a80109","#730005" ) map_layer <- geom_map( data = map_data("world"), map = map_data("world"), aes(group = group, map_id = region), fill = "black", colour = "white", linewidth = 0.1 ) # Brazil-Malvinas confluence zone BM_box <- cbind(lon = c(-60, -48), lat = c(-50, -35)) ggplot() + scale_colour_gradientn(colours = nasa_palette, name = expression(degree*C)) + xlab("Longitude (deg)") + ylab("Latitude (deg)") + map_layer + xlim(BM_box[, "lon"]) + ylim(BM_box[, "lat"]) + theme_bw() + coord_fixed(expand = FALSE) + geom_point(data = df_train, aes(lon, lat, colour = sst), size=0.5) ## ----SeaTempAnalysis---------------------------------------------------------- obj6 <- LMMsolve(fixed = sst ~ 1, spline = ~spl2D(lon, lat, nseg = c(70, 70), x1lim = BM_box[, "lon"], x2lim = BM_box[, "lat"]), data = df_train, tolerance = 1.0e-1) summary(obj6) ## ----predictions_grid--------------------------------------------------------- lon_range <- BM_box[, "lon"] lat_range <- BM_box[, "lat"] newdat <- expand.grid(lon = seq(lon_range[1], lon_range[2], length = 200), lat = seq(lat_range[1], lat_range[2], length = 200)) pred_grid <- predict(obj6, newdata = newdat, se.fit=TRUE) pred_grid <- pred_grid[pred_grid$se<5, ] ## Plot predictions on a grid ggplot(pred_grid) + geom_tile(aes(x = lon, y = lat, fill = ypred)) + scale_fill_gradientn(colours = nasa_palette) + labs( fill = "pred.", x = "Longitude (deg)", y = "Latitude (deg)" ) + map_layer + theme_bw() + coord_fixed(expand = FALSE, xlim = BM_box[, "lon"], ylim = BM_box[, "lat"]) + scale_x_continuous(breaks = c(-58, -54, -50)) ## ----seSST-------------------------------------------------------------------- ## Plot standard error ggplot(pred_grid) + geom_raster(aes(x = lon, y = lat, fill = se)) + scale_fill_distiller(palette = "BrBG", direction = -1) + labs( fill = "s.e.", x = "Longitude (deg)", y = "Latitude (deg)") + map_layer + theme_bw() + coord_fixed(expand = FALSE, xlim = c(-60, -48), ylim = c(-50, -35)) + scale_x_continuous(breaks = c(-58, -54, -50)) ## ----test_set----------------------------------------------------------------- pred_test <- predict(obj6, newdata = df_test) ggplot(pred_test, aes(x = sst,y = ypred)) + geom_point() + xlab("observed SST (Celsius)") + ylab("predicted SST (Celsius)") + geom_abline(intercept=0,slope=1,col='red') + theme_bw() ## ----RMSE_test---------------------------------------------------------------- Y <- (pred_test$sst - pred_test$ypred)^2 RMSE <- sqrt(mean(Y)) round(RMSE, 2) ## ----poissonSim--------------------------------------------------------------- set.seed(1234) n <- 150 x <- seq(0, 1, length=n) fun_lambda <- function(x) { 4 + 3*x + 4*sin(7*x) } x <- seq(0, 1, length = n) y <- rpois(n = n, lambda = fun_lambda(x)) dat3 <- data.frame(x = x, y = y) ## ----Poisson_LMMsolver-------------------------------------------------------- obj3 <- LMMsolve(fixed = y ~ 1, spline = ~spl1D(x, nseg = 50), family = poisson(), data = dat3) summary(obj3) ## ----predict_poisson---------------------------------------------------------- newdat <- data.frame(x = seq(0, 1, length = 300)) pred3 <- predict(obj3, newdata = newdat, se.fit = TRUE) pred3$y_true <- fun_lambda(pred3$x) ggplot(data = dat3, aes(x = x, y = y)) + geom_point(col = "black", size = 1.5) + geom_line(data = pred3, aes(y = y_true), color = "red", linewidth = 1, linetype ="dashed") + geom_line(data = pred3, aes(y = ypred), color = "blue", linewidth = 1) + geom_ribbon(data= pred3, aes(x=x, ymin = ypred-2*se, ymax = ypred+2*se), alpha=0.2, inherit.aes = FALSE) + theme_bw() ## ----binomial_sim------------------------------------------------------------- set.seed(1234) n <- 100 sz <- 10 fun_prob <- function(x) { 0.5 + 0.4*sin(2*pi*x) } x <- seq(0, 1, length=n) nsucces <- sapply(x, FUN=function(x) { rbinom(n=1, size = sz, prob = fun_prob(x)) }) dat <- data.frame(x = x, succes = nsucces, failure= sz - nsucces) head(dat, 5) ## ----binomial_solve----------------------------------------------------------- obj3 <- LMMsolve(fixed = cbind(succes, failure) ~ 1, spline = ~spl1D(x, nseg = 50), family = binomial(), data = dat) summary(obj3) ## ----binomial_predict--------------------------------------------------------- newdat <- data.frame(x = seq(0, 1, by=0.01)) pred3 <- predict(obj3, newdata = newdat, se.fit=TRUE) ## ----binomial_plot------------------------------------------------------------ pred3$y_true <- fun_prob(pred3$x) dat$y <- dat$succes/sz ggplot(data = dat, aes(x = x, y = y)) + geom_point(col = "black", size = 1.5) + geom_line(data = pred3, aes(y = y_true), color = "red", linewidth = 1, linetype = "dashed") + geom_line(data = pred3, aes(y = ypred), color = "blue", linewidth = 1) + geom_ribbon(data= pred3, aes(x=x, ymin = ypred-2*se, ymax = ypred+2*se), alpha=0.2, inherit.aes = FALSE) + theme_bw() ## ----multinomialmodel--------------------------------------------------------- k <- 4 mu <- c(0.1, 0.4, 0.6, 0.9) names(mu) <- LETTERS[1:k] nonlinear <- function(x, mu) { z <- sapply(mu, function(mu) { exp(-8*sin(pi*(x-mu))^2)}) # normalize to sum equal to one z <- z/sum(z) return(z) } ## ----simMultinomial----------------------------------------------------------- x <- runif(n, 0, 1) sz <- 10 multiNom <- t(sapply(x, FUN= function(x) { rmultinom(n=1, size=sz, prob = nonlinear(x,mu)) } )) colnames(multiNom) <- names(mu) dat <- data.frame(x, multiNom) head(dat, 4) ## ----multinomialfit----------------------------------------------------------- obj <- LMMsolve(fixed = cbind(A,B,C,D) ~ 1, spline = ~spl1D(x, nseg = 17, xlim = c(0,1)), data = dat, family = multinomial()) summary(obj) ## ----makePredictions---------------------------------------------------------- sRows <- rowSums(multiNom) fr <- multiNom/sRows dat_fr <- data.frame(x, fr) x0 <- seq(0, 1, by = 0.01) newdat <- data.frame(x = x0) pred <- predict(obj, newdata = newdat) head(pred) ## ----makePlot----------------------------------------------------------------- library(tidyr) colnames(pred) <- c("x", "category", "y") prob_true <- t(sapply(X=x0, FUN = function(x) { nonlinear(x, mu)})) colnames(prob_true) <- names(mu) df_true <- data.frame(x = x0, prob_true) prob_true_lf <- df_true %>% gather(key = "category",value="y", A:D) dat_fr_lf <- dat_fr %>% gather(key = "category",value="y", A:D) p1 <- ggplot(prob_true_lf, aes(x = x, y=y, color = category)) + geom_line(linetype='dashed') + geom_line(data=pred) + geom_point(data=dat_fr_lf) p1 ## ----oatsdata----------------------------------------------------------------- ## Load data. data(john.alpha, package = "agridat") head(john.alpha) ## ----define LVmodel----------------------------------------------------------- ## Add plot as factor. john.alpha$plotF <- as.factor(john.alpha$plot) ## Define the precision matrix, see eqn (A2) in Boer et al (2020). N <- nrow(john.alpha) cN <- c(1 / sqrt(N - 1), rep(0, N - 2), 1 / sqrt(N - 1)) D <- diff(diag(N), diff = 1) Delta <- 0.5 * crossprod(D) LVinv <- 0.5 * (2 * Delta + cN %*% t(cN)) ## Add LVinv to list, with name corresponding to random term. lGinv <- list(plotF = LVinv) ## ----modelLV------------------------------------------------------------------ obj7 <- LMMsolve(fixed = yield ~ rep + gen, random = ~plotF, ginverse = lGinv, data = john.alpha) ## ----summary_dev_VAR---------------------------------------------------------- round(deviance(obj7, relative = FALSE), 2) summary(obj7, which = "variances") ## ----APSIMdat----------------------------------------------------------------- data(APSIMdat) head(APSIMdat) ## ----APSIMmodel--------------------------------------------------------------- obj8 <- LMMsolve(fixed = biomass ~ 1, spline = ~spl1D(x = das, nseg = 50), data = APSIMdat) ## ----APSIMmodelSummary-------------------------------------------------------- summary(obj8) ## ----APSIMplot---------------------------------------------------------------- das_range <- range(APSIMdat$das) newdat <- data.frame(das=seq(das_range[1], das_range[2], length = 300)) pred8 <- predict(obj8, newdata = newdat, se.fit = TRUE) ggplot(data = APSIMdat, aes(x = das, y = biomass)) + geom_point(size = 1.0) + geom_line(data = pred8, aes(y = ypred), color = "blue", linewidth = 1) + geom_ribbon(data = pred8, aes(x = das,ymin = ypred-2*se, ymax = ypred+2*se), alpha = 0.2, inherit.aes = FALSE) + labs(title = "APSIM biomass as function of time", x = "days after sowing", y = "biomass (kg)") + theme_bw() ## ----APSIMDeriv--------------------------------------------------------------- plotDatDt <- obtainSmoothTrend(obj8, grid = 1000, deriv = 1) ggplot(data = plotDatDt, aes(x = das, y = ypred)) + geom_line(color = "red", linewidth = 1) + labs(title = "APSIM growth rate as function of time", x = "days after sowing", y = "growth rate (kg/day)") + theme_bw() ## ----multipop----------------------------------------------------------------- ## Load data for multiparental population. data(multipop) head(multipop) ## ----residualARG-------------------------------------------------------------- ## Fit null model. obj9 <- LMMsolve(fixed = pheno ~ cross, residual = ~cross, data = multipop) dev0 <- deviance(obj9, relative = FALSE) ## ----groupOPTION-------------------------------------------------------------- ## Fit alternative model - include QTL with probabilities defined in columns 3:5 lGrp <- list(QTL = 3:5) obj10 <- LMMsolve(fixed = pheno ~ cross, group = lGrp, random = ~grp(QTL), residual = ~cross, data = multipop) dev1 <- deviance(obj10, relative = FALSE) ## ----approxPvalue------------------------------------------------------------- ## Deviance difference between null and alternative model. dev <- dev0 - dev1 ## Calculate approximate p-value. minlog10p <- -log10(0.5 * pchisq(dev, 1, lower.tail = FALSE)) round(minlog10p, 2) ## ----QTLeffects--------------------------------------------------------------- coef(obj10)$QTL