## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 8 , fig.height = 8, fig.align ='center' ) ## ----setup-------------------------------------------------------------------- library(ShapleyOutlier) ## ----------------------------------------------------------------------------- library(robustHD) library(dplyr) library(tidyr) # library(tidyverse) library(cellWise) ## ----------------------------------------------------------------------------- data(TopGear) rownames(TopGear) = paste(TopGear[,1],TopGear[,2]) myTopGear <- TopGear[,-31] #removing the verdict variable myTopGear <- myTopGear[,sapply(myTopGear,function(x)any(is.numeric(x)))] myTopGear <- myTopGear[!apply(myTopGear,1, function(x)any(is.na(x))),] myTopGear <- myTopGear[,-2] # Transform some variables to get roughly gaussianity in the center: transTG = myTopGear transTG$Price = log(myTopGear$Price) transTG$Displacement = log(myTopGear$Displacement) transTG$BHP = log(myTopGear$BHP) transTG$Torque = log(myTopGear$Torque) transTG$TopSpeed = log(myTopGear$TopSpeed) transTG <- transTG %>% rename("log(Price)" = Price, "log(Displacement)" = Displacement, "log(BHP)" = BHP, "log(Torque)" = Torque, "log(TopSpeed)" = TopSpeed) X <- as.matrix(transTG) X <- robStandardize(X) n <- nrow(X) p <- ncol(X) set.seed(1) MCD <- covMcd(X, nsamp = "best") mu <-MCD$center Sigma <- MCD$cov Sigma_inv <- solve(MCD$cov) phi <- shapley(x = X, mu = mu, Sigma = Sigma_inv, inverted = TRUE)$phi colnames(phi) <- colnames(transTG) rownames(phi) <- rownames(transTG) md <- rowSums(phi) chi2.q <- 0.99 crit <- sqrt(qchisq(chi2.q,p)) ## ----------------------------------------------------------------------------- n_obs <- 6 TopGear_SCD <- SCD(x = X[names(sort(md, decreasing = TRUE)[1:n_obs]),], mu, Sigma, Sigma_inv, step_size = 0.1, min_deviation = 0.2) plot(TopGear_SCD, type = "bar", md_squared = FALSE) ## ----------------------------------------------------------------------------- TopGear_MOE <- MOE(x = X[names(sort(md, decreasing = TRUE)[1:n_obs]),], mu, Sigma, Sigma_inv, step_size = 0.1, local = TRUE, min_deviation = 0.2) plot(TopGear_MOE, type = "bar", md_squared = FALSE) ## ----------------------------------------------------------------------------- X_sub <- X[names(sort(md, decreasing = TRUE)[1:n_obs]),] X_sub_cH <- cellHandler(X_sub, mu = mu, Sigma = Sigma)$Ximp explain_cH <- shapley(x = X_sub, mu = mu, Sigma = Sigma_inv, inverted = TRUE, cells = (X_sub != X_sub_cH)) plot(explain_cH, abbrev.var = FALSE, abbrev.obs = FALSE, md_squared = FALSE) ## ----------------------------------------------------------------------------- TopGear_SCD_rescaled <- TopGear_SCD TopGear_SCD_rescaled$x_original <- t(apply(TopGear_SCD_rescaled$x_original,1, function(x) x*attr(X, "scale") + attr(X, "center"))) TopGear_SCD_rescaled$x <- t(apply(TopGear_SCD_rescaled$x,1, function(x) x*attr(X, "scale") + attr(X, "center"))) plot(TopGear_SCD_rescaled, type = "cell") + coord_flip() ## ----------------------------------------------------------------------------- TopGear_MOE_rescaled <- TopGear_MOE TopGear_MOE_rescaled$x_original <- t(apply(TopGear_MOE_rescaled$x_original,1, function(x) x*attr(X, "scale") + attr(X, "center"))) TopGear_MOE_rescaled$x <- t(apply(TopGear_MOE_rescaled$x,1, function(x) x*attr(X, "scale") + attr(X, "center"))) plot(TopGear_MOE_rescaled, type = "cell") + coord_flip() ## ----------------------------------------------------------------------------- plot(x = new_shapley_algorithm(x = t(apply(X_sub_cH,1, function(x) x*attr(X, "scale") + attr(X, "center"))), phi = explain_cH$phi, x_original = t(apply(X_sub,1, function(x) x*attr(X, "scale") + attr(X, "center")))), type = "cell") + coord_flip() ## ----------------------------------------------------------------------------- ind <- 3 interaction1 <- shapley_interaction(X[names(sort(md, decreasing = TRUE)[ind]),], TopGear_MOE$mu_tilde[ind,], Sigma) interaction2 <- shapley_interaction(X[names(sort(md, decreasing = TRUE)[ind+1]),], TopGear_MOE$mu_tilde[ind+1,], Sigma) dimnames(interaction1) <- list(c("Price", "Displ.", "BHP", "Torque", "Acc", "T.Speed", "MPG", "Weight", "Length", "Width", "Height"), c("Price", "Displ.", "BHP", "Torque", "Acc", "T.Speed", "MPG", "Weight", "Length", "Width", "Height")) dimnames(interaction2) <- dimnames(interaction1) plot(interaction1, abbrev = FALSE, title = names(sort(md, decreasing = TRUE)[ind])) plot(interaction2, abbrev = FALSE, title = names(sort(md, decreasing = TRUE)[ind+1])) ## ----------------------------------------------------------------------------- data("WeatherVienna") weather_summer <- WeatherVienna %>% dplyr::select(-c(`t`, t_max, t_min, p_max, p_min)) %>% drop_na() %>% filter(month %in% c("JUN", "JUL", "AUG")) %>% filter(year >= 1955) %>% group_by(year) %>% dplyr::select(-month) %>% summarise(across(.cols = everything(), function(x) mean(x))) X <- weather_summer %>% dplyr::select(-c(num_frost, num_ice, year)) rownames(X) <- weather_summer$year X <- robStandardize(X) n <- nrow(X) p <- ncol(X) set.seed(1) MCD <- covMcd(X, alpha = 0.5, nsamp = "best") mu <-MCD$center Sigma <- MCD$cov Sigma_inv <- solve(MCD$cov) phi <- shapley(x = X, mu = mu, Sigma = Sigma_inv, inverted = TRUE)$phi colnames(phi) <- colnames(X) rownames(phi) <- rownames(X) md <- rowSums(phi) chi2.q <- 0.99 crit <- sqrt(qchisq(chi2.q,p)) ## ----------------------------------------------------------------------------- weather_SCD <- SCD(x = X, mu, Sigma, Sigma_inv, step_size = 0.1, min_deviation = 0.2) plot(weather_SCD, abbrev.var = FALSE, abbrev.obs = FALSE, md_squared = FALSE, sort.obs = FALSE, type = "bar") ## ----------------------------------------------------------------------------- weather_MOE <- MOE(x = X, mu, Sigma, Sigma_inv, step_size = 0.1, local = TRUE, min_deviation = 0.2) plot(weather_MOE, abbrev.var = FALSE, abbrev.obs = FALSE, md_squared = FALSE, sort.obs = FALSE, type = "bar") ## ----------------------------------------------------------------------------- X_cH <- cellHandler(as.matrix(X), mu = mu, Sigma = Sigma)$Ximp explain_cH <- shapley(x = X, mu = mu, Sigma = Sigma_inv, inverted = TRUE, cells = (X != X_cH)) plot(explain_cH, abbrev.var = FALSE, abbrev.obs = FALSE, md_squared = FALSE) ## ----------------------------------------------------------------------------- weather_SCD_rescaled <- weather_SCD weather_SCD_rescaled$x_original <- t(apply(weather_SCD_rescaled$x_original,1, function(x) x*attr(X, "scale") + attr(X, "center"))) weather_SCD_rescaled$x <- t(apply(weather_SCD_rescaled$x,1, function(x) x*attr(X, "scale") + attr(X, "center"))) plot(weather_SCD_rescaled, type = "cell", n_digits = 0, continuous_rowname = 10, rotate_x = FALSE) ## ----------------------------------------------------------------------------- weather_MOE_rescaled <- weather_MOE weather_MOE_rescaled$x_original <- t(apply(weather_MOE_rescaled$x_original,1, function(x) x*attr(X, "scale") + attr(X, "center"))) weather_MOE_rescaled$x <- t(apply(weather_MOE_rescaled$x,1, function(x) x*attr(X, "scale") + attr(X, "center"))) plot(weather_MOE_rescaled, type = "cell", n_digits = 0, continuous_rowname = 10, rotate_x = FALSE) ## ----------------------------------------------------------------------------- plot(x = new_shapley_algorithm(x = t(apply(X_cH,1, function(x) x*attr(X, "scale") + attr(X, "center"))), phi = explain_cH$phi, x_original = t(apply(X,1, function(x) x*attr(X, "scale") + attr(X, "center")))), type = "cell", n_digits = 0, continuous_rowname = 10, rotate_x = FALSE) ## ----------------------------------------------------------------------------- ind <- 67 #year 2021 interaction1 <- shapley_interaction(as.numeric(X[ind,]), mu, Sigma) interaction2 <- shapley_interaction(as.numeric(X[ind,]), weather_MOE$mu_tilde[ind,], Sigma) plot(interaction1, abbrev = FALSE, legend = FALSE, title = "SCD: year 2021", text_size = 16) plot(interaction2, abbrev = FALSE, title = "MOE: year 2021", text_size = 16) ## ----------------------------------------------------------------------------- phi_SCD <- unique(weather_SCD$phi_history[[ind]]) rownames(phi_SCD) <- paste("Step", 0:(nrow(phi_SCD)-1)) plot(new_shapley(phi = phi_SCD), abbrev.var = FALSE, abbrev.obs = FALSE, sort.obs = FALSE, sort.var = FALSE) ## ----------------------------------------------------------------------------- phi_MOE <- weather_MOE$phi_history[[ind]] mu_tilde_MOE <- weather_MOE$mu_tilde_history[[ind]] non_centrality_MOE <- apply(mu_tilde_MOE, 1, function(x) mahalanobis(x, mu, Sigma_inv, inverted = TRUE)) rownames(phi_MOE) <- paste("Step", 0:(nrow(phi_MOE)-1)) plot(new_shapley(phi = phi_MOE, mu_tilde = mu_tilde_MOE, non_centrality = non_centrality_MOE), abbrev.var = FALSE, abbrev.obs = FALSE, sort.obs = FALSE, sort.var = FALSE)