The goal of this vignette is to demonstrate how to use LBBNNs with a convolutional architecture. We will only use a dummy dataset here, but the torchvision package can be used to try it out on real datasets, e.g. MNIST or KMNIST (the code below loads KMNIST).
# Install torchvision if it is not already available; the dataset and
# transform helpers used in this vignette come from it.
if (!requireNamespace("torchvision")) {
  install.packages("torchvision")
}

# Seed torch's random number generator.
torch::torch_manual_seed(42)

# Directory passed to the KMNIST dataset constructors.
dir <- "./dataset/kmnist"
# Convert a raw KMNIST input to a tensor, adding a channel dimension when
# the input arrives as a square 3-d array with more than one slice.
#
# x: the object handed to the transform by the dataset.
# Returns the result of torchvision::transform_to_tensor(x), with an extra
# dimension inserted at position 2 for the 3-d case.
#
# Per the original author's notes: the 3-d case is a [28, 28, batch] input
# (seen on Windows, possibly Linux), which becomes [batch, 28, 28] after the
# tensor conversion and [batch, 1, 28, 28] after the unsqueeze; on macOS the
# input needs no extra handling. NOTE(review): shapes not verifiable from
# this file -- confirm.
kmnist_transform <- function(x) {
  shape <- dim(x)
  # Decide on the raw input's shape, before it is converted.
  needs_channel <- length(shape) == 3 && shape[3] > 1 && shape[1] == shape[2]

  out <- torchvision::transform_to_tensor(x)
  if (needs_channel) {
    out <- out$unsqueeze(2)
  }
  out
}
# Fetch the KMNIST training and test sets through torchvision (the training
# call downloads the data into `dir`), applying kmnist_transform to each
# item, then wrap the training set in a shuffled loader with batches of 100.
train_ds <- torchvision::kmnist_dataset(
  dir, download = TRUE, transform = kmnist_transform
)
test_ds <- torchvision::kmnist_dataset(
  dir, train = FALSE, transform = kmnist_transform
)
train_loader_kmnist <- torch::dataloader(
  train_ds,
  batch_size = 100,
  shuffle = TRUE
)
test_loader_kmnist <- torch::dataloader(test_ds, batch_size = 100)

Here we manually define the layers, as we will use both convolutional and feed-forward layers. They are defined as follows:
# Device on which the layers are created.
device <- "cpu"

# First convolutional layer: 1 input channel -> 32 output channels, 5x5 kernel.
conv_layer_1 <- lbbnn_conv2d(
  in_channels = 1,
  out_channels = 32,
  kernel_size = 5,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 10),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device
)

# Second convolutional layer: 32 -> 64 channels, 5x5 kernel.
conv_layer_2 <- lbbnn_conv2d(
  in_channels = 32,
  out_channels = 64,
  kernel_size = 5,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 15),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device
)

# First fully connected layer: takes the 1024 flattened features coming out
# of the convolutional part and maps them to 300 hidden units.
linear_layer_1 <- lbbnn_linear(
  in_features = 1024,
  out_features = 300,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 10),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device,
  bias_inclusion_prob = FALSE,
  conv_net = TRUE
)
linear_layer_2 <- lbbnn_linear(in_features = 300, out_features = 10,
prior_inclusion = 0.5, standard_prior = 1,
density_init = c(-5, 15),num_transforms = 2,
flow = FALSE, hidden_dims = c(200, 200), device = device,
bias_inclusion_prob = FALSE, conv_net = TRUE)

We include pooling layers between the convolutional layers.
# Convolutional LBBNN module.
#
# Architecture (see forward()): conv1 -> activation -> max-pool ->
# conv2 -> activation -> max-pool -> flatten -> fc1 -> activation -> fc2,
# followed by log-softmax (training) or softmax (prediction).
#
# Constructor arguments:
#   conv1, conv2, fc1, fc2  Pre-built layers. Each must be callable as
#                           layer(x, MPM), provide a kl_div() method and
#                           expose an `alpha` field.
#   device                  Accepted for call compatibility; it is not used
#                           inside the module. The default is a literal
#                           rather than `device = device`, which is a
#                           self-referential default that would raise
#                           "promise already under evaluation" if forced.
LBBNN_ConvNet <- torch::nn_module(
  "LBBNN_ConvNet",

  initialize = function(conv1, conv2, fc1, fc2, device = "cpu") {
    # Metadata fields; presumably read by the package's training and
    # validation helpers -- confirm against train_lbbnn()/validate_lbbnn().
    self$problem_type <- "multiclass classification"
    self$input_skip <- FALSE

    self$conv1 <- conv1
    self$conv2 <- conv2
    self$fc1 <- fc1
    self$fc2 <- fc2

    self$pool <- torch::nn_max_pool2d(2)
    self$act <- torch::nn_leaky_relu()
    # Both output heads normalise over dimension 2.
    self$out <- torch::nn_log_softmax(dim = 2)
    self$pout <- torch::nn_softmax(dim = 2)
    # Negative log-likelihood summed over the batch; pairs with the
    # log-softmax head.
    self$loss_fn <- torch::nn_nll_loss(reduction = "sum")
  },

  # Forward pass.
  #   x        Input tensor for conv1.
  #   MPM      Flag forwarded unchanged to every LBBNN layer.
  #   predict  If TRUE return softmax output, otherwise log-softmax output.
  forward = function(x, MPM = FALSE, predict = FALSE) {
    x <- self$pool(self$act(self$conv1(x, MPM)))
    x <- self$pool(self$act(self$conv2(x, MPM)))
    # Keep dimension 1 and collapse the rest for the linear layers.
    x <- torch::torch_flatten(x, start_dim = 2)
    x <- self$act(self$fc1(x, MPM))

    logits <- self$fc2(x, MPM)
    if (predict) {
      self$pout(logits)
    } else {
      self$out(logits)
    }
  },

  # Sum of the KL-divergence terms of the four layers.
  kl_div = function() {
    self$conv1$kl_div() + self$conv2$kl_div() +
      self$fc1$kl_div() + self$fc2$kl_div()
  },

  # Fraction of `alpha` values, pooled over all four layers, above 0.5.
  density = function() {
    alphas <- c(
      as.numeric(self$conv1$alpha),
      as.numeric(self$conv2$alpha),
      as.numeric(self$fc1$alpha),
      as.numeric(self$fc2$alpha)
    )
    mean(alphas > 0.5)
  },

  # Placeholder: this architecture does no path computation.
  compute_paths = function() {
    NULL
  },

  # Placeholder: active-path density is not available for this architecture.
  density_active_path = function() {
    NA
  }
)
# Assemble the network from the four layers defined above.
model_conv <- LBBNN_ConvNet(
  conv1 = conv_layer_1,
  conv2 = conv_layer_2,
  fc1 = linear_layer_1,
  fc2 = linear_layer_2,
  device = device
)
model_conv$to(device = device)

The training and validation functions (train_lbbnn and validate_lbbnn) work the same as with feed-forward architectures. Training can be accelerated on GPU where available.
# Train for two epochs.
# NOTE(review): `train_loader` is not defined in the code shown in this
# vignette (the loaders built above are `train_loader_kmnist` and
# `test_loader_kmnist`); presumably it is the dummy-data loader from a
# hidden chunk -- confirm.
train_lbbnn(
  epochs = 2,
  LBBNN = model_conv,
  lr = 0.01,
  train_dl = train_loader,
  device = device
)
#>
#> Epoch 1, training: loss = 892055.75000, acc = 0.08500, density = 0.51523
#>
#> Epoch 2, training: loss = 888615.37500, acc = 0.11000, density = 0.51430
# Evaluate the trained model using two samples.
# NOTE(review): the evaluation loader passed here is `train_loader`, which is
# not defined in the code shown in this vignette; presumably it is the
# dummy-data loader from a hidden chunk -- confirm.
validate_lbbnn(
  model_conv,
  num_samples = 2,
  test_dl = train_loader,
  device = device
)
#> $accuracy_full_model
#> [1] 0.13
#>
#> $accuracy_sparse
#> [1] 0.13
#>
#> $density
#> [1] 0.5138763
#>
#> $density_active_path
#> [1] NA
Need a high-speed mirror for your open-source project?
Contact our mirror admin team at info@clientvps.com.
This archive is provided as a free public service to the community.
Proudly supported by infrastructure from VPSPulse, RxServers, BuyNumber, UnitVPS, OffshoreName and secure payment technology by ArionPay.