classification: convolutional architecture

The goal of this vignette is to demonstrate how to use LBBNNs with a convolutional architecture. We will only use a dummy dataset here, but the torchvision package can be used to try it out on real datasets, e.g. MNIST or KMNIST.

Demonstration of how to download KMNIST using torchvision

# torchvision is only needed for the optional KMNIST download below, so it is
# installed on demand. `quietly = TRUE` keeps the availability check from
# emitting its own message when the package is missing.
if (!requireNamespace("torchvision", quietly = TRUE)) {
  install.packages("torchvision")
}
# Fix the torch RNG seed so the run is reproducible.
torch::torch_manual_seed(42)
# Root directory passed to the KMNIST dataset constructors.
dir <- "./dataset/kmnist"
# Convert a raw KMNIST item to a tensor, inserting a channel axis when the
# input arrives as a square 3-d array with more than one slice.
# NOTE(review): the original author saw the un-channelled [28, 28, batch]
# layout on Windows (and possibly Linux) but not on macOS -- confirm.
kmnist_transform <- function(x) {
  shape <- dim(x)
  # Decide on the raw input, before the conversion changes the layout.
  batched_hwn <- length(shape) == 3 && shape[3] > 1 && shape[1] == shape[2]
  out <- torchvision::transform_to_tensor(x)
  if (batched_hwn) {
    # Expected to be [batch, 28, 28] here; add the channel dimension to get
    # [batch, 1, 28, 28].
    out <- out$unsqueeze(2)
  }
  out
}
# Build the KMNIST train/test datasets with torchvision and wrap each one in
# a dataloader. Only the training call requests a download.
# NOTE(review): presumably the test split reuses the files already placed
# under `dir` by the first call -- confirm.
train_ds <- torchvision::kmnist_dataset(dir, download = TRUE,
                                        transform = kmnist_transform)

test_ds <- torchvision::kmnist_dataset(dir, train = FALSE,
                                       transform = kmnist_transform)

# Batches of 100; only the training loader shuffles.
train_loader_kmnist <- torch::dataloader(
  train_ds,
  batch_size = 100,
  shuffle = TRUE
)
test_loader_kmnist <- torch::dataloader(
  test_ds,
  batch_size = 100
)

Create dummy dataset with the same shape as KMNIST

# Re-seed so the dummy data below is reproducible; the statement order matters
# because `x` and `y` are drawn from the same seeded generator.
torch::torch_manual_seed(42)
# 200 random single-channel 28 x 28 "images" (standard normal noise).
x <- torch::torch_randn(200, 1, 28, 28)
# 200 random labels drawn from 1..10 (the upper bound 11 is exclusive).
y <- torch::torch_randint(1, 11, size = 200)
dataset <- torch::tensor_dataset(x, y)
# Two batches of 100 observations each, served in order (no shuffling).
train_loader <- torch::dataloader(dataset, batch_size = 100)

Create the layers that define the architecture of our convolutional network

Here we manually define the layers, as we will use both convolutional and feed-forward layers. They are defined as follows:

# All layers are created on the CPU; change `device` to use an accelerator.
device <- "cpu"

# First convolutional layer: 1 input channel -> 32 feature maps, 5 x 5 kernel.
conv_layer_1 <- lbbnn_conv2d(
  in_channels = 1,
  out_channels = 32,
  kernel_size = 5,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 10),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device
)

# Second convolutional layer: 32 -> 64 feature maps, 5 x 5 kernel.
conv_layer_2 <- lbbnn_conv2d(
  in_channels = 32,
  out_channels = 64,
  kernel_size = 5,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 15),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device
)

# First fully connected layer. The 1024 inputs match 64 channels x 4 x 4,
# assuming unpadded stride-1 convolutions and 2 x 2 pooling
# (28 -> 24 -> 12 -> 8 -> 4) -- TODO confirm against the lbbnn_conv2d defaults.
linear_layer_1 <- lbbnn_linear(
  in_features = 1024,
  out_features = 300,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 10),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device,
  bias_inclusion_prob = FALSE,
  conv_net = TRUE
)

# Output layer: 300 hidden units -> 10 classes.
linear_layer_2 <- lbbnn_linear(
  in_features = 300,
  out_features = 10,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-5, 15),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device,
  bias_inclusion_prob = FALSE,
  conv_net = TRUE
)

Define the model object

We apply a max-pooling layer after each convolutional layer.

# Convolutional LBBNN: two (convolution -> leaky ReLU -> max-pool) stages
# followed by two fully connected layers.
LBBNN_ConvNet <- torch::nn_module(
  "LBBNN_ConvNet",

  # Store the pre-built layers and the fixed activation/pooling/loss modules.
  #   conv1, conv2: convolutional layers, applied in that order.
  #   fc1, fc2:     linear layers, applied in that order after flattening.
  #   device:       accepted so existing calls keep working, but not used
  #                 here; move the module with `$to(device = ...)` instead.
  # The default is a literal rather than `device = device`, which is a
  # recursive default reference and would error if it were ever evaluated.
  initialize = function(conv1, conv2, fc1, fc2, device = "cpu") {
    # NOTE(review): `problem_type` and `input_skip` are not read in this
    # module; presumably the training/validation helpers consult them.
    self$problem_type <- "multiclass classification"
    self$input_skip <- FALSE
    self$conv1 <- conv1
    self$conv2 <- conv2
    self$fc1 <- fc1
    self$fc2 <- fc2
    self$pool <- torch::nn_max_pool2d(2)
    self$act <- torch::nn_leaky_relu()
    self$out <- torch::nn_log_softmax(dim = 2)
    self$pout <- torch::nn_softmax(dim = 2)
    self$loss_fn <- torch::nn_nll_loss(reduction = "sum")
  },

  # Forward pass.
  #   x:       input batch.
  #   MPM:     forwarded unchanged to every LBBNN layer.
  #   predict: if TRUE return softmax probabilities, otherwise
  #            log-probabilities (log-softmax) as needed by the NLL loss.
  # The result is returned visibly (the last expression is a value, not an
  # assignment), so calling the model at the console prints the output.
  forward = function(x, MPM = FALSE, predict = FALSE) {
    x <- self$act(self$conv1(x, MPM))
    x <- self$pool(x)
    x <- self$act(self$conv2(x, MPM))
    x <- self$pool(x)
    # Keep the first (batch) dimension, flatten everything else.
    x <- torch::torch_flatten(x, start_dim = 2)
    x <- self$act(self$fc1(x, MPM))
    scores <- self$fc2(x, MPM)
    if (predict) {
      self$pout(scores)
    } else {
      self$out(scores)
    }
  },

  # Sum of the KL divergence terms reported by the four LBBNN layers.
  kl_div = function() {
    self$conv1$kl_div() + self$conv2$kl_div() +
      self$fc1$kl_div() + self$fc2$kl_div()
  },

  # Fraction of `alpha` entries, pooled over all four layers, that exceed 0.5.
  density = function() {
    alphas <- c(
      as.numeric(self$conv1$alpha),
      as.numeric(self$conv2$alpha),
      as.numeric(self$fc1$alpha),
      as.numeric(self$fc2$alpha)
    )
    mean(alphas > 0.5)
  },

  # Placeholder: this architecture does not compute paths.
  compute_paths = function() {
    NULL
  },

  # Placeholder: no active-path density is available, so NA is reported.
  density_active_path = function() {
    NA
  }
)

# Assemble the network from the layers defined above, then move it to `device`.
model_conv <- LBBNN_ConvNet(
  conv_layer_1,
  conv_layer_2,
  linear_layer_1,
  linear_layer_2,
  device
)
model_conv$to(device = device)

Train and validate the model

These functions work the same way as they do for feed-forward architectures. Training can be accelerated on a GPU where one is available.

# Train for two epochs on the dummy loader.
train_lbbnn(
  epochs = 2,
  LBBNN = model_conv,
  lr = 0.01,
  train_dl = train_loader,
  device = device
)
#> 
#> Epoch 1, training: loss = 892055.75000, acc = 0.08500, density = 0.51523
#> 
#> Epoch 2, training: loss = 888615.37500, acc = 0.11000, density = 0.51430
# The dummy training loader is reused as the evaluation set here.
validate_lbbnn(
  model_conv,
  num_samples = 2,
  test_dl = train_loader,
  device = device
)
#> $accuracy_full_model
#> [1] 0.13
#> 
#> $accuracy_sparse
#> [1] 0.13
#> 
#> $density
#> [1] 0.5138763
#> 
#> $density_active_path
#> [1] NA

Need a high-speed mirror for your open-source project?
Contact our mirror admin team at info@clientvps.com.