---
title: "classification: convolutional architecture"
output: rmarkdown::html_vignette
params:
  eval: true
vignette: >
  %\VignetteIndexEntry{classification: convolutional architecture}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(LBBNN)
# Chunks below marked `eval = has_torch` only run when torch is usable.
has_torch <- requireNamespace("torch", quietly = TRUE) &&
  torch::torch_is_installed()
```

The goal of this vignette is to demonstrate how to use LBBNNs with a convolutional architecture. We will only use a dummy dataset here, but the torchvision package can be used to try it out on real datasets, e.g. MNIST or KMNIST (shown below).

## Demonstration of how to download KMNIST using torchvision

```{r, eval = FALSE}
if (!requireNamespace("torchvision", quietly = TRUE)) {
  install.packages("torchvision")
}
torch::torch_manual_seed(42)
dir <- "./dataset/kmnist"

# Convert a raw KMNIST input to a tensor.
#
# When the input is a square 3-d array with more than one slice in its last
# dimension (shape [28, 28, batch], as on Windows and Linux(?)), a channel
# dimension is inserted so the result has shape [batch, 1, 28, 28].
# On macOS the converted input needs no further handling.
kmnist_transform <- function(x) {
  d <- dim(x)
  # Decide on the raw input, before the conversion changes its shape.
  needs_channel <- length(d) == 3 && d[3] > 1 && d[1] == d[2]
  x <- torchvision::transform_to_tensor(x)
  if (needs_channel) {
    # Shape is now [batch, 28, 28]; add the channel dimension
    # -> [batch, 1, 28, 28].
    x <- x$unsqueeze(2)
  }
  x
}

# Get the datasets from torchvision and define training and test loaders.
train_ds <- torchvision::kmnist_dataset(
  dir,
  download = TRUE,
  transform = kmnist_transform
)
test_ds <- torchvision::kmnist_dataset(
  dir,
  train = FALSE,
  transform = kmnist_transform
)

train_loader_kmnist <- torch::dataloader(
  train_ds,
  batch_size = 100,
  shuffle = TRUE
)
test_loader_kmnist <- torch::dataloader(test_ds, batch_size = 100)
```

## Create dummy dataset with the same shape as KMNIST

```{r, eval = has_torch}
torch::torch_manual_seed(42)
# 200 random single-channel 28 x 28 inputs with one random label each.
x <- torch::torch_randn(200, 1, 28, 28)
y <- torch::torch_randint(1, 11, size = 200)
dataset <- torch::tensor_dataset(x, y)
train_loader <- torch::dataloader(dataset, batch_size = 100)
```

## Create the layers that define the architecture of our convolutional network

Here we manually define the layers, as we will use both convolutional and feed-forward layers.
They are defined as follows:

```{r, eval = has_torch}
device <- "cpu"

# First convolutional layer: 1 input channel, 32 output channels, kernel size 5.
conv_layer_1 <- lbbnn_conv2d(
  in_channels = 1,
  out_channels = 32,
  kernel_size = 5,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 10),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device
)

# Second convolutional layer: 32 input channels, 64 output channels.
conv_layer_2 <- lbbnn_conv2d(
  in_channels = 32,
  out_channels = 64,
  kernel_size = 5,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 15),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device
)

# First feed-forward layer, applied to the flattened convolutional output.
# NOTE(review): 1024 presumably equals 64 channels x 4 x 4, i.e. a 28 x 28
# input after two unpadded 5 x 5 convolutions and two 2 x 2 poolings - confirm
# against the padding/stride defaults of lbbnn_conv2d.
linear_layer_1 <- lbbnn_linear(
  in_features = 1024,
  out_features = 300,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 10),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device,
  bias_inclusion_prob = FALSE,
  conv_net = TRUE
)

# Output layer: 300 hidden units to 10 outputs.
linear_layer_2 <- lbbnn_linear(
  in_features = 300,
  out_features = 10,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-5, 15),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device,
  bias_inclusion_prob = FALSE,
  conv_net = TRUE
)
```

## Define the model object

We include pooling layers between the convolutional layers.

```{r, eval = has_torch}
# Convolutional LBBNN:
# conv1 -> pool -> conv2 -> pool -> flatten -> fc1 -> fc2 -> (log-)softmax.
LBBNN_ConvNet <- torch::nn_module(
  "LBBNN_ConvNet",

  # conv1, conv2: convolutional layers; fc1, fc2: linear layers.
  # device: not used inside the module (the layers are created with their own
  #   `device` argument). The default is a plain value rather than the
  #   self-referential `device = device`, which would fail with a recursive
  #   default-argument error if the argument were ever evaluated.
  initialize = function(conv1, conv2, fc1, fc2, device = "cpu") {
    # NOTE(review): problem_type and input_skip are presumably read by the
    # LBBNN training/validation helpers - confirm against the package.
    self$problem_type <- "multiclass classification"
    self$input_skip <- FALSE
    self$conv1 <- conv1
    self$conv2 <- conv2
    self$fc1 <- fc1
    self$fc2 <- fc2
    self$pool <- torch::nn_max_pool2d(2)
    self$act <- torch::nn_leaky_relu()
    self$out <- torch::nn_log_softmax(dim = 2)
    self$pout <- torch::nn_softmax(dim = 2)
    self$loss_fn <- torch::nn_nll_loss(reduction = "sum")
  },

  # Forward pass. `MPM` is passed on to every LBBNN layer. With
  # `predict = FALSE` the log-softmax of the last layer is returned (matching
  # the NLL loss above); with `predict = TRUE` the softmax is returned.
  forward = function(x, MPM = FALSE, predict = FALSE) {
    x <- self$act(self$conv1(x, MPM))
    x <- self$pool(x)
    x <- self$act(self$conv2(x, MPM))
    x <- self$pool(x)
    # Keep the first dimension and flatten the rest for the linear layers.
    x <- torch::torch_flatten(x, start_dim = 2)
    x <- self$act(self$fc1(x, MPM))
    scores <- self$fc2(x, MPM)
    # Return the value visibly instead of as the result of an assignment.
    if (predict) {
      self$pout(scores)
    } else {
      self$out(scores)
    }
  },

  # Sum of the KL-divergence terms of all four layers.
  kl_div = function() {
    self$conv1$kl_div() + self$conv2$kl_div() +
      self$fc1$kl_div() + self$fc2$kl_div()
  },

  # Fraction of all `alpha` entries, across the four layers, that exceed 0.5.
  density = function() {
    alphas <- c(
      as.numeric(self$conv1$alpha),
      as.numeric(self$conv2$alpha),
      as.numeric(self$fc1$alpha),
      as.numeric(self$fc2$alpha)
    )
    mean(alphas > 0.5)
  },

  # Stub: always returns NULL for this architecture.
  compute_paths = function() {
    NULL
  },

  # Stub: always returns NA for this architecture.
  density_active_path = function() {
    NA
  }
)

model_conv <- LBBNN_ConvNet(
  conv_layer_1,
  conv_layer_2,
  linear_layer_1,
  linear_layer_2,
  device
)
model_conv$to(device = device)
```

## Train and validate the model

These functions work the same as with feed-forward architectures. Training can be accelerated on GPU where available.

```{r, eval = has_torch}
train_lbbnn(
  epochs = 2,
  LBBNN = model_conv,
  lr = 0.01,
  train_dl = train_loader,
  device = device
)
# The dummy training loader is reused as the test loader here.
validate_lbbnn(
  model_conv,
  num_samples = 2,
  test_dl = train_loader,
  device = device
)
```