Analysis of Immucan Lung

Setup

knitr::opts_chunk$set(echo=TRUE)
knitr::opts_chunk$set(warning=FALSE, message=FALSE)

# Load libraries
library(RColorBrewer)

# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Immucan_lung/training/full"

## Input data
data.path <- "/mnt/bb_dqbm_volume/data/Immucan_lung"

# Set path to masks
masks.path <- file.path(data.path, "masks")
# Set path to pre-trained random forest classifier
rf.path <- file.path(data.path, "rffit_v7_all_markers.rds")

# Import helper fncs
source("../analysis/helpers/helper_fnc.R")

Read Inputs

## Read results
# Read in all results for lung data into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
                            full.names=TRUE, recursive=TRUE)
results.files <- results.files[grepl("normalized", results.files)]

results.df <- lapply(results.files, read_results, type="res", voi="norm") %>% 
  bind_rows() %>%
  mutate(norm=ifelse(grepl("combi", norm), "combi", norm))


## Read X_test and X_simulated
# Specify X_test and X_simulated files to be read in
X.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)
X.files <- X.files[grepl("normalized", X.files)]

# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. which dataset is used in training and testing, if data was normalized before 
# training and if it is the ground truth or simulated X)
X.list <- lapply(X.files, read_results, type="x", voi="norm")
X.list <- lapply(X.list, function(sce.temp){
  if (grepl("combi", metadata(sce.temp)$norm)){
    metadata(sce.temp)$norm <- "combi"
  } 
  assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
  
  sce.temp
})


# Read in masks for lung data
masks <- loadImages(masks.path, as.is=TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))

## Read random forest classifier
# Read RF classifier for the lung dataset
rffit.lung <- readRDS(rf.path)

Barplot of Results

For each different measurement used to test training results, a barplot is shown comparing CISI results using the original normalization across cells (used in training, simulating and to what it is compared to) and not using it at all.

# For each different measurement of training results, plot barplot comparing
# normalized und unnormalized results

# Melt dataframe for plotting
data_temp <- results.df %>%
  dplyr::select(-c("dataset", "training", "datasize", "version", "Best crossvalidation fold")) %>%
  pivot_longer(!c("norm", "simulation"), names_to="measure", values_to="value")

# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "norm")
print(results.barplot)

Scatterplot Results with and without Normalization in Training (k=1, arcsinh)

The following plots show scatterplots of different proteins of interest where cells are coloured according to celltypes of interest for Immucan lung dataset once trained on normalized and once trained on unnormalized datasets.

names(X.list) <- lapply(X.list, function(x){metadata(x)$ground_truth})
X.list <- append(X.list[-c(1:2)], X.list[1:2])

# List of combinations of proteins of interest
poi <- list(c("CD20", "CD3", "B"), c("VISTA", "CD3", ""))
for (p_vec in poi) {
  cat("###", p_vec[1], "vs", p_vec[2], "\n")
  
  # Plot arcsinh transformed counts of proteins of interest of decomposed data trained
  # with and without normalization
  X.spillover <- plot_grid(plot_exprs(X.list[1:2], p_vec[3], p_vec[1], p_vec[2]),
                           plot_exprs(X.list[3:4], p_vec[3], p_vec[1], p_vec[2]),
                           plot_exprs(X.list[5:6], p_vec[3], p_vec[1], p_vec[2]),
                           ncol=1, labels=unique(unlist(lapply(X.list, function(sce){
                             metadata(sce)$norm}))),
                           label_size=title.fontsize, hjust=c(-4.5, -3, -7.5), 
                           vjust=0.9, rel_widths = c(1, 2)) #, scale=0.9)
  
  print(X.spillover)
  cat("\n\n")
}

CD20 vs CD3

VISTA vs CD3

Cell Phenotyping (k=1, arcsinh)

Next, we use the gating labels of a pre-trained random forest classifier for the Immucan lung dataset to newly train a random forest classifier on our decomposed arcsinh transformed data and predict all celltypes.

We then compare these results to the ‘ground truth’ celltypes (celltypes predicted from ground truth data) by calculating the precision, sensitivity and specififity.

## Fix colours for plotting
celltype.col <- c(brewer.pal(12, "Paired"),
                           brewer.pal(8, "Pastel2")[-c(3,5,8)],
                           brewer.pal(12, "Set3")[-c(2,3,8,9,11,12)])
celltype.col <- c(celltype.col[1:length(unique(colData(X.list[[1]])$celltype))],
                  "black")
names(celltype.col) <- c(levels(colData(X.list[[1]])$celltype), NA)


# Predict cell type using pretrained classifier on decomposed results and
# compute confusion matrix using celltypes of true data

# Extract decomposted datasets
lungSim.list <- keep(X.list, function(x){
  metadata(x)$ground_truth=="simulated"})

# Train RF classifier
# rffit.sim <- lapply(lungSim.list, 
#   train_rf, rffit.original=rffit.lung)

# saveRDS(rffit.sim, file.path(analysis.path, "rffit_sim_data_norm.rds"))
rffit.sim <- readRDS(file.path(analysis.path, "rffit_sim_data_norm.rds"))

# Calculate confusion matrix
lungSim.cm <- lapply(as.list(1:length(lungSim.list)), function(i){
  compute_confusionMatrix(lungSim.list[[i]], rffit.sim[[i]])})

# Plot prediction results/confusion matrix as shown in IMCDataAnalysis script
i <- 1
for (cm in lungSim.cm){
  cat("###", metadata(lungSim.list[[i]])$norm, "\n")
  
  pred.plot <- data.frame(cm$byClass) %>%
    mutate(class=sub("Class: ", "", rownames(cm$byClass))) %>%
    ggplot() +
    geom_point(aes(1 - Specificity, Sensitivity,
                   size=Detection.Rate,
                   fill=class),
               shape=21) +
    scale_fill_manual(values=celltype.col) +
    theme_cowplot(title.fontsize) +
    ylab("Sensitivity (TPR)") +
    xlab("1 - Specificity (FPR)")
  
  i <- i+1

  print(pred.plot)
  cat("\n\n")
}

normalized

unnormalized

combi

Cell classification probabilities (k=1, arcsinh)

Next, we have a look at which celltypes the RF predicts for wrongly identified cells using the classification probabilities per celltype class.

# Predict celltype probabilities
lungSim.prob <- lapply(as.list(1:length(lungSim.list)), function(i){
  compute_celltypeProb(lungSim.list[[i]], rffit.sim[[i]])})

# Plot prediction results/confusion matrix as shown in IMCDataAnalysis script
i <- 1
for (p in lungSim.prob){
  cat("###", metadata(lungSim.list[[i]])$norm, "\n")
  
  pred_prob.plot <- p %>%
    pivot_longer(cols=B:Tumor_Ki67) %>%
    ggplot() +
    geom_boxplot(aes(x=name, y=value, fill=name), outlier.size=0.5) +
    facet_wrap(.~truth, ncol=1) + 
    scale_fill_manual(values=celltype.col) +
    scale_x_discrete(guide=guide_axis(n.dodge=2)) +
    theme(panel.background=element_blank(), 
          axis.text.x=element_text(angle=45, hjust=1)) +
    theme_cowplot(title.fontsize) +
    labs(x="probability", y="celltype", fill="celltype")
  
  i <- i+1

  print(pred_prob.plot)
  cat("\n\n")
}

normalized

unnormalized

combi

Images with celltypes (k=1, arcsinh)

We also have a look at images coloured by celltypes identified using the ground truth and decomposed/simulated data.

Image 1

lungSim.list <- lapply(as.list(1:length(lungSim.list)), function(i){
  sce <- lungSim.list[[i]]
  
  cell_class <- data.frame(cell_id=rownames(lungSim.prob[[i]]),
                           celltype_pred=factor(colnames(lungSim.prob[[i]])[max.col(
                             lungSim.prob[[i]] %>%
                               dplyr::select(-truth), ties.method="first")]))
  cell_class[rowMax(as.matrix(lungSim.prob[[i]] %>%
                                dplyr::select(-truth))) < 0.4, "celltype_pred"] <- "uncertain"
  
  # Store labels in SpatialExperiment onject
  colData(sce) <- merge(colData(sce), cell_class, all.x=TRUE)
  colData(sce)$celltype_NA <- colData(sce)$celltype
  colData(sce)$celltype_NA[is.na(colData(sce)$celltype_pred)] <- NA
  
  sce
})

# Specify image index of interest
im <- 1

# Print image im of ground truth celltypes vs decomposed celltypes
for (sce in lungSim.list){
  cat("####", metadata(sce)$norm, "\n")
  
  images <- plot_celltype_images(sce, masks, im, celltype.col)
  
  print(images)
  cat("\n\n")
}

normalized

unnormalized

combi

Image 10

# Specify image index of interest
im <- 10

# Print image im of ground truth celltypes vs decomposed celltypes
for (sce in lungSim.list){
  cat("####", metadata(sce)$norm, "\n")
  
  images.2 <- plot_celltype_images(sce, masks, im, celltype.col)
  
  print(images.2)
  cat("\n\n")
}

normalized

unnormalized

combi

Image 100

# Specify image index of interest
im <- 100

# Print image im of ground truth celltypes vs decomposed celltypes
for (sce in lungSim.list){
  cat("####", metadata(sce)$norm, "\n")
  
  images.3 <- plot_celltype_images(sce, masks, im, celltype.col)
  
  print(images.3)
  cat("\n\n")
}

Analysis of Immucan Lung

07 December, 2022

Setup

Read Inputs

Barplot of Results

Scatterplot Results with and without Normalization in Training (k=1, arcsinh)

CD20 vs CD3

VISTA vs CD3

Cell Phenotyping (k=1, arcsinh)

normalized

unnormalized

combi

Cell classification probabilities (k=1, arcsinh)

normalized

unnormalized

combi

Images with celltypes (k=1, arcsinh)

Image 1

normalized

unnormalized

combi

Image 10

normalized

unnormalized

combi

Image 100

normalized

unnormalized

combi