Setup

knitr::opts_chunk$set(echo=TRUE)
knitr::opts_chunk$set(warning=FALSE, message=FALSE)

# Load libraries
library(RColorBrewer)
library(dittoSeq)
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Tonsil_th152/training/subset"

# Set path to masks
masks.path <- "/mnt/bb_dqbm_volume/data/Tonsil_th152/steinbock/masks_deepcell"
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")

Read Inputs

First, we read in the results from CISI pilot run for the chosen immune markers (CD15, CD20, CD3, CD38, CD4, CD68, CD8a, ICOS, Ki-67, MPO, panCK, SMA, CD303, FOXP3, GranzymeB). The pilot ran was done using no normalization and with k=1.

## Read results
# Read in all results for tonsil into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
                            full.names=TRUE, recursive=TRUE)
results.files <- results.files[grepl("pilot_immune_channels", results.files)]

results.df <- lapply(results.files, read_results, type="res", voi="normalisation") %>% 
  bind_rows() %>%
  mutate(normalisation=ifelse(!grepl("normalized", normalisation), "unnormalized", "combi"))


## Read X_test and X_simulated
# Specify X_test and X_simulated files to be read in
X.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)
X.files <- X.files[grepl("pilot_immune_channels", X.files)]

# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. which dataset is used in training and testing and if it is the ground truth or simulated X)
X.list <- lapply(X.files, read_results, type="x", voi="normalisation")
X.list <- lapply(X.list, function(sce.temp){
  assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
  metadata(sce.temp)$normalisation <- ifelse(!grepl("normalized", metadata(sce.temp)$normalisation),
                                          "unnormalized", "combi")
  
  sce.temp
})



## Read U
u.files <- list.files(analysis.path, "gene_modules.csv",
                      full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
u.files <- u.files[grepl("pilot_immune_channels", u.files)]
u <- lapply(u.files, read_U, type="training") %>% 
  bind_rows() %>%
  dplyr::rename(dataset=rep, normalisation=k) %>%
  mutate(normalisation=ifelse(!grepl("normalized", normalisation), "unnormalized", "combi"))


## Read A
a.files <- list.files(analysis.path, "version_*",
                      full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
a.files <- a.files[grepl("pilot_immune_channels", a.files)]
a <- lapply(a.files, read_A, voi="normalisation") %>% 
  bind_rows() %>%
  mutate(normalisation=ifelse(!grepl("normalized", normalisation), "unnormalized", "combi"))


# Read in masks for tonsil data
masks <- loadImages(masks.path, as.is=TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))

Results

Barplot of Results (k=1)

For each different measurement used to test training results, a barplot is shown comparing CISI results.

# For each different measurement of training results, plot barplot 

# Melt dataframe for plotting
data_temp <- results.df %>%
  dplyr::select(-c("dataset", "training", "datasize", "version", "Best crossvalidation fold")) %>%
  pivot_longer(!c("simulation", "normalisation"), names_to="measure", values_to="value")

# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "normalisation")
print(results.barplot)

Per protein results

Correlations per Protein (k=1, arcsinh)

Correlation between ground truth and decomposed results per protein for
asinh transformed counts.

aoi <- "exprs"
# Calculate correlations between ground truth and simulated data for each protein
X.cor <- lapply(X.list, function(sce){
  counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
    mutate(protein=rownames(.)) %>%
    melt() %>%
    dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
                  cell=variable) %>%
    mutate(normalisation=metadata(sce)$normalisation)
  
}) %>% bind_rows() %>%
  group_by(protein, cell, normalisation) %>%
  summarise_all(na.omit) %>%
  group_by(normalisation, protein) %>%
  mutate(correlation=cor(ground_truth, simulated))

# Plot correlations for normalized and combi CISI run
for (i in unique(X.cor$normalisation)) {
  cat("#####", i, "\n")
  
  proteinCorr <- plot_protein_cor(X.cor %>% 
                                    dplyr::filter(normalisation==i)) + ylim(0, 1)
  
  print(proteinCorr)
  cat("\n\n")
}
combi

unnormalized

Image results

FOXP3 of Test Image (k=1, arcsinh)

Expression values for cells in test image for worst performing protein FOXP3 for unnormalized and combi CISI run.

# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "FOXP3"
for (n in unique(X.cor$normalisation)){
  cat("#####", n, "\n")
  
  img.foxp3 <- plot_cells(keep(X.list, function(sce.temp){
    metadata(sce.temp)$normalisation == n
  }), masks, poi)
  # Plot decomposed vs true results for test roi (002)
  img.foxp3 <- plot_grid(plotlist=append(img.foxp3[grepl("20220520_TsH_th152_cisi1_002",
                                                         names(img.foxp3))],
                                         img.foxp3[grepl("legend",
                                                         names(img.foxp3))]), ncol=2, 
                         labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
                         label_size=title.fontsize, hjust=c(-2, -1.5), 
                         vjust=1, scale=0.9)
  print(img.foxp3)
  cat("\n\n")
}
combi

unnormalized

CD303 of Test Image (k=1, arcsinh)

Expression values for cells in test image for second worst performing protein CD303 for unnormalized and combi CISI run.

# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "CD303"
for (n in unique(X.cor$normalisation)){
  cat("#####", n, "\n")
  
  # Call plot_cells to get individual plots for test roi for decomposed and true
  # results
  img.cd303 <- plot_cells(keep(X.list, function(sce.temp){
    metadata(sce.temp)$normalisation == n
  }), masks, poi)
  # Plot decomposed vs true results for test roi (002)
  img.cd303 <- plot_grid(plotlist=append(img.cd303[grepl("20220520_TsH_th152_cisi1_002",
                                                         names(img.cd303))],
                                         img.cd303[grepl("legend",
                                                         names(img.cd303))]), ncol=2, 
                         labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
                         label_size=title.fontsize, hjust=c(-2, -1.5), 
                         vjust=1, scale=0.9)
  print(img.cd303)
  cat("\n\n")
}
combi

unnormalized

CD68 of Test Image (k=1, arcsinh)

Expression values for cells in test image for another bad performing protein CD68 for unnormalized and combi CISI run.

# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "CD68"
for (n in unique(X.cor$normalisation)){
  cat("#####", n, "\n")
  
  # Call plot_cells to get individual plots for test roi for decomposed and true
  # results
  img.cd68 <- plot_cells(keep(X.list, function(sce.temp){
    metadata(sce.temp)$normalisation == n
  }), masks, poi)
  # Plot decomposed vs true results for test roi (002)
  img.cd68 <- plot_grid(plotlist=append(img.cd68[grepl("20220520_TsH_th152_cisi1_002",
                                                       names(img.cd68))],
                                        img.cd68[grepl("legend",
                                                       names(img.cd68))]), ncol=2, 
                        labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
                        label_size=title.fontsize, hjust=c(-2, -1.5), 
                        vjust=1, scale=0.9)
  print(img.cd68)
  cat("\n\n")
}
combi

unnormalized

CD38 of Test Image (k=1, arcsinh)

Expression values for cells in test image for another bad performing protein CD38 for unnormalized and combi CISI run.

# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "CD38"
for (n in unique(X.cor$normalisation)){
  cat("#####", n, "\n")
  
  # Call plot_cells to get individual plots for test roi for decomposed and true
  # results
  img.cd38 <- plot_cells(keep(X.list, function(sce.temp){
    metadata(sce.temp)$normalisation == n
  }), masks, poi)
  # Plot decomposed vs true results for test roi (002)
  img.cd38 <- plot_grid(plotlist=append(img.cd38[grepl("20220520_TsH_th152_cisi1_002",
                                                       names(img.cd38))],
                                        img.cd38[grepl("legend",
                                                       names(img.cd38))]), ncol=2, 
                        labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
                        label_size=title.fontsize, hjust=c(-2, -1.5), 
                        vjust=1, scale=0.9)
  print(img.cd38)
  cat("\n\n")
}
combi

unnormalized

GranzymeB of Test Image (k=1, arcsinh)

Expression values for cells in test image for another bad performing protein GranzymeB for unnormalized and combi CISI run.

# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "GranzymeB"
for (n in unique(X.cor$normalisation)){
  cat("#####", n, "\n")
  
  # Call plot_cells to get individual plots for test roi for decomposed and true
  # results
  img.granzymeB <- plot_cells(keep(X.list, function(sce.temp){
    metadata(sce.temp)$normalisation == n
  }), masks, poi)
  # Plot decomposed vs true results for test roi (002)
  img.granzymeB <- plot_grid(plotlist=append(img.granzymeB[grepl("20220520_TsH_th152_cisi1_002",
                                                                 names(img.granzymeB))],
                                             img.granzymeB[grepl("legend",
                                                                 names(img.granzymeB))]), ncol=2, 
                             labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
                             label_size=title.fontsize, hjust=c(-2, -1.5), 
                             vjust=1, scale=0.9)
  print(img.granzymeB)
  cat("\n\n")
}
combi

unnormalized

CD20 of Test Image (k=1, arcsinh)

Expression values for cells in test image for best performing protein CD20 for unnormalized and combi CISI run.

# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "CD20"
for (n in unique(X.cor$normalisation)){
  cat("#####", n, "\n")
  
  # Call plot_cells to get individual plots for test roi for decomposed and true
  # results
  img.cd20 <- plot_cells(keep(X.list, function(sce.temp){
    metadata(sce.temp)$normalisation == n
  }), masks, poi)
  # Plot decomposed vs true results for test roi (002)
  img.cd20 <- plot_grid(plotlist=append(img.cd20[grepl("20220520_TsH_th152_cisi1_002",
                                                       names(img.cd20))],
                                        img.cd20[grepl("legend",
                                                       names(img.cd20))]), ncol=2, 
                        labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
                        label_size=title.fontsize, hjust=c(-2, -1.5), 
                        vjust=1, scale=0.9)
  print(img.cd20)
  cat("\n\n")
}
combi

unnormalized

CD3 of Test Image (k=1, arcsinh)

Expression values for cells in test image for CD3, a protein performing mediocre for unnormalized and combi CISI run.

# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "CD3"
for (n in unique(X.cor$normalisation)){
  cat("#####", n, "\n")
  
  # Call plot_cells to get individual plots for test roi for decomposed and true
  # results
  img.cd3 <- plot_cells(keep(X.list, function(sce.temp){
    metadata(sce.temp)$normalisation == n
  }), masks, poi)
  # Plot decomposed vs true results for test roi (002)
  img.cd3 <- plot_grid(plotlist=append(img.cd3[grepl("20220520_TsH_th152_cisi1_002",
                                                     names(img.cd3))],
                                       img.cd3[grepl("legend",
                                                     names(img.cd3))]), ncol=2, 
                       labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
                       label_size=title.fontsize, hjust=c(-2, -1.5), 
                       vjust=1, scale=0.9)
  print(img.cd3)
  cat("\n\n")
}
combi

unnormalized

CD303 protein distribution (k=1, arcsinh)

# Have a look at dittoRidgePlots of a bad performing protein
X.list <- lapply(X.list, function(sce.temp) {
  colData(sce.temp)$dummy <- "1"
  
  sce.temp
})

plot.list <- NULL
for(sce in X.list){
  plot.list <- append(plot.list, list(dittoRidgePlot(sce, var="CD303", 
                                                     group.by="dummy", assay="exprs") +
                                        ggtitle(metadata(sce)$ground_truth) +
                                        coord_cartesian(xlim=c(0, 6.2), expand=TRUE)))
}
print(plot_grid(plotlist=plot.list, ncol=1))

Matrix designs

U

# Plot U for normalized and combi CISI run
cor <- plot_U(u, "normalisation", "dataset")
normalisation = combi

normalisation = unnormalized

A

plot_A(a %>%
         dplyr::select(-training), "normalisation", "dataset")
normalisation = combi

normalisation = unnormalized