Analysis of Tonsil th152

Setup

knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

# Load libraries
library(scater)

# Import helper fncs
source("../analysis/helpers/helper_fnc.R")

# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Tonsil_th152/training/full"

# Set path to masks
masks.path <- "/mnt/bb_dqbm_volume/data/Tonsil_th152/steinbock/masks_deepcell"

# Output folder for plots
plot.folder <- file.path(thesis_output.folder, "analysis_tonsil")
if(!dir.exists(plot.folder)){dir.create(plot.folder)}

Read Inputs

## Read results
# Read in all results for tonsil data (spillover and non-spillover corrected) 
# into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
                            full.names=TRUE, recursive=TRUE)
results.files <- results.files[grepl("sm_", results.files) & grepl("no_norm", results.files)]

results.df <- lapply(results.files, read_results, type="res", voi="corrected") %>% 
  bind_rows() %>%
  mutate(corrected=gsub("s", "", corrected))


## Read X_test and X_simulated
# Specify X_test and X_simulated files to be read in
X.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)
X.files <- X.files[grepl("sm_", X.files) & grepl("no_norm", X.files)]

# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. which dataset is used in training and testing, if spillover correction was used 
# and if it is the ground truth or simulated X)
X.list <- lapply(X.files, read_results, type="x", voi="sm")
X.list <- lapply(X.list, function(sce.temp){
  metadata(sce.temp)$sm <- gsub("s", "", metadata(sce.temp)$sm)
  assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
  
  sce.temp
})


## Read masks
# Read in masks for tonsil data
masks <- loadImages(masks.path, as.is = TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))

Barplot of Results

For each different measurement used to test training results, a barplot is shown comparing spillover corrected and uncorrected results for Tonsil th152 data.

# For each different measurement of training results, plot barplot comparing
# spillover corrected and uncorrected results

# Melt dataframe for plotting
data_temp <- results.df %>%
  dplyr::select(-c("dataset", "training", "datasize", "version", "Best crossvalidation fold")) %>%
  pivot_longer(!c("corrected", "simulation"), names_to="measure", values_to="value")

# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "corrected")
print(results.barplot)

# Save plot for master thesis

thesis_results.barplot <- ggplot(data_temp %>% 
                                   dplyr::filter(corrected=="uncorrected") %>%
                                   mutate(simulation=ifelse(simulation=="composite", 
                                                            "no_noise", simulation)), 
                                 aes(x=measure, y=value, pattern=simulation, 
                                     fill=simulation)) +
  geom_bar(stat="identity",
           position=position_dodge(preserve="single"),
           width=0.6) +
  scale_y_continuous(limits=c(0, 1)) +
  scale_fill_npg() +
  labs(x="Measure", y="Value", fill="Simulation Type") + 
  theme_cowplot(thesis_title.fontsize) +
  theme(legend.position="bottom",
        axis.text.x=element_text(angle=45, vjust=1, hjust=1))

ggsave(file.path(plot.folder, "d_res_T.pdf"), thesis_results.barplot, 
       width=13.5, height=10, units="cm")

Scatterplot of Neighbouring channels (k=1, arcsinh)

The following plots show some neighboring channels for Tonsil th152 data trained on spillover corrected and non-spillover corrected data.

# Add names and reorder list
names(X.list) <- lapply(X.list, function(x){metadata(x)$ground_truth})
X.list <- append(X.list[-c(1:2)], X.list[1:2])

# List of combinations of proteins of interest
poi <- list(c("CD8a", "Tim-3"), c("CD68", "CD20"))
for (p_vec in poi) {
  cat("###", p_vec[1], "vs", p_vec[2], "\n")
  
  # Plot arcsinh transformed counts of proteins of interest of decomposed data trained
  # with and without spillover correction
  X.spillover <- plot_grid(plot_exprs(X.list[1:2], "", p_vec[1], p_vec[2]),
                           plot_exprs(X.list[3:4], "", p_vec[1], p_vec[2]),
                           plot_exprs(X.list[5:6], "", p_vec[1], p_vec[2]),
                           ncol=1, labels=unique(unlist(lapply(X.list, function(sce){
                             metadata(sce)$sm}))),
                           label_size=title.fontsize, hjust=c(-5.5, -4, -8.5), 
                           vjust=0.9, scale = 0.9)
  
  print(X.spillover)
  cat("\n\n")
}

CD8a vs Tim-3

CD68 vs CD20

d_neighbours_T.grid <- plot_grid(plot_exprs(X.list[3:4], "", poi[[1]][1], 
                                            poi[[1]][2], thesis=TRUE),
                                 plot_exprs(X.list[3:4], "", poi[[2]][1], 
                                            poi[[2]][2], thesis=TRUE),
                                 ncol=1, labels="AUTO", align="h", axis="l")
save_plot(file.path(plot.folder, "d_neighbours_T.pdf"),
          d_neighbours_T.grid, base_width=4.77)

Per protein results

Correlations per Protein (arcsinh)

Correlation between ground truth and decomposed results per protein for
arcsinh transformed counts.

aoi <- "exprs"
# Calculate correlations between ground truth and simulated data for each protein
X.cor <- lapply(X.list, function(sce){
  counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
    mutate(protein=rownames(.)) %>%
    melt() %>%
    dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
                  cell=variable) %>%
    mutate(sm=metadata(sce)$sm,
           dataset=paste(metadata(sce)$training, metadata(sce)$dataset, sep="_"))
  
}) %>% bind_rows() %>%
  group_by(protein, cell, sm, dataset) %>%
  summarise_all(na.omit) %>%
  group_by(sm, dataset, protein) %>%
  mutate(correlation=cor(ground_truth, simulated))


# Plot correlations for spillover and non-spillover corrected data
for (i in unique(X.cor$sm)) {
  cat("####", i, "\n")
  
  proteinCorr <- plot_protein_cor(X.cor %>%
                                    dplyr::filter(sm==i))
  
  print(proteinCorr)
  cat("\n\n")
}

combi

corrected

uncorrected

# Save plot for master thesis
d_protein_cor.plot <- plot_protein_cor(X.cor %>% dplyr::filter(sm=="uncorrected")) +
  theme_cowplot(thesis_title.fontsize)
ggsave(file.path(plot.folder, "d_protein_cor.pdf"), 
       d_protein_cor.plot +
         labs(x=update_text(d_protein_cor.plot$labels$x),
              y=update_text(d_protein_cor.plot$labels$y)), 
       width=11, height=12, units="cm")

Protein distributions coloured by correlation results (k=1, argsinh)

For a manual inspection of different protein distribution characteristics, we plot the boxplots of the original protein expressions coloured by correlation results. These show: Interquartile range (IQR), 1st and 3rd quantiles, median, mean, max, min and outliers.

# Plot boxplots of protein expr per dataset coloured by protein correlation results
# (performance)

# Plot correlations for each test/training dataset combination
for (i in unique(X.cor$sm)) {
  cat("####", i, "\n")
  
  proteinDist <- plot_protein_dist(X.cor %>%
                                    dplyr::filter(sm==i),
                                   0)
  
  print(proteinDist)
  cat("\n\n")
}

Analysis of Tonsil th152

15 December, 2022

Setup

Read Inputs

Barplot of Results

Scatterplot of Neighbouring channels (k=1, arcsinh)

CD8a vs Tim-3

CD68 vs CD20

Per protein results

Correlations per Protein (arcsinh)

combi

corrected

uncorrected

Protein distributions coloured by correlation results (k=1, argsinh)

combi

corrected

uncorrected