Setup


knitr::opts_chunk$set(echo=TRUE)
knitr::opts_chunk$set(warning=FALSE, message=FALSE)

# Load libraries
library(batchelor)
library(Rphenograph)
library(igraph)
library(dittoSeq)
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")
## TODO: Change paths
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Tonsil_th179_1"

# Set path to masks
masks.path <- "/mnt/bb_dqbm_volume/data/20221108_TsH_LSS_cisiabmix1_179/steinbock/masks_deepcell"

Read Inputs


First, we read in the results from the CISI analysis using a pre-defined A and the best parameters from the parameter sweep. For this we have data from simulated composite data from training, from the actual experiment and the decomposed data.

The channels from the experiment were decomposed using the dictionary U with the best predicted performance (from the run using ‘min strategy’ for A) for a 8 to 4 compression. This dictionary came from the run with the parameters: k=4, d=20 and m=4 and contains 17 module.

## Read results
# Read in all results into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
                            full.names=TRUE, recursive=TRUE)
results.df <- lapply(results.files, read_results, type="res", voi="comparison") %>% 
  bind_rows() %>%
  dplyr::select(-c("dataset", "training", "datasize"))


## Read X_test, X_simulated and X_decomposed 
# Specify files to be read in
X_results.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)

# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. if it is the ground truth or simulated X and if it comes from training simulations,
# experiment simulation or the acutal decomposed experiment)
X_results.list <- lapply(X_results.files, read_results, type="x", voi="comparison")
X_results.list <- lapply(X_results.list, function(sce.temp){
  metadata(sce.temp) <- within(metadata(sce.temp), rm(dataset, training)) 
  assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
  
  sce.temp
})


## Read anndata object containing composite measurements
X_simulation.files <- list.files(analysis.path, "composite_measurements.h5ad", 
                             full.names=TRUE, recursive=TRUE)
X_simulation.list <- lapply(X_simulation.files, read_results, type="x", use_voi=FALSE)
# Remove non-composite channels, add if data comes form simulated or real composite 
# measurements and add arcsinh transformed counts
X_simulation.list <- lapply(1:length(X_simulation.list), function(i){
  sce.temp <- X_simulation.list[[i]]
  sce.temp <- sce.temp[grepl("CC\\d_", rownames(sce.temp)), ]
  metadata(sce.temp) <- within(metadata(sce.temp), rm(dataset, training))
  metadata(sce.temp)$ground_truth <- ifelse(grepl("simulated", X_simulation.files[i]),
                                            "simulated", "real_composite")
  assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
  
  sce.temp
})


## Read U
u.files <- list.files(analysis.path, "gene_modules",
                      full.names=TRUE, recursive=TRUE)
u.files <- u.files[!grepl("min", u.files)]
u <- read_single_U(u.files)


## Read A's
a.files <- list.files(analysis.path, "version_*",
                      full.names=TRUE, recursive=TRUE)
a <- lapply(a.files, function(a.file) {
  a.temp <- read_single_A(a.file) %>%
    mutate(type=ifelse(grepl("best", a.file), "best", "fixed"))
  
  a.temp
}) %>% bind_rows()


# Read in masks 
masks <- loadImages(masks.path, as.is=TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))

Results


Barplot of Results

The barplot shows the different measurements used to evaluate decomposition of CISI.

There are five different results: training (decomposed simulated data from training compared to ground truth training data using fixed A), training-original (decomposed simulated data from training compared to ground truth training data using the best A from the parameter sweep), experiment-simulation (decomposed simulated data from experiment data compared to ground truth experiment data using fixed A), experiment-simulation-best-A (decomposed simulated data from experiment data compared to ground truth experiment data using best A from the parameter sweep) and experiment (decomposed real composite measurements from experiment compared to ground truth experiment data).

# For each different measurement of training results, plot barplot 
# Melt dataframe for plotting
data_temp <- results.df %>%
  dplyr::select(-c("version")) %>%
  pivot_longer(!c("simulation", "comparison"), names_to="measure", values_to="value")

# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "comparison")
print(results.barplot)

Decomposed vs Ground truth Measurements


Images (arcsinh)

Here we show the decomposed measurements vs the ground truth measurements for all images.

# Subset list to decomposed and ground truth expressions
X_decomposition.list <- keep(X_results.list, function(x){
  metadata(x)$comparison=="experiment"})
# Find the names of all protein channels
pois <- rownames(X_decomposition.list[[1]])
names(pois) <- rownames(X_decomposition.list[[1]])


# Call plot_cells to get individual plots for all rois for decomposed and real
# measurements
for (n in names(pois)){
  cat("####", n, "\n")
  
  img <- plot_cells(X_decomposition.list, masks, pois[[n]], layer="exprs", display="all")

  # Plot decomposed vs true results 
  img <- plot_grid(plotlist=img, ncol=2, 
                   labels=unlist(lapply(X_decomposition.list, function(sce){metadata(sce)$ground_truth})),
                   label_size=title.fontsize, hjust=c(-2, -1.5), 
                   vjust=1, scale=0.9)
  print(img)
  cat("\n\n")
}

SMA

CD68

CD3

CD20

CD45RO

CD8a

CD11c

Ki-67

Correlations per Protein (arcsinh)

Correlation between ground truth and decomposed results per protein for
arcsinh transformed counts.

aoi <- "exprs"
# Calculate correlations between ground truth and decomposed data for each protein
X.cor <- lapply(X_decomposition.list, function(sce){
  counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
    mutate(protein=rownames(.)) %>%
    melt() %>%
    dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
                  cell=variable)
}) %>% bind_rows() %>%
  group_by(protein, cell) %>%
  summarise_all(na.omit) %>%
  group_by(protein) %>%
  summarise(correlation=cor(ground_truth, decomposed))

# Plot correlations per protein
proteinCorr <- plot_protein_cor(X.cor) + ylim(0, 1)
print(proteinCorr)

# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X_decomposition.list)

Simulated vs Real Measurements


Images of Simulated vs Real Composite Measurements (arcsinh)

Here we show the composite measurements vs the simulated composite measurements for all images.

# Find the names of all composite channels
pois <- rownames(X_simulation.list[[1]])
names(pois) <- rownames(X_simulation.list[[1]])


# Call plot_cells to get individual plots for all rois for simulated and real
# composite measurements
for (n in names(pois)){
  cat("####", n, "\n")
  
  img <- plot_cells(X_simulation.list, masks, pois[[n]], layer="exprs", display="all")
  
  # Plot simulated vs true composite measurements  
  img.grid <- plot_grid(plotlist=img, ncol=2, 
                   labels=unlist(lapply(X_simulation.list, function(sce){metadata(sce)$ground_truth})),
                   label_size=title.fontsize, hjust=c(-2, -1.5), 
                   vjust=1, scale=0.9)
  print(img.grid)
  cat("\n\n")
}

CC0_SMA_CD68_CD3

CC1_CD11c

CC2_CD68_CD20_CD45RO

CC3_CD8a_Ki-67

# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X_simulation.list)

Images of Decomposed Simulated vs Real Decomposed Measurements (arcsinh)

Here we show the images of decomposed simulated data from the individual ground truth channels in the actual experiment compared to the actual decomposed data.

# Create new list with SCE objects containing decomposed simulated data and real 
# decomposed data
X_sim_decomposition.list <- keep(X_results.list, function(x){
  (metadata(x)$comparison=="experiment-simulation" & metadata(x)$ground_truth=="simulated") |
    (metadata(x)$comparison=="experiment" & metadata(x)$ground_truth=="decomposed")})

# Find the names of all channels
pois <- rownames(X_sim_decomposition.list[[1]])
names(pois) <- rownames(X_sim_decomposition.list[[1]])


# Call plot_cells to get individual plots for all rois for simulated and real
# composite measurements
for (n in names(pois)){
  cat("####", n, "\n")
  
  img <- plot_cells(X_sim_decomposition.list, masks, pois[[n]], layer="exprs", display="all")
  
  # Plot simulated vs true composite measurements  
  img.grid <- plot_grid(plotlist=img, ncol=2, 
                   labels=unlist(lapply(X_sim_decomposition.list, function(sce){metadata(sce)$ground_truth})),
                   label_size=title.fontsize, hjust=c(-2, -1.5), 
                   vjust=1, scale=0.9)
  print(img.grid)
  cat("\n\n")
}

SMA

CD68

CD3

CD20

CD45RO

CD8a

CD11c

Ki-67

# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X_sim_decomposition.list)

Training vs. Decomposition Correlations per Protein (arcsinh)

Scatterplot of correlations between ground truth and simulated or decomposed results per protein for arcsinh transformed counts .

aoi <- "exprs"

# Calculate correlations between ground truth and simulated data for each protein
X.cor <- X.cor %>% 
  dplyr::rename(correlation_decomposition=correlation) %>%
  merge(lapply(keep(X_results.list, function(x){
    metadata(x)$comparison=="training"}), 
    function(sce){
      counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
        mutate(protein=rownames(.)) %>%
        melt() %>%
        dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
                      cell=variable) 
      
    }) %>% bind_rows() %>%
      group_by(protein, cell) %>%
      summarise_all(na.omit) %>%
      group_by(protein) %>%
      summarise(correlation_training=cor(ground_truth, simulated)))


# Plot correlations for each protein
correlations.plot <- ggscatter(X.cor,
                               x="correlation_training", 
                               y="correlation_decomposition",
                               label="protein",
                               label.rectangle=TRUE,
                               color=pal_npg("nrc")("1"),
                               add.params=list(color=pal_npg("nrc")("4")[4],
                                               size=2)) +
  theme_cowplot(title.fontsize) +
  geom_abline(slope=1, linetype="dashed") +
  stat_cor(size=2) +
  ylim(0, 1) +
  xlim(0, 1)

print(correlations.plot)

# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X.cor)

Matrix designs


U

# Plot U 
u.plot <- plot_single_U(u, paste0("U\n(dictionary size: ", length(unique(u$module)), ")"))
print(u.plot)

A’s

# Plot A's
for (n in unique(a$type)){
  cat("####", n, "\n")
  
  a.plot <- plot_single_A(a %>%
                            dplyr::filter(type==n) %>%
                            dplyr::select(-type), "A")
  print(a.plot)
  
  cat("\n\n")
}

fixed

best