Setup


knitr::opts_chunk$set(echo=TRUE)
knitr::opts_chunk$set(warning=FALSE, message=FALSE)

# Load libraries
library(batchelor)
library(Rphenograph)
library(igraph)
library(dittoSeq)
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")
## TODO: Change paths
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Tonsil_th179_2"

# Set path to masks
masks.path <- "/mnt/bb_dqbm_volume/data/20221108_TsH_LSS_cisiabmix2_179/steinbock/masks_deepcell"

Read Inputs


First, we read in the results from the CISI analysis using a pre-defined A and the best parameters from the parameter sweep. For this we have data from simulated composite data from training, from the actual experiment and the decomposed data.

The channels from the experiment were decomposed using the dictionary U with the best predicted performance (from the run using ‘min strategy’ for A) for a 8 to 6 compression. This dictionary came from the run with the parameters: k=4, d=10 and m=4 and contains 10 module.

## Read results
# Read in all results into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
                            full.names=TRUE, recursive=TRUE)
results.df <- lapply(results.files, read_results, type="res", voi="comparison") %>% 
  bind_rows() %>%
  dplyr::select(-c("dataset", "training", "datasize")) %>%
  mutate(comparison=ifelse(comparison=="best-A", "experiment-simulation_best-A", comparison))


## Read X_test, X_simulated and X_decomposed 
# Specify files to be read in
X_results.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)

# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. if it is the ground truth or simulated X and if it comes from training simulations,
# experiment simulation or the acutal decomposed experiment)
X_results.list <- lapply(X_results.files, read_results, type="x", voi="comparison")
X_results.list <- lapply(X_results.list, function(sce.temp){
  metadata(sce.temp) <- within(metadata(sce.temp), rm(dataset, training)) 
  assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
  
  sce.temp
})


## Read anndata object containing composite measurements
X_simulation.files <- list.files(analysis.path, "composite_measurements.h5ad", 
                             full.names=TRUE, recursive=TRUE)
X_simulation.list <- lapply(X_simulation.files, read_results, type="x", use_voi=FALSE)
# Remove non-composite channels, add if data comes form simulated or real composite 
# measurements and add arcsinh transformed counts
X_simulation.list <- lapply(1:length(X_simulation.list), function(i){
  sce.temp <- X_simulation.list[[i]]
  sce.temp <- sce.temp[grepl("CC\\d_", rownames(sce.temp)), ]
  metadata(sce.temp) <- within(metadata(sce.temp), rm(dataset, training))
  metadata(sce.temp)$ground_truth <- ifelse(grepl("simulated", X_simulation.files[i]),
                                            "simulated", "real_composite")
  assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
  
  sce.temp
})


## Read U
u.files <- list.files(analysis.path, "gene_modules",
                      full.names=TRUE, recursive=TRUE)
u.files <- u.files[!grepl("min", u.files)]
u <- read_single_U(u.files)


## Read A's
a.files <- list.files(analysis.path, "version_*",
                      full.names=TRUE, recursive=TRUE)
a <- lapply(a.files, function(a.file) {
  a.temp <- read_single_A(a.file) %>%
    mutate(type=ifelse(grepl("best", a.file), "best", "fixed"))
  
  a.temp
}) %>% bind_rows()


# Read in masks 
masks <- loadImages(masks.path, as.is=TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))

Results


Barplot of Results

The barplot shows the different measurements used to evaluate decomposition of CISI.

There are five different results: training (decomposed simulated data from training compared to ground truth training data using fixed A), training-original (decomposed simulated data from training compared to ground truth training data using the best A from the parameter sweep), experiment-simulation (decomposed simulated data from experiment data compared to ground truth experiment data using fixed A), experiment-simulation-best-A (decomposed simulated data from experiment data compared to ground truth experiment data using best A from the parameter sweep) and experiment (decomposed real composite measurements from experiment compared to ground truth experiment data).

# For each different measurement of training results, plot barplot 
# Melt dataframe for plotting
data_temp <- results.df %>%
  dplyr::select(-c("version")) %>%
  pivot_longer(!c("simulation", "comparison"), names_to="measure", values_to="value")

# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "comparison")
print(results.barplot)

Decomposed vs Ground truth Measurements


Images (arcsinh)

Here we show the decomposed measurements vs the ground truth measurements for all images.

# Subset list to decomposed and ground truth expressions
X_decomposition.list <- keep(X_results.list, function(x){
  metadata(x)$comparison=="experiment"})
# Find the names of all protein channels
pois <- rownames(X_decomposition.list[[1]])
names(pois) <- rownames(X_decomposition.list[[1]])


# Call plot_cells to get individual plots for all rois for decomposed and real
# measurements
for (n in names(pois)){
  cat("####", n, "\n")
  
  img <- plot_cells(X_decomposition.list, masks, pois[[n]], layer="exprs", display="all")

  # Plot decomposed vs true results 
  img <- plot_grid(plotlist=img, ncol=2, 
                   labels=unlist(lapply(X_decomposition.list, function(sce){metadata(sce)$ground_truth})),
                   label_size=title.fontsize, hjust=c(-2, -1.5), 
                   vjust=1, scale=0.9)
  print(img)
  cat("\n\n")
}

SMA

CD3

CD20

CD8a

CD11c

CD4

panCK

Ki-67

Correlations per Protein (arcsinh)

Correlation between ground truth and decomposed results per protein for
arcsinh transformed counts.

aoi <- "exprs"
# Calculate correlations between ground truth and decomposed data for each protein
X.cor <- lapply(X_decomposition.list, function(sce){
  counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
    mutate(protein=rownames(.)) %>%
    melt() %>%
    dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
                  cell=variable)
}) %>% bind_rows() %>%
  group_by(protein, cell) %>%
  summarise_all(na.omit) %>%
  group_by(protein) %>%
  summarise(correlation=cor(ground_truth, decomposed))

# Plot correlations per protein
proteinCorr <- plot_protein_cor(X.cor) + ylim(0, 1)
print(proteinCorr)

# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X_decomposition.list)

Simulated vs Real Measurements


Images of Simulated vs Real Composite Measurements (arcsinh)

Here we show the composite measurements vs the simulated composite measurements for all images.

# Find the names of all composite channels
pois <- rownames(X_simulation.list[[1]])
names(pois) <- rownames(X_simulation.list[[1]])


# Call plot_cells to get individual plots for all rois for simulated and real
# composite measurements
for (n in names(pois)){
  cat("####", n, "\n")
  
  img <- plot_cells(X_simulation.list, masks, pois[[n]], layer="exprs", display="all")
  
  # Plot simulated vs true composite measurements  
  img.grid <- plot_grid(plotlist=img, ncol=2, 
                   labels=unlist(lapply(X_simulation.list, function(sce){metadata(sce)$ground_truth})),
                   label_size=title.fontsize, hjust=c(-2, -1.5), 
                   vjust=1, scale=0.9)
  print(img.grid)
  cat("\n\n")
}

CC0_panCK_Ki-67

CC1_CD4

CC2_SMA_CD20

CC3_CD3

CC4_CD11c

CC5_Ki-67_CD8a

# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X_simulation.list)

Images of Decomposed Simulated vs Real Decomposed Measurements (arcsinh)

Here we show the images of decomposed simulated data from the individual ground truth channels in the actual experiment compared to the actual decomposed data.

# Create new list with SCE objects containing decomposed simulated data and real 
# decomposed data
X_sim_decomposition.list <- keep(X_results.list, function(x){
  (metadata(x)$comparison=="experiment-simulation" & metadata(x)$ground_truth=="simulated") |
    (metadata(x)$comparison=="experiment" & metadata(x)$ground_truth=="decomposed")})

# Find the names of all channels
pois <- rownames(X_sim_decomposition.list[[1]])
names(pois) <- rownames(X_sim_decomposition.list[[1]])


# Call plot_cells to get individual plots for all rois for simulated and real
# composite measurements
for (n in names(pois)){
  cat("####", n, "\n")
  
  img <- plot_cells(X_sim_decomposition.list, masks, pois[[n]], layer="exprs", display="all")
  
  # Plot simulated vs true composite measurements  
  img.grid <- plot_grid(plotlist=img, ncol=2, 
                   labels=unlist(lapply(X_sim_decomposition.list, function(sce){metadata(sce)$ground_truth})),
                   label_size=title.fontsize, hjust=c(-2, -1.5), 
                   vjust=1, scale=0.9)
  print(img.grid)
  cat("\n\n")
}

SMA

CD3

CD20

CD8a

CD11c

CD4

panCK

Ki-67

# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X_sim_decomposition.list)

Training vs. Decomposition Correlations per Protein (arcsinh)

Scatterplot of correlations between ground truth and simulated or decomposed results per protein for arcsinh transformed counts .

aoi <- "exprs"

# Calculate correlations between ground truth and simulated data for each protein
X.cor <- X.cor %>% 
  dplyr::rename(correlation_decomposition=correlation) %>%
  merge(lapply(keep(X_results.list, function(x){
    metadata(x)$comparison=="training"}), 
    function(sce){
      counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
        mutate(protein=rownames(.)) %>%
        melt() %>%
        dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
                      cell=variable) 
      
    }) %>% bind_rows() %>%
      group_by(protein, cell) %>%
      summarise_all(na.omit) %>%
      group_by(protein) %>%
      summarise(correlation_training=cor(ground_truth, simulated)))


# Plot correlations for each protein
correlations.plot <- ggscatter(X.cor,
                               x="correlation_training", 
                               y="correlation_decomposition",
                               label="protein",
                               label.rectangle=TRUE,
                               color=pal_npg("nrc")("1"),
                               add.params=list(color=pal_npg("nrc")("4")[4],
                                               size=2)) +
  theme_cowplot(title.fontsize) +
  geom_abline(slope=1, linetype="dashed") +
  stat_cor(size=2) +
  ylim(0, 1) +
  xlim(0, 1)

print(correlations.plot)

# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X.cor)

Matrix designs


U

# Plot U 
u.plot <- plot_single_U(u, paste0("U\n(dictionary size: ", length(unique(u$module)), ")"))
print(u.plot)

A’s

# Plot A's
for (n in unique(a$type)){
  cat("####", n, "\n")
  
  a.plot <- plot_single_A(a %>%
                            dplyr::filter(type==n) %>%
                            dplyr::select(-type), "A")
  print(a.plot)
  
  cat("\n\n")
}

fixed

best