knitr::opts_chunk$set(echo=TRUE)
knitr::opts_chunk$set(warning=FALSE, message=FALSE)
# Load libraries
library(batchelor)
library(Rphenograph)
library(igraph)
library(dittoSeq)
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")
## TODO: Change paths
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Tonsil_th179_2"
# Set path to masks
masks.path <- "/mnt/bb_dqbm_volume/data/20221108_TsH_LSS_cisiabmix2_179/steinbock/masks_deepcell"
First, we read in the results from the CISI analysis using a pre-defined A and the best parameters from the parameter sweep. For this we have data from simulated composite data from training, from the actual experiment and the decomposed data.
The channels from the experiment were decomposed using the dictionary U with the best predicted performance (from the run using ‘min strategy’ for A) for a 8 to 6 compression. This dictionary came from the run with the parameters: k=4, d=10 and m=4 and contains 10 module.
## Read results
# Read in all results into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
full.names=TRUE, recursive=TRUE)
results.df <- lapply(results.files, read_results, type="res", voi="comparison") %>%
bind_rows() %>%
dplyr::select(-c("dataset", "training", "datasize")) %>%
mutate(comparison=ifelse(comparison=="best-A", "experiment-simulation_best-A", comparison))
## Read X_test, X_simulated and X_decomposed
# Specify files to be read in
X_results.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)
# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. if it is the ground truth or simulated X and if it comes from training simulations,
# experiment simulation or the acutal decomposed experiment)
X_results.list <- lapply(X_results.files, read_results, type="x", voi="comparison")
X_results.list <- lapply(X_results.list, function(sce.temp){
metadata(sce.temp) <- within(metadata(sce.temp), rm(dataset, training))
assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
sce.temp
})
## Read anndata object containing composite measurements
X_simulation.files <- list.files(analysis.path, "composite_measurements.h5ad",
full.names=TRUE, recursive=TRUE)
X_simulation.list <- lapply(X_simulation.files, read_results, type="x", use_voi=FALSE)
# Remove non-composite channels, add if data comes form simulated or real composite
# measurements and add arcsinh transformed counts
X_simulation.list <- lapply(1:length(X_simulation.list), function(i){
sce.temp <- X_simulation.list[[i]]
sce.temp <- sce.temp[grepl("CC\\d_", rownames(sce.temp)), ]
metadata(sce.temp) <- within(metadata(sce.temp), rm(dataset, training))
metadata(sce.temp)$ground_truth <- ifelse(grepl("simulated", X_simulation.files[i]),
"simulated", "real_composite")
assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
sce.temp
})
## Read U
u.files <- list.files(analysis.path, "gene_modules",
full.names=TRUE, recursive=TRUE)
u.files <- u.files[!grepl("min", u.files)]
u <- read_single_U(u.files)
## Read A's
a.files <- list.files(analysis.path, "version_*",
full.names=TRUE, recursive=TRUE)
a <- lapply(a.files, function(a.file) {
a.temp <- read_single_A(a.file) %>%
mutate(type=ifelse(grepl("best", a.file), "best", "fixed"))
a.temp
}) %>% bind_rows()
# Read in masks
masks <- loadImages(masks.path, as.is=TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))
The barplot shows the different measurements used to evaluate decomposition of CISI.
There are five different results: training (decomposed simulated data from training compared to ground truth training data using fixed A), training-original (decomposed simulated data from training compared to ground truth training data using the best A from the parameter sweep), experiment-simulation (decomposed simulated data from experiment data compared to ground truth experiment data using fixed A), experiment-simulation-best-A (decomposed simulated data from experiment data compared to ground truth experiment data using best A from the parameter sweep) and experiment (decomposed real composite measurements from experiment compared to ground truth experiment data).
# For each different measurement of training results, plot barplot
# Melt dataframe for plotting
data_temp <- results.df %>%
dplyr::select(-c("version")) %>%
pivot_longer(!c("simulation", "comparison"), names_to="measure", values_to="value")
# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "comparison")
print(results.barplot)
Here we show the decomposed measurements vs the ground truth measurements for all images.
# Subset list to decomposed and ground truth expressions
X_decomposition.list <- keep(X_results.list, function(x){
metadata(x)$comparison=="experiment"})
# Find the names of all protein channels
pois <- rownames(X_decomposition.list[[1]])
names(pois) <- rownames(X_decomposition.list[[1]])
# Call plot_cells to get individual plots for all rois for decomposed and real
# measurements
for (n in names(pois)){
cat("####", n, "\n")
img <- plot_cells(X_decomposition.list, masks, pois[[n]], layer="exprs", display="all")
# Plot decomposed vs true results
img <- plot_grid(plotlist=img, ncol=2,
labels=unlist(lapply(X_decomposition.list, function(sce){metadata(sce)$ground_truth})),
label_size=title.fontsize, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
print(img)
cat("\n\n")
}
Correlation between ground truth and decomposed results per protein
for
arcsinh transformed counts.
aoi <- "exprs"
# Calculate correlations between ground truth and decomposed data for each protein
X.cor <- lapply(X_decomposition.list, function(sce){
counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
mutate(protein=rownames(.)) %>%
melt() %>%
dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
cell=variable)
}) %>% bind_rows() %>%
group_by(protein, cell) %>%
summarise_all(na.omit) %>%
group_by(protein) %>%
summarise(correlation=cor(ground_truth, decomposed))
# Plot correlations per protein
proteinCorr <- plot_protein_cor(X.cor) + ylim(0, 1)
print(proteinCorr)
# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X_decomposition.list)
Here we show the composite measurements vs the simulated composite measurements for all images.
# Find the names of all composite channels
pois <- rownames(X_simulation.list[[1]])
names(pois) <- rownames(X_simulation.list[[1]])
# Call plot_cells to get individual plots for all rois for simulated and real
# composite measurements
for (n in names(pois)){
cat("####", n, "\n")
img <- plot_cells(X_simulation.list, masks, pois[[n]], layer="exprs", display="all")
# Plot simulated vs true composite measurements
img.grid <- plot_grid(plotlist=img, ncol=2,
labels=unlist(lapply(X_simulation.list, function(sce){metadata(sce)$ground_truth})),
label_size=title.fontsize, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
print(img.grid)
cat("\n\n")
}
# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X_simulation.list)
Here we show the images of decomposed simulated data from the individual ground truth channels in the actual experiment compared to the actual decomposed data.
# Create new list with SCE objects containing decomposed simulated data and real
# decomposed data
X_sim_decomposition.list <- keep(X_results.list, function(x){
(metadata(x)$comparison=="experiment-simulation" & metadata(x)$ground_truth=="simulated") |
(metadata(x)$comparison=="experiment" & metadata(x)$ground_truth=="decomposed")})
# Find the names of all channels
pois <- rownames(X_sim_decomposition.list[[1]])
names(pois) <- rownames(X_sim_decomposition.list[[1]])
# Call plot_cells to get individual plots for all rois for simulated and real
# composite measurements
for (n in names(pois)){
cat("####", n, "\n")
img <- plot_cells(X_sim_decomposition.list, masks, pois[[n]], layer="exprs", display="all")
# Plot simulated vs true composite measurements
img.grid <- plot_grid(plotlist=img, ncol=2,
labels=unlist(lapply(X_sim_decomposition.list, function(sce){metadata(sce)$ground_truth})),
label_size=title.fontsize, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
print(img.grid)
cat("\n\n")
}
# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X_sim_decomposition.list)
Scatterplot of correlations between ground truth and simulated or decomposed results per protein for arcsinh transformed counts .
aoi <- "exprs"
# Calculate correlations between ground truth and simulated data for each protein
X.cor <- X.cor %>%
dplyr::rename(correlation_decomposition=correlation) %>%
merge(lapply(keep(X_results.list, function(x){
metadata(x)$comparison=="training"}),
function(sce){
counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
mutate(protein=rownames(.)) %>%
melt() %>%
dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
cell=variable)
}) %>% bind_rows() %>%
group_by(protein, cell) %>%
summarise_all(na.omit) %>%
group_by(protein) %>%
summarise(correlation_training=cor(ground_truth, simulated)))
# Plot correlations for each protein
correlations.plot <- ggscatter(X.cor,
x="correlation_training",
y="correlation_decomposition",
label="protein",
label.rectangle=TRUE,
color=pal_npg("nrc")("1"),
add.params=list(color=pal_npg("nrc")("4")[4],
size=2)) +
theme_cowplot(title.fontsize) +
geom_abline(slope=1, linetype="dashed") +
stat_cor(size=2) +
ylim(0, 1) +
xlim(0, 1)
print(correlations.plot)
# Delete objects no longer needed (since there are a lot of big objects in this RMD)
rm(X.cor)
# Plot U
u.plot <- plot_single_U(u, paste0("U\n(dictionary size: ", length(unique(u$module)), ")"))
print(u.plot)
# Plot A's
for (n in unique(a$type)){
cat("####", n, "\n")
a.plot <- plot_single_A(a %>%
dplyr::filter(type==n) %>%
dplyr::select(-type), "A")
print(a.plot)
cat("\n\n")
}