knitr::opts_chunk$set(echo=TRUE)
knitr::opts_chunk$set(warning=FALSE, message=FALSE)
# Load libraries
library(RColorBrewer)
library(dittoSeq)
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Tonsil_th152/training/subset"
# Set path to masks
masks.path <- "/mnt/bb_dqbm_volume/data/Tonsil_th152/steinbock/masks_deepcell"
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")
First, we read in the results from CISI pilot run for the chosen immune markers (CD15, CD20, CD3, CD38, CD4, CD68, CD8a, ICOS, Ki-67, MPO, panCK, SMA, CD303, FOXP3, GranzymeB). The pilot ran was done using no normalization and with k=1.
## Read results
# Read in all results for tonsil into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
full.names=TRUE, recursive=TRUE)
results.files <- results.files[grepl("pilot_immune_channels", results.files)]
results.df <- lapply(results.files, read_results, type="res", voi="normalisation") %>%
bind_rows() %>%
mutate(normalisation=ifelse(!grepl("normalized", normalisation), "unnormalized", "combi"))
## Read X_test and X_simulated
# Specify X_test and X_simulated files to be read in
X.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)
X.files <- X.files[grepl("pilot_immune_channels", X.files)]
# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. which dataset is used in training and testing and if it is the ground truth or simulated X)
X.list <- lapply(X.files, read_results, type="x", voi="normalisation")
X.list <- lapply(X.list, function(sce.temp){
assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
metadata(sce.temp)$normalisation <- ifelse(!grepl("normalized", metadata(sce.temp)$normalisation),
"unnormalized", "combi")
sce.temp
})
## Read U
u.files <- list.files(analysis.path, "gene_modules.csv",
full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
u.files <- u.files[grepl("pilot_immune_channels", u.files)]
u <- lapply(u.files, read_U, type="training") %>%
bind_rows() %>%
dplyr::rename(dataset=rep, normalisation=k) %>%
mutate(normalisation=ifelse(!grepl("normalized", normalisation), "unnormalized", "combi"))
## Read A
a.files <- list.files(analysis.path, "version_*",
full.names=TRUE, recursive=TRUE)
# TODO: change this line if more files come along and need to exclude
a.files <- a.files[grepl("pilot_immune_channels", a.files)]
a <- lapply(a.files, read_A, voi="normalisation") %>%
bind_rows() %>%
mutate(normalisation=ifelse(!grepl("normalized", normalisation), "unnormalized", "combi"))
# Read in masks for tonsil data
masks <- loadImages(masks.path, as.is=TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))
For each different measurement used to test training results, a barplot is shown comparing CISI results.
# For each different measurement of training results, plot barplot
# Melt dataframe for plotting
data_temp <- results.df %>%
dplyr::select(-c("dataset", "training", "datasize", "version", "Best crossvalidation fold")) %>%
pivot_longer(!c("simulation", "normalisation"), names_to="measure", values_to="value")
# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "normalisation")
print(results.barplot)
Correlation between ground truth and decomposed results per protein
for
asinh transformed counts.
aoi <- "exprs"
# Calculate correlations between ground truth and simulated data for each protein
X.cor <- lapply(X.list, function(sce){
counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
mutate(protein=rownames(.)) %>%
melt() %>%
dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
cell=variable) %>%
mutate(normalisation=metadata(sce)$normalisation)
}) %>% bind_rows() %>%
group_by(protein, cell, normalisation) %>%
summarise_all(na.omit) %>%
group_by(normalisation, protein) %>%
mutate(correlation=cor(ground_truth, simulated))
# Plot correlations for normalized and combi CISI run
for (i in unique(X.cor$normalisation)) {
cat("#####", i, "\n")
proteinCorr <- plot_protein_cor(X.cor %>%
dplyr::filter(normalisation==i)) + ylim(0, 1)
print(proteinCorr)
cat("\n\n")
}
Expression values for cells in test image for worst performing protein FOXP3 for unnormalized and combi CISI run.
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "FOXP3"
for (n in unique(X.cor$normalisation)){
cat("#####", n, "\n")
img.foxp3 <- plot_cells(keep(X.list, function(sce.temp){
metadata(sce.temp)$normalisation == n
}), masks, poi)
# Plot decomposed vs true results for test roi (002)
img.foxp3 <- plot_grid(plotlist=append(img.foxp3[grepl("20220520_TsH_th152_cisi1_002",
names(img.foxp3))],
img.foxp3[grepl("legend",
names(img.foxp3))]), ncol=2,
labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
label_size=title.fontsize, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
print(img.foxp3)
cat("\n\n")
}
Expression values for cells in test image for second worst performing protein CD303 for unnormalized and combi CISI run.
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "CD303"
for (n in unique(X.cor$normalisation)){
cat("#####", n, "\n")
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
img.cd303 <- plot_cells(keep(X.list, function(sce.temp){
metadata(sce.temp)$normalisation == n
}), masks, poi)
# Plot decomposed vs true results for test roi (002)
img.cd303 <- plot_grid(plotlist=append(img.cd303[grepl("20220520_TsH_th152_cisi1_002",
names(img.cd303))],
img.cd303[grepl("legend",
names(img.cd303))]), ncol=2,
labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
label_size=title.fontsize, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
print(img.cd303)
cat("\n\n")
}
Expression values for cells in test image for another bad performing protein CD68 for unnormalized and combi CISI run.
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "CD68"
for (n in unique(X.cor$normalisation)){
cat("#####", n, "\n")
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
img.cd68 <- plot_cells(keep(X.list, function(sce.temp){
metadata(sce.temp)$normalisation == n
}), masks, poi)
# Plot decomposed vs true results for test roi (002)
img.cd68 <- plot_grid(plotlist=append(img.cd68[grepl("20220520_TsH_th152_cisi1_002",
names(img.cd68))],
img.cd68[grepl("legend",
names(img.cd68))]), ncol=2,
labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
label_size=title.fontsize, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
print(img.cd68)
cat("\n\n")
}
Expression values for cells in test image for another bad performing protein CD38 for unnormalized and combi CISI run.
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "CD38"
for (n in unique(X.cor$normalisation)){
cat("#####", n, "\n")
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
img.cd38 <- plot_cells(keep(X.list, function(sce.temp){
metadata(sce.temp)$normalisation == n
}), masks, poi)
# Plot decomposed vs true results for test roi (002)
img.cd38 <- plot_grid(plotlist=append(img.cd38[grepl("20220520_TsH_th152_cisi1_002",
names(img.cd38))],
img.cd38[grepl("legend",
names(img.cd38))]), ncol=2,
labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
label_size=title.fontsize, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
print(img.cd38)
cat("\n\n")
}
Expression values for cells in test image for another bad performing protein GranzymeB for unnormalized and combi CISI run.
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "GranzymeB"
for (n in unique(X.cor$normalisation)){
cat("#####", n, "\n")
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
img.granzymeB <- plot_cells(keep(X.list, function(sce.temp){
metadata(sce.temp)$normalisation == n
}), masks, poi)
# Plot decomposed vs true results for test roi (002)
img.granzymeB <- plot_grid(plotlist=append(img.granzymeB[grepl("20220520_TsH_th152_cisi1_002",
names(img.granzymeB))],
img.granzymeB[grepl("legend",
names(img.granzymeB))]), ncol=2,
labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
label_size=title.fontsize, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
print(img.granzymeB)
cat("\n\n")
}
Expression values for cells in test image for best performing protein CD20 for unnormalized and combi CISI run.
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "CD20"
for (n in unique(X.cor$normalisation)){
cat("#####", n, "\n")
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
img.cd20 <- plot_cells(keep(X.list, function(sce.temp){
metadata(sce.temp)$normalisation == n
}), masks, poi)
# Plot decomposed vs true results for test roi (002)
img.cd20 <- plot_grid(plotlist=append(img.cd20[grepl("20220520_TsH_th152_cisi1_002",
names(img.cd20))],
img.cd20[grepl("legend",
names(img.cd20))]), ncol=2,
labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
label_size=title.fontsize, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
print(img.cd20)
cat("\n\n")
}
Expression values for cells in test image for CD3, a protein performing mediocre for unnormalized and combi CISI run.
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
poi <- "CD3"
for (n in unique(X.cor$normalisation)){
cat("#####", n, "\n")
# Call plot_cells to get individual plots for test roi for decomposed and true
# results
img.cd3 <- plot_cells(keep(X.list, function(sce.temp){
metadata(sce.temp)$normalisation == n
}), masks, poi)
# Plot decomposed vs true results for test roi (002)
img.cd3 <- plot_grid(plotlist=append(img.cd3[grepl("20220520_TsH_th152_cisi1_002",
names(img.cd3))],
img.cd3[grepl("legend",
names(img.cd3))]), ncol=2,
labels=unlist(lapply(X.list, function(sce){metadata(sce)$ground_truth})),
label_size=title.fontsize, hjust=c(-2, -1.5),
vjust=1, scale=0.9)
print(img.cd3)
cat("\n\n")
}
# Have a look at dittoRidgePlots of a bad performing protein
X.list <- lapply(X.list, function(sce.temp) {
colData(sce.temp)$dummy <- "1"
sce.temp
})
plot.list <- NULL
for(sce in X.list){
plot.list <- append(plot.list, list(dittoRidgePlot(sce, var="CD303",
group.by="dummy", assay="exprs") +
ggtitle(metadata(sce)$ground_truth) +
coord_cartesian(xlim=c(0, 6.2), expand=TRUE)))
}
print(plot_grid(plotlist=plot.list, ncol=1))
# Plot U for normalized and combi CISI run
cor <- plot_U(u, "normalisation", "dataset")
plot_A(a %>%
dplyr::select(-training), "normalisation", "dataset")