knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Load libraries
library(scater)
# Import helper fncs
source("../analysis/helpers/helper_fnc.R")
# Set general input paths to all analysis files
analysis.path <- "/mnt/bb_dqbm_volume/analysis/Tonsil_th152/training/full"
# Set path to masks
masks.path <- "/mnt/bb_dqbm_volume/data/Tonsil_th152/steinbock/masks_deepcell"
# Output folder for plots
plot.folder <- file.path(thesis_output.folder, "analysis_tonsil")
if(!dir.exists(plot.folder)){dir.create(plot.folder)}
## Read results
# Read in all results for tonsil data (spillover and non-spillover corrected)
# into one dataframe
results.files <- list.files(analysis.path, 'simulation_results.txt',
full.names=TRUE, recursive=TRUE)
results.files <- results.files[grepl("sm_", results.files) & grepl("no_norm", results.files)]
results.df <- lapply(results.files, read_results, type="res", voi="corrected") %>%
bind_rows() %>%
mutate(corrected=gsub("s", "", corrected))
## Read X_test and X_simulated
# Specify X_test and X_simulated files to be read in
X.files <- list.files(analysis.path, "X_", full.names=TRUE, recursive=TRUE)
X.files <- X.files[grepl("sm_", X.files) & grepl("no_norm", X.files)]
# Read in all sce experiments from saved anndata and save additional info in metadata
# (e.g. which dataset is used in training and testing, if spillover correction was used
# and if it is the ground truth or simulated X)
X.list <- lapply(X.files, read_results, type="x", voi="sm")
X.list <- lapply(X.list, function(sce.temp){
metadata(sce.temp)$sm <- gsub("s", "", metadata(sce.temp)$sm)
assay(sce.temp, "exprs") <- asinh(counts(sce.temp)/1)
sce.temp
})
## Read masks
# Read in masks for tonsil data
masks <- loadImages(masks.path, as.is = TRUE)
mcols(masks) <- DataFrame(sample_id=names(masks))
For each different measurement used to test training results, a barplot is shown comparing spillover corrected and uncorrected results for Tonsil th152 data.
# For each different measurement of training results, plot barplot comparing
# spillover corrected and uncorrected results
# Melt dataframe for plotting
data_temp <- results.df %>%
dplyr::select(-c("dataset", "training", "datasize", "version", "Best crossvalidation fold")) %>%
pivot_longer(!c("corrected", "simulation"), names_to="measure", values_to="value")
# Create barplot
results.barplot <- plot_cisi_results(data_temp, "measure", "value", "corrected")
print(results.barplot)
# Save plot for master thesis
thesis_results.barplot <- ggplot(data_temp %>%
dplyr::filter(corrected=="uncorrected") %>%
mutate(simulation=ifelse(simulation=="composite",
"no_noise", simulation)),
aes(x=measure, y=value, pattern=simulation,
fill=simulation)) +
geom_bar(stat="identity",
position=position_dodge(preserve="single"),
width=0.6) +
scale_y_continuous(limits=c(0, 1)) +
scale_fill_npg() +
labs(x="Measure", y="Value", fill="Simulation Type") +
theme_cowplot(thesis_title.fontsize) +
theme(legend.position="bottom",
axis.text.x=element_text(angle=45, vjust=1, hjust=1))
ggsave(file.path(plot.folder, "d_res_T.pdf"), thesis_results.barplot,
width=13.5, height=10, units="cm")
The following plots show some neighboring channels for Tonsil th152 data trained on spillover corrected and non-spillover corrected data.
# Add names and reorder list
names(X.list) <- lapply(X.list, function(x){metadata(x)$ground_truth})
X.list <- append(X.list[-c(1:2)], X.list[1:2])
# List of combinations of proteins of interest
poi <- list(c("CD8a", "Tim-3"), c("CD68", "CD20"))
for (p_vec in poi) {
cat("###", p_vec[1], "vs", p_vec[2], "\n")
# Plot arcsinh transformed counts of proteins of interest of decomposed data trained
# with and without spillover correction
X.spillover <- plot_grid(plot_exprs(X.list[1:2], "", p_vec[1], p_vec[2]),
plot_exprs(X.list[3:4], "", p_vec[1], p_vec[2]),
plot_exprs(X.list[5:6], "", p_vec[1], p_vec[2]),
ncol=1, labels=unique(unlist(lapply(X.list, function(sce){
metadata(sce)$sm}))),
label_size=title.fontsize, hjust=c(-5.5, -4, -8.5),
vjust=0.9, scale = 0.9)
print(X.spillover)
cat("\n\n")
}
d_neighbours_T.grid <- plot_grid(plot_exprs(X.list[3:4], "", poi[[1]][1],
poi[[1]][2], thesis=TRUE),
plot_exprs(X.list[3:4], "", poi[[2]][1],
poi[[2]][2], thesis=TRUE),
ncol=1, labels="AUTO", align="h", axis="l")
save_plot(file.path(plot.folder, "d_neighbours_T.pdf"),
d_neighbours_T.grid, base_width=4.77)
Correlation between ground truth and decomposed results per protein
for
arcsinh transformed counts.
aoi <- "exprs"
# Calculate correlations between ground truth and simulated data for each protein
X.cor <- lapply(X.list, function(sce){
counts.long <- as.data.frame(assays(sce)[[aoi]]) %>%
mutate(protein=rownames(.)) %>%
melt() %>%
dplyr::rename(!!as.symbol(metadata(sce)$ground_truth):=value,
cell=variable) %>%
mutate(sm=metadata(sce)$sm,
dataset=paste(metadata(sce)$training, metadata(sce)$dataset, sep="_"))
}) %>% bind_rows() %>%
group_by(protein, cell, sm, dataset) %>%
summarise_all(na.omit) %>%
group_by(sm, dataset, protein) %>%
mutate(correlation=cor(ground_truth, simulated))
# Plot correlations for spillover and non-spillover corrected data
for (i in unique(X.cor$sm)) {
cat("####", i, "\n")
proteinCorr <- plot_protein_cor(X.cor %>%
dplyr::filter(sm==i))
print(proteinCorr)
cat("\n\n")
}
# Save plot for master thesis
d_protein_cor.plot <- plot_protein_cor(X.cor %>% dplyr::filter(sm=="uncorrected")) +
theme_cowplot(thesis_title.fontsize)
ggsave(file.path(plot.folder, "d_protein_cor.pdf"),
d_protein_cor.plot +
labs(x=update_text(d_protein_cor.plot$labels$x),
y=update_text(d_protein_cor.plot$labels$y)),
width=11, height=12, units="cm")
For a manual inspection of different protein distribution characteristics, we plot the boxplots of the original protein expressions coloured by correlation results. These show: Interquartile range (IQR), 1st and 3rd quantiles, median, mean, max, min and outliers.
# Plot boxplots of protein expr per dataset coloured by protein correlation results
# (performance)
# Plot correlations for each test/training dataset combination
for (i in unique(X.cor$sm)) {
cat("####", i, "\n")
proteinDist <- plot_protein_dist(X.cor %>%
dplyr::filter(sm==i),
0)
print(proteinDist)
cat("\n\n")
}