cellphonedb之可视化受体配体对
1 下载cellphonedb官网测试数据,并运行软件
cellphonedb官网
下载测试数据
curl https://raw.githubusercontent.com/Teichlab/cellphonedb/master/in/example_data/test_counts.txt --output test_counts.txt
curl https://raw.githubusercontent.com/Teichlab/cellphonedb/master/in/example_data/test_meta.txt --output test_meta.txt
Python运行软件cellphonedb
cd cpdb-venv
source cpdb-venv/bin/activate
cellphonedb method statistical_analysis test_meta.txt test_counts.txt --iterations=10 --threads=20
查看测试文件
data_path <- 'data'
test_meta <- read.delim(file.path(data_path, 'test_meta.txt'), header=T, sep='\t')
table(test_meta$cell_type)
##
## Myeloid NKcells_0 NKcells_1 Tcells
## 1 5 3 1
2 处理cellphonedb结果文件
# Offer downloadable file function
library(tidyverse)
library(DT)
create_dt <- function(x){
DT::datatable(x,
extensions = 'Buttons',
options = list(dom = 'Blfrtip',
buttons = c('csv', 'excel', 'pdf'),
lengthMenu = list(c(10,25,50,-1), c(10,25,50,"All"))))
}
2-1 读取cellphonedb结果文件
new_path <- 'out'
mypvals <- read.delim(file.path(new_path,"pvalues.txt"), check.names = FALSE)
mymeans <- read.delim(file.path(new_path, "means.txt"), check.names = FALSE)
mysigmeans <- read.delim(file.path(new_path, "significant_means.txt"), check.names = FALSE)
dim(mymeans)
## [1] 190 27
mymeans[1:4, 1:4]
## id_cp_interaction interacting_pair partner_a partner_b
## 1 CPI-SS028784FC6 HLA-DPA1_TNFSF9 simple:HLADPA1 simple:P41273
## 2 CPI-SS00A8596B5 PVR_TNFSF9 simple:P15151 simple:P41273
## 3 CPI-SS0B84DAE3D PVR_CD96 simple:P15151 simple:P40200
## 4 CPI-SS0A8627ED6 PVR_CD226 simple:P15151 simple:Q15762
2-2 调整受体配体对顺序,要求配体在前,受体在后
# Rearrange data column sequence
library(dplyr)
order_sequence <- function(df){
da <- data.frame()
for(i in 1:length(df$gene_a)){
sub_data <- df[i, ]
if(sub_data$receptor_b=='False'){
if(sub_data$receptor_a=='True'){
old_names <- colnames(sub_data)
my_list <- strsplit(old_names[-c(1:11)], split="\\|")
my_character <- paste(sapply(my_list, '[[', 2L), sapply(my_list, '[[', 1L), sep='|')
new_names <- c(names(sub_data)[1:4], 'gene_b', 'gene_a', 'secreted', 'receptor_b', 'receptor_a', "annotation_strategy", "is_integrin", my_character)
sub_data = dplyr::select(sub_data, new_names)
# print('Change sequence!!!')
names(sub_data) <- old_names
da = rbind(da, sub_data)
}
}else{
da = rbind(da, sub_data)
}
}
return(da)
}
# 受体配体对,要求一个为受体,一个为配体
df <- subset(mymeans, receptor_a == 'True' & receptor_b == 'False' | receptor_a == 'False' & receptor_b == 'True')
# 筛选单基因的受体配体对,待商榷!
df <- df %>% dplyr::mutate(na_count = rowSums(is.na(df) | df == "")) %>% subset(na_count == 0) %>% dplyr::select(-na_count)
dim(df)
## [1] 72 27
# 运行函数,重排受体配体顺序,并生成新列
means_order <- order_sequence(df) %>% tidyr::unite(Pairs, gene_a, gene_b)
pvals_order <- order_sequence(mypvals) %>% tidyr::unite(Pairs, gene_a, gene_b)
# 保存文件
# write.table(means_order %>% dplyr::select(-interacting_pair), file.path(new_path,"means_order.txt"), sep = '\t', quote=F, row.names=F)
# write.table(pvals_order %>% dplyr::select(-interacting_pair), file.path(new_path,"pvalues_order.txt"), sep = '\t', quote=F, row.names=F)
2-3 合并表达量文件和pvalue文件
means_sub <- means_order[, c('Pairs', colnames(mymeans)[-c(1:11)])]
pvals_sub <- pvals_order[, c('Pairs', colnames(mymeans)[-c(1:11)])]
means_gather <- tidyr::gather(means_sub, celltype, mean_expression, names(means_sub)[-1])
pvals_gather <- tidyr::gather(pvals_sub, celltype, pval, names(pvals_sub)[-1])
mean_pval <- dplyr::left_join(means_gather, pvals_gather, by = c('Pairs', 'celltype'))
create_dt(mean_pval)
# write.table(mean_pval, file.path(new_path,"mean_pval_order.txt"), sep = '\t', quote=F, row.names=F)
2-4 提取显著性表达的受体配体对
# 至少在一组细胞类型两两组合中,pvalue显著的受体配体对,认为是显著性表达的受体配体对
a <- mean_pval %>% dplyr::select(Pairs, celltype, pval) %>% tidyr::spread(key=celltype, value=pval)
sig_pairs <- a[which(rowSums(a<=0.05)!=0), ]
dim(sig_pairs)
## [1] 72 17
# 保存显著性表达的受体配体对
mean_pval_sub <- subset(mean_pval, Pairs %in% sig_pairs$Pairs)
# write.table(mean_pval_sub, file.path(new_path,"mean_pval_sig.txt"), sep = '\t', quote=F, row.names=F)
3 可视化显著性表达的受体配体对
library(RColorBrewer)
library(scales)
library(ggplot2)
library(cowplot)
# 比较均值和P值数据变换前后的分布
p1 <- mean_pval_sub %>% ggplot(aes(x=mean_expression)) + geom_density(alpha=0.2, color='red')
p2 <- mean_pval_sub %>% ggplot(aes(x=log2(mean_expression))) + geom_density(alpha=0.2, color='red')
mean_distribution <- plot_grid(p1, p2, labels = "AUTO", nrow=1)
p1 <- mean_pval_sub %>% ggplot(aes(x=pval)) + geom_density(alpha=0.2, color='red')
p2 <- mean_pval_sub %>% ggplot(aes(x=-log10(pval+1*10^-3))) + geom_density(alpha=0.2, color='red')
pval_distribution <- plot_grid(p1, p2, labels = "AUTO", nrow=1)
plot_grid(mean_distribution, pval_distribution, labels = "AUTO", ncol=1)
# 展示部分(10对)显著性表达的受体配体对
p <- mean_pval_sub %>% dplyr::arrange(Pairs) %>% head(16*10) %>%
ggplot(aes(x=Pairs, y=celltype)) +
# geom_point(aes(color=log2(mean_expression), size=pval)) +
# scale_size(trans = 'reverse') +
geom_point(aes(color=log2(mean_expression), size=-log10(pval+1*10^-3)) ) +
guides(colour = guide_colourbar(order = 1),size = guide_legend(order = 2)) +
labs(x='', y='') +
scale_color_gradientn(name='Expression level \n(log2 mean expression \nmolecule1, molecule2)', colours = terrain.colors(100)) +
# scale_color_gradient2('Expression level \n(log2 mean expression \nmolecule1, molecule2)', low = 'blue', mid = 'yellow', high = 'red') +
theme(axis.text.x= element_text(angle=45, hjust=1)) +
# coord_flip() +
theme(
panel.border = element_rect(color = 'black', fill = NA),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
panel.background = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.ticks = element_blank()
# plot.title = element_text(hjust = 0.5),
# legend.position = 'bottom' # guides(fill = guide_legend(label.position = "bottom"))
# legend.position = "bottom"
# axis.text.y.right = element_text(angle=270, hjust=0.5)
) +
theme(legend.key.size = unit(0.4, 'cm'), #change legend key size
# legend.key.height = unit(1, 'cm'), #change legend key height
# legend.key.width = unit(1, 'cm'), #change legend key width
legend.title = element_text(size=9), #change legend title font size
legend.text = element_text(size=8)) #change legend text font size
p
save_plot(paste0(new_path,"/Ligands_Receptors_dotplot.png"), p, base_height = 4, base_aspect_ratio = 3, base_width = NULL, dpi=600)
save_plot(paste0(new_path,"/Ligands_Receptors_dotplot.pdf"), p, base_height = 4, base_aspect_ratio = 3, base_width = NULL)