单细胞RNA-seq揭示TNBC的异质性(图表复现01)
摘要
三阴性乳腺癌(TNBC)是一种侵袭性亚型,其特征是肿瘤内广泛的异质性。为了研究其潜在的生物学特性,作者对6个原代TNBC的1500个细胞进行了单细胞RNA测序(scRNA-seq)。并且发现每个肿瘤细胞间基因表达程序的异质性是可变的,并且很大程度上与推断基因组拷贝数变化的克隆性相关,表明基因型驱动个体亚群的基因表达表型。基因表达谱的聚类识别出多种肿瘤共有的不同的恶性细胞亚群,包括与多种治疗耐药和转移特征相关的单一亚群,并通过激活糖鞘脂代谢和相关的先天免疫途径来表征功能。一个定义该亚群的新特征预测了一个大队列中TNBC患者的长期预后。总的来说,该分析揭示了TNBC的功能异质性及其与基因组进化的关联,并揭示了导致该疾病预后不良的未预料到的生物学原理。
实验数据
在文末提供了可用数据GSE118390,并且作者也在其github账号中提供了文章中所用的代码与其他实验数据
GSE118390:https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE118390
Github:https://github.com/Michorlab/tnbc_scrnaseq
复现图表
Fig1b
代码重复(尽管作者提供了代码,但是运行还是会出错,这就需要我们了解代码具体的意思,从而进行深入学习)
1.操作准备流程
rm(list=ls())#魔幻操作,一键清空
setwd("..\\tnbc_scrnaseq-master\\data\\")
#首先定位文件目录(更改为自己将数据存放的位置)
#install.packages("here"),加载R
library(here)
source(here::here("code", "libraries.R"))#需要加载的R包
source(here::here("code", "funcs.R"))#用于细胞类型识别、聚类、签名等各个方面的功能的函数
source(here::here("code", "funcs_markers.R"))#根据标记识别细胞类型的功能的函数
#后面两个函数大家可以打开进行阅读,了解一下作者的函数构造原理
2.数据导入
##读入标准化数据和表型数据(标准化数据需要自行去GEO下载)
mat_norm <- read.table(("norm_data.txt"), sep = "\t", header = TRUE)#标准化后的数据
pd_norm <-read.table(("pd_norm.txt"),sep="\t",header=TRUE,stringsAsFactors= FALSE)#样本信息
fd_norm <-read.table(("fd_norm.txt"),sep="\t",header=TRUE,stringsAsFactors= FALSE)#基因信息
melanoma_cellcycle <- read.table(("melanoma_cellcycle.txt"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)#黑色素瘤的周期分类文件(黑色素瘤的细胞周期分类(Tirosch et al 2016))
all_markers <- read.table(("markers_clean.txt"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)#细胞类型标志物文件
3.构建单细胞常规S4对象
sceset_final <- SingleCellExperiment(assays = list(exprs = as.matrix(mat_norm)),
colData = pd_norm,
rowData = fd_norm)#创建S4单细胞数据对象
4.颜色设置(为注释内容设定相应颜色)
epithelial_col <- brocolors("crayons")["Maroon"]
basal_epithelial_col <- brocolors("crayons")["Red"]
luminal_epithelial_col <- brocolors("crayons")["Sunset Orange"]
luminal_progenitor_col <- brocolors("crayons")["Salmon"]
stroma_col <- brocolors("crayons")["Aquamarine"]
endothelial_col <- brocolors("crayons")["Wisteria"]
PTPRC_col <- brocolors("crayons")["Inchworm"]
t_cell_col <- brocolors("crayons")["Screamin' Green"]
b_cell_col <- brocolors("crayons")["Fern"]
macrophage_col <- brocolors("crayons")["Tropical Rain Forest"]
marker_cols <- c("epithelial" = unname(epithelial_col), "basal epithelial" = unname(basal_epithelial_col),
"luminal epithelial" = unname(luminal_epithelial_col), "luminal progenitor" = unname(luminal_progenitor_col),
"stroma" = unname(stroma_col), "endothelial" = unname(endothelial_col),
"immune" = unname(PTPRC_col), "T cell" = unname(t_cell_col), "B cell" = unname(b_cell_col),
"macrophage" = unname(macrophage_col))
cycling_mel_cols <- c("Low cycling" = "gainsboro",
"High cycling" = unname(brocolors("crayons")["Mulberry"]))
depletion_cols <- c("depleted" = unname(brocolors("crayons")["White"]), "not depleted" = unname(brocolors("crayons")["Red"]))
pats_cols <- c("PT039" = unname(brocolors("crayons")["Orange Red"]), "PT058" = unname(brocolors("crayons")["Orange"]),
"PT081" = unname(brocolors("crayons")["Pink Flamingo"]), "PT084" = unname(brocolors("crayons")["Fern"]),
"PT089" = unname(brocolors("crayons")["Blue Violet"]), "PT126" = unname(brocolors("crayons")["Sky Blue"]))
tsne_cols <- c("epithelial" = unname(basal_epithelial_col), "stroma" = unname(stroma_col), "endothelial" = unname(endothelial_col),
"Tcell" = unname(t_cell_col), "Bcell" = unname(b_cell_col), "macrophage" = unname(macrophage_col))
anno_colors <- list("marker" = marker_cols, "cycling" = cycling_mel_cols, "immune depletion" = depletion_cols,
"patient" = pats_cols, "tsne" = tsne_cols)
5.黑色素瘤细胞周期分类(Tirosch et al 2016)
#利用我们前面导入的melanoma_cellcycle变量进行设置
melanoma_g1s <- melanoma_cellcycle$G1S#将G1S与G2M分开,便于分组
melanoma_g1s <- match_clean_vector_genes(mat_norm, melanoma_g1s)
scores_g1s <- avg_expr_genes(mat_norm, melanoma_g1s$index)
melanoma_g2m <- melanoma_cellcycle$G2M
melanoma_g2m <- match_clean_vector_genes(mat_norm, melanoma_g2m)
scores_g2m <- avg_expr_genes(mat_norm, melanoma_g2m$index)
cycling_mel <- rep(NA, length(scores_g1s))
#以公式区分当前样品是否处在细胞周期中
for (i in 1:length(cycling_mel)) {
if (scores_g1s[i] >= (median(scores_g1s) + 2 * mad(scores_g1s)) && scores_g2m[i] < (median(scores_g2m) + 2 * mad(scores_g2m)))
cycling_mel[i] <- "High cycling"
if (scores_g1s[i] < (median(scores_g1s) + 2 * mad(scores_g1s)) && scores_g2m[i] >= (median(scores_g2m) + 2 * mad(scores_g2m)))
cycling_mel[i] <- "High cycling"
if (scores_g1s[i] < (median(scores_g1s) + 2 * mad(scores_g1s)) && scores_g2m[i] < (median(scores_g2m) + 2 * mad(scores_g2m)))
cycling_mel[i] <- "Low cycling"
if (scores_g1s[i] >= (median(scores_g1s) + 2 * mad(scores_g1s)) && scores_g2m[i] >= (median(scores_g2m) + 2 * mad(scores_g2m)))
cycling_mel[i] <- "High cycling"
}
#更新样本信息
colData(sceset_final)$mel_scores_g1s <- scores_g1s
colData(sceset_final)$mel_scores_g2m <- scores_g2m
colData(sceset_final)$cycling_mel <- cycling_mel#将容器中的样本信息进行更新
pd_norm <- colData(sceset_final)
pd_norm <- as.data.frame(pd_norm)#新加的三列整合到样本信息中
#准备输入文件
patients_now <- c()
mats_now <- list()
pds_now <- list()
for (i in 1:length(unique(pd_norm$patient))) {
patients_now[i] <- sort(unique(pd_norm$patient))[i]
mats_now[[i]] <- mat_norm[, pd_norm$patient == patients_now[i]]
pds_now[[i]] <- pd_norm[pd_norm$patient == patients_now[i],]
}
names(mats_now) <- patients_now
names(pds_now) <- patients_now
#将6个患者样本进行拆分整合
6.细胞类型标志物的整理
#我们在前面也导入了all_markers变量
markers <- unique(all_markers[all_markers$gene %in% rowData(sceset_final)$hgnc_symbol, ])
#标志物名称转换
markers$type_heatmap <- markers$type
markers$type_heatmap[which(markers$type_heatmap == "luminalprogenitor")] <- "luminal progenitor"
markers$type_heatmap[which(markers$type_heatmap == "luminalepithelial")] <- "luminal epithelial"
markers$type_heatmap[which(markers$type_heatmap == "basalepithelial")] <- "basal epithelial"
markers$type_heatmap[which(markers$type_heatmap %in% c("EPCAM", "EGFR", "CDH1"))] <- "epithelial"
markers$type_heatmap[which(markers$type_heatmap == "Bcell")] <- "B cell"
markers$type_heatmap[which(markers$type_heatmap == "Tcell")] <- "T cell"
markers$type_long_heatmap <- markers$type_long
markers$type_long_heatmap[which(markers$type == "stroma")] <- "stroma"
markers$type_long_heatmap[which(markers$type == "endothelial")] <- "endothelial"
#将名字统一便于后续分析
7.热图注释文件
colors_markers_ch <- markers$type_heatmap
for (i in c(1:length(names(anno_colors$marker)))) {
colors_markers_ch <- replace(colors_markers_ch, colors_markers_ch == names(anno_colors$marker)[i], anno_colors$marker[i])
}
splits_ch <- as.factor(markers$type_long_heatmap)#注释细胞类型
splits_ch <- factor(splits_ch, levels(splits_ch)[c(2,4,1,3)])#level排序
colors_anno_markers_ch <- as.factor(markers$type_heatmap)
colors_anno_markers_ch <- factor(colors_anno_markers_ch,
levels(colors_anno_markers_ch)[c(4,2,5,6,9,3,8,10,1,7)])
#同上操作
ha_rows <- HeatmapAnnotation(df = data.frame(type = colors_anno_markers_ch),
annotation_legend_param = list(type = list(ncol = 2,
title = "cell type", title_position ="topcenter")),
which = "row",
col = list("type" = anno_colors$marker),
annotation_width = unit(3, "mm"))#热图注释文件准备(行)
cycling_now <- list()
depletion_now <- list()
ha_cols_up <- list()
ha_cols_bottom <- list()
for (i in 1:length(patients_now)) {
cycling_now[[i]] <- pds_now[[i]][,"cycling_mel"]
if (i == 1)
ha_cols_up[[i]] <- HeatmapAnnotation(df = data.frame(cycling = cycling_now[[i]]),
col = list(cycling = anno_colors$cycling),
show_annotation_name = T,
annotation_name_side = "left",
annotation_name_gp = gpar(fontsize = 11),
annotation_legend_param = list(list(title_position = "topcenter",
title = c("cycling status"))),
gap = unit(c(1, 1), "mm"))
if (i > 1)
ha_cols_up[[i]] <- HeatmapAnnotation(df = data.frame(cycling = cycling_now[[i]]),
col = list(cycling = anno_colors$cycling),
show_legend = FALSE,
gap = unit(c(1, 1), "mm"))
}#热图注释文件准备(上部-聚类)
for (i in 1:length(patients_now)) {
depletion_now[[i]] <- pds_now[[i]][,"depletion_batch"]
if (i == 1)
ha_cols_bottom[[i]] <- HeatmapAnnotation(df=data.frame('CD45' = depletion_now[[i]]),
col = list('CD45' = c("depleted_yes" = "gainsboro", "depleted_no" = "gray54")),
show_annotation_name = TRUE,
annotation_name_side = "left", annotation_name_gp = gpar(fontsize = 11),
annotation_legend_param = list(title = "CD45 status", title_position = "topcenter",
at = c("depleted_yes", "depleted_no"),
labels = c("CD45 depleted","CD45 unselected")),
show_legend = TRUE,
gap = unit(c(1), "mm"))
if (i == 2)
ha_cols_bottom[[i]] <- HeatmapAnnotation(df=data.frame('CD45' = depletion_now[[i]]),
col = list('CD45' = c("depleted_yes" = "gainsboro", "depleted_no" = "gray54")),
show_annotation_name = FALSE,
show_legend = FALSE,
annotation_name_side = "left", annotation_name_gp = gpar(fontsize = 11),
annotation_legend_param = list(title = "CD45 status",
title_position = "topcenter",
at = c("depleted_yes", "depleted_no"),
labels = c("CD45 depleted","CD45 unselected")),
gap = unit(c(1), "mm"))
if (i > 2)
ha_cols_bottom[[i]] <- HeatmapAnnotation(df=data.frame('CD45' = depletion_now[[i]]),
col = list('CD45' = c("depleted_yes" = "gainsboro", "depleted_no" = "gray54")),
show_legend = FALSE,
gap = unit(c(1), "mm"))
}#热图注释文件准备(下部)
names(cycling_now) <- patients_now
names(depletion_now) <- patients_now
names(ha_cols_up) <- patients_now
names(ha_cols_bottom) <- patients_now
8.热图绘制
#热图绘制(每一个Heatmap都可以出图,可以分步尝试)
ht_list <- ha_rows +
Heatmap(mats_now[[1]][match(markers$gene,rownames(mats_now[[1]])),],
cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
name = patients_now[1], clustering_distance_columns = "euclidean", row_names_side = "left",
row_names_gp = gpar(fontsize = 10, col = colors_markers_ch),
split = splits_ch, gap = unit(1, "mm"), column_title = patients_now[1],
column_title_gp = gpar(fontsize = 11),
row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[1]],
heatmap_legend_param = list(title = "expression", title_position = "topcenter", color_bar = "continuous"),
bottom_annotation = ha_cols_bottom[[1]],
col = colorRamp2(c(-2, 3,8), c("blue", "white", "red"))
) +
Heatmap(mats_now[[2]][match(markers$gene,rownames(mats_now[[1]])),],
cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
name = patients_now[2], clustering_distance_columns = "euclidean",
show_row_names = FALSE, column_title = patients_now[2],
column_title_gp = gpar(fontsize = 11),
show_heatmap_legend = FALSE,
row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[2]], bottom_annotation = ha_cols_bottom[[2]],
gap = unit(1, "mm"),
col = colorRamp2(c(-2, 3,8), c("blue", "white", "red"))) +
Heatmap(mats_now[[3]][match(markers$gene,rownames(mats_now[[3]])),],
cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
name = patients_now[3], clustering_distance_columns = "euclidean",
show_row_names = FALSE, column_title = patients_now[3],
column_title_gp = gpar(fontsize = 11),
show_heatmap_legend = FALSE,
row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[3]], bottom_annotation = ha_cols_bottom[[3]],
gap = unit(1, "mm"),
col = colorRamp2(c(-2, 3,8), c("blue", "white", "red"))) +
Heatmap(mats_now[[4]][match(markers$gene,rownames(mats_now[[4]])),],
cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
name = patients_now[4], clustering_distance_columns = "euclidean",
show_row_names = FALSE, column_title = patients_now[4],
column_title_gp = gpar(fontsize = 11),
show_heatmap_legend = FALSE,
row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[4]], bottom_annotation = ha_cols_bottom[[4]],
gap = unit(1, "mm"),
col = colorRamp2(c(-2, 3,8), c("blue", "white", "red"))) +
Heatmap(mats_now[[5]][match(markers$gene,rownames(mats_now[[5]])),],
cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
name = patients_now[5], clustering_distance_columns = "euclidean",
show_row_names = FALSE, column_title = patients_now[5],
column_title_gp = gpar(fontsize = 11),
show_heatmap_legend = FALSE,
row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[5]], bottom_annotation = ha_cols_bottom[[5]],
gap = unit(1, "mm"),
col = colorRamp2(c(-2, 3,8), c("blue", "white", "red"))) +
Heatmap(mats_now[[6]][match(markers$gene,rownames(mats_now[[6]])),],
cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
name = patients_now[6], clustering_distance_columns = "euclidean",
row_names_gp = gpar(fontsize = 10, col = colors_markers_ch),
split = splits_ch, column_title = patients_now[6],
column_title_gp = gpar(fontsize = 11),
row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[6]], bottom_annotation = ha_cols_bottom[[6]],
show_heatmap_legend = FALSE,
gap = unit(1, "mm"),
col = colorRamp2(c(-2, 3,8), c("blue", "white", "red")))
draw(ht_list, gap = unit(0.1, "cm"), heatmap_legend_side = "bottom", annotation_legend_side = "bottom")
#将最终图片进行整合(图注放置在底部)
图片展示
这样Fig1b就完成了,未完待续……
我个人的理解是对于提供代码的文献要特别学习一下,因为作者好心提供原始数据和代码的机会并不多,在复现作者的代码过程中你可以学习到作者编程中的优美布局,如何将复杂的代码简化,这些都是学习的重点。了解后也可以试着调整参数对于结果进行优化,最终将所学的知识融会贯通。
Fig1c
代码重复
1.操作准备流程
#标记物的细胞类型
cells_markers <- lists_markers(mat_norm, 1, markers)#将细胞按照细胞类型进行汇总
epithelial_markers <- cells_markers$epithelial_cells
is_epithelial <- decide_is_epithelial(epithelial_markers)#鉴定是否为上皮细胞
immune_markers <- cells_markers$immune_cells
is_immune <- decide_is_immune(immune_markers)
other_markers <- cells_markers$other_cells
is_other <- decide_is_other(other_markers)#免疫细胞与其他细胞同上
#以上的函数均可以在funcs_markers.R中查找
2.细胞类型鉴定
one_epithelial_marker <- expression_one_epithelial_marker(mat_norm, pd_norm, is_epithelial, epithelial_markers, "pats", 0.5)
is_epithelial[which(one_epithelial_marker$is_epithelial_extra == 1)] <- 1
#评估在细胞只有一个上皮标记物表达的情况下表达的分布
is_epithelial_simple <- is_epithelial
is_epithelial_simple[which(is_epithelial == 1)] <- "epithelial"
#将上皮细胞中只有一个上皮标记物表达的细胞定义为上皮细胞
is_immune_simple <- is_immune
is_immune_simple[which(is_immune == "immune_mix")] <- 0
is_other_simple <- is_other
is_other_simple[which(is_other == "other_mix")] <- 0
cells_types <- paste(is_epithelial_simple, is_immune_simple, is_other_simple, sep = "_")
names(cells_types) <- names(is_epithelial)
cell_types <- sapply(strsplit(cells_types, "_"), function(x){
#没有细胞类型(上皮细胞,免疫细胞,其他细胞)
if (sum(x == 0) == 3) return("unknown") else
if (sum(x == 0) == 2) return(setdiff(x, "0")) else
if (sum(c("epithelial", "stroma", "0") %in% x) == 3) return("epithelial") else
return(paste(setdiff(x, "0"),collapse = "_"))})
cell_types_simple <- cell_types
table(cell_types_simple)
cell_types_simple[which(sapply(strsplit(cell_types, "_"), length) > 1)] <- "undecided"#对不能确定的细胞赋值
table(cell_types_simple)#可以与上面的table()做比较
#更新colData and pd_norm
colData(sceset_final)$cell_types_markers <- cell_types_simple
pd_norm <- colData(sceset_final)
3.细胞簇归类
#无监督聚类的细胞类型
HSMM_clustering_ct <- monocle_unsup_clust_plots(sceset_obj = sceset_final, mat_to_cluster = mat_norm, anno_colors = anno_colors,
name_in_phenodata = "cluster_allregr_disp", disp_extra = 1, save_plots = 0,
path_plots = NULL, type_pats = "allpats", regress_pat = 1)
#计算monocle簇
HSMM_clustering_ct$Cluster <- HSMM_clustering_ct$cluster_allregr_disp
table(HSMM_clustering_ct$Cluster)
#作者为了保证重复性,使用了原来的集群
original_clustering <- readRDS(file = "original_clustering.RDS")
HSMM_clustering_ct$Cluster <- original_clustering
table(HSMM_clustering_ct$Cluster)
#更新sceset_final对象
colData(sceset_final) <- cbind(colData(sceset_final), pData(HSMM_clustering_ct)[,c(96:104)])
colData(sceset_final)$cell_types_cl_all <- colData(sceset_final)$cell_types_markers
pd_norm <- colData(sceset_final)
在这个作者使用了原来的集群文件,我感觉也可以利用Monncle进行一下后续分析探索,观察一下是否会出现一些有趣的结论
4.对unknown和undecided细胞簇进行分配(需要对9个细胞簇都进行相关计算)
#分配unknown和undecided细胞簇
cells_to_assign <- list()
cells_to_reassign <- list()
mean_exprs <- list()
mean_reassign_exprs <- list()
#簇1:只有上皮细胞就不用进行处理
cluster_here <- 1
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
mean_exprs[[cluster_here]] <- NULL
mean_reassign_exprs[[cluster_here]] <- NULL
cells_to_assign[[cluster_here]] <- NULL
cells_to_reassign[[cluster_here]] <- NULL
#簇2: 尝试把unknown和undecided细胞分配给巨噬细胞
cluster_here <- 2
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("undecided", "unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here),
which(pd_norm$cell_types_markers %in% to_assign_cluster)),
epithelial_markers = epithelial_markers,
immune_markers = immune_markers,
other_markers = other_markers)
#只分配平均免疫表达最高的细胞
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[2] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "macrophage"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
#同时检查上皮细胞是否有高免疫表达
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "epithelial")),
epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
#只重新分配平均免疫表达最高的上皮细胞
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "epithelial"))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[2] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "macrophage"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
#簇3: 混合数据,所以没有办法进行处理
cluster_here <- 3
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
mean_exprs[[cluster_here]] <- NULL
mean_reassign_exprs[[cluster_here]] <- NULL
cells_to_assign[[cluster_here]] <- NULL
cells_to_reassign[[cluster_here]] <- NULL
#簇4: 尝试unknown和undecided细胞给上皮细胞(参考簇2)
cluster_here <- 4
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("undecided", "unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster)),
epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "stroma")),
epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "stroma"))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
#簇5(参考簇3)
cluster_here <- 5
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
mean_exprs[[cluster_here]] <- NULL
mean_reassign_exprs[[cluster_here]] <- NULL
cells_to_assign[[cluster_here]] <- NULL
cells_to_reassign[[cluster_here]] <- NULL
#簇6:尝试把unknown细胞给上皮细胞(参考簇2)
cluster_here <- 6
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster)),
epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "Bcell")),
epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "Bcell"))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
#簇7(同簇6)
cluster_here <- 7
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster)),
epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
#簇8(同簇2)
cluster_here <- 8
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("undecided", "unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster)),
epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "stroma")),
epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "stroma"))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% c("Bcell", "Tcell"))),
epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% c("Bcell", "Tcell")))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
#簇9(同簇2)
cluster_here <- 9
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("undecided", "unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster)),
epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[2] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "macrophage"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm,
cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "epithelial")),
epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "epithelial"))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[2] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "macrophage"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
colData(sceset_final)$cell_types_cl_all <- pd_norm$cell_types_cl_all
#移除unknown和undecided细胞
unkund <- which(pd_norm$cell_types_cl_all %in% c("undecided", "unknown"))
#把之前的矩阵进行更新
sceset_ct <- sceset_final[,-unkund]
pd_ct <- colData(sceset_ct)
mat_ct <- assays(sceset_ct)$exprs
mats_ct <- list()
pds_ct <- list()
for (i in 1:length(patients_now)) {
mats_ct[[i]] <- mat_ct[,pd_ct$patient == patients_now[i]]
pds_ct[[i]] <- pd_ct[pd_ct$patient == patients_now[i],]
}
names(mats_ct) <- patients_now
names(pds_ct) <- patients_now
5.可视化绘制
#细胞类型的条状堆叠图
match_celltype_levels <- c("epithelial", "stroma", "endothelial", "Tcell", "Bcell", "macrophage")
tbl_pd_ct <- tbl_df(pd_ct)
tbl_pd_ct <- tbl_pd_ct %>%
group_by(patient) %>%
mutate(cell_types_cl_all = factor(cell_types_cl_all, levels = match_celltype_levels)) %>%
arrange(cell_types_cl_all)
ggplot() +
geom_bar(data = tbl_pd_ct, aes(x = patient, fill = factor(cell_types_cl_all)), position = position_fill(reverse = TRUE)) +
scale_fill_manual(values = anno_colors$tsne) +
labs(fill = "cell type", y = "fraction of cells")
图片展示
Fig1e
代码重复
#患者循环,判断 G1/S或G2/M分数
for (i in 1:length(patients_now)) {
percent_epith <- length(intersect_all(which(pd_ct$patient == patients_now[i]),
which(pd_ct$cell_types_cl_all == "epithelial"),
which(pd_ct$cycling_mel == "High cycling")))/length(intersect_all(
which(pd_ct$patient == patients_now[i]),
which(pd_ct$cell_types_cl_all == "epithelial")))*100
percent_all <- length(intersect_all(which(pd_ct$patient == patients_now[i]),
which(pd_ct$cycling_mel == "High cycling")))/length(which(pd_ct$patient == patients_now[i]))*100
print(ggplot(as.data.frame(pds_ct[[i]]), aes(x = mel_scores_g1s, y = mel_scores_g2m)) +
geom_rect(ggplot2::aes(xmin = median(pd_ct$mel_scores_g1s) + 2 * mad(pd_ct$mel_scores_g1s),
xmax = Inf,
ymin = -Inf,
ymax = Inf),
fill = "gainsboro", alpha = 0.05) +
geom_rect(aes(ymin = median(pd_ct$mel_scores_g2m) + 2 * mad(pd_ct$mel_scores_g2m),
ymax = Inf,
xmin = -Inf,
xmax = Inf),
fill = "gainsboro", alpha = 0.05) +
geom_point(aes(col = factor(cell_types_cl_all, levels = names(anno_colors$tsne)),
shape = factor(cycling_mel)), size = 5) +
xlim(-0.15, 2) +
ylim(-0.15, 2.8) +
labs(col = "cell type", shape = "High cycling", x = "G1S score", y = "G2M score",
title = paste("patient ", patients_now[i], " (", round(percent_all), "% cycling cells)", sep = "")) +
scale_color_manual(values = anno_colors$tsne))
}
图片展示
这里会出现六张图片,分别对应六位患者(篇幅原因只显示正文中的两张)
Fig1d
代码重复
#所有患者的细胞周期堆叠图
tbl_pd_cycle <- tbl_pd_ct %>%
group_by(patient) %>%
mutate(cycling_mel = factor(cycling_mel, levels = c("High cycling", "Low cycling"))) %>%
arrange(cycling_mel)
ggplot() +
geom_bar(data = tbl_pd_cycle, aes(x = patient, fill = factor(cycling_mel)),
position = position_fill(reverse = TRUE)) +
scale_fill_manual(values = anno_colors$cycling) +
labs(fill = "cycling status", y = "fraction of cells")+
theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
panel.background = element_blank(),axis.line=element_line(colour = "black"))#消除背景网格线
图片展示