单细胞RNA-seq揭示TNBC的异质性(图表复现01)

这篇文章是“ Unravelling subclonal heterogeneity and aggressive disease states in TNBC through single-cell RNA-seq  ” 通过单细胞RNA-seq揭示TNBC的亚克隆异质性和侵袭性疾病状态
(最近世界卫生组织国际癌症研究机构据统计称乳腺癌已经取代了肺癌,成为了全球第一大癌症,因此本文还是非常有学习的价值的)

摘要

三阴性乳腺癌(TNBC)是一种侵袭性亚型,其特征是肿瘤内广泛的异质性。为了研究其潜在的生物学特性,作者对6个原代TNBC的1500个细胞进行了单细胞RNA测序(scRNA-seq)。并且发现每个肿瘤细胞间基因表达程序的异质性是可变的,并且很大程度上与推断基因组拷贝数变化的克隆性相关,表明基因型驱动个体亚群的基因表达表型。基因表达谱的聚类识别出多种肿瘤共有的不同的恶性细胞亚群,包括与多种治疗耐药和转移特征相关的单一亚群,并通过激活糖鞘脂代谢和相关的先天免疫途径来表征功能。一个定义该亚群的新特征预测了一个大队列中TNBC患者的长期预后。总的来说,该分析揭示了TNBC的功能异质性及其与基因组进化的关联,并揭示了导致该疾病预后不良的未预料到的生物学原理。

实验数据

在文末提供了可用数据GSE118390,并且作者也在其github账号中提供了文章中所用的代码与其他实验数据

GSE118390:https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE118390
Github:https://github.com/Michorlab/tnbc_scrnaseq

复现图表

Fig1b

代码重复(尽管作者提供了代码,但是运行还是会出错,这就需要我们了解代码具体的意思,从而进行深入学习)
1.操作准备流程

rm(list=ls())#魔幻操作,一键清空
setwd("..\\tnbc_scrnaseq-master\\data\\")
#首先定位文件目录(更改为自己将数据存放的位置)
#install.packages("here"),加载R
library(here)
source(here::here("code", "libraries.R"))#需要加载的R包
source(here::here("code", "funcs.R"))#用于细胞类型识别、聚类、签名等各个方面的功能的函数  
source(here::here("code", "funcs_markers.R"))#根据标记识别细胞类型的功能的函数
#后面两个函数大家可以打开进行阅读,了解一下作者的函数构造原理

2.数据导入

##读入标准化数据和表型数据(标准化数据需要自行去GEO下载)
mat_norm <- read.table(("norm_data.txt"), sep = "\t", header = TRUE)#标准化后的数据

pd_norm <-read.table(("pd_norm.txt"),sep="\t",header=TRUE,stringsAsFactors= FALSE)#样本信息

fd_norm <-read.table(("fd_norm.txt"),sep="\t",header=TRUE,stringsAsFactors= FALSE)#基因信息

melanoma_cellcycle <- read.table(("melanoma_cellcycle.txt"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)#黑色素瘤的周期分类文件(黑色素瘤的细胞周期分类(Tirosch et al 2016))

all_markers <- read.table(("markers_clean.txt"), sep = "\t", header = TRUE, stringsAsFactors = FALSE)#细胞类型标志物文件

3.构建单细胞常规S4对象

sceset_final <- SingleCellExperiment(assays = list(exprs = as.matrix(mat_norm)),
                                     colData = pd_norm, 
                                     rowData = fd_norm)#创建S4单细胞数据对象

4.颜色设置(为注释内容设定相应颜色)

epithelial_col <- brocolors("crayons")["Maroon"]
basal_epithelial_col <- brocolors("crayons")["Red"]
luminal_epithelial_col <- brocolors("crayons")["Sunset Orange"]
luminal_progenitor_col <- brocolors("crayons")["Salmon"]
stroma_col <- brocolors("crayons")["Aquamarine"]
endothelial_col <- brocolors("crayons")["Wisteria"]
PTPRC_col <- brocolors("crayons")["Inchworm"]
t_cell_col <- brocolors("crayons")["Screamin' Green"]
b_cell_col <- brocolors("crayons")["Fern"]
macrophage_col <- brocolors("crayons")["Tropical Rain Forest"]
marker_cols <- c("epithelial" = unname(epithelial_col), "basal epithelial" = unname(basal_epithelial_col), 
                 "luminal epithelial" = unname(luminal_epithelial_col), "luminal progenitor" = unname(luminal_progenitor_col),
                 "stroma" = unname(stroma_col), "endothelial" = unname(endothelial_col), 
                 "immune" = unname(PTPRC_col), "T cell" = unname(t_cell_col), "B cell" = unname(b_cell_col), 
                 "macrophage" = unname(macrophage_col))
cycling_mel_cols <- c("Low cycling" = "gainsboro",
                      "High cycling" = unname(brocolors("crayons")["Mulberry"]))
depletion_cols <- c("depleted" = unname(brocolors("crayons")["White"]), "not depleted" = unname(brocolors("crayons")["Red"]))
pats_cols <- c("PT039" = unname(brocolors("crayons")["Orange Red"]), "PT058" = unname(brocolors("crayons")["Orange"]), 
               "PT081" = unname(brocolors("crayons")["Pink Flamingo"]), "PT084" = unname(brocolors("crayons")["Fern"]), 
               "PT089" = unname(brocolors("crayons")["Blue Violet"]), "PT126" = unname(brocolors("crayons")["Sky Blue"]))
tsne_cols <- c("epithelial" = unname(basal_epithelial_col), "stroma" = unname(stroma_col), "endothelial" = unname(endothelial_col),
               "Tcell" = unname(t_cell_col), "Bcell" = unname(b_cell_col), "macrophage" = unname(macrophage_col))
anno_colors <- list("marker" = marker_cols, "cycling" = cycling_mel_cols, "immune depletion" = depletion_cols, 
                    "patient" = pats_cols, "tsne" = tsne_cols)

5.黑色素瘤细胞周期分类(Tirosch et al 2016)

#利用我们前面导入的melanoma_cellcycle变量进行设置
melanoma_g1s <- melanoma_cellcycle$G1S#将G1S与G2M分开,便于分组
melanoma_g1s <- match_clean_vector_genes(mat_norm, melanoma_g1s)
scores_g1s <- avg_expr_genes(mat_norm, melanoma_g1s$index)
melanoma_g2m <- melanoma_cellcycle$G2M
melanoma_g2m <- match_clean_vector_genes(mat_norm, melanoma_g2m)
scores_g2m <- avg_expr_genes(mat_norm, melanoma_g2m$index)
cycling_mel <- rep(NA, length(scores_g1s))
#以公式区分当前样品是否处在细胞周期中
for (i in 1:length(cycling_mel)) {
  if (scores_g1s[i] >= (median(scores_g1s) + 2 * mad(scores_g1s)) && scores_g2m[i] < (median(scores_g2m) + 2 * mad(scores_g2m)))
    cycling_mel[i] <- "High cycling"
  if (scores_g1s[i] < (median(scores_g1s) + 2 * mad(scores_g1s)) && scores_g2m[i] >= (median(scores_g2m) + 2 * mad(scores_g2m)))
    cycling_mel[i] <- "High cycling"
  if (scores_g1s[i] < (median(scores_g1s) + 2 * mad(scores_g1s)) && scores_g2m[i] < (median(scores_g2m) + 2 * mad(scores_g2m)))
    cycling_mel[i] <- "Low cycling"
  if (scores_g1s[i] >= (median(scores_g1s) + 2 * mad(scores_g1s)) && scores_g2m[i] >= (median(scores_g2m) + 2 * mad(scores_g2m)))
    cycling_mel[i] <- "High cycling"
}
#更新样本信息
colData(sceset_final)$mel_scores_g1s <- scores_g1s
colData(sceset_final)$mel_scores_g2m <- scores_g2m
colData(sceset_final)$cycling_mel <- cycling_mel#将容器中的样本信息进行更新
pd_norm <- colData(sceset_final)
pd_norm <- as.data.frame(pd_norm)#新加的三列整合到样本信息中
#准备输入文件
patients_now <- c()
mats_now <- list()
pds_now <- list()
for (i in 1:length(unique(pd_norm$patient))) {
  patients_now[i] <- sort(unique(pd_norm$patient))[i]
  mats_now[[i]] <- mat_norm[, pd_norm$patient == patients_now[i]]
  pds_now[[i]] <- pd_norm[pd_norm$patient == patients_now[i],]
}
names(mats_now) <- patients_now
names(pds_now) <- patients_now
#将6个患者样本进行拆分整合

6.细胞类型标志物的整理

#我们在前面也导入了all_markers变量
markers <- unique(all_markers[all_markers$gene %in% rowData(sceset_final)$hgnc_symbol, ])
#标志物名称转换
markers$type_heatmap <- markers$type
markers$type_heatmap[which(markers$type_heatmap == "luminalprogenitor")] <- "luminal progenitor"
markers$type_heatmap[which(markers$type_heatmap == "luminalepithelial")] <- "luminal epithelial"
markers$type_heatmap[which(markers$type_heatmap == "basalepithelial")] <- "basal epithelial"
markers$type_heatmap[which(markers$type_heatmap %in% c("EPCAM", "EGFR", "CDH1"))] <- "epithelial"
markers$type_heatmap[which(markers$type_heatmap == "Bcell")] <- "B cell"
markers$type_heatmap[which(markers$type_heatmap == "Tcell")] <- "T cell"
markers$type_long_heatmap <- markers$type_long
markers$type_long_heatmap[which(markers$type == "stroma")] <- "stroma"
markers$type_long_heatmap[which(markers$type == "endothelial")] <- "endothelial"
#将名字统一便于后续分析

7.热图注释文件

colors_markers_ch <- markers$type_heatmap
for (i in c(1:length(names(anno_colors$marker)))) {
  colors_markers_ch <- replace(colors_markers_ch, colors_markers_ch == names(anno_colors$marker)[i], anno_colors$marker[i])
}

splits_ch <- as.factor(markers$type_long_heatmap)#注释细胞类型
splits_ch <- factor(splits_ch, levels(splits_ch)[c(2,4,1,3)])#level排序

colors_anno_markers_ch <- as.factor(markers$type_heatmap)
colors_anno_markers_ch <- factor(colors_anno_markers_ch,
                                 levels(colors_anno_markers_ch)[c(4,2,5,6,9,3,8,10,1,7)])
#同上操作
ha_rows <- HeatmapAnnotation(df = data.frame(type = colors_anno_markers_ch),
                             annotation_legend_param = list(type = list(ncol = 2,
                                                            title = "cell type",                                                                    title_position ="topcenter")),
                             which = "row", 
                             col = list("type" = anno_colors$marker), 
                             annotation_width = unit(3, "mm"))#热图注释文件准备(行)

cycling_now <- list()
depletion_now <- list()
ha_cols_up <- list()
ha_cols_bottom <- list()
for (i in 1:length(patients_now)) {
  cycling_now[[i]] <- pds_now[[i]][,"cycling_mel"]
 
if (i == 1)
    ha_cols_up[[i]] <- HeatmapAnnotation(df = data.frame(cycling = cycling_now[[i]]), 
                                         col = list(cycling = anno_colors$cycling), 
                                         show_annotation_name = T, 
                                         annotation_name_side = "left", 
                                         annotation_name_gp = gpar(fontsize = 11),
                       annotation_legend_param = list(list(title_position = "topcenter",
                                         title = c("cycling status"))),
                                         gap = unit(c(1, 1), "mm"))
  if (i > 1)
    ha_cols_up[[i]] <- HeatmapAnnotation(df = data.frame(cycling = cycling_now[[i]]), 
                                         col = list(cycling = anno_colors$cycling), 
                                         show_legend = FALSE,
                                         gap = unit(c(1, 1), "mm"))
}#热图注释文件准备(上部-聚类)
for (i in 1:length(patients_now)) {
  depletion_now[[i]] <- pds_now[[i]][,"depletion_batch"]
  
  if (i == 1)
    ha_cols_bottom[[i]] <- HeatmapAnnotation(df=data.frame('CD45' = depletion_now[[i]]), 
          col = list('CD45' = c("depleted_yes" = "gainsboro", "depleted_no" = "gray54")),
                                             show_annotation_name = TRUE,
                                             annotation_name_side = "left",                                                            annotation_name_gp = gpar(fontsize = 11),
                                    annotation_legend_param = list(title = "CD45 status",                                              title_position = "topcenter", 
                                             at = c("depleted_yes", "depleted_no"),
                                         labels = c("CD45 depleted","CD45 unselected")),
                                             show_legend = TRUE,
                                             gap = unit(c(1), "mm"))
  
  if (i == 2)
    ha_cols_bottom[[i]] <- HeatmapAnnotation(df=data.frame('CD45' = depletion_now[[i]]), 
         col = list('CD45' = c("depleted_yes" = "gainsboro", "depleted_no" = "gray54")),
                                             show_annotation_name = FALSE, 
                                             show_legend = FALSE,
                                             annotation_name_side = "left",                                                            annotation_name_gp = gpar(fontsize = 11),
                                   annotation_legend_param = list(title = "CD45 status", 
                                             title_position = "topcenter", 
                                             at = c("depleted_yes", "depleted_no"),
                                          labels = c("CD45 depleted","CD45 unselected")),
                                             gap = unit(c(1), "mm"))
  if (i > 2)
    ha_cols_bottom[[i]] <- HeatmapAnnotation(df=data.frame('CD45' = depletion_now[[i]]), 
          col = list('CD45' = c("depleted_yes" = "gainsboro", "depleted_no" = "gray54")),
                                             show_legend = FALSE,
                                             gap = unit(c(1), "mm"))
}#热图注释文件准备(下部)
names(cycling_now) <- patients_now
names(depletion_now) <- patients_now
names(ha_cols_up) <- patients_now
names(ha_cols_bottom) <- patients_now

8.热图绘制

#热图绘制(每一个Heatmap都可以出图,可以分步尝试)
ht_list <- ha_rows + 
  Heatmap(mats_now[[1]][match(markers$gene,rownames(mats_now[[1]])),], 
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
          name = patients_now[1], clustering_distance_columns = "euclidean", row_names_side = "left", 
          row_names_gp = gpar(fontsize = 10, col = colors_markers_ch),
          split = splits_ch, gap = unit(1, "mm"), column_title = patients_now[1], 
          column_title_gp = gpar(fontsize = 11),
          row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[1]], 
          heatmap_legend_param = list(title = "expression", title_position = "topcenter", color_bar = "continuous"),
          bottom_annotation = ha_cols_bottom[[1]],
          col = colorRamp2(c(-2, 3,8), c("blue", "white", "red"))
          ) + 
  Heatmap(mats_now[[2]][match(markers$gene,rownames(mats_now[[1]])),],
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
          name = patients_now[2], clustering_distance_columns = "euclidean",
          show_row_names = FALSE, column_title = patients_now[2],
          column_title_gp = gpar(fontsize = 11),
          show_heatmap_legend = FALSE,
          row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[2]], bottom_annotation = ha_cols_bottom[[2]],
          gap = unit(1, "mm"),
          col = colorRamp2(c(-2, 3,8), c("blue", "white", "red"))) +
  Heatmap(mats_now[[3]][match(markers$gene,rownames(mats_now[[3]])),],
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
          name = patients_now[3], clustering_distance_columns = "euclidean",
          show_row_names = FALSE, column_title = patients_now[3],
          column_title_gp = gpar(fontsize = 11),
          show_heatmap_legend = FALSE,
          row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[3]], bottom_annotation = ha_cols_bottom[[3]],
          gap = unit(1, "mm"),
          col = colorRamp2(c(-2, 3,8), c("blue", "white", "red"))) +
  Heatmap(mats_now[[4]][match(markers$gene,rownames(mats_now[[4]])),],
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
          name = patients_now[4], clustering_distance_columns = "euclidean",
          show_row_names = FALSE, column_title = patients_now[4], 
          column_title_gp = gpar(fontsize = 11),
          show_heatmap_legend = FALSE,
          row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[4]], bottom_annotation = ha_cols_bottom[[4]],
          gap = unit(1, "mm"),
          col = colorRamp2(c(-2, 3,8), c("blue", "white", "red"))) +
  Heatmap(mats_now[[5]][match(markers$gene,rownames(mats_now[[5]])),], 
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
          name = patients_now[5], clustering_distance_columns = "euclidean",
          show_row_names = FALSE, column_title = patients_now[5], 
          column_title_gp = gpar(fontsize = 11), 
          show_heatmap_legend = FALSE,
          row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[5]], bottom_annotation = ha_cols_bottom[[5]],
          gap = unit(1, "mm"),
          col = colorRamp2(c(-2, 3,8), c("blue", "white", "red"))) + 
  Heatmap(mats_now[[6]][match(markers$gene,rownames(mats_now[[6]])),], 
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE,
          name = patients_now[6], clustering_distance_columns = "euclidean",
          row_names_gp = gpar(fontsize = 10, col = colors_markers_ch), 
          split = splits_ch, column_title = patients_now[6], 
          column_title_gp = gpar(fontsize = 11),
          row_title_gp = gpar(font = 11), top_annotation = ha_cols_up[[6]], bottom_annotation = ha_cols_bottom[[6]],
          show_heatmap_legend = FALSE,
          gap = unit(1, "mm"),
          col = colorRamp2(c(-2, 3,8), c("blue", "white", "red")))
draw(ht_list, gap = unit(0.1, "cm"), heatmap_legend_side = "bottom", annotation_legend_side = "bottom")
#将最终图片进行整合(图注放置在底部)

图片展示

这样Fig1b就完成了,未完待续……

我个人的理解是对于提供代码的文献要特别学习一下,因为作者好心提供原始数据和代码的机会并不多,在复现作者的代码过程中你可以学习到作者编程中的优美布局,如何将复杂的代码简化,这些都是学习的重点。了解后也可以试着调整参数对于结果进行优化,最终将所学的知识融会贯通。

Fig1c

代码重复
1.操作准备流程

#标记物的细胞类型
cells_markers <- lists_markers(mat_norm, 1, markers)#将细胞按照细胞类型进行汇总
epithelial_markers <- cells_markers$epithelial_cells
is_epithelial <- decide_is_epithelial(epithelial_markers)#鉴定是否为上皮细胞
immune_markers <- cells_markers$immune_cells
is_immune <- decide_is_immune(immune_markers)
other_markers <- cells_markers$other_cells
is_other <- decide_is_other(other_markers)#免疫细胞与其他细胞同上
#以上的函数均可以在funcs_markers.R中查找

2.细胞类型鉴定

one_epithelial_marker <- expression_one_epithelial_marker(mat_norm, pd_norm, is_epithelial, epithelial_markers, "pats", 0.5)
is_epithelial[which(one_epithelial_marker$is_epithelial_extra == 1)] <- 1
#评估在细胞只有一个上皮标记物表达的情况下表达的分布
is_epithelial_simple <- is_epithelial
is_epithelial_simple[which(is_epithelial == 1)] <- "epithelial"
#将上皮细胞中只有一个上皮标记物表达的细胞定义为上皮细胞
is_immune_simple <- is_immune
is_immune_simple[which(is_immune == "immune_mix")] <- 0
is_other_simple <- is_other
is_other_simple[which(is_other == "other_mix")] <- 0
cells_types <- paste(is_epithelial_simple, is_immune_simple, is_other_simple, sep = "_")
names(cells_types) <- names(is_epithelial)
cell_types <- sapply(strsplit(cells_types, "_"), function(x){
#没有细胞类型(上皮细胞,免疫细胞,其他细胞)
  if (sum(x == 0) == 3) return("unknown") else 
    if (sum(x == 0) == 2) return(setdiff(x, "0")) else
      if (sum(c("epithelial", "stroma", "0") %in% x) == 3) return("epithelial") else
        return(paste(setdiff(x, "0"),collapse = "_"))})
cell_types_simple <- cell_types
table(cell_types_simple)
cell_types_simple[which(sapply(strsplit(cell_types, "_"), length) > 1)] <- "undecided"#对不能确定的细胞赋值
table(cell_types_simple)#可以与上面的table()做比较

#更新colData and pd_norm
colData(sceset_final)$cell_types_markers <- cell_types_simple
pd_norm <- colData(sceset_final)

3.细胞簇归类

#无监督聚类的细胞类型
HSMM_clustering_ct <- monocle_unsup_clust_plots(sceset_obj = sceset_final, mat_to_cluster = mat_norm, anno_colors = anno_colors, 
                                                name_in_phenodata = "cluster_allregr_disp", disp_extra = 1, save_plots = 0,
                                                path_plots = NULL, type_pats = "allpats", regress_pat = 1)
#计算monocle簇
HSMM_clustering_ct$Cluster <- HSMM_clustering_ct$cluster_allregr_disp
table(HSMM_clustering_ct$Cluster)

#作者为了保证重复性,使用了原来的集群
original_clustering <- readRDS(file = "original_clustering.RDS")
HSMM_clustering_ct$Cluster <- original_clustering
table(HSMM_clustering_ct$Cluster)

#更新sceset_final对象
colData(sceset_final) <- cbind(colData(sceset_final), pData(HSMM_clustering_ct)[,c(96:104)])
colData(sceset_final)$cell_types_cl_all <- colData(sceset_final)$cell_types_markers
pd_norm <- colData(sceset_final)

在这个作者使用了原来的集群文件,我感觉也可以利用Monncle进行一下后续分析探索,观察一下是否会出现一些有趣的结论

4.对unknown和undecided细胞簇进行分配(需要对9个细胞簇都进行相关计算)

#分配unknown和undecided细胞簇
cells_to_assign <- list()
cells_to_reassign <- list()
mean_exprs <- list()
mean_reassign_exprs <- list()

#簇1:只有上皮细胞就不用进行处理
cluster_here <- 1
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
mean_exprs[[cluster_here]] <- NULL
mean_reassign_exprs[[cluster_here]] <- NULL
cells_to_assign[[cluster_here]] <- NULL
cells_to_reassign[[cluster_here]] <- NULL

#簇2: 尝试把unknown和undecided细胞分配给巨噬细胞
cluster_here <- 2
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("undecided", "unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                      cells_pos = intersect(which(pd_norm$Cluster == cluster_here), 
                                                                            which(pd_norm$cell_types_markers %in% to_assign_cluster)),
                                                      epithelial_markers = epithelial_markers, 
                                                      immune_markers = immune_markers, 
                                                      other_markers = other_markers)

#只分配平均免疫表达最高的细胞
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[2] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "macrophage"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])

#同时检查上皮细胞是否有高免疫表达
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                               cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "epithelial")),
                                                               epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
#只重新分配平均免疫表达最高的上皮细胞
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "epithelial"))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[2] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "macrophage"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])

#簇3: 混合数据,所以没有办法进行处理
cluster_here <- 3
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
mean_exprs[[cluster_here]] <- NULL
mean_reassign_exprs[[cluster_here]] <- NULL
cells_to_assign[[cluster_here]] <- NULL
cells_to_reassign[[cluster_here]] <- NULL

#簇4: 尝试unknown和undecided细胞给上皮细胞(参考簇2)
cluster_here <- 4
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("undecided", "unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                      cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster)),
                                                      epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                               cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "stroma")),
                                                               epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "stroma"))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])

#簇5(参考簇3)
cluster_here <- 5
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
mean_exprs[[cluster_here]] <- NULL
mean_reassign_exprs[[cluster_here]] <- NULL
cells_to_assign[[cluster_here]] <- NULL
cells_to_reassign[[cluster_here]] <- NULL

#簇6:尝试把unknown细胞给上皮细胞(参考簇2)
cluster_here <- 6
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                      cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster)),
                                                      epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                               cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "Bcell")),
                                                               epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "Bcell"))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])

#簇7(同簇6)
cluster_here <- 7
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                      cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster)),
                                                      epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])

#簇8(同簇2)
cluster_here <- 8
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("undecided", "unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                      cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster)),
                                                      epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                               cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "stroma")),
                                                               epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "stroma"))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                               cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% c("Bcell", "Tcell"))),
                                                               epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% c("Bcell", "Tcell")))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[1] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "epithelial"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])

#簇9(同簇2)
cluster_here <- 9
table(pd_norm$cell_types_markers[which(pd_norm$Cluster == cluster_here)])
to_assign_cluster <- c("undecided", "unknown")
mean_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                      cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster)),
                                                      epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_assign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% to_assign_cluster))[which(apply(mean_exprs[[cluster_here]], 1, function(x){if (x[2] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_assign[[cluster_here]]] <- "macrophage"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])
mean_reassign_exprs[[cluster_here]] <- compute_mean_expr_types(types = c("epithelial", "immune", "other"), mat_expr = mat_norm, 
                                                               cells_pos = intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "epithelial")),
                                                               epithelial_markers = epithelial_markers, immune_markers = immune_markers, other_markers = other_markers)
cells_to_reassign[[cluster_here]] <- intersect(which(pd_norm$Cluster == cluster_here), which(pd_norm$cell_types_markers %in% "epithelial"))[which(apply(mean_reassign_exprs[[cluster_here]], 1, function(x){if (x[2] >= max(x)) return(1) else {return(0)}}) == 1)]
pd_norm$cell_types_cl_all[cells_to_reassign[[cluster_here]]] <- "macrophage"
table(pd_norm$cell_types_cl_all[which(pd_norm$Cluster == cluster_here)])

colData(sceset_final)$cell_types_cl_all <- pd_norm$cell_types_cl_all

#移除unknown和undecided细胞
unkund <- which(pd_norm$cell_types_cl_all %in% c("undecided", "unknown"))

#把之前的矩阵进行更新
sceset_ct <- sceset_final[,-unkund]
pd_ct <- colData(sceset_ct)
mat_ct <- assays(sceset_ct)$exprs
mats_ct <- list()
pds_ct <- list()
for (i in 1:length(patients_now)) {
  mats_ct[[i]] <- mat_ct[,pd_ct$patient == patients_now[i]]
  pds_ct[[i]] <- pd_ct[pd_ct$patient == patients_now[i],]
}
names(mats_ct) <- patients_now
names(pds_ct) <- patients_now

5.可视化绘制

#细胞类型的条状堆叠图
match_celltype_levels <- c("epithelial", "stroma", "endothelial", "Tcell", "Bcell", "macrophage")
tbl_pd_ct <- tbl_df(pd_ct)
tbl_pd_ct <- tbl_pd_ct %>%
  group_by(patient) %>%
  mutate(cell_types_cl_all = factor(cell_types_cl_all, levels = match_celltype_levels)) %>%
  arrange(cell_types_cl_all)

ggplot() +
  geom_bar(data = tbl_pd_ct, aes(x = patient, fill = factor(cell_types_cl_all)), position = position_fill(reverse = TRUE)) +
  scale_fill_manual(values = anno_colors$tsne) +
  labs(fill = "cell type", y = "fraction of cells")

图片展示

Fig1e

代码重复

#患者循环,判断 G1/S或G2/M分数
for (i in 1:length(patients_now)) {
  percent_epith <- length(intersect_all(which(pd_ct$patient == patients_now[i]), 
                                        which(pd_ct$cell_types_cl_all == "epithelial"), 
                                        which(pd_ct$cycling_mel == "High cycling")))/length(intersect_all(
                                          which(pd_ct$patient == patients_now[i]), 
                                          which(pd_ct$cell_types_cl_all == "epithelial")))*100
  percent_all <- length(intersect_all(which(pd_ct$patient == patients_now[i]), 
                                      which(pd_ct$cycling_mel == "High cycling")))/length(which(pd_ct$patient == patients_now[i]))*100
  print(ggplot(as.data.frame(pds_ct[[i]]), aes(x = mel_scores_g1s, y = mel_scores_g2m)) +
          geom_rect(ggplot2::aes(xmin = median(pd_ct$mel_scores_g1s) + 2 * mad(pd_ct$mel_scores_g1s),
                                 xmax = Inf,
                                 ymin = -Inf,
                                 ymax = Inf),
                    fill = "gainsboro", alpha = 0.05) +
          geom_rect(aes(ymin = median(pd_ct$mel_scores_g2m) + 2 * mad(pd_ct$mel_scores_g2m),
                        ymax = Inf,
                        xmin = -Inf,
                        xmax = Inf),
                    fill = "gainsboro", alpha = 0.05) +
          geom_point(aes(col = factor(cell_types_cl_all, levels = names(anno_colors$tsne)), 
                         shape = factor(cycling_mel)), size  = 5) +
          xlim(-0.15, 2) +
          ylim(-0.15, 2.8) +
          labs(col = "cell type", shape = "High cycling", x = "G1S score", y = "G2M score", 
               title = paste("patient ", patients_now[i], " (", round(percent_all), "% cycling cells)", sep = "")) + 
          scale_color_manual(values = anno_colors$tsne))
 
}

图片展示

这里会出现六张图片,分别对应六位患者(篇幅原因只显示正文中的两张)

Fig1d

代码重复

#所有患者的细胞周期堆叠图
tbl_pd_cycle <- tbl_pd_ct %>%
  group_by(patient) %>%
  mutate(cycling_mel = factor(cycling_mel, levels = c("High cycling", "Low cycling"))) %>%
  arrange(cycling_mel)

ggplot() +
  geom_bar(data = tbl_pd_cycle, aes(x = patient, fill = factor(cycling_mel)), 
           position = position_fill(reverse = TRUE)) +
  scale_fill_manual(values = anno_colors$cycling) +
  labs(fill = "cycling status", y = "fraction of cells")+
  theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
        panel.background = element_blank(),axis.line=element_line(colour = "black"))#消除背景网格线

图片展示

(0)

相关推荐