标记基因可视化
作者 | 单细胞天地小编 刘小泽
课程链接在:http://jm.grazy.cn/index/mulitcourse/detail.html?cid=55
这次会介绍如何对一些标记基因进行可视化。对应视频第三单元7-8讲
前言
将对应文章这张图:
会根据之前的6个发育时期和4个cluster的tSNE结果,进行一些marker基因的等高线图和热图可视化
另外还有小提琴图:
对marker基因可视化的目的还是说明之前分的群是有道理的,文章中也提到了:
1 首先还是使用包装好的代码进行可视化
1.1 加载表达矩阵、获得cluster信息
1rm(list = ls())
2options(warn=-1)
3options(stringsAsFactors = F)
4source("../analysis_functions.R")
5load('../female_rpkm.Rdata')
6
7# 加载之前HCPC分群结果
8cluster <- read.csv('female_clustering.csv')
9female_clustering=cluster[,2];names(female_clustering)=cluster[,1]
10
11> table(female_clustering)
12female_clustering
13 C1 C2 C3 C4
14240 90 190 43
1.2 拿到文章中的marker基因列表
作者要对哪些基因可视化,都是有自己的思量的
1# 作者选择的14个marker基因
2markerGenes <- c(
3 "Nr2f1",
4 "Nr2f2",
5 "Maf",
6 "Foxl2",
7 "Rspo1",
8 "Lgr5",
9 "Bmp2",
10 "Runx1",
11 "Amhr2",
12 "Kitl",
13 "Fst",
14 "Esr2",
15 "Amh",
16 "Ptges"
17)
1.3 提取marker基因小表达矩阵
1gene_subset <- as.matrix(log(females[rownames(females) %in% markerGenes,]+1))
2
3> dim(gene_subset)
4[1] 14 563
5
6> gene_subset[1:4,1:4]
7 E10.5_XX_20140505_C01_150331_1 E10.5_XX_20140505_C02_150331_1 E10.5_XX_20140505_C03_150331_1 E10.5_XX_20140505_C04_150331_2
8Kitl 2.547141 1.172887 4.1123988 4.032277
9Lgr5 0.000000 0.000000 0.0568497 0.000000
10Esr2 0.000000 0.000000 0.0000000 0.000000
11Fst 0.000000 0.000000 0.0000000 2.694897
12
13## 然后对这个小表达矩阵再细分,根据4个cluster的列名,也即是前面female_clustering=cluster[,2];names(female_clustering)=cluster[,1]这一步的目的
14cl1_gene_subset <- gene_subset[, colnames(gene_subset) %in% names(female_clustering[female_clustering=="C1"])]
15cl2_gene_subset <- gene_subset[, colnames(gene_subset) %in% names(female_clustering[female_clustering=="C2"])]
16cl3_gene_subset <- gene_subset[, colnames(gene_subset) %in% names(female_clustering[female_clustering=="C3"])]
17cl4_gene_subset <- gene_subset[, colnames(gene_subset) %in% names(female_clustering[female_clustering=="C4"])]
18
19## 重新再组合起来
20heatmap_gene_subset <- cbind(
21 cl1_gene_subset,
22 cl2_gene_subset,
23 cl3_gene_subset,
24 cl4_gene_subset
25)
1.4 根据marker基因的顺序,重新排列这个矩阵
1# 之前是这样
2> rownames(heatmap_gene_subset);markerGenes
3 [1] "Kitl" "Lgr5" "Esr2" "Fst" "Runx1" "Amhr2" "Bmp2" "Rspo1" "Nr2f2" "Amh" "Foxl2" "Ptges" "Maf" "Nr2f1"
4 [1] "Nr2f1" "Nr2f2" "Maf" "Foxl2" "Rspo1" "Lgr5" "Bmp2" "Runx1" "Amhr2" "Kitl" "Fst" "Esr2" "Amh" "Ptges"
5
6# 得到marker基因在heatmap_gene_subset中的位置
7match(markerGenes,rownames(heatmap_gene_subset))
8
9# 然后就能提取出和marker基因顺序一样的heatmap_gene_subset
10heatmap_gene_subset <- heatmap_gene_subset[match(markerGenes,rownames(heatmap_gene_subset)),]
11
12# 之后是这样
13> rownames(heatmap_gene_subset);markerGenes
14 [1] "Nr2f1" "Nr2f2" "Maf" "Foxl2" "Rspo1" "Lgr5" "Bmp2" "Runx1" "Amhr2" "Kitl" "Fst" "Esr2" "Amh" "Ptges"
15 [1] "Nr2f1" "Nr2f2" "Maf" "Foxl2" "Rspo1" "Lgr5" "Bmp2" "Runx1" "Amhr2" "Kitl" "Fst" "Esr2" "Amh" "Ptges"
1.5 修改表达矩阵的列名,得到6个时间点信息
1heatmap_female_stages <- sapply(strsplit(colnames(heatmap_gene_subset), "_"), `[`, 1)
2> table(heatmap_female_stages)
3heatmap_female_stages
4E10.5 E11.5 E12.5 E13.5 E16.5 P6
5 68 100 103 99 85 108
1.6 用包装好的pheatmap画热图
1# 看到H图中,列被分成了4栏,那么这个就是根据colbreaks来划分的。colbreaks的意思就是从哪里到哪里这是一块。当有多个分组又想画一个分割线的话,这个参数就很有用
2colbreaks <- c(
3 ncol(cl1_gene_subset),
4 ncol(cl1_gene_subset)+ncol(cl2_gene_subset),
5 ncol(cl1_gene_subset)+ncol(cl2_gene_subset)+ncol(cl3_gene_subset)
6)
7
8# 然后就是上色,6个时间点和4个群使用自定义的颜色
9cluster_color <- c(
10 C1="#560047",
11 C2="#a53bad",
12 C3="#eb6bac",
13 C4="#ffa8a0"
14)
15
16stage_color=c(
17 E10.5="#2754b5",
18 E11.5="#8a00b0",
19 E12.5="#d20e0f",
20 E13.5="#f77f05",
21 E16.5="#f9db21",
22 P6="#43f14b"
23)
24
25# 开始画热图
26library(pheatmap)
27png("female_marker_heatmap.png")
28plot_heatmap_2(
29 heatmap_gene_subset,
30 female_clustering,
31 heatmap_female_stages,
32 rowbreaks,
33 colbreaks,
34 cluster_color,
35 stage_color
36)
37dev.off()
1.7 用包装好的ggboxplot画小提琴图
1pdf("step2.1-B-markers-violin.pdf", width=10, height=22)
2require(gridExtra)
3
4# 每个基因的小提琴图都有4个cluster,对它们用不同的颜色
5female_clusterPalette <- c(
6 "#560047",
7 "#a53bad",
8 "#eb6bac",
9 "#ffa8a0"
10)
11# 每个基因做一个小提琴图,并用for循环保存在p这个列表中
12p <- list()
13for (genes in markerGenes) {
14 p[[genes]] <- violin_gene_exp(
15 genes,
16 females,
17 female_clustering,
18 female_clusterPalette
19 )
20}
21# 最后组合起来,每列显示3张
22do.call(grid.arrange,c(p, ncol=3))
23dev.off()
其中这个`violin_gene_exp`函数是精髓,如果要看它做了什么,可以按住cmd或ctrl,然后点一下这个函数,就会跳到自定义函数脚本中
1.8 用包装好的`geom_point`+`geom_density_2d`画等高线图
1pdf("step2.1-C-markers-tSNE-density.pdf", width=16, height=28)
2require(gridExtra)
3
4load('../step1-female-RPKM-tSNE/female_tsne.Rdata')
5p <- list()
6for (genes in markerGenes) {
7 p[[genes]] <- tsne_gene_exp(
8 female_tsne,
9 genes,
10 females
11 )
12}
13
14do.call(grid.arrange,c(p, ncol=4))
15dev.off()
2 使用Seurat包带的函数进行可视化
上一次已经做好了Seurat的tSNE分群结果,直接加载
1load('seurat3-female-tsne.Rdata')
2DimPlot(object = sce_female_tsne, reduction = "tsne")
然后画小提琴图和表达量热图
1# 小提琴图
2pdf('seurat3_VlnPlot.pdf', width=10, height=15)
3VlnPlot(object = sce_female_tsne, features = markerGenes ,
4 pt.size = 0.2,ncol = 4)
5dev.off()
6
7# 基因表达量热图
8pdf('seurat3_FeaturePlot.pdf', width=10, height=15)
9FeaturePlot(object = sce_female_tsne, features = markerGenes ,
10 pt.size = 0.2,ncol = 3)
11dev.off()
比较作者代码和Seurat的结果
取同一个基因Nr2f2,看看它们的小提琴图:
然后如果我们自己画图呢?
1# 就画其中的Nr2f2基因
2## 分类信息在此
3group <- Seurat::Idents(sce_female)
4## 表达矩阵在此
5nr2f2 <- as.numeric(log(female_count['Nr2f2',]+1))
6boxplot(nr2f2~group)
ggboxplot画一个箱线图并加上显著性
1df <- data.frame(expr=nr2f2,
2 group=group)
3female_clusterPalette <- c(
4 "#560047",
5 "#a53bad",
6 "#eb6bac",
7 "#ffa8a0"
8)
9my_comparisons <- list( c("0", "1"), c("1", "2"), c("2", "3") )
10ggboxplot(df, x = "group", y = "expr",
11 color = "group", palette = female_clusterPalette)+
12 stat_compare_means(comparisons = my_comparisons)
ggviolin再画一个小提琴图
1df <- data.frame(expr=nr2f2,
2 group=group)
3female_clusterPalette <- c(
4 "#560047",
5 "#a53bad",
6 "#eb6bac",
7 "#ffa8a0"
8)
9ggviolin(df, "group", "expr", fill = "group",
10 palette = female_clusterPalette,
11 add = "boxplot", add.params = list(fill = "white"))+
12 stat_compare_means(comparisons = my_comparisons)