seurat-三-scrna-seq数据全流程分析.md 70 KB


title: Seurat (三) scRNA-seq数据全流程分析 (一) tags: [] id: '792' categories:

  • - Seurat教程
  • - 生物信息学 date: 2021-10-01 03:12:44 ---

第一步 查看数据构成

library(RColorBrewer)
library(ggplot2)
blank_theme <- theme_minimal()+
    theme(
        axis.title.x = element_blank(),
        axis.text.x=element_blank(),
        axis.title.y = element_blank(),
        axis.text.y=element_blank(),
        panel.border = element_blank(),
        panel.grid=element_blank(),
        axis.ticks = element_blank(),
        plot.title=element_text(size=14, face="bold",hjust = 0.5)
    )
f_pie <- function(lc_x, lc_main){
    lc_cols <- brewer.pal(length(lc_x), "Paired")
    lc_v <- as.vector(100*lc_x)
    lc_percent = sprintf('%0.2f%%',lc_v)
    lc_df <- data.frame(type = names(lc_x), nums = lc_v)
    lc_df$pos <- with(lc_df, 100-cumsum(nums)+nums/2)
#     print(lc_df)
    lc_pie <- ggplot(data = lc_df, mapping = aes(x = 1, y = nums, fill = type)) + geom_bar(stat = 'identity')
    lc_pie <- lc_pie + coord_polar("y", start=0, direction = 1) + scale_fill_manual(values=lc_cols) + blank_theme 
    lc_pie <- lc_pie + geom_text(aes(x = 1.3, y=pos,label= lc_percent))
    lc_pie <- lc_pie + labs(title = lc_main)
    lc_pie
}
 
options(repr.plot.width=6, repr.plot.height=6)
options(ggrepel.max.overlaps = Inf)
f_pie(prop.table(table(Idents(scRNA))), "Proportion of each brain region")

第二步 QC

scRNA[["percent.mt"]] <- PercentageFeatureSet(scRNA, pattern = "^MT-")

options(repr.plot.width=18, repr.plot.height=6)
options(ggrepel.max.overlaps = Inf)
VlnPlot(scRNA, 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
        ncol = 3)
plot1 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2

x10s <- SplitObject(scRNA, split.by = 'ident')
# 6.3、分别对各样本进行QC
x10s$BA213 <- subset(x10s$BA213,
                  subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 10)
x10s$BA04 <- subset(x10s$BA04,
                  subset = nFeature_RNA > 200 & nFeature_RNA < 3000 & percent.mt < 10)
x10s$BA09 <- subset(x10s$BA09,
                  subset = nFeature_RNA > 200 & nFeature_RNA < 3500 & percent.mt < 10)
x10s$BA21 <- subset(x10s$BA21,
                  subset = nFeature_RNA > 200 & nFeature_RNA < 4500 & percent.mt < 10)
x10s$BA22 <- subset(x10s$BA22,
                  subset = nFeature_RNA > 200 & nFeature_RNA < 3000 & percent.mt < 10)
x10s$BA39 <- subset(x10s$BA39,
                  subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 10)
x10s$BA40 <- subset(x10s$BA40,
                  subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 10)
x10s$BA44 <- subset(x10s$BA44,
                  subset = nFeature_RNA > 200 & nFeature_RNA < 4000 & percent.mt < 10)
x10s$BA45 <- subset(x10s$BA45,
                  subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 10)

f_IntegrateData <- function(olist){
    # 8、鉴定用于多样本数据整合的anchors
    tp_anchors <- FindIntegrationAnchors(object.list = olist, dims = 1:30, 
                          reduction = "cca")
    gc()
    # 9、整合多样本数据集 注意 内存需求大于16g!!!,不足请设置足够的swap空间再运行(总内存32g是够的)
    tp_int <- IntegrateData(anchorset = tp_anchors)
    gc()
    tp_int
}

scRNA <- f_IntegrateData(x10s)

第三步 PCA降维

lc_all.genes <- rownames(scRNA)
scRNA <- ScaleData(scRNA, features = lc_all.genes)

第四步 UMPA降维

scRNA <- RunUMAP(scRNA, dims = 1:pca_dim)
lc_reduction = "umap"
options(repr.plot.width=9, repr.plot.height=9)
DimPlot(scRNA, reduction = lc_reduction, group.by = 'orig.ident',  label = T, repel = T, label.size = 6) + labs(title = "UMAP reduction of brain regions")