使用harmony算法移除批次效应.md 3.8 KB


title: 使用Harmony算法移除批次效应 tags: [] id: '2232' categories:

  • - 分群 comments: false date: 2022-08-13 00:08:41 ---

安装补充包

读取数据

root_path = "."
f_read10x <- function(dirN, ...){
    tp_samples <- list.files(file.path(root_path, dirN))
    tp_dir <- file.path(root_path, dirN, tp_samples)
    names(tp_dir) <- tp_samples
    scRNA <- list()
    for (lc_ba in names(tp_dir)){
        counts <- Read10X(data.dir = tp_dir[[lc_ba]])
        scRNA[[lc_ba]] <- CreateSeuratObject(counts, project = lc_ba, ...)
        scRNA[[lc_ba]][["percent.mt"]] <- PercentageFeatureSet(scRNA[[lc_ba]], pattern = "^MT-")
        scRNA[[lc_ba]][["percent.ERCC"]] <- PercentageFeatureSet(scRNA[[lc_ba]], pattern = "^ERCC-")
    }
    scRNA
}

质量控制

options(repr.plot.width=12, repr.plot.height=6)
options(ggrepel.max.overlaps = Inf)
library(ggplot2)
for (scRNAo in scRNA){
    print(VlnPlot(scRNAo, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) + labs(title = scRNAo@project.name))
}

scRNA$APAP1 <- subset(scRNA$APAP1, subset = nFeature_RNA > 200 & nFeature_RNA < 3000)

scRNA <- Reduce(function(...) merge(...), scRNA)

预处理函数

f_ScaleData_RunPCA <- function(scRNA){
    scRNA <- FindVariableFeatures(scRNA, selection.method = "vst", nfeatures = 2000)
    lc_all.genes <- rownames(scRNA)
    scRNA <- ScaleData(scRNA, features = lc_all.genes)

    scRNA <- RunPCA(scRNA, features = VariableFeatures(object = scRNA))
    print(ElbowPlot(scRNA, ndims = 40))
    scRNA
}
require(harmony)
f_RunHarmony <-  function(scRNA, dims=1:30, batchN="batch"){
    scRNA = scRNA %>% RunHarmony(batchN, plot_convergence = TRUE, max.iter.harmony = 30)
    scRNA <- scRNA %>% RunUMAP(reduction = "harmony", dims = dims)
    scRNA
}

分群函数

f_FindNeighbors <- function(scRNA, resolution = 0.5){
    scRNA <- scRNA %>% FindNeighbors(reduction = "harmony") %>%  FindClusters(resolution = resolution)
    scRNA[[paste0('h_resolution_', resolution)]] <- Idents(scRNA)
    scRNA
}

查看分群质量

f_metaG2G <- function(metaG, matrixN=F){
    res  <- list()
    alltype <- unique(metaG[[1]])
    for(type in alltype){
        res[[type]] <- rownames(metaG)[metaG[[1]] == type]
        if (matrixN){
            res[[type]] <- gsub('-','.',res[[type]])
        }
    }
    res
}

f_br_cluster_f <- function(sObject, lc_groupN){
    lc_filter <- unlist(unique(sObject[[lc_groupN]]))
    lc_filter <- lc_filter[!is.na(lc_filter)]
    lc_filter
}

f_br_cluster <- function(sObject, lc_groupN, lc_labelN, lc_prop = F){
    lc_g <- f_metaG2G(sObject[[lc_groupN]])
    lc_l <- sObject[[lc_labelN]]
    lc_l[[1]] <- as.character(lc_l[[1]])
    res <- data.frame(row.names = f_br_cluster_f(lc_l, lc_labelN))
    if(lc_prop){
        for(Nm in names(lc_g)){
            tmp <- prop.table(table(lc_l[lc_g[[Nm]],]))
            res[[Nm]] <- tmp[rownames(res)]
        }
    }else{
        for(Nm in names(lc_g)){
            tmp <- table(lc_l[lc_g[[Nm]],])
            res[[Nm]] <- tmp[rownames(res)]
        }
    }
    res[is.na(res)] = 0
    res
}

标注函数

f_add_annotation <- function(sObject, lc_ids,lc_metaName){
    Idents(sObject) <- sObject[[lc_metaName]]
    names(lc_ids) <- levels(sObject)
    sObject <- RenameIdents(sObject, lc_ids)
    sObject[[lc_metaName]] <- Idents(sObject)
    sObject
}

f_mergeMeta_pre <- function(scRNA, metaN, baseMetaN){
    scRNA[[metaN]] <- as.character(scRNA[[baseMetaN]][[1]])
    scRNA
}
f_mergeMeta <- function(scRNA, metaN, metaData){
    scRNA[[metaN]][rownames(metaData),] <- as.character(metaData[[1]])
    scRNA
}
f_mergeMeta_end <- function(scRNA, metaN){
    scRNA[[metaN]][[1]] <- as.factor(scRNA[[metaN]][[1]])
    scRNA
}