不同批次但未记录批次时使用deseq2的方法.md 2.7 KB


title: 不同批次但未记录批次时使用DESeq2的方法 tags: [] id: '1954' categories:

  • - uncategorized date: 2022-08-08 16:56:44 ---

准备数据

library(SummarizedExperiment)
tmp <- load('PRAD_tp.rda')
counts <- data@assays@data$unstranded
colnames(counts) <- data@colData$patient
f_rm_duplicated <- function(NameL, reverse=F){
    tmp <- data.frame(table(NameL))
    if(reverse){
        tmp <- tmp$NameL[tmp$Freq > 1]
    }else{
        tmp <- tmp$NameL[tmp$Freq == 1]
    }
    which(NameL %in% as.character(tmp))
}
counts <- counts[,f_rm_duplicated(colnames(counts))]
geneInfo <- as.data.frame(data@rowRanges)[c('gene_id','gene_type','gene_name')]
tmp <- load('mCRPC.rda')
geneInfo2 <- as.data.frame(data@rowRanges)[c('gene_id','gene_type','gene_name')]
all(geneInfo2$gene_id == geneInfo$gene_id)
counts2 <- data@assays@data$unstranded
colnames(counts2) <- data@colData$bcr_patient_barcode
counts2 <- counts2[,f_rm_duplicated(colnames(counts2))]
colnames(counts) <- paste('N', colnames(counts), sep = '_')
cts_b <- as.data.frame(cbind(counts2,counts))
rownames(geneInfo) <- NULL
keep <- (rowSums(counts) > ncol(counts)) & (rowSums(counts2) > ncol(counts2))
cts_b[keep,]

安装补充包

计算DEG

library(DESeq2)
library(sva)
f_DESeq2_sva <- function(cts_bb, rowInfo, ControlN, TreatN, rm.NA=T){
    cts_b <- cts_bb[,c(ControlN, TreatN)]
    conditions <- factor(c(rep("Control",length(ControlN)), rep("Treat",length(TreatN)))) 
    colData_b <- data.frame(row.names = colnames(cts_b), conditions)
    ## 生物学差异的设计矩阵
    group <- as.factor(c(rep("Ctl",length(ControlN)), rep("Trt",length(TreatN))))
    mod1  <-  model.matrix(~group)
    print(mod1)
    ## 无差异的设计矩阵
    mod0  <-  cbind(mod1[,1])
    svseq  <-  svaseq(as.matrix(cts_b),mod1,mod0,n.sv=2) # Estimate batch with svaseq (unsupervised)
    colData_b$SVa <- svseq$sv[,1]
    colData_b$SVb <- svseq$sv[,2]
    print(colData_b)
    ddssva <- DESeqDataSetFromMatrix(countData = cts_b,
                          colData = colData_b,
                          design = ~ SVa + SVb + conditions)
    ddssva <- DESeq(ddssva)
    
    res <- results(ddssva, contrast=c("conditions","Treat","Control"))
    rres <- cbind(rowInfo, data.frame(res))
    if(rm.NA){rres <- rres[!is.na(rres$padj),]}
    rres <- rres[order(rres$log2FoldChange, decreasing = T),]
    # saveRDS(rres, paste('DEGs', paste(TreatN, collapse = '_'), 'vs.', paste(ControlN, collapse = '_'), 'DESeq2.rds',sep = '_'))
    rres
}
Ct1 <- colnames(counts)
Tt1 <- colnames(counts2)
r1 <- f_DESeq2_sva(cts_b[keep,], geneInfo[keep,], Ct1, Tt1)