tcga-四-生存分析.md 70 KB


title: TCGA (四) 生存分析 tags: [] id: '784' categories:

  • - 生物信息学
  • - 组织测序分析 date: 2021-10-05 23:52:00 ---

第一步 导入程辑包

library(cgdsr)
library(survival)
library(survminer)
library(ggpubr)
# 配置数据路径
root_path = "~/zlliu/R_data/TCGA"
 
# 配置结果保存路径
output_path = root_path
if (!file.exists(output_path)){dir.create(output_path)}
 
# 设置工作目录,输出文件将保存在此目录下
setwd(output_path)
getwd()
mycgds <- CGDS("http://www.cbioportal.org/")
all_TCGA_studies <- getCancerStudies(mycgds)

第二步 获得临床数据与测序矩阵

f_getSA_1 <- function(subjectID){
    res = list()
    res$subjectID <- subjectID
    res$cases  <- getCaseLists(mycgds, subjectID)
    res$GP <- getGeneticProfiles(mycgds,subjectID)
    View(res$GP)
    View(res$cases)
    res
}
 
f_getSA_5 <- function(lc_a, lc_sGP, lc_cg,lc_cc=1){
    lc_a$cc <- lc_a$cases[lc_cc,1]
    lc_a$sGP <- lc_a$GP[lc_sGP,1]
    lc_a$cg <- unlist(lc_cg)
    lc_a
}
 
f_getSA_2 <- function(lc_a, lc_c=c('OS_MONTHS','OS_STATUS')){
    lc_a$cd <- getClinicalData(mycgds, lc_a$cc)
    View(head(lc_a$cd))
    View(dim(lc_a$cd))
    View(colnames(lc_a$cd))
    lc_a$PD <- getProfileData(mycgds,lc_a$cg,lc_a$sGP,lc_a$cc)
    View(head(lc_a$PD))
    View(dim(lc_a$PD))
    if(all(lc_c %in% colnames(lc_a$cd))){
        lc_a$os <- lc_a$cd[,lc_c]
        lc_a$os <- subset(lc_a$os, lc_a$os[[lc_c[1]]]>0)
        print(unique(lc_a$os[[2]]))
    }else{
        print(lc_c %in% colnames(lc_a$cd))
    }
    lc_a
}
 
f_getSA_4 <- function(lc_b){
    p.val = 1 - pchisq(lc_b$chisq, length(lc_b$n) - 1)
    p.val
}
 
f_getSA_3 <- function(lc_a, lc_g, lc_ST = '1:DECEASED', lc_c=c('OS_MONTHS','OS_STATUS'), lc_force = F, lc_d=F){
    dat2 <- subset(lc_a$PD, !is.na(lc_a$PD[[lc_g]]))
    common <- intersect(rownames(lc_a$os), rownames(dat2))
    lc_a$dat <- lc_a$os[common,]
    lc_a$dat2 <- dat2[common,]
    res <- lc_a$dat2[[lc_g]]
    if(lc_d){
        res <- data.frame(res)
    }else{
        res <- data.frame(ifelse(res > median(res),'high','low'))
    }
    if(length(unique(res[[1]]))<2){
        lc_a$pv <- 1
        return(lc_a)
    }
    colnames(res) <- lc_g
    nmsl <<- res
    nm_my.surv <<- Surv(lc_a$dat[[lc_c[1]]],lc_a$dat[[lc_c[2]]]==lc_ST)
#     lc_a$nmb <- my.surv
#     lc_a$nmsl <- res
    nmb <<- paste("nm_my.surv~", lc_g)
    lc_b <- survdiff(nm_my.surv~., data = res)
    lc_a$pv <- f_getSA_4(lc_b)
    if(lc_a$pv < 0.05  lc_force){
        kmfit1 <- survfit(formula(nmb), data = nmsl)
        lc_a$pr <- ggsurvplot(kmfit1,conf.int =F, pval = T,risk.table =T, ncensor.plot = TRUE)
    }
#     rm(nmsl, nm_my.surv, nmb)
    lc_a
}
 
library(fdrtool)
f_getSA_6 <- function(lc_a, ...){
    res1 = NULL
    res2 = list()
    for(hhh in colnames(lc_a$PD)){
        lc_a <- f_getSA_3(lc_a, hhh, ...)
        res1 <- rbind(res1, c(hhh, lc_a$pv))
        if(lc_a$pv < 0.05){
            res2[[hhh]] <- lc_a$pr
        }
    }
    if(length(res1) > 0){
        colnames(res1) <- c('gene','p')
        lc_a$srl <- list(pv=data.frame(res1), pr=res2)
        lc_a$srl$pv$p <- as.numeric(lc_a$srl$pv$p)
        lc_a$srl$fdr <- fdrtool(lc_a$srl$pv$p, statistic="pvalue")
        lc_a$srl$pv$q <- lc_a$srl$fdr$qval
        lc_a$srl$s <- subset(lc_a$srl$pv, p <0.05 & q <0.05)
    }else{
        print('error: NULL lc_a$srl')
    }
    lc_a
}

第三步 总体生存率分析

Fe_death <- read.csv('~/zlliu/R_data/TCGA/Fe_death.CSV')
a <- f_getSA_1('prad_su2c_2019')
a <- f_getSA_5(a, 4, Fe_death$Symbol)
a <- f_getSA_2(a)
a <- f_getSA_6(a)

第四步 无进展生存期分析

Fe_death <- read.csv('~/zlliu/R_data/TCGA/Fe_death.CSV')
a <- f_getSA_1('prad_tcga')
a <- f_getSA_5(a, 4, Fe_death$Symbol)
a <- f_getSA_2(a, lc_c = c('DFS_MONTHS','DFS_STATUS'))
a <- f_getSA_6(a,lc_c = c('DFS_MONTHS','DFS_STATUS'),lc_ST = '1:Recurred/Progressed')
subset(a$srl$pv, p <0.05)
a$srl$pr
a <- f_getSA_3(a, 'CHAC1', lc_force = T, lc_c = c('DFS_MONTHS','DFS_STATUS'),lc_ST = '1:Recurred/Progressed')
a$pr

第五步 多值生存分析

Fe_death <- read.csv('~/zlliu/R_data/TCGA/Fe_death.CSV')
a <- f_getSA_1('prad_tcga')
a <- f_getSA_5(a, 3, Fe_death$Symbol)
a <- f_getSA_2(a, lc_c = c('DFS_MONTHS','DFS_STATUS'))
a <- f_getSA_6(a,lc_c = c('DFS_MONTHS','DFS_STATUS'),lc_ST = '1:Recurred/Progressed', lc_d = T)
subset(a$srl$pv, p <0.05 & q <0.05)
a$srl$pr[a$srl$s$gene]