title: TCGA (四) 生存分析 tags: [] id: '784' categories:
library(cgdsr)
library(survival)
library(survminer)
library(ggpubr)
# 配置数据路径
root_path = "~/zlliu/R_data/TCGA"
# 配置结果保存路径
output_path = root_path
if (!file.exists(output_path)){dir.create(output_path)}
# 设置工作目录,输出文件将保存在此目录下
setwd(output_path)
getwd()
mycgds <- CGDS("http://www.cbioportal.org/")
all_TCGA_studies <- getCancerStudies(mycgds)
f_getSA_1 <- function(subjectID){
res = list()
res$subjectID <- subjectID
res$cases <- getCaseLists(mycgds, subjectID)
res$GP <- getGeneticProfiles(mycgds,subjectID)
View(res$GP)
View(res$cases)
res
}
f_getSA_5 <- function(lc_a, lc_sGP, lc_cg,lc_cc=1){
lc_a$cc <- lc_a$cases[lc_cc,1]
lc_a$sGP <- lc_a$GP[lc_sGP,1]
lc_a$cg <- unlist(lc_cg)
lc_a
}
f_getSA_2 <- function(lc_a, lc_c=c('OS_MONTHS','OS_STATUS')){
lc_a$cd <- getClinicalData(mycgds, lc_a$cc)
View(head(lc_a$cd))
View(dim(lc_a$cd))
View(colnames(lc_a$cd))
lc_a$PD <- getProfileData(mycgds,lc_a$cg,lc_a$sGP,lc_a$cc)
View(head(lc_a$PD))
View(dim(lc_a$PD))
if(all(lc_c %in% colnames(lc_a$cd))){
lc_a$os <- lc_a$cd[,lc_c]
lc_a$os <- subset(lc_a$os, lc_a$os[[lc_c[1]]]>0)
print(unique(lc_a$os[[2]]))
}else{
print(lc_c %in% colnames(lc_a$cd))
}
lc_a
}
f_getSA_4 <- function(lc_b){
p.val = 1 - pchisq(lc_b$chisq, length(lc_b$n) - 1)
p.val
}
f_getSA_3 <- function(lc_a, lc_g, lc_ST = '1:DECEASED', lc_c=c('OS_MONTHS','OS_STATUS'), lc_force = F, lc_d=F){
dat2 <- subset(lc_a$PD, !is.na(lc_a$PD[[lc_g]]))
common <- intersect(rownames(lc_a$os), rownames(dat2))
lc_a$dat <- lc_a$os[common,]
lc_a$dat2 <- dat2[common,]
res <- lc_a$dat2[[lc_g]]
if(lc_d){
res <- data.frame(res)
}else{
res <- data.frame(ifelse(res > median(res),'high','low'))
}
if(length(unique(res[[1]]))<2){
lc_a$pv <- 1
return(lc_a)
}
colnames(res) <- lc_g
nmsl <<- res
nm_my.surv <<- Surv(lc_a$dat[[lc_c[1]]],lc_a$dat[[lc_c[2]]]==lc_ST)
# lc_a$nmb <- my.surv
# lc_a$nmsl <- res
nmb <<- paste("nm_my.surv~", lc_g)
lc_b <- survdiff(nm_my.surv~., data = res)
lc_a$pv <- f_getSA_4(lc_b)
if(lc_a$pv < 0.05 lc_force){
kmfit1 <- survfit(formula(nmb), data = nmsl)
lc_a$pr <- ggsurvplot(kmfit1,conf.int =F, pval = T,risk.table =T, ncensor.plot = TRUE)
}
# rm(nmsl, nm_my.surv, nmb)
lc_a
}
library(fdrtool)
f_getSA_6 <- function(lc_a, ...){
res1 = NULL
res2 = list()
for(hhh in colnames(lc_a$PD)){
lc_a <- f_getSA_3(lc_a, hhh, ...)
res1 <- rbind(res1, c(hhh, lc_a$pv))
if(lc_a$pv < 0.05){
res2[[hhh]] <- lc_a$pr
}
}
if(length(res1) > 0){
colnames(res1) <- c('gene','p')
lc_a$srl <- list(pv=data.frame(res1), pr=res2)
lc_a$srl$pv$p <- as.numeric(lc_a$srl$pv$p)
lc_a$srl$fdr <- fdrtool(lc_a$srl$pv$p, statistic="pvalue")
lc_a$srl$pv$q <- lc_a$srl$fdr$qval
lc_a$srl$s <- subset(lc_a$srl$pv, p <0.05 & q <0.05)
}else{
print('error: NULL lc_a$srl')
}
lc_a
}
Fe_death <- read.csv('~/zlliu/R_data/TCGA/Fe_death.CSV')
a <- f_getSA_1('prad_su2c_2019')
a <- f_getSA_5(a, 4, Fe_death$Symbol)
a <- f_getSA_2(a)
a <- f_getSA_6(a)
Fe_death <- read.csv('~/zlliu/R_data/TCGA/Fe_death.CSV')
a <- f_getSA_1('prad_tcga')
a <- f_getSA_5(a, 4, Fe_death$Symbol)
a <- f_getSA_2(a, lc_c = c('DFS_MONTHS','DFS_STATUS'))
a <- f_getSA_6(a,lc_c = c('DFS_MONTHS','DFS_STATUS'),lc_ST = '1:Recurred/Progressed')
subset(a$srl$pv, p <0.05)
a$srl$pr
a <- f_getSA_3(a, 'CHAC1', lc_force = T, lc_c = c('DFS_MONTHS','DFS_STATUS'),lc_ST = '1:Recurred/Progressed')
a$pr
Fe_death <- read.csv('~/zlliu/R_data/TCGA/Fe_death.CSV')
a <- f_getSA_1('prad_tcga')
a <- f_getSA_5(a, 3, Fe_death$Symbol)
a <- f_getSA_2(a, lc_c = c('DFS_MONTHS','DFS_STATUS'))
a <- f_getSA_6(a,lc_c = c('DFS_MONTHS','DFS_STATUS'),lc_ST = '1:Recurred/Progressed', lc_d = T)
subset(a$srl$pv, p <0.05 & q <0.05)
a$srl$pr[a$srl$s$gene]