cox-ridge:外部数据集验证.md 4.1 KB


title: COX-Ridge:外部数据集验证 tags: [] id: '2265' categories:

  • - 统计学 date: 2022-08-19 16:42:29 ---

建模

library(glmnet)
library(survival)
df1 <- readRDS('../../fig.3/B_ref_A_fiig.3_C/new_df1.rds')
y <- data.matrix(df1[9:10])
x <- data.matrix(df1[1:8])
Ridge <- glmnet(x = x, y = y, family = 'cox', alpha = 0)
plot(Ridge, xvar = 'lambda', label = T)
set.seed(0)
Ridge.cv <- cv.glmnet(x,y,family="cox", alpha=0,nfolds=10, type.measure = 'C')
plot(Ridge.cv)
Ridge.cf <- coef(Ridge.cv, s="lambda.min")
Ridge.cf <- as.data.frame(as.matrix(Ridge.cf))
names(Ridge.cf) <- 'coef'
saveRDS(Ridge.cf, 'Ridge.cf.rds')

验证

library("survival")
library("survminer")
library("rms")
train_sets <- readRDS('../../../COXPH/train_sets.rds')
test_sets <- readRDS('../../../COXPH/test/test_sets.rds')
all_sets <- c(train_sets, test_sets)
library(survival)
library(survminer)
library(ggpubr)
require(survRM2)
f_surv <- function(df, isplot=T, timeN='time', statusN='status', groupN='group', tau=5, risk.table=T, ncensor.plot=T){
    res <- list()
    my.surv <<- Surv(df[[timeN]], df[[statusN]]==1)
    my.surv.f <<- paste0("my.surv~", groupN)
    my.surv.df <<- df
    kmfit <- surv_fit(formula(my.surv.f), data = my.surv.df)
    sdiff <- survdiff(formula(my.surv.f), data = my.surv.df)
    res$pv <- 1 - pchisq(sdiff$chisq, length(sdiff$n) - 1)
    if(isplot){
        res$plot <- ggsurvplot(kmfit, conf.int =F, pval = T, risk.table = risk.table, ncensor.plot = ncensor.plot)
    }
    res$RMS <- rmst2(df[[timeN]], df[[statusN]], as.numeric(df[[groupN]])-1, tau=tau)
    res$RMST <- as.data.frame(rbind(res$RMS$RMST.arm1$result[1,],res$RMS$RMST.arm0$result[1,]))
    rownames(res$RMST) <- c('RMST (arm=1)', 'RMST (arm=0)')
    res
}
f_COXPH_predict <- function(data_sets, coxM){
    res <- list()
    for(Name in names(data_sets)){
        try(expr = {
            dat <- data_sets[[Name]]
            y <- Surv(round(dat$meta$pfs_time,2), dat$meta$pfs_status == 1)
            y <- data.matrix(y)
            x <- predict(f1,type="lp",newdata=dat$data)
            df <- as.data.frame(cbind(x,y))
            names(df)[1] <- 'Risk_Score'
            df$group <- 'Low Risk'
            df$group[df$Risk_Score > median(df$Risk_Score)] <- 'High Risk'
            df$group <- as.factor(df$group)
            res[[Name]] <- df
        })
    }
    res
} 
f_surv_list  <- function(df_lists, ...){
    res <- list()
    for(Name in names(df_lists)){
        df <- df_lists[[Name]]
        res[[Name]] <- f_surv(df, ...)
    }
    res
}
f1 <- readRDS('../../fig.3/B_ref_A_fiig.3_C/coxph.rds')
f1$coefficients <- Ridge.cf[names(f1$coefficients),'coef']
tmp_dat <- f_COXPH_predict(all_sets, f1)
tmp_surv <- f_surv_list(tmp, tau = 12*3)
library(timeROC)
f_timeROC_one <-function(dat, geneN){
    years2 <- timeROC(T=dat$time,delta=dat$status,
                          marker=dat[[geneN]],cause=1,
                          weighting="marginal",
        #                   other_markers=as.matrix(df$`1-year`),
                          times=12*c(1,3,5), ROC=T, iid=T)
    auc2.ci <- confint(years2, parm=NULL, level = 0.95,n.sim=2000)
    auc2.ci <- auc2.ci$CI_AUC
    auc2.ci
    years2.auc1 <- paste0(round(years2$AUC[1],4)*100,'(',auc2.ci[1,1],'~',auc2.ci[1,2],')%')
    years2.auc2 <- paste0(round(years2$AUC[2],4)*100,'(',auc2.ci[2,1],'~',auc2.ci[2,2],')%')
    years2.auc3 <- paste0(round(years2$AUC[3],4)*100,'(',auc2.ci[3,1],'~',auc2.ci[3,2],')%')
    res <- list(roc=years2, auc1=years2.auc1, auc2=years2.auc2, auc3=years2.auc3)
    res
}
options(repr.plot.width=8, repr.plot.height=8)
tmp <- f_timeROC_one(tmp_dat$GSE54460, 'Risk_Score')
plot(NA, ylim=c(0,1), xlim=c(0,1), main="Time dependent ROC of Risk_Score in train_data", xlab='1-Specificity', ylab='Sensitivity')
plot(tmp$roc,time=12*1, add=T, col="#BC3C29FF", lwd=2)      
plot(tmp$roc,time=12*3,add=T,col="#0072B5FF") 
plot(tmp$roc,time=12*5,add=T,col="#E18727FF") 
legend("bottomright",c(paste("AUC of 1-year =",tmp$auc1),
                 paste("AUC of 3-year =",tmp$auc2),
                 paste("AUC of 5-year =",tmp$auc3)),
       col=c("#BC3C29FF","#0072B5FF","#E18727FF"),lty=1,lwd=2)