title: 【探索】暴力计算临床研究的样本量 urlname: Sample-size-calculation-for-survival-analysis-in-clinical-research index_img: https://api.limour.top/randomImg?d=2024-03-12 16:46:35 date: 2024-03-13 00:46:35 tags: Bootstrap
和《使用Bootstrap法计算自举置信区间》的想法差不多,通过暴力枚举来计算临床研究的样本量,以两组生存分析为例。
require(survival)
f_surv_logrank = function(df){
# df 包含 group time status 三列
# group 类型为 factor
# status 0 表示未发生结局事件 1 表示发生结局事件
surv_obj = with(survival::Surv(time = time, event = status), data = df)
surv_fit = survival::survfit(surv_obj ~ group, data = df)
surv_diff = survival::survdiff(surv_obj ~ group, data = df)
res = list(pv = 1 - stats::pchisq(surv_diff$chisq, length(surv_diff$n) - 1), # p值
surv_fit = surv_fit, # 绘图用
surv_obj = surv_obj) # 为了兼容惰性求值
return(res)
}
f_surv_logrank_plot = function(res){
require(survminer)
surv_obj <<- res$surv_obj # 为了兼容惰性求值
survminer::ggsurvplot(res$surv_fit, conf.int = F, pval = T, risk.table = T, ncensor.plot = TRUE)
}
f_surv_logrank_simulation_Group = function(N, Median_Survival_Time, Lost, Duration_Accrual_Time, Duration_Total_Time){
time = stats::rexp(N, rate = log(2) / Median_Survival_Time) # 生存时间服从指数分布
status = rep(1,N) # 到生存时间发生结局事件
# print(median((survfit(Surv(time, status) ~ 1))))
EnrollT = stats::runif(N, min = 0, max = Duration_Accrual_Time) # 招募时间服从均匀分布
calender_time = time + EnrollT # 发生结局的日期
idx = calender_time > Duration_Total_Time # 研究终止时未发生结局事件
status[idx] = 0
time[idx] = Duration_Total_Time - EnrollT[idx] # 实际参与试验的时间
# print(median((survfit(Surv(time, status) ~ 1)))) # 如果 Accrual_Time + Median_Survival < Total_Time,结果不变
loss = stats::rexp(N, rate = Lost) # 失访时间服从指数分布
idx = loss < time # 失访的人
status[idx] = 0
time[idx] = loss[idx]
# print(median((survfit(Surv(time, status) ~ 1)))) # 结果改变
return(list(time = time, status = status))
}
f_surv_logrank_simulation_Power = function(n_C, Median_Survival_Time_C, Lost_C,
n_T, Median_Survival_Time_T, Lost_T,
Duration_Accrual_Time, Duration_Total_Time, Simulation_Cycle, Alpha){
df = data.frame(group = factor(c(rep('Control',n_C), rep('Treatment',n_T))),
time = rep(0,n_C+n_T),
status = rep(0,n_C+n_T))
sum = 0
for (i in 1:Simulation_Cycle) {
C = f_surv_logrank_simulation_Group(n_C, Median_Survival_Time_C, Lost_C, Duration_Accrual_Time, Duration_Total_Time)
T = f_surv_logrank_simulation_Group(n_T, Median_Survival_Time_T, Lost_T, Duration_Accrual_Time, Duration_Total_Time)
df$time = c(C$time, T$time)
df$status = c(C$status, T$status)
if(f_surv_logrank(df)$pv < Alpha){
sum = sum + 1
}
}
return(sum/Simulation_Cycle)
}
f_surv_logrank_simulation_Sample_Size = function(n_C_min, n_C_max, Median_Survival_Time_C, Lost_C,
TvsC, Median_Survival_Time_T, Lost_T,
Duration_Accrual_Time, Duration_Total_Time,
Simulation_Cycle, Alpha, Power, err=0.01){
mid = floor((n_C_min + n_C_max) / 2) # 以防没有进入循环
while (n_C_min < n_C_max) {
mid = floor((n_C_min + n_C_max) / 2)
simulation_Power = f_surv_logrank_simulation_Power(mid, Median_Survival_Time_C, Lost_C,
as.integer(mid * TvsC), Median_Survival_Time_T, Lost_T,
Duration_Accrual_Time, Duration_Total_Time, Simulation_Cycle, Alpha)
print(paste("mid:", mid, "simulation_Power:", simulation_Power))
if (abs(simulation_Power - Power) < err) {
return(mid)
}else if(simulation_Power < Power) {
n_C_min = mid + 1
}else {
n_C_max = mid - 1
}
}
return(mid)
}
Power = 0.9 # 检验效能 = 1 - 第二类错误的概率
Alpha = 0.05 # 第一类错误的概率
Median_Survival_Time_C = 6 # 对照组的中位生存时间
Median_Survival_Time_T = 8 # 试验组的中位生存时间
Duration_Accrual_Time = 8 # 入组完成用时
Duration_Total_Time = 18 # 总试验用时
Lost_C = 0.05 # 对照组随访单位时间后发生失访的概率
Lost_T = 0.05 # 试验组随访单位时间后发生失访的概率
TvsC = 1 # 试验组的样本量:对照组的样本量 1:1 = 1
Simulation_Cycle = 100 # 模拟的循环次数,越大越准确
f_surv_logrank_simulation_Power(441, 6, 0.05,
442, 8, 0.05,
8, 18, 1000, 0.05
)
# PASS的结果是 0.9
f_surv_logrank_simulation_Sample_Size(0, 1000, 6, 0.05,
1, 8, 0.05,
8, 18, 1000, 0.05, 0.9
)
# PASS的结果是 441