title: TCGAbiolinks下载甲基化数据 tags: [] id: '2310' categories:
library(TCGAbiolinks)
query_met.hg38 <- GDCquery(
project= "TCGA-PRAD",
data.category = "DNA Methylation",
data.type = "Methylation Beta Value",
platform = "Illumina Human Methylation 450"
)
GDCdownload(query_met.hg38)
data.hg38 <- GDCprepare(query_met.hg38)
saveRDS(data.hg38, 'prad.meth.rds')
library(SummarizedExperiment)
# 删除包含 NA 值的探针
data.hg38 <- subset(data.hg38,subset = (rowSums(is.na(assay(data.hg38))) == 0))
# 去除重复样本
data.hg38 <- data.hg38[, substr(colnames(data.hg38), 14, 16) == "01A"]
group <- readRDS('../idea_2/fig3.2/fig5/tcga.predict.rds')
gRow <- intersect(data.hg38$patient, rownames(group))
group <- group[gRow,]
data.hg38 <- data.hg38[, substr(colnames(data.hg38), 1, 12) %in% gRow]
data.hg38$group <- group[substr(colnames(data.hg38), 1, 12), 'group']
TCGAvisualize_meanMethylation(
data.hg38,
groupCol = "group",
group.legend = "Groups",
filename = NULL,
print.pvalue = TRUE
)
res <- TCGAanalyze_DMC(
data.hg38,
# colData 函数获取的矩阵中分组列名
groupCol = "group",
group1 = "High Risk",
group2 = "Low Risk",
p.cut = 0.05,
diffmean.cut = 0.15,
save = FALSE,
legend = "State",
plot.filename = "./PRAD_metvolcano.png",
cores = 8
)
sig_met <- subset(res, status != "Not Significant")
res_data <- subset(data.hg38, subset = (rownames(data.hg38) %in% rownames(sig_met)))
library(circlize)
library(ComplexHeatmap)
group <- group[substr(colnames(data.hg38), 1, 12), ]
#定义注释信息
ha <- HeatmapAnnotation(`Risk group` = group$group,
`Risk Score` = group$Risk_Score,
col = list(
`Risk group` = c("Low Risk" = "#99CCFFAA", 'High Risk' = '#FF6666AA')
),
# annotation_name_gp = gpar(fontsize = 14),
show_annotation_name = TRUE)
heatmap <- Heatmap(
assay(res_data),
name = "DNA methylation",
col = matlab::jet.colors(200),
show_row_names = FALSE,
cluster_rows = TRUE,
cluster_columns = FALSE,
show_column_names = FALSE,
bottom_annotation = ha,
column_title = "DNA Methylation",
column_order = order(group$Risk_Score)
)
pdf("./prad_meth_heatmap.pdf",width = 6, height = 4);{
draw(heatmap, annotation_legend_side = "bottom")
};dev.off()
query_met.hg19 <- GDCquery(
project= "TCGA-PRAD",
data.category = "DNA methylation",
platform = "Illumina Human Methylation 450",
legacy = TRUE # hg19
)
GDCdownload(query_met.hg19)
data.hg19 <- GDCprepare(query_met.hg19)
saveRDS(data.hg38, 'prad.meth.hg19.rds')
library(rGADEM)
library(GenomicRanges)
library(BSgenome.Hsapiens.UCSC.hg19)
library(motifStack)
probes <- rowRanges(res_data)
# 获取差异的探针并设置 200bp 大小的窗口
sequence <- GRanges(
seqnames = as.character(seqnames(probes)),
ranges = IRanges(start = start(ranges(probes)) - 100,
end = start(ranges(probes)) + 100),
strand = "*"
)
# 识别模体
gadem <- GADEM(sequence, verbose = FALSE, genome = Hsapiens)
因为hg19的数据还在下载,更多内容见官方教程