title: TCGAbiolinks下载甲基化数据(续) tags: [] id: '2323' categories:
之前TCGAbiolinks下载甲基化数据的hg19的数据下载好了,现在继续进行分析
require(rGADEM)
require(GenomicRanges)
require(motifStack)
require(dplyr)
f_gadem <- function(probes, seqname, delta_start = 100, delta_end = 100){
probes_tmp <- subset(probes, seqnames==seqname)
sequence <- GRanges(
seqnames = rep(seqname,length(probes_tmp)),
IRanges(
start = ranges(probes_tmp) %>% as.data.frame() %>% dplyr::pull("start") - 100,
end = ranges(probes_tmp) %>% as.data.frame() %>% dplyr::pull("end") + 100),
strand = "*"
)
gadem <- GADEM(sequence, verbose = FALSE, genome = BSgenome.Hsapiens.UCSC.hg19::Hsapiens)
gadem
}
f_gadem_one <- function(probes, seqname, delta_start = 100, delta_end = 100){
probes_tmp <- subset(probes, seqnames==seqname)
res <- list(
seqnames = rep(seqname,length(probes_tmp)),
start = ranges(probes_tmp) %>% as.data.frame() %>% dplyr::pull("start") - 100,
end = ranges(probes_tmp) %>% as.data.frame() %>% dplyr::pull("end") + 100
)
res
}
f_gadem <- function(probes, all_seqN=NULL, delta_start = 100, delta_end = 100){
if(is.null(all_seqN)){
all_seqN <- as.character(unique(seqnames(probes)))
}
res_seqnames <- NULL
res_start <- NULL
res_end <- NULL
for(x in all_seqN){
res <- f_gadem_one(probes, x, delta_start, delta_end)
res_seqnames <- c(res_seqnames, res$seqnames)
res_start <- c(res_start, res$start)
res_end <- c(res_end, res$end)
}
sequence <- GRanges(
seqnames = res_seqnames,
IRanges(
start = res_start,
end = res_end),
strand = "*"
)
gadem <- GADEM(sequence, verbose = FALSE, genome = BSgenome.Hsapiens.UCSC.hg19::Hsapiens)
gadem
}
saveRDS(rowRanges(res_data), 'probes_hg19.rds')
probes <- readRDS('probes_hg19.rds')
gadem <- f_gadem(probes, 'chr1')
nMotifs(gadem) # 找到的模体数量
nOccurrences(gadem) # 出现的次数
consensus(gadem) # 查看模体的一致性序列
# 展示模体 logo 图
pwm <- getPWM(gadem)
pfm <- new("pfm",mat=pwm[[1]],name="Novel Site 1")
plotMotifLogo(pfm)
saveRDS(pwm, 'chr1_pwm.rds')
library(MotIV)
pwm <- readRDS('chr1_pwm.rds')
analysis.jaspar <- motifMatch(pwm)
summary(analysis.jaspar)
options(repr.plot.width=18, repr.plot.height=12)
plot(analysis.jaspar, ncol=2, top=5, rev=FALSE, main="", bysim=TRUE, cex=0.4)