pyscenic (二) 转录因子分析

前置要求

pyscenic (一) 转录因子分析

pySCENIC

aertslab/pySCENIC

第一步 准备数据库

  • 设置工作目录
1
2
3
4
5
6
7
8
9
10
11
12
import os
# 配置数据路径
root_path = os.path.expanduser("~/zlliu/R_output/21.10.23.pyscenic")

# 配置结果保存路径
output_path = root_path
if not os.path.isdir(output_path):
os.makedirs(output_path)

# 设置工作目录,输出文件将保存在此目录下
os.chdir(output_path)
os.getcwd()
1
!wget https://raw.githubusercontent.com/aertslab/pySCENIC/master/resources/hs_hgnc_tfs.txt
1
!wget https://resources.aertslab.org/cistarget/motif2tf/motifs-v9-nr.hgnc-m0.001-o0.0.tbl
1
!wget https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc9nr/gene_based/hg38__refseq-r80__10kb_up_and_down_tss.mc9nr.feather

第二步 输出 ex_matrix

1
2
3
4
5
%%R
require(Seurat)
SeuratObject = subset(SeuratObject, CellType == 'fibroblast')
write.csv(as.matrix(SeuratObject[['RNA']]@counts), file = "ex_matrix.csv")
cellInfo = SeuratObject@metadata

第三步 计算

  • 导入包和数据
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import glob
import pickle
import pandas as pd
import numpy as np

from dask.diagnostics import ProgressBar

from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2

from ctxcore.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies, load_motifs
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell

import seaborn as sns
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
SC_EXP_FNAME = 'ex_matrix.csv'
TFS_FNAME = 'hs_hgnc_tfs.txt'
DATABASES_GLOB = "hg38*.mc9nr.feather"
MOTIF_ANNOTATIONS_FNAME = 'motifs-v9-nr.hgnc-m0.001-o0.0.tbl'

ex_matrix = pd.read_csv(SC_EXP_FNAME, sep=',', header=0, index_col=0).T
ex_matrix.shape

tf_names = load_tf_names(TFS_FNAME)

db_fnames = glob.glob(DATABASES_GLOB)
def name(fname):
return os.path.splitext(os.path.basename(fname))[0]
dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]
dbs
  • 计算 grn
1
adjacencies = grnboost2(ex_matrix, tf_names=tf_names, verbose=True)
1
2
with open('adjacencies.pydata','wb') as f:
pickle.dump(adjacencies,f)
  • 计算 ctx
1
2
3
4
modules = list(modules_from_adjacencies(adjacencies, ex_matrix))
with ProgressBar():
df = prune2df(dbs, modules, MOTIF_ANNOTATIONS_FNAME)
regulons = df2regulons(df)
1
2
3
4
5
6
with open('modules.pydata','wb') as f:
pickle.dump(modules,f)
with open('df.pydata','wb') as f:
pickle.dump(df,f)
with open('regulons.pydata','wb') as f:
pickle.dump(regulons,f)
  • 计算 AUCell
1
2
regulons = [r.rename(r.name.replace('(+)',' ('+str(len(r))+'g)')) for r in regulons] # 第四步 导入方法二
auc_mtx = aucell(ex_matrix, regulons, num_workers=4)
1
2
with open('auc_mtx.pydata','wb') as f:
pickle.dump(auc_mtx,f)

第四步 导入R中出图

pySCENIC

  • 导入方法一 (不推荐)
1
2
3
from pyscenic.export import export2loom
# regulons = [r.rename(r.name.replace('(+)',' ('+str(len(r))+'g)')) for r in regulons]
export2loom(ex_mtx = ex_matrix, auc_mtx = auc_mtx, regulons=regulons, out_fname = "results.loom")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
%%R
require(ggplot2)
require(SCENIC)
require(SCopeLoomR)

loom <- open_loom("results.loom", mode="r")

# Read information from loom file:
regulons_incidMat <- get_regulons(loom)
regulons <- regulonsToGeneLists(regulons_incidMat)
regulonsAUC <- get_regulonsAuc(loom)
regulonsAucThresholds <- get_regulonThresholds(loom)
embeddings <- get_embeddings(loom)

exprMat <- get_dgem(loom)
cellInfo <- get_cellAnnotation(loom)
clusterings <- get_clusterings_withName(loom)

close_loom(loom)
  • 导入方法二 (推荐)
1
auc_mtx.to_csv('auc_mtx.tsv')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
%%R
require(ggplot2)
require(SCENIC)
require(SCopeLoomR)
require(AUCell)

regulonAUC <- importAUCfromText('auc_mtx.tsv')
regulonAUC <- regulonAUC[onlyNonDuplicatedExtended(rownames(regulonAUC)),]
# cellInfo = SeuratObject@metadata
regulonActivity_byCellType <- sapply(split(rownames(cellInfo), cellInfo$CellType),
function(cells) rowMeans(getAUC(regulonAUC)[,cells]))
regulonActivity_byCellType_Scaled <- t(scale(t(regulonActivity_byCellType), center = T, scale=T))
pheatmap::pheatmap(regulonActivity_byCellType_Scaled, #fontsize_row=3,
color=colorRampPalette(c("blue","white","red"))(100), breaks=seq(-3, 3, length.out = 100),
treeheight_row=10, treeheight_col=10, border_color=NA)

SCENIC—Single Cell rEgulatory Network Inference and Clustering


pyscenic (二) 转录因子分析
https://b.limour.top/1111.html
Author
Limour
Posted on
October 23, 2021
Licensed under