er-dai-ce-xu-shu-ju-chu-li-zhi-shu-ju-ge-shi-shuo-ming.html 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811
  1. <!DOCTYPE html>
  2. <html lang="en" data-default-color-scheme=auto>
  3. <head><!-- hexo injector head_begin start -->
  4. <script defer src="https://api.limour.top/vue/0d2f95c1-755d-436b-adf8-eee12a80ed32/script.js"></script>
  5. <!-- hexo injector head_begin end -->
  6. <meta charset="UTF-8">
  7. <link rel="apple-touch-icon" sizes="76x76" href="https://img.limour.top/2023/08/29/64ee07361815a.webp">
  8. <link rel="icon" href="https://img.limour.top/2023/08/29/64ee07361815a.webp">
  9. <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=5.0, shrink-to-fit=no">
  10. <meta http-equiv="x-ua-compatible" content="ie=edge">
  11. <meta name="theme-color" content="#2f4154">
  12. <meta name="author" content="Limour">
  13. <meta name="keywords" content="">
  14. <meta name="description" content="FASTA(.fa) 储存参考数据集 从零开始完整学习全基因组测序(WGS)数据分析:第2节 FASTA和FASTQ 基本单元 序列所表示的基因名:&amp;gt;ENSMUSG00000020122ENSMUST00000138518,后可接空格表示注释前缀 具体序列信息:CCCTCCTATCATGC……GGGCCCACCTGTTCTCTGGT 基因名独占一行,序列信息为基因名后一行至下一个 &amp;gt;">
  15. <title>【迁移】二代测序数据处理之数据格式说明 - Limour&#39;s Blog</title>
  16. <link rel="stylesheet" href="https://jscdn.limour.top/npm/bootstrap@4.6.1/dist/css/bootstrap.min.css" />
  17. <link rel="stylesheet" href="https://jscdn.limour.top/npm/github-markdown-css@4.0.0/github-markdown.min.css" />
  18. <link rel="stylesheet" href="https://jscdn.limour.top/npm/hint.css@2.7.0/hint.min.css" />
  19. <!-- 主题依赖的图标库,不要自行修改 -->
  20. <!-- Do not modify the link that theme dependent icons -->
  21. <link rel="stylesheet" href="//at.alicdn.com/t/font_1749284_hj8rtnfg7um.css">
  22. <link rel="stylesheet" href="//at.alicdn.com/t/font_1736178_lbnruvf0jn.css">
  23. <link rel="stylesheet" href="/css/main.css" />
  24. <link id="highlight-css" rel="stylesheet" href="/css/highlight.css" />
  25. <link id="highlight-css-dark" rel="stylesheet" href="/css/highlight-dark.css" />
  26. <link rel="stylesheet" href="/theme-inject/custom.css">
  27. <link rel="stylesheet" href="/theme-inject/iconfont.css">
  28. <script id="fluid-configs">
  // Bootstrap for the inline scripts on this page: reuse an existing window.Fluid if one is already defined, otherwise start from an empty object.
  29. var Fluid = window.Fluid || {};
  // Replace Fluid.ctx with a fresh shallow copy of itself (Object.assign ignores an undefined source, so this also creates ctx when it is missing).
  30. Fluid.ctx = Object.assign({}, Fluid.ctx)
  // Page-wide settings object; later inline scripts on this page read CONFIG.toc and CONFIG.anchorjs from it.
  31. var CONFIG = {"hostname":"hexo.limour.top","root":"/","version":"1.9.7","typing":{"enable":false,"typeSpeed":70,"cursorChar":"_","loop":false,"scope":[]},"anchorjs":{"enable":true,"element":"h1,h2,h3,h4,h5,h6","placement":"left","visible":"hover","icon":"§"},"progressbar":{"enable":true,"height_px":3,"color":"#29d","options":{"showSpinner":false,"trickleSpeed":100}},"code_language":{"enable":true,"default":"TEXT"},"copy_btn":true,"image_caption":{"enable":true},"image_zoom":{"enable":false,"img_url_replace":["",""]},"toc":{"enable":true,"placement":"right","headingSelector":"h1,h2,h3,h4,h5,h6","collapseDepth":0},"lazyload":{"enable":true,"loading_img":"https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg","onlypost":false,"offset_factor":2},"web_analytics":{"enable":false,"follow_dnt":true,"baidu":null,"google":{"measurement_id":null},"tencent":{"sid":null,"cid":null},"woyaola":null,"cnzz":null,"leancloud":{"app_id":null,"app_key":null,"server_url":null,"path":"window.location.pathname","ignore_local":false}},"search_path":"/local-search.xml","include_content_in_search":true};
  // Only when follow_dnt is enabled: store in Fluid.ctx.dnt whether the browser's Do-Not-Track value starts with '1', 'yes' or 'on'.
  32. if (CONFIG.web_analytics.follow_dnt) {
  // The DNT value is looked up in three places; the first truthy one is used.
  33. var dntVal = navigator.doNotTrack || window.doNotTrack || navigator.msDoNotTrack;
  // If no DNT value was found, dnt keeps that falsy value instead of a boolean.
  34. Fluid.ctx.dnt = dntVal && (dntVal.startsWith('1') || dntVal.startsWith('yes') || dntVal.startsWith('on'));
  35. }
  36. </script>
  37. <script src="/js/utils.js" ></script>
  38. <script src="/js/color-schema.js" ></script>
  39. <link rel="canonical" href="https://hexo.limour.top/er-dai-ce-xu-shu-ju-chu-li-zhi-shu-ju-ge-shi-shuo-ming"/>
  40. <meta name="generator" content="Hexo 7.1.1"><link rel="alternate" href="/atom.xml" title="Limour's Blog" type="application/atom+xml">
  41. <link rel="alternate" href="/rss2.xml" title="Limour's Blog" type="application/rss+xml">
  42. </head>
  43. <body>
  44. <header>
  45. <div class="header-inner" style="height: 70vh;">
  46. <nav id="navbar" class="navbar fixed-top navbar-expand-lg navbar-dark scrolling-navbar">
  47. <div class="container">
  48. <a class="navbar-brand" href="/">
  49. <strong>Limour&#39;s Blog</strong>
  50. </a>
  51. <button id="navbar-toggler-btn" class="navbar-toggler" type="button" data-toggle="collapse"
  52. data-target="#navbarSupportedContent"
  53. aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
  54. <div class="animated-icon"><span></span><span></span><span></span></div>
  55. </button>
  56. <!-- Collapsible content -->
  57. <div class="collapse navbar-collapse" id="navbarSupportedContent">
  58. <ul class="navbar-nav ml-auto text-center">
  59. <li class="nav-item">
  60. <a class="nav-link" href="https://hexo.limour.top/" target="_self">
  61. <i class="iconfont icon-home-fill"></i>
  62. <span>Home</span>
  63. </a>
  64. </li>
  65. <li class="nav-item">
  66. <a class="nav-link" href="/archives/" target="_self">
  67. <i class="iconfont icon-archive-fill"></i>
  68. <span>Archive1</span>
  69. </a>
  70. </li>
  71. <li class="nav-item">
  72. <a class="nav-link" href="https://occdn.limour.top/archives/" target="_self">
  73. <i class="iconfont icon-archive-fill"></i>
  74. <span>Archive2</span>
  75. </a>
  76. </li>
  77. <li class="nav-item">
  78. <a class="nav-link" href="https://b.limour.top/archives/" target="_self">
  79. <i class="iconfont icon-archive-fill"></i>
  80. <span>Archive3</span>
  81. </a>
  82. </li>
  83. <li class="nav-item">
  84. <a class="nav-link" href="https://od.limour.top/" target="_self">
  85. <i class="iconfont icon-onedrive"></i>
  86. <span>Alist</span>
  87. </a>
  88. </li>
  89. <li class="nav-item">
  90. <a class="nav-link" href="https://orcid.org/0000-0001-8897-1685" target="_self">
  91. <i class="iconfont icon-orcid"></i>
  92. <span>Orcid</span>
  93. </a>
  94. </li>
  95. <li class="nav-item">
  96. <a class="nav-link" href="/links/" target="_self">
  97. <i class="iconfont icon-link-fill"></i>
  98. <span>Links</span>
  99. </a>
  100. </li>
  101. <li class="nav-item">
  102. <a class="nav-link" href="/atom.xml" target="_self">
  103. <i class="iconfont icon-rss"></i>
  104. <span>RSS</span>
  105. </a>
  106. </li>
  107. <li class="nav-item" id="search-btn">
  108. <a class="nav-link" target="_self" href="javascript:;" data-toggle="modal" data-target="#modalSearch" aria-label="Search">
  109. <i class="iconfont icon-search"></i>
  110. </a>
  111. </li>
  112. <li class="nav-item" id="color-toggle-btn">
  113. <a class="nav-link" target="_self" href="javascript:;" aria-label="Color Toggle">
  114. <i class="iconfont icon-dark" id="color-toggle-icon"></i>
  115. </a>
  116. </li>
  117. </ul>
  118. </div>
  119. </div>
  120. </nav>
  121. <div id="banner" class="banner" parallax=true
  122. style="background: url('https://img.limour.top/2023/08/29/64ee08e108638.webp') no-repeat center center; background-size: cover;">
  123. <div class="full-bg-img">
  124. <div class="mask flex-center" style="background-color: rgba(0, 0, 0, 0.3)">
  125. <div class="banner-text text-center fade-in-up">
  126. <div class="h2">
  127. <span id="subtitle">【迁移】二代测序数据处理之数据格式说明</span>
  128. </div>
  129. <div class="mt-3">
  130. <span class="post-meta mr-2">
  131. <i class="iconfont icon-author" aria-hidden="true"></i>
  132. Limour
  133. </span>
  134. <span class="post-meta">
  135. <i class="iconfont icon-date-fill" aria-hidden="true"></i>
  136. <time datetime="2022-01-17 11:33" pubdate>
  137. January 17, 2022 am
  138. </time>
  139. </span>
  140. </div>
  141. <div class="mt-1">
  142. <span class="post-meta mr-2">
  143. <i class="iconfont icon-chart"></i>
  144. 2k words
  145. </span>
  146. <span class="post-meta mr-2">
  147. <i class="iconfont icon-clock-fill"></i>
  148. 18 mins
  149. </span>
  150. </div>
  151. </div>
  152. </div>
  153. </div>
  154. </div>
  155. </div>
  156. </header>
  157. <main>
  158. <div class="container-fluid nopadding-x">
  159. <div class="row nomargin-x">
  160. <div class="side-col d-none d-lg-block col-lg-2">
  161. </div>
  162. <div class="col-lg-8 nopadding-x-md">
  163. <div class="container nopadding-x-md" id="board-ctn">
  164. <div id="board">
  165. <article class="post-content mx-auto">
  166. <h1 id="seo-header">【迁移】二代测序数据处理之数据格式说明</h1>
  167. <p id="updated-time" class="note note-info" style="">
  168. Last updated on March 19, 2024 pm
  169. </p>
  170. <div class="markdown-body">
  171. <h2 id="FASTA-fa-储存参考数据集">FASTA(.fa) 储存参考数据集</h2>
  172. <p><a target="_blank" rel="noopener" href="https://zhuanlan.zhihu.com/p/28470883">从零开始完整学习全基因组测序(WGS)数据分析:第2节 FASTA和FASTQ</a></p>
  173. <ul>
  174. <li>基本单元</li>
  175. <li>序列所表示的基因名:<code>&gt;ENSMUSG00000020122ENSMUST00000138518</code>,后可接空格表示注释前缀</li>
  176. <li>具体序列信息:<code>CCCTCCTATCATGC……GGGCCCACCTGTTCTCTGGT</code></li>
  177. <li>基因名独占一行,序列信息为基因名后一行至下一个 <code>&gt;</code> 基因名标记前</li>
  178. </ul>
  179. <figure class="highlight txt"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line">&gt;ENSMUSG00000020122ENSMUST00000138518</span><br><span class="line">CCCTCCTATCATGCTGTCAGTGTATCTCTAAATAGCACTCTCAACCCCCGTGAACTTGGT</span><br><span class="line">TATTAAAAACATGCCCAAAGTCTGGGAGCCAGGGCTGCAGGGAAATACCACAGCCTCAGT</span><br><span class="line">TCATCAAAACAGTTCATTGCCCAAAATGTTCTCAGCTGCAGCTTTCATGAGGTAACTCCA</span><br><span class="line">GGGCCCACCTGTTCTCTGGT</span><br></pre></td></tr></table></figure>
  180. <ul>
  181. <li>FASTA文件为基本单元的简单罗列</li>
  182. </ul>
  183. <figure class="highlight txt"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br></pre></td><td class="code"><pre><span class="line">&gt;ENSMUSG00000020122ENSMUST00000138518</span><br><span class="line">CCCTCCTATCATGCTGTCAGTGTATCTCTAAATAGCACTCTCAACCCCCGTGAACTTGGT</span><br><span class="line">TATTAAAAACATGCCCAAAGTCTGGGAGCCAGGGCTGCAGGGAAATACCACAGCCTCAGT</span><br><span class="line">TCATCAAAACAGTTCATTGCCCAAAATGTTCTCAGCTGCAGCTTTCATGAGGTAACTCCA</span><br><span class="line">GGGCCCACCTGTTCTCTGGT</span><br><span class="line">&gt;……</span><br><span class="line">……</span><br><span class="line">&gt;ENSMUSG00000020122ENSMUST00000125984</span><br><span class="line">GAGTCAGGTTGAAGCTGCCCTGAACACTACAGAGAAGAGAGGCCTTGGTGTCCTGTTGTC</span><br><span class="line">TCCAGAACCCCAATATGTCTTGTGAAGGGCACACAACCCCTCAAAGGGGTGTCACTTCTT</span><br><span class="line">CTGATCACTTTTGTTACTGTTTACTAACTGATCCTATGAATCACTGTGTCTTCTCAGAGG</span><br><span class="line">CCGTGAACCACGTCTGCAAT</span><br><span class="line">&gt;……</span><br><span class="line">……</span><br></pre></td></tr></table></figure>
  184. <h2 id="FASTQ-fq-储存原始测序数据">FASTQ(.fq) 储存原始测序数据</h2>
  185. <ul>
  186. <li><strong>每四行成为一个独立的单元</strong>,<strong>称之为read</strong>;FASTQ文件为read的简单罗列</li>
  187. <li>第一行:以‘@’开头,是这一条read的唯一标识符</li>
  188. <li>第二行:测序read的序列,由A,C,G,T和N这五种字母构成,N代表的是测序时那些无法被识别出来的碱基;</li>
  189. <li>第三行:以‘+’开头,用以兼容旧版格式</li>
  190. <li>第四行:测序read的质量值,Q = -10×log10(测序错误率),字符=<code>chr(ord('!')+Q)</code>,上限为 <code>~</code></li>
  191. </ul>
  192. <figure class="highlight txt"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line">@DJB775P1:248:D0MDGACXX:7:1202:12362:49613</span><br><span class="line">TGCTTACTCTGCGTTGATACCACTGCTTAGATCGGAAGAGCACACGTCTGAA</span><br><span class="line">+</span><br><span class="line">JJJJJIIJJJJJJHIHHHGHFFFFFFCEEEEEDBD?DDDDDDBDDDABDDCA</span><br></pre></td></tr></table></figure>
  193. <h2 id="GTF-gtf-描述基因和转录本的信息">GTF(.gtf) 描述基因和转录本的信息</h2>
  194. <p><a target="_blank" rel="noopener" href="https://cloud.tencent.com/developer/article/1625204">GTF文件格式简介</a></p>
  195. <ul>
  196. <li>头部有 <code>#</code> 开头的注释行</li>
  197. <li>主体为 <code>\t</code> 分隔的具有九列的表格,空值用 <code>.</code> 填充</li>
  198. <li>第一列 <code>seqid</code> 代表染色体的ID</li>
  199. <li>第二列是 <code>source</code> 代表基因结构的来源</li>
  200. <li>第三列是feature, 代表区间对应的特征类型,如外显子等</li>
  201. <li>第四、五列为区间的起止坐标</li>
  202. <li>第六列是 <code>score</code></li>
  203. <li>第七列是 <code>strand</code>, 代表正负链的信息, +表示正链,-表示负链,?表示不清楚</li>
  204. <li>第八列是 <code>phase</code>,当描述的是CDS区间信息时,需要指定翻译时开始的位置,取值范围有0,1,2三种</li>
  205. <li>第九列是attributes, 表示属性,键值对间以分号分隔,键值对内以空格分隔</li>
  206. </ul>
  207. <figure class="highlight txt"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line">#!genome-build GRCh38.p12</span><br><span class="line">#!genome-version GRCh38</span><br><span class="line">#!genome-date 2013-12</span><br><span class="line">#!genome-build-accession NCBI:GCA_000001405.27</span><br><span class="line">#!genebuild-last-updated 2018-01</span><br><span class="line">1 ensembl_havana gene 65419 71585 . + . gene_id &quot;ENSG00000186092&quot;; gene_version &quot;6&quot;; gene_name &quot;OR4F5&quot;; gene_source &quot;ensembl_havana&quot;; gene_biotype &quot;protein_coding&quot;;</span><br></pre></td></tr></table></figure>
  208. <h2 id="IDX-idx-基因组比对工具HISAT2的索引文件">IDX(.idx) 基因组比对工具HISAT2的索引文件</h2>
  209. <p><a target="_blank" rel="noopener" href="http://blog.biochen.com/archives/337">RNA-Seq基因组比对工具HISAT2</a></p>
  210. <ul>
  211. <li>使用 hisat2-build 工具从.fa文件建立</li>
  212. </ul>
  213. <figure class="highlight shell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><span class="line">export FADIR=/opt/human_grch38/dna</span><br><span class="line">export FANAME=Homo_sapiens.GRCh38.dna.chromosome</span><br><span class="line">export IDXDIR=/opt/human_grch38/hisat2_idx</span><br><span class="line"></span><br><span class="line">export FILELIST=$FADIR/$&#123;FANAME&#125;.1.fa,$FADIR/$&#123;FANAME&#125;.2.fa,$FADIR/$&#123;FANAME&#125;.3.fa,$FADIR/$&#123;FANAME&#125;.4.fa,$FADIR/$&#123;FANAME&#125;.5.fa,$FADIR/$&#123;FANAME&#125;.6.fa,$FADIR/$&#123;FANAME&#125;.7.fa,$FADIR/$&#123;FANAME&#125;.8.fa,$FADIR/$&#123;FANAME&#125;.9.fa,$FADIR/$&#123;FANAME&#125;.10.fa,</span><br><span class="line">export FILELIST=$&#123;FILELIST&#125;$FADIR/$&#123;FANAME&#125;.11.fa,$FADIR/$&#123;FANAME&#125;.12.fa,$FADIR/$&#123;FANAME&#125;.13.fa,$FADIR/$&#123;FANAME&#125;.14.fa,$FADIR/$&#123;FANAME&#125;.15.fa,$FADIR/$&#123;FANAME&#125;.16.fa,$FADIR/$&#123;FANAME&#125;.17.fa,$FADIR/$&#123;FANAME&#125;.18.fa,$FADIR/$&#123;FANAME&#125;.19.fa,$FADIR/$&#123;FANAME&#125;.20.fa,</span><br><span class="line">export FILELIST=$&#123;FILELIST&#125;$FADIR/$&#123;FANAME&#125;.21.fa,$FADIR/$&#123;FANAME&#125;.22.fa,$FADIR/$&#123;FANAME&#125;.MT.fa,$FADIR/$&#123;FANAME&#125;.X.fa,$FADIR/$&#123;FANAME&#125;.Y.fa</span><br><span class="line">echo **************************************</span><br><span class="line">echo $FILELIST</span><br><span class="line">echo **************************************</span><br><span class="line">hisat2-build -p 8 $FILELIST $IDXDIR/GRCh38.hisat2.idx</span><br></pre></td></tr></table></figure>
  214. <h2 id="Sam-Bam-bam-记录比对的具体情况">Sam/Bam(.bam) 记录比对的具体情况</h2>
  215. <p><a target="_blank" rel="noopener" href="https://www.jianshu.com/p/ff6187c97155">Sam/Bam文件格式详解</a></p>
  216. <p>bam文件是sam文件的二进制格式,sam 文件是Sequence Alignment/Map Format的简写,产生于比对之后的数据输出,记录了比对的具体情况。文件中以tab键分割,包括 <code>Header section</code> 和 <code>Alignments section</code> 两部分:</p>
  217. <h3 id="Header-section">Header section</h3>
  218. <p>该部分全部以“@”开头,提供基本的软件版本,参考序列信息,排序信息等</p>
  219. <ul>
  220. <li>@HD行:这一行中有各种不同的标识
  221. <ul>
  222. <li>标识“VN”用以说明格式版本</li>
  223. <li>标识“SO”用以说明比对排序的情况,有unknown (default)、unsorted、queryname和coordinate,对于coordinate,排序的主键是Alignments section的第三列“RNAME”,其顺序由@SQ行的“SN”标识的顺序定义,次要排序键是Alignments section的第四列“POS”字段。对于RNAME和POS相等的比对,排列顺序则是任意的</li>
  224. </ul>
  225. </li>
  226. <li>@SQ行的“SN”标签是参考序列说明,它的值主要是用于Alignments section的第三列“RNAME”和第七列“MRNM”比对的记录</li>
  227. <li>@PG行是使用的程序说明;该行“ID”为程序记录标识符,“PN”为程序名字,“CL”为命令行</li>
  228. <li>@CO行是任意的说明信息</li>
  229. </ul>
  230. <h3 id="Alignments-section">Alignments section</h3>
  231. <p>该部分包含了11列必需字段,无效或者没有的字段一般用<code>0</code>或者<code>*</code>表示。</p>
  232. <figure class="highlight txt"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><span class="line">@HD VN:1.6 SO:coordinate</span><br><span class="line">@SQ SN:ref LN:45</span><br><span class="line">r001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG *</span><br><span class="line">r002 0 ref 9 30 3S6M1P1I4M * 0 0 AAAAGATAAGGATA *</span><br><span class="line">r003 0 ref 9 30 5S6M * 0 0 GCCTAAGCTAA * SA:Z:ref,29,-,6H5M,17,0;</span><br><span class="line">r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC *</span><br><span class="line">r003 2064 ref 29 17 6H5M * 0 0 TAGGC * SA:Z:ref,9,+,5S6M,30,1;</span><br><span class="line">r001 147 ref 37 30 9M = 7 -39 CAGCGGCAT * NM:i:1</span><br></pre></td></tr></table></figure>
  233. <h4 id="第1列:Qname">第1列:Qname</h4>
  234. <p>Read的名字</p>
  235. <h4 id="第2列:FLAG">第2列:FLAG</h4>
  236. <p>每一个read的比对情况可以用十进制数字(或者十六进制数字)表示,如果比对情况有多个,将多个比对情况所代表的十进制数字加和就是这一行的FLAG。<br>
  237. 另,以下网站可以通过输入FLAG值,直接找出该FLAG是哪些FLAG的加和:<a target="_blank" rel="noopener" href="https://broadinstitute.github.io/picard/explain-flags.html">Decoding SAM flags</a></p>
  238. <h4 id="第3列:RNAME">第3列:RNAME</h4>
  239. <p>比对上的参考序列的名字,该名字出现在Header section的@SQ行的SN标识中,如果该read没有比对上,也就是说该read在参考序列上没有坐标,那么这一列则用“*”表示,这一行的CIGAR列也会是“*”,POS列则为“0”。</p>
  240. <h4 id="第4列:POS">第4列:POS</h4>
  241. <p>read比对到的参考序列“RNAME”最左侧的位置坐标,也是CIGAR中第一个比对标识“M”对应的最左侧碱基在参考序列的位置,未比对上的read在参考序列中没有坐标,此列标识为“0”。</p>
  242. <h4 id="第5列:MAPQ">第5列:MAPQ</h4>
  243. <p>比对的质量值,计算方法为比对错误率的-10*log10的值,一般是四舍五入的整数值,如果是255,说明该比对值无效。</p>
  244. <h4 id="第6列:CIGAR">第6列:CIGAR</h4>
  245. <p>CIGAR标识符表示read中每个碱基的比对情况,主要有以下标识符:</p>
  246. <ul>
  247. <li>M: read上的碱基与参考序列“RNAME”完全匹配,碱基一一对应,包括了正确匹配与错误匹配</li>
  248. <li>I: read上的碱基相对于参考序列“RNAME”有插入现象</li>
  249. <li>D: read上的碱基相对于参考序列“RNAME”有删除现象</li>
  250. <li>N: read上的碱基相对于参考序列“RNAME”存在连续没有比对上的空缺</li>
  251. <li>S: read的开头或者结尾部分没有比对到参考序列“RNAME”上, 但这部分未比对上的连续序列仍保留在sam文件的该read序列中</li>
  252. <li>H: read的开头或者结尾部分没有比对到参考序列“RNAME”上, 这部分未比对上的连续序列未保留</li>
  253. <li>P: padding (silent deletion from padded reference)</li>
  254. <li>=:sequence match 正确匹配</li>
  255. <li>X:sequence mismatch 错误匹配</li>
  256. </ul>
  257. <h4 id="第7列:MRNM">第7列:MRNM</h4>
  258. <p>该read的mate read比对上的参考序列的名字,该名字出现在Header section的@SQ行的SN标识中,</p>
  259. <ul>
  260. <li>如果和该read所在行的第三列“RNAME”一样,则用“=”表示,说明这对read比对到了同一条参考序列上;</li>
  261. <li>如果mate read没有比对上,第七列则用“*”表示;</li>
  262. <li>如果这对read没有比对到同一条参考序列,那么这一列则是mate read所在行第三列的“RNAME”。</li>
  263. </ul>
  264. <h4 id="第8列:MPOS">第8列:MPOS</h4>
  265. <p>该read的mate read比对到的参考序列“RNAME”最左侧的位置坐标,也是mate read CIGAR中第一个比对标识“M”对应的最左侧碱基在参考序列的位置,未比对上的read在参考序列中没有坐标,此列标识为“0”。</p>
  266. <h4 id="第9列:ISIZE">第9列:ISIZE</h4>
  267. <p>表示pair read完全匹配到同一条参考序列时,两个read之间的长度,可简单理解为测序文库的长度。</p>
  268. <h4 id="第10列:SEQ">第10列:SEQ</h4>
  269. <p>存储的序列;如果没有存储序列,此列则用“*”标识。该序列的长度一定等于CIGAR标识中“M”,“I”,“S”,“=”,“X”标识的碱基长度之和。</p>
  270. <h4 id="第11列:QUAL">第11列:QUAL</h4>
  271. <p>序列的每个碱基对应一个碱基质量字符,每个碱基质量字符对应的ASCII码值减去33(Sanger Phred-33 质量值体系),即为该碱基的测序质量得分(Phred Quality Score)。不同Phred Quality Score代表不同的碱基测序错误率,如Phred Quality Score值为20和30分别表示碱基测序错误率为1%和0.1%。</p>
  272. <h2 id="相关参数说明">相关参数说明</h2>
  273. <p><a target="_blank" rel="noopener" href="https://www.jieandze1314.com/post/cnposts/18/">基因组的那些事儿</a></p>
  274. <ul>
  275. <li>测序深度:30x;每个碱基平均被测次数,相关研究表明5~60x中 30x对于后续分析可以达95%置信度</li>
  276. <li>测序策略:PE150;PE双端测序、一条序列正反测两次;150每次测150bp,双端测一条片段共300bp</li>
  277. <li>350bpcDNA建库:将DNA用超声波随机打断成350bp,加接头,作为测序前的准备工作</li>
  278. </ul>
  279. </div>
  280. <hr/>
  281. <div>
  282. <div class="post-metas my-3">
  283. <div class="post-meta">
  284. <i class="iconfont icon-tags"></i>
  285. <a href="/tags/NGS/" class="print-no-link">#NGS</a>
  286. <a href="/tags/fasta/" class="print-no-link">#fasta</a>
  287. <a href="/tags/fastq/" class="print-no-link">#fastq</a>
  288. <a href="/tags/gft/" class="print-no-link">#gft</a>
  289. <a href="/tags/bam/" class="print-no-link">#bam</a>
  290. </div>
  291. </div>
  292. <div class="license-box my-3">
  293. <div class="license-title">
  294. <div>【迁移】二代测序数据处理之数据格式说明</div>
  295. <div>https://hexo.limour.top/er-dai-ce-xu-shu-ju-chu-li-zhi-shu-ju-ge-shi-shuo-ming</div>
  296. </div>
  297. <div class="license-meta">
  298. <div class="license-meta-item">
  299. <div>Author</div>
  300. <div>Limour</div>
  301. </div>
  302. <div class="license-meta-item license-meta-date">
  303. <div>Posted on</div>
  304. <div>January 17, 2022</div>
  305. </div>
  306. <div class="license-meta-item license-meta-date">
  307. <div>Updated on</div>
  308. <div>March 19, 2024</div>
  309. </div>
  310. <div class="license-meta-item">
  311. <div>Licensed under</div>
  312. <div>
  313. <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
  314. <span class="hint--top hint--rounded" aria-label="BY - Attribution">
  315. <i class="iconfont icon-by"></i>
  316. </span>
  317. </a>
  318. <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
  319. <span class="hint--top hint--rounded" aria-label="NC - Non-commercial">
  320. <i class="iconfont icon-nc"></i>
  321. </span>
  322. </a>
  323. <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
  324. <span class="hint--top hint--rounded" aria-label="SA - Share-alike">
  325. <i class="iconfont icon-sa"></i>
  326. </span>
  327. </a>
  328. </div>
  329. </div>
  330. </div>
  331. <div class="license-icon iconfont"></div>
  332. </div>
  333. <div class="post-prevnext my-3">
  334. <article class="post-prev col-6">
  335. <a href="/-qian-yi-CellTypist-zhu-shi-mian-yi-xi-bao-ya-qun" title="【迁移】CellTypist 注释免疫细胞亚群">
  336. <i class="iconfont icon-arrowleft"></i>
  337. <span class="hidden-mobile">【迁移】CellTypist 注释免疫细胞亚群</span>
  338. <span class="visible-mobile">Previous</span>
  339. </a>
  340. </article>
  341. <article class="post-next col-6">
  342. <a href="/Solving-equations-using-postfix-notation" title="【探索】利用后缀表达式解方程">
  343. <span class="hidden-mobile">【探索】利用后缀表达式解方程</span>
  344. <span class="visible-mobile">Next</span>
  345. <i class="iconfont icon-arrowright"></i>
  346. </a>
  347. </article>
  348. </div>
  349. </div>
  350. <article id="comments" lazyload>
  351. <div id="waline"></div>
  352. <script type="text/javascript">
  // Set up the Waline comment widget inside the #waline container.
  // NOTE(review): loadComments is defined in an external script; presumably it defers the callback until the comment area is needed - confirm.
  353. Fluid.utils.loadComments('#waline', function() {
  // Request the Waline 2.15.5 stylesheet, then the script; the nested callback is presumably invoked once the script has loaded - confirm against Fluid.utils.createScript.
  354. Fluid.utils.createCssLink('https://cdn.staticfile.org/waline/2.15.5/waline.min.css')
  355. Fluid.utils.createScript('https://cdn.staticfile.org/waline/2.15.5/waline.min.js', function() {
  // Merge the generated settings with per-page values; the second object is applied last, so its real `path` replaces the literal string "window.location.pathname".
  356. var options = Object.assign(
  357. {"serverURL":"https://comments.limour.top","path":"window.location.pathname","meta":["nick","mail","link"],"requiredMeta":["nick"],"lang":"zh-CN","emoji":["https://jscdn.limour.top/gh/walinejs/emojis/weibo"],"dark":"html[data-user-color-scheme=\"dark\"]","wordLimit":0,"pageSize":10},
  358. {
  359. el: '#waline',
  360. path: window.location.pathname
  361. }
  362. )
  363. Waline.init(options);
  // After '#waline .vcontent' is reported visible, apply the theme's caption and fancyBox plugins to comment images, excluding emoji (.vemoji).
  364. Fluid.utils.waitElementVisible('#waline .vcontent', () => {
  365. var imgSelector = '#waline .vcontent img:not(.vemoji)';
  366. Fluid.plugins.imageCaption(imgSelector);
  367. Fluid.plugins.fancyBox(imgSelector);
  368. })
  369. });
  370. });
  371. </script>
  372. <noscript>Please enable JavaScript to view the comments</noscript>
  373. </article>
  374. </article>
  375. </div>
  376. </div>
  377. </div>
  378. <div class="side-col d-none d-lg-block col-lg-2">
  379. <aside class="sidebar" style="margin-left: -1rem">
  380. <div id="toc">
  381. <p class="toc-header">
  382. <i class="iconfont icon-list"></i>
  383. <span>Table of Contents</span>
  384. </p>
  385. <div class="toc-body" id="toc-body"></div>
  386. </div>
  387. </aside>
  388. </div>
  389. </div>
  390. </div>
  391. <a id="scroll-top-button" aria-label="TOP" href="#" role="button">
  392. <i class="iconfont icon-arrowup" aria-hidden="true"></i>
  393. </a>
  394. <div class="modal fade" id="modalSearch" tabindex="-1" role="dialog" aria-labelledby="ModalLabel"
  395. aria-hidden="true">
  396. <div class="modal-dialog modal-dialog-scrollable modal-lg" role="document">
  397. <div class="modal-content">
  398. <div class="modal-header text-center">
  399. <h4 class="modal-title w-100 font-weight-bold">Search</h4>
  400. <button type="button" id="local-search-close" class="close" data-dismiss="modal" aria-label="Close">
  401. <span aria-hidden="true">&times;</span>
  402. </button>
  403. </div>
  404. <div class="modal-body mx-3">
  405. <div class="md-form mb-5">
  406. <input type="text" id="local-search-input" class="form-control validate">
  407. <label data-error="x" data-success="v" for="local-search-input">Keyword</label>
  408. </div>
  409. <div class="list-group" id="local-search-result"></div>
  410. </div>
  411. </div>
  412. </div>
  413. </div>
  414. </main>
  415. <footer>
  416. <div class="footer-inner">
  417. <div class="footer-content">
  418. <a target="_blank" rel="nofollow noopener" href="http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=43130202000203"><img src="https://img.limour.top/2023/08/27/64eadeb81d6a0.webp" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload>湘公网安备43130202000203号 </a> <a target="_blank" rel="nofollow noopener" href="https://beian.miit.gov.cn/">湘ICP备20008299号 </a> <a target="_blank" rel="nofollow noopener" href="https://icp.gov.moe/?keyword=20210128">萌ICP备20210128号</a> <br> <a href="https://www.foreverblog.cn/" target="_blank"> <img src="https://img.foreverblog.cn/logo_en_default.png" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload alt="" style="width:auto;height:24px"> </a> <br> <a href="https://hexo.io" target="_blank" rel="nofollow noopener"><span>Hexo</span></a> <i class="iconfont icon-love"></i> <a href="https://github.com/fluid-dev/hexo-theme-fluid" target="_blank" rel="nofollow noopener"><span>Fluid</span></a> <i class="iconfont icon-love"></i> <a href="https://github.com/limour-blog/limour-blog.github.io" target="_blank" rel="nofollow noopener"><span>SRC</span></a> <i class="iconfont icon-love"></i> <a href="https://web.archive.org/web/20231130095837/https://effectiveacceleration.tech/" target="_blank" rel="nofollow noopener"><span>e/Acc</span></a>
  419. </div>
  420. </div>
  421. </footer>
  422. <!-- Scripts -->
  423. <script src="https://jscdn.limour.top/npm/nprogress@0.2.0/nprogress.min.js" ></script>
  424. <link rel="stylesheet" href="https://jscdn.limour.top/npm/nprogress@0.2.0/nprogress.min.css" />
  425. <script>
  // Page-load progress bar: configure NProgress (spinner off, trickleSpeed 100), start it right away, and finish it on the window 'load' event.
  426. NProgress.configure({"showSpinner":false,"trickleSpeed":100})
  427. NProgress.start()
  428. window.addEventListener('load', function() {
  429. NProgress.done();
  430. })
  431. </script>
  432. <script src="https://jscdn.limour.top/npm/jquery@3.6.4/dist/jquery.min.js" ></script>
  433. <script src="https://jscdn.limour.top/npm/bootstrap@4.6.1/dist/js/bootstrap.min.js" ></script>
  434. <script src="/js/events.js" ></script>
  435. <script src="/js/plugins.js" ></script>
  436. <script src="/js/img-lazyload.js" ></script>
  437. <script>
  // Build the sidebar table of contents with tocbot 4.20.1; the callback is presumably run once the script has loaded - confirm against Fluid.utils.createScript.
  438. Fluid.utils.createScript('https://jscdn.limour.top/npm/tocbot@4.20.1/dist/tocbot.min.js', function() {
  439. var toc = jQuery('#toc');
  // Nothing to do when the page has no #toc element or tocbot is not available on window.
  440. if (toc.length === 0 || !window.tocbot) { return; }
  441. var boardCtn = jQuery('#board-ctn');
  // Top offset of the #board-ctn container; its negation is passed to tocbot as headingsOffset below.
  442. var boardTop = boardCtn.offset().top;
  // CONFIG.toc is merged last, so any key it shares with the defaults below takes precedence.
  443. window.tocbot.init(Object.assign({
  444. tocSelector : '#toc-body',
  445. contentSelector : '.markdown-body',
  446. linkClass : 'tocbot-link',
  447. activeLinkClass : 'tocbot-active-link',
  448. listClass : 'tocbot-list',
  449. isCollapsedClass: 'tocbot-is-collapsed',
  450. collapsibleClass: 'tocbot-is-collapsible',
  451. scrollSmooth : true,
  452. includeTitleTags: true,
  453. headingsOffset : -boardTop,
  454. }, CONFIG.toc));
  // Make #toc visible only if at least one .toc-list-item exists inside it.
  455. if (toc.find('.toc-list-item').length > 0) {
  456. toc.css('visibility', 'visible');
  457. }
  // Register a refresh hook that re-runs tocbot and repeats the visibility check.
  // NOTE(review): when the hook fires is decided by Fluid.events, which is defined outside this page - confirm.
  458. Fluid.events.registerRefreshCallback(function() {
  459. if ('tocbot' in window) {
  460. tocbot.refresh();
  // This `toc` shadows the outer variable and looks #toc up again.
  461. var toc = jQuery('#toc');
  462. if (toc.length === 0 || !tocbot) {
  463. return;
  464. }
  465. if (toc.find('.toc-list-item').length > 0) {
  466. toc.css('visibility', 'visible');
  467. }
  468. }
  469. });
  470. });
  471. </script>
  472. <script src=https://lib.baomitu.com/clipboard.js/2.0.11/clipboard.min.js></script>
  473. <script>Fluid.plugins.codeWidget();</script>
  474. <script>
  // Add AnchorJS 4.3.1 link anchors to the article headings; the callback is presumably run once the script has loaded - confirm against Fluid.utils.createScript.
  475. Fluid.utils.createScript('https://jscdn.limour.top/npm/anchor-js@4.3.1/anchor.min.js', function() {
  // Replace the AnchorJS options object with the placement and visibility taken from CONFIG.anchorjs.
  476. window.anchors.options = {
  477. placement: CONFIG.anchorjs.placement,
  478. visible : CONFIG.anchorjs.visible
  479. };
  // The icon is set only when one is configured.
  480. if (CONFIG.anchorjs.icon) {
  481. window.anchors.options.icon = CONFIG.anchorjs.icon;
  482. }
  // Turn the comma-separated heading list into selectors that match only direct children of .markdown-body.
  483. var el = (CONFIG.anchorjs.element || 'h1,h2,h3,h4,h5,h6').split(',');
  484. var res = [];
  485. for (var item of el) {
  486. res.push('.markdown-body > ' + item.trim());
  487. }
  // Left-placed anchors get the extra CSS class 'anchorjs-link-left'.
  488. if (CONFIG.anchorjs.placement === 'left') {
  489. window.anchors.options.class = 'anchorjs-link-left';
  490. }
  491. window.anchors.add(res.join(', '));
  // Register a refresh hook that removes all anchors and adds them again with the same selector logic as above.
  // NOTE(review): when the hook fires is decided by Fluid.events, which is defined outside this page - confirm.
  492. Fluid.events.registerRefreshCallback(function() {
  493. if ('anchors' in window) {
  494. anchors.removeAll();
  495. var el = (CONFIG.anchorjs.element || 'h1,h2,h3,h4,h5,h6').split(',');
  496. var res = [];
  497. for (var item of el) {
  498. res.push('.markdown-body > ' + item.trim());
  499. }
  500. if (CONFIG.anchorjs.placement === 'left') {
  501. anchors.options.class = 'anchorjs-link-left';
  502. }
  503. anchors.add(res.join(', '));
  504. }
  505. });
  506. });
  507. </script>
  508. <script>Fluid.plugins.imageCaption();</script>
  509. <script src="/js/local-search.js" ></script>
  510. <!-- 主题的启动项,将它保持在最底部 -->
  511. <!-- the boot of the theme, keep it at the bottom -->
  512. <script src="/js/boot.js" ></script>
  513. <noscript>
  514. <div class="noscript-warning">Blog works best with JavaScript enabled</div>
  515. </noscript>
  516. <!-- hexo injector body_end start -->
  517. <script defer src="/theme-inject/timeliness.js"></script>
  518. <!-- hexo injector body_end end --></body>
  519. </html>