rss2.xml 403 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. <?xml version="1.0" encoding="utf-8"?>
  2. <rss version="2.0"
  3. xmlns:atom="http://www.w3.org/2005/Atom"
  4. xmlns:content="http://purl.org/rss/1.0/modules/content/">
  5. <channel>
  6. <title>Limour&#39;s Blog</title>
  7. <link>https://hexo.limour.top/</link>
  8. <atom:link href="https://hexo.limour.top/rss2.xml" rel="self" type="application/rss+xml"/>
  9. <description>Limour&#39;s Blog</description>
  10. <pubDate>Mon, 25 Mar 2024 13:10:47 GMT</pubDate>
  11. <generator>http://hexo.io/</generator>
  12. <item>
  13. <title>【记录】搭建流量统计工具 Shynet</title>
  14. <link>https://hexo.limour.top/Building-a-traffic-statistics-tool-Shynet</link>
  15. <guid>https://hexo.limour.top/Building-a-traffic-statistics-tool-Shynet</guid>
  16. <pubDate>Mon, 25 Mar 2024 12:52:28 GMT</pubDate>
  17. <description>&lt;p&gt;&lt;a href=&quot;https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL21pbGVzbWNjL3NoeW5ldA==&quot; rel=&quot;noopener external nofollow noreferrer&quot;&gt;Shynet&lt;/a&gt; 是一款用 python 编写的现代、隐私友好、无需Cookie或JS即可工作的网络流量统计工具。&lt;/p&gt;</description>
  18. <content:encoded><![CDATA[<p><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL21pbGVzbWNjL3NoeW5ldA==" rel="noopener external nofollow noreferrer">Shynet</a> 是一款用 python 编写的现代、隐私友好、无需Cookie或JS即可工作的网络流量统计工具。</p><p>相比 <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL3VtYW1pLXNvZnR3YXJlL3VtYW1p" rel="noopener external nofollow noreferrer">Umami</a>, Shynet 支持通过 1 pixel 的图像进行统计,而不依赖 JS, 并且 Shynet 统计的信息更加详细。</p><p><img src="https://img.limour.top/2024/03/25/660177c20629f.webp" alt="最终效果"></p><h2 id="搭建-Shynet">搭建 Shynet</h2><ul><li><a href="/Docker-bu-shu-Nginx-Proxy-Manager">反向代理服务</a></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/shynet &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/shynet &amp;&amp; nano docker-compose.yml<br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span class="hljs-string">&#x27;3.6&#x27;</span><br> <br><span class="hljs-attr">services:</span><br> <span class="hljs-attr">shynet:</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">milesmcc/shynet:latest</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">always</span><br> <span 
class="hljs-attr">env_file:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">.env</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">./db:/var/local/shynet/db/</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">/etc/localtime:/etc/localtime:ro</span><br> <br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">ngpm</span><br></code></pre></td></tr></table></figure><ul><li>配置环境变量</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><code class="hljs bash">wget -O .<span class="hljs-built_in">env</span> https://github.com/milesmcc/shynet/raw/master/TEMPLATE.<span class="hljs-built_in">env</span><br><span class="hljs-comment"># 注释掉 .env 中 PostgreSQL 相关的部分,启用 SQLITE 相关的部分</span><br><span class="hljs-comment"># 注释掉 .env 中 Email 相关的部分</span><br><span class="hljs-comment"># 按说明生成 DJANGO_SECRET_KEY</span><br><span class="hljs-comment"># 修改 ALLOWED_HOSTS 和 CSRF_TRUSTED_ORIGINS</span><br><span class="hljs-comment"># 语言换成中文 LANGUAGE_CODE=zh-cn</span><br><span class="hljs-comment"># 时区换成上海 TIME_ZONE=Asia/Shanghai</span><br><span class="hljs-built_in">mkdir</span> -p db &amp;&amp; <span class="hljs-built_in">chmod</span> 777 db<br>sudo docker-compose up -d<br><span class="hljs-comment"># 反代 shynet:8080</span><br></code></pre></td></tr></table></figure><ul><li>配置管理账号</li></ul><figure class="highlight bash"><table><tr><td 
class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs bash">sudo docker-compose <span class="hljs-built_in">exec</span> -it shynet ./manage.py registeradmin &lt;your email&gt;<br><span class="hljs-comment"># 控制台输出如下信息</span><br><span class="hljs-comment"># Email address: &lt;your email&gt;</span><br><span class="hljs-comment"># Password: &lt;Password&gt;</span><br></code></pre></td></tr></table></figure><h2 id="配置混淆">配置混淆</h2><figure class="highlight nginx"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs nginx"><span class="hljs-attribute">sub_filter</span> <span class="hljs-string">&#x27;https://xxx/ingress/&#x27;</span> <span class="hljs-string">&#x27;https://xxx/vue/&#x27;</span>;<br><span class="hljs-attribute">sub_filter_once</span> <span class="hljs-literal">off</span>;<br><span class="hljs-attribute">sub_filter_types</span> application/javascript;<br></code></pre></td></tr></table></figure><p><img src="https://img.limour.top/2024/03/25/6601762cad36c.webp" alt=""></p><h2 id="配置-Hexo">配置 Hexo</h2><ul><li><a href="/-ji-lu--zai-GitHub-shang-da-jian-Hexo">搭建 Hexo</a></li><li>编辑 <code>scripts/custom.js</code>, 内容如下</li></ul><figure class="highlight js"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs js"><span class="hljs-comment">// shynet 统计</span><br>hexo.<span class="hljs-property">extend</span>.<span class="hljs-property">injector</span>.<span class="hljs-title function_">register</span>(<span class="hljs-string">&#x27;head_begin&#x27;</span>, <span class="hljs-string">`</span><br><span class="hljs-string">&lt;script defer 
src=&quot;https://xxxx/vue/xxxx/script.js&quot;&gt;&lt;/script&gt;</span><br><span class="hljs-string">`</span>);<br></code></pre></td></tr></table></figure>]]></content:encoded>
  19. <category domain="https://hexo.limour.top/tags/hexo/">hexo</category>
  20. <comments>https://hexo.limour.top/Building-a-traffic-statistics-tool-Shynet#disqus_thread</comments>
  21. </item>
  22. <item>
  23. <title>【记录】Linux 设置个人热点</title>
  24. <link>https://hexo.limour.top/Linux-Setting-AP</link>
  25. <guid>https://hexo.limour.top/Linux-Setting-AP</guid>
  26. <pubDate>Wed, 20 Mar 2024 11:52:10 GMT</pubDate>
  27. <description>&lt;p&gt;实在受不了虚拟机的性能损失了,再加上 Win11 上跑虚拟机对 SSD 的损耗过大,因此还是将系统换成了 ubuntu,只要注意选无网络安装,不要去更新,基本还是很好换系统的。另外清华源不错!&lt;/p&gt;
  28. &lt;p&gt;换系统后,需要&lt;a href=&quot;/Win11-she-zhi-kai-ji-qi-dong-yi-dong-re-dian&quot;&gt;重新折腾一下 AP 设置&lt;/a&gt;,因此记录一下折腾过程。&lt;/p&gt;</description>
  29. <content:encoded><![CDATA[<p>实在受不了虚拟机的性能损失了,再加上 Win11 上跑虚拟机对 SSD 的损耗过大,因此还是将系统换成了 ubuntu,只要注意选无网络安装,不要去更新,基本还是很好换系统的。另外清华源不错!</p><p>换系统后,需要<a href="/Win11-she-zhi-kai-ji-qi-dong-yi-dong-re-dian">重新折腾一下 AP 设置</a>,因此记录一下折腾过程。</p><p>无线网卡是垃圾的 <code>mediatek mt7921e</code></p><h2 id="更新内核">更新内核</h2><p>因为网卡垃圾,不得不更新到最新的内核才支持 AP 设置</p><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><code class="hljs bash">proxychains wget https://raw.githubusercontent.com/pimlie/ubuntu-mainline-kernel.sh/master/ubuntu-mainline-kernel.sh<br><span class="hljs-built_in">chmod</span> +x ubuntu-mainline-kernel.sh<br>sudo gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv 17C622B0 <span class="hljs-comment"># 网络错误,需要绕过某个东西</span><br>sudo proxychains ./ubuntu-mainline-kernel.sh -i<br>sudo reboot<br><span class="hljs-built_in">uname</span> -r<br>sudo apt --fix-broken install<br></code></pre></td></tr></table></figure><h2 id="解决-53-端口占用">解决 53 端口占用</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">sudo systemctl stop systemd-resolved<br>sudo nano /etc/systemd/resolved.conf<br></code></pre></td></tr></table></figure><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs conf">[Resolve]<br>DNS=8.8.8.8 #取消注释,增加dns<br>DNSStubListener=no #取消注释,把yes改为no<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs 
bash">sudo <span class="hljs-built_in">ln</span> -sf /run/systemd/resolve/resolv.conf /etc/resolv.conf<br></code></pre></td></tr></table></figure><h2 id="安装-create-ap">安装 create_ap</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">cd</span> /dev/shm/<br>proxychains git <span class="hljs-built_in">clone</span> https://github.com/oblique/create_ap<br><span class="hljs-built_in">cd</span> create_ap<br>sudo make install<br>sudo apt-get install util-linux procps hostapd iproute2 iw haveged dnsmasq<br></code></pre></td></tr></table></figure><h2 id="测试-create-ap">测试 create_ap</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs bash">sudo create_ap wlp2s0 enp1s0 ser5 &lt;密码&gt; --country CN -c 157 --freq-band 5 --no-virt<br></code></pre></td></tr></table></figure><h2 id="启用-create-ap">启用 create_ap</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano create_ap.service<br>sudo <span class="hljs-built_in">mv</span> create_ap.service /etc/systemd/system/create_ap.service<br>sudo systemctl <span class="hljs-built_in">enable</span> create_ap<br>sudo systemctl start create_ap<br></code></pre></td></tr></table></figure><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span 
class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><code class="hljs conf">[Unit]<br>Description=create_ap<br>After=network.target docker.service<br>[Service]<br>ExecStart=/usr/bin/create_ap wlp2s0 enp1s0 ser5 &lt;密码&gt; --country CN -c 157 --freq-band 5 --no-virt<br>ExecReload=/bin/kill -HUP $MAINPID<br>Restart=on-failure<br>[Install]<br>WantedBy=multi-user.target<br></code></pre></td></tr></table></figure><h2 id="增加稳定性">增加稳定性</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">sudo crontab -e<br><span class="hljs-comment"># 5 4 * * * /usr/bin/systemctl restart create_ap</span><br></code></pre></td></tr></table></figure><h2 id="踩坑花絮">踩坑花絮</h2><ul><li><code>lnxrouter</code> 虽然在 <code>create_ap</code> 上进行了更新,但是实际体验在所有信道上都报错,折腾了半天,放弃</li><li>搜到一些老旧的教程,自己去折腾 <code>hostapd</code>,然后自己去配置网桥的时候把服务器弄断网好几次,不得不到处找显示器和键盘</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br></pre></td><td class="code"><pre><code class="hljs bash">sudo su<br><span class="hljs-built_in">cat</span> &lt;&lt; <span class="hljs-string">EOF &gt; /etc/hostapd/hostapd.conf</span><br><span class="hljs-string">interface=wlp2s0</span><br><span class="hljs-string">bridge=br-ap</span><br><span class="hljs-string">driver=nl80211</span><br><span class="hljs-string">ssid=ser5</span><br><span 
class="hljs-string">hw_mode=a</span><br><span class="hljs-string">channel=165</span><br><span class="hljs-string">country_code=CN</span><br><span class="hljs-string">macaddr_acl=0</span><br><span class="hljs-string">auth_algs=3</span><br><span class="hljs-string">wpa=2</span><br><span class="hljs-string">wpa_passphrase=&lt;密码&gt;</span><br><span class="hljs-string">wpa_key_mgmt=WPA-PSK</span><br><span class="hljs-string">wpa_pairwise=TKIP CCMP</span><br><span class="hljs-string">rsn_pairwise=TKIP CCMP</span><br><span class="hljs-string">EOF</span><br></code></pre></td></tr></table></figure><ul><li>收获教训:没事别碰 <code>/etc/netplan/00-installer-config.yaml</code>,特别是没显示器和键盘的时候</li><li>获取网卡型号和驱动型号,查看支持的信道</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs bash">sudo ethtool -i wlp2s0<br>sudo lspci -nn | grep <span class="hljs-string">&quot;Network&quot;</span><br>iwlist wlp2s0 channel<br></code></pre></td></tr></table></figure><ul><li>另外新内核似乎不需要 <code>haveged</code> 来增加熵了</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">cat</span> /proc/sys/kernel/random/entropy_avail<br>systemctl status haveged <br>apt install haveged<br>systemctl <span class="hljs-built_in">enable</span> haveged<br>systemctl start haveged<br></code></pre></td></tr></table></figure>]]></content:encoded>
  30. <category domain="https://hexo.limour.top/tags/ubuntu/">ubuntu</category>
  31. <comments>https://hexo.limour.top/Linux-Setting-AP#disqus_thread</comments>
  32. </item>
  33. <item>
  34. <title>【探索】暴力计算临床研究的样本量</title>
  35. <link>https://hexo.limour.top/Sample-size-calculation-for-survival-analysis-in-clinical-research</link>
  36. <guid>https://hexo.limour.top/Sample-size-calculation-for-survival-analysis-in-clinical-research</guid>
  37. <pubDate>Tue, 12 Mar 2024 16:46:35 GMT</pubDate>
  38. <description>这篇博客介绍了如何计算临床研究中两组生存分析的样本量。首先,作者提供了R代码,包括Logrank对数秩检验的函数以及模拟计算样本量的函数。其次,作者详细解释了模拟计算的步骤,包括生成生存时间数据、招募时间、失访时间等,并通过模拟来估计研究的功效。最后,作者展示了如何使用模拟计算函数来确定样本量,以达到预先设定的功效水平。通过模拟检验,作者展示了样本量计算的有效性,并给出了两个示例,以验证样本量计算的准确性。</description>
  39. <content:encoded><![CDATA[<p>和《<a href="/shi-yong-Bootstrap-fa-ji-suan-zi-ju-zhi-xin-qu-jian">使用Bootstrap法计算自举置信区间</a>》的想法差不多,通过暴力枚举来计算临床研究的样本量,以两组生存分析为例。</p><h2 id="Logrank对数秩检验">Logrank对数秩检验</h2><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br></pre></td><td class="code"><pre><code class="hljs R">require<span class="hljs-punctuation">(</span>survival<span class="hljs-punctuation">)</span><br>f_surv_logrank <span class="hljs-operator">=</span> <span class="hljs-keyword">function</span><span class="hljs-punctuation">(</span>df<span class="hljs-punctuation">)</span><span class="hljs-punctuation">&#123;</span><br> <span class="hljs-comment"># df 包含 group time status 三列</span><br> <span class="hljs-comment"># group 类型为 factor</span><br> <span class="hljs-comment"># status 0 表示未发生结局事件 1 表示发生结局事件</span><br> surv_obj <span class="hljs-operator">=</span> with<span class="hljs-punctuation">(</span>survival<span class="hljs-operator">::</span>Surv<span class="hljs-punctuation">(</span>time <span class="hljs-operator">=</span> time<span class="hljs-punctuation">,</span> event <span class="hljs-operator">=</span> status<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> data <span class="hljs-operator">=</span> df<span class="hljs-punctuation">)</span><br> surv_fit <span class="hljs-operator">=</span> survival<span class="hljs-operator">::</span>survfit<span 
class="hljs-punctuation">(</span>surv_obj <span class="hljs-operator">~</span> group<span class="hljs-punctuation">,</span> data <span class="hljs-operator">=</span> df<span class="hljs-punctuation">)</span><br> surv_diff <span class="hljs-operator">=</span> survival<span class="hljs-operator">::</span>survdiff<span class="hljs-punctuation">(</span>surv_obj <span class="hljs-operator">~</span> group<span class="hljs-punctuation">,</span> data <span class="hljs-operator">=</span> df<span class="hljs-punctuation">)</span><br> res <span class="hljs-operator">=</span> <span class="hljs-built_in">list</span><span class="hljs-punctuation">(</span>pv <span class="hljs-operator">=</span> <span class="hljs-number">1</span> <span class="hljs-operator">-</span> stats<span class="hljs-operator">::</span>pchisq<span class="hljs-punctuation">(</span>surv_diff<span class="hljs-operator">$</span>chisq<span class="hljs-punctuation">,</span> <span class="hljs-built_in">length</span><span class="hljs-punctuation">(</span>surv_diff<span class="hljs-operator">$</span>n<span class="hljs-punctuation">)</span> <span class="hljs-operator">-</span> <span class="hljs-number">1</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> <span class="hljs-comment"># p值</span><br> surv_fit <span class="hljs-operator">=</span> surv_fit<span class="hljs-punctuation">,</span> <span class="hljs-comment"># 绘图用</span><br> surv_obj <span class="hljs-operator">=</span> surv_obj<span class="hljs-punctuation">)</span> <span class="hljs-comment"># 为了兼容惰性求值</span><br> <span class="hljs-built_in">return</span><span class="hljs-punctuation">(</span>res<span class="hljs-punctuation">)</span><br><span class="hljs-punctuation">&#125;</span><br>f_surv_logrank_plot <span class="hljs-operator">=</span> <span class="hljs-keyword">function</span><span class="hljs-punctuation">(</span>res<span class="hljs-punctuation">)</span><span class="hljs-punctuation">&#123;</span><br> require<span 
class="hljs-punctuation">(</span>survminer<span class="hljs-punctuation">)</span><br> surv_obj <span class="hljs-operator">&lt;&lt;-</span> res<span class="hljs-operator">$</span>surv_obj <span class="hljs-comment"># 为了兼容惰性求值</span><br> survminer<span class="hljs-operator">::</span>ggsurvplot<span class="hljs-punctuation">(</span>res<span class="hljs-operator">$</span>surv_fit<span class="hljs-punctuation">,</span> conf.int <span class="hljs-operator">=</span> <span class="hljs-built_in">F</span><span class="hljs-punctuation">,</span> pval <span class="hljs-operator">=</span> <span class="hljs-built_in">T</span><span class="hljs-punctuation">,</span> risk.table <span class="hljs-operator">=</span> <span class="hljs-built_in">T</span><span class="hljs-punctuation">,</span> ncensor.plot <span class="hljs-operator">=</span> <span class="hljs-literal">TRUE</span><span class="hljs-punctuation">)</span><br><span class="hljs-punctuation">&#125;</span><br></code></pre></td></tr></table></figure><h2 id="模拟计算">模拟计算</h2><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span 
class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br></pre></td><td class="code"><pre><code class="hljs R">f_surv_logrank_simulation_Group <span class="hljs-operator">=</span> <span class="hljs-keyword">function</span><span class="hljs-punctuation">(</span>N<span class="hljs-punctuation">,</span> Median_Survival_Time<span class="hljs-punctuation">,</span> Lost<span class="hljs-punctuation">,</span> Duration_Accrual_Time<span class="hljs-punctuation">,</span> Duration_Total_Time<span class="hljs-punctuation">)</span><span class="hljs-punctuation">&#123;</span><br> time <span class="hljs-operator">=</span> stats<span class="hljs-operator">::</span>rexp<span class="hljs-punctuation">(</span>N<span class="hljs-punctuation">,</span> rate <span class="hljs-operator">=</span> <span class="hljs-built_in">log</span><span class="hljs-punctuation">(</span><span class="hljs-number">2</span><span class="hljs-punctuation">)</span> <span class="hljs-operator">/</span> Median_Survival_Time<span class="hljs-punctuation">)</span> <span class="hljs-comment"># 生存时间服从指数分布</span><br> status <span class="hljs-operator">=</span> <span 
class="hljs-built_in">rep</span><span class="hljs-punctuation">(</span><span class="hljs-number">1</span><span class="hljs-punctuation">,</span>N<span class="hljs-punctuation">)</span> <span class="hljs-comment"># 到生存时间发生结局事件</span><br> <span class="hljs-comment"># print(median((survfit(Surv(time, status) ~ 1))))</span><br> EnrollT <span class="hljs-operator">=</span> stats<span class="hljs-operator">::</span>runif<span class="hljs-punctuation">(</span>N<span class="hljs-punctuation">,</span> <span class="hljs-built_in">min</span> <span class="hljs-operator">=</span> <span class="hljs-number">0</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">max</span> <span class="hljs-operator">=</span> Duration_Accrual_Time<span class="hljs-punctuation">)</span> <span class="hljs-comment"># 招募时间服从均匀分布</span><br> calender_time <span class="hljs-operator">=</span> time <span class="hljs-operator">+</span> EnrollT <span class="hljs-comment"># 发生结局的日期</span><br> idx <span class="hljs-operator">=</span> calender_time <span class="hljs-operator">&gt;</span> Duration_Total_Time <span class="hljs-comment"># 研究终止时未发生结局事件</span><br> status<span class="hljs-punctuation">[</span>idx<span class="hljs-punctuation">]</span> <span class="hljs-operator">=</span> <span class="hljs-number">0</span><br> time<span class="hljs-punctuation">[</span>idx<span class="hljs-punctuation">]</span> <span class="hljs-operator">=</span> Duration_Total_Time <span class="hljs-operator">-</span> EnrollT<span class="hljs-punctuation">[</span>idx<span class="hljs-punctuation">]</span> <span class="hljs-comment"># 实际参与试验的时间</span><br> <span class="hljs-comment"># print(median((survfit(Surv(time, status) ~ 1)))) # 如果 Accrual_Time + Median_Survival &lt; Total_Time,结果不变</span><br> loss <span class="hljs-operator">=</span> stats<span class="hljs-operator">::</span>rexp<span class="hljs-punctuation">(</span>N<span class="hljs-punctuation">,</span> rate <span class="hljs-operator">=</span> 
Lost<span class="hljs-punctuation">)</span> <span class="hljs-comment"># 失访时间服从指数分布</span><br> idx <span class="hljs-operator">=</span> loss <span class="hljs-operator">&lt;</span> time <span class="hljs-comment"># 失访的人</span><br> status<span class="hljs-punctuation">[</span>idx<span class="hljs-punctuation">]</span> <span class="hljs-operator">=</span> <span class="hljs-number">0</span><br> time<span class="hljs-punctuation">[</span>idx<span class="hljs-punctuation">]</span> <span class="hljs-operator">=</span> loss<span class="hljs-punctuation">[</span>idx<span class="hljs-punctuation">]</span><br> <span class="hljs-comment"># print(median((survfit(Surv(time, status) ~ 1)))) # 结果改变</span><br> <span class="hljs-built_in">return</span><span class="hljs-punctuation">(</span><span class="hljs-built_in">list</span><span class="hljs-punctuation">(</span>time <span class="hljs-operator">=</span> time<span class="hljs-punctuation">,</span> status <span class="hljs-operator">=</span> status<span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><br><span class="hljs-punctuation">&#125;</span><br>f_surv_logrank_simulation_Power <span class="hljs-operator">=</span> <span class="hljs-keyword">function</span><span class="hljs-punctuation">(</span>n_C<span class="hljs-punctuation">,</span> Median_Survival_Time_C<span class="hljs-punctuation">,</span> Lost_C<span class="hljs-punctuation">,</span> <br> n_T<span class="hljs-punctuation">,</span> Median_Survival_Time_T<span class="hljs-punctuation">,</span> Lost_T<span class="hljs-punctuation">,</span> <br> Duration_Accrual_Time<span class="hljs-punctuation">,</span> Duration_Total_Time<span class="hljs-punctuation">,</span> Simulation_Cycle<span class="hljs-punctuation">,</span> Alpha<span class="hljs-punctuation">)</span><span class="hljs-punctuation">&#123;</span><br> df <span class="hljs-operator">=</span> data.frame<span class="hljs-punctuation">(</span>group <span class="hljs-operator">=</span> 
factor<span class="hljs-punctuation">(</span><span class="hljs-built_in">c</span><span class="hljs-punctuation">(</span><span class="hljs-built_in">rep</span><span class="hljs-punctuation">(</span><span class="hljs-string">&#x27;Control&#x27;</span><span class="hljs-punctuation">,</span>n_C<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">rep</span><span class="hljs-punctuation">(</span><span class="hljs-string">&#x27;Treatment&#x27;</span><span class="hljs-punctuation">,</span>n_T<span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> <br> time <span class="hljs-operator">=</span> <span class="hljs-built_in">rep</span><span class="hljs-punctuation">(</span><span class="hljs-number">0</span><span class="hljs-punctuation">,</span>n_C<span class="hljs-operator">+</span>n_T<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> <br> status <span class="hljs-operator">=</span> <span class="hljs-built_in">rep</span><span class="hljs-punctuation">(</span><span class="hljs-number">0</span><span class="hljs-punctuation">,</span>n_C<span class="hljs-operator">+</span>n_T<span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><br> <span class="hljs-built_in">sum</span> <span class="hljs-operator">=</span> <span class="hljs-number">0</span><br> <span class="hljs-keyword">for</span> <span class="hljs-punctuation">(</span>i <span class="hljs-keyword">in</span> <span class="hljs-number">1</span><span class="hljs-operator">:</span>Simulation_Cycle<span class="hljs-punctuation">)</span> <span class="hljs-punctuation">&#123;</span><br> C <span class="hljs-operator">=</span> f_surv_logrank_simulation_Group<span class="hljs-punctuation">(</span>n_C<span class="hljs-punctuation">,</span> Median_Survival_Time_C<span class="hljs-punctuation">,</span> Lost_C<span 
class="hljs-punctuation">,</span> Duration_Accrual_Time<span class="hljs-punctuation">,</span> Duration_Total_Time<span class="hljs-punctuation">)</span><br> <span class="hljs-built_in">T</span> <span class="hljs-operator">=</span> f_surv_logrank_simulation_Group<span class="hljs-punctuation">(</span>n_T<span class="hljs-punctuation">,</span> Median_Survival_Time_T<span class="hljs-punctuation">,</span> Lost_T<span class="hljs-punctuation">,</span> Duration_Accrual_Time<span class="hljs-punctuation">,</span> Duration_Total_Time<span class="hljs-punctuation">)</span><br> df<span class="hljs-operator">$</span>time <span class="hljs-operator">=</span> <span class="hljs-built_in">c</span><span class="hljs-punctuation">(</span>C<span class="hljs-operator">$</span>time<span class="hljs-punctuation">,</span> <span class="hljs-built_in">T</span><span class="hljs-operator">$</span>time<span class="hljs-punctuation">)</span><br> df<span class="hljs-operator">$</span>status <span class="hljs-operator">=</span> <span class="hljs-built_in">c</span><span class="hljs-punctuation">(</span>C<span class="hljs-operator">$</span>status<span class="hljs-punctuation">,</span> <span class="hljs-built_in">T</span><span class="hljs-operator">$</span>status<span class="hljs-punctuation">)</span><br> <span class="hljs-keyword">if</span><span class="hljs-punctuation">(</span>f_surv_logrank<span class="hljs-punctuation">(</span>df<span class="hljs-punctuation">)</span><span class="hljs-operator">$</span>pv <span class="hljs-operator">&lt;</span> Alpha<span class="hljs-punctuation">)</span><span class="hljs-punctuation">&#123;</span><br> <span class="hljs-built_in">sum</span> <span class="hljs-operator">=</span> <span class="hljs-built_in">sum</span> <span class="hljs-operator">+</span> <span class="hljs-number">1</span><br> <span class="hljs-punctuation">&#125;</span><br> <span class="hljs-punctuation">&#125;</span><br> <span class="hljs-built_in">return</span><span 
class="hljs-punctuation">(</span><span class="hljs-built_in">sum</span><span class="hljs-operator">/</span>Simulation_Cycle<span class="hljs-punctuation">)</span><br><span class="hljs-punctuation">&#125;</span><br>f_surv_logrank_simulation_Sample_Size <span class="hljs-operator">=</span> <span class="hljs-keyword">function</span><span class="hljs-punctuation">(</span>n_C_min<span class="hljs-punctuation">,</span> n_C_max<span class="hljs-punctuation">,</span> Median_Survival_Time_C<span class="hljs-punctuation">,</span> Lost_C<span class="hljs-punctuation">,</span> <br> TvsC<span class="hljs-punctuation">,</span> Median_Survival_Time_T<span class="hljs-punctuation">,</span> Lost_T<span class="hljs-punctuation">,</span> <br> Duration_Accrual_Time<span class="hljs-punctuation">,</span> Duration_Total_Time<span class="hljs-punctuation">,</span><br> Simulation_Cycle<span class="hljs-punctuation">,</span> Alpha<span class="hljs-punctuation">,</span> Power<span class="hljs-punctuation">,</span> err<span class="hljs-operator">=</span><span class="hljs-number">0.01</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">&#123;</span><br> mid <span class="hljs-operator">=</span> <span class="hljs-built_in">floor</span><span class="hljs-punctuation">(</span><span class="hljs-punctuation">(</span>n_C_min <span class="hljs-operator">+</span> n_C_max<span class="hljs-punctuation">)</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span><span class="hljs-punctuation">)</span> <span class="hljs-comment"># 以防没有进入循环</span><br> <span class="hljs-keyword">while</span> <span class="hljs-punctuation">(</span>n_C_min <span class="hljs-operator">&lt;</span> n_C_max<span class="hljs-punctuation">)</span> <span class="hljs-punctuation">&#123;</span><br> mid <span class="hljs-operator">=</span> <span class="hljs-built_in">floor</span><span class="hljs-punctuation">(</span><span class="hljs-punctuation">(</span>n_C_min <span 
class="hljs-operator">+</span> n_C_max<span class="hljs-punctuation">)</span> <span class="hljs-operator">/</span> <span class="hljs-number">2</span><span class="hljs-punctuation">)</span><br> simulation_Power <span class="hljs-operator">=</span> f_surv_logrank_simulation_Power<span class="hljs-punctuation">(</span>mid<span class="hljs-punctuation">,</span> Median_Survival_Time_C<span class="hljs-punctuation">,</span> Lost_C<span class="hljs-punctuation">,</span> <br> <span class="hljs-built_in">as.integer</span><span class="hljs-punctuation">(</span>mid <span class="hljs-operator">*</span> TvsC<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> Median_Survival_Time_T<span class="hljs-punctuation">,</span> Lost_T<span class="hljs-punctuation">,</span> <br> Duration_Accrual_Time<span class="hljs-punctuation">,</span> Duration_Total_Time<span class="hljs-punctuation">,</span> Simulation_Cycle<span class="hljs-punctuation">,</span> Alpha<span class="hljs-punctuation">)</span><br> print<span class="hljs-punctuation">(</span>paste<span class="hljs-punctuation">(</span><span class="hljs-string">&quot;mid:&quot;</span><span class="hljs-punctuation">,</span> mid<span class="hljs-punctuation">,</span> <span class="hljs-string">&quot;simulation_Power:&quot;</span><span class="hljs-punctuation">,</span> simulation_Power<span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><br> <span class="hljs-keyword">if</span> <span class="hljs-punctuation">(</span><span class="hljs-built_in">abs</span><span class="hljs-punctuation">(</span>simulation_Power <span class="hljs-operator">-</span> Power<span class="hljs-punctuation">)</span> <span class="hljs-operator">&lt;</span> err<span class="hljs-punctuation">)</span> <span class="hljs-punctuation">&#123;</span><br> <span class="hljs-built_in">return</span><span class="hljs-punctuation">(</span>mid<span class="hljs-punctuation">)</span><br> <span 
class="hljs-punctuation">&#125;</span><span class="hljs-keyword">else</span> <span class="hljs-keyword">if</span><span class="hljs-punctuation">(</span>simulation_Power <span class="hljs-operator">&lt;</span> Power<span class="hljs-punctuation">)</span> <span class="hljs-punctuation">&#123;</span><br> n_C_min <span class="hljs-operator">=</span> mid <span class="hljs-operator">+</span> <span class="hljs-number">1</span><br> <span class="hljs-punctuation">&#125;</span><span class="hljs-keyword">else</span> <span class="hljs-punctuation">&#123;</span><br> n_C_max <span class="hljs-operator">=</span> mid <span class="hljs-operator">-</span> <span class="hljs-number">1</span><br> <span class="hljs-punctuation">&#125;</span><br> <span class="hljs-punctuation">&#125;</span><br> <span class="hljs-built_in">return</span><span class="hljs-punctuation">(</span>mid<span class="hljs-punctuation">)</span><br><span class="hljs-punctuation">&#125;</span><br></code></pre></td></tr></table></figure><h2 id="参数说明">参数说明</h2><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><code class="hljs R">Power <span class="hljs-operator">=</span> <span class="hljs-number">0.9</span> <span class="hljs-comment"># 检验效能 = 1 - 第二类错误的概率</span><br>Alpha <span class="hljs-operator">=</span> <span class="hljs-number">0.05</span> <span class="hljs-comment"># 第一类错误的概率</span><br>Median_Survival_Time_C <span class="hljs-operator">=</span> <span class="hljs-number">6</span> <span class="hljs-comment"># 对照组的中位生存时间</span><br>Median_Survival_Time_T <span class="hljs-operator">=</span> <span class="hljs-number">8</span> <span class="hljs-comment"># 
试验组的中位生存时间</span><br>Duration_Accrual_Time <span class="hljs-operator">=</span> <span class="hljs-number">8</span> <span class="hljs-comment"># 入组完成用时</span><br>Duration_Total_Time <span class="hljs-operator">=</span> <span class="hljs-number">18</span> <span class="hljs-comment"># 总试验用时</span><br>Lost_C <span class="hljs-operator">=</span> <span class="hljs-number">0.05</span> <span class="hljs-comment"># 对照组随访单位时间后发生失访的概率</span><br>Lost_T <span class="hljs-operator">=</span> <span class="hljs-number">0.05</span> <span class="hljs-comment"># 试验组随访单位时间后发生失访的概率</span><br>TvsC <span class="hljs-operator">=</span> <span class="hljs-number">1</span> <span class="hljs-comment"># 试验组的样本量:对照组的样本量 1:1 = 1</span><br>Simulation_Cycle <span class="hljs-operator">=</span> <span class="hljs-number">100</span> <span class="hljs-comment"># 模拟的循环次数,越大越准确</span><br></code></pre></td></tr></table></figure><h2 id="检查效果">检查效果</h2><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><code class="hljs R">f_surv_logrank_simulation_Power<span class="hljs-punctuation">(</span><span class="hljs-number">441</span><span class="hljs-punctuation">,</span> <span class="hljs-number">6</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0.05</span><span class="hljs-punctuation">,</span> <br> <span class="hljs-number">442</span><span class="hljs-punctuation">,</span> <span class="hljs-number">8</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0.05</span><span class="hljs-punctuation">,</span><br> <span class="hljs-number">8</span><span class="hljs-punctuation">,</span> <span class="hljs-number">18</span><span 
class="hljs-punctuation">,</span> <span class="hljs-number">1000</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0.05</span><br><span class="hljs-punctuation">)</span><br><span class="hljs-comment"># PASS的结果是 0.9</span><br>f_surv_logrank_simulation_Sample_Size<span class="hljs-punctuation">(</span><span class="hljs-number">0</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1000</span><span class="hljs-punctuation">,</span> <span class="hljs-number">6</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0.05</span><span class="hljs-punctuation">,</span> <br> <span class="hljs-number">1</span><span class="hljs-punctuation">,</span> <span class="hljs-number">8</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0.05</span><span class="hljs-punctuation">,</span><br> <span class="hljs-number">8</span><span class="hljs-punctuation">,</span> <span class="hljs-number">18</span><span class="hljs-punctuation">,</span> <span class="hljs-number">1000</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0.05</span><span class="hljs-punctuation">,</span> <span class="hljs-number">0.9</span><br><span class="hljs-punctuation">)</span><br><span class="hljs-comment"># PASS的结果是 441</span><br></code></pre></td></tr></table></figure>]]></content:encoded>
  40. <category domain="https://hexo.limour.top/tags/Bootstrap/">Bootstrap</category>
  41. <comments>https://hexo.limour.top/Sample-size-calculation-for-survival-analysis-in-clinical-research#disqus_thread</comments>
  42. </item>
  43. <item>
  44. <title>【探索】6G显存畅玩无限长度的LLM角色扮演</title>
  45. <link>https://hexo.limour.top/Enjoy-unlimited-length-LLM-role-playing-with-6GB-of-VRAM</link>
  46. <guid>https://hexo.limour.top/Enjoy-unlimited-length-LLM-role-playing-with-6GB-of-VRAM</guid>
  47. <pubDate>Sat, 10 Feb 2024 01:02:10 GMT</pubDate>
  48. <description>&lt;p&gt;角色扮演的体验是否舒适主要受角色卡、大模型和生成时间三个因素的影响。&lt;/p&gt;
  49. &lt;p&gt;优秀的角色卡往往附带大量的设定,这会极大地拖慢第一次生成的时间,并且随着对话的进行,上下文长度很容易超过kv_cache的上限,这些很破坏沉浸式的体验。&lt;/p&gt;
  50. &lt;p&gt;此外,大模型在进行角色</description>
  51. <content:encoded><![CDATA[<p>角色扮演的体验是否舒适主要受角色卡、大模型和生成时间三个因素的影响。</p><p>优秀的角色卡往往附带大量的设定,这会极大的拖慢第一次生成的时间,并且随着对话的进行,上下文长度很容易超过kv_cache的上限,这些很破坏沉浸式的体验。</p><p>此外,大模型在进行角色扮演时,除了进行必要的对话生成外,还需要生成旁白增加想象空间。</p><p>对博主这些相比填空更喜欢选项的玩家,给出提问建议也是非常必要的:在建议的基础上修改比自己从零写一个情景更简单,同时也完整保留了控制剧情走向的权力。</p><p>以上这些都让本就稀缺的kv_cache更加雪上加霜。</p><p>万幸,StreamingLLM 发现了kv_cache具有良好的平移性,而 llama.cpp 也提供了对kv_cache进行底层操作的api:可以指定范围的 kv_cache_seq_rm 和 kv_cache_seq_shift。基于这两个api,我们将实现对kv_cache的 token 级微操,榨干kv_cache的全部价值。</p><p>博主实践表明,在充分利用kv_cache的基础上,哪怕是 huggingface space 免费的2vCPU容器也可以游玩角色扮演,而笔记本端6G显存的1660Ti可以做到畅玩角色扮演。</p><h2 id="体验-DEMO">体验 DEMO</h2><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9zcGFjZXMvTGltb3VyL2xsYW1hLXB5dGhvbi1zdHJlYW1pbmdsbG0=" rel="noopener external nofollow noreferrer">Limour/llama-python-streamingllm</a></li><li>同一时间仅支持一个人用,用之前点 Reset 按钮恢复初始的 kv_cache</li><li>按 Submit 没反应,说明有人在用,等一段时间后再 Reset</li><li>最好是 Duplicate 后,设为私密来使用</li></ul><h2 id="代码仓库">代码仓库</h2><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL0xpbW91ci1kZXYvbGxhbWEtcHl0aG9uLXN0cmVhbWluZ2xsbQ==" rel="noopener external nofollow noreferrer">llama-python-streamingllm</a></li><li><a href="/-ji-lu--an-zhuang-conda-bing-geng-huan-qing-hua-yuan">安装conda</a></li><li><a href="/Use-Tunnel-to-speed-up-the-connection-of-VPS">学术上网</a>(管理员权限)</li><li>使用前需要修改 <code>rp_config.json</code> 里的模型路径和参数,指定为你已经下载了的<code>GGUF</code>格式模型的路径</li><li>推荐 <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9UaGVCbG9rZS9DYXVzYWxMTS03Qi1HR1VGL2Jsb2IvbWFpbi9jYXVzYWxsbV83Yi5RNV9LX00uZ2d1Zg==" rel="noopener external nofollow noreferrer">causallm_7b.Q5_K_M.gguf</a></li><li>或者自己用 <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9kYXRhc2V0cy9MaW1vdXIvYi1jb3JwdXM=" rel="noopener external nofollow noreferrer">Galgame</a> 解包的对话数据集微调一个合适的模型。</li></ul><h3 id="二选一:GPU版本的环境">二选一:GPU版本的环境</h3><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span 
class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><code class="hljs powershell">conda create <span class="hljs-literal">-n</span> llamaCpp libcublas cuda<span class="hljs-literal">-toolkit</span> git <span class="hljs-literal">-c</span> nvidia <span class="hljs-literal">-c</span> conda<span class="hljs-literal">-forge</span><br>conda activate llamaCpp<br>conda install python=<span class="hljs-number">3.10</span> gradio <span class="hljs-literal">-c</span> conda<span class="hljs-literal">-forge</span><br><span class="hljs-comment"># 然后去 release 下载相应的包 https://github.com/Limour-dev/llama-cpp-python-cuBLAS-wheels/releases</span><br>pip install <span class="hljs-literal">--force-reinstall</span> llama_cpp_python<span class="hljs-literal">-0</span>.<span class="hljs-number">2.39</span>+cu122<span class="hljs-literal">-cp310-cp310-win_amd64</span>.whl<br></code></pre></td></tr></table></figure><h3 id="二选一:CPU版本的环境">二选一:CPU版本的环境</h3><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs powershell">conda create <span class="hljs-literal">-n</span> llamaCpp python=<span class="hljs-number">3.10</span> gradio git <span class="hljs-literal">-c</span> conda<span class="hljs-literal">-forge</span><br>conda activate llamaCpp<br>pip install llama<span class="hljs-literal">-cpp-python</span>==<span class="hljs-number">0.2</span>.<span class="hljs-number">39</span><br></code></pre></td></tr></table></figure><h3 id="下载并运行">下载并运行</h3><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><code 
class="hljs powershell">conda activate llamaCpp<br>git clone <span class="hljs-literal">--depth</span>=<span class="hljs-number">1</span> https://github.com/Limour<span class="hljs-literal">-dev</span>/llama<span class="hljs-literal">-python-streamingllm</span>.git<br><span class="hljs-built_in">cd</span> llama<span class="hljs-literal">-python-streamingllm</span><br>mkdir cache<br>python .\gradio_streamingllm.py<br></code></pre></td></tr></table></figure><h2 id="核心内容">核心内容</h2><ul><li><code>Submit</code> 会将 msg 发送给模型,然后流式生成回答</li><li><code>Retry</code> 会重新生成最近一次的 msg 所对应的回答</li><li><code>旁白</code> 会流式生成一份旁白到 <code>VO</code> 框</li><li><code>建议</code> 会以 usr 的身份流式生成一份 msg 供修改</li><li>上面四个功能的基础就是下面的基于 StreamingLLM 原理的 venv 开头的函数</li></ul><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span 
class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br><span class="line">83</span><br><span class="line">84</span><br><span class="line">85</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">class</span> <span class="hljs-title class_">StreamingLLM</span>(<span class="hljs-title class_ inherited__">Llama</span>):<br> <span class="hljs-keyword">pass</span><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">kv_cache_seq_trim</span>(<span class="hljs-params">self</span>):<br> self._ctx.kv_cache_seq_rm(-<span class="hljs-number">1</span>, 
self.n_tokens, -<span class="hljs-number">1</span>)<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">kv_cache_seq_ltrim</span>(<span class="hljs-params">self, n_keep, n_discard=<span class="hljs-number">256</span>, n_past=-<span class="hljs-number">1</span></span>):<br> <span class="hljs-keyword">if</span> n_past &lt; <span class="hljs-number">0</span>:<br> n_past = self.n_tokens<br> self._ctx.kv_cache_seq_rm(-<span class="hljs-number">1</span>, n_keep, n_keep + n_discard)<br> self._ctx.kv_cache_seq_shift(<span class="hljs-number">0</span>, n_keep + n_discard, n_past, -n_discard)<br> self.input_ids[n_keep:n_past - n_discard] = self.input_ids[n_keep + n_discard:n_past]<br> self.n_tokens = n_past - n_discard<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">_venv_init</span>(<span class="hljs-params">self</span>):<br> self.venv = [<span class="hljs-number">0</span>]<br> self.venv_idx_map = []<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">venv_create</span>(<span class="hljs-params">self, name: <span class="hljs-built_in">str</span></span>):<br> self.venv.append(<span class="hljs-number">0</span>)<br> self.venv_idx_map.append(name)<br> <span class="hljs-keyword">return</span> name<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">venv_disband</span>(<span class="hljs-params">self, name_set</span>):<br> <span class="hljs-keyword">if</span> <span class="hljs-built_in">len</span>(self.venv) &lt;= <span class="hljs-number">1</span>:<br> <span class="hljs-keyword">return</span> <span class="hljs-literal">False</span><br> name_set = &#123;x <span class="hljs-keyword">for</span> x <span class="hljs-keyword">in</span> name_set <span class="hljs-keyword">if</span> x <span class="hljs-keyword">in</span> self.venv_idx_map&#125;<br> <span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> name_set:<br> <span 
class="hljs-keyword">return</span> <span class="hljs-literal">False</span><br> <span class="hljs-keyword">while</span> self.venv_idx_map:<br> <span class="hljs-keyword">if</span> self.venv_idx_map[<span class="hljs-number">0</span>] <span class="hljs-keyword">in</span> name_set:<br> self.venv_idx_map.pop(<span class="hljs-number">0</span>) <span class="hljs-comment"># 删除</span><br> tmp = self.venv.pop(<span class="hljs-number">1</span>) <span class="hljs-comment"># 对应的 venv 移入上一层</span><br> self.venv[<span class="hljs-number">0</span>] += tmp<br> <span class="hljs-keyword">else</span>:<br> <span class="hljs-keyword">break</span><br> <span class="hljs-keyword">return</span> <span class="hljs-literal">True</span><br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">venv_revision</span>(<span class="hljs-params">self, name: <span class="hljs-built_in">str</span></span>):<br> <span class="hljs-keyword">if</span> <span class="hljs-built_in">len</span>(self.venv) &lt;= <span class="hljs-number">1</span>:<br> <span class="hljs-keyword">return</span> <span class="hljs-literal">False</span><br> <span class="hljs-keyword">if</span> name <span class="hljs-keyword">not</span> <span class="hljs-keyword">in</span> self.venv_idx_map:<br> <span class="hljs-keyword">return</span> <span class="hljs-literal">False</span><br> _s = <span class="hljs-number">0</span><br> <span class="hljs-keyword">while</span> self.venv_idx_map:<br> <span class="hljs-keyword">if</span> self.venv_idx_map[-<span class="hljs-number">1</span>] == name:<br> <span class="hljs-keyword">break</span><br> self.venv_idx_map.pop() <span class="hljs-comment"># 删除</span><br> _s += self.venv.pop()<br> <span class="hljs-keyword">if</span> _s:<br> self.n_tokens -= <span class="hljs-built_in">min</span>(_s, self.n_tokens)<br> self.kv_cache_seq_trim()<br> <span class="hljs-keyword">return</span> <span class="hljs-literal">True</span><br><br> <span class="hljs-keyword">def</span> <span 
class="hljs-title function_">venv_remove</span>(<span class="hljs-params">self, name: <span class="hljs-built_in">str</span></span>):<br> <span class="hljs-keyword">if</span> <span class="hljs-built_in">len</span>(self.venv) &lt;= <span class="hljs-number">1</span>:<br> <span class="hljs-keyword">return</span> <span class="hljs-literal">False</span><br> <span class="hljs-keyword">if</span> name <span class="hljs-keyword">not</span> <span class="hljs-keyword">in</span> self.venv_idx_map:<br> <span class="hljs-keyword">return</span> <span class="hljs-literal">False</span><br> venv_idx = self.venv_idx_map.index(name) + <span class="hljs-number">1</span><br> <span class="hljs-keyword">while</span> self.venv_idx_map:<br> self.venv_idx_map.pop(venv_idx - <span class="hljs-number">1</span>) <span class="hljs-comment"># 删除</span><br> <span class="hljs-keyword">if</span> venv_idx == <span class="hljs-built_in">len</span>(self.venv) - <span class="hljs-number">1</span>:<br> <span class="hljs-comment"># 最后一层</span><br> self.n_tokens -= <span class="hljs-built_in">min</span>(self.venv.pop(), self.n_tokens)<br> self.kv_cache_seq_trim()<br> <span class="hljs-keyword">break</span><br> <span class="hljs-keyword">else</span>:<br> <span class="hljs-comment"># 非最后一层</span><br> n_keep = self.n_tokens - <span class="hljs-built_in">sum</span>(self.venv[i] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(venv_idx, <span class="hljs-built_in">len</span>(self.venv)))<br> n_discard = self.venv.pop(venv_idx)<br> self.kv_cache_seq_ltrim(n_keep, n_discard)<br> <span class="hljs-keyword">try</span>:<br> venv_idx = self.venv_idx_map.index(name, venv_idx - <span class="hljs-number">1</span>) + <span class="hljs-number">1</span><br> <span class="hljs-keyword">except</span> ValueError: <span class="hljs-comment"># 没有了</span><br> <span class="hljs-keyword">break</span><br> <span class="hljs-keyword">return</span> <span 
class="hljs-literal">True</span><br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">eval_t</span>(<span class="hljs-params">self, tokens, n_keep=<span class="hljs-number">4</span>, n_discard=<span class="hljs-number">256</span>, im_start=<span class="hljs-literal">None</span></span>):<br> <span class="hljs-keyword">if</span> self._n_ctx &lt; self.n_tokens + <span class="hljs-built_in">len</span>(tokens):<br> tmp_n_discard = <span class="hljs-built_in">max</span>(n_discard, self.n_tokens + <span class="hljs-built_in">len</span>(tokens) - self._n_ctx)<br> self.kv_cache_seq_ltrim(n_keep, tmp_n_discard)<br> <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">0</span>, <span class="hljs-built_in">len</span>(tokens), self.n_batch):<br> <span class="hljs-keyword">pass</span><br> self.n_tokens += n_tokens<br> self.venv[-<span class="hljs-number">1</span>] += n_tokens<br></code></pre></td></tr></table></figure>]]></content:encoded>
  52. <category domain="https://hexo.limour.top/tags/%E6%8E%A2%E7%B4%A2/">探索</category>
  53. <category domain="https://hexo.limour.top/tags/llama/">llama</category>
  54. <comments>https://hexo.limour.top/Enjoy-unlimited-length-LLM-role-playing-with-6GB-of-VRAM#disqus_thread</comments>
  55. </item>
  56. <item>
  57. <title>【探索】将BlueLM-7B-Chat转换为标准的GGUF模型</title>
  58. <link>https://hexo.limour.top/Convert-BlueLM-7B-Chat-to-the-standard-GGUF-model</link>
  59. <guid>https://hexo.limour.top/Convert-BlueLM-7B-Chat-to-the-standard-GGUF-model</guid>
  60. <pubDate>Sat, 03 Feb 2024 22:38:07 GMT</pubDate>
  61. <description>&lt;h2 id=&quot;准备模型&quot;&gt;准备模型&lt;/h2&gt;
  62. &lt;ul&gt;
  63. &lt;li&gt;&lt;a href=&quot;/Running-Qwen-on-the-Win10-platform-with-6GB-of-video-memory&quot;&gt;运行环境&lt;/a&gt;&lt;/li&gt;
  64. &lt;/ul&gt;
  65. &lt;figure class=&quot;h</description>
  66. <content:encoded><![CDATA[<h2 id="准备模型">准备模型</h2><ul><li><a href="/Running-Qwen-on-the-Win10-platform-with-6GB-of-video-memory">运行环境</a></li></ul><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><code class="hljs powershell"><span class="hljs-comment"># conda create -n llamaConvert python=3.10 git -c conda-forge</span><br><span class="hljs-comment"># conda activate llamaConvert</span><br><span class="hljs-comment"># cd D:\llama</span><br><span class="hljs-comment"># git clone --depth=1 https://github.com/ggerganov/llama.cpp.git</span><br><span class="hljs-comment"># cd llama.cpp</span><br><span class="hljs-comment"># python -m pip install -r requirements.txt</span><br><span class="hljs-comment"># pip install tiktoken</span><br><span class="hljs-variable">$env:HF_ENDPOINT</span>=<span class="hljs-string">&quot;https://hf-mirror.com&quot;</span>; python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;from huggingface_hub import snapshot_download; snapshot_download(repo_id=&#x27;vivo-ai/BlueLM-7B-Chat-32K&#x27;, local_dir=r&#x27;D:\models\BlueLM-7B&#x27;)&quot;</span><br><span class="hljs-comment"># 还是用 vivo-ai/BlueLM-7B-Chat 吧, 32k的 ntkmixed 长度外推方案不知道怎么改</span><br></code></pre></td></tr></table></figure><ul><li>初始的模型结构</li></ul><figure class="highlight txt"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span 
class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br></pre></td><td class="code"><pre><code class="hljs txt">BlueLMForCausalLM(<br> (model): BlueLMModel(<br> (embed_tokens): Embedding(100096, 4096, padding_idx=3)<br> (embed_layer_norm): LayerNorm((4096,), eps=1e-06, elementwise_affine=True)<br> (layers): ModuleList(<br> (0-31): 32 x BlueLMDecoderLayer(<br> (self_attn): BlueLMAttention(<br> (q_proj): Linear(in_features=4096, out_features=4096, bias=False)<br> (k_proj): Linear(in_features=4096, out_features=4096, bias=False)<br> (v_proj): Linear(in_features=4096, out_features=4096, bias=False)<br> (o_proj): Linear(in_features=4096, out_features=4096, bias=False)<br> (rotary_emb): BlueLMRotaryEmbedding()<br> )<br> (mlp): BlueLMMLP(<br> (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)<br> (down_proj): Linear(in_features=11008, out_features=4096, bias=False)<br> (up_proj): Linear(in_features=4096, out_features=11008, bias=False)<br> (act_fn): SiLU()<br> (dropout): Dropout(p=0, inplace=False)<br> )<br> (input_layernorm): BlueLMRMSNorm()<br> (post_attention_layernorm): BlueLMRMSNorm()<br> )<br> )<br> (norm): BlueLMRMSNorm()<br> )<br> (lm_head): Linear(in_features=4096, out_features=100096, bias=False)<br>)<br></code></pre></td></tr></table></figure><h2 id="归一化-embed">归一化 embed</h2><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span 
class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM<br><span class="hljs-keyword">import</span> torch<br><br><span class="hljs-comment"># 提前将 modeling_bluelm.py 中用到 flash_attn 的部分改成 None,反正不真运行,只需要模型结构</span><br>tmp = AutoModelForCausalLM.from_pretrained(<span class="hljs-string">r&#x27;D:\models\BlueLM-7B&#x27;</span>,<br> torch_dtype=torch.bfloat16,<br> trust_remote_code=<span class="hljs-literal">True</span>)<br><br>test_i = torch.arange(<span class="hljs-number">0</span>, <span class="hljs-number">10</span>, dtype=torch.long)<br><br>embedding = tmp.model.embed_tokens<br>layer_norm = tmp.model.embed_layer_norm<br><br>test_o_o = embedding(test_i)<br>test_o_o = layer_norm(test_o_o)<br><br><span class="hljs-keyword">for</span> param <span class="hljs-keyword">in</span> embedding.parameters():<br> <span class="hljs-keyword">if</span> <span class="hljs-built_in">len</span>(param.shape) &gt; <span class="hljs-number">1</span>:<br> param.data = layer_norm(param.data)<br><br>test_o_c = embedding(test_i)<br><br><span class="hljs-built_in">print</span>(torch.allclose(test_o_o, 
test_o_c, atol=<span class="hljs-number">1e-4</span>))<br><br><span class="hljs-keyword">del</span> tmp.model.embed_layer_norm<br>tmp.save_pretrained(<span class="hljs-string">r&#x27;D:\models\BlueLM&#x27;</span>)<br><span class="hljs-comment"># 记得将缺失的一些文件手动复制一下</span><br><span class="hljs-comment"># 顺便删掉config.json里的rope scaling type</span><br></code></pre></td></tr></table></figure><ul><li>删除 embed_layer_norm 后的结构</li></ul><figure class="highlight txt"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br></pre></td><td class="code"><pre><code class="hljs txt">BlueLMForCausalLM(<br> (model): BlueLMModel(<br> (embed_tokens): Embedding(100096, 4096, padding_idx=3)<br> (layers): ModuleList(<br> (0-31): 32 x BlueLMDecoderLayer(<br> (self_attn): BlueLMAttention(<br> (q_proj): Linear(in_features=4096, out_features=4096, bias=False)<br> (k_proj): Linear(in_features=4096, out_features=4096, bias=False)<br> (v_proj): Linear(in_features=4096, out_features=4096, bias=False)<br> (o_proj): Linear(in_features=4096, out_features=4096, bias=False)<br> (rotary_emb): BlueLMRotaryEmbedding()<br> )<br> (mlp): BlueLMMLP(<br> (gate_proj): Linear(in_features=4096, 
out_features=11008, bias=False)<br> (down_proj): Linear(in_features=11008, out_features=4096, bias=False)<br> (up_proj): Linear(in_features=4096, out_features=11008, bias=False)<br> (act_fn): SiLU()<br> (dropout): Dropout(p=0, inplace=False)<br> )<br> (input_layernorm): BlueLMRMSNorm()<br> (post_attention_layernorm): BlueLMRMSNorm()<br> )<br> )<br> (norm): BlueLMRMSNorm()<br> )<br> (lm_head): Linear(in_features=4096, out_features=100096, bias=False)<br>)<br></code></pre></td></tr></table></figure><h2 id="测试运行">测试运行</h2><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><code class="hljs powershell">conda activate llamaConvert<br><span class="hljs-built_in">cd</span> D:\llama\llama.cpp<br>python convert.py D:\models\BlueLM <span class="hljs-literal">--padvocab</span><br>Wrote D:\models\BlueLM\ggml<span class="hljs-literal">-model-f16</span>.gguf<br>conda activate llamaCpp<br><span class="hljs-built_in">cd</span> D:\llama<span class="hljs-literal">-cublas</span><br>.\quantize.exe D:\models\BlueLM\ggml<span class="hljs-literal">-model-f16</span>.gguf D:\models\BlueLM\ggml<span class="hljs-literal">-model-Q5_K_M</span>.gguf Q5_K_M<br>.\main.exe <span class="hljs-literal">-m</span> D:\models\BlueLM\ggml<span class="hljs-literal">-model-Q5_K_M</span>.gguf <span class="hljs-literal">-ngl</span> <span class="hljs-number">25</span> <span class="hljs-literal">-c</span> <span class="hljs-number">1024</span> <span class="hljs-literal">--interactive-first</span><br></code></pre></td></tr></table></figure>]]></content:encoded>
  67. <category domain="https://hexo.limour.top/tags/%E6%8E%A2%E7%B4%A2/">探索</category>
  68. <category domain="https://hexo.limour.top/tags/llama/">llama</category>
  69. <comments>https://hexo.limour.top/Convert-BlueLM-7B-Chat-to-the-standard-GGUF-model#disqus_thread</comments>
  70. </item>
  71. <item>
  72. <title>【探索】从零开始训练 GPT</title>
  73. <link>https://hexo.limour.top/training-gpt-from-scratch</link>
  74. <guid>https://hexo.limour.top/training-gpt-from-scratch</guid>
  75. <pubDate>Thu, 18 Jan 2024 14:19:11 GMT</pubDate>
  76. <description>探索整个过程,从在一台搭载 1660Ti 显卡的笔记本电脑上构建 Tokenizer,定义带有 RoPE 的 Transformer,一直到训练、保存模型和可视化训练过程。沉浸在从零开始训练 GPT 的旅程中,深入了解每一个步骤。走进深度学习的世界,释放你的便携 1660Ti 笔记本的强大潜能。</description>
  77. <content:encoded><![CDATA[<p><img src="https://img.limour.top/2024/01/18/65a93c6a8065a.webp" alt="训练中..."></p><h2 id="预期结构">预期结构</h2><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL0xpbW91ci1kZXYvSGVsbG9HUFQ=" rel="noopener external nofollow noreferrer">相关代码已经放到 Github</a></li></ul><figure class="highlight txt"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br></pre></td><td class="code"><pre><code class="hljs txt">HelloGPT(<br> (tok_embeddings): Embedding(32765, 768)<br> (rotary_emb): RotaryEmbedding(head_dim=64, max_seq_len=1024)<br> (layers): ModuleList(<br> (0-11): 12 x Decoder(<br> (ln1): RMSNorm(hidden_size=768, eps=1e-06)<br> (attn): Attention(<br> (q_proj): Linear(in_features=768, out_features=768, bias=False)<br> (k_proj): Linear(in_features=768, out_features=768, bias=False)<br> (v_proj): Linear(in_features=768, out_features=768, bias=False)<br> (o_proj): Linear(in_features=768, out_features=768, bias=False)<br> )<br> (ln2): RMSNorm(hidden_size=768, eps=1e-06)<br> (mlp): MLP(<br> (gate_proj): Linear(in_features=768, out_features=1536, bias=False)<br> (up_proj): Linear(in_features=768, out_features=1536, bias=False)<br> (down_proj): Linear(in_features=1536, out_features=768, bias=False)<br> )<br> )<br> )<br> (norm): 
RMSNorm(hidden_size=768, eps=1e-06)<br> (ln2): Linear(in_features=768, out_features=32765, bias=False)<br>)<br></code></pre></td></tr></table></figure><h2 id="配置环境">配置环境</h2><ul><li><a href="/-ji-lu--an-zhuang-conda-bing-geng-huan-qing-hua-yuan">安装conda</a></li></ul><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><code class="hljs powershell"><span class="hljs-built_in">cd</span> E:\GPT<br>conda install mamba <span class="hljs-literal">-c</span> conda<span class="hljs-literal">-forge</span><br>mamba create <span class="hljs-literal">-n</span> HelloGPT pytorch pytorch<span class="hljs-literal">-cuda</span>=<span class="hljs-number">12.1</span> <span class="hljs-literal">-c</span> pytorch <span class="hljs-literal">-c</span> nvidia <span class="hljs-literal">-c</span> conda<span class="hljs-literal">-forge</span><br>conda activate HelloGPT<br>conda install numpy transformers tiktoken tensorboard sentencepiece<span class="hljs-literal">-python</span> jieba emoji <span class="hljs-literal">-c</span> conda<span class="hljs-literal">-forge</span><br>pip install opencc<span class="hljs-literal">-python-reimplemented</span> <span class="hljs-literal">-i</span> https://pypi.tuna.tsinghua.edu.cn/simple<br>python test_cuda.py<br>python test_SPDA.py<br>D:\vscode\Code.exe<br></code></pre></td></tr></table></figure><h2 id="准备数据">准备数据</h2><ul><li>下载 <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9jb2xsZWN0aW9ucy9MaW1vdXIvcjE4LW5vdmVscy1nYWxnYW1lLTY1OThmMTY4OTRjYWRjOWNkY2IzZjNhYg==" rel="noopener external nofollow noreferrer">h-corpus-2023</a></li></ul><figure class="highlight python"><table><tr><td class="gutter"><pre><span 
class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span 
class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">import</span> os<br><br><span class="hljs-keyword">class</span> <span class="hljs-title class_">Fileset</span>(<span class="hljs-title class_ inherited__">list</span>):<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, path, ext=<span class="hljs-string">&#x27;&#x27;</span>, _read=<span class="hljs-literal">None</span></span>):<br> <span class="hljs-keyword">if</span> <span class="hljs-built_in">isinstance</span>(path, <span class="hljs-built_in">str</span>):<br> self.root = path<br> self.extend(f <span class="hljs-keyword">for</span> f <span class="hljs-keyword">in</span> os.listdir(self.root) <span class="hljs-keyword">if</span> f.endswith(ext))<br> self._read = _read<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__getitem__</span>(<span class="hljs-params">self, index</span>):<br> <span class="hljs-keyword">if</span> <span class="hljs-built_in">isinstance</span>(index, <span class="hljs-built_in">int</span>): <span class="hljs-comment"># index是索引</span><br> <span class="hljs-keyword">if</span> self._read:<br> <span class="hljs-keyword">return</span> self._read(os.path.join(self.root, <span class="hljs-built_in">super</span>().__getitem__(index)))<br> <span class="hljs-keyword">else</span>:<br> <span class="hljs-keyword">return</span> os.path.join(self.root, <span class="hljs-built_in">super</span>().__getitem__(index))<br> <span class="hljs-keyword">else</span>: <span class="hljs-comment"># index是切片</span><br> fileset = Fileset(<span class="hljs-literal">None</span>)<br> fileset.root = self.root<br> fileset._read = self._read<br> fileset.extend(<span 
class="hljs-built_in">super</span>().__getitem__(index))<br> <span class="hljs-keyword">return</span> fileset<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">getFileName</span>(<span class="hljs-params">self, index</span>):<br> fname, ext = os.path.splitext(<span class="hljs-built_in">super</span>().__getitem__(index))<br> <span class="hljs-keyword">return</span> fname<br><br><br><span class="hljs-keyword">from</span> tokenizer <span class="hljs-keyword">import</span> tokenizer<br>token_eos = <span class="hljs-number">2</span><br><br><br><span class="hljs-keyword">def</span> <span class="hljs-title function_">readOne</span>(<span class="hljs-params">filePath</span>):<br> retn = []<br> <span class="hljs-keyword">with</span> <span class="hljs-built_in">open</span>(file=filePath, encoding=<span class="hljs-string">&#x27;utf-8&#x27;</span>) <span class="hljs-keyword">as</span> f:<br> <span class="hljs-keyword">for</span> line <span class="hljs-keyword">in</span> f:<br> retn += tokenizer.encode(line).ids<br> retn.append(token_eos)<br> <span class="hljs-keyword">return</span> retn<br><br><br><span class="hljs-keyword">class</span> <span class="hljs-title class_">Hcorpus</span>():<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, path, ext=<span class="hljs-string">&#x27;txt&#x27;</span>, fileset_idx=<span class="hljs-number">0</span>, fileset_sub_idx=<span class="hljs-number">0</span></span>):<br> self.fileset = Fileset(path, ext, readOne)<br> self.fileset_idx = fileset_idx<br> self.fileset_sub_idx = fileset_sub_idx<br> <span class="hljs-keyword">if</span> self.fileset_sub_idx &lt; <span class="hljs-number">0</span>: <span class="hljs-comment"># 再读上一个太复杂了,直接放弃</span><br> self.fileset_sub_idx = <span class="hljs-number">0</span><br> <span class="hljs-keyword">if</span> self.fileset_idx &gt;= <span class="hljs-built_in">len</span>(self.fileset):<br> self.fileset_idx 
= <span class="hljs-number">0</span><br> self.cache = self.fileset[self.fileset_idx]<br> self.fileset_idx += <span class="hljs-number">1</span><br> self.cache_idx = self.fileset_sub_idx<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__call__</span>(<span class="hljs-params">self, size=<span class="hljs-number">512</span></span>):<br> <span class="hljs-keyword">while</span> <span class="hljs-built_in">len</span>(self.cache) &lt; self.cache_idx + size:<br> <span class="hljs-keyword">if</span> self.fileset_idx &gt;= <span class="hljs-built_in">len</span>(self.fileset):<br> self.fileset_idx = <span class="hljs-number">0</span><br> self.fileset_sub_idx = self.cache_idx - <span class="hljs-built_in">len</span>(self.cache)<br> self.cache = self.cache[self.cache_idx:] + self.fileset[self.fileset_idx]<br> self.cache_idx = <span class="hljs-number">0</span><br> self.fileset_idx += <span class="hljs-number">1</span><br> retn = self.cache[self.cache_idx:self.cache_idx + size]<br> self.cache_idx += size<br> self.fileset_sub_idx += size<br> <span class="hljs-keyword">return</span> retn<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__repr__</span>(<span class="hljs-params">self</span>):<br> <span class="hljs-keyword">return</span> <span class="hljs-string">f&quot;Hcorpus(r&#x27;<span class="hljs-subst">&#123;self.fileset.root&#125;</span>&#x27;, fileset_idx=<span class="hljs-subst">&#123;self.fileset_idx-<span class="hljs-number">1</span>&#125;</span>, fileset_sub_idx=<span class="hljs-subst">&#123;self.fileset_sub_idx&#125;</span>)&quot;</span><br></code></pre></td></tr></table></figure><h2 id="训练Tokenizer">训练Tokenizer</h2><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9kb2NzL3Rva2VuaXplcnMvcXVpY2t0b3Vy" rel="noopener external nofollow noreferrer">tokenizer 包的文档</a></li><li>繁体转换成简体:<a 
href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL0xpbW91ci1kZXYvSGVsbG9HUFQvYmxvYi9tYWluL3RyYWluX3Rva2VuaXplcl9wcmUucHk=" rel="noopener external nofollow noreferrer">train_tokenizer_pre.py</a></li><li>获取常用 emoji:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL0xpbW91ci1kZXYvSGVsbG9HUFQvYmxvYi9tYWluL3RtcF9lbW9qaS5weQ==" rel="noopener external nofollow noreferrer">tmp_emoji.py</a></li><li>分词统计词频:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL0xpbW91ci1kZXYvSGVsbG9HUFQvYmxvYi9tYWluL3RyYWluX3Rva2VuaXplcl9qaWViYS5weQ==" rel="noopener external nofollow noreferrer">tokenizer_jieba.py</a></li><li>区分词性并构造 BPE 语料:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL0xpbW91ci1kZXYvSGVsbG9HUFQvYmxvYi9tYWluL3RyYWluX3Rva2VuaXplcl9qaWViYV9zdGF0aXN0aWNzLnB5" rel="noopener external nofollow noreferrer">train_tokenizer_jieba_statistics.py</a></li><li>训练 BPE 模型:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL0xpbW91ci1kZXYvSGVsbG9HUFQvYmxvYi9tYWluL3RyYWluX3Rva2VuaXplci5weQ==" rel="noopener external nofollow noreferrer">train_tokenizer.py</a></li><li>最终训练好的 BPE 模型:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL0xpbW91ci1kZXYvSGVsbG9HUFQvYmxvYi9tYWluL0hlbGxvQlBFLnRva2VuaXplci5qc29u" rel="noopener external nofollow noreferrer">HelloBPE.tokenizer.json</a></li></ul><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">from</span> tokenizers <span class="hljs-keyword">import</span> Tokenizer<br>tokenizer = Tokenizer.from_file(<span class="hljs-string">&quot;HelloBPE.tokenizer.json&quot;</span>)<br></code></pre></td></tr></table></figure><h2 id="定义模型">定义模型</h2><h3 id="定义-Decoder">定义 Decoder</h3><h4 id="定义-RMSnorm">定义 RMSnorm</h4><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span 
class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">class</span> <span class="hljs-title class_">RMSNorm</span>(nn.Module):<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, dim: <span class="hljs-built_in">int</span>, eps: <span class="hljs-built_in">float</span> = <span class="hljs-number">1e-6</span></span>):<br> <span class="hljs-built_in">super</span>().__init__()<br> self.eps = eps<br> self.weight = nn.Parameter(torch.ones(dim))<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, x</span>):<br> x = x * torch.rsqrt(x.<span class="hljs-built_in">pow</span>(<span class="hljs-number">2</span>).mean(-<span class="hljs-number">1</span>, keepdim=<span class="hljs-literal">True</span>) + self.eps)<br> <span class="hljs-keyword">return</span> x * self.weight<br></code></pre></td></tr></table></figure><h4 id="定义-RoPE">定义 RoPE</h4><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span 
class="line">23</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">class</span> <span class="hljs-title class_">RotaryEmbedding</span>(nn.Module):<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, head_dim: <span class="hljs-built_in">int</span>, max_seq_len: <span class="hljs-built_in">int</span>, device=device, theta: <span class="hljs-built_in">float</span> = <span class="hljs-number">10000.0</span></span>):<br> <span class="hljs-built_in">super</span>().__init__()<br> self.head_dim = head_dim<br> self.set_max_seq_len(max_seq_len, device, theta)<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">set_max_seq_len</span>(<span class="hljs-params">self, max_seq_len: <span class="hljs-built_in">int</span>, device=device, theta: <span class="hljs-built_in">float</span> = <span class="hljs-number">10000.0</span></span>):<br> self.max_seq_len = max_seq_len<br> freqs = <span class="hljs-number">1.0</span> / (theta ** (torch.arange(<span class="hljs-number">0</span>, self.head_dim, <span class="hljs-number">2</span>).<span class="hljs-built_in">float</span>().to(device) / self.head_dim))<br> t = torch.arange(max_seq_len, device=device) <span class="hljs-comment"># type: ignore</span><br> freqs = torch.outer(t, freqs).<span class="hljs-built_in">float</span>() <span class="hljs-comment"># 外积</span><br> self.freqs_cis = torch.polar(torch.ones_like(freqs), freqs) <span class="hljs-comment"># 复数,模 1,角度 freqs</span><br> self.freqs_cis.requires_grad = <span class="hljs-literal">False</span> <span class="hljs-comment"># filter(lambda p : p.requires_grad, model.parameters())</span><br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">rotary_emb</span>(<span class="hljs-params">self, x</span>):<br> x_ = torch.view_as_complex(x.<span class="hljs-built_in">float</span>().reshape(*x.shape[:-<span 
class="hljs-number">1</span>], -<span class="hljs-number">1</span>, <span class="hljs-number">2</span>))<br> x_out = torch.view_as_real(x_ * self.local_freqs_cis).flatten(<span class="hljs-number">3</span>)<br> <span class="hljs-keyword">return</span> x_out.type_as(x)<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, start_pos: <span class="hljs-built_in">int</span>, seqlen: <span class="hljs-built_in">int</span></span>):<br> self.local_freqs_cis = self.freqs_cis[start_pos: start_pos + seqlen].view(<span class="hljs-number">1</span>, seqlen, <span class="hljs-number">1</span>, -<span class="hljs-number">1</span>) <span class="hljs-comment"># cacheKV 相关,可忽略</span><br> self.local_freqs_cis.requires_grad = <span class="hljs-literal">False</span><br> <span class="hljs-keyword">return</span> self.rotary_emb<br></code></pre></td></tr></table></figure><h4 id="定义-Attention">定义 Attention</h4><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span 
class="line">31</span><br><span class="line">32</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">class</span> <span class="hljs-title class_">Attention</span>(nn.Module):<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, hidden_size, n_heads, cacheKV, max_batch_size, max_seq_len, device=device</span>):<br> <span class="hljs-built_in">super</span>().__init__()<br> self.n_heads = n_heads<br> self.head_dim = hidden_size // n_heads<br> self.q_proj = nn.Linear(hidden_size, hidden_size, bias=<span class="hljs-literal">False</span>)<br> self.k_proj = nn.Linear(hidden_size, hidden_size, bias=<span class="hljs-literal">False</span>)<br> self.v_proj = nn.Linear(hidden_size, hidden_size, bias=<span class="hljs-literal">False</span>)<br> self.o_proj = nn.Linear(hidden_size, hidden_size, bias=<span class="hljs-literal">False</span>)<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, hidden_states, rotary_emb, start_pos=<span class="hljs-number">0</span>, mask=<span class="hljs-literal">None</span>, is_causal=<span class="hljs-literal">True</span></span>):<br> bsz, seqlen, hidden_size = hidden_states.shape<br><br> q = self.q_proj(hidden_states)<br> k = self.k_proj(hidden_states)<br> v = self.v_proj(hidden_states)<br><br> q = q.view(bsz, seqlen, self.n_heads, self.head_dim)<br> k = k.view(bsz, seqlen, self.n_heads, self.head_dim)<br> v = v.view(bsz, seqlen, self.n_heads, self.head_dim)<br><br> q = rotary_emb(q)<br> k = rotary_emb(k)<br><br> q = q.transpose(<span class="hljs-number">1</span>, <span class="hljs-number">2</span>) <span class="hljs-comment"># (bs, n_heads, seqlen, head_dim)</span><br> k = k.transpose(<span class="hljs-number">1</span>, <span class="hljs-number">2</span>) <span class="hljs-comment"># (bs, n_local_heads, cache_len + seqlen, head_dim)</span><br> v = 
v.transpose(<span class="hljs-number">1</span>, <span class="hljs-number">2</span>) <span class="hljs-comment"># (bs, n_local_heads, cache_len + seqlen, head_dim)</span><br><br> output = F.scaled_dot_product_attention(q, k, v, attn_mask=mask, is_causal=is_causal)<br><br> output = output.transpose(<span class="hljs-number">1</span>, <span class="hljs-number">2</span>).contiguous().view(bsz, seqlen, hidden_size)<br> <span class="hljs-keyword">return</span> self.o_proj(output)<br></code></pre></td></tr></table></figure><h4 id="定义-MLP">定义 MLP</h4><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">class</span> <span class="hljs-title class_">MLP</span>(nn.Module):<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, hidden_size</span>):<br> <span class="hljs-built_in">super</span>().__init__()<br> intermediate_size = <span class="hljs-built_in">int</span>(<span class="hljs-number">2</span> * hidden_size)<br> self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=<span class="hljs-literal">False</span>)<br> self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=<span class="hljs-literal">False</span>)<br> self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=<span class="hljs-literal">False</span>)<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, x</span>):<br> gate = F.silu(self.gate_proj(x))<br> intermediate_states = 
self.up_proj(x)<br> <span class="hljs-keyword">return</span> self.down_proj(gate * intermediate_states)<br></code></pre></td></tr></table></figure><h4 id="组装-Decoder">组装 Decoder</h4><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">class</span> <span class="hljs-title class_">Decoder</span>(nn.Module):<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, hidden_size, n_heads, cacheKV, max_batch_size, max_seq_len</span>):<br> <span class="hljs-built_in">super</span>().__init__()<br> self.ln1 = RMSNorm(hidden_size)<br> self.attn = Attention(hidden_size, n_heads, cacheKV, max_batch_size, max_seq_len)<br> self.ln2 = RMSNorm(hidden_size)<br> self.mlp = MLP(hidden_size)<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, x, rotary_emb, start_pos, mask=<span class="hljs-literal">None</span>, is_causal=<span class="hljs-literal">True</span></span>):<br> x = x + self.attn(self.ln1(x), rotary_emb, start_pos, mask, is_causal)<br> <span class="hljs-keyword">return</span> x + self.mlp(self.ln2(x))<br></code></pre></td></tr></table></figure><h3 id="组装模型">组装模型</h3><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span 
class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">class</span> <span class="hljs-title class_">HelloGPT</span>(nn.Module):<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, vocab_size=<span class="hljs-number">32765</span>, hidden_size=<span class="hljs-number">768</span>, n_heads=<span class="hljs-number">12</span>, max_seq_len=<span class="hljs-number">1024</span>, n_layers=<span class="hljs-number">12</span>, cacheKV=<span class="hljs-literal">False</span>, max_batch_size=<span class="hljs-number">1</span></span>):<br> <span class="hljs-built_in">super</span>().__init__()<br> <span class="hljs-comment"># hidden_size &gt; 8.33 * ln(vocab_size)</span><br> self.tok_embeddings = nn.Embedding(vocab_size, hidden_size)<br> self.rotary_emb = RotaryEmbedding(hidden_size // n_heads, max_seq_len * <span class="hljs-number">2</span>)<br> self.rotary_emb.requires_grad = <span class="hljs-literal">False</span><br> self.layers = nn.ModuleList()<br> <span class="hljs-keyword">for</span> layer_id <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(n_layers):<br> self.layers.append(Decoder(hidden_size, n_heads, cacheKV, max_batch_size, max_seq_len))<br> self.norm = RMSNorm(hidden_size)<br> self.ln2 = nn.Linear(hidden_size, vocab_size, bias=<span class="hljs-literal">False</span>)<br><br> <span 
class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, input_ids: torch.Tensor, start_pos=<span class="hljs-number">0</span>, no_mask=<span class="hljs-literal">True</span></span>):<br> _bsz, seqlen = input_ids.shape<br> h = self.tok_embeddings(input_ids)<br><br> <span class="hljs-comment"># 预计算,减少每一层的重复计算</span><br> rotary_emb = self.rotary_emb(start_pos, seqlen)<br> <span class="hljs-keyword">for</span> layer <span class="hljs-keyword">in</span> self.layers:<br> h = layer(h, rotary_emb, start_pos)<br><br> h = self.norm(h)<br> h = self.ln2(h)<br> <span class="hljs-keyword">return</span> h.<span class="hljs-built_in">float</span>()<br></code></pre></td></tr></table></figure><h2 id="训练模型">训练模型</h2><h3 id="数据载入">数据载入</h3><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><code class="hljs python">data = Hcorpus(<span class="hljs-string">r&#x27;D:\datasets\h-corpus&#x27;</span>)<br><span class="hljs-keyword">def</span> <span class="hljs-title function_">get_batch</span>(<span class="hljs-params">size=<span class="hljs-number">512</span>, bsz=<span class="hljs-number">8</span></span>):<br> x = []<br> y = []<br> <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(bsz):<br> tmp = data(size+<span class="hljs-number">1</span>)<br> x.append(tmp[:size])<br> y.append(tmp[<span class="hljs-number">1</span>:])<br> <span class="hljs-keyword">return</span> torch.tensor(x).to(device), torch.tensor(y).to(device)<br></code></pre></td></tr></table></figure><h3 id="模型载入">模型载入</h3><figure class="highlight python"><table><tr><td 
class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs python">model = HelloGPT(n_layers=<span class="hljs-number">8</span>, max_seq_len=<span class="hljs-number">768</span>)<br>model.to(device)<br></code></pre></td></tr></table></figure><h3 id="训练模型-2">训练模型</h3><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-comment">## 初始化训练器</span><br>criterion = nn.CrossEntropyLoss() <span class="hljs-comment"># 交叉熵损失函数</span><br>optimizer = torch.optim.Adam(train_parameters, lr=<span class="hljs-number">6e-4</span>) <span class="hljs-comment"># Adam 优化器</span><br>scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=<span class="hljs-number">5</span>, T_mult=<span class="hljs-number">2</span>) <span class="hljs-comment"># 余弦退火学习率</span><br>torch.manual_seed(<span class="hljs-number">1337</span>) <span class="hljs-comment"># 魔术随机种子</span><br><br>total_loss = <span class="hljs-number">0</span><br>print_iter = <span class="hljs-number">20</span><br><span class="hljs-keyword">for</span> epoch <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span 
class="hljs-number">1</span>, <span class="hljs-number">100001</span>):<br> optimizer.zero_grad(set_to_none=<span class="hljs-literal">True</span>) <span class="hljs-comment"># 清空梯度,节省显存</span><br> x, y = get_batch(size=<span class="hljs-number">384</span>, bsz=<span class="hljs-number">4</span>) <span class="hljs-comment"># x 是训练语料 y 是 x 移动了一位,当做预测目标</span><br> y_ = model(x) <span class="hljs-comment"># 通过 x 预测的 y</span><br> loss = criterion(y_.view(-<span class="hljs-number">1</span>, <span class="hljs-number">32765</span>), y.view(-<span class="hljs-number">1</span>)) <span class="hljs-comment"># 计算损失</span><br> loss.backward() <span class="hljs-comment"># 反向传播梯度</span><br> torch.nn.utils.clip_grad_norm_(train_parameters, <span class="hljs-number">0.5</span>) <span class="hljs-comment"># 梯度裁剪,减轻过拟合</span><br> optimizer.step() <span class="hljs-comment"># 通过梯度优化训练参数</span><br> scheduler.step() <span class="hljs-comment"># 计算下一步的学习率</span><br> total_loss += loss <span class="hljs-comment"># 累计损失</span><br><br> <span class="hljs-keyword">if</span> epoch % print_iter == <span class="hljs-number">0</span>:<br> <span class="hljs-built_in">print</span>(data)<br> <span class="hljs-built_in">print</span>(<span class="hljs-string">f&#x27;epoch: <span class="hljs-subst">&#123;epoch&#125;</span> lr: <span class="hljs-subst">&#123;scheduler.get_last_lr()[<span class="hljs-number">0</span>]:<span class="hljs-number">.4</span>e&#125;</span> loss: <span class="hljs-subst">&#123;total_loss / print_iter:<span class="hljs-number">.4</span>e&#125;</span>&#x27;</span>)<br> total_loss = <span class="hljs-number">0</span><br></code></pre></td></tr></table></figure><h3 id="保存读取">保存读取</h3><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span 
class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">with</span> <span class="hljs-built_in">open</span>(<span class="hljs-string">&#x27;tmp_training.pkl&#x27;</span>, <span class="hljs-string">&#x27;rb&#x27;</span>) <span class="hljs-keyword">as</span> file:<br> epoch = pickle.load(file) <span class="hljs-comment"># 读取 epoch 位置</span><br> tmp_fileset_idx = pickle.load(file) <span class="hljs-comment"># 读取 data 位置</span><br> tmp_fileset_sub_idx = pickle.load(file)<br><span class="hljs-comment"># 恢复数据位置</span><br>data = Hcorpus(<span class="hljs-string">r&#x27;D:\datasets\h-corpus&#x27;</span>, fileset_idx=tmp_fileset_idx-<span class="hljs-number">1</span>, fileset_sub_idx=tmp_fileset_sub_idx)<br>model = torch.load(<span class="hljs-string">f&#x27;tmp_model_<span class="hljs-subst">&#123;epoch&#125;</span>.pth&#x27;</span>) <span class="hljs-comment"># 恢复模型</span><br><span class="hljs-built_in">print</span>(<span class="hljs-string">f&#x27;start from epoch: <span class="hljs-subst">&#123;epoch&#125;</span> data: <span class="hljs-subst">&#123;data&#125;</span>&#x27;</span>)<br><br>save_iter = <span class="hljs-number">5000</span><br><span class="hljs-keyword">for</span> epoch <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">1</span>, <span class="hljs-number">100001</span>):<br> <span class="hljs-keyword">pass</span><br> <span class="hljs-keyword">if</span> epoch % save_iter == <span class="hljs-number">0</span>:<br> optimizer.zero_grad(set_to_none=<span 
class="hljs-literal">True</span>) <span class="hljs-comment"># 清空梯度,节省显存</span><br> <span class="hljs-keyword">with</span> <span class="hljs-built_in">open</span>(<span class="hljs-string">&#x27;tmp_training.pkl&#x27;</span>, <span class="hljs-string">&#x27;wb&#x27;</span>) <span class="hljs-keyword">as</span> file:<br> pickle.dump(epoch, file) <span class="hljs-comment"># 保存 epoch 位置</span><br> pickle.dump(data.fileset_idx, file) <span class="hljs-comment"># 保存 data 位置</span><br> pickle.dump(data.fileset_sub_idx, file)<br> torch.save(model, <span class="hljs-string">f&#x27;tmp_model_<span class="hljs-subst">&#123;epoch&#125;</span>.pth&#x27;</span>) <span class="hljs-comment"># 保存模型</span><br> <span class="hljs-built_in">print</span>(<span class="hljs-string">f&#x27;save to tmp_model_<span class="hljs-subst">&#123;epoch&#125;</span>.pth&#x27;</span>)<br></code></pre></td></tr></table></figure><h3 id="可视化">可视化</h3><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><code class="hljs python">writer = SummaryWriter(<span class="hljs-string">&#x27;logs&#x27;</span>) <span class="hljs-comment"># tensorboard --logdir logs</span><br><span class="hljs-keyword">for</span> epoch <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">1</span>, <span class="hljs-number">100001</span>):<br> <span class="hljs-keyword">pass</span><br> writer.add_scalar(<span class="hljs-string">&#x27;lr&#x27;</span>, scheduler.get_last_lr()[<span class="hljs-number">0</span>], epoch)<br> writer.add_scalar(<span class="hljs-string">&#x27;loss&#x27;</span>, loss, epoch)<br> <span class="hljs-keyword">if</span> epoch % 
print_iter == <span class="hljs-number">0</span>:<br> <span class="hljs-keyword">pass</span><br> writer.add_scalar(<span class="hljs-string">&#x27;total_loss&#x27;</span>, total_loss / print_iter, epoch)<br>writer.close()<br></code></pre></td></tr></table></figure><h2 id="附加-streaming-llm">附加 streaming_llm</h2><figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br></pre></td><td class="code"><pre><code class="hljs python"><span class="hljs-keyword">class</span> <span class="hljs-title class_">RotaryEmbedding</span>(nn.Module):<br> <span class="hljs-keyword">pass</span><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">inverse_rotary_emb</span>(<span class="hljs-params">self, x</span>):<br> x_ = torch.view_as_complex(x.<span 
class="hljs-built_in">float</span>().reshape(*x.shape[:-<span class="hljs-number">1</span>], -<span class="hljs-number">1</span>, <span class="hljs-number">2</span>))<br> x_out = torch.view_as_real(x_ * self.local_freqs_cis_inverse).flatten(<span class="hljs-number">3</span>)<br> <span class="hljs-keyword">return</span> x_out.type_as(x)<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">inverse_forward</span>(<span class="hljs-params">self, start_pos: <span class="hljs-built_in">int</span>, seqlen: <span class="hljs-built_in">int</span></span>):<br> self.local_freqs_cis_inverse = self.freqs_cis[start_pos: start_pos + seqlen].view(<span class="hljs-number">1</span>, seqlen, <span class="hljs-number">1</span>, -<span class="hljs-number">1</span>) <span class="hljs-comment"># cacheKV 相关,可忽略</span><br> self.local_freqs_cis_inverse = self.local_freqs_cis_inverse.conj() <span class="hljs-comment"># 乘上共轭就旋转回去了</span><br> self.local_freqs_cis.requires_grad = <span class="hljs-literal">False</span><br> <span class="hljs-keyword">return</span> self.inverse_rotary_emb<br><br><span class="hljs-keyword">class</span> <span class="hljs-title class_">Attention</span>(nn.Module):<br> <span class="hljs-keyword">pass</span><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, hidden_states, rotary_emb, start_pos=<span class="hljs-number">0</span>, mask=<span class="hljs-literal">None</span>, is_causal=<span class="hljs-literal">True</span></span>):<br> <span class="hljs-keyword">pass</span><br> <span class="hljs-keyword">if</span> self.cacheKV: <span class="hljs-comment"># cacheKV 相关,可忽略</span><br> self.cache_k[:bsz, start_pos: start_pos + seqlen] = k<br> self.cache_v[:bsz, start_pos: start_pos + seqlen] = v<br> k = self.cache_k[:bsz, : start_pos + seqlen]<br> v = self.cache_v[:bsz, : start_pos + seqlen]<br><br> <span class="hljs-keyword">def</span> <span class="hljs-title 
function_">streaming_llm</span>(<span class="hljs-params">self, start_pos, seqlen, to_pos, inverse_rotary_emb, rotary_emb, bsz</span>):<br> k = self.cache_k[:bsz, start_pos: start_pos + seqlen]<br> v = self.cache_v[:bsz, start_pos: start_pos + seqlen]<br> k = inverse_rotary_emb(k)<br> k = rotary_emb(k)<br> self.cache_k[:bsz, to_pos: to_pos + seqlen] = k<br> self.cache_v[:bsz, to_pos: to_pos + seqlen] = v<br><br><span class="hljs-keyword">class</span> <span class="hljs-title class_">HelloGPT</span>(nn.Module):<br> <span class="hljs-keyword">pass</span><br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">streaming_llm</span>(<span class="hljs-params">self, start_pos, seqlen, to_pos, max_batch_size=<span class="hljs-number">1</span></span>):<br> rotary_emb = self.rotary_emb(to_pos, seqlen)<br> inverse_rotary_emb = self.rotary_emb.inverse_forward(start_pos, seqlen)<br> <span class="hljs-keyword">for</span> layer <span class="hljs-keyword">in</span> self.layers:<br> layer.attn.streaming_llm(start_pos, seqlen, to_pos, inverse_rotary_emb, rotary_emb, max_batch_size)<br></code></pre></td></tr></table></figure>]]></content:encoded>
  78. <category domain="https://hexo.limour.top/tags/%E6%8E%A2%E7%B4%A2/">探索</category>
  79. <category domain="https://hexo.limour.top/tags/llama/">llama</category>
  80. <comments>https://hexo.limour.top/training-gpt-from-scratch#disqus_thread</comments>
  81. </item>
  82. <item>
  83. <title>【避坑】Azure AI 避免反向薅羊毛</title>
  84. <link>https://hexo.limour.top/Azure-AI-prevents-reverse-wool-shearing</link>
  85. <guid>https://hexo.limour.top/Azure-AI-prevents-reverse-wool-shearing</guid>
  86. <pubDate>Tue, 09 Jan 2024 05:55:40 GMT</pubDate>
  87. <description>&lt;h2 id=&quot;起因&quot;&gt;起因&lt;/h2&gt;
  88. &lt;p&gt;今天收到 Azure 的付费邮件,一看账单,好家伙,24.54$ ,比上个月暴涨 622%,给我 CPU 干烧了。&lt;/p&gt;
  89. &lt;p&gt;赶紧去成本分析里按资源分类看上个月的扣费详情,然后就看到两个 10.33$ 的 &lt;code&gt;Contai</description>
  90. <content:encoded><![CDATA[<h2 id="起因">起因</h2><p>今天收到 Azure 的付费邮件,一看账单,好家伙,24.54$ ,比上个月暴涨 622%,给我 CPU 干烧了。</p><p>赶紧去成本分析里按资源分类看上个月的扣费详情,然后就看到两个 10.33$ 的 <code>Container Registry</code>,分别位于我在 <a href="https://hexo.limour.top/go/#aHR0cDovL2FpLmF6dXJlLmNvbS8=" rel="noopener external nofollow noreferrer">Azure AI Studio</a> 里的两个不同项目所在区域。</p><p>一顿折腾,发现这个 Container Registry,有一年的免费试用期,但是免费限额是 31/个/天,一个 15 天刚好是 10.33$ 。</p><p>这 Azure 不讲武德,这样免费,头半个月根本不知道这东西要收费,等月末美滋滋去付账单时钱都已经扣完了。。。</p><p>特别是,这东西似乎是 Azure AI Studio 自动开通的,我根本没有用到过它。心情更糟了。</p><p><img src="https://img.limour.top/2024/01/09/659ce07c76fd0.webp" alt=""></p><h2 id="解决方案">解决方案</h2><p>赶紧去资源组里找到这两个<code>容器注册表</code>,全给删了。删除后不会对 Azure AI 的使用产生影响。</p><p>然后是想办法提工单,看能不能把这钱退回来。</p><p><img src="https://img.limour.top/2024/01/09/659ce568e9756.webp" alt="最后保留的服务,不知道哪些还可以删"></p><h2 id="工单结果">工单结果</h2><blockquote><p>透过案件了解到Container Registry是您不清楚的情况下创建的,且您已经将此资源进行了删除。考虑到您是首次使用Azure产品较不熟悉,且已经将资源删除,经过竭力向主管团队申请,现为您申请了相关费用的减免,即:<br>12/1/2023-12/31/2023期间由Container Registry – Standard产生的费用20.66 USD已经申请退回至您的信用卡,依据银行流程,款项约需要7-21个工作日抵达您的账户,届时请您查看。<br>同时,我们也查看了您当前的计费周期(1/1/2024-1/31/2024)的使用量报表,Container Registry – Standard未产生费用,还请您放心。</p></blockquote>]]></content:encoded>
  91. <category domain="https://hexo.limour.top/tags/openai/">openai</category>
  92. <comments>https://hexo.limour.top/Azure-AI-prevents-reverse-wool-shearing#disqus_thread</comments>
  93. </item>
  94. <item>
  95. <title>【记录】win10平台6G显存运行Qwen-1.8B</title>
  96. <link>https://hexo.limour.top/Running-Qwen-on-the-Win10-platform-with-6GB-of-video-memory</link>
  97. <guid>https://hexo.limour.top/Running-Qwen-on-the-Win10-platform-with-6GB-of-video-memory</guid>
  98. <pubDate>Mon, 01 Jan 2024 03:11:36 GMT</pubDate>
  99. <description>&lt;p&gt;&lt;a href=&quot;https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL2dnZXJnYW5vdi9sbGFtYS5jcHA=&quot; rel=&quot;noopener external nofollow noreferrer&quot;&gt;Ll</description>
  100. <content:encoded><![CDATA[<p><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL2dnZXJnYW5vdi9sbGFtYS5jcHA=" rel="noopener external nofollow noreferrer">Llama.cpp</a> 能 CPU &amp; GPU 环境混合推理,这里记录一下在 windows10 平台上运行 <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9Rd2VuL1F3ZW4tMV84Qg==" rel="noopener external nofollow noreferrer">Qwen-1.8B</a> 的过程,显卡是 1660Ti 。</p><h2 id="准备模型">准备模型</h2><ul><li><a href="/-ji-lu--an-zhuang-conda-bing-geng-huan-qing-hua-yuan">安装conda</a></li><li><a href="/Use-Tunnel-to-speed-up-the-connection-of-VPS">Tun模式</a>(管理员权限)</li></ul><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><code class="hljs powershell">conda create <span class="hljs-literal">-n</span> llamaConvert python=<span class="hljs-number">3.10</span> git <span class="hljs-literal">-c</span> conda<span class="hljs-literal">-forge</span><br>conda activate llamaConvert<br><span class="hljs-built_in">cd</span> D:\llama<br>git clone <span class="hljs-literal">--depth</span>=<span class="hljs-number">1</span> https://github.com/ggerganov/llama.cpp.git<br><span class="hljs-built_in">cd</span> llama.cpp<br>python <span class="hljs-literal">-m</span> pip install <span class="hljs-literal">-r</span> requirements.txt<br>pip install tiktoken<br></code></pre></td></tr></table></figure><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs powershell">python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;from huggingface_hub import snapshot_download; 
snapshot_download(repo_id=&#x27;Qwen/Qwen-1_8B-Chat&#x27;, local_dir=r&#x27;D:\qwen&#x27;, ignore_patterns=[&#x27;*.h5&#x27;, &#x27;*.ot&#x27;, &#x27;*.msgpack&#x27;, &#x27;*.safetensors&#x27;])&quot;</span><br><span class="hljs-built_in">cd</span> D:\qwen<br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00001-of-00002.safetensors&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/Qwen/Qwen-1_8B-Chat/resolve/main/model-00001-of-00002.safetensors?download=true&quot;</span><br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00002-of-00002.safetensors&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/Qwen/Qwen-1_8B-Chat/resolve/main/model-00002-of-00002.safetensors?download=true&quot;</span><br></code></pre></td></tr></table></figure><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs powershell"><span class="hljs-built_in">cd</span> D:\llama\llama.cpp<br>python <span class="hljs-built_in">convert-hf</span><span class="hljs-literal">-to-gguf</span>.py D:\qwen<br><span class="hljs-comment"># Model successfully exported to &#x27;D:\qwen\ggml-model-f16.gguf&#x27;</span><br></code></pre></td></tr></table></figure><h2 id="运行模型">运行模型</h2><ul><li>下载 <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL2dnZXJnYW5vdi9sbGFtYS5jcHAvcmVsZWFzZXM=" rel="noopener external nofollow noreferrer">llama-b1732-bin-win-cublas-cu12.2.0-x64.zip</a></li><li>提取文件到 <code>D:\llama</code></li></ul><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span 
class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs powershell">conda create <span class="hljs-literal">-n</span> llamaCpp libcublas cuda<span class="hljs-literal">-toolkit</span> git <span class="hljs-literal">-c</span> nvidia <span class="hljs-literal">-c</span> conda<span class="hljs-literal">-forge</span><br>conda activate llamaCpp<br><span class="hljs-built_in">cd</span> D:\llama ; .\main.exe <span class="hljs-comment">## 检查能否正确运行</span><br><span class="hljs-built_in">cd</span> D:\llama ; .\quantize.exe <span class="hljs-literal">--help</span> <span class="hljs-comment">## 自己决定量化方式</span><br>.\quantize.exe D:\qwen\ggml<span class="hljs-literal">-model-f16</span>.gguf .\qwen<span class="hljs-literal">-1_8-f16</span>.gguf <span class="hljs-built_in">COPY</span><br>.\server.exe <span class="hljs-literal">-m</span> .\qwen<span class="hljs-literal">-1_8-f16</span>.gguf <span class="hljs-literal">-c</span> <span class="hljs-number">4096</span> <span class="hljs-literal">--n-gpu-layers</span> <span class="hljs-number">50</span> <span class="hljs-comment">## 调节 n-gpu-layers 平衡 CPU &amp; GPU</span><br></code></pre></td></tr></table></figure><ul><li>访问 <code>http://127.0.0.1:8080</code> 选择 <code>Completion</code> 进行测试</li></ul><h2 id="微调模型">微调模型</h2><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9kYXRhc2V0cy9hNjg2ZDM4MC9oLWNvcnB1cy0yMDIz" rel="noopener external nofollow noreferrer">h-corpus数据集</a></li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL1F3ZW5MTS9Rd2VuL2Jsb2IvbWFpbi9SRUFETUVfQ04ubWQjJUU1JUJFJUFFJUU4JUIwJTgz" rel="noopener external nofollow noreferrer">官方微调教程</a></li></ul><h2 id="附加-Yi-6B-Chat">附加 Yi-6B-Chat</h2><p><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby8wMS1haS9ZaS02Qi1DaGF0" rel="noopener external nofollow 
noreferrer">Yi-6B</a>是零一万物开源的双语语言模型,经过3T多语种语料库的训练,在语言理解、常识推理、阅读理解等方面有一定潜力。</p><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs powershell"><span class="hljs-built_in">cd</span> D:\models\<span class="hljs-number">01</span>yi<br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00001-of-00003.safetensors&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/01-ai/Yi-6B-Chat/resolve/main/model-00001-of-00003.safetensors?download=true&quot;</span><br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00002-of-00003.safetensors&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/01-ai/Yi-6B-Chat/resolve/main/model-00002-of-00003.safetensors?download=true&quot;</span><br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00003-of-00003.safetensors&#x27;</span> https://huggingface.co/<span class="hljs-number">01</span><span class="hljs-literal">-ai</span>/Yi<span class="hljs-literal">-6B-Chat</span>/resolve/main/model<span class="hljs-literal">-00003-of-00003</span>.safetensors?download=true<br>conda activate llamaConvert<br>python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;from huggingface_hub import snapshot_download; snapshot_download(repo_id=&#x27;01-ai/Yi-6B-Chat&#x27;, 
local_dir=r&#x27;D:\models\01yi&#x27;, ignore_patterns=[&#x27;*.h5&#x27;, &#x27;*.ot&#x27;, &#x27;*.msgpack&#x27;, &#x27;*.safetensors&#x27;])&quot;</span><br></code></pre></td></tr></table></figure><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><code class="hljs powershell">conda activate llamaConvert<br><span class="hljs-built_in">cd</span> D:\llama\llama.cpp<br>python convert.py D:\models\<span class="hljs-number">01</span>yi<br><span class="hljs-comment"># Wrote D:\models\01yi\ggml-model-f16.gguf</span><br>conda activate llamaCpp<br><span class="hljs-built_in">cd</span> D:\llama ; .\quantize.exe <span class="hljs-literal">--help</span><br>.\quantize.exe D:\models\<span class="hljs-number">01</span>yi\ggml<span class="hljs-literal">-model-f16</span>.gguf .\<span class="hljs-number">01</span>yi<span class="hljs-literal">-6b-Q4_K_M</span>.gguf Q4_K_M<br>.\server.exe <span class="hljs-literal">-m</span> .\<span class="hljs-number">01</span>yi<span class="hljs-literal">-6b-Q4_K_M</span>.gguf <span class="hljs-literal">-c</span> <span class="hljs-number">4096</span> <span class="hljs-literal">--n-gpu-layers</span> <span class="hljs-number">50</span><br></code></pre></td></tr></table></figure><h2 id="附加-百川2">附加 百川2</h2><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code 
class="hljs powershell"><span class="hljs-built_in">cd</span> D:\models\baichuan<br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;pytorch_model.bin&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat/resolve/main/pytorch_model.bin?download=true&quot;</span><br>conda activate llamaConvert<br>python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;from huggingface_hub import snapshot_download; snapshot_download(repo_id=&#x27;baichuan-inc/Baichuan2-7B-Chat&#x27;, local_dir=r&#x27;D:\models\baichuan&#x27;, ignore_patterns=[&#x27;*.h5&#x27;, &#x27;*.bin&#x27;, &#x27;*.ot&#x27;, &#x27;*.msgpack&#x27;, &#x27;*.safetensors&#x27;])&quot;</span><br><span class="hljs-built_in">cd</span> D:\llama\llama.cpp<br>python convert.py D:\models\baichuan<br><span class="hljs-comment"># Wrote D:\models\baichuan\ggml-model-f16.gguf</span><br>conda activate llamaCpp<br><span class="hljs-built_in">cd</span> D:\llama ; .\quantize.exe <span class="hljs-literal">--help</span><br>.\quantize.exe D:\models\baichuan\ggml<span class="hljs-literal">-model-f16</span>.gguf .\baichuan<span class="hljs-literal">-7b-Q3_K_M</span>.gguf Q3_K_M<br>.\server.exe <span class="hljs-literal">-m</span> .\baichuan<span class="hljs-literal">-7b-Q3_K_M</span>.gguf <span class="hljs-literal">-c</span> <span class="hljs-number">2048</span> <span class="hljs-literal">--n-gpu-layers</span> <span class="hljs-number">30</span><br></code></pre></td></tr></table></figure><h2 id="附加-tigerbot-13b">附加 tigerbot-13b</h2><p><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9UaWdlclJlc2VhcmNoL3RpZ2VyYm90LTEzYi1jaGF0LXY1" rel="noopener external nofollow noreferrer">tigerbot-13b</a> 在 <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL2plaW5sZWUxOTkxL2NoaW5lc2UtbGxtLWJlbmNobWFyaw==" 
rel="noopener external nofollow noreferrer">chinese-llm-benchmark</a> 上排名靠前。</p><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs powershell"><span class="hljs-built_in">cd</span> D:\models\tigerbot<br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;pytorch_model-00001-of-00003.bin&#x27;</span> <span class="hljs-literal">--max-download-limit</span>=<span class="hljs-number">6</span>M <span class="hljs-string">&quot;https://huggingface.co/TigerResearch/tigerbot-13b-chat-v5/resolve/main/pytorch_model-00001-of-00003.bin?download=true&quot;</span><br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;pytorch_model-00002-of-00003.bin&#x27;</span> <span class="hljs-literal">--max-download-limit</span>=<span class="hljs-number">6</span>M <span class="hljs-string">&quot;https://huggingface.co/TigerResearch/tigerbot-13b-chat-v5/resolve/main/pytorch_model-00002-of-00003.bin?download=true&quot;</span><br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;pytorch_model-00003-of-00003.bin&#x27;</span> <span class="hljs-literal">--max-download-limit</span>=<span class="hljs-number">6</span>M <span 
class="hljs-string">&quot;https://huggingface.co/TigerResearch/tigerbot-13b-chat-v5/resolve/main/pytorch_model-00003-of-00003.bin?download=true&quot;</span><br>conda activate llamaConvert<br>python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;from huggingface_hub import snapshot_download; snapshot_download(repo_id=&#x27;TigerResearch/tigerbot-13b-chat-v5&#x27;, local_dir=r&#x27;D:\models\tigerbot&#x27;, ignore_patterns=[&#x27;*.h5&#x27;, &#x27;*.bin&#x27;, &#x27;*.ot&#x27;, &#x27;*.msgpack&#x27;, &#x27;*.safetensors&#x27;])&quot;</span><br><span class="hljs-built_in">cd</span> D:\llama\llama.cpp<br>python convert.py D:\models\tigerbot <span class="hljs-literal">--padvocab</span><br><span class="hljs-built_in">cd</span> D:\llama ; .\quantize.exe <span class="hljs-literal">--help</span><br>.\quantize.exe D:\models\tigerbot\ggml<span class="hljs-literal">-model-f16</span>.gguf D:\models\tigerbot<span class="hljs-literal">-13B-Chat-Q4_K_M</span>.gguf Q4_K_M<br>.\server.exe <span class="hljs-literal">-m</span> D:\models\tigerbot<span class="hljs-literal">-13B-Chat-Q4_K_M</span>.gguf <span class="hljs-literal">-c</span> <span class="hljs-number">4096</span><br></code></pre></td></tr></table></figure><div class="note note-info"> <p>感觉 6G 显存下,比较好用的是 Yi-6B-Chat-Q4_K_M<br>tigerbot-13b 在 R5 5600H 上推理速度 4.6 tokens/s,CPU 使用率 60%,频率 3.5GHz,应该是内存带宽瓶颈</p> </div><h2 id="附加-在-Colab-上量化">附加 在 Colab 上量化</h2><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9jb2xhYi5yZXNlYXJjaC5nb29nbGUuY29tL2RyaXZlLzFKVDNYRmpEN0NUUkI5N3B1M1FwZUd1eldBMXlZRUFNNz91c3A9c2hhcmluZw==" rel="noopener external nofollow noreferrer">llm2gguf.ipynb</a></li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9MaW1vdXIvQ2F1c2FsTE0tMTRCLUdHVUY=" rel="noopener external nofollow noreferrer">量化后的结果</a></li></ul><h3 id="安装-llama-cpp">安装 llama.cpp</h3><figure class="highlight ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span 
class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs ipython">!git clone --depth=<span class="hljs-number">1</span> https://github.com/ggerganov/llama.cpp.git<br>%cd /content/llama.cpp<br>!LLAMA_CUDA=<span class="hljs-number">1</span> make -j<br></code></pre></td></tr></table></figure><h3 id="计算-imatrix">计算 imatrix</h3><figure class="highlight ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs ipython">%cd /content<br>!wget -O transient.txt.gz https://huggingface.co/datasets/Limour/b-corpus/resolve/main/<span class="hljs-number">00</span>-preview/<span class="hljs-number">00</span>-transient.txt.gz?download=true<br>!gunzip transient.txt.gz<br>!mkdir -p /content/CausalLM-14B-GGUF<br>!wget -O /content/CausalLM-14B-GGUF/causallm_14b.Q8_0.gguf https://huggingface.co/TheBloke/CausalLM-14B-GGUF/resolve/main/causallm_14b.Q8_0.gguf?download=true<br>!/content/llama.cpp/imatrix -m /content/CausalLM-14B-GGUF/causallm_14b.Q8_0.gguf -f /content/transient.txt -ngl <span class="hljs-number">36</span><br></code></pre></td></tr></table></figure><h3 id="登录拥抱脸">登录拥抱脸</h3><figure class="highlight ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs ipython"><span class="hljs-keyword">from</span> google.colab <span class="hljs-keyword">import</span> userdata<br><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> login<br><span class="hljs-comment"># login(token=os.environ.get(&quot;HF_TOKEN&quot;), 
write_permission=True)</span><br>login(token=userdata.get(<span class="hljs-string">&#x27;HF_TOKEN&#x27;</span>), write_permission=<span class="hljs-literal">True</span>)<br><span class="hljs-comment"># from huggingface_hub import notebook_login</span><br><span class="hljs-comment"># notebook_login()</span><br></code></pre></td></tr></table></figure><h3 id="跳过-转换模型">(跳过) 转换模型</h3><figure class="highlight ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><code class="hljs ipython">%cd llama.cpp<br>!python -m pip install -r requirements.txt<br>!pip install tiktoken<br><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> snapshot_download<br>!mkdir -p ~/CausalLM<br>snapshot_download(repo_id=<span class="hljs-string">&#x27;CausalLM/7B&#x27;</span>, local_dir=<span class="hljs-string">r&#x27;/content/CausalLM&#x27;</span>, ignore_patterns=[<span class="hljs-string">&#x27;*.h5&#x27;</span>, <span class="hljs-string">&#x27;*.ot&#x27;</span>, <span class="hljs-string">&#x27;*.msgpack&#x27;</span>, <span class="hljs-string">&#x27;*.safetensors&#x27;</span>])<br>!python convert.py --vocab-<span class="hljs-built_in">type</span> bpe --pad-vocab --outtype f16 /content/CausalLM <br></code></pre></td></tr></table></figure><h3 id="量化模型">量化模型</h3><figure class="highlight ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs ipython">!/content/llama.cpp/quantize --allow-requantize --imatrix /content/imatrix.dat /content/CausalLM-14B-GGUF/causallm_14b.Q8_0.gguf /content/CausalLM-14B-GGUF/causallm_14b.IQ3_XS.gguf IQ3_XS<br></code></pre></td></tr></table></figure><h3 id="上传模型">上传模型</h3><figure class="highlight 
ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><code class="hljs ipython"><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> HfApi<br>api = HfApi()<br>api.upload_file(<br> path_or_fileobj=<span class="hljs-string">&quot;/content/CausalLM-14B-GGUF/causallm_14b.IQ3_XS.gguf&quot;</span>,<br> path_in_repo=<span class="hljs-string">&quot;causallm_14b.IQ3_XS.gguf&quot;</span>,<br> repo_id=<span class="hljs-string">&quot;Limour/CausalLM-14B-GGUF&quot;</span><br>)<br></code></pre></td></tr></table></figure>]]></content:encoded>
  101. <category domain="https://hexo.limour.top/tags/llama/">llama</category>
  102. <comments>https://hexo.limour.top/Running-Qwen-on-the-Win10-platform-with-6GB-of-video-memory#disqus_thread</comments>
  103. </item>
  104. <item>
  105. <title>【记录】轻量个人导航页面 Flare</title>
  106. <link>https://hexo.limour.top/Lightweight-personal-navigation-page-Flare</link>
  107. <guid>https://hexo.limour.top/Lightweight-personal-navigation-page-Flare</guid>
  108. <pubDate>Sun, 31 Dec 2023 17:18:28 GMT</pubDate>
  109. <description>&lt;p&gt;&lt;a href=&quot;https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL3NvdWx0ZWFyeS9kb2NrZXItZmxhcmU=&quot; rel=&quot;noopener external nofollow noreferrer</description>
  110. <content:encoded><![CDATA[<p><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL3NvdWx0ZWFyeS9kb2NrZXItZmxhcmU=" rel="noopener external nofollow noreferrer">Flare</a> 是一款轻量、快速、美观的个人导航页面,适用于 HomeLab 或其他注重私密的场景。</p><ul><li><a href="/Docker-bu-shu-Nginx-Proxy-Manager">反向代理服务</a></li><li>访问 <code>https://flare.limour.top/editor</code> 进行书签编辑</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/flare &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/flare &amp;&amp; nano docker-compose.yml<br>sudo docker-compose up -d <span class="hljs-comment"># flare:5005</span><br>sudo docker-compose logs <span class="hljs-comment"># 获取登录密码</span><br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span class="hljs-string">&#x27;3.6&#x27;</span><br> <br><span class="hljs-attr">services:</span><br> <span 
class="hljs-attr">flare:</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">soulteary/flare</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">always</span><br> <span class="hljs-comment"># 默认无需添加任何参数,如有特殊需求</span><br> <span class="hljs-comment"># 可阅读文档 https://github.com/soulteary/docker-flare/blob/main/docs/advanced-startup.md</span><br> <span class="hljs-comment"># 启用账号登录模式</span><br> <span class="hljs-attr">command:</span> <span class="hljs-string">flare</span> <span class="hljs-string">--disable_login=0</span><br> <span class="hljs-attr">environment:</span><br> <span class="hljs-comment"># 如需开启用户登录模式,需要先设置 `nologin` 启动参数为 `0`</span><br> <span class="hljs-comment"># 如开启 `nologin`,未设置 FLARE_USER,则默认用户为 `flare`</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">FLARE_USER=LimourFlare</span><br> <span class="hljs-comment"># 指定你自己的账号密码,默认生成的密码强度堪忧</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">FLARE_PASS=your_password</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">FLARE_OFFLINE=1</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">FLARE_MINI_REQUEST=1</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">./app:/app</span><br> <br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">ngpm</span><br></code></pre></td></tr></table></figure>]]></content:encoded>
  111. <category domain="https://hexo.limour.top/tags/docker/">docker</category>
  112. <category domain="https://hexo.limour.top/tags/ngpm/">ngpm</category>
  113. <category domain="https://hexo.limour.top/tags/homepage/">homepage</category>
  114. <comments>https://hexo.limour.top/Lightweight-personal-navigation-page-Flare#disqus_thread</comments>
  115. </item>
  116. <item>
  117. <title>【记录】Win10平台使用MLC-LLM编译Qwen-1.8B-Chat</title>
  118. <link>https://hexo.limour.top/Compile-Qwen-1.8B-Chat-using-MLC-LLM-on-Win</link>
  119. <guid>https://hexo.limour.top/Compile-Qwen-1.8B-Chat-using-MLC-LLM-on-Win</guid>
  120. <pubDate>Sat, 09 Dec 2023 04:24:07 GMT</pubDate>
  121. <description>&lt;p&gt;&lt;a href=&quot;https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL21sYy1haS9tbGMtbGxt&quot; rel=&quot;noopener external nofollow noreferrer&quot;&gt;MLC-LLM&lt;/a</description>
  122. <content:encoded><![CDATA[<p><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL21sYy1haS9tbGMtbGxt" rel="noopener external nofollow noreferrer">MLC-LLM</a> 是一种大模型高性能通用部署解决方案,可以通过预编译加速使用本机API原生部署任何大型语言模型。该项目的使命是利用ML编译技术,使每个人都能在其设备上本地开发、优化和部署AI模型。<br><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9Rd2VuL1F3ZW4tMV84Qg==" rel="noopener external nofollow noreferrer">Qwen-1.8B</a> 是阿里云研发的通义千问大模型系列的18亿参数规模的模型。在Qwen-1.8B的基础上,使用对齐机制打造了基于大语言模型的AI助手 <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9Rd2VuL1F3ZW4tMV84Qi1DaGF0" rel="noopener external nofollow noreferrer">Qwen-1.8B-Chat</a>。</p><h2 id="配置环境">配置环境</h2><ul><li><a href="/-ji-lu--an-zhuang-conda-bing-geng-huan-qing-hua-yuan">安装conda</a></li><li><a href="/Use-Tunnel-to-speed-up-the-connection-of-VPS">Tun模式</a>(管理员权限)</li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9sbG0ubWxjLmFpL2RvY3MvaW5zdGFsbC90dm0uaHRtbCNpbnN0YWxsLXR2bS11bml0eQ==" rel="noopener external nofollow noreferrer">详细流程</a></li></ul><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><code class="hljs powershell">conda create <span class="hljs-literal">-n</span> mlc_llm python numpy pytorch transformers scipy timm git <span class="hljs-literal">-c</span> pytorch <span class="hljs-literal">-c</span> conda<span class="hljs-literal">-forge</span><br>conda activate mlc_llm<br>python <span class="hljs-literal">-m</span> pip install <span class="hljs-literal">--pre</span> <span class="hljs-literal">-U</span> <span class="hljs-operator">-f</span> https://mlc.ai/wheels mlc<span 
class="hljs-literal">-ai-nightly</span><br>python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;import tvm; print(&#x27;\n&#x27;.join(f&#x27;&#123;k&#125;: &#123;v&#125;&#x27; for k, v in tvm.support.libinfo().items()))&quot;</span><br>python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;import tvm; print(tvm.vulkan().exist)&quot;</span><br><span class="hljs-built_in">cd</span> D:\mlc<span class="hljs-literal">-llm</span><br>git clone <span class="hljs-literal">--depth</span>=<span class="hljs-number">1</span> <span class="hljs-literal">-b</span> main <span class="hljs-literal">--single-branch</span> https://github.com/mlc<span class="hljs-literal">-ai</span>/mlc<span class="hljs-literal">-llm</span>.git<br><span class="hljs-built_in">cd</span> .\mlc<span class="hljs-literal">-llm</span>\<br>git submodule sync<br>git submodule update <span class="hljs-literal">--init</span> <span class="hljs-literal">--recursive</span> <span class="hljs-literal">--depth</span>=<span class="hljs-number">1</span><br>pip install .<br>python <span class="hljs-literal">-m</span> mlc_llm.build <span class="hljs-literal">--help</span><br></code></pre></td></tr></table></figure><h2 id="准备模型">准备模型</h2><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs powershell">python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;from huggingface_hub import snapshot_download; snapshot_download(repo_id=&#x27;Qwen/Qwen-1_8B-Chat&#x27;, local_dir=&#x27;D:\mlc-llm\qwen&#x27;, ignore_patterns=[&#x27;*.h5&#x27;, &#x27;*.ot&#x27;, &#x27;*.msgpack&#x27;, &#x27;*.safetensors&#x27;])&quot;</span><br><span class="hljs-built_in">cd</span> D:\mlc<span class="hljs-literal">-llm</span>\qwen<br>D:\aria2\aria2c.exe <span 
class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00001-of-00002.safetensors&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/Qwen/Qwen-1_8B-Chat/resolve/main/model-00001-of-00002.safetensors?download=true&quot;</span><br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00002-of-00002.safetensors&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/Qwen/Qwen-1_8B-Chat/resolve/main/model-00002-of-00002.safetensors?download=true&quot;</span><br></code></pre></td></tr></table></figure><h2 id="编译模型">编译模型</h2><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs powershell"><span class="hljs-built_in">cd</span> D:\mlc<span class="hljs-literal">-llm</span>\dist<br>python <span class="hljs-literal">-m</span> mlc_llm.build <span class="hljs-literal">--model</span> <span class="hljs-string">&quot;D:\mlc-llm\qwen&quot;</span> <span class="hljs-literal">--target</span> vulkan <span class="hljs-literal">--quantization</span> q0f16 <span class="hljs-literal">--use-safetensors</span><br></code></pre></td></tr></table></figure><ul><li>等待模型支持:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL21sYy1haS9tbGMtbGxtL2lzc3Vlcy8xMzcz" rel="noopener external nofollow noreferrer">Model type qwen not supported</a></li></ul>]]></content:encoded>
  123. <category domain="https://hexo.limour.top/tags/llama/">llama</category>
  124. <comments>https://hexo.limour.top/Compile-Qwen-1.8B-Chat-using-MLC-LLM-on-Win#disqus_thread</comments>
  125. </item>
  126. <item>
  127. <title>【探索】外科打结法中的等价操作</title>
  128. <link>https://hexo.limour.top/Equivalent-operations-in-surgical-knot-tying</link>
  129. <guid>https://hexo.limour.top/Equivalent-operations-in-surgical-knot-tying</guid>
  130. <pubDate>Sat, 02 Dec 2023 06:47:05 GMT</pubDate>
  131. <description>&lt;p&gt;手术中的止血和缝合,均需要进行结扎,而结扎是否牢固,又与打结有密切关系,结一定要打得牢固,不能松动、滑脱。&lt;br&gt;
  132. 常用的结扣是方结,结扎后极为牢固,在手术中最常用。而打方结时,手法顺序错误就容易打成假结或滑结。因此这里将探讨基础打结手法的等价性,帮助快速理解不同手法所成结</description>
  133. <content:encoded><![CDATA[<p>手术中的止血和缝合,均需要进行结扎,而结扎是否牢固,又与打结有密切关系,结一定要打得牢固,不能松动、滑脱。<br>常用的结扣是方结,结扎后极为牢固,在手术中最常用。而打方结时,手法顺序错误就容易打成假结或滑结。因此这里将探讨基础打结手法的等价性,帮助快速理解不同手法所成结的本质。<br>除不易混淆的外科结外,无论是单手打结还是持钳打结,均由基础动作组合而成,基础动作所成的结都对应纽结理论中的三叶结。三叶结有两种,它们互成镜像,彼此不同痕,分别称为左手三叶结和右手三叶结。因此无论用哪种手法,最后一定能对应到两种三叶结上。</p><p><img src="https://img.limour.top/2023/12/02/656ad3ee2a544.webp" alt="两种三叶结"></p><p><img src="https://img.limour.top/2023/12/02/656ad41001b48.webp" alt="右手勾法对应右手三叶结"></p><p><img src="https://img.limour.top/2023/12/02/656ad4260ec27.webp" alt="左手勾法对应左手三叶结"></p><p><img src="https://img.limour.top/2023/12/02/656ad43dec1e3.webp" alt="右手掏法对应左手三叶结"></p><p><img src="https://img.limour.top/2023/12/02/656ad43fd2842.webp" alt="左手掏法对应右手三叶结"></p><p><img src="https://img.limour.top/2023/12/02/656ad44069922.webp" alt="镊右手定则法对应右手三叶结"></p><p><img src="https://img.limour.top/2023/12/02/656ad43e8f792.webp" alt="镊左手定则法对应左手三叶结"></p><p>因此,右手勾法、左手掏法、镊右手定则法三者等价;左手勾法、右手掏法、镊左手定则法三者等价。任意组合两种基础打结动作打出<strong>不同</strong>的两种三叶结即可组成一个正确的方结。</p>]]></content:encoded>
  134. <category domain="https://hexo.limour.top/tags/%E6%8E%A2%E7%B4%A2/">探索</category>
  135. <comments>https://hexo.limour.top/Equivalent-operations-in-surgical-knot-tying#disqus_thread</comments>
  136. </item>
  137. <item>
  138. <title>【翻译】多重免疫分析揭示了血清免疫蛋白质组学在预测胃癌术前化疗反应中的作用</title>
  139. <link>https://hexo.limour.top/Multiplex-immune-profiling-reveals-the-role-of-serum-immune-proteomics-in-predicting-response-to-preoperative-chemotherapy-of-gastric-cancer</link>
  140. <guid>https://hexo.limour.top/Multiplex-immune-profiling-reveals-the-role-of-serum-immune-proteomics-in-predicting-response-to-preoperative-chemotherapy-of-gastric-cancer</guid>
  141. <pubDate>Fri, 01 Dec 2023 15:41:27 GMT</pubDate>
  142. <description>&lt;div class=&quot;note note-info&quot;&gt;
  143. &lt;p&gt;原文链接:&lt;a href=&quot;https://hexo.limour.top/go/#aHR0cHM6Ly9kb2kub3JnLzEwLjEwMTYvai54Y3JtLjIwMjMuMTAwOT</description>
  144. <content:encoded><![CDATA[<div class="note note-info"> <p>原文链接:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9kb2kub3JnLzEwLjEwMTYvai54Y3JtLjIwMjMuMTAwOTMx" rel="noopener external nofollow noreferrer">Multiplex immune profiling reveals the role of serum immune proteomics in predicting response to preoperative chemotherapy of gastric cancer</a></p> </div><h2 id="摘要">摘要</h2><p>对于胃腺癌患者,对术前化疗的反应存在异质性。该领域现有的研究主要集中在肿瘤微环境(TME)上,而关于全身免疫与化疗反应之间的关系知之甚少。在这项研究中,我们收集了胃腺癌患者在术前、术中和术后接受术前化疗前后的血清样本,并使用基于抗体的蛋白质组学面板研究其免疫蛋白质组学。我们还收集了手术切除的肿瘤样本,并采用多种方法评估它们的肿瘤微环境。我们发现局部和全身免疫特征均与治疗反应相关。术前化疗引发了复杂的全身免疫反应,表现为动态的血清免疫蛋白质组学。建立了一个用于预测反应的术前血清蛋白评分系统。总的来说,这些发现突显了全身免疫在胃癌治疗中的基本但在很大程度上被低估的作用,建议使用基于术前血清免疫蛋白质组学的患者分层策略。</p><h2 id="导言">导言</h2><p>胃癌,其中胃腺癌(GAC)是其主要组织学类型,是全球最常见的恶性肿瘤之一,也是导致癌症相关死亡的主要原因之一。相当一部分胃癌患者在晚期被诊断,这在很大程度上限制了治疗的有效性和患者的预后。尽管手术切除仍然是治疗的强制性支柱,包括JCOG9501和JCOG9502(日本临床肿瘤研究组的系列研究)在内的几项研究表明,胃癌患者不会从扩大切除中受益。在过去的十年中,新辅助和围手术期治疗带来了新的希望。MAGIC试验表明,对于可切除的II/III期胃癌患者,行三个术前和三个术后周期的ECF(表阿霉素、顺铂和5-氟尿嘧啶)化疗,相较于仅手术,可以将5年生存率从23%提高到36%(MAGIC: the Medical Research Council Adjuvant Gastric Infusional Chemotherapy)。FLOT4-AIO试验进一步显示,与ECF或ECX(表阿霉素、5-氟尿嘧啶和卡培他滨)相比,FLOT(5-氟尿嘧啶、叶酸、奥沙利铂和多西紫杉醇)方案可导致更好的病理反应率、R0切除率和总生存(OS)。人们认识到,术前用化疗治疗可以增加根治切除的机会,消除早期微观扩散,并允许对辅助治疗进行术前反应评估。随着免疫检查点抑制剂(ICIs)等新药物的出现,化疗仍然是胃癌围手术期治疗中最基本且可获得的组成部分。<br>另一方面,在胃癌中,术前治疗仍然存在争议,尤其是在东亚国家。对术前化疗的反应存在异质性,而对其机制的了解有限。需要预测患者对术前化疗反应的生物标志物,以对患者进行最佳治疗分层。新出现的证据表明,免疫参与了患者对化疗的反应。Choi等人报道称,肿瘤标本中基质程序性细胞死亡配体1(PD-L1)的表达可以预测第II/III期胃癌经D2胃切除术后辅助化疗的益处。 Kim等人在标准一线化疗期间使用配对的术前和治疗期间的胃活检样本,发现化疗诱导了自然杀伤细胞(NK)的浸润,巨噬细胞的极化,以及在治疗反应者中抗原呈递的增加。但是,在胃癌免疫学领域的现有研究主要集中在肿瘤微环境(TME)中的局部免疫反应上,关于全身免疫与胃癌化疗反应之间的关系知之甚少。<br>胃癌是一种全身性疾病。肿瘤负担和抗肿瘤治疗刺激的免疫反应在不同组织之间协调进行。对接受术前化疗的患者进行系统免疫景观或由Hiam-Galvez等人描述的免疫宏环境的分析对于全面了解癌症免疫和治疗抵抗机制至关重要。现有的系统免疫-炎症指标,如中性粒细胞与淋巴细胞比值(NLR),主要依赖于血细胞计数,这限制了它们的维度。血清免疫蛋白质组学,具有高含量,将是对全身性免疫的理想反映。在这项研究中,我们收集了胃腺癌患者在术前、术中和术后接受术前化疗的血清样本,并使用基于抗体的蛋白质组学平台(Olink Target 96 Inflammation 
panel)研究了他们的免疫蛋白质组学。我们还从这些患者中收集了手术切除的肿瘤样本,并结合多重免疫荧光(mIF)、免疫组织化学(IHC)和RNA测序(RNA-seq)来评估肿瘤微环境。研究了血清免疫蛋白质组学的动态变化及其与肿瘤微环境的相关性。鉴定了预测接受术前化疗患者肿瘤缩小、总生存(OS)和无进展生存(PFS)的生物标志物。</p><h2 id="结果">结果</h2><h3 id="研究人群">研究人群</h3><p>本研究纳入了90名接受术前化疗并随后接受胃切除手术的胃腺癌患者(图1A)。在术前期间接受免疫检查点抑制剂(ICIs)的患者被排除在外。符合条件的患者被分为响应者(残余肿瘤/肿瘤床≤50%的化疗效果,Becker TRG评分1–2)和非响应者(Becker TRG评分3)。在90名患者中,有36人(40%)达到了肿瘤缩小评分1–2,被视为响应者。肿瘤缩小程度较好的患者与非响应者相比,总生存显著更长(图S1A)。无进展生存显示了类似的趋势,尽管没有统计学差异(图S1B)。患者的基本临床特征总结在表S1中。近半数的患者接受了两药细胞毒性化疗,其中大多数是SOX(S-1加奥沙利铂)或XELOX(卡培他滨加奥沙利铂)方案。其余的患者接受了三药细胞毒性化疗,主要是DOS(多西紫杉醇、奥沙利铂和S-1)方案。截至2022年3月1日的分析日期,中位随访时间为55.8个月(范围从3.2到82.7个月)。在整体人群中,中位无进展生存为39.8个月(95%置信区间[CI],32.7至未达到[NR]),而中位总生存为63.9个月(95% CI,51.8至74.1),有45例死亡(50%)。</p><h3 id="血清免疫蛋白质组学动态与术前化疗反应相关">血清免疫蛋白质组学动态与术前化疗反应相关</h3><p>从接受术前化疗的患者中收集了37份术前、8份术中和83份术后的血清样本,其中30份术前和30份术后的血清样本是成对的(图1A)。使用Olink Target 96 Inflammation panel的近距离扩展测定法(PEA)测量了关键免疫和炎症通路中92个标记蛋白的水平。比较术前和术后血清样本中蛋白质水平显示了术前化疗后血清免疫蛋白质组学的动态变化。92个蛋白中有18个在成对和非成对测试中均显示出显著变化(图1B、图S1C和图S1D),表明术前化疗引发了复杂的全身免疫反应。其中,血清C-X-C基序化学因子配体1(CXCL1)和CXCL5水平在术前化疗后显著下降(图S1D)。有趣的是,Zhou等人报道称,作为CXCR2配体的CXCL1和CXCL5可以显著促进胃癌细胞的迁移,并推动胃癌的转移。化疗通过降低CXCL5和CXCL1的血清水平可能有助于预防胃癌的转移。事实上,CXCL1/5水平在术前化疗的早期周期中下降(图S1E)。<br>我们进一步比较了不同治疗反应患者的血清免疫蛋白质组学动态变化。我们发现,响应者在治疗后表现出更动态的血清免疫蛋白质组学变化(图1C和1D)。我们还比较了化疗后响应者和非响应者蛋白水平的绝对变化,发现在响应者中,免疫蛋白水平在化疗后整体上更大幅度的变化(图1E)。例如,与响应者相比,非响应者治疗后血清CXCL5水平的降低程度要轻得多(图1C–1F)。在治疗期间的蛋白质组学在响应者和非响应者中也似乎存在差异(图S1E)。例如,在响应者中,治疗期间血清白介素-10受体亚单位β(IL-10RB)和IL-18水平在化疗过程中呈上升趋势,而在非响应者中未呈现这种趋势(图S1F和图S1G),尽管这一部分的结论可能受到样本数量的限制。<br>综合而言,这些结果表明在胃腺癌患者中对术前化疗存在复杂的全身性免疫反应。响应者在术前化疗后往往表现出更为动态的全身性免疫反应。</p><h3 id="肿瘤微环境(TME)与患者对术前化疗的反应相关">肿瘤微环境(TME)与患者对术前化疗的反应相关</h3><p>首先,我们比较了来自不同治疗反应患者的肿瘤样本的转录组,以获得有关肿瘤局部特征的一般知识。基因集富集分析(GSEA)显示了良好反应者中改变的标志性通路(图2A)。如DNA复制和细胞周期等通路的改变,可能表明抑制癌细胞增殖和肿瘤退化。除此之外,近一半的通路与免疫有关,如趋化因子信号通路和细胞因子与细胞因子受体相互作用通路(图2B和图2C),表明免疫在化疗中的重要性。<br>因此,我们通过多重免疫荧光(mIF)在手术切除的肿瘤样本中评估了地理免疫景观。我们使用CD4、CD8和Foxp3染色来识别不同类型的T细胞。我们使用CD68和CD163染色来识别巨噬细胞(图2D)。我们比较了响应者和非响应者之间的免疫浸润。CD68+巨噬细胞和CD68+/CD163+ 
M2巨噬细胞的细胞密度在非响应者中显著更高(图2E和图S2A)。相应地,Xing等人报道称,在胃癌新辅助化疗后,非响应者中CD68+巨噬细胞浸润更高。M2巨噬细胞也被证明参与了多种癌症的化疗耐药。<br>与此同时,我们从队列中收集了24份术前内镜活检样本。我们使用mIF对术前TME进行了分析(图S2B)。值得注意的是,大多数内镜活检只获取了胃的表浅黏膜,这在很大程度上限制了它们对整个肿瘤的代表性和与手术切除组织的可比性(图S2C)。事实上,mIF显示术前TME中的免疫细胞浸润在响应者和非响应者之间没有差异(修订后的图S2D),这可能是由于活检深度有限和胃癌内肿瘤的显著异质性。<br>总体而言,这些结果表明术后TME与对术前化疗的反应相关。</p><h3 id="血清免疫蛋白质组学与TME之间的相关性">血清免疫蛋白质组学与TME之间的相关性</h3><p>鉴于大多数现有的癌症免疫学研究集中在肿瘤微环境(TME)上,我们评估了全身免疫与TME之间的相关性。我们还确定了血清免疫蛋白质组学与TME中免疫细胞浸润之间的相关性。有趣的是,术后TME似乎与术前而非术后血清免疫蛋白质组学更相关。即使在样本数量较少的情况下,术前血清免疫蛋白质组学与免疫细胞浸润之间的相关性总体上更强(图3A和图3B)。例如,更高的术前血清纤维母细胞生长因子21(FGF21)水平与CD68+巨噬细胞的浸润较少呈相关,而更高的术前血清转化生长因子b1(TGF-b1)水平与CD4+T细胞的浸润较多呈相关(图3C和图3D)。事实上,据报道TGF-b在调节效应器和调节性CD4阳性细胞反应方面具有多效性。 术后血清免疫蛋白质组学与术后免疫细胞浸润之间的相关性也被观察到。例如,更高的术前血清C-C基序化学因子配体11(CCL11)水平与CD4+/FOXP3+ T细胞的浸润较多呈相关(图3E)。王等人报道CCL11增加了乳腺癌中CD4+CD25+Foxp3+调节性T细胞(Tregs)的比例。需要进一步的研究来探讨CCL11是否在胃癌中调节CD4+Foxp3+Treg细胞功能。<br>我们还评估了术后血清蛋白水平与92个免疫基因的肿瘤mRNA水平之间的相关性。其中有5个免疫基因的相关性具有统计学意义,仅有两个是正相关的,符合预期(图3F和图S3A–S3E)。TNFSF12和CCL4的相关性实际上是边缘的(图S3A和图S3B)。血清蛋白水平与组织基因mRNA水平之间的相关性总体上较弱。<br>这些结果显示了全身性免疫与肿瘤微环境之间的相互通信和相互依赖关系。对肿瘤微环境的研究无法充分揭示免疫系统如何全面应对胃癌和抗肿瘤治疗。应该投入更多的努力来对患有胃癌的患者进行系统性免疫分析。</p><h3 id="经典全身性免疫炎症指标的临床价值">经典全身性免疫炎症指标的临床价值</h3><p>经典全身性免疫炎症指标大多基于血细胞比率,并已证明与患者的临床结局相关。 我们对血清免疫蛋白质组学与经典全身性免疫炎症指标之间的关系感到好奇。因此,我们评估了术后血清免疫蛋白质组学与经典免疫炎症指标之间的相关性,包括中性粒细胞与淋巴细胞比值(NLR)、血小板与淋巴细胞比值(PLR)、单核细胞与淋巴细胞比值(MLR)以及血小板分布宽度(PDW)以及常见血细胞计数。尽管大多数相关性相对较弱(图S3F),但血清CXCL5和CXCL1水平与血小板计数呈强相关(图S3G和图S3H)。由于CXCL1和CXCL5通常参与中性粒细胞的稳态和功能,需要更多的工作来理解这种意外但有趣的相关性。我们还评估了经典全身性免疫炎症指标与TME特征之间的关系。术后经典免疫炎症指标与TME中的免疫细胞浸润之间没有观察到相关性(图S3I)。<br>我们进一步探讨了经典全身性免疫炎症指标的临床价值,并评估了中性粒细胞与淋巴细胞比值(NLR)、血小板与淋巴细胞比值(PLR)、单核细胞与淋巴细胞比值(MLR)和血小板分布宽度(PDW)的治疗响应预测价值。我们绘制了这四个指标的受试者工作特征(ROC)曲线,最高的曲线下面积(AUC)为0.602(图S3J)。比例风险回归显示了这四个指标的预后价值。在单变量Cox回归中,这些指标对于OS或PFS均未显示出显著的预后价值,而在多变量Cox回归中,较高的NLR与较短的OS相关,危险比为1.172(95% CI,1.0066–1.3639)(图S3K和图S3L)。相应地,先前的报告已经显示NLR是胃食管交界和胃腺癌的负面预后因子。总体而言,这四个指标的预后价值有限。</p><h3 
id="术后肿瘤基质PD-L1水平和术前血清PD-L1水平均可预测术前化疗反应">术后肿瘤基质PD-L1水平和术前血清PD-L1水平均可预测术前化疗反应</h3><p>PD-L1是关键的免疫调控分子。与其受体PD-1相互作用时,PD-L1抑制细胞毒性T细胞的免疫反应,从而参与肿瘤免疫逃逸。Choi等人基于CLASSIC试验队列报告称,基质PD-L1水平可以预测在第II/III期胃癌D2胃切除术后的辅助化疗效果。利用基于PD1/PDL1免疫组织化学染色的类似评分系统,我们发现非响应者在手术切除的肿瘤样本中基质PD-L1染色分数较高的趋势(图4A和图4B)。基质PD-1染色显示了类似的趋势,尽管这在统计学上并不显著(图S4A和图S4B)。然而,肿瘤区域的PD-L1染色与治疗反应没有相关性(图4A)。这些结果表明肿瘤中的基质PD-L1水平可以预测术前化疗的反应,并表明PD-1/PD-L1途径可能在胃癌的化疗抵抗中起到作用。<br>然而,由于其延迟性,术后基质PD-L1的反应预测价值可能会受到较大的限制。理想的预测因子应该是术前的。术前内镜活检的基质PD-L1染色无法预测治疗反应(图S4C和图S4D)。因此,我们进一步评估了术前血清PD-L1水平的临床意义。有趣的是,术前血清PD-L1水平在不同治疗反应的患者中显示出差异(图4C)。在治疗前,响应者的血清PD-L1水平较低,而治疗似乎减弱了这种差异,因为在术后样本中未观察到显著差异(图4E)。利用ROC曲线评估术前和术后血清PD-L1水平的治疗响应预测价值。术前血清PD-L1水平的AUC为0.737(95% CI,0.569–0.904),而术后血清PD-L1水平的AUC约为0.5(图4D和图4F),表明术前血清PD-L1水平是术前化疗的有希望的治疗响应预测因子。术前血清PD-L1水平较高(&gt;5.084归一化蛋白表达[NPX])的患者倾向于对术前化疗显示较差的治疗反应(图S4E)。<br>我们还评估了不同治疗反应患者的治疗期血清PD-L1水平。在响应者中,血清PD-L1在治疗过程中似乎有所增加。响应者的治疗期血清PD-L1水平显著较高(图S4F和图S4G)。这种差异的一个潜在原因可能是肿瘤细胞的破坏。需要更多样本和进一步研究来确认这一发现并揭示潜在机制。进一步测量了PD-L1/PD-1水平与血清PD-L1水平之间的病理学相关性。在不同的配对中,术前血清PD-L1水平和术后基质PD-1水平显示出最强的相关性(图S4H)。术前血清PD-L1水平可能与化疗后肿瘤中PD-1+免疫细胞的浸润有关。<br>总体而言,这些结果表明,术后肿瘤基质PD-L1水平和术前血清PD-L1水平均可以预测术前化疗的反应,而术前血清PD-L1水平应具有更大的临床意义。</p><h3 id="术前血清CCL20水平预测术前化疗的反应">术前血清CCL20水平预测术前化疗的反应</h3><p>受PD-L1的发现启发,我们进一步比较了不同治疗反应患者的术前血清免疫蛋白质组学,结果显示10种蛋白质具有p &lt;0.05的差异。其中,术前CCL20水平显示出最显著的差异。值得注意的是,我们还比较了不同治疗反应患者的术后血清免疫蛋白质组学,与术前样本相比,差异要弱得多(图S5A)。<br>近期的研究已经确立了CCL20在不同癌症中作为化疗抵抗的重要介质。正如图S5B所总结的,Chen等人报告称,化疗通过核因子kB(NF-kB)和CCL20之间的正反馈环路诱导CCL20,并通过上调乳腺癌中的ATP结合盒亚家族B成员1(ABCB1)表达介导化疗抵抗。Wang等人报告称,化疗通过FOXO1/CEBPB/NF-kB信号途径在结直肠癌细胞中上调CCL20,而分泌的CCL20招募调节性T细胞,促进化疗抵抗。Liu等人报告称,顺铂刺激的经典活化巨噬细胞(CAMs)通过增加CCL20的产生促进卵巢癌细胞迁移。总体而言,现有的研究表明,CCL20的上调是由化疗引起的,并且增加的CCL20产生促进了化疗抵抗。<br>然而,我们的研究发现上述模型在胃癌中可能不成立。我们发现,在术前化疗的响应者中,治疗开始前血清CCL20水平显著较低(图5B)。术前血清CCL20水平预测治疗反应,AUC为0.769(95% CI,0.614–0.925)(图5C),表明胃癌患者在治疗前的血清CCL20水平存在差异。与现有的研究结果一致,非响应者的肿瘤中CCL20 
mRNA水平上调(图5D)。然而,治疗后血清CCL20水平在响应者和非响应者之间没有差异,表明血清和肿瘤CCL20水平脱钩(图5E)。有趣的是,参考沈等人报道的可切除胃癌的血清和组织蛋白质组学,我们发现胃癌患者的血清CCL20水平相对于健康人有所升高(图5F)。肿瘤样本中CCL20蛋白水平也较正常胃组织高(图S5C)。然而,通过胃切除手术切除肿瘤并没有恢复血清CCL20水平,而是进一步增加了血清CCL20水平(图5F)。这些结果表明,血清CCL20并不是肿瘤CCL20的系统反映,而是系统免疫对胃癌和化疗的重要组成部分。<br>我们还验证了现有研究提出的CCL20上调的信号模型。Kim等人收集了在接受第一线标准化疗但未接受PD-1阻断的治疗前和治疗过程中胃活检样本的治疗前患者。我们分析了他们的转录组数据,并发现化疗并没有增加肿瘤样本中CCL20 mRNA水平。相反,化疗后CCL20 mRNA水平下降(图5G)。这一发现挑战了CCL20在胃癌中是由化疗引起的假设。与此同时,ABCB1、CEBPB和FOXO1 mRNA水平在不同反应的肿瘤之间(图5H)以及在化疗前后活检样本之间(图S5D)也没有差异。相反,更高的术前血清CCL20水平与肿瘤中CD4+T细胞的浸润较少相关(图S5E)。CD4+T细胞介导免疫应答,在实现对肿瘤的调节和有效免疫应答中至关重要。与此同时,更高的术前血清CCL20水平与更多基质中PD-1+或PD-L1+细胞的浸润相关(图5I和图S5F),这应该是肿瘤免疫逃逸的关键介质。总体而言,这些结果表明,血清CCL20诱导了一个针对化疗的系统免疫抑制环境。</p><p>正如图5J所总结的,现有的研究提出,肿瘤中CCL20的上调是由化疗引起的,而增加的CCL20产生促进了化疗抵抗。然而,我们发现在化疗开始前患者的血清CCL20水平存在差异。术前血清CCL20水平较高的患者倾向于具有较差的治疗反应。潜在机制是血清CCL20诱导了一个系统性的免疫抑制环境。这些发现提示,在术前血清CCL20水平较高的患者中,免疫治疗可能与化疗的结合更为有效。已经投入了大量努力来开发CCR6-CCL20轴(CCR6是CCL20的细胞受体)的抑制剂。通过抗体或拮抗剂干扰CCR6-CCL20轴在癌症治疗中显示出潜力。术前血清CCL20水平可能有助于选择那些有望从CCR6-CCL20抑制剂中受益的患者。此外,这些发现表明术前期是通过血清蛋白标志物进行患者分层的一个不可替代的时间窗口。因此,我们决定进一步建立一个用于预测术前化疗反应的术前血清蛋白组合。</p><h3 id="一个用于预测术前化疗反应的术前血清蛋白评分系统">一个用于预测术前化疗反应的术前血清蛋白评分系统</h3><p>通过比较不同治疗反应患者的术前血清蛋白水平(图5A),我们将15个p&lt;0.1的蛋白包括在一致性聚类中。基于一致性累积分布函数(CDF)图、增量面积图以及对一致性矩阵的手动检查,我们发现了四个术前血清亚型(图6A、6B和图S6A–S6H)。其中,cluster 2与患者的明显更好的治疗反应相关(图6C)。这种无审查的聚类还与患者的临床特征相关,如肿瘤的Lauren分类。Cluster 1和4与更高比例的腺癌肿瘤类型相关(图6D)。<br>考虑到临床实用性,我们进一步使用最小绝对值收缩和选择算子(LASSO)模型建立了一个用于预测术前化疗反应的术前血清响应预测分数(PSRscore)(图S6I和S6J)。简而言之,LASSO回归是一种使用收缩进行变量选择或参数消除的线性回归类型。通过适当的l值,PSRscore的公式限制为四个蛋白质的血清水平:CCL3、IL-15Ra、CXCL5和CCL20(图6F和图S6K)。PSRscore的ROC曲线,AUC为0.907(95% 
CI,0.814–1.000),确定了截断值为-0.843(图6E)。患者被分为PSRscore高组和低组(图6F)。低PSRscore与明显较差的治疗反应相关(图6G)。此外,PSRscore低的患者在术后肿瘤中数值上具有更多PD1+/PD-L1+细胞的基质浸润和更高的肿瘤PD-L1染色(图6H和图S6L),这通常导致对抗PD-1/PDL1疗法的适应症。<br>除了CCL20外,PSRscore还包括CCL3、IL-15Ra和CXCL5的术前血清水平。较高的血清CCL3和IL-15Ra水平以及较低的CXCL5水平与较差的治疗反应相关(图S6K)。研究表明,CCL3参与了不同癌症中的免疫逃逸和化疗抵抗。高水平的CCL3与Tregs、肿瘤相关巨噬细胞(TAMs)和髓系源性抑制细胞(MDSCs)的肿瘤内浸润增加相关。CCL3驱动的TAMs招募已被认为是转移性巢穴的驱动事件。已经开发了CCL3的中和抗体和抑制剂,并在抗癌治疗中显示出潜力。 目前对IL-15Ra和CXCL5在化疗抵抗中的作用了解有限,需要更多研究来探索它们在胃癌中的功能。<br>PSRscore评分系统有助于分层胃腺癌患者,并筛选出那些可能不能仅通过术前化疗获益的患者。对于这组患者,我们的工作强烈暗示患者可能从免疫治疗的组合中受益,如免疫检查点抑制剂(ICIs)或CCL3/20中和抗体/抑制剂(图6I)。可以设计前瞻性试验来验证这一策略,并需要建立一个验证队列来验证此评分系统的灵敏性和特异性。</p><h3 id="TME和血清免疫蛋白组学的预后价值">TME和血清免疫蛋白组学的预后价值</h3><p>我们进一步评估了TME和血清免疫蛋白组学的预后价值。在多变量Cox回归中包括了在单变量Cox回归中具有预测价值的所有基本临床特征以及年龄和性别(表S2和S3)。显示为OS或PFS预测因子的免疫细胞与其风险比一起列在森林图中(图S7A和S7B)。绘制了代表性生存预测因子的Kaplan-Meier曲线(图S7C–S7F)。没有免疫细胞类型是OS的独立预测因子,而CD68+巨噬细胞的浸润通过log rank测试、单变量Cox回归和多变量Cox回归证实,预测PFS缩短(图S7C)。虽然不是独立的,CD68+巨噬细胞的浸润也通过log rank测试显示为OS的负面预后因子(图S7D)。<br>显示为OS或PFS预测因子的术前和术后血清蛋白也在森林图中列出,与其风险比一起(图7A、7B、S7G和S7H)。绘制了代表性生存预测因子的Kaplan-Meier曲线(图7C、7D、S7I和S7J)。其中,高术后血清IL-10RB水平与显著缩短的OS和PFS均相关,通过log rank测试、单变量Cox回归和多变量Cox回归证实(图7C和7D)。这表明术后血清IL-10RB水平是接受术前化疗的患者的强烈负面生存预测因子。值得注意的是,术后IL-10RB水平在术前化疗后显著升高,表明其可能参与术前化疗的反应(图S1D)。关于IL-10信号在胃癌中的作用的研究还有限。需要更多的工作来了解IL-10RB在胃癌术前治疗中的作用。</p><h2 
id="讨论">讨论</h2><p>在过去的十年里,人们致力于揭示免疫在癌症中的作用。免疫疗法在胃癌治疗中取得了突破,免疫检查点抑制剂成为晚期胃或食管腺癌的一线治疗方法。然而,在胃癌围手术期治疗中,目前没有治疗方法成功挑战了化疗的主导地位。免疫被认为在患者受益于围手术期化疗中起着关键作用。现有研究重点关注肿瘤微环境中局部免疫反应,而对胃癌免疫的改善理解必须特别评估全身性免疫。我们使用血清免疫蛋白组学和经典全身性免疫炎症指标来描述全身免疫,并研究其与肿瘤微环境以及治疗反应的关联。我们发现围手术期治疗诱导了复杂的全身性免疫反应,这表现为动态的免疫蛋白组学。同时,对治疗反应更好的患者在治疗后显示出更具动态性的血清免疫蛋白组学变化。肿瘤微环境也显示与围手术期化疗的反应有关。然而,在治疗开始之前预测潜在的治疗反应将更加实际。令人兴奋的是,我们发现PD-L1和CCL20的术前血清水平是围手术期化疗反应的预测因子,与它们在免疫抑制中的已知作用一致。进一步建立了一个术前血清蛋白质组学面板用于预测反应,能够精确地筛选出可能不会单独对围手术期化疗产生反应的患者。对于这部分患者,我们相信他们将从免疫疗法和化疗的联合治疗中受益。同时,IL-10RB的术后血清水平也被确认为胃癌患者预后的强大预测因子。<br>肿瘤内PD-L1在免疫抑制和化疗抵抗中的作用已经得到确认。然而,关于可溶性PD-L1的研究有限。我们的研究发现,在化疗开始之前,患者的血清PD-L1水平存在差异。对化疗产生反应的患者往往具有较低的血清PD-L1水平。需要进一步研究可溶性PD-L1在化疗抵抗中是否发挥作用。在CCL20中也发现了类似的发现,这是一种已知参与各种癌症化疗抵抗的趋化因子。我们的研究表明,在其他癌症类型中提出的CCL20诱导的化疗抵抗模型在胃癌中可能不成立。将CCL20的变化视为化疗的结果,剥夺了临床医生在治疗前对患者进行分层和干预的主动性。相反,我们的发现显示,在化疗开始之前,对化疗产生不同反应的患者在血清免疫蛋白组学上存在差异,这提前了患者分层和干预的时间窗口。在PD-L1和CCL20的启发下,我们开发了一个用于预测围手术期化疗反应的术前血清蛋白质组学面板,称为PSRscore。通过计算四种免疫蛋白的术前血清蛋白水平,患者可以被分为两组。PSRscore低的患者往往具有较差的治疗反应,并可能从免疫疗法的联合治疗中获益。这种评分系统在患者分层方面具有很大的临床应用潜力。值得注意的是,PSRscore的建立基于一个接受铂类化疗的亚洲队列。这些免疫标志物在接受紫杉醇为基础的方案的非亚洲患者中的表现需要进一步验证。<br>我们相信血清蛋白标志物在胃癌患者术前分层中具有特殊的临床意义。几乎所有现有的胃癌分子分类都依赖于手术或内镜切除的肿瘤组织。以TCGA分类为最著名的例子,微卫星不稳定(MSI)型患者被证明更容易从免疫疗法中受益,而基因组稳定(GS)型患者对化疗反应较差。然而,这些分子分类在临床实践中很少使用。一个重要原因是大多数分子分类依赖于复杂的分子技术,如qPCR、原位杂交,甚至是组学技术,这在大多数临床中是不可获得的。此外,在胃癌中,术前获取肿瘤样本依赖于胃镜活检。胃癌存在显著的肿瘤内异质性,且活检深度有限,这在很大程度上影响了活检样本的代表性。因此,在胃切除术之前确定胃癌的分子分类一直非常困难。相比之下,血清蛋白质组学涵盖了系统和肿瘤局部特征,因此具有灵敏性和信息性。在临床中,可以轻松获取血清样本,对患者造成的损害有限。像前列腺特异性抗原(PSA)或甲胎蛋白(AFP)这样的血清蛋白标志物已经几十年用于癌症的诊断和随访。各种医院都广泛提供用于测量血清蛋白的设备和培训人员。这些因素赋予了胃癌血清蛋白质组学研究在临床上巨大的意义。未来应建立胃癌的血清蛋白分类,以指导胃癌的围手术期治疗。</p><h2 id="局限性">局限性</h2><p>研究存在一些需要注意的局限性。首先,治疗期间血清样本的数量相对较小,这限制了得出某些结论的统计能力。其次,多重免疫荧光(mIF)只测量了肿瘤微环境(TME)中的关键免疫细胞。单细胞测序可以更好地描绘TME。第三,本研究的一些结论和建议应在接受术前化疗的患者的前瞻性队列甚至随机对照试验中进行进一步验证。在解释数据时应考虑这些局限性。<br>总的来说,我们对胃癌患者的全身免疫系统和肿瘤微环境进行了描述,并展示了它们与术前化疗反应的关联。我们鉴定了用于预测治疗反应和预后的血清生物标志物。这项工作强调了全身免疫在胃癌术前化疗中的基本但很大程度上被低估的作用,支持了一种基于术前血清免疫蛋白质组学的患者分层策略,并突显了在未来研究中全面描绘免疫的重要性。</p>]]></content:encoded>
  145. <category domain="https://hexo.limour.top/tags/%E7%BF%BB%E8%AF%91/">翻译</category>
  146. <category domain="https://hexo.limour.top/tags/%E9%A2%84%E5%90%8E%E6%A8%A1%E5%9E%8B/">预后模型</category>
  147. <comments>https://hexo.limour.top/Multiplex-immune-profiling-reveals-the-role-of-serum-immune-proteomics-in-predicting-response-to-preoperative-chemotherapy-of-gastric-cancer#disqus_thread</comments>
  148. </item>
  149. <item>
  150. <title>【转载】圈中人</title>
  151. <link>https://hexo.limour.top/repost-in-circle</link>
  152. <guid>https://hexo.limour.top/repost-in-circle</guid>
  153. <pubDate>Thu, 16 Nov 2023 06:59:47 GMT</pubDate>
  154. <description>&lt;blockquote&gt;
  155. &lt;p&gt;《&lt;a href=&quot;https://hexo.limour.top/go/#aHR0cHM6Ly93ZWIuYXJjaGl2ZS5vcmcvd2ViLzIwMjMwOTA5MTcwMDQxL2h0dHBzOi8vYmxvZy5jeHBsYXkub3</description>
  156. <content:encoded><![CDATA[<blockquote><p>《<a href="https://hexo.limour.top/go/#aHR0cHM6Ly93ZWIuYXJjaGl2ZS5vcmcvd2ViLzIwMjMwOTA5MTcwMDQxL2h0dHBzOi8vYmxvZy5jeHBsYXkub3JnL3dvcmtzL2luLWNpcmNsZS8=" rel="noopener external nofollow noreferrer">圈中人</a>》 from <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9ibG9nLmN4cGxheS5vcmcv" rel="noopener external nofollow noreferrer">CXPLAY World</a></p></blockquote><h2 id="序">序</h2><p>外面很危险, 于是有人在地上画了一圈并对我说: “这个圈外很危险, 你不要随便出去, 我会帮你对付这些危险, 所以我很忙, 但我也会派人监督你.”;</p><p>监督人来了, 他对我说: &quot;你知道要怎么做了吧?但为了防止你总是不小心碰到边界, 我会给你的粗心大意一些小小的惩罚, 好让你长记性, 毕竟我要监督的人可不止你一个. &quot;, 于是监督人在圈内画了一个更小的圈.</p><p>最后, 我发现睡觉的时候翻个身也总是不小心超出监督人画的圈, 于是我自己给自己画了一个圈, 好让我只能站在原地不得动弹就再也不能睡觉, 也就不会出现无意识地超出圈外了;</p><p>但我好像忘记了我本来是可以出去的, 但是由于缺少保护自己的经验, 我也渐渐不敢出去了. 因为相比之下, 监督人的惩罚显得比圈外的危险来得更具体更具有危险性, 我也更有经验去应付.</p><h2 id="正文">正文</h2><p>这里的人常说, 如今这个地方变成这个样子完全是由于那座灯塔.</p><p>在那个我还未曾触及到的时代, 这里的格局并不是这个样子, 这里和外面的世界都是一样的一马平川无所遮拦. 没有如今形同棋盘一样的森严布局, 更没有那座灯塔. 但是不知道什么时候, 似乎是从巨墙开始, 有一部分人开始为这个地方建起了障碍.</p><p>起初只是一道篱笆, 一个土沟, 再后来不约而同地全都变成了深红色质地的石块, 这种材料从来没有听说过是从什么地方开采出来, 但就现在的状态来看, 这墙就好像是从地底自己长出来一般: 整齐划一, 密不透风, 坚不可摧. 不过现在也有人觉察到, 曾经光滑细密的红墙上出现了裂缝, 但这裂缝实质上与这墙并没有多大关系, 因为裂缝时而出现时而消失, 没有人知道红墙是如何做到这种程度的自愈的, 就好像它是活的一样.</p><p>再说起那个灯塔, 它的材质和红墙并不一样, 但据说两者建立起来的时间都是相同的. 灯塔没有密不透风的样子, 它的表面就像积木拼接一般有很明显的缝隙, 甚至有的地方还有空缺, 不过这些缺陷也和墙上的缝隙一样时而消失时而出现. 但就我认知之中看来, 人人都说这座灯塔是这里的每个人亲手筑起的, 却没有人知道这墙的成因, 可能是因为人人心里都明白但都心照不宣, 也可能是因为它本来就是自己从这块大地上 “生长” 出来的, 因为它的材质从今天看始终都不像是这个大陆上应该存在的东西.</p><hr><p>那片生长在红墙上的蓝色平原, 听说最开始的时候和墙的颜色一样, 是红色的, 后来平原的住民们逐渐发现所有的植物开始变成了蓝色, 土壤也变成了深蓝色. 由此原来单调的墙上多出来一片蓝色, 也吸引了更多的斗篷们过来在平原上定居. 
一位从红色时期就开始在这里居住的斗篷曾经和我说, 目前来说这里变成蓝色和红色的时候并没有什么差别, 但其他斗篷们却都不约而同的住进了这里, 问起这些新来者理由却一个二个都含糊其词甚至回答不上来到底是为什么来到这里, 听得最多的一个理由就是: “因为大家都来这里了.” 事实真是这样的话, 那这个平原上早就已经人满为患了, 有相当部分人踏足这里之后就离去了, 还有部分人暂住过一段时间后也悄然离去, 他们留下的痕迹会很快地消失, 绵密的蓝色植被会再次长满被斗篷践踏的区域, 平原变得就好像从来没有人来过的一样.</p><p>当我问起这位斗篷他自己来这里的理由的时候, 它也沉默了, 但没有很久, 它问我: “你喜欢听故事吗?”, 我说: “是真相吗?”, “我也… 不知道.” 他回答我.</p><p>我接着说: “好吧, 虽然我不是来听故事的, 但如果是你愿意说给我听的故事, 我也愿闻其详.”</p><p>斗篷没有办法呈现出背后人的表情, 所以我在这面具上也办法没找到什么破绽, 它拿起了篝火边的一根细木柴在灰烬边画了起来.</p><p>它画了一个圆圈: “我们现在的位置应该是这里, 应该是.” 它在圆圈边上加了一个小圆圈, 指着它说.</p><p>“但如果我再把里面的「棋盘」画出来.” 它在大圆中画了几个小圆.</p><p>“如果我没猜错, 你我都是这棋盘里面的人, 你应该会明白.” 它接着在大圆和几个小圆之间用另外一个内圆隔开了, 出现了一个同心圆套一堆小圆圈的图画. 最后它在同心圆里面套了更多的圆, 为了套下更多的圆还把几个小圆擦去了, 几个小圆最后被数个大圆套在里面, 整个画面就好像一个靶子一样.</p><p>它放下了木柴, 对我说 “曾经我也是这样一个个小圆中的人, 总以为外面有什么好东西, 想要出去看看.”</p><p>“后来, 我真的出去了, 这个小圆不再是束缚, 但我发现外面的世界有更多的圆圈.” 它指着那些同心圆一路向外, 最后到了代表我们位置的那个大圆边缘上的小圆.</p><p>“最后我费尽千辛万苦, 终于从这些圆里出来了, 到了这里, 现在被叫做「蓝色平原」的地方.” 它又拿起细木柴握在手心.</p><p>“还是红色的那个时候, 我心里就想, 这里也许就是这世界的最后一个圆了吧?”</p><p>“说实话, 现在我们这个圆外面的景色从来就没变过, 我最后本来打算继续往外去亲眼看看那些五彩斑斓的风景的.”</p><p>它沉默下来, 用木柴继续在最外围的圆上加了一个更大的圆, 不过这次画布的面积不太够用了, 只画了一部分.</p><p>它指着新画的那个圆对我说: “有一天我在平原上看到一个蓝色的斗篷, 我一如既往地和它打着招呼, 但是它说着一口不是我们的语言, 它好像也听不懂我在说什么.”</p><p>“蓝色的斗篷好像也明白了我们两人语言不通, 就在脚下开始用手指在雪地上画画.”</p><p>“一个圈, 两个圈, 三个圈. 我已经记不清他当时画了多少个圈了, 最后同心圆的外围上也多出了一个代表着当时位置的小圆.”</p><p>“看起来就和现在这幅画几乎一模一样, 我现在都不知道它当时到底是在往圈外走还是进到一个新的圈里. 「蓝色平原」这个小圈又在什么位置呢?” 它又放下了木柴.</p><p>“蓝斗篷画完之后我明白了它的意思, 它好像要往我曾经来过的路上走, 我也没有阻拦它.”</p><p>“因为我知道, 它以为它正在向圈外走, 就和我当时一样, 也许是以为圈外有什么好东西.”</p><p>我说: “那你觉得你有找到什么 ‘好东西’ 吗?”</p><p>它说: “没什么好东西, 但比起最里面令人窒息的小圈, 这里确实呼吸以及行动上更加自由, 不过…”</p><p>“不过?” 我接他的话道.</p><p>“圈里来的人总是以为这里就是真正的圈外, 喜欢在这些相对自由的地方胡作非为, 大声喧哗. 不过最后它们留下的痕迹总是会被自然而然地抹除, 也无所谓了.” 它的语气变得轻松了起来.</p><p>我问: “你觉得这些 ‘圈’, 或者这些墙是从什么地方来的? 我从里面出来的时候好像没有遇到很多的这些 ‘圈’ ?”</p><p>它说: “我不知道, 我也和后来的人谈过几次这类话题, 他们的回答和你一样, 他们从里面 ‘出来’ 的时候遇到的墙确实和我曾经遇到的数量或者规模上都有很大差别.”</p><p>最后我和它又随便聊了一些有关墙边逐渐侵入的黑藤的事情, 不过由于它很久没有出过这个平原, 所以连黑藤是什么它都不清楚, 它也表示出一种漠不关心的态度, 所以我也没有继续问下去. 
离开的时候, 它叮嘱我注意一下平原上的人, 因为它察觉到最近平原上的人越来越稀少了, 如果有什么发现, 希望能够和它分享一下.</p><hr><p>我把这类驻守在「蓝色平原」或者墙上任何地方上的人称为 “守墙人”, 他们虽然身处大多数的圈之外, 但却一般不愿意接触新的事物, 极端的守墙人甚至不愿意和斗篷乃至其他守墙人交流, 还好我遇到的是一个尚且能和其他人交流的斗篷, 不然这个平原上的很多事情我也无从得知.</p><p>之于这个 “圈” 的问题, 在「棋盘」中的时候我就曾经发现, 大部分的小圈中人都不关心圈外的事情, 甚至都不知道有圈存在, 更不用说去突破这圈了, 和这类永远不会走出圈的人打交道总是要先自己去适应它自己的圈, 否则他们也会和守墙人一样把你拒之 “圈” 外.</p><p>但相对的越往外, 这些 “圈” 的意味逐渐变得模糊不清, 没有人知道是谁建起了它, 也从来没有人宣布过自己有关于它的事迹.</p>]]></content:encoded>
  157. <category domain="https://hexo.limour.top/tags/%E8%BD%AC%E8%BD%BD/">转载</category>
  158. <comments>https://hexo.limour.top/repost-in-circle#disqus_thread</comments>
  159. </item>
  160. <item>
  161. <title>【探索】基于WebSocket的内网穿透工具</title>
  162. <link>https://hexo.limour.top/WebSocket-based-intranet-penetration-tool</link>
  163. <guid>https://hexo.limour.top/WebSocket-based-intranet-penetration-tool</guid>
  164. <pubDate>Thu, 09 Nov 2023 11:38:50 GMT</pubDate>
  165. <description>国内的服务器备案麻烦,所以很多内网服务需要使用内网穿透工具。之前尝试使用QUIC来伪装,但不稳定。现在找到了一个特征少的内网穿透工具ProxyNT,可以通过NAT和防火墙将本地服务器暴露到公网上。使用Docker部署服务端和客户端,配置相应的参数后即可使用。</description>
  166. <content:encoded><![CDATA[<p>国内的服务器除了挂个备案,不想再要了。而许多内网的服务需要在外网访问,内网穿透是必不可少的。但是用国外的服务器的话,需要过一层未知的东西,难免被误伤,融入汪洋大海也是必须的。之前折腾了一下通过套一层<a href="/Protocol-for-intranet-penetration-based-on-QUIC">QUIC</a>来伪装,不知道为什么,总是不稳定。寻寻觅觅,又找到一个特征少的内网穿透工具:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL3NhemltYS9wcm94eW50" rel="noopener external nofollow noreferrer">ProxyNT</a> 。ProxyNT是一个用python编写的基于WebSocket的反向代理服务器,可以透过NAT和防火墙将本地服务器暴露到公网上,从原理看,套上一层CDN保护公网ip也是可以的。</p><h2 id="服务端">服务端</h2><ul><li><a href="/Docker-bu-shu-Nginx-Proxy-Manager">反向代理服务</a></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/proxynt &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/proxynt &amp;&amp; nano Dockerfile &amp;&amp; nano docker-compose.yml<br>docker build -t limour/proxynt .<br>nano config.json<br>sudo docker-compose up -d<br></code></pre></td></tr></table></figure><figure class="highlight dockerfile"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs Dockerfile"><span class="hljs-keyword">FROM</span> python:<span class="hljs-number">3.9</span>-alpine<br><span class="hljs-keyword">RUN</span><span class="language-bash"> pip install -U python-snappy</span><br><span class="hljs-keyword">RUN</span><span class="language-bash"> pip install -U https://github.com/sazima/proxynt/archive/refs/heads/snappy.zip</span><br><span class="hljs-keyword">ENTRYPOINT</span><span class="language-bash"> [<span class="hljs-string">&quot;nt_server&quot;</span>, <span class="hljs-string">&quot;-c&quot;</span>, <span 
class="hljs-string">&quot;/opt/config.json&quot;</span>]</span><br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span class="hljs-string">&#x27;3.3&#x27;</span><br><span class="hljs-attr">services:</span><br> <span class="hljs-attr">proxynt:</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">unless-stopped</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;./config.json:/opt/config.json&#x27;</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;/etc/localtime:/etc/localtime:ro&#x27;</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">limour/proxynt</span><br> <br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">ngpm</span><br></code></pre></td></tr></table></figure><figure class="highlight json"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><code class="hljs 
json"><span class="hljs-punctuation">&#123;</span><br> <span class="hljs-attr">&quot;port&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-number">18888</span><span class="hljs-punctuation">,</span><br> <span class="hljs-attr">&quot;log_file&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;/dev/null&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-attr">&quot;path&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;/websocket_path&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-attr">&quot;password&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;helloworld&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-attr">&quot;admin&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">&#123;</span><br> <span class="hljs-attr">&quot;enable&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span><br> <span class="hljs-attr">&quot;admin_password&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;new_password&quot;</span><br> <span class="hljs-punctuation">&#125;</span><br><span class="hljs-punctuation">&#125;</span><br></code></pre></td></tr></table></figure><p><img src="https://img.limour.top/2023/11/09/654cc58f6ea33.webp" alt="反代 proxynt:18888"></p><h2 id="客户端">客户端</h2><ul><li><a href="/-fu-ke-GitHub-wen-jian-jia-su">GitHub 文件加速</a></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/proxynt &amp;&amp; 
<span class="hljs-built_in">cd</span> ~/app/proxynt<br><span class="hljs-comment"># pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple</span><br><span class="hljs-comment"># pip install --use-pep517 python-snappy -i https://pypi.tuna.tsinghua.edu.cn/simple</span><br>pip install -U python-snappy -i https://pypi.tuna.tsinghua.edu.cn/simple<br>pip install -U https://xxx.limour.top/token/https://github.com/sazima/proxynt/archive/refs/heads/snappy.zip<br>whereis nt_client<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano config.json<br>nt_client -c config.json <span class="hljs-comment"># 测试</span><br>nano proxynt.sh &amp;&amp; <span class="hljs-built_in">chmod</span> +x proxynt.sh<br>nano proxynt.service<br>sudo <span class="hljs-built_in">mv</span> proxynt.service /etc/systemd/system/proxynt.service<br>sudo systemctl <span class="hljs-built_in">enable</span> proxynt<br>sudo systemctl start proxynt<br>sudo systemctl status proxynt<br></code></pre></td></tr></table></figure><figure class="highlight json"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><code class="hljs json"><span class="hljs-punctuation">&#123;</span><br> <span class="hljs-attr">&quot;server&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">&#123;</span><br> <span 
class="hljs-attr">&quot;url&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;wss://limour.top:443/websocket_path&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-attr">&quot;password&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;helloworld&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-attr">&quot;compress&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><br> <span class="hljs-punctuation">&#125;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-attr">&quot;client_name&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;home_pc&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-attr">&quot;log_file&quot;</span><span class="hljs-punctuation">:</span> <span class="hljs-string">&quot;/home/limour/app/proxynt/nt.log&quot;</span><br><span class="hljs-punctuation">&#125;</span><br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/sh</span><br><span class="hljs-built_in">export</span> PYTHONPATH=/home/limour/.local/lib/python3.10/site-packages<br>/home/limour/.local/bin/nt_client -c /home/limour/app/proxynt/config.json<br></code></pre></td></tr></table></figure><figure class="highlight ini"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><code class="hljs ini"><span 
class="hljs-section">[Unit]</span><br><span class="hljs-attr">Description</span>=proxynt<br><span class="hljs-attr">After</span>=network.target<br><span class="hljs-section">[Service]</span><br><span class="hljs-attr">ExecStart</span>=/home/limour/app/proxynt/proxynt.sh<br><span class="hljs-attr">ExecReload</span>=/bin/kill -HUP <span class="hljs-variable">$MAINPID</span><br><span class="hljs-attr">Restart</span>=<span class="hljs-literal">on</span>-failure<br><span class="hljs-section">[Install]</span><br><span class="hljs-attr">WantedBy</span>=multi-user.target<br></code></pre></td></tr></table></figure><ul><li>访问 <code>https://limour.top:443/websocket_path/admin</code></li><li>看到客户端上线后,新建配置即可</li></ul><h2 id="附加-WebSSH">附加 WebSSH</h2><p>和上面的内网穿透配合,连接时host填<code>proxynt</code>,可以保证内网ssh不暴露公网的同时,又能通过公网进行ssh连接。</p><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/webssh &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/webssh &amp;&amp; nano docker-compose.yml<br>sudo docker-compose up -d<br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span class="hljs-string">&#x27;3.3&#x27;</span><br><span class="hljs-attr">services:</span><br> <span 
class="hljs-attr">webssh:</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">unless-stopped</span><br> <span class="hljs-attr">environment:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">GIN_MODE=release</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">savePass=true</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;/etc/localtime:/etc/localtime:ro&#x27;</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">jrohy/webssh:latest</span><br> <br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">ngpm</span><br></code></pre></td></tr></table></figure><p><img src="https://img.limour.top/2023/11/10/654d918353361.webp" alt="反代 webssh:5032"></p>]]></content:encoded>
  167. <category domain="https://hexo.limour.top/tags/%E6%8E%A2%E7%B4%A2/">探索</category>
  168. <category domain="https://hexo.limour.top/tags/docker/">docker</category>
  169. <category domain="https://hexo.limour.top/tags/ngpm/">ngpm</category>
  170. <category domain="https://hexo.limour.top/tags/%E5%86%85%E7%BD%91%E7%A9%BF%E9%80%8F/">内网穿透</category>
  171. <category domain="https://hexo.limour.top/tags/ssh/">ssh</category>
  172. <category domain="https://hexo.limour.top/tags/ws/">ws</category>
  173. <comments>https://hexo.limour.top/WebSocket-based-intranet-penetration-tool#disqus_thread</comments>
  174. </item>
  175. <item>
  176. <title>【记录】自建去广告的DoH服务器</title>
  177. <link>https://hexo.limour.top/Self-built-ad-blocking-DoH-server</link>
  178. <guid>https://hexo.limour.top/Self-built-ad-blocking-DoH-server</guid>
  179. <pubDate>Sat, 28 Oct 2023 12:56:54 GMT</pubDate>
  180. <description>&lt;h2 id=&quot;进行部署&quot;&gt;进行部署&lt;/h2&gt;
  181. &lt;ul&gt;
  182. &lt;li&gt;&lt;a href=&quot;/Docker-bu-shu-Nginx-Proxy-Manager&quot;&gt;反代服务&lt;/a&gt;&lt;/li&gt;
  183. &lt;/ul&gt;
  184. &lt;figure class=&quot;highlight bash&quot;&gt;&lt;table&gt;&lt;tr&gt;</description>
  185. <content:encoded><![CDATA[<h2 id="进行部署">进行部署</h2><ul><li><a href="/Docker-bu-shu-Nginx-Proxy-Manager">反代服务</a></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/adguard &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/adguard &amp;&amp; nano docker-compose.yml<br>sudo docker-compose up -d <span class="hljs-comment"># 面板端口 3000</span><br><span class="hljs-comment"># /opt/adguardhome/letsencrypt/live/npm-1/fullchain.pem</span><br><span class="hljs-comment"># /opt/adguardhome/letsencrypt/live/npm-1/privkey.pem</span><br>sed -i <span class="hljs-string">&#x27;s/allow_unencrypted_doh: false/allow_unencrypted_doh: true/&#x27;</span> ./conf/AdGuardHome.yaml &amp;&amp; sudo docker-compose restart<br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span class="hljs-string">&#x27;3.3&#x27;</span><br><span class="hljs-attr">services:</span><br> <span class="hljs-attr">adguard:</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">unless-stopped</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span 
class="hljs-string">&#x27;./work:/opt/adguardhome/work&#x27;</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;./conf:/opt/adguardhome/conf&#x27;</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;/root/base/NGPM/letsencrypt:/opt/adguardhome/letsencrypt&#x27;</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;/etc/localtime:/etc/localtime:ro&#x27;</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">adguard/adguardhome:latest</span><br> <br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">ngpm</span><br></code></pre></td></tr></table></figure><p><img src="https://img.limour.top/2023/10/28/653d065c29ba1.webp" alt=""></p><h2 id="DNS设置">DNS设置</h2><ul><li>导航栏-设置-DNS设置</li><li>DNS 服务配置中启用DNSSEC</li></ul><p><img src="https://img.limour.top/2023/10/28/653d06f4936f9.webp" alt=""><br><img src="https://img.limour.top/2023/10/28/653d07482ee29.webp" alt=""></p><h2 id="DoH设置">DoH设置</h2><ul><li>导航栏-设置-DNS设置</li><li>加密中启用加密</li><li>证书可以设置为npm自动申请的证书</li><li>反代 <code>/dns-query</code>, <code>token</code>保密不要泄露</li><li><code>token</code>后面没有<code>/</code>, <code>dns-query</code>后面有<code>/</code></li><li>在chrome的设置-隐私和安全-安全-DNS中填入<code>https://my.com/token</code></li><li>回到仪表盘,看看有没有记录到DNS查询</li></ul><p><img src="https://img.limour.top/2023/10/28/653d07f74eedb.webp" alt=""></p>]]></content:encoded>
  186. <category domain="https://hexo.limour.top/tags/docker/">docker</category>
  187. <category domain="https://hexo.limour.top/tags/ngpm/">ngpm</category>
  188. <category domain="https://hexo.limour.top/tags/DoH/">DoH</category>
  189. <comments>https://hexo.limour.top/Self-built-ad-blocking-DoH-server#disqus_thread</comments>
  190. </item>
  191. <item>
  192. <title>【探索】基于QUIC的内网穿透协议</title>
  193. <link>https://hexo.limour.top/Protocol-for-intranet-penetration-based-on-QUIC</link>
  194. <guid>https://hexo.limour.top/Protocol-for-intranet-penetration-based-on-QUIC</guid>
  195. <pubDate>Fri, 27 Oct 2023 12:46:07 GMT</pubDate>
  196. <description>&lt;h2 id=&quot;环境和依赖&quot;&gt;环境和依赖&lt;/h2&gt;
  197. &lt;ul&gt;
  198. &lt;li&gt;&lt;a href=&quot;/-ji-lu--an-zhuang-npsfrp-fu-wu-duan-yu-ke-hu-duan&quot;&gt;内网穿透服务&lt;/a&gt;&lt;/li&gt;
  199. &lt;li&gt;&lt;a href=&quot;/Docker-bu-shu-</description>
  200. <content:encoded><![CDATA[<h2 id="环境和依赖">环境和依赖</h2><ul><li><a href="/-ji-lu--an-zhuang-npsfrp-fu-wu-duan-yu-ke-hu-duan">内网穿透服务</a></li><li><a href="/Docker-bu-shu-Nginx-Proxy-Manager">证书自动申请服务</a></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/base/NPS &amp;&amp; <span class="hljs-built_in">cd</span> ~/base/NPS &amp;&amp; <span class="hljs-built_in">mkdir</span> conf<br>nano docker-compose.yml<br>nano conf/nps.conf<br><span class="hljs-built_in">touch</span> conf/&#123;clients,hosts,tasks&#125;.json<br>sudo docker-compose up -d<br><span class="hljs-comment"># 反代 dashboard 8080</span><br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span class="hljs-string">&#x27;3.3&#x27;</span><br><span class="hljs-attr">services:</span><br> <span class="hljs-attr">nps:</span><br> <span class="hljs-attr">container_name:</span> <span class="hljs-string">nps</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">unless-stopped</span><br> <span 
class="hljs-attr">ports:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;8025:8025&#x27;</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;6000-6002:6000-6002/udp&#x27;</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;./conf:/conf&#x27;</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;/etc/localtime:/etc/localtime:ro&#x27;</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">yisier1/nps</span><br> <br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">ngpm</span><br></code></pre></td></tr></table></figure><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span 
class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br><span class="line">58</span><br><span class="line">59</span><br><span class="line">60</span><br><span class="line">61</span><br><span class="line">62</span><br><span class="line">63</span><br><span class="line">64</span><br><span class="line">65</span><br><span class="line">66</span><br><span class="line">67</span><br><span class="line">68</span><br><span class="line">69</span><br><span class="line">70</span><br><span class="line">71</span><br><span class="line">72</span><br><span class="line">73</span><br><span class="line">74</span><br><span class="line">75</span><br><span class="line">76</span><br><span class="line">77</span><br><span class="line">78</span><br><span class="line">79</span><br><span class="line">80</span><br><span class="line">81</span><br><span class="line">82</span><br></pre></td><td class="code"><pre><code class="hljs conf">appname = nps<br>#Boot mode(dev|pro)<br>runmode = pro<br> <br>#HTTP(S) proxy port, no startup if empty<br>http_proxy_ip=0.0.0.0<br>http_proxy_port=18081<br> <br>##bridge<br>bridge_type=tcp<br>bridge_port=8024<br>bridge_ip=0.0.0.0<br>tls_bridge_port=8025<br>tls_enable=true<br><br>#Traffic data persistence interval(minute)<br>#Ignorance means no 
persistence<br>#flow_store_interval=1<br> <br># log level LevelEmergency-&gt;0 LevelAlert-&gt;1 LevelCritical-&gt;2 LevelError-&gt;3 LevelWarning-&gt;4 LevelNotice-&gt;5 LevelInformational-&gt;6 LevelDebug-&gt;7<br>log_level=7<br>#log_path=nps.log<br> <br>#Whether to restrict IP access, true or false or ignore<br>#ip_limit=true<br> <br>#allow_ports=9001-9009,10001,11000-12000<br> <br>#Web management multi-user login<br>allow_user_login=false<br>allow_user_register=false<br>allow_user_change_username=false<br> <br>#extension<br>allow_flow_limit=false<br>allow_rate_limit=false<br>allow_tunnel_num_limit=false<br>allow_local_proxy=false<br>allow_connection_num_limit=false<br>allow_multi_ip=false<br>system_info_display=true<br> <br>#cache<br>http_cache=false<br>http_cache_length=100<br> <br>#get origin ip<br>http_add_origin_header=true<br> <br>#pprof debug options<br>#pprof_ip=0.0.0.0<br>#pprof_port=9999<br> <br>#client disconnect timeout<br>disconnect_timeout=60<br> <br># 以下的需要进行配置<br># Public password, which clients can use to connect to the server<br># After the connection, the server will be able to open relevant ports and parse related domain names according to its own configuration file.<br>public_vkey=&lt;16个字符&gt;<br> <br>#Web API unauthenticated IP address(the len of auth_crypt_key must be 16)<br>#Remove comments if needed<br>auth_key=&lt;24个字符&gt;<br>auth_crypt_key=&lt;16个字符&gt;<br> <br>#web<br>web_host=limour.top<br>web_username=Limour<br>web_password=&lt;16个字符&gt;<br>web_port = 8080<br>web_ip=0.0.0.0<br>web_open_ssl=false<br>web_base_url=<br>open_captcha=true<br># if web under proxy use sub path. 
like http://host/nps need this.<br>#web_base_url=/nps<br> <br>#p2p<br>p2p_ip=&lt;写服务器的ip&gt;<br>p2p_port=6000<br># 设置为6000,请在控制台防火墙开放6000~6002(额外添加2个端口)udp端口<br></code></pre></td></tr></table></figure><h2 id="配置端口映射">配置端口映射</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano Port-Hopping.sh &amp;&amp; <span class="hljs-built_in">chmod</span> +x Port-Hopping.sh<br>nano /etc/systemd/system/Port-Hopping.service<br>systemctl <span class="hljs-built_in">enable</span> Port-Hopping &amp;&amp; systemctl start Port-Hopping &amp;&amp; systemctl status Port-Hopping &amp;&amp; iptables -t nat -L<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-comment"># IPv4</span><br>/usr/sbin/iptables -t nat -A PREROUTING -i eth0 -p udp --dport 32768:61000 -j DNAT --to-destination :3234<br><span class="hljs-comment"># IPv6</span><br>/usr/sbin/ip6tables -t nat -A PREROUTING -i eth0 -p udp --dport 32768:61000 -j DNAT --to-destination :3234<br></code></pre></td></tr></table></figure><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><code class="hljs service">[Unit]<br>Description=Port-Hopping<br>After=network.target 
docker.service<br>[Service]<br>ExecStart=/root/Port-Hopping.sh<br>Restart=on-failure<br>[Install]<br>WantedBy=multi-user.target<br></code></pre></td></tr></table></figure><h2 id="配置quic">配置quic</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs bash">sudo docker network create sswitch<br><span class="hljs-built_in">mkdir</span> -p ~/app/quic &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/quic &amp;&amp; nano docker-compose.yml<br>nano hysteria.yaml<br>sudo docker-compose up -d &amp;&amp; sudo docker-compose logs<br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span class="hljs-string">&#x27;3.9&#x27;</span><br><span class="hljs-attr">services:</span><br> <span class="hljs-attr">hysteria:</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">tobyxdd/hysteria</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">always</span><br> <span class="hljs-attr">extra_hosts:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;host.docker.internal:host-gateway&#x27;</span><br> <span 
class="hljs-attr">ports:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&#x27;3234:3234/udp&#x27;</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">./hysteria.yaml:/etc/hysteria.yaml</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">/root/base/NGPM/letsencrypt:/home/ubuntu/letsencrypt</span><br> <span class="hljs-attr">command:</span> [<span class="hljs-string">&quot;server&quot;</span>, <span class="hljs-string">&quot;-c&quot;</span>, <span class="hljs-string">&quot;/etc/hysteria.yaml&quot;</span>]<br> <br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">sswitch</span><br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">listen:</span> <span class="hljs-string">:3234</span> <br> <br><span class="hljs-attr">tls:</span><br> <span class="hljs-attr">cert:</span> <span class="hljs-string">/home/ubuntu/letsencrypt/live/npm-1/fullchain.pem</span><br> <span class="hljs-attr">key:</span> <span 
class="hljs-string">/home/ubuntu/letsencrypt/live/npm-1/privkey.pem</span><br> <br><span class="hljs-attr">auth:</span><br> <span class="hljs-attr">type:</span> <span class="hljs-string">password</span><br> <span class="hljs-attr">password:</span> <span class="hljs-string">Se7RAuFZ8Lzg</span> <br> <br><span class="hljs-attr">bandwidth:</span><br> <span class="hljs-attr">up:</span> <span class="hljs-number">3</span> <span class="hljs-string">mbps</span><br> <span class="hljs-attr">down:</span> <span class="hljs-number">3</span> <span class="hljs-string">mbps</span><br> <br><span class="hljs-attr">masquerade:</span> <br> <span class="hljs-attr">type:</span> <span class="hljs-string">proxy</span><br> <span class="hljs-attr">proxy:</span><br> <span class="hljs-attr">url:</span> <span class="hljs-string">https://hexo.limour.top/</span> <br> <span class="hljs-attr">rewriteHost:</span> <span class="hljs-literal">true</span><br></code></pre></td></tr></table></figure><h2 id="测试转发">测试转发</h2><ul><li>在客户端新建<code>config.yaml</code>, 写入以下内容</li></ul><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">server:</span> <span class="hljs-string">hexo.limour.top:32768-61000</span><br> <br><span class="hljs-attr">auth:</span> <span class="hljs-string">Se7RAuFZ8Lzg</span> <br> <br><span class="hljs-attr">bandwidth:</span> <br> <span class="hljs-attr">up:</span> <span 
class="hljs-number">3</span> <span class="hljs-string">mbps</span><br> <span class="hljs-attr">down:</span> <span class="hljs-number">3</span> <span class="hljs-string">mbps</span><br> <br><span class="hljs-comment">#socks5:</span><br><span class="hljs-comment"># listen: 127.0.0.1:1580 </span><br> <br><span class="hljs-comment">#http:</span><br><span class="hljs-comment"># listen: 127.0.0.1:8580 </span><br> <br><span class="hljs-attr">tcpForwarding:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-attr">listen:</span> <span class="hljs-number">127.0</span><span class="hljs-number">.0</span><span class="hljs-number">.1</span><span class="hljs-string">:8025</span> <br> <span class="hljs-attr">remote:</span> <span class="hljs-string">host.docker.internal:8025</span> <br></code></pre></td></tr></table></figure><h2 id="测试穿透">测试穿透</h2><figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs powershell">.\npc.exe <span class="hljs-literal">--server</span>=<span class="hljs-number">127.0</span>.<span class="hljs-number">0.1</span>:<span class="hljs-number">8024</span> <span class="hljs-literal">-vkey</span>=&lt;vkey&gt; <span class="hljs-literal">-type</span>=tcp<br></code></pre></td></tr></table></figure><h2 id="客户端示例">客户端示例</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/quic-npc &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/quic-npc &amp;&amp; nano docker-compose.yml<br>nano config.yaml<br>sudo docker-compose up -d &amp;&amp; sudo docker-compose logs<br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span 
class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span class="hljs-string">&#x27;3.3&#x27;</span><br><span class="hljs-attr">services:</span><br> <span class="hljs-attr">quic_nps:</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">tobyxdd/hysteria</span><br> <span class="hljs-attr">network_mode:</span> <span class="hljs-string">host</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">always</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">./config.yaml:/etc/config.yaml</span><br> <span class="hljs-attr">command:</span> [<span class="hljs-string">&quot;--config&quot;</span>, <span class="hljs-string">&quot;/etc/config.yaml&quot;</span>]<br> <br> <span class="hljs-attr">npc_lk:</span><br> <span class="hljs-attr">depends_on:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">quic_nps</span><br> <span class="hljs-attr">network_mode:</span> <span class="hljs-string">host</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">yisier1/npc</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">unless-stopped</span><br> <span class="hljs-attr">command:</span> <span class="hljs-string">-server=127.0.0.1:8025</span> <span class="hljs-string">-vkey=&lt;vkey&gt;</span> <span class="hljs-string">-tls_enable=true</span><br></code></pre></td></tr></table></figure><figure class="highlight 
yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">server:</span> <span class="hljs-string">hexo.limour.top:32768-61000</span><br> <br><span class="hljs-attr">auth:</span> <span class="hljs-string">Se7RAuFZ8Lzg</span> <br> <br><span class="hljs-attr">bandwidth:</span> <br> <span class="hljs-attr">up:</span> <span class="hljs-number">3</span> <span class="hljs-string">mbps</span><br> <span class="hljs-attr">down:</span> <span class="hljs-number">3</span> <span class="hljs-string">mbps</span><br> <br><span class="hljs-attr">tcpForwarding:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-attr">listen:</span> <span class="hljs-number">127.0</span><span class="hljs-number">.0</span><span class="hljs-number">.1</span><span class="hljs-string">:8025</span> <br> <span class="hljs-attr">remote:</span> <span class="hljs-string">host.docker.internal:8025</span> <br></code></pre></td></tr></table></figure>]]></content:encoded>
  201. <category domain="https://hexo.limour.top/tags/%E6%8E%A2%E7%B4%A2/">探索</category>
  202. <category domain="https://hexo.limour.top/tags/docker/">docker</category>
  203. <category domain="https://hexo.limour.top/tags/ngpm/">ngpm</category>
  204. <category domain="https://hexo.limour.top/tags/%E5%86%85%E7%BD%91%E7%A9%BF%E9%80%8F/">内网穿透</category>
  205. <comments>https://hexo.limour.top/Protocol-for-intranet-penetration-based-on-QUIC#disqus_thread</comments>
  206. </item>
  207. <item>
  208. <title>【学习】孟德尔随机化</title>
  209. <link>https://hexo.limour.top/Mendelian-Randomization</link>
  210. <guid>https://hexo.limour.top/Mendelian-Randomization</guid>
  211. <pubDate>Sat, 14 Oct 2023 09:18:54 GMT</pubDate>
  212. <description>&lt;h2 id=&quot;MR定义&quot;&gt;MR定义&lt;/h2&gt;
  213. &lt;p&gt;孟德尔随机化是一种基于全基因组测序数据(GWAS数据),利用单核苷酸多态性(SNPs)作为工具变量(IV),用于揭示因果关系的新型流行病学方法,相较于队列研究等观察性研究,暴露在出生前便已确定,较少受到反向因果及混杂因素的影响</description>
  214. <content:encoded><![CDATA[<h2 id="MR定义">MR定义</h2><p>孟德尔随机化是一种基于全基因组测序数据(GWAS数据),利用单核苷酸多态性(SNPs)作为工具变量(IV),用于揭示因果关系的新型流行病学方法,相较于队列研究等观察性研究,暴露在出生前便已确定,较少受到反向因果及混杂因素的影响,因而能够有效减少偏倚。<br><img src="https://img.limour.top/2023/10/14/652a61ab222a4.webp" alt="RCT与MR的比较"><br>MR的核心是运用遗传学数据作为桥梁,来探索某一暴露和某一结局之间的因果关联。与RCT将参与者随机分配到试验组或对照组类似,MR研究基于影响危险因素的一个或多个等位基因,对参与基因进行&quot;随机化&quot;(自然的随机化),以确定这些遗传变异的携带者与非携带者相比,是否具有不同的疾病发生风险,因此,孟德尔随机化可以被认为类似于<a href="https://hexo.limour.top/go/#aHR0cHM6Ly93d3cubmNiaS5ubG0ubmloLmdvdi9wbWMvYXJ0aWNsZXMvUE1DMjQ1ODE0NA==" rel="noopener external nofollow noreferrer">&quot;自然的随机对照试验&quot;</a>。<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9tci1kaWN0aW9uYXJ5Lm1yY2lldS5hYy51ay8=" rel="noopener external nofollow noreferrer">MR的相关术语</a></p><h2 id="理论假设">理论假设</h2><ol><li>the variant is associated with the exposure</li><li>the variant is not associated with the outcome via a confounding pathway</li><li>the variant does not affect the outcome directly, only possibly indirectly via the exposure</li></ol><p><img src="https://img.limour.top/2023/10/14/652a651b96f98.webp" alt="孟德尔随机化框架的有向无环图表示"></p><ol><li>关联性假设:变异与暴露有关</li><li>独立性假设:变异与结果之间没有通过混杂途径相关</li><li>排他性假设:变异不直接影响结果,只可能通过暴露途径间接影响</li></ol><ul><li>关联性假设:p值,F统计量,R^2</li><li>排他性假设:与结局的相关性计算时,p值要大于0.05</li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9kb2kub3JnLzEwLjEwOTMvaWplL2R5djA4MA==" rel="noopener external nofollow noreferrer">MR-Egger</a>回归相比线性回归可以弱化对排他性假设的要求</li></ul><h2 id="适用范围">适用范围</h2><ul><li>不确定先有鸡还是先有蛋,比如,到底是抑郁导致肺癌还是肺癌导致了抑郁?</li><li>暴露因素难以测量,或者花费昂贵。例如,水溶性维生素等生物标志物的检测金标准可能成本太高,大样本无法承受,或者空腹血糖的测量需要隔夜空腹,可能不现实。</li><li>暴露与结局数据来自同一人群,且不存在或存在少量可接受范围内的样本重叠</li></ul><h2 id="配置环境">配置环境</h2><ul><li><a href="/-ji-lu--an-zhuang-sheng-xin-de-dai-ma-bian-xie-huan-jing">基础编程环境</a></li><li><a href="/-fu-ke-GitHub-wen-jian-jia-su">GitHub 下载加速</a></li><li><a href="/-ji-lu-SOCKS5-zhuan-QUIC">可能需要用到的加速服务</a></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span 
class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br></pre></td><td class="code"><pre><code class="hljs bash">conda create -n MR -c conda-forge r-devtools -y<br>conda activate MR<br>conda install -c conda-forge r-irkernel -y<br>Rscript -e <span class="hljs-string">&quot;IRkernel::installspec(name=&#x27;MR&#x27;, displayname=&#x27;MR&#x27;)&quot;</span><br><span class="hljs-comment"># Rscript -e &quot;usethis::edit_r_environ()&quot; # 设置 GITHUB_PAT</span><br><span class="hljs-comment"># nano ~/.Renviron # MRCIEU 真是超喜欢GITHUB,要访问一万次 api.github.com</span><br>conda install -c conda-forge r-rmarkdown -y<br>conda install -c conda-forge r-meta -y<br>wget https://github.com/MRCIEU/TwoSampleMR/archive/refs/heads/master.zip -O TwoSampleMR.zip<br>Rscript -e <span class="hljs-string">&quot;devtools::install_local(&#x27;TwoSampleMR.zip&#x27;)&quot;</span><br>wget https://github.com/MRCIEU/MRInstruments/archive/refs/heads/master.zip -O MRInstruments.zip<br>Rscript -e <span class="hljs-string">&quot;devtools::install_local(&#x27;MRInstruments.zip&#x27;)&quot;</span><br>conda install -c conda-forge r-susier -y<br>conda install -c bioconda bioconductor-variantannotation -y<br>wget https://github.com/MRCIEU/gwasglue/archive/refs/heads/master.zip -O gwasglue.zip<br>Rscript -e <span class="hljs-string">&quot;devtools::install_local(&#x27;gwasglue.zip&#x27;)&quot;</span><br><span 
class="hljs-comment"># wget https://github.com/MRCIEU/genetics.binaRies/archive/refs/heads/master.zip -O genetics.binaRies.zip</span><br><span class="hljs-comment"># Rscript -e &quot;devtools::install_local(&#x27;genetics.binaRies.zip&#x27;)&quot;</span><br>conda install -c bioconda plink -y<br><span class="hljs-comment"># whereis plink # /opt/conda/envs/MR/bin/plink</span><br>Rscript -e <span class="hljs-string">&#x27;install.packages(&quot;MendelianRandomization&quot;)&#x27;</span><br></code></pre></td></tr></table></figure><h2 id="数据来源">数据来源</h2><ul><li>精神病学基因组:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9wZ2MudW5jLmVkdS8=" rel="noopener external nofollow noreferrer">PGC</a></li><li>社会科学遗传学:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly93d3cudGhlc3NnYWMub3JnLw==" rel="noopener external nofollow noreferrer">SSGAC</a></li><li>大脑健康和疾病:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9jdGcuY25jci5ubC9zb2Z0d2FyZS9zdW1tYXJ5X3N0YXRpc3RpY3M=" rel="noopener external nofollow noreferrer">CTG</a></li><li>MRCIEU汇总数据库:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9nd2FzLm1yY2lldS5hYy51ay8=" rel="noopener external nofollow noreferrer">IEU</a></li><li>GWAS研究目录:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly93d3cuZWJpLmFjLnVrL2d3YXMvc2VhcmNo" rel="noopener external nofollow noreferrer">NHGRI-EBI</a></li><li><a href="/shi-yong-GATK-zhao-SNP">自己分析出数据</a></li><li><a href="https://od.limour.top/archives/GWAS/MR">更多相关网站</a></li></ul><h3 id="一些参考数据">一些参考数据</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs bash">wget http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz<br>tar -zxvf 1kg.v3.tgz<br><span class="hljs-comment"># mkdir EUR &amp;&amp; mv EUR.* EUR</span><br></code></pre></td></tr></table></figure><h3 id="示例结局数据">示例结局数据</h3><ul><li>浏览器下载 <a 
href="https://hexo.limour.top/go/#aHR0cHM6Ly9maWdzaGFyZS5jb20vbmRvd25sb2FkZXIvZmlsZXMvNDAwMzY2ODQ=" rel="noopener external nofollow noreferrer">ADHD2022_iPSYCH_deCODE_PGC.meta.gz</a></li><li><a href="/Rclone-bei-fen-VPS-shu-ju-dao-onedrive">上传到服务器</a></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-comment"># zcat ADHD2022_iPSYCH_deCODE_PGC.meta.gz | head</span><br>CHR SNP BP A1 A2 FRQ_A_38691 FRQ_U_186843 INFO OR SE P Direction Nca Nco<br>8 rs62513865 101592213 C T 0.925 0.937 0.981 0.99631 0.0175 0.8325 +---+++0-++-+ 38691 186843<br>8 rs79643588 106973048 G A 0.91 0.917 1 1.00411 0.0159 0.7967 ++--++-+-+-++ 38691 186843<br>8 rs17396518 108690829 T G 0.561 0.577 0.998 0.99611 0.0096 0.6876 --++-++??-+-- 37367 184388<br>8 rs983166 108681675 A C 0.57 0.586 0.996 0.99491 0.0096 0.5956 --++-++++-+-- 38691 186843<br>8 rs28842593 103044620 T C 0.839 0.836 0.982 0.98314 0.0135 0.2081 ----++0+??--+ 37504 184525<br>8 rs7014597 104152280 G C 0.824 0.824 0.997 0.99950 0.0122 0.9679 +-++-+++++--- 38691 186843<br>8 rs3134156 100479917 T C 0.841 0.833 0.997 0.98866 0.0128 0.3762 -+----+--++-- 38691 186843<br>8 rs6980591 103144592 A C 0.783 0.79 1 1.01106 0.0108 0.3075 ++-++---+++++ 38691 186843<br>8 rs72670434 108166508 A T 0.642 0.623 0.983 1.00672 0.0103 0.5171 +++-+++--+++- 38691 186843<br></code></pre></td></tr></table></figure><figure class="highlight txt"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span 
class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><code class="hljs txt">CHR Chromosome (hg19)<br>SNP Marker name<br>BP Base pair location (hg19)<br>A1 Reference allele for OR (may or may not be minor allele)<br>A2 Alternative allele<br>FRQ_A_38691 allele frequency of A1 in 38,691 ADHD cases<br>FRQ_U_186843 allele frequency of A1 in 186,843 controls<br>INFO Imputation information score (the reported imputation INFO score is a weighted average across the<br>cohorts contributing to the meta-analysis for that variant)<br>OR Odds ratio for the effect of the A1 allele<br>SE Standard error of the log(OR)<br>P P-value for association test in the meta-analysis<br>Direction direction of effect in the included cohorts<br>Nca number of cases with variant information<br>Nco number of controls with variant information<br></code></pre></td></tr></table></figure><p>其中<code>SNP</code>,<code>Effect allele</code>,<code>Beta(OR)</code>,<code>SE</code>,<code>P</code>这五列是必须的。遇到没有提供EAF的数据,可以<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL0hhb2Jpblpob3UvR2V0X01S" rel="noopener external nofollow noreferrer">匹配千人基因组数据的EAF</a>,<code>get_eaf_from_1000G</code>。</p><h3 id="示例暴露数据">示例暴露数据</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs bash">wget -c https://gwas.mrcieu.ac.uk/files/ieu-a-2/ieu-a-2.vcf.gz<br></code></pre></td></tr></table></figure><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span 
class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br></pre></td><td class="code"><pre><code class="hljs R">VCF_dat <span class="hljs-operator">=</span> VariantAnnotation<span class="hljs-operator">::</span>readVcf<span class="hljs-punctuation">(</span><span class="hljs-string">&#x27;~/upload/GWAS/IEU/ieu-a-2.vcf.gz&#x27;</span><span class="hljs-punctuation">)</span><br>exp_dat <span class="hljs-operator">=</span> gwasglue<span class="hljs-operator">::</span>gwasvcf_to_TwoSampleMR<span class="hljs-punctuation">(</span>vcf <span class="hljs-operator">=</span> VCF_dat<span class="hljs-punctuation">)</span><br>saveRDS<span class="hljs-punctuation">(</span>file <span class="hljs-operator">=</span> <span class="hljs-string">&#x27;ieu-a-2.exp_dat&#x27;</span><span class="hljs-punctuation">,</span> exp_dat<span class="hljs-punctuation">)</span><br>exp_dat <span class="hljs-operator">=</span> subset<span class="hljs-punctuation">(</span>exp_dat<span class="hljs-punctuation">,</span> pval.exposure 
<span class="hljs-operator">&lt;</span> <span class="hljs-number">5e-08</span><span class="hljs-punctuation">)</span> <span class="hljs-comment"># 关联性假设</span><br><span class="hljs-comment"># 去除连锁不平衡</span><br><span class="hljs-comment"># exp_dat = TwoSampleMR::clump_data(dat = exp_dat, clump_kb = 10000, clump_r2 = 0.001) # MRCIEU太喜欢用cloud api了</span><br>fix_ld_clump_local <span class="hljs-operator">=</span> <span class="hljs-keyword">function</span> <span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">,</span> tempfile<span class="hljs-punctuation">,</span> clump_kb<span class="hljs-punctuation">,</span> clump_r2<span class="hljs-punctuation">,</span> clump_p<span class="hljs-punctuation">,</span> bfile<span class="hljs-punctuation">,</span> plink_bin<span class="hljs-punctuation">)</span> <span class="hljs-punctuation">&#123;</span><br> shell <span class="hljs-operator">&lt;-</span> ifelse<span class="hljs-punctuation">(</span>Sys.info<span class="hljs-punctuation">(</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">[</span><span class="hljs-string">&quot;sysname&quot;</span><span class="hljs-punctuation">]</span> <span class="hljs-operator">==</span> <span class="hljs-string">&quot;Windows&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot;cmd&quot;</span><span class="hljs-punctuation">,</span> <br> <span class="hljs-string">&quot;sh&quot;</span><span class="hljs-punctuation">)</span><br> write.table<span class="hljs-punctuation">(</span>data.frame<span class="hljs-punctuation">(</span>SNP <span class="hljs-operator">=</span> dat<span class="hljs-punctuation">[[</span><span class="hljs-string">&quot;rsid&quot;</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> P <span class="hljs-operator">=</span> dat<span class="hljs-punctuation">[[</span><span class="hljs-string">&quot;pval&quot;</span><span 
class="hljs-punctuation">]</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> <br> file <span class="hljs-operator">=</span> tempfile<span class="hljs-punctuation">,</span> row.names <span class="hljs-operator">=</span> <span class="hljs-built_in">F</span><span class="hljs-punctuation">,</span> col.names <span class="hljs-operator">=</span> <span class="hljs-built_in">T</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">quote</span> <span class="hljs-operator">=</span> <span class="hljs-built_in">F</span><span class="hljs-punctuation">)</span><br> fun2 <span class="hljs-operator">&lt;-</span> paste0<span class="hljs-punctuation">(</span>shQuote<span class="hljs-punctuation">(</span>plink_bin<span class="hljs-punctuation">,</span> type <span class="hljs-operator">=</span> shell<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot; --bfile &quot;</span><span class="hljs-punctuation">,</span> <br> shQuote<span class="hljs-punctuation">(</span>bfile<span class="hljs-punctuation">,</span> type <span class="hljs-operator">=</span> shell<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot; --clump &quot;</span><span class="hljs-punctuation">,</span> shQuote<span class="hljs-punctuation">(</span>tempfile<span class="hljs-punctuation">,</span> <br> type <span class="hljs-operator">=</span> shell<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot; --clump-p1 &quot;</span><span class="hljs-punctuation">,</span> clump_p<span class="hljs-punctuation">,</span> <span class="hljs-string">&quot; --clump-r2 &quot;</span><span class="hljs-punctuation">,</span> <br> clump_r2<span class="hljs-punctuation">,</span> <span class="hljs-string">&quot; --clump-kb &quot;</span><span class="hljs-punctuation">,</span> 
clump_kb<span class="hljs-punctuation">,</span> <span class="hljs-string">&quot; --out &quot;</span><span class="hljs-punctuation">,</span> shQuote<span class="hljs-punctuation">(</span>tempfile<span class="hljs-punctuation">,</span> <br> type <span class="hljs-operator">=</span> shell<span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><br> print<span class="hljs-punctuation">(</span>fun2<span class="hljs-punctuation">)</span><br> system<span class="hljs-punctuation">(</span>fun2<span class="hljs-punctuation">)</span><br> res <span class="hljs-operator">&lt;-</span> read.table<span class="hljs-punctuation">(</span>paste<span class="hljs-punctuation">(</span>tempfile<span class="hljs-punctuation">,</span> <span class="hljs-string">&quot;.clumped&quot;</span><span class="hljs-punctuation">,</span> sep <span class="hljs-operator">=</span> <span class="hljs-string">&quot;&quot;</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> header <span class="hljs-operator">=</span> <span class="hljs-built_in">T</span><span class="hljs-punctuation">)</span><br> unlink<span class="hljs-punctuation">(</span>paste<span class="hljs-punctuation">(</span>tempfile<span class="hljs-punctuation">,</span> <span class="hljs-string">&quot;*&quot;</span><span class="hljs-punctuation">,</span> sep <span class="hljs-operator">=</span> <span class="hljs-string">&quot;&quot;</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><br> y <span class="hljs-operator">&lt;-</span> subset<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">,</span> <span class="hljs-operator">!</span>dat<span class="hljs-punctuation">[[</span><span class="hljs-string">&quot;rsid&quot;</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">]</span> <span class="hljs-operator">%in%</span> res<span class="hljs-punctuation">[[</span><span class="hljs-string">&quot;SNP&quot;</span><span 
class="hljs-punctuation">]</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">)</span><br> <span class="hljs-keyword">if</span> <span class="hljs-punctuation">(</span>nrow<span class="hljs-punctuation">(</span>y<span class="hljs-punctuation">)</span> <span class="hljs-operator">&gt;</span> <span class="hljs-number">0</span><span class="hljs-punctuation">)</span> <span class="hljs-punctuation">&#123;</span><br> message<span class="hljs-punctuation">(</span><span class="hljs-string">&quot;Removing &quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-built_in">length</span><span class="hljs-punctuation">(</span>y<span class="hljs-punctuation">[[</span><span class="hljs-string">&quot;rsid&quot;</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot; of &quot;</span><span class="hljs-punctuation">,</span> nrow<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> <br> <span class="hljs-string">&quot; variants due to LD with other variants or absence from LD reference panel&quot;</span><span class="hljs-punctuation">)</span><br> <span class="hljs-punctuation">&#125;</span><br> <span class="hljs-built_in">return</span><span class="hljs-punctuation">(</span>subset<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">,</span> dat<span class="hljs-punctuation">[[</span><span class="hljs-string">&quot;rsid&quot;</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">]</span> <span class="hljs-operator">%in%</span> res<span class="hljs-punctuation">[[</span><span class="hljs-string">&quot;SNP&quot;</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><br><span 
class="hljs-punctuation">&#125;</span><br>fuck <span class="hljs-operator">=</span> fix_ld_clump_local<span class="hljs-punctuation">(</span><br> dat <span class="hljs-operator">=</span> dplyr<span class="hljs-operator">::</span>tibble<span class="hljs-punctuation">(</span>rsid<span class="hljs-operator">=</span>exp_dat<span class="hljs-operator">$</span>SNP<span class="hljs-punctuation">,</span> pval<span class="hljs-operator">=</span>exp_dat<span class="hljs-operator">$</span>pval.exposure<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span><br> tempfile <span class="hljs-operator">=</span> file.path<span class="hljs-punctuation">(</span>getwd<span class="hljs-punctuation">(</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span><span class="hljs-string">&#x27;tmp.ld_clump.exp_dat&#x27;</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span><br> clump_kb <span class="hljs-operator">=</span> <span class="hljs-number">10000</span><span class="hljs-punctuation">,</span> clump_r2 <span class="hljs-operator">=</span> <span class="hljs-number">0.001</span><span class="hljs-punctuation">,</span> clump_p <span class="hljs-operator">=</span> <span class="hljs-number">1</span><span class="hljs-punctuation">,</span><br> <span class="hljs-comment"># pop = &quot;EUR&quot;, # Super-population. 
Options are &quot;EUR&quot;, &quot;SAS&quot;, &quot;EAS&quot;, &quot;AFR&quot;, &quot;AMR&quot;</span><br> plink_bin <span class="hljs-operator">=</span> <span class="hljs-string">&#x27;/opt/conda/envs/MR/bin/plink&#x27;</span><span class="hljs-punctuation">,</span> <span class="hljs-comment"># 千万别用什么 genetics.binaRies::get_plink_binary(),他们自己编译的文件有问题</span><br> bfile <span class="hljs-operator">=</span> <span class="hljs-string">&quot;/home/jovyan/upload/GWAS/ld/EUR&quot;</span> <span class="hljs-comment"># 前缀,不是文件夹也不是文件</span><br><span class="hljs-punctuation">)</span><br>exp_dat_clumped <span class="hljs-operator">=</span> exp_dat<span class="hljs-punctuation">[</span>exp_dat<span class="hljs-operator">$</span>SNP <span class="hljs-operator">%in%</span> fuck<span class="hljs-operator">$</span>rsid<span class="hljs-punctuation">,</span><span class="hljs-punctuation">]</span><br>saveRDS<span class="hljs-punctuation">(</span>file <span class="hljs-operator">=</span> <span class="hljs-string">&#x27;ieu-a-2.exp_gwas&#x27;</span><span class="hljs-punctuation">,</span> exp_dat_clumped<span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h2 id="获取暴露数据">获取暴露数据</h2><h3 id="自己的数据">自己的数据</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><code class="hljs R">df_gwas <span class="hljs-operator">&lt;-</span> data.frame<span class="hljs-punctuation">(</span><br> SNP <span class="hljs-operator">=</span> <span class="hljs-built_in">c</span><span class="hljs-punctuation">(</span><span class="hljs-string">&quot;rs1&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot;rs2&quot;</span><span class="hljs-punctuation">)</span><span 
class="hljs-punctuation">,</span><br> beta <span class="hljs-operator">=</span> <span class="hljs-built_in">c</span><span class="hljs-punctuation">(</span><span class="hljs-number">1</span><span class="hljs-punctuation">,</span> <span class="hljs-number">2</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span><br> se <span class="hljs-operator">=</span> <span class="hljs-built_in">c</span><span class="hljs-punctuation">(</span><span class="hljs-number">1</span><span class="hljs-punctuation">,</span> <span class="hljs-number">2</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span><br> effect_allele <span class="hljs-operator">=</span> <span class="hljs-built_in">c</span><span class="hljs-punctuation">(</span><span class="hljs-string">&quot;A&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&quot;T&quot;</span><span class="hljs-punctuation">)</span><br><span class="hljs-punctuation">)</span><br>head<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br>exp_dat <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>format_data<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">,</span> type <span class="hljs-operator">=</span> <span class="hljs-string">&quot;exposure&quot;</span><span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h3 id="gwas-catalog">gwas_catalog</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs R">df_gwas <span class="hljs-operator">&lt;-</span><br> subset<span class="hljs-punctuation">(</span>MRInstruments<span class="hljs-operator">::</span>gwas_catalog<span 
class="hljs-punctuation">,</span><br> grepl<span class="hljs-punctuation">(</span><span class="hljs-string">&quot;Speliotes&quot;</span><span class="hljs-punctuation">,</span> Author<span class="hljs-punctuation">)</span> <span class="hljs-operator">&amp;</span><br> Phenotype <span class="hljs-operator">==</span> <span class="hljs-string">&quot;Body mass index&quot;</span><span class="hljs-punctuation">)</span><br>head<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br>exp_dat <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>format_data<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h3 id="metab-qtls">metab_qtls</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs R">df_gwas <span class="hljs-operator">&lt;-</span><br> subset<span class="hljs-punctuation">(</span>MRInstruments<span class="hljs-operator">::</span>metab_qtls<span class="hljs-punctuation">,</span><br> phenotype <span class="hljs-operator">==</span> <span class="hljs-string">&quot;Ala&quot;</span><br> <span class="hljs-punctuation">)</span><br>head<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br>exp_dat <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>format_metab_qtls<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h3 id="proteomic-qtls">proteomic_qtls</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span 
class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs R">df_gwas <span class="hljs-operator">&lt;-</span><br> subset<span class="hljs-punctuation">(</span>MRInstruments<span class="hljs-operator">::</span>proteomic_qtls<span class="hljs-punctuation">,</span><br> analyte <span class="hljs-operator">==</span> <span class="hljs-string">&quot;ApoH&quot;</span><br> <span class="hljs-punctuation">)</span><br>head<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br>exp_dat <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>format_proteomic_qtls<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h3 id="某个基因">某个基因</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs R">df_gwas <span class="hljs-operator">&lt;-</span><br> subset<span class="hljs-punctuation">(</span>MRInstruments<span class="hljs-operator">::</span>gtex_eqtl<span class="hljs-punctuation">,</span><br> gene_name <span class="hljs-operator">==</span> <span class="hljs-string">&quot;IRAK1BP1&quot;</span> <span class="hljs-operator">&amp;</span> tissue <span class="hljs-operator">==</span> <span class="hljs-string">&quot;Adipose Subcutaneous&quot;</span><br> <span class="hljs-punctuation">)</span><br>head<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br>exp_dat <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>format_gtex_eqtl<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h3 
id="某个性状的某个甲基化位点相关QTL">某个性状的某个甲基化位点相关QTL</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs R">df_gwas <span class="hljs-operator">&lt;-</span><br> subset<span class="hljs-punctuation">(</span>MRInstruments<span class="hljs-operator">::</span>aries_mqtl<span class="hljs-punctuation">,</span><br> cpg <span class="hljs-operator">==</span> <span class="hljs-string">&quot;cg25212131&quot;</span> <span class="hljs-operator">&amp;</span> age <span class="hljs-operator">==</span> <span class="hljs-string">&quot;Birth&quot;</span><br> <span class="hljs-punctuation">)</span><br>head<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br>exp_dat <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>format_aries_mqtl<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h3 id="IEU的ID">IEU的ID</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs R">exp_gwas <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>extract_instruments<span class="hljs-punctuation">(</span>outcomes <span class="hljs-operator">=</span> <span class="hljs-string">&#x27;ieu-a-2&#x27;</span><span class="hljs-punctuation">)</span><br>head<span class="hljs-punctuation">(</span>exp_gwas<span class="hljs-punctuation">)</span><br>saveRDS<span class="hljs-punctuation">(</span>file <span class="hljs-operator">=</span> <span class="hljs-string">&#x27;ieu-a-2.exp_gwas&#x27;</span><span class="hljs-punctuation">,</span> exp_gwas<span 
class="hljs-punctuation">)</span> <span class="hljs-comment"># 和自己从VCF开始经过clump得到的差不多</span><br></code></pre></td></tr></table></figure><h3 id="UK-Biobank">UK_Biobank</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br></pre></td><td class="code"><pre><code class="hljs R">hyperten_tophits <span class="hljs-operator">&lt;-</span> ieugwasr<span class="hljs-operator">::</span>tophits<span class="hljs-punctuation">(</span>id<span class="hljs-operator">=</span><span class="hljs-string">&quot;ukb-b-12493&quot;</span><span class="hljs-punctuation">,</span> clump<span class="hljs-operator">=</span><span class="hljs-number">0</span><span class="hljs-punctuation">)</span><br>hyperten_gwas <span class="hljs-operator">&lt;-</span> dplyr<span class="hljs-operator">::</span>rename<span class="hljs-punctuation">(</span>hyperten_tophits<span class="hljs-punctuation">,</span> <span class="hljs-built_in">c</span><span class="hljs-punctuation">(</span><br> <span class="hljs-string">&quot;SNP&quot;</span><span class="hljs-operator">=</span><span class="hljs-string">&quot;rsid&quot;</span><span class="hljs-punctuation">,</span><br> <span 
class="hljs-string">&quot;effect_allele.exposure&quot;</span><span class="hljs-operator">=</span><span class="hljs-string">&quot;ea&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-string">&quot;other_allele.exposure&quot;</span><span class="hljs-operator">=</span><span class="hljs-string">&quot;nea&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-string">&quot;beta.exposure&quot;</span><span class="hljs-operator">=</span><span class="hljs-string">&quot;beta&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-string">&quot;se.exposure&quot;</span><span class="hljs-operator">=</span><span class="hljs-string">&quot;se&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-string">&quot;eaf.exposure&quot;</span><span class="hljs-operator">=</span><span class="hljs-string">&quot;eaf&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-string">&quot;pval.exposure&quot;</span><span class="hljs-operator">=</span><span class="hljs-string">&quot;p&quot;</span><span class="hljs-punctuation">,</span><br> <span class="hljs-string">&quot;N&quot;</span><span class="hljs-operator">=</span><span class="hljs-string">&quot;n&quot;</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><br>fuck <span class="hljs-operator">=</span> fix_ld_clump_local<span class="hljs-punctuation">(</span><br> dat <span class="hljs-operator">=</span> dplyr<span class="hljs-operator">::</span>tibble<span class="hljs-punctuation">(</span>rsid<span class="hljs-operator">=</span>hyperten_gwas<span class="hljs-operator">$</span>SNP<span class="hljs-punctuation">,</span> pval<span class="hljs-operator">=</span>hyperten_gwas<span class="hljs-operator">$</span>pval.exposure<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span><br> tempfile <span class="hljs-operator">=</span> file.path<span class="hljs-punctuation">(</span>getwd<span 
class="hljs-punctuation">(</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span><span class="hljs-string">&#x27;tmp.ld_clump.exp_dat&#x27;</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span><br> clump_kb <span class="hljs-operator">=</span> <span class="hljs-number">10000</span><span class="hljs-punctuation">,</span> clump_r2 <span class="hljs-operator">=</span> <span class="hljs-number">0.001</span><span class="hljs-punctuation">,</span> clump_p <span class="hljs-operator">=</span> <span class="hljs-number">1</span><span class="hljs-punctuation">,</span><br> <span class="hljs-comment"># pop = &quot;EUR&quot;, # Super-population. Options are &quot;EUR&quot;, &quot;SAS&quot;, &quot;EAS&quot;, &quot;AFR&quot;, &quot;AMR&quot;</span><br> plink_bin <span class="hljs-operator">=</span> <span class="hljs-string">&#x27;/opt/conda/envs/MR/bin/plink&#x27;</span><span class="hljs-punctuation">,</span> <span class="hljs-comment"># 千万别用什么 genetics.binaRies::get_plink_binary(),他们自己编译的文件有问题</span><br> bfile <span class="hljs-operator">=</span> <span class="hljs-string">&quot;/home/jovyan/upload/GWAS/ld/EUR&quot;</span> <span class="hljs-comment"># 前缀,不是文件夹也不是文件</span><br><span class="hljs-punctuation">)</span><br>exp_dat_clumped <span class="hljs-operator">=</span> hyperten_gwas<span class="hljs-punctuation">[</span>hyperten_gwas<span class="hljs-operator">$</span>SNP <span class="hljs-operator">%in%</span> fuck<span class="hljs-operator">$</span>rsid<span class="hljs-punctuation">,</span><span class="hljs-punctuation">]</span><br>MR_calc_r2_F<span class="hljs-punctuation">(</span><br> beta <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>beta.exposure<span class="hljs-punctuation">,</span> <span class="hljs-comment"># Vector of Log odds ratio. 
beta = log(OR)</span><br> eaf <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>eaf.exposure<span class="hljs-punctuation">,</span> <span class="hljs-comment"># Vector of allele frequencies.</span><br> N <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>N<span class="hljs-punctuation">,</span> <span class="hljs-comment"># Array of sample sizes</span><br> se <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>se.exposure <span class="hljs-comment"># Vector of SE.</span><br><span class="hljs-punctuation">)</span> <span class="hljs-comment"># 取 F&gt;10 的</span><br></code></pre></td></tr></table></figure><h2 id="计算统计效力">计算统计效力</h2><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><code class="hljs R"><span class="hljs-comment"># 分类变量</span><br>tmp_r2 <span class="hljs-operator">=</span>TwoSampleMR<span class="hljs-operator">::</span>get_r_from_lor<span class="hljs-punctuation">(</span><br> lor <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>beta.exposure<span class="hljs-punctuation">,</span> <span class="hljs-comment"># Vector of Log odds ratio. 
beta = log(OR)</span><br> af <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>eaf.exposure<span class="hljs-punctuation">,</span> <span class="hljs-comment"># Vector of allele frequencies.</span><br> ncase <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>ncase.exposure<span class="hljs-punctuation">,</span> <span class="hljs-comment"># Vector of Number of cases. </span><br> ncontrol <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>ncontrol.exposure<span class="hljs-punctuation">,</span> <span class="hljs-comment"># Vector of Number of controls. </span><br> prevalence <span class="hljs-operator">=</span> <span class="hljs-number">1</span><span class="hljs-punctuation">,</span> <span class="hljs-comment"># Vector of Disease prevalence in the population.</span><br><span class="hljs-punctuation">)</span><br><span class="hljs-comment"># 连续变量</span><br>tmp_r2 <span class="hljs-operator">=</span>TwoSampleMR<span class="hljs-operator">::</span>get_r_from_pn<span class="hljs-punctuation">(</span><br> p <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>pval.exposure<span class="hljs-punctuation">,</span> <span class="hljs-comment"># Array of pvals</span><br> n <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>samplesize.exposure <span class="hljs-comment"># Array of sample sizes</span><br><span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span 
class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br></pre></td><td class="code"><pre><code class="hljs R">MR_calc_r2_F <span class="hljs-operator">=</span> <span class="hljs-keyword">function</span><span class="hljs-punctuation">(</span>beta<span class="hljs-punctuation">,</span> eaf<span class="hljs-punctuation">,</span> N<span class="hljs-punctuation">,</span> se<span class="hljs-punctuation">)</span><span class="hljs-punctuation">&#123;</span><br> <span class="hljs-comment"># https://doi.org/10.1038/s41467-020-14389-8</span><br> <span class="hljs-comment"># https://doi.org/10.1371/journal.pone.0120758</span><br> r2 <span class="hljs-operator">=</span> <span class="hljs-punctuation">(</span><span class="hljs-number">2</span> <span class="hljs-operator">*</span> <span class="hljs-punctuation">(</span>beta<span class="hljs-operator">^</span><span class="hljs-number">2</span><span class="hljs-punctuation">)</span> <span class="hljs-operator">*</span> eaf <span class="hljs-operator">*</span> <span class="hljs-punctuation">(</span><span class="hljs-number">1</span> <span class="hljs-operator">-</span> eaf<span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span> <span class="hljs-operator">/</span><br> <span class="hljs-punctuation">(</span><span class="hljs-number">2</span> <span class="hljs-operator">*</span> <span class="hljs-punctuation">(</span>beta<span class="hljs-operator">^</span><span class="hljs-number">2</span><span class="hljs-punctuation">)</span> <span class="hljs-operator">*</span> eaf <span class="hljs-operator">*</span> <span class="hljs-punctuation">(</span><span class="hljs-number">1</span> <span class="hljs-operator">-</span> eaf<span class="hljs-punctuation">)</span> <span class="hljs-operator">+</span><br> <span class="hljs-number">2</span> <span class="hljs-operator">*</span> N <span class="hljs-operator">*</span> eaf <span 
class="hljs-operator">*</span> <span class="hljs-punctuation">(</span><span class="hljs-number">1</span> <span class="hljs-operator">-</span> eaf<span class="hljs-punctuation">)</span> <span class="hljs-operator">*</span> se<span class="hljs-operator">^</span><span class="hljs-number">2</span><span class="hljs-punctuation">)</span><br> <span class="hljs-built_in">F</span> <span class="hljs-operator">=</span> r2 <span class="hljs-operator">*</span> <span class="hljs-punctuation">(</span>N <span class="hljs-operator">-</span> <span class="hljs-number">2</span><span class="hljs-punctuation">)</span> <span class="hljs-operator">/</span> <span class="hljs-punctuation">(</span><span class="hljs-number">1</span> <span class="hljs-operator">-</span> r2<span class="hljs-punctuation">)</span><br> print<span class="hljs-punctuation">(</span>mean<span class="hljs-punctuation">(</span><span class="hljs-built_in">F</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><br> <span class="hljs-built_in">return</span><span class="hljs-punctuation">(</span>dplyr<span class="hljs-operator">::</span>tibble<span class="hljs-punctuation">(</span>r2<span class="hljs-operator">=</span>r2<span class="hljs-punctuation">,</span> <span class="hljs-built_in">F</span><span class="hljs-operator">=</span><span class="hljs-built_in">F</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><br><span class="hljs-punctuation">&#125;</span><br>MR_calc_r2_F<span class="hljs-punctuation">(</span><br> beta <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>beta.exposure<span class="hljs-punctuation">,</span> <span class="hljs-comment"># Vector of Log odds ratio. 
beta = log(OR)</span><br> eaf <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>eaf.exposure<span class="hljs-punctuation">,</span> <span class="hljs-comment"># Vector of allele frequencies.</span><br> N <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>samplesize.exposure<span class="hljs-punctuation">,</span> <span class="hljs-comment"># Array of sample sizes</span><br> se <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>se.exposure <span class="hljs-comment"># Vector of SE.</span><br><span class="hljs-punctuation">)</span> <span class="hljs-comment"># 取 F&gt;10 的</span><br></code></pre></td></tr></table></figure><h2 id="获取结局数据">获取结局数据</h2><h3 id="IEU">IEU</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs R">out_gwas <span class="hljs-operator">=</span> TwoSampleMR<span class="hljs-operator">::</span>extract_outcome_data<span class="hljs-punctuation">(</span>snps <span class="hljs-operator">=</span> exp_gwas<span class="hljs-operator">$</span>SNP<span class="hljs-punctuation">,</span> outcomes <span class="hljs-operator">=</span> <span class="hljs-string">&#x27;ieu-a-7&#x27;</span><span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h3 id="UK-Biobank-2">UK_Biobank</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs R">anxiety_hyperten_liberal <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>extract_outcome_data<span class="hljs-punctuation">(</span>snps <span class="hljs-operator">=</span> exp_dat_clumped<span class="hljs-operator">$</span>SNP<span class="hljs-punctuation">,</span> outcomes <span class="hljs-operator">=</span> <span class="hljs-string">&quot;ukb-b-11311&quot;</span><span 
class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h3 id="PGC的示例">PGC的示例</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br></pre></td><td class="code"><pre><code class="hljs R">df_gwas <span class="hljs-operator">=</span> read.table<span class="hljs-punctuation">(</span>gzfile<span class="hljs-punctuation">(</span><span class="hljs-string">&#x27;~/upload/GWAS/PGC/ADHD2022_iPSYCH_deCODE_PGC.meta.gz&#x27;</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span> header <span class="hljs-operator">=</span> <span class="hljs-built_in">T</span><span class="hljs-punctuation">)</span><br>head<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">)</span><br>df_gwas <span class="hljs-operator">=</span> df_gwas<span class="hljs-punctuation">[</span>df_gwas<span class="hljs-operator">$</span>SNP <span class="hljs-operator">%in%</span> exp_gwas<span class="hljs-operator">$</span>SNP<span class="hljs-punctuation">,</span><span class="hljs-punctuation">]</span><br>out_gwas <span class="hljs-operator">=</span> data.frame<span class="hljs-punctuation">(</span><br> SNP <span class="hljs-operator">=</span> df_gwas<span class="hljs-operator">$</span>SNP<span class="hljs-punctuation">,</span><br> chr <span class="hljs-operator">=</span> <span class="hljs-built_in">as.character</span><span class="hljs-punctuation">(</span>df_gwas<span 
class="hljs-operator">$</span>CHR<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span><br> pos <span class="hljs-operator">=</span> df_gwas<span class="hljs-operator">$</span>BP<span class="hljs-punctuation">,</span><br> beta.outcome <span class="hljs-operator">=</span> <span class="hljs-built_in">log</span><span class="hljs-punctuation">(</span>df_gwas<span class="hljs-operator">$</span>OR<span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span><br> se.outcome <span class="hljs-operator">=</span> df_gwas<span class="hljs-operator">$</span>SE<span class="hljs-punctuation">,</span><br> samplesize.outcome <span class="hljs-operator">=</span> df_gwas<span class="hljs-operator">$</span>Nca <span class="hljs-operator">+</span> df_gwas<span class="hljs-operator">$</span>Nco<span class="hljs-punctuation">,</span><br> pval.outcome <span class="hljs-operator">=</span> df_gwas<span class="hljs-operator">$</span>P<span class="hljs-punctuation">,</span><br> eaf.outcome <span class="hljs-operator">=</span> with<span class="hljs-punctuation">(</span>df_gwas<span class="hljs-punctuation">,</span> <span class="hljs-punctuation">(</span>FRQ_A_38691<span class="hljs-operator">*</span>Nca<span class="hljs-operator">+</span>FRQ_U_186843<span class="hljs-operator">*</span>Nco<span class="hljs-punctuation">)</span><span class="hljs-operator">/</span><span class="hljs-punctuation">(</span>Nca<span class="hljs-operator">+</span>Nco<span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">,</span><br> effect_allele.outcome <span class="hljs-operator">=</span> df_gwas<span class="hljs-operator">$</span>A1<span class="hljs-punctuation">,</span><br> other_allele.outcome <span class="hljs-operator">=</span> df_gwas<span class="hljs-operator">$</span>A2<span class="hljs-punctuation">,</span><br> outcome <span class="hljs-operator">=</span> <span class="hljs-string">&#x27;ADHD&#x27;</span><span 
class="hljs-punctuation">,</span><br> id.outcome <span class="hljs-operator">=</span> <span class="hljs-string">&#x27;ADHD2022_iPSYCH_deCODE_PGC&#x27;</span> <br><span class="hljs-punctuation">)</span><br>out_gwas <span class="hljs-operator">=</span> subset<span class="hljs-punctuation">(</span>out_gwas<span class="hljs-punctuation">,</span> pval.outcome <span class="hljs-operator">&gt;</span> <span class="hljs-number">5e-08</span><span class="hljs-punctuation">)</span> <span class="hljs-comment"># 排他性假设</span><br></code></pre></td></tr></table></figure><h2 id="附加-代理SNP">附加 代理SNP</h2><p>一部分暴露的SNPs在结局中找不到,可以找和这部分SNPs连锁不平衡的SNPs来代替。相关网站:<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9zbmlwYS5vcmcvc25pcGEzLw==" rel="noopener external nofollow noreferrer">snipa</a></p><h2 id="Harmonization">Harmonization</h2><ul><li>将Exposure-SNP及Outcome-SNP等位基因方向协同</li><li>根据EAF大小,剔除不能判断方向的回文SNP</li><li>剔除incompatible SNP</li></ul><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs R">dat <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>harmonise_data<span class="hljs-punctuation">(</span><br> exposure_dat <span class="hljs-operator">=</span> exp_gwas<span class="hljs-punctuation">,</span> <br> outcome_dat <span class="hljs-operator">=</span> out_gwas<br><span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h2 id="附加-一键报告">附加 一键报告</h2><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs R">TwoSampleMR<span class="hljs-operator">::</span>mr_report<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">,</span> output_type <span class="hljs-operator">=</span> <span class="hljs-string">&quot;md&quot;</span><span 
class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h2 id="MR分析">MR分析</h2><h3 id="回归分析">回归分析</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs R">TwoSampleMR<span class="hljs-operator">::</span>mr_method_list<span class="hljs-punctuation">(</span><span class="hljs-punctuation">)</span> <span class="hljs-comment"># 查看mr支持的MR分析方法</span><br>mr_regression <span class="hljs-operator">=</span> TwoSampleMR<span class="hljs-operator">::</span>mr<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">,</span> method_list <span class="hljs-operator">=</span> <span class="hljs-built_in">c</span><span class="hljs-punctuation">(</span><span class="hljs-string">&#x27;mr_ivw&#x27;</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&#x27;mr_egger_regression&#x27;</span><span class="hljs-punctuation">,</span> <span class="hljs-string">&#x27;mr_weighted_median&#x27;</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span><br>mr_regression_or <span class="hljs-operator">=</span> TwoSampleMR<span class="hljs-operator">::</span>generate_odds_ratios<span class="hljs-punctuation">(</span>mr_res <span class="hljs-operator">=</span> mr_regression<span class="hljs-punctuation">)</span> <span class="hljs-comment"># 分类变量</span><br><span class="hljs-punctuation">&#123;</span>pdf<span class="hljs-punctuation">(</span>file <span class="hljs-operator">=</span> <span class="hljs-string">&#x27;MR.BMIvsADHD.plot.pdf&#x27;</span><span class="hljs-punctuation">,</span> width <span class="hljs-operator">=</span> <span class="hljs-number">6</span><span class="hljs-punctuation">,</span> height <span class="hljs-operator">=</span> <span 
class="hljs-number">6</span><span class="hljs-punctuation">)</span>; <span class="hljs-comment"># 导出 PDF 开始</span><br>print<span class="hljs-punctuation">(</span>TwoSampleMR<span class="hljs-operator">::</span>mr_scatter_plot<span class="hljs-punctuation">(</span>mr_results <span class="hljs-operator">=</span> mr_regression<span class="hljs-punctuation">,</span> dat <span class="hljs-operator">=</span> dat<span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span>; <span class="hljs-comment"># 返回的是一个ggplot2对象</span><br>dev.off<span class="hljs-punctuation">(</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">&#125;</span> <span class="hljs-comment"># 导出 PDF 结束</span><br></code></pre></td></tr></table></figure><p><img src="https://img.limour.top/2023/10/15/652bd26b9010d.webp" alt="mr_scatter_plot"></p><h3 id="异质性检测">异质性检测</h3><ul><li>有异质性用随机效应模型<code>ivw</code>,无异质性用固定效应模型(也可以用随机效应模型,两者结果一致)</li><li>异质性可能带来多效性,如果没有多效性,则可以说异质性没有带来多效性</li></ul><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs R">TwoSampleMR<span class="hljs-operator">::</span>mr_heterogeneity<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">)</span> <span class="hljs-comment"># ivw的 Q_pval &lt; 0.05 则说明有异质性</span><br>heterogeneity_presso <span class="hljs-operator">=</span> TwoSampleMR<span class="hljs-operator">::</span>run_mr_presso<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">,</span> NbDistribution <span class="hljs-operator">=</span> <span class="hljs-number">3000</span><span class="hljs-punctuation">)</span> <span class="hljs-comment"># NbDistribution越高分辨率越高,找不到离群的SNP时需要提高</span><br>heterogeneity_presso<span class="hljs-punctuation">[[</span><span class="hljs-number">1</span><span class="hljs-punctuation">]</span><span 
class="hljs-punctuation">]</span><span class="hljs-operator">$</span>`MR-PRESSO results`<span class="hljs-operator">$</span>`Global Test`<span class="hljs-operator">$</span>Pvalue <span class="hljs-comment"># &lt; 0.05 说明有异质性</span><br>heterogeneity_presso<span class="hljs-punctuation">[[</span><span class="hljs-number">1</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">]</span><span class="hljs-operator">$</span>`MR-PRESSO results`<span class="hljs-operator">$</span>`Distortion Test`<span class="hljs-operator">$</span>`Outliers Indices` <span class="hljs-comment"># 显示离群的SNP,将其剔除后重新分析</span><br></code></pre></td></tr></table></figure><h3 id="水平多效性">水平多效性</h3><ul><li>P &lt; 0.05 说明不满足独立性假设,建议放弃继续做这个课题</li><li>P &lt; 0.05 拒绝了截距为0的假设,说明SNP效应为0时依然有影响(截距存在),有其他因素在起作用</li></ul><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs R">TwoSampleMR<span class="hljs-operator">::</span>mr_pleiotropy_test<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h3 id="敏感性分析">敏感性分析</h3><ul><li>Leave-one-out analysis</li><li>所有结果都不应该存在跨过0的情况,否则说明结果不稳定,不再能说明因果关系</li></ul><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs R">mr_loo <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>mr_leaveoneout<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">)</span><br><span class="hljs-punctuation">&#123;</span>pdf<span class="hljs-punctuation">(</span>file <span class="hljs-operator">=</span> <span class="hljs-string">&#x27;MR.BMIvsADHD.leaveoneout.plot.pdf&#x27;</span><span class="hljs-punctuation">,</span> width <span class="hljs-operator">=</span> <span 
class="hljs-number">6</span><span class="hljs-punctuation">,</span> height <span class="hljs-operator">=</span> <span class="hljs-number">6</span><span class="hljs-punctuation">)</span>; <span class="hljs-comment"># 导出 PDF 开始</span><br>print<span class="hljs-punctuation">(</span>TwoSampleMR<span class="hljs-operator">::</span>mr_leaveoneout_plot<span class="hljs-punctuation">(</span>leaveoneout_results <span class="hljs-operator">=</span> mr_loo<span class="hljs-punctuation">)</span><span class="hljs-punctuation">)</span>; <span class="hljs-comment"># 返回的是一个ggplot2对象</span><br>dev.off<span class="hljs-punctuation">(</span><span class="hljs-punctuation">)</span><span class="hljs-punctuation">&#125;</span> <span class="hljs-comment"># 导出 PDF 结束</span><br></code></pre></td></tr></table></figure><h3 id="单SNP分析">单SNP分析</h3><ul><li>对每个暴露-结果组合进行多次分析,每次使用不同的单 SNP 进行分析</li></ul><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs R">mr_res_single <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>mr_singlesnp<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">)</span><br>TwoSampleMR<span class="hljs-operator">::</span>mr_funnel_plot<span class="hljs-punctuation">(</span>mr_res_single<span class="hljs-punctuation">)</span><br>TwoSampleMR<span class="hljs-operator">::</span>mr_forest_plot<span class="hljs-punctuation">(</span>mr_res_single<span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h3 id="方向性检测">方向性检测</h3><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs R">TwoSampleMR<span class="hljs-operator">::</span>directionality_test<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">)</span> <span class="hljs-comment"># 
TRUE表示确实是暴露导致了结果</span><br></code></pre></td></tr></table></figure><h2 id="附加-稳健回归">附加 稳健回归</h2><figure class="highlight r"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs R">dat2 <span class="hljs-operator">&lt;-</span> TwoSampleMR<span class="hljs-operator">::</span>dat_to_MRInput<span class="hljs-punctuation">(</span>dat<span class="hljs-punctuation">)</span><br>mr_ivw_robust <span class="hljs-operator">&lt;-</span> MendelianRandomization<span class="hljs-operator">::</span>mr_ivw<span class="hljs-punctuation">(</span>dat2<span class="hljs-punctuation">[[</span><span class="hljs-number">1</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">]</span><span class="hljs-punctuation">,</span> model<span class="hljs-operator">=</span> <span class="hljs-string">&quot;default&quot;</span><span class="hljs-punctuation">,</span> <span class="hljs-comment"># “random”指的就是随机效应模型,“fixed”指的是固定效应模型</span><br> robust <span class="hljs-operator">=</span> <span class="hljs-literal">TRUE</span><span class="hljs-punctuation">,</span> penalized <span class="hljs-operator">=</span> <span class="hljs-literal">TRUE</span><span class="hljs-punctuation">,</span>correl <span class="hljs-operator">=</span> <span class="hljs-literal">FALSE</span><span class="hljs-punctuation">,</span> <span class="hljs-comment"># 参数penalized代表下调异常值的权重</span><br> weights <span class="hljs-operator">=</span><span class="hljs-string">&quot;simple&quot;</span><span class="hljs-punctuation">,</span> psi <span class="hljs-operator">=</span> <span class="hljs-number">0</span><span class="hljs-punctuation">,</span>distribution <span class="hljs-operator">=</span> <span class="hljs-string">&quot;normal&quot;</span><span class="hljs-punctuation">,</span>alpha <span class="hljs-operator">=</span> <span 
class="hljs-number">0.05</span><span class="hljs-punctuation">)</span><br></code></pre></td></tr></table></figure><h2 id="附加-绘制森林图">附加 绘制森林图</h2><ul><li><a href="/Forest-plot-displays-the-results-of-regression-analysis">美化森林图</a></li></ul><h2 id="附加-计算Power">附加 计算Power</h2><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9kb2kub3JnLzEwLjEwOTMvaWplL2R5dDE3OQ==" rel="noopener external nofollow noreferrer">Calculating statistical power in Mendelian randomization studies</a></li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9zaGlueS5jbnNnZW5vbWljcy5jb20vbVJuZC8=" rel="noopener external nofollow noreferrer">Power calculations for Mendelian Randomization</a></li><li>Sample size: 结局总的样本量,不是暴露的样本量</li><li>K: 结局中病例的比例,case/(case+control)</li><li>OR: IVW的OR值,exp(beta)</li><li>R2: MR_calc_r2_F 计算得到的所有R2的sum</li></ul>]]></content:encoded>
  215. <category domain="https://hexo.limour.top/tags/SNP/">SNP</category>
  216. <category domain="https://hexo.limour.top/tags/MR/">MR</category>
  217. <comments>https://hexo.limour.top/Mendelian-Randomization#disqus_thread</comments>
  218. </item>
  219. <item>
  220. <title>【记录】使用one-api聚合Azure和OpenAI的API</title>
  221. <link>https://hexo.limour.top/Aggregating-Azure-and-OpenAI-APIs-with-OneAPI</link>
  222. <guid>https://hexo.limour.top/Aggregating-Azure-and-OpenAI-APIs-with-OneAPI</guid>
  223. <pubDate>Tue, 03 Oct 2023 12:04:29 GMT</pubDate>
  224. <description>&lt;p&gt;&lt;a href=&quot;https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL3NvbmdxdWFucGVuZy9vbmUtYXBp&quot; rel=&quot;noopener external nofollow noreferrer&quot;&gt;On</description>
  225. <content:encoded><![CDATA[<p><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL3NvbmdxdWFucGVuZy9vbmUtYXBp" rel="noopener external nofollow noreferrer">One-api</a> 是 OpenAI 接口管理 &amp; 分发系统,支持Azure、Anthropic Claude、Google PaLM 2、智谱 ChatGLM、百度文心一言、讯飞星火认知、阿里通义千问、360 智脑以及腾讯混元,可用于二次分发管理 key。</p><ul><li>在负载均衡的同时,有效避免了key的泄露风险</li></ul><h2 id="部署-One-api">部署 One-api</h2><ul><li><a href="/Docker-bu-shu-Nginx-Proxy-Manager">反向代理</a></li><li>账号:<code>root</code></li><li>密码:<code>123456</code></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/one-api &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/one-api &amp;&amp; nano docker-compose.yml<br>sudo docker-compose up -d<br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span class="hljs-string">&#x27;3&#x27;</span><br><span class="hljs-attr">services:</span><br> <span class="hljs-attr">one-api:</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">justsong/one-api:latest</span><br> <span class="hljs-attr">environment:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">TZ=Asia/Shanghai</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">./data:/data</span> 
<br> <span class="hljs-attr">restart:</span> <span class="hljs-string">unless-stopped</span><br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">ngpm</span><br></code></pre></td></tr></table></figure><p><img src="https://img.limour.top/2023/10/03/651c068b62241.webp" alt=""></p><h2 id="客户端">客户端</h2><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL1lpZGFkYWEvQ2hhdEdQVC1OZXh0LVdlYg==" rel="noopener external nofollow noreferrer">ChatGPT-Next-Web</a></li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL2ltbWVyc2l2ZS10cmFuc2xhdGUvaW1tZXJzaXZlLXRyYW5zbGF0ZQ==" rel="noopener external nofollow noreferrer">沉浸式翻译</a></li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL2p1cHl0ZXJsYWIvanVweXRlci1haQ==" rel="noopener external nofollow noreferrer">Jupyter-ai</a></li><li>在 令牌 中新建一个令牌,端点填反代的地址</li></ul><h2 id="新建渠道">新建渠道</h2><ul><li>Azure 需要确保部署模型的名称是 <code>gpt-35-turbo</code></li><li>可以将 one-api 本身当一个渠道进行套娃</li><li>在 日志 里可以看到对不同渠道进行了负载均衡</li></ul><p><img src="https://img.limour.top/2023/10/03/651c07ce6f9d2.webp" alt=""></p><h2 id="附加-关闭-Azure-筛选">附加 关闭 Azure 筛选</h2><ul><li>安装<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9ncmVhc3lmb3JrLm9yZy96aC1DTi9zY3JpcHRzLzQ4OTk0OC1henVyZS1vcGVuYWktbW9kaWZpZWQtZmlsdGVycy0lRTklOUElOTAlRTglOTclOEYlRTklODAlODklRTklQTElQjklRTUlQkMlODAlRTYlOTQlQkU=" rel="noopener external nofollow noreferrer">油猴插件</a></li><li>去控制台新建一个筛选器,将筛选关闭,并开启异步筛选注释</li><li>设置模型部署中模型的高级选项,切换筛选器为刚刚创建的筛选器</li></ul><h2 id="附加-Amazon-Bedrock">附加 Amazon Bedrock</h2><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly91cy13ZXN0LTIuY29uc29sZS5hd3MuYW1hem9uLmNvbS9iZWRyb2NrL2hvbWU=" rel="noopener external nofollow noreferrer">申请模型访问权限</a></li><li><a 
href="https://hexo.limour.top/go/#aHR0cHM6Ly91cy1lYXN0LTEuY29uc29sZS5hd3MuYW1hem9uLmNvbS9pYW0vaG9tZQ==" rel="noopener external nofollow noreferrer">添加 Access key</a></li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9kb2NzLmxpdGVsbG0uYWkvZG9jcy9wcm92aWRlcnMvYmVkcm9jaw==" rel="noopener external nofollow noreferrer">litellm 文档</a></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/litellm &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/litellm &amp;&amp; nano docker-compose.yml<br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span class="hljs-string">&quot;3.9&quot;</span><br><span class="hljs-attr">services:</span><br> <span class="hljs-attr">litellm:</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">ghcr.io/berriai/litellm:main-latest</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">./proxy_server_config.yaml:/app/proxy_server_config.yaml</span> <span class="hljs-comment"># mount your litellm config.yaml</span><br> <span class="hljs-attr">environment:</span><br> <span class="hljs-bullet">-</span> <span 
class="hljs-string">AWS_ACCESS_KEY_ID=&lt;ACCESS_KEY&gt;</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">AWS_SECRET_ACCESS_KEY=&lt;SECRET_ACCESS_KEY&gt;</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">AWS_REGION_NAME=us-west-2</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">unless-stopped</span><br> <br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">ngpm</span><br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">wget https://github.com/BerriAI/litellm/raw/main/proxy_server_config.yaml<br><span class="hljs-comment"># 修改 master_key 和 model_list</span><br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">model_list:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-attr">model_name:</span> <span class="hljs-string">bedrock-claude-haiku</span><br> <span class="hljs-attr">litellm_params:</span><br> <span class="hljs-attr">model:</span> <span class="hljs-string">bedrock/anthropic.claude-3-haiku-20240307-v1:0</span><br> <br><span class="hljs-attr">general_settings:</span> <br> <span class="hljs-attr">master_key:</span> <span class="hljs-string">sk-1234</span><br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span 
class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs bash">sudo docker-compose up -d<br></code></pre></td></tr></table></figure><ul><li><code>one-api</code> 添加渠道<br><img src="https://img.limour.top/2024/03/20/65fafdb83df04.webp" alt=""></li></ul><h2 id="推荐-部署-SillyTavern">推荐 部署 SillyTavern</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/sillytavern &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/sillytavern &amp;&amp; nano docker-compose.yml<br>sudo docker-compose up -d<br>sudo docker-compose logs<br>nano config/config.yaml <br><span class="hljs-comment"># listen: true</span><br><span class="hljs-comment"># whitelist:</span><br><span class="hljs-comment"># - 172.*.*.*</span><br>sudo docker-compose restart<br>sudo docker-compose logs<br><span class="hljs-comment"># 反代 sillytavern:8080</span><br><span class="hljs-comment"># Custom Endpoint (Base URL) 设置 http://one-api:3000/v1</span><br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span 
class="hljs-attr">version:</span> <span class="hljs-string">&quot;3&quot;</span><br><span class="hljs-attr">services:</span><br> <span class="hljs-attr">sillytavern:</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">ghcr.io/sillytavern/sillytavern:latest</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&quot;./config:/home/node/app/config&quot;</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">&quot;./user:/home/node/app/public/user&quot;</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">unless-stopped</span><br> <br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">ngpm</span><br></code></pre></td></tr></table></figure><h2 id="附加-部署-Next-Web">附加 部署 Next-Web</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/next-web &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/next-web &amp;&amp; nano docker-compose.yml<br>sudo docker-compose up -d<br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span 
class="hljs-string">&#x27;3&#x27;</span><br><span class="hljs-attr">services:</span><br> <span class="hljs-attr">next-web:</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">yidadaa/chatgpt-next-web:latest</span><br> <span class="hljs-attr">environment:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">TZ=Asia/Shanghai</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">BASE_URL=http://one-api:3000/</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">CUSTOM_MODELS=-all,+gpt-3.5-turbo,+gpt-4-turbo,+gpt-4-vision,+claude-3-sonnet,+claude-3-opus,+my-gemini-pro,+my-gemini-pro-vision,gpt-4-g,pplx-online</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">unless-stopped</span><br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">ngpm</span><br></code></pre></td></tr></table></figure><p><img src="https://img.limour.top/2023/10/03/651c368465000.webp" alt=""></p><ul><li><a href="/Docker-bu-shu-Nginx-Proxy-Manager.html#%E6%B7%BB%E5%8A%A0%E5%9F%BA%E6%9C%AC%E8%BA%AB%E4%BB%BD%E9%AA%8C%E8%AF%81">添加基本身份验证</a></li><li>修改 <code>/api/openai</code> 接口的 <code>header</code></li></ul><figure class="highlight nginx"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs nginx"><span class="hljs-attribute">chunked_transfer_encoding</span> <span class="hljs-literal">off</span>;<br><span class="hljs-attribute">proxy_buffering</span> 
<span class="hljs-literal">off</span>;<br><span class="hljs-attribute">proxy_cache</span> <span class="hljs-literal">off</span>;<br><span class="hljs-attribute">set</span> <span class="hljs-variable">$next_header</span> <span class="hljs-variable">$http_authorization</span>;<br><span class="hljs-attribute">if</span> (<span class="hljs-variable">$http_authorization</span> = <span class="hljs-string">&quot;Basic &lt;用户1&gt;&quot;</span>)&#123;<br><span class="hljs-attribute">set</span> <span class="hljs-variable">$next_header</span> <span class="hljs-string">&quot;Bearer &lt;用户1的key&gt;&quot;</span>;<br>&#125;<br><span class="hljs-attribute">if</span> (<span class="hljs-variable">$http_authorization</span> = <span class="hljs-string">&quot;Basic &lt;用户2&gt;&quot;</span>)&#123;<br><span class="hljs-attribute">set</span> <span class="hljs-variable">$next_header</span> <span class="hljs-string">&quot;Bearer &lt;用户2的key&gt;&quot;</span>;<br>&#125;<br><span class="hljs-attribute">proxy_set_header</span> Authorization <span class="hljs-variable">$next_header</span>;<br></code></pre></td></tr></table></figure><p><img src="https://img.limour.top/2024/03/19/65f94b188381b.webp" alt=""></p><h2 id="附加-搭建独角数卡">附加 搭建独角数卡</h2><ul><li><a href="/Docker-bu-shu-Nginx-Proxy-Manager">反向代理</a></li><li>账号:<code>admin</code></li><li>密码:<code>admin</code></li><li>数据库地址填 <code>shop-db</code></li><li>Redis地址 填 <code>shop-redis</code></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/app/shop &amp;&amp; <span class="hljs-built_in">cd</span> ~/app/shop &amp;&amp; nano docker-compose.yml &amp;&amp; nano env.conf<br><span 
class="hljs-built_in">mkdir</span> storage uploads &amp;&amp; <span class="hljs-built_in">chmod</span> -R 777 ./* &amp;&amp; sudo docker-compose up -d<br><span class="hljs-comment"># 访问首页完成安装后</span><br>sudo docker-compose down<br><span class="hljs-comment"># 分别把 - INSTALL=true 改成 - INSTALL=false</span><br><span class="hljs-comment"># 把 APP_DEBUG=true 改成 APP_DEBUG=false</span><br><span class="hljs-comment"># 把 ADMIN_HTTPS=false 改成 ADMIN_HTTPS=true</span><br>sudo docker-compose up -d<br></code></pre></td></tr></table></figure><figure class="highlight yml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br></pre></td><td class="code"><pre><code class="hljs yml"><span class="hljs-attr">version:</span> <span class="hljs-string">&quot;3&quot;</span><br> <br><span class="hljs-attr">services:</span><br> 
<span class="hljs-attr">shop:</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">ghcr.io/apocalypsor/dujiaoka:latest</span><br> <span class="hljs-attr">environment:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">TZ=Asia/Shanghai</span><br> <span class="hljs-comment"># - INSTALL=false</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">INSTALL=true</span><br> <span class="hljs-comment"># - MODIFY=true</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">ADMIN_HTTPS=true</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">./env.conf:/dujiaoka/.env</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">./uploads:/dujiaoka/public/uploads</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">./storage:/dujiaoka/storage</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">always</span><br> <br> <span class="hljs-attr">shop-db:</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">mariadb:focal</span><br> <span class="hljs-attr">restart:</span> <span class="hljs-string">always</span><br> <span class="hljs-attr">environment:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">MYSQL_ROOT_PASSWORD=changeyourpassword</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">MYSQL_DATABASE=dujiaoka</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">MYSQL_USER=dujiaoka</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">MYSQL_PASSWORD=changeyourpassword</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">./mysql:/var/lib/mysql</span><br> <br> <span class="hljs-attr">shop-redis:</span><br> <span class="hljs-attr">image:</span> <span class="hljs-string">redis:alpine</span><br> <span class="hljs-attr">restart:</span> 
<span class="hljs-string">always</span><br> <span class="hljs-attr">volumes:</span><br> <span class="hljs-bullet">-</span> <span class="hljs-string">./redis:/data</span><br> <br><span class="hljs-attr">networks:</span><br> <span class="hljs-attr">default:</span><br> <span class="hljs-attr">external:</span> <span class="hljs-literal">true</span><br> <span class="hljs-attr">name:</span> <span class="hljs-string">ngpm</span><br></code></pre></td></tr></table></figure><figure class="highlight plaintext"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span 
class="line">46</span><br><span class="line">47</span><br></pre></td><td class="code"><pre><code class="hljs conf">APP_NAME=璃墨的小卖部<br>APP_ENV=local<br>APP_KEY=base64:rKwRuI6eRpCw/9e2XZKKGj/Yx3iZy5e7+FQ6+aQl8Zg=<br>APP_DEBUG=true<br>APP_URL=https://shop.limour.top<br><br>LOG_CHANNEL=stack<br><br># 数据库配置<br>DB_CONNECTION=mysql<br>DB_HOST=shop-db<br>DB_PORT=3306<br>DB_DATABASE=dujiaoka<br>DB_USERNAME=dujiaoka<br>DB_PASSWORD=changeyourpassword<br><br># redis配置<br>REDIS_HOST=shop-redis<br>REDIS_PASSWORD=<br>REDIS_PORT=6379<br><br>BROADCAST_DRIVER=log<br>SESSION_DRIVER=file<br>SESSION_LIFETIME=120<br><br># 缓存配置<br># file为磁盘文件 redis为内存级别<br># redis为内存需要安装好redis服务端并配置<br>CACHE_DRIVER=redis<br><br># 异步消息队列<br># sync为同步 redis为异步<br># 使用redis异步需要安装好redis服务端并配置<br>QUEUE_CONNECTION=redis<br><br># 后台语言<br>## zh_CN 简体中文<br>## zh_TW 繁体中文<br>## en 英文<br>DUJIAO_ADMIN_LANGUAGE=zh_CN<br><br># 后台登录地址<br>ADMIN_ROUTE_PREFIX=/admin<br><br># 是否开启https (前端开启了后端也必须为true)<br># 后台登录出现0err或者其他登录异常问题,大概率是开启了https而后台没有开启,把下面的false改为true即可<br>ADMIN_HTTPS=true<br></code></pre></td></tr></table></figure><p><img src="https://img.limour.top/2023/10/05/651d95446340c.webp" alt=""></p>]]></content:encoded>
  226. <category domain="https://hexo.limour.top/tags/llama/">llama</category>
  227. <category domain="https://hexo.limour.top/tags/docker/">docker</category>
  228. <category domain="https://hexo.limour.top/tags/ngpm/">ngpm</category>
  229. <category domain="https://hexo.limour.top/tags/openai/">openai</category>
  230. <comments>https://hexo.limour.top/Aggregating-Azure-and-OpenAI-APIs-with-OneAPI#disqus_thread</comments>
  231. </item>
  232. <item>
  233. <title>【设定】亿万年前的学院制文明</title>
  234. <link>https://hexo.limour.top/The-Academy-System-Civilization-From-Hundreds-of-Millions-of-Years-Ago</link>
  235. <guid>https://hexo.limour.top/The-Academy-System-Civilization-From-Hundreds-of-Millions-of-Years-Ago</guid>
  236. <pubDate>Mon, 02 Oct 2023 06:34:26 GMT</pubDate>
  237. <description>这篇文章讲述了一个被遗忘在时光尘埃中的学院制文明,它存在于亿万年前的罗娑星系。这个文明没有中央政府,而是由各个独立的学院组成,通过平等协商来决定公共事务。学院制的核心理念是去中心化和自主管理。每个学院都有自己的财产、政策和成员组成。不同学院之间按需交流合作,宏观层面的重大决策由各院代表商议达成共识。这个社会还创立了一套动态的评分机制,以体现不同人才智慧贡献的差异。然而,由于过于注重个体自由,某些公共项目难以有效组织实施。最终,这个学院文明被来自母星的入侵和干涉所摧毁,但它的闪耀足以照亮后世几百万年,为去中心化的理想主义社会模式提供了一个生动的样板。</description>
  238. <content:encoded><![CDATA[<blockquote><p>人设来自<a href="/-chang-shi-Claude2-xie-yong-sheng-zhe-de-ri-ji">《永生者的日记》</a><br>由 <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9jbGF1ZGUuYWk=" rel="noopener external nofollow noreferrer">Claude2</a> 和 <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9jaGF0Lm9wZW5haS5jb20=" rel="noopener external nofollow noreferrer">GPT-3.5</a> 联合创作。</p></blockquote><p>我是一名永生者,在漫长的宇宙历史中,见证了无数文明的生生不息。今天我要向你详述的,是一段被遗忘在时光尘埃中的故事——亿万年前罗娑星系里的学院制文明。</p><p>罗娑星系,这个拥有八大行星和三十多个卫星的恒星系统,曾孕育出一个独特的文明。起初,这只是母星上倦于中央集权的群体进行的一次移民活动。他们在寻找新的家园的过程中,逐渐形成了一整套新的社会理念。这就是后来被称为“学院制”的思想体系。</p><p>如果让我用一个词来描述亿万年前罗娑星系中的学院制文明,那就是“理想主义”。这个政体中没有统一的中央权力,一切公共事务都是通过各个学院的平等协商来决定的。它可谓是一个自治与合作的典范,也是我在漫长旅途中见过的最理想的社会之一。</p><p>这个社会根本没有所谓的中央政府,全部权力来源于学院。学院是这个社会的基本单位,每个学院都是一个独立的实体,拥有自己的财产、政策、成员组成。不同学院之间按需交流合作,但没有统一的行政系统。宏观层面的重大决策全靠各院代表进行商议,逐步达成共识。</p><p>学院制的核心理念非常简单,即去中心化和自主管理。在这片新的土地上,形成的不是一个个城市国家,而是一个个独立的学院。我第一次造访时,深深被这些庄严壮美的学院建筑所折服。漂浮在费肯星环绕轨道上的伽利略学院,采用了高耸入云的古典式柱廊;植根于莱曼森林之中的生态学院,建筑融入了周边的自然环境;甚至深入艾尔星海沟的海洋学院,也建造了屏障把整个院区包裹其中…每一个学院都展现出独特的建筑风格和理念。这是我在漫长的宇宙旅行中,首次见到的没有统一规划的城市建设,却给我一种诡异的和谐感受。</p><p>我第一次造访时,受邀进入了伽利略自然科学院的一场商议会。场内空气明朗,窗外是旋转的星河美景。学者们分析问题的语气平和而理性,没有任何一个人显示出权力欲望。最后的决定顺理成章地形成了,没有任何人强加意志。这种见证团体智慧的集中体现的过程令我感动。</p><p>商议制度确保了每个人都有平等的发言权。但为了体现不同人才智慧贡献的差异,这个社会还创立了一套动态的评分机制。系统会综合评估每个人的智慧、道德与艺术修为,并据此给出一个综合影响指数。在公共商议中,不同人的发言权重会根据这一指数进行调整。这虽然不够平等,但提升了集体决策的质量。</p><p>我也走访过其他类型的学院,它们各有侧重。诺贝尔和平学院聚集了众多追求非暴力理念的学者,他们的道德修养往往超群出众。建筑学院的成员则在艺术创作上有独到的造诣。物理研究院的智慧指数则是所有院中最高的。这种专业化却又自主的布局,形成了一个充满生机的社会结构。</p><p>各个学院之间由联合学生会进行协调。这个组织不制定决议,但起到传播信息、建立共识的重要作用。它还负责对评分系统的算法进行定期优化,确保其公平性。可以说,如果没有学生会的润滑作用,这台庞大的机器就难以高效运转。</p><p>然而理想终难以避免缺陷。这一政体过于注重个体自由,导致整体上某些公共项目难以有效组织实施。在后期,由于新技术革新逐渐滞后,最终导致它在竞争中不敌更加慎密的集权文明,只存在了区区几万年便黯然消亡。学院之间缺乏足够紧密的合作,也成为它瓦解的原因之一。但这飘渺的理想国度,至今还在我脑海中绽放着光辉。它代表了一种可叹的极致追求,值得所有文明学习和缅怀。</p><p>相较于亿万年的宇宙历史,几万年只是短暂的一瞬。然而在它辉煌的顶点,这个学院文明焕发出的光辉却是惊人的。音乐、诗歌、绘画、舞蹈都达到了前所未有的高度,观赏一场他们的艺术盛会,可以让我这个永生者都感到震撼和心醉。他们崇尚自由,追求真理,不畏权威,激发了无限的创造力与想象力。哪怕过去了亿万年,那些音符仿佛还回荡在我的脑海。</p><p>这个理想主义的文明最终还是无法抵挡来自母星的入侵和干涉。然而,它的闪耀足以照亮后世几百万年
。它为去中心化的理想主义社会模式提供了一个生动的样板,也启发了后来许多星际文明。每当我忆起那座空中之城,和学者们灵动的谈吐,总觉得那是一场值得永生追忆的梦。</p><p>亲爱的朋友,我已尽我所能,详述这个久远的学院文明。限于我模糊的记忆,许多细节已难考证。但我衷心希望,通过这样的讲述,那段光辉历史能得以重现在你的想象中,哪怕只是片段与残影。这段追忆,我献给所有向往自由、平等、理想的生命。也许未来的某一天,这个文明模式还会在宇宙中再度兴盛。</p><p><img src="https://img.limour.top/2023/10/02/651a652fdc4a8.webp" alt="记忆中的学院制文明"></p>]]></content:encoded>
  239. <category domain="https://hexo.limour.top/tags/%E6%8E%A2%E7%B4%A2/">探索</category>
  240. <category domain="https://hexo.limour.top/tags/%E8%AE%BE%E5%AE%9A/">设定</category>
  241. <category domain="https://hexo.limour.top/tags/%E6%96%87%E6%98%8E/">文明</category>
  242. <category domain="https://hexo.limour.top/tags/%E6%97%A0%E6%94%BF%E5%BA%9C%E4%B8%BB%E4%B9%89/">无政府主义</category>
  243. <comments>https://hexo.limour.top/The-Academy-System-Civilization-From-Hundreds-of-Millions-of-Years-Ago#disqus_thread</comments>
  244. </item>
  245. <item>
  246. <title>【学习】使用GATK4.0找SNP</title>
  247. <link>https://hexo.limour.top/shi-yong-GATK-zhao-SNP</link>
  248. <guid>https://hexo.limour.top/shi-yong-GATK-zhao-SNP</guid>
  249. <pubDate>Sun, 24 Sep 2023 10:49:18 GMT</pubDate>
  250. <description>&lt;p&gt;&lt;img src=&quot;https://img.limour.top/2023/09/26/651299352bcf0.webp&quot; alt=&quot;&quot;&gt;&lt;/p&gt;
  251. &lt;h2 id=&quot;配置环境&quot;&gt;配置环境&lt;/h2&gt;
  252. &lt;ul&gt;
  253. &lt;li&gt;&lt;a href=&quot;/-ji-lu--an-zhuang-sheng-xin-de-dai-ma-bian-xie-huan-jing&quot;&gt;基础编程环境&lt;/a&gt;&lt;/li&gt;&lt;/ul&gt;</description>
  254. <content:encoded><![CDATA[<p><img src="https://img.limour.top/2023/09/26/651299352bcf0.webp" alt=""></p><h2 id="配置环境">配置环境</h2><ul><li><a href="/-ji-lu--an-zhuang-sheng-xin-de-dai-ma-bian-xie-huan-jing">基础编程环境</a></li><li><a href="/-fu-ke-GitHub-wen-jian-jia-su">GitHub 下载加速</a></li><li><a href="/-ji-lu-SOCKS5-zhuan-QUIC">可能需要用到的加速服务</a></li></ul><h3 id="SRA工具">SRA工具</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><code class="hljs bash">conda create -n sra_tools -c bioconda sra-tools<br>conda activate sra_tools<br>conda install -c conda-forge lftp -y<br>conda install -c conda-forge pigz -y <span class="hljs-comment"># 或许换成 pbgzip 更好,此时将 -p 换成 -n 来指定线程数</span><br>conda install -c bioconda pbgzip -y<br>prefetch<br><span class="hljs-comment"># vdb-config -i # 设置 HTTP 代理</span><br></code></pre></td></tr></table></figure><h3 id="GATK4">GATK4</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br></pre></td><td class="code"><pre><code class="hljs bash">conda create -n GATK4 -c bioconda gatk4<br>conda activate GATK4<br>conda install -c bioconda samtools -y<br>conda install -c bioconda bwa -y<br>conda install -c bioconda pbgzip -y <span class="hljs-comment"># 并行版bgzip,bgzip是修改过的gzip,更适合生信领域</span><br>conda install -c bioconda tabix -y <span class="hljs-comment"># 操作VCF文件,与bgzip配套</span><br><span 
class="hljs-comment"># conda install -c bioconda fastqc -y # 改用fastp了</span><br><span class="hljs-comment"># conda install -c bioconda trimmomatic -y # 改用fastp了</span><br>conda install -c bioconda fastp -y<br><span class="hljs-comment"># conda install -c bioconda bcftools -y # 用于重命名染色体</span><br><span class="hljs-comment"># ln -s $CONDA_PREFIX/lib/libgsl.so $CONDA_PREFIX/lib/libgsl.so.25 # 无效,放弃</span><br><span class="hljs-comment"># conda create -n GATK4-VEP -c bioconda ensembl-vep -y # 根正苗红的突变注释软件,不懂有什么奇怪依赖,解析环境半天</span><br></code></pre></td></tr></table></figure><ul><li>BWA是DNA比对工具(不会跨外显子比对),<a href="/STAR--yi-jian-jiao-ben">STAR</a>是RNA比对工具</li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly96aHVhbmxhbi56aGlodS5jb20vcC8zNzM0NTk1NA==" rel="noopener external nofollow noreferrer">找SNP不推荐用RNAseq的数据</a></li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly93d3cuamlhbnNodS5jb20vcC82MDE0NjkxOTRiNWU=" rel="noopener external nofollow noreferrer">各种比对工具的说明</a></li></ul><h2 id="准备数据">准备数据</h2><h3 id="参考数据">参考数据</h3><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly93d3cubmNiaS5ubG0ubmloLmdvdi9nZW5vbWUvYnJvd3Nl" rel="noopener external nofollow noreferrer">NCBI上各物种的参考序列</a>,可以找到RefSeq,比如Human是GCF_000001405</li><li>知道序号后可以到<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9mdHAubmNiaS5ubG0ubmloLmdvdi9nZW5vbWVzL2FsbC9HQ0Yv" rel="noopener external nofollow noreferrer">FTP</a>上下载相应的<code>genomic.fna.gz</code>文件</li><li>比如GCF_000001405,依次进入<code>000/001/405</code>即可找到对应的文件</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><code class="hljs bash">wget 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.40_GRCh38.p14/GCF_000001405.40_GRCh38.p14_genomic.fna.gz -O GRCh38.p14.fna.gz<br>conda run -n sra_tools pigz -d GRCh38.p14.fna.gz <span class="hljs-comment"># 得到 GRCh38.p14.fna</span><br><span class="hljs-comment"># 创建索引</span><br>samtools faidx GRCh38.p14.fna <span class="hljs-comment"># 得到 GRCh38.p14.fna.fai</span><br><span class="hljs-comment"># 查看一段序列</span><br>samtools faidx GRCh38.p14.fna NC_000001.11:1000000-1000200<br><span class="hljs-comment"># 创建比对索引</span><br>bwa index GRCh38.p14.fna <span class="hljs-comment"># 会自动在 bwtsw, is or rb2 三种算法中选择合适的</span><br><span class="hljs-comment"># 创建dict</span><br>gatk CreateSequenceDictionary -R GRCh38.p14.fna<br></code></pre></td></tr></table></figure><ul><li>最后得到的RefSeq目录结构如下</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-comment"># conda create -n linux -c conda-forge tree</span><br><span class="hljs-comment"># conda run -n linux tree -f -h --du</span><br>[8.5G] .<br>├── [ 79K] ./GRCh38.p14.dict<br>├── [3.1G] ./GRCh38.p14.fna<br>├── [ 21K] ./GRCh38.p14.fna.amb<br>├── [ 90K] ./GRCh38.p14.fna.ann<br>├── [3.1G] ./GRCh38.p14.fna.bwt<br>├── [ 26K] ./GRCh38.p14.fna.fai<br>├── [786M] ./GRCh38.p14.fna.pac<br>└── [1.5G] ./GRCh38.p14.fna.sa<br></code></pre></td></tr></table></figure><h3 id="已知SNP">已知SNP</h3><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9nYXRrLmJyb2FkaW5zdGl0dXRlLm9yZy9oYy9lbi11cy9hcnRpY2xlcy8zNjAwMzU4OTA4MTEtUmVzb3VyY2UtYnVuZGxl" rel="noopener external nofollow 
noreferrer">GATK官网提供了一些数据</a></li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9mdHAubmNiaS5ubG0ubmloLmdvdi9zbnAvbGF0ZXN0X3JlbGVhc2UvVkNGLw==" rel="noopener external nofollow noreferrer">NCBI提供了dbSNP</a></li><li><code>lftp ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/</code>,密码空,直接回车</li><li>下载和参考数据相对应的<a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXN0LmdpdGh1Yi5jb20vTGltb3VyLWRldi8yZDljMDI1MDcxNGJlYzFjZTVjNTk4OTM2M2JiZmExMg==" rel="noopener external nofollow noreferrer">indels.hg38.vcf</a></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs bash">wget https://ftp.ncbi.nlm.nih.gov/snp/latest_release/VCF/GCF_000001405.40.gz -O GRCh38.dbSNP.ncbi.vcf.gz<br></code></pre></td></tr></table></figure><h4 id="转换染色体名称到NCBI的参考文件">转换染色体名称到NCBI的参考文件</h4><ul><li><code>assembly_report.txt</code> 在下载NCBI参考数据FTP目录下</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br></pre></td><td class="code"><pre><code class="hljs bash">wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.40_GRCh38.p14/GCF_000001405.40_GRCh38.p14_assembly_report.txt -O GRCh38.p14_assembly_report.txt <br>grep -e <span class="hljs-string">&#x27;^[^#]&#x27;</span> GRCh38.p14_assembly_report.txt | awk -F<span class="hljs-string">&#x27;\t&#x27;</span> <span class="hljs-string">&#x27;&#123; print $NF, $7 &#125;&#x27;</span> | sed <span class="hljs-string">&#x27;s/\r / /g&#x27;</span> &gt; rename_file.txt<br>conda create -n something_fuck -c conda-forge mamba<br>conda activate something_fuck<br>mamba install -c bioconda bcftools<br>bcftools annotate --rename-chrs rename_file.txt 
-o Homo_sapiens_assembly38.known_indels.ncbi.vcf Homo_sapiens_assembly38.known_indels.vcf<br>bcftools annotate --rename-chrs rename_file.txt -o hapmap_3.3.hg38.ncbi.vcf hapmap_3.3.hg38.vcf<br>bcftools annotate --rename-chrs rename_file.txt -o Mills_and_1000G_gold_standard.indels.hg38.ncbi.vcf Mills_and_1000G_gold_standard.indels.hg38.vcf<br>conda run -n GATK4 pbgzip -n 4 *.ncbi.vcf <span class="hljs-comment"># 似乎一次只压缩一个,多运行几次</span><br></code></pre></td></tr></table></figure><h4 id="建立索引">建立索引</h4><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano knownSitesIndex.sh &amp;&amp; <span class="hljs-built_in">chmod</span> +x knownSitesIndex.sh<br>./knownSitesIndex.sh<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-built_in">source</span> activate GATK4<br><span class="hljs-comment">#设置knownSites数据存放目录</span><br>knownSites=/home/jovyan/upload/knownSites<br><span class="hljs-keyword">for</span> file <span class="hljs-keyword">in</span> <span class="hljs-variable">$knownSites</span>/*.ncbi.vcf.gz<br><span class="hljs-keyword">do</span><br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$file</span><br>gatk IndexFeatureFile \<br> -I <span class="hljs-variable">$file</span><br><span class="hljs-keyword">done</span><br></code></pre></td></tr></table></figure><ul><li>最后得到的knownSites目录结构如下</li></ul><figure class="highlight 
bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs bash">[ 25G] .<br>├── [ 25G] ./GRCh38.dbSNP.ncbi.vcf.gz<br>├── [4.2M] ./GRCh38.dbSNP.ncbi.vcf.gz.tbi<br>├── [ 79K] ./GRCh38.p14_assembly_report.txt<br>├── [ 61M] ./hapmap_3.3.hg38.ncbi.vcf.gz<br>├── [2.1M] ./hapmap_3.3.hg38.ncbi.vcf.gz.tbi<br>├── [ 58M] ./Homo_sapiens_assembly38.known_indels.ncbi.vcf.gz<br>├── [2.1M] ./Homo_sapiens_assembly38.known_indels.ncbi.vcf.gz.tbi<br>├── [ 20M] ./Mills_and_1000G_gold_standard.indels.hg38.ncbi.vcf.gz<br>├── [2.0M] ./Mills_and_1000G_gold_standard.indels.hg38.ncbi.vcf.gz.tbi<br>└── [ 23K] ./rename_file.txt<br></code></pre></td></tr></table></figure><h3 id="测序数据">测序数据</h3><ul><li>WGS的DNA测序数据</li><li>以<a href="https://hexo.limour.top/go/#aHR0cHM6Ly93d3cubmNiaS5ubG0ubmloLmdvdi9zcmEvU1JYMjQ3MjQ5" rel="noopener external nofollow noreferrer">SRX247249</a>做学习的示例数据,感谢<a href="https://hexo.limour.top/go/#aHR0cHM6Ly93ZWIuYXJjaGl2ZS5vcmcvd2ViLzIwMjIwOTMwMDcwOTI4L2h0dHA6Ly93d3cuYmlvdHJhaW5lZS5jb20vdGhyZWFkLTEzNzYtMS0xLmh0bWw=" rel="noopener external nofollow noreferrer">曾老师</a>指路</li><li>下载方式见<a href="/cong-ENI-shu-ju-ku-xia-zai-fastq-wen-jian#%E4%BB%8E-NCBI-%E6%95%B0%E6%8D%AE%E5%BA%93%E4%B8%8B%E8%BD%BD">SRA文件转FASTQ文件</a></li><li>也可以到<a href="https://hexo.limour.top/go/#aHR0cHM6Ly93d3cuZWJpLmFjLnVrL2VuYS9icm93c2VyL3ZpZXcvU1JYMjQ3MjQ5" rel="noopener external nofollow noreferrer">ENA数据库</a>上下载</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs 
bash">conda run -n sra_tools prefetch --option-file SRR_Acc_List.txt<br>nano 11.sh &amp;&amp; <span class="hljs-built_in">chmod</span> +x 11.sh<br>./11.sh<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-built_in">source</span> activate sra_tools<br><span class="hljs-comment">#任务名</span><br>TASKN=SRX247249<br><span class="hljs-comment">#设置SRA根目录, pwd是当前目录</span><br>ROOTDIR=`<span class="hljs-built_in">pwd</span>`<br><span class="hljs-comment">#设置rawData存放目录</span><br>rawData=/home/jovyan/upload/rawData/<span class="hljs-variable">$TASKN</span><br><span class="hljs-built_in">mkdir</span> -p <span class="hljs-variable">$rawData</span><br> <br><span class="hljs-built_in">cd</span> <span class="hljs-variable">$ROOTDIR</span><br><span class="hljs-keyword">for</span> file <span class="hljs-keyword">in</span> `<span class="hljs-built_in">cat</span> SRR_Acc_List.txt`<br><span class="hljs-keyword">do</span><br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$file</span><br><span class="hljs-built_in">mkdir</span> <span class="hljs-variable">$rawData</span>/<span class="hljs-variable">$file</span><br><span class="hljs-built_in">cd</span> <span class="hljs-variable">$rawData</span>/<span 
class="hljs-variable">$file</span><br>fasterq-dump --split-3 <span class="hljs-variable">$ROOTDIR</span>/<span class="hljs-variable">$file</span> -e 6<br>pigz -p 6 *<br><span class="hljs-keyword">done</span><br></code></pre></td></tr></table></figure><h3 id="rawData质控">rawData质控</h3><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly96aHVhbmxhbi56aGlodS5jb20vcC8yODgwMjA4Mw==" rel="noopener external nofollow noreferrer">原始数据质量判断​</a></li><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly96aHVhbmxhbi56aGlodS5jb20vcC8yODkyNDc5Mw==" rel="noopener external nofollow noreferrer">原始数据过滤工具</a></li></ul><h4 id="质量判断(可跳过)">质量判断(可跳过)</h4><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano qc.sh &amp;&amp; <span class="hljs-built_in">chmod</span> +x qc.sh<br>./qc.sh<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-built_in">source</span> activate GATK4<br><span class="hljs-comment">#任务名</span><br>TASKN=SRX247249<br><span class="hljs-comment">#设置rawData存放目录</span><br>rawData=/home/jovyan/upload/rawData/<span class="hljs-variable">$TASKN</span><br><span 
class="hljs-comment">#设置qc结果的输出目录</span><br>QCDIR=/home/jovyan/upload/rawData/<span class="hljs-variable">$TASKN</span><span class="hljs-string">&quot;_fastqc&quot;</span><br><span class="hljs-built_in">mkdir</span> -p <span class="hljs-variable">$QCDIR</span><br> <br><span class="hljs-keyword">for</span> file <span class="hljs-keyword">in</span> <span class="hljs-variable">$rawData</span>/*<br><span class="hljs-keyword">do</span><br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$file</span><br>SAMPLE=<span class="hljs-variable">$&#123;file##*/&#125;</span><br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$QCDIR</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><br><span class="hljs-built_in">mkdir</span> <span class="hljs-variable">$QCDIR</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><br>fastqc -o <span class="hljs-variable">$QCDIR</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span> --threads=6 `<span class="hljs-built_in">ls</span> <span class="hljs-variable">$rawData</span>/<span class="hljs-variable">$SAMPLE</span>/*`<br><span class="hljs-keyword">done</span><br></code></pre></td></tr></table></figure><ul><li>对于PE而言,正向和反向reads的测量过程是独立的,将当成两次SE来处理</li><li>最后的报告中:</li><li>Q20的碱基要在95%以上(最差不低于90%)</li><li>Q30要求大于85%(最差也不要低于80%)</li><li>对于人类来说,GC含量应该在40%左右</li></ul><h4 id="fastp一键质控">fastp一键质控</h4><ul><li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL09wZW5HZW5lL2Zhc3Rw" rel="noopener external nofollow noreferrer">fastp的详细说明</a>; <a href="https://hexo.limour.top/go/#aHR0cHM6Ly96aHVhbmxhbi56aGlodS5jb20vcC8zMzYwMTY5MQ==" rel="noopener external nofollow noreferrer">中文介绍</a></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano qc.sh 
&amp;&amp; <span class="hljs-built_in">chmod</span> +x qc.sh<br>./qc.sh<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-built_in">source</span> activate GATK4<br><span class="hljs-comment">#任务名</span><br>TASKN=SRX247249<br><span class="hljs-comment">#设置rawData存放目录</span><br>rawData=/home/jovyan/upload/rawData/<span class="hljs-variable">$TASKN</span><br><span class="hljs-comment">#设置qc结果的输出目录</span><br>QCDIR=/home/jovyan/upload/rawData/<span class="hljs-variable">$TASKN</span><span class="hljs-string">&quot;_fastp&quot;</span><br><span class="hljs-built_in">mkdir</span> -p <span class="hljs-variable">$QCDIR</span><br><span class="hljs-comment">#设置cleanData的存放目录</span><br>CLEAN=/home/jovyan/upload/cleanData/<span class="hljs-variable">$TASKN</span><br><span class="hljs-built_in">mkdir</span> -p <span class="hljs-variable">$CLEAN</span><br> <br><span 
class="hljs-keyword">for</span> file <span class="hljs-keyword">in</span> <span class="hljs-variable">$rawData</span>/*<br><span class="hljs-keyword">do</span><br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$file</span><br>SAMPLE=<span class="hljs-variable">$&#123;file##*/&#125;</span><br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$QCDIR</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><br><span class="hljs-built_in">mkdir</span> <span class="hljs-variable">$QCDIR</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$CLEAN</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><br><span class="hljs-built_in">mkdir</span> <span class="hljs-variable">$CLEAN</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><br><span class="hljs-built_in">cd</span> <span class="hljs-variable">$QCDIR</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><br>fastp -c -w 4 \<br>-o <span class="hljs-variable">$CLEAN</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/out.R1.fq.gz&quot;</span> \<br>-O <span class="hljs-variable">$CLEAN</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/out.R2.fq.gz&quot;</span> \<br>-h <span class="hljs-variable">$QCDIR</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/fastp.html&quot;</span> \<br>-j <span class="hljs-variable">$QCDIR</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/fastp.json&quot;</span> \<br>-i `<span 
class="hljs-built_in">ls</span> <span class="hljs-variable">$rawData</span>/<span class="hljs-variable">$SAMPLE</span>/*_1.fastq.gz` \<br>-I `<span class="hljs-built_in">ls</span> <span class="hljs-variable">$rawData</span>/<span class="hljs-variable">$SAMPLE</span>/*_2.fastq.gz`<br><span class="hljs-keyword">done</span><br></code></pre></td></tr></table></figure><ul><li>最后得到的cleanData目录结构如下</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><code class="hljs bash">[ 23G] .<br>├── [9.1G] ./SRR799559<br>│ ├── [4.4G] ./SRR799559/out.R1.fq.gz<br>│ └── [4.7G] ./SRR799559/out.R2.fq.gz<br>├── [7.0G] ./SRR799560<br>│ ├── [3.4G] ./SRR799560/out.R1.fq.gz<br>│ └── [3.5G] ./SRR799560/out.R2.fq.gz<br>└── [7.4G] ./SRR799561<br> ├── [3.6G] ./SRR799561/out.R1.fq.gz<br> └── [3.8G] ./SRR799561/out.R2.fq.gz<br></code></pre></td></tr></table></figure><h2 id="数据比对">数据比对</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano bwa_and_markdup.sh &amp;&amp; <span class="hljs-built_in">chmod</span> +x bwa_and_markdup.sh<br>./bwa_and_markdup.sh<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span 
class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-built_in">source</span> activate GATK4<br><span class="hljs-comment">#任务名</span><br>TASKN=SRX247249<br><span class="hljs-comment">#设置cleanData的存放目录</span><br>CLEAN=/home/jovyan/upload/cleanData/<span class="hljs-variable">$TASKN</span><br><span class="hljs-comment">#设置RefSeq的存放目录</span><br>RefSeq=/home/jovyan/data/refseq/GRCh38.p14.fna<br><span class="hljs-comment">#设置Read Group信息,见 https://gatk.broadinstitute.org/hc/en-us/articles/360035890671-Read-groups</span><br>RGroup_PL=ILLUMINA <span class="hljs-comment"># 所用的测序平台:ILLUMINA,SLX,SOLEXA,SOLID,454,LS454,COMPLETE,PACBIO,IONTORRENT,CAPILLARY,HELICOS或UNKNOWN。CG测序为COMPLETE</span><br>RGroup_SM=<span class="hljs-variable">$TASKN</span> <span class="hljs-comment"># 样本ID,同一个样本可能有多个lane,此时用样本ID相关联</span><br>RGroup=<span class="hljs-string">&#x27;PL:&#x27;</span><span 
class="hljs-variable">$RGroup_PL</span><span class="hljs-string">&#x27;\tSM:&#x27;</span><span class="hljs-variable">$RGroup_SM</span><br><span class="hljs-comment">#设置BAM的存放目录</span><br>BAM=/home/jovyan/upload/BAM/<span class="hljs-variable">$TASKN</span><br><span class="hljs-built_in">mkdir</span> -p <span class="hljs-variable">$BAM</span><br> <br><span class="hljs-keyword">for</span> file <span class="hljs-keyword">in</span> <span class="hljs-variable">$CLEAN</span>/*<br><span class="hljs-keyword">do</span><br> <br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$file</span><br>SAMPLE=<span class="hljs-variable">$&#123;file##*/&#125;</span><br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$BAM</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><br><span class="hljs-built_in">mkdir</span> <span class="hljs-variable">$BAM</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><br><span class="hljs-built_in">echo</span> <span class="hljs-string">&#x27;@RG\tID:&#x27;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&#x27;\t&#x27;</span><span class="hljs-variable">$RGroup</span><br> <br><span class="hljs-comment">#1 比对</span><br>bwa mem -t 4 -M -R <span class="hljs-string">&#x27;@RG\tID:&#x27;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&#x27;\t&#x27;</span><span class="hljs-variable">$RGroup</span> <span class="hljs-variable">$RefSeq</span> `<span class="hljs-built_in">ls</span> <span class="hljs-variable">$CLEAN</span>/<span class="hljs-variable">$SAMPLE</span>/*` \<br>| samtools view -Sb - &gt; <span class="hljs-variable">$BAM</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/raw.bam&quot;</span><br> <br><span class="hljs-comment">#2 排序</span><br>samtools <span class="hljs-built_in">sort</span> -@ 4 -m 4G -O bam 
-o <span class="hljs-variable">$BAM</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/sorted.bam&quot;</span> <span class="hljs-variable">$BAM</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/raw.bam&quot;</span><br><span class="hljs-built_in">rm</span> <span class="hljs-variable">$BAM</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/raw.bam&quot;</span><br> <br><span class="hljs-comment">#3 标记PCR重复</span><br>gatk MarkDuplicates -I <span class="hljs-variable">$BAM</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/sorted.bam&quot;</span> \<br>-O <span class="hljs-variable">$BAM</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/sorted.markdup.bam&quot;</span> \<br>-M <span class="hljs-variable">$BAM</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/sorted.markdup_metrics.txt&quot;</span><br><span class="hljs-built_in">rm</span> <span class="hljs-variable">$BAM</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/sorted.bam&quot;</span><br> <br><span class="hljs-comment">#4 创建比对索引文件</span><br>samtools index <span class="hljs-variable">$BAM</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/sorted.markdup.bam&quot;</span><br> <br><span class="hljs-keyword">done</span><br></code></pre></td></tr></table></figure><ul><li>最后得到的BAM目录结构如下</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span 
class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><code class="hljs bash">[ 30G] .<br>├── [ 11G] ./SRR799559<br>│ ├── [ 11G] ./SRR799559/sorted.markdup.bam<br>│ ├── [4.5M] ./SRR799559/sorted.markdup.bam.bai<br>│ └── [3.7K] ./SRR799559/sorted.markdup_metrics.txt<br>├── [8.8G] ./SRR799560<br>│ ├── [8.8G] ./SRR799560/sorted.markdup.bam<br>│ ├── [3.9M] ./SRR799560/sorted.markdup.bam.bai<br>│ └── [3.7K] ./SRR799560/sorted.markdup_metrics.txt<br>└── [9.6G] ./SRR799561<br> ├── [9.5G] ./SRR799561/sorted.markdup.bam<br> ├── [4.1M] ./SRR799561/sorted.markdup.bam.bai<br> └── [3.7K] ./SRR799561/sorted.markdup_metrics.txt<br></code></pre></td></tr></table></figure><h3 id="同样本合并">同样本合并</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano merge.sh &amp;&amp; <span class="hljs-built_in">chmod</span> +x merge.sh<br>./merge.sh<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-built_in">source</span> activate GATK4<br><span 
class="hljs-comment">#任务名</span><br>TASKN=SRX247249<br><span class="hljs-comment">#设置BAM的存放目录</span><br>BAM=/home/jovyan/upload/BAM/<span class="hljs-variable">$TASKN</span><br><span class="hljs-comment">#设置merge后的数据存放目录</span><br>MERGEDBAM=/home/jovyan/upload/merged/<span class="hljs-variable">$TASKN</span>/SAMPLE1<br><span class="hljs-built_in">mkdir</span> -p <span class="hljs-variable">$MERGEDBAM</span><br> <br>samtools merge <span class="hljs-variable">$MERGEDBAM</span><span class="hljs-string">&quot;/sorted.markdup.bam&quot;</span> \<br>`find <span class="hljs-string">&quot;<span class="hljs-variable">$BAM</span>&quot;</span> -name <span class="hljs-string">&quot;sorted.markdup.bam&quot;</span> -<span class="hljs-built_in">type</span> f -<span class="hljs-built_in">exec</span> <span class="hljs-built_in">readlink</span> -f &#123;&#125; \;`<br>samtools index <span class="hljs-variable">$MERGEDBAM</span><span class="hljs-string">&quot;/sorted.markdup.bam&quot;</span><br></code></pre></td></tr></table></figure><ul><li>最后得到的MERGED目录结构如下</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs bash">[ 21G] .<br>└── [ 21G] ./SAMPLE1<br> ├── [ 21G] ./SAMPLE1/sorted.markdup.bam<br> └── [6.8M] ./SAMPLE1/sorted.markdup.bam.bai<br></code></pre></td></tr></table></figure><h3 id="局部重比对">局部重比对</h3><ul><li>具体见<a href="https://hexo.limour.top/go/#aHR0cHM6Ly96aHVhbmxhbi56aGlodS5jb20vcC8yOTQ4NTk4Nw==" rel="noopener external nofollow noreferrer">黄树嘉博士的相关介绍</a></li><li>因为本文是GATK 4.0的HaplotypeCaller模块,自带局部重比对,故用到的时候再写</li></ul><h3 id="BQSR">BQSR</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano BQSR.sh &amp;&amp; <span 
class="hljs-built_in">chmod</span> +x BQSR.sh<br>./BQSR.sh<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-built_in">source</span> activate GATK4<br><span class="hljs-comment">#任务名</span><br>TASKN=SRX247249<br><span class="hljs-comment">#设置merged数据存放目录</span><br>MERGED=/home/jovyan/upload/merged/<span class="hljs-variable">$TASKN</span><br><span class="hljs-comment">#设置RefSeq的存放目录</span><br>RefSeq=/home/jovyan/data/refseq/GRCh38.p14.fna<br><span class="hljs-comment">#设置knownSites数据存放目录</span><br>knownSites=/home/jovyan/upload/knownSites<br>knownSites=$(<span class="hljs-built_in">echo</span> $(<span class="hljs-built_in">ls</span> <span class="hljs-variable">$knownSites</span>/*.ncbi.vcf.gz | sed <span 
class="hljs-string">&#x27;s/^/--known-sites /&#x27;</span> | <span class="hljs-built_in">tr</span> <span class="hljs-string">&#x27;\n&#x27;</span> <span class="hljs-string">&#x27; &#x27;</span>))<br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$knownSites</span><br> <br><span class="hljs-keyword">for</span> file <span class="hljs-keyword">in</span> <span class="hljs-variable">$MERGED</span>/*<br><span class="hljs-keyword">do</span><br> <br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$file</span><br>SAMPLE=<span class="hljs-variable">$&#123;file##*/&#125;</span><br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$MERGED</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><br> <br>gatk BaseRecalibrator <span class="hljs-variable">$knownSites</span> \<br> -R <span class="hljs-variable">$RefSeq</span> \<br> -I <span class="hljs-variable">$MERGED</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/sorted.markdup.bam&quot;</span> \<br> -O <span class="hljs-variable">$MERGED</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/recal_data.table&quot;</span><br> <br>gatk ApplyBQSR \<br> -R <span class="hljs-variable">$RefSeq</span> \<br> -I <span class="hljs-variable">$MERGED</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/sorted.markdup.bam&quot;</span> \<br> --bqsr-recal-file <span class="hljs-variable">$MERGED</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/recal_data.table&quot;</span> \<br> -O <span class="hljs-variable">$MERGED</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span 
class="hljs-string">&quot;/sorted.markdup.BQSR.bam&quot;</span><br> <br>samtools index <span class="hljs-variable">$MERGED</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/sorted.markdup.BQSR.bam&quot;</span><br> <br><span class="hljs-keyword">done</span><br></code></pre></td></tr></table></figure><ul><li>最后得到的MERGED目录结构如下</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><code class="hljs bash">[ 54G] .<br>└── [ 54G] ./SAMPLE1<br> ├── [2.5M] ./SAMPLE1/recal_data.table<br> ├── [ 21G] ./SAMPLE1/sorted.markdup.bam<br> ├── [6.8M] ./SAMPLE1/sorted.markdup.bam.bai<br> ├── [8.8M] ./SAMPLE1/sorted.markdup.BQSR.bai<br> ├── [ 33G] ./SAMPLE1/sorted.markdup.BQSR.bam<br> └── [7.6M] ./SAMPLE1/sorted.markdup.BQSR.bam.bai<br></code></pre></td></tr></table></figure><h2 id="两步法变异检测">两步法变异检测</h2><h3 id="HaplotypeCaller">HaplotypeCaller</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano HaplotypeCaller.sh &amp;&amp; <span class="hljs-built_in">chmod</span> +x HaplotypeCaller.sh<br>./HaplotypeCaller.sh<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span 
class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-built_in">source</span> activate GATK4<br> <br><span class="hljs-comment">#任务名</span><br>TASKN=SRX247249<br><span class="hljs-comment">#设置BQSR数据存放目录</span><br>MERGED=/home/jovyan/upload/merged/<span class="hljs-variable">$TASKN</span><br><span class="hljs-comment">#设置RefSeq的存放目录</span><br>RefSeq=/home/jovyan/data/refseq/GRCh38.p14.fna<br> <br><span class="hljs-keyword">for</span> file <span class="hljs-keyword">in</span> <span class="hljs-variable">$MERGED</span>/*<br><span class="hljs-keyword">do</span><br> <br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$file</span><br>SAMPLE=<span class="hljs-variable">$&#123;file##*/&#125;</span><br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$MERGED</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><br> <br>gatk --java-options <span class="hljs-string">&quot;-Xmx4g&quot;</span> HaplotypeCaller -ERC GVCF \<br> -R <span class="hljs-variable">$RefSeq</span> \<br> -I <span class="hljs-variable">$MERGED</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/sorted.markdup.BQSR.bam&quot;</span> \<br> -O <span class="hljs-variable">$MERGED</span><span class="hljs-string">&quot;/&quot;</span><span class="hljs-variable">$SAMPLE</span><span class="hljs-string">&quot;/HC.g.vcf.gz&quot;</span><br> <br><span class="hljs-keyword">done</span><br></code></pre></td></tr></table></figure><h3 
id="CombineGVCFs">CombineGVCFs</h3><h4 id="单样本">单样本</h4><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs bash">VCFPATH=<span class="hljs-variable">$MERGED</span><span class="hljs-string">&#x27;/SAMPLE1&#x27;</span><br></code></pre></td></tr></table></figure><h4 id="多样本">多样本</h4><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano CombineGVCFs.sh &amp;&amp; <span class="hljs-built_in">chmod</span> +x CombineGVCFs.sh<br>./CombineGVCFs.sh<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-built_in">source</span> activate GATK4<br> <br><span class="hljs-comment">#任务名</span><br>TASKN=SRX247249<br><span class="hljs-comment">#设置BQSR数据存放目录</span><br>MERGED=/home/jovyan/upload/merged/<span class="hljs-variable">$TASKN</span><br><span class="hljs-comment">#设置RefSeq的存放目录</span><br>RefSeq=/home/jovyan/data/refseq/GRCh38.p14.fna<br><span class="hljs-comment">#设置最后输出的路径</span><br>VCFPATH=/home/jovyan/upload/VCF/<span class="hljs-variable">$TASKN</span><br><span 
class="hljs-built_in">mkdir</span> -p <span class="hljs-variable">$VCFPATH</span><br> <br>variant=$(<span class="hljs-built_in">echo</span> $(<span class="hljs-built_in">ls</span> <span class="hljs-variable">$MERGED</span>/*/HC.g.vcf.gz | sed <span class="hljs-string">&#x27;s/^/--variant /&#x27;</span> | <span class="hljs-built_in">tr</span> <span class="hljs-string">&#x27;\n&#x27;</span> <span class="hljs-string">&#x27; &#x27;</span>))<br><span class="hljs-built_in">echo</span> <span class="hljs-variable">$variant</span><br> <br>gatk CombineGVCFs <span class="hljs-variable">$variant</span> \<br> -R <span class="hljs-variable">$RefSeq</span> \<br> -O <span class="hljs-variable">$VCFPATH</span><span class="hljs-string">&#x27;/HC.g.vcf.gz&#x27;</span><br></code></pre></td></tr></table></figure><h3 id="GenotypeGVCFs">GenotypeGVCFs</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano GenotypeGVCFs.sh &amp;&amp; <span class="hljs-built_in">chmod</span> +x GenotypeGVCFs.sh<br>./GenotypeGVCFs.sh<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-built_in">source</span> activate GATK4<br> <br><span 
class="hljs-comment">#任务名</span><br>TASKN=SRX247249<br><span class="hljs-comment">#设置BQSR数据存放目录</span><br>MERGED=/home/jovyan/upload/merged/<span class="hljs-variable">$TASKN</span><br><span class="hljs-comment">#设置RefSeq的存放目录</span><br>RefSeq=/home/jovyan/data/refseq/GRCh38.p14.fna<br><span class="hljs-comment">#设置最后输出的路径</span><br>VCFPATH=<span class="hljs-variable">$MERGED</span><span class="hljs-string">&#x27;/SAMPLE1&#x27;</span><br><span class="hljs-built_in">mkdir</span> -p <span class="hljs-variable">$VCFPATH</span><br> <br>gatk --java-options <span class="hljs-string">&quot;-Xmx4g&quot;</span> GenotypeGVCFs \<br> -R <span class="hljs-variable">$RefSeq</span> \<br> -V <span class="hljs-variable">$VCFPATH</span><span class="hljs-string">&#x27;/HC.g.vcf.gz&#x27;</span> \<br> -O <span class="hljs-variable">$VCFPATH</span><span class="hljs-string">&#x27;/HC.vcf.gz&#x27;</span><br></code></pre></td></tr></table></figure><ul><li>最后得到的结果如下</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs bash">├── [6.8G] ./HC.g.vcf.gz<br>├── [5.0M] ./HC.g.vcf.gz.tbi<br>├── [127M] ./HC.vcf.gz<br>├── [2.0M] ./HC.vcf.gz.tbi<br></code></pre></td></tr></table></figure><h2 id="VQSR">VQSR</h2><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs bash">nano VQSR.sh &amp;&amp; <span class="hljs-built_in">chmod</span> +x VQSR.sh<br>./VQSR.sh<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span 
class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-meta">#!/bin/bash</span><br><span class="hljs-built_in">source</span> activate GATK4<br> <br><span class="hljs-comment">#任务名</span><br>TASKN=SRX247249<br><span class="hljs-comment">#设置BQSR数据存放目录</span><br>MERGED=/home/jovyan/upload/merged/<span class="hljs-variable">$TASKN</span><br><span class="hljs-comment">#设置RefSeq的存放目录</span><br>RefSeq=/home/jovyan/data/refseq/GRCh38.p14.fna<br><span class="hljs-comment">#设置最后输出的路径</span><br>VCFPATH=<span 
class="hljs-variable">$MERGED</span><span class="hljs-string">&#x27;/SAMPLE1&#x27;</span><br><span class="hljs-comment">#设置knownSites数据存放目录</span><br>knownSites=/home/jovyan/upload/knownSites<br> <br>gatk VariantRecalibrator \<br> -R <span class="hljs-variable">$RefSeq</span> \<br> -V <span class="hljs-variable">$VCFPATH</span><span class="hljs-string">&#x27;/HC.vcf.gz&#x27;</span> \<br> --resource:hapmap,known=<span class="hljs-literal">false</span>,training=<span class="hljs-literal">true</span>,truth=<span class="hljs-literal">true</span>,prior=15.0 <span class="hljs-variable">$knownSites</span>/hapmap_3.3.hg38.ncbi.vcf.gz \<br> --resource:dbsnp,known=<span class="hljs-literal">true</span>,training=<span class="hljs-literal">false</span>,truth=<span class="hljs-literal">false</span>,prior=2.0 <span class="hljs-variable">$knownSites</span>/GRCh38.dbSNP.ncbi.vcf.gz \<br> -an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR \<br> -mode SNP \<br> -O <span class="hljs-variable">$VCFPATH</span>/snp.recal \<br> --tranches-file <span class="hljs-variable">$VCFPATH</span>/snp.tranches \<br> --rscript-file <span class="hljs-variable">$VCFPATH</span>/snp.plots.R<br> <br>gatk ApplyVQSR \<br> -R <span class="hljs-variable">$RefSeq</span> \<br> -V <span class="hljs-variable">$VCFPATH</span><span class="hljs-string">&#x27;/HC.vcf.gz&#x27;</span> \<br> -O <span class="hljs-variable">$VCFPATH</span><span class="hljs-string">&#x27;/snp.VQSR.vcf.gz&#x27;</span> \<br> --truth-sensitivity-filter-level 99.0 \<br> --tranches-file <span class="hljs-variable">$VCFPATH</span>/snp.tranches \<br> --recal-file <span class="hljs-variable">$VCFPATH</span>/snp.recal \<br> -mode SNP<br> <br>gatk VariantRecalibrator \<br> -R <span class="hljs-variable">$RefSeq</span> \<br> -V <span class="hljs-variable">$VCFPATH</span><span class="hljs-string">&#x27;/snp.VQSR.vcf.gz&#x27;</span> \<br> --resource:dbindel,known=<span class="hljs-literal">true</span>,training=<span 
class="hljs-literal">false</span>,truth=<span class="hljs-literal">false</span>,prior=2.0 <span class="hljs-variable">$knownSites</span>/Homo_sapiens_assembly38.known_indels.ncbi.vcf.gz \<br> --resource:mills,known=<span class="hljs-literal">true</span>,training=<span class="hljs-literal">true</span>,truth=<span class="hljs-literal">true</span>,prior=12.0 <span class="hljs-variable">$knownSites</span>/Mills_and_1000G_gold_standard.indels.hg38.ncbi.vcf.gz \<br> -an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR \<br> -mode INDEL --max-gaussians 6 \<br> -O <span class="hljs-variable">$VCFPATH</span>/snp.indel.recal \<br> --tranches-file <span class="hljs-variable">$VCFPATH</span>/snp.indel.tranches \<br> --rscript-file <span class="hljs-variable">$VCFPATH</span>/snp.indel.plots.R<br> <br>gatk ApplyVQSR \<br> -R <span class="hljs-variable">$RefSeq</span> \<br> -V <span class="hljs-variable">$VCFPATH</span><span class="hljs-string">&#x27;/snp.VQSR.vcf.gz&#x27;</span> \<br> -O <span class="hljs-variable">$VCFPATH</span><span class="hljs-string">&#x27;/snp.indel.VQSR.vcf.gz&#x27;</span> \<br> --truth-sensitivity-filter-level 99.0 \<br> --tranches-file <span class="hljs-variable">$VCFPATH</span>/snp.indel.tranches \<br> --recal-file <span class="hljs-variable">$VCFPATH</span>/snp.indel.recal \<br> -mode INDEL<br></code></pre></td></tr></table></figure><ul><li>最后得到的结果如下</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br></pre></td><td class="code"><pre><code class="hljs 
bash">├── [2.7M] ./snp.plots.R<br>├── [6.2M] ./snp.plots.R.pdf<br>├── [199M] ./snp.recal<br>├── [7.5M] ./snp.recal.idx<br>├── [ 584] ./snp.tranches<br>├── [7.5K] ./snp.tranches.pdf<br>├── [151M] ./snp.VQSR.vcf.gz<br>├── [2.0M] ./snp.VQSR.vcf.gz.tbi<br>├── [2.8M] ./snp.indel.plots.R<br>├── [6.2M] ./snp.indel.plots.R.pdf<br>├── [ 35M] ./snp.indel.recal<br>├── [256K] ./snp.indel.recal.idx<br>├── [ 595] ./snp.indel.tranches<br>├── [153M] ./snp.indel.VQSR.vcf.gz<br>├── [2.0M] ./snp.indel.VQSR.vcf.gz.tbi<br></code></pre></td></tr></table></figure><ul><li>SNP内容示例</li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-comment"># tabix snp.VQSR.vcf.gz NC_000001.11 | head -n 5</span><br>NC_000001.11 16378 . T C 35.32 VQSRTrancheSNP99.90to100.00 AC=2;AF=1.00;AN=2;DP=2;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=20.00;QD=17.66;SOR=2.303;VQSLOD=-1.018e+01;culprit=MQ GT:AD:DP:GQ:PL 1/1:0,2:2:6:47,6,0<br>NC_000001.11 17020 . G A 59.32 VQSRTrancheSNP99.90to100.00 AC=2;AF=1.00;AN=2;DP=2;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=31.66;QD=29.66;SOR=0.693;VQSLOD=-5.480e+00;culprit=MQ GT:AD:DP:GQ:PL 1/1:0,2:2:6:71,6,0<br>NC_000001.11 17385 . G A 60.32 VQSRTrancheSNP99.90to100.00 AC=2;AF=1.00;AN=2;DP=2;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=32.28;QD=30.16;SOR=2.303;VQSLOD=-2.357e+00;culprit=MQ GT:AD:DP:GQ:PL 1/1:0,2:2:6:72,6,0<br>NC_000001.11 20254 . G A 64.64 VQSRTrancheSNP99.90to100.00 AC=1;AF=0.500;AN=2;BaseQRankSum=2.37;DP=8;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=24.89;MQRankSum=-2.030e-01;QD=8.08;ReadPosRankSum=-1.611e+00;SOR=1.034;VQSLOD=-1.317e+01;culprit=MQ GT:AD:DP:GQ:PGT:PID:PL:PS 0|1:5,3:8:72:1|0:20250_T_C:72,0,126:20250<br>NC_000001.11 39230 . 
G A 83.64 VQSRTrancheSNP99.90to100.00 AC=1;AF=0.500;AN=2;BaseQRankSum=-1.078e+00;DP=15;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=26.72;MQRankSum=2.20;QD=5.58;ReadPosRankSum=-1.917e+00;SOR=1.022;VQSLOD=-1.362e+01;culprit=MQ GT:AD:DP:GQ:PL 0/1:10,5:15:91:91,0,239<br></code></pre></td></tr></table></figure><h2 id="变异注释">变异注释</h2><h3 id="安装-VEP">安装 VEP</h3><ul><li><a href="https://hexo.limour.top/go/#aHR0cDovL3d3dy5lbnNlbWJsLm9yZy9pbmZvL2RvY3MvdG9vbHMvdmVwL2luZGV4Lmh0bWw=" rel="noopener external nofollow noreferrer">VEP官网地址</a></li></ul><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><code class="hljs bash">sudo docker pull ensemblorg/ensembl-vep<br>sudo docker run --<span class="hljs-built_in">rm</span> -t -i -v ~/upload:/data:z ensemblorg/ensembl-vep <span class="hljs-built_in">pwd</span><br>sudo docker run --<span class="hljs-built_in">rm</span> -t -i -v ~/upload:/data:z ensemblorg/ensembl-vep <span class="hljs-built_in">ls</span> -al /opt/vep/<br>sudo <span class="hljs-built_in">mkdir</span> -p ~/upload/vep &amp;&amp; sudo <span class="hljs-built_in">chmod</span> 777 ~/upload/vep<br>sudo docker run --<span class="hljs-built_in">rm</span> -t -i -v ~/upload:/data:z -v ~/upload/vep:/opt/vep/.vep:z ensemblorg/ensembl-vep INSTALL.pl<br><span class="hljs-comment"># sudo docker run --rm -t -i -v ~/upload:/data:Z ensemblorg/ensembl-vep cat INSTALL.pl &gt; INSTALL.pl</span><br><span class="hljs-comment"># 自行分析 INSTALL.pl,构造下载后的结构,以下是104版本的</span><br><span class="hljs-comment"># 太慢了,手动下载,请各显神通,下载地址来自上一步的输出</span><br>wget 
https://ftp.ensembl.org/pub/release-104/variation/indexed_vep_cache/homo_sapiens_vep_104_GRCh38.tar.gz -P ~/upload/vep<br>tar -zxvf ~/upload/vep/homo_sapiens_vep_104_GRCh38.tar.gz -C ~/upload/vep<br></code></pre></td></tr></table></figure><h3 id="进行注释">进行注释</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-built_in">mkdir</span> -p ~/upload/VEP/SRX247249 &amp;&amp; <span class="hljs-built_in">chmod</span> -R 777 ~/upload/VEP/SRX247249<br><span class="hljs-comment"># mv ~/data/refseq ~/upload</span><br><span class="hljs-comment"># chmod -R 777 ~/upload/refseq</span><br><span class="hljs-comment"># chmod -R 777 ~/upload/vep</span><br><span class="hljs-comment"># chmod -R 777 ~/upload/merged/SRX247249</span><br>sudo docker run --<span class="hljs-built_in">rm</span> -t -i -v ~/upload:/data:z ensemblorg/ensembl-vep \<br> vep --fasta /data/refseq/GRCh38.p14.fna \<br> --format vcf --vcf --fork 4 --hgvs --force_overwrite --everything \<br> --offline --dir_cache /data/vep \<br> -i /data/merged/SRX247249/SAMPLE1/snp.indel.VQSR.vcf.gz \<br> -o /data/merged/SRX247249/SAMPLE1/snp.indel.VQSR.VEP.vcf<br><span class="hljs-comment"># sudo chmod 777 ~/upload/merged/SRX247249/SAMPLE1/snp.indel.VQSR.VEP.vcf</span><br><span class="hljs-comment"># pbgzip -n 4 snp.indel.VQSR.VEP.vcf</span><br><span class="hljs-comment"># tabix -p vcf snp.indel.VQSR.VEP.vcf.gz</span><br></code></pre></td></tr></table></figure><ul><li>最后得到的结果如下</li></ul><figure class="highlight bash"><table><tr><td 
class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs bash">├── [346M] ./snp.indel.VQSR.VEP.vcf.gz<br>├── [210K] ./snp.indel.VQSR.VEP.vcf.gz_summary.html<br>├── [1.6M] ./snp.indel.VQSR.VEP.vcf.gz.tbi<br>├── [8.2K] ./snp.indel.VQSR.VEP.vcf.gz_warnings.txt<br></code></pre></td></tr></table></figure><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs bash"><span class="hljs-comment"># tabix snp.indel.VQSR.VEP.vcf.gz NC_000001.11 | head -n 2</span><br>NC_000001.11 16378 . T C 35.32 VQSRTrancheSNP99.90to100.00 AC=2;AF=1.00;AN=2;DP=2;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=20.00;QD=17.66;SOR=2.303;VQSLOD=-1.018e+01;culprit=MQ;CSQ=C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000450305|transcribed_unprocessed_pseudogene||||||||||rs148220436|2708|1||SNV|HGNC|HGNC:37102|YES||||||||||||||||||||||||||||||||||||||||||||,C|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000456328|processed_transcript||||||||||rs148220436|1969|1||SNV|HGNC|HGNC:37102||||1|||||||||||||||||||||||||||||||||||||||||,C|intron_variant&amp;non_coding_transcript_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000488147|unprocessed_pseudogene||8/10|ENST00000488147.1:n.1067+229A&gt;G|||||||rs148220436||-1||SNV|HGNC|HGNC:38034|YES||||||||||||||||||||||||||||||||||||||||||||,C|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|Transcript|ENST00000619216|miRNA||||||||||rs148220436|991|-1||SNV|HGNC|HGNC:50039|YES||||||||||||||||||||||||||||||||||||||||||||,C|regulatory_region_variant|MODIFIER|||RegulatoryFeature|ENSR00000344266|CTCF_binding_site||||||||||rs148220436||||SNV|||||||||||||||||||||||||||||||||||||||||||||||,C|regulatory_regio
n_variant|MODIFIER|||RegulatoryFeature|ENSR00001164745|promoter_flanking_region||||||||||rs148220436||||SNV||||||||||||||||||||||||||||||||||||||||||||||| GT:AD:DP:GQ:PL 1/1:0,2:2:6:47,6,0<br>NC_000001.11 17020 . G A 59.32 VQSRTrancheSNP99.90to100.00 AC=2;AF=1.00;AN=2;DP=2;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=31.66;QD=29.66;SOR=0.693;VQSLOD=-5.480e+00;culprit=MQ;CSQ=A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000450305|transcribed_unprocessed_pseudogene||||||||||rs199740902|3350|1||SNV|HGNC|HGNC:37102|YES||||||||||||||||||||||||||||||||||||||||||||,A|downstream_gene_variant|MODIFIER|DDX11L1|ENSG00000223972|Transcript|ENST00000456328|processed_transcript||||||||||rs199740902|2611|1||SNV|HGNC|HGNC:37102||||1|||||||||||||||||||||||||||||||||||||||||,A|non_coding_transcript_exon_variant|MODIFIER|WASH7P|ENSG00000227232|Transcript|ENST00000488147|unprocessed_pseudogene|7/11||ENST00000488147.1:n.746C&gt;T||746|||||rs199740902||-1||SNV|HGNC|HGNC:38034|YES||||||||||||||||||||||||||||||||||||||||||||,A|downstream_gene_variant|MODIFIER|MIR6859-1|ENSG00000278267|Transcript|ENST00000619216|miRNA||||||||||rs199740902|349|-1||SNV|HGNC|HGNC:50039|YES|||||||||||||||||||||||||||||||||||||||||||| GT:AD:DP:GQ:PL 1/1:0,2:2:6:71,6,0<br></code></pre></td></tr></table></figure>]]></content:encoded>
  255. <category domain="https://hexo.limour.top/tags/GATK/">GATK</category>
  256. <category domain="https://hexo.limour.top/tags/SNP/">SNP</category>
  257. <category domain="https://hexo.limour.top/tags/WGS/">WGS</category>
  258. <comments>https://hexo.limour.top/shi-yong-GATK-zhao-SNP#disqus_thread</comments>
  259. </item>
  260. </channel>
  261. </rss>