  1. <!DOCTYPE html>
  2. <html lang="en" data-default-color-scheme=auto>
  3. <head><!-- hexo injector head_begin start -->
  4. <script defer src="https://api.limour.top/vue/0d2f95c1-755d-436b-adf8-eee12a80ed32/script.js"></script>
  5. <!-- hexo injector head_begin end -->
  6. <meta charset="UTF-8">
  7. <link rel="apple-touch-icon" sizes="76x76" href="https://img.limour.top/2023/08/29/64ee07361815a.webp">
  8. <link rel="icon" href="https://img.limour.top/2023/08/29/64ee07361815a.webp">
  9. <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=5.0, shrink-to-fit=no">
  10. <meta http-equiv="x-ua-compatible" content="ie=edge">
  11. <meta name="theme-color" content="#2f4154">
  12. <meta name="author" content="Limour">
  13. <meta name="keywords" content="">
  14. <meta name="description" content="Llama.cpp 能 CPU &amp;amp; GPU 环境混合推理,这里记录一下在 windows10 平台上运行 Qwen-1.8B 的过程,显卡是 1660Ti 。 准备模型 安装conda Tun模式(管理员权限) 1234567conda create -n llamaConvert python=3.10 git -c conda-forgeconda activate llamaCo">
<title>[Notes] Running Qwen-1.8B on Win10 with 6 GB of VRAM - Limour&#39;s Blog</title>
  16. <link rel="stylesheet" href="https://jscdn.limour.top/npm/bootstrap@4.6.1/dist/css/bootstrap.min.css" />
  17. <link rel="stylesheet" href="https://jscdn.limour.top/npm/github-markdown-css@4.0.0/github-markdown.min.css" />
  18. <link rel="stylesheet" href="https://jscdn.limour.top/npm/hint.css@2.7.0/hint.min.css" />
<!-- Icon libraries the theme depends on; do not modify them yourself -->
<!-- Do not modify the links to the theme-dependent icons -->
  21. <link rel="stylesheet" href="//at.alicdn.com/t/font_1749284_hj8rtnfg7um.css">
  22. <link rel="stylesheet" href="//at.alicdn.com/t/font_1736178_lbnruvf0jn.css">
  23. <link rel="stylesheet" href="/css/main.css" />
  24. <link id="highlight-css" rel="stylesheet" href="/css/highlight.css" />
  25. <link id="highlight-css-dark" rel="stylesheet" href="/css/highlight-dark.css" />
  26. <link rel="stylesheet" href="/theme-inject/custom.css">
  27. <link rel="stylesheet" href="/theme-inject/iconfont.css">
  28. <script id="fluid-configs">
  29. var Fluid = window.Fluid || {};
  30. Fluid.ctx = Object.assign({}, Fluid.ctx)
  31. var CONFIG = {"hostname":"hexo.limour.top","root":"/","version":"1.9.7","typing":{"enable":false,"typeSpeed":70,"cursorChar":"_","loop":false,"scope":[]},"anchorjs":{"enable":true,"element":"h1,h2,h3,h4,h5,h6","placement":"left","visible":"hover","icon":"§"},"progressbar":{"enable":true,"height_px":3,"color":"#29d","options":{"showSpinner":false,"trickleSpeed":100}},"code_language":{"enable":true,"default":"TEXT"},"copy_btn":true,"image_caption":{"enable":true},"image_zoom":{"enable":false,"img_url_replace":["",""]},"toc":{"enable":true,"placement":"right","headingSelector":"h1,h2,h3,h4,h5,h6","collapseDepth":0},"lazyload":{"enable":true,"loading_img":"https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg","onlypost":false,"offset_factor":2},"web_analytics":{"enable":false,"follow_dnt":true,"baidu":null,"google":{"measurement_id":null},"tencent":{"sid":null,"cid":null},"woyaola":null,"cnzz":null,"leancloud":{"app_id":null,"app_key":null,"server_url":null,"path":"window.location.pathname","ignore_local":false}},"search_path":"/local-search.xml","include_content_in_search":true};
  32. if (CONFIG.web_analytics.follow_dnt) {
  33. var dntVal = navigator.doNotTrack || window.doNotTrack || navigator.msDoNotTrack;
  34. Fluid.ctx.dnt = dntVal && (dntVal.startsWith('1') || dntVal.startsWith('yes') || dntVal.startsWith('on'));
  35. }
  36. </script>
  37. <script src="/js/utils.js" ></script>
  38. <script src="/js/color-schema.js" ></script>
  39. <link rel="canonical" href="https://hexo.limour.top/Running-Qwen-on-the-Win10-platform-with-6GB-of-video-memory"/>
  40. <meta name="generator" content="Hexo 7.1.1"><link rel="alternate" href="/atom.xml" title="Limour's Blog" type="application/atom+xml">
  41. <link rel="alternate" href="/rss2.xml" title="Limour's Blog" type="application/rss+xml">
  42. </head>
  43. <body>
  44. <header>
  45. <div class="header-inner" style="height: 70vh;">
  46. <nav id="navbar" class="navbar fixed-top navbar-expand-lg navbar-dark scrolling-navbar">
  47. <div class="container">
  48. <a class="navbar-brand" href="/">
  49. <strong>Limour&#39;s Blog</strong>
  50. </a>
  51. <button id="navbar-toggler-btn" class="navbar-toggler" type="button" data-toggle="collapse"
  52. data-target="#navbarSupportedContent"
  53. aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
  54. <div class="animated-icon"><span></span><span></span><span></span></div>
  55. </button>
  56. <!-- Collapsible content -->
  57. <div class="collapse navbar-collapse" id="navbarSupportedContent">
  58. <ul class="navbar-nav ml-auto text-center">
  59. <li class="nav-item">
  60. <a class="nav-link" href="https://hexo.limour.top/" target="_self">
  61. <i class="iconfont icon-home-fill"></i>
  62. <span>Home</span>
  63. </a>
  64. </li>
  65. <li class="nav-item">
  66. <a class="nav-link" href="/archives/" target="_self">
  67. <i class="iconfont icon-archive-fill"></i>
  68. <span>Archive1</span>
  69. </a>
  70. </li>
  71. <li class="nav-item">
  72. <a class="nav-link" href="https://occdn.limour.top/archives/" target="_self">
  73. <i class="iconfont icon-archive-fill"></i>
  74. <span>Archive2</span>
  75. </a>
  76. </li>
  77. <li class="nav-item">
  78. <a class="nav-link" href="https://b.limour.top/archives/" target="_self">
  79. <i class="iconfont icon-archive-fill"></i>
  80. <span>Archive3</span>
  81. </a>
  82. </li>
  83. <li class="nav-item">
  84. <a class="nav-link" href="https://od.limour.top/" target="_self">
  85. <i class="iconfont icon-onedrive"></i>
  86. <span>Alist</span>
  87. </a>
  88. </li>
  89. <li class="nav-item">
  90. <a class="nav-link" href="https://orcid.org/0000-0001-8897-1685" target="_self">
  91. <i class="iconfont icon-orcid"></i>
  92. <span>Orcid</span>
  93. </a>
  94. </li>
  95. <li class="nav-item">
  96. <a class="nav-link" href="/links/" target="_self">
  97. <i class="iconfont icon-link-fill"></i>
  98. <span>Links</span>
  99. </a>
  100. </li>
  101. <li class="nav-item">
  102. <a class="nav-link" href="/atom.xml" target="_self">
  103. <i class="iconfont icon-rss"></i>
  104. <span>RSS</span>
  105. </a>
  106. </li>
  107. <li class="nav-item" id="search-btn">
  108. <a class="nav-link" target="_self" href="javascript:;" data-toggle="modal" data-target="#modalSearch" aria-label="Search">
  109. <i class="iconfont icon-search"></i>
  110. </a>
  111. </li>
  112. <li class="nav-item" id="color-toggle-btn">
  113. <a class="nav-link" target="_self" href="javascript:;" aria-label="Color Toggle">
  114. <i class="iconfont icon-dark" id="color-toggle-icon"></i>
  115. </a>
  116. </li>
  117. </ul>
  118. </div>
  119. </div>
  120. </nav>
  121. <div id="banner" class="banner" parallax=true
  122. style="background: url('https://img.limour.top/2023/08/29/64ee08e108638.webp') no-repeat center center; background-size: cover;">
  123. <div class="full-bg-img">
  124. <div class="mask flex-center" style="background-color: rgba(0, 0, 0, 0.3)">
  125. <div class="banner-text text-center fade-in-up">
  126. <div class="h2">
  127. <span id="subtitle">【记录】win10平台6G显存运行Qwen-1.8B</span>
  128. </div>
  129. <div class="mt-3">
  130. <span class="post-meta mr-2">
  131. <i class="iconfont icon-author" aria-hidden="true"></i>
  132. Limour
  133. </span>
  134. <span class="post-meta">
  135. <i class="iconfont icon-date-fill" aria-hidden="true"></i>
  136. <time datetime="2024-01-01 11:11" pubdate>
January 1, 2024 11:11 AM
  138. </time>
  139. </span>
  140. </div>
  141. <div class="mt-1">
  142. <span class="post-meta mr-2">
  143. <i class="iconfont icon-chart"></i>
  144. 1.2k words
  145. </span>
  146. <span class="post-meta mr-2">
  147. <i class="iconfont icon-clock-fill"></i>
  148. 10 mins
  149. </span>
  150. </div>
  151. </div>
  152. </div>
  153. </div>
  154. </div>
  155. </div>
  156. </header>
  157. <main>
  158. <div class="container-fluid nopadding-x">
  159. <div class="row nomargin-x">
  160. <div class="side-col d-none d-lg-block col-lg-2">
  161. </div>
  162. <div class="col-lg-8 nopadding-x-md">
  163. <div class="container nopadding-x-md" id="board-ctn">
  164. <div id="board">
  165. <article class="post-content mx-auto">
  166. <h1 id="seo-header">【记录】win10平台6G显存运行Qwen-1.8B</h1>
  167. <p id="updated-time" class="note note-info" style="">
Last updated on March 29, 2024
  169. </p>
  170. <div class="markdown-body">
<p><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL2dnZXJnYW5vdi9sbGFtYS5jcHA=" rel="noopener external nofollow noreferrer">Llama.cpp</a> supports mixed CPU &amp; GPU inference. This post records the process of running <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9Rd2VuL1F3ZW4tMV84Qg==" rel="noopener external nofollow noreferrer">Qwen-1.8B</a> on the Windows 10 platform; the graphics card is a 1660 Ti.</p>
  172. <h2 id="准备模型">准备模型</h2>
  173. <ul>
<li><a href="/-ji-lu--an-zhuang-conda-bing-geng-huan-qing-hua-yuan">Install conda</a></li>
<li><a href="/Use-Tunnel-to-speed-up-the-connection-of-VPS">Tun mode</a> (administrator privileges)</li>
  176. </ul>
  177. <figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><code class="hljs powershell">conda create <span class="hljs-literal">-n</span> llamaConvert python=<span class="hljs-number">3.10</span> git <span class="hljs-literal">-c</span> conda<span class="hljs-literal">-forge</span><br>conda activate llamaConvert<br><span class="hljs-built_in">cd</span> D:\llama<br>git clone <span class="hljs-literal">--depth</span>=<span class="hljs-number">1</span> https://github.com/ggerganov/llama.cpp.git<br><span class="hljs-built_in">cd</span> llama.cpp<br>python <span class="hljs-literal">-m</span> pip install <span class="hljs-literal">-r</span> requirements.txt<br>pip install tiktoken<br></code></pre></td></tr></table></figure>
  178. <figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><code class="hljs powershell">python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;from huggingface_hub import snapshot_download; snapshot_download(repo_id=&#x27;Qwen/Qwen-1_8B-Chat&#x27;, local_dir=r&#x27;D:\qwen&#x27;, ignore_patterns=[&#x27;*.h5&#x27;, &#x27;*.ot&#x27;, &#x27;*.msgpack&#x27;, &#x27;*.safetensors&#x27;])&quot;</span><br><span class="hljs-built_in">cd</span> D:\qwen<br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00001-of-00002.safetensors&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/Qwen/Qwen-1_8B-Chat/resolve/main/model-00001-of-00002.safetensors?download=true&quot;</span><br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00002-of-00002.safetensors&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/Qwen/Qwen-1_8B-Chat/resolve/main/model-00002-of-00002.safetensors?download=true&quot;</span><br></code></pre></td></tr></table></figure>
  179. <figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs powershell"><span class="hljs-built_in">cd</span> D:\llama\llama.cpp<br>python <span class="hljs-built_in">convert-hf</span><span class="hljs-literal">-to-gguf</span>.py D:\qwen<br><span class="hljs-comment"># Model successfully exported to &#x27;D:\qwen\ggml-model-f16.gguf&#x27;</span><br></code></pre></td></tr></table></figure>
  180. <h2 id="运行模型">运行模型</h2>
  181. <ul>
<li>Download <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL2dnZXJnYW5vdi9sbGFtYS5jcHAvcmVsZWFzZXM=" rel="noopener external nofollow noreferrer">llama-b1732-bin-win-cublas-cu12.2.0-x64.zip</a></li>
<li>Extract the files to <code>D:\llama</code></li>
  184. </ul>
  185. <figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs powershell">conda create <span class="hljs-literal">-n</span> llamaCpp libcublas cuda<span class="hljs-literal">-toolkit</span> git <span class="hljs-literal">-c</span> nvidia <span class="hljs-literal">-c</span> conda<span class="hljs-literal">-forge</span><br>conda activate llamaCpp<br><span class="hljs-built_in">cd</span> D:\llama ; .\main.exe <span class="hljs-comment">## 检查能否正确运行</span><br><span class="hljs-built_in">cd</span> D:\llama ; .\quantize.exe <span class="hljs-literal">--help</span> <span class="hljs-comment">## 自己决定量化方式</span><br>.\quantize.exe D:\qwen\ggml<span class="hljs-literal">-model-f16</span>.gguf .\qwen<span class="hljs-literal">-1_8-f16</span>.gguf <span class="hljs-built_in">COPY</span><br>.\server.exe <span class="hljs-literal">-m</span> .\qwen<span class="hljs-literal">-1_8-f16</span>.gguf <span class="hljs-literal">-c</span> <span class="hljs-number">4096</span> <span class="hljs-literal">--n-gpu-layers</span> <span class="hljs-number">50</span> <span class="hljs-comment">## 调节 n-gpu-layers 平衡 CPU &amp; GPU</span><br></code></pre></td></tr></table></figure>
  186. <ul>
<li>Visit <code>http://127.0.0.1:8080</code> and choose <code>Completion</code> to test, or call the HTTP API directly, as sketched after this list</li>
  188. </ul>
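<p>Besides the web UI, the bundled <code>server.exe</code> also exposes an HTTP completion endpoint. The sketch below is a minimal smoke test, assuming the server started above is still listening on <code>127.0.0.1:8080</code> and using the <code>/completion</code> route of the llama.cpp server example; the prompt and parameters are just placeholders:</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs python"># Minimal smoke test against the llama.cpp server started above (assumes 127.0.0.1:8080).<br>import json<br>import urllib.request<br>payload = {"prompt": "Hello, please introduce yourself.", "n_predict": 64}<br>req = urllib.request.Request(<br>    "http://127.0.0.1:8080/completion",<br>    data=json.dumps(payload).encode("utf-8"),<br>    headers={"Content-Type": "application/json"},<br>)<br>with urllib.request.urlopen(req) as resp:<br>    print(json.loads(resp.read())["content"])  # generated text<br></code></pre></td></tr></table></figure>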
  189. <h2 id="微调模型">微调模型</h2>
  190. <ul>
<li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9kYXRhc2V0cy9hNjg2ZDM4MC9oLWNvcnB1cy0yMDIz" rel="noopener external nofollow noreferrer">h-corpus dataset</a></li>
<li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL1F3ZW5MTS9Rd2VuL2Jsb2IvbWFpbi9SRUFETUVfQ04ubWQjJUU1JUJFJUFFJUU4JUIwJTgz" rel="noopener external nofollow noreferrer">Official fine-tuning tutorial</a> (see the data-layout sketch after this list)</li>
  193. </ul>
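<p>The official tutorial expects the training data as a JSON list of chat-style samples. The sketch below only illustrates the idea of packing raw h-corpus text into that shape; the paths and the field names are assumptions, so check them against the official tutorial before fine-tuning:</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs python"># Illustrative only: pack raw .txt files into a chat-style JSON list for fine-tuning.<br># Field names ("conversations", "from", "value") are assumptions; follow the official tutorial.<br>import json<br>from pathlib import Path<br>samples = []<br>for i, txt in enumerate(Path(r"D:\h-corpus").glob("*.txt")):  # placeholder path<br>    text = txt.read_text(encoding="utf-8").strip()<br>    samples.append({"id": f"sample_{i}", "conversations": [<br>        {"from": "user", "value": "Continue writing the passage."},<br>        {"from": "assistant", "value": text}]})<br>Path(r"D:\h-corpus\train.json").write_text(json.dumps(samples, ensure_ascii=False), encoding="utf-8")<br></code></pre></td></tr></table></figure>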
  194. <h2 id="附加-Yi-6B-Chat">附加 Yi-6B-Chat</h2>
<p><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby8wMS1haS9ZaS02Qi1DaGF0" rel="noopener external nofollow noreferrer">Yi-6B</a> is a bilingual language model open-sourced by 01.AI. Trained on a 3T-token multilingual corpus, it shows promise in language understanding, commonsense reasoning, and reading comprehension.</p>
  196. <figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs powershell"><span class="hljs-built_in">cd</span> D:\models\<span class="hljs-number">01</span>yi<br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00001-of-00003.safetensors&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/01-ai/Yi-6B-Chat/resolve/main/model-00001-of-00003.safetensors?download=true&quot;</span><br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00002-of-00003.safetensors&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/01-ai/Yi-6B-Chat/resolve/main/model-00002-of-00003.safetensors?download=true&quot;</span><br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;model-00003-of-00003.safetensors&#x27;</span> https://huggingface.co/<span class="hljs-number">01</span><span class="hljs-literal">-ai</span>/Yi<span class="hljs-literal">-6B-Chat</span>/resolve/main/model<span class="hljs-literal">-00003-of-00003</span>.safetensors?download=true<br>conda activate llamaConvert<br>python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;from huggingface_hub import snapshot_download; snapshot_download(repo_id=&#x27;01-ai/Yi-6B-Chat&#x27;, local_dir=r&#x27;D:\models\01yi&#x27;, ignore_patterns=[&#x27;*.h5&#x27;, &#x27;*.ot&#x27;, &#x27;*.msgpack&#x27;, &#x27;*.safetensors&#x27;])&quot;</span><br></code></pre></td></tr></table></figure>
  197. <figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br></pre></td><td class="code"><pre><code class="hljs powershell">conda activate llamaConvert<br><span class="hljs-built_in">cd</span> D:\llama\llama.cpp<br>python convert.py D:\models\<span class="hljs-number">01</span>yi<br><span class="hljs-comment"># Wrote D:\models\01yi\ggml-model-f16.gguf</span><br>conda activate llamaCpp<br><span class="hljs-built_in">cd</span> D:\llama ; .\quantize.exe <span class="hljs-literal">--help</span><br>.\quantize.exe D:\models\<span class="hljs-number">01</span>yi\ggml<span class="hljs-literal">-model-f16</span>.gguf .\<span class="hljs-number">01</span>yi<span class="hljs-literal">-6b-Q4_K_M</span>.gguf Q4_K_M<br>.\server.exe <span class="hljs-literal">-m</span> .\<span class="hljs-number">01</span>yi<span class="hljs-literal">-6b-Q4_K_M</span>.gguf <span class="hljs-literal">-c</span> <span class="hljs-number">4096</span> <span class="hljs-literal">--n-gpu-layers</span> <span class="hljs-number">50</span><br></code></pre></td></tr></table></figure>
  198. <h2 id="附加-百川2">附加 百川2</h2>
  199. <figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs powershell"><span class="hljs-built_in">cd</span> D:\models\baichuan<br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;pytorch_model.bin&#x27;</span> <span class="hljs-string">&quot;https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat/resolve/main/pytorch_model.bin?download=true&quot;</span><br>conda activate llamaConvert<br>python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;from huggingface_hub import snapshot_download; snapshot_download(repo_id=&#x27;baichuan-inc/Baichuan2-7B-Chat&#x27;, local_dir=r&#x27;D:\models\baichuan&#x27;, ignore_patterns=[&#x27;*.h5&#x27;, &#x27;*.bin&#x27;, &#x27;*.ot&#x27;, &#x27;*.msgpack&#x27;, &#x27;*.safetensors&#x27;])&quot;</span><br><span class="hljs-built_in">cd</span> D:\llama\llama.cpp<br>python convert.py D:\models\baichuan<br><span class="hljs-comment"># Wrote D:\models\baichuan\ggml-model-f16.gguf</span><br>conda activate llamaCpp<br><span class="hljs-built_in">cd</span> D:\llama ; .\quantize.exe <span class="hljs-literal">--help</span><br>.\quantize.exe D:\models\baichuan\ggml<span class="hljs-literal">-model-f16</span>.gguf .\baichuan<span class="hljs-literal">-7b-Q3_K_M</span>.gguf Q3_K_M<br>.\server.exe <span class="hljs-literal">-m</span> .\baichuan<span class="hljs-literal">-7b-Q3_K_M</span>.gguf <span class="hljs-literal">-c</span> <span class="hljs-number">2048</span> <span class="hljs-literal">--n-gpu-layers</span> <span class="hljs-number">30</span><br></code></pre></td></tr></table></figure>
  200. <h2 id="附加-tigerbot-13b">附加 tigerbot-13b</h2>
<p><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9UaWdlclJlc2VhcmNoL3RpZ2VyYm90LTEzYi1jaGF0LXY1" rel="noopener external nofollow noreferrer">tigerbot-13b</a> ranks near the top of the <a href="https://hexo.limour.top/go/#aHR0cHM6Ly9naXRodWIuY29tL2plaW5sZWUxOTkxL2NoaW5lc2UtbGxtLWJlbmNobWFyaw==" rel="noopener external nofollow noreferrer">chinese-llm-benchmark</a> leaderboard.</p>
  202. <figure class="highlight powershell"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br></pre></td><td class="code"><pre><code class="hljs powershell"><span class="hljs-built_in">cd</span> D:\models\tigerbot<br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;pytorch_model-00001-of-00003.bin&#x27;</span> <span class="hljs-literal">--max-download-limit</span>=<span class="hljs-number">6</span>M <span class="hljs-string">&quot;https://huggingface.co/TigerResearch/tigerbot-13b-chat-v5/resolve/main/pytorch_model-00001-of-00003.bin?download=true&quot;</span><br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;pytorch_model-00002-of-00003.bin&#x27;</span> <span class="hljs-literal">--max-download-limit</span>=<span class="hljs-number">6</span>M <span class="hljs-string">&quot;https://huggingface.co/TigerResearch/tigerbot-13b-chat-v5/resolve/main/pytorch_model-00002-of-00003.bin?download=true&quot;</span><br>D:\aria2\aria2c.exe <span class="hljs-literal">--all-proxy</span>=<span class="hljs-string">&#x27;http://127.0.0.1:7890&#x27;</span> <span class="hljs-literal">-o</span> <span class="hljs-string">&#x27;pytorch_model-00003-of-00003.bin&#x27;</span> <span class="hljs-literal">--max-download-limit</span>=<span class="hljs-number">6</span>M <span class="hljs-string">&quot;https://huggingface.co/TigerResearch/tigerbot-13b-chat-v5/resolve/main/pytorch_model-00003-of-00003.bin?download=true&quot;</span><br>conda activate llamaConvert<br>python <span class="hljs-literal">-c</span> <span class="hljs-string">&quot;from huggingface_hub import snapshot_download; snapshot_download(repo_id=&#x27;TigerResearch/tigerbot-13b-chat-v5&#x27;, local_dir=r&#x27;D:\models\tigerbot&#x27;, ignore_patterns=[&#x27;*.h5&#x27;, &#x27;*.bin&#x27;, &#x27;*.ot&#x27;, &#x27;*.msgpack&#x27;, &#x27;*.safetensors&#x27;])&quot;</span><br><span class="hljs-built_in">cd</span> D:\llama\llama.cpp<br>python convert.py D:\models\tigerbot <span class="hljs-literal">--padvocab</span><br><span class="hljs-built_in">cd</span> D:\llama ; .\quantize.exe <span class="hljs-literal">--help</span><br>.\quantize.exe D:\models\tigerbot\ggml<span class="hljs-literal">-model-f16</span>.gguf D:\models\tigerbot<span class="hljs-literal">-13B-Chat-Q4_K_M</span>.gguf Q4_K_M<br>.\server.exe <span class="hljs-literal">-m</span> D:\models\tigerbot<span class="hljs-literal">-13B-Chat-Q4_K_M</span>.gguf <span class="hljs-literal">-c</span> <span class="hljs-number">4096</span><br></code></pre></td></tr></table></figure>
  203. <div class="note note-info">
<p>With 6 GB of VRAM, Yi-6B-Chat-Q4_K_M feels like the most usable option.<br>tigerbot-13b runs at about 4.6 tokens/s on an R5 5600H, with roughly 60% CPU usage at 3.5 GHz, so memory bandwidth is most likely the bottleneck.</p>
  205. </div>
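<p>The bandwidth explanation is easy to sanity-check: each generated token has to stream roughly the whole quantized model from RAM once. Assuming dual-channel DDR4-3200 (about 51 GB/s) and a roughly 7.9 GB Q4_K_M file for a 13B model, the theoretical ceiling lands somewhat above the observed 4.6 tokens/s:</p>
<figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><code class="hljs python"># Upper-bound decode speed when memory-bandwidth bound: bandwidth / bytes read per token.<br>bandwidth_gb_s = 2 * 3200e6 * 8 / 1e9   # dual-channel DDR4-3200 ~ 51.2 GB/s (assumption)<br>model_gb = 13e9 * 4.85 / 8 / 1e9        # ~7.9 GB for a 13B Q4_K_M file (approximate)<br>print(f"ceiling: {bandwidth_gb_s / model_gb:.1f} tokens/s")  # ~6.5 tokens/s<br># The measured 4.6 tokens/s sits below this ceiling, consistent with a RAM-bandwidth bottleneck.<br></code></pre></td></tr></table></figure>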
  206. <h2 id="附加-在-Colab-上量化">附加 在 Colab 上量化</h2>
  207. <ul>
  208. <li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9jb2xhYi5yZXNlYXJjaC5nb29nbGUuY29tL2RyaXZlLzFKVDNYRmpEN0NUUkI5N3B1M1FwZUd1eldBMXlZRUFNNz91c3A9c2hhcmluZw==" rel="noopener external nofollow noreferrer">llm2gguf.ipynb</a></li>
<li><a href="https://hexo.limour.top/go/#aHR0cHM6Ly9odWdnaW5nZmFjZS5jby9MaW1vdXIvQ2F1c2FsTE0tMTRCLUdHVUY=" rel="noopener external nofollow noreferrer">Quantized result</a></li>
  210. </ul>
  211. <h3 id="安装-llama-cpp">安装 llama.cpp</h3>
  212. <figure class="highlight ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><code class="hljs ipython">!git clone --depth=<span class="hljs-number">1</span> https://github.com/ggerganov/llama.cpp.git<br>%cd /content/llama.cpp<br>!LLAMA_CUDA=<span class="hljs-number">1</span> make -j<br></code></pre></td></tr></table></figure>
  213. <h3 id="计算-imatrix">计算 imatrix</h3>
  214. <figure class="highlight ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs ipython">%cd /content<br>!wget -O transient.txt.gz https://huggingface.co/datasets/Limour/b-corpus/resolve/main/<span class="hljs-number">00</span>-preview/<span class="hljs-number">00</span>-transient.txt.gz?download=true<br>!gunzip transient.txt.gz<br>!mkdir -p /content/CausalLM-14B-GGUF<br>!wget -O /content/CausalLM-14B-GGUF/causallm_14b.Q8_0.gguf https://huggingface.co/TheBloke/CausalLM-14B-GGUF/resolve/main/causallm_14b.Q8_0.gguf?download=true<br>!/content/llama.cpp/imatrix -m /content/CausalLM-14B-GGUF/causallm_14b.Q8_0.gguf -f /content/transient.txt -ngl <span class="hljs-number">36</span><br></code></pre></td></tr></table></figure>
  215. <h3 id="登录拥抱脸">登录拥抱脸</h3>
  216. <figure class="highlight ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><code class="hljs ipython"><span class="hljs-keyword">from</span> google.colab <span class="hljs-keyword">import</span> userdata<br><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> login<br><span class="hljs-comment"># login(token=os.environ.get(&quot;HF_TOKEN&quot;), write_permission=True)</span><br>login(token=userdata.get(<span class="hljs-string">&#x27;HF_TOKEN&#x27;</span>), write_permission=<span class="hljs-literal">True</span>)<br><span class="hljs-comment"># from huggingface_hub import notebook_login</span><br><span class="hljs-comment"># notebook_login()</span><br></code></pre></td></tr></table></figure>
  217. <h3 id="跳过-转换模型">(跳过) 转换模型</h3>
  218. <figure class="highlight ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><code class="hljs ipython">%cd llama.cpp<br>!python -m pip install -r requirements.txt<br>!pip install tiktoken<br><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> snapshot_download<br>!mkdir -p ~/CausalLM<br>snapshot_download(repo_id=<span class="hljs-string">&#x27;CausalLM/7B&#x27;</span>, local_dir=<span class="hljs-string">r&#x27;/content/CausalLM&#x27;</span>, ignore_patterns=[<span class="hljs-string">&#x27;*.h5&#x27;</span>, <span class="hljs-string">&#x27;*.ot&#x27;</span>, <span class="hljs-string">&#x27;*.msgpack&#x27;</span>, <span class="hljs-string">&#x27;*.safetensors&#x27;</span>])<br>!python convert.py --vocab-<span class="hljs-built_in">type</span> bpe --pad-vocab --outtype f16 /content/CausalLM <br></code></pre></td></tr></table></figure>
  219. <h3 id="量化模型">量化模型</h3>
  220. <figure class="highlight ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><code class="hljs ipython">!/content/llama.cpp/quantize --allow-requantize --imatrix /content/imatrix.dat /content/CausalLM-14B-GGUF/causallm_14b.Q8_0.gguf /content/CausalLM-14B-GGUF/causallm_14b.IQ3_XS.gguf IQ3_XS<br></code></pre></td></tr></table></figure>
  221. <h3 id="上传模型">上传模型</h3>
  222. <figure class="highlight ipython"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><code class="hljs ipython"><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> HfApi<br>api = HfApi()<br>api.upload_file(<br> path_or_fileobj=<span class="hljs-string">&quot;/content/CausalLM-14B-GGUF/causallm_14b.IQ3_XS.gguf&quot;</span>,<br> path_in_repo=<span class="hljs-string">&quot;causallm_14b.IQ3_XS.gguf&quot;</span>,<br> repo_id=<span class="hljs-string">&quot;Limour/CausalLM-14B-GGUF&quot;</span><br>)<br></code></pre></td></tr></table></figure>
  223. </div>
  224. <hr/>
  225. <div>
  226. <div class="post-metas my-3">
  227. <div class="post-meta">
  228. <i class="iconfont icon-tags"></i>
  229. <a href="/tags/llama/" class="print-no-link">#llama</a>
  230. </div>
  231. </div>
  232. <div class="license-box my-3">
  233. <div class="license-title">
<div>[Notes] Running Qwen-1.8B on Win10 with 6 GB of VRAM</div>
  235. <div>https://hexo.limour.top/Running-Qwen-on-the-Win10-platform-with-6GB-of-video-memory</div>
  236. </div>
  237. <div class="license-meta">
  238. <div class="license-meta-item">
  239. <div>Author</div>
  240. <div>Limour</div>
  241. </div>
  242. <div class="license-meta-item license-meta-date">
  243. <div>Posted on</div>
  244. <div>January 1, 2024</div>
  245. </div>
  246. <div class="license-meta-item license-meta-date">
  247. <div>Updated on</div>
  248. <div>March 29, 2024</div>
  249. </div>
  250. <div class="license-meta-item">
  251. <div>Licensed under</div>
  252. <div>
  253. <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
  254. <span class="hint--top hint--rounded" aria-label="BY - Attribution">
  255. <i class="iconfont icon-by"></i>
  256. </span>
  257. </a>
  258. <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
  259. <span class="hint--top hint--rounded" aria-label="NC - Non-commercial">
  260. <i class="iconfont icon-nc"></i>
  261. </span>
  262. </a>
  263. <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
  264. <span class="hint--top hint--rounded" aria-label="SA - Share-alike">
  265. <i class="iconfont icon-sa"></i>
  266. </span>
  267. </a>
  268. </div>
  269. </div>
  270. </div>
  271. <div class="license-icon iconfont"></div>
  272. </div>
  273. <div class="post-prevnext my-3">
  274. <article class="post-prev col-6">
  275. <a href="/Azure-AI-prevents-reverse-wool-shearing" title="【避坑】Azure AI 避免反向薅羊毛">
  276. <i class="iconfont icon-arrowleft"></i>
  277. <span class="hidden-mobile">【避坑】Azure AI 避免反向薅羊毛</span>
  278. <span class="visible-mobile">Previous</span>
  279. </a>
  280. </article>
  281. <article class="post-next col-6">
  282. <a href="/Lightweight-personal-navigation-page-Flare" title="【记录】轻量个人导航页面 Flare">
  283. <span class="hidden-mobile">【记录】轻量个人导航页面 Flare</span>
  284. <span class="visible-mobile">Next</span>
  285. <i class="iconfont icon-arrowright"></i>
  286. </a>
  287. </article>
  288. </div>
  289. </div>
  290. <article id="comments" lazyload>
  291. <div id="waline"></div>
  292. <script type="text/javascript">
  293. Fluid.utils.loadComments('#waline', function() {
  294. Fluid.utils.createCssLink('https://cdn.staticfile.org/waline/2.15.5/waline.min.css')
  295. Fluid.utils.createScript('https://cdn.staticfile.org/waline/2.15.5/waline.min.js', function() {
  296. var options = Object.assign(
  297. {"serverURL":"https://comments.limour.top","path":"window.location.pathname","meta":["nick","mail","link"],"requiredMeta":["nick"],"lang":"zh-CN","emoji":["https://jscdn.limour.top/gh/walinejs/emojis/weibo"],"dark":"html[data-user-color-scheme=\"dark\"]","wordLimit":0,"pageSize":10},
  298. {
  299. el: '#waline',
  300. path: window.location.pathname
  301. }
  302. )
  303. Waline.init(options);
  304. Fluid.utils.waitElementVisible('#waline .vcontent', () => {
  305. var imgSelector = '#waline .vcontent img:not(.vemoji)';
  306. Fluid.plugins.imageCaption(imgSelector);
  307. Fluid.plugins.fancyBox(imgSelector);
  308. })
  309. });
  310. });
  311. </script>
  312. <noscript>Please enable JavaScript to view the comments</noscript>
  313. </article>
  314. </article>
  315. </div>
  316. </div>
  317. </div>
  318. <div class="side-col d-none d-lg-block col-lg-2">
  319. <aside class="sidebar" style="margin-left: -1rem">
  320. <div id="toc">
  321. <p class="toc-header">
  322. <i class="iconfont icon-list"></i>
  323. <span>Table of Contents</span>
  324. </p>
  325. <div class="toc-body" id="toc-body"></div>
  326. </div>
  327. </aside>
  328. </div>
  329. </div>
  330. </div>
  331. <a id="scroll-top-button" aria-label="TOP" href="#" role="button">
  332. <i class="iconfont icon-arrowup" aria-hidden="true"></i>
  333. </a>
  334. <div class="modal fade" id="modalSearch" tabindex="-1" role="dialog" aria-labelledby="ModalLabel"
  335. aria-hidden="true">
  336. <div class="modal-dialog modal-dialog-scrollable modal-lg" role="document">
  337. <div class="modal-content">
  338. <div class="modal-header text-center">
  339. <h4 class="modal-title w-100 font-weight-bold">Search</h4>
  340. <button type="button" id="local-search-close" class="close" data-dismiss="modal" aria-label="Close">
  341. <span aria-hidden="true">&times;</span>
  342. </button>
  343. </div>
  344. <div class="modal-body mx-3">
  345. <div class="md-form mb-5">
  346. <input type="text" id="local-search-input" class="form-control validate">
  347. <label data-error="x" data-success="v" for="local-search-input">Keyword</label>
  348. </div>
  349. <div class="list-group" id="local-search-result"></div>
  350. </div>
  351. </div>
  352. </div>
  353. </div>
  354. </main>
  355. <footer>
  356. <div class="footer-inner">
  357. <div class="footer-content">
  358. <a target="_blank" rel="nofollow noopener" href="http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=43130202000203"><img src="https://img.limour.top/2023/08/27/64eadeb81d6a0.webp" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload>湘公网安备43130202000203号 </a> <a target="_blank" rel="nofollow noopener" href="https://beian.miit.gov.cn/">湘ICP备20008299号 </a> <a target="_blank" rel="nofollow noopener" href="https://icp.gov.moe/?keyword=20210128">萌ICP备20210128号</a> <br> <a href="https://www.foreverblog.cn/" target="_blank"> <img src="https://img.foreverblog.cn/logo_en_default.png" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload alt="" style="width:auto;height:24px"> </a> <br> <a href="https://hexo.io" target="_blank" rel="nofollow noopener"><span>Hexo</span></a> <i class="iconfont icon-love"></i> <a href="https://github.com/fluid-dev/hexo-theme-fluid" target="_blank" rel="nofollow noopener"><span>Fluid</span></a> <i class="iconfont icon-love"></i> <a href="https://github.com/limour-blog/limour-blog.github.io" target="_blank" rel="nofollow noopener"><span>SRC</span></a> <i class="iconfont icon-love"></i> <a href="https://web.archive.org/web/20231130095837/https://effectiveacceleration.tech/" target="_blank" rel="nofollow noopener"><span>e/Acc</span></a>
  359. </div>
  360. </div>
  361. </footer>
  362. <!-- Scripts -->
  363. <script src="https://jscdn.limour.top/npm/nprogress@0.2.0/nprogress.min.js" ></script>
  364. <link rel="stylesheet" href="https://jscdn.limour.top/npm/nprogress@0.2.0/nprogress.min.css" />
  365. <script>
  366. NProgress.configure({"showSpinner":false,"trickleSpeed":100})
  367. NProgress.start()
  368. window.addEventListener('load', function() {
  369. NProgress.done();
  370. })
  371. </script>
  372. <script src="https://jscdn.limour.top/npm/jquery@3.6.4/dist/jquery.min.js" ></script>
  373. <script src="https://jscdn.limour.top/npm/bootstrap@4.6.1/dist/js/bootstrap.min.js" ></script>
  374. <script src="/js/events.js" ></script>
  375. <script src="/js/plugins.js" ></script>
  376. <script src="/js/img-lazyload.js" ></script>
  377. <script>
  378. Fluid.utils.createScript('https://jscdn.limour.top/npm/tocbot@4.20.1/dist/tocbot.min.js', function() {
  379. var toc = jQuery('#toc');
  380. if (toc.length === 0 || !window.tocbot) { return; }
  381. var boardCtn = jQuery('#board-ctn');
  382. var boardTop = boardCtn.offset().top;
  383. window.tocbot.init(Object.assign({
  384. tocSelector : '#toc-body',
  385. contentSelector : '.markdown-body',
  386. linkClass : 'tocbot-link',
  387. activeLinkClass : 'tocbot-active-link',
  388. listClass : 'tocbot-list',
  389. isCollapsedClass: 'tocbot-is-collapsed',
  390. collapsibleClass: 'tocbot-is-collapsible',
  391. scrollSmooth : true,
  392. includeTitleTags: true,
  393. headingsOffset : -boardTop,
  394. }, CONFIG.toc));
  395. if (toc.find('.toc-list-item').length > 0) {
  396. toc.css('visibility', 'visible');
  397. }
  398. Fluid.events.registerRefreshCallback(function() {
  399. if ('tocbot' in window) {
  400. tocbot.refresh();
  401. var toc = jQuery('#toc');
  402. if (toc.length === 0 || !tocbot) {
  403. return;
  404. }
  405. if (toc.find('.toc-list-item').length > 0) {
  406. toc.css('visibility', 'visible');
  407. }
  408. }
  409. });
  410. });
  411. </script>
  412. <script src=https://lib.baomitu.com/clipboard.js/2.0.11/clipboard.min.js></script>
  413. <script>Fluid.plugins.codeWidget();</script>
  414. <script>
  415. Fluid.utils.createScript('https://jscdn.limour.top/npm/anchor-js@4.3.1/anchor.min.js', function() {
  416. window.anchors.options = {
  417. placement: CONFIG.anchorjs.placement,
  418. visible : CONFIG.anchorjs.visible
  419. };
  420. if (CONFIG.anchorjs.icon) {
  421. window.anchors.options.icon = CONFIG.anchorjs.icon;
  422. }
  423. var el = (CONFIG.anchorjs.element || 'h1,h2,h3,h4,h5,h6').split(',');
  424. var res = [];
  425. for (var item of el) {
  426. res.push('.markdown-body > ' + item.trim());
  427. }
  428. if (CONFIG.anchorjs.placement === 'left') {
  429. window.anchors.options.class = 'anchorjs-link-left';
  430. }
  431. window.anchors.add(res.join(', '));
  432. Fluid.events.registerRefreshCallback(function() {
  433. if ('anchors' in window) {
  434. anchors.removeAll();
  435. var el = (CONFIG.anchorjs.element || 'h1,h2,h3,h4,h5,h6').split(',');
  436. var res = [];
  437. for (var item of el) {
  438. res.push('.markdown-body > ' + item.trim());
  439. }
  440. if (CONFIG.anchorjs.placement === 'left') {
  441. anchors.options.class = 'anchorjs-link-left';
  442. }
  443. anchors.add(res.join(', '));
  444. }
  445. });
  446. });
  447. </script>
  448. <script>Fluid.plugins.imageCaption();</script>
  449. <script src="/js/local-search.js" ></script>
<!-- Theme bootstrap; keep it at the bottom -->
  451. <!-- the boot of the theme, keep it at the bottom -->
  452. <script src="/js/boot.js" ></script>
  453. <noscript>
  454. <div class="noscript-warning">Blog works best with JavaScript enabled</div>
  455. </noscript>
  456. <!-- hexo injector body_end start -->
  457. <script defer src="/theme-inject/timeliness.js"></script>
  458. <!-- hexo injector body_end end --></body>
  459. </html>