309.html 62 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701
  1. <!DOCTYPE html>
  2. <html lang="en" data-default-color-scheme=auto>
  3. <head><!-- hexo injector head_begin start -->
  4. <script async src="https://analytics.umami.is/script.js" data-website-id="e59ec28a-c9a7-4104-9e62-a9f7eb3fac0b"></script>
  5. <!-- hexo injector head_begin end -->
  6. <meta charset="UTF-8">
  7. <link rel="apple-touch-icon" sizes="76x76" href="https://img.limour.top/2023/08/29/64ee07361815a.webp">
  8. <link rel="icon" href="https://img.limour.top/2023/08/29/64ee07361815a.webp">
  9. <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=5.0, shrink-to-fit=no">
  10. <meta http-equiv="x-ua-compatible" content="ie=edge">
  11. <meta name="theme-color" content="#2f4154">
  12. <meta name="author" content="Limour">
  13. <meta name="keywords" content="">
  14. <meta name="description" content="使用 jTessBoxEditorFX 和 Tesseract 预训练数据微调 LSTM 模型以识别验证码:安装 Tesseract、生成并修正 box 文件、运行 lstmtraining 得到 fdu.traineddata,并用 pytesseract 与 Legacy 引擎对比识别效果。">
  15. <meta property="og:type" content="article">
  16. <meta property="og:title" content="LSTM应用于验证码识别">
  17. <meta property="og:url" content="https://b.limour.top/309.html">
  18. <meta property="og:site_name" content="Limour&#39;s Blog">
  19. <meta property="og:description" content="使用 jTessBoxEditorFX 和 Tesseract 预训练数据微调 LSTM 模型以识别验证码:安装 Tesseract、生成并修正 box 文件、运行 lstmtraining 得到 fdu.traineddata,并用 pytesseract 与 Legacy 引擎对比识别效果。">
  20. <meta property="og:locale" content="en_US">
  21. <meta property="og:image" content="https://img-cdn.limour.top/blog_wp/2020/07/%E5%BE%AE%E4%BF%A1%E5%9B%BE%E7%89%87_20200710090821.png">
  22. <meta property="article:published_time" content="2020-07-11T12:12:46.000Z">
  23. <meta property="article:modified_time" content="2023-09-01T13:32:00.437Z">
  24. <meta property="article:author" content="Limour">
  25. <meta property="article:tag" content="Python">
  26. <meta property="article:tag" content="LSTM">
  27. <meta property="article:tag" content="tesseract">
  28. <meta property="article:tag" content="验证码">
  29. <meta name="twitter:card" content="summary_large_image">
  30. <meta name="twitter:image" content="https://img-cdn.limour.top/blog_wp/2020/07/%E5%BE%AE%E4%BF%A1%E5%9B%BE%E7%89%87_20200710090821.png">
  31. <title>LSTM应用于验证码识别 - Limour&#39;s Blog</title>
  32. <link rel="stylesheet" href="https://jscdn.limour.top/npm/bootstrap@4.6.1/dist/css/bootstrap.min.css" />
  33. <link rel="stylesheet" href="https://jscdn.limour.top/npm/github-markdown-css@4.0.0/github-markdown.min.css" />
  34. <link rel="stylesheet" href="https://jscdn.limour.top/npm/hint.css@2.7.0/hint.min.css" />
  35. <link rel="stylesheet" href="https://lib.baomitu.com/fancybox/3.5.7/jquery.fancybox.min.css" />
  36. <!-- 主题依赖的图标库,不要自行修改 -->
  37. <!-- Do not modify the link that theme dependent icons -->
  38. <link rel="stylesheet" href="//at.alicdn.com/t/font_1749284_hj8rtnfg7um.css">
  39. <link rel="stylesheet" href="//at.alicdn.com/t/font_1736178_lbnruvf0jn.css">
  40. <link rel="stylesheet" href="/css/main.css" />
  41. <link id="highlight-css" rel="stylesheet" href="/css/highlight.css" />
  42. <link id="highlight-css-dark" rel="stylesheet" href="/css/highlight-dark.css" />
  43. <link rel="stylesheet" href="/theme-inject/custom.css">
  44. <script id="fluid-configs">
  // Bootstrap the theme's global state: reuse an existing window.Fluid namespace if one is present, otherwise start from an empty object.
  45. var Fluid = window.Fluid || {};
  // Replace Fluid.ctx with a fresh shallow copy (an empty object when Fluid.ctx is not set yet).
  46. Fluid.ctx = Object.assign({}, Fluid.ctx)
  // Theme settings serialized as one object literal; other inline scripts on this page read it (e.g. CONFIG.toc).
  47. var CONFIG = {"hostname":"b.limour.top","root":"/","version":"1.9.5-a","typing":{"enable":true,"typeSpeed":70,"cursorChar":"_","loop":false,"scope":[]},"anchorjs":{"enable":true,"element":"h1,h2,h3,h4,h5,h6","placement":"left","visible":"hover","icon":"§"},"progressbar":{"enable":true,"height_px":3,"color":"#29d","options":{"showSpinner":false,"trickleSpeed":100}},"code_language":{"enable":true,"default":"TEXT"},"copy_btn":true,"image_caption":{"enable":true},"image_zoom":{"enable":true,"img_url_replace":["",""]},"toc":{"enable":true,"placement":"right","headingSelector":"h1,h2,h3,h4,h5,h6","collapseDepth":0},"lazyload":{"enable":true,"loading_img":"https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg","onlypost":false,"offset_factor":2},"web_analytics":{"enable":false,"follow_dnt":true,"baidu":null,"google":{"measurement_id":null},"tencent":{"sid":null,"cid":null},"woyaola":null,"cnzz":null,"leancloud":{"app_id":null,"app_key":null,"server_url":null,"path":"window.location.pathname","ignore_local":false}},"search_path":"/local-search.xml","include_content_in_search":true};
  // When follow_dnt is enabled, record the visitor's Do-Not-Track preference in Fluid.ctx.dnt.
  48. if (CONFIG.web_analytics.follow_dnt) {
  // The preference is taken from whichever of these three properties is truthy first.
  49. var dntVal = navigator.doNotTrack || window.doNotTrack || navigator.msDoNotTrack;
  // Truthy only when the value starts with '1', 'yes' or 'on'; if no value was found, the falsy dntVal itself is stored.
  50. Fluid.ctx.dnt = dntVal && (dntVal.startsWith('1') || dntVal.startsWith('yes') || dntVal.startsWith('on'));
  51. }
  52. </script>
  53. <script src="/js/utils.js" ></script>
  54. <script src="/js/color-schema.js" ></script>
  55. <meta name="generator" content="Hexo 6.3.0"></head>
  56. <body>
  57. <header>
  58. <div class="header-inner" style="height: 70vh;">
  59. <nav id="navbar" class="navbar fixed-top navbar-expand-lg navbar-dark scrolling-navbar">
  60. <div class="container">
  61. <a class="navbar-brand" href="/">
  62. <strong>Limour&#39;s Blog</strong>
  63. </a>
  64. <button id="navbar-toggler-btn" class="navbar-toggler" type="button" data-toggle="collapse"
  65. data-target="#navbarSupportedContent"
  66. aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
  67. <div class="animated-icon"><span></span><span></span><span></span></div>
  68. </button>
  69. <!-- Collapsible content -->
  70. <div class="collapse navbar-collapse" id="navbarSupportedContent">
  71. <ul class="navbar-nav ml-auto text-center">
  72. <li class="nav-item">
  73. <a class="nav-link" href="/">
  74. <i class="iconfont icon-home-fill"></i>
  75. <span>Home</span>
  76. </a>
  77. </li>
  78. <li class="nav-item">
  79. <a class="nav-link" href="/archives/">
  80. <i class="iconfont icon-archive-fill"></i>
  81. <span>Archives</span>
  82. </a>
  83. </li>
  84. <li class="nav-item" id="search-btn">
  85. <a class="nav-link" target="_self" href="javascript:;" data-toggle="modal" data-target="#modalSearch" aria-label="Search">
  86. <i class="iconfont icon-search"></i>
  87. </a>
  88. </li>
  89. <li class="nav-item" id="color-toggle-btn">
  90. <a class="nav-link" target="_self" href="javascript:;" aria-label="Color Toggle">
  91. <i class="iconfont icon-dark" id="color-toggle-icon"></i>
  92. </a>
  93. </li>
  94. </ul>
  95. </div>
  96. </div>
  97. </nav>
  98. <div id="banner" class="banner" parallax=true
  99. style="background: url('https://img.limour.top/2023/08/29/64ee08e108638.webp') no-repeat center center; background-size: cover;">
  100. <div class="full-bg-img">
  101. <div class="mask flex-center" style="background-color: rgba(0, 0, 0, 0.3)">
  102. <div class="banner-text text-center fade-in-up">
  103. <div class="h2">
  104. <span id="subtitle" data-typed-text="LSTM应用于验证码识别"></span>
  105. </div>
  106. <div class="mt-3">
  107. <span class="post-meta">
  108. <i class="iconfont icon-date-fill" aria-hidden="true"></i>
  109. <time datetime="2020-07-11 20:12" pubdate>
  110. July 11, 2020 pm
  111. </time>
  112. </span>
  113. </div>
  114. <div class="mt-1">
  115. <span class="post-meta mr-2">
  116. <i class="iconfont icon-chart"></i>
  117. 3.8k words
  118. </span>
  119. <span class="post-meta mr-2">
  120. <i class="iconfont icon-clock-fill"></i>
  121. 32 mins
  122. </span>
  123. </div>
  124. </div>
  125. </div>
  126. </div>
  127. </div>
  128. </div>
  129. </header>
  130. <main>
  131. <div class="container-fluid nopadding-x">
  132. <div class="row nomargin-x">
  133. <div class="side-col d-none d-lg-block col-lg-2">
  134. </div>
  135. <div class="col-lg-8 nopadding-x-md">
  136. <div class="container nopadding-x-md" id="board-ctn">
  137. <div id="board">
  138. <article class="post-content mx-auto">
  139. <h1 id="seo-header">LSTM应用于验证码识别</h1>
  140. <div class="markdown-body">
  141. <p><a target="_blank" rel="noopener" href="https://limour.lanzous.com/iTNKZeg7tja"><img src="" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></a></p>
  142. <p> <a target="_blank" rel="noopener" href="https://sourceforge.net/projects/vietocr/files/jTessBoxEditor/">jTessBoxEditorFX-2.3.0</a></p>
  143. <p><a target="_blank" rel="noopener" href="https://limour.lanzous.com/iLQ25eh2o1i"><img src="" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></a></p>
  144. <p><a target="_blank" rel="noopener" href="https://tesseract-ocr.github.io/tessdoc/Data-Files">预训练数据</a></p>
  145. <figure class="highlight awk"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><code class="hljs awk"><span class="hljs-comment">#For CentOS 7 run the following as root to install Tesseract with English language traineddata:</span><br>yum -y install yum-utils<br>yum-config-manager --add-repo https:<span class="hljs-regexp">//</span>download.opensuse.org<span class="hljs-regexp">/repositories/</span>home:<span class="hljs-regexp">/Alexander_Pozdnyakov/</span>CentOS_7/<br>sudo rpm --import https:<span class="hljs-regexp">//</span>build.opensuse.org<span class="hljs-regexp">/projects/</span>home:Alexander_Pozdnyakov/public_key<br>yum update<br>yum install tesseract <br>yum install tesseract-langpack-eng<br></code></pre></td></tr></table></figure>
  146. <figure class="highlight livescript"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><code class="hljs livescript"><span class="hljs-comment">#For Win10 to install Tesseract:</span><br><span class="hljs-number">1.</span>下载解压 jTessBoxEditor<br><span class="hljs-number">2.</span>将 &#123;解压目录&#125;<span class="hljs-string">\jTessBoxEditorFX\tesseract-ocr</span> 添加到 Path<br><span class="hljs-number">3.</span>下载解压预训练数据到当前目录<br><span class="hljs-number">4.</span>新建环境变量 TESSDATA_PREFIX ,值为 &#123;解压目录&#125;<span class="hljs-string">\tessdata</span><br></code></pre></td></tr></table></figure>
  147. <p><img src="https://img-cdn.limour.top/blog_wp/2020/07/%E5%BE%AE%E4%BF%A1%E5%9B%BE%E7%89%87_20200710090821.png" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></p>
  148. <p>终端中运行命令 <code>tesseract --help-extra</code> 显示如上信息表示安装成功</p>
  149. <p><a target="_blank" rel="noopener" href="https://limour.lanzous.com/icIheeh321c"><img src="" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></a></p>
  150. <p>自行获取训练所需的验证码</p>
  151. <p>按照<a target="_blank" rel="noopener" href="https://blog.csdn.net/qq_40147863/article/details/82290015">肖鹏伟的《Tesseract-OCR-04-使用 jTessBoxEditor提高文字识别准确率》</a>中的方法生成<code>fdu.ufont.exp0.tif</code>文件</p>
  152. <figure class="highlight stylus"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs stylus">#通过此命令生成fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span>.box文件<br>tesseract fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span><span class="hljs-selector-class">.tif</span> fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span> -l eng <span class="hljs-attr">--psm</span> <span class="hljs-number">8</span> <span class="hljs-attr">--oem</span> <span class="hljs-number">0</span> nobatch box<span class="hljs-selector-class">.train</span> -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzAT-<br></code></pre></td></tr></table></figure>
  153. <p>继续按照肖鹏伟的方法修正<code>.box</code>文件</p>
  154. <figure class="highlight stylus"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><code class="hljs stylus">#将fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span>.tif文件、修正后的fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span>.box文件一起保存到独立的同一新文件夹下,同目录下运行此.ps1文件即可得到fdu<span class="hljs-selector-class">.traineddata</span><br>tesseract fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span><span class="hljs-selector-class">.tif</span> fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span> -l enb <span class="hljs-attr">--psm</span> <span class="hljs-number">8</span> lstm<span class="hljs-selector-class">.train</span><br>combine_tessdata -e <span class="hljs-string">&quot;$env:TESSDATA_PREFIX\enb.traineddata&quot;</span> enb<span class="hljs-selector-class">.lstm</span><br><span class="hljs-variable">$PSroot</span> = Get-ChildItem<br><span class="hljs-variable">$PSroot</span> = Split-Path <span class="hljs-variable">$PSroot</span><span class="hljs-selector-class">.Get</span>(<span class="hljs-number">0</span>)<span class="hljs-selector-class">.FullName</span><br><span class="hljs-variable">$fso</span>=New-Object -ComObject Scripting<span class="hljs-selector-class">.FileSystemObject</span><br><span class="hljs-variable">$fso</span><span class="hljs-selector-class">.CreateTextFile</span>(<span class="hljs-string">&#x27;fdu.training_files.txt&#x27;</span>,<span class="hljs-number">2</span>)<span class="hljs-selector-class">.Write</span>(<span 
class="hljs-string">&quot;$PSroot\fdu.ufont.exp0.lstmf&quot;</span> )<br><span class="hljs-keyword">if</span> (-not (Test-Path -Path output))&#123;mkdir output&#125;<br>lstmtraining <span class="hljs-attr">--model_output</span>=<span class="hljs-string">&quot;$PSroot\output\output&quot;</span> <span class="hljs-attr">--continue_from</span>=<span class="hljs-string">&quot;$PSroot\enb.lstm&quot;</span> <span class="hljs-attr">--train_listfile</span>=<span class="hljs-string">&quot;$PSroot\fdu.training_files.txt&quot;</span> <span class="hljs-attr">--traineddata</span>=<span class="hljs-string">&quot;$env:TESSDATA_PREFIX\enb.traineddata&quot;</span> <span class="hljs-attr">--debug_interval</span> -<span class="hljs-number">1</span> <span class="hljs-attr">--target_error_rate</span> <span class="hljs-number">0.001</span><br>lstmtraining <span class="hljs-attr">--stop_training</span> <span class="hljs-attr">--continue_from</span>=<span class="hljs-string">&quot;$PSroot\output\output_checkpoint&quot;</span> <span class="hljs-attr">--traineddata</span>=<span class="hljs-string">&quot;$env:TESSDATA_PREFIX\enb.traineddata&quot;</span> <span class="hljs-attr">--model_output</span>=<span class="hljs-string">&quot;$PSroot\fdu.traineddata&quot;</span><br></code></pre></td></tr></table></figure>
  155. <p><a target="_blank" rel="noopener" href="https://limour.lanzous.com/ifW3jeireib"><img src="" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></a></p>
  156. <p>最终得到如上结果</p>
  157. <p>将得到的<code>fdu.traineddata</code>文件移动到<code>tessdata</code>文件夹下即可通过参数<code>-l fdu</code>进行使用</p>
  158. <figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br></pre></td><td class="code"><pre><code class="hljs python"><span 
class="hljs-comment">#此程序用于简单判断训练效果</span><br><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image<br><span class="hljs-comment">#from itertools import cycle</span><br><span class="hljs-keyword">import</span> os, random, re<br><span class="hljs-keyword">import</span> pytesseract<br>fl = re.<span class="hljs-built_in">compile</span>(<span class="hljs-string">r&#x27;[a-zA-Z-]+&#x27;</span>)<br><span class="hljs-keyword">def</span> <span class="hljs-title function_">clearStr</span>(<span class="hljs-params"><span class="hljs-built_in">str</span></span>):<br> <span class="hljs-keyword">return</span> <span class="hljs-string">&#x27;&#x27;</span>.join(fl.findall(<span class="hljs-built_in">str</span>))<br><br><span class="hljs-keyword">class</span> <span class="hljs-title class_">Fileset</span>(<span class="hljs-title class_ inherited__">list</span>):<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, name, ext=<span class="hljs-string">&#x27;&#x27;</span>, _read=<span class="hljs-literal">None</span>, root=<span class="hljs-literal">None</span></span>):<br> <span class="hljs-keyword">if</span> <span class="hljs-built_in">isinstance</span>(name, <span class="hljs-built_in">str</span>) :<br> self.root = os.path.join(root <span class="hljs-keyword">or</span> os.getcwd(), name)<br> self.extend(f <span class="hljs-keyword">for</span> f <span class="hljs-keyword">in</span> os.listdir(self.root) <span class="hljs-keyword">if</span> f.endswith(ext))<br> self._read = _read<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__getitem__</span>(<span class="hljs-params">self, index</span>):<br> <span class="hljs-keyword">if</span> <span class="hljs-built_in">isinstance</span>(index, <span class="hljs-built_in">int</span>):<span class="hljs-comment"># index是索引</span><br> <span class="hljs-keyword">return</span> os.path.join(self.root, <span 
class="hljs-built_in">super</span>().__getitem__(index))<br> <span class="hljs-keyword">else</span>:<span class="hljs-comment"># index是切片</span><br> fileset = Fileset(<span class="hljs-literal">None</span>)<br> fileset.root = self.root<br> fileset._read = self._read<br> fileset.extend(<span class="hljs-built_in">super</span>().__getitem__(index))<br> <span class="hljs-keyword">return</span> fileset<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">getFileName</span>(<span class="hljs-params">self, index</span>):<br> fname, ext = os.path.splitext(<span class="hljs-built_in">super</span>().__getitem__(index))<br> <span class="hljs-keyword">return</span> fname<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__iter__</span>(<span class="hljs-params">self</span>):<br> <span class="hljs-keyword">if</span> self._read: <span class="hljs-keyword">return</span> (self._read(os.path.join(self.root, f)) <span class="hljs-keyword">for</span> f <span class="hljs-keyword">in</span> <span class="hljs-built_in">super</span>().__iter__())<br> <span class="hljs-keyword">else</span>: <span class="hljs-keyword">return</span> (os.path.join(self.root, f) <span class="hljs-keyword">for</span> f <span class="hljs-keyword">in</span> <span class="hljs-built_in">super</span>().__iter__())<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__call__</span>(<span class="hljs-params">self</span>):<br> retn = random.choice(self)<br> <span class="hljs-keyword">if</span> self._read: <span class="hljs-keyword">return</span> self._read(retn)<br> <span class="hljs-keyword">else</span>: <span class="hljs-keyword">return</span> retn<br><br><span class="hljs-comment"># def fopen(path):</span><br> <span class="hljs-comment"># with open(path, &#x27;rb&#x27;) as f:</span><br> <span class="hljs-comment"># return f.read()</span><br><span class="hljs-comment"># #from tesOCR import tesOCR as OCR1</span><br><span 
class="hljs-comment"># sample = Fileset(&#x27;Captcha&#x27;, &#x27;.jpg&#x27;, fopen)</span><br>sample = Fileset(<span class="hljs-string">&#x27;Captcha&#x27;</span>, <span class="hljs-string">&#x27;.jpg&#x27;</span>, Image.<span class="hljs-built_in">open</span>)<br><br>config1 = <span class="hljs-string">&#x27;--psm 8&#x27;</span><br><span class="hljs-keyword">def</span> <span class="hljs-title function_">OCR1</span>(<span class="hljs-params">img</span>):<br> <span class="hljs-keyword">return</span> pytesseract.image_to_string(img, lang=<span class="hljs-string">&#x27;fdu&#x27;</span>, config=config1)<br><br>config2 = <span class="hljs-string">&quot;--psm 8 --oem 0 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzAT-&quot;</span><br><span class="hljs-keyword">def</span> <span class="hljs-title function_">OCR2</span>(<span class="hljs-params">img</span>):<br> <span class="hljs-keyword">return</span> pytesseract.image_to_string(img, lang=<span class="hljs-string">&#x27;eng&#x27;</span>, config=config2)<br><br><span class="hljs-keyword">for</span> a <span class="hljs-keyword">in</span> sample:<br> b = a.convert(<span class="hljs-string">&quot;L&quot;</span>)<br> x = clearStr(OCR1(b))<br> y = clearStr(OCR2(b))<br> <span class="hljs-keyword">if</span> x != y:<br> display(a)<br> <span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;LSTM is <span class="hljs-subst">&#123;x&#125;</span> ; Legacy is <span class="hljs-subst">&#123;y&#125;</span>&quot;</span>)<br></code></pre></td></tr></table></figure>
  159. <p><a target="_blank" rel="noopener" href="https://limour.lanzous.com/imK6me9lurg"><img src="" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></a></p>
  160. <p>我的结果和python调用封装</p>
  161. <p>注释: </p>
  162. <ol>
  163. <li><a target="_blank" rel="noopener" href="https://sourceforge.net/projects/vietocr/files/jTessBoxEditor/">jTessBoxEditor</a>中带有FX表示支持中文<br>2.预训练数据中22.3MB的是Legacy数据,14.6MB的是LSTM数据,语言均为eng<br>3.<code>tessedit_char_whitelist&#x3D;</code>后面所接内容为验证码中可能出现的字符</li>
  164. </ol>
  165. </div>
  166. <hr/>
  167. <div>
  168. <div class="post-metas my-3">
  169. <div class="post-meta mr-3 d-flex align-items-center">
  170. <i class="iconfont icon-category"></i>
  171. <span class="category-chains">
  172. <span class="category-chain">
  173. <a href="/categories/Python%E7%BB%83%E4%B9%A0/" class="category-chain-item">Python练习</a>
  174. </span>
  175. </span>
  176. </div>
  177. <div class="post-meta">
  178. <i class="iconfont icon-tags"></i>
  179. <a href="/tags/Python/" class="print-no-link">#Python</a>
  180. <a href="/tags/LSTM/" class="print-no-link">#LSTM</a>
  181. <a href="/tags/tesseract/" class="print-no-link">#tesseract</a>
  182. <a href="/tags/%E9%AA%8C%E8%AF%81%E7%A0%81/" class="print-no-link">#验证码</a>
  183. </div>
  184. </div>
  185. <div class="license-box my-3">
  186. <div class="license-title">
  187. <div>LSTM应用于验证码识别</div>
  188. <div>https://b.limour.top/309.html</div>
  189. </div>
  190. <div class="license-meta">
  191. <div class="license-meta-item">
  192. <div>Author</div>
  193. <div>Limour</div>
  194. </div>
  195. <div class="license-meta-item license-meta-date">
  196. <div>Posted on</div>
  197. <div>July 11, 2020</div>
  198. </div>
  199. <div class="license-meta-item">
  200. <div>Licensed under</div>
  201. <div>
  202. <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
  203. <span class="hint--top hint--rounded" aria-label="BY - Attribution">
  204. <i class="iconfont icon-by"></i>
  205. </span>
  206. </a>
  207. <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
  208. <span class="hint--top hint--rounded" aria-label="NC - Non-commercial">
  209. <i class="iconfont icon-nc"></i>
  210. </span>
  211. </a>
  212. <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
  213. <span class="hint--top hint--rounded" aria-label="SA - Share-alike">
  214. <i class="iconfont icon-sa"></i>
  215. </span>
  216. </a>
  217. </div>
  218. </div>
  219. </div>
  220. <div class="license-icon iconfont"></div>
  221. </div>
  222. <div class="post-prevnext my-3">
  223. <article class="post-prev col-6">
  224. <a href="/341.html" title="色温与RGB颜色换算表">
  225. <i class="iconfont icon-arrowleft"></i>
  226. <span class="hidden-mobile">色温与RGB颜色换算表</span>
  227. <span class="visible-mobile">Previous</span>
  228. </a>
  229. </article>
  230. <article class="post-next col-6">
  231. <a href="/297.html" title="pytesseract测试">
  232. <span class="hidden-mobile">pytesseract测试</span>
  233. <span class="visible-mobile">Next</span>
  234. <i class="iconfont icon-arrowright"></i>
  235. </a>
  236. </article>
  237. </div>
  238. </div>
  239. <article id="comments" lazyload>
  240. <div id="waline"></div>
  241. <script type="text/javascript">
  // Set up the Waline comment widget inside the #waline container.
  // NOTE(review): Fluid.utils.loadComments is defined outside this page; presumably it delays the callback until the comment area is needed — confirm in /js/utils.js.
  242. Fluid.utils.loadComments('#waline', function() {
  // Request the Waline 2.15.5 stylesheet and script; the init code below is the callback handed to createScript.
  243. Fluid.utils.createCssLink('https://cdn.staticfile.org/waline/2.15.5/waline.min.css')
  244. Fluid.utils.createScript('https://cdn.staticfile.org/waline/2.15.5/waline.min.js', function() {
  // Merge the generated site settings with page-specific values. The second object wins, so the
  // literal string "window.location.pathname" in the generated settings is replaced by the real pathname.
  245. var options = Object.assign(
  246. {"serverURL":"https://comments.limour.top","path":"window.location.pathname","meta":["nick","mail","link"],"requiredMeta":["nick"],"lang":"zh-CN","emoji":["https://jscdn.limour.top/gh/walinejs/emojis/weibo"],"dark":"html[data-user-color-scheme=\"dark\"]","wordLimit":0,"pageSize":10},
  247. {
  248. el: '#waline',
  249. path: window.location.pathname
  250. }
  251. )
  252. Waline.init(options);
  // Apply the theme's image caption and fancyBox plugins to comment images, excluding emoji (.vemoji).
  // NOTE(review): waitElementVisible presumably fires once '#waline .vcontent' is visible — confirm in /js/utils.js.
  253. Fluid.utils.waitElementVisible('#waline .vcontent', () => {
  254. var imgSelector = '#waline .vcontent img:not(.vemoji)';
  255. Fluid.plugins.imageCaption(imgSelector);
  256. Fluid.plugins.fancyBox(imgSelector);
  257. })
  258. });
  259. });
  260. </script>
  261. <noscript>Please enable JavaScript to view the comments</noscript>
  262. </article>
  263. </article>
  264. </div>
  265. </div>
  266. </div>
  267. <div class="side-col d-none d-lg-block col-lg-2">
  268. <aside class="sidebar" style="margin-left: -1rem">
  269. <div id="toc">
  270. <p class="toc-header">
  271. <i class="iconfont icon-list"></i>
  272. <span>Table of Contents</span>
  273. </p>
  274. <div class="toc-body" id="toc-body"></div>
  275. </div>
  276. </aside>
  277. </div>
  278. </div>
  279. </div>
  280. <a id="scroll-top-button" aria-label="TOP" href="#" role="button">
  281. <i class="iconfont icon-arrowup" aria-hidden="true"></i>
  282. </a>
  283. <div class="modal fade" id="modalSearch" tabindex="-1" role="dialog" aria-labelledby="ModalLabel"
  284. aria-hidden="true">
  285. <div class="modal-dialog modal-dialog-scrollable modal-lg" role="document">
  286. <div class="modal-content">
  287. <div class="modal-header text-center">
  288. <h4 class="modal-title w-100 font-weight-bold">Search</h4>
  289. <button type="button" id="local-search-close" class="close" data-dismiss="modal" aria-label="Close">
  290. <span aria-hidden="true">&times;</span>
  291. </button>
  292. </div>
  293. <div class="modal-body mx-3">
  294. <div class="md-form mb-5">
  295. <input type="text" id="local-search-input" class="form-control validate">
  296. <label data-error="x" data-success="v" for="local-search-input">Keyword</label>
  297. </div>
  298. <div class="list-group" id="local-search-result"></div>
  299. </div>
  300. </div>
  301. </div>
  302. </div>
  303. </main>
  <!-- Site footer: filing/record links, the ForeverBlog badge, and Hexo / Fluid / source credits. -->
  <footer>
    <div class="footer-inner">
      <div class="footer-content">
        <!-- alt="" marks the badge icon as decorative; the adjacent link text names the link. -->
        <a target="_blank" rel="nofollow noopener" href="http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=43130202000203"><img src="https://img.limour.top/2023/08/27/64eadeb81d6a0.webp" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload alt="">湘公网安备 43130202000203号 </a>
        <a target="_blank" rel="nofollow noopener" href="https://beian.miit.gov.cn/">湘ICP备20008299号 </a>
        <a target="_blank" rel="nofollow noopener" href="https://icp.gov.moe/?keyword=20210128">萌ICP备20210128号</a>
        <br>
        <!-- rel="noopener" keeps the newly opened tab from reaching back through window.opener. -->
        <a href="https://www.foreverblog.cn/" target="_blank" rel="noopener"> <img src="https://img.foreverblog.cn/logo_en_default.png" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload alt="" style="width:auto;height:24px"> </a>
        <br>
        <a href="https://hexo.io" target="_blank" rel="nofollow noopener"><span>Hexo</span></a>
        <i class="iconfont icon-love"></i>
        <a href="https://github.com/fluid-dev/hexo-theme-fluid" target="_blank" rel="nofollow noopener"><span>Fluid</span></a>
        <i class="iconfont icon-love"></i>
        <a href="https://github.com/limour-blog/archives-b" target="_blank" rel="nofollow noopener"><span>SRC</span></a>
      </div>
    </div>
  </footer>
  311. <!-- Scripts -->
  312. <script src="https://jscdn.limour.top/npm/nprogress@0.2.0/nprogress.min.js" ></script>
  313. <link rel="stylesheet" href="https://jscdn.limour.top/npm/nprogress@0.2.0/nprogress.min.css" />
  314. <script>
  // Show the page-load progress bar immediately (spinner disabled) and
  // complete it once the window "load" event fires.
  NProgress.configure({ showSpinner: false, trickleSpeed: 100 });
  NProgress.start();
  window.addEventListener('load', () => {
    NProgress.done();
  });
  320. </script>
  321. <script src="https://jscdn.limour.top/npm/jquery@3.6.4/dist/jquery.min.js" ></script>
  322. <script src="https://jscdn.limour.top/npm/bootstrap@4.6.1/dist/js/bootstrap.min.js" ></script>
  323. <script src="/js/events.js" ></script>
  324. <script src="/js/plugins.js" ></script>
  325. <script src="https://lib.baomitu.com/typed.js/2.0.12/typed.min.js" ></script>
  326. <script>
  // Run the subtitle typing effect, but only when both the Fluid typing
  // plugin and the #subtitle element are present on this page.
  (function (window, document) {
    var runTyping = Fluid.plugins.typing;
    var subtitleEl = document.getElementById('subtitle');
    if (subtitleEl && runTyping) {
      // The text to type is carried on the element's data-typed-text attribute.
      runTyping(subtitleEl.getAttribute('data-typed-text'));
    }
  })(window, document);
  336. </script>
  337. <script src="/js/img-lazyload.js" ></script>
  338. <script>
  // Load tocbot on demand, build the table of contents inside #toc-body, and
  // refresh it whenever Fluid runs its registered refresh callbacks.
  Fluid.utils.createScript('https://jscdn.limour.top/npm/tocbot@4.20.1/dist/tocbot.min.js', function() {
    // Make the #toc container visible only once tocbot has rendered at least
    // one entry. Shared by the first build and by every refresh.
    function revealTocIfPopulated() {
      var toc = jQuery('#toc');
      if (toc.length > 0 && toc.find('.toc-list-item').length > 0) {
        toc.css('visibility', 'visible');
      }
    }

    if (jQuery('#toc').length === 0 || !window.tocbot) { return; }

    // jQuery's .offset() returns undefined for an empty set, so fall back to 0
    // instead of throwing when #board-ctn is missing from the page.
    var boardOffset = jQuery('#board-ctn').offset();
    var boardTop = boardOffset ? boardOffset.top : 0;

    // Entries in CONFIG.toc override the defaults listed here.
    window.tocbot.init(Object.assign({
      tocSelector     : '#toc-body',
      contentSelector : '.markdown-body',
      linkClass       : 'tocbot-link',
      activeLinkClass : 'tocbot-active-link',
      listClass       : 'tocbot-list',
      isCollapsedClass: 'tocbot-is-collapsed',
      collapsibleClass: 'tocbot-is-collapsible',
      scrollSmooth    : true,
      includeTitleTags: true,
      headingsOffset  : -boardTop,
    }, CONFIG.toc));
    revealTocIfPopulated();

    Fluid.events.registerRefreshCallback(function() {
      if ('tocbot' in window) {
        window.tocbot.refresh();
        revealTocIfPopulated();
      }
    });
  });
  372. </script>
  <!-- clipboard.js is loaded before the Fluid code widget is initialised. -->
  <script src="https://lib.baomitu.com/clipboard.js/2.0.11/clipboard.min.js"></script>
  <script>Fluid.plugins.codeWidget();</script>
  375. <script>
  // Load AnchorJS on demand, attach anchor links to the headings that are
  // direct children of .markdown-body, and re-attach them on every Fluid refresh.
  Fluid.utils.createScript('https://jscdn.limour.top/npm/anchor-js@4.3.1/anchor.min.js', function() {
    // Build the heading selector from CONFIG.anchorjs.element and add anchors.
    // Shared by the first load and by the refresh callback.
    function addHeadingAnchors() {
      var selector = (CONFIG.anchorjs.element || 'h1,h2,h3,h4,h5,h6')
        .split(',')
        .map(function(tag) { return tag.trim(); })
        // Skip blank entries (e.g. a trailing comma) so no dangling
        // ".markdown-body > " selector is produced.
        .filter(function(tag) { return tag.length > 0; })
        .map(function(tag) { return '.markdown-body > ' + tag; })
        .join(', ');
      if (selector === '') {
        // Nothing configured to anchor.
        return;
      }
      if (CONFIG.anchorjs.placement === 'left') {
        window.anchors.options.class = 'anchorjs-link-left';
      }
      window.anchors.add(selector);
    }

    window.anchors.options = {
      placement: CONFIG.anchorjs.placement,
      visible  : CONFIG.anchorjs.visible
    };
    if (CONFIG.anchorjs.icon) {
      window.anchors.options.icon = CONFIG.anchorjs.icon;
    }
    addHeadingAnchors();

    Fluid.events.registerRefreshCallback(function() {
      if ('anchors' in window) {
        // Drop the previously added anchors before adding them again.
        window.anchors.removeAll();
        addHeadingAnchors();
      }
    });
  });
  408. </script>
  409. <script>
  // Load the fancybox script on demand, then run the Fluid fancyBox plugin.
  Fluid.utils.createScript(
    'https://lib.baomitu.com/fancybox/3.5.7/jquery.fancybox.min.js',
    () => {
      Fluid.plugins.fancyBox();
    }
  );
  413. </script>
  414. <script>Fluid.plugins.imageCaption();</script>
  415. <script src="/js/local-search.js" ></script>
  416. <!-- 主题的启动项,将它保持在最底部 -->
  417. <!-- the boot of the theme, keep it at the bottom -->
  418. <script src="/js/boot.js" ></script>
  419. <noscript>
  420. <div class="noscript-warning">Blog works best with JavaScript enabled</div>
  421. </noscript>
  422. <!-- hexo injector body_end start -->
  423. <script defer src="/theme-inject/timeliness.js"></script>
  424. <!-- hexo injector body_end end --></body>
  425. </html>