123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701 |
- <!DOCTYPE html>
- <html lang="en" data-default-color-scheme=auto>
- <head><!-- hexo injector head_begin start -->
- <script async src="https://analytics.umami.is/script.js" data-website-id="e59ec28a-c9a7-4104-9e62-a9f7eb3fac0b"></script>
- <!-- hexo injector head_begin end -->
- <meta charset="UTF-8">
- <link rel="apple-touch-icon" sizes="76x76" href="https://img.limour.top/2023/08/29/64ee07361815a.webp">
- <link rel="icon" href="https://img.limour.top/2023/08/29/64ee07361815a.webp">
- <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=5.0, shrink-to-fit=no">
- <meta http-equiv="x-ua-compatible" content="ie=edge">
-
- <meta name="theme-color" content="#2f4154">
- <meta name="author" content="Limour">
- <meta name="keywords" content="">
-
- <meta name="description" content="jTessBoxEditorFX-2.3.0 预训练数据 #For CentOS 7 run the following as root to install Tesseract with English language traineddata: yum -y install yum-utils; yum-config-manager --add-repo https://download.opensuse.org/repositories/home:/Alexander_Pozdnyakov/CentOS_7/">
- <meta property="og:type" content="article">
- <meta property="og:title" content="LSTM应用于验证码识别">
- <meta property="og:url" content="https://b.limour.top/309.html">
- <meta property="og:site_name" content="Limour's Blog">
- <meta property="og:description" content="jTessBoxEditorFX-2.3.0 预训练数据 #For CentOS 7 run the following as root to install Tesseract with English language traineddata: yum -y install yum-utils; yum-config-manager --add-repo https://download.opensuse.org/repositories/home:/Alexander_Pozdnyakov/CentOS_7/">
- <meta property="og:locale" content="en_US">
- <meta property="og:image" content="https://img-cdn.limour.top/blog_wp/2020/07/%E5%BE%AE%E4%BF%A1%E5%9B%BE%E7%89%87_20200710090821.png">
- <meta property="article:published_time" content="2020-07-11T12:12:46.000Z">
- <meta property="article:modified_time" content="2023-09-01T13:32:00.437Z">
- <meta property="article:author" content="Limour">
- <meta property="article:tag" content="Python">
- <meta property="article:tag" content="LSTM">
- <meta property="article:tag" content="tesseract">
- <meta property="article:tag" content="验证码">
- <meta name="twitter:card" content="summary_large_image">
- <meta name="twitter:image" content="https://img-cdn.limour.top/blog_wp/2020/07/%E5%BE%AE%E4%BF%A1%E5%9B%BE%E7%89%87_20200710090821.png">
-
-
-
- <title>LSTM应用于验证码识别 - Limour's Blog</title>
- <link rel="stylesheet" href="https://jscdn.limour.top/npm/bootstrap@4.6.1/dist/css/bootstrap.min.css" />
- <link rel="stylesheet" href="https://jscdn.limour.top/npm/github-markdown-css@4.0.0/github-markdown.min.css" />
- <link rel="stylesheet" href="https://jscdn.limour.top/npm/hint.css@2.7.0/hint.min.css" />
- <link rel="stylesheet" href="https://lib.baomitu.com/fancybox/3.5.7/jquery.fancybox.min.css" />
- <!-- 主题依赖的图标库,不要自行修改 -->
- <!-- Do not modify the link that theme dependent icons -->
- <link rel="stylesheet" href="//at.alicdn.com/t/font_1749284_hj8rtnfg7um.css">
- <link rel="stylesheet" href="//at.alicdn.com/t/font_1736178_lbnruvf0jn.css">
- <link rel="stylesheet" href="/css/main.css" />
- <link id="highlight-css" rel="stylesheet" href="/css/highlight.css" />
-
- <link id="highlight-css-dark" rel="stylesheet" href="/css/highlight-dark.css" />
-
-
- <link rel="stylesheet" href="/theme-inject/custom.css">
- <script id="fluid-configs">
- var Fluid = window.Fluid || {};
- Fluid.ctx = Object.assign({}, Fluid.ctx)
- var CONFIG = {"hostname":"b.limour.top","root":"/","version":"1.9.5-a","typing":{"enable":true,"typeSpeed":70,"cursorChar":"_","loop":false,"scope":[]},"anchorjs":{"enable":true,"element":"h1,h2,h3,h4,h5,h6","placement":"left","visible":"hover","icon":"§"},"progressbar":{"enable":true,"height_px":3,"color":"#29d","options":{"showSpinner":false,"trickleSpeed":100}},"code_language":{"enable":true,"default":"TEXT"},"copy_btn":true,"image_caption":{"enable":true},"image_zoom":{"enable":true,"img_url_replace":["",""]},"toc":{"enable":true,"placement":"right","headingSelector":"h1,h2,h3,h4,h5,h6","collapseDepth":0},"lazyload":{"enable":true,"loading_img":"https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg","onlypost":false,"offset_factor":2},"web_analytics":{"enable":false,"follow_dnt":true,"baidu":null,"google":{"measurement_id":null},"tencent":{"sid":null,"cid":null},"woyaola":null,"cnzz":null,"leancloud":{"app_id":null,"app_key":null,"server_url":null,"path":"window.location.pathname","ignore_local":false}},"search_path":"/local-search.xml","include_content_in_search":true};
- if (CONFIG.web_analytics.follow_dnt) {
- var dntVal = navigator.doNotTrack || window.doNotTrack || navigator.msDoNotTrack;
- Fluid.ctx.dnt = dntVal && (dntVal.startsWith('1') || dntVal.startsWith('yes') || dntVal.startsWith('on'));
- }
- </script>
- <script src="/js/utils.js" ></script>
- <script src="/js/color-schema.js" ></script>
-
-
- <meta name="generator" content="Hexo 6.3.0"></head>
- <body>
-
- <header>
-
- <div class="header-inner" style="height: 70vh;">
- <nav id="navbar" class="navbar fixed-top navbar-expand-lg navbar-dark scrolling-navbar">
- <div class="container">
- <a class="navbar-brand" href="/">
- <strong>Limour's Blog</strong>
- </a>
- <button id="navbar-toggler-btn" class="navbar-toggler" type="button" data-toggle="collapse"
- data-target="#navbarSupportedContent"
- aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
- <div class="animated-icon"><span></span><span></span><span></span></div>
- </button>
- <!-- Collapsible content -->
- <div class="collapse navbar-collapse" id="navbarSupportedContent">
- <ul class="navbar-nav ml-auto text-center">
-
-
-
-
-
- <li class="nav-item">
- <a class="nav-link" href="/">
- <i class="iconfont icon-home-fill"></i>
- <span>Home</span>
- </a>
- </li>
-
-
-
-
-
-
- <li class="nav-item">
- <a class="nav-link" href="/archives/">
- <i class="iconfont icon-archive-fill"></i>
- <span>Archives</span>
- </a>
- </li>
-
-
-
- <li class="nav-item" id="search-btn">
- <a class="nav-link" target="_self" href="javascript:;" data-toggle="modal" data-target="#modalSearch" aria-label="Search">
- <i class="iconfont icon-search"></i>
- </a>
- </li>
-
-
-
- <li class="nav-item" id="color-toggle-btn">
- <a class="nav-link" target="_self" href="javascript:;" aria-label="Color Toggle">
- <i class="iconfont icon-dark" id="color-toggle-icon"></i>
- </a>
- </li>
-
- </ul>
- </div>
- </div>
- </nav>
-
- <div id="banner" class="banner" parallax=true
- style="background: url('https://img.limour.top/2023/08/29/64ee08e108638.webp') no-repeat center center; background-size: cover;">
- <div class="full-bg-img">
- <div class="mask flex-center" style="background-color: rgba(0, 0, 0, 0.3)">
- <div class="banner-text text-center fade-in-up">
- <div class="h2">
-
- <span id="subtitle" data-typed-text="LSTM应用于验证码识别"></span>
-
- </div>
-
-
- <div class="mt-3">
-
-
- <span class="post-meta">
- <i class="iconfont icon-date-fill" aria-hidden="true"></i>
- <time datetime="2020-07-11 20:12" pubdate>
- July 11, 2020 pm
- </time>
- </span>
-
- </div>
- <div class="mt-1">
-
- <span class="post-meta mr-2">
- <i class="iconfont icon-chart"></i>
-
- 3.8k words
-
- </span>
-
-
- <span class="post-meta mr-2">
- <i class="iconfont icon-clock-fill"></i>
-
-
-
- 32 mins
-
- </span>
-
-
-
- </div>
-
- </div>
-
- </div>
- </div>
- </div>
- </div>
- </header>
- <main>
-
-
- <div class="container-fluid nopadding-x">
- <div class="row nomargin-x">
- <div class="side-col d-none d-lg-block col-lg-2">
-
- </div>
- <div class="col-lg-8 nopadding-x-md">
- <div class="container nopadding-x-md" id="board-ctn">
- <div id="board">
- <article class="post-content mx-auto">
- <h1 id="seo-header">LSTM应用于验证码识别</h1>
-
-
- <div class="markdown-body">
-
- <p><a target="_blank" rel="noopener" href="https://limour.lanzous.com/iTNKZeg7tja"><img src="" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></a></p>
- <p> <a target="_blank" rel="noopener" href="https://sourceforge.net/projects/vietocr/files/jTessBoxEditor/">jTessBoxEditorFX-2.3.0</a></p>
- <p><a target="_blank" rel="noopener" href="https://limour.lanzous.com/iLQ25eh2o1i"><img src="" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></a></p>
- <p><a target="_blank" rel="noopener" href="https://tesseract-ocr.github.io/tessdoc/Data-Files">预训练数据</a></p>
- <figure class="highlight awk"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br></pre></td><td class="code"><pre><code class="hljs awk"><span class="hljs-comment">#For CentOS 7 run the following as root to install Tesseract with English language traineddata:</span><br>yum -y install yum-utils<br>yum-config-manager --add-repo https:<span class="hljs-regexp">//</span>download.opensuse.org<span class="hljs-regexp">/repositories/</span>home:<span class="hljs-regexp">/Alexander_Pozdnyakov/</span>CentOS_7/<br>sudo rpm --import https:<span class="hljs-regexp">//</span>build.opensuse.org<span class="hljs-regexp">/projects/</span>home:Alexander_Pozdnyakov/public_key<br>yum update<br>yum install tesseract <br>yum install tesseract-langpack-eng<br></code></pre></td></tr></table></figure>
- <figure class="highlight livescript"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><code class="hljs livescript"><span class="hljs-comment">#For Win10 to install Tesseract:</span><br><span class="hljs-number">1.</span>下载解压 jTessBoxEditor<br><span class="hljs-number">2.</span>将 {解压目录}<span class="hljs-string">\jTessBoxEditorFX\tesseract-ocr</span> 添加到 Path<br><span class="hljs-number">3.</span>下载解压预训练数据到当前目录<br><span class="hljs-number">4.</span>新建环境变量 TESSDATA_PREFIX ,值为 {解压目录}<span class="hljs-string">\tessdata</span><br></code></pre></td></tr></table></figure>
- <p><img src="https://img-cdn.limour.top/blog_wp/2020/07/%E5%BE%AE%E4%BF%A1%E5%9B%BE%E7%89%87_20200710090821.png" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></p>
- <p>终端中运行命令 <code>tesseract --help-extra</code> 显示如上信息表示安装成功</p>
- <p><a target="_blank" rel="noopener" href="https://limour.lanzous.com/icIheeh321c"><img src="" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></a></p>
- <p>自行获取训练所需的验证码</p>
- <p>按照<a target="_blank" rel="noopener" href="https://blog.csdn.net/qq_40147863/article/details/82290015">肖鹏伟的《Tesseract-OCR-04-使用 jTessBoxEditor提高文字识别准确率》</a>中的方法生成<code>fdu.ufont.exp0.tif</code>文件</p>
- <figure class="highlight stylus"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><code class="hljs stylus">#通过此命令生成fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span>.box文件<br>tesseract fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span><span class="hljs-selector-class">.tif</span> fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span> -l eng <span class="hljs-attr">--psm</span> <span class="hljs-number">8</span> <span class="hljs-attr">--oem</span> <span class="hljs-number">0</span> nobatch box<span class="hljs-selector-class">.train</span> -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzAT-<br></code></pre></td></tr></table></figure>
- <p>继续按照肖鹏伟的方法修正<code>.box</code>文件</p>
- <figure class="highlight stylus"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br></pre></td><td class="code"><pre><code class="hljs stylus">#将fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span>.tif文件、修正后的fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span>.box文件一起保存到独立的同一新文件夹下,同目录下运行此.ps1文件即可得到fdu<span class="hljs-selector-class">.traineddata</span><br>tesseract fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span><span class="hljs-selector-class">.tif</span> fdu<span class="hljs-selector-class">.ufont</span><span class="hljs-selector-class">.exp0</span> -l enb <span class="hljs-attr">--psm</span> <span class="hljs-number">8</span> lstm<span class="hljs-selector-class">.train</span><br>combine_tessdata -e <span class="hljs-string">"$env:TESSDATA_PREFIX\enb.traineddata"</span> enb<span class="hljs-selector-class">.lstm</span><br><span class="hljs-variable">$PSroot</span> = Get-ChildItem<br><span class="hljs-variable">$PSroot</span> = Split-Path <span class="hljs-variable">$PSroot</span><span class="hljs-selector-class">.Get</span>(<span class="hljs-number">0</span>)<span class="hljs-selector-class">.FullName</span><br><span class="hljs-variable">$fso</span>=New-Object -ComObject Scripting<span class="hljs-selector-class">.FileSystemObject</span><br><span class="hljs-variable">$fso</span><span class="hljs-selector-class">.CreateTextFile</span>(<span class="hljs-string">'fdu.training_files.txt'</span>,<span class="hljs-number">2</span>)<span class="hljs-selector-class">.Write</span>(<span 
class="hljs-string">"$PSroot\fdu.ufont.exp0.lstmf"</span> )<br><span class="hljs-keyword">if</span> (-not (Test-Path -Path output)){mkdir output}<br>lstmtraining <span class="hljs-attr">--model_output</span>=<span class="hljs-string">"$PSroot\output\output"</span> <span class="hljs-attr">--continue_from</span>=<span class="hljs-string">"$PSroot\enb.lstm"</span> <span class="hljs-attr">--train_listfile</span>=<span class="hljs-string">"$PSroot\fdu.training_files.txt"</span> <span class="hljs-attr">--traineddata</span>=<span class="hljs-string">"$env:TESSDATA_PREFIX\enb.traineddata"</span> <span class="hljs-attr">--debug_interval</span> -<span class="hljs-number">1</span> <span class="hljs-attr">--target_error_rate</span> <span class="hljs-number">0.001</span><br>lstmtraining <span class="hljs-attr">--stop_training</span> <span class="hljs-attr">--continue_from</span>=<span class="hljs-string">"$PSroot\output\output_checkpoint"</span> <span class="hljs-attr">--traineddata</span>=<span class="hljs-string">"$env:TESSDATA_PREFIX\enb.traineddata"</span> <span class="hljs-attr">--model_output</span>=<span class="hljs-string">"$PSroot\fdu.traineddata"</span><br></code></pre></td></tr></table></figure>
- <p><a target="_blank" rel="noopener" href="https://limour.lanzous.com/ifW3jeireib"><img src="" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></a></p>
- <p>最终得到如上结果</p>
- <p>将得到的<code>fdu.traineddata</code>文件移动到<code>tessdata</code>文件夹下即可通过参数<code>-l fdu</code>进行使用</p>
- <figure class="highlight python"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br><span class="line">45</span><br><span class="line">46</span><br><span class="line">47</span><br><span class="line">48</span><br><span class="line">49</span><br><span class="line">50</span><br><span class="line">51</span><br><span class="line">52</span><br><span class="line">53</span><br><span class="line">54</span><br><span class="line">55</span><br><span class="line">56</span><br><span class="line">57</span><br></pre></td><td class="code"><pre><code class="hljs python"><span 
class="hljs-comment">#此程序用于简单判断训练效果</span><br><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image<br><span class="hljs-comment">#from itertools import cycle</span><br><span class="hljs-keyword">import</span> os, random, re<br><span class="hljs-keyword">import</span> pytesseract<br>fl = re.<span class="hljs-built_in">compile</span>(<span class="hljs-string">r'[a-zA-Z-]+'</span>)<br><span class="hljs-keyword">def</span> <span class="hljs-title function_">clearStr</span>(<span class="hljs-params"><span class="hljs-built_in">str</span></span>):<br> <span class="hljs-keyword">return</span> <span class="hljs-string">''</span>.join(fl.findall(<span class="hljs-built_in">str</span>))<br><br><span class="hljs-keyword">class</span> <span class="hljs-title class_">Fileset</span>(<span class="hljs-title class_ inherited__">list</span>):<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, name, ext=<span class="hljs-string">''</span>, _read=<span class="hljs-literal">None</span>, root=<span class="hljs-literal">None</span></span>):<br> <span class="hljs-keyword">if</span> <span class="hljs-built_in">isinstance</span>(name, <span class="hljs-built_in">str</span>) :<br> self.root = os.path.join(root <span class="hljs-keyword">or</span> os.getcwd(), name)<br> self.extend(f <span class="hljs-keyword">for</span> f <span class="hljs-keyword">in</span> os.listdir(self.root) <span class="hljs-keyword">if</span> f.endswith(ext))<br> self._read = _read<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__getitem__</span>(<span class="hljs-params">self, index</span>):<br> <span class="hljs-keyword">if</span> <span class="hljs-built_in">isinstance</span>(index, <span class="hljs-built_in">int</span>):<span class="hljs-comment"># index是索引</span><br> <span class="hljs-keyword">return</span> os.path.join(self.root, <span 
class="hljs-built_in">super</span>().__getitem__(index))<br> <span class="hljs-keyword">else</span>:<span class="hljs-comment"># index是切片</span><br> fileset = Fileset(<span class="hljs-literal">None</span>)<br> fileset.root = self.root<br> fileset._read = self._read<br> fileset.extend(<span class="hljs-built_in">super</span>().__getitem__(index))<br> <span class="hljs-keyword">return</span> fileset<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">getFileName</span>(<span class="hljs-params">self, index</span>):<br> fname, ext = os.path.splitext(<span class="hljs-built_in">super</span>().__getitem__(index))<br> <span class="hljs-keyword">return</span> fname<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__iter__</span>(<span class="hljs-params">self</span>):<br> <span class="hljs-keyword">if</span> self._read: <span class="hljs-keyword">return</span> (self._read(os.path.join(self.root, f)) <span class="hljs-keyword">for</span> f <span class="hljs-keyword">in</span> <span class="hljs-built_in">super</span>().__iter__())<br> <span class="hljs-keyword">else</span>: <span class="hljs-keyword">return</span> (os.path.join(self.root, f) <span class="hljs-keyword">for</span> f <span class="hljs-keyword">in</span> <span class="hljs-built_in">super</span>().__iter__())<br> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__call__</span>(<span class="hljs-params">self</span>):<br> retn = random.choice(self)<br> <span class="hljs-keyword">if</span> self._read: <span class="hljs-keyword">return</span> self._read(retn)<br> <span class="hljs-keyword">else</span>: <span class="hljs-keyword">return</span> retn<br><br><span class="hljs-comment"># def fopen(path):</span><br> <span class="hljs-comment"># with open(path, 'rb') as f:</span><br> <span class="hljs-comment"># return f.read()</span><br><span class="hljs-comment"># #from tesOCR import tesOCR as OCR1</span><br><span class="hljs-comment"># sample 
= Fileset('Captcha', '.jpg', fopen)</span><br>sample = Fileset(<span class="hljs-string">'Captcha'</span>, <span class="hljs-string">'.jpg'</span>, Image.<span class="hljs-built_in">open</span>)<br><br>config1 = <span class="hljs-string">'--psm 8'</span><br><span class="hljs-keyword">def</span> <span class="hljs-title function_">OCR1</span>(<span class="hljs-params">img</span>):<br> <span class="hljs-keyword">return</span> pytesseract.image_to_string(img, lang=<span class="hljs-string">'fdu'</span>, config=config1)<br><br>config2 = <span class="hljs-string">"--psm 8 --oem 0 -c tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzAT-"</span><br><span class="hljs-keyword">def</span> <span class="hljs-title function_">OCR2</span>(<span class="hljs-params">img</span>):<br> <span class="hljs-keyword">return</span> pytesseract.image_to_string(img, lang=<span class="hljs-string">'eng'</span>, config=config2)<br><br><span class="hljs-keyword">for</span> a <span class="hljs-keyword">in</span> sample:<br> b = a.convert(<span class="hljs-string">"L"</span>)<br> x = clearStr(OCR1(b))<br> y = clearStr(OCR2(b))<br> <span class="hljs-keyword">if</span> x != y:<br> display(a)<br> <span class="hljs-built_in">print</span>(<span class="hljs-string">f"LSTM is <span class="hljs-subst">{x}</span> ; Legacy is <span class="hljs-subst">{y}</span>"</span>)<br></code></pre></td></tr></table></figure>
- <p><a target="_blank" rel="noopener" href="https://limour.lanzous.com/imK6me9lurg"><img src="" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload></a></p>
- <p>我的结果和python调用封装</p>
- <p>注释: </p>
- <ol>
- <li><a target="_blank" rel="noopener" href="https://sourceforge.net/projects/vietocr/files/jTessBoxEditor/">jTessBoxEditor</a>中带有FX表示支持中文</li>
- <li>预训练数据中22.3Mb的是Legacy数据,14.6Mb的是LSTM数据,语言均为eng</li>
- <li><code>tessedit_char_whitelist=</code> 后面所接内容为验证码中可能出现的字符</li>
- </ol>
-
- </div>
-
- <hr/>
- <div>
- <div class="post-metas my-3">
-
- <div class="post-meta mr-3 d-flex align-items-center">
- <i class="iconfont icon-category"></i>
-
- <span class="category-chains">
-
-
-
- <span class="category-chain">
-
- <a href="/categories/Python%E7%BB%83%E4%B9%A0/" class="category-chain-item">Python练习</a>
-
-
- </span>
-
-
- </span>
- </div>
-
-
- <div class="post-meta">
- <i class="iconfont icon-tags"></i>
-
- <a href="/tags/Python/" class="print-no-link">#Python</a>
-
- <a href="/tags/LSTM/" class="print-no-link">#LSTM</a>
-
- <a href="/tags/tesseract/" class="print-no-link">#tesseract</a>
-
- <a href="/tags/%E9%AA%8C%E8%AF%81%E7%A0%81/" class="print-no-link">#验证码</a>
-
- </div>
-
- </div>
-
-
- <div class="license-box my-3">
- <div class="license-title">
- <div>LSTM应用于验证码识别</div>
- <div>https://b.limour.top/309.html</div>
- </div>
- <div class="license-meta">
-
- <div class="license-meta-item">
- <div>Author</div>
- <div>Limour</div>
- </div>
-
-
- <div class="license-meta-item license-meta-date">
- <div>Posted on</div>
- <div>July 11, 2020</div>
- </div>
-
-
-
- <div class="license-meta-item">
- <div>Licensed under</div>
- <div>
-
-
-
- <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
- <span class="hint--top hint--rounded" aria-label="BY - Attribution">
- <i class="iconfont icon-by"></i>
- </span>
- </a>
-
- <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
- <span class="hint--top hint--rounded" aria-label="NC - Non-commercial">
- <i class="iconfont icon-nc"></i>
- </span>
- </a>
-
- <a class="print-no-link" target="_blank" href="https://creativecommons.org/licenses/by-nc-sa/4.0/">
- <span class="hint--top hint--rounded" aria-label="SA - Share-alike">
- <i class="iconfont icon-sa"></i>
- </span>
- </a>
-
-
- </div>
- </div>
-
- </div>
- <div class="license-icon iconfont"></div>
- </div>
-
- <div class="post-prevnext my-3">
- <article class="post-prev col-6">
-
-
- <a href="/341.html" title="色温与RGB颜色换算表">
- <i class="iconfont icon-arrowleft"></i>
- <span class="hidden-mobile">色温与RGB颜色换算表</span>
- <span class="visible-mobile">Previous</span>
- </a>
-
- </article>
- <article class="post-next col-6">
-
-
- <a href="/297.html" title="pytesseract测试">
- <span class="hidden-mobile">pytesseract测试</span>
- <span class="visible-mobile">Next</span>
- <i class="iconfont icon-arrowright"></i>
- </a>
-
- </article>
- </div>
-
- </div>
-
-
-
- <article id="comments" lazyload>
-
- <div id="waline"></div>
- <script type="text/javascript">
- Fluid.utils.loadComments('#waline', function() {
- Fluid.utils.createCssLink('https://cdn.staticfile.org/waline/2.15.5/waline.min.css')
- Fluid.utils.createScript('https://cdn.staticfile.org/waline/2.15.5/waline.min.js', function() {
- var options = Object.assign(
- {"serverURL":"https://comments.limour.top","path":"window.location.pathname","meta":["nick","mail","link"],"requiredMeta":["nick"],"lang":"zh-CN","emoji":["https://jscdn.limour.top/gh/walinejs/emojis/weibo"],"dark":"html[data-user-color-scheme=\"dark\"]","wordLimit":0,"pageSize":10},
- {
- el: '#waline',
- path: window.location.pathname
- }
- )
- Waline.init(options);
- Fluid.utils.waitElementVisible('#waline .vcontent', () => {
- var imgSelector = '#waline .vcontent img:not(.vemoji)';
- Fluid.plugins.imageCaption(imgSelector);
- Fluid.plugins.fancyBox(imgSelector);
- })
- });
- });
- </script>
- <noscript>Please enable JavaScript to view the comments</noscript>
- </article>
-
- </article>
- </div>
- </div>
- </div>
- <div class="side-col d-none d-lg-block col-lg-2">
-
- <aside class="sidebar" style="margin-left: -1rem">
- <div id="toc">
- <p class="toc-header">
- <i class="iconfont icon-list"></i>
- <span>Table of Contents</span>
- </p>
- <div class="toc-body" id="toc-body"></div>
- </div>
- </aside>
- </div>
- </div>
- </div>
-
-
-
-
-
-
-
- <a id="scroll-top-button" aria-label="TOP" href="#" role="button">
- <i class="iconfont icon-arrowup" aria-hidden="true"></i>
- </a>
-
-
- <div class="modal fade" id="modalSearch" tabindex="-1" role="dialog" aria-labelledby="ModalLabel"
- aria-hidden="true">
- <div class="modal-dialog modal-dialog-scrollable modal-lg" role="document">
- <div class="modal-content">
- <div class="modal-header text-center">
- <h4 class="modal-title w-100 font-weight-bold">Search</h4>
- <button type="button" id="local-search-close" class="close" data-dismiss="modal" aria-label="Close">
- <span aria-hidden="true">×</span>
- </button>
- </div>
- <div class="modal-body mx-3">
- <div class="md-form mb-5">
- <input type="text" id="local-search-input" class="form-control validate">
- <label data-error="x" data-success="v" for="local-search-input">Keyword</label>
- </div>
- <div class="list-group" id="local-search-result"></div>
- </div>
- </div>
- </div>
- </div>
-
-
- </main>
- <footer>
- <div class="footer-inner">
-
- <div class="footer-content">
- <a target="_blank" rel="nofollow noopener" href="http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=43130202000203"><img src="https://img.limour.top/2023/08/27/64eadeb81d6a0.webp" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload>湘公网安备 43130202000203号 </a> <a target="_blank" rel="nofollow noopener" href="https://beian.miit.gov.cn/">湘ICP备20008299号 </a> <a target="_blank" rel="nofollow noopener" href="https://icp.gov.moe/?keyword=20210128">萌ICP备20210128号</a> <br> <a href="https://www.foreverblog.cn/" target="_blank"> <img src="https://img.foreverblog.cn/logo_en_default.png" srcset="https://jscdn.limour.top/gh/Limour-dev/Sakurairo_Vision/load_svg/inload.svg" lazyload alt="" style="width:auto;height:24px"> </a> <br> <a href="https://hexo.io" target="_blank" rel="nofollow noopener"><span>Hexo</span></a> <i class="iconfont icon-love"></i> <a href="https://github.com/fluid-dev/hexo-theme-fluid" target="_blank" rel="nofollow noopener"><span>Fluid</span></a> <i class="iconfont icon-love"></i> <a href="https://github.com/limour-blog/archives-b" target="_blank" rel="nofollow noopener"><span>SRC</span></a>
- </div>
-
-
-
-
- </div>
- </footer>
- <!-- Scripts -->
-
- <script src="https://jscdn.limour.top/npm/nprogress@0.2.0/nprogress.min.js" ></script>
- <link rel="stylesheet" href="https://jscdn.limour.top/npm/nprogress@0.2.0/nprogress.min.css" />
- <script>
- NProgress.configure({"showSpinner":false,"trickleSpeed":100})
- NProgress.start()
- window.addEventListener('load', function() {
- NProgress.done();
- })
- </script>
- <script src="https://jscdn.limour.top/npm/jquery@3.6.4/dist/jquery.min.js" ></script>
- <script src="https://jscdn.limour.top/npm/bootstrap@4.6.1/dist/js/bootstrap.min.js" ></script>
- <script src="/js/events.js" ></script>
- <script src="/js/plugins.js" ></script>
- <script src="https://lib.baomitu.com/typed.js/2.0.12/typed.min.js" ></script>
- <script>
- (function (window, document) {
- var typing = Fluid.plugins.typing;
- var subtitle = document.getElementById('subtitle');
- if (!subtitle || !typing) {
- return;
- }
- var text = subtitle.getAttribute('data-typed-text');
-
- typing(text);
-
- })(window, document);
- </script>
-
- <script src="/js/img-lazyload.js" ></script>
-
-
- <script>
- Fluid.utils.createScript('https://jscdn.limour.top/npm/tocbot@4.20.1/dist/tocbot.min.js', function() {
- var toc = jQuery('#toc');
- if (toc.length === 0 || !window.tocbot) { return; }
- var boardCtn = jQuery('#board-ctn');
- var boardTop = boardCtn.offset().top;
- window.tocbot.init(Object.assign({
- tocSelector : '#toc-body',
- contentSelector : '.markdown-body',
- linkClass : 'tocbot-link',
- activeLinkClass : 'tocbot-active-link',
- listClass : 'tocbot-list',
- isCollapsedClass: 'tocbot-is-collapsed',
- collapsibleClass: 'tocbot-is-collapsible',
- scrollSmooth : true,
- includeTitleTags: true,
- headingsOffset : -boardTop,
- }, CONFIG.toc));
- if (toc.find('.toc-list-item').length > 0) {
- toc.css('visibility', 'visible');
- }
- Fluid.events.registerRefreshCallback(function() {
- if ('tocbot' in window) {
- tocbot.refresh();
- var toc = jQuery('#toc');
- if (toc.length === 0 || !tocbot) {
- return;
- }
- if (toc.find('.toc-list-item').length > 0) {
- toc.css('visibility', 'visible');
- }
- }
- });
- });
- </script>
- <script src=https://lib.baomitu.com/clipboard.js/2.0.11/clipboard.min.js></script>
- <script>Fluid.plugins.codeWidget();</script>
-
- <script>
- Fluid.utils.createScript('https://jscdn.limour.top/npm/anchor-js@4.3.1/anchor.min.js', function() {
- window.anchors.options = {
- placement: CONFIG.anchorjs.placement,
- visible : CONFIG.anchorjs.visible
- };
- if (CONFIG.anchorjs.icon) {
- window.anchors.options.icon = CONFIG.anchorjs.icon;
- }
- var el = (CONFIG.anchorjs.element || 'h1,h2,h3,h4,h5,h6').split(',');
- var res = [];
- for (var item of el) {
- res.push('.markdown-body > ' + item.trim());
- }
- if (CONFIG.anchorjs.placement === 'left') {
- window.anchors.options.class = 'anchorjs-link-left';
- }
- window.anchors.add(res.join(', '));
- Fluid.events.registerRefreshCallback(function() {
- if ('anchors' in window) {
- anchors.removeAll();
- var el = (CONFIG.anchorjs.element || 'h1,h2,h3,h4,h5,h6').split(',');
- var res = [];
- for (var item of el) {
- res.push('.markdown-body > ' + item.trim());
- }
- if (CONFIG.anchorjs.placement === 'left') {
- anchors.options.class = 'anchorjs-link-left';
- }
- anchors.add(res.join(', '));
- }
- });
- });
- </script>
-
- <script>
- Fluid.utils.createScript('https://lib.baomitu.com/fancybox/3.5.7/jquery.fancybox.min.js', function() {
- Fluid.plugins.fancyBox();
- });
- </script>
- <script>Fluid.plugins.imageCaption();</script>
- <script src="/js/local-search.js" ></script>
- <!-- 主题的启动项,将它保持在最底部 -->
- <!-- the boot of the theme, keep it at the bottom -->
- <script src="/js/boot.js" ></script>
-
- <noscript>
- <div class="noscript-warning">Blog works best with JavaScript enabled</div>
- </noscript>
- <!-- hexo injector body_end start -->
- <script defer src="/theme-inject/timeliness.js"></script>
- <!-- hexo injector body_end end --></body>
- </html>
|