如何在C#中使用tesseract 3.02训练数据?
我可以在命令提示符下使用新训练的 tessdata(版本 3.02)得到正确的 OCR 输出,但我希望在 C# 代码中通过 DLL 引用得到同样的结果。我已尝试引用 tessnet2_32.dll,但它会抛出异常(exception)。那么,如何在 C# 代码中通过 DLL 引用来使用 tesseract 3.02 版本训练的 tessdata?
tessnet2 是针对 Tesseract 2.04 的。您需要一个兼容 3.02 版本的 .NET 包装器。
To access or use tesseract 3.02 trained data we have to create separate wrapper class like below.

using System;
using System.IO;
using System.Diagnostics;
using System.Drawing;

namespace tesseractThree
{
    /// <summary>
    /// Runs the Tesseract command-line executable on an image file and
    /// returns the text it writes to its output file.
    /// </summary>
    public class TesseractOCR
    {
        private string commandpath;
        private string outbase;
        private string outpath;
        private string tmppath;

        /// <summary>
        /// Creates an unconfigured instance. No executable path is set, so
        /// <see cref="analyze"/> and <see cref="OCRFromBitmap"/> throw
        /// <see cref="InvalidOperationException"/> until the instance is
        /// created through <see cref="TesseractOCR(string)"/> instead.
        /// </summary>
        public TesseractOCR()
        {
        }

        /// <summary>
        /// Creates a wrapper around the given tesseract executable.
        /// </summary>
        /// <param name="commandpath">Path of the tesseract executable to launch.</param>
        public TesseractOCR(string commandpath)
        {
            this.commandpath = commandpath;

            // Path.Combine inserts the directory separator that plain string
            // concatenation would omit.
            string appData = Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData);
            tmppath = Path.Combine(appData, "out.tif");

            // The output base is passed to tesseract without an extension;
            // the result is then read back from the same path plus ".txt".
            outbase = Path.Combine(appData, "out");
            outpath = outbase + ".txt";
        }

        /// <summary>
        /// Runs tesseract on <paramref name="filename"/> and returns the
        /// contents of the produced text file. The text file is deleted
        /// afterwards, whether or not the run succeeded.
        /// </summary>
        /// <param name="filename">Path of the image file to recognize.</param>
        /// <param name="lang">Language name passed to tesseract via <c>-l</c>.</param>
        /// <param name="noLine">
        /// When true, <c>-psm 6</c> is appended to the command line (page
        /// segmentation mode 6; see the Tesseract command-line help).
        /// </param>
        /// <returns>The full text of the output file.</returns>
        /// <exception cref="InvalidOperationException">
        /// The instance was created with the parameterless constructor.
        /// </exception>
        public string analyze(string filename, string lang, bool noLine)
        {
            EnsureConfigured();

            // Quote both file arguments so that paths containing spaces
            // reach tesseract as single arguments.
            string args = Quote(filename) + " " + Quote(outbase) + " -l " + lang;
            if (noLine)
            {
                args += " -psm 6";
            }

            ProcessStartInfo startinfo = new ProcessStartInfo(commandpath, args);
            startinfo.CreateNoWindow = true;
            startinfo.UseShellExecute = false;

            try
            {
                // Dispose the Process object so its OS handle is released.
                using (Process process = Process.Start(startinfo))
                {
                    process.WaitForExit();
                }

                return File.ReadAllText(outpath);
            }
            finally
            {
                // File.Delete is a no-op when the file does not exist, so this
                // is safe even if tesseract produced no output.
                File.Delete(outpath);
            }
        }

        /// <summary>
        /// Saves <paramref name="bmp"/> to a temporary TIFF file, runs
        /// <see cref="analyze"/> on it and removes the temporary file.
        /// </summary>
        /// <param name="bmp">Bitmap to recognize.</param>
        /// <param name="lang">Language name passed to tesseract via <c>-l</c>.</param>
        /// <param name="noLine">Forwarded to <see cref="analyze"/>.</param>
        /// <returns>The recognized text.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bmp"/> is null.</exception>
        /// <exception cref="InvalidOperationException">
        /// The instance was created with the parameterless constructor.
        /// </exception>
        public string OCRFromBitmap(Bitmap bmp, string lang, bool noLine)
        {
            if (bmp == null)
            {
                throw new ArgumentNullException("bmp");
            }

            EnsureConfigured();

            try
            {
                bmp.Save(tmppath, System.Drawing.Imaging.ImageFormat.Tiff);
                return analyze(tmppath, lang, noLine);
            }
            finally
            {
                // Remove the temporary image even when saving or OCR failed.
                File.Delete(tmppath);
            }
        }

        /* public string OCRFromFile(string filename) { return analyze(filename); }*/

        // Fails fast with a descriptive error instead of a NullReferenceException
        // when the parameterless constructor was used.
        private void EnsureConfigured()
        {
            if (commandpath == null || outpath == null || tmppath == null)
            {
                throw new InvalidOperationException(
                    "TesseractOCR must be created with the path of the tesseract executable.");
            }
        }

        // Wraps a command-line argument in double quotes unless the caller
        // already quoted it.
        private static string Quote(string value)
        {
            if (value != null && value.Length >= 2 && value.StartsWith("\"") && value.EndsWith("\""))
            {
                return value;
            }

            return "\"" + value + "\"";
        }
    }
}

//Usage of this class
string lang = "enc";
TesseractOCR ocr = new TesseractOCR(@"C:\Program Files\Tesseract-OCR\tesseract.exe");
string result;
using (Bitmap b = new Bitmap(@"D:\Image\enc.test_font.exp0.tif"))
{
    result = ocr.OCRFromBitmap(b, lang, true);
}
Label1.Text = result;

OR Refer below link for more details. https://gist.github.com/yatt/915443
使用tesseractengine3.dll我们可以使用tesseract v3.02训练数据,如下所示。
上述就是C#学习教程:如何在C#中使用tesseract 3.02训练数据?分享的全部内容,如果对大家有所用处且需要了解更多关于C#学习教程,希望大家多多关注—计算机技术网(www.ctvol.com)!
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using tesseract;
using System.Drawing;
using System.IO;

public enum TesseractEngineMode : int
{
    /// <summary>
    /// Run Tesseract only - fastest
    /// </summary>
    TESSERACT_ONLY = 0,

    /// <summary>
    /// Run Cube only - better accuracy, but slower
    /// </summary>
    CUBE_ONLY = 1,

    /// <summary>
    /// Run both and combine results - best accuracy
    /// </summary>
    TESSERACT_CUBE_COMBINED = 2,

    /// <summary>
    /// Specify this mode when calling init_*(),
    /// to indicate that any of the above modes
    /// should be automatically inferred from the
    /// variables in the language-specific config,
    /// command-line configs, or if not specified
    /// in any of the above should be set to the
    /// default OEM_TESSERACT_ONLY.
    /// </summary>
    DEFAULT = 3
}

public enum TesseractPageSegMode : int
{
    /// <summary>
    /// Fully automatic page segmentation
    /// </summary>
    PSM_AUTO = 0,

    /// <summary>
    /// Assume a single column of text of variable sizes
    /// </summary>
    PSM_SINGLE_COLUMN = 1,

    /// <summary>
    /// Assume a single uniform block of text (Default)
    /// </summary>
    PSM_SINGLE_BLOCK = 2,

    /// <summary>
    /// Treat the image as a single text line
    /// </summary>
    PSM_SINGLE_LINE = 3,

    /// <summary>
    /// Treat the image as a single word
    /// </summary>
    PSM_SINGLE_WORD = 4,

    /// <summary>
    /// Treat the image as a single character
    /// </summary>
    PSM_SINGLE_CHAR = 5
}

/// <summary>
/// Web Forms page that initializes a <c>TesseractProcessor</c> with the
/// configured data path and language, runs it on a fixed image file and
/// writes the recognized text to the response.
/// </summary>
public partial class importDLL : System.Web.UI.Page
{
    private TesseractProcessor m_tesseract = null;

    //private const string m_path = @"..\..\data\";
    // NOTE(review): the trailing separator is an assumption based on the
    // Tesseract 3.02 datapath convention - confirm against your install.
    private const string m_path = @"D:\tessdata-3.02\";
    private const string m_lang = "eng";

    /// <summary>
    /// Runs OCR on the sample image and writes the result to the response.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The engine could not be initialized from <c>m_path</c> / <c>m_lang</c>.
    /// </exception>
    protected void Page_Load(object sender, EventArgs e)
    {
        // Dispose the image once recognition has finished so the file handle
        // and GDI+ resources are released.
        using (var image = System.Drawing.Image.FromFile(@"D:\Image\Capture1T.tif"))
        {
            m_tesseract = new TesseractProcessor();

            bool succeed = m_tesseract.Init(m_path, m_lang, (int)TesseractEngineMode.DEFAULT);
            if (!succeed)
            {
                // Previously a failed Init was ignored and the calls below ran
                // against an engine that never loaded its data.
                throw new InvalidOperationException(
                    "Tesseract initialization failed for data path '" + m_path +
                    "' and language '" + m_lang + "'.");
            }

            m_tesseract.SetVariable(
                "tessedit_pageseg_mode",
                ((int)TesseractPageSegMode.PSM_SINGLE_LINE).ToString());
            m_tesseract.Clear();
            m_tesseract.ClearAdaptiveClassifier();

            string outValue = m_tesseract.Apply(image);
            Response.Write(outValue);
        }
    }
}
本文来自网络收集,不代表计算机技术网立场,如涉及侵权请联系管理员删除。
ctvol管理联系方式QQ:251552304
本文章地址:https://www.ctvol.com/cdevelopment/985346.html