Using an external OCR engine during PDF/OCR generation
This example shows how to use any external OCR engine during PDF/OCR generation using the GdPicturePDF
class.
/*** This topic assumes you are already familiar with using the GdPicturePDF class to build PDF/OCR documents.
It is possible to easily use any external OCR engine during PDF/OCR generation using the GdPicturePDF class.
The concept is quite straightforward: you provide the OCR result to a GdPicturePDF instance, through a specific event, by passing a string variable that is the serialization of a specific model. Several serialization models will be supported; please read the "Supported models for serialization" section of this topic for details.
After installing the Nutrient .NET SDK (formerly GdPicture.NET) Toolkit, please have a look at our C# "PDF to PDF-OCR" demo included in the demo folder. There you will find complete implementations for the following external OCR engines:
- GdPicture.NET built-in OCR using the GdPictureOCR class.
- OmniPage.
- Other engines shall be exposed soon...
**Step by step instructions**
1: Tell the instance to use an external OCR engine.
   gdpicturePDF.SetOverrideOcrEngine(true);
2: Subscribe to the ExternalOcrPageRequest event.
   gdpicturePDF.ExternalOcrPageRequest += this.ExternalOcrRequest;
3: Implement the logic that provides the OCR result in the ExternalOcrRequest event handler.*/
// This version uses the "gdpictureocr-json" model (the recommended one).
/// <summary>
/// Handler for the ExternalOcrPageRequest event: runs GdPictureOCR on the requested image
/// and hands back the engine's own serialized result.
/// </summary>
/// <param name="ImageID">Identifier of the image to recognize; passed to <c>GdPictureOCR.SetImage</c>.</param>
/// <param name="PdfOcrOptions">
/// OCR settings of the request. Its ResourcePath, Dictionary, OCRMode, DetectOrientation and
/// DetectSkew members are copied onto the OCR engine.
/// </param>
/// <param name="Status">
/// Receives the engine status read after <c>RunOCR</c>; when that status is OK it is
/// read again after the result has been serialized.
/// </param>
/// <param name="ResultEncoding">Receives "gdpictureocr-json" when OCR succeeded, otherwise null.</param>
/// <param name="OcrResult">Receives the serialized OCR result when OCR succeeded, otherwise null.</param>
private void ExternalOcrRequest(int ImageID, PdfOcrOptions PdfOcrOptions, out GdPictureStatus Status, out string ResultEncoding, out string OcrResult)
{
    using (GdPictureOCR ocrEngine = new GdPictureOCR())
    {
        // Apply the settings carried by the request to the engine.
        ocrEngine.ResourceFolder = PdfOcrOptions.ResourcePath;
        ocrEngine.AddCustomDictionary(PdfOcrOptions.Dictionary);
        ocrEngine.OCRMode = PdfOcrOptions.OCRMode;
        ocrEngine.EnableOrientationDetection = PdfOcrOptions.DetectOrientation;
        ocrEngine.EnableSkewDetection = PdfOcrOptions.DetectSkew;
        ocrEngine.SetImage(ImageID);

        string ocrResultId = ocrEngine.RunOCR();
        Status = ocrEngine.GetStat();

        // OCR failed: report the status and return no payload.
        if (Status != GdPictureStatus.OK)
        {
            OcrResult = null;
            ResultEncoding = null;
            return;
        }

        ResultEncoding = "gdpictureocr-json";
        OcrResult = ocrEngine.GetSerializedResult(ocrResultId);

        // The serialization call can fail too, so report its status to the caller.
        Status = ocrEngine.GetStat();
    }
}
// This version uses the "json" model.
/// <summary>
/// Handler for the ExternalOcrPageRequest event: runs GdPictureOCR on the requested image,
/// copies the recognized paragraphs, lines, words and characters into a
/// <c>GdPictureOcrResult</c> tree and returns that tree serialized as JSON.
/// </summary>
/// <param name="ImageID">Identifier of the image to recognize; passed to <c>GdPictureOCR.SetImage</c>.</param>
/// <param name="PdfOcrOptions">
/// OCR settings of the request. Its ResourcePath, Dictionary, OCRMode, DetectOrientation and
/// DetectSkew members are copied onto the OCR engine.
/// </param>
/// <param name="Status">Receives the engine status read right after <c>RunOCR</c>.</param>
/// <param name="ResultEncoding">Receives "json" when OCR succeeded, otherwise null.</param>
/// <param name="OcrResult">Receives the JSON-serialized result tree when OCR succeeded, otherwise null.</param>
private void ExternalOcrRequest(int ImageID, PdfOcrOptions PdfOcrOptions, out GdPictureStatus Status, out string ResultEncoding, out string OcrResult)
{
    using (GdPictureOCR gdpictureOCR = new GdPictureOCR())
    {
        gdpictureOCR.ResourceFolder = PdfOcrOptions.ResourcePath;
        gdpictureOCR.AddCustomDictionary(PdfOcrOptions.Dictionary);
        gdpictureOCR.OCRMode = PdfOcrOptions.OCRMode;
        gdpictureOCR.EnableOrientationDetection = PdfOcrOptions.DetectOrientation;
        gdpictureOCR.EnableSkewDetection = PdfOcrOptions.DetectSkew;
        gdpictureOCR.SetImage(ImageID);
        string resultID = gdpictureOCR.RunOCR();
        Status = gdpictureOCR.GetStat();

        // OCR failed: report the status and return no payload.
        if (Status != GdPictureStatus.OK)
        {
            ResultEncoding = OcrResult = null;
            return;
        }

        // Keep typed references to the lists so items can be appended without
        // downcasting the IEnumerable<> fields of the model on every Add.
        List<GdPictureOcrParagraph> paragraphs = new List<GdPictureOcrParagraph>();
        GdPictureOcrResult ocrResult = new GdPictureOcrResult()
        {
            Paragraphs = paragraphs,
            PageRotation = gdpictureOCR.GetOrientation()
        };

        // Read the count once, as is already done for the line, word and character counts.
        int paragraphCount = gdpictureOCR.GetParagraphCount(resultID);
        for (int paragraphIdx = 0; paragraphIdx < paragraphCount; paragraphIdx++)
        {
            OCRBlockType blockType = gdpictureOCR.GetBlockType(resultID, gdpictureOCR.GetParagraphBlockIndex(resultID, paragraphIdx));

            // Rejecting non-text blocks: only caption, flowing, heading, pullout,
            // vertical text and table blocks are kept.
            if (blockType != OCRBlockType.CaptionText &&
                blockType != OCRBlockType.FlowingText &&
                blockType != OCRBlockType.HeadingText &&
                blockType != OCRBlockType.PulloutText &&
                blockType != OCRBlockType.VerticalText &&
                blockType != OCRBlockType.Table)
            {
                continue;
            }

            List<GdPictureOcrLine> lines = new List<GdPictureOcrLine>();
            paragraphs.Add(new GdPictureOcrParagraph() { Lines = lines });

            // Line, word and character indexes are relative to the whole result,
            // so each level iterates over [first, first + count).
            int firstLineIdx = gdpictureOCR.GetParagraphFirstTextLineIndex(resultID, paragraphIdx);
            int lineCount = gdpictureOCR.GetParagraphTextLineCount(resultID, paragraphIdx);
            for (int lineIdx = firstLineIdx; lineIdx < firstLineIdx + lineCount; lineIdx++)
            {
                List<GdPictureOcrWord> words = new List<GdPictureOcrWord>();
                lines.Add(new GdPictureOcrLine() { Words = words });

                int firstWordIdx = gdpictureOCR.GetTextLineFirstWordIndex(resultID, lineIdx);
                int wordCount = gdpictureOCR.GetTextLineWordCount(resultID, lineIdx);
                for (int wordIdx = firstWordIdx; wordIdx < firstWordIdx + wordCount; wordIdx++)
                {
                    List<GdPictureOcrCharacter> characters = new List<GdPictureOcrCharacter>();
                    words.Add(new GdPictureOcrWord() { Characters = characters });

                    int firstCharacterIdx = gdpictureOCR.GetWordFirstCharacterIndex(resultID, wordIdx);
                    int characterCount = gdpictureOCR.GetWordCharacterCount(resultID, wordIdx);
                    for (int characterIdx = firstCharacterIdx; characterIdx < firstCharacterIdx + characterCount; characterIdx++)
                    {
                        int characterLeft = gdpictureOCR.GetCharacterLeft(resultID, characterIdx);
                        int characterTop = gdpictureOCR.GetCharacterTop(resultID, characterIdx);
                        int characterRight = gdpictureOCR.GetCharacterRight(resultID, characterIdx);
                        int characterBottom = gdpictureOCR.GetCharacterBottom(resultID, characterIdx);
                        characters.Add(new GdPictureOcrCharacter()
                        {
                            BBox = new GdPictureOcrRect(characterLeft, characterTop, characterRight, characterBottom),
                            Value = gdpictureOCR.GetCharacterValue(resultID, characterIdx)
                        });
                    }
                }
            }
        }

        ResultEncoding = "json";
        OcrResult = JsonConvert.SerializeObject(ocrResult);
    }
}
/**
Supported models for serialization
Model name: "gdpictureocr-json".
Model information: the model is not public. To obtain serialized data for this model, use the GetSerializedResult method of the GdPictureOCR class.
Model name: "json".
Model information: the provided data must be an enumeration of paragraphs, each containing lines, each line containing words, and each word containing characters, as described by the classes below.*/
/// <summary>
/// The OcrResult class manages the ocr result.
/// It is the root object of the "json" model: page-level information plus the
/// recognized paragraphs of the page.
/// </summary>
[Serializable]
public sealed class GdPictureOcrResult
{
    /// <summary>
    /// The standard rotation applied to the page before starting the OCR process.
    /// Accepted values are 0, 90, 180 and 270.
    /// </summary>
    public int PageRotation;

    /// <summary>
    /// The detected page skew angle, in degrees, clockwise.
    /// </summary>
    public float PageSkewAngle;

    /// <summary>
    /// The paragraphs of the page.
    /// </summary>
    public IEnumerable<GdPictureOcrParagraph> Paragraphs;
}
/// <summary>
/// A paragraph of the "json" model: orientation information plus the lines it contains.
/// </summary>
[Serializable]
public sealed class GdPictureOcrParagraph
{
    /// <summary>
    /// The standard rotation of the paragraph.
    /// Accepted values are 0, 90, 180 and 270.
    /// </summary>
    public int ParagraphRotation;

    /// <summary>
    /// The text writing direction.
    /// Supported values are: 0 for left to right, 1 for right to left, 2 for top to bottom.
    /// </summary>
    public int TextWritingDirection;

    /// <summary>
    /// The lines of the paragraph.
    /// </summary>
    public IEnumerable<GdPictureOcrLine> Lines;
}
/// <summary>
/// A text line of the "json" model: the words it contains.
/// </summary>
[Serializable]
public sealed class GdPictureOcrLine
{
    /// <summary>
    /// The words of the line.
    /// </summary>
    public IEnumerable<GdPictureOcrWord> Words;
}
/// <summary>
/// A word of the "json" model: an optional bounding box plus the characters it contains.
/// </summary>
[Serializable]
public sealed class GdPictureOcrWord
{
    /// <summary>
    /// The bounding box.
    /// It is not mandatory to provide it since it can be computed from character boxes.
    /// </summary>
    public GdPictureOcrRect BBox;

    /// <summary>
    /// The characters of the word.
    /// </summary>
    public IEnumerable<GdPictureOcrCharacter> Characters;
}
/// <summary>
/// A single recognized character of the "json" model: its bounding box and its value.
/// </summary>
[Serializable]
public sealed class GdPictureOcrCharacter
{
    /// <summary>
    /// The bounding box.
    /// </summary>
    public GdPictureOcrRect BBox;

    /// <summary>
    /// The character value.
    /// </summary>
    public char Value;
}
/// <summary>
/// A bounding rectangle of the "json" model, described by its four edges.
/// NOTE(review): presumably expressed in pixel coordinates of the OCR'd image - confirm
/// against the SDK documentation.
/// </summary>
[Serializable]
public sealed class GdPictureOcrRect
{
    /// <summary>The left edge of the rectangle.</summary>
    public int Left;

    /// <summary>The top edge of the rectangle.</summary>
    public int Top;

    /// <summary>The right edge of the rectangle.</summary>
    public int Right;

    /// <summary>The bottom edge of the rectangle.</summary>
    public int Bottom;

    /// <summary>
    /// Creates a rectangle from its four edges; each argument is stored in the field of the same name.
    /// </summary>
    /// <param name="Left">Value stored in the Left field.</param>
    /// <param name="Top">Value stored in the Top field.</param>
    /// <param name="Right">Value stored in the Right field.</param>
    /// <param name="Bottom">Value stored in the Bottom field.</param>
    public GdPictureOcrRect(int Left, int Top, int Right, int Bottom)
    {
        this.Left = Left;
        this.Top = Top;
        this.Right = Right;
        this.Bottom = Bottom;
    }
}
This code sample is an example that illustrates how to use our SDK. Please adapt it to your specific use case.