Using an external OCR engine during PDF/OCR generation

This example shows how to use any external OCR engine during PDF/OCR generation using the GdPicturePDF class.
1
/**
2
* This topic assumes you are already familiar with using the GdPicturePDF class to build PDF/OCR documents.
3

4
It is possible to easily use any external OCR engine during PDF/OCR generation using the GdPicturePDF class.
5

6
The concept is quite straightforward: you provide the OCR result to a GdPicturePDF instance through a specific event, passing a string that is the serialization of a specific model. Several serialization models are supported; see the "Supported models for serialization" section of this topic for the list.
7

8
After installing the Nutrient .NET SDK (formerly GdPicture.NET) toolkit, please have a look at our C# "PDF to PDF-OCR" demo included in the demo folder. There you will find complete implementations of external OCR engines:
9

10
- GdPicture.NET built-in OCR using the GdPictureOCR class.
11

12
- OmniPage.
13

14
- Other engines shall be exposed soon...
15

16
**Step by step instructions**
17
1: Tell the instance to use an external OCR engine.
18
gdpicturePDF.SetOverrideOcrEngine(true);
19

20
2: Subscribe to the ExternalOcrPageRequest event.
21
gdpicturePDF.ExternalOcrPageRequest += this.ExternalOcrRequest;
22

23
3: Implement the logic that provides the OCR result in the ExternalOcrRequest event handler.
24
*/
25

26
/// <summary>
/// Event handler that runs OCR with the built-in GdPictureOCR engine and returns the result
/// serialized with the "gdpictureocr-json" model (the recommended one).
/// </summary>
/// <param name="ImageID">Identifier of the image to recognize; handed to the OCR engine via SetImage.</param>
/// <param name="PdfOcrOptions">Options copied onto the OCR engine: resource path, dictionary, OCR mode, orientation detection and skew detection.</param>
/// <param name="Status">Engine status after the last operation performed (the OCR run, or the serialization when the run succeeded).</param>
/// <param name="ResultEncoding">"gdpictureocr-json" when the OCR run succeeded; otherwise null.</param>
/// <param name="OcrResult">Serialized OCR result when the OCR run succeeded; otherwise null.</param>
private void ExternalOcrRequest(int ImageID, PdfOcrOptions PdfOcrOptions, out GdPictureStatus Status, out string ResultEncoding, out string OcrResult)
{
    using (GdPictureOCR gdpictureOCR = new GdPictureOCR())
    {
        // Mirror the caller's OCR settings onto the engine instance.
        gdpictureOCR.ResourceFolder = PdfOcrOptions.ResourcePath;
        gdpictureOCR.AddCustomDictionary(PdfOcrOptions.Dictionary);
        gdpictureOCR.OCRMode = PdfOcrOptions.OCRMode;
        gdpictureOCR.EnableOrientationDetection = PdfOcrOptions.DetectOrientation;
        gdpictureOCR.EnableSkewDetection = PdfOcrOptions.DetectSkew;
        gdpictureOCR.SetImage(ImageID);

        string resultID = gdpictureOCR.RunOCR();
        Status = gdpictureOCR.GetStat();
        if (Status != GdPictureStatus.OK)
        {
            // OCR failed: report the failing status and no result.
            ResultEncoding = OcrResult = null;
            return;
        }

        ResultEncoding = "gdpictureocr-json";
        OcrResult = gdpictureOCR.GetSerializedResult(resultID);
        // Re-read the status so a serialization failure is reported to the caller.
        Status = gdpictureOCR.GetStat();
    }
}
51

52
/// <summary>
/// Event handler that runs OCR with the built-in GdPictureOCR engine, copies the recognized
/// paragraphs, lines, words and characters into a <see cref="GdPictureOcrResult"/> and returns
/// it serialized with the "json" model.
/// </summary>
/// <param name="ImageID">Identifier of the image to recognize; handed to the OCR engine via SetImage.</param>
/// <param name="PdfOcrOptions">Options copied onto the OCR engine: resource path, dictionary, OCR mode, orientation detection and skew detection.</param>
/// <param name="Status">Engine status read right after the OCR run.</param>
/// <param name="ResultEncoding">"json" when the OCR run succeeded; otherwise null.</param>
/// <param name="OcrResult">JSON serialization of the result model when the OCR run succeeded; otherwise null.</param>
private void ExternalOcrRequest(int ImageID, PdfOcrOptions PdfOcrOptions, out GdPictureStatus Status, out string ResultEncoding, out string OcrResult)
{
    using (GdPictureOCR gdpictureOCR = new GdPictureOCR())
    {
        // Mirror the caller's OCR settings onto the engine instance.
        gdpictureOCR.ResourceFolder = PdfOcrOptions.ResourcePath;
        gdpictureOCR.AddCustomDictionary(PdfOcrOptions.Dictionary);
        gdpictureOCR.OCRMode = PdfOcrOptions.OCRMode;
        gdpictureOCR.EnableOrientationDetection = PdfOcrOptions.DetectOrientation;
        gdpictureOCR.EnableSkewDetection = PdfOcrOptions.DetectSkew;
        gdpictureOCR.SetImage(ImageID);

        string resultID = gdpictureOCR.RunOCR();
        Status = gdpictureOCR.GetStat();
        if (Status != GdPictureStatus.OK)
        {
            // OCR failed: report the failing status and no result.
            ResultEncoding = OcrResult = null;
            return;
        }

        // Typed local lists are kept so items can be appended without downcasting
        // the IEnumerable<T> fields of the model.
        List<GdPictureOcrParagraph> paragraphs = new List<GdPictureOcrParagraph>();
        GdPictureOcrResult ocrResult = new GdPictureOcrResult()
        {
            Paragraphs = paragraphs,
            PageRotation = gdpictureOCR.GetOrientation()
            // PageSkewAngle is not populated here and keeps its default value.
        };

        int paragraphCount = gdpictureOCR.GetParagraphCount(resultID);
        for (int paragraphIdx = 0; paragraphIdx < paragraphCount; paragraphIdx++)
        {
            OCRBlockType blockType = gdpictureOCR.GetBlockType(resultID, gdpictureOCR.GetParagraphBlockIndex(resultID, paragraphIdx));

            // Reject paragraphs that belong to a non-text block.
            if (blockType != OCRBlockType.CaptionText &&
                blockType != OCRBlockType.FlowingText &&
                blockType != OCRBlockType.HeadingText &&
                blockType != OCRBlockType.PulloutText &&
                blockType != OCRBlockType.VerticalText &&
                blockType != OCRBlockType.Table)
            {
                continue;
            }

            List<GdPictureOcrLine> lines = new List<GdPictureOcrLine>();
            paragraphs.Add(new GdPictureOcrParagraph() { Lines = lines });

            // Line, word and character indices are taken from the engine as
            // "first index + count" ranges for the parent element.
            int firstLineIdx = gdpictureOCR.GetParagraphFirstTextLineIndex(resultID, paragraphIdx);
            int lineCount = gdpictureOCR.GetParagraphTextLineCount(resultID, paragraphIdx);
            for (int lineIdx = firstLineIdx; lineIdx < firstLineIdx + lineCount; lineIdx++)
            {
                List<GdPictureOcrWord> words = new List<GdPictureOcrWord>();
                lines.Add(new GdPictureOcrLine() { Words = words });

                int firstWordIdx = gdpictureOCR.GetTextLineFirstWordIndex(resultID, lineIdx);
                int wordCount = gdpictureOCR.GetTextLineWordCount(resultID, lineIdx);
                for (int wordIdx = firstWordIdx; wordIdx < firstWordIdx + wordCount; wordIdx++)
                {
                    // The word BBox is intentionally left unset; only character boxes are provided.
                    List<GdPictureOcrCharacter> characters = new List<GdPictureOcrCharacter>();
                    words.Add(new GdPictureOcrWord() { Characters = characters });

                    int firstCharacterIdx = gdpictureOCR.GetWordFirstCharacterIndex(resultID, wordIdx);
                    int characterCount = gdpictureOCR.GetWordCharacterCount(resultID, wordIdx);
                    for (int characterIdx = firstCharacterIdx; characterIdx < firstCharacterIdx + characterCount; characterIdx++)
                    {
                        int characterLeft = gdpictureOCR.GetCharacterLeft(resultID, characterIdx);
                        int characterTop = gdpictureOCR.GetCharacterTop(resultID, characterIdx);
                        int characterRight = gdpictureOCR.GetCharacterRight(resultID, characterIdx);
                        int characterBottom = gdpictureOCR.GetCharacterBottom(resultID, characterIdx);
                        characters.Add(new GdPictureOcrCharacter()
                        {
                            BBox = new GdPictureOcrRect(characterLeft, characterTop, characterRight, characterBottom),
                            Value = gdpictureOCR.GetCharacterValue(resultID, characterIdx)
                        });
                    }
                }
            }
        }

        ResultEncoding = "json";
        OcrResult = JsonConvert.SerializeObject(ocrResult);
    }
}
135
/**
136

137
Supported models for serialization
138
Model name: "gdpictureocr-json".
139

140
Model information: the model is not public. To obtain data serialized with this model, use the GetSerializedResult method of the GdPictureOCR class.
141

142
Model name: "json".
143

144
Model information: the provided data must be an enumeration of paragraphs containing lines containing words containing characters.
145
*/
146

147
/// <summary>
/// Root of the "json" OCR result model: the OCR result of one page, made of paragraphs
/// containing lines containing words containing characters.
/// </summary>
[Serializable]
public sealed class GdPictureOcrResult
{
    /// <summary>
    /// The standard rotation applied to the page before starting the OCR process.
    /// Accepted values are 0, 90, 180 and 270.
    /// </summary>
    public int PageRotation;

    /// <summary>
    /// The detected page skew angle, in degrees, clockwise.
    /// </summary>
    public float PageSkewAngle;

    /// <summary>
    /// The paragraphs of the page.
    /// </summary>
    public IEnumerable<GdPictureOcrParagraph> Paragraphs;
}
169

170
/// <summary>
/// A paragraph of the OCR result model; holds the text lines of the paragraph.
/// </summary>
[Serializable]
public sealed class GdPictureOcrParagraph
{
    /// <summary>
    /// The standard rotation of the paragraph.
    /// Accepted values are 0, 90, 180 and 270.
    /// </summary>
    public int ParagraphRotation;

    /// <summary>
    /// The text writing direction.
    /// Supported values are: 0 for left to right, 1 for right to left, 2 for top to bottom.
    /// </summary>
    public int TextWritingDirection;

    /// <summary>
    /// The lines of the paragraph.
    /// </summary>
    public IEnumerable<GdPictureOcrLine> Lines;
}
189

190
/// <summary>
/// A text line of the OCR result model; holds the words of the line.
/// </summary>
[Serializable]
public sealed class GdPictureOcrLine
{
    /// <summary>
    /// The words of the line.
    /// </summary>
    public IEnumerable<GdPictureOcrWord> Words;
}
198

199
/// <summary>
/// A word of the OCR result model; holds the characters of the word and an optional bounding box.
/// </summary>
[Serializable]
public sealed class GdPictureOcrWord
{
    /// <summary>
    /// The bounding box.
    /// It is not mandatory to provide it since it can be computed from character boxes.
    /// </summary>
    public GdPictureOcrRect BBox;

    /// <summary>
    /// The characters of the word.
    /// </summary>
    public IEnumerable<GdPictureOcrCharacter> Characters;
}
213

214
/// <summary>
/// A single recognized character of the OCR result model, with its bounding box.
/// </summary>
[Serializable]
public sealed class GdPictureOcrCharacter
{
    /// <summary>
    /// The bounding box.
    /// </summary>
    public GdPictureOcrRect BBox;

    /// <summary>
    /// The character value.
    /// </summary>
    public char Value;
}
227

228
/// <summary>
/// A rectangle described by its four edges; used as the bounding box of words and characters
/// in the OCR result model.
/// </summary>
[Serializable]
public sealed class GdPictureOcrRect
{
    /// <summary>
    /// The left edge of the rectangle.
    /// </summary>
    public int Left;

    /// <summary>
    /// The top edge of the rectangle.
    /// </summary>
    public int Top;

    /// <summary>
    /// The right edge of the rectangle.
    /// </summary>
    public int Right;

    /// <summary>
    /// The bottom edge of the rectangle.
    /// </summary>
    public int Bottom;

    /// <summary>
    /// Creates a rectangle from its four edges. The values are stored as given; no validation
    /// or normalization is performed.
    /// </summary>
    /// <param name="Left">The left edge.</param>
    /// <param name="Top">The top edge.</param>
    /// <param name="Right">The right edge.</param>
    /// <param name="Bottom">The bottom edge.</param>
    public GdPictureOcrRect(int Left, int Top, int Right, int Bottom)
    {
        this.Left = Left;
        this.Top = Top;
        this.Right = Right;
        this.Bottom = Bottom;
    }
}
This code sample is an example that illustrates how to use our SDK. Please adapt it to your specific use case.
Using an external OCR engine during PDF/OCR generation

Was this helpful?

Help us improve

Thank you for your feedback!

Something went wrong. Please try again or let us know.