Reference Guide
Programming / OCR / Using an external OCR engine during PDF/OCR generation
In This Topic
    Using an external OCR engine during PDF/OCR generation
    In This Topic
    This topic assumes you are already familiar with using the GdPicturePDF class to build PDF/OCR documents.

    Overview

    It is possible to easily use any external OCR engine during PDF/OCR generation using the GdPicturePDF class.

    The concept is quite straightforward: you provide the OCR result to a GdPicturePDF instance through a specific event, passing a string that is the serialization of a specific model. Several serialization models are supported; please read the "Supported models for serialization" section of this topic for the list.

    After installing the GdPicture.NET Toolkit, please have a look at our C# "PDF to PDF-OCR" demo included in the demo folder. There you will find complete implementations for the following OCR engines:

    - GdPicture.NET built-in OCR using the GdPictureOCR class.

    - OmniPage.

    - Other engines will be added soon...

     

    Step-by-step instructions

    // Step 1: enable the OCR engine override on the GdPicturePDF instance.
    // NOTE(review): presumably this makes the instance request OCR results through the event below
    // instead of running its built-in engine - confirm against the API reference.
    gdpicturePDF.SetOverrideOcrEngine(true);

    // Step 2: subscribe the handler that supplies the OCR result (see the ExternalOcrRequest samples below).
    gdpicturePDF.ExternalOcrPageRequest += this.ExternalOcrRequest;

    Copy Code
           // Variant using the "gdpictureocr-json" model (the recommended one).
            /// <summary>
            /// Handles the external OCR request by running the GdPictureOCR engine on the given image
            /// and returning its result serialized with the "gdpictureocr-json" model.
            /// </summary>
            /// <param name="ImageID">Identifier of the image to recognize; passed to SetImage.</param>
            /// <param name="PdfOcrOptions">Options copied onto the engine (resource path, dictionary, OCR mode, orientation and skew detection).</param>
            /// <param name="Status">Receives the engine status after the last operation performed.</param>
            /// <param name="ResultEncoding">Receives "gdpictureocr-json" when the OCR run succeeded, null otherwise.</param>
            /// <param name="OcrResult">Receives the serialized OCR result when the OCR run succeeded, null otherwise.</param>
            private void ExternalOcrRequest(int ImageID, PdfOcrOptions PdfOcrOptions, out GdPictureStatus Status, out string ResultEncoding, out string OcrResult)
            {
                using (GdPictureOCR engine = new GdPictureOCR())
                {
                    // Mirror the caller's OCR options on the engine, in the same order as they are applied above.
                    engine.ResourceFolder = PdfOcrOptions.ResourcePath;
                    engine.AddCustomDictionary(PdfOcrOptions.Dictionary);
                    engine.OCRMode = PdfOcrOptions.OCRMode;
                    engine.EnableOrientationDetection = PdfOcrOptions.DetectOrientation;
                    engine.EnableSkewDetection = PdfOcrOptions.DetectSkew;
                    engine.SetImage(ImageID);

                    string ocrResultId = engine.RunOCR();
                    Status = engine.GetStat();

                    // OCR failed: report the status and hand back no payload.
                    if (Status != GdPictureStatus.OK)
                    {
                        OcrResult = null;
                        ResultEncoding = null;
                        return;
                    }

                    ResultEncoding = "gdpictureocr-json";
                    OcrResult = engine.GetSerializedResult(ocrResultId);
                    // The serialization step can fail too, so the status is refreshed afterwards.
                    Status = engine.GetStat();
                }
            }
    
           // Variant using the "json" model.
            /// <summary>
            /// Handles the external OCR request by running the GdPictureOCR engine on the given image,
            /// copying the recognized paragraphs, lines, words and characters into the public "json"
            /// model (GdPictureOcrResult) and serializing that model with JsonConvert.
            /// </summary>
            /// <param name="ImageID">Identifier of the image to recognize; passed to SetImage.</param>
            /// <param name="PdfOcrOptions">Options copied onto the engine (resource path, dictionary, OCR mode, orientation and skew detection).</param>
            /// <param name="Status">Receives the engine status read right after RunOCR.</param>
            /// <param name="ResultEncoding">Receives "json" when the OCR run succeeded, null otherwise.</param>
            /// <param name="OcrResult">Receives the JSON-serialized GdPictureOcrResult when the OCR run succeeded, null otherwise.</param>
            private void ExternalOcrRequest(int ImageID, PdfOcrOptions PdfOcrOptions, out GdPictureStatus Status, out string ResultEncoding, out string OcrResult)
            {
                using (GdPictureOCR gdpictureOCR = new GdPictureOCR())
                {
                    gdpictureOCR.ResourceFolder = PdfOcrOptions.ResourcePath;
                    gdpictureOCR.AddCustomDictionary(PdfOcrOptions.Dictionary);
                    gdpictureOCR.OCRMode = PdfOcrOptions.OCRMode;
                    gdpictureOCR.EnableOrientationDetection = PdfOcrOptions.DetectOrientation;
                    gdpictureOCR.EnableSkewDetection = PdfOcrOptions.DetectSkew;
                    gdpictureOCR.SetImage(ImageID);
                    string resultID = gdpictureOCR.RunOCR();
                    Status = gdpictureOCR.GetStat();

                    // OCR failed: report the status and hand back no payload.
                    if (Status != GdPictureStatus.OK)
                    {
                        ResultEncoding = OcrResult = null;
                        return;
                    }

                    // Keep typed references to the lists being filled so that the IEnumerable<T>
                    // fields of the model never have to be cast back to List<T>.
                    List<GdPictureOcrParagraph> paragraphs = new List<GdPictureOcrParagraph>();
                    GdPictureOcrResult ocrResult = new GdPictureOcrResult()
                    {
                        Paragraphs = paragraphs,
                        PageRotation = gdpictureOCR.GetOrientation()
                    };
                    // NOTE(review): PageSkewAngle is left at its default value although skew detection
                    // may be enabled above - confirm whether the engine exposes the detected angle.

                    // The paragraph count does not change while the result is read; query it once.
                    int paragraphCount = gdpictureOCR.GetParagraphCount(resultID);
                    for (int paragraphIdx = 0; paragraphIdx < paragraphCount; paragraphIdx++)
                    {
                        int blockIdx = gdpictureOCR.GetParagraphBlockIndex(resultID, paragraphIdx);
                        OCRBlockType blockType = gdpictureOCR.GetBlockType(resultID, blockIdx);
                        bool isTextBlock =
                            blockType == OCRBlockType.CaptionText ||
                            blockType == OCRBlockType.FlowingText ||
                            blockType == OCRBlockType.HeadingText ||
                            blockType == OCRBlockType.PulloutText ||
                            blockType == OCRBlockType.VerticalText ||
                            blockType == OCRBlockType.Table;
                        // Rejecting non text blocks.
                        if (!isTextBlock)
                        {
                            continue;
                        }

                        List<GdPictureOcrLine> lines = new List<GdPictureOcrLine>();
                        paragraphs.Add(new GdPictureOcrParagraph() { Lines = lines });

                        // Lines, words and characters are addressed by result-wide indexes:
                        // each level gives the index of its first child and the number of children.
                        int firstLineIdx = gdpictureOCR.GetParagraphFirstTextLineIndex(resultID, paragraphIdx);
                        int lineCount = gdpictureOCR.GetParagraphTextLineCount(resultID, paragraphIdx);
                        for (int lineIdx = firstLineIdx; lineIdx < firstLineIdx + lineCount; lineIdx++)
                        {
                            List<GdPictureOcrWord> words = new List<GdPictureOcrWord>();
                            lines.Add(new GdPictureOcrLine() { Words = words });

                            int firstWordIdx = gdpictureOCR.GetTextLineFirstWordIndex(resultID, lineIdx);
                            int wordCount = gdpictureOCR.GetTextLineWordCount(resultID, lineIdx);
                            for (int wordIdx = firstWordIdx; wordIdx < firstWordIdx + wordCount; wordIdx++)
                            {
                                // The word BBox is not set here; only the character boxes are provided.
                                List<GdPictureOcrCharacter> characters = new List<GdPictureOcrCharacter>();
                                words.Add(new GdPictureOcrWord() { Characters = characters });

                                int firstCharacterIdx = gdpictureOCR.GetWordFirstCharacterIndex(resultID, wordIdx);
                                int characterCount = gdpictureOCR.GetWordCharacterCount(resultID, wordIdx);
                                for (int characterIdx = firstCharacterIdx; characterIdx < firstCharacterIdx + characterCount; characterIdx++)
                                {
                                    int characterLeft = gdpictureOCR.GetCharacterLeft(resultID, characterIdx);
                                    int characterTop = gdpictureOCR.GetCharacterTop(resultID, characterIdx);
                                    int characterRight = gdpictureOCR.GetCharacterRight(resultID, characterIdx);
                                    int characterBottom = gdpictureOCR.GetCharacterBottom(resultID, characterIdx);
                                    characters.Add(new GdPictureOcrCharacter()
                                    {
                                        BBox = new GdPictureOcrRect(characterLeft, characterTop, characterRight, characterBottom),
                                        Value = gdpictureOCR.GetCharacterValue(resultID, characterIdx)
                                    });
                                }
                            }
                        }
                    }

                    ResultEncoding = "json";
                    OcrResult = JsonConvert.SerializeObject(ocrResult);
                }
            }
    
     

     Supported models for serialization

    Model name: "gdpictureocr-json".

    Model information: the model is not public. To obtain serialized data for this model, use the GetSerializedResult method of the GdPictureOCR class.

     


     

    Model name: "json".

    Model information: the provided data must be an enumeration of paragraphs; each paragraph contains lines, each line contains words, and each word contains characters.

    Model definition (CSharp):

    Copy Code
        /// <summary>
        /// Root object of the "json" model: the OCR result of a page,
        /// made of page-level information and the recognized paragraphs.
        /// </summary>
        [Serializable]
        public sealed class GdPictureOcrResult
        {
            /// <summary>
            /// The standard rotation applied to the page before starting the OCR process.
            /// Accepted values are 0, 90, 180 and 270.
            /// </summary>
            public int PageRotation;
    
            /// <summary>
            /// The detected page skew angle, in degrees, clockwise.
            /// </summary>
            public float PageSkewAngle;
    
            /// <summary>
            /// The paragraphs of the page.
            /// </summary>
            public IEnumerable<GdPictureOcrParagraph> Paragraphs;
        }
    
        /// <summary>
        /// A paragraph of the "json" model: a set of text lines sharing
        /// the same rotation and writing direction.
        /// </summary>
        [Serializable]
        public sealed class GdPictureOcrParagraph
        {
            /// <summary>
            /// The standard rotation of the paragraph.
            /// Accepted values are 0, 90, 180 and 270.
            /// </summary>
            public int ParagraphRotation;
    
            /// <summary>
            /// The text writing direction.
            /// Supported values are: 0 for left to right, 1 for right to left, 2 for top to bottom.
            /// </summary>
            public int TextWritingDirection;
    
            /// <summary>
            /// The lines of the paragraph.
            /// </summary>
            public IEnumerable<GdPictureOcrLine> Lines;
        }
    
        /// <summary>
        /// A text line of the "json" model: a sequence of words.
        /// </summary>
        [Serializable]
        public sealed class GdPictureOcrLine
        {
            /// <summary>
            /// The words of the line.
            /// </summary>
            public IEnumerable<GdPictureOcrWord> Words;
        }
    
        /// <summary>
        /// A word of the "json" model: a sequence of characters with an optional bounding box.
        /// </summary>
        [Serializable]
        public sealed class GdPictureOcrWord
        {
            /// <summary>
            /// The bounding box.
            /// It is not mandatory to provide it since it can be computed from character boxes.
            /// </summary>
            public GdPictureOcrRect BBox;
    
            /// <summary>
            /// The characters of the word.
            /// </summary>
            public IEnumerable<GdPictureOcrCharacter> Characters;
        }
    
        /// <summary>
        /// A recognized character of the "json" model: its value and its bounding box.
        /// </summary>
        [Serializable]
        public sealed class GdPictureOcrCharacter
        {
            /// <summary>
            /// The bounding box.
            /// </summary>
            public GdPictureOcrRect BBox;
    
            /// <summary>
            /// The character value.
            /// </summary>
            public char Value;
        }
    
        /// <summary>
        /// A rectangle described by its four edges; used as the bounding box of words and characters.
        /// NOTE(review): coordinates are presumably expressed in pixels of the OCRed image - confirm.
        /// </summary>
        [Serializable]
        public sealed class GdPictureOcrRect
        {
            /// <summary>The left edge of the rectangle.</summary>
            public int Left;
            /// <summary>The top edge of the rectangle.</summary>
            public int Top;
            /// <summary>The right edge of the rectangle.</summary>
            public int Right;
            /// <summary>The bottom edge of the rectangle.</summary>
            public int Bottom;
    
            /// <summary>
            /// Creates a rectangle from its four edges.
            /// </summary>
            /// <param name="Left">The left edge.</param>
            /// <param name="Top">The top edge.</param>
            /// <param name="Right">The right edge.</param>
            /// <param name="Bottom">The bottom edge.</param>
            public GdPictureOcrRect(int Left, int Top, int Right, int Bottom)
            {
                this.Left = Left;
                this.Top = Top;
                this.Right = Right;
                this.Bottom = Bottom;
            }
        }