OCRPageRange Method

The OCRPageRange method performs OCR on a range of pages or on the complete document. The document must first be opened using the Open or OpenEx method and should be saved again afterwards; this method does not save the document by itself.

 

Syntax

VB:

Sub OCRPageRange(startPage As Integer, EndPage As Integer, Language As String, Options As ACPDFCREACTIVEX.acOCROptions)

C#:

void OCRPageRange(System.Int32 startPage, System.Int32 EndPage, System.String Language, ACPDFCREACTIVEX.acOCROptions Options)

C++:

HRESULT IPDFCreactiveX::OCRPageRange(long startPage, long EndPage, _bstr_t Language, enum acOCROptions Options)

 

 

Parameters

StartPage, EndPage

Start and end page numbers to OCR. Page numbers start with page 1.

Language

3-letter ISO_639_Language_Code that indicates which dictionary to use during OCR. OCR accuracy is greatly improved by telling the OCR engine the main language of the document. Supported values are: eng (English), fra (French), ita (Italian), deu (German), por (Portuguese), spa (Spanish), vie (Vietnamese), nld (Dutch).

Options

Only one option is currently supported:

acOCROptionVisibleText = 1

By default, the text that is retrieved from the OCR engine is hidden and lies on top of the original document contents. This makes the document searchable without the text hiding the original document contents. When this option is set to 1, the text is visible. This option should be set to 1 in order to extract the text to a separate TXT or RTF file.

 

Remarks

This method uses the PageSequence or PageSequenceStr attributes of the Document object to determine which pages are going to be saved to the PDF file.

 

For more information about the OCRDataFilesLocation attribute, please check the Document attributes page.

 

Example

''' <summary>
''' Runs OCR over every page of an existing PDF file and writes the result
''' to a new PDF file.
''' </summary>
Sub Sample()
    ' License information for the OCR module
    Const licensee As String = "Amyuni PDF OCR Module Evaluation"
    Const activationCode As String = "07EFCDAB010001005A888A7BCB912FAF9284F5623992DE32607C682FED0215DC171A7C2DFC70738C3B9BD6718DA5BB4A837D98E783BF"

    ' Input document and the password used to open it
    Dim sourcePath As String = "c:\temp\PDFdocument.pdf"
    Dim sourcePassword As String = ""

    ' Create the PDFCreactiveX object and apply the license key
    Dim creator As New ACPDFCREACTIVEX.PDFCreactiveX()
    creator.SetLicenseKey(licensee, activationCode)

    ' Load the existing PDF file
    creator.Open(sourcePath, sourcePassword)

    ' Tell the control where the OCR data files are located
    creator.ObjectAttribute("Document", "OCRDataFilesLocation") = "C:\Tesseract41"

    ' OCR the whole document: page numbers start at 1 and run up to PageCount
    Dim firstPage As Integer = 1
    Dim lastPage As Integer = creator.PageCount
    Dim ocrLanguage As String = "eng"
    creator.OCRPageRange(firstPage, lastPage, ocrLanguage, ACPDFCREACTIVEX.acOCROptions.acOCROptionVisibleText)

    ' OCRPageRange does not save the document by itself, so save it explicitly
    creator.Save("c:\temp\CreatePDFDocument_resulting.pdf", ACPDFCREACTIVEX.FileSaveOptionConstants.acFileSaveView)

    ' Drop the reference to the control
    creator = Nothing
End Sub

/// <summary>
/// Opens an existing PDF file, runs OCR over all of its pages using the
/// English dictionary and saves the result to a new PDF file.
/// </summary>
/// <remarks>
/// OCRPageRange does not save the document by itself, which is why Save is
/// called explicitly after the OCR step.
/// </remarks>
static void Sample()
{
    // Constants for activation codes
    const string strLicenseTo = "Amyuni PDF OCR Module Evaluation";
    const string strActivationCode = "07EFCDAB010001005A888A7BCB912FAF9284F5623992DE32607C682FED0215DC171A7C2DFC70738C3B9BD6718DA5BB4A837D98E783BF";

    // Initialize the PDFCreactiveX object
    ACPDFCREACTIVEX.PDFCreactiveX pdf = new ACPDFCREACTIVEX.PDFCreactiveX();

    // Set license key
    pdf.SetLicenseKey(strLicenseTo, strActivationCode);

    // Open an existing PDF file
    string fileName = @"c:\temp\PDFdocument.pdf";
    string password = "";
    pdf.Open(fileName, password);

    // OCR configuration.
    // ObjectAttribute is a parameterized COM property. C# cannot assign to it
    // with call syntax ("pdf.ObjectAttribute(...) = value" does not compile),
    // so the setter generated by the interop assembly is called directly.
    pdf.set_ObjectAttribute("Document", "OCRDataFilesLocation", @"C:\Tesseract41");

    // OCR the whole document: page numbers start at 1 and run up to PageCount
    int startPage = 1;
    int endPage = pdf.PageCount;
    string language = "eng";
    pdf.OCRPageRange(startPage, endPage, language, ACPDFCREACTIVEX.acOCROptions.acOCROptionVisibleText);

    // Save PDF
    pdf.Save(@"c:\temp\CreatePDFDocument_resulting.pdf", ACPDFCREACTIVEX.FileSaveOptionConstants.acFileSaveView);

    // Drop the reference to the control
    pdf = null;
}

#include <iostream>

#import "c:\users\amyuni\pdfcreactivex.dll" no_namespace

 

using namespace std;

 

int main()

{

    // Constants for Activation codes

    bstr_t strLicenseTo = "Amyuni PDF OCR Module Evaluation";

    bstr_t strActivationCode = "07EFCDAB010001005A888A7BCB912FAF9284F5623992DE32607C682FED0215DC171A7C2DFC70738C3B9BD6718DA5BB4A837D98E783BF";

 

    // Initialize the COM subsystem

    CoInitialize(0);

 

    // IPDFCreactiveXPtr is a smart pointer type defined in pdfcreactivex.tlh,

    // the type library header file generated by the #import instruction above

    IPDFCreactiveXPtr pdf;

 

    // Create the PDFCreactiveX instance

    pdf.CreateInstance(__uuidof(PDFCreactiveX));

 

    // set license key

    pdf->SetLicenseKey(_bstr_t(strLicenseTo), _bstr_t(strActivationCode));

 

    // Open an existent PDF file

    _bstr_t fileName = "c:\\temp\\PDFdocument.pdf";

    _bstr_t password = "";

    pdf->Open(fileName, password);

 

    // OCR configuration

    pdf->PutObjectAttribute("Document", "OCRDataFilesLocation", "C:\\Tesseract41");

 

    // OCR

    int startPage = 1;

    int endPage = pdf->PageCount;

    _bstr_t language = "eng";

    pdf->OCRPageRange(startPage, endPage, language, acOCROptionVisibleText);

 

    // Save PDF

    pdf->Save("c:\\temp\\CreatePDFDocument_resulting.pdf", acFileSaveView);

 

    // destroy objects

    pdf = NULL;

 

    return 0;

}

' VBScript sample: opens an existing PDF file, runs OCR on every page using
' the English dictionary and saves the result to a new PDF file.

' OCR options

Const acOCROptionVisibleText = 1

 

' FileSaveOptionConstants

Const acFileSaveAll = 0

Const acFileSaveDefault = -1

Const acFileSaveView = 1

Const acFileSaveDesign = 2

Const acFileSavePDFA_7 = 3

Const acFileSavePDFA = 4

Const acFileSavePDF14 = 5

 

' Constants for Activation codes

Const strLicenseTo = "Amyuni PDF OCR Module Evaluation"

Const strActivationCode = "07EFCDAB010001005A888A7BCB912FAF9284F5623992DE32607C682FED0215DC171A7C2DFC70738C3B9BD6718DA5BB4A837D98E783BF"

 

' Initialize the PDFCreactiveX object

Dim pdf

Set pdf = CreateObject("PDFCreactiveX.PDFCreactiveX.6.5")

 

' Set license key

pdf.SetLicenseKey strLicenseTo, strActivationCode

 

' Open an existing PDF file

Dim fileName

fileName = "c:\temp\ocrtest.pdf"

Dim password

password = ""

pdf.Open fileName, password

 

' OCR configuration: location of the OCR data files

pdf.ObjectAttributeStr "Document", "OCRDataFilesLocation", "C:\Tesseract41"

 

' OCR the whole document: page numbers start at 1 and run up to PageCount

Dim startPage

startPage=1

Dim endPage

endPage = pdf.PageCount

Dim language

language = "eng"

pdf.OCRPageRange startPage, endPage, language, acOCROptionVisibleText

 

' Save PDF (OCRPageRange does not save the document by itself)

pdf.Save "c:\temp\ocred.pdf", acFileSaveView

 

' destroy Objects

Set pdf = Nothing

 

Important Note

All the samples that are provided in this documentation assume that the developer is using the ActiveX version (PDFCreactiveX.dll).

 

When using the .NET version (acPDFCreatorLib.Net.Dll), the functions are very similar, although the code is slightly different. Rather than duplicating all the documentation and sample code, we have chosen to provide a complete .NET sample at the end of this documentation.