The OCRPageRange method performs OCR on a range of pages or on the complete document. The document must first be opened using the Open or OpenEx method, and it should be saved again afterwards: this method does not save the document by itself.
Sub OCRPageRange(startPage As Integer, EndPage As Integer, Language As String, Options As ACPDFCREACTIVEX.acOCROptions)
void OCRPageRange(System.Int32 startPage, System.Int32 EndPage, System.String Language, ACPDFCREACTIVEX.acOCROptions Options)
HRESULT IPDFCreactiveX::OCRPageRange(long startPage, long EndPage, _bstr_t Language, enum acOCROptions Options)
startPage, EndPage
Start and end page numbers to OCR. Page numbers start with page 1.
Language
The 3-letter ISO 639 language code that indicates which dictionary to use during OCR. OCR accuracy is greatly improved by telling the OCR engine what the main language of the document is. Supported values are: eng (English), fra (French), ita (Italian), deu (German), por (Portuguese), spa (Spanish), vie (Vietnamese), nld (Dutch)
Options
Only one option is currently supported:
acOCROptionVisibleText = 1
By default, the text that is retrieved from the OCR engine is hidden and lies on top of the original document contents. This makes the document searchable without the text hiding the original document contents. When this option is set to 1, the text is visible. This option should be set to 1 in order to extract the text to a separate TXT or RTF file.
This method uses the PageSequence or PageSequenceStr attributes of the Document object to determine which pages are going to be saved to the PDF file.
For more information about the OCRDataFilesLocation attribute, please check the Document attributes page.
''' <summary>
''' Opens an existing PDF file, runs OCR over every page with the recognized
''' text made visible, and saves the result to a new PDF file.
''' </summary>
Sub Sample()
    ' Licensing information for the OCR module
    Const licensee As String = "Amyuni PDF OCR Module Evaluation"
    Const activationKey As String = "07EFCDAB010001005A888A7BCB912FAF9284F5623992DE32607C682FED0215DC171A7C2DFC70738C3B9BD6718DA5BB4A837D98E783BF"

    ' Existing PDF file to process, its password, and the OCR dictionary language
    Dim sourcePath As String = "c:\temp\PDFdocument.pdf"
    Dim sourcePassword As String = ""
    Dim ocrLanguage As String = "eng"

    ' Create the PDFCreactiveX object
    Dim document As New ACPDFCREACTIVEX.PDFCreactiveX()

    With document
        ' The license key is set before the document is opened
        .SetLicenseKey(licensee, activationKey)
        .Open(sourcePath, sourcePassword)

        ' Tell the OCR engine where its data files are located
        .ObjectAttribute("Document", "OCRDataFilesLocation") = "C:\Tesseract41"

        ' OCR the whole document: page numbers start at 1
        Dim firstPage As Integer = 1
        Dim lastPage As Integer = .PageCount
        .OCRPageRange(firstPage, lastPage, ocrLanguage, ACPDFCREACTIVEX.acOCROptions.acOCROptionVisibleText)

        ' OCRPageRange does not save by itself, so write the result out explicitly
        .Save("c:\temp\CreatePDFDocument_resulting.pdf", ACPDFCREACTIVEX.FileSaveOptionConstants.acFileSaveView)
    End With

    ' Drop the reference to the object
    document = Nothing
End Sub
/// <summary>
/// Opens an existing PDF file, runs OCR over every page with the recognized
/// text made visible, and saves the result to a new PDF file.
/// </summary>
static void Sample()
{
    // Constants for activation codes
    const string strLicenseTo = "Amyuni PDF OCR Module Evaluation";
    const string strActivationCode = "07EFCDAB010001005A888A7BCB912FAF9284F5623992DE32607C682FED0215DC171A7C2DFC70738C3B9BD6718DA5BB4A837D98E783BF";

    // Initializing PDFCreactiveX object
    ACPDFCREACTIVEX.PDFCreactiveX pdf = new ACPDFCREACTIVEX.PDFCreactiveX();
    try
    {
        // Set license key
        pdf.SetLicenseKey(strLicenseTo, strActivationCode);

        // Open an existing PDF file
        string fileName = @"c:\temp\PDFdocument.pdf";
        string password = "";
        pdf.Open(fileName, password);

        // OCR configuration.
        // ObjectAttribute is a parameterized COM property. C# cannot assign to it
        // with call syntax (that form is VB-only), so the interop accessor
        // method is used instead.
        pdf.set_ObjectAttribute("Document", "OCRDataFilesLocation", @"C:\Tesseract41");

        // OCR the whole document: page numbers start at 1
        int startPage = 1;
        int endPage = pdf.PageCount;
        string language = "eng";
        pdf.OCRPageRange(startPage, endPage, language, ACPDFCREACTIVEX.acOCROptions.acOCROptionVisibleText);

        // Save PDF: OCRPageRange does not save the document by itself
        pdf.Save(@"c:\temp\CreatePDFDocument_resulting.pdf", ACPDFCREACTIVEX.FileSaveOptionConstants.acFileSaveView);
    }
    finally
    {
        // Setting the variable to null only drops the managed reference.
        // Release the underlying COM object deterministically, even if a call above throws.
        System.Runtime.InteropServices.Marshal.ReleaseComObject(pdf);
    }
}
#include <iostream>
#import "c:\users\amyuni\pdfcreactivex.dll" no_namespace
using namespace std;
int main()
{
// Constants for Activation codes
bstr_t strLicenseTo = "Amyuni PDF OCR Module Evaluation";
bstr_t strActivationCode = "07EFCDAB010001005A888A7BCB912FAF9284F5623992DE32607C682FED0215DC171A7C2DFC70738C3B9BD6718DA5BB4A837D98E783BF";
// Initialize the COM subsystem
CoInitialize(0);
// IPDFCreactiveXPtr is a smart pointer type defined in pdfcreactivex.tlh,
// the type library header file generated by the #import instruction above
IPDFCreactiveXPtr pdf;
// Create the PDFCreactiveX instance
pdf.CreateInstance(__uuidof(PDFCreactiveX));
// set license key
pdf->SetLicenseKey(_bstr_t(strLicenseTo), _bstr_t(strActivationCode));
// Open an existent PDF file
_bstr_t fileName = "c:\\temp\\PDFdocument.pdf";
_bstr_t password = "";
pdf->Open(fileName, password);
// OCR configuration
pdf->PutObjectAttribute("Document", "OCRDataFilesLocation", "C:\\Tesseract41");
// OCR
int startPage = 1;
int endPage = pdf->PageCount;
_bstr_t language = "eng";
pdf->OCRPageRange(startPage, endPage, language, acOCROptionVisibleText);
// Save PDF
pdf->Save("c:\\temp\\CreatePDFDocument_resulting.pdf", acFileSaveView);
// destroy objects
pdf = NULL;
return 0;
}
' Opens an existing PDF file, runs OCR over every page with the recognized
' text made visible, and saves the result to a new PDF file.

' acOCROptions value
Const acOCROptionVisibleText = 1

' FileSaveOptionConstants values (only acFileSaveView is used below)
Const acFileSaveAll = 0
Const acFileSaveDefault = -1
Const acFileSaveView = 1
Const acFileSaveDesign = 2
Const acFileSavePDFA_7 = 3
Const acFileSavePDFA = 4
Const acFileSavePDF14 = 5

' Licensing information for the OCR module
Const strLicenseTo = "Amyuni PDF OCR Module Evaluation"
Const strActivationCode = "07EFCDAB010001005A888A7BCB912FAF9284F5623992DE32607C682FED0215DC171A7C2DFC70738C3B9BD6718DA5BB4A837D98E783BF"

Dim pdf, fileName, password, startPage, endPage, language

' Existing PDF file to process, its password, and the OCR dictionary language
fileName = "c:\temp\ocrtest.pdf"
password = ""
language = "eng"

' Page numbers start at 1
startPage = 1

' Create the PDFCreactiveX object
Set pdf = CreateObject("PDFCreactiveX.PDFCreactiveX.6.5")

With pdf
    ' The license key is set before the document is opened
    .SetLicenseKey strLicenseTo, strActivationCode
    .Open fileName, password

    ' Tell the OCR engine where its data files are located
    .ObjectAttributeStr "Document", "OCRDataFilesLocation", "C:\Tesseract41"

    ' OCR every page of the document
    endPage = .PageCount
    .OCRPageRange startPage, endPage, language, acOCROptionVisibleText

    ' OCRPageRange does not save by itself, so write the result out with Save
    .Save "c:\temp\ocred.pdf", acFileSaveView
End With

' Release the object
Set pdf = Nothing
Important Note
All the samples that are provided in this documentation assume that the developer is using the ActiveX version (PDFCreactiveX.dll).
When using the .NET version (acPDFCreatorLib.Net.Dll), the functions are very similar, although the code is slightly different. Rather than duplicating all the documentation and sample code, we have chosen to provide a complete .NET sample at the end of this documentation.