The OptimizeDocument method is used to optimize the document before exporting to RTF or Text format after recognition has been applied to it.
The text inside a PDF file is usually split into multiple parts; a single sentence or paragraph can consist of multiple pieces of text positioned independently inside the PDF document. This method attempts to regroup lines or paragraphs prior to exporting the PDF file into another format, or to make the file easier to edit.
Sub OptimizeDocument(Level As Long)
void OptimizeDocument(int level)
HRESULT OptimizeDocument(int Level)
Level
| Optimization Level | Value | Description |
|---|---|---|
| No optimization | 0 | Recommended when exporting to JPEG and TIFF formats. |
| Line optimization | 1 | Recommended when exporting to RTF format. |
| Paragraph optimization | 2 | Recommended when exporting to HTML format. |
| Table optimization | 3 | Recommended when exporting to Excel format. |
The screenshots below show the borders of the text objects of a document at different stages of optimization.
''' <summary>
''' Optimization levels accepted by OptimizeDocument.
''' The values are sequential, mutually exclusive levels rather than bit flags,
''' so the enum is intentionally not marked with the Flags attribute.
''' </summary>
Public Enum OPTIMIZATION_LEVEL
    ''' <summary>No optimization. Recommended when exporting to JPEG and TIFF formats.</summary>
    NO_OPTIMIZATION = 0
    ''' <summary>Line optimization. Recommended when exporting to RTF format.</summary>
    LINE_OPTIMIZATION = 1
    ''' <summary>Paragraph optimization. Recommended when exporting to HTML format.</summary>
    PARAGRAPH_OPTIMIZATION = 2
    ''' <summary>Table optimization. Recommended when exporting to Excel format.</summary>
    TABLE_OPTIMIZATION = 3
End Enum
''' <summary>
''' Opens c:\temp\PDFdocument.pdf, applies line optimization to it and exports
''' the result to c:\temp\file.rtf using the acRtfExportOptionText option.
''' </summary>
Sub Sample()
    ' Constants for activation codes
    Const strLicenseTo As String = "Amyuni PDF OCR Module Evaluation"
    Const strActivationCode As String = "07EFCDAB0100010093D94E09B8D9E1238F7DF9E1C5978D7D37AF93D9345B1D74DDC2D9DA41BB345E54160991EA7537246947E74D57A4"

    ' Initialize the PDFCreactiveX object
    Dim pdf As ACPDFCREACTIVEX.PDFCreactiveX = New ACPDFCREACTIVEX.PDFCreactiveX()
    Try
        ' Set the license key
        pdf.SetLicenseKey(strLicenseTo, strActivationCode)

        ' Open an existing PDF file
        Dim fileName As String = "c:\temp\PDFdocument.pdf"
        Dim password As String = ""
        pdf.Open(fileName, password)

        ' Optimization
        Dim level As Integer = OPTIMIZATION_LEVEL.LINE_OPTIMIZATION
        pdf.OptimizeDocument(level)

        ' Exporting
        Dim outFileName As String = "c:\temp\file.rtf"
        Dim options As ACPDFCREACTIVEX.acRtfExportOptions = ACPDFCREACTIVEX.acRtfExportOptions.acRtfExportOptionText
        ' Integer (32-bit) rather than Long (64-bit in VB.NET): a COM "long" is
        ' 32 bits wide, and an Integer still widens implicitly if the interop
        ' signature turns out to expect a Long.
        Dim UseTabs As Integer = 1
        pdf.ExportToRTF(outFileName, options, UseTabs)
    Finally
        ' Assigning Nothing alone does not release the underlying COM object;
        ' release it explicitly so it is not kept alive until the next
        ' garbage collection.
        System.Runtime.InteropServices.Marshal.ReleaseComObject(pdf)
        pdf = Nothing
    End Try
End Sub
/// <summary>
/// Optimization levels accepted by OptimizeDocument.
/// The values are sequential, mutually exclusive levels rather than bit flags,
/// so the enum is intentionally not marked with [Flags].
/// </summary>
public enum OPTIMIZATION_LEVEL
{
    /// <summary>No optimization. Recommended when exporting to JPEG and TIFF formats.</summary>
    NO_OPTIMIZATION = 0,

    /// <summary>Line optimization. Recommended when exporting to RTF format.</summary>
    LINE_OPTIMIZATION = 1,

    /// <summary>Paragraph optimization. Recommended when exporting to HTML format.</summary>
    PARAGRAPH_OPTIMIZATION = 2,

    /// <summary>Table optimization. Recommended when exporting to Excel format.</summary>
    TABLE_OPTIMIZATION = 3
}
/// <summary>
/// Opens c:\temp\PDFdocument.pdf, applies line optimization to it and exports
/// the result to c:\temp\file.rtf using the acRtfExportOptionText option.
/// </summary>
static void Sample()
{
    // Constants for activation codes
    const string strLicenseTo = "Amyuni PDF OCR Module Evaluation";
    const string strActivationCode = "07EFCDAB0100010093D94E09B8D9E1238F7DF9E1C5978D7D37AF93D9345B1D74DDC2D9DA41BB345E54160991EA7537246947E74D57A4";

    // Initialize the PDFCreactiveX object
    ACPDFCREACTIVEX.PDFCreactiveX pdf = new ACPDFCREACTIVEX.PDFCreactiveX();
    try
    {
        // Set the license key
        pdf.SetLicenseKey(strLicenseTo, strActivationCode);

        // Open an existing PDF file
        string fileName = @"c:\temp\PDFdocument.pdf";
        string password = "";
        pdf.Open(fileName, password);

        // Optimization
        int level = (int)OPTIMIZATION_LEVEL.LINE_OPTIMIZATION;
        pdf.OptimizeDocument(level);

        // Exporting
        string outFileName = @"c:\temp\file.rtf";
        ACPDFCREACTIVEX.acRtfExportOptions options = ACPDFCREACTIVEX.acRtfExportOptions.acRtfExportOptionText;
        // int rather than long: a COM "long" is 32 bits wide, and C# has no
        // implicit long-to-int conversion. An int still widens implicitly if
        // the interop signature turns out to expect a long.
        int useTabs = 1;
        pdf.ExportToRTF(outFileName, options, useTabs);
    }
    finally
    {
        // Assigning null alone does not release the underlying COM object;
        // release it explicitly so it is not kept alive until the next
        // garbage collection.
        System.Runtime.InteropServices.Marshal.ReleaseComObject(pdf);
    }
}
#include <iostream>
#import "c:\users\amyuni\pdfcreactivex.dll" no_namespace
using namespace std;
// Optimization levels accepted by OptimizeDocument.
enum OPTIMIZATION_LEVEL
{
    NO_OPTIMIZATION        = 0,  // recommended when exporting to JPEG and TIFF formats
    LINE_OPTIMIZATION      = 1,  // recommended when exporting to RTF format
    PARAGRAPH_OPTIMIZATION = 2,  // recommended when exporting to HTML format
    TABLE_OPTIMIZATION     = 3   // recommended when exporting to Excel format
};
int main()
{
// Constants for Activation codes
bstr_t strLicenseTo = "Amyuni PDF OCR Module Evaluationn";
bstr_t strActivationCode = "07EFCDAB0100010093D94E09B8D9E1238F7DF9E1C5978D7D37AF93D9345B1D74DDC2D9DA41BB345E54160991EA7537246947E74D57A4";
// Initialize the COM subsystem
CoInitialize(0);
// IPDFCreactiveXPtr is a smart pointer type defined in pdfcreactivex.tlh,
// the type library header file generated by the #import instruction above
IPDFCreactiveXPtr pdf;
// Create the PDFCreactiveX instance
pdf.CreateInstance(__uuidof(PDFCreactiveX));
// set license key
pdf->SetLicenseKey(_bstr_t(strLicenseTo), _bstr_t(strActivationCode));
// Open an existent PDF file
_bstr_t fileName = "c:\\temp\\PDFdocument.pdf";
_bstr_t password = "";
pdf->Open(fileName, password);
// Optimization
int level = (int)LINE_OPTIMIZATION;
pdf->OptimizeDocument(level);
// Exporting
bstr_t outFileName = "c:\\temp\\file.rtf";
acRtfExportOptions options = acRtfExportOptionText;
long UseTabs = 1;
pdf->ExportToRTF(outFileName, options, UseTabs);
// destroy objects
pdf = NULL;
return 0;
}
' Sample script: open a PDF file, apply line optimization and export it as text.

' Values of the acRtfExportOptions enumeration
Const acRtfExportOptionAdvancedRTF = 0
Const acRtfExportOptionFullRTF = 1
Const acRtfExportOptionRTFText = 2
Const acRtfExportOptionText = 3
Const acRtfExportOptionTextANSI = 4

' Levels that can be passed to OptimizeDocument
Const NO_OPTIMIZATION = 0
Const LINE_OPTIMIZATION = 1
Const PARAGRAPH_OPTIMIZATION = 2
Const TABLE_OPTIMIZATION = 3

' Licensing information
Const strLicenseTo = "Amyuni PDF OCR Module Evaluation"
Const strActivationCode = "07EFCDAB0100010093D94E09B8D9E1238F7DF9E1C5978D7D37AF93D9345B1D74DDC2D9DA41BB345E54160991EA7537246947E74D57A4"

' All script variables, declared up front
Dim pdf, fileName, password, level, outFileName, options, UseTabs

' Input document and its password
fileName = "c:\temp\PDFdocument.pdf"
password = ""

' Optimization level to apply
level = LINE_OPTIMIZATION

' Export target and settings
outFileName = "c:\temp\file.rtf"
options = acRtfExportOptionText
UseTabs = 1

' Create the PDFCreactiveX object and license it
Set pdf = CreateObject("PDFCreactiveX.PDFCreactiveX.6.5")
Call pdf.SetLicenseKey(strLicenseTo, strActivationCode)

' Open, optimize, then export the document
Call pdf.Open(fileName, password)
Call pdf.OptimizeDocument(level)
Call pdf.ExportToRTF(outFileName, options, UseTabs)

' Release the object
Set pdf = Nothing
Important Note
All the samples that are provided in this documentation assume that the developer is using the ActiveX version (PDFCreactiveX.dll).
When using the .NET version (acPDFCreatorLib.Net.Dll), the functions are very similar, although the code is slightly different. Rather than duplicating all the documentation and sample code, we have chosen to provide a complete .NET sample at the end of this documentation.