OptimizeDocument Method

The OptimizeDocument method is used to optimize the document before exporting to RTF or Text format after recognition has been applied to it.

The text inside a PDF file is usually split into multiple parts: a single sentence or paragraph can consist of multiple pieces of text positioned independently inside the PDF document. This method attempts to regroup these pieces into lines or paragraphs prior to exporting the PDF file into another format, or to make the file easier to edit.

 

Syntax

VB:

Sub OptimizeDocument(Level As Long)

C#:

void OptimizeDocument(int level)

C++:

HRESULT OptimizeDocument(int Level)

 

Parameters

Level

Optimization Level

Value

Description

No optimization

0

Recommended when exporting to JPEG and TIFF formats.

Line optimization

1

Recommended when exporting to RTF format.

Paragraph optimization

2

Recommended when exporting to HTML format.

Table optimization

3

Recommended when exporting to Excel format.

 

 

Remarks

The screenshots below show the borders of the text objects of a document at different stages of optimization.

Figure 11: OptimizeDocument, Original Document

Figure 12: OptimizeDocument, Optimize to Line

 

Example

''' <summary>
''' Optimization levels accepted by the OptimizeDocument method.
''' </summary>
''' <remarks>
''' The levels are mutually exclusive ordinal values (3 is a level of its own,
''' not the combination of 1 and 2), so the enumeration is deliberately not
''' declared as a Flags enumeration.
''' </remarks>
Public Enum OPTIMIZATION_LEVEL
    ' No optimization; recommended when exporting to JPEG and TIFF formats
    NO_OPTIMIZATION = 0
    ' Line optimization; recommended when exporting to RTF format
    LINE_OPTIMIZATION = 1
    ' Paragraph optimization; recommended when exporting to HTML format
    PARAGRAPH_OPTIMIZATION = 2
    ' Table optimization; recommended when exporting to Excel format
    TABLE_OPTIMIZATION = 3
End Enum

 

' Opens a PDF file, applies line optimization to it and exports the result
' to an RTF file.
Sub Sample()
    ' Constants for Activation codes
    Const strLicenseTo As String = "Amyuni PDF OCR Module Evaluation"
    Const strActivationCode As String = "07EFCDAB0100010093D94E09B8D9E1238F7DF9E1C5978D7D37AF93D9345B1D74DDC2D9DA41BB345E54160991EA7537246947E74D57A4"

    ' Initializing PDFCreactiveX Object
    Dim pdf As ACPDFCREACTIVEX.PDFCreactiveX = New ACPDFCREACTIVEX.PDFCreactiveX()

    ' Set license key
    pdf.SetLicenseKey(strLicenseTo, strActivationCode)

    ' Open an existing PDF file
    Dim fileName As String = "c:\temp\PDFdocument.pdf"
    Dim password As String = ""
    pdf.Open(fileName, password)

    ' Optimization
    Dim level As Integer = OPTIMIZATION_LEVEL.LINE_OPTIMIZATION
    pdf.OptimizeDocument(level)

    ' Exporting
    Dim outFileName As String = "c:\temp\file.rtf"
    Dim options As ACPDFCREACTIVEX.acRtfExportOptions = ACPDFCREACTIVEX.acRtfExportOptions.acRtfExportOptionText
    ' Integer rather than Long: in VB.NET Long is 64-bit, whereas a COM "long"
    ' parameter is 32-bit. NOTE(review): confirm the UseTabs parameter type
    ' against the generated interop assembly.
    Dim UseTabs As Integer = 1
    pdf.ExportToRTF(outFileName, options, UseTabs)

    ' Release the reference to the object
    pdf = Nothing
End Sub

/// <summary>
/// Optimization levels accepted by the OptimizeDocument method.
/// </summary>
/// <remarks>
/// The levels are mutually exclusive ordinal values (3 is a level of its own,
/// not the combination of 1 and 2), so the enumeration is deliberately not
/// decorated with <c>[Flags]</c>.
/// </remarks>
public enum OPTIMIZATION_LEVEL
{
    /// <summary>No optimization; recommended when exporting to JPEG and TIFF formats.</summary>
    NO_OPTIMIZATION = 0,

    /// <summary>Line optimization; recommended when exporting to RTF format.</summary>
    LINE_OPTIMIZATION = 1,

    /// <summary>Paragraph optimization; recommended when exporting to HTML format.</summary>
    PARAGRAPH_OPTIMIZATION = 2,

    /// <summary>Table optimization; recommended when exporting to Excel format.</summary>
    TABLE_OPTIMIZATION = 3
}

 

/// <summary>
/// Opens a PDF file, applies line optimization to it and exports the result
/// to an RTF file.
/// </summary>
static void Sample()
{
    // Constants for Activation codes
    const string strLicenseTo = "Amyuni PDF OCR Module Evaluation";
    const string strActivationCode = "07EFCDAB0100010093D94E09B8D9E1238F7DF9E1C5978D7D37AF93D9345B1D74DDC2D9DA41BB345E54160991EA7537246947E74D57A4";

    // Initializing PDFCreactiveX Object
    ACPDFCREACTIVEX.PDFCreactiveX pdf = new ACPDFCREACTIVEX.PDFCreactiveX();

    // Set license key
    pdf.SetLicenseKey(strLicenseTo, strActivationCode);

    // Open an existing PDF file
    string fileName = @"c:\temp\PDFdocument.pdf";
    string password = "";
    pdf.Open(fileName, password);

    // Optimization
    int level = (int)OPTIMIZATION_LEVEL.LINE_OPTIMIZATION;
    pdf.OptimizeDocument(level);

    // Exporting
    string outFileName = @"c:\temp\file.rtf";
    ACPDFCREACTIVEX.acRtfExportOptions options = ACPDFCREACTIVEX.acRtfExportOptions.acRtfExportOptionText;

    // int rather than long: a C# long is 64-bit and cannot be passed implicitly
    // to a 32-bit parameter, whereas an int is accepted by either an int or a
    // long parameter. NOTE(review): confirm the UseTabs parameter type against
    // the generated interop assembly.
    int useTabs = 1;
    pdf.ExportToRTF(outFileName, options, useTabs);

    // Release the reference to the object
    pdf = null;
}

#include <iostream>

#import "c:\users\amyuni\pdfcreactivex.dll" no_namespace

 

using namespace std;

 

// Optimization levels accepted by OptimizeDocument.
enum OPTIMIZATION_LEVEL
{
    NO_OPTIMIZATION        = 0,  // no optimization (JPEG and TIFF export)
    LINE_OPTIMIZATION      = 1,  // line optimization (RTF export)
    PARAGRAPH_OPTIMIZATION = 2,  // paragraph optimization (HTML export)
    TABLE_OPTIMIZATION     = 3   // table optimization (Excel export)
};

 

int main()

{

    // Constants for Activation codes

    bstr_t strLicenseTo = "Amyuni PDF OCR Module Evaluationn";

    bstr_t strActivationCode = "07EFCDAB0100010093D94E09B8D9E1238F7DF9E1C5978D7D37AF93D9345B1D74DDC2D9DA41BB345E54160991EA7537246947E74D57A4";

 

    // Initialize the COM subsystem

    CoInitialize(0);

 

    // IPDFCreactiveXPtr is a smart pointer type defined in pdfcreactivex.tlh,

    // the type library header file generated by the #import instruction above

    IPDFCreactiveXPtr pdf;

 

    // Create the PDFCreactiveX instance

    pdf.CreateInstance(__uuidof(PDFCreactiveX));

 

    // set license key

    pdf->SetLicenseKey(_bstr_t(strLicenseTo), _bstr_t(strActivationCode));

 

    // Open an existent PDF file

    _bstr_t fileName = "c:\\temp\\PDFdocument.pdf";

    _bstr_t password = "";

    pdf->Open(fileName, password);

 

    // Optimization

    int level = (int)LINE_OPTIMIZATION;

    pdf->OptimizeDocument(level);

 

    // Exporting

    bstr_t outFileName = "c:\\temp\\file.rtf";

    acRtfExportOptions options = acRtfExportOptionText;

    long UseTabs = 1;

    pdf->ExportToRTF(outFileName, options,  UseTabs);    

 

    // destroy objects

    pdf = NULL;

 

    return 0;

}

' Values of the acRtfExportOptions enumeration
Const acRtfExportOptionAdvancedRTF = 0
Const acRtfExportOptionFullRTF = 1
Const acRtfExportOptionRTFText = 2
Const acRtfExportOptionText = 3
Const acRtfExportOptionTextANSI = 4

' Values accepted by OptimizeDocument
Const NO_OPTIMIZATION = 0
Const LINE_OPTIMIZATION = 1
Const PARAGRAPH_OPTIMIZATION = 2
Const TABLE_OPTIMIZATION = 3

' Licensing information
Const strLicenseTo = "Amyuni PDF OCR Module Evaluation"
Const strActivationCode = "07EFCDAB0100010093D94E09B8D9E1238F7DF9E1C5978D7D37AF93D9345B1D74DDC2D9DA41BB345E54160991EA7537246947E74D57A4"

' All the variables used by the script
Dim pdf, fileName, password, level, outFileName, options, UseTabs

' Create the PDFCreactiveX object and set its license key
Set pdf = CreateObject("PDFCreactiveX.PDFCreactiveX.6.5")
pdf.SetLicenseKey strLicenseTo, strActivationCode

' Open the source PDF file, using an empty password
fileName = "c:\temp\PDFdocument.pdf"
password = ""
pdf.Open fileName, password

' Apply line optimization
level = LINE_OPTIMIZATION
pdf.OptimizeDocument level

' Export the document to an RTF file
outFileName = "c:\temp\file.rtf"
options = acRtfExportOptionText
UseTabs = 1
pdf.ExportToRTF outFileName, options, UseTabs

' Release the reference to the object
Set pdf = Nothing

 

Important Note

All the samples that are provided in this documentation assume that the developer is using the ActiveX version (PDFCreactiveX.dll).

 

When using the .NET version (acPDFCreatorLib.Net.Dll), the functions are very similar although the code is slightly different. Rather than duplicating all the documentation and sample code, we have chosen to provide a complete .NET sample at the end of this documentation.