SetTextExtractionOptions Method (GdPicturePDF)
In This Topic
Sets the various settings of the text search/extraction engine for further use when searching or extracting text from the currently loaded PDF document.
Please note that you need to create or load the PDF document first for these settings to work properly. At the same time, the settings are reset to an undefined value (TextExtractionOptions.Default) whenever a new PDF document is created or loaded.
Syntax
Parameters
- Options
- A bitwise combination of values of the TextExtractionOptions enumeration. Specifies required options for both text search and text extraction.
Example
How to extract text of all pages in the PDF document to a text file using custom text extraction engine options.
' Extracts the text of every page of "test.pdf" into "text_from_pages.txt"
' using custom text extraction engine options.
Dim caption As String = "Example: SetTextExtractionOptions"
Dim gdpicturePDF As New GdPicturePDF()
Dim status As GdPictureStatus = gdpicturePDF.LoadFromFile("test.pdf", False)
If status = GdPictureStatus.OK Then
    ' Using guarantees the output file is flushed and closed even if an exception is thrown.
    Using text_file As New System.IO.StreamWriter("text_from_pages.txt")
        Dim pageCount As Integer = gdpicturePDF.GetPageCount()
        status = gdpicturePDF.GetStat()
        If status = GdPictureStatus.OK Then
            Dim message As String = Nothing
            Dim page_text As String = Nothing
            ' The options are set after loading, because loading a document resets them.
            gdpicturePDF.SetTextExtractionOptions(TextExtractionOptions.ExactWordLineMatching)
            ' Pages are numbered starting from 1.
            For i As Integer = 1 To pageCount
                status = gdpicturePDF.SelectPage(i)
                If status = GdPictureStatus.OK Then
                    page_text = gdpicturePDF.GetPageText()
                    ' GetPageText() returns the text; its outcome is read through GetStat().
                    status = gdpicturePDF.GetStat()
                    If status = GdPictureStatus.OK Then
                        message = "Page: " + i.ToString() + " Status: " + status.ToString()
                        MessageBox.Show(message, caption)
                        text_file.WriteLine(message)
                        text_file.WriteLine(page_text)
                    Else
                        ' Report the failure instead of silently skipping the page.
                        MessageBox.Show("The GetPageText() method has failed with the status: " + status.ToString(), caption)
                    End If
                Else
                    MessageBox.Show("The SelectPage() method has failed with the status: " + status.ToString(), caption)
                End If
            Next
        Else
            MessageBox.Show("The GetPageCount() method has failed with the status: " + status.ToString(), caption)
        End If
    End Using
Else
    MessageBox.Show("The file can't be loaded.", caption)
End If
gdpicturePDF.Dispose()
// Extracts the text of every page of "test.pdf" into "text_from_pages.txt"
// using custom text extraction engine options.
string caption = "Example: SetTextExtractionOptions";
GdPicturePDF gdpicturePDF = new GdPicturePDF();
GdPictureStatus status = gdpicturePDF.LoadFromFile("test.pdf", false);
if (status == GdPictureStatus.OK)
{
    // using guarantees the output file is flushed and closed even if an exception is thrown.
    using (System.IO.StreamWriter text_file = new System.IO.StreamWriter("text_from_pages.txt"))
    {
        int pageCount = gdpicturePDF.GetPageCount();
        status = gdpicturePDF.GetStat();
        if (status == GdPictureStatus.OK)
        {
            string message = null;
            string page_text = null;
            // The options are set after loading, because loading a document resets them.
            gdpicturePDF.SetTextExtractionOptions(TextExtractionOptions.ExactWordLineMatching);
            // Pages are numbered starting from 1.
            for (int i = 1; i <= pageCount; i++)
            {
                status = gdpicturePDF.SelectPage(i);
                if (status == GdPictureStatus.OK)
                {
                    page_text = gdpicturePDF.GetPageText();
                    // GetPageText() returns the text; its outcome is read through GetStat().
                    status = gdpicturePDF.GetStat();
                    if (status == GdPictureStatus.OK)
                    {
                        message = "Page: " + i.ToString() + " Status: " + status.ToString();
                        MessageBox.Show(message, caption);
                        text_file.WriteLine(message);
                        text_file.WriteLine(page_text);
                    }
                    else
                    {
                        // Report the failure instead of silently skipping the page.
                        MessageBox.Show("The GetPageText() method has failed with the status: " + status.ToString(), caption);
                    }
                }
                else
                {
                    MessageBox.Show("The SelectPage() method has failed with the status: " + status.ToString(), caption);
                }
            }
        }
        else
        {
            MessageBox.Show("The GetPageCount() method has failed with the status: " + status.ToString(), caption);
        }
    }
}
else
{
    MessageBox.Show("The file can't be loaded.", caption);
}
gdpicturePDF.Dispose();
Example
How to extract text of all pages in the PDF document to a text file using custom text extraction engine options.
' Extracts the text of every page of "test.pdf" into "text_from_pages.txt"
' using custom text extraction engine options.
Dim caption As String = "Example: SetTextExtractionOptions"
Dim gdpicturePDF As New GdPicturePDF()
Dim status As GdPictureStatus = gdpicturePDF.LoadFromFile("test.pdf", False)
If status = GdPictureStatus.OK Then
    ' Using guarantees the output file is flushed and closed even if an exception is thrown.
    Using text_file As New System.IO.StreamWriter("text_from_pages.txt")
        Dim pageCount As Integer = gdpicturePDF.GetPageCount()
        status = gdpicturePDF.GetStat()
        If status = GdPictureStatus.OK Then
            Dim message As String = Nothing
            Dim page_text As String = Nothing
            ' The options are set after loading, because loading a document resets them.
            gdpicturePDF.SetTextExtractionOptions(TextExtractionOptions.ExactWordLineMatching)
            ' Pages are numbered starting from 1.
            For i As Integer = 1 To pageCount
                status = gdpicturePDF.SelectPage(i)
                If status = GdPictureStatus.OK Then
                    page_text = gdpicturePDF.GetPageText()
                    ' GetPageText() returns the text; its outcome is read through GetStat().
                    status = gdpicturePDF.GetStat()
                    If status = GdPictureStatus.OK Then
                        message = "Page: " + i.ToString() + " Status: " + status.ToString()
                        MessageBox.Show(message, caption)
                        text_file.WriteLine(message)
                        text_file.WriteLine(page_text)
                    Else
                        ' Report the failure instead of silently skipping the page.
                        MessageBox.Show("The GetPageText() method has failed with the status: " + status.ToString(), caption)
                    End If
                Else
                    MessageBox.Show("The SelectPage() method has failed with the status: " + status.ToString(), caption)
                End If
            Next
        Else
            MessageBox.Show("The GetPageCount() method has failed with the status: " + status.ToString(), caption)
        End If
    End Using
Else
    MessageBox.Show("The file can't be loaded.", caption)
End If
gdpicturePDF.Dispose()
// Extracts the text of every page of "test.pdf" into "text_from_pages.txt"
// using custom text extraction engine options.
string caption = "Example: SetTextExtractionOptions";
GdPicturePDF gdpicturePDF = new GdPicturePDF();
GdPictureStatus status = gdpicturePDF.LoadFromFile("test.pdf", false);
if (status == GdPictureStatus.OK)
{
    // using guarantees the output file is flushed and closed even if an exception is thrown.
    using (System.IO.StreamWriter text_file = new System.IO.StreamWriter("text_from_pages.txt"))
    {
        int pageCount = gdpicturePDF.GetPageCount();
        status = gdpicturePDF.GetStat();
        if (status == GdPictureStatus.OK)
        {
            string message = null;
            string page_text = null;
            // The options are set after loading, because loading a document resets them.
            gdpicturePDF.SetTextExtractionOptions(TextExtractionOptions.ExactWordLineMatching);
            // Pages are numbered starting from 1.
            for (int i = 1; i <= pageCount; i++)
            {
                status = gdpicturePDF.SelectPage(i);
                if (status == GdPictureStatus.OK)
                {
                    page_text = gdpicturePDF.GetPageText();
                    // GetPageText() returns the text; its outcome is read through GetStat().
                    status = gdpicturePDF.GetStat();
                    if (status == GdPictureStatus.OK)
                    {
                        message = "Page: " + i.ToString() + " Status: " + status.ToString();
                        MessageBox.Show(message, caption);
                        text_file.WriteLine(message);
                        text_file.WriteLine(page_text);
                    }
                    else
                    {
                        // Report the failure instead of silently skipping the page.
                        MessageBox.Show("The GetPageText() method has failed with the status: " + status.ToString(), caption);
                    }
                }
                else
                {
                    MessageBox.Show("The SelectPage() method has failed with the status: " + status.ToString(), caption);
                }
            }
        }
        else
        {
            MessageBox.Show("The GetPageCount() method has failed with the status: " + status.ToString(), caption);
        }
    }
}
else
{
    MessageBox.Show("The file can't be loaded.", caption);
}
gdpicturePDF.Dispose();
See Also