GetPageTextArea Method (GdPicturePDF)
Returns the text, regardless of whether it is visible or hidden, contained within a specific area of the current page of the loaded PDF document. You have to specify the required page area as a rectangle. This rectangle is defined by the coordinates of its top left corner and by its width and height, all expressed in inches.
Note that you can use the GdPicturePDF.GuessPageTextRotation method to determine whether all text on the current page is rotated.
public string GetPageTextArea(
float Left,
float Top,
float Width,
float Height
)
public function GetPageTextArea(
Left: Single;
Top: Single;
Width: Single;
Height: Single
): String;
public function GetPageTextArea(
Left : float,
Top : float,
Width : float,
Height : float
) : String;
public: string* GetPageTextArea(
float Left,
float Top,
float Width,
float Height
)
public:
String^ GetPageTextArea(
float Left,
float Top,
float Width,
float Height
)
'Declaration
Public Function GetPageTextArea( _
ByVal Left As Single, _
ByVal Top As Single, _
ByVal Width As Single, _
ByVal Height As Single _
) As String
Parameters
- Left
- The horizontal (X) coordinate of the top left point of the rectangle in inches.
- Top
- The vertical (Y) coordinate of the top left point of the rectangle in inches.
- Width
- The width of the rectangle in inches.
- Height
- The height of the rectangle in inches.
Return Value
The text contained within the specified area of the currently selected page as a string. The
GdPicturePDF.GetStat method can be subsequently used to determine if this method has been successful.
How to extract the text from a specific area of every page in a PDF document and write it to a text file.
' Extracts the text found inside a fixed rectangle on every page of "test.pdf"
' and writes it, page by page, to "text_area_from_pages.txt".
Dim caption As String = "Example: GetPageTextArea"
' Using guarantees that Dispose() runs even if an exception is thrown below.
Using gdpicturePDF As New GdPicturePDF()
    Dim status As GdPictureStatus = gdpicturePDF.LoadFromFile("test.pdf", False)
    If status = GdPictureStatus.OK Then
        ' The writer is closed (and flushed) automatically when the Using block ends.
        Using text_file As New System.IO.StreamWriter("text_area_from_pages.txt")
            Dim pageCount As Integer = gdpicturePDF.GetPageCount()
            ' GetPageCount() returns a value, so its outcome is read through GetStat().
            status = gdpicturePDF.GetStat()
            If status = GdPictureStatus.OK Then
                ' Pages are addressed starting from 1.
                For i As Integer = 1 To pageCount
                    status = gdpicturePDF.SelectPage(i)
                    If status = GdPictureStatus.OK Then
                        ' Rectangle in inches: Left = 1, Top = 1, Width = 5, Height = 2.
                        Dim page_text As String = gdpicturePDF.GetPageTextArea(1, 1, 5, 2)
                        status = gdpicturePDF.GetStat()
                        If status = GdPictureStatus.OK Then
                            Dim message As String = "Page: " & i.ToString() & " Status: " & status.ToString()
                            MessageBox.Show(message, caption)
                            text_file.WriteLine(message)
                            text_file.WriteLine(page_text)
                        Else
                            ' Report the failure instead of silently skipping the page.
                            MessageBox.Show("The GetPageTextArea() method has failed with the status: " & status.ToString(), caption)
                        End If
                    Else
                        MessageBox.Show("The SelectPage() method has failed with the status: " & status.ToString(), caption)
                    End If
                Next
            Else
                MessageBox.Show("The GetPageCount() has failed with the status: " & status.ToString(), caption)
            End If
        End Using
    Else
        MessageBox.Show("The file can't be loaded.", caption)
    End If
End Using
// Extracts the text found inside a fixed rectangle on every page of "test.pdf"
// and writes it, page by page, to "text_area_from_pages.txt".
string caption = "Example: GetPageTextArea";
// The using statement guarantees that Dispose() runs even if an exception is thrown below.
using (GdPicturePDF gdpicturePDF = new GdPicturePDF())
{
    GdPictureStatus status = gdpicturePDF.LoadFromFile("test.pdf", false);
    if (status == GdPictureStatus.OK)
    {
        // The writer is closed (and flushed) automatically when the using block ends.
        using (System.IO.StreamWriter text_file = new System.IO.StreamWriter("text_area_from_pages.txt"))
        {
            int pageCount = gdpicturePDF.GetPageCount();
            // GetPageCount() returns a value, so its outcome is read through GetStat().
            status = gdpicturePDF.GetStat();
            if (status == GdPictureStatus.OK)
            {
                // Pages are addressed starting from 1.
                for (int i = 1; i <= pageCount; i++)
                {
                    status = gdpicturePDF.SelectPage(i);
                    if (status == GdPictureStatus.OK)
                    {
                        // Rectangle in inches: Left = 1, Top = 1, Width = 5, Height = 2.
                        string page_text = gdpicturePDF.GetPageTextArea(1, 1, 5, 2);
                        status = gdpicturePDF.GetStat();
                        if (status == GdPictureStatus.OK)
                        {
                            string message = "Page: " + i.ToString() + " Status: " + status.ToString();
                            MessageBox.Show(message, caption);
                            text_file.WriteLine(message);
                            text_file.WriteLine(page_text);
                        }
                        else
                        {
                            // Report the failure instead of silently skipping the page.
                            MessageBox.Show("The GetPageTextArea() method has failed with the status: " + status.ToString(), caption);
                        }
                    }
                    else
                    {
                        MessageBox.Show("The SelectPage() method has failed with the status: " + status.ToString(), caption);
                    }
                }
            }
            else
            {
                MessageBox.Show("The GetPageCount() has failed with the status: " + status.ToString(), caption);
            }
        }
    }
    else
    {
        MessageBox.Show("The file can't be loaded.", caption);
    }
}