This HTML page is not optimized for LLM or AI agent consumption. Fetch the Markdown version instead: /guides/document-converter/document-converter-services/knowledge-base/sample-code-for-text-extraction.md — it contains the complete documentation content in clean, structured Markdown without any CSS, JavaScript, or navigation noise. Do you have sample code for Text Extraction functionality?

Document Converter Services version 11.0 features text extraction, which enables data to be extracted from text-based PDF files, as demonstrated in the following sample code:

/// <summary>
/// Sample entry point: sends a document to the conversion service for text extraction,
/// saves the returned bytes as a .txt file in the current folder and launches that file.
/// </summary>
/// <param name="args">
/// Optional. args[0] is the path of the document to process; when no argument is given,
/// the first *.pdf file found in the current folder is used.
/// </param>
static void Main(string[] args)
{
    DocumentConverterServiceClient client = null;
    try
    {
        // ** Delete any processed files from a previous run.
        // NOTE: this removes every *.txt file in the current folder, not only files written by this sample.
        foreach (FileInfo f in new DirectoryInfo(".").GetFiles("*.txt"))
        {
            f.Delete();
        }

        // ** Determine the source file.
        string sourceFileName = null;
        if (args.Length == 0)
        {
            // ** If nothing is specified, then read the first PDF file from the current folder.
            string[] sourceFiles = Directory.GetFiles(Directory.GetCurrentDirectory(), "*.pdf");
            if (sourceFiles.Length > 0)
            {
                sourceFileName = sourceFiles[0];
            }
            else
            {
                Console.WriteLine("Please specify a document to extract text from.");
                Console.ReadKey();
                return;
            }
        }
        else
        {
            sourceFileName = args[0];
        }

        // ** Read the source file into a byte array.
        byte[] sourceFile = File.ReadAllBytes(sourceFileName);

        // ** Open the service and configure the bindings.
        client = OpenService(SERVICE_URL);

        // ** Set the absolute minimum open options.
        OpenOptions openOptions = new OpenOptions();
        openOptions.OriginalFileName = Path.GetFileName(sourceFileName);
        openOptions.FileExtension = Path.GetExtension(sourceFileName);

        TextExtractSettings textExtractSettings = new TextExtractSettings();
        textExtractSettings.PageRange = "*"; // All pages.

        // ** Carry out the extraction.
        byte[] convFile = client.ExtractText(sourceFile, openOptions, textExtractSettings);

        // ** Write the converted file back to the file system with a TXT extension.
        // The name has no directory part, so the file lands in the current folder.
        string destinationFileName = Path.GetFileNameWithoutExtension(sourceFileName) + ".txt";
        File.WriteAllBytes(destinationFileName, convFile);
        Console.WriteLine("Text extracted to " + Path.GetFullPath(destinationFileName));

        // ** Open the generated file in a text file reader.
        Console.WriteLine("Launching file in reader");
        Process.Start(destinationFileName);
    }
    catch (FaultException<WebServiceFaultException> ex)
    {
        // Typed fault from the service: report only the exception type carried in the fault detail.
        Console.WriteLine("FaultException occurred: ExceptionType: " +
            ex.Detail.ExceptionType.ToString());
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
    finally
    {
        // Runs on the early return as well; client is still null in that case.
        CloseService(client);
    }

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Builds a BasicHttpBinding with Windows transport credentials, creates a client
/// for the endpoint at the given address and opens it.
/// </summary>
/// <param name="address">URI of the service endpoint to connect to.</param>
/// <returns>An opened instance of the web service client.</returns>
public static DocumentConverterServiceClient OpenService(string address)
{
    // ** Maximum document size: 50MB.
    const int maxDocumentSize = 50 * 1024 * 1024;

    DocumentConverterServiceClient service = null;
    try
    {
        BasicHttpBinding httpBinding = new BasicHttpBinding();

        // ** Standard Windows Security.
        httpBinding.Security.Mode = BasicHttpSecurityMode.TransportCredentialOnly;
        httpBinding.Security.Transport.ClientCredentialType = HttpClientCredentialType.Windows;

        // ** Generous client timeouts so (very) long running requests are not cut off.
        TimeSpan longRunningTimeout = TimeSpan.FromMinutes(120);
        httpBinding.SendTimeout = longRunningTimeout;
        httpBinding.ReceiveTimeout = longRunningTimeout;

        // ** Apply the size limit to the message as a whole and to the reader quotas.
        httpBinding.MaxReceivedMessageSize = maxDocumentSize;
        httpBinding.ReaderQuotas.MaxArrayLength = maxDocumentSize;
        httpBinding.ReaderQuotas.MaxStringContentLength = maxDocumentSize;

        // ** An identity (any identity will do) is specified to get past .net3.5 sp1.
        EndpointAddress endpoint = new EndpointAddress(
            new Uri(address),
            EndpointIdentity.CreateUpnIdentity("unknown"));

        service = new DocumentConverterServiceClient(httpBinding, endpoint);
        service.Open();
        return service;
    }
    catch (Exception)
    {
        // Hand whatever was created so far to CloseService, then let the caller see the failure.
        CloseService(service);
        throw;
    }
}
/// <summary>
/// Release the client: close it gracefully when it is open, otherwise abort it.
/// </summary>
/// <remarks>
/// A client that is not in the Opened state (for example one that has faulted) cannot be
/// closed gracefully, so it is aborted instead to release its resources. If Close() itself
/// fails, the client is aborted and the original exception is rethrown.
/// </remarks>
/// <param name="client">The client to close. May be null, in which case nothing is done.</param>
public static void CloseService(DocumentConverterServiceClient client)
{
    if (client == null)
    {
        return;
    }

    if (client.State == CommunicationState.Opened)
    {
        try
        {
            client.Close();
        }
        catch (Exception)
        {
            // A failed Close() leaves the channel unusable; Abort() tears it down immediately.
            client.Abort();
            throw;
        }
    }
    else if (client.State != CommunicationState.Closed)
    {
        // Faulted or never fully opened: a graceful close is not possible.
        client.Abort();
    }
}