Extract tabular data from a PDF
This example demonstrates how to extract tables from a PDF document using the Nutrient Document Converter Services (DCS) API. The extracted table can be returned as an Excel spreadsheet or JSON file.
Prerequisites
-
Nutrient Document Converter Services (DCS) running and accessible.
-
Appropriate service license for using the table extraction functionality.
-
Implemented
OpenService()
and CloseService()
helpers.
Sample code
/// <summary>
/// Extract tabular data from a PDF by sending it to the conversion service
/// and writing the returned file into <paramref name="targetFolder"/>.
/// </summary>
/// <param name="ServiceURL">URL endpoint for the PDF Converter service.</param>
/// <param name="sourceFileName">Source filename.</param>
/// <param name="targetFolder">Target folder to receive the output file. Created if it does not exist.</param>
/// <param name="outputFileType">XLSX or JSON.</param>
/// <param name="languages">Value assigned to the OCR language setting; defaults to "eng".</param>
static void TestTableExtract(string ServiceURL, string sourceFileName, string targetFolder, string outputFileType, string languages = "eng")
{
    Console.WriteLine($"Extracting tables from {sourceFileName}");
    DocumentConverterServiceClient client = null;

    // Create an `OpenOptions` instance with minimum properties needed for file identification.
    OpenOptions openOptions = new OpenOptions
    {
        FileExtension = Path.GetExtension(sourceFileName),
        OriginalFileName = Path.GetFileName(sourceFileName)
    };

    // Create a `TableExtractionSettings` object.
    TableExtractionSettings settings = new TableExtractionSettings
    {
        DPI = "300",
        SeparateTables = BooleanEnum.True,
        EnableOrientationDetection = BooleanEnum.True,
        EnableSkewDetection = BooleanEnum.True,
        RenderFormFields = BooleanEnum.True,
        OutputFileType = outputFileType,
        OCRLanguage = languages
    };

    try
    {
        // Determine the source file and read it into a byte array.
        byte[] sourceFile = File.ReadAllBytes(sourceFileName);

        // Open the service and configure the bindings.
        client = OpenService(ServiceURL);

        // Carry out the conversion.
        BatchResult result = client.ExtractTables(sourceFile, openOptions, settings);
        if (result == null)
        {
            Console.WriteLine("No result returned");
            return;
        }

        // Create the target folder if it does not exist.
        // `Directory.CreateDirectory` is a no-op for an existing folder, so no prior check is needed.
        Directory.CreateDirectory(targetFolder);
        Console.WriteLine($"Output to: {targetFolder}");

        // Get the filename. It comes from the service response, so keep only the
        // file-name component to guarantee the output stays inside the target folder.
        string filename = Path.GetFileName(result.FileName);
        Console.WriteLine(filename);

        // Write the result to a file.
        File.WriteAllBytes(Path.Combine(targetFolder, filename), result.File);
    }
    finally
    {
        // Always release the service client, even when reading or extraction throws.
        if (client != null)
        {
            CloseService(client);
        }
    }
}