Use Nutrient DWS as a table extraction API from PDF to Excel, CSV, JSON, and XML. Extract tables from PDF documents when your workflow needs line items, statements, reports, and other tabular content as structured output instead of page images or manual rekeying.
This example will convert your uploaded PDF file to an XLSX.
Try it out in three steps
document.pdf to your project folder.result.xlsx in your project folder to view the results.curl -X POST https://api.nutrient.io/build \ -H "Authorization: Bearer your_api_key_here" \ -o result.xlsx \ --fail \ -F document=@document.pdf \ -F instructions='{ "parts": [ { "file": "document" } ], "output": { "type": "xlsx" } }'curl -X POST https://api.nutrient.io/build ^ -H "Authorization: Bearer your_api_key_here" ^ -o result.xlsx ^ --fail ^ -F document=@document.pdf ^ -F instructions="{\"parts\": [{\"file\": \"document\"}], \"output\": {\"type\": \"xlsx\"}}"package com.example.pspdfkit;
import java.io.File;import java.io.IOException;import java.nio.file.FileSystems;import java.nio.file.Files;import java.nio.file.StandardCopyOption;
import org.json.JSONArray;import org.json.JSONObject;
import okhttp3.MediaType;import okhttp3.MultipartBody;import okhttp3.OkHttpClient;import okhttp3.Request;import okhttp3.RequestBody;import okhttp3.Response;
public final class PspdfkitApiExample { public static void main(final String[] args) throws IOException { final RequestBody body = new MultipartBody.Builder() .setType(MultipartBody.FORM) .addFormDataPart( "document", "document.pdf", RequestBody.create( MediaType.parse("application/pdf"), new File("document.pdf") ) ) .addFormDataPart( "instructions", new JSONObject() .put("parts", new JSONArray() .put(new JSONObject() .put("file", "document") ) ) .put("output", new JSONObject() .put("type", "xlsx") ).toString() ) .build();
final Request request = new Request.Builder() .url("https://api.nutrient.io/build") .method("POST", body) .addHeader("Authorization", "Bearer your_api_key_here") .build();
final OkHttpClient client = new OkHttpClient() .newBuilder() .build();
final Response response = client.newCall(request).execute();
if (response.isSuccessful()) { Files.copy( response.body().byteStream(), FileSystems.getDefault().getPath("result.xlsx"), StandardCopyOption.REPLACE_EXISTING ); } else { // Handle the error throw new IOException(response.body().string()); } }}using System;using System.IO;using System.Net;using RestSharp;
namespace PspdfkitApiDemo{ class Program { static void Main(string[] args) { var client = new RestClient("https://api.nutrient.io/build");
var request = new RestRequest(Method.POST) .AddHeader("Authorization", "Bearer your_api_key_here") .AddFile("document", "document.pdf") .AddParameter("instructions", new JsonObject { ["parts"] = new JsonArray { new JsonObject { ["file"] = "document" } }, ["output"] = new JsonObject { ["type"] = "xlsx" } }.ToString());
request.AdvancedResponseWriter = (responseStream, response) => { if (response.StatusCode == HttpStatusCode.OK) { using (responseStream) { using var outputFileWriter = File.OpenWrite("result.xlsx"); responseStream.CopyTo(outputFileWriter); } } else { var responseStreamReader = new StreamReader(responseStream); Console.Write(responseStreamReader.ReadToEnd()); } };
client.Execute(request); } }}// This code requires Node.js. Do not run this code directly in a web browser.
const axios = require('axios')const FormData = require('form-data')const fs = require('fs')
const formData = new FormData()formData.append('instructions', JSON.stringify({ parts: [ { file: "document" } ], output: { type: "xlsx" }}))formData.append('document', fs.createReadStream('document.pdf'))
;(async () => { try { const response = await axios.post('https://api.nutrient.io/build', formData, { headers: formData.getHeaders({ 'Authorization': 'Bearer your_api_key_here' }), responseType: "stream" })
response.data.pipe(fs.createWriteStream("result.xlsx")) } catch (e) { const errorString = await streamToString(e.response.data) console.log(errorString) }})()
function streamToString(stream) { const chunks = [] return new Promise((resolve, reject) => { stream.on("data", (chunk) => chunks.push(Buffer.from(chunk))) stream.on("error", (err) => reject(err)) stream.on("end", () => resolve(Buffer.concat(chunks).toString("utf8"))) })}import requestsimport json
response = requests.request( 'POST', 'https://api.nutrient.io/build', headers = { 'Authorization': 'Bearer your_api_key_here' }, files = { 'document': open('document.pdf', 'rb') }, data = { 'instructions': json.dumps({ 'parts': [ { 'file': 'document' } ], 'output': { 'type': 'xlsx' } }) }, stream = True)
if response.ok: with open('result.xlsx', 'wb') as fd: for chunk in response.iter_content(chunk_size=8096): fd.write(chunk)else: print(response.text) exit()<?php
$FileHandle = fopen('result.xlsx', 'w+');
$curl = curl_init();
curl_setopt_array($curl, array( CURLOPT_URL => 'https://api.nutrient.io/build', CURLOPT_CUSTOMREQUEST => 'POST', CURLOPT_RETURNTRANSFER => true, CURLOPT_ENCODING => '', CURLOPT_POSTFIELDS => array( 'instructions' => '{ "parts": [ { "file": "document" } ], "output": { "type": "xlsx" } }', 'document' => new CURLFILE('document.pdf') ), CURLOPT_HTTPHEADER => array( 'Authorization: Bearer your_api_key_here' ), CURLOPT_FILE => $FileHandle,));
$response = curl_exec($curl);
curl_close($curl);
fclose($FileHandle);POST https://api.nutrient.io/build HTTP/1.1Content-Type: multipart/form-data; boundary=--customboundaryAuthorization: Bearer your_api_key_here
--customboundaryContent-Disposition: form-data; name="instructions"Content-Type: application/json
{ "parts": [ { "file": "document" } ], "output": { "type": "xlsx" }}--customboundaryContent-Disposition: form-data; name="document"; filename="document.pdf"Content-Type: application/pdf
(document data)--customboundary--A PDF-to-XML API is currently in development. To get an email when we launch this feature, or to learn more, please get in touch. Nutrient’s document understanding engine will analyze input PDF documents and output structured data as XML files. The API will be usable by PHP, Java, Python, Node, and more.
To get an email when we launch this feature, or to learn more, please tell us about your project and we’ll be happy to get back to you.
By submitting this form, you agree to Nutrient’s Privacy Policy and Terms of Service.
Thanks for filling out the form and letting us know about your interest in our PDF-to-XML API.
This example will extract tables from a PDF and return them as a JSON file.
Try it out in three steps
document.pdf to your project folder.result.json to see the output.curl -X POST https://api.nutrient.io/build \ -H "Authorization: Bearer your_api_key_here" \ -o result.json \ --fail \ -F document=@document.pdf \ -F instructions='{ "parts": [ { "file": "document" } ], "output": { "type": "json-content", "plainText": false, "structuredText": false, "keyValuePairs": false, "tables": true } }'curl -X POST https://api.nutrient.io/build ^ -H "Authorization: Bearer your_api_key_here" ^ -o result.json ^ --fail ^ -F document=@document.pdf ^ -F instructions="{\"parts\": [{\"file\": \"document\"}], \"output\": {\"type\": \"json-content\", \"plainText\": false, \"structuredText\": false, \"keyValuePairs\": false, \"tables\": true}}"package com.example.pspdfkit;
import java.io.File;import java.io.IOException;import java.nio.file.FileSystems;import java.nio.file.Files;import java.nio.file.StandardCopyOption;
import org.json.JSONArray;import org.json.JSONObject;
import okhttp3.MediaType;import okhttp3.MultipartBody;import okhttp3.OkHttpClient;import okhttp3.Request;import okhttp3.RequestBody;import okhttp3.Response;
public final class PspdfkitApiExample { public static void main(final String[] args) throws IOException { final RequestBody body = new MultipartBody.Builder() .setType(MultipartBody.FORM) .addFormDataPart( "document", "document.pdf", RequestBody.create( MediaType.parse("application/pdf"), new File("document.pdf") ) ) .addFormDataPart( "instructions", new JSONObject() .put("parts", new JSONArray() .put(new JSONObject() .put("file", "document") ) ) .put("output", new JSONObject() .put("type", "json-content") .put("plainText", false) .put("structuredText", false) .put("keyValuePairs", false) .put("tables", true) ).toString() ) .build();
final Request request = new Request.Builder() .url("https://api.nutrient.io/build") .method("POST", body) .addHeader("Authorization", "Bearer your_api_key_here") .build();
final OkHttpClient client = new OkHttpClient() .newBuilder() .build();
final Response response = client.newCall(request).execute();
if (response.isSuccessful()) { Files.copy( response.body().byteStream(), FileSystems.getDefault().getPath("result.json"), StandardCopyOption.REPLACE_EXISTING ); } else { // Handle the error throw new IOException(response.body().string()); } }}using System;using System.IO;using System.Net;using RestSharp;
namespace PspdfkitApiDemo{ class Program { static void Main(string[] args) { var client = new RestClient("https://api.nutrient.io/build");
var request = new RestRequest(Method.POST) .AddHeader("Authorization", "Bearer your_api_key_here") .AddFile("document", "document.pdf") .AddParameter("instructions", new JsonObject { ["parts"] = new JsonArray { new JsonObject { ["file"] = "document" } }, ["output"] = new JsonObject { ["type"] = "json-content", ["plainText"] = false, ["structuredText"] = false, ["keyValuePairs"] = false, ["tables"] = true } }.ToString());
request.AdvancedResponseWriter = (responseStream, response) => { if (response.StatusCode == HttpStatusCode.OK) { using (responseStream) { using var outputFileWriter = File.OpenWrite("result.json"); responseStream.CopyTo(outputFileWriter); } } else { var responseStreamReader = new StreamReader(responseStream); Console.Write(responseStreamReader.ReadToEnd()); } };
client.Execute(request); } }}// This code requires Node.js. Do not run this code directly in a web browser.
const axios = require('axios')const FormData = require('form-data')const fs = require('fs')
const formData = new FormData()formData.append('instructions', JSON.stringify({ parts: [ { file: "document" } ], output: { type: "json-content", plainText: false, structuredText: false, keyValuePairs: false, tables: true }}))formData.append('document', fs.createReadStream('document.pdf'))
;(async () => { try { const response = await axios.post('https://api.nutrient.io/build', formData, { headers: formData.getHeaders({ 'Authorization': 'Bearer your_api_key_here' }), responseType: "stream" })
response.data.pipe(fs.createWriteStream("result.json")) } catch (e) { const errorString = await streamToString(e.response.data) console.log(errorString) }})()
function streamToString(stream) { const chunks = [] return new Promise((resolve, reject) => { stream.on("data", (chunk) => chunks.push(Buffer.from(chunk))) stream.on("error", (err) => reject(err)) stream.on("end", () => resolve(Buffer.concat(chunks).toString("utf8"))) })}import requestsimport json
response = requests.request( 'POST', 'https://api.nutrient.io/build', headers = { 'Authorization': 'Bearer your_api_key_here' }, files = { 'document': open('document.pdf', 'rb') }, data = { 'instructions': json.dumps({ 'parts': [ { 'file': 'document' } ], 'output': { 'type': 'json-content', 'plainText': False, 'structuredText': False, 'keyValuePairs': False, 'tables': True } }) }, stream = True)
if response.ok: with open('result.json', 'wb') as fd: for chunk in response.iter_content(chunk_size=8096): fd.write(chunk)else: print(response.text) exit()<?php
$FileHandle = fopen('result.json', 'w+');
$curl = curl_init();
curl_setopt_array($curl, array( CURLOPT_URL => 'https://api.nutrient.io/build', CURLOPT_CUSTOMREQUEST => 'POST', CURLOPT_RETURNTRANSFER => true, CURLOPT_ENCODING => '', CURLOPT_POSTFIELDS => array( 'instructions' => '{ "parts": [ { "file": "document" } ], "output": { "type": "json-content", "plainText": false, "structuredText": false, "keyValuePairs": false, "tables": true } }', 'document' => new CURLFILE('document.pdf') ), CURLOPT_HTTPHEADER => array( 'Authorization: Bearer your_api_key_here' ), CURLOPT_FILE => $FileHandle,));
$response = curl_exec($curl);
curl_close($curl);
fclose($FileHandle);POST https://api.nutrient.io/build HTTP/1.1Content-Type: multipart/form-data; boundary=--customboundaryAuthorization: Bearer your_api_key_here
--customboundaryContent-Disposition: form-data; name="instructions"Content-Type: application/json
{ "parts": [ { "file": "document" } ], "output": { "type": "json-content", "plainText": false, "structuredText": false, "keyValuePairs": false, "tables": true }}--customboundaryContent-Disposition: form-data; name="document"; filename="document.pdf"Content-Type: application/pdf
(document data)--customboundary--A PDF-to-CSV API is currently in development. To get an email when we launch this feature, or to learn more, please get in touch. Nutrient’s document understanding engine will analyze input PDF documents and output structured data as CSV files. The API will be usable by PHP, Java, Python, Node, and more.
To get an email when we launch this feature, or to learn more, please tell us about your project and we’ll be happy to get back to you.
By submitting this form, you agree to Nutrient’s Privacy Policy and Terms of Service.
Thanks for filling out the form and letting us know about your interest in our PDF-to-CSV API.
Convert PDF documents to Microsoft Excel files containing structured data.
Convert PDF documents to XML files with our table extraction API.
Use our PDF-to-JSON conversion API to generate structured JSON files from PDF documents.
Use our PDF-to-CSV conversion API to output table data as comma-separated values.
Most common next steps
Use the following:
Get started:
No input or resulting documents are stored on our infrastructure. All files are deleted as soon as a request finishes. Alternatively, check out our self-hosted product.
All communication between your application and Nutrient is done via HTTPS to ensure your data is encrypted when it’s sent to us.
All payments are handled by Paddle. Nutrient DWS Processor API never has direct access to any of your payment data.
Create an account to get your DWS Processor API key and start making API calls.