Table extraction API from PDF to Excel and JSON workflows

Use Nutrient DWS as a table extraction API from PDF to Excel, CSV, JSON, and XML. Extract tables from PDF documents when your workflow needs line items, statements, reports, and other tabular content as structured output instead of page images or manual rekeying.

TRY FOR FREE PROCESSOR PRICING

Try it out

This example will convert your uploaded PDF file to an XLSX.

Try it out in three steps

Add a PDF file named document.pdf to your project folder.
Run the code from the same folder.
Open result.xlsx in your project folder to view the results.

curl -X POST https://api.nutrient.io/build \
  -H "Authorization: Bearer your_api_key_here" \
  -o result.xlsx \
  --fail \
  -F document=@document.pdf \
  -F instructions='{
      "parts": [
        {
          "file": "document"
        }
      ],
      "output": {
        "type": "xlsx"
      }
    }'

curl -X POST https://api.nutrient.io/build ^
  -H "Authorization: Bearer your_api_key_here" ^
  -o result.xlsx ^
  --fail ^
  -F document=@document.pdf ^
  -F instructions="{\"parts\": [{\"file\": \"document\"}], \"output\": {\"type\": \"xlsx\"}}"

package com.example.pspdfkit;

import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;

import org.json.JSONArray;
import org.json.JSONObject;

import okhttp3.MediaType;
import okhttp3.MultipartBody;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;

public final class PspdfkitApiExample {
  public static void main(final String[] args) throws IOException {
    final RequestBody body = new MultipartBody.Builder()
      .setType(MultipartBody.FORM)
      .addFormDataPart(
        "document",
        "document.pdf",
        RequestBody.create(
          MediaType.parse("application/pdf"),
          new File("document.pdf")
        )
      )
      .addFormDataPart(
        "instructions",
        new JSONObject()
          .put("parts", new JSONArray()
            .put(new JSONObject()
              .put("file", "document")
            )
          )
          .put("output", new JSONObject()
            .put("type", "xlsx")
          ).toString()
      )
      .build();

    final Request request = new Request.Builder()
      .url("https://api.nutrient.io/build")
      .method("POST", body)
      .addHeader("Authorization", "Bearer your_api_key_here")
      .build();

    final OkHttpClient client = new OkHttpClient()
      .newBuilder()
      .build();

    final Response response = client.newCall(request).execute();

    if (response.isSuccessful()) {
      Files.copy(
        response.body().byteStream(),
        FileSystems.getDefault().getPath("result.xlsx"),
        StandardCopyOption.REPLACE_EXISTING
      );
    } else {
      // Handle the error
      throw new IOException(response.body().string());
    }
  }
}

using System;
using System.IO;
using System.Net;
using RestSharp;

namespace PspdfkitApiDemo
{
  class Program
  {
    static void Main(string[] args)
    {
      var client = new RestClient("https://api.nutrient.io/build");

      var request = new RestRequest(Method.POST)
        .AddHeader("Authorization", "Bearer your_api_key_here")
        .AddFile("document", "document.pdf")
        .AddParameter("instructions", new JsonObject
        {
          ["parts"] = new JsonArray
          {
            new JsonObject
            {
              ["file"] = "document"
            }
          },
          ["output"] = new JsonObject
          {
            ["type"] = "xlsx"
          }
        }.ToString());

      request.AdvancedResponseWriter = (responseStream, response) =>
      {
        if (response.StatusCode == HttpStatusCode.OK)
        {
          using (responseStream)
          {
            using var outputFileWriter = File.OpenWrite("result.xlsx");
            responseStream.CopyTo(outputFileWriter);
          }
        }
        else
        {
          var responseStreamReader = new StreamReader(responseStream);
          Console.Write(responseStreamReader.ReadToEnd());
        }
      };

      client.Execute(request);
    }
  }
}

// This code requires Node.js. Do not run this code directly in a web browser.

const axios = require('axios')
const FormData = require('form-data')
const fs = require('fs')

const formData = new FormData()
formData.append('instructions', JSON.stringify({
  parts: [
    {
      file: "document"
    }
  ],
  output: {
    type: "xlsx"
  }
}))
formData.append('document', fs.createReadStream('document.pdf'))

;(async () => {
  try {
    const response = await axios.post('https://api.nutrient.io/build', formData, {
      headers: formData.getHeaders({
        'Authorization': 'Bearer your_api_key_here'
      }),
      responseType: "stream"
    })

    response.data.pipe(fs.createWriteStream("result.xlsx"))
  } catch (e) {
    const errorString = await streamToString(e.response.data)
    console.log(errorString)
  }
})()

function streamToString(stream) {
  const chunks = []
  return new Promise((resolve, reject) => {
    stream.on("data", (chunk) => chunks.push(Buffer.from(chunk)))
    stream.on("error", (err) => reject(err))
    stream.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")))
  })
}

import requests
import json

response = requests.request(
  'POST',
  'https://api.nutrient.io/build',
  headers = {
    'Authorization': 'Bearer your_api_key_here'
  },
  files = {
    'document': open('document.pdf', 'rb')
  },
  data = {
    'instructions': json.dumps({
      'parts': [
        {
          'file': 'document'
        }
      ],
      'output': {
        'type': 'xlsx'
      }
    })
  },
  stream = True
)

if response.ok:
  with open('result.xlsx', 'wb') as fd:
    for chunk in response.iter_content(chunk_size=8096):
      fd.write(chunk)
else:
  print(response.text)
  exit()

<?php

$FileHandle = fopen('result.xlsx', 'w+');

$curl = curl_init();

curl_setopt_array($curl, array(
  CURLOPT_URL => 'https://api.nutrient.io/build',
  CURLOPT_CUSTOMREQUEST => 'POST',
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => '',
  CURLOPT_POSTFIELDS => array(
    'instructions' => '{
      "parts": [
        {
          "file": "document"
        }
      ],
      "output": {
        "type": "xlsx"
      }
    }',
    'document' => new CURLFILE('document.pdf')
  ),
  CURLOPT_HTTPHEADER => array(
    'Authorization: Bearer your_api_key_here'
  ),
  CURLOPT_FILE => $FileHandle,
));

$response = curl_exec($curl);

curl_close($curl);

fclose($FileHandle);

POST https://api.nutrient.io/build HTTP/1.1
Content-Type: multipart/form-data; boundary=--customboundary
Authorization: Bearer your_api_key_here

--customboundary
Content-Disposition: form-data; name="instructions"
Content-Type: application/json

{
  "parts": [
    {
      "file": "document"
    }
  ],
  "output": {
    "type": "xlsx"
  }
}
--customboundary
Content-Disposition: form-data; name="document"; filename="document.pdf"
Content-Type: application/pdf

(document data)
--customboundary--

Try it out

This example will extract tables from a PDF and return them as a JSON file.

Try it out in three steps

Add a PDF named document.pdf to your project folder.
Run the code from the same folder.
Open result.json to see the output.

curl -X POST https://api.nutrient.io/build \
  -H "Authorization: Bearer your_api_key_here" \
  -o result.json \
  --fail \
  -F document=@document.pdf \
  -F instructions='{
      "parts": [
        {
          "file": "document"
        }
      ],
      "output": {
        "type": "json-content",
        "plainText": false,
        "structuredText": false,
        "keyValuePairs": false,
        "tables": true
      }
    }'

curl -X POST https://api.nutrient.io/build ^
  -H "Authorization: Bearer your_api_key_here" ^
  -o result.json ^
  --fail ^
  -F document=@document.pdf ^
  -F instructions="{\"parts\": [{\"file\": \"document\"}], \"output\": {\"type\": \"json-content\", \"plainText\": false, \"structuredText\": false, \"keyValuePairs\": false, \"tables\": true}}"

package com.example.pspdfkit;

import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;

import org.json.JSONArray;
import org.json.JSONObject;

import okhttp3.MediaType;
import okhttp3.MultipartBody;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;

public final class PspdfkitApiExample {
  public static void main(final String[] args) throws IOException {
    final RequestBody body = new MultipartBody.Builder()
      .setType(MultipartBody.FORM)
      .addFormDataPart(
        "document",
        "document.pdf",
        RequestBody.create(
          MediaType.parse("application/pdf"),
          new File("document.pdf")
        )
      )
      .addFormDataPart(
        "instructions",
        new JSONObject()
          .put("parts", new JSONArray()
            .put(new JSONObject()
              .put("file", "document")
            )
          )
          .put("output", new JSONObject()
            .put("type", "json-content")
            .put("plainText", false)
            .put("structuredText", false)
            .put("keyValuePairs", false)
            .put("tables", true)
          ).toString()
      )
      .build();

    final Request request = new Request.Builder()
      .url("https://api.nutrient.io/build")
      .method("POST", body)
      .addHeader("Authorization", "Bearer your_api_key_here")
      .build();

    final OkHttpClient client = new OkHttpClient()
      .newBuilder()
      .build();

    final Response response = client.newCall(request).execute();

    if (response.isSuccessful()) {
      Files.copy(
        response.body().byteStream(),
        FileSystems.getDefault().getPath("result.json"),
        StandardCopyOption.REPLACE_EXISTING
      );
    } else {
      // Handle the error
      throw new IOException(response.body().string());
    }
  }
}

using System;
using System.IO;
using System.Net;
using RestSharp;

namespace PspdfkitApiDemo
{
  class Program
  {
    static void Main(string[] args)
    {
      var client = new RestClient("https://api.nutrient.io/build");

      var request = new RestRequest(Method.POST)
        .AddHeader("Authorization", "Bearer your_api_key_here")
        .AddFile("document", "document.pdf")
        .AddParameter("instructions", new JsonObject
        {
          ["parts"] = new JsonArray
          {
            new JsonObject
            {
              ["file"] = "document"
            }
          },
          ["output"] = new JsonObject
          {
            ["type"] = "json-content",
            ["plainText"] = false,
            ["structuredText"] = false,
            ["keyValuePairs"] = false,
            ["tables"] = true
          }
        }.ToString());

      request.AdvancedResponseWriter = (responseStream, response) =>
      {
        if (response.StatusCode == HttpStatusCode.OK)
        {
          using (responseStream)
          {
            using var outputFileWriter = File.OpenWrite("result.json");
            responseStream.CopyTo(outputFileWriter);
          }
        }
        else
        {
          var responseStreamReader = new StreamReader(responseStream);
          Console.Write(responseStreamReader.ReadToEnd());
        }
      };

      client.Execute(request);
    }
  }
}

// This code requires Node.js. Do not run this code directly in a web browser.

const axios = require('axios')
const FormData = require('form-data')
const fs = require('fs')

const formData = new FormData()
formData.append('instructions', JSON.stringify({
  parts: [
    {
      file: "document"
    }
  ],
  output: {
    type: "json-content",
    plainText: false,
    structuredText: false,
    keyValuePairs: false,
    tables: true
  }
}))
formData.append('document', fs.createReadStream('document.pdf'))

;(async () => {
  try {
    const response = await axios.post('https://api.nutrient.io/build', formData, {
      headers: formData.getHeaders({
        'Authorization': 'Bearer your_api_key_here'
      }),
      responseType: "stream"
    })

    response.data.pipe(fs.createWriteStream("result.json"))
  } catch (e) {
    const errorString = await streamToString(e.response.data)
    console.log(errorString)
  }
})()

function streamToString(stream) {
  const chunks = []
  return new Promise((resolve, reject) => {
    stream.on("data", (chunk) => chunks.push(Buffer.from(chunk)))
    stream.on("error", (err) => reject(err))
    stream.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")))
  })
}

import requests
import json

response = requests.request(
  'POST',
  'https://api.nutrient.io/build',
  headers = {
    'Authorization': 'Bearer your_api_key_here'
  },
  files = {
    'document': open('document.pdf', 'rb')
  },
  data = {
    'instructions': json.dumps({
      'parts': [
        {
          'file': 'document'
        }
      ],
      'output': {
        'type': 'json-content',
        'plainText': False,
        'structuredText': False,
        'keyValuePairs': False,
        'tables': True
      }
    })
  },
  stream = True
)

if response.ok:
  with open('result.json', 'wb') as fd:
    for chunk in response.iter_content(chunk_size=8096):
      fd.write(chunk)
else:
  print(response.text)
  exit()

<?php

$FileHandle = fopen('result.json', 'w+');

$curl = curl_init();

curl_setopt_array($curl, array(
  CURLOPT_URL => 'https://api.nutrient.io/build',
  CURLOPT_CUSTOMREQUEST => 'POST',
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => '',
  CURLOPT_POSTFIELDS => array(
    'instructions' => '{
      "parts": [
        {
          "file": "document"
        }
      ],
      "output": {
        "type": "json-content",
        "plainText": false,
        "structuredText": false,
        "keyValuePairs": false,
        "tables": true
      }
    }',
    'document' => new CURLFILE('document.pdf')
  ),
  CURLOPT_HTTPHEADER => array(
    'Authorization: Bearer your_api_key_here'
  ),
  CURLOPT_FILE => $FileHandle,
));

$response = curl_exec($curl);

curl_close($curl);

fclose($FileHandle);

POST https://api.nutrient.io/build HTTP/1.1
Content-Type: multipart/form-data; boundary=--customboundary
Authorization: Bearer your_api_key_here

--customboundary
Content-Disposition: form-data; name="instructions"
Content-Type: application/json

{
  "parts": [
    {
      "file": "document"
    }
  ],
  "output": {
    "type": "json-content",
    "plainText": false,
    "structuredText": false,
    "keyValuePairs": false,
    "tables": true
  }
}
--customboundary
Content-Disposition: form-data; name="document"; filename="document.pdf"
Content-Type: application/pdf

(document data)
--customboundary--

PDF-to-Excel

Convert PDF documents to Microsoft Excel files containing structured data.

Request

PDF-to-XML API

Convert PDF documents to XML files with our table extraction API.

PDF-to-JSON API

Use our PDF-to-JSON conversion API to generate structured JSON files from PDF documents.

Request

PDF-to-CSV API

Use our PDF-to-CSV conversion API to output table data as comma-separated values.

Most common next steps

Use exact output pages when the request is more specific than table extraction

OPEN PDF-TO-EXCEL API

Use the following:

PDF to Excel When the query already implies Excel as the preferred structured output

PDF to JSON When the query already implies JSON as the preferred structured output or downstream system

Key-value pair extraction If the workflow needs invoice fields or labeled data rather than tabular rows

Data extraction API To connect table extraction to structured output workflows

Get started:

Getting started For API key setup

Postman collection For the fastest first request

Supported languages For JavaScript, Python, Java, PHP, and HTTP implementation options

REST API reference For endpoint details

Platform resources:

Processor API pricing For credits

Security documentation For compliance review

Privacy documentation For uploaded-file handling

Processor API overview For broader DWS evaluation

Security is our top priority

No document storage

No input or resulting documents are stored on our infrastructure. All files are deleted as soon as a request finishes. Alternatively, check out our self-hosted product.

HTTPS encryption

All communication between your application and Nutrient is done via HTTPS to ensure your data is encrypted when it’s sent to us.

Safe payment processing

All payments are handled by Paddle. Nutrient DWS Processor API never has direct access to any of your payment data.

Ready to try it?

Create an account to get your DWS Processor API key and start making API calls.

START FOR FREE

Table extraction API from PDF to Excel and JSON workflows

Try it out

PDF-to-XML API

Get in touch

Thank you!

Try it out

PDF-to-CSV API

Get in touch

Thank you!

PDF-to-Excel

PDF-to-XML API

PDF-to-JSON API

PDF-to-CSV API

Use exact output pages when the request is more specific than table extraction

Security is our top priority

No document storage

HTTPS encryption

Safe payment processing

Ready to try it?