PDF-to-JSON API

Extract tables from PDFs as JSON using our data extraction API.

Extract structured tables automatically

Convert tables from PDFs into clean, machine-readable JSON. Ideal for invoices, forms, reports, and more.

Automate with Zapier

Automatically extract tables from PDFs in Google Drive and save them as JSON files using our Zapier integration — a no-code workflow for document-to-data processing.

CONNECT WITH ZAPIER

No manual data entry

Save time and eliminate human error by automating table extraction from PDFs with high accuracy.

Try it out

This example will extract tables from a PDF and return them as a JSON file.

Try it out in three steps

Add a PDF named document.pdf to your project folder.
Run the code from the same folder.
Open result.json to see the output.

# Upload document.pdf as the "document" form part together with the JSON
# build instructions, and write the response body to result.json.
curl --request POST https://api.nutrient.io/build \
  --header "Authorization: Bearer your_api_key_here" \
  --fail \
  --output result.json \
  --form document=@document.pdf \
  --form instructions='{
      "parts": [
        {
          "file": "document"
        }
      ],
      "output": {
        "type": "json-content",
        "plainText": false,
        "structuredText": false,
        "keyValuePairs": false,
        "tables": true
      }
    }'

REM Upload document.pdf as the "document" form part together with the JSON
REM build instructions, and write the response body to result.json.
curl --request POST https://api.nutrient.io/build ^
  --header "Authorization: Bearer your_api_key_here" ^
  --output result.json ^
  --fail ^
  --form document=@document.pdf ^
  --form instructions="{\"parts\": [{\"file\": \"document\"}], \"output\": {\"type\": \"json-content\", \"plainText\": false, \"structuredText\": false, \"keyValuePairs\": false, \"tables\": true}}"

package com.example.pspdfkit;

import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;

import org.json.JSONArray;
import org.json.JSONObject;

import okhttp3.MediaType;
import okhttp3.MultipartBody;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;

/**
 * Uploads {@code document.pdf} to the build endpoint as a multipart form,
 * with instructions requesting {@code json-content} output that has only the
 * {@code tables} flag enabled, and saves the response body to
 * {@code result.json}.
 */
public final class PspdfkitApiExample {
  /**
   * Sends the request and writes the response body to disk.
   *
   * @param args unused
   * @throws IOException if the call fails, the result cannot be written, or
   *     the server answers with a non-successful status (in which case the
   *     exception message is the response body)
   */
  public static void main(final String[] args) throws IOException {
    // Multipart body: the PDF under "document" plus the JSON instructions
    // that refer to that part by name.
    final RequestBody body = new MultipartBody.Builder()
      .setType(MultipartBody.FORM)
      .addFormDataPart(
        "document",
        "document.pdf",
        RequestBody.create(
          MediaType.parse("application/pdf"),
          new File("document.pdf")
        )
      )
      .addFormDataPart(
        "instructions",
        new JSONObject()
          .put("parts", new JSONArray()
            .put(new JSONObject()
              .put("file", "document")
            )
          )
          .put("output", new JSONObject()
            .put("type", "json-content")
            .put("plainText", false)
            .put("structuredText", false)
            .put("keyValuePairs", false)
            .put("tables", true)
          ).toString()
      )
      .build();

    final Request request = new Request.Builder()
      .url("https://api.nutrient.io/build")
      .method("POST", body)
      .addHeader("Authorization", "Bearer your_api_key_here")
      .build();

    final OkHttpClient client = new OkHttpClient()
      .newBuilder()
      .build();

    // A Response owns the underlying connection and body stream; closing it
    // via try-with-resources releases both on the success and error paths.
    try (Response response = client.newCall(request).execute()) {
      if (response.isSuccessful()) {
        Files.copy(
          response.body().byteStream(),
          FileSystems.getDefault().getPath("result.json"),
          StandardCopyOption.REPLACE_EXISTING
        );
      } else {
        // Surface the server's error payload to the caller.
        throw new IOException(response.body().string());
      }
    }
  }
}

using System;
using System.IO;
using System.Net;
using RestSharp;

namespace PspdfkitApiDemo
{
  /// <summary>
  /// Uploads <c>document.pdf</c> to the build endpoint with instructions that
  /// request <c>json-content</c> output with only the <c>tables</c> flag
  /// enabled, and saves the response to <c>result.json</c>.
  /// </summary>
  class Program
  {
    /// <summary>
    /// Builds the multipart request, executes it, and either writes the
    /// response body to <c>result.json</c> (HTTP 200) or prints it to the
    /// console (any other status).
    /// </summary>
    /// <param name="args">Unused.</param>
    static void Main(string[] args)
    {
      var client = new RestClient("https://api.nutrient.io/build");

      // The "instructions" part refers to the uploaded file by its form
      // part name, "document".
      var request = new RestRequest(Method.POST)
        .AddHeader("Authorization", "Bearer your_api_key_here")
        .AddFile("document", "document.pdf")
        .AddParameter("instructions", new JsonObject
        {
          ["parts"] = new JsonArray
          {
            new JsonObject
            {
              ["file"] = "document"
            }
          },
          ["output"] = new JsonObject
          {
            ["type"] = "json-content",
            ["plainText"] = false,
            ["structuredText"] = false,
            ["keyValuePairs"] = false,
            ["tables"] = true
          }
        }.ToString());

      request.AdvancedResponseWriter = (responseStream, response) =>
      {
        if (response.StatusCode == HttpStatusCode.OK)
        {
          using (responseStream)
          {
            // File.Create truncates an existing result.json. File.OpenWrite
            // would keep the old tail when the new payload is shorter,
            // leaving invalid JSON on disk.
            using var outputFileWriter = File.Create("result.json");
            responseStream.CopyTo(outputFileWriter);
          }
        }
        else
        {
          // Disposing the reader also disposes the response stream it wraps.
          using var responseStreamReader = new StreamReader(responseStream);
          Console.Write(responseStreamReader.ReadToEnd());
        }
      };

      client.Execute(request);
    }
  }
}

// This code requires Node.js. Do not run this code directly in a web browser.

const axios = require('axios')
const FormData = require('form-data')
const fs = require('fs')

// Multipart body: the JSON build instructions plus the PDF itself. The
// instructions refer to the uploaded file by its form part name, "document".
const formData = new FormData()
formData.append('instructions', JSON.stringify({
  parts: [
    {
      file: "document"
    }
  ],
  output: {
    type: "json-content",
    plainText: false,
    structuredText: false,
    keyValuePairs: false,
    tables: true
  }
}))
formData.append('document', fs.createReadStream('document.pdf'))

;(async () => {
  try {
    const response = await axios.post('https://api.nutrient.io/build', formData, {
      headers: formData.getHeaders({
        'Authorization': 'Bearer your_api_key_here'
      }),
      responseType: "stream"
    })

    // The body arrives as a stream; pipe it straight into result.json.
    response.data.pipe(fs.createWriteStream("result.json"))
  } catch (e) {
    // e.response is only set when the server actually answered. For network
    // failures (DNS, refused connection, ...) there is no body to read, so
    // fall back to the error message instead of throwing inside the handler.
    if (e.response && e.response.data) {
      const errorString = await streamToString(e.response.data)
      console.log(errorString)
    } else {
      console.log(e.message)
    }
  }
})()

/**
 * Collects everything a readable stream emits and resolves with the
 * concatenated data decoded as UTF-8. Rejects if the stream emits "error".
 */
function streamToString(stream) {
  return new Promise((resolve, reject) => {
    const buffers = []

    stream.on("data", (piece) => {
      buffers.push(Buffer.from(piece))
    })
    stream.on("error", reject)
    stream.on("end", () => {
      const combined = Buffer.concat(buffers)
      resolve(combined.toString("utf8"))
    })
  })
}

import requests
import json

# Upload document.pdf with build instructions requesting json-content output
# that has only the tables flag enabled. The with-block closes the PDF handle
# once requests has sent the upload and returned the response.
with open('document.pdf', 'rb') as document:
  response = requests.request(
    'POST',
    'https://api.nutrient.io/build',
    headers = {
      'Authorization': 'Bearer your_api_key_here'
    },
    files = {
      'document': document
    },
    data = {
      'instructions': json.dumps({
        'parts': [
          {
            'file': 'document'
          }
        ],
        'output': {
          'type': 'json-content',
          'plainText': False,
          'structuredText': False,
          'keyValuePairs': False,
          'tables': True
        }
      })
    },
    stream = True
  )

if response.ok:
  # Stream the body to disk in chunks rather than loading it all into memory.
  with open('result.json', 'wb') as fd:
    for chunk in response.iter_content(chunk_size=8096):
      fd.write(chunk)
else:
  print(response.text)
  # Exit with a non-zero status so callers can detect the failure. The bare
  # exit() helper is meant for interactive sessions and reports success.
  raise SystemExit(1)

<?php

// Destination for the response body; cURL writes into this handle directly
// because it is passed as CURLOPT_FILE below.
$FileHandle = fopen('result.json', 'w+');

if ($FileHandle === false) {
  echo "Unable to open result.json for writing\n";
  exit(1);
}

$curl = curl_init();

// The "instructions" field refers to the uploaded file by its form field
// name, "document".
curl_setopt_array($curl, array(
  CURLOPT_URL => 'https://api.nutrient.io/build',
  CURLOPT_CUSTOMREQUEST => 'POST',
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => '',
  CURLOPT_POSTFIELDS => array(
    'instructions' => '{
      "parts": [
        {
          "file": "document"
        }
      ],
      "output": {
        "type": "json-content",
        "plainText": false,
        "structuredText": false,
        "keyValuePairs": false,
        "tables": true
      }
    }',
    'document' => new CURLFILE('document.pdf')
  ),
  CURLOPT_HTTPHEADER => array(
    'Authorization: Bearer your_api_key_here'
  ),
  CURLOPT_FILE => $FileHandle,
));

$response = curl_exec($curl);

// Report failures instead of silently leaving an empty or error-filled file.
if ($response === false) {
  echo 'Request failed: ' . curl_error($curl) . "\n";
} else {
  $status = curl_getinfo($curl, CURLINFO_HTTP_CODE);
  if ($status < 200 || $status >= 300) {
    echo "Request returned HTTP status $status; see result.json for the response body\n";
  }
}

curl_close($curl);

fclose($FileHandle);

POST https://api.nutrient.io/build HTTP/1.1
Content-Type: multipart/form-data; boundary=customboundary
Authorization: Bearer your_api_key_here

--customboundary
Content-Disposition: form-data; name="instructions"
Content-Type: application/json

{
  "parts": [
    {
      "file": "document"
    }
  ],
  "output": {
    "type": "json-content",
    "plainText": false,
    "structuredText": false,
    "keyValuePairs": false,
    "tables": true
  }
}
--customboundary
Content-Disposition: form-data; name="document"; filename="document.pdf"
Content-Type: application/pdf

(document data)
--customboundary--

Start now

Create an account to access your API key and start with 50 free credits per month

Start building with DWS Processor API in minutes — no payment information required.

SIGN UP

Already have an account? Sign in →

Most common next steps

Go from PDF-to-JSON extraction to the getting started guide, pricing, and broader structured-output workflows

OPEN GETTING STARTED

Use the following:

PDF to Excel — If the workflow needs XLSX spreadsheet output rather than JSON

Data extraction API — When the workflow spans structured JSON, key-value, table, and text extraction

Get started:

Getting started — For API key setup

Postman collection — For the fastest first request

REST API reference — For endpoint details

Platform resources:

Processor API pricing — For credits

Security documentation — For document handling and compliance review

Privacy documentation — For data handling review

Processor API overview — For broader DWS evaluation

Security is our top priority

SOC 2 Type 2 audited

Nutrient’s infrastructure is SOC 2 Type 2 audited and GDPR-compliant. See our privacy policy and security documentation for details on data handling.

HTTPS encryption

All communication between your application and Nutrient is done via HTTPS to ensure your data is encrypted when it’s sent to us.

Safe payment processing

All payments are handled by Paddle. Nutrient DWS Processor API never has direct access to any of your payment data.

Frequently asked questions

What is a PDF-to-JSON API?

A PDF-to-JSON API extracts structured data — including tables, text, and document structure — from PDF files and returns it as JSON with a single API call. Nutrient’s DWS Processor API does this without you building your own extraction pipeline.

How do I extract tables from a PDF programmatically?

Send your PDF to the Nutrient DWS Processor API, and it returns the extracted tables and content as JSON. You can call it via REST, Postman, curl, JavaScript, Python, Java, C#, or PHP.

What data can I extract from a PDF to JSON?

You can extract tabular data, text content, and document structure into machine-readable JSON — useful for feeding PDFs into databases, analytics, or downstream automation.

Which programming languages does the API support?

You can call the API from any language that can make HTTP requests. We provide quick start examples for JavaScript, Python, Java, C# (.NET), and PHP, plus REST and a Postman collection.

Is there a free PDF-to-JSON API?

Yes. You can start for free — sign up to receive processing credits and try the PDF-to-JSON API before choosing a Processor API plan.

Ready to try it?

Create an account to get your DWS Processor API key and start making API calls.

START FOR FREE