Extract text from images and transform scanned documents into interactive, searchable PDFs with a reliable OCR API built for seamless integration.
Unlike generic OCR tools, Nutrient is optimized for PDFs, preserving layout, handling embedded fonts, and supporting searchable PDF output for seamless integration into document pipelines.
With support for 20 widely used languages, Nutrient delivers reliable text extraction across multilingual documents, ideal for global business use cases.
Extract text from scanned PDFs in Google Drive using our Zapier integration. It’s a no-code way to automatically turn scanned PDFs into searchable, editable documents.
This example will run English language OCR on your uploaded document, making any text in the document selectable and searchable.
Try it out in three steps
Add document.pdf to your project folder. Run the code, then open result.pdf to see the output.
curl -X POST https://api.nutrient.io/processor/ocr \
  -H "Authorization: Bearer your_api_key_here" \
  -o result.pdf \
  --fail \
  -F file=@document.pdf \
  -F data='{ "language": "english" }'
curl -X POST https://api.nutrient.io/processor/ocr ^
  -H "Authorization: Bearer your_api_key_here" ^
  -o result.pdf ^
  --fail ^
  -F file=@document.pdf ^
  -F data="{\"language\": \"english\"}"
package com.example.pspdfkit;
import java.io.File;import java.io.IOException;import java.nio.file.FileSystems;import java.nio.file.Files;import java.nio.file.StandardCopyOption;
import org.json.JSONArray;import org.json.JSONObject;
import okhttp3.MediaType;import okhttp3.MultipartBody;import okhttp3.OkHttpClient;import okhttp3.Request;import okhttp3.RequestBody;import okhttp3.Response;
public final class PspdfkitApiExample { public static void main(final String[] args) throws IOException { final RequestBody body = new MultipartBody.Builder() .setType(MultipartBody.FORM) .addFormDataPart( "file", "document.pdf", RequestBody.create( MediaType.parse("application/pdf"), new File("document.pdf") ) ) .addFormDataPart( "data", new JSONObject() .put("language", "english").toString() ) .build();
final Request request = new Request.Builder() .url("https://api.nutrient.io/processor/ocr") .method("POST", body) .addHeader("Authorization", "Bearer your_api_key_here") .build();
final OkHttpClient client = new OkHttpClient() .newBuilder() .build();
final Response response = client.newCall(request).execute();
if (response.isSuccessful()) { Files.copy( response.body().byteStream(), FileSystems.getDefault().getPath("result.pdf"), StandardCopyOption.REPLACE_EXISTING ); } else { // Handle the error throw new IOException(response.body().string()); } }}using System;using System.IO;using System.Net;using RestSharp;
namespace PspdfkitApiDemo
{
  class Program
  {
    // Uploads document.pdf with English OCR instructions and saves the response as result.pdf.
    // On a non-OK status the response body is printed to the console instead.
    static void Main(string[] args)
    {
      var client = new RestClient("https://api.nutrient.io/processor/ocr");

      var request = new RestRequest(Method.POST)
        .AddHeader("Authorization", "Bearer your_api_key_here")
        .AddFile("file", "document.pdf")
        .AddParameter("data", new JsonObject
        {
          ["language"] = "english"
        }.ToString());

      request.AdvancedResponseWriter = (responseStream, response) =>
      {
        if (response.StatusCode == HttpStatusCode.OK)
        {
          using (responseStream)
          {
            // File.Create truncates an existing result.pdf. File.OpenWrite would keep the
            // old length and leave stale trailing bytes when the new output is shorter.
            using var outputFileWriter = File.Create("result.pdf");
            responseStream.CopyTo(outputFileWriter);
          }
        }
        else
        {
          // Dispose the reader (and with it the response stream) once the error text is read
          using var responseStreamReader = new StreamReader(responseStream);
          Console.Write(responseStreamReader.ReadToEnd());
        }
      };

      client.Execute(request);
    }
  }
}
// This code requires Node.js. Do not run this code directly in a web browser.
const axios = require('axios')const FormData = require('form-data')const fs = require('fs')
const formData = new FormData()formData.append('data', JSON.stringify({ language: "english"}))formData.append('file', fs.createReadStream('document.pdf'))
;(async () => { try { const response = await axios.post('https://api.nutrient.io/processor/ocr', formData, { headers: formData.getHeaders({ 'Authorization': 'Bearer your_api_key_here' }), responseType: "stream" })
response.data.pipe(fs.createWriteStream("result.pdf")) } catch (e) { const errorString = await streamToString(e.response.data) console.log(errorString) }})()
function streamToString(stream) { const chunks = [] return new Promise((resolve, reject) => { stream.on("data", (chunk) => chunks.push(Buffer.from(chunk))) stream.on("error", (err) => reject(err)) stream.on("end", () => resolve(Buffer.concat(chunks).toString("utf8"))) })}import requestsimport json
response = requests.request( 'POST', 'https://api.nutrient.io/processor/ocr', headers = { 'Authorization': 'Bearer your_api_key_here' }, files = { 'file': open('document.pdf', 'rb') }, data = { 'data': json.dumps({ 'language': 'english' }) }, stream = True)
if response.ok: with open('result.pdf', 'wb') as fd: for chunk in response.iter_content(chunk_size=8096): fd.write(chunk)else: print(response.text) exit()<?php
// Destination for the processed PDF; cURL writes the response body directly into this handle.
$FileHandle = fopen('result.pdf', 'w+');
if ($FileHandle === false) {
  echo 'Unable to open result.pdf for writing' . PHP_EOL;
  exit(1);
}

$curl = curl_init();

curl_setopt_array($curl, array(
  CURLOPT_URL => 'https://api.nutrient.io/processor/ocr',
  CURLOPT_CUSTOMREQUEST => 'POST',
  CURLOPT_ENCODING => '',
  CURLOPT_POSTFIELDS => array(
    'data' => '{ "language": "english" }',
    'file' => new CURLFile('document.pdf')
  ),
  CURLOPT_HTTPHEADER => array(
    'Authorization: Bearer your_api_key_here'
  ),
  // Treat HTTP status >= 400 as a failure so an error payload is not saved as result.pdf.
  CURLOPT_FAILONERROR => true,
  // CURLOPT_RETURNTRANSFER is intentionally not set: it competes with CURLOPT_FILE
  // for the output target.
  CURLOPT_FILE => $FileHandle,
));

// With CURLOPT_FILE set, curl_exec() returns true on success and false on failure.
$response = curl_exec($curl);
if ($response === false) {
  echo 'Request failed: ' . curl_error($curl) . PHP_EOL;
}

curl_close($curl);
fclose($FileHandle);
POST https://api.nutrient.io/processor/ocr HTTP/1.1
Content-Type: multipart/form-data; boundary=customboundary
Authorization: Bearer your_api_key_here

--customboundary
Content-Disposition: form-data; name="data"
Content-Type: application/json

{ "language": "english"}
--customboundary
Content-Disposition: form-data; name="file"; filename="document.pdf"
Content-Type: application/pdf

(file data)
--customboundary--
curl -X POST https://api.nutrient.io/build \
  -H "Authorization: Bearer your_api_key_here" \
  -o result.pdf \
  --fail \
  -F scanned=@document.pdf \
  -F instructions='{ "parts": [ { "file": "scanned" } ], "actions": [ { "type": "ocr", "language": "english" } ] }'
curl -X POST https://api.nutrient.io/build ^
  -H "Authorization: Bearer your_api_key_here" ^
  -o result.pdf ^
  --fail ^
  -F scanned=@document.pdf ^
  -F instructions="{\"parts\": [{\"file\": \"scanned\"}], \"actions\": [{\"type\": \"ocr\", \"language\": \"english\"}]}"
package com.example.pspdfkit;
import java.io.File;import java.io.IOException;import java.nio.file.FileSystems;import java.nio.file.Files;import java.nio.file.StandardCopyOption;
import org.json.JSONArray;import org.json.JSONObject;
import okhttp3.MediaType;import okhttp3.MultipartBody;import okhttp3.OkHttpClient;import okhttp3.Request;import okhttp3.RequestBody;import okhttp3.Response;
public final class PspdfkitApiExample { public static void main(final String[] args) throws IOException { final RequestBody body = new MultipartBody.Builder() .setType(MultipartBody.FORM) .addFormDataPart( "scanned", "document.pdf", RequestBody.create( MediaType.parse("application/pdf"), new File("document.pdf") ) ) .addFormDataPart( "instructions", new JSONObject() .put("parts", new JSONArray() .put(new JSONObject() .put("file", "scanned") ) ) .put("actions", new JSONArray() .put(new JSONObject() .put("type", "ocr") .put("language", "english") ) ).toString() ) .build();
final Request request = new Request.Builder() .url("https://api.nutrient.io/build") .method("POST", body) .addHeader("Authorization", "Bearer your_api_key_here") .build();
final OkHttpClient client = new OkHttpClient() .newBuilder() .build();
final Response response = client.newCall(request).execute();
if (response.isSuccessful()) { Files.copy( response.body().byteStream(), FileSystems.getDefault().getPath("result.pdf"), StandardCopyOption.REPLACE_EXISTING ); } else { // Handle the error throw new IOException(response.body().string()); } }}using System;using System.IO;using System.Net;using RestSharp;
namespace PspdfkitApiDemo
{
  class Program
  {
    // Uploads document.pdf (as part "scanned") with build instructions containing an OCR
    // action, and saves the response as result.pdf. On a non-OK status the response body
    // is printed to the console instead.
    static void Main(string[] args)
    {
      var client = new RestClient("https://api.nutrient.io/build");

      var request = new RestRequest(Method.POST)
        .AddHeader("Authorization", "Bearer your_api_key_here")
        .AddFile("scanned", "document.pdf")
        .AddParameter("instructions", new JsonObject
        {
          ["parts"] = new JsonArray
          {
            new JsonObject
            {
              ["file"] = "scanned"
            }
          },
          ["actions"] = new JsonArray
          {
            new JsonObject
            {
              ["type"] = "ocr",
              ["language"] = "english"
            }
          }
        }.ToString());

      request.AdvancedResponseWriter = (responseStream, response) =>
      {
        if (response.StatusCode == HttpStatusCode.OK)
        {
          using (responseStream)
          {
            // File.Create truncates an existing result.pdf. File.OpenWrite would keep the
            // old length and leave stale trailing bytes when the new output is shorter.
            using var outputFileWriter = File.Create("result.pdf");
            responseStream.CopyTo(outputFileWriter);
          }
        }
        else
        {
          // Dispose the reader (and with it the response stream) once the error text is read
          using var responseStreamReader = new StreamReader(responseStream);
          Console.Write(responseStreamReader.ReadToEnd());
        }
      };

      client.Execute(request);
    }
  }
}
// This code requires Node.js. Do not run this code directly in a web browser.
const axios = require('axios')const FormData = require('form-data')const fs = require('fs')
const formData = new FormData()formData.append('instructions', JSON.stringify({ parts: [ { file: "scanned" } ], actions: [ { type: "ocr", language: "english" } ]}))formData.append('scanned', fs.createReadStream('document.pdf'))
;(async () => { try { const response = await axios.post('https://api.nutrient.io/build', formData, { headers: formData.getHeaders({ 'Authorization': 'Bearer your_api_key_here' }), responseType: "stream" })
response.data.pipe(fs.createWriteStream("result.pdf")) } catch (e) { const errorString = await streamToString(e.response.data) console.log(errorString) }})()
function streamToString(stream) { const chunks = [] return new Promise((resolve, reject) => { stream.on("data", (chunk) => chunks.push(Buffer.from(chunk))) stream.on("error", (err) => reject(err)) stream.on("end", () => resolve(Buffer.concat(chunks).toString("utf8"))) })}import requestsimport json
response = requests.request( 'POST', 'https://api.nutrient.io/build', headers = { 'Authorization': 'Bearer your_api_key_here' }, files = { 'scanned': open('document.pdf', 'rb') }, data = { 'instructions': json.dumps({ 'parts': [ { 'file': 'scanned' } ], 'actions': [ { 'type': 'ocr', 'language': 'english' } ] }) }, stream = True)
if response.ok: with open('result.pdf', 'wb') as fd: for chunk in response.iter_content(chunk_size=8096): fd.write(chunk)else: print(response.text) exit()<?php
// Destination for the processed PDF; cURL writes the response body directly into this handle.
$FileHandle = fopen('result.pdf', 'w+');
if ($FileHandle === false) {
  echo 'Unable to open result.pdf for writing' . PHP_EOL;
  exit(1);
}

$curl = curl_init();

curl_setopt_array($curl, array(
  CURLOPT_URL => 'https://api.nutrient.io/build',
  CURLOPT_CUSTOMREQUEST => 'POST',
  CURLOPT_ENCODING => '',
  CURLOPT_POSTFIELDS => array(
    'instructions' => '{ "parts": [ { "file": "scanned" } ], "actions": [ { "type": "ocr", "language": "english" } ] }',
    'scanned' => new CURLFile('document.pdf')
  ),
  CURLOPT_HTTPHEADER => array(
    'Authorization: Bearer your_api_key_here'
  ),
  // Treat HTTP status >= 400 as a failure so an error payload is not saved as result.pdf.
  CURLOPT_FAILONERROR => true,
  // CURLOPT_RETURNTRANSFER is intentionally not set: it competes with CURLOPT_FILE
  // for the output target.
  CURLOPT_FILE => $FileHandle,
));

// With CURLOPT_FILE set, curl_exec() returns true on success and false on failure.
$response = curl_exec($curl);
if ($response === false) {
  echo 'Request failed: ' . curl_error($curl) . PHP_EOL;
}

curl_close($curl);
fclose($FileHandle);
POST https://api.nutrient.io/build HTTP/1.1
Content-Type: multipart/form-data; boundary=customboundary
Authorization: Bearer your_api_key_here

--customboundary
Content-Disposition: form-data; name="instructions"
Content-Type: application/json

{ "parts": [ { "file": "scanned" } ], "actions": [ { "type": "ocr", "language": "english" } ]}
--customboundary
Content-Disposition: form-data; name="scanned"; filename="document.pdf"
Content-Type: application/pdf

(scanned data)
--customboundary--
Streamlined API for performing OCR on documents. Perfect for most use cases.
FEATURES
Maximum flexibility and advanced features for complex workflows.
FEATURES
The following section will walk you through how to best make use of all the functionality the OCR API provides.
No input or resulting documents are stored on our infrastructure. All files are deleted as soon as a request finishes. Alternatively, check out our self-hosted product.
All communication between your application and Nutrient is done via HTTPS to ensure your data is encrypted when it’s sent to us.
All payments are handled by Paddle. Nutrient DWS Processor API never has direct access to any of your payment data.
Create an account to get your DWS Processor API key and start making API calls.