Convert images to text on iOS
Nutrient supports extracting textual information from a scanned PDF. To do so, follow the steps below:
- First, convert the image into a PDF file as described in the image-to-PDF conversion guide.
- Next, perform OCR on the PDF file to extract the textual information from the image. This process is described in detail in the converting a scan into a searchable PDF guide.
- After performing OCR on the document, use `PSPDFTextParser` to retrieve the text, text blocks, words, or glyphs from a page. Here’s a detailed guide explaining how to do that.
The sample code below demonstrates the entire process:
// Convert the image to a PDF file.
let image: UIImage = ...
let outputFileURL: URL = ... // Writable file URL.

// Start from an empty page template and place the image on it, sizing the page to the image.
let pageTemplate = PageTemplate(pageType: .emptyPage, identifier: nil)
let newPageConfiguration = PDFNewPageConfiguration(pageTemplate: pageTemplate) { builder in
    builder.item = ProcessorItem(image: image, jpegCompressionQuality: 0.7, builderBlock: nil)
    builder.pageSize = image.size
}

// Insert the image page as the first (and only) page of the new document.
let configuration = Processor.Configuration()
configuration.addNewPage(at: 0, configuration: newPageConfiguration)

do {
    try Processor(configuration: configuration, securityOptions: nil).write(toFileURL: outputFileURL)
} catch {
    print("Could not create PDF file: \(error)")
    // The following steps read the file at `outputFileURL`, so stop when it could not be written.
    return
}
// Perform OCR on the file.
let document = Document(url: outputFileURL)
guard let processorConfiguration = Processor.Configuration(document: document) else {
    // No processor configuration could be created for this document, so there is nothing to OCR.
    return
}

// Mark the processor to perform OCR on all document pages and detect text in English.
let allPageIndexes = IndexSet(0..<IndexSet.Element(document.pageCount))
processorConfiguration.performOCROnPages(at: allPageIndexes, options: ProcessorOCROptions(language: .english))

let processor = Processor(configuration: processorConfiguration, securityOptions: nil)
let ocrURL: URL = ... // Writable file URL.
// Run the OCR off the main queue, then hop back to the main queue to read the result.
DispatchQueue.global(qos: .userInitiated).async {
    do {
        // This performs the actual OCR and generates the new document at the provided URL.
        try processor.write(toFileURL: ocrURL)
    } catch {
        // Handle error. Without the OCR output there is no document to read text from, so stop here.
        print("Could not perform OCR: \(error)")
        return
    }

    DispatchQueue.main.async {
        let ocrDocument = Document(url: ocrURL)

        // Retrieve text from the document.
        guard let textParser = ocrDocument.textParserForPage(at: 0) else {
            // Handle failure. The `else` branch of a `guard` must exit the enclosing scope.
            return
        }
        print("Text of page 0: \(textParser.text)")

        for textBlock in textParser.textBlocks {
            print("TextBlock at \(textBlock.frame): \(textBlock.content)")
        }
    }
}