Package Exports
- unpdf
Readme
unpdf
A collection of utilities to work with PDFs. Uses Mozilla's PDF.js under the hood.
unpdf takes advantage of export conditions to circumvent build issues in serverless environments. PDF.js depends on the optional canvas module, which doesn't work inside worker threads.
This library is also intended as a modern alternative to the unmaintained pdf-parse.
Features
- 🏗️ Conditional exports for Browser, Node and worker environments
- 💬 Extract text from PDFs
- 🧱 Opt-in to legacy PDF.js build
Installation
Run the following command to add unpdf to your project.
# pnpm
pnpm add -D unpdf
# npm
npm install -D unpdf
# yarn
yarn add -D unpdf
Usage
import { extractPDFText } from 'unpdf'
const pdfBuffer = await fetch('https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf')
.then(res => res.arrayBuffer())
const { totalPages, text } = await extractPDFText(
new Uint8Array(pdfBuffer), { mergePages: true }
)
Use Legacy Or Custom PDF.js Build
// Before using any other methods, define the PDF.js module
import { defineUnPDFConfig } from 'unpdf'
// Use the legacy build
defineUnPDFConfig({
pdfjs: () => import('pdfjs-dist/legacy/build/pdf.js')
})
// Now, you can use the other methods
// …
Config
interface UnPDFConfiguration {
/**
* By default, UnPDF will use the latest version of PDF.js. If you want to
* use an older version or the legacy build, provide a function that returns
* a promise resolving to the PDF.js module.
*
* @example
* () => import('pdfjs-dist/legacy/build/pdf.js')
*/
pdfjs?: () => typeof PDFJS
}
Methods
defineUnPDFConfig
function defineUnPDFConfig({ pdfjs }: UnPDFConfiguration): Promise<void>
getPDFMeta
function getPDFMeta(data: ArrayBuffer): Promise<{
info: Record<string, any>
metadata: any
}>
extractPDFText
function extractPDFText(
data: ArrayBuffer,
{ mergePages }?: { mergePages?: boolean }
): Promise<{
totalPages: number
text: string | string[]
}>
getImagesFromPage
function getImagesFromPage(
data: ArrayBuffer,
pageNumber: number
): Promise<ArrayBuffer[]>
License
MIT License © 2023-PRESENT Johann Schopplich