Package Exports

websnapper
websnapper/index.js

This package does not declare an exports field, so the exports above have been automatically detected and optimized by JSPM instead. If any package subpath is missing, it is recommended to post an issue to the original package (websnapper) to support the "exports" field. If that is not possible, create a JSPM override to customize the exports field for this package.

Readme

WebSnapper

A powerful Node.js library for converting web pages to PDF and images using Puppeteer. WebSnapper provides a simple yet comprehensive API for web-to-file conversion with advanced features and progress tracking.

Features

🚀 Convert URLs, HTML files, or HTML strings to PDF, PNG, or JPG
📊 Real-time progress tracking with events
🎯 Advanced page manipulation (hide/remove elements, inject CSS/JS)
📱 Responsive viewport configuration
🔄 Batch processing support
🛡️ Robust error handling
💾 Memory-efficient browser management
⚡ High-performance headless Chrome automation

Installation

npm install websnapper

Quick Start

const { WebSnapper } = require('websnapper');

async function example() {
  const snapper = new WebSnapper();
  
  // Convert URL to PDF
  await snapper.pdf('https://example.com', {
    path: 'output.pdf'
  });
  
  // Take screenshot
  await snapper.png('https://example.com', {
    path: 'screenshot.png'
  });
  
  await snapper.close();
}

example();

API Reference

WebSnapper Class

Constructor

const snapper = new WebSnapper();

Methods

`convert(input, options)`

Main conversion method that handles all output formats.

Parameters:

input (string): URL, file path, or HTML content
options (object): Configuration options

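Returns: A Promise that resolves to the conversion result (the examples below read fields such as `result.success`, `result.path`, `result.filename`, and `result.error` from it)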

Format-specific methods

pdf(input, options) - Convert to PDF
png(input, options) - Convert to PNG
jpg(input, options) - Convert to JPG
jpeg(input, options) - Convert to JPEG

Utility methods

init() - Initialize browser instance
close() - Close browser and cleanup
batch(inputs, options) - Process multiple inputs (in the Batch Processing example below, `options` is an array with one entry per input)
getPageInfo(input) - Extract page metadata

Configuration Options

Option	Type	Default	Description
`output`	string	'pdf'	Output format: 'pdf', 'png', 'jpg', 'jpeg'
`path`	string	undefined	Output file path
`quality`	number	90	Image quality (1-100); applies to JPG/JPEG output only, not PNG
`viewport`	object	{width: 1920, height: 1080}	Browser viewport size
`waitFor`	number/string/function	2000	Wait condition before capture
`margin`	object	{top: '1cm', right: '1cm', bottom: '1cm', left: '1cm'}	PDF margins
`landscape`	boolean	false	PDF orientation
`pageSize`	string	'A4'	PDF page size (note: the examples in this README pass the page size as `format`)
`fullPage`	boolean	true	Capture full page or viewport only
`printBackground`	boolean	true	Include background graphics
`hideElements`	array	[]	CSS selectors to hide
`removeElements`	array	[]	CSS selectors to remove
`injectCSS`	string	undefined	Custom CSS to inject
`injectJS`	string	undefined	Custom JavaScript to inject
`scrollToBottom`	boolean	false	Auto-scroll to load lazy content
`timeout`	number	30000	Navigation timeout in ms
`userAgent`	string	undefined	Custom user agent
`cookies`	array	undefined	Cookies to set
`extraHTTPHeaders`	object	undefined	Additional HTTP headers

Examples

Basic PDF Generation

const { WebSnapper } = require('websnapper');

async function createPDF() {
  const snapper = new WebSnapper();
  
  const result = await snapper.pdf('https://github.com', {
    path: 'github.pdf',
    format: 'A4',
    margin: {
      top: '2cm',
      right: '2cm',
      bottom: '2cm',
      left: '2cm'
    }
  });
  
  console.log('PDF created:', result.path);
  await snapper.close();
}

createPDF();

Screenshot with Custom Viewport

const { WebSnapper } = require('websnapper');

async function takeScreenshot() {
  const snapper = new WebSnapper();
  
  const result = await snapper.png('https://example.com', {
    path: 'mobile-view.png',
    viewport: { width: 375, height: 667 }, // iPhone viewport
    quality: 95,
    fullPage: true
  });
  
  console.log('Screenshot saved:', result.filename);
  await snapper.close();
}

takeScreenshot();

Advanced Page Manipulation

const { WebSnapper } = require('websnapper');

async function cleanCapture() {
  const snapper = new WebSnapper();
  
  await snapper.pdf('https://news.ycombinator.com', {
    path: 'clean-hn.pdf',
    hideElements: [
      '.ad',
      '.sidebar', 
      'footer'
    ],
    removeElements: [
      '.popup',
      '.cookie-banner'
    ],
    injectCSS: `
      body { font-size: 14px !important; }
      .main { max-width: 800px !important; }
    `,
    waitFor: 3000 // Wait for dynamic content
  });
  
  await snapper.close();
}

cleanCapture();

Batch Processing

const { WebSnapper } = require('websnapper');

async function batchConvert() {
  const snapper = new WebSnapper();
  
  const urls = [
    'https://github.com',
    'https://stackoverflow.com',
    'https://developer.mozilla.org'
  ];
  
  const options = [
    { path: 'github.pdf', format: 'A4' },
    { path: 'stackoverflow.pdf', format: 'A4' },
    { path: 'mdn.pdf', format: 'A4' }
  ];
  
  // Track overall progress
  snapper.on('progress', (data) => {
    console.log(`Progress: ${data.message} - ${data.step}`);
  });
  
  const results = await snapper.batch(urls, options);
  
  results.forEach((result, index) => {
    if (result.success) {
      console.log(`✅ ${urls[index]} -> ${result.filename}`);
    } else {
      console.log(`❌ ${urls[index]} failed: ${result.error}`);
    }
  });
  
  await snapper.close();
}

batchConvert();

Converting HTML Content

const { WebSnapper } = require('websnapper');

async function htmlToPDF() {
  const snapper = new WebSnapper();
  
  const htmlContent = `
    <!DOCTYPE html>
    <html>
    <head>
      <title>My Document</title>
      <style>
        body { font-family: Arial, sans-serif; padding: 2rem; }
        h1 { color: #333; }
      </style>
    </head>
    <body>
      <h1>Hello World</h1>
      <p>This is a dynamically generated PDF from HTML content.</p>
    </body>
    </html>
  `;
  
  await snapper.pdf(htmlContent, {
    path: 'dynamic.pdf',
    format: 'A4'
  });
  
  await snapper.close();
}

htmlToPDF();

Progress Tracking

const { WebSnapper } = require('websnapper');

async function trackProgress() {
  const snapper = new WebSnapper();
  
  // Listen to progress events
  snapper.on('progress', (data) => {
    console.log(`📈 ${data.step}: ${data.progress}% - ${data.message}`);
  });
  
  await snapper.pdf('https://example.com', {
    path: 'tracked.pdf',
    onProgress: (data) => {
      // Custom progress handler
      if (data.progress % 10 === 0) {
        console.log(`🔄 Custom: ${data.progress}% complete`);
      }
    }
  });
  
  await snapper.close();
}

trackProgress();

Quick Convert Functions

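For simple use cases, use the quick convert functions. Because the snippet below uses `await`, run it inside an `async` function (top-level `await` is not available in a CommonJS module loaded with `require`):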

const { quickConvert } = require('websnapper');

// Quick PDF
await quickConvert.pdf('https://example.com', 'quick.pdf');

// Quick screenshot
await quickConvert.png('https://example.com', 'quick.png');

// With progress callback
await quickConvert.pdf('https://example.com', 'progress.pdf', (data) => {
  console.log(`Progress: ${data.progress}%`);
});

Page Information Extraction

const { WebSnapper } = require('websnapper');

async function getPageInfo() {
  const snapper = new WebSnapper();
  
  const info = await snapper.getPageInfo('https://github.com');
  
  console.log('Page Info:', {
    title: info.title,
    dimensions: info.dimensions,
    description: info.meta.description
  });
  
  await snapper.close();
}

getPageInfo();

Working with Cookies and Headers

const { WebSnapper } = require('websnapper');

async function authenticatedCapture() {
  const snapper = new WebSnapper();
  
  await snapper.pdf('https://example.com/dashboard', {
    path: 'dashboard.pdf',
    cookies: [
      {
        name: 'session',
        value: 'your-session-token',
        domain: 'example.com'
      }
    ],
    extraHTTPHeaders: {
      'Authorization': 'Bearer your-token',
      'Custom-Header': 'value'
    },
    userAgent: 'Mozilla/5.0 (compatible; WebSnapper/1.0)'
  });
  
  await snapper.close();
}

authenticatedCapture();

Error Handling

const { WebSnapper } = require('websnapper');

async function handleErrors() {
  const snapper = new WebSnapper();
  
  try {
    const result = await snapper.pdf('https://invalid-url', {
      path: 'test.pdf',
      timeout: 10000
    });
    
    if (result.success) {
      console.log('✅ PDF created successfully');
    }
  } catch (error) {
    console.error('❌ Conversion failed:', error.message);
    
    // Handle specific error types
    if (error.message.includes('timeout')) {
      console.log('💡 Try increasing the timeout option');
    }
  } finally {
    await snapper.close(); // Always cleanup
  }
}

handleErrors();

Best Practices

1. Always Close the Browser

const snapper = new WebSnapper();
try {
  // Your operations
} finally {
  await snapper.close(); // Prevent memory leaks
}

2. Reuse Browser Instance

// Good: Reuse for multiple operations
const snapper = new WebSnapper();
await snapper.pdf('url1', options1);
await snapper.png('url2', options2);
await snapper.close();

// Avoid: Creating new instances
// const snapper1 = new WebSnapper();
// const snapper2 = new WebSnapper();

3. Handle Large Pages

await snapper.pdf('https://long-page.com', {
  timeout: 60000,        // Increase timeout
  waitFor: 5000,         // Wait for content to load
  scrollToBottom: true   // Load lazy content
});

4. Optimize for Performance

const snapper = new WebSnapper();

// Disable unnecessary features for faster processing
await snapper.pdf('https://example.com', {
  printBackground: false,  // Skip backgrounds if not needed
  preferCSSPageSize: true, // Use CSS page size
  waitFor: 1000           // Reduce wait time if possible
});

TypeScript Support

WebSnapper includes TypeScript definitions:

import { WebSnapper, WebSnapperOptions, WebSnapperResult } from 'websnapper';

const snapper = new WebSnapper();

const options: WebSnapperOptions = {
  output: 'pdf',
  path: 'output.pdf',
  quality: 90
};

const result: WebSnapperResult = await snapper.convert('https://example.com', options);

Requirements

Node.js 12.0.0 or higher
Chrome/Chromium browser (automatically managed by Puppeteer)

Creator

Made by BLUEZLY

WebSnapper - Convert the web, one page at a time. 🌐➡️📄