Package Exports
- website-to-json
This package does not declare an exports field, so the exports above have been automatically detected and optimized by JSPM instead. If any package subpath is missing, it is recommended to post an issue to the original package (website-to-json) to support the "exports" field. If that is not possible, create a JSPM override to customize the exports field for this package.
Readme
Website to JSON converter (wtj)
This tool converts any website into structured, easy-to-read JSON using jQuery selectors.
Installation
$ npm install website-to-json --save
Getting started
Examples
Stack Overflow
// Example: extract the title and tag list of a Stack Overflow question.
var wtj = require('website-to-json')

wtj.extractData('http://stackoverflow.com/questions/3207418/crawler-vs-scraper', {
  // NOTE(review): presumably limits the result to the listed keys — confirm
  // against the website-to-json documentation.
  fields: ['data'],
  // `$` is the jQuery-style selector function for the fetched page; the
  // object returned here is what appears under `data` in the logged output.
  parse: function($) {
    return {
      title: $("h1").text(),
      // The callback reads each matched element through `this`, so it needs
      // no parameters; `.get()` unwraps the mapped set into a plain array.
      keywords: $('.post-taglist a').map(function() {
        return $(this).text()
      }).get()
    }
  }
})
.then(function(res) {
  console.log(JSON.stringify(res, null, 2));
})
.catch(function(err) {
  // Report failures instead of leaving the promise rejection unhandled.
  console.error(err);
})
Response
{
"data": {
"title": "crawler vs scraper",
"keywords": [
"web-crawler",
"terminology",
"scraper"
]
}
}
IMDB
// Example: extract title, poster URL and plot summary from an IMDB title page.
var wtj = require('website-to-json')

wtj.extractData('http://www.imdb.com/title/tt0111161', {
  // NOTE(review): presumably limits the result to the listed keys — confirm
  // against the website-to-json documentation.
  fields: ['data'],
  // `$` is the jQuery-style selector function for the fetched page; the
  // object returned here is what appears under `data` in the logged output.
  parse: function($) {
    return {
      // The built-in String.prototype.trim strips the surrounding whitespace,
      // so the separate `trim` package is not needed.
      title: $(".title_wrapper h1").text().trim(),
      image: $(".poster img").attr('src'),
      summary: $(".plot_summary .summary_text").text().trim()
    }
  }
})
.then(function(res) {
  console.log(JSON.stringify(res, null, 2));
})
.catch(function(err) {
  // Report failures instead of leaving the promise rejection unhandled.
  console.error(err);
})
Response
{
"data": {
"title": "The Shawshank Redemption (1994)",
"image": "https://images-na.ssl-images-amazon.com/images/M/MV5BODU4MjU4NjIwNl5BMl5BanBnXkFtZTgwMDU2MjEyMDE@._V1_UX182_CR0,0,182,268_AL_.jpg",
"summary": "Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency."
}
}
Nightmare.js
CLI
$ sudo npm install website-to-json -g
$ wtj twitter.com/itemsapi