Package Exports
- parse-english
This package does not declare an exports field, so the exports above have been automatically detected and optimized by JSPM instead. If any package subpath is missing, it is recommended to post an issue to the original package (parse-english) to support the "exports" field. If that is not possible, create a JSPM override to customize the exports field for this package.
Readme
parse-english

An English language parser producing NLCST nodes.
Installation
npm:
npm install parse-english
parse-english is also available for Bower, Component, and Duo, and as an AMD, CommonJS, and globals module, both uncompressed and compressed.
Usage
var ParseEnglish = require('parse-english'),
english = new ParseEnglish();
/**
* parse-latin would fail helplessly at the full-stop preceding the
* capital `H`, and would erroneously parse the following as two
* sentences.
*/
english.parse(
'Mr. Henry Brown: A hapless but friendly City of London worker.'
);
/*
* Object
* ├─ type: "RootNode"
* └─ children: Array[1]
* └─ 0: Object
* ├─ type: "ParagraphNode"
* └─ children: Array[1]
* └─ 0: Object
* ├─ type: "SentenceNode"
* └─ children: Array[23]
* ├─ 0: Object
* | ├─ type: "WordNode"
* | └─ children: Array[2]
* | ├─ 0: Object
* | | ├─ type: "TextNode"
* | | └─ value: "Mr"
* | └─ 1: Object
* | ├─ type: "PunctuationNode"
* | └─ value: "."
* ├─ 1: Object
* | ├─ type: "WhiteSpaceNode"
* | └─ value: " "
* ├─ 2: Object
* | ├─ type: "WordNode"
* | └─ children: Array[1]
* | └─ 0: Object
* | ├─ type: "TextNode"
* | └─ value: "Henry"
* ├─ 3: Object
* | ├─ type: "WhiteSpaceNode"
* | └─ value: " "
* ├─ 4: Object
* | ├─ type: "WordNode"
* | └─ children: Array[1]
* | └─ 0: Object
* | ├─ type: "TextNode"
* | └─ value: "Brown"
* ├─ 5: Object
* | ├─ type: "PunctuationNode"
* | └─ value: ":"
* ├─ 6: Object
* | ├─ type: "WhiteSpaceNode"
* | └─ value: " "
* ├─ 7: Object
* | ├─ type: "WordNode"
* | └─ children: Array[1]
* | └─ 0: Object
* | ├─ type: "TextNode"
* | └─ value: "A"
* ├─ 8: Object
* | ├─ type: "WhiteSpaceNode"
* | └─ value: " "
* ├─ 9: Object
* | ├─ type: "WordNode"
* | └─ children: Array[1]
* | └─ 0: Object
* | ├─ type: "TextNode"
* | └─ value: "hapless"
* ├─ 10: Object
* | ├─ type: "WhiteSpaceNode"
* | └─ value: " "
* ├─ 11: Object
* | ├─ type: "WordNode"
* | └─ children: Array[1]
* | └─ 0: Object
* | ├─ type: "TextNode"
* | └─ value: "but"
* ├─ 12: Object
* | ├─ type: "WhiteSpaceNode"
* | └─ value: " "
* ├─ 13: Object
* | ├─ type: "WordNode"
* | └─ children: Array[1]
* | └─ 0: Object
* | ├─ type: "TextNode"
* | └─ value: "friendly"
* ├─ 14: Object
* | ├─ type: "WhiteSpaceNode"
* | └─ value: " "
* ├─ 15: Object
* | ├─ type: "WordNode"
* | └─ children: Array[1]
* | └─ 0: Object
* | ├─ type: "TextNode"
* | └─ value: "City"
* ├─ 16: Object
* | ├─ type: "WhiteSpaceNode"
* | └─ value: " "
* ├─ 17: Object
* | ├─ type: "WordNode"
* | └─ children: Array[1]
* | └─ 0: Object
* | ├─ type: "TextNode"
* | └─ value: "of"
* ├─ 18: Object
* | ├─ type: "WhiteSpaceNode"
* | └─ value: " "
* ├─ 19: Object
* | ├─ type: "WordNode"
* | └─ children: Array[1]
* | └─ 0: Object
* | ├─ type: "TextNode"
* | └─ value: "London"
* ├─ 20: Object
* | ├─ type: "WhiteSpaceNode"
* | └─ value: " "
* ├─ 21: Object
* | ├─ type: "WordNode"
* | └─ children: Array[1]
* | └─ 0: Object
* | ├─ type: "TextNode"
* | └─ value: "worker"
* └─ 22: Object
* ├─ type: "PunctuationNode"
* └─ value: "."
*/
API
parse-english exposes the same API as parse-latin, but returns results better suited for English natural language.
Support includes:
- Unit abbreviations (tsp., tbsp., oz., ft., and more);
- Time references (sec., min., tues., thu., feb., and more);
- Business abbreviations (Inc. and Ltd.);
- Social titles (Mr., Mmes., Sr., and more);
- Rank and academic titles (Dr., Rep., Gen., Prof., Pres., and more);
- Geographical abbreviations (Ave., Blvd., Ft., Hwy., and more);
- American state abbreviations (Ala., Minn., La., Tex., and more);
- Canadian province abbreviations (Alta., Qué., Yuk., and more);
- English county abbreviations (Beds., Leics., Shrops., and more);
- Common elision (omission of letters) (’n’, ’o, ’em, ’twas, ’80s, and more).