Package Exports
- tokenizers
This package does not declare an exports field, so the exports above have been automatically detected and optimized by JSPM instead. If any package subpath is missing, it is recommended to post an issue to the original package (tokenizers) to support the "exports" field. If that is not possible, create a JSPM override to customize the exports field for this package.
Readme
Elasticsearch tokenizers
Using tokenizers
Install tokenizers from npm
With yarn:
yarn add tokenizers
Or, alternatively, using npm:
npm install --save tokenizers
// Load the helpers demonstrated below from the tokenizers package.
const {
  asciiFolding,
  standardTokenizer,
  asciiFoldingTokenizer,
  nGramTokenizerCreater,
  edgeNGramTokenizerCreater,
} = require('tokenizers');

// asciiFolding works on a plain string. In this sample the accented Latin
// letters come back as their unaccented forms, while the remaining
// characters are returned unchanged.
asciiFolding('Iлtèrnåtïonɑlíƶatï߀ԉO');
// ~ Iлternationɑlizati߀ɉO'.replace('ɉ', 'ԉ') === 'Iлternationɑlizati߀ԉO'
// ~ Iлternationɑlizati߀ԉO

// One sample sentence is passed to both word tokenizers so that their
// outputs can be compared side by side.
const sampleText = "Düsseldorf Köln, Москва 北京市 !@#$";

// In the sample output the tokens are lower-cased, the punctuation is gone
// and each CJK character becomes its own token.
standardTokenizer(sampleText);
// ~ [ 'düsseldorf', 'köln', 'москва', '北', '京', '市' ];

// Same input; here the sample output additionally shows the umlauts folded
// to plain ASCII letters.
asciiFoldingTokenizer(sampleText);
// ~ [ 'dusseldorf', 'koln', 'москва', '北', '京', '市' ];

// Options for the n-gram tokenizer factory. Judging by the sample output,
// min/max bound the gram length (only 3- and 4-character grams appear).
const nGramOptions = {
  min: 3,
  max: 4,
  tokenChars: ['letter', 'digit'],
};
const standardNGramTokenizer = nGramTokenizerCreater(nGramOptions);

standardNGramTokenizer('Nam quốc sơn hà nam đế cư');
// ~ ['nam', 'quố', 'quốc', 'uốc', 'sơn', 'nam']

// Options for the edge n-gram tokenizer factory, with the 'asciifolding'
// filter enabled. In the sample output every gram starts at the beginning
// of a word and the Vietnamese diacritics are folded away.
const edgeNGramOptions = {
  min: 2,
  max: 10,
  filters: ['asciifolding'],
  tokenChars: ['letter', 'digit'],
};
const asciifoldingEdgeNGramTokenizer = edgeNGramTokenizerCreater(edgeNGramOptions);

asciifoldingEdgeNGramTokenizer('Tiệt nhiên phận định tại thiên thư');
// ~ ['ti', 'tie', 'tiet', 'nh', 'nhi', 'nhie', 'nhien', 'ph', 'pha', 'phan', 'di', 'din', 'dinh', 'ta', 'tai', 'th', 'thi', 'thie', 'thien', 'th', 'thu']