Package Exports
- es-html-parser
- es-html-parser/dist/index.js
This package does not declare an exports field, so the exports above have been automatically detected and optimized by JSPM instead. If any package subpath is missing, it is recommended to post an issue to the original package (es-html-parser) to support the "exports" field. If that is not possible, create a JSPM override to customize the exports field for this package.
Readme
ES HTML Parser
ES HTML Parser is an HTML parser that generates an abstract syntax tree similar to the ESTree specification.
This project began as a fork of hyntax and is developed to follow an ESTree-like AST specification.
Table of Contents
Install
npm install es-html-parser
Usage
import { parse } from "es-html-parser";
const input = `
<html>
<body>
<button type="button"> press here </button>
</body>
</html>
`;
const { ast, tokens } = parse(input);
API Reference
Functions
parse
parse(html: string): ParseResult;
Arguments
html
: HTML string to parse.
Returns
ParseResult
: Result of parsing
Types
ParseResult
interface ParseResult {
ast: DocumentNode;
tokens: AnyToken[];
}
ast
: The root node of the AST.
tokens
: An array of resulting tokens.
AnyNode
The AnyNode
is a union type of all nodes.
type AnyNode =
| DocumentNode
| TextNode
| TagNode
| OpenTagStartNode
| OpenTagEndNode
| CloseTagNode
| AttributeNode
| AttributeKeyNode
| AttributeValueNode
| AttributeValueWrapperStartNode
| AttributeValueWrapperEndNode
| ScriptTagNode
| OpenScriptTagStartNode
| CloseScriptTagNode
| OpenScriptTagEndNode
| ScriptTagContentNode
| StyleTagNode
| OpenStyleTagStartNode
| OpenStyleTagEndNode
| StyleTagContentNode
| CloseStyleTagNode
| CommentNode
| CommentStartNode
| CommentEndNode
| CommentContentNode
| DoctypeNode
| DoctypeStartNode
| DoctypeEndNode
| DoctypeAttributeNode
| DoctypeAttributeValueNode
| DoctypeAttributeWrapperStart
| DoctypeAttributeWrapperEnd;
AnyToken
The AnyToken
is a union type of all tokens.
type AnyToken =
| Token<TokenTypes.Text>
| Token<TokenTypes.OpenTagStart>
| Token<TokenTypes.OpenTagEnd>
| Token<TokenTypes.CloseTag>
| Token<TokenTypes.AttributeKey>
| Token<TokenTypes.AttributeAssignment>
| Token<TokenTypes.AttributeValueWrapperStart>
| Token<TokenTypes.AttributeValue>
| Token<TokenTypes.AttributeValueWrapperEnd>
| Token<TokenTypes.DoctypeStart>
| Token<TokenTypes.DoctypeAttributeValue>
| Token<TokenTypes.DoctypeAttributeWrapperStart>
| Token<TokenTypes.DoctypeAttributeWrapperEnd>
| Token<TokenTypes.DoctypeEnd>
| Token<TokenTypes.CommentStart>
| Token<TokenTypes.CommentContent>
| Token<TokenTypes.CommentEnd>
| Token<TokenTypes.OpenScriptTagStart>
| Token<TokenTypes.OpenScriptTagEnd>
| Token<TokenTypes.ScriptTagContent>
| Token<TokenTypes.CloseScriptTag>
| Token<TokenTypes.OpenStyleTagStart>
| Token<TokenTypes.OpenStyleTagEnd>
| Token<TokenTypes.StyleTagContent>
| Token<TokenTypes.CloseStyleTag>;
Constants
TokenTypes
enum TokenTypes {
Text = "Text",
OpenTagStart = "OpenTagStart",
OpenTagEnd = "OpenTagEnd",
CloseTag = "CloseTag",
AttributeKey = "AttributeKey",
AttributeAssignment = "AttributeAssignment",
AttributeValueWrapperStart = "AttributeValueWrapperStart",
AttributeValue = "AttributeValue",
AttributeValueWrapperEnd = "AttributeValueWrapperEnd",
DoctypeStart = "DoctypeStart",
DoctypeAttributeValue = "DoctypeAttributeValue",
DoctypeAttributeWrapperStart = "DoctypeAttributeWrapperStart",
DoctypeAttributeWrapperEnd = "DoctypeAttributeWrapperEnd",
DoctypeEnd = "DoctypeEnd",
CommentStart = "CommentStart",
CommentContent = "CommentContent",
CommentEnd = "CommentEnd",
OpenScriptTagStart = "OpenScriptTagStart",
OpenScriptTagEnd = "OpenScriptTagEnd",
ScriptTagContent = "ScriptTagContent",
CloseScriptTag = "CloseScriptTag",
OpenStyleTagStart = "OpenStyleTagStart",
OpenStyleTagEnd = "OpenStyleTagEnd",
StyleTagContent = "StyleTagContent",
CloseStyleTag = "CloseStyleTag",
}
NodeTypes
enum NodeTypes {
Document = "Document",
Tag = "Tag",
Text = "Text",
Doctype = "Doctype",
Comment = "Comment",
CommentStart = "CommentStart",
CommentEnd = "CommentEnd",
CommentContent = "CommentContent",
Attribute = "Attribute",
AttributeKey = "AttributeKey",
AttributeValue = "AttributeValue",
AttributeAssignment = "AttributeAssignment",
AttributeValueWrapperStart = "AttributeValueWrapperStart",
AttributeValueWrapperEnd = "AttributeValueWrapperEnd",
CloseTag = "CloseTag",
OpenTagEnd = "OpenTagEnd",
OpenTagStart = "OpenTagStart",
DoctypeStart = "DoctypeStart",
DoctypeAttribute = "DoctypeAttribute",
DoctypeEnd = "DoctypeEnd",
ScriptTag = "ScriptTag",
OpenScriptTagStart = "OpenScriptTagStart",
OpenScriptTagEnd = "OpenScriptTagEnd",
ScriptTagContent = "ScriptTagContent",
StyleTag = "StyleTag",
OpenStyleTagStart = "OpenStyleTagStart",
OpenStyleTagEnd = "OpenStyleTagEnd",
StyleTagContent = "StyleTagContent",
CloseStyleTag = "CloseStyleTag",
CloseScriptTag = "CloseScriptTag",
DoctypeAttributeValue = "DoctypeAttributeValue",
DoctypeAttributeWrapperStart = "DoctypeAttributeWrapperStart",
DoctypeAttributeWrapperEnd = "DoctypeAttributeWrapperEnd",
}
AST Format
Common
BaseNode
Every AST node and token implements the BaseNode
interface.
interface BaseNode {
type: string;
loc: SourceLocation;
range: [number, number];
}
The type
field represents the AST type. Its value is one of the NodeTypes
or TokenTypes
.
The loc
and range
fields represent the source location of the node.
SourceLocation
interface SourceLocation {
start: Position;
end: Position;
}
The start
field represents the start location of the node.
The end
field represents the end location of the node.
Position
interface Position {
line: number; // >= 1
column: number; // >= 0
}
The line
field is a number representing the line number where the node is positioned. (1-based index).
The column
field is a number representing the offset in the line. (0-based index).
Token
All tokens implement the Token
interface.
interface Token<T extends TokenTypes> extends BaseNode {
type: T;
value: string;
}
DocumentNode
DocumentNode
represents a whole parsed document. It is the root node of the AST.
interface DocumentNode extends BaseNode {
type: "Document";
children: Array<TextNode | TagNode | ScriptNode | StyleNode | CommentNode>;
}
TextNode
TextNode
represents any plain text in HTML.
interface TextNode extends BaseNode {
type: "Text";
value: string;
}
TagNode
TagNode
represents all kinds of tag nodes in HTML except for doctype, script, style, and comment. (e.g. <div></div>
, <span></span>
...)
interface TagNode extends BaseNode {
type: "Tag";
selfClosing: boolean;
name: string;
openStart: OpenTagStartNode;
openEnd: OpenTagEndNode;
close?: CloseTagNode;
children: Array<TextNode | TagNode | ScriptNode | StyleNode | CommentNode>;
attributes: Array<AttributeNode>;
}
OpenTagStartNode
OpenTagStartNode
represents the opening part of the Start tags. (e.g. <div
)
interface OpenTagStartNode {
type: "OpenTagStart";
value: string;
}
OpenTagEndNode
OpenTagEndNode
represents the closing part of the Start tags. (e.g. >
, />
)
interface OpenTagEndNode {
type: "OpenTagEnd";
value: string;
}
CloseTagNode
CloseTagNode
represents the End tags. (e.g. </div>
)
interface CloseTagNode {
type: "CloseTag";
value: string;
}
AttributeNode
AttributeNode
represents an attribute. (e.g. id="foo"
)
interface AttributeNode extends BaseNode {
type: "Attribute";
key: AttributeKeyNode;
value?: AttributeValueNode;
startWrapper?: AttributeValueWrapperStartNode;
endWrapper?: AttributeValueWrapperEndNode;
}
AttributeKeyNode
AttributeKeyNode
represents a key part of an attribute. (e.g. id
)
interface AttributeKeyNode extends BaseNode {
type: "AttributeKey";
value: string;
}
AttributeValueWrapperStartNode
AttributeValueWrapperStartNode
represents the left side character that wraps the value of the attribute. (e.g. "
, '
)
interface AttributeValueWrapperStartNode extends BaseNode {
type: "AttributeValueWrapperStart";
value: string;
}
AttributeValueWrapperEndNode
AttributeValueWrapperEndNode
represents the right side character that wraps the value of the attribute. (e.g. "
, '
)
interface AttributeValueWrapperEndNode extends BaseNode {
type: "AttributeValueWrapperEnd";
value: string;
}
AttributeValueNode
AttributeValueNode
represents the value part of the attribute. It does not include wrapper characters. (e.g. foo
)
interface AttributeValueNode extends BaseNode {
type: "AttributeValue";
value: string;
}
ScriptTagNode
The ScriptTagNode
represents a script tag in the HTML. (e.g. <script> console.log('hello'); </script>
).
interface ScriptTagNode extends BaseNode {
type: "ScriptTag";
attributes: Array<AttributeNode>;
openStart: OpenScriptTagStartNode;
openEnd: OpenScriptTagEndNode;
close: CloseScriptTagNode;
value?: ScriptTagContentNode;
}
OpenScriptTagStartNode
OpenScriptTagStartNode
represents an opening part of a start script tag. (e.g. <script
)
interface OpenScriptTagStartNode extends BaseNode {
type: "OpenScriptTagStart";
value: string;
}
OpenScriptTagEndNode
OpenScriptTagEndNode
represents a closing part of a start script tag. (e.g. >
)
interface OpenScriptTagEndNode extends BaseNode {
type: "OpenScriptTagEnd";
value: string;
}
CloseScriptTagNode
CloseScriptTagNode
represents a close script tag. (e.g. </script>
)
interface CloseScriptTagNode extends BaseNode {
type: "CloseScriptTag";
value: string;
}
ScriptTagContentNode
ScriptTagContentNode
represents a script content in script tag. (e.g. console.log('hello');
)
interface ScriptTagContentNode extends BaseNode {
type: "ScriptTagContent";
value: string;
}
StyleTagNode
StyleTagNode
represents style tags. (e.g. <style> .foo {} </style>
)
interface StyleTagNode extends BaseNode {
type: "StyleTag";
attributes: Array<AttributeNode>;
openStart: OpenStyleTagStartNode;
openEnd: OpenStyleTagEndNode;
close: CloseStyleTagNode;
value?: StyleTagContentNode;
}
OpenStyleTagStartNode
OpenStyleTagStartNode
represents an opening part of a start style tag. (e.g. <style
)
interface OpenStyleTagStartNode extends BaseNode {
type: "OpenStyleTagStart";
value: string;
}
OpenStyleTagEndNode
OpenStyleTagEndNode
represents a closing part of a start style tag. (e.g. >
)
interface OpenStyleTagEndNode extends BaseNode {
type: "OpenStyleTagEnd";
value: string;
}
CloseStyleTagNode
CloseStyleTagNode
represents a close style tag. (e.g. </style>
)
interface CloseStyleTagNode extends BaseNode {
type: "CloseStyleTag";
value: string;
}
StyleTagContentNode
StyleTagContentNode
represents a style content in style tag.
interface StyleTagContentNode extends BaseNode {
type: "StyleTagContent";
value: string;
}
CommentNode
CommentNode
represents a comment in HTML. (e.g. <!-- content -->
)
interface CommentNode extends BaseNode {
type: "Comment";
start: CommentStartNode;
end: CommentEndNode;
value: CommentContentNode;
}
CommentStartNode
CommentStartNode
represents comment start character sequence. (e.g. <!--
)
interface CommentStartNode extends BaseNode {
type: "CommentStart";
value: string;
}
CommentEndNode
CommentEndNode
represents comment end character sequence. (e.g. -->
)
interface CommentEndNode extends BaseNode {
type: "CommentEnd";
value: string;
}
CommentContentNode
The CommentContentNode
represents text in the comment.
interface CommentContentNode extends BaseNode {
type: "CommentContent";
value: string;
}
DoctypeNode
DoctypeNode
represents the DOCTYPE in html.
interface DoctypeNode extends BaseNode {
type: "Doctype";
attributes: Array<DoctypeAttributeNode>;
start: DoctypeStartNode;
end: DoctypeEndNode;
}
DoctypeStartNode
DoctypeStartNode
represents the doctype start character sequence. (e.g. <!DOCTYPE
)
interface DoctypeStartNode extends BaseNode {
type: "DoctypeStart";
value: string;
}
DoctypeEndNode
DoctypeEndNode
represents the doctype end character sequence (e.g. >
)
interface DoctypeEndNode extends BaseNode {
type: "DoctypeEnd";
value: string;
}
DoctypeAttributeNode
DoctypeAttributeNode
represents an attribute of doctype node. (e.g. html
, "-//W3C//DTD HTML 4.01 Transitional//EN"
)
interface DoctypeAttributeNode extends BaseNode {
type: "DoctypeAttribute";
key: DoctypeAttributeKey;
}
DoctypeAttributeValueNode
DoctypeAttributeValueNode
represents a value of doctype node's attribute. (e.g. html
, -//W3C//DTD HTML 4.01 Transitional//EN
)
. It does not include wrapper characters ('
, "
)
interface DoctypeAttributeValueNode extends BaseNode {
type: "DoctypeAttributeValue";
value: string;
}
DoctypeAttributeWrapperStartNode
DoctypeAttributeWrapperStartNode
represents a left side character that wraps the value of the attribute. (e.g. "
, '
)
interface DoctypeAttributeWrapperStartNode extends BaseNode {
type: "DoctypeAttributeWrapperStart";
value: string;
}
DoctypeAttributeWrapperEndNode
DoctypeAttributeWrapperEndNode
represents a right side character that wraps the value of the attribute. (e.g. "
, '
)
interface DoctypeAttributeWrapperEndNode extends BaseNode {
type: "DoctypeAttributeWrapperEnd";
value: string;
}