JSPM

  • Created
  • Published
  • Downloads 34163
  • Score
    100M100P100Q188260F
  • License MIT

Package Exports

    This package does not declare an exports field, so the exports above have been automatically detected and optimized by JSPM instead. If any package subpath is missing, it is recommended to post an issue to the original package (es-html-parser) to support the "exports" field. If that is not possible, create a JSPM override to customize the exports field for this package.

    Readme

    ES HTML Parser

    CI Badge

    ES HTML Parser is an HTML parser that generates an abstract syntax tree similar to the ESTree specification.

    This project began as a fork of hyntax and is developed to follow an ESTree-like AST specification.

    See online demo.

    Table of Contents

    Install

    npm install es-html-parser

    Usage

    import { parse } from "es-html-parser";
    
    const input = `
    <html>
      <body>
        <button type="button"> press here </button>
      </body>
    </html>
    `;
    
    const { ast, tokens } = parse(input);

    API Reference

    Functions

    parse

    parse(html: string, options?: Options): ParseResult;

    Arguments

    • html: HTML string to parse.
    • options (optional)
      • tokenAdapter: The adapter option for changing tokens information.

    Returns

    • ParseResult: Result of parsing

    Types

    ParseResult

    interface ParseResult {
      ast: DocumentNode;
      tokens: AnyToken[];
    }
    • ast: The root node of the ast.
    • tokens: An array of resulting tokens.

    AnyNode

    AnyNode is a union type of all nodes.

    type AnyNode =
      | DocumentNode
      | TextNode
      | TagNode
      | OpenTagStartNode
      | OpenTagEndNode
      | CloseTagNode
      | AttributeNode
      | AttributeKeyNode
      | AttributeValueNode
      | AttributeValueWrapperStartNode
      | AttributeValueWrapperEndNode
      | ScriptTagNode
      | OpenScriptTagStartNode
      | CloseScriptTagNode
      | OpenScriptTagEndNode
      | ScriptTagContentNode
      | StyleTagNode
      | OpenStyleTagStartNode
      | OpenStyleTagEndNode
      | StyleTagContentNode
      | CloseStyleTagNode
      | CommentNode
      | CommentOpenNode
      | CommentCloseNode
      | CommentContentNode
      | DoctypeNode
      | DoctypeOpenNode
      | DoctypeCloseNode
      | DoctypeAttributeNode
      | DoctypeAttributeValueNode
      | DoctypeAttributeWrapperStartNode
      | DoctypeAttributeWrapperEndNode;

    AnyToken

    AnyToken is a union type of all tokens.

    type AnyToken =
      | Token<TokenTypes.Text>
      | Token<TokenTypes.OpenTagStart>
      | Token<TokenTypes.OpenTagEnd>
      | Token<TokenTypes.CloseTag>
      | Token<TokenTypes.AttributeKey>
      | Token<TokenTypes.AttributeAssignment>
      | Token<TokenTypes.AttributeValueWrapperStart>
      | Token<TokenTypes.AttributeValue>
      | Token<TokenTypes.AttributeValueWrapperEnd>
      | Token<TokenTypes.DoctypeOpen>
      | Token<TokenTypes.DoctypeAttributeValue>
      | Token<TokenTypes.DoctypeAttributeWrapperStart>
      | Token<TokenTypes.DoctypeAttributeWrapperEnd>
      | Token<TokenTypes.DoctypeClose>
      | Token<TokenTypes.CommentOpen>
      | Token<TokenTypes.CommentContent>
      | Token<TokenTypes.CommentClose>
      | Token<TokenTypes.OpenScriptTagStart>
      | Token<TokenTypes.OpenScriptTagEnd>
      | Token<TokenTypes.ScriptTagContent>
      | Token<TokenTypes.CloseScriptTag>
      | Token<TokenTypes.OpenStyleTagStart>
      | Token<TokenTypes.OpenStyleTagEnd>
      | Token<TokenTypes.StyleTagContent>
      | Token<TokenTypes.CloseStyleTag>;

    Constants

    TokenTypes

    enum TokenTypes {
      Text = "Text",
      OpenTagStart = "OpenTagStart",
      OpenTagEnd = "OpenTagEnd",
      CloseTag = "CloseTag",
      AttributeKey = "AttributeKey",
      AttributeAssignment = "AttributeAssignment",
      AttributeValueWrapperStart = "AttributeValueWrapperStart",
      AttributeValue = "AttributeValue",
      AttributeValueWrapperEnd = "AttributeValueWrapperEnd",
      DoctypeOpen = "DoctypeOpen",
      DoctypeAttributeValue = "DoctypeAttributeValue",
      DoctypeAttributeWrapperStart = "DoctypeAttributeWrapperStart",
      DoctypeAttributeWrapperEnd = "DoctypeAttributeWrapperEnd",
      DoctypeClose = "DoctypeClose",
      CommentOpen = "CommentOpen",
      CommentContent = "CommentContent",
      CommentClose = "CommentClose",
      OpenScriptTagStart = "OpenScriptTagStart",
      OpenScriptTagEnd = "OpenScriptTagEnd",
      ScriptTagContent = "ScriptTagContent",
      CloseScriptTag = "CloseScriptTag",
      OpenStyleTagStart = "OpenStyleTagStart",
      OpenStyleTagEnd = "OpenStyleTagEnd",
      StyleTagContent = "StyleTagContent",
      CloseStyleTag = "CloseStyleTag",
    }

    NodeTypes

    enum NodeTypes {
      Document = "Document",
      Tag = "Tag",
      Text = "Text",
      Doctype = "Doctype",
      Comment = "Comment",
      CommentOpen = "CommentOpen",
      CommentClose = "CommentClose",
      CommentContent = "CommentContent",
      Attribute = "Attribute",
      AttributeKey = "AttributeKey",
      AttributeValue = "AttributeValue",
      AttributeValueWrapperStart = "AttributeValueWrapperStart",
      AttributeValueWrapperEnd = "AttributeValueWrapperEnd",
      CloseTag = "CloseTag",
      OpenTagEnd = "OpenTagEnd",
      OpenTagStart = "OpenTagStart",
      DoctypeOpen = "DoctypeOpen",
      DoctypeAttribute = "DoctypeAttribute",
      DoctypeClose = "DoctypeClose",
      ScriptTag = "ScriptTag",
      OpenScriptTagStart = "OpenScriptTagStart",
      OpenScriptTagEnd = "OpenScriptTagEnd",
      ScriptTagContent = "ScriptTagContent",
      StyleTag = "StyleTag",
      OpenStyleTagStart = "OpenStyleTagStart",
      OpenStyleTagEnd = "OpenStyleTagEnd",
      StyleTagContent = "StyleTagContent",
      CloseStyleTag = "CloseStyleTag",
      CloseScriptTag = "CloseScriptTag",
      DoctypeAttributeValue = "DoctypeAttributeValue",
      DoctypeAttributeWrapperStart = "DoctypeAttributeWrapperStart",
      DoctypeAttributeWrapperEnd = "DoctypeAttributeWrapperEnd",
    }

    AST Format

    Common

    BaseNode

    Every AST node and token implements the BaseNode interface.

    interface BaseNode {
      type: string;
      loc: SourceLocation;
      range: [number, number];
    }

    The type field represents the AST type. Its value is one of the NodeTypes or TokenTypes. The loc and range fields represent the source location of the node.

    SourceLocation

    interface SourceLocation {
      start: Position;
      end: Position;
    }

    The start field represents the start location of the node.

    The end field represents the end location of the node.

    Position

    interface Position {
      line: number; // >= 1
      column: number; // >= 0
    }

    The line field is a number representing the line number where the node is positioned. (1-based index).

    The column field is a number representing the offset in the line. (0-based index).

    Token

    All tokens implement the Token interface.

    interface Token<T extends TokenTypes> extends BaseNode {
      type: T;
      value: string;
    }

    DocumentNode

    DocumentNode represents a whole parsed document. It's a root node of the AST.

    interface DocumentNode extends BaseNode {
      type: "Document";
      children: Array<TextNode | TagNode | ScriptNode | StyleNode | CommentNode>;
    }

    TextNode

    TextNode represents any plain text in HTML.

    interface TextNode extends BaseNode {
      type: "Text";
      value: string;
    }

    TagNode

    TagNode represents all kinds of tag nodes in HTML except for doctype, script, style, and comment. (e.g. <div></div>, <span></span> ...)

    interface TagNode extends BaseNode {
      type: "Tag";
      selfClosing: boolean;
      name: string;
      openStart: OpenTagStartNode;
      openEnd: OpenTagEndNode;
      close?: CloseTagNode;
      children: Array<TextNode | TagNode | ScriptNode | StyleNode | CommentNode>;
      attributes: Array<AttributeNode>;
    }

    OpenTagStartNode

    OpenTagStartNode represents the opening part of the Start tags. (e.g. <div)

    interface OpenTagStartNode extends BaseNode {
      type: "OpenTagStart";
      value: string;
    }

    OpenTagEndNode

    OpenTagEndNode represents the closing part of the Start tags. (e.g. >, />)

    interface OpenTagEndNode extends BaseNode {
      type: "OpenTagEnd";
      value: string;
    }

    CloseTagNode

    CloseTagNode represents end tags. (e.g. </div>)

    interface CloseTagNode extends BaseNode {
      type: "CloseTag";
      value: string;
    }

    AttributeNode

    AttributeNode represents an attribute. (e.g. id="foo")

    interface AttributeNode extends BaseNode {
      type: "Attribute";
      key: AttributeKeyNode;
      value?: AttributeValueNode;
      startWrapper?: AttributeValueWrapperStartNode;
      endWrapper?: AttributeValueWrapperEndNode;
    }

    AttributeKeyNode

    AttributeKeyNode represents a key part of an attribute. (e.g. id)

    interface AttributeKeyNode extends BaseNode {
      type: "AttributeKey";
      value: string;
    }

    AttributeValueWrapperStartNode

    AttributeValueWrapperStartNode represents the left side character that wraps the value of the attribute. (e.g. ", ')

    interface AttributeValueWrapperStartNode extends BaseNode {
      type: "AttributeValueWrapperStart";
      value: string;
    }

    AttributeValueWrapperEndNode

    AttributeValueWrapperEndNode represents the right side character that wraps the value of the attribute. (e.g. ", ')

    interface AttributeValueWrapperEndNode extends BaseNode {
      type: "AttributeValueWrapperEnd";
      value: string;
    }

    AttributeValueNode

    AttributeValueNode represents the value part of the attribute. It does not include wrapper characters. (e.g. foo)

    interface AttributeValueNode extends BaseNode {
      type: "AttributeValue";
      value: string;
    }

    ScriptTagNode

    ScriptTagNode represents a script tag in HTML. (e.g. <script> console.log('hello'); </script>).

    interface ScriptTagNode extends BaseNode {
      type: "ScriptTag";
      attributes: Array<AttributeNode>;
      openStart: OpenScriptTagStartNode;
      openEnd: OpenScriptTagEndNode;
      close: CloseScriptTagNode;
      value?: ScriptTagContentNode;
    }

    OpenScriptTagStartNode

    OpenScriptTagStartNode represents an opening part of a start script tag. (e.g. <script)

    interface OpenScriptTagStartNode extends BaseNode {
      type: "OpenScriptTagStart";
      value: string;
    }

    OpenScriptTagEndNode

    OpenScriptTagEndNode represents a closing part of a start script tag. (e.g. >)

    interface OpenScriptTagEndNode extends BaseNode {
      type: "OpenScriptTagEnd";
      value: string;
    }

    CloseScriptTagNode

    CloseScriptTagNode represents a close script tag. (e.g. </script>)

    interface CloseScriptTagNode extends BaseNode {
      type: "CloseScriptTag";
      value: string;
    }

    ScriptTagContentNode

    ScriptTagContentNode represents the script content in a script tag. (e.g. console.log('hello');)

    interface ScriptTagContentNode extends BaseNode {
      type: "ScriptTagContent";
      value: string;
    }

    StyleTagNode

    StyleTagNode represents style tags. (e.g. <style> .foo {} </style>)

    interface StyleTagNode extends BaseNode {
      type: "StyleTag";
      attributes: Array<AttributeNode>;
      openStart: OpenStyleTagStartNode;
      openEnd: OpenStyleTagEndNode;
      close: CloseStyleTagNode;
      value?: StyleTagContentNode;
    }

    OpenStyleTagStartNode

    OpenStyleTagStartNode represents an opening part of a start style tag. (e.g. <style)

    interface OpenStyleTagStartNode extends BaseNode {
      type: "OpenStyleTagStart";
      value: string;
    }

    OpenStyleTagEndNode

    OpenStyleTagEndNode represents a closing part of a start style tag. (e.g. >)

    interface OpenStyleTagEndNode extends BaseNode {
      type: "OpenStyleTagEnd";
      value: string;
    }

    CloseStyleTagNode

    CloseStyleTagNode represents a close style tag. (e.g. </style>)

    interface CloseStyleTagNode extends BaseNode {
      type: "CloseStyleTag";
      value: string;
    }

    StyleTagContentNode

    StyleTagContentNode represents the style content in a style tag.

    interface StyleTagContentNode extends BaseNode {
      type: "StyleTagContent";
      value: string;
    }

    CommentNode

    CommentNode represents a comment in HTML. (e.g. <!-- content -->)

    interface CommentNode extends BaseNode {
      type: "Comment";
      open: CommentOpenNode;
      close: CommentCloseNode;
      value: CommentContentNode;
    }

    CommentOpenNode

    CommentOpenNode represents the comment start character sequence. (e.g. <!--)

    interface CommentOpenNode extends BaseNode {
      type: "CommentOpen";
      value: string;
    }

    CommentCloseNode

    CommentCloseNode represents the comment end character sequence. (e.g. -->)

    interface CommentCloseNode extends BaseNode {
      type: "CommentClose";
      value: string;
    }

    CommentContentNode

    The CommentContentNode represents text in the comment.

    interface CommentContentNode extends BaseNode {
      type: "CommentContent";
      value: string;
    }

    DoctypeNode

    DoctypeNode represents the DOCTYPE in html.

    interface DoctypeNode extends BaseNode {
      type: "Doctype";
      attributes: Array<DoctypeAttributeNode>;
      open: DoctypeOpenNode;
      close: DoctypeCloseNode;
    }

    DoctypeOpenNode

    DoctypeOpenNode represents the doctype start character sequence. (e.g. <!DOCTYPE)

    interface DoctypeOpenNode extends BaseNode {
      type: "DoctypeOpen";
      value: string;
    }

    DoctypeCloseNode

    DoctypeCloseNode represents the doctype end character sequence (e.g. >)

    interface DoctypeCloseNode extends BaseNode {
      type: "DoctypeClose";
      value: string;
    }

    DoctypeAttributeNode

    DoctypeAttributeNode represents an attribute of doctype node. (e.g. html, "-//W3C//DTD HTML 4.01 Transitional//EN")

    interface DoctypeAttributeNode extends BaseNode {
      type: "DoctypeAttribute";
      key: DoctypeAttributeKey;
    }

    DoctypeAttributeValueNode

    DoctypeAttributeValueNode represents a value of a doctype node's attribute. (e.g. html, -//W3C//DTD HTML 4.01 Transitional//EN). It does not include wrapper characters (', ").

    interface DoctypeAttributeValueNode extends BaseNode {
      type: "DoctypeAttributeValue";
      value: string;
    }

    DoctypeAttributeWrapperStartNode

    DoctypeAttributeWrapperStartNode represents a left side character that wraps the value of the attribute. (e.g. ", ')

    interface DoctypeAttributeWrapperStartNode extends BaseNode {
      type: "DoctypeAttributeWrapperStart";
      value: string;
    }

    DoctypeAttributeWrapperEndNode

    DoctypeAttributeWrapperEndNode represents a right side character that wraps the value of the attribute. (e.g. ", ')

    interface DoctypeAttributeWrapperEndNode extends BaseNode {
      type: "DoctypeAttributeWrapperEnd";
      value: string;
    }

    License

    MIT