htmlparser2

  • Version 12.0.0
  • Published
  • 235 kB
  • 4 dependencies
  • MIT license

Install

npm i htmlparser2
yarn add htmlparser2
pnpm add htmlparser2

Overview

Fast & forgiving HTML/XML parser

Index

Functions

function createDocumentStream

createDocumentStream: (
callback: (error: Error | null, document: Document) => void,
options?: Options,
elementCallback?: (element: Element) => void
) => Parser;
  • Creates a parser instance, with an attached DOM handler.

    Parameter callback

    A callback that will be called once parsing has been completed, with the resulting document.

    Parameter options

    Optional options for the parser and DOM handler.

    Parameter elementCallback

    An optional callback that will be called every time a tag has been completed inside of the DOM.

function parseDocument

parseDocument: (data: string, options?: Options) => Document;
  • Parses the data, returns the resulting document.

    Parameter data

    The data that should be parsed.

    Parameter options

    Optional options for the parser and DOM handler.

function parseFeed

parseFeed: (feed: string, options?: Options) => Feed | null;
  • Parse a feed.

    Parameter feed

    The feed that should be parsed, as a string.

    Parameter options

    Optionally, options for parsing. When using this, you should set xmlMode to true.

Classes

class Parser

class Parser implements Callbacks {}
  • Incremental parser implementation.

constructor

constructor(cbs?: Partial<Handler>, options?: ParserOptions);

    property endIndex

    endIndex: number;
    • The end index of the last event.

    property startIndex

    startIndex: number;
    • The start index of the last event.

    method end

    end: (chunk?: string) => void;
    • Parses the end of the buffer and clears the stack, calls onend.

      Parameter chunk

      Optional final chunk to parse.

    method isVoidElement

    protected isVoidElement: (name: string) => boolean;
    • Checks if the current tag is a void element. Override this if you want to specify your own additional void elements.

      Parameter name

      Name of the tag to check.

    method parseComplete

    parseComplete: (data: string) => void;
    • Resets the parser, then parses a complete document and pushes it to the handler.

      Parameter data

      Document to parse.

    method pause

    pause: () => void;
    • Pauses parsing. The parser won't emit events until resume is called.

    method reset

    reset: () => void;
    • Resets the parser to a blank state, ready to parse a new HTML document.

    method resume

    resume: () => void;
    • Resumes parsing after pause was called.

    method write

    write: (chunk: string) => void;
    • Parses a chunk of data and calls the corresponding callbacks.

      Parameter chunk

      Chunk to parse.

    class Tokenizer

    class Tokenizer {}
    • Tokenizer implementation used by Parser.

    constructor

    constructor(
    {
    xmlMode,
    decodeEntities,
    recognizeSelfClosing,
    }: {
    xmlMode?: boolean;
    decodeEntities?: boolean;
    recognizeSelfClosing?: boolean;
    },
    cbs: Callbacks
    );

      property running

      running: boolean;
      • Indicates whether the tokenizer is running, i.e. has not been paused.

      method end

      end: () => void;

        method pause

        pause: () => void;

          method reset

          reset: () => void;

            method resume

            resume: () => void;

              method write

              write: (chunk: string) => void;

                Interfaces

                interface Handler

                interface Handler {}
                • Parser callback interface used by the tokenizer.

                method onattribute

                onattribute: (
                name: string,
                value: string,
                quote?: string | undefined | null
                ) => void;
                • Parameter name

                  Name of the attribute.

                  Parameter value

                  Value of the attribute.

                  Parameter quote

                  Quotes used around the attribute. null if the attribute has no quotes around the value, undefined if the attribute has no value.

                method oncdataend

                oncdataend: () => void;

                  method oncdatastart

                  oncdatastart: () => void;

                    method onclosetag

                    onclosetag: (name: string, isImplied: boolean) => void;

                      method oncomment

                      oncomment: (data: string) => void;

                        method oncommentend

                        oncommentend: () => void;

                          method onend

                          onend: () => void;
                          • Signals the handler that parsing is done.

                          method onerror

                          onerror: (error: Error) => void;

                            method onopentag

                            onopentag: (
                            name: string,
                            attribs: { [s: string]: string },
                            isImplied: boolean
                            ) => void;

                              method onopentagname

                              onopentagname: (name: string) => void;

                                method onparserinit

                                onparserinit: (parser: Parser) => void;

                                  method onprocessinginstruction

                                  onprocessinginstruction: (name: string, data: string) => void;

                                    method onreset

                                    onreset: () => void;
                                    • Resets the handler back to starting state.

                                    method ontext

                                    ontext: (data: string) => void;

                                      interface ParserOptions

                                      interface ParserOptions {}
                                      • Options for the streaming HTML/XML parser.

                                      property decodeEntities

                                      decodeEntities?: boolean;
                                      • Decode entities within the document. Default: true

                                      property lowerCaseAttributeNames

                                      lowerCaseAttributeNames?: boolean;
                                      • If set to true, all attribute names will be lowercased. This has a noticeable impact on speed. Default: !xmlMode

                                      property lowerCaseTags

                                      lowerCaseTags?: boolean;
                                      • If set to true, all tags will be lowercased. Default: !xmlMode

                                      property recognizeCDATA

                                      recognizeCDATA?: boolean;
                                      • If set to true, CDATA sections will be recognized as text even if the xmlMode option is not enabled. NOTE: If xmlMode is set to true then CDATA sections will always be recognized as text. Default: xmlMode

                                      property recognizeSelfClosing

                                      recognizeSelfClosing?: boolean;
                                      • If set to true, self-closing tags will trigger the onclosetag event even if xmlMode is not set to true. NOTE: If xmlMode is set to true then self-closing tags will always be recognized. Default: xmlMode

                                      property Tokenizer

                                      Tokenizer?: typeof Tokenizer;
                                      • Allows the default tokenizer to be overwritten.

                                      property xmlMode

                                      xmlMode?: boolean;
                                      • Indicates whether special tags (<script>, <style>, and <title>) should get special treatment and if "empty" tags (e.g. <br>) can have children. If false, the content of special tags will be text only. For feeds and other XML content (documents that don't consist of HTML), set this to true. Default: false

                                      interface TokenizerCallbacks

                                      interface Callbacks {}
                                      • Low-level tokenizer callback interface.

                                      method isInForeignContext

                                      isInForeignContext: () => boolean;

                                        method onattribdata

                                        onattribdata: (start: number, endIndex: number) => void;

                                          method onattribend

                                          onattribend: (quote: QuoteType, endIndex: number) => void;

                                            method onattribentity

                                            onattribentity: (codepoint: number) => void;

                                              method onattribname

                                              onattribname: (start: number, endIndex: number) => void;

                                                method oncdata

                                                oncdata: (start: number, endIndex: number, endOffset: number) => void;

                                                  method onclosetag

                                                  onclosetag: (start: number, endIndex: number) => void;

                                                    method oncomment

                                                    oncomment: (start: number, endIndex: number, endOffset: number) => void;

                                                      method ondeclaration

                                                      ondeclaration: (start: number, endIndex: number) => void;

                                                        method onend

                                                        onend: () => void;

                                                          method onopentagend

                                                          onopentagend: (endIndex: number) => void;

                                                            method onopentagname

                                                            onopentagname: (start: number, endIndex: number) => void;

                                                              method onprocessinginstruction

                                                              onprocessinginstruction: (start: number, endIndex: number) => void;

                                                                method onselfclosingtag

                                                                onselfclosingtag: (endIndex: number) => void;

                                                                  method ontext

                                                                  ontext: (start: number, endIndex: number) => void;

                                                                    method ontextentity

                                                                    ontextentity: (codepoint: number, endIndex: number) => void;

                                                                      Enums

                                                                      enum QuoteType

                                                                      enum QuoteType {
                                                                      NoValue = 0,
                                                                      Unquoted = 1,
                                                                      Single = 2,
                                                                      Double = 3,
                                                                      }
                                                                      • Quote style used for parsed attributes.

                                                                      member Double

                                                                      Double = 3

                                                                        member NoValue

                                                                        NoValue = 0

                                                                          member Single

                                                                          Single = 2

                                                                            member Unquoted

                                                                            Unquoted = 1

                                                                              Type Aliases

                                                                              type Options

                                                                              type Options = ParserOptions & DomHandlerOptions;
                                                                              • Combined parser and handler options.

                                                                              Package Files (3)

                                                                              Dependencies (4)

                                                                              Dev Dependencies (8)

                                                                              Peer Dependencies (0)

                                                                              No peer dependencies.

                                                                              Badge

                                                                              To add a badge like this one (jsDocs.io badge) to your package's README, use the codes available below.

                                                                              You may also use Shields.io to create a custom badge linking to https://www.jsdocs.io/package/htmlparser2.

                                                                              • Markdown
                                                                                [![jsDocs.io](https://img.shields.io/badge/jsDocs.io-reference-blue)](https://www.jsdocs.io/package/htmlparser2)
                                                                              • HTML
                                                                                <a href="https://www.jsdocs.io/package/htmlparser2"><img src="https://img.shields.io/badge/jsDocs.io-reference-blue" alt="jsDocs.io"></a>