htmlparser2

  • Version 7.1.2
  • Published
  • 94.8 kB
  • 4 dependencies
  • MIT license

Install

npm i htmlparser2
yarn add htmlparser2
pnpm add htmlparser2

Overview

Fast & forgiving HTML/XML parser

Index

Functions

function createDomStream

createDomStream: (
cb: (error: Error | null, dom: Node[]) => void,
options?: Options,
elementCb?: (element: any) => void
) => Parser;
  • Creates a parser instance, with an attached DOM handler.

    Parameter cb

    A callback that will be called once parsing has been completed.

    Parameter options

    Optional options for the parser and DOM builder.

    Parameter elementCb

    An optional callback that will be called every time a tag has been completed inside of the DOM.

function parseDocument

parseDocument: (data: string, options?: Options) => any;
  • Parses the data, returns the resulting document.

    Parameter data

    The data that should be parsed.

    Parameter options

    Optional options for the parser and DOM builder.

function parseDOM

parseDOM: (data: string, options?: Options) => Node[];
  • Parses data, returns an array of the root nodes.

    Note that the root nodes still have a Document node as their parent. Use parseDocument to get the Document node instead.

    Parameter data

    The data that should be parsed.

    Parameter options

    Optional options for the parser and DOM builder.

    Deprecated

    Use parseDocument instead.

function parseFeed

parseFeed: (
feed: string,
options?: ParserOptions & DomHandlerOptions
) => Feed | null;
  • Parse a feed.

    Parameter feed

    The feed that should be parsed, as a string.

    Parameter options

    Optionally, options for parsing. When using this, you should set xmlMode to true.

Classes

class FeedHandler

class FeedHandler extends DomHandler {}
  • Deprecated

    Handler is no longer necessary; use getFeed or parseFeed instead.

constructor

constructor(callback?: any, options?: any);
  • Parameter callback

    Parameter options

property feed

feed?: any;

    method onend

    onend: () => void;

      class Parser

      class Parser {}

        constructor

        constructor(cbs?: Partial<Handler>, options?: ParserOptions);

          property endIndex

          endIndex: number;
          • The end index of the last event.

          property startIndex

          startIndex: number;
          • The start index of the last event.

          method done

          done: (chunk?: string) => void;
          • Alias of end, for backwards compatibility.

            Parameter chunk

            Optional final chunk to parse.

            Deprecated

          method end

          end: (chunk?: string) => void;
          • Parses the end of the buffer and clears the stack, calls onend.

            Parameter chunk

            Optional final chunk to parse.

          method isVoidElement

          protected isVoidElement: (name: string) => boolean;

            method parseChunk

            parseChunk: (chunk: string) => void;
            • Alias of write, for backwards compatibility.

              Parameter chunk

              Chunk to parse.

              Deprecated

            method parseComplete

            parseComplete: (data: string) => void;
            • Resets the parser, then parses a complete document and pushes it to the handler.

              Parameter data

              Document to parse.

            method pause

            pause: () => void;
            • Pauses parsing. The parser won't emit events until resume is called.

            method reset

            reset: () => void;
            • Resets the parser to a blank state, ready to parse a new HTML document.

            method resume

            resume: () => void;
            • Resumes parsing after pause was called.

            method write

            write: (chunk: string) => void;
            • Parses a chunk of data and calls the corresponding callbacks.

              Parameter chunk

              Chunk to parse.

            class RssHandler

            class FeedHandler extends DomHandler {}
            • Deprecated

              Handler is no longer necessary; use getFeed or parseFeed instead.

            constructor

            constructor(callback?: any, options?: any);
            • Parameter callback

              Parameter options

            property feed

            feed?: any;

              method onend

              onend: () => void;

                class Tokenizer

                class Tokenizer {}

                  constructor

                  constructor(
                  { xmlMode, decodeEntities }: { xmlMode?: boolean; decodeEntities?: boolean },
                  cbs: Callbacks
                  );

                    property sectionStart

                    sectionStart: number;
                    • The beginning of the section that is currently being read.

                    method end

                    end: (chunk?: string) => void;

                      method getAbsoluteIndex

                      getAbsoluteIndex: () => number;
                      • The current index within all of the written data.

                      method getAbsoluteSectionStart

                      getAbsoluteSectionStart: () => number;
                      • The start of the current section.

                      method pause

                      pause: () => void;

                        method reset

                        reset: () => void;

                          method resume

                          resume: () => void;

                            method write

                            write: (chunk: string) => void;

                              Interfaces

                              interface ParserOptions

                              interface ParserOptions {}

                                property decodeEntities

                                decodeEntities?: boolean;
                                • Decode entities within the document.

                                  Default: true

                                property lowerCaseAttributeNames

                                lowerCaseAttributeNames?: boolean;
                                • If set to true, all attribute names will be lowercased. This has a noticeable impact on speed.

                                  Default: !xmlMode

                                property lowerCaseTags

                                lowerCaseTags?: boolean;
                                • If set to true, all tags will be lowercased.

                                  Default: !xmlMode

                                property recognizeCDATA

                                recognizeCDATA?: boolean;
                                • If set to true, CDATA sections will be recognized as text even if the xmlMode option is not enabled. NOTE: If xmlMode is set to true then CDATA sections will always be recognized as text.

                                  Default: xmlMode

                                property recognizeSelfClosing

                                recognizeSelfClosing?: boolean;
                                • If set to true, self-closing tags will trigger the onclosetag event even if xmlMode is not set to true. NOTE: If xmlMode is set to true then self-closing tags will always be recognized.

                                  Default: xmlMode

                                property Tokenizer

                                Tokenizer?: typeof Tokenizer;
                                • Allows the default tokenizer to be overwritten.

                                property xmlMode

                                xmlMode?: boolean;
                                • Indicates whether special tags (<script>, <style>, and <title>) should get special treatment and if "empty" tags (e.g. <br>) can have children. If false, the content of special tags will be text only. For feeds and other XML content (documents that don't consist of HTML), set this to true.

                                  Default: false

                                interface TokenizerCallbacks

                                interface Callbacks {}

                                  method onattribdata

                                  onattribdata: (value: string) => void;

                                    method onattribend

                                    onattribend: (quote: string | undefined | null) => void;

                                      method onattribname

                                      onattribname: (name: string) => void;

                                        method oncdata

                                        oncdata: (data: string) => void;

                                          method onclosetag

                                          onclosetag: (name: string) => void;

                                            method oncomment

                                            oncomment: (data: string) => void;

                                              method ondeclaration

                                              ondeclaration: (content: string) => void;

                                                method onend

                                                onend: () => void;

                                                  method onerror

                                                  onerror: (error: Error, state?: State) => void;

                                                    method onopentagend

                                                    onopentagend: () => void;

                                                      method onopentagname

                                                      onopentagname: (name: string) => void;

                                                        method onprocessinginstruction

                                                        onprocessinginstruction: (instruction: string) => void;

                                                          method onselfclosingtag

                                                          onselfclosingtag: () => void;

                                                            method ontext

                                                            ontext: (value: string) => void;

                                                              Package Files (4)

                                                              Dependencies (4)

                                                              Dev Dependencies (10)

                                                              Peer Dependencies (0)

                                                              No peer dependencies.

                                                              Badge

                                                              To add a jsDocs.io badge like this one to your package's README, use the codes available below.

                                                              You may also use Shields.io to create a custom badge linking to https://www.jsdocs.io/package/htmlparser2.

                                                              • Markdown
                                                                [![jsDocs.io](https://img.shields.io/badge/jsDocs.io-reference-blue)](https://www.jsdocs.io/package/htmlparser2)
                                                              • HTML
                                                                <a href="https://www.jsdocs.io/package/htmlparser2"><img src="https://img.shields.io/badge/jsDocs.io-reference-blue" alt="jsDocs.io"></a>