Source: lib/hls/manifest_text_parser.js

/*! @license
 * Shaka Player
 * Copyright 2016 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

goog.provide('shaka.hls.ManifestTextParser');

goog.require('shaka.hls.Attribute');
goog.require('shaka.hls.Playlist');
goog.require('shaka.hls.PlaylistType');
goog.require('shaka.hls.Segment');
goog.require('shaka.hls.Tag');
goog.require('shaka.hls.Utils');
goog.require('shaka.util.Error');
goog.require('shaka.util.StringUtils');
goog.require('shaka.util.TextParser');


/**
 * HLS manifest text parser.
 *
 * Turns the raw bytes of an HLS playlist into a shaka.hls.Playlist built from
 * shaka.hls.Tag and shaka.hls.Segment objects.  Tags parsed through an
 * instance are numbered from a per-instance counter.
 */
shaka.hls.ManifestTextParser = class {
  /** Creates a parser whose tag id counter starts at 0. */
  constructor() {
    /**
     * Id given to the next tag parsed by parseTag_().
     * @private {number}
     */
    this.globalId_ = 0;
  }

  /**
   * Parses the text of an HLS playlist in a single pass.
   *
   * The first line must be the #EXTM3U header.  The playlist starts out as
   * MASTER.  The first "relevant" tag settles the type: a media-playlist tag
   * or a segment tag makes it MEDIA, while EXT-X-STREAM-INF or EXT-X-MEDIA
   * locks it as MASTER.  When a segment tag is reached, that line and all the
   * lines after it are handed to parseSegments_().  The triggering line is
   * parsed again there, so it consumes two tag ids.
   *
   * @param {BufferSource} data
   * @return {!shaka.hls.Playlist}
   * @throws {shaka.util.Error} HLS_PLAYLIST_HEADER_MISSING when the first
   *   line is not the #EXTM3U header; HLS_INVALID_PLAYLIST_HIERARCHY when a
   *   segment tag shows up in a playlist already classified as MASTER;
   *   INVALID_HLS_TAG (from parseTag) when a non-comment line is not a tag.
   */
  parsePlaylist(data) {
    const ManifestTextParser = shaka.hls.ManifestTextParser;
    const PlaylistType = shaka.hls.PlaylistType;

    // Get the input as a string.  Normalize newlines to \n.
    let str = shaka.util.StringUtils.fromUTF8(data);
    str = str.replace(/\r\n|\r(?=[^\n]|$)/gm, '\n').trim();

    // Consecutive newlines act as a single separator.
    const lines = str.split(/\n+/m);

    if (!ManifestTextParser.STATIC_PATTERNS.header.test(lines[0])) {
      throw new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.MANIFEST,
          shaka.util.Error.Code.HLS_PLAYLIST_HEADER_MISSING);
    }

    let playlistType = PlaylistType.MASTER;
    let typeDetected = false;
    /** @type {!Array<!shaka.hls.Tag>} */
    const tags = [];
    // Set after an EXT-X-STREAM-INF tag so that the line following it (its
    // URI) is consumed without being parsed as a tag.
    let skipNextLine = false;

    // Line 0 is the header validated above, so parsing starts at line 1.
    for (let i = 1; i < lines.length; i++) {
      const line = lines[i];

      // NOTE(review): a comment line directly after EXT-X-STREAM-INF uses up
      // the pending skip (and was already stored as the URI), so the real URI
      // line would then be parsed as a tag.  Confirm whether that matters.
      if (shaka.hls.Utils.isComment(line) || skipNextLine) {
        skipNextLine = false;
        continue;
      }

      const tag = this.parseTag_(line);
      const isSegmentTag = ManifestTextParser.SEGMENT_TAGS.has(tag.name);

      // Detect the playlist type on the first relevant tag.
      if (!typeDetected) {
        if (ManifestTextParser.MEDIA_PLAYLIST_TAGS.has(tag.name) ||
            isSegmentTag) {
          playlistType = PlaylistType.MEDIA;
          typeDetected = true;
        } else if (tag.name == 'EXT-X-STREAM-INF' ||
                   tag.name == 'EXT-X-MEDIA') {
          // Master playlist detected; the type stays MASTER.
          typeDetected = true;
        }
      }

      // The first segment tag starts the segment section.
      if (isSegmentTag) {
        if (playlistType != PlaylistType.MEDIA) {
          // Only media playlists should contain segment tags
          throw new shaka.util.Error(
              shaka.util.Error.Severity.CRITICAL,
              shaka.util.Error.Category.MANIFEST,
              shaka.util.Error.Code.HLS_INVALID_PLAYLIST_HIERARCHY);
        }

        const segments = this.parseSegments_(lines, i, tags);
        return new shaka.hls.Playlist(playlistType, tags, segments);
      }

      tags.push(tag);

      // An EXT-X-STREAM-INF tag is followed by a URI of a media playlist.
      // Add the URI to the tag object.
      if (tag.name == 'EXT-X-STREAM-INF') {
        // NOTE(review): lines[i + 1] is undefined when the tag is the last
        // line of the playlist.
        tag.addAttribute(new shaka.hls.Attribute('URI', lines[i + 1]));
        skipNextLine = true;
      }
    }

    return new shaka.hls.Playlist(playlistType, tags);
  }

  /**
   * Parses the segment section of a media playlist into HLS Segment objects.
   *
   * Lines are read from startIndex to the end of the array.  Tags listed in
   * MEDIA_PLAYLIST_TAGS are appended to playlistTags (the array is mutated).
   * Other tags are collected until a URI line closes the current segment.
   *
   * @param {!Array<string>} lines
   * @param {number} startIndex Index of the first line to parse.
   * @param {!Array<!shaka.hls.Tag>} playlistTags
   * @return {!Array<shaka.hls.Segment>}
   * @private
   */
  parseSegments_(lines, startIndex, playlistTags) {
    /** @type {!Array<shaka.hls.Segment>} */
    const segments = [];

    // Tags gathered for the segment whose URI has not been reached yet.
    /** @type {!Array<shaka.hls.Tag>} */
    let segmentTags = [];

    /** @type {!Array<shaka.hls.Tag>} */
    let partialSegmentTags = [];

    // The last parsed EXT-X-MAP tag.
    /** @type {?shaka.hls.Tag} */
    let currentMapTag = null;

    for (let lineIndex = startIndex; lineIndex < lines.length; lineIndex++) {
      const line = lines[lineIndex];

      if (line.startsWith('#EXT')) {
        const tag = this.parseTag_(line);
        if (shaka.hls.ManifestTextParser.MEDIA_PLAYLIST_TAGS.has(tag.name)) {
          playlistTags.push(tag);
        } else if (tag.name == 'EXT-X-MAP') {
          // Remember the EXT-X-MAP tag; it is added to the tags of every
          // segment that follows it.
          currentMapTag = tag;
        } else if (tag.name == 'EXT-X-PART') {
          partialSegmentTags.push(tag);
        } else if (tag.name == 'EXT-X-PRELOAD-HINT') {
          const hintType = tag.getAttributeValue('TYPE');
          if (hintType == 'PART') {
            partialSegmentTags.push(tag);
          } else if (hintType == 'MAP') {
            // Rename the Preload Hint tag to be a Map tag.
            tag.setName('EXT-X-MAP');
            currentMapTag = tag;
          }
          // A preload hint of any other TYPE is dropped.
        } else {
          segmentTags.push(tag);
        }
        continue;
      }

      if (shaka.hls.Utils.isComment(line)) {
        // Skip comments.
        continue;
      }

      // Anything else is a segment URI.  The URI appears after all of the
      // tags describing the segment, so the segment is complete here.
      const verbatimSegmentUri = line.trim();
      // Attach the last parsed EXT-X-MAP tag to the segment.
      if (currentMapTag) {
        segmentTags.push(currentMapTag);
      }
      segments.push(new shaka.hls.Segment(
          verbatimSegmentUri, segmentTags, partialSegmentTags));
      segmentTags = [];
      partialSegmentTags = [];
    }

    // After all the partial segments of a regular segment are published,
    // an EXTINF tag and URI for a regular segment containing the same media
    // content get published last.
    // If no EXTINF tag follows the list of partial segment tags at the end,
    // create a segment with an empty URI to wrap the partial segment tags.
    if (partialSegmentTags.length) {
      if (currentMapTag) {
        segmentTags.push(currentMapTag);
      }
      segments.push(
          new shaka.hls.Segment('', segmentTags, partialSegmentTags));
    }

    return segments;
  }

  /**
   * Parses a string into an HLS Tag object while tracking what id to use next.
   * Each call uses the current value of the id counter and then increments it.
   *
   * @param {string} word
   * @return {!shaka.hls.Tag}
   * @throws {shaka.util.Error} INVALID_HLS_TAG (from parseTag) when the line
   *   is not a valid tag.
   * @private
   */
  parseTag_(word) {
    return shaka.hls.ManifestTextParser.parseTag(this.globalId_++, word);
  }

  /**
   * Parses a string into an HLS Tag object.
   *
   * @param {number} id
   * @param {string} word
   * @return {!shaka.hls.Tag}
   * @throws {shaka.util.Error} INVALID_HLS_TAG when the line does not match
   *   the tag pattern; the offending line is passed as error data.
   */
  static parseTag(id, word) {
    /* HLS tags start with '#EXT'. A tag can have a set of attributes
      (#EXT-<tagname>:<attribute list>) and/or a value (#EXT-<tagname>:<value>).
      An attribute's format is 'AttributeName=AttributeValue'.
      The parsing logic goes like this:
       1. Everything before ':' is a name (we ignore '#').
       2. Everything after ':' is a list of comma-separated items,
            2a. The first item might be a value, if it does not contain '='.
            2b. Otherwise, items are attributes.
       3. If there is no ":", it's a simple tag with no attributes and no value.
    */
    const patterns = shaka.hls.ManifestTextParser.STATIC_PATTERNS;

    const blocks = word.match(patterns.tagBlocks);
    if (!blocks) {
      throw new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.MANIFEST,
          shaka.util.Error.Code.INVALID_HLS_TAG,
          word);
    }
    const name = blocks[1];
    const data = blocks[2];
    const attributes = [];
    let value;

    // An absent or empty data part means the tag has neither a value nor
    // attributes.
    if (data) {
      const parser = new shaka.util.TextParser(data);

      // The value and attribute patterns are shared and global, so they carry
      // a lastIndex from earlier tags.  Reset it before the first use here.
      patterns.valueRegex.lastIndex = 0;
      const blockValue = parser.readRegex(patterns.valueRegex);
      if (blockValue) {
        value = blockValue[1];
      }

      // NOTE(review): lastIndex is only reset before the first attribute
      // read; presumably TextParser.readRegex positions the regex itself for
      // the reads inside the loop -- confirm.
      patterns.attributeRegex.lastIndex = 0;
      let blockAttrs = parser.readRegex(patterns.attributeRegex);
      while (blockAttrs) {
        const attrName = blockAttrs[1];
        // [2] is the quoted form, [3] the unquoted form.
        // NOTE(review): an empty quoted value ("") is falsy, so attrValue
        // ends up undefined rather than '' -- confirm this is intended.
        const attrValue = blockAttrs[2] || blockAttrs[3];
        attributes.push(new shaka.hls.Attribute(attrName, attrValue));
        parser.skipWhitespace();
        blockAttrs = parser.readRegex(patterns.attributeRegex);
      }
    }

    return new shaka.hls.Tag(id, name, attributes, value);
  }
};

/**
 * Static pre-compiled regex patterns, shared across all parser instances so
 * that the patterns are created once rather than on every parse call.
 *
 * valueRegex and attributeRegex carry the `g` flag and therefore keep a
 * lastIndex between uses; parseTag() resets it to 0 before the first read of
 * each tag.
 *
 * @const {{tagBlocks: !RegExp, valueRegex: !RegExp, attributeRegex: !RegExp,
 *   header: !RegExp}}
 */
shaka.hls.ManifestTextParser.STATIC_PATTERNS = {
  // Splits a tag line into its name ([1]: 'EXT' plus everything up to the
  // first ':', without the leading '#') and, when a ':' is present, the rest
  // of the line ([2]).
  tagBlocks: /^#(EXT[^:]*)(?::(.*))?$/,
  // Matches the '#EXTM3U' playlist header at the start of a line, either
  // ending the line or followed by a space, tab or newline.
  header: /^#EXTM3U($|[ \t\n])/m,

  // Regex: one or more characters that are neither a comma nor an equals
  // sign, at the beginning of the input ([1]), terminated by a comma or end
  // of line.
  valueRegex: /^([^,=]+)(?:,|$)/g,

  // Regex:
  // 1. Key name: one or more non-equals-sign characters ([1])
  // 2. Equals sign
  // 3. Either:
  //   a. A quoted string (everything up to the next quote, [2])
  //   b. An unquoted string
  //    (everything up to the next comma, quote, or end of line, [3])
  // 4. Either:
  //   a. A comma
  //   b. End of line
  attributeRegex: /([^=]+)=(?:"([^"]*)"|([^",]*))(?:,|$)/g,
};


/**
 * HLS tags that only appear in Media Playlists.
 *
 * parsePlaylist() uses this set to determine the playlist type: seeing one of
 * these tags before the type has been settled marks the playlist as MEDIA.
 * parseSegments_() uses it to append these tags to the playlist's tags
 * instead of to a segment's tags.
 *
 * EXT-X-DATERANGE is also listed in SEGMENT_TAGS.
 *
 * Stored as a Set so membership checks use has().
 *
 * @const {!Set<string>}
 */
shaka.hls.ManifestTextParser.MEDIA_PLAYLIST_TAGS = new Set([
  'EXT-X-TARGETDURATION',
  'EXT-X-MEDIA-SEQUENCE',
  'EXT-X-DISCONTINUITY-SEQUENCE',
  'EXT-X-PLAYLIST-TYPE',
  'EXT-X-I-FRAMES-ONLY',
  'EXT-X-ENDLIST',
  'EXT-X-SERVER-CONTROL',
  'EXT-X-SKIP',
  'EXT-X-PART-INF',
  'EXT-X-DATERANGE',
]);


/**
 * HLS tags that only appear on Segments in a Media Playlist.
 *
 * Used to determine the start of the segments info: the first of these tags
 * met by parsePlaylist() hands the remaining lines over to parseSegments_().
 * If the playlist type has not been settled by then, the tag also marks the
 * playlist as MEDIA.
 *
 * EXT-X-DATERANGE is also listed in MEDIA_PLAYLIST_TAGS.
 *
 * Stored as a Set so membership checks use has().
 *
 * @const {!Set<string>}
 */
shaka.hls.ManifestTextParser.SEGMENT_TAGS = new Set([
  'EXTINF',
  'EXT-X-BYTERANGE',
  'EXT-X-DISCONTINUITY',
  'EXT-X-PROGRAM-DATE-TIME',
  'EXT-X-KEY',
  'EXT-X-DATERANGE',
  'EXT-X-MAP',
  'EXT-X-GAP',
  'EXT-X-TILES',
]);