/*
 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef HTMLToken_h
#define HTMLToken_h

#include "core/dom/Attribute.h"
#include "core/html/parser/HTMLParserIdioms.h"
#include "wtf/Forward.h"
#include "wtf/PtrUtil.h"
#include <memory>

namespace blink {

// Plain data holder for the DOCTYPE-specific parts of a token: the public and
// system identifier buffers, a flag per identifier recording whether it has
// been set, and the force-quirks flag. A freshly constructed instance has all
// three flags false and both identifier buffers empty.
class DoctypeData {
  USING_FAST_MALLOC(DoctypeData);
  WTF_MAKE_NONCOPYABLE(DoctypeData);

 public:
  DoctypeData() = default;

  // True once the corresponding identifier has been set (possibly to empty).
  bool m_hasPublicIdentifier = false;
  bool m_hasSystemIdentifier = false;

  // Identifier contents; only meaningful when the matching flag is true.
  WTF::Vector<UChar> m_publicIdentifier;
  WTF::Vector<UChar> m_systemIdentifier;

  bool m_forceQuirks = false;
};

// Scans |attributes| front to back and returns a pointer to the first entry
// whose name matches() |name|, or null when there is no such entry. The
// returned pointer refers to storage owned by |attributes|.
static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes,
                                               const QualifiedName& name) {
  for (Attribute& candidate : attributes) {
    if (candidate.name().matches(name))
      return &candidate;
  }
  return nullptr;
}

class HTMLToken {
  WTF_MAKE_NONCOPYABLE(HTMLToken);
  USING_FAST_MALLOC(HTMLToken);

 public:
  enum TokenType {
    Uninitialized,
    DOCTYPE,
    StartTag,
    EndTag,
    Comment,
    Character,
    EndOfFile,
  };

  class Attribute {
    DISALLOW_NEW_EXCEPT_PLACEMENT_NEW();

   public:
    class Range {
      DISALLOW_NEW();

     public:
      static constexpr int kInvalidOffset = -1;

      inline void clear() {
#if ENABLE(ASSERT)
        start = kInvalidOffset;
        end = kInvalidOffset;
#endif
      }

      // Check Range instance that is actively being parsed.
      inline void checkValidStart() const {
        DCHECK_NE(start, kInvalidOffset);
        DCHECK_GE(start, 0);
      }

      // Check Range instance which finished parse.
      inline void checkValid() const {
        checkValidStart();
        DCHECK_NE(end, kInvalidOffset);
        DCHECK_GE(end, 0);
        DCHECK_LE(start, end);
      }

      int start;
      int end;
    };

    AtomicString name() const { return AtomicString(m_name); }
    String nameAttemptStaticStringCreation() const {
      return attemptStaticStringCreation(m_name, Likely8Bit);
    }
    const Vector<UChar, 32>& nameAsVector() const { return m_name; }

    void appendToName(UChar c) { m_name.append(c); }

    PassRefPtr<StringImpl> value8BitIfNecessary() const {
      return StringImpl::create8BitIfPossible(m_value);
    }
    String value() const { return String(m_value); }

    void appendToValue(UChar c) { m_value.append(c); }
    void appendToValue(const String& value) { value.appendTo(m_value); }
    void clearValue() { m_value.clear(); }

    const Range& nameRange() const { return m_nameRange; }
    const Range& valueRange() const { return m_valueRange; }
    Range& mutableNameRange() { return m_nameRange; }
    Range& mutableValueRange() { return m_valueRange; }

   private:
    Vector<UChar, 32> m_name;
    Vector<UChar, 32> m_value;
    Range m_nameRange;
    Range m_valueRange;
  };

  typedef Vector<Attribute, 10> AttributeList;

  // By using an inline capacity of 256, we avoid spilling over into an malloced
  // buffer approximately 99% of the time based on a non-scientific browse
  // around a number of popular web sites on 23 May 2013.
  typedef Vector<UChar, 256> DataVector;

  HTMLToken() { clear(); }

  void clear() {
    m_type = Uninitialized;
    m_range.clear();
    m_range.start = 0;
    m_baseOffset = 0;
    // Don't call Vector::clear() as that would destroy the
    // alloced VectorBuffer. If the innerHTML'd content has
    // two 257 character text nodes in a row, we'll needlessly
    // thrash malloc. When we finally finish the parse the
    // HTMLToken will be destroyed and the VectorBuffer released.
    m_data.shrink(0);
    m_orAllData = 0;
  }

  bool isUninitialized() { return m_type == Uninitialized; }
  TokenType type() const { return m_type; }

  void makeEndOfFile() {
    ASSERT(m_type == Uninitialized);
    m_type = EndOfFile;
  }

  // Range and offset methods exposed for HTMLSourceTracker and
  // HTMLViewSourceParser.
  int startIndex() const { return m_range.start; }
  int endIndex() const { return m_range.end; }

  void setBaseOffset(int offset) { m_baseOffset = offset; }

  void end(int endOffset) { m_range.end = endOffset - m_baseOffset; }

  const DataVector& data() const {
    ASSERT(m_type == Character || m_type == Comment || m_type == StartTag ||
           m_type == EndTag);
    return m_data;
  }

  bool isAll8BitData() const { return (m_orAllData <= 0xff); }

  const DataVector& name() const {
    ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
    return m_data;
  }

  void appendToName(UChar character) {
    ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE);
    ASSERT(character);
    m_data.append(character);
    m_orAllData |= character;
  }

  /* DOCTYPE Tokens */

  bool forceQuirks() const {
    ASSERT(m_type == DOCTYPE);
    return m_doctypeData->m_forceQuirks;
  }

  void setForceQuirks() {
    ASSERT(m_type == DOCTYPE);
    m_doctypeData->m_forceQuirks = true;
  }

  void beginDOCTYPE() {
    ASSERT(m_type == Uninitialized);
    m_type = DOCTYPE;
    m_doctypeData = wrapUnique(new DoctypeData);
  }

  void beginDOCTYPE(UChar character) {
    ASSERT(character);
    beginDOCTYPE();
    m_data.append(character);
    m_orAllData |= character;
  }

  // FIXME: Distinguish between a missing public identifer and an empty one.
  const WTF::Vector<UChar>& publicIdentifier() const {
    ASSERT(m_type == DOCTYPE);
    return m_doctypeData->m_publicIdentifier;
  }

  // FIXME: Distinguish between a missing system identifer and an empty one.
  const WTF::Vector<UChar>& systemIdentifier() const {
    ASSERT(m_type == DOCTYPE);
    return m_doctypeData->m_systemIdentifier;
  }

  void setPublicIdentifierToEmptyString() {
    ASSERT(m_type == DOCTYPE);
    m_doctypeData->m_hasPublicIdentifier = true;
    m_doctypeData->m_publicIdentifier.clear();
  }

  void setSystemIdentifierToEmptyString() {
    ASSERT(m_type == DOCTYPE);
    m_doctypeData->m_hasSystemIdentifier = true;
    m_doctypeData->m_systemIdentifier.clear();
  }

  void appendToPublicIdentifier(UChar character) {
    ASSERT(character);
    ASSERT(m_type == DOCTYPE);
    ASSERT(m_doctypeData->m_hasPublicIdentifier);
    m_doctypeData->m_publicIdentifier.append(character);
  }

  void appendToSystemIdentifier(UChar character) {
    ASSERT(character);
    ASSERT(m_type == DOCTYPE);
    ASSERT(m_doctypeData->m_hasSystemIdentifier);
    m_doctypeData->m_systemIdentifier.append(character);
  }

  std::unique_ptr<DoctypeData> releaseDoctypeData() {
    return std::move(m_doctypeData);
  }

  /* Start/End Tag Tokens */

  bool selfClosing() const {
    ASSERT(m_type == StartTag || m_type == EndTag);
    return m_selfClosing;
  }

  void setSelfClosing() {
    ASSERT(m_type == StartTag || m_type == EndTag);
    m_selfClosing = true;
  }

  void beginStartTag(UChar character) {
    ASSERT(character);
    ASSERT(m_type == Uninitialized);
    m_type = StartTag;
    m_selfClosing = false;
    m_currentAttribute = 0;
    m_attributes.clear();

    m_data.append(character);
    m_orAllData |= character;
  }

  void beginEndTag(LChar character) {
    ASSERT(m_type == Uninitialized);
    m_type = EndTag;
    m_selfClosing = false;
    m_currentAttribute = 0;
    m_attributes.clear();

    m_data.append(character);
  }

  void beginEndTag(const Vector<LChar, 32>& characters) {
    ASSERT(m_type == Uninitialized);
    m_type = EndTag;
    m_selfClosing = false;
    m_currentAttribute = 0;
    m_attributes.clear();

    m_data.appendVector(characters);
  }

  void addNewAttribute() {
    ASSERT(m_type == StartTag || m_type == EndTag);
    m_attributes.grow(m_attributes.size() + 1);
    m_currentAttribute = &m_attributes.last();
    m_currentAttribute->mutableNameRange().clear();
    m_currentAttribute->mutableValueRange().clear();
  }

  void beginAttributeName(int offset) {
    m_currentAttribute->mutableNameRange().start = offset - m_baseOffset;
    m_currentAttribute->nameRange().checkValidStart();
  }

  void endAttributeName(int offset) {
    int index = offset - m_baseOffset;
    m_currentAttribute->mutableNameRange().end = index;
    m_currentAttribute->nameRange().checkValid();
    m_currentAttribute->mutableValueRange().start = index;
    m_currentAttribute->mutableValueRange().end = index;
  }

  void beginAttributeValue(int offset) {
    m_currentAttribute->mutableValueRange().clear();
    m_currentAttribute->mutableValueRange().start = offset - m_baseOffset;
    m_currentAttribute->valueRange().checkValidStart();
  }

  void endAttributeValue(int offset) {
    m_currentAttribute->mutableValueRange().end = offset - m_baseOffset;
    m_currentAttribute->valueRange().checkValid();
  }

  void appendToAttributeName(UChar character) {
    ASSERT(character);
    ASSERT(m_type == StartTag || m_type == EndTag);
    m_currentAttribute->nameRange().checkValidStart();
    m_currentAttribute->appendToName(character);
  }

  void appendToAttributeValue(UChar character) {
    ASSERT(character);
    ASSERT(m_type == StartTag || m_type == EndTag);
    m_currentAttribute->valueRange().checkValidStart();
    m_currentAttribute->appendToValue(character);
  }

  void appendToAttributeValue(size_t i, const String& value) {
    ASSERT(!value.isEmpty());
    ASSERT(m_type == StartTag || m_type == EndTag);
    m_attributes[i].appendToValue(value);
  }

  const AttributeList& attributes() const {
    ASSERT(m_type == StartTag || m_type == EndTag);
    return m_attributes;
  }

  const Attribute* getAttributeItem(const QualifiedName& name) const {
    for (unsigned i = 0; i < m_attributes.size(); ++i) {
      if (m_attributes.at(i).name() == name.localName())
        return &m_attributes.at(i);
    }
    return 0;
  }

  // Used by the XSSAuditor to nuke XSS-laden attributes.
  void eraseValueOfAttribute(size_t i) {
    ASSERT(m_type == StartTag || m_type == EndTag);
    m_attributes[i].clearValue();
  }

  /* Character Tokens */

  // Starting a character token works slightly differently than starting
  // other types of tokens because we want to save a per-character branch.
  void ensureIsCharacterToken() {
    ASSERT(m_type == Uninitialized || m_type == Character);
    m_type = Character;
  }

  const DataVector& characters() const {
    ASSERT(m_type == Character);
    return m_data;
  }

  void appendToCharacter(char character) {
    ASSERT(m_type == Character);
    m_data.append(character);
  }

  void appendToCharacter(UChar character) {
    ASSERT(m_type == Character);
    m_data.append(character);
    m_orAllData |= character;
  }

  void appendToCharacter(const Vector<LChar, 32>& characters) {
    ASSERT(m_type == Character);
    m_data.appendVector(characters);
  }

  /* Comment Tokens */

  const DataVector& comment() const {
    ASSERT(m_type == Comment);
    return m_data;
  }

  void beginComment() {
    ASSERT(m_type == Uninitialized);
    m_type = Comment;
  }

  void appendToComment(UChar character) {
    ASSERT(character);
    ASSERT(m_type == Comment);
    m_data.append(character);
    m_orAllData |= character;
  }

  // Only for XSSAuditor
  void eraseCharacters() {
    ASSERT(m_type == Character);
    m_data.clear();
    m_orAllData = 0;
  }

 private:
  TokenType m_type;
  Attribute::Range m_range;  // Always starts at zero.
  int m_baseOffset;
  DataVector m_data;
  UChar m_orAllData;

  // For StartTag and EndTag
  bool m_selfClosing;
  AttributeList m_attributes;

  // A pointer into m_attributes used during lexing.
  Attribute* m_currentAttribute;

  // For DOCTYPE
  std::unique_ptr<DoctypeData> m_doctypeData;
};

// Declared only when NDEBUG is not defined; the definition is not in this
// header. Presumably returns a printable name for the given token type --
// confirm against the definition.
#ifndef NDEBUG
const char* toString(HTMLToken::TokenType);
#endif

}  // namespace blink

#endif
