// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "composer/internal/char_chunk.h"

#include <set>
#include <string>
#include <vector>

#include "base/logging.h"
#include "base/util.h"
#include "composer/internal/composition_input.h"
#include "composer/internal/transliterators.h"
#include "composer/table.h"

namespace mozc {
namespace composer {

namespace {
// Max recursion count for looking up pending loop.
const int kMaxRecursion = 4;

// Delete "end" from "target", if "target" ends with the "end".
bool DeleteEnd(const string &end, string *target) {
  const string::size_type rindex = target->rfind(end);
  if (rindex == string::npos) {
    return false;
  }
  target->erase(rindex);
  return true;
}

// Get from pending rules recursively
// The recursion will be stopped if recursion_count is 0.
// When returns false, the caller doesn't append result entries.
// If we have the rule,
// '1' -> '', 'あ'
// 'あ1' -> '', 'い'
// 'い1' -> '', 'う'
// 'う1' -> '', 'え'
// 'え1' -> '', 'お'
// 'お1' -> '', 'あ'
// 'あ*' -> '', '{*}ぁ'
// '{*}ぁ' -> '', '{*}あ'
// '{*}あ' -> '', '{*}ぁ'
// 'い*' -> '', '{*}ぃ'
// '{*}ぃ' -> '', '{*}い'
// '{*}い' -> '', '{*}ぃ'
// Here, we want to get '{*}あ' <-> '{*}ぁ' loop from the input, 'あ'
bool GetFromPending(const Table *table, const string &key,
                    int recursion_count, set<string> *result) {
  DCHECK(result);
  if (recursion_count == 0) {
    // Don't find the loop within the |recursion_count|.
    return false;
  }
  if (result->find(key) != result->end()) {
    // Found the entry that is already looked up.
    // Return true because we found the loop.
    return true;
  }
  result->insert(key);

  vector<const Entry *> entries;
  table->LookUpPredictiveAll(key, &entries);
  for (size_t i = 0; i < entries.size(); ++i) {
    if (!entries[i]->result().empty()) {
      // skip rules with result, because this causes too many results.
      // for example, if we have
      //  'k' -> 'っ', 'k'
      //  'ka' -> 'か', ''
      // From the input 'k', this causes 'か', 'っ', 'っか', ...
      // So here we stop calling recursion.
      return false;
    }
    if (!GetFromPending(table, entries[i]->pending(),
                        recursion_count - 1, result)) {
      return false;
    }
  }
  return true;
}
}  // anonymous namespace

CharChunk::CharChunk(Transliterators::Transliterator transliterator,
                     const Table *table)
    : transliterator_(transliterator),
      table_(table),
      attributes_(NO_TABLE_ATTRIBUTE) {
  DCHECK_NE(Transliterators::LOCAL, transliterator);
}

void CharChunk::Clear() {
  raw_.clear();
  conversion_.clear();
  pending_.clear();
  ambiguous_.clear();
}

size_t CharChunk::GetLength(Transliterators::Transliterator t12r) const {
  const string t13n = Transliterate(
      t12r,
      Table::DeleteSpecialKey(raw_),
      Table::DeleteSpecialKey(conversion_ + pending_));
  return Util::CharsLen(t13n);
}

void CharChunk::AppendResult(Transliterators::Transliterator t12r,
                             string *result) const {
  const string t13n = Transliterate(
      t12r,
      Table::DeleteSpecialKey(raw_),
      Table::DeleteSpecialKey(conversion_ + pending_));
  result->append(t13n);
}

void CharChunk::AppendTrimedResult(Transliterators::Transliterator t12r,
                                   string *result) const {
  // Only determined value (e.g. |conversion_| only) is added.
  string converted = conversion_;
  if (!pending_.empty()) {
    size_t key_length = 0;
    bool fixed = false;
    const Entry *entry = table_->LookUpPrefix(pending_, &key_length, &fixed);
    if (entry != NULL && entry->input() == entry->result()) {
      converted.append(entry->result());
    }
  }
  result->append(Transliterate(
      t12r,
      Table::DeleteSpecialKey(raw_),
      Table::DeleteSpecialKey(converted)));
}

void CharChunk::AppendFixedResult(Transliterators::Transliterator t12r,
                                  string *result) const {
  string converted = conversion_;
  if (!ambiguous_.empty()) {
    // Add the |ambiguous_| value as a fixed value.  |ambiguous_|
    // contains an undetermined result string like "ん" converted
    // from a single 'n'.
    converted.append(ambiguous_);
  } else {
    // If |pending_| exists but |ambiguous_| does not exist,
    // |pending_| is appended.  If |ambiguous_| exists, the value of
    // |pending_| is usually equal to |ambiguous_| so it is not
    // appended.
    converted.append(pending_);
  }
  result->append(Transliterate(
      t12r,
      Table::DeleteSpecialKey(raw_),
      Table::DeleteSpecialKey(converted)));
}

// If we have the rule (roman),
// 1: 'ka' -> 'か', ''
// 2: 'ki' -> 'き', ''
// 3: 'ku' -> 'く', ''
// 4: 'kk' -> 'っ', 'k'
// From the input 'k', we want to correct 'k', 'か', 'き', 'く', 'っ'
// We don't expand for next k for rule 4, because it causes
// many useless looped results, like 'っか', 'っっか', 'っっ', etc
//
// If we have the input 'kk', we will get 'か', 'き', 'く', 'っ' from the
// pending 'k' of rule 4.
// With the result of AppendTrimedResult, 'っ', we can get
// 'っか', 'っき', 'っく', 'っっ' from this input.
//
// If we have the rule (kana),
// 'は゜' -> 'ぱ', ''
// 'は゛' -> 'ば', ''
// From the input 'は', we want 'は', 'ば', 'ぱ'
//
// For mobile rules,
// If we have the rule,
// '1' -> '', 'あ'
// 'あ1' -> '', 'い'
// 'い1' -> '', 'う'
// 'う1' -> '', 'え'
// 'え1' -> '', 'お'
// 'お1' -> '', 'あ'
// 'あ*' -> '', '{*}ぁ'
// '{*}ぁ' -> '', '{*}あ'
// '{*}あ' -> '', '{*}ぁ'
// 'い*' -> '', '{*}ぃ'
// '{*}ぃ' -> '', '{*}い'
// '{*}い' -> '', '{*}ぃ'
// From the input '1', we want 'あ', 'ぁ', not including 'い', 'ぃ', 'う', etc
//
// Actually, we don't need to recursive lookup for above two patterns.
//
// What we want to append here is the 'looped rule' in |kMaxRecursion| lookup.
// Here, '{*}ぁ' -> '{*}あ' -> '{*}ぁ' is the loop.
void CharChunk::GetExpandedResults(set<string> *results) const {
  DCHECK(results);

  if (pending_.empty()) {
    return;
  }
  // Append current pending string
  if (conversion_.empty()) {
    results->insert(Table::DeleteSpecialKey(pending_));
  }
  vector<const Entry *> entries;
  table_->LookUpPredictiveAll(pending_, &entries);
  for (size_t i = 0; i < entries.size(); ++i) {
    if (!entries[i]->result().empty()) {
      results->insert(Table::DeleteSpecialKey(entries[i]->result()));
    }
    if (entries[i]->pending().empty()) {
      continue;
    }
    set<string> loop_result;
    if (!GetFromPending(table_, entries[i]->pending(),
                        kMaxRecursion, &loop_result)) {
      continue;
    }
    for (set<string>::const_iterator itr = loop_result.begin();
         itr != loop_result.end(); ++itr) {
      results->insert(Table::DeleteSpecialKey(*itr));
    }
  }
}

bool CharChunk::IsFixed() const {
  return pending_.empty();
}

bool CharChunk::IsAppendable(Transliterators::Transliterator t12r,
                             const Table *table) const {
  return !pending_.empty() &&
      (t12r == Transliterators::LOCAL || t12r == transliterator_) &&
      table == table_;
}

bool CharChunk::IsConvertible(
    Transliterators::Transliterator t12r,
    const Table *table,
    const string &input) const {
  if (!IsAppendable(t12r, table)) {
    return false;
  }

  size_t key_length = 0;
  bool fixed = false;
  string key = pending_ + input;
  const Entry *entry = table->LookUpPrefix(key, &key_length, &fixed);

  return entry && (key.size() == key_length) && fixed;
}

void CharChunk::Combine(const CharChunk &left_chunk) {
  conversion_ = left_chunk.conversion_ + conversion_;
  raw_ = left_chunk.raw_ + raw_;
  // TODO(komatsu): This is a hacky way.  We should look up the
  // conversion table with the new |raw_| value.
  if (left_chunk.ambiguous_.empty()) {
    ambiguous_.clear();
  } else {
    if (ambiguous_.empty()) {
      ambiguous_ = left_chunk.ambiguous_ + pending_;
    } else {
      ambiguous_ = left_chunk.ambiguous_ + ambiguous_;
    }
  }
  pending_ = left_chunk.pending_ + pending_;
}

bool CharChunk::AddInputInternal(string *input) {
  const bool kNoLoop = false;

  size_t key_length = 0;
  bool fixed = false;
  string key = pending_ + *input;
  const Entry *entry = table_->LookUpPrefix(key, &key_length, &fixed);

  if (entry == NULL) {
    if (key_length == 0) {
      // No prefix character is not contained in the table, fallback
      // operation is performed.
      if (pending_.empty()) {
        // Conversion data was not found.
        AddConvertedChar(input);
      }
      return kNoLoop;
    }

    if (key_length < pending_.size()) {
      // Do not modify this char_chunk, all key characters will be used
      // by the next char_chunk.
      return kNoLoop;
    }

    DCHECK_GE(key_length, pending_.size());
    // Some prefix character is contained in the table, but not
    // reached any conversion result (like "t" with "ta->た").
    key_length -= pending_.size();

    // Conversion data had only pending.
    const string new_pending_chars(*input, 0, key_length);
    raw_.append(new_pending_chars);
    pending_.append(new_pending_chars);
    if (!ambiguous_.empty()) {
      // If ambiguos_ has already a value, ambiguous_ is appended.
      // ex. "ny" makes ambiguous_ "んy", but "sh" leaves ambiguous_ empty.
      ambiguous_.append(new_pending_chars);
    }
    input->erase(0, key_length);
    return kNoLoop;
  }

  // The prefix of key reached a conversion result, thus entry is not NULL.

  if (key.size() == key_length) {
    const bool is_following_entry = (
        !conversion_.empty() ||
        (!raw_.empty() && !pending_.empty() && raw_ != pending_));

    raw_.append(*input);
    input->clear();
    if (fixed) {
      // The whole key has been used, table lookup has reached a fixed
      // value.  It is a stable world. (like "ka->か", "q@->だ").
      conversion_.append(entry->result());
      pending_ = entry->pending();
      ambiguous_.clear();

      // If this entry is the first entry, the table attributes are
      // applied to this chunk.
      if (!is_following_entry) {
        attributes_ = entry->attributes();
      }
    } else {  // !fixed
      // The whole string of key reached a conversion result, but the
      // result is ambiguous (like "n" with "n->ん and na->な").
      pending_ = key;
      ambiguous_ = entry->result();
    }
    return kNoLoop;
  }

  // Delete pending_ from raw_ if matched.
  DeleteEnd(pending_, &raw_);

  // A result was found without any ambiguity.
  input->assign(key, key_length, key.size() - key_length);
  raw_.append(key, 0, key_length);
  conversion_.append(entry->result());
  pending_ = entry->pending();
  ambiguous_.clear();

  if (input->empty() || pending_.empty()) {
    // If the remaining input character or pending character is empty,
    // there is no reason to continue the looping.
    return kNoLoop;
  }

  const bool kLoop = true;
  return kLoop;
}

void CharChunk::AddInput(string *input) {
  while (AddInputInternal(input)) {}
}

void CharChunk::AddConvertedChar(string *input) {
  // TODO(komatsu) Nice to make "string Util::PopOneChar(string *str);".
  string first_char = Util::SubString(*input, 0, 1);
  conversion_.append(first_char);
  raw_.append(first_char);
  *input = Util::SubString(*input, 1, string::npos);
}

void CharChunk::AddInputAndConvertedChar(string *key,
                                         string *converted_char) {
  // If this chunk is empty, the key and converted_char are simply
  // copied.
  if (raw_.empty() && pending_.empty() && conversion_.empty()) {
    raw_ = *key;
    key->clear();
    pending_ = *converted_char;
    // TODO(komatsu): We should check if the |converted_char| is
    // really ambigous or not, otherwise the last character of the
    // preedit on Kana input is always dropped.
    ambiguous_ = *converted_char;
    converted_char->clear();

    // If this entry is the first entry, the table attributes are
    // applied to this chunk.
    const Entry *entry = table_->LookUp(pending_);
    if (entry != NULL) {
      attributes_ = entry->attributes();
    }
    return;
  }

  const string input = pending_ + *converted_char;
  size_t key_length = 0;
  bool fixed = false;
  const Entry *entry = table_->LookUpPrefix(input, &key_length, &fixed);
  if (entry == NULL) {
    // Do not modify this char_chunk, all key and converted_char
    // values will be used by the next char_chunk.
    return;
  }

  // The whole input string was used.
  if (key_length == input.size()) {
    raw_.append(*key);
    if (fixed) {
      conversion_.append(entry->result());
      pending_ = entry->pending();
      ambiguous_.clear();
    } else {  // !fixed
      // |conversion_| remains the current value.
      pending_ = entry->result();
      ambiguous_ = entry->result();
    }
    key->clear();
    converted_char->clear();
    return;
  }

  // The following key_length == pending_.size() means the new key and
  // and converted_char does not affect at all.  Do not any thing here
  // and a new char_chunk will be made for the new key and
  // converted_char.
  if (key_length == pending_.size()) {
    return;
  }

  // The input is partially used.
  raw_.append(*key);
  conversion_.append(entry->result());
  pending_ = entry->pending();
  // While the whole key is used in this char_chunk, the
  // converted_char is separated to this char_chunk and the next
  // char_chunk.  This is not a preferred behavior, but there is no
  // better way to work around this limitation.
  key->clear();
  converted_char->assign(input, key_length, input.size() - key_length);
}

bool CharChunk::ShouldCommit() const {
  return (attributes_ & DIRECT_INPUT) && pending_.empty();
}

bool CharChunk::ShouldInsertNewChunk(const CompositionInput &input) const {
  if (raw_.empty() && conversion_.empty() && pending_.empty()) {
    return false;
  }

  const bool is_new_input =
      input.is_new_input() ||
      ((attributes_ & END_CHUNK) && pending_.empty());

  if (is_new_input &&
      (table_->HasNewChunkEntry(input.raw()) ||
          !table_->HasSubRules(input.raw()))) {
    // Such input which is not on the table is treated as NewChunk entry.
    return true;
  }
  return false;
}

void CharChunk::AddCompositionInput(CompositionInput *input) {
  if (input->has_conversion()) {
    AddInputAndConvertedChar(
                             input->mutable_raw(),
                             input->mutable_conversion());
    return;
  }

  if (ShouldInsertNewChunk(*input)) {
    return;
  }
  AddInput(input->mutable_raw());
}

void CharChunk::SetTransliterator(
    const Transliterators::Transliterator transliterator) {
  if (transliterator == Transliterators::LOCAL) {
    // LOCAL transliterator shouldn't be set as local transliterator.
    // Just ignore.
    return;
  }
  transliterator_ = transliterator;
}

const string &CharChunk::raw() const {
  return raw_;
}

void CharChunk::set_raw(const string &raw) {
  raw_ = raw;
}

const string &CharChunk::conversion() const {
  return conversion_;
}

void CharChunk::set_conversion(const string &conversion) {
  conversion_ = conversion;
}

const string &CharChunk::pending() const {
  return pending_;
}

void CharChunk::set_pending(const string &pending) {
  pending_ = pending;
}

const string &CharChunk::ambiguous() const {
  return ambiguous_;
}

void CharChunk::set_ambiguous(const string &ambiguous) {
  ambiguous_ = ambiguous;
}

bool CharChunk::SplitChunk(Transliterators::Transliterator t12r,
                           const size_t position,
                           CharChunk **left_new_chunk) {
  if (position <= 0 || position >= GetLength(t12r)) {
    LOG(WARNING) << "Invalid position: " << position;
    return false;
  }

  string raw_lhs, raw_rhs, converted_lhs, converted_rhs;
  Transliterators::GetTransliterator(GetTransliterator(t12r))->Split(
      position,
      Table::DeleteSpecialKey(raw_),
      Table::DeleteSpecialKey(conversion_ + pending_),
      &raw_lhs, &raw_rhs, &converted_lhs, &converted_rhs);

  *left_new_chunk = new CharChunk(transliterator_, table_);
  (*left_new_chunk)->set_raw(raw_lhs);
  set_raw(raw_rhs);

  if (converted_lhs.size() > conversion_.size()) {
    // [ conversion | pending ] => [ conv | pend#1 ] [ pend#2 ]
    const string pending_lhs(converted_lhs, conversion_.size());
    (*left_new_chunk)->set_conversion(conversion_);
    (*left_new_chunk)->set_pending(pending_lhs);

    conversion_.clear();
    pending_ = converted_rhs;
    ambiguous_.clear();
  } else {
    // [ conversion | pending ] => [ conv#1 ] [ conv#2 | pending ]
    (*left_new_chunk)->set_conversion(converted_lhs);
    // left_new_chunk->set_pending("");
    const size_t pending_pos = converted_rhs.size() - pending_.size();
    conversion_.assign(converted_rhs, 0, pending_pos);
    // pending_ = pending_;
  }
  return true;
}

Transliterators::Transliterator
CharChunk::GetTransliterator(
    Transliterators::Transliterator transliterator) const {
  if (attributes_ && NO_TRANSLITERATION) {
    if (transliterator == Transliterators::LOCAL ||
        transliterator == Transliterators::HALF_ASCII ||
        transliterator == Transliterators::FULL_ASCII) {
      return Transliterators::CONVERSION_STRING;
    }
    return transliterator;
  }
  if (transliterator == Transliterators::LOCAL) {
      return transliterator_;
  }
  return transliterator;
}

string CharChunk::Transliterate(
    Transliterators::Transliterator transliterator,
    const string &raw, const string &converted) const {
  return Transliterators::GetTransliterator(
      GetTransliterator(transliterator))->Transliterate(raw, converted);
}


CharChunk *CharChunk::Clone() const {
  // TODO(hsumita): Implements TransliteratorFactory and uses it instead of
  // copying a pointer.
  CharChunk *new_char_chunk = new CharChunk(transliterator_, table_);
  new_char_chunk->raw_.assign(raw_);
  new_char_chunk->conversion_.assign(conversion_);
  new_char_chunk->pending_.assign(pending_);
  new_char_chunk->ambiguous_.assign(ambiguous_);
  new_char_chunk->attributes_ = attributes_;
  return new_char_chunk;
}

}  // namespace composer
}  // namespace mozc
