// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "prediction/user_history_predictor.h"

#include <algorithm>
#include <cctype>
#include <climits>
#include <string>

#include "base/config_file_stream.h"
#include "base/flags.h"
#include "base/init.h"
#include "base/logging.h"
#include "base/thread.h"
#include "base/trie.h"
#include "base/util.h"
#include "composer/composer.h"
#include "config/config.pb.h"
#include "config/config_handler.h"
#include "converter/conversion_request.h"
#include "converter/segments.h"
#include "dictionary/dictionary_interface.h"
#include "dictionary/pos_matcher.h"
#include "dictionary/suppression_dictionary.h"
#include "prediction/predictor_interface.h"
#include "prediction/user_history_predictor.pb.h"
#include "rewriter/variants_rewriter.h"
#include "session/commands.pb.h"
#include "storage/encrypted_string_storage.h"
#include "storage/lru_cache.h"
#include "usage_stats/usage_stats.h"

using mozc::commands::Request;
using mozc::dictionary::SuppressionDictionary;

// This flag is set by predictor.cc
// We can remove this after the ambiguity expansion feature get stable.
DEFINE_bool(enable_expansion_for_user_history_predictor,
            false,
            "enable ambiguity expansion for user_history_predictor.");

namespace mozc {
namespace {
// find suggestion candidates from the most recent 3000 history in LRU.
// We don't check all history, since suggestion is called every key event
const size_t kMaxSuggestionTrial = 3000;

// find suffix matches of history_segments from the most recent 500 histories
// in LRU.
const size_t kMaxPrevValueTrial = 500;

// cache size
// Typically memory/storage footprint becomes kLRUCacheSize * 70 bytes.
#ifdef OS_ANDROID
const size_t kLRUCacheSize = 2000;
#else  // OS_ANDROID
const size_t kLRUCacheSize = 10000;
#endif  // OS_ANDROID

// don't save key/value that are
// longer than kMaxCandidateSize to avoid memory explosion
const size_t kMaxStringLength = 256;

// maximum size of next_entries
const size_t kMaxNextEntriesSize = 4;

// revert id for user_history_predictor
const uint16 kRevertId = 1;

// default object pool size for EntryPriorityQueue
const size_t kEntryPoolSize = 16;

// file name for the history
#ifdef OS_WIN
const char kFileName[] = "user://history.db";
#else
const char kFileName[] = "user://.history.db";
#endif

// use '\t' as a key/value delimiter
const char kDelimiter[] = "\t";

// "絵文字"
const char kEmojiDescription[] = "\xE7\xB5\xB5\xE6\x96\x87\xE5\xAD\x97";

// TODO(peria, hidehiko): Unify this checker and IsEmojiCandidate in
//     EmojiRewriter.  If you make similar functions before the merging in
//     case, put a similar note to avoid twisted dependency.
bool IsEmojiEntry(const UserHistoryPredictor::Entry &entry) {
  return (entry.has_description() &&
          entry.description().find(kEmojiDescription) != string::npos);
}

bool IsPunctuation(const string &value) {
  //  return (value == "。" || value == "." ||
  //          value == "、" || value == "," ||
  //          value == "？" || value == "?" ||
  //          value == "！" || value == "!" ||
  //          value == "，" || value == "．");
  return (value == "\xE3\x80\x82" || value == "." ||
          value == "\xE3\x80\x81" || value == "," ||
          value == "\xEF\xBC\x9F" || value == "?" ||
          value == "\xEF\xBC\x81" || value == "!" ||
          value == "\xEF\xBC\x8C" || value == "\xEF\xBC\x8E");
}

bool IsSentenceLikeCandidate(const Segment::Candidate &candidate) {
  // A sentence should have a long reading.  Length check is done using key to
  // absorb length difference in value variation, e.g.,
  // "〜ください" and "〜下さい".
  if (candidate.value.empty() || Util::CharsLen(candidate.key) < 8) {
    return false;
  }
  // Our primary target sentence ends with Hiragana, e.g., "〜ます".
  const ConstChar32ReverseIterator iter(candidate.value);
  bool ret = Util::GetScriptType(iter.Get()) == Util::HIRAGANA;
  return ret;
}

// Return romanaized string.
string ToRoman(const string &str) {
  string result;
  Util::HiraganaToRomanji(str, &result);
  return result;
}

// return true if value looks like a content word.
// Currently, just checks the script type.
bool IsContentWord(const string &value) {
  return Util::CharsLen(value) > 1 ||
      Util::GetScriptType(value) != Util::UNKNOWN_SCRIPT;
}

// Return candidate description.
// If candidate is spelling correction, typing correction
// or auto partial suggestion,
// don't use the description, since "did you mean" like description must be
// provided at an appropriate timing and context.
string GetDescription(const Segment::Candidate &candidate) {
  if (candidate.attributes &
      (Segment::Candidate::SPELLING_CORRECTION |
       Segment::Candidate::TYPING_CORRECTION |
       Segment::Candidate::AUTO_PARTIAL_SUGGESTION)) {
    return "";
  }
  return candidate.description;
}

}  // namespace

// Returns true if the input first candidate seems to be a privacy sensitive
// such like password.
bool UserHistoryPredictor::IsPrivacySensitive(const Segments *segments) const {
  const bool kNonSensitive = false;
  const bool kSensitive = true;

  // Skip privacy sensitive check if |segments| consists of multiple conversion
  // segment. That is, segments like "パスワードは|x7LAGhaR" where '|'
  // represents segment boundary is not considered to be privacy sensitive.
  // TODO(team): Revisit this rule if necessary.
  if (segments->conversion_segments_size() != 1) {
    return kNonSensitive;
  }

  // Hereafter, we must have only one conversion segment.
  const Segment &conversion_segment = segments->conversion_segment(0);
  const string &segment_key = conversion_segment.key();

  // The top candidate, which is about to be commited.
  const Segment::Candidate &candidate = conversion_segment.candidate(0);
  const string &candidate_value = candidate.value;

  // If |candidate_value| contains any non-ASCII character, do not treat
  // it as privacy sensitive information.
  // TODO(team): Improve the following rule. For example,
  //     "0000－0000－0000－0000" is not treated as privacy sensitive
  //     because of this rule. When a user commits his password in
  //     full-width form by mistake, like "ｘ７ＬＡＧｈａＲ", it is not
  //     treated as privacy sensitive too.
  if (Util::GetCharacterSet(candidate_value) != Util::ASCII) {
    return kNonSensitive;
  }

  // Hereafter, |candidate_value| consists of ASCII characters only.

  // Note: if the key looks like hiragana, the candidate might be Katakana to
  // English transliteration. Don't suppress transliterated candidates.
  // http://b/4394325

  // If the key consists of number characters only, treat it as privacy
  // sensitive.
  if (Util::GetScriptType(segment_key) == Util::NUMBER) {
    return kSensitive;
  }

  // If the key contains any alphabetical character but it is in our dictionary,
  // it can be treated as privacy nonsensitive word; cf. b/5995529. Besides,
  // short words would be considered as privacy nonsensitive word as well.
  if (segment_key.size() <= 3) {
    return kNonSensitive;
  }

  // Dictionary-based sensitivity test. If the word user typed is in dictionary,
  // treat it as privacy insensitive. For English (ASCII) words,
  // dictionary-based test is extended to the following forms:
  //   1) All lower case (e.g., hello)
  //   2) All upper case (e.g., HELLO)
  //   3) Capitalized (e.g., Hello)
  //   4) As-is (e.g., HeLlO)
  // Since English words are stored in lower case, in case of upper case and
  // capitalized keys, we convert it to lower case in advance.
  if (Util::IsUpperOrCapitalizedAscii(candidate_value)) {
    // Look up for keys that are all in upper case or capitalized ASCII.
    string lower_case_value(candidate_value);
    Util::LowerString(&lower_case_value);
    if (dictionary_->HasValue(lower_case_value)) {
      return kNonSensitive;
    }
  } else {
    // Look up for the original key, including those that are all in lower case
    // ASCII.
    if (dictionary_->HasValue(candidate_value)) {
      return kNonSensitive;
    }
  }
  // If the key contains any alphabetical character and is not in our
  // dictionary, treat it as privacy sensitive. There also remains some cases to
  // be considered. Compare following two cases.
  //   Case A:
  //     1. Type "ywwz1sxm" in Roman-input style then get "yっwz1sxm".
  //     2. Hit F10 key to convert it to "ywwz1sxm" by
  //        ConvertToHalfAlphanumeric command.
  //     3. Commit it.
  //     In this case, |segment_key| is "yっwz1sxm" and actually contains
  //     alphabetical characters. So kSensitive will be returned.
  //     So far so good.
  //   Case B:
  //     1. type "ia1bo3xu" in Roman-input style then get "いあ1ぼ3ぅ".
  //     2. hit F10 key to convert it to "ia1bo3xu" by
  //        ConvertToHalfAlphanumeric command.
  //     3. commit it.
  //     In this case, |segment_key| is "ia1bo3xu" and contains no
  //     alphabetical character. So the following check does nothing.
  // TODO(team): Improve the following rule so that our user experience
  //     can be consistent between case A and B.
  if (Util::ContainsScriptType(segment_key, Util::ALPHABET)) {
    return kSensitive;
  }

  return kNonSensitive;
}

UserHistoryStorage::UserHistoryStorage(const string &filename)
    : storage_(new storage::EncryptedStringStorage(filename)) {
}

UserHistoryStorage::~UserHistoryStorage() {}

bool UserHistoryStorage::Load() {
  string input;
  if (!storage_->Load(&input)) {
    LOG(ERROR) << "Can't load user history data.";
    return false;
  }

  if (!ParseFromString(input)) {
    LOG(ERROR) << "ParseFromString failed. message looks broken";
    return false;
  }

  VLOG(1) << "Loaded user histroy, size=" << entries_size();
  return true;
}

bool UserHistoryStorage::Save() const {
  if (entries_size() == 0) {
    LOG(WARNING) << "etries size is 0. Not saved";
    return false;
  }

  string output;
  if (!AppendToString(&output)) {
    LOG(ERROR) << "AppendToString failed";
    return false;
  }

  if (!storage_->Save(output)) {
    LOG(ERROR) << "Can't save user history data.";
    return false;
  }

  return true;
}

UserHistoryPredictor::EntryPriorityQueue::EntryPriorityQueue()
    : pool_(kEntryPoolSize) {}

UserHistoryPredictor::EntryPriorityQueue::~EntryPriorityQueue() {}

bool UserHistoryPredictor::EntryPriorityQueue::Push(Entry *entry) {
  DCHECK(entry);
  if (!seen_.insert(Util::Fingerprint32(entry->value())).second) {
    VLOG(2) << "found dups";
    return false;
  }
  const uint32 score = UserHistoryPredictor::GetScore(*entry);
  agenda_.push(make_pair(score, entry));
  return true;
}

UserHistoryPredictor::Entry *
UserHistoryPredictor::EntryPriorityQueue::Pop() {
  if (agenda_.empty()) {
    return NULL;
  }
  const QueueElement &element = agenda_.top();
  Entry *result = element.second;
  DCHECK(result);
  agenda_.pop();
  return result;
}

UserHistoryPredictor::Entry *
UserHistoryPredictor::EntryPriorityQueue::NewEntry() {
  return pool_.Alloc();
}

class UserHistoryPredictorSyncer : public Thread {
 public:
  enum RequestType {
    LOAD,
    SAVE
  };

  UserHistoryPredictorSyncer(UserHistoryPredictor *predictor,
                             RequestType type)
      : predictor_(predictor), type_(type) {
    DCHECK(predictor_);
  }

  virtual void Run() {
    switch (type_) {
      case LOAD:
        VLOG(1) << "Executing Reload method";
        predictor_->Load();
        break;
      case SAVE:
        VLOG(1) << "Executing Sync method";
        predictor_->Save();
        break;
      default:
        LOG(ERROR) << "Unknown request: " << static_cast<int>(type_);
    }
  }

  virtual ~UserHistoryPredictorSyncer() {
    Join();
  }

  UserHistoryPredictor *predictor_;
  RequestType type_;
};

UserHistoryPredictor::UserHistoryPredictor(
    const DictionaryInterface *dictionary,
    const POSMatcher *pos_matcher,
    const SuppressionDictionary *suppression_dictionary,
    bool enable_content_word_learning)
    : dictionary_(dictionary),
      pos_matcher_(pos_matcher),
      suppression_dictionary_(suppression_dictionary),
      predictor_name_("UserHistoryPredictor"),
      content_word_learning_enabled_(enable_content_word_learning),
      updated_(false),
      dic_(new DicCache(UserHistoryPredictor::cache_size())) {
  AsyncLoad();  // non-blocking
  // Load()  blocking version can be used if any
}

UserHistoryPredictor::~UserHistoryPredictor() {
  // In destructor, must call blocking version
  WaitForSyncer();
  Save();   // blocking
}

string UserHistoryPredictor::GetUserHistoryFileName() {
  return ConfigFileStream::GetFileName(kFileName);
}

// return revert id
// static
uint16 UserHistoryPredictor::revert_id() {
  return kRevertId;
}

void UserHistoryPredictor::WaitForSyncer() {
  if (syncer_.get() != NULL) {
    syncer_->Join();
    syncer_.reset(NULL);
  }
}

bool UserHistoryPredictor::WaitForSyncerForTest() {
  WaitForSyncer();
  return true;
}

bool UserHistoryPredictor::CheckSyncerAndDelete() const {
  if (syncer_.get() != NULL) {
    if (syncer_->IsRunning()) {
      return false;
    } else {
      syncer_.reset(NULL);  // remove
    }
  }

  return true;
}

bool UserHistoryPredictor::Sync() {
  return AsyncSave();
  // return Save();   blocking version
}

bool UserHistoryPredictor::Reload() {
  WaitForSyncer();
  return AsyncLoad();
}

bool UserHistoryPredictor::AsyncLoad() {
  if (!CheckSyncerAndDelete()) {  // now loading/saving
    return true;
  }

  syncer_.reset(new UserHistoryPredictorSyncer(
      this,
      UserHistoryPredictorSyncer::LOAD));
  syncer_->Start();

  return true;
}

bool UserHistoryPredictor::AsyncSave() {
  if (!updated_) {
    return true;
  }

  if (!CheckSyncerAndDelete()) {  // now loading/saving
    return true;
  }

  syncer_.reset(new UserHistoryPredictorSyncer(
      this,
      UserHistoryPredictorSyncer::SAVE));
  syncer_->Start();

  return true;
}

bool UserHistoryPredictor::Load() {
  const string filename = GetUserHistoryFileName();

  UserHistoryStorage history(filename);
  if (!history.Load()) {
    LOG(ERROR) << "UserHistoryStorage::Load() failed";
    return false;
  }

  for (size_t i = 0; i < history.entries_size(); ++i) {
    dic_->Insert(EntryFingerprint(history.entries(i)),
                 history.entries(i));
  }

  VLOG(1) << "Loaded user histroy, size=" << history.entries_size();

  return true;
}

bool UserHistoryPredictor::Save() {
  if (!updated_) {
    return true;
  }

  if (GET_CONFIG(incognito_mode)) {
    VLOG(2) << "incognito mode";
    return true;
  }

  if (!GET_CONFIG(use_history_suggest)) {
    VLOG(2) << "no history suggest";
    return true;
  }

  const DicElement *tail = dic_->Tail();
  if (tail == NULL) {
    return true;
  }

  const string filename = GetUserHistoryFileName();

  UserHistoryStorage history(filename);
  for (const DicElement *elm = tail; elm != NULL; elm = elm->prev) {
    history.add_entries()->CopyFrom(elm->value);
  }

  // update usage stats here.
  usage_stats::UsageStats::SetInteger(
      "UserHistoryPredictorEntrySize",
      static_cast<int>(history.entries_size()));

  if (!history.Save()) {
    LOG(ERROR) << "UserHistoryStorage::Save() failed";
    return false;
  }

  updated_ = false;

  return true;
}

bool UserHistoryPredictor::ClearAllHistory() {
  // Wait until syncer finishes
  WaitForSyncer();

  VLOG(1) << "Clearing user prediction";
  // renew DicCache as LRUCache tries to reuse the internal value by
  // using FreeList
  dic_.reset(new DicCache(UserHistoryPredictor::cache_size()));

  // insert a dummy event entry.
  InsertEvent(Entry::CLEAN_ALL_EVENT);

  updated_ = true;

  Sync();

  return true;
}

bool UserHistoryPredictor::ClearUnusedHistory() {
  // Wait until syncer finishes
  WaitForSyncer();

  VLOG(1) << "Clearing unused prediction";
  const DicElement *head = dic_->Head();
  if (head == NULL) {
    VLOG(2) << "dic head is NULL";
    return false;
  }

  vector<uint32> keys;
  for (const DicElement *elm = head; elm != NULL; elm = elm->next) {
    VLOG(3) << elm->key << " " << elm->value.suggestion_freq();
    if (elm->value.suggestion_freq() == 0) {
      keys.push_back(elm->key);
    }
  }

  for (size_t i = 0; i < keys.size(); ++i) {
    VLOG(2) << "Removing: " << keys[i];
    if (!dic_->Erase(keys[i])) {
      LOG(ERROR) << "cannot erase " << keys[i];
    }
  }

  // insert a dummy event entry.
  InsertEvent(Entry::CLEAN_UNUSED_EVENT);

  updated_ = true;

  Sync();

  VLOG(1) << keys.size() << " removed";

  return true;
}

// Erases all the next_entries whose entry_fp field equals |fp|.
void UserHistoryPredictor::EraseNextEntries(uint32 fp, Entry *entry) {
  const size_t orig_size = entry->next_entries_size();
  size_t new_size = orig_size;
  for (size_t pos = 0; pos < new_size; ) {
    if (entry->next_entries(pos).entry_fp() == fp) {
      entry->mutable_next_entries()->SwapElements(pos, --new_size);
    } else {
      ++pos;
    }
  }
  for (size_t i = 0; i < orig_size - new_size; ++i) {
    entry->mutable_next_entries()->RemoveLast();
  }
}

// Recursively finds the Ngram history that produces |target_key| and
// |target_value| and removes the last link. For example, if there exists a
// chain like
//    ("aaa", "AAA") -- ("bbb", "BBB") -- ("ccc", "CCC"),
// and if target_key == "aaabbbccc" and target_value == "AAABBBCCC", the link
// from ("bbb", "BBB") to ("ccc", "CCC") is removed. If a link was removed, this
// method returns DONE. If no history entries can produce the target key
// value, then NOT_FOUND is returned. TAIL is returned only when the
// tail was found, e.g., in the above example, when the method finds the tail
// node ("ccc", "CCC").
UserHistoryPredictor::RemoveNgramChainResult
UserHistoryPredictor::RemoveNgramChain(const string &target_key,
                                       const string &target_value,
                                       Entry *entry,
                                       vector<StringPiece> *key_ngrams,
                                       size_t key_ngrams_len,
                                       vector<StringPiece> *value_ngrams,
                                       size_t value_ngrams_len) {
  DCHECK(entry);
  DCHECK(key_ngrams);
  DCHECK(value_ngrams);

  // Update the lengths with the current entry node.
  key_ngrams_len += entry->key().size();
  value_ngrams_len += entry->value().size();

  // This is the case where ngram key and value are shorter than the target key
  // and value, respectively. In this case, we need to find further entries to
  // concatenate in order to make |target_key| and |target_value|.
  if (key_ngrams_len < target_key.size() &&
      value_ngrams_len < target_value.size()) {
    key_ngrams->push_back(entry->key());
    value_ngrams->push_back(entry->value());
    for (size_t i = 0; i < entry->next_entries().size(); ++i) {
      const uint32 fp = entry->next_entries(i).entry_fp();
      Entry *e = dic_->MutableLookupWithoutInsert(fp);
      if (e == NULL) {
        continue;
      }
      const RemoveNgramChainResult r = RemoveNgramChain(target_key,
                                                        target_value,
                                                        e,
                                                        key_ngrams,
                                                        key_ngrams_len,
                                                        value_ngrams,
                                                        value_ngrams_len);
      switch (r) {
        case DONE:
          return DONE;
        case TAIL:
          // |entry| is the second-to-the-last node. So cut the link to the
          // child entry.
          EraseNextEntries(fp, entry);
          return DONE;
        default:
          break;
      }
    }
    // Recover the state.
    key_ngrams->pop_back();
    value_ngrams->pop_back();
    return NOT_FOUND;
  }

  // This is the case where the current ngram key and value have the same
  // lengths as those of |target_key| and |target_value|, respectively.
  if (key_ngrams_len == target_key.size() &&
      value_ngrams_len == target_value.size()) {
    key_ngrams->push_back(entry->key());
    value_ngrams->push_back(entry->value());
    string ngram_key, ngram_value;
    Util::JoinStringPieces(*key_ngrams, "", &ngram_key);
    Util::JoinStringPieces(*value_ngrams, "", &ngram_value);
    if (ngram_key == target_key && ngram_value == target_value) {
      // |entry| is the last node. So return TAIL to tell the caller so
      // that it can remove the link to this last node.
      return TAIL;
    }
    key_ngrams->pop_back();
    value_ngrams->pop_back();
    return NOT_FOUND;
  }

  return NOT_FOUND;
}

bool UserHistoryPredictor::ClearHistoryEntry(const string &key,
                                             const string &value) {
  bool deleted = false;
  {
    // Find the history entry that has the exactly same key and value and has
    // not been removed yet. If exists, remove it.
    Entry *entry = dic_->MutableLookupWithoutInsert(Fingerprint(key, value));
    if (entry != NULL && !entry->removed()) {
      entry->set_suggestion_freq(0);
      entry->set_conversion_freq(0);
      entry->set_removed(true);
      // We don't clear entry->next_entries() so that we can generate prediction
      // by chaining.
      deleted = true;
    }
  }
  {
    // Find a chain of history entries that produces key and value. If exists,
    // remove the link so that N-gram history prediction never generates this
    // key value pair..
    for (DicElement *elm = dic_->MutableHead(); elm != NULL; elm = elm->next) {
      Entry *entry = &elm->value;
      if (!Util::StartsWith(key, entry->key()) ||
          !Util::StartsWith(value, entry->value())) {
        continue;
      }
      vector<StringPiece> key_ngrams, value_ngrams;
      if (RemoveNgramChain(
              key, value, entry, &key_ngrams, 0, &value_ngrams, 0) == DONE) {
        deleted = true;
      }
    }
  }
  if (deleted) {
    updated_ = true;
  }
  return deleted;
}

// return true if prev_entry has a next_fp link to entry
// static
bool UserHistoryPredictor::HasBigramEntry(
    const UserHistoryPredictor::Entry &entry,
    const UserHistoryPredictor::Entry &prev_entry) {
  const uint32 fp = EntryFingerprint(entry);
  for (int i = 0; i < prev_entry.next_entries_size(); ++i) {
    if (fp == prev_entry.next_entries(i).entry_fp()) {
      return true;
    }
  }
  return false;
}

// static
string UserHistoryPredictor::GetRomanMisspelledKey(
    const Segments &segments) {
  if (GET_CONFIG(preedit_method) != config::Config::ROMAN) {
    return "";
  }

  const string &preedit = segments.conversion_segment(0).key();
  // TODO(team): Use composer if it is available.
  // segments.composer()->GetQueryForConversion(&preedit);
  // Since ConverterInterface doesn't have StartPredictionWithComposer,
  // we cannot use composer currently.
  if (!preedit.empty() && MaybeRomanMisspelledKey(preedit)) {
    return ToRoman(preedit);
  }

  return "";
}

// static
bool UserHistoryPredictor::MaybeRomanMisspelledKey(const string &key) {
  int num_alpha = 0;
  int num_hiragana = 0;
  int num_unknown = 0;
  for (ConstChar32Iterator iter(key); !iter.Done(); iter.Next()) {
    const char32 w = iter.Get();
    const Util::ScriptType type = Util::GetScriptType(w);
    if (type == Util::HIRAGANA || w == 0x30FC) {  // "ー".
      ++num_hiragana;
      continue;
    }
    if (type == Util::UNKNOWN_SCRIPT && num_unknown <= 0) {
      ++num_unknown;
      continue;
    }
    if (type == Util::ALPHABET && num_alpha <= 0) {
      ++num_alpha;
      continue;
    }
    return false;
  }

  return (num_hiragana > 0 &&
          ((num_alpha == 1 && num_unknown == 0) ||
           (num_alpha == 0 && num_unknown == 1)));
}

// static
bool UserHistoryPredictor::RomanFuzzyPrefixMatch(
    const string &str, const string &prefix) {
  if (prefix.empty() || prefix.size() > str.size()) {
    return false;
  }

  // 1. allow one character delete in Romanji sequence.
  // 2. allow one swap in Romanji sequence.
  for (size_t i = 0; i < prefix.size(); ++i) {
    if (prefix[i] == str[i]) {
      continue;
    }

    if (str[i] == '-') {
      // '-' voice sound mark can be matched to any
      // non-alphanum character.
      if (!isalnum(prefix[i])) {
        string replaced_prefix = prefix;
        replaced_prefix[i] = str[i];
        if (Util::StartsWith(str, replaced_prefix)) {
          return true;
        }
      }
    } else {
      // deletion.
      string inserted_prefix = prefix;
      inserted_prefix.insert(i, 1, str[i]);
      if (Util::StartsWith(str, inserted_prefix)) {
        return true;
      }

      // swap.
      if (i + 1 < prefix.size()) {
        string swapped_prefix = prefix;
        swap(swapped_prefix[i], swapped_prefix[i + 1]);
        if (Util::StartsWith(str, swapped_prefix)) {
          return true;
        }
      }
    }

    return false;
  }

  // |prefix| is an exact suffix of |str|.
  return false;
}

bool UserHistoryPredictor::RomanFuzzyLookupEntry(
    const string &roman_input_key,
    const UserHistoryPredictor::Entry *entry,
    EntryPriorityQueue *results) const {
  if (roman_input_key.empty()) {
    return false;
  }

  DCHECK(entry);
  DCHECK(results);

  if (!RomanFuzzyPrefixMatch(ToRoman(entry->key()),
                             roman_input_key)) {
    return false;
  }

  Entry *result = results->NewEntry();
  DCHECK(result);
  result->Clear();
  result->CopyFrom(*entry);
  result->set_spelling_correction(true);
  results->Push(result);

  return true;
}

UserHistoryPredictor::Entry *UserHistoryPredictor::AddEntry(
    const Entry &entry, EntryPriorityQueue *results) const {
  // We add an entry even if it was marked as removed so that it can be used to
  // generate prediction by entry chaining. The deleted entry itself is never
  // shown in the final prediction result as it is filtered finally.
  Entry *new_entry = results->NewEntry();
  new_entry->Clear();
  new_entry->CopyFrom(entry);
  return new_entry;
}

UserHistoryPredictor::Entry *UserHistoryPredictor::AddEntryWithNewKeyValue(
    const string &key, const string &value, const Entry &entry,
    EntryPriorityQueue *results) const {
  // We add an entry even if it was marked as removed so that it can be used to
  // generate prediction by entry chaining. The deleted entry itself is never
  // shown in the final prediction result as it is filtered finally.
  Entry *new_entry = results->NewEntry();
  new_entry->Clear();
  new_entry->CopyFrom(entry);
  new_entry->set_key(key);
  new_entry->set_value(value);

  // Set removed field true if the new key and value were removed.
  const Entry *e = dic_->LookupWithoutInsert(Fingerprint(key, value));
  new_entry->set_removed(e != NULL && e->removed());

  return new_entry;
}

bool UserHistoryPredictor::LookupEntry(
    const string &input_key,
    const string &key_base,
    const Trie<string> *key_expanded,
    const UserHistoryPredictor::Entry *entry,
    const UserHistoryPredictor::Entry *prev_entry,
    EntryPriorityQueue *results) const {
  CHECK(entry);
  CHECK(results);

  Entry *result = NULL;

  const Entry *last_entry = NULL;

  // last_access_time of the left-closest content word.
  uint64 left_last_access_time = 0;

  // last_access_time of the left-most content word.
  uint64 left_most_last_access_time = 0;

  // Example: [a|B|c|D]
  // a,c: functional word
  // B,D: content word
  // left_last_access_time:   timestamp of D
  // left_most_last_access_time:   timestamp of B

  // |input_key| is a query user is now typing.
  // |entry->key()| is a target value saved in the database.
  //  const string input_key = key_base;

  const MatchType mtype = GetMatchTypeFromInput(
      input_key, key_base, key_expanded, entry->key());
  if (mtype == NO_MATCH) {
    return false;
  } else if (mtype == LEFT_EMPTY_MATCH) {  // zero-query-suggestion
    // if |input_key| is empty, the |prev_entry| and |entry| must
    // have bigram relation.
    if (prev_entry != NULL && HasBigramEntry(*entry, *prev_entry)) {
      result = AddEntry(*entry, results);
      if (result) {
        last_entry = entry;
        left_last_access_time = entry->last_access_time();
        left_most_last_access_time = IsContentWord(entry->value()) ?
            left_last_access_time : 0;
      }
    } else {
      return false;
    }
  } else if (mtype == LEFT_PREFIX_MATCH) {
    // |input_key| is shorter than |entry->key()|
    // This scenario is a simple prefix match.
    // e.g., |input_key|="foo", |entry->key()|="foobar"
    result = AddEntry(*entry, results);
    if (result) {
      last_entry = entry;
      left_last_access_time = entry->last_access_time();
      left_most_last_access_time = IsContentWord(entry->value()) ?
          left_last_access_time : 0;
    }
  } else if (mtype == RIGHT_PREFIX_MATCH || mtype == EXACT_MATCH) {
    // |input_key| is longer than or the same as |entry->key()|.
    // In this case, recursively traverse "next_entries" until
    // target entry gets longer than input_key.
    // e.g., |input_key|="foobar", |entry->key()|="foo"
    left_last_access_time = entry->last_access_time();
    left_most_last_access_time = IsContentWord(entry->value()) ?
        left_last_access_time : 0;
    string key = entry->key();
    string value = entry->value();
    const Entry *current_entry = entry;
    set<uint32> seen;
    seen.insert(EntryFingerprint(*current_entry));
    // Until target entry gets longer than input_key.
    while (key.size() <= input_key.size()) {
      const Entry *latest_entry = NULL;
      const Entry *left_same_timestamp_entry = NULL;
      const Entry *left_most_same_timestamp_entry = NULL;
      for (size_t i = 0; i < current_entry->next_entries_size(); ++i) {
        const Entry *tmp_entry = dic_->LookupWithoutInsert(
            current_entry->next_entries(i).entry_fp());
        if (tmp_entry == NULL || tmp_entry->key().empty()) {
          continue;
        }
        const MatchType mtype2 = GetMatchType(key + tmp_entry->key(),
                                              input_key);
        if (mtype2 == NO_MATCH || mtype2 == LEFT_EMPTY_MATCH) {
          continue;
        }
        if (latest_entry == NULL ||
            latest_entry->last_access_time() < tmp_entry->last_access_time()) {
          latest_entry = tmp_entry;
        }
        if (tmp_entry->last_access_time() == left_last_access_time) {
          left_same_timestamp_entry = tmp_entry;
        }
        if (tmp_entry->last_access_time() == left_most_last_access_time) {
          left_most_same_timestamp_entry = tmp_entry;
        }
      }

      // Prefer bigrams which are generated at the same time.
      // When last_access_time are the same, these two bigrams were
      // input together.
      // The preferences:
      // (1). The current entry's time stamp is equal to that of
      //      left most content word
      // (2). The current entry's time stamp is equal to that of
      //      left closest content word
      // (3). The current entry is the latest
      const Entry *next_entry = left_most_same_timestamp_entry;
      if (next_entry == NULL) {
        next_entry = left_same_timestamp_entry;
      }
      if (next_entry == NULL) {
        next_entry = latest_entry;
      }

      if (next_entry == NULL || next_entry->key().empty()) {
        break;
      }

      // if duplicate entry is found, don't expand more.
      // This is because an entry only has one timestamp.
      // we cannot trust the timestamp if there are duplicate values
      // in one input.
      if (!seen.insert(EntryFingerprint(*next_entry)).second) {
        break;
      }

      key += next_entry->key();
      value += next_entry->value();
      current_entry = next_entry;
      last_entry = next_entry;

      // Don't update left_access_time if the current entry is
      // not a content word.
      // The time-stamp of non-content-word will be updated frequently.
      // The time-stamp of the previous candidate is more trustful.
      // It partially fixes the bug http://b/2843371.
      const bool is_content_word = IsContentWord(current_entry->value());

      if (is_content_word) {
        left_last_access_time = current_entry->last_access_time();
      }

      // if left_most entry is a functional word (symbols/punctuations),
      // we don't take it as a canonical candidate.
      if (left_most_last_access_time == 0 && is_content_word) {
        left_most_last_access_time = current_entry->last_access_time();
      }
    }

    if (key.size() < input_key.size()) {
      VLOG(3) << "Cannot find prefix match even after chain rules";
      return false;
    }

    result = AddEntryWithNewKeyValue(key, value, *entry, results);
  } else {
    LOG(ERROR) << "Unknown match mode: " << mtype;
    return false;
  }

  if (result == NULL) {
    return false;
  }

  // if prev entry is not NULL, check whether there is a bigram
  // from |prev_entry| to |entry|.
  result->set_bigram_boost(false);

  if (prev_entry != NULL && HasBigramEntry(*entry, *prev_entry)) {
    // set bigram_boost flag so that this entry is boosted
    // against LRU policy.
    result->set_bigram_boost(true);
  }

  if (!result->removed()) {
    results->Push(result);
  }

  // Expand new entry which was input just after "last_entry"
  if (last_entry != NULL &&
      Util::CharsLen(result->key()) >= 1 &&
      2 * Util::CharsLen(input_key) >= Util::CharsLen(result->key())) {
    const Entry *latest_entry = NULL;
    const Entry *left_same_timestamp_entry = NULL;
    const Entry *left_most_same_timestamp_entry = NULL;
    for (int i = 0; i < last_entry->next_entries_size(); ++i) {
      const Entry *tmp_entry = dic_->LookupWithoutInsert(
          last_entry->next_entries(i).entry_fp());
      if (tmp_entry == NULL || tmp_entry->key().empty()) {
        continue;
      }
      if (latest_entry == NULL ||
          latest_entry->last_access_time() < tmp_entry->last_access_time()) {
        latest_entry = tmp_entry;
      }
      if (tmp_entry->last_access_time() == left_last_access_time) {
        left_same_timestamp_entry = tmp_entry;
      }
      if (tmp_entry->last_access_time() == left_most_last_access_time) {
        left_most_same_timestamp_entry = tmp_entry;
      }
    }

    const Entry *next_entry = left_most_same_timestamp_entry;
    if (next_entry == NULL) {
      next_entry = left_same_timestamp_entry;
    }
    if (next_entry == NULL) {
      next_entry = latest_entry;
    }

    // the new entry was input within 10 seconds.
    // TODO(taku): This is a simple heuristics.
    if (next_entry != NULL && !next_entry->key().empty() &&
        abs(static_cast<int32>(next_entry->last_access_time() -
                               last_entry->last_access_time())) <= 10 &&
        IsContentWord(next_entry->value())) {
      Entry *result2 = AddEntryWithNewKeyValue(
          result->key() + next_entry->key(),
          result->value() + next_entry->value(),
          *result,
          results);
      if (!result2->removed()) {
        results->Push(result2);
      }
    }
  }

  return true;
}

bool UserHistoryPredictor::Predict(Segments *segments) const {
  ConversionRequest default_request;
  return PredictForRequest(default_request, segments);
}

bool UserHistoryPredictor::PredictForRequest(const ConversionRequest &request,
                                             Segments *segments) const {
  if (!CheckSyncerAndDelete()) {
    LOG(WARNING) << "Syncer is running";
    return false;
  }

  if (GET_CONFIG(incognito_mode)) {
    VLOG(2) << "incognito mode";
    return false;
  }

  if (segments->request_type() == Segments::CONVERSION) {
    VLOG(2) << "request type is CONVERSION";
    return false;
  }

  if (!GET_CONFIG(use_history_suggest) &&
      segments->request_type() == Segments::SUGGESTION) {
    VLOG(2) << "no history suggest";
    return false;
  }

  if (segments->conversion_segments_size() < 1) {
    VLOG(2) << "segment size < 1";
    return false;
  }

  if (dic_->Head() == NULL) {
    VLOG(2) << "dic head is NULL";
    return false;
  }

  const RequestType request_type = request.request().zero_query_suggestion() ?
      ZERO_QUERY_SUGGESTION : DEFAULT;
  const string &input_key = segments->conversion_segment(0).key();
  if (IsPunctuation(Util::SubString(input_key, 0, 1))) {
    VLOG(2) << "input_key starts with punctuations";
    return false;
  }

  const size_t input_key_len = Util::CharsLen(input_key);
  if (input_key_len == 0 && request_type == DEFAULT) {
    VLOG(2) << "key length is 0";
    return false;
  }

  const Entry *prev_entry =
      LookupPrevEntry(*segments, request.request().available_emoji_carrier());
  if (input_key_len == 0 && prev_entry == NULL) {
    VLOG(1) << "If input_key_len is 0, prev_entry must be set";
    return false;
  }

  EntryPriorityQueue results;
  GetResultsFromHistoryDictionary(request, *segments, prev_entry, &results);
  if (results.size() == 0) {
    VLOG(2) << "no prefix match candiate is found.";
    return false;
  }

  return InsertCandidates(request_type, request, segments, &results);
}

const UserHistoryPredictor::Entry *UserHistoryPredictor::LookupPrevEntry(
    const Segments &segments, uint32 available_emoji_carrier) const {
  const size_t history_segments_size = segments.history_segments_size();
  const Entry *prev_entry = NULL;
  // when threre are non-zero history segments, lookup an entry
  // from the LRU dictionary, which is correspoinding to the last
  // history segment.
  if (history_segments_size == 0) {
    return NULL;
  }

  const Segment &history_segment =
      segments.history_segment(history_segments_size - 1);

  // Simply lookup the history_segment.
  prev_entry = dic_->LookupWithoutInsert(SegmentFingerprint(history_segment));

  // When |prev_entry| is NULL or |prev_entry| has no valid next_entries,
  // do linear-search over the LRU.
  if ((prev_entry == NULL && history_segment.candidates_size() > 0) ||
      (prev_entry != NULL && prev_entry->next_entries_size() == 0)) {
    const string &prev_value = prev_entry == NULL ?
        history_segment.candidate(0).value : prev_entry->value();
    int trial = 0;
    for (const DicElement *elm = dic_->Head();
         trial++ < kMaxPrevValueTrial && elm != NULL; elm = elm->next) {
      const Entry *entry = &(elm->value);
      // entry->value() equals to the prev_value or
      // entry->value() is a SUFFIX of prev_value.
      // length of entry->value() must be >= 2, as single-length
      // match would be noisy.
      if (IsValidEntry(*entry, available_emoji_carrier) &&
          entry != prev_entry &&
          entry->next_entries_size() > 0 &&
          Util::CharsLen(entry->value()) >= 2 &&
          (entry->value() == prev_value ||
           Util::EndsWith(prev_value, entry->value()))) {
        prev_entry = entry;
        break;
      }
    }
  }
  return prev_entry;
}

void UserHistoryPredictor::GetResultsFromHistoryDictionary(
    const ConversionRequest &request,
    const Segments &segments, const Entry *prev_entry,
    EntryPriorityQueue *results) const {
  DCHECK(results);
  const size_t max_results_size = 5 * segments.max_prediction_candidates_size();

  // Get romanized input key if the given preedit looks misspelled.
  const string roman_input_key = GetRomanMisspelledKey(segments);

  // TODO(team): make GetKanaMisspelledKey(segments);
  // const string kana_input_key = GetKanaMisspelledKey(segments);

  // If we have ambiguity for the input, get expanded key.
  // Example1 roman input: for "あk", we will get |base|, "あ" and |expanded|,
  // "か", "き", etc
  // Example2 kana input: for "あか", we will get |base|, "あ" and |expanded|,
  // "か", and "が".

  // |base_key| and |input_key| could be different
  // For kana-input, we will expand the ambiguity for "゛".
  // When we input "もす",
  // |base_key|: "も"
  // |expanded|: "す", "ず"
  // |input_key|: "もす"
  // In this case, we want to show candidates for "もす" as EXACT match,
  // and candidates for "もず" as LEFT_PREFIX_MATCH
  //
  // For roman-input, when we input "あｋ",
  // |input_key| is "あｋ" and |base_key| is "あ"
  string input_key;
  string base_key;
  scoped_ptr<Trie<string> > expanded;
  GetInputKeyFromSegments(request, segments, &input_key, &base_key, &expanded);

  int trial = 0;
  for (const DicElement *elm = dic_->Head(); elm != NULL; elm = elm->next) {
    if (!IsValidEntryIgnoringRemovedField(
            elm->value, request.request().available_emoji_carrier())) {
      continue;
    }
    if (segments.request_type() == Segments::SUGGESTION &&
        trial++ >= kMaxSuggestionTrial) {
      VLOG(2) << "too many trials";
      break;
    }

    // lookup key from elm_value and prev_entry.
    // If a new entry is found, the entry is pushed to the results.
    // TODO(team): make KanaFuzzyLookupEntry().
    if (!LookupEntry(input_key, base_key, expanded.get(), &(elm->value),
                     prev_entry, results) &&
        !RomanFuzzyLookupEntry(roman_input_key, &(elm->value), results)) {
      continue;
    }

    // already found enough results.
    if (results->size() >= max_results_size) {
      break;
    }
  }
}

// static
void UserHistoryPredictor::GetInputKeyFromSegments(
    const ConversionRequest &request, const Segments &segments,
    string *input_key, string *base,
    scoped_ptr<Trie<string> > *expanded) {
  DCHECK(input_key);
  DCHECK(base);

  if (!request.has_composer() ||
      !FLAGS_enable_expansion_for_user_history_predictor) {
    *input_key = segments.conversion_segment(0).key();
    *base = segments.conversion_segment(0).key();
    return;
  }

  request.composer().GetStringForPreedit(input_key);
  set<string> expanded_set;
  request.composer().GetQueriesForPrediction(base, &expanded_set);
  if (expanded_set.size() > 0) {
    expanded->reset(new Trie<string>);
    for (set<string>::const_iterator itr = expanded_set.begin();
         itr != expanded_set.end(); ++itr) {
      // For getting matched key, insert values
      (*expanded)->AddEntry(*itr, *itr);
    }
  }
}

bool UserHistoryPredictor::InsertCandidates(RequestType request_type,
                                            const ConversionRequest &request,
                                            Segments *segments,
                                            EntryPriorityQueue *results) const {
  DCHECK(results);
  Segment *segment = segments->mutable_conversion_segment(0);
  if (segment == NULL) {
    LOG(ERROR) << "segment is NULL";
    return false;
  }
  const uint32 input_key_len = Util::CharsLen(segment->key());
  while (segment->candidates_size() <
         segments->max_prediction_candidates_size()) {
    // |results| is a priority queue where the elemtnt
    // in the queue is sorted by the score defined in GetScore().
    const Entry *result_entry = results->Pop();
    if (result_entry == NULL) {
      // Pop() returns NULL when no more valid entry exists.
      break;
    }
    bool is_valid_candidate = false;
    if (segments->request_type() == Segments::PREDICTION) {
      is_valid_candidate = true;
    } else if (segments->request_type() == Segments::SUGGESTION) {
      // The top result of suggestion should be a VALID suggestion candidate.
      // i.e., SuggestionTrigerFunc should return true for the first
      // candidate.
      // If user types "デスノート" too many times, "デスノート" will be
      // suggested when user types "で". It is expected, but if user types
      // "です" after that,  showing "デスノート" is annoying.
      // In this situation, "です" is in the LRU, but SuggestionTrigerFunc
      // returns false for "です", since it is short.
      if (IsValidSuggestion(request_type,
                            input_key_len, *result_entry)) {
        is_valid_candidate = true;
      } else if (segment->candidates_size() == 0) {
        VLOG(2) << "candidates size is 0";
        return false;
      }
    } else {
      LOG(ERROR) << "Unknown mode";
      return false;
    }

    if (!is_valid_candidate) {
      VLOG(2) << "not a valid candidate: " << result_entry->key();
      continue;
    }

    if (request.request().mixed_conversion() &&
        result_entry->suggestion_freq() < 2 &&
        Util::CharsLen(result_entry->value()) > 8) {
      // Don't show long history for mixed conversion
      // TODO(toshiyuki): Better to merge this into IsValidSuggestion logic.
      VLOG(2) << "long candidate: " << result_entry->value();
      continue;
    }

    Segment::Candidate *candidate = segment->push_back_candidate();
    DCHECK(candidate);
    candidate->Init();
    candidate->key = result_entry->key();
    candidate->content_key = result_entry->key();
    candidate->value = result_entry->value();
    candidate->content_value = result_entry->value();
    candidate->attributes |=
        Segment::Candidate::USER_HISTORY_PREDICTION |
        Segment::Candidate::NO_VARIANTS_EXPANSION;
    if (result_entry->spelling_correction()) {
      candidate->attributes |= Segment::Candidate::SPELLING_CORRECTION;
    }
    const string &description = result_entry->description();
    // If we have stored description, set it exactly.
    if (!description.empty()) {
      candidate->description = description;
      candidate->attributes |= Segment::Candidate::NO_EXTRA_DESCRIPTION;
    } else {
      VariantsRewriter::SetDescriptionForPrediction(*pos_matcher_, candidate);
    }
#if DEBUG
    if (candidate->description.find("History") == string::npos) {
      candidate->description += " History";
    }
#endif  // DEBUG
  }

  return (segment->candidates_size() > 0);
}

void UserHistoryPredictor::InsertNextEntry(
    const UserHistoryPredictor::NextEntry &next_entry,
    UserHistoryPredictor::Entry *entry) const {
  if (next_entry.entry_fp() == 0 || entry == NULL) {
    return;
  }

  NextEntry *target_next_entry = NULL;

  // If next_entries_size is less than kMaxNextEntriesSize,
  // we simply allocate a new entry.
  if (entry->next_entries_size() < max_next_entries_size()) {
    target_next_entry = entry->add_next_entries();
  } else {
    // Otherwise, find the oldest next_entry.
    uint64 last_access_time = kuint64max;
    for (int i = 0; i < entry->next_entries_size(); ++i) {
      // already has the same id
      if (next_entry.entry_fp() == entry->next_entries(i).entry_fp()) {
        target_next_entry = entry->mutable_next_entries(i);
        break;
      }
      const Entry *found_entry = dic_->LookupWithoutInsert(
          entry->next_entries(i).entry_fp());
      // reuse the entry if it is already removed from the LRU.
      if (found_entry == NULL) {
        target_next_entry = entry->mutable_next_entries(i);
        break;
      }
      // preserve the oldest entry
      if (target_next_entry == NULL ||
          last_access_time > found_entry->last_access_time()) {
        target_next_entry = entry->mutable_next_entries(i);
        last_access_time = found_entry->last_access_time();
      }
    }
  }

  if (target_next_entry == NULL) {
    LOG(ERROR) << "cannot find a room for inserting next fp";
    return;
  }

  target_next_entry->CopyFrom(next_entry);
}

bool UserHistoryPredictor::IsValidEntry(
    const Entry &entry, uint32 available_emoji_carrier) const {
  return !entry.removed() &&
      IsValidEntryIgnoringRemovedField(entry, available_emoji_carrier);
}

bool UserHistoryPredictor::IsValidEntryIgnoringRemovedField(
    const Entry &entry, uint32 available_emoji_carrier) const {
  if (entry.entry_type() != Entry::DEFAULT_ENTRY ||
      suppression_dictionary_->SuppressEntry(entry.key(), entry.value())) {
    return false;
  }

  if (IsEmojiEntry(entry)) {
    if (Util::IsAndroidPuaEmoji(entry.value())) {
      // Android carrier dependent emoji.
      const uint32 kAndroidCarrier =
          Request::DOCOMO_EMOJI |
          Request::SOFTBANK_EMOJI |
          Request::KDDI_EMOJI;
      if (!(available_emoji_carrier & kAndroidCarrier)) {
        return false;
      }
    } else {
      // Unicode 6.0 emoji.
      if (!(available_emoji_carrier & Request::UNICODE_EMOJI)) {
        return false;
      }
    }
  }

  return true;
}

void UserHistoryPredictor::InsertEvent(EntryType type) {
  if (type == Entry::DEFAULT_ENTRY) {
    return;
  }

  const uint64 last_access_time = Util::GetTime();
  const uint32 dic_key = Fingerprint("", "", type);

  CHECK(dic_.get());
  DicElement *e = dic_->Insert(dic_key);
  if (e == NULL) {
    VLOG(2) << "insert failed";
    return;
  }

  Entry *entry = &(e->value);
  DCHECK(entry);
  entry->Clear();
  entry->set_entry_type(type);
  entry->set_last_access_time(last_access_time);
}

void UserHistoryPredictor::Insert(const string &key,
                                  const string &value,
                                  const string &description,
                                  bool is_suggestion_selected,
                                  uint32 next_fp,
                                  uint64 last_access_time,
                                  Segments *segments) {
  if (key.empty() || value.empty() ||
      key.size() > kMaxStringLength ||
      value.size() > kMaxStringLength ||
      description.size() > kMaxStringLength) {
    return;
  }

  const uint32 dic_key = Fingerprint(key, value);

  if (!dic_->HasKey(dic_key)) {
    // the key is a new key inserted in the last Finish method.
    // Here we push a new RevertEntry so that the new "key" can be
    // removed when Revert() method is called.
    Segments::RevertEntry *revert_entry = segments->push_back_revert_entry();
    DCHECK(revert_entry);
    revert_entry->key = Uint32ToString(dic_key);
    revert_entry->id = UserHistoryPredictor::revert_id();
    revert_entry->revert_entry_type = Segments::RevertEntry::CREATE_ENTRY;
  } else {
    // the key is a old key not inserted in the last Finish method
    // TODO(taku):
    // add a treatment for UPDATE_ENTRY mode
  }

  DicElement *e = dic_->Insert(dic_key);
  if (e == NULL) {
    VLOG(2) << "insert failed";
    return;
  }

  Entry *entry = &(e->value);
  DCHECK(entry);

  entry->set_key(key);
  entry->set_value(value);
  entry->set_removed(false);

  if (description.empty()) {
    entry->clear_description();
  } else {
    entry->set_description(description);
  }

  entry->set_last_access_time(last_access_time);
  if (is_suggestion_selected) {
    entry->set_suggestion_freq(entry->suggestion_freq() + 1);
  } else {
    entry->set_conversion_freq(entry->conversion_freq() + 1);
  }

  // Insert next_fp to the entry
  if (next_fp != 0) {
    NextEntry next_entry;
    next_entry.set_entry_fp(next_fp);
    InsertNextEntry(next_entry, entry);
  }

  VLOG(2) << key << " " << value << " has inserted: "
          << entry->Utf8DebugString();

  // new entry is inserted to the cache
  updated_ = true;
}

void UserHistoryPredictor::Finish(Segments *segments) {
  if (segments->request_type() == Segments::REVERSE_CONVERSION) {
    // Do nothing for REVERSE_CONVERSION.
    return;
  }

  if (GET_CONFIG(incognito_mode)) {
    VLOG(2) << "incognito mode";
    return;
  }

  if (!GET_CONFIG(use_history_suggest)) {
    VLOG(2) << "no history suggest";
    return;
  }

  if (!CheckSyncerAndDelete()) {
    LOG(WARNING) << "Syncer is running";
    return;
  }

  const bool is_suggestion = segments->request_type() != Segments::CONVERSION;
  const uint64 last_access_time = Util::GetTime();

  // If user inputs a punctuation just after some long sentence,
  // we make a new candidate by concatinating the top element in LRU and
  // the punctuation user input. The top element in LRU is supposed to be
  // the long sentence user input before.
  // This is a fix for http://b/issue?id=2216838
  if (dic_->Head() != NULL &&
      dic_->Head()->value.last_access_time() + 5 > last_access_time &&
      // Check if the current value is a punctuation.
      segments->conversion_segments_size() == 1 &&
      segments->conversion_segment(0).candidates_size() > 0 &&
      IsPunctuation(segments->conversion_segment(0).candidate(0).value) &&
      // Check if the previous value looks like a sentence.
      segments->history_segments_size() > 0 &&
      segments->history_segment(
          segments->history_segments_size() - 1).candidates_size() > 0 &&
      IsSentenceLikeCandidate(segments->history_segment(
          segments->history_segments_size() - 1).candidate(0))) {
    const Entry *entry = &(dic_->Head()->value);
    DCHECK(entry);
    const string &last_value =
        segments->history_segment(
            segments->history_segments_size() - 1).candidate(0).value;
    // Check if the head value in LRU ends with the candidate value in history
    // segments.
    if (Util::EndsWith(entry->value(), last_value)) {
      const Segment::Candidate &candidate =
          segments->conversion_segment(0).candidate(0);
      const string key = entry->key() + candidate.key;
      const string value = entry->value() + candidate.value;
      // use the same last_access_time stored in the top element
      // so that this item can be grouped together.
      Insert(key, value, entry->description(), is_suggestion, 0,
             entry->last_access_time(), segments);
    }
  }

  string all_key, all_value;
  const size_t history_segments_size = segments->history_segments_size();

  // Check every segment is valid.
  for (size_t i = history_segments_size; i < segments->segments_size(); ++i) {
    const Segment &segment = segments->segment(i);
    if (segment.candidates_size() < 1) {
      VLOG(2) << "candidates size < 1";
      return;
    }
    if (segment.segment_type() != Segment::FIXED_VALUE) {
      VLOG(2) << "segment is not FIXED_VALUE";
      return;
    }
    const Segment::Candidate &candidate = segment.candidate(0);
    if (candidate.attributes &
        Segment::Candidate::NO_SUGGEST_LEARNING) {
      VLOG(2) << "NO_SUGGEST_LEARNING";
      return;
    }
  }

  if (IsPrivacySensitive(segments)) {
    VLOG(2) << "do not remember privacy sensitive input";
    return;
  }

  InsertHistory(is_suggestion, last_access_time, segments);
  return;
}

void UserHistoryPredictor::MakeLearningSegments(
    const Segments &segments, SegmentsForLearning *learning_segments) const {
  DCHECK(learning_segments);

  for (size_t i = 0; i < segments.history_segments_size(); ++i) {
    const Segment &segment = segments.history_segment(i);
    SegmentForLearning learning_segment;
    DCHECK_LE(1, segment.candidates_size());
    learning_segment.key = segment.candidate(0).key;
    learning_segment.value = segment.candidate(0).value;
    learning_segment.content_key = segment.candidate(0).content_key;
    learning_segment.content_value = segment.candidate(0).content_value;
    learning_segment.description = GetDescription(segment.candidate(0));
    learning_segments->push_back_history_segment(learning_segment);
  }
  for (size_t i = 0; i < segments.conversion_segments_size(); ++i) {
    const Segment &segment = segments.conversion_segment(i);
    const Segment::Candidate &candidate = segment.candidate(0);
    if (candidate.inner_segment_boundary.empty()) {
      SegmentForLearning learning_segment;
      learning_segment.key = candidate.key;
      learning_segment.value = candidate.value;
      learning_segment.content_key = candidate.content_key;
      learning_segment.content_value = candidate.content_value;
      learning_segment.description = GetDescription(candidate);
      learning_segments->push_back_conversion_segment(learning_segment);
    } else {
      SegmentForLearning learning_segment;
      for (Segment::Candidate::InnerSegmentIterator iter(&candidate);
           !iter.Done(); iter.Next()) {
        iter.GetKey().CopyToString(&learning_segment.key);
        iter.GetValue().CopyToString(&learning_segment.value);
        iter.GetContentKey().CopyToString(&learning_segment.content_key);
        iter.GetContentValue().CopyToString(&learning_segment.content_value);
        learning_segments->push_back_conversion_segment(learning_segment);
      }
    }
  }
}

void UserHistoryPredictor::InsertHistory(bool is_suggestion_selected,
                                         uint64 last_access_time,
                                         Segments *segments) {
  SegmentsForLearning learning_segments;
  MakeLearningSegments(*segments, &learning_segments);

  string all_key, all_value;
  set<uint32> seen;
  bool this_was_seen = false;
  const size_t history_segments_size =
      learning_segments.history_segments_size();

  for (size_t i = history_segments_size;
       i < learning_segments.all_segments_size(); ++i) {
    const SegmentForLearning &segment = learning_segments.all_segment(i);
    all_key += segment.key;
    all_value += segment.value;
    uint32 next_fp = (i == learning_segments.all_segments_size() - 1) ?
        0 : LearningSegmentFingerprint(learning_segments.all_segment(i + 1));
    // remember the first segment
    if (i == history_segments_size) {
      seen.insert(LearningSegmentFingerprint(segment));
    }
    uint32 next_fp_to_set = next_fp;
    // If two duplicate segments exist, kills the link
    // TO/FROM the second one to prevent loops.
    // Only killing "TO" link caused bug #2982886:
    // after converting "らいおん（もうじゅう）とぞうりむし（びせいぶつ）"
    // and typing "ぞうりむし", "ゾウリムシ（猛獣" was suggested.
    if (this_was_seen) {
      next_fp_to_set = 0;
    }
    if (!seen.insert(next_fp).second) {
      next_fp_to_set = 0;
      this_was_seen = true;
    } else {
      this_was_seen = false;
    }
    Insert(segment.key,
           segment.value,
           segment.description,
           is_suggestion_selected, next_fp_to_set,
           last_access_time, segments);
    if (content_word_learning_enabled_ &&
        segment.content_key != segment.key &&
        segment.content_value != segment.value) {
      Insert(segment.content_key,
             segment.content_value,
             segment.description,
             is_suggestion_selected, 0,
             last_access_time, segments);
    }
  }

  // Insert all_key/all_value
  if (learning_segments.conversion_segments_size() > 1 &&
      !all_key.empty() && !all_value.empty()) {
    Insert(all_key, all_value, "",
           is_suggestion_selected,
           0, last_access_time, segments);
  }

  // Make a link from the last history_segment to the first conversion segment
  // or to the entire user input.
  if (learning_segments.history_segments_size() > 0 &&
      learning_segments.conversion_segments_size() > 0) {
    const SegmentForLearning &history_segment =
        learning_segments.history_segment(
            segments->history_segments_size() - 1);
    const SegmentForLearning &conversion_segment =
        learning_segments.conversion_segment(0);
    // Don't learn a link from/to a punctuation.  Note that another piece of
    // code handles learning for (sentence + punctuation) form; see Finish().
    if (IsPunctuation(history_segment.value) ||
        IsPunctuation(conversion_segment.value)) {
      return;
    }
    Entry *history_entry = dic_->MutableLookupWithoutInsert(
        LearningSegmentFingerprint(history_segment));
    NextEntry next_entry;
    if (segments->request_type() == Segments::CONVERSION) {
      next_entry.set_entry_fp(LearningSegmentFingerprint(conversion_segment));
      InsertNextEntry(next_entry, history_entry);
    }

    // Entire user input or SUGGESTION
    if (segments->request_type() != Segments::CONVERSION ||
        learning_segments.conversion_segments_size() > 1) {
      next_entry.set_entry_fp(Fingerprint(all_key, all_value));
      InsertNextEntry(next_entry, history_entry);
    }
  }

  return;
}

void UserHistoryPredictor::Revert(Segments *segments) {
  if (!CheckSyncerAndDelete()) {
    LOG(WARNING) << "Syncer is running";
    return;
  }

  for (size_t i = 0; i < segments->revert_entries_size(); ++i) {
    const Segments::RevertEntry &revert_entry =
        segments->revert_entry(i);
    if (revert_entry.id == UserHistoryPredictor::revert_id() &&
        revert_entry.revert_entry_type == Segments::RevertEntry::CREATE_ENTRY) {
      VLOG(2) << "Erasing the key: " << StringToUint32(revert_entry.key);
      dic_->Erase(StringToUint32(revert_entry.key));
    }
  }
}

// static
UserHistoryPredictor::MatchType UserHistoryPredictor::GetMatchType(
    const string &lstr, const string &rstr) {
  if (lstr.empty() && !rstr.empty()) {
    return LEFT_EMPTY_MATCH;
  }

  const size_t size = min(lstr.size(), rstr.size());
  if (size == 0) {
    return NO_MATCH;
  }

  const int result = memcmp(lstr.data(), rstr.data(), size);
  if (result != 0) {
    return NO_MATCH;
  }

  if (lstr.size() == rstr.size()) {
    return EXACT_MATCH;
  } else if (lstr.size() < rstr.size()) {
    return LEFT_PREFIX_MATCH;
  } else {
    return RIGHT_PREFIX_MATCH;
  }

  return NO_MATCH;
}

// static
UserHistoryPredictor::MatchType UserHistoryPredictor::GetMatchTypeFromInput(
    const string &input_key,
    const string &key_base, const Trie<string> *key_expanded,
    const string &target) {
  if (key_expanded == NULL) {
    // |input_key| and |key_base| can be different by compoesr modification.
    // For example, |input_key|, "８，＋", and |base| "８、＋".
    return GetMatchType(key_base, target);
  }

  // we can assume key_expanded != NULL from here.
  if (key_base.empty()) {
      string value;
      size_t key_length = 0;
      bool has_subtrie = false;
      if (!key_expanded->LookUpPrefix(target, &value,
                                      &key_length, &has_subtrie)) {
        return NO_MATCH;
      } else if (value == target && value == input_key) {
        return EXACT_MATCH;
      } else {
        return LEFT_PREFIX_MATCH;
      }
  } else {
    const size_t size = min(key_base.size(), target.size());
    if (size == 0) {
      return NO_MATCH;
    }
    const int result = memcmp(key_base.data(), target.data(), size);
    if (result != 0) {
      return NO_MATCH;
    }
    if (target.size() <= key_base.size()) {
      return RIGHT_PREFIX_MATCH;
    }
    string value;
    size_t key_length = 0;
    bool has_subtrie = false;
    if (!key_expanded->LookUpPrefix(target.data() + key_base.size(),
                                    &value,
                                    &key_length, &has_subtrie)) {
      return NO_MATCH;
    }
    const string matched = key_base + value;
    if (matched == target && matched == input_key) {
      return EXACT_MATCH;
    } else {
      return LEFT_PREFIX_MATCH;
    }
  }

  DCHECK(false) << "Should not come here";
  return NO_MATCH;
}

// static
uint32 UserHistoryPredictor::Fingerprint(const string &key,
                                         const string &value,
                                         EntryType type) {
  if (type == Entry::DEFAULT_ENTRY) {
    // Since we have already used the fingerprint function for next entries and
    // next entries are saved in user's local machine, we are not able
    // to change the Fingerprint function for the old key/value type.
    return Util::Fingerprint32(key + kDelimiter + value);
  } else {
    uint8 id = static_cast<uint8>(type);
    return Util::Fingerprint32(reinterpret_cast<char *>(&id), sizeof(id));
  }
}

// static
uint32 UserHistoryPredictor::Fingerprint(const string &key,
                                         const string &value) {
  return Fingerprint(key, value, Entry::DEFAULT_ENTRY);
}

uint32 UserHistoryPredictor::EntryFingerprint(
    const UserHistoryPredictor::Entry &entry) {
  return Fingerprint(entry.key(), entry.value());
}

// static
uint32 UserHistoryPredictor::SegmentFingerprint(const Segment &segment) {
  if (segment.candidates_size() > 0) {
    return Fingerprint(segment.candidate(0).key, segment.candidate(0).value);
  }
  return 0;
}

// static
uint32 UserHistoryPredictor::LearningSegmentFingerprint(
    const SegmentForLearning &segment) {
  return Fingerprint(segment.key, segment.value);
}

// static
string UserHistoryPredictor::Uint32ToString(uint32 fp) {
  string buf(reinterpret_cast<const char *>(&fp), sizeof(fp));
  return buf;
}

// static
uint32 UserHistoryPredictor::StringToUint32(const string &input) {
  uint32 result = 0;
  if (input.size() == sizeof(result)) {
    memcpy(reinterpret_cast<char *>(&result), input.data(), input.size());
  }
  return result;
}

// static
bool UserHistoryPredictor::IsValidSuggestion(
    RequestType request_type,
    uint32 prefix_len,
    const UserHistoryPredictor::Entry &entry) {
  // when bigram_boost is true, that means that previous user input
  // and current input have bigram relation.
  if (entry.bigram_boost()) {
    return true;
  }
  // when zero_query_suggestion is true, that means that
  // predictor is running on mobile device. In this case,
  // make the behavior more aggressive.
  if (request_type == ZERO_QUERY_SUGGESTION) {
    return true;
  }
  // Handle suggestion_freq and conversion_freq differently.
  // conversion_freq is less aggressively affecting to the final decision.
  const uint32 freq = max(entry.suggestion_freq(),
                          entry.conversion_freq() / 4);

  // TODO(taku,komatsu): better to make it simpler and easier to be understood.
  const uint32 base_prefix_len = 3 - min(static_cast<uint32>(2), freq);
  return (prefix_len >= base_prefix_len);
}

// static
// 1) sort by last_access_time, which is basically the same as LRU policy.
// 2) boost shorter candidate, if having the same last_access_time.
// 3) add a bigram boost as a special bonus.
// TODO(taku): better to take "frequency" into consideration
uint32 UserHistoryPredictor::GetScore(
    const UserHistoryPredictor::Entry &entry) {
  const uint32 kBigramBoostAsTime = 7 * 24 * 60 * 60;   // 1 week.
  return
      entry.last_access_time() - Util::CharsLen(entry.value()) +
      (entry.bigram_boost() ? kBigramBoostAsTime : 0);
}

// return the size of cache.
// static
uint32 UserHistoryPredictor::cache_size() {
  return kLRUCacheSize;
}

// return the size of next entries.
// static
uint32 UserHistoryPredictor::max_next_entries_size() {
  return kMaxNextEntriesSize;
}

}  // namespace mozc
