// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "converter/immutable_converter.h"

#include <algorithm>
#include <cctype>
#include <climits>
#include <string>
#include <utility>
#include <vector>

#include "base/logging.h"
#include "base/port.h"
#include "base/scoped_ptr.h"
#include "base/stl_util.h"
#include "base/string_piece.h"
#include "base/util.h"
#include "config/config.pb.h"
#include "config/config_handler.h"
#include "converter/connector_interface.h"
#include "converter/conversion_request.h"
#include "converter/key_corrector.h"
#include "converter/lattice.h"
#include "converter/nbest_generator.h"
#include "converter/node.h"
#include "converter/node_list_builder.h"
#include "converter/segmenter_interface.h"
#include "converter/segments.h"
#include "dictionary/dictionary_interface.h"
#include "dictionary/pos_group.h"
#include "dictionary/pos_matcher.h"
#include "dictionary/suppression_dictionary.h"
#include "prediction/suggestion_filter.h"
#include "session/commands.pb.h"

DECLARE_bool(disable_lattice_cache);
DEFINE_bool(disable_predictive_realtime_conversion,
            false,
            "disable predictive realtime conversion");

namespace mozc {
namespace {

const size_t kMaxSegmentsSize                   = 256;
const size_t kMaxCharLength                     = 1024;
const size_t kMaxCharLengthForReverseConversion = 600;  // 200 chars in UTF8
const int    kMaxCost                           = 32767;
const int    kMinCost                           = -32767;
const int    kDefaultNumberCost                 = 3000;

class KeyCorrectedNodeListBuilder : public BaseNodeListBuilder {
 public:
  KeyCorrectedNodeListBuilder(size_t pos,
                              StringPiece original_lookup_key,
                              const KeyCorrector *key_corrector,
                              NodeAllocatorInterface *allocator)
      : BaseNodeListBuilder(allocator, allocator->max_nodes_size()),
        pos_(pos),
        original_lookup_key_(original_lookup_key),
        key_corrector_(key_corrector),
        tail_(NULL) {}

  virtual ResultType OnToken(StringPiece key, StringPiece actual_key,
                             const Token &token) {
    const size_t offset =
        key_corrector_->GetOriginalOffset(pos_, token.key.size());
    if (!KeyCorrector::IsValidPosition(offset) || offset == 0) {
      return TRAVERSE_NEXT_KEY;
    }
    Node *node = NewNodeFromToken(token);
    node->key.assign(original_lookup_key_.data() + pos_, offset);
    node->wcost += KeyCorrector::GetCorrectedCostPenalty(node->key);

    // Push back |node| to the end.
    if (result_ == NULL) {
      result_ = node;
    } else {
      DCHECK(tail_ != NULL);
      tail_->bnext = node;
    }
    tail_ = node;
    return TRAVERSE_CONTINUE;
  }

  Node *tail() const { return tail_; }

 private:
  const size_t pos_;
  const StringPiece original_lookup_key_;
  const KeyCorrector *key_corrector_;
  Node *tail_;
};

void InsertCorrectedNodes(size_t pos, const string &key,
                          const ConversionRequest &request,
                          const KeyCorrector *key_corrector,
                          const DictionaryInterface *dictionary,
                          Lattice *lattice) {
  if (key_corrector == NULL) {
    return;
  }
  size_t length = 0;
  const char *str = key_corrector->GetCorrectedPrefix(pos, &length);
  if (str == NULL || length == 0) {
    return;
  }
  KeyCorrectedNodeListBuilder builder(pos, key, key_corrector,
                                      lattice->node_allocator());
  dictionary->LookupPrefix(
      StringPiece(str, length),
      request.IsKanaModifierInsensitiveConversion(),
      &builder);
  if (builder.tail() != NULL) {
    builder.tail()->bnext = NULL;
  }
  if (builder.result() != NULL) {
    lattice->Insert(pos, builder.result());
  }
}

bool IsNumber(const char c) {
  return c >= '0' && c <= '9';
}

bool ContainsWhiteSpacesOnly(const StringPiece s) {
  for (ConstChar32Iterator iter(s); !iter.Done(); iter.Next()) {
    switch (iter.Get()) {
      case 0x09:    // TAB
      case 0x20:    // Half-width space
      case 0x3000:  // Full-width space
        break;
      default:
        return false;
    }
  }
  return true;
}

void DecomposeNumberAndSuffix(const string &input,
                              string *number, string *suffix) {
  const char *begin = input.data();
  const char *end = input.data() + input.size();
  size_t pos = 0;
  while (begin < end) {
    if (IsNumber(*begin)) {
      ++pos;
      ++begin;
      continue;
    }
    break;
  }
  number->assign(input, 0, pos);
  suffix->assign(input, pos, input.size() - pos);
}

void DecomposePrefixAndNumber(const string &input,
                              string *prefix, string *number) {
  const char *begin = input.data();
  const char *end = input.data() + input.size() - 1;
  size_t pos = input.size();
  while (begin <= end) {
    if (IsNumber(*end)) {
      --pos;
      --end;
      continue;
    }
    break;
  }
  prefix->assign(input, 0, pos);
  number->assign(input, pos, input.size() - pos);
}

void NormalizeHistorySegments(Segments *segments) {
  for (size_t i = 0; i < segments->history_segments_size(); ++i) {
    Segment *segment = segments->mutable_history_segment(i);
    if (segment == NULL || segment->candidates_size() == 0) {
      continue;
    }

    string key;
    Segment::Candidate *c = segment->mutable_candidate(0);
    const string value = c->value;
    const string content_value = c->content_value;
    const string content_key = c->content_key;
    Util::FullWidthAsciiToHalfWidthAscii(segment->key(), &key);
    Util::FullWidthAsciiToHalfWidthAscii(value, &c->value);
    Util::FullWidthAsciiToHalfWidthAscii(content_value, &c->content_value);
    Util::FullWidthAsciiToHalfWidthAscii(content_key, &c->content_key);
    c->key = key;
    segment->set_key(key);

    // Ad-hoc rewrite for Numbers
    // Since number candidate is generative, i.e, any number can be
    // written by users, we normalize the value here. normalzied number
    // is used for the ranking tweaking based on history
    if (key.size() > 1 &&
        key == c->value &&
        key == c->content_value &&
        key == c->key &&
        key == c->content_key &&
        Util::GetScriptType(key) == Util::NUMBER &&
        IsNumber(key[key.size() - 1])) {
      key = key[key.size() - 1];  // use the last digit only
      segment->set_key(key);
      c->value = key;
      c->content_value = key;
      c->content_key = key;
    }
  }
}

Lattice *GetLattice(Segments *segments, bool is_prediction) {
  Lattice *lattice = segments->mutable_cached_lattice();
  if (lattice == NULL) {
    return NULL;
  }

  const size_t history_segments_size = segments->history_segments_size();

  string history_key = "";
  for (size_t i = 0; i < history_segments_size; ++i) {
    history_key.append(segments->segment(i).key());
  }
  string conversion_key = "";
  for (size_t i = history_segments_size; i < segments->segments_size(); ++i) {
    conversion_key.append(segments->segment(i).key());
  }

  const size_t lattice_history_end_pos = lattice->history_end_pos();

  if (!is_prediction ||
      FLAGS_disable_lattice_cache ||
      Util::CharsLen(conversion_key) <= 1 ||
      lattice_history_end_pos != history_key.size()) {
    // Do not cache if conversion is not prediction, or disable_lattice_cache
    // flag is used.  In addition, if a user input the key right after the
    // finish of conversion, reset the lattice to erase old nodes.
    // Even if the lattice key is not changed, we should reset the lattice
    // when the history size is changed.
    // When we submit the candidate partially, the entire key will not changed,
    // but the history position will be changed.
    lattice->Clear();
  }

  return lattice;
}

}  // namespace

ImmutableConverterImpl::ImmutableConverterImpl(
    const DictionaryInterface *dictionary,
    const DictionaryInterface *suffix_dictionary,
    const SuppressionDictionary *suppression_dictionary,
    const ConnectorInterface *connector,
    const SegmenterInterface *segmenter,
    const POSMatcher *pos_matcher,
    const PosGroup *pos_group,
    const SuggestionFilter *suggestion_filter)
    : dictionary_(dictionary),
      suffix_dictionary_(suffix_dictionary),
      suppression_dictionary_(suppression_dictionary),
      connector_(connector),
      segmenter_(segmenter),
      pos_matcher_(pos_matcher),
      pos_group_(pos_group),
      suggestion_filter_(suggestion_filter),
      first_name_id_(pos_matcher_->GetFirstNameId()),
      last_name_id_(pos_matcher_->GetLastNameId()),
      number_id_(pos_matcher_->GetNumberId()),
      unknown_id_(pos_matcher_->GetUnknownId()),
      last_to_first_name_transition_cost_(
          connector_->GetTransitionCost(last_name_id_, first_name_id_)) {
  DCHECK(dictionary_);
  DCHECK(suffix_dictionary_);
  DCHECK(suppression_dictionary_);
  DCHECK(connector_);
  DCHECK(segmenter_);
  DCHECK(pos_matcher_);
  DCHECK(pos_group_);
  DCHECK(suggestion_filter_);
}

void ImmutableConverterImpl::ExpandCandidates(
    const string &original_key, NBestGenerator *nbest, Segment *segment,
    Segments::RequestType request_type, size_t expand_size) const {
  DCHECK(nbest);
  DCHECK(segment);
  CHECK_GT(expand_size, 0);

  while (segment->candidates_size() < expand_size) {
    Segment::Candidate *candidate = segment->push_back_candidate();
    DCHECK(candidate);
    candidate->Init();

    // if NBestGenerator::Next() returns NULL,
    // no more entries are generated.
    if (!nbest->Next(original_key, candidate, request_type)) {
      segment->pop_back_candidate();
      break;
    }
  }
}

void ImmutableConverterImpl::InsertDummyCandidates(Segment *segment,
                                                   size_t expand_size) const {
  const Segment::Candidate *top_candidate =
      segment->candidates_size() == 0 ? NULL :
      segment->mutable_candidate(0);
  const Segment::Candidate *last_candidate =
      segment->candidates_size() == 0 ? NULL :
      segment->mutable_candidate(segment->candidates_size() - 1);

  // Insert a dummy candiate whose content_value is katakana.
  // If functional_key() is empty, no need to make a dummy candidate.
  if (segment->candidates_size() > 0 &&
      segment->candidates_size() < expand_size &&
      !segment->candidate(0).functional_key().empty() &&
      Util::GetScriptType(segment->candidate(0).content_key) ==
      Util::HIRAGANA) {
    // Use last_candidate as a refernce of cost.
    // Use top_candidate as a refarence of lid/rid and key/value.
    DCHECK(top_candidate);
    DCHECK(last_candidate);
    Segment::Candidate *new_candidate = segment->add_candidate();
    DCHECK(new_candidate);

    new_candidate->CopyFrom(*top_candidate);
    Util::HiraganaToKatakana(segment->candidate(0).content_key,
                             &new_candidate->content_value);
    Util::ConcatStrings(new_candidate->content_value,
                        top_candidate->functional_value(),
                        &new_candidate->value);
    new_candidate->cost = last_candidate->cost + 1;
    new_candidate->wcost = last_candidate->wcost + 1;
    new_candidate->structure_cost = last_candidate->structure_cost + 1;
    new_candidate->attributes = 0;
    // We cannot copy inner_segment_boundary; see b/8109381.
    new_candidate->inner_segment_boundary.clear();
    DCHECK(new_candidate->IsValid());
    last_candidate = new_candidate;
  }

  // Insert a dummy hiragana candidate.
  if (segment->candidates_size() == 0 ||
      (segment->candidates_size() < expand_size &&
       Util::GetScriptType(segment->key()) == Util::HIRAGANA)) {
    Segment::Candidate *new_candidate = segment->add_candidate();
    DCHECK(new_candidate);

    if (last_candidate != NULL) {
      new_candidate->CopyFrom(*last_candidate);
      // We cannot copy inner_segment_boundary; see b/8109381.
      new_candidate->inner_segment_boundary.clear();
    } else {
      new_candidate->Init();
    }
    new_candidate->key = segment->key();
    new_candidate->value = segment->key();
    new_candidate->content_key = segment->key();
    new_candidate->content_value = segment->key();
    if (last_candidate != NULL) {
      new_candidate->cost = last_candidate->cost + 1;
      new_candidate->wcost = last_candidate->wcost + 1;
      new_candidate->structure_cost = last_candidate->structure_cost + 1;
    }
    new_candidate->attributes = 0;
    last_candidate = new_candidate;
    // One character hiragana/katakana will cause side effect.
    // Type "し" and choose "シ". After that, "しました" will become "シました".
    if (Util::CharsLen(new_candidate->key) <= 1) {
      new_candidate->attributes |= Segment::Candidate::CONTEXT_SENSITIVE;
    }
    DCHECK(new_candidate->IsValid());
  }

  // Insert a dummy katakana candidate.
  string katakana_value;
  Util::HiraganaToKatakana(segment->key(), &katakana_value);
  if (segment->candidates_size() > 0 &&
      segment->candidates_size() < expand_size &&
      Util::GetScriptType(katakana_value) == Util::KATAKANA) {
    Segment::Candidate *new_candidate = segment->add_candidate();
    DCHECK(new_candidate);
    DCHECK(last_candidate);
    new_candidate->Init();
    new_candidate->key = segment->key();
    new_candidate->value = katakana_value;
    new_candidate->content_key = segment->key();
    new_candidate->content_value = katakana_value;
    new_candidate->cost = last_candidate->cost + 1;
    new_candidate->wcost = last_candidate->wcost + 1;
    new_candidate->structure_cost = last_candidate->structure_cost + 1;
    new_candidate->lid = last_candidate->lid;
    new_candidate->rid = last_candidate->rid;
    if (Util::CharsLen(new_candidate->key) <= 1) {
      new_candidate->attributes |= Segment::Candidate::CONTEXT_SENSITIVE;
    }
    DCHECK(new_candidate->IsValid());
  }

  DCHECK_GT(segment->candidates_size(), 0);
}

void ImmutableConverterImpl::ApplyResegmentRules(
    size_t pos, Lattice *lattice) const {
  if (ResegmentArabicNumberAndSuffix(pos, lattice)) {
    VLOG(1) << "ResegmentArabicNumberAndSuffix returned true";
    return;
  }

  if (ResegmentPrefixAndArabicNumber(pos, lattice)) {
    VLOG(1) << "ResegmentArabicNumberAndSuffix returned true";
    return;
  }

  if (ResegmentPersonalName(pos, lattice)) {
    VLOG(1) << "ResegmentPersonalName returned true";
    return;
  }
}

// Currently, only arabic_number + suffix patterns are resegmented
// TODO(taku): consider kanji number into consideration
bool ImmutableConverterImpl::ResegmentArabicNumberAndSuffix(
    size_t pos, Lattice *lattice) const {
  const Node *bnode = lattice->begin_nodes(pos);
  if (bnode == NULL) {
    VLOG(1) << "bnode is NULL";
    return false;
  }

  bool modified = false;

  for (const Node *compound_node = bnode;
       compound_node != NULL; compound_node = compound_node->bnext) {
    if (!compound_node->value.empty() && !compound_node->key.empty() &&
        pos_matcher_->IsNumber(compound_node->lid) &&
        !pos_matcher_->IsNumber(compound_node->rid) &&
        IsNumber(compound_node->value[0]) && IsNumber(compound_node->key[0])) {
      string number_value, number_key;
      string suffix_value, suffix_key;
      DecomposeNumberAndSuffix(compound_node->value,
                               &number_value, &suffix_value);
      DecomposeNumberAndSuffix(compound_node->key,
                               &number_key, &suffix_key);

      if (suffix_value.empty() || suffix_key.empty()) {
        continue;
      }

      // not compatibile
      if (number_value != number_key) {
        LOG(WARNING) << "Incompatible key/value number pair";
        continue;
      }

      // do -1 so that resegmented nodes are boosted
      // over compound node.
      const int32 wcost = max(compound_node->wcost / 2 - 1, 0);

      Node *number_node = lattice->NewNode();
      CHECK(number_node);
      number_node->key = number_key;
      number_node->value = number_value;
      number_node->lid = compound_node->lid;
      number_node->rid = 0;   // 0 to 0 transition cost is 0
      number_node->wcost = wcost;
      number_node->node_type = Node::NOR_NODE;
      number_node->bnext = NULL;

      // insert number into the lattice
      lattice->Insert(pos, number_node);

      Node *suffix_node = lattice->NewNode();
      CHECK(suffix_node);
      suffix_node->key = suffix_key;
      suffix_node->value = suffix_value;
      suffix_node->lid = 0;
      suffix_node->rid = compound_node->rid;
      suffix_node->wcost = wcost;
      suffix_node->node_type = Node::NOR_NODE;
      suffix_node->bnext = NULL;

      suffix_node->constrained_prev = number_node;

      // insert suffix into the lattice
      lattice->Insert(pos + number_node->key.size(), suffix_node);
      VLOG(1) << "Resegmented: " << compound_node->value << " "
              << number_node->value << " " << suffix_node->value;

      modified = true;
    }
  }

  return modified;
}

bool ImmutableConverterImpl::ResegmentPrefixAndArabicNumber(
    size_t pos, Lattice *lattice) const {
  const Node *bnode = lattice->begin_nodes(pos);
  if (bnode == NULL) {
    VLOG(1) << "bnode is NULL";
    return false;
  }

  bool modified = false;

  for (const Node *compound_node = bnode;
       compound_node != NULL; compound_node = compound_node->bnext) {
    // Unlike ResegmentArabicNumberAndSuffix, we don't
    // check POS as words ending with Arabic numbers are pretty rare.
    if (compound_node->value.size() > 1 && compound_node->key.size() > 1 &&
        !IsNumber(compound_node->value[0]) &&
        !IsNumber(compound_node->key[0]) &&
        IsNumber(compound_node->value[compound_node->value.size() - 1]) &&
        IsNumber(compound_node->key[compound_node->key.size() - 1])) {
      string number_value, number_key;
      string prefix_value, prefix_key;
      DecomposePrefixAndNumber(compound_node->value,
                               &prefix_value, &number_value);
      DecomposePrefixAndNumber(compound_node->key,
                               &prefix_key, &number_key);

      if (prefix_value.empty() || prefix_key.empty()) {
        continue;
      }

      // not compatibile
      if (number_value != number_key) {
        LOG(WARNING) << "Incompatible key/value number pair";
        continue;
      }

      // do -1 so that resegmented nodes are boosted
      // over compound node.
      const int32 wcost = max(compound_node->wcost / 2 - 1, 0);

      Node *prefix_node = lattice->NewNode();
      CHECK(prefix_node);
      prefix_node->key = prefix_key;
      prefix_node->value = prefix_value;
      prefix_node->lid = compound_node->lid;
      prefix_node->rid = 0;   // 0 to 0 transition cost is 0
      prefix_node->wcost = wcost;
      prefix_node->node_type = Node::NOR_NODE;
      prefix_node->bnext = NULL;

      // insert number into the lattice
      lattice->Insert(pos, prefix_node);

      Node *number_node = lattice->NewNode();
      CHECK(number_node);
      number_node->key = number_key;
      number_node->value = number_value;
      number_node->lid = 0;
      number_node->rid = compound_node->rid;
      number_node->wcost = wcost;
      number_node->node_type = Node::NOR_NODE;
      number_node->bnext = NULL;

      number_node->constrained_prev = prefix_node;

      // insert number into the lattice
      lattice->Insert(pos + prefix_node->key.size(), number_node);
      VLOG(1) << "Resegmented: " << compound_node->value << " "
              << prefix_node->value << " " << number_node->value;

      modified = true;
    }
  }

  return modified;
}

bool ImmutableConverterImpl::ResegmentPersonalName(
    size_t pos, Lattice *lattice) const {
  const Node *bnode = lattice->begin_nodes(pos);
  if (bnode == NULL) {
    VLOG(1) << "bnode is NULL";
    return false;
  }

  bool modified = false;

  // find a combination of last_name and first_name, e.g. "田中麗奈".
  for (const Node *compound_node = bnode;
       compound_node != NULL; compound_node = compound_node->bnext) {
    // left word is last name and right word is first name
    if (compound_node->lid != last_name_id_ ||
        compound_node->rid != first_name_id_) {
      continue;
    }

    const size_t len = Util::CharsLen(compound_node->value);

    // Don't resegment one-word last_name/first_name like 林健,
    // as it would deliver side effect.
    if (len <= 2) {
      continue;
    }

    // Don't resegment if the value is katakana
    if (Util::GetScriptType(compound_node->value) == Util::KATAKANA) {
      continue;
    }

    // Do constrained Viterbi search inside the compound "田中麗奈".
    // Constraints:
    // 1. Concats of last_name and first_name should be "田中麗奈"
    // 2. consisting of two words (last_name and first_name)
    // 3. Segment-boundary exist between the two words.
    // 4.a Either (a) POS of lnode is last_name or (b) POS of rnode is fist_name
    //     (len >= 4)
    // 4.b Both (a) POS of lnode is last_name and (b) POS of rnode is fist_name
    //     (len == 3)
    const Node *best_last_name_node = NULL;
    const Node *best_first_name_node = NULL;
    int best_cost = 0x7FFFFFFF;
    for (const Node *lnode = bnode; lnode != NULL; lnode = lnode->bnext) {
      // lnode(last_name) is a prefix of compound, Constraint 1.
      if (compound_node->value.size() > lnode->value.size() &&
          compound_node->key.size() > lnode->key.size() &&
          Util::StartsWith(compound_node->value, lnode->value)) {
        // rnode(first_name) is a suffix of compound, Constraint 1.
        for (const Node *rnode = lattice->begin_nodes(pos + lnode->key.size());
             rnode != NULL; rnode = rnode->bnext) {
          if ((lnode->value.size() + rnode->value.size())
              == compound_node->value.size() &&
              (lnode->value + rnode->value) == compound_node->value &&
              segmenter_->IsBoundary(lnode, rnode, false)) {   // Constraint 3.
            const int32 cost = lnode->wcost + GetCost(lnode, rnode);
            if (cost < best_cost) {   // choose the smallest ones
              best_last_name_node = lnode;
              best_first_name_node = rnode;
              best_cost = cost;
            }
          }
        }
      }
    }

    // No valid first/last names are found
    if (best_first_name_node == NULL || best_last_name_node == NULL) {
      continue;
    }

    // Constraint 4.a
    if (len >= 4 &&
        (best_last_name_node->lid != last_name_id_ &&
         best_first_name_node->rid != first_name_id_)) {
      continue;
    }

    // Constraint 4.b
    if (len == 3 &&
        (best_last_name_node->lid != last_name_id_ ||
         best_first_name_node->rid != first_name_id_)) {
      continue;
    }

    // Insert LastName and FirstName as independent nodes.
    // Duplications will be removed in nbest enumerations.
    // word costs are calculated from compound node by assuming that
    // transition cost is 0.
    //
    // last_name_cost + transition_cost + first_name_cost == compound_cost
    // last_name_cost == first_name_cost
    // i.e,
    // last_name_cost = first_name_cost =
    // (compound_cost - transition_cost) / 2;
    const int32 wcost = (compound_node->wcost -
                         last_to_first_name_transition_cost_) / 2;

    Node *last_name_node = lattice->NewNode();
    CHECK(last_name_node);
    last_name_node->key = best_last_name_node->key;
    last_name_node->value = best_last_name_node->value;
    last_name_node->lid = compound_node->lid;
    last_name_node->rid = last_name_id_;
    last_name_node->wcost = wcost;
    last_name_node->node_type = Node::NOR_NODE;
    last_name_node->bnext = NULL;

    // insert last_name into the lattice
    lattice->Insert(pos, last_name_node);

    Node *first_name_node = lattice->NewNode();
    CHECK(first_name_node);
    first_name_node->key = best_first_name_node->key;
    first_name_node->value = best_first_name_node->value;
    first_name_node->lid = first_name_id_;
    first_name_node->rid = compound_node->rid;
    first_name_node->wcost = wcost;
    first_name_node->node_type = Node::NOR_NODE;
    first_name_node->bnext = NULL;

    first_name_node->constrained_prev = last_name_node;

    // insert first_name into the lattice
    lattice->Insert(pos + last_name_node->key.size(), first_name_node);

    VLOG(2) << "Resegmented: " << compound_node->value << " "
            << last_name_node->value << " " << first_name_node->value;

    modified = true;
  }

  return modified;
}

namespace {

class NodeListBuilderWithCacheEnabled : public NodeListBuilderForLookupPrefix {
 public:
  NodeListBuilderWithCacheEnabled(NodeAllocatorInterface *allocator,
                                  size_t min_key_length)
      : NodeListBuilderForLookupPrefix(allocator,
                                       allocator->max_nodes_size(),
                                       min_key_length) {
    DCHECK(allocator);
  }

  virtual ResultType OnToken(StringPiece key, StringPiece actual_key,
                             const Token &token) {
    Node *node = NewNodeFromToken(token);
    node->attributes |= Node::ENABLE_CACHE;
    node->raw_wcost = node->wcost;
    PrependNode(node);
    return (limit_ <= 0) ? TRAVERSE_DONE : TRAVERSE_CONTINUE;
  }
};

}  // namespace

Node *ImmutableConverterImpl::Lookup(const int begin_pos,
                                     const int end_pos,
                                     const ConversionRequest &request,
                                     bool is_reverse,
                                     bool is_prediction,
                                     Lattice *lattice) const {
  CHECK_LE(begin_pos, end_pos);
  const char *begin = lattice->key().data() + begin_pos;
  const char *end = lattice->key().data() + end_pos;
  const size_t len = end_pos - begin_pos;

  lattice->node_allocator()->set_max_nodes_size(8192);
  Node *result_node = NULL;
  if (is_reverse) {
    BaseNodeListBuilder builder(
        lattice->node_allocator(),
        lattice->node_allocator()->max_nodes_size());
    dictionary_->LookupReverse(
        StringPiece(begin, len), lattice->node_allocator(), &builder);
    result_node = builder.result();
  } else {
    if (is_prediction && !FLAGS_disable_lattice_cache) {
      NodeListBuilderWithCacheEnabled builder(
          lattice->node_allocator(),
          lattice->cache_info(begin_pos) + 1);
      dictionary_->LookupPrefix(
          StringPiece(begin, len),
          request.IsKanaModifierInsensitiveConversion(),
          &builder);
      result_node = builder.result();
      lattice->SetCacheInfo(begin_pos, len);
    } else {
      // When cache feature is not used, look up normally
      BaseNodeListBuilder builder(
          lattice->node_allocator(),
          lattice->node_allocator()->max_nodes_size());
      dictionary_->LookupPrefix(
          StringPiece(begin, len),
          request.IsKanaModifierInsensitiveConversion(),
          &builder);
      result_node = builder.result();
    }
  }
  return AddCharacterTypeBasedNodes(begin, end, lattice, result_node);
}

Node *ImmutableConverterImpl::AddCharacterTypeBasedNodes(
    const char *begin, const char *end, Lattice *lattice, Node *nodes) const {

  size_t mblen = 0;
  const char32 ucs4 = Util::UTF8ToUCS4(begin, end, &mblen);

  const Util::ScriptType first_script_type = Util::GetScriptType(ucs4);
  const Util::FormType first_form_type = Util::GetFormType(ucs4);

  // Add 1 character node. It can be either UnknownId or NumberId.
  {
    Node *new_node = lattice->NewNode();
    CHECK(new_node);
    if (first_script_type == Util::NUMBER) {
      new_node->lid = number_id_;
      new_node->rid = number_id_;
    } else {
      new_node->lid = unknown_id_;
      new_node->rid = unknown_id_;
    }

    new_node->wcost = kMaxCost;
    new_node->value.assign(begin, mblen);
    new_node->key.assign(begin, mblen);
    new_node->node_type = Node::NOR_NODE;
    new_node->bnext = nodes;
    nodes = new_node;
  }  // scope out |new_node|

  if (first_script_type == Util::NUMBER) {
    nodes->wcost = kDefaultNumberCost;
    return nodes;
  }

  if (first_script_type != Util::ALPHABET &&
      first_script_type != Util::KATAKANA) {
    return nodes;
  }

  // group by same char type
  int num_char = 1;
  const char *p = begin + mblen;
  while (p < end) {
    const char32 next_ucs4 = Util::UTF8ToUCS4(p, end, &mblen);
    if (first_script_type != Util::GetScriptType(next_ucs4) ||
        first_form_type != Util::GetFormType(next_ucs4)) {
      break;
    }
    p += mblen;
    ++num_char;
  }

  if (num_char > 1) {
    mblen = static_cast<uint32>(p - begin);
    Node *new_node = lattice->NewNode();
    CHECK(new_node);
    if (first_script_type == Util::NUMBER) {
      new_node->lid = number_id_;
      new_node->rid = number_id_;
    } else {
      new_node->lid = unknown_id_;
      new_node->rid = unknown_id_;
    }
    new_node->wcost = kMaxCost / 2;
    new_node->value.assign(begin, mblen);
    new_node->key.assign(begin, mblen);
    new_node->node_type = Node::NOR_NODE;
    new_node->bnext = nodes;
    nodes = new_node;
  }

  return nodes;
}

namespace {

// Reasonably big cost. Cannot use INT_MAX because a new cost will be
// calculated based on kVeryBigCost.
const int kVeryBigCost = (INT_MAX >> 2);

// Runs viterbi algorithm at position |pos|. The left_boundary/right_boundary
// are the next boundary looked from pos. (If pos is on the boundary,
// left_boundary should be the previous one, and right_boundary should be
// the next).
inline void ViterbiInternal(
    const ConnectorInterface &connector, size_t pos, size_t right_boundary,
    Lattice *lattice) {
  for (Node *rnode = lattice->begin_nodes(pos);
       rnode != NULL; rnode = rnode->bnext) {
    if (rnode->end_pos > right_boundary) {
      // Invalid rnode.
      rnode->prev = NULL;
      continue;
    }

    if (rnode->constrained_prev != NULL) {
      // Constrained node.
      if (rnode->constrained_prev->prev == NULL) {
        rnode->prev = NULL;
      } else {
        rnode->prev = rnode->constrained_prev;
        rnode->cost =
            rnode->prev->cost +
            rnode->wcost +
            connector.GetTransitionCost(rnode->prev->rid, rnode->lid);
      }
      continue;
    }

    // Find a valid node which connects to the rnode with minimum cost.
    int best_cost = kVeryBigCost;
    Node *best_node = NULL;
    for (Node *lnode = lattice->end_nodes(pos);
         lnode != NULL; lnode = lnode->enext) {
      if (lnode->prev == NULL) {
        // Invalid lnode.
        continue;
      }

      int cost =
          lnode->cost + connector.GetTransitionCost(lnode->rid, rnode->lid);
      if (cost < best_cost) {
        best_cost = cost;
        best_node = lnode;
      }
    }

    rnode->prev = best_node;
    rnode->cost = best_cost + rnode->wcost;
  }
}
}  // namespace

bool ImmutableConverterImpl::Viterbi(
    const Segments &segments, Lattice *lattice) const {
  const string &key = lattice->key();

  // Process BOS.
  {
    Node *bos_node = lattice->bos_nodes();
    // Ensure only one bos node is available.
    DCHECK(bos_node != NULL);
    DCHECK(bos_node->enext == NULL);

    const size_t right_boundary = segments.segment(0).key().size();
    for (Node *rnode = lattice->begin_nodes(0);
         rnode != NULL; rnode = rnode->bnext) {
      if (rnode->end_pos > right_boundary) {
        // Invalid rnode.
        continue;
      }

      // Ensure no constraint.
      DCHECK(rnode->constrained_prev == NULL);

      rnode->prev = bos_node;
      rnode->cost =
          bos_node->cost +
          connector_->GetTransitionCost(bos_node->rid, rnode->lid) +
          rnode->wcost;
    }
  }

  size_t left_boundary = 0;
  const size_t segments_size = segments.segments_size();

  // Specialization for the first segment.
  // Don't run on the left boundary (the connection with BOS node),
  // beacuse it is already run above.
  {
    const size_t right_boundary =
        left_boundary + segments.segment(0).key().size();
    for (size_t pos = left_boundary + 1; pos < right_boundary; ++pos) {
      ViterbiInternal(*connector_, pos, right_boundary, lattice);
    }
    left_boundary = right_boundary;
  }

  // The condition to break is in the loop.
  for (size_t i = 1; i < segments_size; ++i) {
    // Run Viterbi for each position the segment.
    const size_t right_boundary =
        left_boundary + segments.segment(i).key().size();
    for (size_t pos = left_boundary; pos < right_boundary; ++pos) {
      ViterbiInternal(*connector_, pos, right_boundary, lattice);
    }
    left_boundary = right_boundary;
  }

  // Process EOS.
  {
    Node *eos_node = lattice->eos_nodes();

    // Ensure only one eos node.
    DCHECK(eos_node != NULL);
    DCHECK(eos_node->bnext == NULL);

    // No constrained prev.
    DCHECK(eos_node->constrained_prev == NULL);

    left_boundary =
        key.size() - segments.segment(segments_size - 1).key().size();
    // Find a valid node which connects to the rnode with minimum cost.
    int best_cost = kVeryBigCost;
    Node *best_node = NULL;
    for (Node *lnode = lattice->end_nodes(key.size());
         lnode != NULL; lnode = lnode->enext) {
      if (lnode->prev == NULL) {
        // Invalid lnode.
        continue;
      }

      int cost =
          lnode->cost +
          connector_->GetTransitionCost(lnode->rid, eos_node->lid);
      if (cost < best_cost) {
        best_cost = cost;
        best_node = lnode;
      }
    }

    eos_node->prev = best_node;
    eos_node->cost = best_cost + eos_node->wcost;
  }

  // Traverse the node from end to begin.
  Node *node = lattice->eos_nodes();
  CHECK(node->bnext == NULL);
  Node *prev = NULL;
  while (node->prev != NULL) {
    prev = node->prev;
    prev->next = node;
    node = prev;
  }

  if (lattice->bos_nodes() != prev) {
    LOG(WARNING) << "cannot make lattice";
    return false;
  }

  return true;
}

// faster Viterbi algorithm for prediction
//
// Run simple Viterbi algorithm with contracting the same lid and rid.
// Because the original Viterbi has speciall nodes, we should take it
// consideration.
// 1. CONNECTED nodes: are normal nodes.
// 2. WEAK_CONNECTED nodes: don't occur in prediction, so we do not have to
//    consider about them.
// 3. NOT_CONNECTED nodes: occur when they are between history nodes and
//    normal nodes.
// For NOT_CONNECTED nodes, we should run Viterbi for history nodes first,
// and do it for normal nodes second. The function "PredictionViterbiSub"
// runs Viterbi for positions between calc_begin_pos and calc_end_pos,
// inclusive.
//
// We cannot apply this function in suggestion because in suggestion there are
// WEAK_CONNECTED nodes and this function is not designed for them.
//
// TODO(toshiyuki): We may be able to use faster viterbi for
// conversion/suggestion if we use richer info as contraction group.

bool ImmutableConverterImpl::PredictionViterbi(
    const Segments &segments, Lattice *lattice) const {
  const size_t key_length = lattice->key().size();
  const size_t history_segments_size = segments.history_segments_size();
  size_t history_length = 0;
  for (size_t i = 0; i < history_segments_size; ++i) {
    history_length += segments.segment(i).key().size();
  }
  PredictionViterbiInternal(0, history_length, lattice);
  PredictionViterbiInternal(history_length, key_length, lattice);

  Node *node = lattice->eos_nodes();
  CHECK(node->bnext == NULL);
  Node *prev = NULL;
  while (node->prev != NULL) {
    prev = node->prev;
    prev->next = node;
    node = prev;
  }

  if (lattice->bos_nodes() != prev) {
    LOG(WARNING) << "cannot make lattice";
    return false;
  }

  return true;
}

void ImmutableConverterImpl::PredictionViterbiInternal(
    int calc_begin_pos, int calc_end_pos, Lattice *lattice) const {
  CHECK_LE(calc_begin_pos, calc_end_pos);

  // Mapping from lnode's rid to (cost, Node) of best way/cost, and vice versa.
  // Note that, the average number of lid/rid variation is less than 30 in
  // most cases. So, in order to avoid too many allocations for internal
  // nodes of std::map, we use vector of key-value pairs.
  typedef vector<pair<int, pair<int, Node*> > > BestMap;
  typedef OrderBy<FirstKey, Less> OrderByFirst;
  BestMap lbest, rbest;
  lbest.reserve(128);
  rbest.reserve(128);

  const pair<int, Node*> kInvalidValue(INT_MAX, static_cast<Node*>(NULL));

  for (size_t pos = calc_begin_pos; pos <= calc_end_pos; ++pos) {
    lbest.clear();
    for (Node *lnode = lattice->end_nodes(pos);
         lnode != NULL; lnode = lnode->enext) {
      const int rid = lnode->rid;
      BestMap::value_type key(rid, kInvalidValue);
      BestMap::iterator iter =
          lower_bound(lbest.begin(), lbest.end(), key, OrderByFirst());
      if (iter == lbest.end() || iter->first != rid) {
        lbest.insert(
            iter, BestMap::value_type(rid, make_pair(lnode->cost, lnode)));
      } else if (lnode->cost < iter->second.first) {
        iter->second.first = lnode->cost;
        iter->second.second = lnode;
      }
    }

    if (lbest.empty()) {
      continue;
    }

    rbest.clear();
    Node *rnode_begin = lattice->begin_nodes(pos);
    for (Node *rnode = rnode_begin; rnode != NULL; rnode = rnode->bnext) {
      if (rnode->end_pos > calc_end_pos) {
        continue;
      }
      BestMap::value_type key(rnode->lid, kInvalidValue);
      BestMap::iterator iter =
          lower_bound(rbest.begin(), rbest.end(), key, OrderByFirst());
      if (iter == rbest.end() || iter->first != rnode->lid) {
        rbest.insert(iter, key);
      }
    }

    if (rbest.empty()) {
      continue;
    }

    for (BestMap::iterator liter = lbest.begin();
         liter != lbest.end(); ++liter) {
      for (BestMap::iterator riter = rbest.begin();
           riter != rbest.end(); ++riter) {
        const int cost = liter->second.first +
            connector_->GetTransitionCost(liter->first, riter->first);
        if (cost < riter->second.first) {
          riter->second.first = cost;
          riter->second.second = liter->second.second;
        }
      }
    }

    for (Node *rnode = rnode_begin; rnode != NULL; rnode = rnode->bnext) {
      if (rnode->end_pos > calc_end_pos) {
        continue;
      }
      BestMap::value_type key(rnode->lid, kInvalidValue);
      BestMap::iterator iter =
          lower_bound(rbest.begin(), rbest.end(), key, OrderByFirst());
      if (iter == rbest.end() || iter->first != rnode->lid ||
          iter->second.second == NULL) {
        continue;
      }

      rnode->cost = iter->second.first + rnode->wcost;
      rnode->prev = iter->second.second;
    }
  }
}

namespace {

// Adds penalty for predictive nodes when building a node list.
class NodeListBuilderForPredictiveNodes : public BaseNodeListBuilder {
 public:
  NodeListBuilderForPredictiveNodes(NodeAllocatorInterface *allocator,
                                    int limit, const POSMatcher *pos_matcher)
      : BaseNodeListBuilder(allocator, limit), pos_matcher_(pos_matcher) {}

  virtual ~NodeListBuilderForPredictiveNodes() {}

  virtual ResultType OnToken(StringPiece key, StringPiece actual_key,
                             const Token &token) {
    Node *node = NewNodeFromToken(token);
    const int kPredictiveNodeDefaultPenalty = 900;  // ~= -500 * log(1/6)
    int additional_cost = kPredictiveNodeDefaultPenalty;

    // Bonus for suffix word.
    if (pos_matcher_->IsSuffixWord(node->rid) &&
        pos_matcher_->IsSuffixWord(node->lid)) {
      const int kSuffixWordBonus = 700;
      additional_cost -= kSuffixWordBonus;
    }

    // Penalty for unique noun word.
    if (pos_matcher_->IsUniqueNoun(node->rid) ||
        pos_matcher_->IsUniqueNoun(node->lid)) {
      const int kUniqueNounPenalty = 500;
      additional_cost += kUniqueNounPenalty;
    }

    // Penalty for number.
    if (pos_matcher_->IsNumber(node->rid) ||
        pos_matcher_->IsNumber(node->lid)) {
      const int kNumberPenalty = 4000;
      additional_cost += kNumberPenalty;
    }

    node->wcost += additional_cost;
    PrependNode(node);
    return (limit_ <= 0) ? TRAVERSE_DONE : TRAVERSE_CONTINUE;
  }

 private:
  const POSMatcher *pos_matcher_;
};

}  // namespace

// Add predictive nodes from conversion key.
void ImmutableConverterImpl::MakeLatticeNodesForPredictiveNodes(
    const Segments &segments, const ConversionRequest &request,
    Lattice *lattice) const {
  const string &key = lattice->key();
  string conversion_key;
  for (size_t i = 0; i < segments.conversion_segments_size(); ++i) {
    conversion_key += segments.conversion_segment(i).key();
  }
  DCHECK_NE(string::npos, key.find(conversion_key));
  vector<string> conversion_key_chars;
  Util::SplitStringToUtf8Chars(conversion_key, &conversion_key_chars);

  // do nothing if the conversion key is short
  const size_t kKeyMinLength = 7;
  if (conversion_key_chars.size() < kKeyMinLength) {
    return;
  }

  // Predictive search from suffix dictionary.
  // (search words with between 1 and 6 characters)
  {
    const size_t kMaxSuffixLookupKey = 6;
    const size_t max_sufffix_len =
        min(kMaxSuffixLookupKey, conversion_key_chars.size());
    size_t pos = key.size();

    for (size_t suffix_len = 1; suffix_len <= max_sufffix_len; ++suffix_len) {
      pos -= conversion_key_chars[
          conversion_key_chars.size() - suffix_len].size();
      DCHECK_GE(key.size(), pos);
      NodeListBuilderForPredictiveNodes builder(
          lattice->node_allocator(),
          lattice->node_allocator()->max_nodes_size(),
          pos_matcher_);
      suffix_dictionary_->LookupPredictive(
          StringPiece(key.data() + pos, key.size() - pos),
          request.IsKanaModifierInsensitiveConversion(), &builder);
      if (builder.result() != NULL) {
        lattice->Insert(pos, builder.result());
      }
    }
  }

  // Predictive search from system dictionary.
  // (search words with between 5 and 8 characters)
  {
    const size_t kMinSystemLookupKey = 5;
    const size_t kMaxSystemLookupKey = 8;
    const size_t max_suffix_len =
        min(kMaxSystemLookupKey, conversion_key_chars.size());
    size_t pos = key.size();
    for (size_t suffix_len = 1; suffix_len <= max_suffix_len; ++suffix_len) {
      pos -= conversion_key_chars[
          conversion_key_chars.size() - suffix_len].size();
      DCHECK_GE(key.size(), pos);

      if (suffix_len < kMinSystemLookupKey) {
        // Just update |pos|.
        continue;
      }

      NodeListBuilderForPredictiveNodes builder(
          lattice->node_allocator(),
          lattice->node_allocator()->max_nodes_size(),
          pos_matcher_);
      dictionary_->LookupPredictive(
          StringPiece(key.data() + pos, key.size() - pos),
          request.IsKanaModifierInsensitiveConversion(), &builder);
      if (builder.result() != NULL) {
        lattice->Insert(pos, builder.result());
      }
    }
  }
}

bool ImmutableConverterImpl::MakeLattice(
    const ConversionRequest &request,
    Segments *segments, Lattice *lattice) const {
  if (segments == NULL) {
    LOG(ERROR) << "Segments is NULL";
    return false;
  }

  if (lattice == NULL) {
    LOG(ERROR) << "Lattice is NULL";
    return false;
  }

  if (segments->segments_size() >= kMaxSegmentsSize) {
    LOG(WARNING) << "too many segments";
    return false;
  }

  NormalizeHistorySegments(segments);

  const bool is_reverse =
      (segments->request_type() == Segments::REVERSE_CONVERSION);

  const bool is_prediction =
      (segments->request_type() == Segments::SUGGESTION ||
       segments->request_type() == Segments::PREDICTION);

  // In suggestion mode, ImmutableConverter will not accept multiple-segments.
  // The result always consists of one segment.
  if ((is_reverse || is_prediction) &&
      (segments->conversion_segments_size() != 1 ||
       segments->conversion_segment(0).segment_type() != Segment::FREE)) {
    LOG(WARNING) << "ImmutableConverter doesn't support constrained requests";
    return false;
  }

  // Make the conversion key.
  string conversion_key;
  const size_t history_segments_size = segments->history_segments_size();
  for (size_t i = history_segments_size; i < segments->segments_size(); ++i) {
    DCHECK(!segments->segment(i).key().empty());
    conversion_key.append(segments->segment(i).key());
  }
  const size_t max_char_len =
      is_reverse ? kMaxCharLengthForReverseConversion : kMaxCharLength;
  if (conversion_key.empty() || conversion_key.size() >= max_char_len) {
    LOG(WARNING) << "Conversion key is empty or too long: " << conversion_key;
    return false;
  }

  // Make the history key.
  string history_key;
  for (size_t i = 0; i < history_segments_size; ++i) {
    DCHECK(!segments->segment(i).key().empty());
    history_key.append(segments->segment(i).key());
  }
  // Check if the total length (length of history_key + conversion_key) doesn't
  // exceed the maximum key length. If it exceeds the limit, we simply clears
  // such useless history segments, which is acceptable because such cases
  // rarely happen in normal use cases.
  if (history_key.size() + conversion_key.size() >= max_char_len) {
    LOG(WARNING) << "Clear history segments due to the limit of key length.";
    segments->clear_history_segments();
    history_key.clear();
  }

  const string key = history_key + conversion_key;
  lattice->UpdateKey(key);
  lattice->ResetNodeCost();

  if (is_reverse) {
    // Reverse lookup for each prefix string in key is slow with current
    // implementation, so run it for them at once and cache the result.
    dictionary_->PopulateReverseLookupCache(key, lattice->node_allocator());
  }

  bool is_valid_lattice = true;
  // Perform the main part of lattice construction.
  if (!MakeLatticeNodesForHistorySegments(*segments, request, lattice) ||
      lattice->end_nodes(history_key.size()) == NULL) {
    is_valid_lattice = false;
  }

  // Can not apply key corrector to invalid lattice.
  if (is_valid_lattice) {
    MakeLatticeNodesForConversionSegments(
        *segments, request, history_key, lattice);
  }

  if (is_reverse) {
    // No reverse look up will happen afterwards.
    dictionary_->ClearReverseLookupCache(lattice->node_allocator());
  }

  // Predictive real time conversion
  if (is_prediction && !FLAGS_disable_predictive_realtime_conversion) {
    MakeLatticeNodesForPredictiveNodes(*segments, request, lattice);
  }

  if (!is_valid_lattice) {
    // Safely bail out, since reverse look up cache was released already.
    return false;
  }

  if (lattice->end_nodes(key.size()) == NULL) {
    LOG(WARNING) << "cannot build lattice from input";
    return false;
  }

  ApplyPrefixSuffixPenalty(conversion_key, lattice);

  // Re-segment personal-names, numbers ...etc
  const bool is_conversion =
      (segments->request_type() == Segments::CONVERSION);
  if (is_conversion) {
    Resegment(*segments, history_key, conversion_key, lattice);
  }

  return true;
}

bool ImmutableConverterImpl::MakeLatticeNodesForHistorySegments(
    const Segments &segments, const ConversionRequest &request,
    Lattice *lattice) const {
  const bool is_reverse =
     (segments.request_type() == Segments::REVERSE_CONVERSION);
  const size_t history_segments_size = segments.history_segments_size();
  const string &key = lattice->key();

  size_t segments_pos = 0;
  uint16 last_rid = 0;

  for (size_t s = 0; s < history_segments_size; ++s) {
    const Segment &segment = segments.segment(s);
    if (segment.segment_type() != Segment::HISTORY &&
        segment.segment_type() != Segment::SUBMITTED) {
      LOG(WARNING) << "inconsistent history";
      return false;
    }
    if (segment.key().empty()) {
      LOG(WARNING) << "invalid history: key is empty";
      return false;
    }
    const Segment::Candidate &candidate = segment.candidate(0);

    // Add a virtual nodes corresponding to HISTORY segments.
    Node *rnode = lattice->NewNode();
    CHECK(rnode);
    rnode->lid = candidate.lid;
    rnode->rid = candidate.rid;
    rnode->wcost = 0;
    rnode->value = candidate.value;
    rnode->key = segment.key();
    rnode->node_type = Node::HIS_NODE;
    rnode->bnext = NULL;
    lattice->Insert(segments_pos, rnode);

    // For the last history segment,  we also insert a new node having
    // EOS part-of-speech. Viterbi algorithm will find the
    // best path from rnode(context) and rnode2(EOS).
    if (s + 1 == history_segments_size && candidate.rid != 0) {
      Node *rnode2 = lattice->NewNode();
      CHECK(rnode2);
      rnode2->lid = candidate.lid;
      rnode2->rid = 0;   // 0 is BOS/EOS

      // This cost was originally set to 1500.
      // It turned out this penalty was so strong that it caused some
      // undesirable conversions like "の-なまえ" -> "の-な前" etc., so we
      // changed this to 0.
      // Reducing the cost promotes context-unaware conversions, and this may
      // have some unexpected side effects.
      // TODO(team): Figure out a better way to set the cost using
      // boundary.def-like approach.
      rnode2->wcost = 0;
      rnode2->value = candidate.value;
      rnode2->key = segment.key();
      rnode2->node_type = Node::HIS_NODE;
      rnode2->bnext = NULL;
      lattice->Insert(segments_pos, rnode2);
    }

    // Dictionary lookup for the candidates which are
    // overlapping between history and conversion.
    // Check only the last history segment at this moment.
    //
    // Example: history "おいかわ(及川)", conversion: "たくや"
    // Here, try to find "おいかわたくや(及川卓也)" from dictionary
    // and insert "卓也" as a new word node with a modified cost
    if (s + 1 == history_segments_size) {
      const bool is_prediction =
          (segments.request_type() == Segments::SUGGESTION ||
           segments.request_type() == Segments::PREDICTION);
      const Node *node = Lookup(segments_pos, key.size(), request,
                                is_reverse, is_prediction, lattice);
      for (const Node *compound_node = node; compound_node != NULL;
           compound_node = compound_node->bnext) {
        // No overlapps
        if (compound_node->key.size() <= rnode->key.size() ||
            compound_node->value.size() <= rnode->value.size() ||
            !Util::StartsWith(compound_node->key, rnode->key) ||
            !Util::StartsWith(compound_node->value, rnode->value)) {
          // not a prefix
          continue;
        }

        // Must be in the same POS group.
        // http://b/issue?id=2977618
        if (pos_group_->GetPosGroup(candidate.lid)
            != pos_group_->GetPosGroup(compound_node->lid)) {
          continue;
        }

        // make new virtual node
        Node *new_node = lattice->NewNode();
        CHECK(new_node);

        // get the suffix part ("たくや/卓也")
        new_node->key.assign(compound_node->key, rnode->key.size(),
                             compound_node->key.size() - rnode->key.size());
        new_node->value.assign(
            compound_node->value, rnode->value.size(),
            compound_node->value.size() - rnode->value.size());

        // rid/lid are derived from the compound.
        // lid is just an approximation
        new_node->rid = compound_node->rid;
        new_node->lid = compound_node->lid;
        new_node->bnext = NULL;
        new_node->node_type = Node::NOR_NODE;
        new_node->attributes |= Segment::Candidate::CONTEXT_SENSITIVE;

        // New cost recalcuration:
        //
        // compound_node->wcost * (candidate len / compound_node len)
        // - trans(candidate.rid, new_node.lid)
        new_node->wcost =
            compound_node->wcost *
            candidate.value.size() / compound_node->value.size()
            - connector_->GetTransitionCost(candidate.rid, new_node->lid);

        VLOG(2) << " compound_node->lid=" << compound_node->lid
                << " compound_node->rid=" << compound_node->rid
                << " compound_node->wcost=" << compound_node->wcost;
        VLOG(2) << " last_rid=" << last_rid
                << " candidate.lid=" << candidate.lid
                << " candidate.rid=" << candidate.rid
                << " candidate.cost=" << candidate.cost
                << " candidate.wcost=" << candidate.wcost;
        VLOG(2) << " new_node->wcost=" << new_node->wcost;

        new_node->constrained_prev = rnode;

        // Added as new node
        lattice->Insert(segments_pos + rnode->key.size(), new_node);

        VLOG(2) << "Added: " << new_node->key << " " << new_node->value;
      }
    }

    // update segment pos
    segments_pos += rnode->key.size();
    last_rid = rnode->rid;
  }
  lattice->set_history_end_pos(segments_pos);
  return true;
}

void ImmutableConverterImpl::MakeLatticeNodesForConversionSegments(
    const Segments &segments, const ConversionRequest &request,
    const string &history_key, Lattice *lattice) const {
  const string &key = lattice->key();
  const bool is_conversion =
      (segments.request_type() == Segments::CONVERSION);
  // Do not use KeyCorrector if user changes the boundary.
  // http://b/issue?id=2804996
  scoped_ptr<KeyCorrector> key_corrector;
  if (is_conversion && !segments.resized()) {
    KeyCorrector::InputMode mode = KeyCorrector::ROMAN;
    if (GET_CONFIG(preedit_method) != config::Config::ROMAN) {
      mode = KeyCorrector::KANA;
    }
    key_corrector.reset(new KeyCorrector(key, mode, history_key.size()));
  }

  const bool is_reverse =
      (segments.request_type() == Segments::REVERSE_CONVERSION);
  const bool is_prediction =
      (segments.request_type() == Segments::SUGGESTION ||
       segments.request_type() == Segments::PREDICTION);
  for (size_t pos = history_key.size(); pos < key.size(); ++pos) {
    if (lattice->end_nodes(pos) != NULL) {
      Node *rnode =
          Lookup(pos, key.size(), request, is_reverse, is_prediction, lattice);
      // If history key is NOT empty and user input seems to starts with
      // a particle ("はにで..."), mark the node as STARTS_WITH_PARTICLE.
      // We change the segment boundary if STARTS_WITH_PARTICLE attribute
      // is assigned.
      if (!history_key.empty() && pos == history_key.size()) {
        for (Node *node = rnode; node != NULL; node = node->bnext) {
          if (pos_matcher_->IsAcceptableParticleAtBeginOfSegment(node->lid) &&
              node->lid == node->rid) {  // not a compound.
            node->attributes |= Node::STARTS_WITH_PARTICLE;
          }
        }
      }
      CHECK(rnode != NULL);
      lattice->Insert(pos, rnode);
      InsertCorrectedNodes(
          pos, key, request, key_corrector.get(), dictionary_, lattice);
    }
  }
}

void ImmutableConverterImpl::ApplyPrefixSuffixPenalty(
    const string &conversion_key,
    Lattice *lattice) const {
  const string &key = lattice->key();
  DCHECK_LE(conversion_key.size(), key.size());
  for (Node *node = lattice->begin_nodes(key.size() -
                                         conversion_key.size());
       node != NULL; node = node->bnext) {
    // TODO(taku):
    // We might be able to tweak the penalty according to
    // the size of history segments.
    // If history-segments is non-empty, we can make the
    // penalty smaller so that history context is more likely
    // selected.
    node->wcost += segmenter_->GetPrefixPenalty(node->lid);
  }

  for (Node *node = lattice->end_nodes(key.size());
       node != NULL; node = node->enext) {
    node->wcost += segmenter_->GetSuffixPenalty(node->rid);
  }
}

void ImmutableConverterImpl::Resegment(
    const Segments &segments,
    const string &history_key, const string &conversion_key,
    Lattice *lattice) const {
  for (size_t pos = history_key.size();
       pos < history_key.size() + conversion_key.size(); ++pos) {
    ApplyResegmentRules(pos, lattice);
  }

  // Enable constrained node.
  size_t segments_pos = 0;
  for (size_t s = 0; s < segments.segments_size(); ++s) {
    const Segment &segment = segments.segment(s);
    if (segment.segment_type() == Segment::FIXED_VALUE) {
      const Segment::Candidate &candidate = segment.candidate(0);
      Node *rnode = lattice->NewNode();
      CHECK(rnode);
      rnode->lid       = candidate.lid;
      rnode->rid       = candidate.rid;
      rnode->wcost     = kMinCost;
      rnode->value     = candidate.value;
      rnode->key       = segment.key();
      rnode->node_type = Node::CON_NODE;
      rnode->bnext     = NULL;
      lattice->Insert(segments_pos, rnode);
    }
    segments_pos += segment.key().size();
  }
}

// Single segment conversion results should be set to |segments|.
void ImmutableConverterImpl::InsertFirstSegmentToCandidates(
    Segments *segments,
    const Lattice &lattice,
    const vector<uint16> &group,
    size_t max_candidates_size) const {
  const size_t only_first_segment_candidate_pos =
      segments->conversion_segment(0).candidates_size();
  InsertCandidates(segments, lattice, group,
                   max_candidates_size,
                   ONLY_FIRST_SEGMENT);
  // Note that inserted candidates might consume the entire key.
  // e.g. key: "なのは", value: "ナノは"
  // Erase them later.
  if (segments->conversion_segment(0).candidates_size() <=
      only_first_segment_candidate_pos) {
    return;
  }

  // Set new costs for only first segment candidates
  // Basically, only first segment candidates cost is smaller
  // than that of single segment conversion results.
  // For example, the cost of "私の" is smaller than "私の名前は".
  // To merge these two categories of results, we will add the
  // cost penalty based on the cost diff.
  const Segment &first_segment = segments->conversion_segment(0);
  const int base_cost_diff =
      max(0,
          (first_segment.candidate(0).cost -
           first_segment.candidate(only_first_segment_candidate_pos).cost));
  const int base_wcost_diff =
      max(0,
          (first_segment.candidate(0).wcost -
           first_segment.candidate(only_first_segment_candidate_pos).wcost));
  for (size_t i = only_first_segment_candidate_pos;
       i < first_segment.candidates_size();) {
    static const int kOnlyFirstSegmentOffset = 300;
    Segment::Candidate *candidate =
        segments->mutable_conversion_segment(0)->mutable_candidate(i);
    // If the size of candidate's key is greater than or
    // equal to 1st segment's key,
    // it means that the result consumes the entire key.
    // Such results are not appropriate for PARTIALLY_KEY_CONSUMED so erase it.
    if (candidate->key.size() >= first_segment.key().size()) {
      segments->mutable_conversion_segment(0)->erase_candidate(i);
      continue;
    }
    candidate->cost += (base_cost_diff + kOnlyFirstSegmentOffset);
    candidate->wcost += (base_wcost_diff + kOnlyFirstSegmentOffset);
    DCHECK(!(candidate->attributes &
             Segment::Candidate::PARTIALLY_KEY_CONSUMED));
    candidate->attributes |= Segment::Candidate::PARTIALLY_KEY_CONSUMED;
    candidate->consumed_key_size = Util::CharsLen(candidate->key);
    ++i;
  }
}

bool ImmutableConverterImpl::IsSegmentEndNode(
    const Segments &segments, const Node *node,
    const vector<uint16> &group, bool is_single_segment) const {
  DCHECK(node->next);
  if (node->next->node_type == Node::EOS_NODE) {
    return true;
  }

  // In reverse conversion, group consecutive white spaces into one segment.
  // For example, "ほん むりょう" -> "ほん", " ", "むりょう".
  if (segments.request_type() == Segments::REVERSE_CONVERSION) {
    const bool this_node_is_ws = ContainsWhiteSpacesOnly(node->key);
    const bool next_node_is_ws = ContainsWhiteSpacesOnly(node->next->key);
    if (this_node_is_ws) {
      return !next_node_is_ws;
    }
    if (next_node_is_ws) {
      return true;
    }
    // If this and next nodes are both non-white spaces, fall back to the
    // subsequent logic.
  }

  const Segment &old_segment = segments.segment(group[node->begin_pos]);
  // |node| and |node->next| should be in same segment due to FIXED_BOUNDAY
  // |node->next| is NOT a boundary. Very strong constraint.
  if (group[node->begin_pos] == group[node->next->begin_pos] &&
      old_segment.segment_type() == Segment::FIXED_BOUNDARY) {
    return false;
  }

  // |node->next| is a boundary. Very strong constraint.
  if (group[node->begin_pos] != group[node->next->begin_pos]) {
    return true;
  }

  // CON_NODE is generated for FIXED_VALUE candidate.
  if (node->node_type == Node::CON_NODE) {
    return true;
  }

  // Grammatically segmented.
  if (segmenter_->IsBoundary(node, node->next, is_single_segment)) {
    return true;
  }

  return false;
}

Segment *ImmutableConverterImpl::GetInsertTargetSegment(
    const Lattice &lattice,
    const vector<uint16> &group,
    InsertCandidatesType type,
    size_t begin_pos,
    const Node *node,
    Segments *segments) const {
  if (type != MULTI_SEGMENTS) {
    DCHECK(type == SINGLE_SEGMENT || type == ONLY_FIRST_SEGMENT);
    // Realtime conversion that produces only one segment.
    return segments->mutable_segment(segments->segments_size() - 1);
  }

  // 'Normal' conversion. Add new segment and initialize it.
  Segment *segment = segments->add_segment();
  DCHECK(segment);
  segment->clear_candidates();
  segment->set_key(
      lattice.key().substr(begin_pos, node->end_pos - begin_pos));
  const Segment &old_segment = segments->segment(group[node->begin_pos]);
  segment->set_segment_type(old_segment.segment_type());
  return segment;
}

void ImmutableConverterImpl::InsertCandidates(
    Segments *segments,
    const Lattice &lattice,
    const vector<uint16> &group,
    size_t max_candidates_size,
    InsertCandidatesType type) const {
  // skip HIS_NODE(s)
  Node *prev = lattice.bos_nodes();
  for (Node *node = lattice.bos_nodes()->next;
       node->next != NULL && node->node_type == Node::HIS_NODE;
       node = node->next) {
    prev = node;
  }

  const size_t expand_size =
      max(static_cast<size_t>(1),
          min(static_cast<size_t>(512), max_candidates_size));

  const bool is_single_segment = (type == SINGLE_SEGMENT);
  NBestGenerator nbest_generator(
      suppression_dictionary_, segmenter_, connector_, pos_matcher_,
      &lattice, suggestion_filter_);

  string original_key;
  for (size_t i = 0; i < segments->conversion_segments_size(); ++i) {
    original_key.append(segments->conversion_segment(i).key());
  }

  size_t begin_pos = string::npos;
  for (Node *node = prev->next; node->next != NULL; node = node->next) {
    if (begin_pos == string::npos) {
      begin_pos = node->begin_pos;
    }

    if (!IsSegmentEndNode(*segments, node, group, is_single_segment)) {
      continue;
    }

    Segment *segment = GetInsertTargetSegment(
        lattice, group, type, begin_pos, node, segments);
    CHECK(segment);

    NBestGenerator::BoundaryCheckMode mode = NBestGenerator::STRICT;
    if (type == SINGLE_SEGMENT) {
      // For realtime conversion.
      mode = NBestGenerator::ONLY_EDGE;
    } else if (segment->segment_type() == Segment::FIXED_BOUNDARY) {
      // Boundary is specified. Skip boundary check in nbest generator.
      mode = NBestGenerator::ONLY_MID;
    }
    nbest_generator.Reset(prev, node->next, mode);

    ExpandCandidates(original_key, &nbest_generator, segment,
                     segments->request_type(), expand_size);

    if (type == MULTI_SEGMENTS || type == SINGLE_SEGMENT) {
      InsertDummyCandidates(segment, expand_size);
    }

    if (node->node_type == Node::CON_NODE) {
      segment->set_segment_type(Segment::FIXED_VALUE);
    }

    if (type == ONLY_FIRST_SEGMENT) {
      break;
    }
    begin_pos = string::npos;
    prev = node;
  }
}

bool ImmutableConverterImpl::MakeSegments(const ConversionRequest &request,
                                          const Lattice &lattice,
                                          const vector<uint16> &group,
                                          Segments *segments) const {
  if (segments == NULL) {
    LOG(WARNING) << "Segments is NULL";
    return false;
  }

  const Segments::RequestType type = segments->request_type();
  const bool is_prediction = (type == Segments::PREDICTION ||
                              type == Segments::SUGGESTION ||
                              type == Segments::PARTIAL_PREDICTION ||
                              type == Segments::PARTIAL_SUGGESTION);

  if (is_prediction) {
    const size_t max_candidates_size =
        segments->max_prediction_candidates_size();

    if (request.create_partial_candidates()) {
      // TODO(toshiyuki): It may be better to change this value
      // according to the key length.
      static const size_t kOnlyFirstSegmentCandidateSize = 3;
      const size_t single_segment_candidates_size =
          ((max_candidates_size > kOnlyFirstSegmentCandidateSize) ?
           max_candidates_size - kOnlyFirstSegmentCandidateSize : 1);
      InsertCandidates(segments, lattice, group,
                       single_segment_candidates_size, SINGLE_SEGMENT);

      // Even if single_segment_candidates_size + kOnlyFirstSegmentCandidateSize
      // is greater than max_candidates_size, we cannot skip
      // InsertFirstSegmentToCandidates().
      // For example:
      //   the sum: 11
      //   max_candidates_size: 10
      //   current candidate size: 8
      // In this case, the sum > |max_candidates_size|, but we should not
      // skip calling InsertFirstSegmentToCandidates, as we want to add
      // two candidates.
      const size_t only_first_segment_candidates_size =
          min(max_candidates_size,
              single_segment_candidates_size + kOnlyFirstSegmentCandidateSize);
      InsertFirstSegmentToCandidates(
          segments, lattice, group, only_first_segment_candidates_size);
    } else {
      InsertCandidates(
          segments, lattice, group, max_candidates_size, SINGLE_SEGMENT);
    }
  } else {
    DCHECK(!request.create_partial_candidates());
    // Currently, we assume that REVERSE_CONVERSION only
    // requires 1 result.
    // TODO(taku): support to set the size on REVESER_CONVERSION mode.
    const size_t max_candidates_size =
        ((type == Segments::REVERSE_CONVERSION) ?
         1 : segments->max_conversion_candidates_size());

    // InsertCandidates inserts new segments after the existing
    // conversion segments. So we have to erase old conversion segments.
    // We have to keep old segments for calling InsertCandidates because
    // we need segment constraints like FIXED_BOUNDARY.
    // TODO(toshiyuki): We want more beautiful structure.
    const size_t old_conversion_segments_size =
        segments->conversion_segments_size();
    InsertCandidates(
        segments, lattice, group, max_candidates_size, MULTI_SEGMENTS);
    if (old_conversion_segments_size > 0) {
      segments->erase_segments(segments->history_segments_size(),
                               old_conversion_segments_size);
    }
  }
  return true;
}

void ImmutableConverterImpl::MakeGroup(
    const Segments &segments, vector<uint16> *group) const {
  group->clear();
  for (size_t i = 0; i < segments.segments_size(); ++i) {
    for (size_t j = 0; j < segments.segment(i).key().size(); ++j) {
      group->push_back(static_cast<uint16>(i));
    }
  }
  group->push_back(static_cast<uint16>(segments.segments_size()));
}

bool ImmutableConverterImpl::ConvertForRequest(
    const ConversionRequest &request, Segments *segments) const {
  const bool is_prediction =
      (segments->request_type() == Segments::PREDICTION ||
       segments->request_type() == Segments::SUGGESTION);

  Lattice *lattice = GetLattice(segments, is_prediction);

  if (!MakeLattice(request, segments, lattice)) {
    LOG(WARNING) << "could not make lattice";
    return false;
  }

  vector<uint16> group;
  MakeGroup(*segments, &group);

  if (is_prediction) {
    if (!PredictionViterbi(*segments, lattice)) {
      LOG(WARNING) << "prediction_viterbi failed";
      return false;
    }
  } else {
    if (!Viterbi(*segments, lattice)) {
      LOG(WARNING) << "viterbi failed";
      return false;
    }
  }

  VLOG(2) << lattice->DebugString();
  if (!MakeSegments(request, *lattice, group, segments)) {
    LOG(WARNING) << "make segments failed";
    return false;
  }

  return true;
}

}  // namespace mozc
