src/converter/immutable_converter.cc - mozc - Git at Google

 // Copyright 2010-2015, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 // copyright notice, this list of conditions and the following disclaimer
 // in the documentation and/or other materials provided with the
 // distribution.
 //     * Neither the name of Google Inc. nor the names of its
 // contributors may be used to endorse or promote products derived from
 // this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "converter/immutable_converter.h"

 #include <algorithm>
 #include <cctype>
 #include <climits>
 #include <string>
 #include <utility>
 #include <vector>

 #include "base/logging.h"
 #include "base/port.h"
 #include "base/scoped_ptr.h"
 #include "base/stl_util.h"
 #include "base/string_piece.h"
 #include "base/util.h"
 #include "config/config.pb.h"
 #include "config/config_handler.h"
 #include "converter/connector.h"
 #include "converter/conversion_request.h"
 #include "converter/key_corrector.h"
 #include "converter/lattice.h"
 #include "converter/nbest_generator.h"
 #include "converter/node.h"
 #include "converter/node_allocator.h"
 #include "converter/node_list_builder.h"
 #include "converter/segmenter.h"
 #include "converter/segments.h"
 #include "dictionary/dictionary_interface.h"
 #include "dictionary/pos_group.h"
 #include "dictionary/pos_matcher.h"
 #include "dictionary/suppression_dictionary.h"
 #include "prediction/suggestion_filter.h"
 #include "session/commands.pb.h"

 using mozc::dictionary::SuppressionDictionary;

 namespace mozc {
 namespace {

 const size_t kMaxSegmentsSize                   = 256;
 const size_t kMaxCharLength                     = 1024;
 const size_t kMaxCharLengthForReverseConversion = 600;  // 200 chars in UTF8
 const int    kMaxCost                           = 32767;
 const int    kMinCost                           = -32767;
 const int    kDefaultNumberCost                 = 3000;

 class KeyCorrectedNodeListBuilder : public BaseNodeListBuilder {
  public:
   KeyCorrectedNodeListBuilder(size_t pos,
                               StringPiece original_lookup_key,
                               const KeyCorrector *key_corrector,
                               NodeAllocator *allocator)
       : BaseNodeListBuilder(allocator, allocator->max_nodes_size()),
         pos_(pos),
         original_lookup_key_(original_lookup_key),
         key_corrector_(key_corrector),
         tail_(NULL) {}

   virtual ResultType OnToken(StringPiece key, StringPiece actual_key,
                              const Token &token) {
     const size_t offset =
         key_corrector_->GetOriginalOffset(pos_, token.key.size());
     if (!KeyCorrector::IsValidPosition(offset) || offset == 0) {
       return TRAVERSE_NEXT_KEY;
     }
     Node *node = NewNodeFromToken(token);
     node->key.assign(original_lookup_key_.data() + pos_, offset);
     node->wcost += KeyCorrector::GetCorrectedCostPenalty(node->key);

     // Push back |node| to the end.
     if (result_ == NULL) {
       result_ = node;
     } else {
       DCHECK(tail_ != NULL);
       tail_->bnext = node;
     }
     tail_ = node;
     return TRAVERSE_CONTINUE;
   }

   Node *tail() const { return tail_; }

  private:
   const size_t pos_;
   const StringPiece original_lookup_key_;
   const KeyCorrector *key_corrector_;
   Node *tail_;
 };

 void InsertCorrectedNodes(size_t pos, const string &key,
                           const ConversionRequest &request,
                           const KeyCorrector *key_corrector,
                           const DictionaryInterface *dictionary,
                           Lattice *lattice) {
   if (key_corrector == NULL) {
     return;
   }
   size_t length = 0;
   const char *str = key_corrector->GetCorrectedPrefix(pos, &length);
   if (str == NULL || length == 0) {
     return;
   }
   KeyCorrectedNodeListBuilder builder(pos, key, key_corrector,
                                       lattice->node_allocator());
   dictionary->LookupPrefix(
       StringPiece(str, length),
       request.IsKanaModifierInsensitiveConversion(),
       &builder);
   if (builder.tail() != NULL) {
     builder.tail()->bnext = NULL;
   }
   if (builder.result() != NULL) {
     lattice->Insert(pos, builder.result());
   }
 }

 bool IsNumber(const char c) {
   return c >= '0' && c <= '9';
 }

 bool ContainsWhiteSpacesOnly(const StringPiece s) {
   for (ConstChar32Iterator iter(s); !iter.Done(); iter.Next()) {
     switch (iter.Get()) {
       case 0x09:    // TAB
       case 0x20:    // Half-width space
       case 0x3000:  // Full-width space
         break;
       default:
         return false;
     }
   }
   return true;
 }

 void DecomposeNumberAndSuffix(const string &input,
                               string *number, string *suffix) {
   const char *begin = input.data();
   const char *end = input.data() + input.size();
   size_t pos = 0;
   while (begin < end) {
     if (IsNumber(*begin)) {
       ++pos;
       ++begin;
       continue;
     }
     break;
   }
   number->assign(input, 0, pos);
   suffix->assign(input, pos, input.size() - pos);
 }

 void DecomposePrefixAndNumber(const string &input,
                               string *prefix, string *number) {
   const char *begin = input.data();
   const char *end = input.data() + input.size() - 1;
   size_t pos = input.size();
   while (begin <= end) {
     if (IsNumber(*end)) {
       --pos;
       --end;
       continue;
     }
     break;
   }
   prefix->assign(input, 0, pos);
   number->assign(input, pos, input.size() - pos);
 }

 void NormalizeHistorySegments(Segments *segments) {
   for (size_t i = 0; i < segments->history_segments_size(); ++i) {
     Segment *segment = segments->mutable_history_segment(i);
     if (segment == NULL || segment->candidates_size() == 0) {
       continue;
     }

     string key;
     Segment::Candidate *c = segment->mutable_candidate(0);
     const string value = c->value;
     const string content_value = c->content_value;
     const string content_key = c->content_key;
     Util::FullWidthAsciiToHalfWidthAscii(segment->key(), &key);
     Util::FullWidthAsciiToHalfWidthAscii(value, &c->value);
     Util::FullWidthAsciiToHalfWidthAscii(content_value, &c->content_value);
     Util::FullWidthAsciiToHalfWidthAscii(content_key, &c->content_key);
     c->key = key;
     segment->set_key(key);

     // Ad-hoc rewrite for Numbers
     // Since number candidate is generative, i.e, any number can be
     // written by users, we normalize the value here. normalzied number
     // is used for the ranking tweaking based on history
     if (key.size() > 1 &&
         key == c->value &&
         key == c->content_value &&
         key == c->key &&
         key == c->content_key &&
         Util::GetScriptType(key) == Util::NUMBER &&
         IsNumber(key[key.size() - 1])) {
       key = key[key.size() - 1];  // use the last digit only
       segment->set_key(key);
       c->value = key;
       c->content_value = key;
       c->content_key = key;
     }
   }
 }

 Lattice *GetLattice(Segments *segments, bool is_prediction) {
   Lattice *lattice = segments->mutable_cached_lattice();
   if (lattice == NULL) {
     return NULL;
   }

   const size_t history_segments_size = segments->history_segments_size();

   string history_key = "";
   for (size_t i = 0; i < history_segments_size; ++i) {
     history_key.append(segments->segment(i).key());
   }
   string conversion_key = "";
   for (size_t i = history_segments_size; i < segments->segments_size(); ++i) {
     conversion_key.append(segments->segment(i).key());
   }

   const size_t lattice_history_end_pos = lattice->history_end_pos();

   if (!is_prediction ||
       Util::CharsLen(conversion_key) <= 1 ||
       lattice_history_end_pos != history_key.size()) {
     // Do not cache if conversion is not prediction.  In addition, if a user
     // input the key right after the finish of conversion, reset the lattice to
     // erase old nodes.  Even if the lattice key is not changed, we should reset
     // the lattice when the history size is changed.  When we submit the
     // candidate partially, the entire key will not changed, but the history
     // position will be changed.
     lattice->Clear();
   }

   return lattice;
 }

 }  // namespace

 ImmutableConverterImpl::ImmutableConverterImpl(
     const DictionaryInterface *dictionary,
     const DictionaryInterface *suffix_dictionary,
     const SuppressionDictionary *suppression_dictionary,
     const Connector *connector,
     const Segmenter *segmenter,
     const POSMatcher *pos_matcher,
     const PosGroup *pos_group,
     const SuggestionFilter *suggestion_filter)
     : dictionary_(dictionary),
       suffix_dictionary_(suffix_dictionary),
       suppression_dictionary_(suppression_dictionary),
       connector_(connector),
       segmenter_(segmenter),
       pos_matcher_(pos_matcher),
       pos_group_(pos_group),
       suggestion_filter_(suggestion_filter),
       first_name_id_(pos_matcher_->GetFirstNameId()),
       last_name_id_(pos_matcher_->GetLastNameId()),
       number_id_(pos_matcher_->GetNumberId()),
       unknown_id_(pos_matcher_->GetUnknownId()),
       last_to_first_name_transition_cost_(
           connector_->GetTransitionCost(last_name_id_, first_name_id_)) {
   DCHECK(dictionary_);
   DCHECK(suffix_dictionary_);
   DCHECK(suppression_dictionary_);
   DCHECK(connector_);
   DCHECK(segmenter_);
   DCHECK(pos_matcher_);
   DCHECK(pos_group_);
   DCHECK(suggestion_filter_);
 }

 void ImmutableConverterImpl::ExpandCandidates(
     const string &original_key, NBestGenerator *nbest, Segment *segment,
     Segments::RequestType request_type, size_t expand_size) const {
   DCHECK(nbest);
   DCHECK(segment);
   CHECK_GT(expand_size, 0);

   while (segment->candidates_size() < expand_size) {
     Segment::Candidate *candidate = segment->push_back_candidate();
     DCHECK(candidate);
     candidate->Init();

     // if NBestGenerator::Next() returns NULL,
     // no more entries are generated.
     if (!nbest->Next(original_key, candidate, request_type)) {
       segment->pop_back_candidate();
       break;
     }
   }
 }

 void ImmutableConverterImpl::InsertDummyCandidates(Segment *segment,
                                                    size_t expand_size) const {
   const Segment::Candidate *top_candidate =
       segment->candidates_size() == 0 ? NULL :
       segment->mutable_candidate(0);
   const Segment::Candidate *last_candidate =
       segment->candidates_size() == 0 ? NULL :
       segment->mutable_candidate(segment->candidates_size() - 1);

   // Insert a dummy candiate whose content_value is katakana.
   // If functional_key() is empty, no need to make a dummy candidate.
   if (segment->candidates_size() > 0 &&
       segment->candidates_size() < expand_size &&
       !segment->candidate(0).functional_key().empty() &&
       Util::GetScriptType(segment->candidate(0).content_key) ==
       Util::HIRAGANA) {
     // Use last_candidate as a refernce of cost.
     // Use top_candidate as a refarence of lid/rid and key/value.
     DCHECK(top_candidate);
     DCHECK(last_candidate);
     Segment::Candidate *new_candidate = segment->add_candidate();
     DCHECK(new_candidate);

     new_candidate->CopyFrom(*top_candidate);
     Util::HiraganaToKatakana(segment->candidate(0).content_key,
                              &new_candidate->content_value);
     Util::ConcatStrings(new_candidate->content_value,
                         top_candidate->functional_value(),
                         &new_candidate->value);
     new_candidate->cost = last_candidate->cost + 1;
     new_candidate->wcost = last_candidate->wcost + 1;
     new_candidate->structure_cost = last_candidate->structure_cost + 1;
     new_candidate->attributes = 0;
     // We cannot copy inner_segment_boundary; see b/8109381.
     new_candidate->inner_segment_boundary.clear();
     DCHECK(new_candidate->IsValid());
     last_candidate = new_candidate;
   }

   // Insert a dummy hiragana candidate.
   if (segment->candidates_size() == 0 ||
       (segment->candidates_size() < expand_size &&
        Util::GetScriptType(segment->key()) == Util::HIRAGANA)) {
     Segment::Candidate *new_candidate = segment->add_candidate();
     DCHECK(new_candidate);

     if (last_candidate != NULL) {
       new_candidate->CopyFrom(*last_candidate);
       // We cannot copy inner_segment_boundary; see b/8109381.
       new_candidate->inner_segment_boundary.clear();
     } else {
       new_candidate->Init();
     }
     new_candidate->key = segment->key();
     new_candidate->value = segment->key();
     new_candidate->content_key = segment->key();
     new_candidate->content_value = segment->key();
     if (last_candidate != NULL) {
       new_candidate->cost = last_candidate->cost + 1;
       new_candidate->wcost = last_candidate->wcost + 1;
       new_candidate->structure_cost = last_candidate->structure_cost + 1;
     }
     new_candidate->attributes = 0;
     last_candidate = new_candidate;
     // One character hiragana/katakana will cause side effect.
     // Type "し" and choose "シ". After that, "しました" will become "シました".
     if (Util::CharsLen(new_candidate->key) <= 1) {
       new_candidate->attributes |= Segment::Candidate::CONTEXT_SENSITIVE;
     }
     DCHECK(new_candidate->IsValid());
   }

   // Insert a dummy katakana candidate.
   string katakana_value;
   Util::HiraganaToKatakana(segment->key(), &katakana_value);
   if (segment->candidates_size() > 0 &&
       segment->candidates_size() < expand_size &&
       Util::GetScriptType(katakana_value) == Util::KATAKANA) {
     Segment::Candidate *new_candidate = segment->add_candidate();
     DCHECK(new_candidate);
     DCHECK(last_candidate);
     new_candidate->Init();
     new_candidate->key = segment->key();
     new_candidate->value = katakana_value;
     new_candidate->content_key = segment->key();
     new_candidate->content_value = katakana_value;
     new_candidate->cost = last_candidate->cost + 1;
     new_candidate->wcost = last_candidate->wcost + 1;
     new_candidate->structure_cost = last_candidate->structure_cost + 1;
     new_candidate->lid = last_candidate->lid;
     new_candidate->rid = last_candidate->rid;
     if (Util::CharsLen(new_candidate->key) <= 1) {
       new_candidate->attributes |= Segment::Candidate::CONTEXT_SENSITIVE;
     }
     DCHECK(new_candidate->IsValid());
   }

   DCHECK_GT(segment->candidates_size(), 0);
 }

 void ImmutableConverterImpl::ApplyResegmentRules(
     size_t pos, Lattice *lattice) const {
   if (ResegmentArabicNumberAndSuffix(pos, lattice)) {
     VLOG(1) << "ResegmentArabicNumberAndSuffix returned true";
     return;
   }

   if (ResegmentPrefixAndArabicNumber(pos, lattice)) {
     VLOG(1) << "ResegmentArabicNumberAndSuffix returned true";
     return;
   }

   if (ResegmentPersonalName(pos, lattice)) {
     VLOG(1) << "ResegmentPersonalName returned true";
     return;
   }
 }

 // Currently, only arabic_number + suffix patterns are resegmented
 // TODO(taku): consider kanji number into consideration
 bool ImmutableConverterImpl::ResegmentArabicNumberAndSuffix(
     size_t pos, Lattice *lattice) const {
   const Node *bnode = lattice->begin_nodes(pos);
   if (bnode == NULL) {
     VLOG(1) << "bnode is NULL";
     return false;
   }

   bool modified = false;

   for (const Node *compound_node = bnode;
        compound_node != NULL; compound_node = compound_node->bnext) {
     if (!compound_node->value.empty() && !compound_node->key.empty() &&
         pos_matcher_->IsNumber(compound_node->lid) &&
         !pos_matcher_->IsNumber(compound_node->rid) &&
         IsNumber(compound_node->value[0]) && IsNumber(compound_node->key[0])) {
       string number_value, number_key;
       string suffix_value, suffix_key;
       DecomposeNumberAndSuffix(compound_node->value,
                                &number_value, &suffix_value);
       DecomposeNumberAndSuffix(compound_node->key,
                                &number_key, &suffix_key);

       if (suffix_value.empty() || suffix_key.empty()) {
         continue;
       }

       // not compatibile
       if (number_value != number_key) {
         LOG(WARNING) << "Incompatible key/value number pair";
         continue;
       }

       // do -1 so that resegmented nodes are boosted
       // over compound node.
       const int32 wcost = max(compound_node->wcost / 2 - 1, 0);

       Node *number_node = lattice->NewNode();
       CHECK(number_node);
       number_node->key = number_key;
       number_node->value = number_value;
       number_node->lid = compound_node->lid;
       number_node->rid = 0;   // 0 to 0 transition cost is 0
       number_node->wcost = wcost;
       number_node->node_type = Node::NOR_NODE;
       number_node->bnext = NULL;

       // insert number into the lattice
       lattice->Insert(pos, number_node);

       Node *suffix_node = lattice->NewNode();
       CHECK(suffix_node);
       suffix_node->key = suffix_key;
       suffix_node->value = suffix_value;
       suffix_node->lid = 0;
       suffix_node->rid = compound_node->rid;
       suffix_node->wcost = wcost;
       suffix_node->node_type = Node::NOR_NODE;
       suffix_node->bnext = NULL;

       suffix_node->constrained_prev = number_node;

       // insert suffix into the lattice
       lattice->Insert(pos + number_node->key.size(), suffix_node);
       VLOG(1) << "Resegmented: " << compound_node->value << " "
               << number_node->value << " " << suffix_node->value;

       modified = true;
     }
   }

   return modified;
 }

 bool ImmutableConverterImpl::ResegmentPrefixAndArabicNumber(
     size_t pos, Lattice *lattice) const {
   const Node *bnode = lattice->begin_nodes(pos);
   if (bnode == NULL) {
     VLOG(1) << "bnode is NULL";
     return false;
   }

   bool modified = false;

   for (const Node *compound_node = bnode;
        compound_node != NULL; compound_node = compound_node->bnext) {
     // Unlike ResegmentArabicNumberAndSuffix, we don't
     // check POS as words ending with Arabic numbers are pretty rare.
     if (compound_node->value.size() > 1 && compound_node->key.size() > 1 &&
         !IsNumber(compound_node->value[0]) &&
         !IsNumber(compound_node->key[0]) &&
         IsNumber(compound_node->value[compound_node->value.size() - 1]) &&
         IsNumber(compound_node->key[compound_node->key.size() - 1])) {
       string number_value, number_key;
       string prefix_value, prefix_key;
       DecomposePrefixAndNumber(compound_node->value,
                                &prefix_value, &number_value);
       DecomposePrefixAndNumber(compound_node->key,
                                &prefix_key, &number_key);

       if (prefix_value.empty() || prefix_key.empty()) {
         continue;
       }

       // not compatibile
       if (number_value != number_key) {
         LOG(WARNING) << "Incompatible key/value number pair";
         continue;
       }

       // do -1 so that resegmented nodes are boosted
       // over compound node.
       const int32 wcost = max(compound_node->wcost / 2 - 1, 0);

       Node *prefix_node = lattice->NewNode();
       CHECK(prefix_node);
       prefix_node->key = prefix_key;
       prefix_node->value = prefix_value;
       prefix_node->lid = compound_node->lid;
       prefix_node->rid = 0;   // 0 to 0 transition cost is 0
       prefix_node->wcost = wcost;
       prefix_node->node_type = Node::NOR_NODE;
       prefix_node->bnext = NULL;

       // insert number into the lattice
       lattice->Insert(pos, prefix_node);

       Node *number_node = lattice->NewNode();
       CHECK(number_node);
       number_node->key = number_key;
       number_node->value = number_value;
       number_node->lid = 0;
       number_node->rid = compound_node->rid;
       number_node->wcost = wcost;
       number_node->node_type = Node::NOR_NODE;
       number_node->bnext = NULL;

       number_node->constrained_prev = prefix_node;

       // insert number into the lattice
       lattice->Insert(pos + prefix_node->key.size(), number_node);
       VLOG(1) << "Resegmented: " << compound_node->value << " "
               << prefix_node->value << " " << number_node->value;

       modified = true;
     }
   }

   return modified;
 }

 bool ImmutableConverterImpl::ResegmentPersonalName(
     size_t pos, Lattice *lattice) const {
   const Node *bnode = lattice->begin_nodes(pos);
   if (bnode == NULL) {
     VLOG(1) << "bnode is NULL";
     return false;
   }

   bool modified = false;

   // find a combination of last_name and first_name, e.g. "田中麗奈".
   for (const Node *compound_node = bnode;
        compound_node != NULL; compound_node = compound_node->bnext) {
     // left word is last name and right word is first name
     if (compound_node->lid != last_name_id_ ||
         compound_node->rid != first_name_id_) {
       continue;
     }

     const size_t len = Util::CharsLen(compound_node->value);

     // Don't resegment one-word last_name/first_name like 林健,
     // as it would deliver side effect.
     if (len <= 2) {
       continue;
     }

     // Don't resegment if the value is katakana
     if (Util::GetScriptType(compound_node->value) == Util::KATAKANA) {
       continue;
     }

     // Do constrained Viterbi search inside the compound "田中麗奈".
     // Constraints:
     // 1. Concats of last_name and first_name should be "田中麗奈"
     // 2. consisting of two words (last_name and first_name)
     // 3. Segment-boundary exist between the two words.
     // 4.a Either (a) POS of lnode is last_name or (b) POS of rnode is fist_name
     //     (len >= 4)
     // 4.b Both (a) POS of lnode is last_name and (b) POS of rnode is fist_name
     //     (len == 3)
     const Node *best_last_name_node = NULL;
     const Node *best_first_name_node = NULL;
     int best_cost = 0x7FFFFFFF;
     for (const Node *lnode = bnode; lnode != NULL; lnode = lnode->bnext) {
       // lnode(last_name) is a prefix of compound, Constraint 1.
       if (compound_node->value.size() > lnode->value.size() &&
           compound_node->key.size() > lnode->key.size() &&
           Util::StartsWith(compound_node->value, lnode->value)) {
         // rnode(first_name) is a suffix of compound, Constraint 1.
         for (const Node *rnode = lattice->begin_nodes(pos + lnode->key.size());
              rnode != NULL; rnode = rnode->bnext) {
           if ((lnode->value.size() + rnode->value.size())
               == compound_node->value.size() &&
               (lnode->value + rnode->value) == compound_node->value &&
               segmenter_->IsBoundary(*lnode, *rnode, false)) {  // Constraint 3.
             const int32 cost = lnode->wcost + GetCost(lnode, rnode);
             if (cost < best_cost) {   // choose the smallest ones
               best_last_name_node = lnode;
               best_first_name_node = rnode;
               best_cost = cost;
             }
           }
         }
       }
     }

     // No valid first/last names are found
     if (best_first_name_node == NULL || best_last_name_node == NULL) {
       continue;
     }

     // Constraint 4.a
     if (len >= 4 &&
         (best_last_name_node->lid != last_name_id_ &&
          best_first_name_node->rid != first_name_id_)) {
       continue;
     }

     // Constraint 4.b
     if (len == 3 &&
         (best_last_name_node->lid != last_name_id_ ||
          best_first_name_node->rid != first_name_id_)) {
       continue;
     }

     // Insert LastName and FirstName as independent nodes.
     // Duplications will be removed in nbest enumerations.
     // word costs are calculated from compound node by assuming that
     // transition cost is 0.
     //
     // last_name_cost + transition_cost + first_name_cost == compound_cost
     // last_name_cost == first_name_cost
     // i.e,
     // last_name_cost = first_name_cost =
     // (compound_cost - transition_cost) / 2;
     const int32 wcost = (compound_node->wcost -
                          last_to_first_name_transition_cost_) / 2;

     Node *last_name_node = lattice->NewNode();
     CHECK(last_name_node);
     last_name_node->key = best_last_name_node->key;
     last_name_node->value = best_last_name_node->value;
     last_name_node->lid = compound_node->lid;
     last_name_node->rid = last_name_id_;
     last_name_node->wcost = wcost;
     last_name_node->node_type = Node::NOR_NODE;
     last_name_node->bnext = NULL;

     // insert last_name into the lattice
     lattice->Insert(pos, last_name_node);

     Node *first_name_node = lattice->NewNode();
     CHECK(first_name_node);
     first_name_node->key = best_first_name_node->key;
     first_name_node->value = best_first_name_node->value;
     first_name_node->lid = first_name_id_;
     first_name_node->rid = compound_node->rid;
     first_name_node->wcost = wcost;
     first_name_node->node_type = Node::NOR_NODE;
     first_name_node->bnext = NULL;

     first_name_node->constrained_prev = last_name_node;

     // insert first_name into the lattice
     lattice->Insert(pos + last_name_node->key.size(), first_name_node);

     VLOG(2) << "Resegmented: " << compound_node->value << " "
             << last_name_node->value << " " << first_name_node->value;

     modified = true;
   }

   return modified;
 }

 namespace {

 class NodeListBuilderWithCacheEnabled : public NodeListBuilderForLookupPrefix {
  public:
   NodeListBuilderWithCacheEnabled(NodeAllocator *allocator,
                                   size_t min_key_length)
       : NodeListBuilderForLookupPrefix(allocator,
                                        allocator->max_nodes_size(),
                                        min_key_length) {
     DCHECK(allocator);
   }

   virtual ResultType OnToken(StringPiece key, StringPiece actual_key,
                              const Token &token) {
     Node *node = NewNodeFromToken(token);
     node->attributes |= Node::ENABLE_CACHE;
     node->raw_wcost = node->wcost;
     PrependNode(node);
     return (limit_ <= 0) ? TRAVERSE_DONE : TRAVERSE_CONTINUE;
   }
 };

 }  // namespace

 Node *ImmutableConverterImpl::Lookup(const int begin_pos,
                                      const int end_pos,
                                      const ConversionRequest &request,
                                      bool is_reverse,
                                      bool is_prediction,
                                      Lattice *lattice) const {
   CHECK_LE(begin_pos, end_pos);
   const char *begin = lattice->key().data() + begin_pos;
   const char *end = lattice->key().data() + end_pos;
   const size_t len = end_pos - begin_pos;

   lattice->node_allocator()->set_max_nodes_size(8192);
   Node *result_node = NULL;
   if (is_reverse) {
     BaseNodeListBuilder builder(
         lattice->node_allocator(),
         lattice->node_allocator()->max_nodes_size());
     dictionary_->LookupReverse(StringPiece(begin, len), &builder);
     result_node = builder.result();
   } else {
     if (is_prediction) {
       NodeListBuilderWithCacheEnabled builder(
           lattice->node_allocator(),
           lattice->cache_info(begin_pos) + 1);
       dictionary_->LookupPrefix(
           StringPiece(begin, len),
           request.IsKanaModifierInsensitiveConversion(),
           &builder);
       result_node = builder.result();
       lattice->SetCacheInfo(begin_pos, len);
     } else {
       // When cache feature is not used, look up normally
       BaseNodeListBuilder builder(
           lattice->node_allocator(),
           lattice->node_allocator()->max_nodes_size());
       dictionary_->LookupPrefix(
           StringPiece(begin, len),
           request.IsKanaModifierInsensitiveConversion(),
           &builder);
       result_node = builder.result();
     }
   }
   return AddCharacterTypeBasedNodes(begin, end, lattice, result_node);
 }

 Node *ImmutableConverterImpl::AddCharacterTypeBasedNodes(
     const char *begin, const char *end, Lattice *lattice, Node *nodes) const {

   size_t mblen = 0;
   const char32 ucs4 = Util::UTF8ToUCS4(begin, end, &mblen);

   const Util::ScriptType first_script_type = Util::GetScriptType(ucs4);
   const Util::FormType first_form_type = Util::GetFormType(ucs4);

   // Add 1 character node. It can be either UnknownId or NumberId.
   {
     Node *new_node = lattice->NewNode();
     CHECK(new_node);
     if (first_script_type == Util::NUMBER) {
       new_node->lid = number_id_;
       new_node->rid = number_id_;
     } else {
       new_node->lid = unknown_id_;
       new_node->rid = unknown_id_;
     }

     new_node->wcost = kMaxCost;
     new_node->value.assign(begin, mblen);
     new_node->key.assign(begin, mblen);
     new_node->node_type = Node::NOR_NODE;
     new_node->bnext = nodes;
     nodes = new_node;
   }  // scope out |new_node|

   if (first_script_type == Util::NUMBER) {
     nodes->wcost = kDefaultNumberCost;
     return nodes;
   }

   if (first_script_type != Util::ALPHABET &&
       first_script_type != Util::KATAKANA) {
     return nodes;
   }

   // group by same char type
   int num_char = 1;
   const char *p = begin + mblen;
   while (p < end) {
     const char32 next_ucs4 = Util::UTF8ToUCS4(p, end, &mblen);
     if (first_script_type != Util::GetScriptType(next_ucs4) ||
         first_form_type != Util::GetFormType(next_ucs4)) {
       break;
     }
     p += mblen;
     ++num_char;
   }

   if (num_char > 1) {
     mblen = static_cast<uint32>(p - begin);
     Node *new_node = lattice->NewNode();
     CHECK(new_node);
     if (first_script_type == Util::NUMBER) {
       new_node->lid = number_id_;
       new_node->rid = number_id_;
     } else {
       new_node->lid = unknown_id_;
       new_node->rid = unknown_id_;
     }
     new_node->wcost = kMaxCost / 2;
     new_node->value.assign(begin, mblen);
     new_node->key.assign(begin, mblen);
     new_node->node_type = Node::NOR_NODE;
     new_node->bnext = nodes;
     nodes = new_node;
   }

   return nodes;
 }

 namespace {

 // Reasonably big cost. Cannot use INT_MAX because a new cost will be
 // calculated based on kVeryBigCost.
 const int kVeryBigCost = (INT_MAX >> 2);

 // Runs viterbi algorithm at position |pos|. The left_boundary/right_boundary
 // are the next boundary looked from pos. (If pos is on the boundary,
 // left_boundary should be the previous one, and right_boundary should be
 // the next).
 inline void ViterbiInternal(
     const Connector &connector, size_t pos, size_t right_boundary,
     Lattice *lattice) {
   for (Node *rnode = lattice->begin_nodes(pos);
        rnode != NULL; rnode = rnode->bnext) {
     if (rnode->end_pos > right_boundary) {
       // Invalid rnode.
       rnode->prev = NULL;
       continue;
     }

     if (rnode->constrained_prev != NULL) {
       // Constrained node.
       if (rnode->constrained_prev->prev == NULL) {
         rnode->prev = NULL;
       } else {
         rnode->prev = rnode->constrained_prev;
         rnode->cost =
             rnode->prev->cost +
             rnode->wcost +
             connector.GetTransitionCost(rnode->prev->rid, rnode->lid);
       }
       continue;
     }

     // Find a valid node which connects to the rnode with minimum cost.
     int best_cost = kVeryBigCost;
     Node *best_node = NULL;
     for (Node *lnode = lattice->end_nodes(pos);
          lnode != NULL; lnode = lnode->enext) {
       if (lnode->prev == NULL) {
         // Invalid lnode.
         continue;
       }

       int cost =
           lnode->cost + connector.GetTransitionCost(lnode->rid, rnode->lid);
       if (cost < best_cost) {
         best_cost = cost;
         best_node = lnode;
       }
     }

     rnode->prev = best_node;
     rnode->cost = best_cost + rnode->wcost;
   }
 }
 }  // namespace

 bool ImmutableConverterImpl::Viterbi(
     const Segments &segments, Lattice *lattice) const {
   const string &key = lattice->key();

   // Process BOS.
   {
     Node *bos_node = lattice->bos_nodes();
     // Ensure only one bos node is available.
     DCHECK(bos_node != NULL);
     DCHECK(bos_node->enext == NULL);

     const size_t right_boundary = segments.segment(0).key().size();
     for (Node *rnode = lattice->begin_nodes(0);
          rnode != NULL; rnode = rnode->bnext) {
       if (rnode->end_pos > right_boundary) {
         // Invalid rnode.
         continue;
       }

       // Ensure no constraint.
       DCHECK(rnode->constrained_prev == NULL);

       rnode->prev = bos_node;
       rnode->cost =
           bos_node->cost +
           connector_->GetTransitionCost(bos_node->rid, rnode->lid) +
           rnode->wcost;
     }
   }

   size_t left_boundary = 0;
   const size_t segments_size = segments.segments_size();

   // Specialization for the first segment.
   // Don't run on the left boundary (the connection with BOS node),
   // beacuse it is already run above.
   {
     const size_t right_boundary =
         left_boundary + segments.segment(0).key().size();
     for (size_t pos = left_boundary + 1; pos < right_boundary; ++pos) {
       ViterbiInternal(*connector_, pos, right_boundary, lattice);
     }
     left_boundary = right_boundary;
   }

   // The condition to break is in the loop.
   for (size_t i = 1; i < segments_size; ++i) {
     // Run Viterbi for each position the segment.
     const size_t right_boundary =
         left_boundary + segments.segment(i).key().size();
     for (size_t pos = left_boundary; pos < right_boundary; ++pos) {
       ViterbiInternal(*connector_, pos, right_boundary, lattice);
     }
     left_boundary = right_boundary;
   }

   // Process EOS.
   {
     Node *eos_node = lattice->eos_nodes();

     // Ensure only one eos node.
     DCHECK(eos_node != NULL);
     DCHECK(eos_node->bnext == NULL);

     // No constrained prev.
     DCHECK(eos_node->constrained_prev == NULL);

     left_boundary =
         key.size() - segments.segment(segments_size - 1).key().size();
     // Find a valid node which connects to the rnode with minimum cost.
     int best_cost = kVeryBigCost;
     Node *best_node = NULL;
     for (Node *lnode = lattice->end_nodes(key.size());
          lnode != NULL; lnode = lnode->enext) {
       if (lnode->prev == NULL) {
         // Invalid lnode.
         continue;
       }

       int cost =
           lnode->cost +
           connector_->GetTransitionCost(lnode->rid, eos_node->lid);
       if (cost < best_cost) {
         best_cost = cost;
         best_node = lnode;
       }
     }

     eos_node->prev = best_node;
     eos_node->cost = best_cost + eos_node->wcost;
   }

   // Traverse the node from end to begin.
   Node *node = lattice->eos_nodes();
   CHECK(node->bnext == NULL);
   Node *prev = NULL;
   while (node->prev != NULL) {
     prev = node->prev;
     prev->next = node;
     node = prev;
   }

   if (lattice->bos_nodes() != prev) {
     LOG(WARNING) << "cannot make lattice";
     return false;
   }

   return true;
 }

 // faster Viterbi algorithm for prediction
 //
 // Run simple Viterbi algorithm with contracting the same lid and rid.
 // Because the original Viterbi has speciall nodes, we should take it
 // consideration.
 // 1. CONNECTED nodes: are normal nodes.
 // 2. WEAK_CONNECTED nodes: don't occur in prediction, so we do not have to
 //    consider about them.
 // 3. NOT_CONNECTED nodes: occur when they are between history nodes and
 //    normal nodes.
 // For NOT_CONNECTED nodes, we should run Viterbi for history nodes first,
 // and do it for normal nodes second. The function "PredictionViterbiSub"
 // runs Viterbi for positions between calc_begin_pos and calc_end_pos,
 // inclusive.
 //
 // We cannot apply this function in suggestion because in suggestion there are
 // WEAK_CONNECTED nodes and this function is not designed for them.
 //
 // TODO(toshiyuki): We may be able to use faster viterbi for
 // conversion/suggestion if we use richer info as contraction group.

 bool ImmutableConverterImpl::PredictionViterbi(
     const Segments &segments, Lattice *lattice) const {
   const size_t key_length = lattice->key().size();
   const size_t history_segments_size = segments.history_segments_size();
   size_t history_length = 0;
   for (size_t i = 0; i < history_segments_size; ++i) {
     history_length += segments.segment(i).key().size();
   }
   PredictionViterbiInternal(0, history_length, lattice);
   PredictionViterbiInternal(history_length, key_length, lattice);

   Node *node = lattice->eos_nodes();
   CHECK(node->bnext == NULL);
   Node *prev = NULL;
   while (node->prev != NULL) {
     prev = node->prev;
     prev->next = node;
     node = prev;
   }

   if (lattice->bos_nodes() != prev) {
     LOG(WARNING) << "cannot make lattice";
     return false;
   }

   return true;
 }

 void ImmutableConverterImpl::PredictionViterbiInternal(
     int calc_begin_pos, int calc_end_pos, Lattice *lattice) const {
   CHECK_LE(calc_begin_pos, calc_end_pos);

   // Mapping from lnode's rid to (cost, Node) of best way/cost, and vice versa.
   // Note that, the average number of lid/rid variation is less than 30 in
   // most cases. So, in order to avoid too many allocations for internal
   // nodes of std::map, we use vector of key-value pairs.
   typedef vector<pair<int, pair<int, Node*> > > BestMap;
   typedef OrderBy<FirstKey, Less> OrderByFirst;
   BestMap lbest, rbest;
   lbest.reserve(128);
   rbest.reserve(128);

   const pair<int, Node*> kInvalidValue(INT_MAX, static_cast<Node*>(NULL));

   for (size_t pos = calc_begin_pos; pos <= calc_end_pos; ++pos) {
     lbest.clear();
     for (Node *lnode = lattice->end_nodes(pos);
          lnode != NULL; lnode = lnode->enext) {
       const int rid = lnode->rid;
       BestMap::value_type key(rid, kInvalidValue);
       BestMap::iterator iter =
           lower_bound(lbest.begin(), lbest.end(), key, OrderByFirst());
       if (iter == lbest.end() || iter->first != rid) {
         lbest.insert(
             iter, BestMap::value_type(rid, make_pair(lnode->cost, lnode)));
       } else if (lnode->cost < iter->second.first) {
         iter->second.first = lnode->cost;
         iter->second.second = lnode;
       }
     }

     if (lbest.empty()) {
       continue;
     }

     rbest.clear();
     Node *rnode_begin = lattice->begin_nodes(pos);
     for (Node *rnode = rnode_begin; rnode != NULL; rnode = rnode->bnext) {
       if (rnode->end_pos > calc_end_pos) {
         continue;
       }
       BestMap::value_type key(rnode->lid, kInvalidValue);
       BestMap::iterator iter =
           lower_bound(rbest.begin(), rbest.end(), key, OrderByFirst());
       if (iter == rbest.end() || iter->first != rnode->lid) {
         rbest.insert(iter, key);
       }
     }

     if (rbest.empty()) {
       continue;
     }

     for (BestMap::iterator liter = lbest.begin();
          liter != lbest.end(); ++liter) {
       for (BestMap::iterator riter = rbest.begin();
            riter != rbest.end(); ++riter) {
         const int cost = liter->second.first +
             connector_->GetTransitionCost(liter->first, riter->first);
         if (cost < riter->second.first) {
           riter->second.first = cost;
           riter->second.second = liter->second.second;
         }
       }
     }

     for (Node *rnode = rnode_begin; rnode != NULL; rnode = rnode->bnext) {
       if (rnode->end_pos > calc_end_pos) {
         continue;
       }
       BestMap::value_type key(rnode->lid, kInvalidValue);
       BestMap::iterator iter =
           lower_bound(rbest.begin(), rbest.end(), key, OrderByFirst());
       if (iter == rbest.end() || iter->first != rnode->lid ||
           iter->second.second == NULL) {
         continue;
       }

       rnode->cost = iter->second.first + rnode->wcost;
       rnode->prev = iter->second.second;
     }
   }
 }

 namespace {

 // Adds penalty for predictive nodes when building a node list.
 class NodeListBuilderForPredictiveNodes : public BaseNodeListBuilder {
  public:
   NodeListBuilderForPredictiveNodes(NodeAllocator *allocator,
                                     int limit, const POSMatcher *pos_matcher)
       : BaseNodeListBuilder(allocator, limit), pos_matcher_(pos_matcher) {}

   virtual ~NodeListBuilderForPredictiveNodes() {}

   virtual ResultType OnToken(StringPiece key, StringPiece actual_key,
                              const Token &token) {
     Node *node = NewNodeFromToken(token);
     const int kPredictiveNodeDefaultPenalty = 900;  // ~= -500 * log(1/6)
     int additional_cost = kPredictiveNodeDefaultPenalty;

     // Bonus for suffix word.
     if (pos_matcher_->IsSuffixWord(node->rid) &&
         pos_matcher_->IsSuffixWord(node->lid)) {
       const int kSuffixWordBonus = 700;
       additional_cost -= kSuffixWordBonus;
     }

     // Penalty for unique noun word.
     if (pos_matcher_->IsUniqueNoun(node->rid) ||
         pos_matcher_->IsUniqueNoun(node->lid)) {
       const int kUniqueNounPenalty = 500;
       additional_cost += kUniqueNounPenalty;
     }

     // Penalty for number.
     if (pos_matcher_->IsNumber(node->rid) ||
         pos_matcher_->IsNumber(node->lid)) {
       const int kNumberPenalty = 4000;
       additional_cost += kNumberPenalty;
     }

     node->wcost += additional_cost;
     PrependNode(node);
     return (limit_ <= 0) ? TRAVERSE_DONE : TRAVERSE_CONTINUE;
   }

  private:
   const POSMatcher *pos_matcher_;
 };

 }  // namespace

 // Add predictive nodes from conversion key.
 void ImmutableConverterImpl::MakeLatticeNodesForPredictiveNodes(
     const Segments &segments, const ConversionRequest &request,
     Lattice *lattice) const {
   const string &key = lattice->key();
   string conversion_key;
   for (size_t i = 0; i < segments.conversion_segments_size(); ++i) {
     conversion_key += segments.conversion_segment(i).key();
   }
   DCHECK_NE(string::npos, key.find(conversion_key));
   vector<string> conversion_key_chars;
   Util::SplitStringToUtf8Chars(conversion_key, &conversion_key_chars);

   // do nothing if the conversion key is short
   const size_t kKeyMinLength = 7;
   if (conversion_key_chars.size() < kKeyMinLength) {
     return;
   }

   // Predictive search from suffix dictionary.
   // (search words with between 1 and 6 characters)
   {
     const size_t kMaxSuffixLookupKey = 6;
     const size_t max_sufffix_len =
         min(kMaxSuffixLookupKey, conversion_key_chars.size());
     size_t pos = key.size();

     for (size_t suffix_len = 1; suffix_len <= max_sufffix_len; ++suffix_len) {
       pos -= conversion_key_chars[
           conversion_key_chars.size() - suffix_len].size();
       DCHECK_GE(key.size(), pos);
       NodeListBuilderForPredictiveNodes builder(
           lattice->node_allocator(),
           lattice->node_allocator()->max_nodes_size(),
           pos_matcher_);
       suffix_dictionary_->LookupPredictive(
           StringPiece(key.data() + pos, key.size() - pos),
           request.IsKanaModifierInsensitiveConversion(), &builder);
       if (builder.result() != NULL) {
         lattice->Insert(pos, builder.result());
       }
     }
   }

   // Predictive search from system dictionary.
   // (search words with between 5 and 8 characters)
   {
     const size_t kMinSystemLookupKey = 5;
     const size_t kMaxSystemLookupKey = 8;
     const size_t max_suffix_len =
         min(kMaxSystemLookupKey, conversion_key_chars.size());
     size_t pos = key.size();
     for (size_t suffix_len = 1; suffix_len <= max_suffix_len; ++suffix_len) {
       pos -= conversion_key_chars[
           conversion_key_chars.size() - suffix_len].size();
       DCHECK_GE(key.size(), pos);

       if (suffix_len < kMinSystemLookupKey) {
         // Just update |pos|.
         continue;
       }

       NodeListBuilderForPredictiveNodes builder(
           lattice->node_allocator(),
           lattice->node_allocator()->max_nodes_size(),
           pos_matcher_);
       dictionary_->LookupPredictive(
           StringPiece(key.data() + pos, key.size() - pos),
           request.IsKanaModifierInsensitiveConversion(), &builder);
       if (builder.result() != NULL) {
         lattice->Insert(pos, builder.result());
       }
     }
   }
 }

 bool ImmutableConverterImpl::MakeLattice(
     const ConversionRequest &request,
     Segments *segments, Lattice *lattice) const {
   if (segments == NULL) {
     LOG(ERROR) << "Segments is NULL";
     return false;
   }

   if (lattice == NULL) {
     LOG(ERROR) << "Lattice is NULL";
     return false;
   }

   if (segments->segments_size() >= kMaxSegmentsSize) {
     LOG(WARNING) << "too many segments";
     return false;
   }

   NormalizeHistorySegments(segments);

   const bool is_reverse =
       (segments->request_type() == Segments::REVERSE_CONVERSION);

   const bool is_prediction =
       (segments->request_type() == Segments::SUGGESTION ||
        segments->request_type() == Segments::PREDICTION);

   // In suggestion mode, ImmutableConverter will not accept multiple-segments.
   // The result always consists of one segment.
   if ((is_reverse || is_prediction) &&
       (segments->conversion_segments_size() != 1 ||
        segments->conversion_segment(0).segment_type() != Segment::FREE)) {
     LOG(WARNING) << "ImmutableConverter doesn't support constrained requests";
     return false;
   }

   // Make the conversion key.
   string conversion_key;
   const size_t history_segments_size = segments->history_segments_size();
   for (size_t i = history_segments_size; i < segments->segments_size(); ++i) {
     DCHECK(!segments->segment(i).key().empty());
     conversion_key.append(segments->segment(i).key());
   }
   const size_t max_char_len =
       is_reverse ? kMaxCharLengthForReverseConversion : kMaxCharLength;
   if (conversion_key.empty() || conversion_key.size() >= max_char_len) {
     LOG(WARNING) << "Conversion key is empty or too long: " << conversion_key;
     return false;
   }

   // Make the history key.
   string history_key;
   for (size_t i = 0; i < history_segments_size; ++i) {
     DCHECK(!segments->segment(i).key().empty());
     history_key.append(segments->segment(i).key());
   }
   // Check if the total length (length of history_key + conversion_key) doesn't
   // exceed the maximum key length. If it exceeds the limit, we simply clears
   // such useless history segments, which is acceptable because such cases
   // rarely happen in normal use cases.
   if (history_key.size() + conversion_key.size() >= max_char_len) {
     LOG(WARNING) << "Clear history segments due to the limit of key length.";
     segments->clear_history_segments();
     history_key.clear();
   }

   const string key = history_key + conversion_key;
   lattice->UpdateKey(key);
   lattice->ResetNodeCost();

   if (is_reverse) {
     // Reverse lookup for each prefix string in key is slow with current
     // implementation, so run it for them at once and cache the result.
     dictionary_->PopulateReverseLookupCache(key);
   }

   bool is_valid_lattice = true;
   // Perform the main part of lattice construction.
   if (!MakeLatticeNodesForHistorySegments(*segments, request, lattice) ||
       lattice->end_nodes(history_key.size()) == NULL) {
     is_valid_lattice = false;
   }

   // Can not apply key corrector to invalid lattice.
   if (is_valid_lattice) {
     MakeLatticeNodesForConversionSegments(
         *segments, request, history_key, lattice);
   }

   if (is_reverse) {
     // No reverse look up will happen afterwards.
     dictionary_->ClearReverseLookupCache();
   }

   // Predictive real time conversion
   if (is_prediction) {
     MakeLatticeNodesForPredictiveNodes(*segments, request, lattice);
   }

   if (!is_valid_lattice) {
     // Safely bail out, since reverse look up cache was released already.
     return false;
   }

   if (lattice->end_nodes(key.size()) == NULL) {
     LOG(WARNING) << "cannot build lattice from input";
     return false;
   }

   ApplyPrefixSuffixPenalty(conversion_key, lattice);

   // Re-segment personal-names, numbers ...etc
   if (segments->request_type() == Segments::CONVERSION) {
     Resegment(*segments, history_key, conversion_key, lattice);
   }

   return true;
 }

 bool ImmutableConverterImpl::MakeLatticeNodesForHistorySegments(
     const Segments &segments, const ConversionRequest &request,
     Lattice *lattice) const {
   const bool is_reverse =
      (segments.request_type() == Segments::REVERSE_CONVERSION);
   const size_t history_segments_size = segments.history_segments_size();
   const string &key = lattice->key();

   size_t segments_pos = 0;
   uint16 last_rid = 0;

   for (size_t s = 0; s < history_segments_size; ++s) {
     const Segment &segment = segments.segment(s);
     if (segment.segment_type() != Segment::HISTORY &&
         segment.segment_type() != Segment::SUBMITTED) {
       LOG(WARNING) << "inconsistent history";
       return false;
     }
     if (segment.key().empty()) {
       LOG(WARNING) << "invalid history: key is empty";
       return false;
     }
     const Segment::Candidate &candidate = segment.candidate(0);

     // Add a virtual nodes corresponding to HISTORY segments.
     Node *rnode = lattice->NewNode();
     CHECK(rnode);
     rnode->lid = candidate.lid;
     rnode->rid = candidate.rid;
     rnode->wcost = 0;
     rnode->value = candidate.value;
     rnode->key = segment.key();
     rnode->node_type = Node::HIS_NODE;
     rnode->bnext = NULL;
     lattice->Insert(segments_pos, rnode);

     // For the last history segment,  we also insert a new node having
     // EOS part-of-speech. Viterbi algorithm will find the
     // best path from rnode(context) and rnode2(EOS).
     if (s + 1 == history_segments_size && candidate.rid != 0) {
       Node *rnode2 = lattice->NewNode();
       CHECK(rnode2);
       rnode2->lid = candidate.lid;
       rnode2->rid = 0;   // 0 is BOS/EOS

       // This cost was originally set to 1500.
       // It turned out this penalty was so strong that it caused some
       // undesirable conversions like "の-なまえ" -> "の-な前" etc., so we
       // changed this to 0.
       // Reducing the cost promotes context-unaware conversions, and this may
       // have some unexpected side effects.
       // TODO(team): Figure out a better way to set the cost using
       // boundary.def-like approach.
       rnode2->wcost = 0;
       rnode2->value = candidate.value;
       rnode2->key = segment.key();
       rnode2->node_type = Node::HIS_NODE;
       rnode2->bnext = NULL;
       lattice->Insert(segments_pos, rnode2);
     }

     // Dictionary lookup for the candidates which are
     // overlapping between history and conversion.
     // Check only the last history segment at this moment.
     //
     // Example: history "おいかわ(及川)", conversion: "たくや"
     // Here, try to find "おいかわたくや(及川卓也)" from dictionary
     // and insert "卓也" as a new word node with a modified cost
     if (s + 1 == history_segments_size) {
       const bool is_prediction =
           (segments.request_type() == Segments::SUGGESTION ||
            segments.request_type() == Segments::PREDICTION);
       const Node *node = Lookup(segments_pos, key.size(), request,
                                 is_reverse, is_prediction, lattice);
       for (const Node *compound_node = node; compound_node != NULL;
            compound_node = compound_node->bnext) {
         // No overlapps
         if (compound_node->key.size() <= rnode->key.size() ||
             compound_node->value.size() <= rnode->value.size() ||
             !Util::StartsWith(compound_node->key, rnode->key) ||
             !Util::StartsWith(compound_node->value, rnode->value)) {
           // not a prefix
           continue;
         }

         // Must be in the same POS group.
         // http://b/issue?id=2977618
         if (pos_group_->GetPosGroup(candidate.lid)
             != pos_group_->GetPosGroup(compound_node->lid)) {
           continue;
         }

         // make new virtual node
         Node *new_node = lattice->NewNode();
         CHECK(new_node);

         // get the suffix part ("たくや/卓也")
         new_node->key.assign(compound_node->key, rnode->key.size(),
                              compound_node->key.size() - rnode->key.size());
         new_node->value.assign(
             compound_node->value, rnode->value.size(),
             compound_node->value.size() - rnode->value.size());

         // rid/lid are derived from the compound.
         // lid is just an approximation
         new_node->rid = compound_node->rid;
         new_node->lid = compound_node->lid;
         new_node->bnext = NULL;
         new_node->node_type = Node::NOR_NODE;
         new_node->attributes |= Segment::Candidate::CONTEXT_SENSITIVE;

         // New cost recalcuration:
         //
         // compound_node->wcost * (candidate len / compound_node len)
         // - trans(candidate.rid, new_node.lid)
         new_node->wcost =
             compound_node->wcost *
             candidate.value.size() / compound_node->value.size()
             - connector_->GetTransitionCost(candidate.rid, new_node->lid);

         VLOG(2) << " compound_node->lid=" << compound_node->lid
                 << " compound_node->rid=" << compound_node->rid
                 << " compound_node->wcost=" << compound_node->wcost;
         VLOG(2) << " last_rid=" << last_rid
                 << " candidate.lid=" << candidate.lid
                 << " candidate.rid=" << candidate.rid
                 << " candidate.cost=" << candidate.cost
                 << " candidate.wcost=" << candidate.wcost;
         VLOG(2) << " new_node->wcost=" << new_node->wcost;

         new_node->constrained_prev = rnode;

         // Added as new node
         lattice->Insert(segments_pos + rnode->key.size(), new_node);

         VLOG(2) << "Added: " << new_node->key << " " << new_node->value;
       }
     }

     // update segment pos
     segments_pos += rnode->key.size();
     last_rid = rnode->rid;
   }
   lattice->set_history_end_pos(segments_pos);
   return true;
 }

 void ImmutableConverterImpl::MakeLatticeNodesForConversionSegments(
     const Segments &segments, const ConversionRequest &request,
     const string &history_key, Lattice *lattice) const {
   const string &key = lattice->key();
   const bool is_conversion =
       (segments.request_type() == Segments::CONVERSION);
   // Do not use KeyCorrector if user changes the boundary.
   // http://b/issue?id=2804996
   scoped_ptr<KeyCorrector> key_corrector;
   if (is_conversion && !segments.resized()) {
     KeyCorrector::InputMode mode = KeyCorrector::ROMAN;
     if (GET_CONFIG(preedit_method) != config::Config::ROMAN) {
       mode = KeyCorrector::KANA;
     }
     key_corrector.reset(new KeyCorrector(key, mode, history_key.size()));
   }

   const bool is_reverse =
       (segments.request_type() == Segments::REVERSE_CONVERSION);
   const bool is_prediction =
       (segments.request_type() == Segments::SUGGESTION ||
        segments.request_type() == Segments::PREDICTION);
   for (size_t pos = history_key.size(); pos < key.size(); ++pos) {
     if (lattice->end_nodes(pos) != NULL) {
       Node *rnode =
           Lookup(pos, key.size(), request, is_reverse, is_prediction, lattice);
       // If history key is NOT empty and user input seems to starts with
       // a particle ("はにで..."), mark the node as STARTS_WITH_PARTICLE.
       // We change the segment boundary if STARTS_WITH_PARTICLE attribute
       // is assigned.
       if (!history_key.empty() && pos == history_key.size()) {
         for (Node *node = rnode; node != NULL; node = node->bnext) {
           if (pos_matcher_->IsAcceptableParticleAtBeginOfSegment(node->lid) &&
               node->lid == node->rid) {  // not a compound.
             node->attributes |= Node::STARTS_WITH_PARTICLE;
           }
         }
       }
       CHECK(rnode != NULL);
       lattice->Insert(pos, rnode);
       InsertCorrectedNodes(
           pos, key, request, key_corrector.get(), dictionary_, lattice);
     }
   }
 }

 void ImmutableConverterImpl::ApplyPrefixSuffixPenalty(
     const string &conversion_key,
     Lattice *lattice) const {
   const string &key = lattice->key();
   DCHECK_LE(conversion_key.size(), key.size());
   for (Node *node = lattice->begin_nodes(key.size() -
                                          conversion_key.size());
        node != NULL; node = node->bnext) {
     // TODO(taku):
     // We might be able to tweak the penalty according to
     // the size of history segments.
     // If history-segments is non-empty, we can make the
     // penalty smaller so that history context is more likely
     // selected.
     node->wcost += segmenter_->GetPrefixPenalty(node->lid);
   }

   for (Node *node = lattice->end_nodes(key.size());
        node != NULL; node = node->enext) {
     node->wcost += segmenter_->GetSuffixPenalty(node->rid);
   }
 }

 void ImmutableConverterImpl::Resegment(
     const Segments &segments,
     const string &history_key, const string &conversion_key,
     Lattice *lattice) const {
   for (size_t pos = history_key.size();
        pos < history_key.size() + conversion_key.size(); ++pos) {
     ApplyResegmentRules(pos, lattice);
   }

   // Enable constrained node.
   size_t segments_pos = 0;
   for (size_t s = 0; s < segments.segments_size(); ++s) {
     const Segment &segment = segments.segment(s);
     if (segment.segment_type() == Segment::FIXED_VALUE) {
       const Segment::Candidate &candidate = segment.candidate(0);
       Node *rnode = lattice->NewNode();
       CHECK(rnode);
       rnode->lid       = candidate.lid;
       rnode->rid       = candidate.rid;
       rnode->wcost     = kMinCost;
       rnode->value     = candidate.value;
       rnode->key       = segment.key();
       rnode->node_type = Node::CON_NODE;
       rnode->bnext     = NULL;
       lattice->Insert(segments_pos, rnode);
     }
     segments_pos += segment.key().size();
   }
 }

 // Single segment conversion results should be set to |segments|.
 void ImmutableConverterImpl::InsertFirstSegmentToCandidates(
     Segments *segments,
     const Lattice &lattice,
     const vector<uint16> &group,
     size_t max_candidates_size) const {
   const size_t only_first_segment_candidate_pos =
       segments->conversion_segment(0).candidates_size();
   InsertCandidates(segments, lattice, group,
                    max_candidates_size,
                    ONLY_FIRST_SEGMENT);
   // Note that inserted candidates might consume the entire key.
   // e.g. key: "なのは", value: "ナノは"
   // Erase them later.
   if (segments->conversion_segment(0).candidates_size() <=
       only_first_segment_candidate_pos) {
     return;
   }

   // Set new costs for only first segment candidates
   // Basically, only first segment candidates cost is smaller
   // than that of single segment conversion results.
   // For example, the cost of "私の" is smaller than "私の名前は".
   // To merge these two categories of results, we will add the
   // cost penalty based on the cost diff.
   const Segment &first_segment = segments->conversion_segment(0);
   const int base_cost_diff =
       max(0,
           (first_segment.candidate(0).cost -
            first_segment.candidate(only_first_segment_candidate_pos).cost));
   const int base_wcost_diff =
       max(0,
           (first_segment.candidate(0).wcost -
            first_segment.candidate(only_first_segment_candidate_pos).wcost));
   for (size_t i = only_first_segment_candidate_pos;
        i < first_segment.candidates_size();) {
     static const int kOnlyFirstSegmentOffset = 300;
     Segment::Candidate *candidate =
         segments->mutable_conversion_segment(0)->mutable_candidate(i);
     // If the size of candidate's key is greater than or
     // equal to 1st segment's key,
     // it means that the result consumes the entire key.
     // Such results are not appropriate for PARTIALLY_KEY_CONSUMED so erase it.
     if (candidate->key.size() >= first_segment.key().size()) {
       segments->mutable_conversion_segment(0)->erase_candidate(i);
       continue;
     }
     candidate->cost += (base_cost_diff + kOnlyFirstSegmentOffset);
     candidate->wcost += (base_wcost_diff + kOnlyFirstSegmentOffset);
     DCHECK(!(candidate->attributes &
              Segment::Candidate::PARTIALLY_KEY_CONSUMED));
     candidate->attributes |= Segment::Candidate::PARTIALLY_KEY_CONSUMED;
     candidate->consumed_key_size = Util::CharsLen(candidate->key);
     ++i;
   }
 }

 bool ImmutableConverterImpl::IsSegmentEndNode(
     const Segments &segments, const Node *node,
     const vector<uint16> &group, bool is_single_segment) const {
   DCHECK(node->next);
   if (node->next->node_type == Node::EOS_NODE) {
     return true;
   }

   // In reverse conversion, group consecutive white spaces into one segment.
   // For example, "ほん むりょう" -> "ほん", " ", "むりょう".
   if (segments.request_type() == Segments::REVERSE_CONVERSION) {
     const bool this_node_is_ws = ContainsWhiteSpacesOnly(node->key);
     const bool next_node_is_ws = ContainsWhiteSpacesOnly(node->next->key);
     if (this_node_is_ws) {
       return !next_node_is_ws;
     }
     if (next_node_is_ws) {
       return true;
     }
     // If this and next nodes are both non-white spaces, fall back to the
     // subsequent logic.
   }

   const Segment &old_segment = segments.segment(group[node->begin_pos]);
   // |node| and |node->next| should be in same segment due to FIXED_BOUNDAY
   // |node->next| is NOT a boundary. Very strong constraint.
   if (group[node->begin_pos] == group[node->next->begin_pos] &&
       old_segment.segment_type() == Segment::FIXED_BOUNDARY) {
     return false;
   }

   // |node->next| is a boundary. Very strong constraint.
   if (group[node->begin_pos] != group[node->next->begin_pos]) {
     return true;
   }

   // CON_NODE is generated for FIXED_VALUE candidate.
   if (node->node_type == Node::CON_NODE) {
     return true;
   }

   // Grammatically segmented.
   if (segmenter_->IsBoundary(*node, *node->next, is_single_segment)) {
     return true;
   }

   return false;
 }

 Segment *ImmutableConverterImpl::GetInsertTargetSegment(
     const Lattice &lattice,
     const vector<uint16> &group,
     InsertCandidatesType type,
     size_t begin_pos,
     const Node *node,
     Segments *segments) const {
   if (type != MULTI_SEGMENTS) {
     DCHECK(type == SINGLE_SEGMENT || type == ONLY_FIRST_SEGMENT);
     // Realtime conversion that produces only one segment.
     return segments->mutable_segment(segments->segments_size() - 1);
   }

   // 'Normal' conversion. Add new segment and initialize it.
   Segment *segment = segments->add_segment();
   DCHECK(segment);
   segment->clear_candidates();
   segment->set_key(
       lattice.key().substr(begin_pos, node->end_pos - begin_pos));
   const Segment &old_segment = segments->segment(group[node->begin_pos]);
   segment->set_segment_type(old_segment.segment_type());
   return segment;
 }

 void ImmutableConverterImpl::InsertCandidates(
     Segments *segments,
     const Lattice &lattice,
     const vector<uint16> &group,
     size_t max_candidates_size,
     InsertCandidatesType type) const {
   // skip HIS_NODE(s)
   Node *prev = lattice.bos_nodes();
   for (Node *node = lattice.bos_nodes()->next;
        node->next != NULL && node->node_type == Node::HIS_NODE;
        node = node->next) {
     prev = node;
   }

   const size_t expand_size =
       max(static_cast<size_t>(1),
           min(static_cast<size_t>(512), max_candidates_size));

   const bool is_single_segment = (type == SINGLE_SEGMENT);
   NBestGenerator nbest_generator(
       suppression_dictionary_, segmenter_, connector_, pos_matcher_,
       &lattice, suggestion_filter_);

   string original_key;
   for (size_t i = 0; i < segments->conversion_segments_size(); ++i) {
     original_key.append(segments->conversion_segment(i).key());
   }

   size_t begin_pos = string::npos;
   for (Node *node = prev->next; node->next != NULL; node = node->next) {
     if (begin_pos == string::npos) {
       begin_pos = node->begin_pos;
     }

     if (!IsSegmentEndNode(*segments, node, group, is_single_segment)) {
       continue;
     }

     Segment *segment = GetInsertTargetSegment(
         lattice, group, type, begin_pos, node, segments);
     CHECK(segment);

     NBestGenerator::BoundaryCheckMode mode = NBestGenerator::STRICT;
     if (type == SINGLE_SEGMENT) {
       // For realtime conversion.
       mode = NBestGenerator::ONLY_EDGE;
     } else if (segment->segment_type() == Segment::FIXED_BOUNDARY) {
       // Boundary is specified. Skip boundary check in nbest generator.
       mode = NBestGenerator::ONLY_MID;
     }
     nbest_generator.Reset(prev, node->next, mode);

     ExpandCandidates(original_key, &nbest_generator, segment,
                      segments->request_type(), expand_size);

     if (type == MULTI_SEGMENTS || type == SINGLE_SEGMENT) {
       InsertDummyCandidates(segment, expand_size);
     }

     if (node->node_type == Node::CON_NODE) {
       segment->set_segment_type(Segment::FIXED_VALUE);
     }

     if (type == ONLY_FIRST_SEGMENT) {
       break;
     }
     begin_pos = string::npos;
     prev = node;
   }
 }

 bool ImmutableConverterImpl::MakeSegments(const ConversionRequest &request,
                                           const Lattice &lattice,
                                           const vector<uint16> &group,
                                           Segments *segments) const {
   if (segments == NULL) {
     LOG(WARNING) << "Segments is NULL";
     return false;
   }

   const Segments::RequestType type = segments->request_type();
   const bool is_prediction = (type == Segments::PREDICTION ||
                               type == Segments::SUGGESTION ||
                               type == Segments::PARTIAL_PREDICTION ||
                               type == Segments::PARTIAL_SUGGESTION);

   if (is_prediction) {
     const size_t max_candidates_size =
         segments->max_prediction_candidates_size();

     if (request.create_partial_candidates()) {
       // TODO(toshiyuki): It may be better to change this value
       // according to the key length.
       static const size_t kOnlyFirstSegmentCandidateSize = 3;
       const size_t single_segment_candidates_size =
           ((max_candidates_size > kOnlyFirstSegmentCandidateSize) ?
            max_candidates_size - kOnlyFirstSegmentCandidateSize : 1);
       InsertCandidates(segments, lattice, group,
                        single_segment_candidates_size, SINGLE_SEGMENT);

       // Even if single_segment_candidates_size + kOnlyFirstSegmentCandidateSize
       // is greater than max_candidates_size, we cannot skip
       // InsertFirstSegmentToCandidates().
       // For example:
       //   the sum: 11
       //   max_candidates_size: 10
       //   current candidate size: 8
       // In this case, the sum > |max_candidates_size|, but we should not
       // skip calling InsertFirstSegmentToCandidates, as we want to add
       // two candidates.
       const size_t only_first_segment_candidates_size =
           min(max_candidates_size,
               single_segment_candidates_size + kOnlyFirstSegmentCandidateSize);
       InsertFirstSegmentToCandidates(
           segments, lattice, group, only_first_segment_candidates_size);
     } else {
       InsertCandidates(
           segments, lattice, group, max_candidates_size, SINGLE_SEGMENT);
     }
   } else {
     DCHECK(!request.create_partial_candidates());
     // Currently, we assume that REVERSE_CONVERSION only
     // requires 1 result.
     // TODO(taku): support to set the size on REVESER_CONVERSION mode.
     const size_t max_candidates_size =
         ((type == Segments::REVERSE_CONVERSION) ?
          1 : segments->max_conversion_candidates_size());

     // InsertCandidates inserts new segments after the existing
     // conversion segments. So we have to erase old conversion segments.
     // We have to keep old segments for calling InsertCandidates because
     // we need segment constraints like FIXED_BOUNDARY.
     // TODO(toshiyuki): We want more beautiful structure.
     const size_t old_conversion_segments_size =
         segments->conversion_segments_size();
     InsertCandidates(
         segments, lattice, group, max_candidates_size, MULTI_SEGMENTS);
     if (old_conversion_segments_size > 0) {
       segments->erase_segments(segments->history_segments_size(),
                                old_conversion_segments_size);
     }
   }
   return true;
 }

 void ImmutableConverterImpl::MakeGroup(
     const Segments &segments, vector<uint16> *group) const {
   group->clear();
   for (size_t i = 0; i < segments.segments_size(); ++i) {
     for (size_t j = 0; j < segments.segment(i).key().size(); ++j) {
       group->push_back(static_cast<uint16>(i));
     }
   }
   group->push_back(static_cast<uint16>(segments.segments_size()));
 }

 bool ImmutableConverterImpl::ConvertForRequest(
     const ConversionRequest &request, Segments *segments) const {
   const bool is_prediction =
       (segments->request_type() == Segments::PREDICTION ||
        segments->request_type() == Segments::SUGGESTION);

   Lattice *lattice = GetLattice(segments, is_prediction);

   if (!MakeLattice(request, segments, lattice)) {
     LOG(WARNING) << "could not make lattice";
     return false;
   }

   vector<uint16> group;
   MakeGroup(*segments, &group);

   if (is_prediction) {
     if (!PredictionViterbi(*segments, lattice)) {
       LOG(WARNING) << "prediction_viterbi failed";
       return false;
     }
   } else {
     if (!Viterbi(*segments, lattice)) {
       LOG(WARNING) << "viterbi failed";
       return false;
     }
   }

   VLOG(2) << lattice->DebugString();
   if (!MakeSegments(request, *lattice, group, segments)) {
     LOG(WARNING) << "make segments failed";
     return false;
   }

   return true;
 }

 }  // namespace mozc