src/dictionary/system/system_dictionary.cc - mozc - Git at Google

 // Copyright 2010-2014, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 // copyright notice, this list of conditions and the following disclaimer
 // in the documentation and/or other materials provided with the
 // distribution.
 //     * Neither the name of Google Inc. nor the names of its
 // contributors may be used to endorse or promote products derived from
 // this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 // System dictionary maintains following sections
 //  (1) Key trie
 //       Trie containing encoded key. Returns ids for lookup.
 //       We can get the key using the id by performing reverse lookup
 //       against the trie.
 //  (2) Value trie
 //       Trie containing encoded value. Returns ids for lookup.
 //       We can get the value using the id by performing reverse lookup
 //       against the trie.
 //  (3) Token array
 //       Array containing encoded tokens. Array index is the id in key trie.
 //       Token contains cost, POS, the id in key trie, etc.
 //  (4) Table for high frequent POS(left/right ID)
 //       Frequenty appearing POSs are stored as POS ids in token info for
 //       reducing binary size. This table is the map from the id to the
 //       actual ids.

 #include "dictionary/system/system_dictionary.h"

 #include <algorithm>
 #include <cstring>
 #include <limits>
 #include <map>
 #include <string>
 #include <utility>
 #include <vector>

 #include "base/logging.h"
 #include "base/port.h"
 #include "base/string_piece.h"
 #include "base/system_util.h"
 #include "base/util.h"
 #include "converter/node_allocator.h"
 #include "dictionary/dictionary_token.h"
 #include "dictionary/file/dictionary_file.h"
 #include "dictionary/system/codec_interface.h"
 #include "dictionary/system/words_info.h"
 #include "storage/louds/bit_vector_based_array.h"
 #include "storage/louds/louds_trie.h"

 namespace mozc {
 namespace dictionary {

 using mozc::storage::louds::BitVectorBasedArray;
 using mozc::storage::louds::KeyExpansionTable;
 using mozc::storage::louds::LoudsTrie;

 namespace {

 // rbx_array default setting
 const int kMinRbxBlobSize = 4;
 const char *kReverseLookupCache = "reverse_lookup_cache";

 class ReverseLookupCache : public NodeAllocatorData::Data {
  public:
   multimap<int, SystemDictionary::ReverseLookupResult> results;
 };

 bool IsCacheAvailable(
     const set<int> &id_set,
     const multimap<int, SystemDictionary::ReverseLookupResult> &results) {
   for (set<int>::const_iterator itr = id_set.begin();
        itr != id_set.end();
        ++itr) {
     if (results.find(*itr) == results.end()) {
       return false;
     }
   }
   return true;
 }

 // Expansion table format:
 // "<Character to expand>[<Expanded character 1><Expanded character 2>...]"
 //
 // Only characters that will be encoded into 1-byte ASCII char are allowed in
 // the table.
 //
 // Note that this implementation has potential issue that the key/values may
 // be mixed.
 // TODO(hidehiko): Clean up this hacky implementation.
 const char *kHiraganaExpansionTable[] = {
   "\xe3\x81\x82\xe3\x81\x82\xe3\x81\x81",  // "ああぁ"
   "\xe3\x81\x84\xe3\x81\x84\xe3\x81\x83",  // "いいぃ"
   "\xe3\x81\x86\xe3\x81\x86\xe3\x81\x85\xe3\x82\x94",  // "ううぅゔ"
   "\xe3\x81\x88\xe3\x81\x88\xe3\x81\x87",  // "ええぇ"
   "\xe3\x81\x8a\xe3\x81\x8a\xe3\x81\x89",  // "おおぉ"
   "\xe3\x81\x8b\xe3\x81\x8b\xe3\x81\x8c",  // "かかが"
   "\xe3\x81\x8d\xe3\x81\x8d\xe3\x81\x8e",  // "ききぎ"
   "\xe3\x81\x8f\xe3\x81\x8f\xe3\x81\x90",  // "くくぐ"
   "\xe3\x81\x91\xe3\x81\x91\xe3\x81\x92",  // "けけげ"
   "\xe3\x81\x93\xe3\x81\x93\xe3\x81\x94",  // "ここご"
   "\xe3\x81\x95\xe3\x81\x95\xe3\x81\x96",  // "ささざ"
   "\xe3\x81\x97\xe3\x81\x97\xe3\x81\x98",  // "ししじ"
   "\xe3\x81\x99\xe3\x81\x99\xe3\x81\x9a",  // "すすず"
   "\xe3\x81\x9b\xe3\x81\x9b\xe3\x81\x9c",  // "せせぜ"
   "\xe3\x81\x9d\xe3\x81\x9d\xe3\x81\x9e",  // "そそぞ"
   "\xe3\x81\x9f\xe3\x81\x9f\xe3\x81\xa0",  // "たただ"
   "\xe3\x81\xa1\xe3\x81\xa1\xe3\x81\xa2",  // "ちちぢ"
   "\xe3\x81\xa4\xe3\x81\xa4\xe3\x81\xa3\xe3\x81\xa5",  // "つつっづ"
   "\xe3\x81\xa6\xe3\x81\xa6\xe3\x81\xa7",  // "ててで"
   "\xe3\x81\xa8\xe3\x81\xa8\xe3\x81\xa9",  // "ととど"
   "\xe3\x81\xaf\xe3\x81\xaf\xe3\x81\xb0\xe3\x81\xb1",  // "ははばぱ"
   "\xe3\x81\xb2\xe3\x81\xb2\xe3\x81\xb3\xe3\x81\xb4",  // "ひひびぴ"
   "\xe3\x81\xb5\xe3\x81\xb5\xe3\x81\xb6\xe3\x81\xb7",  // "ふふぶぷ"
   "\xe3\x81\xb8\xe3\x81\xb8\xe3\x81\xb9\xe3\x81\xba",  // "へへべぺ"
   "\xe3\x81\xbb\xe3\x81\xbb\xe3\x81\xbc\xe3\x81\xbd",  // "ほほぼぽ"
   "\xe3\x82\x84\xe3\x82\x84\xe3\x82\x83",  // "ややゃ"
   "\xe3\x82\x86\xe3\x82\x86\xe3\x82\x85",  // "ゆゆゅ"
   "\xe3\x82\x88\xe3\x82\x88\xe3\x82\x87",  // "よよょ"
   "\xe3\x82\x8f\xe3\x82\x8f\xe3\x82\x8e",  // "わわゎ"
 };

 const uint32 kAsciiRange = 0x80;

 // Confirm that all the characters are within ASCII range.
 bool ContainsAsciiCodeOnly(const string &str) {
   for (string::const_iterator it = str.begin(); it != str.end(); ++it) {
     if (static_cast<unsigned char>(*it) >= kAsciiRange) {
       return false;
     }
   }
   return true;
 }

 void SetKeyExpansion(char key, const string &expansion,
                      KeyExpansionTable *key_expansion_table) {
   key_expansion_table->Add(key, expansion);
 }

 void BuildHiraganaExpansionTable(
     const SystemDictionaryCodecInterface &codec,
     KeyExpansionTable *encoded_table) {
   for (size_t index = 0; index < arraysize(kHiraganaExpansionTable); ++index) {
     string encoded;
     codec.EncodeKey(kHiraganaExpansionTable[index], &encoded);
     DCHECK(ContainsAsciiCodeOnly(encoded)) <<
         "Encoded expansion data are supposed to fit within ASCII";

     DCHECK_GT(encoded.size(), 0) << "Expansion data is empty";

     if (encoded.size() == 1) {
       continue;
     } else {
       SetKeyExpansion(encoded[0], encoded.substr(1), encoded_table);
     }
   }
 }

 // Note that this class is just introduced due to performance reason.
 // Conceptually, it should be in somewhere close to the codec implementation
 // (see comments in Next method for details).
 // However, it is necessary to refactor a bit larger area, especially around
 // codec implementations, to make it happen.
 // Considering the merit to introduce this class, we temporarily put it here.
 // TODO(hidehiko): Move this class into a Codec related file.
 class TokenDecodeIterator {
  public:
   TokenDecodeIterator(
       const SystemDictionaryCodecInterface *codec,
       const LoudsTrie *value_trie,
       const uint32 *frequent_pos,
       StringPiece key,
       const uint8 *ptr)
       : codec_(codec),
         value_trie_(value_trie),
         frequent_pos_(frequent_pos),
         key_(key),
         state_(HAS_NEXT),
         ptr_(ptr),
         token_info_(NULL) {
     key.CopyToString(&token_.key);
     NextInternal();
   }

   ~TokenDecodeIterator() {
   }

   const TokenInfo& Get() const { return token_info_; }
   bool Done() const { return state_ == DONE; }

   void Next() {
     DCHECK_NE(state_, DONE);
     if (state_ == LAST_TOKEN) {
       state_ = DONE;
       return;
     }
     NextInternal();
   }

  private:
   enum State {
     HAS_NEXT,
     LAST_TOKEN,
     DONE,
   };

   void NextInternal() {
     // Reset token_info with preserving some needed info in previous token.
     int prev_id_in_value_trie = token_info_.id_in_value_trie;
     token_info_.Clear();
     token_info_.token = &token_;

     // Do not clear key in token.
     token_info_.token->attributes = Token::NONE;

     // This implementation is depending on the internal behavior of DecodeToken
     // especially which fields are updated or not. Important fields are:
     // Token::key, Token::value : key and value are never updated.
     // Token::cost : always updated.
     // Token::lid, Token::rid : updated iff the pos_type is neither
     //   FREQUENT_POS nor SAME_AS_PREV_POS.
     // Token::attributes : updated iff the value is SPELLING_COLLECTION.
     // TokenInfo::id_in_value_trie : updated iff the value_type is
     //   DEFAULT_VALUE.
     // Thus, by not-reseting Token instance intentionally, we can skip most
     //   SAME_AS_PREV operations.
     // The exception is Token::attributes. It is not-always set, so we need
     // reset it everytime.
     // This kind of structure should be packed in the codec or some
     // related but new class.
     int read_bytes;
     if (!codec_->DecodeToken(ptr_, &token_info_, &read_bytes)) {
       state_ = LAST_TOKEN;
     }
     ptr_ += read_bytes;

     // Fill remaining values.
     switch (token_info_.value_type) {
       case TokenInfo::DEFAULT_VALUE: {
         token_.value.clear();
         LookupValue(token_info_.id_in_value_trie, &token_.value);
         break;
       }
       case TokenInfo::SAME_AS_PREV_VALUE: {
         DCHECK_NE(prev_id_in_value_trie, -1);
         token_info_.id_in_value_trie = prev_id_in_value_trie;
         // We can keep the current value here.
         break;
       }
       case TokenInfo::AS_IS_HIRAGANA: {
         token_.value = token_.key;
         break;
       }
       case TokenInfo::AS_IS_KATAKANA: {
         if (!key_.empty() && key_katakana_.empty()) {
           Util::HiraganaToKatakana(key_, &key_katakana_);
         }
         token_.value = key_katakana_;
         break;
       }
       default: {
         LOG(DFATAL) << "unknown value_type: " << token_info_.value_type;
         break;
       }
     }

     if (token_info_.accent_encoding_type == TokenInfo::EMBEDDED_IN_TOKEN) {
       token_.value += "_" + Util::StringPrintf("%d", token_info_.accent_type);
     }

     if (token_info_.pos_type == TokenInfo::FREQUENT_POS) {
       const uint32 pos = frequent_pos_[token_info_.id_in_frequent_pos_map];
       token_.lid = pos >> 16;
       token_.rid = pos & 0xffff;
     }
   }

   void LookupValue(int id, string *value) const {
     char buffer[LoudsTrie::kMaxDepth + 1];
     const char *encoded_value = value_trie_->Reverse(id, buffer);
     const size_t encoded_value_len =
         LoudsTrie::kMaxDepth - (encoded_value - buffer);
     DCHECK_EQ(encoded_value_len, strlen(encoded_value));
     codec_->DecodeValue(StringPiece(encoded_value, encoded_value_len), value);
   }

   const SystemDictionaryCodecInterface *codec_;
   const LoudsTrie *value_trie_;
   const uint32 *frequent_pos_;

   const StringPiece key_;
   // Katakana key will be lazily initialized.
   string key_katakana_;

   State state_;
   const uint8 *ptr_;

   TokenInfo token_info_;
   Token token_;

   DISALLOW_COPY_AND_ASSIGN(TokenDecodeIterator);
 };

 // Iterator for scanning token array.
 // This iterator does not return actual token info but returns
 // id data and the position only.
 // This will be used only for reverse lookup.
 // Forward lookup does not need such iterator because it can access
 // a token directly without linear scan.
 //
 //  Usage:
 //    for (TokenScanIterator iter(codec_, token_array_.get());
 //         !iter.Done(); iter.Next()) {
 //      const TokenScanIterator::Result &result = iter.Get();
 //      // Do something with |result|.
 //    }
 class TokenScanIterator {
  public:
   struct Result {
     // Value id for the current token
     int value_id;
     // Index (= key id) for the current token
     int index;
     // Offset from the tokens section beginning.
     // (token_array_->Get(id_in_key_trie) ==
     //  token_array_->Get(0) + tokens_offset)
     int tokens_offset;
   };

   TokenScanIterator(
       const SystemDictionaryCodecInterface *codec,
       const storage::louds::BitVectorBasedArray *token_array)
       : codec_(codec),
         termination_flag_(codec->GetTokensTerminationFlag()),
         state_(HAS_NEXT),
         offset_(0),
         tokens_offset_(0),
         index_(0) {
     size_t dummy_length = 0;
     encoded_tokens_ptr_ =
         reinterpret_cast<const uint8*>(token_array->Get(0, &dummy_length));
     NextInternal();
   }

   ~TokenScanIterator() {
   }

   const Result &Get() const { return result_; }

   bool Done() const { return state_ == DONE; }

   void Next() {
     DCHECK_NE(state_, DONE);
     NextInternal();
   }

  private:
   enum State {
     HAS_NEXT,
     DONE,
   };

   void NextInternal() {
     if (encoded_tokens_ptr_[offset_] == termination_flag_) {
       state_ = DONE;
       return;
     }
     int read_bytes;
     result_.value_id = -1;
     result_.index = index_;
     result_.tokens_offset = tokens_offset_;
     const bool is_last_token =
         !(codec_->ReadTokenForReverseLookup(encoded_tokens_ptr_ + offset_,
                                             &result_.value_id, &read_bytes));
     if (is_last_token) {
       int tokens_size = offset_ + read_bytes - tokens_offset_;
       if (tokens_size < kMinRbxBlobSize) {
         tokens_size = kMinRbxBlobSize;
       }
       tokens_offset_ += tokens_size;
       ++index_;
       offset_ = tokens_offset_;
     } else {
       offset_ += read_bytes;
     }
   }

   const SystemDictionaryCodecInterface *codec_;
   const uint8 *encoded_tokens_ptr_;
   const uint8 termination_flag_;
   State state_;
   Result result_;
   int offset_;
   int tokens_offset_;
   int index_;

  private:
   DISALLOW_COPY_AND_ASSIGN(TokenScanIterator);
 };

 }  // namespace

 class ReverseLookupIndex {
  public:
   ReverseLookupIndex(
       const SystemDictionaryCodecInterface *codec,
       const storage::louds::BitVectorBasedArray *token_array) {
     // Gets id size.
     int value_id_max = -1;
     for (TokenScanIterator iter(codec, token_array);
          !iter.Done(); iter.Next()) {
       const TokenScanIterator::Result &result = iter.Get();
       value_id_max = max(value_id_max, result.value_id);
     }

     CHECK_GE(value_id_max, 0);
     index_size_ = value_id_max + 1;
     index_.reset(new ReverseLookupResultArray[index_size_]);

     // Gets result size for each ids.
     for (TokenScanIterator iter(codec, token_array);
          !iter.Done(); iter.Next()) {
       const TokenScanIterator::Result &result = iter.Get();
       if (result.value_id != -1) {
         DCHECK_LT(result.value_id, index_size_);
         ++(index_[result.value_id].size);
       }
     }

     for (size_t i = 0; i < index_size_; ++i) {
       index_[i].results.reset(
           new SystemDictionary::ReverseLookupResult[index_[i].size]);
     }

     // Builds index.
     for (TokenScanIterator iter(codec, token_array);
          !iter.Done(); iter.Next()) {
       const TokenScanIterator::Result &result = iter.Get();
       if (result.value_id == -1) {
         continue;
       }

       DCHECK_LT(result.value_id, index_size_);
       ReverseLookupResultArray *result_array = &index_[result.value_id];

       // Finds uninitialized result.
       size_t result_index = 0;
       for (result_index = 0;
            result_index < result_array->size;
            ++result_index) {
         const SystemDictionary::ReverseLookupResult &lookup_result =
             result_array->results[result_index];
         if (lookup_result.tokens_offset == -1 &&
             lookup_result.id_in_key_trie == -1) {
           result_array->results[result_index].tokens_offset =
               result.tokens_offset;
           result_array->results[result_index].id_in_key_trie = result.index;
           break;
         }
       }
     }

     CHECK(index_.get() != NULL);
   }

   ~ReverseLookupIndex() {}

   void FillResultMap(
       const set<int> &id_set,
       multimap<int, SystemDictionary::ReverseLookupResult> *result_map) {
     for (set<int>::const_iterator id_itr  = id_set.begin();
          id_itr != id_set.end(); ++id_itr) {
       const ReverseLookupResultArray &result_array = index_[*id_itr];
       for (size_t i = 0; i < result_array.size; ++i) {
         result_map->insert(make_pair(*id_itr, result_array.results[i]));
       }
     }
   }

  private:
   struct ReverseLookupResultArray {
     ReverseLookupResultArray() : size(0) {}
     // Use scoped_ptr for reducing memory consumption as possible.
     // Using vector requires 90 MB even when we call resize explicitly.
     // On the other hand, scoped_ptr requires 57 MB.
     scoped_ptr<SystemDictionary::ReverseLookupResult[]> results;
     size_t size;
   };

   // Use scoped array for reducing memory consumption as possible.
   scoped_ptr<ReverseLookupResultArray[]> index_;
   size_t index_size_;

   DISALLOW_COPY_AND_ASSIGN(ReverseLookupIndex);
 };

 SystemDictionary::Builder::Builder(const string &filename)
     : type_(FILENAME), filename_(filename),
       ptr_(NULL), len_(-1), options_(NONE), codec_(NULL)  {}

 SystemDictionary::Builder::Builder(const char *ptr, int len)
     : type_(IMAGE), filename_(""),
       ptr_(ptr), len_(len), options_(NONE), codec_(NULL) {}

 SystemDictionary::Builder::~Builder() {}

 void SystemDictionary::Builder::SetOptions(Options options) {
   options_ = options;
 }

 // This does not have the ownership of |codec|
 void SystemDictionary::Builder::SetCodec(
     const SystemDictionaryCodecInterface *codec) {
   codec_ = codec;
 }

 SystemDictionary *SystemDictionary::Builder::Build() {
   if (codec_ == NULL) {
     codec_ = SystemDictionaryCodecFactory::GetCodec();
   }

   scoped_ptr<SystemDictionary> instance(new SystemDictionary(codec_));

   switch (type_) {
     case FILENAME:
       if (!instance->dictionary_file_->OpenFromFile(filename_)) {
         LOG(ERROR) << "Failed to open system dictionary file";
         return NULL;
       }
       break;
     case IMAGE:
       // Make the dictionary not to be paged out.
       // We don't check the return value because the process doesn't necessarily
       // has the priviledge to mlock.
       // Note that we don't munlock the space because it's always better to keep
       // the singleton system dictionary paged in as long as the process runs.
       SystemUtil::MaybeMLock(ptr_, len_);
       if (!instance->dictionary_file_->OpenFromImage(ptr_, len_)) {
         LOG(ERROR) << "Failed to open system dictionary file";
         return NULL;
       }
       break;
     default:
       LOG(ERROR) << "Invalid input type.";
       return NULL;
   }

   if (!instance->OpenDictionaryFile(
           (options_ & ENABLE_REVERSE_LOOKUP_INDEX) != 0)) {
     LOG(ERROR) << "Failed to create system dictionary";
     return NULL;
   }

   return instance.release();
 }

 SystemDictionary::SystemDictionary(const SystemDictionaryCodecInterface *codec)
     : key_trie_(new LoudsTrie),
       value_trie_(new LoudsTrie),
       token_array_(new BitVectorBasedArray),
       dictionary_file_(new DictionaryFile),
       frequent_pos_(NULL),
       codec_(codec) {}

 SystemDictionary::~SystemDictionary() {}

 // static
 SystemDictionary *SystemDictionary::CreateSystemDictionaryFromFileWithOptions(
     const string &filename, Options options) {
   Builder builder(filename);
   builder.SetOptions(options);
   return builder.Build();
 }

 // static
 SystemDictionary *SystemDictionary::CreateSystemDictionaryFromFile(
     const string &filename) {
   return CreateSystemDictionaryFromFileWithOptions(filename, NONE);
 }

 // static
 SystemDictionary *SystemDictionary::CreateSystemDictionaryFromImageWithOptions(
     const char *ptr, int len, Options options) {
   Builder builder(ptr, len);
   builder.SetOptions(options);
   return builder.Build();
 }

 // static
 SystemDictionary *SystemDictionary::CreateSystemDictionaryFromImage(
     const char *ptr, int len) {
   return CreateSystemDictionaryFromImageWithOptions(ptr, len, NONE);
 }

 bool SystemDictionary::OpenDictionaryFile(bool enable_reverse_lookup_index) {
   int len;

   const uint8 *key_image = reinterpret_cast<const uint8 *>(
       dictionary_file_->GetSection(codec_->GetSectionNameForKey(), &len));
   if (!key_trie_->Open(key_image)) {
     LOG(ERROR) << "cannot open key trie";
     return false;
   }

   BuildHiraganaExpansionTable(*codec_, &hiragana_expansion_table_);

   const uint8 *value_image = reinterpret_cast<const uint8 *>(
       dictionary_file_->GetSection(codec_->GetSectionNameForValue(), &len));
   if (!value_trie_->Open(value_image)) {
     LOG(ERROR) << "can not open value trie";
     return false;
   }

   const unsigned char *token_image = reinterpret_cast<const unsigned char *>(
       dictionary_file_->GetSection(codec_->GetSectionNameForTokens(), &len));
   token_array_->Open(token_image);

   frequent_pos_ = reinterpret_cast<const uint32*>(
       dictionary_file_->GetSection(codec_->GetSectionNameForPos(), &len));
   if (frequent_pos_ == NULL) {
     LOG(ERROR) << "can not find frequent pos section";
     return false;
   }

   if (enable_reverse_lookup_index) {
     InitReverseLookupIndex();
   }

   return true;
 }

 void SystemDictionary::InitReverseLookupIndex() {
   if (reverse_lookup_index_.get() != NULL) {
     return;
   }

   reverse_lookup_index_.reset(
       new ReverseLookupIndex(codec_, token_array_.get()));
 }

 bool SystemDictionary::HasKey(StringPiece key) const {
   string encoded_key;
   codec_->EncodeKey(key, &encoded_key);
   return (key_trie_->ExactSearch(encoded_key) != -1);
 }

 bool SystemDictionary::HasValue(StringPiece value) const {
   string encoded_value;
   codec_->EncodeValue(value, &encoded_value);
   if (value_trie_->ExactSearch(encoded_value) != -1) {
     return true;
   }

   // Because Hiragana, Katakana and Alphabet words are not stored in the
   // value_trie for the data compression.  They are only stored in the
   // key_trie with flags.  So we also need to check the existence in
   // the key_trie.

   // Normalize the value as the key.  This process depends on the
   // implementation of SystemDictionaryBuilder::BuildValueTrie.
   string key;
   Util::KatakanaToHiragana(value, &key);

   string encoded_key;
   codec_->EncodeKey(key, &encoded_key);
   const int key_id = key_trie_->ExactSearch(encoded_key);
   if (key_id == -1) {
     return false;
   }

   // We need to check the contents of value_trie for Katakana values.
   // If (key, value) = (かな, カナ) is in the dictionary, "カナ" is
   // not used as a key for value_trie or key_trie.  Only "かな" is
   // used as a key for key_trie.  If we accept this limitation, we can
   // skip the following code.
   //
   // If we add "if (key == value) return true;" here, we can check
   // almost all cases of Hiragana and Alphabet words without the
   // following iteration.  However, when (mozc, MOZC) is stored but
   // (mozc, mozc) is NOT stored, HasValue("mozc") wrongly returns
   // true.

   // Get the block of tokens for this key.
   size_t length = 0;  // unused
   const uint8 *encoded_tokens_ptr = reinterpret_cast<const uint8*>(
       token_array_->Get(key_id, &length));

   // Check tokens.
   for (TokenDecodeIterator iter(
            codec_, value_trie_.get(), frequent_pos_, key, encoded_tokens_ptr);
        !iter.Done(); iter.Next()) {
     const Token *token = iter.Get().token;
     if (value == token->value) {
       return true;
     }
   }
   return false;
 }

 namespace {

 // Converts a value of SystemDictionary::Callback::ResultType to the
 // corresponding value of LoudsTrie::Callback::ResultType.
 inline LoudsTrie::Callback::ResultType ConvertResultType(
     const SystemDictionary::Callback::ResultType result) {
   switch (result) {
     case SystemDictionary::Callback::TRAVERSE_DONE: {
       return LoudsTrie::Callback::SEARCH_DONE;
     }
     case SystemDictionary::Callback::TRAVERSE_NEXT_KEY: {
       return LoudsTrie::Callback::SEARCH_CONTINUE;
     }
     case SystemDictionary::Callback::TRAVERSE_CULL: {
       return LoudsTrie::Callback::SEARCH_CULL;
     }
     default: {
       DLOG(FATAL) << "Enum value " << result << " cannot be converted";
       return LoudsTrie::Callback::SEARCH_DONE;  // dummy
     }
   }
 }

 // Collects short keys preferentially.
 class ShortKeyCollector : public LoudsTrie::Callback {
  public:
   // Holds a lookup result from trie.
   struct Entry {
     string encoded_key;  // Encoded lookup key
     string encoded_actual_key;  // Encoded actual key in trie (expanded key)
     size_t actual_key_len;  // Decoded actual key length
     int key_id;  // Key ID in trie
   };

   ShortKeyCollector(const SystemDictionaryCodecInterface *codec,
                     StringPiece original_encoded_key,
                     size_t min_key_len,
                     size_t limit)
       : codec_(codec),
         original_encoded_key_(original_encoded_key),
         min_key_len_(min_key_len),
         limit_(limit),
         current_max_key_len_(0),
         num_max_key_length_entries_(0) {
     entry_list_.reserve(limit);
   }

   virtual ResultType Run(const char *trie_key,
                          size_t trie_key_len, int key_id) {
     const StringPiece encoded_actual_key(trie_key, trie_key_len);

     // First calculate the length of decoded key.
     // Note: In the current kana modifier insensitive lookup mechanism, the
     // lengths of the original lookup key and its expanded key are equal, so we
     // can omit the construction of lookup key by calculating the length of
     // decoded actual key. Just DCHECK here.
     const size_t key_len = codec_->GetDecodedKeyLength(encoded_actual_key);
     DCHECK_EQ(key_len, codec_->GetDecodedKeyLength(
         original_encoded_key_.as_string() +
         string(trie_key + original_encoded_key_.size(),
                trie_key_len - original_encoded_key_.size())));
     // Uninterested in too short key.
     if (key_len < min_key_len_) {
       return SEARCH_CONTINUE;
     }

     // Check the key length after decoding and update the internal state. As
     // explained above, the length of actual key (expanded key) is equal to that
     // of key.
     const size_t actual_key_len = key_len;
     if (actual_key_len > current_max_key_len_) {
       if (entry_list_.size() > limit_) {
         return SEARCH_CULL;
       }
       current_max_key_len_ = actual_key_len;
       num_max_key_length_entries_ = 1;
     } else if (actual_key_len == current_max_key_len_) {
       ++num_max_key_length_entries_;
     } else {
       if (entry_list_.size() - num_max_key_length_entries_ + 1 >= limit_) {
         RemoveAllMaxKeyLengthEntries();
         UpdateMaxKeyLengthInternal();
       }
     }

     // Keep this entry at the back.
     entry_list_.push_back(Entry());
     entry_list_.back().encoded_key.reserve(trie_key_len);
     original_encoded_key_.CopyToString(&entry_list_.back().encoded_key);
     entry_list_.back().encoded_key.append(
         trie_key + original_encoded_key_.size(),
         trie_key_len - original_encoded_key_.size());
     entry_list_.back().encoded_actual_key.assign(trie_key, trie_key_len);
     entry_list_.back().actual_key_len = actual_key_len;
     entry_list_.back().key_id = key_id;

     return SEARCH_CONTINUE;
   }

   const vector<Entry> &entry_list() const {
     return entry_list_;
   }

  private:
   void RemoveAllMaxKeyLengthEntries() {
     for (size_t i = 0; i < entry_list_.size(); ) {
       Entry *result = &entry_list_[i];
       if (result->actual_key_len < current_max_key_len_) {
         // This result should still be kept.
         ++i;
         continue;
       }

       // Remove the i-th result by swapping it for the last one.
       Entry *last_result = &entry_list_.back();
       swap(result->encoded_key, last_result->encoded_key);
       swap(result->encoded_actual_key, last_result->encoded_actual_key);
       result->actual_key_len = last_result->actual_key_len;
       result->key_id = last_result->key_id;
       entry_list_.pop_back();

       // Do not update the |i| here.
     }
   }

   void UpdateMaxKeyLengthInternal() {
     current_max_key_len_ = 0;
     num_max_key_length_entries_ = 0;
     for (size_t i = 0; i < entry_list_.size(); ++i) {
       if (entry_list_[i].actual_key_len > current_max_key_len_) {
         current_max_key_len_ = entry_list_[i].actual_key_len;
         num_max_key_length_entries_ = 1;
       } else if (entry_list_[i].actual_key_len == current_max_key_len_) {
         ++num_max_key_length_entries_;
       }
     }
   }

   const SystemDictionaryCodecInterface *codec_;
   const StringPiece original_encoded_key_;

   // Filter conditions.
   const size_t min_key_len_;
   const size_t limit_;

   // Internal state for tracking current maximum key length.
   size_t current_max_key_len_;
   size_t num_max_key_length_entries_;

   // Contains lookup results.
   vector<Entry> entry_list_;

   DISALLOW_COPY_AND_ASSIGN(ShortKeyCollector);
 };

 }  // namespace

 void SystemDictionary::LookupPredictive(
     StringPiece key, bool use_kana_modifier_insensitive_lookup,
     Callback *callback) const {
   // Do nothing for empty key, although looking up all the entries with empty
   // string seems natural.
   if (key.empty()) {
     return;
   }

   string lookup_key_str;
   codec_->EncodeKey(key, &lookup_key_str);
   if (lookup_key_str.size() > LoudsTrie::kMaxDepth) {
     return;
   }

   // First, collect up to 64 keys so that results are as short as possible,
   // which emulates BFS over trie.
   ShortKeyCollector collector(codec_, lookup_key_str, 0, 64);
   const KeyExpansionTable &table =
       use_kana_modifier_insensitive_lookup ?
       hiragana_expansion_table_ : KeyExpansionTable::GetDefaultInstance();
   key_trie_->PredictiveSearchWithKeyExpansion(lookup_key_str.c_str(), table,
                                               &collector);

   string decoded_key, actual_key;
   for (size_t i = 0; i < collector.entry_list().size(); ++i) {
     const ShortKeyCollector::Entry &entry = collector.entry_list()[i];

     decoded_key.clear();
     codec_->DecodeKey(entry.encoded_key, &decoded_key);
     switch (callback->OnKey(decoded_key)) {
       case DictionaryInterface::Callback::TRAVERSE_DONE:
         return;
       case DictionaryInterface::Callback::TRAVERSE_NEXT_KEY:
         continue;
       case DictionaryInterface::Callback::TRAVERSE_CULL:
         LOG(FATAL) << "Culling is not implemented.";
         continue;
       default:
         break;
     }

     actual_key.clear();
     codec_->DecodeKey(entry.encoded_actual_key, &actual_key);
     const bool is_expanded = entry.encoded_key != entry.encoded_actual_key;
     switch (callback->OnActualKey(decoded_key, actual_key, is_expanded)) {
       case DictionaryInterface::Callback::TRAVERSE_DONE:
         return;
       case DictionaryInterface::Callback::TRAVERSE_NEXT_KEY:
         continue;
       case DictionaryInterface::Callback::TRAVERSE_CULL:
         LOG(FATAL) << "Culling is not implemented.";
         continue;
       default:
         break;
     }

     size_t dummy_length = 0;
     const uint8 *encoded_tokens_ptr = reinterpret_cast<const uint8*>(
         token_array_->Get(entry.key_id, &dummy_length));
     for (TokenDecodeIterator iter(codec_, value_trie_.get(),
                                   frequent_pos_, actual_key,
                                   encoded_tokens_ptr);
          !iter.Done(); iter.Next()) {
       const TokenInfo &token_info = iter.Get();
       const DictionaryInterface::Callback::ResultType result =
           callback->OnToken(decoded_key, actual_key, *token_info.token);
       if (result == DictionaryInterface::Callback::TRAVERSE_DONE) {
         return;
       }
       if (result == DictionaryInterface::Callback::TRAVERSE_NEXT_KEY) {
         break;
       }
       DCHECK_NE(DictionaryInterface::Callback::TRAVERSE_CULL, result)
           << "Culling is not implemented.";
     }
   }
 }

 namespace {

 // A general purpose traverser for prefix search over the system dictionary.
 class PrefixTraverser : public LoudsTrie::Callback {
  public:
   PrefixTraverser(const BitVectorBasedArray *token_array,
                   const LoudsTrie *value_trie,
                   const SystemDictionaryCodecInterface *codec,
                   const uint32 *frequent_pos,
                   StringPiece original_encoded_key,
                   SystemDictionary::Callback *callback)
       : token_array_(token_array),
         value_trie_(value_trie),
         codec_(codec),
         frequent_pos_(frequent_pos),
         original_encoded_key_(original_encoded_key),
         callback_(callback) {
   }

   virtual ResultType Run(const char *trie_key,
                          size_t trie_key_len, int key_id) {
     string key, actual_key;
     SystemDictionary::Callback::ResultType result = RunOnKeyAndOnActualKey(
         trie_key, trie_key_len, key_id, &key, &actual_key);
     if (result != SystemDictionary::Callback::TRAVERSE_CONTINUE) {
       return ConvertResultType(result);
     }

     // Decode tokens and call back OnToken() for each token.
     size_t dummy_length = 0;
     const uint8 *encoded_tokens_ptr = reinterpret_cast<const uint8*>(
         token_array_->Get(key_id, &dummy_length));
     for (TokenDecodeIterator iter(
              codec_, value_trie_, frequent_pos_,
              actual_key, encoded_tokens_ptr);
          !iter.Done(); iter.Next()) {
       const TokenInfo &token_info = iter.Get();
       result = callback_->OnToken(key, actual_key, *token_info.token);
       if (result != SystemDictionary::Callback::TRAVERSE_CONTINUE) {
         return ConvertResultType(result);
       }
     }
     return SEARCH_CONTINUE;
   }

  protected:
   SystemDictionary::Callback::ResultType RunOnKeyAndOnActualKey(
       const char *trie_key, size_t trie_key_len, int key_id,
       string *key, string *actual_key) {
     // Decode key and call back OnKey().
     const StringPiece encoded_key =
         original_encoded_key_.substr(0, trie_key_len);
     codec_->DecodeKey(encoded_key, key);
     SystemDictionary::Callback::ResultType result = callback_->OnKey(*key);
     if (result != SystemDictionary::Callback::TRAVERSE_CONTINUE) {
       return result;
     }

     // Decode actual key (expanded key) and call back OnActualKey().  To check
     // if the actual key is expanded, compare the keys in encoded domain for
     // performance (this is guaranteed as codec is a bijection).
     const StringPiece encoded_actual_key(trie_key, trie_key_len);
     actual_key->reserve(key->size());
     codec_->DecodeKey(encoded_actual_key, actual_key);
     const bool is_expanded = encoded_actual_key != encoded_key;
     DCHECK_EQ(is_expanded, *key != *actual_key);
     return callback_->OnActualKey(*key, *actual_key, is_expanded);
   }

   const BitVectorBasedArray *token_array_;
   const LoudsTrie *value_trie_;
   const SystemDictionaryCodecInterface *codec_;
   const uint32 *frequent_pos_;
   const StringPiece original_encoded_key_;
   SystemDictionary::Callback *callback_;

  private:
   DISALLOW_COPY_AND_ASSIGN(PrefixTraverser);
 };

 class ReverseLookupCallbackWrapper : public DictionaryInterface::Callback {
  public:
   explicit ReverseLookupCallbackWrapper(DictionaryInterface::Callback *callback)
       : callback_(callback) {}
   virtual ~ReverseLookupCallbackWrapper() {}
   virtual SystemDictionary::Callback::ResultType OnToken(
       StringPiece key, StringPiece actual_key, const Token &token) {
     Token modified_token = token;
     modified_token.key.swap(modified_token.value);
     return callback_->OnToken(key, actual_key, modified_token);
   }

   DictionaryInterface::Callback *callback_;
 };

 }  // namespace

 void SystemDictionary::LookupPrefix(
     StringPiece key,
     bool use_kana_modifier_insensitive_lookup,
     Callback *callback) const {
   string original_encoded_key;
   codec_->EncodeKey(key, &original_encoded_key);
   PrefixTraverser traverser(token_array_.get(), value_trie_.get(), codec_,
                             frequent_pos_, original_encoded_key, callback);
   const KeyExpansionTable &table = use_kana_modifier_insensitive_lookup ?
       hiragana_expansion_table_ : KeyExpansionTable::GetDefaultInstance();
   key_trie_->PrefixSearchWithKeyExpansion(
       original_encoded_key.c_str(), table, &traverser);
 }

 void SystemDictionary::LookupExact(StringPiece key, Callback *callback) const {
   // Find the key in the key trie.
   string encoded_key;
   codec_->EncodeKey(key, &encoded_key);
   const int key_id = key_trie_->ExactSearch(encoded_key);
   if (key_id == -1) {
     return;
   }
   if (callback->OnKey(key) != Callback::TRAVERSE_CONTINUE) {
     return;
   }

   // Get the block of tokens for this key.
   size_t dummy_length = 0;
   const uint8 *encoded_tokens_ptr = reinterpret_cast<const uint8*>(
       token_array_->Get(key_id, &dummy_length));

   // Callback on each token.
   for (TokenDecodeIterator iter(
            codec_, value_trie_.get(), frequent_pos_, key, encoded_tokens_ptr);
        !iter.Done(); iter.Next()) {
     if (callback->OnToken(key, key, *iter.Get().token) !=
         Callback::TRAVERSE_CONTINUE) {
       break;
     }
   }
 }

 void SystemDictionary::LookupReverse(
     StringPiece str, NodeAllocatorInterface *allocator,
     Callback *callback) const {
   // 1st step: Hiragana/Katakana are not in the value trie
   // 2nd step: Reverse lookup in value trie
   ReverseLookupCallbackWrapper callback_wrapper(callback);
   RegisterReverseLookupTokensForT13N(str, &callback_wrapper);
   RegisterReverseLookupTokensForValue(str, allocator, &callback_wrapper);
 }

 namespace {

 // Collects all the IDs of louds trie whose keys match lookup query. The limit
 // parameter can be used to restrict the maximum number of lookup.
 class IdCollector : public LoudsTrie::Callback {
  public:
   IdCollector() : limit_(numeric_limits<int>::max()) {
   }

   explicit IdCollector(int limit) : limit_(limit) {
   }

   // Called back on each key found. Inserts the key id to |id_set_| up to
   // |limit_|.
   virtual ResultType Run(const char *key, size_t key_len, int key_id) {
     if (limit_ <= 0) {
       return SEARCH_DONE;
     }
     id_set_.insert(key_id);
     --limit_;
     return SEARCH_CONTINUE;
   }

   const set<int> &id_set() const {
     return id_set_;
   }

  private:
   int limit_;
   set<int> id_set_;

   DISALLOW_COPY_AND_ASSIGN(IdCollector);
 };

 }  // namespace

 void SystemDictionary::PopulateReverseLookupCache(
     StringPiece str, NodeAllocatorInterface *allocator) const {
   if (allocator == NULL) {
     return;
   }
   if (reverse_lookup_index_ != NULL) {
     // We don't need to prepare cache for the current reverse conversion,
     // as we have already built the index for reverse lookup.
     return;
   }
   ReverseLookupCache *cache =
       allocator->mutable_data()->get<ReverseLookupCache>(kReverseLookupCache);
   DCHECK(cache) << "can't get cache data.";

   IdCollector id_collector;
   int pos = 0;
   // Iterate each suffix and collect IDs of all substrings.
   while (pos < str.size()) {
     const StringPiece suffix(str, pos);
     string lookup_key;
     codec_->EncodeValue(suffix, &lookup_key);
     value_trie_->PrefixSearch(lookup_key.c_str(), &id_collector);
     pos += Util::OneCharLen(suffix.data());
   }
   // Collect tokens for all IDs.
   ScanTokens(id_collector.id_set(), &cache->results);
 }

 void SystemDictionary::ClearReverseLookupCache(
     NodeAllocatorInterface *allocator) const {
   allocator->mutable_data()->erase(kReverseLookupCache);
 }

 namespace {

 // A traverser for prefix search over T13N entries.
 class T13nPrefixTraverser : public PrefixTraverser {
  public:
   T13nPrefixTraverser(const BitVectorBasedArray *token_array,
                       const LoudsTrie *value_trie,
                       const SystemDictionaryCodecInterface *codec,
                       const uint32 *frequent_pos,
                       StringPiece original_encoded_key,
                       SystemDictionary::Callback *callback)
       : PrefixTraverser(token_array, value_trie, codec, frequent_pos,
                         original_encoded_key, callback) {}

   virtual ResultType Run(const char *trie_key,
                          size_t trie_key_len, int key_id) {
     string key, actual_key;
     SystemDictionary::Callback::ResultType result = RunOnKeyAndOnActualKey(
         trie_key, trie_key_len, key_id, &key, &actual_key);
     if (result != SystemDictionary::Callback::TRAVERSE_CONTINUE) {
       return ConvertResultType(result);
     }

     // Decode tokens and call back OnToken() for each T13N token.
     size_t dummy_length = 0;
     const uint8 *encoded_tokens_ptr = reinterpret_cast<const uint8*>(
         token_array_->Get(key_id, &dummy_length));
     for (TokenDecodeIterator iter(
              codec_, value_trie_, frequent_pos_,
              actual_key, encoded_tokens_ptr);
          !iter.Done(); iter.Next()) {
       const TokenInfo &token_info = iter.Get();
       // Skip spelling corrections.
       if (token_info.token->attributes & Token::SPELLING_CORRECTION) {
         continue;
       }
       if (token_info.value_type != TokenInfo::AS_IS_HIRAGANA &&
           token_info.value_type != TokenInfo::AS_IS_KATAKANA) {
         // SAME_AS_PREV_VALUE may be t13n token.
         string hiragana;
         Util::KatakanaToHiragana(token_info.token->value, &hiragana);
         if (token_info.token->key != hiragana) {
           continue;
         }
       }
       result = callback_->OnToken(key, actual_key, *token_info.token);
       if (result != SystemDictionary::Callback::TRAVERSE_CONTINUE) {
         return ConvertResultType(result);
       }
     }
     return SEARCH_CONTINUE;
   }

  private:
   DISALLOW_COPY_AND_ASSIGN(T13nPrefixTraverser);
 };

 }  // namespace

 void SystemDictionary::RegisterReverseLookupTokensForT13N(
     StringPiece value, Callback *callback) const {
   string hiragana, original_encoded_key;
   Util::KatakanaToHiragana(value, &hiragana);
   codec_->EncodeKey(hiragana, &original_encoded_key);
   T13nPrefixTraverser traverser(token_array_.get(), value_trie_.get(), codec_,
                                 frequent_pos_, original_encoded_key, callback);
   key_trie_->PrefixSearchWithKeyExpansion(
       original_encoded_key.c_str(), KeyExpansionTable::GetDefaultInstance(),
       &traverser);
 }

 void SystemDictionary::RegisterReverseLookupTokensForValue(
     StringPiece value, NodeAllocatorInterface *allocator,
     Callback *callback) const {
   string lookup_key;
   codec_->EncodeValue(value, &lookup_key);

   IdCollector id_collector;
   value_trie_->PrefixSearch(lookup_key.c_str(), &id_collector);
   const set<int> &id_set = id_collector.id_set();

   const bool has_cache = (allocator != NULL &&
                           allocator->data().has(kReverseLookupCache));
   ReverseLookupCache *cache =
       (has_cache ? allocator->mutable_data()->get<ReverseLookupCache>(
           kReverseLookupCache) : NULL);

   multimap<int, ReverseLookupResult> *results = NULL;
   multimap<int, ReverseLookupResult> non_cached_results;
   if (reverse_lookup_index_ != NULL) {
     reverse_lookup_index_->FillResultMap(id_set, &non_cached_results);
     results = &non_cached_results;
   } else if (cache != NULL && IsCacheAvailable(id_set, cache->results)) {
     results = &(cache->results);
   } else {
     // Cache is not available. Get token for each ID.
     ScanTokens(id_set, &non_cached_results);
     results = &non_cached_results;
   }
   DCHECK(results != NULL);

   RegisterReverseLookupResults(id_set, *results, callback);
 }

 void SystemDictionary::ScanTokens(
     const set<int> &id_set,
     multimap<int, ReverseLookupResult> *reverse_results) const {
   for (TokenScanIterator iter(codec_, token_array_.get());
        !iter.Done(); iter.Next()) {
     const TokenScanIterator::Result &result = iter.Get();
     if (result.value_id != -1 &&
         id_set.find(result.value_id) != id_set.end()) {
       ReverseLookupResult lookup_result;
       lookup_result.tokens_offset = result.tokens_offset;
       lookup_result.id_in_key_trie = result.index;
       reverse_results->insert(make_pair(result.value_id, lookup_result));
     }
   }
 }

 void SystemDictionary::RegisterReverseLookupResults(
     const set<int> &id_set,
     const multimap<int, ReverseLookupResult> &reverse_results,
     Callback *callback) const {
   size_t dummy_length = 0;
   const uint8 *encoded_tokens_ptr =
       reinterpret_cast<const uint8*>(token_array_->Get(0, &dummy_length));
   char buffer[LoudsTrie::kMaxDepth + 1];
   for (set<int>::const_iterator set_itr = id_set.begin();
        set_itr != id_set.end();
        ++set_itr) {
     FilterInfo filter;
     filter.conditions =
         (FilterInfo::VALUE_ID | FilterInfo::NO_SPELLING_CORRECTION);
     filter.value_id = *set_itr;

     typedef multimap<int, ReverseLookupResult>::const_iterator ResultItr;
     pair<ResultItr, ResultItr> range = reverse_results.equal_range(*set_itr);
     for (ResultItr result_itr = range.first;
          result_itr != range.second;
          ++result_itr) {
       const ReverseLookupResult &reverse_result = result_itr->second;

       const char *encoded_key =
           key_trie_->Reverse(reverse_result.id_in_key_trie, buffer);
       const size_t encoded_key_len =
           LoudsTrie::kMaxDepth - (encoded_key - buffer);
       DCHECK_EQ(encoded_key_len, strlen(encoded_key));
       string tokens_key;
       codec_->DecodeKey(StringPiece(encoded_key, encoded_key_len), &tokens_key);
       if (callback->OnKey(tokens_key) !=
               SystemDictionary::Callback::TRAVERSE_CONTINUE) {
         continue;
       }

       // actual_key is always the same as tokens_key for reverse conversions.
       RegisterTokens(
           filter,
           tokens_key,
           tokens_key,
           encoded_tokens_ptr + reverse_result.tokens_offset,
           callback);
     }
   }
 }

 void SystemDictionary::RegisterTokens(
     const FilterInfo &filter,
     const string &tokens_key,
     const string &actual_key,
     const uint8 *encoded_tokens_ptr,
     Callback *callback) const {
   for (TokenDecodeIterator iter(codec_, value_trie_.get(), frequent_pos_,
                                 actual_key, encoded_tokens_ptr);
        !iter.Done(); iter.Next()) {
     const TokenInfo &token_info = iter.Get();
     if (IsBadToken(filter, token_info)) {
       continue;
     }
     callback->OnToken(tokens_key, actual_key, *token_info.token);
   }
 }

 bool SystemDictionary::IsBadToken(
     const FilterInfo &filter,
     const TokenInfo &token_info) const {
   if ((filter.conditions & FilterInfo::NO_SPELLING_CORRECTION) &&
       (token_info.token->attributes & Token::SPELLING_CORRECTION)) {
     return true;
   }

   if ((filter.conditions & FilterInfo::VALUE_ID) &&
       token_info.id_in_value_trie != filter.value_id) {
     return true;
   }

   if ((filter.conditions & FilterInfo::ONLY_T13N) &&
       (token_info.value_type != TokenInfo::AS_IS_HIRAGANA &&
        token_info.value_type != TokenInfo::AS_IS_KATAKANA)) {
     // SAME_AS_PREV_VALUE may be t13n token.
     string hiragana;
     Util::KatakanaToHiragana(token_info.token->value, &hiragana);
     if (token_info.token->key != hiragana) {
       return true;
     }
   }
   return false;
 }

 }  // namespace dictionary
 }  // namespace mozc