| // Copyright 2010-2015, Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include "dictionary/system/system_dictionary.h" |
| |
| #include <cstdlib> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/file_util.h" |
| #include "base/logging.h" |
| #include "base/port.h" |
| #include "base/stl_util.h" |
| #include "base/system_util.h" |
| #include "base/util.h" |
| #include "data_manager/user_pos_manager.h" |
| #include "dictionary/dictionary_test_util.h" |
| #include "dictionary/dictionary_token.h" |
| #include "dictionary/pos_matcher.h" |
| #include "dictionary/system/codec_interface.h" |
| #include "dictionary/system/system_dictionary_builder.h" |
| #include "dictionary/text_dictionary_loader.h" |
| #include "testing/base/public/googletest.h" |
| #include "testing/base/public/gunit.h" |
| |
| using mozc::dictionary::CollectTokenCallback; |
| |
namespace {
// The reverse lookup test size depends on the build mode (DEBUG builds are
// slower), but #ifdef cannot appear inside a DEFINE_int32 invocation, so the
// default value is selected here instead.
#ifdef DEBUG
const uint32 kDefaultReverseLookupTestSize = 1000;
#else
const uint32 kDefaultReverseLookupTestSize = 10000;
#endif
}  // namespace
| |
| // TODO(noriyukit): Ideally, the copy rule of dictionary_oss/dictionary00.txt |
| // can be shared with one in |
| // data_manager/dictionary_oss/oss_data_manager_test.gyp. However, to avoid |
| // conflict of copy destination name, the copy destination here is changed from |
| // the original one. See also comments in system_dictionary_test.gyp. |
DEFINE_string(
    dictionary_source,
    "data/system_dictionary_test/dictionary00.txt",
    "source dictionary file to run test");

// Upper bound on the number of tokens loaded from |dictionary_source|.
DEFINE_int32(dictionary_test_size, 100000,
             "Dictionary size for this test.");
DEFINE_int32(dictionary_reverse_lookup_test_size, kDefaultReverseLookupTestSize,
             "Number of tokens to run reverse lookup test.");
// Flags defined in other libraries (test runner and dictionary builder).
DECLARE_string(test_srcdir);
DECLARE_string(test_tmpdir);
DECLARE_int32(min_key_length_to_use_small_cost_encoding);
| |
| namespace mozc { |
| namespace dictionary { |
| |
namespace {

// Self-documenting aliases for the kana-modifier-insensitive-lookup flag
// passed to the lookup methods.  (Note: "Modifer" spelling is kept as-is;
// renaming would break uses elsewhere in this file.)
const bool kEnableKanaModiferInsensitiveLookup = true;
const bool kDisableKanaModiferInsensitiveLookup = false;

}  // namespace
| |
| class SystemDictionaryTest : public testing::Test { |
| protected: |
| SystemDictionaryTest() |
| : text_dict_(new TextDictionaryLoader( |
| *UserPosManager::GetUserPosManager()->GetPOSMatcher())), |
| dic_fn_(FLAGS_test_tmpdir + "/mozc.dic") { |
| const string dic_path = FileUtil::JoinPath(FLAGS_test_srcdir, |
| FLAGS_dictionary_source); |
| text_dict_->LoadWithLineLimit(dic_path, "", FLAGS_dictionary_test_size); |
| } |
| |
| virtual void SetUp() { |
| SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir); |
| |
| // Don't use small cost encoding by default. |
| original_flags_min_key_length_to_use_small_cost_encoding_ = |
| FLAGS_min_key_length_to_use_small_cost_encoding; |
| FLAGS_min_key_length_to_use_small_cost_encoding = kint32max; |
| } |
| |
| virtual void TearDown() { |
| FLAGS_min_key_length_to_use_small_cost_encoding = |
| original_flags_min_key_length_to_use_small_cost_encoding_; |
| } |
| |
| void BuildSystemDictionary(const vector <Token *>& tokens, |
| int num_tokens); |
| Token* CreateToken(const string& key, const string& value) const; |
| bool CompareTokensForLookup(const Token &a, const Token &b, |
| bool reverse) const; |
| |
| scoped_ptr<TextDictionaryLoader> text_dict_; |
| const string dic_fn_; |
| int original_flags_min_key_length_to_use_small_cost_encoding_; |
| }; |
| |
| void SystemDictionaryTest::BuildSystemDictionary(const vector<Token *>& source, |
| int num_tokens) { |
| SystemDictionaryBuilder builder; |
| vector<Token *> tokens; |
| // Picks up first tokens. |
| for (vector<Token *>::const_iterator it = source.begin(); |
| tokens.size() < num_tokens && it != source.end(); ++it) { |
| tokens.push_back(*it); |
| } |
| builder.BuildFromTokens(tokens); |
| builder.WriteToFile(dic_fn_); |
| } |
| |
| Token* SystemDictionaryTest::CreateToken(const string& key, |
| const string& value) const { |
| Token* t = new Token; |
| t->key = key; |
| t->value = value; |
| t->cost = 0; |
| t->lid = 0; |
| t->rid = 0; |
| return t; |
| } |
| |
| // Returns true if they seem to be same |
| bool SystemDictionaryTest::CompareTokensForLookup( |
| const Token &a, const Token &b, bool reverse) const { |
| const bool key_value_check = reverse ? |
| (a.key == b.value && a.value == b.key) : |
| (a.key == b.key && a.value == b.value); |
| if (!key_value_check) { |
| return false; |
| } |
| const bool comp_cost = a.cost == b.cost; |
| if (!comp_cost) { |
| return false; |
| } |
| const bool spelling_match = |
| (a.attributes & Token::SPELLING_CORRECTION) == |
| (b.attributes & Token::SPELLING_CORRECTION); |
| if (!spelling_match) { |
| return false; |
| } |
| const bool id_match = (a.lid == b.lid) && (a.rid == b.rid); |
| if (!id_match) { |
| return false; |
| } |
| return true; |
| } |
| |
| TEST_F(SystemDictionaryTest, HasValue) { |
| vector<Token *> tokens; |
| for (int i = 0; i < 4; ++i) { |
| Token *token = new Token; |
| // "きー%d" |
| token->key = Util::StringPrintf("\xE3\x81\x8D\xE3\x83\xBC%d", i); |
| // "バリュー%d" |
| token->value = Util::StringPrintf( |
| "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC%d", i); |
| tokens.push_back(token); |
| } |
| |
| { // Alphabet |
| Token *token = new Token; |
| token->key = "Mozc"; |
| token->value = "Mozc"; |
| tokens.push_back(token); |
| } |
| |
| { // Alphabet upper case |
| Token *token = new Token; |
| token->key = "upper"; |
| token->value = "UPPER"; |
| tokens.push_back(token); |
| } |
| |
| // "full" |
| const string kFull = "\xEF\xBD\x86\xEF\xBD\x95\xEF\xBD\x8C\xEF\xBD\x8C"; |
| // "ひらがな" |
| const string kHiragana = "\xE3\x81\xB2\xE3\x82\x89\xE3\x81\x8C\xE3\x81\xAA"; |
| // "かたかな" |
| const string kKatakanaKey = |
| "\xE3\x81\x8B\xE3\x81\x9F\xE3\x81\x8B\xE3\x81\xAA"; |
| // "カタカナ" |
| const string kKatakanaValue = |
| "\xE3\x82\xAB\xE3\x82\xBF\xE3\x82\xAB\xE3\x83\x8A"; |
| |
| { // Alphabet full width |
| Token *token = new Token; |
| token->key = "full"; |
| token->value = kFull; // "full" |
| tokens.push_back(token); |
| } |
| |
| { // Hiragana |
| Token *token = new Token; |
| token->key = kHiragana; // "ひらがな" |
| token->value = kHiragana; // "ひらがな" |
| tokens.push_back(token); |
| } |
| |
| { // Katakana |
| Token *token = new Token; |
| token->key = kKatakanaKey; // "かたかな" |
| token->value = kKatakanaValue; // "カタカナ" |
| tokens.push_back(token); |
| } |
| |
| BuildSystemDictionary(tokens, tokens.size()); |
| |
| scoped_ptr<SystemDictionary> system_dic( |
| SystemDictionary::Builder(dic_fn_).Build()); |
| ASSERT_TRUE(system_dic.get() != NULL) |
| << "Failed to open dictionary source:" << dic_fn_; |
| |
| EXPECT_TRUE(system_dic->HasValue( |
| // "バリュー0" |
| "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x30")); |
| EXPECT_TRUE(system_dic->HasValue( |
| // "バリュー1" |
| "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x31")); |
| EXPECT_TRUE(system_dic->HasValue( |
| // "バリュー2" |
| "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x32")); |
| EXPECT_TRUE(system_dic->HasValue( |
| // "バリュー3" |
| "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x33")); |
| EXPECT_FALSE(system_dic->HasValue( |
| // "バリュー4" |
| "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x34")); |
| EXPECT_FALSE(system_dic->HasValue( |
| // "バリュー5" |
| "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x35")); |
| EXPECT_FALSE(system_dic->HasValue( |
| // "バリュー6" |
| "\xE3\x83\x90\xE3\x83\xAA\xE3\x83\xA5\xE3\x83\xBC\x36")); |
| |
| EXPECT_TRUE(system_dic->HasValue("Mozc")); |
| EXPECT_FALSE(system_dic->HasValue("mozc")); |
| |
| EXPECT_TRUE(system_dic->HasValue("UPPER")); |
| EXPECT_FALSE(system_dic->HasValue("upper")); |
| |
| EXPECT_TRUE(system_dic->HasValue(kFull)); // "full" |
| EXPECT_FALSE(system_dic->HasValue("full")); |
| |
| EXPECT_TRUE(system_dic->HasValue(kHiragana)); //"ひらがな" |
| EXPECT_FALSE(system_dic->HasValue( |
| "\xE3\x83\x92\xE3\x83\xA9\xE3\x82\xAC\xE3\x83\x8A\x0A")); // "ヒラガナ" |
| |
| EXPECT_TRUE(system_dic->HasValue(kKatakanaValue)); // "カタカナ" |
| EXPECT_FALSE(system_dic->HasValue(kKatakanaKey)); // "かたかな" |
| |
| STLDeleteElements(&tokens); |
| } |
| |
| TEST_F(SystemDictionaryTest, NormalWord) { |
| vector<Token *> source_tokens; |
| scoped_ptr<Token> t0(new Token); |
| // "あ" |
| t0->key = "\xe3\x81\x82"; |
| // "亜" |
| t0->value = "\xe4\xba\x9c"; |
| t0->cost = 100; |
| t0->lid = 50; |
| t0->rid = 70; |
| source_tokens.push_back(t0.get()); |
| BuildSystemDictionary(source_tokens, FLAGS_dictionary_test_size); |
| |
| scoped_ptr<SystemDictionary> system_dic( |
| SystemDictionary::Builder(dic_fn_).Build()); |
| ASSERT_TRUE(system_dic.get() != NULL) |
| << "Failed to open dictionary source:" << dic_fn_; |
| |
| CollectTokenCallback callback; |
| |
| // Look up by exact key. |
| system_dic->LookupPrefix(t0->key, false, &callback); |
| ASSERT_EQ(1, callback.tokens().size()); |
| EXPECT_TOKEN_EQ(*t0, callback.tokens().front()); |
| |
| // Look up by prefix. |
| callback.Clear(); |
| system_dic->LookupPrefix( |
| "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86", // "あいう" |
| false, &callback); |
| ASSERT_EQ(1, callback.tokens().size()); |
| EXPECT_TOKEN_EQ(*t0, callback.tokens().front()); |
| |
| // Nothing should be looked up. |
| callback.Clear(); |
| system_dic->LookupPrefix( |
| "\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F", // "かきく" |
| false, &callback); |
| EXPECT_TRUE(callback.tokens().empty()); |
| } |
| |
// Multiple tokens sharing the same key (and even the same key/value pair,
// differing only in cost or POS ids) must all survive the build and all be
// returned by a prefix lookup of that key.
TEST_F(SystemDictionaryTest, SameWord) {
  vector<Token> tokens(4);

  // Same key/value as tokens[1] and tokens[3]; differs in cost and ids.
  tokens[0].key = "\xe3\x81\x82";    // "あ"
  tokens[0].value = "\xe4\xba\x9c";  // "亜"
  tokens[0].cost = 100;
  tokens[0].lid = 50;
  tokens[0].rid = 70;

  tokens[1].key = "\xe3\x81\x82";    // "あ"
  tokens[1].value = "\xe4\xba\x9c";  // "亜"
  tokens[1].cost = 150;
  tokens[1].lid = 100;
  tokens[1].rid = 200;

  // Same key but a different value (key == value here).
  tokens[2].key = "\xe3\x81\x82";    // "あ"
  tokens[2].value = "\xe3\x81\x82";  // "あ"
  tokens[2].cost = 100;
  tokens[2].lid = 1000;
  tokens[2].rid = 2000;

  tokens[3].key = "\xe3\x81\x82";    // "あ"
  tokens[3].value = "\xe4\xba\x9c";  // "亜"
  tokens[3].cost = 1000;
  tokens[3].lid = 2000;
  tokens[3].rid = 3000;

  vector<Token *> source_tokens;
  for (size_t i = 0; i < tokens.size(); ++i) {
    source_tokens.push_back(&tokens[i]);
  }
  BuildSystemDictionary(source_tokens, FLAGS_dictionary_test_size);

  scoped_ptr<SystemDictionary> system_dic(
      SystemDictionary::Builder(dic_fn_).Build());
  ASSERT_TRUE(system_dic.get() != NULL)
      << "Failed to open dictionary source:" << dic_fn_;

  // All the tokens should be looked up, in any order.
  CollectTokenCallback callback;
  system_dic->LookupPrefix("\xe3\x81\x82",  // "あ"
                           false, &callback);
  EXPECT_TOKENS_EQ_UNORDERED(source_tokens, callback.tokens());
}
| |
| TEST_F(SystemDictionaryTest, LookupAllWords) { |
| const vector<Token *> &source_tokens = text_dict_->tokens(); |
| BuildSystemDictionary(source_tokens, FLAGS_dictionary_test_size); |
| |
| scoped_ptr<SystemDictionary> system_dic( |
| SystemDictionary::Builder(dic_fn_).Build()); |
| ASSERT_TRUE(system_dic.get() != NULL) |
| << "Failed to open dictionary source:" << dic_fn_; |
| |
| // All the tokens should be looked up. |
| for (size_t i = 0; i < source_tokens.size(); ++i) { |
| CheckTokenExistenceCallback callback(source_tokens[i]); |
| system_dic->LookupPrefix(source_tokens[i]->key, false, &callback); |
| EXPECT_TRUE(callback.found()) |
| << "Token was not found: " << PrintToken(*source_tokens[i]); |
| } |
| } |
| |
| TEST_F(SystemDictionaryTest, SimpleLookupPrefix) { |
| // "は" |
| const string k0 = "\xe3\x81\xaf"; |
| // "はひふへほ" |
| const string k1 = "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5\xe3\x81\xb8\xe3\x81" |
| "\xbb"; |
| scoped_ptr<Token> t0(CreateToken(k0, "aa")); |
| scoped_ptr<Token> t1(CreateToken(k1, "bb")); |
| |
| vector<Token *> source_tokens; |
| source_tokens.push_back(t0.get()); |
| source_tokens.push_back(t1.get()); |
| text_dict_->CollectTokens(&source_tokens); |
| BuildSystemDictionary(source_tokens, 100); |
| |
| scoped_ptr<SystemDictionary> system_dic( |
| SystemDictionary::Builder(dic_fn_).Build()); |
| ASSERT_TRUE(system_dic.get() != NULL) |
| << "Failed to open dictionary source:" << dic_fn_; |
| |
| // |t0| should be looked up from |k1|. |
| CheckTokenExistenceCallback callback(t0.get()); |
| system_dic->LookupPrefix(k1, false, &callback); |
| EXPECT_TRUE(callback.found()); |
| } |
| |
| namespace { |
| |
// Callback used by the LookupPrefix test below.  It collects every
// (key, value) pair it sees into |result_|, while exercising the traversal
// control protocol through OnKey():
//   - "かき" -> TRAVERSE_CULL      (skip this key and its whole subtree)
//   - "さ"   -> TRAVERSE_NEXT_KEY  (skip tokens of this key only)
//   - "た"   -> TRAVERSE_DONE      (stop the entire traversal)
class LookupPrefixTestCallback : public SystemDictionary::Callback {
 public:
  virtual ResultType OnKey(StringPiece key) {
    if (key == "\xE3\x81\x8B\xE3\x81\x8D") {  // key == "かき"
      return TRAVERSE_CULL;
    } else if (key == "\xE3\x81\x95") {  // key == "さ"
      return TRAVERSE_NEXT_KEY;
    } else if (key == "\xE3\x81\x9F") {  // key == "た"
      return TRAVERSE_DONE;
    }
    return TRAVERSE_CONTINUE;
  }

  virtual ResultType OnToken(StringPiece key, StringPiece actual_key,
                             const Token &token) {
    // Record the token's own key/value (not the lookup key), then continue.
    result_.insert(make_pair(token.key, token.value));
    return TRAVERSE_CONTINUE;
  }

  const set<pair<string, string> > &result() const {
    return result_;
  }

 private:
  set<pair<string, string> > result_;  // All (key, value) pairs seen so far.
};
| |
| } // namespace |
| |
// Exercises LookupPrefix() against a fixed key/value table, covering plain
// prefix lookup, the TRAVERSE_CULL / TRAVERSE_NEXT_KEY / TRAVERSE_DONE
// traversal controls (see LookupPrefixTestCallback::OnKey above), and kana
// modifier insensitive key expansion ("は" also matches "ば").
TEST_F(SystemDictionaryTest, LookupPrefix) {
  // Set up a test dictionary.
  struct {
    const char *key;
    const char *value;
  } kKeyValues[] = {
      // "あ", "亜"
      { "\xE3\x81\x82", "\xE4\xBA\x9C" },
      // "あ", "安"
      { "\xE3\x81\x82", "\xE5\xAE\x89" },
      // "あ", "在"
      { "\xE3\x81\x82", "\xE5\x9C\xA8" },
      // "あい", "愛"
      { "\xE3\x81\x82\xE3\x81\x84", "\xE6\x84\x9B" },
      // "あい", "藍"
      { "\xE3\x81\x82\xE3\x81\x84", "\xE8\x97\x8D" },
      // "あいう", "藍雨"
      { "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86", "\xE8\x97\x8D\xE9\x9B\xA8" },
      // "か", "可"
      { "\xE3\x81\x8B", "\xE5\x8F\xAF" },
      // "かき", "牡蠣"
      { "\xE3\x81\x8B\xE3\x81\x8D", "\xE7\x89\xA1\xE8\xA0\xA3" },
      // "かき", "夏季"
      { "\xE3\x81\x8B\xE3\x81\x8D", "\xE5\xA4\x8F\xE5\xAD\xA3" },
      // "かきく", "柿久"
      { "\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F", "\xE6\x9F\xBF\xE4\xB9\x85" },
      // "さ", "差"
      { "\xE3\x81\x95", "\xE5\xB7\xAE" },
      // "さ", "左"
      { "\xE3\x81\x95", "\xE5\xB7\xA6" },
      // "さし", "刺"
      { "\xE3\x81\x95\xE3\x81\x97", "\xE5\x88\xBA" },
      // "た", "田"
      { "\xE3\x81\x9F", "\xE7\x94\xB0" },
      // "た", "多"
      { "\xE3\x81\x9F", "\xE5\xA4\x9A" },
      // "たち", "多値"
      { "\xE3\x81\x9F\xE3\x81\xA1", "\xE5\xA4\x9A\xE5\x80\xA4" },
      // "たちつ", "タチツ"
      { "\xE3\x81\x9F\xE3\x81\xA1\xE3\x81\xA4",
        "\xE3\x82\xBF\xE3\x83\x81\xE3\x83\x84" },
      // "は", "葉"
      { "\xE3\x81\xAF", "\xE8\x91\x89" },
      // "は", "歯"
      { "\xE3\x81\xAF", "\xE6\xAD\xAF" },
      // "はひ", "ハヒ"
      { "\xE3\x81\xAF\xE3\x81\xB2", "\xE3\x83\x8F\xE3\x83\x92" },
      // "ば", "場"
      { "\xE3\x81\xB0", "\xE5\xA0\xB4" },
      // "はび", "波美"
      { "\xE3\x81\xAF\xE3\x81\xB3", "\xE6\xB3\xA2\xE7\xBE\x8E" },
      // "ばび", "馬尾"
      { "\xE3\x81\xB0\xE3\x81\xB3", "\xE9\xA6\xAC\xE5\xB0\xBE" },
      // "ばびぶ", "バビブ"
      { "\xE3\x81\xB0\xE3\x81\xB3\xE3\x81\xB6",
        "\xE3\x83\x90\xE3\x83\x93\xE3\x83\x96" },
  };
  const size_t kKeyValuesSize = arraysize(kKeyValues);
  scoped_ptr<Token> tokens[kKeyValuesSize];
  vector<Token *> source_tokens(kKeyValuesSize);
  for (size_t i = 0; i < kKeyValuesSize; ++i) {
    tokens[i].reset(CreateToken(kKeyValues[i].key, kKeyValues[i].value));
    source_tokens[i] = tokens[i].get();
  }
  text_dict_->CollectTokens(&source_tokens);
  BuildSystemDictionary(source_tokens, kKeyValuesSize);
  scoped_ptr<SystemDictionary> system_dic(
      SystemDictionary::Builder(dic_fn_).Build());
  ASSERT_TRUE(system_dic.get() != NULL)
      << "Failed to open dictionary source:" << dic_fn_;

  // Test for normal prefix lookup without key expansion.
  {
    LookupPrefixTestCallback callback;
    system_dic->LookupPrefix("\xE3\x81\x82\xE3\x81\x84",  // "あい"
                             false, &callback);
    const set<pair<string, string> > &result = callback.result();
    // "あ" -- "あい" should be found (table entries 0 through 4).
    for (size_t i = 0; i < 5; ++i) {
      const pair<string, string> entry(
          kKeyValues[i].key, kKeyValues[i].value);
      EXPECT_TRUE(result.end() != result.find(entry));
    }
    // The others should not be found.
    for (size_t i = 5; i < arraysize(kKeyValues); ++i) {
      const pair<string, string> entry(
          kKeyValues[i].key, kKeyValues[i].value);
      EXPECT_TRUE(result.end() == result.find(entry));
    }
  }

  // Test for normal prefix lookup without key expansion, but with culling
  // feature.
  {
    LookupPrefixTestCallback callback;
    system_dic->LookupPrefix(
        "\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F",  // "かきく"
        false,
        &callback);
    const set<pair<string, string> > &result = callback.result();
    // Only "か" should be found as the callback doesn't traverse the subtree of
    // "かき" due to culling request from LookupPrefixTestCallback::OnKey().
    for (size_t i = 0; i < kKeyValuesSize; ++i) {
      const pair<string, string> entry(
          kKeyValues[i].key, kKeyValues[i].value);
      EXPECT_EQ(entry.first == "\xE3\x81\x8B",  // "か"
                result.find(entry) != result.end());
    }
  }

  // Test for TRAVERSE_NEXT_KEY.
  {
    LookupPrefixTestCallback callback;
    system_dic->LookupPrefix(
        "\xE3\x81\x95\xE3\x81\x97\xE3\x81\x99",  // "さしす"
        false,
        &callback);
    const set<pair<string, string> > &result = callback.result();
    // Only "さし" should be found as tokens for "さ" is skipped (see
    // LookupPrefixTestCallback::OnKey()).
    for (size_t i = 0; i < kKeyValuesSize; ++i) {
      const pair<string, string> entry(
          kKeyValues[i].key, kKeyValues[i].value);
      EXPECT_EQ(entry.first == "\xE3\x81\x95\xE3\x81\x97",  // "さし"
                result.find(entry) != result.end());
    }
  }

  // Test for TRAVERSE_DONE.
  {
    LookupPrefixTestCallback callback;
    system_dic->LookupPrefix(
        "\xE3\x81\x9F\xE3\x81\xA1\xE3\x81\xA4",  // "たちつ"
        false,
        &callback);
    const set<pair<string, string> > &result = callback.result();
    // Nothing should be found as the traversal is immediately done after seeing
    // "た"; see LookupPrefixTestCallback::OnKey().
    EXPECT_TRUE(result.empty());
  }

  // Test for prefix lookup with key expansion.
  {
    LookupPrefixTestCallback callback;
    system_dic->LookupPrefix(
        "\xE3\x81\xAF\xE3\x81\xB2",  // "はひ"
        true,  // Use kana modifier insensitive lookup
        &callback);
    const set<pair<string, string> > &result = callback.result();
    // With expansion, voiced variants ("ば", "び") of the looked-up key's
    // characters also match.
    const char *kExpectedKeys[] = {
        "\xE3\x81\xAF",              // "は"
        "\xE3\x81\xB0",              // "ば"
        "\xE3\x81\xAF\xE3\x81\xB2",  // "はひ"
        "\xE3\x81\xB0\xE3\x81\xB2",  // "ばひ"
        "\xE3\x81\xAF\xE3\x81\xB3",  // "はび"
        "\xE3\x81\xB0\xE3\x81\xB3",  // "ばび"
    };
    const set<string> expected(kExpectedKeys,
                               kExpectedKeys + arraysize(kExpectedKeys));
    for (size_t i = 0; i < kKeyValuesSize; ++i) {
      const bool to_be_found =
          expected.find(kKeyValues[i].key) != expected.end();
      const pair<string, string> entry(
          kKeyValues[i].key, kKeyValues[i].value);
      EXPECT_EQ(to_be_found, result.find(entry) != result.end());
    }
  }
}
| |
// Predictive lookup of a key must return every registered token whose key
// starts with it (here, two tokens sharing the prefix "まみむめも").
TEST_F(SystemDictionaryTest, LookupPredictive) {
  vector<Token *> tokens;
  ScopedElementsDeleter<vector<Token *> > deleter(&tokens);

  // "まみむめもや" -> "value0"
  tokens.push_back(CreateToken("\xe3\x81\xbe\xe3\x81\xbf\xe3\x82\x80"
                               "\xe3\x82\x81\xe3\x82\x82\xe3\x82\x84",
                               "value0"));
  // "まみむめもやゆよ" -> "value1"
  tokens.push_back(CreateToken("\xe3\x81\xbe\xe3\x81\xbf\xe3\x82\x80"
                               "\xe3\x82\x81\xe3\x82\x82\xe3\x82\x84"
                               "\xe3\x82\x86\xe3\x82\x88",
                               "value1"));
  // Build a dictionary with the above two tokens plus those from test data.
  {
    vector<Token *> source_tokens = tokens;
    text_dict_->CollectTokens(&source_tokens);  // Load test data.
    BuildSystemDictionary(source_tokens, 10000);
  }
  scoped_ptr<SystemDictionary> system_dic(
      SystemDictionary::Builder(dic_fn_).Build());
  ASSERT_TRUE(system_dic.get() != NULL)
      << "Failed to open dictionary source: " << dic_fn_;

  // All the tokens in |tokens| should be looked up by "まみむめも".
  const char *kMamimumemo =
      "\xe3\x81\xbe\xe3\x81\xbf\xe3\x82\x80\xe3\x82\x81\xe3\x82\x82";
  CheckMultiTokensExistenceCallback callback(tokens);
  system_dic->LookupPredictive(kMamimumemo, false, &callback);
  EXPECT_TRUE(callback.AreAllFound());
}
| |
// Predictive lookup of "かつこう" matches "かっこう"/"がっこう" entries only
// when the kana modifier insensitive flag is on; with the flag off, the key
// must match literally and nothing is returned.
TEST_F(SystemDictionaryTest, LookupPredictive_KanaModifierInsensitiveLookup) {
  vector<Token *> tokens;
  ScopedElementsDeleter<vector<Token *> > deleter(&tokens);

  // "がっこう" -> "学校"
  tokens.push_back(CreateToken(
      "\xE3\x81\x8C\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86",
      "\xE5\xAD\xA6\xE6\xA0\xA1"));
  // "かっこう" -> "格好"
  tokens.push_back(CreateToken(
      "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86",
      "\xE6\xA0\xBC\xE5\xA5\xBD"));

  BuildSystemDictionary(tokens, 100);
  scoped_ptr<SystemDictionary> system_dic(
      SystemDictionary::Builder(dic_fn_).Build());
  ASSERT_TRUE(system_dic.get() != NULL)
      << "Failed to open dictionary source: " << dic_fn_;

  // "かつこう"
  const string kKey = "\xE3\x81\x8B\xE3\x81\xA4\xE3\x81\x93\xE3\x81\x86";

  // Without Kana modifier insensitive lookup flag, nothing is looked up.
  CollectTokenCallback callback;
  system_dic->LookupPredictive(kKey, false, &callback);
  EXPECT_TRUE(callback.tokens().empty());

  // With Kana modifier insensitive lookup flag, every token is looked up.
  callback.Clear();
  system_dic->LookupPredictive(kKey, true, &callback);
  EXPECT_TOKENS_EQ_UNORDERED(tokens, callback.tokens());
}
| |
// Predictive lookup limits traversal depth when a prefix has many entries
// (BFS-like cut-off): short completions are still returned while long ones
// may be dropped.
TEST_F(SystemDictionaryTest, LookupPredictive_CutOffEmulatingBFS) {
  vector<Token *> tokens;
  ScopedElementsDeleter<vector<Token *> > deleter(&tokens);

  // "あい" -> "ai"
  tokens.push_back(CreateToken("\xe3\x81\x82\xe3\x81\x84", "ai"));
  // "あいうえお" -> "aiueo"
  tokens.push_back(CreateToken(
      "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a",
      "aiueo"));
  // Build a dictionary with the above two tokens plus those from test data.
  {
    vector<Token *> source_tokens = tokens;
    text_dict_->CollectTokens(&source_tokens);  // Load test data.
    BuildSystemDictionary(source_tokens, 10000);
  }
  scoped_ptr<SystemDictionary> system_dic(
      SystemDictionary::Builder(dic_fn_).Build());
  ASSERT_TRUE(system_dic.get() != NULL)
      << "Failed to open dictionary source: " << dic_fn_;

  // Since there are many entries starting with "あ" in test dictionary, it's
  // expected that "あいうえお" is not looked up because of longer key cut-off
  // mechanism. However, "あい" is looked up as it's short.
  CheckMultiTokensExistenceCallback callback(tokens);
  system_dic->LookupPredictive("\xe3\x81\x82",  // "あ"
                               false, &callback);
  EXPECT_TRUE(callback.IsFound(tokens[0]));
  EXPECT_FALSE(callback.IsFound(tokens[1]));
}
| |
| TEST_F(SystemDictionaryTest, LookupExact) { |
| vector<Token *> source_tokens; |
| |
| // "は" |
| const string k0 = "\xe3\x81\xaf"; |
| // "はひふへほ" |
| const string k1 = "\xe3\x81\xaf\xe3\x81\xb2\xe3\x81\xb5\xe3\x81\xb8\xe3\x81" |
| "\xbb"; |
| |
| scoped_ptr<Token> t0(CreateToken(k0, "aa")); |
| scoped_ptr<Token> t1(CreateToken(k1, "bb")); |
| source_tokens.push_back(t0.get()); |
| source_tokens.push_back(t1.get()); |
| text_dict_->CollectTokens(&source_tokens); |
| BuildSystemDictionary(source_tokens, 100); |
| |
| scoped_ptr<SystemDictionary> system_dic( |
| SystemDictionary::Builder(dic_fn_).Build()); |
| ASSERT_TRUE(system_dic.get() != NULL) |
| << "Failed to open dictionary source:" << dic_fn_; |
| |
| // |t0| should not be looked up from |k1|. |
| CheckTokenExistenceCallback callback0(t0.get()); |
| system_dic->LookupExact(k1, &callback0); |
| EXPECT_FALSE(callback0.found()); |
| // But |t1| should be found. |
| CheckTokenExistenceCallback callback1(t1.get()); |
| system_dic->LookupExact(k1, &callback1); |
| EXPECT_TRUE(callback1.found()); |
| |
| // Nothing should be found from "hoge". |
| CollectTokenCallback callback_hoge; |
| system_dic->LookupExact("hoge", &callback_hoge); |
| EXPECT_TRUE(callback_hoge.tokens().empty()); |
| } |
| |
// Reverse lookup (value -> token) tests: every registered token must be
// retrievable by its value except SPELLING_CORRECTION tokens, no returned
// key may be longer than the queried value, and a value followed by extra
// characters still hits the token via the transliterated index.
TEST_F(SystemDictionaryTest, LookupReverse) {
  scoped_ptr<Token> t0(new Token);
  // "ど"
  t0->key = "\xe3\x81\xa9";
  // "ド"
  t0->value = "\xe3\x83\x89";
  t0->cost = 1;
  t0->lid = 2;
  t0->rid = 3;
  scoped_ptr<Token> t1(new Token);
  // "どらえもん"
  t1->key = "\xe3\x81\xa9\xe3\x82\x89\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93";
  // "ドラえもん"
  t1->value = "\xe3\x83\x89\xe3\x83\xa9\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93";
  t1->cost = 1;
  t1->lid = 2;
  t1->rid = 3;
  scoped_ptr<Token> t2(new Token);
  // "といざらす®" (value contains a non-kana symbol character)
  t2->key = "\xe3\x81\xa8\xe3\x81\x84\xe3\x81\x96\xe3\x82\x89\xe3\x81\x99\xc2"
      "\xae";
  // "トイザらス®"
  t2->value = "\xe3\x83\x88\xe3\x82\xa4\xe3\x82\xb6\xe3\x82\x89\xe3\x82\xb9\xc2"
      "\xae";
  t2->cost = 1;
  t2->lid = 2;
  t2->rid = 3;
  scoped_ptr<Token> t3(new Token);
  // "ああああああ"
  // Both t3 and t4 will be encoded into 3 bytes.
  t3->key = "\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82"
      "\xe3\x81\x82\xe3\x81\x82\xe3\x81\x82";
  t3->value = t3->key;
  t3->cost = 32000;
  t3->lid = 1;
  t3->rid = 1;
  scoped_ptr<Token> t4(new Token);
  // Identical to t3 except for rid; exercises duplicate compact encodings.
  *t4 = *t3;
  t4->lid = 1;
  t4->rid = 2;
  scoped_ptr<Token> t5(new Token);
  // "いいいいいい"
  // t5 will be encoded into 3 bytes.
  t5->key = "\xe3\x81\x84\xe3\x81\x84\xe3\x81\x84"
      "\xe3\x81\x84\xe3\x81\x84\xe3\x81\x84";
  t5->value = t5->key;
  t5->cost = 32000;
  t5->lid = 1;
  t5->rid = 1;
  // spelling correction token should not be retrieved by reverse lookup.
  scoped_ptr<Token> t6(new Token);
  // "どらえもん"
  t6->key = "\xe3\x81\xa9\xe3\x82\x89\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93";
  // "ドラえもん"
  t6->value = "\xe3\x83\x89\xe3\x83\xa9\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93";
  t6->cost = 1;
  t6->lid = 2;
  t6->rid = 3;
  t6->attributes = Token::SPELLING_CORRECTION;
  scoped_ptr<Token> t7(new Token);
  // "こんさーと" (used later for the non-exact transliterated index check)
  t7->key = "\xe3\x81\x93\xe3\x82\x93\xe3\x81\x95\xe3\x83\xbc\xe3\x81\xa8";
  // "コンサート"
  t7->value = "\xe3\x82\xb3\xe3\x83\xb3\xe3\x82\xb5\xe3\x83\xbc\xe3\x83\x88";
  t7->cost = 1;
  t7->lid = 1;
  t7->rid = 1;
  // "バージョン" should not return a result with the key "ヴァージョン".
  scoped_ptr<Token> t8(new Token);
  // "ばーじょん"
  t8->key = "\xE3\x81\xB0\xE3\x83\xBC\xE3\x81\x98\xE3\x82\x87\xE3\x82\x93";
  // "バージョン"
  t8->value = "\xE3\x83\x90\xE3\x83\xBC\xE3\x82\xB8\xE3\x83\xA7\xE3\x83\xB3";
  t8->cost = 1;
  t8->lid = 1;
  t8->rid = 1;

  vector<Token *> source_tokens;
  source_tokens.push_back(t0.get());
  source_tokens.push_back(t1.get());
  source_tokens.push_back(t2.get());
  source_tokens.push_back(t3.get());
  source_tokens.push_back(t4.get());
  source_tokens.push_back(t5.get());
  source_tokens.push_back(t6.get());
  source_tokens.push_back(t7.get());
  source_tokens.push_back(t8.get());

  text_dict_->CollectTokens(&source_tokens);
  BuildSystemDictionary(source_tokens, source_tokens.size());

  scoped_ptr<SystemDictionary> system_dic(
      SystemDictionary::Builder(dic_fn_).Build());
  ASSERT_TRUE(system_dic.get() != NULL)
      << "Failed to open dictionary source:" << dic_fn_;
  // Cap the number of tokens checked so DEBUG builds stay fast.
  const size_t test_size = min(
      static_cast<size_t>(FLAGS_dictionary_reverse_lookup_test_size),
      source_tokens.size());
  for (size_t source_index = 0; source_index < test_size; ++source_index) {
    const Token &source_token = *source_tokens[source_index];
    CollectTokenCallback callback;
    system_dic->LookupReverse(source_token.value, &callback);
    const vector<Token> &tokens = callback.tokens();

    bool found = false;
    for (size_t i = 0; i < tokens.size(); ++i) {
      const Token &token = tokens[i];
      // Make sure any of the key lengths of the lookup results
      // doesn't exceed the original key length.
      // It happened once
      // when called with "バージョン", returning "ヴァージョン".
      EXPECT_LE(token.key.size(), source_token.value.size())
          << string(token.key) << ":" << string(token.value)
          << "\t" << string(source_token.value);
      if (CompareTokensForLookup(source_token, token, true)) {
        found = true;
      }
    }

    if ((source_token.attributes & Token::SPELLING_CORRECTION) ==
        Token::SPELLING_CORRECTION) {
      // Spelling-correction tokens must be invisible to reverse lookup.
      EXPECT_FALSE(found) << "Spelling correction token was retrieved:"
                          << PrintToken(source_token);
      if (found) {
        return;
      }
    } else {
      EXPECT_TRUE(found)
          << "Failed to find " << source_token.key << ":" << source_token.value;
      if (!found) {
        return;
      }
    }
  }

  {
    // test for non exact transliterated index string.
    // append "が"
    const string key = t7->value + "\xe3\x81\x8c";
    CollectTokenCallback callback;
    system_dic->LookupReverse(key, &callback);
    const vector<Token> &tokens = callback.tokens();
    bool found = false;
    for (size_t i = 0; i < tokens.size(); ++i) {
      if (CompareTokensForLookup(*t7, tokens[i], true)) {
        found = true;
      }
    }
    EXPECT_TRUE(found)
        << "Missed token for non exact transliterated index " << key;
  }
}
| |
// The optional reverse lookup index is a pure optimization: lookups with
// ENABLE_REVERSE_LOOKUP_INDEX must return exactly the same tokens, in the
// same order, as lookups without it.
TEST_F(SystemDictionaryTest, LookupReverseIndex) {
  const vector<Token *> &source_tokens = text_dict_->tokens();
  BuildSystemDictionary(source_tokens, FLAGS_dictionary_test_size);

  scoped_ptr<SystemDictionary> system_dic_without_index(
      SystemDictionary::Builder(dic_fn_)
      .SetOptions(SystemDictionary::NONE)
      .Build());
  ASSERT_TRUE(system_dic_without_index.get() != NULL)
      << "Failed to open dictionary source:" << dic_fn_;
  scoped_ptr<SystemDictionary> system_dic_with_index(
      SystemDictionary::Builder(dic_fn_)
      .SetOptions(SystemDictionary::ENABLE_REVERSE_LOOKUP_INDEX)
      .Build());
  ASSERT_TRUE(system_dic_with_index.get() != NULL)
      << "Failed to open dictionary source:" << dic_fn_;

  // Compare results for at most FLAGS_dictionary_reverse_lookup_test_size
  // tokens (the full data set would be too slow in DEBUG builds).
  vector<Token *>::const_iterator it;
  int size = FLAGS_dictionary_reverse_lookup_test_size;
  for (it = source_tokens.begin();
       size > 0 && it != source_tokens.end(); ++it, --size) {
    const Token &t = **it;
    CollectTokenCallback callback1, callback2;
    system_dic_without_index->LookupReverse(t.value, &callback1);
    system_dic_with_index->LookupReverse(t.value, &callback2);

    const vector<Token> &tokens1 = callback1.tokens();
    const vector<Token> &tokens2 = callback2.tokens();
    ASSERT_EQ(tokens1.size(), tokens2.size());
    for (size_t i = 0; i < tokens1.size(); ++i) {
      EXPECT_TOKEN_EQ(tokens1[i], tokens2[i]);
    }
  }
}
| |
| TEST_F(SystemDictionaryTest, LookupReverseWithCache) { |
| const string kDoraemon = |
| "\xe3\x83\x89\xe3\x83\xa9\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93"; |
| |
| Token source_token; |
| // "どらえもん" |
| source_token.key = |
| "\xe3\x81\xa9\xe3\x82\x89\xe3\x81\x88\xe3\x82\x82\xe3\x82\x93"; |
| // "ドラえもん" |
| source_token.value = kDoraemon; |
| source_token.cost = 1; |
| source_token.lid = 2; |
| source_token.rid = 3; |
| vector<Token *> source_tokens; |
| source_tokens.push_back(&source_token); |
| text_dict_->CollectTokens(&source_tokens); |
| BuildSystemDictionary(source_tokens, source_tokens.size()); |
| |
| Token target_token = source_token; |
| target_token.key.swap(target_token.value); |
| |
| scoped_ptr<SystemDictionary> system_dic( |
| SystemDictionary::Builder(dic_fn_).Build()); |
| ASSERT_TRUE(system_dic.get() != NULL) |
| << "Failed to open dictionary source:" << dic_fn_; |
| system_dic->PopulateReverseLookupCache(kDoraemon); |
| CheckTokenExistenceCallback callback(&target_token); |
| system_dic->LookupReverse(kDoraemon, &callback); |
| EXPECT_TRUE(callback.found()) |
| << "Could not find " << PrintToken(source_token); |
| system_dic->ClearReverseLookupCache(); |
| } |
| |
| TEST_F(SystemDictionaryTest, SpellingCorrectionTokens) { |
| vector<Token> tokens(3); |
| |
| // "あぼがど" |
| tokens[0].key = "\xe3\x81\x82\xe3\x81\xbc\xe3\x81\x8c\xe3\x81\xa9"; |
| // "アボカド" |
| tokens[0].value = "\xe3\x82\xa2\xe3\x83\x9c\xe3\x82\xab\xe3\x83\x89"; |
| tokens[0].cost = 1; |
| tokens[0].lid = 0; |
| tokens[0].rid = 2; |
| tokens[0].attributes = Token::SPELLING_CORRECTION; |
| |
| // "しゅみれーしょん" |
| tokens[1].key = |
| "\xe3\x81\x97\xe3\x82\x85\xe3\x81\xbf\xe3\x82\x8c" |
| "\xe3\x83\xbc\xe3\x81\x97\xe3\x82\x87\xe3\x82\x93"; |
| // "シミュレーション" |
| tokens[1].value = |
| "\xe3\x82\xb7\xe3\x83\x9f\xe3\x83\xa5\xe3\x83\xac" |
| "\xe3\x83\xbc\xe3\x82\xb7\xe3\x83\xa7\xe3\x83\xb3"; |
| tokens[1].cost = 1; |
| tokens[1].lid = 100; |
| tokens[1].rid = 3; |
| tokens[1].attributes = Token::SPELLING_CORRECTION; |
| |
| // "あきはばら" |
| tokens[2].key = |
| "\xe3\x81\x82\xe3\x81\x8d\xe3\x81\xaf\xe3\x81\xb0\xe3\x82\x89"; |
| // "秋葉原" |
| tokens[2].value = "\xe7\xa7\x8b\xe8\x91\x89\xe5\x8e\x9f"; |
| tokens[2].cost = 1000; |
| tokens[2].lid = 1; |
| tokens[2].rid = 2; |
| |
| vector<Token *> source_tokens; |
| for (size_t i = 0; i < tokens.size(); ++i) { |
| source_tokens.push_back(&tokens[i]); |
| } |
| BuildSystemDictionary(source_tokens, source_tokens.size()); |
| |
| scoped_ptr<SystemDictionary> system_dic( |
| SystemDictionary::Builder(dic_fn_).Build()); |
| ASSERT_TRUE(system_dic.get() != NULL) |
| << "Failed to open dictionary source:" << dic_fn_; |
| |
| for (size_t i = 0; i < source_tokens.size(); ++i) { |
| CheckTokenExistenceCallback callback(source_tokens[i]); |
| system_dic->LookupPrefix(source_tokens[i]->key, false, &callback); |
| EXPECT_TRUE(callback.found()) |
| << "Token " << i << " was not found: " << PrintToken(*source_tokens[i]); |
| } |
| } |
| |
| TEST_F(SystemDictionaryTest, EnableNoModifierTargetWithLoudsTrie) { |
| // "かつ" |
| const string k0 = "\xE3\x81\x8B\xE3\x81\xA4"; |
| // "かっこ" |
| const string k1 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93"; |
| // "かつこう" |
| const string k2 = "\xE3\x81\x8B\xE3\x81\xA4\xE3\x81\x93\xE3\x81\x86"; |
| // "かっこう" |
| const string k3 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86"; |
| // "がっこう" |
| const string k4 = "\xE3\x81\x8C\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86"; |
| |
| scoped_ptr<Token> tokens[5]; |
| tokens[0].reset(CreateToken(k0, "aa")); |
| tokens[1].reset(CreateToken(k1, "bb")); |
| tokens[2].reset(CreateToken(k2, "cc")); |
| tokens[3].reset(CreateToken(k3, "dd")); |
| tokens[4].reset(CreateToken(k4, "ee")); |
| |
| vector<Token *> source_tokens; |
| for (size_t i = 0; i < arraysize(tokens); ++i) { |
| source_tokens.push_back(tokens[i].get()); |
| } |
| text_dict_->CollectTokens(&source_tokens); |
| BuildSystemDictionary(source_tokens, 100); |
| |
| scoped_ptr<SystemDictionary> system_dic( |
| SystemDictionary::Builder(dic_fn_).Build()); |
| ASSERT_TRUE(system_dic.get() != NULL) |
| << "Failed to open dictionary source:" << dic_fn_; |
| |
| // Prefix search |
| for (size_t i = 0; i < arraysize(tokens); ++i) { |
| CheckTokenExistenceCallback callback(tokens[i].get()); |
| // "かつこう" -> "かつ", "かっこ", "かつこう", "かっこう" and "がっこう" |
| system_dic->LookupPrefix( |
| k2, kEnableKanaModiferInsensitiveLookup, &callback); |
| EXPECT_TRUE(callback.found()) |
| << "Token " << i << " was not found: " << PrintToken(*tokens[i]); |
| } |
| |
| // Predictive searches |
| { |
| // "かつ" -> "かつ", "かっこ", "かつこう", "かっこう" and "がっこう" |
| vector<Token *> expected; |
| for (size_t i = 0; i < arraysize(tokens); ++i) { |
| expected.push_back(tokens[i].get()); |
| } |
| CheckMultiTokensExistenceCallback callback(expected); |
| system_dic->LookupPredictive( |
| k0, kEnableKanaModiferInsensitiveLookup, &callback); |
| EXPECT_TRUE(callback.AreAllFound()); |
| } |
| { |
| // "かっこ" -> "かっこ", "かっこう" and "がっこう" |
| vector<Token *> expected; |
| expected.push_back(tokens[1].get()); |
| expected.push_back(tokens[3].get()); |
| expected.push_back(tokens[4].get()); |
| CheckMultiTokensExistenceCallback callback(expected); |
| system_dic->LookupPredictive( |
| k1, kEnableKanaModiferInsensitiveLookup, &callback); |
| EXPECT_TRUE(callback.AreAllFound()); |
| } |
| } |
| |
| TEST_F(SystemDictionaryTest, NoModifierForKanaEntries) { |
| // "ていすてぃんぐ", "テイスティング" |
| scoped_ptr<Token> t0(CreateToken( |
| "\xe3\x81\xa6\xe3\x81\x84\xe3\x81\x99\xe3\x81\xa6" |
| "\xe3\x81\x83\xe3\x82\x93\xe3\x81\x90", |
| "\xe3\x83\x86\xe3\x82\xa4\xe3\x82\xb9\xe3\x83\x86" |
| "\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0")); |
| // "てすとです", "てすとです" |
| scoped_ptr<Token> t1(CreateToken( |
| "\xe3\x81\xa6\xe3\x81\x99\xe3\x81\xa8\xe3\x81\xa7\xe3\x81\x99", |
| "\xe3\x81\xa6\xe3\x81\x99\xe3\x81\xa8\xe3\x81\xa7\xe3\x81\x99")); |
| |
| vector<Token *> source_tokens; |
| source_tokens.push_back(t0.get()); |
| source_tokens.push_back(t1.get()); |
| |
| text_dict_->CollectTokens(&source_tokens); |
| BuildSystemDictionary(source_tokens, 100); |
| |
| scoped_ptr<SystemDictionary> system_dic( |
| SystemDictionary::Builder(dic_fn_).Build()); |
| ASSERT_TRUE(system_dic.get() != NULL) |
| << "Failed to open dictionary source:" << dic_fn_; |
| |
| // Lookup |t0| from "ていすていんぐ" |
| const string k = "\xe3\x81\xa6\xe3\x81\x84\xe3\x81\x99\xe3\x81\xa6" |
| "\xe3\x81\x84\xe3\x82\x93\xe3\x81\x90"; |
| CheckTokenExistenceCallback callback(t0.get()); |
| system_dic->LookupPrefix(k, kEnableKanaModiferInsensitiveLookup, |
| &callback); |
| EXPECT_TRUE(callback.found()) << "Not found: " << PrintToken(*t0); |
| } |
| |
| TEST_F(SystemDictionaryTest, DoNotReturnNoModifierTargetWithLoudsTrie) { |
| // "かつ" |
| const string k0 = "\xE3\x81\x8B\xE3\x81\xA4"; |
| // "かっこ" |
| const string k1 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93"; |
| // "かつこう" |
| const string k2 = "\xE3\x81\x8B\xE3\x81\xA4\xE3\x81\x93\xE3\x81\x86"; |
| // "かっこう" |
| const string k3 = "\xE3\x81\x8B\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86"; |
| // "がっこう" |
| const string k4 = "\xE3\x81\x8C\xE3\x81\xA3\xE3\x81\x93\xE3\x81\x86"; |
| |
| scoped_ptr<Token> t0(CreateToken(k0, "aa")); |
| scoped_ptr<Token> t1(CreateToken(k1, "bb")); |
| scoped_ptr<Token> t2(CreateToken(k2, "cc")); |
| scoped_ptr<Token> t3(CreateToken(k3, "dd")); |
| scoped_ptr<Token> t4(CreateToken(k4, "ee")); |
| |
| vector<Token *> source_tokens; |
| source_tokens.push_back(t0.get()); |
| source_tokens.push_back(t1.get()); |
| source_tokens.push_back(t2.get()); |
| source_tokens.push_back(t3.get()); |
| source_tokens.push_back(t4.get()); |
| |
| text_dict_->CollectTokens(&source_tokens); |
| BuildSystemDictionary(source_tokens, 100); |
| |
| scoped_ptr<SystemDictionary> system_dic( |
| SystemDictionary::Builder(dic_fn_).Build()); |
| ASSERT_TRUE(system_dic.get() != NULL) |
| << "Failed to open dictionary source:" << dic_fn_; |
| |
| // Prefix search |
| { |
| // "かっこう" (k3) -> "かっこ" (k1) and "かっこう" (k3) |
| // Make sure "がっこう" is not in the results when searched by "かっこう" |
| vector<Token *> to_be_looked_up, not_to_be_looked_up; |
| to_be_looked_up.push_back(t1.get()); |
| to_be_looked_up.push_back(t3.get()); |
| not_to_be_looked_up.push_back(t0.get()); |
| not_to_be_looked_up.push_back(t2.get()); |
| not_to_be_looked_up.push_back(t4.get()); |
| for (size_t i = 0; i < to_be_looked_up.size(); ++i) { |
| CheckTokenExistenceCallback callback(to_be_looked_up[i]); |
| system_dic->LookupPrefix( |
| k3, kDisableKanaModiferInsensitiveLookup, &callback); |
| EXPECT_TRUE(callback.found()) |
| << "Token is not found: " << PrintToken(*to_be_looked_up[i]); |
| } |
| for (size_t i = 0; i < not_to_be_looked_up.size(); ++i) { |
| CheckTokenExistenceCallback callback(not_to_be_looked_up[i]); |
| system_dic->LookupPrefix( |
| k3, kDisableKanaModiferInsensitiveLookup, &callback); |
| EXPECT_FALSE(callback.found()) |
| << "Token should not be found: " |
| << PrintToken(*not_to_be_looked_up[i]); |
| } |
| } |
| |
| // Predictive search |
| { |
| // "かっこ" -> "かっこ" and "かっこう" |
| // Make sure "がっこう" is not in the results when searched by "かっこ" |
| vector<Token *> to_be_looked_up, not_to_be_looked_up; |
| to_be_looked_up.push_back(t1.get()); |
| to_be_looked_up.push_back(t3.get()); |
| not_to_be_looked_up.push_back(t0.get()); |
| not_to_be_looked_up.push_back(t2.get()); |
| not_to_be_looked_up.push_back(t4.get()); |
| for (size_t i = 0; i < to_be_looked_up.size(); ++i) { |
| CheckTokenExistenceCallback callback(to_be_looked_up[i]); |
| system_dic->LookupPredictive( |
| k1, kDisableKanaModiferInsensitiveLookup, &callback); |
| EXPECT_TRUE(callback.found()) |
| << "Token is not found: " << PrintToken(*to_be_looked_up[i]); |
| } |
| for (size_t i = 0; i < not_to_be_looked_up.size(); ++i) { |
| CheckTokenExistenceCallback callback(not_to_be_looked_up[i]); |
| system_dic->LookupPredictive( |
| k3, kDisableKanaModiferInsensitiveLookup, &callback); |
| EXPECT_FALSE(callback.found()) |
| << "Token should not be found: " |
| << PrintToken(*not_to_be_looked_up[i]); |
| } |
| } |
| } |
| |
| } // namespace dictionary |
| } // namespace mozc |