Prevent LanguageAwareRewriter from suggesting the raw text if the query is in the dictionary as a key

This CL adds DictionaryInterface::HasKey, which LanguageAwareRewriter uses to determine when the raw text should be suggested.

Closes Issue 267.

BUG=Issue mozc:267
TEST=manually done on Ubuntu 14.04

git-svn-id: https://mozc.googlecode.com/svn/trunk@479 a6090854-d499-a067-5803-1114d4e51264
diff --git a/src/converter/immutable_converter_test.cc b/src/converter/immutable_converter_test.cc
index 5b982eb..b3b4c96 100644
--- a/src/converter/immutable_converter_test.cc
+++ b/src/converter/immutable_converter_test.cc
@@ -247,6 +247,7 @@
       : target_query_(query), received_target_query_(false) {}
   virtual ~KeyCheckDictionary() {}
 
+  virtual bool HasKey(StringPiece key) const { return false; }
   virtual bool HasValue(StringPiece value) const { return false; }
 
   virtual void LookupPredictive(
diff --git a/src/dictionary/dictionary_impl.cc b/src/dictionary/dictionary_impl.cc
index bb1b6d3..b6dd775 100644
--- a/src/dictionary/dictionary_impl.cc
+++ b/src/dictionary/dictionary_impl.cc
@@ -70,6 +70,15 @@
   dics_.clear();
 }
 
+bool DictionaryImpl::HasKey(StringPiece key) const {
+  for (size_t i = 0; i < dics_.size(); ++i) {
+    if (dics_[i]->HasKey(key)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 bool DictionaryImpl::HasValue(StringPiece value) const {
   for (size_t i = 0; i < dics_.size(); ++i) {
     if (dics_[i]->HasValue(value)) {
diff --git a/src/dictionary/dictionary_impl.h b/src/dictionary/dictionary_impl.h
index d1bb6c3..feb6231 100644
--- a/src/dictionary/dictionary_impl.h
+++ b/src/dictionary/dictionary_impl.h
@@ -62,6 +62,8 @@
 
   virtual ~DictionaryImpl();
 
+  virtual bool HasKey(StringPiece key) const;
+
   virtual bool HasValue(StringPiece value) const;
 
   virtual void LookupPredictive(
diff --git a/src/dictionary/dictionary_interface.h b/src/dictionary/dictionary_interface.h
index 8c8e672..01b3721 100644
--- a/src/dictionary/dictionary_interface.h
+++ b/src/dictionary/dictionary_interface.h
@@ -112,6 +112,9 @@
 
   virtual ~DictionaryInterface() {}
 
+  // Returns true if the dictionary has an entry for the given key.
+  virtual bool HasKey(StringPiece key) const = 0;
+
   // Returns true if the dictionary has an entry for the given value.
   virtual bool HasValue(StringPiece value) const = 0;
 
diff --git a/src/dictionary/dictionary_mock.cc b/src/dictionary/dictionary_mock.cc
index 80c2a12..7dc79e6 100644
--- a/src/dictionary/dictionary_mock.cc
+++ b/src/dictionary/dictionary_mock.cc
@@ -45,7 +45,21 @@
 
 const int kDummyPosId = 1;
 
-bool HasValueInternal(const map<string, vector<Token *> > &dic,
+bool HasKeyInternal(const map<string, vector<Token *>> &dic, StringPiece key) {
+  typedef vector<Token *> TokenPtrVector;
+  for (map<string, vector<Token *> >::const_iterator map_it = dic.begin();
+       map_it != dic.end(); ++map_it) {
+    const TokenPtrVector &v = map_it->second;
+    for (TokenPtrVector::const_iterator it = v.begin(); it != v.end(); ++it) {
+      if ((*it)->key == key) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool HasValueInternal(const map<string, vector<Token *>> &dic,
                       StringPiece value) {
   typedef vector<Token *> TokenPtrVector;
   for (map<string, vector<Token *> >::const_iterator map_it = dic.begin();
@@ -92,6 +106,13 @@
   DeletePtrs(&predictive_dictionary_);
 }
 
+bool DictionaryMock::HasKey(StringPiece key) const {
+  return HasKeyInternal(predictive_dictionary_, key) ||
+         HasKeyInternal(prefix_dictionary_, key) ||
+         HasKeyInternal(reverse_dictionary_, key) ||
+         HasKeyInternal(exact_dictionary_, key);
+}
+
 bool DictionaryMock::HasValue(StringPiece value) const {
   return HasValueInternal(predictive_dictionary_, value) ||
          HasValueInternal(prefix_dictionary_, value) ||
diff --git a/src/dictionary/dictionary_mock.h b/src/dictionary/dictionary_mock.h
index 5cc94f8..53e332c 100644
--- a/src/dictionary/dictionary_mock.h
+++ b/src/dictionary/dictionary_mock.h
@@ -71,6 +71,8 @@
   DictionaryMock();
   virtual ~DictionaryMock();
 
+  virtual bool HasKey(StringPiece key) const;
+
   virtual bool HasValue(StringPiece value) const;
 
   // DictionaryMock doesn't support a limitation.  Note also that only the
diff --git a/src/dictionary/suffix_dictionary.cc b/src/dictionary/suffix_dictionary.cc
index 8643571..ee8424d 100644
--- a/src/dictionary/suffix_dictionary.cc
+++ b/src/dictionary/suffix_dictionary.cc
@@ -72,6 +72,15 @@
 
 }  // namespace
 
+bool SuffixDictionary::HasKey(StringPiece key) const {
+  // SuffixDictionary::HasKey() is never called and unnecessary to
+  // implement. To avoid accidental calls of this method, the method simply dies
+  // so that we can immediately notice this unimplemented method during
+  // development.
+  LOG(FATAL) << "bool SuffixDictionary::HasKey() is not implemented";
+  return false;
+}
+
 bool SuffixDictionary::HasValue(StringPiece value) const {
   // SuffixDictionary::HasValue() is never called and unnecessary to
   // implement. To avoid accidental calls of this method, the method simply dies
diff --git a/src/dictionary/suffix_dictionary.h b/src/dictionary/suffix_dictionary.h
index dd34535..e739f3b 100644
--- a/src/dictionary/suffix_dictionary.h
+++ b/src/dictionary/suffix_dictionary.h
@@ -55,6 +55,7 @@
                    size_t suffix_tokens_size);
   virtual ~SuffixDictionary();
 
+  virtual bool HasKey(StringPiece key) const;
   virtual bool HasValue(StringPiece value) const;
 
   // Kana modifier insensitive lookup is not supported.
diff --git a/src/dictionary/system/system_dictionary.cc b/src/dictionary/system/system_dictionary.cc
index 539847d..545ac3d 100644
--- a/src/dictionary/system/system_dictionary.cc
+++ b/src/dictionary/system/system_dictionary.cc
@@ -657,6 +657,12 @@
       new ReverseLookupIndex(codec_, token_array_.get()));
 }
 
+bool SystemDictionary::HasKey(StringPiece key) const {
+  string encoded_key;
+  codec_->EncodeKey(key, &encoded_key);
+  return (key_trie_->ExactSearch(encoded_key) != -1);
+}
+
 bool SystemDictionary::HasValue(StringPiece value) const {
   string encoded_value;
   codec_->EncodeValue(value, &encoded_value);
diff --git a/src/dictionary/system/system_dictionary.h b/src/dictionary/system/system_dictionary.h
index f138951..bc5b3a6 100644
--- a/src/dictionary/system/system_dictionary.h
+++ b/src/dictionary/system/system_dictionary.h
@@ -140,6 +140,7 @@
       const char *ptr, int len, Options options);
 
   // Implementation of DictionaryInterface.
+  virtual bool HasKey(StringPiece key) const;
   virtual bool HasValue(StringPiece value) const;
 
   // Predictive lookup
diff --git a/src/dictionary/system/value_dictionary.cc b/src/dictionary/system/value_dictionary.cc
index fe6c33f..dba5543 100644
--- a/src/dictionary/system/value_dictionary.cc
+++ b/src/dictionary/system/value_dictionary.cc
@@ -108,6 +108,14 @@
 }
 
 // ValueDictionary is supposed to use the same data with SystemDictionary
+// and SystemDictionary::HasKey should return the same result with
+// ValueDictionary::HasKey.  So we can skip the actual logic of HasKey
+// and return just false.
+bool ValueDictionary::HasKey(StringPiece key) const {
+  return false;
+}
+
+// ValueDictionary is supposed to use the same data with SystemDictionary
 // and SystemDictionary::HasValue should return the same result with
 // ValueDictionary::HasValue.  So we can skip the actual logic of HasValue
 // and return just false.
diff --git a/src/dictionary/system/value_dictionary.h b/src/dictionary/system/value_dictionary.h
index 5c5a4e7..5f002af 100644
--- a/src/dictionary/system/value_dictionary.h
+++ b/src/dictionary/system/value_dictionary.h
@@ -69,6 +69,7 @@
       const POSMatcher& pos_matcher, const char *ptr, int len);
 
   // Implementation of DictionaryInterface
+  virtual bool HasKey(StringPiece key) const;
   virtual bool HasValue(StringPiece value) const;
   virtual void LookupPredictive(
       StringPiece key, bool use_kana_modifier_insensitive_lookup,
diff --git a/src/dictionary/user_dictionary.cc b/src/dictionary/user_dictionary.cc
index 4d1bbd4..3cbd8ed 100644
--- a/src/dictionary/user_dictionary.cc
+++ b/src/dictionary/user_dictionary.cc
@@ -291,6 +291,13 @@
   delete tokens_;
 }
 
+bool UserDictionary::HasKey(StringPiece key) const {
+  // TODO(noriyukit): Currently, we don't support HasKey() for user dictionary
+  // because we need to search tokens linearly, which might be slow in extreme
+  // cases where 100K entries exist.
+  return false;
+}
+
 bool UserDictionary::HasValue(StringPiece value) const {
   // TODO(noriyukit): Currently, we don't support HasValue() for user dictionary
   // because we need to search tokens linearly, which might be slow in extreme
diff --git a/src/dictionary/user_dictionary.h b/src/dictionary/user_dictionary.h
index 9f38e57..f4a43fa 100644
--- a/src/dictionary/user_dictionary.h
+++ b/src/dictionary/user_dictionary.h
@@ -55,6 +55,7 @@
                  SuppressionDictionary *suppression_dictionary);
   virtual ~UserDictionary();
 
+  virtual bool HasKey(StringPiece key) const;
   virtual bool HasValue(StringPiece value) const;
   // Lookup methods don't support kana modifier insensitive lookup, i.e.,
   // Callback::OnActualKey() is never called.
diff --git a/src/dictionary/user_dictionary_stub.h b/src/dictionary/user_dictionary_stub.h
index 9ccf60a..07f7e02 100644
--- a/src/dictionary/user_dictionary_stub.h
+++ b/src/dictionary/user_dictionary_stub.h
@@ -37,6 +37,9 @@
 
 class UserDictionaryStub : public DictionaryInterface {
  public:
+  virtual bool HasKey(StringPiece key) const {
+    return false;
+  }
   virtual bool HasValue(StringPiece value) const {
     return false;
   }
diff --git a/src/mozc_version_template.txt b/src/mozc_version_template.txt
index b6b4a66..d31c95d 100644
--- a/src/mozc_version_template.txt
+++ b/src/mozc_version_template.txt
@@ -1,6 +1,6 @@
 MAJOR=2
 MINOR=16
-BUILD=2012
+BUILD=2013
 REVISION=102
 # NACL_DICTIONARY_VERSION is the target version of the system dictionary to be
 # downloaded by NaCl Mozc.
diff --git a/src/prediction/dictionary_predictor_test.cc b/src/prediction/dictionary_predictor_test.cc
index 3c7c9b1..cf0cec9 100644
--- a/src/prediction/dictionary_predictor_test.cc
+++ b/src/prediction/dictionary_predictor_test.cc
@@ -283,6 +283,8 @@
   CallCheckDictionary() {}
   virtual ~CallCheckDictionary() {}
 
+  MOCK_CONST_METHOD1(HasKey,
+                     bool(StringPiece));
   MOCK_CONST_METHOD1(HasValue,
                      bool(StringPiece));
   MOCK_CONST_METHOD3(LookupPredictive,
@@ -1690,6 +1692,10 @@
   TestSuffixDictionary() {}
   virtual ~TestSuffixDictionary() {}
 
+  virtual bool HasKey(StringPiece value) const {
+    return false;
+  }
+
   virtual bool HasValue(StringPiece value) const {
     return false;
   }
diff --git a/src/rewriter/language_aware_rewriter.cc b/src/rewriter/language_aware_rewriter.cc
index 4fee439..8f05e47 100644
--- a/src/rewriter/language_aware_rewriter.cc
+++ b/src/rewriter/language_aware_rewriter.cc
@@ -130,11 +130,16 @@
     return true;
   }
 
+  // If the composition is stored as a key in the dictionary, like
+  // "はな" (hana) or "たけ" (take), the query is not handled as a raw query.
+  // This is a little conservative, but it is the safer approach.
+  if (dictionary->HasKey(key)) {
+    return false;
+  }
+
   // If the input text is stored in the dictionary, it is perhaps a raw query.
   // For example, the input characters of "れもヴぇ" (remove) is in the
-  // dictionary, so it is treated as a raw text.  This logic is a little
-  // aggressive because "たけ" (take), "ほうせ" (house) and so forth are also
-  // treated as raw texts.
+  // dictionary, so it is treated as a raw text.
   if (dictionary->HasValue(raw_text)) {
     *rank = 2;
     return true;
diff --git a/src/rewriter/language_aware_rewriter_test.cc b/src/rewriter/language_aware_rewriter_test.cc
index 4d6b423..1249be2 100644
--- a/src/rewriter/language_aware_rewriter_test.cc
+++ b/src/rewriter/language_aware_rewriter_test.cc
@@ -156,6 +156,12 @@
   dictionary_mock_->AddLookupExact("house", "house", "house", Token::NONE);
   dictionary_mock_->AddLookupExact("query", "query", "query", Token::NONE);
   dictionary_mock_->AddLookupExact("google", "google", "google", Token::NONE);
+  dictionary_mock_->AddLookupExact("naru", "naru", "naru", Token::NONE);
+  // "なる"
+  dictionary_mock_->AddLookupExact("\xE3\x81\xAA\xE3\x82\x8B",
+                                   "\xE3\x81\xAA\xE3\x82\x8B",
+                                   "naru",
+                                   Token::NONE);
 
   scoped_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());
 
@@ -258,6 +264,19 @@
                                                &composition, &segments));
     EXPECT_EQ("google", composition);
   }
+
+  {
+    // The key "なる" has two values, "naru" and "なる".
+    // In this case, the language aware rewriter should not be triggered.
+    string composition;
+    Segments segments;
+    EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "naru",
+                                               &composition, &segments));
+
+    // "なる"
+    EXPECT_EQ("\xE3\x81\xAA\xE3\x82\x8B", composition);
+    EXPECT_EQ(0, segments.conversion_segment(0).candidates_size());
+  }
 }
 
 TEST_F(LanguageAwareRewriterTest, LanguageAwareInputUsageStats) {