Prevent LanguageAwareRewriter from suggesting the raw text if the query is in the dictionary as a key
This CL adds DictionaryInterface::HasKey, which LanguageAwareRewriter uses to determine when the raw text should be suggested.
Closes Issue 267.
BUG=Issue mozc:267
TEST=manually done on Ubuntu 14.04
git-svn-id: https://mozc.googlecode.com/svn/trunk@479 a6090854-d499-a067-5803-1114d4e51264
diff --git a/src/converter/immutable_converter_test.cc b/src/converter/immutable_converter_test.cc
index 5b982eb..b3b4c96 100644
--- a/src/converter/immutable_converter_test.cc
+++ b/src/converter/immutable_converter_test.cc
@@ -247,6 +247,7 @@
: target_query_(query), received_target_query_(false) {}
virtual ~KeyCheckDictionary() {}
+  virtual bool HasKey(StringPiece key) const { return false; }  // Never found.
virtual bool HasValue(StringPiece value) const { return false; }
virtual void LookupPredictive(
diff --git a/src/dictionary/dictionary_impl.cc b/src/dictionary/dictionary_impl.cc
index bb1b6d3..b6dd775 100644
--- a/src/dictionary/dictionary_impl.cc
+++ b/src/dictionary/dictionary_impl.cc
@@ -70,6 +70,15 @@
dics_.clear();
}
+// Asks each dictionary held in dics_, in order, whether it contains |key|.
+// Stops at the first dictionary that answers true; returns false when none
+// of them does (including when dics_ is empty).
+bool DictionaryImpl::HasKey(StringPiece key) const {
+  bool found = false;
+  for (size_t index = 0; !found && index < dics_.size(); ++index) {
+    found = dics_[index]->HasKey(key);
+  }
+  return found;
+}
+
bool DictionaryImpl::HasValue(StringPiece value) const {
for (size_t i = 0; i < dics_.size(); ++i) {
if (dics_[i]->HasValue(value)) {
diff --git a/src/dictionary/dictionary_impl.h b/src/dictionary/dictionary_impl.h
index d1bb6c3..feb6231 100644
--- a/src/dictionary/dictionary_impl.h
+++ b/src/dictionary/dictionary_impl.h
@@ -62,6 +62,8 @@
virtual ~DictionaryImpl();
+ virtual bool HasKey(StringPiece key) const;
+
virtual bool HasValue(StringPiece value) const;
virtual void LookupPredictive(
diff --git a/src/dictionary/dictionary_interface.h b/src/dictionary/dictionary_interface.h
index 8c8e672..01b3721 100644
--- a/src/dictionary/dictionary_interface.h
+++ b/src/dictionary/dictionary_interface.h
@@ -112,6 +112,9 @@
virtual ~DictionaryInterface() {}
+ // Returns true if the dictionary has an entry for the given key.
+ virtual bool HasKey(StringPiece key) const = 0;
+
// Returns true if the dictionary has an entry for the given value.
virtual bool HasValue(StringPiece value) const = 0;
diff --git a/src/dictionary/dictionary_mock.cc b/src/dictionary/dictionary_mock.cc
index 80c2a12..7dc79e6 100644
--- a/src/dictionary/dictionary_mock.cc
+++ b/src/dictionary/dictionary_mock.cc
@@ -45,7 +45,21 @@
const int kDummyPosId = 1;
-bool HasValueInternal(const map<string, vector<Token *> > &dic,
+// Returns true if any Token stored in |dic| has a key member equal to |key|.
+// Only the tokens' own key members are compared; the string keys of the map
+// itself are not consulted.
+bool HasKeyInternal(const map<string, vector<Token *>> &dic, StringPiece key) {
+  typedef vector<Token *> TokenList;
+  typedef map<string, TokenList> TokenMap;
+  for (TokenMap::const_iterator entry = dic.begin(); entry != dic.end();
+       ++entry) {
+    const TokenList &tokens = entry->second;
+    for (size_t i = 0; i < tokens.size(); ++i) {
+      if (tokens[i]->key == key) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool HasValueInternal(const map<string, vector<Token *>> &dic,
StringPiece value) {
typedef vector<Token *> TokenPtrVector;
for (map<string, vector<Token *> >::const_iterator map_it = dic.begin();
@@ -92,6 +106,13 @@
DeletePtrs(&predictive_dictionary_);
}
+// Checks the predictive, prefix, reverse and exact mock dictionaries, in that
+// order, and returns true at the first one holding a token whose key equals
+// |key|. Later dictionaries are not searched once a match is found.
+bool DictionaryMock::HasKey(StringPiece key) const {
+  if (HasKeyInternal(predictive_dictionary_, key)) {
+    return true;
+  }
+  if (HasKeyInternal(prefix_dictionary_, key)) {
+    return true;
+  }
+  if (HasKeyInternal(reverse_dictionary_, key)) {
+    return true;
+  }
+  return HasKeyInternal(exact_dictionary_, key);
+}
+
bool DictionaryMock::HasValue(StringPiece value) const {
return HasValueInternal(predictive_dictionary_, value) ||
HasValueInternal(prefix_dictionary_, value) ||
diff --git a/src/dictionary/dictionary_mock.h b/src/dictionary/dictionary_mock.h
index 5cc94f8..53e332c 100644
--- a/src/dictionary/dictionary_mock.h
+++ b/src/dictionary/dictionary_mock.h
@@ -71,6 +71,8 @@
DictionaryMock();
virtual ~DictionaryMock();
+ virtual bool HasKey(StringPiece key) const;
+
virtual bool HasValue(StringPiece value) const;
// DictionaryMock doesn't support a limitation. Note also that only the
diff --git a/src/dictionary/suffix_dictionary.cc b/src/dictionary/suffix_dictionary.cc
index 8643571..ee8424d 100644
--- a/src/dictionary/suffix_dictionary.cc
+++ b/src/dictionary/suffix_dictionary.cc
@@ -72,6 +72,15 @@
} // namespace
+bool SuffixDictionary::HasKey(StringPiece key) const {
+  // SuffixDictionary::HasKey() is never called, so there is no need to
+  // implement it. To guard against accidental calls, the method simply dies
+  // so that a call to this unimplemented method is noticed immediately during
+  // development.
+  LOG(FATAL) << "bool SuffixDictionary::HasKey() is not implemented";
+  return false;
+}
+
bool SuffixDictionary::HasValue(StringPiece value) const {
// SuffixDictionary::HasValue() is never called and unnecessary to
// implement. To avoid accidental calls of this method, the method simply dies
diff --git a/src/dictionary/suffix_dictionary.h b/src/dictionary/suffix_dictionary.h
index dd34535..e739f3b 100644
--- a/src/dictionary/suffix_dictionary.h
+++ b/src/dictionary/suffix_dictionary.h
@@ -55,6 +55,7 @@
size_t suffix_tokens_size);
virtual ~SuffixDictionary();
+ virtual bool HasKey(StringPiece key) const;
virtual bool HasValue(StringPiece value) const;
// Kana modifier insensitive lookup is not supported.
diff --git a/src/dictionary/system/system_dictionary.cc b/src/dictionary/system/system_dictionary.cc
index 539847d..545ac3d 100644
--- a/src/dictionary/system/system_dictionary.cc
+++ b/src/dictionary/system/system_dictionary.cc
@@ -657,6 +657,12 @@
new ReverseLookupIndex(codec_, token_array_.get()));
}
+// Reports whether |key| is present in the key trie. The key is first encoded
+// with codec_, then looked up by exact match; an ExactSearch() result of -1
+// is treated as "not found".
+bool SystemDictionary::HasKey(StringPiece key) const {
+  string key_for_trie;
+  codec_->EncodeKey(key, &key_for_trie);
+  const bool not_found = (key_trie_->ExactSearch(key_for_trie) == -1);
+  return !not_found;
+}
+
bool SystemDictionary::HasValue(StringPiece value) const {
string encoded_value;
codec_->EncodeValue(value, &encoded_value);
diff --git a/src/dictionary/system/system_dictionary.h b/src/dictionary/system/system_dictionary.h
index f138951..bc5b3a6 100644
--- a/src/dictionary/system/system_dictionary.h
+++ b/src/dictionary/system/system_dictionary.h
@@ -140,6 +140,7 @@
const char *ptr, int len, Options options);
// Implementation of DictionaryInterface.
+ virtual bool HasKey(StringPiece key) const;
virtual bool HasValue(StringPiece value) const;
// Predictive lookup
diff --git a/src/dictionary/system/value_dictionary.cc b/src/dictionary/system/value_dictionary.cc
index fe6c33f..dba5543 100644
--- a/src/dictionary/system/value_dictionary.cc
+++ b/src/dictionary/system/value_dictionary.cc
@@ -108,6 +108,14 @@
}
// ValueDictionary is supposed to use the same data with SystemDictionary
+// and SystemDictionary::HasKey should return the same result as
+// ValueDictionary::HasKey. So we can skip the actual logic of HasKey
+// and just return false.
+bool ValueDictionary::HasKey(StringPiece key) const {
+  return false;
+}
+
+// ValueDictionary is supposed to use the same data with SystemDictionary
// and SystemDictionary::HasValue should return the same result with
// ValueDictionary::HasValue. So we can skip the actual logic of HasValue
// and return just false.
diff --git a/src/dictionary/system/value_dictionary.h b/src/dictionary/system/value_dictionary.h
index 5c5a4e7..5f002af 100644
--- a/src/dictionary/system/value_dictionary.h
+++ b/src/dictionary/system/value_dictionary.h
@@ -69,6 +69,7 @@
const POSMatcher& pos_matcher, const char *ptr, int len);
// Implementation of DictionaryInterface
+ virtual bool HasKey(StringPiece key) const;
virtual bool HasValue(StringPiece value) const;
virtual void LookupPredictive(
StringPiece key, bool use_kana_modifier_insensitive_lookup,
diff --git a/src/dictionary/user_dictionary.cc b/src/dictionary/user_dictionary.cc
index 4d1bbd4..3cbd8ed 100644
--- a/src/dictionary/user_dictionary.cc
+++ b/src/dictionary/user_dictionary.cc
@@ -291,6 +291,13 @@
delete tokens_;
}
+bool UserDictionary::HasKey(StringPiece key) const {
+  // TODO(noriyukit): HasKey() is currently not supported for the user
+  // dictionary because it would require a linear search over the tokens, which
+  // might be slow in extreme cases where 100K entries exist.
+  return false;  // Always false until the TODO above is resolved.
+}
+
bool UserDictionary::HasValue(StringPiece value) const {
// TODO(noriyukit): Currently, we don't support HasValue() for user dictionary
// because we need to search tokens linearly, which might be slow in extreme
diff --git a/src/dictionary/user_dictionary.h b/src/dictionary/user_dictionary.h
index 9f38e57..f4a43fa 100644
--- a/src/dictionary/user_dictionary.h
+++ b/src/dictionary/user_dictionary.h
@@ -55,6 +55,7 @@
SuppressionDictionary *suppression_dictionary);
virtual ~UserDictionary();
+ virtual bool HasKey(StringPiece key) const;
virtual bool HasValue(StringPiece value) const;
// Lookup methods don't support kana modifier insensitive lookup, i.e.,
// Callback::OnActualKey() is never called.
diff --git a/src/dictionary/user_dictionary_stub.h b/src/dictionary/user_dictionary_stub.h
index 9ccf60a..07f7e02 100644
--- a/src/dictionary/user_dictionary_stub.h
+++ b/src/dictionary/user_dictionary_stub.h
@@ -37,6 +37,9 @@
class UserDictionaryStub : public DictionaryInterface {
public:
+  virtual bool HasKey(StringPiece key) const {
+    return false;  // Stub: every key is reported as absent.
+  }
virtual bool HasValue(StringPiece value) const {
return false;
}
diff --git a/src/mozc_version_template.txt b/src/mozc_version_template.txt
index b6b4a66..d31c95d 100644
--- a/src/mozc_version_template.txt
+++ b/src/mozc_version_template.txt
@@ -1,6 +1,6 @@
MAJOR=2
MINOR=16
-BUILD=2012
+BUILD=2013
REVISION=102
# NACL_DICTIONARY_VERSION is the target version of the system dictionary to be
# downloaded by NaCl Mozc.
diff --git a/src/prediction/dictionary_predictor_test.cc b/src/prediction/dictionary_predictor_test.cc
index 3c7c9b1..cf0cec9 100644
--- a/src/prediction/dictionary_predictor_test.cc
+++ b/src/prediction/dictionary_predictor_test.cc
@@ -283,6 +283,8 @@
CallCheckDictionary() {}
virtual ~CallCheckDictionary() {}
+ MOCK_CONST_METHOD1(HasKey,
+ bool(StringPiece));
MOCK_CONST_METHOD1(HasValue,
bool(StringPiece));
MOCK_CONST_METHOD3(LookupPredictive,
@@ -1690,6 +1692,10 @@
TestSuffixDictionary() {}
virtual ~TestSuffixDictionary() {}
+  // Test stub: this suffix dictionary never reports any key as present.
+  virtual bool HasKey(StringPiece key) const {
+    return false;
+  }
+
virtual bool HasValue(StringPiece value) const {
return false;
}
diff --git a/src/rewriter/language_aware_rewriter.cc b/src/rewriter/language_aware_rewriter.cc
index 4fee439..8f05e47 100644
--- a/src/rewriter/language_aware_rewriter.cc
+++ b/src/rewriter/language_aware_rewriter.cc
@@ -130,11 +130,16 @@
return true;
}
+ // If the composition is stored as a key in the dictionary like
+ // "はな" (hana), "たけ" (take), the query is not handled as a raw query.
+ // It is a little conservative, but a safer way.
+ if (dictionary->HasKey(key)) {
+ return false;
+ }
+
// If the input text is stored in the dictionary, it is perhaps a raw query.
// For example, the input characters of "れもヴぇ" (remove) is in the
- // dictionary, so it is treated as a raw text. This logic is a little
- // aggressive because "たけ" (take), "ほうせ" (house) and so forth are also
- // treated as raw texts.
+ // dictionary, so it is treated as a raw text.
if (dictionary->HasValue(raw_text)) {
*rank = 2;
return true;
diff --git a/src/rewriter/language_aware_rewriter_test.cc b/src/rewriter/language_aware_rewriter_test.cc
index 4d6b423..1249be2 100644
--- a/src/rewriter/language_aware_rewriter_test.cc
+++ b/src/rewriter/language_aware_rewriter_test.cc
@@ -156,6 +156,12 @@
dictionary_mock_->AddLookupExact("house", "house", "house", Token::NONE);
dictionary_mock_->AddLookupExact("query", "query", "query", Token::NONE);
dictionary_mock_->AddLookupExact("google", "google", "google", Token::NONE);
+ dictionary_mock_->AddLookupExact("naru", "naru", "naru", Token::NONE);
+ // "なる"
+ dictionary_mock_->AddLookupExact("\xE3\x81\xAA\xE3\x82\x8B",
+ "\xE3\x81\xAA\xE3\x82\x8B",
+ "naru",
+ Token::NONE);
scoped_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());
@@ -258,6 +264,19 @@
&composition, &segments));
EXPECT_EQ("google", composition);
}
+
+ {
+ // The key "なる" has two values, "naru" and "なる".
+ // In this case, language aware rewriter should not be triggered.
+ string composition;
+ Segments segments;
+ EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "naru",
+ &composition, &segments));
+
+ // "なる"
+ EXPECT_EQ("\xE3\x81\xAA\xE3\x82\x8B", composition);
+ EXPECT_EQ(0, segments.conversion_segment(0).candidates_size());
+ }
}
TEST_F(LanguageAwareRewriterTest, LanguageAwareInputUsageStats) {