blob: c0892b5116448772e2872fa66e9f42cd7d85382c [file] [log] [blame]
// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "rewriter/language_aware_rewriter.h"
#include <string>
#include "base/logging.h"
#include "base/scoped_ptr.h"
#include "base/system_util.h"
#include "base/util.h"
#include "composer/composer.h"
#include "composer/table.h"
#include "config/config.pb.h"
#include "config/config_handler.h"
#include "converter/conversion_request.h"
#include "converter/segments.h"
#ifdef MOZC_USE_PACKED_DICTIONARY
#include "data_manager/packed/packed_data_manager.h"
#include "data_manager/packed/packed_data_mock.h"
#endif // MOZC_USE_PACKED_DICTIONARY
#include "data_manager/user_pos_manager.h"
#include "dictionary/dictionary_mock.h"
#include "dictionary/pos_matcher.h"
#include "session/commands.pb.h"
#include "testing/base/public/gunit.h"
#include "usage_stats/usage_stats.h"
#include "usage_stats/usage_stats_testing_util.h"
DECLARE_string(test_tmpdir);
namespace mozc {
namespace {
void InsertASCIISequence(const string &text, composer::Composer *composer) {
for (size_t i = 0; i < text.size(); ++i) {
commands::KeyEvent key;
key.set_key_code(text[i]);
composer->InsertCharacterKeyEvent(key);
}
}
} // namespace
class LanguageAwareRewriterTest : public testing::Test {
protected:
// Workaround for C2512 error (no default appropriate constructor) on MSVS.
LanguageAwareRewriterTest() {}
virtual ~LanguageAwareRewriterTest() {}
virtual void SetUp() {
usage_stats::UsageStats::ClearAllStatsForTest();
#ifdef MOZC_USE_PACKED_DICTIONARY
// Registers mocked PackedDataManager.
scoped_ptr<packed::PackedDataManager>
data_manager(new packed::PackedDataManager());
CHECK(data_manager->Init(string(kPackedSystemDictionary_data,
kPackedSystemDictionary_size)));
packed::RegisterPackedDataManager(data_manager.release());
#endif // MOZC_USE_PACKED_DICTIONARY
SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
config::ConfigHandler::GetDefaultConfig(&default_config_);
config::ConfigHandler::SetConfig(default_config_);
dictionary_mock_.reset(new DictionaryMock);
}
virtual void TearDown() {
config::ConfigHandler::GetDefaultConfig(&default_config_);
config::ConfigHandler::SetConfig(default_config_);
#ifdef MOZC_USE_PACKED_DICTIONARY
// Unregisters mocked PackedDataManager.
packed::RegisterPackedDataManager(NULL);
#endif // MOZC_USE_PACKED_DICTIONARY
dictionary_mock_.reset(NULL);
usage_stats::UsageStats::ClearAllStatsForTest();
}
LanguageAwareRewriter *CreateLanguageAwareRewriter() const {
return new LanguageAwareRewriter(
*UserPosManager::GetUserPosManager()->GetPOSMatcher(),
dictionary_mock_.get());
}
scoped_ptr<DictionaryMock> dictionary_mock_;
usage_stats::scoped_usage_stats_enabler usage_stats_enabler_;
private:
config::Config default_config_;
};
namespace {
bool RewriteWithLanguageAwareInput(const LanguageAwareRewriter *rewriter,
const string &key,
string *composition,
Segments *segments) {
commands::Request client_request;
client_request.set_language_aware_input(
commands::Request::LANGUAGE_AWARE_SUGGESTION);
composer::Table table;
config::Config default_config;
table.InitializeWithRequestAndConfig(client_request, default_config);
composer::Composer composer(&table, &client_request);
InsertASCIISequence(key, &composer);
composer.GetStringForPreedit(composition);
// Perform the rewrite command.
segments->set_request_type(Segments::SUGGESTION);
if (segments->conversion_segments_size() == 0) {
segments->add_segment();
}
Segment *segment = segments->mutable_conversion_segment(0);
segment->set_key(*composition);
ConversionRequest request(&composer, &client_request);
return rewriter->Rewrite(request, segments);
}
void PushFrontCandidate(const string &data, Segment *segment) {
Segment::Candidate *candidate = segment->push_front_candidate();
candidate->Init();
candidate->value = data;
candidate->key = data;
candidate->content_value = data;
candidate->content_key = data;
}
} // namespace
// Checks, for several typed key sequences, whether the rewriter adds the raw
// key sequence as a suggestion, and where that candidate is inserted.
TEST_F(LanguageAwareRewriterTest, LanguageAwareInput) {
  dictionary_mock_->AddLookupExact("house", "house", "house", Token::NONE);
  dictionary_mock_->AddLookupExact("query", "query", "query", Token::NONE);
  dictionary_mock_->AddLookupExact("google", "google", "google", Token::NONE);
  dictionary_mock_->AddLookupExact("naru", "naru", "naru", Token::NONE);
  // "なる"
  dictionary_mock_->AddLookupExact("\xE3\x81\xAA\xE3\x82\x8B",
                                   "\xE3\x81\xAA\xE3\x82\x8B",
                                   "naru",
                                   Token::NONE);

  scoped_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());

  // Held by value (not as references bound to temporaries), consistent with
  // the constants in the other tests of this file.
  const string kPrefix = "\xE2\x86\x92 ";  // "→ "
  const string kDidYouMean =
      // "もしかして"
      "\xE3\x82\x82\xE3\x81\x97\xE3\x81\x8B\xE3\x81\x97\xE3\x81\xA6";

  {
    // "python" is composed to "pyてょn", but "python" should be suggested,
    // because alphabet characters are in the middle of the word.
    string composition;
    Segments segments;
    EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "python",
                                              &composition, &segments));

    // "pyてょn"
    EXPECT_EQ("\xEF\xBD\x90\xEF\xBD\x99\xE3\x81\xA6\xE3\x82\x87\xEF\xBD\x8E",
              composition);
    // Fatal check: the candidate is indexed right below, so stop here rather
    // than index a candidate that does not exist.
    ASSERT_LT(0, segments.conversion_segment(0).candidates_size());
    const Segment::Candidate &candidate =
        segments.conversion_segment(0).candidate(0);
    EXPECT_EQ("python", candidate.key);
    EXPECT_EQ("python", candidate.value);
    EXPECT_EQ(kPrefix, candidate.prefix);
    EXPECT_EQ(kDidYouMean, candidate.description);
  }

  {
    // "mozuk" is composed to "もずk", then "mozuk" is not suggested.
    // The trailing alphabet characters are not counted.
    string composition;
    Segments segments;
    EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "mozuk",
                                               &composition, &segments));

    // "もずk"
    EXPECT_EQ("\xE3\x82\x82\xE3\x81\x9A\xEF\xBD\x8B", composition);
    EXPECT_EQ(0, segments.conversion_segment(0).candidates_size());
  }

  {
    // "house" is composed to "ほうせ".  Since "house" is in the dictionary
    // unlike the above "mozuk" case, "house" should be suggested.
    string composition;
    Segments segments;

    if (segments.conversion_segments_size() == 0) {
      segments.add_segment();
    }

    Segment *segment = segments.mutable_conversion_segment(0);
    // Add three candidates.
    // => ["cand0", "cand1", "cand2"]
    PushFrontCandidate("cand2", segment);
    PushFrontCandidate("cand1", segment);
    PushFrontCandidate("cand0", segment);
    EXPECT_EQ(3, segment->candidates_size());

    // "house" should be inserted as the 3rd candidate (b/w cand1 and cand2).
    // => ["cand0", "cand1", "house", "cand2"]
    EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "house",
                                              &composition, &segments));

    // "ほうせ"
    EXPECT_EQ("\xE3\x81\xBB\xE3\x81\x86\xE3\x81\x9B", composition);
    // Fatal check: candidate(2) is read below and must exist.
    ASSERT_EQ(4, segment->candidates_size());
    const Segment::Candidate &candidate =
        segments.conversion_segment(0).candidate(2);
    EXPECT_EQ("house", candidate.key);
    EXPECT_EQ("house", candidate.value);
    EXPECT_EQ(kPrefix, candidate.prefix);
    EXPECT_EQ(kDidYouMean, candidate.description);
  }

  {
    // "query" is composed to "くえry".  Since "query" is in the dictionary
    // unlike the above "mozuk" case, "query" should be suggested.
    string composition;
    Segments segments;
    EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "query",
                                              &composition, &segments));

    // "くえry"
    EXPECT_EQ("\xE3\x81\x8F\xE3\x81\x88\xEF\xBD\x92\xEF\xBD\x99", composition);
    // Fatal check: the candidate is indexed right below.
    ASSERT_LT(0, segments.conversion_segment(0).candidates_size());
    const Segment::Candidate &candidate =
        segments.conversion_segment(0).candidate(0);
    EXPECT_EQ("query", candidate.key);
    EXPECT_EQ("query", candidate.value);
    EXPECT_EQ(kPrefix, candidate.prefix);
    EXPECT_EQ(kDidYouMean, candidate.description);
  }

  {
    // "google" is composed to "google" by mode_switching_handler.
    // If the suggestion is equal to the composition, that suggestion
    // is not added.
    string composition;
    Segments segments;
    EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "google",
                                               &composition, &segments));
    EXPECT_EQ("google", composition);
  }

  {
    // The key "なる" has two values "naru" and "なる".
    // In this case, language aware rewriter should not be triggered.
    string composition;
    Segments segments;
    EXPECT_FALSE(RewriteWithLanguageAwareInput(rewriter.get(), "naru",
                                               &composition, &segments));

    // "なる"
    EXPECT_EQ("\xE3\x81\xAA\xE3\x82\x8B", composition);
    EXPECT_EQ(0, segments.conversion_segment(0).candidates_size());
  }
}
// Checks that Rewrite() counts "LanguageAwareSuggestionTriggered" and that a
// following Finish() on a fixed segment counts
// "LanguageAwareSuggestionCommitted".
TEST_F(LanguageAwareRewriterTest, LanguageAwareInputUsageStats) {
  scoped_ptr<LanguageAwareRewriter> rewriter(CreateLanguageAwareRewriter());

  EXPECT_STATS_NOT_EXIST("LanguageAwareSuggestionTriggered");
  EXPECT_STATS_NOT_EXIST("LanguageAwareSuggestionCommitted");

  const string kPyTeyoN =
      // "pyてょn"
      "\xEF\xBD\x90\xEF\xBD\x99\xE3\x81\xA6\xE3\x82\x87\xEF\xBD\x8E";

  {
    // "python" is composed to "pyてょn", but "python" should be suggested,
    // because alphabet characters are in the middle of the word.
    string composition;
    Segments segments;
    EXPECT_TRUE(RewriteWithLanguageAwareInput(rewriter.get(), "python",
                                              &composition, &segments));
    EXPECT_EQ(kPyTeyoN, composition);

    // Fatal check: the candidate is indexed right below, so stop here rather
    // than index a candidate that does not exist.
    ASSERT_LT(0, segments.conversion_segment(0).candidates_size());
    const Segment::Candidate &candidate =
        segments.conversion_segment(0).candidate(0);
    EXPECT_EQ("python", candidate.key);
    EXPECT_EQ("python", candidate.value);

    EXPECT_COUNT_STATS("LanguageAwareSuggestionTriggered", 1);
    EXPECT_STATS_NOT_EXIST("LanguageAwareSuggestionCommitted");
  }

  {
    // Call Rewrite with "python" again, then call Finish.  Both ...Triggered
    // and ...Committed should be incremented.
    // Note, RewriteWithLanguageAwareInput is not used here, because
    // Finish also requires ConversionRequest.
    string composition;
    Segments segments;

    commands::Request client_request;
    client_request.set_language_aware_input(
        commands::Request::LANGUAGE_AWARE_SUGGESTION);

    composer::Table table;
    config::Config default_config;
    table.InitializeWithRequestAndConfig(client_request, default_config);

    composer::Composer composer(&table, &client_request);
    InsertASCIISequence("python", &composer);
    composer.GetStringForPreedit(&composition);
    EXPECT_EQ(kPyTeyoN, composition);

    // Perform the rewrite command.
    segments.set_request_type(Segments::SUGGESTION);
    Segment *segment = segments.add_segment();
    segment->set_key(composition);
    ConversionRequest request(&composer, &client_request);

    EXPECT_TRUE(rewriter->Rewrite(request, &segments));

    EXPECT_COUNT_STATS("LanguageAwareSuggestionTriggered", 2);

    segment->set_segment_type(Segment::FIXED_VALUE);
    // Fatal check: do not run Finish() on a fixed segment without candidates.
    // NOTE(review): Finish() presumably inspects the committed candidate --
    // confirm against the rewriter implementation.
    ASSERT_LT(0, segment->candidates_size());
    rewriter->Finish(request, &segments);
    EXPECT_COUNT_STATS("LanguageAwareSuggestionCommitted", 1);
  }
}
} // namespace mozc