blob: fae59642c3d349252794e930e7001684a91a549d [file] [log] [blame]
// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "rewriter/focus_candidate_rewriter.h"
#include <string>
#include <vector>
#include "base/number_util.h"
#include "base/scoped_ptr.h"
#include "base/system_util.h"
#include "config/config.pb.h"
#include "config/config_handler.h"
#include "converter/segments.h"
#include "data_manager/testing/mock_data_manager.h"
#include "testing/base/public/googletest.h"
#include "testing/base/public/gunit.h"
#include "transliteration/transliteration.h"
DECLARE_string(test_tmpdir);
namespace mozc {
namespace {
// Appends a newly initialized candidate to |segment|, using |value| for both
// the candidate's value and its content_value.
void AddCandidate(Segment *segment, const string &value) {
  Segment::Candidate *const candidate = segment->add_candidate();
  candidate->Init();
  candidate->content_value = value;
  candidate->value = value;
}
// Appends a newly initialized candidate to |segment| whose surface form is
// |value| and whose content_value is |content_value|.  Used by the tests below
// for candidates whose value carries a trailing functional word.
void AddCandidateWithContentValue(Segment *segment,
                                  const string &value,
                                  const string &content_value) {
  Segment::Candidate *const candidate = segment->add_candidate();
  candidate->Init();
  candidate->content_value = content_value;
  candidate->value = value;
}
} // namespace
class FocusCandidateRewriterTest : public ::testing::Test {
protected:
virtual void SetUp() {
SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
config::ConfigHandler::GetDefaultConfig(&default_config_);
config::ConfigHandler::SetConfig(default_config_);
rewriter_.reset(new FocusCandidateRewriter(&mock_data_manager_));
}
const RewriterInterface *GetRewriter() {
return rewriter_.get();
}
private:
scoped_ptr<FocusCandidateRewriter> rewriter_;
config::Config default_config_;
testing::MockDataManager mock_data_manager_;
};
// Focus() must return false for queries that cannot trigger a rewrite.
TEST_F(FocusCandidateRewriterTest, FocusCandidateRewriterInvalidQuery) {
  // "｢" followed by the ASCII opening brackets.
  const char *const kOpening[] = {"\xef\xbd\xa2", "(", "[", "{"};
  // "｣" followed by the ASCII closing brackets.
  const char *const kClosing[] = {"\xef\xbd\xa3", ")", "]", "}"};

  Segments segments;
  Segment *const opening = segments.add_segment();
  Segment *const word = segments.add_segment();
  Segment *const copula = segments.add_segment();
  Segment *const closing = segments.add_segment();

  for (size_t i = 0; i < arraysize(kOpening); ++i) {
    AddCandidate(opening, kOpening[i]);
  }
  // "テスト"
  AddCandidate(word, "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88");
  // "てすと"
  AddCandidate(word, "\xe3\x81\xa6\xe3\x81\x99\xe3\x81\xa8");
  // "です"
  AddCandidate(copula, "\xe3\x81\xa7\xe3\x81\x99");
  // "デス"
  AddCandidate(copula, "\xe3\x83\x87\xe3\x82\xb9");
  for (size_t i = 0; i < arraysize(kClosing); ++i) {
    AddCandidate(closing, kClosing[i]);
  }

  const RewriterInterface *const rewriter = GetRewriter();
  // Segment index 5 does not exist (only four segments were added).
  EXPECT_FALSE(rewriter->Focus(&segments, 5, 0));
  // Candidate index 10 does not exist (segment 0 has four candidates).
  EXPECT_FALSE(rewriter->Focus(&segments, 0, 10));
  // The two middle segments hold ordinary words, not brackets.
  EXPECT_FALSE(rewriter->Focus(&segments, 1, 0));
  EXPECT_FALSE(rewriter->Focus(&segments, 2, 0));
}
// Focusing an opening bracket in the first segment must bring the matching
// closing bracket to the top of the last segment.
TEST_F(FocusCandidateRewriterTest, FocusCandidateRewriterLeftToRight) {
  // "｢" followed by the ASCII opening brackets.
  const char *const kOpening[] = {"\xef\xbd\xa2", "(", "[", "{"};
  // "｣" followed by the ASCII closing brackets, index-aligned with kOpening.
  const char *const kClosing[] = {"\xef\xbd\xa3", ")", "]", "}"};

  Segments segments;
  Segment *const opening = segments.add_segment();
  Segment *const word = segments.add_segment();
  Segment *const copula = segments.add_segment();
  Segment *const closing = segments.add_segment();

  for (size_t i = 0; i < arraysize(kOpening); ++i) {
    AddCandidate(opening, kOpening[i]);
  }
  // "テスト"
  AddCandidate(word, "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88");
  // "てすと"
  AddCandidate(word, "\xe3\x81\xa6\xe3\x81\x99\xe3\x81\xa8");
  // "です"
  AddCandidate(copula, "\xe3\x81\xa7\xe3\x81\x99");
  // "デス"
  AddCandidate(copula, "\xe3\x83\x87\xe3\x82\xb9");
  for (size_t i = 0; i < arraysize(kClosing); ++i) {
    AddCandidate(closing, kClosing[i]);
  }

  // Focus each opening bracket in turn; the counterpart must become the top
  // candidate of the closing segment.
  for (size_t i = 0; i < arraysize(kOpening); ++i) {
    EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, static_cast<int>(i)));
    EXPECT_EQ(kClosing[i], closing->candidate(0).content_value);
  }

  // Going back to the first bracket restores "｣" at the top.
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, 0));
  EXPECT_EQ(kClosing[0], closing->candidate(0).content_value);
}
// Focusing a closing bracket in the last segment must bring the matching
// opening bracket to the top of the first segment.
TEST_F(FocusCandidateRewriterTest, FocusCandidateRewriterRightToLeft) {
  // "｢" followed by the ASCII opening brackets.
  const char *const kOpening[] = {"\xef\xbd\xa2", "(", "[", "{"};
  // "｣" followed by the ASCII closing brackets, index-aligned with kOpening.
  const char *const kClosing[] = {"\xef\xbd\xa3", ")", "]", "}"};

  Segments segments;
  Segment *const opening = segments.add_segment();
  Segment *const word = segments.add_segment();
  Segment *const copula = segments.add_segment();
  Segment *const closing = segments.add_segment();

  for (size_t i = 0; i < arraysize(kOpening); ++i) {
    AddCandidate(opening, kOpening[i]);
  }
  // "テスト"
  AddCandidate(word, "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88");
  // "てすと"
  AddCandidate(word, "\xe3\x81\xa6\xe3\x81\x99\xe3\x81\xa8");
  // "です"
  AddCandidate(copula, "\xe3\x81\xa7\xe3\x81\x99");
  // "デス"
  AddCandidate(copula, "\xe3\x83\x87\xe3\x82\xb9");
  for (size_t i = 0; i < arraysize(kClosing); ++i) {
    AddCandidate(closing, kClosing[i]);
  }

  // Right to left: focus each closing bracket in turn and check that its
  // counterpart becomes the top candidate of the opening segment.
  for (size_t i = 0; i < arraysize(kClosing); ++i) {
    EXPECT_TRUE(GetRewriter()->Focus(&segments, 3, static_cast<int>(i)));
    EXPECT_EQ(kOpening[i], opening->candidate(0).content_value);
  }

  // Going back to the first bracket restores "｢" at the top.
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 3, 0));
  EXPECT_EQ(kOpening[0], opening->candidate(0).content_value);
}
// Nested brackets, focused from the left: each opening bracket must only
// affect the closing bracket at the same nesting depth.
TEST_F(FocusCandidateRewriterTest, FocusCandidateRewriterLeftToRightNest) {
  // "｢" followed by the ASCII opening brackets.
  const char *const kOpening[] = {"\xef\xbd\xa2", "(", "[", "{"};
  // "｣" followed by the ASCII closing brackets.
  const char *const kClosing[] = {"\xef\xbd\xa3", ")", "]", "}"};

  Segments segments;
  Segment *seg[7];
  for (size_t i = 0; i < arraysize(seg); ++i) {
    seg[i] = segments.add_segment();
  }

  // Segment layout:  ｢ テスト1 ｢ テスト2 ｣ テスト3 ｣
  //                  0    1    2    3    4    5    6
  for (size_t i = 0; i < arraysize(kOpening); ++i) {
    AddCandidate(seg[0], kOpening[i]);
    AddCandidate(seg[2], kOpening[i]);
  }
  // "テスト1"
  AddCandidate(seg[1], "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88\x31");
  // "テスト2"
  AddCandidate(seg[3], "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88\x32");
  // "テスト3"
  AddCandidate(seg[5], "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88\x33");
  for (size_t i = 0; i < arraysize(kClosing); ++i) {
    AddCandidate(seg[4], kClosing[i]);
    AddCandidate(seg[6], kClosing[i]);
  }

  Segment *const inner_close = seg[4];
  Segment *const outer_close = seg[6];

  // Outer "｢": both closing segments still show "｣".
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, 0));
  EXPECT_EQ("\xef\xbd\xa3", outer_close->candidate(0).content_value);
  EXPECT_EQ("\xef\xbd\xa3", inner_close->candidate(0).content_value);

  // Outer "(": only the outer closing segment switches to ")".
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, 1));
  EXPECT_EQ(")", outer_close->candidate(0).content_value);
  EXPECT_EQ("\xef\xbd\xa3", inner_close->candidate(0).content_value);

  // Inner "｢": nothing changes.
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 2, 0));
  EXPECT_EQ(")", outer_close->candidate(0).content_value);
  EXPECT_EQ("\xef\xbd\xa3", inner_close->candidate(0).content_value);

  // Inner "(": the inner closing segment switches to ")" as well.
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 2, 1));
  EXPECT_EQ(")", outer_close->candidate(0).content_value);
  EXPECT_EQ(")", inner_close->candidate(0).content_value);
}
// Nested brackets, focused from the right: each closing bracket must only
// affect the opening bracket at the same nesting depth.
TEST_F(FocusCandidateRewriterTest, FocusCandidateRewriterRightToLeftNest) {
  // "｢" followed by the ASCII opening brackets.
  const char *const kOpening[] = {"\xef\xbd\xa2", "(", "[", "{"};
  // "｣" followed by the ASCII closing brackets.
  const char *const kClosing[] = {"\xef\xbd\xa3", ")", "]", "}"};

  Segments segments;
  Segment *seg[7];
  for (size_t i = 0; i < arraysize(seg); ++i) {
    seg[i] = segments.add_segment();
  }

  // Segment layout:  ｢ テスト1 ｢ テスト2 ｣ テスト3 ｣
  //                  0    1    2    3    4    5    6
  for (size_t i = 0; i < arraysize(kOpening); ++i) {
    AddCandidate(seg[0], kOpening[i]);
    AddCandidate(seg[2], kOpening[i]);
  }
  // "テスト1"
  AddCandidate(seg[1], "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88\x31");
  // "テスト2"
  AddCandidate(seg[3], "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88\x32");
  // "テスト3"
  AddCandidate(seg[5], "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88\x33");
  for (size_t i = 0; i < arraysize(kClosing); ++i) {
    AddCandidate(seg[4], kClosing[i]);
    AddCandidate(seg[6], kClosing[i]);
  }

  Segment *const outer_open = seg[0];
  Segment *const inner_open = seg[2];

  // Outer "｣": both opening segments still show "｢".
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 6, 0));
  EXPECT_EQ("\xef\xbd\xa2", outer_open->candidate(0).content_value);
  EXPECT_EQ("\xef\xbd\xa2", inner_open->candidate(0).content_value);

  // Outer ")": only the outer opening segment switches to "(".
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 6, 1));
  EXPECT_EQ("(", outer_open->candidate(0).content_value);
  EXPECT_EQ("\xef\xbd\xa2", inner_open->candidate(0).content_value);

  // Inner "｣": nothing changes.
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 4, 0));
  EXPECT_EQ("(", outer_open->candidate(0).content_value);
  EXPECT_EQ("\xef\xbd\xa2", inner_open->candidate(0).content_value);

  // Inner ")": the inner opening segment switches to "(" as well.
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 4, 1));
  EXPECT_EQ("(", outer_open->candidate(0).content_value);
  EXPECT_EQ("(", inner_open->candidate(0).content_value);
}
// Bracket rewriting must also work when the focused candidate is a meta
// (transliteration) candidate.  This test addresses meta candidate k with the
// negative candidate index -k - 1.
TEST_F(FocusCandidateRewriterTest, FocusCandidateRewriterMetaCandidate) {
  Segments segments;
  Segment *seg[3];
  for (size_t i = 0; i < arraysize(seg); ++i) {
    seg[i] = segments.add_segment();
  }

  // ---- Segment 0: opening bracket ----
  // Key: "「" (full-width)
  seg[0]->set_key("\xe3\x80\x8c");
  {
    EXPECT_EQ(0, seg[0]->meta_candidates_size());
    vector<Segment::Candidate> &metas = *seg[0]->mutable_meta_candidates();
    metas.resize(transliteration::NUM_T13N_TYPES);
    // Every transliteration defaults to the full-width "「" ...
    for (size_t i = 0; i < transliteration::NUM_T13N_TYPES; ++i) {
      Segment::Candidate &meta = metas.at(i);
      meta.Init();
      meta.value = "\xe3\x80\x8c";
      meta.content_value = "\xe3\x80\x8c";
    }
    // ... except half-width katakana, which is "｢".
    Segment::Candidate &half = metas.at(transliteration::HALF_KATAKANA);
    half.value = "\xef\xbd\xa2";
    half.content_value = "\xef\xbd\xa2";
    EXPECT_EQ(transliteration::NUM_T13N_TYPES, seg[0]->meta_candidates_size());
  }
  // "｢"
  EXPECT_EQ(
      "\xef\xbd\xa2",
      seg[0]->meta_candidate(transliteration::HALF_KATAKANA).content_value);

  // Regular candidates: "｢" followed by the ASCII opening brackets.
  AddCandidate(seg[0], "\xef\xbd\xa2");
  AddCandidate(seg[0], "(");
  AddCandidate(seg[0], "[");
  AddCandidate(seg[0], "{");

  // ---- Segment 1: plain word ----
  // "テスト1"
  AddCandidate(seg[1], "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88\x31");

  // ---- Segment 2: closing bracket ----
  // Key: "」" (full-width)
  seg[2]->set_key("\xe3\x80\x8d");
  {
    EXPECT_EQ(0, seg[2]->meta_candidates_size());
    vector<Segment::Candidate> &metas = *seg[2]->mutable_meta_candidates();
    metas.resize(transliteration::NUM_T13N_TYPES);
    // Every transliteration defaults to the full-width "」" ...
    for (size_t i = 0; i < transliteration::NUM_T13N_TYPES; ++i) {
      Segment::Candidate &meta = metas.at(i);
      meta.Init();
      meta.value = "\xe3\x80\x8d";
      meta.content_value = "\xe3\x80\x8d";
    }
    // ... except half-width katakana, which is "｣".
    Segment::Candidate &half = metas.at(transliteration::HALF_KATAKANA);
    half.value = "\xef\xbd\xa3";
    half.content_value = "\xef\xbd\xa3";
    EXPECT_EQ(transliteration::NUM_T13N_TYPES, seg[2]->meta_candidates_size());
  }
  // "｣"
  EXPECT_EQ(
      "\xef\xbd\xa3",
      seg[2]->meta_candidate(transliteration::HALF_KATAKANA).content_value);

  // Regular candidates: "｣" followed by the ASCII closing brackets.
  AddCandidate(seg[2], "\xef\xbd\xa3");
  AddCandidate(seg[2], ")");
  AddCandidate(seg[2], "]");
  AddCandidate(seg[2], "}");

  // Focus the half-width katakana meta candidate of segment 0.
  const int half_index = -transliteration::HALF_KATAKANA - 1;
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, half_index));
  // "｢"
  EXPECT_EQ("\xef\xbd\xa2", seg[0]->candidate(0).content_value);
  // "｣"
  EXPECT_EQ("\xef\xbd\xa3", seg[2]->candidate(0).content_value);

  // The last meta candidate is still a valid focus target ...
  const int valid_index = -(transliteration::NUM_T13N_TYPES - 1) - 1;
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, valid_index));

  // ... but one past it is rejected.
  const int invalid_index = -transliteration::NUM_T13N_TYPES - 1;
  EXPECT_FALSE(GetRewriter()->Focus(&segments, 0, invalid_index));
}
// Focusing a number candidate must select the candidate at the same position
// in nearby number segments, while number segments that are far away stay
// untouched.
TEST_F(FocusCandidateRewriterTest, FocusCandidateRewriterNumber) {
  Segments segments;
  Segment *seg[7];
  for (size_t i = 0; i < arraysize(seg); ++i) {
    seg[i] = segments.add_segment();
  }

  // Segment 0: "2", "２", "二", "弐"
  seg[0]->set_key("2");
  AddCandidate(seg[0], "2");
  AddCandidate(seg[0], "\xEF\xBC\x92");  // "２" (full-width)
  // Kanji "二" (U+4E8C).  This used to be the look-alike katakana "ニ"
  // (U+30CB), which is not a number even though it is tagged NUMBER_KANJI.
  AddCandidate(seg[0], "\xE4\xBA\x8C");  // "二"
  AddCandidate(seg[0], "\xE5\xBC\x90");  // "弐"
  seg[0]->mutable_candidate(2)->style = NumberUtil::NumberString::NUMBER_KANJI;
  seg[0]->mutable_candidate(3)->style =
      NumberUtil::NumberString::NUMBER_OLD_KANJI;

  // Segment 1: "テスト1"
  AddCandidate(seg[1], "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88\x31");

  // Segment 2: "3", "３", "三", "参"
  seg[2]->set_key("3");
  AddCandidate(seg[2], "3");
  AddCandidate(seg[2], "\xEF\xBC\x93");  // "３" (full-width)
  AddCandidate(seg[2], "\xE4\xB8\x89");  // "三"
  AddCandidate(seg[2], "\xE5\x8F\x82");  // "参"
  seg[2]->mutable_candidate(2)->style = NumberUtil::NumberString::NUMBER_KANJI;
  seg[2]->mutable_candidate(3)->style =
      NumberUtil::NumberString::NUMBER_OLD_KANJI;

  // Segment 3: "4", "４", "四"
  seg[3]->set_key("4");
  AddCandidate(seg[3], "4");
  AddCandidate(seg[3], "\xEF\xBC\x94");  // "４" (full-width)
  AddCandidate(seg[3], "\xE5\x9B\x9B");  // "四"
  seg[3]->mutable_candidate(2)->style = NumberUtil::NumberString::NUMBER_KANJI;

  // Segments 4 and 5: "テスト1" (non-number filler separating segment 6).
  AddCandidate(seg[4], "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88\x31");
  AddCandidate(seg[5], "\xe3\x83\x86\xe3\x82\xb9\xe3\x83\x88\x31");

  // Segment 6: "4", "４", "四"
  seg[6]->set_key("4");
  AddCandidate(seg[6], "4");
  AddCandidate(seg[6], "\xEF\xBC\x94");  // "４" (full-width)
  AddCandidate(seg[6], "\xE5\x9B\x9B");  // "四"
  seg[6]->mutable_candidate(2)->style = NumberUtil::NumberString::NUMBER_KANJI;

  // Half-width digit focused: half-width digits everywhere nearby.
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, 0));
  EXPECT_EQ("2", seg[0]->candidate(0).content_value);
  EXPECT_EQ("3", seg[2]->candidate(0).content_value);
  EXPECT_EQ("4", seg[3]->candidate(0).content_value);

  // Full-width digit focused: "３" and "４" come to the top.
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, 1));
  EXPECT_EQ("\xEF\xBC\x93", seg[2]->candidate(0).content_value);
  EXPECT_EQ("\xEF\xBC\x94", seg[3]->candidate(0).content_value);
  EXPECT_EQ("4", seg[6]->candidate(0).content_value);  // too far away

  // Kanji focused: "三" and "四" come to the top.
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, 2));
  EXPECT_EQ("\xE4\xB8\x89", seg[2]->candidate(0).content_value);
  EXPECT_EQ("\xE5\x9B\x9B", seg[3]->candidate(0).content_value);
  EXPECT_EQ("4", seg[6]->candidate(0).content_value);  // too far away

  // Old-style kanji focused: "参" comes to the top of segment 2.
  EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, 3));
  EXPECT_EQ("\xE5\x8F\x82", seg[2]->candidate(0).content_value);
  EXPECT_EQ("4", seg[6]->candidate(0).content_value);  // too far away
}
// Regression test for bug #4596846: non-number characters were changed to
// numbers.
TEST_F(FocusCandidateRewriterTest, DontChangeNonNumberSegment) {
  Segments segments;
  Segment *const number_segment = segments.add_segment();
  Segment *const word_segment = segments.add_segment();

  number_segment->set_key("1");
  AddCandidate(number_segment, "1");
  AddCandidate(number_segment, "\xEF\xBC\x91");  // "１" (full-width)

  AddCandidate(word_segment, "\xE8\xAA\x9E");  // "語"
  AddCandidate(word_segment, "\xEF\xBC\x95");  // "５" (full-width)

  // A segment whose first candidate is not a number must be left alone, even
  // though it contains a number further down the list.
  EXPECT_FALSE(GetRewriter()->Focus(&segments, 0, 1));
  EXPECT_NE("\xEF\xBC\x95", word_segment->candidate(0).content_value);
}
// Focusing a counter suffix that follows a number must select the same suffix
// after the following numbers; without the number context Focus() must fail.
TEST_F(FocusCandidateRewriterTest, FocusCandidateRewriterSuffix) {
  // Number + suffix repeated three times: "2" かい "3" かい "4" かい.
  {
    Segments segments;
    Segment *seg[6];
    for (size_t i = 0; i < arraysize(seg); ++i) {
      seg[i] = segments.add_segment();
    }

    seg[0]->set_key("2");
    AddCandidate(seg[0], "2");

    // "かい"
    seg[1]->set_key("\xE3\x81\x8B\xE3\x81\x84");
    AddCandidate(seg[1], "\xE5\x9B\x9E");  // "回"
    AddCandidate(seg[1], "\xE9\x9A\x8E");  // "階"

    seg[2]->set_key("3");
    AddCandidate(seg[2], "3");

    // "かい"
    seg[3]->set_key("\xE3\x81\x8B\xE3\x81\x84");
    AddCandidate(seg[3], "\xE5\x9B\x9E");  // "回"
    AddCandidate(seg[3], "\xE9\x9A\x8E");  // "階"

    seg[4]->set_key("4");
    AddCandidate(seg[4], "4");

    // "かい"
    seg[5]->set_key("\xE3\x81\x8B\xE3\x81\x84");
    AddCandidate(seg[5], "\xE5\x9B\x9E");  // "回"
    AddCandidate(seg[5], "\xE9\x9A\x8E");  // "階"

    // Selecting "階" after "2" promotes "階" after "3" and "4" too.
    EXPECT_TRUE(GetRewriter()->Focus(&segments, 1, 1));
    // "階"
    EXPECT_EQ("\xE9\x9A\x8E", seg[3]->candidate(0).content_value);
    EXPECT_EQ("\xE9\x9A\x8E", seg[5]->candidate(0).content_value);
  }

  // No number in front of the focused suffix: かい "3" かい.
  {
    Segments segments;
    Segment *seg[3];
    for (size_t i = 0; i < arraysize(seg); ++i) {
      seg[i] = segments.add_segment();
    }

    // "かい"
    seg[0]->set_key("\xE3\x81\x8B\xE3\x81\x84");
    AddCandidate(seg[0], "\xE5\x9B\x9E");  // "回"
    AddCandidate(seg[0], "\xE9\x9A\x8E");  // "階"

    seg[1]->set_key("3");
    AddCandidate(seg[1], "3");

    // "かい"
    seg[2]->set_key("\xE3\x81\x8B\xE3\x81\x84");
    AddCandidate(seg[2], "\xE5\x9B\x9E");  // "回"
    AddCandidate(seg[2], "\xE9\x9A\x8E");  // "階"

    EXPECT_FALSE(GetRewriter()->Focus(&segments, 0, 1));
  }

  // No number in front of the following suffix: "2" かい かい.
  {
    Segments segments;
    Segment *seg[3];
    for (size_t i = 0; i < arraysize(seg); ++i) {
      seg[i] = segments.add_segment();
    }

    seg[0]->set_key("2");
    AddCandidate(seg[0], "2");

    // "かい"
    seg[1]->set_key("\xE3\x81\x8B\xE3\x81\x84");
    AddCandidate(seg[1], "\xE5\x9B\x9E");  // "回"
    AddCandidate(seg[1], "\xE9\x9A\x8E");  // "階"

    // "かい"
    seg[2]->set_key("\xE3\x81\x8B\xE3\x81\x84");
    AddCandidate(seg[2], "\xE5\x9B\x9E");  // "回"
    AddCandidate(seg[2], "\xE9\x9A\x8E");  // "階"

    EXPECT_FALSE(GetRewriter()->Focus(&segments, 1, 1));
  }
}
// Reranking of "number + counter suffix" compounds that live inside a single
// candidate (e.g. "一回"), optionally followed by a parallel marker.
TEST_F(FocusCandidateRewriterTest, NumberAndSuffixCompound) {
  // Test for reranking of number compound pattern:
  {
    // Test scenario: Construct the following segments:
    //           Seg 0        Seg 1
    //        | "いっかい" | "にかい" |
    // cand 0 | "一階"     | "2階"    |
    // cand 1 | "一回"     | "二階"   |
    // cand 2 |            | "二回"   |
    //
    // Then, focusing on (Seg 0, cand 1) should move "二回" in Seg 1 to the top.
    Segments segments;
    Segment *seg[2];
    for (size_t i = 0; i < arraysize(seg); ++i) {
      seg[i] = segments.add_segment();
    }

    // "いっかい"
    seg[0]->set_key("\xE3\x81\x84\xE3\x81\xA3\xE3\x81\x8B\xE3\x81\x84");
    // value = "一階", content_value = "一階"
    AddCandidate(seg[0], "\xE4\xB8\x80\xE9\x9A\x8E");
    // value = "一回", content_value = "一回"
    AddCandidate(seg[0], "\xE4\xB8\x80\xE5\x9B\x9E");

    // "にかい"
    seg[1]->set_key("\xE3\x81\xAB\xE3\x81\x8B\xE3\x81\x84");
    AddCandidate(seg[1], "\x32\xE9\x9A\x8E");  // "2階"
    AddCandidate(seg[1], "\xE4\xBA\x8C\xE9\x9A\x8E");  // "二階"
    AddCandidate(seg[1], "\xE4\xBA\x8C\xE5\x9B\x9E");  // "二回"

    EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, 1));
    // "二回"
    EXPECT_EQ("\xE4\xBA\x8C\xE5\x9B\x9E", seg[1]->candidate(0).value);
  }

  // Test for reranking of number compound + parallel marker pattern:
  // http://mozcsuorg.appspot.com/#issue/49
  {
    // Test scenario: Similar to the above case, but construct the following
    // segments with parallel marker:
    //           Seg 0          Seg 1
    //        | "いっかいや" | "にかい" |
    // cand 0 | "一階や"     | "2階"    |
    // cand 1 | "一回や"     | "二階"   |
    // cand 2 |              | "二回"   |
    //
    // Then, focusing on (Seg 0, cand 1) should move "二回" in Seg 1 to the top.
    Segments segments;
    Segment *seg[2];
    for (size_t i = 0; i < arraysize(seg); ++i) {
      seg[i] = segments.add_segment();
    }

    const int kNounId = 1939;
    const int kParallelMarkerYa = 290;

    seg[0]->set_key(
        // "いっかいや"
        "\xE3\x81\x84\xE3\x81\xA3\xE3\x81\x8B\xE3\x81\x84\xE3\x82\x84");
    // value = "一階や", content_value = "一階"
    AddCandidateWithContentValue(seg[0],
                                 "\xE4\xB8\x80\xE9\x9A\x8E\xE3\x82\x84",
                                 "\xE4\xB8\x80\xE9\x9A\x8E");
    seg[0]->mutable_candidate(0)->lid = kNounId;
    seg[0]->mutable_candidate(0)->rid = kParallelMarkerYa;
    // value = "一回や", content_value = "一回"
    AddCandidateWithContentValue(seg[0],
                                 "\xE4\xB8\x80\xE5\x9B\x9E\xE3\x82\x84",
                                 "\xE4\xB8\x80\xE5\x9B\x9E");
    seg[0]->mutable_candidate(1)->lid = kNounId;
    seg[0]->mutable_candidate(1)->rid = kParallelMarkerYa;

    seg[1]->set_key(
        // "にかい"
        "\xE3\x81\xAB\xE3\x81\x8B\xE3\x81\x84");
    AddCandidate(seg[1], "\x32\xE9\x9A\x8E");  // "2階"
    seg[1]->mutable_candidate(0)->lid = kNounId;
    seg[1]->mutable_candidate(0)->rid = kNounId;
    AddCandidate(seg[1], "\xE4\xBA\x8C\xE9\x9A\x8E");  // "二階"
    seg[1]->mutable_candidate(1)->lid = kNounId;
    seg[1]->mutable_candidate(1)->rid = kNounId;
    AddCandidate(seg[1], "\xE4\xBA\x8C\xE5\x9B\x9E");  // "二回"
    seg[1]->mutable_candidate(2)->lid = kNounId;
    seg[1]->mutable_candidate(2)->rid = kNounId;

    EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, 1));
    // "二回"
    EXPECT_EQ("\xE4\xBA\x8C\xE5\x9B\x9E", seg[1]->candidate(0).value);
  }

  // Test for reranking of number compound + parallel marker pattern for 3
  // segments.
  {
    // Test scenario: Similar to the above case, but construct the following
    // segments with parallel marker:
    //           Seg 0          Seg 1        Seg 2
    //        | "いっかいや" | "にかいや" | "さんかいや"
    // cand 0 | "一階や"     | "2階や"    | "参回"
    // cand 1 | "一回や"     | "二階や"   | "3階"
    // cand 2 |              | "二回や"   | "三回"
    //
    // Then, focusing on (Seg 0, cand 1) should move "二回や" in Seg 1 and
    // "三回" in Seg 2 to the top of each segment.
    Segments segments;
    Segment *seg[3];
    for (size_t i = 0; i < arraysize(seg); ++i) {
      seg[i] = segments.add_segment();
    }

    const int kNounId = 1939;
    const int kParallelMarkerYa = 290;

    seg[0]->set_key(
        // "いっかいや"
        "\xE3\x81\x84\xE3\x81\xA3\xE3\x81\x8B\xE3\x81\x84\xE3\x82\x84");
    // value = "一階や", content_value = "一階"
    AddCandidateWithContentValue(seg[0],
                                 "\xE4\xB8\x80\xE9\x9A\x8E\xE3\x82\x84",
                                 "\xE4\xB8\x80\xE9\x9A\x8E");
    seg[0]->mutable_candidate(0)->lid = kNounId;
    seg[0]->mutable_candidate(0)->rid = kParallelMarkerYa;
    // value = "一回や", content_value = "一回"
    AddCandidateWithContentValue(seg[0],
                                 "\xE4\xB8\x80\xE5\x9B\x9E\xE3\x82\x84",
                                 "\xE4\xB8\x80\xE5\x9B\x9E");
    seg[0]->mutable_candidate(1)->lid = kNounId;
    seg[0]->mutable_candidate(1)->rid = kParallelMarkerYa;

    seg[1]->set_key(
        // "にかいや"
        "\xE3\x81\xAB\xE3\x81\x8B\xE3\x81\x84\xE3\x82\x84");
    // value = "2階や", content_value = "2階"
    AddCandidateWithContentValue(seg[1],
                                 "\x32\xE9\x9A\x8E\xE3\x82\x84",
                                 "\x32\xE9\x9A\x8E");
    seg[1]->mutable_candidate(0)->lid = kNounId;
    seg[1]->mutable_candidate(0)->rid = kParallelMarkerYa;
    // value = "二階や", content_value = "二階"
    AddCandidateWithContentValue(seg[1],
                                 "\xE4\xBA\x8C\xE9\x9A\x8E\xE3\x82\x84",
                                 "\xE4\xBA\x8C\xE9\x9A\x8E");
    seg[1]->mutable_candidate(1)->lid = kNounId;
    seg[1]->mutable_candidate(1)->rid = kParallelMarkerYa;
    // value = "二回や", content_value = "二回"
    AddCandidateWithContentValue(seg[1],
                                 "\xE4\xBA\x8C\xE5\x9B\x9E\xE3\x82\x84",
                                 "\xE4\xBA\x8C\xE5\x9B\x9E");
    seg[1]->mutable_candidate(2)->lid = kNounId;
    seg[1]->mutable_candidate(2)->rid = kParallelMarkerYa;

    // NOTE(review): this key ends with "や" although none of the candidates
    // of this segment do; "さんかい" may have been intended -- confirm.
    seg[2]->set_key(
        // "さんかいや"
        "\xE3\x81\x95\xE3\x82\x93\xE3\x81\x8B\xE3\x81\x84\xE3\x82\x84");
    AddCandidate(seg[2], "\xE5\x8F\x82\xE5\x9B\x9E");  // "参回"
    seg[2]->mutable_candidate(0)->lid = kNounId;
    seg[2]->mutable_candidate(0)->rid = kNounId;
    AddCandidate(seg[2], "\x33\xE9\x9A\x8E");  // "3階"
    seg[2]->mutable_candidate(1)->lid = kNounId;
    seg[2]->mutable_candidate(1)->rid = kNounId;
    AddCandidate(seg[2], "\xE4\xB8\x89\xE5\x9B\x9E");  // "三回"
    seg[2]->mutable_candidate(2)->lid = kNounId;
    seg[2]->mutable_candidate(2)->rid = kNounId;

    EXPECT_TRUE(GetRewriter()->Focus(&segments, 0, 1));
    // "二回や"
    EXPECT_EQ("\xE4\xBA\x8C\xE5\x9B\x9E\xE3\x82\x84",
              seg[1]->candidate(0).value);
    // "三回"
    EXPECT_EQ("\xE4\xB8\x89\xE5\x9B\x9E", seg[2]->candidate(0).value);
  }

  // Test case where two number segments are too far to be rewritten.
  {
    // Test scenario: Similar to the above case, but construct the following
    // segments with parallel marker:
    //           Seg 0          Seg 1  Seg 2  Seg 3  Seg 4
    //        | "いっかいや" | "あ" | "あ" | "あ" | "にかい"
    // cand 0 | "一階や"     | "あ" | "あ" | "あ" | "2階"
    // cand 1 | "一回や"     |      |      |      | "二回"
    //
    // Then, focusing on (Seg 0, cand 1) cannot move "二回" in Seg 4 to the top.
    Segments segments;
    Segment *seg[5];
    for (size_t i = 0; i < arraysize(seg); ++i) {
      seg[i] = segments.add_segment();
    }

    const int kNounId = 1939;
    const int kParallelMarkerYa = 290;

    seg[0]->set_key(
        // "いっかいや"
        "\xE3\x81\x84\xE3\x81\xA3\xE3\x81\x8B\xE3\x81\x84\xE3\x82\x84");
    // value = "一階や", content_value = "一階"
    AddCandidateWithContentValue(seg[0],
                                 "\xE4\xB8\x80\xE9\x9A\x8E\xE3\x82\x84",
                                 "\xE4\xB8\x80\xE9\x9A\x8E");
    seg[0]->mutable_candidate(0)->lid = kNounId;
    seg[0]->mutable_candidate(0)->rid = kParallelMarkerYa;
    // value = "一回や", content_value = "一回"
    AddCandidateWithContentValue(seg[0],
                                 "\xE4\xB8\x80\xE5\x9B\x9E\xE3\x82\x84",
                                 "\xE4\xB8\x80\xE5\x9B\x9E");
    seg[0]->mutable_candidate(1)->lid = kNounId;
    seg[0]->mutable_candidate(1)->rid = kParallelMarkerYa;

    for (size_t i = 1; i <= 3; ++i) {
      seg[i]->set_key("\xE3\x81\x82");  // "あ"
      AddCandidate(seg[i], "\xE3\x81\x82");  // "あ"
      seg[i]->mutable_candidate(0)->lid = kNounId;
      seg[i]->mutable_candidate(0)->rid = kNounId;
    }

    seg[4]->set_key(
        // "にかい"
        "\xE3\x81\xAB\xE3\x81\x8B\xE3\x81\x84");
    AddCandidate(seg[4], "\x32\xE9\x9A\x8E");  // "2階"
    seg[4]->mutable_candidate(0)->lid = kNounId;
    seg[4]->mutable_candidate(0)->rid = kNounId;
    AddCandidate(seg[4], "\xE4\xBA\x8C\xE5\x9B\x9E");  // "二回"
    // Tag the candidate that was just added (index 1).  Index 0 was tagged
    // above already; re-tagging it would leave "二回" with default POS ids.
    seg[4]->mutable_candidate(1)->lid = kNounId;
    seg[4]->mutable_candidate(1)->rid = kNounId;

    EXPECT_FALSE(GetRewriter()->Focus(&segments, 0, 1));
  }

  // Test for the case where we shouldn't rewrite.
  {
    // Test scenario: Similar to the above cases, but construct the following
    // segments with particles へ and は:
    //           Seg 0            Seg 1      Seg 2
    //        | "いっかいへは" | "にかい" | "いった"
    // cand 0 | "一階へは"     | "二回"   | "行った"
    // cand 1 | "一回へは"     | "二階"   |
    // cand 2 |                | "2階"    |
    //
    // Here, "一階へは" has the following structure:
    //   "一階(noun)" + "へ(格助詞)" + "は(係助詞)"
    // For this case, focusing on (Seg 0, cand 1) is expected to be rejected,
    // i.e. Focus() returns false.
    Segments segments;
    Segment *seg[3];
    for (size_t i = 0; i < arraysize(seg); ++i) {
      seg[i] = segments.add_segment();
    }

    // "名詞,一般,*,*,*,*,*"
    const int kNounId = 1939;
    // "助詞,係助詞,*,*,*,*,は"
    const int kKakariJoshiHa = 299;
    // "動詞,非自立,*,*,五段・カ行促音便,連用タ接続,行く"
    const int kIkuTaSetsuzoku = 1501;
    // "助動詞,*,*,*,特殊・タ,基本形,た"
    const int kJodoushiTa = 161;

    seg[0]->set_key(
        // "いっかいへは"
        "\xE3\x81\x84\xE3\x81\xA3\xE3\x81\x8B\xE3\x81\x84"
        "\xE3\x81\xB8\xE3\x81\xAF");
    // value = "一階へは", content_value = "一階"
    AddCandidateWithContentValue(
        seg[0],
        "\xE4\xB8\x80\xE9\x9A\x8E\xE3\x81\xB8\xE3\x81\xAF",
        "\xE4\xB8\x80\xE9\x9A\x8E");
    seg[0]->mutable_candidate(0)->lid = kNounId;
    seg[0]->mutable_candidate(0)->rid = kKakariJoshiHa;
    // value = "一回へは", content_value = "一回"
    AddCandidateWithContentValue(
        seg[0],
        "\xE4\xB8\x80\xE5\x9B\x9E\xE3\x81\xB8\xE3\x81\xAF",
        "\xE4\xB8\x80\xE5\x9B\x9E");
    seg[0]->mutable_candidate(1)->lid = kNounId;
    seg[0]->mutable_candidate(1)->rid = kKakariJoshiHa;

    seg[1]->set_key(
        // "にかい"
        "\xE3\x81\xAB\xE3\x81\x8B\xE3\x81\x84");
    AddCandidate(seg[1], "\xE4\xBA\x8C\xE5\x9B\x9E");  // "二回"
    seg[1]->mutable_candidate(0)->lid = kNounId;
    seg[1]->mutable_candidate(0)->rid = kNounId;
    AddCandidate(seg[1], "\xE4\xBA\x8C\xE9\x9A\x8E");  // "二階"
    seg[1]->mutable_candidate(1)->lid = kNounId;
    seg[1]->mutable_candidate(1)->rid = kNounId;
    AddCandidate(seg[1], "\x32\xE9\x9A\x8E");  // "2階"
    seg[1]->mutable_candidate(2)->lid = kNounId;
    seg[1]->mutable_candidate(2)->rid = kNounId;

    seg[2]->set_key(
        // "いった"
        "\xE3\x81\x84\xE3\x81\xA3\xE3\x81\x9F");
    AddCandidate(seg[2], "\xE8\xA1\x8C\xE3\x81\xA3\xE3\x81\x9F");  // "行った"
    seg[2]->mutable_candidate(0)->lid = kIkuTaSetsuzoku;
    seg[2]->mutable_candidate(0)->rid = kJodoushiTa;

    EXPECT_FALSE(GetRewriter()->Focus(&segments, 0, 1));
  }
}
} // namespace mozc