blob: a6d2591f40039a8197514bf7d78dd58f7750f34c [file] [log] [blame]
// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "rewriter/collocation_rewriter.h"
#include <cstddef>
#include <string>
#include "base/logging.h"
#include "base/system_util.h"
#include "config/config.pb.h"
#include "config/config_handler.h"
#include "converter/conversion_request.h"
#include "data_manager/testing/mock_data_manager.h"
#include "dictionary/pos_matcher.h"
#include "testing/base/public/gunit.h"
DECLARE_string(test_tmpdir);
namespace mozc {
class CollocationRewriterTest : public ::testing::Test {
protected:
// Helper data structures to define test cases.
// Used to generate Segment::Candidate.
struct CandidateData {
const char *key;
const char *content_key;
const char *value;
const char *content_value;
const uint16 lid;
const uint16 rid;
};
// Used to generate Segment.
struct SegmentData {
const char *key;
const CandidateData *candidates;
const size_t candidates_size;
};
// Used to generate Segments.
struct SegmentsData {
const SegmentData *segments;
const size_t segments_size;
};
CollocationRewriterTest() {}
virtual ~CollocationRewriterTest() {}
virtual void SetUp() {
SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
config::ConfigHandler::GetConfig(&config_backup_);
config::Config default_config;
config::ConfigHandler::GetDefaultConfig(&default_config);
config::ConfigHandler::SetConfig(default_config);
const mozc::testing::MockDataManager data_manager;
pos_matcher_ = data_manager.GetPOSMatcher();
collocation_rewriter_.reset(new CollocationRewriter(&data_manager));
}
virtual void TearDown() {
config::ConfigHandler::SetConfig(config_backup_);
}
// Makes a segment from SegmentData.
static void MakeSegment(const SegmentData &data, Segment *segment) {
segment->Clear();
segment->set_key(data.key);
for (size_t i = 0; i < data.candidates_size; ++i) {
Segment::Candidate *cand = segment->add_candidate();
const CandidateData &cand_data = data.candidates[i];
cand->key = cand_data.key;
cand->content_key = cand_data.content_key;
cand->value = cand_data.value;
cand->content_value = cand_data.content_value;
cand->lid = cand_data.lid;
cand->rid = cand_data.rid;
}
}
// Makes a segments from SegmentsData.
static void MakeSegments(const SegmentsData &data, Segments *segments) {
segments->Clear();
for (size_t i = 0; i < data.segments_size; ++i) {
MakeSegment(data.segments[i], segments->add_segment());
}
}
bool Rewrite(Segments *segments) const {
const ConversionRequest request;
return collocation_rewriter_->Rewrite(request, segments);
}
// Returns the concatenated string of top candidates.
static string GetTopValue(const Segments &segments) {
string result;
for (size_t i = 0; i < segments.conversion_segments_size(); ++i) {
const Segment::Candidate &candidate =
segments.conversion_segment(i).candidate(0);
result.append(candidate.value);
}
return result;
}
const POSMatcher *pos_matcher_;
private:
config::Config config_backup_;
scoped_ptr<const CollocationRewriter> collocation_rewriter_;
DISALLOW_COPY_AND_ASSIGN(CollocationRewriterTest);
};
// Two adjacent segments, "ねこを" and "かいたい": the test expects Rewrite()
// to succeed and "猫を" + "飼いたい" to become the top candidates.
TEST_F(CollocationRewriterTest, NekowoKaitai) {
  // Input Segments (one column per segment, candidates listed top first):
  //   key:         "ねこを"  | "かいたい"
  //   candidate 0: "ネコを"  | "買いたい"
  //   candidate 1: "猫を"    | "解体"
  //   candidate 2:           | "飼いたい"
  // Every candidate uses the unknown POS id for both lid and rid.
  const uint16 id = pos_matcher_->GetUnknownId();

  // First segment.
  const char kNekowoKey[] = "\xE3\x81\xAD\xE3\x81\x93\xE3\x82\x92";  // "ねこを"
  const char kNekoContentKey[] = "\xE3\x81\xAD\xE3\x81\x93";        // "ねこ"
  const char kNekowoKatakana[] =
      "\xE3\x83\x8D\xE3\x82\xB3\xE3\x82\x92";                 // "ネコを"
  const char kNekowoKanji[] = "\xE7\x8C\xAB\xE3\x82\x92";     // "猫を"
  // Note that value and content_value are the same string here, i.e. the
  // content_value also carries the particle "を".
  const CandidateData kNekowoCands[] = {
      {kNekowoKey, kNekoContentKey, kNekowoKatakana, kNekowoKatakana, id, id},
      {kNekowoKey, kNekoContentKey, kNekowoKanji, kNekowoKanji, id, id},
  };

  // Second segment.
  const char kKaitaiKey[] =
      "\xE3\x81\x8B\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "かいたい"
  const char kKaitaiBuy[] =
      "\xE8\xB2\xB7\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "買いたい"
  const char kKaitaiDismantle[] = "\xE8\xA7\xA3\xE4\xBD\x93";  // "解体"
  const char kKaitaiKeep[] =
      "\xE9\xA3\xBC\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "飼いたい"
  const CandidateData kKaitaiCands[] = {
      {kKaitaiKey, kKaitaiKey, kKaitaiBuy, kKaitaiBuy, id, id},
      {kKaitaiKey, kKaitaiKey, kKaitaiDismantle, kKaitaiDismantle, id, id},
      {kKaitaiKey, kKaitaiKey, kKaitaiKeep, kKaitaiKeep, id, id},
  };

  const SegmentData kSegmentTable[] = {
      {kNekowoKey, kNekowoCands, arraysize(kNekowoCands)},
      {kKaitaiKey, kKaitaiCands, arraysize(kKaitaiCands)},
  };
  const SegmentsData kInput = {kSegmentTable, arraysize(kSegmentTable)};

  Segments segments;
  MakeSegments(kInput, &segments);

  // "猫を飼いたい" should be promoted.
  EXPECT_TRUE(Rewrite(&segments));
  EXPECT_EQ(
      // "猫を飼いたい"
      "\xE7\x8C\xAB\xE3\x82\x92"
      "\xE9\xA3\xBC\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84",
      GetTopValue(segments)) << segments.DebugString();
}
// Two adjacent segments, "まぐろを" and "かいたい": the test expects Rewrite()
// to succeed and "マグロを" + "解体" to become the top candidates.
TEST_F(CollocationRewriterTest, MagurowoKaitai) {
  // Input Segments (one column per segment, candidates listed top first):
  //   key:         "まぐろを" | "かいたい"
  //   candidate 0: "マグロを" | "買いたい"
  //   candidate 1: "鮪を"     | "解体"
  //   candidate 2:            | "飼いたい"
  // Every candidate uses the unknown POS id for both lid and rid.
  const uint16 id = pos_matcher_->GetUnknownId();

  // First segment.  Here content_key / content_value are the bare noun,
  // without the particle "を".
  const char kMagurowoKey[] =
      "\xE3\x81\xBE\xE3\x81\x90\xE3\x82\x8D\xE3\x82\x92";  // "まぐろを"
  const char kMaguroContentKey[] =
      "\xE3\x81\xBE\xE3\x81\x90\xE3\x82\x8D";              // "まぐろ"
  const char kMagurowoKatakana[] =
      "\xE3\x83\x9E\xE3\x82\xB0\xE3\x83\xAD\xE3\x82\x92";  // "マグロを"
  const char kMaguroKatakana[] =
      "\xE3\x83\x9E\xE3\x82\xB0\xE3\x83\xAD";              // "マグロ"
  const char kMagurowoKanji[] = "\xE9\xAE\xAA\xE3\x82\x92";  // "鮪を"
  const char kMaguroKanji[] = "\xE9\xAE\xAA";                // "鮪"
  const CandidateData kMagurowoCands[] = {
      {kMagurowoKey, kMaguroContentKey, kMagurowoKatakana, kMaguroKatakana,
       id, id},
      {kMagurowoKey, kMaguroContentKey, kMagurowoKanji, kMaguroKanji, id, id},
  };

  // Second segment.
  const char kKaitaiKey[] =
      "\xE3\x81\x8B\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "かいたい"
  const char kKaitaiBuy[] =
      "\xE8\xB2\xB7\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "買いたい"
  const char kKaitaiDismantle[] = "\xE8\xA7\xA3\xE4\xBD\x93";  // "解体"
  const char kKaitaiKeep[] =
      "\xE9\xA3\xBC\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "飼いたい"
  const CandidateData kKaitaiCands[] = {
      {kKaitaiKey, kKaitaiKey, kKaitaiBuy, kKaitaiBuy, id, id},
      {kKaitaiKey, kKaitaiKey, kKaitaiDismantle, kKaitaiDismantle, id, id},
      {kKaitaiKey, kKaitaiKey, kKaitaiKeep, kKaitaiKeep, id, id},
  };

  const SegmentData kSegmentTable[] = {
      {kMagurowoKey, kMagurowoCands, arraysize(kMagurowoCands)},
      {kKaitaiKey, kKaitaiCands, arraysize(kKaitaiCands)},
  };
  const SegmentsData kInput = {kSegmentTable, arraysize(kSegmentTable)};

  Segments segments;
  MakeSegments(kInput, &segments);

  // "マグロを解体" should be promoted.
  EXPECT_TRUE(Rewrite(&segments));
  EXPECT_EQ(
      // "マグロを解体"
      "\xE3\x83\x9E\xE3\x82\xB0\xE3\x83\xAD\xE3\x82\x92"
      "\xE8\xA7\xA3\xE4\xBD\x93",
      GetTopValue(segments)) << segments.DebugString();
}
// Three segments where the middle one, "すごく", is tagged with the adverb POS
// id: the test expects Rewrite() to succeed and "猫を" / "飼いたい" to become
// the top candidates of the first and last segments.
TEST_F(CollocationRewriterTest, CrossOverAdverbSegment) {
  // Input Segments (key | candidates, top first):
  //   "ねこを"   | "ネコを" "猫を"
  //   "すごく"   | "すごく"
  //   "かいたい" | "買いたい" "解体" "飼いたい"
  const uint16 id = pos_matcher_->GetUnknownId();
  const uint16 adverb_id = pos_matcher_->GetAdverbId();

  // First segment.
  const char kNekowoKey[] = "\xE3\x81\xAD\xE3\x81\x93\xE3\x82\x92";  // "ねこを"
  const char kNekoContentKey[] = "\xE3\x81\xAD\xE3\x81\x93";        // "ねこ"
  const char kNekowoKatakana[] =
      "\xE3\x83\x8D\xE3\x82\xB3\xE3\x82\x92";                 // "ネコを"
  const char kNekowoKanji[] = "\xE7\x8C\xAB\xE3\x82\x92";     // "猫を"
  // Note that value and content_value are the same string here, i.e. the
  // content_value also carries the particle "を".
  const CandidateData kNekowoCands[] = {
      {kNekowoKey, kNekoContentKey, kNekowoKatakana, kNekowoKatakana, id, id},
      {kNekowoKey, kNekoContentKey, kNekowoKanji, kNekowoKanji, id, id},
  };

  // Middle segment: a single candidate whose lid and rid are the adverb id.
  const char kSugokuText[] = "\xe3\x81\x99\xe3\x81\x94\xe3\x81\x8f";  // "すごく"
  const CandidateData kSugokuCands[] = {
      {kSugokuText, kSugokuText, kSugokuText, kSugokuText, adverb_id,
       adverb_id},
  };

  // Last segment.
  const char kKaitaiKey[] =
      "\xE3\x81\x8B\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "かいたい"
  const char kKaitaiBuy[] =
      "\xE8\xB2\xB7\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "買いたい"
  const char kKaitaiDismantle[] = "\xE8\xA7\xA3\xE4\xBD\x93";  // "解体"
  const char kKaitaiKeep[] =
      "\xE9\xA3\xBC\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "飼いたい"
  const CandidateData kKaitaiCands[] = {
      {kKaitaiKey, kKaitaiKey, kKaitaiBuy, kKaitaiBuy, id, id},
      {kKaitaiKey, kKaitaiKey, kKaitaiDismantle, kKaitaiDismantle, id, id},
      {kKaitaiKey, kKaitaiKey, kKaitaiKeep, kKaitaiKeep, id, id},
  };

  const SegmentData kSegmentTable[] = {
      {kNekowoKey, kNekowoCands, arraysize(kNekowoCands)},
      {kSugokuText, kSugokuCands, arraysize(kSugokuCands)},
      {kKaitaiKey, kKaitaiCands, arraysize(kKaitaiCands)},
  };
  const SegmentsData kInput = {kSegmentTable, arraysize(kSegmentTable)};

  Segments segments;
  MakeSegments(kInput, &segments);

  // "猫を" and "飼いたい" should be promoted even though the adverb segment
  // sits between them.
  EXPECT_TRUE(Rewrite(&segments));
  EXPECT_EQ(
      // "猫をすごく飼いたい"
      "\xe7\x8c\xab\xe3\x82\x92\xe3\x81\x99\xe3\x81\x94\xe3\x81\x8f\xe9"
      "\xa3\xbc\xe3\x81\x84\xe3\x81\x9f\xe3\x81\x84",
      GetTopValue(segments)) << segments.DebugString();
}
// Three segments where the middle one, "すごく", is tagged with the unknown POS
// id instead of the adverb id: the test expects Rewrite() to return false and
// "猫をすごく飼いたい" NOT to be the concatenation of the top candidates.
TEST_F(CollocationRewriterTest, DoNotCrossOverNonAdverbSegment) {
  // Input Segments (key | candidates, top first):
  //   "ねこを"   | "ネコを" "猫を"
  //   "すごく"   | "すごく"
  //   "かいたい" | "買いたい" "解体" "飼いたい"
  // Every candidate, including the middle one, uses the unknown POS id.
  const uint16 id = pos_matcher_->GetUnknownId();

  // First segment.
  const char kNekowoKey[] = "\xE3\x81\xAD\xE3\x81\x93\xE3\x82\x92";  // "ねこを"
  const char kNekoContentKey[] = "\xE3\x81\xAD\xE3\x81\x93";        // "ねこ"
  const char kNekowoKatakana[] =
      "\xE3\x83\x8D\xE3\x82\xB3\xE3\x82\x92";                 // "ネコを"
  const char kNekowoKanji[] = "\xE7\x8C\xAB\xE3\x82\x92";     // "猫を"
  // Note that value and content_value are the same string here, i.e. the
  // content_value also carries the particle "を".
  const CandidateData kNekowoCands[] = {
      {kNekowoKey, kNekoContentKey, kNekowoKatakana, kNekowoKatakana, id, id},
      {kNekowoKey, kNekoContentKey, kNekowoKanji, kNekowoKanji, id, id},
  };

  // Middle segment: a single candidate that is not tagged as an adverb.
  const char kSugokuText[] = "\xe3\x81\x99\xe3\x81\x94\xe3\x81\x8f";  // "すごく"
  const CandidateData kSugokuCands[] = {
      {kSugokuText, kSugokuText, kSugokuText, kSugokuText, id, id},
  };

  // Last segment.
  const char kKaitaiKey[] =
      "\xE3\x81\x8B\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "かいたい"
  const char kKaitaiBuy[] =
      "\xE8\xB2\xB7\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "買いたい"
  const char kKaitaiDismantle[] = "\xE8\xA7\xA3\xE4\xBD\x93";  // "解体"
  const char kKaitaiKeep[] =
      "\xE9\xA3\xBC\xE3\x81\x84\xE3\x81\x9F\xE3\x81\x84";  // "飼いたい"
  const CandidateData kKaitaiCands[] = {
      {kKaitaiKey, kKaitaiKey, kKaitaiBuy, kKaitaiBuy, id, id},
      {kKaitaiKey, kKaitaiKey, kKaitaiDismantle, kKaitaiDismantle, id, id},
      {kKaitaiKey, kKaitaiKey, kKaitaiKeep, kKaitaiKeep, id, id},
  };

  const SegmentData kSegmentTable[] = {
      {kNekowoKey, kNekowoCands, arraysize(kNekowoCands)},
      {kSugokuText, kSugokuCands, arraysize(kSugokuCands)},
      {kKaitaiKey, kKaitaiCands, arraysize(kKaitaiCands)},
  };
  const SegmentsData kInput = {kSegmentTable, arraysize(kSegmentTable)};

  Segments segments;
  MakeSegments(kInput, &segments);

  // Nothing should be promoted across the non-adverb middle segment, so
  // "猫をすごく飼いたい" must not become the top result.
  EXPECT_FALSE(Rewrite(&segments));
  EXPECT_NE(
      // "猫をすごく飼いたい"
      "\xe7\x8c\xab\xe3\x82\x92\xe3\x81\x99\xe3\x81\x94\xe3\x81\x8f\xe9"
      "\xa3\xbc\xe3\x81\x84\xe3\x81\x9f\xe3\x81\x84",
      GetTopValue(segments)) << segments.DebugString();
}
} // namespace mozc