blob: 76fd59b6e42388bd7ed5ca45365d6ff79b919c3f [file] [log] [blame]
// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "rewriter/symbol_rewriter.h"
#include <string>
#include "base/logging.h"
#include "base/scoped_ptr.h"
#include "base/system_util.h"
#include "base/util.h"
#include "config/config.pb.h"
#include "config/config_handler.h"
#include "converter/conversion_request.h"
#include "converter/segments.h"
#include "data_manager/testing/mock_data_manager.h"
#include "engine/engine_interface.h"
#include "engine/mock_data_engine_factory.h"
#include "session/commands.pb.h"
#include "testing/base/public/gunit.h"
DECLARE_string(test_tmpdir);
namespace mozc {
namespace {
void AddSegment(const string &key, const string &value,
Segments *segments) {
Segment *seg = segments->push_back_segment();
seg->set_key(key);
Segment::Candidate *candidate = seg->add_candidate();
candidate->Init();
candidate->value = key;
candidate->content_key = key;
candidate->content_value = value;
}
void AddCandidate(const string &value, Segment *segment) {
Segment::Candidate *candidate = segment->add_candidate();
candidate->Init();
candidate->value = value;
candidate->content_key = segment->key();
candidate->content_value = value;
}
bool HasCandidateAndDescription(const Segments &segments,
int index,
const string &key,
const string &description) {
CHECK_GT(segments.segments_size(), index);
bool check_description = !description.empty();
for (size_t i = 0; i < segments.segment(index).candidates_size(); ++i) {
const Segment::Candidate &candidate = segments.segment(index).candidate(i);
if (candidate.value == key) {
if (check_description) {
bool result = candidate.description == description;
return result;
} else {
return true;
}
}
}
return false;
}
bool HasCandidate(const Segments &segments, int index, const string &value) {
return HasCandidateAndDescription(segments, index, value, "");
}
} // namespace
class SymbolRewriterTest : public ::testing::Test {
protected:
SymbolRewriterTest() {}
~SymbolRewriterTest() {}
virtual void SetUp() {
SystemUtil::SetUserProfileDirectory(FLAGS_test_tmpdir);
config::Config config;
config::ConfigHandler::GetDefaultConfig(&config);
config::ConfigHandler::SetConfig(config);
// We cannot use mock converter here because SymbolRewriter uses
// ResizeSegment of converter implementation. However, SymbolRewriter is
// independent of underlying dictionary and, hence, we can use a converter
// with mock data.
engine_.reset(MockDataEngineFactory::Create());
converter_ = engine_->GetConverter();
data_manager_.reset(new testing::MockDataManager);
}
virtual void TearDown() {
// Just in case, reset the config in test_tmpdir
config::Config config;
config::ConfigHandler::GetDefaultConfig(&config);
config::ConfigHandler::SetConfig(config);
}
scoped_ptr<EngineInterface> engine_;
const ConverterInterface *converter_;
scoped_ptr<testing::MockDataManager> data_manager_;
};
// Note that these tests are using default symbol dictionary.
// Test result can be changed if symbol dictionary is modified.
// TODO(toshiyuki): Modify symbol rewriter so that we can use symbol dictionary
// for testing.
TEST_F(SymbolRewriterTest, TriggerRewriteTest) {
SymbolRewriter symbol_rewriter(converter_, data_manager_.get());
const ConversionRequest request;
{
Segments segments;
// "ー"
AddSegment("\xe3\x83\xbc", "test", &segments);
// ">"
AddSegment("\x3e", "test", &segments);
EXPECT_TRUE(symbol_rewriter.Rewrite(request, &segments));
// "→"
EXPECT_TRUE(HasCandidate(segments, 0, "\xe2\x86\x92"));
}
{
Segments segments;
// "ー"
AddSegment("\xe3\x83\xbc", "test", &segments);
// "ー"
AddSegment("\xe3\x83\xbc", "test", &segments);
EXPECT_TRUE(symbol_rewriter.Rewrite(request, &segments));
// "―"
EXPECT_TRUE(HasCandidate(segments, 0, "\xe2\x80\x95"));
// "―"
EXPECT_TRUE(HasCandidate(segments, 1, "\xe2\x80\x95"));
}
}
TEST_F(SymbolRewriterTest, TriggerRewriteEntireTest) {
SymbolRewriter symbol_rewriter(converter_, data_manager_.get());
const ConversionRequest request;
{
Segments segments;
// "ー"
AddSegment("\xe3\x83\xbc", "test", &segments);
// ">"
AddSegment("\x3e", "test", &segments);
EXPECT_TRUE(symbol_rewriter.RewriteEntireCandidate(request, &segments));
// "→"
EXPECT_TRUE(HasCandidate(segments, 0, "\xe2\x86\x92"));
}
{
Segments segments;
// "ー"
AddSegment("\xe3\x83\xbc", "test", &segments);
// "ー"
AddSegment("\xe3\x83\xbc", "test", &segments);
EXPECT_FALSE(symbol_rewriter.RewriteEntireCandidate(request, &segments));
}
}
TEST_F(SymbolRewriterTest, TriggerRewriteEachTest) {
SymbolRewriter symbol_rewriter(converter_, data_manager_.get());
{
Segments segments;
// "ー"
AddSegment("\xe3\x83\xbc", "test", &segments);
// ">"
AddSegment("\x3e", "test", &segments);
EXPECT_TRUE(symbol_rewriter.RewriteEachCandidate(&segments));
EXPECT_EQ(2, segments.segments_size());
// "―"
EXPECT_TRUE(HasCandidate(segments, 0, "\xe2\x80\x95"));
// "→"
EXPECT_FALSE(HasCandidate(segments, 0, "\xe2\x86\x92"));
// "〉"
EXPECT_TRUE(HasCandidate(segments, 1, "\xe3\x80\x89"));
}
}
TEST_F(SymbolRewriterTest, TriggerRewriteDescriptionTest) {
SymbolRewriter symbol_rewriter(converter_, data_manager_.get());
{
Segments segments;
// "したつき"
AddSegment("\xE3\x81\x97\xE3\x81\x9F\xE3\x81\xA4\xE3\x81\x8D",
"test", &segments);
EXPECT_TRUE(symbol_rewriter.RewriteEachCandidate(&segments));
EXPECT_EQ(1, segments.segments_size());
// "₍"
EXPECT_TRUE(HasCandidateAndDescription(segments, 0, "\xE2\x82\x8D",
// "下付き文字(始め丸括弧)"
"\xE4\xB8\x8B\xE4\xBB\x98\xE3\x81\x8D\xE6\x96\x87\xE5\xAD\x97"
"("
"\xE5\xA7\x8B\xE3\x82\x81\xE4\xB8\xB8\xE6\x8B\xAC\xE5\xBC\xA7"
")"));
}
}
TEST_F(SymbolRewriterTest, InsertAfterSingleKanjiAndT13n) {
SymbolRewriter symbol_rewriter(converter_, data_manager_.get());
const ConversionRequest request;
{
Segments segments;
// "てん", "てん"
AddSegment("\xe3\x81\xa6\xe3\x82\x93", "\xe3\x81\xa6\xe3\x82\x93",
&segments);
Segment *seg = segments.mutable_segment(0);
// Add 15 single-kanji and transliterated candidates
// "点"
AddCandidate("\xe7\x82\xb9", seg);
// "転"
AddCandidate("\xe8\xbb\xa2", seg);
// "天"
AddCandidate("\xe5\xa4\xa9", seg);
// "てん"
AddCandidate("\xe3\x81\xa6\xe3\x82\x93", seg);
// "テン"
AddCandidate("\xe3\x83\x86\xe3\x83\xb3", seg);
// "展"
AddCandidate("\xe5\xb1\x95", seg);
// "店"
AddCandidate("\xe5\xba\x97", seg);
// "典"
AddCandidate("\xe5\x85\xb8", seg);
// "添"
AddCandidate("\xe6\xb7\xbb", seg);
// "填"
AddCandidate("\xe5\xa1\xab", seg);
// "顛"
AddCandidate("\xe9\xa1\x9b", seg);
// "辿"
AddCandidate("\xe8\xbe\xbf", seg);
// "纏"
AddCandidate("\xe7\xba\x8f", seg);
// "甜"
AddCandidate("\xe7\x94\x9c", seg);
// "貼"
AddCandidate("\xe8\xb2\xbc", seg);
EXPECT_TRUE(symbol_rewriter.Rewrite(request, &segments));
EXPECT_GT(segments.segment(0).candidates_size(), 16);
for (int i = 0; i < 16; ++i) {
const string &value = segments.segment(0).candidate(i).value;
EXPECT_FALSE(Util::IsScriptType(value, Util::UNKNOWN_SCRIPT))
<< i << ": " << value;
}
}
}
TEST_F(SymbolRewriterTest, SetKey) {
SymbolRewriter symbol_rewriter(converter_, data_manager_.get());
Segments segments;
const ConversionRequest request;
Segment *segment = segments.push_back_segment();
// "てん"
const string kKey = "\xe3\x81\xa6\xe3\x82\x93";
segment->set_key(kKey);
Segment::Candidate *candidate = segment->add_candidate();
candidate->Init();
candidate->key = "strange key";
candidate->value = "strange value";
candidate->content_key = "strange key";
candidate->content_value = "strange value";
EXPECT_EQ(1, segment->candidates_size());
EXPECT_TRUE(symbol_rewriter.Rewrite(request, &segments));
EXPECT_GT(segment->candidates_size(), 1);
for (size_t i = 1; i < segment->candidates_size(); ++i) {
EXPECT_EQ(kKey, segment->candidate(i).key);
}
}
TEST_F(SymbolRewriterTest, MobileEnvironmentTest) {
commands::Request input;
SymbolRewriter rewriter(converter_, data_manager_.get());
{
input.set_mixed_conversion(true);
const ConversionRequest request(NULL, &input);
EXPECT_EQ(RewriterInterface::ALL, rewriter.capability(request));
}
{
input.set_mixed_conversion(false);
const ConversionRequest request(NULL, &input);
EXPECT_EQ(RewriterInterface::CONVERSION, rewriter.capability(request));
}
}
TEST_F(SymbolRewriterTest, ExpandSpace) {
SymbolRewriter symbol_rewriter(converter_, data_manager_.get());
Segments segments;
const ConversionRequest request;
Segment *segment = segments.push_back_segment();
segment->set_key(" ");
Segment::Candidate *candidate = segment->add_candidate();
candidate->Init();
candidate->key = " ";
candidate->value = " ";
candidate->content_key = " ";
candidate->content_value = " ";
candidate->PushBackInnerSegmentBoundary(1, 1, 1, 1);
EXPECT_TRUE(symbol_rewriter.Rewrite(request, &segments));
EXPECT_LE(2, segment->candidates_size());
const Segment::Candidate &cand0 = segment->candidate(0);
EXPECT_EQ(" ", cand0.key);
EXPECT_EQ(" ", cand0.value);
EXPECT_EQ(" ", cand0.content_key);
EXPECT_EQ(" ", cand0.content_value);
ASSERT_EQ(1, cand0.inner_segment_boundary.size());
EXPECT_EQ(Segment::Candidate::EncodeLengths(1, 1, 1, 1),
cand0.inner_segment_boundary[0]);
const char *kFullWidthSpace = "\xe3\x80\x80";
const Segment::Candidate &cand1 = segment->candidate(1);
EXPECT_EQ(" ", cand1.key);
EXPECT_EQ(kFullWidthSpace, cand1.value);
EXPECT_EQ(" ", cand1.content_key);
EXPECT_EQ(kFullWidthSpace, cand1.content_value);
EXPECT_TRUE(cand1.inner_segment_boundary.empty());
}
} // namespace mozc