// blob: d58b4489f9d3340a84d8512134e9a44cf7a2f466 [file] [log] [blame]
// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "rewriter/number_compound_util.h"
#include "data_manager/testing/mock_data_manager.h"
#include "dictionary/pos_matcher.h"
#include "rewriter/counter_suffix.h"
#include "testing/base/public/gunit.h"
namespace mozc {
namespace number_compound_util {
// Verifies SplitStringIntoNumberAndCounterSuffix() against a small counter
// suffix table: first with inputs that are expected to split into a number
// part and a suffix part, then with inputs for which the function is expected
// to return false.
TEST(NumberCompoundUtilTest, SplitStringIntoNumberAndCounterSuffix) {
  // Counter suffix table passed to the function under test.  The entries are
  // listed in ascending byte order, and the second field of each entry equals
  // the byte length of its UTF-8 literal.
  const CounterSuffixEntry kCounterSuffixSortedArray[] = {
    {
      // "デシベル"
      "\xE3\x83\x87\xE3\x82\xB7\xE3\x83\x99\xE3\x83\xAB", 12,
    },
    {
      // "回"
      "\xE5\x9B\x9E", 3,
    },
    {
      // "階"
      "\xE9\x9A\x8E", 3,
    },
  };

  // Test cases for splittable compounds.
  struct {
    const char* input;
    const char* expected_number;
    const char* expected_suffix;
    uint32 expected_script_type;
  } kSplittableCases[] = {
    {
      "\xE4\xB8\x80\xE9\x9A\x8E",  // "一階"
      "\xE4\xB8\x80",              // "一"
      "\xE9\x9A\x8E",              // "階"
      number_compound_util::KANJI,
    },
    {
      "\xE5\xA3\xB1\xE9\x9A\x8E",  // "壱階"
      "\xE5\xA3\xB1",              // "壱"
      "\xE9\x9A\x8E",              // "階"
      number_compound_util::OLD_KANJI,
    },
    {
      "\xE4\xB8\x89\xE5\x8D\x81\xE4\xB8\x80\xE5\x9B\x9E",  // "三十一回"
      "\xE4\xB8\x89\xE5\x8D\x81\xE4\xB8\x80",              // "三十一"
      "\xE5\x9B\x9E",                                      // "回"
      number_compound_util::KANJI,
    },
    {
      // Number only: the suffix part is expected to be empty.
      "\xE4\xB8\x89\xE5\x8D\x81\xE4\xB8\x80",  // "三十一"
      "\xE4\xB8\x89\xE5\x8D\x81\xE4\xB8\x80",  // "三十一"
      "",
      number_compound_util::KANJI,
    },
    // The remaining cases are suffix only.  Their number part is empty and the
    // expected script type is 0, now spelled out instead of relying on the
    // implicit zero-initialization of an omitted trailing member.
    {
      "\xE3\x83\x87\xE3\x82\xB7\xE3\x83\x99\xE3\x83\xAB",  // "デシベル"
      "",
      "\xE3\x83\x87\xE3\x82\xB7\xE3\x83\x99\xE3\x83\xAB",  // "デシベル"
      0,
    },
    {
      "\xE5\x9B\x9E",  // "回"
      "",
      "\xE5\x9B\x9E",  // "回"
      0,
    },
    {
      "\xE9\x9A\x8E",  // "階"
      "",
      "\xE9\x9A\x8E",  // "階"
      0,
    },
  };
  for (size_t i = 0; i < arraysize(kSplittableCases); ++i) {
    StringPiece actual_number, actual_suffix;
    uint32 actual_script_type = 0;
    // Every assertion carries the case index and input so that a failure
    // inside the loop can be attributed to a specific table entry.
    EXPECT_TRUE(SplitStringIntoNumberAndCounterSuffix(
        kCounterSuffixSortedArray, arraysize(kCounterSuffixSortedArray),
        kSplittableCases[i].input, &actual_number, &actual_suffix,
        &actual_script_type))
        << "splittable case #" << i << ": " << kSplittableCases[i].input;
    EXPECT_EQ(kSplittableCases[i].expected_number, actual_number)
        << "splittable case #" << i << ": " << kSplittableCases[i].input;
    EXPECT_EQ(kSplittableCases[i].expected_suffix, actual_suffix)
        << "splittable case #" << i << ": " << kSplittableCases[i].input;
    EXPECT_EQ(kSplittableCases[i].expected_script_type, actual_script_type)
        << "splittable case #" << i << ": " << kSplittableCases[i].input;
  }

  // Test cases for unsplittable compounds.
  const char* kUnsplittableCases[] = {
    // "回八": a counter suffix followed by a number.
    "\xE5\x9B\x9E\xE5\x85\xAB",
    // No number and no counter suffix.
    "Google",
    // "ア一階": a katakana character precedes the number.
    "\xE3\x82\xA2\xE4\xB8\x80\xE9\x9A\x8E",
    // "八億九千万600七十４デシベル": the number part mixes kanji numerals,
    // half-width digits ("600") and a full-width digit ("４").
    "\xE5\x85\xAB\xE5\x84\x84\xE4\xB9\x9D\xE5\x8D\x83\xE4\xB8\x87"
    "\x36\x30\x30\xE4\xB8\x83\xE5\x8D\x81\xEF\xBC\x94\xE3\x83\x87"
    "\xE3\x82\xB7\xE3\x83\x99\xE3\x83\xAB",
  };
  for (size_t i = 0; i < arraysize(kUnsplittableCases); ++i) {
    StringPiece actual_number, actual_suffix;
    uint32 actual_script_type = 0;
    EXPECT_FALSE(SplitStringIntoNumberAndCounterSuffix(
        kCounterSuffixSortedArray, arraysize(kCounterSuffixSortedArray),
        kUnsplittableCases[i], &actual_number, &actual_suffix,
        &actual_script_type))
        << "unsplittable case #" << i << ": " << kUnsplittableCases[i];
  }
}
// Exercises IsNumber() with candidates whose left/right POS ids are taken from
// the mock data manager's POSMatcher.
TEST(NumberCompoundUtilTest, IsNumber) {
  // Counter suffix table for this test, listed in ascending byte order.  The
  // second field of each entry equals the byte length of its UTF-8 literal.
  const CounterSuffixEntry kSuffixTable[] = {
    {"\xE5\x9B\x9E", 3},  // "回"
    {"\xE9\x9A\x8E", 3},  // "階"
  };
  const size_t kSuffixTableSize = arraysize(kSuffixTable);

  const testing::MockDataManager data_manager;
  const POSMatcher &matcher = *data_manager.GetPOSMatcher();

  Segment::Candidate candidate;

  // Number id on both sides.
  candidate.Init();
  candidate.rid = matcher.GetNumberId();
  candidate.lid = matcher.GetNumberId();
  EXPECT_TRUE(IsNumber(kSuffixTable, kSuffixTableSize, matcher, candidate));

  // Kanji number id on both sides.
  candidate.Init();
  candidate.rid = matcher.GetKanjiNumberId();
  candidate.lid = matcher.GetKanjiNumberId();
  EXPECT_TRUE(IsNumber(kSuffixTable, kSuffixTableSize, matcher, candidate));

  // Number id on the left, counter suffix word id on the right.
  candidate.Init();
  candidate.rid = matcher.GetCounterSuffixWordId();
  candidate.lid = matcher.GetNumberId();
  EXPECT_TRUE(IsNumber(kSuffixTable, kSuffixTableSize, matcher, candidate));

  // Number id on the left, parallel marker id on the right.
  candidate.Init();
  candidate.rid = matcher.GetParallelMarkerId();
  candidate.lid = matcher.GetNumberId();
  EXPECT_TRUE(IsNumber(kSuffixTable, kSuffixTableSize, matcher, candidate));

  // Number id on both sides, with "一階" as value and content value.
  candidate.Init();
  candidate.rid = matcher.GetNumberId();
  candidate.lid = matcher.GetNumberId();
  candidate.content_value = "\xE4\xB8\x80\xE9\x9A\x8E";  // "一階"
  candidate.value = "\xE4\xB8\x80\xE9\x9A\x8E";          // "一階"
  EXPECT_TRUE(IsNumber(kSuffixTable, kSuffixTableSize, matcher, candidate));

  // Adverb id on both sides: the only case expected not to be a number.
  candidate.Init();
  candidate.rid = matcher.GetAdverbId();
  candidate.lid = matcher.GetAdverbId();
  EXPECT_FALSE(IsNumber(kSuffixTable, kSuffixTableSize, matcher, candidate));
}
} // namespace number_compound_util
} // namespace mozc