blob: 251c1426984f815db234ade5e650d21efa6838a8 [file] [log] [blame]
// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "composer/internal/typing_corrector.h"
#include <string>
#include <vector>
#include "base/singleton.h"
#include "config/config_handler.h"
#include "composer/table.h"
#include "composer/type_corrected_query.h"
#include "composer/internal/typing_model.h"
#include "session/commands.pb.h"
#include "testing/base/public/gunit.h"
namespace mozc {
namespace composer {
// The header is textually included inside an unnamed namespace, so whatever
// it defines gets internal linkage and stays private to this test's
// translation unit.
// NOTE(review): presumably this is where the k*_QwertyMobileHiragana tables
// passed to TypingModel in TypingCorrectorTest's constructor come from --
// confirm against the header.
namespace {
#include "composer/internal/typing_model_qwerty_mobile-hiragana.h"
}  // namespace
using mozc::config::Config;
using mozc::config::ConfigHandler;
// Embedded cost for testing purpose.
class CostTableForTest {
public:
typedef map<StringPiece, ProbableKeyEvents> CorrectionTable;
CostTableForTest() {
{
ProbableKeyEvents *v = &table_["a"];
AddProbableKeyEvent('a', 0.99, v);
AddProbableKeyEvent('q', 0.003, v);
AddProbableKeyEvent('w', 0.003, v);
AddProbableKeyEvent('s', 0.004, v);
}
{
ProbableKeyEvents *v = &table_["b"];
AddProbableKeyEvent('b', 0.99, v);
AddProbableKeyEvent('v', 0.0025, v);
AddProbableKeyEvent('h', 0.0025, v);
AddProbableKeyEvent('j', 0.0025, v);
AddProbableKeyEvent('n', 0.0025, v);
}
{
ProbableKeyEvents *v = &table_["c"];
AddProbableKeyEvent('c', 0.99, v);
AddProbableKeyEvent('x', 0.0025, v);
AddProbableKeyEvent('f', 0.0025, v);
AddProbableKeyEvent('g', 0.0025, v);
AddProbableKeyEvent('v', 0.0025, v);
}
{
ProbableKeyEvents *v = &table_["d"];
AddProbableKeyEvent('d', 0.99, v);
AddProbableKeyEvent('s', 0.002, v);
AddProbableKeyEvent('e', 0.002, v);
AddProbableKeyEvent('f', 0.002, v);
AddProbableKeyEvent('x', 0.002, v);
AddProbableKeyEvent('z', 0.002, v);
}
{
ProbableKeyEvents *v = &table_["e"];
AddProbableKeyEvent('e', 0.99, v);
AddProbableKeyEvent('w', 0.003, v);
AddProbableKeyEvent('d', 0.003, v);
AddProbableKeyEvent('r', 0.004, v);
}
{
ProbableKeyEvents *v = &table_["f"];
AddProbableKeyEvent('f', 0.99, v);
AddProbableKeyEvent('d', 0.002, v);
AddProbableKeyEvent('r', 0.002, v);
AddProbableKeyEvent('g', 0.002, v);
AddProbableKeyEvent('c', 0.002, v);
AddProbableKeyEvent('x', 0.002, v);
}
{
ProbableKeyEvents *v = &table_["g"];
AddProbableKeyEvent('g', 0.99, v);
AddProbableKeyEvent('f', 0.002, v);
AddProbableKeyEvent('t', 0.002, v);
AddProbableKeyEvent('h', 0.002, v);
AddProbableKeyEvent('v', 0.002, v);
AddProbableKeyEvent('c', 0.002, v);
}
{
ProbableKeyEvents *v = &table_["h"];
AddProbableKeyEvent('h', 0.99, v);
AddProbableKeyEvent('g', 0.002, v);
AddProbableKeyEvent('y', 0.002, v);
AddProbableKeyEvent('j', 0.002, v);
AddProbableKeyEvent('b', 0.002, v);
AddProbableKeyEvent('v', 0.002, v);
}
{
ProbableKeyEvents *v = &table_["i"];
AddProbableKeyEvent('i', 0.99, v);
AddProbableKeyEvent('u', 0.003, v);
AddProbableKeyEvent('k', 0.003, v);
AddProbableKeyEvent('o', 0.004, v);
}
{
ProbableKeyEvents *v = &table_["j"];
AddProbableKeyEvent('j', 0.99, v);
AddProbableKeyEvent('h', 0.002, v);
AddProbableKeyEvent('k', 0.002, v);
AddProbableKeyEvent('u', 0.002, v);
AddProbableKeyEvent('n', 0.002, v);
AddProbableKeyEvent('b', 0.002, v);
}
{
ProbableKeyEvents *v = &table_["k"];
AddProbableKeyEvent('k', 0.99, v);
AddProbableKeyEvent('j', 0.002, v);
AddProbableKeyEvent('i', 0.002, v);
AddProbableKeyEvent('l', 0.002, v);
AddProbableKeyEvent('m', 0.002, v);
AddProbableKeyEvent('n', 0.002, v);
}
{
ProbableKeyEvents *v = &table_["l"];
AddProbableKeyEvent('l', 0.99, v);
AddProbableKeyEvent('k', 0.0025, v);
AddProbableKeyEvent('-', 0.0025, v);
AddProbableKeyEvent('p', 0.0025, v);
AddProbableKeyEvent('o', 0.0025, v);
}
{
ProbableKeyEvents *v = &table_["m"];
AddProbableKeyEvent('m', 0.99, v);
AddProbableKeyEvent('n', 0.003, v);
AddProbableKeyEvent('k', 0.003, v);
AddProbableKeyEvent('l', 0.004, v);
}
{
ProbableKeyEvents *v = &table_["n"];
AddProbableKeyEvent('n', 0.99, v);
AddProbableKeyEvent('b', 0.0025, v);
AddProbableKeyEvent('m', 0.0025, v);
AddProbableKeyEvent('j', 0.0025, v);
AddProbableKeyEvent('k', 0.0025, v);
}
{
ProbableKeyEvents *v = &table_["o"];
AddProbableKeyEvent('o', 0.99, v);
AddProbableKeyEvent('i', 0.0025, v);
AddProbableKeyEvent('k', 0.0025, v);
AddProbableKeyEvent('l', 0.0025, v);
AddProbableKeyEvent('p', 0.0025, v);
}
{
ProbableKeyEvents *v = &table_["p"];
AddProbableKeyEvent('p', 0.99, v);
AddProbableKeyEvent('o', 0.003, v);
AddProbableKeyEvent('l', 0.003, v);
AddProbableKeyEvent('-', 0.004, v);
}
{
ProbableKeyEvents *v = &table_["q"];
AddProbableKeyEvent('q', 0.99, v);
AddProbableKeyEvent('w', 0.003, v);
AddProbableKeyEvent('a', 0.003, v);
AddProbableKeyEvent('s', 0.004, v);
}
{
ProbableKeyEvents *v = &table_["r"];
AddProbableKeyEvent('r', 0.99, v);
AddProbableKeyEvent('t', 0.003, v);
AddProbableKeyEvent('f', 0.003, v);
AddProbableKeyEvent('e', 0.004, v);
}
{
ProbableKeyEvents *v = &table_["s"];
AddProbableKeyEvent('s', 0.99, v);
AddProbableKeyEvent('a', 0.0025, v);
AddProbableKeyEvent('d', 0.0025, v);
AddProbableKeyEvent('w', 0.0025, v);
AddProbableKeyEvent('z', 0.0025, v);
}
{
ProbableKeyEvents *v = &table_["t"];
AddProbableKeyEvent('t', 0.99, v);
AddProbableKeyEvent('r', 0.003, v);
AddProbableKeyEvent('y', 0.003, v);
AddProbableKeyEvent('g', 0.003, v);
}
{
ProbableKeyEvents *v = &table_["u"];
AddProbableKeyEvent('u', 0.99, v);
AddProbableKeyEvent('y', 0.003, v);
AddProbableKeyEvent('i', 0.003, v);
AddProbableKeyEvent('j', 0.004, v);
}
{
ProbableKeyEvents *v = &table_["v"];
AddProbableKeyEvent('v', 0.99, v);
AddProbableKeyEvent('c', 0.0025, v);
AddProbableKeyEvent('g', 0.0025, v);
AddProbableKeyEvent('h', 0.0025, v);
AddProbableKeyEvent('b', 0.0025, v);
}
{
ProbableKeyEvents *v = &table_["w"];
AddProbableKeyEvent('w', 0.99, v);
AddProbableKeyEvent('q', 0.0025, v);
AddProbableKeyEvent('e', 0.0025, v);
AddProbableKeyEvent('a', 0.0025, v);
AddProbableKeyEvent('s', 0.0025, v);
}
{
ProbableKeyEvents *v = &table_["x"];
AddProbableKeyEvent('x', 0.99, v);
AddProbableKeyEvent('z', 0.0025, v);
AddProbableKeyEvent('d', 0.0025, v);
AddProbableKeyEvent('f', 0.0025, v);
AddProbableKeyEvent('c', 0.0025, v);
}
{
ProbableKeyEvents *v = &table_["y"];
AddProbableKeyEvent('y', 0.99, v);
AddProbableKeyEvent('t', 0.003, v);
AddProbableKeyEvent('h', 0.003, v);
AddProbableKeyEvent('u', 0.004, v);
}
{
ProbableKeyEvents *v = &table_["z"];
AddProbableKeyEvent('z', 0.99, v);
AddProbableKeyEvent('s', 0.003, v);
AddProbableKeyEvent('d', 0.003, v);
AddProbableKeyEvent('x', 0.004, v);
}
{
ProbableKeyEvents *v = &table_["-"];
AddProbableKeyEvent('-', 0.99, v);
AddProbableKeyEvent('p', 0.003, v);
AddProbableKeyEvent('o', 0.003, v);
AddProbableKeyEvent('l', 0.004, v);
}
}
void InsertCharacter(TypingCorrector *corrector,
const StringPiece& key) const {
corrector->InsertCharacter(key, table_.find(key)->second);
}
private:
CorrectionTable table_;
void AddProbableKeyEvent(int key_code, double probability,
ProbableKeyEvents *probable_key_events) {
ProbableKeyEvent *event = probable_key_events->Add();
event->set_key_code(key_code);
event->set_probability(probability);
}
DISALLOW_COPY_AND_ASSIGN(CostTableForTest);
};
class TypingCorrectorTest : public ::testing::Test {
protected:
TypingCorrectorTest() :
qwerty_typing_model_(
kKeyCharacters_QwertyMobileHiragana,
kKeyCharactersSize_QwertyMobileHiragana,
kCostTable_QwertyMobileHiragana,
kCostTableSize_QwertyMobileHiragana,
kCostMappingTable_QwertyMobileHiragana) {
}
virtual void SetUp() {
ConfigHandler::GetConfig(&config_backup_);
Config config;
ConfigHandler::GetDefaultConfig(&config);
config.set_use_typing_correction(true);
ConfigHandler::SetConfig(config);
commands::Request request;
request.set_special_romanji_table(
commands::Request::QWERTY_MOBILE_TO_HIRAGANA);
qwerty_table_.InitializeWithRequestAndConfig(request, config);
qwerty_table_.typing_model_ = &qwerty_typing_model_;
}
virtual void TearDown() {
ConfigHandler::SetConfig(config_backup_);
}
void InsertOneByOne(const char *keys, TypingCorrector *corrector) {
for (const char *key = keys; *key != '\0'; ++key) {
Singleton<CostTableForTest>::get()->InsertCharacter(corrector,
StringPiece(key, 1));
}
}
bool FindKey(const vector<TypeCorrectedQuery> &queries,
const string &key) {
for (size_t i = 0; i < queries.size(); ++i) {
const set<string> &expanded = queries[i].expanded;
if (expanded.empty() && queries[i].base == key) {
return true;
}
for (set<string>::const_iterator itr = expanded.begin();
itr != expanded.end(); ++itr) {
if (queries[i].base + *itr == key) {
return true;
}
}
}
return false;
}
void ExpectTypingCorrectorEqual(const TypingCorrector &l,
const TypingCorrector &r) {
EXPECT_EQ(l.available_, r.available_);
EXPECT_EQ(l.table_, r.table_);
EXPECT_EQ(l.max_correction_query_candidates_,
r.max_correction_query_candidates_);
EXPECT_EQ(l.max_correction_query_results_,
r.max_correction_query_results_);
EXPECT_EQ(l.top_n_.size(), r.top_n_.size());
for (size_t i = 0; i < l.top_n_.size(); ++i) {
EXPECT_EQ(l.top_n_[i], l.top_n_[i]);
}
}
Config config_backup_;
Table qwerty_table_;
TypingModel qwerty_typing_model_;
};
// Types each key sequence into the corrector and checks the queries returned
// by GetQueriesForPrediction():
//  - when |correction| is non-NULL it must be among the queries, and
//  - |exact_composition| (what the raw keys compose to) must never be.
TEST_F(TypingCorrectorTest, TypingCorrection) {
  const int kCorrectedQueryCandidates = 1000;
  const int kCorrectedQueryResults = 1000;
  TypingCorrector corrector(&qwerty_table_,
                            kCorrectedQueryCandidates,
                            kCorrectedQueryResults);
  ASSERT_TRUE(corrector.IsAvailable());

  const struct {
    const char *keys;
    const char *correction;
    const char *exact_composition;
  } kTestCases[] = {
    // "phayou" -> "おはよう", "pはよう"
    { "phayou",
      "\xE3\x81\x8A\xE3\x81\xAF\xE3\x82\x88\xE3\x81\x86",
      "\xEF\xBD\x90\xE3\x81\xAF\xE3\x82\x88\xE3\x81\x86"},
    // "orukaresama" -> "おつかれさま", "おるかれさま"
    { "orukaresama",
      "\xE3\x81\x8A\xE3\x81\xA4\xE3\x81\x8B"
      "\xE3\x82\x8C\xE3\x81\x95\xE3\x81\xBE",
      "\xE3\x81\x8A\xE3\x82\x8B\xE3\x81\x8B"
      "\xE3\x82\x8C\xE3\x81\x95\xE3\x81\xBE"},
    // "gu-huru" -> "ぐーぐる", "ぐーふる"
    { "gu-huru",
      "\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90\xE3\x82\x8B",
      "\xE3\x81\x90\xE3\x83\xBC\xE3\x81\xB5\xE3\x82\x8B"},
    // "bihongo" -> "にほんご", "びほんご"
    { "bihongo",
      "\xE3\x81\xAB\xE3\x81\xBB\xE3\x82\x93\xE3\x81\x94",
      "\xE3\x81\xB3\xE3\x81\xBB\xE3\x82\x93\xE3\x81\x94"},
    // "yajiniku" -> "やきにく", "やじにく"
    { "yajiniku",
      "\xE3\x82\x84\xE3\x81\x8D\xE3\x81\xAB\xE3\x81\x8F",
      "\xE3\x82\x84\xE3\x81\x98\xE3\x81\xAB\xE3\x81\x8F"},
    // "so-natsu" -> "どーなつ", "そーなつ"
    { "so-natsu",
      "\xE3\x81\xA9\xE3\x83\xBC\xE3\x81\xAA\xE3\x81\xA4",
      "\xE3\x81\x9D\xE3\x83\xBC\xE3\x81\xAA\xE3\x81\xA4"},
    // "ohayou" -> NULL, "おはよう"
    // "おはよう" can be generated from the raw keys, so
    // it shouldn't be among the correction candidates.
    { "ohayou",
      NULL,
      "\xE3\x81\x8A\xE3\x81\xAF\xE3\x82\x88\xE3\x81\x86"},
    // "syamozi" -> NULL, "しゃもじ"
    // A query which can be composed from the raw input
    // shouldn't be among the correction candidates.
    // This is a more complex pattern than the "おはよう" test case.
    // The "おはよう" case can be handled by comparing the raw input
    // with the corrected input.
    // Here the raw input "syamozi" and a corrected input (e.g. "shamozi")
    // differ, yet their query is identical ("しゃもじ").
    // Thus not only the raw/corrected inputs but also the
    // raw/corrected queries have to be compared.
    { "syamozi",
      NULL,
      "\xE3\x81\x97\xE3\x82\x83\xE3\x82\x82\xE3\x81\x98"},
    // "kaish" -> NULL, "かいしゃ"
    // Pending input is expanded into possible queries for
    // kana-modifier-insensitive-conversion (a.k.a かつこう変換).
    // In this case "kaish" is expanded into "かいしゃ", "かいしゅ"
    // and so on.
    // The typing-corrected input "kaisy" is also expanded into
    // "かいしゃ", "かいしゅ" and so on, but those duplicate the
    // queries expanded from "kaish".
    // Thus they shouldn't be among the corrected candidates.
    { "kaish",
      NULL,
      "\xE3\x81\x8B\xE3\x81\x84\xE3\x81\x97\xE3\x82\x83"},
  };

  for (size_t i = 0; i < arraysize(kTestCases); ++i) {
    SCOPED_TRACE(string("key: ") + kTestCases[i].keys);
    InsertOneByOne(kTestCases[i].keys, &corrector);

    vector<TypeCorrectedQuery> queries;
    corrector.GetQueriesForPrediction(&queries);

    // The number of queries may be equal to kCorrectedQueryResults, but must
    // not exceed it. The cast keeps the comparison unsigned on both sides.
    EXPECT_GE(static_cast<size_t>(kCorrectedQueryResults), queries.size());
    for (vector<TypeCorrectedQuery>::const_iterator it = queries.begin();
         it != queries.end(); ++it) {
      // Empty TypeCorrectedQuery is unexpected.
      EXPECT_FALSE(it->base.empty() && it->expanded.empty());
    }

    if (kTestCases[i].correction) {
      EXPECT_TRUE(FindKey(queries, kTestCases[i].correction))
          << kTestCases[i].correction << " isn't contained";
    }
    EXPECT_FALSE(FindKey(queries, kTestCases[i].exact_composition))
        << kTestCases[i].exact_composition << " is contained unexpectedly";

    // Start the next test case from a clean corrector.
    corrector.Reset();
  }
}
// Once Invalidate() has been called the corrector reports itself as
// unavailable, and keys inserted afterwards yield no prediction queries.
TEST_F(TypingCorrectorTest, Invalidate) {
  TypingCorrector corrector(&qwerty_table_, 30, 30);
  EXPECT_TRUE(corrector.IsAvailable());

  // Keys typed while the corrector is still available.
  InsertOneByOne("pha", &corrector);

  corrector.Invalidate();
  EXPECT_FALSE(corrector.IsAvailable());

  // Keys typed after invalidation.
  InsertOneByOne("you", &corrector);

  vector<TypeCorrectedQuery> results;
  corrector.GetQueriesForPrediction(&results);
  EXPECT_TRUE(results.empty());
}
// CopyFrom() must make the destination compare equal to the source, even
// when the destination was constructed with a different table and limits.
TEST_F(TypingCorrectorTest, CopyFrom) {
  TypingCorrector source(&qwerty_table_, 30, 30);
  InsertOneByOne("phayou", &source);

  TypingCorrector destination(NULL, 1000, 1000);
  destination.CopyFrom(source);

  ExpectTypingCorrectorEqual(source, destination);
}
} // namespace composer
} // namespace mozc