// blob: 1b3430c685e6521c36548b121e8e6c44680de456 [file] [log] [blame]
// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Interactive composer from a Roman string to a Hiragana string
#include "composer/composer.h"
#include "base/logging.h"
#include "base/singleton.h"
#include "base/util.h"
#include "composer/internal/composition.h"
#include "composer/internal/composition_input.h"
#include "composer/internal/mode_switching_handler.h"
#include "composer/internal/transliterators.h"
#include "composer/internal/typing_corrector.h"
#include "composer/table.h"
#include "composer/type_corrected_query.h"
#include "config/character_form_manager.h"
#include "config/config.pb.h"
#include "config/config_handler.h"
#include "session/commands.pb.h"
#include "session/key_event_util.h"
// Use flags instead of constants for performance evaluation.
// Both limits are handed to the typing corrector in Composer's constructor.
DEFINE_uint64(max_typing_correction_query_candidates, 40,
"Maximum # of typing correction query temporary candidates.");
DEFINE_uint64(max_typing_correction_query_results, 8,
"Maximum # of typing correction query results.");
// Declared here, defined in another translation unit. It is ORed with the
// use_typing_correction config value in Composer::InsertCharacterKeyEvent().
DECLARE_bool(enable_typing_correction);
namespace mozc {
namespace composer {
using ::mozc::config::CharacterFormManager;
namespace {
// Maps a transliteration type onto the transliterator handed to the
// composition. Every ASCII variant (plain, upper, lower, capitalized)
// collapses onto the plain half-width or full-width ASCII transliterator.
// An unknown type is logged and mapped to CONVERSION_STRING.
const Transliterators::Transliterator GetTransliterator(
    transliteration::TransliterationType comp_mode) {
  switch (comp_mode) {
    case transliteration::HIRAGANA:
      return Transliterators::HIRAGANA;

    case transliteration::FULL_KATAKANA:
      return Transliterators::FULL_KATAKANA;

    case transliteration::HALF_KATAKANA:
      return Transliterators::HALF_KATAKANA;

    case transliteration::FULL_ASCII:
    case transliteration::FULL_ASCII_UPPER:
    case transliteration::FULL_ASCII_LOWER:
    case transliteration::FULL_ASCII_CAPITALIZED:
      return Transliterators::FULL_ASCII;

    case transliteration::HALF_ASCII:
    case transliteration::HALF_ASCII_UPPER:
    case transliteration::HALF_ASCII_LOWER:
    case transliteration::HALF_ASCII_CAPITALIZED:
      return Transliterators::HALF_ASCII;

    default:
      LOG(ERROR) << "Unknown TransliterationType: " << comp_mode;
      return Transliterators::CONVERSION_STRING;
  }
}
// Inverse of GetTransliterator() for the five base modes: returns the
// transliteration type that corresponds to |transliterator|, or
// |default_type| when the transliterator is none of HIRAGANA, HALF_ASCII,
// FULL_ASCII, FULL_KATAKANA or HALF_KATAKANA.
transliteration::TransliterationType GetTransliterationType(
    Transliterators::Transliterator transliterator,
    const transliteration::TransliterationType default_type) {
  struct Mapping {
    Transliterators::Transliterator from;
    transliteration::TransliterationType to;
  };
  static const Mapping kMappings[] = {
    { Transliterators::HIRAGANA, transliteration::HIRAGANA },
    { Transliterators::HALF_ASCII, transliteration::HALF_ASCII },
    { Transliterators::FULL_ASCII, transliteration::FULL_ASCII },
    { Transliterators::FULL_KATAKANA, transliteration::FULL_KATAKANA },
    { Transliterators::HALF_KATAKANA, transliteration::HALF_KATAKANA },
  };
  const size_t num_mappings = sizeof(kMappings) / sizeof(kMappings[0]);
  for (size_t index = 0; index < num_mappings; ++index) {
    if (transliterator == kMappings[index].from) {
      return kMappings[index].to;
    }
  }
  return default_type;
}
// Converts |input| into |output| according to |mode|.
//
// The work is split into two steps:
//   1. script/width conversion (ASCII width, katakana), and
//   2. letter-case adjustment for the *_UPPER / *_LOWER / *_CAPITALIZED
//      ASCII variants.
// HIRAGANA copies the input unchanged. An unknown mode is logged and also
// copies the input unchanged.
void Transliterate(const transliteration::TransliterationType mode,
                   const string &input,
                   string *output) {
  // Step 1: script / width conversion.
  switch (mode) {
    case transliteration::HALF_KATAKANA: {
      // When the mode is HALF_KATAKANA, full-width ASCII is also
      // transformed, hence the generic full-to-half width conversion.
      string katakana;
      Util::HiraganaToKatakana(input, &katakana);
      Util::FullWidthToHalfWidth(katakana, output);
      return;
    }
    case transliteration::FULL_KATAKANA:
      Util::HiraganaToKatakana(input, output);
      return;
    case transliteration::HIRAGANA:
      *output = input;
      return;
    case transliteration::HALF_ASCII:
    case transliteration::HALF_ASCII_UPPER:
    case transliteration::HALF_ASCII_LOWER:
    case transliteration::HALF_ASCII_CAPITALIZED:
      Util::FullWidthAsciiToHalfWidthAscii(input, output);
      break;
    case transliteration::FULL_ASCII:
    case transliteration::FULL_ASCII_UPPER:
    case transliteration::FULL_ASCII_LOWER:
    case transliteration::FULL_ASCII_CAPITALIZED:
      Util::HalfWidthAsciiToFullWidthAscii(input, output);
      break;
    default:
      LOG(ERROR) << "Unknown TransliterationType: " << mode;
      *output = input;
      return;
  }

  // Step 2: letter case. Only ASCII modes reach this point.
  switch (mode) {
    case transliteration::HALF_ASCII_UPPER:
    case transliteration::FULL_ASCII_UPPER:
      Util::UpperString(output);
      break;
    case transliteration::HALF_ASCII_LOWER:
    case transliteration::FULL_ASCII_LOWER:
      Util::LowerString(output);
      break;
    case transliteration::HALF_ASCII_CAPITALIZED:
    case transliteration::FULL_ASCII_CAPITALIZED:
      Util::CapitalizeString(output);
      break;
    default:
      // Plain HALF_ASCII / FULL_ASCII keep the case as typed.
      break;
  }
}
// Translates a commands::CompositionMode into the matching transliteration
// type. Any other value (commands::DIRECT or an invalid mode) is logged as
// an error and treated as HIRAGANA.
transliteration::TransliterationType GetTransliterationTypeFromCompositionMode(
    const commands::CompositionMode mode) {
  switch (mode) {
    case commands::HALF_ASCII:
      return transliteration::HALF_ASCII;
    case commands::FULL_ASCII:
      return transliteration::FULL_ASCII;
    case commands::HALF_KATAKANA:
      return transliteration::HALF_KATAKANA;
    case commands::FULL_KATAKANA:
      return transliteration::FULL_KATAKANA;
    case commands::HIRAGANA:
      return transliteration::HIRAGANA;
    default:
      // commands::DIRECT or invalid mode.
      LOG(ERROR) << "Invalid CompositionMode: " << mode;
      return transliteration::HIRAGANA;
  }
}
// Initial value of Composer::max_length_. Once the composition length reaches
// that limit, Composer::EnableInsert() rejects further input.
const size_t kMaxPreeditLength = 256;
} // namespace
// Builds a composer working on |table| and |request|.
//
// All modes start as HIRAGANA, the input field type as NORMAL, and the
// maximum preedit length as kMaxPreeditLength. The typing corrector limits
// come from the max_typing_correction_query_* flags. The constructor ends by
// setting the input mode to HIRAGANA and calling Reset().
Composer::Composer(const Table *table, const commands::Request *request)
    : position_(0),
      is_new_input_(true),
      input_mode_(transliteration::HIRAGANA),
      output_mode_(transliteration::HIRAGANA),
      comeback_input_mode_(transliteration::HIRAGANA),
      input_field_type_(commands::Context::NORMAL),
      shifted_sequence_count_(0),
      composition_(new Composition(table)),
      typing_corrector_(table,
                        FLAGS_max_typing_correction_query_candidates,
                        FLAGS_max_typing_correction_query_results),
      max_length_(kMaxPreeditLength),
      request_(request) {
  SetInputMode(transliteration::HIRAGANA);
  Reset();
}
// The destructor performs no explicit cleanup of its own.
Composer::~Composer() {}
// Returns the composer to an empty state: erases the composition, restores
// the comeback input mode, sets the output mode to HIRAGANA, clears the
// source text, and resets the typing corrector.
void Composer::Reset() {
  EditErase();
  ResetInputMode();
  SetOutputMode(transliteration::HIRAGANA);
  source_text_.clear();
  typing_corrector_.Reset();
}
// Re-applies the comeback input mode as the current input mode.
void Composer::ResetInputMode() {
  SetInputMode(comeback_input_mode_);
}
// Intentionally a no-op: the composer currently caches no config values.
void Composer::ReloadConfig() {
  // Do nothing at this moment.
}
// Returns true when the composition has no characters.
bool Composer::Empty() const {
  return GetLength() == 0;
}
// Replaces the table used by both the composition and the typing corrector.
void Composer::SetTable(const Table *table) {
  composition_->SetTable(table);
  typing_corrector_.SetTable(table);
}
// Stores |request| for later use; the pointer is kept, not copied.
void Composer::SetRequest(const commands::Request *request) {
  request_ = request;
}
// Switches the input mode to |mode| permanently: both the current mode and
// the comeback mode become |mode|. The shifted-sequence counter is cleared,
// the next key is treated as new input, and the composition receives the
// corresponding transliterator.
void Composer::SetInputMode(transliteration::TransliterationType mode) {
  input_mode_ = mode;
  comeback_input_mode_ = mode;
  is_new_input_ = true;
  shifted_sequence_count_ = 0;
  composition_->SetInputMode(GetTransliterator(mode));
}
// Switches the input mode to |mode| temporarily. Unlike SetInputMode(), the
// mode that was active before the call is remembered in
// comeback_input_mode_ so that it can be restored later.
void Composer::SetTemporaryInputMode(
    transliteration::TransliterationType mode) {
  // Remember the current mode before overwriting it.
  comeback_input_mode_ = input_mode_;
  input_mode_ = mode;
  is_new_input_ = true;
  shifted_sequence_count_ = 0;
  composition_->SetInputMode(GetTransliterator(mode));
}
void Composer::UpdateInputMode() {
if (position_ != 0 &&
request_->update_input_mode_from_surrounding_text()) {
const Transliterators::Transliterator current_t12r =
composition_->GetTransliterator(position_);
if (position_ == composition_->GetLength() ||
current_t12r == composition_->GetTransliterator(position_ + 1)) {
// - The cursor is at the tail of composition.
// Use last character's transliterator as the input mode.
// - If the current cursor is between the same character type like
// "A|B" and "あ|い", the input mode follows the character type.
input_mode_ = GetTransliterationType(current_t12r, comeback_input_mode_);
shifted_sequence_count_ = 0;
is_new_input_ = true;
composition_->SetInputMode(GetTransliterator(input_mode_));
return;
}
}
// Set the default input mode.
SetInputMode(comeback_input_mode_);
}
// Returns the input mode currently in effect.
transliteration::TransliterationType Composer::GetInputMode() const {
  return input_mode_;
}
// Returns the input mode that is restored when a temporary mode ends.
transliteration::TransliterationType Composer::GetComebackInputMode() const {
  return comeback_input_mode_;
}
void Composer::ToggleInputMode() {
if (input_mode_ == transliteration::HIRAGANA) {
// TODO(komatsu): Refer user's perference.
SetInputMode(transliteration::HALF_ASCII);
} else {
SetInputMode(transliteration::HIRAGANA);
}
}
// Returns the output mode last set via SetOutputMode().
transliteration::TransliterationType Composer::GetOutputMode() const {
  return output_mode_;
}
// Applies |mode| to the whole existing composition and moves the cursor to
// the tail.
void Composer::SetOutputMode(transliteration::TransliterationType mode) {
  output_mode_ = mode;
  const Transliterators::Transliterator t12r = GetTransliterator(mode);
  composition_->SetTransliterator(0, composition_->GetLength(), t12r);
  // The length is queried again on purpose, after the transliterator has
  // been applied.
  position_ = composition_->GetLength();
}
// Adjusts the (temporary) input mode according to the first character of
// |input| and the shift_key_mode_switch config.
//
// - Non-ASCII input: the comeback input mode is restored.
// - Alphabet typed with shift (upper case without caps lock, or lower case
//   with caps lock): switches temporarily to HALF_ASCII or FULL_KATAKANA
//   depending on the config, and counts the shifted key.
// - Alphabet typed without shift: may end the temporary mode.
// - Anything else: only the shifted-sequence counter is cleared.
void Composer::ApplyTemporaryInputMode(const string &input, bool caps_locked) {
  DCHECK(!input.empty());

  const config::Config::ShiftKeyModeSwitch switch_mode =
      GET_CONFIG(shift_key_mode_switch);

  // Input is NOT an ASCII code.
  if (Util::OneCharLen(input.c_str()) != 1) {
    SetInputMode(comeback_input_mode_);
    return;
  }

  // Input is an ASCII code. The first character determines the temporary
  // input mode.
  const char key = input[0];
  const bool is_upper = ('A' <= key && key <= 'Z');
  const bool is_lower = ('a' <= key && key <= 'z');
  const bool alpha_with_shift = caps_locked ? is_lower : is_upper;
  const bool alpha_without_shift = caps_locked ? is_upper : is_lower;

  if (alpha_with_shift) {
    if (switch_mode == config::Config::ASCII_INPUT_MODE) {
      const bool already_ascii =
          (input_mode_ == transliteration::HALF_ASCII ||
           input_mode_ == transliteration::FULL_ASCII);
      if (!already_ascii) {
        SetTemporaryInputMode(transliteration::HALF_ASCII);
      }
    } else if (switch_mode == config::Config::KATAKANA_INPUT_MODE) {
      if (input_mode_ == transliteration::HIRAGANA) {
        SetTemporaryInputMode(transliteration::FULL_KATAKANA);
      }
    }
    ++shifted_sequence_count_;
    return;
  }

  if (alpha_without_shift) {
    // When shifted input continues, the next lower input is the end of
    // temporary half-width ASCII input. In katakana mode any unshifted
    // alphabet ends the temporary mode.
    const bool end_of_ascii_sequence =
        (switch_mode == config::Config::ASCII_INPUT_MODE &&
         shifted_sequence_count_ > 1);
    const bool end_of_katakana =
        (switch_mode == config::Config::KATAKANA_INPUT_MODE);
    if (end_of_ascii_sequence || end_of_katakana) {
      SetInputMode(comeback_input_mode_);
    }
  }

  // Both an unshifted alphabet and a non-alphabet key terminate the
  // "continuous shifted input" sequence.
  shifted_sequence_count_ = 0;
}
bool Composer::InsertCharacterInternal(const string &key) {
if (!EnableInsert()) {
return false;
}
CompositionInput input;
input.set_raw(key);
input.set_is_new_input(is_new_input_);
position_ = composition_->InsertInput(position_, input);
is_new_input_ = false;
return true;
}
void Composer::InsertCharacter(const string &key) {
if (!InsertCharacterInternal(key)) {
return;
}
const ProbableKeyEvents empty_events;
typing_corrector_.InsertCharacter(key, empty_events);
}
// Same as InsertCharacter(), but forwards |probable_key_events| to the
// typing corrector.
void Composer::InsertCharacterForProbableKeyEvents(
    const string &key,
    const ProbableKeyEvents &probable_key_events) {
  if (InsertCharacterInternal(key)) {
    typing_corrector_.InsertCharacter(key, probable_key_events);
  }
}
// Inserts the special key that represents |internal_command|. Only REWIND
// is supported ("{<}"); any other command is logged as an error.
void Composer::InsertCommandCharacter(const InternalCommand internal_command) {
  if (internal_command == REWIND) {
    InsertCharacter(Table::ParseSpecialKey("{<}"));
    return;
  }
  LOG(ERROR) << "Unkown command : " << internal_command;
}
void Composer::InsertCharacterPreedit(const string &input) {
size_t begin = 0;
const size_t end = input.size();
while (begin < end) {
const size_t mblen = Util::OneCharLen(input.c_str() + begin);
const string character(input, begin, mblen);
if (!InsertCharacterKeyAndPreedit(character, character)) {
return;
}
begin += mblen;
}
DCHECK_EQ(begin, end);
}
// Inserts |input| as a single unit, using it as both key and preedit, and
// forwards |probable_key_events| to the typing corrector.
void Composer::InsertCharacterPreeditForProbableKeyEvents(
    const string &input,
    const ProbableKeyEvents &probable_key_events) {
  InsertCharacterKeyAndPreeditForProbableKeyEvents(
      input, input, probable_key_events);
}
bool Composer::InsertCharacterKeyAndPreeditInternal(const string &key,
const string &preedit) {
if (!EnableInsert()) {
return false;
}
CompositionInput input;
input.set_raw(key);
input.set_conversion(preedit);
input.set_is_new_input(is_new_input_);
position_ = composition_->InsertInput(position_, input);
is_new_input_ = false;
return true;
}
bool Composer::InsertCharacterKeyAndPreedit(const string &key,
const string &preedit) {
if (!InsertCharacterKeyAndPreeditInternal(key, preedit)) {
return false;
}
const ProbableKeyEvents empty_events;
typing_corrector_.InsertCharacter(key, empty_events);
return true;
}
// Same as InsertCharacterKeyAndPreedit(), but forwards
// |probable_key_events| to the typing corrector and returns nothing.
void Composer::InsertCharacterKeyAndPreeditForProbableKeyEvents(
    const string &key,
    const string &preedit,
    const ProbableKeyEvents &probable_key_events) {
  if (InsertCharacterKeyAndPreeditInternal(key, preedit)) {
    typing_corrector_.InsertCharacter(key, probable_key_events);
  }
}
// Feeds one key event into the composition.
//
// Processing order:
//   1. Returns false when EnableInsert() refuses further input.
//   2. If |key| carries a mode that differs from the current input mode,
//      switches to that mode.
//   3. If |key| has no key code and one of its modifiers is SHIFT, only
//      restores the comeback input mode and returns true.
//   4. Takes the raw input from key_code (converted to UTF-8) or, failing
//      that, from key_string. Returns false if neither is present.
//   5. Inserts the input, with probable key events when typing correction
//      is enabled by config or by flag.
//   6. Runs AutoSwitchMode() unless a temporary input mode is active.
//
// Returns true when the event was consumed.
bool Composer::InsertCharacterKeyEvent(const commands::KeyEvent &key) {
  if (!EnableInsert()) {
    return false;
  }

  if (key.has_mode()) {
    const transliteration::TransliterationType new_input_mode =
        GetTransliterationTypeFromCompositionMode(key.mode());
    if (new_input_mode != input_mode_) {
      // Only when the new input mode is different from the current
      // input mode, SetInputMode is called.  Otherwise the value of
      // comeback_input_mode_ is lost.
      SetInputMode(new_input_mode);
    }
  }

  // If only SHIFT is pressed, this is used to revert back to the
  // previous input mode.
  if (!key.has_key_code()) {
    // The scan must be bounded by the number of modifiers. Without the
    // "i <" comparison the loop would index past the end of the repeated
    // field whenever modifiers exist but none of them is SHIFT.
    for (int i = 0; i < key.modifier_keys_size(); ++i) {
      if (key.modifier_keys(i) == commands::KeyEvent::SHIFT) {
        // TODO(komatsu): Enable to customize the behavior.
        SetInputMode(comeback_input_mode_);
        return true;
      }
    }
  }

  // Fill input representing user's raw input.
  string input;
  if (key.has_key_code()) {
    Util::UCS4ToUTF8(key.key_code(), &input);
  } else if (key.has_key_string()) {
    input = key.key_string();
  } else {
    LOG(WARNING) << "input is empty";
    return false;
  }

  const bool is_typing_correction_enabled =
      GET_CONFIG(use_typing_correction) || FLAGS_enable_typing_correction;

  if (key.has_key_string()) {
    // Kana input usually has key_string.  Note that, the existence of
    // key_string never determines if the input mode is Kana or Romaji.
    //
    // AS_IS / DIRECT_INPUT events are inserted with the CONVERSION_STRING
    // transliterator, and the comeback input mode is restored afterwards.
    const bool insert_verbatim =
        (key.input_style() == commands::KeyEvent::AS_IS ||
         key.input_style() == commands::KeyEvent::DIRECT_INPUT);
    if (insert_verbatim) {
      composition_->SetInputMode(Transliterators::CONVERSION_STRING);
    }
    if (is_typing_correction_enabled) {
      InsertCharacterKeyAndPreeditForProbableKeyEvents(
          input, key.key_string(), key.probable_key_event());
    } else {
      InsertCharacterKeyAndPreedit(input, key.key_string());
    }
    if (insert_verbatim) {
      SetInputMode(comeback_input_mode_);
    }
  } else {
    // Romaji input usually does not have key_string.  Note that, the
    // existence of key_string never determines if the input mode is
    // Kana or Romaji.
    const uint32 modifiers = KeyEventUtil::GetModifiers(key);
    ApplyTemporaryInputMode(input, KeyEventUtil::HasCaps(modifiers));
    if (is_typing_correction_enabled) {
      InsertCharacterForProbableKeyEvents(input, key.probable_key_event());
    } else {
      InsertCharacter(input);
    }
  }

  // Auto mode switching is skipped while a temporary input mode is active.
  if (comeback_input_mode_ == input_mode_) {
    AutoSwitchMode();
  }
  return true;
}
// Deletes the character at |pos| and invalidates the typing corrector. If
// the cursor was to the right of |pos| it is moved one step to the left.
void Composer::DeleteAt(size_t pos) {
  composition_->DeleteAt(pos);

  // Adjust cursor position for composition mode.
  const bool cursor_after_deleted = (position_ > pos);
  if (cursor_after_deleted) {
    --position_;
  }

  // We do not call UpdateInputMode() here.
  // 1. In composition mode, UpdateInputMode finalizes pending chunk.
  // 2. In conversion mode, InputMode needs not to change.
  typing_corrector_.Invalidate();
}
// Deletes the character at the cursor, takes the new cursor position from
// the composition, refreshes the input mode, and invalidates the typing
// corrector.
void Composer::Delete() {
  const size_t new_position = composition_->DeleteAt(position_);
  position_ = new_position;
  UpdateInputMode();
  typing_corrector_.Invalidate();
}
// Deletes up to |length| characters starting at |pos|. Deletion stops early
// once |pos| is no longer inside the composition. The typing corrector is
// invalidated even when nothing was deleted.
void Composer::DeleteRange(size_t pos, size_t length) {
  // The counter has the same unsigned type as |length|; a signed counter
  // would be compared after implicit conversion and could overflow for
  // lengths above INT_MAX.
  for (size_t i = 0; i < length && pos < composition_->GetLength(); ++i) {
    // Deleting at a fixed position repeatedly removes consecutive
    // characters because the rest of the composition shifts left.
    DeleteAt(pos);
  }
  typing_corrector_.Invalidate();
}
// Erases the whole composition, moves the cursor to the head, restores the
// comeback input mode, and resets the typing corrector.
void Composer::EditErase() {
  composition_->Erase();
  position_ = 0;
  SetInputMode(comeback_input_mode_);
  typing_corrector_.Reset();
}
// Deletes the character to the left of the cursor. Does nothing when the
// cursor is at the head.
void Composer::Backspace() {
  if (position_ == 0) {
    return;
  }

  // From the viewpoint of updating the input mode, backspace is a special
  // case: the new input mode depends on both the new current character and
  // the character that is about to be deleted.
  //
  // Step 1: move left, so that the cursor sits between the new current
  // character and the character to be deleted.
  --position_;

  // Step 2: update the input mode while both characters still exist.
  UpdateInputMode();

  // Step 3: delete the character and take the resulting cursor position.
  position_ = composition_->DeleteAt(position_);
  typing_corrector_.Invalidate();
}
// Moves the cursor one character to the left unless it is already at the
// head. The input mode is refreshed and the typing corrector invalidated in
// either case.
void Composer::MoveCursorLeft() {
  if (position_ != 0) {
    --position_;
  }
  UpdateInputMode();
  typing_corrector_.Invalidate();
}
// Moves the cursor one character to the right unless it is already at the
// tail. The input mode is refreshed and the typing corrector invalidated in
// either case.
void Composer::MoveCursorRight() {
  const bool can_move = (position_ < composition_->GetLength());
  if (can_move) {
    ++position_;
  }
  UpdateInputMode();
  typing_corrector_.Invalidate();
}
// Moves the cursor to the head of the composition, restores the comeback
// input mode, and invalidates the typing corrector.
void Composer::MoveCursorToBeginning() {
  position_ = 0;
  SetInputMode(comeback_input_mode_);
  typing_corrector_.Invalidate();
}
// Moves the cursor to the tail of the composition.
void Composer::MoveCursorToEnd() {
  position_ = composition_->GetLength();
  // This deliberately differs from MoveCursorRight(): moving to the end
  // always restores the default (comeback) input mode instead of following
  // the surrounding characters.
  SetInputMode(comeback_input_mode_);
  typing_corrector_.Invalidate();
}
// Moves the cursor to |new_position| when it lies within the composition
// (the tail position included) and refreshes the input mode. An out-of-range
// position leaves the cursor untouched. The typing corrector is invalidated
// in both cases.
void Composer::MoveCursorTo(uint32 new_position) {
  const bool in_range = (new_position <= composition_->GetLength());
  if (in_range) {
    position_ = new_position;
    UpdateInputMode();
  }
  typing_corrector_.Invalidate();
}
// Fills |left|, |focused| and |right| with the preedit around the cursor.
// The three parts are then concatenated and passed through
// TransformCharactersForNumbers(); if that changes anything, the parts are
// re-cut from the transformed string using their original character counts.
void Composer::GetPreedit(string *left, string *focused, string *right) const {
  DCHECK(left);
  DCHECK(focused);
  DCHECK(right);
  composition_->GetPreedit(position_, left, focused, right);

  // TODO(komatsu): This function can be obsolete.
  string joined = *left + *focused + *right;
  if (!TransformCharactersForNumbers(&joined)) {
    return;
  }

  // Character counts are taken before any of the outputs is overwritten.
  const size_t left_chars = Util::CharsLen(*left);
  const size_t focused_chars = Util::CharsLen(*focused);
  *left = Util::SubString(joined, 0, left_chars);
  *focused = Util::SubString(joined, left_chars, focused_chars);
  *right = Util::SubString(joined, left_chars + focused_chars, string::npos);
}
// Writes the composition string to |output| after applying
// TransformCharactersForNumbers(). For NUMBER, PASSWORD and TEL input fields
// full-width ASCII is additionally converted to half-width ASCII.
void Composer::GetStringForPreedit(string *output) const {
  composition_->GetString(output);
  TransformCharactersForNumbers(output);

  // If the input field type needs half ASCII characters, perform the
  // conversion here.
  // Note that the client could achieve the same by setting the input type
  // to "half ascii", but the architecture of Mozc expects the server to
  // handle such character width management.
  // The PASSWORD field type is also considered: NUMBER and TEL keyboard
  // layouts with a "half ascii" composition mode can be prepared, but there
  // will be no PASSWORD-only keyboard. The basic keyboard is shared between
  // usual and password mode, so that approach is not applicable there.
  // TODO(matsuzakit): Move this logic to another appropriate location.
  // SetOutputMode() is not currently applicable but ideally it is a
  // better location than here.
  const commands::Context::InputFieldType field_type = GetInputFieldType();
  const bool needs_half_ascii =
      (field_type == commands::Context::NUMBER ||
       field_type == commands::Context::PASSWORD ||
       field_type == commands::Context::TEL);
  if (!needs_half_ascii) {
    return;
  }
  // Copy first: the conversion reads from the copy and writes to |output|.
  const string full_width = *output;
  Util::FullWidthAsciiToHalfWidthAscii(full_width, output);
}
// Currently identical to GetStringForPreedit().
void Composer::GetStringForSubmission(string *output) const {
  // TODO(komatsu): We should make sure if we can integrate this
  // function to GetStringForPreedit after a while.
  GetStringForPreedit(output);
}
// Builds the conversion query: the composition string obtained with the FIX
// trim mode, passed through TransformCharactersForNumbers(), with full-width
// ASCII converted to half-width ASCII.
void Composer::GetQueryForConversion(string *output) const {
  string fixed_query;
  composition_->GetStringWithTrimMode(FIX, &fixed_query);
  TransformCharactersForNumbers(&fixed_query);
  Util::FullWidthAsciiToHalfWidthAscii(fixed_query, output);
}
namespace {
// Determines which query is suitable as a prediction query and returns a
// pointer to it (always one of the two arguments).
// Example:
// = Romaji Input =
// ("もz", "も") -> "も"  // a part of romaji should be trimmed.
// ("もzky", "もz") -> "もzky"  // a user might intentionally typed them.
// ("z", "") -> "z"  // ditto.
// = Kana Input =
// ("か", "") -> "か"  // a part of kana (it can be "が") should not be trimmed.
string *GetBaseQueryForPrediction(string *asis_query,
                                  string *trimed_query) {
  // Nothing was trimmed, so either query would do.
  if (asis_query->size() == trimed_query->size()) {
    return asis_query;
  }

  // Extract the part that trimming removed. For example, "ky" is the
  // removed part where asis_query is "もzky" and trimed_query is "もz".
  DCHECK_GT(asis_query->size(), trimed_query->size());
  const string removed_part(*asis_query, trimed_query->size());
  DCHECK(!removed_part.empty());

  // If the removed part is not an alphabet, asis_query is used.
  // This check is mainly used for Kana Input.
  if (Util::GetScriptType(removed_part) != Util::ALPHABET) {
    return asis_query;
  }

  // If trimed_query is empty and asis_query is an alphabet, the as-is
  // string is used because the query may have been typed intentionally.
  if (trimed_query->empty()) {
    const bool asis_is_alphabet =
        (Util::GetScriptType(*asis_query) == Util::ALPHABET);
    return asis_is_alphabet ? asis_query : trimed_query;
  }

  // Now there are two patterns: ("もzk", "もz") and ("もずk", "もず").
  // We assume "もzk" is the user's intentional query, but "もずk" is not.
  // So our results are:
  // ("もzk", "もz") => "もzk" and ("もずk", "もず") => "もず".
  const size_t last_char_index = Util::CharsLen(*trimed_query) - 1;
  const string last_kept_char =
      Util::SubString(*trimed_query, last_char_index, string::npos);
  DCHECK(!last_kept_char.empty());
  const bool kept_tail_is_alphabet =
      (Util::GetScriptType(last_kept_char) == Util::ALPHABET);
  return kept_tail_is_alphabet ? asis_query : trimed_query;
}
} // anonymous namespace
// Builds the prediction query.
//
// In HALF_ASCII mode the as-is composition string is returned unchanged; in
// FULL_ASCII mode it is only converted to half-width ASCII. In all other
// modes either the as-is or the trimmed string is chosen by
// GetBaseQueryForPrediction(), passed through
// TransformCharactersForNumbers(), and converted to half-width ASCII.
void Composer::GetQueryForPrediction(string *output) const {
  string asis_query;
  composition_->GetStringWithTrimMode(ASIS, &asis_query);

  if (input_mode_ == transliteration::HALF_ASCII) {
    output->assign(asis_query);
    return;
  }
  if (input_mode_ == transliteration::FULL_ASCII) {
    Util::FullWidthAsciiToHalfWidthAscii(asis_query, output);
    return;
  }

  string trimed_query;
  composition_->GetStringWithTrimMode(TRIM, &trimed_query);

  // NOTE(komatsu): This is a hack to go around the difference in
  // expectation between Romaji-Input and Kana-Input.  "かn" in
  // Romaji-Input should be "か" while "あか" in Kana-Input should be
  // "あか", although "かn" and "あか" have the same properties.  An
  // ideal solution is to expand the ambiguity and pass all of them to
  // the converter. (e.g. "かn" -> ["かな",..."かの", "かん", ...] /
  // "あか" -> ["あか", "あが"])
  string *chosen_query = GetBaseQueryForPrediction(&asis_query, &trimed_query);
  TransformCharactersForNumbers(chosen_query);
  Util::FullWidthAsciiToHalfWidthAscii(*chosen_query, output);
}
// Fills |base| and |expanded| with the prediction query and its expansions.
// In the Latin input modes (HALF_ASCII / FULL_ASCII) no expansion is
// performed: |base| comes from GetQueryForPrediction() and |expanded| is
// cleared.
void Composer::GetQueriesForPrediction(
    string *base, set<string> *expanded) const {
  DCHECK(base);
  DCHECK(expanded);
  DCHECK(composition_.get());

  const bool is_latin_mode =
      (input_mode_ == transliteration::HALF_ASCII ||
       input_mode_ == transliteration::FULL_ASCII);
  if (is_latin_mode) {
    GetQueryForPrediction(base);
    expanded->clear();
    return;
  }
  composition_->GetExpandedStrings(base, expanded);
}
// Delegates to the typing corrector to fill |queries|.
void Composer::GetTypeCorrectedQueriesForPrediction(
    vector<TypeCorrectedQuery> *queries) const {
  typing_corrector_.GetQueriesForPrediction(queries);
}
// Returns the length reported by the composition.
size_t Composer::GetLength() const {
  return composition_->GetLength();
}
// Returns the current cursor position within the composition.
size_t Composer::GetCursor() const {
  return position_;
}
// Extracts the part of the composition covering |size| characters from
// |position| (both given in LOCAL transliterator coordinates), rendered
// with |t12r|, into |result|.
void Composer::GetTransliteratedText(
    Transliterators::Transliterator t12r,
    const size_t position,
    const size_t size,
    string *result) const {
  DCHECK(result);

  string whole_text;
  composition_->GetStringWithTransliterator(t12r, &whole_text);

  // Translate both ends of the range into |t12r| coordinates.
  const size_t begin_in_t12r = composition_->ConvertPosition(
      position, Transliterators::LOCAL, t12r);
  const size_t end_in_t12r = composition_->ConvertPosition(
      position + size, Transliterators::LOCAL, t12r);

  Util::SubString(whole_text, begin_in_t12r, end_in_t12r - begin_in_t12r,
                  result);
}
// Writes the raw string of the whole composition to |raw_string|.
void Composer::GetRawString(string *raw_string) const {
  const size_t whole_length = GetLength();
  GetRawSubString(0, whole_length, raw_string);
}
// Writes the raw string for |size| characters starting at |position| to
// |raw_sub_string|, using the RAW_STRING transliterator.
void Composer::GetRawSubString(
    const size_t position,
    const size_t size,
    string *raw_sub_string) const {
  DCHECK(raw_sub_string);
  GetTransliteratedText(
      Transliterators::RAW_STRING, position, size, raw_sub_string);
}
// Appends the transliterations of the whole composition to |t13ns|.
void Composer::GetTransliterations(
    transliteration::Transliterations *t13ns) const {
  const size_t whole_length = GetLength();
  GetSubTransliterations(0, whole_length, t13ns);
}
// Produces the transliteration of the given range in form |type|. The text
// is first rendered with the transliterator matching |type|, then
// post-processed by Transliterate() (width and letter case). Any previous
// content of |transliteration| is discarded.
void Composer::GetSubTransliteration(
    const transliteration::TransliterationType type,
    const size_t position,
    const size_t size,
    string *transliteration) const {
  string base_text;
  GetTransliteratedText(GetTransliterator(type), position, size, &base_text);

  transliteration->clear();
  Transliterate(type, base_text, transliteration);
}
void Composer::GetSubTransliterations(
const size_t position,
const size_t size,
transliteration::Transliterations *transliterations) const {
string t13n;
for (size_t i = 0; i < transliteration::NUM_T13N_TYPES; ++i) {
const transliteration::TransliterationType t13n_type =
transliteration::TransliterationTypeArray[i];
GetSubTransliteration(t13n_type, position, size, &t13n);
transliterations->push_back(t13n);
}
}
// Returns whether another input may be inserted, i.e. whether the
// composition is still shorter than max_length_. A refusal is logged.
bool Composer::EnableInsert() const {
  if (GetLength() < max_length_) {
    return true;
  }
  // Do not accept overly long input, to prevent DoS attacks.
  LOG(WARNING) << "The length is too long.";
  return false;
}
// Applies the automatic mode switching rules to the current composition.
//
// Nothing happens unless the use_auto_ime_turn_off config is on and the
// preedit method is ROMAN. The composition is rendered as half-width ASCII
// and looked up in the ModeSwitchingHandler. A matching rule yields two
// independent decisions:
//   - display_mode: how the already typed composition is shown
//     (applied through SetOutputMode()), and
//   - input_mode: which input mode is used for subsequent typing.
void Composer::AutoSwitchMode() {
  if (!GET_CONFIG(use_auto_ime_turn_off)) {
    return;
  }

  // AutoSwitchMode is only available on Roma input
  if (GET_CONFIG(preedit_method) != config::Config::ROMAN) {
    return;
  }

  string key;
  // Key should be in half-width alphanumeric.
  composition_->GetStringWithTransliterator(
      GetTransliterator(transliteration::HALF_ASCII), &key);

  ModeSwitchingHandler::ModeSwitching display_mode =
      ModeSwitchingHandler::NO_CHANGE;
  ModeSwitchingHandler::ModeSwitching input_mode =
      ModeSwitchingHandler::NO_CHANGE;
  if (!ModeSwitchingHandler::GetModeSwitchingHandler()->GetModeSwitchingRule(
          key, &display_mode, &input_mode)) {
    // If the key is not a pattern of mode switch rule, the procedure
    // stops here.
    return;
  }

  // |display_mode| affects the existing composition the user typed.
  switch (display_mode) {
    case ModeSwitchingHandler::NO_CHANGE:
      // Do nothing.
      break;
    case ModeSwitchingHandler::REVERT_TO_PREVIOUS_MODE:
      // Invalid value for display_mode
      LOG(ERROR) << "REVERT_TO_PREVIOUS_MODE is an invalid value "
                 << "for display_mode.";
      break;
    case ModeSwitchingHandler::PREFERRED_ALPHANUMERIC:
      // Keep full-width ASCII if that is what the user is typing in;
      // otherwise prefer half-width ASCII.
      if (input_mode_ == transliteration::FULL_ASCII) {
        SetOutputMode(transliteration::FULL_ASCII);
      } else {
        SetOutputMode(transliteration::HALF_ASCII);
      }
      break;
    case ModeSwitchingHandler::HALF_ALPHANUMERIC:
      SetOutputMode(transliteration::HALF_ASCII);
      break;
    case ModeSwitchingHandler::FULL_ALPHANUMERIC:
      SetOutputMode(transliteration::FULL_ASCII);
      break;
    default:
      LOG(ERROR) << "Unkown value: " << display_mode;
      break;
  }

  // |input_mode| affects the current input mode used for the user's
  // new typing.
  switch (input_mode) {
    case ModeSwitchingHandler::NO_CHANGE:
      // Do nothing.
      break;
    case ModeSwitchingHandler::REVERT_TO_PREVIOUS_MODE:
      SetInputMode(comeback_input_mode_);
      break;
    case ModeSwitchingHandler::PREFERRED_ALPHANUMERIC:
      if (input_mode_ != transliteration::HALF_ASCII &&
          input_mode_ != transliteration::FULL_ASCII) {
        SetTemporaryInputMode(transliteration::HALF_ASCII);
      }
      break;
    case ModeSwitchingHandler::HALF_ALPHANUMERIC:
      if (input_mode_ != transliteration::HALF_ASCII) {
        SetTemporaryInputMode(transliteration::HALF_ASCII);
      }
      break;
    case ModeSwitchingHandler::FULL_ALPHANUMERIC:
      if (input_mode_ != transliteration::FULL_ASCII) {
        SetTemporaryInputMode(transliteration::FULL_ASCII);
      }
      break;
    default:
      // Report the value this switch actually dispatched on (|input_mode|,
      // not |display_mode|).
      LOG(ERROR) << "Unkown value: " << input_mode;
      break;
  }
}
// Forwards the composition's own answer to "should this be committed now?".
bool Composer::ShouldCommit() const {
  return composition_->ShouldCommit();
}
// Decides whether the head of the composition should be committed right
// away, based on the input field type.
//
// PASSWORD fields keep at most one character in the composition; TEL and
// NUMBER fields keep none. When the composition is longer than that,
// |*length_to_commit| receives the number of leading characters to commit
// and true is returned. In every other case false is returned and
// |*length_to_commit| is left untouched.
bool Composer::ShouldCommitHead(size_t *length_to_commit) const {
  size_t keep_length;
  switch (GetInputFieldType()) {
    case commands::Context::TEL:
    case commands::Context::NUMBER:
      keep_length = 0;
      break;
    case commands::Context::PASSWORD:
      keep_length = 1;
      break;
    default:
      // No need to commit for other field types.
      return false;
  }

  const size_t current_length = GetLength();
  if (current_length <= keep_length) {
    return false;
  }
  *length_to_commit = current_length - keep_length;
  return true;
}
namespace {

// Coarse character classes used by Composer::TransformCharactersForNumbers.
enum Script {
  ALPHABET,   // alphabet characters or symbols
  NUMBER,     // 0 - 9, "0" - "9"
  JA_HYPHEN,  // "ー"
  JA_COMMA,   // "、"
  JA_PERIOD,  // "。"
  OTHER,
};

// Returns true for the ALPHABET and NUMBER classes, false for all others.
bool IsAlphabetOrNumber(const Script script) {
  switch (script) {
    case ALPHABET:
    case NUMBER:
      return true;
    default:
      return false;
  }
}

}  // anonymous namespace
// static
// Rewrites the Japanese symbols "ー", "、" and "。" inside |query| when they
// appear in an alphanumeric context, using the strings that
// CharacterFormManager::ConvertPreeditString() produces for "−", "," and
// "." respectively.
//
// Returns true and updates |*query| when at least one character was
// replaced. Returns false and leaves |*query| untouched when |query| is
// NULL, when the query lacks either an alphanumeric character or one of the
// three symbols, or when no symbol met its replacement rule.
bool Composer::TransformCharactersForNumbers(string *query) {
if (query == NULL) {
LOG(ERROR) << "query is NULL";
return false;
}
// Create a vector of scripts of query characters to avoid
// processing query string many times.
const size_t chars_len = Util::CharsLen(*query);
vector<Script> char_scripts;
char_scripts.reserve(chars_len);
// flags to determine whether continue to the next step.
bool has_symbols = false;
bool has_alphanumerics = false;
for (ConstChar32Iterator iter(*query); !iter.Done(); iter.Next()) {
const char32 one_char = iter.Get();
switch (one_char) {
case 0x30FC: // "ー"
has_symbols = true;
char_scripts.push_back(JA_HYPHEN);
break;
case 0x3001: // "、"
has_symbols = true;
char_scripts.push_back(JA_COMMA);
break;
case 0x3002: // "。"
has_symbols = true;
char_scripts.push_back(JA_PERIOD);
break;
// The operators and brackets below (half-width and full-width) are
// classified as ALPHABET so that a following symbol is transformed, but
// they do not set has_alphanumerics by themselves.
case '+':
case '*':
case '/':
case '=':
case '(':
case ')':
case '<':
case '>':
case 0xFF0B: // "+"
case 0xFF0A: // "*"
case 0xFF0F: // "/"
case 0xFF1D: // "="
case 0xFF08: // "("
case 0xFF09: // ")"
case 0xFF1C: // "<"
case 0xFF1E: // ">"
char_scripts.push_back(ALPHABET);
break;
default: {
Util::ScriptType script_type = Util::GetScriptType(one_char);
if (script_type == Util::NUMBER) {
has_alphanumerics = true;
char_scripts.push_back(NUMBER);
} else if (script_type == Util::ALPHABET) {
has_alphanumerics = true;
char_scripts.push_back(ALPHABET);
} else {
char_scripts.push_back(OTHER);
}
}
}
}
DCHECK_EQ(chars_len, char_scripts.size());
// Both an alphanumeric character and a candidate symbol are required;
// otherwise there is nothing to transform.
if (!has_alphanumerics || !has_symbols) {
VLOG(1) << "The query contains neither alphanumeric nor symbol.";
return false;
}
string transformed_query;
bool transformed = false;
// |i| is the character index matching |iter|; it indexes char_scripts.
size_t i = 0;
string append_char;
for (ConstChar32Iterator iter(*query); !iter.Done(); iter.Next(), ++i) {
append_char.clear();
switch (char_scripts[i]) {
case JA_HYPHEN: {
// JA_HYPHEN(s) "ー" is/are transformed to "−" if:
// (i) query has one and only one leading JA_HYPHEN followed by a
// number,
// (ii) JA_HYPHEN(s) follow(s) after an alphanumeric (ex. 0-, 0----,
// etc).
// Note that rule (i) implies that if query starts with more than
// one JA_HYPHENs, those JA_HYPHENs are not transformed.
bool check = false;
if (i == 0 && chars_len > 1) {
check = (char_scripts[1] == NUMBER);
} else {
// Walk left over any run of JA_HYPHENs and test the first character
// that is not a JA_HYPHEN.
for (size_t j = i; j > 0; --j) {
if (char_scripts[j - 1] == JA_HYPHEN) {
continue;
}
check = IsAlphabetOrNumber(char_scripts[j - 1]);
break;
}
}
// JA_HYPHEN should be transformed to MINUS.
if (check) {
CharacterFormManager::GetCharacterFormManager()->
ConvertPreeditString("\xE2\x88\x92", &append_char); // "−"
DCHECK(!append_char.empty());
}
break;
}
case JA_COMMA: {
// "、" should be "," if the previous character exists and is an
// alphabet or a number. The next character is not examined.
const bool lhs_check =
(i > 0 && IsAlphabetOrNumber(char_scripts[i - 1]));
// JA_COMMA should be transformed to COMMA.
if (lhs_check) {
CharacterFormManager::GetCharacterFormManager()->
ConvertPreeditString("\xEF\xBC\x8C", &append_char); // ","
DCHECK(!append_char.empty());
}
break;
}
case JA_PERIOD: {
// "。" should be "." if the previous character exists and is an
// alphabet or a number. The next character is not examined.
const bool lhs_check =
(i > 0 && IsAlphabetOrNumber(char_scripts[i - 1]));
// JA_PERIOD should be transformed to PERIOD.
if (lhs_check) {
CharacterFormManager::GetCharacterFormManager()->
ConvertPreeditString("\xEF\xBC\x8E", &append_char); // "."
DCHECK(!append_char.empty());
}
break;
}
default: {
// Do nothing.
}
}
if (append_char.empty()) {
// Append one character.
Util::UCS4ToUTF8Append(iter.Get(), &transformed_query);
} else {
// Append the transformed character.
transformed_query.append(append_char);
transformed = true;
}
}
if (!transformed) {
return false;
}
// It is possible that the query's size in bytes differs from the
// transformed query's size in bytes, but the character counts must match.
DCHECK_EQ(Util::CharsLen(*query), Util::CharsLen(transformed_query));
*query = transformed_query;
return true;
}
// Marks the next inserted input as the start of a new input.
void Composer::SetNewInput() {
  is_new_input_ = true;
}
// Makes this composer a copy of |src|. The composer is reset first, then
// every field is taken from |src|. The composition is cloned, while the
// request pointer is shared.
void Composer::CopyFrom(const Composer &src) {
  Reset();

  // Modes and field type.
  input_mode_ = src.input_mode_;
  output_mode_ = src.output_mode_;
  comeback_input_mode_ = src.comeback_input_mode_;
  input_field_type_ = src.input_field_type_;

  // Cursor and input-sequence bookkeeping.
  position_ = src.position_;
  is_new_input_ = src.is_new_input_;
  shifted_sequence_count_ = src.shifted_sequence_count_;
  max_length_ = src.max_length_;

  // Text and collaborating objects.
  source_text_.assign(src.source_text_);
  composition_.reset(src.composition_->Clone());
  request_ = src.request_;
  typing_corrector_.CopyFrom(src.typing_corrector_);
}
// Returns whether the next inserted input is treated as a new input.
bool Composer::is_new_input() const {
  return is_new_input_;
}
// Returns the number of consecutive shifted alphabet keys counted so far.
size_t Composer::shifted_sequence_count() const {
  return shifted_sequence_count_;
}
// Returns a read-only reference to the stored source text.
const string &Composer::source_text() const {
  return source_text_;
}
// Returns a pointer to the stored source text for in-place modification.
string *Composer::mutable_source_text() {
  return &source_text_;
}
// Replaces the stored source text with a copy of |source_text|.
void Composer::set_source_text(const string &source_text) {
  source_text_.assign(source_text);
}
// Returns the composition length at which EnableInsert() starts refusing
// input.
size_t Composer::max_length() const {
  return max_length_;
}
// Sets the composition length at which EnableInsert() starts refusing
// input.
void Composer::set_max_length(size_t length) {
  max_length_ = length;
}
// Records the type of the input field the composer is serving. The type is
// read by GetStringForPreedit() and ShouldCommitHead().
void Composer::SetInputFieldType(commands::Context::InputFieldType type) {
  input_field_type_ = type;
}
// Returns the input field type last set via SetInputFieldType().
commands::Context::InputFieldType Composer::GetInputFieldType() const {
  return input_field_type_;
}
} // namespace composer
} // namespace mozc