blob: 88e08746af9bee7fca2b15d61b67b13a7d2a87d9 [file] [log] [blame]
// Copyright 2010-2015, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dictionary/user_dictionary_util.h"
#include <string.h>
#include <algorithm>
#include "base/config_file_stream.h"
#include "base/file_stream.h"
#include "base/logging.h"
#include "base/protobuf/message.h"
#include "base/protobuf/unknown_field_set.h"
#include "base/util.h"
#include "dictionary/user_pos_interface.h"
namespace mozc {
using ::mozc::protobuf::RepeatedPtrField;
using ::mozc::protobuf::UnknownField;
using ::mozc::protobuf::UnknownFieldSet;
using ::mozc::user_dictionary::UserDictionaryCommandStatus;
namespace {
// Maximum string length in UserDictionaryEntry's field
const size_t kMaxKeySize = 300;
const size_t kMaxValueSize = 300;
const size_t kMaxCommentSize = 300;
const char kInvalidChars[]= "\n\r\t";
const char kUserDictionaryFile[] = "user://user_dictionary.db";
const mozc::user_dictionary::UserDictionary::PosType kInvalidPosType =
static_cast<mozc::user_dictionary::UserDictionary::PosType>(-1);
// Maximum string length for dictionary name.
const size_t kMaxDictionaryNameSize = 300;
// The limits of dictionary/entry size.
const size_t kMaxDictionarySize = 100;
const size_t kMaxEntrySize = 1000000;
} // namespace
size_t UserDictionaryUtil::max_dictionary_size() {
return kMaxDictionarySize;
}
size_t UserDictionaryUtil::max_entry_size() {
return kMaxEntrySize;
}
bool UserDictionaryUtil::IsValidEntry(
const UserPOSInterface &user_pos,
const user_dictionary::UserDictionary::Entry &entry) {
return ValidateEntry(entry) ==
UserDictionaryCommandStatus::USER_DICTIONARY_COMMAND_SUCCESS;
}
namespace {
#define INRANGE(w, a, b) ((w) >= (a) && (w) <= (b))
bool InternalValidateNormalizedReading(const string &reading) {
for (ConstChar32Iterator iter(reading); !iter.Done(); iter.Next()) {
const char32 c = iter.Get();
if (!INRANGE(c, 0x0021, 0x007E) && // Basic Latin (Ascii)
!INRANGE(c, 0x3041, 0x3096) && // Hiragana
!INRANGE(c, 0x309B, 0x309C) && // KATAKANA-HIRAGANA VOICED/SEMI-VOICED
// SOUND MARK
!INRANGE(c, 0x30FB, 0x30FC) && // Nakaten, Prolonged sound mark
!INRANGE(c, 0x3001, 0x3002) && // Japanese punctuation marks
!INRANGE(c, 0x300C, 0x300F) && // Japanese brackets
!INRANGE(c, 0x301C, 0x301C)) { // Japanese Wavedash
LOG(INFO) << "Invalid character in reading.";
return false;
}
}
return true;
}
#undef INRANGE
} // namespace
bool UserDictionaryUtil::IsValidReading(const string &reading) {
string normalized;
NormalizeReading(reading, &normalized);
return InternalValidateNormalizedReading(normalized);
}
void UserDictionaryUtil::NormalizeReading(const string &input, string *output) {
output->clear();
string tmp1, tmp2;
Util::FullWidthAsciiToHalfWidthAscii(input, &tmp1);
Util::HalfWidthKatakanaToFullWidthKatakana(tmp1, &tmp2);
Util::KatakanaToHiragana(tmp2, output);
}
UserDictionaryCommandStatus::Status UserDictionaryUtil::ValidateEntry(
const user_dictionary::UserDictionary::Entry &entry) {
// Validate reading.
const string &reading = entry.key();
if (reading.empty()) {
VLOG(1) << "key is empty";
return UserDictionaryCommandStatus::READING_EMPTY;
}
if (reading.size() > kMaxKeySize) {
VLOG(1) << "Too long key.";
return UserDictionaryCommandStatus::READING_TOO_LONG;
}
if (!IsValidReading(reading)) {
VLOG(1) << "Invalid reading";
return UserDictionaryCommandStatus::READING_CONTAINS_INVALID_CHARACTER;
}
// Validate word.
const string &word = entry.value();
if (word.empty()) {
return UserDictionaryCommandStatus::WORD_EMPTY;
}
if (word.size() > kMaxValueSize) {
VLOG(1) << "Too long value.";
return UserDictionaryCommandStatus::WORD_TOO_LONG;
}
if (word.find_first_of(kInvalidChars) != string::npos) {
VLOG(1) << "Invalid character in value.";
return UserDictionaryCommandStatus::WORD_CONTAINS_INVALID_CHARACTER;
}
// Validate comment.
const string &comment = entry.comment();
if (comment.size() > kMaxCommentSize) {
VLOG(1) << "Too long comment.";
return UserDictionaryCommandStatus::COMMENT_TOO_LONG;
}
if (comment.find_first_of(kInvalidChars) != string::npos) {
VLOG(1) << "Invalid character in comment.";
return UserDictionaryCommandStatus::COMMENT_CONTAINS_INVALID_CHARACTER;
}
// Validate pos.
if (!entry.has_pos() ||
!user_dictionary::UserDictionary::PosType_IsValid(entry.pos())) {
VLOG(1) << "Invalid POS";
return UserDictionaryCommandStatus::INVALID_POS_TYPE;
}
return UserDictionaryCommandStatus::USER_DICTIONARY_COMMAND_SUCCESS;
}
bool UserDictionaryUtil::IsStorageFull(
const user_dictionary::UserDictionaryStorage &storage) {
return storage.dictionaries_size() >= kMaxDictionarySize;
}
bool UserDictionaryUtil::IsDictionaryFull(
const user_dictionary::UserDictionary &dictionary) {
return dictionary.entries_size() >= kMaxEntrySize;
}
const user_dictionary::UserDictionary *
UserDictionaryUtil::GetUserDictionaryById(
const user_dictionary::UserDictionaryStorage &storage,
uint64 dictionary_id) {
int index = GetUserDictionaryIndexById(storage, dictionary_id);
return index >= 0 ? &storage.dictionaries(index) : NULL;
}
user_dictionary::UserDictionary *
UserDictionaryUtil::GetMutableUserDictionaryById(
user_dictionary::UserDictionaryStorage *storage, uint64 dictionary_id) {
int index = GetUserDictionaryIndexById(*storage, dictionary_id);
return index >= 0 ? storage->mutable_dictionaries(index) : NULL;
}
int UserDictionaryUtil::GetUserDictionaryIndexById(
const user_dictionary::UserDictionaryStorage &storage,
uint64 dictionary_id) {
for (int i = 0; i < storage.dictionaries_size(); ++i) {
const user_dictionary::UserDictionary &dictionary =
storage.dictionaries(i);
if (dictionary.id() == dictionary_id) {
return i;
}
}
LOG(ERROR) << "Cannot find dictionary id: " << dictionary_id;
return -1;
}
string UserDictionaryUtil::GetUserDictionaryFileName() {
return ConfigFileStream::GetFileName(kUserDictionaryFile);
}
// static
bool UserDictionaryUtil::SanitizeEntry(
user_dictionary::UserDictionary::Entry *entry) {
bool modified = false;
modified |= Sanitize(entry->mutable_key(), kMaxKeySize);
modified |= Sanitize(entry->mutable_value(), kMaxValueSize);
if (!user_dictionary::UserDictionary::PosType_IsValid(entry->pos())) {
// Fallback to NOUN.
entry->set_pos(user_dictionary::UserDictionary::NOUN);
modified = true;
}
modified |= Sanitize(entry->mutable_comment(), kMaxCommentSize);
return modified;
}
// static
bool UserDictionaryUtil::Sanitize(string *str, size_t max_size) {
// First part: Remove invalid characters.
{
const size_t original_size = str->size();
string::iterator begin = str->begin();
string::iterator end = str->end();
end = remove(begin, end, '\t');
end = remove(begin, end, '\n');
end = remove(begin, end, '\r');
if (end - begin <= max_size) {
if (end - begin == original_size) {
return false;
} else {
str->erase(end - begin);
return true;
}
}
}
// Second part: Truncate long strings.
{
const char *begin = str->data();
const char *p = begin;
const char *end = begin + str->size();
while (p < end) {
const size_t len = Util::OneCharLen(p);
if ((p + len - begin) > max_size) {
str->erase(p - begin);
return true;
}
p += len;
}
LOG(FATAL) <<
"There should be a bug in implementation of the function.";
}
return true;
}
UserDictionaryCommandStatus::Status UserDictionaryUtil::ValidateDictionaryName(
const user_dictionary::UserDictionaryStorage &storage,
const string &dictionary_name) {
if (dictionary_name.empty()) {
VLOG(1) << "Empty dictionary name.";
return UserDictionaryCommandStatus::DICTIONARY_NAME_EMPTY;
}
if (dictionary_name.size() > kMaxDictionaryNameSize) {
VLOG(1) << "Too long dictionary name";
return UserDictionaryCommandStatus::DICTIONARY_NAME_TOO_LONG;
}
if (dictionary_name.find_first_of(kInvalidChars) != string::npos) {
VLOG(1) << "Invalid character in dictionary name: " << dictionary_name;
return UserDictionaryCommandStatus
::DICTIONARY_NAME_CONTAINS_INVALID_CHARACTER;
}
for (int i = 0; i < storage.dictionaries_size(); ++i) {
if (storage.dictionaries(i).name() == dictionary_name) {
LOG(ERROR) << "duplicated dictionary name";
return UserDictionaryCommandStatus::DICTIONARY_NAME_DUPLICATED;
}
}
return UserDictionaryCommandStatus::USER_DICTIONARY_COMMAND_SUCCESS;
}
namespace {
// The index of each element should be matched with the actual value of enum.
// See also user_dictionary_storage.proto for the definition of the enum.
// Note that the '0' is invalid in the definition, so the corresponding
// element is NULL.
const char *kPosTypeStringTable[] = {
NULL,
"\xE5\x90\x8D\xE8\xA9\x9E", // "名詞"
"\xE7\x9F\xAD\xE7\xB8\xAE\xE3\x82\x88\xE3\x81\xBF", // "短縮よみ"
"\xE3\x82\xB5\xE3\x82\xB8\xE3\x82\xA7\xE3\x82\xB9\xE3\x83\x88"
"\xE3\x81\xAE\xE3\x81\xBF", // "サジェストのみ"
"\xE5\x9B\xBA\xE6\x9C\x89\xE5\x90\x8D\xE8\xA9\x9E", // "固有名詞"
"\xE4\xBA\xBA\xE5\x90\x8D", // "人名"
"\xE5\xA7\x93", // "姓"
"\xE5\x90\x8D", // "名"
"\xE7\xB5\x84\xE7\xB9\x94", // "組織"
"\xE5\x9C\xB0\xE5\x90\x8D", // "地名"
"\xE5\x90\x8D\xE8\xA9\x9E\xE3\x82\xB5\xE5\xA4\x89", // "名詞サ変"
"\xE5\x90\x8D\xE8\xA9\x9E\xE5\xBD\xA2\xE5\x8B\x95", // "名詞形動"
"\xE6\x95\xB0", // "数"
"\xE3\x82\xA2\xE3\x83\xAB\xE3\x83\x95\xE3\x82\xA1"
"\xE3\x83\x99\xE3\x83\x83\xE3\x83\x88", // "アルファベット"
"\xE8\xA8\x98\xE5\x8F\xB7", // "記号"
"\xE9\xA1\x94\xE6\x96\x87\xE5\xAD\x97", // "顔文字"
"\xE5\x89\xAF\xE8\xA9\x9E", // "副詞"
"\xE9\x80\xA3\xE4\xBD\x93\xE8\xA9\x9E", // "連体詞"
"\xE6\x8E\xA5\xE7\xB6\x9A\xE8\xA9\x9E", // "接続詞"
"\xE6\x84\x9F\xE5\x8B\x95\xE8\xA9\x9E", // "感動詞"
"\xE6\x8E\xA5\xE9\xA0\xAD\xE8\xAA\x9E", // "接頭語"
"\xE5\x8A\xA9\xE6\x95\xB0\xE8\xA9\x9E", // "助数詞"
"\xE6\x8E\xA5\xE5\xB0\xBE\xE4\xB8\x80\xE8\x88\xAC", // "接尾一般"
"\xE6\x8E\xA5\xE5\xB0\xBE\xE4\xBA\xBA\xE5\x90\x8D", // "接尾人名"
"\xE6\x8E\xA5\xE5\xB0\xBE\xE5\x9C\xB0\xE5\x90\x8D", // "接尾地名"
"\xE5\x8B\x95\xE8\xA9\x9E"
"\xE3\x83\xAF\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ワ行五段"
"\xE5\x8B\x95\xE8\xA9\x9E"
"\xE3\x82\xAB\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞カ行五段"
"\xE5\x8B\x95\xE8\xA9\x9E"
"\xE3\x82\xB5\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞サ行五段"
"\xE5\x8B\x95\xE8\xA9\x9E"
"\xE3\x82\xBF\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞タ行五段"
"\xE5\x8B\x95\xE8\xA9\x9E"
"\xE3\x83\x8A\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ナ行五段"
"\xE5\x8B\x95\xE8\xA9\x9E"
"\xE3\x83\x9E\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞マ行五段"
"\xE5\x8B\x95\xE8\xA9\x9E"
"\xE3\x83\xA9\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ラ行五段"
"\xE5\x8B\x95\xE8\xA9\x9E"
"\xE3\x82\xAC\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞ガ行五段"
"\xE5\x8B\x95\xE8\xA9\x9E"
"\xE3\x83\x90\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5", // "動詞バ行五段"
"\xE5\x8B\x95\xE8\xA9\x9E"
"\xE3\x83\x8F\xE8\xA1\x8C\xE5\x9B\x9B\xE6\xAE\xB5", // "動詞ハ行四段"
"\xE5\x8B\x95\xE8\xA9\x9E\xE4\xB8\x80\xE6\xAE\xB5", // "動詞一段"
"\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xAB\xE5\xA4\x89", // "動詞カ変"
"\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xB5\xE5\xA4\x89", // "動詞サ変"
"\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xB6\xE5\xA4\x89", // "動詞ザ変"
"\xE5\x8B\x95\xE8\xA9\x9E\xE3\x83\xA9\xE5\xA4\x89", // "動詞ラ変"
"\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E", // "形容詞"
"\xE7\xB5\x82\xE5\x8A\xA9\xE8\xA9\x9E", // "終助詞"
"\xE5\x8F\xA5\xE8\xAA\xAD\xE7\x82\xB9", // "句読点"
"\xE7\x8B\xAC\xE7\xAB\x8B\xE8\xAA\x9E", // "独立語"
"\xE6\x8A\x91\xE5\x88\xB6\xE5\x8D\x98\xE8\xAA\x9E", // "抑制単語"
};
} // namespace
const char* UserDictionaryUtil::GetStringPosType(
user_dictionary::UserDictionary::PosType pos_type) {
if (user_dictionary::UserDictionary::PosType_IsValid(pos_type)) {
return kPosTypeStringTable[pos_type];
}
return NULL;
}
user_dictionary::UserDictionary::PosType UserDictionaryUtil::ToPosType(
const char *string_pos_type) {
// Skip the element at 0.
for (int i = 1; i < arraysize(kPosTypeStringTable); ++i) {
if (strcmp(kPosTypeStringTable[i], string_pos_type) == 0) {
return static_cast<user_dictionary::UserDictionary::PosType>(i);
}
}
// Not found. Return invalid value.
return static_cast<user_dictionary::UserDictionary::PosType>(-1);
}
namespace {
const UnknownField *GetUnknownFieldByTagNumber(
const UnknownFieldSet &unknown_field_set, int tag_number) {
for (int i = 0; i < unknown_field_set.field_count(); ++i) {
const UnknownField &field = unknown_field_set.field(i);
if (field.number() == tag_number) {
// Use first entry.
return &field;
}
}
return NULL;
}
void RemoveUnknownFieldByTagNumber(
int tag_number, UnknownFieldSet *unknown_field_set) {
UnknownFieldSet temporary_unknown_field_set;
for (int i = 0; i < unknown_field_set->field_count(); ++i) {
const UnknownField &field = unknown_field_set->field(i);
if (field.number() == tag_number) {
continue;
}
temporary_unknown_field_set.AddField(field);
}
unknown_field_set->Swap(&temporary_unknown_field_set);
}
struct RemovedPosTypeResolveTable {
const char *name;
mozc::user_dictionary::UserDictionary::PosType pos_type;
};
const RemovedPosTypeResolveTable kRemovedPosType[] = {
// Removed in CL/9909127.
// "名詞副詞可能"
{ "\xE5\x90\x8D\xE8\xA9\x9E\xE5\x89\xAF\xE8\xA9\x9E\xE5\x8F\xAF\xE8\x83\xBD",
mozc::user_dictionary::UserDictionary::NOUN },
// "接頭形容詞接続"
{ "\xE6\x8E\xA5\xE9\xA0\xAD\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E"
"\xE6\x8E\xA5\xE7\xB6\x9A",
mozc::user_dictionary::UserDictionary::PREFIX },
// "接頭数接続"
{ "\xE6\x8E\xA5\xE9\xA0\xAD\xE6\x95\xB0\xE6\x8E\xA5\xE7\xB6\x9A",
mozc::user_dictionary::UserDictionary::PREFIX },
// "接頭動詞接続"
{ "\xE6\x8E\xA5\xE9\xA0\xAD\xE5\x8B\x95\xE8\xA9\x9E\xE6\x8E\xA5\xE7\xB6\x9A",
mozc::user_dictionary::UserDictionary::PREFIX },
// "接頭名詞接続"
{ "\xE6\x8E\xA5\xE9\xA0\xAD\xE5\x90\x8D\xE8\xA9\x9E\xE6\x8E\xA5\xE7\xB6\x9A",
mozc::user_dictionary::UserDictionary::PREFIX },
// "形容詞アウオ段"
{ "\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E"
"\xE3\x82\xA2\xE3\x82\xA6\xE3\x82\xAA\xE6\xAE\xB5",
mozc::user_dictionary::UserDictionary::ADJECTIVE },
// "形容詞イ段"
{ "\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E\xE3\x82\xA4\xE6\xAE\xB5",
mozc::user_dictionary::UserDictionary::ADJECTIVE },
// Removed in CL/18000642.
// "括弧開"
{ "\xE6\x8B\xAC\xE5\xBC\xA7\xE9\x96\x8B",
mozc::user_dictionary::UserDictionary::SYMBOL },
// "括弧閉"
{ "\xE6\x8B\xAC\xE5\xBC\xA7\xE9\x96\x89",
mozc::user_dictionary::UserDictionary::SYMBOL },
};
mozc::user_dictionary::UserDictionary::PosType ResolveRemovedPosType(
const string &name) {
for (size_t i = 0; i < arraysize(kRemovedPosType); ++i) {
if (name == kRemovedPosType[i].name) {
return kRemovedPosType[i].pos_type;
}
}
// Not found. Return invalid pos type.
return kInvalidPosType;
}
// The deprecated tag number of "pos" field in UserDictionary::Entry.
const int kDeprecatedPosTagNumber = 3;
} // namespace
bool UserDictionaryUtil::ResolveUnknownFieldSet(
user_dictionary::UserDictionaryStorage *storage) {
using mozc::user_dictionary::UserDictionary;
typedef UserDictionary::Entry Entry;
bool result = true;
for (int i = 0; i < storage->dictionaries_size(); ++i) {
UserDictionary *dictionary = storage->mutable_dictionaries(i);
for (int j = 0; j < dictionary->entries_size(); ++j) {
Entry *entry = dictionary->mutable_entries(j);
const UnknownField *unknown_field = GetUnknownFieldByTagNumber(
entry->unknown_fields(), kDeprecatedPosTagNumber);
if (unknown_field == NULL) {
// Here, there are two cases:
// 1) The entry is already in the new format. Don't need migration.
// 2) The entry doesn't have POS actually.
// Note that it is "possible" (but not valid) for an entry to keep
// its POS field empty. Do not treat it as "resolving failure."
LOG_IF(WARNING, !entry->has_pos()) << "Unknown field is not found.";
continue;
}
UserDictionary::PosType pos_type = kInvalidPosType;
switch (unknown_field->type()) {
case UnknownField::TYPE_VARINT:
pos_type =
static_cast<UserDictionary::PosType>(unknown_field->varint());
break;
case UnknownField::TYPE_LENGTH_DELIMITED:
pos_type = ToPosType(unknown_field->length_delimited().c_str());
if (pos_type == kInvalidPosType) {
// The value may be created by very old mozc dictionary tool.
// Try to find the value from a list containing removed pos names.
pos_type =
ResolveRemovedPosType(unknown_field->length_delimited());
}
break;
default:
LOG(ERROR) << "Unknown deprecated pos type value: "
<< unknown_field->type();
break;
}
if (pos_type == kInvalidPosType) {
LOG(ERROR) << "Failed to resolve old pos data.";
if (!entry->has_pos()) {
// If there is no pos here, users cannot use this entry for the
// conversion. Thus, as a fallback, we fill NOUN by default.
entry->set_pos(UserDictionary::NOUN);
}
result = false;
continue;
}
if (entry->has_pos()) {
if (entry->pos() != pos_type) {
LOG(ERROR) << "Failed to resolve the entry due to "
<< "pos type inconsistency: "
<< entry->pos() << ", " << pos_type;
result = false;
continue;
}
} else {
entry->set_pos(pos_type);
}
// In future, we may want to add some fields into the message.
// So, don't touch any fields other than ones we processed.
UnknownFieldSet *unknown_field_set = entry->mutable_unknown_fields();
RemoveUnknownFieldByTagNumber(
kDeprecatedPosTagNumber, unknown_field_set);
if (unknown_field_set->field_count() == 0) {
entry->DiscardUnknownFields();
}
}
}
return result;
}
void UserDictionaryUtil::FillDesktopDeprecatedPosField(
user_dictionary::UserDictionaryStorage *storage) {
for (int i = 0; i < storage->dictionaries_size(); ++i) {
user_dictionary::UserDictionary *dictionary =
storage->mutable_dictionaries(i);
for (int j = 0; j < dictionary->entries_size(); ++j) {
user_dictionary::UserDictionary::Entry *entry =
dictionary->mutable_entries(j);
if (!entry->has_pos()) {
// No pos is found, so don't need backward compatibility process.
continue;
}
UnknownFieldSet *unknown_field_set = entry->mutable_unknown_fields();
static const int kDeprecatedPosTagNumber = 3;
unknown_field_set->AddLengthDelimited(
kDeprecatedPosTagNumber,
UserDictionaryUtil::GetStringPosType(entry->pos()));
}
}
}
uint64 UserDictionaryUtil::CreateNewDictionaryId(
const user_dictionary::UserDictionaryStorage &storage) {
static const uint64 kInvalidDictionaryId = 0;
uint64 id = kInvalidDictionaryId;
while (id == kInvalidDictionaryId) {
Util::GetRandomSequence(reinterpret_cast<char *>(&id), sizeof(id));
#ifdef __native_client__
// Because JavaScript does not support uint64, we downsize the dictionary id
// range from uint64 to uint32 in NaCl.
id = static_cast<uint32>(id);
#endif // __native_client__
// Duplication check.
for (int i = 0; i < storage.dictionaries_size(); ++i) {
if (storage.dictionaries(i).id() == id) {
// Duplicated id is found. So invalidate it to retry the generating.
id = kInvalidDictionaryId;
break;
}
}
}
return id;
}
UserDictionaryCommandStatus::Status UserDictionaryUtil::CreateDictionary(
user_dictionary::UserDictionaryStorage *storage,
const string &dictionary_name,
uint64 *new_dictionary_id) {
UserDictionaryCommandStatus::Status status =
ValidateDictionaryName(*storage, dictionary_name);
if (status != UserDictionaryCommandStatus::USER_DICTIONARY_COMMAND_SUCCESS) {
LOG(ERROR) << "Invalid dictionary name is passed";
return status;
}
if (IsStorageFull(*storage)) {
LOG(ERROR) << "too many dictionaries";
return UserDictionaryCommandStatus::DICTIONARY_SIZE_LIMIT_EXCEEDED;
}
if (new_dictionary_id == NULL) {
LOG(ERROR) << "new_dictionary_id is NULL";
return UserDictionaryCommandStatus::UNKNOWN_ERROR;
}
*new_dictionary_id = CreateNewDictionaryId(*storage);
user_dictionary::UserDictionary* dictionary = storage->add_dictionaries();
if (dictionary == NULL) {
LOG(ERROR) << "add_dictionaries failed.";
return UserDictionaryCommandStatus::UNKNOWN_ERROR;
}
dictionary->set_id(*new_dictionary_id);
dictionary->set_name(dictionary_name);
return UserDictionaryCommandStatus::USER_DICTIONARY_COMMAND_SUCCESS;
}
bool UserDictionaryUtil::DeleteDictionary(
user_dictionary::UserDictionaryStorage *storage,
uint64 dictionary_id,
int *original_index,
user_dictionary::UserDictionary **deleted_dictionary) {
const int index = GetUserDictionaryIndexById(*storage, dictionary_id);
if (original_index != NULL) {
*original_index = index;
}
if (index < 0) {
LOG(ERROR) << "Invalid dictionary id: " << dictionary_id;
return false;
}
RepeatedPtrField<user_dictionary::UserDictionary> *dictionaries =
storage->mutable_dictionaries();
// Move the target dictionary to the end.
rotate(dictionaries->pointer_begin() + index,
dictionaries->pointer_begin() + index + 1,
dictionaries->pointer_end());
if (deleted_dictionary == NULL) {
dictionaries->RemoveLast();
} else {
*deleted_dictionary = dictionaries->ReleaseLast();
}
return true;
}
} // namespace mozc