// src/dictionary/user_dictionary_util.cc - mozc - Git at Google

 // Copyright 2010-2015, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 // copyright notice, this list of conditions and the following disclaimer
 // in the documentation and/or other materials provided with the
 // distribution.
 //     * Neither the name of Google Inc. nor the names of its
 // contributors may be used to endorse or promote products derived from
 // this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "dictionary/user_dictionary_util.h"

 #include <string.h>
 #include <algorithm>

 #include "base/config_file_stream.h"
 #include "base/file_stream.h"
 #include "base/logging.h"
 #include "base/protobuf/message.h"
 #include "base/protobuf/unknown_field_set.h"
 #include "base/util.h"
 #include "dictionary/user_pos_interface.h"

 namespace mozc {

 using ::mozc::protobuf::RepeatedPtrField;
 using ::mozc::protobuf::UnknownField;
 using ::mozc::protobuf::UnknownFieldSet;
 using ::mozc::user_dictionary::UserDictionaryCommandStatus;

 namespace {
 // File-local limits and constants shared by the validation and sanitization
 // helpers in this file.

 // Maximum string length in UserDictionaryEntry's field
 // (checked against string::size() by the callers below).
 const size_t kMaxKeySize = 300;
 const size_t kMaxValueSize = 300;
 const size_t kMaxCommentSize = 300;
 // Characters that are rejected in the word, comment and dictionary name.
 const char kInvalidChars[]= "\n\r\t";
 // Name passed to ConfigFileStream::GetFileName() to locate the dictionary.
 const char kUserDictionaryFile[] = "user://user_dictionary.db";

 // Sentinel meaning "no POS type could be resolved".
 const mozc::user_dictionary::UserDictionary::PosType kInvalidPosType =
     static_cast<mozc::user_dictionary::UserDictionary::PosType>(-1);

 // Maximum string length for dictionary name.
 const size_t kMaxDictionaryNameSize = 300;

 // The limits of dictionary/entry size.
 // kMaxDictionarySize bounds the number of dictionaries in a storage and
 // kMaxEntrySize bounds the number of entries in one dictionary.
 const size_t kMaxDictionarySize = 100;
 const size_t kMaxEntrySize = 1000000;
 }  // namespace

 // Returns kMaxDictionarySize, the limit IsStorageFull() applies to the number
 // of dictionaries in a storage.
 size_t UserDictionaryUtil::max_dictionary_size() {
   return kMaxDictionarySize;
 }

 // Returns kMaxEntrySize, the limit IsDictionaryFull() applies to the number
 // of entries in a single dictionary.
 size_t UserDictionaryUtil::max_entry_size() {
   return kMaxEntrySize;
 }

 // Returns true iff ValidateEntry() reports success for |entry|.
 // |user_pos| is accepted for interface compatibility but is not consulted.
 bool UserDictionaryUtil::IsValidEntry(
     const UserPOSInterface &user_pos,
     const user_dictionary::UserDictionary::Entry &entry) {
   const UserDictionaryCommandStatus::Status status = ValidateEntry(entry);
   return status ==
       UserDictionaryCommandStatus::USER_DICTIONARY_COMMAND_SUCCESS;
 }

 namespace {

 #define INRANGE(w, a, b) ((w) >= (a) && (w) <= (b))

 bool InternalValidateNormalizedReading(const string &reading) {
   for (ConstChar32Iterator iter(reading); !iter.Done(); iter.Next()) {
     const char32 c = iter.Get();
     if (!INRANGE(c, 0x0021, 0x007E) &&  // Basic Latin (Ascii)
         !INRANGE(c, 0x3041, 0x3096) &&  // Hiragana
         !INRANGE(c, 0x309B, 0x309C) &&  // KATAKANA-HIRAGANA VOICED/SEMI-VOICED
                                         // SOUND MARK
         !INRANGE(c, 0x30FB, 0x30FC) &&  // Nakaten, Prolonged sound mark
         !INRANGE(c, 0x3001, 0x3002) &&  // Japanese punctuation marks
         !INRANGE(c, 0x300C, 0x300F) &&  // Japanese brackets
         !INRANGE(c, 0x301C, 0x301C)) {  // Japanese Wavedash
       LOG(INFO) << "Invalid character in reading.";
       return false;
     }
   }
   return true;
 }

 #undef INRANGE

 }  // namespace

 bool UserDictionaryUtil::IsValidReading(const string &reading) {
   string normalized;
   NormalizeReading(reading, &normalized);
   return InternalValidateNormalizedReading(normalized);
 }

 void UserDictionaryUtil::NormalizeReading(const string &input, string *output) {
   output->clear();
   string tmp1, tmp2;
   Util::FullWidthAsciiToHalfWidthAscii(input, &tmp1);
   Util::HalfWidthKatakanaToFullWidthKatakana(tmp1, &tmp2);
   Util::KatakanaToHiragana(tmp2, output);
 }

 // Checks the key (reading), value (word), comment and POS of |entry|, in that
 // order, and returns the status code of the first failing check.
 // Returns USER_DICTIONARY_COMMAND_SUCCESS when every check passes.
 UserDictionaryCommandStatus::Status UserDictionaryUtil::ValidateEntry(
     const user_dictionary::UserDictionary::Entry &entry) {
   // Reading: must be non-empty, within the size limit and made of characters
   // accepted by IsValidReading().
   const string &key = entry.key();
   if (key.empty()) {
     VLOG(1) << "key is empty";
     return UserDictionaryCommandStatus::READING_EMPTY;
   }
   if (kMaxKeySize < key.size()) {
     VLOG(1) << "Too long key.";
     return UserDictionaryCommandStatus::READING_TOO_LONG;
   }
   if (!IsValidReading(key)) {
     VLOG(1) << "Invalid reading";
     return UserDictionaryCommandStatus::READING_CONTAINS_INVALID_CHARACTER;
   }

   // Word: must be non-empty, within the size limit and free of the
   // characters listed in kInvalidChars.
   const string &value = entry.value();
   if (value.empty()) {
     return UserDictionaryCommandStatus::WORD_EMPTY;
   }
   if (kMaxValueSize < value.size()) {
     VLOG(1) << "Too long value.";
     return UserDictionaryCommandStatus::WORD_TOO_LONG;
   }
   if (string::npos != value.find_first_of(kInvalidChars)) {
     VLOG(1) << "Invalid character in value.";
     return UserDictionaryCommandStatus::WORD_CONTAINS_INVALID_CHARACTER;
   }

   // Comment: may be empty, but is subject to the same size and character
   // restrictions.
   const string &note = entry.comment();
   if (kMaxCommentSize < note.size()) {
     VLOG(1) << "Too long comment.";
     return UserDictionaryCommandStatus::COMMENT_TOO_LONG;
   }
   if (string::npos != note.find_first_of(kInvalidChars)) {
     VLOG(1) << "Invalid character in comment.";
     return UserDictionaryCommandStatus::COMMENT_CONTAINS_INVALID_CHARACTER;
   }

   // POS: must be present and a declared enum value.
   const bool pos_is_usable =
       entry.has_pos() &&
       user_dictionary::UserDictionary::PosType_IsValid(entry.pos());
   if (!pos_is_usable) {
     VLOG(1) << "Invalid POS";
     return UserDictionaryCommandStatus::INVALID_POS_TYPE;
   }

   return UserDictionaryCommandStatus::USER_DICTIONARY_COMMAND_SUCCESS;
 }

 // Returns true when |storage| already holds kMaxDictionarySize dictionaries
 // or more.
 bool UserDictionaryUtil::IsStorageFull(
     const user_dictionary::UserDictionaryStorage &storage) {
   // The count is an int while the limit is a size_t; spell the conversion out
   // instead of relying on the implicit one.
   const size_t dictionary_count =
       static_cast<size_t>(storage.dictionaries_size());
   return dictionary_count >= kMaxDictionarySize;
 }

 // Returns true when |dictionary| already holds kMaxEntrySize entries or more.
 bool UserDictionaryUtil::IsDictionaryFull(
     const user_dictionary::UserDictionary &dictionary) {
   // The count is an int while the limit is a size_t; spell the conversion out
   // instead of relying on the implicit one.
   const size_t entry_count = static_cast<size_t>(dictionary.entries_size());
   return entry_count >= kMaxEntrySize;
 }

 // Returns a pointer to the dictionary in |storage| whose id equals
 // |dictionary_id|, or NULL when GetUserDictionaryIndexById() finds none.
 const user_dictionary::UserDictionary *
 UserDictionaryUtil::GetUserDictionaryById(
     const user_dictionary::UserDictionaryStorage &storage,
     uint64 dictionary_id) {
   const int position = GetUserDictionaryIndexById(storage, dictionary_id);
   if (position < 0) {
     return NULL;
   }
   return &storage.dictionaries(position);
 }

 // Mutable counterpart of GetUserDictionaryById(): returns the dictionary in
 // |storage| whose id equals |dictionary_id|, or NULL when there is none.
 user_dictionary::UserDictionary *
 UserDictionaryUtil::GetMutableUserDictionaryById(
     user_dictionary::UserDictionaryStorage *storage, uint64 dictionary_id) {
   const int position = GetUserDictionaryIndexById(*storage, dictionary_id);
   if (position < 0) {
     return NULL;
   }
   return storage->mutable_dictionaries(position);
 }

 // Linearly scans |storage| and returns the index of the first dictionary
 // whose id equals |dictionary_id|. Logs an error and returns -1 when no
 // dictionary matches.
 int UserDictionaryUtil::GetUserDictionaryIndexById(
     const user_dictionary::UserDictionaryStorage &storage,
     uint64 dictionary_id) {
   const int dictionary_count = storage.dictionaries_size();
   for (int index = 0; index < dictionary_count; ++index) {
     if (storage.dictionaries(index).id() != dictionary_id) {
       continue;
     }
     return index;
   }

   LOG(ERROR) << "Cannot find dictionary id: " << dictionary_id;
   return -1;
 }

 // Returns the file name that ConfigFileStream::GetFileName() yields for
 // kUserDictionaryFile ("user://user_dictionary.db").
 string UserDictionaryUtil::GetUserDictionaryFileName() {
   return ConfigFileStream::GetFileName(kUserDictionaryFile);
 }

 // static
 // Sanitizes the key, value and comment of |entry| with Sanitize() and
 // replaces a POS that is not a declared enum value with NOUN.
 // Returns true if any field was changed.
 bool UserDictionaryUtil::SanitizeEntry(
     user_dictionary::UserDictionary::Entry *entry) {
   const bool key_changed = Sanitize(entry->mutable_key(), kMaxKeySize);
   const bool value_changed = Sanitize(entry->mutable_value(), kMaxValueSize);

   bool pos_changed = false;
   if (!user_dictionary::UserDictionary::PosType_IsValid(entry->pos())) {
     // Fallback to NOUN.
     entry->set_pos(user_dictionary::UserDictionary::NOUN);
     pos_changed = true;
   }

   const bool comment_changed =
       Sanitize(entry->mutable_comment(), kMaxCommentSize);

   return key_changed || value_changed || pos_changed || comment_changed;
 }

 // static
 // Removes every tab, LF and CR from |str| and, if the result is still longer
 // than |max_size| bytes, truncates it at the last character boundary (as
 // reported by Util::OneCharLen()) that fits. Returns true iff |str| changed.
 bool UserDictionaryUtil::Sanitize(string *str, size_t max_size) {
   // Phase 1: compact the string in place. std::remove() only shifts the kept
   // bytes towards the front; |str| keeps its original size until it is erased
   // below.
   const size_t size_before = str->size();
   const string::iterator first = str->begin();
   string::iterator kept_end = str->end();
   kept_end = std::remove(first, kept_end, '\t');
   kept_end = std::remove(first, kept_end, '\n');
   kept_end = std::remove(first, kept_end, '\r');
   const size_t kept_size = static_cast<size_t>(kept_end - first);

   if (kept_size <= max_size) {
     if (kept_size == size_before) {
       // Nothing was removed and nothing needs truncating.
       return false;
     }
     str->erase(kept_size);
     return true;
   }

   // Phase 2: the compacted text is still too long. Walk it one character at a
   // time and cut before the first character that would cross |max_size|.
   // Since more than |max_size| bytes remain, the cut always happens inside
   // the loop.
   const char *const head = str->data();
   const size_t total_size = str->size();
   size_t offset = 0;
   while (offset < total_size) {
     const size_t char_len = Util::OneCharLen(head + offset);
     if (offset + char_len > max_size) {
       str->erase(offset);
       return true;
     }
     offset += char_len;
   }
   LOG(FATAL) << "There should be a bug in implementation of the function.";

   return true;
 }

 // Checks that |dictionary_name| is non-empty, not longer than
 // kMaxDictionaryNameSize, free of the characters in kInvalidChars and not
 // already used by a dictionary in |storage|. Returns the status of the first
 // failing check, or USER_DICTIONARY_COMMAND_SUCCESS.
 UserDictionaryCommandStatus::Status UserDictionaryUtil::ValidateDictionaryName(
     const user_dictionary::UserDictionaryStorage &storage,
     const string &dictionary_name) {
   if (dictionary_name.empty()) {
     VLOG(1) << "Empty dictionary name.";
     return UserDictionaryCommandStatus::DICTIONARY_NAME_EMPTY;
   }

   if (kMaxDictionaryNameSize < dictionary_name.size()) {
     VLOG(1) << "Too long dictionary name";
     return UserDictionaryCommandStatus::DICTIONARY_NAME_TOO_LONG;
   }

   const bool has_invalid_char =
       dictionary_name.find_first_of(kInvalidChars) != string::npos;
   if (has_invalid_char) {
     VLOG(1) << "Invalid character in dictionary name: " << dictionary_name;
     return UserDictionaryCommandStatus
         ::DICTIONARY_NAME_CONTAINS_INVALID_CHARACTER;
   }

   // Names must be unique within the storage.
   const int dictionary_count = storage.dictionaries_size();
   for (int index = 0; index < dictionary_count; ++index) {
     if (dictionary_name == storage.dictionaries(index).name()) {
       LOG(ERROR) << "duplicated dictionary name";
       return UserDictionaryCommandStatus::DICTIONARY_NAME_DUPLICATED;
     }
   }

   return UserDictionaryCommandStatus::USER_DICTIONARY_COMMAND_SUCCESS;
 }

 namespace {
 // Display names of the POS types, stored as UTF-8 byte escapes; the readable
 // form of each name is given in the trailing comment.
 // GetStringPosType() indexes this table with the enum value and ToPosType()
 // searches it by name.
 //
 // The index of each element should be matched with the actual value of enum.
 // See also user_dictionary_storage.proto for the definition of the enum.
 // Note that the '0' is invalid in the definition, so the corresponding
 // element is NULL.
 const char *kPosTypeStringTable[] = {
   NULL,
   "\xE5\x90\x8D\xE8\xA9\x9E",  // "名詞"
   "\xE7\x9F\xAD\xE7\xB8\xAE\xE3\x82\x88\xE3\x81\xBF",  // "短縮よみ"
   "\xE3\x82\xB5\xE3\x82\xB8\xE3\x82\xA7\xE3\x82\xB9\xE3\x83\x88"
       "\xE3\x81\xAE\xE3\x81\xBF",  // "サジェストのみ"
   "\xE5\x9B\xBA\xE6\x9C\x89\xE5\x90\x8D\xE8\xA9\x9E",  // "固有名詞"
   "\xE4\xBA\xBA\xE5\x90\x8D",  // "人名"
   "\xE5\xA7\x93",  // "姓"
   "\xE5\x90\x8D",  // "名"
   "\xE7\xB5\x84\xE7\xB9\x94",  // "組織"
   "\xE5\x9C\xB0\xE5\x90\x8D",  // "地名"
   "\xE5\x90\x8D\xE8\xA9\x9E\xE3\x82\xB5\xE5\xA4\x89",  // "名詞サ変"
   "\xE5\x90\x8D\xE8\xA9\x9E\xE5\xBD\xA2\xE5\x8B\x95",  // "名詞形動"
   "\xE6\x95\xB0",  // "数"
   "\xE3\x82\xA2\xE3\x83\xAB\xE3\x83\x95\xE3\x82\xA1"
       "\xE3\x83\x99\xE3\x83\x83\xE3\x83\x88",  // "アルファベット"
   "\xE8\xA8\x98\xE5\x8F\xB7",  // "記号"
   "\xE9\xA1\x94\xE6\x96\x87\xE5\xAD\x97",  // "顔文字"

   "\xE5\x89\xAF\xE8\xA9\x9E",  // "副詞"
   "\xE9\x80\xA3\xE4\xBD\x93\xE8\xA9\x9E",  // "連体詞"
   "\xE6\x8E\xA5\xE7\xB6\x9A\xE8\xA9\x9E",  // "接続詞"
   "\xE6\x84\x9F\xE5\x8B\x95\xE8\xA9\x9E",  // "感動詞"
   "\xE6\x8E\xA5\xE9\xA0\xAD\xE8\xAA\x9E",  // "接頭語"
   "\xE5\x8A\xA9\xE6\x95\xB0\xE8\xA9\x9E",  // "助数詞"
   "\xE6\x8E\xA5\xE5\xB0\xBE\xE4\xB8\x80\xE8\x88\xAC",  // "接尾一般"
   "\xE6\x8E\xA5\xE5\xB0\xBE\xE4\xBA\xBA\xE5\x90\x8D",  // "接尾人名"
   "\xE6\x8E\xA5\xE5\xB0\xBE\xE5\x9C\xB0\xE5\x90\x8D",  // "接尾地名"
   "\xE5\x8B\x95\xE8\xA9\x9E"
       "\xE3\x83\xAF\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5",  // "動詞ワ行五段"
   "\xE5\x8B\x95\xE8\xA9\x9E"
       "\xE3\x82\xAB\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5",  // "動詞カ行五段"
   "\xE5\x8B\x95\xE8\xA9\x9E"
       "\xE3\x82\xB5\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5",  // "動詞サ行五段"
   "\xE5\x8B\x95\xE8\xA9\x9E"
       "\xE3\x82\xBF\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5",  // "動詞タ行五段"
   "\xE5\x8B\x95\xE8\xA9\x9E"
       "\xE3\x83\x8A\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5",  // "動詞ナ行五段"
   "\xE5\x8B\x95\xE8\xA9\x9E"
       "\xE3\x83\x9E\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5",  // "動詞マ行五段"
   "\xE5\x8B\x95\xE8\xA9\x9E"
       "\xE3\x83\xA9\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5",  // "動詞ラ行五段"
   "\xE5\x8B\x95\xE8\xA9\x9E"
       "\xE3\x82\xAC\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5",  // "動詞ガ行五段"
   "\xE5\x8B\x95\xE8\xA9\x9E"
       "\xE3\x83\x90\xE8\xA1\x8C\xE4\xBA\x94\xE6\xAE\xB5",  // "動詞バ行五段"
   "\xE5\x8B\x95\xE8\xA9\x9E"
       "\xE3\x83\x8F\xE8\xA1\x8C\xE5\x9B\x9B\xE6\xAE\xB5",  // "動詞ハ行四段"
   "\xE5\x8B\x95\xE8\xA9\x9E\xE4\xB8\x80\xE6\xAE\xB5",  // "動詞一段"
   "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xAB\xE5\xA4\x89",  // "動詞カ変"
   "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xB5\xE5\xA4\x89",  // "動詞サ変"
   "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x82\xB6\xE5\xA4\x89",  // "動詞ザ変"
   "\xE5\x8B\x95\xE8\xA9\x9E\xE3\x83\xA9\xE5\xA4\x89",  // "動詞ラ変"
   "\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E",  // "形容詞"
   "\xE7\xB5\x82\xE5\x8A\xA9\xE8\xA9\x9E",  // "終助詞"
   "\xE5\x8F\xA5\xE8\xAA\xAD\xE7\x82\xB9",  // "句読点"
   "\xE7\x8B\xAC\xE7\xAB\x8B\xE8\xAA\x9E",  // "独立語"
   "\xE6\x8A\x91\xE5\x88\xB6\xE5\x8D\x98\xE8\xAA\x9E",  // "抑制単語"
 };
 }  // namespace

 // Returns the display name stored in kPosTypeStringTable for |pos_type|, or
 // NULL when |pos_type| is not a declared enum value or has no table entry.
 // The returned pointer refers to static storage.
 const char* UserDictionaryUtil::GetStringPosType(
     user_dictionary::UserDictionary::PosType pos_type) {
   if (!user_dictionary::UserDictionary::PosType_IsValid(pos_type)) {
     return NULL;
   }
   // The table is maintained by hand in parallel with the enum. Guard the
   // lookup so that an enum value added without a matching table entry yields
   // NULL instead of an out-of-bounds read.
   const size_t index = static_cast<size_t>(pos_type);
   if (index >= arraysize(kPosTypeStringTable)) {
     return NULL;
   }
   return kPosTypeStringTable[index];
 }

 // Returns the POS type whose display name in kPosTypeStringTable equals
 // |string_pos_type|. Returns kInvalidPosType when |string_pos_type| is NULL
 // or matches no entry.
 user_dictionary::UserDictionary::PosType UserDictionaryUtil::ToPosType(
     const char *string_pos_type) {
   // strcmp() must not be handed a NULL pointer.
   if (string_pos_type == NULL) {
     return kInvalidPosType;
   }

   // Skip the element at 0, which is the NULL placeholder for the invalid
   // enum value.
   for (size_t i = 1; i < arraysize(kPosTypeStringTable); ++i) {
     if (strcmp(kPosTypeStringTable[i], string_pos_type) == 0) {
       return static_cast<user_dictionary::UserDictionary::PosType>(i);
     }
   }

   // Not found. Return invalid value.
   return kInvalidPosType;
 }

 namespace {

 // Returns the first field of |unknown_field_set| whose number equals
 // |tag_number|, or NULL if there is none. The result points into
 // |unknown_field_set|.
 const UnknownField *GetUnknownFieldByTagNumber(
     const UnknownFieldSet &unknown_field_set, int tag_number) {
   const int field_count = unknown_field_set.field_count();
   for (int index = 0; index < field_count; ++index) {
     const UnknownField *candidate = &unknown_field_set.field(index);
     if (candidate->number() != tag_number) {
       continue;
     }
     // Use first entry.
     return candidate;
   }
   return NULL;
 }

 void RemoveUnknownFieldByTagNumber(
     int tag_number, UnknownFieldSet *unknown_field_set) {
   UnknownFieldSet temporary_unknown_field_set;
   for (int i = 0; i < unknown_field_set->field_count(); ++i) {
     const UnknownField &field = unknown_field_set->field(i);
     if (field.number() == tag_number) {
       continue;
     }
     temporary_unknown_field_set.AddField(field);
   }
   unknown_field_set->Swap(&temporary_unknown_field_set);
 }

 // One row of the kRemovedPosType table: a POS display name that is no longer
 // present in kPosTypeStringTable, paired with the POS type it resolves to.
 struct RemovedPosTypeResolveTable {
   const char *name;  // Removed POS display name, as a UTF-8 string.
   mozc::user_dictionary::UserDictionary::PosType pos_type;  // Replacement.
 };

 // Mapping from removed POS display names to the POS type used in their
 // place. ResolveRemovedPosType() searches this table by name. Names are
 // UTF-8 byte escapes; the readable form is given in the comment above each
 // row.
 const RemovedPosTypeResolveTable kRemovedPosType[] = {
   // Removed in CL/9909127.
   // "名詞副詞可能"
   { "\xE5\x90\x8D\xE8\xA9\x9E\xE5\x89\xAF\xE8\xA9\x9E\xE5\x8F\xAF\xE8\x83\xBD",
     mozc::user_dictionary::UserDictionary::NOUN },
   // "接頭形容詞接続"
   { "\xE6\x8E\xA5\xE9\xA0\xAD\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E"
     "\xE6\x8E\xA5\xE7\xB6\x9A",
     mozc::user_dictionary::UserDictionary::PREFIX },
   // "接頭数接続"
   { "\xE6\x8E\xA5\xE9\xA0\xAD\xE6\x95\xB0\xE6\x8E\xA5\xE7\xB6\x9A",
     mozc::user_dictionary::UserDictionary::PREFIX },
   // "接頭動詞接続"
   { "\xE6\x8E\xA5\xE9\xA0\xAD\xE5\x8B\x95\xE8\xA9\x9E\xE6\x8E\xA5\xE7\xB6\x9A",
     mozc::user_dictionary::UserDictionary::PREFIX },
   // "接頭名詞接続"
   { "\xE6\x8E\xA5\xE9\xA0\xAD\xE5\x90\x8D\xE8\xA9\x9E\xE6\x8E\xA5\xE7\xB6\x9A",
     mozc::user_dictionary::UserDictionary::PREFIX },
   // "形容詞アウオ段"
   { "\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E"
     "\xE3\x82\xA2\xE3\x82\xA6\xE3\x82\xAA\xE6\xAE\xB5",
     mozc::user_dictionary::UserDictionary::ADJECTIVE },
   // "形容詞イ段"
   { "\xE5\xBD\xA2\xE5\xAE\xB9\xE8\xA9\x9E\xE3\x82\xA4\xE6\xAE\xB5",
     mozc::user_dictionary::UserDictionary::ADJECTIVE },

   // Removed in CL/18000642.
   // "括弧開"
   { "\xE6\x8B\xAC\xE5\xBC\xA7\xE9\x96\x8B",
     mozc::user_dictionary::UserDictionary::SYMBOL },
   // "括弧閉"
   { "\xE6\x8B\xAC\xE5\xBC\xA7\xE9\x96\x89",
     mozc::user_dictionary::UserDictionary::SYMBOL },
 };

 // Looks |name| up in kRemovedPosType and returns the POS type recorded for
 // it, or kInvalidPosType when |name| is not listed.
 mozc::user_dictionary::UserDictionary::PosType ResolveRemovedPosType(
     const string &name) {
   const RemovedPosTypeResolveTable *const table_end =
       kRemovedPosType + arraysize(kRemovedPosType);
   for (const RemovedPosTypeResolveTable *row = kRemovedPosType;
        row != table_end; ++row) {
     if (name == row->name) {
       return row->pos_type;
     }
   }

   // Not found. Return invalid pos type.
   return kInvalidPosType;
 }

 // The deprecated tag number of "pos" field in UserDictionary::Entry.
 // ResolveUnknownFieldSet() looks for unknown fields carrying this number and
 // removes them once their value has been resolved.
 const int kDeprecatedPosTagNumber = 3;

 }  // namespace

 // Migrates POS data that is stored as an unknown field with the deprecated
 // tag number (kDeprecatedPosTagNumber) into the "pos" field of every entry
 // in |storage|.
 //
 // For each entry carrying such an unknown field, the value is interpreted as
 // either a POS number (varint) or a POS display name (length-delimited), with
 // removed display names resolved through ResolveRemovedPosType(). On success
 // the entry's pos is set (if it had none) and the deprecated unknown field is
 // removed.
 //
 // Returns false if at least one entry could not be resolved, i.e. the value
 // did not name a valid POS type or it contradicted the pos already stored in
 // the entry. Entries that fail and have no pos at all fall back to NOUN.
 bool UserDictionaryUtil::ResolveUnknownFieldSet(
     user_dictionary::UserDictionaryStorage *storage) {
   using mozc::user_dictionary::UserDictionary;
   typedef UserDictionary::Entry Entry;

   bool result = true;
   for (int i = 0; i < storage->dictionaries_size(); ++i) {
     UserDictionary *dictionary = storage->mutable_dictionaries(i);
     for (int j = 0; j < dictionary->entries_size(); ++j) {
       Entry *entry = dictionary->mutable_entries(j);

       const UnknownField *unknown_field = GetUnknownFieldByTagNumber(
           entry->unknown_fields(), kDeprecatedPosTagNumber);
       if (unknown_field == NULL) {
         // Here, there are two cases:
         // 1) The entry is already in the new format. Don't need migration.
         // 2) The entry doesn't have POS actually.
         // Note that it is "possible" (but not valid) for an entry to keep
         // its POS field empty. Do not treat it as "resolving failure."
         LOG_IF(WARNING, !entry->has_pos()) << "Unknown field is not found.";
         continue;
       }

       UserDictionary::PosType pos_type = kInvalidPosType;
       switch (unknown_field->type()) {
         case UnknownField::TYPE_VARINT: {
           // The number comes straight from the stored data, so it may not
           // name a declared PosType. Accept it only if it survives the
           // round trip through int and is a valid enum value; otherwise
           // leave |pos_type| invalid so the failure path below handles it
           // instead of passing an out-of-range value to set_pos().
           const uint64 raw_value = unknown_field->varint();
           const int candidate = static_cast<int>(raw_value);
           if (candidate >= 0 &&
               static_cast<uint64>(candidate) == raw_value &&
               UserDictionary::PosType_IsValid(candidate)) {
             pos_type = static_cast<UserDictionary::PosType>(candidate);
           }
           break;
         }
         case UnknownField::TYPE_LENGTH_DELIMITED:
           pos_type = ToPosType(unknown_field->length_delimited().c_str());
           if (pos_type == kInvalidPosType) {
             // The value may be created by very old mozc dictionary tool.
             // Try to find the value from a list containing removed pos names.
             pos_type =
                 ResolveRemovedPosType(unknown_field->length_delimited());
           }
           break;
         default:
           LOG(ERROR) << "Unknown deprecated pos type value: "
                      << unknown_field->type();
           break;
       }

       if (pos_type == kInvalidPosType) {
         LOG(ERROR) << "Failed to resolve old pos data.";
         if (!entry->has_pos()) {
           // If there is no pos here, users cannot use this entry for the
           // conversion. Thus, as a fallback, we fill NOUN by default.
           entry->set_pos(UserDictionary::NOUN);
         }
         result = false;
         continue;
       }

       if (entry->has_pos()) {
         if (entry->pos() != pos_type) {
           LOG(ERROR) << "Failed to resolve the entry due to "
                      << "pos type inconsistency: "
                      << entry->pos() << ", " << pos_type;
           result = false;
           continue;
         }
       } else {
         entry->set_pos(pos_type);
       }

       // In future, we may want to add some fields into the message.
       // So, don't touch any fields other than ones we processed.
       UnknownFieldSet *unknown_field_set = entry->mutable_unknown_fields();
       RemoveUnknownFieldByTagNumber(
           kDeprecatedPosTagNumber, unknown_field_set);
       if (unknown_field_set->field_count() == 0) {
         entry->DiscardUnknownFields();
       }
     }
   }

   return result;
 }

 // For every entry in |storage| that has a pos, appends a length-delimited
 // unknown field numbered kDeprecatedPosTagNumber that carries the POS display
 // name returned by GetStringPosType(). Entries without a pos, or whose pos
 // has no display name, are left untouched.
 void UserDictionaryUtil::FillDesktopDeprecatedPosField(
     user_dictionary::UserDictionaryStorage *storage) {
   for (int i = 0; i < storage->dictionaries_size(); ++i) {
     user_dictionary::UserDictionary *dictionary =
         storage->mutable_dictionaries(i);
     for (int j = 0; j < dictionary->entries_size(); ++j) {
       user_dictionary::UserDictionary::Entry *entry =
           dictionary->mutable_entries(j);
       if (!entry->has_pos()) {
         // No pos is found, so don't need backward compatibility process.
         continue;
       }

       // GetStringPosType() returns NULL for a pos it cannot name. A NULL
       // pointer must not be converted to a string, so skip such entries.
       const char *pos_name =
           UserDictionaryUtil::GetStringPosType(entry->pos());
       if (pos_name == NULL) {
         LOG(WARNING) << "No string representation for pos: " << entry->pos();
         continue;
       }

       UnknownFieldSet *unknown_field_set = entry->mutable_unknown_fields();
       unknown_field_set->AddLengthDelimited(kDeprecatedPosTagNumber, pos_name);
     }
   }
 }

 // Draws random ids with Util::GetRandomSequence() until one is found that is
 // neither 0 (reserved as the invalid id) nor already used by a dictionary in
 // |storage|, and returns it.
 uint64 UserDictionaryUtil::CreateNewDictionaryId(
     const user_dictionary::UserDictionaryStorage &storage) {
   static const uint64 kInvalidDictionaryId = 0;

   uint64 id = kInvalidDictionaryId;
   do {
     Util::GetRandomSequence(reinterpret_cast<char *>(&id), sizeof(id));

 #ifdef  __native_client__
     // Because JavaScript does not support uint64, we downsize the dictionary id
     // range from uint64 to uint32 in NaCl.
     id = static_cast<uint32>(id);
 #endif  // __native_client__

     // Reject the candidate if an existing dictionary already uses it;
     // resetting it to the invalid id makes the loop draw again.
     const int dictionary_count = storage.dictionaries_size();
     for (int index = 0; index < dictionary_count; ++index) {
       if (id == storage.dictionaries(index).id()) {
         id = kInvalidDictionaryId;
         break;
       }
     }
   } while (id == kInvalidDictionaryId);

   return id;
 }

 // Appends a new, empty dictionary named |dictionary_name| to |storage| and
 // stores its freshly generated id in |*new_dictionary_id|.
 //
 // Returns the failing status from ValidateDictionaryName() for an unusable
 // name, DICTIONARY_SIZE_LIMIT_EXCEEDED when the storage is full, and
 // UNKNOWN_ERROR when |new_dictionary_id| is NULL or the dictionary cannot be
 // added. The checks run in that order.
 UserDictionaryCommandStatus::Status UserDictionaryUtil::CreateDictionary(
     user_dictionary::UserDictionaryStorage *storage,
     const string &dictionary_name,
     uint64 *new_dictionary_id) {
   const UserDictionaryCommandStatus::Status name_status =
       ValidateDictionaryName(*storage, dictionary_name);
   if (name_status !=
       UserDictionaryCommandStatus::USER_DICTIONARY_COMMAND_SUCCESS) {
     LOG(ERROR) << "Invalid dictionary name is passed";
     return name_status;
   }

   if (IsStorageFull(*storage)) {
     LOG(ERROR) << "too many dictionaries";
     return UserDictionaryCommandStatus::DICTIONARY_SIZE_LIMIT_EXCEEDED;
   }

   if (new_dictionary_id == NULL) {
     LOG(ERROR) << "new_dictionary_id is NULL";
     return UserDictionaryCommandStatus::UNKNOWN_ERROR;
   }

   // The id is generated before the dictionary is appended, so the uniqueness
   // check only sees the dictionaries that already exist.
   *new_dictionary_id = CreateNewDictionaryId(*storage);
   user_dictionary::UserDictionary *created = storage->add_dictionaries();
   if (created == NULL) {
     LOG(ERROR) << "add_dictionaries failed.";
     return UserDictionaryCommandStatus::UNKNOWN_ERROR;
   }

   created->set_id(*new_dictionary_id);
   created->set_name(dictionary_name);
   return UserDictionaryCommandStatus::USER_DICTIONARY_COMMAND_SUCCESS;
 }

 // Removes the dictionary whose id is |dictionary_id| from |storage|.
 //
 // If |original_index| is not NULL it receives the index the dictionary had
 // before removal (-1 when it was not found). If |deleted_dictionary| is not
 // NULL the removed dictionary is released from the storage and handed to the
 // caller through it; otherwise the dictionary is simply dropped.
 // Returns false when no dictionary has the given id.
 bool UserDictionaryUtil::DeleteDictionary(
     user_dictionary::UserDictionaryStorage *storage,
     uint64 dictionary_id,
     int *original_index,
     user_dictionary::UserDictionary **deleted_dictionary) {
   const int target = GetUserDictionaryIndexById(*storage, dictionary_id);
   if (original_index != NULL) {
     *original_index = target;
   }

   if (target < 0) {
     LOG(ERROR) << "Invalid dictionary id: " << dictionary_id;
     return false;
   }

   RepeatedPtrField<user_dictionary::UserDictionary> *dictionaries =
       storage->mutable_dictionaries();
   // Move the target dictionary to the end, keeping the relative order of the
   // others, so that it can be taken off with a "last element" operation.
   std::rotate(dictionaries->pointer_begin() + target,
               dictionaries->pointer_begin() + target + 1,
               dictionaries->pointer_end());

   if (deleted_dictionary != NULL) {
     *deleted_dictionary = dictionaries->ReleaseLast();
   } else {
     dictionaries->RemoveLast();
   }

   return true;
 }

 }  // namespace mozc