src/win32/base/imm_reconvert_string.cc - mozc - Git at Google

 // Copyright 2010-2015, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 // copyright notice, this list of conditions and the following disclaimer
 // in the documentation and/or other materials provided with the
 // distribution.
 //     * Neither the name of Google Inc. nor the names of its
 // contributors may be used to endorse or promote products derived from
 // this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "win32/base/imm_reconvert_string.h"

 #include <safeint.h>

 #include <cstdint>

 #include "base/logging.h"
 #include "base/util.h"

 namespace mozc {
 namespace win32 {
 namespace {

 using msl::utilities::SafeAdd;
 using msl::utilities::SafeCast;
 using msl::utilities::SafeMultiply;
 using msl::utilities::SafeSubtract;

 // Returns true when adding |ptr->dwSize| to the numeric address of |ptr|
 // does not overflow, i.e. when the buffer described by |ptr| does not run
 // past the end of the process address space.
 // |ptr| must be non-null; |T| must expose a |dwSize| member, which is read
 // unconditionally.
 template <typename T>
 bool CheckAddressSpace(const T *ptr) {
   // A pointer-sized integer keeps this building on every target
   // architecture instead of only on x86 and x64.
   const std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(ptr);
   std::uintptr_t addr_last = 0;
   // SafeAdd fails when the buffer exceeds the process address space.
   return SafeAdd(addr, ptr->dwSize, addr_last);
 }

 // TODO(yukawa): Make a mechanism to generate this code from UnicodeData.txt.
 // Returns true if |c| is a C0 or C1 control code.
 // |c| is taken as a full 32-bit code point because callers pass values that
 // were decoded from surrogate pairs. A 16-bit wchar_t parameter would
 // truncate e.g. U+10000 to U+0000 and misreport it as a control code.
 bool IsControlCode(char32_t c) {
   // Based on UnicodeData.txt (5.2.0).
   // [U+0000 (NUL), U+001F (INFORMATION SEPARATOR ONE)]
   // [U+007F (DELETE), U+009F (APPLICATION PROGRAM COMMAND)]
   return (c <= 0x001F) || (0x007F <= c && c <= 0x009F);
 }

 // TODO(yukawa): Move this to util.cc.
 // Combines a UTF-16 surrogate pair into a single code point. Only the low
 // ten bits of each unit's distance from its surrogate base are used.
 char32 SurrogatePairToUCS4(wchar_t high, wchar_t low) {
   const auto upper_bits = (high - 0xD800) & 0x3FF;
   const auto lower_bits = (low - 0xDC00) & 0x3FF;
   return (upper_bits << 10) + lower_bits + 0x10000;
 }
 }  // anonymous namespace

 bool ReconvertString::Compose(const wstring &preceding_text,
                               const wstring &preceding_composition,
                               const wstring &target,
                               const wstring &following_composition,
                               const wstring &following_text,
                               RECONVERTSTRING *reconvert_string) {
   if (reconvert_string == nullptr) {
     return false;
   }

   if (!CheckAddressSpace(reconvert_string)) {
     return false;
   }

   DWORD preceding_text_len = 0;
   if (!SafeCast(preceding_text.size(), preceding_text_len)) {
     return false;
   }
   DWORD preceding_composition_len = 0;
   if (!SafeCast(preceding_composition.size(), preceding_composition_len)) {
     return false;
   }
   DWORD target_len = 0;
   if (!SafeCast(target.size(), target_len)) {
     return false;
   }
   DWORD following_composition_len = 0;
   if (!SafeCast(following_composition.size(), following_composition_len)) {
     return false;
   }
   DWORD following_text_len = 0;
   if (!SafeCast(following_text.size(), following_text_len)) {
     return false;
   }

   DWORD total_chars = 0;
   if (!SafeAdd(total_chars, preceding_text_len, total_chars)) {
     return false;
   }
   if (!SafeAdd(total_chars, preceding_composition_len, total_chars)) {
     return false;
   }
   if (!SafeAdd(total_chars, target_len, total_chars)) {
     return false;
   }
   if (!SafeAdd(total_chars, following_composition_len, total_chars)) {
     return false;
   }
   if (!SafeAdd(total_chars, following_text_len, total_chars)) {
     return false;
   }

   DWORD total_buffer_size = 0;
   if (!SafeMultiply(total_chars, sizeof(wchar_t), total_buffer_size)) {
     return false;
   }

   DWORD minimum_dw_size = 0;
   if (!SafeAdd(total_buffer_size, sizeof(RECONVERTSTRING), minimum_dw_size)) {
     return false;
   }

   if (minimum_dw_size > reconvert_string->dwSize) {
     // |dwSize| is too small.
     return false;
   }

   // |dwVersion| is fixed to 0.
   // http://msdn.microsoft.com/en-us/library/dd319107.aspx
   reconvert_string->dwVersion = 0;

   reconvert_string->dwStrOffset = sizeof(RECONVERTSTRING);
   reconvert_string->dwStrLen = total_chars;
   reconvert_string->dwTargetStrLen = target_len;

   if (!SafeAdd(preceding_composition_len, target_len,
                reconvert_string->dwCompStrLen)) {
     return false;
   }
   if (!SafeAdd(reconvert_string->dwCompStrLen, following_composition_len,
                reconvert_string->dwCompStrLen)) {
     return false;
   }

   if (!SafeMultiply(preceding_text_len, sizeof(wchar_t),
                     reconvert_string->dwCompStrOffset)) {
     return false;
   }

   DWORD target_offset_chars = 0;
   if (!SafeAdd(preceding_text_len, preceding_composition_len,
                target_offset_chars)) {
     return false;
   }
   if (!SafeMultiply(target_offset_chars, sizeof(wchar_t),
                     reconvert_string->dwTargetStrOffset)) {
     return false;
   }

   wchar_t *string_buffer = reinterpret_cast<wchar_t *>(
       reinterpret_cast<BYTE *>(reconvert_string) +
       reconvert_string->dwStrOffset);

   // concatenate |preceding_text|, |preceding_composition|, |target|,
   // |following_composition|, and |following_text| into |string_buffer|.
   {
     size_t index = 0;
     for (size_t i = 0; i < preceding_text.size(); ++i) {
       string_buffer[index] = preceding_text[i];
       ++index;
     }
     for (size_t i = 0; i < preceding_composition.size(); ++i) {
       string_buffer[index] = preceding_composition[i];
       ++index;
     }
     for (size_t i = 0; i < target.size(); ++i) {
       string_buffer[index] = target[i];
       ++index;
     }
     for (size_t i = 0; i < following_composition.size(); ++i) {
       string_buffer[index] = following_composition[i];
       ++index;
     }
     for (size_t i = 0; i < following_text.size(); ++i) {
       string_buffer[index] = following_text[i];
       ++index;
     }
   }

   return true;
 }

 bool ReconvertString::Decompose(const RECONVERTSTRING *reconvert_string,
                                 wstring *preceding_text,
                                 wstring *preceding_composition,
                                 wstring *target,
                                 wstring *following_composition,
                                 wstring *following_text) {
   if (reconvert_string == nullptr) {
     return false;
   }

   if (reconvert_string->dwSize < sizeof(RECONVERTSTRING)) {
     // |dwSize| must be equal to or greater than sizeof(RECONVERTSTRING).
     return false;
   }

   if (reconvert_string->dwVersion != 0) {
     // |dwVersion| must be 0.
     return false;
   }

   if (!CheckAddressSpace(reconvert_string)) {
     return false;
   }

   if (reconvert_string->dwStrOffset > reconvert_string->dwSize) {
     // |dwStrOffset| must be inside of the buffer.
     return false;
   }

   const wchar_t *string_buffer = reinterpret_cast<const wchar_t *>(
       reinterpret_cast<const BYTE *>(reconvert_string) +
       reconvert_string->dwStrOffset);

   DWORD buffer_size_in_byte = 0;
   {
     // This must be always S_OK because |dwStrOffset <= dwSize|.
     if (!SafeSubtract(reconvert_string->dwSize,
                       reconvert_string->dwStrOffset,
                       buffer_size_in_byte)) {
       return false;
     }
   }

   DWORD string_size_in_byte = 0;
   {
     if (!SafeMultiply(reconvert_string->dwStrLen,
                       sizeof(wchar_t),
                       string_size_in_byte)) {
       return false;
     }
   }

   if (string_size_in_byte > buffer_size_in_byte) {
     // |dwStrLen| must be inside of the string buffer.
     return false;
   }

   if (reconvert_string->dwCompStrOffset > buffer_size_in_byte) {
     // |dwStrOffset| must be inside of the string buffer.
     return false;
   }

   if (reconvert_string->dwTargetStrOffset > buffer_size_in_byte) {
     // |dwStrOffset| must be inside of the string buffer.
     return false;
   }

   if ((reconvert_string->dwCompStrOffset % sizeof(wchar_t)) == 1) {
     // |dwCompStrOffset| must be a multiple of sizeof(wchar_t).
     return false;
   }
   const DWORD composition_begin_in_chars =
       reconvert_string->dwCompStrOffset / sizeof(wchar_t);
   DWORD composition_end_in_chars = 0;
   {
     if (!SafeAdd(composition_begin_in_chars,
                  reconvert_string->dwCompStrLen,
                  composition_end_in_chars)) {
       return false;
     }
   }

   if ((reconvert_string->dwTargetStrOffset % sizeof(wchar_t)) == 1) {
     // |dwCompStrOffset| must be a multiple of sizeof(wchar_t).
     return false;
   }
   const DWORD target_begin_in_chars =
       reconvert_string->dwTargetStrOffset / sizeof(wchar_t);
   DWORD target_end_in_chars = 0;
   {
     if (!SafeAdd(target_begin_in_chars,
                  reconvert_string->dwTargetStrLen,
                  target_end_in_chars)) {
       return false;
     }
   }

   const bool incluion_check =
       (composition_begin_in_chars <= target_begin_in_chars) &&
       (target_end_in_chars <= composition_end_in_chars) &&
       (composition_end_in_chars <= reconvert_string->dwStrLen);
   if (!incluion_check) {
     return false;
   }

   if (preceding_text != nullptr) {
     preceding_text->assign(
         string_buffer,
         string_buffer + composition_begin_in_chars);
   }
   if (preceding_composition != nullptr) {
     preceding_composition->assign(
         string_buffer + composition_begin_in_chars,
         string_buffer + target_begin_in_chars);
   }
   if (target != nullptr) {
     target->assign(
         string_buffer + target_begin_in_chars,
         string_buffer + target_end_in_chars);
   }
   if (following_composition != nullptr) {
     following_composition->assign(
         string_buffer + target_end_in_chars,
         string_buffer + composition_end_in_chars);
   }
   if (following_text != nullptr) {
     following_text->assign(
         string_buffer + composition_end_in_chars,
         string_buffer + reconvert_string->dwStrLen);
   }

   return true;
 }

 // Returns true when |reconvert_string| passes every check performed by
 // Decompose. No text is extracted because all outputs are null.
 bool ReconvertString::Validate(const RECONVERTSTRING *reconvert_string) {
   const bool is_well_formed = Decompose(
       reconvert_string, nullptr, nullptr, nullptr, nullptr, nullptr);
   return is_well_formed;
 }

 // Makes sure |reconvert_string| has a non-empty composition range.
 // Returns false when the structure does not decompose or when no character
 // around the cursor can be picked up. Returns true without changes when the
 // composition is already non-empty. Otherwise the composition and target
 // fields are rewritten to cover a segment chosen around the cursor.
 bool ReconvertString::EnsureCompositionIsNotEmpty(
     RECONVERTSTRING *reconvert_string) {
   wstring preceding_text;
   wstring preceding_composition;
   wstring target;
   wstring following_composition;
   wstring following_text;
   const bool decomposed = ReconvertString::Decompose(
       reconvert_string, &preceding_text, &preceding_composition, &target,
       &following_composition, &following_text);
   if (!decomposed) {
     return false;
   }

   if (reconvert_string->dwCompStrLen > 0) {
     // A non-empty composition range is acceptable as is.
     return true;
   }

   DCHECK_EQ(0, reconvert_string->dwCompStrLen);
   DCHECK_EQ(0, reconvert_string->dwTargetStrLen);
   DCHECK(preceding_composition.empty());
   DCHECK(target.empty());
   DCHECK(following_composition.empty());

   // There is no selection here, so the text was split at the offset of the
   // empty composition range, which is treated as the cursor position. For
   // "SN1[Cursor]987A", |preceding_text| is "SN1" and |following_text| is
   // "987A".
   // Existing Japanese IMEs seem to build a composition range out of a
   // minimum segment in this situation. The Mozc server has no text
   // segmentation command, so a naive segmentation based on Util::ScriptType
   // is used instead:
   // 1) The character right after the cursor is examined first ('9' in the
   //    example). If there is none, the character right before the cursor
   //    is examined.
   // 2) Its script type becomes the reference type (NUMBER for '9').
   // 3) The range is extended greedily in both directions while characters
   //    share the reference type, giving "1987" in the example.
   // To avoid unexpected situations, scanning never continues past a
   // character categorized as UNKNOWN_SCRIPT.

   // SCRIPT_TYPE_SIZE doubles as the "no reference type yet" marker.
   Util::ScriptType script_type = Util::SCRIPT_TYPE_SIZE;
   size_t involved_following_len = 0;
   size_t involved_preceding_len = 0;

   // Handle a cursor that sits in the middle of a surrogate pair: both
   // halves are taken and the pair decides the reference script type.
   if (!following_text.empty() && !preceding_text.empty()) {
     const wchar_t last_before = preceding_text[preceding_text.size() - 1];
     const wchar_t first_after = following_text[0];
     if (IS_SURROGATE_PAIR(last_before, first_after)) {
       involved_following_len = 1;
       involved_preceding_len = 1;
       script_type =
           Util::GetScriptType(SurrogatePairToUCS4(last_before, first_after));
     }
   }

   // Scan forward from the cursor. The scan ends as soon as the previously
   // accepted character was UNKNOWN_SCRIPT.
   while ((involved_following_len < following_text.size()) &&
          (script_type != Util::UNKNOWN_SCRIPT)) {
     const size_t next = involved_following_len + 1;
     char32 code_point = following_text[involved_following_len];
     size_t width = 1;
     // Merge a high surrogate with the low surrogate that follows it.
     if (IS_HIGH_SURROGATE(code_point) && (next < following_text.size()) &&
         IS_LOW_SURROGATE(following_text[next])) {
       code_point = SurrogatePairToUCS4(code_point, following_text[next]);
       width = 2;
     }
     // Control codes always terminate the segment.
     if (IsControlCode(code_point)) {
       break;
     }
     const Util::ScriptType found = Util::GetScriptType(code_point);
     if ((script_type != Util::SCRIPT_TYPE_SIZE) && (script_type != found)) {
       // A character of a different script type ends the segment.
       break;
     }
     // Either this is the first character seen, whose type becomes the
     // reference, or the types already match.
     script_type = found;
     involved_following_len += width;
   }

   // Scan backward from the cursor under the same rules.
   while ((involved_preceding_len < preceding_text.size()) &&
          (script_type != Util::UNKNOWN_SCRIPT)) {
     const size_t pos = preceding_text.size() - involved_preceding_len - 1;
     char32 code_point = preceding_text[pos];
     size_t width = 1;
     // Merge a low surrogate with the high surrogate that precedes it. The
     // middle test guarantees that |pos - 1| is a valid index.
     if (IS_LOW_SURROGATE(code_point) &&
         (involved_preceding_len + 1 < preceding_text.size()) &&
         IS_HIGH_SURROGATE(preceding_text[pos - 1])) {
       code_point = SurrogatePairToUCS4(preceding_text[pos - 1], code_point);
       width = 2;
     }
     // Control codes always terminate the segment.
     if (IsControlCode(code_point)) {
       break;
     }
     const Util::ScriptType found = Util::GetScriptType(code_point);
     if ((script_type != Util::SCRIPT_TYPE_SIZE) && (script_type != found)) {
       // A character of a different script type ends the segment.
       break;
     }
     script_type = found;
     involved_preceding_len += width;
   }

   const DWORD segment_len = involved_preceding_len + involved_following_len;
   if (segment_len == 0) {
     // Nothing around the cursor could be picked up.
     return false;
   }

   // The segment starts where the untouched part of |preceding_text| ends.
   // Offsets are stored in bytes, lengths in characters.
   const size_t segment_begin = preceding_text.size() - involved_preceding_len;
   reconvert_string->dwCompStrOffset = segment_begin * sizeof(wchar_t);
   reconvert_string->dwTargetStrOffset = segment_begin * sizeof(wchar_t);
   reconvert_string->dwCompStrLen = segment_len;
   reconvert_string->dwTargetStrLen = segment_len;

   return true;
 }
 }  // namespace win32
 }  // namespace mozc
	// Copyright 2010-2015, Google Inc.
	// All rights reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// * Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above
	// copyright notice, this list of conditions and the following disclaimer
	// in the documentation and/or other materials provided with the
	// distribution.
	// * Neither the name of Google Inc. nor the names of its
	// contributors may be used to endorse or promote products derived from
	// this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	#include "win32/base/imm_reconvert_string.h"

	#include <safeint.h>

	#include "base/logging.h"
	#include "base/util.h"

	namespace mozc {
	namespace win32 {
	namespace {

	using msl::utilities::SafeAdd;
	using msl::utilities::SafeCast;
	using msl::utilities::SafeMultiply;
	using msl::utilities::SafeSubtract;

	// Returns true when adding |ptr->dwSize| to the numeric address of |ptr|
	// does not overflow, i.e. when the buffer described by |ptr| does not run
	// past the end of the process address space.
	// |ptr| must be non-null; |T| must expose a |dwSize| member, which is read
	// unconditionally.
	template <typename T>
	bool CheckAddressSpace(const T *ptr) {
	  // A pointer-sized integer keeps this building on every target
	  // architecture instead of only on x86 and x64.
	  const std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(ptr);
	  std::uintptr_t addr_last = 0;
	  // SafeAdd fails when the buffer exceeds the process address space.
	  return SafeAdd(addr, ptr->dwSize, addr_last);
	}

	// TODO(yukawa): Make a mechanism to generate this code from UnicodeData.txt.
	// Returns true if |c| is a C0 or C1 control code.
	// |c| is taken as a full 32-bit code point because callers pass values that
	// were decoded from surrogate pairs. A 16-bit wchar_t parameter would
	// truncate e.g. U+10000 to U+0000 and misreport it as a control code.
	bool IsControlCode(char32_t c) {
	  // Based on UnicodeData.txt (5.2.0).
	  // [U+0000 (NUL), U+001F (INFORMATION SEPARATOR ONE)]
	  // [U+007F (DELETE), U+009F (APPLICATION PROGRAM COMMAND)]
	  return (c <= 0x001F) || (0x007F <= c && c <= 0x009F);
	}

	// TODO(yukawa): Move this to util.cc.
	// Combines a UTF-16 surrogate pair into a single code point. Only the low
	// ten bits of each unit's distance from its surrogate base are used.
	char32 SurrogatePairToUCS4(wchar_t high, wchar_t low) {
	  const auto upper_bits = (high - 0xD800) & 0x3FF;
	  const auto lower_bits = (low - 0xDC00) & 0x3FF;
	  return (upper_bits << 10) + lower_bits + 0x10000;
	}
	} // anonymous namespace

	bool ReconvertString::Compose(const wstring &preceding_text,
	const wstring &preceding_composition,
	const wstring &target,
	const wstring &following_composition,
	const wstring &following_text,
	RECONVERTSTRING *reconvert_string) {
	if (reconvert_string == nullptr) {
	return false;
	}

	if (!CheckAddressSpace(reconvert_string)) {
	return false;
	}

	DWORD preceding_text_len = 0;
	if (!SafeCast(preceding_text.size(), preceding_text_len)) {
	return false;
	}
	DWORD preceding_composition_len = 0;
	if (!SafeCast(preceding_composition.size(), preceding_composition_len)) {
	return false;
	}
	DWORD target_len = 0;
	if (!SafeCast(target.size(), target_len)) {
	return false;
	}
	DWORD following_composition_len = 0;
	if (!SafeCast(following_composition.size(), following_composition_len)) {
	return false;
	}
	DWORD following_text_len = 0;
	if (!SafeCast(following_text.size(), following_text_len)) {
	return false;
	}

	DWORD total_chars = 0;
	if (!SafeAdd(total_chars, preceding_text_len, total_chars)) {
	return false;
	}
	if (!SafeAdd(total_chars, preceding_composition_len, total_chars)) {
	return false;
	}
	if (!SafeAdd(total_chars, target_len, total_chars)) {
	return false;
	}
	if (!SafeAdd(total_chars, following_composition_len, total_chars)) {
	return false;
	}
	if (!SafeAdd(total_chars, following_text_len, total_chars)) {
	return false;
	}

	DWORD total_buffer_size = 0;
	if (!SafeMultiply(total_chars, sizeof(wchar_t), total_buffer_size)) {
	return false;
	}

	DWORD minimum_dw_size = 0;
	if (!SafeAdd(total_buffer_size, sizeof(RECONVERTSTRING), minimum_dw_size)) {
	return false;
	}

	if (minimum_dw_size > reconvert_string->dwSize) {
	// \|dwSize\| is too small.
	return false;
	}

	// \|dwVersion\| is fixed to 0.
	// http://msdn.microsoft.com/en-us/library/dd319107.aspx
	reconvert_string->dwVersion = 0;

	reconvert_string->dwStrOffset = sizeof(RECONVERTSTRING);
	reconvert_string->dwStrLen = total_chars;
	reconvert_string->dwTargetStrLen = target_len;

	if (!SafeAdd(preceding_composition_len, target_len,
	reconvert_string->dwCompStrLen)) {
	return false;
	}
	if (!SafeAdd(reconvert_string->dwCompStrLen, following_composition_len,
	reconvert_string->dwCompStrLen)) {
	return false;
	}

	if (!SafeMultiply(preceding_text_len, sizeof(wchar_t),
	reconvert_string->dwCompStrOffset)) {
	return false;
	}

	DWORD target_offset_chars = 0;
	if (!SafeAdd(preceding_text_len, preceding_composition_len,
	target_offset_chars)) {
	return false;
	}
	if (!SafeMultiply(target_offset_chars, sizeof(wchar_t),
	reconvert_string->dwTargetStrOffset)) {
	return false;
	}

	wchar_t string_buffer = reinterpret_cast<wchar_t >(
	reinterpret_cast<BYTE *>(reconvert_string) +
	reconvert_string->dwStrOffset);

	// concatenate \|preceding_text\|, \|preceding_composition\|, \|target\|,
	// \|following_composition\|, and \|following_text\| into \|string_buffer\|.
	{
	size_t index = 0;
	for (size_t i = 0; i < preceding_text.size(); ++i) {
	string_buffer[index] = preceding_text[i];
	++index;
	}
	for (size_t i = 0; i < preceding_composition.size(); ++i) {
	string_buffer[index] = preceding_composition[i];
	++index;
	}
	for (size_t i = 0; i < target.size(); ++i) {
	string_buffer[index] = target[i];
	++index;
	}
	for (size_t i = 0; i < following_composition.size(); ++i) {
	string_buffer[index] = following_composition[i];
	++index;
	}
	for (size_t i = 0; i < following_text.size(); ++i) {
	string_buffer[index] = following_text[i];
	++index;
	}
	}

	return true;
	}

	// Validates |reconvert_string| and splits its text into five consecutive
	// pieces (ranges are in characters from the start of the text):
	//   preceding_text        : [0, composition begin)
	//   preceding_composition : [composition begin, target begin)
	//   target                : [target begin, target end)
	//   following_composition : [target end, composition end)
	//   following_text        : [composition end, dwStrLen)
	// Any output pointer may be null, in which case that piece is not produced.
	// Returns false, before writing to any output, when |reconvert_string| is
	// null or fails one of the consistency checks below.
	bool ReconvertString::Decompose(const RECONVERTSTRING *reconvert_string,
	                                wstring *preceding_text,
	                                wstring *preceding_composition,
	                                wstring *target,
	                                wstring *following_composition,
	                                wstring *following_text) {
	  if (reconvert_string == nullptr) {
	    return false;
	  }

	  if (reconvert_string->dwSize < sizeof(RECONVERTSTRING)) {
	    // |dwSize| must be equal to or greater than sizeof(RECONVERTSTRING).
	    return false;
	  }

	  if (reconvert_string->dwVersion != 0) {
	    // |dwVersion| must be 0.
	    return false;
	  }

	  if (!CheckAddressSpace(reconvert_string)) {
	    return false;
	  }

	  if (reconvert_string->dwStrOffset > reconvert_string->dwSize) {
	    // |dwStrOffset| must be inside of the buffer.
	    return false;
	  }

	  // The text is addressed through a pointer into the buffer, not through
	  // a single wchar_t value.
	  const wchar_t *string_buffer = reinterpret_cast<const wchar_t *>(
	      reinterpret_cast<const BYTE *>(reconvert_string) +
	      reconvert_string->dwStrOffset);

	  // Number of bytes available for the text after |dwStrOffset|.
	  DWORD buffer_size_in_byte = 0;
	  // This must always succeed because |dwStrOffset <= dwSize|.
	  if (!SafeSubtract(reconvert_string->dwSize,
	                    reconvert_string->dwStrOffset,
	                    buffer_size_in_byte)) {
	    return false;
	  }

	  DWORD string_size_in_byte = 0;
	  if (!SafeMultiply(reconvert_string->dwStrLen,
	                    sizeof(wchar_t),
	                    string_size_in_byte)) {
	    return false;
	  }

	  if (string_size_in_byte > buffer_size_in_byte) {
	    // |dwStrLen| must be inside of the string buffer.
	    return false;
	  }

	  if (reconvert_string->dwCompStrOffset > buffer_size_in_byte) {
	    // |dwCompStrOffset| must be inside of the string buffer.
	    return false;
	  }

	  if (reconvert_string->dwTargetStrOffset > buffer_size_in_byte) {
	    // |dwTargetStrOffset| must be inside of the string buffer.
	    return false;
	  }

	  // Compare the remainder against zero so that the check holds for any
	  // sizeof(wchar_t), not only when it equals 2.
	  if ((reconvert_string->dwCompStrOffset % sizeof(wchar_t)) != 0) {
	    // |dwCompStrOffset| must be a multiple of sizeof(wchar_t).
	    return false;
	  }
	  const DWORD composition_begin_in_chars =
	      reconvert_string->dwCompStrOffset / sizeof(wchar_t);
	  DWORD composition_end_in_chars = 0;
	  if (!SafeAdd(composition_begin_in_chars,
	               reconvert_string->dwCompStrLen,
	               composition_end_in_chars)) {
	    return false;
	  }

	  if ((reconvert_string->dwTargetStrOffset % sizeof(wchar_t)) != 0) {
	    // |dwTargetStrOffset| must be a multiple of sizeof(wchar_t).
	    return false;
	  }
	  const DWORD target_begin_in_chars =
	      reconvert_string->dwTargetStrOffset / sizeof(wchar_t);
	  DWORD target_end_in_chars = 0;
	  if (!SafeAdd(target_begin_in_chars,
	               reconvert_string->dwTargetStrLen,
	               target_end_in_chars)) {
	    return false;
	  }

	  // The target must lie inside the composition, and the composition must
	  // lie inside the string. Together with the overflow-checked additions
	  // above, this keeps every range used below ordered and inside the text.
	  const bool inclusion_check =
	      (composition_begin_in_chars <= target_begin_in_chars) &&
	      (target_end_in_chars <= composition_end_in_chars) &&
	      (composition_end_in_chars <= reconvert_string->dwStrLen);
	  if (!inclusion_check) {
	    return false;
	  }

	  if (preceding_text != nullptr) {
	    preceding_text->assign(
	        string_buffer,
	        string_buffer + composition_begin_in_chars);
	  }
	  if (preceding_composition != nullptr) {
	    preceding_composition->assign(
	        string_buffer + composition_begin_in_chars,
	        string_buffer + target_begin_in_chars);
	  }
	  if (target != nullptr) {
	    target->assign(
	        string_buffer + target_begin_in_chars,
	        string_buffer + target_end_in_chars);
	  }
	  if (following_composition != nullptr) {
	    following_composition->assign(
	        string_buffer + target_end_in_chars,
	        string_buffer + composition_end_in_chars);
	  }
	  if (following_text != nullptr) {
	    following_text->assign(
	        string_buffer + composition_end_in_chars,
	        string_buffer + reconvert_string->dwStrLen);
	  }

	  return true;
	}

	// Returns true when |reconvert_string| passes every check performed by
	// Decompose. No text is extracted because all outputs are null.
	bool ReconvertString::Validate(const RECONVERTSTRING *reconvert_string) {
	  const bool is_well_formed = Decompose(
	      reconvert_string, nullptr, nullptr, nullptr, nullptr, nullptr);
	  return is_well_formed;
	}

	// Makes sure |reconvert_string| has a non-empty composition range.
	// Returns false when the structure does not decompose or when no character
	// around the cursor can be picked up. Returns true without changes when the
	// composition is already non-empty. Otherwise the composition and target
	// fields are rewritten to cover a segment chosen around the cursor.
	bool ReconvertString::EnsureCompositionIsNotEmpty(
	    RECONVERTSTRING *reconvert_string) {
	  wstring preceding_text;
	  wstring preceding_composition;
	  wstring target;
	  wstring following_composition;
	  wstring following_text;
	  const bool decomposed = ReconvertString::Decompose(
	      reconvert_string, &preceding_text, &preceding_composition, &target,
	      &following_composition, &following_text);
	  if (!decomposed) {
	    return false;
	  }

	  if (reconvert_string->dwCompStrLen > 0) {
	    // A non-empty composition range is acceptable as is.
	    return true;
	  }

	  DCHECK_EQ(0, reconvert_string->dwCompStrLen);
	  DCHECK_EQ(0, reconvert_string->dwTargetStrLen);
	  DCHECK(preceding_composition.empty());
	  DCHECK(target.empty());
	  DCHECK(following_composition.empty());

	  // There is no selection here, so the text was split at the offset of the
	  // empty composition range, which is treated as the cursor position. For
	  // "SN1[Cursor]987A", |preceding_text| is "SN1" and |following_text| is
	  // "987A".
	  // Existing Japanese IMEs seem to build a composition range out of a
	  // minimum segment in this situation. The Mozc server has no text
	  // segmentation command, so a naive segmentation based on Util::ScriptType
	  // is used instead:
	  // 1) The character right after the cursor is examined first ('9' in the
	  //    example). If there is none, the character right before the cursor
	  //    is examined.
	  // 2) Its script type becomes the reference type (NUMBER for '9').
	  // 3) The range is extended greedily in both directions while characters
	  //    share the reference type, giving "1987" in the example.
	  // To avoid unexpected situations, scanning never continues past a
	  // character categorized as UNKNOWN_SCRIPT.

	  // SCRIPT_TYPE_SIZE doubles as the "no reference type yet" marker.
	  Util::ScriptType script_type = Util::SCRIPT_TYPE_SIZE;
	  size_t involved_following_len = 0;
	  size_t involved_preceding_len = 0;

	  // Handle a cursor that sits in the middle of a surrogate pair: both
	  // halves are taken and the pair decides the reference script type.
	  // The characters themselves are compared, not the iterators.
	  if (!following_text.empty() && !preceding_text.empty()) {
	    const wchar_t last_before = preceding_text[preceding_text.size() - 1];
	    const wchar_t first_after = following_text[0];
	    if (IS_SURROGATE_PAIR(last_before, first_after)) {
	      involved_following_len = 1;
	      involved_preceding_len = 1;
	      script_type =
	          Util::GetScriptType(SurrogatePairToUCS4(last_before, first_after));
	    }
	  }

	  // Scan forward from the cursor. The scan ends as soon as the previously
	  // accepted character was UNKNOWN_SCRIPT.
	  while ((involved_following_len < following_text.size()) &&
	         (script_type != Util::UNKNOWN_SCRIPT)) {
	    const size_t next = involved_following_len + 1;
	    char32 code_point = following_text[involved_following_len];
	    size_t width = 1;
	    // Merge a high surrogate with the low surrogate that follows it.
	    if (IS_HIGH_SURROGATE(code_point) && (next < following_text.size()) &&
	        IS_LOW_SURROGATE(following_text[next])) {
	      code_point = SurrogatePairToUCS4(code_point, following_text[next]);
	      width = 2;
	    }
	    // Control codes always terminate the segment.
	    if (IsControlCode(code_point)) {
	      break;
	    }
	    const Util::ScriptType found = Util::GetScriptType(code_point);
	    if ((script_type != Util::SCRIPT_TYPE_SIZE) && (script_type != found)) {
	      // A character of a different script type ends the segment.
	      break;
	    }
	    // Either this is the first character seen, whose type becomes the
	    // reference, or the types already match.
	    script_type = found;
	    involved_following_len += width;
	  }

	  // Scan backward from the cursor under the same rules.
	  while ((involved_preceding_len < preceding_text.size()) &&
	         (script_type != Util::UNKNOWN_SCRIPT)) {
	    const size_t pos = preceding_text.size() - involved_preceding_len - 1;
	    char32 code_point = preceding_text[pos];
	    size_t width = 1;
	    // Merge a low surrogate with the high surrogate that precedes it. The
	    // middle test guarantees that |pos - 1| is a valid index.
	    if (IS_LOW_SURROGATE(code_point) &&
	        (involved_preceding_len + 1 < preceding_text.size()) &&
	        IS_HIGH_SURROGATE(preceding_text[pos - 1])) {
	      code_point = SurrogatePairToUCS4(preceding_text[pos - 1], code_point);
	      width = 2;
	    }
	    // Control codes always terminate the segment.
	    if (IsControlCode(code_point)) {
	      break;
	    }
	    const Util::ScriptType found = Util::GetScriptType(code_point);
	    if ((script_type != Util::SCRIPT_TYPE_SIZE) && (script_type != found)) {
	      // A character of a different script type ends the segment.
	      break;
	    }
	    script_type = found;
	    involved_preceding_len += width;
	  }

	  const DWORD segment_len = involved_preceding_len + involved_following_len;
	  if (segment_len == 0) {
	    // Nothing around the cursor could be picked up.
	    return false;
	  }

	  // The segment starts where the untouched part of |preceding_text| ends.
	  // Offsets are stored in bytes, lengths in characters.
	  const size_t segment_begin = preceding_text.size() - involved_preceding_len;
	  reconvert_string->dwCompStrOffset = segment_begin * sizeof(wchar_t);
	  reconvert_string->dwTargetStrOffset = segment_begin * sizeof(wchar_t);
	  reconvert_string->dwCompStrLen = segment_len;
	  reconvert_string->dwTargetStrLen = segment_len;

	  return true;
	}
	} // namespace win32
	} // namespace mozc