// blob: 22e0b20883b4d60b333a053b5762ab0f25e68ef1 [file] [log] [blame]
// Copyright 2010-2014, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "base/util.h"
#include <climits>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <map>
#include <sstream>
#include <string>
#include "base/clock_mock.h"
#include "base/compiler_specific.h"
#include "base/file_stream.h"
#include "base/file_util.h"
#include "base/logging.h"
#include "base/mutex.h"
#include "base/number_util.h"
#include "base/thread.h"
#include "testing/base/public/googletest.h"
#include "testing/base/public/gunit.h"
DECLARE_string(test_srcdir);
namespace mozc {
namespace {
// Reads the character set test table, a TSV file located under
// FLAGS_test_srcdir, and inserts one (code point, character set) entry into
// |test_map| for every data row.  Lines starting with '#' are skipped.
// A missing file, a row with fewer than two columns, or an unknown character
// set name aborts through CHECK.
void FillTestCharacterSetMap(map<char32, Util::CharacterSet> *test_map) {
  CHECK(test_map);

  const char kCharacterSetTestFile[] =
      "data/test/character_set/character_set.tsv";
  const string &path = FileUtil::JoinPath(FLAGS_test_srcdir,
                                          kCharacterSetTestFile);
  CHECK(FileUtil::FileExists(path)) << path << " does not exist.";

  // Translation table from the names used in the TSV to the enum values.
  typedef map<string, Util::CharacterSet> NameToCharacterSet;
  NameToCharacterSet name_to_set;
  name_to_set["ASCII"] = Util::ASCII;
  name_to_set["JISX0201"] = Util::JISX0201;
  name_to_set["JISX0208"] = Util::JISX0208;
  name_to_set["JISX0212"] = Util::JISX0212;
  name_to_set["JISX0213"] = Util::JISX0213;
  name_to_set["CP932"] = Util::CP932;
  // UNICODE_ONLY should not appear in the tsv file though.
  name_to_set["UNICODE_ONLY"] = Util::UNICODE_ONLY;

  // Walk the TSV row by row until reading fails (normally at end of file).
  InputFileStream tsv(path.c_str());
  for (string row; !getline(tsv, row).fail();) {
    if (Util::StartsWith(row, "#")) {
      continue;  // Comment line.
    }
    vector<string> fields;
    mozc::Util::SplitStringUsing(row, "\t", &fields);
    CHECK_GE(fields.size(), 2) << "format error: " << row;
    const char32 ucs4 = NumberUtil::SimpleAtoi(fields[0]);
    const NameToCharacterSet::const_iterator found =
        name_to_set.find(fields[1]);
    // We cannot use CHECK_NE here because of overload resolution.
    CHECK(name_to_set.end() != found)
        << "Unknown character set type: " << fields[1];
    test_map->insert(make_pair(ucs4, found->second));
  }
}
// Returns the character set that the test data associates with |ucs4|.
// A code point without an entry in |test_map| is interpreted as
// |Util::UNICODE_ONLY|.
Util::CharacterSet GetExpectedCharacterSet(
    const map<char32, Util::CharacterSet> &test_map,
    char32 ucs4) {
  // Look the key up once; the same iterator serves both the presence check
  // and the value access.
  const map<char32, Util::CharacterSet>::const_iterator itr =
      test_map.find(ucs4);
  if (itr == test_map.end()) {
    return Util::UNICODE_ONLY;
  }
  return itr->second;
}
} // namespace
// Thread whose body calls Util::Sleep(1000) three times in a row.
class ThreadTest: public Thread {
 public:
  virtual void Run() {
    int remaining = 3;
    while (remaining-- > 0) {
      Util::Sleep(1000);
    }
  }
};
// Util::JoinStrings is expected to place the delimiter between neighbouring
// elements only.
TEST(UtilTest, JoinStrings) {
  const char *const kPieces[] = { "ab", "cdef", "ghr" };
  vector<string> input(kPieces, kPieces + arraysize(kPieces));

  string output;
  Util::JoinStrings(input, ":", &output);
  EXPECT_EQ("ab:cdef:ghr", output);
}
// Util::JoinStringPieces: same contract as JoinStrings, but for StringPiece
// elements.  Covers a single element, several elements, and a delimiter that
// is longer than one character.
TEST(UtilTest, JoinStringPieces) {
  const char *const kPieces[] = { "ab", "cdef", "ghr" };
  {
    // A single element is copied without any delimiter.
    vector<StringPiece> input(kPieces, kPieces + 1);
    string output;
    Util::JoinStringPieces(input, ":", &output);
    EXPECT_EQ("ab", output);
  }
  {
    // One-character delimiter.
    vector<StringPiece> input(kPieces, kPieces + arraysize(kPieces));
    string output;
    Util::JoinStringPieces(input, ":", &output);
    EXPECT_EQ("ab:cdef:ghr", output);
  }
  {
    // Two-character delimiter.
    vector<StringPiece> input(kPieces, kPieces + arraysize(kPieces));
    string output;
    Util::JoinStringPieces(input, "::", &output);
    EXPECT_EQ("ab::cdef::ghr", output);
  }
}
// Util::ConcatStrings must overwrite the output with the concatenation of
// its two operands, including when either operand is empty.
TEST(UtilTest, ConcatStrings) {
  // Each row: { first operand, second operand, expected result }.
  const char *const kCases[][3] = {
    { "", "", "" },
    { "ABC", "", "ABC" },
    { "", "DEF", "DEF" },
    { "ABC", "DEF", "ABCDEF" },
  };

  // The same output string is reused on purpose, as each call is expected
  // to replace the previous contents.
  string s;
  for (size_t i = 0; i < arraysize(kCases); ++i) {
    Util::ConcatStrings(kCases[i][0], kCases[i][1], &s);
    EXPECT_EQ(kCases[i][2], s);
  }
}
// Util::AppendStringWithDelimiter appends a value to an existing string.
// The expectations below pin that the delimiter is inserted only when the
// output is already non-empty, and that an empty value still gets its
// delimiter.
TEST(UtilTest, AppendStringWithDelimiter) {
  const char kDelimiter[] = ":";
  string result;

  {
    // Empty output: the value is stored without a leading delimiter.
    result.clear();
    Util::AppendStringWithDelimiter(kDelimiter, "test", &result);
    EXPECT_EQ("test", result);
  }

  {
    // Non-empty output: delimiter, then the value.
    result = "foo";
    Util::AppendStringWithDelimiter(kDelimiter, "test", &result);
    EXPECT_EQ("foo:test", result);
  }

  {
    // Appending an empty value still emits the delimiter.
    result = "foo";
    Util::AppendStringWithDelimiter(kDelimiter, "", &result);
    EXPECT_EQ("foo:", result);
  }
}
// Exercises SplitIterator with a single delimiter character in SkipEmpty
// mode: leading, trailing and repeated delimiters must not produce empty
// tokens.
TEST(UtilTest, SplitIterator_SingleDelimiter_SkipEmpty) {
  typedef SplitIterator<SingleDelimiter, SkipEmpty> SplitIterator;
  {
    // Empty input: iteration is finished immediately.
    SplitIterator iter("", " ");
    EXPECT_TRUE(iter.Done());
  }
  {
    // A default-constructed StringPiece behaves like empty input.
    SplitIterator iter(StringPiece(), " ");
    EXPECT_TRUE(iter.Done());
  }
  {
    // Plain case: tokens separated by one delimiter each.
    const char *s = "a b cde";
    SplitIterator iter(s, " ");
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("a", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("b", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("cde", iter.Get());
    iter.Next();
    EXPECT_TRUE(iter.Done());
  }
  {
    // Leading and trailing delimiters are ignored.
    const char *s = " a b cde ";
    SplitIterator iter(s, " ");
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("a", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("b", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("cde", iter.Get());
    iter.Next();
    EXPECT_TRUE(iter.Done());
  }
  {
    // Only the first five bytes are handed to the iterator, so nothing of
    // "cde" may be reached.  The two spaces after "b" are required: with a
    // single space the five-byte prefix would be "a b c" and yield a third
    // token.
    StringPiece s("a b  cde ", 5);  // s = "a b  ";
    SplitIterator iter(s, " ");
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("a", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("b", iter.Get());
    iter.Next();
    EXPECT_TRUE(iter.Done());
  }
}
// Exercises SplitIterator with a set of delimiter characters in SkipEmpty
// mode: any mix or run of the delimiter characters is expected to separate
// tokens without producing empty ones.
TEST(UtilTest, SplitIterator_MultiDelimiter_SkipEmpty) {
typedef SplitIterator<MultiDelimiter, SkipEmpty> SplitIterator;
// Empty input: iteration is finished immediately.
{
SplitIterator iter("", " \t,");
EXPECT_TRUE(iter.Done());
}
// A default-constructed StringPiece behaves like empty input.
{
SplitIterator iter(StringPiece(), ",.");
EXPECT_TRUE(iter.Done());
}
// Each of the three delimiter characters separates tokens.
{
const char *s = "a b\tcde:fg";
SplitIterator iter(s, " \t:");
EXPECT_FALSE(iter.Done());
EXPECT_EQ("a", iter.Get());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("b", iter.Get());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("cde", iter.Get());
// Get() must not advance the iterator.
EXPECT_FALSE(iter.Done());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("fg", iter.Get());
iter.Next();
EXPECT_TRUE(iter.Done());
}
// Leading, trailing and consecutive delimiters yield no empty tokens.
{
const char *s = " \t:a b\t\tcde:fg:";
SplitIterator iter(s, " \t:");
EXPECT_FALSE(iter.Done());
EXPECT_EQ("a", iter.Get());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("b", iter.Get());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("cde", iter.Get());
// Get() must not advance the iterator.
EXPECT_FALSE(iter.Done());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("fg", iter.Get());
iter.Next();
EXPECT_TRUE(iter.Done());
}
}
// Exercises SplitIterator with a single delimiter character in AllowEmpty
// mode: a leading delimiter, a trailing delimiter and two adjacent
// delimiters are each expected to produce an empty token.
TEST(UtilTest, SplitIterator_SingleDelimiter_AllowEmpty) {
  typedef SplitIterator<SingleDelimiter, AllowEmpty> SplitIterator;
  {
    // Empty input: iteration is finished immediately, no empty token.
    SplitIterator iter("", " ");
    EXPECT_TRUE(iter.Done());
  }
  {
    // A default-constructed StringPiece behaves like empty input.
    SplitIterator iter(StringPiece(), " ");
    EXPECT_TRUE(iter.Done());
  }
  {
    // No empty fields at all.
    const char *s = "a b cde";
    SplitIterator iter(s, " ");
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("a", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("b", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("cde", iter.Get());
    iter.Next();
    EXPECT_TRUE(iter.Done());
  }
  {
    // Leading space, double space between "b" and "cde", trailing space:
    // three empty tokens in total.  The double space is essential for the
    // empty token expected between "b" and "cde".
    const char *s = " a b  cde ";
    SplitIterator iter(s, " ");
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("a", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("b", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("cde", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("", iter.Get());
    iter.Next();
    EXPECT_TRUE(iter.Done());
  }
  {
    // Only the first five bytes are handed to the iterator.  They end in
    // two spaces, which accounts for the two trailing empty tokens.
    StringPiece s("a b  cde ", 5);  // s = "a b  ";
    SplitIterator iter(s, " ");
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("a", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("b", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("", iter.Get());
    iter.Next();
    EXPECT_FALSE(iter.Done());
    EXPECT_EQ("", iter.Get());
    iter.Next();
    EXPECT_TRUE(iter.Done());
  }
}
// Exercises SplitIterator with a set of delimiter characters in AllowEmpty
// mode: adjacent delimiters and a trailing delimiter are each expected to
// produce an empty token.
TEST(UtilTest, SplitIterator_MultiDelimiter_AllowEmpty) {
typedef SplitIterator<MultiDelimiter, AllowEmpty> SplitIterator;
// Empty input: iteration is finished immediately, no empty token.
{
SplitIterator iter("", " \t,");
EXPECT_TRUE(iter.Done());
}
// A default-constructed StringPiece behaves like empty input.
{
SplitIterator iter(StringPiece(), ",.");
EXPECT_TRUE(iter.Done());
}
// No empty fields: each delimiter character separates two tokens.
{
const char *s = "a b\tcde:fg";
SplitIterator iter(s, " \t:");
EXPECT_FALSE(iter.Done());
EXPECT_EQ("a", iter.Get());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("b", iter.Get());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("cde", iter.Get());
// Get() must not advance the iterator.
EXPECT_FALSE(iter.Done());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("fg", iter.Get());
iter.Next();
EXPECT_TRUE(iter.Done());
}
// "\t\t" yields an empty token between "b" and "cde"; the trailing ':'
// yields a final empty token.
{
const char *s = "a b\t\tcde:fg:";
SplitIterator iter(s, " \t:");
EXPECT_FALSE(iter.Done());
EXPECT_EQ("a", iter.Get());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("b", iter.Get());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("", iter.Get());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("cde", iter.Get());
// Get() must not advance the iterator.
EXPECT_FALSE(iter.Done());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("fg", iter.Get());
iter.Next();
EXPECT_FALSE(iter.Done());
EXPECT_EQ("", iter.Get());
iter.Next();
EXPECT_TRUE(iter.Done());
}
}
// Util::SplitStringUsing splits on any of the given delimiter characters
// and, per the expectations below, drops empty fields.  The element count
// is checked with ASSERT_EQ so that a wrong count stops the test instead of
// indexing past the end of |output|.
TEST(UtilTest, SplitStringUsing) {
  {
    const string input = "a b c def";
    vector<string> output;
    Util::SplitStringUsing(input, " ", &output);
    ASSERT_EQ(4, output.size());
    EXPECT_EQ("a", output[0]);
    EXPECT_EQ("b", output[1]);
    EXPECT_EQ("c", output[2]);
    EXPECT_EQ("def", output[3]);
  }
  {
    // A leading delimiter yields no empty field.
    const string input = " a b c";
    vector<string> output;
    Util::SplitStringUsing(input, " ", &output);
    ASSERT_EQ(3, output.size());
    EXPECT_EQ("a", output[0]);
    EXPECT_EQ("b", output[1]);
    EXPECT_EQ("c", output[2]);
  }
  {
    // A trailing delimiter yields no empty field.
    const string input = "a b c ";
    vector<string> output;
    Util::SplitStringUsing(input, " ", &output);
    ASSERT_EQ(3, output.size());
    EXPECT_EQ("a", output[0]);
    EXPECT_EQ("b", output[1]);
    EXPECT_EQ("c", output[2]);
  }
  {
    // Every character of the delimiter string acts as a separator.
    const string input = "a:b cd ";
    vector<string> output;
    Util::SplitStringUsing(input, ": ", &output);
    ASSERT_EQ(3, output.size());
    EXPECT_EQ("a", output[0]);
    EXPECT_EQ("b", output[1]);
    EXPECT_EQ("cd", output[2]);
  }
  {
    // With no delimiter characters the input comes back as a single field.
    const string input = "Empty delimiter";
    vector<string> output;
    Util::SplitStringUsing(input, "", &output);
    ASSERT_EQ(1, output.size());
    EXPECT_EQ(input, output[0]);
  }
}
// Util::SplitStringAllowEmpty splits on any of the given delimiter
// characters and keeps empty fields.  Each input contains two adjacent
// delimiters between "b" and "c", which is where the empty element expected
// at that position comes from.  The element count is checked with ASSERT_EQ
// so that a wrong count stops the test instead of indexing past the end of
// |output|.
TEST(UtilTest, SplitStringAllowEmpty) {
  {
    const string input = "a b  c def";
    vector<string> output;
    Util::SplitStringAllowEmpty(input, " ", &output);
    ASSERT_EQ(5, output.size());
    EXPECT_EQ("a", output[0]);
    EXPECT_EQ("b", output[1]);
    EXPECT_EQ("", output[2]);
    EXPECT_EQ("c", output[3]);
    EXPECT_EQ("def", output[4]);
  }
  {
    // A leading delimiter produces a leading empty field.
    const string input = " a b  c";
    vector<string> output;
    Util::SplitStringAllowEmpty(input, " ", &output);
    ASSERT_EQ(5, output.size());
    EXPECT_EQ("", output[0]);
    EXPECT_EQ("a", output[1]);
    EXPECT_EQ("b", output[2]);
    EXPECT_EQ("", output[3]);
    EXPECT_EQ("c", output[4]);
  }
  {
    // A trailing delimiter produces a trailing empty field.
    const string input = "a b  c ";
    vector<string> output;
    Util::SplitStringAllowEmpty(input, " ", &output);
    ASSERT_EQ(5, output.size());
    EXPECT_EQ("a", output[0]);
    EXPECT_EQ("b", output[1]);
    EXPECT_EQ("", output[2]);
    EXPECT_EQ("c", output[3]);
    EXPECT_EQ("", output[4]);
  }
  {
    // Every character of the delimiter string acts as a separator.
    const string input = "a:b  c ";
    vector<string> output;
    Util::SplitStringAllowEmpty(input, ": ", &output);
    ASSERT_EQ(5, output.size());
    EXPECT_EQ("a", output[0]);
    EXPECT_EQ("b", output[1]);
    EXPECT_EQ("", output[2]);
    EXPECT_EQ("c", output[3]);
    EXPECT_EQ("", output[4]);
  }
  {
    // With no delimiter characters the input comes back as a single field.
    const string input = "Empty delimiter";
    vector<string> output;
    Util::SplitStringAllowEmpty(input, "", &output);
    ASSERT_EQ(1, output.size());
    EXPECT_EQ(input, output[0]);
  }
}
// Util::StripWhiteSpaces is expected to trim whitespace from both ends of
// the input and to leave interior whitespace alone.
TEST(UtilTest, StripWhiteSpaces) {
  // Each row: { input, expected output }.
  const char *const kCases[][2] = {
    // basic scenario.
    { " foo ", "foo" },
    // no space means just copy.
    { "foo", "foo" },
    // tabs and linebreaks are also spaces.
    { " \tfoo\n", "foo" },
    // spaces in the middle remain.
    { " foo bar baz ", "foo bar baz" },
    // all spaces means clear out output.
    { " \v \r ", "" },
    // empty input.
    { "", "" },
    // one character.
    { "a", "a" },
  };

  for (size_t i = 0; i < arraysize(kCases); ++i) {
    const string input = kCases[i][0];
    string output;  // Fresh output for every case.
    Util::StripWhiteSpaces(input, &output);
    EXPECT_EQ(kCases[i][1], output);
  }
}
// Util::SplitStringToUtf8Chars is expected to return one element per UTF-8
// encoded character, whatever its byte length.
TEST(UtilTest, SplitStringToUtf8Chars) {
  {
    // Empty input yields no characters.
    vector<string> output;
    Util::SplitStringToUtf8Chars("", &output);
    EXPECT_EQ(0, output.size());
  }
  {
    // "a" "あ" "Ａ" (full-width) "亜" "\n" "a"
    const string kInputs[] = {
      "a",
      "\xE3\x81\x82",
      "\xEF\xBC\xA1",
      "\xE4\xBA\x9C",
      "\n",
      "a",
    };
    string joined_string;
    for (size_t i = 0; i < arraysize(kInputs); ++i) {
      joined_string += kInputs[i];
    }
    vector<string> output;
    Util::SplitStringToUtf8Chars(joined_string, &output);
    // Fatal on mismatch: the loop below indexes |kInputs| with indices taken
    // from |output| and must never run past the end of the array.
    ASSERT_EQ(arraysize(kInputs), output.size());
    for (size_t i = 0; i < output.size(); ++i) {
      EXPECT_EQ(kInputs[i], output[i]);
    }
  }
}
// Util::SplitCSV: quoted fields, doubled quotes, empty fields and malformed
// input.  gtest assertions are used instead of CHECK so that a failure is
// reported as a test failure rather than aborting the whole test binary.
// Sizes are checked with ASSERT_EQ because the element checks that follow
// index into the vector.
TEST(UtilTest, SplitCSV) {
  vector<string> answer_vector;

  // Quoted field containing a comma, and "" as an escaped quote.
  Util::SplitCSV(
      "Google,x,\"Buchheit, Paul\",\"string with \"\" quote in it\"",
      &answer_vector);
  ASSERT_EQ(4, answer_vector.size());
  EXPECT_EQ("Google", answer_vector[0]);
  EXPECT_EQ("x", answer_vector[1]);
  EXPECT_EQ("Buchheit, Paul", answer_vector[2]);
  EXPECT_EQ("string with \" quote in it", answer_vector[3]);

  // A trailing comma produces a trailing empty field.
  Util::SplitCSV("Google,hello,", &answer_vector);
  ASSERT_EQ(3, answer_vector.size());
  EXPECT_EQ("Google", answer_vector[0]);
  EXPECT_EQ("hello", answer_vector[1]);
  EXPECT_EQ("", answer_vector[2]);

  // Spaces inside a field are preserved.
  Util::SplitCSV("Google rocks,hello", &answer_vector);
  ASSERT_EQ(2, answer_vector.size());
  EXPECT_EQ("Google rocks", answer_vector[0]);
  EXPECT_EQ("hello", answer_vector[1]);

  // Only empty fields, one of them quoted.
  Util::SplitCSV(",,\"\",,", &answer_vector);
  ASSERT_EQ(5, answer_vector.size());
  EXPECT_EQ("", answer_vector[0]);
  EXPECT_EQ("", answer_vector[1]);
  EXPECT_EQ("", answer_vector[2]);
  EXPECT_EQ("", answer_vector[3]);
  EXPECT_EQ("", answer_vector[4]);

  // Test a string containing a comma.
  Util::SplitCSV("\",\",hello", &answer_vector);
  ASSERT_EQ(2, answer_vector.size());
  EXPECT_EQ(",", answer_vector[0]);
  EXPECT_EQ("hello", answer_vector[1]);

  // Invalid CSV: the closing quote is missing.
  Util::SplitCSV("\"no,last,quote", &answer_vector);
  ASSERT_EQ(1, answer_vector.size());
  EXPECT_EQ("no,last,quote", answer_vector[0]);

  // A backslash is an ordinary character, not an escape.
  Util::SplitCSV("backslash\\,is,no,an,\"escape\"", &answer_vector);
  ASSERT_EQ(5, answer_vector.size());
  EXPECT_EQ("backslash\\", answer_vector[0]);
  EXPECT_EQ("is", answer_vector[1]);
  EXPECT_EQ("no", answer_vector[2]);
  EXPECT_EQ("an", answer_vector[3]);
  EXPECT_EQ("escape", answer_vector[4]);

  // Empty input yields no fields at all.
  Util::SplitCSV("", &answer_vector);
  EXPECT_EQ(0, answer_vector.size());
}
// Util::StringReplace: with the boolean argument set to true every
// occurrence is expected to be replaced, with false only the first one.
TEST(UtilTest, ReplaceString) {
  const string kSource = "foobarfoobar";
  {
    string replaced;
    Util::StringReplace(kSource, "bar", "buz", true, &replaced);
    EXPECT_EQ("foobuzfoobuz", replaced);
  }
  {
    string replaced;
    Util::StringReplace(kSource, "bar", "buz", false, &replaced);
    EXPECT_EQ("foobuzfoobar", replaced);
  }
}
// Util::LowerString converts in place.  Both ASCII letters and their
// full-width counterparts are expected to be lower-cased; full-width
// symbols stay untouched.
TEST(UtilTest, LowerString) {
  string ascii = "TeSTtest";
  Util::LowerString(&ascii);
  EXPECT_EQ("testtest", ascii);

  // "TeST@ABCXYZ[`abcxyz{" written with full-width characters.
  string full_width =
      "\xef\xbc\xb4\xef\xbd\x85\xef\xbc\xb3\xef\xbc\xb4\xef\xbc\xa0\xef"
      "\xbc\xa1\xef\xbc\xa2\xef\xbc\xa3\xef\xbc\xb8\xef\xbc\xb9\xef\xbc"
      "\xba\xef\xbc\xbb\xef\xbd\x80\xef\xbd\x81\xef\xbd\x82\xef\xbd\x83"
      "\xef\xbd\x98\xef\xbd\x99\xef\xbd\x9a\xef\xbd\x9b";
  // "test@abcxyz[`abcxyz{" written with full-width characters.
  const char kExpected[] =
      "\xef\xbd\x94\xef\xbd\x85\xef\xbd\x93\xef\xbd\x94\xef\xbc\xa0\xef"
      "\xbd\x81\xef\xbd\x82\xef\xbd\x83\xef\xbd\x98\xef\xbd\x99\xef\xbd"
      "\x9a\xef\xbc\xbb\xef\xbd\x80\xef\xbd\x81\xef\xbd\x82\xef\xbd\x83"
      "\xef\xbd\x98\xef\xbd\x99\xef\xbd\x9a\xef\xbd\x9b";
  Util::LowerString(&full_width);
  EXPECT_EQ(kExpected, full_width);
}
// Util::UpperString converts in place.  Both ASCII letters and their
// full-width counterparts are expected to be upper-cased; full-width
// symbols stay untouched.
TEST(UtilTest, UpperString) {
  string ascii = "TeSTtest";
  Util::UpperString(&ascii);
  EXPECT_EQ("TESTTEST", ascii);

  // "TeST@ABCXYZ[`abcxyz{" written with full-width characters.
  string full_width =
      "\xef\xbc\xb4\xef\xbd\x85\xef\xbc\xb3\xef\xbc\xb4\xef\xbc\xa0\xef"
      "\xbc\xa1\xef\xbc\xa2\xef\xbc\xa3\xef\xbc\xb8\xef\xbc\xb9\xef\xbc"
      "\xba\xef\xbc\xbb\xef\xbd\x80\xef\xbd\x81\xef\xbd\x82\xef\xbd\x83"
      "\xef\xbd\x98\xef\xbd\x99\xef\xbd\x9a\xef\xbd\x9b";
  // "TEST@ABCXYZ[`ABCXYZ{" written with full-width characters.
  const char kExpected[] =
      "\xef\xbc\xb4\xef\xbc\xa5\xef\xbc\xb3\xef\xbc\xb4\xef\xbc\xa0\xef"
      "\xbc\xa1\xef\xbc\xa2\xef\xbc\xa3\xef\xbc\xb8\xef\xbc\xb9\xef\xbc"
      "\xba\xef\xbc\xbb\xef\xbd\x80\xef\xbc\xa1\xef\xbc\xa2\xef\xbc\xa3"
      "\xef\xbc\xb8\xef\xbc\xb9\xef\xbc\xba\xef\xbd\x9b";
  Util::UpperString(&full_width);
  EXPECT_EQ(kExpected, full_width);
}
// Util::CapitalizeString converts in place: the first letter is expected in
// upper case and every following letter in lower case, for ASCII as well as
// for full-width letters.
TEST(UtilTest, CapitalizeString) {
  string ascii = "TeSTtest";
  Util::CapitalizeString(&ascii);
  EXPECT_EQ("Testtest", ascii);

  // "TeST@ABCXYZ[`abcxyz{" written with full-width characters.
  string full_width =
      "\xef\xbc\xb4\xef\xbd\x85\xef\xbc\xb3\xef\xbc\xb4\xef\xbc\xa0\xef"
      "\xbc\xa1\xef\xbc\xa2\xef\xbc\xa3\xef\xbc\xb8\xef\xbc\xb9\xef\xbc"
      "\xba\xef\xbc\xbb\xef\xbd\x80\xef\xbd\x81\xef\xbd\x82\xef\xbd\x83"
      "\xef\xbd\x98\xef\xbd\x99\xef\xbd\x9a\xef\xbd\x9b";
  // "Test@abcxyz[`abcxyz{" written with full-width characters.
  const char kExpected[] =
      "\xef\xbc\xb4\xef\xbd\x85\xef\xbd\x93\xef\xbd\x94\xef\xbc\xa0\xef"
      "\xbd\x81\xef\xbd\x82\xef\xbd\x83\xef\xbd\x98\xef\xbd\x99\xef\xbd"
      "\x9a\xef\xbc\xbb\xef\xbd\x80\xef\xbd\x81\xef\xbd\x82\xef\xbd\x83"
      "\xef\xbd\x98\xef\xbd\x99\xef\xbd\x9a\xef\xbd\x9b";
  Util::CapitalizeString(&full_width);
  EXPECT_EQ(kExpected, full_width);
}
// Util::IsLowerAscii: only the empty string and all-lower-case ASCII are
// expected to be accepted.
TEST(UtilTest, IsLowerAscii) {
  const char *const kAccepted[] = { "", "hello" };
  const char *const kRejected[] = {
    "HELLO",
    "Hello",
    "HeLlO",
    "symbol!",
    // "Hello" written with full-width characters.
    "\xEF\xBC\xA8\xEF\xBD\x85\xEF\xBD\x8C\xEF\xBD\x8C\xEF\xBD\x8F",
  };
  for (size_t i = 0; i < arraysize(kAccepted); ++i) {
    EXPECT_TRUE(Util::IsLowerAscii(kAccepted[i])) << kAccepted[i];
  }
  for (size_t i = 0; i < arraysize(kRejected); ++i) {
    EXPECT_FALSE(Util::IsLowerAscii(kRejected[i])) << kRejected[i];
  }
}
// Util::IsUpperAscii: only the empty string and all-upper-case ASCII are
// expected to be accepted.
TEST(UtilTest, IsUpperAscii) {
  const char *const kAccepted[] = { "", "HELLO" };
  const char *const kRejected[] = {
    "hello",
    "Hello",
    "HeLlO",
    "symbol!",
    // "Hello" written with full-width characters.
    "\xEF\xBC\xA8\xEF\xBD\x85\xEF\xBD\x8C\xEF\xBD\x8C\xEF\xBD\x8F",
  };
  for (size_t i = 0; i < arraysize(kAccepted); ++i) {
    EXPECT_TRUE(Util::IsUpperAscii(kAccepted[i])) << kAccepted[i];
  }
  for (size_t i = 0; i < arraysize(kRejected); ++i) {
    EXPECT_FALSE(Util::IsUpperAscii(kRejected[i])) << kRejected[i];
  }
}
// Util::IsCapitalizedAscii: only the empty string and ASCII text with an
// upper-case first letter followed by lower-case letters are expected to be
// accepted.
TEST(UtilTest, IsCapitalizedAscii) {
  const char *const kAccepted[] = { "", "Hello" };
  const char *const kRejected[] = {
    "hello",
    "HELLO",
    "HeLlO",
    "symbol!",
    // "Hello" written with full-width characters.
    "\xEF\xBC\xA8\xEF\xBD\x85\xEF\xBD\x8C\xEF\xBD\x8C\xEF\xBD\x8F",
  };
  for (size_t i = 0; i < arraysize(kAccepted); ++i) {
    EXPECT_TRUE(Util::IsCapitalizedAscii(kAccepted[i])) << kAccepted[i];
  }
  for (size_t i = 0; i < arraysize(kRejected); ++i) {
    EXPECT_FALSE(Util::IsCapitalizedAscii(kRejected[i])) << kRejected[i];
  }
}
// Util::IsLowerOrUpperAscii: the empty string, all-lower-case ASCII and
// all-upper-case ASCII are expected to be accepted; mixed case is not.
TEST(UtilTest, IsLowerOrUpperAscii) {
  const char *const kAccepted[] = { "", "hello", "HELLO" };
  const char *const kRejected[] = {
    "Hello",
    "HeLlO",
    "symbol!",
    // "Hello" written with full-width characters.
    "\xEF\xBC\xA8\xEF\xBD\x85\xEF\xBD\x8C\xEF\xBD\x8C\xEF\xBD\x8F",
  };
  for (size_t i = 0; i < arraysize(kAccepted); ++i) {
    EXPECT_TRUE(Util::IsLowerOrUpperAscii(kAccepted[i])) << kAccepted[i];
  }
  for (size_t i = 0; i < arraysize(kRejected); ++i) {
    EXPECT_FALSE(Util::IsLowerOrUpperAscii(kRejected[i])) << kRejected[i];
  }
}
// Util::IsUpperOrCapitalizedAscii: the empty string, all-upper-case ASCII
// and capitalized ASCII are expected to be accepted.
TEST(UtilTest, IsUpperOrCapitalizedAscii) {
  const char *const kAccepted[] = { "", "HELLO", "Hello" };
  const char *const kRejected[] = {
    "hello",
    "HeLlO",
    "symbol!",
    // "Hello" written with full-width characters.
    "\xEF\xBC\xA8\xEF\xBD\x85\xEF\xBD\x8C\xEF\xBD\x8C\xEF\xBD\x8F",
  };
  for (size_t i = 0; i < arraysize(kAccepted); ++i) {
    EXPECT_TRUE(Util::IsUpperOrCapitalizedAscii(kAccepted[i]))
        << kAccepted[i];
  }
  for (size_t i = 0; i < arraysize(kRejected); ++i) {
    EXPECT_FALSE(Util::IsUpperOrCapitalizedAscii(kRejected[i]))
        << kRejected[i];
  }
}
void VerifyUTF8ToUCS4(const string &text, char32 expected_ucs4,
size_t expected_len) {
const char *begin = text.data();
const char *end = begin + text.size();
size_t mblen = 0;
char32 result = Util::UTF8ToUCS4(begin, end, &mblen);
EXPECT_EQ(expected_ucs4, result) << text << " " << expected_ucs4;
EXPECT_EQ(expected_len, mblen) << text << " " << expected_len;
}
// Checks Util::UTF8ToUCS4 at the smallest and largest code point of every
// encoded length from one to four bytes, plus the empty string.
TEST(UtilTest, UTF8ToUCS4) {
  struct TestCase {
    const char *utf8;
    char32 ucs4;
    size_t length;
  };
  const TestCase kCases[] = {
    { "", 0, 0 },
    { "\x01", 1, 1 },
    { "\x7F", 0x7F, 1 },
    { "\xC2\x80", 0x80, 2 },
    { "\xDF\xBF", 0x7FF, 2 },
    { "\xE0\xA0\x80", 0x800, 3 },
    { "\xEF\xBF\xBF", 0xFFFF, 3 },
    { "\xF0\x90\x80\x80", 0x10000, 4 },
    { "\xF7\xBF\xBF\xBF", 0x1FFFFF, 4 },
    // do not test 5-6 bytes because it's out of spec of UTF8.
  };
  const size_t kNumCases = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t i = 0; i < kNumCases; ++i) {
    VerifyUTF8ToUCS4(kCases[i].utf8, kCases[i].ucs4, kCases[i].length);
  }
}
// Checks Util::UCS4ToUTF8 at the smallest and largest code point of every
// encoded length from one to four bytes.
TEST(UtilTest, UCS4ToUTF8) {
  struct TestCase {
    char32 ucs4;
    const char *utf8;
  };
  const TestCase kCases[] = {
    // Do nothing if |c| is NUL. Previous implementation of UCS4ToUTF8 worked
    // like this even though the reason is unclear.
    { 0, "" },
    { 0x7F, "\x7F" },
    { 0x80, "\xC2\x80" },
    { 0x7FF, "\xDF\xBF" },
    { 0x800, "\xE0\xA0\x80" },
    { 0xFFFF, "\xEF\xBF\xBF" },
    { 0x10000, "\xF0\x90\x80\x80" },
    { 0x1FFFFF, "\xF7\xBF\xBF\xBF" },
  };
  const size_t kNumCases = sizeof(kCases) / sizeof(kCases[0]);

  // The output string is shared by all calls, so every expectation also
  // relies on the previous contents being replaced.
  string output;
  for (size_t i = 0; i < kNumCases; ++i) {
    Util::UCS4ToUTF8(kCases[i].ucs4, &output);
    EXPECT_EQ(kCases[i].utf8, output);
  }
}
// Util::CharsLen counts characters, not bytes: the nine three-byte
// characters below must be reported as 9.
TEST(UtilTest, CharsLen) {
  // "私の名前は中野です"
  const string src = "\xe7\xa7\x81\xe3\x81\xae\xe5\x90\x8d\xe5\x89\x8d\xe3\x81"
                     "\xaf\xe4\xb8\xad\xe9\x87\x8e\xe3\x81\xa7\xe3\x81\x99";
  // Expected value first, as in the rest of this file, so that a failure
  // message labels the two values correctly.
  EXPECT_EQ(9, Util::CharsLen(src.c_str(), src.size()));
}
// Util::SubStringPiece(src, start, length) addresses |src| in characters,
// not bytes.  A length reaching past the end, or string::npos, is expected
// to select everything up to the end of the string.
TEST(UtilTest, SubStringPiece) {
  // "私の名前は中野です"
  const string kText =
      "\xe7\xa7\x81\xe3\x81\xae\xe5\x90\x8d\xe5\x89\x8d\xe3\x81"
      "\xaf\xe4\xb8\xad\xe9\x87\x8e\xe3\x81\xa7\xe3\x81\x99";
  {
    const StringPiece piece = Util::SubStringPiece(kText, 0, 2);
    // "私の"
    EXPECT_EQ("\xe7\xa7\x81\xe3\x81\xae", piece);
    // The piece must not start before the memory block owned by |kText|.
    EXPECT_LE(kText.data(), piece.data());
  }
  {
    const StringPiece piece = Util::SubStringPiece(kText, 4, 1);
    // "は"
    EXPECT_EQ("\xe3\x81\xaf", piece);
    EXPECT_LE(kText.data(), piece.data());
  }
  {
    const StringPiece piece = Util::SubStringPiece(kText, 5, 3);
    // "中野で"
    EXPECT_EQ("\xe4\xb8\xad\xe9\x87\x8e\xe3\x81\xa7", piece);
    EXPECT_LE(kText.data(), piece.data());
  }
  {
    // Length larger than what is left.
    const StringPiece piece = Util::SubStringPiece(kText, 6, 10);
    // "野です"
    EXPECT_EQ("\xe9\x87\x8e\xe3\x81\xa7\xe3\x81\x99", piece);
    EXPECT_LE(kText.data(), piece.data());
  }
  {
    const StringPiece piece = Util::SubStringPiece(kText, 4, 2);
    // "は中"
    EXPECT_EQ("\xe3\x81\xaf\xe4\xb8\xad", piece);
    EXPECT_LE(kText.data(), piece.data());
  }
  {
    const StringPiece piece = Util::SubStringPiece(kText, 2, string::npos);
    // "名前は中野です"
    EXPECT_EQ("\xe5\x90\x8d\xe5\x89\x8d\xe3\x81\xaf\xe4\xb8\xad\xe9\x87"
              "\x8e\xe3\x81\xa7\xe3\x81\x99",
              piece);
    EXPECT_LE(kText.data(), piece.data());
  }
  {
    const StringPiece piece = Util::SubStringPiece(kText, 5, string::npos);
    // "中野です"
    EXPECT_EQ("\xe4\xb8\xad\xe9\x87\x8e\xe3\x81\xa7\xe3\x81\x99", piece);
    EXPECT_LE(kText.data(), piece.data());
  }
}
// Two-argument Util::SubStringPiece(src, start): everything from the
// |start|-th character on.  A start at or beyond the character count is
// expected to give an empty piece.
TEST(UtilTest, SubStringPiece2) {
  // "私はGoogleです" (10 characters)
  const string kText =
      "\xE7\xA7\x81\xE3\x81\xAF\x47\x6F\x6F\x67\x6C\x65"
      "\xE3\x81\xA7\xE3\x81\x99";

  const StringPiece whole = Util::SubStringPiece(kText, 0);
  EXPECT_EQ(kText, whole);

  const StringPiece tail = Util::SubStringPiece(kText, 5);
  // "gleです"
  EXPECT_EQ("\x67\x6C\x65\xE3\x81\xA7\xE3\x81\x99", tail);

  // Start exactly at the end.
  const StringPiece at_end = Util::SubStringPiece(kText, 10);
  EXPECT_TRUE(at_end.empty());

  // Start beyond the end.
  const StringPiece past_end = Util::SubStringPiece(kText, 13);
  EXPECT_TRUE(past_end.empty());
}
// Util::SubString(src, start, length, &result) addresses |src| in
// characters, not bytes.  A length reaching past the end, or string::npos,
// is expected to select everything up to the end of the string.
// Expected values are passed first to EXPECT_EQ, as in the rest of this
// file, so that a failure message labels the two values correctly.
TEST(UtilTest, SubString) {
  // "私の名前は中野です"
  const string src = "\xe7\xa7\x81\xe3\x81\xae\xe5\x90\x8d\xe5\x89\x8d\xe3\x81"
                     "\xaf\xe4\xb8\xad\xe9\x87\x8e\xe3\x81\xa7\xe3\x81\x99";
  string result;

  result.clear();
  Util::SubString(src, 0, 2, &result);
  // "私の"
  EXPECT_EQ("\xe7\xa7\x81\xe3\x81\xae", result);

  result.clear();
  Util::SubString(src, 4, 1, &result);
  // "は"
  EXPECT_EQ("\xe3\x81\xaf", result);

  result.clear();
  Util::SubString(src, 5, 3, &result);
  // "中野で"
  EXPECT_EQ("\xe4\xb8\xad\xe9\x87\x8e\xe3\x81\xa7", result);

  // Length larger than what is left.
  result.clear();
  Util::SubString(src, 6, 10, &result);
  // "野です"
  EXPECT_EQ("\xe9\x87\x8e\xe3\x81\xa7\xe3\x81\x99", result);

  result.clear();
  Util::SubString(src, 4, 2, &result);
  // "は中"
  EXPECT_EQ("\xe3\x81\xaf\xe4\xb8\xad", result);

  result.clear();
  Util::SubString(src, 2, string::npos, &result);
  // "名前は中野です"
  EXPECT_EQ("\xe5\x90\x8d\xe5\x89\x8d\xe3\x81\xaf\xe4\xb8\xad\xe9\x87"
            "\x8e\xe3\x81\xa7\xe3\x81\x99",
            result);

  result.clear();
  Util::SubString(src, 5, string::npos, &result);
  // "中野です"
  EXPECT_EQ("\xe4\xb8\xad\xe9\x87\x8e\xe3\x81\xa7\xe3\x81\x99", result);

  // Doesn't clear result and call Util::SubString: the previous contents
  // must be replaced, not appended to.
  Util::SubString(src, 5, string::npos, &result);
  // "中野です"
  EXPECT_EQ("\xe4\xb8\xad\xe9\x87\x8e\xe3\x81\xa7\xe3\x81\x99", result);
}
// Util::StartsWith: the empty string and the whole text count as prefixes;
// anything longer than the text or not matching from the start does not.
TEST(UtilTest, StartsWith) {
  const string kText = "abcdefg";
  const char *const kPrefixes[] = { "", "a", "abc", "abcdefg" };
  const char *const kNonPrefixes[] = { "abcdefghi", "foobar" };

  for (size_t i = 0; i < arraysize(kPrefixes); ++i) {
    EXPECT_TRUE(Util::StartsWith(kText, kPrefixes[i])) << kPrefixes[i];
  }
  for (size_t i = 0; i < arraysize(kNonPrefixes); ++i) {
    EXPECT_FALSE(Util::StartsWith(kText, kNonPrefixes[i]))
        << kNonPrefixes[i];
  }
}
// Util::EndsWith: the empty string and the whole text count as suffixes;
// anything longer than the text or not matching at the end does not.
TEST(UtilTest, EndsWith) {
  const string kText = "abcdefg";
  const char *const kSuffixes[] = { "", "g", "fg", "abcdefg" };
  const char *const kNonSuffixes[] = {
    "aaabcdefg",
    "foobar",
    "foobarbuzbuz",
  };

  for (size_t i = 0; i < arraysize(kSuffixes); ++i) {
    EXPECT_TRUE(Util::EndsWith(kText, kSuffixes[i])) << kSuffixes[i];
  }
  for (size_t i = 0; i < arraysize(kNonSuffixes); ++i) {
    EXPECT_FALSE(Util::EndsWith(kText, kNonSuffixes[i])) << kNonSuffixes[i];
  }
}
// Util::StripUTF8BOM removes a complete UTF-8 byte order mark
// (EF BB BF) at the very beginning of the string and nothing else.
TEST(UtilTest, StripUTF8BOM) {
  // Each row: { input, expected contents after the call }.
  // The BOM literals are kept separate from the following text so that the
  // hexadecimal escapes cannot swallow letters such as 'a' or 'b'.
  const char *const kCases[][2] = {
    // Should be stripped.
    { "\xef\xbb\xbf" "abc", "abc" },
    // Should be stripped.
    { "\xef\xbb\xbf", "" },
    // BOM in the middle of text. Shouldn't be stripped.
    { "a" "\xef\xbb\xbf" "bc", "a" "\xef\xbb\xbf" "bc" },
    // Incomplete BOM. Shouldn't be stripped.
    { "\xef\xbb" "abc", "\xef\xbb" "abc" },
    // String shorter than the BOM. Do nothing.
    { "a", "a" },
    // Empty string. Do nothing.
    { "", "" },
  };

  for (size_t i = 0; i < arraysize(kCases); ++i) {
    string line = kCases[i][0];
    Util::StripUTF8BOM(&line);
    EXPECT_EQ(kCases[i][1], line);
  }
}
// Util::IsUTF16BOM: true when the input starts with FE FF or FF FE,
// whatever follows; false otherwise.
TEST(UtilTest, IsUTF16BOM) {
  const char *const kWithBom[] = {
    "\xfe\xff",
    "\xff\xfe",
    "\xfe\xff ",
    "\xff\xfe ",
  };
  const char *const kWithoutBom[] = {
    "",
    "abc",
    " \xfe\xff",  // BOM bytes not at the start.
    " \xff\xfe",
    "\xff\xff",   // Not a valid byte order mark.
  };

  for (size_t i = 0; i < arraysize(kWithBom); ++i) {
    EXPECT_TRUE(Util::IsUTF16BOM(kWithBom[i])) << "index " << i;
  }
  for (size_t i = 0; i < arraysize(kWithoutBom); ++i) {
    EXPECT_FALSE(Util::IsUTF16BOM(kWithoutBom[i])) << "index " << i;
  }
}
// Util::IsAndroidPuaEmoji: probes both edges of the accepted code point
// range (U+FE000 .. U+FEEA0 according to the expectations below) and checks
// that only a string holding exactly one such character is accepted.
TEST(UtilTest, IsAndroidPuaEmoji) {
  // Empty string and plain ASCII are never emoji.
  const char *const kPlainText[] = { "", "A", "a" };
  for (size_t i = 0; i < arraysize(kPlainText); ++i) {
    EXPECT_FALSE(Util::IsAndroidPuaEmoji(kPlainText[i])) << kPlainText[i];
  }

  struct BoundaryCase {
    char32 ucs4;
    bool expected;
  };
  const BoundaryCase kBoundaries[] = {
    { 0xFDFFF, false },  // Just below the range.
    { 0xFE000, true },
    { 0xFE800, true },
    { 0xFEEA0, true },
    { 0xFEEA1, false },  // Just above the range.
  };
  const size_t kNumBoundaries = sizeof(kBoundaries) / sizeof(kBoundaries[0]);

  string str;
  for (size_t i = 0; i < kNumBoundaries; ++i) {
    Util::UCS4ToUTF8(kBoundaries[i].ucs4, &str);
    EXPECT_EQ(kBoundaries[i].expected, Util::IsAndroidPuaEmoji(str));
  }

  // If it has two ucs4 chars (or more), just expect false.
  Util::UCS4ToUTF8(0xFE000, &str);
  Util::UCS4ToUTF8Append(0xFE000, &str);
  EXPECT_FALSE(Util::IsAndroidPuaEmoji(str));
}
// Util::StringPrintf: printf-style formatting into a string.  Covers plain
// and UTF-8 strings, 32-bit and 64-bit integer extremes, a floating point
// value, and an output longer than the 1024-byte temporary buffer mentioned
// below.
TEST(UtilTest, StringPrintf) {
// On GCC, |EXPECT_EQ("", Util::StringPrintf(""))| may cause
// "warning: zero-length printf format string" so we disable this check.
// The macro has to come before the first empty format string below.
MOZC_GCC_DISABLE_WARNING_INLINE(format-zero-length);
// strings
EXPECT_EQ("", Util::StringPrintf(""));
EXPECT_EQ("", Util::StringPrintf("%s", ""));
EXPECT_EQ("hello, world", Util::StringPrintf("hello, world"));
EXPECT_EQ("hello, world", Util::StringPrintf("%s", "hello, world"));
EXPECT_EQ("hello, world", Util::StringPrintf("%s, %s", "hello", "world"));
// Multi-byte UTF-8 must pass through %s unchanged.
const char kHello[] = "\xE3\x81\xAF\xE3\x82\x8D\xE3\x83\xBC" // はろー
"\xE4\xB8\x96\xE7\x95\x8C"; // 世界
EXPECT_EQ(kHello, Util::StringPrintf("%s", kHello));
// 32-bit integers
EXPECT_EQ("-2147483648", Util::StringPrintf("%d", kint32min));
EXPECT_EQ("2147483647", Util::StringPrintf("%d", kint32max));
EXPECT_EQ("4294967295", Util::StringPrintf("%u", kuint32max));
EXPECT_EQ("80000000", Util::StringPrintf("%x", kint32min));
EXPECT_EQ("7fffffff", Util::StringPrintf("%x", kint32max));
EXPECT_EQ("FFFFFFFF", Util::StringPrintf("%X", kuint32max));
// 64-bit integers
// GG_LL_FORMAT supplies the length modifier for 64-bit conversions.
EXPECT_EQ("-9223372036854775808",
Util::StringPrintf("%" GG_LL_FORMAT "d", kint64min));
EXPECT_EQ("9223372036854775807",
Util::StringPrintf("%" GG_LL_FORMAT "d", kint64max));
EXPECT_EQ("18446744073709551615",
Util::StringPrintf("%" GG_LL_FORMAT "u", kuint64max));
EXPECT_EQ("8000000000000000",
Util::StringPrintf("%" GG_LL_FORMAT "x", kint64min));
EXPECT_EQ("7fffffffffffffff",
Util::StringPrintf("%" GG_LL_FORMAT "x", kint64max));
EXPECT_EQ("FFFFFFFFFFFFFFFF",
Util::StringPrintf("%" GG_LL_FORMAT "X", kuint64max));
// Simple test for floating point numbers
EXPECT_EQ("-1.75", Util::StringPrintf("%.2f", -1.75));
// 4096 is greater than a temporary buffer size (1024 bytes)
// which is used in StringPrintf().
const string kLongStrA(4096, '.');
const string kLongStrB(4096, '_');
const string& result = Util::StringPrintf("%s\t%s\n",
kLongStrA.c_str(),
kLongStrB.c_str());
EXPECT_EQ(kLongStrA + "\t" + kLongStrB + "\n", result);
}
// Util::HiraganaToKatakana: every hiragana character is expected to be
// replaced by its katakana counterpart, while characters that are not
// hiragana (katakana, kanji, ASCII, the long vowel mark) pass through
// unchanged.  All literals are UTF-8 byte sequences; the readable text is
// given in the comment above each literal.
TEST(UtilTest, HiraganaToKatakana) {
// Full table of hiragana characters.
{
// "あいうえおぁぃぅぇぉかきくけこがぎぐげごさしすせそざじずぜぞたちつてと"
// "だぢづでどっなにぬねのはひふへほばびぶべぼぱぴぷぺぽまみむめもやゆよゃ"
// "ゅょらりるれろわゎをんゔ"
const string input =
"\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A"
"\xE3\x81\x81\xE3\x81\x83\xE3\x81\x85\xE3\x81\x87\xE3\x81\x89"
"\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F\xE3\x81\x91\xE3\x81\x93"
"\xE3\x81\x8C\xE3\x81\x8E\xE3\x81\x90\xE3\x81\x92\xE3\x81\x94"
"\xE3\x81\x95\xE3\x81\x97\xE3\x81\x99\xE3\x81\x9B\xE3\x81\x9D"
"\xE3\x81\x96\xE3\x81\x98\xE3\x81\x9A\xE3\x81\x9C\xE3\x81\x9E"
"\xE3\x81\x9F\xE3\x81\xA1\xE3\x81\xA4\xE3\x81\xA6\xE3\x81\xA8"
"\xE3\x81\xA0\xE3\x81\xA2\xE3\x81\xA5\xE3\x81\xA7\xE3\x81\xA9"
"\xE3\x81\xA3\xE3\x81\xAA\xE3\x81\xAB\xE3\x81\xAC\xE3\x81\xAD"
"\xE3\x81\xAE\xE3\x81\xAF\xE3\x81\xB2\xE3\x81\xB5\xE3\x81\xB8"
"\xE3\x81\xBB\xE3\x81\xB0\xE3\x81\xB3\xE3\x81\xB6\xE3\x81\xB9"
"\xE3\x81\xBC\xE3\x81\xB1\xE3\x81\xB4\xE3\x81\xB7\xE3\x81\xBA"
"\xE3\x81\xBD\xE3\x81\xBE\xE3\x81\xBF\xE3\x82\x80\xE3\x82\x81"
"\xE3\x82\x82\xE3\x82\x84\xE3\x82\x86\xE3\x82\x88\xE3\x82\x83"
"\xE3\x82\x85\xE3\x82\x87\xE3\x82\x89\xE3\x82\x8A\xE3\x82\x8B"
"\xE3\x82\x8C\xE3\x82\x8D\xE3\x82\x8F\xE3\x82\x8E\xE3\x82\x92"
"\xE3\x82\x93\xE3\x82\x94";
string output;
Util::HiraganaToKatakana(input, &output);
// "アイウエオァィゥェォカキクケコガギグゲゴサシスセソザジズゼゾタチツテト"
// "ダヂヅデドッナニヌネノハヒフヘホバビブベボパピプペポマミムメモヤユヨャ"
// "ュョラリルレロワヮヲンヴ"
EXPECT_EQ("\xE3\x82\xA2\xE3\x82\xA4\xE3\x82\xA6\xE3\x82\xA8\xE3\x82\xAA"
"\xE3\x82\xA1\xE3\x82\xA3\xE3\x82\xA5\xE3\x82\xA7\xE3\x82\xA9"
"\xE3\x82\xAB\xE3\x82\xAD\xE3\x82\xAF\xE3\x82\xB1\xE3\x82\xB3"
"\xE3\x82\xAC\xE3\x82\xAE\xE3\x82\xB0\xE3\x82\xB2\xE3\x82\xB4"
"\xE3\x82\xB5\xE3\x82\xB7\xE3\x82\xB9\xE3\x82\xBB\xE3\x82\xBD"
"\xE3\x82\xB6\xE3\x82\xB8\xE3\x82\xBA\xE3\x82\xBC\xE3\x82\xBE"
"\xE3\x82\xBF\xE3\x83\x81\xE3\x83\x84\xE3\x83\x86\xE3\x83\x88"
"\xE3\x83\x80\xE3\x83\x82\xE3\x83\x85\xE3\x83\x87\xE3\x83\x89"
"\xE3\x83\x83\xE3\x83\x8A\xE3\x83\x8B\xE3\x83\x8C\xE3\x83\x8D"
"\xE3\x83\x8E\xE3\x83\x8F\xE3\x83\x92\xE3\x83\x95\xE3\x83\x98"
"\xE3\x83\x9B\xE3\x83\x90\xE3\x83\x93\xE3\x83\x96\xE3\x83\x99"
"\xE3\x83\x9C\xE3\x83\x91\xE3\x83\x94\xE3\x83\x97\xE3\x83\x9A"
"\xE3\x83\x9D\xE3\x83\x9E\xE3\x83\x9F\xE3\x83\xA0\xE3\x83\xA1"
"\xE3\x83\xA2\xE3\x83\xA4\xE3\x83\xA6\xE3\x83\xA8\xE3\x83\xA3"
"\xE3\x83\xA5\xE3\x83\xA7\xE3\x83\xA9\xE3\x83\xAA\xE3\x83\xAB"
"\xE3\x83\xAC\xE3\x83\xAD\xE3\x83\xAF\xE3\x83\xAE\xE3\x83\xB2"
"\xE3\x83\xB3\xE3\x83\xB4",
output);
}
// A hiragana sentence containing the long vowel mark "ー", which is
// expected to stay as it is.
{
// "わたしのなまえはなかのですうまーよろしゅう"
const string input = "\xe3\x82\x8f\xe3\x81\x9f\xe3\x81\x97\xe3\x81\xae\xe3"
"\x81\xaa\xe3\x81\xbe\xe3\x81\x88\xe3\x81\xaf\xe3\x81"
"\xaa\xe3\x81\x8b\xe3\x81\xae\xe3\x81\xa7\xe3\x81\x99"
"\xe3\x81\x86\xe3\x81\xbe\xe3\x83\xbc\xe3\x82\x88\xe3"
"\x82\x8d\xe3\x81\x97\xe3\x82\x85\xe3\x81\x86";
string output;
Util::HiraganaToKatakana(input, &output);
// "ワタシノナマエハナカノデスウマーヨロシュウ"
EXPECT_EQ("\xe3\x83\xaf\xe3\x82\xbf\xe3\x82\xb7\xe3\x83\x8e\xe3\x83\x8a\xe3"
"\x83\x9e\xe3\x82\xa8\xe3\x83\x8f\xe3\x83\x8a\xe3\x82\xab\xe3\x83"
"\x8e\xe3\x83\x87\xe3\x82\xb9\xe3\x82\xa6\xe3\x83\x9e\xe3\x83\xbc"
"\xe3\x83\xa8\xe3\x83\xad\xe3\x82\xb7\xe3\x83\xa5\xe3\x82\xa6",
output);
}
// Mixed scripts: only the hiragana part is expected to change.
{
// "グーグル工藤よろしくabc"
const string input = "\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0\xe3\x83\xab\xe5"
"\xb7\xa5\xe8\x97\xa4\xe3\x82\x88\xe3\x82\x8d\xe3\x81"
"\x97\xe3\x81\x8f\x61\x62\x63";
string output;
Util::HiraganaToKatakana(input, &output);
// "グーグル工藤ヨロシクabc"
EXPECT_EQ("\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0\xe3\x83\xab\xe5\xb7\xa5\xe8"
"\x97\xa4\xe3\x83\xa8\xe3\x83\xad\xe3\x82\xb7\xe3\x82\xaf\x61\x62"
"\x63", output);
}
}
// Checks Util::KatakanaToHiragana() against three inputs: the katakana table,
// a katakana sentence, and a string mixing katakana, kanji and ASCII.
TEST(UtilTest, KatakanaToHiragana) {
  {
    // Input (katakana):
    // "アイウエオァィゥェォカキクケコガギグゲゴサシスセソザジズゼゾタチツテト"
    // "ダヂヅデドッナニヌネノハヒフヘホバビブベボパピプペポマミムメモヤユヨャ"
    // "ュョラリルレロワヮヲンヰヱヴ"
    const string input =
        "\xE3\x82\xA2\xE3\x82\xA4\xE3\x82\xA6\xE3\x82\xA8\xE3\x82\xAA"
        "\xE3\x82\xA1\xE3\x82\xA3\xE3\x82\xA5\xE3\x82\xA7\xE3\x82\xA9"
        "\xE3\x82\xAB\xE3\x82\xAD\xE3\x82\xAF\xE3\x82\xB1\xE3\x82\xB3"
        "\xE3\x82\xAC\xE3\x82\xAE\xE3\x82\xB0\xE3\x82\xB2\xE3\x82\xB4"
        "\xE3\x82\xB5\xE3\x82\xB7\xE3\x82\xB9\xE3\x82\xBB\xE3\x82\xBD"
        "\xE3\x82\xB6\xE3\x82\xB8\xE3\x82\xBA\xE3\x82\xBC\xE3\x82\xBE"
        "\xE3\x82\xBF\xE3\x83\x81\xE3\x83\x84\xE3\x83\x86\xE3\x83\x88"
        "\xE3\x83\x80\xE3\x83\x82\xE3\x83\x85\xE3\x83\x87\xE3\x83\x89"
        "\xE3\x83\x83\xE3\x83\x8A\xE3\x83\x8B\xE3\x83\x8C\xE3\x83\x8D"
        "\xE3\x83\x8E\xE3\x83\x8F\xE3\x83\x92\xE3\x83\x95\xE3\x83\x98"
        "\xE3\x83\x9B\xE3\x83\x90\xE3\x83\x93\xE3\x83\x96\xE3\x83\x99"
        "\xE3\x83\x9C\xE3\x83\x91\xE3\x83\x94\xE3\x83\x97\xE3\x83\x9A"
        "\xE3\x83\x9D\xE3\x83\x9E\xE3\x83\x9F\xE3\x83\xA0\xE3\x83\xA1"
        "\xE3\x83\xA2\xE3\x83\xA4\xE3\x83\xA6\xE3\x83\xA8\xE3\x83\xA3"
        "\xE3\x83\xA5\xE3\x83\xA7\xE3\x83\xA9\xE3\x83\xAA\xE3\x83\xAB"
        "\xE3\x83\xAC\xE3\x83\xAD\xE3\x83\xAF\xE3\x83\xAE\xE3\x83\xB2"
        "\xE3\x83\xB3\xE3\x83\xB0\xE3\x83\xB1\xE3\x83\xB4";
    string output;
    Util::KatakanaToHiragana(input, &output);
    // Expected (hiragana):
    // "あいうえおぁぃぅぇぉかきくけこがぎぐげごさしすせそざじずぜぞたちつてと"
    // "だぢづでどっなにぬねのはひふへほばびぶべぼぱぴぷぺぽまみむめもやゆよゃ"
    // "ゅょらりるれろわゎをんゐゑゔ"
    EXPECT_EQ("\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A"
              "\xE3\x81\x81\xE3\x81\x83\xE3\x81\x85\xE3\x81\x87\xE3\x81\x89"
              "\xE3\x81\x8B\xE3\x81\x8D\xE3\x81\x8F\xE3\x81\x91\xE3\x81\x93"
              "\xE3\x81\x8C\xE3\x81\x8E\xE3\x81\x90\xE3\x81\x92\xE3\x81\x94"
              "\xE3\x81\x95\xE3\x81\x97\xE3\x81\x99\xE3\x81\x9B\xE3\x81\x9D"
              "\xE3\x81\x96\xE3\x81\x98\xE3\x81\x9A\xE3\x81\x9C\xE3\x81\x9E"
              "\xE3\x81\x9F\xE3\x81\xA1\xE3\x81\xA4\xE3\x81\xA6\xE3\x81\xA8"
              "\xE3\x81\xA0\xE3\x81\xA2\xE3\x81\xA5\xE3\x81\xA7\xE3\x81\xA9"
              "\xE3\x81\xA3\xE3\x81\xAA\xE3\x81\xAB\xE3\x81\xAC\xE3\x81\xAD"
              "\xE3\x81\xAE\xE3\x81\xAF\xE3\x81\xB2\xE3\x81\xB5\xE3\x81\xB8"
              "\xE3\x81\xBB\xE3\x81\xB0\xE3\x81\xB3\xE3\x81\xB6\xE3\x81\xB9"
              "\xE3\x81\xBC\xE3\x81\xB1\xE3\x81\xB4\xE3\x81\xB7\xE3\x81\xBA"
              "\xE3\x81\xBD\xE3\x81\xBE\xE3\x81\xBF\xE3\x82\x80\xE3\x82\x81"
              "\xE3\x82\x82\xE3\x82\x84\xE3\x82\x86\xE3\x82\x88\xE3\x82\x83"
              "\xE3\x82\x85\xE3\x82\x87\xE3\x82\x89\xE3\x82\x8A\xE3\x82\x8B"
              "\xE3\x82\x8C\xE3\x82\x8D\xE3\x82\x8F\xE3\x82\x8E\xE3\x82\x92"
              "\xE3\x82\x93\xE3\x82\x90\xE3\x82\x91\xE3\x82\x94",
              output);
  }
  {
    // "ワタシノナマエハナカノデスウマーヨロシュウ"
    // The input must really be katakana (U+30xx, i.e. "\xE3\x82\xA1" and up);
    // feeding hiragana here would make the case pass without converting
    // anything.
    const string input =
        "\xE3\x83\xAF\xE3\x82\xBF\xE3\x82\xB7\xE3\x83\x8E\xE3\x83\x8A"
        "\xE3\x83\x9E\xE3\x82\xA8\xE3\x83\x8F\xE3\x83\x8A\xE3\x82\xAB"
        "\xE3\x83\x8E\xE3\x83\x87\xE3\x82\xB9\xE3\x82\xA6\xE3\x83\x9E"
        "\xE3\x83\xBC\xE3\x83\xA8\xE3\x83\xAD\xE3\x82\xB7\xE3\x83\xA5"
        "\xE3\x82\xA6";
    string output;
    Util::KatakanaToHiragana(input, &output);
    // "わたしのなまえはなかのですうまーよろしゅう"
    // The prolonged sound mark "ー" (\xE3\x83\xBC) is expected to stay as is.
    EXPECT_EQ("\xE3\x82\x8F\xE3\x81\x9F\xE3\x81\x97\xE3\x81\xAE"
              "\xE3\x81\xAA\xE3\x81\xBE\xE3\x81\x88\xE3\x81\xAF"
              "\xE3\x81\xAA\xE3\x81\x8B\xE3\x81\xAE\xE3\x81\xA7"
              "\xE3\x81\x99\xE3\x81\x86\xE3\x81\xBE\xE3\x83\xBC"
              "\xE3\x82\x88\xE3\x82\x8D\xE3\x81\x97\xE3\x82\x85"
              "\xE3\x81\x86",
              output);
  }
  {
    // "グーグル工藤ヨロシクabc"
    const string input =
        "\xE3\x82\xB0\xE3\x83\xBC\xE3\x82\xB0\xE3\x83\xAB\xE5\xB7\xA5"
        "\xE8\x97\xA4\xE3\x83\xA8\xE3\x83\xAD\xE3\x82\xB7\xE3\x82\xAF"
        "\x61\x62\x63";
    string output;
    Util::KatakanaToHiragana(input, &output);
    // "ぐーぐる工藤よろしくabc": kanji and ASCII are expected to pass through.
    EXPECT_EQ("\xE3\x81\x90\xE3\x83\xBC\xE3\x81\x90\xE3\x82\x8B"
              "\xE5\xB7\xA5\xE8\x97\xA4\xE3\x82\x88\xE3\x82\x8D"
              "\xE3\x81\x97\xE3\x81\x8F\x61\x62\x63",
              output);
  }
}
// Table-driven check of Util::RomanjiToHiragana(): each row pairs a romaji
// key sequence with the text the conversion is expected to produce.
TEST(UtilTest, RomanjiToHiragana) {
  struct RomajiCase {
    const char *romaji;
    const char *converted;
  };
  const RomajiCase kRomajiCases[] = {
    // "わたしのなまえはたかはしのりゆきです"
    {"watasinonamaehatakahashinoriyukidesu",
     "\xE3\x82\x8F\xE3\x81\x9F\xE3\x81\x97\xE3\x81\xAE\xE3\x81\xAA"
     "\xE3\x81\xBE\xE3\x81\x88\xE3\x81\xAF\xE3\x81\x9F\xE3\x81\x8B"
     "\xE3\x81\xAF\xE3\x81\x97\xE3\x81\xAE\xE3\x82\x8A\xE3\x82\x86"
     "\xE3\x81\x8D\xE3\x81\xA7\xE3\x81\x99"},
    // "まじっすかまじやべぇ"
    {"majissukamajiyabexe",
     "\xE3\x81\xBE\xE3\x81\x98\xE3\x81\xA3\xE3\x81\x99\xE3\x81\x8B"
     "\xE3\x81\xBE\xE3\x81\x98\xE3\x82\x84\xE3\x81\xB9\xE3\x81\x87"},
    // "っk": the trailing 'k' is expected to stay as a Latin letter.
    {"kk", "\xE3\x81\xA3\x6B"},
    // No kana expected for this sequence; the text comes back unchanged.
    {"xyz", "xyz"},
  };
  const size_t kNumCases = arraysize(kRomajiCases);
  for (size_t index = 0; index < kNumCases; ++index) {
    const RomajiCase &current = kRomajiCases[index];
    string result;
    Util::RomanjiToHiragana(current.romaji, &result);
    EXPECT_EQ(current.converted, result);
  }
}
// Util::NormalizeVoicedSoundMark() is expected to merge "う" followed by a
// standalone voiced sound mark (\xe3\x82\x9b) into the single character "ゔ".
TEST(UtilTest, NormalizeVoicedSoundMark) {
  // "僕のう゛ぁいおりん"
  const string kSeparateMark =
      "\xe5\x83\x95\xe3\x81\xae\xe3\x81\x86\xe3\x82\x9b"
      "\xe3\x81\x81\xe3\x81\x84\xe3\x81\x8a\xe3\x82\x8a\xe3\x82\x93";
  // "僕のゔぁいおりん"
  const string kMergedMark =
      "\xe5\x83\x95\xe3\x81\xae\xe3\x82\x94"
      "\xe3\x81\x81\xe3\x81\x84\xe3\x81\x8a\xe3\x82\x8a\xe3\x82\x93";

  string normalized;
  Util::NormalizeVoicedSoundMark(kSeparateMark, &normalized);
  EXPECT_EQ(kMergedMark, normalized);
}
// Per the expectations below, Util::IsFullWidthSymbolInHalfWidthKatakana()
// accepts strings made only of "ー" and "。" and rejects any string that
// contains a katakana letter.
TEST(UtilTest, IsFullWidthSymbolInHalfWidthKatakana) {
  const char kKatakanaWord[] =  // "グーグル"
      "\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0\xe3\x83\xab";
  const char kProlongedMark[] = "\xe3\x83\xbc";  // "ー"
  const char kPeriod[] = "\xe3\x80\x82";         // "。"

  EXPECT_FALSE(Util::IsFullWidthSymbolInHalfWidthKatakana(kKatakanaWord));
  EXPECT_TRUE(Util::IsFullWidthSymbolInHalfWidthKatakana(kProlongedMark));
  EXPECT_TRUE(Util::IsFullWidthSymbolInHalfWidthKatakana(kPeriod));

  // "グーグル。": a word followed by a symbol is still rejected.
  EXPECT_FALSE(Util::IsFullWidthSymbolInHalfWidthKatakana(
      "\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0\xe3\x83\xab\xe3\x80\x82"));

  // "ー。": symbols only.
  EXPECT_TRUE(Util::IsFullWidthSymbolInHalfWidthKatakana(
      "\xe3\x83\xbc\xe3\x80\x82"));

  // "ーグ。": a letter between two symbols.
  EXPECT_FALSE(Util::IsFullWidthSymbolInHalfWidthKatakana(
      "\xe3\x83\xbc\xe3\x82\xb0\xe3\x80\x82"));
}
// Per the expectations below, Util::IsHalfWidthKatakanaSymbol() accepts
// strings made only of the half-width symbols "ｰ", "｡" and "､" and rejects
// strings that contain katakana letters of either width.
TEST(UtilTest, IsHalfWidthKatakanaSymbol) {
  const char kHalfWidthWord[] =  // "ｸﾞｰｸﾞﾙ" (half-width)
      "\xef\xbd\xb8\xef\xbe\x9e\xef\xbd\xb0\xef\xbd\xb8\xef\xbe\x9e\xef\xbe\x99";
  const char kHalfWidthProlongedMark[] = "\xef\xbd\xb0";  // "ｰ"
  const char kHalfWidthPeriod[] = "\xef\xbd\xa1";         // "｡"
  const char kHalfWidthComma[] = "\xef\xbd\xa4";          // "､"

  EXPECT_FALSE(Util::IsHalfWidthKatakanaSymbol(kHalfWidthWord));
  EXPECT_TRUE(Util::IsHalfWidthKatakanaSymbol(kHalfWidthProlongedMark));
  EXPECT_TRUE(Util::IsHalfWidthKatakanaSymbol(kHalfWidthPeriod));
  EXPECT_TRUE(Util::IsHalfWidthKatakanaSymbol(kHalfWidthComma));

  // "グーグル｡": full-width katakana followed by a half-width period.
  EXPECT_FALSE(Util::IsHalfWidthKatakanaSymbol(
      "\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0\xe3\x83\xab\xef\xbd\xa1"));

  // "､｡" (half-width comma then period; note the order is not "｡､").
  EXPECT_TRUE(Util::IsHalfWidthKatakanaSymbol("\xef\xbd\xa4\xef\xbd\xa1"));
}
// Exercises the full-width <-> half-width converters of Util on ASCII,
// katakana, mixed-width input and spaces.
//
// Expectations use EXPECT_EQ rather than CHECK_EQ so that a mismatch is
// reported as a test failure instead of aborting the whole test binary.
TEST(UtilTest, FullWidthAndHalfWidth) {
  string output;

  // Empty input yields empty output in both directions.
  Util::FullWidthToHalfWidth("", &output);
  EXPECT_EQ("", output);
  Util::HalfWidthToFullWidth("", &output);
  EXPECT_EQ("", output);

  // ASCII "abc[]?." -> full-width "ａｂｃ［］？．"
  Util::HalfWidthToFullWidth("abc[]?.", &output);
  EXPECT_EQ("\xef\xbd\x81\xef\xbd\x82\xef\xbd\x83\xef\xbc\xbb\xef\xbc\xbd\xef"
            "\xbc\x9f\xef\xbc\x8e", output);

  // Half-width "ｲﾝﾀｰﾈｯﾄ｢" followed by an already full-width "」"
  Util::HalfWidthToFullWidth("\xef\xbd\xb2\xef\xbe\x9d\xef\xbe\x80\xef\xbd\xb0"
                             "\xef\xbe\x88\xef\xbd\xaf\xef\xbe\x84\xef\xbd\xa2"
                             "\xe3\x80\x8d", &output);
  // -> full-width "インターネット「」"
  EXPECT_EQ("\xe3\x82\xa4\xe3\x83\xb3\xe3\x82\xbf\xe3\x83\xbc\xe3\x83\x8d\xe3"
            "\x83\x83\xe3\x83\x88\xe3\x80\x8c\xe3\x80\x8d", output);

  // Half-width "ｲﾝﾀｰﾈｯﾄ" followed by full-width "グーグル"
  Util::HalfWidthToFullWidth("\xef\xbd\xb2\xef\xbe\x9d\xef\xbe\x80\xef\xbd\xb0"
                             "\xef\xbe\x88\xef\xbd\xaf\xef\xbe\x84\xe3\x82\xb0"
                             "\xe3\x83\xbc\xe3\x82\xb0\xe3\x83\xab", &output);
  // -> full-width "インターネットグーグル"
  EXPECT_EQ("\xe3\x82\xa4\xe3\x83\xb3\xe3\x82\xbf\xe3\x83\xbc\xe3\x83\x8d\xe3"
            "\x83\x83\xe3\x83\x88\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0\xe3\x83"
            "\xab", output);

  // Full-width "ａｂｃ［］？．" -> ASCII "abc[]?."
  Util::FullWidthToHalfWidth("\xef\xbd\x81\xef\xbd\x82\xef\xbd\x83\xef\xbc\xbb"
                             "\xef\xbc\xbd\xef\xbc\x9f\xef\xbc\x8e", &output);
  EXPECT_EQ("abc[]?.", output);

  // Full-width "インターネット"
  Util::FullWidthToHalfWidth("\xe3\x82\xa4\xe3\x83\xb3\xe3\x82\xbf\xe3\x83\xbc"
                             "\xe3\x83\x8d\xe3\x83\x83\xe3\x83\x88", &output);
  // -> half-width "ｲﾝﾀｰﾈｯﾄ"
  EXPECT_EQ("\xef\xbd\xb2\xef\xbe\x9d\xef\xbe\x80\xef\xbd\xb0\xef\xbe\x88\xef"
            "\xbd\xaf\xef\xbe\x84", output);

  // Half-width "ｲﾝﾀｰﾈｯﾄ" followed by full-width "グーグル"
  Util::FullWidthToHalfWidth("\xef\xbd\xb2\xef\xbe\x9d\xef\xbe\x80\xef\xbd\xb0"
                             "\xef\xbe\x88\xef\xbd\xaf\xef\xbe\x84\xe3\x82\xb0"
                             "\xe3\x83\xbc\xe3\x82\xb0\xe3\x83\xab", &output);
  // -> all half-width "ｲﾝﾀｰﾈｯﾄｸﾞｰｸﾞﾙ"
  EXPECT_EQ("\xef\xbd\xb2\xef\xbe\x9d\xef\xbe\x80\xef\xbd\xb0\xef\xbe\x88\xef"
            "\xbd\xaf\xef\xbe\x84\xef\xbd\xb8\xef\xbe\x9e\xef\xbd\xb0\xef\xbd"
            "\xb8\xef\xbe\x9e\xef\xbe\x99", output);

  // Spaces.  Every input below is an ASCII space (\x20) followed by an
  // ideographic space (\xe3\x80\x80), i.e. two characters, so every expected
  // output has two characters as well.  The expectations are spelled with hex
  // escapes so the number of spaces cannot be lost to whitespace reformatting.
  Util::FullWidthToHalfWidth("\x20\xe3\x80\x80", &output);
  EXPECT_EQ("\x20\x20", output);
  Util::HalfWidthToFullWidth("\x20\xe3\x80\x80", &output);
  EXPECT_EQ("\xe3\x80\x80\xe3\x80\x80", output);

  // Spaces are treated as ASCII by the ASCII-only converters...
  Util::FullWidthAsciiToHalfWidthAscii("\x20\xe3\x80\x80", &output);
  EXPECT_EQ("\x20\x20", output);
  Util::HalfWidthAsciiToFullWidthAscii("\x20\xe3\x80\x80", &output);
  EXPECT_EQ("\xe3\x80\x80\xe3\x80\x80", output);

  // ...and are therefore left untouched by the katakana-only converters.
  Util::FullWidthKatakanaToHalfWidthKatakana("\x20\xe3\x80\x80", &output);
  EXPECT_EQ("\x20\xe3\x80\x80", output);
  Util::HalfWidthKatakanaToFullWidthKatakana("\x20\xe3\x80\x80", &output);
  EXPECT_EQ("\x20\xe3\x80\x80", output);
}
// For every bracket pair in the table, Util::IsOpenBracket() and
// Util::IsCloseBracket() must recognise their own side, report the matching
// counterpart through the out-parameter, and reject the opposite side.
TEST(UtilTest, BracketTest) {
  static const struct BracketType {
    const char *open_bracket;
    const char *close_bracket;
  } kBracketType[] = {
    { "\xEF\xBC\x88", "\xEF\xBC\x89" },  // "（", "）"
    { "\xE3\x80\x94", "\xE3\x80\x95" },  // "〔", "〕"
    { "\xEF\xBC\xBB", "\xEF\xBC\xBD" },  // "［", "］"
    { "\xEF\xBD\x9B", "\xEF\xBD\x9D" },  // "｛", "｝"
    { "\xE3\x80\x88", "\xE3\x80\x89" },  // "〈", "〉"
    { "\xE3\x80\x8A", "\xE3\x80\x8B" },  // "《", "》"
    { "\xE3\x80\x8C", "\xE3\x80\x8D" },  // "「", "」"
    { "\xE3\x80\x8E", "\xE3\x80\x8F" },  // "『", "』"
    { "\xE3\x80\x90", "\xE3\x80\x91" },  // "【", "】"
    { "\xe3\x80\x98", "\xe3\x80\x99" },  // "〘", "〙"
    { "\xe3\x80\x9a", "\xe3\x80\x9b" },  // "〚", "〛"
    { NULL, NULL },  // sentinel
  };

  string counterpart;
  // Walk the table until the all-NULL sentinel row.
  for (const BracketType *entry = kBracketType;
       entry->open_bracket != NULL || entry->close_bracket != NULL;
       ++entry) {
    const char *const opening = entry->open_bracket;
    const char *const closing = entry->close_bracket;

    EXPECT_TRUE(Util::IsOpenBracket(opening, &counterpart));
    EXPECT_EQ(closing, counterpart);

    EXPECT_TRUE(Util::IsCloseBracket(closing, &counterpart));
    EXPECT_EQ(opening, counterpart);

    EXPECT_FALSE(Util::IsOpenBracket(closing, &counterpart));
    EXPECT_FALSE(Util::IsCloseBracket(opening, &counterpart));
  }
}
// Util::IsEnglishTransliteration() is expected to accept ASCII letters,
// spaces and the punctuation used below, and to reject Japanese text.
TEST(UtilTest, IsEnglishTransliteration) {
  static const char *const kAccepted[] = {
    "ABC",
    "Google",
    "Google Map",
    "ABC-DEF",
    "Foo-bar",
    "Foo!",
    "Who's",
    "!",
    " ",
  };
  for (size_t index = 0; index < arraysize(kAccepted); ++index) {
    EXPECT_TRUE(Util::IsEnglishTransliteration(kAccepted[index]))
        << kAccepted[index];
  }

  static const char *const kRejected[] = {
    "\xE3\x81\xA6\xE3\x81\x99\xE3\x81\xA8",  // "てすと"
    "\xE3\x83\x86\xE3\x82\xB9\xE3\x83\x88",  // "テスト"
    "\xE6\x9D\xB1\xE4\xBA\xAC",              // "東京"
  };
  for (size_t index = 0; index < arraysize(kRejected); ++index) {
    EXPECT_FALSE(Util::IsEnglishTransliteration(kRejected[index]))
        << kRejected[index];
  }
}
// Smoke test: a mozc::Mutex can be constructed and held by a
// mozc::scoped_lock for the duration of a scope.  Nothing is asserted.
TEST(MutexTest, MutexTest) {
  mozc::Mutex guarded_mutex;
  mozc::scoped_lock lock(&guarded_mutex);
}
// Smoke test: only constructs a ThreadTest object (declared elsewhere in this
// file) and lets it go out of scope.  The joinable/join calls are disabled.
TEST(ThreadTest, ThreadTest) {
  ThreadTest worker;
  // worker.SetJoinable(true);
  // worker.Join();
}
// Table-driven check of Util::ChopReturns(): for each input, the expected
// return value and the expected contents of the string afterwards.
TEST(UtilTest, ChopReturns) {
  static const struct {
    const char *original;
    bool chopped;
    const char *remaining;
  } kCases[] = {
    { "line\n", true, "line" },
    { "line\r", true, "line" },
    { "line\r\n", true, "line" },
    // Nothing to chop: the call reports false and leaves the text alone.
    { "line", false, "line" },
    // Only the trailing line break goes; the inner one is kept.
    { "line1\nline2\n", true, "line1\nline2" },
    // A run of trailing line breaks is removed entirely.
    { "line\n\n\n", true, "line" },
  };
  for (size_t index = 0; index < arraysize(kCases); ++index) {
    string text = kCases[index].original;
    EXPECT_EQ(kCases[index].chopped, Util::ChopReturns(&text));
    EXPECT_EQ(kCases[index].remaining, text);
  }
}
// Fixed instant used to construct the ClockMock in TimeTestWithMock.
// Seconds part: 1608729875, i.e. 2020-12-23 13:24:35 (Wed) UTC.
const uint64 kTestSeconds = 1608729875uLL;
// Sub-second part: 123456 [usec].
const uint32 kTestMicroSeconds = 123456u;
// Time utility test with a mock clock: installs a ClockMock built from
// kTestSeconds / kTestMicroSeconds and checks that the Util time accessors
// report the values derived from it.
TEST(UtilTest, TimeTestWithMock) {
  scoped_ptr<ClockMock> fake_clock(
      new ClockMock(kTestSeconds, kTestMicroSeconds));
  Util::SetClockHandler(fake_clock.get());

  // GetTime
  EXPECT_EQ(kTestSeconds, Util::GetTime());

  // GetTimeOfDay
  {
    uint64 seconds;
    uint32 micro_seconds;
    Util::GetTimeOfDay(&seconds, &micro_seconds);
    EXPECT_EQ(kTestSeconds, seconds);
    EXPECT_EQ(kTestMicroSeconds, micro_seconds);
  }

  // GetCurrentTm
  // 2020-12-23 13:24:35 (Wed); tm_year counts from 1900, tm_mon from 0.
  {
    tm now;
    Util::GetCurrentTm(&now);
    EXPECT_EQ(120, now.tm_year);
    EXPECT_EQ(11, now.tm_mon);
    EXPECT_EQ(23, now.tm_mday);
    EXPECT_EQ(13, now.tm_hour);
    EXPECT_EQ(24, now.tm_min);
    EXPECT_EQ(35, now.tm_sec);
    EXPECT_EQ(3, now.tm_wday);
  }

  // GetTmWithOffsetSecond
  // 100000000 seconds after the instant above: 2024/02/23 23:11:15 (Fri)
  {
    const int kOffsetSeconds = 100000000;
    tm shifted;
    Util::GetTmWithOffsetSecond(&shifted, kOffsetSeconds);
    EXPECT_EQ(124, shifted.tm_year);
    EXPECT_EQ(1, shifted.tm_mon);
    EXPECT_EQ(23, shifted.tm_mday);
    EXPECT_EQ(23, shifted.tm_hour);
    EXPECT_EQ(11, shifted.tm_min);
    EXPECT_EQ(15, shifted.tm_sec);
    EXPECT_EQ(5, shifted.tm_wday);
  }

  // GetFrequency / GetTicks: values set on the mock are read back via Util.
  {
    const uint64 kMockFrequency = 12345;
    const uint64 kMockTicks = 54321;
    fake_clock->SetFrequency(kMockFrequency);
    EXPECT_EQ(kMockFrequency, Util::GetFrequency());
    fake_clock->SetTicks(kMockTicks);
    EXPECT_EQ(kMockTicks, Util::GetTicks());
  }

  // Unset the clock handler before the mock is destroyed at scope exit.
  Util::SetClockHandler(NULL);

  // GetFrequency / GetTicks without ClockMock: only checked to be non-zero.
  EXPECT_NE(0, Util::GetFrequency());
  EXPECT_NE(0, Util::GetTicks());
}
// Time utility test without a mock clock: the seconds reported by
// Util::GetTimeOfDay() and Util::GetTime(), read back to back, must agree to
// within one second.
TEST(UtilTest, TimeTestWithoutMock) {
  uint64 seconds_from_time_of_day;
  uint32 micro_seconds_ignored;
  Util::GetTimeOfDay(&seconds_from_time_of_day, &micro_seconds_ignored);
  const uint64 seconds_from_get_time = Util::GetTime();

  // The two readings are taken at slightly different moments, so this
  // comparison is inherently timing dependent.
  const int kToleranceSeconds = 1;
  EXPECT_NEAR(seconds_from_time_of_day, seconds_from_get_time,
              kToleranceSeconds)
      << ": This test have possibilities to fail "
      << "when system is busy and slow.";
}
// Util::EncodeURI(): non-ASCII bytes and URI punctuation are expected to be
// percent-encoded, plain ASCII letters are expected to pass through.
TEST(UtilTest, EncodeURI) {
  {
    // "もずく"
    string encoded;
    Util::EncodeURI("\xe3\x82\x82\xe3\x81\x9a\xe3\x81\x8f", &encoded);
    EXPECT_EQ("%E3%82%82%E3%81%9A%E3%81%8F", encoded);
  }
  {
    string encoded;
    Util::EncodeURI("mozc", &encoded);
    EXPECT_EQ("mozc", encoded);
  }
  {
    // The space is expected as "%20".
    string encoded;
    Util::EncodeURI("http://mozc/?q=Hello World", &encoded);
    EXPECT_EQ("http%3A%2F%2Fmozc%2F%3Fq%3DHello%20World", encoded);
  }
}
// Util::DecodeURI(): percent escapes are expected to turn back into raw
// bytes and '+' is expected to decode to a space.
TEST(UtilTest, DecodeURI) {
  {
    // "もずく"
    string decoded;
    Util::DecodeURI("%E3%82%82%E3%81%9A%E3%81%8F", &decoded);
    EXPECT_EQ("\xe3\x82\x82\xe3\x81\x9a\xe3\x81\x8f", decoded);
  }
  {
    string decoded;
    Util::DecodeURI("mozc", &decoded);
    EXPECT_EQ("mozc", decoded);
  }
  {
    string decoded;
    Util::DecodeURI("http%3A%2F%2Fmozc%2F%3Fq%3DHello+World", &decoded);
    EXPECT_EQ("http://mozc/?q=Hello World", decoded);
  }
}
// Util::AppendCGIParams() is expected to append "key=value" pairs, joined by
// '&' and with the values escaped, to whatever the target already holds.
TEST(UtilTest, AppendCGIParams) {
  vector<pair<string, string> > cgi_params;
  string target;

  // No parameters: the target stays empty.
  Util::AppendCGIParams(cgi_params, &target);
  EXPECT_TRUE(target.empty());

  // One parameter, appended after an existing prefix.
  cgi_params.push_back(make_pair("foo", "b a+r"));
  target = "http://mozc.com?";
  Util::AppendCGIParams(cgi_params, &target);
  EXPECT_EQ("http://mozc.com?foo=b%20a%2Br", target);

  // Two parameters, appended to an empty target.
  cgi_params.push_back(make_pair("buzz", "mozc"));
  target.clear();
  Util::AppendCGIParams(cgi_params, &target);
  EXPECT_EQ("foo=b%20a%2Br&buzz=mozc", target);
}
// Util::Escape() is expected to spell each byte as a literal "\xHH" sequence
// with upper-case hex digits.
TEST(UtilTest, Escape) {
  // "らむだ"
  const char kRawText[] = "\xe3\x82\x89\xe3\x82\x80\xe3\x81\xa0";
  string result;
  Util::Escape(kRawText, &result);
  EXPECT_EQ("\\xE3\\x82\\x89\\xE3\\x82\\x80\\xE3\\x81\\xA0", result);
}
// Util::EscapeUrl() is checked through both of its forms: the one writing to
// an out-parameter and the one returning the escaped string.
TEST(UtilTest, EscapeUrl) {
  // "らむだ"
  const char kRawText[] = "\xe3\x82\x89\xe3\x82\x80\xe3\x81\xa0";
  const char kPercentEncoded[] = "%E3%82%89%E3%82%80%E3%81%A0";

  string via_out_param;
  Util::EscapeUrl(kRawText, &via_out_param);
  EXPECT_EQ(kPercentEncoded, via_out_param);

  EXPECT_EQ(kPercentEncoded, Util::EscapeUrl(kRawText));
}
// Util::EscapeHtml() is expected to replace <, >, &, ' and " with HTML
// entities and to leave ordinary letters alone.
TEST(UtilTest, EscapeHtml) {
  const char kMarkup[] = "<>&'\"abc";
  string result;
  Util::EscapeHtml(kMarkup, &result);
  EXPECT_EQ("&lt;&gt;&amp;&#39;&quot;abc", result);
}
// Util::EscapeCss() is expected to rewrite only '<' (as "&lt;") in this
// input; the other characters come back unchanged.
TEST(UtilTest, EscapeCss) {
  const char kMarkup[] = "<>&'\"abc";
  string result;
  Util::EscapeCss(kMarkup, &result);
  EXPECT_EQ("&lt;>&'\"abc", result);
}
// Covers Util::IsScriptType(), Util::ContainsScriptType(),
// Util::GetScriptType() and Util::GetFirstScriptType().
// The quoted text in each comment is the UTF-8 string spelled out in hex
// escapes on the following line(s).
TEST(UtilTest, ScriptType) {
// ---- IsScriptType: strings made of a single script ----
// "くどう"
EXPECT_TRUE(Util::IsScriptType("\xe3\x81\x8f\xe3\x81\xa9\xe3\x81\x86",
Util::HIRAGANA));
// "京都"
EXPECT_TRUE(Util::IsScriptType("\xe4\xba\xac\xe9\x83\xbd", Util::KANJI));
// "人々" (b/4201140)
EXPECT_TRUE(Util::IsScriptType("\xE4\xBA\xBA\xE3\x80\x85", Util::KANJI));
// "モズク"
EXPECT_TRUE(Util::IsScriptType("\xe3\x83\xa2\xe3\x82\xba\xe3\x82\xaf",
Util::KATAKANA));
// "モズクﾓｽﾞｸ" (full-width katakana followed by half-width katakana)
EXPECT_TRUE(Util::IsScriptType("\xe3\x83\xa2\xe3\x82\xba\xe3\x82\xaf\xef\xbe"
"\x93\xef\xbd\xbd\xef\xbe\x9e\xef\xbd\xb8",
Util::KATAKANA));
// "ぐーぐる"
EXPECT_TRUE(Util::IsScriptType("\xe3\x81\x90\xe3\x83\xbc\xe3\x81\x90\xe3\x82"
"\x8b", Util::HIRAGANA));
// "グーグル"
EXPECT_TRUE(Util::IsScriptType("\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0\xe3\x83"
"\xab", Util::KATAKANA));
// "ゟ" U+309F: HIRAGANA DIGRAPH YORI
EXPECT_TRUE(Util::IsScriptType("\xE3\x82\x9F", Util::HIRAGANA));
// "ヿ" U+30FF: KATAKANA DIGRAPH KOTO
EXPECT_TRUE(Util::IsScriptType("\xE3\x83\xBF", Util::KATAKANA));
// "ヷヸヹヺㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ"
EXPECT_TRUE(Util::IsScriptType(
"\xE3\x83\xB7\xE3\x83\xB8\xE3\x83\xB9\xE3\x83\xBA\xE3\x87\xB0"
"\xE3\x87\xB1\xE3\x87\xB2\xE3\x87\xB3\xE3\x87\xB4\xE3\x87\xB5"
"\xE3\x87\xB6\xE3\x87\xB7\xE3\x87\xB8\xE3\x87\xB9\xE3\x87\xBA"
"\xE3\x87\xBB\xE3\x87\xBC\xE3\x87\xBD\xE3\x87\xBE\xE3\x87\xBF",
Util::KATAKANA));
// "𛀀" U+1B000: KATAKANA LETTER ARCHAIC E
EXPECT_TRUE(Util::IsScriptType("\xF0\x9B\x80\x80", Util::KATAKANA));
// "𛀁" U+1B001: HIRAGANA LETTER ARCHAIC YE
EXPECT_TRUE(Util::IsScriptType("\xF0\x9B\x80\x81", Util::HIRAGANA));
EXPECT_TRUE(Util::IsScriptType("012", Util::NUMBER));
// "０１２012" (full-width digits followed by ASCII digits)
EXPECT_TRUE(Util::IsScriptType("\xef\xbc\x90\xef\xbc\x91\xef\xbc\x92\x30\x31"
"\x32", Util::NUMBER));
EXPECT_TRUE(Util::IsScriptType("abcABC", Util::ALPHABET));
// "ＡＢＣＤ" (full-width letters)
EXPECT_TRUE(Util::IsScriptType("\xef\xbc\xa1\xef\xbc\xa2\xef\xbc\xa3\xef\xbc"
"\xa4", Util::ALPHABET));
EXPECT_TRUE(Util::IsScriptType("@!#", Util::UNKNOWN_SCRIPT));
// ---- IsScriptType: one character of another script makes it false ----
// "くどカう"
EXPECT_FALSE(Util::IsScriptType("\xe3\x81\x8f\xe3\x81\xa9\xe3\x82\xab\xe3\x81"
"\x86", Util::HIRAGANA));
// "京あ都"
EXPECT_FALSE(Util::IsScriptType("\xe4\xba\xac\xe3\x81\x82\xe9\x83\xbd",
Util::KANJI));
// "モズあク"
EXPECT_FALSE(Util::IsScriptType("\xe3\x83\xa2\xe3\x82\xba\xe3\x81\x82\xe3\x82"
"\xaf", Util::KATAKANA));
// "モあズクﾓｽﾞｸ" (the tail is half-width katakana)
EXPECT_FALSE(Util::IsScriptType("\xe3\x83\xa2\xe3\x81\x82\xe3\x82\xba\xe3\x82"
"\xaf\xef\xbe\x93\xef\xbd\xbd\xef\xbe\x9e\xef"
"\xbd\xb8", Util::KATAKANA));
// "012あ"
EXPECT_FALSE(Util::IsScriptType("\x30\x31\x32\xe3\x81\x82", Util::NUMBER));
// "０１２あ012" (full-width digits, hiragana, ASCII digits)
EXPECT_FALSE(Util::IsScriptType("\xef\xbc\x90\xef\xbc\x91\xef\xbc\x92\xe3\x81"
"\x82\x30\x31\x32", Util::NUMBER));
// "abcABあC"
EXPECT_FALSE(Util::IsScriptType("\x61\x62\x63\x41\x42\xe3\x81\x82\x43",
Util::ALPHABET));
// "ＡＢあＣＤ" (full-width letters around a hiragana)
EXPECT_FALSE(Util::IsScriptType("\xef\xbc\xa1\xef\xbc\xa2\xe3\x81\x82\xef\xbc"
"\xa3\xef\xbc\xa4", Util::ALPHABET));
// "ぐーぐるグ"
EXPECT_FALSE(Util::IsScriptType("\xe3\x81\x90\xe3\x83\xbc\xe3\x81\x90\xe3\x82"
"\x8b\xe3\x82\xb0", Util::HIRAGANA));
// "グーグルぐ"
EXPECT_FALSE(Util::IsScriptType("\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0\xe3\x83"
"\xab\xe3\x81\x90", Util::KATAKANA));
// ---- ContainsScriptType ----
// "グーグルsuggest"
EXPECT_TRUE(Util::ContainsScriptType("\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0"
"\xe3\x83\xab\x73\x75\x67\x67\x65\x73"
"\x74", Util::ALPHABET));
// "グーグルサジェスト"
EXPECT_FALSE(Util::ContainsScriptType("\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0"
"\xe3\x83\xab\xe3\x82\xb5\xe3\x82\xb8"
"\xe3\x82\xa7\xe3\x82\xb9\xe3\x83\x88",
Util::ALPHABET));
// ---- GetScriptType / GetFirstScriptType: single-script strings ----
// "くどう"
EXPECT_EQ(Util::HIRAGANA, Util::GetScriptType("\xe3\x81\x8f\xe3\x81\xa9\xe3"
"\x81\x86"));
// "京都"
EXPECT_EQ(Util::KANJI, Util::GetScriptType("\xe4\xba\xac\xe9\x83\xbd"));
// "人々" (b/4201140)
EXPECT_EQ(Util::KANJI, Util::GetScriptType("\xE4\xBA\xBA\xE3\x80\x85"));
// "モズク"
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType("\xe3\x83\xa2\xe3\x82\xba\xe3"
"\x82\xaf"));
// "モズクﾓｽﾞｸ" (full-width katakana followed by half-width katakana)
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType("\xe3\x83\xa2\xe3\x82\xba\xe3"
"\x82\xaf\xef\xbe\x93\xef\xbd"
"\xbd\xef\xbe\x9e\xef\xbd"
"\xb8"));
// "ぐーぐる"
EXPECT_EQ(Util::HIRAGANA, Util::GetScriptType("\xe3\x81\x90\xe3\x83\xbc\xe3"
"\x81\x90\xe3\x82\x8b"));
EXPECT_EQ(Util::HIRAGANA,
Util::GetFirstScriptType("\xe3\x81\x90\xe3\x83\xbc\xe3\x81\x90"
"\xe3\x82\x8b"));
// "グーグル"
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType("\xe3\x82\xb0\xe3\x83\xbc\xe3"
"\x82\xb0\xe3\x83\xab"));
EXPECT_EQ(Util::KATAKANA,
Util::GetFirstScriptType("\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0"
"\xe3\x83\xab"));
// "ゟ" U+309F HIRAGANA DIGRAPH YORI
EXPECT_EQ(Util::HIRAGANA, Util::GetScriptType("\xE3\x82\x9F"));
EXPECT_EQ(Util::HIRAGANA, Util::GetFirstScriptType("\xE3\x82\x9F"));
// "ヿ" U+30FF KATAKANA DIGRAPH KOTO
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType("\xE3\x83\xBF"));
// "ヷヸヹヺㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ"
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType(
"\xE3\x83\xB7\xE3\x83\xB8\xE3\x83\xB9\xE3\x83\xBA\xE3\x87\xB0"
"\xE3\x87\xB1\xE3\x87\xB2\xE3\x87\xB3\xE3\x87\xB4\xE3\x87\xB5"
"\xE3\x87\xB6\xE3\x87\xB7\xE3\x87\xB8\xE3\x87\xB9\xE3\x87\xBA"
"\xE3\x87\xBB\xE3\x87\xBC\xE3\x87\xBD\xE3\x87\xBE\xE3\x87\xBF"));
// "𛀀" U+1B000 KATAKANA LETTER ARCHAIC E
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType("\xF0\x9B\x80\x80"));
// "𛀁" U+1B001 HIRAGANA LETTER ARCHAIC YE
EXPECT_EQ(Util::HIRAGANA, Util::GetScriptType("\xF0\x9B\x80\x81"));
// ---- Symbols and marks on their own ----
// "!グーグル"
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\x21\xe3\x82\xb0\xe3\x83"
"\xbc\xe3\x82\xb0\xe3\x83"
"\xab"));
// "ー": unknown as a whole string, but katakana as a first character.
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xe3\x83\xbc"));
EXPECT_EQ(Util::KATAKANA, Util::GetFirstScriptType("\xe3\x83\xbc"));
// "ーー"
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xe3\x83\xbc\xe3\x83"
"\xbc"));
EXPECT_EQ(Util::KATAKANA, Util::GetFirstScriptType("\xe3\x83\xbc\xe3"
"\x83\xbc"));
// "゛"
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xe3\x82\x9b"));
// "゜"
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xe3\x82\x9c"));
EXPECT_EQ(Util::NUMBER, Util::GetScriptType("012"));
// "０１２012" (full-width digits followed by ASCII digits)
EXPECT_EQ(Util::NUMBER, Util::GetScriptType("\xef\xbc\x90\xef\xbc\x91\xef\xbc"
"\x92\x30\x31\x32"));
EXPECT_EQ(Util::ALPHABET, Util::GetScriptType("abcABC"));
// "ＡＢＣＤ" (full-width letters)
EXPECT_EQ(Util::ALPHABET, Util::GetScriptType("\xef\xbc\xa1\xef\xbc\xa2\xef"
"\xbc\xa3\xef\xbc\xa4"));
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("@!#"));
// "＠！＃" (full-width symbols)
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xef\xbc\xa0\xef\xbc\x81"
"\xef\xbc\x83"));
// ---- Prolonged sound mark and voiced marks next to kana ----
// "ーひらがな"
EXPECT_EQ(Util::HIRAGANA, Util::GetScriptType("\xe3\x83\xbc\xe3\x81\xb2\xe3"
"\x82\x89\xe3\x81\x8c\xe3\x81"
"\xaa"));
EXPECT_EQ(Util::KATAKANA, Util::GetFirstScriptType("\xe3\x83\xbc\xe3\x81\xb2"
"\xe3\x82\x89\xe3\x81\x8c"
"\xe3\x81\xaa"));
// "ーカタカナ"
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType("\xe3\x83\xbc\xe3\x82\xab\xe3"
"\x82\xbf\xe3\x82\xab\xe3\x83"
"\x8a"));
// "ｰｶﾀｶﾅ" (half-width)
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType("\xef\xbd\xb0\xef\xbd\xb6\xef"
"\xbe\x80\xef\xbd\xb6\xef\xbe"
"\x85"));
// "ひらがなー"
EXPECT_EQ(Util::HIRAGANA, Util::GetScriptType("\xe3\x81\xb2\xe3\x82\x89\xe3"
"\x81\x8c\xe3\x81\xaa\xe3\x83"
"\xbc"));
// "カタカナー"
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType("\xe3\x82\xab\xe3\x82\xbf\xe3"
"\x82\xab\xe3\x83\x8a\xe3\x83"
"\xbc"));
// "ｶﾀｶﾅｰ" (half-width)
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType("\xef\xbd\xb6\xef\xbe\x80\xef"
"\xbd\xb6\xef\xbe\x85\xef\xbd"
"\xb0"));
// "あ゛っ"
EXPECT_EQ(Util::HIRAGANA, Util::GetScriptType("\xe3\x81\x82\xe3\x82\x9b\xe3"
"\x81\xa3"));
// "あ゜っ"
EXPECT_EQ(Util::HIRAGANA, Util::GetScriptType("\xe3\x81\x82\xe3\x82\x9c\xe3"
"\x81\xa3"));
// "ア゛ッ"
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType("\xe3\x82\xa2\xe3\x82\x9b\xe3"
"\x83\x83"));
// "ア゜ッ"
EXPECT_EQ(Util::KATAKANA, Util::GetScriptType("\xe3\x82\xa2\xe3\x82\x9c\xe3"
"\x83\x83"));
// ---- Mixed scripts: GetScriptType is unknown, GetFirstScriptType still
// ---- reports the script of the leading character.
// "くどカう"
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xe3\x81\x8f\xe3\x81\xa9"
"\xe3\x82\xab\xe3\x81"
"\x86"));
// "京あ都"
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xe4\xba\xac\xe3\x81\x82"
"\xe9\x83\xbd"));
EXPECT_EQ(Util::KANJI, Util::GetFirstScriptType("\xe4\xba\xac\xe3\x81\x82"
"\xe9\x83\xbd"));
// "モズあク"
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xe3\x83\xa2\xe3\x82\xba"
"\xe3\x81\x82\xe3\x82"
"\xaf"));
EXPECT_EQ(Util::KATAKANA, Util::GetFirstScriptType("\xe3\x83\xa2\xe3\x82\xba"
"\xe3\x81\x82\xe3\x82"
"\xaf"));
// "モあズクﾓｽﾞｸ" (the tail is half-width katakana)
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xe3\x83\xa2\xe3\x81\x82"
"\xe3\x82\xba\xe3\x82\xaf"
"\xef\xbe\x93\xef\xbd\xbd"
"\xef\xbe\x9e\xef\xbd"
"\xb8"));
// "012あ"
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\x30\x31\x32\xe3\x81"
"\x82"));
EXPECT_EQ(Util::NUMBER, Util::GetFirstScriptType("\x30\x31\x32\xe3\x81"
"\x82"));
// "０１２あ012" (full-width digits, hiragana, ASCII digits)
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xef\xbc\x90\xef\xbc\x91"
"\xef\xbc\x92\xe3\x81\x82"
"\x30\x31\x32"));
EXPECT_EQ(Util::NUMBER, Util::GetFirstScriptType("\xef\xbc\x90\xef\xbc\x91"
"\xef\xbc\x92\xe3\x81\x82"
"\x30\x31\x32"));
// "abcABあC"
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\x61\x62\x63\x41\x42\xe3"
"\x81\x82\x43"));
// "ＡＢあＣＤ" (full-width letters around a hiragana)
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xef\xbc\xa1\xef\xbc\xa2"
"\xe3\x81\x82\xef\xbc\xa3"
"\xef\xbc\xa4"));
// "ぐーぐるグ"
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xe3\x81\x90\xe3\x83\xbc"
"\xe3\x81\x90\xe3\x82\x8b"
"\xe3\x82\xb0"));
// "グーグルぐ"
EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptType("\xe3\x82\xb0\xe3\x83\xbc"
"\xe3\x82\xb0\xe3\x83\xab"
"\xe3\x81\x90"));
// ---- Kanji at the end of the basic block and outside the BMP; emoji ----
// "龦" U+9FA6
EXPECT_EQ(Util::KANJI, Util::GetScriptType("\xE9\xBE\xA6"));
// "龻" U+9FBB
EXPECT_EQ(Util::KANJI, Util::GetScriptType("\xE9\xBE\xBB"));
// U+9FFF is not assigned yet but reserved for CJK Unified Ideographs.
EXPECT_EQ(Util::KANJI, Util::GetScriptType("\xE9\xBF\xBF"));
// "𠮟咤" U+20B9F U+54A4
EXPECT_EQ(Util::KANJI, Util::GetScriptType("\xF0\xA0\xAE\x9F\xE5\x92\xA4"));
// "𠮷野" U+20BB7 U+91CE
EXPECT_EQ(Util::KANJI, Util::GetScriptType("\xF0\xA0\xAE\xB7\xE9\x87\x8E"));
// "巽" U+2F884
EXPECT_EQ(Util::KANJI, Util::GetScriptType("\xF0\xAF\xA2\x84"));
// U+1F466, BOY/smile emoji
EXPECT_EQ(Util::EMOJI, Util::GetScriptType("\xF0\x9F\x91\xA6"));
}
// Util::GetScriptTypeWithoutSymbols() is expected to report the script of a
// string while skipping symbols (spaces, '-', '!', "・", "☆", ...).  Strings
// that are empty, consist only of symbols, or mix scripts are expected to
// yield UNKNOWN_SCRIPT.
TEST(UtilTest, ScriptTypeWithoutSymbols) {
  // "くど う"
  EXPECT_EQ(Util::HIRAGANA, Util::GetScriptTypeWithoutSymbols(
      "\xe3\x81\x8f\xe3\x81\xa9 \xe3\x81\x86"));
  // "京 都"
  EXPECT_EQ(Util::KANJI, Util::GetScriptTypeWithoutSymbols(
      "\xe4\xba\xac \xe9\x83\xbd"));
  // "モズ ク": the embedded space is the point of this case, as in the two
  // cases above, so it must be present in the byte string.
  EXPECT_EQ(Util::KATAKANA, Util::GetScriptTypeWithoutSymbols(
      "\xe3\x83\xa2\xe3\x82\xba \xe3\x82\xaf"));
  // "モズ クﾓｽﾞｸ" (the tail is half-width katakana)
  EXPECT_EQ(Util::KATAKANA, Util::GetScriptTypeWithoutSymbols(
      "\xe3\x83\xa2\xe3\x82\xba \xe3\x82\xaf\xef\xbe\x93\xef\xbd"
      "\xbd\xef\xbe\x9e\xef\xbd\xb8"));
  // "Google Earth"
  EXPECT_EQ(Util::ALPHABET, Util::GetScriptTypeWithoutSymbols(
      "Google Earth"));
  // "Google "
  EXPECT_EQ(Util::ALPHABET, Util::GetScriptTypeWithoutSymbols(
      "Google "));
  // " Google"
  EXPECT_EQ(Util::ALPHABET, Util::GetScriptTypeWithoutSymbols(
      " Google"));
  // " Google "
  EXPECT_EQ(Util::ALPHABET, Util::GetScriptTypeWithoutSymbols(
      " Google "));
  // " g"
  EXPECT_EQ(Util::ALPHABET, Util::GetScriptTypeWithoutSymbols(
      " g"));
  // ""
  EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptTypeWithoutSymbols(
      ""));
  // " "
  EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptTypeWithoutSymbols(
      " "));
  // " "
  EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptTypeWithoutSymbols(
      " "));
  EXPECT_EQ(Util::ALPHABET, Util::GetScriptTypeWithoutSymbols("Hello!"));
  // "Hello!あ"
  EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptTypeWithoutSymbols(
      "\x48\x65\x6c\x6c\x6f\x21\xe3\x81\x82"));
  EXPECT_EQ(Util::ALPHABET, Util::GetScriptTypeWithoutSymbols("CD-ROM"));
  // "CD-ROMア"
  EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptTypeWithoutSymbols(
      "\x43\x44\x2d\x52\x4f\x4d\xe3\x82\xa2"));
  EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptTypeWithoutSymbols("-"));
  EXPECT_EQ(Util::ALPHABET, Util::GetScriptTypeWithoutSymbols("-A"));
  EXPECT_EQ(Util::ALPHABET, Util::GetScriptTypeWithoutSymbols("--A"));
  EXPECT_EQ(Util::ALPHABET, Util::GetScriptTypeWithoutSymbols("--A---"));
  // "--A-ｱ-" (half-width katakana "ｱ")
  EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptTypeWithoutSymbols(
      "\x2d\x2d\x41\x2d\xef\xbd\xb1\x2d"));
  EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptTypeWithoutSymbols("!"));
  // "・あ"
  EXPECT_EQ(Util::HIRAGANA, Util::GetScriptTypeWithoutSymbols(
      "\xe3\x83\xbb\xe3\x81\x82"));
  // "・・あ"
  EXPECT_EQ(Util::HIRAGANA, Util::GetScriptTypeWithoutSymbols(
      "\xe3\x83\xbb\xe3\x83\xbb\xe3\x81\x82"));
  // "コギト・エルゴ・スム"
  EXPECT_EQ(Util::KATAKANA, Util::GetScriptTypeWithoutSymbols(
      "\xe3\x82\xb3\xe3\x82\xae\xe3\x83\x88\xe3\x83\xbb\xe3\x82\xa8"
      "\xe3\x83\xab\xe3\x82\xb4\xe3\x83\xbb\xe3\x82\xb9\xe3\x83\xa0"));
  // "コギト・エルゴ・住む"
  EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptTypeWithoutSymbols(
      "\xe3\x82\xb3\xe3\x82\xae\xe3\x83\x88\xe3\x83\xbb\xe3\x82\xa8"
      "\xe3\x83\xab\xe3\x82\xb4\xe3\x83\xbb\xe4\xbd\x8f\xe3\x82\x80"));
  // "人☆名"
  EXPECT_EQ(Util::KANJI, Util::GetScriptTypeWithoutSymbols(
      "\xe4\xba\xba\xe2\x98\x86\xe5\x90\x8d"));
  // "ひとの☆なまえ"
  EXPECT_EQ(Util::HIRAGANA, Util::GetScriptTypeWithoutSymbols(
      "\xe3\x81\xb2\xe3\x81\xa8\xe3\x81\xae\xe2\x98\x86\xe3\x81\xaa"
      "\xe3\x81\xbe\xe3\x81\x88"));
  // "超☆最高です"
  EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptTypeWithoutSymbols(
      "\xe8\xb6\x85\xe2\x98\x86\xe6\x9c\x80\xe9\xab\x98\xe3\x81\xa7"
      "\xe3\x81\x99"));
  // "・--☆"
  EXPECT_EQ(Util::UNKNOWN_SCRIPT, Util::GetScriptTypeWithoutSymbols(
      "\xe3\x83\xbb\x2d\x2d\xe2\x98\x86"));
}
// Util::GetFormType() is expected to classify a string as FULL_WIDTH or
// HALF_WIDTH, and as UNKNOWN_FORM when both widths are mixed.
TEST(UtilTest, FormType) {
  // ---- Kana and kanji ----
  // "くどう"
  EXPECT_EQ(Util::FULL_WIDTH,
            Util::GetFormType("\xe3\x81\x8f\xe3\x81\xa9\xe3\x81\x86"));
  // "京都"
  EXPECT_EQ(Util::FULL_WIDTH,
            Util::GetFormType("\xe4\xba\xac\xe9\x83\xbd"));
  // "モズク"
  EXPECT_EQ(Util::FULL_WIDTH,
            Util::GetFormType("\xe3\x83\xa2\xe3\x82\xba\xe3\x82\xaf"));
  // "ﾓｽﾞｸ" (half-width)
  EXPECT_EQ(Util::HALF_WIDTH,
            Util::GetFormType("\xef\xbe\x93\xef\xbd\xbd\xef\xbe\x9e\xef\xbd\xb8"));
  // "ぐーぐる"
  EXPECT_EQ(Util::FULL_WIDTH,
            Util::GetFormType("\xe3\x81\x90\xe3\x83\xbc\xe3\x81\x90\xe3\x82\x8b"));
  // "グーグル"
  EXPECT_EQ(Util::FULL_WIDTH,
            Util::GetFormType("\xe3\x82\xb0\xe3\x83\xbc\xe3\x82\xb0\xe3\x83\xab"));
  // "ｸﾞｰｸﾞﾙ" (half-width)
  EXPECT_EQ(Util::HALF_WIDTH,
            Util::GetFormType("\xef\xbd\xb8\xef\xbe\x9e\xef\xbd\xb0"
                              "\xef\xbd\xb8\xef\xbe\x9e\xef\xbe\x99"));
  // "ｰ" (half-width prolonged sound mark)
  EXPECT_EQ(Util::HALF_WIDTH, Util::GetFormType("\xef\xbd\xb0"));
  // "ー" (full-width prolonged sound mark)
  EXPECT_EQ(Util::FULL_WIDTH, Util::GetFormType("\xe3\x83\xbc"));

  // ---- Symbols ----
  // "¢£¥¦¬¯"
  EXPECT_EQ(Util::HALF_WIDTH,
            Util::GetFormType("\xc2\xa2\xc2\xa3\xc2\xa5"
                              "\xc2\xa6\xc2\xac\xc2\xaf"));
  // Half-width forms of "│←↑→↓■○" (U+FFE8 .. U+FFEE)
  EXPECT_EQ(Util::HALF_WIDTH,
            Util::GetFormType("\xef\xbf\xa8\xef\xbf\xa9\xef\xbf\xaa\xef\xbf\xab"
                              "\xef\xbf\xac\xef\xbf\xad\xef\xbf\xae"));
  // Half-width mathematical symbols
  // [U+27E6, U+27ED], U+2985, and U+2986
  EXPECT_EQ(Util::HALF_WIDTH,
            Util::GetFormType("\xe2\x9f\xa6\xe2\x9f\xa7\xe2\x9f\xa8\xe2\x9f\xa9"
                              "\xe2\x9f\xaa\xe2\x9f\xab\xe2\x9f\xac\xe2\x9f\xad"
                              "\xe2\xa6\x85\xe2\xa6\x86"));
  // Half-width hangul (U+FFA0 .. U+FFA2)
  EXPECT_EQ(Util::HALF_WIDTH,
            Util::GetFormType("\xef\xbe\xa0\xef\xbe\xa1\xef\xbe\xa2"));
  // Half-width won "₩"
  EXPECT_EQ(Util::HALF_WIDTH, Util::GetFormType("\xe2\x82\xa9"));

  // ---- Digits, letters and ASCII punctuation ----
  EXPECT_EQ(Util::HALF_WIDTH, Util::GetFormType("012"));
  // "０１２012": full-width digits followed by ASCII digits, hence unknown.
  EXPECT_EQ(Util::UNKNOWN_FORM,
            Util::GetFormType("\xef\xbc\x90\xef\xbc\x91\xef\xbc\x92"
                              "\x30\x31\x32"));
  EXPECT_EQ(Util::HALF_WIDTH, Util::GetFormType("abcABC"));
  // "ＡＢＣＤ" (full-width letters)
  EXPECT_EQ(Util::FULL_WIDTH,
            Util::GetFormType("\xef\xbc\xa1\xef\xbc\xa2\xef\xbc\xa3\xef\xbc\xa4"));
  EXPECT_EQ(Util::HALF_WIDTH, Util::GetFormType("@!#"));
}
// A snapshot of what |Util::GetCharacterSet(ucs4)| returns is stored in
// data/test/character_set/character_set.tsv.
// This test re-checks every single code point against that snapshot so that
// any change in classification is noticed.
TEST(UtilTest, CharacterSetFullTest) {
  map<char32, Util::CharacterSet> snapshot;
  FillTestCharacterSetMap(&snapshot);
  EXPECT_FALSE(snapshot.empty());

  // The Unicode code space is [U+0000, U+10FFFF]; visit all of it.
  const char32 kLastCodePoint = 0x10ffff;
  for (char32 code_point = 0; code_point <= kLastCodePoint; ++code_point) {
    EXPECT_EQ(GetExpectedCharacterSet(snapshot, code_point),
              Util::GetCharacterSet(code_point))
        << "Character set changed at " << code_point;
  }
}
TEST(UtilTest, CharacterSet_gen_character_set) {
  // Within the single-byte range only [0x00, 0x7f] is ASCII; everything in
  // [0x80, 0xff] must be classified as something else.
  for (size_t code = 0; code <= 0xff; ++code) {
    if (code <= 0x7f) {
      EXPECT_EQ(Util::ASCII, Util::GetCharacterSet(code));
    } else {
      EXPECT_NE(Util::ASCII, Util::GetCharacterSet(code));
    }
  }

  // Individual code points and the character set expected for each.
  struct CodePointCase {
    char32 code_point;
    Util::CharacterSet expected;
  };
  const CodePointCase kCases[] = {
    // JIS X 0213
    {0x2160, Util::JISX0213},   // "Ⅰ"
    {0x2460, Util::JISX0213},   // "①"
    {0x32A4, Util::JISX0213},   // "㊤"
    {0x20B9F, Util::JISX0213},  // "𠮟" from UCS4 range (b/4176888)
    {0x2A6B2, Util::JISX0213},  // "𪚲" from UCS4 range (b/4176888)
    // Only in CP932
    {0x51EC, Util::CP932},  // "凬"
    // Only in Unicode
    {0xFFE6, Util::UNICODE_ONLY},   // "￦" (full-width won sign, U+FFE6)
    {0x20BB7, Util::UNICODE_ONLY},  // "𠮷" (U+20BB7) from UCS4 range
                                    // (b/4176888)
  };

  const size_t kNumCases = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t i = 0; i < kNumCases; ++i) {
    EXPECT_EQ(kCases[i].expected, Util::GetCharacterSet(kCases[i].code_point))
        << "case index: " << i;
  }
}
TEST(UtilTest, CharacterSet) {
  // UTF-8 strings and the character set Util::GetCharacterSet() is expected
  // to report for the whole string.
  struct TextCase {
    Util::CharacterSet expected;
    const char *text;
  };
  const TextCase kCases[] = {
    // "あいうえお"
    {Util::JISX0208,
     "\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a"},
    {Util::ASCII, "abc"},
    // "abcあいう"
    {Util::JISX0208, "\x61\x62\x63\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86"},
    // Half-width katakana "ｶﾀｶﾅ" (U+FF76 U+FF80 U+FF76 U+FF85)
    {Util::JISX0201, "\xef\xbd\xb6\xef\xbe\x80\xef\xbd\xb6\xef\xbe\x85"},
    // "ｶﾀｶﾅカタカナ": half-width katakana followed by full-width katakana
    {Util::JISX0208,
     "\xef\xbd\xb6\xef\xbe\x80\xef\xbd\xb6\xef\xbe\x85"
     "\xe3\x82\xab\xe3\x82\xbf\xe3\x82\xab\xe3\x83\x8a"},
    // JIS X 0213
    {Util::JISX0213, "\xe2\x85\xa0"},      // "Ⅰ"
    {Util::JISX0213, "\xe2\x91\xa0"},      // "①"
    {Util::JISX0213, "\xe3\x8a\xa4"},      // "㊤"
    {Util::JISX0213, "\xF0\xA0\xAE\x9F"},  // "𠮟" from UCS4 range (b/4176888)
    {Util::JISX0213, "\xF0\xAA\x9A\xB2"},  // "𪚲" from UCS4 range (b/4176888)
    // Only in CP932
    {Util::CP932, "\xe5\x87\xac"},  // "凬"
    // Only in Unicode
    {Util::UNICODE_ONLY, "\xef\xbf\xa6"},      // "￦" (full-width won sign,
                                               // U+FFE6)
    {Util::UNICODE_ONLY, "\xF0\xA0\xAE\xB7"},  // "𠮷" (U+20BB7) from UCS4
                                               // range (b/4176888)
  };

  const size_t kNumCases = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t i = 0; i < kNumCases; ++i) {
    EXPECT_EQ(kCases[i].expected, Util::GetCharacterSet(kCases[i].text))
        << "case index: " << i;
  }
}
#ifdef OS_WIN
TEST(UtilTest, WideCharsLen) {
  // "a𠮟b": the middle character is U+20B9F, which the expectations below
  // count as two wide characters.
  const string input_utf8 = "a\360\240\256\237b";
  EXPECT_EQ(4, Util::WideCharsLen(input_utf8));

  // kExpectedLen[n] is the wide length of the first n characters of the input.
  const int kExpectedLen[] = {0, 1, 3, 4};
  const size_t kNumPrefixes = sizeof(kExpectedLen) / sizeof(kExpectedLen[0]);
  for (size_t num_chars = 0; num_chars < kNumPrefixes; ++num_chars) {
    EXPECT_EQ(kExpectedLen[num_chars],
              Util::WideCharsLen(Util::SubString(input_utf8, 0, num_chars)))
        << "prefix length: " << num_chars;
  }
}
TEST(UtilTest, UTF8ToWide) {
  // Convert an ASCII string to wide characters and back again; the round
  // trip must reproduce the original text.
  const string source_utf8 = "abc";
  wstring wide;
  string round_tripped_utf8;

  Util::UTF8ToWide(source_utf8, &wide);
  Util::WideToUTF8(wide, &round_tripped_utf8);

  EXPECT_EQ("abc", round_tripped_utf8);
}
TEST(UtilTest, WideToUTF8_SurrogatePairSupport) {
  // "𠮟" (U+20B9F) written as a UTF-16 surrogate pair.  A wchar_t array is
  // used because Visual C++ 2008 cannot embed surrogate pairs in string
  // literals such as L"\uD842\uDF9F".
  const wchar_t kSurrogatePair[] = {0xD842, 0xDF9F, 0};

  string utf8;
  Util::WideToUTF8(kSurrogatePair, &utf8);

  wstring round_tripped_wide;
  Util::UTF8ToWide(utf8, &round_tripped_wide);

  // The pair must become one 4-byte UTF-8 sequence and survive the way back.
  EXPECT_EQ("\360\240\256\237", utf8);
  EXPECT_EQ(kSurrogatePair, round_tripped_wide);
}
#endif // OS_WIN
TEST(UtilTest, IsKanaSymbolContained) {
  const string space(" ");
  const string fullstop("\xe3\x80\x82");  // "。"

  // The full stop counts as a kana symbol wherever it appears in the text.
  const string kWithSymbol[] = {
    fullstop,
    space + fullstop,
    fullstop + space,
  };
  const size_t kNumWithSymbol = sizeof(kWithSymbol) / sizeof(kWithSymbol[0]);
  for (size_t i = 0; i < kNumWithSymbol; ++i) {
    EXPECT_TRUE(Util::IsKanaSymbolContained(kWithSymbol[i]))
        << "case index: " << i;
  }

  // Neither a plain space nor the empty string contains a kana symbol.
  const string kWithoutSymbol[] = {
    space,
    "",
  };
  const size_t kNumWithoutSymbol =
      sizeof(kWithoutSymbol) / sizeof(kWithoutSymbol[0]);
  for (size_t i = 0; i < kNumWithoutSymbol; ++i) {
    EXPECT_FALSE(Util::IsKanaSymbolContained(kWithoutSymbol[i]))
        << "case index: " << i;
  }
}
#ifdef OS_ANDROID
// Encoding conversion is not part of the Android build at the moment.
#else
TEST(UtilTest, Issue2190350) {
  // Hiragana "あ" is E3 81 82 in UTF-8 and 82 A0 in Shift-JIS.
  const char kHiraganaAUtf8[] = "\xE3\x81\x82";
  const char kHiraganaASjis[] = "\x82\xA0";

  // UTF-8 -> Shift-JIS
  string sjis;
  Util::UTF8ToSJIS(kHiraganaAUtf8, &sjis);
  EXPECT_EQ(2, sjis.length());
  EXPECT_EQ(kHiraganaASjis, sjis);

  // Shift-JIS -> UTF-8
  string utf8;
  Util::SJISToUTF8(kHiraganaASjis, &utf8);
  EXPECT_EQ(3, utf8.length());
  EXPECT_EQ(kHiraganaAUtf8, utf8);
}
#endif
TEST(UtilTest, Fingerprint32WithSeed_uint32) {
  const uint32 kSeed = 0xabcdef;

  // The same 32-bit value, once as an integer and once as its four bytes in
  // little-endian order.  The test assumes a little-endian machine.
  const uint32 kNumber = 0x12345678;
  const char *kNumberBytes = "\x78\x56\x34\x12";

  const uint32 hash_from_number = Util::Fingerprint32WithSeed(kNumber, kSeed);
  const uint32 hash_from_bytes =
      Util::Fingerprint32WithSeed(kNumberBytes, 4, kSeed);

  // Both overloads are expected to produce the same fingerprint.
  EXPECT_EQ(hash_from_number, hash_from_bytes)
      << hash_from_number << " != " << hash_from_bytes;
}
TEST(UtilTest, RandomSeedTest) {
  // Two consecutive draws after seeding are expected to differ.
  Util::SetRandomSeed(0);
  const int first_draw = Util::Random(INT_MAX);
  const int second_draw = Util::Random(INT_MAX);
  EXPECT_NE(first_draw, second_draw);

  // Seeding again with the same value must replay the sequence from its
  // beginning.
  Util::SetRandomSeed(0);
  const int replayed_draw = Util::Random(INT_MAX);
  EXPECT_EQ(first_draw, replayed_draw);
}
TEST(UtilTest, SplitFirstChar32) {
  StringPiece remainder;
  char32 code_point = 0;

  // An empty input yields nothing and leaves both outputs untouched.
  remainder.clear();
  code_point = 0;
  EXPECT_FALSE(Util::SplitFirstChar32("", &code_point, &remainder));
  EXPECT_EQ(0, code_point);
  EXPECT_TRUE(remainder.empty());

  // Allow NULL to ignore the matched value.
  remainder.clear();
  EXPECT_TRUE(Util::SplitFirstChar32("01", NULL, &remainder));
  EXPECT_EQ("1", remainder);

  // Allow NULL to ignore the matched value.
  code_point = 0;
  EXPECT_TRUE(Util::SplitFirstChar32("01", &code_point, NULL));
  EXPECT_EQ('0', code_point);

  // Well-formed inputs: one encoded character followed by a single space.
  // The entries cover the smallest and largest value of every sequence
  // length from one to six bytes.
  struct ValidCase {
    const char *text;
    char32 expected_code_point;
  };
  const ValidCase kValidCases[] = {
    {"\x01 ", 1},
    {"\x7F ", 0x7F},
    {"\xC2\x80 ", 0x80},
    {"\xDF\xBF ", 0x7FF},
    {"\xE0\xA0\x80 ", 0x800},
    {"\xEF\xBF\xBF ", 0xFFFF},
    {"\xF0\x90\x80\x80 ", 0x10000},
    {"\xF7\xBF\xBF\xBF ", 0x1FFFFF},
    {"\xF8\x88\x80\x80\x80 ", 0x200000},
    {"\xFB\xBF\xBF\xBF\xBF ", 0x3FFFFFF},
    {"\xFC\x84\x80\x80\x80\x80 ", 0x4000000},
    {"\xFD\xBF\xBF\xBF\xBF\xBF ", 0x7FFFFFFF},
  };
  const size_t kNumValidCases = sizeof(kValidCases) / sizeof(kValidCases[0]);
  for (size_t i = 0; i < kNumValidCases; ++i) {
    remainder.clear();
    code_point = 0;
    EXPECT_TRUE(Util::SplitFirstChar32(kValidCases[i].text, &code_point,
                                       &remainder))
        << "case index: " << i;
    EXPECT_EQ(kValidCases[i].expected_code_point, code_point)
        << "case index: " << i;
    EXPECT_EQ(" ", remainder) << "case index: " << i;
  }

  // Malformed inputs: the call must fail and must not touch |code_point|.
  const char *const kInvalidCases[] = {
    // If there is any invalid sequence, the entire text should be treated as
    // an empty string.
    "\xC2 ",
    "\xC2\xC2 ",
    "\xE0 ",
    "\xE0\xE0\xE0 ",
    "\xF0 ",
    "\xF0\xF0\xF0\xF0 ",
    // BOM should be treated as invalid byte.
    "\xFF ",
    "\xFE ",
    // Invalid sequence for U+002F (redundant encoding)
    "\xC0\xAF",
    "\xE0\x80\xAF",
    "\xF0\x80\x80\xAF",
  };
  const size_t kNumInvalidCases =
      sizeof(kInvalidCases) / sizeof(kInvalidCases[0]);
  for (size_t i = 0; i < kNumInvalidCases; ++i) {
    code_point = 0;
    EXPECT_FALSE(Util::SplitFirstChar32(kInvalidCases[i], &code_point,
                                        &remainder))
        << "case index: " << i;
    EXPECT_EQ(0, code_point) << "case index: " << i;
  }
}
TEST(UtilTest, SplitLastChar32) {
  StringPiece remainder;
  char32 code_point = 0;

  // An empty input yields nothing and leaves both outputs untouched.
  remainder.clear();
  code_point = 0;
  EXPECT_FALSE(Util::SplitLastChar32("", &remainder, &code_point));
  EXPECT_EQ(0, code_point);
  EXPECT_TRUE(remainder.empty());

  // Allow NULL to ignore the matched value.
  code_point = 0;
  EXPECT_TRUE(Util::SplitLastChar32("01", NULL, &code_point));
  EXPECT_EQ('1', code_point);

  // Allow NULL to ignore the matched value.
  remainder.clear();
  EXPECT_TRUE(Util::SplitLastChar32("01", &remainder, NULL));
  EXPECT_EQ("0", remainder);

  // Well-formed inputs: a single space followed by one encoded character.
  // The entries cover the smallest and largest value of every sequence
  // length from one to six bytes.
  struct ValidCase {
    const char *text;
    char32 expected_code_point;
  };
  const ValidCase kValidCases[] = {
    {" \x01", 1},
    {" \x7F", 0x7F},
    {" \xC2\x80", 0x80},
    {" \xDF\xBF", 0x7FF},
    {" \xE0\xA0\x80", 0x800},
    {" \xEF\xBF\xBF", 0xFFFF},
    {" \xF0\x90\x80\x80", 0x10000},
    {" \xF7\xBF\xBF\xBF", 0x1FFFFF},
    {" \xF8\x88\x80\x80\x80", 0x200000},
    {" \xFB\xBF\xBF\xBF\xBF", 0x3FFFFFF},
    {" \xFC\x84\x80\x80\x80\x80", 0x4000000},
    {" \xFD\xBF\xBF\xBF\xBF\xBF", 0x7FFFFFFF},
  };
  const size_t kNumValidCases = sizeof(kValidCases) / sizeof(kValidCases[0]);
  for (size_t i = 0; i < kNumValidCases; ++i) {
    remainder.clear();
    code_point = 0;
    EXPECT_TRUE(Util::SplitLastChar32(kValidCases[i].text, &remainder,
                                      &code_point))
        << "case index: " << i;
    EXPECT_EQ(kValidCases[i].expected_code_point, code_point)
        << "case index: " << i;
    EXPECT_EQ(" ", remainder) << "case index: " << i;
  }

  // Malformed inputs: the call must fail and must not touch |code_point|.
  const char *const kInvalidCases[] = {
    // If there is any invalid sequence, the entire text should be treated as
    // an empty string.
    " \xC2",
    " \xC2\xC2",
    " \xE0",
    " \xE0\xE0\xE0",
    " \xF0",
    " \xF0\xF0\xF0\xF0",
    // BOM should be treated as invalid byte.
    " \xFF",
    " \xFE",
    // Invalid sequence for U+002F (redundant encoding)
    "\xC0\xAF",
    "\xE0\x80\xAF",
    "\xF0\x80\x80\xAF",
  };
  const size_t kNumInvalidCases =
      sizeof(kInvalidCases) / sizeof(kInvalidCases[0]);
  for (size_t i = 0; i < kNumInvalidCases; ++i) {
    code_point = 0;
    EXPECT_FALSE(Util::SplitLastChar32(kInvalidCases[i], &remainder,
                                       &code_point))
        << "case index: " << i;
    EXPECT_EQ(0, code_point) << "case index: " << i;
  }
}
} // namespace mozc