| // Copyright 2010-2015, Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #ifndef MOZC_BASE_UTIL_H_ |
| #define MOZC_BASE_UTIL_H_ |
| |
| #include <climits> |
| #include <ctime> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/logging.h" |
| #include "base/port.h" |
| #include "base/string_piece.h" |
| |
| struct tm; |
| |
| namespace mozc { |
| |
| // SplitIterator - Iteratively splits a StringPiece to sub-StringPieces. |
| // |
| // This template class takes two template parameters, Delimiter and Option. |
| // |
| // Delimiter: |
//   - SingleDelimiter: Input is split by exactly one character.  If your
//     delimiter is a single character, use this parameter because the
//     algorithm is optimized for this common case.
//   - MultiDelimiter: Input is split by any of the specified characters.
| // |
| // Option: |
| // - SkipEmpty (default): empty pieces are ignored: |
| // ",a,b,,c," -> {"a", "b", "c"} (delimiter = ',') |
| // - AllowEmpty: empty pieces are not ignored: |
| // ",a,b,,c," -> {"", "a", "b", "", "c", ""} (delimiter = ',') |
| // |
| // Usage Example: |
| // |
| // // 1. SingleDelimiter and SkipEmpty |
| // for (SplitIterator<SingleDelimiter> iter("this,is,,mozc", ","); |
| // !iter.Done(); iter.Next()) { |
| // StringPiece sp = iter.Get(); // "this", "is", and finally "mozc" |
| // ... |
| // } |
| // |
| // // 2. SingleDelimiter and AllowEmpty |
| // for (SplitIterator<SingleDelimiter, AllowEmpty> iter("this,is,,mozc", ","); |
| // !iter.Done(); iter.Next()) { |
| // StringPiece sp = iter.Get(); // "this", "is", "", and finally "mozc" |
| // ... |
| // } |
| // |
| // // 3. MultiDelimiter and SkipEmpty |
| // for (SplitIterator<MultiDelimiter> iter("this,is:\tmozc", ",:\t"); |
| // !iter.Done(); iter.Next()) { |
| // StringPiece sp = iter.Get(); // "this", "is", and finally "mozc" |
| // ... |
| // } |
| // |
| // // 4. MultiDelimiter and AllowEmpty |
| // for (SplitIterator<MultiDelimiter, AllowEmpty> |
| // iter("this,is::\tmozc", ",:\t"); !iter.Done(); iter.Next()) { |
| // StringPiece sp = iter.Get(); // "this", "is", "", "", and finally "mozc" |
| // ... |
| // } |
// Option tags for SplitIterator.  They carry no data and are used only to
// select a specialization.
struct SkipEmpty {};   // Default option of SplitIterator.
struct AllowEmpty {};

// Delimiter policies for SplitIterator.  Only declared here; the class
// definitions appear further down in this header.
class SingleDelimiter;
class MultiDelimiter;
| |
| template <typename Delimiter, typename Option = SkipEmpty> |
| class SplitIterator { |
| public: |
| SplitIterator(StringPiece s, const char *delim); |
| StringPiece Get() const; |
| void Next(); |
| bool Done() const; |
| }; |
| |
| class Util { |
| public: |
| // String utils |
| template <typename StringContainer> |
| static void PushBackStringPiece(StringPiece s, StringContainer *container) { |
| container->push_back(string()); |
| s.CopyToString(&container->back()); |
| } |
| |
| static void SplitStringUsing(StringPiece str, |
| const char *delm, |
| vector<string> *output); |
| static void SplitStringUsing(StringPiece str, |
| const char *delm, |
| vector<StringPiece> *output); |
| |
| static void SplitStringAllowEmpty(StringPiece str, |
| const char *delm, |
| vector<string> *output); |
| |
| static void SplitStringToUtf8Chars(const string &str, |
| vector<string> *output); |
| |
| static void SplitCSV(const string &str, vector<string> *output); |
| |
| static void JoinStrings(const vector<string> &str, |
| const char *delm, |
| string *output); |
| static void JoinStringPieces(const vector<StringPiece> &str, |
| const char *delm, |
| string *output); |
| static void ConcatStrings(StringPiece s1, StringPiece s2, string *output); |
| |
| static void AppendStringWithDelimiter(StringPiece delimiter, |
| StringPiece append_string, |
| string *output); |
| |
| static void StringReplace(StringPiece s, StringPiece oldsub, |
| StringPiece newsub, bool replace_all, |
| string *res); |
| |
| static void LowerString(string *output); |
| static void UpperString(string *output); |
| |
| // Transforms the first character to the upper case and tailing characters to |
| // the lower cases. ex. "abCd" => "Abcd". |
| static void CapitalizeString(string *output); |
| |
| // Returns true if the characters in [first, last) are all in lower case |
| // ASCII. |
| static bool IsLowerAscii(StringPiece s); |
| |
| // Returns true if the characters in [first, last) are all in upper case |
| // ASCII. |
| static bool IsUpperAscii(StringPiece s); |
| |
| // Returns true if the text in the rage [first, last) is capitalized ASCII. |
| static bool IsCapitalizedAscii(StringPiece s); |
| |
| // Returns true if the characters in [first, last) are all in lower case ASCII |
| // or all in upper case ASCII. Namely, equivalent to |
| // IsLowerAscii(first, last) || IsUpperAscii(first last) |
| static bool IsLowerOrUpperAscii(StringPiece s); |
| |
| // Returns true if the text in the range [first, last) is 1) all in upper case |
| // ASCII, or 2) capitalized. |
| static bool IsUpperOrCapitalizedAscii(StringPiece s); |
| |
| // Strips the leading/trailing white spaces from the input and stores it to |
| // the output. If the input does not have such white spaces, this method just |
| // copies the input into the output. It clears the output always. |
| static void StripWhiteSpaces(const string &str, string *output); |
| |
| static size_t OneCharLen(const char *src); |
| |
| static size_t CharsLen(const char *src, size_t size); |
| |
| static size_t CharsLen(StringPiece str) { |
| return CharsLen(str.data(), str.size()); |
| } |
| |
| static char32 UTF8ToUCS4(const char *begin, |
| const char *end, |
| size_t *mblen); |
| |
| // Returns true if |s| is split into |first_char32| + |rest|. |
| // You can pass NULL to |first_char32| and/or |rest| to ignore the matched |
| // value. |
| // Returns false if an invalid UTF-8 sequence is prefixed. That is, |rest| may |
| // contain any invalid sequence even when this method returns true. |
| static bool SplitFirstChar32(StringPiece s, |
| char32 *first_char32, |
| StringPiece *rest); |
| |
| // Returns true if |s| is split into |rest| + |last_char32|. |
| // You can pass NULL to |rest| and/or |last_char32| to ignore the matched |
| // value. |
| // Returns false if an invalid UTF-8 sequence is suffixed. That is, |rest| may |
| // contain any invalid sequence even when this method returns true. |
| static bool SplitLastChar32(StringPiece s, |
| StringPiece *rest, |
| char32 *last_char32); |
| |
| static void UCS4ToUTF8(char32 c, string *output); |
| static void UCS4ToUTF8Append(char32 c, string *output); |
| |
| #ifdef OS_WIN |
| // Returns how many wide characters are necessary in UTF-16 to represent |
| // given UTF-8 string. Note that the result of this method becomes greater |
| // than that of Util::CharsLen if |src| contains any character which is |
| // encoded by the surrogate-pair in UTF-16. |
| static size_t WideCharsLen(StringPiece src); |
| // Converts the encoding of the specified string from UTF-8 to UTF-16, and |
| // vice versa. |
| static int UTF8ToWide(StringPiece input, wstring *output); |
| static int WideToUTF8(const wchar_t *input, string *output); |
| static int WideToUTF8(const wstring &input, string *output); |
| #endif // OS_WIN |
| |
| // Extracts a substring range, where both start and length are in terms of |
| // UTF8 size. Note that the returned string piece refers to the same memory |
| // block as the input. |
| static StringPiece SubStringPiece(StringPiece src, |
| size_t start, size_t length); |
| // This version extracts the substring to the end. |
| static StringPiece SubStringPiece(StringPiece src, size_t start); |
| |
| // Extracts a substring of length |length| starting at |start|. |
| // Note: |start| is the start position in UTF8, not byte position. |
| static void SubString(StringPiece src, size_t start, size_t length, |
| string *result); |
| |
| static string SubString(StringPiece src, size_t start, size_t length) { |
| string result; |
| SubString(src, start, length, &result); |
| return result; |
| } |
| |
| // Determines whether the beginning of |str| matches |prefix|. |
| static bool StartsWith(StringPiece str, StringPiece prefix); |
| |
| // Determines whether the end of |str| matches |suffix|. |
| static bool EndsWith(StringPiece str, StringPiece suffix); |
| |
| // Strip a heading UTF-8 BOM (binary order mark) sequence (= \xef\xbb\xbf). |
| static void StripUTF8BOM(string *line); |
| |
| // return true the line starts with UTF16-LE/UTF16-BE BOM. |
| static bool IsUTF16BOM(const string &line); |
| |
| // Returns true if the given |s| has only one ucs4 character, and it is |
| // in the range of Android Emoji PUA. |
| static bool IsAndroidPuaEmoji(StringPiece s); |
| |
| // C++ string version of sprintf. |
| static string StringPrintf(const char *format, ...) |
| // Tell the compiler to do printf format string checking. |
| PRINTF_ATTRIBUTE(1, 2); |
| |
| // Chop the return characters (i.e. '\n' and '\r') at the end of the |
| // given line. |
| static bool ChopReturns(string *line); |
| |
| // 32bit Fingerprint |
| static uint32 Fingerprint32(const string &key); |
| static uint32 Fingerprint32(const char *str, size_t length); |
| static uint32 Fingerprint32(const char *str); |
| |
| static uint32 Fingerprint32WithSeed(const string &key, |
| uint32 seed); |
| static uint32 Fingerprint32WithSeed(const char *str, |
| size_t length, uint32 seed); |
| static uint32 Fingerprint32WithSeed(const char *str, |
| uint32 seed); |
| static uint32 Fingerprint32WithSeed(uint32 num, uint32 seed); |
| |
| // 64bit Fingerprint |
| static uint64 Fingerprint(const string &key); |
| static uint64 Fingerprint(const char *str, size_t length); |
| |
| static uint64 FingerprintWithSeed(const string &key, uint32 seed); |
| |
| static uint64 FingerprintWithSeed(const char *str, |
| size_t length, uint32 seed); |
| |
| // Generate a random sequence. It uses secure method if possible, or Random() |
| // as a fallback method. |
| static void GetRandomSequence(char *buf, size_t buf_size); |
| static void GetRandomAsciiSequence(char *buf, size_t buf_size); |
| |
| // Return random variable whose range is [0..size-1]. |
| // This function uses rand() internally, so don't use it for |
| // security-sensitive purpose. |
| // Caveats: The returned value does not have good granularity especially |
| // when |size| is larger than |RAND_MAX|. |
| // TODO(yukawa): Improve the granularity. |
| // TODO(yukawa): Clarify the semantics when |size| is 0 or smaller. |
| static int Random(int size); |
| |
| // Set the seed of Util::Random(). |
| static void SetRandomSeed(uint32 seed); |
| |
| // Get the current time info using gettimeofday-like functions. |
| // sec: number of seconds from epoch |
| // usec: micro-second passed: [0,1000000) |
| static void GetTimeOfDay(uint64 *sec, uint32 *usec); |
| |
| // Get the current time info using time-like function |
| // For Windows, _time64() is used. |
| // For Linux/Mac, time() is used. |
| static uint64 GetTime(); |
| |
| // Get the current local time to current_time. Returns true if succeeded. |
| static bool GetCurrentTm(tm *current_time); |
| // Get local time, which is offset_sec seconds after now. Returns true if |
| // succeeded. |
| static bool GetTmWithOffsetSecond(tm *time_with_offset, int offset_sec); |
| |
| // Get the system frequency to calculate the time from ticks. |
| static uint64 GetFrequency(); |
| |
| // Get the current ticks. It may return incorrect value on Virtual Machines. |
| // If you'd like to get a value in secs, it is necessary to divide a result by |
| // GetFrequency(). |
| static uint64 GetTicks(); |
| |
| #ifdef __native_client__ |
| // Sets the time difference between local time and UTC time in seconds. |
| // We use this function in NaCl Mozc because we can't know the local timezone |
| // in NaCl environment. |
| static void SetTimezoneOffset(int32 timezone_offset_sec); |
| #endif // __native_client__ |
| |
| // Interface of the helper class. |
| // Default implementation is defined in the .cc file. |
| class ClockInterface { |
| public: |
| virtual ~ClockInterface() {} |
| virtual void GetTimeOfDay(uint64 *sec, uint32 *usec) = 0; |
| virtual uint64 GetTime() = 0; |
| virtual bool GetTmWithOffsetSecond(time_t offset_sec, tm *output) = 0; |
| |
| // High accuracy clock. |
| virtual uint64 GetFrequency() = 0; |
| virtual uint64 GetTicks() = 0; |
| #ifdef __native_client__ |
| virtual void SetTimezoneOffset(int32 timezone_offset_sec) = 0; |
| #endif // __native_client__ |
| }; |
| |
| // This function is provided for test. |
| // The behavior of system clock can be customized by replacing this handler. |
| static void SetClockHandler(Util::ClockInterface *handler); |
| |
| // Suspends the execution of the current thread until |
| // the time-out interval elapses. |
| static void Sleep(uint32 msec); |
| |
| // Japanese utilities for character form transliteration. |
| static void HiraganaToKatakana(StringPiece input, string *output); |
| static void HiraganaToHalfwidthKatakana(StringPiece input, string *output); |
| static void HiraganaToRomanji(StringPiece input, string *output); |
| static void HalfWidthAsciiToFullWidthAscii(StringPiece input, string *output); |
| static void FullWidthAsciiToHalfWidthAscii(StringPiece input, string *output); |
| static void HiraganaToFullwidthRomanji(StringPiece input, string *output); |
| static void RomanjiToHiragana(StringPiece input, string *output); |
| static void KatakanaToHiragana(StringPiece input, string *output); |
| static void HalfWidthKatakanaToFullWidthKatakana(StringPiece input, |
| string *output); |
| static void FullWidthKatakanaToHalfWidthKatakana(StringPiece input, |
| string *output); |
| static void FullWidthToHalfWidth(StringPiece input, string *output); |
| static void HalfWidthToFullWidth(StringPiece input, string *output); |
| |
| // Returns true if all chars in input are both defined |
| // in full width and half-width-katakana area |
| static bool IsFullWidthSymbolInHalfWidthKatakana(const string &input); |
| |
| // Returns true if all chars are defiend in half-width-katakana area. |
| static bool IsHalfWidthKatakanaSymbol(const string &input); |
| |
| // Returns true if one or more Kana-symbol characters are in the input. |
| static bool IsKanaSymbolContained(const string &input); |
| |
| // Returns true if |input| looks like a pure English word. |
| static bool IsEnglishTransliteration(const string &input); |
| |
| static void NormalizeVoicedSoundMark(StringPiece input, string *output); |
| |
| // Returns true if key is an open bracket. If key is an open bracket, |
| // corresponding close bracket is assigned. |
| static bool IsOpenBracket(const string &key, string *close_bracket); |
| |
| // Returns true if key is a close bracket. If key is a close bracket, |
| // corresponding open bracket is assigned. |
| static bool IsCloseBracket(const string &key, string *open_bracket); |
| |
| // Code converter |
| #ifndef OS_WIN |
| static void UTF8ToEUC(const string &input, string *output); |
| static void EUCToUTF8(const string &input, string *output); |
| #endif // OS_WIDNWOS |
| |
| static void UTF8ToSJIS(const string &input, string *output); |
| static void SJISToUTF8(const string &input, string *output); |
| |
| static void EncodeURI(const string &input, string *output); |
| static void DecodeURI(const string &input, string *output); |
| |
| // Make a string for CGI parameters from params and append it to |
| // base. The result looks like: |
| // <base><key1>=<encoded val1>&<key2>=<encoded val2> |
| // The base is supposed to end "?" or "&". |
| static void AppendCGIParams(const vector<pair<string, string> > ¶ms, |
| string *base); |
| |
| // Escape any characters into \x prefixed hex digits. |
| // ex. "ABC" => "\x41\x42\x43". |
| static void Escape(const string &input, string *output); |
| |
| // Escape any characters into % prefixed hex digits. |
| // ex. "ABC" => "%41%42%43" |
| static void EscapeUrl(const string &input, string *output); |
| static string EscapeUrl(const string &input); |
| |
| // Escape/Unescape unsafe html characters such as <, > and &. |
| static void EscapeHtml(const string &text, string *res); |
| static void UnescapeHtml(const string &text, string *res); |
| |
| // Escape unsafe CSS characters like <. Note > and & are not |
| // escaped becaused they are operands of CSS. |
| static void EscapeCss(const string &text, string *result); |
| |
| enum ScriptType { |
| UNKNOWN_SCRIPT, |
| KATAKANA, |
| HIRAGANA, |
| KANJI, |
| NUMBER, |
| ALPHABET, |
| EMOJI, |
| SCRIPT_TYPE_SIZE, |
| }; |
| |
| // return script type of w |
| static ScriptType GetScriptType(char32 w); |
| |
| // return script type of first character in [begin, end) |
| // This function finds the first UTF-8 chars and returns its script type. |
| // The length of the character will be returned in *mblen. |
| // This function calls GetScriptType(char32) internally. |
| static ScriptType GetScriptType(const char *begin, const char *end, |
| size_t *mblen); |
| |
| // return script type of first character in str |
| static ScriptType GetFirstScriptType(const string &str); |
| |
| // return script type of string. all chars in str must be |
| // KATAKANA/HIRAGANA/KANJI/NUMBER or ALPHABET. |
| // If str has mixed scripts, this function returns UNKNOWN_SCRIPT |
| static ScriptType GetScriptType(const string &str); |
| |
| // The same as GetScryptType(), but it ignores symbols |
| // in the |str|. |
| static ScriptType GetScriptTypeWithoutSymbols(const string &str); |
| |
| // return true if all script_type in str is "type" |
| static bool IsScriptType(StringPiece str, ScriptType type); |
| |
| // return true if the string contains script_type char |
| static bool ContainsScriptType(StringPiece str, ScriptType type); |
| |
| // See 'Unicode Standard Annex #11: EAST ASIAN WIDTH' |
| // http://www.unicode.org/reports/tr11/ |
| // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt |
| enum FormType { |
| UNKNOWN_FORM, |
| HALF_WIDTH, // [Na] and [H] in 'Unicode Standard Annex #11' |
| FULL_WIDTH, // Any other characters |
| FORM_TYPE_SIZE, |
| }; |
| |
| // return Form type of single character. |
| // This function never returns UNKNOWN_FORM. |
| static FormType GetFormType(char32 w); |
| |
| // return FormType of string. |
| // return UNKNOWN_FORM if |str| contains both HALF_WIDTH and FULL_WIDTH. |
| static FormType GetFormType(const string &str); |
| |
| // Basically, if charset >= JIX0212, the char is platform dependent char. |
| enum CharacterSet { |
| ASCII, // ASCII (simply ucs4 <= 0x007F) |
| JISX0201, // defined at least in 0201 (can be in 0208/0212/0213/CP9232) |
| JISX0208, // defined at least in 0208 (can be in 0212/0213/CP932) |
| JISX0212, // defined at least in 0212 (can be in 0213/CP932) |
| JISX0213, // defined at least in 0213 (can be in CP932) |
| CP932, // defined only in CP932, not in JISX02* |
| UNICODE_ONLY, // defined only in UNICODE, not in JISX* nor CP932 |
| CHARACTER_SET_SIZE, |
| }; |
| |
| // return CharacterSet |
| static CharacterSet GetCharacterSet(char32 ucs4); |
| |
| // return CharacterSet of string. |
| // if the given string contains multiple charasets, return |
| // the maximum character set. |
| static CharacterSet GetCharacterSet(const string &str); |
| |
| private: |
| DISALLOW_IMPLICIT_CONSTRUCTORS(Util); |
| }; |
| |
| // Const iterator implementation to traverse on a (utf8) string as a char32 |
| // string. |
| // |
| // Example usage: |
| // string utf8; |
| // for (ConstChar32Iterator iter(utf8); !iter.Done(); iter.Next()) { |
| // char32 c = iter.Get(); |
| // ... |
| // } |
| class ConstChar32Iterator { |
| public: |
| explicit ConstChar32Iterator(StringPiece utf8_string); |
| char32 Get() const; |
| void Next(); |
| bool Done() const; |
| |
| private: |
| StringPiece utf8_string_; |
| char32 current_; |
| bool done_; |
| |
| DISALLOW_COPY_AND_ASSIGN(ConstChar32Iterator); |
| }; |
| |
| // Const reverse iterator implementation to traverse on a (utf8) string as a |
| // char32 string. |
| // |
| // Example usage: |
| // string utf8; |
| // for (ConstChar32ReverseIterator iter(utf8); !iter.Done(); iter.Next()) { |
| // char32 c = iter.Get(); |
| // ... |
| // } |
| class ConstChar32ReverseIterator { |
| public: |
| explicit ConstChar32ReverseIterator(StringPiece utf8_string); |
| char32 Get() const; |
| void Next(); |
| bool Done() const; |
| |
| private: |
| StringPiece utf8_string_; |
| char32 current_; |
| bool done_; |
| |
| DISALLOW_COPY_AND_ASSIGN(ConstChar32ReverseIterator); |
| }; |
| |
| // Actual definitions of delimiter classes. |
| class SingleDelimiter { |
| public: |
| explicit SingleDelimiter(const char *delim) : delim_(*delim) {} |
| bool Contains(char c) const { return c == delim_; } |
| |
| private: |
| const char delim_; |
| DISALLOW_COPY_AND_ASSIGN(SingleDelimiter); |
| }; |
| |
| class MultiDelimiter { |
| public: |
| static const size_t kTableSize = UCHAR_MAX / 8; |
| |
| explicit MultiDelimiter(const char* delim); |
| |
| bool Contains(char c) const { |
| const unsigned char uc = static_cast<unsigned char>(c); |
| return (lookup_table_[uc >> 3] & (1 << (uc & 0x07))) != 0; |
| } |
| |
| private: |
| // Bit field for looking up delimiters. Think of this as a 256-bit array where |
| // n-th bit is set to 1 if the delimiters contain a character whose unsigned |
| // char code is n. |
| unsigned char lookup_table_[kTableSize]; |
| DISALLOW_COPY_AND_ASSIGN(MultiDelimiter); |
| }; |
| |
| // Declarations of the partial specializations of SplitIterator for two options. |
| // Implementations are explicitly instantiated in util.cc. |
| template <typename Delimiter> |
| class SplitIterator<Delimiter, SkipEmpty> { |
| public: |
| SplitIterator(StringPiece s, const char *delim); |
| StringPiece Get() const { return StringPiece(sp_begin_, sp_len_); } |
| bool Done() const { return sp_begin_ == end_; } |
| void Next(); |
| |
| private: |
| const char *const end_; |
| const Delimiter delim_; |
| const char *sp_begin_; |
| StringPiece::size_type sp_len_; |
| |
| DISALLOW_COPY_AND_ASSIGN(SplitIterator); |
| }; |
| |
| template <typename Delimiter> |
| class SplitIterator<Delimiter, AllowEmpty> { |
| public: |
| SplitIterator(StringPiece s, const char *delim); |
| StringPiece Get() const { return StringPiece(sp_begin_, sp_len_); } |
| bool Done() const { return done_; } |
| void Next(); |
| |
| private: |
| const char *const end_; |
| const Delimiter delim_; |
| const char *sp_begin_; |
| StringPiece::size_type sp_len_; |
| bool done_; |
| |
| DISALLOW_COPY_AND_ASSIGN(SplitIterator); |
| }; |
| |
| } // namespace mozc |
| |
| #endif // MOZC_BASE_UTIL_H_ |