| // Copyright 2010-2015, Google Inc. |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #ifndef MOZC_BASE_NUMBER_UTIL_H_ |
| #define MOZC_BASE_NUMBER_UTIL_H_ |
| |
| #include <string> |
| #include <vector> |
| |
| #include "base/port.h" |
| #include "base/string_piece.h" |
| |
| namespace mozc { |
| |
| // This class sets up utilities to manage strings including numbers like |
| // Arabic numbers, Roman numbers, Kanji numbers, and so on. |
| class NumberUtil { |
| public: |
| // Convert the number to a string and append it to output. |
| static string SimpleItoa(int32 number); |
| static string SimpleItoa(uint32 number); |
| static string SimpleItoa(int64 number); |
| static string SimpleItoa(uint64 number); |
| |
| // Convert the string to a number and return it. |
| static int SimpleAtoi(StringPiece str); |
| |
| // Returns true if the given input_string contains only number characters |
| // (regardless of halfwidth or fullwidth). |
| // False for empty string. |
| static bool IsArabicNumber(StringPiece input_string); |
| |
| // Returns true if the given str consists of only ASCII digits. |
| // False for empty string. |
| static bool IsDecimalInteger(StringPiece str); |
| |
| struct NumberString { |
| public: |
| enum Style { |
| DEFAULT_STYLE = 0, |
| // 123,456,789 |
| NUMBER_SEPARATED_ARABIC_HALFWIDTH, |
| // "123,456,789" |
| NUMBER_SEPARATED_ARABIC_FULLWIDTH, |
| // "123億456万7890" |
| NUMBER_ARABIC_AND_KANJI_HALFWIDTH, |
| // "123億456万7890" |
| NUMBER_ARABIC_AND_KANJI_FULLWIDTH, |
| // "一億二千三百四十五万六千七百八十九" |
| NUMBER_KANJI, |
| // "壱億弐千参百四拾五万六千七百八拾九" |
| NUMBER_OLD_KANJI, |
| // "ⅠⅡⅢ" |
| NUMBER_ROMAN_CAPITAL, |
| // "ⅰⅱⅲ" |
| NUMBER_ROMAN_SMALL, |
| // "①②③" |
| NUMBER_CIRCLED, |
| // "ニ〇〇" |
| NUMBER_KANJI_ARABIC, |
| // "0x4d2" (1234 in decimal) |
| NUMBER_HEX, |
| // "02322" (1234 in decimal) |
| NUMBER_OCT, |
| // "0b10011010010" (1234 in decimal) |
| NUMBER_BIN, |
| }; |
| |
| NumberString(StringPiece value, StringPiece description, Style style) |
| : value(value.as_string()), |
| description(description.as_string()), |
| style(style) {} |
| |
| // Converted string |
| string value; |
| |
| // Description of Converted String |
| string description; |
| |
| // Converted Number Style |
| Style style; |
| }; |
| |
| // Following five functions are main functions to convert number strings. |
| // They receive two arguments: |
| // - input_num: a string consisting of Arabic numeric characters. |
| // - output: a vector consists of conveted results. |
| // If |input_num| is invalid or cannot represent as the form, these |
| // functions do nothing. If a method finds more than one representations, |
| // it pushes all candidates into the output. |
| |
| // Converts half-width Arabic number string to Kan-su-ji string. |
| // - input_num: a string which *must* be half-width number string. |
| // - output: function appends new representation into output vector. |
| // value, desc and style are stored same size and same order. |
| // if invalid string is set, this function do nothing. |
| static bool ArabicToKanji(StringPiece input_num, |
| vector<NumberString> *output); |
| |
| // Converts half-width Arabic number string to Separated Arabic string. |
| // (e.g. 1234567890 are converted to 1,234,567,890) |
| // Arguments are same as ArabicToKanji (above). |
| static bool ArabicToSeparatedArabic(StringPiece input_num, |
| vector<NumberString> *output); |
| |
| // Converts half-width Arabic number string to full-width Arabic number |
| // string. |
| // Arguments are same as ArabicToKanji (above). |
| static bool ArabicToWideArabic(StringPiece input_num, |
| vector<NumberString> *output); |
| |
| // Converts half-width Arabic number to various styles. |
| // Arguments are same as ArabicToKanji (above). |
| // - Roman style (i) (ii) ... |
| static bool ArabicToOtherForms(StringPiece input_num, |
| vector<NumberString> *output); |
| |
| // Converts half-width Arabic number to various radices (2,8,16). |
| // Arguments are same as ArabicToKanji (above). |
| // Excepted number of input digits is smaller than 20, but it can be |
| // converted only if it can be stored in an unsigned 64-bit integer. |
| static bool ArabicToOtherRadixes(StringPiece input_num, |
| vector<NumberString> *output); |
| |
| // Converts the string to a 32-/64-bit signed/unsigned int. Returns true if |
| // success or false if the string is in the wrong format. |
| static bool SafeStrToInt32(StringPiece str, int32 *value); |
| static bool SafeStrToInt64(StringPiece str, int64 *value); |
| static bool SafeStrToUInt32(StringPiece str, uint32 *value); |
| static bool SafeStrToUInt64(StringPiece str, uint64 *value); |
| static bool SafeHexStrToUInt32(StringPiece str, uint32 *value); |
| static bool SafeOctStrToUInt32(StringPiece str, uint32 *value); |
| |
| // Converts the string to a double. Returns true if success or false if the |
| // string is in the wrong format. |
| // If |str| is a hexadecimal number like "0x1234", the result depends on |
| // compiler. It returns false when compiled by VisualC++. On the other hand |
| // it returns true and sets correct value when compiled by gcc. |
| static bool SafeStrToDouble(StringPiece str, double *value); |
| |
| // Converts the string to a float. Returns true if success or false if the |
| // string is in the wrong format. |
| static bool SafeStrToFloat(StringPiece str, float *value); |
| // Converts the string to a float. |
| static float StrToFloat(StringPiece str) { |
| float value; |
| SafeStrToFloat(str, &value); |
| return value; |
| } |
| |
| // Convert Kanji numeric into Arabic numeric. |
| // When the trim_leading_zeros is true, leading zeros for arabic_output |
| // are trimmed off. |
| // TODO(toshiyuki): This parameter is only applied for arabic_output now. |
| // |
| // Input: "2千五百" |
| // kanji_output: "二千五百" |
| // arabic output: 2500 |
| // |
| // NormalizeNumbers() returns false if it finds non-number characters. |
| // NormalizeNumbersWithSuffix() skips trailing non-number characters and |
| // return them in "suffix". |
| static bool NormalizeNumbers(StringPiece input, |
| bool trim_leading_zeros, |
| string *kanji_output, |
| string *arabic_output); |
| |
| static bool NormalizeNumbersWithSuffix(StringPiece input, |
| bool trim_leading_zeros, |
| string *kanji_output, |
| string *arabic_output, |
| string *suffix); |
| |
| // Note: this function just does charcter-by-character conversion |
| // "百二十" -> 10020 |
| static void KanjiNumberToArabicNumber(StringPiece input, string *output); |
| |
| private: |
| DISALLOW_IMPLICIT_CONSTRUCTORS(NumberUtil); |
| }; |
| |
| } // namespace mozc |
| |
| #endif // MOZC_BASE_NUMBER_UTIL_H_ |