| # -*- coding: utf-8 -*- |
| # Copyright 2010-2015, Google Inc. |
| # All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are |
| # met: |
| # |
| # * Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # * Redistributions in binary form must reproduce the above |
| # copyright notice, this list of conditions and the following disclaimer |
| # in the documentation and/or other materials provided with the |
| # distribution. |
| # * Neither the name of Google Inc. nor the names of its |
| # contributors may be used to endorse or promote products derived from |
| # this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| """Generate emoji data file (.java) |
| |
| The generated .java file is used by the Android version. |
| """ |
| |
| __author__ = "yoichio" |
| |
| from collections import defaultdict |
| import logging |
| import optparse |
| import sys |
| |
| from build_tools import code_generator_util |
| |
# Emoji categories, in the order OutputData emits their arrays.
CATEGORY_LIST = [
    'FACE',
    'FOOD',
    'CITY',
    'ACTIVITY',
    'NATURE',
]
| |
| |
def ReadData(stream):
    """Collects emoji entries from the emoji data stream, grouped by category.

    Columns 0, 2, 8, 9, 10, 11 and 12 of every parsed row are read as, in
    order: Unicode code point (hex text), carrier PUA code point (hex text),
    Japanese name, DOCOMO name, Softbank name, KDDI name, and a
    '<category>-<index>' key.

    Rows whose PUA column is empty or begins with '>' are skipped. A row
    without a Unicode code point is stored with the dummy code point 0,
    unless it carries a Japanese name; in that case a fatal error is logged
    and sys.exit(-1) is called.

    Args:
      stream: The emoji data source (main() passes the open emoji_data.tsv
        file). It is handed to code_generator_util for comment skipping and
        tab-separated column parsing.

    Returns:
      A defaultdict mapping each category name to a list of tuples
      (index, code, pua_code, japanese_name, docomo_name, softbank_name,
      kddi_name). index is the text that followed the '-' in the key, kept
      as a string; code and pua_code are ints parsed as base 16.
    """
    rows = code_generator_util.SkipLineComment(stream)
    rows = code_generator_util.ParseColumnStream(rows, delimiter='\t')
    rows = code_generator_util.SelectColumn(rows, [0, 2, 8, 9, 10, 11, 12])

    entries_by_category = defaultdict(list)
    for row in rows:
        (unicode_hex, pua_hex, japanese_name, docomo_name, softbank_name,
         kddi_name, category_key) = row

        # Skip rows whose PUA column is empty or begins with '>'.
        if not pua_hex or pua_hex.startswith('>'):
            continue

        # A named Unicode emoji must come with its code point.
        if not unicode_hex and japanese_name:
            logging.fatal('No Unicode emoji code point found.')
            sys.exit(-1)
        # Entries without a Unicode code point get the dummy code point 0.
        unicode_hex = unicode_hex or '0'

        category, index = category_key.split('-')
        entries_by_category[category].append(
            (index, int(unicode_hex, 16), int(pua_hex, 16),
             japanese_name, docomo_name, softbank_name, kddi_name))
    return entries_by_category
| |
| |
# Maps fullwidth Latin letters, digits and parentheses to their ASCII
# equivalents, so PreprocessName can normalize names that use them.
#
# The keys must be the *fullwidth* forms (e.g. U+FF21 for 'A'); a map whose
# keys are the ASCII characters themselves is an identity mapping and
# normalizes nothing. In the Unicode "Halfwidth and Fullwidth Forms" block
# each fullwidth character sits exactly 0xFEE0 above its ASCII counterpart,
# so the keys are derived from the ASCII values instead of being typed as
# literals, which keeps them safe from encoding or normalization damage.
#
# u'%c' % code_point is used because it builds a one-character text string
# on both Python 2 and Python 3.
CHARA_NORMALIZE_MAP = dict(
    (u'%c' % (ord(ascii_char) + 0xFEE0), ascii_char)
    for ascii_char in ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                       'abcdefghijklmnopqrstuvwxyz'
                       '0123456789'
                       '()'))
| |
| |
def PreprocessName(name):
    """Normalizes an emoji name for embedding in a Java string literal.

    Each character found in CHARA_NORMALIZE_MAP is replaced by its mapped
    value, and every '(' is then prefixed with the two characters backslash
    and 'n', so a parenthesized part of the name is preceded by a newline
    escape once the result is placed inside a Java string literal.

    Args:
      name: The name as UTF-8 encoded bytes (a Python 2 str read from the
        data file) or as already-decoded text.

    Returns:
      The processed name in the same kind of string as the input: UTF-8
      encoded bytes for bytes input, text for text input.

    Raises:
      UnicodeDecodeError: If name is bytes that are not valid UTF-8.
    """
    # Decode only when handed bytes; the Python-2-only unicode() builtin
    # used previously also rejected text that was already decoded.
    was_bytes = isinstance(name, bytes)
    text = name.decode('utf-8') if was_bytes else name
    text = u''.join(CHARA_NORMALIZE_MAP.get(c, c) for c in text)
    # '(' is ASCII, so replacing it before encoding yields the same bytes as
    # replacing it in the encoded UTF-8 string.
    text = text.replace(u'(', u'\\n(')
    return text.encode('utf-8') if was_bytes else text
| |
| |
def _WriteJavaArray(stream, header, element_lines):
    """Writes one Java array constant: header, element lines, closing brace."""
    stream.write(header)
    for line in element_lines:
        stream.write(line)
    stream.write(' };\n')


def _FormatNameElement(name):
    """Formats a name as a quoted Java array element, or null when empty."""
    if name:
        return ' "%s", \n' % PreprocessName(name)
    return ' null, \n'


def OutputData(category_map, stream):
    """Writes the EmojiData Java class generated from category_map.

    For every category in CATEGORY_LIST six String[] constants are emitted:
    <CATEGORY>_VALUES, <CATEGORY>_PUA_VALUES, UNICODE_<CATEGORY>_NAME,
    DOCOMO_<CATEGORY>_NAME, SOFTBANK_<CATEGORY>_NAME and
    KDDI_<CATEGORY>_NAME. Entries that have none of the four names are left
    out of all six arrays, so the arrays of one category stay index-aligned.

    Args:
      category_map: Mapping from category name to a list of 7-tuples, as
        returned by ReadData. Every list is sorted in place (also for
        categories not in CATEGORY_LIST) before anything is written.
      stream: Writable file-like object that receives the Java source.
    """
    # .values() rather than the Python-2-only .itervalues(); the list
    # objects are shared either way, so the in-place sort is unchanged.
    for data_list in category_map.values():
        data_list.sort()

    stream.write('package org.mozc.android.inputmethod.japanese.emoji;\n'
                 'public class EmojiData {\n')

    # The content of each data list entry is
    # 0: Index in the category
    # 1: Code point of Unicode 6.0 emoji
    # 2: Code point of carrier emoji.
    # 3: Japanese Unicode 6.0 emoji name
    # 4: DOCOMO carrier emoji name
    # 5: Softbank carrier emoji name
    # 6: KDDI carrier emoji name
    #
    # Each table row pairs an array header (formatted with the category
    # name) with the entry field that fills the array.
    code_arrays = (
        (' public static final String[] %s_VALUES = new String[]{\n', 1),
        (' public static final String[] %s_PUA_VALUES = new String[]{\n', 2),
    )
    name_arrays = (
        (' public static final String[] UNICODE_%s_NAME = {\n', 3),
        (' public static final String[] DOCOMO_%s_NAME = {\n', 4),
        (' public static final String[] SOFTBANK_%s_NAME = {\n', 5),
        (' public static final String[] KDDI_%s_NAME = {\n', 6),
    )

    for category in CATEGORY_LIST:
        # Keep only entries that carry at least one of the four names.
        data_list = [entry for entry in category_map[category]
                     if entry[3] or entry[4] or entry[5] or entry[6]]

        # Generators are consumed inside _WriteJavaArray, so each element is
        # formatted and written right after the header, one at a time.
        for header, field in code_arrays:
            _WriteJavaArray(
                stream, header % category,
                (' %s,\n' % code_generator_util.ToJavaStringLiteral(
                    entry[field])
                 for entry in data_list))

        for header, field in name_arrays:
            _WriteJavaArray(
                stream, header % category,
                (_FormatNameElement(entry[field]) for entry in data_list))

    stream.write('}\n')
| |
| |
def ParseOptions():
    """Parses the command line and returns the option values.

    Returns:
      The optparse values object with two attributes: emoji_data (path to
      emoji_data.tsv) and output (output file name). An option that was not
      given on the command line is None.
    """
    option_specs = (
        ('--emoji_data', 'emoji_data', 'Path to emoji_data.tsv'),
        ('--output', 'output', 'Output file name'),
    )
    option_parser = optparse.OptionParser()
    for flag, destination, description in option_specs:
        option_parser.add_option(flag, dest=destination, help=description)
    # parse_args() returns (options, positional_args); only options matter.
    options, _ = option_parser.parse_args()
    return options
| |
| |
def main():
    """Reads the file named by --emoji_data and writes Java to --output."""
    opts = ParseOptions()

    # The input is fully read and closed before the output is opened, so a
    # failure while reading never creates or truncates the output file.
    with open(opts.emoji_data) as tsv_file:
        category_map = ReadData(tsv_file)

    with open(opts.output, 'w') as java_file:
        OutputData(category_map, java_file)


if __name__ == '__main__':
    main()