# blob: a6a6e252d7e6e843432ebecf26027e6885278d0b [file] [log] [blame]
# -*- coding: utf-8 -*-
# Copyright 2010-2015, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Generate emoji data file (.java)
Generated .java file is used by Android version.
"""
__author__ = "yoichio"
from collections import defaultdict
import logging
import optparse
import sys
from build_tools import code_generator_util
# Emoji categories, in the order in which OutputData() emits their arrays.
CATEGORY_LIST = [
    'FACE',
    'FOOD',
    'CITY',
    'ACTIVITY',
    'NATURE',
]
def ReadData(stream):
  """Reads tab-separated emoji data and groups the entries by category.

  Columns 0, 2, 8, 9, 10, 11 and 12 of each row are interpreted as: Unicode
  code point (hex), carrier PUA code point (hex), Japanese name, DOCOMO name,
  Softbank name, KDDI name and a '<category>-<index>' key.

  Rows whose PUA column is empty or starts with '>' are skipped.  A row
  without a Unicode code point is accepted only when it has no Japanese name
  either; it is then stored with the dummy code point 0.  If such a row does
  have a Japanese name, the error is logged and the process exits with
  status -1.

  Args:
    stream: The emoji data to read (main() passes the opened .tsv file).
      NOTE(review): comment stripping and column splitting are delegated to
      code_generator_util; their exact input contract is not visible here.

  Returns:
    A defaultdict(list) mapping each category name to a list of tuples
    (index, code, pua_code, japanese_name, docomo_name, softbank_name,
    kddi_name).  index is the string found after the '-' in the category
    key; code and pua_code are ints parsed as hexadecimal.
  """
  rows = code_generator_util.SelectColumn(
      code_generator_util.ParseColumnStream(
          code_generator_util.SkipLineComment(stream), delimiter='\t'),
      [0, 2, 8, 9, 10, 11, 12])

  entries_by_category = defaultdict(list)
  for row in rows:
    (unicode_hex, pua_hex, japanese_name, docomo_name, softbank_name,
     kddi_name, category_key) = row

    # Rows without a usable carrier (PUA) code point are ignored.
    if not pua_hex or pua_hex[0] == '>':
      continue

    if not unicode_hex:
      if japanese_name:
        logging.fatal('No Unicode emoji code point found.')
        sys.exit(-1)
      # Carrier-only emoji: use a dummy code point.
      unicode_hex = '0'

    category, index = category_key.split('-')
    entry = (index, int(unicode_hex, 16), int(pua_hex, 16),
             japanese_name, docomo_name, softbank_name, kddi_name)
    entries_by_category[category].append(entry)

  return entries_by_category
# Maps full-width Latin letters, digits and parentheses (U+FF08..U+FF5A) to
# their ASCII counterparts; each key is the ASCII code point plus 0xFEE0.
# PreprocessName() applies this table character by character.
#
# The keys are deliberately spelled as \uXXXX escapes: written as literal
# characters they are easily lost to tools that normalize Unicode, which
# silently degrades every entry to a no-op such as 'A' -> 'A'.
CHARA_NORMALIZE_MAP = {
    # Full-width capital letters.
    u'\uff21': 'A',
    u'\uff22': 'B',
    u'\uff23': 'C',
    u'\uff24': 'D',
    u'\uff25': 'E',
    u'\uff26': 'F',
    u'\uff27': 'G',
    u'\uff28': 'H',
    u'\uff29': 'I',
    u'\uff2a': 'J',
    u'\uff2b': 'K',
    u'\uff2c': 'L',
    u'\uff2d': 'M',
    u'\uff2e': 'N',
    u'\uff2f': 'O',
    u'\uff30': 'P',
    u'\uff31': 'Q',
    u'\uff32': 'R',
    u'\uff33': 'S',
    u'\uff34': 'T',
    u'\uff35': 'U',
    u'\uff36': 'V',
    u'\uff37': 'W',
    u'\uff38': 'X',
    u'\uff39': 'Y',
    u'\uff3a': 'Z',
    # Full-width small letters.
    u'\uff41': 'a',
    u'\uff42': 'b',
    u'\uff43': 'c',
    u'\uff44': 'd',
    u'\uff45': 'e',
    u'\uff46': 'f',
    u'\uff47': 'g',
    u'\uff48': 'h',
    u'\uff49': 'i',
    u'\uff4a': 'j',
    u'\uff4b': 'k',
    u'\uff4c': 'l',
    u'\uff4d': 'm',
    u'\uff4e': 'n',
    u'\uff4f': 'o',
    u'\uff50': 'p',
    u'\uff51': 'q',
    u'\uff52': 'r',
    u'\uff53': 's',
    u'\uff54': 't',
    u'\uff55': 'u',
    u'\uff56': 'v',
    u'\uff57': 'w',
    u'\uff58': 'x',
    u'\uff59': 'y',
    u'\uff5a': 'z',
    # Full-width digits.
    u'\uff10': '0',
    u'\uff11': '1',
    u'\uff12': '2',
    u'\uff13': '3',
    u'\uff14': '4',
    u'\uff15': '5',
    u'\uff16': '6',
    u'\uff17': '7',
    u'\uff18': '8',
    u'\uff19': '9',
    # Full-width parentheses.
    u'\uff08': '(',
    u'\uff09': ')',
}
def PreprocessName(name):
  """Normalizes an emoji name for embedding in the generated Java source.

  Each character that has an entry in CHARA_NORMALIZE_MAP is replaced by its
  mapped value.  Afterwards every '(' is prefixed with a backslash followed
  by 'n'; since OutputData() writes the result between double quotes in Java
  source, that reads as a line break before the parenthesized part.  The
  prefix is added after the substitution, so parentheses produced by the
  mapping are affected as well.

  Args:
    name: The emoji name as a UTF-8 encoded byte string.

  Returns:
    The processed name as a UTF-8 encoded byte string.
  """
  normalized_chars = []
  for char in unicode(name, 'utf-8'):
    normalized_chars.append(CHARA_NORMALIZE_MAP.get(char, char))
  encoded = u''.join(normalized_chars).encode('utf-8')
  return encoded.replace('(', '\\n(')
def _WriteCodePointArray(stream, array_name, code_points):
  """Writes one 'String[] <array_name> = new String[]{...}' Java field.

  Args:
    stream: Writable file-like object receiving the Java source.
    array_name: Name of the generated Java array.
    code_points: Code points to emit, one array element each.
  """
  stream.write(
      ' public static final String[] %s = new String[]{\n' % array_name)
  for code_point in code_points:
    # NOTE(review): ToJavaStringLiteral presumably renders the code point as
    # a quoted Java string literal -- confirm in code_generator_util.
    stream.write(
        ' %s,\n' % (code_generator_util.ToJavaStringLiteral(code_point)))
  stream.write(' };\n')


def _WriteNameArray(stream, array_name, names):
  """Writes one 'String[] <array_name> = {...}' Java field of emoji names.

  Args:
    stream: Writable file-like object receiving the Java source.
    array_name: Name of the generated Java array.
    names: Emoji names; an empty name is emitted as Java null.
  """
  stream.write(' public static final String[] %s = {\n' % array_name)
  for name in names:
    if name:
      stream.write(' "%s", \n' % PreprocessName(name))
    else:
      stream.write(' null, \n')
  stream.write(' };\n')


def OutputData(category_map, stream):
  """Writes the EmojiData Java class generated from category_map.

  For every category in CATEGORY_LIST six parallel arrays are emitted:
  <CATEGORY>_VALUES, <CATEGORY>_PUA_VALUES, UNICODE_<CATEGORY>_NAME,
  DOCOMO_<CATEGORY>_NAME, SOFTBANK_<CATEGORY>_NAME and KDDI_<CATEGORY>_NAME.
  Entries whose four names are all empty are left out of every array, so
  the arrays of one category always line up by position.

  Args:
    category_map: Mapping from category name to a list of entry tuples, as
      returned by ReadData().  The lists are sorted in place.
    stream: Writable file-like object receiving the Java source.
  """
  for data_list in category_map.itervalues():
    data_list.sort()

  stream.write('package org.mozc.android.inputmethod.japanese.emoji;\n'
               'public class EmojiData {\n')

  for category in CATEGORY_LIST:
    # The content of each entry is
    # 0: Index in the category
    # 1: Code point of Unicode 6.0 emoji
    # 2: Code point of carrier emoji.
    # 3: Japanese Unicode 6.0 emoji name
    # 4: DOCOMO carrier emoji name
    # 5: Softbank carrier emoji name
    # 6: KDDI carrier emoji name
    data_list = [c for c in category_map[category]
                 if c[3] or c[4] or c[5] or c[6]]

    _WriteCodePointArray(
        stream, '%s_VALUES' % category, [entry[1] for entry in data_list])
    _WriteCodePointArray(
        stream, '%s_PUA_VALUES' % category, [entry[2] for entry in data_list])

    # (array name format, index of the name inside the entry tuple)
    for name_format, column in (('UNICODE_%s_NAME', 3),
                                ('DOCOMO_%s_NAME', 4),
                                ('SOFTBANK_%s_NAME', 5),
                                ('KDDI_%s_NAME', 6)):
      _WriteNameArray(
          stream, name_format % category,
          [entry[column] for entry in data_list])

  stream.write('}\n')
def ParseOptions():
  """Parses the command line flags of this script.

  Returns:
    The optparse options object, with attributes emoji_data (path of the
    input emoji_data.tsv) and output (path of the generated file).  A flag
    that is not given on the command line is left as None.
  """
  option_parser = optparse.OptionParser()
  for flag, dest, help_text in (
      ('--emoji_data', 'emoji_data', 'Path to emoji_data.tsv'),
      ('--output', 'output', 'Output file name')):
    option_parser.add_option(flag, dest=dest, help=help_text)
  options, _ = option_parser.parse_args()
  return options
def main():
  """Reads the emoji data given by --emoji_data and writes --output."""
  options = ParseOptions()

  # The input is read completely before the output file is opened.
  with open(options.emoji_data) as input_stream:
    category_map = ReadData(input_stream)

  with open(options.output, 'w') as output_stream:
    OutputData(category_map, output_stream)


if __name__ == '__main__':
  main()