blob: f149f3701afd04aeee9871460c0cd0272e50dd75 [file] [log] [blame]
# -*- coding: utf-8 -*-
# Copyright 2010-2015, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Generate header file of a string array for zero query suggestion.
Usage:
gen_embedded_string_array_for_zero_query.py --input input.def \
--output /path/to/output/zero_query_hoge.h --var_name ZeroQueryHoge
Input format:
<key> <TAB> <candidate_1>,<candidate_2>,..,<candidate_n>
...
For more details, please refer to definition files under mozc/data/zero_query/
Output format:
const char *Var0[] = {"Key0", "Cand00", "Cand01", .., 0};
const char *Var1[] = {"Key1", "Cand10", "Cand11", .., 0};
const char **Var_data[] = {Var0, Var1, .., VarN};
const size_t Var_size = <number of rules>;
Here, the rules are sorted by key (Key0, Key1, ...) so that we can use binary
search.
"""
__author__ = "toshiyuki"
import optparse
import os
# Project prefix placed at the front of every generated include-guard macro
# (the macros have the form <prefix>_PREDICTION_<FILE_NAME>_).
_MOZC_DIR_FOR_DEFINE_GUARD = 'MOZC'
def EscapeString(string):
  """Returns |string| as a double-quoted C string literal.

  The escaping is done by hand instead of relying on the 'string_escape'
  codec, which exists only on Python 2 and does not escape double quotes.

  Args:
    string: the text to embed. A byte string is used as is; a unicode string
      is encoded as UTF-8 first, so the function works on Python 2 and 3.

  Returns:
    A str holding the literal, including the surrounding double quotes.
    Backslash, both quote characters, tab, newline and carriage return are
    backslash-escaped, and every other byte outside printable ASCII is
    written as a two-digit lower-case hex escape.
  """
  if not isinstance(string, bytes):
    string = string.encode('utf-8')

  simple_escapes = {
      0x5c: '\\\\',  # backslash
      0x27: "\\'",   # single quote (kept for parity with 'string_escape')
      0x22: '\\"',   # double quote: would otherwise terminate the literal
      0x09: '\\t',
      0x0a: '\\n',
      0x0d: '\\r',
  }
  hex_digits = frozenset(bytearray(b'0123456789abcdefABCDEF'))

  pieces = []
  after_hex_escape = False
  for byte in bytearray(string):
    if byte in simple_escapes:
      pieces.append(simple_escapes[byte])
      after_hex_escape = False
    elif byte < 0x20 or byte >= 0x7f:
      pieces.append('\\x%02x' % byte)
      after_hex_escape = True
    else:
      if after_hex_escape and byte in hex_digits:
        # A C hex escape consumes every hex digit that follows it, so a
        # literal hex-digit character right after one would be merged into
        # the escape. Closing and reopening the literal stops the escape;
        # the compiler concatenates the adjacent literals back together.
        pieces.append('" "')
      pieces.append(chr(byte))
      after_hex_escape = False
  return '"' + ''.join(pieces) + '"'
def GetDefineGuardSymbol(file_name):
  """Builds the file-name part of an include-guard macro.

  Only the last path component is used; it is upper-cased and every '.' is
  turned into '_'. For instance '/path/to/some_example.h' becomes
  'SOME_EXAMPLE_H'.

  Args:
    file_name: a string indicating output file path.

  Returns:
    A string for define guard.
  """
  base_name = os.path.split(file_name)[1]
  upper_name = base_name.upper()
  return upper_name.replace('.', '_')
def GetDefineGuardHeaderLines(output_file_name):
  """Returns the opening include-guard lines for the generated .h file.

  Args:
    output_file_name: path of the header being generated; its base name
      becomes part of the guard macro.

  Returns:
    A two-element list: the '#ifndef' line followed by the '#define' line.
  """
  guard_parts = (_MOZC_DIR_FOR_DEFINE_GUARD,
                 GetDefineGuardSymbol(output_file_name))
  return [
      '#ifndef %s_PREDICTION_%s_' % guard_parts,
      '#define %s_PREDICTION_%s_' % guard_parts,
  ]
def GetDefineGuardFooterLines(output_file_name):
  """Returns the closing include-guard line for the generated .h file.

  Args:
    output_file_name: path of the header being generated; its base name
      becomes part of the guard macro named in the trailing comment.

  Returns:
    A one-element list holding the '#endif' line.
  """
  guard_symbol = GetDefineGuardSymbol(output_file_name)
  footer = '#endif // %s_PREDICTION_%s_' % (_MOZC_DIR_FOR_DEFINE_GUARD,
                                            guard_symbol)
  return [footer]
def GetZeroQueryRules(input_file_name):
  """Returns zero query triggering rules read from a definition file.

  Each rule line has the form '<key><TAB><cand_1>,<cand_2>,..,<cand_n>'.
  Lines starting with '#' and empty lines are skipped.

  Args:
    input_file_name: path of the definition file to read.

  Returns:
    A list of (key, candidates) tuples, where candidates is a list of
    strings. The list is sorted by key; rules that share a key keep the
    order in which they appear in the file.

  Raises:
    IndexError: if a rule line contains no TAB separator.
  """
  rules = []
  with open(input_file_name, 'r') as input_file:
    for line in input_file:
      if line.startswith('#'):
        continue
      line = line.rstrip('\r\n')
      if not line:
        continue
      tokens = line.split('\t')
      key = tokens[0]
      values = tokens[1].split(',')
      rules.append((key, values))
  # Sorted by key only, for binary search. A key function is used instead of
  # a cmp-style comparator because Python 3 dropped both the comparator
  # argument and the cmp() builtin; the resulting (stable) order is the same.
  rules.sort(key=lambda rule: rule[0])
  return rules
def GetHeaderContents(input_file_name, var_name, output_file_name):
  """Builds the lines of a header file that embeds the zero query rules.

  One 0-terminated 'const char *' array is emitted per rule inside an
  anonymous namespace, followed by '<var_name>_data' (an array of those
  arrays) and '<var_name>_size' (the number of rules), all wrapped in
  namespace mozc and an include guard.

  Args:
    input_file_name: path of the rule definition file.
    var_name: prefix used for every generated C++ identifier.
    output_file_name: path of the header; used for the include guard.

  Returns:
    A list of strings, one per line of the header.
  """
  rules = GetZeroQueryRules(input_file_name)

  lines = list(GetDefineGuardHeaderLines(output_file_name))
  lines += ['namespace mozc {', 'namespace {']

  array_names = []
  for index, (key, candidates) in enumerate(rules):
    array_name = '%s%d' % (var_name, index)
    array_names.append(array_name)
    # Key first, then its candidates, then the terminating 0.
    literals = [EscapeString(key)]
    literals.extend(EscapeString(candidate) for candidate in candidates)
    literals.append('0')
    lines.append('const char *%s%d[] = {' % (var_name, index))
    lines.append(' ' + ', '.join(literals))
    lines.append('};')
  lines.append('} // namespace')

  lines.append('const char **%s_data[] = {' % var_name)
  lines.append(' ' + ', '.join(array_names))
  lines.append('};')
  lines.append('const size_t %s_size = %d;' % (var_name, len(rules)))
  lines.append('} // namespace mozc')

  lines.extend(GetDefineGuardFooterLines(output_file_name))
  return lines
def ParseOption():
  """Reads --input, --output and --var_name from the command line.

  Returns:
    The optparse options object; positional arguments are discarded.
  """
  option_specs = (
      ('--input', 'input', 'Input file path'),
      ('--output', 'output', 'Output file path'),
      ('--var_name', 'var_name', 'Var name for the array'),
  )
  parser = optparse.OptionParser()
  for flag, dest, help_text in option_specs:
    parser.add_option(flag, dest=dest, help=help_text)
  options, _ = parser.parse_args()
  return options
def main():
  """Generates the header named by --output from the rules in --input."""
  options = ParseOption()
  lines = GetHeaderContents(options.input, options.var_name, options.output)
  with open(options.output, 'w') as out_file:
    # Terminate the last line as well: a source file that does not end with
    # a newline is rejected or warned about by older C/C++ toolchains.
    out_file.write('\n'.join(lines) + '\n')
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()