| # Copyright 2010, Google Inc. |
| # All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are |
| # met: |
| # |
| # * Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # * Redistributions in binary form must reproduce the above |
| # copyright notice, this list of conditions and the following disclaimer |
| # in the documentation and/or other materials provided with the |
| # distribution. |
| # * Neither the name of Google Inc. nor the names of its |
| # contributors may be used to endorse or promote products derived from |
| # this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| """A script to generate a C++ header file for the POS conversion map. |
| """ |
| |
| __author__ = "keni" |
| |
| import sys |
| |
| BODY = """// Copyright 2009 Google Inc. All Rights Reserved. |
| // Author: keni |
| |
| #ifndef MOZC_DICTIONARY_POS_MAP_H_ |
| #define MOZC_DICTIONARY_POS_MAP_H_ |
| |
| // POS conversion rules |
| const POSMap kPOSMap[] = { |
| %s}; |
| |
| #endif // MOZC_DICTIONARY_POS_MAP_H_ |
| """ |
| |
| def Escape(str): |
| result = [] |
| length = len(str) |
| x = 0 |
| for c in str: |
| result.append("\\x%2X" % ord(c)) |
| return "".join(result) |
| |
| def GenPOSMap(pos_map_file, user_pos_file): |
| user_pos = {} |
| outputs = [] |
| dup = {} |
| |
| # target POS must be found in user_pos map |
| for line in open(user_pos_file, "r"): |
| fields = line.split() |
| user_pos[fields[0]] = True |
| |
| # read all POS mapping |
| for line in open(pos_map_file, "r"): |
| line = line.lstrip("\n"); |
| if line == '' or line[0] == '#': |
| continue |
| |
| fields = line.split() |
| num_fields = len(fields) |
| |
| assert(num_fields >= 1 and num_fields <= 3) |
| |
| if (num_fields >= 2): |
| assert(user_pos.has_key(fields[1])) |
| |
| output = '' |
| if len(fields) == 1: |
| output = ' { "%s", NULL }, ' % Escape(fields[0]) |
| elif len(fields) >= 2: |
| output = ' { "%s", "%s" }, ' % (Escape(fields[0]), |
| Escape(fields[1])) |
| |
| # For example, when ATOK has POS "FOO", and MS-IME has the |
| # same POS "FOO", we assume that these two POSes can be |
| # translated into the same Mozc POS. |
| # We allow duplicate source POSes in the pos_map, but target pos |
| # for these POSes must be the same. |
| if dup.has_key(fields[0]): |
| assert(dup[fields[0]] == output) |
| continue |
| |
| outputs.append(output) |
| dup[fields[0]] = output |
| |
| # Make a Mozc to Mozc mapping rule. |
| for line in open(user_pos_file, "r"): |
| fields = line.split() |
| output = '' |
| if not dup.has_key(fields[0]): |
| outputs.append(' { "%s", "%s" }, ' % (Escape(fields[0]), |
| Escape(fields[0]))) |
| |
| outputs.sort() |
| return "\n".join(outputs) + "\n" |
| |
| def main(): |
| user_pos_file = sys.argv[1] |
| pos_map_file = sys.argv[2] |
| pos_map = GenPOSMap(pos_map_file, user_pos_file) |
| print BODY % pos_map |
| |
| if __name__ == '__main__': |
| main() |