Codebase list mozc / debian/1.15.1857.102-1ubuntu1 android / gen_emoji_data.py
debian/1.15.1857.102-1ubuntu1

Tree @debian/1.15.1857.102-1ubuntu1 (Download .tar.gz)

gen_emoji_data.py @debian/1.15.1857.102-1ubuntu1raw · history · blame

# -*- coding: utf-8 -*-
# Copyright 2010-2014, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Generate emoji data file (.java)

The generated .java file is used by the Android version.
"""

__author__ = "yoichio"

from collections import defaultdict
import logging
import optparse
import sys

from build_tools import code_generator_util

# Emoji categories. OutputData emits the generated Java arrays for each of
# these names, in this order.
CATEGORY_LIST = ['FACE', 'FOOD', 'CITY', 'ACTIVITY', 'NATURE']


def ReadData(stream):
  """Parses emoji rows and groups them by category.

  The input is passed through code_generator_util's SkipLineComment,
  ParseColumnStream (tab-delimited) and SelectColumn helpers, keeping
  columns 0, 2, 8, 9, 10, 11 and 12 of every row.

  Rows whose carrier (PUA) code field is empty or starts with '>' are
  skipped.  A row that has a Japanese name but no Unicode code point is
  treated as fatal: the error is logged and sys.exit(-1) is called.

  Args:
    stream: Input handed to code_generator_util.SkipLineComment (main()
      passes the opened emoji data file).

  Returns:
    A defaultdict(list) keyed by the category part of the last selected
    column, which has the form '<category>-<index>'.  Each value is a list
    of tuples
      (index, code, pua_code, japanese_name, docomo_name, softbank_name,
       kddi_name)
    where index is the string found after the '-' and code / pua_code are
    ints parsed from hexadecimal text.
  """
  rows = code_generator_util.SelectColumn(
      code_generator_util.ParseColumnStream(
          code_generator_util.SkipLineComment(stream), delimiter='\t'),
      [0, 2, 8, 9, 10, 11, 12])

  emoji_by_category = defaultdict(list)
  for row in rows:
    (code, pua_code, japanese_name, docomo_name, softbank_name, kddi_name,
     category_index) = row

    # Nothing to emit without a usable carrier code point.
    if not pua_code or pua_code[0] == '>':
      continue

    # A named emoji must come with its Unicode code point.
    if not code and japanese_name:
      logging.fatal('No Unicode emoji code point found.')
      sys.exit(-1)

    (category, index) = category_index.split('-')
    # Entries without a Unicode code point get the dummy code point 0.
    unicode_code_point = int(code, 16) if code else 0
    carrier_code_point = int(pua_code, 16)
    emoji_by_category[category].append(
        (index, unicode_code_point, carrier_code_point,
         japanese_name, docomo_name, softbank_name, kddi_name))
  return emoji_by_category


# Maps full-width (Unicode "Halfwidth and Fullwidth Forms" block) Latin
# letters, digits and parentheses to their ASCII counterparts.  Characters
# without an entry are left as they are by PreprocessName.
#
# The keys are spelled as \uXXXX escapes on purpose: a full-width literal is
# easily folded to its ASCII look-alike by text normalization, which would
# turn this table into an identity mapping and make the normalization a no-op.
CHARA_NORMALIZE_MAP = {
    # FULLWIDTH LATIN CAPITAL LETTER A..Z
    u'\uff21': 'A',
    u'\uff22': 'B',
    u'\uff23': 'C',
    u'\uff24': 'D',
    u'\uff25': 'E',
    u'\uff26': 'F',
    u'\uff27': 'G',
    u'\uff28': 'H',
    u'\uff29': 'I',
    u'\uff2a': 'J',
    u'\uff2b': 'K',
    u'\uff2c': 'L',
    u'\uff2d': 'M',
    u'\uff2e': 'N',
    u'\uff2f': 'O',
    u'\uff30': 'P',
    u'\uff31': 'Q',
    u'\uff32': 'R',
    u'\uff33': 'S',
    u'\uff34': 'T',
    u'\uff35': 'U',
    u'\uff36': 'V',
    u'\uff37': 'W',
    u'\uff38': 'X',
    u'\uff39': 'Y',
    u'\uff3a': 'Z',

    # FULLWIDTH LATIN SMALL LETTER A..Z
    u'\uff41': 'a',
    u'\uff42': 'b',
    u'\uff43': 'c',
    u'\uff44': 'd',
    u'\uff45': 'e',
    u'\uff46': 'f',
    u'\uff47': 'g',
    u'\uff48': 'h',
    u'\uff49': 'i',
    u'\uff4a': 'j',
    u'\uff4b': 'k',
    u'\uff4c': 'l',
    u'\uff4d': 'm',
    u'\uff4e': 'n',
    u'\uff4f': 'o',
    u'\uff50': 'p',
    u'\uff51': 'q',
    u'\uff52': 'r',
    u'\uff53': 's',
    u'\uff54': 't',
    u'\uff55': 'u',
    u'\uff56': 'v',
    u'\uff57': 'w',
    u'\uff58': 'x',
    u'\uff59': 'y',
    u'\uff5a': 'z',

    # FULLWIDTH DIGIT ZERO..NINE
    u'\uff10': '0',
    u'\uff11': '1',
    u'\uff12': '2',
    u'\uff13': '3',
    u'\uff14': '4',
    u'\uff15': '5',
    u'\uff16': '6',
    u'\uff17': '7',
    u'\uff18': '8',
    u'\uff19': '9',

    # FULLWIDTH LEFT / RIGHT PARENTHESIS
    u'\uff08': '(',
    u'\uff09': ')',
}


def PreprocessName(name):
  """Prepares an emoji name for embedding in the generated Java source.

  Args:
    name: UTF-8 encoded byte string.

  Returns:
    A UTF-8 encoded byte string in which every character that has an entry
    in CHARA_NORMALIZE_MAP is replaced by its mapped value, and every '('
    is preceded by the two characters backslash and 'n'.  OutputData writes
    the result between double quotes in the generated Java file.
  """
  decoded = unicode(name, 'utf-8')
  pieces = []
  for ch in decoded:
    pieces.append(CHARA_NORMALIZE_MAP.get(ch, ch))
  utf8_name = u''.join(pieces).encode('utf-8')
  return utf8_name.replace('(', '\\n(')


def OutputData(category_map, stream):
  """Writes the EmojiData Java class built from category_map to stream.

  Every list in category_map is sorted in place first.  Then, for each
  category in CATEGORY_LIST, six String[] constants are written:
  <CATEGORY>_VALUES, <CATEGORY>_PUA_VALUES, UNICODE_<CATEGORY>_NAME,
  DOCOMO_<CATEGORY>_NAME, SOFTBANK_<CATEGORY>_NAME and KDDI_<CATEGORY>_NAME.
  Entries that have none of the four names are left out of all six arrays.

  Args:
    category_map: Mapping from category name to a list of tuples as
      produced by ReadData.
    stream: Object with a write() method that receives the Java source.
  """
  for entries in category_map.itervalues():
    entries.sort()

  write = stream.write
  write('package org.mozc.android.inputmethod.japanese.emoji;\n'
        'public class EmojiData {\n')

  # Layout of each entry tuple:
  #   0: Index in the category
  #   1: Code point of Unicode 6.0 emoji
  #   2: Code point of carrier emoji
  #   3: Japanese Unicode 6.0 emoji name
  #   4: DOCOMO carrier emoji name
  #   5: Softbank carrier emoji name
  #   6: KDDI carrier emoji name
  code_point_arrays = (('VALUES', 1), ('PUA_VALUES', 2))
  name_arrays = (('UNICODE', 3), ('DOCOMO', 4), ('SOFTBANK', 5), ('KDDI', 6))

  for category in CATEGORY_LIST:
    # Keep only the entries that carry at least one name.
    rows = [row for row in category_map[category] if any(row[3:7])]

    for suffix, column in code_point_arrays:
      write('  public static final String[] %s_%s = new String[]{\n' %
            (category, suffix))
      for row in rows:
        write('    %s,\n' %
              code_generator_util.ToJavaStringLiteral(row[column]))
      write('  };\n')

    for prefix, column in name_arrays:
      write('  public static final String[] %s_%s_NAME = {\n' %
            (prefix, category))
      for row in rows:
        name = row[column]
        if not name:
          # A missing name is emitted as a Java null.
          write('    null, \n')
          continue
        write('    "%s", \n' % PreprocessName(name))
      write('  };\n')

  write('}\n')


def ParseOptions():
  """Parses the command line flags of this script.

  Returns:
    The optparse values object, with attributes emoji_data (path to
    emoji_data.tsv) and output (output file name).  Each attribute is None
    when its flag is not given.
  """
  option_parser = optparse.OptionParser()
  flag_specs = (
      ('--emoji_data', 'emoji_data', 'Path to emoji_data.tsv'),
      ('--output', 'output', 'Output file name'),
  )
  for flag, dest, help_text in flag_specs:
    option_parser.add_option(flag, dest=dest, help=help_text)
  # Positional arguments are not used by this script.
  options, _ = option_parser.parse_args()
  return options


def main():
  """Reads the file given by --emoji_data and writes Java source to --output."""
  flags = ParseOptions()

  # Load everything first so the input file is closed before writing starts.
  with open(flags.emoji_data) as tsv_file:
    category_map = ReadData(tsv_file)

  with open(flags.output, 'w') as java_file:
    OutputData(category_map, java_file)


# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
  main()