Update emoji to unicode 13.0

2024-11-21 18:50:47 +03:00 · 2020-01-24 04:18:14 +01:00 · 2020-01-24 04:18:14 +01:00 · 73cf7b239b
commit 73cf7b239b
parent 8fcb670bde
5 changed files with 9176 additions and 1405 deletions
--- a/resources/emoji-test.txt
+++ b/resources/emoji-test.txt
--- a/resources/emoji.json
+++ b/resources/emoji.json
--- a/scripts/emoji_codegen.py
+++ b/scripts/emoji_codegen.py
@ -1,31 +1,19 @@
 #!/usr/bin/env python3
 import sys
-import json
+import re
 from jinja2 import Template
 class Emoji(object):
-    def __init__(self, code, shortname, category, order):
+    def __init__(self, code, shortname):
-        self.code = ''.join(list(map(code_to_bytes, code.split('-'))))
+        self.code = repr(code.encode('utf-8'))[1:].strip("'")
        self.shortname = shortname
        self.category = category
        self.order = int(order)
 def code_to_bytes(codepoint):
    '''
    Convert hex unicode codepoint to hex byte array.
    '''
    bytes = chr(int(codepoint, 16)).encode('utf-8')
    return str(bytes)[1:].strip("'")
 def generate_code(emojis, category):
    tmpl = Template('''
-const QList<Emoji> EmojiProvider::{{ category }} = {
+const std::vector<Emoji> emoji::Provider::{{ category }} = {
    {%- for e in emoji %}
        Emoji{QString::fromUtf8("{{ e.code }}"), "{{ e.shortname }}"},
    {%- endfor %}
@ -38,44 +26,56 @@ const QList<Emoji> EmojiProvider::{{ category }} = {
 if __name__ == '__main__':
    if len(sys.argv) < 2:
-        print('usage: emoji_codegen.py /path/to/emoji.json')
+        print('usage: emoji_codegen.py /path/to/emoji-test.txt')
        sys.exit(1)
    filename = sys.argv[1]
    data = {}
-    with open(filename, 'r') as filename:
+    people = []
-        data = json.loads(filename.read())
+    nature = []
    food = []
    activity = []
    travel = []
    objects = []
    symbols = []
    flags = []
-    emojis = []
+    categories = {
        'Smileys & Emotion': people,
        'People & Body': people,
        'Animals & Nature': nature,
        'Food & Drink': food,
        'Travel & Places': travel,
        'Activities': activity,
        'Objects': objects,
        'Symbols': symbols,
        'Flags': flags
    }
-    for emoji_name in data:
+    current_category = ''
-        tmp = data[emoji_name]
+    for line in open(filename, 'r'):
        if line.startswith('# group:'):
            current_category = line.split(':', 1)[1].strip()
-        l = len(tmp['unicode'].split('-'))
+        if not line or line.startswith('#'):
        if l > 1 and tmp['category'] == 'people':
            continue
-        emojis.append(
+        segments = re.split(r'\s+[#;] ', line.strip())
-            Emoji(
+        if len(segments) != 3:
-                tmp['unicode'],
+            continue
                tmp['shortname'],
                tmp['category'],
                tmp['emoji_order']
            )
        )
-    emojis.sort(key=lambda x: x.order)
+        code, qualification, charAndName = segments
-    people = list(filter(lambda x: x.category == "people", emojis))
+        # skip fully qualified versions of same unicode
-    nature = list(filter(lambda x: x.category == "nature", emojis))
+        if code.endswith('FE0F'):
-    food = list(filter(lambda x: x.category == "food", emojis))
+            continue
-    activity = list(filter(lambda x: x.category == "activity", emojis))
+
-    travel = list(filter(lambda x: x.category == "travel", emojis))
+        if qualification == 'component':
-    objects = list(filter(lambda x: x.category == "objects", emojis))
+            continue
-    symbols = list(filter(lambda x: x.category == "symbols", emojis))
+
-    flags = list(filter(lambda x: x.category == "flags", emojis))
+        char, name = re.match(r'^(\S+) E\d+\.\d+ (.*)$', charAndName).groups()
        categories[current_category].append(Emoji(char, name))
    # Use xclip to pipe the output to the clipboard,
    # e.g. ./emoji_codegen.py emoji-test.txt | xclip -sel clip
--- a/scripts/update_emoji.md
+++ b/scripts/update_emoji.md
@ -0,0 +1,7 @@
 # Updating emoji
 1. Download the latest emoji-test.txt from https://unicode.org/Public/emoji/
 2. Overwrite the existing resources/emoji-test.txt with the new one
 3. Run `./scripts/emoji_codegen.py resources/emoji-test.txt` and replace the generated emoji lists at the end of src/emoji/Provider.cpp with the new output
 4. Run `make lint`
 5. Compile and test
--- a/src/emoji/Provider.cpp
+++ b/src/emoji/Provider.cpp