More codes and rules

2024-11-21 18:50:47 +03:00 · 2022-04-24 12:14:23 -04:00 · 2022-04-24 12:14:23 -04:00 · 97105ebc5f
commit 97105ebc5f
parent ea03802d0d
3 changed files with 291 additions and 7495 deletions
--- a/resources/shortcodes.txt
+++ b/resources/shortcodes.txt
@ -8,3 +8,35 @@ face savoring food:yum
 smiling face with halo:innocent
 smiling face with open hands:hugging_face
 high voltage:zap
+double exclamation mark:bangbang
+exclamation question mark:interrobang
+red question mark:question 
+red exclamation mark:exclamation 
+white question mark:white_question
+white exclamation mark:white_exclamation
+input latin uppercase:big_abcd
+input latin lowercase:abcd 
+input numbers:1234
+input symbols:symbols 
+input latin letters:abc
+smiling face with heart-eyes:heart_eyes
+smiling face with tear:cry_smile
+smiling face with horns:smiling_imp 
+angry face with horns:imp
+pouting face:rage
+face with steam from nose:triumph
+grinning cat:smiley_cat
+grinning cat with smiling eyes:smile_cat 
+cat with tears of joy:joy_cat
+smiling cat with heart-eyes:heart_eyes_cat
+cat with wry smile:smirk_cat
+weary cat:scream_cat
+hundred points:100
+waving hand:wave
+backhand index pointing left:point_left
+backhand index pointing right:point_right
+backhand index pointing up:point_up_2
+backhand index pointing down:point_down
+index pointing up:point_up 
+index pointing at the viewer:point_you
+person shrugging:shrug
--- a/scripts/emoji_codegen.py
+++ b/scripts/emoji_codegen.py
@ -25,7 +25,37 @@ const QVector<Emoji> emoji::Provider::emoji = {
    ''')
    d = dict(kwargs=kwargs)
    print(tmpl.render(d))
-
+# FIXME: Stop this madness
+def humanize_keypad(num):
+    """Spell out a keycap label as an English word.
+
+    Args:
+        num: the keycap label as a string ("0" through "10", "*" or "#").
+
+    Returns:
+        The word for the label (e.g. "three", "asterisk"), or None when the
+        label is not one of the known keycaps.
+    """
+    keycap_words = {
+        "0": "zero",
+        "1": "one",
+        "2": "two",
+        "3": "three",
+        "4": "four",
+        "5": "five",
+        "6": "six",
+        "7": "seven",
+        "8": "eight",
+        "9": "nine",
+        "10": "ten",
+        "*": "asterisk",
+        "#": "hash",
+    }
+    # Only string labels can ever name a keycap; anything else has no spelling.
+    if not isinstance(num, str):
+        return None
+    return keycap_words.get(num)
 if __name__ == '__main__':
    if len(sys.argv) < 3:
        print('usage: emoji_codegen.py /path/to/emoji-test.txt /path/to/shortcodes.txt')
@ -52,7 +82,8 @@ if __name__ == '__main__':
        'Activities': activity,
        'Objects': objects,
        'Symbols': symbols,
-        'Flags': flags
+        'Flags': flags,
+        'Component': symbols
    }
    shortcodeDict = {} 
    # for my sanity - this strips newlines
@ -74,34 +105,63 @@ if __name__ == '__main__':
        code, qualification, charAndName = segments

        # skip unqualified versions of same unicode
-        if qualification != 'fully-qualified':
+        if qualification != 'fully-qualified' and qualification != 'component' :
            continue
-
+        

        char, name = re.match(r'^(\S+) E\d+\.\d+ (.*)$', charAndName).groups()
        shortname = name
-
+        
+        # Discard skin-tone variants of regular emoji so each emoji is listed once.
+        # NOTE: `"skin tone" in name` is the idiomatic spelling of name.__contains__("skin tone").
+        if name.__contains__("skin tone") and qualification != 'component': 
+            continue
+        if qualification == 'component' and not name.__contains__("skin tone"): 
+            continue
        #TODO: Handle skintone modifiers in a sane way
        if shortname in shortcodeDict: 
            shortname = shortcodeDict[shortname]
-        else: 
+        else:
+            shortname = shortname.lower()
+            if shortname.endswith(' (blood type)'): 
+                shortname = shortname[:-13]
+            if shortname.endswith(': red hair'): 
+                shortname = "red_haired_" + shortname[:-10]
+            if shortname.endswith(': curly hair'): 
+                shortname = "curly_haired_" + shortname[:-12]
+            if shortname.endswith(': white hair'): 
+                shortname = "white_haired_" + shortname[:-12]
+            if shortname.endswith(': bald'): 
+                shortname = "bald_" + shortname[:-6]
+            if shortname.endswith(': beard'): 
+                shortname = "bearded_" + shortname[:-7]
            if shortname.endswith(' face'): 
                shortname = shortname[:-5]
-            elif shortname.endswith(' button'): 
+            if shortname.endswith(' button'): 
                shortname = shortname[:-7] 
-            else: 
-                # FIXME: Is there a better way to do this?
-                matchobj = re.match(r'^flag: (.*)$', shortname) 
-                if matchobj: 
-                    country, = matchobj.groups() 
-                    shortname = country + " flag"
-            shortname = shortname.replace(" ", "_")
-            shortname = shortname.replace("“", "")
-            shortname = shortname.replace("”", "")
-            shortname = shortname.replace(":", "")
+            if shortname.endswith(' banknote'): 
+                shortname = shortname[:-9]
+            keycapmtch = re.match(r'^keycap: (.+)$', shortname)
+            if keycapmtch: 
+                keycapthing, = keycapmtch.groups()
+                type(keycapthing)
+                num_name = humanize_keypad(keycapthing) 
+                if num_name: 
+                    shortname = num_name
+                else: 
+                    raise Exception("incomplete keycap " + keycapthing + ", fix ur code")
+                
+            # FIXME: Is there a better way to do this?
+            matchobj = re.match(r'^flag: (.*)$', shortname) 
+            if matchobj: 
+                country, = matchobj.groups() 
+                shortname = country + " flag"
+            shortname = shortname.replace("u.s.", "us")
+            shortname = shortname.replace("&", "and")
            shortname = shortname.replace("-", "_")
+            shortname, = re.match(r'^_*(.+)_*$', shortname).groups()
+            shortname = re.sub(r'\W', '_', shortname) 
            shortname = re.sub(r'_{2,}', '_', shortname) 
-            shortname = shortname.lower()
            shortname = unidecode(shortname)
        categories[current_category].append(Emoji(code, shortname, name))

--- a/src/emoji/Provider.cpp
+++ b/src/emoji/Provider.cpp