emoji-updater: Apply found data to LatinIME

* Instead of having to sort the data manually, try finding the parts to be
  replaced automatically
* Print out whatever could not be applied in the same way as before

Change-Id: I297897cc934556a925dd1569a4c0f7bd95aed670
2023-03-24 23:25:43 +01:00
parent 28aef9685b
commit 5fdd957c8d
1 changed file with 62 additions and 6 deletions
--- a/emoji-updater/emoji-updater.py
+++ b/emoji-updater/emoji-updater.py
@@ -1,18 +1,74 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
+import os
+import requests
 import sys

-import requests
+from pathlib import Path

-if __name__ == '__main__':
+if __name__ == "__main__":
    if len(sys.argv) < 2:
-        sys.exit(f'usage: {sys.argv[0]} [url|https://unicode.org/Public/emoji/15.0/emoji-test.txt]')
+        sys.exit(
+            f"usage: {sys.argv[0]} [url|https://unicode.org/Public/emoji/15.0/emoji-test.txt]"
+        )

    url = sys.argv[1]
    req = requests.get(url=url)

+    group_name = ""
+    items = {}
+
    for line in req.text.splitlines():
-        if line.startswith('# subgroup: '):
-            print(f'        <!-- {line.split(maxsplit=2)[-1]} -->')
+        if line.startswith("# subgroup: "):
+            group_name = line.split(maxsplit=2)[-1]
        elif '; fully-qualified' in line and not 'skin tone' in line:
-            print(f'        <item>{line.split(";")[0].strip().replace(" ", ",")}</item>')
+            item = line.split(";")[0].strip().replace(" ", ",")
+            items.setdefault(group_name, []).append(item)
+
+    # We want to transfer the received data into the target file
+    absolute_path = os.path.dirname(__file__)
+    relative_path = "../../../packages/inputmethods/LatinIME/java/res/values-v19/emoji-categories.xml"
+    target_path = Path(os.path.join(absolute_path, relative_path)).resolve()
+
+    with open(target_path, "r+") as f:
+        lines = f.read()
+        f.seek(0)
+        f.truncate()
+
+        for key in [*items.keys()]:
+            header = f"<!-- {key} -->"
+            start = lines.find(header)
+
+            if start != -1:
+                while start != -1:
+                    end1 = lines.find("</array>", start)
+                    end2 = lines.find("<!--", start + 1)
+
+                    if end1 == -1 or end2 == -1:
+                        min_end = max(end1, end2)
+                    else:
+                        min_end = min(end1, end2)
+                    replace = lines[start:min_end].rstrip()
+
+                    built = header + "\n"
+                    for c in items[key]:
+                        built += f"        <item>{c}</item>\n"
+                    built = built.rstrip()
+
+                    lines = lines.replace(replace, built)
+                    start = lines.find(header, start + len(built))
+
+                del items[key]
+
+        f.write(lines)
+
+    if len(items) > 0:
+        print("Could not process the following items automatically:")
+
+        for key in items:
+            built = f"        <!-- {key} -->"
+
+            for c in items[key]:
+                built += f"\n        <item>{c}</item>"
+
+            print(built)