1
0
mirror of synced 2025-12-19 17:48:10 -05:00
Files
fonts/lang/snippets/fix-exemplars-bases.py

57 lines
1.7 KiB
Python

from collections import Counter
import unicodedata
from google.protobuf import text_format
from gflanguages import languages_public_pb2
# Names of the exemplar-character attributes, in a fixed order.
ATTRIBUTES = ["base", "auxiliary", "marks", "punctuation", "index"]
# U+25CC DOTTED CIRCLE: the placeholder base that mark exemplars are written on.
_DOTTED_CIRCLE = "\u25CC"

# Unicode general categories classified as marks: non-spacing and spacing
# combining marks.
_MARK_CATEGORIES = ("Mn", "Mc")


def _with_dotted_circle(chars):
    """Return *chars* prefixed with U+25CC unless it already starts with it."""
    if chars.startswith(_DOTTED_CIRCLE):
        return chars
    return _DOTTED_CIRCLE + chars


def _split_bases_and_marks(bases, marks):
    """Re-sort exemplar entries into base characters and combining marks.

    Args:
        bases: Entries currently listed as base exemplars.
        marks: Entries currently listed as mark exemplars.

    Returns:
        A ``(new_bases, new_marks)`` tuple of lists. Every mark is prefixed
        with U+25CC and appears once, in first-seen order (existing marks
        first). Base entries keep their order and are not de-duplicated.
    """
    new_marks = []
    new_bases = []

    for chars in marks:
        if not chars:
            continue
        chars = _with_dotted_circle(chars)
        if chars not in new_marks:
            new_marks.append(chars)

    for chars in bases:
        if not chars:
            continue
        # Strip one leading dotted circle so the real first character is
        # classified. An entry that is *only* a dotted circle is left as it
        # is: stripping it would leave an empty string and make chars[0]
        # raise IndexError.
        if chars[0] == _DOTTED_CIRCLE and len(chars) > 1:
            chars = chars[1:]
        if unicodedata.category(chars[0]) in _MARK_CATEGORIES:
            # chars[0] is a combining mark here, so it cannot be the dotted
            # circle; the prefix is always needed.
            chars = _DOTTED_CIRCLE + chars
            if chars not in new_marks:
                new_marks.append(chars)
        else:
            new_bases.append(chars)

    return new_bases, new_marks


def main(args=None):
    """Move combining marks out of the base exemplars of each given file.

    Each path is parsed as a text-format ``LanguageProto``. Its
    ``exemplar_chars.base`` and ``exemplar_chars.marks`` fields are
    re-sorted so that entries starting with a combining mark (category Mn or
    Mc) live in ``marks`` with a dotted-circle prefix, and the file is
    rewritten in place. Files whose base exemplar field is empty are left
    untouched.

    Args:
        args: Iterable of file paths. ``None`` (the default) or an empty
            iterable means there is nothing to process.
    """
    for path in args or ():
        with open(path, encoding="utf-8") as fp:
            language = text_format.Parse(
                fp.read(), languages_public_pb2.LanguageProto()
            )

        exemplars = language.exemplar_chars
        # An empty string is the only input for which split(" ") yields [""].
        if not exemplars.base:
            continue

        new_bases, new_marks = _split_bases_and_marks(
            exemplars.base.split(" "), exemplars.marks.split(" ")
        )
        exemplars.base = " ".join(new_bases)
        exemplars.marks = " ".join(new_marks)

        # The with-statement closes the file; no explicit close() is needed.
        with open(path, "w", encoding="utf-8") as fp:
            fp.write(text_format.MessageToString(language, as_utf8=True))
# Script entry point: process every path given on the command line.
if __name__ == "__main__":
    from sys import argv

    main(args=argv[1:])