1
0
mirror of synced 2025-12-25 02:02:49 -05:00

Merge commit '9b4d8d28810488fde8c03ae1f37be1628b6b764a' into lang-v0.6.1

This commit is contained in:
Simon Cozens
2024-05-16 09:56:56 +01:00
20 changed files with 48 additions and 33 deletions

View File

@@ -24,5 +24,7 @@
# Please keep the list sorted. # Please keep the list sorted.
# (first name; alphabetical order) # (first name; alphabetical order)
Denis Moyogo Jacquerye <moyogo@gmail.com>
Felipe Correa da Silva Sanches <juca@members.fsf.org> Felipe Correa da Silva Sanches <juca@members.fsf.org>
Marc Foley <m.foley.88@gmail.com> Marc Foley <m.foley.88@gmail.com>
Simon Cozens <simon@simon-cozens.org>

View File

@@ -1,7 +1,8 @@
id: "bdh_Latn" id: "bdh_Latn"
language: "bdh" language: "bdh"
script: "Latn" script: "Latn"
name: "Baka" name: "Baka (DRC/South Sudan)"
autonym: "Tara Baká"
population: 60000 population: 60000
region: "CD" region: "CD"
region: "SS" region: "SS"

View File

@@ -1,7 +1,7 @@
id: "beh_Latn" id: "beh_Latn"
language: "beh" language: "beh"
script: "Latn" script: "Latn"
name: "Baka" name: "Biali"
population: 100000 population: 100000
region: "BJ" region: "BJ"
exemplar_chars { exemplar_chars {

View File

@@ -2,7 +2,7 @@ id: "bgn_Arab"
language: "bgn" language: "bgn"
script: "Arab" script: "Arab"
name: "Western Balochi" name: "Western Balochi"
preferred_name: "Balochi" #preferred_name: "Balochi"
population: 2037382 population: 2037382
region: "AF" region: "AF"
region: "IR" region: "IR"

View File

@@ -1,7 +1,7 @@
id: "bkc_Latn" id: "bkc_Latn"
language: "bkc" language: "bkc"
script: "Latn" script: "Latn"
name: "Baka" name: "Baka (Cameroon/Gabon)"
population: 71000 population: 71000
region: "CM" region: "CM"
region: "GA" region: "GA"

View File

@@ -1,7 +1,7 @@
id: "bsc_Latn_GN" id: "bsc_Latn_GN"
language: "bsc" language: "bsc"
script: "Latn" script: "Latn"
name: "Bassari" name: "Guinean Bassari"
autonym: "oneyan" autonym: "oneyan"
population: 18000 population: 18000
region: "GN" region: "GN"

View File

@@ -1,7 +1,7 @@
id: "bsq_Bass" id: "bsq_Bass"
language: "bsq" language: "bsq"
script: "Bass" script: "Bass"
name: "Bassa" name: "Bassa, Vah"
population: 410000 population: 410000
region: "LR" region: "LR"
region: "SL" region: "SL"

View File

@@ -1,7 +1,7 @@
id: "bsq_Latn" id: "bsq_Latn"
language: "bsq" language: "bsq"
script: "Latn" script: "Latn"
name: "Bassa" name: "Bassa, Latin"
population: 410000 population: 410000
region: "LR" region: "LR"
region: "SL" region: "SL"

View File

@@ -1,7 +1,7 @@
id: "crh_Cyrl" id: "crh_Cyrl"
language: "crh" language: "crh"
script: "Cyrl" script: "Cyrl"
name: "Crimean Turkish" name: "Crimean Turkish, Cyrillic"
autonym: "Къырымтатар" autonym: "Къырымтатар"
population: 245968 population: 245968
region: "UA" region: "UA"

View File

@@ -1,7 +1,7 @@
id: "dnj_Latn_LR" id: "dnj_Latn_LR"
language: "dnj" language: "dnj"
script: "Latn" script: "Latn"
name: "Dan" name: "Liberian Dan"
autonym: "Gio" autonym: "Gio"
population: 1099244 population: 1099244
region: "LR" region: "LR"

View File

@@ -1,7 +1,7 @@
id: "evn_Latn" id: "evn_Latn"
language: "evn" language: "evn"
script: "Latn" script: "Latn"
name: "Evenki" name: "Evenki, Latin"
population: 16000 population: 16000
region: "RU" region: "RU"
region: "CN" region: "CN"

View File

@@ -1,4 +1,4 @@
id: "kr_Arab" id: "kr_Arab"
language: "kr" language: "kr"
script: "Arab" script: "Arab"
name: "Kanuri" name: "Kanuri, Arabic"

View File

@@ -1,11 +0,0 @@
id: "mlt_Latn"
language: "mlt"
script: "Latn"
name: "Maltese"
population: 530000
region: "MT"
exemplar_chars {
base: "a A à À b B ċ Ċ d D e E è È f F ġ Ġ g G h H ħ Ħ i I ì Ì j J k K l L m M n N o O ò Ò p P q Q r R s S t T u U ù Ù v V w W x X ż Ż z Z"
marks: "◌̀ ◌̇"
auxiliary: "c C y Y"
}

View File

@@ -2,7 +2,7 @@
id: "sa_Nand" id: "sa_Nand"
language: "sa" language: "sa"
script: "Nand" script: "Nand"
name: "Sanskrit" name: "Sanskrit, Nandinagari"
autonym: "𑧍𑧞𑧍𑧠𑦮𑧖𑦽𑧆𑧠" autonym: "𑧍𑧞𑧍𑧠𑦮𑧖𑦽𑧆𑧠"
region: "IN" region: "IN"
exemplar_chars { exemplar_chars {

View File

@@ -1,4 +1,5 @@
id: "uma_Latn" id: "uma_Latn"
language: "uma"
script: "Latn" script: "Latn"
name: "Umatilla" name: "Umatilla"
population: 25 population: 25

View File

@@ -1,6 +1,6 @@
id: "wal_Ethi" id: "wal_Ethi"
language: "wal" language: "wal"
script: "Ethi" script: "Ethi"
name: "Wolaytta" name: "Wolaytta, Ethiopic"
population: 1946034 population: 1946034
region: "ET" region: "ET"

View File

@@ -1,7 +1,7 @@
id: "wal_Latn" id: "wal_Latn"
language: "wal" language: "wal"
script: "Latn" script: "Latn"
name: "Wolaytta" name: "Wolaytta, Latin"
population: 7000000 population: 7000000
region: "ET" region: "ET"
exemplar_chars { exemplar_chars {

View File

@@ -1,7 +1,7 @@
id: "xsm_Latn_BF" id: "xsm_Latn_BF"
language: "xsm" language: "xsm"
script: "Latn" script: "Latn"
name: "Kasem" name: "Burkinabè Kasem"
population: 250000 population: 250000
region: "BF" region: "BF"
exemplar_chars { exemplar_chars {

View File

@@ -265,3 +265,18 @@ def test_exemplar_parser():
"l", "l",
"̍", "̍",
} }
def test_language_uniqueness():
    """Fail if two languages share a display name within the same script.

    The display name of a language is its ``preferred_name`` when that is
    set and its ``name`` otherwise. Names are counted under a
    ``"<script>/<display name>"`` key, so the same name may be reused by
    languages written in different scripts without being reported.
    """
    names = Counter()
    for lang in LANGUAGES.values():
        # We check that names are unique *within a script* since
        # when we display them in a menu we segment that menu by
        # script and then by language
        display_name = lang.preferred_name or lang.name
        # Both cases must be keyed by script; keying the fallback case by
        # name alone would flag the same name used across different scripts.
        names[lang.script + "/" + display_name] += 1
    duplicates = {name: count for name, count in names.items() if count > 1}
    if duplicates:
        pytest.fail(f"Duplicate language names: {duplicates}")

View File

@@ -7,9 +7,16 @@ from google.protobuf import text_format
languages_dir = os.path.join(DATA_DIR, "languages") languages_dir = os.path.join(DATA_DIR, "languages")
textproto_files = [os.path.basename(x) for x in glob.iglob(os.path.join(languages_dir, "*.textproto"))] textproto_files = [
os.path.basename(x) for x in glob.iglob(os.path.join(languages_dir, "*.textproto"))
]
@pytest.mark.parametrize("lang_code", textproto_files) @pytest.mark.parametrize("lang_code", textproto_files)
def test_parsable(lang_code): def test_parsable(lang_code):
with open(os.path.join(languages_dir, lang_code), "r", encoding="utf-8") as f: with open(os.path.join(languages_dir, lang_code), "r", encoding="utf-8") as f:
text_format.Parse(f.read(), languages_public_pb2.LanguageProto()) msg = text_format.Parse(f.read(), languages_public_pb2.LanguageProto())
assert msg.id
assert msg.language
assert msg.script
assert msg.population is not None