Merge commit '9b4d8d28810488fde8c03ae1f37be1628b6b764a' into lang-v0.6.1
This commit is contained in:
@@ -24,5 +24,7 @@
|
||||
# Please keep the list sorted.
|
||||
# (first name; alphabetical order)
|
||||
|
||||
Denis Moyogo Jacquerye <moyogo@gmail.com>
|
||||
Felipe Correa da Silva Sanches <juca@members.fsf.org>
|
||||
Marc Foley <m.foley.88@gmail.com>
|
||||
Simon Cozens <simon@simon-cozens.org>
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
id: "bdh_Latn"
|
||||
language: "bdh"
|
||||
script: "Latn"
|
||||
name: "Baka"
|
||||
name: "Baka (DRC/South Sudan)"
|
||||
autonym: "Tara Baká"
|
||||
population: 60000
|
||||
region: "CD"
|
||||
region: "SS"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
id: "beh_Latn"
|
||||
language: "beh"
|
||||
script: "Latn"
|
||||
name: "Baka"
|
||||
name: "Biali"
|
||||
population: 100000
|
||||
region: "BJ"
|
||||
exemplar_chars {
|
||||
|
||||
@@ -2,7 +2,7 @@ id: "bgn_Arab"
|
||||
language: "bgn"
|
||||
script: "Arab"
|
||||
name: "Western Balochi"
|
||||
preferred_name: "Balochi"
|
||||
#preferred_name: "Balochi"
|
||||
population: 2037382
|
||||
region: "AF"
|
||||
region: "IR"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
id: "bkc_Latn"
|
||||
language: "bkc"
|
||||
script: "Latn"
|
||||
name: "Baka"
|
||||
name: "Baka (Cameroon/Gabon)"
|
||||
population: 71000
|
||||
region: "CM"
|
||||
region: "GA"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
id: "bsc_Latn_GN"
|
||||
language: "bsc"
|
||||
script: "Latn"
|
||||
name: "Bassari"
|
||||
name: "Guinean Bassari"
|
||||
autonym: "oneyan"
|
||||
population: 18000
|
||||
region: "GN"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
id: "bsq_Bass"
|
||||
language: "bsq"
|
||||
script: "Bass"
|
||||
name: "Bassa"
|
||||
name: "Bassa, Vah"
|
||||
population: 410000
|
||||
region: "LR"
|
||||
region: "SL"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
id: "bsq_Latn"
|
||||
language: "bsq"
|
||||
script: "Latn"
|
||||
name: "Bassa"
|
||||
name: "Bassa, Latin"
|
||||
population: 410000
|
||||
region: "LR"
|
||||
region: "SL"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
id: "crh_Cyrl"
|
||||
language: "crh"
|
||||
script: "Cyrl"
|
||||
name: "Crimean Turkish"
|
||||
name: "Crimean Turkish, Cyrillic"
|
||||
autonym: "Къырымтатар"
|
||||
population: 245968
|
||||
region: "UA"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
id: "dnj_Latn_LR"
|
||||
language: "dnj"
|
||||
script: "Latn"
|
||||
name: "Dan"
|
||||
name: "Liberian Dan"
|
||||
autonym: "Gio"
|
||||
population: 1099244
|
||||
region: "LR"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
id: "evn_Latn"
|
||||
language: "evn"
|
||||
script: "Latn"
|
||||
name: "Evenki"
|
||||
name: "Evenki, Latin"
|
||||
population: 16000
|
||||
region: "RU"
|
||||
region: "CN"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
id: "kr_Arab"
|
||||
language: "kr"
|
||||
script: "Arab"
|
||||
name: "Kanuri"
|
||||
name: "Kanuri, Arabic"
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
id: "mlt_Latn"
|
||||
language: "mlt"
|
||||
script: "Latn"
|
||||
name: "Maltese"
|
||||
population: 530000
|
||||
region: "MT"
|
||||
exemplar_chars {
|
||||
base: "a A à À b B ċ Ċ d D e E è È f F ġ Ġ g G h H ħ Ħ i I ì Ì j J k K l L m M n N o O ò Ò p P q Q r R s S t T u U ù Ù v V w W x X ż Ż z Z"
|
||||
marks: "◌̀ ◌̇"
|
||||
auxiliary: "c C y Y"
|
||||
}
|
||||
@@ -2,7 +2,7 @@
|
||||
id: "sa_Nand"
|
||||
language: "sa"
|
||||
script: "Nand"
|
||||
name: "Sanskrit"
|
||||
name: "Sanskrit, Nandinagari"
|
||||
autonym: "𑧍𑧞𑧍𑧠𑦮𑧖𑦽𑧆𑧠"
|
||||
region: "IN"
|
||||
exemplar_chars {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
id: "uma_Latn"
|
||||
language: "uma"
|
||||
script: "Latn"
|
||||
name: "Umatilla"
|
||||
population: 25
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
id: "wal_Ethi"
|
||||
language: "wal"
|
||||
script: "Ethi"
|
||||
name: "Wolaytta"
|
||||
name: "Wolaytta, Ethiopic"
|
||||
population: 1946034
|
||||
region: "ET"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
id: "wal_Latn"
|
||||
language: "wal"
|
||||
script: "Latn"
|
||||
name: "Wolaytta"
|
||||
name: "Wolaytta, Latin"
|
||||
population: 7000000
|
||||
region: "ET"
|
||||
exemplar_chars {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
id: "xsm_Latn_BF"
|
||||
language: "xsm"
|
||||
script: "Latn"
|
||||
name: "Kasem"
|
||||
name: "Burkinabè Kasem"
|
||||
population: 250000
|
||||
region: "BF"
|
||||
exemplar_chars {
|
||||
|
||||
@@ -265,3 +265,18 @@ def test_exemplar_parser():
|
||||
"l",
|
||||
"̍",
|
||||
}
|
||||
|
||||
|
||||
def test_language_uniqueness():
    """Fail if two languages share a display name within the same script.

    A language's display name is its ``preferred_name`` when that is set,
    otherwise its ``name``. Every entry of ``LANGUAGES`` is counted under
    the key ``"<script>/<display name>"``; any key seen more than once is
    reported through ``pytest.fail``.
    """
    # We check that names are unique *within a script* since
    # when we display them in a menu we segment that menu by
    # script and then by language
    names = Counter(
        # The key must carry the script in both cases; keying the fallback
        # case by name alone would flag the same name used under two scripts.
        lang.script + "/" + (lang.preferred_name or lang.name)
        for lang in LANGUAGES.values()
    )
    duplicates = {name: count for name, count in names.items() if count > 1}
    if duplicates:
        pytest.fail(f"Duplicate language names: {duplicates}")
|
||||
|
||||
@@ -7,9 +7,16 @@ from google.protobuf import text_format
|
||||
|
||||
|
||||
# Directory that holds the per-language ``*.textproto`` definitions.
languages_dir = os.path.join(DATA_DIR, "languages")
# Base file names (no directory part) of every ``*.textproto`` file found there.
textproto_files = list(
    map(os.path.basename, glob.iglob(os.path.join(languages_dir, "*.textproto")))
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lang_code", textproto_files)
def test_parsable(lang_code):
    """Check that one language file parses and carries its core fields.

    ``lang_code`` is the base file name of a ``*.textproto`` file inside
    ``languages_dir``. The file is parsed as a ``LanguageProto`` text
    message, then ``id``, ``language`` and ``script`` must be non-empty.
    """
    with open(os.path.join(languages_dir, lang_code), "r", encoding="utf-8") as f:
        # Read and parse the file exactly once: the handle is exhausted after
        # ``f.read()``, so any further read would return an empty string.
        msg = text_format.Parse(f.read(), languages_public_pb2.LanguageProto())
    assert msg.id
    assert msg.language
    assert msg.script
    # NOTE(review): scalar protobuf fields normally read back as their default
    # value rather than None, so this check may always pass - confirm whether
    # a presence check was intended here.
    assert msg.population is not None
|
||||
|
||||
Reference in New Issue
Block a user