Merge commit '9b4d8d28810488fde8c03ae1f37be1628b6b764a' into lang-v0.6.1
This commit is contained in:
@@ -24,5 +24,7 @@
|
|||||||
# Please keep the list sorted.
|
# Please keep the list sorted.
|
||||||
# (first name; alphabetical order)
|
# (first name; alphabetical order)
|
||||||
|
|
||||||
|
Denis Moyogo Jacquerye <moyogo@gmail.com>
|
||||||
Felipe Correa da Silva Sanches <juca@members.fsf.org>
|
Felipe Correa da Silva Sanches <juca@members.fsf.org>
|
||||||
Marc Foley <m.foley.88@gmail.com>
|
Marc Foley <m.foley.88@gmail.com>
|
||||||
|
Simon Cozens <simon@simon-cozens.org>
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
id: "bdh_Latn"
|
id: "bdh_Latn"
|
||||||
language: "bdh"
|
language: "bdh"
|
||||||
script: "Latn"
|
script: "Latn"
|
||||||
name: "Baka"
|
name: "Baka (DRC/South Sudan)"
|
||||||
|
autonym: "Tara Baká"
|
||||||
population: 60000
|
population: 60000
|
||||||
region: "CD"
|
region: "CD"
|
||||||
region: "SS"
|
region: "SS"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
id: "beh_Latn"
|
id: "beh_Latn"
|
||||||
language: "beh"
|
language: "beh"
|
||||||
script: "Latn"
|
script: "Latn"
|
||||||
name: "Baka"
|
name: "Biali"
|
||||||
population: 100000
|
population: 100000
|
||||||
region: "BJ"
|
region: "BJ"
|
||||||
exemplar_chars {
|
exemplar_chars {
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ id: "bgn_Arab"
|
|||||||
language: "bgn"
|
language: "bgn"
|
||||||
script: "Arab"
|
script: "Arab"
|
||||||
name: "Western Balochi"
|
name: "Western Balochi"
|
||||||
preferred_name: "Balochi"
|
#preferred_name: "Balochi"
|
||||||
population: 2037382
|
population: 2037382
|
||||||
region: "AF"
|
region: "AF"
|
||||||
region: "IR"
|
region: "IR"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
id: "bkc_Latn"
|
id: "bkc_Latn"
|
||||||
language: "bkc"
|
language: "bkc"
|
||||||
script: "Latn"
|
script: "Latn"
|
||||||
name: "Baka"
|
name: "Baka (Cameroon/Gabon)"
|
||||||
population: 71000
|
population: 71000
|
||||||
region: "CM"
|
region: "CM"
|
||||||
region: "GA"
|
region: "GA"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
id: "bsc_Latn_GN"
|
id: "bsc_Latn_GN"
|
||||||
language: "bsc"
|
language: "bsc"
|
||||||
script: "Latn"
|
script: "Latn"
|
||||||
name: "Bassari"
|
name: "Guinean Bassari"
|
||||||
autonym: "oneyan"
|
autonym: "oneyan"
|
||||||
population: 18000
|
population: 18000
|
||||||
region: "GN"
|
region: "GN"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
id: "bsq_Bass"
|
id: "bsq_Bass"
|
||||||
language: "bsq"
|
language: "bsq"
|
||||||
script: "Bass"
|
script: "Bass"
|
||||||
name: "Bassa"
|
name: "Bassa, Vah"
|
||||||
population: 410000
|
population: 410000
|
||||||
region: "LR"
|
region: "LR"
|
||||||
region: "SL"
|
region: "SL"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
id: "bsq_Latn"
|
id: "bsq_Latn"
|
||||||
language: "bsq"
|
language: "bsq"
|
||||||
script: "Latn"
|
script: "Latn"
|
||||||
name: "Bassa"
|
name: "Bassa, Latin"
|
||||||
population: 410000
|
population: 410000
|
||||||
region: "LR"
|
region: "LR"
|
||||||
region: "SL"
|
region: "SL"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
id: "crh_Cyrl"
|
id: "crh_Cyrl"
|
||||||
language: "crh"
|
language: "crh"
|
||||||
script: "Cyrl"
|
script: "Cyrl"
|
||||||
name: "Crimean Turkish"
|
name: "Crimean Turkish, Cyrillic"
|
||||||
autonym: "Къырымтатар"
|
autonym: "Къырымтатар"
|
||||||
population: 245968
|
population: 245968
|
||||||
region: "UA"
|
region: "UA"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
id: "dnj_Latn_LR"
|
id: "dnj_Latn_LR"
|
||||||
language: "dnj"
|
language: "dnj"
|
||||||
script: "Latn"
|
script: "Latn"
|
||||||
name: "Dan"
|
name: "Liberian Dan"
|
||||||
autonym: "Gio"
|
autonym: "Gio"
|
||||||
population: 1099244
|
population: 1099244
|
||||||
region: "LR"
|
region: "LR"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
id: "evn_Latn"
|
id: "evn_Latn"
|
||||||
language: "evn"
|
language: "evn"
|
||||||
script: "Latn"
|
script: "Latn"
|
||||||
name: "Evenki"
|
name: "Evenki, Latin"
|
||||||
population: 16000
|
population: 16000
|
||||||
region: "RU"
|
region: "RU"
|
||||||
region: "CN"
|
region: "CN"
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
id: "kr_Arab"
|
id: "kr_Arab"
|
||||||
language: "kr"
|
language: "kr"
|
||||||
script: "Arab"
|
script: "Arab"
|
||||||
name: "Kanuri"
|
name: "Kanuri, Arabic"
|
||||||
|
|||||||
@@ -1,11 +0,0 @@
|
|||||||
id: "mlt_Latn"
|
|
||||||
language: "mlt"
|
|
||||||
script: "Latn"
|
|
||||||
name: "Maltese"
|
|
||||||
population: 530000
|
|
||||||
region: "MT"
|
|
||||||
exemplar_chars {
|
|
||||||
base: "a A à À b B ċ Ċ d D e E è È f F ġ Ġ g G h H ħ Ħ i I ì Ì j J k K l L m M n N o O ò Ò p P q Q r R s S t T u U ù Ù v V w W x X ż Ż z Z"
|
|
||||||
marks: "◌̀ ◌̇"
|
|
||||||
auxiliary: "c C y Y"
|
|
||||||
}
|
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
id: "sa_Nand"
|
id: "sa_Nand"
|
||||||
language: "sa"
|
language: "sa"
|
||||||
script: "Nand"
|
script: "Nand"
|
||||||
name: "Sanskrit"
|
name: "Sanskrit, Nandinagari"
|
||||||
autonym: "𑧍𑧞𑧍𑧠𑦮𑧖𑦽𑧆𑧠"
|
autonym: "𑧍𑧞𑧍𑧠𑦮𑧖𑦽𑧆𑧠"
|
||||||
region: "IN"
|
region: "IN"
|
||||||
exemplar_chars {
|
exemplar_chars {
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
id: "uma_Latn"
|
id: "uma_Latn"
|
||||||
|
language: "uma"
|
||||||
script: "Latn"
|
script: "Latn"
|
||||||
name: "Umatilla"
|
name: "Umatilla"
|
||||||
population: 25
|
population: 25
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
id: "wal_Ethi"
|
id: "wal_Ethi"
|
||||||
language: "wal"
|
language: "wal"
|
||||||
script: "Ethi"
|
script: "Ethi"
|
||||||
name: "Wolaytta"
|
name: "Wolaytta, Ethiopic"
|
||||||
population: 1946034
|
population: 1946034
|
||||||
region: "ET"
|
region: "ET"
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
id: "wal_Latn"
|
id: "wal_Latn"
|
||||||
language: "wal"
|
language: "wal"
|
||||||
script: "Latn"
|
script: "Latn"
|
||||||
name: "Wolaytta"
|
name: "Wolaytta, Latin"
|
||||||
population: 7000000
|
population: 7000000
|
||||||
region: "ET"
|
region: "ET"
|
||||||
exemplar_chars {
|
exemplar_chars {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
id: "xsm_Latn_BF"
|
id: "xsm_Latn_BF"
|
||||||
language: "xsm"
|
language: "xsm"
|
||||||
script: "Latn"
|
script: "Latn"
|
||||||
name: "Kasem"
|
name: "Burkinabè Kasem"
|
||||||
population: 250000
|
population: 250000
|
||||||
region: "BF"
|
region: "BF"
|
||||||
exemplar_chars {
|
exemplar_chars {
|
||||||
|
|||||||
@@ -265,3 +265,18 @@ def test_exemplar_parser():
|
|||||||
"l",
|
"l",
|
||||||
"̍",
|
"̍",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_language_uniqueness():
    """Fail when two languages share a display name within the same script.

    The display name of a language is its ``preferred_name`` when that is
    set, otherwise its ``name``. Every language is counted under the key
    ``"<script>/<display name>"``; any key seen more than once is reported
    through ``pytest.fail`` together with its count.
    """
    # We check that names are unique *within a script* since
    # when we display them in a menu we segment that menu by
    # script and then by language.
    #
    # Both the preferred-name case and the fallback case must use the same
    # "<script>/<name>" key shape; otherwise a language relying on ``name``
    # can never collide with one relying on ``preferred_name``, and the
    # script would be dropped from the fallback key entirely.
    names = Counter(
        lang.script + "/" + (lang.preferred_name or lang.name)
        for lang in LANGUAGES.values()
    )
    duplicates = {name: count for name, count in names.items() if count > 1}
    if duplicates:
        pytest.fail(f"Duplicate language names: {duplicates}")
|
||||||
|
|||||||
@@ -7,9 +7,16 @@ from google.protobuf import text_format
|
|||||||
|
|
||||||
|
|
||||||
# Directory that holds the language ``*.textproto`` files.
languages_dir = os.path.join(DATA_DIR, "languages")
# File names (directory part stripped) of every ``*.textproto`` file found
# in ``languages_dir``; used to parametrize the tests below.
textproto_files = list(
    map(os.path.basename, glob.iglob(os.path.join(languages_dir, "*.textproto")))
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("lang_code", textproto_files)
def test_parsable(lang_code):
    """Parse one language file and check that its key fields are populated.

    ``lang_code`` is the file name of a ``.textproto`` file located in
    ``languages_dir``. The file is read as UTF-8 and parsed into a
    ``LanguageProto``; parsing errors propagate and fail the test.
    """
    proto_path = os.path.join(languages_dir, lang_code)
    with open(proto_path, "r", encoding="utf-8") as handle:
        contents = handle.read()
    msg = text_format.Parse(contents, languages_public_pb2.LanguageProto())

    # Identifier fields must be non-empty.
    assert msg.id
    assert msg.language
    assert msg.script
    # NOTE(review): generated protobuf scalar accessors normally return a
    # default value rather than None, so this check may never fail --
    # confirm whether a presence check was intended here.
    assert msg.population is not None
|
||||||
|
|||||||
Reference in New Issue
Block a user