1
0
mirror of synced 2025-12-19 09:37:52 -05:00

Merge commit '03b1e914483de5d5faced153c8f0ce20a8b6fe9d' into lang-v0.7.6

This commit is contained in:
Simon Cozens
2025-07-18 13:02:22 +01:00
13 changed files with 81 additions and 24 deletions

View File

@@ -80,3 +80,28 @@ jobs:
with:
# repository-url: https://test.pypi.org/legacy/ # for testing purposes
verify-metadata: false # twine previously didn't verify metadata when uploading
# Publishes the Rust crate to crates.io after verifying that the pushed
# tag (vX.Y.Z) agrees with the version declared in Cargo.toml.
rust:
  name: Publish Rust 🦀 distribution 📦 to crates.io
  runs-on: ubuntu-latest
  permissions:
    contents: write
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
        fetch-depth: 0
    - name: Set up Rust
      uses: actions-rust-lang/setup-rust-toolchain@v1
    - name: Check tag name matches Cargo.toml version
      run: |-
        # Strip the "refs/tags/v" prefix so only the bare version remains.
        TAG_NAME=${GITHUB_REF/refs\/tags\/v/}
        # `cargo pkgid` prints either "...#name@version" or, when the package
        # name equals the last path component, "...#version"; accept both.
        CARGO_VERSION=$(cargo pkgid | sed 's/.*[#@]//')
        if [ "$TAG_NAME" != "$CARGO_VERSION" ]; then
          echo "Tag name ($TAG_NAME) does not match Cargo.toml version ($CARGO_VERSION)"
          exit 1
        fi
    - name: Publish Rust distribution 📦 to crates.io
      # Hand the token to cargo through its documented environment variable
      # instead of interpolating the secret into the shell command line.
      env:
        CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_API_TOKEN }}
      run: cargo publish

View File

@@ -1,6 +1,6 @@
[package]
name = "google-fonts-languages"
version = "0.7.4"
version = "0.7.6"
edition = "2021"
description = "Google Fonts script and language support data"
repository = "https://github.com/googlefonts/lang"

View File

@@ -10,17 +10,21 @@ region: "ET"
exemplar_chars {
base: "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z"
}
# Originally we had seehadaytiinô, matching the translation at
# http://alma.matrix.msu.edu/wp-content/uploads/2022/06/UDHR-Afar-Somalia.pdf
# but I can't find any reference for there being diacritics in this alphabet, so we
# go with the exemplars
sample_text {
masthead_full: "KkAa"
masthead_partial: "Rr"
styles: "Seehada le karaamat kee garway inki gide"
tester: "Seehadayti garwah iggimaa kee bura aliinoh abtoy seehadaytiinô"
tester: "Seehadayti garwah iggimaa kee bura aliinoh abtoy seehadaytiino"
poster_sm: "Seehadayti"
poster_md: "Ummattah"
poster_lg: "Karaamat"
specimen_48: "Galli-Maroh ayyuntitte qusba addah kukti addal tan seehadayti"
specimen_36: "Kulli num ayyunti caddoo kee baad caddola kuktih addal tan garwah addal tanim dudda luk geyaamih gar le."
specimen_32: "Seehada le karaamat kee garway inki gide takke edde yaaqitoonu duude waana yaaxigeenim baadal curriyataay, qadlii kee wagari rakiiboh tanim kinnim cedak,"
specimen_21: "Seehada le karaamat kee garway inki gide takke edde yaaqitoonu duude waana yaaxigeenim baadal curriyataay, qadlii kee wagari rakiiboh tanim kinnim cedak,\nSeehadayti garwah iggimaa kee bura aliinoh abtoy seehadaytiinô kas cerissat sinam bahtem kee seehada yab kee diini curriyat elle leeh, gilic kee assommik elle tesseqqeh xintu dudda baadih geyto seehadaytuk inkih naba fayxi kinnim cedak,"
specimen_16: "Seehada le karaamat kee garway inki gide takke edde yaaqitoonu duude waana yaaxigeenim baadal curriyataay, qadlii kee wagari rakiiboh tanim kinnim cedak,\nSeehadayti garwah iggimaa kee bura aliinoh abtoy seehadaytiinô kas cerissat sinam bahtem kee seehada yab kee diini curriyat elle leeh, gilic kee assommik elle tesseqqeh xintu dudda baadih geyto seehadaytuk inkih naba fayxi kinnim cedak,\nSeehadayti dulmi kee xukkot koosituh ellecaboh eleytoh qeebi haysite wayuh seehadayti garwa garti xintoh dacayri geytam saqlah tanim cedak,\nUmmattah fanal qalliini dadal ilsiisaanam saqlah tanim kinnim cedak,"
specimen_21: "Seehada le karaamat kee garway inki gide takke edde yaaqitoonu duude waana yaaxigeenim baadal curriyataay, qadlii kee wagari rakiiboh tanim kinnim cedak,\nSeehadayti garwah iggimaa kee bura aliinoh abtoy seehadaytiino kas cerissat sinam bahtem kee seehada yab kee diini curriyat elle leeh, gilic kee assommik elle tesseqqeh xintu dudda baadih geyto seehadaytuk inkih naba fayxi kinnim cedak,"
specimen_16: "Seehada le karaamat kee garway inki gide takke edde yaaqitoonu duude waana yaaxigeenim baadal curriyataay, qadlii kee wagari rakiiboh tanim kinnim cedak,\nSeehadayti garwah iggimaa kee bura aliinoh abtoy seehadaytiino kas cerissat sinam bahtem kee seehada yab kee diini curriyat elle leeh, gilic kee assommik elle tesseqqeh xintu dudda baadih geyto seehadaytuk inkih naba fayxi kinnim cedak,\nSeehadayti dulmi kee xukkot koosituh ellecaboh eleytoh qeebi haysite wayuh seehadayti garwa garti xintoh dacayri geytam saqlah tanim cedak,\nUmmattah fanal qalliini dadal ilsiisaanam saqlah tanim kinnim cedak,"
}

View File

@@ -2,3 +2,9 @@ id: "ber_Latn"
language: "ber"
script: "Latn"
name: "Berber (Latin)"
exemplar_chars {
base: "a A b B ḇ Ḇ {bʷ} {Bʷ} {bh} {BH} c C {c̣} {C̣} č Č d D ḍ Ḍ ḏ Ḏ e E f F g G ǧ Ǧ ḡ Ḡ {gʷ} {Gʷ} h H ḥ Ḥ i I j J {j̣} {J̣} k K ḵ Ḵ {kʷ} {Kʷ} l L ḷ Ḷ m M {mʷ} {Mʷ} n N ɲ Ɲ ŋ Ŋ p P {pʷ} {Pʷ} q Q {qʷ} {Qʷ} r R ṛ Ṛ s S ṣ Ṣ š Š {ṣ̌} {Ṣ̌} t T ț Ț ṭ Ṭ ṯ Ṯ u U w W x X {x̣} {X̣} {xʷ} {Xʷ} y Y z Z ẓ Ẓ ž Ž ɛ Ɛ ɣ Ɣ {ɣʷ} {Ɣʷ} {z̦} {Z̦}"
auxiliary: "ə Ə ţ Ţ ʿ ʾ ġ Ġ {z̧} {Z̧}"
marks: "◌̄ ◌̌ ◌̣ ◌̦ ◌̧ ◌̱"
}
source: "Samir El-Arifi, Tamazight de l’Atlas Blidéen, Office des publications universitaires, 2020"

View File

@@ -27,5 +27,6 @@ sample_text {
source: "Département de Mayotte, Alphabets des langues mahoraises, 2020"
source: "Département de Mayotte, Recueil des actes administratifs, 20 avril 2020"
source: "N. J. Gueunier, Dictionnaire du dialecte malgache de Mayotte (Comores), Moroni: KomEDIT, 2016"
source: "N. J. Gueunier & Madjidhoubi Said, Contes comoriens en dialecte malgache de l’île de Mayotte, vol. 1-3, Peeters, vol. 4, Karthala, 1990-2011"
source: "R. Janet, Dictionnaire Kibushi, Webonary.org, 2020"
note: "Gueunier 2016 proposes and uses ƀ (with a middle stroke), đ (with a middle stroke, or ꟈ) and uses ĵ, n̂, n̈, ô, ŝ, ẑ as linguists from the Malagasy department of the Université d’Antananarivo. Gueunier 2016 also uses tilde for nasalisation of vowels. In 2020, the Département de Mayotte adopted a different alphabet consistent with the Shimaore alphabet, using ɓ, ɗ, ŋ (or gn), several digraphs, tilde accent for nasalisation of vowels, and with o, u instead of ô, u. The grave accent is typically used for indicating stress when irregular and necessary for disambiguation, like in Malagasy, but it’s not clear if it is combined with other diacritics in practice."
note: "Gueunier 2016 and earlier work propose and use ƀ (with a middle stroke), đ (with a middle stroke, or ꟈ) and use ĵ, n̂, n̈, ô, ŝ, ẑ, as do linguists from the Malagasy department of the Université d’Antananarivo. Gueunier 2016 also uses tilde for nasalisation of vowels. In 2020, the Département de Mayotte adopted a different alphabet consistent with the Shimaore alphabet, using ɓ, ɗ, ŋ (or gn), several digraphs, tilde accent for nasalisation of vowels, and with o, u instead of ô, u. The grave accent is typically used for indicating stress when irregular and necessary for disambiguation, like in Malagasy, but it’s not clear if it is combined with other diacritics in practice."

View File

@@ -8,6 +8,7 @@ region: "SC"
exemplar_chars {
base: "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z"
}
# We originally had kiín, but this is not supported by the orthography; see eric-muller/udhr#139
sample_text {
masthead_full: "NnOo"
masthead_partial: "Uu"
@@ -18,7 +19,7 @@ sample_text {
poster_lg: "Nou"
specimen_48: "Sakenn-de-nou ganny drwa zwir tou nou bann drwa ek nou bann liberte"
specimen_36: "Personn pa devret ganny arete, obyen ganny met an detansyon, obyen ganny anvoye an ekzile an bonnavini."
specimen_32: "Tou dimoun merit en lord sosyal e enternasyonal kot nou bann drwa ek nou bann liberte, dapre sa kiín ekri dan sa Deklarasyon, a vin en realite konplet."
specimen_32: "Tou dimoun merit en lord sosyal e enternasyonal kot nou bann drwa ek nou bann liberte, dapre sa kiin ekri dan sa Deklarasyon, a vin en realite konplet."
specimen_21: "Napa narnyen dan sa Deklarasyon ki devret ganny enterprete komkwa en leta, an group obyen en dimoun annan en drwa kelkonk pou fer nenport ki aktivite obyen aksyon ki kapab detri bann drwa ek bann liberte kin ganny ekri ladan.\nAn rekonnesans ki dignite imen e bann drwa egal e enseparab pou tou manm lafanmiyr imen I fondasyon pou laliberte, lazistis e lape dan lemonn,"
specimen_16: "An rekonnesans ki dignite imen e bann drwa egal e enseparab pou tou manm lafanmiyr imen I fondasyon pou laliberte, lazistis e lape dan lemonn,\nAn konsiderasyon ki mank konnesans e respe pou bann drwa imen in okazyonn en bann aksyon barbar kiín boulvers konsyans limanite, e osi larive en monn kot imen a kapab eksprim zot lekor dan laliberte e dapre zot konsyans san okenn lafreyer obyen mank mwayen, parey in ganny proklanmen konman laspirasyon pli o pou tou imen,\nAn konsiderasyon ki I esansyel ki bann drwa imen I ganny proteze par en sistenm lalwa kot dimoun pa oblize, konman en dernyen rekour, pou revolte kont la tirani e lopresyon,"
specimen_16: "An rekonnesans ki dignite imen e bann drwa egal e enseparab pou tou manm lafanmiyr imen I fondasyon pou laliberte, lazistis e lape dan lemonn,\nAn konsiderasyon ki mank konnesans e respe pou bann drwa imen in okazyonn en bann aksyon barbar kiin boulvers konsyans limanite, e osi larive en monn kot imen a kapab eksprim zot lekor dan laliberte e dapre zot konsyans san okenn lafreyer obyen mank mwayen, parey in ganny proklanmen konman laspirasyon pli o pou tou imen,\nAn konsiderasyon ki I esansyel ki bann drwa imen I ganny proteze par en sistenm lalwa kot dimoun pa oblize, konman en dernyen rekour, pou revolte kont la tirani e lopresyon,"
}

View File

@@ -5,7 +5,7 @@ name: "Kimbundu"
population: 8130575
region: "AO"
exemplar_chars {
base: "A B D E F H I J K L M N O P S T U V X Z a b d e f h i j k l m n o p s t u v x z"
base: "A B D E F H I J K L M N O P S T U V X Z a b d e é ê f h i j k l m n o ô p s t u v x z"
}
sample_text {
masthead_full: "OoAa"

View File

@@ -5,7 +5,7 @@ name: "Ao Naga"
population: 305000
region: "IN"
exemplar_chars {
base: "A B C E I J K L M N O P R S T U Y Z a b c e i j k l m n o p r s t u y z"
base: "A B C E I J K L M N O P R S T U Y Z a b c e i j k l m n o p r s t u ü y z"
}
sample_text {
masthead_full: "MmEe"

View File

@@ -10,7 +10,7 @@ region: "NL"
region: "PT"
population: 541000
exemplar_chars {
base: "a A b B {ch} {CH} d D {dj} {DJ} e E f F g G {gu} {GU} i I j J k K l L {li} {LI} m M n N {nh} {NH} {n} {N} o O p P r R s S t T {tch} {TCH} u U v V x X z Z"
base: "a A b B {ch} {CH} d D {dj} {DJ} e E f F g G {gu} {GU} i I j J k K l L {li} {LI} m M n N {nh} {NH} {n} {N} ñ o O p P r R s S t T {tch} {TCH} u U v V x X z Z"
auxiliary: "h H q Q w W y Y"
}
sample_text {

View File

@@ -6,20 +6,25 @@ autonym: "Reo Tahiti"
population: 91487
region: "PF"
exemplar_chars {
base: "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z Ā Ē Ī Ō Ū a b c d e f g h i j k l m n o p q r s t u v w x y z ā ē ī ō ū"
marks: "◌̄"
base: "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z Ā Ē Ī Ō Ū Ï a b c d e f g h i j k l m n o p q r s t u v w x y z ā ē ī ō ū ï"
marks: "◌̄ ◌̈"
numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9"
punctuation: "- — , ; : ! ? . … \' \" “ ” « » ( ) [ ] @ * / & # "
}
sample_text {
masthead_full: "EeFf"
masthead_partial: "Aa"
styles: "I te fa\'ari\'i i te fa\'ati\'amā o te ihomana tumu"
tester: "I te fa\'ari\'i e no te ha\'apa\'o\'ore, e no te fa\'afaufa\'a\'ore i te mau ti\'amanara\'a"
poster_sm: "I te fa\'afaufa\'a"
poster_md: "I te fa\'ari\'i,"
styles: "I te faarii i te faatiamā o te ihomana tumu"
tester: "I te faarii e no te haapaoore, e no te faafaufaaore i te mau tiamanaraa"
poster_sm: "I te faafaufaa"
poster_md: "I te faarii,"
poster_lg: "fanauhia"
specimen_48: "I te fa\'ari\'i i te \'opuara\'a\'amui a te mau Hau Mero e a te Fa\'anahora\'a"
specimen_36: "I te fa\'ari\'i i te fa\'ati\'amā o te ihomana tumu o te mau mero tā\'āto\'a o te nuna\'a ta\'ata e i to ratou ato\'a ti\'amanara\'a"
specimen_32: "E fanauhia te tā\'āto\'ara\'a o te ta\'ata-tupu ma te ti\'amā e te ti\'amanara\'a \'aifaito. Ua \'ī te mana\'o pa\'ari e i te manava e ma te \'a\'au taea\'e \'oia ta ratou ha\'a i rotopū ia ratou iho, e ti\'a ai;"
specimen_21: "I te fa\'ari\'i i te fa\'ati\'amā o te ihomana tumu o te mau mero tā\'āto\'a o te nuna\'a ta\'ata e i to ratou ato\'a ti\'amanara\'a \'aifaito e te tūtu\'u-mana-\'ore \'ei papa no te orara\'ati\'amā, no te orara\'a-\'aifaro e no te hau i roto i teie nei ao, e ti\'a ai,\nI te fa\'afaufa\'a i te paruru i te ti\'amanara\'a o te ta\'ata ma te hō\'ē fa\'aterera\'a-ture, ia \'ore \'oia te ta\'ata ia hepohia, i te hope\'a, i te hepo-\'orure-hau i te fa\'atītīra\'a e te fa\'afaora\'a a te taehae, e tia ai,"
specimen_16: "I te fa\'ari\'i i te fa\'ati\'amā o te ihomana tumu o te mau mero tā\'āto\'a o te nuna\'a ta\'ata e i to ratou ato\'a ti\'amanara\'a \'aifaito e te tūtu\'u-mana-\'ore \'ei papa no te orara\'ati\'amā, no te orara\'a-\'aifaro e no te hau i roto i teie nei ao, e ti\'a ai,\nI te fa\'ari\'i e no te ha\'apa\'o\'ore, e no te fa\'afaufa\'a\'ore i te mau ti\'amanara\'a o te ta\'ata, i tupu ai te mau peu \'oviri i fa\'ati\'arepu i te manava o te tā\'āto\'ara\'a o te ao; i te fa\'ari\'i i te ti\'aoroara\'ahia \'ei a\'e nui no te ta\'ata te hitira\'a mai o te hō\'ē ao iti i reira te mau ta\'ata, i ora mai mai roto mai i te taiā e te veve, e \'aparaparau noa ai ma te ti\'aturi fa\'ahou, e ti\'a ai,"
specimen_48: "I te faarii i te opuaraaamui a te mau Hau Mero e a te Faanahoraa"
specimen_36: "I te faarii i te faatiamā o te ihomana tumu o te mau mero tāātoa o te nunaa taata e i to ratou atoa tiamanaraa"
specimen_32: "E fanauhia te tāātoaraa o te taata-tupu ma te tiamā e te tiamanaraa aifaito. Ua ī te manao paari e i te manava e ma te aau taeae oia ta ratou haa i rotopū ia ratou iho, e tia ai;"
specimen_21: "I te faarii i te faatiamā o te ihomana tumu o te mau mero tāātoa o te nunaa taata e i to ratou atoa tiamanaraa aifaito e te tūtuu-mana-ore ei papa no te oraraatiamā, no te oraraa-aifaro e no te hau i roto i teie nei ao, e tia ai,\nI te faafaufaa i te paruru i te tiamanaraa o te taata ma te hōē faatereraa-ture, ia ore oia te taata ia hepohia, i te hopea, i te hepo-orure-hau i te faatītīraa e te faafaoraa a te taehae, e tia ai,"
specimen_16: "I te faarii i te faatiamā o te ihomana tumu o te mau mero tāātoa o te nunaa taata e i to ratou atoa tiamanaraa aifaito e te tūtuu-mana-ore ei papa no te oraraatiamā, no te oraraa-aifaro e no te hau i roto i teie nei ao, e tia ai,\nI te faarii e no te haapaoore, e no te faafaufaaore i te mau tiamanaraa o te taata, i tupu ai te mau peu oviri i faatiarepu i te manava o te tāātoaraa o te ao; i te faarii i te tiaoroaraahia ei ae nui no te taata te hitiraa mai o te hōē ao iti i reira te mau taata, i ora mai mai roto mai i te taiā e te veve, e aparaparau noa ai ma te tiaturi faahou, e tia ai,"
}
source: "Graphie et graphies de la langue tahitienne, Fare Vāna'a, 2003-2007, https://web.archive.org/web/20070310220723/http://www.farevanaa.pf/theme_detail.php?id=5"
source: "Mahei Adams et al., Reo tahiti: Piha haapiiraa, ME-DGEE, 2020, https://www.ebooks.education.pf/wp-content/ebooks/outils/Manuel-REO-TAHITI-6e/94/"
note: "The glottal stop is written with the apostrophe, which can be ' U+0027, ’ U+2019, ‘ U+2018 or even ʼ U+02BC, ʻ U+02BB in some cases."

View File

@@ -0,0 +1,15 @@
id: "zen_Latn"
language: "zen"
script: "Latn"
name: "Zenaga (Latin)"
autonym: "tuẓẓungiyyä"
population: 2700
region: "MR"
region: "SN"
exemplar_chars {
base: "a A ā Ā ä Ä e E ə Ə i I ī Ī u U ū Ū ˀ ʔ b B ḅ Ḅ p P d D đ Đ {dʸ} {Dʸ} ḍ Ḍ {đ̣} {Đ̣} f F ḟ Ḟ ṿ Ṿ g G ġ Ġ h H ḥ Ḥ k K ḫ Ḫ l L ḷ Ḷ m M ṃ Ṃ n N {nʸ} {Nʸ} q Q r R ṛ Ṛ s S š Š ṣ Ṣ t T {tʸ} {Tʸ} ṭ Ṭ w W ʷ y Y ʸ z Z {z̄} {Z̄} ž Ž {ẕ̌} {Ẕ̌} ẓ Ẓ {ẓ̄} {Ẓ̄} ˁ ʕ"
auxiliary: "Ɣ ɣ ˤ"
marks: "◌̄ ◌̌ ◌̇ ◌̈ ◌̣ ◌̱ ◌̮"
}
source: "Catherine Taine-Cheikh, Dictionnaire français–zénaga (berbère de Mauritanie) : avec renvois au classement par racines du Dictionnaire zénaga–français, Köln: Rüdiger Köppe Verlag, 2010"
note: "The transcription systems used for Zenaga vary to some degree and there is no practical nor standardized orthography."

View File

@@ -1,6 +1,6 @@
id: "zen_Tfng"
language: "zen"
script: "Tfng"
name: "Zenaga"
name: "Zenaga (Tifinagh)"
population: 0
historical: true

View File

@@ -11,7 +11,6 @@ region: "SZ"
region: "ZA"
exemplar_chars {
base: "A B {BH} C {CH} D {DL} {DY} E F G {GC} {GQ} {GX} H {HH} {HL} I J K {KH} {KL} {KP} L M N {NC} {NGC} {NGQ} {NGX} {NHL} {NK} {NKC} {NKQ} {NKX} {NQ} {NTSH} {NX} {NY} O P {PH} Q {QH} R {RH} S {SH} T {TH} {TL} {TS} {TSH} U V W X {XH} Y Z a b {bh} c {ch} d {dl} {dy} e f g {gc} {gq} {gx} h {hh} {hl} i j k {kh} {kl} {kp} l m n {nc} {ngc} {ngq} {ngx} {nhl} {nk} {nkc} {nkq} {nkx} {nq} {ntsh} {nx} {ny} o p {ph} q {qh} r {rh} s {sh} t {th} {tl} {ts} {tsh} u v w x {xh} y z"
marks: "◌́ ◌̀ ◌̆ ◌̂ ◌̊ ◌̈ ◌̃ ◌̄ ◌̧"
numerals: "- , . % + 0 1 2 3 4 5 6 7 8 9"
punctuation: "- , ; : ! ? . ( ) [ ] { }"
index: "A B C D E F G H I J K L M N O P Q R S T U V W X Y Z"
@@ -31,4 +30,5 @@ sample_text {
specimen_16: "Ngokunjalo ukwamukelwa ngokuzuzwa kwesithunzi samalungelo alinganayo najwayelekile awowonke amalunga omndeni wesintu kuyisisekelo senkululeko, sobulungiswa noxolo emhlabeni,\nNgokunjalo ukunganakwa nokwedelelwa kwamalungelo esintu kube nomphumela wezenzo zobuqaba eseziqede unembeza kubantu, kanye nomhlaba obhekwe isintu ukuba senamele inkululeko yokukhuluma nokukholwa, inkululeko yokungesabi futhi nokufuna kugunyazwe ugqozi kubobonke abantu,\nNgokunjalo kudingekile, uma umuntu engenakuphoqwa ukulandela umgudu othize, njengendlela yokugcina, avukele ababusi nabacindezeli, ukuze amalungelo esintu avikelwe ngokubusa ngomthetho,"
}
source: "Clement M. Doke, B. W. Vilakazi, Zulu-English dictionary, Johannesburg: Witwatersand University Press, 1972 (1st ed. 1958)"
note: "Old orthography used Ɓ ɓ. Marks like ◌́ for tone are only used in dictionaries."
source: "C.M. Doke, Benedict Wallet Vilakazi, D. M. Malcolm, Mzilikazi Khumalo, English-isiZulu / isiZulu-English Dictionary, 2014 (4th ed.)"
note: "Old orthography used Ɓ ɓ. Tone marking (high, low, falling) with ◌́ ◌̀ ◌̂ is only used in a few dictionaries and doesn’t seem to be used in teaching material."