Merge commit '7d71975ee699566498f4e4aa7f99e0d4794bfbcb' into lang-v0.7.1
lang/Cargo.toml (new file, 26 lines)
@@ -0,0 +1,26 @@
[package]
name = "google-fonts-languages"
version = "0.7.1"
edition = "2021"
description = "Google Fonts script and language support data"
repository = "https://github.com/googlefonts/lang"
license-file = "LICENSE.txt"

[dependencies]
bytes = "1.7.1"
prost = "0.13"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"

[build-dependencies]
prost-build = "0.13"
protobuf-support = "3.7.1"
protobuf = "3.7.1"
protobuf-parse = "3.7.1"
glob = "0"
prettyplease = "0.2"
quote = "1.0"
proc-macro2 = "1.0"
syn = "2.0"
itertools = "0.13"
serde_json = "1.0"
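Worth noting: serde_json appears in both [dependencies] and [build-dependencies] because the build script (build.rs, below) dumps the parsed textproto data to a JSON string at compile time, and the library parses that string back at runtime. A minimal sketch of that round trip, with illustrative names rather than the crate's real API:

```rust
use std::collections::BTreeMap;

fn main() {
    // Build time: serialize a map to a JSON string literal...
    let mut map = BTreeMap::new();
    map.insert("BG".to_string(), "Bulgaria".to_string());
    let dump = serde_json::to_string(&map).expect("Could not serialize");

    // ...run time: parse the embedded string back into a map.
    let parsed: BTreeMap<String, String> =
        serde_json::from_str(&dump).expect("Could not deserialize");
    assert_eq!(parsed.get("BG").map(String::as_str), Some("Bulgaria"));
}
```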
@@ -22,10 +22,15 @@ data on the Google Fonts collection.
 import glob
 import os
 import unicodedata
+import sys

 from gflanguages import languages_public_pb2
 from google.protobuf import text_format
-from importlib_resources import files

+if sys.version_info < (3, 10):
+    from importlib_resources import files
+else:
+    from importlib.resources import files
+
 try:
     from ._version import version as __version__  # type: ignore
@@ -9,13 +9,13 @@ sample_text {
   masthead_full: ""
   masthead_partial: ""
   styles: " "
-  tester: " । "
+  tester: " "
   poster_sm: " "
   poster_md: " "
   poster_lg: ""
-  specimen_48: " । "
-  specimen_36: " । ।"
-  specimen_32: " । ।"
-  specimen_21: " । ।\n । ।\n । ।"
-  specimen_16: " । ।\n । ।\n । ।\n । ।\n । ।\n । ।"
+  specimen_48: " "
+  specimen_36: " "
+  specimen_32: " "
+  specimen_21: " \n \n "
+  specimen_16: " \n \n \n \n \n "
 }
lang/build.rs (new file, 168 lines)
@@ -0,0 +1,168 @@
use proc_macro2::TokenStream;
use protobuf::reflect::{FieldDescriptor, ReflectValueRef};
use quote::quote;
use serde_json::Map;
use std::io::{BufWriter, Write};
use std::{env, fs::File, path::Path};

fn main() {
    // First we load up the descriptor using the protobuf crate
    // so that we can do reflection on it.
    let descriptors = protobuf_parse::Parser::new()
        .pure()
        .include(".")
        .input("Lib/gflanguages/languages_public.proto")
        .file_descriptor_set()
        .expect("Could not parse languages_public.proto");
    let protofile = descriptors.file.first().expect("No file in descriptor");
    let descriptor = protobuf::reflect::FileDescriptor::new_dynamic(protofile.clone(), &[])
        .expect("Could not create descriptor");

    // Now we use the prost crate to compile them, so that we can
    // generate Rust structs.
    let mut config = prost_build::Config::new();
    // config.boxed(".google.languages_public.LanguageProto.sample_text");
    // config.boxed(".google.languages_public.LanguageProto.exemplar_chars");

    // The reflection can tell us what messages we have, so we can configure
    // them to be deserializable with serde
    for message in descriptor.messages() {
        config.type_attribute(
            message.full_name(),
            "#[derive(serde::Serialize, serde::Deserialize)]",
        );
    }
    // Let's make our structs; this produces google.languages_public.rs
    config
        .compile_protos(
            &["Lib/gflanguages/languages_public.proto"],
            &["Lib/gflanguages/"],
        )
        .expect("Could not compile languages_public.proto");

    let path = Path::new(&env::var("OUT_DIR").unwrap()).join("data.rs");
    let mut file = BufWriter::new(File::create(path).unwrap());
    let mut output = quote! { use std::collections::BTreeMap; use std::sync::LazyLock; };

    output.extend(serialize_a_structure(
        ".google.languages_public.RegionProto",
        "Lib/gflanguages/data/regions/*.textproto",
        "REGIONS",
        &descriptor,
    ));

    output.extend(serialize_a_structure(
        ".google.languages_public.ScriptProto",
        "Lib/gflanguages/data/scripts/*.textproto",
        "SCRIPTS",
        &descriptor,
    ));

    output.extend(serialize_a_structure(
        ".google.languages_public.LanguageProto",
        "Lib/gflanguages/data/languages/*.textproto",
        "LANGUAGES",
        &descriptor,
    ));
    // file.write_all(output.to_string().as_bytes())
    // .expect("Could not write to file");

    let abstract_file: syn::File = syn::parse2(output).expect("Could not parse output");
    let formatted = prettyplease::unparse(&abstract_file);
    file.write_all(formatted.as_bytes())
        .expect("Could not write to file");
}

fn serialize_a_structure(
    proto_name: &str,
    pathglob: &str,
    output_variable: &str,
    descriptor: &protobuf::reflect::FileDescriptor,
) -> TokenStream {
    let proto = descriptor
        .message_by_full_name(proto_name)
        .unwrap_or_else(|| panic!("No {} message", proto_name));
    let files: Vec<std::path::PathBuf> = glob::glob(pathglob)
        .expect("Failed to read glob pattern")
        .flatten()
        .collect();
    let name: TokenStream = proto.name().parse().unwrap();
    let variable: TokenStream = output_variable.parse().unwrap();
    let mut map = Map::new();
    for file in files.into_iter() {
        serialize_file(file, &proto, &mut map);
    }
    let json_var: TokenStream = format!("__{}", output_variable).parse().unwrap();
    let docmsg = format!("A map of all the {} objects", name);
    let json_dump = serde_json::to_string(&map).expect("Could not serialize");
    quote! {
        static #json_var: &str = #json_dump;

        #[doc = #docmsg]
        pub static #variable: LazyLock<BTreeMap<String, Box<#name>>> = LazyLock::new(|| {
            serde_json::from_str(#json_var).expect("Could not deserialize")
        });
    }
}
fn serialize_file(
    path: std::path::PathBuf,
    descriptor: &protobuf::reflect::MessageDescriptor,
    value: &mut Map<String, serde_json::Value>,
) {
    let mut message = descriptor.new_instance();
    let message_mut = message.as_mut();
    let input = std::fs::read_to_string(&path).expect("Could not read file");
    protobuf::text_format::merge_from_str(message_mut, &input)
        .unwrap_or_else(|e| panic!("Could not parse file {:?}: {:?}", path, e));
    let id = path.file_stem().unwrap().to_str().unwrap();
    value.insert(id.to_string(), serialize_message(message_mut));
}

fn serialize_message(message: &dyn protobuf::MessageDyn) -> serde_json::Value {
    let descriptor = message.descriptor_dyn();
    // let descriptor_name: TokenStream = descriptor.name().parse().unwrap();
    let mut output = Map::new();
    for field in descriptor.fields() {
        let field_name: TokenStream = field.name().parse().unwrap();
        let field_contents = serialize_field(&field, message);
        output.insert(field_name.to_string(), field_contents);
    }
    output.into()
}

fn serialize_field(
    field: &FieldDescriptor,
    message: &dyn protobuf::MessageDyn,
) -> serde_json::Value {
    if field.is_repeated() {
        let v: Vec<serde_json::Value> = field
            .get_repeated(message)
            .into_iter()
            .map(|value| serialize_field_value(field, value))
            .collect();
        v.into()
    } else if field.is_required() {
        serialize_field_value(field, field.get_singular(message).unwrap())
    } else if field.has_field(message) {
        let value = serialize_field_value(field, field.get_singular(message).unwrap());
        value.into()
    } else {
        serde_json::Value::Null
    }
}

fn serialize_field_value(_field: &FieldDescriptor, value: ReflectValueRef) -> serde_json::Value {
    match value {
        ReflectValueRef::Bool(value) => value.into(),
        ReflectValueRef::I32(value) => value.into(),
        ReflectValueRef::I64(value) => value.into(),
        ReflectValueRef::U32(value) => value.into(),
        ReflectValueRef::U64(value) => value.into(),
        ReflectValueRef::F32(value) => value.into(),
        ReflectValueRef::F64(value) => value.into(),
        ReflectValueRef::String(value) => value.into(),
        ReflectValueRef::Bytes(value) => value.into(),
        ReflectValueRef::Enum(_value, _ix) => unimplemented!(),
        ReflectValueRef::Message(value) => serialize_message(&*value),
    }
}
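For orientation, each serialize_a_structure call above expands, in the generated data.rs, to roughly the following shape. This is a hand-written approximation: the stand-in struct and the one-entry JSON blob are illustrative (the "BG"/"Bulgaria" pair comes from the crate's own tests), while the real JSON covers every regions/*.textproto file and the real struct is the prost-generated one.

```rust
use std::collections::BTreeMap;
use std::sync::LazyLock;

// Stand-in for the prost-generated RegionProto; the real struct lives in
// google.languages_public.rs and has more fields.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
pub struct RegionProto {
    pub name: Option<String>,
}

// The JSON dump produced at build time, abbreviated to a single entry.
static __REGIONS: &str = r#"{"BG":{"name":"Bulgaria"}}"#;

/// A map of all the RegionProto objects
pub static REGIONS: LazyLock<BTreeMap<String, Box<RegionProto>>> =
    LazyLock::new(|| serde_json::from_str(__REGIONS).expect("Could not deserialize"));
```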
@@ -16,6 +16,7 @@ dynamic = ["version"]

name = "gflanguages"
description = "A python API for evaluating language support in the Google Fonts collection."
requires-python = ">=3.8"
readme = "README.md"
authors = [
    { name = "Simon Cozens", email = "simon@simon-cozens.org" }
@@ -23,7 +24,7 @@ authors = [

 dependencies = [
     "protobuf>=3.7.0, <4",
-    "importlib_resources",  # Needed for 3.9 and below
+    "importlib_resources ; python_version < '3.10'",
 ]

 [project.optional-dependencies]
lang/src/lib.rs (new file, 25 lines)
@@ -0,0 +1,25 @@
include!(concat!(env!("OUT_DIR"), "/google.languages_public.rs"));
include!(concat!(env!("OUT_DIR"), "/data.rs"));

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn regions() {
        assert!((*REGIONS).contains_key("BG"));
        assert_eq!(REGIONS.get("BG").unwrap().name.as_deref(), Some("Bulgaria"));
    }

    #[test]
    fn scripts() {
        assert!((*SCRIPTS).contains_key("Arab"));
        assert_eq!(SCRIPTS.get("Arab").unwrap().name.as_deref(), Some("Arabic"));
    }

    #[test]
    fn languages() {
        assert!(LANGUAGES.len() > 1000);
        assert!((*LANGUAGES).contains_key("ar_Arab"));
    }
}
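Taken together, a downstream crate gets three lazily-initialized maps. A short hypothetical usage sketch (the import path is assumed from the package name in Cargo.toml; the keys and values are the ones asserted by the tests above):

```rust
use google_fonts_languages::{LANGUAGES, REGIONS, SCRIPTS};

fn main() {
    // First access triggers the one-time JSON parse behind each LazyLock.
    println!("{:?}", REGIONS.get("BG").and_then(|r| r.name.clone()));   // Some("Bulgaria")
    println!("{:?}", SCRIPTS.get("Arab").and_then(|s| s.name.clone())); // Some("Arabic")
    println!("{} languages known", LANGUAGES.len());                    // > 1000 per the tests
}
```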
@@ -14,36 +14,45 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pkg_resources import resource_filename

+import sys
+
+if sys.version_info < (3, 10):
+    import importlib_resources as importlib_resources
+else:
+    import importlib.resources as importlib_resources
+
 from gflanguages import (LoadLanguages,
                          LoadRegions,
                          LoadScripts)

-DATA_DIR = resource_filename("gflanguages", "data")
+DATA_DIR = importlib_resources.files("gflanguages") / "data"


 def test_LoadLanguages():
-    for langs in [LoadLanguages(),
-                  LoadLanguages(None),
-                  LoadLanguages(DATA_DIR)]:
-        numerals = langs["yi_Hebr"].exemplar_chars.numerals
-        assert numerals == '- , . % + 0 1 2 3 4 5 6 7 8 9'
+    with importlib_resources.as_file(DATA_DIR) as data_path:
+        for langs in [LoadLanguages(),
+                      LoadLanguages(None),
+                      LoadLanguages(data_path)]:
+            numerals = langs["yi_Hebr"].exemplar_chars.numerals
+            assert numerals == '- , . % + 0 1 2 3 4 5 6 7 8 9'


 def test_LoadScripts():
-    for scripts in [LoadScripts(),
-                    LoadScripts(None),
-                    LoadScripts(DATA_DIR)]:
-        scripts = LoadScripts()
-        assert scripts["Tagb"].name == 'Tagbanwa'
+    with importlib_resources.as_file(DATA_DIR) as data_path:
+        for scripts in [LoadScripts(),
+                        LoadScripts(None),
+                        LoadScripts(data_path)]:
+            scripts = LoadScripts()
+            assert scripts["Tagb"].name == 'Tagbanwa'


 def test_LoadRegions():
-    for regions in [LoadRegions(),
-                    LoadRegions(None),
-                    LoadRegions(DATA_DIR)]:
-        regions = LoadRegions()
-        br = regions["BR"]
-        assert br.name == 'Brazil'
-        assert br.region_group == ['Americas']
+    with importlib_resources.as_file(DATA_DIR) as data_path:
+        for regions in [LoadRegions(),
+                        LoadRegions(None),
+                        LoadRegions(data_path)]:
+            regions = LoadRegions()
+            br = regions["BR"]
+            assert br.name == 'Brazil'
+            assert br.region_group == ['Americas']
@@ -1,10 +1,14 @@
-from importlib_resources import files
 import glob
 import os
 import pytest
+import sys
 from gflanguages import languages_public_pb2
 from google.protobuf import text_format

+if sys.version_info < (3, 10):
+    from importlib_resources import files
+else:
+    from importlib.resources import files

 languages_dir = files("gflanguages.data").joinpath("languages")
 textproto_files = [
lang/tox.ini (new file, 13 lines)
@@ -0,0 +1,13 @@
[tox]
env_list = py3{8,9,10,11,12,13}
minversion = 4.23.2

[testenv]
description = run the tests with pytest
package = wheel
wheel_build_env = .pkg
deps =
    pytest>=6
extras = dev
commands =
    pytest {tty:--color=yes} {posargs}