# Please enter a commit message to explain why this merge is necessary, # especially if it merges an updated upstream into a topic branch. # # Lines starting with '#' will be ignored, and an empty message aborts # the commit.
58 lines
2.4 KiB
Python
58 lines
2.4 KiB
Python
import re
|
|
import random
|
|
|
|
from gflanguages import languages_public_pb2
|
|
from google.protobuf.json_format import ParseDict
|
|
from google.protobuf.text_format import MessageToString
|
|
|
|
text = " ́ ́ . ́ . ."
|
|
|
|
sentences = re.split(r"(?<=[。\.])\s+", text)
|
|
sentences = [s for s in sentences if s.strip()]
|
|
|
|
words = re.findall(r"\S+", text.replace(".", ""))
|
|
glyphs = set(re.findall(r"\S", text))
|
|
|
|
|
|
def random_phrase(inputs, min_len, max_len, separator=" "):
|
|
phrase = ""
|
|
satisfied = False
|
|
repetitions = 1
|
|
while not satisfied:
|
|
sentence_bag = list(inputs) * repetitions
|
|
remaining = max_len
|
|
while remaining > 0 and len(sentence_bag) > 0:
|
|
new_sentence = random.choice(sentence_bag)
|
|
sentence_bag.remove(new_sentence)
|
|
phrase += new_sentence + separator
|
|
remaining -= len(new_sentence) + len(separator)
|
|
if len(phrase) >= min_len:
|
|
satisfied = True
|
|
break
|
|
if not satisfied:
|
|
print("Trying again with a new bag of sentences")
|
|
repetitions += 1
|
|
return phrase
|
|
|
|
|
|
sample_text = {
|
|
"masthead_full": "".join(random.choices(list(glyphs), k=4)),
|
|
"masthead_partial": "".join(random.choices(list(glyphs), k=2)),
|
|
"styles": random_phrase(sentences, 40, 60, ". "),
|
|
"tester": random_phrase(sentences, 60, 90, ". "),
|
|
"poster_sm": random_phrase(words, 10, 17, " "),
|
|
"poster_md": random_phrase(words, 6, 12, " "),
|
|
"poster_lg": random_phrase(words, 3, 8, " "),
|
|
"specimen_48": random_phrase(sentences, 50, 80, ". "),
|
|
"specimen_36": random_phrase(sentences, 100, 120, ". "),
|
|
"specimen_32": random_phrase(sentences, 140, 180, ". "),
|
|
"specimen_21": random_phrase(sentences, 300, 500, ". "),
|
|
"specimen_16": random_phrase(sentences, 550, 750, ". "),
|
|
}
|
|
# print(words)
|
|
message = ParseDict(
|
|
sample_text,
|
|
languages_public_pb2.SampleTextProto(),
|
|
)
|
|
print(MessageToString(message, as_utf8=True))
|