mirror of
https://github.com/pyscript/pyscript.git
synced 2025-12-19 18:27:29 -05:00
Fix MicroPython badly handling unicode chars (#2018)
## Changes * fixed an issue with the **py-editor** related to the new `linebuffer` directive * provide, in the worker hook scope, a simple callback that pre-buffers unicode sequences [according to the standard](https://encoding.spec.whatwg.org/#utf-8-bytes-needed) so that the buffer is sent to the terminal only once those sequences are complete * test with both `µ` and far more convoluted sequences such as 👩❤️👨 that the output works, whether it is requested as input or already evaluated from the page ... in the latter case `test = "👩❤️👨"` completely messes up the program and the resulting string is empty
This commit is contained in:
committed by
GitHub
parent
6ee8217593
commit
2d5cf096e0
12
pyscript.core/package-lock.json
generated
12
pyscript.core/package-lock.json
generated
@@ -1,17 +1,17 @@
|
|||||||
{
|
{
|
||||||
"name": "@pyscript/core",
|
"name": "@pyscript/core",
|
||||||
"version": "0.4.16",
|
"version": "0.4.18",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@pyscript/core",
|
"name": "@pyscript/core",
|
||||||
"version": "0.4.16",
|
"version": "0.4.18",
|
||||||
"license": "APACHE-2.0",
|
"license": "APACHE-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ungap/with-resolvers": "^0.1.0",
|
"@ungap/with-resolvers": "^0.1.0",
|
||||||
"basic-devtools": "^0.1.6",
|
"basic-devtools": "^0.1.6",
|
||||||
"polyscript": "^0.12.2",
|
"polyscript": "^0.12.3",
|
||||||
"sticky-module": "^0.1.1",
|
"sticky-module": "^0.1.1",
|
||||||
"to-json-callback": "^0.1.1",
|
"to-json-callback": "^0.1.1",
|
||||||
"type-checked-collections": "^0.1.7"
|
"type-checked-collections": "^0.1.7"
|
||||||
@@ -2435,9 +2435,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/polyscript": {
|
"node_modules/polyscript": {
|
||||||
"version": "0.12.2",
|
"version": "0.12.3",
|
||||||
"resolved": "https://registry.npmjs.org/polyscript/-/polyscript-0.12.2.tgz",
|
"resolved": "https://registry.npmjs.org/polyscript/-/polyscript-0.12.3.tgz",
|
||||||
"integrity": "sha512-qHZbcSVhp4bDW9YjcPyYw2AWDRrBEDUVxKMuvjACjQK7O891H6x7dNKVYNjij75Ygn9akma+X1n6eTW4syBFmQ==",
|
"integrity": "sha512-aekNrFZzdLe0KQuSMWKFsUwkv414hIIjDgqzCbEXl4l5xZA8vgiv+jFFOZnkJk9/HeybLRPJBRdlhBxfdKVV0Q==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ungap/structured-clone": "^1.2.0",
|
"@ungap/structured-clone": "^1.2.0",
|
||||||
"@ungap/with-resolvers": "^0.1.0",
|
"@ungap/with-resolvers": "^0.1.0",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@pyscript/core",
|
"name": "@pyscript/core",
|
||||||
"version": "0.4.16",
|
"version": "0.4.18",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"description": "PyScript",
|
"description": "PyScript",
|
||||||
"module": "./index.js",
|
"module": "./index.js",
|
||||||
@@ -42,7 +42,7 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@ungap/with-resolvers": "^0.1.0",
|
"@ungap/with-resolvers": "^0.1.0",
|
||||||
"basic-devtools": "^0.1.6",
|
"basic-devtools": "^0.1.6",
|
||||||
"polyscript": "^0.12.2",
|
"polyscript": "^0.12.3",
|
||||||
"sticky-module": "^0.1.1",
|
"sticky-module": "^0.1.1",
|
||||||
"to-json-callback": "^0.1.1",
|
"to-json-callback": "^0.1.1",
|
||||||
"type-checked-collections": "^0.1.7"
|
"type-checked-collections": "^0.1.7"
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ const hooks = {
|
|||||||
codeBeforeRun: () => stdlib,
|
codeBeforeRun: () => stdlib,
|
||||||
// works on both Pyodide and MicroPython
|
// works on both Pyodide and MicroPython
|
||||||
onReady: ({ runAsync, io }, { sync }) => {
|
onReady: ({ runAsync, io }, { sync }) => {
|
||||||
io.stdout = (line) => sync.write(line);
|
io.stdout = io.buffered(sync.write);
|
||||||
io.stderr = (line) => sync.writeErr(line);
|
io.stderr = io.buffered(sync.writeErr);
|
||||||
sync.revoke();
|
sync.revoke();
|
||||||
sync.runAsync = runAsync;
|
sync.runAsync = runAsync;
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -59,7 +59,37 @@ const workerReady = ({ interpreter, io, run, type }, { sync }) => {
|
|||||||
});
|
});
|
||||||
run("from _pyscript_input import input");
|
run("from _pyscript_input import input");
|
||||||
|
|
||||||
io.stdout = generic.write;
|
// this is needed to avoid truncated unicode in MicroPython
|
||||||
|
// the reason is that `linebuffer` false just sends one byte
|
||||||
|
// at a time and readline here doesn't like it much.
|
||||||
|
// MicroPython also has issues with code-points and
|
||||||
|
// replProcessChar(byte) but that function accepts only
|
||||||
|
// one byte at a time so ... we have an issue!
|
||||||
|
// @see https://github.com/pyscript/pyscript/pull/2018
|
||||||
|
// @see https://github.com/WebReflection/buffer-points
|
||||||
|
const bufferPoints = (stdio) => {
|
||||||
|
const bytes = [];
|
||||||
|
let needed = 0;
|
||||||
|
return (buffer) => {
|
||||||
|
let written = 0;
|
||||||
|
for (const byte of buffer) {
|
||||||
|
bytes.push(byte);
|
||||||
|
// @see https://encoding.spec.whatwg.org/#utf-8-bytes-needed
|
||||||
|
if (needed) needed--;
|
||||||
|
else if (0xc2 <= byte && byte <= 0xdf) needed = 1;
|
||||||
|
else if (0xe0 <= byte && byte <= 0xef) needed = 2;
|
||||||
|
else if (0xf0 <= byte && byte <= 0xf4) needed = 3;
|
||||||
|
if (!needed) {
|
||||||
|
written += bytes.length;
|
||||||
|
stdio(new Uint8Array(bytes.splice(0)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return written;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
io.stdout = bufferPoints(generic.write);
|
||||||
|
|
||||||
// tiny shim of the code module with only interact
|
// tiny shim of the code module with only interact
|
||||||
// to bootstrap a REPL like environment
|
// to bootstrap a REPL like environment
|
||||||
interpreter.registerJsModule("code", {
|
interpreter.registerJsModule("code", {
|
||||||
@@ -69,14 +99,14 @@ const workerReady = ({ interpreter, io, run, type }, { sync }) => {
|
|||||||
|
|
||||||
const encoder = new TextEncoder();
|
const encoder = new TextEncoder();
|
||||||
const acc = [];
|
const acc = [];
|
||||||
|
const handlePoints = bufferPoints((buffer) => {
|
||||||
|
acc.push(...buffer);
|
||||||
|
pyterminal_write(decoder.decode(buffer));
|
||||||
|
});
|
||||||
|
|
||||||
io.stdout = (buffer) => {
|
// avoid duplicating the output produced by the input
|
||||||
// avoid duplicating the output produced by the input
|
io.stdout = (buffer) =>
|
||||||
if (length++ > input.length) {
|
length++ > input.length ? handlePoints(buffer) : 0;
|
||||||
acc.push(...buffer);
|
|
||||||
pyterminal_write(decoder.decode(buffer));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
interpreter.replInit();
|
interpreter.replInit();
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@
|
|||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<script type="mpy" worker terminal>
|
<script type="mpy" worker terminal>
|
||||||
|
print("µpython")
|
||||||
import code
|
import code
|
||||||
code.interact()
|
code.interact()
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
Reference in New Issue
Block a user