Fix MicroPython badly handling unicode chars (#2018)

## Changes
* fixed an issue with the **py-editor** related to the new `linebuffer` directive
* provide in worker hook scope a simple callback that pre-buffers unicode sequences [according to the standard](https://encoding.spec.whatwg.org/#utf-8-bytes-needed) so that the buffer is sent to the terminal only once those sequences are fulfilled
* test with both `µ` and way more convoluted sequences such as 👩‍❤️‍👨 that the output works, whether it is requested as input or already evaluated from the page ... in the latter case `test = "👩‍❤️‍👨"` completely messes up the program and the resulting string is empty
2026-03-07 09:00:12 -05:00 · 2024-04-09 14:51:10 +02:00
parent 6ee8217593
commit 2d5cf096e0
5 changed files with 49 additions and 18 deletions
--- a/pyscript.core/package-lock.json
+++ b/pyscript.core/package-lock.json
@@ -1,17 +1,17 @@
 {
    "name": "@pyscript/core",
-    "version": "0.4.16",
+    "version": "0.4.18",
    "lockfileVersion": 3,
    "requires": true,
    "packages": {
        "": {
            "name": "@pyscript/core",
-            "version": "0.4.16",
+            "version": "0.4.18",
            "license": "APACHE-2.0",
            "dependencies": {
                "@ungap/with-resolvers": "^0.1.0",
                "basic-devtools": "^0.1.6",
-                "polyscript": "^0.12.2",
+                "polyscript": "^0.12.3",
                "sticky-module": "^0.1.1",
                "to-json-callback": "^0.1.1",
                "type-checked-collections": "^0.1.7"
@@ -2435,9 +2435,9 @@
            }
        },
        "node_modules/polyscript": {
-            "version": "0.12.2",
-            "resolved": "https://registry.npmjs.org/polyscript/-/polyscript-0.12.2.tgz",
-            "integrity": "sha512-qHZbcSVhp4bDW9YjcPyYw2AWDRrBEDUVxKMuvjACjQK7O891H6x7dNKVYNjij75Ygn9akma+X1n6eTW4syBFmQ==",
+            "version": "0.12.3",
+            "resolved": "https://registry.npmjs.org/polyscript/-/polyscript-0.12.3.tgz",
+            "integrity": "sha512-aekNrFZzdLe0KQuSMWKFsUwkv414hIIjDgqzCbEXl4l5xZA8vgiv+jFFOZnkJk9/HeybLRPJBRdlhBxfdKVV0Q==",
            "dependencies": {
                "@ungap/structured-clone": "^1.2.0",
                "@ungap/with-resolvers": "^0.1.0",
--- a/pyscript.core/package.json
+++ b/pyscript.core/package.json
@@ -1,6 +1,6 @@
 {
    "name": "@pyscript/core",
-    "version": "0.4.16",
+    "version": "0.4.18",
    "type": "module",
    "description": "PyScript",
    "module": "./index.js",
@@ -42,7 +42,7 @@
    "dependencies": {
        "@ungap/with-resolvers": "^0.1.0",
        "basic-devtools": "^0.1.6",
-        "polyscript": "^0.12.2",
+        "polyscript": "^0.12.3",
        "sticky-module": "^0.1.1",
        "to-json-callback": "^0.1.1",
        "type-checked-collections": "^0.1.7"
--- a/pyscript.core/src/plugins/py-editor.js
+++ b/pyscript.core/src/plugins/py-editor.js
@@ -15,8 +15,8 @@ const hooks = {
        codeBeforeRun: () => stdlib,
        // works on both Pyodide and MicroPython
        onReady: ({ runAsync, io }, { sync }) => {
-            io.stdout = (line) => sync.write(line);
-            io.stderr = (line) => sync.writeErr(line);
+            io.stdout = io.buffered(sync.write);
+            io.stderr = io.buffered(sync.writeErr);
            sync.revoke();
            sync.runAsync = runAsync;
        },
--- a/pyscript.core/src/plugins/py-terminal.js
+++ b/pyscript.core/src/plugins/py-terminal.js
@@ -59,7 +59,37 @@ const workerReady = ({ interpreter, io, run, type }, { sync }) => {
        });
        run("from _pyscript_input import input");

-        io.stdout = generic.write;
+        // this is needed to avoid truncated unicode in MicroPython
+        // the reason is that `linebuffer` false just sends one byte
+        // at a time and readline here doesn't like it much.
+        // MicroPython also has issues with code-points and
+        // replProcessChar(byte) but that function accepts only
+        // one byte at a time so ... we have an issue!
+        // @see https://github.com/pyscript/pyscript/pull/2018
+        // @see https://github.com/WebReflection/buffer-points
+        const bufferPoints = (stdio) => {
+            const bytes = [];
+            let needed = 0;
+            return (buffer) => {
+                let written = 0;
+                for (const byte of buffer) {
+                    bytes.push(byte);
+                    // @see https://encoding.spec.whatwg.org/#utf-8-bytes-needed
+                    if (needed) needed--;
+                    else if (0xc2 <= byte && byte <= 0xdf) needed = 1;
+                    else if (0xe0 <= byte && byte <= 0xef) needed = 2;
+                    else if (0xf0 <= byte && byte <= 0xf4) needed = 3;
+                    if (!needed) {
+                        written += bytes.length;
+                        stdio(new Uint8Array(bytes.splice(0)));
+                    }
+                }
+                return written;
+            };
+        };
+
+        io.stdout = bufferPoints(generic.write);
+
        // tiny shim of the code module with only interact
        // to bootstrap a REPL like environment
        interpreter.registerJsModule("code", {
@@ -69,14 +99,14 @@ const workerReady = ({ interpreter, io, run, type }, { sync }) => {

                const encoder = new TextEncoder();
                const acc = [];
+                const handlePoints = bufferPoints((buffer) => {
+                    acc.push(...buffer);
+                    pyterminal_write(decoder.decode(buffer));
+                });

-                io.stdout = (buffer) => {
-                    // avoid duplicating the output produced by the input
-                    if (length++ > input.length) {
-                        acc.push(...buffer);
-                        pyterminal_write(decoder.decode(buffer));
-                    }
-                };
+                // avoid duplicating the output produced by the input
+                io.stdout = (buffer) =>
+                    length++ > input.length ? handlePoints(buffer) : 0;

                interpreter.replInit();

--- a/pyscript.core/test/py-terminal.html
+++ b/pyscript.core/test/py-terminal.html
@@ -10,6 +10,7 @@
    </head>
    <body>
        <script type="mpy" worker terminal>
+            print("µpython")
            import code
            code.interact()
        </script>