Move pyodide to a web worker (#1333)

This PR adds support for optionally running pyodide in a web worker:

- add a new option config.execution_thread, which can be `main` or `worker`. The default is `main`

- improve the test machinery so that we run all tests twice, once for `main` and once for `worker`

- add a new esbuild target which builds the code for the worker

The support for workers is not complete and many features are still missing: there are 71 tests which are marked as `@skip_worker`, but we can fix them in subsequent PRs.

The vast majority of these tests fail because `js.document` is unavailable inside a worker: to make it work transparently, we need the "auto-syncify" feature of synclink.


Co-authored-by: Hood Chatham <roberthoodchatham@gmail.com>
Co-authored-by: Madhur Tandon <20173739+madhur-tandon@users.noreply.github.com>
This commit is contained in:
Antonio Cuni
2023-04-14 10:55:31 +02:00
committed by GitHub
parent dfa116eb70
commit 8c5475f78f
28 changed files with 497 additions and 99 deletions

View File

@@ -40,7 +40,6 @@ export class InterpreterClient extends Object {
*/
async initializeRemote(): Promise<void> {
    // Ask the remote interpreter to load itself, handing it the config and
    // a Synclink proxy of our stdio object.
    await this._remote.loadInterpreter(this.config, Synclink.proxy(this.stdio));
    // Keep a reference to the remote's `globals` on the client.
    // NOTE(review): `_remote` appears to be a Synclink remote, so this is
    // presumably a proxy rather than a plain object -- confirm.
    this.globals = this._remote.globals;
}

View File

@@ -0,0 +1,26 @@
// XXX: what about code duplication?
// With the current build configuration, the code for logger,
// remote_interpreter and everything which is included from there is
// bundled/fetched/executed twice, once in pyscript.js and once in
// worker_interpreter.js.
import { getLogger } from '../logger';
import { RemoteInterpreter } from '../remote_interpreter';
import * as Synclink from 'synclink';
const logger = getLogger('worker');
logger.info('Interpreter worker starting...');
/**
 * Worker-side bootstrap, exposed to the main thread through Synclink.
 *
 * Builds a RemoteInterpreter for `cfg.src`, pulls the interpreter script into
 * the worker's global scope with importScripts(), and hands back a Synclink
 * proxy of the RemoteInterpreter.
 *
 * @param cfg - interpreter configuration; `cfg.src` is the script URL and
 *              `cfg.name` is only used for the log message.
 * @returns a Synclink proxy wrapping the newly created RemoteInterpreter.
 */
async function worker_initialize(cfg) {
    const interpreter = new RemoteInterpreter(cfg.src);

    // importScripts() plays the role that `await import(interpreterURL)`
    // plays when the interpreter runs in the main thread.
    logger.info(`Downloading ${cfg.name}...`); // XXX we should use logStatus
    importScripts(cfg.src);

    logger.info('worker_initialize() complete');
    const proxied = Synclink.proxy(interpreter);
    return proxied;
}
Synclink.expose(worker_initialize);
export type { worker_initialize };

View File

@@ -1,7 +1,7 @@
import './styles/pyscript_base.css';
import { loadConfigFromElement } from './pyconfig';
import type { AppConfig } from './pyconfig';
import type { AppConfig, InterpreterConfig } from './pyconfig';
import { InterpreterClient } from './interpreter_client';
import { PluginManager, Plugin, PythonPlugin } from './plugin';
import { make_PyScript, initHandlers, mountElements } from './components/pyscript';
@@ -59,16 +59,6 @@ throwHandler.serialize = new_error_transfer_handler;
user scripts
8. initialize the rest of web components such as py-button, py-repl, etc.
More concretely:
- Points 1-4 are implemented sequentially in PyScriptApp.main().
- PyScriptApp.loadInterpreter adds a <script> tag to the document to initiate
the download, and then adds an event listener for the 'load' event, which
in turns calls PyScriptApp.afterInterpreterLoad().
- PyScriptApp.afterInterpreterLoad() implements all the points >= 5.
*/
export let interpreter;
@@ -173,6 +163,52 @@ export class PyScriptApp {
logger.info('config loaded:\n' + JSON.stringify(this.config, null, 2));
}
/**
 * Return the URL of the directory the currently executing <script> was
 * loaded from, without a trailing slash.
 *
 * @returns everything in the script's `src` before the last '/' of its path.
 * @throws Error if there is no currently executing script, i.e.
 *         `document.currentScript` is null.
 */
_get_base_url(): string {
    // Note that this requires that pyscript is loaded via a <script>
    // tag. If we want to allow loading via an ES6 module in the future,
    // we need to think about some other strategy
    const elem = document.currentScript as HTMLScriptElement | null;
    if (!elem) {
        // currentScript is only set while a classic script is being
        // executed; fail with an explicit message instead of an opaque
        // "cannot read properties of null".
        throw new Error(
            'Unable to determine the base URL of pyscript: document.currentScript is null ' +
                '(pyscript must be loaded via a classic <script> tag)',
        );
    }
    // Ignore the query string and the fragment: a '/' inside them must not
    // be mistaken for the last path separator.
    const path = elem.src.replace(/[?#].*$/, '');
    return path.slice(0, path.lastIndexOf('/'));
}
/**
 * Start the interpreter in the main thread.
 *
 * This is the main-thread counterpart of worker_initialize(): the
 * RemoteInterpreter lives in this thread and is exposed over a Synclink
 * FakeMessageChannel, so callers use the same wrapped interface as in the
 * worker case.
 *
 * @param interpreter_cfg - interpreter configuration; `src` is the URL to
 *        import and `name` is shown in the status message.
 * @returns the RemoteInterpreter itself (`remote_interpreter`) and its
 *          Synclink wrapper (`wrapped_remote_interpreter`).
 */
async _startInterpreter_main(interpreter_cfg: InterpreterConfig) {
    logger.info('Starting the interpreter in the main thread');

    const local_interpreter = new RemoteInterpreter(interpreter_cfg.src);

    // One end of the fake channel serves the interpreter, the other end is
    // wrapped and handed to the client.
    const { port1: client_port, port2: server_port } =
        new Synclink.FakeMessageChannel() as unknown as MessageChannel;
    client_port.start();
    server_port.start();
    Synclink.expose(local_interpreter, server_port);
    const wrapped = Synclink.wrap(client_port);

    this.logStatus(`Downloading ${interpreter_cfg.name}...`);

    /* The dynamic import() below downloads pyodide and, as a side effect,
       installs a loadPyodide() function into globalThis; RemoteInterpreter
       calls it later.

       Importing a module which *exports* loadPyodide() would be cleaner,
       but that does not work well with workers: at the time of writing
       (2023-03-24) Firefox does not support ES modules in workers:
       https://caniuse.com/mdn-api_worker_worker_ecmascript_modules
    */
    await import(interpreter_cfg.src);

    return {
        remote_interpreter: local_interpreter,
        wrapped_remote_interpreter: wrapped,
    };
}
/**
 * Start the interpreter inside a web worker.
 *
 * Spawns `interpreter_worker.js` from the same directory as the pyscript
 * script, wraps the worker with Synclink and calls the function it exposes
 * with the interpreter configuration.
 *
 * @param interpreter_cfg - interpreter configuration forwarded to the worker.
 * @returns an object whose `wrapped_remote_interpreter` is the value returned
 *          by the worker's initializer; `remote_interpreter` is always
 *          undefined here because the unwrapped object lives in the worker.
 */
async _startInterpreter_worker(interpreter_cfg: InterpreterConfig) {
    logger.warn('execution_thread = "worker" is still VERY experimental, use it at your own risk');
    logger.info('Starting the interpreter in a web worker');

    const worker_url = `${this._get_base_url()}/interpreter_worker.js`;
    const worker = new Worker(worker_url);

    // The worker exposes a single function; Synclink.wrap() gives us a
    // callable stand-in for it.
    const remote_initialize: any = Synclink.wrap(worker);
    const wrapped = await remote_initialize(interpreter_cfg);

    return {
        remote_interpreter: undefined, // this is _unwrapped_remote
        wrapped_remote_interpreter: wrapped,
    };
}
// lifecycle (4)
async loadInterpreter() {
logger.info('Initializing interpreter');
@@ -184,35 +220,21 @@ export class PyScriptApp {
showWarning('Multiple interpreters are not supported yet.<br />Only the first will be used', 'html');
}
const interpreter_cfg = this.config.interpreters[0];
const cfg = this.config.interpreters[0];
let x;
if (this.config.execution_thread == 'worker') {
x = await this._startInterpreter_worker(cfg);
} else {
x = await this._startInterpreter_main(cfg);
}
const { remote_interpreter, wrapped_remote_interpreter } = x;
const remote_interpreter = new RemoteInterpreter(interpreter_cfg.src);
const { port1, port2 } = new Synclink.FakeMessageChannel() as unknown as MessageChannel;
port1.start();
port2.start();
Synclink.expose(remote_interpreter, port2);
const wrapped_remote_interpreter = Synclink.wrap(port1);
this.interpreter = new InterpreterClient(
this.config,
this._stdioMultiplexer,
wrapped_remote_interpreter as Synclink.Remote<RemoteInterpreter>,
remote_interpreter,
);
this.logStatus(`Downloading ${interpreter_cfg.name}...`);
/* Dynamically download and import pyodide: the import() puts a
loadPyodide() function into globalThis, which is later called by
RemoteInterpreter.
This is suboptimal: ideally, we would like to import() a module
which exports loadPyodide(), but this plays badly with workers
because at the moment of writing (2023-03-24) Firefox does not
support ES modules in workers:
https://caniuse.com/mdn-api_worker_worker_ecmascript_modules
*/
const interpreterURL = await this.interpreter._remote.src;
await import(interpreterURL);
await this.afterInterpreterLoad(this.interpreter);
}

View File

@@ -22,6 +22,7 @@ export interface AppConfig extends Record<string, any> {
fetch?: FetchConfig[];
plugins?: string[];
pyscript?: PyScriptMetadata;
execution_thread?: string; // "main" or "worker"
}
export type FetchConfig = {
@@ -43,7 +44,7 @@ export type PyScriptMetadata = {
};
const allKeys = Object.entries({
string: ['name', 'description', 'version', 'type', 'author_name', 'author_email', 'license'],
string: ['name', 'description', 'version', 'type', 'author_name', 'author_email', 'license', 'execution_thread'],
number: ['schema_version'],
array: ['runtimes', 'interpreters', 'packages', 'fetch', 'plugins'],
});
@@ -63,6 +64,7 @@ export const defaultConfig: AppConfig = {
packages: [],
fetch: [],
plugins: [],
execution_thread: 'main',
};
export function loadConfigFromElement(el: Element): AppConfig {
@@ -237,6 +239,15 @@ function validateConfig(configText: string, configType = 'toml') {
}
finalConfig[item].push(eachFetchConfig);
});
} else if (item == 'execution_thread') {
const value = config[item];
if (value !== 'main' && value !== 'worker') {
throw new UserError(
ErrorCode.BAD_CONFIG,
`"${value}" is not a valid value for the property "execution_thread". The only valid values are "main" and "worker"`,
);
}
finalConfig[item] = value;
} else {
finalConfig[item] = config[item];
}

View File

@@ -1,7 +1,7 @@
import time
from textwrap import dedent
from js import console, document
import js
from . import _internal
from ._mime import format_mime as _format_mime
@@ -22,7 +22,7 @@ class HTML:
def write(element_id, value, append=False, exec_id=0):
"""Writes value to the element with id "element_id"""
Element(element_id).write(value=value, append=append)
console.warn(
js.console.warn(
dedent(
"""PyScript Deprecation Warning: PyScript.write is
marked as deprecated and will be removed sometime soon. Please, use
@@ -55,7 +55,7 @@ class Element:
def element(self):
"""Return the dom element"""
if not self._element:
self._element = document.querySelector(f"#{self._id}")
self._element = js.document.querySelector(f"#{self._id}")
return self._element
@property
@@ -72,7 +72,7 @@ class Element:
return
if append:
child = document.createElement("div")
child = js.document.createElement("div")
self.element.appendChild(child)
if append and self.element.children:
@@ -81,7 +81,7 @@ class Element:
out_element = self.element
if mime_type in ("application/javascript", "text/html"):
script_element = document.createRange().createContextualFragment(html)
script_element = js.document.createRange().createContextualFragment(html)
out_element.appendChild(script_element)
else:
out_element.innerHTML = html
@@ -102,7 +102,7 @@ class Element:
if _el:
return Element(_el.id, _el)
else:
console.warn(f"WARNING: can't find element matching query {query}")
js.console.warn(f"WARNING: can't find element matching query {query}")
def clone(self, new_id=None, to=None):
if new_id is None:
@@ -142,7 +142,7 @@ def add_classes(element, class_list):
def create(what, id_=None, classes=""):
element = document.createElement(what)
element = js.document.createElement(what)
if id_:
element.id = id_
add_classes(element, classes)
@@ -256,7 +256,7 @@ class PyListTemplate:
Element(new_id).element.onclick = foo
def connect(self):
self.md = main_div = document.createElement("div")
self.md = main_div = js.document.createElement("div")
main_div.id = self._id + "-list-tasks-container"
if self.theme:

View File

@@ -105,7 +105,6 @@ export class RemoteInterpreter extends Object {
this.interface = Synclink.proxy(
await loadPyodide({
stdout: (msg: string) => {
// TODO: add syncify when moved to worker
stdio.stdout_writeline(msg).syncify();
},
stderr: (msg: string) => {
@@ -114,6 +113,8 @@ export class RemoteInterpreter extends Object {
fullStdLib: false,
}),
);
this.interface.registerComlink(Synclink);
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
this.FS = this.interface.FS;
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
this.PATH = (this.interface as any)._module.PATH;