Use Promise.all to fetch files part of py-config (#1322)

This is a first step towards loading more stuff simultaneously rather
than sequentially.

The functional part of this is pretty small: call `calculateFetchPaths` and
then `Promise.all(fetchPaths.map(loadFileFromURL));`. I also transposed the
return type of `calculateFetchPaths` since it's more convenient to consume
this way.

I redid the logic in `calculateFetchPaths` a bit. I renamed `src/plugins/fetch.ts`
to `calculateFetchPaths.ts` since the file performs no fetching. I also
renamed `loadFromFile` to `loadFileFromURL`.
This commit is contained in:
Hood Chatham
2023-03-29 07:32:09 -07:00
committed by GitHub
parent 26f07246e1
commit 9fedfe3699
6 changed files with 80 additions and 85 deletions

View File

@@ -8,7 +8,7 @@ import { PluginManager, define_custom_element, Plugin } from './plugin';
import { make_PyScript, initHandlers, mountElements } from './components/pyscript';
import { getLogger } from './logger';
import { showWarning, globalExport, createLock } from './utils';
import { calculatePaths } from './plugins/fetch';
import { calculateFetchPaths } from './plugins/calculateFetchPaths';
import { createCustomElements } from './components/elements';
import { UserError, ErrorCode, _createAlertBanner } from './exceptions';
import { type Stdio, StdioMultiplexer, DEFAULT_STDIO } from './stdio';
@@ -304,11 +304,7 @@ export class PyScriptApp {
pyscript._install_deprecated_globals_2022_12_1(globals())
`);
if (this.config.packages) {
logger.info('Packages to install: ', this.config.packages);
await interpreter._remote.installPackage(this.config.packages);
}
await this.fetchPaths(interpreter);
await Promise.all([this.installPackages(), this.fetchPaths(interpreter)]);
//This may be unnecessary - only useful if plugins try to import files fetch'd in fetchPaths()
await interpreter._remote.invalidate_module_path_cache();
@@ -316,22 +312,25 @@ export class PyScriptApp {
await this.fetchUserPlugins(interpreter);
}
async fetchPaths(interpreter: InterpreterClient) {
// XXX this can be VASTLY improved: for each path we need to fetch a
// URL and write to the virtual filesystem: pyodide.loadFromFile does
// it in Python, which means we need to have the interpreter
// initialized. But we could easily do it in JS in parallel with the
// download/startup of pyodide.
const [paths, fetchPaths] = calculatePaths(this.config.fetch);
logger.info('Paths to write: ', paths);
logger.info('Paths to fetch: ', fetchPaths);
for (let i = 0; i < paths.length; i++) {
logger.info(` fetching path: ${fetchPaths[i]}`);
// Exceptions raised from here will create an alert banner
await interpreter._remote.loadFromFile(paths[i], fetchPaths[i]);
async installPackages() {
if (!this.config.packages) {
return;
}
logger.info('All paths fetched');
logger.info('Packages to install: ', this.config.packages);
await this.interpreter._remote.installPackage(this.config.packages);
}
async fetchPaths(interpreter: InterpreterClient) {
// TODO: start fetching before interpreter initialization
const paths = calculateFetchPaths(this.config.fetch);
logger.info('Fetching urls:', paths.map(({ url }) => url).join(', '));
await Promise.all(
paths.map(async ({ path, url }) => {
await interpreter._remote.loadFileFromURL(path, url);
logger.info(` Fetched ${url} ==> ${path}`);
}),
);
logger.info('Fetched all paths');
}
/**
@@ -408,7 +407,7 @@ export class PyScriptApp {
const pathArr = filePath.split('/');
const filename = pathArr.pop();
// TODO: Would be probably be better to store plugins somewhere like /plugins/python/ or similar
await interpreter._remote.loadFromFile(filename, filePath);
await interpreter._remote.loadFileFromURL(filename, filePath);
//refresh module cache before trying to import module files into interpreter
await interpreter._remote.invalidate_module_path_cache();

View File

@@ -0,0 +1,26 @@
import { joinPaths } from '../utils';
import { FetchConfig } from '../pyconfig';
import { UserError, ErrorCode } from '../exceptions';
export function calculateFetchPaths(fetch_cfg: FetchConfig[]): { url: string; path: string }[] {
for (const { files, to_file, from = '' } of fetch_cfg) {
if (files !== undefined && to_file !== undefined) {
throw new UserError(ErrorCode.BAD_CONFIG, `Cannot use 'to_file' and 'files' parameters together!`);
}
if (files === undefined && to_file === undefined && from.endsWith('/')) {
throw new UserError(
ErrorCode.BAD_CONFIG,
`Couldn't determine the filename from the path ${from}, please supply 'to_file' parameter.`,
);
}
}
return fetch_cfg.flatMap(function ({ from = '', to_folder = '.', to_file, files }) {
if (files !== undefined) {
return files.map(file => ({ url: joinPaths([from, file]), path: joinPaths([to_folder, file]) }));
}
const filename = to_file || from.slice(1 + from.lastIndexOf('/'));
const to_path = joinPaths([to_folder, filename]);
return [{ url: from, path: to_path }];
});
}

View File

@@ -1,37 +0,0 @@
import { joinPaths } from '../utils';
import { FetchConfig } from '../pyconfig';
import { UserError, ErrorCode } from '../exceptions';
export function calculatePaths(fetch_cfg: FetchConfig[]) {
const fetchPaths: string[] = [];
const paths: string[] = [];
fetch_cfg.forEach(function (each_fetch_cfg: FetchConfig) {
const from = each_fetch_cfg.from || '';
const to_folder = each_fetch_cfg.to_folder || '.';
const to_file = each_fetch_cfg.to_file;
const files = each_fetch_cfg.files;
if (files !== undefined) {
if (to_file !== undefined) {
throw new UserError(ErrorCode.BAD_CONFIG, `Cannot use 'to_file' and 'files' parameters together!`);
}
for (const each_f of files) {
const each_fetch_path = joinPaths([from, each_f]);
fetchPaths.push(each_fetch_path);
const each_path = joinPaths([to_folder, each_f]);
paths.push(each_path);
}
} else {
fetchPaths.push(from);
const filename = to_file || from.split('/').pop();
if (filename === '') {
throw new UserError(
ErrorCode.BAD_CONFIG,
`Couldn't determine the filename from the path ${from}, please supply 'to_file' parameter.`,
);
} else {
paths.push(joinPaths([to_folder, filename]));
}
}
});
return [paths, fetchPaths];
}

View File

@@ -226,11 +226,11 @@ export class RemoteInterpreter extends Object {
/**
*
* @param path : the path in the filesystem
* @param fetch_path : the path to be fetched
* @param url : the url to be fetched
*
* Given a file available at `fetch_path` URL (eg:
* `http://dummy.com/hi.py`), the function downloads the file and saves it
* to the `path` (eg: `a/b/c/foo.py`) on the FS.
* Given a file available at `url` URL (eg: `http://dummy.com/hi.py`), the
* function downloads the file and saves it to the `path` (eg:
* `a/b/c/foo.py`) on the FS.
*
* Example usage: await loadFromFile(`a/b/c/foo.py`,
* `http://dummy.com/hi.py`)
@@ -246,13 +246,13 @@ export class RemoteInterpreter extends Object {
* in `/home/pyodide/a/b.py`, `../a/b.py` will be placed into `/home/a/b.py`
* and `/a/b.py` will be placed into `/a/b.py`.
*/
async loadFromFile(path: string, fetch_path: string): Promise<void> {
async loadFileFromURL(path: string, url: string): Promise<void> {
path = this.PATH_FS.resolve(path);
const dir: string = this.PATH.dirname(path);
this.FS.mkdirTree(dir);
// `robustFetch` checks for failures in getting a response
const response = await robustFetch(fetch_path);
const response = await robustFetch(url);
const buffer = await response.arrayBuffer();
const data = new Uint8Array(buffer);

View File

@@ -80,7 +80,7 @@ export function joinPaths(parts: string[], separator = '/') {
.map(function (part) {
return part.trim().replace(/(^[/]*|[/]*$)/g, '');
})
.filter(p => p !== '')
.filter(p => p !== '' && p !== '.')
.join(separator || '/');
if (parts[0].startsWith('/')) {
return '/' + res;

View File

@@ -1,55 +1,62 @@
import { calculatePaths } from '../../src/plugins/fetch';
import { calculateFetchPaths } from '../../src/plugins/calculateFetchPaths';
import { FetchConfig } from '../../src/pyconfig';
describe('CalculateFetchPaths', () => {
it('should calculate paths when only from is provided', () => {
const fetch_cfg: FetchConfig[] = [{ from: 'http://a.com/data.csv' }];
const [paths, fetchPaths] = calculatePaths(fetch_cfg);
expect(paths).toStrictEqual(['./data.csv']);
expect(fetchPaths).toStrictEqual(['http://a.com/data.csv']);
const res = calculateFetchPaths(fetch_cfg);
expect(res).toStrictEqual([{ url: 'http://a.com/data.csv', path: 'data.csv' }]);
});
it('should calculate paths when only files is provided', () => {
const fetch_cfg: FetchConfig[] = [{ files: ['foo/__init__.py', 'foo/mod.py'] }];
const [paths, fetchPaths] = calculatePaths(fetch_cfg);
expect(paths).toStrictEqual(['./foo/__init__.py', './foo/mod.py']);
expect(fetchPaths).toStrictEqual(['foo/__init__.py', 'foo/mod.py']);
const fetch_cfg: FetchConfig[] = [{ files: ['foo/__init__.py', 'foo/mod.py', 'foo2/mod.py'] }];
const res = calculateFetchPaths(fetch_cfg);
expect(res).toStrictEqual([
{ url: 'foo/__init__.py', path: 'foo/__init__.py' },
{ url: 'foo/mod.py', path: 'foo/mod.py' },
{ url: 'foo2/mod.py', path: 'foo2/mod.py' },
]);
});
it('should calculate paths when files and to_folder is provided', () => {
const fetch_cfg: FetchConfig[] = [{ files: ['foo/__init__.py', 'foo/mod.py'], to_folder: '/my/lib/' }];
const [paths, fetchPaths] = calculatePaths(fetch_cfg);
expect(paths).toStrictEqual(['/my/lib/foo/__init__.py', '/my/lib/foo/mod.py']);
expect(fetchPaths).toStrictEqual(['foo/__init__.py', 'foo/mod.py']);
const res = calculateFetchPaths(fetch_cfg);
expect(res).toStrictEqual([
{ url: 'foo/__init__.py', path: '/my/lib/foo/__init__.py' },
{ url: 'foo/mod.py', path: '/my/lib/foo/mod.py' },
]);
});
it('should calculate paths when from and files and to_folder is provided', () => {
const fetch_cfg: FetchConfig[] = [
{ from: 'http://a.com/download/', files: ['foo/__init__.py', 'foo/mod.py'], to_folder: '/my/lib/' },
];
const [paths, fetchPaths] = calculatePaths(fetch_cfg);
expect(paths).toStrictEqual(['/my/lib/foo/__init__.py', '/my/lib/foo/mod.py']);
expect(fetchPaths).toStrictEqual(['http://a.com/download/foo/__init__.py', 'http://a.com/download/foo/mod.py']);
const res = calculateFetchPaths(fetch_cfg);
expect(res).toStrictEqual([
{ url: 'http://a.com/download/foo/__init__.py', path: '/my/lib/foo/__init__.py' },
{ url: 'http://a.com/download/foo/mod.py', path: '/my/lib/foo/mod.py' },
]);
});
it("should error out while calculating paths when filename cannot be determined from 'from'", () => {
const fetch_cfg: FetchConfig[] = [{ from: 'http://google.com/', to_folder: '/tmp' }];
expect(() => calculatePaths(fetch_cfg)).toThrowError(
expect(() => calculateFetchPaths(fetch_cfg)).toThrowError(
"Couldn't determine the filename from the path http://google.com/",
);
});
it('should calculate paths when to_file is explicitly supplied', () => {
const fetch_cfg: FetchConfig[] = [{ from: 'http://a.com/data.csv?version=1', to_file: 'pkg/tmp/data.csv' }];
const [paths, fetchPaths] = calculatePaths(fetch_cfg);
expect(paths).toStrictEqual(['./pkg/tmp/data.csv']);
expect(fetchPaths).toStrictEqual(['http://a.com/data.csv?version=1']);
const res = calculateFetchPaths(fetch_cfg);
expect(res).toStrictEqual([{ path: 'pkg/tmp/data.csv', url: 'http://a.com/data.csv?version=1' }]);
});
it('should error out when both to_file and files parameters are provided', () => {
const fetch_cfg: FetchConfig[] = [
{ from: 'http://a.com/data.csv?version=1', to_file: 'pkg/tmp/data.csv', files: ['a.py', 'b.py'] },
];
expect(() => calculatePaths(fetch_cfg)).toThrowError("Cannot use 'to_file' and 'files' parameters together!");
expect(() => calculateFetchPaths(fetch_cfg)).toThrowError(
"Cannot use 'to_file' and 'files' parameters together!",
);
});
});