From 3ac5b49d85e4edb9efc7a64e880403d3182bf64c Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Tue, 17 Sep 2024 20:38:33 +0200 Subject: [PATCH] module: refactor ESM loader for adding future synchronous hooks This lays the foundation for supporting synchronous hooks proposed in https://github.com/nodejs/loaders/pull/198 for ESM. - Corrects and adds several JSDoc comments for internal functions of the ESM loader, as well as explaining how require() for import CJS works in the special resolve/load paths. This doesn't consolidate it with import in require(esm) yet due to caching differences, which is left as a TODO. - The moduleProvider passed into ModuleJob is replaced as moduleOrModulePromise, we call the translators directly in the ESM loader and verify it right after loading for clarity. - Reuse a few refactored out helpers for require(esm) in getModuleJobForRequire(). PR-URL: https://github.com/nodejs/node/pull/54769 Reviewed-By: Matteo Collina Reviewed-By: Stephen Belanger Reviewed-By: James M Snell --- lib/internal/modules/esm/loader.js | 322 ++++++++++++++++-------- lib/internal/modules/esm/module_job.js | 61 +++-- lib/internal/modules/esm/translators.js | 37 +-- 3 files changed, 264 insertions(+), 156 deletions(-) diff --git a/lib/internal/modules/esm/loader.js b/lib/internal/modules/esm/loader.js index 0ac3ed855a6278..6d147800131ce6 100644 --- a/lib/internal/modules/esm/loader.js +++ b/lib/internal/modules/esm/loader.js @@ -206,13 +206,11 @@ class ModuleLoader { } async eval(source, url, isEntryPoint = false) { - const evalInstance = (url) => { - return compileSourceTextModule(url, source, this); - }; const { ModuleJob } = require('internal/modules/esm/module_job'); + const wrap = compileSourceTextModule(url, source, this); const module = await onImport.tracePromise(async () => { const job = new ModuleJob( - this, url, undefined, evalInstance, false, false); + this, url, undefined, wrap, false, false); this.loadCache.set(url, undefined, job); const { module } 
= await job.run(isEntryPoint); return module; @@ -230,40 +228,49 @@ class ModuleLoader { } /** - * Get a (possibly still pending) module job from the cache, - * or create one and return its Promise. - * @param {string} specifier The string after `from` in an `import` statement, - * or the first parameter of an `import()` - * expression - * @param {string | undefined} parentURL The URL of the module importing this - * one, unless this is the Node.js entry - * point. - * @param {Record} importAttributes Validations for the - * module import. - * @returns {Promise} The (possibly pending) module job + * Get a (possibly not yet fully linked) module job from the cache, or create one and return its Promise. + * @param {string} specifier The module request of the module to be resolved. Typically, what's + * requested by `import ''` or `import('')`. + * @param {string} [parentURL] The URL of the module where the module request is initiated. + * It's undefined if it's from the root module. + * @param {ImportAttributes} importAttributes Attributes from the import statement or expression. + * @returns {Promise} importAttributes Validations for the - * module import. - * @param {string} [parentURL] The absolute URL of the module importing this - * one, unless this is the Node.js entry point - * @param {string} [format] The format hint possibly returned by the - * `resolve` hook - * @returns {Promise} The (possibly pending) module job + * Translate a loaded module source into a ModuleWrap. This is run synchronously, + * but the translator may return the ModuleWrap in a Promise. + * @param {string} url URL of the module to be translated. + * @param {string} format Format of the module to be translated. This is used to find + * matching translators. + * @param {ModuleSource} source Source of the module to be translated. + * @param {boolean} isMain Whether the module to be translated is the entry point. 
+ * @returns {ModuleWrap | Promise} + */ + #translate(url, format, source, isMain) { + this.validateLoadResult(url, format); + const translator = getTranslators().get(format); + + if (!translator) { + throw new ERR_UNKNOWN_MODULE_FORMAT(format, url); + } + + return FunctionPrototypeCall(translator, this, url, source, isMain); + } + + /** + * Load a module and translate it into a ModuleWrap for require() in imported CJS. + * This is run synchronously, and the translator always returns a ModuleWrap synchronously. + * @param {string} url URL of the module to be translated. + * @param {object} loadContext See {@link load} + * @param {boolean} isMain Whether the module to be translated is the entry point. + * @returns {ModuleWrap} */ - #createModuleJob(url, importAttributes, parentURL, format, sync) { - const callTranslator = ({ format: finalFormat, responseURL, source }, isMain) => { - const translator = getTranslators().get(finalFormat); + loadAndTranslateForRequireInImportedCJS(url, loadContext, isMain) { + const { format: formatFromLoad, source } = this.#loadSync(url, loadContext); + + if (formatFromLoad === 'wasm') { // require(wasm) is not supported. + throw new ERR_UNKNOWN_MODULE_FORMAT(formatFromLoad, url); + } - if (!translator) { - throw new ERR_UNKNOWN_MODULE_FORMAT(finalFormat, responseURL); + if (formatFromLoad === 'module' || formatFromLoad === 'module-typescript') { + if (!getOptionValue('--experimental-require-module')) { + throw new ERR_REQUIRE_ESM(url, true); } + } - return FunctionPrototypeCall(translator, this, responseURL, source, isMain); - }; - const context = { format, importAttributes }; + let finalFormat = formatFromLoad; + if (formatFromLoad === 'commonjs') { + finalFormat = 'require-commonjs'; + } + if (formatFromLoad === 'commonjs-typescript') { + finalFormat = 'require-commonjs-typescript'; + } - const moduleProvider = sync ? 
- (url, isMain) => callTranslator(this.loadSync(url, context), isMain) : - async (url, isMain) => callTranslator(await this.load(url, context), isMain); + const wrap = this.#translate(url, finalFormat, source, isMain); + assert(wrap instanceof ModuleWrap, `Translator used for require(${url}) should not be async`); + return wrap; + } + + /** + * Load a module and translate it into a ModuleWrap for ordinary imported ESM. + * This is run asynchronously. + * @param {string} url URL of the module to be translated. + * @param {object} loadContext See {@link load} + * @param {boolean} isMain Whether the module to be translated is the entry point. + * @returns {Promise} + */ + async loadAndTranslate(url, loadContext, isMain) { + const { format, source } = await this.load(url, loadContext); + return this.#translate(url, format, source, isMain); + } + + /** + * Load a module and translate it into a ModuleWrap, and create a ModuleJob from it. + * This runs synchronously. If isForRequireInImportedCJS is true, the module should be linked + * by the time this returns. Otherwise it may still have pending module requests. + * @param {string} url The URL that was resolved for this module. + * @param {ImportAttributes} importAttributes See {@link getModuleJobForImport} + * @param {string} [parentURL] See {@link getModuleJobForImport} + * @param {string} [format] The format hint possibly returned by the `resolve` hook + * @param {boolean} isForRequireInImportedCJS Whether this module job is created for require() + * in imported CJS. 
+ * @returns {ModuleJobBase} The (possibly pending) module job + */ + #createModuleJob(url, importAttributes, parentURL, format, isForRequireInImportedCJS) { + const context = { format, importAttributes }; const isMain = parentURL === undefined; + let moduleOrModulePromise; + if (isForRequireInImportedCJS) { + moduleOrModulePromise = this.loadAndTranslateForRequireInImportedCJS(url, context, isMain); + } else { + moduleOrModulePromise = this.loadAndTranslate(url, context, isMain); + } + const inspectBrk = ( isMain && getOptionValue('--inspect-brk') @@ -457,10 +516,10 @@ class ModuleLoader { this, url, importAttributes, - moduleProvider, + moduleOrModulePromise, isMain, inspectBrk, - sync, + isForRequireInImportedCJS, ); this.loadCache.set(url, importAttributes.type, job); @@ -479,7 +538,7 @@ class ModuleLoader { */ async import(specifier, parentURL, importAttributes, isEntryPoint = false) { return onImport.tracePromise(async () => { - const moduleJob = await this.getModuleJob(specifier, parentURL, importAttributes); + const moduleJob = await this.getModuleJobForImport(specifier, parentURL, importAttributes); const { module } = await moduleJob.run(isEntryPoint); return module.getNamespace(); }, { @@ -504,39 +563,72 @@ class ModuleLoader { } /** - * Resolve the location of the module. - * @param {string} originalSpecifier The specified URL path of the module to - * be resolved. - * @param {string} [parentURL] The URL path of the module's parent. - * @param {ImportAttributes} importAttributes Attributes from the import - * statement or expression. - * @returns {{ format: string, url: URL['href'] }} + * Resolve a module request to a URL identifying the location of the module. Handles customization hooks, + * if any. + * @param {string|URL} specifier The module request of the module to be resolved. Typically, what's + * requested by `import specifier`, `import(specifier)` or + * `import.meta.resolve(specifier)`. 
+ * @param {string} [parentURL] The URL of the module where the module request is initiated. + * It's undefined if it's from the root module. + * @param {ImportAttributes} importAttributes Attributes from the import statement or expression. + * @returns {Promise<{format: string, url: string}>} */ - resolve(originalSpecifier, parentURL, importAttributes) { - originalSpecifier = `${originalSpecifier}`; - if (this.#customizations) { - return this.#customizations.resolve(originalSpecifier, parentURL, importAttributes); + resolve(specifier, parentURL, importAttributes) { + specifier = `${specifier}`; + if (this.#customizations) { // Only has module.register hooks. + return this.#customizations.resolve(specifier, parentURL, importAttributes); } - const requestKey = this.#resolveCache.serializeKey(originalSpecifier, importAttributes); + return this.#cachedDefaultResolve(specifier, parentURL, importAttributes); + } + + /** + * Either return a cached resolution, or perform the default resolution which is synchronous, and + * cache the result. + * @param {string} specifier See {@link resolve}. + * @param {string} [parentURL] See {@link resolve}. + * @param {ImportAttributes} importAttributes See {@link resolve}. + * @returns {{ format: string, url: string }} + */ + #cachedDefaultResolve(specifier, parentURL, importAttributes) { + const requestKey = this.#resolveCache.serializeKey(specifier, importAttributes); const cachedResult = this.#resolveCache.get(requestKey, parentURL); if (cachedResult != null) { return cachedResult; } - const result = this.defaultResolve(originalSpecifier, parentURL, importAttributes); + const result = this.defaultResolve(specifier, parentURL, importAttributes); this.#resolveCache.set(requestKey, parentURL, result); return result; } /** - * Just like `resolve` except synchronous. This is here specifically to support - * `import.meta.resolve` which must happen synchronously. 
+ * This is the default resolve step for future synchronous hooks, which incorporates asynchronous hooks + * from module.register() which are run in a blocking fashion for it to be synchronous. + * @param {string|URL} specifier See {@link resolveSync}. + * @param {{ parentURL?: string, importAttributes: ImportAttributes}} context See {@link resolveSync}. + * @returns {{ format: string, url: string }} */ - resolveSync(originalSpecifier, parentURL, importAttributes) { - originalSpecifier = `${originalSpecifier}`; + #resolveAndMaybeBlockOnLoaderThread(specifier, context) { if (this.#customizations) { - return this.#customizations.resolveSync(originalSpecifier, parentURL, importAttributes); + return this.#customizations.resolveSync(specifier, context.parentURL, context.importAttributes); } - return this.defaultResolve(originalSpecifier, parentURL, importAttributes); + return this.#cachedDefaultResolve(specifier, context.parentURL, context.importAttributes); + } + + /** + * Similar to {@link resolve}, but the results are always synchronously returned. If there are any + * asynchronous resolve hooks from module.register(), it will block until the results are returned + * from the loader thread for this to be synchronous. + * This is here to support `import.meta.resolve()`, `require()` in imported CJS, and + * future synchronous hooks. + * + * TODO(joyeecheung): consolidate the cache behavior and use this in require(esm). + * @param {string|URL} specifier See {@link resolve}. + * @param {string} [parentURL] See {@link resolve}. + * @param {ImportAttributes} [importAttributes] See {@link resolve}. + * @returns {{ format: string, url: string }} + */ + resolveSync(specifier, parentURL, importAttributes = { __proto__: null }) { + return this.#resolveAndMaybeBlockOnLoaderThread(`${specifier}`, { parentURL, importAttributes }); } /** @@ -558,41 +650,49 @@ class ModuleLoader { } /** - * Provide source that is understood by one of Node's translators. 
- * @param {URL['href']} url The URL/path of the module to be loaded - * @param {object} [context] Metadata about the module + * Provide source that is understood by one of Node's translators. Handles customization hooks, + * if any. + * @param {string} url The URL of the module to be loaded. + * @param {object} context Metadata about the module * @returns {Promise<{ format: ModuleFormat, source: ModuleSource }>} */ async load(url, context) { + if (this.#customizations) { + return this.#customizations.load(url, context); + } + defaultLoad ??= require('internal/modules/esm/load').defaultLoad; - const result = this.#customizations ? - await this.#customizations.load(url, context) : - await defaultLoad(url, context); - this.validateLoadResult(url, result?.format); - return result; + return defaultLoad(url, context); } - loadSync(url, context) { - defaultLoadSync ??= require('internal/modules/esm/load').defaultLoadSync; - - let result = this.#customizations ? - this.#customizations.loadSync(url, context) : - defaultLoadSync(url, context); - let format = result?.format; - if (format === 'module' || format === 'module-typescript') { - throw new ERR_REQUIRE_ESM(url, true); - } - if (format === 'commonjs') { - format = 'require-commonjs'; - result = { __proto__: result, format }; - } - if (format === 'commonjs-typescript') { - format = 'require-commonjs-typescript'; - result = { __proto__: result, format }; + /** + * This is the default load step for future synchronous hooks, which incorporates asynchronous hooks + * from module.register() which are run in a blocking fashion for it to be synchronous. 
+ * @param {string} url See {@link load} + * @param {object} context See {@link load} + * @returns {{ format: ModuleFormat, source: ModuleSource }} + */ + #loadAndMaybeBlockOnLoaderThread(url, context) { + if (this.#customizations) { + return this.#customizations.loadSync(url, context); } + defaultLoadSync ??= require('internal/modules/esm/load').defaultLoadSync; + return defaultLoadSync(url, context); + } - this.validateLoadResult(url, format); - return result; + /** + * Similar to {@link load} but this is always run synchronously. If there are asynchronous hooks + * from module.register(), this blocks on the loader thread for it to return synchronously. + * + * This is here to support `require()` in imported CJS and future synchronous hooks. + * + * TODO(joyeecheung): consolidate the cache behavior and use this in require(esm). + * @param {string} url See {@link load} + * @param {object} [context] See {@link load} + * @returns {{ format: ModuleFormat, source: ModuleSource }} + */ + #loadSync(url, context) { + return this.#loadAndMaybeBlockOnLoaderThread(url, context); } validateLoadResult(url, format) { diff --git a/lib/internal/modules/esm/module_job.js b/lib/internal/modules/esm/module_job.js index 2f42909e0c6f82..62206fcc44c2d1 100644 --- a/lib/internal/modules/esm/module_job.js +++ b/lib/internal/modules/esm/module_job.js @@ -8,7 +8,6 @@ const { ObjectSetPrototypeOf, PromisePrototypeThen, PromiseResolve, - ReflectApply, RegExpPrototypeExec, RegExpPrototypeSymbolReplace, SafePromiseAllReturnArrayLike, @@ -56,13 +55,12 @@ const isCommonJSGlobalLikeNotDefinedError = (errorMessage) => ); class ModuleJobBase { - constructor(url, importAttributes, moduleWrapMaybePromise, isMain, inspectBrk) { + constructor(url, importAttributes, isMain, inspectBrk) { this.importAttributes = importAttributes; this.isMain = isMain; this.inspectBrk = inspectBrk; this.url = url; - this.module = moduleWrapMaybePromise; } } @@ -70,21 +68,29 @@ class ModuleJobBase { * its dependencies, 
over time. */ class ModuleJob extends ModuleJobBase { #loader = null; - // `loader` is the Loader instance used for loading dependencies. + + /** + * @param {ModuleLoader} loader The ESM loader. + * @param {string} url URL of the module to be wrapped in ModuleJob. + * @param {ImportAttributes} importAttributes Import attributes from the import statement. + * @param {ModuleWrap|Promise} moduleOrModulePromise Translated ModuleWrap for the module. + * @param {boolean} isMain Whether the module is the entry point. + * @param {boolean} inspectBrk Whether this module should be evaluated with the + * first line paused in the debugger (because --inspect-brk is passed). + * @param {boolean} isForRequireInImportedCJS Whether this is created for require() in imported CJS. + */ constructor(loader, url, importAttributes = { __proto__: null }, - moduleProvider, isMain, inspectBrk, sync = false) { - const modulePromise = ReflectApply(moduleProvider, loader, [url, isMain]); - super(url, importAttributes, modulePromise, isMain, inspectBrk); + moduleOrModulePromise, isMain, inspectBrk, isForRequireInImportedCJS = false) { + super(url, importAttributes, isMain, inspectBrk); this.#loader = loader; - // Expose the promise to the ModuleWrap directly for linking below. - // `this.module` is also filled in below. - this.modulePromise = modulePromise; - if (sync) { - this.module = this.modulePromise; + // Expose the promise to the ModuleWrap directly for linking below. + if (isForRequireInImportedCJS) { + this.module = moduleOrModulePromise; + assert(this.module instanceof ModuleWrap); this.modulePromise = PromiseResolve(this.module); } else { - this.modulePromise = PromiseResolve(this.modulePromise); + this.modulePromise = moduleOrModulePromise; } // Promise for the list of all dependencyJobs. 
@@ -123,7 +129,7 @@ class ModuleJob extends ModuleJobBase { for (let idx = 0; idx < moduleRequests.length; idx++) { const { specifier, attributes } = moduleRequests[idx]; - const dependencyJobPromise = this.#loader.getModuleJob( + const dependencyJobPromise = this.#loader.getModuleJobForImport( specifier, this.url, attributes, ); const modulePromise = PromisePrototypeThen(dependencyJobPromise, (job) => { @@ -288,14 +294,33 @@ class ModuleJob extends ModuleJobBase { } } -// This is a fully synchronous job and does not spawn additional threads in any way. -// All the steps are ensured to be synchronous and it throws on instantiating -// an asynchronous graph. +/** + * This is a fully synchronous job and does not spawn additional threads in any way. + * All the steps are ensured to be synchronous and it throws on instantiating + * an asynchronous graph. It also disallows CJS <-> ESM cycles. + * + * This is used for ES modules loaded via require(esm). Modules loaded by require() in + * imported CJS are handled by ModuleJob with the isForRequireInImportedCJS set to true instead. + * The two currently have different caching behaviors. + * TODO(joyeecheung): consolidate this with the isForRequireInImportedCJS variant of ModuleJob. + */ class ModuleJobSync extends ModuleJobBase { #loader = null; + + /** + * @param {ModuleLoader} loader The ESM loader. + * @param {string} url URL of the module to be wrapped in ModuleJob. + * @param {ImportAttributes} importAttributes Import attributes from the import statement. + * @param {ModuleWrap} moduleWrap Translated ModuleWrap for the module. + * @param {boolean} isMain Whether the module is the entry point. + * @param {boolean} inspectBrk Whether this module should be evaluated with the + * first line paused in the debugger (because --inspect-brk is passed). 
+ */ constructor(loader, url, importAttributes, moduleWrap, isMain, inspectBrk) { - super(url, importAttributes, moduleWrap, isMain, inspectBrk, true); + super(url, importAttributes, isMain, inspectBrk, true); + this.#loader = loader; + this.module = moduleWrap; assert(this.module instanceof ModuleWrap); // Store itself into the cache first before linking in case there are circular diff --git a/lib/internal/modules/esm/translators.js b/lib/internal/modules/esm/translators.js index 5901319805d7a0..6a5804e656adee 100644 --- a/lib/internal/modules/esm/translators.js +++ b/lib/internal/modules/esm/translators.js @@ -68,28 +68,11 @@ function getSource(url) { /** @type {import('deps/cjs-module-lexer/lexer.js').parse} */ let cjsParse; /** - * Initializes the CommonJS module lexer parser. - * If WebAssembly is available, it uses the optimized version from the dist folder. - * Otherwise, it falls back to the JavaScript version from the lexer folder. + * Initializes the CommonJS module lexer parser using the JavaScript version. + * TODO(joyeecheung): Use `require('internal/deps/cjs-module-lexer/dist/lexer').initSync()` + * when cjs-module-lexer 1.4.0 is rolled in. */ -async function initCJSParse() { - if (typeof WebAssembly === 'undefined') { - initCJSParseSync(); - } else { - const { parse, init } = - require('internal/deps/cjs-module-lexer/dist/lexer'); - try { - await init(); - cjsParse = parse; - } catch { - initCJSParseSync(); - } - } -} - function initCJSParseSync() { - // TODO(joyeecheung): implement a binding that directly compiles using - // v8::WasmModuleObject::Compile() synchronously. 
if (cjsParse === undefined) { cjsParse = require('internal/deps/cjs-module-lexer/lexer').parse; } @@ -159,7 +142,7 @@ function loadCJSModule(module, source, url, filename, isMain) { } specifier = `${pathToFileURL(path)}`; } - const job = cascadedLoader.getModuleJobSync(specifier, url, importAttributes); + const job = cascadedLoader.getModuleJobForRequireInImportedCJS(specifier, url, importAttributes); job.runSync(); return cjsCache.get(job.url).exports; }; @@ -250,6 +233,7 @@ translators.set('commonjs-sync', function requireCommonJS(url, source, isMain) { // Handle CommonJS modules referenced by `require` calls. // This translator function must be sync, as `require` is sync. translators.set('require-commonjs', (url, source, isMain) => { + initCJSParseSync(); assert(cjsParse); return createCJSModuleWrap(url, source); @@ -266,10 +250,9 @@ translators.set('require-commonjs-typescript', (url, source, isMain) => { // Handle CommonJS modules referenced by `import` statements or expressions, // or as the initial entry point when the ESM loader handles a CommonJS entry. -translators.set('commonjs', async function commonjsStrategy(url, source, - isMain) { +translators.set('commonjs', function commonjsStrategy(url, source, isMain) { if (!cjsParse) { - await initCJSParse(); + initCJSParseSync(); } // For backward-compatibility, it's possible to return a nullish value for @@ -287,7 +270,6 @@ translators.set('commonjs', async function commonjsStrategy(url, source, // Continue regardless of error. } return createCJSModuleWrap(url, source, isMain, cjsLoader); - }); /** @@ -448,8 +430,9 @@ translators.set('wasm', async function(url, source) { let compiled; try { - // TODO(joyeecheung): implement a binding that directly compiles using - // v8::WasmModuleObject::Compile() synchronously. + // TODO(joyeecheung): implement a translator that just uses + // compiled = new WebAssembly.Module(source) to compile it + // synchronously. 
compiled = await WebAssembly.compile(source); } catch (err) { err.message = errPath(url) + ': ' + err.message;