From 7aa69a7c51b31ab1adb37b9d5cab682b5da6c715 Mon Sep 17 00:00:00 2001 From: Rico Huijbers Date: Fri, 7 Jan 2022 12:29:14 +0100 Subject: [PATCH] fix(rosetta): `transliterate` tries to recompile samples from tablets (#3324) In #3262, the performance of `transliterate` was improved by running an `extract` before translating, because `extract` has the facilities to do parallel translate, and can read from a cache. However, another thing `extract` does is *discard translations from the cache* if they are considered "dirty" for some reason. This reason can be: the current source is different than the cached source, the translation mechanism has changed, compilation didn't succeed last time, or the types referenced in the example have changed. This is actually not desirable for `transliterate`, which wants to do a last-ditch effort to translate whatever is necessary to translate, but should really take what it can from the cache if there's a cached translation available. Add a flag to allow reading dirty translations from the cache. Also adds the `rosetta coverage` command, which I used to find out why the Construct Hub performance on the newest `aws-cdk-lib` version is still poor. --- By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license]. [Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0 --- packages/jsii-rosetta/bin/jsii-rosetta.ts | 16 +++ .../jsii-rosetta/lib/commands/coverage.ts | 35 +++++ packages/jsii-rosetta/lib/commands/extract.ts | 8 ++ .../lib/commands/transliterate.ts | 1 + .../jsii-rosetta/lib/rosetta-translator.ts | 136 ++++++++++++++---- .../test/commands/transliterate.test.ts | 50 ++++++- 6 files changed, 219 insertions(+), 27 deletions(-) create mode 100644 packages/jsii-rosetta/lib/commands/coverage.ts diff --git a/packages/jsii-rosetta/bin/jsii-rosetta.ts b/packages/jsii-rosetta/bin/jsii-rosetta.ts index 8f05aed94c..39df072af2 100644 --- a/packages/jsii-rosetta/bin/jsii-rosetta.ts +++ b/packages/jsii-rosetta/bin/jsii-rosetta.ts @@ -6,6 +6,7 @@ import * as yargs from 'yargs'; import { TranslateResult, translateTypeScript, RosettaDiagnostic } from '../lib'; import { translateMarkdown } from '../lib/commands/convert'; +import { checkCoverage } from '../lib/commands/coverage'; import { extractAndInfuse, extractSnippets, ExtractOptions } from '../lib/commands/extract'; import { infuse, DEFAULT_INFUSION_RESULTS_NAME } from '../lib/commands/infuse'; import { readTablet } from '../lib/commands/read'; @@ -348,6 +349,21 @@ function main() { }); }), ) + .command( + 'coverage [ASSEMBLY..]', + 'Check the translation coverage of implicit tablets for the given assemblies', + (command) => + command.positional('ASSEMBLY', { + type: 'string', + string: true, + default: ['.'], + describe: 'Assembly or directory to search', + }), + wrapHandler(async (args) => { + const absAssemblies = (args.ASSEMBLY.length > 0 ? args.ASSEMBLY : ['.']).map((x) => path.resolve(x)); + await checkCoverage(absAssemblies); + }), + ) .command( 'read [KEY] [LANGUAGE]', 'Display snippets in a language tablet file', diff --git a/packages/jsii-rosetta/lib/commands/coverage.ts b/packages/jsii-rosetta/lib/commands/coverage.ts new file mode 100644 index 0000000000..d7fb57b0d9 --- /dev/null +++ b/packages/jsii-rosetta/lib/commands/coverage.ts @@ -0,0 +1,35 @@ +import { loadAssemblies, allTypeScriptSnippets, loadAllDefaultTablets } from '../jsii/assemblies'; +import * as logging from '../logging'; +import { RosettaTranslator } from '../rosetta-translator'; +import { formatLocation } from '../snippet'; + +export async function checkCoverage(assemblyLocations: readonly string[]): Promise { + logging.info(`Loading ${assemblyLocations.length} assemblies`); + const assemblies = await loadAssemblies(assemblyLocations, false); + + const snippets = Array.from(allTypeScriptSnippets(assemblies, true)); + + const translator = new RosettaTranslator({ + assemblies: assemblies.map((a) => a.assembly), + allowDirtyTranslations: true, + }); + translator.addTabletsToCache(...Object.values(await loadAllDefaultTablets(assemblies))); + + process.stdout.write(`- ${snippets.length} total snippets.\n`); + process.stdout.write(`- ${translator.cache.count} translations in cache.\n`); + process.stdout.write('\n'); + + const results = translator.readFromCache(snippets, true, true); + process.stdout.write(`- ${results.translations.length - results.dirtyCount} successful cache hits.\n`); + process.stdout.write(` ${results.infusedCount} infused.\n`); + process.stdout.write(`- ${results.dirtyCount} translations in cache but dirty (ok for pacmak, transliterate)\n`); + process.stdout.write(` ${results.dirtySourceCount} sources have changed.\n`); + process.stdout.write(` ${results.dirtyTranslatorCount} translator has changed.\n`); + process.stdout.write(` ${results.dirtyTypesCount} types have changed.\n`); + process.stdout.write(` ${results.dirtyDidntCompile} did not successfully compile.\n`); + process.stdout.write(`- ${results.remaining.length} snippets untranslated.\n`); + + for (const remaining of results.remaining) { + process.stdout.write(` ${formatLocation(remaining.location)}\n`); + } +} diff --git a/packages/jsii-rosetta/lib/commands/extract.ts b/packages/jsii-rosetta/lib/commands/extract.ts index d878e83a85..2d0105af59 100644 --- a/packages/jsii-rosetta/lib/commands/extract.ts +++ b/packages/jsii-rosetta/lib/commands/extract.ts @@ -58,6 +58,13 @@ export interface ExtractOptions { * @default false */ readonly loose?: boolean; + + /** + * Accept dirty translations from the cache + * + * @default false + */ + readonly allowDirtyTranslations?: boolean; } export async function extractAndInfuse(assemblyLocations: string[], options: ExtractOptions): Promise { @@ -96,6 +103,7 @@ export async function extractSnippets( const translatorOptions: RosettaTranslatorOptions = { includeCompilerDiagnostics: options.includeCompilerDiagnostics ?? false, assemblies: assemblies.map((a) => a.assembly), + allowDirtyTranslations: options.allowDirtyTranslations, }; const translator = options.translatorFactory diff --git a/packages/jsii-rosetta/lib/commands/transliterate.ts b/packages/jsii-rosetta/lib/commands/transliterate.ts index 86c2f334af..ea6904a683 100644 --- a/packages/jsii-rosetta/lib/commands/transliterate.ts +++ b/packages/jsii-rosetta/lib/commands/transliterate.ts @@ -60,6 +60,7 @@ export async function transliterateAssembly( loose: options.loose, cacheFromFile: options.tablet, writeToImplicitTablets: false, + allowDirtyTranslations: true, }); // Now do a regular "tablet reader" cycle, expecting everything to be translated already, diff --git a/packages/jsii-rosetta/lib/rosetta-translator.ts b/packages/jsii-rosetta/lib/rosetta-translator.ts index 1a00e463c6..047db93de0 100644 --- a/packages/jsii-rosetta/lib/rosetta-translator.ts +++ b/packages/jsii-rosetta/lib/rosetta-translator.ts @@ -33,6 +33,13 @@ export interface RosettaTranslatorOptions { * @default false */ readonly includeCompilerDiagnostics?: boolean; + + /** + * Allow reading dirty translations from cache + * + * @default false + */ + readonly allowDirtyTranslations?: boolean; } /** @@ -49,13 +56,16 @@ export class RosettaTranslator { */ public readonly tablet = new LanguageTablet(); + public readonly cache = new LanguageTablet(); + private readonly fingerprinter: TypeFingerprinter; - private readonly cache = new LanguageTablet(); private readonly includeCompilerDiagnostics: boolean; + private readonly allowDirtyTranslations: boolean; public constructor(options: RosettaTranslatorOptions = {}) { this.fingerprinter = new TypeFingerprinter(options?.assemblies ?? []); this.includeCompilerDiagnostics = options.includeCompilerDiagnostics ?? false; + this.allowDirtyTranslations = options.allowDirtyTranslations ?? false; } /** @@ -90,25 +100,58 @@ export class RosettaTranslator { * Will remove the cached snippets from the input array. */ public readFromCache(snippets: TypeScriptSnippet[], addToTablet = true, compiledOnly = false): ReadFromCacheResults { - const remaining = [...snippets]; const translations = new Array(); + const remaining = new Array(); + + let infusedCount = 0; + let dirtyCount = 0; + let dirtySourceCount = 0; + let dirtyTypesCount = 0; + let dirtyTranslatorCount = 0; + let dirtyDidntCompile = 0; - let i = 0; - while (i < remaining.length) { - const fromCache = tryReadFromCache(remaining[i], this.cache, this.fingerprinter); - // If compiledOnly is set, do not consider cached snippets that do not compile - if (fromCache && (!compiledOnly || fromCache.snippet.didCompile)) { - if (addToTablet) { - this.tablet.addSnippet(fromCache); - } - remaining.splice(i, 1); - translations.push(fromCache); - } else { - i += 1; + for (const snippet of snippets) { + const fromCache = tryReadFromCache(snippet, this.cache, this.fingerprinter, compiledOnly); + switch (fromCache.type) { + case 'hit': + if (addToTablet) { + this.tablet.addSnippet(fromCache.snippet); + } + translations.push(fromCache.snippet); + + infusedCount += fromCache.infused ? 1 : 0; + break; + + case 'dirty': + dirtyCount += 1; + dirtySourceCount += fromCache.dirtySource ? 1 : 0; + dirtyTranslatorCount += fromCache.dirtyTranslator ? 1 : 0; + dirtyTypesCount += fromCache.dirtyTypes ? 1 : 0; + dirtyDidntCompile += fromCache.dirtyDidntCompile ? 1 : 0; + + if (this.allowDirtyTranslations) { + translations.push(fromCache.translation); + } else { + remaining.push(snippet); + } + break; + + case 'miss': + remaining.push(snippet); + break; } } - return { translations, remaining }; + return { + translations, + remaining, + infusedCount, + dirtyCount, + dirtySourceCount, + dirtyTranslatorCount, + dirtyTypesCount, + dirtyDidntCompile, + }; } public async translateAll(snippets: TypeScriptSnippet[], addToTablet = true): Promise { @@ -146,32 +189,75 @@ export class RosettaTranslator { * doesn't really make a lot of difference. So, for simplification's sake, * we'll regen all translations if there's at least one that's outdated. */ -function tryReadFromCache(sourceSnippet: TypeScriptSnippet, cache: LanguageTablet, fingerprinter: TypeFingerprinter) { +function tryReadFromCache( + sourceSnippet: TypeScriptSnippet, + cache: LanguageTablet, + fingerprinter: TypeFingerprinter, + compiledOnly: boolean, +): CacheHit { const fromCache = cache.tryGetSnippet(snippetKey(sourceSnippet)); + if (!fromCache) { + return { type: 'miss' }; + } + // infused snippets won't pass the full source check or the fingerprinter // but there is no reason to try to recompile it, so return cached snippet // if there exists one. if (isInfused(sourceSnippet)) { - return fromCache; + return { type: 'hit', snippet: fromCache, infused: true }; } - const cacheable = - fromCache && - completeSource(sourceSnippet) === fromCache.snippet.fullSource && - Object.entries(TARGET_LANGUAGES).every( - ([lang, translator]) => fromCache.snippet.translations?.[lang]?.version === translator.version, - ) && - fingerprinter.fingerprintAll(fromCache.fqnsReferenced()) === fromCache.snippet.fqnsFingerprint; + const dirtySource = completeSource(sourceSnippet) !== fromCache.snippet.fullSource; + const dirtyTranslator = !Object.entries(TARGET_LANGUAGES).every( + ([lang, translator]) => fromCache.snippet.translations?.[lang]?.version === translator.version, + ); + const dirtyTypes = fingerprinter.fingerprintAll(fromCache.fqnsReferenced()) !== fromCache.snippet.fqnsFingerprint; + const dirtyDidntCompile = compiledOnly && !fromCache.snippet.didCompile; - return cacheable ? fromCache : undefined; + if (dirtySource || dirtyTranslator || dirtyTypes || dirtyDidntCompile) { + return { type: 'dirty', translation: fromCache, dirtySource, dirtyTranslator, dirtyTypes, dirtyDidntCompile }; + } + return { type: 'hit', snippet: fromCache, infused: false }; } +export type CacheHit = + | { readonly type: 'miss' } + | { readonly type: 'hit'; readonly snippet: TranslatedSnippet; readonly infused: boolean } + | { + readonly type: 'dirty'; + readonly translation: TranslatedSnippet; + readonly dirtySource: boolean; + readonly dirtyTranslator: boolean; + readonly dirtyTypes: boolean; + readonly dirtyDidntCompile: boolean; + }; + function isInfused(snippet: TypeScriptSnippet) { return snippet.parameters?.infused !== undefined; } export interface ReadFromCacheResults { + /** + * Successful translations + */ readonly translations: TranslatedSnippet[]; + + /** + * Successful but dirty hits + */ readonly remaining: TypeScriptSnippet[]; + + /** + * How many successfully hit translations were infused + */ + readonly infusedCount: number; + + readonly dirtyCount: number; + + // Counts for dirtiness (a single snippet may be dirty for more than one reason) + readonly dirtySourceCount: number; + readonly dirtyTranslatorCount: number; + readonly dirtyTypesCount: number; + readonly dirtyDidntCompile: number; } diff --git a/packages/jsii-rosetta/test/commands/transliterate.test.ts b/packages/jsii-rosetta/test/commands/transliterate.test.ts index 9a8f36e663..c5693523f5 100644 --- a/packages/jsii-rosetta/test/commands/transliterate.test.ts +++ b/packages/jsii-rosetta/test/commands/transliterate.test.ts @@ -1,11 +1,13 @@ -import { SPEC_FILE_NAME } from '@jsii/spec'; +import { Assembly, SPEC_FILE_NAME } from '@jsii/spec'; import * as fs from 'fs-extra'; import * as jsii from 'jsii'; import * as path from 'path'; +import { extractSnippets } from '../../lib/commands/extract'; import { transliterateAssembly } from '../../lib/commands/transliterate'; import { TargetLanguage } from '../../lib/languages/target-language'; -import { withTemporaryDirectory } from '../testutil'; +import { TabletSchema } from '../../lib/tablets/schema'; +import { withTemporaryDirectory, TestJsiiModule, DUMMY_JSII_CONFIG } from '../testutil'; jest.setTimeout(60_000); @@ -1345,3 +1347,47 @@ export class ClassName implements IInterface { }), ).resolves.not.toThrow(); })); + +test('will read translations from cache even if they are dirty', async () => { + const infusedAssembly = await TestJsiiModule.fromSource( + { + 'index.ts': ` + /** + * ClassA + * + * @example x + */ + export class ClassA { + public someMethod() { + } + } + `, + }, + { + name: 'my_assembly', + jsii: DUMMY_JSII_CONFIG, + }, + ); + try { + // Run an extract + await extractSnippets([infusedAssembly.moduleDirectory]); + + // Mess up the extracted source file + const schema: TabletSchema = await fs.readJson(path.join(infusedAssembly.moduleDirectory, '.jsii.tabl.json')); + for (const snippet of Object.values(schema.snippets)) { + snippet.translations[TargetLanguage.PYTHON] = { + source: 'oops', + version: '999', + }; + } + await fs.writeJson(path.join(infusedAssembly.moduleDirectory, '.jsii.tabl.json'), schema); + + // Run a transliterate, should have used the translation from the cache even though the version is wrong + await transliterateAssembly([infusedAssembly.moduleDirectory], [TargetLanguage.PYTHON]); + + const translated: Assembly = await fs.readJson(path.join(infusedAssembly.moduleDirectory, '.jsii.python')); + expect(translated.types?.['my_assembly.ClassA'].docs?.example).toEqual('oops'); + } finally { + await infusedAssembly.cleanup(); + } +});