Skip to content

Commit

Permalink
fix(rosetta): transliterate tries to recompile samples from tablets (
Browse files Browse the repository at this point in the history
…#3324)

In #3262, the performance of `transliterate` was improved by running an
`extract` before translating, because `extract` has the facilities to do
parallel translate, and can read from a cache.

However, another thing `extract` does is *discard translations from the
cache* if they are considered "dirty" for some reason. This reason
can be: the current source is different than the cached source, the
translation mechanism has changed, compilation didn't succeed last time,
or the types referenced in the example have changed.

This is actually not desirable for `transliterate`, which wants to
do a last-ditch effort to translate whatever is necessary to translate,
but should really take what it can from the cache if there's a cached
translation available.

Add a flag to allow reading dirty translations from the cache.

Also adds the `rosetta coverage` command, which I used to find out why
the Construct Hub performance on the newest `aws-cdk-lib` version is
still poor.

---

By submitting this pull request, I confirm that my contribution is made under the terms of the [Apache 2.0 license].

[Apache 2.0 license]: https://www.apache.org/licenses/LICENSE-2.0
  • Loading branch information
rix0rrr authored Jan 7, 2022
1 parent 9506983 commit 7aa69a7
Show file tree
Hide file tree
Showing 6 changed files with 219 additions and 27 deletions.
16 changes: 16 additions & 0 deletions packages/jsii-rosetta/bin/jsii-rosetta.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import * as yargs from 'yargs';

import { TranslateResult, translateTypeScript, RosettaDiagnostic } from '../lib';
import { translateMarkdown } from '../lib/commands/convert';
import { checkCoverage } from '../lib/commands/coverage';
import { extractAndInfuse, extractSnippets, ExtractOptions } from '../lib/commands/extract';
import { infuse, DEFAULT_INFUSION_RESULTS_NAME } from '../lib/commands/infuse';
import { readTablet } from '../lib/commands/read';
Expand Down Expand Up @@ -348,6 +349,21 @@ function main() {
});
}),
)
.command(
'coverage [ASSEMBLY..]',
'Check the translation coverage of implicit tablets for the given assemblies',
(command) =>
command.positional('ASSEMBLY', {
type: 'string',
string: true,
default: ['.'],
describe: 'Assembly or directory to search',
}),
wrapHandler(async (args) => {
const absAssemblies = (args.ASSEMBLY.length > 0 ? args.ASSEMBLY : ['.']).map((x) => path.resolve(x));
await checkCoverage(absAssemblies);
}),
)
.command(
'read <TABLET> [KEY] [LANGUAGE]',
'Display snippets in a language tablet file',
Expand Down
35 changes: 35 additions & 0 deletions packages/jsii-rosetta/lib/commands/coverage.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { loadAssemblies, allTypeScriptSnippets, loadAllDefaultTablets } from '../jsii/assemblies';
import * as logging from '../logging';
import { RosettaTranslator } from '../rosetta-translator';
import { formatLocation } from '../snippet';

export async function checkCoverage(assemblyLocations: readonly string[]): Promise<void> {
logging.info(`Loading ${assemblyLocations.length} assemblies`);
const assemblies = await loadAssemblies(assemblyLocations, false);

const snippets = Array.from(allTypeScriptSnippets(assemblies, true));

const translator = new RosettaTranslator({
assemblies: assemblies.map((a) => a.assembly),
allowDirtyTranslations: true,
});
translator.addTabletsToCache(...Object.values(await loadAllDefaultTablets(assemblies)));

process.stdout.write(`- ${snippets.length} total snippets.\n`);
process.stdout.write(`- ${translator.cache.count} translations in cache.\n`);
process.stdout.write('\n');

const results = translator.readFromCache(snippets, true, true);
process.stdout.write(`- ${results.translations.length - results.dirtyCount} successful cache hits.\n`);
process.stdout.write(` ${results.infusedCount} infused.\n`);
process.stdout.write(`- ${results.dirtyCount} translations in cache but dirty (ok for pacmak, transliterate)\n`);
process.stdout.write(` ${results.dirtySourceCount} sources have changed.\n`);
process.stdout.write(` ${results.dirtyTranslatorCount} translator has changed.\n`);
process.stdout.write(` ${results.dirtyTypesCount} types have changed.\n`);
process.stdout.write(` ${results.dirtyDidntCompile} did not successfully compile.\n`);
process.stdout.write(`- ${results.remaining.length} snippets untranslated.\n`);

for (const remaining of results.remaining) {
process.stdout.write(` ${formatLocation(remaining.location)}\n`);
}
}
8 changes: 8 additions & 0 deletions packages/jsii-rosetta/lib/commands/extract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ export interface ExtractOptions {
* @default false
*/
readonly loose?: boolean;

/**
* Accept dirty translations from the cache
*
* @default false
*/
readonly allowDirtyTranslations?: boolean;
}

export async function extractAndInfuse(assemblyLocations: string[], options: ExtractOptions): Promise<ExtractResult> {
Expand Down Expand Up @@ -96,6 +103,7 @@ export async function extractSnippets(
const translatorOptions: RosettaTranslatorOptions = {
includeCompilerDiagnostics: options.includeCompilerDiagnostics ?? false,
assemblies: assemblies.map((a) => a.assembly),
allowDirtyTranslations: options.allowDirtyTranslations,
};

const translator = options.translatorFactory
Expand Down
1 change: 1 addition & 0 deletions packages/jsii-rosetta/lib/commands/transliterate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ export async function transliterateAssembly(
loose: options.loose,
cacheFromFile: options.tablet,
writeToImplicitTablets: false,
allowDirtyTranslations: true,
});

// Now do a regular "tablet reader" cycle, expecting everything to be translated already,
Expand Down
136 changes: 111 additions & 25 deletions packages/jsii-rosetta/lib/rosetta-translator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ export interface RosettaTranslatorOptions {
* @default false
*/
readonly includeCompilerDiagnostics?: boolean;

/**
* Allow reading dirty translations from cache
*
* @default false
*/
readonly allowDirtyTranslations?: boolean;
}

/**
Expand All @@ -49,13 +56,16 @@ export class RosettaTranslator {
*/
public readonly tablet = new LanguageTablet();

public readonly cache = new LanguageTablet();

private readonly fingerprinter: TypeFingerprinter;
private readonly cache = new LanguageTablet();
private readonly includeCompilerDiagnostics: boolean;
private readonly allowDirtyTranslations: boolean;

public constructor(options: RosettaTranslatorOptions = {}) {
this.fingerprinter = new TypeFingerprinter(options?.assemblies ?? []);
this.includeCompilerDiagnostics = options.includeCompilerDiagnostics ?? false;
this.allowDirtyTranslations = options.allowDirtyTranslations ?? false;
}

/**
Expand Down Expand Up @@ -90,25 +100,58 @@ export class RosettaTranslator {
* Will remove the cached snippets from the input array.
*/
public readFromCache(snippets: TypeScriptSnippet[], addToTablet = true, compiledOnly = false): ReadFromCacheResults {
const remaining = [...snippets];
const translations = new Array<TranslatedSnippet>();
const remaining = new Array<TypeScriptSnippet>();

let infusedCount = 0;
let dirtyCount = 0;
let dirtySourceCount = 0;
let dirtyTypesCount = 0;
let dirtyTranslatorCount = 0;
let dirtyDidntCompile = 0;

let i = 0;
while (i < remaining.length) {
const fromCache = tryReadFromCache(remaining[i], this.cache, this.fingerprinter);
// If compiledOnly is set, do not consider cached snippets that do not compile
if (fromCache && (!compiledOnly || fromCache.snippet.didCompile)) {
if (addToTablet) {
this.tablet.addSnippet(fromCache);
}
remaining.splice(i, 1);
translations.push(fromCache);
} else {
i += 1;
for (const snippet of snippets) {
const fromCache = tryReadFromCache(snippet, this.cache, this.fingerprinter, compiledOnly);
switch (fromCache.type) {
case 'hit':
if (addToTablet) {
this.tablet.addSnippet(fromCache.snippet);
}
translations.push(fromCache.snippet);

infusedCount += fromCache.infused ? 1 : 0;
break;

case 'dirty':
dirtyCount += 1;
dirtySourceCount += fromCache.dirtySource ? 1 : 0;
dirtyTranslatorCount += fromCache.dirtyTranslator ? 1 : 0;
dirtyTypesCount += fromCache.dirtyTypes ? 1 : 0;
dirtyDidntCompile += fromCache.dirtyDidntCompile ? 1 : 0;

if (this.allowDirtyTranslations) {
translations.push(fromCache.translation);
} else {
remaining.push(snippet);
}
break;

case 'miss':
remaining.push(snippet);
break;
}
}

return { translations, remaining };
return {
translations,
remaining,
infusedCount,
dirtyCount,
dirtySourceCount,
dirtyTranslatorCount,
dirtyTypesCount,
dirtyDidntCompile,
};
}

public async translateAll(snippets: TypeScriptSnippet[], addToTablet = true): Promise<TranslateAllResult> {
Expand Down Expand Up @@ -146,32 +189,75 @@ export class RosettaTranslator {
* doesn't really make a lot of difference. So, for simplification's sake,
* we'll regen all translations if there's at least one that's outdated.
*/
function tryReadFromCache(sourceSnippet: TypeScriptSnippet, cache: LanguageTablet, fingerprinter: TypeFingerprinter) {
function tryReadFromCache(
sourceSnippet: TypeScriptSnippet,
cache: LanguageTablet,
fingerprinter: TypeFingerprinter,
compiledOnly: boolean,
): CacheHit {
const fromCache = cache.tryGetSnippet(snippetKey(sourceSnippet));

if (!fromCache) {
return { type: 'miss' };
}

// infused snippets won't pass the full source check or the fingerprinter
// but there is no reason to try to recompile it, so return cached snippet
// if there exists one.
if (isInfused(sourceSnippet)) {
return fromCache;
return { type: 'hit', snippet: fromCache, infused: true };
}

const cacheable =
fromCache &&
completeSource(sourceSnippet) === fromCache.snippet.fullSource &&
Object.entries(TARGET_LANGUAGES).every(
([lang, translator]) => fromCache.snippet.translations?.[lang]?.version === translator.version,
) &&
fingerprinter.fingerprintAll(fromCache.fqnsReferenced()) === fromCache.snippet.fqnsFingerprint;
const dirtySource = completeSource(sourceSnippet) !== fromCache.snippet.fullSource;
const dirtyTranslator = !Object.entries(TARGET_LANGUAGES).every(
([lang, translator]) => fromCache.snippet.translations?.[lang]?.version === translator.version,
);
const dirtyTypes = fingerprinter.fingerprintAll(fromCache.fqnsReferenced()) !== fromCache.snippet.fqnsFingerprint;
const dirtyDidntCompile = compiledOnly && !fromCache.snippet.didCompile;

return cacheable ? fromCache : undefined;
if (dirtySource || dirtyTranslator || dirtyTypes || dirtyDidntCompile) {
return { type: 'dirty', translation: fromCache, dirtySource, dirtyTranslator, dirtyTypes, dirtyDidntCompile };
}
return { type: 'hit', snippet: fromCache, infused: false };
}

export type CacheHit =
| { readonly type: 'miss' }
| { readonly type: 'hit'; readonly snippet: TranslatedSnippet; readonly infused: boolean }
| {
readonly type: 'dirty';
readonly translation: TranslatedSnippet;
readonly dirtySource: boolean;
readonly dirtyTranslator: boolean;
readonly dirtyTypes: boolean;
readonly dirtyDidntCompile: boolean;
};

function isInfused(snippet: TypeScriptSnippet) {
return snippet.parameters?.infused !== undefined;
}

export interface ReadFromCacheResults {
/**
* Successful translations
*/
readonly translations: TranslatedSnippet[];

/**
* Successful but dirty hits
*/
readonly remaining: TypeScriptSnippet[];

/**
* How many successfully hit translations were infused
*/
readonly infusedCount: number;

readonly dirtyCount: number;

// Counts for dirtiness (a single snippet may be dirty for more than one reason)
readonly dirtySourceCount: number;
readonly dirtyTranslatorCount: number;
readonly dirtyTypesCount: number;
readonly dirtyDidntCompile: number;
}
50 changes: 48 additions & 2 deletions packages/jsii-rosetta/test/commands/transliterate.test.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import { SPEC_FILE_NAME } from '@jsii/spec';
import { Assembly, SPEC_FILE_NAME } from '@jsii/spec';
import * as fs from 'fs-extra';
import * as jsii from 'jsii';
import * as path from 'path';

import { extractSnippets } from '../../lib/commands/extract';
import { transliterateAssembly } from '../../lib/commands/transliterate';
import { TargetLanguage } from '../../lib/languages/target-language';
import { withTemporaryDirectory } from '../testutil';
import { TabletSchema } from '../../lib/tablets/schema';
import { withTemporaryDirectory, TestJsiiModule, DUMMY_JSII_CONFIG } from '../testutil';

jest.setTimeout(60_000);

Expand Down Expand Up @@ -1345,3 +1347,47 @@ export class ClassName implements IInterface {
}),
).resolves.not.toThrow();
}));

test('will read translations from cache even if they are dirty', async () => {
const infusedAssembly = await TestJsiiModule.fromSource(
{
'index.ts': `
/**
* ClassA
*
* @example x
*/
export class ClassA {
public someMethod() {
}
}
`,
},
{
name: 'my_assembly',
jsii: DUMMY_JSII_CONFIG,
},
);
try {
// Run an extract
await extractSnippets([infusedAssembly.moduleDirectory]);

// Mess up the extracted source file
const schema: TabletSchema = await fs.readJson(path.join(infusedAssembly.moduleDirectory, '.jsii.tabl.json'));
for (const snippet of Object.values(schema.snippets)) {
snippet.translations[TargetLanguage.PYTHON] = {
source: 'oops',
version: '999',
};
}
await fs.writeJson(path.join(infusedAssembly.moduleDirectory, '.jsii.tabl.json'), schema);

// Run a transliterate, should have used the translation from the cache even though the version is wrong
await transliterateAssembly([infusedAssembly.moduleDirectory], [TargetLanguage.PYTHON]);

const translated: Assembly = await fs.readJson(path.join(infusedAssembly.moduleDirectory, '.jsii.python'));
expect(translated.types?.['my_assembly.ClassA'].docs?.example).toEqual('oops');
} finally {
await infusedAssembly.cleanup();
}
});

0 comments on commit 7aa69a7

Please sign in to comment.