diff --git a/.changeset/unlucky-jokes-battle.md b/.changeset/unlucky-jokes-battle.md new file mode 100644 index 0000000..172c2d6 --- /dev/null +++ b/.changeset/unlucky-jokes-battle.md @@ -0,0 +1,5 @@ +--- +"neogrok": minor +--- + +Add syntax highlighting with shikiji diff --git a/.yarn/patches/shikiji-core-npm-0.9.18-826293a3df.patch b/.yarn/patches/shikiji-core-npm-0.9.18-826293a3df.patch new file mode 100644 index 0000000..e7e630d --- /dev/null +++ b/.yarn/patches/shikiji-core-npm-0.9.18-826293a3df.patch @@ -0,0 +1,49 @@ +diff --git a/dist/chunk-types.d.mts b/dist/chunk-types.d.mts +index 2992357a1d0e670afce9ec11133a58a4e17593da..788cab9adb4134796ab6cd91fa9fd25b3a971dc5 100644 +--- a/dist/chunk-types.d.mts ++++ b/dist/chunk-types.d.mts +@@ -1124,10 +1124,8 @@ interface ThemedTokenExplanation { + interface ThemedToken extends TokenStyles, TokenBase { + } + interface TokenBase { +- /** +- * The content of the token +- */ +- content: string; ++ start: number; ++ end: number; + /** + * Explanation of + * +diff --git a/dist/index.mjs b/dist/index.mjs +index 6f891f5e37470cb673f8bcb8914c44634ca47091..1b52e8c87fa19d739e66f824715a18f37ae05fc6 100644 +--- a/dist/index.mjs ++++ b/dist/index.mjs +@@ -3709,7 +3709,7 @@ function dimColor(color) { + function codeToThemedTokens(internal, code, options = {}) { + const { lang = 'text', theme: themeName = internal.getLoadedThemes()[0], } = options; + if (isPlaintext(lang)) { +- const lines = code.split(/\r\n|\r|\n/); ++ const lines = code; + return [...lines.map(line => [{ content: line }])]; + } + const { theme, colorMap } = internal.setTheme(themeName); +@@ -3723,7 +3723,7 @@ function tokenizeWithTheme(code, grammar, theme, colorMap, options) { + ...theme.colorReplacements, + ...options?.colorReplacements, + }; +- const lines = code.split(/\r\n|\r|\n/); ++ const lines = code; + let ruleStack = INITIAL; + let actual = []; + const final = []; +@@ -3754,7 +3754,8 @@ function tokenizeWithTheme(code, grammar, theme, colorMap, options) { + const foregroundColor = applyColorReplacements(colorMap[foreground], colorReplacements); + const fontStyle = StackElementMetadata.getFontStyle(metadata); + const token = { +- content: line.substring(startIndex, nextStartIndex), ++ start: startIndex, ++ end: nextStartIndex, + color: foregroundColor, + fontStyle, + }; diff --git a/.yarn/patches/shikiji-npm-0.9.18-945ea5efcb.patch b/.yarn/patches/shikiji-npm-0.9.18-945ea5efcb.patch new file mode 100644 index 0000000..be31b91 --- /dev/null +++ b/.yarn/patches/shikiji-npm-0.9.18-945ea5efcb.patch @@ -0,0 +1,13 @@ +diff --git a/dist/bundle-full.d.mts b/dist/bundle-full.d.mts +index b8e655052e4a1b6e10e617eb06008a0782ff3a70..db1121f519c64bcada055f10f07500c27afca436 100644 +--- a/dist/bundle-full.d.mts ++++ b/dist/bundle-full.d.mts +@@ -12,7 +12,7 @@ type Highlighter = HighlighterGeneric; + declare const getHighlighter: shikiji_core.GetHighlighterFactory; + declare const codeToHtml: (code: string, options: shikiji_core.CodeToHastOptions) => Promise; + declare const codeToHast: (code: string, options: shikiji_core.CodeToHastOptions) => Promise; +-declare const codeToThemedTokens: (code: string, options: shikiji_core.RequireKeys, "lang" | "theme">) => Promise; ++declare const codeToThemedTokens: (code: string[], options: shikiji_core.RequireKeys, "lang" | "theme">) => Promise; + declare const codeToTokensWithThemes: (code: string, options: shikiji_core.RequireKeys, "lang" | "themes">) => Promise; + declare const getSingletonHighlighter: () => Promise>; + diff --git a/package.json 
b/package.json index 0aaf7b4..da57147 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "@sveltejs/kit": "2.0.6", "@sveltejs/vite-plugin-svelte": "3.0.1", "@types/lucene": "2.1.5", + "@types/node": "20.10.0", "@typescript-eslint/eslint-plugin": "6.7.5", "@typescript-eslint/parser": "6.7.5", "eslint": "8.51.0", @@ -56,7 +57,8 @@ "lucene": "2.1.1", "lucide-svelte": "0.303.0", "pretty-bytes": "6.1.1", - "prom-client": "15.0.0" + "prom-client": "15.0.0", + "shikiji": "patch:shikiji@npm%3A0.9.18#~/.yarn/patches/shikiji-npm-0.9.18-945ea5efcb.patch" }, "engines": { "node": ">=20" @@ -105,6 +107,7 @@ "typed-array-length": "npm:@nolyfill/typed-array-length@latest", "unbox-primitive": "npm:@nolyfill/unbox-primitive@latest", "which-boxed-primitive": "npm:@nolyfill/which-boxed-primitive@latest", - "which-typed-array": "npm:@nolyfill/which-typed-array@latest" + "which-typed-array": "npm:@nolyfill/which-typed-array@latest", + "shikiji-core@npm:0.9.18": "patch:shikiji-core@npm%3A0.9.18#~/.yarn/patches/shikiji-core-npm-0.9.18-826293a3df.patch" } } diff --git a/src/lib/server/content-parser.test.ts b/src/lib/server/content-parser.test.ts index 7127731..b53eafc 100644 --- a/src/lib/server/content-parser.test.ts +++ b/src/lib/server/content-parser.test.ts @@ -3,31 +3,34 @@ import { parseChunkMatch, parseFileNameMatch } from "./content-parser"; describe("parseFileNameMatch", () => { it("parses file name matches", () => { - expect(parseFileNameMatch(Buffer.from("foo"), [])).toEqual([ - { text: "foo" }, - ]); + expect(parseFileNameMatch(Buffer.from("foo"), [])).toEqual({ + text: "foo", + matchRanges: [], + }); expect( parseFileNameMatch(Buffer.from("foo"), [{ start: 0, end: 3 }]), - ).toEqual([{ text: "foo", match: true }]); + ).toEqual({ text: "foo", matchRanges: [{ start: 0, end: 3 }] }); expect( parseFileNameMatch(Buffer.from("foo"), [{ start: 0, end: 2 }]), - ).toEqual([{ text: "fo", match: true }, { text: "o" }]); + ).toEqual({ text: "foo", matchRanges: [{ start: 0, end: 2 }] }); expect( parseFileNameMatch(Buffer.from("foo"), [{ start: 1, end: 3 }]), - ).toEqual([{ text: "f" }, { text: "oo", match: true }]); + ).toEqual({ text: "foo", matchRanges: [{ start: 1, end: 3 }] }); expect( parseFileNameMatch(Buffer.from("foo"), [{ start: 1, end: 2 }]), - ).toEqual([{ text: "f" }, { text: "o", match: true }, { text: "o" }]); + ).toEqual({ text: "foo", matchRanges: [{ start: 1, end: 2 }] }); expect( parseFileNameMatch(Buffer.from("foo"), [ { start: 1, end: 2 }, { start: 2, end: 3 }, ]), - ).toEqual([ - { text: "f" }, - { text: "o", match: true }, - { text: "o", match: true }, - ]); + ).toEqual({ + text: "foo", + matchRanges: [ + { start: 1, end: 2 }, + { start: 2, end: 3 }, + ], + }); }); }); @@ -35,19 +38,19 @@ describe("parseChunkMatch", () => { it("parses chunk matches", () => { // Single line. 
expect(parseChunkMatch(Buffer.from("foo"), [])).toEqual([ - [{ text: "foo" }], + { text: "foo", matchRanges: [] }, ]); expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 3 }])).toEqual( - [[{ text: "foo", match: true }]], + [{ text: "foo", matchRanges: [{ start: 0, end: 3 }] }], ); expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 2 }])).toEqual( - [[{ text: "fo", match: true }, { text: "o" }]], + [{ text: "foo", matchRanges: [{ start: 0, end: 2 }] }], ); expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 3 }])).toEqual( - [[{ text: "f" }, { text: "oo", match: true }]], + [{ text: "foo", matchRanges: [{ start: 1, end: 3 }] }], ); expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 2 }])).toEqual( - [[{ text: "f" }, { text: "o", match: true }, { text: "o" }]], + [{ text: "foo", matchRanges: [{ start: 1, end: 2 }] }], ); expect( parseChunkMatch(Buffer.from("foo"), [ @@ -55,31 +58,49 @@ describe("parseChunkMatch", () => { { start: 2, end: 3 }, ]), ).toEqual([ - [{ text: "f" }, { text: "o", match: true }, { text: "o", match: true }], + { + text: "foo", + matchRanges: [ + { start: 1, end: 2 }, + { start: 2, end: 3 }, + ], + }, ]); // Multi-line. expect(parseChunkMatch(Buffer.from("foo\n"), [])).toEqual([ - [{ text: "foo" }], - [], + { text: "foo", matchRanges: [] }, + { text: "", matchRanges: [] }, ]); expect( parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 3 }]), - ).toEqual([[{ text: "foo", match: true }], []]); + ).toEqual([ + { text: "foo", matchRanges: [{ start: 0, end: 3 }] }, + { text: "", matchRanges: [] }, + ]); expect( parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 4 }]), - ).toEqual([[{ text: "foo", match: true }], []]); + ).toEqual([ + { text: "foo", matchRanges: [{ start: 0, end: 3 }] }, + { text: "", matchRanges: [] }, + ]); expect(parseChunkMatch(Buffer.from("foo\nbar"), [])).toEqual([ - [{ text: "foo" }], - [{ text: "bar" }], + { text: "foo", matchRanges: [] }, + { text: "bar", matchRanges: [] }, ]); expect( parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 3 }]), - ).toEqual([[{ text: "foo", match: true }], [{ text: "bar" }]]); + ).toEqual([ + { text: "foo", matchRanges: [{ start: 0, end: 3 }] }, + { text: "bar", matchRanges: [] }, + ]); expect( parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 4 }]), - ).toEqual([[{ text: "foo", match: true }], [{ text: "bar" }]]); + ).toEqual([ + { text: "foo", matchRanges: [{ start: 0, end: 3 }] }, + { text: "bar", matchRanges: [] }, + ]); expect( parseChunkMatch(Buffer.from("foo\nbar"), [ @@ -87,8 +108,14 @@ describe("parseChunkMatch", () => { { start: 2, end: 5 }, ]), ).toEqual([ - [{ text: "f", match: true }, { text: "o" }, { text: "o", match: true }], - [{ text: "b", match: true }, { text: "ar" }], + { + text: "foo", + matchRanges: [ + { start: 0, end: 1 }, + { start: 2, end: 3 }, + ], + }, + { text: "bar", matchRanges: [{ start: 0, end: 1 }] }, ]); }); }); diff --git a/src/lib/server/content-parser.ts b/src/lib/server/content-parser.ts index 759efa9..0216057 100644 --- a/src/lib/server/content-parser.ts +++ b/src/lib/server/content-parser.ts @@ -1,11 +1,9 @@ -/* Parsed content, as emitted by this module. 
*/ -export type ContentToken = { +export type ContentLine = { readonly text: string; - // true | undefined for less wasteful JSON serialization - readonly match?: true; + readonly matchRanges: ReadonlyArray; }; -type Range = { +export type Range = { // inclusive readonly start: number; // exclusive @@ -20,29 +18,28 @@ type Range = { */ export const parseFileNameMatch = ( content: Buffer, - matchRanges: ReadonlyArray, -): // Needs to be mutable to satisfy valita. -Array => { - const contentTokens: Array = []; + byteRanges: ReadonlyArray, +): ContentLine => { + const matchRanges: Array = []; + let text = ""; let base = 0; - for (const { start: matchStart, end: matchEnd } of matchRanges) { + for (const { start: matchStart, end: matchEnd } of byteRanges) { if (matchStart > base) { - contentTokens.push({ - text: content.toString("utf8", base, matchStart), - }); + text += content.toString("utf8", base, matchStart); } - contentTokens.push({ - text: content.toString("utf8", matchStart, matchEnd), - match: true, + matchRanges.push({ + start: text.length, + end: (text += content.toString("utf8", matchStart, matchEnd)).length, }); + base = matchEnd; } if (base < content.length) { - contentTokens.push({ text: content.toString("utf8", base) }); + text += content.toString("utf8", base); } - return contentTokens; + return { text, matchRanges }; }; /** @@ -51,17 +48,18 @@ Array => { */ export const parseChunkMatch = ( content: Buffer, - matchRanges: ReadonlyArray, + byteRanges: ReadonlyArray, ): // Needs to be mutable to satisfy valita. -Array> => { - const lines: Array> = []; - let currentLineTokens: Array = []; +Array => { + const lines: Array = []; + let currentLineText = ""; + let currentLineMatchRanges: Array = []; - const matchRangeIterator = matchRanges[Symbol.iterator](); + const byteRangeIterator = byteRanges[Symbol.iterator](); // The range that we're currently in, or the next upcoming range, or undefined // if we've passed the last range. - let currentMatchRange: Range | undefined = matchRangeIterator.next().value; - // Have we previously handled the start of `currentMatchRange` but not its + let currentByteRange: Range | undefined = byteRangeIterator.next().value; + // Have we previously handled the start of `currentByteRange` but not its // end? let inMatch = false; @@ -75,7 +73,7 @@ Array> => { while ( (tokenBoundary = findNextBoundary( inMatch, - currentMatchRange, + currentByteRange, currentNewline, )) ) { @@ -84,20 +82,22 @@ Array> => { if (match === "start") { inMatch = true; if (tokenEnd > tokenStart) { - currentLineTokens.push({ - text: content.toString("utf8", tokenStart, tokenEnd), - }); + currentLineText += content.toString("utf8", tokenStart, tokenEnd); } tokenStart = tokenEnd; } else if (match === "end") { inMatch = false; if (tokenEnd > tokenStart) { - currentLineTokens.push({ - text: content.toString("utf8", tokenStart, tokenEnd), - match: true, + currentLineMatchRanges.push({ + start: currentLineText.length, + end: (currentLineText += content.toString( + "utf8", + tokenStart, + tokenEnd, + )).length, }); } - currentMatchRange = matchRangeIterator.next().value; + currentByteRange = byteRangeIterator.next().value; tokenStart = tokenEnd; } @@ -107,30 +107,36 @@ Array> => { // as lines are visually separated from one another in the UI with // `display: block`. if (tokenEnd > tokenStart) { - currentLineTokens.push({ - text: content.toString("utf8", tokenStart, tokenEnd), - ...(inMatch ? 
{ match: true } : {}), - }); + const start = currentLineText.length; + currentLineText += content.toString("utf8", tokenStart, tokenEnd); + if (inMatch) { + currentLineMatchRanges.push({ start, end: currentLineText.length }); + } } - lines.push(currentLineTokens); - currentLineTokens = []; + lines.push({ + text: currentLineText, + matchRanges: currentLineMatchRanges, + }); + currentLineText = ""; + currentLineMatchRanges = []; currentNewline = newlineIterator.next().value; tokenStart = tokenEnd + 1; } } if (tokenStart < content.length) { - currentLineTokens.push({ - text: content.toString("utf8", tokenStart), - ...(inMatch ? { match: true } : {}), - }); + const start = currentLineText.length; + currentLineText += content.toString("utf8", tokenStart); + if (inMatch) { + currentLineMatchRanges.push({ start, end: currentLineText.length }); + } } - // Conclude the current line. Note that if `currentLineTokens` is length 0, + // Conclude the current line. Note that if `currentLineText` is length 0, // that is still semantically a line, namely an empty line. `Content` never // naturally has a trailing newline; if there's a newline at the last byte, // this indicates that there is a final line that is empty. - lines.push(currentLineTokens); + lines.push({ text: currentLineText, matchRanges: currentLineMatchRanges }); return lines; }; diff --git a/src/lib/server/search-api.ts b/src/lib/server/search-api.ts index 61fdd89..d784ba2 100644 --- a/src/lib/server/search-api.ts +++ b/src/lib/server/search-api.ts @@ -1,7 +1,7 @@ import * as v from "@badrap/valita"; import type { ReadonlyDeep } from "type-fest"; import { - type ContentToken, + type ContentLine, parseChunkMatch, parseFileNameMatch, } from "./content-parser"; @@ -184,42 +184,27 @@ const searchResultSchema = v.object({ `Unreachable: received ${fileNameChunks.length} file name matches`, ); } - let fileNameTokens: Array; + let fileNameParsed: ContentLine; if (fileNameChunks.length === 1) { const { content, matchRanges } = fileNameChunks[0]; - fileNameTokens = parseFileNameMatch(content, matchRanges); + fileNameParsed = parseFileNameMatch(content, matchRanges); } else { - fileNameTokens = [{ text: fileName }]; + fileNameParsed = { text: fileName, matchRanges: [] }; } return { repository, - fileName, - fileNameTokens, + fileName: fileNameParsed, branches, language, version, chunks: chunkMatches .filter(({ isFileNameChunk }) => !isFileNameChunk) - .map(({ content, startLineNumber, matchRanges }) => { - const lines = parseChunkMatch(content, matchRanges).map( - (lineTokens) => ({ - lineTokens, - // While the frontend could derive this from - // lineTokens, counts of matches in a chunk are - // needed so frequently that it's substantially less - // tedious to precompute it. 
- matchCount: numMatches(lineTokens), - }), - ); - - const matchCount = lines.reduce( - (n, { matchCount }) => n + matchCount, - 0, - ); - - return { matchCount, startLineNumber, lines }; - }), + .map(({ content, startLineNumber, matchRanges }) => ({ + lines: parseChunkMatch(content, matchRanges), + startLineNumber, + matchCount: matchRanges.length, + })), }; }, ), @@ -246,30 +231,19 @@ const searchResultSchema = v.object({ filesSkipped, }, files: files.map( - ({ - repository, - version, - fileName, - fileNameTokens, - chunks, - ...rest - }) => { - const fileNameMatchCount = numMatches(fileNameTokens); + ({ repository, version, fileName, chunks, ...rest }) => { return { ...rest, repository, matchCount: chunks.reduce( (n, { matchCount: m }) => n + m, - fileNameMatchCount, + fileName.matchRanges.length, ), - fileName: { - matchCount: fileNameMatchCount, - tokens: fileNameTokens, - }, + fileName, chunks, fileUrl: repoUrls[repository] ?.replaceAll("{{.Version}}", version) - .replaceAll("{{.Path}}", fileName), + .replaceAll("{{.Path}}", fileName.text), // The 'template' is such that the line number can be `join`ed // into it. JSON serializable! lineNumberTemplate: @@ -281,9 +255,6 @@ const searchResultSchema = v.object({ ), }); -const numMatches = (tokens: Array) => - tokens.filter((t) => t.match).length; - export type SearchResults = ReadonlyDeep< v.Infer["Result"] >; diff --git a/src/routes/(search-page)/chunk-renderer.test.ts b/src/routes/(search-page)/chunk-renderer.test.ts index d18091e..73e2250 100644 --- a/src/routes/(search-page)/chunk-renderer.test.ts +++ b/src/routes/(search-page)/chunk-renderer.test.ts @@ -2,179 +2,163 @@ import { describe, it, expect } from "vitest"; import { renderChunksToLineGroups } from "./chunk-renderer"; import type { Chunk } from "$lib/server/search-api"; -// This is real data from a search result against the zoekt repo. 
+// This is real data from a search result against the zoekt repo: +// r:zoekt f:api test const chunks: ReadonlyArray = [ { - matchCount: 1, - startLineNumber: 20, lines: [ { - lineTokens: [{ text: '\t"strings"\n' }], - matchCount: 0, + text: '\t"strings"', + matchRanges: [], }, { - lineTokens: [ - { text: '\t"' }, - { match: true, text: "test" }, - { text: 'ing"\n' }, + text: '\t"testing"', + matchRanges: [ + { + start: 2, + end: 6, + }, ], - matchCount: 1, }, { - lineTokens: [{ text: ")" }], - matchCount: 0, + text: '\t"time"', + matchRanges: [], }, ], + startLineNumber: 20, + matchCount: 1, }, { - matchCount: 1, - startLineNumber: 27, lines: [ { - lineTokens: [{ text: "*/\n" }], - matchCount: 0, + text: "*/", + matchRanges: [], }, { - lineTokens: [ + text: "func BenchmarkMinimalRepoListEncodings(b *testing.B) {", + matchRanges: [ { - text: "func BenchmarkMinimalRepoListEncodings(b *", + start: 42, + end: 46, }, - { match: true, text: "test" }, - { text: "ing.B) {\n" }, ], - matchCount: 1, }, { - lineTokens: [ - { - text: "\tsize := uint32(13000) // 2021-06-24 rough estimate of number of repos on a replica.", - }, - ], - matchCount: 0, + text: "\tsize := uint32(13000) // 2021-06-24 rough estimate of number of repos on a replica.", + matchRanges: [], }, ], + startLineNumber: 28, + matchCount: 1, }, { - matchCount: 2, - startLineNumber: 57, lines: [ { - lineTokens: [{ text: "\n" }], - matchCount: 0, + text: "", + matchRanges: [], }, { - lineTokens: [ + text: "func benchmarkEncoding(data interface{}) func(*testing.B) {", + matchRanges: [ { - text: "func benchmarkEncoding(data interface{}) func(*", + start: 47, + end: 51, }, - { match: true, text: "test" }, - { text: "ing.B) {\n" }, ], - matchCount: 1, }, { - lineTokens: [ - { text: "\treturn func(b *" }, - { match: true, text: "test" }, - { text: "ing.B) {\n" }, + text: "\treturn func(b *testing.B) {", + matchRanges: [ + { + start: 16, + end: 20, + }, ], - matchCount: 1, }, { - lineTokens: [{ text: "\t\tb.Helper()" }], - matchCount: 0, + text: "\t\tb.Helper()", + matchRanges: [], }, ], + startLineNumber: 62, + matchCount: 2, }, { - matchCount: 2, - startLineNumber: 78, lines: [ { - lineTokens: [{ text: "\n" }], - matchCount: 0, + text: "", + matchRanges: [], }, { - lineTokens: [ - { text: "func " }, - { match: true, text: "Test" }, + text: "func TestSizeBytesSearchResult(t *testing.T) {", + matchRanges: [ { - text: "SizeBytesSearchResult(t *", + start: 5, + end: 9, }, - { match: true, text: "test" }, - { text: "ing.T) {\n" }, - ], - matchCount: 2, - }, - { - lineTokens: [ { - text: "\tvar sr = SearchResult{", + start: 34, + end: 38, }, ], - matchCount: 0, + }, + { + text: "\tvar sr = SearchResult{", + matchRanges: [], }, ], + startLineNumber: 83, + matchCount: 2, }, { - matchCount: 1, - startLineNumber: 89, lines: [ { - lineTokens: [ - { - text: "\t\t\tLineMatches: nil, // 24 bytes\n", - }, - ], - matchCount: 0, + text: "\t\t\tLineMatches: nil, // 24 bytes", + matchRanges: [], }, { - lineTokens: [ - { - text: "\t\t\tChunkMatches: []ChunkMatch{{ // 24 bytes + 208 bytes (see ", - }, - { match: true, text: "Test" }, + text: "\t\t\tChunkMatches: []ChunkMatch{{ // 24 bytes + 208 bytes (see TestSizeByteChunkMatches)", + matchRanges: [ { - text: "SizeByteChunkMatches)\n", + start: 61, + end: 65, }, ], - matchCount: 1, }, { - lineTokens: [ - { - text: '\t\t\t\tContent: []byte("foo"),', - }, - ], - matchCount: 0, + text: '\t\t\t\tContent: []byte("foo"),', + matchRanges: [], }, ], + startLineNumber: 94, + matchCount: 1, }, { - matchCount: 2, - 
startLineNumber: 117, lines: [ { - lineTokens: [{ text: "\n" }], - matchCount: 0, + text: "", + matchRanges: [], }, { - lineTokens: [ - { text: "func " }, - { match: true, text: "Test" }, + text: "func TestSizeBytesChunkMatches(t *testing.T) {", + matchRanges: [ { - text: "SizeBytesChunkMatches(t *", + start: 5, + end: 9, + }, + { + start: 34, + end: 38, }, - { match: true, text: "test" }, - { text: "ing.T) {\n" }, ], - matchCount: 2, }, { - lineTokens: [{ text: "\tcm := ChunkMatch{" }], - matchCount: 0, + text: "\tcm := ChunkMatch{", + matchRanges: [], }, ], + startLineNumber: 122, + matchCount: 2, }, ]; @@ -189,165 +173,130 @@ describe("renderChunksToLineGroups", () => { "lineGroups": [ [ { + "line": { + "matchRanges": [], + "text": " "strings"", + }, "lineNumber": 20, - "lineTokens": [ - { - "text": " "strings" - ", - }, - ], }, { + "line": { + "matchRanges": [ + { + "end": 6, + "start": 2, + }, + ], + "text": " "testing"", + }, "lineNumber": 21, - "lineTokens": [ - { - "text": " "", - }, - { - "match": true, - "text": "test", - }, - { - "text": "ing" - ", - }, - ], }, { + "line": { + "matchRanges": [], + "text": " "time"", + }, "lineNumber": 22, - "lineTokens": [ - { - "text": ")", - }, - ], }, ], [ { - "lineNumber": 27, - "lineTokens": [ - { - "text": "*/ - ", - }, - ], - }, - { + "line": { + "matchRanges": [], + "text": "*/", + }, "lineNumber": 28, - "lineTokens": [ - { - "text": "func BenchmarkMinimalRepoListEncodings(b *", - }, - { - "match": true, - "text": "test", - }, - { - "text": "ing.B) { - ", - }, - ], }, { + "line": { + "matchRanges": [ + { + "end": 46, + "start": 42, + }, + ], + "text": "func BenchmarkMinimalRepoListEncodings(b *testing.B) {", + }, "lineNumber": 29, - "lineTokens": [ - { - "text": " size := uint32(13000) // 2021-06-24 rough estimate of number of repos on a replica.", - }, - ], + }, + { + "line": { + "matchRanges": [], + "text": " size := uint32(13000) // 2021-06-24 rough estimate of number of repos on a replica.", + }, + "lineNumber": 30, }, ], [ { - "lineNumber": 57, - "lineTokens": [ - { - "text": " - ", - }, - ], - }, - { - "lineNumber": 58, - "lineTokens": [ - { - "text": "func benchmarkEncoding(data interface{}) func(*", - }, - { - "match": true, - "text": "test", - }, - { - "text": "ing.B) { - ", - }, - ], - }, - { - "lineNumber": 59, - "lineTokens": [ - { - "text": " return func(b *", - }, - { - "match": true, - "text": "test", - }, - { - "text": "ing.B) { - ", - }, - ], - }, - { - "lineNumber": 60, - "lineTokens": [ - { - "text": " b.Helper()", - }, - ], + "line": { + "matchRanges": [], + "text": "", + }, + "lineNumber": 62, + }, + { + "line": { + "matchRanges": [ + { + "end": 51, + "start": 47, + }, + ], + "text": "func benchmarkEncoding(data interface{}) func(*testing.B) {", + }, + "lineNumber": 63, + }, + { + "line": { + "matchRanges": [ + { + "end": 20, + "start": 16, + }, + ], + "text": " return func(b *testing.B) {", + }, + "lineNumber": 64, + }, + { + "line": { + "matchRanges": [], + "text": " b.Helper()", + }, + "lineNumber": 65, }, ], [ { - "lineNumber": 78, - "lineTokens": [ - { - "text": " - ", - }, - ], - }, - { - "lineNumber": 79, - "lineTokens": [ - { - "text": "func ", - }, - { - "match": true, - "text": "Test", - }, - { - "text": "SizeBytesSearchResult(t *", - }, - { - "match": true, - "text": "test", - }, - { - "text": "ing.T) { - ", - }, - ], - }, - { - "lineNumber": 80, - "lineTokens": [ - { - "text": " var sr = SearchResult{", - }, - ], + "line": { + "matchRanges": [], + "text": "", + }, + "lineNumber": 83, + }, + { + "line": { 
+ "matchRanges": [ + { + "end": 9, + "start": 5, + }, + { + "end": 38, + "start": 34, + }, + ], + "text": "func TestSizeBytesSearchResult(t *testing.T) {", + }, + "lineNumber": 84, + }, + { + "line": { + "matchRanges": [], + "text": " var sr = SearchResult{", + }, + "lineNumber": 85, }, ], ], @@ -365,242 +314,190 @@ describe("renderChunksToLineGroups", () => { "lineGroups": [ [ { + "line": { + "matchRanges": [], + "text": " "strings"", + }, "lineNumber": 20, - "lineTokens": [ - { - "text": " "strings" - ", - }, - ], }, { + "line": { + "matchRanges": [ + { + "end": 6, + "start": 2, + }, + ], + "text": " "testing"", + }, "lineNumber": 21, - "lineTokens": [ - { - "text": " "", - }, - { - "match": true, - "text": "test", - }, - { - "text": "ing" - ", - }, - ], }, { + "line": { + "matchRanges": [], + "text": " "time"", + }, "lineNumber": 22, - "lineTokens": [ - { - "text": ")", - }, - ], }, ], [ { - "lineNumber": 27, - "lineTokens": [ - { - "text": "*/ - ", - }, - ], - }, - { + "line": { + "matchRanges": [], + "text": "*/", + }, "lineNumber": 28, - "lineTokens": [ - { - "text": "func BenchmarkMinimalRepoListEncodings(b *", - }, - { - "match": true, - "text": "test", - }, - { - "text": "ing.B) { - ", - }, - ], }, { + "line": { + "matchRanges": [ + { + "end": 46, + "start": 42, + }, + ], + "text": "func BenchmarkMinimalRepoListEncodings(b *testing.B) {", + }, "lineNumber": 29, - "lineTokens": [ - { - "text": " size := uint32(13000) // 2021-06-24 rough estimate of number of repos on a replica.", - }, - ], + }, + { + "line": { + "matchRanges": [], + "text": " size := uint32(13000) // 2021-06-24 rough estimate of number of repos on a replica.", + }, + "lineNumber": 30, }, ], [ { - "lineNumber": 57, - "lineTokens": [ - { - "text": " - ", - }, - ], - }, - { - "lineNumber": 58, - "lineTokens": [ - { - "text": "func benchmarkEncoding(data interface{}) func(*", - }, - { - "match": true, - "text": "test", - }, - { - "text": "ing.B) { - ", - }, - ], - }, - { - "lineNumber": 59, - "lineTokens": [ - { - "text": " return func(b *", - }, - { - "match": true, - "text": "test", - }, - { - "text": "ing.B) { - ", - }, - ], - }, - { - "lineNumber": 60, - "lineTokens": [ - { - "text": " b.Helper()", - }, - ], + "line": { + "matchRanges": [], + "text": "", + }, + "lineNumber": 62, + }, + { + "line": { + "matchRanges": [ + { + "end": 51, + "start": 47, + }, + ], + "text": "func benchmarkEncoding(data interface{}) func(*testing.B) {", + }, + "lineNumber": 63, + }, + { + "line": { + "matchRanges": [ + { + "end": 20, + "start": 16, + }, + ], + "text": " return func(b *testing.B) {", + }, + "lineNumber": 64, + }, + { + "line": { + "matchRanges": [], + "text": " b.Helper()", + }, + "lineNumber": 65, }, ], [ { - "lineNumber": 78, - "lineTokens": [ - { - "text": " - ", - }, - ], - }, - { - "lineNumber": 79, - "lineTokens": [ - { - "text": "func ", - }, - { - "match": true, - "text": "Test", - }, - { - "text": "SizeBytesSearchResult(t *", - }, - { - "match": true, - "text": "test", - }, - { - "text": "ing.T) { - ", - }, - ], - }, - { - "lineNumber": 80, - "lineTokens": [ - { - "text": " var sr = SearchResult{", - }, - ], + "line": { + "matchRanges": [], + "text": "", + }, + "lineNumber": 83, + }, + { + "line": { + "matchRanges": [ + { + "end": 9, + "start": 5, + }, + { + "end": 38, + "start": 34, + }, + ], + "text": "func TestSizeBytesSearchResult(t *testing.T) {", + }, + "lineNumber": 84, + }, + { + "line": { + "matchRanges": [], + "text": " var sr = SearchResult{", + }, + "lineNumber": 85, }, ], [ { - "lineNumber": 89, - 
"lineTokens": [ - { - "text": " LineMatches: nil, // 24 bytes - ", - }, - ], - }, - { - "lineNumber": 90, - "lineTokens": [ - { - "text": " ChunkMatches: []ChunkMatch{{ // 24 bytes + 208 bytes (see ", - }, - { - "match": true, - "text": "Test", - }, - { - "text": "SizeByteChunkMatches) - ", - }, - ], - }, - { - "lineNumber": 91, - "lineTokens": [ - { - "text": " Content: []byte("foo"),", - }, - ], + "line": { + "matchRanges": [], + "text": " LineMatches: nil, // 24 bytes", + }, + "lineNumber": 94, + }, + { + "line": { + "matchRanges": [ + { + "end": 65, + "start": 61, + }, + ], + "text": " ChunkMatches: []ChunkMatch{{ // 24 bytes + 208 bytes (see TestSizeByteChunkMatches)", + }, + "lineNumber": 95, + }, + { + "line": { + "matchRanges": [], + "text": " Content: []byte("foo"),", + }, + "lineNumber": 96, }, ], [ { - "lineNumber": 117, - "lineTokens": [ - { - "text": " - ", - }, - ], - }, - { - "lineNumber": 118, - "lineTokens": [ - { - "text": "func ", - }, - { - "match": true, - "text": "Test", - }, - { - "text": "SizeBytesChunkMatches(t *", - }, - { - "match": true, - "text": "test", - }, - { - "text": "ing.T) { - ", - }, - ], - }, - { - "lineNumber": 119, - "lineTokens": [ - { - "text": " cm := ChunkMatch{", - }, - ], + "line": { + "matchRanges": [], + "text": "", + }, + "lineNumber": 122, + }, + { + "line": { + "matchRanges": [ + { + "end": 9, + "start": 5, + }, + { + "end": 38, + "start": 34, + }, + ], + "text": "func TestSizeBytesChunkMatches(t *testing.T) {", + }, + "lineNumber": 123, + }, + { + "line": { + "matchRanges": [], + "text": " cm := ChunkMatch{", + }, + "lineNumber": 124, }, ], ], diff --git a/src/routes/(search-page)/chunk-renderer.ts b/src/routes/(search-page)/chunk-renderer.ts index 6a22cf8..d4a64a3 100644 --- a/src/routes/(search-page)/chunk-renderer.ts +++ b/src/routes/(search-page)/chunk-renderer.ts @@ -1,6 +1,11 @@ -import type { ContentToken } from "$lib/server/content-parser"; +import type { ContentLine } from "$lib/server/content-parser"; import type { Chunk, ResultFile } from "$lib/server/search-api"; +export type LineGroup = Array<{ + readonly lineNumber: number; + readonly line: ContentLine; +}>; + export const renderChunksToLineGroups = ( chunks: ResultFile["chunks"], // The number of matches that, when exceeded, we stop incorporating chunks @@ -18,12 +23,7 @@ export const renderChunksToLineGroups = ( // // That being said, we will bail in the middle of a chunk if the greater // hardCutoff is exceeded. - const lineGroups: Array< - Array<{ - readonly lineNumber: number; - readonly lineTokens: ReadonlyArray; - }> - > = []; + const lineGroups: Array = []; // The number of matches beyond which we will actually cut off a chunk early. // The problem is that the cost (and UI absurdity) of rendering a chunk scales @@ -49,7 +49,7 @@ export const renderChunksToLineGroups = ( const lines = []; for (const line of chunk.lines) { lines.push(line); - matchCount += line.matchCount; + matchCount += line.matchRanges.length; if (preCutoffMatchCount + matchCount >= hardCutoff) { break; } @@ -75,9 +75,9 @@ export const renderChunksToLineGroups = ( : // The above `break` guarantees this is non-null. 
subChunk!; const { lines, startLineNumber } = renderedChunk; - const numberedLines = lines.map(({ lineTokens }, i) => ({ + const numberedLines = lines.map((line, i) => ({ lineNumber: i + startLineNumber, - lineTokens, + line, })); const contiguous = diff --git a/src/routes/(search-page)/line-group.svelte b/src/routes/(search-page)/line-group.svelte new file mode 100644 index 0000000..6b113aa --- /dev/null +++ b/src/routes/(search-page)/line-group.svelte @@ -0,0 +1,107 @@ + + + +
+ {#each lines as { lineNumber, line }, i} + + {#if file.fileUrl && file.lineNumberTemplate} + {lineNumber} + {:else}{lineNumber}{/if} + + + {/each} +
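
The shikiji patches above replace each token's `content: string` with `start`/`end` offsets precisely so that syntax tokens can be intersected with `matchRanges` over the decoded line text. That intersection happens in line-group.svelte; below is a rough standalone sketch of the technique — the names (`mergeTokens`, `OffsetToken`, `RenderToken`) are illustrative, not the component's actual code. Segments are cut at every boundary of a syntax token or a match range, producing the `{ text, match, color }` tokens that rendered-content.svelte consumes:

import type { ContentLine } from "$lib/server/content-parser";

// Shape of a themed token under the patched shikiji-core: offsets into
// the line's string instead of a `content` substring.
type OffsetToken = { start: number; end: number; color?: string };

type RenderToken = {
  readonly text: string;
  readonly match?: true;
  readonly color?: string;
};

const mergeTokens = (
  line: ContentLine,
  themedTokens: ReadonlyArray<OffsetToken>,
): Array<RenderToken> => {
  // Collect every offset at which either kind of range starts or ends;
  // each segment between consecutive boundaries has uniform styling.
  const boundaries = new Set<number>([0, line.text.length]);
  for (const { start, end } of themedTokens) {
    boundaries.add(start).add(end);
  }
  for (const { start, end } of line.matchRanges) {
    boundaries.add(start).add(end);
  }
  const sorted = [...boundaries].sort((a, b) => a - b);

  const tokens: Array<RenderToken> = [];
  for (let i = 0; i < sorted.length - 1; i++) {
    const [start, end] = [sorted[i], sorted[i + 1]];
    const themed = themedTokens.find((t) => t.start <= start && end <= t.end);
    const match = line.matchRanges.some(
      (r) => r.start <= start && end <= r.end,
    );
    tokens.push({
      text: line.text.slice(start, end),
      ...(match ? { match: true as const } : {}),
      ...(themed?.color ? { color: themed.color } : {}),
    });
  }
  return tokens;
};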
diff --git a/src/routes/(search-page)/rendered-content.svelte b/src/routes/(search-page)/rendered-content.svelte index d31d8d3..0ebf604 100644 --- a/src/routes/(search-page)/rendered-content.svelte +++ b/src/routes/(search-page)/rendered-content.svelte @@ -1,12 +1,133 @@ -{#each tokens as { text, match }} - {#if match} +{#each tokens as { text, match, color, fontClass }} + {#if match && color} + + {text} + {:else if match} {text} + {:else if color} + {text} {:else} {text} {/if} diff --git a/src/routes/(search-page)/search-results-file-header.svelte b/src/routes/(search-page)/search-results-file-header.svelte index 3274699..2600ba3 100644 --- a/src/routes/(search-page)/search-results-file-header.svelte +++ b/src/routes/(search-page)/search-results-file-header.svelte @@ -14,7 +14,7 @@ ...(file.branches.length > 1 || file.branches[0] !== "HEAD" ? [file.branches.join(", ")] : []), - file.language, + file.language || "Text", `№${rank}`, ]; @@ -30,8 +30,8 @@ class="inline" size={16} />{#if file.fileUrl} - {:else}{/if}{:else}{/if} {metadata.join(" | ")} diff --git a/src/routes/(search-page)/search-results-file.svelte b/src/routes/(search-page)/search-results-file.svelte index bda27eb..c92ce02 100644 --- a/src/routes/(search-page)/search-results-file.svelte +++ b/src/routes/(search-page)/search-results-file.svelte @@ -5,7 +5,7 @@ } from "$lib/preferences"; import type { ResultFile } from "$lib/server/search-api"; import SearchResultsFileHeader from "./search-results-file-header.svelte"; - import RenderedContent from "./rendered-content.svelte"; + import LineGroup from "./line-group.svelte"; import { renderChunksToLineGroups } from "./chunk-renderer"; export let file: ResultFile; @@ -30,7 +30,7 @@ )); $: postCutoffMatchCount = - file.matchCount - preCutoffMatchCount - file.fileName.matchCount; + file.matchCount - preCutoffMatchCount - file.fileName.matchRanges.length; const expand = () => { expanded = true; @@ -64,30 +64,8 @@ {#if lineGroups.length > 0}
- {#each lineGroups as lines} - -
- {#each lines as { lineNumber, lineTokens }} - - {#if file.fileUrl && file.lineNumberTemplate} - {lineNumber} - {:else}{lineNumber}{/if} - - - {/each} -
+ {#each lineGroups as lines (lines[0].lineNumber)} + {/each}
{/if} diff --git a/src/routes/(search-page)/search-results.svelte b/src/routes/(search-page)/search-results.svelte index 1b20095..f742254 100644 --- a/src/routes/(search-page)/search-results.svelte +++ b/src/routes/(search-page)/search-results.svelte @@ -43,9 +43,7 @@
- {#each files as file, i (`${file.repository}/${file.fileName.tokens - .map(({ text }) => text) - .join()}@${file.branches.join(";")}`)} + {#each files as file, i (`${file.repository}/${file.fileName.text}@${file.branches.join(";")}`)} {/each}
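
One detail worth calling out: the `matchRanges` produced by content-parser.ts above are string offsets (UTF-16 code units), while zoekt reports byte offsets into UTF-8 content. `parseFileNameMatch` converts between the two by recording `text.length` immediately before and after appending each decoded slice. The same idea distilled into a standalone sketch (the helper name is illustrative), with a multi-byte character showing why the conversion matters:

import { strict as assert } from "node:assert";

type Range = { start: number; end: number };

// Decode a UTF-8 buffer slice by slice, measuring the accumulated string
// to translate byte ranges into code-unit ranges over the decoded text.
const byteRangesToStringRanges = (
  content: Buffer,
  byteRanges: ReadonlyArray<Range>,
): { text: string; matchRanges: Array<Range> } => {
  const matchRanges: Array<Range> = [];
  let text = "";
  let base = 0;
  for (const { start, end } of byteRanges) {
    text += content.toString("utf8", base, start);
    const rangeStart = text.length;
    text += content.toString("utf8", start, end);
    matchRanges.push({ start: rangeStart, end: text.length });
    base = end;
  }
  text += content.toString("utf8", base);
  return { text, matchRanges };
};

// "é" is two bytes in UTF-8 but a single code unit, so the byte range
// [3, 6) covering "bar" becomes the string range [2, 5).
const { text, matchRanges } = byteRangesToStringRanges(Buffer.from("é bar"), [
  { start: 3, end: 6 },
]);
assert.equal(text.slice(matchRanges[0].start, matchRanges[0].end), "bar");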
diff --git a/yarn.lock b/yarn.lock index 18533bf..bc3cd5c 100644 --- a/yarn.lock +++ b/yarn.lock @@ -985,6 +985,15 @@ __metadata: languageName: node linkType: hard +"@types/node@npm:20.10.0": + version: 20.10.0 + resolution: "@types/node@npm:20.10.0" + dependencies: + undici-types: "npm:~5.26.4" + checksum: f379e57d9d28cb5f3d8eab943de0c54a0ca2f95ee356e1fe2a1a4fa718b740103ae522c50ce107cffd52c3642ef3244cfc55bf5369081dd6c48369c8587b21ae + languageName: node + linkType: hard + "@types/node@npm:^12.7.1": version: 12.20.55 resolution: "@types/node@npm:12.20.55" @@ -3559,6 +3568,7 @@ __metadata: "@sveltejs/kit": "npm:2.0.6" "@sveltejs/vite-plugin-svelte": "npm:3.0.1" "@types/lucene": "npm:2.1.5" + "@types/node": "npm:20.10.0" "@typescript-eslint/eslint-plugin": "npm:6.7.5" "@typescript-eslint/parser": "npm:6.7.5" eslint: "npm:8.51.0" @@ -3570,6 +3580,7 @@ __metadata: prettier-plugin-svelte: "npm:3.0.3" pretty-bytes: "npm:6.1.1" prom-client: "npm:15.0.0" + shikiji: "patch:shikiji@npm%3A0.9.18#~/.yarn/patches/shikiji-npm-0.9.18-945ea5efcb.patch" svelte: "npm:4.2.1" svelte-check: "npm:3.5.2" tailwindcss: "npm:3.3.3" @@ -4492,6 +4503,38 @@ __metadata: languageName: node linkType: hard +"shikiji-core@npm:0.9.18": + version: 0.9.18 + resolution: "shikiji-core@npm:0.9.18" + checksum: cc8673c664d31a2392bb972c69c57c79f364056dd2361565d401165bee7d3afe8b358c15211d6356357fbc9d25c34dd9d06c9949fa7237031b840995e5eb5da2 + languageName: node + linkType: hard + +"shikiji-core@patch:shikiji-core@npm%3A0.9.18#~/.yarn/patches/shikiji-core-npm-0.9.18-826293a3df.patch": + version: 0.9.18 + resolution: "shikiji-core@patch:shikiji-core@npm%3A0.9.18#~/.yarn/patches/shikiji-core-npm-0.9.18-826293a3df.patch::version=0.9.18&hash=6968ed" + checksum: ace21e92feb3bfe58d126d95fd876b729184dbf57bf1e0cf3f96f7c71840754ed227649328e2301bcb47730e6875c4915e109599db5ed5d7f28557a3bd82b4c5 + languageName: node + linkType: hard + +"shikiji@npm:0.9.18": + version: 0.9.18 + resolution: "shikiji@npm:0.9.18" + dependencies: + shikiji-core: "npm:0.9.18" + checksum: 29651f5c397c5862ff1c78e0c6b85e6fa47f9f3538f5672f64c4c9afea368e77dfb1697eb7cd5746bc259d7cbaec6d77c91c5e0b520dc4dcbb77ad8492dbef72 + languageName: node + linkType: hard + +"shikiji@patch:shikiji@npm%3A0.9.18#~/.yarn/patches/shikiji-npm-0.9.18-945ea5efcb.patch": + version: 0.9.18 + resolution: "shikiji@patch:shikiji@npm%3A0.9.18#~/.yarn/patches/shikiji-npm-0.9.18-945ea5efcb.patch::version=0.9.18&hash=43ddf9" + dependencies: + shikiji-core: "npm:0.9.18" + checksum: b397c3bef535e66c64c73785143e4ce9e6faca083966b0ade618063e88fa27ed025531e78aaa7712ad5c776a97130a0e6603c77fadeeebde5ba078dfb7d9cfc3 + languageName: node + linkType: hard + "siginfo@npm:^2.0.0": version: 2.0.0 resolution: "siginfo@npm:2.0.0" @@ -5197,6 +5240,13 @@ __metadata: languageName: node linkType: hard +"undici-types@npm:~5.26.4": + version: 5.26.5 + resolution: "undici-types@npm:5.26.5" + checksum: bb673d7876c2d411b6eb6c560e0c571eef4a01c1c19925175d16e3a30c4c428181fb8d7ae802a261f283e4166a0ac435e2f505743aa9e45d893f9a3df017b501 + languageName: node + linkType: hard + "unique-filename@npm:^3.0.0": version: 3.0.0 resolution: "unique-filename@npm:3.0.0"
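
Finally, the patched `bundle-full.d.mts` signature means `codeToThemedTokens` now takes pre-split lines — convenient, since zoekt chunks are already line-oriented — and resolves to one array of offset-based tokens per line. A sketch of a call against the patched API (the language and theme are illustrative picks from shikiji's bundled set):

import { codeToThemedTokens } from "shikiji";

// With the patches applied, `code` is string[] (one entry per line) and
// each token carries { start, end, color, fontStyle } instead of `content`.
const lines = await codeToThemedTokens(["const x = 1;"], {
  lang: "typescript",
  theme: "github-dark",
});
for (const tokens of lines) {
  for (const token of tokens) {
    // start/end index into that line's string; slice to recover the text.
    console.log(token.start, token.end, token.color);
  }
}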