From 4ef496127fa60cc8030e61c53a07902360ec990f Mon Sep 17 00:00:00 2001 From: Ian Kerins Date: Fri, 19 Apr 2024 00:39:57 -0400 Subject: [PATCH] Handle breaking change in ChunkMatch newlines Previously, zoekt made the curious choice to not include trailing newline characters in each chunk it served in its API, in spite of the POSIX definition of a line including the trailing newline character, which is respected in many line-oriented Unix tools such as grep and ripgrep. This mistake has finally been corrected! But it is a breakage that we have to handle here. I've updated the tests to reflect the kind of data that zoekt is actually serving now. See https://github.com/sourcegraph/zoekt/pull/747. --- .changeset/dirty-windows-marry.md | 5 ++++ src/lib/server/content-parser.test.ts | 42 +++++++++++++-------------- src/lib/server/content-parser.ts | 9 +++--- 3 files changed, 30 insertions(+), 26 deletions(-) create mode 100644 .changeset/dirty-windows-marry.md diff --git a/.changeset/dirty-windows-marry.md b/.changeset/dirty-windows-marry.md new file mode 100644 index 0000000..1c03d7d --- /dev/null +++ b/.changeset/dirty-windows-marry.md @@ -0,0 +1,5 @@ +--- +"neogrok": patch +--- + +Handle breaking change in ChunkMatch newlines diff --git a/src/lib/server/content-parser.test.ts b/src/lib/server/content-parser.test.ts index b53eafc..74ec30f 100644 --- a/src/lib/server/content-parser.test.ts +++ b/src/lib/server/content-parser.test.ts @@ -37,23 +37,23 @@ describe("parseFileNameMatch", () => { describe("parseChunkMatch", () => { it("parses chunk matches", () => { // Single line. 
- expect(parseChunkMatch(Buffer.from("foo"), [])).toEqual([ + expect(parseChunkMatch(Buffer.from("foo\n"), [])).toEqual([ { text: "foo", matchRanges: [] }, ]); - expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 3 }])).toEqual( - [{ text: "foo", matchRanges: [{ start: 0, end: 3 }] }], - ); - expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 2 }])).toEqual( - [{ text: "foo", matchRanges: [{ start: 0, end: 2 }] }], - ); - expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 3 }])).toEqual( - [{ text: "foo", matchRanges: [{ start: 1, end: 3 }] }], - ); - expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 2 }])).toEqual( - [{ text: "foo", matchRanges: [{ start: 1, end: 2 }] }], - ); expect( - parseChunkMatch(Buffer.from("foo"), [ + parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 3 }]), + ).toEqual([{ text: "foo", matchRanges: [{ start: 0, end: 3 }] }]); + expect( + parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 2 }]), + ).toEqual([{ text: "foo", matchRanges: [{ start: 0, end: 2 }] }]); + expect( + parseChunkMatch(Buffer.from("foo\n"), [{ start: 1, end: 3 }]), + ).toEqual([{ text: "foo", matchRanges: [{ start: 1, end: 3 }] }]); + expect( + parseChunkMatch(Buffer.from("foo\n"), [{ start: 1, end: 2 }]), + ).toEqual([{ text: "foo", matchRanges: [{ start: 1, end: 2 }] }]); + expect( + parseChunkMatch(Buffer.from("foo\n"), [ { start: 1, end: 2 }, { start: 2, end: 3 }, ]), @@ -68,42 +68,42 @@ describe("parseChunkMatch", () => { ]); // Multi-line. 
- expect(parseChunkMatch(Buffer.from("foo\n"), [])).toEqual([ + expect(parseChunkMatch(Buffer.from("foo\n\n"), [])).toEqual([ { text: "foo", matchRanges: [] }, { text: "", matchRanges: [] }, ]); expect( - parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 3 }]), + parseChunkMatch(Buffer.from("foo\n\n"), [{ start: 0, end: 3 }]), ).toEqual([ { text: "foo", matchRanges: [{ start: 0, end: 3 }] }, { text: "", matchRanges: [] }, ]); expect( - parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 4 }]), + parseChunkMatch(Buffer.from("foo\n\n"), [{ start: 0, end: 4 }]), ).toEqual([ { text: "foo", matchRanges: [{ start: 0, end: 3 }] }, { text: "", matchRanges: [] }, ]); - expect(parseChunkMatch(Buffer.from("foo\nbar"), [])).toEqual([ + expect(parseChunkMatch(Buffer.from("foo\nbar\n"), [])).toEqual([ { text: "foo", matchRanges: [] }, { text: "bar", matchRanges: [] }, ]); expect( - parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 3 }]), + parseChunkMatch(Buffer.from("foo\nbar\n"), [{ start: 0, end: 3 }]), ).toEqual([ { text: "foo", matchRanges: [{ start: 0, end: 3 }] }, { text: "bar", matchRanges: [] }, ]); expect( - parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 4 }]), + parseChunkMatch(Buffer.from("foo\nbar\n"), [{ start: 0, end: 4 }]), ).toEqual([ { text: "foo", matchRanges: [{ start: 0, end: 3 }] }, { text: "bar", matchRanges: [] }, ]); expect( - parseChunkMatch(Buffer.from("foo\nbar"), [ + parseChunkMatch(Buffer.from("foo\nbar\n"), [ { start: 0, end: 1 }, { start: 2, end: 5 }, ]), diff --git a/src/lib/server/content-parser.ts b/src/lib/server/content-parser.ts index 0216057..11980f7 100644 --- a/src/lib/server/content-parser.ts +++ b/src/lib/server/content-parser.ts @@ -132,11 +132,10 @@ Array => { } } - // Conclude the current line. Note that if `currentLineText` is length 0, - // that is still semantically a line, namely an empty line. 
`Content` never - // naturally has a trailing newline; if there's a newline at the last byte, - // this indicates that there is a final line that is empty. - lines.push({ text: currentLineText, matchRanges: currentLineMatchRanges }); + if (currentLineText.length > 0) { + // Conclude the current line. + lines.push({ text: currentLineText, matchRanges: currentLineMatchRanges }); + } return lines; };