From 4ef496127fa60cc8030e61c53a07902360ec990f Mon Sep 17 00:00:00 2001
From: Ian Kerins <git@isk.haus>
Date: Fri, 19 Apr 2024 00:39:57 -0400
Subject: [PATCH] Handle breaking change in ChunkMatch newlines

Previously, zoekt made the curious choice to not include trailing
newline characters in each chunk it served in its API, in spite of the
POSIX definition of a line including the trailing newline character,
which is respected by many line-oriented Unix tools such as grep and
ripgrep.

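This mistake has finally been corrected! But it is a breaking change
that we have to handle here: a chunk's trailing newline now terminates
its last line, so the parser no longer emits an extra empty line after
it. I've updated the tests to reflect the kind of data that zoekt is
actually serving now.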

See https://github.com/sourcegraph/zoekt/pull/747.
---
 .changeset/dirty-windows-marry.md     |  5 ++++
 src/lib/server/content-parser.test.ts | 42 +++++++++++++--------------
 src/lib/server/content-parser.ts      |  9 +++---
 3 files changed, 30 insertions(+), 26 deletions(-)
 create mode 100644 .changeset/dirty-windows-marry.md

diff --git a/.changeset/dirty-windows-marry.md b/.changeset/dirty-windows-marry.md
new file mode 100644
index 0000000..1c03d7d
--- /dev/null
+++ b/.changeset/dirty-windows-marry.md
@@ -0,0 +1,5 @@
+---
+"neogrok": patch
+---
+
+Handle breaking change in ChunkMatch newlines
diff --git a/src/lib/server/content-parser.test.ts b/src/lib/server/content-parser.test.ts
index b53eafc..74ec30f 100644
--- a/src/lib/server/content-parser.test.ts
+++ b/src/lib/server/content-parser.test.ts
@@ -37,23 +37,23 @@ describe("parseFileNameMatch", () => {
 describe("parseChunkMatch", () => {
   it("parses chunk matches", () => {
     // Single line.
-    expect(parseChunkMatch(Buffer.from("foo"), [])).toEqual([
+    expect(parseChunkMatch(Buffer.from("foo\n"), [])).toEqual([
       { text: "foo", matchRanges: [] },
     ]);
-    expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 3 }])).toEqual(
-      [{ text: "foo", matchRanges: [{ start: 0, end: 3 }] }],
-    );
-    expect(parseChunkMatch(Buffer.from("foo"), [{ start: 0, end: 2 }])).toEqual(
-      [{ text: "foo", matchRanges: [{ start: 0, end: 2 }] }],
-    );
-    expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 3 }])).toEqual(
-      [{ text: "foo", matchRanges: [{ start: 1, end: 3 }] }],
-    );
-    expect(parseChunkMatch(Buffer.from("foo"), [{ start: 1, end: 2 }])).toEqual(
-      [{ text: "foo", matchRanges: [{ start: 1, end: 2 }] }],
-    );
     expect(
-      parseChunkMatch(Buffer.from("foo"), [
+      parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 3 }]),
+    ).toEqual([{ text: "foo", matchRanges: [{ start: 0, end: 3 }] }]);
+    expect(
+      parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 2 }]),
+    ).toEqual([{ text: "foo", matchRanges: [{ start: 0, end: 2 }] }]);
+    expect(
+      parseChunkMatch(Buffer.from("foo\n"), [{ start: 1, end: 3 }]),
+    ).toEqual([{ text: "foo", matchRanges: [{ start: 1, end: 3 }] }]);
+    expect(
+      parseChunkMatch(Buffer.from("foo\n"), [{ start: 1, end: 2 }]),
+    ).toEqual([{ text: "foo", matchRanges: [{ start: 1, end: 2 }] }]);
+    expect(
+      parseChunkMatch(Buffer.from("foo\n"), [
         { start: 1, end: 2 },
         { start: 2, end: 3 },
       ]),
@@ -68,42 +68,42 @@ describe("parseChunkMatch", () => {
     ]);
 
     // Multi-line.
-    expect(parseChunkMatch(Buffer.from("foo\n"), [])).toEqual([
+    expect(parseChunkMatch(Buffer.from("foo\n\n"), [])).toEqual([
       { text: "foo", matchRanges: [] },
       { text: "", matchRanges: [] },
     ]);
     expect(
-      parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 3 }]),
+      parseChunkMatch(Buffer.from("foo\n\n"), [{ start: 0, end: 3 }]),
     ).toEqual([
       { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
       { text: "", matchRanges: [] },
     ]);
     expect(
-      parseChunkMatch(Buffer.from("foo\n"), [{ start: 0, end: 4 }]),
+      parseChunkMatch(Buffer.from("foo\n\n"), [{ start: 0, end: 4 }]),
     ).toEqual([
       { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
       { text: "", matchRanges: [] },
     ]);
 
-    expect(parseChunkMatch(Buffer.from("foo\nbar"), [])).toEqual([
+    expect(parseChunkMatch(Buffer.from("foo\nbar\n"), [])).toEqual([
       { text: "foo", matchRanges: [] },
       { text: "bar", matchRanges: [] },
     ]);
     expect(
-      parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 3 }]),
+      parseChunkMatch(Buffer.from("foo\nbar\n"), [{ start: 0, end: 3 }]),
     ).toEqual([
       { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
       { text: "bar", matchRanges: [] },
     ]);
     expect(
-      parseChunkMatch(Buffer.from("foo\nbar"), [{ start: 0, end: 4 }]),
+      parseChunkMatch(Buffer.from("foo\nbar\n"), [{ start: 0, end: 4 }]),
     ).toEqual([
       { text: "foo", matchRanges: [{ start: 0, end: 3 }] },
       { text: "bar", matchRanges: [] },
     ]);
 
     expect(
-      parseChunkMatch(Buffer.from("foo\nbar"), [
+      parseChunkMatch(Buffer.from("foo\nbar\n"), [
         { start: 0, end: 1 },
         { start: 2, end: 5 },
       ]),
diff --git a/src/lib/server/content-parser.ts b/src/lib/server/content-parser.ts
index 0216057..11980f7 100644
--- a/src/lib/server/content-parser.ts
+++ b/src/lib/server/content-parser.ts
@@ -132,11 +132,10 @@ Array<ContentLine> => {
     }
   }
 
-  // Conclude the current line. Note that if `currentLineText` is length 0,
-  // that is still semantically a line, namely an empty line. `Content` never
-  // naturally has a trailing newline; if there's a newline at the last byte,
-  // this indicates that there is a final line that is empty.
-  lines.push({ text: currentLineText, matchRanges: currentLineMatchRanges });
+  if (currentLineText.length > 0) {
+    // Conclude the current line.
+    lines.push({ text: currentLineText, matchRanges: currentLineMatchRanges });
+  }
 
   return lines;
 };