Skip to content

Commit

Permalink
feat: ✨ add line tracking and summarization script
Browse files Browse the repository at this point in the history
  • Loading branch information
pelikhan committed Oct 23, 2024
1 parent 7c7e185 commit 2ff37c9
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 6 deletions.
12 changes: 8 additions & 4 deletions packages/core/src/encoders.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,17 @@ export async function chunk(
keepSeparators: true,
})
const chunksRaw = ts.split(content)
const chunks = chunksRaw.map(({ text, startPos }) => {
const chunks = chunksRaw.map(({ text, startPos, endPos }) => {
const lineStart = indexToLineNumber(content, startPos)
const lineEnd = indexToLineNumber(content, endPos)
if (lineNumbers) {
const startLine = indexToLineNumber(content, startPos)
text = addLineNumbers(text, { startLine })
text = addLineNumbers(text, { startLine: lineStart })
}
return {
text,
content: text,
filename,
lineStart,
lineEnd,
} satisfies TextChunk
})
return chunks
Expand Down
5 changes: 3 additions & 2 deletions packages/core/src/types/prompt_template.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1095,8 +1095,9 @@ interface CSVParseOptions {
headers?: string[]
}

interface TextChunk {
text: string
interface TextChunk extends WorkspaceFile {
lineStart: number
lineEnd: number
}

interface TextChunkerConfig extends LineNumberingOptions {
Expand Down
24 changes: 24 additions & 0 deletions packages/sample/genaisrc/chunk.genai.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
 * Sample: split a document into chunks, then prompt the model once per chunk,
 * handing each prompt the text returned by the previous one, and finally
 * print the last response.
 */
script({
    files: "src/rag/loremipsum.pdf",
})

// Only the first file passed to the script is processed.
const sourceFile = env.files[0]

// Chunking settings; the size/overlap units are presumably tokens
// (NOTE(review): confirm against tokenizers.chunk).
const chunkingOptions = {
    chunkSize: 256,
    chunkOverlap: 42,
    lineNumbers: true,
}
const pieces = await tokenizers.chunk(sourceFile, chunkingOptions)

// Text returned by the most recent prompt; empty until the first chunk is done.
let summary = ""
for (const piece of pieces) {
    // Chunks are handled strictly one after another: every prompt reads the
    // value of `summary` produced by the previous iteration.
    const response = await runPrompt(
        (promptCtx) => {
            promptCtx.def("CHUNK", piece)
            // ignoreEmpty presumably drops this definition while `summary`
            // is still "" (first iteration) — verify against the def() docs.
            promptCtx.def("SUMMARY_SO_FAR", summary, { ignoreEmpty: true })
            promptCtx.$`Summarize CHUNK. Use SUMMARY_SO_FAR as a starting point (but do not repeat it).`
        },
        { model: "small", system: ["system"] }
    )
    // Replace (not append to) the running summary with the newest response.
    summary = response.text
}

console.log(summary)

0 comments on commit 2ff37c9

Please sign in to comment.