-
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c03b3e2
commit bc1a4a1
Showing
7 changed files
with
250 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
// ----------------------- | ||
// -- example-sentenceit.js -- | ||
// -------------------------------------------------------------------------------- | ||
// this is an example of how to use the sentenceit function | ||
// first we import the sentenceit function | ||
// then we setup the documents array with a text | ||
// then we call the sentenceit function with the text and an options object | ||
// the options object is optional | ||
// | ||
// the cramit function is faster than the chunkit function, but it is less accurate | ||
// useful for quickly chunking text, but not for exact semantic chunking | ||
// -------------------------------------------------------------------------------- | ||
|
||
import { sentenceit } from '../chunkit.js'; // this is typically just "import { sentenceit } from 'semantic-chunking';", but this is a local test | ||
import fs from 'fs'; | ||
|
||
// initialize documents array | ||
let documents = []; | ||
let textFiles = ['./example3.txt']; | ||
|
||
// read each text file and add it to the documents array | ||
for (const textFile of textFiles) { | ||
documents.push({ | ||
document_name: textFile, | ||
document_text: await fs.promises.readFile(textFile, 'utf8') | ||
}); | ||
} | ||
|
||
// start timing | ||
const startTime = performance.now(); | ||
|
||
let myTestSentences = await sentenceit( | ||
documents, | ||
{ | ||
logging: false, | ||
onnxEmbeddingModel: "Xenova/all-MiniLM-L6-v2", | ||
dtype: 'fp32', | ||
localModelPath: "../models", | ||
modelCacheDir: "../models", | ||
returnEmbedding: true, | ||
} | ||
); | ||
|
||
// end timeing | ||
const endTime = performance.now(); | ||
|
||
// calculate tracked time in seconds | ||
let trackedTimeSeconds = (endTime - startTime) / 1000; | ||
trackedTimeSeconds = parseFloat(trackedTimeSeconds.toFixed(2)); | ||
|
||
console.log("\n\n\n"); | ||
console.log("myTestSentences:"); | ||
console.log(myTestSentences); | ||
console.log("length: " + myTestSentences.length); | ||
console.log("trackedTimeSeconds: " + trackedTimeSeconds); |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters