Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Remove unused dependencies" #9225

Merged
merged 1 commit into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 67 additions & 67 deletions front/migrations/20230614_timetttamp.ts
Original file line number Diff line number Diff line change
@@ -1,73 +1,73 @@
// /* Following a typo on points payload in our Qdrant database in which the field
// time*st*amp was written time*t*amp This migration creates a new field for every
// point, correctly named time*st*amp and with the value of the existing time*t*amp
// field*/
// import { QdrantClient } from "@qdrant/js-client-rest";
/* Following a typo on points payload in our Qdrant database in which the field
time*st*amp was written time*t*amp This migration creates a new field for every
point, correctly named time*st*amp and with the value of the existing time*t*amp
field*/
import { QdrantClient } from "@qdrant/js-client-rest";

// // if reusing, set env vars to those values, don't hardcode the values in the code
// // also as a safety delete the env vars (if persisted one way or another) after the migration
// // !! Since the client used is the REST client, ensure the port in QDRANT URL is to the REST API (not gRPC), usually 6333 (not 6334)
// const { QDRANT_URL, QDRANT_API_KEY } = process.env;
// if reusing, set env vars to those values, don't hardcode the values in the code
// also as a safety delete the env vars (if persisted one way or another) after the migration
// !! Since the client used is the REST client, ensure the port in QDRANT URL is to the REST API (not gRPC), usually 6333 (not 6334)
const { QDRANT_URL, QDRANT_API_KEY } = process.env;

// const prodClient = new QdrantClient({
// url: QDRANT_URL,
// apiKey: QDRANT_API_KEY,
// });
const prodClient = new QdrantClient({
url: QDRANT_URL,
apiKey: QDRANT_API_KEY,
});

// async function migrateCollection(
// client: QdrantClient,
// collectionName: string,
// batchSize = 250,
// offset?: string | number | undefined | null | Record<string, unknown>
// ) {
// const currentTime = Date.now();
// let updated = 0;
// const nb_points = await client.count(collectionName);
// console.log(
// "Migrating Collection ",
// collectionName,
// "\nNumber of points: ",
// nb_points.count
// );
// // scroll points excluding those with timestamp field until none are left
// do {
// console.log("Current offset: ", offset);
// const { points, next_page_offset } = await client.scroll(collectionName, {
// limit: batchSize,
// offset,
// with_vector: false,
// filter: {
// must: [{ is_empty: { key: "timestamp" } }],
// },
// });
async function migrateCollection(
client: QdrantClient,
collectionName: string,
batchSize = 250,
offset?: string | number | undefined | null | Record<string, unknown>
) {
const currentTime = Date.now();
let updated = 0;
const nb_points = await client.count(collectionName);
console.log(
"Migrating Collection ",
collectionName,
"\nNumber of points: ",
nb_points.count
);
// scroll points excluding those with timestamp field until none are left
do {
console.log("Current offset: ", offset);
const { points, next_page_offset } = await client.scroll(collectionName, {
limit: batchSize,
offset,
with_vector: false,
filter: {
must: [{ is_empty: { key: "timestamp" } }],
},
});

// // update the points
// const updatePromises = points.map(async (point) => {
// const payload = { timestamp: point.payload?.timetamp };
// await client.setPayload(collectionName, { payload, points: [point.id] });
// });
// // wait for all points of the batch to be updated to avoid flooding the server
// await Promise.all(updatePromises);
// offset = next_page_offset;
// updated += points.length;
// } while (offset !== null);
// console.log(
// "Updated points: ",
// updated,
// "\nTime: ",
// Date.now() - currentTime
// );
// }
// update the points
const updatePromises = points.map(async (point) => {
const payload = { timestamp: point.payload?.timetamp };
await client.setPayload(collectionName, { payload, points: [point.id] });
});
// wait for all points of the batch to be updated to avoid flooding the server
await Promise.all(updatePromises);
offset = next_page_offset;
updated += points.length;
} while (offset !== null);
console.log(
"Updated points: ",
updated,
"\nTime: ",
Date.now() - currentTime
);
}

// /* Migrate all collections
// * Points that do not need an update are ignored
// * Therefore can be safely restarted if it fails
// */
// async function migrateCollections(client: QdrantClient) {
// const result = await client.getCollections();
// for (const collection of result.collections) {
// await migrateCollection(client, collection.name);
// }
// }
/* Migrate all collections
* Points that do not need an update are ignored
* Therefore can be safely restarted if it fails
*/
async function migrateCollections(client: QdrantClient) {
const result = await client.getCollections();
for (const collection of result.collections) {
await migrateCollection(client, collection.name);
}
}

// void migrateCollections(prodClient);
void migrateCollections(prodClient);
164 changes: 82 additions & 82 deletions front/migrations/20230926_clean_up_qdrant_null_payloads.ts
Original file line number Diff line number Diff line change
@@ -1,94 +1,94 @@
// import { QdrantClient } from "@qdrant/js-client-rest";
import { QdrantClient } from "@qdrant/js-client-rest";

// const { QDRANT_API_KEY, QDRANT_URL } = process.env;
const { QDRANT_API_KEY, QDRANT_URL } = process.env;

// const client = new QdrantClient({
// url: QDRANT_URL,
// apiKey: QDRANT_API_KEY,
// });
const client = new QdrantClient({
url: QDRANT_URL,
apiKey: QDRANT_API_KEY,
});

// async function cleanup(collections: { name: string }[]) {
// console.log(`Cleaning up ${collections.length} collections.`);
// for (const c of collections) {
// let done = 0;
// let offset: number | undefined | null | string | Record<string, unknown> =
// 0;
// while (offset !== null && offset !== undefined) {
// const res = await client.scroll(c.name, {
// filter: {
// must: [
// {
// is_empty: {
// key: "document_id_hash",
// },
// },
// ],
// },
// with_payload: true,
// limit: 1024,
// offset,
// });
async function cleanup(collections: { name: string }[]) {
console.log(`Cleaning up ${collections.length} collections.`);
for (const c of collections) {
let done = 0;
let offset: number | undefined | null | string | Record<string, unknown> =
0;
while (offset !== null && offset !== undefined) {
const res = await client.scroll(c.name, {
filter: {
must: [
{
is_empty: {
key: "document_id_hash",
},
},
],
},
with_payload: true,
limit: 1024,
offset,
});

// const points = res.points.map((p) => p.id);
// const r = await client.delete(c.name, { wait: true, points });
// console.log(`Deleted ${points.length} points from ${c.name} res=${r}`);
const points = res.points.map((p) => p.id);
const r = await client.delete(c.name, { wait: true, points });
console.log(`Deleted ${points.length} points from ${c.name} res=${r}`);

// offset = res.next_page_offset;
// done += res.points.length;
// }
offset = res.next_page_offset;
done += res.points.length;
}

// const rr = await client.getCollection(c.name);
// console.log("DONE", done);
// console.log(rr);
// }
// }
const rr = await client.getCollection(c.name);
console.log("DONE", done);
console.log(rr);
}
}

// async function run() {
// const result = await client.getCollections();
// const collections = result.collections;
// // const collections = COLLECTION_NAMES;
async function run() {
const result = await client.getCollections();
const collections = result.collections;
// const collections = COLLECTION_NAMES;

// const collectionsWithNulls: { name: string }[] = [];
const collectionsWithNulls: { name: string }[] = [];

// console.log(`Processing ${collections.length} collections.`);
// let i = 0;
// for (const c of collections) {
// let done = 0;
// let offset: number | undefined | null | string | Record<string, unknown> =
// 0;
// while (offset !== null && offset !== undefined) {
// const res = await client.scroll(c.name, {
// filter: {
// must: [
// {
// is_empty: {
// key: "document_id_hash",
// },
// },
// ],
// },
// with_payload: true,
// limit: 1024,
// offset,
// });
console.log(`Processing ${collections.length} collections.`);
let i = 0;
for (const c of collections) {
let done = 0;
let offset: number | undefined | null | string | Record<string, unknown> =
0;
while (offset !== null && offset !== undefined) {
const res = await client.scroll(c.name, {
filter: {
must: [
{
is_empty: {
key: "document_id_hash",
},
},
],
},
with_payload: true,
limit: 1024,
offset,
});

// offset = res.next_page_offset;
// done += res.points.length;
// }
// if (done > 0) {
// console.log(`NULL_FOUND [${c.name}] found=${done}`);
// collectionsWithNulls.push(c);
// }
// i++;
// if (i % 32 === 0) {
// console.log(`PROCESS [${i}/${collections.length}] sleeping 1s`);
// await new Promise((resolve) => setTimeout(resolve, 1000));
// }
// }
offset = res.next_page_offset;
done += res.points.length;
}
if (done > 0) {
console.log(`NULL_FOUND [${c.name}] found=${done}`);
collectionsWithNulls.push(c);
}
i++;
if (i % 32 === 0) {
console.log(`PROCESS [${i}/${collections.length}] sleeping 1s`);
await new Promise((resolve) => setTimeout(resolve, 1000));
}
}

// return collectionsWithNulls;
// }
return collectionsWithNulls;
}

// void (async () => {
// await cleanup(await run());
// })();
void (async () => {
await cleanup(await run());
})();
Loading
Loading