Skip to content

Commit

Permalink
enh: add sId column on content fragments (#8647)
Browse files Browse the repository at this point in the history
* enh: add sId column on content fragments

* index concurrently

* add backfill script ref in SQL

* move index creation to second migration

---------

Co-authored-by: Henry Fontanier <henry@dust.tt>
  • Loading branch information
fontanierh and Henry Fontanier authored Nov 14, 2024
1 parent 2e3c53b commit 448a5a2
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 2 deletions.
4 changes: 3 additions & 1 deletion front/lib/resources/content_fragment_resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { BaseResource } from "@app/lib/resources/base_resource";
import { FileResource } from "@app/lib/resources/file_resource";
import { ContentFragmentModel } from "@app/lib/resources/storage/models/content_fragment";
import type { ReadonlyAttributesType } from "@app/lib/resources/storage/types";
import { generateRandomModelSId } from "@app/lib/resources/string_ids";
import logger from "@app/logger/logger";

const MAX_BYTE_SIZE_CSV_RENDER_FULL_CONTENT = 500 * 1024; // 500 KB
Expand All @@ -45,12 +46,13 @@ export class ContentFragmentResource extends BaseResource<ContentFragmentModel>
}

static async makeNew(
blob: CreationAttributes<ContentFragmentModel>,
blob: Omit<CreationAttributes<ContentFragmentModel>, "sId">,
transaction?: Transaction
) {
const contentFragment = await ContentFragmentModel.create(
{
...blob,
sId: generateRandomModelSId("cf"),
},
{
transaction,
Expand Down
7 changes: 6 additions & 1 deletion front/lib/resources/storage/models/content_fragment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export class ContentFragmentModel extends Model<
declare createdAt: CreationOptional<Date>;
declare updatedAt: CreationOptional<Date>;

declare sId: string;
declare title: string;
declare contentType: SupportedContentFragmentType;
declare sourceUrl: string | null; // GCS (upload) or Slack or ...
Expand Down Expand Up @@ -54,6 +55,10 @@ ContentFragmentModel.init(
allowNull: false,
defaultValue: DataTypes.NOW,
},
sId: {
type: DataTypes.STRING,
allowNull: false,
},
title: {
type: DataTypes.TEXT,
allowNull: false,
Expand Down Expand Up @@ -90,7 +95,7 @@ ContentFragmentModel.init(
{
modelName: "content_fragment",
sequelize: frontSequelize,
indexes: [{ fields: ["fileId"] }],
indexes: [{ fields: ["fileId"] }, { fields: ["sId"] }],
}
);

Expand Down
62 changes: 62 additions & 0 deletions front/migrations/20241114_backfill_cf_sid.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import { Op } from "sequelize";

import { ContentFragmentModel } from "@app/lib/resources/storage/models/content_fragment";
import { generateRandomModelSId } from "@app/lib/resources/string_ids";
import { makeScript } from "@app/scripts/helpers";

// Backfill script: gives every content fragment whose sId is still NULL a
// freshly generated "cf" sId. Rows are visited in ascending id order, one batch
// at a time, and nothing is written unless the script is run with `execute`.
makeScript({}, async ({ execute }, logger) => {
  const batchSize = 1000;
  // Id of the last row of the previous batch; each query only looks above it.
  let cursor = 0;

  // Generates an sId for a single fragment, persists it, then logs the result.
  const assignSId = async (fragment: ContentFragmentModel) => {
    const sId = generateRandomModelSId("cf");
    await fragment.update({ sId });
    logger.info(
      {
        contentFragmentId: fragment.id,
        sId,
      },
      "Updated content fragment with sId"
    );
  };

  for (;;) {
    // Next batch of content fragments that have no sId yet.
    const batch: ContentFragmentModel[] = await ContentFragmentModel.findAll({
      // @ts-expect-error -- sequelize type for sId is not nullable (it temporarily is in db)
      where: {
        id: { [Op.gt]: cursor },
        sId: { [Op.is]: null },
      },
      order: [["id", "ASC"]],
      limit: batchSize,
    });

    // An empty batch means there is nothing left above the cursor.
    if (batch.length === 0) {
      break;
    }

    logger.info(
      `Processing ${batch.length} content fragments starting from ID ${cursor}`
    );

    if (!execute) {
      // Dry run: report what would happen without touching any row.
      logger.info(
        {
          lastSeenId: cursor,
          count: batch.length,
        },
        "Dry run - would have updated content fragments with sIds"
      );
    } else {
      // All rows of the batch are updated concurrently.
      await Promise.all(batch.map((fragment) => assignSId(fragment)));
    }

    // Move the cursor forward even in dry-run mode (where rows keep a NULL
    // sId), so the same batch is never fetched twice.
    cursor = batch[batch.length - 1].id;
  }
});
5 changes: 5 additions & 0 deletions front/migrations/db/migration_111.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Migration created on Nov 14, 2024
-- Adds the "sId" column to content fragments. No NOT NULL constraint is
-- declared at this point, so existing rows are left with a NULL "sId".
ALTER TABLE "public"."content_fragments" ADD COLUMN "sId" VARCHAR(255);
10 changes: 10 additions & 0 deletions front/migrations/db/migration_112.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Migration created on Nov 14, 2024
-- Backfill script that needs to be run: 20241114_backfill_cf_sid.ts

-- Make "sId" mandatory on content fragments.
ALTER TABLE "public"."content_fragments" ALTER COLUMN "sId" SET NOT NULL;

-- Index "sId".
-- NOTE(review): assuming PostgreSQL, CREATE INDEX CONCURRENTLY cannot run
-- inside a transaction block -- confirm how this file is applied.
CREATE INDEX CONCURRENTLY "content_fragments_s_id" ON "content_fragments" ("sId");

0 comments on commit 448a5a2

Please sign in to comment.