[front] Replace parents splice/unshift logic with validation in `lib/api/data_sources.ts` (#9948)

* remove the ability of upsertTable and upsertDocument to mutate parents, replace with validation

* fix a call site

* use the tableId in upsertArgs if provided (contains the content node ID if already existing)

* fix the check

* move the validation up in the execution flow

* rename name to document_id in UpsertDocumentArgs

* patch the types

* invert an && statement to optimize for the most common case
aubin-tchoi authored Jan 14, 2025
1 parent d857ad6 commit cab4e8b
Showing 8 changed files with 81 additions and 60 deletions.
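In short, upsertDocument and upsertTable no longer reorder parents with splice/unshift; they only validate that the caller already passed a consistent hierarchy. The sketch below is a minimal standalone illustration of the invariant the new checks enforce; the function name and the plain result type are simplifications for this illustration, not the Err(new DustError(...)) shapes used in the actual code.

type ParentsValidation =
  | { ok: true }
  | { ok: false; code: "invalid_parents" | "invalid_parent_id"; message: string };

// Invariant: parents[0] === nodeId, and parents[1] === parentId
// (or parentId is null and parents has fewer than two entries).
function validateParents(
  nodeId: string,
  parents: string[] | undefined,
  parentId: string | null | undefined
): ParentsValidation {
  const effectiveParents = parents ?? [nodeId];
  const effectiveParentId = parentId ?? null;

  if (effectiveParents[0] !== nodeId) {
    return {
      ok: false,
      code: "invalid_parents",
      message: "parents[0] and the node id should be equal",
    };
  }
  if (
    (effectiveParents.length >= 2 || effectiveParentId !== null) &&
    effectiveParents[1] !== effectiveParentId
  ) {
    return {
      ok: false,
      code: "invalid_parent_id",
      message: "parents[1] and parent_id should be equal",
    };
  }
  return { ok: true };
}

// validateParents("doc-123", ["doc-123", "folder-9"], "folder-9"); // ok
// validateParents("doc-123", ["folder-9", "doc-123"], "folder-9"); // invalid_parents
// validateParents("doc-123", undefined, null);                     // ok: parents defaults to ["doc-123"]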
2 changes: 1 addition & 1 deletion front/components/data_source/MultipleDocumentsUpload.tsx
@@ -122,7 +122,7 @@ export const MultipleDocumentsUpload = ({
// Have to use the filename to avoid fileId becoming apparent in the UI.
upsertArgs: {
title: blob.filename,
name: blob.filename,
document_id: blob.filename,
},
});

108 changes: 66 additions & 42 deletions front/lib/api/data_sources.ts
@@ -226,8 +226,9 @@ export async function augmentDataSourceWithConnectorDetails(
fetchConnectorErrorMessage,
};
}
export type UpsertDocumentArgs = {
name: string;

export interface UpsertDocumentArgs {
document_id: string;
source_url?: string | null;
text?: string | null;
section?: FrontDataSourceDocumentSectionType | null;
@@ -240,9 +241,10 @@ export type UpsertDocumentArgs = {
auth: Authenticator;
mime_type: string;
title: string;
};
}

export async function upsertDocument({
name,
document_id,
source_url,
text,
section,
@@ -268,6 +270,33 @@ export async function upsertDocument({
DustError
>
> {
// enforcing validation on the parents and parent_id
const documentId = document_id;
const documentParents = parents || [documentId];
const documentParentId = parent_id ?? null;

// parents must comply with the invariant parents[0] === document_id
if (documentParents[0] !== documentId) {
return new Err(
new DustError(
"invalid_parents",
"Invalid request body, parents[0] and document_id should be equal"
)
);
}
// parents and parentId must comply with the invariant parents[1] === parentId || (parentId === null && parents.length < 2)
if (
(documentParents.length >= 2 || documentParentId !== null) &&
documentParents[1] !== documentParentId
) {
return new Err(
new DustError(
"invalid_parent_id",
"Invalid request body, parents[1] and parent_id should be equal"
)
);
}

let sourceUrl: string | null = null;
if (source_url) {
const { valid: isSourceUrlValid, standardized: standardizedSourceUrl } =
@@ -317,15 +346,6 @@ export async function upsertDocument({
);
}

if (parent_id && parents && parents[1] !== parent_id) {
return new Err(
new DustError(
"invalid_parent_id",
"Invalid request body, parents[1] and parent_id should be equal"
)
);
}

const fullText = sectionFullText(generatedSection);

const coreAPI = new CoreAPI(apiConfig.getCoreAPIConfig(), logger);
@@ -384,24 +404,13 @@ export async function upsertDocument({
// Data source operations are performed with our credentials.
const credentials = dustManagedCredentials();

const documentId = name;
const documentParents = parents || [];

// Ensure that the documentId is included in the parents as the first item.
// remove it if it's already present and add it as the first item.
const indexOfDocumentId = documentParents.indexOf(documentId);
if (indexOfDocumentId !== -1) {
documentParents.splice(indexOfDocumentId, 1);
}
documentParents.unshift(documentId);

// Create document with the Dust internal API.
const upsertRes = await coreAPI.upsertDataSourceDocument({
projectId: dataSource.dustAPIProjectId,
dataSourceId: dataSource.dustAPIDataSourceId,
documentId: documentId,
documentId,
tags: nonNullTags,
parentId: parent_id ?? null,
parentId: documentParentId,
parents: documentParents,
sourceUrl,
// TEMPORARY -- need to unstuck a specific entry
@@ -426,8 +435,8 @@ export async function upsertDocument({
return new Ok(upsertRes.value);
}

export type UpsertTableArgs = {
tableId?: string | null;
export interface UpsertTableArgs {
tableId: string;
name: string;
description: string;
truncate: boolean;
@@ -442,7 +451,8 @@ export type UpsertTableArgs = {
useAppForHeaderDetection?: boolean;
title: string;
mimeType: string;
};
}

export async function upsertTable({
tableId,
name,
@@ -460,16 +470,30 @@ export async function upsertTable({
title,
mimeType,
}: UpsertTableArgs) {
const nonNullTableId = tableId ?? generateRandomModelSId();
const tableParents: string[] = parents ?? [];

// Ensure that the nonNullTableId is included in the parents as the first item.
// remove it if it's already present and add it as the first item.
const indexOfTableId = tableParents.indexOf(nonNullTableId);
if (indexOfTableId !== -1) {
tableParents.splice(indexOfTableId, 1);
const tableParents = parents ?? [tableId];
const tableParentId = parentId ?? null;

// parents must comply with the invariant parents[0] === tableId
if (tableParents[0] !== tableId) {
return new Err(
new DustError(
"invalid_parents",
"Invalid request body, parents[0] and table_id should be equal"
)
);
}
// parents and parentId must comply with the invariant parents[1] === parentId || (parentId === null && parents.length < 2)
if (
(tableParents.length >= 2 || tableParentId !== null) &&
tableParents[1] !== tableParentId
) {
return new Err(
new DustError(
"invalid_parent_id",
"Invalid request body, parents[1] and parent_id should be equal"
)
);
}
tableParents.unshift(nonNullTableId);

const flags = await getFeatureFlags(auth.getNonNullableWorkspace());

@@ -496,12 +520,12 @@ export async function upsertTable({
upsertTable: {
workspaceId: auth.getNonNullableWorkspace().sId,
dataSourceId: dataSource.sId,
tableId: nonNullTableId,
tableId,
tableName: name,
tableDescription: description,
tableTimestamp: timestamp ?? null,
tableTags: tags ?? [],
tableParentId: parentId ?? null,
tableParentId,
tableParents,
csv: csv ?? null,
truncate,
@@ -521,12 +545,12 @@ export async function upsertTable({
const tableRes = await upsertTableFromCsv({
auth,
dataSource: dataSource,
tableId: nonNullTableId,
tableId,
tableName: name,
tableDescription: description,
tableTimestamp: timestamp ?? null,
tableTags: tags || [],
tableParentId: parentId ?? null,
tableParentId,
tableParents,
csv: csv ?? null,
truncate,
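Seen from a call site, the change in contract means upsertTable now expects the caller to supply the tableId and to pass parents that already lead with it (the old code would have reordered them itself). Below is a hedged sketch of a compliant call; the authenticator, data source, and id values are placeholders rather than real call-site code.

import { upsertTable } from "@app/lib/api/data_sources";
import { generateRandomModelSId } from "@app/lib/resources/string_ids";
import type { Authenticator } from "@app/lib/auth";
import type { DataSourceResource } from "@app/lib/resources/data_source_resource";

// Placeholders standing in for values a real call site already has.
declare const auth: Authenticator;
declare const dataSource: DataSourceResource;

const tableId = generateRandomModelSId(); // or an existing content node id, if one exists
const parentFolderId = "fld_placeholder";

await upsertTable({
  tableId,
  name: "quarterly_report",
  description: "Quarterly revenue figures",
  truncate: true,
  csv: "quarter,revenue\nQ4,42",
  tags: [],
  parents: [tableId, parentFolderId], // invariant: parents[0] === tableId
  parentId: parentFolderId, // invariant: parents[1] === parentId
  async: false,
  title: "Quarterly report",
  mimeType: "text/csv",
  dataSource,
  auth,
});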
7 changes: 3 additions & 4 deletions front/lib/api/files/upsert.ts
@@ -207,14 +207,13 @@ const upsertDocumentToDatasource: ProcessingFunction = async ({
upsertArgs,
}) => {
// Use the file id as the document id to make it easy to track the document back to the file.
const documentId = file.sId;
const sourceUrl = file.getPrivateUrl(auth);

const upsertDocumentRes = await upsertDocument({
name: documentId,
document_id: file.sId,
source_url: sourceUrl,
text: content,
parents: [documentId],
parents: [file.sId],
tags: [`title:${file.fileName}`, `fileId:${file.sId}`],
light_document_output: true,
dataSource,
@@ -245,7 +244,7 @@ const upsertTableToDatasource: ProcessingFunction = async ({
dataSource,
upsertArgs,
}) => {
const tableId = file.sId; // Use the file sId as the table id to make it easy to track the table back to the file.
const tableId = upsertArgs?.tableId ?? file.sId; // Use the file sId as a fallback for the table_id to make it easy to track the table back to the file.
const upsertTableRes = await upsertTable({
tableId,
name: slugify(file.fileName),
1 change: 1 addition & 0 deletions front/lib/error.ts
@@ -10,6 +10,7 @@ export type DustErrorCode =
| "data_source_quota_error"
| "text_or_section_required"
| "invalid_url"
| "invalid_parents"
| "invalid_parent_id"
// Table
| "missing_csv"
16 changes: 5 additions & 11 deletions front/pages/api/w/[wId]/data_sources/[dsId]/files.ts
@@ -1,6 +1,5 @@
import type { FileType, WithAPIErrorResponse } from "@dust-tt/types";
import type { NextApiRequest } from "next";
import type { NextApiResponse } from "next";
import type { NextApiRequest, NextApiResponse } from "next";

import { withSessionAuthenticationForWorkspace } from "@app/lib/api/auth_wrappers";
import type {
@@ -17,16 +16,11 @@ import { apiError } from "@app/logger/withlogging";
export interface UpsertFileToDataSourceRequestBody {
fileId: string;
upsertArgs?:
| Pick<UpsertDocumentArgs, "name" | "title" | "tags">
| Pick<
| Pick<UpsertDocumentArgs, "document_id" | "title" | "tags">
| (Pick<
UpsertTableArgs,
| "name"
| "title"
| "description"
| "tableId"
| "tags"
| "useAppForHeaderDetection"
>;
"name" | "title" | "description" | "tags" | "useAppForHeaderDetection"
> & { tableId: string | undefined }); // we don't always have a tableId; this is dirty, but the refactoring should be done at the level of the whole upsertArgs mechanism
}

export interface UpsertFileToDataSourceResponseBody {
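To make the two accepted shapes of upsertArgs concrete, here are two hypothetical request bodies for this endpoint, typed against the UpsertFileToDataSourceRequestBody interface defined just above. Every value below is made up for illustration; the document variant reuses the file name as document_id, the way MultipleDocumentsUpload does in the first diff.

// Upserting a file as a document (document_id / title / tags only).
const documentUpsertBody: UpsertFileToDataSourceRequestBody = {
  fileId: "fil_abc123",
  upsertArgs: {
    document_id: "quarterly-report.pdf",
    title: "quarterly-report.pdf",
    tags: ["title:quarterly-report.pdf"],
  },
};

// Upserting a file as a table; tableId may legitimately be undefined,
// in which case upsertTableToDatasource falls back to the file sId.
const tableUpsertBody: UpsertFileToDataSourceRequestBody = {
  fileId: "fil_def456",
  upsertArgs: {
    tableId: undefined,
    name: "quarterly_report",
    title: "Quarterly report",
    description: "Quarterly revenue figures",
    tags: [],
    useAppForHeaderDetection: false,
  },
};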
@@ -118,7 +118,7 @@ async function handler(
} = bodyValidation.right;

const upsertResult = await upsertDocument({
name: documentId,
document_id: documentId,
source_url,
text,
section,
@@ -111,7 +111,7 @@ async function handler(
} = bodyValidation.right;

const upsertResult = await upsertDocument({
name,
document_id: name, // using the name as the document_id since we don't have one here
source_url,
text,
section,
@@ -10,6 +10,7 @@ import { withResourceFetchingFromRoute } from "@app/lib/api/resource_wrappers";
import type { Authenticator } from "@app/lib/auth";
import { DataSourceResource } from "@app/lib/resources/data_source_resource";
import type { SpaceResource } from "@app/lib/resources/space_resource";
import { generateRandomModelSId } from "@app/lib/resources/string_ids";
import { apiError } from "@app/logger/withlogging";

export const config = {
@@ -93,11 +94,13 @@ async function handler(
});
}

const tableId = generateRandomModelSId();
const upsertRes = await upsertTable({
...bodyValidation.right,
async: bodyValidation.right.async ?? false,
dataSource,
auth,
tableId,
});

if (upsertRes.isErr()) {
