diff --git a/connectors/src/lib/data_sources.ts b/connectors/src/lib/data_sources.ts index cd9dfda01b74..e52c21025bd5 100644 --- a/connectors/src/lib/data_sources.ts +++ b/connectors/src/lib/data_sources.ts @@ -153,6 +153,7 @@ async function _upsertToDatasource({ title, mime_type: mimeType, tags: tags?.map((tag) => tag.substring(0, 512)), + parent_id: parents[1] ?? null, parents, light_document_output: true, upsert_context: upsertContext, @@ -770,6 +771,7 @@ export async function upsertTableFromCsv({ `/data_sources/${dataSourceConfig.dataSourceId}/tables/csv`; const dustRequestPayload: UpsertTableFromCsvRequestType = { name: tableName, + parentId: parents[1] ?? null, parents, description: tableDescription, csv: tableCsv, @@ -1148,6 +1150,7 @@ export async function upsertFolderNode({ folderId, timestampMs ? timestampMs : now.getTime(), title, + parents[1] ?? null, parents ); diff --git a/core/bin/core_api.rs b/core/bin/core_api.rs index fa4893f3c765..a3af7d977ea2 100644 --- a/core/bin/core_api.rs +++ b/core/bin/core_api.rs @@ -1468,6 +1468,7 @@ async fn data_sources_documents_update_tags( #[derive(serde::Deserialize)] struct DataSourcesDocumentsUpdateParentsPayload { + parent_id: Option, parents: Vec, } @@ -1489,6 +1490,17 @@ async fn data_sources_documents_update_parents( ); } + if let Some(parent_id) = &payload.parent_id { + if payload.parents.get(1) != Some(parent_id) { + return error_response( + StatusCode::BAD_REQUEST, + "invalid_parent_id", + "Failed to update document parents - parents[1] and parent_id should be equal", + None, + ); + } + } + match state .store .load_data_source(&project, &data_source_id) @@ -1617,6 +1629,7 @@ struct DataSourcesDocumentsUpsertPayload { document_id: String, timestamp: Option, tags: Vec, + parent_id: Option, parents: Vec, source_url: Option, section: Section, @@ -1648,6 +1661,17 @@ async fn data_sources_documents_upsert( ); } + if let Some(parent_id) = &payload.parent_id { + if payload.parents.get(1) != Some(parent_id) { + return error_response( + StatusCode::BAD_REQUEST, + "invalid_parent_id", + "Failed to upsert document - parents[1] and parent_id should be equal", + None, + ); + } + } + match state .store .load_data_source(&project, &data_source_id) @@ -2074,6 +2098,7 @@ struct DatabasesTablesUpsertPayload { description: String, timestamp: Option, tags: Vec, + parent_id: Option, parents: Vec, // Remote DB specifics @@ -2103,6 +2128,17 @@ async fn tables_upsert( ); } + if let Some(parent_id) = &payload.parent_id { + if payload.parents.get(1) != Some(parent_id) { + return error_response( + StatusCode::BAD_REQUEST, + "invalid_parent_id", + "Failed to upsert table - parents[1] and parent_id should be equal", + None, + ); + } + } + match state .store .upsert_data_source_table( @@ -2351,6 +2387,17 @@ async fn tables_update_parents( ); } + if let Some(parent_id) = &payload.parent_id { + if payload.parents.get(1) != Some(parent_id) { + return error_response( + StatusCode::BAD_REQUEST, + "invalid_parent_id", + "Failed to update table parents - parents[1] and parent_id should be equal", + None, + ); + } + } + match state .store .load_data_source_table(&project, &data_source_id, &table_id) @@ -2725,6 +2772,7 @@ async fn tables_rows_list( struct FoldersUpsertPayload { folder_id: String, timestamp: Option, + parent_id: Option, parents: Vec, title: String, } @@ -2747,6 +2795,17 @@ async fn folders_upsert( ); } + if let Some(parent_id) = &payload.parent_id { + if payload.parents.get(1) != Some(parent_id) { + return error_response( + StatusCode::BAD_REQUEST, + "invalid_parent_id", + "Failed to upsert folder - parents[1] and parent_id should be equal", + None, + ); + } + } + match state .store .upsert_data_source_folder( diff --git a/core/src/data_sources/data_source.rs b/core/src/data_sources/data_source.rs index b40445334d9c..d0dbf182ffce 100644 --- a/core/src/data_sources/data_source.rs +++ b/core/src/data_sources/data_source.rs @@ -142,6 +142,7 @@ pub struct Document { pub title: String, pub mime_type: String, pub tags: Vec, + pub parent_id: Option, pub parents: Vec, pub source_url: Option, pub hash: String, @@ -161,6 +162,7 @@ impl Document { title: &str, mime_type: &str, tags: &Vec, + parent_id: &Option, parents: &Vec, source_url: &Option, hash: &str, @@ -174,6 +176,7 @@ impl Document { title: title.to_string(), mime_type: mime_type.to_string(), tags: tags.clone(), + parent_id: parent_id.clone(), parents: parents.clone(), source_url: source_url.clone(), hash: hash.to_string(), @@ -216,6 +219,7 @@ impl From for Node { document.timestamp, &document.title, &document.mime_type, + document.parent_id, document.parents.clone(), ) } @@ -686,7 +690,8 @@ impl DataSource { title.as_deref().unwrap_or(document_id), mime_type.as_deref().unwrap_or("application/octet-stream"), &tags, - &parents, + &parents.get(1).cloned(), + parents, source_url, &document_hash, full_text.len() as u64, diff --git a/core/src/data_sources/folder.rs b/core/src/data_sources/folder.rs index 36ab99b3ff12..ddef480f9b5e 100644 --- a/core/src/data_sources/folder.rs +++ b/core/src/data_sources/folder.rs @@ -6,6 +6,7 @@ pub struct Folder { folder_id: String, timestamp: u64, title: String, + parent_id: Option, parents: Vec, } @@ -18,13 +19,15 @@ impl Folder { folder_id: String, timestamp: u64, title: String, + parent_id: Option, parents: Vec, ) -> Self { Folder { - data_source_id: data_source_id, - folder_id: folder_id, + data_source_id, + folder_id, timestamp, - title: title, + title, + parent_id, parents, } } @@ -41,6 +44,9 @@ impl Folder { pub fn title(&self) -> &str { &self.title } + pub fn parent_id(&self) -> &Option { + &self.parent_id + } pub fn parents(&self) -> &Vec { &self.parents } diff --git a/core/src/data_sources/node.rs b/core/src/data_sources/node.rs index 665fef7b7e8f..41e42b588b60 100644 --- a/core/src/data_sources/node.rs +++ b/core/src/data_sources/node.rs @@ -17,6 +17,7 @@ pub struct Node { timestamp: u64, title: String, mime_type: String, + parent_id: Option, parents: Vec, } @@ -28,6 +29,7 @@ impl Node { timestamp: u64, title: &str, mime_type: &str, + parent_id: Option, parents: Vec, ) -> Self { Node { @@ -37,6 +39,7 @@ impl Node { timestamp, title: title.to_string(), mime_type: mime_type.to_string(), + parent_id: parent_id.clone(), parents, } } @@ -70,6 +73,7 @@ impl Node { self.node_id, self.timestamp, self.title, + self.parent_id, self.parents, ) } diff --git a/core/src/databases/table.rs b/core/src/databases/table.rs index 6b04dd08733b..91f0cceb6adb 100644 --- a/core/src/databases/table.rs +++ b/core/src/databases/table.rs @@ -60,6 +60,7 @@ pub struct Table { tags: Vec, title: String, mime_type: String, + parent_id: Option, parents: Vec, schema: Option, @@ -81,6 +82,7 @@ impl Table { title: String, mime_type: String, tags: Vec, + parent_id: Option, parents: Vec, schema: Option, schema_stale_at: Option, @@ -88,18 +90,19 @@ impl Table { remote_database_secret_id: Option, ) -> Self { Table { - project: project, - data_source_id: data_source_id, + project, + data_source_id, created, - table_id: table_id, - name: name, - description: description, + table_id, + name, + description, timestamp, tags, - title: title, - mime_type: mime_type, + title, + mime_type, + parent_id, parents, - schema: schema, + schema, schema_stale_at, remote_database_table_id, remote_database_secret_id, @@ -124,6 +127,9 @@ impl Table { pub fn mime_type(&self) -> &str { &self.mime_type } + pub fn parent_id(&self) -> &Option { + &self.parent_id + } pub fn parents(&self) -> &Vec { &self.parents } @@ -228,7 +234,8 @@ impl From for Node { table.timestamp, &table.title, &table.mime_type, - table.parents.clone(), + table.parents.get(1).cloned(), + table.parents, ) } } @@ -574,6 +581,7 @@ mod tests { "test_dbml".to_string(), "text/plain".to_string(), vec![], + None, vec![], Some(schema), None, diff --git a/core/src/stores/postgres.rs b/core/src/stores/postgres.rs index 2cfa394e8bc7..11312d9eb474 100644 --- a/core/src/stores/postgres.rs +++ b/core/src/stores/postgres.rs @@ -1400,6 +1400,7 @@ impl Store for PostgresStore { document_id, tags, mime_type: node_mime_type.unwrap_or("application/octet-stream".to_string()), + parent_id: parents.get(1).cloned(), parents, source_url, hash, @@ -1896,6 +1897,7 @@ impl Store for PostgresStore { document_id: create_params.document_id, timestamp: create_params.timestamp, tags: create_params.tags, + parent_id: create_params.parents.get(1).cloned(), parents: create_params.parents, source_url: create_params.source_url, hash: create_params.hash, @@ -2053,6 +2055,7 @@ impl Store for PostgresStore { mime_type: node_mime_type.unwrap_or("application/octet-stream".to_string()), document_id, tags, + parent_id: parents.get(1).cloned(), parents, source_url, hash, @@ -2670,6 +2673,7 @@ impl Store for PostgresStore { title, upsert_params.mime_type.unwrap_or("text/csv".to_string()), upsert_params.tags, + upsert_params.parents.get(1).cloned(), upsert_params.parents, parsed_schema, table_schema_stale_at.map(|t| t as u64), @@ -2943,6 +2947,7 @@ impl Store for PostgresStore { title, "text/csv".to_string(), // TODO(KW_SEARCH_INFRA) use mimetype tags, + parents.get(1).cloned(), parents, parsed_schema, schema_stale_at.map(|t| t as u64), @@ -3084,6 +3089,7 @@ impl Store for PostgresStore { title, "text/csv".to_string(), // TODO(KW_SEARCH_INFRA)use mimetype tags, + parents.get(1).cloned(), parents, parsed_schema, schema_stale_at.map(|t| t as u64), @@ -3216,6 +3222,7 @@ impl Store for PostgresStore { upsert_params.folder_id, created as u64, upsert_params.title, + upsert_params.parents.get(1).cloned(), upsert_params.parents, ); @@ -3390,6 +3397,7 @@ impl Store for PostgresStore { node_id, timestamp as u64, title, + parents.get(1).cloned(), parents, )) }) @@ -3495,6 +3503,7 @@ impl Store for PostgresStore { timestamp as u64, &title, &mime_type, + parents.get(1).cloned(), parents, ), row_id, diff --git a/front/components/data_source/DocumentUploadOrEditModal.tsx b/front/components/data_source/DocumentUploadOrEditModal.tsx index f407a24cc97d..b9d1d13ffb8a 100644 --- a/front/components/data_source/DocumentUploadOrEditModal.tsx +++ b/front/components/data_source/DocumentUploadOrEditModal.tsx @@ -139,6 +139,7 @@ export const DocumentUploadOrEditModal = ({ title: initialId ?? document.name, mime_type: document.mimeType ?? undefined, timestamp: null, + parent_id: null, parents: [initialId ?? document.name], section: { prefix: null, content: document.text, sections: [] }, text: null, diff --git a/front/components/data_source/MultipleDocumentsUpload.tsx b/front/components/data_source/MultipleDocumentsUpload.tsx index f463847c8616..b9cd5a0b7aed 100644 --- a/front/components/data_source/MultipleDocumentsUpload.tsx +++ b/front/components/data_source/MultipleDocumentsUpload.tsx @@ -144,6 +144,7 @@ export const MultipleDocumentsUpload = ({ title: blob.filename, mime_type: blob.contentType ?? undefined, timestamp: null, + parent_id: null, parents: [blob.filename], section: { prefix: null, diff --git a/front/components/data_source/TableUploadOrEditModal.tsx b/front/components/data_source/TableUploadOrEditModal.tsx index d360e203f229..a3e072c6e7b2 100644 --- a/front/components/data_source/TableUploadOrEditModal.tsx +++ b/front/components/data_source/TableUploadOrEditModal.tsx @@ -141,6 +141,7 @@ export const TableUploadOrEditModal = ({ tableId: initialId, timestamp: null, tags: [], + parentId: null, parents: [], truncate: true, async: false, diff --git a/front/lib/api/data_sources.ts b/front/lib/api/data_sources.ts index d242809d6839..34d34796c10e 100644 --- a/front/lib/api/data_sources.ts +++ b/front/lib/api/data_sources.ts @@ -228,6 +228,7 @@ export async function upsertDocument({ text, section, tags, + parent_id, parents, timestamp, light_document_output, @@ -241,6 +242,7 @@ export async function upsertDocument({ text?: string | null; section?: FrontDataSourceDocumentSectionType | null; tags?: string[] | null; + parent_id?: string | null; parents?: string[] | null; timestamp?: number | null; light_document_output?: boolean; @@ -310,6 +312,15 @@ export async function upsertDocument({ ); } + if (parent_id && parents && parents[1] !== parent_id) { + return new Err( + new DustError( + "invalid_parent_id", + "Invalid request body, parents[1] and parent_id should be equal" + ) + ); + } + const fullText = sectionFullText(generatedSection); const coreAPI = new CoreAPI(apiConfig.getCoreAPIConfig(), logger); @@ -385,6 +396,7 @@ export async function upsertDocument({ dataSourceId: dataSource.dustAPIDataSourceId, documentId: documentId, tags: nonNullTags, + parentId: parent_id ?? null, parents: documentParents, sourceUrl, // TEMPORARY -- need to unstuck a specific entry @@ -416,6 +428,7 @@ export async function upsertTable({ truncate, csv, tags, + parentId, parents, timestamp, async, @@ -431,6 +444,7 @@ export async function upsertTable({ truncate: boolean; csv?: string | null; tags?: string[] | null; + parentId?: string | null; parents?: string[] | null; timestamp?: number | null; async: boolean; @@ -481,6 +495,7 @@ export async function upsertTable({ tableDescription: description, tableTimestamp: timestamp ?? null, tableTags: tags ?? [], + tableParentId: parentId ?? null, tableParents, csv: csv ?? null, truncate, @@ -505,6 +520,7 @@ export async function upsertTable({ tableDescription: description, tableTimestamp: timestamp ?? null, tableTags: tags || [], + tableParentId: parentId ?? null, tableParents, csv: csv ?? null, truncate, @@ -595,6 +611,7 @@ export async function handleDataSourceTableCSVUpsert({ | "data_source_error" | "invalid_rows" | "resource_not_found" + | "invalid_parent_id" | "internal_error"; } > @@ -653,6 +670,7 @@ export async function handleDataSourceTableCSVUpsert({ tableDescription: description, tableTimestamp: params.timestamp ?? null, tableTags: params.tags ?? [], + tableParentId: params.parentId ?? null, tableParents, csv: csv ?? null, truncate, @@ -685,6 +703,7 @@ export async function handleDataSourceTableCSVUpsert({ tableDescription: description, tableTimestamp: params.timestamp ?? null, tableTags: params.tags || [], + tableParentId: params.parentId ?? null, tableParents, csv: csv ?? null, truncate, @@ -717,6 +736,12 @@ export async function handleDataSourceTableCSVUpsert({ message: "Invalid request body: " + tableRes.error.inputValidationError, }); + } else if ("message" in tableRes.error) { + return new Err({ + name: "dust_error", + code: "invalid_parent_id", + message: "Invalid request body: " + tableRes.error.message, + }); } else { assertNever(tableRes.error); } diff --git a/front/lib/api/tables.ts b/front/lib/api/tables.ts index 89bdbb3a53dd..9203c5ce2fbc 100644 --- a/front/lib/api/tables.ts +++ b/front/lib/api/tables.ts @@ -67,6 +67,10 @@ export type TableOperationError = type: "invalid_request_error"; inputValidationError: InputValidationError; } + | { + type: "invalid_request_error"; + message: string; + } | { type: "not_found_error"; notFoundError: NotFoundError; @@ -129,6 +133,7 @@ export async function upsertTableFromCsv({ tableId, tableTimestamp, tableTags, + tableParentId, tableParents, csv, truncate, @@ -144,6 +149,7 @@ export async function upsertTableFromCsv({ tableId: string; tableTimestamp: number | null; tableTags: string[]; + tableParentId: string | null; tableParents: string[]; csv: string | null; truncate: boolean; @@ -181,6 +187,13 @@ export async function upsertTableFromCsv({ }); } + if (tableParentId && tableParents && tableParents[1] !== tableParentId) { + return new Err({ + type: "invalid_request_error", + message: "Invalid request body, parents[1] and parent_id should be equal", + }); + } + let csvRows: CoreAPIRow[] | undefined = undefined; if (csvRowsRes) { if (csvRowsRes.isErr()) { @@ -236,6 +249,7 @@ export async function upsertTableFromCsv({ description: tableDescription, timestamp: tableTimestamp, tags: tableTags, + parentId: tableParentId, parents: tableParents, title, mimeType, diff --git a/front/lib/error.ts b/front/lib/error.ts index ff0bfc9aa19a..b41bfc5e7fb3 100644 --- a/front/lib/error.ts +++ b/front/lib/error.ts @@ -10,6 +10,7 @@ export type DustErrorCode = | "data_source_quota_error" | "text_or_section_required" | "invalid_url" + | "invalid_parent_id" // Table | "missing_csv" | "invalid_rows" diff --git a/front/lib/upsert_queue.ts b/front/lib/upsert_queue.ts index 3334aa9af65e..0f38a37b5448 100644 --- a/front/lib/upsert_queue.ts +++ b/front/lib/upsert_queue.ts @@ -22,6 +22,7 @@ export const EnqueueUpsertDocument = t.intersection([ dataSourceId: t.string, documentId: t.string, tags: t.union([t.array(t.string), t.null]), + parentId: t.union([t.string, t.null]), parents: t.union([t.array(t.string), t.null]), sourceUrl: t.union([t.string, t.null]), timestamp: t.union([t.number, t.null]), @@ -48,6 +49,7 @@ export const EnqueueUpsertTable = t.intersection([ tableDescription: t.string, tableTimestamp: t.union([t.number, t.undefined, t.null]), tableTags: t.union([t.array(t.string), t.undefined, t.null]), + tableParentId: t.union([t.string, t.undefined, t.null]), tableParents: t.union([t.array(t.string), t.undefined, t.null]), csv: t.union([t.string, t.null]), truncate: t.boolean, diff --git a/front/migrations/20241129_fix_intercom_conversation_parents.ts b/front/migrations/20241129_fix_intercom_conversation_parents.ts index 08b040c9cdf0..5b176bfd1698 100644 --- a/front/migrations/20241129_fix_intercom_conversation_parents.ts +++ b/front/migrations/20241129_fix_intercom_conversation_parents.ts @@ -68,6 +68,7 @@ makeScript({}, async ({ execute }, logger) => { projectId: ds.dustAPIProjectId, dataSourceId: ds.dustAPIDataSourceId, documentId: row.document_id, + parentId: null, parents: row.parents ? [row.document_id, ...row.parents] : [row.document_id], diff --git a/front/migrations/20241211_parents_migrator.ts b/front/migrations/20241211_parents_migrator.ts index 0076a1fc8942..243af79021c5 100644 --- a/front/migrations/20241211_parents_migrator.ts +++ b/front/migrations/20241211_parents_migrator.ts @@ -193,6 +193,7 @@ async function migrateDocument({ dataSourceId: dataSource.dustAPIDataSourceId, documentId: coreDocument.document_id, parents: newParents, + parentId: null, }); if (updateRes.isErr()) { @@ -261,6 +262,7 @@ async function migrateTable({ dataSourceId: dataSource.dustAPIDataSourceId, tableId: coreTable.table_id, parents: newParents, + parentId: null, }); if (updateRes.isErr()) { diff --git a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts index 09f4457fda5c..2eb8fca8153a 100644 --- a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts +++ b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts @@ -147,11 +147,14 @@ export const config = { * items: * type: string * description: Tags to associate with the document. + * parent_id: + * type: string + * description: Direct parent document ID to associate with the document. * parents: * type: array * items: * type: string - * description: Parent document IDs to associate with the document. + * description: 'Document and ancestor ids, with the following convention: parents[0] === documentId, parents[1] === parent_id, and then ancestors ids in order' * timestamp: * type: number * description: Unix timestamp (in seconds) for the document (e.g. 1698225000). Can be null or omitted. @@ -497,6 +500,7 @@ async function handler( dataSourceId: dataSource.sId, documentId: req.query.documentId as string, tags: r.data.tags || [], + parentId: r.data.parent_id || null, parents: r.data.parents || [], timestamp: r.data.timestamp || null, sourceUrl, @@ -536,6 +540,7 @@ async function handler( dataSourceId: dataSource.dustAPIDataSourceId, documentId: req.query.documentId as string, tags: r.data.tags || [], + parentId: r.data.parent_id || null, parents: r.data.parents || [], sourceUrl, timestamp: r.data.timestamp || null, diff --git a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/parents.ts b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/parents.ts index ac808ff55e3f..980ab2c197d8 100644 --- a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/parents.ts +++ b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/parents.ts @@ -53,11 +53,14 @@ import { apiError } from "@app/logger/withlogging"; * schema: * type: object * properties: + * parent_id: + * type: string + * description: Direct parent ID of the document * parents: * type: array * items: * type: string - * description: Array of parent document IDs + * description: 'Document and ancestor ids, with the following convention: parents[0] === documentId, parents[1] === parentId, and then ancestors ids in order' * responses: * 200: * description: The parents were updated @@ -151,6 +154,7 @@ async function handler( projectId: dataSource.dustAPIProjectId, dataSourceId: dataSource.dustAPIDataSourceId, documentId: req.query.documentId as string, + parentId: req.body.parent_id ?? null, parents: req.body.parents, }); diff --git a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/folders/[fId].ts b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/folders/[fId].ts index 923099b4b6ad..24902360fb93 100644 --- a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/folders/[fId].ts +++ b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/folders/[fId].ts @@ -113,6 +113,17 @@ async function handler( }); } + const { timestamp, parent_id: parentId, parents, title } = r.data; + if (parentId && parents && parents[1] !== parentId) { + return apiError(req, res, { + status_code: 400, + api_error: { + type: "invalid_request_error", + message: `Invalid request body: parents[1] and parent_id should be equal`, + }, + }); + } + const statsDTags = [ `data_source_id:${dataSource.id}`, `workspace_id:${owner.sId}`, @@ -130,9 +141,10 @@ async function handler( projectId: dataSource.dustAPIProjectId, dataSourceId: dataSource.dustAPIDataSourceId, folderId: fId, - timestamp: r.data.timestamp || null, - parents: r.data.parents || [fId], - title: r.data.title, + timestamp: timestamp || null, + parentId: parentId || null, + parents: parents || [fId], + title: title, }); if (upsertRes.isErr()) { diff --git a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/[tId]/index.ts b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/[tId]/index.ts index 0cbf77a0d83d..6107047e1982 100644 --- a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/[tId]/index.ts +++ b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/[tId]/index.ts @@ -205,6 +205,7 @@ async function handler( timestamp: table.timestamp, tags: table.tags, parents: table.parents, + parent_id: table.parent_id, mime_type: table.mime_type, title: table.title, }, diff --git a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/[tId]/parents.ts b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/[tId]/parents.ts index f3863b22865f..4f8fef6e0c14 100644 --- a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/[tId]/parents.ts +++ b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/[tId]/parents.ts @@ -100,13 +100,14 @@ async function handler( }); } - const { parents } = r.data; + const { parents, parent_id: parentId } = r.data; const coreAPI = new CoreAPI(config.getCoreAPIConfig(), logger); const updateRes = await coreAPI.updateTableParents({ projectId: dataSource.dustAPIProjectId, dataSourceId: dataSource.dustAPIDataSourceId, tableId: tId, + parentId: parentId ?? null, parents, }); diff --git a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/csv.ts b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/csv.ts index e731b64559d0..a65e87d1c2ef 100644 --- a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/csv.ts +++ b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/csv.ts @@ -140,6 +140,14 @@ async function handler( message: upsertRes.error.message, }, }); + case "invalid_parent_id": + return apiError(req, res, { + status_code: 400, + api_error: { + type: "invalid_request_error", + message: upsertRes.error.message, + }, + }); case "invalid_rows": return apiError(req, res, { status_code: 400, diff --git a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/index.ts b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/index.ts index e3e6feda02b9..e97f0dbbe9e7 100644 --- a/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/index.ts +++ b/front/pages/api/v1/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/tables/index.ts @@ -114,7 +114,10 @@ import { apiError } from "@app/logger/withlogging"; * type: array * items: * type: string - * description: Parent tables of this table + * description: 'Table and ancestor ids, with the following convention: parents[0] === table_id, parents[1] === parent_id, and then ancestors ids in order' + * parent_id: + * type: string + * description: Direct parent id of this table * mime_type: * type: string * description: Mime type of the table @@ -233,6 +236,7 @@ async function handler( timestamp: table.timestamp, tags: table.tags, parents: table.parents, + parent_id: table.parent_id, mime_type: table.mime_type, title: table.title, }; @@ -259,6 +263,7 @@ async function handler( timestamp, tags, parents, + parent_id: parentId, remote_database_table_id: remoteDatabaseTableId, remote_database_secret_id: remoteDatabaseSecretId, } = r.data; @@ -365,6 +370,7 @@ async function handler( tags: tags || [], // Table is a parent of itself by default. parents: parents || [tableId], + parentId: parentId ?? null, remoteDatabaseTableId: remoteDatabaseTableId ?? null, remoteDatabaseSecretId: remoteDatabaseSecretId ?? null, title, diff --git a/front/pages/api/v1/w/[wId]/swagger_schemas.ts b/front/pages/api/v1/w/[wId]/swagger_schemas.ts index 296b0b38ec02..32b814f30afa 100644 --- a/front/pages/api/v1/w/[wId]/swagger_schemas.ts +++ b/front/pages/api/v1/w/[wId]/swagger_schemas.ts @@ -471,6 +471,12 @@ * description: Array of tags associated with the table * items: * type: string + * parent_id: + * type: string + * description: ID of the table parent + * items: + * type: string + * example: "1234f4567c" * parents: * type: array * description: Array of parent table IDs @@ -594,6 +600,12 @@ * items: * type: string * example: ["customer_support", "faq"] + * parent_id: + * type: string + * description: ID of the document parent + * items: + * type: string + * example: "1234f4567c" * parents: * type: array * items: diff --git a/front/pages/api/w/[wId]/data_sources/[dsId]/tables/csv.ts b/front/pages/api/w/[wId]/data_sources/[dsId]/tables/csv.ts index 4908be6874be..772451221a76 100644 --- a/front/pages/api/w/[wId]/data_sources/[dsId]/tables/csv.ts +++ b/front/pages/api/w/[wId]/data_sources/[dsId]/tables/csv.ts @@ -108,6 +108,14 @@ async function handler( message: r.error.message, }, }); + case "invalid_parent_id": + return apiError(req, res, { + status_code: 400, + api_error: { + type: "invalid_request_error", + message: r.error.message, + }, + }); case "invalid_rows": return apiError(req, res, { status_code: 400, diff --git a/front/pages/api/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts b/front/pages/api/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts index 629509dbc9dc..9984e4d99527 100644 --- a/front/pages/api/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts +++ b/front/pages/api/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/[documentId]/index.ts @@ -105,9 +105,31 @@ async function handler( }); } + const { + source_url, + text, + section, + tags, + parent_id, + parents, + timestamp, + light_document_output, + mime_type, + title, + } = bodyValidation.right; + const upsertResult = await upsertDocument({ name: documentId, - ...bodyValidation.right, + source_url, + text, + section, + tags, + parent_id, + parents, + timestamp, + light_document_output, + mime_type, + title, dataSource, auth, }); @@ -131,6 +153,14 @@ async function handler( message: upsertResult.error.message, }, }); + case "invalid_parent_id": + return apiError(req, res, { + status_code: 400, + api_error: { + type: "invalid_request_error", + message: upsertResult.error.message, + }, + }); default: return apiError(req, res, { status_code: 500, diff --git a/front/pages/api/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/index.ts b/front/pages/api/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/index.ts index bad4ebd59ff8..d3241dd9df90 100644 --- a/front/pages/api/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/index.ts +++ b/front/pages/api/w/[wId]/spaces/[spaceId]/data_sources/[dsId]/documents/index.ts @@ -96,8 +96,32 @@ async function handler( }); } + const { + name, + source_url, + text, + section, + tags, + parent_id, + parents, + timestamp, + light_document_output, + mime_type, + title, + } = bodyValidation.right; + const upsertResult = await upsertDocument({ - ...bodyValidation.right, + name, + source_url, + text, + section, + tags, + parent_id, + parents, + timestamp, + light_document_output, + mime_type, + title, dataSource, auth, }); @@ -121,6 +145,14 @@ async function handler( message: upsertResult.error.message, }, }); + case "invalid_parent_id": + return apiError(req, res, { + status_code: 400, + api_error: { + type: "invalid_request_error", + message: upsertResult.error.message, + }, + }); default: return apiError(req, res, { status_code: 500, diff --git a/front/public/swagger.json b/front/public/swagger.json index 5761bf592efc..e7450a80a1ec 100644 --- a/front/public/swagger.json +++ b/front/public/swagger.json @@ -2004,12 +2004,16 @@ }, "description": "Tags to associate with the document." }, + "parent_id": { + "type": "string", + "description": "Direct parent document ID to associate with the document." + }, "parents": { "type": "array", "items": { "type": "string" }, - "description": "Parent document IDs to associate with the document." + "description": "Document and ancestor ids, with the following convention: parents[0] === documentId, parents[1] === parent_id, and then ancestors ids in order" }, "timestamp": { "type": "number", @@ -2219,12 +2223,16 @@ "schema": { "type": "object", "properties": { + "parent_id": { + "type": "string", + "description": "Direct parent ID of the document" + }, "parents": { "type": "array", "items": { "type": "string" }, - "description": "Array of parent document IDs" + "description": "Document and ancestor ids, with the following convention: parents[0] === documentId, parents[1] === parentId, and then ancestors ids in order" } } } @@ -3178,9 +3186,13 @@ "parents": { "type": "array", "items": { - "type": "string" - }, - "description": "Parent tables of this table" + "type": "string", + "description": "Table and ancestor ids, with the following convention: parents[0] === table_id, parents[1] === parent_id, and then ancestors ids in order" + } + }, + "parent_id": { + "type": "string", + "description": "Direct parent id of this table" }, "mime_type": { "type": "string", diff --git a/front/temporal/labs/activities.ts b/front/temporal/labs/activities.ts index 819f6439e75f..675b0abaae3e 100644 --- a/front/temporal/labs/activities.ts +++ b/front/temporal/labs/activities.ts @@ -368,7 +368,8 @@ export async function processTranscriptActivity( dataSourceId: dataSource.dustAPIDataSourceId, documentId: transcriptTitle, tags: ["transcript", transcriptsConfiguration.provider], - parents: [], + parentId: null, + parents: [transcriptTitle], sourceUrl: null, timestamp: null, section: { diff --git a/front/temporal/upsert_queue/activities.ts b/front/temporal/upsert_queue/activities.ts index 16c79d6537cc..ac052b10a8f9 100644 --- a/front/temporal/upsert_queue/activities.ts +++ b/front/temporal/upsert_queue/activities.ts @@ -87,6 +87,7 @@ export async function upsertDocumentActivity( dataSourceId: dataSource.dustAPIDataSourceId, documentId: upsertQueueItem.documentId, tags: upsertQueueItem.tags || [], + parentId: upsertQueueItem.parentId || null, parents: upsertQueueItem.parents || [], sourceUrl: upsertQueueItem.sourceUrl, timestamp: upsertQueueItem.timestamp, diff --git a/front/temporal/upsert_tables/activities.ts b/front/temporal/upsert_tables/activities.ts index 72d8d1fb229f..79b336fb4713 100644 --- a/front/temporal/upsert_tables/activities.ts +++ b/front/temporal/upsert_tables/activities.ts @@ -84,6 +84,7 @@ export async function upsertTableActivity( tableId: upsertQueueItem.tableId, tableTimestamp: upsertQueueItem.tableTimestamp ?? null, tableTags: upsertQueueItem.tableTags || [], + tableParentId: upsertQueueItem.tableParentId || null, tableParents: upsertQueueItem.tableParents || [], csv: upsertQueueItem.csv, truncate: upsertQueueItem.truncate, diff --git a/sdks/js/src/index.ts b/sdks/js/src/index.ts index 6376adadfb70..803fbf2e36fc 100644 --- a/sdks/js/src/index.ts +++ b/sdks/js/src/index.ts @@ -823,6 +823,7 @@ export class DustAPI { folderId: string, timestamp: number, title: string, + parentId: string | null, parents: string[] ) { return this.request({ @@ -833,6 +834,7 @@ export class DustAPI { body: { timestamp: Math.floor(timestamp), title, + parent_id: parentId, parents, }, }); diff --git a/sdks/js/src/types.ts b/sdks/js/src/types.ts index d1c09a1fbc6b..f12e5f19ddf5 100644 --- a/sdks/js/src/types.ts +++ b/sdks/js/src/types.ts @@ -275,6 +275,7 @@ const CoreAPIDocumentSchema = z.object({ created: z.number(), document_id: z.string(), timestamp: z.number(), + parent_id: z.string().nullable().optional(), tags: z.array(z.string()), source_url: z.string().nullable().optional(), hash: z.string(), @@ -325,6 +326,7 @@ const CoreAPITablePublicSchema = z.object({ timestamp: z.number(), tags: z.array(z.string()), parents: z.array(z.string()), + parent_id: z.string().nullable().optional(), mime_type: z.string().optional(), title: z.string().optional(), }); @@ -1858,6 +1860,7 @@ const FrontDataSourceDocumentSectionSchema: z.ZodSchema; export const PostTableParentsRequestSchema = z.object({ + parent_id: z.string().nullable().optional(), parents: z.array(z.string()), }); @@ -1997,6 +2001,7 @@ export const UpsertTableFromCsvRequestSchema = z.intersection( description: z.string(), timestamp: z.number().nullable().optional(), tags: z.array(z.string()).nullable().optional(), + parentId: z.string().nullable().optional(), parents: z.array(z.string()).nullable().optional(), truncate: z.boolean(), useAppForHeaderDetection: z.boolean().nullable().optional(), @@ -2009,6 +2014,7 @@ export const UpsertTableFromCsvRequestSchema = z.intersection( description: o.description, timestamp: o.timestamp, tags: o.tags, + parentId: o.parentId, parents: o.parents, truncate: o.truncate, useAppForHeaderDetection: o.useAppForHeaderDetection, @@ -2064,6 +2070,7 @@ export const UpsertDatabaseTableRequestSchema = z.object({ description: z.string(), timestamp: z.number().nullable().optional(), tags: z.array(z.string()).nullable().optional(), + parent_id: z.string().nullable().optional(), parents: z.array(z.string()).nullable().optional(), remote_database_table_id: z.string().nullable().optional(), remote_database_secret_id: z.string().nullable().optional(), @@ -2137,6 +2144,7 @@ export type UpsertFolderResponseType = z.infer< export const UpsertDataSourceFolderRequestSchema = z.object({ timestamp: z.number(), parents: z.array(z.string()).nullable().optional(), + parent_id: z.string().nullable().optional(), title: z.string(), }); export type UpsertDataSourceFolderRequestType = z.infer< diff --git a/types/src/core/data_source.ts b/types/src/core/data_source.ts index 0e8077ffd48a..23dcedc4f484 100644 --- a/types/src/core/data_source.ts +++ b/types/src/core/data_source.ts @@ -48,6 +48,7 @@ export type CoreAPIDocument = { data_source_id: string; created: number; document_id: string; + parent_id: string | null; timestamp: number; tags: string[]; source_url?: string | null; @@ -79,6 +80,7 @@ export type CoreAPIFolder = { folder_id: string; timestamp: number; title: string; + parent_id: string | null; parents: string[]; }; diff --git a/types/src/front/api_handlers/public/data_sources.ts b/types/src/front/api_handlers/public/data_sources.ts index b4d68518a0ab..aae9fc0c2589 100644 --- a/types/src/front/api_handlers/public/data_sources.ts +++ b/types/src/front/api_handlers/public/data_sources.ts @@ -31,6 +31,7 @@ export const PostDataSourceDocumentRequestBodySchema = t.intersection([ t.type({ timestamp: t.union([t.Int, t.undefined, t.null]), tags: t.union([t.array(t.string), t.undefined, t.null]), + parent_id: t.union([t.string, t.undefined, t.null]), parents: t.union([t.array(t.string), t.undefined, t.null]), source_url: t.union([t.string, t.undefined, t.null]), upsert_context: t.union([UpsertContextSchema, t.undefined, t.null]), @@ -71,6 +72,7 @@ export const PatchDataSourceTableRequestBodySchema = t.intersection([ description: t.string, timestamp: t.union([t.number, t.undefined, t.null]), tags: t.union([t.array(t.string), t.undefined, t.null]), + parentId: t.union([t.string, t.undefined, t.null]), parents: t.union([t.array(t.string), t.undefined, t.null]), truncate: t.boolean, async: t.union([t.boolean, t.undefined]), @@ -105,6 +107,7 @@ export const UpsertTableFromCsvRequestSchema = t.intersection([ description: t.string, timestamp: t.union([t.number, t.undefined, t.null]), tags: t.union([t.array(t.string), t.undefined, t.null]), + parentId: t.union([t.string, t.undefined, t.null]), parents: t.union([t.array(t.string), t.undefined, t.null]), truncate: t.boolean, useAppForHeaderDetection: t.union([t.boolean, t.undefined, t.null]), diff --git a/types/src/front/lib/core_api.ts b/types/src/front/lib/core_api.ts index 23e99d62e05c..334af63aab6b 100644 --- a/types/src/front/lib/core_api.ts +++ b/types/src/front/lib/core_api.ts @@ -126,6 +126,7 @@ export type CoreAPITable = { schema: CoreAPITableSchema | null; timestamp: number; tags: string[]; + parent_id: string | null; parents: string[]; created: number; data_source_id: string; @@ -827,6 +828,7 @@ export class CoreAPI { documentId, timestamp, tags, + parentId, parents, sourceUrl, section, @@ -840,6 +842,7 @@ export class CoreAPI { documentId: string; timestamp?: number | null; tags: string[]; + parentId: string | null; parents: string[]; sourceUrl?: string | null; section: CoreAPIDataSourceDocumentSection; @@ -871,6 +874,7 @@ export class CoreAPI { timestamp, section, tags, + parent_id: parentId, parents, source_url: sourceUrl, credentials, @@ -926,11 +930,13 @@ export class CoreAPI { projectId, dataSourceId, documentId, + parentId, parents, }: { projectId: string; dataSourceId: string; documentId: string; + parentId: string | null; parents: string[]; }): Promise< CoreAPIResponse<{ @@ -950,6 +956,7 @@ export class CoreAPI { }, body: JSON.stringify({ parents: parents, + parent_id: parentId, }), } ); @@ -1093,6 +1100,7 @@ export class CoreAPI { description, timestamp, tags, + parentId, parents, remoteDatabaseTableId, remoteDatabaseSecretId, @@ -1106,6 +1114,7 @@ export class CoreAPI { description: string; timestamp: number | null; tags: string[]; + parentId: string | null; parents: string[]; remoteDatabaseTableId?: string | null; remoteDatabaseSecretId?: string | null; @@ -1127,6 +1136,7 @@ export class CoreAPI { description: description, timestamp, tags, + parent_id: parentId, parents, remote_database_table_id: remoteDatabaseTableId ?? null, remote_database_secret_id: remoteDatabaseSecretId ?? null, @@ -1247,11 +1257,13 @@ export class CoreAPI { projectId, dataSourceId, tableId, + parentId, parents, }: { projectId: string; dataSourceId: string; tableId: string; + parentId: string | null; parents: string[]; }): Promise> { const response = await this._fetchWithError( @@ -1266,6 +1278,7 @@ export class CoreAPI { "Content-Type": "application/json", }, body: JSON.stringify({ + parent_id: parentId, parents: parents, }), } @@ -1519,6 +1532,7 @@ export class CoreAPI { dataSourceId, folderId, timestamp, + parentId, parents, title, }: { @@ -1526,6 +1540,7 @@ export class CoreAPI { dataSourceId: string; folderId: string; timestamp: number | null; + parentId: string | null; parents: string[]; title: string; }): Promise> { @@ -1542,6 +1557,7 @@ export class CoreAPI { folder_id: folderId, timestamp, title, + parent_id: parentId, parents, }), }