[Document / Table API] Enforce required title and mimetype on upsert (#9877)

* enforce on front / types

* enforce in core

* table fix

* clean

* fix rust
philipperolet authored Jan 10, 2025
1 parent 4fd2ddc commit 0d53618
Showing 14 changed files with 99 additions and 116 deletions.
8 changes: 4 additions & 4 deletions core/bin/core_api.rs
@@ -1686,8 +1686,8 @@ struct DataSourcesDocumentsUpsertPayload {
section: Section,
credentials: run::Credentials,
light_document_output: Option<bool>,
- title: Option<String>,
- mime_type: Option<String>,
+ title: String,
+ mime_type: String,
}

async fn data_sources_documents_upsert(
@@ -2191,8 +2191,8 @@ struct DatabasesTablesUpsertPayload {
remote_database_secret_id: Option<String>,

// Node meta:
- title: Option<String>,
- mime_type: Option<String>,
+ title: String,
+ mime_type: String,
}

async fn tables_upsert(
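Both core payloads above (DataSourcesDocumentsUpsertPayload and DatabasesTablesUpsertPayload) now require title and mime_type instead of accepting Option<String>. As a rough sketch, a document upsert body now has to carry both fields; the TypeScript object below is illustrative only, with invented values and unrelated required fields omitted:

// Illustrative only; not part of the diff. Other required payload fields
// (section, credentials, ...) are omitted for brevity.
const documentsUpsertBody = {
  title: "Quarterly report",     // previously Option<String>, now required
  mime_type: "application/pdf",  // previously Option<String>, now required
  light_document_output: true,
};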
8 changes: 4 additions & 4 deletions core/src/data_sources/data_source.rs
@@ -633,8 +633,8 @@ impl DataSource {
store: Box<dyn Store + Sync + Send>,
qdrant_clients: QdrantClients,
document_id: &str,
- title: Option<String>,
- mime_type: Option<String>,
+ title: String,
+ mime_type: String,
timestamp: Option<u64>,
tags: &Vec<String>,
parents: &Vec<String>,
@@ -716,8 +716,8 @@ impl DataSource {
&self.internal_id,
document_id,
timestamp,
- title.as_deref().unwrap_or(document_id),
- mime_type.as_deref().unwrap_or("application/octet-stream"),
+ title.as_str(),
+ mime_type.as_str(),
&tags,
&parents.get(1).cloned(),
parents,
4 changes: 2 additions & 2 deletions core/src/stores/postgres.rs
@@ -2700,7 +2700,7 @@ impl Store for PostgresStore {
}
};

- let title = upsert_params.title.unwrap_or(upsert_params.name.clone());
+ let title = upsert_params.title;

let table = Table::new(
project,
@@ -2712,7 +2712,7 @@
upsert_params.description,
upsert_params.timestamp,
title,
- upsert_params.mime_type.unwrap_or("text/csv".to_string()),
+ upsert_params.mime_type,
upsert_params.tags,
upsert_params.parents.get(1).cloned(),
upsert_params.parents,
4 changes: 2 additions & 2 deletions core/src/stores/store.rs
@@ -67,8 +67,8 @@ pub struct TableUpsertParams {
pub parents: Vec<String>,
pub remote_database_table_id: Option<String>,
pub remote_database_secret_id: Option<String>,
- pub title: Option<String>,
- pub mime_type: Option<String>,
+ pub title: String,
+ pub mime_type: String,
}

pub struct FolderUpsertParams {
2 changes: 1 addition & 1 deletion front/components/data_source/DocumentUploadOrEditModal.tsx
@@ -147,7 +147,7 @@ export const DocumentUploadOrEditModal = ({
const body = {
name: initialId ?? document.name,
title: initialId ?? document.name,
- mime_type: document.mimeType ?? undefined,
+ mime_type: document.mimeType ?? "application/octet-stream",
timestamp: null,
parent_id: null,
parents: [initialId ?? document.name],
1 change: 1 addition & 0 deletions front/components/data_source/TableUploadOrEditModal.tsx
@@ -111,6 +111,7 @@ export const TableUploadOrEditModal = ({
truncate: true,
useAppForHeaderDetection,
title: table.name,
mimeType: "text/csv",
timestamp: undefined,
tags: undefined,
parentId: undefined,
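Both upload modals now pick an explicit MIME type at the call site instead of sending undefined: document uploads fall back to application/octet-stream, and CSV-backed tables always send text/csv. A condensed TypeScript sketch of that call-site choice (the helper name is hypothetical, not code from this diff):

// Hypothetical helper summarizing the two call-site defaults above.
function defaultMimeType(kind: "document" | "table", detected?: string | null): string {
  if (kind === "table") {
    return "text/csv";
  }
  return detected ?? "application/octet-stream";
}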
8 changes: 4 additions & 4 deletions front/lib/api/data_sources.ts
@@ -237,8 +237,8 @@ export type UpsertDocumentArgs = {
light_document_output?: boolean;
dataSource: DataSourceResource;
auth: Authenticator;
- mime_type?: string;
- title?: string;
+ mime_type: string;
+ title: string;
};
export async function upsertDocument({
name,
@@ -439,8 +439,8 @@ export type UpsertTableArgs = {
dataSource: DataSourceResource;
auth: Authenticator;
useAppForHeaderDetection?: boolean;
- title?: string;
- mimeType?: string;
+ title: string;
+ mimeType: string;
};
export async function upsertTable({
tableId,
4 changes: 2 additions & 2 deletions front/lib/api/tables.ts
@@ -155,8 +155,8 @@ export async function upsertTableFromCsv({
truncate: boolean;
useAppForHeaderDetection: boolean;
detectedHeaders?: DetectedHeadersType;
- title?: string;
- mimeType?: string;
+ title: string;
+ mimeType: string;
}): Promise<Result<{ table: CoreAPITable }, TableOperationError>> {
const csvRowsRes = csv
? await rowsFromCsv({
70 changes: 31 additions & 39 deletions front/lib/upsert_queue.ts
@@ -16,51 +16,43 @@ import { launchUpsertTableWorkflow } from "@app/temporal/upsert_tables/client";

const { DUST_UPSERT_QUEUE_BUCKET, SERVICE_ACCOUNT } = process.env;

- export const EnqueueUpsertDocument = t.intersection([
- t.type({
- workspaceId: t.string,
- dataSourceId: t.string,
- documentId: t.string,
- tags: t.union([t.array(t.string), t.null]),
- parentId: t.union([t.string, t.null, t.undefined]),
- parents: t.union([t.array(t.string), t.null]),
- sourceUrl: t.union([t.string, t.null]),
- timestamp: t.union([t.number, t.null]),
- section: FrontDataSourceDocumentSection,
- upsertContext: t.union([UpsertContextSchema, t.null]),
- }),
- t.partial({
- title: t.string,
- mimeType: t.string,
- }),
- ]);
+ export const EnqueueUpsertDocument = t.type({
+ workspaceId: t.string,
+ dataSourceId: t.string,
+ documentId: t.string,
+ tags: t.union([t.array(t.string), t.null]),
+ parentId: t.union([t.string, t.null, t.undefined]),
+ parents: t.union([t.array(t.string), t.null]),
+ sourceUrl: t.union([t.string, t.null]),
+ timestamp: t.union([t.number, t.null]),
+ section: FrontDataSourceDocumentSection,
+ upsertContext: t.union([UpsertContextSchema, t.null]),
+ title: t.string,
+ mimeType: t.string,
+ });

const DetectedHeaders = t.type({
header: t.array(t.string),
rowIndex: t.number,
});

- export const EnqueueUpsertTable = t.intersection([
- t.type({
- workspaceId: t.string,
- dataSourceId: t.string,
- tableId: t.string,
- tableName: t.string,
- tableDescription: t.string,
- tableTimestamp: t.union([t.number, t.undefined, t.null]),
- tableTags: t.union([t.array(t.string), t.undefined, t.null]),
- tableParentId: t.union([t.string, t.undefined, t.null]),
- tableParents: t.union([t.array(t.string), t.undefined, t.null]),
- csv: t.union([t.string, t.null]),
- truncate: t.boolean,
- useAppForHeaderDetection: t.union([t.boolean, t.undefined, t.null]),
- detectedHeaders: t.union([DetectedHeaders, t.undefined]),
- }),
- t.partial({
- title: t.string,
- mimeType: t.string,
- }),
- ]);
+ export const EnqueueUpsertTable = t.type({
+ workspaceId: t.string,
+ dataSourceId: t.string,
+ tableId: t.string,
+ tableName: t.string,
+ tableDescription: t.string,
+ tableTimestamp: t.union([t.number, t.undefined, t.null]),
+ tableTags: t.union([t.array(t.string), t.undefined, t.null]),
+ tableParentId: t.union([t.string, t.undefined, t.null]),
+ tableParents: t.union([t.array(t.string), t.undefined, t.null]),
+ csv: t.union([t.string, t.null]),
+ truncate: t.boolean,
+ useAppForHeaderDetection: t.union([t.boolean, t.undefined, t.null]),
+ detectedHeaders: t.union([DetectedHeaders, t.undefined]),
+ title: t.string,
+ mimeType: t.string,
+ });

type EnqueueUpsertDocumentType = t.TypeOf<typeof EnqueueUpsertDocument>;

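Replacing t.intersection([t.type({...}), t.partial({...})]) with a single t.type({...}) means a queued payload that omits title or mimeType no longer decodes. A self-contained TypeScript sketch using a reduced mirror of the codec (a handful of fields only, purely for illustration):

import * as t from "io-ts";
import { isLeft } from "fp-ts/lib/Either";

// Reduced mirror of EnqueueUpsertDocument (not the real codec).
const EnqueueUpsertDocumentSketch = t.type({
  workspaceId: t.string,
  dataSourceId: t.string,
  documentId: t.string,
  title: t.string,     // previously lived in a t.partial block
  mimeType: t.string,  // previously lived in a t.partial block
});

// Before this change a payload missing title/mimeType decoded fine;
// with the fields required, the decode now returns a Left.
const decoded = EnqueueUpsertDocumentSketch.decode({
  workspaceId: "w_1",
  dataSourceId: "ds_1",
  documentId: "doc_1",
});
console.log(isLeft(decoded)); // true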
(file path not shown; front API route handling document upsert)
@@ -511,6 +511,9 @@ async function handler(

const documentId = req.query.documentId as string;

+ const title = r.data.title ?? "Untitled document";
+ const mimeType = r.data.mime_type ?? "application/octet-stream";

if (r.data.async === true) {
const enqueueRes = await enqueueUpsertDocument({
upsertDocument: {
@@ -524,8 +527,8 @@
sourceUrl,
section,
upsertContext: r.data.upsert_context || null,
- title: r.data.title ?? undefined,
- mimeType: r.data.mime_type ?? undefined,
+ title,
+ mimeType,
},
});
if (enqueueRes.isErr()) {
@@ -565,8 +568,8 @@
section,
credentials,
lightDocumentOutput: r.data.light_document_output === true,
- title: r.data.title,
- mimeType: r.data.mime_type,
+ title,
+ mimeType,
});

if (upsertRes.isErr()) {
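The handler above now resolves the two node-meta fields once, near the top, and threads the resolved values through both the queued (async) and synchronous upsert paths. A TypeScript sketch of that defaulting step, with a hypothetical helper name standing in for the inline code:

// Hypothetical condensation of the defaulting shown above.
function resolveNodeMeta(body: { title?: string | null; mime_type?: string | null }) {
  return {
    title: body.title ?? "Untitled document",
    mimeType: body.mime_type ?? "application/octet-stream",
  };
}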
(file path not shown; front API route handling table CSV upsert)
@@ -115,10 +115,7 @@ async function handler(
}
const upsertRes = await handleDataSourceTableCSVUpsert({
auth,
- params: {
- ...r.data,
- title: r.data.title ?? r.data.name,
- },
+ params: r.data,
dataSource,
});

8 changes: 4 additions & 4 deletions sdks/js/src/types.ts
@@ -1980,8 +1980,8 @@ export const UpsertTableFromCsvRequestSchema = z.intersection(
truncate: z.boolean(),
useAppForHeaderDetection: z.boolean().nullable().optional(),
async: z.boolean().optional(),
- title: z.string().optional(),
- mimeType: z.string().optional(),
+ title: z.string(),
+ mimeType: z.string(),
})
.transform((o) => ({
name: o.name,
@@ -2048,8 +2048,8 @@ export const UpsertDatabaseTableRequestSchema = z.object({
parents: z.array(z.string()).nullable().optional(),
remote_database_table_id: z.string().nullable().optional(),
remote_database_secret_id: z.string().nullable().optional(),
- title: z.string().optional(),
- mime_type: z.string().optional(),
+ title: z.string(),
+ mime_type: z.string(),
});

export type UpsertDatabaseTableRequestType = z.infer<
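For SDK consumers, the table upsert request schemas now reject bodies that omit the title or the MIME type. A self-contained TypeScript sketch using a reduced mirror of UpsertDatabaseTableRequestSchema (a few fields only; not the real schema):

import { z } from "zod";

// Reduced mirror of UpsertDatabaseTableRequestSchema (not the real schema).
const UpsertTableSketch = z.object({
  name: z.string(),
  description: z.string(),
  title: z.string(),      // was z.string().optional()
  mime_type: z.string(),  // was z.string().optional()
});

const parsed = UpsertTableSketch.safeParse({
  name: "sales_2024",
  description: "Quarterly sales figures",
});
console.log(parsed.success); // false: title and mime_type are now required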
70 changes: 30 additions & 40 deletions types/src/front/api_handlers/public/data_sources.ts
@@ -27,24 +27,20 @@ export type FrontDataSourceDocumentSectionType = t.TypeOf<
typeof FrontDataSourceDocumentSection
>;

- export const PostDataSourceDocumentRequestBodySchema = t.intersection([
- t.type({
- timestamp: t.union([t.Int, t.undefined, t.null]),
- tags: t.union([t.array(t.string), t.undefined, t.null]),
- parent_id: t.union([t.string, t.undefined, t.null]),
- parents: t.union([t.array(t.string), t.undefined, t.null]),
- source_url: t.union([t.string, t.undefined, t.null]),
- upsert_context: t.union([UpsertContextSchema, t.undefined, t.null]),
- text: t.union([t.string, t.undefined, t.null]),
- section: t.union([FrontDataSourceDocumentSection, t.undefined, t.null]),
- light_document_output: t.union([t.boolean, t.undefined]),
- async: t.union([t.boolean, t.undefined, t.null]),
- }),
- t.partial({
- title: t.string,
- mime_type: t.string,
- }),
- ]);
+ export const PostDataSourceDocumentRequestBodySchema = t.type({
+ timestamp: t.union([t.Int, t.undefined, t.null]),
+ tags: t.union([t.array(t.string), t.undefined, t.null]),
+ parent_id: t.union([t.string, t.undefined, t.null]),
+ parents: t.union([t.array(t.string), t.undefined, t.null]),
+ source_url: t.union([t.string, t.undefined, t.null]),
+ upsert_context: t.union([UpsertContextSchema, t.undefined, t.null]),
+ text: t.union([t.string, t.undefined, t.null]),
+ section: t.union([FrontDataSourceDocumentSection, t.undefined, t.null]),
+ light_document_output: t.union([t.boolean, t.undefined]),
+ async: t.union([t.boolean, t.undefined, t.null]),
+ title: t.string,
+ mime_type: t.string,
+ });

export type PostDataSourceDocumentRequestBody = t.TypeOf<
typeof PostDataSourceDocumentRequestBodySchema
@@ -66,24 +62,20 @@ export type PatchDataSourceWithNameDocumentRequestBody = t.TypeOf<
typeof PostDataSourceWithNameDocumentRequestBodySchema
>;

- export const PatchDataSourceTableRequestBodySchema = t.intersection([
- t.type({
- name: t.string,
- description: t.string,
- timestamp: t.union([t.number, t.undefined, t.null]),
- tags: t.union([t.array(t.string), t.undefined, t.null]),
- parentId: t.union([t.string, t.undefined, t.null]),
- parents: t.union([t.array(t.string), t.undefined, t.null]),
- truncate: t.boolean,
- async: t.union([t.boolean, t.undefined]),
- csv: t.union([t.string, t.undefined]),
- useAppForHeaderDetection: t.union([t.boolean, t.undefined]),
- }),
- t.partial({
- title: t.string,
- mimeType: t.string,
- }),
- ]);
+ export const PatchDataSourceTableRequestBodySchema = t.type({
+ name: t.string,
+ description: t.string,
+ timestamp: t.union([t.number, t.undefined, t.null]),
+ tags: t.union([t.array(t.string), t.undefined, t.null]),
+ parentId: t.union([t.string, t.undefined, t.null]),
+ parents: t.union([t.array(t.string), t.undefined, t.null]),
+ truncate: t.boolean,
+ async: t.union([t.boolean, t.undefined]),
+ csv: t.union([t.string, t.undefined]),
+ useAppForHeaderDetection: t.union([t.boolean, t.undefined]),
+ title: t.string,
+ mimeType: t.string,
+ });

export type PatchDataSourceTableRequestBody = t.TypeOf<
typeof PatchDataSourceTableRequestBodySchema
@@ -112,6 +104,8 @@ export const UpsertTableFromCsvRequestSchema = t.intersection([
truncate: t.boolean,
useAppForHeaderDetection: t.union([t.boolean, t.undefined, t.null]),
async: t.union([t.boolean, t.undefined]),
+ title: t.string,
+ mimeType: t.string,
}),
// csv is optional when editing an existing table.
t.union([
@@ -121,10 +115,6 @@ export const UpsertTableFromCsvRequestSchema = t.intersection([
tableId: t.string,
}),
]),
- t.partial({
- title: t.string,
- mimeType: t.string,
- }),
]);

export type UpsertTableFromCsvRequestType = t.TypeOf<