Skip to content

Commit

Permalink
Store dataSourceWorkspaceId on RetrievalDocument to create proper lin…
Browse files Browse the repository at this point in the history
…ks (#1977)

* Store dataSourceWorkspaceId on RetrievalDocument to create proper links

* lint
  • Loading branch information
spolu authored Oct 5, 2023
1 parent b851853 commit e7a3d39
Show file tree
Hide file tree
Showing 9 changed files with 196 additions and 28 deletions.
11 changes: 9 additions & 2 deletions connectors/src/connectors/slack/bot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -324,12 +324,19 @@ function _processCiteMention(
.map((key) => {
const k = key.trim();
const ref = references[k];
if (ref && ref.sourceUrl) {
if (ref) {
if (!refCounter[k]) {
counter++;
refCounter[k] = counter;
}
return `[<${ref.sourceUrl}|${refCounter[k]}>]`;
const link = ref.sourceUrl
? ref.sourceUrl
: `${DUST_API}/w/${
ref.dataSourceWorkspaceId
}/builder/data-sources/${
ref.dataSourceId
}/upsert?documentId=${encodeURIComponent(ref.documentId)}`;
return `[<${link}|${refCounter[k]}>]`;
}
return "";
})
Expand Down
1 change: 1 addition & 0 deletions connectors/src/lib/dust_api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ export type RetrievalActionType = {
};

export type RetrievalDocumentType = {
dataSourceWorkspaceId: string;
dataSourceId: string;
sourceUrl: string | null;
documentId: string;
Expand Down
31 changes: 14 additions & 17 deletions front/components/assistant/RenderMessageMarkdown.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {
ClipboardCheckIcon,
ClipboardIcon,
DocumentDuplicateStrokeIcon,
DocumentTextIcon,
IconButton,
Tooltip,
} from "@dust-tt/sparkle";
Expand All @@ -27,6 +27,7 @@ import { RetrievalDocumentType } from "@app/types/assistant/actions/retrieval";
import { AgentConfigurationType } from "@app/types/assistant/agent";

import {
linkFromDocument,
PROVIDER_LOGO_PATH,
providerFromDocument,
titleFromDocument,
Expand Down Expand Up @@ -77,7 +78,6 @@ function citeDirective() {
.map((ref: string) => ({
counter: counter(ref),
ref,
link: "https://dust.tt",
}));

// `sup` will then be mapped to a custom component `CiteBlock`.
Expand Down Expand Up @@ -215,7 +215,6 @@ function CiteBlockWrapper(references: {
JSON.parse(props.references) as {
counter: number;
ref: string;
link: string;
}[]
).filter((r) => r.ref in references);

Expand All @@ -226,6 +225,8 @@ function CiteBlockWrapper(references: {

const provider = providerFromDocument(document);
const title = titleFromDocument(document);
const link = linkFromDocument(document);

const citeClassNames = classNames(
"rounded-md bg-structure-100 px-1",
"text-xs font-semibold text-action-500",
Expand All @@ -241,7 +242,7 @@ function CiteBlockWrapper(references: {
{provider !== "none" ? (
<img src={PROVIDER_LOGO_PATH[provider]}></img>
) : (
<DocumentDuplicateStrokeIcon className="h-4 w-4 text-slate-500" />
<DocumentTextIcon className="h-4 w-4 text-slate-500" />
)}
</div>
<div className="text-md flex whitespace-nowrap">
Expand All @@ -251,19 +252,15 @@ function CiteBlockWrapper(references: {
}
position="below"
>
{document.sourceUrl ? (
<a
// TODO(spolu): for custom data source add data source name to title
href={document.sourceUrl}
target="_blank"
rel="noopener noreferrer"
className={citeClassNames}
>
{r.counter}
</a>
) : (
<span className={citeClassNames}>{r.counter}</span>
)}
<a
// TODO(spolu): for custom data source add data source name to title
href={link}
target="_blank"
rel="noopener noreferrer"
className={citeClassNames}
>
{r.counter}
</a>
</Tooltip>
</sup>
);
Expand Down
26 changes: 19 additions & 7 deletions front/components/assistant/conversation/RetrievalAction.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import {
ChevronDownIcon,
ChevronRightIcon,
Chip,
DocumentDuplicateStrokeIcon,
DocumentTextIcon,
Icon,
Spinner,
Tooltip,
Expand Down Expand Up @@ -75,7 +75,7 @@ export default function RetrievalAction({
</div>
)}
</div>
<div className="row-span-1">
<div className="row-span-1 select-none">
{retrievalAction.documents && (
<div
onClick={() => setDocListVisible(!docListVisible)}
Expand Down Expand Up @@ -103,18 +103,18 @@ export default function RetrievalAction({
leaveFrom="opacity-100 scale-100"
leaveTo="opacity-0 scale-95"
>
<ul className="ml-2 gap-2">
<ul className="ml-2 flex flex-col gap-y-2">
{retrievalAction.documents.map((document, i) => {
const provider = providerFromDocument(document);
return (
<li key={i}>
<a
href={document.sourceUrl || ""}
className="front-bold text-xs text-element-800"
href={linkFromDocument(document)}
className="front-bold flex flex-row items-center text-xs text-element-800"
target="_blank"
>
{provider === "none" ? (
<DocumentDuplicateStrokeIcon className="mr-1 inline-block h-4 w-4 text-element-500" />
<DocumentTextIcon className="mr-1 inline-block h-4 w-4 text-slate-500" />
) : (
<img
src={
Expand Down Expand Up @@ -149,7 +149,7 @@ function RetrievedDocumentsInfo(documents: RetrievalDocumentType[]) {
{summary[k].provider !== "none" ? (
<img src={PROVIDER_LOGO_PATH[summary[k].provider]}></img>
) : (
<DocumentDuplicateStrokeIcon className="h-4 w-4 text-slate-500" />
<DocumentTextIcon className="h-4 w-4 text-slate-500" />
)}
</div>
<div className="flex-initial text-gray-700">{summary[k].count}</div>
Expand Down Expand Up @@ -220,3 +220,15 @@ export function titleFromDocument(document: RetrievalDocumentType): string {

return document.documentId;
}

/**
 * Resolves the URL a retrieved document should link to.
 *
 * When the document carries a non-empty `sourceUrl`, that URL is returned as is. Otherwise the link
 * points at the document's upsert page under its data source, built from the data source workspace
 * id, the data source id and the URI-encoded document id.
 *
 * @param document the retrieved document to link to
 * @returns the URL for the document
 */
export function linkFromDocument(document: RetrievalDocumentType): string {
  const { sourceUrl, dataSourceWorkspaceId, dataSourceId, documentId } =
    document;

  if (sourceUrl) {
    return sourceUrl;
  }

  // Only the document id is encoded; the two ids are interpolated into the path verbatim.
  const encodedDocumentId = encodeURIComponent(documentId);
  return `https://dust.tt/w/${dataSourceWorkspaceId}/builder/data-sources/${dataSourceId}/upsert?documentId=${encodedDocumentId}`;
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import {
CloudArrowDownIcon,
DocumentDuplicateIcon,
Item,
Modal,
PageHeader,
Expand Down Expand Up @@ -165,7 +164,7 @@ function PickDataSource({
icon={
ds.connectorProvider
? CONNECTOR_CONFIGURATIONS[ds.connectorProvider].logoComponent
: DocumentDuplicateIcon
: CloudArrowDownIcon
}
key={ds.name}
size="md"
Expand Down
12 changes: 12 additions & 0 deletions front/lib/api/assistant/actions/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ export async function renderRetrievalActionByModelId(

return {
id: d.id,
dataSourceWorkspaceId: d.dataSourceWorkspaceId,
dataSourceId: d.dataSourceId,
sourceUrl: d.sourceUrl,
documentId: d.documentId,
Expand Down Expand Up @@ -598,6 +599,15 @@ export async function* runRetrieval(
const run = res.value;
let documents: RetrievalDocumentType[] = [];

// This is not perfect and will be erroneous in case of two data sources with the same id from two
// different workspaces. We don't support cross workspace data sources right now. But we'll likely
// want `core` to return the `workspace_id` that was used eventually.
// TODO(spolu): make `core` return data source workspace id.
const dataSourcesIdToWorkspaceId: { [key: string]: string } = {};
for (const ds of c.dataSources) {
dataSourcesIdToWorkspaceId[ds.dataSourceId] = ds.workspaceId;
}

for (const t of run.traces) {
if (t[1][0][0].error) {
yield {
Expand Down Expand Up @@ -633,6 +643,7 @@ export async function* runRetrieval(
const reference = refs[i % refs.length];
return {
id: 0, // dummy pending database insertion
dataSourceWorkspaceId: dataSourcesIdToWorkspaceId[d.data_source_id],
dataSourceId: d.data_source_id,
documentId: d.document_id,
reference,
Expand All @@ -656,6 +667,7 @@ export async function* runRetrieval(
for (const d of documents) {
const document = await RetrievalDocument.create(
{
dataSourceWorkspaceId: d.dataSourceWorkspaceId,
dataSourceId: d.dataSourceId,
sourceUrl: d.sourceUrl,
documentId: d.documentId,
Expand Down
5 changes: 5 additions & 0 deletions front/lib/models/assistant/actions/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ export class RetrievalDocument extends Model<
declare createdAt: CreationOptional<Date>;
declare updatedAt: CreationOptional<Date>;

declare dataSourceWorkspaceId: string;
declare dataSourceId: string;
declare sourceUrl: string | null;
declare documentId: string;
Expand Down Expand Up @@ -336,6 +337,10 @@ RetrievalDocument.init(
allowNull: false,
defaultValue: DataTypes.NOW,
},
dataSourceWorkspaceId: {
type: DataTypes.STRING,
allowNull: true,
},
dataSourceId: {
type: DataTypes.STRING,
allowNull: false,
Expand Down
134 changes: 134 additions & 0 deletions front/migrations/20231005_populate_retrieved_documents_workspace_id.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import { Op } from "sequelize";

import { ModelId } from "@app/lib/databases";
import {
AgentMessage,
Conversation,
Message,
RetrievalDocument,
Workspace,
} from "@app/lib/models";

/**
 * Interprets the raw `LIVE` environment value as a boolean switch.
 *
 * Unset, empty, "0", "false", "no" and "off" (case-insensitive, surrounding whitespace ignored)
 * mean dry-run; any other value enables live mode. Parsing explicitly avoids the pitfall where
 * `LIVE=false` is a non-empty string and therefore truthy.
 *
 * @param raw the raw environment variable value, or `undefined` when unset
 * @returns `true` when the migration should write to the database
 */
function parseLiveFlag(raw: string | undefined): boolean {
  const normalized = (raw ?? "").trim().toLowerCase();
  return !["", "0", "false", "no", "off"].includes(normalized);
}

// Dry-run unless LIVE is explicitly set to an enabling value (e.g. LIVE=1 or LIVE=true).
const LIVE = parseLiveFlag(process.env.LIVE);

/**
 * Entry point of the migration: loads every workspace and runs `updateAllConversations` on each.
 *
 * Workspaces are handled in chunks of 16. Workspaces within a chunk are processed concurrently and
 * each chunk is awaited before the next one starts. The module-level `LIVE` flag is coerced to a
 * boolean and forwarded as the `live` argument.
 */
async function main() {
  console.log("Fetching workspaces...");
  const workspaces = await Workspace.findAll({});
  console.log(
    `Found ${workspaces.length} workspaces for which to populate dataSourceWorkspaceId on retrieval documents`
  );

  const chunkSize = 16;
  const chunks: Workspace[][] = [];
  for (let i = 0; i < workspaces.length; i += chunkSize) {
    chunks.push(workspaces.slice(i, i + chunkSize));
  }

  for (let i = 0; i < chunks.length; i++) {
    // 1-based progress so the last chunk reads N/N rather than (N-1)/N.
    console.log(`Processing workspace chunk ${i + 1}/${chunks.length}...`);
    const chunk = chunks[i];
    await Promise.all(
      chunk.map((workspace: Workspace) => {
        return updateAllConversations(!!LIVE, workspace);
      })
    );
  }
}

/**
 * Runs `updateConversation` on every conversation belonging to `workspace`.
 *
 * Conversations are handled in batches of 16: a batch runs concurrently and is awaited before the
 * next batch starts.
 *
 * @param live forwarded to `updateConversation`
 * @param workspace the workspace whose conversations are processed
 */
async function updateAllConversations(live: boolean, workspace: Workspace) {
  const conversations = await Conversation.findAll({
    where: { workspaceId: workspace.id },
  });

  const batchSize = 16;
  const batchCount = Math.ceil(conversations.length / batchSize);

  for (let batchIndex = 0; batchIndex < batchCount; batchIndex++) {
    console.log(`Processing chunk ${batchIndex}/${batchCount}...`);
    const start = batchIndex * batchSize;
    const batch = conversations.slice(start, start + batchSize);
    await Promise.all(
      batch.map((c: Conversation) => updateConversation(live, c, workspace))
    );
  }
}

/**
 * Runs `updateMessage` for every message of `conversation` that references an agent message.
 *
 * All matching messages are processed concurrently.
 *
 * @param live forwarded to `updateMessage`
 * @param conversation the conversation whose messages are scanned
 * @param workspace forwarded to `updateMessage`
 */
async function updateConversation(
  live: boolean,
  conversation: Conversation,
  workspace: Workspace
) {
  // Select messages of this conversation whose agentMessageId is not null.
  const agentMessages = await Message.findAll({
    where: {
      [Op.and]: [
        {
          conversationId: conversation.id,
          agentMessageId: { [Op.ne]: null },
        },
      ],
    },
  });

  const pendingUpdates = agentMessages.map((message) =>
    updateMessage(
      live,
      conversation,
      // The query above filters out rows with a null agentMessageId.
      message.agentMessageId as number,
      workspace
    )
  );
  await Promise.all(pendingUpdates);
}

/**
 * Backfills `dataSourceWorkspaceId` on the retrieval documents attached to one agent message.
 *
 * Does nothing when the agent message is not found or has no retrieval action. Otherwise it logs
 * how many documents belong to the retrieval action and, when `live` is true, sets their
 * `dataSourceWorkspaceId` to `workspace.sId`.
 *
 * @param live when false, only logs and performs no write
 * @param conversation used for logging only
 * @param agentMessageId primary key of the agent message to inspect
 * @param workspace the workspace whose `sId` is written to the documents
 */
async function updateMessage(
  live: boolean,
  conversation: Conversation,
  agentMessageId: ModelId,
  workspace: Workspace
) {
  const agentMessage = await AgentMessage.findByPk(agentMessageId);
  const retrievalActionId = agentMessage?.agentRetrievalActionId;
  if (!retrievalActionId) {
    return;
  }

  const byRetrievalAction = { retrievalActionId };

  const documents = await RetrievalDocument.findAll({
    where: byRetrievalAction,
  });
  console.log(
    `LIVE=${live} workspace=${workspace.sId} conversation=${conversation.sId} documents=${documents.length}`
  );

  if (!live) {
    return;
  }

  await RetrievalDocument.update(
    { dataSourceWorkspaceId: workspace.sId },
    { where: byRetrievalAction }
  );
}

// Run the migration, then exit with status 0 on success or 1 (after logging the error) on failure.
void (async () => {
  try {
    await main();
    console.log("done");
    process.exit(0);
  } catch (err) {
    console.error(err);
    process.exit(1);
  }
})();
1 change: 1 addition & 0 deletions front/types/assistant/actions/retrieval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ export function isRetrievalConfiguration(

export type RetrievalDocumentType = {
id: ModelId;
dataSourceWorkspaceId: string;
dataSourceId: string;
sourceUrl: string | null;
documentId: string;
Expand Down

0 comments on commit e7a3d39

Please sign in to comment.