From f663c248ed5019bd80aad6e0a61e69bb982ff85f Mon Sep 17 00:00:00 2001
From: Zeke Hunter-Green
Date: Tue, 3 Oct 2023 15:23:34 +0100
Subject: [PATCH 1/4] add problem blobs back to ingestion events query
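Blobs that keep failing in an infinite loop are tracked in the problem_blobs
table and were excluded from the ingestion events query, so they no longer
appeared in the results at all. This change keeps that exclusion for the main
branch of the inner query but UNIONs in a second branch that selects the
problem blobs with empty event arrays and an infinite_loop flag (FALSE for
normal blobs, TRUE for looping ones), so looping blobs are returned alongside
everything else and can be surfaced to users in a later change.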
---
.../observability/PostgresClient.scala | 28 ++++++++++++++++---
1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/backend/app/services/observability/PostgresClient.scala b/backend/app/services/observability/PostgresClient.scala
index 994477ff..832021c0 100644
--- a/backend/app/services/observability/PostgresClient.scala
+++ b/backend/app/services/observability/PostgresClient.scala
@@ -157,6 +157,7 @@ class PostgresClientImpl(postgresConfig: PostgresConfig) extends PostgresClient
ie.errors,
ie.workspace_name AS "workspaceName",
ie.mime_types AS "mimeTypes",
+ infinite_loop,
ARRAY_AGG(DISTINCT blob_metadata.path ) AS paths,
(ARRAY_AGG(blob_metadata.file_size))[1] as "fileSize",
ARRAY_REMOVE(ARRAY_AGG(extractor_statuses.extractor), NULL) AS extractors,
@@ -167,22 +168,41 @@ class PostgresClientImpl(postgresConfig: PostgresConfig) extends PostgresClient
SELECT
blob_id,
ingest_id,
- MIN(EXTRACT(EPOCH from event_time)) AS ingest_start,
- MAX(EXTRACT(EPOCH from event_time)) AS most_recent_event,
+ MIN(EXTRACT(EPOCH FROM event_time)) AS ingest_start,
+ MAX(EXTRACT(EPOCH FROM event_time)) AS most_recent_event,
ARRAY_AGG(type) as event_types,
ARRAY_AGG(EXTRACT(EPOCH from event_time)) as event_times,
ARRAY_AGG(status) as event_statuses,
ARRAY_AGG(details -> 'errors') as errors,
(ARRAY_AGG(details ->> 'workspaceName') FILTER (WHERE details ->> 'workspaceName' IS NOT NULL))[1] as workspace_name,
- (ARRAY_AGG(details ->> 'mimeTypes') FILTER (WHERE details ->> 'mimeTypes' IS NOT NULL))[1] as mime_types
+ (ARRAY_AGG(details ->> 'mimeTypes') FILTER (WHERE details ->> 'mimeTypes' IS NOT NULL))[1] as mime_types,
+ FALSE AS infinite_loop
FROM ingestion_events
WHERE ingest_id LIKE ${if(ingestIdIsPrefix) LikeConditionEscapeUtil.beginsWith(ingestId) else ingestId}
AND blob_id NOT IN (SELECT blob_id FROM problem_blobs)
GROUP BY 1,2
+ UNION
+ -- blobs in the ingestion that are failing in an infinite loop
+ SELECT DISTINCT
+ blob_id,
+ ingest_id,
+ MIN(EXTRACT(EPOCH FROM event_time)) AS ingest_start,
+ MAX(EXTRACT(EPOCH FROM event_time)) AS most_recent_event,
+ array[]::text[] AS event_types,
+ array[]::numeric[] AS event_times,
+ array[]::text[] AS event_statuses,
+ array['[]'::jsonb] AS errors,
+ NULL AS workspace_name,
+ NULL AS mime_types,
+ TRUE AS infinite_loop
+ FROM ingestion_events
+ WHERE ingest_id LIKE ${if(ingestIdIsPrefix) LikeConditionEscapeUtil.beginsWith(ingestId) else ingestId}
+ AND blob_id IN (SELECT blob_id FROM problem_blobs)
+ GROUP BY 1,2
) AS ie
LEFT JOIN blob_metadata USING(ingest_id, blob_id)
LEFT JOIN extractor_statuses on extractor_statuses.blob_id = ie.blob_id and extractor_statuses.ingest_id = ie.ingest_id
- GROUP BY 1,2,3,4,5,6,7,8,9,10
+ GROUP BY 1,2,3,4,5,6,7,8,9,10,11
ORDER by ingest_start desc
""".map(rs => {
val eventTypes = rs.array("event_types").getArray.asInstanceOf[Array[String]]
From 3144fb80e52b4da73c02c219c9bbc96de5105d08 Mon Sep 17 00:00:00 2001
From: Zeke Hunter-Green
Date: Tue, 3 Oct 2023 15:27:34 +0100
Subject: [PATCH 2/4] format IngestionEvents with prettier
---
.../IngestionEvents/IngestionEvents.tsx | 559 ++++++++++++------
1 file changed, 371 insertions(+), 188 deletions(-)
diff --git a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx
index 954f85b8..735ecaa8 100644
--- a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx
+++ b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx
@@ -1,27 +1,54 @@
-import React, {ReactNode, useEffect, useState} from "react";
-import authFetch from "../../util/auth/authFetch";
-import {EuiFlexItem, EuiBasicTable, EuiToolTip, EuiText, EuiButtonIcon, EuiScreenReaderOnly, EuiSpacer, EuiIconTip, EuiBadge, EuiFlexGroup, EuiInMemoryTable, EuiBasicTableColumn, EuiLoadingSpinner, EuiCodeBlock, Criteria} from "@elastic/eui";
-import '@elastic/eui/dist/eui_theme_light.css';
-import hdate from 'human-date';
-import {WorkspaceMetadata} from "../../types/Workspaces";
-import moment from "moment";
-import _ from "lodash";
+import React, { ReactNode, useEffect, useState } from "react"
+import authFetch from "../../util/auth/authFetch"
+import {
+ EuiFlexItem,
+ EuiBasicTable,
+ EuiToolTip,
+ EuiText,
+ EuiButtonIcon,
+ EuiScreenReaderOnly,
+ EuiSpacer,
+ EuiIconTip,
+ EuiBadge,
+ EuiFlexGroup,
+ EuiInMemoryTable,
+ EuiBasicTableColumn,
+ EuiLoadingSpinner,
+ EuiCodeBlock,
+ Criteria,
+} from "@elastic/eui"
+import "@elastic/eui/dist/eui_theme_light.css"
+import hdate from "human-date"
+import { WorkspaceMetadata } from "../../types/Workspaces"
+import moment from "moment"
+import _ from "lodash"
import {
BlobStatus,
ExtractorStatus,
IngestionTable,
Status,
extractorStatusColors,
- IngestionEventStatus
-} from "./types";
-import styles from "./IngestionEvents.module.css";
+ IngestionEventStatus,
+} from "./types"
+import styles from "./IngestionEvents.module.css"
type BlobProgress = "complete" | "completeWithErrors" | "inProgress"
const blobStatusIcons = {
- complete: ,
- completeWithErrors: ,
- inProgress:
+ complete: (
+
+ ),
+ completeWithErrors: (
+
+ ),
+ inProgress: (
+
+
+
+ ),
}
const SHORT_READABLE_DATE = "DD MMM HH:mm:ss"
@@ -29,200 +56,323 @@ const SHORT_READABLE_DATE = "DD MMM HH:mm:ss"
const statusToColor = (status: Status) => extractorStatusColors[status]
const getFailedStatuses = (statuses: ExtractorStatus[]) =>
- statuses.filter(status => status.statusUpdates.find(u => u.status === "Failure") !== undefined);
+ statuses.filter(
+ (status) =>
+ status.statusUpdates.find((u) => u.status === "Failure") !==
+ undefined
+ )
const getFailedBlobs = (blobs: BlobStatus[]) => {
- return blobs.filter(wb => {
- return getFailedStatuses(wb.extractorStatuses).length > 0;
- });
+ return blobs.filter((wb) => {
+ return getFailedStatuses(wb.extractorStatuses).length > 0
+ })
}
const getBlobStatus = (statuses: ExtractorStatus[]): BlobProgress => {
- const failures = getFailedStatuses(statuses);
- const inProgress = statuses.filter(status => status.statusUpdates.find(u => !u.status || ["Failure", "Success"].includes(u.status)) === undefined)
- return failures.length > 0 ? "completeWithErrors" : inProgress.length > 0 ? "inProgress" : "complete"
+ const failures = getFailedStatuses(statuses)
+ const inProgress = statuses.filter(
+ (status) =>
+ status.statusUpdates.find(
+ (u) => !u.status || ["Failure", "Success"].includes(u.status)
+ ) === undefined
+ )
+ return failures.length > 0
+ ? "completeWithErrors"
+ : inProgress.length > 0
+ ? "inProgress"
+ : "complete"
}
-const blobIngestedMultipleTimes = (status:BlobStatus) => status.extractorStatuses.find(s => s.statusUpdates.filter(u => u.status === "Started").length > 1) !== undefined
+const blobIngestedMultipleTimes = (status: BlobStatus) =>
+ status.extractorStatuses.find(
+ (s) => s.statusUpdates.filter((u) => u.status === "Started").length > 1
+ ) !== undefined
const extractorStatusList = (status: ExtractorStatus, title?: string) => {
- const statusUpdateStrings = status.statusUpdates.map(u => `${moment(u.eventTime).format(SHORT_READABLE_DATE)} ${u.status}`)
- return status.statusUpdates.length > 0 ?
- {title && <>{title}
>}
-
- {statusUpdateStrings.map(s => - {s}
)}
-
-
: "No events so far"
+ const statusUpdateStrings = status.statusUpdates.map(
+ (u) => `${moment(u.eventTime).format(SHORT_READABLE_DATE)} ${u.status}`
+ )
+ return status.statusUpdates.length > 0 ? (
+
+ {title && (
+ <>
+ {title}
+ >
+ )}
+
+ {statusUpdateStrings.map((s) => (
+ - {s}
+ ))}
+
+
+ ) : (
+ "No events so far"
+ )
}
// throw away everything before the last / to get the filename from a path
-const pathsToFileNames = (paths: string[]) => paths.map(p => p.split("/").slice(-1)).join("\n")
-
+const pathsToFileNames = (paths: string[]) =>
+ paths.map((p) => p.split("/").slice(-1)).join("\n")
const blobStatusText = {
complete: "Complete",
completeWithErrors: "Complete with errors",
- inProgress: "In progress"
+ inProgress: "In progress",
}
const statusIconColumn = {
- field: 'extractorStatuses',
- name: '',
- width: '40',
- render: (statuses: ExtractorStatus[], row: BlobStatus) => {
- const totalErrors = row.errors.length
- const extractorStatus = getBlobStatus(statuses)
- // if extractors have finished but there are other non-extractor related errors, show an error icon
- const combinedStatus = extractorStatus === "complete" && totalErrors > 0 ? "completeWithErrors" : extractorStatus
- return blobStatusIcons[combinedStatus]
- }
- }
+ field: "extractorStatuses",
+ name: "",
+ width: "40",
+ render: (statuses: ExtractorStatus[], row: BlobStatus) => {
+ const totalErrors = row.errors.length
+ const extractorStatus = getBlobStatus(statuses)
+ // if extractors have finished but there are other non-extractor related errors, show an error icon
+ const combinedStatus =
+ extractorStatus === "complete" && totalErrors > 0
+ ? "completeWithErrors"
+ : extractorStatus
+ return blobStatusIcons[combinedStatus]
+ },
+}
const columns: Array> = [
{
- field: 'paths',
- name: 'Filename(s)',
+ field: "paths",
+ name: "Filename(s)",
sortable: true,
truncateText: true,
- render: pathsToFileNames
+ render: pathsToFileNames,
},
{
- field: 'ingestStart',
- name: 'First event time',
+ field: "ingestStart",
+ name: "First event time",
sortable: true,
- render: (ingestStart: Date) => moment(ingestStart).format(SHORT_READABLE_DATE)
+ render: (ingestStart: Date) =>
+ moment(ingestStart).format(SHORT_READABLE_DATE),
},
{
- name: 'Ingestion run time',
- render: (row: BlobStatus) =>
- <>{moment.duration(moment(row.mostRecentEvent).diff(moment(row.ingestStart))).humanize()} {
- blobIngestedMultipleTimes(row) && }>
+ name: "Ingestion run time",
+ render: (row: BlobStatus) => (
+ <>
+ {moment
+ .duration(
+ moment(row.mostRecentEvent).diff(
+ moment(row.ingestStart)
+ )
+ )
+ .humanize()}{" "}
+ {blobIngestedMultipleTimes(row) && (
+
+ )}
+ >
+ ),
},
{
- field: 'extractorStatuses',
- name: 'Status',
+ field: "extractorStatuses",
+ name: "Status",
render: (statuses: ExtractorStatus[]) => {
return blobStatusText[getBlobStatus(statuses)]
- }
+ },
},
{
- field: 'extractorStatuses',
- name: 'Extractors',
+ field: "extractorStatuses",
+ name: "Extractors",
render: (statuses: ExtractorStatus[]) => {
- return statuses.length > 0 ? (
- {statuses.map(status => {
- const mostRecent = status.statusUpdates.length > 0 ? status.statusUpdates[status.statusUpdates.length - 1] : undefined
- return -
- {status.extractorType.replace("Extractor", "")}
-
- {mostRecent?.status ?
- (
-
- {mostRecent.status} ({moment(mostRecent.eventTime).format("HH:mm:ss") })
-
- ) : <>No updates>
- }
-
-
- })}
-
) : <>>
-
+ return statuses.length > 0 ? (
+
+ {statuses.map((status) => {
+ const mostRecent =
+ status.statusUpdates.length > 0
+ ? status.statusUpdates[
+ status.statusUpdates.length - 1
+ ]
+ : undefined
+ return (
+ -
+
+
+ {status.extractorType.replace(
+ "Extractor",
+ ""
+ )}
+
+
+ {mostRecent?.status ? (
+
+
+ {mostRecent.status} (
+ {moment(
+ mostRecent.eventTime
+ ).format("HH:mm:ss")}
+ )
+
+
+ ) : (
+ <>No updates>
+ )}
+
+
+
+ )
+ })}
+
+ ) : (
+ <>>
+ )
},
- width: "300"
+ width: "300",
},
-
-];
+]
const parseBlobStatus = (status: any): BlobStatus => {
return {
...status,
- paths: status.paths.map((p: any) => p ? p : "unknown-filename"),
+ paths: status.paths.map((p: any) => (p ? p : "unknown-filename")),
ingestStart: new Date(status.ingestStart),
mostRecentEvent: new Date(status.mostRecentEvent),
mimeTypes: status.mimeTypes?.split(","),
- eventStatuses: status.eventStatuses.map((es: any) => ({...es, eventTime: new Date(es.eventTime)})),
+ eventStatuses: status.eventStatuses.map((es: any) => ({
+ ...es,
+ eventTime: new Date(es.eventTime),
+ })),
extractorStatuses: status.extractorStatuses.map((s: any) => ({
extractorType: s.extractorType.replace("Extractor", ""),
- statusUpdates: _.sortBy(s.statusUpdates
- // discard empty status updates (does this make sense? Maybe we should tag them as 'unknown status' instead
- .filter((u: any) => u.eventTime !== undefined && u.status !== undefined)
- .map((u: any) => ({
- ...u,
- eventTime: new Date(u.eventTime)
- })), update => update.eventTime)
- }))
+ statusUpdates: _.sortBy(
+ s.statusUpdates
+ // discard empty status updates (does this make sense? Maybe we should tag them as 'unknown status' instead
+ .filter(
+ (u: any) =>
+ u.eventTime !== undefined && u.status !== undefined
+ )
+ .map((u: any) => ({
+ ...u,
+ eventTime: new Date(u.eventTime),
+ })),
+ (update) => update.eventTime
+ ),
+ })),
}
}
-const blobStatusId = (blobStatus: BlobStatus) => `${blobStatus.metadata.ingestId}-${blobStatus.metadata.blobId}`
+const blobStatusId = (blobStatus: BlobStatus) =>
+ `${blobStatus.metadata.ingestId}-${blobStatus.metadata.blobId}`
const renderExpandedRow = (blobStatus: BlobStatus) => {
const columns: Array> = [
{
- field: 'eventTime',
- name: 'Event time',
- render: (time: Date) => moment(time).format(SHORT_READABLE_DATE)
+ field: "eventTime",
+ name: "Event time",
+ render: (time: Date) => moment(time).format(SHORT_READABLE_DATE),
},
{
- field: 'eventType',
- name: 'Event',
+ field: "eventType",
+ name: "Event",
},
{
- field: 'eventStatus',
- name: 'Status',
+ field: "eventStatus",
+ name: "Status",
render: (status: Status) => {
- return {status}
+ return (
+ {status}
+ )
},
},
- ];
-
- return
- {pathsToFileNames(blobStatus.paths)}
- Full file path(s) : {blobStatus.paths.join(", ")}. Ingestion started on {hdate.prettyPrint(blobStatus.ingestStart)}
- All ingestion events prior to extraction
- s.eventTime.toISOString() )}
- columns={columns}
- />
- Extraction events
- {blobStatus.mimeTypes && `This file is of type ${blobStatus.mimeTypes.join(",")}.`} Giant has run the following extractors on the file:
-
- {blobStatus.extractorStatuses.map(extractorStatus => {
- const numErrors = extractorStatus.statusUpdates.filter(su => su.status === "Failure").length
- const numStarted = extractorStatus.statusUpdates.filter(su => su.status === "Started").length
- const mostRecent = extractorStatus.statusUpdates.length > 0 ? extractorStatus.statusUpdates[extractorStatus.statusUpdates.length - 1] : undefined
- return <>
{extractorStatus.extractorType}
-
The extractor {extractorStatus.extractorType} has been started {numStarted} times. There have been {numErrors} errors.
- {mostRecent ? <>The most recent status event is '{mostRecent.status}' which happened on {hdate.prettyPrint(mostRecent.eventTime, {showTime: true})}> : ""}
-
- All {extractorStatus.extractorType} events:
-
- {extractorStatusList(extractorStatus)}
-
-
-
>
- })}
-
- {blobStatus.errors.length > 0 &&
- <>
- Errors encountered processing this file
- {blobStatus.errors.map(error =>
-
-
{error.eventType}
- {error.errors.map(e => {e.message})}
-
- )
- }
- >
- }
-
+ ]
+
+ return (
+
+ {pathsToFileNames(blobStatus.paths)}
+
+ Full file path(s) : {blobStatus.paths.join(", ")}. Ingestion
+ started on {hdate.prettyPrint(blobStatus.ingestStart)}
+
+ All ingestion events prior to extraction
+
+ s.eventTime.toISOString()
+ )}
+ columns={columns}
+ />
+ Extraction events
+ {blobStatus.mimeTypes &&
+ `This file is of type ${blobStatus.mimeTypes.join(",")}.`}{" "}
+ Giant has run the following extractors on the file:
+
+ {blobStatus.extractorStatuses.map((extractorStatus) => {
+ const numErrors = extractorStatus.statusUpdates.filter(
+ (su) => su.status === "Failure"
+ ).length
+ const numStarted = extractorStatus.statusUpdates.filter(
+ (su) => su.status === "Started"
+ ).length
+ const mostRecent =
+ extractorStatus.statusUpdates.length > 0
+ ? extractorStatus.statusUpdates[
+ extractorStatus.statusUpdates.length - 1
+ ]
+ : undefined
+ return (
+ <>
+
{extractorStatus.extractorType}
+
+ The extractor {extractorStatus.extractorType}{" "}
+ has been started {numStarted} times. There have
+ been {numErrors} errors.
+
+ {mostRecent ? (
+ <>
+ The most recent status event is '
+ {mostRecent.status}' which happened on{" "}
+ {hdate.prettyPrint(
+ mostRecent.eventTime,
+ { showTime: true }
+ )}
+ >
+ ) : (
+ ""
+ )}{" "}
+
+ All {extractorStatus.extractorType} events:
+ {extractorStatusList(extractorStatus)}
+
+ >
+ )
+ })}
+
+ {blobStatus.errors.length > 0 && (
+ <>
+ Errors encountered processing this file
+ {blobStatus.errors.map((error) => (
+
+
{error.eventType}
+ {error.errors.map((e) => (
+ {e.message}
+ ))}
+
+ ))}
+ >
+ )}
+
+ )
}
function IngestionEventsTable({
@@ -271,101 +421,135 @@ function IngestionEventsTable({
)
}
-export function IngestionEvents(
- {collectionId, ingestId, workspaces, breakdownByWorkspace, showErrorsOnly}: {
- collectionId: string,
- ingestId?: string,
- workspaces: WorkspaceMetadata[],
- breakdownByWorkspace: boolean,
- showErrorsOnly: boolean,
- }) {
+export function IngestionEvents({
+ collectionId,
+ ingestId,
+ workspaces,
+ breakdownByWorkspace,
+ showErrorsOnly,
+}: {
+ collectionId: string
+ ingestId?: string
+ workspaces: WorkspaceMetadata[]
+ breakdownByWorkspace: boolean
+ showErrorsOnly: boolean
+}) {
const [blobs, updateBlobs] = useState(undefined)
const [tableData, setTableData] = useState([])
const ingestIdSuffix = ingestId && ingestId !== "all" ? `/${ingestId}` : ""
// Expanding rows logic - we use itemIdToExpandedRowMap to keep track of which rows have been expanded
- const [itemIdToExpandedRowMap, setItemIdToExpandedRowMap ] = useState<
+ const [itemIdToExpandedRowMap, setItemIdToExpandedRowMap] = useState<
Record
- >({});
+ >({})
const openRow = (blobStatus: BlobStatus) => {
- const map = {...itemIdToExpandedRowMap}
+ const map = { ...itemIdToExpandedRowMap }
const id = blobStatusId(blobStatus)
map[id] = renderExpandedRow(blobStatus)
setItemIdToExpandedRowMap(map)
}
const closeRow = (blobStatus: BlobStatus) => {
- const map = {...itemIdToExpandedRowMap}
+ const map = { ...itemIdToExpandedRowMap }
delete map[blobStatusId(blobStatus)]
setItemIdToExpandedRowMap(map)
}
- const columnsWithWorkspace = breakdownByWorkspace ?
- columns : columns.concat( {
- field: 'workspaceName',
- sortable: true,
- name: 'Workspace name'
- })
+ const columnsWithWorkspace = breakdownByWorkspace
+ ? columns
+ : columns.concat({
+ field: "workspaceName",
+ sortable: true,
+ name: "Workspace name",
+ })
const columnsWithExpandingRow: Array> = [
...columnsWithWorkspace,
statusIconColumn,
{
- align: 'right',
- width: '40px',
+ align: "right",
+ width: "40px",
isExpander: true,
- name: (Expand rows),
+ name: (
+
+ Expand rows
+
+ ),
render: (row: BlobStatus) => (
itemIdToExpandedRowMap[blobStatusId(row)] ? closeRow(row) : openRow(row)}
+ onClick={() =>
+ itemIdToExpandedRowMap[blobStatusId(row)]
+ ? closeRow(row)
+ : openRow(row)
+ }
aria-label={
- itemIdToExpandedRowMap[blobStatusId(row)] ? 'Collapse' : 'Expand'
+ itemIdToExpandedRowMap[blobStatusId(row)]
+ ? "Collapse"
+ : "Expand"
}
iconType={
- itemIdToExpandedRowMap[blobStatusId(row)] ? 'arrowDown' : 'arrowRight'
+ itemIdToExpandedRowMap[blobStatusId(row)]
+ ? "arrowDown"
+ : "arrowRight"
}
/>
- )
- }
+ ),
+ },
]
useEffect(() => {
authFetch(`/api/ingestion-events/${collectionId}${ingestIdSuffix}`)
- .then(resp => resp.json())
- .then(json => {
+ .then((resp) => resp.json())
+ .then((json) => {
const blobStatuses: BlobStatus[] = json.map(parseBlobStatus)
updateBlobs(blobStatuses)
- })
+ })
}, [collectionId, ingestId, updateBlobs, ingestIdSuffix])
- const getWorkspaceBlobs = (allBlobs: BlobStatus[], workspaceName: string, errorsOnly: boolean | undefined) => {
- const workspaceBlobs = allBlobs.filter(b => b.workspaceName === workspaceName);
+ const getWorkspaceBlobs = (
+ allBlobs: BlobStatus[],
+ workspaceName: string,
+ errorsOnly: boolean | undefined
+ ) => {
+ const workspaceBlobs = allBlobs.filter(
+ (b) => b.workspaceName === workspaceName
+ )
- if (errorsOnly) return getFailedBlobs(workspaceBlobs);
+ if (errorsOnly) return getFailedBlobs(workspaceBlobs)
- return workspaceBlobs;
+ return workspaceBlobs
}
useEffect(() => {
if (blobs) {
if (breakdownByWorkspace) {
- setTableData(workspaces
- .map((w: WorkspaceMetadata) => ({
+ setTableData(
+ workspaces.map((w: WorkspaceMetadata) => ({
title: `Workspace: ${w.name}`,
- blobs: getWorkspaceBlobs(blobs, w.name, showErrorsOnly)
- })))
+ blobs: getWorkspaceBlobs(blobs, w.name, showErrorsOnly),
+ }))
+ )
} else {
setTableData([
{
title: `${collectionId}${ingestIdSuffix}`,
- blobs: showErrorsOnly ? getFailedBlobs(blobs) : blobs
- }])
+ blobs: showErrorsOnly ? getFailedBlobs(blobs) : blobs,
+ },
+ ])
}
} else {
setTableData([])
}
- }, [breakdownByWorkspace, blobs, workspaces, ingestIdSuffix, collectionId, showErrorsOnly, setItemIdToExpandedRowMap])
+ }, [
+ breakdownByWorkspace,
+ blobs,
+ workspaces,
+ ingestIdSuffix,
+ collectionId,
+ showErrorsOnly,
+ setItemIdToExpandedRowMap,
+ ])
return (
<>
@@ -384,4 +568,3 @@ export function IngestionEvents(
>
)
}
-
From b81c30baee051d609db2de608f70f90dd77f8c7a Mon Sep 17 00:00:00 2001
From: Zeke Hunter-Green
Date: Tue, 3 Oct 2023 16:32:32 +0100
Subject: [PATCH 3/4] dashboard shows specific message for blobs in infinite
loop
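The infinite_loop flag computed by the query is now carried through the Scala
BlobStatus model (as infiniteLoop) and the frontend BlobStatus type, and the
expanded row renders a dedicated "failing in infinite loop" message instead of
the usual event and extractor tables. Roughly, the frontend consumes the flag
as in the sketch below (BlobStatusSummary and renderLoopNotice are illustrative
names, not part of this patch; the real component renders EUI elements rather
than returning a string):

    // Sketch: branching on the new flag when rendering the expanded row.
    type BlobStatusSummary = {
        metadata: { blobId: string; ingestId: string }
        infiniteLoop: boolean
    }

    const renderLoopNotice = (blob: BlobStatusSummary): string =>
        blob.infiniteLoop
            ? `Ingestion failing in infinite loop: blob id ${blob.metadata.blobId}`
            : "All ingestion events prior to extraction"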
---
.../app/services/observability/Models.scala | 3 +-
.../observability/PostgresClient.scala | 37 ++---
.../IngestionEvents/IngestionEvents.tsx | 136 +++++++++++-------
.../js/components/IngestionEvents/types.ts | 1 +
4 files changed, 104 insertions(+), 73 deletions(-)
diff --git a/backend/app/services/observability/Models.scala b/backend/app/services/observability/Models.scala
index f5e1f1ec..8005e346 100644
--- a/backend/app/services/observability/Models.scala
+++ b/backend/app/services/observability/Models.scala
@@ -197,7 +197,8 @@ case class BlobStatus(
eventStatuses: List[IngestionEventStatus],
extractorStatuses: List[ExtractorStatus],
errors: List[IngestionErrorsWithEventType],
- mimeTypes: Option[String])
+ mimeTypes: Option[String],
+ infiniteLoop: Boolean)
object BlobStatus {
implicit val dateWrites = JodaReadWrites.dateWrites
implicit val dateReads = JodaReadWrites.dateReads
diff --git a/backend/app/services/observability/PostgresClient.scala b/backend/app/services/observability/PostgresClient.scala
index 832021c0..7e3ea767 100644
--- a/backend/app/services/observability/PostgresClient.scala
+++ b/backend/app/services/observability/PostgresClient.scala
@@ -157,7 +157,7 @@ class PostgresClientImpl(postgresConfig: PostgresConfig) extends PostgresClient
ie.errors,
ie.workspace_name AS "workspaceName",
ie.mime_types AS "mimeTypes",
- infinite_loop,
+ ie.infinite_loop AS "infiniteLoop",
ARRAY_AGG(DISTINCT blob_metadata.path ) AS paths,
(ARRAY_AGG(blob_metadata.file_size))[1] as "fileSize",
ARRAY_REMOVE(ARRAY_AGG(extractor_statuses.extractor), NULL) AS extractors,
@@ -207,32 +207,33 @@ class PostgresClientImpl(postgresConfig: PostgresConfig) extends PostgresClient
""".map(rs => {
val eventTypes = rs.array("event_types").getArray.asInstanceOf[Array[String]]
BlobStatus(
- EventMetadata(
- rs.string("blob_id"),
- rs.string("ingest_id")
- ),
- BlobStatus.parsePathsArray(rs.array("paths").getArray().asInstanceOf[Array[String]]),
- rs.longOpt("fileSize"),
- rs.stringOpt("workspaceName"),
+ EventMetadata(
+ rs.string("blob_id"),
+ rs.string("ingest_id")
+ ),
+ BlobStatus.parsePathsArray(rs.array("paths").getArray().asInstanceOf[Array[String]]),
+ rs.longOpt("fileSize"),
+ rs.stringOpt("workspaceName"),
PostgresHelpers.postgresEpochToDateTime(rs.double("ingest_start")),
PostgresHelpers.postgresEpochToDateTime(rs.double("most_recent_event")),
IngestionEventStatus.parseEventStatus(
rs.array("event_times").getArray.asInstanceOf[Array[java.math.BigDecimal]].map(t =>PostgresHelpers.postgresEpochToDateTime(t.doubleValue)),
eventTypes,
- rs.array("event_statuses").getArray.asInstanceOf[Array[String]]),
- rs.arrayOpt("extractors").map { extractors =>
- ExtractorStatus.parseDbStatusEvents(
- extractors.getArray().asInstanceOf[Array[String]],
- rs.array("extractorEventTimes").getArray().asInstanceOf[Array[String]],
- rs.array("extractorStatuses").getArray().asInstanceOf[Array[String]]
- )
- }.getOrElse(List()),
+ rs.array("event_statuses").getArray.asInstanceOf[Array[String]]
+ ),
+ rs.arrayOpt("extractors").map { extractors =>
+ ExtractorStatus.parseDbStatusEvents(
+ extractors.getArray().asInstanceOf[Array[String]],
+ rs.array("extractorEventTimes").getArray().asInstanceOf[Array[String]],
+ rs.array("extractorStatuses").getArray().asInstanceOf[Array[String]]
+ )
+ }.getOrElse(List()),
IngestionError.parseIngestionErrors(
rs.array("errors").getArray.asInstanceOf[Array[String]],
eventTypes
),
- rs.stringOpt("mimeTypes")
-
+ rs.stringOpt("mimeTypes"),
+ rs.boolean("infiniteLoop")
)
}
).list().apply()
diff --git a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx
index 735ecaa8..bdc9ae11 100644
--- a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx
+++ b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx
@@ -49,6 +49,12 @@ const blobStatusIcons = {
),
+ infiniteLoop: (
+
+ ),
}
const SHORT_READABLE_DATE = "DD MMM HH:mm:ss"
@@ -125,6 +131,9 @@ const statusIconColumn = {
name: "",
width: "40",
render: (statuses: ExtractorStatus[], row: BlobStatus) => {
+ if (row.infiniteLoop) {
+ return blobStatusIcons["infiniteLoop"]
+ }
const totalErrors = row.errors.length
const extractorStatus = getBlobStatus(statuses)
// if extractors have finished but there are other non-extractor related errors, show an error icon
@@ -304,60 +313,79 @@ const renderExpandedRow = (blobStatus: BlobStatus) => {
Full file path(s) : {blobStatus.paths.join(", ")}. Ingestion
started on {hdate.prettyPrint(blobStatus.ingestStart)}
- All ingestion events prior to extraction
-
- s.eventTime.toISOString()
- )}
- columns={columns}
- />
- Extraction events
- {blobStatus.mimeTypes &&
- `This file is of type ${blobStatus.mimeTypes.join(",")}.`}{" "}
- Giant has run the following extractors on the file:
-
- {blobStatus.extractorStatuses.map((extractorStatus) => {
- const numErrors = extractorStatus.statusUpdates.filter(
- (su) => su.status === "Failure"
- ).length
- const numStarted = extractorStatus.statusUpdates.filter(
- (su) => su.status === "Started"
- ).length
- const mostRecent =
- extractorStatus.statusUpdates.length > 0
- ? extractorStatus.statusUpdates[
- extractorStatus.statusUpdates.length - 1
- ]
- : undefined
- return (
- <>
-
{extractorStatus.extractorType}
-
- The extractor {extractorStatus.extractorType}{" "}
- has been started {numStarted} times. There have
- been {numErrors} errors.
-
- {mostRecent ? (
- <>
- The most recent status event is '
- {mostRecent.status}' which happened on{" "}
- {hdate.prettyPrint(
- mostRecent.eventTime,
- { showTime: true }
- )}
- >
- ) : (
- ""
- )}{" "}
-
- All {extractorStatus.extractorType} events:
- {extractorStatusList(extractorStatus)}
-
- >
- )
- })}
-
+ {blobStatus.infiniteLoop && (
+ <>
+ Ingestion failing in infinite loop
+ blob id {blobStatus.metadata.blobId}
+ >
+ )}
+ {!blobStatus.infiniteLoop && (
+ <>
+ All ingestion events prior to extraction
+
+ s.eventTime.toISOString()
+ )}
+ columns={columns}
+ />
+ Extraction events
+ {blobStatus.mimeTypes &&
+ `This file is of type ${blobStatus.mimeTypes.join(
+ ","
+ )}.`}{" "}
+ Giant has run the following extractors on the file:
+
+ {blobStatus.extractorStatuses.map((extractorStatus) => {
+ const numErrors =
+ extractorStatus.statusUpdates.filter(
+ (su) => su.status === "Failure"
+ ).length
+ const numStarted =
+ extractorStatus.statusUpdates.filter(
+ (su) => su.status === "Started"
+ ).length
+ const mostRecent =
+ extractorStatus.statusUpdates.length > 0
+ ? extractorStatus.statusUpdates[
+ extractorStatus.statusUpdates.length -
+ 1
+ ]
+ : undefined
+ return (
+ <>
+
{extractorStatus.extractorType}
+
+ The extractor{" "}
+ {extractorStatus.extractorType} has been
+ started {numStarted} times. There have
+ been {numErrors} errors.
+
+ {mostRecent ? (
+ <>
+ The most recent status event is
+ '{mostRecent.status}' which
+ happened on{" "}
+ {hdate.prettyPrint(
+ mostRecent.eventTime,
+ { showTime: true }
+ )}
+ >
+ ) : (
+ ""
+ )}{" "}
+
+ All {extractorStatus.extractorType}{" "}
+ events:
+ {extractorStatusList(extractorStatus)}
+
+ >
+ )
+ })}
+
+ >
+ )}
+
{blobStatus.errors.length > 0 && (
<>
Errors encountered processing this file
diff --git a/frontend/src/js/components/IngestionEvents/types.ts b/frontend/src/js/components/IngestionEvents/types.ts
index 409463fb..15f3500d 100644
--- a/frontend/src/js/components/IngestionEvents/types.ts
+++ b/frontend/src/js/components/IngestionEvents/types.ts
@@ -32,6 +32,7 @@ export type BlobStatus = {
errors: IngestionErrorWithEventType[];
workspaceName: string;
mimeTypes: string[];
+ infiniteLoop: boolean;
}
export type IngestionTable = {
From e3a655926f96f2d26780df0f63f9dac2ca5fd427 Mon Sep 17 00:00:00 2001
From: Zeke Hunter-Green
Date: Wed, 4 Oct 2023 17:48:14 +0100
Subject: [PATCH 4/4] show status 'infinite loop', looping files appear when
'errors only' is toggled
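getBlobStatus now takes the infiniteLoop flag, so both the status column and
the status icon report the looping state, and getFailedBlobs counts looping
blobs as failures so they appear when the "errors only" toggle is on. A
condensed sketch of the new shapes follows (ExtractorStatus is simplified here
and the in-progress detection is elided; see the diff below for the real code):

    // Sketch of the updated status logic introduced by this patch.
    type ExtractorStatus = { statusUpdates: { status?: string }[] }
    type BlobProgress =
        | "complete"
        | "completeWithErrors"
        | "inProgress"
        | "infiniteLoop"

    const getBlobStatus = (
        statuses: ExtractorStatus[],
        infiniteLoop: boolean
    ): BlobProgress => {
        if (infiniteLoop) return "infiniteLoop"
        const failed = statuses.some((s) =>
            s.statusUpdates.some((u) => u.status === "Failure")
        )
        return failed ? "completeWithErrors" : "complete"
    }

    const getFailedBlobs = (
        blobs: { extractorStatuses: ExtractorStatus[]; infiniteLoop: boolean }[]
    ) =>
        blobs.filter(
            (b) =>
                b.infiniteLoop ||
                b.extractorStatuses.some((s) =>
                    s.statusUpdates.some((u) => u.status === "Failure")
                )
        )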
---
.../components/IngestionEvents/IngestionEvents.tsx | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx
index bdc9ae11..a68df949 100644
--- a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx
+++ b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx
@@ -32,7 +32,7 @@ import {
} from "./types"
import styles from "./IngestionEvents.module.css"
-type BlobProgress = "complete" | "completeWithErrors" | "inProgress"
+type BlobProgress = "complete" | "completeWithErrors" | "inProgress" | "infiniteLoop"
const blobStatusIcons = {
complete: (
@@ -70,11 +70,12 @@ const getFailedStatuses = (statuses: ExtractorStatus[]) =>
const getFailedBlobs = (blobs: BlobStatus[]) => {
return blobs.filter((wb) => {
- return getFailedStatuses(wb.extractorStatuses).length > 0
+ return getFailedStatuses(wb.extractorStatuses).length > 0 || wb.infiniteLoop
})
}
-const getBlobStatus = (statuses: ExtractorStatus[]): BlobProgress => {
+const getBlobStatus = (statuses: ExtractorStatus[], infiniteLoop: boolean): BlobProgress => {
+ if (infiniteLoop) return "infiniteLoop"
const failures = getFailedStatuses(statuses)
const inProgress = statuses.filter(
(status) =>
@@ -124,6 +125,7 @@ const blobStatusText = {
complete: "Complete",
completeWithErrors: "Complete with errors",
inProgress: "In progress",
+ infiniteLoop: "Infinite Loop",
}
const statusIconColumn = {
@@ -135,7 +137,7 @@ const statusIconColumn = {
return blobStatusIcons["infiniteLoop"]
}
const totalErrors = row.errors.length
- const extractorStatus = getBlobStatus(statuses)
+ const extractorStatus = getBlobStatus(statuses, row.infiniteLoop)
// if extractors have finished but there are other non-extractor related errors, show an error icon
const combinedStatus =
extractorStatus === "complete" && totalErrors > 0
@@ -188,8 +190,8 @@ const columns: Array> = [
{
field: "extractorStatuses",
name: "Status",
- render: (statuses: ExtractorStatus[]) => {
- return blobStatusText[getBlobStatus(statuses)]
+ render: (statuses: ExtractorStatus[], row: BlobStatus) => {
+ return blobStatusText[getBlobStatus(statuses, row.infiniteLoop)]
},
},
{