From f663c248ed5019bd80aad6e0a61e69bb982ff85f Mon Sep 17 00:00:00 2001 From: Zeke Hunter-Green Date: Tue, 3 Oct 2023 15:23:34 +0100 Subject: [PATCH 1/4] add problem blobs back to ingestion events query --- .../observability/PostgresClient.scala | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/backend/app/services/observability/PostgresClient.scala b/backend/app/services/observability/PostgresClient.scala index 994477ff..832021c0 100644 --- a/backend/app/services/observability/PostgresClient.scala +++ b/backend/app/services/observability/PostgresClient.scala @@ -157,6 +157,7 @@ class PostgresClientImpl(postgresConfig: PostgresConfig) extends PostgresClient ie.errors, ie.workspace_name AS "workspaceName", ie.mime_types AS "mimeTypes", + infinite_loop, ARRAY_AGG(DISTINCT blob_metadata.path ) AS paths, (ARRAY_AGG(blob_metadata.file_size))[1] as "fileSize", ARRAY_REMOVE(ARRAY_AGG(extractor_statuses.extractor), NULL) AS extractors, @@ -167,22 +168,41 @@ class PostgresClientImpl(postgresConfig: PostgresConfig) extends PostgresClient SELECT blob_id, ingest_id, - MIN(EXTRACT(EPOCH from event_time)) AS ingest_start, - MAX(EXTRACT(EPOCH from event_time)) AS most_recent_event, + MIN(EXTRACT(EPOCH FROM event_time)) AS ingest_start, + MAX(EXTRACT(EPOCH FROM event_time)) AS most_recent_event, ARRAY_AGG(type) as event_types, ARRAY_AGG(EXTRACT(EPOCH from event_time)) as event_times, ARRAY_AGG(status) as event_statuses, ARRAY_AGG(details -> 'errors') as errors, (ARRAY_AGG(details ->> 'workspaceName') FILTER (WHERE details ->> 'workspaceName' IS NOT NULL))[1] as workspace_name, - (ARRAY_AGG(details ->> 'mimeTypes') FILTER (WHERE details ->> 'mimeTypes' IS NOT NULL))[1] as mime_types + (ARRAY_AGG(details ->> 'mimeTypes') FILTER (WHERE details ->> 'mimeTypes' IS NOT NULL))[1] as mime_types, + FALSE AS infinite_loop FROM ingestion_events WHERE ingest_id LIKE ${if(ingestIdIsPrefix) LikeConditionEscapeUtil.beginsWith(ingestId) else ingestId} AND blob_id NOT IN (SELECT blob_id FROM problem_blobs) GROUP BY 1,2 + UNION + -- blobs in the ingestion that are failing in an infinite loop + SELECT DISTINCT + blob_id, + ingest_id, + MIN(EXTRACT(EPOCH FROM event_time)) AS ingest_start, + MAX(EXTRACT(EPOCH FROM event_time)) AS most_recent_event, + array[]::text[] AS event_types, + array[]::numeric[] AS event_times, + array[]::text[] AS event_statuses, + array['[]'::jsonb] AS errors, + NULL AS workspace_name, + NULL AS mime_types, + TRUE AS infinite_loop + FROM ingestion_events + WHERE ingest_id LIKE ${if(ingestIdIsPrefix) LikeConditionEscapeUtil.beginsWith(ingestId) else ingestId} + AND blob_id IN (SELECT blob_id FROM problem_blobs) + GROUP BY 1,2 ) AS ie LEFT JOIN blob_metadata USING(ingest_id, blob_id) LEFT JOIN extractor_statuses on extractor_statuses.blob_id = ie.blob_id and extractor_statuses.ingest_id = ie.ingest_id - GROUP BY 1,2,3,4,5,6,7,8,9,10 + GROUP BY 1,2,3,4,5,6,7,8,9,10,11 ORDER by ingest_start desc """.map(rs => { val eventTypes = rs.array("event_types").getArray.asInstanceOf[Array[String]] From 3144fb80e52b4da73c02c219c9bbc96de5105d08 Mon Sep 17 00:00:00 2001 From: Zeke Hunter-Green Date: Tue, 3 Oct 2023 15:27:34 +0100 Subject: [PATCH 2/4] format IngestionEvents with prettier --- .../IngestionEvents/IngestionEvents.tsx | 559 ++++++++++++------ 1 file changed, 371 insertions(+), 188 deletions(-) diff --git a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx index 954f85b8..735ecaa8 100644 --- 
a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx +++ b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx @@ -1,27 +1,54 @@ -import React, {ReactNode, useEffect, useState} from "react"; -import authFetch from "../../util/auth/authFetch"; -import {EuiFlexItem, EuiBasicTable, EuiToolTip, EuiText, EuiButtonIcon, EuiScreenReaderOnly, EuiSpacer, EuiIconTip, EuiBadge, EuiFlexGroup, EuiInMemoryTable, EuiBasicTableColumn, EuiLoadingSpinner, EuiCodeBlock, Criteria} from "@elastic/eui"; -import '@elastic/eui/dist/eui_theme_light.css'; -import hdate from 'human-date'; -import {WorkspaceMetadata} from "../../types/Workspaces"; -import moment from "moment"; -import _ from "lodash"; +import React, { ReactNode, useEffect, useState } from "react" +import authFetch from "../../util/auth/authFetch" +import { + EuiFlexItem, + EuiBasicTable, + EuiToolTip, + EuiText, + EuiButtonIcon, + EuiScreenReaderOnly, + EuiSpacer, + EuiIconTip, + EuiBadge, + EuiFlexGroup, + EuiInMemoryTable, + EuiBasicTableColumn, + EuiLoadingSpinner, + EuiCodeBlock, + Criteria, +} from "@elastic/eui" +import "@elastic/eui/dist/eui_theme_light.css" +import hdate from "human-date" +import { WorkspaceMetadata } from "../../types/Workspaces" +import moment from "moment" +import _ from "lodash" import { BlobStatus, ExtractorStatus, IngestionTable, Status, extractorStatusColors, - IngestionEventStatus -} from "./types"; -import styles from "./IngestionEvents.module.css"; + IngestionEventStatus, +} from "./types" +import styles from "./IngestionEvents.module.css" type BlobProgress = "complete" | "completeWithErrors" | "inProgress" const blobStatusIcons = { - complete: , - completeWithErrors: , - inProgress: + complete: ( + + ), + completeWithErrors: ( + + ), + inProgress: ( + + + + ), } const SHORT_READABLE_DATE = "DD MMM HH:mm:ss" @@ -29,200 +56,323 @@ const SHORT_READABLE_DATE = "DD MMM HH:mm:ss" const statusToColor = (status: Status) => extractorStatusColors[status] const getFailedStatuses = (statuses: ExtractorStatus[]) => - statuses.filter(status => status.statusUpdates.find(u => u.status === "Failure") !== undefined); + statuses.filter( + (status) => + status.statusUpdates.find((u) => u.status === "Failure") !== + undefined + ) const getFailedBlobs = (blobs: BlobStatus[]) => { - return blobs.filter(wb => { - return getFailedStatuses(wb.extractorStatuses).length > 0; - }); + return blobs.filter((wb) => { + return getFailedStatuses(wb.extractorStatuses).length > 0 + }) } const getBlobStatus = (statuses: ExtractorStatus[]): BlobProgress => { - const failures = getFailedStatuses(statuses); - const inProgress = statuses.filter(status => status.statusUpdates.find(u => !u.status || ["Failure", "Success"].includes(u.status)) === undefined) - return failures.length > 0 ? "completeWithErrors" : inProgress.length > 0 ? "inProgress" : "complete" + const failures = getFailedStatuses(statuses) + const inProgress = statuses.filter( + (status) => + status.statusUpdates.find( + (u) => !u.status || ["Failure", "Success"].includes(u.status) + ) === undefined + ) + return failures.length > 0 + ? "completeWithErrors" + : inProgress.length > 0 + ? 
"inProgress" + : "complete" } -const blobIngestedMultipleTimes = (status:BlobStatus) => status.extractorStatuses.find(s => s.statusUpdates.filter(u => u.status === "Started").length > 1) !== undefined +const blobIngestedMultipleTimes = (status: BlobStatus) => + status.extractorStatuses.find( + (s) => s.statusUpdates.filter((u) => u.status === "Started").length > 1 + ) !== undefined const extractorStatusList = (status: ExtractorStatus, title?: string) => { - const statusUpdateStrings = status.statusUpdates.map(u => `${moment(u.eventTime).format(SHORT_READABLE_DATE)} ${u.status}`) - return status.statusUpdates.length > 0 ?
<EuiText size="s">
-        {title && <>{title}</>}
-        <ul>
-            {statusUpdateStrings.map(s =>
-                <li>{s}</li>
-            )}
-        </ul>
-    </EuiText>
: "No events so far" + const statusUpdateStrings = status.statusUpdates.map( + (u) => `${moment(u.eventTime).format(SHORT_READABLE_DATE)} ${u.status}` + ) + return status.statusUpdates.length > 0 ? ( +
+        <EuiText size="s">
+            {title && (
+                <>
+                    {title}
+                </>
+            )}
+            <ul>
+                {statusUpdateStrings.map((s) => (
+                    <li>{s}</li>
+                ))}
+            </ul>
+        </EuiText>
+ ) : ( + "No events so far" + ) } // throw away everything after last / to get the filename from a path -const pathsToFileNames = (paths: string[]) => paths.map(p => p.split("/").slice(-1)).join("\n") - +const pathsToFileNames = (paths: string[]) => + paths.map((p) => p.split("/").slice(-1)).join("\n") const blobStatusText = { complete: "Complete", completeWithErrors: "Complete with errors", - inProgress: "In progress" + inProgress: "In progress", } const statusIconColumn = { - field: 'extractorStatuses', - name: '', - width: '40', - render: (statuses: ExtractorStatus[], row: BlobStatus) => { - const totalErrors = row.errors.length - const extractorStatus = getBlobStatus(statuses) - // if extractors have finished but there are other non-extractor related errors, show an error icon - const combinedStatus = extractorStatus === "complete" && totalErrors > 0 ? "completeWithErrors" : extractorStatus - return blobStatusIcons[combinedStatus] - } - } + field: "extractorStatuses", + name: "", + width: "40", + render: (statuses: ExtractorStatus[], row: BlobStatus) => { + const totalErrors = row.errors.length + const extractorStatus = getBlobStatus(statuses) + // if extractors have finished but there are other non-extractor related errors, show an error icon + const combinedStatus = + extractorStatus === "complete" && totalErrors > 0 + ? "completeWithErrors" + : extractorStatus + return blobStatusIcons[combinedStatus] + }, +} const columns: Array> = [ { - field: 'paths', - name: 'Filename(s)', + field: "paths", + name: "Filename(s)", sortable: true, truncateText: true, - render: pathsToFileNames + render: pathsToFileNames, }, { - field: 'ingestStart', - name: 'First event time', + field: "ingestStart", + name: "First event time", sortable: true, - render: (ingestStart: Date) => moment(ingestStart).format(SHORT_READABLE_DATE) + render: (ingestStart: Date) => + moment(ingestStart).format(SHORT_READABLE_DATE), }, { - name: 'Ingestion run time', - render: (row: BlobStatus) => - <>{moment.duration(moment(row.mostRecentEvent).diff(moment(row.ingestStart))).humanize()} { - blobIngestedMultipleTimes(row) && } + name: "Ingestion run time", + render: (row: BlobStatus) => ( + <> + {moment + .duration( + moment(row.mostRecentEvent).diff( + moment(row.ingestStart) + ) + ) + .humanize()}{" "} + {blobIngestedMultipleTimes(row) && ( + + )} + + ), }, { - field: 'extractorStatuses', - name: 'Status', + field: "extractorStatuses", + name: "Status", render: (statuses: ExtractorStatus[]) => { return blobStatusText[getBlobStatus(statuses)] - } + }, }, { - field: 'extractorStatuses', - name: 'Extractors', + field: "extractorStatuses", + name: "Extractors", render: (statuses: ExtractorStatus[]) => { - return statuses.length > 0 ? (
<ul>
-            {statuses.map(status => {
-                const mostRecent = status.statusUpdates.length > 0 ? status.statusUpdates[status.statusUpdates.length - 1] : undefined
-                return <li>
-                    {status.extractorType.replace("Extractor", "")}
-                    {mostRecent?.status ?
-                        (
-                            <EuiBadge color={statusToColor(mostRecent.status)}>
-                                {mostRecent.status} ({moment(mostRecent.eventTime).format("HH:mm:ss") })
-                            </EuiBadge>
-                        ) : <>No updates</>
-                    }
-                </li>
-            })}
-        </ul>
) : <></>
+        return statuses.length > 0 ? (
<ul>
+                {statuses.map((status) => {
+                    const mostRecent =
+                        status.statusUpdates.length > 0
+                            ? status.statusUpdates[
+                                  status.statusUpdates.length - 1
+                              ]
+                            : undefined
+                    return (
+                        <li>
+                            {status.extractorType.replace(
+                                "Extractor",
+                                ""
+                            )}
+                            {mostRecent?.status ? (
+                                <EuiBadge
+                                    color={statusToColor(mostRecent.status)}
+                                >
+                                    {mostRecent.status} (
+                                    {moment(
+                                        mostRecent.eventTime
+                                    ).format("HH:mm:ss")}
+                                    )
+                                </EuiBadge>
+                            ) : (
+                                <>No updates</>
+                            )}
+                        </li>
+                    )
+                })}
+            </ul>
+ ) : ( + <> + ) }, - width: "300" + width: "300", }, - -]; +] const parseBlobStatus = (status: any): BlobStatus => { return { ...status, - paths: status.paths.map((p: any) => p ? p : "unknown-filename"), + paths: status.paths.map((p: any) => (p ? p : "unknown-filename")), ingestStart: new Date(status.ingestStart), mostRecentEvent: new Date(status.mostRecentEvent), mimeTypes: status.mimeTypes?.split(","), - eventStatuses: status.eventStatuses.map((es: any) => ({...es, eventTime: new Date(es.eventTime)})), + eventStatuses: status.eventStatuses.map((es: any) => ({ + ...es, + eventTime: new Date(es.eventTime), + })), extractorStatuses: status.extractorStatuses.map((s: any) => ({ extractorType: s.extractorType.replace("Extractor", ""), - statusUpdates: _.sortBy(s.statusUpdates - // discard empty status updates (does this make sense? Maybe we should tag them as 'unknown status' instead - .filter((u: any) => u.eventTime !== undefined && u.status !== undefined) - .map((u: any) => ({ - ...u, - eventTime: new Date(u.eventTime) - })), update => update.eventTime) - })) + statusUpdates: _.sortBy( + s.statusUpdates + // discard empty status updates (does this make sense? Maybe we should tag them as 'unknown status' instead + .filter( + (u: any) => + u.eventTime !== undefined && u.status !== undefined + ) + .map((u: any) => ({ + ...u, + eventTime: new Date(u.eventTime), + })), + (update) => update.eventTime + ), + })), } } -const blobStatusId = (blobStatus: BlobStatus) => `${blobStatus.metadata.ingestId}-${blobStatus.metadata.blobId}` +const blobStatusId = (blobStatus: BlobStatus) => + `${blobStatus.metadata.ingestId}-${blobStatus.metadata.blobId}` const renderExpandedRow = (blobStatus: BlobStatus) => { const columns: Array> = [ { - field: 'eventTime', - name: 'Event time', - render: (time: Date) => moment(time).format(SHORT_READABLE_DATE) + field: "eventTime", + name: "Event time", + render: (time: Date) => moment(time).format(SHORT_READABLE_DATE), }, { - field: 'eventType', - name: 'Event', + field: "eventType", + name: "Event", }, { - field: 'eventStatus', - name: 'Status', + field: "eventStatus", + name: "Status", render: (status: Status) => { - return {status} + return ( + {status} + ) }, }, - ]; - - return -
<h4>{pathsToFileNames(blobStatus.paths)}</h4>
-        <EuiText>Full file path(s) : {blobStatus.paths.join(", ")}. Ingestion started on {hdate.prettyPrint(blobStatus.ingestStart)}</EuiText>
-        <h4>All ingestion events prior to extraction</h4>
-        <EuiBasicTable
-            items={_.sortBy(blobStatus.eventStatuses, s =>
-                s.eventTime.toISOString() )}
-            columns={columns}
-        />
-        <h4>Extraction events</h4>
-        <EuiText>
-            {blobStatus.mimeTypes && `This file is of type ${blobStatus.mimeTypes.join(",")}.`} Giant has run the following extractors on the file:
-        </EuiText>
-        {blobStatus.extractorStatuses.map(extractorStatus => {
-            const numErrors = extractorStatus.statusUpdates.filter(su => su.status === "Failure").length
-            const numStarted = extractorStatus.statusUpdates.filter(su => su.status === "Started").length
-            const mostRecent = extractorStatus.statusUpdates.length > 0 ? extractorStatus.statusUpdates[extractorStatus.statusUpdates.length - 1] : undefined
-            return <>
-                <h5>{extractorStatus.extractorType}</h5>
-                <EuiText>
-                    The extractor {extractorStatus.extractorType} has been started {numStarted} times. There have been {numErrors} errors.
-                    {mostRecent ? <>The most recent status event is '{mostRecent.status}' which happened on {hdate.prettyPrint(mostRecent.eventTime, {showTime: true})}</> : ""}
-                </EuiText>
-                <EuiText>
-                    All {extractorStatus.extractorType} events:
-                    {extractorStatusList(extractorStatus)}
-                </EuiText>
-            </>
-        })}
-        {blobStatus.errors.length > 0 &&
-            <>
-                <h4>Errors encountered processing this file</h4>
-                {blobStatus.errors.map(error =>
-                    <EuiText>
-                        <h5>{error.eventType}</h5>
-                        {error.errors.map(e => <EuiCodeBlock>{e.message}</EuiCodeBlock>)}
-                    </EuiText>
-                )
-                }
-            </>
-        }
-    </div>
+ ] + + return ( + +
+            <div>
+                <h4>{pathsToFileNames(blobStatus.paths)}</h4>
+                <EuiText>
+                    Full file path(s) : {blobStatus.paths.join(", ")}. Ingestion
+                    started on {hdate.prettyPrint(blobStatus.ingestStart)}
+                </EuiText>
+                <h4>All ingestion events prior to extraction</h4>
+                <EuiBasicTable
+                    items={_.sortBy(blobStatus.eventStatuses, (s) =>
+                        s.eventTime.toISOString()
+                    )}
+                    columns={columns}
+                />
+                <h4>Extraction events</h4>
+                <EuiText>
+                    {blobStatus.mimeTypes &&
+                        `This file is of type ${blobStatus.mimeTypes.join(",")}.`}{" "}
+                    Giant has run the following extractors on the file:
+                </EuiText>
+                {blobStatus.extractorStatuses.map((extractorStatus) => {
+                    const numErrors = extractorStatus.statusUpdates.filter(
+                        (su) => su.status === "Failure"
+                    ).length
+                    const numStarted = extractorStatus.statusUpdates.filter(
+                        (su) => su.status === "Started"
+                    ).length
+                    const mostRecent =
+                        extractorStatus.statusUpdates.length > 0
+                            ? extractorStatus.statusUpdates[
+                                  extractorStatus.statusUpdates.length - 1
+                              ]
+                            : undefined
+                    return (
+                        <>
+                            <h5>{extractorStatus.extractorType}</h5>
+                            <EuiText>
+                                The extractor {extractorStatus.extractorType}{" "}
+                                has been started {numStarted} times. There have
+                                been {numErrors} errors.
+                                {mostRecent ? (
+                                    <>
+                                        The most recent status event is '
+                                        {mostRecent.status}' which happened on{" "}
+                                        {hdate.prettyPrint(
+                                            mostRecent.eventTime,
+                                            { showTime: true }
+                                        )}
+                                    </>
+                                ) : (
+                                    ""
+                                )}{" "}
+                            </EuiText>
+                            <EuiText>
+                                All {extractorStatus.extractorType} events:
+                                {extractorStatusList(extractorStatus)}
+                            </EuiText>
+                        </>
+                    )
+                })}
+                {blobStatus.errors.length > 0 && (
+                    <>
+                        <h4>Errors encountered processing this file</h4>
+                        {blobStatus.errors.map((error) => (
+                            <EuiText>
+                                <h5>{error.eventType}</h5>
+                                {error.errors.map((e) => (
+                                    <EuiCodeBlock>{e.message}</EuiCodeBlock>
+                                ))}
+                            </EuiText>
+                        ))}
+                    </>
+                )}
+            </div>
+ ) } function IngestionEventsTable({ @@ -271,101 +421,135 @@ function IngestionEventsTable({ ) } -export function IngestionEvents( - {collectionId, ingestId, workspaces, breakdownByWorkspace, showErrorsOnly}: { - collectionId: string, - ingestId?: string, - workspaces: WorkspaceMetadata[], - breakdownByWorkspace: boolean, - showErrorsOnly: boolean, - }) { +export function IngestionEvents({ + collectionId, + ingestId, + workspaces, + breakdownByWorkspace, + showErrorsOnly, +}: { + collectionId: string + ingestId?: string + workspaces: WorkspaceMetadata[] + breakdownByWorkspace: boolean + showErrorsOnly: boolean +}) { const [blobs, updateBlobs] = useState(undefined) const [tableData, setTableData] = useState([]) const ingestIdSuffix = ingestId && ingestId !== "all" ? `/${ingestId}` : "" // Expanding rows logic - we use itemIdToExpandedRowMap to keep track of which rows have been expanded - const [itemIdToExpandedRowMap, setItemIdToExpandedRowMap ] = useState< + const [itemIdToExpandedRowMap, setItemIdToExpandedRowMap] = useState< Record - >({}); + >({}) const openRow = (blobStatus: BlobStatus) => { - const map = {...itemIdToExpandedRowMap} + const map = { ...itemIdToExpandedRowMap } const id = blobStatusId(blobStatus) map[id] = renderExpandedRow(blobStatus) setItemIdToExpandedRowMap(map) } const closeRow = (blobStatus: BlobStatus) => { - const map = {...itemIdToExpandedRowMap} + const map = { ...itemIdToExpandedRowMap } delete map[blobStatusId(blobStatus)] setItemIdToExpandedRowMap(map) } - const columnsWithWorkspace = breakdownByWorkspace ? - columns : columns.concat( { - field: 'workspaceName', - sortable: true, - name: 'Workspace name' - }) + const columnsWithWorkspace = breakdownByWorkspace + ? columns + : columns.concat({ + field: "workspaceName", + sortable: true, + name: "Workspace name", + }) const columnsWithExpandingRow: Array> = [ ...columnsWithWorkspace, statusIconColumn, { - align: 'right', - width: '40px', + align: "right", + width: "40px", isExpander: true, - name: (Expand rows), + name: ( + + Expand rows + + ), render: (row: BlobStatus) => ( itemIdToExpandedRowMap[blobStatusId(row)] ? closeRow(row) : openRow(row)} + onClick={() => + itemIdToExpandedRowMap[blobStatusId(row)] + ? closeRow(row) + : openRow(row) + } aria-label={ - itemIdToExpandedRowMap[blobStatusId(row)] ? 'Collapse' : 'Expand' + itemIdToExpandedRowMap[blobStatusId(row)] + ? "Collapse" + : "Expand" } iconType={ - itemIdToExpandedRowMap[blobStatusId(row)] ? 'arrowDown' : 'arrowRight' + itemIdToExpandedRowMap[blobStatusId(row)] + ? 
"arrowDown" + : "arrowRight" } /> - ) - } + ), + }, ] useEffect(() => { authFetch(`/api/ingestion-events/${collectionId}${ingestIdSuffix}`) - .then(resp => resp.json()) - .then(json => { + .then((resp) => resp.json()) + .then((json) => { const blobStatuses: BlobStatus[] = json.map(parseBlobStatus) updateBlobs(blobStatuses) - }) + }) }, [collectionId, ingestId, updateBlobs, ingestIdSuffix]) - const getWorkspaceBlobs = (allBlobs: BlobStatus[], workspaceName: string, errorsOnly: boolean | undefined) => { - const workspaceBlobs = allBlobs.filter(b => b.workspaceName === workspaceName); + const getWorkspaceBlobs = ( + allBlobs: BlobStatus[], + workspaceName: string, + errorsOnly: boolean | undefined + ) => { + const workspaceBlobs = allBlobs.filter( + (b) => b.workspaceName === workspaceName + ) - if (errorsOnly) return getFailedBlobs(workspaceBlobs); + if (errorsOnly) return getFailedBlobs(workspaceBlobs) - return workspaceBlobs; + return workspaceBlobs } useEffect(() => { if (blobs) { if (breakdownByWorkspace) { - setTableData(workspaces - .map((w: WorkspaceMetadata) => ({ + setTableData( + workspaces.map((w: WorkspaceMetadata) => ({ title: `Workspace: ${w.name}`, - blobs: getWorkspaceBlobs(blobs, w.name, showErrorsOnly) - }))) + blobs: getWorkspaceBlobs(blobs, w.name, showErrorsOnly), + })) + ) } else { setTableData([ { title: `${collectionId}${ingestIdSuffix}`, - blobs: showErrorsOnly ? getFailedBlobs(blobs) : blobs - }]) + blobs: showErrorsOnly ? getFailedBlobs(blobs) : blobs, + }, + ]) } } else { setTableData([]) } - }, [breakdownByWorkspace, blobs, workspaces, ingestIdSuffix, collectionId, showErrorsOnly, setItemIdToExpandedRowMap]) + }, [ + breakdownByWorkspace, + blobs, + workspaces, + ingestIdSuffix, + collectionId, + showErrorsOnly, + setItemIdToExpandedRowMap, + ]) return ( <> @@ -384,4 +568,3 @@ export function IngestionEvents( ) } - From b81c30baee051d609db2de608f70f90dd77f8c7a Mon Sep 17 00:00:00 2001 From: Zeke Hunter-Green Date: Tue, 3 Oct 2023 16:32:32 +0100 Subject: [PATCH 3/4] dashboard shows specific message for blobs in infinite loop --- .../app/services/observability/Models.scala | 3 +- .../observability/PostgresClient.scala | 37 ++--- .../IngestionEvents/IngestionEvents.tsx | 136 +++++++++++------- .../js/components/IngestionEvents/types.ts | 1 + 4 files changed, 104 insertions(+), 73 deletions(-) diff --git a/backend/app/services/observability/Models.scala b/backend/app/services/observability/Models.scala index f5e1f1ec..8005e346 100644 --- a/backend/app/services/observability/Models.scala +++ b/backend/app/services/observability/Models.scala @@ -197,7 +197,8 @@ case class BlobStatus( eventStatuses: List[IngestionEventStatus], extractorStatuses: List[ExtractorStatus], errors: List[IngestionErrorsWithEventType], - mimeTypes: Option[String]) + mimeTypes: Option[String], + infiniteLoop: Boolean) object BlobStatus { implicit val dateWrites = JodaReadWrites.dateWrites implicit val dateReads = JodaReadWrites.dateReads diff --git a/backend/app/services/observability/PostgresClient.scala b/backend/app/services/observability/PostgresClient.scala index 832021c0..7e3ea767 100644 --- a/backend/app/services/observability/PostgresClient.scala +++ b/backend/app/services/observability/PostgresClient.scala @@ -157,7 +157,7 @@ class PostgresClientImpl(postgresConfig: PostgresConfig) extends PostgresClient ie.errors, ie.workspace_name AS "workspaceName", ie.mime_types AS "mimeTypes", - infinite_loop, + ie.infinite_loop AS "infiniteLoop", ARRAY_AGG(DISTINCT blob_metadata.path ) AS paths, 
(ARRAY_AGG(blob_metadata.file_size))[1] as "fileSize", ARRAY_REMOVE(ARRAY_AGG(extractor_statuses.extractor), NULL) AS extractors, @@ -207,32 +207,33 @@ class PostgresClientImpl(postgresConfig: PostgresConfig) extends PostgresClient """.map(rs => { val eventTypes = rs.array("event_types").getArray.asInstanceOf[Array[String]] BlobStatus( - EventMetadata( - rs.string("blob_id"), - rs.string("ingest_id") - ), - BlobStatus.parsePathsArray(rs.array("paths").getArray().asInstanceOf[Array[String]]), - rs.longOpt("fileSize"), - rs.stringOpt("workspaceName"), + EventMetadata( + rs.string("blob_id"), + rs.string("ingest_id") + ), + BlobStatus.parsePathsArray(rs.array("paths").getArray().asInstanceOf[Array[String]]), + rs.longOpt("fileSize"), + rs.stringOpt("workspaceName"), PostgresHelpers.postgresEpochToDateTime(rs.double("ingest_start")), PostgresHelpers.postgresEpochToDateTime(rs.double("most_recent_event")), IngestionEventStatus.parseEventStatus( rs.array("event_times").getArray.asInstanceOf[Array[java.math.BigDecimal]].map(t =>PostgresHelpers.postgresEpochToDateTime(t.doubleValue)), eventTypes, - rs.array("event_statuses").getArray.asInstanceOf[Array[String]]), - rs.arrayOpt("extractors").map { extractors => - ExtractorStatus.parseDbStatusEvents( - extractors.getArray().asInstanceOf[Array[String]], - rs.array("extractorEventTimes").getArray().asInstanceOf[Array[String]], - rs.array("extractorStatuses").getArray().asInstanceOf[Array[String]] - ) - }.getOrElse(List()), + rs.array("event_statuses").getArray.asInstanceOf[Array[String]] + ), + rs.arrayOpt("extractors").map { extractors => + ExtractorStatus.parseDbStatusEvents( + extractors.getArray().asInstanceOf[Array[String]], + rs.array("extractorEventTimes").getArray().asInstanceOf[Array[String]], + rs.array("extractorStatuses").getArray().asInstanceOf[Array[String]] + ) + }.getOrElse(List()), IngestionError.parseIngestionErrors( rs.array("errors").getArray.asInstanceOf[Array[String]], eventTypes ), - rs.stringOpt("mimeTypes") - + rs.stringOpt("mimeTypes"), + rs.boolean("infiniteLoop") ) } ).list().apply() diff --git a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx index 735ecaa8..bdc9ae11 100644 --- a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx +++ b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx @@ -49,6 +49,12 @@ const blobStatusIcons = { ), + infiniteLoop: ( + + ), } const SHORT_READABLE_DATE = "DD MMM HH:mm:ss" @@ -125,6 +131,9 @@ const statusIconColumn = { name: "", width: "40", render: (statuses: ExtractorStatus[], row: BlobStatus) => { + if (row.infiniteLoop) { + return blobStatusIcons["infiniteLoop"] + } const totalErrors = row.errors.length const extractorStatus = getBlobStatus(statuses) // if extractors have finished but there are other non-extractor related errors, show an error icon @@ -304,60 +313,79 @@ const renderExpandedRow = (blobStatus: BlobStatus) => { Full file path(s) : {blobStatus.paths.join(", ")}. Ingestion started on {hdate.prettyPrint(blobStatus.ingestStart)}

-                <h4>All ingestion events prior to extraction</h4>
-                <EuiBasicTable
-                    items={_.sortBy(blobStatus.eventStatuses, (s) =>
-                        s.eventTime.toISOString()
-                    )}
-                    columns={columns}
-                />
-                <h4>Extraction events</h4>
-                <EuiText>
-                    {blobStatus.mimeTypes &&
-                        `This file is of type ${blobStatus.mimeTypes.join(",")}.`}{" "}
-                    Giant has run the following extractors on the file:
-                </EuiText>
-                {blobStatus.extractorStatuses.map((extractorStatus) => {
-                    const numErrors = extractorStatus.statusUpdates.filter(
-                        (su) => su.status === "Failure"
-                    ).length
-                    const numStarted = extractorStatus.statusUpdates.filter(
-                        (su) => su.status === "Started"
-                    ).length
-                    const mostRecent =
-                        extractorStatus.statusUpdates.length > 0
-                            ? extractorStatus.statusUpdates[
-                                  extractorStatus.statusUpdates.length - 1
-                              ]
-                            : undefined
-                    return (
-                        <>
-                            <h5>{extractorStatus.extractorType}</h5>
-                            <EuiText>
-                                The extractor {extractorStatus.extractorType}{" "}
-                                has been started {numStarted} times. There have
-                                been {numErrors} errors.
-                                {mostRecent ? (
-                                    <>
-                                        The most recent status event is '
-                                        {mostRecent.status}' which happened on{" "}
-                                        {hdate.prettyPrint(
-                                            mostRecent.eventTime,
-                                            { showTime: true }
-                                        )}
-                                    </>
-                                ) : (
-                                    ""
-                                )}{" "}
-                            </EuiText>
-                            <EuiText>
-                                All {extractorStatus.extractorType} events:
-                                {extractorStatusList(extractorStatus)}
-                            </EuiText>
-                        </>
-                    )
-                })}
+                {blobStatus.infiniteLoop && (
+                    <>
+                        <h4>Ingestion failing in infinite loop</h4>
+                        <EuiText>blob id {blobStatus.metadata.blobId}</EuiText>
+                    </>
+                )}
+                {!blobStatus.infiniteLoop && (
+                    <>
+                        <h4>All ingestion events prior to extraction</h4>
+                        <EuiBasicTable
+                            items={_.sortBy(blobStatus.eventStatuses, (s) =>
+                                s.eventTime.toISOString()
+                            )}
+                            columns={columns}
+                        />
+                        <h4>Extraction events</h4>
+                        <EuiText>
+                            {blobStatus.mimeTypes &&
+                                `This file is of type ${blobStatus.mimeTypes.join(
+                                    ","
+                                )}.`}{" "}
+                            Giant has run the following extractors on the file:
+                        </EuiText>
+                        {blobStatus.extractorStatuses.map((extractorStatus) => {
+                            const numErrors =
+                                extractorStatus.statusUpdates.filter(
+                                    (su) => su.status === "Failure"
+                                ).length
+                            const numStarted =
+                                extractorStatus.statusUpdates.filter(
+                                    (su) => su.status === "Started"
+                                ).length
+                            const mostRecent =
+                                extractorStatus.statusUpdates.length > 0
+                                    ? extractorStatus.statusUpdates[
+                                          extractorStatus.statusUpdates.length -
+                                              1
+                                      ]
+                                    : undefined
+                            return (
+                                <>
+                                    <h5>{extractorStatus.extractorType}</h5>
+                                    <EuiText>
+                                        The extractor{" "}
+                                        {extractorStatus.extractorType} has been
+                                        started {numStarted} times. There have
+                                        been {numErrors} errors.
+                                        {mostRecent ? (
+                                            <>
+                                                The most recent status event is
+                                                '{mostRecent.status}' which
+                                                happened on{" "}
+                                                {hdate.prettyPrint(
+                                                    mostRecent.eventTime,
+                                                    { showTime: true }
+                                                )}
+                                            </>
+                                        ) : (
+                                            ""
+                                        )}{" "}
+                                    </EuiText>
+                                    <EuiText>
+                                        All {extractorStatus.extractorType}{" "}
+                                        events:
+                                        {extractorStatusList(extractorStatus)}
+                                    </EuiText>
+                                </>
+                            )
+                        })}
+ + )} + {blobStatus.errors.length > 0 && ( <>
                         <h4>Errors encountered processing this file</h4>
diff --git a/frontend/src/js/components/IngestionEvents/types.ts b/frontend/src/js/components/IngestionEvents/types.ts index 409463fb..15f3500d 100644 --- a/frontend/src/js/components/IngestionEvents/types.ts +++ b/frontend/src/js/components/IngestionEvents/types.ts @@ -32,6 +32,7 @@ export type BlobStatus = { errors: IngestionErrorWithEventType[]; workspaceName: string; mimeTypes: string[]; + infiniteLoop: boolean; } export type IngestionTable = { From e3a655926f96f2d26780df0f63f9dac2ca5fd427 Mon Sep 17 00:00:00 2001 From: Zeke Hunter-Green Date: Wed, 4 Oct 2023 17:48:14 +0100 Subject: [PATCH 4/4] show status 'infinite loop', looping files appear when 'errors only' is toggled --- .../components/IngestionEvents/IngestionEvents.tsx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx index bdc9ae11..a68df949 100644 --- a/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx +++ b/frontend/src/js/components/IngestionEvents/IngestionEvents.tsx @@ -32,7 +32,7 @@ import { } from "./types" import styles from "./IngestionEvents.module.css" -type BlobProgress = "complete" | "completeWithErrors" | "inProgress" +type BlobProgress = "complete" | "completeWithErrors" | "inProgress" | "infiniteLoop" const blobStatusIcons = { complete: ( @@ -70,11 +70,12 @@ const getFailedStatuses = (statuses: ExtractorStatus[]) => const getFailedBlobs = (blobs: BlobStatus[]) => { return blobs.filter((wb) => { - return getFailedStatuses(wb.extractorStatuses).length > 0 + return getFailedStatuses(wb.extractorStatuses).length > 0 || wb.infiniteLoop }) } -const getBlobStatus = (statuses: ExtractorStatus[]): BlobProgress => { +const getBlobStatus = (statuses: ExtractorStatus[], infiniteLoop: boolean): BlobProgress => { + if (infiniteLoop) return "infiniteLoop" const failures = getFailedStatuses(statuses) const inProgress = statuses.filter( (status) => @@ -124,6 +125,7 @@ const blobStatusText = { complete: "Complete", completeWithErrors: "Complete with errors", inProgress: "In progress", + infiniteLoop: "Infinite Loop" } const statusIconColumn = { @@ -135,7 +137,7 @@ const statusIconColumn = { return blobStatusIcons["infiniteLoop"] } const totalErrors = row.errors.length - const extractorStatus = getBlobStatus(statuses) + const extractorStatus = getBlobStatus(statuses, row.infiniteLoop) // if extractors have finished but there are other non-extractor related errors, show an error icon const combinedStatus = extractorStatus === "complete" && totalErrors > 0 @@ -188,8 +190,8 @@ const columns: Array> = [ { field: "extractorStatuses", name: "Status", - render: (statuses: ExtractorStatus[]) => { - return blobStatusText[getBlobStatus(statuses)] + render: (statuses: ExtractorStatus[], row: BlobStatus) => { + return blobStatusText[getBlobStatus(statuses, row.infiniteLoop)] }, }, {
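
For reference, the UNION branch added in patch 1 manufactures one placeholder row per blob found in problem_blobs, with empty event arrays and infinite_loop set to TRUE. The sketch below shows roughly how such a row looks to the frontend once it has been serialised and parsed; the TypeScript type and the sample values are illustrative only, not the real BlobStatus type from types.ts.

// Illustrative shape of a synthetic "problem blob" row from the UNION
// branch in PostgresClient.scala. Values are invented; field names
// follow the query's column aliases.
type LoopingBlobRow = {
    metadata: { blobId: string; ingestId: string }
    eventTypes: string[]    // array[]::text[]     -> always empty
    eventStatuses: string[] // array[]::text[]     -> always empty
    errors: string[]        // array['[]'::jsonb]  -> one empty JSON array
    workspaceName: null     // NULL AS workspace_name
    mimeTypes: null         // NULL AS mime_types
    infiniteLoop: true      // TRUE AS infinite_loop
}

const exampleRow: LoopingBlobRow = {
    metadata: { blobId: "blob-123", ingestId: "ingest-456" },
    eventTypes: [],
    eventStatuses: [],
    errors: ["[]"],
    workspaceName: null,
    mimeTypes: null,
    infiniteLoop: true,
}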
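Patches 3 and 4 together give the infinite-loop state precedence over every other status the dashboard derives, and widen the "errors only" filter to keep looping blobs. A condensed, self-contained sketch of that logic follows; the helper names mirror getBlobStatus and getFailedBlobs from IngestionEvents.tsx, but the simplified types are assumptions rather than the component's real ones.

// Condensed sketch of the status precedence after patches 3 and 4.
// "BlobLike" is a stand-in for the real BlobStatus type in types.ts.
type Progress = "complete" | "completeWithErrors" | "inProgress" | "infiniteLoop"

type StatusUpdate = { status?: "Started" | "Success" | "Failure" }
type ExtractorStatusLike = { statusUpdates: StatusUpdate[] }
type BlobLike = { infiniteLoop: boolean; extractorStatuses: ExtractorStatusLike[] }

const hasFailure = (s: ExtractorStatusLike) =>
    s.statusUpdates.some((u) => u.status === "Failure")

// An extractor is settled once it has an empty, Failure or Success update.
const isSettled = (s: ExtractorStatusLike) =>
    s.statusUpdates.some(
        (u) => !u.status || ["Failure", "Success"].includes(u.status)
    )

// infiniteLoop wins outright, mirroring the early return added to
// getBlobStatus; then failures, then any extractor still running.
const progressOf = (blob: BlobLike): Progress => {
    if (blob.infiniteLoop) return "infiniteLoop"
    if (blob.extractorStatuses.some(hasFailure)) return "completeWithErrors"
    if (blob.extractorStatuses.some((s) => !isSettled(s))) return "inProgress"
    return "complete"
}

// The "errors only" toggle now also keeps looping blobs, as in getFailedBlobs.
const failedOrLooping = (blobs: BlobLike[]) =>
    blobs.filter((b) => b.infiniteLoop || b.extractorStatuses.some(hasFailure))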