Skip to content

Commit

Permalink
Merge pull request #1129 from morucci/1112
Browse files Browse the repository at this point in the history
Ensure Github PR fetching repo not found non fatal for stream
  • Loading branch information
morucci authored Dec 23, 2024
2 parents d2232bf + 99ef7f5 commit 0af1dfe
Show file tree
Hide file tree
Showing 16 changed files with 61 additions and 44 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@ jobs:
- name: Set Monocle compose image to latest
run: "echo \"COMPOSE_MONOCLE_VERSION=latest\" > .env"
- name: Start Monocle compose
run: docker-compose up -d
run: docker compose up -d
- name: Wait for services to start
run: sleep 45
- name: Display docker-compose ps
run: docker-compose ps
run: docker compose ps
- name: Display docker-compose logs
run: docker-compose logs
run: docker compose logs
- name: Check services are running
run: "test -z \"$(sudo docker-compose ps -a | grep Exit)\""
run: "test -z \"$(sudo docker compose ps -a | grep Exit)\""
- name: Check api service through nginx
run: "curl -s --fail -H 'Content-type: application/json' http://localhost:8080/api/2/get_workspaces -d '{}' | grep 'workspaces'"
- name: Check web service to fetch web app
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/mkCI.dhall
Original file line number Diff line number Diff line change
Expand Up @@ -91,23 +91,23 @@ in { GithubActions
}
, GithubActions.Step::{
, name = Some "Start Monocle compose"
, run = Some "docker-compose up -d"
, run = Some "docker compose up -d"
}
, GithubActions.Step::{
, name = Some "Wait for services to start"
, run = Some "sleep 45"
}
, GithubActions.Step::{
, name = Some "Display docker-compose ps"
, run = Some "docker-compose ps"
, run = Some "docker compose ps"
}
, GithubActions.Step::{
, name = Some "Display docker-compose logs"
, run = Some "docker-compose logs"
, run = Some "docker compose logs"
}
, GithubActions.Step::{
, name = Some "Check services are running"
, run = Some "test -z \"\$(sudo docker-compose ps -a | grep Exit)\""
, run = Some "test -z \"\$(sudo docker compose ps -a | grep Exit)\""
}
, GithubActions.Step::{
, name = Some "Check api service through nginx"
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/publish-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ jobs:
- name: Set Monocle compose image to latest
run: "echo \"COMPOSE_MONOCLE_VERSION=latest\" > .env"
- name: Start Monocle compose
run: docker-compose up -d
run: docker compose up -d
- name: Wait for services to start
run: sleep 45
- name: Display docker-compose ps
run: docker-compose ps
run: docker compose ps
- name: Display docker-compose logs
run: docker-compose logs
run: docker compose logs
- name: Check services are running
run: "test -z \"$(sudo docker-compose ps -a | grep Exit)\""
run: "test -z \"$(sudo docker compose ps -a | grep Exit)\""
- name: Check api service through nginx
run: "curl -s --fail -H 'Content-type: application/json' http://localhost:8080/api/2/get_workspaces -d '{}' | grep 'workspaces'"
- name: Check web service to fetch web app
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/publish-tag.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ jobs:
- name: Set Monocle compose image to latest
run: "echo \"COMPOSE_MONOCLE_VERSION=latest\" > .env"
- name: Start Monocle compose
run: docker-compose up -d
run: docker compose up -d
- name: Wait for services to start
run: sleep 45
- name: Display docker-compose ps
run: docker-compose ps
run: docker compose ps
- name: Display docker-compose logs
run: docker-compose logs
run: docker compose logs
- name: Check services are running
run: "test -z \"$(sudo docker-compose ps -a | grep Exit)\""
run: "test -z \"$(sudo docker compose ps -a | grep Exit)\""
- name: Check api service through nginx
run: "curl -s --fail -H 'Content-type: application/json' http://localhost:8080/api/2/get_workspaces -d '{}' | grep 'workspaces'"
- name: Check web service to fetch web app
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
# Changelog

All notable changes to this project will be documented in this file.

## [master]

### Added
### Changed
### Removed
### Fixed

- [crawler] GitHub PR crawler raised a fatal StreamError when a repository was not found (#1112)

## [1.11.1] - 2024-02-13

### Added
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ A GitHub provider settings
`github_organization` is the only mandatory key. If `github_repositories` is not specified then
the crawler will crawl the whole organization repositories. If specified then it will crawl only
the listed repositories. To crawl repositories from a personal GitHub account, you need to set
`github_organization` to you account name and list repositories under the `github_repositories` key.
`github_organization` to the account name and list repositories under the `github_repositories` key.

`github_url` might be specified in case of an alternate url. Default is "https://github.com/api/graphql".

Expand Down
1 change: 1 addition & 0 deletions src/Lentille.hs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ data LentilleErrorKind
| RequestError GraphQLError
| RateLimitInfoError GraphQLError
| PartialErrors Value
| EntityRemoved
deriving (Show, Generic, ToJSON)

yieldStreamError :: TimeEffect :> es => LentilleErrorKind -> LentilleStream es a
Expand Down
4 changes: 2 additions & 2 deletions src/Lentille/GitHub/Issues.hs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ transformRateLimit (GetLinkedIssuesRateLimit used remaining (DateTime resetAtTex
Just resetAt -> RateLimit {..}
Nothing -> error $ "Unable to parse the resetAt date string: " <> resetAtText

transformResponse :: GetLinkedIssues -> (PageInfo, Maybe RateLimit, [Text], [TaskData])
transformResponse :: GetLinkedIssues -> (PageInfo, Maybe RateLimit, GraphResponseResult, [TaskData])
transformResponse searchResult =
case searchResult of
GetLinkedIssues
Expand All @@ -110,7 +110,7 @@ transformResponse searchResult =
let newTaskDataE = concatMap mkTaskData issues
in ( PageInfo hasNextPage' endCursor' (Just issueCount')
, transformRateLimit <$> rateLimitM
, lefts newTaskDataE
, UnknownErr $ lefts newTaskDataE
, rights newTaskDataE
)
respOther -> error ("Invalid response: " <> show respOther)
Expand Down
6 changes: 3 additions & 3 deletions src/Lentille/GitHub/Organization.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ transformRateLimit (GetProjectsRateLimit used remaining (DateTime resetAtText))
Just resetAt -> RateLimit {..}
Nothing -> error $ "Unable to parse the resetAt date string: " <> resetAtText

transformResponse :: GetProjects -> (PageInfo, Maybe RateLimit, [Text], [Project])
transformResponse :: GetProjects -> (PageInfo, Maybe RateLimit, GraphResponseResult, [Project])
transformResponse result =
case result of
GetProjects
Expand All @@ -52,13 +52,13 @@ transformResponse result =
) ->
( PageInfo hasNextPage endCursor (Just totalCount)
, transformRateLimit <$> rateLimitM
, []
, NoErr
, getRepos orgRepositories
)
_anyOtherResponse ->
( PageInfo False Nothing Nothing
, Nothing
, ["Unknown GetProjects response: " <> show result]
, UnknownErr ["Unknown GetProjects response: " <> show result]
, []
)
where
Expand Down
14 changes: 10 additions & 4 deletions src/Lentille/GitHub/PullRequests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ transformResponse ::
(Text -> Maybe Config.IdentUG) ->
-- The response payload
GetProjectPullRequests ->
(PageInfo, Maybe RateLimit, [Text], [Changes])
(PageInfo, Maybe RateLimit, GraphResponseResult, [Changes])
transformResponse host identCB result = do
let process resp rateLimit = case resp of
( Just
Expand All @@ -79,11 +79,17 @@ transformResponse host identCB result = do
)
) ->
let totalCount = Just totalCount'
in (PageInfo {..}, rateLimit, [], mapMaybe transPR (catMaybes projectPRs))
_anyOtherResponse ->
in (PageInfo {..}, rateLimit, NoErr, mapMaybe transPR (catMaybes projectPRs))
Just _ ->
( PageInfo False Nothing Nothing
, Nothing
, ["Unknown GetProjectPullRequests response: " <> show result]
, UnknownErr ["Unknown GetProjectPullRequests response: " <> show result]
, []
)
Nothing ->
( PageInfo False Nothing Nothing
, Nothing
, NoRepo
, []
)
case result of
Expand Down
6 changes: 3 additions & 3 deletions src/Lentille/GitHub/UserPullRequests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ transformResponse ::
(Text -> Maybe Config.IdentUG) ->
-- The response payload
GetUserPullRequests ->
(PageInfo, Maybe RateLimit, [Text], [Changes])
(PageInfo, Maybe RateLimit, GraphResponseResult, [Changes])
transformResponse host identCB result = do
let process resp rateLimit = case resp of
( Just
Expand All @@ -76,11 +76,11 @@ transformResponse host identCB result = do
)
) ->
let totalCount = Just totalCount'
in (PageInfo {..}, rateLimit, [], mapMaybe transPR (catMaybes projectPRs))
in (PageInfo {..}, rateLimit, NoErr, mapMaybe transPR (catMaybes projectPRs))
_anyOtherResponse ->
( PageInfo False Nothing Nothing
, Nothing
, ["Unknown GetUserPullRequests response: " <> show result]
, UnknownErr ["Unknown GetUserPullRequests response: " <> show result]
, []
)
case result of
Expand Down
4 changes: 2 additions & 2 deletions src/Lentille/GitHub/Watching.hs
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ transformResponse result = do
Nothing -> error $ "Unable to parse the resetAt date string: " <> resetAtText
in ( Lentille.GraphQL.PageInfo hasNextPage endCursor (Just totalCount)
, Just rateLimit
, []
, Lentille.GraphQL.NoErr
, getRepos watchedRepositories
)
_anyOtherResponse ->
( Lentille.GraphQL.PageInfo False Nothing Nothing
, Nothing
, ["Unknown GetWatched response: " <> show result]
, Lentille.GraphQL.UnknownErr ["Unknown GetWatched response: " <> show result]
, []
)
where
Expand Down
6 changes: 3 additions & 3 deletions src/Lentille/GitLab/Group.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ streamGroupProjects client fullPath =
where
mkArgs _ = GetGroupProjectsArgs (ID fullPath)

transformResponse :: GetGroupProjects -> (PageInfo, Maybe RateLimit, [Text], [Project])
transformResponse :: GetGroupProjects -> (PageInfo, Maybe RateLimit, GraphResponseResult, [Project])
transformResponse result =
case result of
GetGroupProjects
Expand All @@ -57,13 +57,13 @@ transformResponse result =
) ->
( PageInfo hasNextPage endCursor Nothing
, Nothing
, []
, NoErr
, getFullPath <$> cleanMaybeMNodes nodes
)
_anyOtherResponse ->
( PageInfo False Nothing Nothing
, Nothing
, ["Unknown GetGroupProjects response: " <> show result]
, UnknownErr ["Unknown GetGroupProjects response: " <> show result]
, []
)
where
Expand Down
6 changes: 3 additions & 3 deletions src/Lentille/GitLab/MergeRequests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ transformResponse ::
-- A callback to get Ident ID from an alias
(Text -> Maybe Config.IdentUG) ->
GetProjectMergeRequests ->
(PageInfo, Maybe RateLimit, [Text], [(Change, [ChangeEvent])])
(PageInfo, Maybe RateLimit, GraphResponseResult, [(Change, [ChangeEvent])])
transformResponse host getIdentIdCB result =
case result of
GetProjectMergeRequests
Expand All @@ -144,13 +144,13 @@ transformResponse host getIdentIdCB result =
) ->
( PageInfo hasNextPage endCursor (Just count)
, Nothing
, []
, NoErr
, extract shortName fullName <$> catMaybes nodes
)
_anyOtherResponse ->
( PageInfo False Nothing Nothing
, Nothing
, ["Unknown GetProjectMergeRequests response: " <> show result]
, UnknownErr ["Unknown GetProjectMergeRequests response: " <> show result]
, []
)
where
Expand Down
15 changes: 10 additions & 5 deletions src/Lentille/GraphQL.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ module Lentille.GraphQL (
GraphResponse,
GraphResp,
GraphError,
GraphResponseResult (..),
RateLimit (..),
PageInfo (..),
StreamFetchOptParams (..),
Expand All @@ -44,7 +45,9 @@ import Monocle.Effects

type GraphEffects es = (LoggerEffect :> es, HttpEffect :> es, PrometheusEffect :> es, TimeEffect :> es, Retry :> es, Concurrent :> es, Fail :> es)

type GraphResponse a = (PageInfo, Maybe RateLimit, [Text], a)
data GraphResponseResult = NoRepo | UnknownErr [Text] | NoErr

type GraphResponse a = (PageInfo, Maybe RateLimit, GraphResponseResult, a)

-------------------------------------------------------------------------------
-- Constants
Expand Down Expand Up @@ -201,7 +204,7 @@ streamFetch ::
(Maybe Int -> Maybe Text -> Args a) ->
StreamFetchOptParams es a ->
-- | query result adapter
(a -> (PageInfo, Maybe RateLimit, [Text], [b])) ->
(a -> (PageInfo, Maybe RateLimit, GraphResponseResult, [b])) ->
LentilleStream es b
streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformResponse = startFetch
where
Expand Down Expand Up @@ -268,16 +271,18 @@ streamFetch client@GraphClient {..} mkArgs StreamFetchOptParams {..} transformRe
Left err ->
-- Yield the error and stop the stream
yieldStreamError $ RequestError (mkGraphQLError err)
Right (RequestResult mPartial (pageInfo, rateLimitM, decodingErrors, xs)) -> do
Right (RequestResult mPartial (pageInfo, rateLimitM, dynErrors, xs)) -> do
-- Log crawling status
logStep pageInfo rateLimitM xs totalFetched

forM_ mPartial \partial -> do
lift $ logWarn "Fetched partial result" ["err" .= partial]
yieldStreamError $ PartialErrors partial

unless (null decodingErrors) do
yieldStreamError $ DecodeError decodingErrors
_ <- case dynErrors of
UnknownErr decodingErrors -> yieldStreamError $ DecodeError decodingErrors
NoRepo -> yieldStreamError EntityRemoved
NoErr -> pure ()

-- Yield the results
S.each (Right <$> xs)
Expand Down
6 changes: 4 additions & 2 deletions src/Macroscope/Worker.hs
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,10 @@ processStream logFunc postFunc = go (0 :: Word) [] []
let addStreamError :: [Maybe ProcessError] -> [Maybe ProcessError]
addStreamError = case edoc of
Right _ -> id
-- This is likely an error we can't recover, so don't add stream error
-- This is likely an error we can recover, so don't add stream error
Left (LentilleError _ (PartialErrors _)) -> id
-- Every other 'LentilleError' are fatal$
Left (LentilleError _ EntityRemoved) -> id
-- Every other 'LentilleError' is fatal
Left err -> (Just (StreamError err) :)
let newAcc = doc : acc
if count == 499
Expand All @@ -132,6 +133,7 @@ processStream logFunc postFunc = go (0 :: Word) [] []
RequestError e -> ("graph", encodeJSON e)
RateLimitInfoError e -> ("rate-limit-info", encodeJSON e)
PartialErrors es -> ("partial", encodeJSON es)
EntityRemoved -> ("entity-removed", encodeJSON ("null" :: Text))

processBatch :: [DocumentType] -> Eff es (Maybe ProcessError)
processBatch [] = pure Nothing
Expand Down

0 comments on commit 0af1dfe

Please sign in to comment.