Skip to content

Commit

Permalink
Merge pull request #94 from uptick/dev-882/better-logs-on-failed-deploys
Browse files (browse the repository at this point in the history)
DEV-882 feat(server): update deploy failed messages to point to grafana
  • Loading branch information
uptickmetachu authored Nov 26, 2024
2 parents faad7e8 + b6a90b8 commit ecc919a
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 16 deletions.
6 changes: 4 additions & 2 deletions gitops_server/workers/deployer/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import os
import tempfile
import time
import uuid

from opentelemetry import trace
Expand Down Expand Up @@ -40,7 +41,7 @@ async def post_init_summary(source, username, added_apps, updated_apps, removed_
@tracer.start_as_current_span("post_result")
async def post_result(app: App, result: UpdateAppResult, deployer: "Deployer", **kwargs):
if result["exit_code"] != 0:
deploy_result = await handle_failed_deploy(app, result, deployer)
deploy_result = await handle_failed_deploy(app, result, deployer, **kwargs)
message = (
deploy_result["slack_message"]
or f"Failed to deploy app `{result['app_name']}` for cluster `{settings.CLUSTER_NAME}`:\n>>>{result['output']}"
Expand Down Expand Up @@ -170,6 +171,7 @@ async def update_app_deployment(self, app: App) -> UpdateAppResult | None:

async with self.semaphore:
logger.info(f"Deploying app {app.name!r}.")
from_timestamp = time.time()
if app.chart.type == "git":
span.set_attribute("gitops.chart.type", "git")
assert app.chart.git_repo_url
Expand Down Expand Up @@ -224,7 +226,7 @@ async def update_app_deployment(self, app: App) -> UpdateAppResult | None:

update_result = UpdateAppResult(app_name=app.name, slack_message="", **result)

await post_result(app=app, result=update_result, deployer=self)
await post_result(app=app, result=update_result, deployer=self, from_timestamp=from_timestamp)
return update_result

def calculate_app_deltas(self):
Expand Down
45 changes: 31 additions & 14 deletions gitops_server/workers/deployer/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import logging
import os
import time
from typing import Any

import httpx
from opentelemetry import trace
Expand All @@ -17,10 +19,7 @@
logger = logging.getLogger(__name__)


async def update_issue_from_deployment_url(
app: App,
deployment_url: str,
) -> None:
async def update_issue_from_deployment_url(app: App, deployment_url: str, **kwargs: Any) -> None:
async with httpx.AsyncClient() as client:
headers = github.get_headers()
deployment_response = await client.get(deployment_url, headers=headers)
Expand All @@ -35,10 +34,12 @@ async def update_issue_from_deployment_url(
try:
response = await client.post(issue_url + "/labels", json={"labels": ["NODEPLOY"]}, headers=headers)
response.raise_for_status()
dashboard_url = get_dashboard_url(
workspace_name=app.name, from_timestamp=kwargs.get("from_timestamp"), to_timestamp=time.time()
)
comment = (
":poop: Failed to deploy :poop:\n Applying `NODEPLOY` label to shutdown the server"
" and prevent deploys until it has been fixed.\nCheck migration logs at"
f" https://my.papertrailapp.com/systems/{app.name}-migration/events"
f" and prevent deploys until it has been fixed.\nCheck migration logs at {dashboard_url}"
)
response = await client.post(issue_url + "/comments", json={"body": comment}, headers=headers)
response.raise_for_status()
Expand All @@ -47,11 +48,7 @@ async def update_issue_from_deployment_url(
return


async def handle_successful_deploy(
app: App,
result,
deployer,
) -> UpdateAppResult:
async def handle_successful_deploy(app: App, result, deployer, **kwargs) -> UpdateAppResult:
github_deployment_url = str(app.values.get("github/deployment_url", ""))
await github.update_deployment(
github_deployment_url,
Expand All @@ -64,15 +61,35 @@ async def handle_successful_deploy(
DEFAULT_USER_GROUP = SlackGroup("devops", "", "devops", os.environ.get("DEFAULT_SLACK_USER_GROUP_ID", "S5KVCGSGP"))


async def handle_failed_deploy(app: App, result: UpdateAppResult, deployer) -> UpdateAppResult:
def get_dashboard_url(
workspace_name: str, from_timestamp: float | None = None, to_timestamp: float | None = None
) -> str:
DASHBOARD_URL = "https://grafana.onuptick.com/d/workforce-failed-deploys/workforce-failed-deploys?from={from_timestamp}&to={to_timestamp}&var-workspace={workspace_name}"

if from_timestamp:
from_timestamp_grafana = str(int(from_timestamp * 1000))
else:
from_timestamp_grafana = "now-6h"

if to_timestamp:
to_timestamp_grafana = str(int(to_timestamp * 1000))
else:
to_timestamp_grafana = "now"

return DASHBOARD_URL.format(
workspace_name=workspace_name, from_timestamp=from_timestamp_grafana, to_timestamp=to_timestamp_grafana
)


async def handle_failed_deploy(app: App, result: UpdateAppResult, deployer, **kwargs) -> UpdateAppResult:
github_deployment_url = str(app.values.get("github/deployment_url", ""))
if github_deployment_url:
await github.update_deployment(
github_deployment_url,
status=github.STATUSES.failure,
description=f"Failed to deploy app. {result['output']}",
)
await update_issue_from_deployment_url(app, github_deployment_url)
await update_issue_from_deployment_url(app, github_deployment_url, **kwargs)

email = deployer.author_email

Expand All @@ -83,7 +100,7 @@ async def handle_failed_deploy(app: App, result: UpdateAppResult, deployer) -> U
await find_commiter_slack_user(name=deployer.author_name, email=deployer.author_email) or DEFAULT_USER_GROUP
)
slack_user_msg = f" {slack_user} " if slack_user else ""
log_msg = f"<https://my.papertrailapp.com/systems/{app.name}-migration/events|(Migration Logs)>"
log_msg = f"<{get_dashboard_url(workspace_name=app.name, from_timestamp=kwargs.get("from_timestamp"), to_timestamp=time.time())}|(Deployment Logs)>"
result["slack_message"] = (
f"Failed to deploy app `{result['app_name']}` for cluster"
f" `{settings.CLUSTER_NAME}` :rotating_light:"
Expand Down

0 comments on commit ecc919a

Please sign in to comment.