Skip to content

Commit

Permalink
Merge pull request #459 from cloud-gov/feat-build-metrics
Browse files Browse the repository at this point in the history
save build container metrics
  • Loading branch information
drewbo authored May 6, 2024
2 parents e7d3cc5 + d121aeb commit 4bf4ddb
Show file tree
Hide file tree
Showing 10 changed files with 361 additions and 317 deletions.
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
requests==2.31.0
boto3==1.14.20
humanize==4.4.0
stopit==1.1.2
psycopg2==2.9.9
cryptography==42.0.2
Expand Down
74 changes: 46 additions & 28 deletions src/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime
from stopit import TimeoutException, SignalTimeout as Timeout
import boto3
from functools import partial

from common import CLONE_DIR_PATH

Expand All @@ -14,7 +15,8 @@
)
from log_utils.remote_logs import (
post_build_complete, post_build_error,
post_build_timeout, post_build_processing
post_build_timeout, post_build_processing,
post_metrics,
)

from crypto.decrypt import decrypt
Expand Down Expand Up @@ -92,8 +94,14 @@ def build(

logger = get_logger('main')

def run_step(returncode, msg):
if returncode != 0:
# partially apply the callback url to post_metrics
post_metrics_p = partial(post_metrics, status_callback)

def run_step(step, msg, *args, **kwargs):
try:
step(*args, **kwargs)
except Exception as e:
logger.error(e)
raise StepException(msg)

logger.info(f'Running build for {owner}/{repository}/{branch}')
Expand All @@ -104,7 +112,11 @@ def run_step(returncode, msg):
# start a separate scheduled thread for memory/cpu monitoring
MONITORING_INTERVAL = 30
monitoring_logger = get_logger('monitor')
thread = RepeatTimer(MONITORING_INTERVAL, log_monitoring_metrics, [monitoring_logger])
thread = RepeatTimer(
MONITORING_INTERVAL,
log_monitoring_metrics,
[monitoring_logger, post_metrics_p],
)
thread.start()

# S3 client used in multiple steps
Expand All @@ -119,8 +131,9 @@ def run_step(returncode, msg):
# FETCH
#
run_step(
fetch_repo(owner, repository, branch, github_token),
'There was a problem fetching the repository, see the above logs for details.'
fetch_repo,
'There was a problem fetching the repository, see the above logs for details.',
owner, repository, branch, github_token,
)

commit_sha = fetch_commit_sha(CLONE_DIR_PATH)
Expand All @@ -141,57 +154,62 @@ def run_step(returncode, msg):

if federalist_config.full_clone():
run_step(
update_repo(CLONE_DIR_PATH),
'There was a problem updating the repository, see the above logs for details.'
update_repo,
'There was a problem updating the repository, see the above logs for details.',
CLONE_DIR_PATH,
)

##
# BUILD
#
run_step(
setup_node(federalist_config.should_cache(), bucket, s3_client),
'There was a problem setting up Node, see the above logs for details.'
setup_node,
'There was a problem setting up Node, see the above logs for details.',
federalist_config.should_cache(),
bucket,
s3_client,
post_metrics_p,
)

# Run the npm `federalist` task (if it is defined)
run_step(
run_build_script(
branch, owner, repository, site_prefix, baseurl, decrypted_uevs
),
'There was a problem running the federalist script, see the above logs for details.'
run_build_script,
'There was a problem running the federalist script, see the above logs for details.', # noqa: E501
branch, owner, repository, site_prefix, baseurl, decrypted_uevs,
)

# Run the appropriate build engine based on generator
if generator == 'jekyll':
run_step(
setup_ruby(),
'There was a problem setting up Ruby, see the above logs for details.'
setup_ruby,
'There was a problem setting up Ruby, see the above logs for details.',
federalist_config.should_cache(), post_metrics_p,
)

run_step(
setup_bundler(federalist_config.should_cache(), bucket, s3_client),
'There was a problem setting up Bundler, see the above logs for details.'
setup_bundler,
'There was a problem setting up Bundler, see the above logs for details.',
federalist_config.should_cache(), bucket, s3_client,
)

run_step(
build_jekyll(
branch, owner, repository, site_prefix, baseurl, config, decrypted_uevs
),
'There was a problem running Jekyll, see the above logs for details.'
build_jekyll,
'There was a problem running Jekyll, see the above logs for details.',
branch, owner, repository, site_prefix, baseurl, config, decrypted_uevs,
)

elif generator == 'hugo':
# extra: --hugo-version (not yet used)
run_step(
download_hugo(),
'There was a problem downloading Hugo, see the above logs for details.'
download_hugo,
'There was a problem downloading Hugo, see the above logs for details.',
post_metrics_p
)

run_step(
build_hugo(
branch, owner, repository, site_prefix, baseurl, decrypted_uevs
),
'There was a problem running Hugo, see the above logs for details.'
build_hugo,
'There was a problem running Hugo, see the above logs for details.',
branch, owner, repository, site_prefix, baseurl, decrypted_uevs,
)

elif generator == 'static':
Expand Down
19 changes: 14 additions & 5 deletions src/log_utils/monitoring.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from threading import Timer
import psutil
from humanize import naturalsize

# Running maxima of the machine metrics seen so far. This is module-level
# state: log_monitoring_metrics updates it in place on every call and posts
# the whole mapping.
max_metrics = {
    "cpu": 0,
    "mem": 0,
    "disk": 0,
}


# https://stackoverflow.com/a/48741004
Expand All @@ -10,8 +15,12 @@ def run(self):
self.function(*self.args, **self.kwargs)


def log_monitoring_metrics(logger):
def log_monitoring_metrics(logger, post_metrics):
disk = psutil.disk_usage("/")
logger.info(f'CPU Usage Percentage: {psutil.cpu_percent()}')
logger.info(f'Memory Usage Percentage: {psutil.virtual_memory().percent}')
logger.info(f'Disk usage: {naturalsize(disk.used)} / {naturalsize(disk.total)}')

# compute new maximum metrics and post to the application
max_metrics["cpu"] = max(psutil.cpu_percent(), max_metrics["cpu"])
max_metrics["mem"] = max(psutil.virtual_memory().percent, max_metrics["mem"])
max_metrics["disk"] = max(disk.used, max_metrics["disk"])

post_metrics(dict(machine=max_metrics))
13 changes: 13 additions & 0 deletions src/log_utils/remote_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import base64
import requests
from typing import Dict

from .common import (STATUS_COMPLETE, STATUS_ERROR, STATUS_PROCESSING)

Expand Down Expand Up @@ -63,3 +64,15 @@ def post_build_timeout(status_callback_url, commit_sha=None):

# Post to the Pages web application with status and output
post_status(status_callback_url, status=STATUS_ERROR, output=output, commit_sha=commit_sha)


def post_metrics(status_callback_url: str, metrics: Dict):
    '''
    Send build metrics to the metrics API.

    The metrics endpoint is derived from the status callback URL by replacing
    every occurrence of the substring 'status' with 'metrics'. The metrics
    mapping is sent as the JSON request body with a request timeout of 10.
    The response is not inspected and nothing is returned.
    '''
    metrics_endpoint = status_callback_url.replace('status', 'metrics')
    requests.post(metrics_endpoint, json=metrics, timeout=10)
30 changes: 20 additions & 10 deletions src/runner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pwd
import shlex
import subprocess # nosec
from io import StringIO

NVM_PATH = '~/.nvm/nvm.sh'
RVM_PATH = '/usr/local/rvm/scripts/rvm'
Expand All @@ -13,26 +14,24 @@ def setuser():
os.setuid(pwd.getpwnam('customer').pw_uid)


def run(logger, command, cwd=None, env=None, shell=False, check=False, node=False, ruby=False):
def run(logger, command, cwd=None, env=None, shell=False, check=True, node=False, ruby=False, skip_log=False): # noqa: E501
'''
Run an OS command with provided cwd or env, stream logs to logger, and return the exit code.
Errors that occur BEFORE the command is actually executed are caught and handled here.
Errors encountered by the executed command are NOT caught. Instead a non-zero exit code
will be returned to be handled by the caller.
Errors encountered by the executed command are caught unless `check=False`. In these cases a
non-zero exit code will be returned to be handled by the caller.
See https://docs.python.org/3/library/subprocess.html#popen-constructor for details.
'''

# TODO - refactor to put the appropriate bundler binaries in PATH so this isn't necessary
if ruby:
command = f'source {RVM_PATH} && {command}'
shell = True

# TODO - refactor to put the appropriate node/npm binaries in PATH so this isn't necessary
if node:
command = f'source {NVM_PATH} && nvm use default && {command}'
command = f'source {NVM_PATH} && {command}'
shell = True

if isinstance(command, str) and not shell:
Expand All @@ -41,6 +40,9 @@ def run(logger, command, cwd=None, env=None, shell=False, check=False, node=Fals
# When a shell is needed, use `bash` instead of `sh`
executable = '/bin/bash' if shell else None

# aggregate stdout in case we need to return
output = StringIO()

try:
p = subprocess.Popen( # nosec
command,
Expand All @@ -56,12 +58,20 @@ def run(logger, command, cwd=None, env=None, shell=False, check=False, node=Fals
preexec_fn=setuser
)
while p.poll() is None:
logger.info(p.stdout.readline().strip())
line = p.stdout.readline().strip()
if not skip_log:
logger.info(line)
output.write(line)

logger.info(p.stdout.readline().strip())
line = p.stdout.readline().strip()
if not skip_log:
logger.info(line)
output.write(line)

if check and p.returncode:
raise subprocess.CalledProcessError(p.returncode, command)
if check:
if p.returncode:
raise subprocess.CalledProcessError(p.returncode, command)
return output.getvalue()

return p.returncode

Expand Down
Loading

0 comments on commit 4bf4ddb

Please sign in to comment.