Skip to content

Commit

Permalink
Add records for Api app status
Browse files Browse the repository at this point in the history
fixes #3175
  • Loading branch information
mdellweg committed Sep 2, 2023
1 parent 6c0389f commit b30ca00
Show file tree
Hide file tree
Showing 22 changed files with 408 additions and 174 deletions.
1 change: 1 addition & 0 deletions CHANGES/3175.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add status records for api apps.
3 changes: 3 additions & 0 deletions CHANGES/3175.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Starting with this release, it is highly recommended to start the api and content processes using
the newly provided entrypoints (``pulpcore-api`` and ``pulpcore-content``) instead of calling
``gunicorn`` directly.
4 changes: 2 additions & 2 deletions docs/components.rst
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ and set the following environment variables:
.. note::
A quick example on how it would run using this method::

$ /usr/local/bin/opentelemetry-instrument --service_name pulp-api /usr/local/bin/gunicorn
pulpcore.app.wsgi:application --bind "127.0.0.1:24817" --name pulp-api --workers 4 --access-logfile -
$ /usr/local/bin/opentelemetry-instrument --service_name pulp-api /usr/local/bin/pulpcore-api \
--bind "127.0.0.1:24817" --name pulp-api --workers 4 --access-logfile -

You will need to run an instance of OpenTelemetry Collector. You can read more about the `OpenTelemetry
Collector here <https://opentelemetry.io/docs/collector/>`_.
4 changes: 2 additions & 2 deletions docs/contributing/platform-api/tasking.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ pulp.tasking.constants

.. automodule:: pulpcore.tasking.constants

pulp.tasking.pulpcore_worker
pulp.tasking.worker
----------------------------

.. automodule:: pulpcore.tasking.pulpcore_worker
.. automodule:: pulpcore.tasking.worker

pulp.tasking.storage
--------------------
Expand Down
141 changes: 141 additions & 0 deletions pulpcore/app/entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
from contextvars import ContextVar
from logging import getLogger
import os
import socket

import click
import django
from django.conf import settings
from django.db import connection
from django.db.utils import InterfaceError, OperationalError
from gunicorn.workers.sync import SyncWorker
from gunicorn.app.base import BaseApplication

from pulpcore.app.apps import pulp_plugin_configs

logger = getLogger(__name__)


using_pulp_api_worker = ContextVar("using_pulp_api_worker", default=False)


class PulpApiWorker(SyncWorker):
    """
    Gunicorn sync worker that records its own liveness in the database.

    Every call to ``notify()`` also writes a heartbeat to an ``ApiAppStatus``
    row named ``<pid>@<hostname>``. The row is deleted again when ``run()``
    returns or raises.
    """

    def notify(self):
        """Perform the regular gunicorn notification, then write a heartbeat."""
        super().notify()
        self.heartbeat()

    def heartbeat(self):
        """
        Create or refresh this worker's ``ApiAppStatus`` record.

        Database connection errors (``InterfaceError``, ``OperationalError``)
        are not propagated: the connection is closed if it is unusable and the
        failure is logged at info level, so a later heartbeat can try again.
        """
        try:
            self.api_app_status, created = self.ApiAppStatus.objects.get_or_create(
                name=self.name, defaults={"versions": self.versions}
            )

            if not created:
                # The row already existed: bump its heartbeat timestamp.
                self.api_app_status.save_heartbeat()

                # Keep the recorded component versions in sync with this process.
                if self.api_app_status.versions != self.versions:
                    self.api_app_status.versions = self.versions
                    self.api_app_status.save(update_fields=["versions"])

            logger.debug(self.beat_msg)
        except (InterfaceError, OperationalError):
            connection.close_if_unusable_or_obsolete()
            logger.info(self.fail_beat_msg)

    def init_process(self):
        """
        Set up Django and the heartbeat state, then hand over to gunicorn.

        The model import has to happen after ``django.setup()``, which is why
        it is done here and not at module level.
        """
        os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pulpcore.app.settings")
        django.setup()
        from pulpcore.app.models import ApiAppStatus

        if settings.API_APP_TTL < 2 * self.timeout:
            # The message mirrors the condition above: the TTL is compared
            # against twice the gunicorn timeout.
            logger.warning(
                "API_APP_TTL (%s) is smaller than double the gunicorn timeout (%s). "
                "You may experience workers wrongly reporting as missing",
                settings.API_APP_TTL,
                self.timeout,
            )

        self.ApiAppStatus = ApiAppStatus
        # Stays None until the first successful heartbeat; run() checks it.
        self.api_app_status = None

        self.name = "{pid}@{hostname}".format(pid=self.pid, hostname=socket.gethostname())
        self.versions = {app.label: app.version for app in pulp_plugin_configs()}
        self.beat_msg = (
            "Api App '{name}' heartbeat written, sleeping for '{interarrival}' seconds".format(
                name=self.name, interarrival=self.timeout
            )
        )
        self.fail_beat_msg = (
            "Api App '{name}' failed to write a heartbeat to the database, sleeping for "
            "'{interarrival}' seconds."
        ).format(name=self.name, interarrival=self.timeout)
        using_pulp_api_worker.set(True)
        super().init_process()

    def run(self):
        """Run the worker loop and remove the status record afterwards."""
        try:
            super().run()
        finally:
            # Clean up only if a heartbeat ever produced a record.
            if self.api_app_status:
                self.api_app_status.delete()


class PulpcoreApiApplication(BaseApplication):
    """
    Gunicorn application that serves the Pulp API with ``PulpApiWorker``.

    Args:
        options (dict): Gunicorn setting names mapped to values. Entries whose
            value is ``None`` are ignored. ``None`` or an empty mapping means
            that no settings are overridden.
    """

    def __init__(self, options):
        # Stored before calling the base initializer so that ``load_config``
        # can read it. NOTE(review): presumably the base initializer is what
        # invokes ``load_config`` -- confirm against gunicorn.
        self.options = options or {}
        super().__init__()

    def load_config(self):
        """Apply the provided options, then pin the process name and worker class."""
        for key, value in self.options.items():
            if value is not None:
                self.cfg.set(key.lower(), value)
        self.cfg.set("default_proc_name", "pulpcore-api")
        self.cfg.set("worker_class", PulpApiWorker.__module__ + "." + PulpApiWorker.__qualname__)

    def load(self):
        """Return the WSGI application, importing it only when requested."""
        import pulpcore.app.wsgi

        return pulpcore.app.wsgi.application


# Gunicorn options are adapted from:
# https://github.com/benoitc/gunicorn/blob/master/gunicorn/config.py


@click.option("--bind", "-b", default="[::]:24817")
@click.option("--workers", "-w", type=int)
# @click.option("--threads", "-w", type=int)  # We don't use a threaded worker...
@click.option("--name", "-n", "proc_name")
@click.option("--timeout", "-t", type=int)
@click.option("--graceful-timeout", type=int)
@click.option("--keep-alive", "keepalive", type=int)
@click.option("--limit-request-line", type=int)
@click.option("--limit-request-fields", type=int)
@click.option("--limit-request-field-size", type=int)
@click.option("--max-requests", type=int)
@click.option("--access-logfile", "accesslog")
@click.option(
    "--access-logformat",
    "access_log_format",
    # Implicit string concatenation inside plain parentheses. There must be no
    # trailing comma here, otherwise the default becomes a 1-tuple, not a str.
    default=(
        "pulp [%({correlation-id}o)s]: "
        '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
    ),
)
@click.option("--error-logfile", "--log-file", "errorlog")
@click.option(
    "--log-level", "loglevel", type=click.Choice(["debug", "info", "warning", "error", "critical"])
)
@click.option("--reload/--no-reload")
@click.option("--reload-engine", type=click.Choice(["auto", "poll", "inotify"]))
@click.option("--reload-extra-file", "reload_extra_files", multiple=True)
@click.option("--reuse-port/--no-reuse-port")
@click.option("--chdir")
@click.option("--user", "-u")
@click.option("--group", "-g")
@click.command()
def main(**options):
    """Run the Pulp API server.

    The command line options are collected into a dict and handed to
    PulpcoreApiApplication, which applies them as gunicorn settings.
    """
    PulpcoreApiApplication(options).run()
38 changes: 38 additions & 0 deletions pulpcore/app/migrations/0110_apiappstatus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 4.2.1 on 2023-08-21 14:37

import django.contrib.postgres.fields.hstore
from django.db import migrations, models
import django_lifecycle.mixins
import pulpcore.app.models.base


class Migration(migrations.Migration):
    """Create the ``ApiAppStatus`` model."""

    # Applied on top of core migration 0109.
    dependencies = [
        ("core", "0109_contentartifact_relative_path_index"),
    ]

    operations = [
        migrations.CreateModel(
            name="ApiAppStatus",
            fields=[
                (
                    # UUID primary key; the default comes from pulpcore's pulp_uuid callable.
                    "pulp_id",
                    models.UUIDField(
                        default=pulpcore.app.models.base.pulp_uuid,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("pulp_created", models.DateTimeField(auto_now_add=True)),
                ("pulp_last_updated", models.DateTimeField(auto_now=True, null=True)),
                # One row per app name: the column is unique and indexed.
                ("name", models.TextField(db_index=True, unique=True)),
                # auto_now=True: the timestamp is refreshed whenever the row is saved.
                ("last_heartbeat", models.DateTimeField(auto_now=True)),
                # String-to-string mapping; defaults to an empty dict.
                ("versions", django.contrib.postgres.fields.hstore.HStoreField(default=dict)),
            ],
            options={
                "abstract": False,
            },
            bases=(django_lifecycle.mixins.LifecycleModelMixin, models.Model),
        ),
    ]
2 changes: 1 addition & 1 deletion pulpcore/app/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
RepositoryVersionContentDetails,
)

from .status import ContentAppStatus
from .status import ApiAppStatus, ContentAppStatus

from .task import (
CreatedResource,
Expand Down
83 changes: 59 additions & 24 deletions pulpcore/app/models/status.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,35 +11,55 @@
from pulpcore.app.models import BaseModel


class ContentAppStatusManager(models.Manager):
class AppStatusManager(models.Manager):
def online(self):
"""
Returns a queryset of ``ContentAppStatus`` objects that are online.
Returns a queryset of objects that are online.
To be considered 'online', a ContentAppStatus must have a heartbeat timestamp within
``settings.CONTENT_APP_TTL`` from now.
To be considered 'online', an AppStatus must have a heartbeat timestamp within
``self.model.APP_TTL`` from now.
Returns:
:class:`django.db.models.query.QuerySet`: A query set of the ``ContentAppStatus``
:class:`django.db.models.query.QuerySet`: A query set of the
objects which are considered 'online'.
"""
now = timezone.now()
age_threshold = now - timedelta(seconds=settings.CONTENT_APP_TTL)

age_threshold = timezone.now() - self.model.APP_TTL
return self.filter(last_heartbeat__gte=age_threshold)

def missing(self, age=None):
"""
Returns a queryset of workers meeting the criteria to be considered 'missing'
To be considered missing, an AppStatus must have a stale timestamp. By default, stale is
defined here as longer than the ``self.model.APP_TTL``, or you can specify age as a
timedelta.
Args:
age (datetime.timedelta): Objects who have heartbeats older than this time interval are
considered missing.
Returns:
:class:`django.db.models.query.QuerySet`: A query set of the objects which
are considered to be 'missing'.
"""
age_threshold = timezone.now() - (age or self.model.APP_TTL)
return self.filter(last_heartbeat__lt=age_threshold)


class ContentAppStatus(BaseModel):
class BaseAppStatus(BaseModel):
"""
Represents a Content App Status
Represents an AppStatus.
This class is abstract. Subclasses must define `APP_TTL` as a `timedelta`.
Fields:
name (models.TextField): The name of the content app
last_heartbeat (models.DateTimeField): A timestamp of this worker's last heartbeat
name (models.TextField): The name of the app.
last_heartbeat (models.DateTimeField): A timestamp of this worker's last heartbeat.
versions (HStoreField): A dictionary with versions of all pulp components.
"""

objects = ContentAppStatusManager()
objects = AppStatusManager()

name = models.TextField(db_index=True, unique=True)
last_heartbeat = models.DateTimeField(auto_now=True)
Expand All @@ -48,29 +68,25 @@ class ContentAppStatus(BaseModel):
@property
def online(self):
"""
Whether a content app can be considered 'online'
Whether an app can be considered 'online'
To be considered 'online', a content app must have a heartbeat timestamp more recent than
the ``CONTENT_APP_TTL`` setting.
To be considered 'online', an app must have a timestamp more recent than ``self.APP_TTL``.
Returns:
bool: True if the content app is considered online, otherwise False
bool: True if the app is considered online, otherwise False
"""
now = timezone.now()
age_threshold = now - timedelta(seconds=settings.CONTENT_APP_TTL)

age_threshold = timezone.now() - self.APP_TTL
return self.last_heartbeat >= age_threshold

@property
def missing(self):
"""
Whether a Content App can be considered 'missing'
Whether an app can be considered 'missing'
To be considered 'missing', a Content App must have a timestamp older than
``SETTINGS.CONTENT_APP_TTL``.
To be considered 'missing', an App must have a timestamp older than ``self.APP_TTL``.
Returns:
bool: True if the content app is considered missing, otherwise False
bool: True if the app is considered missing, otherwise False
"""
return not self.online

Expand All @@ -85,3 +101,22 @@ def save_heartbeat(self):
only update an existing database record.
"""
self.save(update_fields=["last_heartbeat"])

class Meta:
abstract = True


class ApiAppStatus(BaseAppStatus):
    """
    Represents an API app status.

    ``APP_TTL`` is a ``timedelta`` built from ``settings.API_APP_TTL``, which is
    interpreted as a number of seconds.
    """

    # Computed once, when the class body is executed.
    APP_TTL = timedelta(seconds=settings.API_APP_TTL)


class ContentAppStatus(BaseAppStatus):
    """
    Represents a content app status.

    ``APP_TTL`` is a ``timedelta`` built from ``settings.CONTENT_APP_TTL``, which
    is interpreted as a number of seconds.
    """

    # Computed once, when the class body is executed.
    APP_TTL = timedelta(seconds=settings.CONTENT_APP_TTL)
Loading

0 comments on commit b30ca00

Please sign in to comment.