Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[24.1] Fix wrong celery_app config on job and workflow handlers #18819

Conversation

mvdbeek
Copy link
Member

@mvdbeek mvdbeek commented Sep 16, 2024

Fixes #18727 / https://sentry.galaxyproject.org/share/issue/8091a452f32645b89b37976362779532/:

Message
(59965185/49195886) Job wrapper finish method failed
Stack Trace

Newest

OperationalError: unable to open database file
  File "sqlalchemy/engine/base.py", line 146, in __init__
    self._dbapi_connection = engine.raw_connection()
  File "sqlalchemy/engine/base.py", line 3300, in raw_connection
    return self.pool.connect()
  File "sqlalchemy/pool/base.py", line 449, in connect
    return _ConnectionFairy._checkout(self)
  File "sqlalchemy/pool/base.py", line 1263, in _checkout
    fairy = _ConnectionRecord.checkout(pool)
  File "sqlalchemy/pool/base.py", line 712, in checkout
    rec = pool._do_get()
  File "sqlalchemy/pool/impl.py", line 179, in _do_get
    with util.safe_reraise():
  File "sqlalchemy/util/langhelpers.py", line 146, in __exit__
    raise exc_value.with_traceback(exc_tb)
  File "sqlalchemy/pool/impl.py", line 177, in _do_get
    return self._create_connection()
  File "sqlalchemy/pool/base.py", line 390, in _create_connection
    return _ConnectionRecord(self)
  File "sqlalchemy/pool/base.py", line 674, in __init__
    self.__connect()
  File "sqlalchemy/pool/base.py", line 900, in __connect
    with util.safe_reraise():
  File "sqlalchemy/util/langhelpers.py", line 146, in __exit__
    raise exc_value.with_traceback(exc_tb)
  File "sqlalchemy/pool/base.py", line 896, in __connect
    self.dbapi_connection = connection = pool._invoke_creator(self)
  File "sqlalchemy/engine/create.py", line 643, in connect
    return dialect.connect(*cargs, **cparams)
  File "sqlalchemy/engine/default.py", line 620, in connect
    return self.loaded_dbapi.connect(*cargs, **cparams)
OperationalError: (sqlite3.OperationalError) unable to open database file
(Background on this error at: https://sqlalche.me/e/20/e3q8)
  File "kombu/connection.py", line 472, in _reraise_as_library_errors
    yield
  File "kombu/connection.py", line 556, in _ensured
    return fun(*args, **kwargs)
  File "kombu/messaging.py", line 202, in _publish
    [maybe_declare(entity) for entity in declare]
  File "kombu/messaging.py", line 202, in <listcomp>
    [maybe_declare(entity) for entity in declare]
  File "kombu/messaging.py", line 107, in maybe_declare
    return maybe_declare(entity, self.channel, retry, **retry_policy)
  File "kombu/common.py", line 113, in maybe_declare
    return _maybe_declare(entity, channel)
  File "kombu/common.py", line 153, in _maybe_declare
    entity.declare(channel=channel)
  File "kombu/entity.py", line 617, in declare
    self._create_queue(nowait=nowait, channel=channel)
  File "kombu/entity.py", line 626, in _create_queue
    self.queue_declare(nowait=nowait, passive=False, channel=channel)
  File "kombu/entity.py", line 655, in queue_declare
    ret = channel.queue_declare(
  File "kombu/transport/virtual/base.py", line 537, in queue_declare
    self._new_queue(queue, **kwargs)
  File "kombu/transport/sqlalchemy/__init__.py", line 159, in _new_queue
    self._get_or_create(queue)
  File "kombu/transport/sqlalchemy/__init__.py", line 138, in _get_or_create
    obj = self.session.query(self.queue_cls) \
  File "kombu/transport/sqlalchemy/__init__.py", line 133, in session
    _, Session = self._open()
  File "kombu/transport/sqlalchemy/__init__.py", line 125, in _open
    metadata.create_all(engine)
  File "sqlalchemy/sql/schema.py", line 5857, in create_all
    bind._run_ddl_visitor(
  File "sqlalchemy/engine/base.py", line 3250, in _run_ddl_visitor
    with self.begin() as conn:
  File "contextlib.py", line 137, in __enter__
    return next(self.gen)
  File "sqlalchemy/engine/base.py", line 3240, in begin
    with self.connect() as conn:
  File "sqlalchemy/engine/base.py", line 3276, in connect
    return self._connection_cls(self)
  File "sqlalchemy/engine/base.py", line 148, in __init__
    Connection._handle_dbapi_exception_noconnection(
  File "sqlalchemy/engine/base.py", line 2440, in _handle_dbapi_exception_noconnection
    raise sqlalchemy_exception.with_traceback(exc_info[2]) from e
  File "sqlalchemy/engine/base.py", line 146, in __init__
    self._dbapi_connection = engine.raw_connection()
  File "sqlalchemy/engine/base.py", line 3300, in raw_connection
    return self.pool.connect()
  File "sqlalchemy/pool/base.py", line 449, in connect
    return _ConnectionFairy._checkout(self)
  File "sqlalchemy/pool/base.py", line 1263, in _checkout
    fairy = _ConnectionRecord.checkout(pool)
  File "sqlalchemy/pool/base.py", line 712, in checkout
    rec = pool._do_get()
  File "sqlalchemy/pool/impl.py", line 179, in _do_get
    with util.safe_reraise():
  File "sqlalchemy/util/langhelpers.py", line 146, in __exit__
    raise exc_value.with_traceback(exc_tb)
  File "sqlalchemy/pool/impl.py", line 177, in _do_get
    return self._create_connection()
  File "sqlalchemy/pool/base.py", line 390, in _create_connection
    return _ConnectionRecord(self)
  File "sqlalchemy/pool/base.py", line 674, in __init__
    self.__connect()
  File "sqlalchemy/pool/base.py", line 900, in __connect
    with util.safe_reraise():
  File "sqlalchemy/util/langhelpers.py", line 146, in __exit__
    raise exc_value.with_traceback(exc_tb)
  File "sqlalchemy/pool/base.py", line 896, in __connect
    self.dbapi_connection = connection = pool._invoke_creator(self)
  File "sqlalchemy/engine/create.py", line 643, in connect
    return dialect.connect(*cargs, **cparams)
  File "sqlalchemy/engine/default.py", line 620, in connect
    return self.loaded_dbapi.connect(*cargs, **cparams)
OperationalError: (sqlite3.OperationalError) unable to open database file
(Background on this error at: https://sqlalche.me/e/20/e3q8)
  File "galaxy/jobs/runners/__init__.py", line 677, in _finish_or_resubmit_job
    job_wrapper.finish(
  File "galaxy/jobs/__init__.py", line 2070, in finish
    task_wrapper.delay()
  File "celery/canvas.py", line 353, in delay
    return self.apply_async(partial_args, partial_kwargs)
  File "celery/canvas.py", line 400, in apply_async
    return _apply(args, kwargs, **options)
  File "celery/app/task.py", line 594, in apply_async
    return app.send_task(
  File "celery/app/base.py", line 801, in send_task
    amqp.send_task_message(P, name, message, **options)
  File "celery/app/amqp.py", line 518, in send_task_message
    ret = producer.publish(
  File "kombu/messaging.py", line 186, in publish
    return _publish(
  File "kombu/connection.py", line 553, in _ensured
    with self._reraise_as_library_errors():
  File "contextlib.py", line 155, in __exit__
    self.gen.throw(typ, value, traceback)
  File "kombu/connection.py", line 476, in _reraise_as_library_errors
    raise ConnectionError(str(exc)) from exc

The issue was that `GALAXY_CONFIG_FILE` was not being set when the job and workflow handlers were started with `-c path/to/galaxy.yml`, and the celery config heuristic does not look at `sys.argv`, so those handlers ended up with the wrong celery_app config.

(Please replace this header with a description of your pull request. Please include BOTH what you did and why you made the changes. The "why" may simply be citing a relevant Galaxy issue.)
(If fixing a bug, please add any relevant error or traceback)
(For UI components, it is recommended to include screenshots or screencasts)

How to test the changes?

(Select all options that apply)

  • I've included appropriate automated tests.
  • This is a refactoring of components with existing test coverage.
  • Instructions for manual testing are as follows:
    1. [add testing steps and prerequisites here if you didn't write automated tests covering all your changes]

License

  • I agree to license these and all my past contributions to the core galaxy codebase under the MIT license.

@mvdbeek mvdbeek added kind/bug area/configuration Galaxy's configuration system labels Sep 16, 2024
@github-actions github-actions bot added this to the 24.1 milestone Sep 16, 2024
Fixes galaxyproject#18727 /
https://sentry.galaxyproject.org/share/issue/8091a452f32645b89b37976362779532/:
```
Message
(59965185/49195886) Job wrapper finish method failed
Stack Trace

Newest

OperationalError: unable to open database file
  File "sqlalchemy/engine/base.py", line 146, in __init__
    self._dbapi_connection = engine.raw_connection()
  File "sqlalchemy/engine/base.py", line 3300, in raw_connection
    return self.pool.connect()
  File "sqlalchemy/pool/base.py", line 449, in connect
    return _ConnectionFairy._checkout(self)
  File "sqlalchemy/pool/base.py", line 1263, in _checkout
    fairy = _ConnectionRecord.checkout(pool)
  File "sqlalchemy/pool/base.py", line 712, in checkout
    rec = pool._do_get()
  File "sqlalchemy/pool/impl.py", line 179, in _do_get
    with util.safe_reraise():
  File "sqlalchemy/util/langhelpers.py", line 146, in __exit__
    raise exc_value.with_traceback(exc_tb)
  File "sqlalchemy/pool/impl.py", line 177, in _do_get
    return self._create_connection()
  File "sqlalchemy/pool/base.py", line 390, in _create_connection
    return _ConnectionRecord(self)
  File "sqlalchemy/pool/base.py", line 674, in __init__
    self.__connect()
  File "sqlalchemy/pool/base.py", line 900, in __connect
    with util.safe_reraise():
  File "sqlalchemy/util/langhelpers.py", line 146, in __exit__
    raise exc_value.with_traceback(exc_tb)
  File "sqlalchemy/pool/base.py", line 896, in __connect
    self.dbapi_connection = connection = pool._invoke_creator(self)
  File "sqlalchemy/engine/create.py", line 643, in connect
    return dialect.connect(*cargs, **cparams)
  File "sqlalchemy/engine/default.py", line 620, in connect
    return self.loaded_dbapi.connect(*cargs, **cparams)
OperationalError: (sqlite3.OperationalError) unable to open database file
(Background on this error at: https://sqlalche.me/e/20/e3q8)
  File "kombu/connection.py", line 472, in _reraise_as_library_errors
    yield
  File "kombu/connection.py", line 556, in _ensured
    return fun(*args, **kwargs)
  File "kombu/messaging.py", line 202, in _publish
    [maybe_declare(entity) for entity in declare]
  File "kombu/messaging.py", line 202, in <listcomp>
    [maybe_declare(entity) for entity in declare]
  File "kombu/messaging.py", line 107, in maybe_declare
    return maybe_declare(entity, self.channel, retry, **retry_policy)
  File "kombu/common.py", line 113, in maybe_declare
    return _maybe_declare(entity, channel)
  File "kombu/common.py", line 153, in _maybe_declare
    entity.declare(channel=channel)
  File "kombu/entity.py", line 617, in declare
    self._create_queue(nowait=nowait, channel=channel)
  File "kombu/entity.py", line 626, in _create_queue
    self.queue_declare(nowait=nowait, passive=False, channel=channel)
  File "kombu/entity.py", line 655, in queue_declare
    ret = channel.queue_declare(
  File "kombu/transport/virtual/base.py", line 537, in queue_declare
    self._new_queue(queue, **kwargs)
  File "kombu/transport/sqlalchemy/__init__.py", line 159, in _new_queue
    self._get_or_create(queue)
  File "kombu/transport/sqlalchemy/__init__.py", line 138, in _get_or_create
    obj = self.session.query(self.queue_cls) \
  File "kombu/transport/sqlalchemy/__init__.py", line 133, in session
    _, Session = self._open()
  File "kombu/transport/sqlalchemy/__init__.py", line 125, in _open
    metadata.create_all(engine)
  File "sqlalchemy/sql/schema.py", line 5857, in create_all
    bind._run_ddl_visitor(
  File "sqlalchemy/engine/base.py", line 3250, in _run_ddl_visitor
    with self.begin() as conn:
  File "contextlib.py", line 137, in __enter__
    return next(self.gen)
  File "sqlalchemy/engine/base.py", line 3240, in begin
    with self.connect() as conn:
  File "sqlalchemy/engine/base.py", line 3276, in connect
    return self._connection_cls(self)
  File "sqlalchemy/engine/base.py", line 148, in __init__
    Connection._handle_dbapi_exception_noconnection(
  File "sqlalchemy/engine/base.py", line 2440, in _handle_dbapi_exception_noconnection
    raise sqlalchemy_exception.with_traceback(exc_info[2]) from e
  File "sqlalchemy/engine/base.py", line 146, in __init__
    self._dbapi_connection = engine.raw_connection()
  File "sqlalchemy/engine/base.py", line 3300, in raw_connection
    return self.pool.connect()
  File "sqlalchemy/pool/base.py", line 449, in connect
    return _ConnectionFairy._checkout(self)
  File "sqlalchemy/pool/base.py", line 1263, in _checkout
    fairy = _ConnectionRecord.checkout(pool)
  File "sqlalchemy/pool/base.py", line 712, in checkout
    rec = pool._do_get()
  File "sqlalchemy/pool/impl.py", line 179, in _do_get
    with util.safe_reraise():
  File "sqlalchemy/util/langhelpers.py", line 146, in __exit__
    raise exc_value.with_traceback(exc_tb)
  File "sqlalchemy/pool/impl.py", line 177, in _do_get
    return self._create_connection()
  File "sqlalchemy/pool/base.py", line 390, in _create_connection
    return _ConnectionRecord(self)
  File "sqlalchemy/pool/base.py", line 674, in __init__
    self.__connect()
  File "sqlalchemy/pool/base.py", line 900, in __connect
    with util.safe_reraise():
  File "sqlalchemy/util/langhelpers.py", line 146, in __exit__
    raise exc_value.with_traceback(exc_tb)
  File "sqlalchemy/pool/base.py", line 896, in __connect
    self.dbapi_connection = connection = pool._invoke_creator(self)
  File "sqlalchemy/engine/create.py", line 643, in connect
    return dialect.connect(*cargs, **cparams)
  File "sqlalchemy/engine/default.py", line 620, in connect
    return self.loaded_dbapi.connect(*cargs, **cparams)
OperationalError: (sqlite3.OperationalError) unable to open database file
(Background on this error at: https://sqlalche.me/e/20/e3q8)
  File "galaxy/jobs/runners/__init__.py", line 677, in _finish_or_resubmit_job
    job_wrapper.finish(
  File "galaxy/jobs/__init__.py", line 2070, in finish
    task_wrapper.delay()
  File "celery/canvas.py", line 353, in delay
    return self.apply_async(partial_args, partial_kwargs)
  File "celery/canvas.py", line 400, in apply_async
    return _apply(args, kwargs, **options)
  File "celery/app/task.py", line 594, in apply_async
    return app.send_task(
  File "celery/app/base.py", line 801, in send_task
    amqp.send_task_message(P, name, message, **options)
  File "celery/app/amqp.py", line 518, in send_task_message
    ret = producer.publish(
  File "kombu/messaging.py", line 186, in publish
    return _publish(
  File "kombu/connection.py", line 553, in _ensured
    with self._reraise_as_library_errors():
  File "contextlib.py", line 155, in __exit__
    self.gen.throw(typ, value, traceback)
  File "kombu/connection.py", line 476, in _reraise_as_library_errors
    raise ConnectionError(str(exc)) from exc
```
The issue was that `GALAXY_CONFIG_FILE` was not being set when the job and
workflow handlers were started with `-c path/to/galaxy.yml`, and the
celery config heuristic does not look at `sys.argv`.
@mvdbeek mvdbeek force-pushed the fix_celery_config_when_galaxy_started_via_main_py branch from 47e8246 to ec48f50 Compare September 16, 2024 17:11
@mvdbeek
Copy link
Member Author

mvdbeek commented Sep 16, 2024

Deployed and working on usegalaxy.org. Ping @Delphine-L

@mvdbeek mvdbeek merged commit a567944 into galaxyproject:release_24.1 Sep 17, 2024
48 of 49 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
area/configuration Galaxy's configuration system kind/bug
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants