From 92471e9f624b136bb2655f964b120589a8a73af3 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Tue, 30 Jul 2024 22:11:04 -0500 Subject: [PATCH 01/13] chore: deprecate 'allow_userdata', add 'user_data' Rather than an top-level `allow_userdata` key, instead use a `user_data` dict. This is to better align with the top-level `vendor_data` keys. --- cloudinit/stages.py | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 84bc0ce07a0..1160ba28fde 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -793,6 +793,41 @@ def finalize_handlers(): finally: finalize_handlers() + def _consume_userdata_if_enabled(self, frequency: str) -> None: + """Consume userdata if not disabled in base config. + + Base config can have a definition like: + user_data: + enabled: false + require_pgp: true + or a deprecated `allow_userdata` key. + + Parse them and maybe consume userdata accordingly. + """ + user_data_cfg = self.cfg.get("user_data", {}) + enabled = user_data_cfg.get("enabled", True) + + if "allow_userdata" in self.cfg: + lifecycle.deprecate( + deprecated="Key 'allow_userdata'", + deprecated_version="24.3", + extra_message="Use 'user_data.enabled' instead.", + ) + if "enabled" in user_data_cfg: + LOG.warning( + "Both 'allow_userdata' and 'user_data.enabled' are set." + " 'allow_userdata' will be ignored." + ) + else: + enabled = util.get_cfg_option_bool(self.cfg, "allow_userdata") + + if enabled: + self._consume_userdata(frequency) + else: + LOG.debug( + "User data disabled in base config: discarding user-data" + ) + def consume_data(self, frequency=PER_INSTANCE): # Consume the userdata first, because we need want to let the part # handlers run first (for merging stuff) @@ -801,10 +836,7 @@ def consume_data(self, frequency=PER_INSTANCE): "reading and applying user-data", parent=self.reporter, ): - if util.get_cfg_option_bool(self.cfg, "allow_userdata", True): - self._consume_userdata(frequency) - else: - LOG.debug("allow_userdata = False: discarding user-data") + self._consume_userdata_if_enabled(frequency) with events.ReportEventStack( "consume-vendor-data", From 224b682b57e72235659eff38cdad29941a7561a6 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Sat, 25 May 2024 22:45:19 -0500 Subject: [PATCH 02/13] feat: Support encrypted and signed user data Cloud-init user data often contains user secrets including passwords and private keys. This data has always been submitted in plain text. To protect this data's confidentiality and guarantee its authenticity, this commit add the ability to have this data encrypted and signed. A new user data format is added allowing for an ASCII armored PGP MESSAGE. If detected, cloud-init will import into a temporary keyring any keys provided in /etc/cloud/keys and use these keys to decrypt and/or verify the provided data. After decryption, the resulting message will be treated as user data as before. --- cloudinit/config/cc_apt_configure.py | 2 +- cloudinit/gpg.py | 38 ++++++++++ cloudinit/handlers/__init__.py | 1 + cloudinit/settings.py | 1 + cloudinit/user_data.py | 107 +++++++++++++++++---------- cloudinit/util.py | 20 +++-- 6 files changed, 121 insertions(+), 48 deletions(-) diff --git a/cloudinit/config/cc_apt_configure.py b/cloudinit/config/cc_apt_configure.py index 35445711e60..72a81f9c7b4 100644 --- a/cloudinit/config/cc_apt_configure.py +++ b/cloudinit/config/cc_apt_configure.py @@ -1111,7 +1111,7 @@ def apt_key_add(gpg_context): ) return file_name - def apt_key_list(gpg_context): + def apt_key_list(gpg_context: GPG): """apt-key list returns string of all trusted keys (in /etc/apt/trusted.gpg and diff --git a/cloudinit/gpg.py b/cloudinit/gpg.py index 8ade82185f2..dccd6a84e5f 100644 --- a/cloudinit/gpg.py +++ b/cloudinit/gpg.py @@ -9,6 +9,7 @@ import logging import os +import pathlib import re import signal import time @@ -68,6 +69,43 @@ def export_armour(self, key: str) -> Optional[str]: LOG.debug('Failed to export armoured key "%s": %s', key, error) return None + def import_key(self, key: pathlib.Path) -> None: + """Import gpg key from a file to the temporary keyring. + + :param key: path to the key file + """ + try: + subp.subp( + [ + "gpg", + "--batch", + "--import", + str(key), + ], + update_env=self.env, + ) + except subp.ProcessExecutionError as error: + LOG.warning("Failed to import key %s: %s", key, error) + + def decrypt(self, data: str) -> str: + """Process data using gpg. + + This can be used to decrypt encrypted data, verify signed data, + or both depending on the data provided. + + :param data: ASCII-armored GPG message to process + :return: decrypted data + :raises: ProcessExecutionError if gpg fails to decrypt data + """ + return subp.subp( + [ + "gpg", + "--decrypt", + ], + data=data, + update_env=self.env, + ).stdout + def dearmor(self, key: str) -> str: """Dearmor gpg key, dearmored key gets returned diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index 510768b5c1f..6acfe1d623f 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -55,6 +55,7 @@ "text/x-shellscript-per-boot": "text/x-shellscript-per-boot", "text/x-shellscript-per-instance": "text/x-shellscript-per-instance", "text/x-shellscript-per-once": "text/x-shellscript-per-once", + "-----begin pgp message-----": "text/x-pgp-armored", } # Sorted longest first diff --git a/cloudinit/settings.py b/cloudinit/settings.py index a73f25118d7..ff4831db4fa 100644 --- a/cloudinit/settings.py +++ b/cloudinit/settings.py @@ -79,3 +79,4 @@ FREQUENCIES = [PER_INSTANCE, PER_ALWAYS, PER_ONCE] HOTPLUG_ENABLED_FILE = "/var/lib/cloud/hotplug.enabled" +KEY_DIR = "/etc/cloud/keys" diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index e99ad9a183a..85449737419 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -8,14 +8,18 @@ # # This file is part of cloud-init. See LICENSE file for license information. +import email import logging import os +from email.message import Message from email.mime.base import MIMEBase from email.mime.multipart import MIMEMultipart from email.mime.nonmultipart import MIMENonMultipart from email.mime.text import MIMEText +import pathlib -from cloudinit import features, handlers, util +from cloudinit import features, gpg, handlers, subp, util +from cloudinit.settings import KEY_DIR from cloudinit.url_helper import UrlError, read_file_or_url LOG = logging.getLogger(__name__) @@ -37,6 +41,7 @@ ARCHIVE_UNDEF_BINARY_TYPE = "application/octet-stream" # This seems to hit most of the gzip possible content types. +ENCRYPT_TYPE = "text/x-pgp-armored" DECOMP_TYPES = [ "application/gzip", "application/gzip-compressed", @@ -47,6 +52,7 @@ "application/x-gzip", "application/x-gzip-compressed", ] +TRANSFORM_TYPES = [ENCRYPT_TYPE] + DECOMP_TYPES # Msg header used to track attachments ATTACHMENT_FIELD = "Number-Attachments" @@ -87,7 +93,7 @@ def process(self, blob): self._process_msg(convert_string(blob), accumulating_msg) return accumulating_msg - def _process_msg(self, base_msg, append_msg): + def _process_msg(self, base_msg: Message, append_msg): def find_ctype(payload): return handlers.type_from_starts_with(payload) @@ -95,49 +101,67 @@ def find_ctype(payload): if is_skippable(part): continue - ctype = None - ctype_orig = part.get_content_type() payload = util.fully_decoded_payload(part) - was_compressed = False - # When the message states it is of a gzipped content type ensure - # that we attempt to decode said payload so that the decompressed - # data can be examined (instead of the compressed data). - if ctype_orig in DECOMP_TYPES: - try: - payload = util.decomp_gzip(payload, quiet=False) - # At this point we don't know what the content-type is - # since we just decompressed it. - ctype_orig = None - was_compressed = True - except util.DecompressionError as e: - error_message = ( - "Failed decompressing payload from {} of" - " length {} due to: {}".format( - ctype_orig, len(payload), e - ) - ) - _handle_error(error_message, e) - continue - - # Attempt to figure out the payloads content-type - if not ctype_orig: - ctype_orig = UNDEF_TYPE + ctype = part.get_content_type() # There are known cases where mime-type text/x-shellscript included # non shell-script content that was user-data instead. It is safe # to check the true MIME type for x-shellscript type since all # shellscript payloads must have a #! header. The other MIME types # that cloud-init supports do not have the same guarantee. - if ctype_orig in TYPE_NEEDED + ["text/x-shellscript"]: - ctype = find_ctype(payload) - if ctype is None: - ctype = ctype_orig + if ctype in TYPE_NEEDED + ["text/x-shellscript"]: + ctype = find_ctype(payload) or ctype + + was_transformed = False + + # When the message states it is transformed ensure + # that we attempt to decode said payload so that the transformed + # data can be examined. + parent_ctype = None + if ctype in TRANSFORM_TYPES: + if ctype in DECOMP_TYPES: + try: + payload = util.decomp_gzip(payload, quiet=False) + except util.DecompressionError as e: + error_message = ( + "Failed decompressing payload from {} of" + " length {} due to: {}".format( + ctype, len(payload), e + ) + ) + _handle_error(error_message, e) + continue + elif ctype == ENCRYPT_TYPE and isinstance(payload, str): + with gpg.GPG() as gpg_context: + # Import all keys from the /etc/cloud/keys directory + keys_dir = pathlib.Path("/etc/cloud/keys") + if keys_dir.is_dir(): + for key_path in keys_dir.iterdir(): + gpg_context.import_key(key_path) + try: + payload = gpg_context.decrypt(payload) + except subp.ProcessExecutionError as e: + raise RuntimeError( + "Failed decrypting user data payload of type " + f"{ctype}. Ensure any necessary keys are " + f"present in {KEY_DIR}." + ) from e + else: + error_message = ( + f"Unknown content type {ctype} that" + " is marked as transformed" + ) + _handle_error(error_message) + continue + was_transformed = True + parent_ctype = ctype + ctype = find_ctype(payload) or parent_ctype # In the case where the data was compressed, we want to make sure # that we create a new message that contains the found content # type with the uncompressed content since later traversals of the # messages will expect a part not compressed. - if was_compressed: + if was_transformed: maintype, subtype = ctype.split("/", 1) n_part = MIMENonMultipart(maintype, subtype) n_part.set_payload(payload) @@ -146,12 +170,13 @@ def find_ctype(payload): # after decoding and decompression. if part.get_filename(): _set_filename(n_part, part.get_filename()) - for h in ("Launch-Index",): - if h in part: - _replace_header(n_part, h, str(part[h])) + if "Launch-Index" in part: + _replace_header( + n_part, "Launch-Index", str(part["Launch-Index"]) + ) part = n_part - if ctype != ctype_orig: + if ctype != parent_ctype: _replace_header(part, CONTENT_TYPE, ctype) if ctype in INCLUDE_TYPES: @@ -361,8 +386,12 @@ def is_skippable(part): return False +def message_from_string(string) -> Message: + return email.message_from_string(string) + + # Coverts a raw string into a mime message -def convert_string(raw_data, content_type=NOT_MULTIPART_TYPE): +def convert_string(raw_data, content_type=NOT_MULTIPART_TYPE) -> Message: """convert a string (more likely bytes) or a message into a mime message.""" if not raw_data: @@ -380,7 +409,7 @@ def create_binmsg(data, content_type): bdata = raw_data bdata = util.decomp_gzip(bdata, decode=False) if b"mime-version:" in bdata[0:4096].lower(): - msg = util.message_from_string(bdata.decode("utf-8")) + msg = message_from_string(bdata.decode("utf-8")) else: msg = create_binmsg(bdata, content_type) diff --git a/cloudinit/util.py b/cloudinit/util.py index 8025f4d51c4..214b0ced26d 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -11,7 +11,6 @@ import binascii import contextlib import copy as obj_copy -import email import glob import grp import gzip @@ -36,6 +35,7 @@ from base64 import b64decode from collections import deque, namedtuple from contextlib import contextmanager, suppress +from email.message import Message from errno import ENOENT from functools import lru_cache from pathlib import Path @@ -164,7 +164,7 @@ def maybe_b64decode(data: bytes) -> bytes: return data -def fully_decoded_payload(part): +def fully_decoded_payload(part: Message): # In Python 3, decoding the payload will ironically hand us a bytes object. # 'decode' means to decode according to Content-Transfer-Encoding, not # according to any charset in the Content-Type. So, if we end up with @@ -175,8 +175,16 @@ def fully_decoded_payload(part): cte_payload, bytes ): charset = part.get_charset() - if charset and charset.input_codec: - encoding = charset.input_codec + + # TODO: Mypy doesn't like the following code because `input_codec` + # is part of a legacy API. See first line of: + # https://docs.python.org/3/library/email.charset.html + # However, as of this writing, it is still available: + # https://github.com/python/cpython/blob/aab18f4d925528c2cbe4625211bf904db2a28317/Lib/email/charset.py#L234 # noqa: E501 + # That said, we shouldn't continue using legacy APIs, so we should + # update this code at some point. + if charset and charset.input_codec: # type: ignore + encoding = charset.input_codec # type: ignore else: encoding = "utf-8" return cte_payload.decode(encoding, "surrogateescape") @@ -2899,10 +2907,6 @@ def is_x86(uname_arch=None): return x86_arch_match -def message_from_string(string): - return email.message_from_string(string) - - def get_installed_packages(): out = subp.subp(["dpkg-query", "--list"], capture=True) From 038bb20793adef129ab7a8cef577fb1fba781b6f Mon Sep 17 00:00:00 2001 From: James Falcon Date: Tue, 30 Jul 2024 22:53:40 -0500 Subject: [PATCH 03/13] squash: Add require_gpg flag --- cloudinit/sources/__init__.py | 21 ++++++++++++++++++--- cloudinit/user_data.py | 22 +++++++++++++++++----- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index 973d140ff36..3b79a47cb7e 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -646,20 +646,35 @@ def get_url_params(self): return URLParams(max_wait, timeout, retries, sec_between_retries) def get_userdata(self, apply_filter=False): + require_pgp = self.sys_cfg.get("user_data", {}).get( + "require_pgp", False + ) if self.userdata is None: - self.userdata = self.ud_proc.process(self.get_userdata_raw()) + self.userdata = self.ud_proc.process( + self.get_userdata_raw(), require_pgp + ) if apply_filter: return self._filter_xdata(self.userdata) return self.userdata def get_vendordata(self): + require_pgp = self.sys_cfg.get("vendor_data", {}).get( + "require_pgp", False + ) if self.vendordata is None: - self.vendordata = self.ud_proc.process(self.get_vendordata_raw()) + self.vendordata = self.ud_proc.process( + self.get_vendordata_raw(), require_pgp + ) return self.vendordata def get_vendordata2(self): + require_pgp = self.sys_cfg.get("vendor_data2", {}).get( + "require_pgp", False + ) if self.vendordata2 is None: - self.vendordata2 = self.ud_proc.process(self.get_vendordata2_raw()) + self.vendordata2 = self.ud_proc.process( + self.get_vendordata2_raw(), require_pgp + ) return self.vendordata2 @property diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index 85449737419..995428eb7a3 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -84,16 +84,20 @@ def __init__(self, paths): self.paths = paths self.ssl_details = util.fetch_ssl_details(paths) - def process(self, blob): + def process(self, blob, require_pgp=False): accumulating_msg = MIMEMultipart() if isinstance(blob, list): for b in blob: - self._process_msg(convert_string(b), accumulating_msg) + self._process_msg( + convert_string(b), accumulating_msg, require_pgp + ) else: - self._process_msg(convert_string(blob), accumulating_msg) + self._process_msg( + convert_string(blob), accumulating_msg, require_pgp + ) return accumulating_msg - def _process_msg(self, base_msg: Message, append_msg): + def _process_msg(self, base_msg: Message, append_msg, require_pgp=False): def find_ctype(payload): return handlers.type_from_starts_with(payload) @@ -104,6 +108,7 @@ def find_ctype(payload): payload = util.fully_decoded_payload(part) ctype = part.get_content_type() + # There are known cases where mime-type text/x-shellscript included # non shell-script content that was user-data instead. It is safe # to check the true MIME type for x-shellscript type since all @@ -112,6 +117,13 @@ def find_ctype(payload): if ctype in TYPE_NEEDED + ["text/x-shellscript"]: ctype = find_ctype(payload) or ctype + if require_pgp and ctype != ENCRYPT_TYPE: + error_message = ( + "'require_pgp' was set true in cloud-init's base " + f"configuration, but content type is {ctype}." + ) + raise RuntimeError(error_message) + was_transformed = False # When the message states it is transformed ensure @@ -134,7 +146,7 @@ def find_ctype(payload): elif ctype == ENCRYPT_TYPE and isinstance(payload, str): with gpg.GPG() as gpg_context: # Import all keys from the /etc/cloud/keys directory - keys_dir = pathlib.Path("/etc/cloud/keys") + keys_dir = pathlib.Path(KEY_DIR) if keys_dir.is_dir(): for key_path in keys_dir.iterdir(): gpg_context.import_key(key_path) From 3bb1a0ad22addd062f0a7b269d60afa893837d21 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 31 Jul 2024 14:23:47 -0500 Subject: [PATCH 04/13] squash: tests and docs --- cloudinit/user_data.py | 15 +- doc/rtd/explanation/format.rst | 61 ++++ doc/rtd/howto/index.rst | 1 + doc/rtd/howto/pgp.rst | 201 +++++++++++ doc/rtd/reference/base_config_reference.rst | 36 +- tests/integration_tests/instances.py | 4 +- tests/integration_tests/userdata/test_pgp.py | 331 +++++++++++++++++++ tests/unittests/cloudinit/test_user_data.py | 129 ++++++++ tests/unittests/test_data.py | 74 +---- tests/unittests/test_util.py | 6 - tests/unittests/util.py | 13 +- 11 files changed, 773 insertions(+), 98 deletions(-) create mode 100644 doc/rtd/howto/pgp.rst create mode 100644 tests/integration_tests/userdata/test_pgp.py create mode 100644 tests/unittests/cloudinit/test_user_data.py diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index 995428eb7a3..48fef79467c 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -11,12 +11,13 @@ import email import logging import os +import pathlib from email.message import Message from email.mime.base import MIMEBase from email.mime.multipart import MIMEMultipart from email.mime.nonmultipart import MIMENonMultipart from email.mime.text import MIMEText -import pathlib +from typing import Union from cloudinit import features, gpg, handlers, subp, util from cloudinit.settings import KEY_DIR @@ -402,10 +403,14 @@ def message_from_string(string) -> Message: return email.message_from_string(string) -# Coverts a raw string into a mime message -def convert_string(raw_data, content_type=NOT_MULTIPART_TYPE) -> Message: - """convert a string (more likely bytes) or a message into - a mime message.""" +def convert_string( + raw_data: Union[str, bytes], content_type=NOT_MULTIPART_TYPE +) -> Message: + """Convert the raw data into a mime message. + + 'raw_data' is the data as it was received from the user-data source. + It could be a string, bytes, or a gzip compressed version of either. + """ if not raw_data: raw_data = b"" diff --git a/doc/rtd/explanation/format.rst b/doc/rtd/explanation/format.rst index 7d14acb6318..32431d12ad4 100644 --- a/doc/rtd/explanation/format.rst +++ b/doc/rtd/explanation/format.rst @@ -24,6 +24,7 @@ Formats that directly configure the instance: Formats that deal with other user data formats: +- `PGP Message`_ - `Include file`_ - `Jinja template`_ - `MIME multi-part archive`_ @@ -141,6 +142,64 @@ The boothook is different in that: Use :ref:`jinja templates` with :ref:`v1.instance_id` instead. +.. _user_data_formats-pgp: + +PGP Message +=========== + +Example +------- + +.. code-block:: text + + -----BEGIN PGP MESSAGE----- + + hF4DoKS6K1guBqsSAQdAE8bD7gGuKcGN91zGzW7SR+u6nnq9pyqjR8qJZl2sJRow + MgfxFaFaIw232BJvqwuoa0HGsmCaVyXmj6PFVJceBn2X7s8hXgHutlQxQf1Mju63 + 1OkBCQIQxJJj1X4dpQs44tcCjkGOWOO9XrSyAPhcR5sHIUYcsrd3cW9OUp6PUMb2 + Ci2Lmz8gG+Z1pEAxJD6TdCIFy/+Emf6UaDPWbg6CRuWmu3BtagUmVVK8+yXZBa6j + hdy0a9FZDpZsemWwsGxHJg0HRcClE/AebMtjczftIosPFfn68/fUgwGcuQ28wWGn + HG9h09fgXoO1tlOjpz/pqDou465pDMk43gxOUoCtdBaOSgkHWUiD2s9WxoAi83OY + 9uTdQCZ1EmaUQE7W/VFlcjapCtoiph/sBmQyumcMfUjJd5CCTK8q3iNcFnkulXl+ + yRMFhm1w1SBogfKeo8bv0wO9SJZ4dBq0cDSXYGzFkWOzm1k0o7rJ4pghuU3EUFk+ + Sr3QDTBlOz83BI2NPHLO+MqAwCwtao29YXA9iDYD3Cyz8fRLsWL0gLGE887s0W0Y + dh2fIhLR+5uufQWF6xj7Am3UBTbidgMmEch9FdSYZYpPamiWojMFg45jLyBBETIq + C2AAfHUYOkwZzf55yxwLEW4qyj9q+9rHF/M/XfI7t+RgBqODmNWoww4SOixEun1X + tP3W8DSp9p+PMqdfztf8Uo7HzijsolLlt4cxsUx5tN0Bz9UYtQ9VMog5IN1Oe5Vu + Wec6bO9qWdZUNV5Ty32DFycXvgmzI6w/9v4R7YKgKE/2AWOito4E3WpZZlj2mVJW + M9ZoSf3O9I3jqyCpI/pZ/F1egrydwLskM71FCx/3kkAHRRB4436TpQe0OpMFcfOp + 6qflwEWygx9EzqEsfZJ4vYgXf2ivgsSpxGVE+5Tcg90Te+0of3E= + =T2Fh + -----END PGP MESSAGE----- + +Explanation +----------- + +A PGP message is a message that has been encrypted and/or signed with one or +multiple `PGP keys `_, and then +`ASCII armored `_. The message +is decrypted and/or verified with keys contained in the image. +The decrypted message is then processed as any other user data format. + +To support such data, cloud-init requires the decryption and signing keys +to be present in the `/etc/cloud/keys` directory. To decrypt encrypted +data, the private key corresponding to the public key that encrypted the data +must be present. To verify signed data, the public key that corresponds +to the private key that signed the data must be present. + +To ensure authenticity, cloud-init can restrict user data to be a PGP +message only. To do so, the following +:ref:`base configuration` can be used: + +.. code-block:: yaml + + user_data: + require_pgp: true + + +For a detailed guide to creating the PGP message and image necessary for this +user data format, see the :ref:`Use encrypted or signed user data` page. + Include file ============ @@ -384,6 +443,8 @@ as binary data and so may be processed automatically. +--------------------+-----------------------------+-------------------------+ |Cloud boothook |#cloud-boothook |text/cloud-boothook | +--------------------+-----------------------------+-------------------------+ +| PGP Message |-----BEGIN PGP MESSAGE----- |text/x-pgp-armored | ++--------------------+-----------------------------+-------------------------+ |MIME multi-part |Content-Type: multipart/mixed|multipart/mixed | +--------------------+-----------------------------+-------------------------+ |Cloud config archive|#cloud-config-archive |text/cloud-config-archive| diff --git a/doc/rtd/howto/index.rst b/doc/rtd/howto/index.rst index b6811fa3133..940f5138666 100644 --- a/doc/rtd/howto/index.rst +++ b/doc/rtd/howto/index.rst @@ -22,6 +22,7 @@ How do I...? Re-run cloud-init Change how often a module runs Validate my user data + Use encrypted or signed user data Debug cloud-init Check the status of cloud-init Report a bug diff --git a/doc/rtd/howto/pgp.rst b/doc/rtd/howto/pgp.rst new file mode 100644 index 00000000000..e0e2c001f08 --- /dev/null +++ b/doc/rtd/howto/pgp.rst @@ -0,0 +1,201 @@ +.. _pgp: + +Use encrypted or signed user data +********************************* + +Overview +======== + +This guide will show you how to use PGP encryption to secure your +:ref:`user data` +when using cloud-init. This will be accomplished with the following steps: + +1. Launch a cloud instance +2. Generate PGP key pairs +3. Encrypt and sign the user data +4. Export the keys +5. Restrict user data to require PGP message +6. Retrieve our encrypted and signed user data +7. Create a custom image containing the keys +8. Launch an instance with the encrypted user data + +We will be using the `gpg` command to for all PGP-related operations. This +guide will add new keys to the default key ring while later providing +instructions for removing them. These keys are generated for instructive +purposes only and are not intended for production use. + +.. note:: + This guide is NOT intended to be a comprehensive guide to PGP encryption or + best practices when using the `gpg` command or gpg key rings. Please + consult the `GnuPG documentation `_ for documentation + and best practices. + +Prerequisites +============= + +This guide also assumes you have the ability to launch cloud instances +with root permissions and can create snapshots or custom images from +those instances. This guide will demonstrate this using LXD, but +commands to achieve the same result will vary by cloud provider. + +The launched instance must contain the `GNU Privacy Guard` software +which provides the `gpg` command. + +Launch a cloud instance +======================= + +We will use LXD to launch an Ubuntu instance, but you can use any cloud +provider and OS that supports `gpg` and cloud-init. + +Launch the instance and connect to it: + +.. code-block:: bash + + $ lxc launch ubuntu:noble pgp-demo + $ lxc shell pgp-demo + +You should now be inside the LXD instance in the "/root" directory. +The remaining steps will be performed inside this instance in the "/root" +directory until otherwise noted. + +Generate PGP key pairs +====================== + +.. note:: + + If you already have PGP key pairs you would like to use, you can skip this + step. However, we do NOT recommend reusing or sharing any personal + private keys. + +First, generate a new key pair to be used for signing: + +.. code-block:: bash + + $ gpg --quick-generate-key --batch --passphrase "" signing_user + +Next, generate a new key pair to be used for encryption: + +.. code-block:: bash + + $ gpg --quick-generate-key --batch --passphrase "" encrypting_user + +Encrypt and sign the user data +============================== + +Create a file with your user data. For this example, we will use a simple +cloud-config file: + +.. code-block:: yaml + + #cloud-config + runcmd: + - echo 'Hello, World!' > /var/tmp/hello-world.txt + +Save this file to your working directory as `cloud-config.yaml`. + +Encrypt the user data using the public key of the encrypting user and the +private key of the signing user: + +.. code-block:: bash + + $ gpg --batch --output cloud-config.yaml.asc --sign --local-user signing_user --encrypt --recipient encrypting_user --armor cloud-config.yaml + +Our encrypted and signed user data is now saved in `cloud-config.yaml.asc`. + +Export the keys +=============== + +In order to use this user data, we will need to create a custom image +containing the public key of the encrypting user and the private key +of the signing user. + +Create the key directory: + +.. code-block:: bash + + $ mkdir /etc/cloud/keys + +Export the public key of the signing user: + +.. code-block:: bash + + $ gpg --export signing_user > /etc/cloud/keys/signing_user.gpg + +Export the private key of the encrypting user: + +.. code-block:: bash + + $ gpg --export-secret-keys encrypting_user > /etc/cloud/keys/encrypting_user.gpg + +Why export keys? +---------------- + +While it is more steps to export the keys in this way as opposed to +using the existing key ring in the snapshot, we do this for a few reasons: + +* Users may not want these keys in any key ring by default on a new instance +* Keys may be exported from any system without needing to be concerned with + the implementation details of the gpg implementation + +Note that on launch, cloud-init will import there keys into a temporary +key ring that is removed after the user data is processed. The default +key ring will not be read or modified. + +Restrict user data to require PGP message +========================================= + +To ensure that our message hasn't been replaced or tampered with, we can +require that cloud-init only process PGP messages. To do so, create a file +`/etc/cloud/cloud.cfg.d/99_pgp.cfg` with the following contents: + +.. code-block:: text + + user_data: + require_pgp: true + +Retrieve our encrypted and signed user data +=========================================== + +Before running +these commands, copy the encrypted and signed user data +that we created earlier to the host system. + +From the host system, run: + +.. code-block:: bash + + $ lxc file pull pgp-demo/root/cloud-config.yaml.asc . + + +Create a custom image containing the keys +========================================= + +.. note:: + Before creating the image, you may want to remove the original user data + and created key ring from the instance. This is not strictly necessary + but is recommended for a clean image. + +Now that we have our instance configured, we can create a custom image from +it. This step will vary depending on your cloud provider. + +Using LXD, from the host system, run: + +.. code-block:: bash + + $ lxc stop pgp-demo + $ lxc publish pgp-demo --alias pgp-demo-image + +Launch an instance with the encrypted user data +=============================================== + +Now that we have our custom image with the keys, we can launch a +new instance with the encrypted user data. With your encrypted and signed +user data in the current working directory, run: + +.. code-block:: bash + + $ lxc launch pgp-demo-image pgp-demo-encrypted \ + --config user.user-data="$(cat cloud-config.yaml.asc)" + +On the launched system, you should see the file `/var/tmp/hello-world.txt` +containing the text `Hello, World!`. diff --git a/doc/rtd/reference/base_config_reference.rst b/doc/rtd/reference/base_config_reference.rst index 2d13675e68c..760a5266acc 100644 --- a/doc/rtd/reference/base_config_reference.rst +++ b/doc/rtd/reference/base_config_reference.rst @@ -255,25 +255,37 @@ There are a few reasons to modify the ``datasource_list``: that the key and its contents, a list, must share a single line - no newlines. +.. _base_config_user_data: + +``user_data`` +^^^^^^^^^^^^^ + +Allows the user to set options for processing user data. + +Format is a dict with the following keys: + +* ``enabled``: A boolean value to enable or disable the use of user data. + One use case is to prevent users from accidentally breaking closed + appliances. Default: ``true``. +* ``require_pgp``: A boolean indicating whether to require PGP verification + of the user data. Default: ``false``. This should be true if + using :ref:`signed user data`. + ``vendor_data``/``vendor_data2`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Allows the user to disable ``vendor_data`` or ``vendor_data2`` along with providing a prefix for any executed scripts. -Format is a dict with ``enabled`` and ``prefix`` keys: - -* ``enabled``: A boolean indicating whether to enable or disable the - ``vendor_data``. -* ``prefix``: A path to prepend to any ``vendor_data``-provided script. - -``allow_userdata`` -^^^^^^^^^^^^^^^^^^ +Format is a dict with the following keys: -A boolean value to disable the use of user data. -This allows custom images to prevent users from accidentally breaking closed -appliances. Setting ``allow_userdata: false`` in the configuration will disable -``cloud-init`` from processing user data. +* ``enabled``: A boolean indicating whether to enable or disable the vendor + data. Default: ``true``. +* ``require_pgp``: A boolean indicating whether to require PGP verification + of the vendor data. Default: ``false``. +* ``prefix``: A path to a binary to prefix to any executed scripts. + An example of usage would be to prefix a script with ``strace`` to + debug a script. ``manual_cache_clean`` ^^^^^^^^^^^^^^^^^^^^^^ diff --git a/tests/integration_tests/instances.py b/tests/integration_tests/instances.py index a5ce8f1e14f..3e88aa43b96 100644 --- a/tests/integration_tests/instances.py +++ b/tests/integration_tests/instances.py @@ -83,10 +83,10 @@ def restart(self): log.info("Restarting instance and waiting for boot") self.instance.restart() - def execute(self, command, *, use_sudo=True) -> Result: + def execute(self, command, *, use_sudo=True, **kwargs) -> Result: if self.instance.username == "root" and use_sudo is False: raise RuntimeError("Root user cannot run unprivileged") - return self.instance.execute(command, use_sudo=use_sudo) + return self.instance.execute(command, use_sudo=use_sudo, **kwargs) def pull_file( self, diff --git a/tests/integration_tests/userdata/test_pgp.py b/tests/integration_tests/userdata/test_pgp.py new file mode 100644 index 00000000000..a5d6e893c89 --- /dev/null +++ b/tests/integration_tests/userdata/test_pgp.py @@ -0,0 +1,331 @@ +"""Test PGP signed and encrypted userdata.""" +import pytest + +from cloudinit import subp +from tests.integration_tests.clouds import IntegrationCloud +from tests.integration_tests.instances import IntegrationInstance +from tests.integration_tests.integration_settings import PLATFORM +from tests.integration_tests.util import verify_clean_boot + +USER_DATA = """\ +#cloud-config +bootcmd: + - touch /var/tmp/{0} +""" + + +@pytest.fixture(scope="module") +def gpg_dir(tmp_path_factory): + yield tmp_path_factory.mktemp("gpg_dir") + + +@pytest.fixture(scope="module") +def public_key(gpg_dir): + subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--quick-generate-key", + "--batch", + # "loopback", + "--passphrase", + "", + "signing_user", + ] + ) + yield subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--export", + "--armor", + "signing_user", + ] + ).stdout + + +@pytest.fixture(scope="module") +def private_key(gpg_dir): + subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--quick-generate-key", + "--batch", + "--passphrase", + "", + "encrypting_user", + ] + ) + yield subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--batch", + "--export-secret-keys", + "--armor", + "encrypting_user", + ] + ).stdout + + +@pytest.fixture(scope="module") +def signed_and_encrypted_userdata(gpg_dir, public_key, private_key): + return subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--batch", + "--sign", + "--local-user", + "signing_user", + "--encrypt", + "--recipient", + "encrypting_user", + "--armor", + ], + data=USER_DATA.format("signed_and_encrypted"), + ).stdout + + +@pytest.fixture(scope="module") +def signed_userdata(gpg_dir, public_key): + return subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--batch", + "--sign", + "--local-user", + "signing_user", + "--armor", + ], + data=USER_DATA.format("signed"), + ).stdout + + +@pytest.fixture(scope="module") +def encrypted_userdata(gpg_dir, private_key): + return subp.subp( + [ + "gpg", + "--homedir", + str(gpg_dir), + "--batch", + "--encrypt", + "--recipient", + "encrypting_user", + "--armor", + ], + data=USER_DATA.format("encrypted"), + ).stdout + + +@pytest.fixture(scope="module") +def valid_keys_image( + public_key, + private_key, + session_cloud: IntegrationCloud, +): + with session_cloud.launch() as client: + client.execute("mkdir /etc/cloud/keys") + client.write_to_file("/etc/cloud/keys/pub_key", public_key) + client.write_to_file("/etc/cloud/keys/priv_key", private_key) + client.execute("cloud-init clean --logs") + image_id = client.snapshot() + yield image_id + client.cloud.cloud_instance.delete_image(image_id) + + +@pytest.fixture(scope="module") +def valid_pub_key_image( + public_key, + session_cloud: IntegrationCloud, +): + with session_cloud.launch() as client: + client.execute("mkdir /etc/cloud/keys") + client.write_to_file("/etc/cloud/keys/pub_key", public_key) + client.execute("cloud-init clean --logs") + image_id = client.snapshot() + yield image_id + client.cloud.cloud_instance.delete_image(image_id) + + +@pytest.fixture(scope="module") +def valid_priv_key_image( + private_key, + session_cloud: IntegrationCloud, +): + with session_cloud.launch() as client: + client.execute("mkdir /etc/cloud/keys") + client.write_to_file("/etc/cloud/keys/priv_key", private_key) + client.execute("cloud-init clean --logs") + image_id = client.snapshot() + yield image_id + client.cloud.cloud_instance.delete_image(image_id) + + +@pytest.fixture +def pgp_client(session_cloud: IntegrationCloud, request): + user_data_fixture, image_id_fixture = request.param + user_data = request.getfixturevalue(user_data_fixture) + launch_kwargs = {"image_id": request.getfixturevalue(image_id_fixture)} + with session_cloud.launch( + user_data=user_data, launch_kwargs=launch_kwargs + ) as client: + yield client + + +def _invalidate_key(client, key_path): + pub_key = client.read_from_file(key_path) + midde_index = len(pub_key) // 2 + bad_pub_key = f"{pub_key[:midde_index]}a{pub_key[midde_index:]}" + client.write_to_file(key_path, bad_pub_key) + + +@pytest.mark.parametrize( + "pgp_client", + [("signed_and_encrypted_userdata", "valid_keys_image")], + indirect=True, +) +def test_signed_and_encrypted(pgp_client: IntegrationInstance): + client = pgp_client + assert client.execute("test -f /var/tmp/signed_and_encrypted") + verify_clean_boot(client) + + # Invalidate our public key and ensure we fail + client.execute("cp /etc/cloud/keys/pub_key /var/tmp/pub_key") + _invalidate_key(client, "/etc/cloud/keys/pub_key") + client.execute("cloud-init clean --logs") + client.execute("rm /var/tmp/signed_and_encrypted") + client.restart() + assert not client.execute("test -f /var/tmp/signed_and_encrypted") + result = client.execute("cloud-init status --format=json") + assert result.failed + assert "Failed decrypting user data" in result.stdout + + # Restore the public key, invalidate the private key, and ensure we fail + client.execute("cp /var/tmp/pub_key /etc/cloud/keys/pub_key") + client.execute("cp /etc/cloud/keys/priv_key /var/tmp/priv_key") + _invalidate_key(client, "/etc/cloud/keys/priv_key") + client.execute("cloud-init clean --logs") + client.execute("rm /var/tmp/signed_and_encrypted") + client.restart() + assert not client.execute("test -f /var/tmp/signed_and_encrypted") + result = client.execute("cloud-init status --format=json") + assert result.failed + assert "Failed decrypting user data" in result.stdout + + +@pytest.mark.parametrize( + "pgp_client", + [("encrypted_userdata", "valid_priv_key_image")], + indirect=True, +) +def test_encrypted(pgp_client: IntegrationInstance): + client = pgp_client + assert client.execute("test -f /var/tmp/encrypted") + verify_clean_boot(client) + + # Invalidate our private key and ensure we fail + _invalidate_key(client, "/etc/cloud/keys/priv_key") + client.execute("cloud-init clean --logs") + client.execute("rm /var/tmp/encrypted") + client.restart() + assert not client.execute("test -f /var/tmp/encrypted") + result = client.execute("cloud-init status --format=json") + assert result.failed + assert "Failed decrypting user data" in result.stdout + + +@pytest.mark.parametrize( + "pgp_client", + [("signed_userdata", "valid_pub_key_image")], + indirect=True, +) +def test_signed(pgp_client: IntegrationInstance): + client = pgp_client + assert client.execute("test -f /var/tmp/signed") + verify_clean_boot(client) + + # Invalidate our public key and ensure we fail + _invalidate_key(client, "/etc/cloud/keys/pub_key") + client.execute("cloud-init clean --logs") + client.execute("rm /var/tmp/signed") + client.restart() + assert not client.execute("test -f /var/tmp/signed") + result = client.execute("cloud-init status --format=json") + assert result.failed + assert "Failed decrypting user data" in result.stdout + + +@pytest.fixture +def lxd_pgp_client(session_cloud: IntegrationCloud, request): + user_data_fixture, image_id_fixture = request.param + user_data = request.getfixturevalue(user_data_fixture) + launch_kwargs = { + "execute_via_ssh": False, + "username": "root", + } + if image_id_fixture: + launch_kwargs["image_id"] = request.getfixturevalue(image_id_fixture) + + with session_cloud.launch( + user_data=user_data, launch_kwargs=launch_kwargs + ) as client: + yield client + + +@pytest.mark.skipif( + PLATFORM not in ["lxd_container", "lxd_vm"], + reason=( + "failed user data means no ssh or 'ubuntu' user creation, " + "so we need special LXD options" + ), +) +@pytest.mark.parametrize( + "lxd_pgp_client", + [ + pytest.param( + ("encrypted_userdata", "valid_pub_key_image"), + id="encrypted_with_no_priv_key", + ), + pytest.param( + ("signed_userdata", "valid_priv_key_image"), + id="signed_with_no_pub_key", + ), + pytest.param( + ("signed_and_encrypted_userdata", None), + id="signed_and_encrypted_with_no_keys", + ), + ], + indirect=True, +) +def test_unparseable_userdata( + lxd_pgp_client: IntegrationInstance, +): + result = lxd_pgp_client.execute("cloud-init status --format=json") + assert result.failed + assert "Failed decrypting user data" in result.stdout + + +@pytest.mark.user_data(USER_DATA.format("no")) +def test_pgp_required(client: IntegrationInstance): + client.write_to_file( + "/etc/cloud/cloud.cfg.d/99_pgp.cfg", "user_data:\n require_pgp: true" + ) + client.execute("cloud-init clean --logs") + client.restart() + + result = client.execute("cloud-init status --format=json") + assert result.failed + assert ( + "'require_pgp' was set true in cloud-init's base configuration, but " + "content type is text/cloud-config" + ) in result.stdout diff --git a/tests/unittests/cloudinit/test_user_data.py b/tests/unittests/cloudinit/test_user_data.py new file mode 100644 index 00000000000..424b4b051cf --- /dev/null +++ b/tests/unittests/cloudinit/test_user_data.py @@ -0,0 +1,129 @@ +"""These tests are for code in the cloudinit.user_data module. + +These are NOT general tests for user data processing. +""" + +from email.mime.base import MIMEBase + +import pytest + +from cloudinit import subp, user_data +from tests.unittests.util import gzip_text + + +def count_messages(root): + return sum(not user_data.is_skippable(m) for m in root.walk()) + + +class TestMessageFromString: + def test_unicode_not_messed_up(self): + roundtripped = user_data.message_from_string("\n").as_string() + assert "\x00" not in roundtripped + + +class TestUDProcess: + @pytest.mark.parametrize( + "msg", + [ + pytest.param("#cloud-config\napt_update: True\n", id="str"), + pytest.param(b"#cloud-config\napt_update: True\n", id="bytes"), + pytest.param( + gzip_text("#cloud-config\napt_update: True\n"), + id="compressed", + ), + ], + ) + def test_type_in_userdata(self, msg): + ud_proc = user_data.UserDataProcessor({}) + message = ud_proc.process(msg) + assert count_messages(message) + + +class TestConvertString: + @pytest.mark.parametrize( + "blob,decode", + [ + pytest.param("hi mom", False, id="str"), + pytest.param(b"#!/bin/bash\necho \xc3\x84\n", True, id="bytes"), + pytest.param(b"\x32\x32", True, id="binary"), + ], + ) + def test_mime_conversion_preserves_content(self, blob, decode): + """Ensure for each type of input, the content is preserved.""" + assert ( + user_data.convert_string(blob).get_payload(decode=decode) == blob + ) + + def test_handle_mime_parts(self): + """Mime parts are properly returned as a mime message.""" + message = MIMEBase("text", "plain") + message.set_payload("Just text") + msg = user_data.convert_string(str(message)) + assert "Just text" == msg.get_payload(decode=False) + + +GOOD_MESSAGE = """\ +-----BEGIN PGP MESSAGE----- + +Pass:AMockWillDecryptMeIntoACloudConfig +-----END PGP MESSAGE----- +""" + +CLOUD_CONFIG = """\ +#cloud-config +password: password +""" + +BAD_MESSAGE = """\ +-----BEGIN PGP MESSAGE----- + +Fail:AMockWillFailToDecryptMe +-----END PGP MESSAGE----- +""" + + +def my_subp(*args, **kwargs): + if args[0][0] == "gpg": + if "Pass:" in kwargs["data"]: + return subp.SubpResult(CLOUD_CONFIG, "") + elif "Fail:" in kwargs["data"]: + raise subp.ProcessExecutionError( + "", "gpg: public key decryption failed", 2, args[0] + ) + return subp.SubpResult("", "") + + +class TestPgpData: + def test_pgp_decryption(self, mocker): + mocker.patch("cloudinit.subp.subp", side_effect=my_subp) + ud_proc = user_data.UserDataProcessor({}) + message = ud_proc.process(GOOD_MESSAGE) + parts = [p for p in message.walk() if not user_data.is_skippable(p)] + assert len(parts) == 1 + part = parts[0] + assert part.get_payload() == CLOUD_CONFIG + + def test_pgp_decryption_failure(self, mocker): + mocker.patch("cloudinit.subp.subp", side_effect=my_subp) + ud_proc = user_data.UserDataProcessor({}) + with pytest.raises( + RuntimeError, match="payload of type text/x-pgp-armored" + ): + ud_proc.process(BAD_MESSAGE) + + def test_pgp_required(self, mocker): + mocker.patch("cloudinit.subp.subp", side_effect=my_subp) + ud_proc = user_data.UserDataProcessor({}) + message = ud_proc.process(GOOD_MESSAGE, require_pgp=True) + parts = [p for p in message.walk() if not user_data.is_skippable(p)] + assert len(parts) == 1 + part = parts[0] + assert part.get_payload() == CLOUD_CONFIG + + def test_pgp_required_with_no_pgp_message(self, mocker): + mocker.patch("cloudinit.subp.subp", side_effect=my_subp) + ud_proc = user_data.UserDataProcessor({}) + with pytest.raises( + RuntimeError, match="content type is text/cloud-config" + ): + ud_proc.process(CLOUD_CONFIG, require_pgp=True) diff --git a/tests/unittests/test_data.py b/tests/unittests/test_data.py index 7621c5f6c80..be6721d7416 100644 --- a/tests/unittests/test_data.py +++ b/tests/unittests/test_data.py @@ -2,14 +2,12 @@ """Tests for handling of userdata within cloud init.""" -import gzip import logging import os from email import encoders from email.mime.application import MIMEApplication from email.mime.base import MIMEBase from email.mime.multipart import MIMEMultipart -from io import BytesIO from pathlib import Path from unittest import mock @@ -19,35 +17,14 @@ from cloudinit import handlers from cloudinit import helpers as c_helpers -from cloudinit import safeyaml, stages -from cloudinit import user_data as ud -from cloudinit import util +from cloudinit import safeyaml, stages, util from cloudinit.config.modules import Modules from cloudinit.settings import DEFAULT_RUN_DIR, PER_INSTANCE -from tests.unittests import helpers -from tests.unittests.util import FakeDataSource +from tests.unittests.util import FakeDataSource, gzip_text MPATH = "cloudinit.stages" -def count_messages(root): - am = 0 - for m in root.walk(): - if ud.is_skippable(m): - continue - am += 1 - return am - - -def gzip_text(text): - contents = BytesIO() - f = gzip.GzipFile(fileobj=contents, mode="wb") - f.write(util.encode_text(text)) - f.flush() - f.close() - return contents.getvalue() - - @pytest.fixture(scope="function") def init_tmp(request, tmpdir): ci = stages.Init() @@ -772,53 +749,6 @@ def test_include_bad_url_no_fail( assert cc.get("included") is True -class TestUDProcess(helpers.ResourceUsingTestCase): - def test_bytes_in_userdata(self): - msg = b"#cloud-config\napt_update: True\n" - ud_proc = ud.UserDataProcessor(self.getCloudPaths()) - message = ud_proc.process(msg) - self.assertTrue(count_messages(message) == 1) - - def test_string_in_userdata(self): - msg = "#cloud-config\napt_update: True\n" - - ud_proc = ud.UserDataProcessor(self.getCloudPaths()) - message = ud_proc.process(msg) - self.assertTrue(count_messages(message) == 1) - - def test_compressed_in_userdata(self): - msg = gzip_text("#cloud-config\napt_update: True\n") - - ud_proc = ud.UserDataProcessor(self.getCloudPaths()) - message = ud_proc.process(msg) - self.assertTrue(count_messages(message) == 1) - - -class TestConvertString(helpers.TestCase): - def test_handles_binary_non_utf8_decodable(self): - """Printable unicode (not utf8-decodable) is safely converted.""" - blob = b"#!/bin/bash\necho \xc3\x84\n" - msg = ud.convert_string(blob) - self.assertEqual(blob, msg.get_payload(decode=True)) - - def test_handles_binary_utf8_decodable(self): - blob = b"\x32\x32" - msg = ud.convert_string(blob) - self.assertEqual(blob, msg.get_payload(decode=True)) - - def test_handle_headers(self): - text = "hi mom" - msg = ud.convert_string(text) - self.assertEqual(text, msg.get_payload(decode=False)) - - def test_handle_mime_parts(self): - """Mime parts are properly returned as a mime message.""" - message = MIMEBase("text", "plain") - message.set_payload("Just text") - msg = ud.convert_string(str(message)) - self.assertEqual("Just text", msg.get_payload(decode=False)) - - class TestFetchBaseConfig: @pytest.fixture(autouse=True) def mocks(self, mocker): diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py index 221e21de5c2..3b80e7a0e2e 100644 --- a/tests/unittests/test_util.py +++ b/tests/unittests/test_util.py @@ -2451,12 +2451,6 @@ def test_given_log_level_used(self): self.assertEqual((log_level, mock.ANY), logger.log.call_args[0]) -class TestMessageFromString(helpers.TestCase): - def test_unicode_not_messed_up(self): - roundtripped = util.message_from_string("\n").as_string() - self.assertNotIn("\x00", roundtripped) - - class TestReadOptionalSeed: @pytest.mark.parametrize( "seed_dir,expected_fill,retval", diff --git a/tests/unittests/util.py b/tests/unittests/util.py index 02aa6b1ac97..c7ce1fe407c 100644 --- a/tests/unittests/util.py +++ b/tests/unittests/util.py @@ -1,8 +1,10 @@ # This file is part of cloud-init. See LICENSE file for license information. +import gzip +from io import BytesIO from typing import Optional, Type from unittest import mock -from cloudinit import cloud, distros, helpers +from cloudinit import cloud, distros, helpers, util from cloudinit.config import Config from cloudinit.net.dhcp import IscDhclient from cloudinit.sources import DataSource, DataSourceHostname @@ -71,6 +73,15 @@ class concreteCls(abclass): return type("DummyConcrete" + abclass.__name__, (concreteCls,), {}) +def gzip_text(text): + contents = BytesIO() + f = gzip.GzipFile(fileobj=contents, mode="wb") + f.write(util.encode_text(text)) + f.flush() + f.close() + return contents.getvalue() + + class MockDistro(distros.Distro): # MockDistro is here to test base Distro class implementations def __init__(self, name="testingdistro", cfg=None, paths=None): From 8f64f9db832f945d758b9b0c71ab12c2d094fd9e Mon Sep 17 00:00:00 2001 From: James Falcon Date: Thu, 8 Aug 2024 15:46:24 -0500 Subject: [PATCH 05/13] spelling lint --- doc/rtd/spelling_word_list.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/rtd/spelling_word_list.txt b/doc/rtd/spelling_word_list.txt index 5f4783af65b..a19cc46b3fe 100644 --- a/doc/rtd/spelling_word_list.txt +++ b/doc/rtd/spelling_word_list.txt @@ -167,6 +167,7 @@ passno passthrough passw pem +pgp pid pipelining pki From 168e346a77138ba502caf761551f3f2bb6cd60c2 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 14 Aug 2024 23:15:59 -0500 Subject: [PATCH 06/13] comments --- cloudinit/gpg.py | 15 +++++++-- cloudinit/sources/__init__.py | 18 +++++----- cloudinit/stages.py | 2 +- cloudinit/user_data.py | 18 ++++++---- doc/rtd/explanation/format.rst | 2 +- doc/rtd/howto/pgp.rst | 9 +++-- doc/rtd/reference/base_config_reference.rst | 9 ++--- tests/integration_tests/userdata/test_pgp.py | 35 +++++++++++++++++--- tests/unittests/cloudinit/test_user_data.py | 4 +-- 9 files changed, 76 insertions(+), 36 deletions(-) diff --git a/cloudinit/gpg.py b/cloudinit/gpg.py index dccd6a84e5f..e1d3f658b1a 100644 --- a/cloudinit/gpg.py +++ b/cloudinit/gpg.py @@ -23,6 +23,10 @@ HOME = "GNUPGHOME" +class GpgVerificationError(Exception): + """GpgVerificationError is raised when a signature verification fails.""" + + class GPG: def __init__(self): self.gpg_started = False @@ -87,7 +91,7 @@ def import_key(self, key: pathlib.Path) -> None: except subp.ProcessExecutionError as error: LOG.warning("Failed to import key %s: %s", key, error) - def decrypt(self, data: str) -> str: + def decrypt(self, data: str, *, require_signature=False) -> str: """Process data using gpg. This can be used to decrypt encrypted data, verify signed data, @@ -97,14 +101,19 @@ def decrypt(self, data: str) -> str: :return: decrypted data :raises: ProcessExecutionError if gpg fails to decrypt data """ - return subp.subp( + result = subp.subp( [ "gpg", "--decrypt", ], data=data, update_env=self.env, - ).stdout + ) + if require_signature and "gpg: Good signature" not in result.stderr: + raise GpgVerificationError( + "Signature verification required, but no signature found" + ) + return result.stdout def dearmor(self, key: str) -> str: """Dearmor gpg key, dearmored key gets returned diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index 3b79a47cb7e..757de4140db 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -646,34 +646,34 @@ def get_url_params(self): return URLParams(max_wait, timeout, retries, sec_between_retries) def get_userdata(self, apply_filter=False): - require_pgp = self.sys_cfg.get("user_data", {}).get( - "require_pgp", False + require_signature = self.sys_cfg.get("user_data", {}).get( + "require_signature", False ) if self.userdata is None: self.userdata = self.ud_proc.process( - self.get_userdata_raw(), require_pgp + self.get_userdata_raw(), require_signature ) if apply_filter: return self._filter_xdata(self.userdata) return self.userdata def get_vendordata(self): - require_pgp = self.sys_cfg.get("vendor_data", {}).get( - "require_pgp", False + require_signature = self.sys_cfg.get("vendor_data", {}).get( + "require_signature", False ) if self.vendordata is None: self.vendordata = self.ud_proc.process( - self.get_vendordata_raw(), require_pgp + self.get_vendordata_raw(), require_signature ) return self.vendordata def get_vendordata2(self): - require_pgp = self.sys_cfg.get("vendor_data2", {}).get( - "require_pgp", False + require_signature = self.sys_cfg.get("vendor_data2", {}).get( + "require_signature", False ) if self.vendordata2 is None: self.vendordata2 = self.ud_proc.process( - self.get_vendordata2_raw(), require_pgp + self.get_vendordata2_raw(), require_signature ) return self.vendordata2 diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 1160ba28fde..33325bb678c 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -799,7 +799,7 @@ def _consume_userdata_if_enabled(self, frequency: str) -> None: Base config can have a definition like: user_data: enabled: false - require_pgp: true + require_signature: true or a deprecated `allow_userdata` key. Parse them and maybe consume userdata accordingly. diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index 48fef79467c..f3a9e78789c 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -85,20 +85,22 @@ def __init__(self, paths): self.paths = paths self.ssl_details = util.fetch_ssl_details(paths) - def process(self, blob, require_pgp=False): + def process(self, blob, require_signature=False): accumulating_msg = MIMEMultipart() if isinstance(blob, list): for b in blob: self._process_msg( - convert_string(b), accumulating_msg, require_pgp + convert_string(b), accumulating_msg, require_signature ) else: self._process_msg( - convert_string(blob), accumulating_msg, require_pgp + convert_string(blob), accumulating_msg, require_signature ) return accumulating_msg - def _process_msg(self, base_msg: Message, append_msg, require_pgp=False): + def _process_msg( + self, base_msg: Message, append_msg, require_signature=False + ): def find_ctype(payload): return handlers.type_from_starts_with(payload) @@ -118,9 +120,9 @@ def find_ctype(payload): if ctype in TYPE_NEEDED + ["text/x-shellscript"]: ctype = find_ctype(payload) or ctype - if require_pgp and ctype != ENCRYPT_TYPE: + if require_signature and ctype != ENCRYPT_TYPE: error_message = ( - "'require_pgp' was set true in cloud-init's base " + "'require_signature' was set true in cloud-init's base " f"configuration, but content type is {ctype}." ) raise RuntimeError(error_message) @@ -152,7 +154,9 @@ def find_ctype(payload): for key_path in keys_dir.iterdir(): gpg_context.import_key(key_path) try: - payload = gpg_context.decrypt(payload) + payload = gpg_context.decrypt( + payload, require_signature=require_signature + ) except subp.ProcessExecutionError as e: raise RuntimeError( "Failed decrypting user data payload of type " diff --git a/doc/rtd/explanation/format.rst b/doc/rtd/explanation/format.rst index 32431d12ad4..8c4eb35ba7b 100644 --- a/doc/rtd/explanation/format.rst +++ b/doc/rtd/explanation/format.rst @@ -194,7 +194,7 @@ message only. To do so, the following .. code-block:: yaml user_data: - require_pgp: true + require_signature: true For a detailed guide to creating the PGP message and image necessary for this diff --git a/doc/rtd/howto/pgp.rst b/doc/rtd/howto/pgp.rst index e0e2c001f08..d5f999790ea 100644 --- a/doc/rtd/howto/pgp.rst +++ b/doc/rtd/howto/pgp.rst @@ -93,8 +93,8 @@ cloud-config file: Save this file to your working directory as `cloud-config.yaml`. -Encrypt the user data using the public key of the encrypting user and the -private key of the signing user: +Encrypt the user data using the public key of the encrypting user and +sign it using the private key of the signing user: .. code-block:: bash @@ -134,8 +134,7 @@ While it is more steps to export the keys in this way as opposed to using the existing key ring in the snapshot, we do this for a few reasons: * Users may not want these keys in any key ring by default on a new instance -* Keys may be exported from any system without needing to be concerned with - the implementation details of the gpg implementation +* Exporting keys is easier than copying key rings Note that on launch, cloud-init will import there keys into a temporary key ring that is removed after the user data is processed. The default @@ -151,7 +150,7 @@ require that cloud-init only process PGP messages. To do so, create a file .. code-block:: text user_data: - require_pgp: true + require_signature: true Retrieve our encrypted and signed user data =========================================== diff --git a/doc/rtd/reference/base_config_reference.rst b/doc/rtd/reference/base_config_reference.rst index 760a5266acc..7834c6965b9 100644 --- a/doc/rtd/reference/base_config_reference.rst +++ b/doc/rtd/reference/base_config_reference.rst @@ -267,8 +267,9 @@ Format is a dict with the following keys: * ``enabled``: A boolean value to enable or disable the use of user data. One use case is to prevent users from accidentally breaking closed appliances. Default: ``true``. -* ``require_pgp``: A boolean indicating whether to require PGP verification - of the user data. Default: ``false``. This should be true if +* ``require_signature``: A boolean indicating whether to require a PGP + signed message for user data. Default: ``false``. + This should be true if using :ref:`signed user data`. ``vendor_data``/``vendor_data2`` @@ -281,8 +282,8 @@ Format is a dict with the following keys: * ``enabled``: A boolean indicating whether to enable or disable the vendor data. Default: ``true``. -* ``require_pgp``: A boolean indicating whether to require PGP verification - of the vendor data. Default: ``false``. +* ``require_signature``: A boolean indicating whether to require a PGP + signed message for vendor data. Default: ``false``. * ``prefix``: A path to a binary to prefix to any executed scripts. An example of usage would be to prefix a script with ``strace`` to debug a script. diff --git a/tests/integration_tests/userdata/test_pgp.py b/tests/integration_tests/userdata/test_pgp.py index a5d6e893c89..85c5fae8daf 100644 --- a/tests/integration_tests/userdata/test_pgp.py +++ b/tests/integration_tests/userdata/test_pgp.py @@ -316,9 +316,10 @@ def test_unparseable_userdata( @pytest.mark.user_data(USER_DATA.format("no")) -def test_pgp_required(client: IntegrationInstance): +def test_signature_required(client: IntegrationInstance): client.write_to_file( - "/etc/cloud/cloud.cfg.d/99_pgp.cfg", "user_data:\n require_pgp: true" + "/etc/cloud/cloud.cfg.d/99_pgp.cfg", + "user_data:\n require_signature: true", ) client.execute("cloud-init clean --logs") client.restart() @@ -326,6 +327,32 @@ def test_pgp_required(client: IntegrationInstance): result = client.execute("cloud-init status --format=json") assert result.failed assert ( - "'require_pgp' was set true in cloud-init's base configuration, but " - "content type is text/cloud-config" + "'require_signature' was set true in cloud-init's base configuration, " + "but content type is text/cloud-config" ) in result.stdout + + +@pytest.mark.parametrize( + "pgp_client", [("encrypted_userdata", "valid_keys_image")], indirect=True +) +def test_encrypted_message_but_required_signature( + pgp_client: IntegrationInstance, +): + """Ensure fail if we require signature but only have encrypted message.""" + client = pgp_client + assert client.execute("test -f /var/tmp/encrypted") + verify_clean_boot(client) + + client.write_to_file( + "/etc/cloud/cloud.cfg.d/99_pgp.cfg", + "user_data:\n require_signature: true", + ) + client.execute("cloud-init clean --logs") + client.restart() + + result = client.execute("cloud-init status --format=json") + assert result.failed + assert ( + "Signature verification required, but no signature found" + in result.stdout + ) diff --git a/tests/unittests/cloudinit/test_user_data.py b/tests/unittests/cloudinit/test_user_data.py index 424b4b051cf..1305f0679e8 100644 --- a/tests/unittests/cloudinit/test_user_data.py +++ b/tests/unittests/cloudinit/test_user_data.py @@ -114,7 +114,7 @@ def test_pgp_decryption_failure(self, mocker): def test_pgp_required(self, mocker): mocker.patch("cloudinit.subp.subp", side_effect=my_subp) ud_proc = user_data.UserDataProcessor({}) - message = ud_proc.process(GOOD_MESSAGE, require_pgp=True) + message = ud_proc.process(GOOD_MESSAGE, require_signature=True) parts = [p for p in message.walk() if not user_data.is_skippable(p)] assert len(parts) == 1 part = parts[0] @@ -126,4 +126,4 @@ def test_pgp_required_with_no_pgp_message(self, mocker): with pytest.raises( RuntimeError, match="content type is text/cloud-config" ): - ud_proc.process(CLOUD_CONFIG, require_pgp=True) + ud_proc.process(CLOUD_CONFIG, require_signature=True) From 4e2726a1f6856b80b85b89cde0e2b88b8da18f99 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 14 Aug 2024 23:27:19 -0500 Subject: [PATCH 07/13] unit test failure and lint --- tests/integration_tests/userdata/test_pgp.py | 1 + tests/unittests/cloudinit/test_user_data.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests/userdata/test_pgp.py b/tests/integration_tests/userdata/test_pgp.py index 85c5fae8daf..713dc220826 100644 --- a/tests/integration_tests/userdata/test_pgp.py +++ b/tests/integration_tests/userdata/test_pgp.py @@ -1,4 +1,5 @@ """Test PGP signed and encrypted userdata.""" + import pytest from cloudinit import subp diff --git a/tests/unittests/cloudinit/test_user_data.py b/tests/unittests/cloudinit/test_user_data.py index 1305f0679e8..2aa4333b5c6 100644 --- a/tests/unittests/cloudinit/test_user_data.py +++ b/tests/unittests/cloudinit/test_user_data.py @@ -85,7 +85,7 @@ def test_handle_mime_parts(self): def my_subp(*args, **kwargs): if args[0][0] == "gpg": if "Pass:" in kwargs["data"]: - return subp.SubpResult(CLOUD_CONFIG, "") + return subp.SubpResult(CLOUD_CONFIG, "gpg: Good signature") elif "Fail:" in kwargs["data"]: raise subp.ProcessExecutionError( "", "gpg: public key decryption failed", 2, args[0] From e92beee5d4661387375e1ec6f4eb5a0bdbdceaa3 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 18 Sep 2024 15:03:52 -0500 Subject: [PATCH 08/13] stderr bad --- cloudinit/gpg.py | 17 +++++++++++++---- tests/integration_tests/userdata/test_pgp.py | 5 +---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/cloudinit/gpg.py b/cloudinit/gpg.py index e1d3f658b1a..792d39c4895 100644 --- a/cloudinit/gpg.py +++ b/cloudinit/gpg.py @@ -101,6 +101,19 @@ def decrypt(self, data: str, *, require_signature=False) -> str: :return: decrypted data :raises: ProcessExecutionError if gpg fails to decrypt data """ + if require_signature: + try: + subp.subp( + ["gpg", "--verify"], + data=data, + update_env=self.env, + ) + except subp.ProcessExecutionError as e: + if e.exit_code == 2: + raise GpgVerificationError( + "Signature verification failed" + ) from e + raise result = subp.subp( [ "gpg", @@ -109,10 +122,6 @@ def decrypt(self, data: str, *, require_signature=False) -> str: data=data, update_env=self.env, ) - if require_signature and "gpg: Good signature" not in result.stderr: - raise GpgVerificationError( - "Signature verification required, but no signature found" - ) return result.stdout def dearmor(self, key: str) -> str: diff --git a/tests/integration_tests/userdata/test_pgp.py b/tests/integration_tests/userdata/test_pgp.py index 713dc220826..c14529468e6 100644 --- a/tests/integration_tests/userdata/test_pgp.py +++ b/tests/integration_tests/userdata/test_pgp.py @@ -353,7 +353,4 @@ def test_encrypted_message_but_required_signature( result = client.execute("cloud-init status --format=json") assert result.failed - assert ( - "Signature verification required, but no signature found" - in result.stdout - ) + assert "Signature verification failed" in result.stdout From 126126d76e766c9877a626621ae3bad91fec9325 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 9 Oct 2024 16:18:49 -0500 Subject: [PATCH 09/13] comments --- cloudinit/sources/__init__.py | 24 ++++++++++----------- cloudinit/user_data.py | 6 +----- tests/unittests/cloudinit/test_user_data.py | 3 ++- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index 757de4140db..07fd1ab7705 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -646,34 +646,34 @@ def get_url_params(self): return URLParams(max_wait, timeout, retries, sec_between_retries) def get_userdata(self, apply_filter=False): - require_signature = self.sys_cfg.get("user_data", {}).get( - "require_signature", False - ) if self.userdata is None: self.userdata = self.ud_proc.process( - self.get_userdata_raw(), require_signature + self.get_userdata_raw(), + require_signature=self.sys_cfg.get("user_data", {}).get( + "require_signature", False + ), ) if apply_filter: return self._filter_xdata(self.userdata) return self.userdata def get_vendordata(self): - require_signature = self.sys_cfg.get("vendor_data", {}).get( - "require_signature", False - ) if self.vendordata is None: self.vendordata = self.ud_proc.process( - self.get_vendordata_raw(), require_signature + self.get_vendordata_raw(), + require_signature=self.sys_cfg.get("vendor_data", {}).get( + "require_signature", False + ), ) return self.vendordata def get_vendordata2(self): - require_signature = self.sys_cfg.get("vendor_data2", {}).get( - "require_signature", False - ) if self.vendordata2 is None: self.vendordata2 = self.ud_proc.process( - self.get_vendordata2_raw(), require_signature + self.get_vendordata2_raw(), + require_signature=self.sys_cfg.get("vendor_data2", {}).get( + "require_signature", False + ), ) return self.vendordata2 diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index f3a9e78789c..51392e97f37 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -403,10 +403,6 @@ def is_skippable(part): return False -def message_from_string(string) -> Message: - return email.message_from_string(string) - - def convert_string( raw_data: Union[str, bytes], content_type=NOT_MULTIPART_TYPE ) -> Message: @@ -430,7 +426,7 @@ def create_binmsg(data, content_type): bdata = raw_data bdata = util.decomp_gzip(bdata, decode=False) if b"mime-version:" in bdata[0:4096].lower(): - msg = message_from_string(bdata.decode("utf-8")) + msg = email.message_from_string(bdata.decode("utf-8")) else: msg = create_binmsg(bdata, content_type) diff --git a/tests/unittests/cloudinit/test_user_data.py b/tests/unittests/cloudinit/test_user_data.py index 2aa4333b5c6..ad95fc2a1b0 100644 --- a/tests/unittests/cloudinit/test_user_data.py +++ b/tests/unittests/cloudinit/test_user_data.py @@ -3,6 +3,7 @@ These are NOT general tests for user data processing. """ +import email from email.mime.base import MIMEBase import pytest @@ -17,7 +18,7 @@ def count_messages(root): class TestMessageFromString: def test_unicode_not_messed_up(self): - roundtripped = user_data.message_from_string("\n").as_string() + roundtripped = email.message_from_string("\n").as_string() assert "\x00" not in roundtripped From 7d4095474385f20bbb2f2ffaf7466e47cb38e37e Mon Sep 17 00:00:00 2001 From: James Falcon Date: Tue, 15 Oct 2024 00:21:12 -0500 Subject: [PATCH 10/13] remove mime processing, update tests and doc comment --- cloudinit/handlers/__init__.py | 1 - cloudinit/user_data.py | 162 ++++++++++--------- doc/rtd/howto/pgp.rst | 2 +- tests/integration_tests/userdata/test_pgp.py | 2 +- tests/unittests/cloudinit/test_user_data.py | 6 +- 5 files changed, 92 insertions(+), 81 deletions(-) diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index 6acfe1d623f..510768b5c1f 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -55,7 +55,6 @@ "text/x-shellscript-per-boot": "text/x-shellscript-per-boot", "text/x-shellscript-per-instance": "text/x-shellscript-per-instance", "text/x-shellscript-per-once": "text/x-shellscript-per-once", - "-----begin pgp message-----": "text/x-pgp-armored", } # Sorted longest first diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index 51392e97f37..a1e4e724659 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -17,7 +17,7 @@ from email.mime.multipart import MIMEMultipart from email.mime.nonmultipart import MIMENonMultipart from email.mime.text import MIMEText -from typing import Union +from typing import Union, cast from cloudinit import features, gpg, handlers, subp, util from cloudinit.settings import KEY_DIR @@ -42,7 +42,6 @@ ARCHIVE_UNDEF_BINARY_TYPE = "application/octet-stream" # This seems to hit most of the gzip possible content types. -ENCRYPT_TYPE = "text/x-pgp-armored" DECOMP_TYPES = [ "application/gzip", "application/gzip-compressed", @@ -53,7 +52,6 @@ "application/x-gzip", "application/x-gzip-compressed", ] -TRANSFORM_TYPES = [ENCRYPT_TYPE] + DECOMP_TYPES # Msg header used to track attachments ATTACHMENT_FIELD = "Number-Attachments" @@ -90,17 +88,17 @@ def process(self, blob, require_signature=False): if isinstance(blob, list): for b in blob: self._process_msg( - convert_string(b), accumulating_msg, require_signature + convert_string(b, require_signature=require_signature), + accumulating_msg, ) else: self._process_msg( - convert_string(blob), accumulating_msg, require_signature + convert_string(blob, require_signature=require_signature), + accumulating_msg, ) return accumulating_msg - def _process_msg( - self, base_msg: Message, append_msg, require_signature=False - ): + def _process_msg(self, base_msg, append_msg): def find_ctype(payload): return handlers.type_from_starts_with(payload) @@ -108,77 +106,49 @@ def find_ctype(payload): if is_skippable(part): continue + ctype = None + ctype_orig = part.get_content_type() payload = util.fully_decoded_payload(part) + was_compressed = False - ctype = part.get_content_type() + # When the message states it is of a gzipped content type ensure + # that we attempt to decode said payload so that the decompressed + # data can be examined (instead of the compressed data). + if ctype_orig in DECOMP_TYPES: + try: + payload = util.decomp_gzip(payload, quiet=False) + # At this point we don't know what the content-type is + # since we just decompressed it. + ctype_orig = None + was_compressed = True + except util.DecompressionError as e: + error_message = ( + "Failed decompressing payload from {} of" + " length {} due to: {}".format( + ctype_orig, len(payload), e + ) + ) + _handle_error(error_message, e) + continue + # Attempt to figure out the payloads content-type + if not ctype_orig: + ctype_orig = UNDEF_TYPE # There are known cases where mime-type text/x-shellscript included # non shell-script content that was user-data instead. It is safe # to check the true MIME type for x-shellscript type since all # shellscript payloads must have a #! header. The other MIME types # that cloud-init supports do not have the same guarantee. - if ctype in TYPE_NEEDED + ["text/x-shellscript"]: - ctype = find_ctype(payload) or ctype - - if require_signature and ctype != ENCRYPT_TYPE: - error_message = ( - "'require_signature' was set true in cloud-init's base " - f"configuration, but content type is {ctype}." - ) - raise RuntimeError(error_message) - - was_transformed = False - - # When the message states it is transformed ensure - # that we attempt to decode said payload so that the transformed - # data can be examined. - parent_ctype = None - if ctype in TRANSFORM_TYPES: - if ctype in DECOMP_TYPES: - try: - payload = util.decomp_gzip(payload, quiet=False) - except util.DecompressionError as e: - error_message = ( - "Failed decompressing payload from {} of" - " length {} due to: {}".format( - ctype, len(payload), e - ) - ) - _handle_error(error_message, e) - continue - elif ctype == ENCRYPT_TYPE and isinstance(payload, str): - with gpg.GPG() as gpg_context: - # Import all keys from the /etc/cloud/keys directory - keys_dir = pathlib.Path(KEY_DIR) - if keys_dir.is_dir(): - for key_path in keys_dir.iterdir(): - gpg_context.import_key(key_path) - try: - payload = gpg_context.decrypt( - payload, require_signature=require_signature - ) - except subp.ProcessExecutionError as e: - raise RuntimeError( - "Failed decrypting user data payload of type " - f"{ctype}. Ensure any necessary keys are " - f"present in {KEY_DIR}." - ) from e - else: - error_message = ( - f"Unknown content type {ctype} that" - " is marked as transformed" - ) - _handle_error(error_message) - continue - was_transformed = True - parent_ctype = ctype - ctype = find_ctype(payload) or parent_ctype + if ctype_orig in TYPE_NEEDED + ["text/x-shellscript"]: + ctype = find_ctype(payload) + if ctype is None: + ctype = ctype_orig # In the case where the data was compressed, we want to make sure # that we create a new message that contains the found content # type with the uncompressed content since later traversals of the # messages will expect a part not compressed. - if was_transformed: + if was_compressed: maintype, subtype = ctype.split("/", 1) n_part = MIMENonMultipart(maintype, subtype) n_part.set_payload(payload) @@ -187,13 +157,12 @@ def find_ctype(payload): # after decoding and decompression. if part.get_filename(): _set_filename(n_part, part.get_filename()) - if "Launch-Index" in part: - _replace_header( - n_part, "Launch-Index", str(part["Launch-Index"]) - ) + for h in ("Launch-Index",): + if h in part: + _replace_header(n_part, h, str(part[h])) part = n_part - if ctype != parent_ctype: + if ctype != ctype_orig: _replace_header(part, CONTENT_TYPE, ctype) if ctype in INCLUDE_TYPES: @@ -403,8 +372,35 @@ def is_skippable(part): return False +def decrypt_payload(payload: str, require_signature: bool) -> str: + """Decrypt/Verify a PGP message. + + :param payload: ASCII-armored GPG message to process + :param require_signature: Whether to require a signature + :return: decrypted data + """ + with gpg.GPG() as gpg_context: + # Import all keys from the /etc/cloud/keys directory + keys_dir = pathlib.Path(KEY_DIR) + if keys_dir.is_dir(): + for key_path in keys_dir.iterdir(): + gpg_context.import_key(key_path) + try: + return gpg_context.decrypt( + payload, require_signature=require_signature + ) + except subp.ProcessExecutionError as e: + raise RuntimeError( + "Failed decrypting user data payload. " + f"Ensure any necessary keys are present in {KEY_DIR}." + ) from e + + def convert_string( - raw_data: Union[str, bytes], content_type=NOT_MULTIPART_TYPE + raw_data: Union[str, bytes], + *, + require_signature: bool = False, + content_type=NOT_MULTIPART_TYPE, ) -> Message: """Convert the raw data into a mime message. @@ -424,10 +420,28 @@ def create_binmsg(data, content_type): bdata = raw_data.encode("utf-8") else: bdata = raw_data - bdata = util.decomp_gzip(bdata, decode=False) - if b"mime-version:" in bdata[0:4096].lower(): + # cast here because decode=False means return type is bytes + bdata = cast(bytes, util.decomp_gzip(bdata, decode=False)) + + # Decrypt/verify a PGP message. We do this here because a signed + # MIME part could be thwarted by other user data parts + if bdata[:27] == b"-----BEGIN PGP MESSAGE-----": + bdata = decrypt_payload( + bdata.decode("utf-8"), require_signature + ).encode("utf-8") + elif require_signature: + error_message = ( + "'require_signature' was set true in cloud-init's base " + "configuration, but content is not signed." + ) + raise RuntimeError(error_message) + + # Now ensure we have a MIME message + if b"mime-version:" in bdata[:4096].lower(): + # If we have a pre-existing MIME, use it msg = email.message_from_string(bdata.decode("utf-8")) else: + # Otherwise, convert to MIME msg = create_binmsg(bdata, content_type) return msg diff --git a/doc/rtd/howto/pgp.rst b/doc/rtd/howto/pgp.rst index d5f999790ea..44b928edcb5 100644 --- a/doc/rtd/howto/pgp.rst +++ b/doc/rtd/howto/pgp.rst @@ -136,7 +136,7 @@ using the existing key ring in the snapshot, we do this for a few reasons: * Users may not want these keys in any key ring by default on a new instance * Exporting keys is easier than copying key rings -Note that on launch, cloud-init will import there keys into a temporary +Note that on launch, cloud-init will import these keys into a temporary key ring that is removed after the user data is processed. The default key ring will not be read or modified. diff --git a/tests/integration_tests/userdata/test_pgp.py b/tests/integration_tests/userdata/test_pgp.py index c14529468e6..bb77bc2c572 100644 --- a/tests/integration_tests/userdata/test_pgp.py +++ b/tests/integration_tests/userdata/test_pgp.py @@ -329,7 +329,7 @@ def test_signature_required(client: IntegrationInstance): assert result.failed assert ( "'require_signature' was set true in cloud-init's base configuration, " - "but content type is text/cloud-config" + "but content is not signed" ) in result.stdout diff --git a/tests/unittests/cloudinit/test_user_data.py b/tests/unittests/cloudinit/test_user_data.py index ad95fc2a1b0..6d7b3f88fec 100644 --- a/tests/unittests/cloudinit/test_user_data.py +++ b/tests/unittests/cloudinit/test_user_data.py @@ -108,7 +108,7 @@ def test_pgp_decryption_failure(self, mocker): mocker.patch("cloudinit.subp.subp", side_effect=my_subp) ud_proc = user_data.UserDataProcessor({}) with pytest.raises( - RuntimeError, match="payload of type text/x-pgp-armored" + RuntimeError, match="Failed decrypting user data payload" ): ud_proc.process(BAD_MESSAGE) @@ -124,7 +124,5 @@ def test_pgp_required(self, mocker): def test_pgp_required_with_no_pgp_message(self, mocker): mocker.patch("cloudinit.subp.subp", side_effect=my_subp) ud_proc = user_data.UserDataProcessor({}) - with pytest.raises( - RuntimeError, match="content type is text/cloud-config" - ): + with pytest.raises(RuntimeError, match="content is not signed"): ud_proc.process(CLOUD_CONFIG, require_signature=True) From ee2a368a3ab887dc7c8be0a156ae3b25c53a07f7 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Tue, 15 Oct 2024 10:22:49 -0500 Subject: [PATCH 11/13] handle userdata list --- cloudinit/user_data.py | 59 +++++++++++++-------- tests/unittests/cloudinit/test_user_data.py | 9 ++++ 2 files changed, 46 insertions(+), 22 deletions(-) diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index a1e4e724659..e8dd031d893 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -88,12 +88,16 @@ def process(self, blob, require_signature=False): if isinstance(blob, list): for b in blob: self._process_msg( - convert_string(b, require_signature=require_signature), + convert_string( + b, is_part=True, require_signature=require_signature + ), accumulating_msg, ) else: self._process_msg( - convert_string(blob, require_signature=require_signature), + convert_string( + blob, is_part=False, require_signature=require_signature + ), accumulating_msg, ) return accumulating_msg @@ -396,11 +400,39 @@ def decrypt_payload(payload: str, require_signature: bool) -> str: ) from e +def handle_encrypted( + data: bytes, is_part: bool, require_signature: bool +) -> bytes: + # Decrypt/verify a PGP message. We do this here because a signed + # MIME part could be thwarted by other user data parts + if data[:27] == b"-----BEGIN PGP MESSAGE-----": + if is_part: + raise RuntimeError( + "PGP message must encompass entire user data or vendor data." + ) + return decrypt_payload(data.decode("utf-8"), require_signature).encode( + "utf-8" + ) + elif require_signature: + raise RuntimeError( + "'require_signature' was set true in cloud-init's base " + "configuration, but content is not signed." + ) + return data + + +def _create_binmsg(data): + maintype, subtype = NOT_MULTIPART_TYPE.split("/", 1) + msg = MIMEBase(maintype, subtype) + msg.set_payload(data) + return msg + + def convert_string( raw_data: Union[str, bytes], *, + is_part: bool = False, require_signature: bool = False, - content_type=NOT_MULTIPART_TYPE, ) -> Message: """Convert the raw data into a mime message. @@ -410,12 +442,6 @@ def convert_string( if not raw_data: raw_data = b"" - def create_binmsg(data, content_type): - maintype, subtype = content_type.split("/", 1) - msg = MIMEBase(maintype, subtype) - msg.set_payload(data) - return msg - if isinstance(raw_data, str): bdata = raw_data.encode("utf-8") else: @@ -423,18 +449,7 @@ def create_binmsg(data, content_type): # cast here because decode=False means return type is bytes bdata = cast(bytes, util.decomp_gzip(bdata, decode=False)) - # Decrypt/verify a PGP message. We do this here because a signed - # MIME part could be thwarted by other user data parts - if bdata[:27] == b"-----BEGIN PGP MESSAGE-----": - bdata = decrypt_payload( - bdata.decode("utf-8"), require_signature - ).encode("utf-8") - elif require_signature: - error_message = ( - "'require_signature' was set true in cloud-init's base " - "configuration, but content is not signed." - ) - raise RuntimeError(error_message) + bdata = handle_encrypted(bdata, is_part, require_signature) # Now ensure we have a MIME message if b"mime-version:" in bdata[:4096].lower(): @@ -442,6 +457,6 @@ def create_binmsg(data, content_type): msg = email.message_from_string(bdata.decode("utf-8")) else: # Otherwise, convert to MIME - msg = create_binmsg(bdata, content_type) + msg = _create_binmsg(bdata) return msg diff --git a/tests/unittests/cloudinit/test_user_data.py b/tests/unittests/cloudinit/test_user_data.py index 6d7b3f88fec..3f6a1c4b684 100644 --- a/tests/unittests/cloudinit/test_user_data.py +++ b/tests/unittests/cloudinit/test_user_data.py @@ -126,3 +126,12 @@ def test_pgp_required_with_no_pgp_message(self, mocker): ud_proc = user_data.UserDataProcessor({}) with pytest.raises(RuntimeError, match="content is not signed"): ud_proc.process(CLOUD_CONFIG, require_signature=True) + + def test_pgp_in_list_disallowed(self, mocker): + mocker.patch("cloudinit.subp.subp", side_effect=my_subp) + ud_proc = user_data.UserDataProcessor({}) + with pytest.raises( + RuntimeError, + match="PGP message must encompass entire user data or vendor data", + ): + ud_proc.process([GOOD_MESSAGE, GOOD_MESSAGE]) From 5c8743a319b4900470524956bec6c1f83767d62e Mon Sep 17 00:00:00 2001 From: James Falcon Date: Mon, 21 Oct 2024 08:41:48 -0500 Subject: [PATCH 12/13] unwrap before verify --- cloudinit/gpg.py | 26 +++++++++++++++++--- cloudinit/subp.py | 4 +-- tests/integration_tests/userdata/test_pgp.py | 19 ++++++++++---- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/cloudinit/gpg.py b/cloudinit/gpg.py index 792d39c4895..496da753fe2 100644 --- a/cloudinit/gpg.py +++ b/cloudinit/gpg.py @@ -108,12 +108,30 @@ def decrypt(self, data: str, *, require_signature=False) -> str: data=data, update_env=self.env, ) - except subp.ProcessExecutionError as e: - if e.exit_code == 2: + except subp.ProcessExecutionError: + # If the message is signed then encrypted (the default), + # the message can't be verified until it's decrypted + try: + stdout, _ = subp.subp( + ["gpg", "--unwrap"], + data=data, + update_env=self.env, + decode=False, + ) + except subp.ProcessExecutionError as e: raise GpgVerificationError( - "Signature verification failed" + "Signature verification failed. Could not unwrap." + ) from e + try: + subp.subp( + ["gpg", "--verify"], + data=stdout, + update_env=self.env, + ) + except subp.ProcessExecutionError as e: + raise GpgVerificationError( + "Signature verification failed. Could not verify." ) from e - raise result = subp.subp( [ "gpg", diff --git a/cloudinit/subp.py b/cloudinit/subp.py index eebf009d4f0..6cc5cf88f15 100644 --- a/cloudinit/subp.py +++ b/cloudinit/subp.py @@ -7,7 +7,7 @@ import subprocess from errno import ENOEXEC from io import TextIOWrapper -from typing import List, Optional, Union +from typing import List, Literal, Optional, Union from cloudinit import performance @@ -170,7 +170,7 @@ def subp( capture=True, shell=False, logstring=False, - decode="replace", + decode: Literal[False, "strict", "ignore", "replace"] = "replace", update_env=None, cwd=None, timeout=None, diff --git a/tests/integration_tests/userdata/test_pgp.py b/tests/integration_tests/userdata/test_pgp.py index bb77bc2c572..57df0293ccb 100644 --- a/tests/integration_tests/userdata/test_pgp.py +++ b/tests/integration_tests/userdata/test_pgp.py @@ -1,5 +1,7 @@ """Test PGP signed and encrypted userdata.""" +from pathlib import Path + import pytest from cloudinit import subp @@ -16,12 +18,12 @@ @pytest.fixture(scope="module") -def gpg_dir(tmp_path_factory): +def gpg_dir(tmp_path_factory: pytest.TempPathFactory): yield tmp_path_factory.mktemp("gpg_dir") @pytest.fixture(scope="module") -def public_key(gpg_dir): +def public_key(gpg_dir: Path): subp.subp( [ "gpg", @@ -29,7 +31,6 @@ def public_key(gpg_dir): str(gpg_dir), "--quick-generate-key", "--batch", - # "loopback", "--passphrase", "", "signing_user", @@ -197,6 +198,14 @@ def _invalidate_key(client, key_path): ) def test_signed_and_encrypted(pgp_client: IntegrationInstance): client = pgp_client + + client.write_to_file( + "/etc/cloud/cloud.cfg.d/99_pgp.cfg", + "user_data:\n require_signature: true", + ) + client.execute("cloud-init clean --logs") + client.restart() + assert client.execute("test -f /var/tmp/signed_and_encrypted") verify_clean_boot(client) @@ -209,7 +218,7 @@ def test_signed_and_encrypted(pgp_client: IntegrationInstance): assert not client.execute("test -f /var/tmp/signed_and_encrypted") result = client.execute("cloud-init status --format=json") assert result.failed - assert "Failed decrypting user data" in result.stdout + assert "Signature verification failed. Could not verify." in result.stdout # Restore the public key, invalidate the private key, and ensure we fail client.execute("cp /var/tmp/pub_key /etc/cloud/keys/pub_key") @@ -221,7 +230,7 @@ def test_signed_and_encrypted(pgp_client: IntegrationInstance): assert not client.execute("test -f /var/tmp/signed_and_encrypted") result = client.execute("cloud-init status --format=json") assert result.failed - assert "Failed decrypting user data" in result.stdout + assert "Signature verification failed. Could not unwrap." in result.stdout @pytest.mark.parametrize( From 2d60e45aec93ec6c0786ffb2693a99f22df5d17f Mon Sep 17 00:00:00 2001 From: James Falcon Date: Mon, 21 Oct 2024 09:04:13 -0500 Subject: [PATCH 13/13] update docs --- doc/rtd/howto/pgp.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/doc/rtd/howto/pgp.rst b/doc/rtd/howto/pgp.rst index 44b928edcb5..3bcf17c6054 100644 --- a/doc/rtd/howto/pgp.rst +++ b/doc/rtd/howto/pgp.rst @@ -127,6 +127,18 @@ Export the private key of the encrypting user: $ gpg --export-secret-keys encrypting_user > /etc/cloud/keys/encrypting_user.gpg +Remove key ring +--------------- + +.. note:: + This step is optional but recommended for a clean image. + +Remove the keyring that was generated upon creating our first key: + +.. code-block:: bash + + $ rm -r ~/.gnupg/ + Why export keys? ---------------- @@ -152,6 +164,15 @@ require that cloud-init only process PGP messages. To do so, create a file user_data: require_signature: true +Clean cloud-init +================ + +This is to ensure that cloud-init runs as if it were first boot: + +.. code-block:: bash + + $ cloud-init clean --logs + Retrieve our encrypted and signed user data ===========================================