Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Conditionally remove networkd online dependency on Ubuntu #5772

Merged
merged 8 commits into from
Oct 17, 2024
115 changes: 114 additions & 1 deletion cloudinit/cmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import traceback
import logging
import yaml
from typing import Tuple, Callable
from typing import Optional, Tuple, Callable, Union

from cloudinit import netinfo
from cloudinit import signal_handler
Expand All @@ -34,11 +34,13 @@
from cloudinit import reporting
from cloudinit import atomic_helper
from cloudinit import lifecycle
from cloudinit import handlers
from cloudinit.log import log_util, loggers
from cloudinit.cmd.devel import read_cfg_paths
from cloudinit.config import cc_set_hostname
from cloudinit.config.modules import Modules
from cloudinit.config.schema import validate_cloudconfig_schema
from cloudinit.lifecycle import log_with_downgradable_level
from cloudinit.reporting import events
from cloudinit.settings import (
PER_INSTANCE,
Expand All @@ -47,6 +49,8 @@
CLOUD_CONFIG,
)

Reason = str

# Welcome message template
WELCOME_MSG_TPL = (
"Cloud-init v. {version} running '{action}' at "
Expand Down Expand Up @@ -319,6 +323,96 @@ def _should_bring_up_interfaces(init, args):
return not args.local


def _should_wait_via_user_data(
    raw_config: Optional[Union[str, bytes]]
) -> Tuple[bool, Reason]:
    """Determine if our cloud-config requires us to wait

    User data requires us to wait during cloud-init network phase if:
    - We have user data that is anything other than cloud-config
      - This can likely be further optimized in the future to include
        other user data types
    - cloud-config contains:
      - bootcmd
      - random_seed command
      - mounts
      - write_files with source

    :param raw_config: Raw user data or vendor data; may be None or empty.
    :returns: Tuple of (should_wait, reason), where reason is a short
        human-readable explanation suitable for logging.
    """
    if not raw_config:
        return False, "no configuration found"

    if (
        handlers.type_from_starts_with(raw_config.strip()[:13])
        != "text/cloud-config"
    ):
        return True, "non-cloud-config user data found"

    try:
        parsed_yaml = yaml.safe_load(raw_config)
    except Exception as e:
        log_with_downgradable_level(
            logger=LOG,
            version="24.4",
            requested_level=logging.WARNING,
            msg="Unexpected failure parsing userdata: %s",
            args=e,
        )
        return True, "failed to parse user data as yaml"

    if parsed_yaml is None:
        # A document holding only the header and/or comments loads as None;
        # treat it as an empty mapping rather than crashing below.
        parsed_yaml = {}
    elif not isinstance(parsed_yaml, dict):
        # We cannot inspect a non-mapping document, so stay on the safe
        # side exactly as we do for a parse failure.
        return True, "cloud-config is not a mapping"

    # These all have the potential to require network access, so we should wait
    write_files = parsed_yaml.get("write_files")
    if isinstance(write_files, list):
        for item in write_files:
            if not isinstance(item, dict):
                continue
            source_dict = item.get("source") or {}
            if not isinstance(source_dict, dict):
                continue
            source_uri = source_dict.get("uri", "")
            if (
                isinstance(source_uri, str)
                and source_uri
                and not source_uri.startswith(("/", "file:"))
            ):
                return True, "write_files with source uri found"
    # NOTE: do not return here when write_files has no remote source; the
    # remaining keys below must still be checked.
    if parsed_yaml.get("bootcmd"):
        return True, "bootcmd found"
    random_seed = parsed_yaml.get("random_seed")
    if isinstance(random_seed, dict) and random_seed.get("command"):
        return True, "random_seed command found"
    if parsed_yaml.get("mounts"):
        return True, "mounts found"
    return False, "cloud-config does not contain network requiring elements"
TheRealFalcon marked this conversation as resolved.
Show resolved Hide resolved


def _should_wait_on_network(
    datasource: Optional[sources.DataSource],
) -> Tuple[bool, Reason]:
    """Decide whether cloud-init's network stage must wait for connectivity.

    Waiting is required when:
    - There is no datasource
    - User data, vendor data or vendor data2 may require network access

    :param datasource: The discovered datasource, if any.
    :returns: Tuple of (should_wait, reason). When no wait is needed, the
        reason lists the individual verdict for each kind of data.
    """
    if not datasource:
        return True, "no datasource found"

    # (label, name of the datasource getter). Getters are looked up and
    # called one at a time so later ones are untouched on an early return.
    data_kinds = (
        ("user data", "get_userdata_raw"),
        ("vendor data", "get_vendordata_raw"),
        ("vendor data2", "get_vendordata2_raw"),
    )
    verdicts = []
    for label, getter_name in data_kinds:
        raw = getattr(datasource, getter_name)()
        needs_wait, why = _should_wait_via_user_data(raw)
        if needs_wait:
            return True, f"{why} in {label}"
        verdicts.append(f"{label}: {why}")

    return False, ", ".join(verdicts)


def main_init(name, args):
deps = [sources.DEP_FILESYSTEM, sources.DEP_NETWORK]
if args.local:
Expand Down Expand Up @@ -396,6 +490,9 @@ def main_init(name, args):
mode = sources.DSMODE_LOCAL if args.local else sources.DSMODE_NETWORK

if mode == sources.DSMODE_NETWORK:
if not os.path.exists(init.paths.get_runpath(".skip-network")):
LOG.debug("Will wait for network connectivity before continuing")
init.distro.wait_for_network()
blackboxsw marked this conversation as resolved.
Show resolved Hide resolved
existing = "trust"
sys.stderr.write("%s\n" % (netinfo.debug_info()))
else:
Expand Down Expand Up @@ -463,9 +560,25 @@ def main_init(name, args):
# dhcp clients to advertize this hostname to any DDNS services
# LP: #1746455.
_maybe_set_hostname(init, stage="local", retry_stage="network")

init.apply_network_config(bring_up=bring_up_interfaces)

if mode == sources.DSMODE_LOCAL:
should_wait, reason = _should_wait_on_network(init.datasource)
if should_wait:
LOG.debug(
"Network connectivity determined necessary for "
"cloud-init's network stage. Reason: %s",
reason,
)
else:
LOG.debug(
"Network connectivity determined unnecessary for "
"cloud-init's network stage. Reason: %s",
reason,
)
util.write_file(init.paths.get_runpath(".skip-network"), "")

if init.datasource.dsmode != mode:
LOG.debug(
"[%s] Exiting. datasource %s not in local mode.",
Expand Down
31 changes: 23 additions & 8 deletions cloudinit/distros/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,15 +349,16 @@ def dhcp_client(self) -> dhcp.DhcpClient:
raise dhcp.NoDHCPLeaseMissingDhclientError()

@property
def network_activator(self) -> Optional[Type[activators.NetworkActivator]]:
"""Return the configured network activator for this environment."""
def network_activator(self) -> Type[activators.NetworkActivator]:
"""Return the configured network activator for this environment.

:returns: The network activator class to use
:raises: NoActivatorException if no activator is found
"""
priority = util.get_cfg_by_path(
self._cfg, ("network", "activators"), None
)
try:
return activators.select_activator(priority=priority)
except activators.NoActivatorException:
return None
return activators.select_activator(priority=priority)
TheRealFalcon marked this conversation as resolved.
Show resolved Hide resolved

@property
def network_renderer(self) -> Renderer:
Expand Down Expand Up @@ -460,8 +461,9 @@ def apply_network_config(self, netconfig, bring_up=False) -> bool:
# Now try to bring them up
if bring_up:
LOG.debug("Bringing up newly configured network interfaces")
network_activator = self.network_activator
if not network_activator:
try:
network_activator = self.network_activator
except activators.NoActivatorException:
LOG.warning(
"No network activator found, not bringing up "
"network interfaces"
Expand Down Expand Up @@ -1574,6 +1576,19 @@ def device_part_info(devpath: str) -> tuple:
# name in /dev/
return diskdevpath, ptnum

def wait_for_network(self) -> None:
    """Ensure that cloud-init has network connectivity.

    For most distros, this is a no-op as cloud-init's network service is
    ordered in boot to start after network connectivity has been achieved.
    As an optimization, distros may opt to order cloud-init's network
    service immediately after cloud-init's local service, and only
    require network connectivity if it has been deemed necessary.
    This method is a hook for distros to implement this optimization.
    It is called during cloud-init's network stage if it was determined
    that network connectivity is necessary in cloud-init's network stage.

    This base implementation intentionally does nothing and returns None;
    distros that need to actively wait override it.
    """


def _apply_hostname_transformations_to_url(url: str, transformations: list):
"""
Expand Down
13 changes: 13 additions & 0 deletions cloudinit/distros/ubuntu.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,15 @@
# This file is part of cloud-init. See LICENSE file for license information.

import copy
import logging

from cloudinit.distros import PREFERRED_NTP_CLIENTS, debian
from cloudinit.distros.package_management.snap import Snap
from cloudinit.net import activators
from cloudinit.net.netplan import CLOUDINIT_NETPLAN_FILE

LOG = logging.getLogger(__name__)


class Distro(debian.Distro):
def __init__(self, name, cfg, paths):
Expand Down Expand Up @@ -49,3 +53,12 @@ def preferred_ntp_clients(self):
if not self._preferred_ntp_clients:
self._preferred_ntp_clients = copy.deepcopy(PREFERRED_NTP_CLIENTS)
return self._preferred_ntp_clients

def wait_for_network(self) -> None:
    """Ask the configured network activator to wait for the network.

    This is best-effort: a missing activator or any other failure is
    logged at error level and never propagated to the caller.
    """
    try:
        activator = self.network_activator
        activator.wait_for_network()
    except activators.NoActivatorException:
        LOG.error("Failed to wait for network. No network activator found")
    except Exception as err:
        LOG.error("Failed to wait for network: %s", err)
1 change: 1 addition & 0 deletions cloudinit/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ def __init__(self, path_cfgs: dict, ds=None):
"vendor_scripts": "scripts/vendor",
"warnings": "warnings",
"hotplug.enabled": "hotplug.enabled",
".skip-network": ".skip-network",
}
# Set when a datasource becomes active
self.datasource = ds
Expand Down
65 changes: 42 additions & 23 deletions cloudinit/net/activators.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,9 @@
from typing import Callable, Dict, Iterable, List, Optional, Type, Union

from cloudinit import subp, util
from cloudinit.net.eni import available as eni_available
from cloudinit.net import eni, netplan, network_manager, networkd
from cloudinit.net.netops.iproute2 import Iproute2
from cloudinit.net.netplan import available as netplan_available
from cloudinit.net.network_manager import available as nm_available
from cloudinit.net.network_state import NetworkState
from cloudinit.net.networkd import available as networkd_available

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -88,6 +85,11 @@ def bring_up_all_interfaces(cls, network_state: NetworkState) -> bool:
[i["name"] for i in network_state.iter_interfaces()]
)

@staticmethod
def wait_for_network() -> None:
"""Wait for network to come up."""
raise NotImplementedError()


class IfUpDownActivator(NetworkActivator):
# Note that we're not overriding bring_up_interfaces to pass something
Expand All @@ -97,7 +99,7 @@ class IfUpDownActivator(NetworkActivator):
@staticmethod
def available(target: Optional[str] = None) -> bool:
"""Return true if ifupdown can be used on this system."""
return eni_available(target=target)
return eni.available(target=target)

@staticmethod
def bring_up_interface(device_name: str) -> bool:
Expand Down Expand Up @@ -149,7 +151,7 @@ class NetworkManagerActivator(NetworkActivator):
@staticmethod
def available(target=None) -> bool:
"""Return true if NetworkManager can be used on this system."""
return nm_available(target=target)
return network_manager.available(target=target)

@staticmethod
def bring_up_interface(device_name: str) -> bool:
Expand Down Expand Up @@ -215,7 +217,7 @@ class NetplanActivator(NetworkActivator):
@staticmethod
def available(target=None) -> bool:
"""Return true if netplan can be used on this system."""
return netplan_available(target=target)
return netplan.available(target=target)

@staticmethod
def bring_up_interface(device_name: str) -> bool:
Expand Down Expand Up @@ -269,12 +271,21 @@ def bring_down_interface(device_name: str) -> bool:
NetplanActivator.NETPLAN_CMD, "all", warn_on_stderr=False
)

@staticmethod
def wait_for_network() -> None:
    """Wait for the network by delegating to the networkd activator.

    Waiting is currently only supported with the networkd renderer, so
    nothing is done (beyond a debug log) when NetworkManager is available.
    """
    if not network_manager.available():
        NetworkdActivator.wait_for_network()
        return
    LOG.debug("NetworkManager is enabled, skipping networkd wait")


class NetworkdActivator(NetworkActivator):
@staticmethod
def available(target=None) -> bool:
"""Return true if networkd can be used on this system."""
return networkd_available(target=target)
return networkd.available(target=target)

@staticmethod
def bring_up_interface(device_name: str) -> bool:
Expand All @@ -296,6 +307,13 @@ def bring_down_interface(device_name: str) -> bool:
partial(Iproute2.link_down, device_name)
)

@staticmethod
def wait_for_network() -> None:
    """Start systemd-networkd-wait-online.service via systemctl."""
    wait_online_cmd = [
        "systemctl",
        "start",
        "systemd-networkd-wait-online.service",
    ]
    subp.subp(wait_online_cmd)


# This section is mostly copied and pasted from renderers.py. An abstract
# version to encompass both seems overkill at this point
Expand All @@ -318,35 +336,36 @@ def bring_down_interface(device_name: str) -> bool:

def search_activator(
priority: List[str], target: Union[str, None]
) -> List[Type[NetworkActivator]]:
) -> Optional[Type[NetworkActivator]]:
"""Returns the first available activator from the priority list or None."""
unknown = [i for i in priority if i not in DEFAULT_PRIORITY]
if unknown:
raise ValueError(
"Unknown activators provided in priority list: %s" % unknown
f"Unknown activators provided in priority list: {unknown}"
)
activator_classes = [NAME_TO_ACTIVATOR[name] for name in priority]
return [
activator_cls
for activator_cls in activator_classes
if activator_cls.available(target)
]
return next(
(
activator_cls
for activator_cls in activator_classes
if activator_cls.available(target)
),
None,
)
TheRealFalcon marked this conversation as resolved.
Show resolved Hide resolved


def select_activator(
priority: Optional[List[str]] = None, target: Optional[str] = None
) -> Type[NetworkActivator]:
if priority is None:
priority = DEFAULT_PRIORITY
found = search_activator(priority, target)
if not found:
tmsg = ""
if target and target != "/":
tmsg = " in target=%s" % target
selected = search_activator(priority, target)
if not selected:
tmsg = f" in target={target}" if target and target != "/" else ""
raise NoActivatorException(
"No available network activators found%s. Searched "
"through list: %s" % (tmsg, priority)
f"No available network activators found{tmsg}. "
f"Searched through list: {priority}"
)
selected = found[0]
LOG.debug(
"Using selected activator: %s from priority: %s", selected, priority
)
Expand Down
Loading