[antlir2][image_test] use static units for booted tests

Summary: Use as much static unit configuration as possible for booted tests. There is still a ton of drop-in settings required for things that can only be determined at runtime (including the test command, environment, etc) However, this does make the tests a bit easier to manually debug, since there is only one drop-in being generated instead of 3 Test Plan: ``` ❯ buck2 test fbcode//antlir/antlir2/testing/tests: fbcode//metalos/imaging_initrd/tests/broken-generator:broken-generator.antlir2 Buck UI: https://www.internalfb.com/buck2/12c0e77e-aed1-4419-94c6-dc12eb960529 Test UI: https://www.internalfb.com/intern/testinfra/testrun/2814749958879637 Note: Using experimental modern dice Network: Up: 0B Down: 0B Jobs completed: 141. Time elapsed: 24.7s. Tests finished: Pass 32. Fail 0. Fatal 0. Skip 0. Build failure 0 ``` Reviewed By: epilatow Differential Revision: D47886919 fbshipit-source-id: b274816ef291656ce503d8dce2d788d3857d251e
facebookincubator · Aug 22, 2023 · 76d6875 · 76d6875
1 parent f0b2d99
commit 76d6875
Show file tree

Hide file tree

Showing 7 changed files with 160 additions and 94 deletions.
diff --git a/antlir/antlir2/testing/image_test.bzl b/antlir/antlir2/testing/image_test.bzl
@@ -11,18 +11,31 @@ load("//antlir/antlir2/bzl/feature:defs.bzl", "feature")
 load("//antlir/antlir2/bzl/image:defs.bzl", "image")
 load("//antlir/bzl:build_defs.bzl", "add_test_framework_label", "buck_sh_test", "cpp_unittest", "python_unittest", "rust_unittest")
 load("//antlir/bzl:constants.bzl", "REPO_CFG")
+load("//antlir/bzl:systemd.bzl", "systemd")
 
 HIDE_TEST_LABELS = ["disabled", "test_is_invisible_to_testpilot"]
 
+def _default_list(maybe_value: list[str] | None, default: list[str]) -> list[str]:
+    if maybe_value == None:
+        return default
+    return maybe_value
+
 def _impl(ctx: AnalysisContext) -> list[Provider]:
+    if not ctx.attrs.boot and (ctx.attrs.boot_requires_units or ctx.attrs.boot_after_units):
+        fail("boot=False cannot be combined with boot_{requires,after}_units")
+
+    boot_requires_units = _default_list(ctx.attrs.boot_requires_units, default = ["sysinit.target"])
+    boot_after_units = _default_list(ctx.attrs.boot_after_units, default = ["sysinit.target", "basic.target"])
+
     mounts = ctx.actions.write_json("mounts.json", ctx.attrs.layer[LayerInfo].mounts)
 
     test_cmd = cmd_args(
         ctx.attrs.image_test[RunInfo],
         cmd_args(ctx.attrs.layer[LayerInfo].subvol_symlink, format = "--layer={}"),
         cmd_args(ctx.attrs.run_as_user, format = "--user={}"),
         cmd_args("--boot") if ctx.attrs.boot else cmd_args(),
-        cmd_args(ctx.attrs.boot_requires_units, format = "--requires-unit={}"),
+        cmd_args(boot_requires_units, format = "--requires-unit={}") if ctx.attrs.boot else cmd_args(),
+        cmd_args(boot_after_units, format = "--after-unit={}") if ctx.attrs.boot else cmd_args(),
         cmd_args(["{}={}".format(k, v) for k, v in ctx.attrs.test[ExternalRunnerTestInfo].env.items()], format = "--setenv={}"),
         cmd_args(mounts, format = "--mounts={}"),
         ctx.attrs.test[ExternalRunnerTestInfo].test_type,
@@ -66,10 +79,19 @@ _image_test = rule(
             default = False,
             doc = "boot the container with /init as pid1 before running the test",
         ),
-        "boot_requires_units": attrs.list(
-            attrs.string(),
-            default = [],
-            doc = "delay running the test until all of the given systemd units have started successfully",
+        "boot_after_units": attrs.option(
+            attrs.list(
+                attrs.string(),
+            ),
+            default = None,
+            doc = "Add an After= requirement on these units to the test",
+        ),
+        "boot_requires_units": attrs.option(
+            attrs.list(
+                attrs.string(),
+            ),
+            default = None,
+            doc = "Add a Requires= and After= requirement on these units to the test",
         ),
         "image_test": attrs.default_only(attrs.exec_dep(default = "//antlir/antlir2/testing/image_test:image-test")),
         "labels": attrs.list(attrs.string(), default = []),
@@ -92,7 +114,8 @@ def _implicit_image_test(
         run_as_user: str | None = None,
         labels: list[str] | None = None,
         boot: bool = False,
-        boot_requires_units: list[str] = [],
+        boot_requires_units: [list[str], None] = None,
+        boot_after_units: [list[str], None] = None,
         _add_outer_labels: list[str] = [],
         **kwargs):
     test_rule(
@@ -107,6 +130,19 @@ def _implicit_image_test(
     # @oss-disable
         # @oss-disable
 
+    if boot:
+        image.layer(
+            name = "{}--bootable-layer".format(name),
+            parent_layer = layer,
+            features = [
+                systemd.install_unit(
+                    "//antlir/antlir2/testing/image_test:antlir2_image_test.service",
+                    force = True,
+                ),
+            ],
+        )
+        layer = ":{}--bootable-layer".format(name)
+
     image_test(
         name = name,
         layer = layer,
@@ -115,6 +151,7 @@ def _implicit_image_test(
         labels = labels + [special_tags.enable_artifact_reporting],
         boot = boot,
         boot_requires_units = boot_requires_units,
+        boot_after_units = boot_after_units,
     )
 
 image_cpp_test = partial(

diff --git a/antlir/antlir2/testing/image_test/BUCK b/antlir/antlir2/testing/image_test/BUCK
@@ -1,4 +1,4 @@
-load("//antlir/bzl:build_defs.bzl", "rust_binary", "rust_library")
+load("//antlir/bzl:build_defs.bzl", "export_file", "rust_binary", "rust_library")
 
 rust_library(
     name = "image_test_lib",
@@ -30,3 +30,8 @@ rust_binary(
         "//antlir/util/cli/json_arg:json_arg",
     ],
 )
+
+export_file(
+    name = "antlir2_image_test.service",
+    visibility = ["PUBLIC"],
+)
diff --git a/antlir/antlir2/testing/image_test/antlir2_image_test.service b/antlir/antlir2/testing/image_test/antlir2_image_test.service
@@ -0,0 +1,19 @@
+# Unit that runs the booted unit test. This is heavily re-configured by a
+# drop-in that inserts the command, env vars, cwd, and any other things that can
+# only be determined at runtime
+[Unit]
+DefaultDependencies=no
+# Exit the container as soon as this test is done, using the exit code of the
+# process
+SuccessAction=exit-force
+FailureAction=exit-force
+
+[Service]
+# Having Type=simple will not cause a test that waits for `systemctl
+# is-system-running` to stall until the test itself is done (which would never
+# happen). {Failure,Success}Action are still respected when the test process
+# exits either way.
+Type=simple
+Environment=USER=%u
+StandardOutput=truncate:/antlir2/test_stdout
+StandardError=truncate:/antlir2/test_stderr
diff --git a/antlir/antlir2/testing/image_test/src/main.rs b/antlir/antlir2/testing/image_test/src/main.rs
@@ -16,10 +16,12 @@ use std::os::unix::ffi::OsStrExt;
 use std::os::unix::process::CommandExt;
 use std::path::Path;
 use std::path::PathBuf;
+use std::process::Command;
 
 use antlir2_isolate::isolate;
 use antlir2_isolate::InvocationType;
 use antlir2_isolate::IsolationContext;
+use anyhow::ensure;
 use anyhow::Context;
 use anyhow::Result;
 use clap::Parser;
@@ -29,6 +31,7 @@ use json_arg::JsonFile;
 use mount::Mount;
 use tempfile::NamedTempFile;
 use tracing::debug;
+use tracing::trace;
 use tracing_subscriber::prelude::*;
 
 fn make_log_files(_base: &str) -> Result<(NamedTempFile, NamedTempFile)> {
@@ -50,6 +53,9 @@ struct Args {
     #[clap(long = "requires-unit", requires = "boot")]
     /// Add Requires= and After= dependencies on these units
     requires_units: Vec<String>,
+    #[clap(long = "after-unit", requires = "boot")]
+    /// Add an After= dependency on these units
+    after_units: Vec<String>,
     #[clap(long)]
     /// Set these env vars in the test environment
     setenv: Vec<KvPair>,
@@ -132,6 +138,7 @@ fn main() -> Result<()> {
             })
             .collect::<HashMap<_, _>>(),
     );
+    ctx.setenv(("ANTLIR2_IMAGE_TEST", "1"));
 
     // XARs need /dev/fuse to run. Ideally we could just have this created
     // inside the container. Until
@@ -142,110 +149,93 @@ fn main() -> Result<()> {
     }
 
     if args.boot {
-        // Mark the kernel-command-line.service unit as being Type=simple so
-        // that the boot graph is considered complete as soon as it starts the
-        // test.
-        let mut dropin = NamedTempFile::new()?;
-        writeln!(dropin, "[Unit]")?;
-        // do not exit the container until the test itself is done
-        writeln!(dropin, "SuccessAction=none")?;
-        // if, however, kernel-command-line.service fails to even start the
-        // test, exit immediately
-        writeln!(dropin, "FailureAction=exit-force")?;
-        writeln!(dropin, "[Service]")?;
-        writeln!(dropin, "Type=simple")?;
-        // kernel-command-line.service will just start the
-        // antlir2_image_test.service unit that is created below. That unit has {Failure,Success}Action
-        let systemd_run_arg = "systemd.run=\"systemctl start antlir2_image_test.service\"";
-        ctx.inputs((
-            Path::new("/run/systemd/system/kernel-command-line.service.d/antlir2.conf"),
-            dropin.path(),
-        ));
-
-        // If we drop into emergency.target, we still want the test to run.
-        // To prevent this behavior, set boot_requires_units to sysinit.target.
-        let mut dropin = NamedTempFile::new()?;
-        writeln!(dropin, "[Unit]")?;
-        writeln!(dropin, "Requires=antlir2_image_test.service")?;
-        ctx.inputs((
-            Path::new("/run/systemd/system/emergency.service.d/antlir2.conf"),
-            dropin.path(),
-        ));
-
         let container_stdout = container_stdout_file()?;
         let (mut test_stdout, mut test_stderr) = make_log_files("test")?;
 
-        let mut test_unit = NamedTempFile::new()?;
-        writeln!(test_unit, "[Unit]")?;
-        writeln!(test_unit, "DefaultDependencies=no")?;
-        // exit the container as soon as this test is done, using the exit code
-        // of the process
-        writeln!(test_unit, "SuccessAction=exit-force")?;
-        writeln!(test_unit, "FailureAction=exit-force")?;
+        let mut test_unit_dropin = NamedTempFile::new()?;
+        writeln!(test_unit_dropin, "[Unit]")?;
+
+        // If a test requires default.target, it really wants the _real_
+        // default.target, not the test itself which becomes default.target when
+        // we pass systemd.unit=
+        let res = Command::new("systemctl")
+            .arg("get-default")
+            .arg("--root")
+            .arg(&args.layer)
+            .output()
+            .context("while running systemctl get-default")?;
+        ensure!(
+            res.status.success(),
+            "systemctl get-default failed: {}",
+            String::from_utf8_lossy(&res.stderr)
+        );
+        let default_target = std::str::from_utf8(&res.stdout)
+            .context("systemctl get-default returned invalid utf8")?
+            .trim();
+        trace!("default target was {default_target}");
+
         for unit in &args.requires_units {
-            writeln!(test_unit, "After={unit}")?;
-            writeln!(test_unit, "Requires={unit}")?;
+            let unit = match unit.as_str() {
+                "default.target" => default_target,
+                unit => unit,
+            };
+            writeln!(test_unit_dropin, "Requires={unit}")?;
+        }
+        for unit in args.requires_units.iter().chain(&args.after_units) {
+            let unit = match unit.as_str() {
+                "default.target" => default_target,
+                unit => unit,
+            };
+            writeln!(test_unit_dropin, "After={unit}")?;
         }
 
-        writeln!(test_unit, "[Service]")?;
-        // Having Type=simple will not cause a test that waits for `systemctl
-        // is-system-running` to stall until the test itself is done (which
-        // would never happen). {Failure,Success}Action are still respected when
-        // the test process exits either way.
-        writeln!(test_unit, "Type=simple")?;
+        writeln!(test_unit_dropin, "[Service]")?;
 
-        write!(test_unit, "WorkingDirectory=")?;
+        write!(test_unit_dropin, "WorkingDirectory=")?;
         let cwd = std::env::current_dir().context("while getting cwd")?;
-        test_unit.write_all(cwd.as_os_str().as_bytes())?;
-        test_unit.write_all(b"\n")?;
+        test_unit_dropin.write_all(cwd.as_os_str().as_bytes())?;
+        test_unit_dropin.write_all(b"\n")?;
 
-        write!(test_unit, "ExecStart=")?;
+        write!(test_unit_dropin, "ExecStart=")?;
         let mut iter = args.test.into_inner_cmd().into_iter().peekable();
         if let Some(exe) = iter.next() {
             let realpath = std::fs::canonicalize(&exe)
                 .with_context(|| format!("while getting absolute path of {exe:?}"))?;
-            test_unit.write_all(realpath.as_os_str().as_bytes())?;
+            test_unit_dropin.write_all(realpath.as_os_str().as_bytes())?;
             if iter.peek().is_some() {
-                test_unit.write_all(b" ")?;
+                test_unit_dropin.write_all(b" ")?;
             }
         }
         while let Some(arg) = iter.next() {
-            test_unit.write_all(arg.as_os_str().as_bytes())?;
+            test_unit_dropin.write_all(arg.as_os_str().as_bytes())?;
             if iter.peek().is_some() {
-                test_unit.write_all(b" ")?;
+                test_unit_dropin.write_all(b" ")?;
             }
         }
-        test_unit.write_all(b"\n")?;
-
-        // wire the test output to the parent process's std{out,err}
-        write!(test_unit, "StandardOutput=truncate:")?;
-        test_unit.write_all(test_stdout.path().as_os_str().as_bytes())?;
-        test_unit.write_all(b"\n")?;
-        write!(test_unit, "StandardError=truncate:")?;
-        test_unit.write_all(test_stderr.path().as_os_str().as_bytes())?;
-        test_unit.write_all(b"\n")?;
-
-        writeln!(test_unit, "Environment=USER=%u")?;
+        test_unit_dropin.write_all(b"\n")?;
 
         for (key, val) in &setenv {
-            write!(test_unit, "Environment=\"{key}=")?;
-            test_unit.write_all(val.as_bytes())?;
-            writeln!(test_unit, "\"")?;
+            write!(test_unit_dropin, "Environment=\"{key}=")?;
+            test_unit_dropin.write_all(val.as_bytes())?;
+            writeln!(test_unit_dropin, "\"")?;
         }
         // forward test runner env vars to the inner test
         for (key, val) in std::env::vars() {
             if key.starts_with("TEST_PILOT") {
-                writeln!(test_unit, "Environment=\"{key}={val}\"")?;
+                writeln!(test_unit_dropin, "Environment=\"{key}={val}\"")?;
             }
         }
-        ctx.outputs(test_stdout.path());
-        ctx.outputs(test_stderr.path());
+        // wire the test output to the parent process's std{out,err}
+        ctx.outputs(HashMap::from([
+            (Path::new("/antlir2/test_stdout"), test_stdout.path()),
+            (Path::new("/antlir2/test_stderr"), test_stderr.path()),
+        ]));
         ctx.inputs((
-            Path::new("/run/systemd/system/antlir2_image_test.service"),
-            test_unit.path(),
+            Path::new("/run/systemd/system/antlir2_image_test.service.d/runtime.conf"),
+            test_unit_dropin.path(),
         ));
 
-        let mut isol = isolate(ctx.build())?.command(systemd_run_arg)?;
+        let mut isol = isolate(ctx.build())?.command("systemd.unit=antlir2_image_test.service")?;
         isol.arg("systemd.journald.forward_to_console=1")
             .arg("systemd.log_time=1");
         debug!("executing test in booted isolated container: {isol:?}");

diff --git a/antlir/antlir2/testing/tests/BUCK b/antlir/antlir2/testing/tests/BUCK
@@ -32,7 +32,7 @@ image.layer(
             name = "test-cpp" + ("-booted" if boot else "") + ("-requires-units" if boot == "wait-default" else ""),
             srcs = ["test.cpp"],
             boot = bool(boot),
-            boot_requires_units = ["default.target"] if boot == "wait-default" else [],
+            boot_requires_units = ["default.target"] if boot == "wait-default" else None,
             env = {
                 "ANTLIR2_TEST": "1",
                 "BOOT": str(boot),
@@ -44,7 +44,7 @@ image.layer(
             name = "test-py" + ("-booted" if boot else "") + ("-requires-units" if boot == "wait-default" else ""),
             srcs = ["test.py"],
             boot = bool(boot),
-            boot_requires_units = ["default.target"] if boot == "wait-default" else [],
+            boot_requires_units = ["default.target"] if boot == "wait-default" else None,
             env = {
                 "ANTLIR2_TEST": "1",
                 "BOOT": str(boot),
@@ -55,7 +55,7 @@ image.layer(
             name = "test-rs" + ("-booted" if boot else "") + ("-requires-units" if boot == "wait-default" else ""),
             srcs = ["test.rs"],
             boot = bool(boot),
-            boot_requires_units = ["default.target"] if boot == "wait-default" else [],
+            boot_requires_units = ["default.target"] if boot == "wait-default" else None,
             crate = "test_rs",
             crate_root = "test.rs",
             env = {
@@ -67,7 +67,7 @@ image.layer(
         image_sh_test(
             name = "test-sh" + ("-booted" if boot else "") + ("-requires-units" if boot == "wait-default" else ""),
             boot = bool(boot),
-            boot_requires_units = ["default.target"] if boot == "wait-default" else [],
+            boot_requires_units = ["default.target"] if boot == "wait-default" else None,
             env = {
                 "ANTLIR2_TEST": "1",
                 "BOOT": str(boot),

diff --git a/antlir/bzl/image_rust_unittest.bzl b/antlir/bzl/image_rust_unittest.bzl
@@ -17,7 +17,8 @@ def image_rust_unittest(
         container_opts = None,
         visibility = None,
         antlir2 = None,
-        antlir2_requires_units = [],
+        antlir2_requires_units = None,
+        antlir2_after_units = None,
         **rust_unittest_kwargs):
     wrapper_props = helpers.nspawn_wrapper_properties(
         name = name,
@@ -69,9 +70,10 @@ def image_rust_unittest(
                 layer = layer + ".antlir2",
                 boot = boot,
                 run_as_user = run_as_user,
-                boot_requires_units = (
+                boot_requires_units = ((
                     ["dbus.socket"] if (boot and wrapper_props.container_opts.boot_await_dbus) else []
-                ) + antlir2_requires_units,
+                ) + (antlir2_requires_units or [])) if (boot and wrapper_props.container_opts.boot_await_dbus) else antlir2_requires_units,
+                boot_after_units = antlir2_after_units,
                 crate = rust_unittest_kwargs.pop("crate", name + "_unittest"),
                 **rust_unittest_kwargs
             )