diff --git a/vmm/common/src/lib.rs b/vmm/common/src/lib.rs index c1d9403f..90502af9 100644 --- a/vmm/common/src/lib.rs +++ b/vmm/common/src/lib.rs @@ -33,3 +33,9 @@ pub const DEV_SHM: &str = "/dev/shm"; pub const HOSTS_FILENAME: &str = "hosts"; pub const HOSTNAME_FILENAME: &str = "hostname"; pub const RESOLV_FILENAME: &str = "resolv.conf"; + +pub const SANDBOX_NS_PATH: &str = "/run/sandbox-ns"; +pub const NET_NAMESPACE: &str = "net"; +pub const IPC_NAMESPACE: &str = "ipc"; +pub const UTS_NAMESPACE: &str = "uts"; +pub const CGROUP_NAMESPACE: &str = "cgroup"; diff --git a/vmm/sandbox/src/container/handler/ns.rs b/vmm/sandbox/src/container/handler/ns.rs index 0d3f362a..a2d3cb10 100644 --- a/vmm/sandbox/src/container/handler/ns.rs +++ b/vmm/sandbox/src/container/handler/ns.rs @@ -16,16 +16,10 @@ limitations under the License. use async_trait::async_trait; use containerd_sandbox::error::Result; +use vmm_common::{CGROUP_NAMESPACE, IPC_NAMESPACE, NET_NAMESPACE, SANDBOX_NS_PATH, UTS_NAMESPACE}; use crate::{container::handler::Handler, sandbox::KuasarSandbox, vm::VM}; -#[allow(dead_code)] -pub const NAMESPACE_PID: &str = "pid"; -pub const NAMESPACE_NET: &str = "network"; -#[allow(dead_code)] -pub const NAMESPACE_MNT: &str = "mount"; -pub const NAMESPACE_CGROUP: &str = "cgroup"; - pub struct NamespaceHandler { container_id: String, } @@ -52,10 +46,14 @@ where }; if let Some(l) = spec.linux.as_mut() { l.namespaces - .retain(|n| n.r#type != NAMESPACE_NET && n.r#type != NAMESPACE_CGROUP); - l.namespaces - .iter_mut() - .for_each(|n| n.path = "".to_string()); + .retain(|n| n.r#type != NET_NAMESPACE && n.r#type != CGROUP_NAMESPACE); + l.namespaces.iter_mut().for_each(|n| { + n.path = if n.r#type == IPC_NAMESPACE || n.r#type == UTS_NAMESPACE { + format!("{}/{}", SANDBOX_NS_PATH, n.r#type) + } else { + "".to_string() + } + }); }; Ok(()) } diff --git a/vmm/sandbox/src/lib.rs b/vmm/sandbox/src/lib.rs index b2718479..0e165944 100644 --- a/vmm/sandbox/src/lib.rs +++ b/vmm/sandbox/src/lib.rs @@ -38,11 +38,6 @@ pub mod sandbox; pub mod stratovirt; pub mod utils; -pub const NAMESPACE_PID: &str = "pid"; -pub const NAMESPACE_NET: &str = "network"; -pub const NAMESPACE_MNT: &str = "mount"; -pub const NAMESPACE_CGROUP: &str = "cgroup"; - async fn load_config( default_config_path: &str, ) -> anyhow::Result<(Config, String)> { diff --git a/vmm/sandbox/src/utils.rs b/vmm/sandbox/src/utils.rs index b049e57c..e5cb1e91 100644 --- a/vmm/sandbox/src/utils.rs +++ b/vmm/sandbox/src/utils.rs @@ -45,8 +45,7 @@ use tokio::{ sync::watch::Receiver, time::sleep, }; - -use crate::NAMESPACE_NET; +use vmm_common::NET_NAMESPACE; pub async fn read_file>(filename: P) -> Result { let mut file = tokio::fs::File::open(&filename).await?; @@ -62,7 +61,7 @@ pub fn get_netns(data: &SandboxData) -> String { let mut netns = "".to_string(); if let Some(l) = &spec.linux { for ns in &l.namespaces { - if ns.r#type == NAMESPACE_NET { + if ns.r#type == NET_NAMESPACE { netns = ns.path.clone(); } } diff --git a/vmm/task/src/main.rs b/vmm/task/src/main.rs index 2cf25ecc..3ab6159b 100644 --- a/vmm/task/src/main.rs +++ b/vmm/task/src/main.rs @@ -14,14 +14,14 @@ See the License for the specific language governing permissions and limitations under the License. */ -use std::{convert::TryFrom, path::Path, str::FromStr, sync::Arc}; +use std::{collections::HashMap, convert::TryFrom, path::Path, str::FromStr, sync::Arc, thread}; use containerd_shim::{ asynchronous::{monitor::monitor_notify_by_pid, util::asyncify}, error::Error, io_error, other, protos::{shim::shim_ttrpc_async::create_task, ttrpc::asynchronous::Server}, - util::IntoOption, + util::{mkdir, IntoOption}, Result, }; use futures::StreamExt; @@ -29,16 +29,19 @@ use lazy_static::lazy_static; use log::{debug, error, info, warn, LevelFilter}; use nix::{ errno::Errno, + sched::{unshare, CloneFlags}, sys::{ wait, wait::{WaitPidFlag, WaitStatus}, }, - unistd::Pid, + unistd::{getpid, gettid, Pid}, }; use signal_hook_tokio::Signals; +use tokio::fs::File; use vmm_common::{ - api::sandbox_ttrpc::create_sandbox_service, mount::mount, ETC_RESOLV, KUASAR_STATE_DIR, - RESOLV_FILENAME, + api::sandbox_ttrpc::create_sandbox_service, mount::mount, ETC_RESOLV, HOSTNAME_FILENAME, + IPC_NAMESPACE, KUASAR_STATE_DIR, NET_NAMESPACE, RESOLV_FILENAME, SANDBOX_NS_PATH, + UTS_NAMESPACE, }; use crate::{ @@ -127,6 +130,11 @@ lazy_static! { dest: KUASAR_STATE_DIR, options: vec!["relatime", "nodev", "sync", "dirsync",] },]; + static ref CLONE_FLAG_TABLE: HashMap = HashMap::from([ + (String::from(NET_NAMESPACE), CloneFlags::CLONE_NEWNET), + (String::from(IPC_NAMESPACE), CloneFlags::CLONE_NEWIPC), + (String::from(UTS_NAMESPACE), CloneFlags::CLONE_NEWUTS), + ]); } #[tokio::main] @@ -298,6 +306,9 @@ async fn late_init_call() -> Result<()> { warn!("unable to find DNS files in kuasar state dir"); } + // Setup sandbox namespace + setup_sandbox_ns().await?; + Ok(()) } @@ -342,3 +353,59 @@ async fn start_ttrpc_server() -> Result { .register_service(task_service) .register_service(sandbox_service)) } + +async fn setup_sandbox_ns() -> Result<()> { + setup_persistent_ns(vec![ + String::from(IPC_NAMESPACE), + String::from(UTS_NAMESPACE), + ]) + .await?; + Ok(()) +} + +async fn setup_persistent_ns(ns_types: Vec) -> Result<()> { + if ns_types.is_empty() { + return Ok(()); + } + mkdir(SANDBOX_NS_PATH, 0o711).await?; + + let mut clone_type = CloneFlags::empty(); + + for ns_type in &ns_types { + let sandbox_ns_path = format!("{}/{}", SANDBOX_NS_PATH, ns_type); + File::create(&sandbox_ns_path).await.map_err(io_error!( + e, + "failed to create: {}", + sandbox_ns_path + ))?; + + clone_type |= *CLONE_FLAG_TABLE + .get(ns_type) + .ok_or(other!("bad ns type {}", ns_type))?; + } + + thread::spawn(move || { + unshare(clone_type).expect("failed to do unshare"); + // set hostname + let hostname = std::fs::read_to_string(Path::new(KUASAR_STATE_DIR).join(HOSTNAME_FILENAME)) + .map(|s| s.trim().to_string()) + .unwrap_or_default(); + if !hostname.is_empty() { + nix::unistd::sethostname(hostname).expect("set hostname"); + } + + for ns_type in &ns_types { + let sandbox_ns_path = format!("{}/{}", SANDBOX_NS_PATH, ns_type); + let ns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), ns_type); + mount( + Some("none"), + Some(ns_path.as_str()), + &["bind".to_string()], + &sandbox_ns_path, + ) + .expect("failed to mount sandbox ns"); + } + }); + + Ok(()) +}