Skip to content

Commit

Permalink
vmm: Unshare IPC and UTS namespaces for container process
Browse files Browse the repository at this point in the history
Container processes need pod-level IPC and UTS namespaces that are unshared
from kuasar-task by default, because the pause container has been removed.

In this commit, kuasar-task unshares new namespaces and bind-mounts them under
`/run/sandbox-ns`, where the container can join them. The hostname in the
sandbox pod config will be set in the UTS namespace by runC when the container is created.

Signed-off-by: Zhang Tianyang <burning9699@gmail.com>
  • Loading branch information
Burning1020 committed Jan 12, 2024
1 parent 6a599c0 commit 80d40e1
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 24 deletions.
6 changes: 6 additions & 0 deletions vmm/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,9 @@ pub const DEV_SHM: &str = "/dev/shm";
pub const HOSTS_FILENAME: &str = "hosts";
pub const HOSTNAME_FILENAME: &str = "hostname";
pub const RESOLV_FILENAME: &str = "resolv.conf";

/// Directory that holds one bind-mount target per sandbox namespace; the
/// individual namespace files are addressed as `SANDBOX_NS_PATH/<type>`.
pub const SANDBOX_NS_PATH: &str = "/run/sandbox-ns";
/// Type name of the network namespace. It is compared against the `type` field
/// of the namespaces in a spec and is the key for `CLONE_NEWNET` in the task's
/// clone-flag table.
// NOTE(review): the constant this replaces (`NAMESPACE_NET`) had the value
// "network"; confirm that "net" matches the type strings found in the spec.
pub const NET_NAMESPACE: &str = "net";
/// Type name of the IPC namespace; also used as the file name under
/// `SANDBOX_NS_PATH`.
pub const IPC_NAMESPACE: &str = "ipc";
/// Type name of the UTS namespace; also used as the file name under
/// `SANDBOX_NS_PATH`.
pub const UTS_NAMESPACE: &str = "uts";
/// Type name of the cgroup namespace, compared against the `type` field of the
/// namespaces in a spec.
pub const CGROUP_NAMESPACE: &str = "cgroup";
20 changes: 9 additions & 11 deletions vmm/sandbox/src/container/handler/ns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,10 @@ limitations under the License.

use async_trait::async_trait;
use containerd_sandbox::error::Result;
use vmm_common::{CGROUP_NAMESPACE, IPC_NAMESPACE, NET_NAMESPACE, SANDBOX_NS_PATH, UTS_NAMESPACE};

use crate::{container::handler::Handler, sandbox::KuasarSandbox, vm::VM};

#[allow(dead_code)]
pub const NAMESPACE_PID: &str = "pid";
pub const NAMESPACE_NET: &str = "network";
#[allow(dead_code)]
pub const NAMESPACE_MNT: &str = "mount";
pub const NAMESPACE_CGROUP: &str = "cgroup";

/// Container handler concerned with the Linux namespaces of a container spec.
// NOTE(review): summary inferred from the partially visible `Handler` impl in
// this file — confirm against the full implementation.
pub struct NamespaceHandler {
// Presumably the id of the container whose spec is adjusted — verify against the impl.
container_id: String,
}
Expand All @@ -52,10 +46,14 @@ where
};
if let Some(l) = spec.linux.as_mut() {
l.namespaces
.retain(|n| n.r#type != NAMESPACE_NET && n.r#type != NAMESPACE_CGROUP);
l.namespaces
.iter_mut()
.for_each(|n| n.path = "".to_string());
.retain(|n| n.r#type != NET_NAMESPACE && n.r#type != CGROUP_NAMESPACE);
l.namespaces.iter_mut().for_each(|n| {
n.path = if n.r#type == IPC_NAMESPACE || n.r#type == UTS_NAMESPACE {
format!("{}/{}", SANDBOX_NS_PATH, n.r#type)
} else {
"".to_string()
}
});
};
Ok(())
}
Expand Down
5 changes: 0 additions & 5 deletions vmm/sandbox/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,6 @@ pub mod sandbox;
pub mod stratovirt;
pub mod utils;

pub const NAMESPACE_PID: &str = "pid";
pub const NAMESPACE_NET: &str = "network";
pub const NAMESPACE_MNT: &str = "mount";
pub const NAMESPACE_CGROUP: &str = "cgroup";

async fn load_config<T: DeserializeOwned>(
default_config_path: &str,
) -> anyhow::Result<(Config<T>, String)> {
Expand Down
5 changes: 2 additions & 3 deletions vmm/sandbox/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ use tokio::{
sync::watch::Receiver,
time::sleep,
};

use crate::NAMESPACE_NET;
use vmm_common::NET_NAMESPACE;

pub async fn read_file<P: AsRef<Path>>(filename: P) -> Result<String> {
let mut file = tokio::fs::File::open(&filename).await?;
Expand All @@ -62,7 +61,7 @@ pub fn get_netns(data: &SandboxData) -> String {
let mut netns = "".to_string();
if let Some(l) = &spec.linux {
for ns in &l.namespaces {
if ns.r#type == NAMESPACE_NET {
if ns.r#type == NET_NAMESPACE {
netns = ns.path.clone();
}
}
Expand Down
77 changes: 72 additions & 5 deletions vmm/task/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,34 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

use std::{convert::TryFrom, path::Path, str::FromStr, sync::Arc};
use std::{collections::HashMap, convert::TryFrom, path::Path, str::FromStr, sync::Arc, thread};

use containerd_shim::{
asynchronous::{monitor::monitor_notify_by_pid, util::asyncify},
error::Error,
io_error, other,
protos::{shim::shim_ttrpc_async::create_task, ttrpc::asynchronous::Server},
util::IntoOption,
util::{mkdir, IntoOption},
Result,
};
use futures::StreamExt;
use lazy_static::lazy_static;
use log::{debug, error, info, warn, LevelFilter};
use nix::{
errno::Errno,
sched::{unshare, CloneFlags},
sys::{
wait,
wait::{WaitPidFlag, WaitStatus},
},
unistd::Pid,
unistd::{getpid, gettid, Pid},
};
use signal_hook_tokio::Signals;
use tokio::fs::File;
use vmm_common::{
api::sandbox_ttrpc::create_sandbox_service, mount::mount, ETC_RESOLV, KUASAR_STATE_DIR,
RESOLV_FILENAME,
api::sandbox_ttrpc::create_sandbox_service, mount::mount, ETC_RESOLV, HOSTNAME_FILENAME,
IPC_NAMESPACE, KUASAR_STATE_DIR, NET_NAMESPACE, RESOLV_FILENAME, SANDBOX_NS_PATH,
UTS_NAMESPACE,
};

use crate::{
Expand Down Expand Up @@ -127,6 +130,11 @@ lazy_static! {
dest: KUASAR_STATE_DIR,
options: vec!["relatime", "nodev", "sync", "dirsync",]
},];
static ref CLONE_FLAG_TABLE: HashMap<String, CloneFlags> = HashMap::from([
(String::from(NET_NAMESPACE), CloneFlags::CLONE_NEWNET),
(String::from(IPC_NAMESPACE), CloneFlags::CLONE_NEWIPC),
(String::from(UTS_NAMESPACE), CloneFlags::CLONE_NEWUTS),
]);
}

#[tokio::main]
Expand Down Expand Up @@ -298,6 +306,9 @@ async fn late_init_call() -> Result<()> {
warn!("unable to find DNS files in kuasar state dir");
}

// Setup sandbox namespace
setup_sandbox_ns().await?;

Ok(())
}

Expand Down Expand Up @@ -342,3 +353,59 @@ async fn start_ttrpc_server() -> Result<Server> {
.register_service(task_service)
.register_service(sandbox_service))
}

/// Sets up the persistent sandbox-level namespaces (IPC and UTS) by
/// delegating to `setup_persistent_ns`.
async fn setup_sandbox_ns() -> Result<()> {
    let sandbox_ns_types: Vec<String> = [IPC_NAMESPACE, UTS_NAMESPACE]
        .iter()
        .map(|ns| ns.to_string())
        .collect();
    setup_persistent_ns(sandbox_ns_types).await
}

/// Creates persistent namespaces of the given types under `SANDBOX_NS_PATH`.
///
/// For every entry of `ns_types` an empty file `SANDBOX_NS_PATH/<type>` is
/// created as a bind-mount target. A dedicated OS thread then unshares all
/// requested namespaces in one `unshare` call and bind-mounts its own
/// `/proc/<pid>/task/<tid>/ns/<type>` entries onto those files. When a UTS
/// namespace is among the requested types, the hostname read from
/// `KUASAR_STATE_DIR/HOSTNAME_FILENAME` (if present and non-empty) is set in
/// it before mounting.
///
/// The function waits for the thread to finish, so on `Ok(())` every mount is
/// already in place.
///
/// # Errors
///
/// Returns an error if a namespace type is not present in `CLONE_FLAG_TABLE`,
/// if the directory or a mount-target file cannot be created, or if
/// unsharing, setting the hostname, or bind-mounting fails in the worker
/// thread (including a panic of that thread).
async fn setup_persistent_ns(ns_types: Vec<String>) -> Result<()> {
    if ns_types.is_empty() {
        return Ok(());
    }

    // Resolve every type to its clone flag before touching the filesystem, so
    // an unknown type does not leave stray files behind.
    let mut clone_type = CloneFlags::empty();
    for ns_type in &ns_types {
        clone_type |= *CLONE_FLAG_TABLE
            .get(ns_type)
            .ok_or_else(|| other!("bad ns type {}", ns_type))?;
    }

    mkdir(SANDBOX_NS_PATH, 0o711).await?;
    for ns_type in &ns_types {
        let sandbox_ns_path = format!("{}/{}", SANDBOX_NS_PATH, ns_type);
        File::create(&sandbox_ns_path).await.map_err(io_error!(
            e,
            "failed to create: {}",
            sandbox_ns_path
        ))?;
    }

    // unshare(2) changes the namespaces of the calling thread, so the work is
    // done on a dedicated thread rather than on one that keeps running other
    // tasks afterwards.
    let handle = thread::spawn(move || -> Result<()> {
        unshare(clone_type).map_err(|e| other!("failed to do unshare: {}", e))?;

        // Only touch the hostname when this thread really owns a fresh UTS
        // namespace; otherwise the hostname of the shared namespace would be
        // changed.
        if clone_type.contains(CloneFlags::CLONE_NEWUTS) {
            let hostname =
                std::fs::read_to_string(Path::new(KUASAR_STATE_DIR).join(HOSTNAME_FILENAME))
                    .map(|s| s.trim().to_string())
                    .unwrap_or_default();
            if !hostname.is_empty() {
                nix::unistd::sethostname(hostname)
                    .map_err(|e| other!("set hostname: {}", e))?;
            }
        }

        for ns_type in &ns_types {
            let sandbox_ns_path = format!("{}/{}", SANDBOX_NS_PATH, ns_type);
            let ns_path = format!("/proc/{}/task/{}/ns/{}", getpid(), gettid(), ns_type);
            mount(
                Some("none"),
                Some(ns_path.as_str()),
                &["bind".to_string()],
                &sandbox_ns_path,
            )
            .map_err(|e| other!("failed to mount sandbox ns {}: {}", sandbox_ns_path, e))?;
        }
        Ok(())
    });

    // Wait for the worker so that failures reach the caller and the mounts are
    // guaranteed to exist on return. The join blocks the current thread, but
    // the worker only performs a handful of short syscalls.
    handle
        .join()
        .map_err(|e| other!("sandbox ns setup thread panicked: {:?}", e))?
}

0 comments on commit 80d40e1

Please sign in to comment.