Skip to content

Commit

Permalink
feat: Support reclaiming page cache for agent
Browse files Browse the repository at this point in the history
  • Loading branch information
rvql committed Jan 16, 2025
1 parent 18399cb commit a08c67c
Show file tree
Hide file tree
Showing 10 changed files with 317 additions and 35 deletions.
2 changes: 2 additions & 0 deletions agent/src/config/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1987,6 +1987,7 @@ pub struct Tunning {
pub cpu_affinity: Vec<usize>,
pub process_scheduling_priority: usize,
pub idle_memory_trimming: bool,
pub page_cache_reclaim_percentage: u8,
#[serde(with = "humantime_serde")]
pub resource_monitoring_interval: Duration,
}
Expand All @@ -1997,6 +1998,7 @@ impl Default for Tunning {
cpu_affinity: vec![],
process_scheduling_priority: 0,
idle_memory_trimming: false,
page_cache_reclaim_percentage: 100,
resource_monitoring_interval: Duration::from_secs(10),
}
}
Expand Down
9 changes: 9 additions & 0 deletions agent/src/config/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ pub struct EnvironmentConfig {
pub system_load_circuit_breaker_threshold: f32,
pub system_load_circuit_breaker_recover: f32,
pub system_load_circuit_breaker_metric: agent::SystemLoadMetric,
pub page_cache_reclaim_percentage: u8,
}

#[derive(Clone, PartialEq, Eq, Debug)]
Expand Down Expand Up @@ -1602,6 +1603,7 @@ impl TryFrom<(Config, UserConfig)> for ModuleConfig {
.circuit_breakers
.relative_sys_load
.metric,
page_cache_reclaim_percentage: conf.global.tunning.page_cache_reclaim_percentage,
},
synchronizer: SynchronizerConfig {
sync_interval: conf.global.communication.proactive_request_interval,
Expand Down Expand Up @@ -3941,6 +3943,13 @@ impl ConfigHandler {
tunning.resource_monitoring_interval = new_tunning.resource_monitoring_interval;
restart_agent = !first_run;
}
if tunning.page_cache_reclaim_percentage != new_tunning.page_cache_reclaim_percentage {
info!(
"Update global.tunning.page_cache_reclaim_percentage from {:?} to {:?}.",
tunning.page_cache_reclaim_percentage, new_tunning.page_cache_reclaim_percentage
);
tunning.page_cache_reclaim_percentage = new_tunning.page_cache_reclaim_percentage;
}

// dev
let dev = &mut config.dev;
Expand Down
11 changes: 11 additions & 0 deletions agent/src/monitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use crate::{
config::handler::EnvironmentAccess,
error::{Error, Result},
utils::{
cgroups,
process::{get_current_sys_memory_percentage, get_file_and_size_sum},
stats::{
self, Collector, Countable, Counter, CounterType, CounterValue, RefCountable,
Expand Down Expand Up @@ -318,6 +319,16 @@ impl RefCountable for SysStatusBroker {
Err(_) => CounterValue::Unsigned(0),
},
));
#[cfg(target_os = "linux")]
metrics.push((
"page_cache",
CounterType::Gauged,
if let Some(m_stat) = cgroups::memory_info() {
CounterValue::Unsigned(m_stat.stat.cache)
} else {
CounterValue::Unsigned(0)
},
));
metrics
}
}
Expand Down
178 changes: 155 additions & 23 deletions agent/src/utils/cgroups/linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

use std::path::{Path, PathBuf};
use std::sync::{Arc, Condvar, Mutex};
use std::thread::JoinHandle;
use std::time::Duration;
Expand All @@ -24,9 +25,14 @@ use crate::config::handler::EnvironmentAccess;
use crate::utils::environment::is_kernel_available;

use arc_swap::access::Access;
use cgroups_rs::cgroup_builder::*;
use cgroups_rs::*;
use log::{info, warn};
use cgroups_rs::{
cgroup_builder::CgroupBuilder,
cpu::CpuController,
hierarchies,
memory::{MemController, Memory},
Cgroup, CgroupPid, Controller, CpuResources, MemoryResources, Resources,
};
use log::{debug, info, trace, warn};
use public::consts::{DEFAULT_CPU_CFS_PERIOD_US, PROCESS_NAME};

pub struct Cgroups {
Expand All @@ -40,40 +46,34 @@ pub struct Cgroups {

const CHECK_INTERVAL: Duration = Duration::from_secs(1);

/// Reports whether `/proc/filesystems` lists a filesystem whose entry
/// contains "cgroup" (matched case-insensitively).
///
/// Returns `false` when `/proc/filesystems` cannot be read.
fn cgroups_supported() -> bool {
    match fs::read_to_string("/proc/filesystems") {
        Ok(filesystems) => filesystems
            .lines()
            .map(str::to_lowercase)
            .any(|entry| entry.contains("cgroup")),
        Err(_) => false,
    }
}

impl Cgroups {
/// 创建cgroup hierarchy
pub fn new(pid: u64, config: EnvironmentAccess) -> Result<Self, Error> {
let contents = match fs::read_to_string("/proc/filesystems") {
Ok(file_contents) => file_contents,
Err(e) => {
return Err(Error::CgroupsNotSupported(e.to_string()));
}
};
let mut cgroup_supported = false;
for line in contents.lines() {
// 检查系统是否支持cgroup
if line.to_lowercase().contains("cgroup") {
cgroup_supported = true;
break;
}
}
if !cgroup_supported {
return Err(Error::CgroupsNotSupported(format!(
"cgroups v1 or v2 is not found."
)));
if !cgroups_supported() {
return Err(Error::CgroupsNotSupported(
"read /proc/filesystems failed or cgroups/cgroups2 not found.".to_string(),
));
}
let hier = hierarchies::auto();
let is_v2 = hier.v2();
let cg: Cgroup = CgroupBuilder::new(PROCESS_NAME).build(hier);
let cpus: &cpu::CpuController = match cg.controller_of() {
let cpus: &CpuController = match cg.controller_of() {
Some(controller) => controller,
None => {
return Err(Error::CpuControllerSetFailed(format!(
"maybe cgroups is not installed"
)));
}
};
let mem: &memory::MemController = match cg.controller_of() {
let mem: &MemController = match cg.controller_of() {
Some(controller) => controller,
None => {
return Err(Error::MemControllerSetFailed(format!(
Expand Down Expand Up @@ -229,3 +229,135 @@ pub fn is_cgroup_procs_writable() -> bool {
const MIN_KERNEL_VERSION_CGROUP_PROCS: &str = "3";
is_kernel_available(MIN_KERNEL_VERSION_CGROUP_PROCS)
}

// procfs path of PID 1's root directory; used as the prefix when building the
// memory cgroup path in `get_memory_cgroup_path`.
const PID1_ROOT: &str = "/proc/1/root";

/// Builds the path of this process's memory cgroup directory as reachable
/// through PID 1's root.
///
/// From inside a container the memory cgroup appears at
/// `/sys/fs/cgroup/memory`, but that mount is read-only, so the page cache
/// cannot be reclaimed with:
///
/// `echo 0 > /sys/fs/cgroup/memory/memory.force_empty`
///
/// The detour taken here is to address the real cgroup directory from the
/// global cgroup namespace instead. The resulting directory is:
///
/// `/proc/1/root/{memory_cgroup_mount_point}/{cgroup_path}`
///
/// and `memory.force_empty` lives directly inside it.
///
/// Returns `Ok(None)` when no `cgroup` mount carrying the `memory` option is
/// found, when the process has no cgroup entry with the `memory` controller,
/// or when that entry's path is `/`. Errors from procfs are propagated.
fn get_memory_cgroup_path() -> procfs::ProcResult<Option<PathBuf>> {
    let mut full_path = PathBuf::from(PID1_ROOT);
    let myself = procfs::process::Process::myself()?;

    let memory_mount = match myself
        .mountinfo()?
        .into_iter()
        .find(|entry| entry.fs_type == "cgroup" && entry.super_options.contains_key("memory"))
    {
        Some(entry) => entry,
        None => {
            debug!("memory cgroup not found");
            return Ok(None);
        }
    };
    trace!(
        "memory cgroup mount point: {}",
        memory_mount.mount_point.display()
    );
    // Drop the leading "/" component: extending a PathBuf with a root
    // component would replace the prefix accumulated so far.
    full_path.extend(memory_mount.mount_point.components().skip(1));

    let memory_cgroup = match myself
        .cgroups()?
        .into_iter()
        .find(|entry| entry.controllers.iter().any(|name| name == "memory"))
    {
        Some(entry) => entry,
        None => return Ok(None),
    };
    trace!("memory cgroup path: {}", memory_cgroup.pathname);
    if memory_cgroup.pathname == "/" {
        debug!("memory cgroup is mounted on root");
        return Ok(None);
    }
    // Same as above: skip the leading "/" so the cgroup path is appended.
    full_path.extend(Path::new(&memory_cgroup.pathname).components().skip(1));

    trace!("memory cgroup path: {}", full_path.display());
    Ok(Some(full_path))
}

/// Returns `true` only when cgroups are available and the system is not
/// running in cgroup v2 unified mode.
///
/// The unified-mode probe is only performed once `cgroups_supported()` has
/// succeeded; each rejection is logged at debug level.
fn cgroups_v1_check() -> bool {
    if cgroups_supported() {
        if hierarchies::is_cgroup2_unified_mode() {
            debug!("cgroups v2 is not supported");
            false
        } else {
            true
        }
    } else {
        debug!("cgroups not supported for this system");
        false
    }
}

/// Reads the memory statistics of the agent's own memory cgroup.
///
/// Returns `None` when `cgroups_v1_check()` fails, or when the memory cgroup
/// directory cannot be determined (a lookup error is logged as a warning).
pub(crate) fn memory_info() -> Option<Memory> {
    if !cgroups_v1_check() {
        return None;
    }

    let mem_mount = match get_memory_cgroup_path() {
        Ok(Some(path)) => path,
        Ok(None) => {
            debug!("cgroups memory mount point not found or is invalid");
            return None;
        }
        Err(e) => {
            warn!("get memory path failed: {e}");
            return None;
        }
    };

    // The path is not needed afterwards, so hand it over instead of cloning.
    Some(MemController::new(mem_mount, false).memory_stat())
}

/// Computes `cache / limit` as an integer percentage.
///
/// A zero or negative limit yields 0 instead of dividing by zero, and the
/// multiplication saturates rather than overflowing.
fn page_cache_percentage(cache: u64, limit_in_bytes: i64) -> u64 {
    match u64::try_from(limit_in_bytes) {
        Ok(limit) if limit > 0 => cache.saturating_mul(100) / limit,
        _ => 0,
    }
}

/// Reclaims the page cache of the agent's memory cgroup when the cache has
/// grown to at least `threshold` percent of the cgroup memory limit.
///
/// The reclaim is triggered by writing `0` to `memory.force_empty` inside the
/// cgroup directory returned by `get_memory_cgroup_path()`.
///
/// # Parameters
/// * `threshold` - cache/limit percentage at which reclaiming starts; a value
///   of 100 or more disables the feature.
///
/// # Returns
/// `true` if the write to `memory.force_empty` succeeded, `false` if the
/// feature is disabled, `cgroups_v1_check()` fails, the cgroup directory could
/// not be determined, the cache is below the threshold, or the write failed.
pub(crate) fn page_cache_reclaim_check(threshold: u8) -> bool {
    if threshold >= 100 {
        return false;
    }
    if !cgroups_v1_check() {
        return false;
    }

    let mem_mount = match get_memory_cgroup_path() {
        Ok(Some(path)) => path,
        Ok(None) => {
            debug!("cgroups memory mount point not found or is invalid");
            return false;
        }
        Err(e) => {
            warn!("get memory path failed: {e}");
            return false;
        }
    };
    // `mem_mount` is the cgroup *directory*, so the control file must be
    // appended to it. `set_file_name` would replace the last directory
    // component and target the parent cgroup's `memory.force_empty` instead.
    let reclaim_path = mem_mount.join("memory.force_empty");
    let mc = MemController::new(mem_mount, false);

    let m_stat = mc.memory_stat();
    let percentage = page_cache_percentage(m_stat.stat.cache, m_stat.limit_in_bytes);
    if percentage < u64::from(threshold) {
        debug!("cache / limit = {percentage}% < {threshold}%");
        return false;
    }

    debug!("cache before reclaim: {}", m_stat.stat.cache);
    if let Err(e) = fs::write(&reclaim_path, b"0") {
        warn!(
            "reclaim memory cache write to {} failed: {e}",
            reclaim_path.display()
        );
        return false;
    }
    // Re-read the statistics so the log shows the effect of the reclaim.
    info!(
        "page cache reclaimed {} -> {}",
        m_stat.stat.cache,
        mc.memory_stat().stat.cache
    );
    true
}
8 changes: 8 additions & 0 deletions agent/src/utils/guard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,8 @@ impl Guard {
let in_container = running_in_container();
let cgroups_disabled = self.cgroups_disabled;
let mut last_exceeded = get_timestamp(0);
#[cfg(target_os = "linux")]
let mut last_page_reclaim = Instant::now();

let thread = thread::Builder::new().name("guard".to_owned()).spawn(move || {
let mut system_load = SystemLoadGuard::new(system.clone(), exception_handler.clone());
Expand Down Expand Up @@ -436,6 +438,12 @@ impl Guard {
unsafe { let _ = malloc_trim(0); }
}

#[cfg(target_os = "linux")]
if last_page_reclaim.elapsed() >= Duration::from_secs(60) {
last_page_reclaim = Instant::now();
let _ = crate::utils::cgroups::page_cache_reclaim_check(config.page_cache_reclaim_percentage);
}

// Periodic memory checks are necessary:
// Cgroups does not count the memory of RssFile, and AF_PACKET Block occupies RssFile.
// Therefore, using Cgroups to limit the memory usage may not be accurate in some scenarios.
Expand Down
40 changes: 38 additions & 2 deletions server/agent_config/README-CH.md
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,42 @@ global:

开启闲置内存修剪特性,将降低 agent 内存使用量,但可能会损失 agent 处理性能。

### Page Cache 回收间隔 {#global.tunning.page_cache_reclaim_interval}

**标签**:

`hot_update`

**FQCN**:

`global.tunning.page_cache_reclaim_interval`

Upgrade from old version: `static_config.page-cache-reclaim-interval`

**默认值**:
```yaml
global:
tunning:
page_cache_reclaim_interval: 0
```

**模式**:
| Key | Value |
| ---- | ---------------------------- |
| Type | duration |
| Range | [0, '1d'] |

**详细描述**:

Cgroup 的内存使用量包括匿名内存和文件页缓存。在某些情况下,仅仅是文件页缓存就可能导致
cgroup 因为内存不足杀死 agent 进程。为了避免这种情况,agent 将定期强制清空文件页缓存,
且由于 agent 的文件 I/O 量不大,这不太可能对 agent 的性能造成影响,但同一 cgroup 下的其他
进程可能会受到影响。不建议设置很小的值。
设置该值为 0 时,该特性不生效。
注意:
- 该特性仅支持 cgroups v1。
- 如果 agent 的 memory cgroup 路径是 “/”,该特性不生效。

### 资源监控间隔 {#global.tunning.resource_monitoring_interval}

**标签**:
Expand Down Expand Up @@ -7048,7 +7084,7 @@ processors:
| Key | Value |
| ---- | ---------------------------- |
| Type | duration |
| Range | ['1s', '10s'] |
| Range | ['1s', '20s'] |

**详细描述**:

Expand Down Expand Up @@ -7079,7 +7115,7 @@ processors:
| Key | Value |
| ---- | ---------------------------- |
| Type | duration |
| Range | ['0s', '10s'] |
| Range | ['0s', '20s'] |

**详细描述**:

Expand Down
Loading

0 comments on commit a08c67c

Please sign in to comment.