From 2de35a3762ae9372cb4834211fdda9324bc215bb Mon Sep 17 00:00:00 2001
From: kamizjw
Date: Wed, 15 Jan 2025 15:27:30 +0800
Subject: [PATCH] lib-shim-v2:support stats cmd

---
 build.rs                                     |   1 +
 shim_v2.h                                    |  27 +++
 src/client/client.rs                         |  32 ++++
 src/lib.rs                                   |  90 +++++++++-
 src/protocols/mod.rs                         |   1 +
 .../containerd/cgroup/stats/v1/metrics.proto | 158 ++++++++++++++++++
 6 files changed, 308 insertions(+), 1 deletion(-)
 create mode 100644 src/protocols/protos/github.com/containerd/cgroup/stats/v1/metrics.proto

diff --git a/build.rs b/build.rs
index dda0ccb..f572bc9 100644
--- a/build.rs
+++ b/build.rs
@@ -11,6 +11,7 @@ fn main() {
             "src/protocols/protos/google/protobuf/timestamp.proto",
             "src/protocols/protos/github.com/containerd/containerd/api/types/mount.proto",
             "src/protocols/protos/github.com/containerd/containerd/api/types/task/task.proto",
+            "src/protocols/protos/github.com/containerd/cgroup/stats/v1/metrics.proto",
         ])
         .include("src/protocols/protos")
         .rust_protobuf()
diff --git a/shim_v2.h b/shim_v2.h
index 6a719f2..c19a809 100644
--- a/shim_v2.h
+++ b/shim_v2.h
@@ -41,6 +41,32 @@ struct State {
     unsigned int exit_status;
 };
 
+struct Stats {
+    uint64_t pids_current;
+    /* CPU usage */
+    uint64_t cpu_use_nanos;
+    uint64_t cpu_system_use;
+    /* BlkIO usage */
+    uint64_t blkio_read;
+    uint64_t blkio_write;
+    /* Memory usage */
+    uint64_t mem_used;
+    uint64_t mem_limit;
+    uint64_t rss_bytes;
+    uint64_t page_faults;
+    uint64_t major_page_faults;
+    /* Kernel Memory usage */
+    uint64_t kmem_used;
+    uint64_t kmem_limit;
+    /* Cache usage */
+    uint64_t cache;
+    uint64_t cache_total;
+    uint64_t inactive_file_total;
+    /* Swap usage*/
+    uint64_t swap_used;
+    uint64_t swap_limit;
+};
+
 int shim_v2_new(const char *container_id, const char *addr);
 int shim_v2_close(const char *container_id);
 
@@ -59,6 +85,7 @@ int shim_v2_pause(const char *container_id);
 int shim_v2_resume(const char *container_id);
 
 int shim_v2_state(const char *container_id, const struct State *state);
+int shim_v2_stats(const char *container_id, const struct Stats *stats);
 int shim_v2_pids(const char *container_id, int *pid);
 
 int shim_v2_wait(const char *container_id, const char *exec_id, int *exit_status);
diff --git a/src/client/client.rs b/src/client/client.rs
index 2151e23..40de955 100644
--- a/src/client/client.rs
+++ b/src/client/client.rs
@@ -11,7 +11,9 @@
 // See the Mulan PSL v2 for more details.
 
 use super::error::{Error, Result};
+use crate::protocols::metrics::Metrics;
 use crate::protocols;
+use protobuf::{CodedInputStream, Message};
 use lazy_static::lazy_static;
 use nix::sys::socket::*;
 use protocols::task::Status as shim_v2_status;
@@ -384,6 +386,36 @@ impl Store {
         })
     }
 
+    /// Ask the shim for the container's resource usage and decode the reply.
+    ///
+    /// The ttrpc `Stats` reply wraps the metrics in an `Any`; its `value` bytes are
+    /// parsed as a cgroup v1 `Metrics` message. A reply without a payload yields an
+    /// empty `Metrics`.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the ttrpc call fails or when the payload cannot be decoded.
+    pub fn stats(&self) -> Result<Metrics> {
+        let client = protocols::shim_ttrpc::TaskClient::new(self.conn.clone());
+
+        let mut req = protocols::shim::StatsRequest::new();
+        req.id = self.container_id.clone();
+        let ctx = context::with_timeout(0);
+
+        let resp = client
+            .stats(ctx, &req)
+            .map_err(shim_error!(e, "ttrpc call stats failed"))?;
+        let mut m = Metrics::new();
+        if let Some(any) = resp.stats.as_ref() {
+            let mut input = CodedInputStream::from_bytes(any.value.as_ref());
+            // Report malformed payloads as an error instead of panicking: this runs
+            // underneath the `extern "C"` entry points.
+            m.merge_from(&mut input)
+                .map_err(shim_error!(e, "decode stats metrics failed"))?;
+        }
+        Ok(m)
+    }
+
     pub fn pids(&self) -> Result<i32> {
         let c = protocols::shim_ttrpc::TaskClient::new(self.conn.clone());
 
diff --git a/src/lib.rs b/src/lib.rs
index b46d800..7c77187 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -22,7 +22,8 @@ use client::client::State as client_state;
 use client::client::Status as client_status;
 use client::client::{del_conn, get_conn, new_conn};
 use std::ffi::{CStr, CString};
-use std::os::raw::{c_char, c_int, c_uint};
+use std::os::raw::{c_char, c_int, c_uint, c_ulonglong};
+use protocols::metrics::Metrics;
 
 fn to_string(x: *const c_char) -> String {
     unsafe {
@@ -371,6 +372,93 @@ pub extern "C" fn shim_v2_state(container_id: *const c_char, state: &mut State)
         })
 }
 
+/// Resource usage snapshot handed to C callers; mirrors `struct Stats` in
+/// shim_v2.h, so field order and types must stay in sync with the header.
+#[repr(C)]
+pub struct Stats {
+    pids_current: c_ulonglong,
+    /* CPU usage */
+    cpu_use_nanos: c_ulonglong,
+    cpu_system_use: c_ulonglong,
+    /* BlkIO usage */
+    blkio_read: c_ulonglong,
+    blkio_write: c_ulonglong,
+    /* Memory usage */
+    mem_used: c_ulonglong,
+    mem_limit: c_ulonglong,
+    rss_bytes: c_ulonglong,
+    page_faults: c_ulonglong,
+    major_page_faults: c_ulonglong,
+    /* Kernel Memory usage */
+    kmem_used: c_ulonglong,
+    kmem_limit: c_ulonglong,
+    /* Cache usage */
+    cache: c_ulonglong,
+    cache_total: c_ulonglong,
+    inactive_file_total: c_ulonglong,
+    /* Swap usage*/
+    swap_used: c_ulonglong,
+    swap_limit: c_ulonglong,
+}
+
+impl Stats {
+    /// Overwrite every field of `self` with the matching value from `in_obj`.
+    ///
+    /// Block I/O bytes are summed over all `io_service_bytes_recursive` entries,
+    /// matching the operation name case-insensitively. Entries for operations
+    /// other than read and write are skipped rather than counted as writes.
+    fn copy(&mut self, in_obj: Metrics) {
+        self.pids_current = in_obj.pids.current;
+        self.cpu_use_nanos = in_obj.cpu.usage.total;
+        self.cpu_system_use = in_obj.cpu.usage.kernel;
+        self.mem_used = in_obj.memory.usage.usage;
+        self.mem_limit = in_obj.memory.usage.limit;
+        self.rss_bytes = in_obj.memory.rss;
+        self.page_faults = in_obj.memory.pg_fault;
+        self.major_page_faults = in_obj.memory.pg_maj_fault;
+        self.kmem_used = in_obj.memory.kernel.usage;
+        self.kmem_limit = in_obj.memory.kernel.limit;
+        self.cache = in_obj.memory.cache;
+        self.cache_total = in_obj.memory.total_cache;
+        self.inactive_file_total = in_obj.memory.total_inactive_file;
+        self.swap_limit = in_obj.memory.swap.limit;
+        self.swap_used = in_obj.memory.swap.usage;
+        // Restart the sums from zero: the caller's struct may still hold values
+        // from a previous call or may never have been initialized.
+        self.blkio_read = 0;
+        self.blkio_write = 0;
+        for ele in in_obj.blkio.io_service_bytes_recursive.iter() {
+            if ele.op.eq_ignore_ascii_case("read") {
+                self.blkio_read = self.blkio_read.saturating_add(ele.value);
+            } else if ele.op.eq_ignore_ascii_case("write") {
+                self.blkio_write = self.blkio_write.saturating_add(ele.value);
+            }
+        }
+    }
+}
+
+/// C entry point: fill `stats` with the current resource usage of `container_id`.
+///
+/// Returns 0 on success. Returns -1 when `get_conn` or the stats request fails,
+/// in which case `stats` is left unmodified.
+#[no_mangle]
+pub extern "C" fn shim_v2_stats(container_id: *const c_char, stats: &mut Stats) -> c_int {
+    let r_container_id = to_string(container_id);
+    println!("lib-shim-v2::stats::{}::", r_container_id);
+    get_conn(&r_container_id)
+        .and_then(|client| {
+            client.stats().map(|metrics| {
+                println!("lib-shim-v2::metrics data::{:?}", metrics);
+                stats.copy(metrics);
+                0
+            })
+        })
+        .unwrap_or_else(|e| {
+            println!("lib-shim-v2::stats::{}:: failed, {}.", r_container_id, e);
+            -1
+        })
+}
+
 #[no_mangle]
 pub extern "C" fn shim_v2_pids(container_id: *const c_char, pid: &mut c_int) -> c_int {
     let r_container_id = to_string(container_id);
diff --git a/src/protocols/mod.rs b/src/protocols/mod.rs
index cc5794e..dfe4f42 100644
--- a/src/protocols/mod.rs
+++ b/src/protocols/mod.rs
@@ -13,6 +13,7 @@
 pub mod any;
 pub mod empty;
 pub mod gogo;
+pub mod metrics;
 pub mod mount;
 pub mod shim;
 pub mod shim_ttrpc;
diff --git a/src/protocols/protos/github.com/containerd/cgroup/stats/v1/metrics.proto b/src/protocols/protos/github.com/containerd/cgroup/stats/v1/metrics.proto
new file mode 100644
index 0000000..b3f6cc3
--- /dev/null
+++ b/src/protocols/protos/github.com/containerd/cgroup/stats/v1/metrics.proto
@@ -0,0 +1,158 @@
+syntax = "proto3";
+
+package io.containerd.cgroups.v1;
+
+import "gogoproto/gogo.proto";
+
+message Metrics {
+	repeated HugetlbStat hugetlb = 1;
+	PidsStat pids = 2;
+	CPUStat cpu = 3 [(gogoproto.customname) = "CPU"];
+	MemoryStat memory = 4;
+	BlkIOStat blkio = 5;
+	RdmaStat rdma = 6;
+	repeated NetworkStat network = 7;
+	CgroupStats cgroup_stats = 8;
+	MemoryOomControl memory_oom_control = 9;
+}
+
+message HugetlbStat {
+	uint64 usage = 1;
+	uint64 max = 2;
+	uint64 failcnt = 3;
+	string pagesize = 4;
+}
+
+message PidsStat {
+	uint64 current = 1;
+	uint64 limit = 2;
+}
+
+message CPUStat {
+	CPUUsage usage = 1;
+	Throttle throttling = 2;
+}
+
+message CPUUsage {
+	// values in nanoseconds
+	uint64 total = 1;
+	uint64 kernel = 2;
+	uint64 user = 3;
+	repeated uint64 per_cpu = 4 [(gogoproto.customname) = "PerCPU"];
+
+}
+
+message Throttle {
+	uint64 periods = 1;
+	uint64 throttled_periods = 2;
+	uint64 throttled_time = 3;
+}
+
+message MemoryStat {
+	uint64 cache = 1;
+	uint64 rss = 2 [(gogoproto.customname) = "RSS"];
+	uint64 rss_huge = 3 [(gogoproto.customname) = "RSSHuge"];
+	uint64 mapped_file = 4;
+	uint64 dirty = 5;
+	uint64 writeback = 6;
+	uint64 pg_pg_in = 7;
+	uint64 pg_pg_out = 8;
+	uint64 pg_fault = 9;
+	uint64 pg_maj_fault = 10;
+	uint64 inactive_anon = 11;
+	uint64 active_anon = 12;
+	uint64 inactive_file = 13;
+	uint64 active_file = 14;
+	uint64 unevictable = 15;
+	uint64 hierarchical_memory_limit = 16;
+	uint64 hierarchical_swap_limit = 17;
+	uint64 total_cache = 18;
+	uint64 total_rss = 19 [(gogoproto.customname) = "TotalRSS"];
+	uint64 total_rss_huge = 20 [(gogoproto.customname) = "TotalRSSHuge"];
+	uint64 total_mapped_file = 21;
+	uint64 total_dirty = 22;
+	uint64 total_writeback = 23;
+	uint64 total_pg_pg_in = 24;
+	uint64 total_pg_pg_out = 25;
+	uint64 total_pg_fault = 26;
+	uint64 total_pg_maj_fault = 27;
+	uint64 total_inactive_anon = 28;
+	uint64 total_active_anon = 29;
+	uint64 total_inactive_file = 30;
+	uint64 total_active_file = 31;
+	uint64 total_unevictable = 32;
+	MemoryEntry usage = 33;
+	MemoryEntry swap = 34;
+	MemoryEntry kernel = 35;
+	MemoryEntry kernel_tcp = 36 [(gogoproto.customname) = "KernelTCP"];
+
+}
+
+message MemoryEntry {
+	uint64 limit = 1;
+	uint64 usage = 2;
+	uint64 max = 3;
+	uint64 failcnt = 4;
+}
+
+message MemoryOomControl {
+	uint64 oom_kill_disable = 1;
+	uint64 under_oom = 2;
+	uint64 oom_kill = 3;
+}
+
+message BlkIOStat {
+	repeated BlkIOEntry io_service_bytes_recursive = 1;
+	repeated BlkIOEntry io_serviced_recursive = 2;
+	repeated BlkIOEntry io_queued_recursive = 3;
+	repeated BlkIOEntry io_service_time_recursive = 4;
+	repeated BlkIOEntry io_wait_time_recursive = 5;
+	repeated BlkIOEntry io_merged_recursive = 6;
+	repeated BlkIOEntry io_time_recursive = 7;
+	repeated BlkIOEntry sectors_recursive = 8;
+}
+
+message BlkIOEntry {
+	string op = 1;
+	string device = 2;
+	uint64 major = 3;
+	uint64 minor = 4;
+	uint64 value = 5;
+}
+
+message RdmaStat {
+	repeated RdmaEntry current = 1;
+	repeated RdmaEntry limit = 2;
+}
+
+message RdmaEntry {
+	string device = 1;
+	uint32 hca_handles = 2;
+	uint32 hca_objects = 3;
+}
+
+message NetworkStat {
+	string name = 1;
+	uint64 rx_bytes = 2;
+	uint64 rx_packets = 3;
+	uint64 rx_errors = 4;
+	uint64 rx_dropped = 5;
+	uint64 tx_bytes = 6;
+	uint64 tx_packets = 7;
+	uint64 tx_errors = 8;
+	uint64 tx_dropped = 9;
+}
+
+// CgroupStats exports per-cgroup statistics.
+message CgroupStats {
+	// number of tasks sleeping
+	uint64 nr_sleeping = 1;
+	// number of tasks running
+	uint64 nr_running = 2;
+	// number of tasks in stopped state
+	uint64 nr_stopped = 3;
+	// number of tasks in uninterruptible state
+	uint64 nr_uninterruptible = 4;
+	// number of tasks waiting on IO
+	uint64 nr_io_wait = 5;
+}
-- 
2.43.0