diff options
| author | Hazel Atkinson <yellowsink@riseup.net> | 2025-04-08 14:26:32 +0100 |
|---|---|---|
| committer | Hazel Atkinson <yellowsink@riseup.net> | 2025-04-08 14:31:53 +0100 |
| commit | 707539b3a98db039f11234c36bc9aa09a1521bab (patch) | |
| tree | 9f5b1a084ca76d2e01969ac1f458b7ba4c9e2a08 /src | |
| parent | d351eb8d5a00e597cea3f93abeb2c91f077b3ceb (diff) | |
| download | containerspy-707539b3a98db039f11234c36bc9aa09a1521bab.tar.gz containerspy-707539b3a98db039f11234c36bc9aa09a1521bab.tar.bz2 containerspy-707539b3a98db039f11234c36bc9aa09a1521bab.zip | |
impl memory metrics
Diffstat (limited to 'src')
| -rw-r--r-- | src/stats_task.rs | 102 |
1 files changed, 86 insertions, 16 deletions
diff --git a/src/stats_task.rs b/src/stats_task.rs index a687a37..ec73685 100644 --- a/src/stats_task.rs +++ b/src/stats_task.rs @@ -60,7 +60,7 @@ pub fn launch_stats_task( // I'm going to rust jail! let first_read = unsafe { first_read.assume_init() }; - let Stats { blkio_stats, networks: mut last_net_stats, .. } = first_read; + let Stats { blkio_stats, networks: mut last_net_stats, memory_stats: mut last_mem_stats, .. } = first_read; let mut last_io_stats = blkio_stats.io_service_bytes_recursive; @@ -86,10 +86,34 @@ pub fn launch_stats_task( } } - // free space and make mutable + // other label sets that are static per container + let mut labels_mem_container_min_c = shared_labels.clone(); + labels_mem_container_min_c.push(KeyValue::new("failure_type", "pgfault")); + + let mut labels_mem_container_maj_c = shared_labels.clone(); + labels_mem_container_maj_c.push(KeyValue::new("failure_type", "pgmajfault")); + + let mut labels_mem_container_min_h = labels_mem_container_min_c.clone(); + labels_mem_container_min_h.push(KeyValue::new("scope", "hierarchy")); + labels_mem_container_min_c.push(KeyValue::new("scope", "container")); + + let mut labels_mem_container_maj_h = labels_mem_container_maj_c.clone(); + labels_mem_container_maj_h.push(KeyValue::new("scope", "hierarchy")); + labels_mem_container_maj_c.push(KeyValue::new("scope", "container")); + + // free space and make immutable shared_labels.shrink_to_fit(); let shared_labels = &shared_labels[..]; + labels_mem_container_min_c.shrink_to_fit(); + labels_mem_container_min_h.shrink_to_fit(); + labels_mem_container_maj_c.shrink_to_fit(); + labels_mem_container_maj_h.shrink_to_fit(); + let labels_mem_container_min_c = &labels_mem_container_min_c[..]; + let labels_mem_container_min_h = &labels_mem_container_min_h[..]; + let labels_mem_container_maj_c = &labels_mem_container_maj_c[..]; + let labels_mem_container_maj_h = &labels_mem_container_maj_h[..]; + //println!("Starting reporting for container: {shared_labels:?}"); // 
create meters @@ -141,7 +165,36 @@ pub fn launch_stats_task( .with_description("Last time this container was seen by ContainerSpy") .build(); - // memory stats go here + // annoyingly a lot of the meter names cadvisor went with don't have units attached even though they have known units + let meter_container_memory_cache = meter + .u64_gauge("container_memory_cache") + //.with_unit("By") + .with_description("Total page cache memory") + .build(); + let meter_container_memory_failures_total = meter + .u64_counter("container_memory_failures_total") + .with_description("Cumulative count of memory allocation failures") + .build(); + let meter_container_memory_mapped_file = meter + .u64_gauge("container_memory_mapped_file") + //.with_unit("By") + .with_description("Size of memory mapped files") + .build(); + let meter_container_memory_rss = meter + .u64_gauge("container_memory_rss") + //.with_unit("By") + .with_description("Size of RSS") + .build(); + let meter_container_memory_usage_bytes = meter + .u64_gauge("container_memory_usage_bytes") + .with_unit("By") + .with_description("Current memory usage, including all memory regardless of when it was accessed") + .build(); + let meter_container_memory_working_set_bytes = meter + .u64_gauge("container_memory_working_set_bytes") + .with_unit("By") + .with_description("Current working set") + .build(); let meter_container_network_receive_bytes_total = meter .u64_counter("container_network_receive_bytes_total") @@ -288,38 +341,55 @@ pub fn launch_stats_task( // - https://github.com/google/cadvisor/blob/f6e31a3c/info/v1/container.go#L389 (yes, v1, roll w it) // - https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html + // see https://stackoverflow.com/a/66778814 and also https://archive.is/qJWTp + // also see this comparison between cAdvisor output and {stats.memory_stats.usage:?} {v2stats:?} + // on my dev laptop: https://web.archive.org/web/20250408121954/https://pastebin.com/Kc4Ur0Hr + // and jackpot: 
https://github.com/google/cadvisor/blob/1f17a6c/container/libcontainer/handler.go#L808 + if let Some(all_usage) = stats.memory_stats.usage { if cfg!(windows) { // todo // i have no way to test cgroups v2 so only work on v1 - see readme for more info } else if let Some(MemoryStatsStats::V2(v2stats)) = stats.memory_stats.stats { - // container_memory_cache - // container_memory_failcnt only on cgroups v1 + // container_memory_cache + meter_container_memory_cache.record(v2stats.file, shared_labels); // container_memory_failures_total - v2stats.pgfault; // label failure_type=pgfault - v2stats.pgmajfault; // label failure_type=pgmajfault + // need last + if let Some(MemoryStatsStats::V2(last_v2)) = last_mem_stats.stats { + meter_container_memory_failures_total.add(v2stats.pgfault - last_v2.pgfault, labels_mem_container_min_c); + meter_container_memory_failures_total.add(v2stats.pgfault - last_v2.pgfault, labels_mem_container_min_h); - // container_memory_mapped_file - v2stats.file; // includes tmpfs - - // container_memory_max_usage_bytes only on cgroups v1 + meter_container_memory_failures_total.add(v2stats.pgmajfault - last_v2.pgmajfault, labels_mem_container_maj_c); + meter_container_memory_failures_total.add(v2stats.pgmajfault - last_v2.pgmajfault, labels_mem_container_maj_h); + } - // container_memory_migrate + // container_memory_kernel_usage + // actually not reported by cA but is reported by docker! 
+ // not sure if slab contains kernel_stack or not though + // in my one sample, kernel_stack < slab + //v2stats.slab + v2stats.kernel_stack; - // container_memory_numa_pages omitted cause its hard :< + // container_memory_mapped_file + meter_container_memory_mapped_file.record(v2stats.file_mapped, shared_labels); // includes tmpfs - // container_memory_rss: may need recalcing + // container_memory_rss + meter_container_memory_rss.record(v2stats.anon, shared_labels); // container_memory_swap: can't get + // need access to memory.swap.*, but we only have memory.stat :( - // container_memory_usage_bytes: how? + // container_memory_usage_bytes + meter_container_memory_usage_bytes.record(all_usage, shared_labels); - // container_memory_working_set_bytes: not reported + // container_memory_working_set_bytes + meter_container_memory_working_set_bytes.record(all_usage - v2stats.inactive_file, shared_labels); } } + last_mem_stats = stats.memory_stats; + // networking // TODO: what is stats.network? is it populated on windows? if let Some(net) = &stats.networks { |
