From cd5fbac13f68d2e054632570bb3520f9a3ff8e42 Mon Sep 17 00:00:00 2001 From: Hazel Atkinson Date: Mon, 7 Apr 2025 13:43:12 +0100 Subject: cleanups, make workers close neatly on container exit --- src/config.rs | 2 +- src/main.rs | 7 ++++--- src/stats_task.rs | 15 ++++++++++++++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/config.rs b/src/config.rs index 52cb69c..212ca64 100644 --- a/src/config.rs +++ b/src/config.rs @@ -35,7 +35,7 @@ pub static CONFIG: LazyLock = LazyLock::new(|| { struct ProtoDeserVisitor; /// deserialization boilerplate -impl<'de> confique::serde::de::Visitor<'de> for ProtoDeserVisitor { +impl confique::serde::de::Visitor<'_> for ProtoDeserVisitor { type Value = Protocol; fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { diff --git a/src/main.rs b/src/main.rs index dd8f6a2..c703fc2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,9 +4,9 @@ use anyhow::Result; use bollard::Docker; use config::CONFIG; use opentelemetry_otlp::{MetricExporter, Protocol, WithExportConfig}; -use opentelemetry_sdk::metrics::{PeriodicReader, PeriodicReaderBuilder, SdkMeterProvider}; +use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider}; use tokio::task::JoinHandle; -use tokio::time::interval; +use tokio::time::MissedTickBehavior; use tokio_util::sync::CancellationToken; mod config; @@ -80,7 +80,8 @@ async fn main() -> Result<()> { st2.cancel(); }); - let mut container_search_interval = tokio::time::interval(Duration::from_secs(1)); + let mut container_search_interval = tokio::time::interval(Duration::from_millis(CONFIG.otlp_export_interval.unwrap_or(6000)) / 2); + container_search_interval.set_missed_tick_behavior(MissedTickBehavior::Skip); let mut tasks: BTreeMap> = BTreeMap::new(); diff --git a/src/stats_task.rs b/src/stats_task.rs index 4240a63..8b82373 100644 --- a/src/stats_task.rs +++ b/src/stats_task.rs @@ -1,5 +1,5 @@ use std::mem::MaybeUninit; -use bollard::container::{BlkioStats, BlkioStatsEntry, StatsOptions}; +use bollard::container::{BlkioStatsEntry, StatsOptions}; use bollard::models::ContainerSummary; use bollard::Docker; use opentelemetry::metrics::MeterProvider; @@ -136,6 +136,19 @@ pub fn launch_stats_task( while let Some(val) = stats_stream.next().await { if let Ok(stats) = val { + + // when a container exits, instead of a None we get sent Ok()s with zeroes in it forever, horror + if stats.cpu_stats.cpu_usage.total_usage == 0 { + if stats.precpu_stats.cpu_usage.total_usage != 0 { break; } + else { + // last time was ALSO a zero, so this MIGHT actually be (SOMEHOW?) legit, + // so just loop around again, and wait for the main task to abort() this worker task instead! + // which it will if this container died, or if we are gonna get real stats later, it won't... + // man i dont know i should probably just break lol + continue; + } + }; + meter_container_cpu_usage_seconds_total.add( cpu_delta_from_docker( stats.cpu_stats.cpu_usage.total_usage, -- cgit