feat: watchdog for detecting tokio runtime deadlock (I wish I could prevent it, but I don't know what's causing it)

This commit is contained in:
2026-06-12 20:01:46 -04:00
parent a0a0632a1d
commit c7300a9e6d
3 changed files with 70 additions and 1 deletions

View File

@@ -7,7 +7,14 @@ use fomo_reducer::{
use secrecy::{ExposeSecret, SecretString};
use snafu::{OptionExt, ResultExt, Snafu};
use songbird::{Config, Songbird, driver::DecodeConfig, shards::TwilightMap};
use std::{collections::BTreeMap, fmt::Debug, str::FromStr, sync::Arc, time::Duration};
use std::{
collections::BTreeMap,
fmt::{Debug, Display},
num::NonZero,
str::FromStr,
sync::Arc,
time::Duration,
};
use tokio::{select, signal::ctrl_c, task::JoinSet};
use tokio_util::{sync::CancellationToken, time::FutureExt as _};
use tracing::Level;
@@ -68,6 +75,29 @@ fn parse_guild_vc_to_text_channel(
Ok((guild, voice_channel, text_channel))
}
/// Newtype over [`Duration`] used for command-line/environment arguments.
///
/// Its `FromStr` and `Display` implementations go through the `humantime`
/// crate, while `Debug` is forwarded to the wrapped [`Duration`].
#[derive(Clone)]
struct HumanDuration(Duration);
/// Parses a [`HumanDuration`] from text using `humantime::parse_duration`.
impl FromStr for HumanDuration {
    type Err = humantime::DurationError;

    /// Returns the wrapped duration on success, or the `humantime` parse error unchanged.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = humantime::parse_duration(s)?;
        Ok(Self(parsed))
    }
}
impl Debug for HumanDuration {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
/// Renders the duration with `humantime::format_duration`.
impl Display for HumanDuration {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `Duration` is `Copy`, so the inner value is handed over by value.
        let rendered = humantime::format_duration(self.0);
        write!(formatter, "{rendered}")
    }
}
#[derive(Debug, Parser)]
struct AppArgs {
#[arg(long, env)]
@@ -103,6 +133,12 @@ struct AppArgs {
#[arg(long, env)]
render_data: Storage,
#[arg(long, env, default_value_t = HumanDuration(Duration::from_secs(5)))]
watchdog_frequency: HumanDuration,
#[arg(long, env, default_value_t = 8.try_into().unwrap())]
watchdog_channel_size: NonZero<usize>,
}
#[derive(Parser)]
@@ -166,6 +202,8 @@ async fn main() -> Result<(), MainError> {
user_data,
recording_data,
render_data,
watchdog_frequency: HumanDuration(watchdog_frequency),
watchdog_channel_size,
} = app_args;
let cancellation_token = CancellationToken::new();
@@ -301,6 +339,29 @@ async fn main() -> Result<(), MainError> {
}
});
// Runtime watchdog: a plain OS thread pushes a unit "heartbeat" into a bounded
// channel, and a tokio task drains it. The channel is created with the
// configured `watchdog_channel_size`.
let (mut watchdog_tx, mut watchdog_rx) =
futures::channel::mpsc::channel(watchdog_channel_size.get());
// Producer: runs on its own `std::thread` (not a tokio task) and sleeps with
// `std::thread::sleep`, so it does not rely on the tokio runtime to make progress.
std::thread::spawn(move || {
loop {
// A failed non-blocking send is treated as evidence that the runtime is no
// longer draining the channel; the process logs and exits with status 1.
// NOTE(review): `try_send` presumably also fails when the receiver has been
// dropped, which would be reported with the same "deadlocked" message — confirm.
if watchdog_tx.try_send(()).is_err() {
tracing::error!("tokio runtime deadlocked");
std::process::exit(1);
}
// One heartbeat per `watchdog_frequency`.
std::thread::sleep(watchdog_frequency);
}
});
// Consumer: a tokio task that keeps receiving heartbeats for as long as the
// runtime polls it.
tokio::spawn(async move {
loop {
// An error from `recv` is taken to mean the producer side is gone; since the
// producer loops forever, the code treats this as an impossible state and exits.
if watchdog_rx.recv().await.is_err() {
tracing::error!("watchdog died (this should be impossible)");
std::process::exit(1);
}
}
});
tokio::spawn(async {
let duration = Duration::from_secs(120);
let mut interval = tokio::time::interval(duration);