Files
songbird/examples/serenity/voice_receive/src/main.rs
Kyle Simpson 8cc7a22b0b Driver/Input: Migrate audio backend to Symphonia (#89)
This extensive PR rewrites the internal mixing logic of the driver to use symphonia for parsing and decoding audio data, and rubato to resample audio. Existing logic to decode DCA and Opus formats/data has been reworked as plugins for symphonia. The main benefit is that we no longer need to keep yt-dlp and ffmpeg processes alive, saving a lot of memory and CPU: all decoding can be done in Rust! In exchange, we now need to do a lot of the HTTP handling and resumption ourselves, but this is still a huge net positive.

`Input`s have been completely reworked such that all default (non-cached) sources are lazy by default, and are no longer covered by a special-case `Restartable`. These now span a gamut from a `Compose` (lazy), to a live source, to a fully `Parsed` source. As mixing is still sync, this includes adapters for `AsyncRead`/`AsyncSeek`, and HTTP streams.

`Track`s have been reworked so that they only contain initialisation state for each track. `TrackHandle`s are only created once a `Track`/`Input` has been handed over to the driver, replacing `create_player` and related functions. `TrackHandle::action` now acts on a `View` of (im)mutable state, and can request seeks/readying via `Action`.

Per-track event handling has also been improved -- we can now determine and propagate the reason behind individual track errors due to the new backend. Some `TrackHandle` commands (seek etc.) benefit from this, and now use internal callbacks to signal completion.

Due to associated PRs on felixmcfelix/songbird from avid testers, this includes general clippy tweaks, API additions, and other repo-wide cleanup. Thanks go out to the below co-authors.

Co-authored-by: Gnome! <45660393+GnomedDev@users.noreply.github.com>
Co-authored-by: Alakh <36898190+alakhpc@users.noreply.github.com>
2023-11-19 23:58:34 +00:00

266 lines
8.6 KiB
Rust

//! Requires the "client", "standard_framework", and "voice" features be enabled
//! in your Cargo.toml, like so:
//!
//! ```toml
//! [dependencies.serenity]
//! git = "https://github.com/serenity-rs/serenity.git"
//! features = ["client", "standard_framework", "voice"]
//! ```
use std::env;
use serenity::{
async_trait,
client::{Client, Context, EventHandler},
framework::{
standard::{
macros::{command, group},
Args,
CommandResult,
},
StandardFramework,
},
model::{channel::Message, gateway::Ready, id::ChannelId},
prelude::{GatewayIntents, Mentionable},
Result as SerenityResult,
};
use songbird::{
driver::DecodeMode,
model::payload::{ClientDisconnect, Speaking},
Config,
CoreEvent,
Event,
EventContext,
EventHandler as VoiceEventHandler,
SerenityInit,
};
/// Gateway event handler for the bot's client.
///
/// The only event handled here is `ready`, which announces the bot's user name on stdout.
struct Handler;

#[async_trait]
impl EventHandler for Handler {
    /// Prints the connected bot user's name.
    async fn ready(&self, _ctx: Context, about_bot: Ready) {
        let bot_name = &about_bot.user.name;
        println!("{} is connected!", bot_name);
    }
}
/// Handler type registered for voice events in this example; it carries no state.
struct Receiver;

impl Receiver {
    /// Builds a new receiver.
    pub fn new() -> Self {
        // This is the place to set up any state you want to manage, e.g. a
        // buffer of audio packet bytes that you later store in intervals.
        Receiver
    }
}
#[async_trait]
impl VoiceEventHandler for Receiver {
#[allow(unused_variables)]
async fn act(&self, ctx: &EventContext<'_>) -> Option<Event> {
use EventContext as Ctx;
match ctx {
Ctx::SpeakingStateUpdate(Speaking {
speaking,
ssrc,
user_id,
..
}) => {
// Discord voice calls use RTP, where every sender uses a randomly allocated
// *Synchronisation Source* (SSRC) to allow receivers to tell which audio
// stream a received packet belongs to. As this number is not derived from
// the sender's user_id, only Discord Voice Gateway messages like this one
// inform us about which random SSRC a user has been allocated. Future voice
// packets will contain *only* the SSRC.
//
// You can implement logic here so that you can differentiate users'
// SSRCs and map the SSRC to the User ID and maintain this state.
// Using this map, you can map the `ssrc` in `voice_packet`
// to the user ID and handle their audio packets separately.
println!(
"Speaking state update: user {:?} has SSRC {:?}, using {:?}",
user_id, ssrc, speaking,
);
},
Ctx::SpeakingUpdate(data) => {
// You can implement logic here which reacts to a user starting
// or stopping speaking, and to map their SSRC to User ID.
println!(
"Source {} has {} speaking.",
data.ssrc,
if data.speaking { "started" } else { "stopped" },
);
},
Ctx::VoicePacket(data) => {
// An event which fires for every received audio packet,
// containing the decoded data.
if let Some(audio) = data.audio {
println!(
"Audio packet's first 5 samples: {:?}",
audio.get(..5.min(audio.len()))
);
println!(
"Audio packet sequence {:05} has {:04} bytes (decompressed from {}), SSRC {}",
data.packet.sequence.0,
audio.len() * std::mem::size_of::<i16>(),
data.packet.payload.len(),
data.packet.ssrc,
);
} else {
println!("RTP packet, but no audio. Driver may not be configured to decode.");
}
},
Ctx::RtcpPacket(data) => {
// An event which fires for every received rtcp packet,
// containing the call statistics and reporting information.
println!("RTCP packet received: {:?}", data.packet);
},
Ctx::ClientDisconnect(ClientDisconnect { user_id, .. }) => {
// You can implement your own logic here to handle a user who has left the
// voice channel e.g., finalise processing of statistics etc.
// You will typically need to map the User ID to their SSRC; observed when
// first speaking.
println!("Client disconnected: user {:?}", user_id);
},
_ => {
// We won't be registering this struct for any more event classes.
unimplemented!()
},
}
None
}
}
// Command group listing the `join`, `leave`, and `ping` commands defined in this file.
// NOTE(review): `main` refers to `GENERAL_GROUP`, presumably generated from this
// declaration by the `#[group]` macro -- confirm against the serenity framework docs.
#[group]
#[commands(join, leave, ping)]
struct General;
/// Sets up logging, builds the Discord client with the command framework and
/// a decoding Songbird configuration, then runs the client until it ends.
///
/// # Panics
///
/// Panics if `DISCORD_TOKEN` is not set in the environment, or if the client
/// cannot be created.
#[tokio::main]
async fn main() {
    tracing_subscriber::fmt::init();

    // The Discord bot token is taken from the environment.
    let token = env::var("DISCORD_TOKEN").expect("Expected a token in the environment");

    let framework = StandardFramework::new().group(&GENERAL_GROUP);
    framework.configure(|c| c.prefix("~"));

    // Songbird must be told to decode all incoming voice packets: we want to
    // read the audio data that other people are sending us. This could also be
    // configured on a per-call basis instead.
    let songbird_config = Config::default().decode_mode(DecodeMode::Decode);

    let intents = GatewayIntents::non_privileged() | GatewayIntents::MESSAGE_CONTENT;

    let mut client = Client::builder(&token, intents)
        .event_handler(Handler)
        .framework(framework)
        .register_songbird_from_config(songbird_config)
        .await
        .expect("Err creating client");

    if let Err(why) = client.start().await {
        println!("Client ended: {:?}", why);
    }
}
// Joins the voice channel whose ID is given as the first argument and registers
// a `Receiver` for each voice event class this example prints.
//
// Replies with an error message (and still returns `Ok`) when the argument is
// not a valid non-zero integer ID, or when joining fails.
//
// Plain `//` comments are used on purpose so that no `#[doc]` attribute is
// passed to the `#[command]` macro.
#[command]
#[only_in(guilds)]
async fn join(ctx: &Context, msg: &Message, mut args: Args) -> CommandResult {
    let connect_to = match args.single::<std::num::NonZeroU64>() {
        Ok(id) => ChannelId(id),
        Err(_) => {
            check_msg(
                msg.reply(ctx, "Requires a valid voice channel ID be given")
                    .await,
            );

            return Ok(());
        },
    };

    let guild_id = msg.guild_id.unwrap();

    // `songbird::get` already hands back an owned handle, so no extra clone is needed.
    let manager = songbird::get(ctx)
        .await
        .expect("Songbird Voice client placed in at initialisation.");

    let (handler_lock, conn_result) = manager.join(guild_id, connect_to).await;

    if conn_result.is_ok() {
        // NOTE: this skips listening for the actual connection result.
        let mut handler = handler_lock.lock().await;

        // Each event class gets its own `Receiver` instance.
        handler.add_global_event(CoreEvent::SpeakingStateUpdate.into(), Receiver::new());
        handler.add_global_event(CoreEvent::SpeakingUpdate.into(), Receiver::new());
        handler.add_global_event(CoreEvent::VoicePacket.into(), Receiver::new());
        handler.add_global_event(CoreEvent::RtcpPacket.into(), Receiver::new());
        handler.add_global_event(CoreEvent::ClientDisconnect.into(), Receiver::new());

        check_msg(
            msg.channel_id
                .say(&ctx.http, format!("Joined {}", connect_to.mention()))
                .await,
        );
    } else {
        check_msg(
            msg.channel_id
                .say(&ctx.http, "Error joining the channel")
                .await,
        );
    }

    Ok(())
}
// Leaves the voice channel in the guild the command was sent from.
//
// Replies "Not in a voice channel" when there is no call handler for the guild.
// Otherwise it removes the handler and reports either success or the failure
// reason -- never both.
//
// Plain `//` comments are used on purpose so that no `#[doc]` attribute is
// passed to the `#[command]` macro.
#[command]
#[only_in(guilds)]
async fn leave(ctx: &Context, msg: &Message) -> CommandResult {
    let guild_id = msg.guild_id.unwrap();

    // `songbird::get` already hands back an owned handle, so no extra clone is needed.
    let manager = songbird::get(ctx)
        .await
        .expect("Songbird Voice client placed in at initialisation.");

    if manager.get(guild_id).is_none() {
        check_msg(msg.reply(ctx, "Not in a voice channel").await);
        return Ok(());
    }

    match manager.remove(guild_id).await {
        Ok(_) => {
            check_msg(msg.channel_id.say(&ctx.http, "Left voice channel").await);
        },
        Err(e) => {
            // Only the failure is reported here; claiming that we left the
            // channel after a failed removal would be misleading.
            check_msg(
                msg.channel_id
                    .say(&ctx.http, format!("Failed: {:?}", e))
                    .await,
            );
        },
    }

    Ok(())
}
// Answers with "Pong!" in the channel the command was sent from.
//
// Plain `//` comments are used on purpose so that no `#[doc]` attribute is
// passed to the `#[command]` macro.
#[command]
async fn ping(ctx: &Context, msg: &Message) -> CommandResult {
    let sent = msg.channel_id.say(&ctx.http, "Pong!").await;
    check_msg(sent);

    Ok(())
}
/// Inspects the outcome of sending a message: a failure is logged to stdout
/// with its reason, a success is silently discarded.
fn check_msg(result: SerenityResult<Message>) {
    match result {
        Ok(_) => {},
        Err(why) => println!("Error sending message: {:?}", why),
    }
}