Adds the "receive" feature, which is disabled by default. When this is disabled, the UDP receive task is not compiled and not run, and as an optimisation the UDP receive buffer size is set to 0. All related events are also removed. This also removes the UDP Tx task, and moves packet and keepalive sends back into the mixer thread. This allows us to entirely remove channels and various allocations between the mixer and an async task created only for sending data (i.e., fewer memcopies). If "receive" is enabled, UDP sends are now non-blocking due to technical constraints -- failure to send is non-fatal, but *will* drop affected packets. Given that blocking on a UDP send indicates that the OS cannot clear send buffers fast enough, this should alleviate OS load. Closes #131.
420 lines
14 KiB
Rust
420 lines
14 KiB
Rust
use super::{
|
|
error::{Error, Result},
|
|
message::*,
|
|
Config,
|
|
};
|
|
use crate::{
|
|
constants::*,
|
|
driver::{CryptoMode, DecodeMode},
|
|
events::{internal_data::*, CoreContext},
|
|
};
|
|
use audiopus::{
|
|
coder::Decoder as OpusDecoder,
|
|
error::{Error as OpusError, ErrorCode},
|
|
packet::Packet as OpusPacket,
|
|
Channels,
|
|
};
|
|
use discortp::{
|
|
demux::{self, DemuxedMut},
|
|
rtp::{RtpExtensionPacket, RtpPacket},
|
|
FromPacket,
|
|
Packet,
|
|
PacketSize,
|
|
};
|
|
use flume::Receiver;
|
|
use std::{collections::HashMap, convert::TryInto};
|
|
use tokio::{net::UdpSocket, select};
|
|
use tracing::{error, instrument, trace, warn};
|
|
use xsalsa20poly1305::XSalsa20Poly1305 as Cipher;
|
|
|
|
#[derive(Debug)]
|
|
struct SsrcState {
|
|
silent_frame_count: u16,
|
|
decoder: OpusDecoder,
|
|
last_seq: u16,
|
|
decode_size: PacketDecodeSize,
|
|
}
|
|
|
|
/// Candidate output-buffer sizes to try when decoding a source's Opus packets,
/// ordered from smallest to largest.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum PacketDecodeSize {
    /// 20ms: the minimum frame size on Discord.
    TwentyMillis,
    /// 30ms: hybrid packet, sent by the Firefox web client.
    ///
    /// Likely a 20ms frame followed by a 10ms frame.
    ThirtyMillis,
    /// 40ms: the next largest frame size.
    FortyMillis,
    /// 60ms: the maximum Opus frame size.
    SixtyMillis,
    /// 120ms: the maximum Opus packet size.
    Max,
}
|
|
|
|
impl PacketDecodeSize {
|
|
fn bump_up(self) -> Self {
|
|
match self {
|
|
Self::TwentyMillis => Self::ThirtyMillis,
|
|
Self::ThirtyMillis => Self::FortyMillis,
|
|
Self::FortyMillis => Self::SixtyMillis,
|
|
Self::SixtyMillis | Self::Max => Self::Max,
|
|
}
|
|
}
|
|
|
|
fn can_bump_up(self) -> bool {
|
|
self != Self::Max
|
|
}
|
|
|
|
fn len(self) -> usize {
|
|
match self {
|
|
Self::TwentyMillis => STEREO_FRAME_SIZE,
|
|
Self::ThirtyMillis => (STEREO_FRAME_SIZE / 2) * 3,
|
|
Self::FortyMillis => 2 * STEREO_FRAME_SIZE,
|
|
Self::SixtyMillis => 3 * STEREO_FRAME_SIZE,
|
|
Self::Max => 6 * STEREO_FRAME_SIZE,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Change in a source's speaking state produced by processing one packet.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SpeakingDelta {
    /// Speaking state is unchanged.
    Same,
    /// The source has just started speaking.
    Start,
    /// The source has just stopped speaking.
    Stop,
}
|
|
|
|
impl SsrcState {
|
|
fn new(pkt: &RtpPacket<'_>) -> Self {
|
|
Self {
|
|
silent_frame_count: 5, // We do this to make the first speech packet fire an event.
|
|
decoder: OpusDecoder::new(SAMPLE_RATE, Channels::Stereo)
|
|
.expect("Failed to create new Opus decoder for source."),
|
|
last_seq: pkt.get_sequence().into(),
|
|
decode_size: PacketDecodeSize::TwentyMillis,
|
|
}
|
|
}
|
|
|
|
fn process(
|
|
&mut self,
|
|
pkt: &RtpPacket<'_>,
|
|
data_offset: usize,
|
|
data_trailer: usize,
|
|
decode_mode: DecodeMode,
|
|
decrypted: bool,
|
|
) -> Result<(SpeakingDelta, Option<Vec<i16>>)> {
|
|
let new_seq: u16 = pkt.get_sequence().into();
|
|
let payload_len = pkt.payload().len();
|
|
|
|
let extensions = pkt.get_extension() != 0;
|
|
let seq_delta = new_seq.wrapping_sub(self.last_seq);
|
|
Ok(if seq_delta >= (1 << 15) {
|
|
// Overflow, reordered (previously missing) packet.
|
|
(SpeakingDelta::Same, Some(vec![]))
|
|
} else {
|
|
self.last_seq = new_seq;
|
|
let missed_packets = seq_delta.saturating_sub(1);
|
|
|
|
// Note: we still need to handle this for non-decoded.
|
|
// This is mainly because packet events and speaking events can be handed to the
|
|
// user.
|
|
let (audio, pkt_size) = if decode_mode.should_decrypt() && decrypted {
|
|
self.scan_and_decode(
|
|
&pkt.payload()[data_offset..payload_len - data_trailer],
|
|
extensions,
|
|
missed_packets,
|
|
decode_mode == DecodeMode::Decode,
|
|
)?
|
|
} else {
|
|
// The latter part is an upper bound, as we cannot determine
|
|
// how long packet extensions are.
|
|
// WIthout decryption, speaking detection is thus broken.
|
|
(None, payload_len - data_offset - data_trailer)
|
|
};
|
|
|
|
let delta = if pkt_size == SILENT_FRAME.len() {
|
|
// Frame is silent.
|
|
let old = self.silent_frame_count;
|
|
self.silent_frame_count =
|
|
self.silent_frame_count.saturating_add(1 + missed_packets);
|
|
|
|
if self.silent_frame_count >= 5 && old < 5 {
|
|
SpeakingDelta::Stop
|
|
} else {
|
|
SpeakingDelta::Same
|
|
}
|
|
} else {
|
|
// Frame has meaningful audio.
|
|
let out = if self.silent_frame_count >= 5 {
|
|
SpeakingDelta::Start
|
|
} else {
|
|
SpeakingDelta::Same
|
|
};
|
|
self.silent_frame_count = 0;
|
|
out
|
|
};
|
|
|
|
(delta, audio)
|
|
})
|
|
}
|
|
|
|
fn scan_and_decode(
|
|
&mut self,
|
|
data: &[u8],
|
|
extension: bool,
|
|
missed_packets: u16,
|
|
decode: bool,
|
|
) -> Result<(Option<Vec<i16>>, usize)> {
|
|
let start = if extension {
|
|
RtpExtensionPacket::new(data)
|
|
.map(|pkt| pkt.packet_size())
|
|
.ok_or_else(|| {
|
|
error!("Extension packet indicated, but insufficient space.");
|
|
Error::IllegalVoicePacket
|
|
})
|
|
} else {
|
|
Ok(0)
|
|
}?;
|
|
|
|
let pkt = if decode {
|
|
let mut out = vec![0; self.decode_size.len()];
|
|
|
|
for _ in 0..missed_packets {
|
|
let missing_frame: Option<OpusPacket> = None;
|
|
let dest_samples = (&mut out[..])
|
|
.try_into()
|
|
.expect("Decode logic will cap decode buffer size at i32::MAX.");
|
|
if let Err(e) = self.decoder.decode(missing_frame, dest_samples, false) {
|
|
warn!("Issue while decoding for missed packet: {:?}.", e);
|
|
}
|
|
}
|
|
|
|
// In general, we should expect 20 ms frames.
|
|
// However, Discord occasionally like to surprise us with something bigger.
|
|
// This is *sender-dependent behaviour*.
|
|
//
|
|
// This should scan up to find the "correct" size that a source is using,
|
|
// and then remember that.
|
|
loop {
|
|
let tried_audio_len = self.decoder.decode(
|
|
Some(data[start..].try_into()?),
|
|
(&mut out[..]).try_into()?,
|
|
false,
|
|
);
|
|
match tried_audio_len {
|
|
Ok(audio_len) => {
|
|
// Decoding to stereo: audio_len refers to sample count irrespective of channel count.
|
|
// => multiply by number of channels.
|
|
out.truncate(2 * audio_len);
|
|
|
|
break;
|
|
},
|
|
Err(OpusError::Opus(ErrorCode::BufferTooSmall)) => {
|
|
if self.decode_size.can_bump_up() {
|
|
self.decode_size = self.decode_size.bump_up();
|
|
out = vec![0; self.decode_size.len()];
|
|
} else {
|
|
error!("Received packet larger than Opus standard maximum,");
|
|
return Err(Error::IllegalVoicePacket);
|
|
}
|
|
},
|
|
Err(e) => {
|
|
error!("Failed to decode received packet: {:?}.", e);
|
|
return Err(e.into());
|
|
},
|
|
}
|
|
}
|
|
|
|
Some(out)
|
|
} else {
|
|
None
|
|
};
|
|
|
|
Ok((pkt, data.len() - start))
|
|
}
|
|
}
|
|
|
|
struct UdpRx {
|
|
cipher: Cipher,
|
|
decoder_map: HashMap<u32, SsrcState>,
|
|
#[allow(dead_code)]
|
|
config: Config,
|
|
packet_buffer: [u8; VOICE_PACKET_MAX],
|
|
rx: Receiver<UdpRxMessage>,
|
|
udp_socket: UdpSocket,
|
|
}
|
|
|
|
impl UdpRx {
|
|
#[instrument(skip(self))]
|
|
async fn run(&mut self, interconnect: &mut Interconnect) {
|
|
loop {
|
|
select! {
|
|
Ok((len, _addr)) = self.udp_socket.recv_from(&mut self.packet_buffer[..]) => {
|
|
self.process_udp_message(interconnect, len);
|
|
}
|
|
msg = self.rx.recv_async() => {
|
|
match msg {
|
|
Ok(UdpRxMessage::ReplaceInterconnect(i)) => {
|
|
*interconnect = i;
|
|
},
|
|
Ok(UdpRxMessage::SetConfig(c)) => {
|
|
self.config = c;
|
|
},
|
|
Err(flume::RecvError::Disconnected) => break,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn process_udp_message(&mut self, interconnect: &Interconnect, len: usize) {
|
|
// NOTE: errors here (and in general for UDP) are not fatal to the connection.
|
|
// Panics should be avoided due to adversarial nature of rx'd packets,
|
|
// but correct handling should not prompt a reconnect.
|
|
//
|
|
// For simplicity, we nominate the mixing context to rebuild the event
|
|
// context if it fails (hence, the `let _ =` statements.), as it will try to
|
|
// make contact every 20ms.
|
|
let crypto_mode = self.config.crypto_mode;
|
|
let packet = &mut self.packet_buffer[..len];
|
|
|
|
match demux::demux_mut(packet) {
|
|
DemuxedMut::Rtp(mut rtp) => {
|
|
if !rtp_valid(&rtp.to_immutable()) {
|
|
error!("Illegal RTP message received.");
|
|
return;
|
|
}
|
|
|
|
let packet_data = if self.config.decode_mode.should_decrypt() {
|
|
let out = crypto_mode
|
|
.decrypt_in_place(&mut rtp, &self.cipher)
|
|
.map(|(s, t)| (s, t, true));
|
|
|
|
if let Err(e) = out {
|
|
warn!("RTP decryption failed: {:?}", e);
|
|
}
|
|
|
|
out.ok()
|
|
} else {
|
|
None
|
|
};
|
|
|
|
let rtp = rtp.to_immutable();
|
|
let (rtp_body_start, rtp_body_tail, decrypted) = packet_data.unwrap_or_else(|| {
|
|
(
|
|
CryptoMode::payload_prefix_len(),
|
|
crypto_mode.payload_suffix_len(),
|
|
false,
|
|
)
|
|
});
|
|
|
|
let entry = self
|
|
.decoder_map
|
|
.entry(rtp.get_ssrc())
|
|
.or_insert_with(|| SsrcState::new(&rtp));
|
|
|
|
if let Ok((delta, audio)) = entry.process(
|
|
&rtp,
|
|
rtp_body_start,
|
|
rtp_body_tail,
|
|
self.config.decode_mode,
|
|
decrypted,
|
|
) {
|
|
match delta {
|
|
SpeakingDelta::Start => {
|
|
drop(interconnect.events.send(EventMessage::FireCoreEvent(
|
|
CoreContext::SpeakingUpdate(InternalSpeakingUpdate {
|
|
ssrc: rtp.get_ssrc(),
|
|
speaking: true,
|
|
}),
|
|
)));
|
|
},
|
|
SpeakingDelta::Stop => {
|
|
drop(interconnect.events.send(EventMessage::FireCoreEvent(
|
|
CoreContext::SpeakingUpdate(InternalSpeakingUpdate {
|
|
ssrc: rtp.get_ssrc(),
|
|
speaking: false,
|
|
}),
|
|
)));
|
|
},
|
|
SpeakingDelta::Same => {},
|
|
}
|
|
|
|
drop(interconnect.events.send(EventMessage::FireCoreEvent(
|
|
CoreContext::VoicePacket(InternalVoicePacket {
|
|
audio,
|
|
packet: rtp.from_packet(),
|
|
payload_offset: rtp_body_start,
|
|
payload_end_pad: rtp_body_tail,
|
|
}),
|
|
)));
|
|
} else {
|
|
warn!("RTP decoding/processing failed.");
|
|
}
|
|
},
|
|
DemuxedMut::Rtcp(mut rtcp) => {
|
|
let packet_data = if self.config.decode_mode.should_decrypt() {
|
|
let out = crypto_mode.decrypt_in_place(&mut rtcp, &self.cipher);
|
|
|
|
if let Err(e) = out {
|
|
warn!("RTCP decryption failed: {:?}", e);
|
|
}
|
|
|
|
out.ok()
|
|
} else {
|
|
None
|
|
};
|
|
|
|
let (start, tail) = packet_data.unwrap_or_else(|| {
|
|
(
|
|
CryptoMode::payload_prefix_len(),
|
|
crypto_mode.payload_suffix_len(),
|
|
)
|
|
});
|
|
|
|
drop(interconnect.events.send(EventMessage::FireCoreEvent(
|
|
CoreContext::RtcpPacket(InternalRtcpPacket {
|
|
packet: rtcp.from_packet(),
|
|
payload_offset: start,
|
|
payload_end_pad: tail,
|
|
}),
|
|
)));
|
|
},
|
|
DemuxedMut::FailedParse(t) => {
|
|
warn!("Failed to parse message of type {:?}.", t);
|
|
},
|
|
DemuxedMut::TooSmall => {
|
|
warn!("Illegal UDP packet from voice server.");
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
#[instrument(skip(interconnect, rx, cipher))]
|
|
pub(crate) async fn runner(
|
|
mut interconnect: Interconnect,
|
|
rx: Receiver<UdpRxMessage>,
|
|
cipher: Cipher,
|
|
config: Config,
|
|
udp_socket: UdpSocket,
|
|
) {
|
|
trace!("UDP receive handle started.");
|
|
|
|
let mut state = UdpRx {
|
|
cipher,
|
|
decoder_map: HashMap::new(),
|
|
config,
|
|
packet_buffer: [0u8; VOICE_PACKET_MAX],
|
|
rx,
|
|
udp_socket,
|
|
};
|
|
|
|
state.run(&mut interconnect).await;
|
|
|
|
trace!("UDP receive handle stopped.");
|
|
}
|
|
|
|
#[inline]
|
|
fn rtp_valid(packet: &RtpPacket<'_>) -> bool {
|
|
packet.get_version() == RTP_VERSION && packet.get_payload_type() == RTP_PROFILE_TYPE
|
|
}
|