songbird/src/driver/tasks/mixer/mix_logic.rs

use super::*;

/// Mix a track's audio stream into either the shared mixing buffer, or directly into the output
/// packet ("passthrough") when possible.
///
/// Passthrough is highest performance, but the source MUST be opus, have 20ms frames, and be the only
/// live track. In this case we copy the opus-encoded data with no changes. Otherwise, we fall back to
/// below.
///
/// There are a few functional requirements here for non-passthrough mixing that make it tricky:
/// * Input frame lengths are not congruent with what we need to send (i.e., 26.12ms in MP3 vs
///   needed 20ms).
/// * Input audio arrives at a different sample rate from required (i.e., 44.1 vs needed 48 kHz).
/// * Input data may not be `f32`s.
/// * Input data may not match stereo/mono of desired output.
///
/// All of the above challenges often happen at once. The rough pipeline in processing is:
///
/// until source end or 20 ms taken:
///   (use previous frame 'til empty / get new frame) -> [resample] -> [audio += vol * (sample as f32)]
///
/// Typically, we mix between a subset of the input packet and the output buf because the 20ms window
/// straddles packet boundaries. If there's enough space AND 48kHz AND receive f32s, then we use a fast
/// path.
///
/// In the mono -> stereo case, we duplicate across all target channels. In stereo -> mono, we average
/// the samples from each channel.
///
/// To avoid needing to hold onto resampled data longer than one mix cycle, we take enough input samples
/// to fill a chunk of the mixer (e.g., 10ms == 20ms / 2) so that they will all be used.
///
/// This is a fairly annoying piece of code to reason about, mainly because you need to hold so many
/// internal positions into: the mix buffer, resample buffers, and previous/current packets
/// for a stream.
#[inline]
pub fn mix_symph_indiv(
    // shared buffer to mix into.
    symph_mix: &mut AudioBuffer<f32>,
    // buffer to hold built up packet
    resample_scratch: &mut AudioBuffer<f32>,
    // the input stream to use
    input: &mut Parsed,
    // resampler state and positions into partially read packets
    local_state: &mut DecodeState,
    // volume of this source
    volume: f32,
    // window into the output UDP buffer to copy opus frames into.
    // This is set to `Some` IF passthrough is possible (i.e., one live source).
    mut opus_slot: Option<&mut [u8]>,
) -> (MixType, MixStatus) {
    let mut samples_written = 0;
    let mut resample_in_progress = false;
    let mut track_status = MixStatus::Live;
    let codec_type = input.decoder.codec_params().codec;

    resample_scratch.clear();

    while samples_written != MONO_FRAME_SIZE {
        // fetch a packet: either in progress, passthrough (early exit), or
        let source_packet = if local_state.inner_pos != 0 {
            Some(input.decoder.last_decoded())
        } else if let Ok(pkt) = input.format.next_packet() {
            if pkt.track_id() != input.track_id {
                continue;
            }

            let buf = pkt.buf();

            // Opus packet passthrough special case.
            if codec_type == CODEC_TYPE_OPUS && local_state.passthrough != Passthrough::Block {
                if let Some(slot) = opus_slot.as_mut() {
                    let sample_ct = buf
                        .try_into()
                        .and_then(|buf| audiopus::packet::nb_samples(buf, SAMPLE_RATE));

                    // We don't actually block passthrough until a few violations are
                    // seen. The main one is that most Opus tracks end on a sub-20ms
                    // frame, particularly on Youtube.
                    // However, a frame that's bigger than the target buffer is an instant block.
                    let buf_size_fatal = buf.len() >= slot.len();

                    if match sample_ct {
                        Ok(MONO_FRAME_SIZE) => true,
                        _ => !local_state.record_and_check_passthrough_strike_final(buf_size_fatal),
                    } {
                        slot.write_all(buf)
                            .expect("Bounds check performed, and failure will block passthrough.");

                        return (MixType::Passthrough(buf.len()), MixStatus::Live);
                    }
                }
            }

            input
                .decoder
                .decode(&pkt)
                .map_err(|e| {
                    track_status = e.into();
                })
                .ok()
        } else {
            track_status = MixStatus::Ended;
            None
        };

        // Cleanup: failed to get the next packet, but still have to convert and mix scratch.
        if source_packet.is_none() {
            if resample_in_progress {
                // fill up remainder of buf with zeroes, resample, mix
                let (chan_c, resampler, rs_out_buf) = local_state.resampler.as_mut().unwrap();
                let in_len = resample_scratch.frames();
                let to_render = resampler.input_frames_next().saturating_sub(in_len);

                if to_render != 0 {
                    resample_scratch.render_reserved(Some(to_render));
                    for plane in resample_scratch.planes_mut().planes() {
                        for val in &mut plane[in_len..] {
                            *val = 0.0f32;
                        }
                    }
                }

                // Luckily, we make use of the WHOLE input buffer here.
                resampler
                    .process_into_buffer(
                        &resample_scratch.planes().planes()[..*chan_c],
                        rs_out_buf,
                        None,
                    )
                    .unwrap();

                // Calculate true end position using sample rate math
                let ratio = (rs_out_buf[0].len() as f32) / (resample_scratch.frames() as f32);
                let out_samples = (ratio * (in_len as f32)).round() as usize;

                mix_resampled(rs_out_buf, symph_mix, samples_written, volume);

                samples_written += out_samples;
            }

            break;
        }

        let source_packet = source_packet.unwrap();

        let in_rate = source_packet.spec().rate;
        let pkt_frames = source_packet.frames();

        if pkt_frames == 0 {
            continue;
        }

        if in_rate == SAMPLE_RATE_RAW as u32 {
            // No need to resample: mix as standard.
            let samples_marched = mix_over_ref(
                &source_packet,
                symph_mix,
                local_state.inner_pos,
                samples_written,
                volume,
            );

            samples_written += samples_marched;

            local_state.inner_pos += samples_marched;
            local_state.inner_pos %= pkt_frames;
        } else {
            // NOTE: this should NEVER change in one stream.
            let chan_c = source_packet.spec().channels.count();
            let (_, resampler, rs_out_buf) = local_state.resampler.get_or_insert_with(|| {
                // TODO: integ. error handling here.
                let resampler = FftFixedOut::new(
                    in_rate as usize,
                    SAMPLE_RATE_RAW,
                    RESAMPLE_OUTPUT_FRAME_SIZE,
                    4,
                    chan_c,
                )
                .expect("Failed to create resampler.");
                let out_buf = resampler.output_buffer_allocate(true);

                (chan_c, resampler, out_buf)
            });

            let inner_pos = local_state.inner_pos;

            let needed_in_frames = resampler.input_frames_next();
            let available_frames = pkt_frames - inner_pos;

            let force_copy = resample_in_progress || needed_in_frames > available_frames;
            if (!force_copy) && matches!(source_packet, AudioBufferRef::F32(_)) {
                // This is the only case where we can pull off a straight resample...
                // I would really like if this could be a slice of slices,
                // but the technology just isn't there yet. And I don't feel like
                // writing unsafe transformations to do so.

                // NOTE: if let needed as if-let && {bool} is nightly only.
                if let AudioBufferRef::F32(s_pkt) = source_packet {
                    let refs: Vec<&[f32]> = s_pkt
                        .planes()
                        .planes()
                        .iter()
                        .map(|s| &s[inner_pos..][..needed_in_frames])
                        .collect();

                    local_state.inner_pos += needed_in_frames;
                    local_state.inner_pos %= pkt_frames;

                    resampler
                        .process_into_buffer(&refs, rs_out_buf, None)
                        .unwrap();
                } else {
                    unreachable!()
                }
            } else {
                // We either lack enough samples, or have the wrong data format, forcing
                // a conversion/copy into the buffer.
                let old_scratch_len = resample_scratch.frames();
                let missing_frames = needed_in_frames - old_scratch_len;
                let frames_to_take = available_frames.min(missing_frames);

                resample_scratch.render_reserved(Some(frames_to_take));
                copy_into_resampler(
                    &source_packet,
                    resample_scratch,
                    inner_pos,
                    old_scratch_len,
                    frames_to_take,
                );

                local_state.inner_pos += frames_to_take;
                local_state.inner_pos %= pkt_frames;

                if resample_scratch.frames() == needed_in_frames {
                    resampler
                        .process_into_buffer(
                            &resample_scratch.planes().planes()[..chan_c],
                            rs_out_buf,
                            None,
                        )
                        .unwrap();
                    resample_scratch.clear();
                    resample_in_progress = false;
                } else {
                    // Not enough data to fill the resampler: fetch more.
                    resample_in_progress = true;
                    continue;
                }
            }

            let samples_marched = mix_resampled(rs_out_buf, symph_mix, samples_written, volume);

            samples_written += samples_marched;
        }
    }

    (MixType::MixedPcm(samples_written), track_status)
}

#[inline]
fn mix_over_ref(
    source: &AudioBufferRef<'_>,
    target: &mut AudioBuffer<f32>,
    source_pos: usize,
    dest_pos: usize,
    volume: f32,
) -> usize {
    match source {
        AudioBufferRef::U8(v) => mix_symph_buffer(v, target, source_pos, dest_pos, volume),
        AudioBufferRef::U16(v) => mix_symph_buffer(v, target, source_pos, dest_pos, volume),
        AudioBufferRef::U24(v) => mix_symph_buffer(v, target, source_pos, dest_pos, volume),
        AudioBufferRef::U32(v) => mix_symph_buffer(v, target, source_pos, dest_pos, volume),
        AudioBufferRef::S8(v) => mix_symph_buffer(v, target, source_pos, dest_pos, volume),
        AudioBufferRef::S16(v) => mix_symph_buffer(v, target, source_pos, dest_pos, volume),
        AudioBufferRef::S24(v) => mix_symph_buffer(v, target, source_pos, dest_pos, volume),
        AudioBufferRef::S32(v) => mix_symph_buffer(v, target, source_pos, dest_pos, volume),
        AudioBufferRef::F32(v) => mix_symph_buffer(v, target, source_pos, dest_pos, volume),
        AudioBufferRef::F64(v) => mix_symph_buffer(v, target, source_pos, dest_pos, volume),
    }
}

#[inline]
fn mix_symph_buffer<S>(
    source: &AudioBuffer<S>,
    target: &mut AudioBuffer<f32>,
    source_pos: usize,
    dest_pos: usize,
    volume: f32,
) -> usize
where
    S: Sample + IntoSample<f32>,
{
    // mix in source_packet[inner_pos..] til end of EITHER buffer.
    let src_usable = source.frames() - source_pos;
    let tgt_usable = target.frames() - dest_pos;

    let mix_ct = src_usable.min(tgt_usable);

    let target_chans = target.spec().channels.count();
    let target_mono = target_chans == 1;
    let source_chans = source.spec().channels.count();
    let source_mono = source_chans == 1;

    let source_planes = source.planes();
    let source_raw_planes = source_planes.planes();

    if source_mono {
        // mix this signal into *all* output channels at req'd volume.
        let source_plane = source_raw_planes[0];
        for d_plane in &mut (*target.planes_mut().planes()) {
            for (d, s) in d_plane[dest_pos..dest_pos + mix_ct]
                .iter_mut()
                .zip(source_plane[source_pos..source_pos + mix_ct].iter())
            {
                *d += volume * (*s).into_sample();
            }
        }
    } else if target_mono {
        // mix all signals into the one target channel: reduce aggregate volume
        // by n_channels.
        let vol_adj = 1.0 / (source_chans as f32);
        let mut t_planes = target.planes_mut();
        let d_plane = &mut *t_planes.planes()[0];
        for s_plane in source_raw_planes {
            for (d, s) in d_plane[dest_pos..dest_pos + mix_ct]
                .iter_mut()
                .zip(s_plane[source_pos..source_pos + mix_ct].iter())
            {
                *d += volume * vol_adj * (*s).into_sample();
            }
        }
    } else {
        // stereo -> stereo: don't change volume, map input -> output channels w/ no duplication
        for (d_plane, s_plane) in (*target.planes_mut().planes())
            .iter_mut()
            .zip(source_raw_planes[..].iter())
        {
            for (d, s) in d_plane[dest_pos..dest_pos + mix_ct]
                .iter_mut()
                .zip(s_plane[source_pos..source_pos + mix_ct].iter())
            {
                *d += volume * (*s).into_sample();
            }
        }
    }

    mix_ct
}

#[inline]
fn mix_resampled(
    source: &[Vec<f32>],
    target: &mut AudioBuffer<f32>,
    dest_pos: usize,
    volume: f32,
) -> usize {
    let mix_ct = source[0].len();

    let target_chans = target.spec().channels.count();
    let target_mono = target_chans == 1;
    let source_chans = source.len();
    let source_mono = source_chans == 1;

    // see `mix_symph_buffer` for explanations of stereo<->mono logic.
    if source_mono {
        let source_plane = &source[0];
        for d_plane in &mut (*target.planes_mut().planes()) {
            for (d, s) in d_plane[dest_pos..dest_pos + mix_ct]
                .iter_mut()
                .zip(source_plane)
            {
                *d += volume * s;
            }
        }
    } else if target_mono {
        let vol_adj = 1.0 / (source_chans as f32);
        let mut t_planes = target.planes_mut();
        let d_plane = &mut *t_planes.planes()[0];
        for s_plane in source {
            for (d, s) in d_plane[dest_pos..dest_pos + mix_ct].iter_mut().zip(s_plane) {
                *d += volume * vol_adj * s;
            }
        }
    } else {
        for (d_plane, s_plane) in (*target.planes_mut().planes())
            .iter_mut()
            .zip(source[..].iter())
        {
            for (d, s) in d_plane[dest_pos..dest_pos + mix_ct].iter_mut().zip(s_plane) {
                *d += volume * (*s);
            }
        }
    }

    mix_ct
}

#[inline]
pub(crate) fn copy_into_resampler(
    source: &AudioBufferRef<'_>,
    target: &mut AudioBuffer<f32>,
    source_pos: usize,
    dest_pos: usize,
    len: usize,
) -> usize {
    match source {
        AudioBufferRef::U8(v) => copy_symph_buffer(v, target, source_pos, dest_pos, len),
        AudioBufferRef::U16(v) => copy_symph_buffer(v, target, source_pos, dest_pos, len),
        AudioBufferRef::U24(v) => copy_symph_buffer(v, target, source_pos, dest_pos, len),
        AudioBufferRef::U32(v) => copy_symph_buffer(v, target, source_pos, dest_pos, len),
        AudioBufferRef::S8(v) => copy_symph_buffer(v, target, source_pos, dest_pos, len),
        AudioBufferRef::S16(v) => copy_symph_buffer(v, target, source_pos, dest_pos, len),
        AudioBufferRef::S24(v) => copy_symph_buffer(v, target, source_pos, dest_pos, len),
        AudioBufferRef::S32(v) => copy_symph_buffer(v, target, source_pos, dest_pos, len),
        AudioBufferRef::F32(v) => copy_symph_buffer(v, target, source_pos, dest_pos, len),
        AudioBufferRef::F64(v) => copy_symph_buffer(v, target, source_pos, dest_pos, len),
    }
}

#[inline]
fn copy_symph_buffer<S>(
    source: &AudioBuffer<S>,
    target: &mut AudioBuffer<f32>,
    source_pos: usize,
    dest_pos: usize,
    len: usize,
) -> usize
where
    S: Sample + IntoSample<f32>,
{
    for (d_plane, s_plane) in (*target.planes_mut().planes())
        .iter_mut()
        .zip(source.planes().planes()[..].iter())
    {
        for (d, s) in d_plane[dest_pos..dest_pos + len]
            .iter_mut()
            .zip(s_plane[source_pos..source_pos + len].iter())
        {
            *d = (*s).into_sample();
        }
    }

    len
}