This repository has been archived on 2025-02-04. You can view files and clone it, but cannot push or open issues or pull requests.
helpless/oogabooga/audio.c
Charlie 2b335aee35 Sold my soul to play moonlight sonata
Audio sources & decoding are pretty much done and working well.

Playback is not really implemented yet, I'm just hacking in a way to output an audio source.

- Seriously microsoft wtf
2024-07-12 21:11:47 +02:00

628 lines
No EOL
20 KiB
C

// True when the buffer begins with the RIFF chunk id that opens a WAV file.
bool check_wav_header(string data) {
	bool looks_like_wav = string_starts_with(data, STR("RIFF"));
	return looks_like_wav;
}
// True when the buffer begins with the "OggS" capture pattern of an Ogg page.
bool check_ogg_header(string data) {
	bool looks_like_ogg = string_starts_with(data, STR("OggS"));
	return looks_like_ogg;
}
// Supporting more than s16 and f32
// If it's a real thing that there's audio devices which support neither then I will be surprised
// The only format I might consider adding is S32 if it turns out people want VERY detailed audio
// Sample component representation used for decoding and playback.
typedef enum Audio_Format_Bits {
AUDIO_BITS_16, // this will be s16
AUDIO_BITS_32, // this will be f32
} Audio_Format_Bits;
// Byte size of a single sample component for the given bit-width format.
// Panics on an invalid enum value.
u64 get_audio_bit_width_byte_size(Audio_Format_Bits b) {
	switch (b) {
		case AUDIO_BITS_32: return 4; // f32
		case AUDIO_BITS_16: return 2; // s16
	}
	// Fix: was panic("") — give a diagnostic, consistent with the other
	// bit-width switches in this file.
	panic("Invalid bits value");
}
// Describes a pcm stream: component type, channel count and sample rate.
typedef struct Audio_Format {
Audio_Format_Bits bit_width;
int channels;
int sample_rate;
} Audio_Format;
// I don't see a big reason for you to use anything else than WAV and OGG.
// If you use mp3 that's just not very smart.
// Ogg has better quality AND better compression AND you don't need any licensing (which you need for mp3)
// https://convertio.co/mp3-ogg/
// I will probably add mp3 support at some point for compatibility reasons.
// - Charlie 2024-07-11
// Which third-party decoder backs a source's compressed data.
typedef enum Audio_Decoder_Kind {
AUDIO_DECODER_WAV, // dr_wav
AUDIO_DECODER_OGG // stb_vorbis
} Audio_Decoder_Kind;
// How a source produces pcm frames: decoded on demand, or fully preloaded.
typedef enum Audio_Source_Kind {
AUDIO_SOURCE_FILE_STREAM, // Decode from compressed_data as frames are requested
AUDIO_SOURCE_MEMORY, // Raw pcm frames
} Audio_Source_Kind;
// A playable audio source. FILE_STREAM sources decode from compressed_data on
// demand; MEMORY sources (made by audio_source_init_file_decode) keep all
// frames decoded in pcm_frames. Both keep decoder state, since the decode
// path initializes through the stream path.
typedef struct Audio_Source {
Audio_Source_Kind kind;
Audio_Format format; // channels & sample_rate come from the file, bit_width from the caller
u64 number_of_frames; // Total pcm frame count reported by the decoder
Allocator allocator; // Owns compressed_data, pcm_frames and decoder allocations
string compressed_data; // Raw file contents (WAV or OGG)
// For file stream
Audio_Decoder_Kind decoder;
union {
drwav wav;
stb_vorbis *ogg;
};
// For memory source
void *pcm_frames;
} Audio_Source;
// Decodes up to number_of_frames frames from a file-stream source, starting at
// first_frame_index, into output_buffer. Returns frames actually retrieved.
// Forward declaration; defined further down, used by audio_source_init_file_decode.
int
_audio_file_stream_sample_frames(Audio_Source *src, u64 first_frame_index,
u64 number_of_frames, void *output_buffer);
// Initializes src to stream-decode the audio file at 'path'.
// The compressed file contents are kept in memory (owned by 'allocator') and
// decoded on demand by _audio_file_stream_sample_frames().
// bit_width selects the pcm representation produced when sampling.
// Returns false on read or decoder-init failure; on failure nothing is leaked.
bool
audio_source_init_file_stream(Audio_Source *src, string path, Audio_Format_Bits bit_width,
                              Allocator allocator) {
	*src = ZERO(Audio_Source);
	src->allocator = allocator;
	src->kind = AUDIO_SOURCE_FILE_STREAM;

	string data;
	bool read_ok = os_read_entire_file(path, &data, allocator);
	if (!read_ok) return false;
	// Fix: only publish the buffer after we know the read succeeded.
	src->compressed_data = data;

	// dr_wav / stb_vorbis allocate through this global hook.
	third_party_allocator = allocator;

	if (check_wav_header(data)) {
		drwav_bool32 init_ok = drwav_init_memory(&src->wav, data.data, data.count, null);
		if (!init_ok) {
			third_party_allocator = ZERO(Allocator);
			dealloc_string(allocator, data); // Fix: don't leak the file contents on failure
			return false;
		}
		src->decoder = AUDIO_DECODER_WAV;
		src->format.channels = src->wav.fmt.channels;
		src->format.sample_rate = src->wav.fmt.sampleRate;
		src->number_of_frames = src->wav.totalPCMFrameCount;
	} else if (check_ogg_header(data)) {
		int err;
		src->ogg = stb_vorbis_open_memory(data.data, data.count, &err, null);
		if (!src->ogg) {
			third_party_allocator = ZERO(Allocator);
			dealloc_string(allocator, data); // Fix: don't leak the file contents on failure
			return false;
		}
		src->decoder = AUDIO_DECODER_OGG;
		stb_vorbis_info info = stb_vorbis_get_info(src->ogg);
		src->format.channels = info.channels;
		src->format.sample_rate = info.sample_rate;
		src->number_of_frames = stb_vorbis_stream_length_in_samples(src->ogg);
	} else {
		log_error("Error in init_audio_source_file_stream(): Unrecognized audio format in file '%s'. We currently support WAV and OGG (Vorbis).", path);
		third_party_allocator = ZERO(Allocator);
		dealloc_string(allocator, data); // Fix: don't leak the file contents on failure
		return false;
	}

	src->format.bit_width = bit_width;

	third_party_allocator = ZERO(Allocator);
	return true;
}
// Fully decodes the audio file at 'path' into an in-memory pcm buffer.
// Reuses the stream init to open & probe the file, then pulls every frame at
// once; sampling afterwards is a plain memcpy from pcm_frames.
// Returns false if the file could not be opened/recognized.
bool
audio_source_init_file_decode(Audio_Source *src, string path, Audio_Format_Bits bit_width,
                              Allocator allocator) {
	if (!audio_source_init_file_stream(src, path, bit_width, allocator)) return false;

	src->kind = AUDIO_SOURCE_MEMORY;

	u64 comp_size  = get_audio_bit_width_byte_size(src->format.bit_width);
	u64 total_size = src->number_of_frames * src->format.channels * comp_size;

	src->pcm_frames = alloc(allocator, total_size);
	int num_retrieved = _audio_file_stream_sample_frames(src, 0, src->number_of_frames, src->pcm_frames);

	// Fix: duplicated word in the message; cast avoids a signed/unsigned compare.
	assert((u64)num_retrieved == src->number_of_frames, "Audio decoder failed");

	return true;
}
// Releases everything owned by src: the decoded pcm buffer (if any), the
// decoder state, and the compressed file contents. src itself is not freed.
void
audio_source_destroy(Audio_Source *src) {
switch (src->kind) {
case AUDIO_SOURCE_FILE_STREAM: {
// NOTE(review): stream sources normally have no pcm_frames; defensive guard.
if (src->pcm_frames) dealloc(src->allocator, src->pcm_frames);
break;
}
case AUDIO_SOURCE_MEMORY: {
dealloc(src->allocator, src->pcm_frames);
break;
}
}
// Decoder teardown frees through the third-party allocator hook, so it must
// point at this source's allocator for the duration of the uninit calls.
third_party_allocator = src->allocator;
switch (src->decoder) {
case AUDIO_DECODER_WAV: {
drwav_uninit(&src->wav);
break;
}
case AUDIO_DECODER_OGG: {
stb_vorbis_close(src->ogg);
break;
}
}
third_party_allocator = ZERO(Allocator);
dealloc_string(src->allocator, src->compressed_data);
}
// Seeks the underlying decoder to first_frame_index and decodes up to
// number_of_frames interleaved frames into output_buffer, in the source's
// configured bit width. Returns the number of frames actually retrieved
// (less than requested at end of stream).
int
_audio_file_stream_sample_frames(Audio_Source *src, u64 first_frame_index,
                                 u64 number_of_frames, void *output_buffer) {
	// Decoders may allocate during seek/read.
	third_party_allocator = src->allocator;

	int retrieved = 0;
	switch (src->decoder) {
		// Fix: the case bodies are braced because a declaration may not directly
		// follow a case label in standard C (pre-C23); each decoder now gets its
		// own properly scoped seek_ok.
		case AUDIO_DECODER_WAV: {
			bool seek_ok = drwav_seek_to_pcm_frame(&src->wav, first_frame_index);
			assert(seek_ok);
			switch (src->format.bit_width) {
				case AUDIO_BITS_32:
					retrieved = drwav_read_pcm_frames_f32(
						&src->wav,
						number_of_frames,
						(f32*)output_buffer
					);
					break;
				case AUDIO_BITS_16:
					retrieved = drwav_read_pcm_frames_s16(
						&src->wav,
						number_of_frames,
						(s16*)output_buffer
					);
					break;
				default: panic("Invalid bits value");
			}
			break;
		}
		case AUDIO_DECODER_OGG: {
			bool seek_ok = stb_vorbis_seek(src->ogg, first_frame_index);
			assert(seek_ok);
			switch (src->format.bit_width) {
				// stb_vorbis counts interleaved samples, hence * channels; it
				// returns frames per channel.
				case AUDIO_BITS_32:
					retrieved = stb_vorbis_get_samples_float_interleaved(
						src->ogg,
						src->format.channels,
						(f32*)output_buffer,
						number_of_frames * src->format.channels
					);
					break;
				case AUDIO_BITS_16:
					retrieved = stb_vorbis_get_samples_short_interleaved(
						src->ogg,
						src->format.channels,
						(s16*)output_buffer,
						number_of_frames * src->format.channels
					);
					break;
				default: panic("Invalid bits value");
			}
			break;
		}
		default: panic("Invalid decoder value");
	}

	third_party_allocator = ZERO(Allocator);
	return retrieved;
}
// Samples number_of_frames frames from src starting at first_frame_index into
// output_buffer. If the source ends before the buffer is full, either wraps
// around to the start (looping) or zero-fills the remainder.
// Returns the new frame index to continue from on the next call.
u64 // New frame index
audio_source_sample_frames(Audio_Source *src, u64 first_frame_index, u64 number_of_frames,
                           void *output_buffer, bool looping) {
	u64 comp_size  = get_audio_bit_width_byte_size(src->format.bit_width);
	u64 frame_size = comp_size * src->format.channels;

	if (first_frame_index == src->number_of_frames) {
		// Non-looping source already played to the end; nothing to do.
		return first_frame_index;
	}
	assert(first_frame_index < src->number_of_frames, "Invalid first_frame_index");

	u64 new_index = first_frame_index;

	switch (src->kind) {
		case AUDIO_SOURCE_FILE_STREAM: {
			int num_retrieved = _audio_file_stream_sample_frames(
				src,
				first_frame_index,
				number_of_frames,
				output_buffer
			);
			assert((u64)num_retrieved <= number_of_frames);
			new_index += num_retrieved;
			if ((u64)num_retrieved < number_of_frames) {
				// Hit the end of the source before filling the buffer.
				void *dst_remain = ((u8*)output_buffer) + num_retrieved*frame_size;
				u64 frames_remaining = number_of_frames - num_retrieved;
				if (looping) {
					num_retrieved = _audio_file_stream_sample_frames(
						src,
						0,
						frames_remaining,
						dst_remain
					);
					// Fix: the new position is the number of frames consumed from
					// the start of the source (mirrors 'new_index = remainder' in
					// the memory path below); it was number_of_frames-num_retrieved.
					new_index = num_retrieved;
				} else {
					memset(dst_remain, 0, frame_size * frames_remaining);
				}
			}
			break; // case AUDIO_SOURCE_FILE_STREAM
		}
		case AUDIO_SOURCE_MEMORY: {
			s64 first_number_of_frames = min(number_of_frames, src->number_of_frames-first_frame_index);
			void *src_pcm_start = (u8*)src->pcm_frames + first_frame_index*frame_size;
			memcpy(output_buffer, src_pcm_start, first_number_of_frames*frame_size);
			new_index += first_number_of_frames;
			s64 remainder = number_of_frames-first_number_of_frames;
			if (remainder > 0) {
				// Source ended mid-buffer: wrap or zero-fill.
				void *dst_remain = (u8*)output_buffer + first_number_of_frames*frame_size;
				if (looping) {
					memcpy(dst_remain, src->pcm_frames, frame_size*remainder);
					new_index = remainder;
				} else {
					memset(dst_remain, 0, frame_size*remainder);
				}
			}
			break;
		}
	}
	return new_index;
}
// Integer sample range limits. Negative values are parenthesized so they
// behave in any expression context. S32_MIN is written as (-S32_MAX - 1)
// because the literal 2147483648 does not fit in int, so plain -2147483648
// is unary minus applied to a larger-typed constant.
#define U8_MAX 255
#define S16_MIN (-32768)
#define S16_MAX 32767
#define S24_MIN (-8388608)
#define S24_MAX 8388607
#define S32_MIN (-2147483647 - 1)
#define S32_MAX 2147483647
// Additively mixes frame_count frames of src into dst; both buffers hold
// interleaved frames in 'format'. s16 sums are clamped to avoid wraparound.
void
mix_frames(void *dst, void *src, u64 frame_count, Audio_Format format) {
	u64 comp_size  = get_audio_bit_width_byte_size(format.bit_width);
	u64 frame_size = comp_size * format.channels;

	// #Speed #Simd #Incomplete
	// Quality:
	// - Dithering
	// - Clipping. Dynamic Range Compression?
	for (u64 frame = 0; frame < frame_count; frame++) {
		for (u64 c = 0; c < format.channels; c++) {
			void *src_sample = (u8*)src + frame*frame_size + c*comp_size;
			void *dst_sample = (u8*)dst + frame*frame_size + c*comp_size;
			switch (format.bit_width) {
				case AUDIO_BITS_32: {
					*((f32*)dst_sample) += *((f32*)src_sample);
					// Fix: missing break — this fell through into the s16 case,
					// which then reinterpreted the f32 bytes as s16 and mixed again.
					break;
				}
				case AUDIO_BITS_16: {
					s16 dst_int = *((s16*)dst_sample);
					s16 src_int = *((s16*)src_sample);
					// Sum in s64, clamp into the representable s16 range.
					*((s16*)dst_sample) = (s16)clamp((s64)(dst_int + src_int), S16_MIN, S16_MAX);
					break;
				}
			}
		}
	}
}
void
convert_one_component(void *dst, Audio_Format_Bits dst_bits,
void *src, Audio_Format_Bits src_bits) {
switch (dst_bits) {
case AUDIO_BITS_32: {
switch (src_bits) {
case AUDIO_BITS_32:
memcpy(dst, src, get_audio_bit_width_byte_size(dst_bits)); break;
case AUDIO_BITS_16:
// #Simd
*(f32*)dst = (f64)((f32)*((s16*)src) * ((f64)1.0 / (f64)32768.0));
break;
default: panic("Unhandled bits");
}
break;
}
case AUDIO_BITS_16: {
switch (src_bits) {
case AUDIO_BITS_32:
// #Simd
*(s16*)dst = (s16)(*((f32*)src) * 32768.0f);
break;
case AUDIO_BITS_16:
memcpy(dst, src, get_audio_bit_width_byte_size(dst_bits));
break;
default: panic("Unhandled bits");
}
break;
}
default: panic("Unhandled bits");
}
}
// Linearly resamples src (at src_format.sample_rate) into dst (at
// dst_format.sample_rate). Channel count and bit width must match.
// Assume dst buffer is large enough.
// In-place conversion (dst == src) is OK; frames are processed in reverse so
// upsampling never reads a frame it has already overwritten.
void
resample_frames(void *dst, Audio_Format dst_format,
                void *src, Audio_Format src_format, u64 src_frame_count) {
	assert(dst_format.channels == src_format.channels, "Channel count must be the same for sample rate conversion");
	assert(dst_format.bit_width == src_format.bit_width, "Types must be the same for sample rate conversion");

	f32 src_ratio = (f32)src_format.sample_rate / (f32)dst_format.sample_rate;
	u64 dst_frame_count = (u64)round(src_frame_count / src_ratio);

	u64 dst_comp_size = get_audio_bit_width_byte_size(dst_format.bit_width);
	u64 dst_frame_size = dst_comp_size * dst_format.channels;
	u64 src_comp_size = get_audio_bit_width_byte_size(src_format.bit_width);
	u64 src_frame_size = src_comp_size * src_format.channels;

	// Reverse in case dst == src (so we can do in-place conversion)
	// NOTE(review): frame 0 is never written (loop stops at index 1). In-place
	// that's fine — frame 0 maps to source frame 0 with lerp factor 0 — but for
	// dst != src it is left untouched; confirm callers only use this in-place.
	for (s64 dst_frame_index = dst_frame_count - 1; dst_frame_index >= 1; dst_frame_index--) {
		// Map the destination frame to a fractional source position and lerp
		// between the two neighbouring source frames.
		f32 src_frame_index_f = dst_frame_index * src_ratio;
		u64 src_frame_index_1 = (u64)src_frame_index_f;
		u64 src_frame_index_2 = src_frame_index_1 + 1;
		if (src_frame_index_2 >= src_frame_count) src_frame_index_2 = src_frame_count - 1;
		f32 lerp_factor = src_frame_index_f - (f32)src_frame_index_1;

		void *src_frame_1 = (u8*)src + src_frame_index_1 * src_frame_size;
		void *src_frame_2 = (u8*)src + src_frame_index_2 * src_frame_size;
		void *dst_frame = (u8*)dst + dst_frame_index * dst_frame_size;

		for (int c = 0; c < src_format.channels; c++) {
			// Staging union so the interpolated component can be memcpy'd out
			// without an aliasing write through a casted dst pointer.
			union {
				s16 s16_sample;
				f32 f32_sample;
				u8 data[4];
			} sample_dst;
			void *src_comp_1 = (u8*)src_frame_1 + c * src_comp_size;
			void *src_comp_2 = (u8*)src_frame_2 + c * src_comp_size;
			void *dst_comp = (u8*)dst_frame + c * dst_comp_size;
			if (src_format.bit_width == AUDIO_BITS_32) {
				float sample_1 = *((f32*)src_comp_1);
				float sample_2 = *((f32*)src_comp_2);
				sample_dst.f32_sample = sample_1 + lerp_factor * (sample_2 - sample_1);
			} else if (src_format.bit_width == AUDIO_BITS_16) {
				s16 sample_1 = *((s16*)src_comp_1);
				s16 sample_2 = *((s16*)src_comp_2);
				sample_dst.s16_sample = (s16)((f32)sample_1 + lerp_factor * ((f32)sample_2 - (f32)sample_1));
			} else {
				panic("Unhandled bit width");
			}
			memcpy(dst_comp, sample_dst.data, dst_comp_size);
		}
	}

	// Correct padding if we downscaled (since we converted in reverse)
	if (src == dst && dst_format.sample_rate < src_format.sample_rate) {
		void *dst_after_pad = (u8*)dst + (src_frame_count - dst_frame_count) * dst_frame_size;
		u64 padding = (u64)dst_after_pad - (u64)dst;
		// Fix: the shifted region overlaps its destination, so this must be
		// memmove — memcpy on overlapping buffers is undefined behavior.
		memmove(
			dst,
			dst_after_pad,
			dst_frame_count * dst_frame_size
		);
		memset((u8*)dst+dst_frame_count * dst_frame_size, 0, padding);
	}
}
// Converts interleaved frames from src_format into dst_format, writing into
// dst. Handles differences in bit width, channel count and sample rate (the
// sample-rate conversion happens last, in-place in dst).
// Assumes dst buffer is large enough.
void
convert_frames(void *dst, Audio_Format dst_format,
               void *src, Audio_Format src_format, u64 src_frame_count) {
	u64 dst_comp_size = get_audio_bit_width_byte_size(dst_format.bit_width);
	u64 dst_frame_size = dst_comp_size * dst_format.channels;
	u64 src_comp_size = get_audio_bit_width_byte_size(src_format.bit_width);
	u64 src_frame_size = src_comp_size * src_format.channels;

	if (dst_format.sample_rate != src_format.sample_rate) {
		// Resampling runs at the end, so treat the input as however many
		// source-rate frames are needed to produce the requested output.
		f32 ratio = (f32)src_format.sample_rate/(f32)dst_format.sample_rate;
		src_frame_count = (u64)round((f32)src_frame_count*ratio);
	}

	if (bytes_match(&dst_format, &src_format, sizeof(Audio_Format))) {
		// Identical formats: straight copy, nothing to convert.
		memcpy(dst, src, src_frame_count*src_frame_size);
		return;
	}

	// #Speed #Simd
	if (dst_format.channels != src_format.channels || dst_format.bit_width != src_format.bit_width) {
		for (u64 src_frame_index = 0; src_frame_index < src_frame_count; src_frame_index++) {
			void *src_frame = ((u8*)src) + src_frame_index*src_frame_size;
			void *dst_frame = ((u8*)dst) + src_frame_index*dst_frame_size;

			// For getting average src sample
			union {
				s16 s16_sample;
				f32 f32_sample;
				u8 data[4];
			} avg;
			if (src_format.channels != dst_format.channels) {
				// This is where we get the average src sample
				f32 sum = 0;
				for (int c = 0; c < src_format.channels; c++) {
					avg.s16_sample = 0;
					void *src_comp = (u8*)src_frame + c * src_comp_size;
					// Convert each component to the destination bit width first,
					// then average in f32.
					convert_one_component(
						avg.data, dst_format.bit_width,
						src_comp, src_format.bit_width
					);
					if (dst_format.bit_width == AUDIO_BITS_32) sum += avg.f32_sample;
					else if (dst_format.bit_width == AUDIO_BITS_16) sum += (f32)avg.s16_sample;
					else panic("Unhandled bit width");
				}
				if (dst_format.bit_width == AUDIO_BITS_32) {
					avg.f32_sample = sum/(f32)src_format.channels;
				} else if (dst_format.bit_width == AUDIO_BITS_16) {
					avg.s16_sample = (s16)round(sum/(f32)src_format.channels);
				} else panic("Unhandled bit width");
			}

			if (src_format.channels > dst_format.channels) {
				// #Limitation #Audioquality
				// Here we are down-scaling the channel count.
				// So what we do is we get the average sample for all channels in src and then
				// set all channels in dst to that. This is fine for mono to stereo, but will
				// be a loss for example for surround to mono. But I'm not sure we will ever
				// care about non-stereo/mono audio.
				for (int c = 0; c < dst_format.channels; c++) {
					void *dst_comp = (u8*)dst_frame + c * dst_comp_size;
					memcpy(dst_comp, avg.data, dst_comp_size);
				}
			} else if (dst_format.channels > src_format.channels) {
				// Here, we are upscaling to a higher channel count.
				// I'm not sure what the best way to do this is, but for now I will try to just
				// get the average in src and set that to the extra channels in dst.
				// This is obviously fine for mono -> stereo but might be a problem for surround.
				// Again, I'm not sure if surround will ever be on our list of worries.
				for (int c = 0; c < dst_format.channels; c++) {
					void *dst_comp = (u8*)dst_frame + c * dst_comp_size;
					void *src_comp = (u8*)src_frame + c * src_comp_size;
					if (c < src_format.channels)
						convert_one_component(dst_comp, dst_format.bit_width,
						                      src_comp, src_format.bit_width);
					else
						memcpy(dst_comp, avg.data, dst_comp_size);
				}
			} else {
				// Same channel count, just copy components over
				for (int c = 0; c < dst_format.channels; c++) {
					void *dst_comp = (u8*)dst_frame + c * dst_comp_size;
					void *src_comp = (u8*)src_frame + c * src_comp_size;
					convert_one_component(dst_comp, dst_format.bit_width, src_comp, src_format.bit_width);
				}
			}
		}
	} else if (dst != src) {
		// Fix: only the sample rate differs, so the conversion loop above did
		// not run and dst still holds garbage. resample_frames below reads from
		// dst, so move the frames over first.
		memcpy(dst, src, src_frame_count*src_frame_size);
	}

	if (dst_format.sample_rate != src_format.sample_rate) {
		// In-place resample of the already channel/bit-width-converted frames.
		resample_frames(
			dst,
			(Audio_Format){dst_format.bit_width, dst_format.channels, dst_format.sample_rate},
			dst,
			(Audio_Format){dst_format.bit_width, dst_format.channels, src_format.sample_rate},
			src_frame_count
		);
	}
}
// #Temporary this is just for testing
// Global playback state consumed by do_program_audio_sample().
Audio_Source *current_source = 0;
u64 current_index = 0; // Next frame index to sample from current_source
// This is supposed to be called by OS layer audio thread whenever it wants more audio samples
// Fills 'output' with number_of_output_frames frames in out_format, sampling
// from the global current_source (looping) and converting format if needed.
// Outputs silence when no source is set.
void do_program_audio_sample(u64 number_of_output_frames, Audio_Format out_format,
void *output) {
u64 out_comp_size = get_audio_bit_width_byte_size(out_format.bit_width);
u64 out_frame_size = out_comp_size * out_format.channels;
u64 output_size = number_of_output_frames * out_frame_size;
// Silence by default; mixing below only overwrites when a source exists.
memset(output, 0, output_size);
if (current_source) {
bool need_convert = !bytes_match(&out_format, &current_source->format, sizeof(Audio_Format));
u64 in_comp_size = get_audio_bit_width_byte_size(current_source->format.bit_width);
u64 in_frame_size = in_comp_size * current_source->format.channels;
u64 input_size = number_of_output_frames * in_frame_size;
void *target_buffer = output;
u64 number_of_sample_frames = number_of_output_frames;
// Scratch buffer reused across calls on this audio thread; grows on demand,
// intentionally never freed (see #Speed below).
thread_local local_persist void *convert_buffer = 0;
thread_local local_persist u64 convert_buffer_size;
if (need_convert) {
if (current_source->format.sample_rate != out_format.sample_rate) {
// Sample-rate mismatch: pull enough source-rate frames so that after
// resampling we end up with number_of_output_frames.
f32 src_ratio
= (f32)current_source->format.sample_rate / (f32)out_format.sample_rate;
number_of_sample_frames = round(number_of_output_frames * src_ratio);
input_size = number_of_sample_frames * in_frame_size;
}
// Buffer must hold whichever side of the conversion is larger.
u64 biggest_size = max(input_size, output_size);
if (!convert_buffer || convert_buffer_size < biggest_size) {
// #Speed
if (convert_buffer) dealloc(get_heap_allocator(), convert_buffer);
convert_buffer = alloc(get_heap_allocator(), biggest_size);
convert_buffer_size = biggest_size;
}
target_buffer = convert_buffer;
memset(convert_buffer, 0, biggest_size);
}
// Sample in the source's native format (looping playback).
current_index = audio_source_sample_frames(
current_source,
current_index,
number_of_sample_frames,
target_buffer,
true
);
if (need_convert) {
// Convert bit width / channels / sample rate into the device format.
convert_frames(
output,
out_format,
convert_buffer,
current_source->format,
number_of_output_frames
);
}
}
}