This repository has been archived on 2025-02-04. You can view files and clone it, but cannot push or open issues or pull requests.
helpless/oogabooga/audio.c
Charlie 2b335aee35 Sold my soul to play moonlight sonata
Audio sources & decoding are pretty much done and working well.

Playback is not really implemented yet, I'm just hacking in a way to output an audio source.

- Seriously microsoft wtf
2024-07-12 21:11:47 +02:00

628 lines
No EOL
20 KiB
C

// True when the buffer begins with the RIFF chunk id that opens a WAV file.
bool check_wav_header(string data) {
	bool looks_like_wav = string_starts_with(data, STR("RIFF"));
	return looks_like_wav;
}
// True when the buffer begins with the "OggS" capture pattern of an Ogg page.
bool check_ogg_header(string data) {
	bool looks_like_ogg = string_starts_with(data, STR("OggS"));
	return looks_like_ogg;
}
// Supporting more than s16 and f32
// If it's a real thing that there's audio devices which support neither then I will be surprised
// The only format I might consider adding is S32 if it turns out people want VERY detailed audio
// Sample component representation used for decoding and playback.
typedef enum Audio_Format_Bits {
AUDIO_BITS_16, // this will be s16
AUDIO_BITS_32, // this will be f32
} Audio_Format_Bits;
// Byte size of a single sample component for the given bit-width format.
// Panics on an invalid enum value.
u64 get_audio_bit_width_byte_size(Audio_Format_Bits b) {
	switch (b) {
		case AUDIO_BITS_32: return 4; // f32
		case AUDIO_BITS_16: return 2; // s16
	}
	// Fix: was panic("") — give a diagnostic, consistent with the other
	// bit-width switches in this file.
	panic("Invalid bits value");
}
// Describes a pcm stream: component type, channel count and sample rate.
typedef struct Audio_Format {
Audio_Format_Bits bit_width;
int channels;
int sample_rate;
} Audio_Format;
// I don't see a big reason for you to use anything else than WAV and OGG.
// If you use mp3 that's just not very smart.
// Ogg has better quality AND better compression AND you don't need any licensing (which you need for mp3)
// https://convertio.co/mp3-ogg/
// I will probably add mp3 support at some point for compatibility reasons.
// - Charlie 2024-07-11
// Which third-party decoder backs a source's compressed data.
typedef enum Audio_Decoder_Kind {
AUDIO_DECODER_WAV, // dr_wav
AUDIO_DECODER_OGG // stb_vorbis
} Audio_Decoder_Kind;
// How a source produces pcm frames: decoded on demand, or fully preloaded.
typedef enum Audio_Source_Kind {
AUDIO_SOURCE_FILE_STREAM, // Decode from compressed_data as frames are requested
AUDIO_SOURCE_MEMORY, // Raw pcm frames
} Audio_Source_Kind;
// A playable audio source. FILE_STREAM sources decode from compressed_data on
// demand; MEMORY sources (made by audio_source_init_file_decode) keep all
// frames decoded in pcm_frames. Both keep decoder state, since the decode
// path initializes through the stream path.
typedef struct Audio_Source {
Audio_Source_Kind kind;
Audio_Format format; // channels & sample_rate come from the file, bit_width from the caller
u64 number_of_frames; // Total pcm frame count reported by the decoder
Allocator allocator; // Owns compressed_data, pcm_frames and decoder allocations
string compressed_data; // Raw file contents (WAV or OGG)
// For file stream
Audio_Decoder_Kind decoder;
union {
drwav wav;
stb_vorbis *ogg;
};
// For memory source
void *pcm_frames;
} Audio_Source;
// Decodes up to number_of_frames frames from a file-stream source, starting at
// first_frame_index, into output_buffer. Returns frames actually retrieved.
// Forward declaration; defined further down, used by audio_source_init_file_decode.
int
_audio_file_stream_sample_frames(Audio_Source *src, u64 first_frame_index,
u64 number_of_frames, void *output_buffer);
// Initializes src to stream-decode the audio file at 'path'.
// The compressed file contents are kept in memory (owned by 'allocator') and
// decoded on demand by _audio_file_stream_sample_frames().
// bit_width selects the pcm representation produced when sampling.
// Returns false on read or decoder-init failure; on failure nothing is leaked.
bool
audio_source_init_file_stream(Audio_Source *src, string path, Audio_Format_Bits bit_width,
                              Allocator allocator) {
	*src = ZERO(Audio_Source);
	src->allocator = allocator;
	src->kind = AUDIO_SOURCE_FILE_STREAM;

	string data;
	bool read_ok = os_read_entire_file(path, &data, allocator);
	if (!read_ok) return false;
	// Fix: only publish the buffer after we know the read succeeded.
	src->compressed_data = data;

	// dr_wav / stb_vorbis allocate through this global hook.
	third_party_allocator = allocator;

	if (check_wav_header(data)) {
		drwav_bool32 init_ok = drwav_init_memory(&src->wav, data.data, data.count, null);
		if (!init_ok) {
			third_party_allocator = ZERO(Allocator);
			dealloc_string(allocator, data); // Fix: don't leak the file contents on failure
			return false;
		}
		src->decoder = AUDIO_DECODER_WAV;
		src->format.channels = src->wav.fmt.channels;
		src->format.sample_rate = src->wav.fmt.sampleRate;
		src->number_of_frames = src->wav.totalPCMFrameCount;
	} else if (check_ogg_header(data)) {
		int err;
		src->ogg = stb_vorbis_open_memory(data.data, data.count, &err, null);
		if (!src->ogg) {
			third_party_allocator = ZERO(Allocator);
			dealloc_string(allocator, data); // Fix: don't leak the file contents on failure
			return false;
		}
		src->decoder = AUDIO_DECODER_OGG;
		stb_vorbis_info info = stb_vorbis_get_info(src->ogg);
		src->format.channels = info.channels;
		src->format.sample_rate = info.sample_rate;
		src->number_of_frames = stb_vorbis_stream_length_in_samples(src->ogg);
	} else {
		log_error("Error in init_audio_source_file_stream(): Unrecognized audio format in file '%s'. We currently support WAV and OGG (Vorbis).", path);
		third_party_allocator = ZERO(Allocator);
		dealloc_string(allocator, data); // Fix: don't leak the file contents on failure
		return false;
	}

	src->format.bit_width = bit_width;

	third_party_allocator = ZERO(Allocator);
	return true;
}
// Fully decodes the audio file at 'path' into an in-memory pcm buffer.
// Reuses the stream init to open & probe the file, then pulls every frame at
// once; sampling afterwards is a plain memcpy from pcm_frames.
// Returns false if the file could not be opened/recognized.
bool
audio_source_init_file_decode(Audio_Source *src, string path, Audio_Format_Bits bit_width,
                              Allocator allocator) {
	if (!audio_source_init_file_stream(src, path, bit_width, allocator)) return false;

	src->kind = AUDIO_SOURCE_MEMORY;

	u64 comp_size  = get_audio_bit_width_byte_size(src->format.bit_width);
	u64 total_size = src->number_of_frames * src->format.channels * comp_size;

	src->pcm_frames = alloc(allocator, total_size);
	int num_retrieved = _audio_file_stream_sample_frames(src, 0, src->number_of_frames, src->pcm_frames);

	// Fix: duplicated word in the message; cast avoids a signed/unsigned compare.
	assert((u64)num_retrieved == src->number_of_frames, "Audio decoder failed");

	return true;
}
// Releases everything owned by src: the decoded pcm buffer (if any), the
// decoder state, and the compressed file contents. src itself is not freed.
void
audio_source_destroy(Audio_Source *src) {
switch (src->kind) {
case AUDIO_SOURCE_FILE_STREAM: {
// NOTE(review): stream sources normally have no pcm_frames; defensive guard.
if (src->pcm_frames) dealloc(src->allocator, src->pcm_frames);
break;
}
case AUDIO_SOURCE_MEMORY: {
dealloc(src->allocator, src->pcm_frames);
break;
}
}
// Decoder teardown frees through the third-party allocator hook, so it must
// point at this source's allocator for the duration of the uninit calls.
third_party_allocator = src->allocator;
switch (src->decoder) {
case AUDIO_DECODER_WAV: {
drwav_uninit(&src->wav);
break;
}
case AUDIO_DECODER_OGG: {
stb_vorbis_close(src->ogg);
break;
}
}
third_party_allocator = ZERO(Allocator);
dealloc_string(src->allocator, src->compressed_data);
}
// Seeks the underlying decoder to first_frame_index and decodes up to
// number_of_frames interleaved frames into output_buffer, in the source's
// configured bit width. Returns the number of frames actually retrieved
// (less than requested at end of stream).
int
_audio_file_stream_sample_frames(Audio_Source *src, u64 first_frame_index,
                                 u64 number_of_frames, void *output_buffer) {
	// Decoders may allocate during seek/read.
	third_party_allocator = src->allocator;

	int retrieved = 0;
	switch (src->decoder) {
		// Fix: the case bodies are braced because a declaration may not directly
		// follow a case label in standard C (pre-C23); each decoder now gets its
		// own properly scoped seek_ok.
		case AUDIO_DECODER_WAV: {
			bool seek_ok = drwav_seek_to_pcm_frame(&src->wav, first_frame_index);
			assert(seek_ok);
			switch (src->format.bit_width) {
				case AUDIO_BITS_32:
					retrieved = drwav_read_pcm_frames_f32(
						&src->wav,
						number_of_frames,
						(f32*)output_buffer
					);
					break;
				case AUDIO_BITS_16:
					retrieved = drwav_read_pcm_frames_s16(
						&src->wav,
						number_of_frames,
						(s16*)output_buffer
					);
					break;
				default: panic("Invalid bits value");
			}
			break;
		}
		case AUDIO_DECODER_OGG: {
			bool seek_ok = stb_vorbis_seek(src->ogg, first_frame_index);
			assert(seek_ok);
			switch (src->format.bit_width) {
				// stb_vorbis counts interleaved samples, hence * channels; it
				// returns frames per channel.
				case AUDIO_BITS_32:
					retrieved = stb_vorbis_get_samples_float_interleaved(
						src->ogg,
						src->format.channels,
						(f32*)output_buffer,
						number_of_frames * src->format.channels
					);
					break;
				case AUDIO_BITS_16:
					retrieved = stb_vorbis_get_samples_short_interleaved(
						src->ogg,
						src->format.channels,
						(s16*)output_buffer,
						number_of_frames * src->format.channels
					);
					break;
				default: panic("Invalid bits value");
			}
			break;
		}
		default: panic("Invalid decoder value");
	}

	third_party_allocator = ZERO(Allocator);
	return retrieved;
}
// Samples number_of_frames frames from src starting at first_frame_index into
// output_buffer. If the source ends before the buffer is full, either wraps
// around to the start (looping) or zero-fills the remainder.
// Returns the new frame index to continue from on the next call.
u64 // New frame index
audio_source_sample_frames(Audio_Source *src, u64 first_frame_index, u64 number_of_frames,
                           void *output_buffer, bool looping) {
	u64 comp_size  = get_audio_bit_width_byte_size(src->format.bit_width);
	u64 frame_size = comp_size * src->format.channels;

	if (first_frame_index == src->number_of_frames) {
		// Non-looping source already played to the end; nothing to do.
		return first_frame_index;
	}
	assert(first_frame_index < src->number_of_frames, "Invalid first_frame_index");

	u64 new_index = first_frame_index;

	switch (src->kind) {
		case AUDIO_SOURCE_FILE_STREAM: {
			int num_retrieved = _audio_file_stream_sample_frames(
				src,
				first_frame_index,
				number_of_frames,
				output_buffer
			);
			assert((u64)num_retrieved <= number_of_frames);
			new_index += num_retrieved;
			if ((u64)num_retrieved < number_of_frames) {
				// Hit the end of the source before filling the buffer.
				void *dst_remain = ((u8*)output_buffer) + num_retrieved*frame_size;
				u64 frames_remaining = number_of_frames - num_retrieved;
				if (looping) {
					num_retrieved = _audio_file_stream_sample_frames(
						src,
						0,
						frames_remaining,
						dst_remain
					);
					// Fix: the new position is the number of frames consumed from
					// the start of the source (mirrors 'new_index = remainder' in
					// the memory path below); it was number_of_frames-num_retrieved.
					new_index = num_retrieved;
				} else {
					memset(dst_remain, 0, frame_size * frames_remaining);
				}
			}
			break; // case AUDIO_SOURCE_FILE_STREAM
		}
		case AUDIO_SOURCE_MEMORY: {
			s64 first_number_of_frames = min(number_of_frames, src->number_of_frames-first_frame_index);
			void *src_pcm_start = (u8*)src->pcm_frames + first_frame_index*frame_size;
			memcpy(output_buffer, src_pcm_start, first_number_of_frames*frame_size);
			new_index += first_number_of_frames;
			s64 remainder = number_of_frames-first_number_of_frames;
			if (remainder > 0) {
				// Source ended mid-buffer: wrap or zero-fill.
				void *dst_remain = (u8*)output_buffer + first_number_of_frames*frame_size;
				if (looping) {
					memcpy(dst_remain, src->pcm_frames, frame_size*remainder);
					new_index = remainder;
				} else {
					memset(dst_remain, 0, frame_size*remainder);
				}
			}
			break;
		}
	}
	return new_index;
}
// Integer sample range limits. Negative values are parenthesized so they
// behave in any expression context. S32_MIN is written as (-S32_MAX - 1)
// because the literal 2147483648 does not fit in int, so plain -2147483648
// is unary minus applied to a larger-typed constant.
#define U8_MAX 255
#define S16_MIN (-32768)
#define S16_MAX 32767
#define S24_MIN (-8388608)
#define S24_MAX 8388607
#define S32_MIN (-2147483647 - 1)
#define S32_MAX 2147483647
// Additively mixes frame_count frames of src into dst; both buffers hold
// interleaved frames in 'format'. s16 sums are clamped to avoid wraparound.
void
mix_frames(void *dst, void *src, u64 frame_count, Audio_Format format) {
	u64 comp_size  = get_audio_bit_width_byte_size(format.bit_width);
	u64 frame_size = comp_size * format.channels;

	// #Speed #Simd #Incomplete
	// Quality:
	// - Dithering
	// - Clipping. Dynamic Range Compression?
	for (u64 frame = 0; frame < frame_count; frame++) {
		for (u64 c = 0; c < format.channels; c++) {
			void *src_sample = (u8*)src + frame*frame_size + c*comp_size;
			void *dst_sample = (u8*)dst + frame*frame_size + c*comp_size;
			switch (format.bit_width) {
				case AUDIO_BITS_32: {
					*((f32*)dst_sample) += *((f32*)src_sample);
					// Fix: missing break — this fell through into the s16 case,
					// which then reinterpreted the f32 bytes as s16 and mixed again.
					break;
				}
				case AUDIO_BITS_16: {
					s16 dst_int = *((s16*)dst_sample);
					s16 src_int = *((s16*)src_sample);
					// Sum in s64, clamp into the representable s16 range.
					*((s16*)dst_sample) = (s16)clamp((s64)(dst_int + src_int), S16_MIN, S16_MAX);
					break;
				}
			}
		}
	}
}
void
convert_one_component(void *dst, Audio_Format_Bits dst_bits,
void *src, Audio_Format_Bits src_bits) {
switch (dst_bits) {
case AUDIO_BITS_32: {
switch (src_bits) {
case AUDIO_BITS_32:
memcpy(dst, src, get_audio_bit_width_byte_size(dst_bits)); break;
case AUDIO_BITS_16:
// #Simd
*(f32*)dst = (f64)((f32)*((s16*)src) * ((f64)1.0 / (f64)32768.0));
break;
default: panic("Unhandled bits");
}
break;
}
case AUDIO_BITS_16: {
switch (src_bits) {
case AUDIO_BITS_32:
// #Simd
*(s16*)dst = (s16)(*((f32*)src) * 32768.0f);
break;
case AUDIO_BITS_16:
memcpy(dst, src, get_audio_bit_width_byte_size(dst_bits));
break;
default: panic("Unhandled bits");
}
break;
}
default: panic("Unhandled bits");
}
}
// Linearly resamples src (at src_format.sample_rate) into dst (at
// dst_format.sample_rate). Channel count and bit width must match.
// Assume dst buffer is large enough.
// In-place conversion (dst == src) is OK; frames are processed in reverse so
// upsampling never reads a frame it has already overwritten.
void
resample_frames(void *dst, Audio_Format dst_format,
                void *src, Audio_Format src_format, u64 src_frame_count) {
	assert(dst_format.channels == src_format.channels, "Channel count must be the same for sample rate conversion");
	assert(dst_format.bit_width == src_format.bit_width, "Types must be the same for sample rate conversion");

	f32 src_ratio = (f32)src_format.sample_rate / (f32)dst_format.sample_rate;
	u64 dst_frame_count = (u64)round(src_frame_count / src_ratio);

	u64 dst_comp_size = get_audio_bit_width_byte_size(dst_format.bit_width);
	u64 dst_frame_size = dst_comp_size * dst_format.channels;
	u64 src_comp_size = get_audio_bit_width_byte_size(src_format.bit_width);
	u64 src_frame_size = src_comp_size * src_format.channels;

	// Reverse in case dst == src (so we can do in-place conversion)
	// NOTE(review): frame 0 is never written (loop stops at index 1). In-place
	// that's fine — frame 0 maps to source frame 0 with lerp factor 0 — but for
	// dst != src it is left untouched; confirm callers only use this in-place.
	for (s64 dst_frame_index = dst_frame_count - 1; dst_frame_index >= 1; dst_frame_index--) {
		// Map the destination frame to a fractional source position and lerp
		// between the two neighbouring source frames.
		f32 src_frame_index_f = dst_frame_index * src_ratio;
		u64 src_frame_index_1 = (u64)src_frame_index_f;
		u64 src_frame_index_2 = src_frame_index_1 + 1;
		if (src_frame_index_2 >= src_frame_count) src_frame_index_2 = src_frame_count - 1;
		f32 lerp_factor = src_frame_index_f - (f32)src_frame_index_1;

		void *src_frame_1 = (u8*)src + src_frame_index_1 * src_frame_size;
		void *src_frame_2 = (u8*)src + src_frame_index_2 * src_frame_size;
		void *dst_frame = (u8*)dst + dst_frame_index * dst_frame_size;

		for (int c = 0; c < src_format.channels; c++) {
			// Staging union so the interpolated component can be memcpy'd out
			// without an aliasing write through a casted dst pointer.
			union {
				s16 s16_sample;
				f32 f32_sample;
				u8 data[4];
			} sample_dst;
			void *src_comp_1 = (u8*)src_frame_1 + c * src_comp_size;
			void *src_comp_2 = (u8*)src_frame_2 + c * src_comp_size;
			void *dst_comp = (u8*)dst_frame + c * dst_comp_size;
			if (src_format.bit_width == AUDIO_BITS_32) {
				float sample_1 = *((f32*)src_comp_1);
				float sample_2 = *((f32*)src_comp_2);
				sample_dst.f32_sample = sample_1 + lerp_factor * (sample_2 - sample_1);
			} else if (src_format.bit_width == AUDIO_BITS_16) {
				s16 sample_1 = *((s16*)src_comp_1);
				s16 sample_2 = *((s16*)src_comp_2);
				sample_dst.s16_sample = (s16)((f32)sample_1 + lerp_factor * ((f32)sample_2 - (f32)sample_1));
			} else {
				panic("Unhandled bit width");
			}
			memcpy(dst_comp, sample_dst.data, dst_comp_size);
		}
	}

	// Correct padding if we downscaled (since we converted in reverse)
	if (src == dst && dst_format.sample_rate < src_format.sample_rate) {
		void *dst_after_pad = (u8*)dst + (src_frame_count - dst_frame_count) * dst_frame_size;
		u64 padding = (u64)dst_after_pad - (u64)dst;
		// Fix: the shifted region overlaps its destination, so this must be
		// memmove — memcpy on overlapping buffers is undefined behavior.
		memmove(
			dst,
			dst_after_pad,
			dst_frame_count * dst_frame_size
		);
		memset((u8*)dst+dst_frame_count * dst_frame_size, 0, padding);
	}
}
// Converts interleaved frames from src_format into dst_format, writing into
// dst. Handles differences in bit width, channel count and sample rate (the
// sample-rate conversion happens last, in-place in dst).
// Assumes dst buffer is large enough.
void
convert_frames(void *dst, Audio_Format dst_format,
               void *src, Audio_Format src_format, u64 src_frame_count) {
	u64 dst_comp_size = get_audio_bit_width_byte_size(dst_format.bit_width);
	u64 dst_frame_size = dst_comp_size * dst_format.channels;
	u64 src_comp_size = get_audio_bit_width_byte_size(src_format.bit_width);
	u64 src_frame_size = src_comp_size * src_format.channels;

	if (dst_format.sample_rate != src_format.sample_rate) {
		// Resampling runs at the end, so treat the input as however many
		// source-rate frames are needed to produce the requested output.
		f32 ratio = (f32)src_format.sample_rate/(f32)dst_format.sample_rate;
		src_frame_count = (u64)round((f32)src_frame_count*ratio);
	}

	if (bytes_match(&dst_format, &src_format, sizeof(Audio_Format))) {
		// Identical formats: straight copy, nothing to convert.
		memcpy(dst, src, src_frame_count*src_frame_size);
		return;
	}

	// #Speed #Simd
	if (dst_format.channels != src_format.channels || dst_format.bit_width != src_format.bit_width) {
		for (u64 src_frame_index = 0; src_frame_index < src_frame_count; src_frame_index++) {
			void *src_frame = ((u8*)src) + src_frame_index*src_frame_size;
			void *dst_frame = ((u8*)dst) + src_frame_index*dst_frame_size;

			// For getting average src sample
			union {
				s16 s16_sample;
				f32 f32_sample;
				u8 data[4];
			} avg;
			if (src_format.channels != dst_format.channels) {
				// This is where we get the average src sample
				f32 sum = 0;
				for (int c = 0; c < src_format.channels; c++) {
					avg.s16_sample = 0;
					void *src_comp = (u8*)src_frame + c * src_comp_size;
					// Convert each component to the destination bit width first,
					// then average in f32.
					convert_one_component(
						avg.data, dst_format.bit_width,
						src_comp, src_format.bit_width
					);
					if (dst_format.bit_width == AUDIO_BITS_32) sum += avg.f32_sample;
					else if (dst_format.bit_width == AUDIO_BITS_16) sum += (f32)avg.s16_sample;
					else panic("Unhandled bit width");
				}
				if (dst_format.bit_width == AUDIO_BITS_32) {
					avg.f32_sample = sum/(f32)src_format.channels;
				} else if (dst_format.bit_width == AUDIO_BITS_16) {
					avg.s16_sample = (s16)round(sum/(f32)src_format.channels);
				} else panic("Unhandled bit width");
			}

			if (src_format.channels > dst_format.channels) {
				// #Limitation #Audioquality
				// Here we are down-scaling the channel count.
				// So what we do is we get the average sample for all channels in src and then
				// set all channels in dst to that. This is fine for mono to stereo, but will
				// be a loss for example for surround to mono. But I'm not sure we will ever
				// care about non-stereo/mono audio.
				for (int c = 0; c < dst_format.channels; c++) {
					void *dst_comp = (u8*)dst_frame + c * dst_comp_size;
					memcpy(dst_comp, avg.data, dst_comp_size);
				}
			} else if (dst_format.channels > src_format.channels) {
				// Here, we are upscaling to a higher channel count.
				// I'm not sure what the best way to do this is, but for now I will try to just
				// get the average in src and set that to the extra channels in dst.
				// This is obviously fine for mono -> stereo but might be a problem for surround.
				// Again, I'm not sure if surround will ever be on our list of worries.
				for (int c = 0; c < dst_format.channels; c++) {
					void *dst_comp = (u8*)dst_frame + c * dst_comp_size;
					void *src_comp = (u8*)src_frame + c * src_comp_size;
					if (c < src_format.channels)
						convert_one_component(dst_comp, dst_format.bit_width,
						                      src_comp, src_format.bit_width);
					else
						memcpy(dst_comp, avg.data, dst_comp_size);
				}
			} else {
				// Same channel count, just copy components over
				for (int c = 0; c < dst_format.channels; c++) {
					void *dst_comp = (u8*)dst_frame + c * dst_comp_size;
					void *src_comp = (u8*)src_frame + c * src_comp_size;
					convert_one_component(dst_comp, dst_format.bit_width, src_comp, src_format.bit_width);
				}
			}
		}
	} else if (dst != src) {
		// Fix: only the sample rate differs, so the conversion loop above did
		// not run and dst still holds garbage. resample_frames below reads from
		// dst, so move the frames over first.
		memcpy(dst, src, src_frame_count*src_frame_size);
	}

	if (dst_format.sample_rate != src_format.sample_rate) {
		// In-place resample of the already channel/bit-width-converted frames.
		resample_frames(
			dst,
			(Audio_Format){dst_format.bit_width, dst_format.channels, dst_format.sample_rate},
			dst,
			(Audio_Format){dst_format.bit_width, dst_format.channels, src_format.sample_rate},
			src_frame_count
		);
	}
}
// #Temporary this is just for testing
// Global playback state consumed by do_program_audio_sample().
Audio_Source *current_source = 0;
u64 current_index = 0; // Next frame index to sample from current_source
// This is supposed to be called by OS layer audio thread whenever it wants more audio samples
// Fills 'output' with number_of_output_frames frames in out_format, sampling
// from the global current_source (looping) and converting format if needed.
// Outputs silence when no source is set.
void do_program_audio_sample(u64 number_of_output_frames, Audio_Format out_format,
void *output) {
u64 out_comp_size = get_audio_bit_width_byte_size(out_format.bit_width);
u64 out_frame_size = out_comp_size * out_format.channels;
u64 output_size = number_of_output_frames * out_frame_size;
// Silence by default; mixing below only overwrites when a source exists.
memset(output, 0, output_size);
if (current_source) {
bool need_convert = !bytes_match(&out_format, &current_source->format, sizeof(Audio_Format));
u64 in_comp_size = get_audio_bit_width_byte_size(current_source->format.bit_width);
u64 in_frame_size = in_comp_size * current_source->format.channels;
u64 input_size = number_of_output_frames * in_frame_size;
void *target_buffer = output;
u64 number_of_sample_frames = number_of_output_frames;
// Scratch buffer reused across calls on this audio thread; grows on demand,
// intentionally never freed (see #Speed below).
thread_local local_persist void *convert_buffer = 0;
thread_local local_persist u64 convert_buffer_size;
if (need_convert) {
if (current_source->format.sample_rate != out_format.sample_rate) {
// Sample-rate mismatch: pull enough source-rate frames so that after
// resampling we end up with number_of_output_frames.
f32 src_ratio
= (f32)current_source->format.sample_rate / (f32)out_format.sample_rate;
number_of_sample_frames = round(number_of_output_frames * src_ratio);
input_size = number_of_sample_frames * in_frame_size;
}
// Buffer must hold whichever side of the conversion is larger.
u64 biggest_size = max(input_size, output_size);
if (!convert_buffer || convert_buffer_size < biggest_size) {
// #Speed
if (convert_buffer) dealloc(get_heap_allocator(), convert_buffer);
convert_buffer = alloc(get_heap_allocator(), biggest_size);
convert_buffer_size = biggest_size;
}
target_buffer = convert_buffer;
memset(convert_buffer, 0, biggest_size);
}
// Sample in the source's native format (looping playback).
current_index = audio_source_sample_frames(
current_source,
current_index,
number_of_sample_frames,
target_buffer,
true
);
if (need_convert) {
// Convert bit width / channels / sample rate into the device format.
convert_frames(
output,
out_format,
convert_buffer,
current_source->format,
number_of_output_frames
);
}
}
}