From 9f5d20d3dee418f533a4499bba93cd6fc178af25 Mon Sep 17 00:00:00 2001 From: Charlie Malmqvist Date: Mon, 22 Jul 2024 16:49:10 +0200 Subject: [PATCH] Cleanup --- TODO | 9 +- build.c | 7 +- changelog.txt | 2 +- oogabooga/cpu.c | 8 ++ oogabooga/oogabooga.c | 54 ++++++-- oogabooga/simd.c | 313 ------------------------------------------ 6 files changed, 60 insertions(+), 333 deletions(-) diff --git a/TODO b/TODO index c616414..994addc 100644 --- a/TODO +++ b/TODO @@ -35,8 +35,13 @@ - Atlases are way too big, render atlases with size depending on font_height (say, 128 codepoints per atlas) - OS - Window::bool is_minimized - don't set window.width & window.height to 0 + - Window::bool is_minimized + - don't set window.width & window.height to 0 + - Sockets recv, send + + +- Arenas + - Needs testing: - Audio format channel conversions diff --git a/build.c b/build.c index e6a6397..9d53e52 100644 --- a/build.c +++ b/build.c @@ -3,6 +3,9 @@ /// // Build config stuff +#define OOGABOOGA_ENABLE_COMPLICATED_BUILD_MODE 1 +#define OOGABOOGA_NO_IMPLEMENTATION 1 + #define INITIAL_PROGRAM_MEMORY_SIZE MB(5) // You might want to increase this if you get a log warning saying the temporary storage was overflown. @@ -36,11 +39,11 @@ typedef struct Context_Extra { // // This is a minimal starting point for new projects. Copy & rename to get started -#include "oogabooga/examples/minimal_game_loop.c" +// #include "oogabooga/examples/minimal_game_loop.c" // #include "oogabooga/examples/text_rendering.c" // #include "oogabooga/examples/custom_logger.c" -// #include "oogabooga/examples/renderer_stress_test.c" +#include "oogabooga/examples/renderer_stress_test.c" // #include "oogabooga/examples/tile_game.c" // #include "oogabooga/examples/audio_test.c" // #include "oogabooga/examples/custom_shader.c" diff --git a/changelog.txt b/changelog.txt index 6d2314f..96a428d 100644 --- a/changelog.txt +++ b/changelog.txt @@ -37,7 +37,7 @@ vx_cross() - added os_get_file_size_from_Path() - Some simple restructuring of existing code - - Made heap corruption detection more robust + - Made heap corruption detection more robust ## v0.01.000 - AUDIO! - Added audio sources diff --git a/oogabooga/cpu.c b/oogabooga/cpu.c index a32c914..f156aa0 100644 --- a/oogabooga/cpu.c +++ b/oogabooga/cpu.c @@ -106,6 +106,9 @@ typedef struct Cpu_Capabilities { #define MEMORY_BARRIER _ReadWriteBarrier() + #define SHARED_EXPORT __declspec(dllexport) + #define SHARED_IMPORT __declspec(dllimport) + #elif COMPILER_GCC || COMPILER_CLANG #define inline __attribute__((always_inline)) inline #define alignat(x) __attribute__((aligned(x))) @@ -220,6 +223,9 @@ typedef struct Cpu_Capabilities { #define MEMORY_BARRIER __asm__ __volatile__("" ::: "memory") + #define SHARED_EXPORT __attribute__((visibility("default"))) + #define SHARED_IMPORT + #else #define inline inline #define COMPILER_HAS_MEMCPY_INTRINSICS 0 @@ -239,6 +245,8 @@ typedef struct Cpu_Capabilities { #warning "Compiler is not explicitly supported, some things will probably not work as expected" #endif + + Cpu_Capabilities query_cpu_capabilities() { Cpu_Capabilities result = {0}; diff --git a/oogabooga/oogabooga.c b/oogabooga/oogabooga.c index 13daddb..99f7bdf 100644 --- a/oogabooga/oogabooga.c +++ b/oogabooga/oogabooga.c @@ -84,7 +84,7 @@ Example: - #define RUN_TESTS 0 + #define RUN_TESTS 1 - ENABLE_PROFILING Enable time profiling which will be dumped to google_trace.json. @@ -101,13 +101,24 @@ tm_scope tm_scope_var tm_scope_accum + + - OOGABOOGA_HEADLESS + Run oogabooga in headless mode, i.e. no window, no graphics, no audio. + Useful if you only need the oogabooga standard library for something like a game server. + + 0: Disable + 1: Enable + + Example: + + #define OOGABOOGA_HEADLESS 1 */ #define OGB_VERSION_MAJOR 0 #define OGB_VERSION_MINOR 1 -#define OGB_VERSION_PATCH 1 +#define OGB_VERSION_PATCH 2 #define OGB_VERSION (OGB_VERSION_MAJOR*1000000+OGB_VERSION_MINOR*1000+OGB_VERSION_PATCH) @@ -214,9 +225,9 @@ typedef u8 bool; #ifdef _WIN32 #define COBJMACROS #include -#if CONFIGURATION == DEBUG - #include -#endif + #if CONFIGURATION == DEBUG + #include + #endif #define TARGET_OS WINDOWS #define OS_PATHS_HAVE_BACKSLASH 1 #elif defined(__linux__) @@ -234,6 +245,19 @@ typedef u8 bool; #endif +#if OOGABOOGA_ENABLE_COMPLICATED_BUILD_MODE + + #if OOGABOOGA_NO_IMPLEMENTATION + #define ogb_proc SHARED_IMPORT + #else + #define ogb_proc SHARED_EXPORT + #endif + +#else + #define ogb_proc +#endif + + // This needs to be included before dependencies #include "base.c" @@ -273,8 +297,8 @@ typedef u8 bool; /// // Dependencies /// -// The reason dependencies are compiled here is because we modify stb_vorbis to use our -// file API instead of the stdio.h (cmoooon Sean) +// The reason dependencies are compiled here is because we need to modify third party code +// to use the oogabooga standard where they use the C standard. #include "third_party.c" @@ -360,14 +384,14 @@ void oogabooga_init(u64 program_memory_size) { #else log_info("Headless mode on"); #endif - log_verbose("CPU has sse1: %cs", features.sse1 ? "true" : "false"); - log_verbose("CPU has sse2: %cs", features.sse2 ? "true" : "false"); - log_verbose("CPU has sse3: %cs", features.sse3 ? "true" : "false"); - log_verbose("CPU has ssse3: %cs", features.ssse3 ? "true" : "false"); - log_verbose("CPU has sse41: %cs", features.sse41 ? "true" : "false"); - log_verbose("CPU has sse42: %cs", features.sse42 ? "true" : "false"); - log_verbose("CPU has avx: %cs", features.avx ? "true" : "false"); - log_verbose("CPU has avx2: %cs", features.avx2 ? "true" : "false"); + log_verbose("CPU has sse1: %cs", features.sse1 ? "true" : "false"); + log_verbose("CPU has sse2: %cs", features.sse2 ? "true" : "false"); + log_verbose("CPU has sse3: %cs", features.sse3 ? "true" : "false"); + log_verbose("CPU has ssse3: %cs", features.ssse3 ? "true" : "false"); + log_verbose("CPU has sse41: %cs", features.sse41 ? "true" : "false"); + log_verbose("CPU has sse42: %cs", features.sse42 ? "true" : "false"); + log_verbose("CPU has avx: %cs", features.avx ? "true" : "false"); + log_verbose("CPU has avx2: %cs", features.avx2 ? "true" : "false"); log_verbose("CPU has avx512: %cs", features.avx512 ? "true" : "false"); } diff --git a/oogabooga/simd.c b/oogabooga/simd.c index f288bc0..ed2d392 100644 --- a/oogabooga/simd.c +++ b/oogabooga/simd.c @@ -800,316 +800,3 @@ inline void basic_rsqrt_float32_512(float *a, float *result) { basic_rsqrt_float32_256(a+8, result+8); } - - - - - - - -// SSE 2 int32 -/*inline void sse_add_int32_128(s32 *a, s32 *b, s32* result) { - __asm__ ( - "movdqa (%0), %%xmm0\n\t" - "movdqa (%1), %%xmm1\n\t" - "paddd %%xmm1, %%xmm0\n\t" - "movdqa %%xmm0, (%2)\n\t" - : - : "r" (a), "r" (b), "r" (result) - : "xmm0", "xmm1" - ); -} - -inline void sse_sub_int32_128(s32 *a, s32 *b, s32* result) { - __asm__ ( - "movdqa (%0), %%xmm0\n\t" - "movdqa (%1), %%xmm1\n\t" - "psubd %%xmm1, %%xmm0\n\t" - "movdqa %%xmm0, (%2)\n\t" - : - : "r" (a), "r" (b), "r" (result) - : "xmm0", "xmm1" - ); -} - -inline void sse_mul_int32_128(s32 *a, s32 *b, s32* result) { - __asm__ ( - "movdqa (%0), %%xmm0\n\t" - "movdqa (%1), %%xmm1\n\t" - "pmulld %%xmm1, %%xmm0\n\t" - "movdqa %%xmm0, (%2)\n\t" - : - : "r" (a), "r" (b), "r" (result) - : "xmm0", "xmm1" - ); -} - -// SSE4.2 float32 -inline void sse_add_float32_64(float32 *a, float32 *b, float32* result) { - __asm__ ( - "movups (%0), %%xmm0\n\t" - "movups (%1), %%xmm1\n\t" - "addps %%xmm1, %%xmm0\n\t" - "movups %%xmm0, (%2)\n\t" - : - : "r" (a), "r" (b), "r" (result) - : "xmm0", "xmm1" - ); -} - -inline void sse_add_float32_128(float32 *a, float32 *b, float32* result) { - __asm__ ( - "movups (%0), %%xmm0\n\t" - "movups (%1), %%xmm1\n\t" - "addps %%xmm1, %%xmm0\n\t" - "movups %%xmm0, (%2)\n\t" - : - : "r" (a), "r" (b), "r" (result) - : "xmm0", "xmm1" - ); -} - -inline void sse_sub_float32_64(float32 *a, float32 *b, float32* result) { - __asm__ ( - "movups (%0), %%xmm0\n\t" - "movups (%1), %%xmm1\n\t" - "subps %%xmm1, %%xmm0\n\t" - "movups %%xmm0, (%2)\n\t" - : - : "r" (a), "r" (b), "r" (result) - : "xmm0", "xmm1" - ); -} - -inline void sse_sub_float32_128(float32 *a, float32 *b, float32* result) { - __asm__ ( - "movups (%0), %%xmm0\n\t" - "movups (%1), %%xmm1\n\t" - "subps %%xmm1, %%xmm0\n\t" - "movups %%xmm0, (%2)\n\t" - : - : "r" (a), "r" (b), "r" (result) - : "xmm0", "xmm1" - ); - -} - -inline void sse_mul_float32_64(float32 *a, float32 *b, float32* result) { - __asm__ ( - "movups (%0), %%xmm0\n\t" - "movups (%1), %%xmm1\n\t" - "mulps %%xmm1, %%xmm0\n\t" - "movups %%xmm0, (%2)\n\t" - : - : "r" (a), "r" (b), "r" (result) - : "xmm0", "xmm1" - ); -} - -inline void sse_mul_float32_128(float32 *a, float32 *b, float32* result) { - __asm__ ( - "movups (%0), %%xmm0\n\t" - "movups (%1), %%xmm1\n\t" - "mulps %%xmm1, %%xmm0\n\t" - "movups %%xmm0, (%2)\n\t" - : - : "r" (a), "r" (b), "r" (result) - : "xmm0", "xmm1" - ); -} - -inline void sse_div_float32_64(float32 *a, float32 *b, float32* result) { - __asm__ ( - "movups (%0), %%xmm0\n\t" - "movups (%1), %%xmm1\n\t" - "divps %%xmm1, %%xmm0\n\t" - "movups %%xmm0, (%2)\n\t" - : - : "r" (a), "r" (b), "r" (result) - : "xmm0", "xmm1" - ); -} - -inline void sse_div_float32_128(float32 *a, float32 *b, float32* result) { - __asm__ ( - "movups (%0), %%xmm0\n\t" - "movups (%1), %%xmm1\n\t" - "divps %%xmm1, %%xmm0\n\t" - "movups %%xmm0, (%2)\n\t" - : - : "r" (a), "r" (b), "r" (result) - : "xmm0", "xmm1" - ); -} - -// AVX float32 -inline void avx_add_float32_256(float32 *a, float32 *b, float32* result) { - __asm__ ( - "vmovups %1, %%ymm0\n\t" - "vmovups %2, %%ymm1\n\t" - "vaddps %%ymm1, %%ymm0, %%ymm0\n\t" - "vmovups %%ymm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "ymm0", "ymm1" - ); -} - -inline void avx_sub_float32_256(float32 *a, float32 *b, float32* result) { - __asm__ ( - "vmovups %1, %%ymm0\n\t" - "vmovups %2, %%ymm1\n\t" - "vsubps %%ymm1, %%ymm0, %%ymm0\n\t" - "vmovups %%ymm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "ymm0", "ymm1" - ); -} - -inline void avx_mul_float32_256(float32 *a, float32 *b, float32* result) { - __asm__ ( - "vmovups %1, %%ymm0\n\t" - "vmovups %2, %%ymm1\n\t" - "vmulps %%ymm1, %%ymm0, %%ymm0\n\t" - "vmovups %%ymm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "ymm0", "ymm1" - ); -} - -inline void avx_div_float32_256(float32 *a, float32 *b, float32* result) { - __asm__ ( - "vmovups %1, %%ymm0\n\t" - "vmovups %2, %%ymm1\n\t" - "vdivps %%ymm1, %%ymm0, %%ymm0\n\t" - "vmovups %%ymm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "ymm0", "ymm1" - ); -} - -// AVX2 int32 -inline void avx2_add_int32_256(s32 *a, s32 *b, s32* result) { - __asm__ ( - "vmovdqu %1, %%ymm0\n\t" - "vmovdqu %2, %%ymm1\n\t" - "vpaddd %%ymm1, %%ymm0, %%ymm0\n\t" - "vmovdqu %%ymm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "ymm0", "ymm1" - ); -} - -inline void avx2_sub_int32_256(s32 *a, s32 *b, s32* result) { - __asm__ ( - "vmovdqu %1, %%ymm0\n\t" - "vmovdqu %2, %%ymm1\n\t" - "vpsubd %%ymm1, %%ymm0, %%ymm0\n\t" - "vmovdqu %%ymm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "ymm0", "ymm1" - ); -} - -inline void avx2_mul_int32_256(s32 *a, s32 *b, s32* result) { - __asm__ ( - "vmovdqu %1, %%ymm0\n\t" - "vmovdqu %2, %%ymm1\n\t" - "vpmulld %%ymm1, %%ymm0, %%ymm0\n\t" - "vmovdqu %%ymm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "ymm0", "ymm1" - ); -} - -// AVX-512 float32 -inline void avx512_add_float32_512(float32 *a, float32 *b, float32* result) { - __asm__ ( - "vmovups %1, %%zmm0\n\t" - "vmovups %2, %%zmm1\n\t" - "vaddps %%zmm1, %%zmm0, %%zmm0\n\t" - "vmovups %%zmm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "zmm0", "zmm1" - ); -} - -inline void avx512_sub_float32_512(float32 *a, float32 *b, float32* result) { - __asm__ ( - "vmovups %1, %%zmm0\n\t" - "vmovups %2, %%zmm1\n\t" - "vsubps %%zmm1, %%zmm0, %%zmm0\n\t" - "vmovups %%zmm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "zmm0", "zmm1" - ); -} - -inline void avx512_mul_float32_512(float32 *a, float32 *b, float32* result) { - __asm__ ( - "vmovups %1, %%zmm0\n\t" - "vmovups %2, %%zmm1\n\t" - "vmulps %%zmm1, %%zmm0, %%zmm0\n\t" - "vmovups %%zmm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "zmm0", "zmm1" - ); -} - -inline void avx512_div_float32_512(float32 *a, float32 *b, float32* result) { - __asm__ ( - "vmovups %1, %%zmm0\n\t" - "vmovups %2, %%zmm1\n\t" - "vdivps %%zmm1, %%zmm0, %%zmm0\n\t" - "vmovups %%zmm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "zmm0", "zmm1" - ); -} - -// AVX-512 int32 -inline void avx512_add_int32_512(s32 *a, s32 *b, s32* result) { - __asm__ ( - "vmovdqu32 %1, %%zmm0\n\t" - "vmovdqu32 %2, %%zmm1\n\t" - "vpaddd %%zmm1, %%zmm0, %%zmm0\n\t" - "vmovdqu32 %%zmm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "zmm0", "zmm1" - ); -} - -inline void avx512_sub_int32_512(s32 *a, s32 *b, s32* result) { - __asm__ ( - "vmovdqu32 %1, %%zmm0\n\t" - "vmovdqu32 %2, %%zmm1\n\t" - "vpsubd %%zmm1, %%zmm0, %%zmm0\n\t" - "vmovdqu32 %%zmm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "zmm0", "zmm1" - ); -} - -inline void avx512_mul_int32_512(s32 *a, s32 *b, s32* result) { - __asm__ ( - "vmovdqu32 %1, %%zmm0\n\t" - "vmovdqu32 %2, %%zmm1\n\t" - "vpmulld %%zmm1, %%zmm0, %%zmm0\n\t" - "vmovdqu32 %%zmm0, %0\n\t" - : "=m" (*result) - : "m" (*a), "m" (*b) - : "zmm0", "zmm1" - ); -}*/