From 9f5d20d3dee418f533a4499bba93cd6fc178af25 Mon Sep 17 00:00:00 2001
From: Charlie Malmqvist <charlie.malmqvist1@gmail.com>
Date: Mon, 22 Jul 2024 16:49:10 +0200
Subject: [PATCH] Cleanup

---
 TODO                  |   9 +-
 build.c               |   7 +-
 changelog.txt         |   2 +-
 oogabooga/cpu.c       |   8 ++
 oogabooga/oogabooga.c |  54 ++++++--
 oogabooga/simd.c      | 313 ------------------------------------------
 6 files changed, 60 insertions(+), 333 deletions(-)

diff --git a/TODO b/TODO
index c616414..994addc 100644
--- a/TODO
+++ b/TODO
@@ -35,8 +35,13 @@
 	- Atlases are way too big, render atlases with size depending on font_height (say, 128 codepoints per atlas)
 	
 - OS
-	Window::bool is_minimized
-	don't set window.width & window.height to 0
+	- Window::bool is_minimized
+	- don't set window.width & window.height to 0
+	- Sockets recv, send
+	
+	
+- Arenas
+
 	
 - Needs testing:
 	- Audio format channel conversions
diff --git a/build.c b/build.c
index e6a6397..9d53e52 100644
--- a/build.c
+++ b/build.c
@@ -3,6 +3,9 @@
 ///
 // Build config stuff
 
+#define OOGABOOGA_ENABLE_COMPLICATED_BUILD_MODE 1
+#define OOGABOOGA_NO_IMPLEMENTATION 1
+
 #define INITIAL_PROGRAM_MEMORY_SIZE MB(5)
 
 // You might want to increase this if you get a log warning saying the temporary storage was overflown.
@@ -36,11 +39,11 @@ typedef struct Context_Extra {
 //
 
 // This is a minimal starting point for new projects. Copy & rename to get started
-#include "oogabooga/examples/minimal_game_loop.c"
+// #include "oogabooga/examples/minimal_game_loop.c"
 
 // #include "oogabooga/examples/text_rendering.c"
 // #include "oogabooga/examples/custom_logger.c"
-// #include "oogabooga/examples/renderer_stress_test.c"
+#include "oogabooga/examples/renderer_stress_test.c"
 // #include "oogabooga/examples/tile_game.c"
 // #include "oogabooga/examples/audio_test.c"
 // #include "oogabooga/examples/custom_shader.c"
diff --git a/changelog.txt b/changelog.txt
index 6d2314f..96a428d 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -37,7 +37,7 @@
 			vx_cross()
 		- added os_get_file_size_from_Path()
 		- Some simple restructuring of existing code
-	- Made heap corruption detection more robust
+		- Made heap corruption detection more robust
 
 ## v0.01.000 - AUDIO!
 	- Added audio sources
diff --git a/oogabooga/cpu.c b/oogabooga/cpu.c
index a32c914..f156aa0 100644
--- a/oogabooga/cpu.c
+++ b/oogabooga/cpu.c
@@ -106,6 +106,9 @@ typedef struct Cpu_Capabilities {
 	
 	#define MEMORY_BARRIER _ReadWriteBarrier()
 	
+	#define SHARED_EXPORT __declspec(dllexport)
+    #define SHARED_IMPORT __declspec(dllimport)
+	
 #elif COMPILER_GCC || COMPILER_CLANG
 	#define inline __attribute__((always_inline)) inline
 	#define alignat(x) __attribute__((aligned(x)))
@@ -220,6 +223,9 @@ typedef struct Cpu_Capabilities {
 	
 	#define MEMORY_BARRIER __asm__ __volatile__("" ::: "memory")
 	
+	#define SHARED_EXPORT __attribute__((visibility("default")))
+    #define SHARED_IMPORT 
+	
 #else
 	#define inline inline
     #define COMPILER_HAS_MEMCPY_INTRINSICS 0
@@ -239,6 +245,8 @@ typedef struct Cpu_Capabilities {
     #warning "Compiler is not explicitly supported, some things will probably not work as expected"
 #endif
 
+
+
 Cpu_Capabilities 
 query_cpu_capabilities() {
     Cpu_Capabilities result = {0};
diff --git a/oogabooga/oogabooga.c b/oogabooga/oogabooga.c
index 13daddb..99f7bdf 100644
--- a/oogabooga/oogabooga.c
+++ b/oogabooga/oogabooga.c
@@ -84,7 +84,7 @@
 			
 			Example:
 			
-				#define RUN_TESTS 0
+				#define RUN_TESTS 1
 				
 		- ENABLE_PROFILING
 			Enable time profiling which will be dumped to google_trace.json.
@@ -101,13 +101,24 @@
 					tm_scope
 					tm_scope_var
 					tm_scope_accum
+					
+		- OOGABOOGA_HEADLESS
+            Run oogabooga in headless mode, i.e. no window, no graphics, no audio.
+            Useful if you only need the oogabooga standard library for something like a game server.
+            
+            0: Disable
+            1: Enable
+            
+            Example:
+            
+                #define OOGABOOGA_HEADLESS 1
 		
 
 */
 
 #define OGB_VERSION_MAJOR 0
 #define OGB_VERSION_MINOR 1
-#define OGB_VERSION_PATCH 1
+#define OGB_VERSION_PATCH 2
 
 #define OGB_VERSION (OGB_VERSION_MAJOR*1000000+OGB_VERSION_MINOR*1000+OGB_VERSION_PATCH)
 
@@ -214,9 +225,9 @@ typedef u8 bool;
 #ifdef _WIN32
 	#define COBJMACROS
 	#include <Windows.h>
-#if CONFIGURATION == DEBUG
-	#include <dbghelp.h>
-#endif
+    #if CONFIGURATION == DEBUG
+    	#include <dbghelp.h>
+    #endif
 	#define TARGET_OS WINDOWS
 	#define OS_PATHS_HAVE_BACKSLASH 1
 #elif defined(__linux__)
@@ -234,6 +245,19 @@ typedef u8 bool;
 #endif
 
 
+#if OOGABOOGA_ENABLE_COMPLICATED_BUILD_MODE
+    // ogb_proc expands to SHARED_IMPORT when OOGABOOGA_NO_IMPLEMENTATION is non-zero, otherwise to SHARED_EXPORT.
+    #if OOGABOOGA_NO_IMPLEMENTATION
+        #define ogb_proc SHARED_IMPORT    
+    #else
+        #define ogb_proc SHARED_EXPORT
+    #endif
+    // NOTE(review): SHARED_IMPORT/SHARED_EXPORT are only visibly defined for the MSVC and GCC/Clang branches of cpu.c - confirm the fallback compiler branch.
+#else // Complicated build mode disabled: ogb_proc expands to nothing.
+    #define ogb_proc
+#endif
+
+
 // This needs to be included before dependencies
 #include "base.c"
 
@@ -273,8 +297,8 @@ typedef u8 bool;
 ///
 // Dependencies
 ///
-// The reason dependencies are compiled here is because we modify stb_vorbis to use our
-// file API instead of the stdio.h (cmoooon Sean)
+// Dependencies are compiled here because we need to modify third-party code
+// to use the oogabooga standard library where it would otherwise use the C standard library.
 
 #include "third_party.c"
 
@@ -360,14 +384,14 @@ void oogabooga_init(u64 program_memory_size) {
 #else
     log_info("Headless mode on");
 #endif
-	log_verbose("CPU has sse1: %cs", features.sse1 ? "true" : "false");
-	log_verbose("CPU has sse2: %cs", features.sse2 ? "true" : "false");
-	log_verbose("CPU has sse3: %cs", features.sse3 ? "true" : "false");
-	log_verbose("CPU has ssse3: %cs", features.ssse3 ? "true" : "false");
-	log_verbose("CPU has sse41: %cs", features.sse41 ? "true" : "false");
-	log_verbose("CPU has sse42: %cs", features.sse42 ? "true" : "false");
-	log_verbose("CPU has avx: %cs", features.avx ? "true" : "false");
-	log_verbose("CPU has avx2: %cs", features.avx2 ? "true" : "false");
+	log_verbose("CPU has sse1:   %cs", features.sse1 ? "true" : "false");
+	log_verbose("CPU has sse2:   %cs", features.sse2 ? "true" : "false");
+	log_verbose("CPU has sse3:   %cs", features.sse3 ? "true" : "false");
+	log_verbose("CPU has ssse3:  %cs", features.ssse3 ? "true" : "false");
+	log_verbose("CPU has sse41:  %cs", features.sse41 ? "true" : "false");
+	log_verbose("CPU has sse42:  %cs", features.sse42 ? "true" : "false");
+	log_verbose("CPU has avx:    %cs", features.avx ? "true" : "false");
+	log_verbose("CPU has avx2:   %cs", features.avx2 ? "true" : "false");
 	log_verbose("CPU has avx512: %cs", features.avx512 ? "true" : "false");
 }
 
diff --git a/oogabooga/simd.c b/oogabooga/simd.c
index f288bc0..ed2d392 100644
--- a/oogabooga/simd.c
+++ b/oogabooga/simd.c
@@ -800,316 +800,3 @@ inline void basic_rsqrt_float32_512(float *a, float *result) {
     basic_rsqrt_float32_256(a+8, result+8);
 }
 
-
-
-
-
-
-
-
-// SSE 2 int32
-/*inline void sse_add_int32_128(s32 *a, s32 *b, s32* result) {
-    __asm__ (
-        "movdqa (%0), %%xmm0\n\t"
-        "movdqa (%1), %%xmm1\n\t"
-        "paddd %%xmm1, %%xmm0\n\t"
-        "movdqa %%xmm0, (%2)\n\t"
-        :
-        : "r" (a), "r" (b), "r" (result)
-        : "xmm0", "xmm1"
-    );
-}
-
-inline void sse_sub_int32_128(s32 *a, s32 *b, s32* result) {
-    __asm__ (
-        "movdqa (%0), %%xmm0\n\t"
-        "movdqa (%1), %%xmm1\n\t"
-        "psubd %%xmm1, %%xmm0\n\t"
-        "movdqa %%xmm0, (%2)\n\t"
-        :
-        : "r" (a), "r" (b), "r" (result)
-        : "xmm0", "xmm1"
-    );
-}
-
-inline void sse_mul_int32_128(s32 *a, s32 *b, s32* result) {
-    __asm__ (
-        "movdqa (%0), %%xmm0\n\t"
-        "movdqa (%1), %%xmm1\n\t"
-        "pmulld %%xmm1, %%xmm0\n\t"
-        "movdqa %%xmm0, (%2)\n\t"
-        :
-        : "r" (a), "r" (b), "r" (result)
-        : "xmm0", "xmm1"
-    );
-}
-
-// SSE4.2 float32
-inline void sse_add_float32_64(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "movups (%0), %%xmm0\n\t"
-        "movups (%1), %%xmm1\n\t" 
-        "addps %%xmm1, %%xmm0\n\t" 
-        "movups %%xmm0, (%2)\n\t"
-        :
-        : "r" (a), "r" (b), "r" (result)
-        : "xmm0", "xmm1"
-    );
-}
-
-inline void sse_add_float32_128(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "movups (%0), %%xmm0\n\t"
-        "movups (%1), %%xmm1\n\t"
-        "addps %%xmm1, %%xmm0\n\t"
-        "movups %%xmm0, (%2)\n\t"
-        :
-        : "r" (a), "r" (b), "r" (result)
-        : "xmm0", "xmm1"
-    );
-}
-
-inline void sse_sub_float32_64(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "movups (%0), %%xmm0\n\t"
-        "movups (%1), %%xmm1\n\t"
-        "subps %%xmm1, %%xmm0\n\t"
-        "movups %%xmm0, (%2)\n\t"
-        :
-        : "r" (a), "r" (b), "r" (result)
-        : "xmm0", "xmm1"
-    );
-}
-
-inline void sse_sub_float32_128(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "movups (%0), %%xmm0\n\t"
-        "movups (%1), %%xmm1\n\t"
-        "subps %%xmm1, %%xmm0\n\t"
-        "movups %%xmm0, (%2)\n\t"
-        :
-        : "r" (a), "r" (b), "r" (result)
-        : "xmm0", "xmm1"
-    );
-    
-}
-
-inline void sse_mul_float32_64(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "movups (%0), %%xmm0\n\t"
-        "movups (%1), %%xmm1\n\t"
-        "mulps %%xmm1, %%xmm0\n\t"
-        "movups %%xmm0, (%2)\n\t"
-        :
-        : "r" (a), "r" (b), "r" (result)
-        : "xmm0", "xmm1"
-    );
-}
-
-inline void sse_mul_float32_128(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "movups (%0), %%xmm0\n\t"
-        "movups (%1), %%xmm1\n\t"
-        "mulps %%xmm1, %%xmm0\n\t"
-        "movups %%xmm0, (%2)\n\t"
-        :
-        : "r" (a), "r" (b), "r" (result)
-        : "xmm0", "xmm1"
-    );
-}
-
-inline void sse_div_float32_64(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "movups (%0), %%xmm0\n\t"
-        "movups (%1), %%xmm1\n\t"
-        "divps %%xmm1, %%xmm0\n\t"
-        "movups %%xmm0, (%2)\n\t"
-        :
-        : "r" (a), "r" (b), "r" (result)
-        : "xmm0", "xmm1"
-    );
-}
-
-inline void sse_div_float32_128(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "movups (%0), %%xmm0\n\t"
-        "movups (%1), %%xmm1\n\t"
-        "divps %%xmm1, %%xmm0\n\t"
-        "movups %%xmm0, (%2)\n\t"
-        :
-        : "r" (a), "r" (b), "r" (result)
-        : "xmm0", "xmm1"
-    );
-}
-
-// AVX float32
-inline void avx_add_float32_256(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "vmovups %1, %%ymm0\n\t"
-        "vmovups %2, %%ymm1\n\t"
-        "vaddps %%ymm1, %%ymm0, %%ymm0\n\t"
-        "vmovups %%ymm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "ymm0", "ymm1"
-    );
-}
-
-inline void avx_sub_float32_256(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "vmovups %1, %%ymm0\n\t"
-        "vmovups %2, %%ymm1\n\t"
-        "vsubps %%ymm1, %%ymm0, %%ymm0\n\t"
-        "vmovups %%ymm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "ymm0", "ymm1"
-    );
-}
-
-inline void avx_mul_float32_256(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "vmovups %1, %%ymm0\n\t"
-        "vmovups %2, %%ymm1\n\t"
-        "vmulps %%ymm1, %%ymm0, %%ymm0\n\t"
-        "vmovups %%ymm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "ymm0", "ymm1"
-    );
-}
-
-inline void avx_div_float32_256(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "vmovups %1, %%ymm0\n\t"
-        "vmovups %2, %%ymm1\n\t"
-        "vdivps %%ymm1, %%ymm0, %%ymm0\n\t"
-        "vmovups %%ymm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "ymm0", "ymm1"
-    );
-}
-
-// AVX2 int32
-inline void avx2_add_int32_256(s32 *a, s32 *b, s32* result) {
-    __asm__ (
-        "vmovdqu %1, %%ymm0\n\t"
-        "vmovdqu %2, %%ymm1\n\t"
-        "vpaddd %%ymm1, %%ymm0, %%ymm0\n\t"
-        "vmovdqu %%ymm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "ymm0", "ymm1"
-    );
-}
-
-inline void avx2_sub_int32_256(s32 *a, s32 *b, s32* result) {
-    __asm__ (
-        "vmovdqu %1, %%ymm0\n\t"
-        "vmovdqu %2, %%ymm1\n\t"
-        "vpsubd %%ymm1, %%ymm0, %%ymm0\n\t"
-        "vmovdqu %%ymm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "ymm0", "ymm1"
-    );
-}
-
-inline void avx2_mul_int32_256(s32 *a, s32 *b, s32* result) {
-    __asm__ (
-        "vmovdqu %1, %%ymm0\n\t"
-        "vmovdqu %2, %%ymm1\n\t"
-        "vpmulld %%ymm1, %%ymm0, %%ymm0\n\t"
-        "vmovdqu %%ymm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "ymm0", "ymm1"
-    );
-}
-
-// AVX-512 float32
-inline void avx512_add_float32_512(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "vmovups %1, %%zmm0\n\t"
-        "vmovups %2, %%zmm1\n\t"
-        "vaddps %%zmm1, %%zmm0, %%zmm0\n\t"
-        "vmovups %%zmm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "zmm0", "zmm1"
-    );
-}
-
-inline void avx512_sub_float32_512(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "vmovups %1, %%zmm0\n\t"
-        "vmovups %2, %%zmm1\n\t"
-        "vsubps %%zmm1, %%zmm0, %%zmm0\n\t"
-        "vmovups %%zmm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "zmm0", "zmm1"
-    );
-}
-
-inline void avx512_mul_float32_512(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "vmovups %1, %%zmm0\n\t"
-        "vmovups %2, %%zmm1\n\t"
-        "vmulps %%zmm1, %%zmm0, %%zmm0\n\t"
-        "vmovups %%zmm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "zmm0", "zmm1"
-    );
-}
-
-inline void avx512_div_float32_512(float32 *a, float32 *b, float32* result) {
-    __asm__ (
-        "vmovups %1, %%zmm0\n\t"
-        "vmovups %2, %%zmm1\n\t"
-        "vdivps %%zmm1, %%zmm0, %%zmm0\n\t"
-        "vmovups %%zmm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "zmm0", "zmm1"
-    );
-}
-
-// AVX-512 int32
-inline void avx512_add_int32_512(s32 *a, s32 *b, s32* result) {
-    __asm__ (
-        "vmovdqu32 %1, %%zmm0\n\t"
-        "vmovdqu32 %2, %%zmm1\n\t"
-        "vpaddd %%zmm1, %%zmm0, %%zmm0\n\t"
-        "vmovdqu32 %%zmm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "zmm0", "zmm1"
-    );
-}
-
-inline void avx512_sub_int32_512(s32 *a, s32 *b, s32* result) {
-    __asm__ (
-        "vmovdqu32 %1, %%zmm0\n\t"
-        "vmovdqu32 %2, %%zmm1\n\t"
-        "vpsubd %%zmm1, %%zmm0, %%zmm0\n\t"
-        "vmovdqu32 %%zmm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "zmm0", "zmm1"
-    );
-}
-
-inline void avx512_mul_int32_512(s32 *a, s32 *b, s32* result) {
-    __asm__ (
-        "vmovdqu32 %1, %%zmm0\n\t"
-        "vmovdqu32 %2, %%zmm1\n\t"
-        "vpmulld %%zmm1, %%zmm0, %%zmm0\n\t"
-        "vmovdqu32 %%zmm0, %0\n\t"
-        : "=m" (*result)
-        : "m" (*a), "m" (*b)
-        : "zmm0", "zmm1"
-    );
-}*/