From 98428340657d84272d9b3d5b76adccc56e3dc4e3 Mon Sep 17 00:00:00 2001 From: Charlie <66182434+asbott@users.noreply.github.com> Date: Tue, 9 Jul 2024 09:07:30 +0200 Subject: [PATCH] - Portable DEPRECATED macro - Deprecate os_compare_and_swap and replace with more portable compare_and_swap - Fixed a bug where the wrong image would be drawn --- build.bat | 2 +- build_dissassembly.bat | 2 +- build_release.bat | 2 +- changelog.txt | 5 ++- oogabooga/cpu.c | 81 +++++++++++++++++++++++++++++++++++++ oogabooga/drawing.c | 17 +++++++- oogabooga/gfx_impl_d3d11.c | 4 +- oogabooga/oogabooga.c | 4 ++ oogabooga/os_impl_windows.c | 4 +- oogabooga/os_interface.c | 16 +++++--- oogabooga/tests.c | 6 +-- 11 files changed, 126 insertions(+), 17 deletions(-) diff --git a/build.bat b/build.bat index 99b1551..a93248e 100644 --- a/build.bat +++ b/build.bat @@ -6,6 +6,6 @@ mkdir build pushd build -clang -g -o cgame.exe ../build.c -O0 -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -Wno-deprecated-declarations -lkernel32 -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi -femit-all-decls +clang -g -o cgame.exe ../build.c -O0 -std=c11 -D_CRT_SECURE_NO_WARNINGS -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -lkernel32 -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi -femit-all-decls popd \ No newline at end of file diff --git a/build_dissassembly.bat b/build_dissassembly.bat index 934e0f3..e0c9f47 100644 --- a/build_dissassembly.bat +++ b/build_dissassembly.bat @@ -12,7 +12,7 @@ pushd build mkdir dissassembly pushd dissassembly -clang -o cgame.asm ../../build.c -Ofast -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -Wno-deprecated-declarations -finline-functions -ffast-math -fno-math-errno -funsafe-math-optimizations -freciprocal-math 
-ffinite-math-only -fassociative-math -fno-signed-zeros -fno-trapping-math -ftree-vectorize -fomit-frame-pointer -funroll-loops -fno-rtti -fno-exceptions -S -masm=intel +clang -o cgame.asm ../../build.c -Ofast -std=c11 -D_CRT_SECURE_NO_WARNINGS -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -Wno-deprecated-declarations -finline-functions -ffast-math -fno-math-errno -funsafe-math-optimizations -freciprocal-math -ffinite-math-only -fassociative-math -fno-signed-zeros -fno-trapping-math -ftree-vectorize -fomit-frame-pointer -funroll-loops -fno-rtti -fno-exceptions -S -masm=intel popd popd \ No newline at end of file diff --git a/build_release.bat b/build_release.bat index 122c0d6..5d44482 100644 --- a/build_release.bat +++ b/build_release.bat @@ -9,7 +9,7 @@ pushd build mkdir release pushd release -clang -o cgame.exe ../../build.c -Ofast -DNDEBUG -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -Wno-deprecated-declarations -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi -finline-functions -finline-hint-functions -ffast-math -fno-math-errno -funsafe-math-optimizations -freciprocal-math -ffinite-math-only -fassociative-math -fno-signed-zeros -fno-trapping-math -ftree-vectorize -fomit-frame-pointer -funroll-loops -fno-rtti -fno-exceptions +clang -o cgame.exe ../../build.c -Ofast -DNDEBUG -std=c11 -D_CRT_SECURE_NO_WARNINGS -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -Wno-deprecated-declarations -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi -finline-functions -finline-hint-functions -ffast-math -fno-math-errno -funsafe-math-optimizations -freciprocal-math -ffinite-math-only -fassociative-math -fno-signed-zeros -fno-trapping-math -ftree-vectorize -fomit-frame-pointer -funroll-loops -fno-rtti -fno-exceptions popd popd \ 
No newline at end of file diff --git a/changelog.txt b/changelog.txt index e4c3914..349902d 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,4 +1,4 @@ -## v0.00.003 - +## v0.00.003 - Fixes Random: - get_random_float64() @@ -24,6 +24,9 @@ Misc: - Fixed Y placement of window when changing the window rect - Fixed window sizing when setting scaled_width or scaled_height - Updated readme + - Portable DEPRECATED macro + - Deprecate os_compare_and_swap and replace with more portable compare_and_swap + - Fixed a bug where the wrong image would be drawn diff --git a/oogabooga/cpu.c b/oogabooga/cpu.c index 4556370..f554ddc 100644 --- a/oogabooga/cpu.c +++ b/oogabooga/cpu.c @@ -67,6 +67,34 @@ typedef struct Cpu_Capabilities { #else #define COMPILER_CAN_DO_AVX512 0 #endif + + #define DEPRECATED(proc, msg) __declspec(deprecated(msg)) proc + + #pragma intrinsic(_InterlockedCompareExchange8) + #pragma intrinsic(_InterlockedCompareExchange16) + #pragma intrinsic(_InterlockedCompareExchange) + #pragma intrinsic(_InterlockedCompareExchange64) + + inline bool compare_and_swap_8(uint8_t *a, uint8_t b, uint8_t old) { + return (uint8_t)_InterlockedCompareExchange8((volatile char*)a, (char)b, (char)old) == old; /* intrinsic returns signed char; compare as unsigned so old >= 0x80 still matches */ + } + + inline bool compare_and_swap_16(uint16_t *a, uint16_t b, uint16_t old) { + return (uint16_t)_InterlockedCompareExchange16((volatile short*)a, (short)b, (short)old) == old; /* intrinsic returns signed short; compare as unsigned so old >= 0x8000 still matches */ + } + + inline bool compare_and_swap_32(uint32_t *a, uint32_t b, uint32_t old) { + return _InterlockedCompareExchange((volatile long*)a, (long)b, (long)old) == old; + } + + inline bool compare_and_swap_64(uint64_t *a, uint64_t b, uint64_t old) { + return _InterlockedCompareExchange64((volatile long long*)a, (long long)b, (long long)old) == old; + } + + inline bool compare_and_swap_bool(bool *a, bool b, bool old) { + return compare_and_swap_8((uint8_t*)a, (uint8_t)b, (uint8_t)old); + } + #elif COMPILER_GCC || COMPILER_CLANG #define inline __attribute__((always_inline)) inline #define alignat(x) 
__attribute__((aligned(x))) @@ -115,6 +143,57 @@ typedef struct Cpu_Capabilities { #else #define COMPILER_CAN_DO_AVX512 0 #endif + + #define DEPRECATED(proc, msg) proc __attribute__((deprecated(msg))) + + inline bool compare_and_swap_8(uint8_t *a, uint8_t b, uint8_t old) { + unsigned char result; + __asm__ __volatile__( + "lock; cmpxchgb %2, %1" + : "=a" (result), "=m" (*a) + : "r" (b), "m" (*a), "a" (old) + : "memory" + ); + return result == old; + } + + inline bool compare_and_swap_16(uint16_t *a, uint16_t b, uint16_t old) { + unsigned short result; + __asm__ __volatile__( + "lock; cmpxchgw %2, %1" + : "=a" (result), "=m" (*a) + : "r" (b), "m" (*a), "a" (old) + : "memory" + ); + return result == old; + } + + inline bool compare_and_swap_32(uint32_t *a, uint32_t b, uint32_t old) { + unsigned int result; + __asm__ __volatile__( + "lock; cmpxchgl %2, %1" + : "=a" (result), "=m" (*a) + : "r" (b), "m" (*a), "a" (old) + : "memory" + ); + return result == old; + } + + inline bool compare_and_swap_64(uint64_t *a, uint64_t b, uint64_t old) { + unsigned long long result; + __asm__ __volatile__( + "lock; cmpxchgq %2, %1" + : "=a" (result), "=m" (*a) + : "r" (b), "m" (*a), "a" (old) + : "memory" + ); + return result == old; + } + + inline bool compare_and_swap_bool(bool *a, bool b, bool old) { + return compare_and_swap_8((uint8_t*)a, (uint8_t)b, (uint8_t)old); + } + #else #define inline inline #define COMPILER_HAS_MEMCPY_INTRINSICS 0 @@ -126,6 +205,8 @@ typedef struct Cpu_Capabilities { #define COMPILER_CAN_DO_AVX2 0 #define COMPILER_CAN_DO_AVX512 0 + #define DEPRECATED(proc, msg) proc /* no deprecation attribute on unknown compilers: expand to the bare declaration */ + #warning "Compiler is not explicitly supported, some things will probably not work as expected" #endif diff --git a/oogabooga/drawing.c b/oogabooga/drawing.c index 6eb88c0..bf7ad77 100644 --- a/oogabooga/drawing.c +++ b/oogabooga/drawing.c @@ -68,7 +68,7 @@ Usage: -#define QUADS_PER_BLOCK 16 +#define QUADS_PER_BLOCK 256 typedef struct Draw_Quad { Vector2 bottom_left, top_left, top_right, bottom_right; 
// r, g, b, a @@ -82,6 +82,8 @@ typedef struct Draw_Quad { Gfx_Filter_Mode image_min_filter; Gfx_Filter_Mode image_mag_filter; + float32 z; + } Draw_Quad; @@ -89,6 +91,8 @@ typedef struct Draw_Quad_Block { Draw_Quad quad_buffer[QUADS_PER_BLOCK]; u64 num_quads; + float32 low_z, high_z; + struct Draw_Quad_Block *next; } Draw_Quad_Block; @@ -103,6 +107,8 @@ typedef struct Draw_Frame { Matrix4 projection; Matrix4 view; + + bool enable_z_sorting; } Draw_Frame; // This frame is passed to the platform layer and rendered in os_update. // Resets every frame. @@ -129,7 +135,11 @@ Draw_Quad *draw_quad_projected(Draw_Quad quad, Matrix4 world_to_clip) { quad.image_min_filter = GFX_FILTER_MODE_NEAREST; quad.image_min_filter = GFX_FILTER_MODE_NEAREST; - if (!draw_frame.current) draw_frame.current = &first_block; + if (!draw_frame.current) { + draw_frame.current = &first_block; + draw_frame.current->low_z = F32_MAX; + draw_frame.current->high_z = -F32_MAX; /* F32_MIN is the smallest positive float, not the most negative one */ + } if (draw_frame.current == &first_block) draw_frame.num_blocks = 1; @@ -146,6 +156,9 @@ Draw_Quad *draw_quad_projected(Draw_Quad quad, Matrix4 world_to_clip) { draw_frame.current->num_quads = 0; draw_frame.num_blocks += 1; + + draw_frame.current->low_z = F32_MAX; + draw_frame.current->high_z = -F32_MAX; /* F32_MIN is the smallest positive float, not the most negative one */ } draw_frame.current->quad_buffer[draw_frame.current->num_quads] = quad; diff --git a/oogabooga/gfx_impl_d3d11.c b/oogabooga/gfx_impl_d3d11.c index 9805b79..1d346bd 100644 --- a/oogabooga/gfx_impl_d3d11.c +++ b/oogabooga/gfx_impl_d3d11.c @@ -575,6 +575,7 @@ void d3d11_process_draw_frame() { ID3D11ShaderResourceView *textures[32]; ID3D11ShaderResourceView *last_texture = 0; u64 num_textures = 0; + s8 last_texture_index = 0; D3D11_Vertex* head = (D3D11_Vertex*)d3d11_staging_quad_buffer; D3D11_Vertex* pointer = head; @@ -592,7 +593,7 @@ void d3d11_process_draw_frame() { if (q->image) { if (last_texture == q->image->gfx_handle) { - texture_index = (s8)(num_textures-1); + texture_index = last_texture_index; } else { // First look if 
texture is already bound for (u64 j = 0; j < num_textures; j++) { @@ -623,6 +624,7 @@ void d3d11_process_draw_frame() { } textures[texture_index] = q->image->gfx_handle; last_texture = q->image->gfx_handle; + last_texture_index = texture_index; } if (q->type == QUAD_TYPE_TEXT) { diff --git a/oogabooga/oogabooga.c b/oogabooga/oogabooga.c index 9c79151..0ecf02d 100644 --- a/oogabooga/oogabooga.c +++ b/oogabooga/oogabooga.c @@ -135,10 +135,14 @@ typedef double f64; typedef f32 float32; typedef f64 float64; +#define F32_MAX 3.402823466e+38F +#define F32_MIN 1.175494351e-38F + typedef u8 bool; #define false 0 #define true 1 + // Determine what compiler we are on #ifdef __clang__ #define COMPILER_CLANG 1 diff --git a/oogabooga/os_impl_windows.c b/oogabooga/os_impl_windows.c index 8f8f65b..509efd8 100644 --- a/oogabooga/os_impl_windows.c +++ b/oogabooga/os_impl_windows.c @@ -440,7 +440,7 @@ Spinlock *os_make_spinlock(Allocator allocator) { void os_spinlock_lock(Spinlock *l) { while (true) { bool expected = false; - if (os_compare_and_swap_bool(&l->locked, true, expected)) { + if (compare_and_swap_bool(&l->locked, true, expected)) { return; } while (l->locked) { @@ -451,7 +451,7 @@ void os_spinlock_lock(Spinlock *l) { void os_spinlock_unlock(Spinlock *l) { bool expected = true; - bool success = os_compare_and_swap_bool(&l->locked, false, expected); + bool success = compare_and_swap_bool(&l->locked, false, expected); assert(success, "This thread should have acquired the spinlock but compare_and_swap failed"); } diff --git a/oogabooga/os_interface.c b/oogabooga/os_interface.c index 0ff3447..8a3420b 100644 --- a/oogabooga/os_interface.c +++ b/oogabooga/os_interface.c @@ -124,11 +124,17 @@ void os_spinlock_unlock(Spinlock* l); /// // Concurrency utilities -bool os_compare_and_swap_8 (u8 *a, u8 b, u8 old); -bool os_compare_and_swap_16 (u16 *a, u16 b, u16 old); -bool os_compare_and_swap_32 (u32 *a, u32 b, u32 old); -bool os_compare_and_swap_64 (u64 *a, u64 b, u64 old); -bool 
os_compare_and_swap_bool(bool *a, bool b, bool old); +// #Cleanup +// In retrospect, I'm not sure why I chose to implement this per OS. +// I think Win32 InterlockedCompareExchange just generates the cmpxchg +// instruction anyways, so may as well just inline asm it (or Win32 +// if we're compiling with msvc) (LDREX/STREX on ARM) +// - CharlieM July 8th 2024 +DEPRECATED(bool os_compare_and_swap_8 (u8 *a, u8 b, u8 old), "use compare_and_swap instead"); +DEPRECATED(bool os_compare_and_swap_16 (u16 *a, u16 b, u16 old), "use compare_and_swap instead"); +DEPRECATED(bool os_compare_and_swap_32 (u32 *a, u32 b, u32 old), "use compare_and_swap instead"); +DEPRECATED(bool os_compare_and_swap_64 (u64 *a, u64 b, u64 old), "use compare_and_swap instead"); +DEPRECATED(bool os_compare_and_swap_bool(bool *a, bool b, bool old), "use compare_and_swap instead"); void os_sleep(u32 ms); void os_yield_thread(); diff --git a/oogabooga/tests.c b/oogabooga/tests.c index c2ec56e..9687877 100644 --- a/oogabooga/tests.c +++ b/oogabooga/tests.c @@ -1098,12 +1098,12 @@ void oogabooga_run_tests() { #if CONFIGURATION != RELEASE print("Thread bombing allocator... "); - Thread* threads[100]; - for (int i = 0; i < 100; i++) { + Thread* threads[300]; + for (int i = 0; i < 300; i++) { threads[i] = os_make_thread(test_allocator_threaded, get_heap_allocator()); os_start_thread(threads[i]); } - for (int i = 0; i < 100; i++) { + for (int i = 0; i < 300; i++) { os_join_thread(threads[i]); } print("OK!\n");