This commit is contained in:
Charlie Malmqvist 2024-07-22 16:49:10 +02:00
parent b25b67f645
commit 9f5d20d3de
6 changed files with 60 additions and 333 deletions

9
TODO
View file

@ -35,8 +35,13 @@
- Atlases are way too big, render atlases with size depending on font_height (say, 128 codepoints per atlas)
- OS
Window::bool is_minimized
don't set window.width & window.height to 0
- Window::bool is_minimized
- don't set window.width & window.height to 0
- Sockets recv, send
- Arenas
- Needs testing:
- Audio format channel conversions

View file

@ -3,6 +3,9 @@
///
// Build config stuff
#define OOGABOOGA_ENABLE_COMPLICATED_BUILD_MODE 1
#define OOGABOOGA_NO_IMPLEMENTATION 1
#define INITIAL_PROGRAM_MEMORY_SIZE MB(5)
// You might want to increase this if you get a log warning saying the temporary storage overflowed.
@ -36,11 +39,11 @@ typedef struct Context_Extra {
//
// This is a minimal starting point for new projects. Copy & rename to get started
#include "oogabooga/examples/minimal_game_loop.c"
// #include "oogabooga/examples/minimal_game_loop.c"
// #include "oogabooga/examples/text_rendering.c"
// #include "oogabooga/examples/custom_logger.c"
// #include "oogabooga/examples/renderer_stress_test.c"
#include "oogabooga/examples/renderer_stress_test.c"
// #include "oogabooga/examples/tile_game.c"
// #include "oogabooga/examples/audio_test.c"
// #include "oogabooga/examples/custom_shader.c"

View file

@ -106,6 +106,9 @@ typedef struct Cpu_Capabilities {
#define MEMORY_BARRIER _ReadWriteBarrier()
#define SHARED_EXPORT __declspec(dllexport)
#define SHARED_IMPORT __declspec(dllimport)
#elif COMPILER_GCC || COMPILER_CLANG
#define inline __attribute__((always_inline)) inline
#define alignat(x) __attribute__((aligned(x)))
@ -220,6 +223,9 @@ typedef struct Cpu_Capabilities {
#define MEMORY_BARRIER __asm__ __volatile__("" ::: "memory")
#define SHARED_EXPORT __attribute__((visibility("default")))
#define SHARED_IMPORT
#else
#define inline inline
#define COMPILER_HAS_MEMCPY_INTRINSICS 0
@ -239,6 +245,8 @@ typedef struct Cpu_Capabilities {
#warning "Compiler is not explicitly supported, some things will probably not work as expected"
#endif
Cpu_Capabilities
query_cpu_capabilities() {
Cpu_Capabilities result = {0};

View file

@ -84,7 +84,7 @@
Example:
#define RUN_TESTS 0
#define RUN_TESTS 1
- ENABLE_PROFILING
Enable time profiling which will be dumped to google_trace.json.
@ -102,12 +102,23 @@
tm_scope_var
tm_scope_accum
- OOGABOOGA_HEADLESS
Run oogabooga in headless mode, i.e. no window, no graphics, no audio.
Useful if you only need the oogabooga standard library for something like a game server.
0: Disable
1: Enable
Example:
#define OOGABOOGA_HEADLESS 1
*/
#define OGB_VERSION_MAJOR 0
#define OGB_VERSION_MINOR 1
#define OGB_VERSION_PATCH 1
#define OGB_VERSION_PATCH 2
#define OGB_VERSION (OGB_VERSION_MAJOR*1000000+OGB_VERSION_MINOR*1000+OGB_VERSION_PATCH)
@ -214,9 +225,9 @@ typedef u8 bool;
#ifdef _WIN32
#define COBJMACROS
#include <Windows.h>
#if CONFIGURATION == DEBUG
#if CONFIGURATION == DEBUG
#include <dbghelp.h>
#endif
#endif
#define TARGET_OS WINDOWS
#define OS_PATHS_HAVE_BACKSLASH 1
#elif defined(__linux__)
@ -234,6 +245,19 @@ typedef u8 bool;
#endif
#if OOGABOOGA_ENABLE_COMPLICATED_BUILD_MODE
#if OOGABOOGA_NO_IMPLEMENTATION
#define ogb_proc SHARED_IMPORT
#else
#define ogb_proc SHARED_EXPORT
#endif
#else
#define ogb_proc
#endif
// This needs to be included before dependencies
#include "base.c"
@ -273,8 +297,8 @@ typedef u8 bool;
///
// Dependencies
///
// The reason dependencies are compiled here is because we modify stb_vorbis to use our
// file API instead of the stdio.h (cmoooon Sean)
// Dependencies are compiled here because we need to modify third-party code
// to use the oogabooga standard library where it would otherwise use the C standard library.
#include "third_party.c"

View file

@ -800,316 +800,3 @@ inline void basic_rsqrt_float32_512(float *a, float *result) {
basic_rsqrt_float32_256(a+8, result+8);
}
// SSE2 int32 (note: sse_mul_int32_128 uses pmulld, which requires SSE4.1)
/*inline void sse_add_int32_128(s32 *a, s32 *b, s32* result) {
__asm__ (
"movdqa (%0), %%xmm0\n\t"
"movdqa (%1), %%xmm1\n\t"
"paddd %%xmm1, %%xmm0\n\t"
"movdqa %%xmm0, (%2)\n\t"
:
: "r" (a), "r" (b), "r" (result)
: "xmm0", "xmm1"
);
}
inline void sse_sub_int32_128(s32 *a, s32 *b, s32* result) {
__asm__ (
"movdqa (%0), %%xmm0\n\t"
"movdqa (%1), %%xmm1\n\t"
"psubd %%xmm1, %%xmm0\n\t"
"movdqa %%xmm0, (%2)\n\t"
:
: "r" (a), "r" (b), "r" (result)
: "xmm0", "xmm1"
);
}
inline void sse_mul_int32_128(s32 *a, s32 *b, s32* result) {
__asm__ (
"movdqa (%0), %%xmm0\n\t"
"movdqa (%1), %%xmm1\n\t"
"pmulld %%xmm1, %%xmm0\n\t"
"movdqa %%xmm0, (%2)\n\t"
:
: "r" (a), "r" (b), "r" (result)
: "xmm0", "xmm1"
);
}
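// SSE float32 (movups/addps/subps/mulps/divps are baseline SSE, not SSE4.2)
// NOTE: the *_64 variants use the same full-width xmm loads/stores as the *_128 variants.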
inline void sse_add_float32_64(float32 *a, float32 *b, float32* result) {
__asm__ (
"movups (%0), %%xmm0\n\t"
"movups (%1), %%xmm1\n\t"
"addps %%xmm1, %%xmm0\n\t"
"movups %%xmm0, (%2)\n\t"
:
: "r" (a), "r" (b), "r" (result)
: "xmm0", "xmm1"
);
}
inline void sse_add_float32_128(float32 *a, float32 *b, float32* result) {
__asm__ (
"movups (%0), %%xmm0\n\t"
"movups (%1), %%xmm1\n\t"
"addps %%xmm1, %%xmm0\n\t"
"movups %%xmm0, (%2)\n\t"
:
: "r" (a), "r" (b), "r" (result)
: "xmm0", "xmm1"
);
}
inline void sse_sub_float32_64(float32 *a, float32 *b, float32* result) {
__asm__ (
"movups (%0), %%xmm0\n\t"
"movups (%1), %%xmm1\n\t"
"subps %%xmm1, %%xmm0\n\t"
"movups %%xmm0, (%2)\n\t"
:
: "r" (a), "r" (b), "r" (result)
: "xmm0", "xmm1"
);
}
inline void sse_sub_float32_128(float32 *a, float32 *b, float32* result) {
__asm__ (
"movups (%0), %%xmm0\n\t"
"movups (%1), %%xmm1\n\t"
"subps %%xmm1, %%xmm0\n\t"
"movups %%xmm0, (%2)\n\t"
:
: "r" (a), "r" (b), "r" (result)
: "xmm0", "xmm1"
);
}
inline void sse_mul_float32_64(float32 *a, float32 *b, float32* result) {
__asm__ (
"movups (%0), %%xmm0\n\t"
"movups (%1), %%xmm1\n\t"
"mulps %%xmm1, %%xmm0\n\t"
"movups %%xmm0, (%2)\n\t"
:
: "r" (a), "r" (b), "r" (result)
: "xmm0", "xmm1"
);
}
inline void sse_mul_float32_128(float32 *a, float32 *b, float32* result) {
__asm__ (
"movups (%0), %%xmm0\n\t"
"movups (%1), %%xmm1\n\t"
"mulps %%xmm1, %%xmm0\n\t"
"movups %%xmm0, (%2)\n\t"
:
: "r" (a), "r" (b), "r" (result)
: "xmm0", "xmm1"
);
}
inline void sse_div_float32_64(float32 *a, float32 *b, float32* result) {
__asm__ (
"movups (%0), %%xmm0\n\t"
"movups (%1), %%xmm1\n\t"
"divps %%xmm1, %%xmm0\n\t"
"movups %%xmm0, (%2)\n\t"
:
: "r" (a), "r" (b), "r" (result)
: "xmm0", "xmm1"
);
}
inline void sse_div_float32_128(float32 *a, float32 *b, float32* result) {
__asm__ (
"movups (%0), %%xmm0\n\t"
"movups (%1), %%xmm1\n\t"
"divps %%xmm1, %%xmm0\n\t"
"movups %%xmm0, (%2)\n\t"
:
: "r" (a), "r" (b), "r" (result)
: "xmm0", "xmm1"
);
}
// AVX float32
inline void avx_add_float32_256(float32 *a, float32 *b, float32* result) {
__asm__ (
"vmovups %1, %%ymm0\n\t"
"vmovups %2, %%ymm1\n\t"
"vaddps %%ymm1, %%ymm0, %%ymm0\n\t"
"vmovups %%ymm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "ymm0", "ymm1"
);
}
inline void avx_sub_float32_256(float32 *a, float32 *b, float32* result) {
__asm__ (
"vmovups %1, %%ymm0\n\t"
"vmovups %2, %%ymm1\n\t"
"vsubps %%ymm1, %%ymm0, %%ymm0\n\t"
"vmovups %%ymm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "ymm0", "ymm1"
);
}
inline void avx_mul_float32_256(float32 *a, float32 *b, float32* result) {
__asm__ (
"vmovups %1, %%ymm0\n\t"
"vmovups %2, %%ymm1\n\t"
"vmulps %%ymm1, %%ymm0, %%ymm0\n\t"
"vmovups %%ymm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "ymm0", "ymm1"
);
}
inline void avx_div_float32_256(float32 *a, float32 *b, float32* result) {
__asm__ (
"vmovups %1, %%ymm0\n\t"
"vmovups %2, %%ymm1\n\t"
"vdivps %%ymm1, %%ymm0, %%ymm0\n\t"
"vmovups %%ymm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "ymm0", "ymm1"
);
}
// AVX2 int32
inline void avx2_add_int32_256(s32 *a, s32 *b, s32* result) {
__asm__ (
"vmovdqu %1, %%ymm0\n\t"
"vmovdqu %2, %%ymm1\n\t"
"vpaddd %%ymm1, %%ymm0, %%ymm0\n\t"
"vmovdqu %%ymm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "ymm0", "ymm1"
);
}
inline void avx2_sub_int32_256(s32 *a, s32 *b, s32* result) {
__asm__ (
"vmovdqu %1, %%ymm0\n\t"
"vmovdqu %2, %%ymm1\n\t"
"vpsubd %%ymm1, %%ymm0, %%ymm0\n\t"
"vmovdqu %%ymm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "ymm0", "ymm1"
);
}
inline void avx2_mul_int32_256(s32 *a, s32 *b, s32* result) {
__asm__ (
"vmovdqu %1, %%ymm0\n\t"
"vmovdqu %2, %%ymm1\n\t"
"vpmulld %%ymm1, %%ymm0, %%ymm0\n\t"
"vmovdqu %%ymm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "ymm0", "ymm1"
);
}
// AVX-512 float32
inline void avx512_add_float32_512(float32 *a, float32 *b, float32* result) {
__asm__ (
"vmovups %1, %%zmm0\n\t"
"vmovups %2, %%zmm1\n\t"
"vaddps %%zmm1, %%zmm0, %%zmm0\n\t"
"vmovups %%zmm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "zmm0", "zmm1"
);
}
inline void avx512_sub_float32_512(float32 *a, float32 *b, float32* result) {
__asm__ (
"vmovups %1, %%zmm0\n\t"
"vmovups %2, %%zmm1\n\t"
"vsubps %%zmm1, %%zmm0, %%zmm0\n\t"
"vmovups %%zmm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "zmm0", "zmm1"
);
}
inline void avx512_mul_float32_512(float32 *a, float32 *b, float32* result) {
__asm__ (
"vmovups %1, %%zmm0\n\t"
"vmovups %2, %%zmm1\n\t"
"vmulps %%zmm1, %%zmm0, %%zmm0\n\t"
"vmovups %%zmm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "zmm0", "zmm1"
);
}
inline void avx512_div_float32_512(float32 *a, float32 *b, float32* result) {
__asm__ (
"vmovups %1, %%zmm0\n\t"
"vmovups %2, %%zmm1\n\t"
"vdivps %%zmm1, %%zmm0, %%zmm0\n\t"
"vmovups %%zmm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "zmm0", "zmm1"
);
}
// AVX-512 int32
inline void avx512_add_int32_512(s32 *a, s32 *b, s32* result) {
__asm__ (
"vmovdqu32 %1, %%zmm0\n\t"
"vmovdqu32 %2, %%zmm1\n\t"
"vpaddd %%zmm1, %%zmm0, %%zmm0\n\t"
"vmovdqu32 %%zmm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "zmm0", "zmm1"
);
}
inline void avx512_sub_int32_512(s32 *a, s32 *b, s32* result) {
__asm__ (
"vmovdqu32 %1, %%zmm0\n\t"
"vmovdqu32 %2, %%zmm1\n\t"
"vpsubd %%zmm1, %%zmm0, %%zmm0\n\t"
"vmovdqu32 %%zmm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "zmm0", "zmm1"
);
}
inline void avx512_mul_int32_512(s32 *a, s32 *b, s32* result) {
__asm__ (
"vmovdqu32 %1, %%zmm0\n\t"
"vmovdqu32 %2, %%zmm1\n\t"
"vpmulld %%zmm1, %%zmm0, %%zmm0\n\t"
"vmovdqu32 %%zmm0, %0\n\t"
: "=m" (*result)
: "m" (*a), "m" (*b)
: "zmm0", "zmm1"
);
}*/