Cleanup
This commit is contained in:
parent
b25b67f645
commit
9f5d20d3de
6 changed files with 60 additions and 333 deletions
9
TODO
9
TODO
|
@ -35,8 +35,13 @@
|
|||
- Atlases are way too big, render atlases with size depending on font_height (say, 128 codepoints per atlas)
|
||||
|
||||
- OS
|
||||
Window::bool is_minimized
|
||||
don't set window.width & window.height to 0
|
||||
- Window::bool is_minimized
|
||||
- don't set window.width & window.height to 0
|
||||
- Sockets recv, send
|
||||
|
||||
|
||||
- Arenas
|
||||
|
||||
|
||||
- Needs testing:
|
||||
- Audio format channel conversions
|
||||
|
|
7
build.c
7
build.c
|
@ -3,6 +3,9 @@
|
|||
///
|
||||
// Build config stuff
|
||||
|
||||
#define OOGABOOGA_ENABLE_COMPLICATED_BUILD_MODE 1
|
||||
#define OOGABOOGA_NO_IMPLEMENTATION 1
|
||||
|
||||
#define INITIAL_PROGRAM_MEMORY_SIZE MB(5)
|
||||
|
||||
// You might want to increase this if you get a log warning saying the temporary storage has overflowed.
|
||||
|
@ -36,11 +39,11 @@ typedef struct Context_Extra {
|
|||
//
|
||||
|
||||
// This is a minimal starting point for new projects. Copy & rename to get started
|
||||
#include "oogabooga/examples/minimal_game_loop.c"
|
||||
// #include "oogabooga/examples/minimal_game_loop.c"
|
||||
|
||||
// #include "oogabooga/examples/text_rendering.c"
|
||||
// #include "oogabooga/examples/custom_logger.c"
|
||||
// #include "oogabooga/examples/renderer_stress_test.c"
|
||||
#include "oogabooga/examples/renderer_stress_test.c"
|
||||
// #include "oogabooga/examples/tile_game.c"
|
||||
// #include "oogabooga/examples/audio_test.c"
|
||||
// #include "oogabooga/examples/custom_shader.c"
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
vx_cross()
|
||||
- added os_get_file_size_from_Path()
|
||||
- Some simple restructuring of existing code
|
||||
- Made heap corruption detection more robust
|
||||
- Made heap corruption detection more robust
|
||||
|
||||
## v0.01.000 - AUDIO!
|
||||
- Added audio sources
|
||||
|
|
|
@ -106,6 +106,9 @@ typedef struct Cpu_Capabilities {
|
|||
|
||||
#define MEMORY_BARRIER _ReadWriteBarrier()
|
||||
|
||||
#define SHARED_EXPORT __declspec(dllexport)
|
||||
#define SHARED_IMPORT __declspec(dllimport)
|
||||
|
||||
#elif COMPILER_GCC || COMPILER_CLANG
|
||||
#define inline __attribute__((always_inline)) inline
|
||||
#define alignat(x) __attribute__((aligned(x)))
|
||||
|
@ -220,6 +223,9 @@ typedef struct Cpu_Capabilities {
|
|||
|
||||
#define MEMORY_BARRIER __asm__ __volatile__("" ::: "memory")
|
||||
|
||||
#define SHARED_EXPORT __attribute__((visibility("default")))
|
||||
#define SHARED_IMPORT
|
||||
|
||||
#else
|
||||
#define inline inline
|
||||
#define COMPILER_HAS_MEMCPY_INTRINSICS 0
|
||||
|
@ -239,6 +245,8 @@ typedef struct Cpu_Capabilities {
|
|||
#warning "Compiler is not explicitly supported, some things will probably not work as expected"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
Cpu_Capabilities
|
||||
query_cpu_capabilities() {
|
||||
Cpu_Capabilities result = {0};
|
||||
|
|
|
@ -84,7 +84,7 @@
|
|||
|
||||
Example:
|
||||
|
||||
#define RUN_TESTS 0
|
||||
#define RUN_TESTS 1
|
||||
|
||||
- ENABLE_PROFILING
|
||||
Enable time profiling which will be dumped to google_trace.json.
|
||||
|
@ -102,12 +102,23 @@
|
|||
tm_scope_var
|
||||
tm_scope_accum
|
||||
|
||||
- OOGABOOGA_HEADLESS
|
||||
Run oogabooga in headless mode, i.e. no window, no graphics, no audio.
|
||||
Useful if you only need the oogabooga standard library for something like a game server.
|
||||
|
||||
0: Disable
|
||||
1: Enable
|
||||
|
||||
Example:
|
||||
|
||||
#define OOGABOOGA_HEADLESS 1
|
||||
|
||||
|
||||
*/
|
||||
|
||||
#define OGB_VERSION_MAJOR 0
|
||||
#define OGB_VERSION_MINOR 1
|
||||
#define OGB_VERSION_PATCH 1
|
||||
#define OGB_VERSION_PATCH 2
|
||||
|
||||
#define OGB_VERSION (OGB_VERSION_MAJOR*1000000+OGB_VERSION_MINOR*1000+OGB_VERSION_PATCH)
|
||||
|
||||
|
@ -214,9 +225,9 @@ typedef u8 bool;
|
|||
#ifdef _WIN32
|
||||
#define COBJMACROS
|
||||
#include <Windows.h>
|
||||
#if CONFIGURATION == DEBUG
|
||||
#include <dbghelp.h>
|
||||
#endif
|
||||
#if CONFIGURATION == DEBUG
|
||||
#include <dbghelp.h>
|
||||
#endif
|
||||
#define TARGET_OS WINDOWS
|
||||
#define OS_PATHS_HAVE_BACKSLASH 1
|
||||
#elif defined(__linux__)
|
||||
|
@ -234,6 +245,19 @@ typedef u8 bool;
|
|||
#endif
|
||||
|
||||
|
||||
#if OOGABOOGA_ENABLE_COMPLICATED_BUILD_MODE
|
||||
|
||||
#if OOGABOOGA_NO_IMPLEMENTATION
|
||||
#define ogb_proc SHARED_IMPORT
|
||||
#else
|
||||
#define ogb_proc SHARED_EXPORT
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define ogb_proc
|
||||
#endif
|
||||
|
||||
|
||||
// This needs to be included before dependencies
|
||||
#include "base.c"
|
||||
|
||||
|
@ -273,8 +297,8 @@ typedef u8 bool;
|
|||
///
|
||||
// Dependencies
|
||||
///
|
||||
// The reason dependencies are compiled here is because we modify stb_vorbis to use our
|
||||
// file API instead of the stdio.h (cmoooon Sean)
|
||||
// The reason dependencies are compiled here is because we need to modify third party code
|
||||
// to use the oogabooga standard where they use the C standard.
|
||||
|
||||
#include "third_party.c"
|
||||
|
||||
|
@ -360,14 +384,14 @@ void oogabooga_init(u64 program_memory_size) {
|
|||
#else
|
||||
log_info("Headless mode on");
|
||||
#endif
|
||||
log_verbose("CPU has sse1: %cs", features.sse1 ? "true" : "false");
|
||||
log_verbose("CPU has sse2: %cs", features.sse2 ? "true" : "false");
|
||||
log_verbose("CPU has sse3: %cs", features.sse3 ? "true" : "false");
|
||||
log_verbose("CPU has ssse3: %cs", features.ssse3 ? "true" : "false");
|
||||
log_verbose("CPU has sse41: %cs", features.sse41 ? "true" : "false");
|
||||
log_verbose("CPU has sse42: %cs", features.sse42 ? "true" : "false");
|
||||
log_verbose("CPU has avx: %cs", features.avx ? "true" : "false");
|
||||
log_verbose("CPU has avx2: %cs", features.avx2 ? "true" : "false");
|
||||
log_verbose("CPU has sse1: %cs", features.sse1 ? "true" : "false");
|
||||
log_verbose("CPU has sse2: %cs", features.sse2 ? "true" : "false");
|
||||
log_verbose("CPU has sse3: %cs", features.sse3 ? "true" : "false");
|
||||
log_verbose("CPU has ssse3: %cs", features.ssse3 ? "true" : "false");
|
||||
log_verbose("CPU has sse41: %cs", features.sse41 ? "true" : "false");
|
||||
log_verbose("CPU has sse42: %cs", features.sse42 ? "true" : "false");
|
||||
log_verbose("CPU has avx: %cs", features.avx ? "true" : "false");
|
||||
log_verbose("CPU has avx2: %cs", features.avx2 ? "true" : "false");
|
||||
log_verbose("CPU has avx512: %cs", features.avx512 ? "true" : "false");
|
||||
}
|
||||
|
||||
|
|
313
oogabooga/simd.c
313
oogabooga/simd.c
|
@ -800,316 +800,3 @@ inline void basic_rsqrt_float32_512(float *a, float *result) {
|
|||
basic_rsqrt_float32_256(a+8, result+8);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// SSE 2 int32
|
||||
/*inline void sse_add_int32_128(s32 *a, s32 *b, s32* result) {
|
||||
__asm__ (
|
||||
"movdqa (%0), %%xmm0\n\t"
|
||||
"movdqa (%1), %%xmm1\n\t"
|
||||
"paddd %%xmm1, %%xmm0\n\t"
|
||||
"movdqa %%xmm0, (%2)\n\t"
|
||||
:
|
||||
: "r" (a), "r" (b), "r" (result)
|
||||
: "xmm0", "xmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void sse_sub_int32_128(s32 *a, s32 *b, s32* result) {
|
||||
__asm__ (
|
||||
"movdqa (%0), %%xmm0\n\t"
|
||||
"movdqa (%1), %%xmm1\n\t"
|
||||
"psubd %%xmm1, %%xmm0\n\t"
|
||||
"movdqa %%xmm0, (%2)\n\t"
|
||||
:
|
||||
: "r" (a), "r" (b), "r" (result)
|
||||
: "xmm0", "xmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void sse_mul_int32_128(s32 *a, s32 *b, s32* result) {
|
||||
__asm__ (
|
||||
"movdqa (%0), %%xmm0\n\t"
|
||||
"movdqa (%1), %%xmm1\n\t"
|
||||
"pmulld %%xmm1, %%xmm0\n\t"
|
||||
"movdqa %%xmm0, (%2)\n\t"
|
||||
:
|
||||
: "r" (a), "r" (b), "r" (result)
|
||||
: "xmm0", "xmm1"
|
||||
);
|
||||
}
|
||||
|
||||
// SSE4.2 float32
|
||||
inline void sse_add_float32_64(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"movups (%0), %%xmm0\n\t"
|
||||
"movups (%1), %%xmm1\n\t"
|
||||
"addps %%xmm1, %%xmm0\n\t"
|
||||
"movups %%xmm0, (%2)\n\t"
|
||||
:
|
||||
: "r" (a), "r" (b), "r" (result)
|
||||
: "xmm0", "xmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void sse_add_float32_128(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"movups (%0), %%xmm0\n\t"
|
||||
"movups (%1), %%xmm1\n\t"
|
||||
"addps %%xmm1, %%xmm0\n\t"
|
||||
"movups %%xmm0, (%2)\n\t"
|
||||
:
|
||||
: "r" (a), "r" (b), "r" (result)
|
||||
: "xmm0", "xmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void sse_sub_float32_64(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"movups (%0), %%xmm0\n\t"
|
||||
"movups (%1), %%xmm1\n\t"
|
||||
"subps %%xmm1, %%xmm0\n\t"
|
||||
"movups %%xmm0, (%2)\n\t"
|
||||
:
|
||||
: "r" (a), "r" (b), "r" (result)
|
||||
: "xmm0", "xmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void sse_sub_float32_128(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"movups (%0), %%xmm0\n\t"
|
||||
"movups (%1), %%xmm1\n\t"
|
||||
"subps %%xmm1, %%xmm0\n\t"
|
||||
"movups %%xmm0, (%2)\n\t"
|
||||
:
|
||||
: "r" (a), "r" (b), "r" (result)
|
||||
: "xmm0", "xmm1"
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
inline void sse_mul_float32_64(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"movups (%0), %%xmm0\n\t"
|
||||
"movups (%1), %%xmm1\n\t"
|
||||
"mulps %%xmm1, %%xmm0\n\t"
|
||||
"movups %%xmm0, (%2)\n\t"
|
||||
:
|
||||
: "r" (a), "r" (b), "r" (result)
|
||||
: "xmm0", "xmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void sse_mul_float32_128(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"movups (%0), %%xmm0\n\t"
|
||||
"movups (%1), %%xmm1\n\t"
|
||||
"mulps %%xmm1, %%xmm0\n\t"
|
||||
"movups %%xmm0, (%2)\n\t"
|
||||
:
|
||||
: "r" (a), "r" (b), "r" (result)
|
||||
: "xmm0", "xmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void sse_div_float32_64(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"movups (%0), %%xmm0\n\t"
|
||||
"movups (%1), %%xmm1\n\t"
|
||||
"divps %%xmm1, %%xmm0\n\t"
|
||||
"movups %%xmm0, (%2)\n\t"
|
||||
:
|
||||
: "r" (a), "r" (b), "r" (result)
|
||||
: "xmm0", "xmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void sse_div_float32_128(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"movups (%0), %%xmm0\n\t"
|
||||
"movups (%1), %%xmm1\n\t"
|
||||
"divps %%xmm1, %%xmm0\n\t"
|
||||
"movups %%xmm0, (%2)\n\t"
|
||||
:
|
||||
: "r" (a), "r" (b), "r" (result)
|
||||
: "xmm0", "xmm1"
|
||||
);
|
||||
}
|
||||
|
||||
// AVX float32
|
||||
inline void avx_add_float32_256(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"vmovups %1, %%ymm0\n\t"
|
||||
"vmovups %2, %%ymm1\n\t"
|
||||
"vaddps %%ymm1, %%ymm0, %%ymm0\n\t"
|
||||
"vmovups %%ymm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "ymm0", "ymm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void avx_sub_float32_256(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"vmovups %1, %%ymm0\n\t"
|
||||
"vmovups %2, %%ymm1\n\t"
|
||||
"vsubps %%ymm1, %%ymm0, %%ymm0\n\t"
|
||||
"vmovups %%ymm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "ymm0", "ymm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void avx_mul_float32_256(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"vmovups %1, %%ymm0\n\t"
|
||||
"vmovups %2, %%ymm1\n\t"
|
||||
"vmulps %%ymm1, %%ymm0, %%ymm0\n\t"
|
||||
"vmovups %%ymm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "ymm0", "ymm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void avx_div_float32_256(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"vmovups %1, %%ymm0\n\t"
|
||||
"vmovups %2, %%ymm1\n\t"
|
||||
"vdivps %%ymm1, %%ymm0, %%ymm0\n\t"
|
||||
"vmovups %%ymm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "ymm0", "ymm1"
|
||||
);
|
||||
}
|
||||
|
||||
// AVX2 int32
|
||||
inline void avx2_add_int32_256(s32 *a, s32 *b, s32* result) {
|
||||
__asm__ (
|
||||
"vmovdqu %1, %%ymm0\n\t"
|
||||
"vmovdqu %2, %%ymm1\n\t"
|
||||
"vpaddd %%ymm1, %%ymm0, %%ymm0\n\t"
|
||||
"vmovdqu %%ymm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "ymm0", "ymm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void avx2_sub_int32_256(s32 *a, s32 *b, s32* result) {
|
||||
__asm__ (
|
||||
"vmovdqu %1, %%ymm0\n\t"
|
||||
"vmovdqu %2, %%ymm1\n\t"
|
||||
"vpsubd %%ymm1, %%ymm0, %%ymm0\n\t"
|
||||
"vmovdqu %%ymm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "ymm0", "ymm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void avx2_mul_int32_256(s32 *a, s32 *b, s32* result) {
|
||||
__asm__ (
|
||||
"vmovdqu %1, %%ymm0\n\t"
|
||||
"vmovdqu %2, %%ymm1\n\t"
|
||||
"vpmulld %%ymm1, %%ymm0, %%ymm0\n\t"
|
||||
"vmovdqu %%ymm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "ymm0", "ymm1"
|
||||
);
|
||||
}
|
||||
|
||||
// AVX-512 float32
|
||||
inline void avx512_add_float32_512(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"vmovups %1, %%zmm0\n\t"
|
||||
"vmovups %2, %%zmm1\n\t"
|
||||
"vaddps %%zmm1, %%zmm0, %%zmm0\n\t"
|
||||
"vmovups %%zmm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "zmm0", "zmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void avx512_sub_float32_512(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"vmovups %1, %%zmm0\n\t"
|
||||
"vmovups %2, %%zmm1\n\t"
|
||||
"vsubps %%zmm1, %%zmm0, %%zmm0\n\t"
|
||||
"vmovups %%zmm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "zmm0", "zmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void avx512_mul_float32_512(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"vmovups %1, %%zmm0\n\t"
|
||||
"vmovups %2, %%zmm1\n\t"
|
||||
"vmulps %%zmm1, %%zmm0, %%zmm0\n\t"
|
||||
"vmovups %%zmm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "zmm0", "zmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void avx512_div_float32_512(float32 *a, float32 *b, float32* result) {
|
||||
__asm__ (
|
||||
"vmovups %1, %%zmm0\n\t"
|
||||
"vmovups %2, %%zmm1\n\t"
|
||||
"vdivps %%zmm1, %%zmm0, %%zmm0\n\t"
|
||||
"vmovups %%zmm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "zmm0", "zmm1"
|
||||
);
|
||||
}
|
||||
|
||||
// AVX-512 int32
|
||||
inline void avx512_add_int32_512(s32 *a, s32 *b, s32* result) {
|
||||
__asm__ (
|
||||
"vmovdqu32 %1, %%zmm0\n\t"
|
||||
"vmovdqu32 %2, %%zmm1\n\t"
|
||||
"vpaddd %%zmm1, %%zmm0, %%zmm0\n\t"
|
||||
"vmovdqu32 %%zmm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "zmm0", "zmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void avx512_sub_int32_512(s32 *a, s32 *b, s32* result) {
|
||||
__asm__ (
|
||||
"vmovdqu32 %1, %%zmm0\n\t"
|
||||
"vmovdqu32 %2, %%zmm1\n\t"
|
||||
"vpsubd %%zmm1, %%zmm0, %%zmm0\n\t"
|
||||
"vmovdqu32 %%zmm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "zmm0", "zmm1"
|
||||
);
|
||||
}
|
||||
|
||||
inline void avx512_mul_int32_512(s32 *a, s32 *b, s32* result) {
|
||||
__asm__ (
|
||||
"vmovdqu32 %1, %%zmm0\n\t"
|
||||
"vmovdqu32 %2, %%zmm1\n\t"
|
||||
"vpmulld %%zmm1, %%zmm0, %%zmm0\n\t"
|
||||
"vmovdqu32 %%zmm0, %0\n\t"
|
||||
: "=m" (*result)
|
||||
: "m" (*a), "m" (*b)
|
||||
: "zmm0", "zmm1"
|
||||
);
|
||||
}*/
|
||||
|
|
Reference in a new issue