- Replace lodepng with stb_image (& add stb_truetype for fonts)
- Fix bug where the d3d11 input assembler was not being created correctly
- Fix framerate being locked by swap chain present
- Move enable_vsync to window
- Add sqrt & rsqrt simd
- Add release build & run in vscode tasks & launch
- Cleanup
This commit is contained in:
parent
4c5f882999
commit
05919248eb
26 changed files with 13557 additions and 305 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -54,4 +54,6 @@ test_doc.vkn
|
|||
*keybinds
|
||||
*.rdi
|
||||
|
||||
google_trace.json
|
||||
google_trace.json
|
||||
|
||||
build/*
|
16
.vscode/launch.json
vendored
16
.vscode/launch.json
vendored
|
@ -2,7 +2,7 @@
|
|||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Launch with MSVC Debugger",
|
||||
"name": "Launch Debug with MSVC Debugger",
|
||||
"type": "cppvsdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/build/cgame.exe", // Run the output executable after compile
|
||||
|
@ -11,7 +11,19 @@
|
|||
"cwd": "${workspaceFolder}",
|
||||
"environment": [],
|
||||
"console":"integratedTerminal",
|
||||
// "preLaunchTask": "Compile"
|
||||
"preLaunchTask": "Compile"
|
||||
},
|
||||
{
|
||||
"name": "Launch Release with MSVC Debugger",
|
||||
"type": "cppvsdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/build/release/cgame.exe", // Run the output executable after compile
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}",
|
||||
"environment": [],
|
||||
"console":"integratedTerminal",
|
||||
"preLaunchTask": "Compile Release"
|
||||
}
|
||||
]
|
||||
}
|
17
.vscode/tasks.json
vendored
17
.vscode/tasks.json
vendored
|
@ -16,6 +16,21 @@
|
|||
// "close": false,
|
||||
// "showReuseMessage": true,
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"label": "Compile Release",
|
||||
"type": "shell",
|
||||
"command": "${workspaceFolder}\\build_release",
|
||||
"group": {
|
||||
"kind": "build"
|
||||
},
|
||||
"problemMatcher": ["$gcc"],
|
||||
"presentation": {
|
||||
"clear": true,
|
||||
// "revealProblems": "onProblem",
|
||||
// "close": false,
|
||||
// "showReuseMessage": true,
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -6,6 +6,6 @@ mkdir build
|
|||
|
||||
pushd build
|
||||
|
||||
clang -g -o cgame.exe ../build.c -O0 -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi -msse4.1
|
||||
clang -g -o cgame.exe ../build.c -O0 -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -Wno-deprecated-declarations -lkernel32 -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi
|
||||
|
||||
popd
|
9
build.c
9
build.c
|
@ -3,7 +3,7 @@
|
|||
///
|
||||
// Build config stuff
|
||||
|
||||
#define RUN_TESTS 1
|
||||
#define RUN_TESTS 0
|
||||
|
||||
// This is only for people developing oogabooga!
|
||||
#define OOGABOOGA_DEV 1
|
||||
|
@ -13,6 +13,7 @@
|
|||
// ENABLE_SIMD Requires CPU to support at least SSE1 but I will be very surprised if you find a system today which doesn't
|
||||
#define ENABLE_SIMD 1
|
||||
|
||||
|
||||
#define INITIAL_PROGRAM_MEMORY_SIZE MB(5)
|
||||
|
||||
typedef struct Context_Extra {
|
||||
|
@ -21,8 +22,6 @@ typedef struct Context_Extra {
|
|||
// This needs to be defined before oogabooga if we want extra stuff in context
|
||||
#define CONTEXT_EXTRA Context_Extra
|
||||
|
||||
#define GFX_RENDERER GFX_RENDERER_D3D11
|
||||
|
||||
// This defaults to "entry", but we can set it to anything (except "main" or other existing proc names)
|
||||
#define ENTRY_PROC entry
|
||||
|
||||
|
@ -38,13 +37,13 @@ typedef struct Context_Extra {
|
|||
//
|
||||
|
||||
// this is a minimal starting point for new projects. Copy & rename to get started
|
||||
#include "oogabooga/examples/minimal_game_loop.c"
|
||||
// #include "oogabooga/examples/minimal_game_loop.c"
|
||||
|
||||
// An engine dev stress test for rendering
|
||||
// #include "oogabooga/examples/renderer_stress_test.c"
|
||||
|
||||
// Randy's example game that he's building out as a tutorial for using the engine
|
||||
// #include "entry_randygame.c"
|
||||
#include "entry_randygame.c"
|
||||
|
||||
// This is where you swap in your own project!
|
||||
// #include "entry_yourepicgamename.c"
|
||||
|
|
|
@ -1,14 +1,18 @@
|
|||
|
||||
@echo off
|
||||
rmdir /S /Q build
|
||||
mkdir build
|
||||
if exist build/dissassembly (
|
||||
rmdir /s /q build
|
||||
)
|
||||
if not exist build (
|
||||
mkdir build
|
||||
)
|
||||
|
||||
pushd build
|
||||
|
||||
mkdir release
|
||||
pushd release
|
||||
mkdir dissassembly
|
||||
pushd dissassembly
|
||||
|
||||
clang -o cgame.asm ../../build.c -Ofast -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -ffast-math -funroll-loops -finline-functions -fvectorize -fslp-vectorize -fomit-frame-pointer -fno-exceptions -fno-rtti -S -masm=intel
|
||||
clang -o cgame.asm ../../build.c -Ofast -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -Wno-deprecated-declarations -finline-functions -ffast-math -fno-math-errno -funsafe-math-optimizations -freciprocal-math -ffinite-math-only -fassociative-math -fno-signed-zeros -fno-trapping-math -ftree-vectorize -fomit-frame-pointer -funroll-loops -fno-rtti -fno-exceptions -S -masm=intel
|
||||
|
||||
popd
|
||||
popd
|
|
@ -1,5 +1,7 @@
|
|||
@echo off
|
||||
rmdir /S /Q build
|
||||
if exist build (
|
||||
rmdir /s /q build
|
||||
)
|
||||
mkdir build
|
||||
|
||||
pushd build
|
||||
|
@ -7,7 +9,7 @@ pushd build
|
|||
mkdir release
|
||||
pushd release
|
||||
|
||||
clang -o cgame.exe ../../build.c -Ofast -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi -finline-functions -ffast-math -fno-math-errno -funsafe-math-optimizations -freciprocal-math -ffinite-math-only -fassociative-math -fno-signed-zeros -fno-trapping-math -ftree-vectorize -fomit-frame-pointer -funroll-loops -fno-rtti -fno-exceptions -msse4.1
|
||||
clang -o cgame.exe ../../build.c -Ofast -DNDEBUG -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -Wno-deprecated-declarations -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi -finline-functions -ffast-math -fno-math-errno -funsafe-math-optimizations -freciprocal-math -ffinite-math-only -fassociative-math -fno-signed-zeros -fno-trapping-math -ftree-vectorize -fomit-frame-pointer -funroll-loops -fno-rtti -fno-exceptions
|
||||
|
||||
popd
|
||||
popd
|
|
@ -10,37 +10,22 @@
|
|||
#define local_persist static
|
||||
|
||||
#define forward_global extern
|
||||
|
||||
// Haters gonna hate
|
||||
#define If if (
|
||||
#define then )
|
||||
// If cond then {}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
inline void os_break() {
|
||||
__debugbreak();
|
||||
volatile int *a = 0;
|
||||
*a = 5;
|
||||
}
|
||||
#else
|
||||
#error "Only msvc compiler supported at the moment";
|
||||
#endif
|
||||
|
||||
|
||||
void printf(const char* fmt, ...);
|
||||
#define ASSERT_STR_HELPER(x) #x
|
||||
#define ASSERT_STR(x) ASSERT_STR_HELPER(x)
|
||||
#define assert_line(line, cond, ...) if(!(cond)) { printf("Assertion failed in file " __FILE__ " on line " ASSERT_STR(line) "\nFailed Condition: " #cond ". Message: " __VA_ARGS__); os_break(); }
|
||||
#define assert(cond, ...) assert_line(__LINE__, cond, __VA_ARGS__);
|
||||
#define assert_line(line, cond, ...) {if(!(cond)) { printf("Assertion failed in file " __FILE__ " on line " ASSERT_STR(line) "\nFailed Condition: " #cond ". Message: " __VA_ARGS__); crash(); }}
|
||||
#define assert(cond, ...) {assert_line(__LINE__, cond, __VA_ARGS__)}
|
||||
|
||||
#define DEFER(start, end) for(int _i_ = ((start), 0); _i_ == 0; _i_ += 1, (end))
|
||||
|
||||
#if CONFIGURATION == RELEASE
|
||||
#undef assert
|
||||
#define assert(...)
|
||||
#define assert(...) (void)0;
|
||||
#endif
|
||||
|
||||
#define panic(...) { print(__VA_ARGS__); os_break(); }
|
||||
#define panic(...) { print(__VA_ARGS__); crash(); }
|
||||
|
||||
#define cast(t) (t)
|
||||
|
||||
|
@ -48,7 +33,6 @@ void printf(const char* fmt, ...);
|
|||
|
||||
|
||||
|
||||
|
||||
#define FIRST_ARG(arg1, ...) arg1
|
||||
#define SECOND_ARG(arg1, arg2, ...) arg2
|
||||
#define print(...) _Generic((FIRST_ARG(__VA_ARGS__)), \
|
||||
|
|
|
@ -29,6 +29,11 @@ typedef struct Cpu_Capabilities {
|
|||
#define inline __forceinline
|
||||
#define alignat(x) __declspec(align(x))
|
||||
#define COMPILER_HAS_MEMCPY_INTRINSICS 1
|
||||
// Deliberately stops the program at the call site.
// First raises a breakpoint with the __debugbreak() intrinsic, then stores 5
// through a null pointer-to-volatile-int.
// NOTE(review): the null store presumably forces a fault if execution continues
// past the breakpoint (e.g. resumed in a debugger) — confirm intent.
inline void crash() {
|
||||
__debugbreak();
|
||||
volatile int *a = 0;
|
||||
*a = 5;
|
||||
}
|
||||
#include <intrin.h>
|
||||
#pragma intrinsic(__rdtsc)
|
||||
inline u64 rdtsc() {
|
||||
|
@ -66,6 +71,11 @@ typedef struct Cpu_Capabilities {
|
|||
#define inline __attribute__((always_inline)) inline
|
||||
#define alignat(x) __attribute__((aligned(x)))
|
||||
#define COMPILER_HAS_MEMCPY_INTRINSICS 1
|
||||
// Deliberately stops the program at the call site.
// Calls __builtin_trap(), then stores 5 through a null pointer-to-volatile-int.
// NOTE(review): __builtin_trap() is documented as not returning, so the null
// store below is likely unreachable and kept only as a fallback — confirm.
inline void crash() {
|
||||
__builtin_trap();
|
||||
volatile int *a = 0;
|
||||
*a = 5;
|
||||
}
|
||||
inline u64 rdtsc() {
|
||||
unsigned int lo, hi;
|
||||
__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
|
||||
|
@ -119,7 +129,6 @@ typedef struct Cpu_Capabilities {
|
|||
#warning "Compiler is not explicitly supported, some things will probably not work as expected"
|
||||
#endif
|
||||
|
||||
|
||||
Cpu_Capabilities query_cpu_capabilities() {
|
||||
Cpu_Capabilities result = {0};
|
||||
|
||||
|
|
|
@ -3,10 +3,10 @@
|
|||
|
||||
struct VS_INPUT
|
||||
{
|
||||
float4 position : POSITION;
|
||||
float2 uv : TEXCOORD;
|
||||
float4 color : COLOR;
|
||||
int texture_index: TEXTURE_INDEX;
|
||||
float4 position : POSITION;
|
||||
};
|
||||
|
||||
struct PS_INPUT
|
||||
|
@ -79,8 +79,8 @@ float4 ps_main(PS_INPUT input) : SV_TARGET
|
|||
*/
|
||||
|
||||
const u8 IMAGE_SHADER_VERTEX_BLOB_BYTES[]= {
|
||||
0x44, 0x58, 0x42, 0x43, 0xdd, 0x02, 0x55, 0xb0, 0x7b, 0x83, 0x6c, 0x34, 0x45, 0xe8, 0x51, 0xd4,
|
||||
0x76, 0xbf, 0x66, 0x77, 0x01, 0x00, 0x00, 0x00, 0x3c, 0x03, 0x00, 0x00, 0x05, 0x00, 0x00,
|
||||
0x44, 0x58, 0x42, 0x43, 0xf4, 0xea, 0x50, 0x9f, 0xcf, 0xeb, 0x01, 0x7b, 0x78, 0x58, 0xd5, 0x6b,
|
||||
0x4f, 0x9f, 0xc1, 0xe2, 0x01, 0x00, 0x00, 0x00, 0x3c, 0x03, 0x00, 0x00, 0x05, 0x00, 0x00,
|
||||
0x00, 0x34, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x38, 0x01, 0x00, 0x00, 0xd4, 0x01,
|
||||
0x00, 0x00, 0xa0, 0x02, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x64, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
|
||||
|
@ -92,14 +92,14 @@ const u8 IMAGE_SHADER_VERTEX_BLOB_BYTES[]= {
|
|||
0x6c, 0x65, 0x72, 0x20, 0x31, 0x30, 0x2e, 0x31, 0x00, 0x49, 0x53, 0x47, 0x4e, 0x90, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x03, 0x03, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x0f, 0x00, 0x00, 0x77, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02,
|
||||
0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0f, 0x0f, 0x00,
|
||||
0x00, 0x54, 0x45, 0x58, 0x43, 0x4f, 0x4f, 0x52, 0x44, 0x00, 0x43, 0x4f, 0x4c, 0x4f, 0x52,
|
||||
0x00, 0x54, 0x45, 0x58, 0x54, 0x55, 0x52, 0x45, 0x5f, 0x49, 0x4e, 0x44, 0x45, 0x58, 0x00,
|
||||
0x50, 0x4f, 0x53, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x00, 0xab, 0xab, 0x4f, 0x53, 0x47, 0x4e,
|
||||
0x0f, 0x0f, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x03, 0x00, 0x00, 0x7a, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02,
|
||||
0x00, 0x00, 0x00, 0x0f, 0x0f, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00,
|
||||
0x00, 0x50, 0x4f, 0x53, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x00, 0x54, 0x45, 0x58, 0x43, 0x4f,
|
||||
0x4f, 0x52, 0x44, 0x00, 0x43, 0x4f, 0x4c, 0x4f, 0x52, 0x00, 0x54, 0x45, 0x58, 0x54, 0x55,
|
||||
0x52, 0x45, 0x5f, 0x49, 0x4e, 0x44, 0x45, 0x58, 0x00, 0xab, 0xab, 0x4f, 0x53, 0x47, 0x4e,
|
||||
0x94, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
@ -111,19 +111,19 @@ const u8 IMAGE_SHADER_VERTEX_BLOB_BYTES[]= {
|
|||
0x54, 0x45, 0x58, 0x43, 0x4f, 0x4f, 0x52, 0x44, 0x00, 0x43, 0x4f, 0x4c, 0x4f, 0x52, 0x00,
|
||||
0x54, 0x45, 0x58, 0x54, 0x55, 0x52, 0x45, 0x5f, 0x49, 0x4e, 0x44, 0x45, 0x58, 0x00, 0xab,
|
||||
0xab, 0xab, 0x53, 0x48, 0x45, 0x58, 0xc4, 0x00, 0x00, 0x00, 0x50, 0x00, 0x01, 0x00, 0x31,
|
||||
0x00, 0x00, 0x00, 0x6a, 0x08, 0x00, 0x01, 0x5f, 0x00, 0x00, 0x03, 0x32, 0x10, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x5f, 0x00, 0x00, 0x03, 0xf2, 0x10, 0x10, 0x00, 0x01, 0x00, 0x00,
|
||||
0x00, 0x5f, 0x00, 0x00, 0x03, 0x12, 0x10, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x5f, 0x00,
|
||||
0x00, 0x03, 0xf2, 0x10, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x04, 0xf2,
|
||||
0x00, 0x00, 0x00, 0x6a, 0x08, 0x00, 0x01, 0x5f, 0x00, 0x00, 0x03, 0xf2, 0x10, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x5f, 0x00, 0x00, 0x03, 0x32, 0x10, 0x10, 0x00, 0x01, 0x00, 0x00,
|
||||
0x00, 0x5f, 0x00, 0x00, 0x03, 0xf2, 0x10, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x5f, 0x00,
|
||||
0x00, 0x03, 0x12, 0x10, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x04, 0xf2,
|
||||
0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03,
|
||||
0x32, 0x20, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03, 0xf2, 0x20, 0x10,
|
||||
0x00, 0x02, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03, 0x12, 0x20, 0x10, 0x00, 0x03, 0x00,
|
||||
0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0xf2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46,
|
||||
0x1e, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x46, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00,
|
||||
0x05, 0xf2, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x1e, 0x10, 0x00, 0x01, 0x00,
|
||||
0x1e, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x46, 0x10, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00,
|
||||
0x05, 0xf2, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x1e, 0x10, 0x00, 0x02, 0x00,
|
||||
0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x12, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0a,
|
||||
0x10, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
|
||||
0x10, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
|
||||
0x94, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
struct VS_INPUT
|
||||
{
|
||||
float4 position : POSITION;
|
||||
float2 uv : TEXCOORD;
|
||||
float4 color : COLOR;
|
||||
int texture_index: TEXTURE_INDEX;
|
||||
float4 position : POSITION;
|
||||
};
|
||||
|
||||
struct PS_INPUT
|
||||
|
|
|
@ -197,65 +197,39 @@ Draw_Quad *draw_image_xform(Gfx_Image *image, Matrix4 xform, Vector2 size, Vecto
|
|||
#define COLOR_BLACK ((Vector4){0.0, 0.0, 0.0, 1.0})
|
||||
|
||||
Gfx_Image *load_image_from_disk(string path, Allocator allocator) {
|
||||
string png;
|
||||
bool ok = os_read_entire_file(path, &png, allocator);
|
||||
if (!ok) return 0;
|
||||
string png;
|
||||
bool ok = os_read_entire_file(path, &png, allocator);
|
||||
if (!ok) return 0;
|
||||
|
||||
Gfx_Image *image = alloc(allocator, sizeof(Gfx_Image));
|
||||
|
||||
// This is fucking terrible I gotta write my own decoder
|
||||
|
||||
lodepng_allocator = allocator;
|
||||
|
||||
LodePNGState state;
|
||||
lodepng_state_init(&state);
|
||||
u32 error = lodepng_inspect(&image->width, &image->height, &state, png.data, png.count);
|
||||
if (error) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 5 lines of code to say "ignore_adler32 = true" (because it's broken and gives me an error)
|
||||
// I JUST WANT TO LOAD A PNG
|
||||
LodePNGDecoderSettings decoder;
|
||||
lodepng_decoder_settings_init(&decoder);
|
||||
lodepng_decompress_settings_init(&decoder.zlibsettings);
|
||||
decoder.zlibsettings.ignore_adler32 = true;
|
||||
state.decoder = decoder;
|
||||
|
||||
error = lodepng_decode(&image->data, &image->width, &image->height, &state, png.data, png.count);
|
||||
|
||||
lodepng_state_cleanup(&state);
|
||||
|
||||
dealloc_string(allocator, png);
|
||||
|
||||
if (error) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// We need to flip the image
|
||||
u32 row_bytes = image->width * 4; // #Magicvalue assuming 4 bytes
|
||||
u8* temp_row = (u8*)alloc(temp, row_bytes);
|
||||
for (u32 i = 0; i < image->height / 2; i++) {
|
||||
u8* top_row = image->data + i * row_bytes;
|
||||
u8* bottom_row = image->data + (image->height - i - 1) * row_bytes;
|
||||
|
||||
// Swap the top row with the bottom row
|
||||
memcpy(temp_row, top_row, row_bytes);
|
||||
memcpy(top_row, bottom_row, row_bytes);
|
||||
memcpy(bottom_row, temp_row, row_bytes);
|
||||
Gfx_Image *image = alloc(allocator, sizeof(Gfx_Image));
|
||||
|
||||
// Use stb_image to load and decode the PNG
|
||||
int width, height, channels;
|
||||
stbi_set_flip_vertically_on_load(1); // stb_image can flip the image on load
|
||||
unsigned char* stb_data = stbi_load_from_memory(png.data, png.count, &width, &height, &channels, STBI_rgb_alpha);
|
||||
|
||||
if (!stb_data) {
|
||||
dealloc(allocator, image);
|
||||
dealloc_string(allocator, png);
|
||||
return 0;
|
||||
}
|
||||
|
||||
image->gfx_handle = GFX_INVALID_HANDLE; // This is handled in gfx
|
||||
|
||||
image->allocator = allocator;
|
||||
|
||||
return image;
|
||||
|
||||
image->data = stb_data;
|
||||
image->width = width;
|
||||
image->height = height;
|
||||
image->gfx_handle = GFX_INVALID_HANDLE; // This is handled in gfx
|
||||
image->allocator = allocator;
|
||||
|
||||
dealloc_string(allocator, png);
|
||||
|
||||
return image;
|
||||
}
|
||||
|
||||
void delete_image(Gfx_Image *image) {
|
||||
dealloc(image->allocator, image->data);
|
||||
image->width = 0;
|
||||
image->height = 0;
|
||||
draw_frame.garbage_stack[draw_frame.garbage_stack_count] = image->gfx_handle;
|
||||
draw_frame.garbage_stack_count += 1;
|
||||
dealloc(image->allocator, image);
|
||||
stbi_image_free(image->data); // Free the image data allocated by stb_image
|
||||
image->width = 0;
|
||||
image->height = 0;
|
||||
draw_frame.garbage_stack[draw_frame.garbage_stack_count] = image->gfx_handle;
|
||||
draw_frame.garbage_stack_count += 1;
|
||||
dealloc(image->allocator, image);
|
||||
}
|
|
@ -15,8 +15,6 @@ int entry(int argc, char **argv) {
|
|||
Gfx_Image *hammer_image = load_image_from_disk(STR("oogabooga/examples/hammer.png"), get_heap_allocator());
|
||||
assert(hammer_image, "Failed loading hammer.png");
|
||||
|
||||
Gfx_Font *font = load_font_From_disk(
|
||||
|
||||
seed_for_random = os_get_current_cycle_count();
|
||||
|
||||
const float64 fps_limit = 69000;
|
||||
|
@ -36,7 +34,9 @@ int entry(int argc, char **argv) {
|
|||
delta = now - last_time;
|
||||
}
|
||||
last_time = now;
|
||||
os_update();
|
||||
tm_scope_cycles("os_update") {
|
||||
os_update();
|
||||
}
|
||||
|
||||
if (is_key_just_released(KEY_ESCAPE)) {
|
||||
window.should_close = true;
|
||||
|
@ -102,11 +102,10 @@ int entry(int argc, char **argv) {
|
|||
|
||||
draw_image(bush_image, v2(0.65, 0.65), v2(0.2*sin(now), 0.2*sin(now)), COLOR_WHITE);
|
||||
|
||||
draw_frame.font = STR("");
|
||||
tm_scope_cycles("gfx_update") {
|
||||
gfx_update();
|
||||
}
|
||||
|
||||
draw_text();
|
||||
|
||||
gfx_update();
|
||||
|
||||
if (is_key_just_released('E')) {
|
||||
log("FPS: %.2f", 1.0 / delta);
|
||||
|
|
|
@ -13,10 +13,10 @@ const Gfx_Handle GFX_INVALID_HANDLE = 0;
|
|||
|
||||
string temp_win32_null_terminated_wide_to_fixed_utf8(const u16 *utf16);
|
||||
|
||||
typedef struct D3D11_Vertex {
|
||||
typedef struct alignat(16) D3D11_Vertex {
|
||||
Vector4 color;
|
||||
Vector4 position;
|
||||
Vector2 uv;
|
||||
Vector4 color;
|
||||
int texture_index;
|
||||
} D3D11_Vertex;
|
||||
|
||||
|
@ -81,14 +81,19 @@ void CALLBACK d3d11_debug_callback(D3D11_MESSAGE_CATEGORY category, D3D11_MESSAG
|
|||
case D3D11_MESSAGE_SEVERITY_CORRUPTION:
|
||||
case D3D11_MESSAGE_SEVERITY_ERROR:
|
||||
log_error(msg);
|
||||
break;
|
||||
case D3D11_MESSAGE_SEVERITY_WARNING:
|
||||
log_warning(msg);
|
||||
break;
|
||||
case D3D11_MESSAGE_SEVERITY_INFO:
|
||||
log_info(msg);
|
||||
break;
|
||||
case D3D11_MESSAGE_SEVERITY_MESSAGE:
|
||||
log_verbose(msg);
|
||||
break;
|
||||
default:
|
||||
log("Ligma");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -127,7 +132,8 @@ void d3d11_update_swapchain() {
|
|||
if (create) {
|
||||
DXGI_SWAP_CHAIN_DESC1 scd = ZERO(DXGI_SWAP_CHAIN_DESC1);
|
||||
scd.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
//scd.BufferDesc.RefreshRate.Numerator = xx st.refresh_rate;
|
||||
scd.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
|
||||
//scd.BufferDesc.RefreshRate.Numerator = 0;
|
||||
//scd.BufferDesc.RefreshRate.Denominator = 1;
|
||||
|
||||
scd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
|
||||
|
@ -137,23 +143,23 @@ void d3d11_update_swapchain() {
|
|||
scd.Scaling = DXGI_SCALING_STRETCH; // for compatability with 7
|
||||
}
|
||||
|
||||
|
||||
// Windows 10 allows to use DXGI_SWAP_EFFECT_FLIP_DISCARD
|
||||
// for Windows 8 compatibility use DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL
|
||||
// for Windows 7 compatibility use DXGI_SWAP_EFFECT_DISCARD
|
||||
if (d3d11_feature_level >= D3D_FEATURE_LEVEL_11_0) {
|
||||
// this is supported only on FLIP presentation model
|
||||
scd.Scaling = DXGI_SCALING_NONE;
|
||||
scd.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
|
||||
scd.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
|
||||
scd.BufferCount = 3;
|
||||
gfx._can_vsync = false;
|
||||
log_verbose("Present mode is flip discard, 3 buffers");
|
||||
} else {
|
||||
scd.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
|
||||
scd.BufferCount = 2;
|
||||
gfx._can_vsync = true;
|
||||
log_verbose("Present mode is discard, 2 buffers");
|
||||
}
|
||||
|
||||
|
||||
// Obtain DXGI factory from device
|
||||
IDXGIDevice *dxgi_device;
|
||||
hr = VTABLE(QueryInterface, d3d11_device, &IID_IDXGIDevice, cast(void**)&dxgi_device);
|
||||
|
@ -224,7 +230,7 @@ void d3d11_update_swapchain() {
|
|||
|
||||
void gfx_init() {
|
||||
|
||||
gfx.enable_vsync = false;
|
||||
window.enable_vsync = false;
|
||||
|
||||
log_verbose("d3d11 gfx_init");
|
||||
|
||||
|
@ -426,42 +432,53 @@ void gfx_init() {
|
|||
|
||||
log_verbose("Shaders created");
|
||||
|
||||
|
||||
|
||||
D3D11_INPUT_ELEMENT_DESC layout[4];
|
||||
memset(layout, 0, sizeof(layout));
|
||||
|
||||
layout[0].SemanticName = "POSITION";
|
||||
layout[0].SemanticIndex = 0;
|
||||
layout[0].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
layout[0].InputSlot = 0;
|
||||
layout[0].AlignedByteOffset = offsetof(D3D11_Vertex, position);
|
||||
layout[0].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
|
||||
layout[0].InstanceDataStepRate = 0;
|
||||
|
||||
layout[1].SemanticName = "TEXCOORD";
|
||||
layout[1].SemanticIndex = 0;
|
||||
layout[1].Format = DXGI_FORMAT_R32G32_FLOAT;
|
||||
layout[1].InputSlot = 0;
|
||||
layout[1].AlignedByteOffset = offsetof(D3D11_Vertex, uv);
|
||||
layout[1].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
|
||||
layout[1].InstanceDataStepRate = 0;
|
||||
|
||||
layout[2].SemanticName = "COLOR";
|
||||
layout[2].SemanticIndex = 0;
|
||||
layout[2].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
layout[2].InputSlot = 0;
|
||||
layout[2].AlignedByteOffset = offsetof(D3D11_Vertex, color);
|
||||
layout[2].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
|
||||
layout[2].InstanceDataStepRate = 0;
|
||||
|
||||
layout[3].SemanticName = "TEXTURE_INDEX";
|
||||
layout[3].SemanticIndex = 0;
|
||||
layout[3].Format = DXGI_FORMAT_R32_SINT;
|
||||
layout[3].InputSlot = 0;
|
||||
layout[3].AlignedByteOffset = offsetof(D3D11_Vertex, texture_index);
|
||||
layout[3].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
|
||||
layout[3].InstanceDataStepRate = 0;
|
||||
|
||||
hr = VTABLE(CreateInputLayout, d3d11_device, layout, 4, vs_buffer, vs_size, &d3d11_image_vertex_layout);
|
||||
win32_check_hr(hr);
|
||||
|
||||
#if OOGABOOGA_DEV
|
||||
D3D11Release(vs_blob);
|
||||
D3D11Release(ps_blob);
|
||||
#endif
|
||||
|
||||
D3D11_INPUT_ELEMENT_DESC layout[4];
|
||||
memset(layout, 0, sizeof(layout));
|
||||
|
||||
layout[0] = (D3D11_INPUT_ELEMENT_DESC){
|
||||
"POSITION", 0,
|
||||
DXGI_FORMAT_R32G32B32A32_FLOAT, 0,
|
||||
offsetof(D3D11_Vertex, position),
|
||||
D3D11_INPUT_PER_VERTEX_DATA, 0
|
||||
};
|
||||
layout[1] = (D3D11_INPUT_ELEMENT_DESC){
|
||||
"TEXCOORD", 0,
|
||||
DXGI_FORMAT_R32G32_FLOAT, 0,
|
||||
offsetof(D3D11_Vertex, uv),
|
||||
D3D11_INPUT_PER_VERTEX_DATA, 0
|
||||
};
|
||||
layout[2] = (D3D11_INPUT_ELEMENT_DESC){
|
||||
"COLOR", 0,
|
||||
DXGI_FORMAT_R32G32B32A32_FLOAT, 0,
|
||||
offsetof(D3D11_Vertex, color),
|
||||
D3D11_INPUT_PER_VERTEX_DATA, 0
|
||||
};
|
||||
layout[3] = (D3D11_INPUT_ELEMENT_DESC){
|
||||
"TEXTURE_INDEX", 0,
|
||||
DXGI_FORMAT_R32_SINT, 0,
|
||||
offsetof(D3D11_Vertex, texture_index),
|
||||
D3D11_INPUT_PER_VERTEX_DATA, 0
|
||||
};
|
||||
|
||||
hr = VTABLE(CreateInputLayout, d3d11_device, layout, 4, vs_buffer, vs_size, &d3d11_image_vertex_layout);
|
||||
|
||||
log_info("D3D11 init done");
|
||||
|
||||
}
|
||||
|
||||
void d3d11_draw_call(int number_of_rendered_quads, ID3D11ShaderResourceView **textures, u64 num_textures) {
|
||||
|
@ -493,7 +510,6 @@ void d3d11_draw_call(int number_of_rendered_quads, ID3D11ShaderResourceView **te
|
|||
}
|
||||
|
||||
void gfx_update() {
|
||||
|
||||
if (window.should_close) return;
|
||||
|
||||
VTABLE(ClearRenderTargetView, d3d11_context, d3d11_window_render_target_view, (float*)&window.clear_color);
|
||||
|
@ -501,59 +517,61 @@ void gfx_update() {
|
|||
|
||||
HRESULT hr;
|
||||
|
||||
///
|
||||
// purge garbage
|
||||
for (u64 i = 0; i < draw_frame.garbage_stack_count; i++) {
|
||||
ID3D11ShaderResourceView *view = draw_frame.garbage_stack[i];
|
||||
ID3D11Resource *resource = 0;
|
||||
VTABLE(GetResource, view, &resource);
|
||||
|
||||
ID3D11Texture2D *texture = 0;
|
||||
hr = VTABLE(QueryInterface, resource, &IID_ID3D11Texture2D, (void**)&texture);
|
||||
if (SUCCEEDED(hr)) {
|
||||
D3D11Release(view);
|
||||
D3D11Release(texture);
|
||||
log("Destroyed an image");
|
||||
} else {
|
||||
panic("Unhandled D3D11 resource deletion");
|
||||
tm_scope_cycles("Frame setup") {
|
||||
///
|
||||
// purge garbage
|
||||
for (u64 i = 0; i < draw_frame.garbage_stack_count; i++) {
|
||||
ID3D11ShaderResourceView *view = draw_frame.garbage_stack[i];
|
||||
ID3D11Resource *resource = 0;
|
||||
VTABLE(GetResource, view, &resource);
|
||||
|
||||
ID3D11Texture2D *texture = 0;
|
||||
hr = VTABLE(QueryInterface, resource, &IID_ID3D11Texture2D, (void**)&texture);
|
||||
if (SUCCEEDED(hr)) {
|
||||
D3D11Release(view);
|
||||
D3D11Release(texture);
|
||||
log("Destroyed an image");
|
||||
} else {
|
||||
panic("Unhandled D3D11 resource deletion");
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
// Maybe resize swap chain
|
||||
RECT client_rect;
|
||||
bool ok = GetClientRect(window._os_handle, &client_rect);
|
||||
assert(ok, "GetClientRect failed with error code %lu", GetLastError());
|
||||
u32 window_width = client_rect.right-client_rect.left;
|
||||
u32 window_height = client_rect.bottom-client_rect.top;
|
||||
if (window_width != d3d11_swap_chain_width || window_height != d3d11_swap_chain_height) {
|
||||
d3d11_update_swapchain();
|
||||
}
|
||||
|
||||
///
|
||||
// Maybe grow quad vbo
|
||||
u32 required_size = sizeof(D3D11_Vertex) * draw_frame.num_blocks*QUADS_PER_BLOCK*6;
|
||||
|
||||
if (required_size > d3d11_quad_vbo_size) {
|
||||
if (d3d11_quad_vbo) {
|
||||
D3D11Release(d3d11_quad_vbo);
|
||||
dealloc(get_heap_allocator(), d3d11_staging_quad_buffer);
|
||||
}
|
||||
D3D11_BUFFER_DESC desc = ZERO(D3D11_BUFFER_DESC);
|
||||
desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
desc.ByteWidth = required_size;
|
||||
desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
|
||||
HRESULT hr = VTABLE(CreateBuffer, d3d11_device, &desc, 0, &d3d11_quad_vbo);
|
||||
assert(SUCCEEDED(hr), "CreateBuffer failed");
|
||||
d3d11_quad_vbo_size = required_size;
|
||||
|
||||
d3d11_staging_quad_buffer = alloc(get_heap_allocator(), d3d11_quad_vbo_size);
|
||||
assert((u64)d3d11_staging_quad_buffer%16 == 0);
|
||||
|
||||
log_verbose("Grew quad vbo to %d bytes.", d3d11_quad_vbo_size);
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
// Maybe resize swap chain
|
||||
RECT client_rect;
|
||||
bool ok = GetClientRect(window._os_handle, &client_rect);
|
||||
assert(ok, "GetClientRect failed with error code %lu", GetLastError());
|
||||
u32 window_width = client_rect.right-client_rect.left;
|
||||
u32 window_height = client_rect.bottom-client_rect.top;
|
||||
if (window_width != d3d11_swap_chain_width || window_height != d3d11_swap_chain_height) {
|
||||
d3d11_update_swapchain();
|
||||
}
|
||||
|
||||
///
|
||||
// Maybe grow quad vbo
|
||||
u32 required_size = sizeof(D3D11_Vertex) * draw_frame.num_blocks*QUADS_PER_BLOCK*6;
|
||||
|
||||
if (required_size > d3d11_quad_vbo_size) {
|
||||
if (d3d11_quad_vbo) {
|
||||
D3D11Release(d3d11_quad_vbo);
|
||||
dealloc(get_heap_allocator(), d3d11_staging_quad_buffer);
|
||||
}
|
||||
D3D11_BUFFER_DESC desc = ZERO(D3D11_BUFFER_DESC);
|
||||
desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
desc.ByteWidth = required_size;
|
||||
desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
|
||||
HRESULT hr = VTABLE(CreateBuffer, d3d11_device, &desc, 0, &d3d11_quad_vbo);
|
||||
assert(SUCCEEDED(hr), "CreateBuffer failed");
|
||||
d3d11_quad_vbo_size = required_size;
|
||||
|
||||
d3d11_staging_quad_buffer = alloc(get_heap_allocator(), d3d11_quad_vbo_size);
|
||||
|
||||
log_verbose("Grew quad vbo to %d bytes.", d3d11_quad_vbo_size);
|
||||
}
|
||||
|
||||
f64 rest_before = os_get_current_time_in_seconds();
|
||||
if (draw_frame.num_blocks > 0) {
|
||||
///
|
||||
// Render geometry from the quad list into the vbo
|
||||
|
@ -569,8 +587,8 @@ void gfx_update() {
|
|||
Draw_Quad_Block *block = &first_block;
|
||||
|
||||
tm_scope_cycles("Quad processing") {
|
||||
while (block != 0 && block->num_quads > 0) tm_scope_cycles("ad2As") {
|
||||
for (u64 i = 0; i < block->num_quads; i++) tm_scope_cycles("Single quad") {
|
||||
while (block != 0 && block->num_quads > 0) tm_scope_cycles("Quad block") {
|
||||
for (u64 i = 0; i < block->num_quads; i++) {
|
||||
|
||||
Draw_Quad *q = &block->quad_buffer[i];
|
||||
|
||||
|
@ -620,7 +638,7 @@ void gfx_update() {
|
|||
if (num_textures >= 32) {
|
||||
// If max textures reached, make a draw call and start over
|
||||
D3D11_MAPPED_SUBRESOURCE buffer_mapping;
|
||||
VTABLE(Map, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &buffer_mapping);
|
||||
VTABLE(Map, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0, D3D11_MAP_WRITE_DISCARD, 0, &buffer_mapping);
|
||||
memcpy(buffer_mapping.pData, d3d11_staging_quad_buffer, number_of_rendered_quads*sizeof(D3D11_Vertex)*6);
|
||||
VTABLE(Unmap, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0);
|
||||
d3d11_draw_call(number_of_rendered_quads, textures, num_textures);
|
||||
|
@ -676,30 +694,29 @@ void gfx_update() {
|
|||
}
|
||||
}
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE buffer_mapping;
|
||||
VTABLE(Map, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &buffer_mapping);
|
||||
memcpy(buffer_mapping.pData, d3d11_staging_quad_buffer, number_of_rendered_quads*sizeof(D3D11_Vertex)*6);
|
||||
VTABLE(Unmap, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0);
|
||||
tm_scope_cycles("Write to gpu") {
|
||||
D3D11_MAPPED_SUBRESOURCE buffer_mapping;
|
||||
tm_scope_cycles("The Map call") {
|
||||
hr = VTABLE(Map, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0, D3D11_MAP_WRITE_DISCARD, 0, &buffer_mapping);
|
||||
win32_check_hr(hr);
|
||||
}
|
||||
tm_scope_cycles("The memcpy") {
|
||||
memcpy(buffer_mapping.pData, d3d11_staging_quad_buffer, number_of_rendered_quads*sizeof(D3D11_Vertex)*6);
|
||||
}
|
||||
tm_scope_cycles("The Unmap call") {
|
||||
VTABLE(Unmap, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0);
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
// Draw call
|
||||
|
||||
u64 before_draw = os_get_current_cycle_count();
|
||||
d3d11_draw_call(number_of_rendered_quads, textures, num_textures);
|
||||
u64 after_draw = os_get_current_cycle_count();
|
||||
//log("Draw call took %llu cycles", after_draw-before_draw);
|
||||
tm_scope_cycles("Draw call") d3d11_draw_call(number_of_rendered_quads, textures, num_textures);
|
||||
}
|
||||
|
||||
f64 rest_after = os_get_current_time_in_seconds();
|
||||
if (is_key_just_pressed('E'))
|
||||
log("The rest took %.2fms", (rest_after-rest_before)*1000.0);
|
||||
|
||||
f64 before_present = os_get_current_time_in_seconds();
|
||||
hr = VTABLE(Present, d3d11_swap_chain, gfx._can_vsync && gfx.enable_vsync, 0);
|
||||
f64 after = os_get_current_time_in_seconds();
|
||||
if (is_key_just_pressed('E'))
|
||||
log("Present took %.2fms", (after-before_present)*1000.0);
|
||||
win32_check_hr(hr);
|
||||
|
||||
tm_scope_cycles("Present") {
|
||||
hr = VTABLE(Present, d3d11_swap_chain, window.enable_vsync, window.enable_vsync ? 0 : DXGI_PRESENT_ALLOW_TEARING);
|
||||
win32_check_hr(hr);
|
||||
}
|
||||
|
||||
#if CONFIGURATION == DEBUG
|
||||
///
|
||||
|
|
|
@ -15,18 +15,6 @@
|
|||
#error "Unknown renderer GFX_RENDERER defined"
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct Gfx_State {
|
||||
|
||||
// config
|
||||
bool enable_vsync;
|
||||
|
||||
// readonly
|
||||
bool _can_vsync;
|
||||
|
||||
} Gfx_State;
|
||||
Gfx_State gfx;
|
||||
|
||||
forward_global const Gfx_Handle GFX_INVALID_HANDLE;
|
||||
|
||||
typedef struct Gfx_Image {
|
||||
|
|
|
@ -133,7 +133,7 @@ inline float v3_dot_product(Vector3 a, Vector3 b) {
|
|||
return simd_dot_product_float32_96((float*)&a, (float*)&b);
|
||||
}
|
||||
inline float v4_dot_product(Vector4 a, Vector4 b) {
|
||||
return simd_dot_product_float32_128((float*)&a, (float*)&b);
|
||||
return simd_dot_product_float32_128_aligned((float*)&a, (float*)&b);
|
||||
}
|
||||
|
||||
Vector2 v2_rotate_point_around_pivot(Vector2 point, Vector2 pivot, float32 rotation_radians) {
|
||||
|
|
|
@ -24,7 +24,7 @@ void* initialization_allocator_proc(u64 size, void *p, Allocator_Message message
|
|||
|
||||
if (init_memory_head >= ((u8*)init_memory_arena+INIT_MEMORY_SIZE)) {
|
||||
os_write_string_to_stdout(STR("Out of initialization memory! Please provide more by increasing INIT_MEMORY_SIZE"));
|
||||
os_break();
|
||||
crash();
|
||||
}
|
||||
return p;
|
||||
break;
|
||||
|
|
|
@ -145,23 +145,22 @@ typedef u8 bool;
|
|||
#warning "Compiler is not explicitly supported, some things will probably not work as expected"
|
||||
#endif
|
||||
|
||||
#include "cpu.c"
|
||||
|
||||
|
||||
#define DEBUG 0
|
||||
#define VERY_DEBUG 1
|
||||
#define RELEASE 2
|
||||
|
||||
#if !defined(CONFIGURATION)
|
||||
|
||||
#if defined(NDEBUG)
|
||||
#define CONFIGURATION RELEASE
|
||||
#else
|
||||
#define CONFIGURATION DEBUG
|
||||
#endif
|
||||
|
||||
#if defined(NDEBUG)
|
||||
#define CONFIGURATION RELEASE
|
||||
#else
|
||||
#define CONFIGURATION DEBUG
|
||||
#endif
|
||||
|
||||
|
||||
#include "cpu.c"
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef ENTRY_PROC
|
||||
#define ENTRY_PROC entry
|
||||
#endif
|
||||
|
|
|
@ -135,6 +135,8 @@ LRESULT CALLBACK win32_window_proc(HWND passed_window, UINT message, WPARAM wpar
|
|||
|
||||
void os_init(u64 program_memory_size) {
|
||||
|
||||
memset(&window, 0, sizeof(window));
|
||||
|
||||
timeBeginPeriod(1);
|
||||
#if CONFIGURATION == RELEASE
|
||||
SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
|
||||
|
|
|
@ -61,36 +61,6 @@ inline int crt_vprintf(const char* fmt, va_list args) {
|
|||
return os.crt_vprintf(fmt, args);
|
||||
}
|
||||
|
||||
#if !defined(COMPILER_HAS_MEMCPY_INTRINSICS) || CONFIGURATION == DEBUG
|
||||
inline void* naive_memcpy(void* dest, const void* source, size_t size) {
|
||||
for (u64 i = 0; i < (u64)size; i++) ((u8*)dest)[i] = ((u8*)source)[i];
|
||||
return dest;
|
||||
}
|
||||
inline void* memcpy(void* dest, const void* source, size_t size) {
|
||||
if (!os.crt_memcpy) return naive_memcpy(dest, source, size);
|
||||
return os.crt_memcpy(dest, source, size);
|
||||
}
|
||||
inline int naive_memcmp(const void* a, const void* b, size_t amount) {
|
||||
// I don't understand the return value of memcmp but I also dont care
|
||||
for (u64 i = 0; i < (u64)amount; i++) {
|
||||
if (((u8*)a)[i] != ((u8*)b)[i]) return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
inline int memcmp(const void* a, const void* b, size_t amount) {
|
||||
if (!os.crt_memcmp) return naive_memcmp(a, b, amount);
|
||||
return os.crt_memcmp(a, b, amount);
|
||||
}
|
||||
inline void* naive_memset(void* dest, int value, size_t amount) {
|
||||
for (u64 i = 0; i < (u64)amount; i++) ((u8*)dest)[i] = (u8)value;
|
||||
return dest;
|
||||
}
|
||||
inline void* memset(void* dest, int value, size_t amount) {
|
||||
if (!os.crt_memset) return naive_memset(dest, value, amount);
|
||||
return os.crt_memset(dest, value, amount);
|
||||
}
|
||||
#endif
|
||||
|
||||
inline bool bytes_match(void *a, void *b, u64 count) { return memcmp(a, b, count) == 0; }
|
||||
|
||||
inline int vsnprintf(char* buffer, size_t n, const char* fmt, va_list args) {
|
||||
|
@ -333,6 +303,7 @@ typedef struct Os_Window {
|
|||
u32 x;
|
||||
u32 y;
|
||||
Vector4 clear_color;
|
||||
bool enable_vsync;
|
||||
|
||||
bool should_close;
|
||||
|
||||
|
|
190
oogabooga/simd.c
190
oogabooga/simd.c
|
@ -30,6 +30,16 @@ inline void basic_mul_int32_512(s32 *a, s32 *b, s32* result);
|
|||
inline float basic_dot_product_float32_64(float *a, float *b);
|
||||
inline float basic_dot_product_float32_96(float *a, float *b);
|
||||
inline float basic_dot_product_float32_128(float *a, float *b);
|
||||
inline void basic_sqrt_float32_64(float *a, float *result);
|
||||
inline void basic_sqrt_float32_96(float *a, float *result);
|
||||
inline void basic_sqrt_float32_128(float *a, float *result);
|
||||
inline void basic_sqrt_float32_256(float *a, float *result);
|
||||
inline void basic_sqrt_float32_512(float *a, float *result);
|
||||
inline void basic_rsqrt_float32_64(float *a, float *result);
|
||||
inline void basic_rsqrt_float32_96(float *a, float *result);
|
||||
inline void basic_rsqrt_float32_128(float *a, float *result);
|
||||
inline void basic_rsqrt_float32_256(float *a, float *result);
|
||||
inline void basic_rsqrt_float32_512(float *a, float *result);
|
||||
|
||||
|
||||
|
||||
|
@ -123,6 +133,52 @@ inline void simd_div_float32_128_aligned(float *a, float *b, float* result) {
|
|||
__m128 vr = _mm_div_ps(va, vb);
|
||||
_mm_store_ps(result, vr);
|
||||
}
|
||||
// Square root of exactly three packed floats.
// a:      pointer to 3 readable floats (no alignment requirement).
// result: pointer to 3 writable floats (no alignment requirement).
// Only a[0..2] are read and only result[0..2] are written. The previous
// full-width load/store touched a fourth float on both sides, which overruns
// a 12-byte operand such as a Vector3.
inline void simd_sqrt_float32_96(float *a, float *result) {
	// Build the register lane by lane; the unused top lane is padded with 0.
	__m128 va = _mm_set_ps(0.0f, a[2], a[1], a[0]);
	__m128 vr = _mm_sqrt_ps(va);
	// Store lanes 0-1 as one 64-bit write, then lane 2 on its own.
	_mm_storel_pi((__m64*)result, vr);
	_mm_store_ss(result + 2, _mm_movehl_ps(vr, vr));
}
|
||||
|
||||
// Approximate reciprocal square root of exactly three packed floats.
// a:      pointer to 3 readable floats (no alignment requirement).
// result: pointer to 3 writable floats (no alignment requirement).
// Only a[0..2] are read and only result[0..2] are written. The previous
// full-width load/store touched a fourth float on both sides, which overruns
// a 12-byte operand such as a Vector3.
inline void simd_rsqrt_float32_96(float *a, float *result) {
	// Pad the unused top lane with 1 so the discarded lane never computes 1/sqrt(0).
	__m128 va = _mm_set_ps(1.0f, a[2], a[1], a[0]);
	__m128 vr = _mm_rsqrt_ps(va);
	// Store lanes 0-1 as one 64-bit write, then lane 2 on its own.
	_mm_storel_pi((__m64*)result, vr);
	_mm_store_ss(result + 2, _mm_movehl_ps(vr, vr));
}
|
||||
// Square root of two packed floats: reads a[0..1], writes result[0..1].
inline void simd_sqrt_float32_64(float *a, float *result) {
	// The two inputs occupy the low half of an otherwise zeroed register.
	__m128 zeroed = _mm_setzero_ps();
	__m128 roots = _mm_sqrt_ps(_mm_loadl_pi(zeroed, (__m64*)a));
	// Only the low half is written back.
	_mm_storel_pi((__m64*)result, roots);
}
|
||||
|
||||
// Approximate reciprocal square root of two packed floats:
// reads a[0..1], writes result[0..1].
inline void simd_rsqrt_float32_64(float *a, float *result) {
	// The two inputs occupy the low half of an otherwise zeroed register.
	__m128 zeroed = _mm_setzero_ps();
	__m128 estimates = _mm_rsqrt_ps(_mm_loadl_pi(zeroed, (__m64*)a));
	// Only the low half is written back.
	_mm_storel_pi((__m64*)result, estimates);
}
|
||||
// Square root of four packed floats; neither pointer needs to be aligned.
inline void simd_sqrt_float32_128(float *a, float *result) {
	// Unaligned load -> packed sqrt -> unaligned store.
	_mm_storeu_ps(result, _mm_sqrt_ps(_mm_loadu_ps(a)));
}
|
||||
|
||||
// Approximate reciprocal square root of four packed floats;
// neither pointer needs to be aligned.
inline void simd_rsqrt_float32_128(float *a, float *result) {
	// Unaligned load -> packed rsqrt estimate -> unaligned store.
	_mm_storeu_ps(result, _mm_rsqrt_ps(_mm_loadu_ps(a)));
}
|
||||
// Square root of four packed floats using the aligned load/store intrinsics,
// so both a and result must be 16-byte aligned.
inline void simd_sqrt_float32_128_aligned(float *a, float *result) {
	// Aligned load -> packed sqrt -> aligned store.
	_mm_store_ps(result, _mm_sqrt_ps(_mm_load_ps(a)));
}
|
||||
|
||||
// Approximate reciprocal square root of four packed floats using the aligned
// load/store intrinsics, so both a and result must be 16-byte aligned.
inline void simd_rsqrt_float32_128_aligned(float *a, float *result) {
	// Aligned load -> packed rsqrt estimate -> aligned store.
	_mm_store_ps(result, _mm_rsqrt_ps(_mm_load_ps(a)));
}
|
||||
|
||||
|
||||
#if SIMD_ENABLE_SSE2
|
||||
|
@ -191,14 +247,6 @@ inline float simd_dot_product_float32_96(float *a, float *b) {
|
|||
__m128 dot_product = _mm_dp_ps(vec1, vec2, 0x71);
|
||||
return _mm_cvtss_f32(dot_product);
|
||||
}
|
||||
inline float simd_dot_product_float32_96_aligned(float *a, float *b) {
|
||||
__m128 vec1 = _mm_load_ps(a);
|
||||
__m128 vec2 = _mm_load_ps(b);
|
||||
vec1 = _mm_and_ps(vec1, _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1)));
|
||||
vec2 = _mm_and_ps(vec2, _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1)));
|
||||
__m128 dot_product = _mm_dp_ps(vec1, vec2, 0x71);
|
||||
return _mm_cvtss_f32(dot_product);
|
||||
}
|
||||
inline float simd_dot_product_float32_128(float *a, float *b) {
|
||||
__m128 vec1 = _mm_loadu_ps(a);
|
||||
__m128 vec2 = _mm_loadu_ps(b);
|
||||
|
@ -217,8 +265,6 @@ inline float simd_dot_product_float32_128_aligned(float *a, float *b) {
|
|||
#define simd_dot_product_float32_64 basic_dot_product_float32_64
|
||||
#define simd_dot_product_float32_96 basic_dot_product_float32_96
|
||||
#define simd_dot_product_float32_128 basic_dot_product_float32_128
|
||||
#define simd_dot_product_float32_64_aligned basic_dot_product_float32_64
|
||||
#define simd_dot_product_float32_96_aligned basic_dot_product_float32_96
|
||||
#define simd_dot_product_float32_128_aligned basic_dot_product_float32_128
|
||||
#endif // SIMD_ENABLE_SSE41
|
||||
|
||||
|
@ -275,16 +321,41 @@ inline void simd_div_float32_256_aligned(float32 *a, float32 *b, float32* result
|
|||
__m256 vr = _mm256_div_ps(va, vb);
|
||||
_mm256_store_ps(result, vr);
|
||||
}
|
||||
inline void simd_sqrt_float32_256(float *a, float *result) {
|
||||
__m256 va = _mm256_loadu_ps(a);
|
||||
__m256 vr = _mm256_sqrt_ps(va);
|
||||
_mm256_storeu_ps(result, vr);
|
||||
}
|
||||
|
||||
inline void simd_rsqrt_float32_256(float *a, float *result) {
|
||||
__m256 va = _mm256_loadu_ps(a);
|
||||
__m256 vr = _mm256_rsqrt_ps(va);
|
||||
_mm256_storeu_ps(result, vr);
|
||||
}
|
||||
inline void simd_sqrt_float32_256_aligned(float *a, float *result) {
|
||||
__m256 va = _mm256_load_ps(a);
|
||||
__m256 vr = _mm256_sqrt_ps(va);
|
||||
_mm256_store_ps(result, vr);
|
||||
}
|
||||
|
||||
inline void simd_rsqrt_float32_256_aligned(float *a, float *result) {
|
||||
__m256 va = _mm256_load_ps(a);
|
||||
__m256 vr = _mm256_rsqrt_ps(va);
|
||||
_mm256_store_ps(result, vr);
|
||||
}
|
||||
#else
|
||||
#define simd_add_float32_256 basic_add_float32_256
|
||||
#define simd_sub_float32_256 basic_sub_float32_256
|
||||
#define simd_mul_float32_256 basic_mul_float32_256
|
||||
#define simd_div_float32_256 basic_div_float32_256
|
||||
|
||||
#define simd_sqrt_float32_256 basic_sqrt_float32_256
|
||||
#define simd_rsqrt_float32_256 basic_rsqrt_float32_256
|
||||
#define simd_add_float32_256_aligned basic_add_float32_256
|
||||
#define simd_sub_float32_256_aligned basic_sub_float32_256
|
||||
#define simd_mul_float32_256_aligned basic_mul_float32_256
|
||||
#define simd_div_float32_256_aligned basic_div_float32_256
|
||||
#define simd_sqrt_float32_256_aligned basic_sqrt_float32_256
|
||||
#define simd_rsqrt_float32_256_aligned basic_rsqrt_float32_256
|
||||
#endif
|
||||
|
||||
#if SIMD_ENABLE_AVX2
|
||||
|
@ -332,7 +403,6 @@ inline void simd_mul_int32_256_aligned(s32 *a, s32 *b, s32* result) {
|
|||
#define simd_add_int32_256 basic_add_int32_256
|
||||
#define simd_sub_int32_256 basic_sub_int32_256
|
||||
#define simd_mul_int32_256 basic_mul_int32_256
|
||||
|
||||
#define simd_add_int32_256_aligned basic_add_int32_256
|
||||
#define simd_sub_int32_256_aligned basic_sub_int32_256
|
||||
#define simd_mul_int32_256_aligned basic_mul_int32_256
|
||||
|
@ -432,6 +502,28 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
|
|||
__m512i vr = _mm512_mullo_epi32(va, vb);
|
||||
_mm512_store_si512((__m512i*)result, vr);
|
||||
}
|
||||
inline void simd_sqrt_float32_512(float *a, float *result) {
|
||||
__m512 va = _mm512_loadu_ps(a);
|
||||
__m512 vr = _mm512_sqrt_ps(va);
|
||||
_mm512_storeu_ps(result, vr);
|
||||
}
|
||||
|
||||
inline void simd_rsqrt_float32_512(float *a, float *result) {
|
||||
__m512 va = _mm512_loadu_ps(a);
|
||||
__m512 vr = _mm512_rsqrt14_ps(va); // AVX-512 does not have _mm512_rsqrt_ps
|
||||
_mm512_storeu_ps(result, vr);
|
||||
}
|
||||
inline void simd_sqrt_float32_512_aligned(float *a, float *result) {
|
||||
__m512 va = _mm512_load_ps(a);
|
||||
__m512 vr = _mm512_sqrt_ps(va);
|
||||
_mm512_store_ps(result, vr);
|
||||
}
|
||||
|
||||
inline void simd_rsqrt_float32_512_aligned(float *a, float *result) {
|
||||
__m512 va = _mm512_load_ps(a);
|
||||
__m512 vr = _mm512_rsqrt14_ps(va);
|
||||
_mm512_store_ps(result, vr);
|
||||
}
|
||||
#else
|
||||
#define simd_add_float32_512 basic_add_float32_512
|
||||
#define simd_sub_float32_512 basic_sub_float32_512
|
||||
|
@ -440,7 +532,8 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
|
|||
#define simd_add_int32_512 basic_add_int32_512
|
||||
#define simd_sub_int32_512 basic_sub_int32_512
|
||||
#define simd_mul_int32_512 basic_mul_int32_512
|
||||
|
||||
#define simd_sqrt_float32_512 basic_sqrt_float32_512
|
||||
#define simd_rsqrt_float32_512 basic_rsqrt_float32_512
|
||||
#define simd_add_float32_512_aligned basic_add_float32_512
|
||||
#define simd_sub_float32_512_aligned basic_sub_float32_512
|
||||
#define simd_mul_float32_512_aligned basic_mul_float32_512
|
||||
|
@ -448,6 +541,8 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
|
|||
#define simd_add_int32_512_aligned basic_add_int32_512
|
||||
#define simd_sub_int32_512_aligned basic_sub_int32_512
|
||||
#define simd_mul_int32_512_aligned basic_mul_int32_512
|
||||
#define simd_sqrt_float32_512_aligned basic_sqrt_float32_512
|
||||
#define simd_rsqrt_float32_512_aligned basic_rsqrt_float32_512
|
||||
#endif // SIMD_ENABLE_AVX512
|
||||
|
||||
#else
|
||||
|
@ -461,10 +556,16 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
|
|||
#define simd_mul_float32_128 basic_mul_float32_128
|
||||
#define simd_div_float32_64 basic_div_float32_64
|
||||
#define simd_div_float32_128 basic_div_float32_128
|
||||
#define simd_sqrt_float32_64 basic_sqrt_float32_64
|
||||
#define simd_sqrt_float32_128 basic_sqrt_float32_128
|
||||
#define simd_rsqrt_float32_64 basic_rsqrt_float32_64
|
||||
#define simd_rsqrt_float32_128 basic_rsqrt_float32_128
|
||||
#define simd_add_float32_128_aligned basic_add_float32_128
|
||||
#define simd_sub_float32_128_aligned basic_sub_float32_128
|
||||
#define simd_mul_float32_128_aligned basic_mul_float32_128
|
||||
#define simd_div_float32_128_aligned basic_div_float32_128
|
||||
#define simd_sqrt_float32_128_aligned basic_sqrt_float32_128
|
||||
#define simd_rsqrt_float32_128_aligned basic_rsqrt_float32_128
|
||||
|
||||
// SSE2
|
||||
#define simd_add_int32_128 basic_add_int32_128
|
||||
|
@ -475,19 +576,26 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
|
|||
#define simd_mul_int32_128_aligned basic_mul_int32_128
|
||||
|
||||
// SSE41
|
||||
#define simd_mul_int32_128 basic_mul_int32_128
|
||||
#define simd_mul_int32_128_aligned basic_mul_int32_128
|
||||
#define simd_dot_product_float32_64 basic_dot_product_float32_64
|
||||
#define simd_dot_product_float32_96 basic_dot_product_float32_96
|
||||
#define simd_dot_product_float32_128 basic_dot_product_float32_128
|
||||
#define simd_dot_product_float32_128_aligned basic_dot_product_float32_128
|
||||
|
||||
// AVX
|
||||
#define simd_add_float32_256 basic_add_float32_256
|
||||
#define simd_sub_float32_256 basic_sub_float32_256
|
||||
#define simd_mul_float32_256 basic_mul_float32_256
|
||||
#define simd_div_float32_256 basic_div_float32_256
|
||||
#define simd_sqrt_float32_256 basic_sqrt_float32_256
|
||||
#define simd_rsqrt_float32_256 basic_rsqrt_float32_256
|
||||
#define simd_add_float32_256_aligned basic_add_float32_256
|
||||
#define simd_sub_float32_256_aligned basic_sub_float32_256
|
||||
#define simd_mul_float32_256_aligned basic_mul_float32_256
|
||||
#define simd_div_float32_256_aligned basic_div_float32_256
|
||||
#define simd_sqrt_float32_256_aligned basic_sqrt_float32_256
|
||||
#define simd_rsqrt_float32_256_aligned basic_rsqrt_float32_256
|
||||
|
||||
// AVX2
|
||||
#define simd_add_int32_256 basic_add_int32_256
|
||||
|
@ -505,6 +613,8 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
|
|||
#define simd_add_int32_512 basic_add_int32_512
|
||||
#define simd_sub_int32_512 basic_sub_int32_512
|
||||
#define simd_mul_int32_512 basic_mul_int32_512
|
||||
#define simd_sqrt_float32_512 basic_sqrt_float32_512
|
||||
#define simd_rsqrt_float32_512 basic_rsqrt_float32_512
|
||||
#define simd_add_float32_512_aligned basic_add_float32_512
|
||||
#define simd_sub_float32_512_aligned basic_sub_float32_512
|
||||
#define simd_mul_float32_512_aligned basic_mul_float32_512
|
||||
|
@ -512,9 +622,14 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
|
|||
#define simd_add_int32_512_aligned basic_add_int32_512
|
||||
#define simd_sub_int32_512_aligned basic_sub_int32_512
|
||||
#define simd_mul_int32_512_aligned basic_mul_int32_512
|
||||
#define simd_sqrt_float32_512_aligned basic_sqrt_float32_512
|
||||
#define simd_rsqrt_float32_512_aligned basic_rsqrt_float32_512
|
||||
|
||||
#endif
|
||||
|
||||
double __cdecl sqrt(_In_ double _X);
|
||||
double __cdecl rsqrt(_In_ double _X);
|
||||
|
||||
inline void basic_add_float32_64 (float32 *a, float32 *b, float32* result) {
|
||||
result[0] = a[0] + b[0];
|
||||
result[1] = a[1] + b[1];
|
||||
|
@ -638,6 +753,55 @@ inline float basic_dot_product_float32_96(float *a, float *b) {
|
|||
inline float basic_dot_product_float32_128(float *a, float *b) {
|
||||
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
|
||||
}
|
||||
inline void basic_sqrt_float32_64(float *a, float *result) {
|
||||
result[0] = sqrt(a[0]);
|
||||
result[1] = sqrt(a[1]);
|
||||
}
|
||||
inline void basic_sqrt_float32_96(float *a, float *result) {
|
||||
result[0] = sqrt(a[0]);
|
||||
result[1] = sqrt(a[1]);
|
||||
result[2] = sqrt(a[2]);
|
||||
}
|
||||
inline void basic_sqrt_float32_128(float *a, float *result) {
|
||||
result[0] = sqrt(a[0]);
|
||||
result[1] = sqrt(a[1]);
|
||||
result[2] = sqrt(a[2]);
|
||||
result[3] = sqrt(a[3]);
|
||||
}
|
||||
// Scalar fallback: square root of eight floats, handled as two runs of four.
inline void basic_sqrt_float32_256(float *a, float *result) {
	float *upper_in  = &a[4];
	float *upper_out = &result[4];
	basic_sqrt_float32_128(a, result);           // elements 0..3
	basic_sqrt_float32_128(upper_in, upper_out); // elements 4..7
}
|
||||
// Scalar fallback: square root of sixteen floats, handled as two runs of eight.
inline void basic_sqrt_float32_512(float *a, float *result) {
	float *upper_in  = &a[8];
	float *upper_out = &result[8];
	basic_sqrt_float32_256(a, result);           // elements 0..7
	basic_sqrt_float32_256(upper_in, upper_out); // elements 8..15
}
|
||||
inline void basic_rsqrt_float32_64(float *a, float *result) {
|
||||
result[0] = rsqrt(a[0]);
|
||||
result[1] = rsqrt(a[1]);
|
||||
}
|
||||
inline void basic_rsqrt_float32_96(float *a, float *result) {
|
||||
result[0] = rsqrt(a[0]);
|
||||
result[1] = rsqrt(a[1]);
|
||||
result[2] = rsqrt(a[2]);
|
||||
}
|
||||
inline void basic_rsqrt_float32_128(float *a, float *result) {
|
||||
result[0] = rsqrt(a[0]);
|
||||
result[1] = rsqrt(a[1]);
|
||||
result[2] = rsqrt(a[2]);
|
||||
result[3] = rsqrt(a[3]);
|
||||
}
|
||||
// Scalar fallback: reciprocal square root of eight floats, handled as two runs of four.
inline void basic_rsqrt_float32_256(float *a, float *result) {
	float *upper_in  = &a[4];
	float *upper_out = &result[4];
	basic_rsqrt_float32_128(a, result);           // elements 0..3
	basic_rsqrt_float32_128(upper_in, upper_out); // elements 4..7
}
|
||||
// Scalar fallback: reciprocal square root of sixteen floats, handled as two runs of eight.
inline void basic_rsqrt_float32_512(float *a, float *result) {
	float *upper_in  = &a[8];
	float *upper_out = &result[8];
	basic_rsqrt_float32_256(a, result);           // elements 0..7
	basic_rsqrt_float32_256(upper_in, upper_out); // elements 8..15
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
*/
|
||||
|
||||
void * memcpy (void *,const void *,size_t);
|
||||
void* talloc(u64);
|
||||
|
||||
typedef struct string {
|
||||
|
@ -13,6 +12,7 @@ typedef struct string {
|
|||
u8 *data;
|
||||
} string;
|
||||
|
||||
#define fixed_string STR
|
||||
#define STR(s) ((string){ length_of_null_terminated_string((const char*)s), (u8*)s })
|
||||
|
||||
inline u64 length_of_null_terminated_string(const char* cstring) {
|
||||
|
|
|
@ -213,7 +213,7 @@ void printf(const char* fmt, ...) {
|
|||
|
||||
|
||||
typedef void(*Logger_Proc)(Log_Level level, string s);
|
||||
#define LOG_BASE(level, ...) If context.logger then ((Logger_Proc)context.logger)(level, tprint(__VA_ARGS__))
|
||||
#define LOG_BASE(level, ...) if (context.logger) ((Logger_Proc)context.logger)(level, tprint(__VA_ARGS__))
|
||||
|
||||
|
||||
#define log_verbose(...) LOG_BASE(LOG_VERBOSE, __VA_ARGS__)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// Custom allocators for lodepng
|
||||
Allocator get_heap_allocator();
|
||||
Allocator lodepng_allocator = {0};
|
||||
/*Allocator lodepng_allocator = {0};
|
||||
void* lodepng_malloc(size_t size) {
|
||||
#ifdef LODEPNG_MAX_ALLOC
|
||||
if(size > LODEPNG_MAX_ALLOC) return 0;
|
||||
|
@ -28,4 +28,47 @@ void lodepng_free(void* ptr) {
|
|||
#define LODEPNG_NO_COMPILE_ENCODER
|
||||
// One day I might write my own png decoder so we don't even need this
|
||||
#include "third_party/lodepng.h"
|
||||
#include "third_party/lodepng.c"
|
||||
#include "third_party/lodepng.c"*/
|
||||
|
||||
#define STB_TRUETYPE_IMPLEMENTATION
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
|
||||
typedef unsigned char u8;
|
||||
typedef signed char s8;
|
||||
typedef unsigned short u16;
|
||||
typedef signed short s16;
|
||||
typedef unsigned int u32;
|
||||
typedef signed int s32;
|
||||
|
||||
// Allocation hook handed to the stb libraries: forwards to the heap allocator.
// A zero-byte request returns a null pointer without calling the allocator.
void *stbtt_malloc(size_t size) {
	if (size == 0) {
		return 0;
	}
	return alloc(get_heap_allocator(), size);
}
|
||||
#define STBTT_malloc(x,u) ((void)(u),stbtt_malloc(x))
|
||||
// Deallocation hook handed to the stb libraries: forwards to the heap allocator.
// A null pointer is ignored.
void stbtt_free(void *p) {
	if (p) {
		dealloc(get_heap_allocator(), p);
	}
}
|
||||
#define STBTT_free(x,u) ((void)(u),stbtt_free(x))
|
||||
|
||||
#define STBTT_assert(x) assert(x)
|
||||
|
||||
// Length of a null-terminated string, not counting the terminator.
size_t stbtt_strlen(const char* str) {
	const char *cursor = str;
	// Walk forward to the terminating zero byte.
	while (*cursor != 0) {
		cursor += 1;
	}
	return (size_t)(cursor - str);
}
|
||||
#define STBTT_strlen(x) stbtt_strlen(x)
|
||||
#define STBTT_memcpy memcpy
|
||||
#define STBTT_memset memset
|
||||
|
||||
|
||||
#define STBI_NO_STDIO
|
||||
#define STBI_ASSERT(x) {if (!(x)) *(volatile char*)0 = 0;}
|
||||
|
||||
#define STBI_MALLOC(sz) stbtt_malloc(sz)
|
||||
#define STBI_REALLOC(p,newsz) get_heap_allocator().proc(newsz, p, ALLOCATOR_REALLOCATE, 0)
|
||||
#define STBI_FREE(p) stbtt_free(p)
|
||||
|
||||
#include "third_party/stb_image.h"
|
||||
#include "third_party/stb_truetype.h"
|
7988
oogabooga/third_party/stb_image.h
vendored
Normal file
7988
oogabooga/third_party/stb_image.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
5080
oogabooga/third_party/stb_truetype.h
vendored
Normal file
5080
oogabooga/third_party/stb_truetype.h
vendored
Normal file
File diff suppressed because it is too large
Load diff
Reference in a new issue