- Replace lodepng with stb_image (& add stb_truetype for fonts)

- Fix bug where the d3d11 input assembler was not being created correctly
- Fix framerate being locked by swap chain present
- Move enable_vsync to window
- sqrt & rsqrt simd
- Add release build & run in vscode tasks & launch
- Cleanup
This commit is contained in:
Charlie 2024-07-04 20:56:27 +02:00
parent 4c5f882999
commit 05919248eb
26 changed files with 13557 additions and 305 deletions

4
.gitignore vendored
View file

@ -54,4 +54,6 @@ test_doc.vkn
*keybinds *keybinds
*.rdi *.rdi
google_trace.json google_trace.json
build/*

16
.vscode/launch.json vendored
View file

@ -2,7 +2,7 @@
"version": "0.2.0", "version": "0.2.0",
"configurations": [ "configurations": [
{ {
"name": "Launch with MSVC Debugger", "name": "Launch Debug with MSVC Debugger",
"type": "cppvsdbg", "type": "cppvsdbg",
"request": "launch", "request": "launch",
"program": "${workspaceFolder}/build/cgame.exe", // Run the output executable after compile "program": "${workspaceFolder}/build/cgame.exe", // Run the output executable after compile
@ -11,7 +11,19 @@
"cwd": "${workspaceFolder}", "cwd": "${workspaceFolder}",
"environment": [], "environment": [],
"console":"integratedTerminal", "console":"integratedTerminal",
// "preLaunchTask": "Compile" "preLaunchTask": "Compile"
},
{
"name": "Launch Release with MSVC Debugger",
"type": "cppvsdbg",
"request": "launch",
"program": "${workspaceFolder}/build/release/cgame.exe", // Run the output executable after compile
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"console":"integratedTerminal",
"preLaunchTask": "Compile Release"
} }
] ]
} }

17
.vscode/tasks.json vendored
View file

@ -16,6 +16,21 @@
// "close": false, // "close": false,
// "showReuseMessage": true, // "showReuseMessage": true,
} }
} },
{
"label": "Compile Release",
"type": "shell",
"command": "${workspaceFolder}\\build_release",
"group": {
"kind": "build"
},
"problemMatcher": ["$gcc"],
"presentation": {
"clear": true,
// "revealProblems": "onProblem",
// "close": false,
// "showReuseMessage": true,
}
}
] ]
} }

View file

@ -6,6 +6,6 @@ mkdir build
pushd build pushd build
clang -g -o cgame.exe ../build.c -O0 -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi -msse4.1 clang -g -o cgame.exe ../build.c -O0 -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -Wno-deprecated-declarations -lkernel32 -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi
popd popd

View file

@ -3,7 +3,7 @@
/// ///
// Build config stuff // Build config stuff
#define RUN_TESTS 1 #define RUN_TESTS 0
// This is only for people developing oogabooga! // This is only for people developing oogabooga!
#define OOGABOOGA_DEV 1 #define OOGABOOGA_DEV 1
@ -13,6 +13,7 @@
// ENABLE_SIMD Requires CPU to support at least SSE1 but I will be very surprised if you find a system today which doesn't // ENABLE_SIMD Requires CPU to support at least SSE1 but I will be very surprised if you find a system today which doesn't
#define ENABLE_SIMD 1 #define ENABLE_SIMD 1
#define INITIAL_PROGRAM_MEMORY_SIZE MB(5) #define INITIAL_PROGRAM_MEMORY_SIZE MB(5)
typedef struct Context_Extra { typedef struct Context_Extra {
@ -21,8 +22,6 @@ typedef struct Context_Extra {
// This needs to be defined before oogabooga if we want extra stuff in context // This needs to be defined before oogabooga if we want extra stuff in context
#define CONTEXT_EXTRA Context_Extra #define CONTEXT_EXTRA Context_Extra
#define GFX_RENDERER GFX_RENDERER_D3D11
// This defaults to "entry", but we can set it to anything (except "main" or other existing proc names) // This defaults to "entry", but we can set it to anything (except "main" or other existing proc names)
#define ENTRY_PROC entry #define ENTRY_PROC entry
@ -38,13 +37,13 @@ typedef struct Context_Extra {
// //
// this is a minimal starting point for new projects. Copy & rename to get started // this is a minimal starting point for new projects. Copy & rename to get started
#include "oogabooga/examples/minimal_game_loop.c" // #include "oogabooga/examples/minimal_game_loop.c"
// An engine dev stress test for rendering // An engine dev stress test for rendering
// #include "oogabooga/examples/renderer_stress_test.c" // #include "oogabooga/examples/renderer_stress_test.c"
// Randy's example game that he's building out as a tutorial for using the engine // Randy's example game that he's building out as a tutorial for using the engine
// #include "entry_randygame.c" #include "entry_randygame.c"
// This is where you swap in your own project! // This is where you swap in your own project!
// #include "entry_yourepicgamename.c" // #include "entry_yourepicgamename.c"

View file

@ -1,14 +1,18 @@
@echo off @echo off
rmdir /S /Q build if exist build/dissassembly (
mkdir build rmdir /s /q build
)
if not exist build (
mkdir build
)
pushd build pushd build
mkdir release mkdir dissassembly
pushd release pushd dissassembly
clang -o cgame.asm ../../build.c -Ofast -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -ffast-math -funroll-loops -finline-functions -fvectorize -fslp-vectorize -fomit-frame-pointer -fno-exceptions -fno-rtti -S -masm=intel clang -o cgame.asm ../../build.c -Ofast -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -Wno-deprecated-declarations -finline-functions -ffast-math -fno-math-errno -funsafe-math-optimizations -freciprocal-math -ffinite-math-only -fassociative-math -fno-signed-zeros -fno-trapping-math -ftree-vectorize -fomit-frame-pointer -funroll-loops -fno-rtti -fno-exceptions -S -masm=intel
popd popd
popd popd

View file

@ -1,5 +1,7 @@
@echo off @echo off
rmdir /S /Q build if exist build (
rmdir /s /q build
)
mkdir build mkdir build
pushd build pushd build
@ -7,7 +9,7 @@ pushd build
mkdir release mkdir release
pushd release pushd release
clang -o cgame.exe ../../build.c -Ofast -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi -finline-functions -ffast-math -fno-math-errno -funsafe-math-optimizations -freciprocal-math -ffinite-math-only -fassociative-math -fno-signed-zeros -fno-trapping-math -ftree-vectorize -fomit-frame-pointer -funroll-loops -fno-rtti -fno-exceptions -msse4.1 clang -o cgame.exe ../../build.c -Ofast -DNDEBUG -std=c11 -Wextra -Wno-incompatible-library-redeclaration -Wno-sign-compare -Wno-unused-parameter -Wno-builtin-requires-header -Wno-deprecated-declarations -lgdi32 -luser32 -lwinmm -ld3d11 -ldxguid -ld3dcompiler -lshlwapi -finline-functions -ffast-math -fno-math-errno -funsafe-math-optimizations -freciprocal-math -ffinite-math-only -fassociative-math -fno-signed-zeros -fno-trapping-math -ftree-vectorize -fomit-frame-pointer -funroll-loops -fno-rtti -fno-exceptions
popd popd
popd popd

View file

@ -10,37 +10,22 @@
#define local_persist static #define local_persist static
#define forward_global extern #define forward_global extern
// Haters gonna hate
#define If if (
#define then )
// If cond then {}
#ifdef _MSC_VER
inline void os_break() {
__debugbreak();
volatile int *a = 0;
*a = 5;
}
#else
#error "Only msvc compiler supported at the moment";
#endif
void printf(const char* fmt, ...); void printf(const char* fmt, ...);
#define ASSERT_STR_HELPER(x) #x #define ASSERT_STR_HELPER(x) #x
#define ASSERT_STR(x) ASSERT_STR_HELPER(x) #define ASSERT_STR(x) ASSERT_STR_HELPER(x)
#define assert_line(line, cond, ...) if(!(cond)) { printf("Assertion failed in file " __FILE__ " on line " ASSERT_STR(line) "\nFailed Condition: " #cond ". Message: " __VA_ARGS__); os_break(); } #define assert_line(line, cond, ...) {if(!(cond)) { printf("Assertion failed in file " __FILE__ " on line " ASSERT_STR(line) "\nFailed Condition: " #cond ". Message: " __VA_ARGS__); crash(); }}
#define assert(cond, ...) assert_line(__LINE__, cond, __VA_ARGS__); #define assert(cond, ...) {assert_line(__LINE__, cond, __VA_ARGS__)}
#define DEFER(start, end) for(int _i_ = ((start), 0); _i_ == 0; _i_ += 1, (end)) #define DEFER(start, end) for(int _i_ = ((start), 0); _i_ == 0; _i_ += 1, (end))
#if CONFIGURATION == RELEASE #if CONFIGURATION == RELEASE
#undef assert #undef assert
#define assert(...) #define assert(...) (void)0;
#endif #endif
#define panic(...) { print(__VA_ARGS__); os_break(); } #define panic(...) { print(__VA_ARGS__); crash(); }
#define cast(t) (t) #define cast(t) (t)
@ -48,7 +33,6 @@ void printf(const char* fmt, ...);
#define FIRST_ARG(arg1, ...) arg1 #define FIRST_ARG(arg1, ...) arg1
#define SECOND_ARG(arg1, arg2, ...) arg2 #define SECOND_ARG(arg1, arg2, ...) arg2
#define print(...) _Generic((FIRST_ARG(__VA_ARGS__)), \ #define print(...) _Generic((FIRST_ARG(__VA_ARGS__)), \

View file

@ -29,6 +29,11 @@ typedef struct Cpu_Capabilities {
#define inline __forceinline #define inline __forceinline
#define alignat(x) __declspec(align(x)) #define alignat(x) __declspec(align(x))
#define COMPILER_HAS_MEMCPY_INTRINSICS 1 #define COMPILER_HAS_MEMCPY_INTRINSICS 1
// Deliberately brings the program down; takes no arguments and returns nothing.
// Step 1: raise a breakpoint with the __debugbreak() intrinsic.
// Step 2: store the value 5 through a null pointer. The pointer is
// volatile-qualified, so the compiler has to emit the store.
// NOTE(review): the null store presumably exists so the process still faults
// if execution is resumed past the breakpoint - confirm.
inline void crash() {
__debugbreak();
volatile int *a = 0;
*a = 5;
}
#include <intrin.h> #include <intrin.h>
#pragma intrinsic(__rdtsc) #pragma intrinsic(__rdtsc)
inline u64 rdtsc() { inline u64 rdtsc() {
@ -66,6 +71,11 @@ typedef struct Cpu_Capabilities {
#define inline __attribute__((always_inline)) inline #define inline __attribute__((always_inline)) inline
#define alignat(x) __attribute__((aligned(x))) #define alignat(x) __attribute__((aligned(x)))
#define COMPILER_HAS_MEMCPY_INTRINSICS 1 #define COMPILER_HAS_MEMCPY_INTRINSICS 1
// Deliberately brings the program down; takes no arguments and returns nothing.
// Calls __builtin_trap() first, followed by a store of 5 through a
// volatile-qualified null pointer.
// NOTE(review): GCC/Clang document __builtin_trap() as never returning, so the
// null store below looks unreachable here - confirm whether a resumable
// breakpoint (e.g. __builtin_debugtrap) was intended instead.
inline void crash() {
__builtin_trap();
volatile int *a = 0;
*a = 5;
}
inline u64 rdtsc() { inline u64 rdtsc() {
unsigned int lo, hi; unsigned int lo, hi;
__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi)); __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
@ -119,7 +129,6 @@ typedef struct Cpu_Capabilities {
#warning "Compiler is not explicitly supported, some things will probably not work as expected" #warning "Compiler is not explicitly supported, some things will probably not work as expected"
#endif #endif
Cpu_Capabilities query_cpu_capabilities() { Cpu_Capabilities query_cpu_capabilities() {
Cpu_Capabilities result = {0}; Cpu_Capabilities result = {0};

View file

@ -3,10 +3,10 @@
struct VS_INPUT struct VS_INPUT
{ {
float4 position : POSITION;
float2 uv : TEXCOORD; float2 uv : TEXCOORD;
float4 color : COLOR; float4 color : COLOR;
int texture_index: TEXTURE_INDEX; int texture_index: TEXTURE_INDEX;
float4 position : POSITION;
}; };
struct PS_INPUT struct PS_INPUT
@ -79,8 +79,8 @@ float4 ps_main(PS_INPUT input) : SV_TARGET
*/ */
const u8 IMAGE_SHADER_VERTEX_BLOB_BYTES[]= { const u8 IMAGE_SHADER_VERTEX_BLOB_BYTES[]= {
0x44, 0x58, 0x42, 0x43, 0xdd, 0x02, 0x55, 0xb0, 0x7b, 0x83, 0x6c, 0x34, 0x45, 0xe8, 0x51, 0xd4, 0x44, 0x58, 0x42, 0x43, 0xf4, 0xea, 0x50, 0x9f, 0xcf, 0xeb, 0x01, 0x7b, 0x78, 0x58, 0xd5, 0x6b,
0x76, 0xbf, 0x66, 0x77, 0x01, 0x00, 0x00, 0x00, 0x3c, 0x03, 0x00, 0x00, 0x05, 0x00, 0x00, 0x4f, 0x9f, 0xc1, 0xe2, 0x01, 0x00, 0x00, 0x00, 0x3c, 0x03, 0x00, 0x00, 0x05, 0x00, 0x00,
0x00, 0x34, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x38, 0x01, 0x00, 0x00, 0xd4, 0x01, 0x00, 0x34, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, 0x38, 0x01, 0x00, 0x00, 0xd4, 0x01,
0x00, 0x00, 0xa0, 0x02, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0x02, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x64, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
@ -92,14 +92,14 @@ const u8 IMAGE_SHADER_VERTEX_BLOB_BYTES[]= {
0x6c, 0x65, 0x72, 0x20, 0x31, 0x30, 0x2e, 0x31, 0x00, 0x49, 0x53, 0x47, 0x4e, 0x90, 0x00, 0x6c, 0x65, 0x72, 0x20, 0x31, 0x30, 0x2e, 0x31, 0x00, 0x49, 0x53, 0x47, 0x4e, 0x90, 0x00,
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x03, 0x03, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0f, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x0f, 0x00, 0x00, 0x77, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x03, 0x00, 0x00, 0x7a, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02,
0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0f, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0f, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00,
0x00, 0x54, 0x45, 0x58, 0x43, 0x4f, 0x4f, 0x52, 0x44, 0x00, 0x43, 0x4f, 0x4c, 0x4f, 0x52, 0x00, 0x50, 0x4f, 0x53, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x00, 0x54, 0x45, 0x58, 0x43, 0x4f,
0x00, 0x54, 0x45, 0x58, 0x54, 0x55, 0x52, 0x45, 0x5f, 0x49, 0x4e, 0x44, 0x45, 0x58, 0x00, 0x4f, 0x52, 0x44, 0x00, 0x43, 0x4f, 0x4c, 0x4f, 0x52, 0x00, 0x54, 0x45, 0x58, 0x54, 0x55,
0x50, 0x4f, 0x53, 0x49, 0x54, 0x49, 0x4f, 0x4e, 0x00, 0xab, 0xab, 0x4f, 0x53, 0x47, 0x4e, 0x52, 0x45, 0x5f, 0x49, 0x4e, 0x44, 0x45, 0x58, 0x00, 0xab, 0xab, 0x4f, 0x53, 0x47, 0x4e,
0x94, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@ -111,19 +111,19 @@ const u8 IMAGE_SHADER_VERTEX_BLOB_BYTES[]= {
0x54, 0x45, 0x58, 0x43, 0x4f, 0x4f, 0x52, 0x44, 0x00, 0x43, 0x4f, 0x4c, 0x4f, 0x52, 0x00, 0x54, 0x45, 0x58, 0x43, 0x4f, 0x4f, 0x52, 0x44, 0x00, 0x43, 0x4f, 0x4c, 0x4f, 0x52, 0x00,
0x54, 0x45, 0x58, 0x54, 0x55, 0x52, 0x45, 0x5f, 0x49, 0x4e, 0x44, 0x45, 0x58, 0x00, 0xab, 0x54, 0x45, 0x58, 0x54, 0x55, 0x52, 0x45, 0x5f, 0x49, 0x4e, 0x44, 0x45, 0x58, 0x00, 0xab,
0xab, 0xab, 0x53, 0x48, 0x45, 0x58, 0xc4, 0x00, 0x00, 0x00, 0x50, 0x00, 0x01, 0x00, 0x31, 0xab, 0xab, 0x53, 0x48, 0x45, 0x58, 0xc4, 0x00, 0x00, 0x00, 0x50, 0x00, 0x01, 0x00, 0x31,
0x00, 0x00, 0x00, 0x6a, 0x08, 0x00, 0x01, 0x5f, 0x00, 0x00, 0x03, 0x32, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x6a, 0x08, 0x00, 0x01, 0x5f, 0x00, 0x00, 0x03, 0xf2, 0x10, 0x10, 0x00,
0x00, 0x00, 0x00, 0x00, 0x5f, 0x00, 0x00, 0x03, 0xf2, 0x10, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f, 0x00, 0x00, 0x03, 0x32, 0x10, 0x10, 0x00, 0x01, 0x00, 0x00,
0x00, 0x5f, 0x00, 0x00, 0x03, 0x12, 0x10, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x5f, 0x00, 0x00, 0x5f, 0x00, 0x00, 0x03, 0xf2, 0x10, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x5f, 0x00,
0x00, 0x03, 0xf2, 0x10, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x04, 0xf2, 0x00, 0x03, 0x12, 0x10, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x04, 0xf2,
0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03,
0x32, 0x20, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03, 0xf2, 0x20, 0x10, 0x32, 0x20, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03, 0xf2, 0x20, 0x10,
0x00, 0x02, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03, 0x12, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03, 0x12, 0x20, 0x10, 0x00, 0x03, 0x00,
0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0xf2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0xf2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46,
0x1e, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00, 0x1e, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x32, 0x20, 0x10, 0x00,
0x01, 0x00, 0x00, 0x00, 0x46, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x46, 0x10, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00,
0x05, 0xf2, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x1e, 0x10, 0x00, 0x01, 0x00, 0x05, 0xf2, 0x20, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x46, 0x1e, 0x10, 0x00, 0x02, 0x00,
0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x12, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x12, 0x20, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0a,
0x10, 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, 0x10, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54,
0x94, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

View file

@ -1,9 +1,9 @@
struct VS_INPUT struct VS_INPUT
{ {
float4 position : POSITION;
float2 uv : TEXCOORD; float2 uv : TEXCOORD;
float4 color : COLOR; float4 color : COLOR;
int texture_index: TEXTURE_INDEX; int texture_index: TEXTURE_INDEX;
float4 position : POSITION;
}; };
struct PS_INPUT struct PS_INPUT

View file

@ -197,65 +197,39 @@ Draw_Quad *draw_image_xform(Gfx_Image *image, Matrix4 xform, Vector2 size, Vecto
#define COLOR_BLACK ((Vector4){0.0, 0.0, 0.0, 1.0}) #define COLOR_BLACK ((Vector4){0.0, 0.0, 0.0, 1.0})
Gfx_Image *load_image_from_disk(string path, Allocator allocator) { Gfx_Image *load_image_from_disk(string path, Allocator allocator) {
string png; string png;
bool ok = os_read_entire_file(path, &png, allocator); bool ok = os_read_entire_file(path, &png, allocator);
if (!ok) return 0; if (!ok) return 0;
Gfx_Image *image = alloc(allocator, sizeof(Gfx_Image)); Gfx_Image *image = alloc(allocator, sizeof(Gfx_Image));
// This is fucking terrible I gotta write my own decoder // Use stb_image to load and decode the PNG
int width, height, channels;
lodepng_allocator = allocator; stbi_set_flip_vertically_on_load(1); // stb_image can flip the image on load
unsigned char* stb_data = stbi_load_from_memory(png.data, png.count, &width, &height, &channels, STBI_rgb_alpha);
LodePNGState state;
lodepng_state_init(&state); if (!stb_data) {
u32 error = lodepng_inspect(&image->width, &image->height, &state, png.data, png.count); dealloc(allocator, image);
if (error) { dealloc_string(allocator, png);
return 0; return 0;
}
// 5 lines of code to say "ignore_adler32 = true" (because it's broken and gives me an error)
// I JUST WANT TO LOAD A PNG
LodePNGDecoderSettings decoder;
lodepng_decoder_settings_init(&decoder);
lodepng_decompress_settings_init(&decoder.zlibsettings);
decoder.zlibsettings.ignore_adler32 = true;
state.decoder = decoder;
error = lodepng_decode(&image->data, &image->width, &image->height, &state, png.data, png.count);
lodepng_state_cleanup(&state);
dealloc_string(allocator, png);
if (error) {
return 0;
}
// We need to flip the image
u32 row_bytes = image->width * 4; // #Magicvalue assuming 4 bytes
u8* temp_row = (u8*)alloc(temp, row_bytes);
for (u32 i = 0; i < image->height / 2; i++) {
u8* top_row = image->data + i * row_bytes;
u8* bottom_row = image->data + (image->height - i - 1) * row_bytes;
// Swap the top row with the bottom row
memcpy(temp_row, top_row, row_bytes);
memcpy(top_row, bottom_row, row_bytes);
memcpy(bottom_row, temp_row, row_bytes);
} }
image->gfx_handle = GFX_INVALID_HANDLE; // This is handled in gfx image->data = stb_data;
image->width = width;
image->allocator = allocator; image->height = height;
image->gfx_handle = GFX_INVALID_HANDLE; // This is handled in gfx
return image; image->allocator = allocator;
dealloc_string(allocator, png);
return image;
} }
void delete_image(Gfx_Image *image) { void delete_image(Gfx_Image *image) {
dealloc(image->allocator, image->data); stbi_image_free(image->data); // Free the image data allocated by stb_image
image->width = 0; image->width = 0;
image->height = 0; image->height = 0;
draw_frame.garbage_stack[draw_frame.garbage_stack_count] = image->gfx_handle; draw_frame.garbage_stack[draw_frame.garbage_stack_count] = image->gfx_handle;
draw_frame.garbage_stack_count += 1; draw_frame.garbage_stack_count += 1;
dealloc(image->allocator, image); dealloc(image->allocator, image);
} }

View file

@ -15,8 +15,6 @@ int entry(int argc, char **argv) {
Gfx_Image *hammer_image = load_image_from_disk(STR("oogabooga/examples/hammer.png"), get_heap_allocator()); Gfx_Image *hammer_image = load_image_from_disk(STR("oogabooga/examples/hammer.png"), get_heap_allocator());
assert(hammer_image, "Failed loading hammer.png"); assert(hammer_image, "Failed loading hammer.png");
Gfx_Font *font = load_font_From_disk(
seed_for_random = os_get_current_cycle_count(); seed_for_random = os_get_current_cycle_count();
const float64 fps_limit = 69000; const float64 fps_limit = 69000;
@ -36,7 +34,9 @@ int entry(int argc, char **argv) {
delta = now - last_time; delta = now - last_time;
} }
last_time = now; last_time = now;
os_update(); tm_scope_cycles("os_update") {
os_update();
}
if (is_key_just_released(KEY_ESCAPE)) { if (is_key_just_released(KEY_ESCAPE)) {
window.should_close = true; window.should_close = true;
@ -102,11 +102,10 @@ int entry(int argc, char **argv) {
draw_image(bush_image, v2(0.65, 0.65), v2(0.2*sin(now), 0.2*sin(now)), COLOR_WHITE); draw_image(bush_image, v2(0.65, 0.65), v2(0.2*sin(now), 0.2*sin(now)), COLOR_WHITE);
draw_frame.font = STR(""); tm_scope_cycles("gfx_update") {
gfx_update();
}
draw_text();
gfx_update();
if (is_key_just_released('E')) { if (is_key_just_released('E')) {
log("FPS: %.2f", 1.0 / delta); log("FPS: %.2f", 1.0 / delta);

View file

@ -13,10 +13,10 @@ const Gfx_Handle GFX_INVALID_HANDLE = 0;
string temp_win32_null_terminated_wide_to_fixed_utf8(const u16 *utf16); string temp_win32_null_terminated_wide_to_fixed_utf8(const u16 *utf16);
typedef struct D3D11_Vertex { typedef struct alignat(16) D3D11_Vertex {
Vector4 color;
Vector4 position; Vector4 position;
Vector2 uv; Vector2 uv;
Vector4 color;
int texture_index; int texture_index;
} D3D11_Vertex; } D3D11_Vertex;
@ -81,14 +81,19 @@ void CALLBACK d3d11_debug_callback(D3D11_MESSAGE_CATEGORY category, D3D11_MESSAG
case D3D11_MESSAGE_SEVERITY_CORRUPTION: case D3D11_MESSAGE_SEVERITY_CORRUPTION:
case D3D11_MESSAGE_SEVERITY_ERROR: case D3D11_MESSAGE_SEVERITY_ERROR:
log_error(msg); log_error(msg);
break;
case D3D11_MESSAGE_SEVERITY_WARNING: case D3D11_MESSAGE_SEVERITY_WARNING:
log_warning(msg); log_warning(msg);
break;
case D3D11_MESSAGE_SEVERITY_INFO: case D3D11_MESSAGE_SEVERITY_INFO:
log_info(msg); log_info(msg);
break;
case D3D11_MESSAGE_SEVERITY_MESSAGE: case D3D11_MESSAGE_SEVERITY_MESSAGE:
log_verbose(msg); log_verbose(msg);
break;
default: default:
log("Ligma"); log("Ligma");
break;
} }
} }
@ -127,7 +132,8 @@ void d3d11_update_swapchain() {
if (create) { if (create) {
DXGI_SWAP_CHAIN_DESC1 scd = ZERO(DXGI_SWAP_CHAIN_DESC1); DXGI_SWAP_CHAIN_DESC1 scd = ZERO(DXGI_SWAP_CHAIN_DESC1);
scd.Format = DXGI_FORMAT_R8G8B8A8_UNORM; scd.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
//scd.BufferDesc.RefreshRate.Numerator = xx st.refresh_rate; scd.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
//scd.BufferDesc.RefreshRate.Numerator = 0;
//scd.BufferDesc.RefreshRate.Denominator = 1; //scd.BufferDesc.RefreshRate.Denominator = 1;
scd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; scd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
@ -137,23 +143,23 @@ void d3d11_update_swapchain() {
scd.Scaling = DXGI_SCALING_STRETCH; // for compatability with 7 scd.Scaling = DXGI_SCALING_STRETCH; // for compatability with 7
} }
// Windows 10 allows to use DXGI_SWAP_EFFECT_FLIP_DISCARD // Windows 10 allows to use DXGI_SWAP_EFFECT_FLIP_DISCARD
// for Windows 8 compatibility use DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL // for Windows 8 compatibility use DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL
// for Windows 7 compatibility use DXGI_SWAP_EFFECT_DISCARD // for Windows 7 compatibility use DXGI_SWAP_EFFECT_DISCARD
if (d3d11_feature_level >= D3D_FEATURE_LEVEL_11_0) { if (d3d11_feature_level >= D3D_FEATURE_LEVEL_11_0) {
// this is supported only on FLIP presentation model // this is supported only on FLIP presentation model
scd.Scaling = DXGI_SCALING_NONE; scd.Scaling = DXGI_SCALING_NONE;
scd.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; scd.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
scd.BufferCount = 3; scd.BufferCount = 3;
gfx._can_vsync = false;
log_verbose("Present mode is flip discard, 3 buffers"); log_verbose("Present mode is flip discard, 3 buffers");
} else { } else {
scd.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; scd.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
scd.BufferCount = 2; scd.BufferCount = 2;
gfx._can_vsync = true;
log_verbose("Present mode is discard, 2 buffers"); log_verbose("Present mode is discard, 2 buffers");
} }
// Obtain DXGI factory from device // Obtain DXGI factory from device
IDXGIDevice *dxgi_device; IDXGIDevice *dxgi_device;
hr = VTABLE(QueryInterface, d3d11_device, &IID_IDXGIDevice, cast(void**)&dxgi_device); hr = VTABLE(QueryInterface, d3d11_device, &IID_IDXGIDevice, cast(void**)&dxgi_device);
@ -224,7 +230,7 @@ void d3d11_update_swapchain() {
void gfx_init() { void gfx_init() {
gfx.enable_vsync = false; window.enable_vsync = false;
log_verbose("d3d11 gfx_init"); log_verbose("d3d11 gfx_init");
@ -426,42 +432,53 @@ void gfx_init() {
log_verbose("Shaders created"); log_verbose("Shaders created");
D3D11_INPUT_ELEMENT_DESC layout[4];
memset(layout, 0, sizeof(layout));
layout[0].SemanticName = "POSITION";
layout[0].SemanticIndex = 0;
layout[0].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
layout[0].InputSlot = 0;
layout[0].AlignedByteOffset = offsetof(D3D11_Vertex, position);
layout[0].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
layout[0].InstanceDataStepRate = 0;
layout[1].SemanticName = "TEXCOORD";
layout[1].SemanticIndex = 0;
layout[1].Format = DXGI_FORMAT_R32G32_FLOAT;
layout[1].InputSlot = 0;
layout[1].AlignedByteOffset = offsetof(D3D11_Vertex, uv);
layout[1].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
layout[1].InstanceDataStepRate = 0;
layout[2].SemanticName = "COLOR";
layout[2].SemanticIndex = 0;
layout[2].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
layout[2].InputSlot = 0;
layout[2].AlignedByteOffset = offsetof(D3D11_Vertex, color);
layout[2].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
layout[2].InstanceDataStepRate = 0;
layout[3].SemanticName = "TEXTURE_INDEX";
layout[3].SemanticIndex = 0;
layout[3].Format = DXGI_FORMAT_R32_SINT;
layout[3].InputSlot = 0;
layout[3].AlignedByteOffset = offsetof(D3D11_Vertex, texture_index);
layout[3].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
layout[3].InstanceDataStepRate = 0;
hr = VTABLE(CreateInputLayout, d3d11_device, layout, 4, vs_buffer, vs_size, &d3d11_image_vertex_layout);
win32_check_hr(hr);
#if OOGABOOGA_DEV #if OOGABOOGA_DEV
D3D11Release(vs_blob); D3D11Release(vs_blob);
D3D11Release(ps_blob); D3D11Release(ps_blob);
#endif #endif
D3D11_INPUT_ELEMENT_DESC layout[4];
memset(layout, 0, sizeof(layout));
layout[0] = (D3D11_INPUT_ELEMENT_DESC){
"POSITION", 0,
DXGI_FORMAT_R32G32B32A32_FLOAT, 0,
offsetof(D3D11_Vertex, position),
D3D11_INPUT_PER_VERTEX_DATA, 0
};
layout[1] = (D3D11_INPUT_ELEMENT_DESC){
"TEXCOORD", 0,
DXGI_FORMAT_R32G32_FLOAT, 0,
offsetof(D3D11_Vertex, uv),
D3D11_INPUT_PER_VERTEX_DATA, 0
};
layout[2] = (D3D11_INPUT_ELEMENT_DESC){
"COLOR", 0,
DXGI_FORMAT_R32G32B32A32_FLOAT, 0,
offsetof(D3D11_Vertex, color),
D3D11_INPUT_PER_VERTEX_DATA, 0
};
layout[3] = (D3D11_INPUT_ELEMENT_DESC){
"TEXTURE_INDEX", 0,
DXGI_FORMAT_R32_SINT, 0,
offsetof(D3D11_Vertex, texture_index),
D3D11_INPUT_PER_VERTEX_DATA, 0
};
hr = VTABLE(CreateInputLayout, d3d11_device, layout, 4, vs_buffer, vs_size, &d3d11_image_vertex_layout);
log_info("D3D11 init done"); log_info("D3D11 init done");
} }
void d3d11_draw_call(int number_of_rendered_quads, ID3D11ShaderResourceView **textures, u64 num_textures) { void d3d11_draw_call(int number_of_rendered_quads, ID3D11ShaderResourceView **textures, u64 num_textures) {
@ -493,7 +510,6 @@ void d3d11_draw_call(int number_of_rendered_quads, ID3D11ShaderResourceView **te
} }
void gfx_update() { void gfx_update() {
if (window.should_close) return; if (window.should_close) return;
VTABLE(ClearRenderTargetView, d3d11_context, d3d11_window_render_target_view, (float*)&window.clear_color); VTABLE(ClearRenderTargetView, d3d11_context, d3d11_window_render_target_view, (float*)&window.clear_color);
@ -501,59 +517,61 @@ void gfx_update() {
HRESULT hr; HRESULT hr;
/// tm_scope_cycles("Frame setup") {
// purge garbage ///
for (u64 i = 0; i < draw_frame.garbage_stack_count; i++) { // purge garbage
ID3D11ShaderResourceView *view = draw_frame.garbage_stack[i]; for (u64 i = 0; i < draw_frame.garbage_stack_count; i++) {
ID3D11Resource *resource = 0; ID3D11ShaderResourceView *view = draw_frame.garbage_stack[i];
VTABLE(GetResource, view, &resource); ID3D11Resource *resource = 0;
VTABLE(GetResource, view, &resource);
ID3D11Texture2D *texture = 0;
hr = VTABLE(QueryInterface, resource, &IID_ID3D11Texture2D, (void**)&texture); ID3D11Texture2D *texture = 0;
if (SUCCEEDED(hr)) { hr = VTABLE(QueryInterface, resource, &IID_ID3D11Texture2D, (void**)&texture);
D3D11Release(view); if (SUCCEEDED(hr)) {
D3D11Release(texture); D3D11Release(view);
log("Destroyed an image"); D3D11Release(texture);
} else { log("Destroyed an image");
panic("Unhandled D3D11 resource deletion"); } else {
panic("Unhandled D3D11 resource deletion");
}
}
///
// Maybe resize swap chain
RECT client_rect;
bool ok = GetClientRect(window._os_handle, &client_rect);
assert(ok, "GetClientRect failed with error code %lu", GetLastError());
u32 window_width = client_rect.right-client_rect.left;
u32 window_height = client_rect.bottom-client_rect.top;
if (window_width != d3d11_swap_chain_width || window_height != d3d11_swap_chain_height) {
d3d11_update_swapchain();
}
///
// Maybe grow quad vbo
u32 required_size = sizeof(D3D11_Vertex) * draw_frame.num_blocks*QUADS_PER_BLOCK*6;
if (required_size > d3d11_quad_vbo_size) {
if (d3d11_quad_vbo) {
D3D11Release(d3d11_quad_vbo);
dealloc(get_heap_allocator(), d3d11_staging_quad_buffer);
}
D3D11_BUFFER_DESC desc = ZERO(D3D11_BUFFER_DESC);
desc.Usage = D3D11_USAGE_DYNAMIC;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
desc.ByteWidth = required_size;
desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
HRESULT hr = VTABLE(CreateBuffer, d3d11_device, &desc, 0, &d3d11_quad_vbo);
assert(SUCCEEDED(hr), "CreateBuffer failed");
d3d11_quad_vbo_size = required_size;
d3d11_staging_quad_buffer = alloc(get_heap_allocator(), d3d11_quad_vbo_size);
assert((u64)d3d11_staging_quad_buffer%16 == 0);
log_verbose("Grew quad vbo to %d bytes.", d3d11_quad_vbo_size);
} }
} }
///
// Maybe resize swap chain
RECT client_rect;
bool ok = GetClientRect(window._os_handle, &client_rect);
assert(ok, "GetClientRect failed with error code %lu", GetLastError());
u32 window_width = client_rect.right-client_rect.left;
u32 window_height = client_rect.bottom-client_rect.top;
if (window_width != d3d11_swap_chain_width || window_height != d3d11_swap_chain_height) {
d3d11_update_swapchain();
}
///
// Maybe grow quad vbo
u32 required_size = sizeof(D3D11_Vertex) * draw_frame.num_blocks*QUADS_PER_BLOCK*6;
if (required_size > d3d11_quad_vbo_size) {
if (d3d11_quad_vbo) {
D3D11Release(d3d11_quad_vbo);
dealloc(get_heap_allocator(), d3d11_staging_quad_buffer);
}
D3D11_BUFFER_DESC desc = ZERO(D3D11_BUFFER_DESC);
desc.Usage = D3D11_USAGE_DYNAMIC;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
desc.ByteWidth = required_size;
desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
HRESULT hr = VTABLE(CreateBuffer, d3d11_device, &desc, 0, &d3d11_quad_vbo);
assert(SUCCEEDED(hr), "CreateBuffer failed");
d3d11_quad_vbo_size = required_size;
d3d11_staging_quad_buffer = alloc(get_heap_allocator(), d3d11_quad_vbo_size);
log_verbose("Grew quad vbo to %d bytes.", d3d11_quad_vbo_size);
}
f64 rest_before = os_get_current_time_in_seconds();
if (draw_frame.num_blocks > 0) { if (draw_frame.num_blocks > 0) {
/// ///
// Render geometry from into vbo quad list // Render geometry from into vbo quad list
@ -569,8 +587,8 @@ void gfx_update() {
Draw_Quad_Block *block = &first_block; Draw_Quad_Block *block = &first_block;
tm_scope_cycles("Quad processing") { tm_scope_cycles("Quad processing") {
while (block != 0 && block->num_quads > 0) tm_scope_cycles("ad2As") { while (block != 0 && block->num_quads > 0) tm_scope_cycles("Quad block") {
for (u64 i = 0; i < block->num_quads; i++) tm_scope_cycles("Single quad") { for (u64 i = 0; i < block->num_quads; i++) {
Draw_Quad *q = &block->quad_buffer[i]; Draw_Quad *q = &block->quad_buffer[i];
@ -620,7 +638,7 @@ void gfx_update() {
if (num_textures >= 32) { if (num_textures >= 32) {
// If max textures reached, make a draw call and start over // If max textures reached, make a draw call and start over
D3D11_MAPPED_SUBRESOURCE buffer_mapping; D3D11_MAPPED_SUBRESOURCE buffer_mapping;
VTABLE(Map, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &buffer_mapping); VTABLE(Map, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0, D3D11_MAP_WRITE_DISCARD, 0, &buffer_mapping);
memcpy(buffer_mapping.pData, d3d11_staging_quad_buffer, number_of_rendered_quads*sizeof(D3D11_Vertex)*6); memcpy(buffer_mapping.pData, d3d11_staging_quad_buffer, number_of_rendered_quads*sizeof(D3D11_Vertex)*6);
VTABLE(Unmap, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0); VTABLE(Unmap, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0);
d3d11_draw_call(number_of_rendered_quads, textures, num_textures); d3d11_draw_call(number_of_rendered_quads, textures, num_textures);
@ -676,30 +694,29 @@ void gfx_update() {
} }
} }
D3D11_MAPPED_SUBRESOURCE buffer_mapping; tm_scope_cycles("Write to gpu") {
VTABLE(Map, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &buffer_mapping); D3D11_MAPPED_SUBRESOURCE buffer_mapping;
memcpy(buffer_mapping.pData, d3d11_staging_quad_buffer, number_of_rendered_quads*sizeof(D3D11_Vertex)*6); tm_scope_cycles("The Map call") {
VTABLE(Unmap, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0); hr = VTABLE(Map, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0, D3D11_MAP_WRITE_DISCARD, 0, &buffer_mapping);
win32_check_hr(hr);
}
tm_scope_cycles("The memcpy") {
memcpy(buffer_mapping.pData, d3d11_staging_quad_buffer, number_of_rendered_quads*sizeof(D3D11_Vertex)*6);
}
tm_scope_cycles("The Unmap call") {
VTABLE(Unmap, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0);
}
}
/// ///
// Draw call // Draw call
tm_scope_cycles("Draw call") d3d11_draw_call(number_of_rendered_quads, textures, num_textures);
u64 before_draw = os_get_current_cycle_count();
d3d11_draw_call(number_of_rendered_quads, textures, num_textures);
u64 after_draw = os_get_current_cycle_count();
//log("Draw call took %llu cycles", after_draw-before_draw);
} }
f64 rest_after = os_get_current_time_in_seconds(); tm_scope_cycles("Present") {
if (is_key_just_pressed('E')) hr = VTABLE(Present, d3d11_swap_chain, window.enable_vsync, window.enable_vsync ? 0 : DXGI_PRESENT_ALLOW_TEARING);
log("The rest took %.2fms", (rest_after-rest_before)*1000.0); win32_check_hr(hr);
}
f64 before_present = os_get_current_time_in_seconds();
hr = VTABLE(Present, d3d11_swap_chain, gfx._can_vsync && gfx.enable_vsync, 0);
f64 after = os_get_current_time_in_seconds();
if (is_key_just_pressed('E'))
log("Present took %.2fms", (after-before_present)*1000.0);
win32_check_hr(hr);
#if CONFIGURATION == DEBUG #if CONFIGURATION == DEBUG
/// ///

View file

@ -15,18 +15,6 @@
#error "Unknown renderer GFX_RENDERER defined" #error "Unknown renderer GFX_RENDERER defined"
#endif #endif
// Renderer-wide settings, exposed through the single global `gfx` declared right after the struct.
typedef struct Gfx_State {
// config
// Requested vsync setting; the Present call in this diff only honours it when _can_vsync is also set.
bool enable_vsync;
// readonly
// NOTE(review): presumably filled in by the renderer backend at init - the writer is not visible here.
bool _can_vsync;
} Gfx_State;
Gfx_State gfx;
forward_global const Gfx_Handle GFX_INVALID_HANDLE; forward_global const Gfx_Handle GFX_INVALID_HANDLE;
typedef struct Gfx_Image { typedef struct Gfx_Image {

View file

@ -133,7 +133,7 @@ inline float v3_dot_product(Vector3 a, Vector3 b) {
return simd_dot_product_float32_96((float*)&a, (float*)&b); return simd_dot_product_float32_96((float*)&a, (float*)&b);
} }
inline float v4_dot_product(Vector4 a, Vector4 b) { inline float v4_dot_product(Vector4 a, Vector4 b) {
return simd_dot_product_float32_128((float*)&a, (float*)&b); return simd_dot_product_float32_128_aligned((float*)&a, (float*)&b);
} }
Vector2 v2_rotate_point_around_pivot(Vector2 point, Vector2 pivot, float32 rotation_radians) { Vector2 v2_rotate_point_around_pivot(Vector2 point, Vector2 pivot, float32 rotation_radians) {

View file

@ -24,7 +24,7 @@ void* initialization_allocator_proc(u64 size, void *p, Allocator_Message message
if (init_memory_head >= ((u8*)init_memory_arena+INIT_MEMORY_SIZE)) { if (init_memory_head >= ((u8*)init_memory_arena+INIT_MEMORY_SIZE)) {
os_write_string_to_stdout(STR("Out of initialization memory! Please provide more by increasing INIT_MEMORY_SIZE")); os_write_string_to_stdout(STR("Out of initialization memory! Please provide more by increasing INIT_MEMORY_SIZE"));
os_break(); crash();
} }
return p; return p;
break; break;

View file

@ -145,23 +145,22 @@ typedef u8 bool;
#warning "Compiler is not explicitly supported, some things will probably not work as expected" #warning "Compiler is not explicitly supported, some things will probably not work as expected"
#endif #endif
#include "cpu.c"
#define DEBUG 0 #define DEBUG 0
#define VERY_DEBUG 1 #define VERY_DEBUG 1
#define RELEASE 2 #define RELEASE 2
#if !defined(CONFIGURATION) #if defined(NDEBUG)
#define CONFIGURATION RELEASE
#if defined(NDEBUG) #else
#define CONFIGURATION RELEASE #define CONFIGURATION DEBUG
#else
#define CONFIGURATION DEBUG
#endif
#endif #endif
#include "cpu.c"
#ifndef ENTRY_PROC #ifndef ENTRY_PROC
#define ENTRY_PROC entry #define ENTRY_PROC entry
#endif #endif

View file

@ -135,6 +135,8 @@ LRESULT CALLBACK win32_window_proc(HWND passed_window, UINT message, WPARAM wpar
void os_init(u64 program_memory_size) { void os_init(u64 program_memory_size) {
memset(&window, 0, sizeof(window));
timeBeginPeriod(1); timeBeginPeriod(1);
#if CONFIGURATION == RELEASE #if CONFIGURATION == RELEASE
SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS); SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);

View file

@ -61,36 +61,6 @@ inline int crt_vprintf(const char* fmt, va_list args) {
return os.crt_vprintf(fmt, args); return os.crt_vprintf(fmt, args);
} }
#if !defined(COMPILER_HAS_MEMCPY_INTRINSICS) || CONFIGURATION == DEBUG
inline void* naive_memcpy(void* dest, const void* source, size_t size) {
for (u64 i = 0; i < (u64)size; i++) ((u8*)dest)[i] = ((u8*)source)[i];
return dest;
}
// memcpy replacement: uses the CRT routine when os.crt_memcpy is set,
// otherwise falls back to the byte loop in naive_memcpy.
inline void* memcpy(void* dest, const void* source, size_t size) {
    if (os.crt_memcpy) {
        return os.crt_memcpy(dest, source, size);
    }
    return naive_memcpy(dest, source, size);
}
inline int naive_memcmp(const void* a, const void* b, size_t amount) {
// I don't understand the return value of memcmp but I also dont care
for (u64 i = 0; i < (u64)amount; i++) {
if (((u8*)a)[i] != ((u8*)b)[i]) return -1;
}
return 0;
}
// memcmp replacement: uses the CRT routine when os.crt_memcmp is set,
// otherwise falls back to naive_memcmp.
inline int memcmp(const void* a, const void* b, size_t amount) {
    if (os.crt_memcmp) {
        return os.crt_memcmp(a, b, amount);
    }
    return naive_memcmp(a, b, amount);
}
inline void* naive_memset(void* dest, int value, size_t amount) {
for (u64 i = 0; i < (u64)amount; i++) ((u8*)dest)[i] = (u8)value;
return dest;
}
// memset replacement: uses the CRT routine when os.crt_memset is set,
// otherwise falls back to naive_memset.
inline void* memset(void* dest, int value, size_t amount) {
    if (os.crt_memset) {
        return os.crt_memset(dest, value, amount);
    }
    return naive_memset(dest, value, amount);
}
#endif
inline bool bytes_match(void *a, void *b, u64 count) { return memcmp(a, b, count) == 0; } inline bool bytes_match(void *a, void *b, u64 count) { return memcmp(a, b, count) == 0; }
inline int vsnprintf(char* buffer, size_t n, const char* fmt, va_list args) { inline int vsnprintf(char* buffer, size_t n, const char* fmt, va_list args) {
@ -333,6 +303,7 @@ typedef struct Os_Window {
u32 x; u32 x;
u32 y; u32 y;
Vector4 clear_color; Vector4 clear_color;
bool enable_vsync;
bool should_close; bool should_close;

View file

@ -30,6 +30,16 @@ inline void basic_mul_int32_512(s32 *a, s32 *b, s32* result);
inline float basic_dot_product_float32_64(float *a, float *b); inline float basic_dot_product_float32_64(float *a, float *b);
inline float basic_dot_product_float32_96(float *a, float *b); inline float basic_dot_product_float32_96(float *a, float *b);
inline float basic_dot_product_float32_128(float *a, float *b); inline float basic_dot_product_float32_128(float *a, float *b);
// Scalar fallbacks for element-wise square root. The numeric suffix is the
// vector width in bits (64 = 2 floats, 96 = 3, 128 = 4, 256 = 8, 512 = 16).
inline void basic_sqrt_float32_64(float *a, float *result);
inline void basic_sqrt_float32_96(float *a, float *result);
inline void basic_sqrt_float32_128(float *a, float *result);
inline void basic_sqrt_float32_256(float *a, float *result);
inline void basic_sqrt_float32_512(float *a, float *result);
// Scalar fallbacks for element-wise reciprocal square root, same width naming.
inline void basic_rsqrt_float32_64(float *a, float *result);
inline void basic_rsqrt_float32_96(float *a, float *result);
inline void basic_rsqrt_float32_128(float *a, float *result);
inline void basic_rsqrt_float32_256(float *a, float *result);
inline void basic_rsqrt_float32_512(float *a, float *result);
@ -123,6 +133,52 @@ inline void simd_div_float32_128_aligned(float *a, float *b, float* result) {
__m128 vr = _mm_div_ps(va, vb); __m128 vr = _mm_div_ps(va, vb);
_mm_store_ps(result, vr); _mm_store_ps(result, vr);
} }
// Element-wise square root of a 3-float (96-bit) vector.
// Reads exactly a[0..2] and writes exactly result[0..2], like the scalar
// basic_sqrt_float32_96 fallback. No alignment requirement.
inline void simd_sqrt_float32_96(float *a, float *result) {
    // Build { a0, a1, a2, 0 } from a 64-bit load plus a scalar load so that no
    // memory past the third element is read.
    __m128 low  = _mm_loadl_pi(_mm_setzero_ps(), (__m64*)a);
    __m128 high = _mm_load_ss(a + 2);
    __m128 va   = _mm_movelh_ps(low, high);
    __m128 vr   = _mm_sqrt_ps(va);
    // A full 128-bit store would write a 4th float past the end of `result`,
    // so store the low pair and then lane 2 on its own.
    _mm_storel_pi((__m64*)result, vr);
    _mm_store_ss(result + 2, _mm_movehl_ps(vr, vr));
}
// Element-wise approximate reciprocal square root of a 3-float (96-bit) vector.
// Reads exactly a[0..2] and writes exactly result[0..2], like the scalar
// basic_rsqrt_float32_96 fallback. No alignment requirement.
inline void simd_rsqrt_float32_96(float *a, float *result) {
    // Build { a0, a1, a2, 0 } without touching memory past the third element.
    __m128 low  = _mm_loadl_pi(_mm_setzero_ps(), (__m64*)a);
    __m128 high = _mm_load_ss(a + 2);
    __m128 va   = _mm_movelh_ps(low, high);
    __m128 vr   = _mm_rsqrt_ps(va);
    // Lane 3 holds rsqrt(0) and must never reach memory: store only lanes 0..2.
    _mm_storel_pi((__m64*)result, vr);
    _mm_store_ss(result + 2, _mm_movehl_ps(vr, vr));
}
// Square root of 2 packed floats: loads a[0..1] into the low half of a zeroed
// register and stores only the low half back to result[0..1].
inline void simd_sqrt_float32_64(float *a, float *result) {
    const __m128 input = _mm_loadl_pi(_mm_setzero_ps(), (__m64*)a);
    _mm_storel_pi((__m64*)result, _mm_sqrt_ps(input));
}
// Approximate reciprocal square root of 2 packed floats: loads a[0..1] into the
// low half of a zeroed register and stores only the low half to result[0..1].
inline void simd_rsqrt_float32_64(float *a, float *result) {
    const __m128 input = _mm_loadl_pi(_mm_setzero_ps(), (__m64*)a);
    _mm_storel_pi((__m64*)result, _mm_rsqrt_ps(input));
}
// Square root of 4 packed floats using unaligned load and store.
inline void simd_sqrt_float32_128(float *a, float *result) {
    _mm_storeu_ps(result, _mm_sqrt_ps(_mm_loadu_ps(a)));
}
// Approximate reciprocal square root of 4 packed floats using unaligned load and store.
inline void simd_rsqrt_float32_128(float *a, float *result) {
    _mm_storeu_ps(result, _mm_rsqrt_ps(_mm_loadu_ps(a)));
}
// Square root of 4 packed floats; uses the aligned load/store intrinsics, so
// both pointers are expected to be 16-byte aligned.
inline void simd_sqrt_float32_128_aligned(float *a, float *result) {
    _mm_store_ps(result, _mm_sqrt_ps(_mm_load_ps(a)));
}
// Approximate reciprocal square root of 4 packed floats; uses the aligned
// load/store intrinsics, so both pointers are expected to be 16-byte aligned.
inline void simd_rsqrt_float32_128_aligned(float *a, float *result) {
    _mm_store_ps(result, _mm_rsqrt_ps(_mm_load_ps(a)));
}
#if SIMD_ENABLE_SSE2 #if SIMD_ENABLE_SSE2
@ -191,14 +247,6 @@ inline float simd_dot_product_float32_96(float *a, float *b) {
__m128 dot_product = _mm_dp_ps(vec1, vec2, 0x71); __m128 dot_product = _mm_dp_ps(vec1, vec2, 0x71);
return _mm_cvtss_f32(dot_product); return _mm_cvtss_f32(dot_product);
} }
// Dot product of the first three lanes of two vectors loaded with aligned
// 128-bit loads; the fourth lane of each input is masked to zero first.
inline float simd_dot_product_float32_96_aligned(float *a, float *b) {
    // All bits set in lanes 0..2, cleared in lane 3.
    const __m128 xyz_mask = _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1));
    const __m128 lhs = _mm_and_ps(_mm_load_ps(a), xyz_mask);
    const __m128 rhs = _mm_and_ps(_mm_load_ps(b), xyz_mask);
    return _mm_cvtss_f32(_mm_dp_ps(lhs, rhs, 0x71));
}
inline float simd_dot_product_float32_128(float *a, float *b) { inline float simd_dot_product_float32_128(float *a, float *b) {
__m128 vec1 = _mm_loadu_ps(a); __m128 vec1 = _mm_loadu_ps(a);
__m128 vec2 = _mm_loadu_ps(b); __m128 vec2 = _mm_loadu_ps(b);
@ -217,8 +265,6 @@ inline float simd_dot_product_float32_128_aligned(float *a, float *b) {
#define simd_dot_product_float32_64 basic_dot_product_float32_64 #define simd_dot_product_float32_64 basic_dot_product_float32_64
#define simd_dot_product_float32_96 basic_dot_product_float32_96 #define simd_dot_product_float32_96 basic_dot_product_float32_96
#define simd_dot_product_float32_128 basic_dot_product_float32_128 #define simd_dot_product_float32_128 basic_dot_product_float32_128
#define simd_dot_product_float32_64_aligned basic_dot_product_float32_64
#define simd_dot_product_float32_96_aligned basic_dot_product_float32_96
#define simd_dot_product_float32_128_aligned basic_dot_product_float32_128 #define simd_dot_product_float32_128_aligned basic_dot_product_float32_128
#endif // SIMD_ENABLE_SSE41 #endif // SIMD_ENABLE_SSE41
@ -275,16 +321,41 @@ inline void simd_div_float32_256_aligned(float32 *a, float32 *b, float32* result
__m256 vr = _mm256_div_ps(va, vb); __m256 vr = _mm256_div_ps(va, vb);
_mm256_store_ps(result, vr); _mm256_store_ps(result, vr);
} }
// Square root of 8 packed floats (AVX) using unaligned load and store.
inline void simd_sqrt_float32_256(float *a, float *result) {
    _mm256_storeu_ps(result, _mm256_sqrt_ps(_mm256_loadu_ps(a)));
}
// Approximate reciprocal square root of 8 packed floats (AVX), unaligned load and store.
inline void simd_rsqrt_float32_256(float *a, float *result) {
    _mm256_storeu_ps(result, _mm256_rsqrt_ps(_mm256_loadu_ps(a)));
}
// Square root of 8 packed floats (AVX); uses the aligned load/store
// intrinsics, so both pointers are expected to be 32-byte aligned.
inline void simd_sqrt_float32_256_aligned(float *a, float *result) {
    _mm256_store_ps(result, _mm256_sqrt_ps(_mm256_load_ps(a)));
}
// Approximate reciprocal square root of 8 packed floats (AVX); uses the aligned
// load/store intrinsics, so both pointers are expected to be 32-byte aligned.
inline void simd_rsqrt_float32_256_aligned(float *a, float *result) {
    _mm256_store_ps(result, _mm256_rsqrt_ps(_mm256_load_ps(a)));
}
#else #else
#define simd_add_float32_256 basic_add_float32_256 #define simd_add_float32_256 basic_add_float32_256
#define simd_sub_float32_256 basic_sub_float32_256 #define simd_sub_float32_256 basic_sub_float32_256
#define simd_mul_float32_256 basic_mul_float32_256 #define simd_mul_float32_256 basic_mul_float32_256
#define simd_div_float32_256 basic_div_float32_256 #define simd_div_float32_256 basic_div_float32_256
#define simd_sqrt_float32_256 basic_sqrt_float32_256
#define simd_rsqrt_float32_256 basic_rsqrt_float32_256
#define simd_add_float32_256_aligned basic_add_float32_256 #define simd_add_float32_256_aligned basic_add_float32_256
#define simd_sub_float32_256_aligned basic_sub_float32_256 #define simd_sub_float32_256_aligned basic_sub_float32_256
#define simd_mul_float32_256_aligned basic_mul_float32_256 #define simd_mul_float32_256_aligned basic_mul_float32_256
#define simd_div_float32_256_aligned basic_div_float32_256 #define simd_div_float32_256_aligned basic_div_float32_256
#define simd_sqrt_float32_256_aligned basic_sqrt_float32_256
#define simd_rsqrt_float32_256_aligned basic_rsqrt_float32_256
#endif #endif
#if SIMD_ENABLE_AVX2 #if SIMD_ENABLE_AVX2
@ -332,7 +403,6 @@ inline void simd_mul_int32_256_aligned(s32 *a, s32 *b, s32* result) {
#define simd_add_int32_256 basic_add_int32_256 #define simd_add_int32_256 basic_add_int32_256
#define simd_sub_int32_256 basic_sub_int32_256 #define simd_sub_int32_256 basic_sub_int32_256
#define simd_mul_int32_256 basic_mul_int32_256 #define simd_mul_int32_256 basic_mul_int32_256
#define simd_add_int32_256_aligned basic_add_int32_256 #define simd_add_int32_256_aligned basic_add_int32_256
#define simd_sub_int32_256_aligned basic_sub_int32_256 #define simd_sub_int32_256_aligned basic_sub_int32_256
#define simd_mul_int32_256_aligned basic_mul_int32_256 #define simd_mul_int32_256_aligned basic_mul_int32_256
@ -432,6 +502,28 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
__m512i vr = _mm512_mullo_epi32(va, vb); __m512i vr = _mm512_mullo_epi32(va, vb);
_mm512_store_si512((__m512i*)result, vr); _mm512_store_si512((__m512i*)result, vr);
} }
// Square root of 16 packed floats (AVX-512) using unaligned load and store.
inline void simd_sqrt_float32_512(float *a, float *result) {
    _mm512_storeu_ps(result, _mm512_sqrt_ps(_mm512_loadu_ps(a)));
}
// Approximate reciprocal square root of 16 packed floats (AVX-512), unaligned load and store.
inline void simd_rsqrt_float32_512(float *a, float *result) {
    // AVX-512 does not have _mm512_rsqrt_ps, so the rsqrt14 variant is used instead.
    _mm512_storeu_ps(result, _mm512_rsqrt14_ps(_mm512_loadu_ps(a)));
}
// Square root of 16 packed floats (AVX-512); uses the aligned load/store
// intrinsics, so both pointers are expected to be 64-byte aligned.
inline void simd_sqrt_float32_512_aligned(float *a, float *result) {
    _mm512_store_ps(result, _mm512_sqrt_ps(_mm512_load_ps(a)));
}
// Approximate reciprocal square root of 16 packed floats (AVX-512, rsqrt14);
// uses the aligned load/store intrinsics, so both pointers are expected to be
// 64-byte aligned.
inline void simd_rsqrt_float32_512_aligned(float *a, float *result) {
    _mm512_store_ps(result, _mm512_rsqrt14_ps(_mm512_load_ps(a)));
}
#else #else
#define simd_add_float32_512 basic_add_float32_512 #define simd_add_float32_512 basic_add_float32_512
#define simd_sub_float32_512 basic_sub_float32_512 #define simd_sub_float32_512 basic_sub_float32_512
@ -440,7 +532,8 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
#define simd_add_int32_512 basic_add_int32_512 #define simd_add_int32_512 basic_add_int32_512
#define simd_sub_int32_512 basic_sub_int32_512 #define simd_sub_int32_512 basic_sub_int32_512
#define simd_mul_int32_512 basic_mul_int32_512 #define simd_mul_int32_512 basic_mul_int32_512
#define simd_sqrt_float32_512 basic_sqrt_float32_512
#define simd_rsqrt_float32_512 basic_rsqrt_float32_512
#define simd_add_float32_512_aligned basic_add_float32_512 #define simd_add_float32_512_aligned basic_add_float32_512
#define simd_sub_float32_512_aligned basic_sub_float32_512 #define simd_sub_float32_512_aligned basic_sub_float32_512
#define simd_mul_float32_512_aligned basic_mul_float32_512 #define simd_mul_float32_512_aligned basic_mul_float32_512
@ -448,6 +541,8 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
#define simd_add_int32_512_aligned basic_add_int32_512 #define simd_add_int32_512_aligned basic_add_int32_512
#define simd_sub_int32_512_aligned basic_sub_int32_512 #define simd_sub_int32_512_aligned basic_sub_int32_512
#define simd_mul_int32_512_aligned basic_mul_int32_512 #define simd_mul_int32_512_aligned basic_mul_int32_512
#define simd_sqrt_float32_512_aligned basic_sqrt_float32_512
#define simd_rsqrt_float32_512_aligned basic_rsqrt_float32_512
#endif // SIMD_ENABLE_AVX512 #endif // SIMD_ENABLE_AVX512
#else #else
@ -461,10 +556,16 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
#define simd_mul_float32_128 basic_mul_float32_128 #define simd_mul_float32_128 basic_mul_float32_128
#define simd_div_float32_64 basic_div_float32_64 #define simd_div_float32_64 basic_div_float32_64
#define simd_div_float32_128 basic_div_float32_128 #define simd_div_float32_128 basic_div_float32_128
#define simd_sqrt_float32_64 basic_sqrt_float32_64
#define simd_sqrt_float32_128 basic_sqrt_float32_128
#define simd_rsqrt_float32_64 basic_rsqrt_float32_64
#define simd_rsqrt_float32_128 basic_rsqrt_float32_128
#define simd_add_float32_128_aligned basic_add_float32_128 #define simd_add_float32_128_aligned basic_add_float32_128
#define simd_sub_float32_128_aligned basic_sub_float32_128 #define simd_sub_float32_128_aligned basic_sub_float32_128
#define simd_mul_float32_128_aligned basic_mul_float32_128 #define simd_mul_float32_128_aligned basic_mul_float32_128
#define simd_div_float32_128_aligned basic_div_float32_128 #define simd_div_float32_128_aligned basic_div_float32_128
#define simd_sqrt_float32_128_aligned basic_sqrt_float32_128
#define simd_rsqrt_float32_128_aligned basic_rsqrt_float32_128
// SSE2 // SSE2
#define simd_add_int32_128 basic_add_int32_128 #define simd_add_int32_128 basic_add_int32_128
@ -475,19 +576,26 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
#define simd_mul_int32_128_aligned basic_mul_int32_128 #define simd_mul_int32_128_aligned basic_mul_int32_128
// SSE41 // SSE41
#define simd_mul_int32_128 basic_mul_int32_128
#define simd_mul_int32_128_aligned basic_mul_int32_128
#define simd_dot_product_float32_64 basic_dot_product_float32_64 #define simd_dot_product_float32_64 basic_dot_product_float32_64
#define simd_dot_product_float32_96 basic_dot_product_float32_96 #define simd_dot_product_float32_96 basic_dot_product_float32_96
#define simd_dot_product_float32_128 basic_dot_product_float32_128 #define simd_dot_product_float32_128 basic_dot_product_float32_128
#define simd_dot_product_float32_128_aligned basic_dot_product_float32_128
// AVX // AVX
#define simd_add_float32_256 basic_add_float32_256 #define simd_add_float32_256 basic_add_float32_256
#define simd_sub_float32_256 basic_sub_float32_256 #define simd_sub_float32_256 basic_sub_float32_256
#define simd_mul_float32_256 basic_mul_float32_256 #define simd_mul_float32_256 basic_mul_float32_256
#define simd_div_float32_256 basic_div_float32_256 #define simd_div_float32_256 basic_div_float32_256
#define simd_sqrt_float32_256 basic_sqrt_float32_256
#define simd_rsqrt_float32_256 basic_rsqrt_float32_256
#define simd_add_float32_256_aligned basic_add_float32_256 #define simd_add_float32_256_aligned basic_add_float32_256
#define simd_sub_float32_256_aligned basic_sub_float32_256 #define simd_sub_float32_256_aligned basic_sub_float32_256
#define simd_mul_float32_256_aligned basic_mul_float32_256 #define simd_mul_float32_256_aligned basic_mul_float32_256
#define simd_div_float32_256_aligned basic_div_float32_256 #define simd_div_float32_256_aligned basic_div_float32_256
#define simd_sqrt_float32_256_aligned basic_sqrt_float32_256
#define simd_rsqrt_float32_256_aligned basic_rsqrt_float32_256
// AVX2 // AVX2
#define simd_add_int32_256 basic_add_int32_256 #define simd_add_int32_256 basic_add_int32_256
@ -505,6 +613,8 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
#define simd_add_int32_512 basic_add_int32_512 #define simd_add_int32_512 basic_add_int32_512
#define simd_sub_int32_512 basic_sub_int32_512 #define simd_sub_int32_512 basic_sub_int32_512
#define simd_mul_int32_512 basic_mul_int32_512 #define simd_mul_int32_512 basic_mul_int32_512
#define simd_sqrt_float32_512 basic_sqrt_float32_512
#define simd_rsqrt_float32_512 basic_rsqrt_float32_512
#define simd_add_float32_512_aligned basic_add_float32_512 #define simd_add_float32_512_aligned basic_add_float32_512
#define simd_sub_float32_512_aligned basic_sub_float32_512 #define simd_sub_float32_512_aligned basic_sub_float32_512
#define simd_mul_float32_512_aligned basic_mul_float32_512 #define simd_mul_float32_512_aligned basic_mul_float32_512
@ -512,9 +622,14 @@ inline void simd_mul_int32_512_aligned(int32 *a, int32 *b, int32* result) {
#define simd_add_int32_512_aligned basic_add_int32_512 #define simd_add_int32_512_aligned basic_add_int32_512
#define simd_sub_int32_512_aligned basic_sub_int32_512 #define simd_sub_int32_512_aligned basic_sub_int32_512
#define simd_mul_int32_512_aligned basic_mul_int32_512 #define simd_mul_int32_512_aligned basic_mul_int32_512
#define simd_sqrt_float32_512_aligned basic_sqrt_float32_512
#define simd_rsqrt_float32_512_aligned basic_rsqrt_float32_512
#endif #endif
// Prototype for the C runtime sqrt, called by the scalar basic_sqrt_* fallbacks.
double __cdecl sqrt(_In_ double _X);
// NOTE(review): rsqrt is not a standard C library function - confirm that a
// definition actually exists somewhere in the build before relying on this prototype.
double __cdecl rsqrt(_In_ double _X);
inline void basic_add_float32_64 (float32 *a, float32 *b, float32* result) { inline void basic_add_float32_64 (float32 *a, float32 *b, float32* result) {
result[0] = a[0] + b[0]; result[0] = a[0] + b[0];
result[1] = a[1] + b[1]; result[1] = a[1] + b[1];
@ -638,6 +753,55 @@ inline float basic_dot_product_float32_96(float *a, float *b) {
inline float basic_dot_product_float32_128(float *a, float *b) { inline float basic_dot_product_float32_128(float *a, float *b) {
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
} }
// Scalar fallback: square root of each of the 2 floats in `a`, written to `result`.
inline void basic_sqrt_float32_64(float *a, float *result) {
    for (int lane = 0; lane < 2; lane++) {
        result[lane] = sqrt(a[lane]);
    }
}
// Scalar fallback: square root of each of the 3 floats in `a`, written to `result`.
inline void basic_sqrt_float32_96(float *a, float *result) {
    for (int lane = 0; lane < 3; lane++) {
        result[lane] = sqrt(a[lane]);
    }
}
// Scalar fallback: square root of each of the 4 floats in `a`, written to `result`.
inline void basic_sqrt_float32_128(float *a, float *result) {
    for (int lane = 0; lane < 4; lane++) {
        result[lane] = sqrt(a[lane]);
    }
}
// Scalar fallback for 8 floats: processes the input as two 4-float halves.
inline void basic_sqrt_float32_256(float *a, float *result) {
    for (int offset = 0; offset < 8; offset += 4) {
        basic_sqrt_float32_128(a + offset, result + offset);
    }
}
// Scalar fallback for 16 floats: processes the input as two 8-float halves.
inline void basic_sqrt_float32_512(float *a, float *result) {
    for (int offset = 0; offset < 16; offset += 8) {
        basic_sqrt_float32_256(a + offset, result + offset);
    }
}
// Scalar fallback: reciprocal square root (1/sqrt(x)) of each of the 2 floats
// in `a`, written to `result`. Computed with sqrt because the C runtime has no
// rsqrt function to link against.
inline void basic_rsqrt_float32_64(float *a, float *result) {
    for (int lane = 0; lane < 2; lane++) {
        result[lane] = (float)(1.0 / sqrt(a[lane]));
    }
}
// Scalar fallback: reciprocal square root (1/sqrt(x)) of each of the 3 floats
// in `a`, written to `result`. Computed with sqrt because the C runtime has no
// rsqrt function to link against.
inline void basic_rsqrt_float32_96(float *a, float *result) {
    for (int lane = 0; lane < 3; lane++) {
        result[lane] = (float)(1.0 / sqrt(a[lane]));
    }
}
// Scalar fallback: reciprocal square root (1/sqrt(x)) of each of the 4 floats
// in `a`, written to `result`. Computed with sqrt because the C runtime has no
// rsqrt function to link against.
inline void basic_rsqrt_float32_128(float *a, float *result) {
    for (int lane = 0; lane < 4; lane++) {
        result[lane] = (float)(1.0 / sqrt(a[lane]));
    }
}
// Scalar fallback for 8 floats: processes the input as two 4-float halves.
inline void basic_rsqrt_float32_256(float *a, float *result) {
    for (int offset = 0; offset < 8; offset += 4) {
        basic_rsqrt_float32_128(a + offset, result + offset);
    }
}
// Scalar fallback for 16 floats: processes the input as two 8-float halves.
inline void basic_rsqrt_float32_512(float *a, float *result) {
    for (int offset = 0; offset < 16; offset += 8) {
        basic_rsqrt_float32_256(a + offset, result + offset);
    }
}

View file

@ -5,7 +5,6 @@
*/ */
void * memcpy (void *,const void *,size_t);
void* talloc(u64); void* talloc(u64);
typedef struct string { typedef struct string {
@ -13,6 +12,7 @@ typedef struct string {
u8 *data; u8 *data;
} string; } string;
#define fixed_string STR
#define STR(s) ((string){ length_of_null_terminated_string((const char*)s), (u8*)s }) #define STR(s) ((string){ length_of_null_terminated_string((const char*)s), (u8*)s })
inline u64 length_of_null_terminated_string(const char* cstring) { inline u64 length_of_null_terminated_string(const char* cstring) {

View file

@ -213,7 +213,7 @@ void printf(const char* fmt, ...) {
typedef void(*Logger_Proc)(Log_Level level, string s); typedef void(*Logger_Proc)(Log_Level level, string s);
#define LOG_BASE(level, ...) If context.logger then ((Logger_Proc)context.logger)(level, tprint(__VA_ARGS__)) #define LOG_BASE(level, ...) if (context.logger) ((Logger_Proc)context.logger)(level, tprint(__VA_ARGS__))
#define log_verbose(...) LOG_BASE(LOG_VERBOSE, __VA_ARGS__) #define log_verbose(...) LOG_BASE(LOG_VERBOSE, __VA_ARGS__)

View file

@ -1,6 +1,6 @@
// Custom allocators for lodepng // Custom allocators for lodepng
Allocator get_heap_allocator(); Allocator get_heap_allocator();
Allocator lodepng_allocator = {0}; /*Allocator lodepng_allocator = {0};
void* lodepng_malloc(size_t size) { void* lodepng_malloc(size_t size) {
#ifdef LODEPNG_MAX_ALLOC #ifdef LODEPNG_MAX_ALLOC
if(size > LODEPNG_MAX_ALLOC) return 0; if(size > LODEPNG_MAX_ALLOC) return 0;
@ -28,4 +28,47 @@ void lodepng_free(void* ptr) {
#define LODEPNG_NO_COMPILE_ENCODER #define LODEPNG_NO_COMPILE_ENCODER
// One day I might write my own png decoder so we don't even need this // One day I might write my own png decoder so we don't even need this
#include "third_party/lodepng.h" #include "third_party/lodepng.h"
#include "third_party/lodepng.c" #include "third_party/lodepng.c"*/
// Ask the stb headers included below to emit their implementations here.
#define STB_TRUETYPE_IMPLEMENTATION
#define STB_IMAGE_IMPLEMENTATION
// Short integer aliases.
// NOTE(review): the names imply 8/16/32-bit widths, which assumes the usual
// char/short/int sizes - confirm for every target compiler.
typedef unsigned char u8;
typedef signed char s8;
typedef unsigned short u16;
typedef signed short s16;
typedef unsigned int u32;
typedef signed int s32;
// Allocation hook for stb_truetype: returns null for a zero-byte request,
// otherwise allocates `size` bytes from the heap allocator.
void *stbtt_malloc(size_t size) {
    if (size == 0) {
        return 0;
    }
    return alloc(get_heap_allocator(), size);
}
// stb passes a user-data argument as well; it is evaluated and discarded.
#define STBTT_malloc(x,u) ((void)(u),stbtt_malloc(x))
// Free hook for stb_truetype: ignores null, otherwise returns the block to the heap allocator.
void stbtt_free(void *p) {
    if (p) {
        dealloc(get_heap_allocator(), p);
    }
}
// stb passes a user-data argument as well; it is evaluated and discarded.
#define STBTT_free(x,u) ((void)(u),stbtt_free(x))
#define STBTT_assert(x) assert(x)
// Length of a null-terminated string, not counting the terminator.
size_t stbtt_strlen(const char* str) {
    const char *cursor = str;
    while (*cursor != 0) {
        cursor++;
    }
    return (size_t)(cursor - str);
}
// Route stb_truetype's string and memory helpers to local or project implementations.
#define STBTT_strlen(x) stbtt_strlen(x)
#define STBTT_memcpy memcpy
#define STBTT_memset memset
// stb_image configuration: no stdio file API, and all allocation goes through
// the heap-allocator wrappers defined for stb_truetype.
#define STBI_NO_STDIO
// Crash with a null write when the condition fails. Wrapped in do/while(0) so
// the macro expands to exactly one statement and stays correct inside an
// unbraced if/else.
#define STBI_ASSERT(x) do { if (!(x)) *(volatile char*)0 = 0; } while (0)
#define STBI_MALLOC(sz) stbtt_malloc(sz)
#define STBI_REALLOC(p,newsz) get_heap_allocator().proc(newsz, p, ALLOCATOR_REALLOCATE, 0)
#define STBI_FREE(p) stbtt_free(p)
#include "third_party/stb_image.h"
#include "third_party/stb_truetype.h"

7988
oogabooga/third_party/stb_image.h vendored Normal file

File diff suppressed because it is too large Load diff

5080
oogabooga/third_party/stb_truetype.h vendored Normal file

File diff suppressed because it is too large Load diff