From 88053ab14aa556a38ef79b206393b156a6cace21 Mon Sep 17 00:00:00 2001 From: Charlie Malmqvist Date: Sat, 20 Jul 2024 16:10:55 +0200 Subject: [PATCH] v0.01.001 - Spacial audio, custom shading, scissor boxing --- README.md | 26 +++- TODO | 12 +- build.c | 10 +- changelog.txt | 12 +- oogabooga/drawing.c | 136 ++++++++++--------- oogabooga/examples/custom_shader.c | 52 +++++++- oogabooga/examples/custom_shader.hlsl | 53 +++++++- oogabooga/examples/renderer_stress_test.c | 15 ++- oogabooga/gfx_impl_d3d11.c | 154 +++++++++++++++++----- oogabooga/gfx_interface.c | 7 + oogabooga/memory.c | 29 +++- oogabooga/utility.c | 1 + 12 files changed, 381 insertions(+), 126 deletions(-) diff --git a/README.md b/README.md index 66953aa..a257dd1 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ ooga booga ## TOC - [What is ooga booga?](#what-is-ooga-booga) + - [A new C Standard](#a-new-c-standard) - [SIMPLICITY IS KING](#simplicity-is-king) - [The "Build System"](#the-build-system) - [Course: From Scratch to Steam](#course-from-scratch-to-steam) @@ -15,6 +16,14 @@ ooga booga Ooga booga, often referred to as a *game engine* for simplicity, is more so designed to be a new C Standard, i.e. a new way to develop software from scratch in C. Other than `` we don't include a single C std header, but are instead writing a better standard library heavily optimized for developing games. Except for some image & audio file decoding, Ooga booga does not rely on any other third party code. +### A new C Standard + +Let's face it. The C standard is terrible. Don't even get me started on `string.h`. To be fair, any mainstream language standard is terrible. + +So what if we could strip out the nonsense standard of C and slap on something that's specifically made for video games, prioritizing speed and *simplicity*? + +That's exactly what oogabooga sets out to do. + ### SIMPLICITY IS KING Ooga booga is designed to keep things simple, and let you solve video game problems the simplest way possible. 
@@ -49,7 +58,22 @@ Currently, we only support Windows x64 systems. 3. Make a file my_file.c in ``` int entry(int argc, char **argv) { - print("Ooga, booga!\n"); + + window.title = STR("Minimal Game Example"); + window.scaled_width = 1280; // We need to set the scaled size if we want to handle system scaling (DPI) + window.scaled_height = 720; + window.x = 200; + window.y = 90; + window.clear_color = hex_to_rgba(0x6495EDff); + + while (!window.should_close) { + reset_temporary_storage(); + + os_update(); + gfx_update(); + } + + return 0; } ``` 4. in build.c add this line to the bottom diff --git a/TODO b/TODO index f9d409b..c616414 100644 --- a/TODO +++ b/TODO @@ -26,18 +26,10 @@ - Release freeze in run_tests - Window width&height is zero when minimized (and we make a 0x0 swap chain) - Window positioning & sizing is fucky wucky + - Memory error messages are misleading when no VERY_DEBUG - Renderer - - Still compile a default that's set by default and we can set with set_shader_for_basic_2d - // with screen space coords too (i.e, Vector2 instead of Matrix4) - void draw_outline_rect_xform(Matrix4 xform, Vector2 size, float thickness, Vector4 color); // maybe have draw_rect_xform just be an outline and have another one for draw_filled_rect_xform or something - void draw_rounded_rect_xform(Matrix4 xform, Vector2 size, float rounding, Vector4 color); - void draw_circle_xform(Matrix4 xform, float radius, int segments, Vector4 color); - void draw_line_xform(Matrix4 from, Matrix4 to, float thickness, Vector4 color); - - // purely for screen space coords (scissor rect shit) - void push_clip_rect(Vector2 position, Vector2 size); - void pop_clip_rect(); + - API to pass constant values to shader (codegen #define's) - Fonts - Atlases are way too big, render atlases with size depending on font_height (say, 128 codepoints per atlas) diff --git a/build.c b/build.c index d1de493..e6a6397 100644 --- a/build.c +++ b/build.c @@ -3,9 +3,6 @@ /// // Build config stuff -#define 
OOGABOOGA_DEV 1 -//#define RUN_TESTS 1 - #define INITIAL_PROGRAM_MEMORY_SIZE MB(5) // You might want to increase this if you get a log warning saying the temporary storage was overflown. @@ -15,6 +12,9 @@ // enough temporary storage for your game. #define TEMPORARY_STORAGE_SIZE MB(2) +// Enable VERY_DEBUG if you are having memory bugs to detect things like heap corruption earlier. +// #define VERY_DEBUG 1 + typedef struct Context_Extra { int monkee; } Context_Extra; @@ -36,14 +36,14 @@ typedef struct Context_Extra { // // This is a minimal starting point for new projects. Copy & rename to get started -// #include "oogabooga/examples/minimal_game_loop.c" +#include "oogabooga/examples/minimal_game_loop.c" // #include "oogabooga/examples/text_rendering.c" // #include "oogabooga/examples/custom_logger.c" // #include "oogabooga/examples/renderer_stress_test.c" // #include "oogabooga/examples/tile_game.c" // #include "oogabooga/examples/audio_test.c" -#include "oogabooga/examples/custom_shader.c" +// #include "oogabooga/examples/custom_shader.c" // This is where you swap in your own project! // #include "entry_yourepicgamename.c" diff --git a/changelog.txt b/changelog.txt index 14bd4ed..6d2314f 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,4 +1,4 @@ -## v0.01.001 - Spacial audio, custom shading +## v0.01.001 - Spacial audio, custom shading, scissor boxing - Audio - Implemented spacial audio playback Simply set player->position (it's in ndc space (-1 to 1), see audio_test.c) @@ -13,12 +13,19 @@ shader_recompile_with_extension(source, cbuffer_size) - pass a buffer to the shader constant buffer at b0 with draw_frame.cbuffer = &my_cbuffer_data + - pass userdata in the form of Vector4's + define VERTEX_2D_USER_DATA_COUNT for the amount of Vector4 userdata's to be part of each vertex. + You can set the userdata in Draw_Quad->userdata which is a Vector4[VERTEX_2D_USER_DATA_COUNT]. + See custom_shader.c example. 
+ - Added window scissor boxing (screen space) + push_window_scissor(min, max); + pop_window_scissor(); + - Added draw_circle() and draw_circle_xform in drawing.c - Made an example for custom shading (oogabooga/examples/custom_shading.c) - Embed default shader into codebase & always compile - Added draw_line(p0, p1, width, color) - Implemented culling of quads out of view - Fixed culling bug where big rectangles that overlapped the screen but had all corners outside the screen would get culled. - - Misc - Improved text measure and added a better explanation for it in font.c. - Added some useful Vector procedures: @@ -30,6 +37,7 @@ vx_cross() - added os_get_file_size_from_Path() - Some simple restructuring of existing code + - Made heap corruption detection more robust ## v0.01.000 - AUDIO! - Added audio sources diff --git a/oogabooga/drawing.c b/oogabooga/drawing.c index 7c050ce..d7693b2 100644 --- a/oogabooga/drawing.c +++ b/oogabooga/drawing.c @@ -1,79 +1,34 @@ /* -Usage: - - Just call draw_xxx procedures anywhere in the frame when you want something to be drawn that frame. - - - // Examples: - - // Verbose - Draw_Quad quad; - quad.bottom_left = v2(x, y); - quad.top_left = v2(x, y); - quad.top_right = v2(x, y); - quad.bottom_right = v2(x, y); - quad.color = v4(r, g, b, a); - quad.image = my_image; // ZERO(Gfx_Image) To draw a plain color - quad.uv = v4(0, 0, 1, 1); - - draw_quad(quad); - - - // Basic rect. Bottom left at X=-0.25, Y=-0.5 with a size of W=0.5, H=0.5 - draw_rect(v2(-0.25, -0.5), v2(0.5, 0.5), COLOR_GREEN); - - // Rotated rect. Bottom left at X=-0.25, Y=-0.5 with a size of W=0.5, H=0.5 - // With a centered pivot (half size) and a rotation of 2.4 radians - // If pivot is v2(0, 0), the rectangle will rotate around it's bottom left. - draw_rect_rotated(v2(-0.25, -0.5), v2(0.5, 0.5), COLOR_GREEN, v2(0.25, 0.25), 2.4f); - - // Basic image. 
Bottom left at X=-0.25, Y=-0.5 with a size of W=0.5, H=0.5 - draw_image(v2(-0.25, -0.5), v2(0.5, 0.5), COLOR_GREEN); - - // Rotated image. Bottom left at X=-0.25, Y=-0.5 with a size of W=0.5, H=0.5 - // With a centered pivot (half size) and a rotation of 2.4 radians - // If pivot is v2(0, 0), the rectangle will rotate around it's bottom left. - draw_image_rotated(v2(-0.25, -0.5), v2(0.5, 0.5), COLOR_GREEN, v2(0.25, 0.25), 2.4f); - - // Loading an image (png only) - Gfx_Image image = load_image_from_disk("my_image.png"); - if (!image.data) { - // We failed loading the image. - } - - // If you ever need to free the image: - delete_image(image); - - - API: - - // !! IMPORTANT - // The Draw_Quad* returned from draw procedures is a temporary pointer and may be - // invalid after the next draw_xxxx call. This is because quads are stored in a - // resizing buffer (because that gave us a non-trivial performance boost). - // So the purpose of returning them is to customize the quad right after the draw proc. 
+ void push_z_layer(s32 z); + void pop_z_layer(); + void push_window_scissor(Vector2 min, Vector2 max); + void pop_window_scissor(); + Draw_Quad *draw_rect(Vector2 position, Vector2 size, Vector4 color); + Draw_Quad *draw_rect_xform(Matrix4 xform, Vector2 size, Vector4 color); + Draw_Quad *draw_circle(Vector2 position, Vector2 size, Vector4 color); + Draw_Quad *draw_circle_xform(Matrix4 xform, Vector2 size, Vector4 color); + Draw_Quad *draw_image(Gfx_Image *image, Vector2 position, Vector2 size, Vector4 color); + Draw_Quad *draw_image_xform(Gfx_Image *image, Matrix4 xform, Vector2 size, Vector4 color); Draw_Quad *draw_quad_projected(Draw_Quad quad, Matrix4 world_to_clip); Draw_Quad *draw_quad(Draw_Quad quad); Draw_Quad *draw_quad_xform(Draw_Quad quad, Matrix4 xform); - Draw_Quad *draw_rect(Vector2 position, Vector2 size, Vector4 color); - Draw_Quad *draw_rect_xform(Matrix4 xform, Vector2 size, Vector4 color); - Draw_Quad *draw_image(Gfx_Image *image, Vector2 position, Vector2 size, Vector4 color); - Draw_Quad *draw_image_xform(Gfx_Image *image, Matrix4 xform, Vector2 size, Vector4 color); - // raster_height is the pixel height that the text will be rasterized at. If text is blurry, - // you can try to increase raster_height and lower scale. 
+ bool draw_text_callback(Gfx_Glyph glyph, Gfx_Font_Atlas *atlas, float glyph_x, float glyph_y, void *ud); + void draw_text_xform(Gfx_Font *font, string text, u32 raster_height, Matrix4 xform, Vector2 scale, Vector4 color); void draw_text(Gfx_Font *font, string text, u32 raster_height, Vector2 position, Vector2 scale, Vector4 color); - void draw_text_xform(Gfx_Font *font, string text, u32 raster_height, Matrix4 xform, Vector4 color); - + Gfx_Text_Metrics draw_text_and_measure(Gfx_Font *font, string text, u32 raster_height, Vector2 position, Vector2 scale, Vector4 color); + void draw_line(Vector2 p0, Vector2 p1, float line_width, Vector4 color); */ // We use radix sort so the exact bit count is of importance #define MAX_Z_BITS 21 #define MAX_Z ((1 << MAX_Z_BITS)/2) #define Z_STACK_MAX 4096 +#define SCISSOR_STACK_MAX 4096 typedef struct Draw_Quad { + // BEWARE !! These are in ndc Vector2 bottom_left, top_left, top_right, bottom_right; // r, g, b, a Vector4 color; @@ -82,8 +37,12 @@ typedef struct Draw_Quad { Gfx_Filter_Mode image_mag_filter; s32 z; u8 type; + bool has_scissor; // x1, y1, x2, y2 Vector4 uv; + Vector4 scissor; + + Vector4 userdata[VERTEX_2D_USER_DATA_COUNT]; // #Volatile do NOT change this to a pointer } Draw_Quad; @@ -99,6 +58,9 @@ typedef struct Draw_Frame { bool enable_z_sorting; s32 z_stack[Z_STACK_MAX]; u64 z_count; + + Vector4 scissor_stack[SCISSOR_STACK_MAX]; + u64 scissor_count; void *cbuffer; @@ -127,6 +89,17 @@ void pop_z_layer() { draw_frame.z_count -= 1; } +void push_window_scissor(Vector2 min, Vector2 max) { + assert(draw_frame.scissor_count < SCISSOR_STACK_MAX, "Too many scissors pushed. 
You can pop with pop_window_scissor() when you are done drawing to it."); + + draw_frame.scissor_stack[draw_frame.scissor_count] = v4(min.x, min.y, max.x, max.y); + draw_frame.scissor_count += 1; +} +void pop_window_scissor() { + assert(draw_frame.scissor_count > 0, "No scissors to pop!"); + draw_frame.scissor_count -= 1; +} + Draw_Quad _nil_quad = {0}; Draw_Quad *draw_quad_projected(Draw_Quad quad, Matrix4 world_to_clip) { quad.bottom_left = m4_transform(world_to_clip, v4(v2_expand(quad.bottom_left), 0, 1)).xy; @@ -151,6 +124,14 @@ Draw_Quad *draw_quad_projected(Draw_Quad quad, Matrix4 world_to_clip) { quad.z = 0; if (draw_frame.z_count > 0) quad.z = draw_frame.z_stack[draw_frame.z_count-1]; + quad.has_scissor = false; + if (draw_frame.scissor_count > 0) { + quad.scissor = draw_frame.scissor_stack[draw_frame.scissor_count-1]; + quad.has_scissor = true; + } + + memset(quad.userdata, 0, sizeof(quad.userdata)); + if (draw_frame.num_quads >= allocated_quads) { // #Memory @@ -185,6 +166,7 @@ Draw_Quad *draw_quad_xform(Draw_Quad quad, Matrix4 xform) { } Draw_Quad *draw_rect(Vector2 position, Vector2 size, Vector4 color) { + // #Copypaste #Volatile const float32 left = position.x; const float32 right = position.x + size.x; const float32 bottom = position.y; @@ -202,6 +184,7 @@ Draw_Quad *draw_rect(Vector2 position, Vector2 size, Vector4 color) { return draw_quad(q); } Draw_Quad *draw_rect_xform(Matrix4 xform, Vector2 size, Vector4 color) { + // #Copypaste #Volatile Draw_Quad q = ZERO(Draw_Quad); q.bottom_left = v2(0, 0); q.top_left = v2(0, size.y); @@ -213,6 +196,37 @@ Draw_Quad *draw_rect_xform(Matrix4 xform, Vector2 size, Vector4 color) { return draw_quad_xform(q, xform); } +Draw_Quad *draw_circle(Vector2 position, Vector2 size, Vector4 color) { + // #Copypaste #Volatile + const float32 left = position.x; + const float32 right = position.x + size.x; + const float32 bottom = position.y; + const float32 top = position.y+size.y; + + Draw_Quad q; + q.bottom_left = v2(left, 
bottom); + q.top_left = v2(left, top); + q.top_right = v2(right, top); + q.bottom_right = v2(right, bottom); + q.color = color; + q.image = 0; + q.type = QUAD_TYPE_CIRCLE; + + return draw_quad(q); +} +Draw_Quad *draw_circle_xform(Matrix4 xform, Vector2 size, Vector4 color) { + // #Copypaste #Volatile + Draw_Quad q = ZERO(Draw_Quad); + q.bottom_left = v2(0, 0); + q.top_left = v2(0, size.y); + q.top_right = v2(size.x, size.y); + q.bottom_right = v2(size.x, 0); + q.color = color; + q.image = 0; + q.type = QUAD_TYPE_CIRCLE; + + return draw_quad_xform(q, xform); +} Draw_Quad *draw_image(Gfx_Image *image, Vector2 position, Vector2 size, Vector4 color) { Draw_Quad *q = draw_rect(position, size, color); diff --git a/oogabooga/examples/custom_shader.c b/oogabooga/examples/custom_shader.c index 934f5cf..accd413 100644 --- a/oogabooga/examples/custom_shader.c +++ b/oogabooga/examples/custom_shader.c @@ -2,10 +2,21 @@ // BEWARE std140 packing: // https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules typedef struct My_Cbuffer { - Vector2 mouse_pos_screen; - Vector2 window_size; + Vector2 mouse_pos_screen; // We use this to make a light around the mouse cursor + Vector2 window_size; // We only use this to revert the Y in the shader because for some reason d3d11 inverts it. } My_Cbuffer; + +// We implement these details which we implement in the shader +#define DETAIL_TYPE_ROUNDED_CORNERS 1 +#define DETAIL_TYPE_OUTLINED 2 + +// With custom shading we can extend the rendering library! 
+Draw_Quad *draw_rounded_rect(Vector2 p, Vector2 size, Vector4 color, float radius); +Draw_Quad *draw_rounded_rect_xform(Matrix4 xform, Vector2 size, Vector4 color, float radius); +Draw_Quad *draw_outlined_rect(Vector2 p, Vector2 size, Vector4 color, float line_width); +Draw_Quad *draw_outlined_rect_xform(Matrix4 xform, Vector2 size, Vector4 color, float line_width); + int entry(int argc, char **argv) { window.title = STR("Custom Shader Example"); @@ -51,9 +62,9 @@ int entry(int argc, char **argv) { Matrix4 rect_xform = m4_scalar(1.0); rect_xform = m4_rotate_z(rect_xform, (f32)now); rect_xform = m4_translate(rect_xform, v3(-.25f, -.25f, 0)); - draw_rect_xform(rect_xform, v2(.5f, .5f), COLOR_GREEN); + Draw_Quad *q = draw_rounded_rect_xform(rect_xform, v2(.5f, .5f), COLOR_GREEN, 0.1); - draw_rect(v2(sin(now), -.8), v2(.5, .25), COLOR_RED); + draw_outlined_rect(v2(sin(now), -.8), v2(.5, .25), COLOR_RED, 15); // Shader hot reloading @@ -69,4 +80,37 @@ int entry(int argc, char **argv) { } return 0; +} + +Draw_Quad *draw_rounded_rect(Vector2 p, Vector2 size, Vector4 color, float radius) { + Draw_Quad *q = draw_rect(p, size, color); + // detail_type + q->userdata[0].x = DETAIL_TYPE_ROUNDED_CORNERS; + // corner_radius + q->userdata[0].y = radius; + return q; +} +Draw_Quad *draw_rounded_rect_xform(Matrix4 xform, Vector2 size, Vector4 color, float radius) { + Draw_Quad *q = draw_rect_xform(xform, size, color); + // detail_type + q->userdata[0].x = DETAIL_TYPE_ROUNDED_CORNERS; + // corner_radius + q->userdata[0].y = radius; + return q; +} +Draw_Quad *draw_outlined_rect(Vector2 p, Vector2 size, Vector4 color, float line_width) { + Draw_Quad *q = draw_rect(p, size, color); + // detail_type + q->userdata[0].x = DETAIL_TYPE_OUTLINED; + // line_width + q->userdata[0].y = line_width; + return q; +} +Draw_Quad *draw_outlined_rect_xform(Matrix4 xform, Vector2 size, Vector4 color, float line_width) { + Draw_Quad *q = draw_rect_xform(xform, size, color); + // detail_type + 
q->userdata[0].x = DETAIL_TYPE_OUTLINED; + // line_width + q->userdata[0].y = line_width; + return q; } \ No newline at end of file diff --git a/oogabooga/examples/custom_shader.hlsl b/oogabooga/examples/custom_shader.hlsl index ae76462..eeff558 100644 --- a/oogabooga/examples/custom_shader.hlsl +++ b/oogabooga/examples/custom_shader.hlsl @@ -8,16 +8,61 @@ cbuffer some_cbuffer : register(b0) { float2 window_size; } -float4 pixel_shader_extension(PS_INPUT input, float4 color) { - const float light_distance = 500; +#define DETAIL_TYPE_ROUNDED_CORNERS 1 +#define DETAIL_TYPE_OUTLINED 2 - float2 vertex_pos = input.position_screen.xy; // In pixels +float4 get_light_contribution(PS_INPUT input) { + + const float light_distance = 500; // We could pass this with userdata + + float2 vertex_pos = input.position_screen.xy; // In pixels vertex_pos.y = window_size.y-vertex_pos.y; // For some reason d3d11 inverts the Y here so we need to revert it // Simple linear attenuation based on distance float attenuation = 1.0 - (length(mouse_pos_screen - vertex_pos) / light_distance); - float4 light = float4(attenuation, attenuation, attenuation, 1.0); + return float4(attenuation, attenuation, attenuation, 1.0); +} +// This procedure is the "entry" of our extension to the shader +// It basically just takes in the resulting color and input from vertex shader, for us to transform it +// however we want. 
+float4 pixel_shader_extension(PS_INPUT input, float4 color) { + + float detail_type = input.userdata[0].x; + + if (detail_type == DETAIL_TYPE_ROUNDED_CORNERS) { + float corner_radius = input.userdata[0].y; + + float2 pos = input.self_uv - float2(0.5, 0.5); + + float2 corner_distance = abs(pos) - (float2(0.5, 0.5) - corner_radius); + + float dist = length(max(corner_distance, 0.0)) - corner_radius; + float smoothing = 0.01; + float mask = 1.0-smoothstep(0.0, smoothing, dist); + + color *= mask; + } else if (detail_type == DETAIL_TYPE_OUTLINED) { + float line_width = input.userdata[0].y; + + float2 pixel_pos = round(input.self_uv*window_size); + + float xcenter = window_size.x/2; + float ycenter = window_size.y/2; + + float xedge = pixel_pos.x < xcenter ? 0.0 : window_size.x; + float yedge = pixel_pos.y < ycenter ? 0.0 : window_size.y; + + float xdist = abs(xedge-pixel_pos.x); + float ydist = abs(yedge-pixel_pos.y); + + if (xdist >= line_width && ydist >= line_width) { + discard; + } + } + + float4 light = get_light_contribution(input); + return color * light; } \ No newline at end of file diff --git a/oogabooga/examples/renderer_stress_test.c b/oogabooga/examples/renderer_stress_test.c index a3e2bfe..37e9cce 100644 --- a/oogabooga/examples/renderer_stress_test.c +++ b/oogabooga/examples/renderer_stress_test.c @@ -91,8 +91,15 @@ int entry(int argc, char **argv) { draw_frame.enable_z_sorting = do_enable_z_sorting; if (is_key_just_pressed('Z')) do_enable_z_sorting = !do_enable_z_sorting; + if (do_enable_z_sorting) { + push_window_scissor( + v2(input_frame.mouse_x-256, input_frame.mouse_y-256), + v2(input_frame.mouse_x+256, input_frame.mouse_y+256) + ); + } + seed_for_random = 69; - for (u64 i = 0; i < 100000; i++) { + for (u64 i = 0; i < 30000; i++) { float32 aspect = (float32)window.width/(float32)window.height; float min_x = -aspect; float max_x = aspect; @@ -117,7 +124,7 @@ int entry(int argc, char **argv) { Vector2 hover_position = 
v2_rotate_point_around_pivot(v2(-.5, -.5), v2(0, 0), (f32)now); Vector2 local_pivot = v2(.125f, .125f); - draw_rect(v2_sub(hover_position, local_pivot), v2(.25f, .25f), v4((sin(now)+1.0)/2.0, 1.0, 0.0, 1.0)); + draw_circle(v2_sub(hover_position, local_pivot), v2(.25f, .25f), v4((sin(now)+1.0)/2.0, 1.0, 0.0, 1.0)); draw_image(bush_image, v2(0.65, 0.65), v2(0.2*sin(now), 0.2*sin(now)), COLOR_WHITE); @@ -134,6 +141,10 @@ int entry(int argc, char **argv) { if (show) draw_image(atlas->image, v2(-1.6, -1), v2(4, 4), COLOR_WHITE); + if (do_enable_z_sorting) { + pop_window_scissor(); + } + tm_scope("gfx_update") { gfx_update(); } diff --git a/oogabooga/gfx_impl_d3d11.c b/oogabooga/gfx_impl_d3d11.c index 3c310ad..4cf702a 100644 --- a/oogabooga/gfx_impl_d3d11.c +++ b/oogabooga/gfx_impl_d3d11.c @@ -7,20 +7,23 @@ const Gfx_Handle GFX_INVALID_HANDLE = 0; string temp_win32_null_terminated_wide_to_fixed_utf8(const u16 *utf16); +// We wanna pack this at some point +// #Cleanup #Memory why am I doing alignat(16)? 
typedef struct alignat(16) D3D11_Vertex { Vector4 color; Vector4 position; Vector2 uv; - union { - s32 data1; - struct { - s8 texture_index; - u8 type; - u8 sampler; - u8 padding; - }; - }; + Vector2 self_uv; + s8 texture_index; + u8 type; + u8 sampler; + u8 has_scissor; + + Vector4 userdata[VERTEX_2D_USER_DATA_COUNT]; + + Vector4 scissor; + } D3D11_Vertex; ID3D11Debug *d3d11_debug = 0; @@ -257,6 +260,7 @@ bool d3d11_compile_shader(string source) { source = string_replace_all(source, STR("$INJECT_PIXEL_POST_PROCESS"), STR("float4 pixel_shader_extension(PS_INPUT input, float4 color) { return color; }"), temp); + source = string_replace_all(source, STR("$VERTEX_2D_USER_DATA_COUNT"), tprint("%d", VERTEX_2D_USER_DATA_COUNT), temp); // #Leak on recompile @@ -299,7 +303,8 @@ d3d11_compile_shader(string source) { - D3D11_INPUT_ELEMENT_DESC layout[6]; + #define layout_base_count 9 + D3D11_INPUT_ELEMENT_DESC layout[layout_base_count+VERTEX_2D_USER_DATA_COUNT]; memset(layout, 0, sizeof(layout)); layout[0].SemanticName = "POSITION"; @@ -350,8 +355,44 @@ d3d11_compile_shader(string source) { layout[5].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; layout[5].InstanceDataStepRate = 0; - hr = ID3D11Device_CreateInputLayout(d3d11_device, layout, 6, vs_buffer, vs_size, &d3d11_image_vertex_layout); + layout[6].SemanticName = "SELF_UV"; + layout[6].SemanticIndex = 0; + layout[6].Format = DXGI_FORMAT_R32G32_FLOAT; + layout[6].InputSlot = 0; + layout[6].AlignedByteOffset = offsetof(D3D11_Vertex, self_uv); + layout[6].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; + layout[6].InstanceDataStepRate = 0; + + layout[7].SemanticName = "SCISSOR"; + layout[7].SemanticIndex = 0; + layout[7].Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + layout[7].InputSlot = 0; + layout[7].AlignedByteOffset = offsetof(D3D11_Vertex, scissor); + layout[7].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; + layout[7].InstanceDataStepRate = 0; + + layout[8].SemanticName = "HAS_SCISSOR"; + layout[8].SemanticIndex = 0; + 
layout[8].Format = DXGI_FORMAT_R8_UINT; + layout[8].InputSlot = 0; + layout[8].AlignedByteOffset = offsetof(D3D11_Vertex, has_scissor); + layout[8].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; + layout[8].InstanceDataStepRate = 0; + + for (int i = 0; i < VERTEX_2D_USER_DATA_COUNT; ++i) { + layout[layout_base_count + i].SemanticName = "USERDATA"; + layout[layout_base_count + i].SemanticIndex = i; + layout[layout_base_count + i].Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + layout[layout_base_count + i].InputSlot = 0; + layout[layout_base_count + i].AlignedByteOffset = offsetof(D3D11_Vertex, userdata) + sizeof(Vector4) * i; + layout[layout_base_count + i].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; + } + + + hr = ID3D11Device_CreateInputLayout(d3d11_device, layout, layout_base_count+VERTEX_2D_USER_DATA_COUNT, vs_buffer, vs_size, &d3d11_image_vertex_layout); win32_check_hr(hr); + + #undef layout_base_count D3D11Release(vs_blob); D3D11Release(ps_blob); @@ -479,25 +520,6 @@ void gfx_init() { ID3D11DeviceContext_RSSetState(d3d11_context, d3d11_rasterizer); } - // COnst buffer - /*{ - D3D11_BUFFER_DESC bd; - bd.ByteWidth = align_forward(sizeof(GlobalConstBuffer), 16); - bd.Usage = D3D11_USAGE_DYNAMIC; - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - ID3D11Device_CreateBuffer(dx_state.d3d_device, &bd, NULL, &dx_state.const_buffer_resource); - }*/ - - /*{ - D3D11_BUFFER_DESC bd; - bd.ByteWidth = align_forward(sizeof(BatchUniforms), 16); - bd.Usage = D3D11_USAGE_DYNAMIC; - bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - ID3D11Device_CreateBuffer(dx_state.d3d_device, &bd, NULL, &render_st.batch.ubo); - }*/ - { D3D11_SAMPLER_DESC sd = ZERO(D3D11_SAMPLER_DESC); sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; @@ -524,6 +546,7 @@ void gfx_init() { } string source = STR(d3d11_image_shader_source); + bool ok = d3d11_compile_shader(source); assert(ok, "Failed compiling default shader"); @@ -554,7 
+577,14 @@ void d3d11_draw_call(int number_of_rendered_quads, ID3D11ShaderResourceView **te if (draw_frame.cbuffer && d3d11_cbuffer && d3d11_cbuffer_size) { D3D11_MAPPED_SUBRESOURCE cbuffer_mapping; - ID3D11DeviceContext_Map(d3d11_context, (ID3D11Resource*)d3d11_cbuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &cbuffer_mapping); + ID3D11DeviceContext_Map( + d3d11_context, + (ID3D11Resource*)d3d11_cbuffer, + 0, + D3D11_MAP_WRITE_DISCARD, + 0, + &cbuffer_mapping + ); memcpy(cbuffer_mapping.pData, draw_frame.cbuffer, d3d11_cbuffer_size); ID3D11DeviceContext_Unmap(d3d11_context, (ID3D11Resource*)d3d11_cbuffer, 0); @@ -706,11 +736,32 @@ void d3d11_process_draw_frame() { TR->uv = v2(q->uv.x2, q->uv.y2); BR->uv = v2(q->uv.x2, q->uv.y1); + BL->self_uv = v2(0, 0); + TL->self_uv = v2(0, 1); + TR->self_uv = v2(1, 1); + BR->self_uv = v2(1, 0); + + // #Speed + memcpy(BL->userdata, q->userdata, sizeof(q->userdata)); + memcpy(TL->userdata, q->userdata, sizeof(q->userdata)); + memcpy(TR->userdata, q->userdata, sizeof(q->userdata)); + memcpy(BR->userdata, q->userdata, sizeof(q->userdata)); + BL->color = TL->color = TR->color = BR->color = q->color; BL->texture_index=TL->texture_index=TR->texture_index=BR->texture_index = texture_index; BL->type=TL->type=TR->type=BR->type = (u8)q->type; + float t = q->scissor.y1; + q->scissor.y1 = q->scissor.y2; + q->scissor.y2 = t; + + q->scissor.y1 = window.pixel_height - q->scissor.y1; + q->scissor.y2 = window.pixel_height - q->scissor.y2; + + BL->has_scissor=TL->has_scissor=TR->has_scissor=BR->has_scissor = q->has_scissor; + BL->scissor=TL->scissor=TR->scissor=BR->scissor = q->scissor; + u8 sampler = -1; if (q->image_min_filter == GFX_FILTER_MODE_NEAREST && q->image_mag_filter == GFX_FILTER_MODE_NEAREST) @@ -900,6 +951,7 @@ shader_recompile_with_extension(string ext_source, u64 cbuffer_size) { string source = string_replace_all(STR(d3d11_image_shader_source), STR("$INJECT_PIXEL_POST_PROCESS"), ext_source, temp); + if (!d3d11_compile_shader(source)) 
return false; u64 aligned_cbuffer_size = (max(cbuffer_size, 16) + 16) & ~(15); @@ -928,10 +980,14 @@ struct VS_INPUT { float4 position : POSITION; float2 uv : TEXCOORD; + float2 self_uv : SELF_UV; float4 color : COLOR; int texture_index : TEXTURE_INDEX; uint type : TYPE; uint sampler_index : SAMPLER_INDEX; + uint has_scissor : HAS_SCISSOR; + float4 userdata[$VERTEX_2D_USER_DATA_COUNT] : USERDATA; + float4 scissor : SCISSOR; }; struct PS_INPUT @@ -939,12 +995,18 @@ struct PS_INPUT float4 position_screen : SV_POSITION; float4 position : POSITION; float2 uv : TEXCOORD0; + float2 self_uv : SELF_UV; float4 color : COLOR; int texture_index: TEXTURE_INDEX; int type: TYPE; int sampler_index: SAMPLER_INDEX; + uint has_scissor : HAS_SCISSOR; + float4 userdata[$VERTEX_2D_USER_DATA_COUNT] : USERDATA; + float4 scissor : SCISSOR; }; + + PS_INPUT vs_main(VS_INPUT input) { PS_INPUT output; @@ -955,6 +1017,12 @@ PS_INPUT vs_main(VS_INPUT input) output.texture_index = input.texture_index; output.type = input.type; output.sampler_index = input.sampler_index; + output.self_uv = input.self_uv; + for (int i = 0; i < $VERTEX_2D_USER_DATA_COUNT; i++) { + output.userdata[i] = input.userdata[i]; + } + output.scissor = input.scissor; + output.has_scissor = input.has_scissor; return output; } @@ -1112,8 +1180,17 @@ $INJECT_PIXEL_POST_PROCESS \n \043define QUAD_TYPE_REGULAR 0\n \043define QUAD_TYPE_TEXT 1\n +\043define QUAD_TYPE_CIRCLE 2\n float4 ps_main(PS_INPUT input) : SV_TARGET { + + if (input.has_scissor) { + float2 screen_pos = input.position_screen.xy; + if (screen_pos.x < input.scissor.x || screen_pos.x >= input.scissor.z || + screen_pos.y < input.scissor.y || screen_pos.y >= input.scissor.w) + discard; + } + if (input.type == QUAD_TYPE_REGULAR) { if (input.texture_index >= 0 && input.texture_index < 32 && input.sampler_index >= 0 && input.sampler_index <= 3) { return pixel_shader_extension(input, sample_texture(input.texture_index, input.sampler_index, input.uv)*input.color); @@ 
-1127,8 +1204,19 @@ float4 ps_main(PS_INPUT input) : SV_TARGET } else { return pixel_shader_extension(input, input.color); } - } + } else if (input.type == QUAD_TYPE_CIRCLE) { - return float4(0.0, 1.0, 0.0, 1.0); + float dist = length(input.self_uv-float2(0.5, 0.5)); + + if (dist > 0.5) return float4(0.0, 0.0, 0.0, 0.0); + + if (input.texture_index >= 0 && input.texture_index < 32 && input.sampler_index >= 0 && input.sampler_index <= 3) { + return pixel_shader_extension(input, sample_texture(input.texture_index, input.sampler_index, input.uv)*input.color); + } else { + return pixel_shader_extension(input, input.color); + } + } + + return float4(1.0, 1.0, 0.0, 1.0); } ); \ No newline at end of file diff --git a/oogabooga/gfx_interface.c b/oogabooga/gfx_interface.c index 8ddfbce..5f8ecb9 100644 --- a/oogabooga/gfx_interface.c +++ b/oogabooga/gfx_interface.c @@ -15,9 +15,16 @@ #error "Unknown renderer GFX_RENDERER defined" #endif + +#ifndef VERTEX_2D_USER_DATA_COUNT + #define VERTEX_2D_USER_DATA_COUNT 1 +#endif + forward_global const Gfx_Handle GFX_INVALID_HANDLE; +// #Volatile reflected in 2D batch shader #define QUAD_TYPE_REGULAR 0 #define QUAD_TYPE_TEXT 1 +#define QUAD_TYPE_CIRCLE 2 typedef enum Gfx_Filter_Mode { GFX_FILTER_MODE_NEAREST, diff --git a/oogabooga/memory.c b/oogabooga/memory.c index 3823a1a..c4fe745 100644 --- a/oogabooga/memory.c +++ b/oogabooga/memory.c @@ -71,6 +71,10 @@ typedef struct Heap_Block { void* start; Heap_Block *next; // 32 bytes !! 
+#if CONFIGURATION == DEBUG + u64 total_allocated; + u64 padding; +#endif } Heap_Block; #define HEAP_META_SIGNATURE 6969694206942069ull @@ -117,6 +121,7 @@ void sanity_check_block(Heap_Block *block) { assert(is_pointer_in_program_memory(block->start), "Heap_Block pointer is corrupt"); if(block->next) { assert(is_pointer_in_program_memory(block->next), "Heap_Block next pointer is corrupt"); } assert(block->size < GB(256), "A heap block is corrupt."); + assert(block->size >= INITIAL_PROGRAM_MEMORY_SIZE, "A heap block is corrupt."); assert((u64)block->start == (u64)block + sizeof(Heap_Block), "A heap block is corrupt."); @@ -139,6 +144,8 @@ void sanity_check_block(Heap_Block *block) { node = node->next; } + u64 expected_size = get_heap_block_size_excluding_metadata(block); + assert(block->total_allocated+total_free == expected_size, "Heap is corrupt.") } inline void check_meta(Heap_Allocation_Metadata *meta) { #if CONFIGURATION == DEBUG @@ -215,6 +222,7 @@ Heap_Block *make_heap_block(Heap_Block *parent, u64 size) { } else { block = (Heap_Block*)program_memory; } + block->total_allocated = 0; @@ -269,6 +277,18 @@ void *heap_alloc(u64 size) { assert(size < MAX_HEAP_BLOCK_SIZE, "Past Charlie has been lazy and did not handle large allocations like this. I apologize on behalf of past Charlie. A quick fix could be to increase the heap block size for now. 
#Incomplete #Limitation"); + +#if VERY_DEBUG + { + Heap_Block *block = heap_head; + + while (block != 0) { + sanity_check_block(block); + block = block->next; + } + } +#endif + Heap_Block *block = heap_head; Heap_Block *last_block = 0; Heap_Free_Node *best_fit = 0; @@ -278,10 +298,6 @@ void *heap_alloc(u64 size) { // #Speed // Maybe instead of going through EVERY free node to find best fit we do a good-enough fit while (block != 0) { - -#if VERY_DEBUG - sanity_check_block(block); -#endif if (get_heap_block_size_excluding_metadata(block) < size) { last_block = block; @@ -354,6 +370,7 @@ void *heap_alloc(u64 size) { meta->block = best_fit_block; #if CONFIGURATION == DEBUG meta->signature = HEAP_META_SIGNATURE; + meta->block->total_allocated += size; #endif check_meta(meta); @@ -449,6 +466,10 @@ void heap_dealloc(void *p) { } +#if CONFIGURATION == DEBUG + block->total_allocated -= size; +#endif + #if VERY_DEBUG sanity_check_block(block); #endif diff --git a/oogabooga/utility.c b/oogabooga/utility.c index 6f4bbae..a234f95 100644 --- a/oogabooga/utility.c +++ b/oogabooga/utility.c @@ -101,3 +101,4 @@ void merge_sort(void *collection, void *help_buffer, u64 item_count, u64 item_si } } } +