- Z Layers
- Sorting - Contiguous quad buffer
This commit is contained in:
parent
a9cbf7ee68
commit
c92b6fd4b7
15 changed files with 386 additions and 254 deletions
9
build.c
9
build.c
|
@ -3,6 +3,11 @@
|
|||
///
|
||||
// Build config stuff
|
||||
|
||||
// #Temporary
|
||||
#define RUN_TESTS 0
|
||||
#define OOGABOOGA_DEV 1
|
||||
#define ENABLE_PROFILING 1
|
||||
|
||||
#define INITIAL_PROGRAM_MEMORY_SIZE MB(5)
|
||||
|
||||
typedef struct Context_Extra {
|
||||
|
@ -26,11 +31,11 @@ typedef struct Context_Extra {
|
|||
//
|
||||
|
||||
// this is a minimal starting point for new projects. Copy & rename to get started
|
||||
#include "oogabooga/examples/minimal_game_loop.c"
|
||||
// #include "oogabooga/examples/minimal_game_loop.c"
|
||||
|
||||
// #include "oogabooga/examples/text_rendering.c"
|
||||
// #include "oogabooga/examples/custom_logger.c"
|
||||
// #include "oogabooga/examples/renderer_stress_test.c"
|
||||
#include "oogabooga/examples/renderer_stress_test.c"
|
||||
|
||||
// This is where you swap in your own project!
|
||||
// #include "entry_yourepicgamename.c"
|
||||
|
|
|
@ -1,3 +1,17 @@
|
|||
## v0.00.005 - Z layers
|
||||
Renderer:
|
||||
- Added optional Z-sorting
|
||||
- Either set quad->z or call push_z_layer(s32) (and pop_z_layer())
|
||||
- Enable with draw_frame.enable_z_sorting = true
|
||||
- Refactored the quad buffering to just be a growing quad buffer rather than a linked list of quad blocks. Your CPU will be thankful.
|
||||
|
||||
Misc:
|
||||
- removed gfx_impl_legacy_opengl.c
|
||||
- Sorting procedures
|
||||
- merge_sort()
|
||||
- radix_sort()
|
||||
- sorting tests
|
||||
|
||||
## v0.00.004 - Custom logging, more concurrency & bugfixing
|
||||
|
||||
Concurrency:
|
||||
|
|
|
@ -78,6 +78,7 @@ typedef struct Allocator {
|
|||
} Allocator;
|
||||
|
||||
Allocator get_heap_allocator();
|
||||
Allocator get_temporary_allocator();
|
||||
|
||||
typedef struct Context {
|
||||
void *logger; // void(*Logger_Proc)(Log_Level level, string fmt, ...)
|
||||
|
|
|
@ -62,13 +62,11 @@ Usage:
|
|||
|
||||
*/
|
||||
|
||||
// We use radix sort, so the exact bit count is important
|
||||
#define MAX_Z_BITS 21
|
||||
#define MAX_Z ((1 << MAX_Z_BITS)/2)
|
||||
#define Z_STACK_MAX 4096
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define QUADS_PER_BLOCK 256
|
||||
typedef struct Draw_Quad {
|
||||
Vector2 bottom_left, top_left, top_right, bottom_right;
|
||||
// r, g, b, a
|
||||
|
@ -82,33 +80,22 @@ typedef struct Draw_Quad {
|
|||
Gfx_Filter_Mode image_min_filter;
|
||||
Gfx_Filter_Mode image_mag_filter;
|
||||
|
||||
float32 z;
|
||||
s32 z;
|
||||
|
||||
} Draw_Quad;
|
||||
|
||||
|
||||
typedef struct Draw_Quad_Block {
|
||||
Draw_Quad quad_buffer[QUADS_PER_BLOCK];
|
||||
u64 num_quads;
|
||||
|
||||
float32 low_z, high_z;
|
||||
|
||||
struct Draw_Quad_Block *next;
|
||||
} Draw_Quad_Block;
|
||||
|
||||
// I made these blocks part of the frame at first so they were temp allocated BUT I think
|
||||
// that was a mistake because these blocks are accessed a lot so we want it to just be
|
||||
// persistent memory that's super hot all the time.
|
||||
Draw_Quad_Block first_block = {0};
|
||||
|
||||
Draw_Quad *quad_buffer;
|
||||
u64 allocated_quads;
|
||||
typedef struct Draw_Frame {
|
||||
Draw_Quad_Block *current;
|
||||
u64 num_blocks;
|
||||
u64 num_quads;
|
||||
|
||||
Matrix4 projection;
|
||||
Matrix4 view;
|
||||
|
||||
bool enable_z_sorting;
|
||||
s32 z_stack[Z_STACK_MAX];
|
||||
u64 z_count;
|
||||
} Draw_Frame;
|
||||
// This frame is passed to the platform layer and rendered in os_update.
|
||||
// Resets every frame.
|
||||
|
@ -117,14 +104,21 @@ Draw_Frame draw_frame = ZERO(Draw_Frame);
|
|||
void reset_draw_frame(Draw_Frame *frame) {
|
||||
*frame = (Draw_Frame){0};
|
||||
|
||||
frame->current = 0;
|
||||
|
||||
float32 aspect = (float32)window.width/(float32)window.height;
|
||||
|
||||
frame->projection = m4_make_orthographic_projection(-aspect, aspect, -1, 1, -1, 10);
|
||||
frame->view = m4_scalar(1.0);
|
||||
frame->view = m4_scalar(1.0);
|
||||
}
|
||||
|
||||
// Pushes z on top of draw_frame's Z layer stack.
// Asserts if the stack already holds Z_STACK_MAX entries.
// Pair every call with a pop_z_layer() once you are done drawing to the layer.
void push_z_layer(s32 z) {
	assert(draw_frame.z_count < Z_STACK_MAX, "Too many z layers pushed. You can pop with pop_z_layer() when you are done drawing to it.");

	// Store at the current top, then grow the stack by one.
	draw_frame.z_stack[draw_frame.z_count++] = z;
}
|
||||
void pop_z_layer() {
|
||||
assert(draw_frame.z_count > 0, "No Z layers to pop!");
|
||||
draw_frame.z_count -= 1;
|
||||
}
|
||||
|
||||
Draw_Quad *draw_quad_projected(Draw_Quad quad, Matrix4 world_to_clip) {
|
||||
|
@ -135,37 +129,30 @@ Draw_Quad *draw_quad_projected(Draw_Quad quad, Matrix4 world_to_clip) {
|
|||
|
||||
quad.image_min_filter = GFX_FILTER_MODE_NEAREST;
|
||||
quad.image_mag_filter = GFX_FILTER_MODE_NEAREST;
|
||||
|
||||
if (!draw_frame.current) {
|
||||
draw_frame.current = &first_block;
|
||||
draw_frame.current->low_z = F32_MAX;
|
||||
draw_frame.current->high_z = F32_MIN;
|
||||
draw_frame.current->num_quads = 0;
|
||||
draw_frame.num_blocks = 1;
|
||||
}
|
||||
|
||||
assert(draw_frame.current->num_quads <= QUADS_PER_BLOCK);
|
||||
quad.z = 0;
|
||||
if (draw_frame.z_count > 0) quad.z = draw_frame.z_stack[draw_frame.z_count-1];
|
||||
|
||||
if (draw_frame.current->num_quads == QUADS_PER_BLOCK) {
|
||||
if (draw_frame.num_quads >= allocated_quads) {
|
||||
// #Memory
|
||||
|
||||
if (!draw_frame.current->next) {
|
||||
draw_frame.current->next = cast(Draw_Quad_Block*)alloc(get_heap_allocator(), sizeof(Draw_Quad_Block));
|
||||
*draw_frame.current->next = ZERO(Draw_Quad_Block);
|
||||
u64 new_count = max(get_next_power_of_two(draw_frame.num_quads+1), 128);
|
||||
|
||||
Draw_Quad *new_buffer = alloc(get_heap_allocator(), new_count*sizeof(Draw_Quad));
|
||||
|
||||
if (quad_buffer) {
|
||||
memcpy(new_buffer, quad_buffer, draw_frame.num_quads*sizeof(Draw_Quad));
|
||||
dealloc(get_heap_allocator(), quad_buffer);
|
||||
}
|
||||
|
||||
draw_frame.current = draw_frame.current->next;
|
||||
draw_frame.current->num_quads = 0;
|
||||
draw_frame.current->low_z = F32_MAX;
|
||||
draw_frame.current->high_z = F32_MIN;
|
||||
|
||||
draw_frame.num_blocks += 1;
|
||||
|
||||
quad_buffer = new_buffer;
|
||||
allocated_quads = new_count;
|
||||
}
|
||||
|
||||
draw_frame.current->quad_buffer[draw_frame.current->num_quads] = quad;
|
||||
draw_frame.current->num_quads += 1;
|
||||
quad_buffer[draw_frame.num_quads] = quad;
|
||||
draw_frame.num_quads += 1;
|
||||
|
||||
return &draw_frame.current->quad_buffer[draw_frame.current->num_quads-1];
|
||||
return &quad_buffer[draw_frame.num_quads-1];
|
||||
}
|
||||
Draw_Quad *draw_quad(Draw_Quad quad) {
|
||||
return draw_quad_projected(quad, m4_mul(draw_frame.projection, m4_inverse(draw_frame.view)));
|
||||
|
|
|
@ -19,6 +19,8 @@ int entry(int argc, char **argv) {
|
|||
rect_xform = m4_translate(rect_xform, v3(-.25f, -.25f, 0));
|
||||
draw_rect_xform(rect_xform, v2(.5f, .5f), COLOR_GREEN);
|
||||
|
||||
draw_rect(v2(sin(now), -.8), v2(.5, .25), COLOR_RED);
|
||||
|
||||
gfx_update();
|
||||
}
|
||||
|
||||
|
|
|
@ -87,8 +87,12 @@ int entry(int argc, char **argv) {
|
|||
camera_view = m4_translate(camera_view, v3(v2_expand(cam_move), 0));
|
||||
draw_frame.view = camera_view;
|
||||
|
||||
local_persist bool do_enable_z_sorting = false;
|
||||
draw_frame.enable_z_sorting = do_enable_z_sorting;
|
||||
if (is_key_just_pressed('Z')) do_enable_z_sorting = !do_enable_z_sorting;
|
||||
|
||||
seed_for_random = 69;
|
||||
for (u64 i = 0; i < 100000; i++) {
|
||||
for (u64 i = 0; i < 50000; i++) {
|
||||
float32 aspect = (float32)window.width/(float32)window.height;
|
||||
float min_x = -aspect;
|
||||
float max_x = aspect;
|
||||
|
@ -98,15 +102,18 @@ int entry(int argc, char **argv) {
|
|||
float x = get_random_float32() * (max_x-min_x) + min_x;
|
||||
float y = get_random_float32() * (max_y-min_y) + min_y;
|
||||
|
||||
push_z_layer((s32)(y*100));
|
||||
draw_image(bush_image, v2(x, y), v2(0.1, 0.1), COLOR_WHITE);
|
||||
pop_z_layer();
|
||||
}
|
||||
seed_for_random = os_get_current_cycle_count();
|
||||
|
||||
|
||||
Matrix4 hammer_xform = m4_scalar(1.0);
|
||||
hammer_xform = m4_rotate_z(hammer_xform, (f32)now);
|
||||
hammer_xform = m4_translate(hammer_xform, v3(-.25f, -.25f, 0));
|
||||
push_z_layer(1000001);
|
||||
draw_image_xform(hammer_image, hammer_xform, v2(.5f, .5f), COLOR_RED);
|
||||
pop_z_layer();
|
||||
|
||||
Vector2 hover_position = v2_rotate_point_around_pivot(v2(-.5, -.5), v2(0, 0), (f32)now);
|
||||
Vector2 local_pivot = v2(.125f, .125f);
|
||||
|
|
|
@ -15,7 +15,6 @@ string temp_win32_null_terminated_wide_to_fixed_utf8(const u16 *utf16);
|
|||
|
||||
typedef struct alignat(16) D3D11_Vertex {
|
||||
|
||||
|
||||
Vector4 color;
|
||||
Vector4 position;
|
||||
Vector2 uv;
|
||||
|
@ -59,6 +58,9 @@ ID3D11Buffer *d3d11_quad_vbo = 0;
|
|||
u32 d3d11_quad_vbo_size = 0;
|
||||
void *d3d11_staging_quad_buffer = 0;
|
||||
|
||||
Draw_Quad *sort_quad_buffer = 0;
|
||||
u64 sort_quad_buffer_size = 0;
|
||||
|
||||
const char* d3d11_stringify_category(D3D11_MESSAGE_CATEGORY category) {
|
||||
switch (category) {
|
||||
case D3D11_MESSAGE_CATEGORY_APPLICATION_DEFINED: return "Application Defined";
|
||||
|
@ -545,7 +547,7 @@ void d3d11_process_draw_frame() {
|
|||
|
||||
///
|
||||
// Maybe grow quad vbo
|
||||
u32 required_size = sizeof(D3D11_Vertex) * draw_frame.num_blocks*QUADS_PER_BLOCK*6;
|
||||
u32 required_size = sizeof(D3D11_Vertex) * allocated_quads*6;
|
||||
|
||||
if (required_size > d3d11_quad_vbo_size) {
|
||||
if (d3d11_quad_vbo) {
|
||||
|
@ -567,7 +569,7 @@ void d3d11_process_draw_frame() {
|
|||
log_verbose("Grew quad vbo to %d bytes.", d3d11_quad_vbo_size);
|
||||
}
|
||||
|
||||
if (draw_frame.num_blocks > 0) {
|
||||
if (draw_frame.num_quads > 0) {
|
||||
///
|
||||
// Render geometry from into vbo quad list
|
||||
|
||||
|
@ -580,120 +582,126 @@ void d3d11_process_draw_frame() {
|
|||
D3D11_Vertex* head = (D3D11_Vertex*)d3d11_staging_quad_buffer;
|
||||
D3D11_Vertex* pointer = head;
|
||||
u64 number_of_rendered_quads = 0;
|
||||
Draw_Quad_Block *block = &first_block;
|
||||
|
||||
tm_scope_cycles("Quad processing") {
|
||||
u64 block_index = 0;
|
||||
while (block != 0 && block->num_quads > 0 && block_index < draw_frame.num_blocks) tm_scope_cycles("Quad block") {
|
||||
for (u64 i = 0; i < block->num_quads; i++) {
|
||||
if (draw_frame.enable_z_sorting) tm_scope_cycles("Z sorting") {
|
||||
if (!sort_quad_buffer || (sort_quad_buffer_size < allocated_quads*sizeof(Draw_Quad))) {
|
||||
// #Memory #Heapalloc
|
||||
if (sort_quad_buffer) dealloc(get_heap_allocator(), sort_quad_buffer);
|
||||
sort_quad_buffer = alloc(get_heap_allocator(), allocated_quads*sizeof(Draw_Quad));
|
||||
sort_quad_buffer_size = allocated_quads*sizeof(Draw_Quad);
|
||||
}
|
||||
radix_sort(quad_buffer, sort_quad_buffer, draw_frame.num_quads, sizeof(Draw_Quad), offsetof(Draw_Quad, z), MAX_Z_BITS);
|
||||
}
|
||||
|
||||
for (u64 i = 0; i < draw_frame.num_quads; i++) {
|
||||
|
||||
Draw_Quad *q = &quad_buffer[i];
|
||||
|
||||
assert(q->z <= MAX_Z, "Z is too high. Z is %d, Max is %d.", q->z, MAX_Z);
|
||||
assert(q->z >= (-MAX_Z+1), "Z is too low. Z is %d, Min is %d.", q->z, -MAX_Z+1);
|
||||
|
||||
s8 texture_index = -1;
|
||||
|
||||
if (q->image) {
|
||||
|
||||
Draw_Quad *q = &block->quad_buffer[i];
|
||||
|
||||
s8 texture_index = -1;
|
||||
|
||||
if (q->image) {
|
||||
|
||||
if (last_texture == q->image->gfx_handle) {
|
||||
texture_index = last_texture_index;
|
||||
} else {
|
||||
// First look if texture is already bound
|
||||
for (u64 j = 0; j < num_textures; j++) {
|
||||
if (textures[j] == q->image->gfx_handle) {
|
||||
texture_index = (s8)j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Otherwise use a new slot
|
||||
if (texture_index <= -1) {
|
||||
if (num_textures >= 32) {
|
||||
// If max textures reached, make a draw call and start over
|
||||
D3D11_MAPPED_SUBRESOURCE buffer_mapping;
|
||||
VTABLE(Map, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0, D3D11_MAP_WRITE_DISCARD, 0, &buffer_mapping);
|
||||
memcpy(buffer_mapping.pData, d3d11_staging_quad_buffer, number_of_rendered_quads*sizeof(D3D11_Vertex)*6);
|
||||
VTABLE(Unmap, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0);
|
||||
d3d11_draw_call(number_of_rendered_quads, textures, num_textures);
|
||||
head = (D3D11_Vertex*)d3d11_staging_quad_buffer;
|
||||
num_textures = 0;
|
||||
texture_index = 0;
|
||||
number_of_rendered_quads = 0;
|
||||
pointer = head;
|
||||
} else {
|
||||
texture_index = (s8)num_textures;
|
||||
num_textures += 1;
|
||||
}
|
||||
if (last_texture == q->image->gfx_handle) {
|
||||
texture_index = last_texture_index;
|
||||
} else {
|
||||
// First look if texture is already bound
|
||||
for (u64 j = 0; j < num_textures; j++) {
|
||||
if (textures[j] == q->image->gfx_handle) {
|
||||
texture_index = (s8)j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Otherwise use a new slot
|
||||
if (texture_index <= -1) {
|
||||
if (num_textures >= 32) {
|
||||
// If max textures reached, make a draw call and start over
|
||||
D3D11_MAPPED_SUBRESOURCE buffer_mapping;
|
||||
VTABLE(Map, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0, D3D11_MAP_WRITE_DISCARD, 0, &buffer_mapping);
|
||||
memcpy(buffer_mapping.pData, d3d11_staging_quad_buffer, number_of_rendered_quads*sizeof(D3D11_Vertex)*6);
|
||||
VTABLE(Unmap, d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0);
|
||||
d3d11_draw_call(number_of_rendered_quads, textures, num_textures);
|
||||
head = (D3D11_Vertex*)d3d11_staging_quad_buffer;
|
||||
num_textures = 0;
|
||||
texture_index = 0;
|
||||
number_of_rendered_quads = 0;
|
||||
pointer = head;
|
||||
} else {
|
||||
texture_index = (s8)num_textures;
|
||||
num_textures += 1;
|
||||
}
|
||||
}
|
||||
textures[texture_index] = q->image->gfx_handle;
|
||||
last_texture = q->image->gfx_handle;
|
||||
last_texture_index = texture_index;
|
||||
}
|
||||
|
||||
if (q->type == QUAD_TYPE_TEXT) {
|
||||
float pixel_width = 2.0/(float)window.width;
|
||||
float pixel_height = 2.0/(float)window.height;
|
||||
|
||||
q->bottom_left.x = round(q->bottom_left.x / pixel_width) * pixel_width;
|
||||
q->bottom_left.y = round(q->bottom_left.y / pixel_height) * pixel_height;
|
||||
q->top_left.x = round(q->top_left.x / pixel_width) * pixel_width;
|
||||
q->top_left.y = round(q->top_left.y / pixel_height) * pixel_height;
|
||||
q->top_right.x = round(q->top_right.x / pixel_width) * pixel_width;
|
||||
q->top_right.y = round(q->top_right.y / pixel_height) * pixel_height;
|
||||
q->bottom_right.x = round(q->bottom_right.x / pixel_width) * pixel_width;
|
||||
q->bottom_right.y = round(q->bottom_right.y / pixel_height) * pixel_height;
|
||||
}
|
||||
|
||||
// We will write to 6 vertices for the one quad (two tris)
|
||||
{
|
||||
|
||||
D3D11_Vertex* BL = pointer + 0;
|
||||
D3D11_Vertex* TL = pointer + 1;
|
||||
D3D11_Vertex* TR = pointer + 2;
|
||||
D3D11_Vertex* BL2 = pointer + 3;
|
||||
D3D11_Vertex* TR2 = pointer + 4;
|
||||
D3D11_Vertex* BR = pointer + 5;
|
||||
pointer += 6;
|
||||
|
||||
BL->position = v4(q->bottom_left.x, q->bottom_left.y, 0, 1);
|
||||
TL->position = v4(q->top_left.x, q->top_left.y, 0, 1);
|
||||
TR->position = v4(q->top_right.x, q->top_right.y, 0, 1);
|
||||
BR->position = v4(q->bottom_right.x, q->bottom_right.y, 0, 1);
|
||||
|
||||
BL->uv = v2(q->uv.x1, q->uv.y1);
|
||||
TL->uv = v2(q->uv.x1, q->uv.y2);
|
||||
TR->uv = v2(q->uv.x2, q->uv.y2);
|
||||
BR->uv = v2(q->uv.x2, q->uv.y1);
|
||||
|
||||
BL->color = TL->color = TR->color = BR->color = q->color;
|
||||
|
||||
BL->texture_index=TL->texture_index=TR->texture_index=BR->texture_index = texture_index;
|
||||
BL->type=TL->type=TR->type=BR->type = (u8)q->type;
|
||||
|
||||
u8 sampler = -1;
|
||||
if (q->image_min_filter == GFX_FILTER_MODE_NEAREST
|
||||
&& q->image_mag_filter == GFX_FILTER_MODE_NEAREST)
|
||||
sampler = 0;
|
||||
if (q->image_min_filter == GFX_FILTER_MODE_LINEAR
|
||||
&& q->image_mag_filter == GFX_FILTER_MODE_LINEAR)
|
||||
sampler = 1;
|
||||
if (q->image_min_filter == GFX_FILTER_MODE_LINEAR
|
||||
&& q->image_mag_filter == GFX_FILTER_MODE_NEAREST)
|
||||
sampler = 2;
|
||||
if (q->image_min_filter == GFX_FILTER_MODE_NEAREST
|
||||
&& q->image_mag_filter == GFX_FILTER_MODE_LINEAR)
|
||||
sampler = 3;
|
||||
|
||||
BL->type=TL->type=TR->type=BR->type = (u8)q->type;
|
||||
BL->sampler=TL->sampler=TR->sampler=BR->sampler = (u8)sampler;
|
||||
|
||||
*BL2 = *BL;
|
||||
*TR2 = *TR;
|
||||
|
||||
number_of_rendered_quads += 1;
|
||||
}
|
||||
textures[texture_index] = q->image->gfx_handle;
|
||||
last_texture = q->image->gfx_handle;
|
||||
last_texture_index = texture_index;
|
||||
}
|
||||
|
||||
block_index += 1;
|
||||
block = block->next;
|
||||
if (q->type == QUAD_TYPE_TEXT) {
|
||||
float pixel_width = 2.0/(float)window.width;
|
||||
float pixel_height = 2.0/(float)window.height;
|
||||
|
||||
q->bottom_left.x = round(q->bottom_left.x / pixel_width) * pixel_width;
|
||||
q->bottom_left.y = round(q->bottom_left.y / pixel_height) * pixel_height;
|
||||
q->top_left.x = round(q->top_left.x / pixel_width) * pixel_width;
|
||||
q->top_left.y = round(q->top_left.y / pixel_height) * pixel_height;
|
||||
q->top_right.x = round(q->top_right.x / pixel_width) * pixel_width;
|
||||
q->top_right.y = round(q->top_right.y / pixel_height) * pixel_height;
|
||||
q->bottom_right.x = round(q->bottom_right.x / pixel_width) * pixel_width;
|
||||
q->bottom_right.y = round(q->bottom_right.y / pixel_height) * pixel_height;
|
||||
}
|
||||
|
||||
// We will write to 6 vertices for the one quad (two tris)
|
||||
{
|
||||
|
||||
D3D11_Vertex* BL = pointer + 0;
|
||||
D3D11_Vertex* TL = pointer + 1;
|
||||
D3D11_Vertex* TR = pointer + 2;
|
||||
D3D11_Vertex* BL2 = pointer + 3;
|
||||
D3D11_Vertex* TR2 = pointer + 4;
|
||||
D3D11_Vertex* BR = pointer + 5;
|
||||
pointer += 6;
|
||||
|
||||
BL->position = v4(q->bottom_left.x, q->bottom_left.y, 0, 1);
|
||||
TL->position = v4(q->top_left.x, q->top_left.y, 0, 1);
|
||||
TR->position = v4(q->top_right.x, q->top_right.y, 0, 1);
|
||||
BR->position = v4(q->bottom_right.x, q->bottom_right.y, 0, 1);
|
||||
|
||||
BL->uv = v2(q->uv.x1, q->uv.y1);
|
||||
TL->uv = v2(q->uv.x1, q->uv.y2);
|
||||
TR->uv = v2(q->uv.x2, q->uv.y2);
|
||||
BR->uv = v2(q->uv.x2, q->uv.y1);
|
||||
|
||||
BL->color = TL->color = TR->color = BR->color = q->color;
|
||||
|
||||
BL->texture_index=TL->texture_index=TR->texture_index=BR->texture_index = texture_index;
|
||||
BL->type=TL->type=TR->type=BR->type = (u8)q->type;
|
||||
|
||||
u8 sampler = -1;
|
||||
if (q->image_min_filter == GFX_FILTER_MODE_NEAREST
|
||||
&& q->image_mag_filter == GFX_FILTER_MODE_NEAREST)
|
||||
sampler = 0;
|
||||
if (q->image_min_filter == GFX_FILTER_MODE_LINEAR
|
||||
&& q->image_mag_filter == GFX_FILTER_MODE_LINEAR)
|
||||
sampler = 1;
|
||||
if (q->image_min_filter == GFX_FILTER_MODE_LINEAR
|
||||
&& q->image_mag_filter == GFX_FILTER_MODE_NEAREST)
|
||||
sampler = 2;
|
||||
if (q->image_min_filter == GFX_FILTER_MODE_NEAREST
|
||||
&& q->image_mag_filter == GFX_FILTER_MODE_LINEAR)
|
||||
sampler = 3;
|
||||
|
||||
BL->type=TL->type=TR->type=BR->type = (u8)q->type;
|
||||
BL->sampler=TL->sampler=TR->sampler=BR->sampler = (u8)sampler;
|
||||
|
||||
*BL2 = *BL;
|
||||
*TR2 = *TR;
|
||||
|
||||
number_of_rendered_quads += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -738,7 +746,9 @@ void gfx_update() {
|
|||
|
||||
d3d11_process_draw_frame();
|
||||
|
||||
VTABLE(Present, d3d11_swap_chain, window.enable_vsync, window.enable_vsync ? 0 : DXGI_PRESENT_ALLOW_TEARING);
|
||||
tm_scope_cycles("Present") {
|
||||
VTABLE(Present, d3d11_swap_chain, window.enable_vsync, window.enable_vsync ? 0 : DXGI_PRESENT_ALLOW_TEARING);
|
||||
}
|
||||
|
||||
|
||||
#if CONFIGURATION == DEBUG
|
||||
|
|
|
@ -1,80 +0,0 @@
|
|||
|
||||
// #Temporary #Cleanup
|
||||
// #Temporary #Cleanup
|
||||
// #Temporary #Cleanup
|
||||
// #Temporary #Cleanup
|
||||
#include "GL/gl.h"
|
||||
HDC hdc;
|
||||
typedef BOOL (APIENTRY *PFNWGLSWAPINTERVALEXTPROC) (int interval);
|
||||
|
||||
const Gfx_Handle GFX_INVALID_HANDLE = 0;
|
||||
|
||||
void gfx_init() {
|
||||
// #Temporary #Cleanup
|
||||
// #Temporary #Cleanup
|
||||
// #Temporary #Cleanup
|
||||
// #Temporary #Cleanup
|
||||
|
||||
PIXELFORMATDESCRIPTOR pfd = {
|
||||
sizeof(PIXELFORMATDESCRIPTOR),
|
||||
1,
|
||||
PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER,
|
||||
PFD_TYPE_RGBA,
|
||||
32,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0,
|
||||
0, 0, 0, 0, 0,
|
||||
24,
|
||||
8,
|
||||
0,
|
||||
PFD_MAIN_PLANE,
|
||||
0,
|
||||
0, 0, 0
|
||||
};
|
||||
|
||||
hdc = GetDC(window._os_handle);
|
||||
int pixelFormat = ChoosePixelFormat(hdc, &pfd);
|
||||
SetPixelFormat(hdc, pixelFormat, &pfd);
|
||||
|
||||
HGLRC hglrc = wglCreateContext(hdc);
|
||||
wglMakeCurrent(hdc, hglrc);
|
||||
|
||||
PFNWGLSWAPINTERVALEXTPROC wglSwapIntervalEXT = (PFNWGLSWAPINTERVALEXTPROC) wglGetProcAddress("wglSwapIntervalEXT");
|
||||
|
||||
assert(wglSwapIntervalEXT, "Could not load wglSwapIntervalEXT");
|
||||
|
||||
wglSwapIntervalEXT(0);
|
||||
|
||||
glEnable(GL_BLEND);
|
||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
|
||||
}
|
||||
|
||||
void gfx_update() {
|
||||
|
||||
Draw_Quad_Block *block = &draw_frame.first_block;
|
||||
glBegin(GL_QUADS);
|
||||
while (block != 0) {
|
||||
|
||||
for (u64 i = 0; i < block->num_quads; i++) {
|
||||
Draw_Quad q = block->quad_buffer[i];
|
||||
|
||||
|
||||
glColor4f(v4_expand(q.color));
|
||||
glVertex2f(v2_expand(q.bottom_left));
|
||||
glVertex2f(v2_expand(q.top_left));
|
||||
glVertex2f(v2_expand(q.top_right));
|
||||
glVertex2f(v2_expand(q.bottom_right));
|
||||
|
||||
}
|
||||
|
||||
block = block->next;
|
||||
}
|
||||
glEnd();
|
||||
|
||||
SwapBuffers(hdc);
|
||||
glClearColor(window.clear_color.r, window.clear_color.g, window.clear_color.b, window.clear_color.a);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
glViewport(0, 0, window.width, window.height);
|
||||
|
||||
draw_frame = ZERO(Draw_Frame);
|
||||
}
|
|
@ -489,6 +489,9 @@ thread_local void * temporary_storage_pointer = 0;
|
|||
thread_local bool has_warned_temporary_storage_overflow = false;
|
||||
thread_local Allocator temp;
|
||||
|
||||
Allocator get_temporary_allocator() {
|
||||
return temp;
|
||||
}
|
||||
|
||||
void* temp_allocator_proc(u64 size, void *p, Allocator_Message message, void* data) {
|
||||
switch (message) {
|
||||
|
|
|
@ -107,7 +107,7 @@
|
|||
|
||||
#define OGB_VERSION_MAJOR 0
|
||||
#define OGB_VERSION_MINOR 0
|
||||
#define OGB_VERSION_PATCH 4
|
||||
#define OGB_VERSION_PATCH 5
|
||||
|
||||
#define OGB_VERSION (OGB_VERSION_MAJOR*1000000+OGB_VERSION_MINOR*1000+OGB_VERSION_PATCH)
|
||||
|
||||
|
@ -266,6 +266,7 @@ typedef u8 bool;
|
|||
#include "path_utils.c"
|
||||
#include "linmath.c"
|
||||
#include "range.c"
|
||||
#include "utility.c"
|
||||
|
||||
#include "hash_table.c"
|
||||
|
||||
|
@ -292,8 +293,6 @@ typedef u8 bool;
|
|||
#error "We only have a D3D11 renderer at the moment"
|
||||
#elif GFX_RENDERER == GFX_RENDERER_METAL
|
||||
#error "We only have a D3D11 renderer at the moment"
|
||||
#elif GFX_RENDERER == GFX_RENDERER_LEGACY_OPENGL
|
||||
#include "gfx_impl_legacy_opengl.c"
|
||||
#else
|
||||
#error "Unknown renderer GFX_RENDERER defined"
|
||||
#endif
|
||||
|
|
|
@ -139,7 +139,6 @@ void os_init(u64 program_memory_size) {
|
|||
|
||||
memset(&window, 0, sizeof(window));
|
||||
|
||||
timeBeginPeriod(1);
|
||||
#if CONFIGURATION == RELEASE
|
||||
SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
|
||||
#endif
|
||||
|
@ -364,7 +363,6 @@ bool os_grow_program_memory(u64 new_size) {
|
|||
|
||||
DWORD WINAPI win32_thread_invoker(LPVOID param) {
|
||||
|
||||
timeBeginPeriod(1);
|
||||
#if CONFIGURATION == RELEASE
|
||||
SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS);
|
||||
#endif
|
||||
|
@ -518,13 +516,15 @@ void os_high_precision_sleep(f64 ms) {
|
|||
s32 sleep_time = (s32)((end-start)-1.0);
|
||||
bool do_sleep = sleep_time >= 1;
|
||||
|
||||
timeBeginPeriod(1); // I don't see a reason to reset this
|
||||
timeBeginPeriod(1);
|
||||
|
||||
if (do_sleep) os_sleep(sleep_time);
|
||||
|
||||
while (os_get_current_time_in_seconds() < end) {
|
||||
os_yield_thread();
|
||||
}
|
||||
|
||||
timeEndPeriod(1);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ void _profiler_report_time_cycles(string name, u64 count, u64 start) {
|
|||
spinlock_acquire_or_wait(&_profiler_lock);
|
||||
|
||||
string fmt = STR("{\"cat\":\"function\",\"dur\":%.3f,\"name\":\"%s\",\"ph\":\"X\",\"pid\":0,\"tid\":%zu,\"ts\":%lld},");
|
||||
string_builder_print(&_profile_output, fmt, (float64)count*1000, name, GetCurrentThreadId(), start*1000);
|
||||
string_builder_print(&_profile_output, fmt, (float64)count*1000, name, context.thread_id, start*1000);
|
||||
|
||||
spinlock_release(&_profiler_lock);
|
||||
}
|
||||
|
|
|
@ -3,6 +3,15 @@
|
|||
// I know that we'll have a Range2i at some point, so maybe it's better to be explicit for less confusion?
|
||||
// I'll leave this decision up to u charlie just delete this whenever u see it
|
||||
|
||||
// charlie:
|
||||
// Is this range stuff really necessary?
|
||||
// Why not just:
|
||||
// typedef Vector2 Range1f;
|
||||
// typedef Vector4 Range2f;
|
||||
// Vector4 also already have alias for x1, y1, x2, y2 and we could add an alias for min & max vectors (see linmath.c)
|
||||
// This feels like introducing unnecessary complexity and vocabulary when it's really just
|
||||
// another way to say Vector2 and Vector4.
|
||||
|
||||
typedef struct Range1f {
|
||||
float min;
|
||||
float max;
|
||||
|
|
|
@ -1141,10 +1141,80 @@ void test_mutex() {
|
|||
|
||||
mutex_destroy(&data.mutex);
|
||||
}
|
||||
|
||||
int compare_draw_quads(const void *a, const void *b) {
|
||||
return ((Draw_Quad*)a)->z-((Draw_Quad*)b)->z;
|
||||
}
|
||||
void test_sort() {
|
||||
|
||||
int num_samples = 100;
|
||||
u64 id_bits = 21;
|
||||
u64 item_count = 5000;
|
||||
|
||||
f64 seconds = 0;
|
||||
u64 cycles = 0;
|
||||
|
||||
Draw_Quad *items = alloc(get_heap_allocator(), (item_count * 2) * sizeof(Draw_Quad));
|
||||
Draw_Quad *buffer = items + item_count;
|
||||
|
||||
for (int a = 0; a < num_samples; a++) {
|
||||
|
||||
for (u64 i = 0; i < item_count; i++) {
|
||||
if (i % 2 == 0) items[i].z = get_random_int_in_range(0, pow(2, id_bits) / 2);
|
||||
else items[i].z = i;
|
||||
}
|
||||
|
||||
u64 item_size = sizeof(Draw_Quad);
|
||||
u64 sort_value_offset_in_item = offsetof(Draw_Quad, z);
|
||||
|
||||
float64 start_seconds = os_get_current_time_in_seconds();
|
||||
u64 start_cycles = os_get_current_cycle_count();
|
||||
radix_sort(items, buffer, item_count, item_size, sort_value_offset_in_item, id_bits);
|
||||
u64 end_cycles = os_get_current_cycle_count();
|
||||
float64 end_seconds = os_get_current_time_in_seconds();
|
||||
|
||||
for (u64 i = 1; i < item_count; i++) {
|
||||
assert(items[i].z >= items[i-1].z, "Failed: not correctly sorted");
|
||||
}
|
||||
|
||||
seconds += end_seconds - start_seconds;
|
||||
cycles += end_cycles - start_cycles;
|
||||
}
|
||||
|
||||
print("Radix sort took on average %llu cycles and %.2f ms\n", cycles / num_samples, (seconds * 1000.0) / (float64)num_samples);
|
||||
|
||||
seconds = 0;
|
||||
cycles = 0;
|
||||
for (int a = 0; a < num_samples; a++) {
|
||||
|
||||
for (u64 i = 0; i < item_count; i++) {
|
||||
if (i % 2 == 0) items[i].z = get_random_int_in_range(0, pow(2, id_bits) / 2);
|
||||
else items[i].z = i;
|
||||
}
|
||||
|
||||
u64 item_size = sizeof(Draw_Quad);
|
||||
u64 sort_value_offset_in_item = offsetof(Draw_Quad, z);
|
||||
|
||||
float64 start_seconds = os_get_current_time_in_seconds();
|
||||
u64 start_cycles = os_get_current_cycle_count();
|
||||
merge_sort(items, buffer, item_count, item_size, compare_draw_quads);
|
||||
u64 end_cycles = os_get_current_cycle_count();
|
||||
float64 end_seconds = os_get_current_time_in_seconds();
|
||||
|
||||
for (u64 i = 1; i < item_count; i++) {
|
||||
assert(items[i].z >= items[i-1].z, "Failed: not correctly sorted");
|
||||
}
|
||||
|
||||
seconds += end_seconds - start_seconds;
|
||||
cycles += end_cycles - start_cycles;
|
||||
}
|
||||
|
||||
print("Merge sort took on average %llu cycles and %.2f ms\n", cycles / num_samples, (seconds * 1000.0) / (float64)num_samples);
|
||||
}
|
||||
void oogabooga_run_tests() {
|
||||
|
||||
|
||||
print("Testing allocator... ");
|
||||
/*print("Testing allocator... ");
|
||||
test_allocator(true);
|
||||
print("OK!\n");
|
||||
|
||||
|
@ -1178,7 +1248,9 @@ void oogabooga_run_tests() {
|
|||
|
||||
print("Testing mutex... ");
|
||||
test_mutex();
|
||||
print("OK!\n");*/
|
||||
|
||||
print("Testing radix sort... ");
|
||||
test_sort();
|
||||
print("OK!\n");
|
||||
|
||||
|
||||
}
|
103
oogabooga/utility.c
Normal file
103
oogabooga/utility.c
Normal file
|
@ -0,0 +1,103 @@
|
|||
|
||||
|
||||
|
||||
// This is a very niche sort algorithm.
|
||||
// I use it for Z sorting quads.
|
||||
// help_buffer should be same size as collection.
|
||||
// This only works with integers, and it will use the first number_of_bits in the integer
|
||||
// at sort_value_offset_in_item for sorting.
|
||||
// There is a cost of memory as we need to double the buffer we're sorting BUT the performance
|
||||
// gain is very promising.
|
||||
// At 21 bits I'm able to sort a completely randomized collection of 100k integers at around
|
||||
// 8m cycles (or 2.5-2.6ms on my shitty laptop i5-11300H)
|
||||
void radix_sort(void *collection, void *help_buffer, u64 item_count, u64 item_size, u64 sort_value_offset_in_item, u64 number_of_bits) {
|
||||
|
||||
local_persist const int RADIX = 256;
|
||||
local_persist const int BITS_PER_PASS = 8;
|
||||
local_persist const int MASK = (RADIX - 1);
|
||||
|
||||
const int PASS_COUNT = ((number_of_bits + BITS_PER_PASS - 1) / BITS_PER_PASS);
|
||||
const u64 SIGN_SHIFT = 1ULL << (number_of_bits - 1);
|
||||
|
||||
u64* count = (u64*)alloc(get_temporary_allocator(), RADIX * sizeof(u64));
|
||||
u8* items = (u8*)collection;
|
||||
u8* buffer = (u8*)help_buffer;
|
||||
|
||||
for (u32 pass = 0; pass < PASS_COUNT; ++pass) {
|
||||
u32 shift = pass * BITS_PER_PASS;
|
||||
|
||||
for (u32 i = 0; i < RADIX; ++i) {
|
||||
count[i] = 0;
|
||||
}
|
||||
|
||||
for (u64 i = 0; i < item_count; ++i) {
|
||||
u64 sort_value = *(u64*)(items + i * item_size + sort_value_offset_in_item);
|
||||
sort_value += SIGN_SHIFT; // Transform the value to handle negative numbers
|
||||
u32 digit = (sort_value >> shift) & MASK;
|
||||
++count[digit];
|
||||
}
|
||||
|
||||
u64 sum = 0;
|
||||
for (u32 i = 0; i < RADIX; ++i) {
|
||||
u64 temp = count[i];
|
||||
count[i] = sum;
|
||||
sum += temp;
|
||||
}
|
||||
|
||||
for (u64 i = 0; i < item_count; ++i) {
|
||||
u64 sort_value = *(u64*)(items + i * item_size + sort_value_offset_in_item);
|
||||
u64 transformed_value = sort_value + SIGN_SHIFT; // Transform the value to handle negative numbers
|
||||
u32 digit = (transformed_value >> shift) & MASK;
|
||||
memcpy(buffer + count[digit] * item_size, items + i * item_size, item_size);
|
||||
++count[digit];
|
||||
}
|
||||
|
||||
memcpy(items, buffer, item_count * item_size);
|
||||
}
|
||||
|
||||
dealloc(get_temporary_allocator(), count);
|
||||
}
|
||||
|
||||
// Iterative (bottom-up) merge sort.
// Sorts the item_count items of item_size bytes in collection in place, using help_buffer
// (same size as collection) as the merge target.
// compare(a, b) receives pointers to two items; a result <= 0 means a is emitted first, so
// items that compare equal keep their relative order.
void merge_sort(void *collection, void *help_buffer, u64 item_count, u64 item_size, int (*compare)(const void *, const void *)) {
	u8 *data = (u8 *)collection;
	u8 *scratch = (u8 *)help_buffer;

	// Runs of length run_length are already sorted; merge neighbouring pairs, then double.
	for (u64 run_length = 1; run_length < item_count; run_length *= 2) {
		for (u64 begin = 0; begin < item_count; begin += 2 * run_length) {

			// [begin, middle) is the left run, [middle, finish) the right run; both are
			// clamped to the end of the collection.
			u64 middle = begin + run_length;
			if (middle > item_count) middle = item_count;
			u64 finish = begin + 2 * run_length;
			if (finish > item_count) finish = item_count;

			u64 a = begin;
			u64 b = middle;
			u64 out = begin;

			// Take the smaller head of the two runs until one of them is exhausted.
			while (a < middle && b < finish) {
				u8 *chosen;
				if (compare(data + a * item_size, data + b * item_size) <= 0) {
					chosen = data + a * item_size;
					a += 1;
				} else {
					chosen = data + b * item_size;
					b += 1;
				}
				memcpy(scratch + out * item_size, chosen, item_size);
				out += 1;
			}

			// At most one run still has items left; append them in one go.
			if (a < middle) {
				memcpy(scratch + out * item_size, data + a * item_size, (middle - a) * item_size);
				out += middle - a;
			}
			if (b < finish) {
				memcpy(scratch + out * item_size, data + b * item_size, (finish - b) * item_size);
				out += finish - b;
			}

			// Copy the merged range back so the next round reads from collection again.
			memcpy(data + begin * item_size, scratch + begin * item_size, (finish - begin) * item_size);
		}
	}
}
|
Reference in a new issue