diff --git a/TODO b/TODO index b9a5e1a..db313f5 100644 --- a/TODO +++ b/TODO @@ -1,5 +1,8 @@ + - Gamepad +- Compile with msys2, cygwin, mingw64 + - Audio - Allow audio programming - Inject mixer proc per player diff --git a/build.c b/build.c index 9e16373..f398886 100644 --- a/build.c +++ b/build.c @@ -33,11 +33,11 @@ typedef struct Context_Extra { // // This is a minimal starting point for new projects. Copy & rename to get started -// #include "oogabooga/examples/minimal_game_loop.c" +#include "oogabooga/examples/minimal_game_loop.c" // #include "oogabooga/examples/text_rendering.c" // #include "oogabooga/examples/custom_logger.c" -#include "oogabooga/examples/renderer_stress_test.c" +// #include "oogabooga/examples/renderer_stress_test.c" // #include "oogabooga/examples/tile_game.c" // #include "oogabooga/examples/audio_test.c" // #include "oogabooga/examples/custom_shader.c" diff --git a/changelog.txt b/changelog.txt index 2e5c059..858e59a 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,8 @@ +## v0.01.004 + - Misc + - Reworked os_get_current_time_in_seconds() -> os_get_elapsed_seconds() + Now returns seconds since app init. 
+ - draw_frame.view -> draw_frame.camera_xform (deprecated .view) ## v0.01.003 - Mouse pointers, Audio improvement & features, bug fixes - Os layer diff --git a/oogabooga/concurrency.c b/oogabooga/concurrency.c index f0a7f94..fb541b1 100644 --- a/oogabooga/concurrency.c +++ b/oogabooga/concurrency.c @@ -96,7 +96,7 @@ void spinlock_acquire_or_wait(Spinlock* l) { } // Returns true on aquired, false if timeout seconds reached bool spinlock_acquire_or_wait_timeout(Spinlock* l, f64 timeout_seconds) { - f64 start = os_get_current_time_in_seconds(); + f64 start = os_get_elapsed_seconds(); while (true) { bool expected = false; if (compare_and_swap_bool(&l->locked, true, expected)) { @@ -104,7 +104,7 @@ bool spinlock_acquire_or_wait_timeout(Spinlock* l, f64 timeout_seconds) { } while (l->locked) { // spinny boi - if ((os_get_current_time_in_seconds()-start) >= timeout_seconds) return false; + if ((os_get_elapsed_seconds()-start) >= timeout_seconds) return false; } } return true; diff --git a/oogabooga/drawing.c b/oogabooga/drawing.c index 239ebe7..60a7bca 100644 --- a/oogabooga/drawing.c +++ b/oogabooga/drawing.c @@ -48,43 +48,52 @@ typedef struct Draw_Quad { typedef struct Draw_Frame { - u64 num_quads; - Matrix4 projection; - Matrix4 view; - - bool enable_z_sorting; - s32 z_stack[Z_STACK_MAX]; - u64 z_count; - - Vector4 scissor_stack[SCISSOR_STACK_MAX]; - u64 scissor_count; + union { + DEPRECATED(Matrix4 view, "Use draw_frame.camera_xform instead"); + Matrix4 camera_xform; + }; void *cbuffer; + u64 scissor_count; + Vector4 scissor_stack[SCISSOR_STACK_MAX]; + + Draw_Quad *quad_buffer; + + u64 z_count; + s32 z_stack[Z_STACK_MAX]; + bool enable_z_sorting; + } Draw_Frame; -// #Cleanup this should be in Draw_Frame -// #Global -ogb_instance Draw_Quad *quad_buffer; -ogb_instance u64 allocated_quads; // This frame is passed to the platform layer and rendered in os_update. // Resets every frame. 
-ogb_instance Draw_Frame draw_frame; +ogb_instance Draw_Frame draw_frame = {0}; #if !OOGABOOGA_LINK_EXTERNAL_INSTANCE -Draw_Quad *quad_buffer; -u64 allocated_quads; -Draw_Frame draw_frame = ZERO(Draw_Frame); +Draw_Frame draw_frame; #endif // NOT OOGABOOGA_LINK_EXTERNAL_INSTANCE void reset_draw_frame(Draw_Frame *frame) { + + // #Memory + // I would like to try to have the quad buffer to be allocated in a growing arena + // which is reset every frames, like temp allocator but large enough to fit the + // highest number of quads the program submits in a frame. + // For now, we just reset the count in the heap allocated buffer + + Draw_Quad *quad_buffer = frame->quad_buffer; + if (quad_buffer) growing_array_clear((void**)&quad_buffer); + *frame = (Draw_Frame){0}; + frame->quad_buffer = quad_buffer; + float32 aspect = (float32)window.width/(float32)window.height; frame->projection = m4_make_orthographic_projection(-aspect, aspect, -1, 1, -1, 10); - frame->view = m4_scalar(1.0); + frame->camera_xform = m4_scalar(1.0); } void push_z_layer(s32 z) { @@ -141,35 +150,26 @@ Draw_Quad *draw_quad_projected(Draw_Quad quad, Matrix4 world_to_clip) { memset(quad.userdata, 0, sizeof(quad.userdata)); - if (draw_frame.num_quads >= allocated_quads) { + if (!draw_frame.quad_buffer) { // #Memory - - u64 new_count = max(get_next_power_of_two(draw_frame.num_quads+1), 128); - - Draw_Quad *new_buffer = alloc(get_heap_allocator(), new_count*sizeof(Draw_Quad)); - - if (quad_buffer) { - memcpy(new_buffer, quad_buffer, draw_frame.num_quads*sizeof(Draw_Quad)); - dealloc(get_heap_allocator(), quad_buffer); - } - - quad_buffer = new_buffer; - allocated_quads = new_count; + // Use an arena + growing_array_init((void**)&draw_frame.quad_buffer, sizeof(Draw_Quad), get_heap_allocator()); } - quad_buffer[draw_frame.num_quads] = quad; - draw_frame.num_quads += 1; + Draw_Quad **target_buffer = &draw_frame.quad_buffer; - return &quad_buffer[draw_frame.num_quads-1]; + growing_array_add((void**)target_buffer, 
&quad); + + return &(*target_buffer)[growing_array_get_valid_count(*target_buffer)-1]; } Draw_Quad *draw_quad(Draw_Quad quad) { - return draw_quad_projected(quad, m4_mul(draw_frame.projection, m4_inverse(draw_frame.view))); + return draw_quad_projected(quad, m4_mul(draw_frame.projection, m4_inverse(draw_frame.camera_xform))); } Draw_Quad *draw_quad_xform(Draw_Quad quad, Matrix4 xform) { Matrix4 world_to_clip = m4_scalar(1.0); world_to_clip = m4_mul(world_to_clip, draw_frame.projection); - world_to_clip = m4_mul(world_to_clip, m4_inverse(draw_frame.view)); + world_to_clip = m4_mul(world_to_clip, m4_inverse(draw_frame.camera_xform)); world_to_clip = m4_mul(world_to_clip, xform); return draw_quad_projected(quad, world_to_clip); } diff --git a/oogabooga/examples/custom_shader.c b/oogabooga/examples/custom_shader.c index 5784cf6..fcfa627 100644 --- a/oogabooga/examples/custom_shader.c +++ b/oogabooga/examples/custom_shader.c @@ -44,10 +44,10 @@ int entry(int argc, char **argv) { // memory from an invalid address. 
My_Cbuffer cbuffer; - float64 last_time = os_get_current_time_in_seconds(); + float64 last_time = os_get_elapsed_seconds(); while (!window.should_close) { - float64 now = os_get_current_time_in_seconds(); + float64 now = os_get_elapsed_seconds(); if ((int)now != (int)last_time) { log("%.2f FPS\n%.2fms", 1.0/(now-last_time), (now-last_time)*1000); } @@ -88,8 +88,8 @@ int entry(int argc, char **argv) { } Vector2 world_to_screen(Vector2 p) { - Vector4 in_view_space = m4_transform(draw_frame.view, v4(p.x, p.y, 0.0, 1.0)); - Vector4 in_clip_space = m4_transform(draw_frame.projection, in_view_space); + Vector4 in_cam_space = m4_transform(draw_frame.camera_xform, v4(p.x, p.y, 0.0, 1.0)); + Vector4 in_clip_space = m4_transform(draw_frame.projection, in_cam_space); Vector4 ndc = { .x = in_clip_space.x / in_clip_space.w, diff --git a/oogabooga/examples/growing_array_example.c b/oogabooga/examples/growing_array_example.c index 5ec001a..b7eb306 100644 --- a/oogabooga/examples/growing_array_example.c +++ b/oogabooga/examples/growing_array_example.c @@ -35,9 +35,9 @@ int entry(int argc, char **argv) { assert(circles[i].pos.y == c.pos.y); } - float64 last_time = os_get_current_time_in_seconds(); + float64 last_time = os_get_elapsed_seconds(); while (!window.should_close) { - float64 now = os_get_current_time_in_seconds(); + float64 now = os_get_elapsed_seconds(); if ((int)now != (int)last_time) log("%.2f FPS\n%.2fms", 1.0/(now-last_time), (now-last_time)*1000); last_time = now; diff --git a/oogabooga/examples/hotload/build_engine.c b/oogabooga/examples/hotload/build_engine.c index c88a6ce..7077627 100644 --- a/oogabooga/examples/hotload/build_engine.c +++ b/oogabooga/examples/hotload/build_engine.c @@ -55,9 +55,9 @@ int entry(int argc, char **argv) { window.y = 90; window.clear_color = hex_to_rgba(0x6495EDff); - float64 last_time = os_get_current_time_in_seconds(); + float64 last_time = os_get_elapsed_seconds(); while (!window.should_close) { - float64 now = 
os_get_current_time_in_seconds(); + float64 now = os_get_elapsed_seconds(); float64 delta = now-last_time; if ((int)now != (int)last_time) log("%.2f FPS\n%.2fms", 1.0/(delta), (delta)*1000); last_time = now; diff --git a/oogabooga/examples/hotload/build_game.c b/oogabooga/examples/hotload/build_game.c index 266b911..8ce1c92 100644 --- a/oogabooga/examples/hotload/build_game.c +++ b/oogabooga/examples/hotload/build_game.c @@ -13,7 +13,7 @@ void SHARED_EXPORT game_update(f64 delta_time) { - float64 now = os_get_current_time_in_seconds(); + float64 now = os_get_elapsed_seconds(); Matrix4 rect_xform = m4_scalar(1.0); rect_xform = m4_rotate_z(rect_xform, (f32)now); diff --git a/oogabooga/examples/minimal_game_loop.c b/oogabooga/examples/minimal_game_loop.c index 848e04e..672ca73 100644 --- a/oogabooga/examples/minimal_game_loop.c +++ b/oogabooga/examples/minimal_game_loop.c @@ -8,9 +8,9 @@ int entry(int argc, char **argv) { window.y = 90; window.clear_color = hex_to_rgba(0x6495EDff); - float64 last_time = os_get_current_time_in_seconds(); + float64 last_time = os_get_elapsed_seconds(); while (!window.should_close) { - float64 now = os_get_current_time_in_seconds(); + float64 now = os_get_elapsed_seconds(); if ((int)now != (int)last_time) log("%.2f FPS\n%.2fms", 1.0/(now-last_time), (now-last_time)*1000); last_time = now; diff --git a/oogabooga/examples/renderer_stress_test.c b/oogabooga/examples/renderer_stress_test.c index 1e773dc..f7a2a9e 100644 --- a/oogabooga/examples/renderer_stress_test.c +++ b/oogabooga/examples/renderer_stress_test.c @@ -38,17 +38,17 @@ int entry(int argc, char **argv) { const float64 fps_limit = 69000; const float64 min_frametime = 1.0 / fps_limit; - Matrix4 camera_view = m4_scalar(1.0); + Matrix4 camera_xform = m4_scalar(1.0); - float64 last_time = os_get_current_time_in_seconds(); + float64 last_time = os_get_elapsed_seconds(); while (!window.should_close) tm_scope("Frame") { reset_temporary_storage(); - float64 now = 
os_get_current_time_in_seconds(); + float64 now = os_get_elapsed_seconds(); float64 delta = now - last_time; if (delta < min_frametime) { os_high_precision_sleep((min_frametime-delta)*1000.0); - now = os_get_current_time_in_seconds(); + now = os_get_elapsed_seconds(); delta = now - last_time; } last_time = now; @@ -89,8 +89,8 @@ int entry(int argc, char **argv) { } Vector2 cam_move = v2_mulf(cam_move_axis, delta * cam_move_speed); - camera_view = m4_translate(camera_view, v3(v2_expand(cam_move), 0)); - draw_frame.view = camera_view; + camera_xform = m4_translate(camera_xform, v3(v2_expand(cam_move), 0)); + draw_frame.camera_xform = camera_xform; local_persist bool do_enable_z_sorting = false; draw_frame.enable_z_sorting = do_enable_z_sorting; @@ -104,7 +104,7 @@ int entry(int argc, char **argv) { } seed_for_random = 69; - for (u64 i = 0; i < 30000; i++) { + for (u64 i = 0; i < 2; i++) { float32 aspect = (float32)window.width/(float32)window.height; float min_x = -aspect; float max_x = aspect; @@ -120,6 +120,16 @@ int entry(int argc, char **argv) { } seed_for_random = rdtsc(); + + draw_image(bush_image, v2(0.65, 0.65), v2(0.2*sin(now), 0.2*sin(now)), COLOR_WHITE); + + u32 atlas_index = 0; + Gfx_Font_Atlas *atlas = (Gfx_Font_Atlas*)hash_table_find(&font->variations[32].atlases, atlas_index); + + draw_text(font, STR("I am text"), 128, v2(sin(now), -0.61), v2(0.001, 0.001), COLOR_BLACK); + draw_text(font, STR("I am text"), 128, v2(sin(now)-0.01, -0.6), v2(0.001, 0.001), COLOR_WHITE); + + draw_text(font, STR("Hello jje\nnew line"), 128, v2(-1, 0.5), v2(0.001, 0.001), COLOR_WHITE); Matrix4 hammer_xform = m4_scalar(1.0); hammer_xform = m4_rotate_z(hammer_xform, (f32)now); hammer_xform = m4_translate(hammer_xform, v3(-.25f, -.25f, 0)); @@ -132,16 +142,6 @@ int entry(int argc, char **argv) { Vector2 local_pivot = v2(.125f, .125f); draw_circle(v2_sub(hover_position, local_pivot), v2(.25f, .25f), v4((sin(now)+1.0)/2.0, 1.0, 0.0, 1.0)); - draw_image(bush_image, v2(0.65, 0.65), 
v2(0.2*sin(now), 0.2*sin(now)), COLOR_WHITE); - - u32 atlas_index = 0; - Gfx_Font_Atlas *atlas = (Gfx_Font_Atlas*)hash_table_find(&font->variations[32].atlases, atlas_index); - - draw_text(font, STR("I am text"), 128, v2(sin(now), -0.61), v2(0.001, 0.001), COLOR_BLACK); - draw_text(font, STR("I am text"), 128, v2(sin(now)-0.01, -0.6), v2(0.001, 0.001), COLOR_WHITE); - - draw_text(font, STR("Hello jje\nnew line"), 128, v2(-1, 0.5), v2(0.001, 0.001), COLOR_WHITE); - local_persist bool show = false; if (is_key_just_pressed('T')) show = !show; diff --git a/oogabooga/examples/text_rendering.c b/oogabooga/examples/text_rendering.c index 10c9478..3897443 100644 --- a/oogabooga/examples/text_rendering.c +++ b/oogabooga/examples/text_rendering.c @@ -29,7 +29,7 @@ int entry(int argc, char **argv) { draw_text(font, STR("I am text"), font_height, v2(-2, 2), v2(1, 1), COLOR_BLACK); draw_text(font, STR("I am text"), font_height, v2(0, 0), v2(1, 1), COLOR_WHITE); - float now = (float)os_get_current_time_in_seconds(); + float now = (float)os_get_elapsed_seconds(); float animated_x = sin(now*0.1)*(window.width*0.5); // UTF-8 ! 
diff --git a/oogabooga/examples/tile_game.c b/oogabooga/examples/tile_game.c index 4c18afe..bbc1ff7 100644 --- a/oogabooga/examples/tile_game.c +++ b/oogabooga/examples/tile_game.c @@ -42,7 +42,7 @@ App_State app_state = APP_STATE_EDITING; s64 current_tile_layer = 0; -Matrix4 camera_view; +Matrix4 camera_xform; void update_editor(); void update_game(); @@ -56,16 +56,16 @@ int entry(int argc, char **argv) { window.y = 90; window.clear_color = hex_to_rgba(0x6495EDff); - camera_view = m4_scalar(1.0); + camera_xform = m4_scalar(1.0); - float64 last_time = os_get_current_time_in_seconds(); + float64 last_time = os_get_elapsed_seconds(); while (!window.should_close) { reset_temporary_storage(); draw_frame.projection = m4_make_orthographic_projection(window.pixel_width * -0.5, window.pixel_width * 0.5, window.pixel_height * -0.5, window.pixel_height * 0.5, -1, 10); draw_frame.enable_z_sorting = true; - float64 now = os_get_current_time_in_seconds(); + float64 now = os_get_elapsed_seconds(); delta_time = (float32)(now - last_time); last_time = now; @@ -84,7 +84,7 @@ int entry(int argc, char **argv) { Vector2 screen_to_world(Vector2 screen) { Matrix4 proj = draw_frame.projection; - Matrix4 view = draw_frame.view; + Matrix4 cam = draw_frame.camera_xform; float window_w = window.width; float window_h = window.height; @@ -95,7 +95,7 @@ Vector2 screen_to_world(Vector2 screen) { // Transform to world coordinates Vector4 world_pos = v4(ndc_x, ndc_y, 0, 1); world_pos = m4_transform(m4_inverse(proj), world_pos); - world_pos = m4_transform(view, world_pos); + world_pos = m4_transform(cam, world_pos); return world_pos.xy; } @@ -122,8 +122,8 @@ void update_editor() { } Vector2 cam_move = v2_mulf(cam_move_axis, delta_time * cam_move_speed); - camera_view = m4_translate(camera_view, v3(v2_expand(cam_move), 0)); - draw_frame.view = camera_view; + camera_xform = m4_translate(camera_xform, v3(v2_expand(cam_move), 0)); + draw_frame.camera_xform = camera_xform; Vector2 bottom_left = 
screen_to_world(v2(-window.width/2, -window.height/2)); Vector2 top_right = screen_to_world(v2( window.width/2, window.height/2)); diff --git a/oogabooga/gfx_impl_d3d11.c b/oogabooga/gfx_impl_d3d11.c index ab27362..aa9a534 100644 --- a/oogabooga/gfx_impl_d3d11.c +++ b/oogabooga/gfx_impl_d3d11.c @@ -610,23 +610,26 @@ void d3d11_process_draw_frame() { ID3D11DeviceContext_ClearRenderTargetView(d3d11_context, d3d11_window_render_target_view, (float*)&window.clear_color); + u64 number_of_quads = growing_array_get_valid_count(draw_frame.quad_buffer); + /// // Maybe grow quad vbo - u32 required_size = sizeof(D3D11_Vertex) * allocated_quads*6; + u64 required_size = sizeof(D3D11_Vertex) * number_of_quads*6; if (required_size > d3d11_quad_vbo_size) { if (d3d11_quad_vbo) { D3D11Release(d3d11_quad_vbo); dealloc(get_heap_allocator(), d3d11_staging_quad_buffer); } + u64 new_size = get_next_power_of_two(required_size); D3D11_BUFFER_DESC desc = ZERO(D3D11_BUFFER_DESC); desc.Usage = D3D11_USAGE_DYNAMIC; desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - desc.ByteWidth = required_size; + desc.ByteWidth = new_size; desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; HRESULT hr = ID3D11Device_CreateBuffer(d3d11_device, &desc, 0, &d3d11_quad_vbo); assert(SUCCEEDED(hr), "CreateBuffer failed"); - d3d11_quad_vbo_size = required_size; + d3d11_quad_vbo_size = new_size; d3d11_staging_quad_buffer = alloc(get_heap_allocator(), d3d11_quad_vbo_size); assert((u64)d3d11_staging_quad_buffer%16 == 0); @@ -634,7 +637,7 @@ void d3d11_process_draw_frame() { log_verbose("Grew quad vbo to %d bytes.", d3d11_quad_vbo_size); } - if (draw_frame.num_quads > 0) { + if (number_of_quads > 0) { /// // Render geometry from into vbo quad list @@ -648,20 +651,22 @@ void d3d11_process_draw_frame() { D3D11_Vertex* pointer = head; u64 number_of_rendered_quads = 0; + + tm_scope("Quad processing") { if (draw_frame.enable_z_sorting) tm_scope("Z sorting") { - if (!sort_quad_buffer || (sort_quad_buffer_size < 
allocated_quads*sizeof(Draw_Quad))) { + if (!sort_quad_buffer || (sort_quad_buffer_size < number_of_quads*sizeof(Draw_Quad))) { // #Memory #Heapalloc if (sort_quad_buffer) dealloc(get_heap_allocator(), sort_quad_buffer); - sort_quad_buffer = alloc(get_heap_allocator(), allocated_quads*sizeof(Draw_Quad)); - sort_quad_buffer_size = allocated_quads*sizeof(Draw_Quad); + sort_quad_buffer = alloc(get_heap_allocator(), number_of_quads*sizeof(Draw_Quad)); + sort_quad_buffer_size = number_of_quads*sizeof(Draw_Quad); } - radix_sort(quad_buffer, sort_quad_buffer, draw_frame.num_quads, sizeof(Draw_Quad), offsetof(Draw_Quad, z), MAX_Z_BITS); + radix_sort(draw_frame.quad_buffer, sort_quad_buffer, number_of_quads, sizeof(Draw_Quad), offsetof(Draw_Quad, z), MAX_Z_BITS); } - for (u64 i = 0; i < draw_frame.num_quads; i++) { + for (u64 i = 0; i < number_of_quads; i++) { - Draw_Quad *q = &quad_buffer[i]; + Draw_Quad *q = &draw_frame.quad_buffer[i]; assert(q->z <= MAX_Z, "Z is too high. Z is %d, Max is %d.", q->z, MAX_Z); assert(q->z >= (-MAX_Z+1), "Z is too low. Z is %d, Min is %d.", q->z, -MAX_Z+1); @@ -705,32 +710,27 @@ void d3d11_process_draw_frame() { last_texture_index = texture_index; } - if (q->type == QUAD_TYPE_TEXT) { - - // This is meant to fix the annoying artifacts that shows up when sampling text from an atlas - // presumably for floating point precision issues or something. - - // #Incomplete - // If we want to animate text with small movements then it will look wonky. - // This should be optional probably. - // Also, we might want to do this on non-text if rendering with linear filtering - // from a large texture atlas. - - float pixel_width = 2.0/(float)window.width; - float pixel_height = 2.0/(float)window.height; + // This is meant to fix the annoying artifacts that shows up when sampling from a large atlas + // presumably for floating point precision issues or something. + + // #Incomplete + // If we want to animate text with small movements then it will look wonky. 
+ // This should be optional probably. + + float pixel_width = 2.0/(float)window.width; + float pixel_height = 2.0/(float)window.height; - bool xeven = window.width % 2 == 0; - bool yeven = window.height % 2 == 0; - - q->bottom_left.x = round(q->bottom_left.x / pixel_width) * pixel_width; - q->bottom_left.y = round(q->bottom_left.y / pixel_height) * pixel_height; - q->top_left.x = round(q->top_left.x / pixel_width) * pixel_width; - q->top_left.y = round(q->top_left.y / pixel_height) * pixel_height; - q->top_right.x = round(q->top_right.x / pixel_width) * pixel_width; - q->top_right.y = round(q->top_right.y / pixel_height) * pixel_height; - q->bottom_right.x = round(q->bottom_right.x / pixel_width) * pixel_width; - q->bottom_right.y = round(q->bottom_right.y / pixel_height) * pixel_height; - } + bool xeven = window.width % 2 == 0; + bool yeven = window.height % 2 == 0; + + q->bottom_left.x = round(q->bottom_left.x / pixel_width) * pixel_width; + q->bottom_left.y = round(q->bottom_left.y / pixel_height) * pixel_height; + q->top_left.x = round(q->top_left.x / pixel_width) * pixel_width; + q->top_left.y = round(q->top_left.y / pixel_height) * pixel_height; + q->top_right.x = round(q->top_right.x / pixel_width) * pixel_width; + q->top_right.y = round(q->top_right.y / pixel_height) * pixel_height; + q->bottom_right.x = round(q->bottom_right.x / pixel_width) * pixel_width; + q->bottom_right.y = round(q->bottom_right.y / pixel_height) * pixel_height; // We will write to 6 vertices for the one quad (two tris) { diff --git a/oogabooga/oogabooga.c b/oogabooga/oogabooga.c index 3abfcf3..964aba0 100644 --- a/oogabooga/oogabooga.c +++ b/oogabooga/oogabooga.c @@ -118,7 +118,7 @@ #define OGB_VERSION_MAJOR 0 #define OGB_VERSION_MINOR 1 -#define OGB_VERSION_PATCH 3 +#define OGB_VERSION_PATCH 4 #define OGB_VERSION (OGB_VERSION_MAJOR*1000000+OGB_VERSION_MINOR*1000+OGB_VERSION_PATCH) diff --git a/oogabooga/os_impl_windows.c b/oogabooga/os_impl_windows.c index a949274..f405133 100644 
--- a/oogabooga/os_impl_windows.c +++ b/oogabooga/os_impl_windows.c @@ -108,6 +108,7 @@ bool win32_want_override_mouse_pointer = false; HCURSOR win32_shadowed_mouse_pointer = 0; bool win32_did_override_user_mouse_pointer = false; SYSTEM_INFO win32_system_info; +LARGE_INTEGER win32_counter_at_start; #ifndef OOGABOOGA_HEADLESS @@ -391,7 +392,7 @@ void os_init(u64 program_memory_capacity) { heap_init(); - + QueryPerformanceCounter(&win32_counter_at_start); #ifndef OOGABOOGA_HEADLESS win32_init_window(); @@ -604,7 +605,7 @@ void os_high_precision_sleep(f64 ms) { const f64 s = ms/1000.0; - f64 start = os_get_current_time_in_seconds(); + f64 start = os_get_elapsed_seconds(); f64 end = start + (f64)s; s32 sleep_time = (s32)((end-start)-1.0); bool do_sleep = sleep_time >= 1; @@ -613,7 +614,7 @@ void os_high_precision_sleep(f64 ms) { if (do_sleep) os_sleep(sleep_time); - while (os_get_current_time_in_seconds() < end) { + while (os_get_elapsed_seconds() < end) { os_yield_thread(); } @@ -627,16 +628,22 @@ void os_high_precision_sleep(f64 ms) { /// -u64 os_get_current_cycle_count() { - return rdtsc(); -} - -float64 os_get_current_time_in_seconds() { +// #Cleanup deprecated +float64 +os_get_current_time_in_seconds() { LARGE_INTEGER frequency, counter; if (!QueryPerformanceFrequency(&frequency) || !QueryPerformanceCounter(&counter)) { return -1.0; } - return (double)counter.QuadPart / (double)frequency.QuadPart; + return (float64)counter.QuadPart / (float64)frequency.QuadPart; +} + +float64 +os_get_elapsed_seconds() { + LARGE_INTEGER freq, counter = (LARGE_INTEGER){0}; + QueryPerformanceFrequency(&freq); + QueryPerformanceCounter(&counter); + return (float64)(counter.QuadPart-win32_counter_at_start.QuadPart) / (float64)freq.QuadPart; } diff --git a/oogabooga/os_interface.c b/oogabooga/os_interface.c index 9d5817c..33bb936 100644 --- a/oogabooga/os_interface.c +++ b/oogabooga/os_interface.c @@ -163,10 +163,12 @@ os_high_precision_sleep(f64 ms); // Time /// -DEPRECATED(u64 
os_get_current_cycle_count(), "use rdtsc() instead"); +float64 ogb_instance +DEPRECATED(os_get_current_time_in_seconds(), "Use os_get_elapsed_seconds() instead"); float64 ogb_instance -os_get_current_time_in_seconds(); +os_get_elapsed_seconds(); + /// /// diff --git a/oogabooga/random.c b/oogabooga/random.c index 012deee..fdd74f8 100644 --- a/oogabooga/random.c +++ b/oogabooga/random.c @@ -6,7 +6,7 @@ #define INCREMENT 1442695040888963407ull // #Global -// set this to something like os_get_current_cycle_count() for very randomized seed +// set this to something like rdtsc() for very randomized seed ogb_instance u64 seed_for_random; #if !OOGABOOGA_LINK_EXTERNAL_INSTANCE diff --git a/oogabooga/tests.c b/oogabooga/tests.c index e55c7ef..e7a9e3a 100644 --- a/oogabooga/tests.c +++ b/oogabooga/tests.c @@ -1208,7 +1208,7 @@ void test_mutex() { Allocator allocator = get_heap_allocator(); - const int num_threads = 1000; + const int num_threads = 100; Thread *threads = alloc(allocator, sizeof(Thread)*num_threads); for (u64 i = 0; i < num_threads; i++) { @@ -1233,9 +1233,9 @@ int compare_draw_quads(const void *a, const void *b) { } void test_sort() { - int num_samples = 100; + int num_samples = 500; u64 id_bits = 21; - u64 item_count = 5000; + u64 item_count = 50000; f64 seconds = 0; u64 cycles = 0; @@ -1253,11 +1253,11 @@ void test_sort() { u64 item_size = sizeof(Draw_Quad); u64 sort_value_offset_in_item = offsetof(Draw_Quad, z); - float64 start_seconds = os_get_current_time_in_seconds(); + float64 start_seconds = os_get_elapsed_seconds(); u64 start_cycles = rdtsc(); radix_sort(items, buffer, item_count, item_size, sort_value_offset_in_item, id_bits); u64 end_cycles = rdtsc(); - float64 end_seconds = os_get_current_time_in_seconds(); + float64 end_seconds = os_get_elapsed_seconds(); for (u64 i = 1; i < item_count; i++) { assert(items[i].z >= items[i-1].z, "Failed: not correctly sorted"); @@ -1281,11 +1281,11 @@ void test_sort() { u64 item_size = sizeof(Draw_Quad); u64 
sort_value_offset_in_item = offsetof(Draw_Quad, z); - float64 start_seconds = os_get_current_time_in_seconds(); + float64 start_seconds = os_get_elapsed_seconds(); u64 start_cycles = rdtsc(); merge_sort(items, buffer, item_count, item_size, compare_draw_quads); u64 end_cycles = rdtsc(); - float64 end_seconds = os_get_current_time_in_seconds(); + float64 end_seconds = os_get_elapsed_seconds(); for (u64 i = 1; i < item_count; i++) { assert(items[i].z >= items[i-1].z, "Failed: not correctly sorted"); diff --git a/oogabooga/utility.c b/oogabooga/utility.c index 32ef337..4b89fc9 100644 --- a/oogabooga/utility.c +++ b/oogabooga/utility.c @@ -13,27 +13,25 @@ void radix_sort(void *collection, void *help_buffer, u64 item_count, u64 item_size, u64 sort_value_offset_in_item, u64 number_of_bits) { local_persist const int RADIX = 256; local_persist const int BITS_PER_PASS = 8; - local_persist const int MASK = (RADIX - 1); const int PASS_COUNT = ((number_of_bits + BITS_PER_PASS - 1) / BITS_PER_PASS); - const u64 SIGN_SHIFT = 1ULL << (number_of_bits - 1); + const u64 HALF_RANGE_OF_VALUE_BITS = 1ULL << (number_of_bits - 1); - u64* count = (u64*)alloc(get_temporary_allocator(), RADIX * sizeof(u64)); - u64* prefix_sum = (u64*)alloc(get_temporary_allocator(), RADIX * sizeof(u64)); - u8* items = (u8*)collection; - u8* buffer = (u8*)help_buffer; + u64 count[RADIX]; + u64 prefix_sum[RADIX]; for (u32 pass = 0; pass < PASS_COUNT; ++pass) { u32 shift = pass * BITS_PER_PASS; - for (u32 i = 0; i < RADIX; ++i) { - count[i] = 0; - } + memset(count, 0, sizeof(count)); for (u64 i = 0; i < item_count; ++i) { - u64 sort_value = *(u64*)(items + i * item_size + sort_value_offset_in_item); - sort_value += SIGN_SHIFT; - u32 digit = (sort_value >> shift) & MASK; + u8 *item = (u8*)collection + i * item_size; + + u64 sort_value = *(u64*)(item + sort_value_offset_in_item); + sort_value += HALF_RANGE_OF_VALUE_BITS; // We treat the value as a signed integer + + u32 digit = (sort_value >> shift) & (RADIX-1); 
++count[digit]; } @@ -43,14 +41,17 @@ void radix_sort(void *collection, void *help_buffer, u64 item_count, u64 item_si } for (u64 i = 0; i < item_count; ++i) { - u64 sort_value = *(u64*)(items + i * item_size + sort_value_offset_in_item); - u64 transformed_value = sort_value + SIGN_SHIFT; - u32 digit = (transformed_value >> shift) & MASK; - memcpy(buffer + prefix_sum[digit] * item_size, items + i * item_size, item_size); + u8 *item = (u8*)collection + i * item_size; + + u64 sort_value = *(u64*)(item + sort_value_offset_in_item); + sort_value += HALF_RANGE_OF_VALUE_BITS; // We treat the value as a signed integer + + u32 digit = (sort_value >> shift) & (RADIX-1); + memcpy((u8*)help_buffer + prefix_sum[digit] * item_size, item, item_size); ++prefix_sum[digit]; } - memcpy(items, buffer, item_count * item_size); + memcpy(collection, help_buffer, item_count * item_size); } }