os_get_elapsed_seconds() & some unnecessary refactoring

This commit is contained in:
Charlie Malmqvist 2024-08-18 12:51:33 +02:00
parent 978162cf4d
commit a2b65c0eaf
20 changed files with 168 additions and 150 deletions

3
TODO
View file

@ -1,5 +1,8 @@
- Gamepad
- Compile with msys2, cygwin, mingw64
- Audio
- Allow audio programming
- Inject mixer proc per player

View file

@ -33,11 +33,11 @@ typedef struct Context_Extra {
//
// This is a minimal starting point for new projects. Copy & rename to get started
// #include "oogabooga/examples/minimal_game_loop.c"
#include "oogabooga/examples/minimal_game_loop.c"
// #include "oogabooga/examples/text_rendering.c"
// #include "oogabooga/examples/custom_logger.c"
#include "oogabooga/examples/renderer_stress_test.c"
// #include "oogabooga/examples/renderer_stress_test.c"
// #include "oogabooga/examples/tile_game.c"
// #include "oogabooga/examples/audio_test.c"
// #include "oogabooga/examples/custom_shader.c"

View file

@ -1,3 +1,8 @@
## v0.01.004
- Misc
- Reworked os_get_current_time_in_seconds() -> os_get_elapsed_seconds()
Now returns seconds since app init.
- draw_frame.view -> draw_frame.camera_xform (deprecated .view)
## v0.01.003 - Mouse pointers, Audio improvement & features, bug fixes
- Os layer

View file

@ -96,7 +96,7 @@ void spinlock_acquire_or_wait(Spinlock* l) {
}
// Returns true if the lock was acquired, false if timeout_seconds was reached first
bool spinlock_acquire_or_wait_timeout(Spinlock* l, f64 timeout_seconds) {
f64 start = os_get_current_time_in_seconds();
f64 start = os_get_elapsed_seconds();
while (true) {
bool expected = false;
if (compare_and_swap_bool(&l->locked, true, expected)) {
@ -104,7 +104,7 @@ bool spinlock_acquire_or_wait_timeout(Spinlock* l, f64 timeout_seconds) {
}
while (l->locked) {
// spinny boi
if ((os_get_current_time_in_seconds()-start) >= timeout_seconds) return false;
if ((os_get_elapsed_seconds()-start) >= timeout_seconds) return false;
}
}
return true;

View file

@ -48,43 +48,52 @@ typedef struct Draw_Quad {
typedef struct Draw_Frame {
u64 num_quads;
Matrix4 projection;
Matrix4 view;
bool enable_z_sorting;
s32 z_stack[Z_STACK_MAX];
u64 z_count;
Vector4 scissor_stack[SCISSOR_STACK_MAX];
u64 scissor_count;
union {
DEPRECATED(Matrix4 view, "Use draw_frame.camera_xform instead");
Matrix4 camera_xform;
};
void *cbuffer;
u64 scissor_count;
Vector4 scissor_stack[SCISSOR_STACK_MAX];
Draw_Quad *quad_buffer;
u64 z_count;
s32 z_stack[Z_STACK_MAX];
bool enable_z_sorting;
} Draw_Frame;
// #Cleanup this should be in Draw_Frame
// #Global
ogb_instance Draw_Quad *quad_buffer;
ogb_instance u64 allocated_quads;
// This frame is passed to the platform layer and rendered in os_update.
// Resets every frame.
ogb_instance Draw_Frame draw_frame;
ogb_instance Draw_Frame draw_frame = {0};
#if !OOGABOOGA_LINK_EXTERNAL_INSTANCE
Draw_Quad *quad_buffer;
u64 allocated_quads;
Draw_Frame draw_frame = ZERO(Draw_Frame);
Draw_Frame draw_frame;
#endif // NOT OOGABOOGA_LINK_EXTERNAL_INSTANCE
void reset_draw_frame(Draw_Frame *frame) {
// #Memory
// I would like to try to have the quad buffer to be allocated in a growing arena
// which is reset every frames, like temp allocator but large enough to fit the
// highest number of quads the program submits in a frame.
// For now, we just reset the count in the heap allocated buffer
Draw_Quad *quad_buffer = frame->quad_buffer;
if (quad_buffer) growing_array_clear((void**)&quad_buffer);
*frame = (Draw_Frame){0};
frame->quad_buffer = quad_buffer;
float32 aspect = (float32)window.width/(float32)window.height;
frame->projection = m4_make_orthographic_projection(-aspect, aspect, -1, 1, -1, 10);
frame->view = m4_scalar(1.0);
frame->camera_xform = m4_scalar(1.0);
}
void push_z_layer(s32 z) {
@ -141,35 +150,26 @@ Draw_Quad *draw_quad_projected(Draw_Quad quad, Matrix4 world_to_clip) {
memset(quad.userdata, 0, sizeof(quad.userdata));
if (draw_frame.num_quads >= allocated_quads) {
if (!draw_frame.quad_buffer) {
// #Memory
u64 new_count = max(get_next_power_of_two(draw_frame.num_quads+1), 128);
Draw_Quad *new_buffer = alloc(get_heap_allocator(), new_count*sizeof(Draw_Quad));
if (quad_buffer) {
memcpy(new_buffer, quad_buffer, draw_frame.num_quads*sizeof(Draw_Quad));
dealloc(get_heap_allocator(), quad_buffer);
}
quad_buffer = new_buffer;
allocated_quads = new_count;
// Use an arena
growing_array_init((void**)&draw_frame.quad_buffer, sizeof(Draw_Quad), get_heap_allocator());
}
quad_buffer[draw_frame.num_quads] = quad;
draw_frame.num_quads += 1;
Draw_Quad **target_buffer = &draw_frame.quad_buffer;
return &quad_buffer[draw_frame.num_quads-1];
growing_array_add((void**)target_buffer, &quad);
return &(*target_buffer)[growing_array_get_valid_count(*target_buffer)-1];
}
Draw_Quad *draw_quad(Draw_Quad quad) {
return draw_quad_projected(quad, m4_mul(draw_frame.projection, m4_inverse(draw_frame.view)));
return draw_quad_projected(quad, m4_mul(draw_frame.projection, m4_inverse(draw_frame.camera_xform)));
}
Draw_Quad *draw_quad_xform(Draw_Quad quad, Matrix4 xform) {
Matrix4 world_to_clip = m4_scalar(1.0);
world_to_clip = m4_mul(world_to_clip, draw_frame.projection);
world_to_clip = m4_mul(world_to_clip, m4_inverse(draw_frame.view));
world_to_clip = m4_mul(world_to_clip, m4_inverse(draw_frame.camera_xform));
world_to_clip = m4_mul(world_to_clip, xform);
return draw_quad_projected(quad, world_to_clip);
}

View file

@ -44,10 +44,10 @@ int entry(int argc, char **argv) {
// memory from an invalid address.
My_Cbuffer cbuffer;
float64 last_time = os_get_current_time_in_seconds();
float64 last_time = os_get_elapsed_seconds();
while (!window.should_close) {
float64 now = os_get_current_time_in_seconds();
float64 now = os_get_elapsed_seconds();
if ((int)now != (int)last_time) {
log("%.2f FPS\n%.2fms", 1.0/(now-last_time), (now-last_time)*1000);
}
@ -88,8 +88,8 @@ int entry(int argc, char **argv) {
}
Vector2 world_to_screen(Vector2 p) {
Vector4 in_view_space = m4_transform(draw_frame.view, v4(p.x, p.y, 0.0, 1.0));
Vector4 in_clip_space = m4_transform(draw_frame.projection, in_view_space);
Vector4 in_cam_space = m4_transform(draw_frame.camera_xform, v4(p.x, p.y, 0.0, 1.0));
Vector4 in_clip_space = m4_transform(draw_frame.projection, in_cam_space);
Vector4 ndc = {
.x = in_clip_space.x / in_clip_space.w,

View file

@ -35,9 +35,9 @@ int entry(int argc, char **argv) {
assert(circles[i].pos.y == c.pos.y);
}
float64 last_time = os_get_current_time_in_seconds();
float64 last_time = os_get_elapsed_seconds();
while (!window.should_close) {
float64 now = os_get_current_time_in_seconds();
float64 now = os_get_elapsed_seconds();
if ((int)now != (int)last_time) log("%.2f FPS\n%.2fms", 1.0/(now-last_time), (now-last_time)*1000);
last_time = now;

View file

@ -55,9 +55,9 @@ int entry(int argc, char **argv) {
window.y = 90;
window.clear_color = hex_to_rgba(0x6495EDff);
float64 last_time = os_get_current_time_in_seconds();
float64 last_time = os_get_elapsed_seconds();
while (!window.should_close) {
float64 now = os_get_current_time_in_seconds();
float64 now = os_get_elapsed_seconds();
float64 delta = now-last_time;
if ((int)now != (int)last_time) log("%.2f FPS\n%.2fms", 1.0/(delta), (delta)*1000);
last_time = now;

View file

@ -13,7 +13,7 @@
void SHARED_EXPORT
game_update(f64 delta_time) {
float64 now = os_get_current_time_in_seconds();
float64 now = os_get_elapsed_seconds();
Matrix4 rect_xform = m4_scalar(1.0);
rect_xform = m4_rotate_z(rect_xform, (f32)now);

View file

@ -8,9 +8,9 @@ int entry(int argc, char **argv) {
window.y = 90;
window.clear_color = hex_to_rgba(0x6495EDff);
float64 last_time = os_get_current_time_in_seconds();
float64 last_time = os_get_elapsed_seconds();
while (!window.should_close) {
float64 now = os_get_current_time_in_seconds();
float64 now = os_get_elapsed_seconds();
if ((int)now != (int)last_time) log("%.2f FPS\n%.2fms", 1.0/(now-last_time), (now-last_time)*1000);
last_time = now;

View file

@ -38,17 +38,17 @@ int entry(int argc, char **argv) {
const float64 fps_limit = 69000;
const float64 min_frametime = 1.0 / fps_limit;
Matrix4 camera_view = m4_scalar(1.0);
Matrix4 camera_xform = m4_scalar(1.0);
float64 last_time = os_get_current_time_in_seconds();
float64 last_time = os_get_elapsed_seconds();
while (!window.should_close) tm_scope("Frame") {
reset_temporary_storage();
float64 now = os_get_current_time_in_seconds();
float64 now = os_get_elapsed_seconds();
float64 delta = now - last_time;
if (delta < min_frametime) {
os_high_precision_sleep((min_frametime-delta)*1000.0);
now = os_get_current_time_in_seconds();
now = os_get_elapsed_seconds();
delta = now - last_time;
}
last_time = now;
@ -89,8 +89,8 @@ int entry(int argc, char **argv) {
}
Vector2 cam_move = v2_mulf(cam_move_axis, delta * cam_move_speed);
camera_view = m4_translate(camera_view, v3(v2_expand(cam_move), 0));
draw_frame.view = camera_view;
camera_xform = m4_translate(camera_xform, v3(v2_expand(cam_move), 0));
draw_frame.camera_xform = camera_xform;
local_persist bool do_enable_z_sorting = false;
draw_frame.enable_z_sorting = do_enable_z_sorting;
@ -104,7 +104,7 @@ int entry(int argc, char **argv) {
}
seed_for_random = 69;
for (u64 i = 0; i < 30000; i++) {
for (u64 i = 0; i < 2; i++) {
float32 aspect = (float32)window.width/(float32)window.height;
float min_x = -aspect;
float max_x = aspect;
@ -120,6 +120,16 @@ int entry(int argc, char **argv) {
}
seed_for_random = rdtsc();
draw_image(bush_image, v2(0.65, 0.65), v2(0.2*sin(now), 0.2*sin(now)), COLOR_WHITE);
u32 atlas_index = 0;
Gfx_Font_Atlas *atlas = (Gfx_Font_Atlas*)hash_table_find(&font->variations[32].atlases, atlas_index);
draw_text(font, STR("I am text"), 128, v2(sin(now), -0.61), v2(0.001, 0.001), COLOR_BLACK);
draw_text(font, STR("I am text"), 128, v2(sin(now)-0.01, -0.6), v2(0.001, 0.001), COLOR_WHITE);
draw_text(font, STR("Hello jje\nnew line"), 128, v2(-1, 0.5), v2(0.001, 0.001), COLOR_WHITE);
Matrix4 hammer_xform = m4_scalar(1.0);
hammer_xform = m4_rotate_z(hammer_xform, (f32)now);
hammer_xform = m4_translate(hammer_xform, v3(-.25f, -.25f, 0));
@ -132,16 +142,6 @@ int entry(int argc, char **argv) {
Vector2 local_pivot = v2(.125f, .125f);
draw_circle(v2_sub(hover_position, local_pivot), v2(.25f, .25f), v4((sin(now)+1.0)/2.0, 1.0, 0.0, 1.0));
draw_image(bush_image, v2(0.65, 0.65), v2(0.2*sin(now), 0.2*sin(now)), COLOR_WHITE);
u32 atlas_index = 0;
Gfx_Font_Atlas *atlas = (Gfx_Font_Atlas*)hash_table_find(&font->variations[32].atlases, atlas_index);
draw_text(font, STR("I am text"), 128, v2(sin(now), -0.61), v2(0.001, 0.001), COLOR_BLACK);
draw_text(font, STR("I am text"), 128, v2(sin(now)-0.01, -0.6), v2(0.001, 0.001), COLOR_WHITE);
draw_text(font, STR("Hello jje\nnew line"), 128, v2(-1, 0.5), v2(0.001, 0.001), COLOR_WHITE);
local_persist bool show = false;
if (is_key_just_pressed('T')) show = !show;

View file

@ -29,7 +29,7 @@ int entry(int argc, char **argv) {
draw_text(font, STR("I am text"), font_height, v2(-2, 2), v2(1, 1), COLOR_BLACK);
draw_text(font, STR("I am text"), font_height, v2(0, 0), v2(1, 1), COLOR_WHITE);
float now = (float)os_get_current_time_in_seconds();
float now = (float)os_get_elapsed_seconds();
float animated_x = sin(now*0.1)*(window.width*0.5);
// UTF-8 !

View file

@ -42,7 +42,7 @@ App_State app_state = APP_STATE_EDITING;
s64 current_tile_layer = 0;
Matrix4 camera_view;
Matrix4 camera_xform;
void update_editor();
void update_game();
@ -56,16 +56,16 @@ int entry(int argc, char **argv) {
window.y = 90;
window.clear_color = hex_to_rgba(0x6495EDff);
camera_view = m4_scalar(1.0);
camera_xform = m4_scalar(1.0);
float64 last_time = os_get_current_time_in_seconds();
float64 last_time = os_get_elapsed_seconds();
while (!window.should_close) {
reset_temporary_storage();
draw_frame.projection = m4_make_orthographic_projection(window.pixel_width * -0.5, window.pixel_width * 0.5, window.pixel_height * -0.5, window.pixel_height * 0.5, -1, 10);
draw_frame.enable_z_sorting = true;
float64 now = os_get_current_time_in_seconds();
float64 now = os_get_elapsed_seconds();
delta_time = (float32)(now - last_time);
last_time = now;
@ -84,7 +84,7 @@ int entry(int argc, char **argv) {
Vector2 screen_to_world(Vector2 screen) {
Matrix4 proj = draw_frame.projection;
Matrix4 view = draw_frame.view;
Matrix4 cam = draw_frame.camera_xform;
float window_w = window.width;
float window_h = window.height;
@ -95,7 +95,7 @@ Vector2 screen_to_world(Vector2 screen) {
// Transform to world coordinates
Vector4 world_pos = v4(ndc_x, ndc_y, 0, 1);
world_pos = m4_transform(m4_inverse(proj), world_pos);
world_pos = m4_transform(view, world_pos);
world_pos = m4_transform(cam, world_pos);
return world_pos.xy;
}
@ -122,8 +122,8 @@ void update_editor() {
}
Vector2 cam_move = v2_mulf(cam_move_axis, delta_time * cam_move_speed);
camera_view = m4_translate(camera_view, v3(v2_expand(cam_move), 0));
draw_frame.view = camera_view;
camera_xform = m4_translate(camera_xform, v3(v2_expand(cam_move), 0));
draw_frame.camera_xform = camera_xform;
Vector2 bottom_left = screen_to_world(v2(-window.width/2, -window.height/2));
Vector2 top_right = screen_to_world(v2( window.width/2, window.height/2));

View file

@ -610,23 +610,26 @@ void d3d11_process_draw_frame() {
ID3D11DeviceContext_ClearRenderTargetView(d3d11_context, d3d11_window_render_target_view, (float*)&window.clear_color);
u64 number_of_quads = growing_array_get_valid_count(draw_frame.quad_buffer);
///
// Maybe grow quad vbo
u32 required_size = sizeof(D3D11_Vertex) * allocated_quads*6;
u64 required_size = sizeof(D3D11_Vertex) * number_of_quads*6;
if (required_size > d3d11_quad_vbo_size) {
if (d3d11_quad_vbo) {
D3D11Release(d3d11_quad_vbo);
dealloc(get_heap_allocator(), d3d11_staging_quad_buffer);
}
u64 new_size = get_next_power_of_two(required_size);
D3D11_BUFFER_DESC desc = ZERO(D3D11_BUFFER_DESC);
desc.Usage = D3D11_USAGE_DYNAMIC;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
desc.ByteWidth = required_size;
desc.ByteWidth = new_size;
desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
HRESULT hr = ID3D11Device_CreateBuffer(d3d11_device, &desc, 0, &d3d11_quad_vbo);
assert(SUCCEEDED(hr), "CreateBuffer failed");
d3d11_quad_vbo_size = required_size;
d3d11_quad_vbo_size = new_size;
d3d11_staging_quad_buffer = alloc(get_heap_allocator(), d3d11_quad_vbo_size);
assert((u64)d3d11_staging_quad_buffer%16 == 0);
@ -634,7 +637,7 @@ void d3d11_process_draw_frame() {
log_verbose("Grew quad vbo to %d bytes.", d3d11_quad_vbo_size);
}
if (draw_frame.num_quads > 0) {
if (number_of_quads > 0) {
///
// Render geometry from into vbo quad list
@ -648,20 +651,22 @@ void d3d11_process_draw_frame() {
D3D11_Vertex* pointer = head;
u64 number_of_rendered_quads = 0;
tm_scope("Quad processing") {
if (draw_frame.enable_z_sorting) tm_scope("Z sorting") {
if (!sort_quad_buffer || (sort_quad_buffer_size < allocated_quads*sizeof(Draw_Quad))) {
if (!sort_quad_buffer || (sort_quad_buffer_size < number_of_quads*sizeof(Draw_Quad))) {
// #Memory #Heapalloc
if (sort_quad_buffer) dealloc(get_heap_allocator(), sort_quad_buffer);
sort_quad_buffer = alloc(get_heap_allocator(), allocated_quads*sizeof(Draw_Quad));
sort_quad_buffer_size = allocated_quads*sizeof(Draw_Quad);
sort_quad_buffer = alloc(get_heap_allocator(), number_of_quads*sizeof(Draw_Quad));
sort_quad_buffer_size = number_of_quads*sizeof(Draw_Quad);
}
radix_sort(quad_buffer, sort_quad_buffer, draw_frame.num_quads, sizeof(Draw_Quad), offsetof(Draw_Quad, z), MAX_Z_BITS);
radix_sort(draw_frame.quad_buffer, sort_quad_buffer, number_of_quads, sizeof(Draw_Quad), offsetof(Draw_Quad, z), MAX_Z_BITS);
}
for (u64 i = 0; i < draw_frame.num_quads; i++) {
for (u64 i = 0; i < number_of_quads; i++) {
Draw_Quad *q = &quad_buffer[i];
Draw_Quad *q = &draw_frame.quad_buffer[i];
assert(q->z <= MAX_Z, "Z is too high. Z is %d, Max is %d.", q->z, MAX_Z);
assert(q->z >= (-MAX_Z+1), "Z is too low. Z is %d, Min is %d.", q->z, -MAX_Z+1);
@ -705,32 +710,27 @@ void d3d11_process_draw_frame() {
last_texture_index = texture_index;
}
if (q->type == QUAD_TYPE_TEXT) {
// This is meant to fix the annoying artifacts that shows up when sampling text from an atlas
// presumably for floating point precision issues or something.
// #Incomplete
// If we want to animate text with small movements then it will look wonky.
// This should be optional probably.
// Also, we might want to do this on non-text if rendering with linear filtering
// from a large texture atlas.
float pixel_width = 2.0/(float)window.width;
float pixel_height = 2.0/(float)window.height;
// This is meant to fix the annoying artifacts that show up when sampling from a large atlas
// presumably for floating point precision issues or something.
// #Incomplete
// If we want to animate text with small movements then it will look wonky.
// This should be optional probably.
float pixel_width = 2.0/(float)window.width;
float pixel_height = 2.0/(float)window.height;
bool xeven = window.width % 2 == 0;
bool yeven = window.height % 2 == 0;
q->bottom_left.x = round(q->bottom_left.x / pixel_width) * pixel_width;
q->bottom_left.y = round(q->bottom_left.y / pixel_height) * pixel_height;
q->top_left.x = round(q->top_left.x / pixel_width) * pixel_width;
q->top_left.y = round(q->top_left.y / pixel_height) * pixel_height;
q->top_right.x = round(q->top_right.x / pixel_width) * pixel_width;
q->top_right.y = round(q->top_right.y / pixel_height) * pixel_height;
q->bottom_right.x = round(q->bottom_right.x / pixel_width) * pixel_width;
q->bottom_right.y = round(q->bottom_right.y / pixel_height) * pixel_height;
}
bool xeven = window.width % 2 == 0;
bool yeven = window.height % 2 == 0;
q->bottom_left.x = round(q->bottom_left.x / pixel_width) * pixel_width;
q->bottom_left.y = round(q->bottom_left.y / pixel_height) * pixel_height;
q->top_left.x = round(q->top_left.x / pixel_width) * pixel_width;
q->top_left.y = round(q->top_left.y / pixel_height) * pixel_height;
q->top_right.x = round(q->top_right.x / pixel_width) * pixel_width;
q->top_right.y = round(q->top_right.y / pixel_height) * pixel_height;
q->bottom_right.x = round(q->bottom_right.x / pixel_width) * pixel_width;
q->bottom_right.y = round(q->bottom_right.y / pixel_height) * pixel_height;
// We will write to 6 vertices for the one quad (two tris)
{

View file

@ -118,7 +118,7 @@
#define OGB_VERSION_MAJOR 0
#define OGB_VERSION_MINOR 1
#define OGB_VERSION_PATCH 3
#define OGB_VERSION_PATCH 4
#define OGB_VERSION (OGB_VERSION_MAJOR*1000000+OGB_VERSION_MINOR*1000+OGB_VERSION_PATCH)

View file

@ -108,6 +108,7 @@ bool win32_want_override_mouse_pointer = false;
HCURSOR win32_shadowed_mouse_pointer = 0;
bool win32_did_override_user_mouse_pointer = false;
SYSTEM_INFO win32_system_info;
LARGE_INTEGER win32_counter_at_start;
#ifndef OOGABOOGA_HEADLESS
@ -391,7 +392,7 @@ void os_init(u64 program_memory_capacity) {
heap_init();
QueryPerformanceCounter(&win32_counter_at_start);
#ifndef OOGABOOGA_HEADLESS
win32_init_window();
@ -604,7 +605,7 @@ void os_high_precision_sleep(f64 ms) {
const f64 s = ms/1000.0;
f64 start = os_get_current_time_in_seconds();
f64 start = os_get_elapsed_seconds();
f64 end = start + (f64)s;
s32 sleep_time = (s32)((end-start)-1.0);
bool do_sleep = sleep_time >= 1;
@ -613,7 +614,7 @@ void os_high_precision_sleep(f64 ms) {
if (do_sleep) os_sleep(sleep_time);
while (os_get_current_time_in_seconds() < end) {
while (os_get_elapsed_seconds() < end) {
os_yield_thread();
}
@ -627,16 +628,22 @@ void os_high_precision_sleep(f64 ms) {
///
// Returns the current value of the CPU timestamp counter.
// Thin forwarding wrapper around rdtsc().
u64 os_get_current_cycle_count() {
	u64 cycle_count = rdtsc();
	return cycle_count;
}
float64 os_get_current_time_in_seconds() {
// #Cleanup deprecated
float64
os_get_current_time_in_seconds() {
LARGE_INTEGER frequency, counter;
if (!QueryPerformanceFrequency(&frequency) || !QueryPerformanceCounter(&counter)) {
return -1.0;
}
return (double)counter.QuadPart / (double)frequency.QuadPart;
return (float64)counter.QuadPart / (float64)frequency.QuadPart;
}
float64
os_get_elapsed_seconds() {
LARGE_INTEGER freq, counter = (LARGE_INTEGER){0};
QueryPerformanceFrequency(&freq);
QueryPerformanceCounter(&counter);
return (float64)(counter.QuadPart-win32_counter_at_start.QuadPart) / (float64)freq.QuadPart;
}

View file

@ -163,10 +163,12 @@ os_high_precision_sleep(f64 ms);
// Time
///
DEPRECATED(u64 os_get_current_cycle_count(), "use rdtsc() instead");
float64 ogb_instance
DEPRECATED(os_get_current_time_in_seconds(), "Use os_get_elapsed_seconds() instead");
float64 ogb_instance
os_get_current_time_in_seconds();
os_get_elapsed_seconds();
///
///

View file

@ -6,7 +6,7 @@
#define INCREMENT 1442695040888963407ull
// #Global
// set this to something like os_get_current_cycle_count() for very randomized seed
// set this to something like rdtsc() for very randomized seed
ogb_instance u64 seed_for_random;
#if !OOGABOOGA_LINK_EXTERNAL_INSTANCE

View file

@ -1208,7 +1208,7 @@ void test_mutex() {
Allocator allocator = get_heap_allocator();
const int num_threads = 1000;
const int num_threads = 100;
Thread *threads = alloc(allocator, sizeof(Thread)*num_threads);
for (u64 i = 0; i < num_threads; i++) {
@ -1233,9 +1233,9 @@ int compare_draw_quads(const void *a, const void *b) {
}
void test_sort() {
int num_samples = 100;
int num_samples = 500;
u64 id_bits = 21;
u64 item_count = 5000;
u64 item_count = 50000;
f64 seconds = 0;
u64 cycles = 0;
@ -1253,11 +1253,11 @@ void test_sort() {
u64 item_size = sizeof(Draw_Quad);
u64 sort_value_offset_in_item = offsetof(Draw_Quad, z);
float64 start_seconds = os_get_current_time_in_seconds();
float64 start_seconds = os_get_elapsed_seconds();
u64 start_cycles = rdtsc();
radix_sort(items, buffer, item_count, item_size, sort_value_offset_in_item, id_bits);
u64 end_cycles = rdtsc();
float64 end_seconds = os_get_current_time_in_seconds();
float64 end_seconds = os_get_elapsed_seconds();
for (u64 i = 1; i < item_count; i++) {
assert(items[i].z >= items[i-1].z, "Failed: not correctly sorted");
@ -1281,11 +1281,11 @@ void test_sort() {
u64 item_size = sizeof(Draw_Quad);
u64 sort_value_offset_in_item = offsetof(Draw_Quad, z);
float64 start_seconds = os_get_current_time_in_seconds();
float64 start_seconds = os_get_elapsed_seconds();
u64 start_cycles = rdtsc();
merge_sort(items, buffer, item_count, item_size, compare_draw_quads);
u64 end_cycles = rdtsc();
float64 end_seconds = os_get_current_time_in_seconds();
float64 end_seconds = os_get_elapsed_seconds();
for (u64 i = 1; i < item_count; i++) {
assert(items[i].z >= items[i-1].z, "Failed: not correctly sorted");

View file

@ -13,27 +13,25 @@
void radix_sort(void *collection, void *help_buffer, u64 item_count, u64 item_size, u64 sort_value_offset_in_item, u64 number_of_bits) {
local_persist const int RADIX = 256;
local_persist const int BITS_PER_PASS = 8;
local_persist const int MASK = (RADIX - 1);
const int PASS_COUNT = ((number_of_bits + BITS_PER_PASS - 1) / BITS_PER_PASS);
const u64 SIGN_SHIFT = 1ULL << (number_of_bits - 1);
const u64 HALF_RANGE_OF_VALUE_BITS = 1ULL << (number_of_bits - 1);
u64* count = (u64*)alloc(get_temporary_allocator(), RADIX * sizeof(u64));
u64* prefix_sum = (u64*)alloc(get_temporary_allocator(), RADIX * sizeof(u64));
u8* items = (u8*)collection;
u8* buffer = (u8*)help_buffer;
u64 count[RADIX];
u64 prefix_sum[RADIX];
for (u32 pass = 0; pass < PASS_COUNT; ++pass) {
u32 shift = pass * BITS_PER_PASS;
for (u32 i = 0; i < RADIX; ++i) {
count[i] = 0;
}
memset(count, 0, sizeof(count));
for (u64 i = 0; i < item_count; ++i) {
u64 sort_value = *(u64*)(items + i * item_size + sort_value_offset_in_item);
sort_value += SIGN_SHIFT;
u32 digit = (sort_value >> shift) & MASK;
u8 *item = (u8*)collection + i * item_size;
u64 sort_value = *(u64*)(item + sort_value_offset_in_item);
sort_value += HALF_RANGE_OF_VALUE_BITS; // We treat the value as a signed integer
u32 digit = (sort_value >> shift) & (RADIX-1);
++count[digit];
}
@ -43,14 +41,17 @@ void radix_sort(void *collection, void *help_buffer, u64 item_count, u64 item_si
}
for (u64 i = 0; i < item_count; ++i) {
u64 sort_value = *(u64*)(items + i * item_size + sort_value_offset_in_item);
u64 transformed_value = sort_value + SIGN_SHIFT;
u32 digit = (transformed_value >> shift) & MASK;
memcpy(buffer + prefix_sum[digit] * item_size, items + i * item_size, item_size);
u8 *item = (u8*)collection + i * item_size;
u64 sort_value = *(u64*)(item + sort_value_offset_in_item);
sort_value += HALF_RANGE_OF_VALUE_BITS; // We treat the value as a signed integer
u32 digit = (sort_value >> shift) & (RADIX-1);
memcpy((u8*)help_buffer + prefix_sum[digit] * item_size, item, item_size);
++prefix_sum[digit];
}
memcpy(items, buffer, item_count * item_size);
memcpy(collection, help_buffer, item_count * item_size);
}
}