From fe3fea0c29193152e76db36e3141c06aa595a135 Mon Sep 17 00:00:00 2001 From: Charlie Malmqvist Date: Tue, 23 Jul 2024 12:11:52 +0200 Subject: [PATCH] Stable Z sort --- oogabooga/examples/renderer_stress_test.c | 1 + oogabooga/utility.c | 20 ++++++++------------ 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/oogabooga/examples/renderer_stress_test.c b/oogabooga/examples/renderer_stress_test.c index 37e9cce..ee7b36d 100644 --- a/oogabooga/examples/renderer_stress_test.c +++ b/oogabooga/examples/renderer_stress_test.c @@ -120,6 +120,7 @@ int entry(int argc, char **argv) { hammer_xform = m4_translate(hammer_xform, v3(-.25f, -.25f, 0)); push_z_layer(1000001); draw_image_xform(hammer_image, hammer_xform, v2(.5f, .5f), COLOR_RED); + draw_image_xform(hammer_image, hammer_xform, v2(.25f, .25f), COLOR_GREEN); pop_z_layer(); Vector2 hover_position = v2_rotate_point_around_pivot(v2(-.5, -.5), v2(0, 0), (f32)now); diff --git a/oogabooga/utility.c b/oogabooga/utility.c index a234f95..e2b8b07 100644 --- a/oogabooga/utility.c +++ b/oogabooga/utility.c @@ -11,7 +11,6 @@ // At 21 bits I'm able to sort a completely randomized collection of 100k integers at around // 8m cycles (or 2.5-2.6ms on my shitty laptop i5-11300H) void radix_sort(void *collection, void *help_buffer, u64 item_count, u64 item_size, u64 sort_value_offset_in_item, u64 number_of_bits) { - local_persist const int RADIX = 256; local_persist const int BITS_PER_PASS = 8; local_persist const int MASK = (RADIX - 1); @@ -20,6 +19,7 @@ void radix_sort(void *collection, void *help_buffer, u64 item_count, u64 item_si const u64 SIGN_SHIFT = 1ULL << (number_of_bits - 1); u64* count = (u64*)alloc(get_temporary_allocator(), RADIX * sizeof(u64)); + u64* prefix_sum = (u64*)alloc(get_temporary_allocator(), RADIX * sizeof(u64)); u8* items = (u8*)collection; u8* buffer = (u8*)help_buffer; @@ -32,30 +32,26 @@ void radix_sort(void *collection, void *help_buffer, u64 item_count, u64 item_si for (u64 i = 0; i < item_count; ++i) { u64 sort_value = *(u64*)(items + i * item_size + sort_value_offset_in_item); - sort_value += SIGN_SHIFT; // Transform the value to handle negative numbers + sort_value += SIGN_SHIFT; u32 digit = (sort_value >> shift) & MASK; ++count[digit]; } - u64 sum = 0; - for (u32 i = 0; i < RADIX; ++i) { - u64 temp = count[i]; - count[i] = sum; - sum += temp; + prefix_sum[0] = 0; + for (u32 i = 1; i < RADIX; ++i) { + prefix_sum[i] = prefix_sum[i - 1] + count[i - 1]; } for (u64 i = 0; i < item_count; ++i) { u64 sort_value = *(u64*)(items + i * item_size + sort_value_offset_in_item); - u64 transformed_value = sort_value + SIGN_SHIFT; // Transform the value to handle negative numbers + u64 transformed_value = sort_value + SIGN_SHIFT; u32 digit = (transformed_value >> shift) & MASK; - memcpy(buffer + count[digit] * item_size, items + i * item_size, item_size); - ++count[digit]; + memcpy(buffer + prefix_sum[digit] * item_size, items + i * item_size, item_size); + ++prefix_sum[digit]; } memcpy(items, buffer, item_count * item_size); } - - dealloc(get_temporary_allocator(), count); } void merge_sort(void *collection, void *help_buffer, u64 item_count, u64 item_size, int (*compare)(const void *, const void *)) {