102 lines
No EOL
3.8 KiB
C
102 lines
No EOL
3.8 KiB
C
|
|
|
|
|
|
// This is a very niche sort algorithm.
|
|
// I use it for Z sorting quads.
|
|
// help_buffer should be same size as collection.
|
|
// This only works with integers, and it will use the first number_of_bits in the integer
|
|
// at sort_value_offset_in_item for sorting.
|
|
// There is a cost of memory as we need to double the buffer we're sorting BUT the performance
|
|
// gain is very promising.
|
|
// At 21 bits I'm able to sort a completely randomized collection of 100k integers at around
|
|
// 8m cycles (or 2.5-2.6ms on my shitty laptop i5-11300H)
|
|
void radix_sort(void *collection, void *help_buffer, u64 item_count, u64 item_size, u64 sort_value_offset_in_item, u64 number_of_bits) {
|
|
local_persist const int RADIX = 256;
|
|
local_persist const int BITS_PER_PASS = 8;
|
|
|
|
const int PASS_COUNT = ((number_of_bits + BITS_PER_PASS - 1) / BITS_PER_PASS);
|
|
const u64 HALF_RANGE_OF_VALUE_BITS = 1ULL << (number_of_bits - 1);
|
|
|
|
u64 count[RADIX];
|
|
u64 prefix_sum[RADIX];
|
|
|
|
for (u32 pass = 0; pass < PASS_COUNT; ++pass) {
|
|
u32 shift = pass * BITS_PER_PASS;
|
|
|
|
memset(count, 0, sizeof(count));
|
|
|
|
for (u64 i = 0; i < item_count; ++i) {
|
|
u8 *item = (u8*)collection + i * item_size;
|
|
|
|
u64 sort_value = *(u64*)(item + sort_value_offset_in_item);
|
|
sort_value += HALF_RANGE_OF_VALUE_BITS; // We treat the value as a signed integer
|
|
|
|
u32 digit = (sort_value >> shift) & (RADIX-1);
|
|
++count[digit];
|
|
}
|
|
|
|
prefix_sum[0] = 0;
|
|
for (u32 i = 1; i < RADIX; ++i) {
|
|
prefix_sum[i] = prefix_sum[i - 1] + count[i - 1];
|
|
}
|
|
|
|
for (u64 i = 0; i < item_count; ++i) {
|
|
u8 *item = (u8*)collection + i * item_size;
|
|
|
|
u64 sort_value = *(u64*)(item + sort_value_offset_in_item);
|
|
sort_value += HALF_RANGE_OF_VALUE_BITS; // We treat the value as a signed integer
|
|
|
|
u32 digit = (sort_value >> shift) & (RADIX-1);
|
|
memcpy((u8*)help_buffer + prefix_sum[digit] * item_size, item, item_size);
|
|
++prefix_sum[digit];
|
|
}
|
|
|
|
memcpy(collection, help_buffer, item_count * item_size);
|
|
}
|
|
}
|
|
|
|
// Bottom-up (iterative) merge sort over a raw array of `item_count` items, each `item_size`
// bytes, ordered by `compare` (negative / zero / positive like qsort's comparator).
// Adjacent runs of 1, 2, 4, ... items are merged into `help_buffer` and copied back, so
// `help_buffer` must be able to hold `item_count * item_size` bytes.
// Ties take the item from the left run first, which keeps the sort stable.
void merge_sort(void *collection, void *help_buffer, u64 item_count, u64 item_size, int (*compare)(const void *, const void *)) {
    u8 *src     = (u8 *)collection;
    u8 *scratch = (u8 *)help_buffer;

    for (u64 run = 1; run < item_count; run *= 2) {
        for (u64 begin = 0; begin < item_count; begin += 2 * run) {
            // [begin, mid) is the left run, [mid, stop) the right run; both clamped to the array.
            u64 mid = begin + run;
            if (mid > item_count) {
                mid = item_count;
            }
            u64 stop = begin + 2 * run;
            if (stop > item_count) {
                stop = item_count;
            }

            u64 a   = begin; // next unread item of the left run
            u64 b   = mid;   // next unread item of the right run
            u64 out = begin; // next slot to fill in the scratch buffer

            while (a < mid && b < stop) {
                u8 *lhs = src + a * item_size;
                u8 *rhs = src + b * item_size;
                u8 *dst = scratch + out * item_size;

                if (compare(lhs, rhs) > 0) {
                    memcpy(dst, rhs, item_size);
                    ++b;
                } else {
                    memcpy(dst, lhs, item_size);
                    ++a;
                }
                ++out;
            }

            // At most one run still has items; append what is left of it in one go.
            if (a < mid) {
                u64 remaining = mid - a;
                memcpy(scratch + out * item_size, src + a * item_size, remaining * item_size);
                out += remaining;
            }
            if (b < stop) {
                u64 remaining = stop - b;
                memcpy(scratch + out * item_size, src + b * item_size, remaining * item_size);
                out += remaining;
            }

            // Put the merged range back so the next, wider pass reads it from `collection`.
            if (begin < stop) {
                memcpy(src + begin * item_size, scratch + begin * item_size, (stop - begin) * item_size);
            }
        }
    }
}
|
|
|
|
// Returns true when the first `count` bytes at `a` and `b` are identical, i.e. when memcmp
// reports no difference.
inline bool bytes_match(void *a, void *b, u64 count) {
    const int difference = memcmp(a, b, count);
    return difference == 0;
}