This commit is contained in:
Charlie Malmqvist 2024-07-17 16:36:13 +02:00
parent ef87d9416e
commit e4649e564d
8 changed files with 77 additions and 153 deletions

View file

@ -1,6 +1,7 @@
/*
<<<<<< Bytecode compiled fro HLSL code below: >>>>>>
struct VS_INPUT
{
float4 position : POSITION;
@ -9,10 +10,6 @@ struct VS_INPUT
int texture_index : TEXTURE_INDEX;
uint type : TYPE;
uint sampler_index : SAMPLER_INDEX;
// s8 texture_index
// u8 type
// u8 sampler_index
// u8
};
struct PS_INPUT

View file

@ -1,3 +1,4 @@
struct VS_INPUT
{
float4 position : POSITION;
@ -6,10 +7,6 @@ struct VS_INPUT
int texture_index : TEXTURE_INDEX;
uint type : TYPE;
uint sampler_index : SAMPLER_INDEX;
// s8 texture_index
// u8 type
// u8 sampler_index
// u8
};
struct PS_INPUT

View file

@ -28,7 +28,7 @@ int entry(int argc, char **argv) {
render_atlas_if_not_yet_rendered(font, 32, 'A');
seed_for_random = os_get_current_cycle_count();
seed_for_random = rdtsc();
const float64 fps_limit = 69000;
const float64 min_frametime = 1.0 / fps_limit;
@ -106,7 +106,7 @@ int entry(int argc, char **argv) {
draw_image(bush_image, v2(x, y), v2(0.1, 0.1), COLOR_WHITE);
pop_z_layer();
}
seed_for_random = os_get_current_cycle_count();
seed_for_random = rdtsc();
Matrix4 hammer_xform = m4_scalar(1.0);
hammer_xform = m4_rotate_z(hammer_xform, (f32)now);

View file

@ -161,7 +161,7 @@ void font_variation_init(Gfx_Font_Variation *variation, Gfx_Font *font, u32 font
// This one is bottom-top as opposed to normally in stbtt where it's top-bottom
int x0, y0, x1, y1;
stbtt_GetCodepointBitmapBox(&font->stbtt_handle, (int)c, variation->scale, variation->scale, &x0, &y0, &x1, &y1);
float c_ascent = (float)(y1-y0); // #Bugprone #Cleanup I am not at all sure about this!
float c_ascent = (float)(y1-y0);
if (c_ascent > variation->metrics.latin_ascent)
variation->metrics.latin_ascent = c_ascent;

View file

@ -218,7 +218,6 @@ Heap_Block *make_heap_block(Heap_Block *parent, u64 size) {
// #Speed #Cleanup
if (((u8*)block)+size >= ((u8*)program_memory)+program_memory_size) {
u64 minimum_size = ((u8*)block+size) - (u8*)program_memory + 1;
u64 new_program_size = get_next_power_of_two(minimum_size);

View file

@ -255,22 +255,17 @@ void os_init(u64 program_memory_size) {
assert(os.crt != 0, "Could not load win32 crt library. Might be compiled with non-msvc? #Incomplete #Portability");
os.crt_vsnprintf = (Crt_Vsnprintf_Proc)os_dynamic_library_load_symbol(os.crt, STR("vsnprintf"));
assert(os.crt_vsnprintf, "Missing vsnprintf in crt");
os.crt_vprintf = (Crt_Vprintf_Proc)os_dynamic_library_load_symbol(os.crt, STR("vprintf"));
assert(os.crt_vprintf, "Missing vprintf in crt");
os.crt_vsprintf = (Crt_Vsprintf_Proc)os_dynamic_library_load_symbol(os.crt, STR("vsprintf"));
assert(os.crt_vsprintf, "Missing vsprintf in crt");
os.crt_memcpy = (Crt_Memcpy_Proc)os_dynamic_library_load_symbol(os.crt, STR("memcpy"));
assert(os.crt_memcpy, "Missing memcpy in crt");
os.crt_memcmp = (Crt_Memcmp_Proc)os_dynamic_library_load_symbol(os.crt, STR("memcmp"));
assert(os.crt_memcmp, "Missing crt_memcmp in crt");
os.crt_memset = (Crt_Memset_Proc)os_dynamic_library_load_symbol(os.crt, STR("memset"));
assert(os.crt_memset, "Missing memset in crt");
win32_init_window();
os_start_thread(os_make_thread(win32_audio_thread, get_heap_allocator()));
os_start_thread(os_make_thread(win32_audio_poll_default_device_thread, get_heap_allocator()));
local_persist Thread audio_thread, audio_poll_default_device_thread;
os_thread_init(&audio_thread, win32_audio_thread);
os_thread_init(&audio_poll_default_device_thread, win32_audio_poll_default_device_thread);
os_thread_start(&audio_thread);
os_thread_start(&audio_poll_default_device_thread);
while (!win32_has_audio_thread_started) { os_yield_thread(); }
}
@ -404,6 +399,8 @@ DWORD WINAPI win32_thread_invoker(LPVOID param) {
return 0;
}
////// DEPRECATED vvvvvvvvvvvvvvvvv
Thread* os_make_thread(Thread_Proc proc, Allocator allocator) {
Thread *t = (Thread*)alloc(allocator, sizeof(Thread));
t->id = 0; // This is set when we start it
@ -433,6 +430,33 @@ void os_start_thread(Thread *t) {
void os_join_thread(Thread *t) {
WaitForSingleObject(t->os_handle, INFINITE);
}
////// DEPRECATED ^^^^^^^^^^^^^^^^
void os_thread_init(Thread *t, Thread_Proc proc) {
memset(t, 0, sizeof(Thread));
t->id = 0;
t->proc = proc;
t->initial_context = context;
}
void os_thread_destroy(Thread *t) {
os_thread_join(t);
CloseHandle(t->os_handle);
}
void os_thread_start(Thread *t) {
t->os_handle = CreateThread(
0,
0,
win32_thread_invoker,
t,
0,
(DWORD*)&t->id
);
assert(t->os_handle, "Failed creating thread");
}
void os_thread_join(Thread *t) {
WaitForSingleObject(t->os_handle, INFINITE);
}
///
// Mutex primitive
@ -474,59 +498,6 @@ void os_unlock_mutex(Mutex_Handle m) {
assert(result, "Unlock mutex 0x%x failed with error %d", m, GetLastError());
}
///
// Spinlock "primitive"
Spinlock *os_make_spinlock(Allocator allocator) {
// #Memory #Cleanup do we need to heap allocate this ?
Spinlock *l = cast(Spinlock*)alloc(allocator, sizeof(Spinlock));
l->locked = false;
return l;
}
void os_spinlock_lock(Spinlock *l) {
while (true) {
bool expected = false;
if (compare_and_swap_bool(&l->locked, true, expected)) {
return;
}
while (l->locked) {
// spinny boi
}
}
}
void os_spinlock_unlock(Spinlock *l) {
bool expected = true;
bool success = compare_and_swap_bool(&l->locked, false, expected);
assert(success, "This thread should have acquired the spinlock but compare_and_swap failed");
}
///
// Concurrency utilities
bool os_compare_and_swap_8(u8 *a, u8 b, u8 old) {
// #Portability not sure how portable this is.
return _InterlockedCompareExchange8((volatile CHAR*)a, (CHAR)b, (CHAR)old) == (CHAR)old;
}
bool os_compare_and_swap_16(u16 *a, u16 b, u16 old) {
return InterlockedCompareExchange16((volatile SHORT*)a, (SHORT)b, (SHORT)old) == (SHORT)old;
}
bool os_compare_and_swap_32(u32 *a, u32 b, u32 old) {
return InterlockedCompareExchange((volatile LONG*)a, (LONG)b, (LONG)old) == (LONG)old;
}
bool os_compare_and_swap_64(u64 *a, u64 b, u64 old) {
return InterlockedCompareExchange64((volatile LONG64*)a, (LONG64)b, (LONG64)old) == (LONG64)old;
}
bool os_compare_and_swap_bool(bool *a, bool b, bool old) {
return os_compare_and_swap_8(cast(u8*)a, cast(u8)b, cast(u8)old);
}
void os_sleep(u32 ms) {
Sleep(ms);

View file

@ -32,13 +32,7 @@
#define _INTSIZEOF(n) ((sizeof(n) + sizeof(int) - 1) & ~(sizeof(int) - 1))
// #Cleanup we only need vsnprintf
typedef void* (__cdecl *Crt_Memcpy_Proc) (void*, const void*, size_t);
typedef int (__cdecl *Crt_Memcmp_Proc) (const void*, const void*, size_t);
typedef void* (__cdecl *Crt_Memset_Proc) (void*, int, size_t);
typedef int (__cdecl *Crt_Vprintf_Proc) (const char*, va_list);
typedef int (__cdecl *Crt_Vsnprintf_Proc) (char*, size_t, const char*, va_list);
typedef int (__cdecl *Crt_Vsprintf_Proc) (char*, const char*, va_list);
typedef struct Os_Info {
u64 page_size;
@ -46,37 +40,19 @@ typedef struct Os_Info {
Dynamic_Library_Handle crt;
// #Cleanup we only need vsnprintf
Crt_Memcpy_Proc crt_memcpy;
Crt_Memcmp_Proc crt_memcmp;
Crt_Memset_Proc crt_memset;
Crt_Vprintf_Proc crt_vprintf;
Crt_Vsnprintf_Proc crt_vsnprintf;
Crt_Vsprintf_Proc crt_vsprintf;
void *static_memory_start, *static_memory_end;
} Os_Info;
Os_Info os;
inline int crt_vprintf(const char* fmt, va_list args) {
return os.crt_vprintf(fmt, args);
}
inline bool bytes_match(void *a, void *b, u64 count) { return memcmp(a, b, count) == 0; }
inline int vsnprintf(char* buffer, size_t n, const char* fmt, va_list args) {
return os.crt_vsnprintf(buffer, n, fmt, args);
}
inline int crt_sprintf(char *str, const char *format, ...) {
va_list args;
va_start(args, format);
int r = os.crt_vsprintf(str, format, args);
va_end(args);
return r;
}
Mutex_Handle program_memory_mutex = 0;
bool os_grow_program_memory(size_t new_size);
@ -91,22 +67,25 @@ typedef struct Thread Thread;
typedef void(*Thread_Proc)(Thread*);
typedef struct Thread {
u64 id;
u64 id; // This is valid after os_thread_start
Context initial_context;
void* data;
Thread_Proc proc;
Thread_Handle os_handle;
Allocator allocator;
Allocator allocator; // Deprecated !! #Cleanup
} Thread;
///
// Thread primitive
// #Cleanup this shouldn't be allocating just for the pointer!! Just do os_thread_init(*)
Thread* os_make_thread(Thread_Proc proc, Allocator allocator);
void os_destroy_thread(Thread *t);
void os_start_thread(Thread* t);
void os_join_thread(Thread* t);
DEPRECATED(Thread* os_make_thread(Thread_Proc proc, Allocator allocator), "Use os_thread_init instead");
DEPRECATED(void os_destroy_thread(Thread *t), "Use os_thread_destroy instead");
DEPRECATED(void os_start_thread(Thread* t), "Use os_thread_start instead");
DEPRECATED(void os_join_thread(Thread* t), "Use os_thread_join instead");
void os_thread_init(Thread *t, Thread_Proc proc);
void os_thread_destroy(Thread *t);
void os_thread_start(Thread *t);
void os_thread_join(Thread *t);
///
@ -116,27 +95,8 @@ void os_destroy_mutex(Mutex_Handle m);
void os_lock_mutex(Mutex_Handle m);
void os_unlock_mutex(Mutex_Handle m);
typedef struct Spinlock Spinlock;
// #Cleanup Moved to threading.c
DEPRECATED(Spinlock *os_make_spinlock(Allocator allocator), "use spinlock_init instead");
DEPRECATED(void os_spinlock_lock(Spinlock* l), "use spinlock_acquire_or_wait instead");
DEPRECATED(void os_spinlock_unlock(Spinlock* l), "use spinlock_release instead");
///
// Concurrency utilities
// #Cleanup
// In retrospect, I'm not sure why I choose to implement this per OS.
// I think Win32 InterlockedCompareExchange just generates the cmpxchg
// instruction anyways, so may as well just inline asm it (or Win32
// if we're compiling with msvc) (LDREX/STREX on ARM)
// - CharlieM July 8th 2024
// compare_and_swap in cpu.c
DEPRECATED(bool os_compare_and_swap_8 (u8 *a, u8 b, u8 old), "use compare_and_swap instead");
DEPRECATED(bool os_compare_and_swap_16 (u16 *a, u16 b, u16 old), "use compare_and_swap instead");
DEPRECATED(bool os_compare_and_swap_32 (u32 *a, u32 b, u32 old), "use compare_and_swap instead");
DEPRECATED(bool os_compare_and_swap_64 (u64 *a, u64 b, u64 old), "use compare_and_swap instead");
DEPRECATED(bool os_compare_and_swap_bool(bool *a, bool b, bool old), "use compare_and_swap instead");
// Threading utilities
void os_sleep(u32 ms);
void os_yield_thread();
@ -147,8 +107,7 @@ void os_high_precision_sleep(f64 ms);
// Time
///
// #Cleanup getting the cycle count is an x86 intrinsic so this should be in cpu.c
u64 os_get_current_cycle_count();
DEPRECATED(u64 os_get_current_cycle_count(), "use rdtsc() instead");
float64 os_get_current_time_in_seconds();

View file

@ -212,11 +212,12 @@ void test_thread_proc1(Thread* t) {
void test_threads() {
Thread* t = os_make_thread(test_thread_proc1, get_heap_allocator());
os_start_thread(t);
Thread t;
os_thread_init(&t, test_thread_proc1);
os_thread_start(&t);
os_sleep(20);
print("This should be printed in middle of thread execution\n");
os_join_thread(t);
os_thread_join(&t);
print("Thread is joined\n");
Mutex_Handle m = os_make_mutex();
@ -767,65 +768,65 @@ void test_simd() {
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
u64 start = os_get_current_cycle_count();
u64 start = rdtsc();
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 16) {
simd_mul_float32_512_aligned(&samples_a[i], &samples_b[i], &samples_a[i]);
}
u64 end = os_get_current_cycle_count();
u64 end = rdtsc();
u64 cycles = end-start;
print("simd 512 float32 mul took %llu cycles\n", cycles);
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
start = os_get_current_cycle_count();
start = rdtsc();
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 8) {
simd_mul_float32_256_aligned(&samples_a[i], &samples_b[i], &samples_a[i]);
}
end = os_get_current_cycle_count();
end = rdtsc();
cycles = end-start;
print("simd 256 float32 mul took %llu cycles\n", cycles);
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
start = os_get_current_cycle_count();
start = rdtsc();
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 4) {
simd_mul_float32_128_aligned(&samples_a[i], &samples_b[i], &samples_a[i]);
}
end = os_get_current_cycle_count();
end = rdtsc();
cycles = end-start;
print("simd 128 float32 mul took %llu cycles\n", cycles);
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
start = os_get_current_cycle_count();
start = rdtsc();
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 2) {
simd_mul_float32_64(&samples_a[i], &samples_b[i], &samples_a[i]);
}
end = os_get_current_cycle_count();
end = rdtsc();
cycles = end-start;
print("simd 64 float32 mul took %llu cycles\n", cycles);
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
start = os_get_current_cycle_count();
start = rdtsc();
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 1) {
samples_a[i] = samples_a[i] + samples_b[i];
}
end = os_get_current_cycle_count();
end = rdtsc();
cycles = end-start;
print("NO SIMD float32 mul took %llu cycles\n", cycles);
}
@ -1061,7 +1062,7 @@ void test_hash_table() {
void test_random_distribution() {
int bins[NUM_BINS] = {0};
seed_for_random = os_get_current_cycle_count();
seed_for_random = rdtsc();
for (int i = 0; i < NUM_SAMPLES; i++) {
f32 rand_val = get_random_float32();
int bin = (int)(rand_val * NUM_BINS);
@ -1124,16 +1125,16 @@ void test_mutex() {
const int num_threads = 100;
Thread **threads = alloc(allocator, sizeof(Thread*)*num_threads);
Thread *threads = alloc(allocator, sizeof(Thread)*num_threads);
for (u64 i = 0; i < num_threads; i++) {
threads[i] = os_make_thread(mutex_test_increment_counter, allocator);
threads[i]->data = &data;
os_thread_init(&threads[i], mutex_test_increment_counter);
threads[i].data = &data;
}
for (u64 i = 0; i < num_threads; i++) {
os_start_thread(threads[i]);
os_thread_start(&threads[i]);
}
for (u64 i = 0; i < num_threads; i++) {
os_join_thread(threads[i]);
os_thread_join(&threads[i]);
}
assert(data.counter == num_threads * MUTEX_TEST_TASK_COUNT, "Failed: Counter does not match expected value after threading tasks");
@ -1167,9 +1168,9 @@ void test_sort() {
u64 sort_value_offset_in_item = offsetof(Draw_Quad, z);
float64 start_seconds = os_get_current_time_in_seconds();
u64 start_cycles = os_get_current_cycle_count();
u64 start_cycles = rdtsc();
radix_sort(items, buffer, item_count, item_size, sort_value_offset_in_item, id_bits);
u64 end_cycles = os_get_current_cycle_count();
u64 end_cycles = rdtsc();
float64 end_seconds = os_get_current_time_in_seconds();
for (u64 i = 1; i < item_count; i++) {
@ -1195,9 +1196,9 @@ void test_sort() {
u64 sort_value_offset_in_item = offsetof(Draw_Quad, z);
float64 start_seconds = os_get_current_time_in_seconds();
u64 start_cycles = os_get_current_cycle_count();
u64 start_cycles = rdtsc();
merge_sort(items, buffer, item_count, item_size, compare_draw_quads);
u64 end_cycles = os_get_current_cycle_count();
u64 end_cycles = rdtsc();
float64 end_seconds = os_get_current_time_in_seconds();
for (u64 i = 1; i < item_count; i++) {