#Cleanup
This commit is contained in:
parent
ef87d9416e
commit
e4649e564d
8 changed files with 77 additions and 153 deletions
|
@ -1,6 +1,7 @@
|
||||||
/*
|
/*
|
||||||
<<<<<< Bytecode compiled fro HLSL code below: >>>>>>
|
<<<<<< Bytecode compiled fro HLSL code below: >>>>>>
|
||||||
|
|
||||||
|
|
||||||
struct VS_INPUT
|
struct VS_INPUT
|
||||||
{
|
{
|
||||||
float4 position : POSITION;
|
float4 position : POSITION;
|
||||||
|
@ -9,10 +10,6 @@ struct VS_INPUT
|
||||||
int texture_index : TEXTURE_INDEX;
|
int texture_index : TEXTURE_INDEX;
|
||||||
uint type : TYPE;
|
uint type : TYPE;
|
||||||
uint sampler_index : SAMPLER_INDEX;
|
uint sampler_index : SAMPLER_INDEX;
|
||||||
// s8 texture_index
|
|
||||||
// u8 type
|
|
||||||
// u8 sampler_index
|
|
||||||
// u8
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PS_INPUT
|
struct PS_INPUT
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
|
||||||
struct VS_INPUT
|
struct VS_INPUT
|
||||||
{
|
{
|
||||||
float4 position : POSITION;
|
float4 position : POSITION;
|
||||||
|
@ -6,10 +7,6 @@ struct VS_INPUT
|
||||||
int texture_index : TEXTURE_INDEX;
|
int texture_index : TEXTURE_INDEX;
|
||||||
uint type : TYPE;
|
uint type : TYPE;
|
||||||
uint sampler_index : SAMPLER_INDEX;
|
uint sampler_index : SAMPLER_INDEX;
|
||||||
// s8 texture_index
|
|
||||||
// u8 type
|
|
||||||
// u8 sampler_index
|
|
||||||
// u8
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct PS_INPUT
|
struct PS_INPUT
|
||||||
|
|
|
@ -28,7 +28,7 @@ int entry(int argc, char **argv) {
|
||||||
|
|
||||||
render_atlas_if_not_yet_rendered(font, 32, 'A');
|
render_atlas_if_not_yet_rendered(font, 32, 'A');
|
||||||
|
|
||||||
seed_for_random = os_get_current_cycle_count();
|
seed_for_random = rdtsc();
|
||||||
|
|
||||||
const float64 fps_limit = 69000;
|
const float64 fps_limit = 69000;
|
||||||
const float64 min_frametime = 1.0 / fps_limit;
|
const float64 min_frametime = 1.0 / fps_limit;
|
||||||
|
@ -106,7 +106,7 @@ int entry(int argc, char **argv) {
|
||||||
draw_image(bush_image, v2(x, y), v2(0.1, 0.1), COLOR_WHITE);
|
draw_image(bush_image, v2(x, y), v2(0.1, 0.1), COLOR_WHITE);
|
||||||
pop_z_layer();
|
pop_z_layer();
|
||||||
}
|
}
|
||||||
seed_for_random = os_get_current_cycle_count();
|
seed_for_random = rdtsc();
|
||||||
|
|
||||||
Matrix4 hammer_xform = m4_scalar(1.0);
|
Matrix4 hammer_xform = m4_scalar(1.0);
|
||||||
hammer_xform = m4_rotate_z(hammer_xform, (f32)now);
|
hammer_xform = m4_rotate_z(hammer_xform, (f32)now);
|
||||||
|
|
|
@ -161,7 +161,7 @@ void font_variation_init(Gfx_Font_Variation *variation, Gfx_Font *font, u32 font
|
||||||
// This one is bottom-top as opposed to normally in stbtt where it's top-bottom
|
// This one is bottom-top as opposed to normally in stbtt where it's top-bottom
|
||||||
int x0, y0, x1, y1;
|
int x0, y0, x1, y1;
|
||||||
stbtt_GetCodepointBitmapBox(&font->stbtt_handle, (int)c, variation->scale, variation->scale, &x0, &y0, &x1, &y1);
|
stbtt_GetCodepointBitmapBox(&font->stbtt_handle, (int)c, variation->scale, variation->scale, &x0, &y0, &x1, &y1);
|
||||||
float c_ascent = (float)(y1-y0); // #Bugprone #Cleanup I am not at all sure about this!
|
float c_ascent = (float)(y1-y0);
|
||||||
|
|
||||||
if (c_ascent > variation->metrics.latin_ascent)
|
if (c_ascent > variation->metrics.latin_ascent)
|
||||||
variation->metrics.latin_ascent = c_ascent;
|
variation->metrics.latin_ascent = c_ascent;
|
||||||
|
|
|
@ -218,7 +218,6 @@ Heap_Block *make_heap_block(Heap_Block *parent, u64 size) {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// #Speed #Cleanup
|
|
||||||
if (((u8*)block)+size >= ((u8*)program_memory)+program_memory_size) {
|
if (((u8*)block)+size >= ((u8*)program_memory)+program_memory_size) {
|
||||||
u64 minimum_size = ((u8*)block+size) - (u8*)program_memory + 1;
|
u64 minimum_size = ((u8*)block+size) - (u8*)program_memory + 1;
|
||||||
u64 new_program_size = get_next_power_of_two(minimum_size);
|
u64 new_program_size = get_next_power_of_two(minimum_size);
|
||||||
|
|
|
@ -255,22 +255,17 @@ void os_init(u64 program_memory_size) {
|
||||||
assert(os.crt != 0, "Could not load win32 crt library. Might be compiled with non-msvc? #Incomplete #Portability");
|
assert(os.crt != 0, "Could not load win32 crt library. Might be compiled with non-msvc? #Incomplete #Portability");
|
||||||
os.crt_vsnprintf = (Crt_Vsnprintf_Proc)os_dynamic_library_load_symbol(os.crt, STR("vsnprintf"));
|
os.crt_vsnprintf = (Crt_Vsnprintf_Proc)os_dynamic_library_load_symbol(os.crt, STR("vsnprintf"));
|
||||||
assert(os.crt_vsnprintf, "Missing vsnprintf in crt");
|
assert(os.crt_vsnprintf, "Missing vsnprintf in crt");
|
||||||
os.crt_vprintf = (Crt_Vprintf_Proc)os_dynamic_library_load_symbol(os.crt, STR("vprintf"));
|
|
||||||
assert(os.crt_vprintf, "Missing vprintf in crt");
|
|
||||||
os.crt_vsprintf = (Crt_Vsprintf_Proc)os_dynamic_library_load_symbol(os.crt, STR("vsprintf"));
|
|
||||||
assert(os.crt_vsprintf, "Missing vsprintf in crt");
|
|
||||||
os.crt_memcpy = (Crt_Memcpy_Proc)os_dynamic_library_load_symbol(os.crt, STR("memcpy"));
|
|
||||||
assert(os.crt_memcpy, "Missing memcpy in crt");
|
|
||||||
os.crt_memcmp = (Crt_Memcmp_Proc)os_dynamic_library_load_symbol(os.crt, STR("memcmp"));
|
|
||||||
assert(os.crt_memcmp, "Missing crt_memcmp in crt");
|
|
||||||
os.crt_memset = (Crt_Memset_Proc)os_dynamic_library_load_symbol(os.crt, STR("memset"));
|
|
||||||
assert(os.crt_memset, "Missing memset in crt");
|
|
||||||
|
|
||||||
win32_init_window();
|
win32_init_window();
|
||||||
|
|
||||||
|
|
||||||
os_start_thread(os_make_thread(win32_audio_thread, get_heap_allocator()));
|
local_persist Thread audio_thread, audio_poll_default_device_thread;
|
||||||
os_start_thread(os_make_thread(win32_audio_poll_default_device_thread, get_heap_allocator()));
|
|
||||||
|
os_thread_init(&audio_thread, win32_audio_thread);
|
||||||
|
os_thread_init(&audio_poll_default_device_thread, win32_audio_poll_default_device_thread);
|
||||||
|
|
||||||
|
os_thread_start(&audio_thread);
|
||||||
|
os_thread_start(&audio_poll_default_device_thread);
|
||||||
|
|
||||||
while (!win32_has_audio_thread_started) { os_yield_thread(); }
|
while (!win32_has_audio_thread_started) { os_yield_thread(); }
|
||||||
}
|
}
|
||||||
|
@ -404,6 +399,8 @@ DWORD WINAPI win32_thread_invoker(LPVOID param) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
////// DEPRECATED vvvvvvvvvvvvvvvvv
|
||||||
Thread* os_make_thread(Thread_Proc proc, Allocator allocator) {
|
Thread* os_make_thread(Thread_Proc proc, Allocator allocator) {
|
||||||
Thread *t = (Thread*)alloc(allocator, sizeof(Thread));
|
Thread *t = (Thread*)alloc(allocator, sizeof(Thread));
|
||||||
t->id = 0; // This is set when we start it
|
t->id = 0; // This is set when we start it
|
||||||
|
@ -433,6 +430,33 @@ void os_start_thread(Thread *t) {
|
||||||
void os_join_thread(Thread *t) {
|
void os_join_thread(Thread *t) {
|
||||||
WaitForSingleObject(t->os_handle, INFINITE);
|
WaitForSingleObject(t->os_handle, INFINITE);
|
||||||
}
|
}
|
||||||
|
////// DEPRECATED ^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
void os_thread_init(Thread *t, Thread_Proc proc) {
|
||||||
|
memset(t, 0, sizeof(Thread));
|
||||||
|
t->id = 0;
|
||||||
|
t->proc = proc;
|
||||||
|
t->initial_context = context;
|
||||||
|
}
|
||||||
|
void os_thread_destroy(Thread *t) {
|
||||||
|
os_thread_join(t);
|
||||||
|
CloseHandle(t->os_handle);
|
||||||
|
}
|
||||||
|
void os_thread_start(Thread *t) {
|
||||||
|
t->os_handle = CreateThread(
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
win32_thread_invoker,
|
||||||
|
t,
|
||||||
|
0,
|
||||||
|
(DWORD*)&t->id
|
||||||
|
);
|
||||||
|
|
||||||
|
assert(t->os_handle, "Failed creating thread");
|
||||||
|
}
|
||||||
|
void os_thread_join(Thread *t) {
|
||||||
|
WaitForSingleObject(t->os_handle, INFINITE);
|
||||||
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
// Mutex primitive
|
// Mutex primitive
|
||||||
|
@ -474,59 +498,6 @@ void os_unlock_mutex(Mutex_Handle m) {
|
||||||
assert(result, "Unlock mutex 0x%x failed with error %d", m, GetLastError());
|
assert(result, "Unlock mutex 0x%x failed with error %d", m, GetLastError());
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
// Spinlock "primitive"
|
|
||||||
|
|
||||||
Spinlock *os_make_spinlock(Allocator allocator) {
|
|
||||||
// #Memory #Cleanup do we need to heap allocate this ?
|
|
||||||
Spinlock *l = cast(Spinlock*)alloc(allocator, sizeof(Spinlock));
|
|
||||||
l->locked = false;
|
|
||||||
return l;
|
|
||||||
}
|
|
||||||
void os_spinlock_lock(Spinlock *l) {
|
|
||||||
while (true) {
|
|
||||||
bool expected = false;
|
|
||||||
if (compare_and_swap_bool(&l->locked, true, expected)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
while (l->locked) {
|
|
||||||
// spinny boi
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void os_spinlock_unlock(Spinlock *l) {
|
|
||||||
bool expected = true;
|
|
||||||
bool success = compare_and_swap_bool(&l->locked, false, expected);
|
|
||||||
assert(success, "This thread should have acquired the spinlock but compare_and_swap failed");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
///
|
|
||||||
// Concurrency utilities
|
|
||||||
|
|
||||||
bool os_compare_and_swap_8(u8 *a, u8 b, u8 old) {
|
|
||||||
// #Portability not sure how portable this is.
|
|
||||||
return _InterlockedCompareExchange8((volatile CHAR*)a, (CHAR)b, (CHAR)old) == (CHAR)old;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool os_compare_and_swap_16(u16 *a, u16 b, u16 old) {
|
|
||||||
return InterlockedCompareExchange16((volatile SHORT*)a, (SHORT)b, (SHORT)old) == (SHORT)old;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool os_compare_and_swap_32(u32 *a, u32 b, u32 old) {
|
|
||||||
return InterlockedCompareExchange((volatile LONG*)a, (LONG)b, (LONG)old) == (LONG)old;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool os_compare_and_swap_64(u64 *a, u64 b, u64 old) {
|
|
||||||
return InterlockedCompareExchange64((volatile LONG64*)a, (LONG64)b, (LONG64)old) == (LONG64)old;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool os_compare_and_swap_bool(bool *a, bool b, bool old) {
|
|
||||||
return os_compare_and_swap_8(cast(u8*)a, cast(u8)b, cast(u8)old);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void os_sleep(u32 ms) {
|
void os_sleep(u32 ms) {
|
||||||
Sleep(ms);
|
Sleep(ms);
|
||||||
|
|
|
@ -32,13 +32,7 @@
|
||||||
|
|
||||||
#define _INTSIZEOF(n) ((sizeof(n) + sizeof(int) - 1) & ~(sizeof(int) - 1))
|
#define _INTSIZEOF(n) ((sizeof(n) + sizeof(int) - 1) & ~(sizeof(int) - 1))
|
||||||
|
|
||||||
// #Cleanup we only need vsnprintf
|
|
||||||
typedef void* (__cdecl *Crt_Memcpy_Proc) (void*, const void*, size_t);
|
|
||||||
typedef int (__cdecl *Crt_Memcmp_Proc) (const void*, const void*, size_t);
|
|
||||||
typedef void* (__cdecl *Crt_Memset_Proc) (void*, int, size_t);
|
|
||||||
typedef int (__cdecl *Crt_Vprintf_Proc) (const char*, va_list);
|
|
||||||
typedef int (__cdecl *Crt_Vsnprintf_Proc) (char*, size_t, const char*, va_list);
|
typedef int (__cdecl *Crt_Vsnprintf_Proc) (char*, size_t, const char*, va_list);
|
||||||
typedef int (__cdecl *Crt_Vsprintf_Proc) (char*, const char*, va_list);
|
|
||||||
|
|
||||||
typedef struct Os_Info {
|
typedef struct Os_Info {
|
||||||
u64 page_size;
|
u64 page_size;
|
||||||
|
@ -46,37 +40,19 @@ typedef struct Os_Info {
|
||||||
|
|
||||||
Dynamic_Library_Handle crt;
|
Dynamic_Library_Handle crt;
|
||||||
|
|
||||||
// #Cleanup we only need vsnprintf
|
|
||||||
Crt_Memcpy_Proc crt_memcpy;
|
|
||||||
Crt_Memcmp_Proc crt_memcmp;
|
|
||||||
Crt_Memset_Proc crt_memset;
|
|
||||||
Crt_Vprintf_Proc crt_vprintf;
|
|
||||||
Crt_Vsnprintf_Proc crt_vsnprintf;
|
Crt_Vsnprintf_Proc crt_vsnprintf;
|
||||||
Crt_Vsprintf_Proc crt_vsprintf;
|
|
||||||
|
|
||||||
void *static_memory_start, *static_memory_end;
|
void *static_memory_start, *static_memory_end;
|
||||||
|
|
||||||
} Os_Info;
|
} Os_Info;
|
||||||
Os_Info os;
|
Os_Info os;
|
||||||
|
|
||||||
inline int crt_vprintf(const char* fmt, va_list args) {
|
|
||||||
return os.crt_vprintf(fmt, args);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool bytes_match(void *a, void *b, u64 count) { return memcmp(a, b, count) == 0; }
|
inline bool bytes_match(void *a, void *b, u64 count) { return memcmp(a, b, count) == 0; }
|
||||||
|
|
||||||
inline int vsnprintf(char* buffer, size_t n, const char* fmt, va_list args) {
|
inline int vsnprintf(char* buffer, size_t n, const char* fmt, va_list args) {
|
||||||
return os.crt_vsnprintf(buffer, n, fmt, args);
|
return os.crt_vsnprintf(buffer, n, fmt, args);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int crt_sprintf(char *str, const char *format, ...) {
|
|
||||||
va_list args;
|
|
||||||
va_start(args, format);
|
|
||||||
int r = os.crt_vsprintf(str, format, args);
|
|
||||||
va_end(args);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
Mutex_Handle program_memory_mutex = 0;
|
Mutex_Handle program_memory_mutex = 0;
|
||||||
|
|
||||||
bool os_grow_program_memory(size_t new_size);
|
bool os_grow_program_memory(size_t new_size);
|
||||||
|
@ -91,22 +67,25 @@ typedef struct Thread Thread;
|
||||||
typedef void(*Thread_Proc)(Thread*);
|
typedef void(*Thread_Proc)(Thread*);
|
||||||
|
|
||||||
typedef struct Thread {
|
typedef struct Thread {
|
||||||
u64 id;
|
u64 id; // This is valid after os_thread_start
|
||||||
Context initial_context;
|
Context initial_context;
|
||||||
void* data;
|
void* data;
|
||||||
Thread_Proc proc;
|
Thread_Proc proc;
|
||||||
Thread_Handle os_handle;
|
Thread_Handle os_handle;
|
||||||
Allocator allocator;
|
Allocator allocator; // Deprecated !! #Cleanup
|
||||||
} Thread;
|
} Thread;
|
||||||
|
|
||||||
///
|
///
|
||||||
// Thread primitive
|
// Thread primitive
|
||||||
// #Cleanup this shouldn't be allocating just for the pointer!! Just do os_thread_init(*)
|
DEPRECATED(Thread* os_make_thread(Thread_Proc proc, Allocator allocator), "Use os_thread_init instead");
|
||||||
Thread* os_make_thread(Thread_Proc proc, Allocator allocator);
|
DEPRECATED(void os_destroy_thread(Thread *t), "Use os_thread_destroy instead");
|
||||||
void os_destroy_thread(Thread *t);
|
DEPRECATED(void os_start_thread(Thread* t), "Use os_thread_start instead");
|
||||||
void os_start_thread(Thread* t);
|
DEPRECATED(void os_join_thread(Thread* t), "Use os_thread_join instead");
|
||||||
void os_join_thread(Thread* t);
|
|
||||||
|
|
||||||
|
void os_thread_init(Thread *t, Thread_Proc proc);
|
||||||
|
void os_thread_destroy(Thread *t);
|
||||||
|
void os_thread_start(Thread *t);
|
||||||
|
void os_thread_join(Thread *t);
|
||||||
|
|
||||||
|
|
||||||
///
|
///
|
||||||
|
@ -116,27 +95,8 @@ void os_destroy_mutex(Mutex_Handle m);
|
||||||
void os_lock_mutex(Mutex_Handle m);
|
void os_lock_mutex(Mutex_Handle m);
|
||||||
void os_unlock_mutex(Mutex_Handle m);
|
void os_unlock_mutex(Mutex_Handle m);
|
||||||
|
|
||||||
typedef struct Spinlock Spinlock;
|
|
||||||
// #Cleanup Moved to threading.c
|
|
||||||
DEPRECATED(Spinlock *os_make_spinlock(Allocator allocator), "use spinlock_init instead");
|
|
||||||
DEPRECATED(void os_spinlock_lock(Spinlock* l), "use spinlock_acquire_or_wait instead");
|
|
||||||
DEPRECATED(void os_spinlock_unlock(Spinlock* l), "use spinlock_release instead");
|
|
||||||
|
|
||||||
///
|
///
|
||||||
// Concurrency utilities
|
// Threading utilities
|
||||||
|
|
||||||
// #Cleanup
|
|
||||||
// In retrospect, I'm not sure why I choose to implement this per OS.
|
|
||||||
// I think Win32 InterlockedCompareExchange just generates the cmpxchg
|
|
||||||
// instruction anyways, so may as well just inline asm it (or Win32
|
|
||||||
// if we're compiling with msvc) (LDREX/STREX on ARM)
|
|
||||||
// - CharlieM July 8th 2024
|
|
||||||
// compare_and_swap in cpu.c
|
|
||||||
DEPRECATED(bool os_compare_and_swap_8 (u8 *a, u8 b, u8 old), "use compare_and_swap instead");
|
|
||||||
DEPRECATED(bool os_compare_and_swap_16 (u16 *a, u16 b, u16 old), "use compare_and_swap instead");
|
|
||||||
DEPRECATED(bool os_compare_and_swap_32 (u32 *a, u32 b, u32 old), "use compare_and_swap instead");
|
|
||||||
DEPRECATED(bool os_compare_and_swap_64 (u64 *a, u64 b, u64 old), "use compare_and_swap instead");
|
|
||||||
DEPRECATED(bool os_compare_and_swap_bool(bool *a, bool b, bool old), "use compare_and_swap instead");
|
|
||||||
|
|
||||||
void os_sleep(u32 ms);
|
void os_sleep(u32 ms);
|
||||||
void os_yield_thread();
|
void os_yield_thread();
|
||||||
|
@ -147,8 +107,7 @@ void os_high_precision_sleep(f64 ms);
|
||||||
// Time
|
// Time
|
||||||
///
|
///
|
||||||
|
|
||||||
// #Cleanup getting the cycle count is an x86 intrinsic so this should be in cpu.c
|
DEPRECATED(u64 os_get_current_cycle_count(), "use rdtsc() instead");
|
||||||
u64 os_get_current_cycle_count();
|
|
||||||
float64 os_get_current_time_in_seconds();
|
float64 os_get_current_time_in_seconds();
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -212,11 +212,12 @@ void test_thread_proc1(Thread* t) {
|
||||||
|
|
||||||
void test_threads() {
|
void test_threads() {
|
||||||
|
|
||||||
Thread* t = os_make_thread(test_thread_proc1, get_heap_allocator());
|
Thread t;
|
||||||
os_start_thread(t);
|
os_thread_init(&t, test_thread_proc1);
|
||||||
|
os_thread_start(&t);
|
||||||
os_sleep(20);
|
os_sleep(20);
|
||||||
print("This should be printed in middle of thread execution\n");
|
print("This should be printed in middle of thread execution\n");
|
||||||
os_join_thread(t);
|
os_thread_join(&t);
|
||||||
print("Thread is joined\n");
|
print("Thread is joined\n");
|
||||||
|
|
||||||
Mutex_Handle m = os_make_mutex();
|
Mutex_Handle m = os_make_mutex();
|
||||||
|
@ -767,65 +768,65 @@ void test_simd() {
|
||||||
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
||||||
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
||||||
|
|
||||||
u64 start = os_get_current_cycle_count();
|
u64 start = rdtsc();
|
||||||
|
|
||||||
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 16) {
|
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 16) {
|
||||||
simd_mul_float32_512_aligned(&samples_a[i], &samples_b[i], &samples_a[i]);
|
simd_mul_float32_512_aligned(&samples_a[i], &samples_b[i], &samples_a[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 end = os_get_current_cycle_count();
|
u64 end = rdtsc();
|
||||||
u64 cycles = end-start;
|
u64 cycles = end-start;
|
||||||
print("simd 512 float32 mul took %llu cycles\n", cycles);
|
print("simd 512 float32 mul took %llu cycles\n", cycles);
|
||||||
|
|
||||||
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
||||||
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
||||||
|
|
||||||
start = os_get_current_cycle_count();
|
start = rdtsc();
|
||||||
|
|
||||||
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 8) {
|
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 8) {
|
||||||
simd_mul_float32_256_aligned(&samples_a[i], &samples_b[i], &samples_a[i]);
|
simd_mul_float32_256_aligned(&samples_a[i], &samples_b[i], &samples_a[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
end = os_get_current_cycle_count();
|
end = rdtsc();
|
||||||
cycles = end-start;
|
cycles = end-start;
|
||||||
print("simd 256 float32 mul took %llu cycles\n", cycles);
|
print("simd 256 float32 mul took %llu cycles\n", cycles);
|
||||||
|
|
||||||
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
||||||
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
||||||
|
|
||||||
start = os_get_current_cycle_count();
|
start = rdtsc();
|
||||||
|
|
||||||
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 4) {
|
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 4) {
|
||||||
simd_mul_float32_128_aligned(&samples_a[i], &samples_b[i], &samples_a[i]);
|
simd_mul_float32_128_aligned(&samples_a[i], &samples_b[i], &samples_a[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
end = os_get_current_cycle_count();
|
end = rdtsc();
|
||||||
cycles = end-start;
|
cycles = end-start;
|
||||||
print("simd 128 float32 mul took %llu cycles\n", cycles);
|
print("simd 128 float32 mul took %llu cycles\n", cycles);
|
||||||
|
|
||||||
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
||||||
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
||||||
|
|
||||||
start = os_get_current_cycle_count();
|
start = rdtsc();
|
||||||
|
|
||||||
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 2) {
|
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 2) {
|
||||||
simd_mul_float32_64(&samples_a[i], &samples_b[i], &samples_a[i]);
|
simd_mul_float32_64(&samples_a[i], &samples_b[i], &samples_a[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
end = os_get_current_cycle_count();
|
end = rdtsc();
|
||||||
cycles = end-start;
|
cycles = end-start;
|
||||||
print("simd 64 float32 mul took %llu cycles\n", cycles);
|
print("simd 64 float32 mul took %llu cycles\n", cycles);
|
||||||
|
|
||||||
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
memset(samples_a, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
||||||
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
memset(samples_b, 2, _TEST_NUM_SAMPLES*sizeof(float));
|
||||||
|
|
||||||
start = os_get_current_cycle_count();
|
start = rdtsc();
|
||||||
|
|
||||||
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 1) {
|
for (u64 i = 0; i < _TEST_NUM_SAMPLES; i += 1) {
|
||||||
samples_a[i] = samples_a[i] + samples_b[i];
|
samples_a[i] = samples_a[i] + samples_b[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
end = os_get_current_cycle_count();
|
end = rdtsc();
|
||||||
cycles = end-start;
|
cycles = end-start;
|
||||||
print("NO SIMD float32 mul took %llu cycles\n", cycles);
|
print("NO SIMD float32 mul took %llu cycles\n", cycles);
|
||||||
}
|
}
|
||||||
|
@ -1061,7 +1062,7 @@ void test_hash_table() {
|
||||||
|
|
||||||
void test_random_distribution() {
|
void test_random_distribution() {
|
||||||
int bins[NUM_BINS] = {0};
|
int bins[NUM_BINS] = {0};
|
||||||
seed_for_random = os_get_current_cycle_count();
|
seed_for_random = rdtsc();
|
||||||
for (int i = 0; i < NUM_SAMPLES; i++) {
|
for (int i = 0; i < NUM_SAMPLES; i++) {
|
||||||
f32 rand_val = get_random_float32();
|
f32 rand_val = get_random_float32();
|
||||||
int bin = (int)(rand_val * NUM_BINS);
|
int bin = (int)(rand_val * NUM_BINS);
|
||||||
|
@ -1124,16 +1125,16 @@ void test_mutex() {
|
||||||
|
|
||||||
const int num_threads = 100;
|
const int num_threads = 100;
|
||||||
|
|
||||||
Thread **threads = alloc(allocator, sizeof(Thread*)*num_threads);
|
Thread *threads = alloc(allocator, sizeof(Thread)*num_threads);
|
||||||
for (u64 i = 0; i < num_threads; i++) {
|
for (u64 i = 0; i < num_threads; i++) {
|
||||||
threads[i] = os_make_thread(mutex_test_increment_counter, allocator);
|
os_thread_init(&threads[i], mutex_test_increment_counter);
|
||||||
threads[i]->data = &data;
|
threads[i].data = &data;
|
||||||
}
|
}
|
||||||
for (u64 i = 0; i < num_threads; i++) {
|
for (u64 i = 0; i < num_threads; i++) {
|
||||||
os_start_thread(threads[i]);
|
os_thread_start(&threads[i]);
|
||||||
}
|
}
|
||||||
for (u64 i = 0; i < num_threads; i++) {
|
for (u64 i = 0; i < num_threads; i++) {
|
||||||
os_join_thread(threads[i]);
|
os_thread_join(&threads[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(data.counter == num_threads * MUTEX_TEST_TASK_COUNT, "Failed: Counter does not match expected value after threading tasks");
|
assert(data.counter == num_threads * MUTEX_TEST_TASK_COUNT, "Failed: Counter does not match expected value after threading tasks");
|
||||||
|
@ -1167,9 +1168,9 @@ void test_sort() {
|
||||||
u64 sort_value_offset_in_item = offsetof(Draw_Quad, z);
|
u64 sort_value_offset_in_item = offsetof(Draw_Quad, z);
|
||||||
|
|
||||||
float64 start_seconds = os_get_current_time_in_seconds();
|
float64 start_seconds = os_get_current_time_in_seconds();
|
||||||
u64 start_cycles = os_get_current_cycle_count();
|
u64 start_cycles = rdtsc();
|
||||||
radix_sort(items, buffer, item_count, item_size, sort_value_offset_in_item, id_bits);
|
radix_sort(items, buffer, item_count, item_size, sort_value_offset_in_item, id_bits);
|
||||||
u64 end_cycles = os_get_current_cycle_count();
|
u64 end_cycles = rdtsc();
|
||||||
float64 end_seconds = os_get_current_time_in_seconds();
|
float64 end_seconds = os_get_current_time_in_seconds();
|
||||||
|
|
||||||
for (u64 i = 1; i < item_count; i++) {
|
for (u64 i = 1; i < item_count; i++) {
|
||||||
|
@ -1195,9 +1196,9 @@ void test_sort() {
|
||||||
u64 sort_value_offset_in_item = offsetof(Draw_Quad, z);
|
u64 sort_value_offset_in_item = offsetof(Draw_Quad, z);
|
||||||
|
|
||||||
float64 start_seconds = os_get_current_time_in_seconds();
|
float64 start_seconds = os_get_current_time_in_seconds();
|
||||||
u64 start_cycles = os_get_current_cycle_count();
|
u64 start_cycles = rdtsc();
|
||||||
merge_sort(items, buffer, item_count, item_size, compare_draw_quads);
|
merge_sort(items, buffer, item_count, item_size, compare_draw_quads);
|
||||||
u64 end_cycles = os_get_current_cycle_count();
|
u64 end_cycles = rdtsc();
|
||||||
float64 end_seconds = os_get_current_time_in_seconds();
|
float64 end_seconds = os_get_current_time_in_seconds();
|
||||||
|
|
||||||
for (u64 i = 1; i < item_count; i++) {
|
for (u64 i = 1; i < item_count; i++) {
|
||||||
|
|
Reference in a new issue