From f9788b2e7472981046de4390544e7c89ee59dd17 Mon Sep 17 00:00:00 2001 From: Charlie Malmqvist Date: Sun, 28 Jul 2024 15:08:36 +0200 Subject: [PATCH] Some memory rework, concurrency improvements, cleanups --- TODO | 11 +- build.c | 6 +- changelog.txt | 11 +- oogabooga/concurrency.c | 30 +--- oogabooga/cpu.c | 22 +-- oogabooga/gfx_impl_d3d11.c | 105 ++++++----- oogabooga/memory.c | 64 ++----- oogabooga/oogabooga.c | 2 +- oogabooga/os_impl_windows.c | 339 ++++++++++++++++++++++-------------- oogabooga/os_interface.c | 44 ++++- oogabooga/profiling.c | 12 +- oogabooga/tests.c | 2 +- 12 files changed, 365 insertions(+), 283 deletions(-) diff --git a/TODO b/TODO index 5995d2f..4c6624f 100644 --- a/TODO +++ b/TODO @@ -21,7 +21,6 @@ - 24-Bit audio conversion doesn't really work - General bugs & issues - - Release freeze in run_tests - Window width&height is zero when minimized (and we make a 0x0 swap chain) - Window positioning & sizing is fucky wucky - Memory error messages are misleading when no VERY_DEBUG @@ -43,13 +42,19 @@ - Mouse pointer - Hide mouse pointer - -- Arenas +- Memory + - In heap allocator, mark pages that fit entirely into free nodes as NOACCESS + - Arenas - Examples/Guides: - Scaling text for pixel perfect rendering - Z sorting - Scissor boxing + - Concurrency + +- Rework profiler + - Store records and convert to google trace format on exit + - Measure both time and cycles, output a google_trace_cycles.json & google_trace_time.json - Needs testing: - Audio format channel conversions diff --git a/build.c b/build.c index fbb44aa..aa12dd3 100644 --- a/build.c +++ b/build.c @@ -3,7 +3,7 @@ /// // Build config stuff -#define INITIAL_PROGRAM_MEMORY_SIZE MB(5) +#define INITIAL_PROGRAM_MEMORY_SIZE MB(8) // You might want to increase this if you get a log warning saying the temporary storage was overflown. // In many cases, overflowing the temporary storage should be fine since it just wraps back around and @@ -37,11 +37,11 @@ typedef struct Context_Extra { // #include "oogabooga/examples/text_rendering.c" // #include "oogabooga/examples/custom_logger.c" -// #include "oogabooga/examples/renderer_stress_test.c" +#include "oogabooga/examples/renderer_stress_test.c" // #include "oogabooga/examples/tile_game.c" // #include "oogabooga/examples/audio_test.c" // #include "oogabooga/examples/custom_shader.c" -#include "oogabooga/examples/growing_array_example.c" +// #include "oogabooga/examples/growing_array_example.c" // This is where you swap in your own project! // #include "entry_yourepicgamename.c" diff --git a/changelog.txt b/changelog.txt index f0a8ee5..4bffd38 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,5 +1,5 @@ -## v0.01.003 - Nothing, really +## v0.01.003 - Stuff - Os layer - Implemented setting of mouse pointers, either to system standard pointers or a custom image - Ignore SETCURSOR events unless window resize @@ -9,10 +9,19 @@ - Renderer - Fix bad uv sampling bug when uneven window dimensions + - Memory + - Made program_memory act more like an arena (see os_reserve_next_memory_pages() & os_unlock_program_memory_pages()) + - In debug, default program memory to PAGE_NOACCESS which needs to be unlocked with os_unlock_program_memory_pages() (better crashes if we touch that memory) + - + - Misc - Deprecate Rangef stuff - peek_random() - Update #Contributions + - Clean up memory barriers in concurrency.c and use volatile instead + - Output d3d11 debug messages before crash on hr fail + - Configurable temporary storage size for new threads + - Cleanup temporary storage after thread destroy ## v0.01.002 - Flexible build options, Hotloading, growing array diff --git a/oogabooga/concurrency.c b/oogabooga/concurrency.c index 4f2c3ea..f0a7f94 100644 --- a/oogabooga/concurrency.c +++ b/oogabooga/concurrency.c @@ -4,18 +4,18 @@ typedef struct Mutex Mutex; typedef struct Binary_Semaphore Binary_Semaphore; // These are probably your best friend for sync-free multi-processing. -inline bool compare_and_swap_8(uint8_t *a, uint8_t b, uint8_t old); -inline bool compare_and_swap_16(uint16_t *a, uint16_t b, uint16_t old); -inline bool compare_and_swap_32(uint32_t *a, uint32_t b, uint32_t old); -inline bool compare_and_swap_64(uint64_t *a, uint64_t b, uint64_t old); -inline bool compare_and_swap_bool(bool *a, bool b, bool old); +inline bool compare_and_swap_8(volatile uint8_t *a, uint8_t b, uint8_t old); +inline bool compare_and_swap_16(volatile uint16_t *a, uint16_t b, uint16_t old); +inline bool compare_and_swap_32(volatile uint32_t *a, uint32_t b, uint32_t old); +inline bool compare_and_swap_64(volatile uint64_t *a, uint64_t b, uint64_t old); +inline bool compare_and_swap_bool(volatile bool *a, bool b, bool old); /// // Spinlock "primitive" // Like a mutex but it eats up the entire core while waiting. // Beneficial if contention is low or sync speed is important typedef struct Spinlock { - bool locked; + volatile bool locked; } Spinlock; void ogb_instance @@ -61,7 +61,7 @@ mutex_release(Mutex *m); /// // Binary semaphore typedef struct Binary_Semaphore { - bool signaled; + volatile bool signaled; Mutex mutex; } Binary_Semaphore; @@ -86,16 +86,12 @@ void spinlock_init(Spinlock *l) { void spinlock_acquire_or_wait(Spinlock* l) { while (true) { bool expected = false; - MEMORY_BARRIER; if (compare_and_swap_bool(&l->locked, true, expected)) { - MEMORY_BARRIER; return; } while (l->locked) { // spinny boi - MEMORY_BARRIER; } - MEMORY_BARRIER; } } // Returns true on aquired, false if timeout seconds reached @@ -103,24 +99,19 @@ bool spinlock_acquire_or_wait_timeout(Spinlock* l, f64 timeout_seconds) { f64 start = os_get_current_time_in_seconds(); while (true) { bool expected = false; - MEMORY_BARRIER; if (compare_and_swap_bool(&l->locked, true, expected)) { - MEMORY_BARRIER; return true; } while (l->locked) { // spinny boi if ((os_get_current_time_in_seconds()-start) >= timeout_seconds) return false; - MEMORY_BARRIER; } } return true; } void spinlock_release(Spinlock* l) { bool expected = true; - MEMORY_BARRIER; bool success = compare_and_swap_bool(&l->locked, false, expected); - MEMORY_BARRIER; assert(success, "This thread should have acquired the spinlock but compare_and_swap failed"); } @@ -142,29 +133,22 @@ void mutex_acquire_or_wait(Mutex *m) { if (spinlock_acquire_or_wait_timeout(&m->spinlock, m->spin_time_microseconds / 1000000.0)) { assert(!m->spinlock_acquired, "Internal sync error in Mutex"); m->spinlock_acquired = true; - MEMORY_BARRIER; } os_lock_mutex(m->os_handle); assert(!m->acquiring_thread, "Internal sync error in Mutex: Multiple threads acquired"); m->acquiring_thread = context.thread_id; - MEMORY_BARRIER; } void mutex_release(Mutex *m) { assert(m->acquiring_thread != 0, "Tried to release a mutex which is not acquired"); assert(m->acquiring_thread == context.thread_id, "Non-owning thread tried to release mutex"); m->acquiring_thread = 0; - MEMORY_BARRIER; bool was_spinlock_acquired = m->spinlock_acquired; m->spinlock_acquired = false; - MEMORY_BARRIER; os_unlock_mutex(m->os_handle); - MEMORY_BARRIER; if (was_spinlock_acquired) { spinlock_release(&m->spinlock); - MEMORY_BARRIER; } - MEMORY_BARRIER; } diff --git a/oogabooga/cpu.c b/oogabooga/cpu.c index 1c0a0cf..d901910 100644 --- a/oogabooga/cpu.c +++ b/oogabooga/cpu.c @@ -81,27 +81,27 @@ typedef struct Cpu_Capabilities { #pragma intrinsic(_InterlockedCompareExchange64) inline bool - compare_and_swap_8(uint8_t *a, uint8_t b, uint8_t old) { + compare_and_swap_8(volatile uint8_t *a, uint8_t b, uint8_t old) { return _InterlockedCompareExchange8((volatile char*)a, (char)b, (char)old) == old; } inline bool - compare_and_swap_16(uint16_t *a, uint16_t b, uint16_t old) { + compare_and_swap_16(volatile uint16_t *a, uint16_t b, uint16_t old) { return _InterlockedCompareExchange16((volatile short*)a, (short)b, (short)old) == old; } inline bool - compare_and_swap_32(uint32_t *a, uint32_t b, uint32_t old) { + compare_and_swap_32(volatile uint32_t *a, uint32_t b, uint32_t old) { return _InterlockedCompareExchange((volatile long*)a, (long)b, (long)old) == old; } inline bool - compare_and_swap_64(uint64_t *a, uint64_t b, uint64_t old) { + compare_and_swap_64(volatile uint64_t *a, uint64_t b, uint64_t old) { return _InterlockedCompareExchange64((volatile long long*)a, (long long)b, (long long)old) == old; } inline bool - compare_and_swap_bool(bool *a, bool b, bool old) { + compare_and_swap_bool(volatile bool *a, bool b, bool old) { return compare_and_swap_8((uint8_t*)a, (uint8_t)b, (uint8_t)old); } @@ -173,7 +173,7 @@ typedef struct Cpu_Capabilities { #define DEPRECATED(proc, msg) __attribute__((deprecated(msg))) proc inline bool - compare_and_swap_8(uint8_t *a, uint8_t b, uint8_t old) { + compare_and_swap_8(volatile uint8_t *a, uint8_t b, uint8_t old) { unsigned char result; __asm__ __volatile__( "lock; cmpxchgb %2, %1" @@ -185,7 +185,7 @@ typedef struct Cpu_Capabilities { } inline bool - compare_and_swap_16(uint16_t *a, uint16_t b, uint16_t old) { + compare_and_swap_16(volatile uint16_t *a, uint16_t b, uint16_t old) { unsigned short result; __asm__ __volatile__( "lock; cmpxchgw %2, %1" @@ -197,7 +197,7 @@ typedef struct Cpu_Capabilities { } inline bool - compare_and_swap_32(uint32_t *a, uint32_t b, uint32_t old) { + compare_and_swap_32(volatile uint32_t *a, uint32_t b, uint32_t old) { unsigned int result; __asm__ __volatile__( "lock; cmpxchgl %2, %1" @@ -209,7 +209,7 @@ typedef struct Cpu_Capabilities { } inline bool - compare_and_swap_64(uint64_t *a, uint64_t b, uint64_t old) { + compare_and_swap_64(volatile uint64_t *a, uint64_t b, uint64_t old) { unsigned long long result; __asm__ __volatile__( "lock; cmpxchgq %2, %1" @@ -221,11 +221,11 @@ typedef struct Cpu_Capabilities { } inline bool - compare_and_swap_bool(bool *a, bool b, bool old) { + compare_and_swap_bool(volatile bool *a, bool b, bool old) { return compare_and_swap_8((uint8_t*)a, (uint8_t)b, (uint8_t)old); } - #define MEMORY_BARRIER __asm__ __volatile__("" ::: "memory") + #define MEMORY_BARRIER {__asm__ __volatile__("" ::: "memory");__sync_synchronize();} #define thread_local __thread diff --git a/oogabooga/gfx_impl_d3d11.c b/oogabooga/gfx_impl_d3d11.c index 4514a9f..ab27362 100644 --- a/oogabooga/gfx_impl_d3d11.c +++ b/oogabooga/gfx_impl_d3d11.c @@ -26,6 +26,8 @@ typedef struct alignat(16) D3D11_Vertex { } D3D11_Vertex; +// #Global + ID3D11Debug *d3d11_debug = 0; ID3D11Device *d3d11_device = 0; @@ -61,9 +63,6 @@ u64 d3d11_cbuffer_size = 0; Draw_Quad *sort_quad_buffer = 0; u64 sort_quad_buffer_size = 0; -// Defined at the bottom of this file -extern const char *d3d11_image_shader_source; - const char* d3d11_stringify_category(D3D11_MESSAGE_CATEGORY category) { switch (category) { case D3D11_MESSAGE_CATEGORY_APPLICATION_DEFINED: return "Application Defined"; @@ -90,7 +89,6 @@ const char* d3d11_stringify_severity(D3D11_MESSAGE_SEVERITY severity) { default: return "Unknown"; } } - void CALLBACK d3d11_debug_callback(D3D11_MESSAGE_CATEGORY category, D3D11_MESSAGE_SEVERITY severity, D3D11_MESSAGE_ID id, const char* description) { if (id == 391) { @@ -122,6 +120,40 @@ void CALLBACK d3d11_debug_callback(D3D11_MESSAGE_CATEGORY category, D3D11_MESSAG break; } } +void +d3d11_output_debug_messages() { + /// + // Check debug messages, output to stdout + ID3D11InfoQueue* info_q = 0; + HRESULT hr = ID3D11Device_QueryInterface(d3d11_device, &IID_ID3D11InfoQueue, (void**)&info_q); + if (SUCCEEDED(hr)) { + u64 msg_count = ID3D11InfoQueue_GetNumStoredMessagesAllowedByRetrievalFilter(info_q); + for (u64 i = 0; i < msg_count; i++) { + SIZE_T msg_size = 0; + ID3D11InfoQueue_GetMessage(info_q, i, 0, &msg_size); + + D3D11_MESSAGE* msg = (D3D11_MESSAGE*)talloc(msg_size); + if (msg) { + ID3D11InfoQueue_GetMessage(info_q, i, msg, &msg_size); // Get the actual message + + d3d11_debug_callback(msg->Category, msg->Severity, msg->ID, msg->pDescription); + } + } + } +} + +#define d3d11_check_hr(hr) d3d11_check_hr_impl(hr, __LINE__, __FILE__); +void +d3d11_check_hr_impl(HRESULT hr, u32 line, const char* file_name) { + if (!SUCCEEDED(hr)) d3d11_output_debug_messages(); + win32_check_hr_impl(hr, line, file_name); +} + +// Defined at the bottom of this file +// #Global +extern const char *d3d11_image_shader_source; + + void d3d11_update_swapchain() { @@ -163,18 +195,18 @@ void d3d11_update_swapchain() { // Obtain DXGI factory from device IDXGIDevice *dxgi_device = 0; hr = ID3D11Device_QueryInterface(d3d11_device, &IID_IDXGIDevice, cast(void**)&dxgi_device); - win32_check_hr(hr); + d3d11_check_hr(hr); IDXGIAdapter *adapter; hr = IDXGIDevice_GetAdapter(dxgi_device, &adapter); - win32_check_hr(hr); + d3d11_check_hr(hr); IDXGIFactory2 *dxgi_factory; hr = IDXGIAdapter_GetParent(adapter, &IID_IDXGIFactory2, cast(void**)&dxgi_factory); - win32_check_hr(hr); + d3d11_check_hr(hr); hr = IDXGIFactory2_CreateSwapChainForHwnd(dxgi_factory, (IUnknown*)d3d11_device, window._os_handle, &scd, 0, 0, &d3d11_swap_chain); - win32_check_hr(hr); + d3d11_check_hr(hr); RECT client_rect; bool ok = GetClientRect(window._os_handle, &client_rect); @@ -185,7 +217,7 @@ void d3d11_update_swapchain() { // store the swap chain description, as created by CreateSwapChainForHwnd hr = IDXGISwapChain1_GetDesc1(d3d11_swap_chain, &d3d11_swap_chain_desc); - win32_check_hr(hr); + d3d11_check_hr(hr); // disable alt enter IDXGIFactory_MakeWindowAssociation(dxgi_factory, window._os_handle, cast (u32) DXGI_MWA_NO_ALT_ENTER); @@ -207,11 +239,11 @@ void d3d11_update_swapchain() { u32 window_height = client_rect.bottom-client_rect.top; hr = IDXGISwapChain1_ResizeBuffers(d3d11_swap_chain, d3d11_swap_chain_desc.BufferCount, window_width, window_height, d3d11_swap_chain_desc.Format, d3d11_swap_chain_desc.Flags); - win32_check_hr(hr); + d3d11_check_hr(hr); // update swap chain description hr = IDXGISwapChain1_GetDesc1(d3d11_swap_chain, &d3d11_swap_chain_desc); - win32_check_hr(hr); + d3d11_check_hr(hr); log("Resized swap chain from %dx%d to %dx%d", d3d11_swap_chain_width, d3d11_swap_chain_height, window_width, window_height); @@ -223,9 +255,9 @@ void d3d11_update_swapchain() { hr = IDXGISwapChain1_GetBuffer(d3d11_swap_chain, 0, &IID_ID3D11Texture2D, (void**)&d3d11_back_buffer); - win32_check_hr(hr); + d3d11_check_hr(hr); hr = ID3D11Device_CreateRenderTargetView(d3d11_device, (ID3D11Resource*)d3d11_back_buffer, 0, &d3d11_window_render_target_view); - win32_check_hr(hr); + d3d11_check_hr(hr); } bool @@ -266,10 +298,10 @@ d3d11_compile_shader(string source) { // Create the shaders hr = ID3D11Device_CreateVertexShader(d3d11_device, vs_buffer, vs_size, NULL, &d3d11_vertex_shader_for_2d); - win32_check_hr(hr); + d3d11_check_hr(hr); hr = ID3D11Device_CreatePixelShader(d3d11_device, ps_buffer, ps_size, NULL, &d3d11_fragment_shader_for_2d); - win32_check_hr(hr); + d3d11_check_hr(hr); log_verbose("Shaders created"); @@ -362,7 +394,7 @@ d3d11_compile_shader(string source) { hr = ID3D11Device_CreateInputLayout(d3d11_device, layout, layout_base_count+VERTEX_2D_USER_DATA_COUNT, vs_buffer, vs_size, &d3d11_image_vertex_layout); - win32_check_hr(hr); + d3d11_check_hr(hr); #undef layout_base_count @@ -427,7 +459,7 @@ void gfx_init() { } } - win32_check_hr(hr); + d3d11_check_hr(hr); if (debug_failed) { log_error("We could not init D3D11 with DEBUG flag. To fix this, you can try:\n1. Go to windows settings\n2. Go to System -> Optional features\n3. Add the feature called \"Graphics Tools\"\n4. Restart your computer\n5. Be frustrated that windows is like this.\nhttps://devblogs.microsoft.com/cppblog/visual-studio-2015-and-graphics-tools-for-windows-10/"); @@ -476,7 +508,7 @@ void gfx_init() { bd.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; bd.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; hr = ID3D11Device_CreateBlendState(d3d11_device, &bd, &d3d11_blend_state); - win32_check_hr(hr); + d3d11_check_hr(hr); ID3D11DeviceContext_OMSetBlendState(d3d11_context, d3d11_blend_state, NULL, 0xffffffff); } @@ -488,7 +520,7 @@ void gfx_init() { desc.DepthClipEnable = FALSE; desc.CullMode = D3D11_CULL_NONE; hr = ID3D11Device_CreateRasterizerState(d3d11_device, &desc, &d3d11_rasterizer); - win32_check_hr(hr); + d3d11_check_hr(hr); ID3D11DeviceContext_RSSetState(d3d11_context, d3d11_rasterizer); } @@ -502,19 +534,19 @@ void gfx_init() { sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; hr = ID3D11Device_CreateSamplerState(d3d11_device, &sd, &d3d11_image_sampler_np_fp); - win32_check_hr(hr); + d3d11_check_hr(hr); sd.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; hr =ID3D11Device_CreateSamplerState(d3d11_device, &sd, &d3d11_image_sampler_nl_fl); - win32_check_hr(hr); + d3d11_check_hr(hr); sd.Filter = D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT; hr = ID3D11Device_CreateSamplerState(d3d11_device, &sd, &d3d11_image_sampler_np_fl); - win32_check_hr(hr); + d3d11_check_hr(hr); sd.Filter = D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR; hr = ID3D11Device_CreateSamplerState(d3d11_device, &sd, &d3d11_image_sampler_nl_fp); - win32_check_hr(hr); + d3d11_check_hr(hr); } string source = STR(d3d11_image_shader_source); @@ -798,7 +830,7 @@ void d3d11_process_draw_frame() { D3D11_MAPPED_SUBRESOURCE buffer_mapping; tm_scope("The Map call") { hr = ID3D11DeviceContext_Map(d3d11_context, (ID3D11Resource*)d3d11_quad_vbo, 0, D3D11_MAP_WRITE_DISCARD, 0, &buffer_mapping); - win32_check_hr(hr); + d3d11_check_hr(hr); } tm_scope("The memcpy") { memcpy(buffer_mapping.pData, d3d11_staging_quad_buffer, number_of_rendered_quads*sizeof(D3D11_Vertex)*6); @@ -841,24 +873,7 @@ void gfx_update() { #if CONFIGURATION == DEBUG - /// - // Check debug messages, output to stdout - ID3D11InfoQueue* info_q = 0; - hr = ID3D11Device_QueryInterface(d3d11_device, &IID_ID3D11InfoQueue, (void**)&info_q); - if (SUCCEEDED(hr)) { - u64 msg_count = ID3D11InfoQueue_GetNumStoredMessagesAllowedByRetrievalFilter(info_q); - for (u64 i = 0; i < msg_count; i++) { - SIZE_T msg_size = 0; - ID3D11InfoQueue_GetMessage(info_q, i, 0, &msg_size); - - D3D11_MESSAGE* msg = (D3D11_MESSAGE*)talloc(msg_size); - if (msg) { - ID3D11InfoQueue_GetMessage(info_q, i, msg, &msg_size); // Get the actual message - - d3d11_debug_callback(msg->Category, msg->Severity, msg->ID, msg->pDescription); - } - } - } + d3d11_output_debug_messages(); #endif } @@ -899,10 +914,10 @@ void gfx_init_image(Gfx_Image *image, void *initial_data) { ID3D11Texture2D* texture = 0; HRESULT hr = ID3D11Device_CreateTexture2D(d3d11_device, &desc, &data_desc, &texture); - win32_check_hr(hr); + d3d11_check_hr(hr); hr = ID3D11Device_CreateShaderResourceView(d3d11_device, (ID3D11Resource*)texture, 0, &image->gfx_handle); - win32_check_hr(hr); + d3d11_check_hr(hr); if (!initial_data) { dealloc(image->allocator, data); @@ -972,7 +987,7 @@ shader_recompile_with_extension(string ext_source, u64 cbuffer_size) { desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; HRESULT hr = ID3D11Device_CreateBuffer(d3d11_device, &desc, null, &d3d11_cbuffer); - win32_check_hr(hr); + d3d11_check_hr(hr); d3d11_cbuffer_size = cbuffer_size; diff --git a/oogabooga/memory.c b/oogabooga/memory.c index afe3a92..05beea0 100644 --- a/oogabooga/memory.c +++ b/oogabooga/memory.c @@ -4,18 +4,6 @@ #define MB(x) ((KB(x))*1024ull) #define GB(x) ((MB(x))*1024ull) - -// #Global -ogb_instance void *program_memory; -ogb_instance u64 program_memory_size; -ogb_instance Mutex_Handle program_memory_mutex; - -#if !OOGABOOGA_LINK_EXTERNAL_INSTANCE -void *program_memory = 0; -u64 program_memory_size = 0; -Mutex_Handle program_memory_mutex = 0; -#endif // NOT OOGABOOGA_LINK_EXTERNAL_INSTANCE - #ifndef INIT_MEMORY_SIZE #define INIT_MEMORY_SIZE KB(50) #endif @@ -63,7 +51,7 @@ Allocator get_initialization_allocator() { // BUT: We aren't really supposed to allocate/deallocate directly on the heap too much anyways... #define MAX_HEAP_BLOCK_SIZE ((MB(500)+os.page_size)& ~(os.page_size-1)) -#define DEFAULT_HEAP_BLOCK_SIZE (min(MAX_HEAP_BLOCK_SIZE, program_memory_size)) +#define DEFAULT_HEAP_BLOCK_SIZE (min(MAX_HEAP_BLOCK_SIZE, program_memory_capacity)) #define HEAP_ALIGNMENT (sizeof(Heap_Free_Node)) typedef struct Heap_Free_Node Heap_Free_Node; typedef struct Heap_Block Heap_Block; @@ -115,7 +103,7 @@ u64 get_heap_block_size_including_metadata(Heap_Block *block) { } bool is_pointer_in_program_memory(void *p) { - return (u8*)p >= (u8*)program_memory && (u8*)p<((u8*)program_memory+program_memory_size); + return (u8*)p >= (u8*)program_memory && (u8*)p<((u8*)program_memory+program_memory_capacity); } bool is_pointer_in_stack(void* p) { void* stack_base = os_get_stack_base(); @@ -229,33 +217,20 @@ Heap_Block *make_heap_block(Heap_Block *parent, u64 size) { size += sizeof(Heap_Block); - size = (size) & ~(HEAP_ALIGNMENT-1); + size = (size+os.page_size) & ~(os.page_size-1); - Heap_Block *block; - if (parent) { - block = (Heap_Block*)(((u8*)parent)+get_heap_block_size_including_metadata(parent)); - parent->next = block; - } else { - block = (Heap_Block*)program_memory; - } + Heap_Block *block = (Heap_Block*)os_reserve_next_memory_pages(size); + + assert((u64)block % os.page_size == 0, "Heap block not aligned to page size"); + + if (parent) parent->next = block; + + os_unlock_program_memory_pages(block, size); + #if CONFIGURATION == DEBUG block->total_allocated = 0; #endif - - if (((u8*)block)+size >= ((u8*)program_memory)+program_memory_size) { - u64 minimum_size = ((u8*)block+size) - (u8*)program_memory + 1; - u64 new_program_size = get_next_power_of_two(minimum_size); - assert(new_program_size >= minimum_size, "Internal goof"); - const u64 ATTEMPTS = 1000; - for (u64 i = 0; i <= ATTEMPTS; i++) { - if (program_memory_size >= new_program_size) break; // Another thread might have resized already, causing it to fail here. - assert(i < ATTEMPTS, "OS is not letting us allocate more memory. Maybe we are out of memory? You sure must be using a lot of memory then."); - if (os_grow_program_memory(new_program_size)) - break; - } - } - block->start = ((u8*)block)+sizeof(Heap_Block); block->size = size; block->next = 0; @@ -547,14 +522,12 @@ get_temporary_allocator(); #if !OOGABOOGA_LINK_EXTERNAL_INSTANCE thread_local void * temporary_storage = 0; -thread_local bool temporary_storage_initted = false; thread_local void * temporary_storage_pointer = 0; thread_local bool has_warned_temporary_storage_overflow = false; thread_local Allocator temp_allocator; ogb_instance Allocator get_temporary_allocator() { - if (!temporary_storage_initted) return get_initialization_allocator(); return temp_allocator; } #endif @@ -563,7 +536,7 @@ ogb_instance void* temp_allocator_proc(u64 size, void *p, Allocator_Message message, void* data); ogb_instance void -temporary_storage_init(); +temporary_storage_init(u64 arena_size); ogb_instance void* talloc(u64 size); @@ -589,23 +562,19 @@ void* temp_allocator_proc(u64 size, void *p, Allocator_Message message, void* da return 0; } -void temporary_storage_init() { - if (temporary_storage_initted) return; +void temporary_storage_init(u64 arena_size) { - temporary_storage = heap_alloc(TEMPORARY_STORAGE_SIZE); + temporary_storage = heap_alloc(arena_size); assert(temporary_storage, "Failed allocating temporary storage"); temporary_storage_pointer = temporary_storage; temp_allocator.proc = temp_allocator_proc; temp_allocator.data = 0; - temporary_storage_initted = true; - temp_allocator.proc = temp_allocator_proc; } void* talloc(u64 size) { - if (!temporary_storage_initted) temporary_storage_init(); assert(size < TEMPORARY_STORAGE_SIZE, "Bruddah this is too large for temp allocator"); @@ -625,10 +594,7 @@ void* talloc(u64 size) { } void reset_temporary_storage() { - if (!temporary_storage_initted) temporary_storage_init(); - - temporary_storage_pointer = temporary_storage; - + temporary_storage_pointer = temporary_storage; has_warned_temporary_storage_overflow = true; } diff --git a/oogabooga/oogabooga.c b/oogabooga/oogabooga.c index 975a8c2..034ca35 100644 --- a/oogabooga/oogabooga.c +++ b/oogabooga/oogabooga.c @@ -385,7 +385,7 @@ void oogabooga_init(u64 program_memory_size) { Cpu_Capabilities features = query_cpu_capabilities(); os_init(program_memory_size); heap_init(); - temporary_storage_init(); + temporary_storage_init(TEMPORARY_STORAGE_SIZE); log_info("Ooga booga version is %d.%02d.%03d", OGB_VERSION_MAJOR, OGB_VERSION_MINOR, OGB_VERSION_PATCH); #ifndef OOGABOOGA_HEADLESS gfx_init(); diff --git a/oogabooga/os_impl_windows.c b/oogabooga/os_impl_windows.c index efc9eb3..8950f66 100644 --- a/oogabooga/os_impl_windows.c +++ b/oogabooga/os_impl_windows.c @@ -12,6 +12,59 @@ void* heap_alloc(u64); void heap_dealloc(void*); +u16 *win32_fixed_utf8_to_null_terminated_wide(string utf8, Allocator allocator) { + + if (utf8.count == 0) { + u16 *utf16_str = (u16 *)alloc(allocator, (1) * sizeof(u16)); + *utf16_str = 0; + return utf16_str; + } + + u64 utf16_length = MultiByteToWideChar(CP_UTF8, 0, (LPCCH)utf8.data, (int)utf8.count, 0, 0); + + u16 *utf16_str = (u16 *)alloc(allocator, (utf16_length + 1) * sizeof(u16)); + + int result = MultiByteToWideChar(CP_UTF8, 0, (LPCCH)utf8.data, (int)utf8.count, utf16_str, utf16_length); + if (result == 0) { + dealloc(allocator, utf16_str); + return 0; + } + + utf16_str[utf16_length] = 0; + + return utf16_str; +} +u16 *temp_win32_fixed_utf8_to_null_terminated_wide(string utf8) { + return win32_fixed_utf8_to_null_terminated_wide(utf8, get_temporary_allocator()); +} +string win32_null_terminated_wide_to_fixed_utf8(const u16 *utf16, Allocator allocator) { + u64 utf8_length = WideCharToMultiByte(CP_UTF8, 0, (LPCWCH)utf16, -1, 0, 0, 0, 0); + + if (utf8_length == 0) { + string utf8; + utf8.count = 0; + utf8.data = 0; + return utf8; + } + + u8 *utf8_str = (u8 *)alloc(allocator, utf8_length * sizeof(u8)); + + int result = WideCharToMultiByte(CP_UTF8, 0, (LPCWCH)utf16, -1, (LPSTR)utf8_str, (int)utf8_length, 0, 0); + if (result == 0) { + dealloc(allocator, utf8_str); + return (string){0, 0}; + } + + string utf8; + utf8.data = utf8_str; + utf8.count = utf8_length-1; + + return utf8; +} + +string temp_win32_null_terminated_wide_to_fixed_utf8(const u16 *utf16) { + return win32_null_terminated_wide_to_fixed_utf8(utf16, get_temporary_allocator()); +} #define win32_check_hr(hr) win32_check_hr_impl(hr, __LINE__, __FILE__); void win32_check_hr_impl(HRESULT hr, u32 line, const char* file_name) { if (hr != S_OK) { @@ -30,12 +83,21 @@ void win32_check_hr_impl(HRESULT hr, u32 line, const char* file_name) { 0, NULL ); + u16 *wide_err = 0; + if (messageLength > 0) { - MessageBoxW(NULL, (LPWSTR)errorMsg, L"Error", MB_OK | MB_ICONERROR); + wide_err = (LPWSTR)errorMsg; } else { - MessageBoxW(NULL, L"Failed to retrieve error message.", L"Error", MB_OK | MB_ICONERROR); + wide_err = (u16*)L"Failed to retrieve error message."; } + string utf8_err = temp_win32_null_terminated_wide_to_fixed_utf8(wide_err); + + string final_message_utf8 = tprint("%s\nIn file %cs on line %d", utf8_err, file_name, line); + + u16 *final_message_wide = temp_win32_fixed_utf8_to_null_terminated_wide(final_message_utf8); + + MessageBoxW(NULL, final_message_wide, L"Error", MB_OK | MB_ICONERROR); panic("win32 hr failed in file %cs on line %d, hr was %d", file_name, line, hr); } @@ -266,11 +328,19 @@ win32_audio_thread(Thread *t); void win32_audio_poll_default_device_thread(Thread *t); -bool win32_has_audio_thread_started = false; +volatile bool win32_has_audio_thread_started = false; #endif /* OOGABOOGA_HEADLESS */ -void os_init(u64 program_memory_size) { +void os_init(u64 program_memory_capacity) { + // #Volatile + // Any printing uses vsnprintf, and printing may happen in init, + // especially on errors, so this needs to happen first. + os.crt = os_load_dynamic_library(STR("msvcrt.dll")); + assert(os.crt != 0, "Could not load win32 crt library. Might be compiled with non-msvc? #Incomplete #Portability"); + os.crt_vsnprintf = (Crt_Vsnprintf_Proc)os_dynamic_library_load_symbol(os.crt, STR("vsnprintf")); + assert(os.crt_vsnprintf, "Missing vsnprintf in crt"); + #if CONFIGURATION == DEBUG HANDLE process = GetCurrentProcess(); SymInitialize(process, NULL, TRUE); @@ -284,7 +354,10 @@ void os_init(u64 program_memory_size) { #if CONFIGURATION == RELEASE + // #Configurable #Copypaste SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS); + SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL); + timeBeginPeriod(1); #endif SetProcessDpiAwarenessContext(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2); @@ -314,14 +387,11 @@ void os_init(u64 program_memory_size) { program_memory_mutex = os_make_mutex(); - os_grow_program_memory(program_memory_size); + os_grow_program_memory(program_memory_capacity); heap_init(); - os.crt = os_load_dynamic_library(STR("msvcrt.dll")); - assert(os.crt != 0, "Could not load win32 crt library. Might be compiled with non-msvc? #Incomplete #Portability"); - os.crt_vsnprintf = (Crt_Vsnprintf_Proc)os_dynamic_library_load_symbol(os.crt, STR("vsnprintf")); - assert(os.crt_vsnprintf, "Missing vsnprintf in crt"); + #ifndef OOGABOOGA_HEADLESS win32_init_window(); @@ -380,71 +450,7 @@ void s64_to_null_terminated_string(s64 num, char* str, int base) s64_to_null_terminated_string_reverse(str, i); } -bool os_grow_program_memory(u64 new_size) { - os_lock_mutex(program_memory_mutex); // #Sync - if (program_memory_size >= new_size) { - os_unlock_mutex(program_memory_mutex); // #Sync - return true; - } - - - bool is_first_time = program_memory == 0; - - if (is_first_time) { - u64 aligned_size = (new_size+os.granularity) & ~(os.granularity); - void* aligned_base = (void*)(((u64)VIRTUAL_MEMORY_BASE+os.granularity) & ~(os.granularity-1)); - - program_memory = VirtualAlloc(aligned_base, aligned_size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); - if (program_memory == 0) { - os_unlock_mutex(program_memory_mutex); // #Sync - return false; - } - program_memory_size = aligned_size; - - memset(program_memory, 0xBA, program_memory_size); - } else { - // #Cleanup this mess - // Allocation size doesn't actually need to be aligned to granularity, page size is enough. - // Doesn't matter that much tho, but this is just a bit unfortunate to look at. - void* tail = (u8*)program_memory + program_memory_size; - u64 m = ((u64)program_memory_size % os.granularity); - assert(m == 0, "program_memory_size is not aligned to granularity!"); - m = ((u64)tail % os.granularity); - assert(m == 0, "Tail is not aligned to granularity!"); - u64 amount_to_allocate = new_size-program_memory_size; - amount_to_allocate = ((amount_to_allocate+os.granularity)&~(os.granularity-1)); - m = ((u64)amount_to_allocate % os.granularity); - assert(m == 0, "amount_to_allocate is not aligned to granularity!"); - // Just keep allocating at the tail of the current chunk - void* result = VirtualAlloc(tail, amount_to_allocate, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); - assert(result == tail); -#if CONFIGURATION == DEBUG - volatile u8 a = *(u8*)tail = 69; -#endif - memset(result, 0xBA, amount_to_allocate); - if (result == 0) { - os_unlock_mutex(program_memory_mutex); // #Sync - return false; - } - assert(tail == result, "It seems tail is not aligned properly. o nein"); - - program_memory_size += amount_to_allocate; - - m = ((u64)program_memory_size % os.granularity); - assert(m == 0, "program_memory_size is not aligned to granularity!"); - } - - - char size_str[32]; - s64_to_null_terminated_string(program_memory_size/1024, size_str, 10); - - os_write_string_to_stdout(STR("Program memory grew to ")); - os_write_string_to_stdout(STR(size_str)); - os_write_string_to_stdout(STR(" kb\n")); - os_unlock_mutex(program_memory_mutex); // #Sync - return true; -} /// @@ -458,15 +464,25 @@ bool os_grow_program_memory(u64 new_size) { DWORD WINAPI win32_thread_invoker(LPVOID param) { -#if CONFIGURATION == RELEASE - SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS); -#endif Thread *t = (Thread*)param; - temporary_storage_init(); + +#if CONFIGURATION == RELEASE + // #Configurable #Copypaste + SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS); + SetThreadPriority(t->os_handle, THREAD_PRIORITY_TIME_CRITICAL); + timeBeginPeriod(1); +#endif + + temporary_storage_init(t->temporary_storage_size); + context = t->initial_context; context.thread_id = GetCurrentThreadId(); + t->proc(t); + + heap_dealloc(temporary_storage); + return 0; } @@ -508,6 +524,7 @@ void os_thread_init(Thread *t, Thread_Proc proc) { t->id = 0; t->proc = proc; t->initial_context = context; + t->temporary_storage_size = KB(10); } void os_thread_destroy(Thread *t) { os_thread_join(t); @@ -649,59 +666,7 @@ void os_write_string_to_stdout(string s) { WriteFile(win32_stdout, s.data, s.count, 0, 0); } -u16 *win32_fixed_utf8_to_null_terminated_wide(string utf8, Allocator allocator) { - if (utf8.count == 0) { - u16 *utf16_str = (u16 *)alloc(allocator, (1) * sizeof(u16)); - *utf16_str = 0; - return utf16_str; - } - - u64 utf16_length = MultiByteToWideChar(CP_UTF8, 0, (LPCCH)utf8.data, (int)utf8.count, 0, 0); - - u16 *utf16_str = (u16 *)alloc(allocator, (utf16_length + 1) * sizeof(u16)); - - int result = MultiByteToWideChar(CP_UTF8, 0, (LPCCH)utf8.data, (int)utf8.count, utf16_str, utf16_length); - if (result == 0) { - dealloc(allocator, utf16_str); - return 0; - } - - utf16_str[utf16_length] = 0; - - return utf16_str; -} -u16 *temp_win32_fixed_utf8_to_null_terminated_wide(string utf8) { - return win32_fixed_utf8_to_null_terminated_wide(utf8, get_temporary_allocator()); -} -string win32_null_terminated_wide_to_fixed_utf8(const u16 *utf16, Allocator allocator) { - u64 utf8_length = WideCharToMultiByte(CP_UTF8, 0, (LPCWCH)utf16, -1, 0, 0, 0, 0); - - if (utf8_length == 0) { - string utf8; - utf8.count = 0; - utf8.data = 0; - return utf8; - } - - u8 *utf8_str = (u8 *)alloc(allocator, utf8_length * sizeof(u8)); - - int result = WideCharToMultiByte(CP_UTF8, 0, (LPCWCH)utf16, -1, (LPSTR)utf8_str, (int)utf8_length, 0, 0); - if (result == 0) { - dealloc(allocator, utf8_str); - return (string){0, 0}; - } - - string utf8; - utf8.data = utf8_str; - utf8.count = utf8_length-1; - - return utf8; -} - -string temp_win32_null_terminated_wide_to_fixed_utf8(const u16 *utf16) { - return win32_null_terminated_wide_to_fixed_utf8(utf16, get_temporary_allocator()); -} File os_file_open_s(string path, Os_Io_Open_Flags flags) { @@ -1186,6 +1151,116 @@ os_get_stack_trace(u64 *trace_count, Allocator allocator) { #endif // NOT DEBUG } +bool os_grow_program_memory(u64 new_size) { + os_lock_mutex(program_memory_mutex); // #Sync + if (program_memory_capacity >= new_size) { + os_unlock_mutex(program_memory_mutex); // #Sync + return true; + } + + + + bool is_first_time = program_memory == 0; + + if (is_first_time) { + // It's fine to allocate a region with size only aligned to page size, BUT, + // since we allocate each region with the base address at the tail of the + // previous region, then that tail needs to be aligned to granularity, which + // will be true if the size is also always aligned to granularity. + u64 aligned_size = (new_size+os.granularity) & ~(os.granularity); + void* aligned_base = (void*)(((u64)VIRTUAL_MEMORY_BASE+os.granularity) & ~(os.granularity-1)); + + program_memory = VirtualAlloc(aligned_base, aligned_size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + if (program_memory == 0) { + os_unlock_mutex(program_memory_mutex); // #Sync + return false; + } + program_memory_next = program_memory; + program_memory_capacity = aligned_size; +#if CONFIGURATION == DEBUG + memset(program_memory, 0xBA, program_memory_capacity); + DWORD _ = PAGE_READWRITE; + VirtualProtect(aligned_base, aligned_size, PAGE_NOACCESS, &_); +#endif + } else { + void* tail = (u8*)program_memory + program_memory_capacity; + + assert((u64)program_memory_capacity % os.granularity == 0, "program_memory_capacity is not aligned to granularity!"); + assert((u64)tail % os.granularity == 0, "Tail is not aligned to granularity!"); + + u64 amount_to_allocate = new_size-program_memory_capacity; + amount_to_allocate = ((amount_to_allocate+os.granularity)&~(os.granularity-1)); + + // Just keep allocating at the tail of the current chunk + void* result = VirtualAlloc(tail, amount_to_allocate, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); +#if CONFIGURATION == DEBUG + memset(result, 0xBA, amount_to_allocate); + DWORD _ = PAGE_READWRITE; + VirtualProtect(tail, amount_to_allocate, PAGE_NOACCESS, &_); +#endif + if (result == 0) { + os_unlock_mutex(program_memory_mutex); // #Sync + return false; + } + assert(tail == result, "It seems tail is not aligned properly. o nein"); + assert((u64)program_memory_capacity % os.granularity == 0, "program_memory_capacity is not aligned to granularity!"); + + program_memory_capacity += amount_to_allocate; + } + + + char size_str[32]; + s64_to_null_terminated_string(program_memory_capacity/1024, size_str, 10); + + os_write_string_to_stdout(STR("Program memory grew to ")); + os_write_string_to_stdout(STR(size_str)); + os_write_string_to_stdout(STR(" kb\n")); + os_unlock_mutex(program_memory_mutex); // #Sync + return true; +} + +void* +os_reserve_next_memory_pages(u64 size) { + assert(size % os.page_size == 0, "size was not aligned to page size in os_reserve_next_memory_pages"); + + void *p = program_memory_next; + + program_memory_next = (u8*)program_memory_next + size; + + void *program_tail = (u8*)program_memory + program_memory_capacity; + + if ((u64)program_memory_next > (u64)program_tail) { + u64 minimum_size = ((u64)program_memory_next) - (u64)program_memory + 1; + u64 new_program_size = get_next_power_of_two(minimum_size); + + const u64 ATTEMPTS = 1000; + for (u64 i = 0; i <= ATTEMPTS; i++) { + if (program_memory_capacity >= new_program_size) break; // Another thread might have resized already, causing it to fail here. + assert(i < ATTEMPTS, "OS is not letting us allocate more memory. Maybe we are out of memory? You sure must be using a lot of memory then."); + if (os_grow_program_memory(new_program_size)) + break; + } + } + + return p; +} + +void +os_unlock_program_memory_pages(void *start, u64 size) { +#if CONFIGURATION == DEBUG + assert((u64)start % os.page_size == 0, "When unlocking memory pages, the start address must be the start of a page"); + assert(size % os.page_size == 0, "When unlocking memory pages, the size must be aligned to page_size"); + // This memory may be across multiple allocated regions so we need to do this one page at a time. + // Probably super slow but this shouldn't happen often at all + it's only in debug. + // - Charlie M 28th July 2024 + for (u8 *p = (u8*)start; p < (u8*)start+size; p += os.page_size) { + DWORD old_protect = PAGE_NOACCESS; + BOOL ok = VirtualProtect(p, os.page_size, PAGE_READWRITE, &old_protect); + assert(ok, "VirtualProtect Failed with error %d", GetLastError()); + } +#endif +} + /// /// // Mouse pointer @@ -1460,7 +1535,6 @@ win32_audio_init() { void win32_audio_poll_default_device_thread(Thread *t) { while (!win32_has_audio_thread_started) { - MEMORY_BARRIER; os_yield_thread(); } @@ -1470,9 +1544,7 @@ win32_audio_poll_default_device_thread(Thread *t) { } mutex_acquire_or_wait(&audio_init_mutex); - MEMORY_BARRIER; mutex_release(&audio_init_mutex); - MEMORY_BARRIER; IMMDevice *now_default = 0; HRESULT hr = IMMDeviceEnumerator_GetDefaultAudioEndpoint(win32_device_enumerator, eRender, eConsole, &now_default); @@ -1507,11 +1579,8 @@ win32_audio_thread(Thread *t) { mutex_acquire_or_wait(&audio_init_mutex); win32_has_audio_thread_started = true; - MEMORY_BARRIER; win32_audio_init(); mutex_release(&audio_init_mutex); - - timeBeginPeriod(1); u32 buffer_frame_count; HRESULT hr = IAudioClient_GetBufferSize(win32_audio_client, &buffer_frame_count); diff --git a/oogabooga/os_interface.c b/oogabooga/os_interface.c index 41948c0..8ee3ce8 100644 --- a/oogabooga/os_interface.c +++ b/oogabooga/os_interface.c @@ -87,8 +87,7 @@ inline int vsnprintf(char* buffer, size_t n, const char* fmt, va_list args) { -bool ogb_instance -os_grow_program_memory(size_t new_size); + /// /// @@ -103,8 +102,11 @@ typedef struct Thread { u64 id; // This is valid after os_thread_start Context initial_context; void* data; + u64 temporary_storage_size; // Defaults to KB(10) Thread_Proc proc; Thread_Handle os_handle; + + Allocator allocator; // Deprecated !! #Cleanup } Thread; @@ -395,7 +397,8 @@ os_get_number_of_logical_processors(); ogb_instance string* os_get_stack_trace(u64 *trace_count, Allocator allocator); -void dump_stack_trace() { +inline void +dump_stack_trace() { u64 count; string *strings = os_get_stack_trace(&count, get_temporary_allocator()); @@ -405,6 +408,38 @@ void dump_stack_trace() { } } + +/// +/// +// Memory +/// + +// #Global +ogb_instance void *program_memory; +ogb_instance void *program_memory_next; +ogb_instance u64 program_memory_capacity; +ogb_instance Mutex_Handle program_memory_mutex; + +#if !OOGABOOGA_LINK_EXTERNAL_INSTANCE +void *program_memory = 0; +void *program_memory_next = 0; +u64 program_memory_capacity = 0; +Mutex_Handle program_memory_mutex = 0; +#endif // NOT OOGABOOGA_LINK_EXTERNAL_INSTANCE + +bool ogb_instance +os_grow_program_memory(size_t new_size); + +// BEWARE: +// - size must be aligned to os.page_size +// - Pages will not always belong to the same region (although they will be contigious in virtual adress space) +// - Pages will be locked (Win32 PAGE_NOACCESS) so you need to unlock with os_unlock_program_memory_pages() before use. +ogb_instance void* +os_reserve_next_memory_pages(u64 size); + +void ogb_instance +os_unlock_program_memory_pages(void *start, u64 size); + /// /// // Mouse pointer @@ -450,5 +485,4 @@ void ogb_instance os_init(u64 program_memory_size); void ogb_instance -os_update(); - +os_update(); \ No newline at end of file diff --git a/oogabooga/profiling.c b/oogabooga/profiling.c index 3a6c859..5694e55 100644 --- a/oogabooga/profiling.c +++ b/oogabooga/profiling.c @@ -40,17 +40,17 @@ void _profiler_report_time_cycles(string name, u64 count, u64 start) { } #if ENABLE_PROFILING #define tm_scope(name) \ - for (u64 start_time = os_get_current_cycle_count(), end_time = start_time, elapsed_time = 0; \ + for (u64 start_time = rdtsc(), end_time = start_time, elapsed_time = 0; \ elapsed_time == 0; \ - elapsed_time = (end_time = os_get_current_cycle_count()) - start_time, _profiler_report_time_cycles(STR(name), elapsed_time, start_time)) + elapsed_time = (end_time = rdtsc()) - start_time, _profiler_report_time_cycles(STR(name), elapsed_time, start_time)) #define tm_scope_var(name, var) \ - for (u64 start_time = os_get_current_cycle_count(), end_time = start_time, elapsed_time = 0; \ + for (u64 start_time = rdtsc(), end_time = start_time, elapsed_time = 0; \ elapsed_time == 0; \ - elapsed_time = (end_time = os_get_current_cycle_count()) - start_time, var=elapsed_time) + elapsed_time = (end_time = rdtsc()) - start_time, var=elapsed_time) #define tm_scope_accum(name, var) \ - for (u64 start_time = os_get_current_cycle_count(), end_time = start_time, elapsed_time = 0; \ + for (u64 start_time = rdtsc(), end_time = start_time, elapsed_time = 0; \ elapsed_time == 0; \ - elapsed_time = (end_time = os_get_current_cycle_count()) - start_time, var+=elapsed_time) + elapsed_time = (end_time = rdtsc()) - start_time, var+=elapsed_time) #else #define tm_scope(...) #define tm_scope_var(...) diff --git a/oogabooga/tests.c b/oogabooga/tests.c index 21657cd..e55c7ef 100644 --- a/oogabooga/tests.c +++ b/oogabooga/tests.c @@ -1208,7 +1208,7 @@ void test_mutex() { Allocator allocator = get_heap_allocator(); - const int num_threads = 100; + const int num_threads = 1000; Thread *threads = alloc(allocator, sizeof(Thread)*num_threads); for (u64 i = 0; i < num_threads; i++) {