From 71708cfc776b28c8eb3fbcc8ea9722ff8ea502fc Mon Sep 17 00:00:00 2001 From: Charlie Malmqvist Date: Sun, 28 Jul 2024 17:17:58 +0200 Subject: [PATCH] More unused memory locking --- .gitignore | 4 +- build.c | 2 +- changelog.txt | 3 +- oogabooga/base.c | 5 ++- oogabooga/linmath.c | 7 ++-- oogabooga/memory.c | 74 ++++++++++++++++++++++++++++++++++--- oogabooga/os_impl_windows.c | 23 ++++++++++-- oogabooga/os_interface.c | 2 + 8 files changed, 102 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index 952a2e6..7902c67 100644 --- a/.gitignore +++ b/.gitignore @@ -56,4 +56,6 @@ test_doc.vkn google_trace.json -build/* \ No newline at end of file +build/* + +*google_trace* \ No newline at end of file diff --git a/build.c b/build.c index aa12dd3..9e16373 100644 --- a/build.c +++ b/build.c @@ -3,7 +3,7 @@ /// // Build config stuff -#define INITIAL_PROGRAM_MEMORY_SIZE MB(8) +#define INITIAL_PROGRAM_MEMORY_SIZE MB(5) // You might want to increase this if you get a log warning saying the temporary storage was overflown. // In many cases, overflowing the temporary storage should be fine since it just wraps back around and diff --git a/changelog.txt b/changelog.txt index 4bffd38..ddff4da 100644 --- a/changelog.txt +++ b/changelog.txt @@ -12,7 +12,8 @@ - Memory - Made program_memory act more like an arena (see os_reserve_next_memory_pages() & os_unlock_program_memory_pages()) - In debug, default program memory to PAGE_NOACCESS which needs to be unlocked with os_unlock_program_memory_pages() (better crashes if we touch that memory) - - + - os_lock_program_memory_pages + - Heap locks pages when completely free - Misc - Deprecate Rangef stuff diff --git a/oogabooga/base.c b/oogabooga/base.c index c03e837..563054a 100644 --- a/oogabooga/base.c +++ b/oogabooga/base.c @@ -188,4 +188,7 @@ get_next_power_of_two(u64 x) { x |= x >> 32; return x + 1; -} \ No newline at end of file +} + +#define align_next(x, a) ((u64)((x)+(a)-1ULL) & (u64)~((a)-1ULL)) +#define align_previous(x, a) ((u64)(x) & (u64)~((a) - 1ULL)) \ No newline at end of file diff --git a/oogabooga/linmath.c b/oogabooga/linmath.c index 60e01ac..5727c3b 100644 --- a/oogabooga/linmath.c +++ b/oogabooga/linmath.c @@ -350,10 +350,7 @@ inline Vector4i v4i_abs(LMATH_ALIGN Vector4i a) { return v4i(absi(a.x), absi(a.y), absi(a.z), absi(a.w)); } - - - -typedef struct Matrix4 { +typedef struct alignat(16) Matrix4 { union {float32 m[4][4]; float32 data[16]; }; } Matrix4; @@ -417,6 +414,8 @@ Matrix4 m4_mul(LMATH_ALIGN Matrix4 a, LMATH_ALIGN Matrix4 b) { Matrix4 result; for (int i = 0; i < 4; ++i) { for (int j = 0; j < 4; ++j) { + + result.m[i][j] = a.m[i][0] * b.m[0][j] + a.m[i][1] * b.m[1][j] + a.m[i][2] * b.m[2][j] + diff --git a/oogabooga/memory.c b/oogabooga/memory.c index 05beea0..d712b0d 100644 --- a/oogabooga/memory.c +++ b/oogabooga/memory.c @@ -50,7 +50,7 @@ Allocator get_initialization_allocator() { // We could fix it by merging free nodes every now and then // BUT: We aren't really supposed to allocate/deallocate directly on the heap too much anyways... -#define MAX_HEAP_BLOCK_SIZE ((MB(500)+os.page_size)& ~(os.page_size-1)) +#define MAX_HEAP_BLOCK_SIZE align_next(MB(500), os.page_size) #define DEFAULT_HEAP_BLOCK_SIZE (min(MAX_HEAP_BLOCK_SIZE, program_memory_capacity)) #define HEAP_ALIGNMENT (sizeof(Heap_Free_Node)) typedef struct Heap_Free_Node Heap_Free_Node; @@ -217,14 +217,13 @@ Heap_Block *make_heap_block(Heap_Block *parent, u64 size) { size += sizeof(Heap_Block); - size = (size+os.page_size) & ~(os.page_size-1); + size = align_next(size, os.page_size); Heap_Block *block = (Heap_Block*)os_reserve_next_memory_pages(size); assert((u64)block % os.page_size == 0, "Heap block not aligned to page size"); if (parent) parent->next = block; - os_unlock_program_memory_pages(block, size); #if CONFIGURATION == DEBUG @@ -250,8 +249,6 @@ void heap_init() { spinlock_init(&heap_lock); } - - void *heap_alloc(u64 size) { if (!heap_initted) heap_init(); @@ -332,14 +329,34 @@ void *heap_alloc(u64 size) { assert(best_fit != 0, "Internal heap error"); + // Unlock best fit + + // #Copypaste + void *free_tail = (u8*)best_fit + best_fit->size; + void *first_page = (void*)align_previous(best_fit, os.page_size); + void *last_page_end = (void*)align_previous(free_tail, os.page_size); + if ((u8*)last_page_end > (u8*)first_page) { + os_unlock_program_memory_pages(first_page, (u64)last_page_end-(u64)first_page); + } + Heap_Free_Node *new_free_node = 0; if (size != best_fit->size) { u64 remainder = best_fit->size - size; new_free_node = (Heap_Free_Node*)(((u8*)best_fit)+size); new_free_node->size = remainder; new_free_node->next = best_fit->next; + + // Lock remaining free node + // #Copypaste + void *free_tail = (u8*)new_free_node + new_free_node->size; + void *next_page = (void*)align_next(new_free_node, os.page_size); + void *last_page_end = (void*)align_previous(free_tail, os.page_size); + if ((u8*)last_page_end > (u8*)next_page) { + os_lock_program_memory_pages(next_page, (u64)last_page_end-(u64)next_page); + } } + if (previous && new_free_node) { assert(previous->next == best_fit, "Internal heap error"); previous->next = new_free_node; @@ -407,6 +424,14 @@ void heap_dealloc(void *p) { new_node->size = size; if (new_node < block->free_head) { + // #Copypaste + void *free_tail = (u8*)new_node + new_node->size; + void *next_page = (void*)align_next(new_node, os.page_size); + void *last_page_end = (void*)align_previous(free_tail, os.page_size); + if ((u8*)last_page_end > (u8*)next_page) { + os_lock_program_memory_pages(next_page, (u64)last_page_end-(u64)next_page); + } + if ((u8*)new_node+size == (u8*)block->free_head) { new_node->size = size + block->free_head->size; new_node->next = block->free_head->next; @@ -415,11 +440,21 @@ void heap_dealloc(void *p) { new_node->next = block->free_head; block->free_head = new_node; } + } else { if (!block->free_head) { block->free_head = new_node; new_node->next = 0; + + // #Copypaste + void *free_tail = (u8*)new_node + new_node->size; + void *next_page = (void*)align_next(new_node, os.page_size); + void *last_page_end = (void*)align_previous(free_tail, os.page_size); + if ((u8*)last_page_end > (u8*)next_page) { + os_lock_program_memory_pages(next_page, (u64)last_page_end-(u64)next_page); + } + } else { Heap_Free_Node *node = block->free_head; @@ -437,7 +472,25 @@ void heap_dealloc(void *p) { if (new_node >= node) { u8* node_tail = (u8*)node + node->size; if (cast(u8*)new_node == node_tail) { - node->size += new_node->size; + + void *left_node_tail = (u8*)node+node->size; + + // We need to account for the cases where we coalesce free blocks with start/end in the middle + // of a page. + + // new_node->size will be locked but we need node->size += new_node->size; + u64 new_node_size = new_node->size; + + // #Copypaste + void *free_tail = (u8*)new_node + new_node->size; + void *next_page = (void*)align_previous(left_node_tail, os.page_size); + void *last_page_end = (void*)align_previous(free_tail, os.page_size); + if ((u8*)last_page_end > (u8*)next_page) { + os_lock_program_memory_pages(next_page, (u64)last_page_end-(u64)next_page); + } + + node->size += new_node_size; + break; } else { new_node->next = node->next; @@ -448,6 +501,15 @@ void heap_dealloc(void *p) { new_node->size += new_node->next->size; new_node->next = new_node->next->next; } + + // #Copypaste + void *free_tail = (u8*)new_node + new_node->size; + void *next_page = (void*)align_next(new_node, os.page_size); + void *last_page_end = (void*)align_previous(free_tail, os.page_size); + if ((u8*)last_page_end > (u8*)next_page) { + os_lock_program_memory_pages(next_page, (u64)last_page_end-(u64)next_page); + } + break; } } diff --git a/oogabooga/os_impl_windows.c b/oogabooga/os_impl_windows.c index 8950f66..cce8b78 100644 --- a/oogabooga/os_impl_windows.c +++ b/oogabooga/os_impl_windows.c @@ -1167,8 +1167,8 @@ bool os_grow_program_memory(u64 new_size) { // since we allocate each region with the base address at the tail of the // previous region, then that tail needs to be aligned to granularity, which // will be true if the size is also always aligned to granularity. - u64 aligned_size = (new_size+os.granularity) & ~(os.granularity); - void* aligned_base = (void*)(((u64)VIRTUAL_MEMORY_BASE+os.granularity) & ~(os.granularity-1)); + u64 aligned_size = align_next(new_size, os.granularity); + void *aligned_base = (void*)align_next(VIRTUAL_MEMORY_BASE, os.granularity); program_memory = VirtualAlloc(aligned_base, aligned_size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); if (program_memory == 0) { @@ -1188,8 +1188,7 @@ bool os_grow_program_memory(u64 new_size) { assert((u64)program_memory_capacity % os.granularity == 0, "program_memory_capacity is not aligned to granularity!"); assert((u64)tail % os.granularity == 0, "Tail is not aligned to granularity!"); - u64 amount_to_allocate = new_size-program_memory_capacity; - amount_to_allocate = ((amount_to_allocate+os.granularity)&~(os.granularity-1)); + u64 amount_to_allocate = align_next(new_size-program_memory_capacity, os.granularity); // Just keep allocating at the tail of the current chunk void* result = VirtualAlloc(tail, amount_to_allocate, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); @@ -1261,6 +1260,22 @@ os_unlock_program_memory_pages(void *start, u64 size) { #endif } +void +os_lock_program_memory_pages(void *start, u64 size) { +#if CONFIGURATION == DEBUG + assert((u64)start % os.page_size == 0, "When unlocking memory pages, the start address must be the start of a page"); + assert(size % os.page_size == 0, "When unlocking memory pages, the size must be aligned to page_size"); + // This memory may be across multiple allocated regions so we need to do this one page at a time. + // Probably super slow but this shouldn't happen often at all + it's only in debug. + // - Charlie M 28th July 2024 + for (u8 *p = (u8*)start; p < (u8*)start+size; p += os.page_size) { + DWORD old_protect = PAGE_READWRITE; + BOOL ok = VirtualProtect(p, os.page_size, PAGE_NOACCESS, &old_protect); + assert(ok, "VirtualProtect Failed with error %d", GetLastError()); + } +#endif +} + /// /// // Mouse pointer diff --git a/oogabooga/os_interface.c b/oogabooga/os_interface.c index 8ee3ce8..9d5817c 100644 --- a/oogabooga/os_interface.c +++ b/oogabooga/os_interface.c @@ -439,6 +439,8 @@ os_reserve_next_memory_pages(u64 size); void ogb_instance os_unlock_program_memory_pages(void *start, u64 size); +void ogb_instance +os_lock_program_memory_pages(void *start, u64 size); /// ///