diff --git a/oogabooga/linmath.c b/oogabooga/linmath.c
index 449bd47..60e01ac 100644
--- a/oogabooga/linmath.c
+++ b/oogabooga/linmath.c
@@ -225,6 +225,131 @@ Vector2 v2_rotate_point_around_pivot(Vector2 point, Vector2 pivot, float32 rotat
+typedef union Vector2i { // 2-component s32 vector; data[] aliases x, y
+    s32 data[2];
+    struct {s32 x, y;};
+} Vector2i;
+
+inline Vector2i v2i(s32 x, s32 y) { return (Vector2i){x, y}; }
+inline Vector2 v2i_to_v2(Vector2i a) { return v2((f32)a.x, (f32)a.y); }
+#define v2i_expand(v) (v).x, (v).y
+
+typedef union Vector3i { // 3-component s32 vector; every anonymous struct aliases data[]
+    s32 data[3];
+    struct {s32 x, y, z;};
+    struct {s32 r, g, b;};
+    struct {Vector2i xy;};
+    struct {s32 _x; Vector2i yz;}; // _x only pads past the first lane so yz lands on lanes 1..2
+} Vector3i;
+
+inline Vector3i v3i(s32 x, s32 y, s32 z) { return (Vector3i){x, y, z}; }
+inline Vector3 v3i_to_v3(Vector3i a) { return v3((f32)a.x, (f32)a.y, (f32)a.z); }
+#define v3i_expand(v) (v).x, (v).y, (v).z
+
+typedef union alignat(16) Vector4i { // 4-component s32 vector; alignat(16) presumably required by the *_128_aligned SIMD helpers below
+    s32 data[4];
+    struct {s32 x, y, z, w;};
+    struct {Vector2i xy; Vector2i zw;};
+    struct {s32 x1, y1, x2, y2;};
+    struct {s32 r, g, b, a;};
+    struct {s32 left, bottom, right, top;};
+    struct {Vector3i xyz;};
+    struct {s32 _x; Vector3i yzw;}; // lanes 1..3 as a Vector3i, same layout trick as Vector3i.yz
+} Vector4i;
+
+inline Vector4i v4i(s32 x, s32 y, s32 z, s32 w) { return (Vector4i){x, y, z, w}; }
+inline Vector4 v4i_to_v4(Vector4i a) { return v4((f32)a.x, (f32)a.y, (f32)a.z, (f32)a.w); }
+#define v4i_expand(v) (v).x, (v).y, (v).z, (v).w
+
+// Component-wise integer vector arithmetic. The *_div/*_divi functions use plain C integer
+// division per component, so a zero divisor component is undefined behavior; callers must rule it out.
+// Vector2i
+inline Vector2i v2i_add(LMATH_ALIGN Vector2i a, LMATH_ALIGN Vector2i b) {
+    return v2i(a.x + b.x, a.y + b.y);
+}
+inline Vector2i v2i_sub(LMATH_ALIGN Vector2i a, LMATH_ALIGN Vector2i b) {
+    return v2i(a.x - b.x, a.y - b.y);
+}
+inline Vector2i v2i_mul(LMATH_ALIGN Vector2i a, LMATH_ALIGN Vector2i b) {
+    return v2i(a.x * b.x, a.y * b.y);
+}
+inline Vector2i v2i_muli(LMATH_ALIGN Vector2i a, s32 s) {
+    return v2i_mul(a, v2i(s, s));
+}
+inline Vector2i v2i_div(Vector2i a, Vector2i b) {
+    return v2i(a.x / b.x, a.y / b.y);
+}
+inline Vector2i v2i_divi(Vector2i a, s32 s) {
+    return v2i_div(a, v2i(s, s));
+}
+
+// Vector3i
+inline Vector3i v3i_add(LMATH_ALIGN Vector3i a, LMATH_ALIGN Vector3i b) {
+    LMATH_ALIGN Vector4i a128 = v4i(a.x, a.y, a.z, 0); // widen to 4 lanes for the 128-bit helper; w is padding and is dropped on return
+    LMATH_ALIGN Vector4i b128 = v4i(b.x, b.y, b.z, 0);
+    simd_add_int32_128_aligned((s32*)&a128, (s32*)&b128, (s32*)&a128);
+    return a128.xyz;
+}
+inline Vector3i v3i_sub(LMATH_ALIGN Vector3i a, LMATH_ALIGN Vector3i b) {
+    LMATH_ALIGN Vector4i a128 = v4i(a.x, a.y, a.z, 0); // w lane is padding, dropped on return
+    LMATH_ALIGN Vector4i b128 = v4i(b.x, b.y, b.z, 0);
+    simd_sub_int32_128_aligned((s32*)&a128, (s32*)&b128, (s32*)&a128);
+    return a128.xyz;
+}
+inline Vector3i v3i_mul(LMATH_ALIGN Vector3i a, LMATH_ALIGN Vector3i b) {
+    LMATH_ALIGN Vector4i a128 = v4i(a.x, a.y, a.z, 0); // w lane is padding, dropped on return
+    LMATH_ALIGN Vector4i b128 = v4i(b.x, b.y, b.z, 0);
+    simd_mul_int32_128_aligned((s32*)&a128, (s32*)&b128, (s32*)&a128);
+    return a128.xyz;
+}
+inline Vector3i v3i_muli(LMATH_ALIGN Vector3i a, s32 s) {
+    return v3i_mul(a, v3i(s, s, s));
+}
+inline Vector3i v3i_div(Vector3i a, Vector3i b) {
+    return v3i(a.x / b.x, a.y / b.y, a.z / b.z);
+}
+inline Vector3i v3i_divi(Vector3i a, s32 s) {
+    return v3i_div(a, v3i(s, s, s));
+}
+
+// Vector4i
+inline Vector4i v4i_add(LMATH_ALIGN Vector4i a, LMATH_ALIGN Vector4i b) {
+    simd_add_int32_128_aligned((s32*)&a, (s32*)&b, (s32*)&a); // result is written back into the by-value copy of a
+    return a;
+}
+inline Vector4i v4i_sub(LMATH_ALIGN Vector4i a, LMATH_ALIGN Vector4i b) {
+    simd_sub_int32_128_aligned((s32*)&a, (s32*)&b, (s32*)&a);
+    return a;
+}
+inline Vector4i v4i_mul(LMATH_ALIGN Vector4i a, LMATH_ALIGN Vector4i b) {
+    simd_mul_int32_128_aligned((s32*)&a, (s32*)&b, (s32*)&a);
+    return a;
+}
+inline Vector4i v4i_muli(LMATH_ALIGN Vector4i a, s32 s) {
+    return v4i_mul(a, v4i(s, s, s, s));
+}
+
+inline Vector4i v4i_div(Vector4i a, Vector4i b) {
+    return v4i(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
+}
+inline Vector4i v4i_divi(Vector4i a, s32 s) {
+    return v4i_div(a, v4i(s, s, s, s));
+}
+// absi evaluates x more than once (pass no side-effecting expressions); negating the most negative signed value overflows.
+#define absi(x) ((x) > 0 ? (x) : -(x))
+
+inline Vector2i v2i_abs(LMATH_ALIGN Vector2i a) {
+    return v2i(absi(a.x), absi(a.y));
+}
+
+inline Vector3i v3i_abs(LMATH_ALIGN Vector3i a) {
+    return v3i(absi(a.x), absi(a.y), absi(a.z));
+}
+
+inline Vector4i v4i_abs(LMATH_ALIGN Vector4i a) {
+    return v4i(absi(a.x), absi(a.y), absi(a.z), absi(a.w));
+}
+
diff --git a/oogabooga/tests.c b/oogabooga/tests.c
index be108bc..21657cd 100644
--- a/oogabooga/tests.c
+++ b/oogabooga/tests.c
@@ -838,7 +838,8 @@ void test_simd() {
     end = rdtsc();
     cycles = end-start;
     print("NO SIMD float32 mul took %llu cycles\n", cycles);
-}
+}
+
 // Indirect testing of some simd stuff
 void test_linmath() {
@@ -1029,6 +1030,81 @@ void test_linmath() {
     assert(floats_roughly_match(v3_dot_product, 38), "Failed: v3_dot");
     assert(floats_roughly_match(v4_dot_product, 30), "Failed: v4_dot");
 }
+
+void test_intmath() { // Checks v2i/v3i/v4i construction and add/sub/mul/div plus the scalar muli/divi variants; the *_abs helpers are not covered
+    // Test vector creation and access
+    Vector2i v2i_test = v2i(1, 2);
+    assert(v2i_test.x == 1 && v2i_test.y == 2, "v2i creation incorrect");
+
+    Vector3i v3i_test = v3i(1, 2, 3);
+    assert(v3i_test.x == 1 && v3i_test.y == 2 && v3i_test.z == 3, "v3i creation incorrect");
+
+    Vector4i v4i_test = v4i(1, 2, 3, 4);
+    assert(v4i_test.x == 1 && v4i_test.y == 2 && v4i_test.z == 3 && v4i_test.w == 4, "v4i creation incorrect");
+
+    // Test vector2 operations
+    Vector2i v2i_a = v2i(3, 4);
+    Vector2i v2i_b = v2i(1, 2);
+    Vector2i v2i_result = v2i_add(v2i_a, v2i_b);
+    assert(v2i_result.x == 4 && v2i_result.y == 6, "v2i_add incorrect");
+
+    v2i_result = v2i_sub(v2i_a, v2i_b);
+    assert(v2i_result.x == 2 && v2i_result.y == 2, "v2i_sub incorrect");
+
+    v2i_result = v2i_mul(v2i_a, v2i_b);
+    assert(v2i_result.x == 3 && v2i_result.y == 8, "v2i_mul incorrect");
+
+    v2i_result = v2i_div(v2i_a, v2i_b);
+    assert(v2i_result.x == 3 && v2i_result.y == 2, "v2i_div incorrect");
+
+    v2i_result = v2i_muli(v2i_a, 2);
+    assert(v2i_result.x == 6 && v2i_result.y == 8, "v2i_muli incorrect");
+
+    v2i_result = v2i_divi(v2i_a, 2);
+    assert(v2i_result.x == 1 && v2i_result.y == 2, "v2i_divi incorrect"); // 3/2 truncates to 1
+
+    // Test vector3 operations
+    Vector3i v3i_a = v3i(3, 4, 6);
+    Vector3i v3i_b = v3i(1, 2, 3);
+    Vector3i v3i_result = v3i_add(v3i_a, v3i_b);
+    assert(v3i_result.x == 4 && v3i_result.y == 6 && v3i_result.z == 9, "v3i_add incorrect.");
+
+    v3i_result = v3i_sub(v3i_a, v3i_b);
+    assert(v3i_result.x == 2 && v3i_result.y == 2 && v3i_result.z == 3, "v3i_sub incorrect");
+
+    v3i_result = v3i_mul(v3i_a, v3i_b);
+    assert(v3i_result.x == 3 && v3i_result.y == 8 && v3i_result.z == 18, "v3i_mul incorrect");
+
+    v3i_result = v3i_div(v3i_a, v3i_b);
+    assert(v3i_result.x == 3 && v3i_result.y == 2 && v3i_result.z == 2, "v3i_div incorrect");
+
+    v3i_result = v3i_muli(v3i_a, 2);
+    assert(v3i_result.x == 6 && v3i_result.y == 8 && v3i_result.z == 12, "v3i_muli incorrect");
+
+    v3i_result = v3i_divi(v3i_a, 2);
+    assert(v3i_result.x == 1 && v3i_result.y == 2 && v3i_result.z == 3, "v3i_divi incorrect");
+
+    Vector4i v4i_a = v4i(3, 4, 6, 8); // Test vector4 operations
+    Vector4i v4i_b = v4i(1, 2, 3, 4);
+    Vector4i v4i_result = v4i_add(v4i_a, v4i_b);
+    assert(v4i_result.x == 4 && v4i_result.y == 6 && v4i_result.z == 9 && v4i_result.w == 12, "v4i_add incorrect.");
+
+    v4i_result = v4i_sub(v4i_a, v4i_b);
+    assert(v4i_result.x == 2 && v4i_result.y == 2 && v4i_result.z == 3 && v4i_result.w == 4, "v4i_sub incorrect");
+
+    v4i_result = v4i_mul(v4i_a, v4i_b);
+    assert(v4i_result.x == 3 && v4i_result.y == 8 && v4i_result.z == 18 && v4i_result.w == 32, "v4i_mul incorrect");
+
+    v4i_result = v4i_div(v4i_a, v4i_b);
+    assert(v4i_result.x == 3 && v4i_result.y == 2 && v4i_result.z == 2 && v4i_result.w == 2, "v4i_div incorrect");
+
+    v4i_result = v4i_muli(v4i_a, 2);
+    assert(v4i_result.x == 6 && v4i_result.y == 8 && v4i_result.z == 12 && v4i_result.w == 16, "v4i_muli incorrect");
+
+    v4i_result = v4i_divi(v4i_a, 2);
+    assert(v4i_result.x == 1 && v4i_result.y == 2 && v4i_result.z == 3 && v4i_result.w == 4, "v4i_divi incorrect");
+}
+
 void test_hash_table() {
     Hash_Table table = make_hash_table(string, int, get_heap_allocator());
@@ -1314,6 +1390,10 @@ void oogabooga_run_tests() {
     print("Testing linmath... ");
     test_linmath();
     print("OK!\n");
+
+    print("Testing intmath... ");
+    test_intmath();
+    print("OK!\n");
     print("Testing simd... ");
     test_simd();
@@ -1340,4 +1420,4 @@ void oogabooga_run_tests() {
     print("All tests ok!\n");
-}
\ No newline at end of file
+}