mirror of
https://github.com/thunderbrewhq/thunderbrew
synced 2025-12-12 03:02:30 +00:00
feat(gx): add directxmath for MinGW
This commit is contained in:
parent
0d09dee4b3
commit
3e77eb935a
51 changed files with 49251 additions and 12 deletions
415
vendor/directxmath-3.19.0/Extensions/DirectXMathFMA4.h
vendored
Normal file
415
vendor/directxmath-3.19.0/Extensions/DirectXMathFMA4.h
vendored
Normal file
|
|
@ -0,0 +1,415 @@
|
|||
//-------------------------------------------------------------------------------------
|
||||
// DirectXMathFMA4.h -- FMA4 extensions for SIMD C++ Math library
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// Licensed under the MIT License.
|
||||
//
|
||||
// http://go.microsoft.com/fwlink/?LinkID=615560
|
||||
//-------------------------------------------------------------------------------------
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
|
||||
#error FMA4 not supported on ARM platform
|
||||
#endif
|
||||
|
||||
#include <DirectXMath.h>
|
||||
#include <ammintrin.h>
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
namespace DirectX
|
||||
{
|
||||
|
||||
namespace FMA4
|
||||
{
|
||||
|
||||
inline bool XMVerifyFMA4Support()
|
||||
{
|
||||
// Should return true for AMD Bulldozer processors
|
||||
// with OS support for AVX (Windows 7 Service Pack 1, Windows Server 2008 R2 Service Pack 1, Windows 8, Windows Server 2012)
|
||||
|
||||
// See http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
|
||||
int CPUInfo[4] = {-1};
|
||||
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
|
||||
__cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
|
||||
#else
|
||||
__cpuid(CPUInfo, 0);
|
||||
#endif
|
||||
|
||||
if ( CPUInfo[0] < 1 )
|
||||
return false;
|
||||
|
||||
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
|
||||
__cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
|
||||
#else
|
||||
__cpuid(CPUInfo, 1);
|
||||
#endif
|
||||
|
||||
// We check for AVX, OSXSAVE (required to access FMA4)
|
||||
if ( (CPUInfo[2] & 0x18000000) != 0x18000000 )
|
||||
return false;
|
||||
|
||||
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
|
||||
__cpuid(0x80000000, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
|
||||
#else
|
||||
__cpuid(CPUInfo, 0x80000000);
|
||||
#endif
|
||||
|
||||
if ( uint32_t(CPUInfo[0]) < 0x80000001u )
|
||||
return false;
|
||||
|
||||
// We check for FMA4
|
||||
#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid)
|
||||
__cpuid(0x80000001, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
|
||||
#else
|
||||
__cpuid(CPUInfo, 0x80000001);
|
||||
#endif
|
||||
|
||||
return ( CPUInfo[2] & 0x10000 );
|
||||
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Vector
|
||||
//-------------------------------------------------------------------------------------
|
||||
|
||||
// Per-component multiply-add implemented with the FMA4 _mm_macc_ps intrinsic.
//   V1, V2 - the two factors
//   V3     - the addend
// Returns V1 * V2 + V3.
inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3)
{
    const XMVECTOR vProductPlusAddend = _mm_macc_ps(V1, V2, V3);
    return vProductPlusAddend;
}
|
||||
|
||||
// Per-component negated multiply-add implemented with the FMA4 _mm_nmacc_ps intrinsic.
//   V1, V2 - the two factors whose product is negated
//   V3     - the value the product is subtracted from
// Returns V3 - (V1 * V2).
inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3)
{
    const XMVECTOR vAddendMinusProduct = _mm_nmacc_ps(V1, V2, V3);
    return vAddendMinusProduct;
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Vector2
|
||||
//-------------------------------------------------------------------------------------
|
||||
|
||||
// Transforms the x/y components of V by M, adding row 3 of M unscaled.
//   V - input vector; only its x and y components are read
//   M - transformation matrix; rows 0, 1 and 3 are used
// Returns V.x * M.r[0] + (V.y * M.r[1] + M.r[3]).
inline XMVECTOR XM_CALLCONV XMVector2Transform(FXMVECTOR V, CXMMATRIX M)
{
    // Broadcast each used component across all four lanes.
    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));

    // Accumulate from row 3 upwards: y * r1 + r3 first, then x * r0 on top.
    const XMVECTOR vYTerm = _mm_macc_ps(vSplatY, M.r[1], M.r[3]);
    return _mm_macc_ps(vSplatX, M.r[0], vYTerm);
}
|
||||
|
||||
// Transforms the x/y components of V by M (row 3 added unscaled) and then divides
// every component of the result by the result's own w component.
//   V - input vector; only its x and y components are read
//   M - transformation matrix; rows 0, 1 and 3 are used
// Returns (V.x * M.r[0] + (V.y * M.r[1] + M.r[3])) / w, where w is lane 3 of that sum.
inline XMVECTOR XM_CALLCONV XMVector2TransformCoord(FXMVECTOR V, CXMMATRIX M)
{
    // Broadcast each used component across all four lanes.
    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));

    // y * r1 + r3 first, then x * r0 on top.
    const XMVECTOR vYTerm = _mm_macc_ps(vSplatY, M.r[1], M.r[3]);
    const XMVECTOR vTransformed = _mm_macc_ps(vSplatX, M.r[0], vYTerm);

    // Divide through by the broadcast w of the transformed vector.
    const XMVECTOR vSplatW = _mm_permute_ps(vTransformed, _MM_SHUFFLE(3, 3, 3, 3));
    return _mm_div_ps(vTransformed, vSplatW);
}
|
||||
|
||||
// Transforms the x/y components of V by rows 0 and 1 of M; row 3 of M is not applied.
//   V - input vector; only its x and y components are read
//   M - transformation matrix; rows 0 and 1 are used
// Returns V.x * M.r[0] + (V.y * M.r[1]).
inline XMVECTOR XM_CALLCONV XMVector2TransformNormal(FXMVECTOR V, CXMMATRIX M)
{
    // Broadcast each used component across all four lanes.
    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));

    // Plain multiply for the first term, fused multiply-add for the second.
    const XMVECTOR vYTerm = _mm_mul_ps(vSplatY, M.r[1]);
    return _mm_macc_ps(vSplatX, M.r[0], vYTerm);
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Vector3
|
||||
//-------------------------------------------------------------------------------------
|
||||
|
||||
// Transforms the x/y/z components of V by M, adding row 3 of M unscaled.
//   V - input vector; only its x, y and z components are read
//   M - transformation matrix; all four rows are used
// Returns V.x * M.r[0] + (V.y * M.r[1] + (V.z * M.r[2] + M.r[3])).
inline XMVECTOR XM_CALLCONV XMVector3Transform(FXMVECTOR V, CXMMATRIX M)
{
    // Broadcast each used component across all four lanes.
    const XMVECTOR vSplatZ = _mm_permute_ps(V, _MM_SHUFFLE(2, 2, 2, 2));
    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));

    // Accumulate from row 3 upwards, one fused multiply-add per component.
    XMVECTOR vSum = _mm_macc_ps(vSplatZ, M.r[2], M.r[3]);
    vSum = _mm_macc_ps(vSplatY, M.r[1], vSum);
    return _mm_macc_ps(vSplatX, M.r[0], vSum);
}
|
||||
|
||||
// Transforms the x/y/z components of V by M (row 3 added unscaled) and then divides
// every component of the result by the result's own w component.
//   V - input vector; only its x, y and z components are read
//   M - transformation matrix; all four rows are used
// Returns (V.x * M.r[0] + (V.y * M.r[1] + (V.z * M.r[2] + M.r[3]))) / w,
// where w is lane 3 of that sum.
inline XMVECTOR XM_CALLCONV XMVector3TransformCoord(FXMVECTOR V, CXMMATRIX M)
{
    // Broadcast each used component across all four lanes.
    const XMVECTOR vSplatZ = _mm_permute_ps(V, _MM_SHUFFLE(2, 2, 2, 2));
    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));

    // Accumulate from row 3 upwards, one fused multiply-add per component.
    XMVECTOR vSum = _mm_macc_ps(vSplatZ, M.r[2], M.r[3]);
    vSum = _mm_macc_ps(vSplatY, M.r[1], vSum);
    vSum = _mm_macc_ps(vSplatX, M.r[0], vSum);

    // Divide through by the broadcast w of the transformed vector.
    const XMVECTOR vSplatW = _mm_permute_ps(vSum, _MM_SHUFFLE(3, 3, 3, 3));
    return _mm_div_ps(vSum, vSplatW);
}
|
||||
|
||||
// Transforms the x/y/z components of V by rows 0-2 of M; row 3 of M is not applied.
//   V - input vector; only its x, y and z components are read
//   M - transformation matrix; rows 0, 1 and 2 are used
// Returns V.x * M.r[0] + (V.y * M.r[1] + (V.z * M.r[2])).
inline XMVECTOR XM_CALLCONV XMVector3TransformNormal(FXMVECTOR V, CXMMATRIX M)
{
    // Broadcast each used component across all four lanes.
    const XMVECTOR vSplatZ = _mm_permute_ps(V, _MM_SHUFFLE(2, 2, 2, 2));
    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));

    // Plain multiply for the first term, fused multiply-adds for the rest.
    XMVECTOR vSum = _mm_mul_ps(vSplatZ, M.r[2]);
    vSum = _mm_macc_ps(vSplatY, M.r[1], vSum);
    return _mm_macc_ps(vSplatX, M.r[0], vSum);
}
|
||||
|
||||
// Forward declaration: XMVector3Project and XMVector3Unproject below call
// FMA4::XMMatrixMultiply before its inline definition appears later in this header.
XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2);
|
||||
|
||||
// Transforms V by World * View * Projection (with the divide-by-w of
// XMVector3TransformCoord) and then maps the result into the given viewport.
//   V                             - input vector
//   ViewportX, ViewportY          - viewport origin
//   ViewportWidth, ViewportHeight - viewport extent
//   ViewportMinZ, ViewportMaxZ    - viewport depth range
//   Projection, View, World       - matrices concatenated as World * View * Projection
// Returns T * Scale + Offset, where T is the transformed vector,
//   Scale  = (Width / 2, -Height / 2, MaxZ - MinZ, 0) and
//   Offset = (X + Width / 2, Y + Height / 2, MinZ, 0).
inline XMVECTOR XM_CALLCONV XMVector3Project
(
    FXMVECTOR V,
    float ViewportX,
    float ViewportY,
    float ViewportWidth,
    float ViewportHeight,
    float ViewportMinZ,
    float ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
    // Concatenate the three matrices left to right.
    const XMMATRIX WorldView = FMA4::XMMatrixMultiply(World, View);
    const XMMATRIX WorldViewProjection = FMA4::XMMatrixMultiply(WorldView, Projection);

    const float HalfWidth = ViewportWidth * 0.5f;
    const float HalfHeight = ViewportHeight * 0.5f;

    // The y scale is negated; w lanes of both vectors are zero.
    const XMVECTOR ViewportScale = XMVectorSet(HalfWidth, -HalfHeight, ViewportMaxZ - ViewportMinZ, 0.0f);
    const XMVECTOR ViewportOffset = XMVectorSet(ViewportX + HalfWidth, ViewportY + HalfHeight, ViewportMinZ, 0.0f);

    const XMVECTOR Transformed = FMA4::XMVector3TransformCoord(V, WorldViewProjection);
    return FMA4::XMVectorMultiplyAdd(Transformed, ViewportScale, ViewportOffset);
}
|
||||
|
||||
// Undoes the viewport mapping on V and then transforms the result by the inverse of
// World * View * Projection (with the divide-by-w of XMVector3TransformCoord).
//   V                             - input vector
//   ViewportX, ViewportY          - viewport origin
//   ViewportWidth, ViewportHeight - viewport extent
//   ViewportMinZ, ViewportMaxZ    - viewport depth range
//   Projection, View, World       - matrices concatenated as World * View * Projection
// With S = 1 / (Width / 2, -Height / 2, MaxZ - MinZ, 1) and
//      O = S * (-X, -Y, -MinZ, 0) + (-1, 1, 0, 0),
// returns TransformCoord(V * S + O, Inverse(World * View * Projection)).
inline XMVECTOR XM_CALLCONV XMVector3Unproject
(
    FXMVECTOR V,
    float ViewportX,
    float ViewportY,
    float ViewportWidth,
    float ViewportHeight,
    float ViewportMinZ,
    float ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
    // Constant term added to the scaled, negated viewport origin.
    static const XMVECTORF32 BaseOffset = { { { -1.0f, 1.0f, 0.0f, 0.0f } } };

    // Reciprocal of the viewport scale; the w lane is 1 so its reciprocal stays 1.
    const XMVECTOR ViewportScale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, ViewportMaxZ - ViewportMinZ, 1.0f);
    const XMVECTOR InverseScale = XMVectorReciprocal(ViewportScale);

    const XMVECTOR NegatedOrigin = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f);
    const XMVECTOR InverseOffset = FMA4::XMVectorMultiplyAdd(InverseScale, NegatedOrigin, BaseOffset.v);

    // Concatenate the three matrices left to right, then invert the product.
    const XMMATRIX WorldView = FMA4::XMMatrixMultiply(World, View);
    const XMMATRIX WorldViewProjection = FMA4::XMMatrixMultiply(WorldView, Projection);
    const XMMATRIX InverseTransform = XMMatrixInverse(nullptr, WorldViewProjection);

    const XMVECTOR Unmapped = FMA4::XMVectorMultiplyAdd(V, InverseScale, InverseOffset);
    return FMA4::XMVector3TransformCoord(Unmapped, InverseTransform);
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Vector4
|
||||
//-------------------------------------------------------------------------------------
|
||||
|
||||
// Transforms all four components of V by M.
//   V - input vector
//   M - transformation matrix; all four rows are used
// Returns V.x * M.r[0] + (V.y * M.r[1] + (V.z * M.r[2] + (V.w * M.r[3]))).
inline XMVECTOR XM_CALLCONV XMVector4Transform(FXMVECTOR V, CXMMATRIX M)
{
    // Broadcast each component across all four lanes.
    const XMVECTOR vSplatW = _mm_permute_ps(V, _MM_SHUFFLE(3, 3, 3, 3));
    const XMVECTOR vSplatZ = _mm_permute_ps(V, _MM_SHUFFLE(2, 2, 2, 2));
    const XMVECTOR vSplatY = _mm_permute_ps(V, _MM_SHUFFLE(1, 1, 1, 1));
    const XMVECTOR vSplatX = _mm_permute_ps(V, _MM_SHUFFLE(0, 0, 0, 0));

    // Plain multiply for the w term, then one fused multiply-add per remaining component.
    XMVECTOR vSum = _mm_mul_ps(vSplatW, M.r[3]);
    vSum = _mm_macc_ps(vSplatZ, M.r[2], vSum);
    vSum = _mm_macc_ps(vSplatY, M.r[1], vSum);
    return _mm_macc_ps(vSplatX, M.r[0], vSum);
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Matrix
|
||||
//-------------------------------------------------------------------------------------
|
||||
|
||||
// Computes the matrix product M1 * M2 one result row at a time.
//   M1 - left-hand matrix
//   M2 - right-hand matrix
// Row i of the result is
//   ((M1.r[i].x * M2.r[0] + M1.r[i].y * M2.r[1]) + M1.r[i].z * M2.r[2]) + M1.r[i].w * M2.r[3],
// evaluated as one multiply followed by three fused multiply-adds.
inline XMMATRIX XM_CALLCONV XMMatrixMultiply(CXMMATRIX M1, CXMMATRIX M2)
{
    XMMATRIX mProduct;

    for (int iRow = 0; iRow < 4; ++iRow)
    {
        const XMVECTOR vSource = M1.r[iRow];

        // Broadcast each component of the current M1 row across all four lanes.
        const XMVECTOR vSplatX = _mm_permute_ps(vSource, _MM_SHUFFLE(0, 0, 0, 0));
        const XMVECTOR vSplatY = _mm_permute_ps(vSource, _MM_SHUFFLE(1, 1, 1, 1));
        const XMVECTOR vSplatZ = _mm_permute_ps(vSource, _MM_SHUFFLE(2, 2, 2, 2));
        const XMVECTOR vSplatW = _mm_permute_ps(vSource, _MM_SHUFFLE(3, 3, 3, 3));

        // Weighted sum of the rows of M2.
        XMVECTOR vAccum = _mm_mul_ps(vSplatX, M2.r[0]);
        vAccum = _mm_macc_ps(vSplatY, M2.r[1], vAccum);
        vAccum = _mm_macc_ps(vSplatZ, M2.r[2], vAccum);
        vAccum = _mm_macc_ps(vSplatW, M2.r[3], vAccum);

        mProduct.r[iRow] = vAccum;
    }

    return mProduct;
}
|
||||
|
||||
// Computes the transpose of the matrix product M1 * M2.
//   M1 - left-hand matrix
//   M2 - right-hand matrix
// The four product rows are built exactly as in a plain row-by-row multiply (one multiply
// and three fused multiply-adds per row) and are then transposed with two rounds of shuffles.
inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose(FXMMATRIX M1, CXMMATRIX M2)
{
    // Rows of the untransposed product M1 * M2.
    XMVECTOR vProductRow[4];

    for (int iRow = 0; iRow < 4; ++iRow)
    {
        const XMVECTOR vSource = M1.r[iRow];

        // Broadcast each component of the current M1 row across all four lanes.
        const XMVECTOR vSplatX = _mm_permute_ps(vSource, _MM_SHUFFLE(0, 0, 0, 0));
        const XMVECTOR vSplatY = _mm_permute_ps(vSource, _MM_SHUFFLE(1, 1, 1, 1));
        const XMVECTOR vSplatZ = _mm_permute_ps(vSource, _MM_SHUFFLE(2, 2, 2, 2));
        const XMVECTOR vSplatW = _mm_permute_ps(vSource, _MM_SHUFFLE(3, 3, 3, 3));

        // Weighted sum of the rows of M2.
        XMVECTOR vAccum = _mm_mul_ps(vSplatX, M2.r[0]);
        vAccum = _mm_macc_ps(vSplatY, M2.r[1], vAccum);
        vAccum = _mm_macc_ps(vSplatZ, M2.r[2], vAccum);
        vAccum = _mm_macc_ps(vSplatW, M2.r[3], vAccum);

        vProductRow[iRow] = vAccum;
    }

    // First round: gather the low (x, y) and high (z, w) halves of each pair of rows.
    // row0.x, row0.y, row1.x, row1.y
    const XMVECTOR vLow01 = _mm_shuffle_ps(vProductRow[0], vProductRow[1], _MM_SHUFFLE(1, 0, 1, 0));
    // row0.z, row0.w, row1.z, row1.w
    const XMVECTOR vHigh01 = _mm_shuffle_ps(vProductRow[0], vProductRow[1], _MM_SHUFFLE(3, 2, 3, 2));
    // row2.x, row2.y, row3.x, row3.y
    const XMVECTOR vLow23 = _mm_shuffle_ps(vProductRow[2], vProductRow[3], _MM_SHUFFLE(1, 0, 1, 0));
    // row2.z, row2.w, row3.z, row3.w
    const XMVECTOR vHigh23 = _mm_shuffle_ps(vProductRow[2], vProductRow[3], _MM_SHUFFLE(3, 2, 3, 2));

    // Second round: pick the even or odd lanes to form each transposed row.
    XMMATRIX mTransposed;
    // row0.x, row1.x, row2.x, row3.x
    mTransposed.r[0] = _mm_shuffle_ps(vLow01, vLow23, _MM_SHUFFLE(2, 0, 2, 0));
    // row0.y, row1.y, row2.y, row3.y
    mTransposed.r[1] = _mm_shuffle_ps(vLow01, vLow23, _MM_SHUFFLE(3, 1, 3, 1));
    // row0.z, row1.z, row2.z, row3.z
    mTransposed.r[2] = _mm_shuffle_ps(vHigh01, vHigh23, _MM_SHUFFLE(2, 0, 2, 0));
    // row0.w, row1.w, row2.w, row3.w
    mTransposed.r[3] = _mm_shuffle_ps(vHigh01, vHigh23, _MM_SHUFFLE(3, 1, 3, 1));
    return mTransposed;
}
|
||||
|
||||
} // namespace FMA4
|
||||
|
||||
} // namespace DirectX;
|
||||
Loading…
Add table
Add a link
Reference in a new issue