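; fmodex/win32/src/fmod_dsp_connection_asm.s
;
; 733 lines, 19 KiB
; NOTE: the hosting page tagged this file "ArmAsm"; the content is 32-bit x86
;       NASM source (Intel syntax) that uses SSE instructions.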

%include "./FMOD_static/win32/src/c32.mac"
; ==========================================================================================
; GLOBAL UNINITIALIZED DATA
; ==========================================================================================
[SEGMENT .data use32 align=32]
; ==========================================================================================
; CODE
; ==========================================================================================
[SEGMENT .text use32 align=32]
; =================================================================================================================================
; void FMOD_DSP_Connection_MixMonoToStereo_SIMD (float *inbuffer, float *outbuffer, unsigned int length, float lvolume, float rvolume);
; =================================================================================================================================
; ---------------------------------------------------------------------------
; Accumulates a mono float stream into an interleaved stereo float buffer:
;
;     out[2*i + 0] += in[i] * lvolume
;     out[2*i + 1] += in[i] * rvolume            for i in [0, length)
;
; Arguments are read from the stack relative to ebp at the offsets assigned
; by the `arg` macro. `proc` / `endproc` come from c32.mac and presumably emit
; the frame set-up, tear-down and return - confirm against c32.mac.
;
; Alignment: the 4-sample path accesses outbuffer with movaps, so outbuffer
; has to be 16-byte aligned whenever length >= 4. inbuffer is read with
; movups / movss and may be unaligned.
;
; Registers: eax, ebx, ecx, edx, esi, edi are saved and restored here.
; xmm0-xmm4 and the flags are clobbered.
; ---------------------------------------------------------------------------
proc FMOD_DSP_Connection_MixMonoToStereo_SIMD
%$inbuffer arg
%$outbuffer arg
%$length arg
%$lvolume arg
%$rvolume arg
        push     eax
        push     ebx
        push     ecx
        push     edx
        push     esi
        push     edi

        mov      edx, [ebp+%$length]        ; edx = sample count, kept for the tail
        mov      edi, [ebp+%$outbuffer]     ; edi = write cursor (stereo frames)
        mov      esi, [ebp+%$inbuffer]      ; esi = read cursor (mono samples)

        ; Build the interleaved gain vector:
        ;   xmm0 = [lvolume][rvolume][lvolume][rvolume]
        movss    xmm1, [ebp+%$rvolume]
        movss    xmm0, [ebp+%$lvolume]
        shufps   xmm1, xmm1, 0              ; rvolume in every lane
        shufps   xmm0, xmm0, 0              ; lvolume in every lane
        unpcklps xmm0, xmm1                 ; interleave l/r

        mov      ecx, edx
%if 1
        ; ---- main path: 4 input samples -> 4 stereo frames per pass ----
        shr      ecx, 2                     ; ecx = length / 4 (sets ZF when zero)
        jz       mixloopMtoSrolledstart

mixloopMtoSunrolled:
        movups   xmm1, [esi]                ; xmm1 = [s0][s1][s2][s3]
        add      esi, 16
        movaps   xmm2, xmm1
        unpcklps xmm1, xmm1                 ; xmm1 = [s0][s0][s1][s1]
        unpckhps xmm2, xmm2                 ; xmm2 = [s2][s2][s3][s3]
        mulps    xmm1, xmm0
        mulps    xmm2, xmm0
        movaps   xmm3, [edi]                ; current contents of frames 0-1
        movaps   xmm4, [edi+16]             ; current contents of frames 2-3
        addps    xmm3, xmm1
        addps    xmm4, xmm2
        movaps   [edi], xmm3
        movaps   [edi+16], xmm4
        add      edi, 32
        dec      ecx
        jnz      near mixloopMtoSunrolled

mixloopMtoSrolledstart:
        mov      ecx, edx
        and      ecx, 3                     ; ecx = length % 4 leftover samples
%endif
        test     ecx, ecx                   ; still required when the %if block is disabled
        jz       mixMtoSdone

        ; ---- tail: one input sample -> one stereo frame per pass ----
mixloopMtoSrolled:
        movss    xmm1, [esi]
        add      esi, 4
        movss    xmm2, xmm1                 ; second copy of the sample for the right channel
        mulss    xmm1, [ebp+%$lvolume]
        mulss    xmm2, [ebp+%$rvolume]
        movss    xmm3, [edi]
        movss    xmm4, [edi+4]
        addss    xmm3, xmm1
        addss    xmm4, xmm2
        movss    [edi], xmm3
        movss    [edi+4], xmm4
        add      edi, 8
        dec      ecx
        jnz      near mixloopMtoSrolled

mixMtoSdone:
        pop      edi
        pop      esi
        pop      edx
        pop      ecx
        pop      ebx
        pop      eax
endproc
; =================================================================================================================================
; void FMOD_DSP_Connection_MixStereoToStereo_SIMD(float *inbuffer, float *outbuffer, unsigned int length, float lvolume, float rvolume);
; =================================================================================================================================
; ---------------------------------------------------------------------------
; Accumulates an interleaved stereo float stream into an interleaved stereo
; float buffer with independent left/right gains:
;
;     out[2*i + 0] += in[2*i + 0] * lvolume
;     out[2*i + 1] += in[2*i + 1] * rvolume      for i in [0, length)
;
; length is counted in stereo frames: the tail loop advances both cursors by
; 8 bytes per count.
;
; Arguments are read from the stack relative to ebp at the offsets assigned
; by the `arg` macro. `proc` / `endproc` come from c32.mac and presumably emit
; the frame set-up, tear-down and return - confirm against c32.mac.
;
; Alignment: the 4-frame path accesses outbuffer with movaps, so outbuffer
; has to be 16-byte aligned whenever length >= 4. inbuffer is read with
; movups / movss and may be unaligned.
;
; Registers: eax, ebx, ecx, edx, esi, edi are saved and restored here.
; xmm0-xmm4 and the flags are clobbered.
; ---------------------------------------------------------------------------
proc FMOD_DSP_Connection_MixStereoToStereo_SIMD
%$inbuffer arg
%$outbuffer arg
%$length arg
%$lvolume arg
%$rvolume arg
        push     eax
        push     ebx
        push     ecx
        push     edx
        push     esi
        push     edi

        mov      edx, [ebp+%$length]        ; edx = frame count, kept for the tail
        mov      edi, [ebp+%$outbuffer]     ; edi = write cursor
        mov      esi, [ebp+%$inbuffer]      ; esi = read cursor

        ; Build the interleaved gain vector:
        ;   xmm0 = [lvolume][rvolume][lvolume][rvolume]
        movss    xmm1, [ebp+%$rvolume]
        movss    xmm0, [ebp+%$lvolume]
        shufps   xmm1, xmm1, 0              ; rvolume in every lane
        shufps   xmm0, xmm0, 0              ; lvolume in every lane
        unpcklps xmm0, xmm1                 ; interleave l/r

        mov      ecx, edx
%if 1
        ; ---- main path: 4 stereo frames (8 floats) per pass ----
        shr      ecx, 2                     ; ecx = length / 4 (sets ZF when zero)
        jz       mixloopStoSrolledstart

mixloopStoSunrolled:
        movups   xmm1, [esi]                ; xmm1 = [L0][R0][L1][R1]
        movups   xmm2, [esi+16]             ; xmm2 = [L2][R2][L3][R3]
        add      esi, 32
        mulps    xmm1, xmm0
        mulps    xmm2, xmm0
        movaps   xmm3, [edi]                ; current contents of frames 0-1
        movaps   xmm4, [edi+16]             ; current contents of frames 2-3
        addps    xmm3, xmm1
        addps    xmm4, xmm2
        movaps   [edi], xmm3
        movaps   [edi+16], xmm4
        add      edi, 32
        dec      ecx
        jnz      near mixloopStoSunrolled

mixloopStoSrolledstart:
        mov      ecx, edx
        and      ecx, 3                     ; ecx = length % 4 leftover frames
%endif
        test     ecx, ecx                   ; still required when the %if block is disabled
        jz       mixStoSdone

        ; ---- tail: one stereo frame per pass ----
mixloopStoSrolled:
        movss    xmm1, [esi]                ; left input
        movss    xmm2, [esi+4]              ; right input
        add      esi, 8
        mulss    xmm1, [ebp+%$lvolume]
        mulss    xmm2, [ebp+%$rvolume]
        movss    xmm3, [edi]
        movss    xmm4, [edi+4]
        addss    xmm3, xmm1
        addss    xmm4, xmm2
        movss    [edi], xmm3
        movss    [edi+4], xmm4
        add      edi, 8
        dec      ecx
        jnz      near mixloopStoSrolled

mixStoSdone:
        pop      edi
        pop      esi
        pop      edx
        pop      ecx
        pop      ebx
        pop      eax
endproc
; =================================================================================================================================
; void FMOD_DSP_Connection_MixMonoTo5_1_SIMD(float *inbuffer, float *outbuffer, unsigned int length, float *volume0to3, float *volume4to1, float *volume2to5);
; =================================================================================================================================
; ---------------------------------------------------------------------------
; Accumulates a mono float stream into an interleaved 6-channel float buffer:
;
;     out[6*i + c] += in[i] * gain[c]            for i in [0, length), c in 0..5
;
; The gains arrive as three 4-float vectors, one per 16-byte slice of the
; 24-float pattern that repeats every 4 output frames. For every output
; channel to get its own gain the caller is expected to pass:
;     volume0to3 -> [g0][g1][g2][g3]
;     volume4to1 -> [g4][g5][g0][g1]
;     volume2to5 -> [g2][g3][g4][g5]
; The one-sample tail loop uses only volume0to3 and the low half of
; volume4to1.
;
; Arguments are read from the stack relative to ebp at the offsets assigned
; by the `arg` macro. `proc` / `endproc` come from c32.mac and presumably emit
; the frame set-up, tear-down and return - confirm against c32.mac.
;
; Alignment: the 4-sample path uses aligned memory operands on outbuffer
; (addps / movaps), so outbuffer has to be 16-byte aligned whenever
; length >= 4. inbuffer and the gain vectors may be unaligned.
;
; Registers: eax, ebx, ecx, edx, esi, edi are saved and restored here.
; xmm0-xmm4 and the flags are clobbered.
; ---------------------------------------------------------------------------
proc FMOD_DSP_Connection_MixMonoTo5_1_SIMD
%$inbuffer arg
%$outbuffer arg
%$length arg
%$volume0to3 arg
%$volume4to1 arg
%$volume2to5 arg
        push     eax
        push     ebx
        push     ecx
        push     edx
        push     esi
        push     edi

        mov      esi, [ebp+%$inbuffer]      ; esi = read cursor (mono samples)
        mov      edi, [ebp+%$outbuffer]     ; edi = write cursor (6-channel frames)
        mov      edx, [ebp+%$length]        ; edx = sample count, kept for the tail
        mov      ecx, edx

        ; xmm0 = *volume0to3, xmm1 = *volume4to1, xmm2 = *volume2to5
        mov      eax, [ebp+%$volume0to3]
        movups   xmm0, [eax]
        mov      eax, [ebp+%$volume4to1]
        movups   xmm1, [eax]
        mov      eax, [ebp+%$volume2to5]
        movups   xmm2, [eax]
%if 1
        ; ---- main path: 4 input samples -> 4 frames (24 floats) per pass ----
        shr      ecx, 2                     ; ecx = length / 4
        test     ecx, ecx
        jz       near mixloopMto51rolledstart

mixloopMto51unrolled:
        ; floats 0-7: frame 0 ch0-3, then frame 0 ch4-5 + frame 1 ch0-1
        movss    xmm3, [esi + 0]
        movlps   xmm4, [esi + 0]            ; low half = [s0][s1]
        shufps   xmm3, xmm3, 0              ; xmm3 = [s0][s0][s0][s0]
        shufps   xmm4, xmm4, 50h            ; xmm4 = [s0][s0][s1][s1]
        mulps    xmm3, xmm0
        mulps    xmm4, xmm1
        addps    xmm3, [edi]
        addps    xmm4, [edi + 16]
        movaps   [edi], xmm3
        movaps   [edi+16], xmm4

        ; floats 8-15: frame 1 ch2-5, then frame 2 ch0-3
        movss    xmm3, [esi + 4]
        movss    xmm4, [esi + 8]
        shufps   xmm3, xmm3, 0              ; xmm3 = [s1] x4
        shufps   xmm4, xmm4, 0              ; xmm4 = [s2] x4
        mulps    xmm3, xmm2
        mulps    xmm4, xmm0
        addps    xmm3, [edi + 32]
        addps    xmm4, [edi + 48]
        movaps   [edi+32], xmm3
        movaps   [edi+48], xmm4

        ; floats 16-23: frame 2 ch4-5 + frame 3 ch0-1, then frame 3 ch2-5
        movlps   xmm3, [esi + 8]            ; low half = [s2][s3]
        movss    xmm4, [esi + 12]
        shufps   xmm3, xmm3, 50h            ; xmm3 = [s2][s2][s3][s3]
        shufps   xmm4, xmm4, 0              ; xmm4 = [s3] x4
        mulps    xmm3, xmm1
        mulps    xmm4, xmm2
        addps    xmm3, [edi + 64]
        addps    xmm4, [edi + 80]
        movaps   [edi+64], xmm3
        movaps   [edi+80], xmm4

        add      edi, 96
        add      esi, 16
        dec      ecx
        jnz      near mixloopMto51unrolled

mixloopMto51rolledstart:
        ; Leftover count comes from the saved length in edx. ebp is the frame
        ; pointer and must not be used as the source here.
        mov      ecx, edx
        and      ecx, 3                     ; ecx = length % 4 leftover samples
%endif
        test     ecx, ecx                   ; still required when the %if block is disabled
        jz       mixMto51done

        ; ---- tail: one input sample -> one 6-channel frame per pass ----
        ; A frame is 24 bytes, so outbuffer is accessed unaligned here.
mixloopMto51rolled:
        movss    xmm3, [esi + 0]
        shufps   xmm3, xmm3, 0              ; xmm3 = [s] x4
        movups   xmm4, [edi]
        mulps    xmm3, xmm0                 ; ch0-3
        addps    xmm4, xmm3
        movups   [edi], xmm4

        ; Reload only the current sample (4 bytes). An 8-byte movlps here
        ; would read one float past the end of inbuffer on the last sample.
        movss    xmm3, [esi + 0]
        shufps   xmm3, xmm3, 0              ; xmm3 = [s] x4
        movlps   xmm4, [edi + 16]           ; low half = current ch4-5
        mulps    xmm3, xmm1                 ; low half = s*g4, s*g5
        addps    xmm4, xmm3
        movlps   [edi + 16], xmm4           ; only the low half (ch4-5) is written back

        add      edi, 24
        add      esi, 4
        dec      ecx
        jnz      near mixloopMto51rolled

mixMto51done:
        pop      edi
        pop      esi
        pop      edx
        pop      ecx
        pop      ebx
        pop      eax
endproc
; =================================================================================================================================
; void FMOD_DSP_Connection_Mix5_1To5_1_SIMD(float *inbuffer, float *outbuffer, unsigned int length, float *volume0to3, float *volume4to1, float *volume2to5);
; =================================================================================================================================
; ---------------------------------------------------------------------------
; Accumulates an interleaved 6-channel float stream into an interleaved
; 6-channel float buffer, applying one gain per channel (no cross-channel
; mixing):
;
;     out[6*i + c] += in[6*i + c] * gain[c]      for i in [0, length), c in 0..5
;
; The gains arrive as three 4-float vectors, one per 16-byte slice of the
; 24-float pattern that repeats every 4 frames. For every channel to get its
; own gain the caller is expected to pass:
;     volume0to3 -> [g0][g1][g2][g3]
;     volume4to1 -> [g4][g5][g0][g1]
;     volume2to5 -> [g2][g3][g4][g5]
; The one-frame tail loop uses only volume0to3 and the low half of
; volume4to1.
;
; Arguments are read from the stack relative to ebp at the offsets assigned
; by the `arg` macro. `proc` / `endproc` come from c32.mac and presumably emit
; the frame set-up, tear-down and return - confirm against c32.mac.
;
; Alignment: the 4-frame path uses movaps / aligned memory operands on BOTH
; buffers, so inbuffer and outbuffer have to be 16-byte aligned whenever
; length >= 4. The gain vectors may be unaligned.
;
; Registers: eax, ebx, ecx, edx, esi, edi are saved and restored here.
; xmm0-xmm4 and the flags are clobbered.
; ---------------------------------------------------------------------------
proc FMOD_DSP_Connection_Mix5_1To5_1_SIMD
%$inbuffer arg
%$outbuffer arg
%$length arg
%$volume0to3 arg
%$volume4to1 arg
%$volume2to5 arg
        push     eax
        push     ebx
        push     ecx
        push     edx
        push     esi
        push     edi

        mov      esi, [ebp+%$inbuffer]      ; esi = read cursor
        mov      edi, [ebp+%$outbuffer]     ; edi = write cursor
        mov      edx, [ebp+%$length]        ; edx = frame count, kept for the tail
        mov      ecx, edx

        ; xmm0 = *volume0to3, xmm1 = *volume4to1, xmm2 = *volume2to5
        mov      eax, [ebp+%$volume0to3]
        movups   xmm0, [eax]
        mov      eax, [ebp+%$volume4to1]
        movups   xmm1, [eax]
        mov      eax, [ebp+%$volume2to5]
        movups   xmm2, [eax]
%if 1
        ; ---- main path: 4 frames (24 floats, 96 bytes) per pass ----
        shr      ecx, 2                     ; ecx = length / 4
        test     ecx, ecx
        jz       near mixloop51to51rolledstart

mixloop51to51unrolled:
        ; floats 0-7
        movaps   xmm3, [esi + 0]
        movaps   xmm4, [esi + 16]
        mulps    xmm3, xmm0
        mulps    xmm4, xmm1
        addps    xmm3, [edi + 0]
        addps    xmm4, [edi + 16]
        movaps   [edi], xmm3
        movaps   [edi+16], xmm4

        ; floats 8-15
        movaps   xmm3, [esi + 32]
        movaps   xmm4, [esi + 48]
        mulps    xmm3, xmm2
        mulps    xmm4, xmm0
        addps    xmm3, [edi + 32]
        addps    xmm4, [edi + 48]
        movaps   [edi+32], xmm3
        movaps   [edi+48], xmm4

        ; floats 16-23
        movaps   xmm3, [esi + 64]
        movaps   xmm4, [esi + 80]
        mulps    xmm3, xmm1
        mulps    xmm4, xmm2
        addps    xmm3, [edi + 64]
        addps    xmm4, [edi + 80]
        movaps   [edi+64], xmm3
        movaps   [edi+80], xmm4

        add      edi, 96
        add      esi, 96
        dec      ecx
        jnz      near mixloop51to51unrolled

mixloop51to51rolledstart:
        ; Leftover count comes from the saved length in edx. ebp is the frame
        ; pointer and must not be used as the source here.
        mov      ecx, edx
        and      ecx, 3                     ; ecx = length % 4 leftover frames
%endif
        test     ecx, ecx                   ; still required when the %if block is disabled
        jz       mix51to51done

        ; ---- tail: one 6-channel frame (24 bytes) per pass ----
        ; A frame is 24 bytes, so both buffers are accessed unaligned here.
mixloop51to51rolled:
        movups   xmm3, [esi + 0]            ; in ch0-3
        movups   xmm4, [edi + 0]            ; out ch0-3
        mulps    xmm3, xmm0
        addps    xmm4, xmm3
        movups   [edi], xmm4

        movlps   xmm3, [esi + 16]           ; low half = in ch4-5
        movlps   xmm4, [edi + 16]           ; low half = out ch4-5
        mulps    xmm3, xmm1                 ; low half scaled by g4, g5
        addps    xmm4, xmm3
        movlps   [edi+16], xmm4             ; only the low half is written back

        add      edi, 24
        add      esi, 24
        dec      ecx
        jnz      near mixloop51to51rolled

mix51to51done:
        pop      edi
        pop      esi
        pop      edx
        pop      ecx
        pop      ebx
        pop      eax
endproc
; =================================================================================================================================
; void FMOD_DSP_Connection_MixMonoTo7_1_SIMD(float *inbuffer, float *outbuffer, unsigned int length, float *volume0to3, float *volume4to7);
; =================================================================================================================================
; ---------------------------------------------------------------------------
; Accumulates a mono float stream into an interleaved 8-channel float buffer:
;
;     out[8*i + c] += in[i] * gain[c]            for i in [0, length), c in 0..7
;
; Gains:
;     volume0to3 -> 4 floats applied to channels 0-3 of every frame
;     volume4to7 -> 4 floats applied to channels 4-7 of every frame
;
; Arguments are read from the stack relative to ebp at the offsets assigned
; by the `arg` macro. `proc` / `endproc` come from c32.mac and presumably emit
; the frame set-up, tear-down and return - confirm against c32.mac.
;
; Alignment: both loops use aligned memory operands on outbuffer (addps /
; movaps), so outbuffer has to be 16-byte aligned for any non-zero length.
; inbuffer and the gain vectors may be unaligned.
;
; Registers: eax, ebx, ecx, edx, esi, edi are saved and restored here.
; xmm0, xmm1, xmm3, xmm4 and the flags are clobbered.
; ---------------------------------------------------------------------------
proc FMOD_DSP_Connection_MixMonoTo7_1_SIMD
%$inbuffer arg
%$outbuffer arg
%$length arg
%$volume0to3 arg
%$volume4to7 arg
        push     eax
        push     ebx
        push     ecx
        push     edx
        push     esi
        push     edi

        mov      esi, [ebp+%$inbuffer]      ; esi = read cursor (mono samples)
        mov      edi, [ebp+%$outbuffer]     ; edi = write cursor (8-channel frames)
        mov      edx, [ebp+%$length]        ; edx = sample count, kept for the tail
        mov      ecx, edx

        ; xmm0 = gains for ch0-3, xmm1 = gains for ch4-7
        mov      eax, [ebp+%$volume0to3]
        movups   xmm0, [eax]
        mov      eax, [ebp+%$volume4to7]
        movups   xmm1, [eax]
%if 1
        ; ---- main path: 4 input samples -> 4 frames (32 floats) per pass ----
        shr      ecx, 2                     ; ecx = length / 4
        test     ecx, ecx
        jz       near mixloopMto71rolledstart

mixloopMto71unrolled:
        ; frame 0
        movss    xmm3, [esi + 0]
        shufps   xmm3, xmm3, 0              ; xmm3 = [s0] x4
        movaps   xmm4, xmm3
        mulps    xmm3, xmm0                 ; ch0-3
        mulps    xmm4, xmm1                 ; ch4-7
        addps    xmm3, [edi + 0]
        addps    xmm4, [edi + 16]
        movaps   [edi+0], xmm3
        movaps   [edi+16], xmm4

        ; frame 1
        movss    xmm3, [esi + 4]
        shufps   xmm3, xmm3, 0
        movaps   xmm4, xmm3
        mulps    xmm3, xmm0
        mulps    xmm4, xmm1
        addps    xmm3, [edi + 32]
        addps    xmm4, [edi + 48]
        movaps   [edi+32], xmm3
        movaps   [edi+48], xmm4

        ; frame 2
        movss    xmm3, [esi + 8]
        shufps   xmm3, xmm3, 0
        movaps   xmm4, xmm3
        mulps    xmm3, xmm0
        mulps    xmm4, xmm1
        addps    xmm3, [edi + 64]
        addps    xmm4, [edi + 80]
        movaps   [edi+64], xmm3
        movaps   [edi+80], xmm4

        ; frame 3
        movss    xmm3, [esi + 12]
        shufps   xmm3, xmm3, 0
        movaps   xmm4, xmm3
        mulps    xmm3, xmm0
        mulps    xmm4, xmm1
        addps    xmm3, [edi + 96]
        addps    xmm4, [edi + 112]
        movaps   [edi+96], xmm3
        movaps   [edi+112], xmm4

        add      edi, 128
        add      esi, 16
        dec      ecx
        jnz      near mixloopMto71unrolled

mixloopMto71rolledstart:
        ; Leftover count comes from the saved length in edx. ebp is the frame
        ; pointer and must not be used as the source here.
        mov      ecx, edx
        and      ecx, 3                     ; ecx = length % 4 leftover samples
%endif
        test     ecx, ecx                   ; still required when the %if block is disabled
        jz       mixMto71done

        ; ---- tail: one input sample -> one 8-channel frame per pass ----
mixloopMto71rolled:
        movss    xmm3, [esi + 0]
        shufps   xmm3, xmm3, 0              ; xmm3 = [s] x4
        movaps   xmm4, xmm3
        mulps    xmm3, xmm0                 ; ch0-3
        mulps    xmm4, xmm1                 ; ch4-7
        addps    xmm3, [edi + 0]
        addps    xmm4, [edi + 16]
        movaps   [edi+0], xmm3
        movaps   [edi+16], xmm4
        add      edi, 32
        add      esi, 4
        dec      ecx
        jnz      near mixloopMto71rolled

mixMto71done:
        pop      edi
        pop      esi
        pop      edx
        pop      ecx
        pop      ebx
        pop      eax
endproc
; =================================================================================================================================
; void FMOD_DSP_Connection_Mix7_1To7_1_SIMD(float *inbuffer, float *outbuffer, unsigned int length, float *volume0to3, float *volume4to7);
; =================================================================================================================================
; ---------------------------------------------------------------------------
; Accumulates an interleaved 8-channel float stream into an interleaved
; 8-channel float buffer, applying one gain per channel (no cross-channel
; mixing):
;
;     out[8*i + c] += in[8*i + c] * gain[c]      for i in [0, length), c in 0..7
;
; Gains:
;     volume0to3 -> 4 floats applied to channels 0-3 of every frame
;     volume4to7 -> 4 floats applied to channels 4-7 of every frame
;
; Arguments are read from the stack relative to ebp at the offsets assigned
; by the `arg` macro. `proc` / `endproc` come from c32.mac and presumably emit
; the frame set-up, tear-down and return - confirm against c32.mac.
;
; Alignment: the 4-frame path uses movaps / aligned memory operands on BOTH
; buffers, so inbuffer and outbuffer have to be 16-byte aligned whenever
; length >= 4. The tail loop and the gain loads use movups.
;
; Registers: eax, ebx, ecx, edx, esi, edi are saved and restored here.
; xmm0, xmm1, xmm3, xmm4 and the flags are clobbered.
; ---------------------------------------------------------------------------
proc FMOD_DSP_Connection_Mix7_1To7_1_SIMD
%$inbuffer arg
%$outbuffer arg
%$length arg
%$volume0to3 arg
%$volume4to7 arg
        push     eax
        push     ebx
        push     ecx
        push     edx
        push     esi
        push     edi

        mov      esi, [ebp+%$inbuffer]      ; esi = read cursor
        mov      edi, [ebp+%$outbuffer]     ; edi = write cursor
        mov      edx, [ebp+%$length]        ; edx = frame count, kept for the tail
        mov      ecx, edx

        ; xmm0 = gains for ch0-3, xmm1 = gains for ch4-7
        mov      eax, [ebp+%$volume0to3]
        movups   xmm0, [eax]
        mov      eax, [ebp+%$volume4to7]
        movups   xmm1, [eax]
%if 1
        ; ---- main path: 4 frames (32 floats, 128 bytes) per pass ----
        shr      ecx, 2                     ; ecx = length / 4
        test     ecx, ecx
        jz       near mixloop71to71rolledstart

mixloop71to71unrolled:
        ; frame 0
        movaps   xmm3, [esi + 0]
        movaps   xmm4, [esi + 16]
        mulps    xmm3, xmm0
        mulps    xmm4, xmm1
        addps    xmm3, [edi + 0]
        addps    xmm4, [edi + 16]
        movaps   [edi], xmm3
        movaps   [edi+16], xmm4

        ; frame 1
        movaps   xmm3, [esi + 32]
        movaps   xmm4, [esi + 48]
        mulps    xmm3, xmm0
        mulps    xmm4, xmm1
        addps    xmm3, [edi + 32]
        addps    xmm4, [edi + 48]
        movaps   [edi+32], xmm3
        movaps   [edi+48], xmm4

        ; frame 2
        movaps   xmm3, [esi + 64]
        movaps   xmm4, [esi + 80]
        mulps    xmm3, xmm0
        mulps    xmm4, xmm1
        addps    xmm3, [edi + 64]
        addps    xmm4, [edi + 80]
        movaps   [edi+64], xmm3
        movaps   [edi+80], xmm4

        ; frame 3
        movaps   xmm3, [esi + 96]
        movaps   xmm4, [esi + 112]
        mulps    xmm3, xmm0
        mulps    xmm4, xmm1
        addps    xmm3, [edi + 96]
        addps    xmm4, [edi + 112]
        movaps   [edi+96], xmm3
        movaps   [edi+112], xmm4

        add      edi, 128
        add      esi, 128
        dec      ecx
        jnz      near mixloop71to71unrolled

mixloop71to71rolledstart:
        ; Leftover count comes from the saved length in edx. ebp is the frame
        ; pointer and must not be used as the source here.
        mov      ecx, edx
        and      ecx, 3                     ; ecx = length % 4 leftover frames
%endif
        test     ecx, ecx                   ; still required when the %if block is disabled
        jz       mix71to71done

        ; ---- tail: one 8-channel frame (32 bytes) per pass ----
mixloop71to71rolled:
        movups   xmm3, [esi + 0]            ; in ch0-3
        movups   xmm4, [edi + 0]            ; out ch0-3
        mulps    xmm3, xmm0
        addps    xmm4, xmm3
        movups   [edi], xmm4

        movups   xmm3, [esi + 16]           ; in ch4-7
        movups   xmm4, [edi + 16]           ; out ch4-7
        mulps    xmm3, xmm1
        addps    xmm4, xmm3
        movups   [edi+16], xmm4

        add      edi, 32
        add      esi, 32
        dec      ecx
        jnz      near mixloop71to71rolled

mix71to71done:
        pop      edi
        pop      esi
        pop      edx
        pop      ecx
        pop      ebx
        pop      eax
endproc