%include "./FMOD_static/win32/src/c32.mac"

; ==========================================================================================
; SSE mixing kernels for DSP connections (NASM syntax, 32-bit x86).
;
; Every routine accumulates a gain-scaled copy of the input into the output:
;     out[frame][ch] += in[...] * gain[ch]
;
; Conventions shared by all routines in this file:
;   * Arguments are read from the stack through ebp using the offsets produced by the
;     proc / arg / endproc macros of c32.mac.
;     NOTE(review): the macros are not visible here; presumably proc builds the ebp frame
;     and endproc tears it down and returns - confirm against c32.mac.
;   * eax, ebx, ecx, edx, esi and edi are pushed on entry and popped on exit.
;   * xmm0-xmm4 are overwritten and not restored.
;   * esi = read cursor (inbuffer), edi = write cursor (outbuffer), edx = length,
;     ecx = loop counter.
;   * "length" counts frames. Each routine first runs an unrolled loop that handles four
;     frames per iteration (length >> 2 iterations), then a tail loop that handles the
;     remaining (length & 3) frames one at a time.
;   * movaps and SSE memory operands fault on addresses that are not 16-byte aligned.
;     Each header lists which buffers are accessed that way.
; ==========================================================================================

; ==========================================================================================
; GLOBAL UNINITIALIZED DATA
; ==========================================================================================

[SEGMENT .data use32 align=32]

; ==========================================================================================
; CODE
; ==========================================================================================

[SEGMENT .text use32 align=32]


; =================================================================================================================================
; void FMOD_DSP_Connection_MixMonoToStereo_SIMD (float *inbuffer, float *outbuffer, unsigned int length, float lvolume, float rvolume);
;
; For every mono input sample s:  out[2i] += s * lvolume,  out[2i+1] += s * rvolume.
;
; Alignment: the unrolled loop reads inbuffer with movups (any alignment) and accesses
;            outbuffer with movaps, so outbuffer must be 16-byte aligned whenever
;            length >= 4. The tail loop uses scalar movss only.
; =================================================================================================================================
proc FMOD_DSP_Connection_MixMonoToStereo_SIMD

    %$inbuffer  arg
    %$outbuffer arg
    %$length    arg
    %$lvolume   arg
    %$rvolume   arg

    push    eax
    push    ebx
    push    ecx
    push    edx
    push    esi
    push    edi

    mov     esi, [ebp+%$inbuffer]
    mov     edi, [ebp+%$outbuffer]

    ; Register layout inside the unrolled loop (lane 0 first):
    ; xmm0 = [lvolume][rvolume][lvolume][rvolume]
    ; xmm1 = [samp1  ][samp1  ][samp2  ][samp2  ]
    ; xmm2 = [samp3  ][samp3  ][samp4  ][samp4  ]
    ; xmm3 = output frames 1-2 (L R L R)
    ; xmm4 = output frames 3-4 (L R L R)

    movss   xmm0, [ebp+%$lvolume]
    movss   xmm1, [ebp+%$rvolume]
    shufps  xmm0, xmm0, 0               ; broadcast lvolume to all lanes
    shufps  xmm1, xmm1, 0               ; broadcast rvolume to all lanes
    unpcklps xmm0, xmm1                 ; interleave -> [l][r][l][r]

    mov     edx, [ebp+%$length]
    mov     ecx, edx

%if 1
    shr     ecx, 2                      ; ecx = number of 4-sample groups
    test    ecx, ecx
    jz      mixloopMtoSrolledstart

mixloopMtoSunrolled:

    movups  xmm1, [esi]                 ; four mono samples
    movaps  xmm2, xmm1
    movaps  xmm3, [edi]
    movaps  xmm4, [edi + 16]
    unpcklps xmm1, xmm1                 ; [s1][s1][s2][s2]
    unpckhps xmm2, xmm2                 ; [s3][s3][s4][s4]
    mulps   xmm1, xmm0
    mulps   xmm2, xmm0
    addps   xmm3, xmm1
    addps   xmm4, xmm2
    movaps  [edi], xmm3
    movaps  [edi+16], xmm4

    add     edi, 32                     ; 4 stereo frames written
    add     esi, 16                     ; 4 mono samples consumed
    dec     ecx
    jnz     near mixloopMtoSunrolled

mixloopMtoSrolledstart:

    mov     ecx, edx
    and     ecx, 3                      ; ecx = leftover samples (length mod 4)
%endif
    test    ecx, ecx
    jz      mixMtoSdone

mixloopMtoSrolled:

    movss   xmm1, [esi]
    movss   xmm2, xmm1
    movss   xmm3, [edi]
    movss   xmm4, [edi+4]
    mulss   xmm1, [ebp+%$lvolume]
    mulss   xmm2, [ebp+%$rvolume]
    addss   xmm3, xmm1
    addss   xmm4, xmm2
    movss   [edi], xmm3
    movss   [edi+4], xmm4

    add     edi, 8
    add     esi, 4
    dec     ecx
    jnz     near mixloopMtoSrolled

mixMtoSdone:

    pop     edi
    pop     esi
    pop     edx
    pop     ecx
    pop     ebx
    pop     eax

endproc


; =================================================================================================================================
; void FMOD_DSP_Connection_MixStereoToStereo_SIMD(float *inbuffer, float *outbuffer, unsigned int length, float lvolume, float rvolume);
;
; For every interleaved stereo frame:  out[2i] += in[2i] * lvolume,  out[2i+1] += in[2i+1] * rvolume.
;
; Alignment: the unrolled loop reads inbuffer with movups (any alignment) and accesses
;            outbuffer with movaps, so outbuffer must be 16-byte aligned whenever
;            length >= 4. The tail loop uses scalar movss only.
; =================================================================================================================================
proc FMOD_DSP_Connection_MixStereoToStereo_SIMD

    %$inbuffer  arg
    %$outbuffer arg
    %$length    arg
    %$lvolume   arg
    %$rvolume   arg

    push    eax
    push    ebx
    push    ecx
    push    edx
    push    esi
    push    edi

    mov     esi, [ebp+%$inbuffer]
    mov     edi, [ebp+%$outbuffer]

    ; Register layout inside the unrolled loop (lane 0 first):
    ; xmm0 = [lvolume][rvolume][lvolume][rvolume]
    ; xmm1 = input frames 1-2  (L R L R)
    ; xmm2 = input frames 3-4  (L R L R)
    ; xmm3 = output frames 1-2 (L R L R)
    ; xmm4 = output frames 3-4 (L R L R)

    movss   xmm0, [ebp+%$lvolume]
    movss   xmm1, [ebp+%$rvolume]
    shufps  xmm0, xmm0, 0               ; broadcast lvolume to all lanes
    shufps  xmm1, xmm1, 0               ; broadcast rvolume to all lanes
    unpcklps xmm0, xmm1                 ; interleave -> [l][r][l][r]

    mov     edx, [ebp+%$length]
    mov     ecx, edx

%if 1
    shr     ecx, 2                      ; ecx = number of 4-frame groups
    test    ecx, ecx
    jz      mixloopStoSrolledstart

mixloopStoSunrolled:

    movups  xmm1, [esi]
    movups  xmm2, [esi+16]
    movaps  xmm3, [edi]
    movaps  xmm4, [edi+16]
    mulps   xmm1, xmm0
    mulps   xmm2, xmm0
    addps   xmm3, xmm1
    addps   xmm4, xmm2
    movaps  [edi], xmm3
    movaps  [edi+16], xmm4

    add     edi, 32                     ; 4 stereo frames written
    add     esi, 32                     ; 4 stereo frames consumed
    dec     ecx
    jnz     near mixloopStoSunrolled

mixloopStoSrolledstart:

    mov     ecx, edx
    and     ecx, 3                      ; ecx = leftover frames (length mod 4)
%endif
    test    ecx, ecx
    jz      mixStoSdone

mixloopStoSrolled:

    movss   xmm1, [esi]
    movss   xmm2, [esi+4]
    movss   xmm3, [edi]
    movss   xmm4, [edi+4]
    mulss   xmm1, [ebp+%$lvolume]
    mulss   xmm2, [ebp+%$rvolume]
    addss   xmm3, xmm1
    addss   xmm4, xmm2
    movss   [edi], xmm3
    movss   [edi+4], xmm4

    add     edi, 8
    add     esi, 8
    dec     ecx
    jnz     near mixloopStoSrolled

mixStoSdone:

    pop     edi
    pop     esi
    pop     edx
    pop     ecx
    pop     ebx
    pop     eax

endproc


; =================================================================================================================================
; void FMOD_DSP_Connection_MixMonoTo5_1_SIMD(float *inbuffer, float *outbuffer, unsigned int length, float *volume0to3, float *volume4to1, float *volume2to5);
;
; Spreads each mono input sample over six interleaved output channels:
;     out[6i + ch] += in[i] * gain[ch]
;
; volume0to3, volume4to1, volume2to5 each point to four floats (read with movups).
; NOTE(review): going by the argument names and the lane comments below they are expected
; to hold the six channel gains rotated as {g0,g1,g2,g3}, {g4,g5,g0,g1}, {g2,g3,g4,g5},
; so that three 16-byte vectors line up with two 6-channel frames - confirm with the caller.
;
; Alignment: the unrolled loop accesses outbuffer with movaps / aligned memory operands,
;            so outbuffer must be 16-byte aligned whenever length >= 4. inbuffer is only
;            read with movss / movlps (no alignment requirement). The tail loop uses
;            unaligned accesses because one frame is 24 bytes.
; =================================================================================================================================
proc FMOD_DSP_Connection_MixMonoTo5_1_SIMD

    %$inbuffer   arg
    %$outbuffer  arg
    %$length     arg
    %$volume0to3 arg
    %$volume4to1 arg
    %$volume2to5 arg

    push    eax
    push    ebx
    push    ecx
    push    edx
    push    esi
    push    edi

    mov     esi, [ebp+%$inbuffer]
    mov     edi, [ebp+%$outbuffer]

    ; Register layout (lane 0 first):
    ; xmm0 = [l00][l10][l20][l30]      (volume0to3)
    ; xmm1 = [l40][l50][l00][l10]      (volume4to1)
    ; xmm2 = [l20][l30][l40][l50]      (volume2to5)
    ; xmm3 = scratch
    ; xmm4 = scratch

    mov     edx, [ebp+%$length]
    mov     ecx, edx

    mov     eax, [ebp+%$volume0to3]
    movups  xmm0, [eax]
    mov     eax, [ebp+%$volume4to1]
    movups  xmm1, [eax]
    mov     eax, [ebp+%$volume2to5]
    movups  xmm2, [eax]

%if 1
    shr     ecx, 2                      ; ecx = number of 4-sample groups
    test    ecx, ecx
    jz      near mixloopMto51rolledstart

mixloopMto51unrolled:

    ; output floats 0-7: frame 0 ch0-3 | frame 0 ch4-5, frame 1 ch0-1
    movss   xmm3, [esi + 0]
    movlps  xmm4, [esi + 0]             ; lanes 0-1 = s0, s1
    shufps  xmm3, xmm3, 0               ; [s0][s0][s0][s0]
    shufps  xmm4, xmm4, 50h             ; [s0][s0][s1][s1]
    mulps   xmm3, xmm0
    mulps   xmm4, xmm1
    addps   xmm3, [edi]
    addps   xmm4, [edi + 16]
    movaps  [edi], xmm3
    movaps  [edi+16], xmm4

    ; output floats 8-15: frame 1 ch2-5 | frame 2 ch0-3
    movss   xmm3, [esi + 4]
    movss   xmm4, [esi + 8]
    shufps  xmm3, xmm3, 0               ; [s1][s1][s1][s1]
    shufps  xmm4, xmm4, 0               ; [s2][s2][s2][s2]
    mulps   xmm3, xmm2
    mulps   xmm4, xmm0
    addps   xmm3, [edi + 32]
    addps   xmm4, [edi + 48]
    movaps  [edi+32], xmm3
    movaps  [edi+48], xmm4

    ; output floats 16-23: frame 2 ch4-5, frame 3 ch0-1 | frame 3 ch2-5
    movlps  xmm3, [esi + 8]             ; lanes 0-1 = s2, s3
    movss   xmm4, [esi + 12]
    shufps  xmm3, xmm3, 50h             ; [s2][s2][s3][s3]
    shufps  xmm4, xmm4, 0               ; [s3][s3][s3][s3]
    mulps   xmm3, xmm1
    mulps   xmm4, xmm2
    addps   xmm3, [edi + 64]
    addps   xmm4, [edi + 80]
    movaps  [edi+64], xmm3
    movaps  [edi+80], xmm4

    add     edi, 96                     ; 4 frames * 6 channels * 4 bytes
    add     esi, 16                     ; 4 mono samples consumed
    dec     ecx
    jnz     near mixloopMto51unrolled

mixloopMto51rolledstart:

    ; The leftover count must come from length (edx). It was previously taken from ebp,
    ; which made the tail count depend on the frame pointer instead of the sample count.
    mov     ecx, edx
    and     ecx, 3                      ; ecx = leftover samples (length mod 4)
%endif
    test    ecx, ecx
    jz      mixMto51done

mixloopMto51rolled:

    ; channels 0-3
    movss   xmm3, [esi + 0]
    shufps  xmm3, xmm3, 0               ; [s][s][s][s]
    movups  xmm4, [edi]
    mulps   xmm3, xmm0
    addps   xmm4, xmm3
    movups  [edi], xmm4

    ; channels 4-5: only lanes 0-1 are loaded from / stored to the output.
    ; The sample is re-loaded with movss (4 bytes) rather than movlps (8 bytes) so the
    ; final iteration does not read past the end of inbuffer.
    movss   xmm3, [esi + 0]
    shufps  xmm3, xmm3, 0               ; [s][s][s][s]
    movlps  xmm4, [edi + 16]
    mulps   xmm3, xmm1                  ; lanes 0-1 = s*l40, s*l50
    addps   xmm4, xmm3
    movlps  [edi + 16], xmm4

    add     edi, 24                     ; one 6-channel frame
    add     esi, 4
    dec     ecx
    jnz     near mixloopMto51rolled

mixMto51done:

    pop     edi
    pop     esi
    pop     edx
    pop     ecx
    pop     ebx
    pop     eax

endproc


; =================================================================================================================================
; void FMOD_DSP_Connection_Mix5_1To5_1_SIMD(float *inbuffer, float *outbuffer, unsigned int length, float *volume0to3, float *volume4to1, float *volume2to5);
;
; Per-channel gain and accumulate on interleaved 6-channel data:
;     out[6i + ch] += in[6i + ch] * gain[ch]
;
; volume0to3, volume4to1, volume2to5 each point to four floats (read with movups).
; NOTE(review): going by the argument names and the lane comments below they are expected
; to hold the six channel gains rotated as {g0,g1,g2,g3}, {g4,g5,g0,g1}, {g2,g3,g4,g5},
; so that three 16-byte vectors line up with two 6-channel frames - confirm with the caller.
;
; Alignment: the unrolled loop uses movaps / aligned memory operands on BOTH buffers, so
;            inbuffer and outbuffer must be 16-byte aligned whenever length >= 4. The tail
;            loop uses unaligned accesses because one frame is 24 bytes.
; =================================================================================================================================
proc FMOD_DSP_Connection_Mix5_1To5_1_SIMD

    %$inbuffer   arg
    %$outbuffer  arg
    %$length     arg
    %$volume0to3 arg
    %$volume4to1 arg
    %$volume2to5 arg

    push    eax
    push    ebx
    push    ecx
    push    edx
    push    esi
    push    edi

    mov     esi, [ebp+%$inbuffer]
    mov     edi, [ebp+%$outbuffer]

    ; Register layout (lane 0 first):
    ; xmm0 = [l00][l10][l20][l30]      (volume0to3)
    ; xmm1 = [l40][l50][l00][l10]      (volume4to1)
    ; xmm2 = [l20][l30][l40][l50]      (volume2to5)
    ; xmm3 = scratch
    ; xmm4 = scratch

    mov     edx, [ebp+%$length]
    mov     ecx, edx

    mov     eax, [ebp+%$volume0to3]
    movups  xmm0, [eax]
    mov     eax, [ebp+%$volume4to1]
    movups  xmm1, [eax]
    mov     eax, [ebp+%$volume2to5]
    movups  xmm2, [eax]

%if 1
    shr     ecx, 2                      ; ecx = number of 4-frame groups
    test    ecx, ecx
    jz      near mixloop51to51rolledstart

mixloop51to51unrolled:

    ; floats 0-7
    movaps  xmm3, [esi + 0]
    movaps  xmm4, [esi + 16]
    mulps   xmm3, xmm0
    mulps   xmm4, xmm1
    addps   xmm3, [edi + 0]
    addps   xmm4, [edi + 16]
    movaps  [edi], xmm3
    movaps  [edi+16], xmm4

    ; floats 8-15
    movaps  xmm3, [esi + 32]
    movaps  xmm4, [esi + 48]
    mulps   xmm3, xmm2
    mulps   xmm4, xmm0
    addps   xmm3, [edi + 32]
    addps   xmm4, [edi + 48]
    movaps  [edi+32], xmm3
    movaps  [edi+48], xmm4

    ; floats 16-23
    movaps  xmm3, [esi + 64]
    movaps  xmm4, [esi + 80]
    mulps   xmm3, xmm1
    mulps   xmm4, xmm2
    addps   xmm3, [edi + 64]
    addps   xmm4, [edi + 80]
    movaps  [edi+64], xmm3
    movaps  [edi+80], xmm4

    add     edi, 96                     ; 4 frames * 6 channels * 4 bytes
    add     esi, 96
    dec     ecx
    jnz     near mixloop51to51unrolled

mixloop51to51rolledstart:

    ; The leftover count must come from length (edx). It was previously taken from ebp,
    ; which made the tail count depend on the frame pointer instead of the frame count.
    mov     ecx, edx
    and     ecx, 3                      ; ecx = leftover frames (length mod 4)
%endif
    test    ecx, ecx
    jz      mix51to51done

mixloop51to51rolled:

    ; channels 0-3
    movups  xmm3, [esi + 0]
    movups  xmm4, [edi + 0]
    mulps   xmm3, xmm0
    addps   xmm4, xmm3
    movups  [edi], xmm4

    ; channels 4-5 (only lanes 0-1 are loaded and stored)
    movlps  xmm3, [esi + 16]
    movlps  xmm4, [edi + 16]
    mulps   xmm3, xmm1
    addps   xmm4, xmm3
    movlps  [edi+16], xmm4

    add     edi, 24                     ; one 6-channel frame
    add     esi, 24
    dec     ecx
    jnz     near mixloop51to51rolled

mix51to51done:

    pop     edi
    pop     esi
    pop     edx
    pop     ecx
    pop     ebx
    pop     eax

endproc


; =================================================================================================================================
; void FMOD_DSP_Connection_MixMonoTo7_1_SIMD(float *inbuffer, float *outbuffer, unsigned int length, float *volume0to3, float *volume4to7);
;
; Spreads each mono input sample over eight interleaved output channels:
;     out[8i + ch] += in[i] * gain[ch]
;
; volume0to3 and volume4to7 each point to four floats (read with movups); they are applied
; to output channels 0-3 and 4-7 respectively.
;
; Alignment: both the unrolled loop and the tail loop access outbuffer with movaps /
;            aligned memory operands, so outbuffer must be 16-byte aligned whenever
;            length > 0. inbuffer is only read with movss (no alignment requirement).
; =================================================================================================================================
proc FMOD_DSP_Connection_MixMonoTo7_1_SIMD

    %$inbuffer   arg
    %$outbuffer  arg
    %$length     arg
    %$volume0to3 arg
    %$volume4to7 arg

    push    eax
    push    ebx
    push    ecx
    push    edx
    push    esi
    push    edi

    mov     esi, [ebp+%$inbuffer]
    mov     edi, [ebp+%$outbuffer]

    ; Register layout (lane 0 first):
    ; xmm0 = volume0to3[0..3]          (gains for channels 0-3)
    ; xmm1 = volume4to7[0..3]          (gains for channels 4-7)
    ; xmm3 = broadcast sample, then channels 0-3 result
    ; xmm4 = broadcast sample, then channels 4-7 result

    mov     edx, [ebp+%$length]
    mov     ecx, edx

    mov     eax, [ebp+%$volume0to3]
    movups  xmm0, [eax]
    mov     eax, [ebp+%$volume4to7]
    movups  xmm1, [eax]

%if 1
    shr     ecx, 2                      ; ecx = number of 4-sample groups
    test    ecx, ecx
    jz      near mixloopMto71rolledstart

mixloopMto71unrolled:

    ; sample 0 -> frame 0
    movss   xmm3, [esi + 0]
    shufps  xmm3, xmm3, 0
    movaps  xmm4, xmm3
    mulps   xmm3, xmm0
    mulps   xmm4, xmm1
    addps   xmm3, [edi + 0]
    addps   xmm4, [edi + 16]
    movaps  [edi+0], xmm3
    movaps  [edi+16], xmm4

    ; sample 1 -> frame 1
    movss   xmm3, [esi + 4]
    shufps  xmm3, xmm3, 0
    movaps  xmm4, xmm3
    mulps   xmm3, xmm0
    mulps   xmm4, xmm1
    addps   xmm3, [edi + 32]
    addps   xmm4, [edi + 48]
    movaps  [edi+32], xmm3
    movaps  [edi+48], xmm4

    ; sample 2 -> frame 2
    movss   xmm3, [esi + 8]
    shufps  xmm3, xmm3, 0
    movaps  xmm4, xmm3
    mulps   xmm3, xmm0
    mulps   xmm4, xmm1
    addps   xmm3, [edi + 64]
    addps   xmm4, [edi + 80]
    movaps  [edi+64], xmm3
    movaps  [edi+80], xmm4

    ; sample 3 -> frame 3
    movss   xmm3, [esi + 12]
    shufps  xmm3, xmm3, 0
    movaps  xmm4, xmm3
    mulps   xmm3, xmm0
    mulps   xmm4, xmm1
    addps   xmm3, [edi + 96]
    addps   xmm4, [edi + 112]
    movaps  [edi+96], xmm3
    movaps  [edi+112], xmm4

    add     edi, 128                    ; 4 frames * 8 channels * 4 bytes
    add     esi, 16                     ; 4 mono samples consumed
    dec     ecx
    jnz     near mixloopMto71unrolled

mixloopMto71rolledstart:

    ; The leftover count must come from length (edx). It was previously taken from ebp,
    ; which made the tail count depend on the frame pointer instead of the sample count.
    mov     ecx, edx
    and     ecx, 3                      ; ecx = leftover samples (length mod 4)
%endif
    test    ecx, ecx
    jz      mixMto71done

mixloopMto71rolled:

    movss   xmm3, [esi + 0]
    shufps  xmm3, xmm3, 0
    movaps  xmm4, xmm3
    mulps   xmm3, xmm0
    mulps   xmm4, xmm1
    addps   xmm3, [edi + 0]
    addps   xmm4, [edi + 16]
    movaps  [edi+0], xmm3
    movaps  [edi+16], xmm4

    add     edi, 32                     ; one 8-channel frame
    add     esi, 4
    dec     ecx
    jnz     near mixloopMto71rolled

mixMto71done:

    pop     edi
    pop     esi
    pop     edx
    pop     ecx
    pop     ebx
    pop     eax

endproc


; =================================================================================================================================
; void FMOD_DSP_Connection_Mix7_1To7_1_SIMD(float *inbuffer, float *outbuffer, unsigned int length, float *volume0to3, float *volume4to7);
;
; Per-channel gain and accumulate on interleaved 8-channel data:
;     out[8i + ch] += in[8i + ch] * gain[ch]
;
; volume0to3 and volume4to7 each point to four floats (read with movups); they are applied
; to channels 0-3 and 4-7 respectively.
;
; Alignment: the unrolled loop uses movaps / aligned memory operands on BOTH buffers, so
;            inbuffer and outbuffer must be 16-byte aligned whenever length >= 4. The tail
;            loop uses movups only.
; =================================================================================================================================
proc FMOD_DSP_Connection_Mix7_1To7_1_SIMD

    %$inbuffer   arg
    %$outbuffer  arg
    %$length     arg
    %$volume0to3 arg
    %$volume4to7 arg

    push    eax
    push    ebx
    push    ecx
    push    edx
    push    esi
    push    edi

    mov     esi, [ebp+%$inbuffer]
    mov     edi, [ebp+%$outbuffer]

    ; Register layout (lane 0 first):
    ; xmm0 = volume0to3[0..3]          (gains for channels 0-3)
    ; xmm1 = volume4to7[0..3]          (gains for channels 4-7)
    ; xmm3 = scratch (channels 0-3 of the current frame)
    ; xmm4 = scratch (channels 4-7 of the current frame)

    mov     edx, [ebp+%$length]
    mov     ecx, edx

    mov     eax, [ebp+%$volume0to3]
    movups  xmm0, [eax]
    mov     eax, [ebp+%$volume4to7]
    movups  xmm1, [eax]

%if 1
    shr     ecx, 2                      ; ecx = number of 4-frame groups
    test    ecx, ecx
    jz      near mixloop71to71rolledstart

mixloop71to71unrolled:

    ; frame 0
    movaps  xmm3, [esi + 0]
    movaps  xmm4, [esi + 16]
    mulps   xmm3, xmm0
    mulps   xmm4, xmm1
    addps   xmm3, [edi + 0]
    addps   xmm4, [edi + 16]
    movaps  [edi], xmm3
    movaps  [edi+16], xmm4

    ; frame 1
    movaps  xmm3, [esi + 32]
    movaps  xmm4, [esi + 48]
    mulps   xmm3, xmm0
    mulps   xmm4, xmm1
    addps   xmm3, [edi + 32]
    addps   xmm4, [edi + 48]
    movaps  [edi+32], xmm3
    movaps  [edi+48], xmm4

    ; frame 2
    movaps  xmm3, [esi + 64]
    movaps  xmm4, [esi + 80]
    mulps   xmm3, xmm0
    mulps   xmm4, xmm1
    addps   xmm3, [edi + 64]
    addps   xmm4, [edi + 80]
    movaps  [edi+64], xmm3
    movaps  [edi+80], xmm4

    ; frame 3
    movaps  xmm3, [esi + 96]
    movaps  xmm4, [esi + 112]
    mulps   xmm3, xmm0
    mulps   xmm4, xmm1
    addps   xmm3, [edi + 96]
    addps   xmm4, [edi + 112]
    movaps  [edi+96], xmm3
    movaps  [edi+112], xmm4

    add     edi, 128                    ; 4 frames * 8 channels * 4 bytes
    add     esi, 128
    dec     ecx
    jnz     near mixloop71to71unrolled

mixloop71to71rolledstart:

    ; The leftover count must come from length (edx). It was previously taken from ebp,
    ; which made the tail count depend on the frame pointer instead of the frame count.
    mov     ecx, edx
    and     ecx, 3                      ; ecx = leftover frames (length mod 4)
%endif
    test    ecx, ecx
    jz      mix71to71done

mixloop71to71rolled:

    ; channels 0-3
    movups  xmm3, [esi + 0]
    movups  xmm4, [edi + 0]
    mulps   xmm3, xmm0
    addps   xmm4, xmm3
    movups  [edi], xmm4

    ; channels 4-7
    movups  xmm3, [esi + 16]
    movups  xmm4, [edi + 16]
    mulps   xmm3, xmm1
    addps   xmm4, xmm3
    movups  [edi+16], xmm4

    add     edi, 32                     ; one 8-channel frame
    add     esi, 32
    dec     ecx
    jnz     near mixloop71to71rolled

mix71to71done:

    pop     edi
    pop     esi
    pop     edx
    pop     ecx
    pop     ebx
    pop     eax

endproc