shader_jit_x64_compiler: Use haddps for horizontal summation

This commit is contained in:
MerryMage 2017-12-10 22:00:04 +00:00
parent af45f2b2de
commit efec8fe513

View file

@@ -387,6 +387,10 @@ void JitShader::Compile_DP4(Instruction instr) {
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
if (Common::GetCPUCaps().sse3) {
haddps(SRC1, SRC1);
haddps(SRC1, SRC1);
} else {
movaps(SRC2, SRC1);
shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
addps(SRC1, SRC2);
@@ -394,6 +398,7 @@ void JitShader::Compile_DP4(Instruction instr) {
movaps(SRC2, SRC1);
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
addps(SRC1, SRC2);
}
Compile_DestEnable(instr, SRC1);
}
@@ -419,6 +424,10 @@ void JitShader::Compile_DPH(Instruction instr) {
Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
if (Common::GetCPUCaps().sse3) {
haddps(SRC1, SRC1);
haddps(SRC1, SRC1);
} else {
movaps(SRC2, SRC1);
shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
addps(SRC1, SRC2);
@@ -426,6 +435,7 @@ void JitShader::Compile_DPH(Instruction instr) {
movaps(SRC2, SRC1);
shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
addps(SRC1, SRC2);
}
Compile_DestEnable(instr, SRC1);
}