Ryujinx/ARMeilleure/CodeGen/X86/Assembler.cs
Wunk 17620d18db
ARMeilleure: Add initial support for AVX512 (EVEX encoding) (cont) (#4147)
* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection

Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as
short-hands for `F+VL` and `F+VL+DQ`.

* ARMeilleure: Add initial support for EVEX instruction encoding

Does not implement rounding, or exception controls.

* ARMeilleure: Add `X86Vpternlogd`

Accelerates the vector-`Not` instruction.

* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}

* ARMeilleure: Add check for `XCR0` flags

Add XCR0 register checks for AVX and AVX512F, following the guidelines
from section 14.3 and 15.2 from the Intel Architecture Software
Developer's Manual.

* ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting

* ARMeilleure: Move XCR0 procedure to GetXcr0Eax

* ARMeilleure: Add `XCR0` to `FeatureInfo` structure

* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly

Avoids an additional allocation

* ARMeilleure: Formatting fixes

* ARMeilleure: Fix EVEX encoding src2 register index

> Just like in VEX prefix, vvvv is provided in inverted form.

* ARMeilleure: Add `X86Vpternlogd` acceleration to `Vmvn_I`

Passes unit tests, verified instruction utilization

* ARMeilleure: Fix EVEX register operand designations

Operand 2 was being sourced improperly.

EVEX encoded instructions source their operands like so:
Operand 1: ModRM:reg
Operand 2: EVEX.vvvvv
Operand 3: ModRM:r/m
Operand 4: Imm

This fixes the improper register designations when emitting vpternlog.
Now "dest", "src1", "src2" arguments emit in the proper order in EVEX instructions.

* ARMeilleure: Add `X86Vpternlogd` acceleration to `Orn_V`

* ARMeilleure: PTC version bump

* ARMeilleure: Update EVEX encoding Debug.Assert to Debug.Fail

* ARMeilleure: Update EVEX encoding comment capitalization
2023-03-20 16:09:24 -03:00

1559 lines
48 KiB
C#

using ARMeilleure.CodeGen.Linking;
using ARMeilleure.IntermediateRepresentation;
using Ryujinx.Common.Memory;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.InteropServices;
namespace ARMeilleure.CodeGen.X86
{
partial class Assembler
{
private const int ReservedBytesForJump = 1;
private const int OpModRMBits = 24;
private const byte RexPrefix = 0x40;
private const byte RexWPrefix = 0x48;
private const byte LockPrefix = 0xf0;
private const int MaxRegNumber = 15;
private struct Jump
{
public bool IsConditional { get; }
public X86Condition Condition { get; }
public Operand JumpLabel { get; }
public long? JumpTarget { get; set; }
public long JumpPosition { get; }
public long Offset { get; set; }
public int InstSize { get; set; }
public Jump(Operand jumpLabel, long jumpPosition)
{
IsConditional = false;
Condition = 0;
JumpLabel = jumpLabel;
JumpTarget = null;
JumpPosition = jumpPosition;
Offset = 0;
InstSize = 0;
}
public Jump(X86Condition condition, Operand jumpLabel, long jumpPosition)
{
IsConditional = true;
Condition = condition;
JumpLabel = jumpLabel;
JumpTarget = null;
JumpPosition = jumpPosition;
Offset = 0;
InstSize = 0;
}
}
private struct Reloc
{
public int JumpIndex { get; set; }
public int Position { get; set; }
public Symbol Symbol { get; set; }
}
private readonly List<Jump> _jumps;
private readonly List<Reloc> _relocs;
private readonly Dictionary<Operand, long> _labels;
private readonly Stream _stream;
public bool HasRelocs => _relocs != null;
public Assembler(Stream stream, bool relocatable)
{
_stream = stream;
_labels = new Dictionary<Operand, long>();
_jumps = new List<Jump>();
_relocs = relocatable ? new List<Reloc>() : null;
}
public void MarkLabel(Operand label)
{
_labels.Add(label, _stream.Position);
}
public void Add(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Add);
}
public void Addsd(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Addsd);
}
public void Addss(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Addss);
}
public void And(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.And);
}
public void Bsr(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Bsr);
}
public void Bswap(Operand dest)
{
WriteInstruction(dest, default, dest.Type, X86Instruction.Bswap);
}
public void Call(Operand dest)
{
WriteInstruction(dest, default, OperandType.None, X86Instruction.Call);
}
public void Cdq()
{
WriteByte(0x99);
}
public void Cmovcc(Operand dest, Operand source, OperandType type, X86Condition condition)
{
ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Cmovcc];
WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM | (int)condition, rrm: true);
}
public void Cmp(Operand src1, Operand src2, OperandType type)
{
WriteInstruction(src1, src2, type, X86Instruction.Cmp);
}
public void Cqo()
{
WriteByte(0x48);
WriteByte(0x99);
}
public void Cmpxchg(Operand memOp, Operand src)
{
Debug.Assert(memOp.Kind == OperandKind.Memory);
WriteByte(LockPrefix);
WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg);
}
public void Cmpxchg16(Operand memOp, Operand src)
{
Debug.Assert(memOp.Kind == OperandKind.Memory);
WriteByte(LockPrefix);
WriteByte(0x66);
WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg);
}
public void Cmpxchg16b(Operand memOp)
{
Debug.Assert(memOp.Kind == OperandKind.Memory);
WriteByte(LockPrefix);
WriteInstruction(memOp, default, OperandType.None, X86Instruction.Cmpxchg16b);
}
public void Cmpxchg8(Operand memOp, Operand src)
{
Debug.Assert(memOp.Kind == OperandKind.Memory);
WriteByte(LockPrefix);
WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg8);
}
public void Comisd(Operand src1, Operand src2)
{
WriteInstruction(src1, default, src2, X86Instruction.Comisd);
}
public void Comiss(Operand src1, Operand src2)
{
WriteInstruction(src1, default, src2, X86Instruction.Comiss);
}
public void Cvtsd2ss(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss);
}
public void Cvtsi2sd(Operand dest, Operand src1, Operand src2, OperandType type)
{
WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2sd, type);
}
public void Cvtsi2ss(Operand dest, Operand src1, Operand src2, OperandType type)
{
WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2ss, type);
}
public void Cvtss2sd(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Cvtss2sd);
}
public void Div(Operand source)
{
WriteInstruction(default, source, source.Type, X86Instruction.Div);
}
public void Divsd(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Divsd);
}
public void Divss(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Divss);
}
public void Idiv(Operand source)
{
WriteInstruction(default, source, source.Type, X86Instruction.Idiv);
}
public void Imul(Operand source)
{
WriteInstruction(default, source, source.Type, X86Instruction.Imul128);
}
public void Imul(Operand dest, Operand source, OperandType type)
{
if (source.Kind != OperandKind.Register)
{
throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
}
WriteInstruction(dest, source, type, X86Instruction.Imul);
}
public void Imul(Operand dest, Operand src1, Operand src2, OperandType type)
{
ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Imul];
if (src2.Kind != OperandKind.Constant)
{
throw new ArgumentException($"Invalid source 2 operand kind \"{src2.Kind}\".");
}
if (IsImm8(src2.Value, src2.Type) && info.OpRMImm8 != BadOp)
{
WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm8, rrm: true);
WriteByte(src2.AsByte());
}
else if (IsImm32(src2.Value, src2.Type) && info.OpRMImm32 != BadOp)
{
WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm32, rrm: true);
WriteInt32(src2.AsInt32());
}
else
{
throw new ArgumentException($"Failed to encode constant 0x{src2.Value:X}.");
}
}
public void Insertps(Operand dest, Operand src1, Operand src2, byte imm)
{
WriteInstruction(dest, src1, src2, X86Instruction.Insertps);
WriteByte(imm);
}
public void Jcc(X86Condition condition, Operand dest)
{
if (dest.Kind == OperandKind.Label)
{
_jumps.Add(new Jump(condition, dest, _stream.Position));
// ReservedBytesForJump
WriteByte(0);
}
else
{
throw new ArgumentException("Destination operand must be of kind Label", nameof(dest));
}
}
public void Jcc(X86Condition condition, long offset)
{
if (ConstFitsOnS8(offset))
{
WriteByte((byte)(0x70 | (int)condition));
WriteByte((byte)offset);
}
else if (ConstFitsOnS32(offset))
{
WriteByte(0x0f);
WriteByte((byte)(0x80 | (int)condition));
WriteInt32((int)offset);
}
else
{
throw new ArgumentOutOfRangeException(nameof(offset));
}
}
public void Jmp(long offset)
{
if (ConstFitsOnS8(offset))
{
WriteByte(0xeb);
WriteByte((byte)offset);
}
else if (ConstFitsOnS32(offset))
{
WriteByte(0xe9);
WriteInt32((int)offset);
}
else
{
throw new ArgumentOutOfRangeException(nameof(offset));
}
}
public void Jmp(Operand dest)
{
if (dest.Kind == OperandKind.Label)
{
_jumps.Add(new Jump(dest, _stream.Position));
// ReservedBytesForJump
WriteByte(0);
}
else
{
WriteInstruction(dest, default, OperandType.None, X86Instruction.Jmp);
}
}
public void Ldmxcsr(Operand dest)
{
WriteInstruction(dest, default, OperandType.I32, X86Instruction.Ldmxcsr);
}
public void Lea(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Lea);
}
public void LockOr(Operand dest, Operand source, OperandType type)
{
WriteByte(LockPrefix);
WriteInstruction(dest, source, type, X86Instruction.Or);
}
public void Mov(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Mov);
}
public void Mov16(Operand dest, Operand source)
{
WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov16);
}
public void Mov8(Operand dest, Operand source)
{
WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov8);
}
public void Movd(Operand dest, Operand source)
{
ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd];
if (source.Type.IsInteger() || source.Kind == OperandKind.Memory)
{
WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRRM, rrm: true);
}
else
{
WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRMR);
}
}
public void Movdqu(Operand dest, Operand source)
{
WriteInstruction(dest, default, source, X86Instruction.Movdqu);
}
public void Movhlps(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Movhlps);
}
public void Movlhps(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Movlhps);
}
public void Movq(Operand dest, Operand source)
{
ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd];
InstructionFlags flags = info.Flags | InstructionFlags.RexW;
if (source.Type.IsInteger() || source.Kind == OperandKind.Memory)
{
WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRRM, rrm: true);
}
else if (dest.Type.IsInteger() || dest.Kind == OperandKind.Memory)
{
WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRMR);
}
else
{
WriteInstruction(dest, source, OperandType.None, X86Instruction.Movq);
}
}
public void Movsd(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Movsd);
}
public void Movss(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Movss);
}
public void Movsx16(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Movsx16);
}
public void Movsx32(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Movsx32);
}
public void Movsx8(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Movsx8);
}
public void Movzx16(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Movzx16);
}
public void Movzx8(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Movzx8);
}
public void Mul(Operand source)
{
WriteInstruction(default, source, source.Type, X86Instruction.Mul128);
}
public void Mulsd(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Mulsd);
}
public void Mulss(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Mulss);
}
public void Neg(Operand dest)
{
WriteInstruction(dest, default, dest.Type, X86Instruction.Neg);
}
public void Not(Operand dest)
{
WriteInstruction(dest, default, dest.Type, X86Instruction.Not);
}
public void Or(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Or);
}
public void Pclmulqdq(Operand dest, Operand source, byte imm)
{
WriteInstruction(dest, default, source, X86Instruction.Pclmulqdq);
WriteByte(imm);
}
public void Pcmpeqw(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Pcmpeqw);
}
public void Pextrb(Operand dest, Operand source, byte imm)
{
WriteInstruction(dest, default, source, X86Instruction.Pextrb);
WriteByte(imm);
}
public void Pextrd(Operand dest, Operand source, byte imm)
{
WriteInstruction(dest, default, source, X86Instruction.Pextrd);
WriteByte(imm);
}
public void Pextrq(Operand dest, Operand source, byte imm)
{
WriteInstruction(dest, default, source, X86Instruction.Pextrq);
WriteByte(imm);
}
public void Pextrw(Operand dest, Operand source, byte imm)
{
WriteInstruction(dest, default, source, X86Instruction.Pextrw);
WriteByte(imm);
}
public void Pinsrb(Operand dest, Operand src1, Operand src2, byte imm)
{
WriteInstruction(dest, src1, src2, X86Instruction.Pinsrb);
WriteByte(imm);
}
public void Pinsrd(Operand dest, Operand src1, Operand src2, byte imm)
{
WriteInstruction(dest, src1, src2, X86Instruction.Pinsrd);
WriteByte(imm);
}
public void Pinsrq(Operand dest, Operand src1, Operand src2, byte imm)
{
WriteInstruction(dest, src1, src2, X86Instruction.Pinsrq);
WriteByte(imm);
}
public void Pinsrw(Operand dest, Operand src1, Operand src2, byte imm)
{
WriteInstruction(dest, src1, src2, X86Instruction.Pinsrw);
WriteByte(imm);
}
public void Pop(Operand dest)
{
if (dest.Kind == OperandKind.Register)
{
WriteCompactInst(dest, 0x58);
}
else
{
WriteInstruction(dest, default, dest.Type, X86Instruction.Pop);
}
}
public void Popcnt(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Popcnt);
}
public void Pshufd(Operand dest, Operand source, byte imm)
{
WriteInstruction(dest, default, source, X86Instruction.Pshufd);
WriteByte(imm);
}
public void Push(Operand source)
{
if (source.Kind == OperandKind.Register)
{
WriteCompactInst(source, 0x50);
}
else
{
WriteInstruction(default, source, source.Type, X86Instruction.Push);
}
}
public void Return()
{
WriteByte(0xc3);
}
public void Ror(Operand dest, Operand source, OperandType type)
{
WriteShiftInst(dest, source, type, X86Instruction.Ror);
}
public void Sar(Operand dest, Operand source, OperandType type)
{
WriteShiftInst(dest, source, type, X86Instruction.Sar);
}
public void Shl(Operand dest, Operand source, OperandType type)
{
WriteShiftInst(dest, source, type, X86Instruction.Shl);
}
public void Shr(Operand dest, Operand source, OperandType type)
{
WriteShiftInst(dest, source, type, X86Instruction.Shr);
}
public void Setcc(Operand dest, X86Condition condition)
{
ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Setcc];
WriteOpCode(dest, default, default, OperandType.None, info.Flags, info.OpRRM | (int)condition);
}
public void Stmxcsr(Operand dest)
{
WriteInstruction(dest, default, OperandType.I32, X86Instruction.Stmxcsr);
}
public void Sub(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Sub);
}
public void Subsd(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Subsd);
}
public void Subss(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Subss);
}
public void Test(Operand src1, Operand src2, OperandType type)
{
WriteInstruction(src1, src2, type, X86Instruction.Test);
}
public void Xor(Operand dest, Operand source, OperandType type)
{
WriteInstruction(dest, source, type, X86Instruction.Xor);
}
public void Xorps(Operand dest, Operand src1, Operand src2)
{
WriteInstruction(dest, src1, src2, X86Instruction.Xorps);
}
public void WriteInstruction(
X86Instruction inst,
Operand dest,
Operand source,
OperandType type = OperandType.None)
{
WriteInstruction(dest, default, source, inst, type);
}
public void WriteInstruction(X86Instruction inst, Operand dest, Operand src1, Operand src2)
{
if (src2.Kind == OperandKind.Constant)
{
WriteInstruction(src1, dest, src2, inst);
}
else
{
WriteInstruction(dest, src1, src2, inst);
}
}
public void WriteInstruction(
X86Instruction inst,
Operand dest,
Operand src1,
Operand src2,
OperandType type)
{
WriteInstruction(dest, src1, src2, inst, type);
}
public void WriteInstruction(X86Instruction inst, Operand dest, Operand source, byte imm)
{
WriteInstruction(dest, default, source, inst);
WriteByte(imm);
}
public void WriteInstruction(
X86Instruction inst,
Operand dest,
Operand src1,
Operand src2,
Operand src3)
{
// 3+ operands can only be encoded with the VEX encoding scheme.
Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
WriteInstruction(dest, src1, src2, inst);
WriteByte((byte)(src3.AsByte() << 4));
}
public void WriteInstruction(
X86Instruction inst,
Operand dest,
Operand src1,
Operand src2,
byte imm)
{
WriteInstruction(dest, src1, src2, inst);
WriteByte(imm);
}
private void WriteShiftInst(Operand dest, Operand source, OperandType type, X86Instruction inst)
{
if (source.Kind == OperandKind.Register)
{
X86Register shiftReg = (X86Register)source.GetRegister().Index;
Debug.Assert(shiftReg == X86Register.Rcx, $"Invalid shift register \"{shiftReg}\".");
source = default;
}
else if (source.Kind == OperandKind.Constant)
{
source = Operand.Factory.Const((int)source.Value & (dest.Type == OperandType.I32 ? 0x1f : 0x3f));
}
WriteInstruction(dest, source, type, inst);
}
private void WriteInstruction(Operand dest, Operand source, OperandType type, X86Instruction inst)
{
ref readonly InstructionInfo info = ref _instTable[(int)inst];
if (source != default)
{
if (source.Kind == OperandKind.Constant)
{
ulong imm = source.Value;
if (inst == X86Instruction.Mov8)
{
WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8);
WriteByte((byte)imm);
}
else if (inst == X86Instruction.Mov16)
{
WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32);
WriteInt16((short)imm);
}
else if (IsImm8(imm, type) && info.OpRMImm8 != BadOp)
{
WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8);
WriteByte((byte)imm);
}
else if (!source.Relocatable && IsImm32(imm, type) && info.OpRMImm32 != BadOp)
{
WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32);
WriteInt32((int)imm);
}
else if (dest != default && dest.Kind == OperandKind.Register && info.OpRImm64 != BadOp)
{
int rexPrefix = GetRexPrefix(dest, source, type, rrm: false);
if (rexPrefix != 0)
{
WriteByte((byte)rexPrefix);
}
WriteByte((byte)(info.OpRImm64 + (dest.GetRegister().Index & 0b111)));
if (HasRelocs && source.Relocatable)
{
_relocs.Add(new Reloc
{
JumpIndex = _jumps.Count - 1,
Position = (int)_stream.Position,
Symbol = source.Symbol
});
}
WriteUInt64(imm);
}
else
{
throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
}
}
else if (source.Kind == OperandKind.Register && info.OpRMR != BadOp)
{
WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR);
}
else if (info.OpRRM != BadOp)
{
WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true);
}
else
{
throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
}
}
else if (info.OpRRM != BadOp)
{
WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true);
}
else if (info.OpRMR != BadOp)
{
WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR);
}
else
{
throw new ArgumentNullException(nameof(source));
}
}
private void WriteInstruction(
Operand dest,
Operand src1,
Operand src2,
X86Instruction inst,
OperandType type = OperandType.None)
{
ref readonly InstructionInfo info = ref _instTable[(int)inst];
if (src2 != default)
{
if (src2.Kind == OperandKind.Constant)
{
ulong imm = src2.Value;
if ((byte)imm == imm && info.OpRMImm8 != BadOp)
{
WriteOpCode(dest, src1, default, type, info.Flags, info.OpRMImm8);
WriteByte((byte)imm);
}
else
{
throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
}
}
else if (src2.Kind == OperandKind.Register && info.OpRMR != BadOp)
{
WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
}
else if (info.OpRRM != BadOp)
{
WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
}
else
{
throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\".");
}
}
else if (info.OpRRM != BadOp)
{
WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
}
else if (info.OpRMR != BadOp)
{
WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
}
else
{
throw new ArgumentNullException(nameof(src2));
}
}
private void WriteOpCode(
Operand dest,
Operand src1,
Operand src2,
OperandType type,
InstructionFlags flags,
int opCode,
bool rrm = false)
{
int rexPrefix = GetRexPrefix(dest, src2, type, rrm);
if ((flags & InstructionFlags.RexW) != 0)
{
rexPrefix |= RexWPrefix;
}
int modRM = (opCode >> OpModRMBits) << 3;
MemoryOperand memOp = default;
bool hasMemOp = false;
if (dest != default)
{
if (dest.Kind == OperandKind.Register)
{
int regIndex = dest.GetRegister().Index;
modRM |= (regIndex & 0b111) << (rrm ? 3 : 0);
if ((flags & InstructionFlags.Reg8Dest) != 0 && regIndex >= 4)
{
rexPrefix |= RexPrefix;
}
}
else if (dest.Kind == OperandKind.Memory)
{
memOp = dest.GetMemory();
hasMemOp = true;
}
else
{
throw new ArgumentException("Invalid destination operand kind \"" + dest.Kind + "\".");
}
}
if (src2 != default)
{
if (src2.Kind == OperandKind.Register)
{
int regIndex = src2.GetRegister().Index;
modRM |= (regIndex & 0b111) << (rrm ? 0 : 3);
if ((flags & InstructionFlags.Reg8Src) != 0 && regIndex >= 4)
{
rexPrefix |= RexPrefix;
}
}
else if (src2.Kind == OperandKind.Memory && !hasMemOp)
{
memOp = src2.GetMemory();
hasMemOp = true;
}
else
{
throw new ArgumentException("Invalid source operand kind \"" + src2.Kind + "\".");
}
}
bool needsSibByte = false;
bool needsDisplacement = false;
int sib = 0;
if (hasMemOp)
{
// Either source or destination is a memory operand.
Register baseReg = memOp.BaseAddress.GetRegister();
X86Register baseRegLow = (X86Register)(baseReg.Index & 0b111);
needsSibByte = memOp.Index != default || baseRegLow == X86Register.Rsp;
needsDisplacement = memOp.Displacement != 0 || baseRegLow == X86Register.Rbp;
if (needsDisplacement)
{
if (ConstFitsOnS8(memOp.Displacement))
{
modRM |= 0x40;
}
else /* if (ConstFitsOnS32(memOp.Displacement)) */
{
modRM |= 0x80;
}
}
if (baseReg.Index >= 8)
{
Debug.Assert((uint)baseReg.Index <= MaxRegNumber);
rexPrefix |= RexPrefix | (baseReg.Index >> 3);
}
if (needsSibByte)
{
sib = (int)baseRegLow;
if (memOp.Index != default)
{
int indexReg = memOp.Index.GetRegister().Index;
Debug.Assert(indexReg != (int)X86Register.Rsp, "Using RSP as index register on the memory operand is not allowed.");
if (indexReg >= 8)
{
Debug.Assert((uint)indexReg <= MaxRegNumber);
rexPrefix |= RexPrefix | (indexReg >> 3) << 1;
}
sib |= (indexReg & 0b111) << 3;
}
else
{
sib |= 0b100 << 3;
}
sib |= (int)memOp.Scale << 6;
modRM |= 0b100;
}
else
{
modRM |= (int)baseRegLow;
}
}
else
{
// Source and destination are registers.
modRM |= 0xc0;
}
Debug.Assert(opCode != BadOp, "Invalid opcode value.");
if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding)
{
WriteEvexInst(dest, src1, src2, type, flags, opCode);
opCode &= 0xff;
}
else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
{
// In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits.
int vexByte2 = (flags & InstructionFlags.PrefixMask) switch
{
InstructionFlags.Prefix66 => 1,
InstructionFlags.PrefixF3 => 2,
InstructionFlags.PrefixF2 => 3,
_ => 0
};
if (src1 != default)
{
vexByte2 |= (src1.GetRegister().Index ^ 0xf) << 3;
}
else
{
vexByte2 |= 0b1111 << 3;
}
ushort opCodeHigh = (ushort)(opCode >> 8);
if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf)
{
// Two-byte form.
WriteByte(0xc5);
vexByte2 |= (~rexPrefix & 4) << 5;
WriteByte((byte)vexByte2);
}
else
{
// Three-byte form.
WriteByte(0xc4);
int vexByte1 = (~rexPrefix & 7) << 5;
switch (opCodeHigh)
{
case 0xf: vexByte1 |= 1; break;
case 0xf38: vexByte1 |= 2; break;
case 0xf3a: vexByte1 |= 3; break;
default: Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}."); break;
}
vexByte2 |= (rexPrefix & 8) << 4;
WriteByte((byte)vexByte1);
WriteByte((byte)vexByte2);
}
opCode &= 0xff;
}
else
{
if (flags.HasFlag(InstructionFlags.Prefix66))
{
WriteByte(0x66);
}
if (flags.HasFlag(InstructionFlags.PrefixF2))
{
WriteByte(0xf2);
}
if (flags.HasFlag(InstructionFlags.PrefixF3))
{
WriteByte(0xf3);
}
if (rexPrefix != 0)
{
WriteByte((byte)rexPrefix);
}
}
if (dest != default && (flags & InstructionFlags.RegOnly) != 0)
{
opCode += dest.GetRegister().Index & 7;
}
if ((opCode & 0xff0000) != 0)
{
WriteByte((byte)(opCode >> 16));
}
if ((opCode & 0xff00) != 0)
{
WriteByte((byte)(opCode >> 8));
}
WriteByte((byte)opCode);
if ((flags & InstructionFlags.RegOnly) == 0)
{
WriteByte((byte)modRM);
if (needsSibByte)
{
WriteByte((byte)sib);
}
if (needsDisplacement)
{
if (ConstFitsOnS8(memOp.Displacement))
{
WriteByte((byte)memOp.Displacement);
}
else /* if (ConstFitsOnS32(memOp.Displacement)) */
{
WriteInt32(memOp.Displacement);
}
}
}
}
private void WriteEvexInst(
Operand dest,
Operand src1,
Operand src2,
OperandType type,
InstructionFlags flags,
int opCode,
bool broadcast = false,
int registerWidth = 128,
int maskRegisterIdx = 0,
bool zeroElements = false)
{
int op1Idx = dest.GetRegister().Index;
int op2Idx = src1.GetRegister().Index;
int op3Idx = src2.GetRegister().Index;
WriteByte(0x62);
// P0
// Extend operand 1 register
bool r = (op1Idx & 8) == 0;
// Extend operand 3 register
bool x = (op3Idx & 16) == 0;
// Extend operand 3 register
bool b = (op3Idx & 8) == 0;
// Extend operand 1 register
bool rp = (op1Idx & 16) == 0;
// Escape code index
byte mm = 0b00;
switch ((ushort)(opCode >> 8))
{
case 0xf00: mm = 0b01; break;
case 0xf38: mm = 0b10; break;
case 0xf3a: mm = 0b11; break;
default: Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}."); break;
}
WriteByte(
(byte)(
(r ? 0x80 : 0) |
(x ? 0x40 : 0) |
(b ? 0x20 : 0) |
(rp ? 0x10 : 0) |
mm));
// P1
// Specify 64-bit lane mode
bool w = Is64Bits(type);
// Operand 2 register index
byte vvvv = (byte)(~op2Idx & 0b1111);
// Opcode prefix
byte pp = (flags & InstructionFlags.PrefixMask) switch
{
InstructionFlags.Prefix66 => 0b01,
InstructionFlags.PrefixF3 => 0b10,
InstructionFlags.PrefixF2 => 0b11,
_ => 0
};
WriteByte(
(byte)(
(w ? 0x80 : 0) |
(vvvv << 3) |
0b100 |
pp));
// P2
// Mask register determines what elements to zero, rather than what elements to merge
bool z = zeroElements;
// Specifies register-width
byte ll = 0b00;
switch (registerWidth)
{
case 128: ll = 0b00; break;
case 256: ll = 0b01; break;
case 512: ll = 0b10; break;
default: Debug.Fail($"Invalid EVEX vector register width {registerWidth}."); break;
}
// Embedded broadcast in the case of a memory operand
bool bcast = broadcast;
// Extend operand 2 register
bool vp = (op2Idx & 16) == 0;
// Mask register index
Debug.Assert(maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}.");
byte aaa = (byte)(maskRegisterIdx & 0b111);
WriteByte(
(byte)(
(z ? 0x80 : 0) |
(ll << 5) |
(bcast ? 0x10 : 0) |
(vp ? 8 : 0) |
aaa));
}
private void WriteCompactInst(Operand operand, int opCode)
{
int regIndex = operand.GetRegister().Index;
if (regIndex >= 8)
{
WriteByte(0x41);
}
WriteByte((byte)(opCode + (regIndex & 0b111)));
}
private static int GetRexPrefix(Operand dest, Operand source, OperandType type, bool rrm)
{
int rexPrefix = 0;
if (Is64Bits(type))
{
rexPrefix = RexWPrefix;
}
void SetRegisterHighBit(Register reg, int bit)
{
if (reg.Index >= 8)
{
rexPrefix |= RexPrefix | (reg.Index >> 3) << bit;
}
}
if (dest != default && dest.Kind == OperandKind.Register)
{
SetRegisterHighBit(dest.GetRegister(), rrm ? 2 : 0);
}
if (source != default && source.Kind == OperandKind.Register)
{
SetRegisterHighBit(source.GetRegister(), rrm ? 0 : 2);
}
return rexPrefix;
}
public (byte[], RelocInfo) GetCode()
{
var jumps = CollectionsMarshal.AsSpan(_jumps);
var relocs = CollectionsMarshal.AsSpan(_relocs);
// Write jump relative offsets.
bool modified;
do
{
modified = false;
for (int i = 0; i < jumps.Length; i++)
{
ref Jump jump = ref jumps[i];
// If jump target not resolved yet, resolve it.
if (jump.JumpTarget == null)
{
jump.JumpTarget = _labels[jump.JumpLabel];
}
long jumpTarget = jump.JumpTarget.Value;
long offset = jumpTarget - jump.JumpPosition;
if (offset < 0)
{
for (int j = i - 1; j >= 0; j--)
{
ref Jump jump2 = ref jumps[j];
if (jump2.JumpPosition < jumpTarget)
{
break;
}
offset -= jump2.InstSize - ReservedBytesForJump;
}
}
else
{
for (int j = i + 1; j < jumps.Length; j++)
{
ref Jump jump2 = ref jumps[j];
if (jump2.JumpPosition >= jumpTarget)
{
break;
}
offset += jump2.InstSize - ReservedBytesForJump;
}
offset -= ReservedBytesForJump;
}
if (jump.IsConditional)
{
jump.InstSize = GetJccLength(offset);
}
else
{
jump.InstSize = GetJmpLength(offset);
}
// The jump is relative to the next instruction, not the current one.
// Since we didn't know the next instruction address when calculating
// the offset (as the size of the current jump instruction was not known),
// we now need to compensate the offset with the jump instruction size.
// It's also worth noting that:
// - This is only needed for backward jumps.
// - The GetJmpLength and GetJccLength also compensates the offset
// internally when computing the jump instruction size.
if (offset < 0)
{
offset -= jump.InstSize;
}
if (jump.Offset != offset)
{
jump.Offset = offset;
modified = true;
}
}
}
while (modified);
// Write the code, ignoring the dummy bytes after jumps, into a new stream.
_stream.Seek(0, SeekOrigin.Begin);
using var codeStream = MemoryStreamManager.Shared.GetStream();
var assembler = new Assembler(codeStream, HasRelocs);
bool hasRelocs = HasRelocs;
int relocIndex = 0;
int relocOffset = 0;
var relocEntries = hasRelocs
? new RelocEntry[relocs.Length]
: Array.Empty<RelocEntry>();
for (int i = 0; i < jumps.Length; i++)
{
ref Jump jump = ref jumps[i];
// If has relocations, calculate their new positions compensating for jumps.
if (hasRelocs)
{
relocOffset += jump.InstSize - ReservedBytesForJump;
for (; relocIndex < relocEntries.Length; relocIndex++)
{
ref Reloc reloc = ref relocs[relocIndex];
if (reloc.JumpIndex > i)
{
break;
}
relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol);
}
}
Span<byte> buffer = new byte[jump.JumpPosition - _stream.Position];
_stream.Read(buffer);
_stream.Seek(ReservedBytesForJump, SeekOrigin.Current);
codeStream.Write(buffer);
if (jump.IsConditional)
{
assembler.Jcc(jump.Condition, jump.Offset);
}
else
{
assembler.Jmp(jump.Offset);
}
}
// Write remaining relocations. This case happens when there are no jumps assembled.
for (; relocIndex < relocEntries.Length; relocIndex++)
{
ref Reloc reloc = ref relocs[relocIndex];
relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol);
}
_stream.CopyTo(codeStream);
var code = codeStream.ToArray();
var relocInfo = new RelocInfo(relocEntries);
return (code, relocInfo);
}
private static bool Is64Bits(OperandType type)
{
return type == OperandType.I64 || type == OperandType.FP64;
}
private static bool IsImm8(ulong immediate, OperandType type)
{
long value = type == OperandType.I32 ? (int)immediate : (long)immediate;
return ConstFitsOnS8(value);
}
private static bool IsImm32(ulong immediate, OperandType type)
{
long value = type == OperandType.I32 ? (int)immediate : (long)immediate;
return ConstFitsOnS32(value);
}
private static int GetJccLength(long offset)
{
if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset))
{
return 2;
}
else if (ConstFitsOnS32(offset < 0 ? offset - 6 : offset))
{
return 6;
}
else
{
throw new ArgumentOutOfRangeException(nameof(offset));
}
}
private static int GetJmpLength(long offset)
{
if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset))
{
return 2;
}
else if (ConstFitsOnS32(offset < 0 ? offset - 5 : offset))
{
return 5;
}
else
{
throw new ArgumentOutOfRangeException(nameof(offset));
}
}
private static bool ConstFitsOnS8(long value)
{
return value == (sbyte)value;
}
private static bool ConstFitsOnS32(long value)
{
return value == (int)value;
}
private void WriteInt16(short value)
{
WriteUInt16((ushort)value);
}
private void WriteInt32(int value)
{
WriteUInt32((uint)value);
}
private void WriteByte(byte value)
{
_stream.WriteByte(value);
}
private void WriteUInt16(ushort value)
{
_stream.WriteByte((byte)(value >> 0));
_stream.WriteByte((byte)(value >> 8));
}
private void WriteUInt32(uint value)
{
_stream.WriteByte((byte)(value >> 0));
_stream.WriteByte((byte)(value >> 8));
_stream.WriteByte((byte)(value >> 16));
_stream.WriteByte((byte)(value >> 24));
}
private void WriteUInt64(ulong value)
{
_stream.WriteByte((byte)(value >> 0));
_stream.WriteByte((byte)(value >> 8));
_stream.WriteByte((byte)(value >> 16));
_stream.WriteByte((byte)(value >> 24));
_stream.WriteByte((byte)(value >> 32));
_stream.WriteByte((byte)(value >> 40));
_stream.WriteByte((byte)(value >> 48));
_stream.WriteByte((byte)(value >> 56));
}
}
}