forums.ps2dev.org :: View topic - [ALLEGREX / CPU +FPU +VFPU] isa

forums.ps2dev.org
Homebrew PS2, PSP & PS3 Development Discussions

FAQ

Memberlist

Usergroups

Profile

[ALLEGREX / CPU +FPU +VFPU] isa

forums.ps2dev.org Forum Index -> PSP Development

View previous topic :: View next topic

Author

Message

hlide

Joined: 10 Sep 2006
Posts: 750

Posted: Tue Jun 10, 2008 8:09 pm Post subject: [ALLEGREX / CPU +FPU +VFPU] isa

ALLEGREX / CPU Instructions set

status : mostly done

Code:

field
{
rs:5; rt:5; rd:5;

shamt:5;

// the 16-bit immediate is referenced as imm16 by every encoding below
imm3:3; imm16:16; imm26:26;

code:20; // syscall/break code

lsb:5; msb:5; // ins/ext bit positions

func:5; // cache function, specific to Allegrex

c0dr:5; c0cr:5;
}

group mips
{
// SPECIAL

nop(00000000000000000000000000000000)
{
cycles="1"
operation=
"
1: no operation
"
}

sll(00000000000:rt:rd:shamt:000000)
{
cycles="1"
operation=
"
1: GPR[rd] = u32(GPR[rt]) << shamt
"
}

srl(00000000000:rt:rd:shamt:000010)
{
cycles="1"
operation=
"
1: GPR[rd] = u32(GPR[rt]) >> shamt
"
}

sra(00000000000:rt:rd:shamt:000011)
{
cycles="1"
operation=
"
1: GPR[rd] = s32(GPR[rt]) >> shamt
"
}

sllv(000000:rs:rt:rd:00000000100)
{
cycles="1"
operation=
"
1: GPR[rd] = u32(GPR[rt]) << u32(GPR[rs]&31)
"
}

srlv(000000:rs:rt:rd:00000000110)
{
cycles="1"
operation=
"
1: GPR[rd] = u32(GPR[rt]) >> u32(GPR[rs]&31)
"
}

srav(000000:rs:rt:rd:00000000111)
{
cycles="1"
operation=
"
1: GPR[rd] = s32(GPR[rt]) >> u32(GPR[rs]&31)
"
}

jr(000000:rs:000000000000000001000)
{
cycles="2"
operation=
"
1: target = GPR[rs]
execute instruction at PC+4
2: PC = target
"
}

// jump and link register: 6 (SPECIAL) + 5 (rs) + 5 (zero) + 5 (rd) + 11 (zero/function) = 32 bits
jalr(000000:rs:00000:rd:00000001001)
{
cycles="2"
operation=
"
1: GPR[rd] = PC+8
target = GPR[rs]
execute instruction at PC+4
2: PC = target
"
}

mfhi(0000000000000000:rd:00000010000)
{
cycles="?"
operation=
"
1: GPR[rd] = HI
"
}

mthi(000000:rs:000000000000000010001)
{
cycles="?"
operation=
"
1: HI = GPR[rs]
"
}

mflo(0000000000000000:rd:00000010010)
{
cycles="?"
operation=
"
1: GPR[rd] = LO
"
}

mtlo(000000:rs:000000000000000010011)
{
cycles="?"
operation=
"
1: LO = GPR[rs]
"
}

// signed 32x32 -> 64 multiply of rs by rt, result split over HI:LO
mult(000000:rs:rt:0000000000011000)
{
cycles="5"
operation=
"
1: result:64 = s64(GPR[rs]) * s64(GPR[rt])
LO = result[31..0]
HI = result[63..32]
"
}

// unsigned 32x32 -> 64 multiply of rs by rt, result split over HI:LO
multu(000000:rs:rt:0000000000011001)
{
cycles="5"
operation=
"
1: result:64 = u64(GPR[rs]) * u64(GPR[rt])
LO = result[31..0]
HI = result[63..32]
"
}

// signed divide of rs by rt: quotient in LO, remainder in HI
div(000000:rs:rt:0000000000011010)
{
cycles="36"
operation=
"
1: LO = s32(GPR[rs]) / s32(GPR[rt])
HI = s32(GPR[rs]) % s32(GPR[rt])
"
}

// unsigned divide of rs by rt: quotient in LO, remainder in HI
divu(000000:rs:rt:0000000000011011)
{
cycles="36"
operation=
"
1: LO = u32(GPR[rs]) / u32(GPR[rt])
HI = u32(GPR[rs]) % u32(GPR[rt])
"
}

// trapping add: both operands are sign-extended to 33 bits so that
// overflow shows up as bit 32 differing from bit 31
add(000000:rs:rt:rd:00000100000)
{
cycles="1"
operation=
"
1: result:33 = (((GPR[rs][31]) << 32) | GPR[rs]) + (((GPR[rt][31]) << 32) | GPR[rt])
if (result[32] == result[31])
GPR[rd] = result[31..0]
else
raise integer overflow exception
"
}

addu(000000:rs:rt:rd:00000100001)
{
cycles="1"
operation=
"
1: GPR[rd] = GPR[rs] + GPR[rt]
"
}

// trapping subtract: both operands are sign-extended to 33 bits so that
// overflow shows up as bit 32 differing from bit 31
sub(000000:rs:rt:rd:00000100010)
{
cycles="1"
operation=
"
1: result:33 = (((GPR[rs][31]) << 32) | GPR[rs]) - (((GPR[rt][31]) << 32) | GPR[rt])
if (result[32] == result[31])
GPR[rd] = result[31..0]
else
raise integer overflow exception
"
}

subu(000000:rs:rt:rd:00000100011)
{
cycles="1"
operation=
"
1: GPR[rd] = GPR[rs] - GPR[rt]
"
}

and(000000:rs:rt:rd:00000100100)
{
cycles="1"
operation=
"
1: GPR[rd] = GPR[rs] & GPR[rt]
"
}

or(000000:rs:rt:rd:00000100101)
{
cycles="1"
operation=
"
1: GPR[rd] = GPR[rs] | GPR[rt]
"
}

xor(000000:rs:rt:rd:00000100110)
{
cycles="1"
operation=
"
1: GPR[rd] = GPR[rs] ^ GPR[rt]
"
}

nor(000000:rs:rt:rd:00000100111)
{
cycles="1"
operation=
"
1: GPR[rd] = ~(GPR[rs] | GPR[rt])
"
}

slt(000000:rs:rt:rd:00000101010)
{
cycles="1"
operation=
"
1: GPR[rd] = s32(GPR[rs]) < s32(GPR[rt])
"
}

// set on less than, unsigned comparison (unsigned counterpart of slt)
sltu(000000:rs:rt:rd:00000101011)
{
cycles="1"
operation=
"
1: GPR[rd] = u32(GPR[rs]) < u32(GPR[rt])
"
}

// REGIMM

bltz(000001:rs:00000:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) < 0)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bgez(000001:rs:00001:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) >= 0)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bltzl(000001:rs:00010:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) < 0)
if (ct)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bgezl(000001:rs:00011:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) >= 0)
if (ct)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bltzal(000001:rs:10000:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) < 0)
execute instruction at PC+4
if (ct)
GPR(31) = PC+8
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bgezal(000001:rs:10001:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) >= 0)
execute instruction at PC+4
if (ct)
GPR(31) = PC+8
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bltzall(000001:rs:10010:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) < 0)
if (ct)
execute instruction at PC+4
if (ct)
GPR(31) = PC+8
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bgezall(000001:rs:10011:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) >= 0)
if (ct)
execute instruction at PC+4
if (ct)
GPR(31) = PC+8
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

// OPCODE #1

j(000010:imm26)
{
cycles="2"
operation=
"
1: execute instruction at PC+4
2: PC = PC[31..28] | (u32(imm26) << 2)
"
delayslot="1"
}

jal(000011:imm26)
{
cycles="2"
operation=
"
1: GPR(31) = PC+8
execute instruction at PC+4
2: PC = PC[31..28] | (u32(imm26) << 2)
"
delayslot="1"
}

beq(000100:rs:rt:imm16)
{
cycles="3"
operation=
"
1: ct = (GPR[rs] == GPR[rt])
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bne(000101:rs:rt:imm16)
{
cycles="3"
operation=
"
1: ct = (GPR[rs] <> GPR[rt])
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

blez(000110:rs:00000:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) <= 0)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bgtz(000111:rs:00000:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) > 0)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

// trapping add immediate: rs is sign-extended to 33 bits so that
// overflow shows up as bit 32 differing from bit 31
addi(001000:rs:rt:imm16)
{
cycles="1"
operation=
"
1: result:33 = (((GPR[rs][31]) << 32) | GPR[rs]) + s32(imm16)
if (result[32] == result[31])
GPR[rt] = result[31..0]
else
raise integer overflow exception
"
}

addiu(001001:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = GPR[rs] + s32(imm16)
"
}

slti(001010:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = s32(GPR[rs]) < s32(imm16)
"
}

sltiu(001011:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = u32(GPR[rs]) < u32(s32(imm16))
"
}

andi(001100:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = s32(GPR[rs]) & u32(imm16)
"
}

ori(001101:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = s32(GPR[rs]) | u32(imm16)
"
}

xori(001110:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = s32(GPR[rs]) ^ u32(imm16)
"
}

// load upper immediate: the encoding has no rs field, so the result
// depends only on imm16 (low half cleared)
lui(00111100000:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = u32(imm16) << 16
"
}

// COP0

mfc0(01000000000:rt:c0dr:00000000000)
{
cycles="?"
operation=
"
1: GPR[rt] = C0DR(c0dr)
"
}

cfc0(01000000010:rt:c0cr:00000000000)
{
cycles="?"
operation=
"
1: GPR[rt] = C0CR(c0cr)
"
}

mtc0(01000000100:rt:c0dr:00000000000)
{
cycles="?"
operation=
"
1: C0DR(c0dr) = GPR[rt]
"
}

// move to COP0 control register: major opcode 010000 (COP0), sub-op 00110,
// indexed by the c0cr field declared in the pattern
ctc0(01000000110:rt:c0cr:00000000000)
{
cycles="?"
operation=
"
1: C0CR(c0cr) = GPR[rt]
"
}

// exception return: COP0 with the CO bit (bit 25) set, function 011000 (0x42000018)
eret(01000010000000000000000000011000)
{
cycles="?"
operation=
"
1: if (ERL == 1)
PC = ErrorEPC
else
PC = EPC
if (ERL == 0)
EXL = 0
LLBit = 0
"
}

// OPCODE #2

beql(010100:rs:rt:imm16)
{
cycles="3"
operation=
"
1: ct = (GPR[rs] == GPR[rt])
if (ct)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bnel(010101:rs:rt:imm16)
{
cycles="3"
operation=
"
1: ct = (GPR[rs] <> GPR[rt])
if (ct)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

// branch-likely on rs <= 0: the delay slot is executed only when the branch is taken
blezl(010110:rs:00000:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) <= 0)
if (ct)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bgtzl(010111:rs:00000:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) > 0)
if (ct)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

lb(100000:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
GPR[rt] = s32(MemoryRead8(address))
"
}

lh(100001:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 1)
raise address error exception
else
GPR[rt] = s32(MemoryRead16(address))
"
}

lwl(100010:rs:rt:imm16)
{
cycles="?"
}

lw(100011:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 3)
raise address error exception
else
GPR[rt] = MemoryRead32(address)
"
}

lbu(100100:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
GPR[rt] = u32(MemoryRead8(address))
"
}

lhu(100101:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 1)
raise address error exception
else
GPR[rt] = u32(MemoryRead16(address))
"
}

lwr(100110:rs:rt:imm16)
{
cycles="?"
}

sb(101000:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
MemoryWrite8(address, GPR[rt][7..0])
"
}

sh(101001:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 1)
raise address error exception
else
MemoryWrite16(address, GPR[rt][15..0])
"
}

swl(101010:rs:rt:imm16)
{
cycles="?"
}

sw(101011:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 3)
raise address error exception
else
MemoryWrite32(address, GPR[rt])
"
}

swr(101110:rs:rt:imm16)
{
cycles="?"
}

ll(110000:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 3)
raise address error exception
else
GPR[rt] = MemoryRead32(address)
LLBit = 1
"
}

sc(111000:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 3)
raise address error exception
else if (LLBit == 1)
MemoryWrite32(address, GPR[rt])
GPR[rt] = u32(LLBit)
"
}
}

group allegrex
{
// SPECIAL

rotr(00000000001:rt:rd:shamt:000010)
{
cycles="1"
operation=
"
1: GPR[rd] = (u32(GPR[rt]) >> shamt) | (GPR[rt] << (32 - shamt))
"
}

rotrv(000000:rs:rt:rd:00001000110)
{
cycles="1"
operation=
"
1: s = GPR[rs] & 31
GPR[rd] = (u32(GPR[rt]) >> s) | (GPR[rt] << (32 - s))
"
}

movz(000000:rs:rt:rd:00000001010)
{
cycles="1"
operation=
"
1: if (GPR[rt] == 0)
GPR[rd] = GPR[rs]
"
}

movn(000000:rs:rt:rd:00000001011)
{
cycles="1"
operation=
"
1: if (GPR[rt] <> 0)
GPR[rd] = GPR[rs]
"
}

syscall(000000:code:001100)
{
cycles="?"
}

// SPECIAL function 001101; 001100 is syscall
break(000000:code:001101)
{
cycles="?"
}

sync(00000000000000000000000000001111)
{
cycles="?"
}

clz(000000:rs:00000:rd:00000010110)
{
cycles="1"
operation=
"
1: count = 32
i = 31
loop
if (GPR[rs][i] == 1)
count = 31 - i
while (count == 32 and i-- <> 0)
GPR[rd] = count;
"
}

clo(000000:rs:00000:rd:00000010111)
{
cycles="1"
operation=
"
1: count = 32
i = 31
loop
if (GPR[rs][i] == 0)
count = 31 - i
while (count == 32 and i-- <> 0)
GPR[rd] = count;
"
}

// signed multiply-accumulate: HI:LO += rs * rt
// HI is widened before the shift so that its bits are not lost
madd(000000:rs:rt:0000000000011100)
{
cycles="5"
operation=
"
1: result:64 = u64(LO) + (s64(HI) << 32) + s64(GPR[rs]) * s64(GPR[rt])
LO = result[31..0]
HI = result[63..32]
"
}

// unsigned multiply-accumulate: HI:LO += rs * rt
// HI is widened before the shift so that its bits are not lost
maddu(000000:rs:rt:0000000000011101)
{
cycles="5"
operation=
"
1: result:64 = u64(LO) + (u64(HI) << 32) + u64(GPR[rs]) * u64(GPR[rt])
LO = result[31..0]
HI = result[63..32]
"
}

// signed maximum of rs and rt
max(000000:rs:rt:rd:00000101100)
{
cycles="1"
operation=
"
1: GPR[rd] = (s32(GPR[rs]) < s32(GPR[rt])) ? GPR[rt] : GPR[rs];
"
}

// signed minimum of rs and rt
min(000000:rs:rt:rd:00000101101)
{
cycles="1"
operation=
"
1: GPR[rd] = (s32(GPR[rs]) < s32(GPR[rt])) ? GPR[rs] : GPR[rt];
"
}

// signed multiply-subtract: HI:LO -= rs * rt
// pattern: 6 + 5 + 5 + 16 = 32 bits, function 101110
msub(000000:rs:rt:0000000000101110)
{
cycles="5"
operation=
"
1: result:64 = u64(LO) + (s64(HI) << 32) - s64(GPR[rs]) * s64(GPR[rt])
LO = result[31..0]
HI = result[63..32]
"
}

// unsigned multiply-subtract: HI:LO -= rs * rt
// pattern: 6 + 5 + 5 + 16 = 32 bits, function 101111
msubu(000000:rs:rt:0000000000101111)
{
cycles="5"
operation=
"
1: result:64 = u64(LO) + (u64(HI) << 32) - u64(GPR[rs]) * u64(GPR[rt])
LO = result[31..0]
HI = result[63..32]
"
}

// OPCODE #1

halt(01110000000000000000000000000000)
{
cycles="?"
}

// SPECIAL3

ext(011111:rs:rt:(msb-lsb):lsb:000000)
{
cycles="1"
operation=
"
1: GPR[rt] = GPR[rs][msb..lsb];
"
}

ins(011111:rs:rt:msb:lsb:000100)
{
cycles="1"
operation=
"
1: GPR[rt][msb..lsb] = GPR[rs][msb-lsb..0];
"
}

wsbh(01111100000:rt:rd:00010100000)
{
cycles="1"
operation=
"
1: GPR[rd][ 7.. 0] = GPR[rt][15.. 8];
GPR[rd][15.. 8] = GPR[rt][ 7.. 0];
GPR[rd][23..16] = GPR[rt][31..24];
GPR[rd][31..24] = GPR[rt][23..16];
"
}

// word swap bytes within word: full 32-bit byte reversal
wsbw(01111100000:rt:rd:00011100000)
{
cycles="1"
operation=
"
1: GPR[rd][ 7.. 0] = GPR[rt][31..24];
GPR[rd][15.. 8] = GPR[rt][23..16];
GPR[rd][23..16] = GPR[rt][15.. 8];
GPR[rd][31..24] = GPR[rt][ 7.. 0];
"
}

// sign-extend byte: sub-op 10000 followed by the same 100000 function as seh/bitrev
seb(01111100000:rt:rd:10000100000)
{
cycles="1"
operation=
"
1: GPR[rd] = s32(GPR[rt][7..0]);
"
}

bitrev(01111100000:rt:rd:10100100000)
{
cycles="1"
operation=
"
1: for each i in [31..0]
GPR[rd][i] = GPR[rt][31-i];
"
}

seh(01111100000:rt:rd:11000100000)
{
cycles="1"
operation=
"
1: GPR[rd] = s32(GPR[rt][15..0]);
"
}

// OPCODE #2

cache(101111:rs:func:imm16)
{
cycles="?"
}
}

Last edited by hlide on Mon Jun 16, 2008 12:00 am; edited 6 times in total

hlide

Joined: 10 Sep 2006
Posts: 750

Posted: Sun Jun 15, 2008 1:51 am Post subject:

FPU instruction set

status : WIP

Code:

field
{
rs:5; rt:5; rd:5;

// the 16-bit immediate is referenced as imm16 by the branch and load/store encodings below
imm3:3; imm16:16; imm26:26;

cc:3;

cond:4 = f(0000) |un(0001) |eq(0010) |ueq(0011)|
   olt(0100)|ult(0101) |ole(0110)|ule(0111)|
   sf(1000) |ngle(1001)|seq(1010)|ngl(1011)|
   lt(1100) |nge(1101) |le(1110) |ngt(1111);

fs:5; ft:5; fd:5;
}

group fpu
{
// cycles = "pitch/latency/repeat rate"

add.s(01000110000:ft:fs:fd:000000)
{
   cycles="1/4/3"
}

sub.s(01000110000:ft:fs:fd:000001)
{
   cycles="1/4/3"
}

mul.s(01000110000:ft:fs:fd:000010)
{
   cycles="1/7/6"
}

div.s(01000110000:ft:fs:fd:000011)
{
   cycles="28/28/27"
}

sqrt.s(01000110000-----:fs:fd:000100)
{
   cycles="28/28/27"
}

abs.s(01000110000-----:fs:fd:000101)
{
   cycles="1/2/1"
}

mov.s(01000110000-----:fs:fd:000110)
{
   cycles="1/1/1"
}

neg.s(01000110000-----:fs:fd:000111)
{
   cycles="1/2/1"
}

round.w.s(01000110000-----:fs:fd:001100)
{
   cycles="1/4/3"
}

trunc.w.s(01000110000-----:fs:fd:001101)
{
   cycles="1/4/3"
}

ceil.w.s(01000110000-----:fs:fd:001110)
{
   cycles="1/4/3"
}

floor.w.s(01000110000-----:fs:fd:001111)
{
   cycles="1/4/3"
}

cvt.s.w(01000110100-----:fs:fd:100000)
{
   cycles="1/6/5"
}

cvt.w.s(01000110000-----:fs:fd:100100)
{
   cycles="1/4/3"
}

c.<cond>.s(01000110000:ft:fs:cc:--11:cond)
{
   cycles="1/3/2"
}

mfc1(01000100000:rt:fs:-----------)
{
   cycles="1/?/?"
}

cfc1(01000100010:rt:fs:-----------)
{
   cycles="1/?/?"
}

mtc1(01000100100:rt:fs:-----------)
{
   cycles="1/?/?"
}

ctc1(01000100110:rt:fs:-----------)
{
   cycles="1/?/?"
}

bc1f(01000101000:cc:00:imm16)
{
   cycles="1/?/?"
}

bc1t(01000101000:cc:01:imm16)
{
   cycles="1/?/?"
}

// branch-likely on FPU false: the two bits after cc are nd=1 (likely), tf=0 (false)
bc1fl(01000101000:cc:10:imm16)
{
   cycles="1/?/?"
}

// branch-likely on FPU true: the two bits after cc are nd=1 (likely), tf=1 (true)
bc1tl(01000101000:cc:11:imm16)
{
   cycles="1/?/?"
}

lwc1(110001:rs:rt:imm16)
{
   cycles="1/?/?"
}

swc1(111001:rs:rt:imm16)
{
   cycles="1/?/?"
}
}

Last edited by hlide on Sun Jun 15, 2008 6:59 am; edited 3 times in total

hlide

Joined: 10 Sep 2006
Posts: 750

Posted: Sun Jun 15, 2008 2:26 am Post subject:

VFPU instruction set

PART 1/2

status : WIP

Code:

field
{
rs:5; rt:5;

imm3:3; imm7:7; imm:14; imm16:16;

// per-operand register fields: matrix, column, row, transpose (x) and offset (o)
vs_m:3; vs_c:2; vs_r:2; vs_x:1; vs_o:1;
vt_m:3; vt_c:2; vt_r:2; vt_x:1; vt_o:1;
vd_m:3; vd_c:2; vd_r:2; vd_x:1; vd_o:1;

negw:1; negz:1; negy:1; negx:1;
cstw:1; cstz:1; csty:1; cstx:1;
absw:1; absz:1; absy:1; absx:1;
swzw:2; swzz:2; swzy:2; swzx:2;
mskw:1; mskz:1; msky:1; mskx:1;
satw:2; satz:2; saty:2; satx:2;
}

macro
{
Mask(i:2)
{
VFPU_PFXD[i+8] = 1
set VPFXD as taken
}

NotMasked:1(i:2)
{
// Mask(i) sets VFPU_PFXD[i+8] to 1, so lane i is writable only while that bit is clear
return (VFPU_PFXD >> 8)[i] == 0
}

// Applies one lane of a source prefix (swizzle / abs / constant / negate).
// swz is 2 bits wide: the callers pass PFX[2*i+1..2*i] and the selector must reach 3.
Transform:32(swz:2, abs:1, cst:1, neg:1, x:32, y:32, z:32, w:32)
{
    if (cst == 1)
        // constant mode: swz picks the constant, abs selects the second bank
        // (0, 1, 2, 1/2 with abs clear; 3, 1/3, 1/4, 1/6 with abs set)
        when swz is
            0 :
                value = abs ? 3.0f : 0.0f
            1 :
                value = abs ? 1.0f/3.0f : 1.0f
            2 :
                value = abs ? 1.0f/4.0f : 2.0f
            3 :
                value = abs ? 1.0f/6.0f : 0.5f
    else
        // swizzle mode: swz picks the source lane, abs takes its magnitude
        when swz is
            0 :
                value' = x
            1 :
                value' = y
            2 :
                value' = z
            3 :
                value' = w

        if (abs == 1)
            value = value' < 0.0f ? -value' : value'
        else
            value = value'

    return (neg == 1) ? -value : value
}

PrefixS:32(i:2, x:32[, y:32 [, z:32 [, w:32]]])
{
swz = (VFPU_PFXS >> 2*i)[1..0]
abs = (VFPU_PFXS >> 8)[i]
cst = (VFPU_PFXS >> 12)[i]
neg = (VFPU_PFXS >> 16)[i]

return Transform(swz, abs, cst, neg, x, y, z, w)
}

// Target-operand counterpart of PrefixS: decodes lane i of VFPU_PFXT.
// LoadRegisterT calls it under the name PrefixT.
PrefixT:32(i:2, x:32[, y:32 [, z:32 [, w:32]]])
{
swz = (VFPU_PFXT >> 2*i)[1..0]
abs = (VFPU_PFXT >> 8)[i]
cst = (VFPU_PFXT >> 12)[i]
neg = (VFPU_PFXT >> 16)[i]

return Transform(swz, abs, cst, neg, x, y, z, w)
}

PrefixD:32(i:2, value:32)
{
when (VFPU_PFXD >> 2*i)[1..0] is
0 :
return value
1 :
return (value < 0.0f) ? 0.0f : ((value > 1.0f ? 1.0f : value))
3 :
return (value < -1.0f) ? -1.0f : ((value > 1.0f ? 1.0f : value))
}

// Writes n lanes (x[, y[, z[, w]]]) to the destination register, applying the
// destination prefix (write mask + saturation) when VPFXD is taken.
// xch selects the transposed access, ofs the lane offset inside the 4-element line.
SaveRegisterD(n:2, (r:2 | [ofs:1,] xch:1), m:3, c:2, x:32[, y:32[, z:32[, w:32]]])
{
when n is
1 :
if (VPFXD is taken)
if NotMasked(0) VFPR[m][c][r] = PrefixD(0, x)
set VPFXD as free
else
VFPR[m][c][r] = x

2 :
if (VPFXD is taken)
if (xch)
if NotMasked(0) VFPR[m][0+ofs<<1][c] = PrefixD(0, x)
if NotMasked(1) VFPR[m][1+ofs<<1][c] = PrefixD(1, y)
else
if NotMasked(0) VFPR[m][c][0+ofs<<1] = PrefixD(0, x)
if NotMasked(1) VFPR[m][c][1+ofs<<1] = PrefixD(1, y)
set VPFXD as free
else
if (xch)
VFPR[m][0+ofs<<1][c] = x
VFPR[m][1+ofs<<1][c] = y
else
VFPR[m][c][0+ofs<<1] = x
VFPR[m][c][1+ofs<<1] = y

3 :
if (VPFXD is taken)
if (xch)
if NotMasked(0) VFPR[m][0+ofs][c] = PrefixD(0, x)
if NotMasked(1) VFPR[m][1+ofs][c] = PrefixD(1, y)
if NotMasked(2) VFPR[m][2+ofs][c] = PrefixD(2, z)
else
if NotMasked(0) VFPR[m][c][0+ofs] = PrefixD(0, x)
if NotMasked(1) VFPR[m][c][1+ofs] = PrefixD(1, y)
if NotMasked(2) VFPR[m][c][2+ofs] = PrefixD(2, z)
set VPFXD as free
else
if (xch)
VFPR[m][0+ofs][c] = x
VFPR[m][1+ofs][c] = y
VFPR[m][2+ofs][c] = z
else
VFPR[m][c][0+ofs] = x
VFPR[m][c][1+ofs] = y
VFPR[m][c][2+ofs] = z

4 :
if (VPFXD is taken)
if (xch)
if NotMasked(0) VFPR[m][0][c] = PrefixD(0, x)
if NotMasked(1) VFPR[m][1][c] = PrefixD(1, y)
if NotMasked(2) VFPR[m][2][c] = PrefixD(2, z)
if NotMasked(3) VFPR[m][3][c] = PrefixD(3, w)
else
if NotMasked(0) VFPR[m][c][0] = PrefixD(0, x)
if NotMasked(1) VFPR[m][c][1] = PrefixD(1, y)
if NotMasked(2) VFPR[m][c][2] = PrefixD(2, z)
if NotMasked(3) VFPR[m][c][3] = PrefixD(3, w)
set VPFXD as free
else
if (xch)
VFPR[m][0][c] = x
VFPR[m][1][c] = y
VFPR[m][2][c] = z
VFPR[m][3][c] = w
else
VFPR[m][c][0] = x
VFPR[m][c][1] = y
VFPR[m][c][2] = z
VFPR[m][c][3] = w
}

// Reads n lanes of the source operand into x[, y[, z[, w]]], applying the
// source prefix when VPFXS is taken. Every case indexes VFPR through the
// m/c parameters so the macro works for whatever operand the caller passes.
LoadRegisterS(n:2, (r:2 | [ofs:1,] xch:1), m:3, c:2, out x:32[, out y:32[, out z:32[, out w:32]]])
{
when n is
1 :
x' = VFPR[m][c][r]

if (VPFXS is taken)
x = PrefixS(0, x')
set VPFXS as free
else
x = x'

2 :
if (xch)
x' = VFPR[m][0+ofs<<1][c]
y' = VFPR[m][1+ofs<<1][c]
else
x' = VFPR[m][c][0+ofs<<1]
y' = VFPR[m][c][1+ofs<<1]

if (VPFXS is taken)
x = PrefixS(0, x', y')
y = PrefixS(1, x', y')
set VPFXS as free
else
x = x'
y = y'

3 :
if (xch)
x' = VFPR[m][0+ofs][c]
y' = VFPR[m][1+ofs][c]
z' = VFPR[m][2+ofs][c]
else
x' = VFPR[m][c][0+ofs]
y' = VFPR[m][c][1+ofs]
z' = VFPR[m][c][2+ofs]

if (VPFXS is taken)
x = PrefixS(0, x', y', z')
y = PrefixS(1, x', y', z')
z = PrefixS(2, x', y', z')
set VPFXS as free
else
x = x'
y = y'
z = z'

4 :
if (xch)
x' = VFPR[m][0][c]
y' = VFPR[m][1][c]
z' = VFPR[m][2][c]
w' = VFPR[m][3][c]
else
x' = VFPR[m][c][0]
y' = VFPR[m][c][1]
z' = VFPR[m][c][2]
w' = VFPR[m][c][3]

if (VPFXS is taken)
x = PrefixS(0, x', y', z', w')
y = PrefixS(1, x', y', z', w')
z = PrefixS(2, x', y', z', w')
w = PrefixS(3, x', y', z', w')
set VPFXS as free
else
x = x'
y = y'
z = z'
w = w'
}

// Reads n lanes of the target operand into x[, y[, z[, w]]], applying the
// target prefix when VPFXT is taken. Every case indexes VFPR through the
// m/c parameters (callers pass vt_m/vt_c), never through the vs_* fields.
LoadRegisterT(n:2, (r:2 | [ofs:1,] xch:1), m:3, c:2, out x:32[, out y:32[, out z:32[, out w:32]]])
{
when n is
1 :
x' = VFPR[m][c][r]

if (VPFXT is taken)
x = PrefixT(0, x')
set VPFXT as free
else
x = x'

2 :
if (xch)
x' = VFPR[m][0+ofs<<1][c]
y' = VFPR[m][1+ofs<<1][c]
else
x' = VFPR[m][c][0+ofs<<1]
y' = VFPR[m][c][1+ofs<<1]

if (VPFXT is taken)
x = PrefixT(0, x', y')
y = PrefixT(1, x', y')
set VPFXT as free
else
x = x'
y = y'

3 :
if (xch)
x' = VFPR[m][0+ofs][c]
y' = VFPR[m][1+ofs][c]
z' = VFPR[m][2+ofs][c]
else
x' = VFPR[m][c][0+ofs]
y' = VFPR[m][c][1+ofs]
z' = VFPR[m][c][2+ofs]

if (VPFXT is taken)
x = PrefixT(0, x', y', z')
y = PrefixT(1, x', y', z')
z = PrefixT(2, x', y', z')
set VPFXT as free
else
x = x'
y = y'
z = z'

4 :
if (xch)
x' = VFPR[m][0][c]
y' = VFPR[m][1][c]
z' = VFPR[m][2][c]
w' = VFPR[m][3][c]
else
x' = VFPR[m][c][0]
y' = VFPR[m][c][1]
z' = VFPR[m][c][2]
w' = VFPR[m][c][3]

if (VPFXT is taken)
x = PrefixT(0, x', y', z', w')
y = PrefixT(1, x', y', z', w')
z = PrefixT(2, x', y', z', w')
w = PrefixT(3, x', y', z', w')
set VPFXT as free
else
x = x'
y = y'
z = z'
w = w'
}
}

shortcut
{
v<op0>.s(110100:op0:0:0000000:0:vd_r:vd_m:vd_c)
{
when <op0> is
zero(0000000110) :
x = 0.0f
one(0000000111) :
x = 1.0f

SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}

v<op0>.p(110100:op0:0:0000000:1:vd_o:vd_x:vd_m:vd_c)
{
when <op0> is
idt(0000000011) :
x = vd_c[0] ? 0.0f : 1.0f
y = vd_c[0] ? 1.0f : 0.0f
zero(0000000110) :
x = 0.0f
y = 0.0f
one(0000000111) :
x = 1.0f
y = 1.0f

SaveRegisterD(2, vd_o, vd_x, vd_m, vd_c, x, y)
}

// triple-width constant fill: zero -> 0.0f, one -> 1.0f; all three lanes are stored
v<op0>.t(110100:op0:1:0000000:0:vd_o:vd_x:vd_m:vd_c)
{
when <op0> is
zero(0000000110) :
x = 0.0f
y = 0.0f
z = 0.0f
one(0000000111) :
x = 1.0f
y = 1.0f
z = 1.0f

SaveRegisterD(3, vd_o, vd_x, vd_m, vd_c, x, y, z)
}

// quad-width constant fill (identity row / zero / one); all four lanes are stored
v<op0>.q(110100:op0:1:0000000:1:vd_o:vd_x:vd_m:vd_c)
{
when <op0> is
idt(0000000011) :
x = vd_c[1..0] == 0 ? 1.0f : 0.0f
y = vd_c[1..0] == 1 ? 1.0f : 0.0f
z = vd_c[1..0] == 2 ? 1.0f : 0.0f
w = vd_c[1..0] == 3 ? 1.0f : 0.0f
zero(0000000110) :
x = 0.0f
y = 0.0f
z = 0.0f
w = 0.0f
one(0000000111) :
x = 1.0f
y = 1.0f
z = 1.0f
w = 1.0f

SaveRegisterD(4, vd_x, vd_m, vd_c, x, y, z, w)
}

v<op1>.s(110100:op1:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
LoadRegisterS(1, vs_r, vs_m, vs_c, x1)

when <op1> is
abs(0000000001) :
x = (x1 < 0.0f) ? -x1 : x1

sin(0000010010) :
x = sin(PI * x1 / 2.0f)

cos(0000010011) :
x = cos(PI * x1 / 2.0f)

asin(0000010111) :
x = 2.0f * asin(x1) / PI

SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}

v<op1>.s2p(110100:op1:0:vs_r:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
LoadRegisterS(1, vs_r, vs_m, vs_c, x1)

when <op1> is

us2i(0000111010) :
x = x1[15.. 0] << 15
y = x1[31..16] << 15

s2i(0000111011) :
x = x1[15.. 0] << 16
y = x1[31..16] << 16

SaveRegisterD(2, vd_o, vd_x, vd_m, vd_c, x, y)
}

v<op1>.s2q(110100:op1:0:vs_r:vs_m:vs_c:0:0:vd_x:vd_m:vd_c)
{
LoadRegisterS(1, vs_r, vs_m, vs_c, x1)

when <op1> is

uc2i(0000111000) :
x = x1[ 7.. 0] << 23 | x1[ 7.. 0] << 15 | x1[ 7.. 0] << 7 | x1[ 7.. 1]
y = x1[15.. 8] << 23 | x1[15.. 8] << 15 | x1[15.. 8] << 7 | x1[15.. 9]
z = x1[23..16] << 23 | x1[23..16] << 15 | x1[23..16] << 7 | x1[23..17]
w = x1[31..24] << 23 | x1[31..24] << 15 | x1[31..24] << 7 | x1[31..25]

c2i(0000111001) :
x = x1[ 7.. 0] << 24
y = x1[15.. 8] << 24
z = x1[23..16] << 24
w = x1[31..24] << 24

SaveRegisterD(4, vd_x, vd_m, vd_c, x, y, z, w)
}

v<op1>.p(110100:op1:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
LoadRegisterS(2, vs_o, vs_x, vs_m, vs_c, x1, y1)

when <op1> is
abs(0000000001) :
x = (x1 < 0.0f) ? -x1 : x1
y = (y1 < 0.0f) ? -y1 : y1
sin(0000010010) :
x = sin(PI * x1 / 2.0f)
y = sin(PI * y1 / 2.0f)
cos(0000010011) :
x = cos(PI * x1 / 2.0f)
y = cos(PI * y1 / 2.0f)
asin(0000010111) :
x = 2.0f * asin(x1) / PI
y = 2.0f * asin(y1) / PI
bfy1(0001000010) :
x = x1 + y1
y = x1 - y1

SaveRegisterD(2, vd_o, vd_x, vd_m, vd_c, x, y)
}

v<op1>.p2s(110100:op1:0:vs_o:vs_x:vs_m:vs_c:1:vd_r:vd_m:vd_c)
{
LoadRegisterS(2, vs_o, vs_x, vs_m, vs_c, x1, y1)

when <op1> is
fad(0001000110) :
x = x1 + y1

avg(0001000111) :
x = (x1 + y1) / 2.0f

SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}

v<op1>.p2q(110100:op1:0:vs_o:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
LoadRegisterS(2, vs_o, vs_x, vs_m, vs_c, x1, y1)

when <op1> is
us2i(0000111010) :
x = x1[15.. 0] << 15
y = x1[31..16] << 15
z = y1[15.. 0] << 15
w = y1[31..16] << 15

s2i(0000111011) :
x = x1[15.. 0] << 16
y = x1[31..16] << 16
z = y1[15.. 0] << 16
w = y1[31..16] << 16

SaveRegisterD(4, vd_x, vd_m, vd_c, x, y, z, w)
}

// triple-width unary ops; pattern: 6 + 10 (op1) + 1 + 7 (vs) + 1 + 7 (vd) = 32 bits,
// with size bits 1/0 as in v<op0>.t
v<op1>.t(110100:op1:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
LoadRegisterS(3, vs_o, vs_x, vs_m, vs_c, x1, y1, z1)

when <op1> is
abs(0000000001) :
x = (x1 < 0.0f) ? -x1 : x1
y = (y1 < 0.0f) ? -y1 : y1
z = (z1 < 0.0f) ? -z1 : z1

sin(0000010010) :
x = sin(PI * x1 / 2.0f)
y = sin(PI * y1 / 2.0f)
z = sin(PI * z1 / 2.0f)

cos(0000010011) :
x = cos(PI * x1 / 2.0f)
y = cos(PI * y1 / 2.0f)
z = cos(PI * z1 / 2.0f)

asin(0000010111) :
x = 2.0f * asin(x1) / PI
y = 2.0f * asin(y1) / PI
z = 2.0f * asin(z1) / PI

SaveRegisterD(3, vd_o, vd_x, vd_m, vd_c, x, y, z)
}

// triple -> single reductions; pattern: 6 + 10 (op1) + 1 + 7 (vs) + 1 + 7 (vd) = 32 bits
v<op1>.t2s(110100:op1:1:vs_o:vs_x:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
LoadRegisterS(3, vs_o, vs_x, vs_m, vs_c, x1, y1, z1)

when <op1> is
fad(0001000110) :
x = x1 + y1 + z1

avg(0001000111) :
x = (x1 + y1 + z1) / 3.0f

SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}

v<op1>.q(110100:op1:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
LoadRegisterS(4, vs_x, vs_m, vs_c, x1, y1, z1, w1)

when <1, op1> is
abs(0000000001) :
x = (x1 < 0.0f) ? -x1 : x1
y = (y1 < 0.0f) ? -y1 : y1
z = (z1 < 0.0f) ? -z1 : z1
w = (w1 < 0.0f) ? -w1 : w1

sin(0000010010) :
x = sin(PI * x1 / 2.0f)
y = sin(PI * y1 / 2.0f)
z = sin(PI * z1 / 2.0f)
w = sin(PI * w1 / 2.0f)

cos(0000010011) :
x = cos(PI * x1 / 2.0f)
y = cos(PI * y1 / 2.0f)
z = cos(PI * z1 / 2.0f)
w = cos(PI * w1 / 2.0f)

bfy1(0001000010) :
x = x1 + y1
y = x1 - y1
z = z1 + w1
w = z1 - w1

bfy2(0001000011) :
x = x1 + z1
y = y1 + w1
z = x1 - z1
w = y1 - w1

SaveRegisterD(4, vd_x, vd_m, vd_c, x, y, z, w)
}

v<op1>.q2s(110100:op1:1:0:vs_x:vs_m:vs_c:1:vd_r:vd_m:vd_c)
{
LoadRegisterS(4, vs_x, vs_m, vs_c, x1, y1, z1, w1)

when <1, op1> is
fad(0001000110) :
x = x1 + y1 + z1 + w1

avg(0001000111) :
x = (x1 + y1 + z1 + w1) / 4.0f

SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}

v<op2>.s(0110:op2:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
LoadRegisterS(1, vs_r, vs_m, vs_c, x1)

LoadRegisterT(1, vt_r, vt_m, vt_c, x2)

when <op2> is
add(00000) :
x = x1 + x2

sub(00001) :
x = x1 - x2

div(00111):
x = x1 / x2

mul(01000):
x = x1 * x2

min(11010) :
x = (x1 < x2) ? x1 : x2

max(11011) :
x = (x1 > x2) ? x1 : x2

SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}

// pair-width binary ops: the S operand loads into x1/y1, the T operand into x2/y2
v<op2>.p(0110:op2:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
LoadRegisterS(2, vs_o, vs_x, vs_m, vs_c, x1, y1)

LoadRegisterT(2, vt_o, vt_x, vt_m, vt_c, x2, y2)

when <op2> is
add(00000) :
x = x1 + x2
y = y1 + y2

sub(00001) :
x = x1 - x2
y = y1 - y2

div(00111):
x = x1 / x2
y = y1 / y2

mul(01000):
x = x1 * x2
y = y1 * y2

min(11010) :
x = (x1 < x2) ? x1 : x2
y = (y1 < y2) ? y1 : y2

max(11011) :
x = (x1 > x2) ? x1 : x2
y = (y1 > y2) ? y1 : y2

SaveRegisterD(2, vd_o, vd_x, vd_m, vd_c, x, y)
}

// triple-width binary ops: the S operand loads into x1/y1/z1, the T operand into x2/y2/z2
v<op2>.t(0110:op2:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
LoadRegisterS(3, vs_o, vs_x, vs_m, vs_c, x1, y1, z1)

LoadRegisterT(3, vt_o, vt_x, vt_m, vt_c, x2, y2, z2)

when <op2> is
add(00000) :
x = x1 + x2
y = y1 + y2
z = z1 + z2

sub(00001) :
x = x1 - x2
y = y1 - y2
z = z1 - z2

div(00111):
x = x1 / x2
y = y1 / y2
z = z1 / z2

mul(01000):
x = x1 * x2
y = y1 * y2
z = z1 * z2

min(11010) :
x = (x1 < x2) ? x1 : x2
y = (y1 < y2) ? y1 : y2
z = (z1 < z2) ? z1 : z2

max(11011) :
x = (x1 > x2) ? x1 : x2
y = (y1 > y2) ? y1 : y2
z = (z1 > z2) ? z1 : z2

SaveRegisterD(3, vd_o, vd_x, vd_m, vd_c, x, y, z)
}

// quad-width binary ops: the S operand loads into x1..w1, the T operand into x2..w2
v<op2>.q(0110:op2:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
LoadRegisterS(4, vs_x, vs_m, vs_c, x1, y1, z1, w1)

LoadRegisterT(4, vt_x, vt_m, vt_c, x2, y2, z2, w2)

when <op2> is
add(00000) :
x = x1 + x2
y = y1 + y2
z = z1 + z2
w = w1 + w2

sub(00001) :
x = x1 - x2
y = y1 - y2
z = z1 - z2
w = w1 - w2

div(00111):
x = x1 / x2
y = y1 / y2
z = z1 / z2
w = w1 / w2

mul(01000):
x = x1 * x2
y = y1 * y2
z = z1 * z2
w = w1 * w2

min(11010) :
x = (x1 < x2) ? x1 : x2
y = (y1 < y2) ? y1 : y2
z = (z1 < z2) ? z1 : z2
w = (w1 < w2) ? w1 : w2

max(11011) :
x = (x1 > x2) ? x1 : x2
y = (y1 > y2) ? y1 : y2
z = (z1 > z2) ? z1 : z2
w = (w1 > w2) ? w1 : w2

SaveRegisterD(4, vd_x, vd_m, vd_c, x, y, z, w)
}
}

Last edited by hlide on Mon Jun 16, 2008 12:57 am; edited 9 times in total

hlide

Joined: 10 Sep 2006
Posts: 750

Posted: Sun Jun 15, 2008 2:27 am Post subject:

VFPU instruction set

PART 2/2

status : WIP

Code:

group vfpu
{
mfv(01001000011:rt:000000000:vd_r:vd_m:vd_c)
{
cycles="6/0"
operation=
"
1: GPR[rt] = VFPR[vd_m][vd_c][vd_r]
"
}

mfvc(01001000011:rt:000000001:imm7)
{
cycles="6/0"
operation=
"
1: GPR[rt] = VFCR[imm7]
"
}

mtv(01001000111:rt:000000000:vd_r:vd_m:vd_c)
{
cycles="1/3"
operation=
"
1: VFPR[vd_m][vd_c][vd_r] = GPR[rt]
"
}

mtvc(01001000111:rt:000000001:imm7)
{
cycles="1/3"
operation=
"
1: VFCR[imm7] = GPR[rt]
"
}

bvf(01001001000:imm3:00:imm16)
{
cycles="?"
operation=
"
1: c = VFPU_CC[imm3] == 0
execute instruction at PC+4
2: if (c)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bvfl(01001001000:imm3:10:imm16)
{
cycles="?"
operation=
"
1: c = VFPU_CC[imm3] == 0
if (c)
execute instruction at PC+4
2: if (c)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bvt(01001001000:imm3:01:imm16)
{
cycles="?"
operation=
"
1: c = VFPU_CC[imm3] == 1
execute instruction at PC+4
2: if (c)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

bvtl(01001001000:imm3:11:imm16)
{
cycles="?"
operation=
"
1: c = VFPU_CC[imm3] == 1
if (c)
execute instruction at PC+4
2: if (c)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}

vadd.s(011000000:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.s add
}

vadd.p(011000000:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.p add
}

vadd.t(011000000:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.t add
}

vadd.q(011000:000:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.q add
}

vsub.s(011000001:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.s sub
}

vsub.p(011000001:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.p sub
}

vsub.t(011000001:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.t sub
}

vsub.q(011000001:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.q sub
}

vdiv.s(011000111:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="14/17"

prefixes="taken,taken,taken"

see v<op2>.s div
}

vdiv.p(011000111:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="28/31"

prefixes="prohibed,prohibed,prohibed"

see v<op2>.p div
}

vdiv.t(011000111:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="42/45"

prefixes="prohibed,prohibed,prohibed"

see v<op2>.t div
}

// opcode 0110 + div(00111), matching vdiv.s/.p/.t (011000001 is vsub)
vdiv.q(011000111:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="56/59"

prefixes="prohibed,prohibed,prohibed"

see v<op2>.q div
}

vmul.s(011001000:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.s mul
}

vmul.p(011001000:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.p mul
}

vmul.t(011001000:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.t mul
}

vmul.q(011001000:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="taken,taken,taken"

see v<op2>.q mul
}

// vmin.s : scalar minimum.
// Scalar form: both size bits (after vt and after vs) are 0, same layout as
// vsub.s/vmul.s. The pattern must total 32 bits.
vmin.s(011011010:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,taken,taken"

see v<op2>.s min
}

vmin.p(011011010:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,taken,taken"

see v<op2>.p min
}

vmin.t(011011010:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,taken,taken"

see v<op2>.t min
}

vmin.q(011011010:0:vt_x:vt_m:vt_c:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,taken,taken"

see v<op2>.q min
}

// vmax.s : scalar maximum.
// Scalar form: both size bits (after vt and after vs) are 0, same layout as
// vsub.s/vmul.s. The pattern must total 32 bits.
vmax.s(011011011:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,taken,taken"

see v<op2>.s max
}

// vmax.p : pair-size vector maximum.
// Opcode bits are 011011:011 like vmax.s (011011:010 is vmin).
vmax.p(011011011:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,taken,taken"

see v<op2>.p max
}

// vmax.t : triple-size vector maximum.
// Opcode bits are 011011:011 like vmax.s (011011:010 is vmin).
vmax.t(011011011:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,taken,taken"

see v<op2>.t max
}

// vmax.q : quad-size vector maximum.
// Opcode bits are 011011:011 like vmax.s; the register offset bit is fixed
// to 0 for quad operands, as in vmin.q.
vmax.q(011011011:0:vt_x:vt_m:vt_c:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,taken,taken"

see v<op2>.q max
}

vabs.s(1101000000000001:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,ignored,taken"

see v<op1>.s abs
}

vabs.p(1101000000000001:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,ignored,taken"

see v<op1>.p abs
}

vabs.t(1101000000000001:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,ignored,taken"

see v<op1>.t abs
}

vabs.q(1101000000000001:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="taken,ignored,taken"

see v<op1>.q abs
}

vsin.s(1101000000010010:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/7"

prefixes="taken,ignored,taken"

see v<op1>.s sin
}

vsin.p(1101000000010010:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="2/8"

prefixes="prohibed,ignored,prohibed"

see v<op1>.p sin
}

vsin.t(1101000000010010:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="3/9"

prefixes="prohibed,ignored,prohibed"

see v<op1>.t sin
}

vsin.q(1101000000010010:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="4/10"

prefixes="prohibed,ignored,prohibed"

see v<op1>.q sin
}

vcos.s(1101000000010011:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/7"

prefixes="taken,ignored,taken"

see v<op1>.s cos
}

vcos.p(1101000000010011:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="2/8"

prefixes="prohibed,ignored,prohibed"

see v<op1>.p cos
}

vcos.t(1101000000010011:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="3/9"

prefixes="prohibed,ignored,prohibed"

see v<op1>.t cos
}

// vcos.q : quad-size vector cosine.
// Same layout as vsin.q: size bit 1, then a fixed 0 offset bit before vs_x,
// so the pattern totals 32 bits.
vcos.q(1101000000010011:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="4/10"

prefixes="prohibed,ignored,prohibed"

see v<op1>.q cos
}

vidt.p(1101000000000011:0:0000000:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="ignored,ignored,taken"

see v<op0>.p idt
}

vidt.q(1101000000000011:1:0000000:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="ignored,ignored,taken"

see v<op0>.q idt
}

vzero.s(1101000000000110:0:0000000:0:vd_r:vd_m:vd_c)
{
cycles="1/3"

prefixes="ignored,ignored,taken"

see v<op0>.s zero
}

vzero.p(1101000000000110:0:0000000:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="ignored,ignored,taken"

see v<op0>.p zero
}

// vzero.t : set a triple-size vector to zero.
// Opcode ends in ...0110 like vzero.s/.p/.q (...0111 is vone).
vzero.t(1101000000000110:1:0000000:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="ignored,ignored,taken"

see v<op0>.t zero
}

vzero.q(1101000000000110:1:0000000:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="ignored,ignored,taken"

see v<op0>.q zero
}

vone.s(1101000000000111:0:0000000:0:vd_r:vd_m:vd_c)
{
cycles="1/3"

prefixes="ignored,ignored,taken"

see v<op0>.s one
}

vone.p(1101000000000111:0:0000000:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="ignored,ignored,taken"

see v<op0>.p one
}

vone.t(1101000000000111:1:0000000:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="ignored,ignored,taken"

see v<op0>.t one
}

vone.q(1101000000000111:1:0000000:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="ignored,ignored,taken"

see v<op0>.q one
}

vasin.s(1101000000010111:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/7"

prefixes="taken,ignored,taken"

see v<op1>.s asin
}

vasin.p(1101000000010111:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="2/8"

prefixes="prohibed,ignored,prohibed"

see v<op1>.p asin
}

vasin.t(1101000000010111:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="3/9"

prefixes="prohibed,ignored,prohibed"

see v<op1>.t asin
}

// vasin.q : quad-size vector arcsine.
// Cycle count follows the s/p/t/q progression 1/7, 2/8, 3/9, 4/10 used by
// vsin and vcos. NOTE(review): 4/10 inferred from that pattern — confirm on hardware.
vasin.q(1101000000010111:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="4/10"

prefixes="prohibed,ignored,prohibed"

see v<op1>.q asin
}

vuc2i.s(1101000000111000:0:vs_r:vs_m:vs_c:0:0:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="prohibed,ignored,maskonly"

see v<op1>.s2q uc2i
}

vc2i.s(1101000000111001:0:vs_r:vs_m:vs_c:0:0:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="prohibed,ignored,maskonly"

see v<op1>.s2q c2i
}

vus2i.s(1101000000111010:0:vs_r:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="prohibed,ignored,maskonly"

see v<op1>.s2p us2i
}

vus2i.p(1101000000111010:0:vs_o:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="prohibed,ignored,maskonly"

see v<op1>.p2q us2i
}

// vs2i.s : convert a scalar source into a pair destination (s2p form).
// The destination is a pair, so it is encoded as vd_o:vd_x like vus2i.s.
// NOTE(review): the original used the quad form (0:vd_x) — confirm.
vs2i.s(1101000000111011:0:vs_r:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="prohibed,ignored,maskonly"

see v<op1>.s2p s2i
}

vs2i.p(1101000000111011:0:vs_o:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"

prefixes="prohibed,ignored,maskonly"

see v<op1>.p2q s2i
}

vfad.p(1101000001000110:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/7"

prefixes="taken,ignored,taken"

see v<op1>.p2s fad
}

vfad.t(1101000001000110:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/7"

prefixes="taken,ignored,taken"

see v<op1>.t2s fad
}

vfad.q(1101000001000110:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/7"

prefixes="taken,ignored,taken"

see v<op1>.q2s fad
}

vavg.p(1101000001000111:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/7"

prefixes="taken,ignored,taken"

see v<op1>.p2s avg
}

vavg.t(1101000001000111:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/7"

prefixes="taken,ignored,taken"

see v<op1>.t2s avg
}

vavg.q(1101000001000111:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/7"

prefixes="taken,ignored,taken"

see v<op1>.q2s avg
}

vbfy1.p(1101000001000010:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="prohibed,ignored,prohibed"

see v<op1>.p bfy1
}

vbfy1.q(1101000001000010:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="prohibed,ignored,prohibed"

see v<op1>.q bfy1
}

vbfy2.q(1101000001000011:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/5"

prefixes="prohibed,ignored,prohibed"

see v<op1>.q bfy2
}

vpfxs(110111:00:negw:negz:negy:negx:cstw:cstz:csty:cstx:absw:absz:absy:absx:swzw:swzz:swzy:swzx)
{
cycles="1/0"

prefixes="overridden,ignored,ignored"

operation=
"
1: VFPU_PFXS[1..0] = swzx
VFPU_PFXS[3..2] = swzy
VFPU_PFXS[5..4] = swzz
VFPU_PFXS[7..6] = swzw

VFPU_PFXS[ 8] = absx
VFPU_PFXS[ 9] = absy
VFPU_PFXS[10] = absz
VFPU_PFXS[11] = absw

VFPU_PFXS[12] = cstx
VFPU_PFXS[13] = csty
VFPU_PFXS[14] = cstz
VFPU_PFXS[15] = cstw

VFPU_PFXS[16] = negx
VFPU_PFXS[17] = negy
VFPU_PFXS[18] = negz
VFPU_PFXS[19] = negw

set VPFXS as taken
"
}

vpfxt(110111:01:negw:negz:negy:negx:cstw:cstz:csty:cstx:absw:absz:absy:absx:swzw:swzz:swzy:swzx)
{
cycles="1/0"

prefixes="ignored,overridden,ignored"

operation=
"
1: VFPU_PFXT[1..0] = swzx
VFPU_PFXT[3..2] = swzy
VFPU_PFXT[5..4] = swzz
VFPU_PFXT[7..6] = swzw

VFPU_PFXT[ 8] = absx
VFPU_PFXT[ 9] = absy
VFPU_PFXT[10] = absz
VFPU_PFXT[11] = absw

VFPU_PFXT[12] = cstx
VFPU_PFXT[13] = csty
VFPU_PFXT[14] = cstz
VFPU_PFXT[15] = cstw

VFPU_PFXT[16] = negx
VFPU_PFXT[17] = negy
VFPU_PFXT[18] = negz
VFPU_PFXT[19] = negw

set VPFXT as taken
"
}

// vpfxd : load the destination prefix register VFPU_PFXD.
// Bits 7..0 receive the four saturation selectors, bits 11..8 the four
// write-mask bits. Every write targets VFPU_PFXD, never VFPU_PFXS.
vpfxd(110111:10:00000000:mskw:mskz:msky:mskx:satw:satz:saty:satx)
{
cycles="1/0"

prefixes="ignored,ignored,overridden"

operation=
"
1: VFPU_PFXD[1..0] = satx
VFPU_PFXD[3..2] = saty
VFPU_PFXD[5..4] = satz
VFPU_PFXD[7..6] = satw

VFPU_PFXD[ 8] = mskx
VFPU_PFXD[ 9] = msky
VFPU_PFXD[10] = mskz
VFPU_PFXD[11] = mskw

set VPFXD as taken
"
}

lv.s(110010:rs:vt_m:vt_c:imm14:vt_r)
{
cycles="?"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
VFPR[vt_m][vt_c][vt_r] = MemoryRead32(address)
"
}

// lv.q : load a 16-byte aligned quadword from memory into a quad register.
// vt_x selects the orientation: when set, the four words go to
// VFPR[vt_m][0..3][vt_c]; otherwise to VFPR[vt_m][vt_c][0..3].
// A misaligned address raises an address error and nothing is loaded.
lv.q(110110:rs:vt_m:vt_c:imm14:0:vt_x)
{
cycles="?"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
if (address & 15)
raise address error exception
else
qword = MemoryRead128(address)
if (vt_x)
VFPR[vt_m][0][vt_c] = qword[ 31.. 0]
VFPR[vt_m][1][vt_c] = qword[ 63.. 32]
VFPR[vt_m][2][vt_c] = qword[ 95.. 64]
VFPR[vt_m][3][vt_c] = qword[127.. 96]
else
VFPR[vt_m][vt_c][0] = qword[ 31.. 0]
VFPR[vt_m][vt_c][1] = qword[ 63.. 32]
VFPR[vt_m][vt_c][2] = qword[ 95.. 64]
VFPR[vt_m][vt_c][3] = qword[127.. 96]
"
}

// sv.s : store one scalar VFPU register to a 4-byte aligned address.
// Major opcode is 111010 (110010 is lv.s); compare sv.q = 111110 with
// lv.q = 110110.
sv.s(111010:rs:vt_m:vt_c:imm14:vt_r)
{
cycles="7/?"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
if (address & 3)
raise address error exception
else
MemoryWrite32(address, VFPR[vt_m][vt_c][vt_r])
"
}

// svl.q : store the "left" part of a quad register to an unaligned address.
// The enclosing aligned quadword is read, 1 to 4 words (selected by
// address[3..2]) are replaced with the upper words of the register, and the
// result is written back. vt_x selects the register orientation.
// NOTE(review): `wb` is not a field of this encoding (that bit is fixed to 0),
// and the write uses `address` while the read uses the aligned
// address[31..4]<<4 — confirm both against hardware.
svl.q(111101:rs:vt_m:vt_c:imm14:0:vt_x)
{
cycles="cached:7/?,uncached:10/?"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
if (address & 3)
raise address error exception
else
data = MemoryRead128(address[31..4]<<4)
if (vt_x)
qword[ 31.. 0] = VFPR[vt_m][0][vt_c]
qword[ 63.. 32] = VFPR[vt_m][1][vt_c]
qword[ 95.. 64] = VFPR[vt_m][2][vt_c]
qword[127.. 96] = VFPR[vt_m][3][vt_c]
else
qword[ 31.. 0] = VFPR[vt_m][vt_c][0]
qword[ 63.. 32] = VFPR[vt_m][vt_c][1]
qword[ 95.. 64] = VFPR[vt_m][vt_c][2]
qword[127.. 96] = VFPR[vt_m][vt_c][3]
when (address[3..2]) is
0 :
data[ 31.. 0] = qword[127..96]
1 :
data[ 31.. 0] = qword[ 95..64]
data[ 63..32] = qword[127..96]
2 :
data[ 31.. 0] = qword[ 63..32]
data[ 63..32] = qword[ 95..64]
data[ 95..64] = qword[127..96]
3 :
data[ 31.. 0] = qword[ 31.. 0]
data[ 63..32] = qword[ 63..32]
data[ 95..64] = qword[ 95..64]
data[127..96] = qword[127..96]
MemoryWrite128(address, data, wb)
"
}

// svr.q : store the "right" part of a quad register to an unaligned address.
// The enclosing aligned quadword is read, 4 down to 1 words (selected by
// address[3..2]) are replaced with words of the register, and the result is
// written back. vt_x selects the register orientation.
// NOTE(review): `wb` is not a field of this encoding (that bit is fixed to 1),
// and the write uses `address` while the read uses the aligned
// address[31..4]<<4 — confirm both against hardware.
svr.q(111101:rs:vt_m:vt_c:imm14:1:vt_x)
{
cycles="cached:7/?,uncached:10/?"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
if (address & 3)
raise address error exception
else
data = MemoryRead128(address[31..4]<<4)
if (vt_x)
qword[ 31.. 0] = VFPR[vt_m][0][vt_c]
qword[ 63.. 32] = VFPR[vt_m][1][vt_c]
qword[ 95.. 64] = VFPR[vt_m][2][vt_c]
qword[127.. 96] = VFPR[vt_m][3][vt_c]
else
qword[ 31.. 0] = VFPR[vt_m][vt_c][0]
qword[ 63.. 32] = VFPR[vt_m][vt_c][1]
qword[ 95.. 64] = VFPR[vt_m][vt_c][2]
qword[127.. 96] = VFPR[vt_m][vt_c][3]
when (address[3..2]) is
0 :
data[ 31.. 0] = qword[ 31.. 0]
data[ 63..32] = qword[ 63..32]
data[ 95..64] = qword[ 95..64]
data[127..96] = qword[127..96]
1 :
data[ 31.. 0] = qword[ 63..32]
data[ 63..32] = qword[ 95..64]
data[ 95..64] = qword[127..96]
2 :
data[ 31.. 0] = qword[ 95..64]
data[ 63..32] = qword[127..96]
3 :
data[ 31.. 0] = qword[127..96]
MemoryWrite128(address, data, wb)
"
}

// sv.q : store a quad register to a 16-byte aligned address.
// vt_x selects the orientation: when set, the words come from
// VFPR[vt_m][0..3][vt_c]; otherwise from VFPR[vt_m][vt_c][0..3].
// The wb field is passed through to MemoryWrite128.
// A misaligned address raises an address error and nothing is stored.
sv.q(111110:rs:vt_m:vt_c:imm14:wb:vt_x)
{
cycles="cached:7/?,uncached:10/?,uncached-wb:1/0"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
if (address & 15)
raise address error exception
else
if (vt_x)
qword[ 31.. 0] = VFPR[vt_m][0][vt_c]
qword[ 63.. 32] = VFPR[vt_m][1][vt_c]
qword[ 95.. 64] = VFPR[vt_m][2][vt_c]
qword[127.. 96] = VFPR[vt_m][3][vt_c]
else
qword[ 31.. 0] = VFPR[vt_m][vt_c][0]
qword[ 63.. 32] = VFPR[vt_m][vt_c][1]
qword[ 95.. 64] = VFPR[vt_m][vt_c][2]
qword[127.. 96] = VFPR[vt_m][vt_c][3]
MemoryWrite128(address, qword, wb)
"
}

vcmovf.s()
{
}

vcmovf.p()
{
}

vcmovf.t()
{
}

vcmovf.q()
{
}

// Placeholder: encoding and operation not documented yet.
// Scalar form of vcmovt, matching the vcmovt.t/.q entries that follow.
vcmovt.s()
{
}

// Placeholder: encoding and operation not documented yet.
// Pair form of vcmovt, matching the vcmovt.t/.q entries that follow.
vcmovt.p()
{
}

vcmovt.t()
{
}

vcmovt.q()
{
}

vcmp.s()
{
}

vcmp.p()
{
}

vcmp.t()
{
}

vcmp.q()
{
}

vcrs.t()
{
}

vdet.p()
{
}

vdiv.s()
{
}

vdiv.p()
{
}

vdiv.t()
{
}

vdiv.q()
{
}

vdot.s()
{
}

vdot.p()
{
}

vdot.t()
{
}

vdot.q()
{
}

vexp2.s()
{
}

vexp2.p()
{
}

vexp2.t()
{
}

vexp2.q()
{
}

vf2h.p()
{
}

vf2h.q()
{
}

vf2id.s()
{
}

vf2id.p()
{
}

vf2id.t()
{
}

vf2id.q()
{
}

vf2in.s()
{
}

vf2in.p()
{
}

vf2in.t()
{
}

vf2in.q()
{
}

vf2id.s()
{
}

vf2id.p()
{
}

vf2id.t()
{
}

vf2id.q()
{
}

vf2iu.s()
{
}

vf2iu.p()
{
}

vf2iu.t()
{
}

vf2iu.q()
{
}

vf2iz.s()
{
}

vf2iz.p()
{
}

vf2iz.t()
{
}

vf2iz.q()
{
}

vfim.s()
{
}

vflush()
{
}

vh2f.s()
{
}

vh2f.p()
{
}

vhdp.p()
{
}

vhdp.t()
{
}

vhdp.q()
{
}

vhtfm2.p()
{
}

vhtfm3.t()
{
}

vhtfm4.q()
{
}

vi2c.q()
{
}

vi2f.s()
{
}

vi2f.p()
{
}

vi2f.t()
{
}

vi2f.q()
{
}

vi2s.p()
{
}

vi2s.q()
{
}

vi2uc.q()
{
}

vi2us.p()
{
}

vi2us.q()
{
}

viim.s()
{
}

vlgb.s()
{
}

vlog2.s()
{
}

vlog2.p()
{
}

vlog2.t()
{
}

vlog2.q()
{
}

vmfvc()
{
}

vmidt.p()
{
}

vmidt.t()
{
}

vmidt.q()
{
}

vmmov.p()
{
}

vmmov.t()
{
}

vmmov.q()
{
}

vmmul.p()
{
}

vmmul.t()
{
}

vmmul.q()
{
}

vmone.p()
{
}

vmone.t()
{
}

vmone.q()
{
}

vmov.s()
{
}

vmov.p()
{
}

vmov.t()
{
}

vmov.q()
{
}

vmscl.p()
{
}

vmscl.t()
{
}

vmscl.q()
{
}

vmtvc()
{
}

vmzero.p()
{
}

vmzero.t()
{
}

vmzero.q()
{
}

vneg.s()
{
}

vneg.p()
{
}

vneg.t()
{
}

vneg.q()
{
}

vnop()
{
}

vnrcp.s()
{
}

vnrcp.p()
{
}

vnrcp.t()
{
}

vnrcp.q()
{
}

vnsin.s()
{
}

vnsin.p()
{
}

vnsin.t()
{
}

vnsin.q()
{
}

vocp.s()
{
}

vocp.p()
{
}

vocp.t()
{
}

vocp.q()
{
}

vrcp.s()
{
}

vrcp.p()
{
}

vrcp.t()
{
}

vrcp.q()
{
}

vrexp2.s()
{
}

vrexp2.p()
{
}

vrexp2.t()
{
}

vrexp2.q()
{
}

vrndf1.s()
{
}

vrndf1.p()
{
}

vrndf1.t()
{
}

vrndf1.q()
{
}

vrndf2.s()
{
}

vrndf2.p()
{
}

vrndf2.t()
{
}

vrndf2.q()
{
}

vrndi.s()
{
}

vrndi.p()
{
}

vrndi.t()
{
}

vrndi.q()
{
}

vrnds.s()
{
}

vrot.p()
{
}

vrot.t()
{
}

vrot.q()
{
}

vrsq.s()
{
}

vrsq.p()
{
}

vrsq.t()
{
}

vrsq.q()
{
}

vsat0.s()
{
}

vsat0.p()
{
}

vsat0.t()
{
}

vsat0.q()
{
}

vsat1.s()
{
}

vsat1.p()
{
}

vsat1.t()
{
}

vsat1.q()
{
}

vsbn.s()
{
}

vsbz.s()
{
}

vscl.s()
{
}

vscl.p()
{
}

vscl.t()
{
}

vscl.q()
{
}

vscmp.s()
{
}

vscmp.p()
{
}

vscmp.t()
{
}

vscmp.q()
{
}

vsge.s()
{
}

vsge.p()
{
}

vsge.t()
{
}

vsge.q()
{
}

vsgn.s()
{
}

vsgn.p()
{
}

vsgn.t()
{
}

vsgn.q()
{
}

vslt.s()
{
}

vslt.p()
{
}

vslt.t()
{
}

vslt.q()
{
}

vsocp.s()
{
}

vsocp.p()
{
}

vsqrt.s()
{
}

vsqrt.p()
{
}

vsqrt.t()
{
}

vsqrt.q()
{
}

vsrt1.q()
{
}

vsrt2.q()
{
}

vsrt3.q()
{
}

vsrt4.q()
{
}

vsync()
{
}

vt4444.q()
{
}

vt5551.q()
{
}

// Placeholder: encoding and operation not documented yet.
// The third colour-conversion mnemonic, after vt4444 and vt5551, is vt5650.
vt5650.q()
{
}

vtfm2.p()
{
}

vtfm3.t()
{
}

vtfm4.q()
{
}

vwbn.s()
{
}

}

Last edited by hlide on Mon Jun 16, 2008 1:08 am; edited 3 times in total

crazyc

Joined: 17 Jun 2005
Posts: 410

Posted: Sun Jun 15, 2008 2:42 am Post subject:

hlide wrote:

for those who want to make a PSP emulator using either an interpreter or a dynarec and want to know how you feel when trying to implement VFPU, I have only one word to my mind : HELL !

Well, it's no worse than x86 modrm.

J.F.

Joined: 22 Feb 2004
Posts: 2906

Posted: Sun Jun 15, 2008 4:56 am Post subject:

This is a really awesome thread. I appreciate the effort you're putting into this, hlide. I'm kind of an assembly language nut, having done the majority of my early commercial work in 100% assembly. The more info there is like this, the better. :)

hlide

Joined: 10 Sep 2006
Posts: 750

Posted: Sun Jun 15, 2008 7:23 am Post subject:

@CrazyC : maybe, but making the same operation in a normal C code is absolutely crazy to code and very slow. I don't even dare to think how to exploit vectorial SSE instructions in X86 to emulate VFPU. I'm pretty sure PSP emulator's authors were panicking when considering vfpu emulation. For a dynarec, the situation may be better to optimize but I don't think we could easily issue vectorial SSE instructions even this way. NOTE: I'm using a new approach for VFPU by factorizing operations as much as possible

hlide

Joined: 10 Sep 2006
Posts: 750

Posted: Sun Jul 20, 2008 11:45 pm Post subject:

yes, MrMr[iCE], still on it. :) About the buggy LVL.Q/LVR.Q see : http://forums.ps2dev.org/viewtopic.php?t=10608

J.F.

Joined: 22 Feb 2004
Posts: 2906

Posted: Mon Jul 28, 2008 8:53 am Post subject:

Just thought I'd post a correction I just verified. For ins and ext, the constants are start_bit:count, not msb:lsb. So if you wanted to extract bits 16 through 23, you'd use "ext v0, a0, 16, 8". Further info: a count of 0 is the same as 32, also, bits don't wrap around. For example, "ext v0, a0, 4, 0" extracts bits 4 through 35, where anything above 31 is just 0. "ext v0, a0, 0, 0" would theoretically just be the same as moving the long, but the assembler won't compile that. N:0 and 0:N are fine, it's just 0:0 that won't compile... but as I said, that's just a move, so it doesn't matter. Last edited by J.F. on Mon Jul 28, 2008 9:16 am; edited 1 time in total

hlide

Joined: 10 Sep 2006
Posts: 750

Posted: Mon Jul 28, 2008 9:09 am Post subject:

J.F. wrote:

Just thought I'd post a correction I just verified. For ins and ext, the constants are start_bit:count, not msb:lsb. So if you wanted to extract bits 16 through 23, you'd use "ext v0, a0, 16, 8".

lsb = least significant bit = start position bit
msb = most significant bit = last position bit = start position bit+count-1

Code:

ext(011111:rs:rt:(msb-lsb):lsb:000000) <=> ext(011111:rs:rt:(count-1):start_bit:000000)

ins(011111:rs:rt:msb:lsb:000100) <=> ins(011111:rs:rt:(start_bit+count-1):start_bit:000100)

So it should be okay. Just because we write them in ASM as :

INS Rt, Rs, Pos, Count
EXT Rt, Rs, Pos, Count

does not mean that Pos and Count are encoded that way. What you see is the encoding bitmap of INS/EXT instructions from bit 31 to bit 0.

And don't ask me why the MIPS32R2 authors chose to encode them this way; it is probably something like this :

EXT Rt, Rs, Pos, Len <=> Rt = zext((Rs>>Pos)&((1<<Len)-1)) <=> zext((Rs>>lsb)&((1<<(msb-lsb+1))-1))

INS Rt, Rs, Pos, Len <=> Rt[Pos+Len-1:Pos] = Rs <=> Rt[msb:lsb] = Rs

Just some speculation, of course :)

J.F.

Joined: 22 Feb 2004
Posts: 2906

Posted: Mon Jul 28, 2008 9:23 am Post subject:

hlide wrote:

J.F. wrote:

Just thought I'd post a correction I just verified. For ins and ext, the constants are start_bit:count, not msb:lsb. So if you wanted to extract bits 16 through 23, you'd use "ext v0, a0, 16, 8".

lsb = least significant bit = start position bit
msb = most significant bit = last position bit = start position bit+count-1

Code:

ext(011111:rs:rt:(msb-lsb):lsb:000000) <=> ext(011111:rs:rt:(count-1):start_bit:000000)

ins(011111:rs:rt:msb:lsb:000100) <=> ins(011111:rs:rt:(start_bit+count-1):start_bit:000100)

Okay, so it's the assembler syntax versus the encoded value.

shepherd

Joined: 02 Sep 2008
Posts: 2

Posted: Fri Feb 06, 2009 1:11 pm Post subject:

Good job, TKS!

hlide

Joined: 10 Sep 2006
Posts: 750

Posted: Sat Feb 07, 2009 5:25 am Post subject:

what does TKS mean ?

Wally

Joined: 26 Sep 2005
Posts: 672

Posted: Sat Feb 07, 2009 8:00 am Post subject:

hlide wrote:

what does TKS mean ?

Thanks

amorphophallus

Joined: 29 May 2009
Posts: 2

Posted: Fri May 29, 2009 1:43 am Post subject: VFPU spec

Just a noob's question. I've read the document The Naked PSP, paragraph on the Vector Processor, sentence "128 32bits registers". Does this mean 128 pieces of 32bits registers? _________________ --- amorph

hlide

Joined: 10 Sep 2006
Posts: 750

Posted: Fri May 29, 2009 4:27 am Post subject: Re: VFPU spec

amorphophallus wrote:

Just noob's question.

I've read the document The Naked PSP, paragraph on the Vector Processor, sentence "128 32bits registers".
Does this mean 128 pieces of 32bits registers?

VFPU has 8 banks of 16 registers ==> 128 registers. It does mean you can use up to 128 float scalar registers

amorphophallus

Joined: 29 May 2009
Posts: 2

Posted: Fri May 29, 2009 9:35 pm Post subject:

thx :)

enemykila

Joined: 17 Mar 2010
Posts: 1

Posted: Wed Mar 24, 2010 9:34 pm Post subject:

Hi, does anybody know where to get the "Allegrex" manual?? It would be very useful for a homework from the University... Thanks

adrahil

Joined: 16 Mar 2006
Posts: 277

Posted: Thu Mar 25, 2010 8:30 am Post subject:

Get a standard MIPS manual, add in some stuff from this forum... What kind of homework would require you to have the documentation to a proprietary processor?

SilverSpring

Joined: 27 Feb 2007
Posts: 115

Posted: Thu Mar 25, 2010 4:08 pm Post subject:

Source of prxtool has the full Allegrex instruction set including the PSP-only instructions: http://svn.pspdev.org/filedetails.php?repname=psp&path=%2Ftrunk%2Fprxtool%2Fdisasm.C _________________ PSP PRX LibDocs

Display posts from previous:

	forums.ps2dev.org Forum Index -> PSP Development	All times are GMT + 10 Hours
Page 1 of 1

You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum