 |
forums.ps2dev.org Homebrew PS2, PSP & PS3 Development Discussions
|
| View previous topic :: View next topic |
| Author |
Message |
hlide
Joined: 10 Sep 2006 Posts: 750
|
Posted: Tue Jun 10, 2008 8:09 pm Post subject: [ALLEGREX / CPU +FPU +VFPU] isa |
|
|
ALLEGREX / CPU instruction set
status : mostly done
| Code: | field
{
    // Bit-field names (name:width) referenced by the encoding patterns of the CPU groups.
    rs:5; rt:5; rd:5;
    shamt:5;
    // The 16-bit immediate is named imm16 because every pattern below refers to it by that name.
    imm3:3; imm16:16; imm26:26;
    code:20; // syscall/break code
    lsb:5; msb:5; // ins/ext bit positions
    func:5; // cache function, specific to Allegrex
    c0dr:5; c0cr:5;
}
group mips
{
// SPECIAL
nop(00000000000000000000000000000000)
{
cycles="1"
operation=
"
1: no operation
"
}
sll(00000000000:rt:rd:shamt:000000)
{
cycles="1"
operation=
"
1: GPR[rd] = u32(GPR[rt]) << shamt
"
}
srl(00000000000:rt:rd:shamt:000010)
{
cycles="1"
operation=
"
1: GPR[rd] = u32(GPR[rt]) >> shamt
"
}
sra(00000000000:rt:rd:shamt:000011)
{
cycles="1"
operation=
"
1: GPR[rd] = s32(GPR[rt]) >> shamt
"
}
sllv(000000:rs:rt:rd:00000000100)
{
cycles="1"
operation=
"
1: GPR[rd] = u32(GPR[rt]) << u32(GPR[rs]&31)
"
}
srlv(000000:rs:rt:rd:00000000110)
{
cycles="1"
operation=
"
1: GPR[rd] = u32(GPR[rt]) >> u32(GPR[rs]&31)
"
}
srav(000000:rs:rt:rd:00000000111)
{
cycles="1"
operation=
"
1: GPR[rd] = s32(GPR[rt]) >> u32(GPR[rs]&31)
"
}
// jr: jump to the address held in GPR[rs]; the instruction at PC+4 runs first.
jr(000000:rs:000000000000000001000)
{
    cycles="2"
    operation=
    "
    1: target = GPR[rs]
       execute instruction at PC+4
    2: PC = target
    "
    // Marked like j/jal and the branches, since the operation executes the instruction at PC+4.
    delayslot="1"
}
// jalr: jump to GPR[rs] and store the return address (PC+8) in GPR[rd].
// Pattern is 6 + 5 + 5 + 5 + 11 = 32 bits (SPECIAL, rs, zero, rd, zero hint + function 001001).
jalr(000000:rs:00000:rd:00000001001)
{
    cycles="2"
    operation=
    "
    1: GPR[rd] = PC+8
       target = GPR[rs]
       execute instruction at PC+4
    2: PC = target
    "
    // Marked like j/jal and the branches, since the operation executes the instruction at PC+4.
    delayslot="1"
}
mfhi(0000000000000000:rd:00000010000)
{
cycles="?"
operation=
"
1: GPR[rd] = HI
"
}
mthi(000000:rs:000000000000000010001)
{
cycles="?"
operation=
"
1: HI = GPR[rs]
"
}
mflo(0000000000000000:rd:00000010010)
{
cycles="?"
operation=
"
1: GPR[rd] = LO
"
}
mtlo(000000:rs:000000000000000010011)
{
cycles="?"
operation=
"
1: LO = GPR[rs]
"
}
// mult: signed 32x32 -> 64-bit multiply of GPR[rs] by GPR[rt]; result is split across HI:LO.
mult(000000:rs:rt:0000000000011000)
{
    cycles="5"
    operation=
    "
    1: result:64 = s64(GPR[rs]) * s64(GPR[rt])
       LO = result[31..0]
       HI = result[63..32]
    "
}
// multu: unsigned 32x32 -> 64-bit multiply of GPR[rs] by GPR[rt]; result is split across HI:LO.
multu(000000:rs:rt:0000000000011001)
{
    cycles="5"
    operation=
    "
    1: result:64 = u64(GPR[rs]) * u64(GPR[rt])
       LO = result[31..0]
       HI = result[63..32]
    "
}
// div: signed division of GPR[rs] by GPR[rt]; quotient goes to LO, remainder to HI.
div(000000:rs:rt:0000000000011010)
{
    cycles="36"
    operation=
    "
    1: LO = s32(GPR[rs]) / s32(GPR[rt])
       HI = s32(GPR[rs]) % s32(GPR[rt])
    "
}
// divu: unsigned division of GPR[rs] by GPR[rt]; quotient goes to LO, remainder to HI.
divu(000000:rs:rt:0000000000011011)
{
    cycles="36"
    operation=
    "
    1: LO = u32(GPR[rs]) / u32(GPR[rt])
       HI = u32(GPR[rs]) % u32(GPR[rt])
    "
}
// add: trapping signed addition. Both operands are sign-extended to 33 bits
// (bit 31 replicated into bit 32); overflow is detected when bits 32 and 31 of the sum differ.
add(000000:rs:rt:rd:00000100000)
{
    cycles="1"
    operation=
    "
    1: result:33 = (((GPR[rs][31]) << 32) | GPR[rs]) + (((GPR[rt][31]) << 32) | GPR[rt])
       if (result[32] == result[31])
           GPR[rd] = result[31..0]
       else
           raise integer overflow exception
    "
}
addu(000000:rs:rt:rd:00000100001)
{
cycles="1"
operation=
"
1: GPR[rd] = GPR[rs] + GPR[rt]
"
}
// sub: trapping signed subtraction. Both operands are sign-extended to 33 bits
// (bit 31 replicated into bit 32); overflow is detected when bits 32 and 31 of the result differ.
sub(000000:rs:rt:rd:00000100010)
{
    cycles="1"
    operation=
    "
    1: result:33 = (((GPR[rs][31]) << 32) | GPR[rs]) - (((GPR[rt][31]) << 32) | GPR[rt])
       if (result[32] == result[31])
           GPR[rd] = result[31..0]
       else
           raise integer overflow exception
    "
}
subu(000000:rs:rt:rd:00000100011)
{
cycles="1"
operation=
"
1: GPR[rd] = GPR[rs] - GPR[rt]
"
}
and(000000:rs:rt:rd:00000100100)
{
cycles="1"
operation=
"
1: GPR[rd] = GPR[rs] & GPR[rt]
"
}
or(000000:rs:rt:rd:00000100101)
{
cycles="1"
operation=
"
1: GPR[rd] = GPR[rs] | GPR[rt]
"
}
xor(000000:rs:rt:rd:00000100110)
{
cycles="1"
operation=
"
1: GPR[rd] = GPR[rs] ^ GPR[rt]
"
}
nor(000000:rs:rt:rd:00000100111)
{
cycles="1"
operation=
"
1: GPR[rd] = ~(GPR[rs] | GPR[rt])
"
}
slt(000000:rs:rt:rd:00000101010)
{
cycles="1"
operation=
"
1: GPR[rd] = s32(GPR[rs]) < s32(GPR[rt])
"
}
// sltu: set GPR[rd] to 1 when GPR[rs] is below GPR[rt] in an unsigned comparison, else 0
// (unsigned counterpart of slt).
sltu(000000:rs:rt:rd:00000101011)
{
    cycles="1"
    operation=
    "
    1: GPR[rd] = u32(GPR[rs]) < u32(GPR[rt])
    "
}
// REGIMM
bltz(000001:rs:00000:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) < 0)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bgez(000001:rs:00001:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) >= 0)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bltzl(000001:rs:00010:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) < 0)
if (ct)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bgezl(000001:rs:00011:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) >= 0)
if (ct)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bltzal(000001:rs:10000:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) < 0)
execute instruction at PC+4
if (ct)
GPR(31) = PC+8
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bgezal(000001:rs:10001:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) >= 0)
execute instruction at PC+4
if (ct)
GPR(31) = PC+8
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bltzall(000001:rs:10010:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) < 0)
if (ct)
execute instruction at PC+4
if (ct)
GPR(31) = PC+8
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bgezall(000001:rs:10011:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) >= 0)
if (ct)
execute instruction at PC+4
if (ct)
GPR(31) = PC+8
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
// OPCODE #1
j(000010:imm26)
{
cycles="2"
operation=
"
1: execute instruction at PC+4
2: PC = PC[31..28] | (u32(imm26) << 2)
"
delayslot="1"
}
jal(000011:imm26)
{
cycles="2"
operation=
"
1: GPR(31) = PC+8
execute instruction at PC+4
2: PC = PC[31..28] | (u32(imm26) << 2)
"
delayslot="1"
}
beq(000100:rs:rt:imm16)
{
cycles="3"
operation=
"
1: ct = (GPR[rs] == GPR[rt])
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bne(000101:rs:rt:imm16)
{
cycles="3"
operation=
"
1: ct = (GPR[rs] <> GPR[rt])
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
blez(000110:rs:00000:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) <= 0)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bgtz(000111:rs:00000:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) > 0)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
// addi: trapping signed addition of a sign-extended 16-bit immediate.
// GPR[rs] is sign-extended to 33 bits; overflow is detected when bits 32 and 31 of the sum differ.
addi(001000:rs:rt:imm16)
{
    cycles="1"
    operation=
    "
    1: result:33 = (((GPR[rs][31]) << 32) | GPR[rs]) + s32(imm16)
       if (result[32] == result[31])
           GPR[rt] = result[31..0]
       else
           raise integer overflow exception
    "
}
addiu(001001:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = GPR[rs] + s32(imm16)
"
}
slti(001010:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = s32(GPR[rs]) < s32(imm16)
"
}
sltiu(001011:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = u32(GPR[rs]) < u32(s32(imm16))
"
}
andi(001100:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = s32(GPR[rs]) & u32(imm16)
"
}
ori(001101:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = s32(GPR[rs]) | u32(imm16)
"
}
xori(001110:rs:rt:imm16)
{
cycles="1"
operation=
"
1: GPR[rt] = s32(GPR[rs]) ^ u32(imm16)
"
}
// lui: load the 16-bit immediate into the upper half of GPR[rt]; the lower half is zero.
// The encoding has no rs field (those bits are fixed to 00000), so no source register is read.
lui(00111100000:rt:imm16)
{
    cycles="1"
    operation=
    "
    1: GPR[rt] = u32(imm16) << 16
    "
}
// COP0
mfc0(01000000000:rt:c0dr:00000000000)
{
cycles="?"
operation=
"
1: GPR[rt] = C0DR(c0dr)
"
}
cfc0(01000000010:rt:c0cr:00000000000)
{
cycles="?"
operation=
"
1: GPR[rt] = C0CR(c0cr)
"
}
mtc0(01000000100:rt:c0dr:00000000000)
{
cycles="?"
operation=
"
1: C0DR(c0dr) = GPR[rt]
"
}
// ctc0: copy GPR[rt] into COP0 control register c0cr.
// Major opcode is COP0 (010000), as for mfc0/cfc0/mtc0; 010001 would collide with ctc1.
ctc0(01000000110:rt:c0cr:00000000000)
{
    cycles="?"
    operation=
    "
    1: C0CR(c0cr) = GPR[rt]
    "
}
// eret: return from exception or error level.
// Encoding is 0x42000018: COP0 major opcode with the CO bit (bit 25) set, function 011000.
// The non-error return address is the EPC register.
eret(01000010000000000000000000011000)
{
    cycles="?"
    operation=
    "
    1: if (ERL == 1)
           PC = ErrorEPC
       else
           PC = EPC
       if (ERL == 0)
           EXL = 0
       LLBit = 0
    "
}
// OPCODE #2
beql(010100:rs:rt:imm16)
{
cycles="3"
operation=
"
1: ct = (GPR[rs] == GPR[rt])
if (ct)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bnel(010101:rs:rt:imm16)
{
cycles="3"
operation=
"
1: ct = (GPR[rs] <> GPR[rt])
if (ct)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
// blezl: branch-likely if GPR[rs] <= 0 (signed); the delay-slot instruction runs only when taken.
// Stage labels (1:, 2:) follow the same layout as beql/bnel/bgtzl.
blezl(010110:rs:00000:imm16)
{
    cycles="3"
    operation=
    "
    1: ct = (s32(GPR[rs]) <= 0)
       if (ct)
           execute instruction at PC+4
    2: if (ct)
           PC = PC + (s16(imm16) << 2)
    "
    delayslot="1"
}
bgtzl(010111:rs:00000:imm16)
{
cycles="3"
operation=
"
1: ct = (s32(GPR[rs]) > 0)
if (ct)
execute instruction at PC+4
2: if (ct)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
lb(100000:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
GPR[rt] = s32(MemoryRead8(address))
"
}
lh(100001:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 1)
raise address error exception
else
GPR[rt] = s32(MemoryRead16(address))
"
}
lwl(100010:rs:rt:imm16)
{
cycles="?"
}
lw(100011:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 3)
raise address error exception
else
GPR[rt] = MemoryRead32(address)
"
}
lbu(100100:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
GPR[rt] = u32(MemoryRead8(address))
"
}
lhu(100101:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 1)
raise address error exception
else
GPR[rt] = u32(MemoryRead16(address))
"
}
lwr(100110:rs:rt:imm16)
{
cycles="?"
}
sb(101000:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
MemoryWrite8(address, GPR[rt][7..0])
"
}
sh(101001:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 1)
raise address error exception
else
MemoryWrite16(address, GPR[rt][15..0])
"
}
swl(101010:rs:rt:imm16)
{
cycles="?"
}
sw(101011:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 3)
raise address error exception
else
MemoryWrite32(address, GPR[rt])
"
}
swr(101110:rs:rt:imm16)
{
cycles="?"
}
ll(110000:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 3)
raise address error exception
else
GPR[rt] = MemoryRead32(address)
LLBit = 1
"
}
sc(111000:rs:rt:imm16)
{
cycles="?"
operation=
"
1: address = GPR[rs] + s32(imm16)
if (address & 3)
raise address error exception
else if (LLBit == 1)
MemoryWrite32(address, GPR[rt])
GPR[rt] = u32(LLBit)
"
}
}
group allegrex
{
// SPECIAL
rotr(00000000001:rt:rd:shamt:000010)
{
cycles="1"
operation=
"
1: GPR[rd] = (u32(GPR[rt]) >> shamt) | (GPR[rt] << (32 - shamt))
"
}
rotrv(000000:rs:rt:rd:00001000110)
{
cycles="1"
operation=
"
1: s = GPR[rs] & 31
GPR[rd] = (u32(GPR[rt]) >> s) | (GPR[rt] << (32 - s))
"
}
movz(000000:rs:rt:rd:00000001010)
{
cycles="1"
operation=
"
1: if (GPR[rt] == 0)
GPR[rd] = GPR[rs]
"
}
movn(000000:rs:rt:rd:00000001011)
{
cycles="1"
operation=
"
1: if (GPR[rt] <> 0)
GPR[rd] = GPR[rs]
"
}
syscall(000000:code:001100)
{
cycles="?"
}
// break: breakpoint trap. Function field is 001101; 001100 belongs to syscall.
break(000000:code:001101)
{
    cycles="?"
}
sync(00000000000000000000000000001111)
{
cycles="?"
}
clz(000000:rs:00000:rd:00000010110)
{
cycles="1"
operation=
"
1: count = 32
i = 31
loop
if (GPR[rs][i] == 1)
count = 31 - i
while (count == 32 and i-- <> 0)
GPR[rd] = count;
"
}
clo(000000:rs:00000:rd:00000010111)
{
cycles="1"
operation=
"
1: count = 32
i = 31
loop
if (GPR[rs][i] == 0)
count = 31 - i
while (count == 32 and i-- <> 0)
GPR[rd] = count;
"
}
// madd: signed multiply-accumulate, HI:LO += GPR[rs] * GPR[rt].
madd(000000:rs:rt:0000000000011100)
{
    cycles="5"
    operation=
    "
    1: result:64 = u64(LO) + s64(HI<<32) + s64(GPR[rs]) * s64(GPR[rt])
       LO = result[31..0]
       HI = result[63..32]
    "
}
// maddu: unsigned multiply-accumulate, HI:LO += GPR[rs] * GPR[rt].
maddu(000000:rs:rt:0000000000011101)
{
    cycles="5"
    operation=
    "
    1: result:64 = u64(LO) + u64(HI<<32) + u64(GPR[rs]) * u64(GPR[rt])
       LO = result[31..0]
       HI = result[63..32]
    "
}
// max: GPR[rd] receives the larger of GPR[rs] and GPR[rt], compared as signed 32-bit values.
max(000000:rs:rt:rd:00000101100)
{
    cycles="1"
    operation=
    "
    1: GPR[rd] = (s32(GPR[rs]) < s32(GPR[rt])) ? GPR[rt] : GPR[rs];
    "
}
// min: GPR[rd] receives the smaller of GPR[rs] and GPR[rt], compared as signed 32-bit values.
min(000000:rs:rt:rd:00000101101)
{
    cycles="1"
    operation=
    "
    1: GPR[rd] = (s32(GPR[rs]) < s32(GPR[rt])) ? GPR[rs] : GPR[rt];
    "
}
// msub: signed multiply-subtract, HI:LO -= GPR[rs] * GPR[rt].
// Pattern is 6 + 5 + 5 + 16 = 32 bits, the same layout as mult/madd.
msub(000000:rs:rt:0000000000101110)
{
    cycles="5"
    operation=
    "
    1: result:64 = u64(LO) + s64(HI<<32) - s64(GPR[rs]) * s64(GPR[rt])
       LO = result[31..0]
       HI = result[63..32]
    "
}
// msubu: unsigned multiply-subtract, HI:LO -= GPR[rs] * GPR[rt].
// Pattern is 6 + 5 + 5 + 16 = 32 bits, the same layout as multu/maddu.
msubu(000000:rs:rt:0000000000101111)
{
    cycles="5"
    operation=
    "
    1: result:64 = u64(LO) + u64(HI<<32) - u64(GPR[rs]) * u64(GPR[rt])
       LO = result[31..0]
       HI = result[63..32]
    "
}
// OPCODE #1
halt(01110000000000000000000000000000)
{
cycles="?"
}
// SPECIAL3
ext(011111:rs:rt:(msb-lsb):lsb:000000)
{
cycles="1"
operation=
"
1: GPR[rt] = GPR[rs][msb..lsb];
"
}
ins(011111:rs:rt:msb:lsb:000100)
{
cycles="1"
operation=
"
1: GPR[rt][msb..lsb] = GPR[rs][msb-lsb..0];
"
}
wsbh(01111100000:rt:rd:00010100000)
{
cycles="1"
operation=
"
1: GPR[rd][ 7.. 0] = GPR[rt][15.. 8];
GPR[rd][15.. 8] = GPR[rt][ 7.. 0];
GPR[rd][23..16] = GPR[rt][31..24];
GPR[rd][31..24] = GPR[rt][23..16];
"
}
// wsbw: swap the bytes within the word, i.e. reverse the order of all four bytes of GPR[rt]
// (wsbh, by contrast, swaps the two bytes inside each halfword).
wsbw(01111100000:rt:rd:00011100000)
{
    cycles="1"
    operation=
    "
    1: GPR[rd][ 7.. 0] = GPR[rt][31..24];
       GPR[rd][15.. 8] = GPR[rt][23..16];
       GPR[rd][23..16] = GPR[rt][15.. 8];
       GPR[rd][31..24] = GPR[rt][ 7.. 0];
    "
}
// seb: sign-extend the low byte of GPR[rt] into GPR[rd].
// Low 11 bits are 10000 (sub-op) + 100000 (function, shared with seh/bitrev/wsbh/wsbw), i.e. 0x7C000420.
seb(01111100000:rt:rd:10000100000)
{
    cycles="1"
    operation=
    "
    1: GPR[rd] = s32(GPR[rt][7..0]);
    "
}
bitrev(01111100000:rt:rd:10100100000)
{
cycles="1"
operation=
"
1: for each i in [31..0]
GPR[rd][i] = GPR[rt][31-i];
"
}
seh(01111100000:rt:rd:11000100000)
{
cycles="1"
operation=
"
1: GPR[rd] = s32(GPR[rt][15..0]);
"
}
// OPCODE #2
cache(101111:rs:func:imm16)
{
cycles="?"
}
}
|
Last edited by hlide on Mon Jun 16, 2008 12:00 am; edited 6 times in total |
|
| Back to top |
|
 |
hlide
Joined: 10 Sep 2006 Posts: 750
|
Posted: Sun Jun 15, 2008 1:51 am Post subject: |
|
|
FPU instruction set
status : WIP
| Code: | field
{
    // Bit-field names (name:width) referenced by the encoding patterns of the fpu group.
    rs:5; rt:5; rd:5;
    // The 16-bit immediate is named imm16 because the patterns below (bc1*, lwc1, swc1) use that name.
    imm3:3; imm16:16; imm26:26;
    cc:3;
    cond:4 = f(0000) |un(0001) |eq(0010) |ueq(0011)|
    olt(0100)|ult(0101) |ole(0110)|ule(0111)|
    sf(1000) |ngle(1001)|seq(1010)|ngl(1011)|
    lt(1100) |nge(1101) |le(1110) |ngt(1111);
    fs:5; ft:5; fd:5;
}
group fpu
{
// cycles = "pitch/latency/repeat rate"
add.s(01000110000:ft:fs:fd:000000)
{
cycles="1/4/3"
}
sub.s(01000110000:ft:fs:fd:000001)
{
cycles="1/4/3"
}
mul.s(01000110000:ft:fs:fd:000010)
{
cycles="1/7/6"
}
div.s(01000110000:ft:fs:fd:000011)
{
cycles="28/28/27"
}
sqrt.s(01000110000-----:fs:fd:000100)
{
cycles="28/28/27"
}
abs.s(01000110000-----:fs:fd:000101)
{
cycles="1/2/1"
}
mov.s(01000110000-----:fs:fd:000110)
{
cycles="1/1/1"
}
neg.s(01000110000-----:fs:fd:000111)
{
cycles="1/2/1"
}
round.w.s(01000110000-----:fs:fd:001100)
{
cycles="1/4/3"
}
trunc.w.s(01000110000-----:fs:fd:001101)
{
cycles="1/4/3"
}
ceil.w.s(01000110000-----:fs:fd:001110)
{
cycles="1/4/3"
}
floor.w.s(01000110000-----:fs:fd:001111)
{
cycles="1/4/3"
}
cvt.s.w(01000110100-----:fs:fd:100000)
{
cycles="1/6/5"
}
cvt.w.s(01000110000-----:fs:fd:100100)
{
cycles="1/4/3"
}
c.<cond>.s(01000110000:ft:fs:cc:--11:cond)
{
cycles="1/3/2"
}
mfc1(01000100000:rt:fs:-----------)
{
cycles="1/?/?"
}
cfc1(01000100010:rt:fs:-----------)
{
cycles="1/?/?"
}
mtc1(01000100100:rt:fs:-----------)
{
cycles="1/?/?"
}
ctc1(01000100110:rt:fs:-----------)
{
cycles="1/?/?"
}
bc1f(01000101000:cc:00:imm16)
{
cycles="1/?/?"
}
bc1t(01000101000:cc:01:imm16)
{
cycles="1/?/?"
}
// bc1fl: branch-likely on FPU condition false.
// The 2-bit field after cc is (likely, true/false) = 10; 00 is the plain bc1f.
bc1fl(01000101000:cc:10:imm16)
{
    cycles="1/?/?"
}
// bc1tl: branch-likely on FPU condition true.
// The 2-bit field after cc is (likely, true/false) = 11; 01 is the plain bc1t.
bc1tl(01000101000:cc:11:imm16)
{
    cycles="1/?/?"
}
// lwc1: load a word from memory into an FPU register.
// The second operand is named ft, the field this file declares for an FPU register in that position.
// NOTE(review): confirm that downstream tooling does not depend on the name rt here.
lwc1(110001:rs:ft:imm16)
{
    cycles="1/?/?"
}
// swc1: store a word from an FPU register to memory.
// The second operand is named ft, the field this file declares for an FPU register in that position.
// NOTE(review): confirm that downstream tooling does not depend on the name rt here.
swc1(111001:rs:ft:imm16)
{
    cycles="1/?/?"
}
}
|
Last edited by hlide on Sun Jun 15, 2008 6:59 am; edited 3 times in total |
|
| Back to top |
|
 |
hlide
Joined: 10 Sep 2006 Posts: 750
|
Posted: Sun Jun 15, 2008 2:26 am Post subject: |
|
|
VFPU instruction set
PART 1/2
status : WIP
| Code: | field
{
    // Bit-field names (name:width) referenced by the VFPU encoding patterns.
    rs:5; rt:5;
    imm3:3; imm7:7; imm:14; imm16:16;
    // Register operand sub-fields for source (vs), target (vt) and destination (vd).
    vs_m:3; vs_c:2; vs_r:2; vs_x:1; vs_o:1;
    vt_m:3; vt_c:2; vt_r:2; vt_x:1; vt_o:1;
    vd_m:3; vd_c:2; vd_r:2; vd_x:1; vd_o:1;
    negw:1; negz:1; negy:1; negx:1;
    cstw:1; cstz:1; csty:1; cstx:1;
    absw:1; absz:1; absy:1; absx:1;
    swzw:2; swzz:2; swzy:2; swzx:2;
    mskw:1; mskz:1; msky:1; mskx:1;
    satw:2; satz:2; saty:2; satx:2;
}
macro
{
Mask(i:2)
{
VFPU_PFXD[i+8] = 1
set VPFXD as taken
}
// NotMasked: true when component i may be written, i.e. its mask bit (bit 8+i of VFPU_PFXD,
// the bit that Mask(i) sets to 1) is clear.
NotMasked:1(i:2)
{
    return (VFPU_PFXD >> 8)[i] == 0
}
// Transform: applies one component's source-prefix transform.
//   swz : 2-bit selector (0..3) - picks x/y/z/w, or a constant when cst == 1
//   abs : take the absolute value; in constant mode it selects the second constant bank
//   cst : 1 = produce a constant instead of reading a component
//   neg : negate the final value
// Constant table: abs == 0 -> 0, 1, 2, 1/2 ; abs == 1 -> 3, 1/3, 1/4, 1/6.
// NOTE(review): the table follows the JPCSP/PPSSPP emulators; confirm against hardware.
Transform:32(swz:2, abs:1, cst:1, neg:1, x:32, y:32, z:32, w:32)
{
    if (cst == 1)
        when swz is
            0 :
                value = abs ? 3.0f : 0.0f
            1 :
                value = abs ? 1.0f/3.0f : 1.0f
            2 :
                value = abs ? 1.0f/4.0f : 2.0f
            3 :
                value = abs ? 1.0f/6.0f : 0.5f
    else
        when swz is
            0 :
                value' = x
            1 :
                value' = y
            2 :
                value' = z
            3 :
                value' = w
        if (abs == 1)
            value = value' < 0.0f ? -value' : value'
        else
            value = value'
    return (neg == 1) ? -value : value
}
PrefixS:32(i:2, x:32[, y:32 [, z:32 [, w:32]]])
{
swz = (VFPU_PFXS >> 2*i)[1..0]
abs = (VFPU_PFXS >> 8)[i]
cst = (VFPU_PFXS >> 12)[i]
neg = (VFPU_PFXS >> 16)[i]
return Transform(swz, abs, cst, neg, x, y, z, w)
}
// PrefixT: applies the target-operand prefix (VFPU_PFXT) to component i.
// This is the macro LoadRegisterT calls; it mirrors PrefixS but reads VFPU_PFXT.
PrefixT:32(i:2, x:32[, y:32 [, z:32 [, w:32]]])
{
    swz = (VFPU_PFXT >> 2*i)[1..0]
    abs = (VFPU_PFXT >> 8)[i]
    cst = (VFPU_PFXT >> 12)[i]
    neg = (VFPU_PFXT >> 16)[i]
    return Transform(swz, abs, cst, neg, x, y, z, w)
}
// PrefixD: applies the destination-prefix transform for component i to `value`.
// The transform is selected by the 2-bit field at bits [2*i+1 .. 2*i] of VFPU_PFXD.
// NOTE(review): selector value 2 has no case here - confirm whether it is reserved
// or should pass the value through unchanged.
PrefixD:32(i:2, value:32)
{
when (VFPU_PFXD >> 2*i)[1..0] is
0 :
// no saturation
return value
1 :
// clamp to [0.0, 1.0]
return (value < 0.0f) ? 0.0f : ((value > 1.0f ? 1.0f : value))
3 :
// clamp to [-1.0, 1.0]
return (value < -1.0f) ? -1.0f : ((value > 1.0f ? 1.0f : value))
}
// SaveRegisterD: stores 1..4 result components into VFPR. When a destination prefix (VPFXD)
// is pending, each component is written only if NotMasked(i) and is passed through PrefixD(i, ...);
// the prefix is then released.
//   n    : component count (1 = single, 2 = pair, 3 = triple, 4 = quad); 3 bits so that 4 fits
//   r    : element index, single only
//   ofs  : element offset (pair: elements 2*ofs and 2*ofs+1; triple: elements ofs..ofs+2)
//   xch  : 1 = walk the middle VFPR index, 0 = walk the last VFPR index
//   m, c : the vd_m / vd_c fields of the instruction
SaveRegisterD(n:3, (r:2 | [ofs:1,] xch:1), m:3, c:2, x:32[, y:32[, z:32[, w:32]]])
{
    when n is
        1 :
            if (VPFXD is taken)
                if NotMasked(0) VFPR[m][c][r] = PrefixD(0, x)
                set VPFXD as free
            else
                VFPR[m][c][r] = x
        2 :
            if (VPFXD is taken)
                if (xch)
                    if NotMasked(0) VFPR[m][0+(ofs<<1)][c] = PrefixD(0, x)
                    if NotMasked(1) VFPR[m][1+(ofs<<1)][c] = PrefixD(1, y)
                else
                    if NotMasked(0) VFPR[m][c][0+(ofs<<1)] = PrefixD(0, x)
                    if NotMasked(1) VFPR[m][c][1+(ofs<<1)] = PrefixD(1, y)
                set VPFXD as free
            else
                if (xch)
                    VFPR[m][0+(ofs<<1)][c] = x
                    VFPR[m][1+(ofs<<1)][c] = y
                else
                    VFPR[m][c][0+(ofs<<1)] = x
                    VFPR[m][c][1+(ofs<<1)] = y
        3 :
            if (VPFXD is taken)
                if (xch)
                    if NotMasked(0) VFPR[m][0+ofs][c] = PrefixD(0, x)
                    if NotMasked(1) VFPR[m][1+ofs][c] = PrefixD(1, y)
                    if NotMasked(2) VFPR[m][2+ofs][c] = PrefixD(2, z)
                else
                    if NotMasked(0) VFPR[m][c][0+ofs] = PrefixD(0, x)
                    if NotMasked(1) VFPR[m][c][1+ofs] = PrefixD(1, y)
                    if NotMasked(2) VFPR[m][c][2+ofs] = PrefixD(2, z)
                set VPFXD as free
            else
                if (xch)
                    VFPR[m][0+ofs][c] = x
                    VFPR[m][1+ofs][c] = y
                    VFPR[m][2+ofs][c] = z
                else
                    VFPR[m][c][0+ofs] = x
                    VFPR[m][c][1+ofs] = y
                    VFPR[m][c][2+ofs] = z
        4 :
            if (VPFXD is taken)
                if (xch)
                    if NotMasked(0) VFPR[m][0][c] = PrefixD(0, x)
                    if NotMasked(1) VFPR[m][1][c] = PrefixD(1, y)
                    if NotMasked(2) VFPR[m][2][c] = PrefixD(2, z)
                    if NotMasked(3) VFPR[m][3][c] = PrefixD(3, w)
                else
                    if NotMasked(0) VFPR[m][c][0] = PrefixD(0, x)
                    if NotMasked(1) VFPR[m][c][1] = PrefixD(1, y)
                    if NotMasked(2) VFPR[m][c][2] = PrefixD(2, z)
                    if NotMasked(3) VFPR[m][c][3] = PrefixD(3, w)
                set VPFXD as free
            else
                if (xch)
                    VFPR[m][0][c] = x
                    VFPR[m][1][c] = y
                    VFPR[m][2][c] = z
                    VFPR[m][3][c] = w
                else
                    VFPR[m][c][0] = x
                    VFPR[m][c][1] = y
                    VFPR[m][c][2] = z
                    VFPR[m][c][3] = w
}
// LoadRegisterS: reads 1..4 components of the source operand from VFPR into the out parameters.
// When a source prefix (VPFXS) is pending, each component goes through PrefixS(i, ...) and the
// prefix is then released.
//   n    : component count (1 = single, 2 = pair, 3 = triple, 4 = quad); 3 bits so that 4 fits
//   r    : element index, single only
//   ofs  : element offset (pair: elements 2*ofs and 2*ofs+1; triple: elements ofs..ofs+2)
//   xch  : 1 = walk the middle VFPR index, 0 = walk the last VFPR index
//   m, c : register indices supplied by the caller; every case, quad included, uses these
LoadRegisterS(n:3, (r:2 | [ofs:1,] xch:1), m:3, c:2, out x:32[, out y:32[, out z:32[, out w:32]]])
{
    when n is
        1 :
            x' = VFPR[m][c][r]
            if (VPFXS is taken)
                x = PrefixS(0, x')
                set VPFXS as free
            else
                x = x'
        2 :
            if (xch)
                x' = VFPR[m][0+(ofs<<1)][c]
                y' = VFPR[m][1+(ofs<<1)][c]
            else
                x' = VFPR[m][c][0+(ofs<<1)]
                y' = VFPR[m][c][1+(ofs<<1)]
            if (VPFXS is taken)
                x = PrefixS(0, x', y')
                y = PrefixS(1, x', y')
                set VPFXS as free
            else
                x = x'
                y = y'
        3 :
            if (xch)
                x' = VFPR[m][0+ofs][c]
                y' = VFPR[m][1+ofs][c]
                z' = VFPR[m][2+ofs][c]
            else
                x' = VFPR[m][c][0+ofs]
                y' = VFPR[m][c][1+ofs]
                z' = VFPR[m][c][2+ofs]
            if (VPFXS is taken)
                x = PrefixS(0, x', y', z')
                y = PrefixS(1, x', y', z')
                z = PrefixS(2, x', y', z')
                set VPFXS as free
            else
                x = x'
                y = y'
                z = z'
        4 :
            if (xch)
                x' = VFPR[m][0][c]
                y' = VFPR[m][1][c]
                z' = VFPR[m][2][c]
                w' = VFPR[m][3][c]
            else
                x' = VFPR[m][c][0]
                y' = VFPR[m][c][1]
                z' = VFPR[m][c][2]
                w' = VFPR[m][c][3]
            if (VPFXS is taken)
                x = PrefixS(0, x', y', z', w')
                y = PrefixS(1, x', y', z', w')
                z = PrefixS(2, x', y', z', w')
                w = PrefixS(3, x', y', z', w')
                set VPFXS as free
            else
                x = x'
                y = y'
                z = z'
                w = w'
}
// LoadRegisterT: reads 1..4 components of the target operand from VFPR into the out parameters.
// When a target prefix (VPFXT) is pending, each component goes through PrefixT(i, ...) and the
// prefix is then released.
//   n    : component count (1 = single, 2 = pair, 3 = triple, 4 = quad); 3 bits so that 4 fits
//   r    : element index, single only
//   ofs  : element offset (pair: elements 2*ofs and 2*ofs+1; triple: elements ofs..ofs+2)
//   xch  : 1 = walk the middle VFPR index, 0 = walk the last VFPR index
//   m, c : register indices supplied by the caller (vt_m / vt_c); the quad case must use these
//          rather than the source operand's vs_m / vs_c
LoadRegisterT(n:3, (r:2 | [ofs:1,] xch:1), m:3, c:2, out x:32[, out y:32[, out z:32[, out w:32]]])
{
    when n is
        1 :
            x' = VFPR[m][c][r]
            if (VPFXT is taken)
                x = PrefixT(0, x')
                set VPFXT as free
            else
                x = x'
        2 :
            if (xch)
                x' = VFPR[m][0+(ofs<<1)][c]
                y' = VFPR[m][1+(ofs<<1)][c]
            else
                x' = VFPR[m][c][0+(ofs<<1)]
                y' = VFPR[m][c][1+(ofs<<1)]
            if (VPFXT is taken)
                x = PrefixT(0, x', y')
                y = PrefixT(1, x', y')
                set VPFXT as free
            else
                x = x'
                y = y'
        3 :
            if (xch)
                x' = VFPR[m][0+ofs][c]
                y' = VFPR[m][1+ofs][c]
                z' = VFPR[m][2+ofs][c]
            else
                x' = VFPR[m][c][0+ofs]
                y' = VFPR[m][c][1+ofs]
                z' = VFPR[m][c][2+ofs]
            if (VPFXT is taken)
                x = PrefixT(0, x', y', z')
                y = PrefixT(1, x', y', z')
                z = PrefixT(2, x', y', z')
                set VPFXT as free
            else
                x = x'
                y = y'
                z = z'
        4 :
            if (xch)
                x' = VFPR[m][0][c]
                y' = VFPR[m][1][c]
                z' = VFPR[m][2][c]
                w' = VFPR[m][3][c]
            else
                x' = VFPR[m][c][0]
                y' = VFPR[m][c][1]
                z' = VFPR[m][c][2]
                w' = VFPR[m][c][3]
            if (VPFXT is taken)
                x = PrefixT(0, x', y', z', w')
                y = PrefixT(1, x', y', z', w')
                z = PrefixT(2, x', y', z', w')
                w = PrefixT(3, x', y', z', w')
                set VPFXT as free
            else
                x = x'
                y = y'
                z = z'
                w = w'
}
}
shortcut
{
v<op0>.s(110100:op0:0:0000000:0:vd_r:vd_m:vd_c)
{
when <op0> is
zero(0000000110) :
x = 0.0f
one(0000000111) :
x = 1.0f
SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}
v<op0>.p(110100:op0:0:0000000:1:vd_o:vd_x:vd_m:vd_c)
{
when <op0> is
idt(0000000011) :
x = vd_c[0] ? 0.0f : 1.0f
y = vd_c[0] ? 1.0f : 0.0f
zero(0000000110) :
x = 0.0f
y = 0.0f
one(0000000111) :
x = 1.0f
y = 1.0f
SaveRegisterD(2, vd_o, vd_x, vd_m, vd_c, x, y)
}
// v<op0>.t: operand-less triple instructions (vzero.t / vone.t).
// All three components are produced and all three are handed to SaveRegisterD.
v<op0>.t(110100:op0:1:0000000:0:vd_o:vd_x:vd_m:vd_c)
{
    when <op0> is
        zero(0000000110) :
            x = 0.0f
            y = 0.0f
            z = 0.0f
        one(0000000111) :
            x = 1.0f
            y = 1.0f
            z = 1.0f
    SaveRegisterD(3, vd_o, vd_x, vd_m, vd_c, x, y, z)
}
// v<op0>.q: operand-less quad instructions (vidt.q / vzero.q / vone.q).
// All four components are produced and all four are handed to SaveRegisterD.
v<op0>.q(110100:op0:1:0000000:1:vd_o:vd_x:vd_m:vd_c)
{
    when <op0> is
        idt(0000000011) :
            // identity row: 1.0 in the position selected by vd_c, 0.0 elsewhere
            x = vd_c[1..0] == 0 ? 1.0f : 0.0f
            y = vd_c[1..0] == 1 ? 1.0f : 0.0f
            z = vd_c[1..0] == 2 ? 1.0f : 0.0f
            w = vd_c[1..0] == 3 ? 1.0f : 0.0f
        zero(0000000110) :
            x = 0.0f
            y = 0.0f
            z = 0.0f
            w = 0.0f
        one(0000000111) :
            x = 1.0f
            y = 1.0f
            z = 1.0f
            w = 1.0f
    SaveRegisterD(4, vd_x, vd_m, vd_c, x, y, z, w)
}
v<op1>.s(110100:op1:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
LoadRegisterS(1, vs_r, vs_m, vs_c, x1)
when <op1> is
abs(0000000001) :
x = (x1 < 0.0f) ? -x1 : x1
sin(0000010010) :
x = sin(PI * x1 / 2.0f)
cos(0000010011) :
x = cos(PI * x1 / 2.0f)
asin(0000010111) :
x = 2.0f * asin(x1) / PI
SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}
v<op1>.s2p(110100:op1:0:vs_r:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
LoadRegisterS(1, vs_r, vs_m, vs_c, x1)
when <op1> is
us2i(0000111010) :
x = x1[15.. 0] << 15
y = x1[31..16] << 15
s2i(0000111011) :
x = x1[15.. 0] << 16
y = x1[31..16] << 16
SaveRegisterD(2, vd_o, vd_x, vd_m, vd_c, x, y)
}
v<op1>.s2q(110100:op1:0:vs_r:vs_m:vs_c:0:0:vd_x:vd_m:vd_c)
{
LoadRegisterS(1, vs_r, vs_m, vs_c, x1)
when <op1> is
uc2i(0000111000) :
x = x1[ 7.. 0] << 23 | x1[ 7.. 0] << 15 | x1[ 7.. 0] << 7 | x1[ 7.. 1]
y = x1[15.. 8] << 23 | x1[15.. 8] << 15 | x1[15.. 8] << 7 | x1[15.. 9]
z = x1[23..16] << 23 | x1[23..16] << 15 | x1[23..16] << 7 | x1[23..17]
w = x1[31..24] << 23 | x1[31..24] << 15 | x1[31..24] << 7 | x1[31..25]
c2i(0000111001) :
x = x1[ 7.. 0] << 24
y = x1[15.. 8] << 24
z = x1[23..16] << 24
w = x1[31..24] << 24
SaveRegisterD(4, vd_x, vd_m, vd_c, x, y, z, w)
}
v<op1>.p(110100:op1:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
LoadRegisterS(2, vs_o, vs_x, vs_m, vs_c, x1, y1)
when <op1> is
abs(0000000001) :
x = (x1 < 0.0f) ? -x1 : x1
y = (y1 < 0.0f) ? -y1 : y1
sin(0000010010) :
x = sin(PI * x1 / 2.0f)
y = sin(PI * y1 / 2.0f)
cos(0000010011) :
x = cos(PI * x1 / 2.0f)
y = cos(PI * y1 / 2.0f)
asin(0000010111) :
x = 2.0f * asin(x1) / PI
y = 2.0f * asin(y1) / PI
bfy1(0001000010) :
x = x1 + y1
y = x1 - y1
SaveRegisterD(2, vd_o, vd_x, vd_m, vd_c, x, y)
}
v<op1>.p2s(110100:op1:0:vs_o:vs_x:vs_m:vs_c:1:vd_r:vd_m:vd_c)
{
LoadRegisterS(2, vs_o, vs_x, vs_m, vs_c, x1, y1)
when <op1> is
fad(0001000110) :
x = x1 + y1
avg(0001000111) :
x = (x1 + y1) / 2.0f
SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}
v<op1>.p2q(110100:op1:0:vs_o:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
LoadRegisterS(2, vs_o, vs_x, vs_m, vs_c, x1, y1)
when <op1> is
us2i(0000111010) :
x = x1[15.. 0] << 15
y = x1[31..16] << 15
z = y1[15.. 0] << 15
w = y1[31..16] << 15
s2i(0000111011) :
x = x1[15.. 0] << 16
y = x1[31..16] << 16
z = y1[15.. 0] << 16
w = y1[31..16] << 16
SaveRegisterD(4, vd_x, vd_m, vd_c, x, y, z, w)
}
// v<op1>.t: one-operand triple instructions.
// Pattern is 6 + 10 + 1 + 7 + 1 + 7 = 32 bits; the two size bits are 1 (before vs) and 0 (before vd),
// matching the concrete vabs.t / vsin.t encodings.
v<op1>.t(110100:op1:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
    LoadRegisterS(3, vs_o, vs_x, vs_m, vs_c, x1, y1, z1)
    when <op1> is
        abs(0000000001) :
            x = (x1 < 0.0f) ? -x1 : x1
            y = (y1 < 0.0f) ? -y1 : y1
            z = (z1 < 0.0f) ? -z1 : z1
        sin(0000010010) :
            x = sin(PI * x1 / 2.0f)
            y = sin(PI * y1 / 2.0f)
            z = sin(PI * z1 / 2.0f)
        cos(0000010011) :
            x = cos(PI * x1 / 2.0f)
            y = cos(PI * y1 / 2.0f)
            z = cos(PI * z1 / 2.0f)
        asin(0000010111) :
            x = 2.0f * asin(x1) / PI
            y = 2.0f * asin(y1) / PI
            z = 2.0f * asin(z1) / PI
    SaveRegisterD(3, vd_o, vd_x, vd_m, vd_c, x, y, z)
}
// v<op1>.t2s: triple-to-single reductions (vfad.t / vavg.t).
// Pattern is 6 + 10 + 1 + 7 + 1 + 7 = 32 bits, the same layout as v<op1>.t with a single destination.
v<op1>.t2s(110100:op1:1:vs_o:vs_x:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
    LoadRegisterS(3, vs_o, vs_x, vs_m, vs_c, x1, y1, z1)
    when <op1> is
        fad(0001000110) :
            x = x1 + y1 + z1
        avg(0001000111) :
            x = (x1 + y1 + z1) / 3.0f
    SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}
v<op1>.q(110100:op1:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
LoadRegisterS(4, vs_x, vs_m, vs_c, x1, y1, z1, w1)
when <1, op1> is
abs(0000000001) :
x = (x1 < 0.0f) ? -x1 : x1
y = (y1 < 0.0f) ? -y1 : y1
z = (z1 < 0.0f) ? -z1 : z1
w = (w1 < 0.0f) ? -w1 : w1
sin(0000010010) :
x = sin(PI * x1 / 2.0f)
y = sin(PI * y1 / 2.0f)
z = sin(PI * z1 / 2.0f)
w = sin(PI * w1 / 2.0f)
cos(0000010011) :
x = cos(PI * x1 / 2.0f)
y = cos(PI * y1 / 2.0f)
z = cos(PI * z1 / 2.0f)
w = cos(PI * w1 / 2.0f)
bfy1(0001000010) :
x = x1 + y1
y = x1 - y1
z = z1 + w1
w = z1 - w1
bfy2(0001000011) :
x = x1 + z1
y = y1 + w1
z = x1 - z1
w = y1 - w1
SaveRegisterD(4, vd_x, vd_m, vd_c, x, y, z, w)
}
v<op1>.q2s(110100:op1:1:0:vs_x:vs_m:vs_c:1:vd_r:vd_m:vd_c)
{
LoadRegisterS(4, vs_x, vs_m, vs_c, x1, y1, z1, w1)
when <1, op1> is
fad(0001000110) :
x = x1 + y1 + z1 + w1
avg(0001000111) :
x = (x1 + y1 + z1 + w1) / 4.0f
SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}
v<op2>.s(0110:op2:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
LoadRegisterS(1, vs_r, vs_m, vs_c, x1)
LoadRegisterT(1, vt_r, vt_m, vt_c, x2)
when <op2> is
add(00000) :
x = x1 + x2
sub(00001) :
x = x1 - x2
div(00111):
x = x1 / x2
mul(01000):
x = x1 * x2
min(11010) :
x = (x1 < x2) ? x1 : x2
max(11011) :
x = (x1 > x2) ? x1 : x2
SaveRegisterD(1, vd_r, vd_m, vd_c, x)
}
// v<op2>.p: two-operand pair instructions, applied component-wise.
// The source operand is loaded into (x1, y1) and the target operand into (x2, y2).
v<op2>.p(0110:op2:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
    LoadRegisterS(2, vs_o, vs_x, vs_m, vs_c, x1, y1)
    LoadRegisterT(2, vt_o, vt_x, vt_m, vt_c, x2, y2)
    when <op2> is
        add(00000) :
            x = x1 + x2
            y = y1 + y2
        sub(00001) :
            x = x1 - x2
            y = y1 - y2
        div(00111):
            x = x1 / x2
            y = y1 / y2
        mul(01000):
            x = x1 * x2
            y = y1 * y2
        min(11010) :
            x = (x1 < x2) ? x1 : x2
            y = (y1 < y2) ? y1 : y2
        max(11011) :
            x = (x1 > x2) ? x1 : x2
            y = (y1 > y2) ? y1 : y2
    SaveRegisterD(2, vd_o, vd_x, vd_m, vd_c, x, y)
}
// v<op2>.t: two-operand triple instructions, applied component-wise.
// The source operand is loaded into (x1, y1, z1) and the target operand into (x2, y2, z2).
// max uses '>' on every component, mirroring min's '<'.
v<op2>.t(0110:op2:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
    LoadRegisterS(3, vs_o, vs_x, vs_m, vs_c, x1, y1, z1)
    LoadRegisterT(3, vt_o, vt_x, vt_m, vt_c, x2, y2, z2)
    when <op2> is
        add(00000) :
            x = x1 + x2
            y = y1 + y2
            z = z1 + z2
        sub(00001) :
            x = x1 - x2
            y = y1 - y2
            z = z1 - z2
        div(00111):
            x = x1 / x2
            y = y1 / y2
            z = z1 / z2
        mul(01000):
            x = x1 * x2
            y = y1 * y2
            z = z1 * z2
        min(11010) :
            x = (x1 < x2) ? x1 : x2
            y = (y1 < y2) ? y1 : y2
            z = (z1 < z2) ? z1 : z2
        max(11011) :
            x = (x1 > x2) ? x1 : x2
            y = (y1 > y2) ? y1 : y2
            z = (z1 > z2) ? z1 : z2
    SaveRegisterD(3, vd_o, vd_x, vd_m, vd_c, x, y, z)
}
// v<op2>.q: two-operand quad instructions, applied component-wise.
// The source operand is loaded into (x1, y1, z1, w1) and the target operand into (x2, y2, z2, w2).
// max uses '>' on every component, mirroring min's '<'.
v<op2>.q(0110:op2:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
    LoadRegisterS(4, vs_x, vs_m, vs_c, x1, y1, z1, w1)
    LoadRegisterT(4, vt_x, vt_m, vt_c, x2, y2, z2, w2)
    when <op2> is
        add(00000) :
            x = x1 + x2
            y = y1 + y2
            z = z1 + z2
            w = w1 + w2
        sub(00001) :
            x = x1 - x2
            y = y1 - y2
            z = z1 - z2
            w = w1 - w2
        div(00111):
            x = x1 / x2
            y = y1 / y2
            z = z1 / z2
            w = w1 / w2
        mul(01000):
            x = x1 * x2
            y = y1 * y2
            z = z1 * z2
            w = w1 * w2
        min(11010) :
            x = (x1 < x2) ? x1 : x2
            y = (y1 < y2) ? y1 : y2
            z = (z1 < z2) ? z1 : z2
            w = (w1 < w2) ? w1 : w2
        max(11011) :
            x = (x1 > x2) ? x1 : x2
            y = (y1 > y2) ? y1 : y2
            z = (z1 > z2) ? z1 : z2
            w = (w1 > w2) ? w1 : w2
    SaveRegisterD(4, vd_x, vd_m, vd_c, x, y, z, w)
}
}
|
Last edited by hlide on Mon Jun 16, 2008 12:57 am; edited 9 times in total |
|
| Back to top |
|
 |
hlide
Joined: 10 Sep 2006 Posts: 750
|
Posted: Sun Jun 15, 2008 2:27 am Post subject: |
|
|
VFPU instruction set
PART 2/2
status : WIP
| Code: |
group vfpu
{
mfv(01001000011:rt:000000000:vd_r:vd_m:vd_c)
{
cycles="6/0"
operation=
"
1: GPR[rt] = VFPR[vd_m][vd_c][vd_r]
"
}
mfvc(01001000011:rt:000000001:imm7)
{
cycles="6/0"
operation=
"
1: GPR[rt] = VFCR[imm7]
"
}
mtv(01001000111:rt:000000000:vd_r:vd_m:vd_c)
{
cycles="1/3"
operation=
"
1: VFPR[vd_m][vd_c][vd_r] = GPR[rt]
"
}
mtvc(01001000111:rt:000000001:imm7)
{
cycles="1/3"
operation=
"
1: VFCR[imm7] = GPR[rt]
"
}
bvf(01001001000:imm3:00:imm16)
{
cycles="?"
operation=
"
1: c = VFPU_CC[imm3] == 0
execute instruction at PC+4
2: if (c)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bvfl(01001001000:imm3:10:imm16)
{
cycles="?"
operation=
"
1: c = VFPU_CC[imm3] == 0
if (c)
execute instruction at PC+4
2: if (c)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bvt(01001001000:imm3:01:imm16)
{
cycles="?"
operation=
"
1: c = VFPU_CC[imm3] == 1
execute instruction at PC+4
2: if (c)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
bvtl(01001001000:imm3:11:imm16)
{
cycles="?"
operation=
"
1: c = VFPU_CC[imm3] == 1
if (c)
execute instruction at PC+4
2: if (c)
PC = PC + (s16(imm16) << 2)
"
delayslot="1"
}
vadd.s(011000000:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.s add
}
vadd.p(011000000:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.p add
}
vadd.t(011000000:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.t add
}
vadd.q(011000:000:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.q add
}
vsub.s(011000001:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.s sub
}
vsub.p(011000001:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.p sub
}
vsub.t(011000001:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.t sub
}
vsub.q(011000001:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.q sub
}
vdiv.s(011000111:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="14/17"
prefixes="taken,taken,taken"
see v<op2>.s div
}
vdiv.p(011000111:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="28/31"
prefixes="prohibed,prohibed,prohibed"
see v<op2>.p div
}
vdiv.t(011000111:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="42/45"
prefixes="prohibed,prohibed,prohibed"
see v<op2>.t div
}
// vdiv.q: quad divide. Opcode prefix 0110 + op2 00111 (div), as for vdiv.s/.p/.t;
// 011000001 is vsub.
vdiv.q(011000111:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
    cycles="56/59"
    prefixes="prohibed,prohibed,prohibed"
    see v<op2>.q div
}
vmul.s(011001000:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.s mul
}
vmul.p(011001000:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.p mul
}
vmul.t(011001000:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.t mul
}
vmul.q(011001000:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="taken,taken,taken"
see v<op2>.q mul
}
// vmin.s: single minimum. Pattern is 9 + 7 + 1 + 7 + 1 + 7 = 32 bits;
// both size bits are 0 for a single, as in vadd.s / vmul.s.
vmin.s(011011010:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
    cycles="1/3"
    prefixes="taken,taken,taken"
    see v<op2>.s min
}
vmin.p(011011010:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="taken,taken,taken"
see v<op2>.p min
}
vmin.t(011011010:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="taken,taken,taken"
see v<op2>.t min
}
vmin.q(011011010:0:vt_x:vt_m:vt_c:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="taken,taken,taken"
see v<op2>.q min
}
// Single form of vmax. Like every other .s entry, the two size-selector bits are
// 0 and 0; a stray extra "1:" before the first selector made this pattern one
// field longer than its siblings.
vmax.s(011011011:vt_r:vt_m:vt_c:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/3"
prefixes="taken,taken,taken"
see v<op2>.s max
}
// Pair form of vmax. Uses the vmax opcode prefix 011011011 (as vmax.s does);
// the previous 011011010 prefix is vmin's and collided with vmin.p.
vmax.p(011011011:vt_o:vt_x:vt_m:vt_c:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="taken,taken,taken"
see v<op2>.p max
}
// Triple form of vmax. Uses the vmax opcode prefix 011011011 (as vmax.s does);
// the previous 011011010 prefix is vmin's and collided with vmin.t.
vmax.t(011011011:vt_o:vt_x:vt_m:vt_c:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="taken,taken,taken"
see v<op2>.t max
}
// Quad form of vmax. Uses the vmax opcode prefix 011011011 (as vmax.s does);
// the previous 011011000 prefix matched neither vmin nor vmax.
vmax.q(011011011:0:vt_x:vt_m:vt_c:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="taken,taken,taken"
see v<op2>.q max
}
vabs.s(1101000000000001:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/3"
prefixes="taken,ignored,taken"
see v<op1>.s abs
}
vabs.p(1101000000000001:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="taken,ignored,taken"
see v<op1>.p abs
}
vabs.t(1101000000000001:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="taken,ignored,taken"
see v<op1>.t abs
}
vabs.q(1101000000000001:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="taken,ignored,taken"
see v<op1>.q abs
}
vsin.s(1101000000010010:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/7"
prefixes="taken,ignored,taken"
see v<op1>.s sin
}
vsin.p(1101000000010010:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="2/8"
prefixes="prohibed,ignored,prohibed"
see v<op1>.p sin
}
vsin.t(1101000000010010:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="3/9"
prefixes="prohibed,ignored,prohibed"
see v<op1>.t sin
}
vsin.q(1101000000010010:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="4/10"
prefixes="prohibed,ignored,prohibed"
see v<op1>.q sin
}
vcos.s(1101000000010011:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/7"
prefixes="taken,ignored,taken"
see v<op1>.s cos
}
vcos.p(1101000000010011:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="2/8"
prefixes="prohibed,ignored,prohibed"
see v<op1>.p cos
}
vcos.t(1101000000010011:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="3/9"
prefixes="prohibed,ignored,prohibed"
see v<op1>.t cos
}
// Quad form of vcos. The source register is written "0:vs_x:vs_m:vs_c" as in
// vsin.q and the other .q entries; the leading "0:" was missing, leaving the
// pattern one bit short.
vcos.q(1101000000010011:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="4/10"
prefixes="prohibed,ignored,prohibed"
see v<op1>.q cos
}
vidt.p(1101000000000011:0:0000000:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="ignored,ignored,taken"
see v<op0>.p idt
}
vidt.q(1101000000000011:1:0000000:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="ignored,ignored,taken"
see v<op0>.q idt
}
vzero.s(1101000000000110:0:0000000:0:vd_r:vd_m:vd_c)
{
cycles="1/3"
prefixes="ignored,ignored,taken"
see v<op0>.s zero
}
vzero.p(1101000000000110:0:0000000:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="ignored,ignored,taken"
see v<op0>.p zero
}
// Triple form of vzero. Uses the vzero opcode 1101000000000110 shared by
// vzero.s/.p/.q; the previous ...0111 value is vone's and collided with vone.t.
vzero.t(1101000000000110:1:0000000:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="ignored,ignored,taken"
see v<op0>.t zero
}
vzero.q(1101000000000110:1:0000000:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="ignored,ignored,taken"
see v<op0>.q zero
}
vone.s(1101000000000111:0:0000000:0:vd_r:vd_m:vd_c)
{
cycles="1/3"
prefixes="ignored,ignored,taken"
see v<op0>.s one
}
vone.p(1101000000000111:0:0000000:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="ignored,ignored,taken"
see v<op0>.p one
}
vone.t(1101000000000111:1:0000000:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="ignored,ignored,taken"
see v<op0>.t one
}
vone.q(1101000000000111:1:0000000:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="ignored,ignored,taken"
see v<op0>.q one
}
vasin.s(1101000000010111:0:vs_r:vs_m:vs_c:0:vd_r:vd_m:vd_c)
{
cycles="1/7"
prefixes="taken,ignored,taken"
see v<op1>.s asin
}
vasin.p(1101000000010111:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="2/8"
prefixes="prohibed,ignored,prohibed"
see v<op1>.p asin
}
vasin.t(1101000000010111:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="3/9"
prefixes="prohibed,ignored,prohibed"
see v<op1>.t asin
}
// Quad form of vasin.
// NOTE(review): cycles changed from "4/9" to "4/10" to follow the 1/7, 2/8, 3/9,
// 4/10 progression used by vasin.s/.p/.t and by vsin/vcos — confirm on hardware.
vasin.q(1101000000010111:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="4/10"
prefixes="prohibed,ignored,prohibed"
see v<op1>.q asin
}
vuc2i.s(1101000000111000:0:vs_r:vs_m:vs_c:0:0:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="prohibed,ignored,maskonly"
see v<op1>.s2q uc2i
}
vc2i.s(1101000000111001:0:vs_r:vs_m:vs_c:0:0:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="prohibed,ignored,maskonly"
see v<op1>.s2q c2i
}
vus2i.s(1101000000111010:0:vs_r:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="prohibed,ignored,maskonly"
see v<op1>.s2p us2i
}
vus2i.p(1101000000111010:0:vs_o:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="prohibed,ignored,maskonly"
see v<op1>.p2q us2i
}
// Single-to-pair form of vs2i (see s2p). The destination is written in the
// pair form "vd_o:vd_x" exactly as in the parallel vus2i.s entry; the previous
// "0:vd_x" is the quad destination form used by the *2q conversions.
vs2i.s(1101000000111011:0:vs_r:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="prohibed,ignored,maskonly"
see v<op1>.s2p s2i
}
vs2i.p(1101000000111011:0:vs_o:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/3"
prefixes="prohibed,ignored,maskonly"
see v<op1>.p2q s2i
}
vfad.p(1101000001000110:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/7"
prefixes="taken,ignored,taken"
see v<op1>.p2s fad
}
vfad.t(1101000001000110:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/7"
prefixes="taken,ignored,taken"
see v<op1>.t2s fad
}
vfad.q(1101000001000110:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/7"
prefixes="taken,ignored,taken"
see v<op1>.q2s fad
}
vavg.p(1101000001000111:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/7"
prefixes="taken,ignored,taken"
see v<op1>.p2s avg
}
vavg.t(1101000001000111:1:vs_o:vs_x:vs_m:vs_c:0:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/7"
prefixes="taken,ignored,taken"
see v<op1>.t2s avg
}
vavg.q(1101000001000111:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/7"
prefixes="taken,ignored,taken"
see v<op1>.q2s avg
}
vbfy1.p(1101000001000010:0:vs_o:vs_x:vs_m:vs_c:1:vd_o:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="prohibed,ignored,prohibed"
see v<op1>.p bfy1
}
vbfy1.q(1101000001000010:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="prohibed,ignored,prohibed"
see v<op1>.q bfy1
}
vbfy2.q(1101000001000011:1:0:vs_x:vs_m:vs_c:1:0:vd_x:vd_m:vd_c)
{
cycles="1/5"
prefixes="prohibed,ignored,prohibed"
see v<op1>.q bfy2
}
// Source prefix: loads the 20-bit VFPU_PFXS control value (bits 19..0 as listed
// in the operation below). The four bits between the 2-bit selector and negw
// are unused padding; they are spelled out as 0000 so the pattern covers a full
// 32-bit word (the fields alone only add up to 28 bits).
vpfxs(110111:00:0000:negw:negz:negy:negx:cstw:cstz:csty:cstx:absw:absz:absy:absx:swzw:swzz:swzy:swzx)
{
cycles="1/0"
prefixes="overridden,ignored,ignored"
operation=
"
1: VFPU_PFXS[1..0] = swzx
VFPU_PFXS[3..2] = swzy
VFPU_PFXS[5..4] = swzz
VFPU_PFXS[7..6] = swzw
VFPU_PFXS[ 8] = absx
VFPU_PFXS[ 9] = absy
VFPU_PFXS[10] = absz
VFPU_PFXS[11] = absw
VFPU_PFXS[12] = cstx
VFPU_PFXS[13] = csty
VFPU_PFXS[14] = cstz
VFPU_PFXS[15] = cstw
VFPU_PFXS[16] = negx
VFPU_PFXS[17] = negy
VFPU_PFXS[18] = negz
VFPU_PFXS[19] = negw
set VPFXS as taken
"
}
// Target prefix: loads the 20-bit VFPU_PFXT control value (bits 19..0 as listed
// in the operation below). The four bits between the 2-bit selector and negw
// are unused padding; they are spelled out as 0000 so the pattern covers a full
// 32-bit word (the fields alone only add up to 28 bits).
vpfxt(110111:01:0000:negw:negz:negy:negx:cstw:cstz:csty:cstx:absw:absz:absy:absx:swzw:swzz:swzy:swzx)
{
cycles="1/0"
prefixes="ignored,overridden,ignored"
operation=
"
1: VFPU_PFXT[1..0] = swzx
VFPU_PFXT[3..2] = swzy
VFPU_PFXT[5..4] = swzz
VFPU_PFXT[7..6] = swzw
VFPU_PFXT[ 8] = absx
VFPU_PFXT[ 9] = absy
VFPU_PFXT[10] = absz
VFPU_PFXT[11] = absw
VFPU_PFXT[12] = cstx
VFPU_PFXT[13] = csty
VFPU_PFXT[14] = cstz
VFPU_PFXT[15] = cstw
VFPU_PFXT[16] = negx
VFPU_PFXT[17] = negy
VFPU_PFXT[18] = negz
VFPU_PFXT[19] = negw
set VPFXT as taken
"
}
// Destination prefix: loads the 12-bit VFPU_PFXD control value (four 2-bit
// saturation selectors in bits 7..0, four write-mask bits in bits 11..8).
// Fixes: the mask bits are stored in VFPU_PFXD (they were written to VFPU_PFXS),
// and the unused padding is 12 zero bits so the pattern covers a full 32-bit
// word (with 8 zeros the fields only added up to 28 bits).
vpfxd(110111:10:000000000000:mskw:mskz:msky:mskx:satw:satz:saty:satx)
{
cycles="1/0"
prefixes="ignored,ignored,overridden"
operation=
"
1: VFPU_PFXD[1..0] = satx
VFPU_PFXD[3..2] = saty
VFPU_PFXD[5..4] = satz
VFPU_PFXD[7..6] = satw
VFPU_PFXD[ 8] = mskx
VFPU_PFXD[ 9] = msky
VFPU_PFXD[10] = mskz
VFPU_PFXD[11] = mskw
set VPFXD as taken
"
}
// Load one 32-bit word from memory into a single VFPU register.
// The word-alignment check mirrors the one already present in sv.s.
// NOTE(review): assumes an unaligned load faults the same way the store does — confirm.
lv.s(110010:rs:vt_m:vt_c:imm14:vt_r)
{
cycles="?"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
if (address & 3)
raise address error exception
else
VFPR[vt_m][vt_c][vt_r] = MemoryRead32(address)
"
}
// Load a 16-byte-aligned quadword from memory into four VFPU registers.
// vt_x selects which of the two register index positions is iterated 0..3.
// Fixes: the selector is the encoded field vt_x (the operation referred to an
// undefined rt_x), and the memory read is no longer guarded by the selector —
// it must happen for both orientations.
lv.q(110110:rs:vt_m:vt_c:imm14:0:vt_x)
{
cycles="?"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
if (address & 15)
raise address error exception
else
qword = MemoryRead128(address)
if (vt_x)
VFPR[vt_m][0][vt_c] = qword[ 31.. 0]
VFPR[vt_m][1][vt_c] = qword[ 63.. 32]
VFPR[vt_m][2][vt_c] = qword[ 95.. 64]
VFPR[vt_m][3][vt_c] = qword[127.. 96]
else
VFPR[vt_m][vt_c][0] = qword[ 31.. 0]
VFPR[vt_m][vt_c][1] = qword[ 63.. 32]
VFPR[vt_m][vt_c][2] = qword[ 95.. 64]
VFPR[vt_m][vt_c][3] = qword[127.. 96]
"
}
// Store one single VFPU register to memory as a 32-bit word.
// Fix: the major opcode is 111010; the previous 110010 is the lv.s opcode, so
// the two patterns were indistinguishable. (Same load/store opcode relation as
// lv.q 110110 / sv.q 111110.)
sv.s(111010:rs:vt_m:vt_c:imm14:vt_r)
{
cycles="7/?"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
if (address & 3)
raise address error exception
else
MemoryWrite32(address, VFPR[vt_m][vt_c][vt_r])
"
}
// Store the "left" part of a quad register to a word-aligned address: the
// enclosing 16-byte line is read, the words selected by address[3..2] are
// replaced from the register, and the line is written back.
// Fixes:
//  - the line read is unconditional (it was guarded by "else if (vt_x)", so
//    data was undefined for vt_x = 0);
//  - the write-back targets the same aligned line that was read, and no longer
//    references "wb", which is not a field of this encoding.
// NOTE(review): third MemoryWrite128 argument set to 0 (no write-back flag) — confirm.
svl.q(111101:rs:vt_m:vt_c:imm14:0:vt_x)
{
cycles="cached:7/?,uncached:10/?"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
if (address & 3)
raise address error exception
else
data = MemoryRead128(address[31..4]<<4)
if (vt_x)
qword[ 31.. 0] = VFPR[vt_m][0][vt_c]
qword[ 63.. 32] = VFPR[vt_m][1][vt_c]
qword[ 95.. 64] = VFPR[vt_m][2][vt_c]
qword[127.. 96] = VFPR[vt_m][3][vt_c]
else
qword[ 31.. 0] = VFPR[vt_m][vt_c][0]
qword[ 63.. 32] = VFPR[vt_m][vt_c][1]
qword[ 95.. 64] = VFPR[vt_m][vt_c][2]
qword[127.. 96] = VFPR[vt_m][vt_c][3]
when (address[3..2]) is
0 :
data[ 31.. 0] = qword[127..96]
1 :
data[ 31.. 0] = qword[ 95..64]
data[ 63..32] = qword[127..96]
2 :
data[ 31.. 0] = qword[ 63..32]
data[ 63..32] = qword[ 95..64]
data[ 95..64] = qword[127..96]
3 :
data[ 31.. 0] = qword[ 31.. 0]
data[ 63..32] = qword[ 63..32]
data[ 95..64] = qword[ 95..64]
data[127..96] = qword[127..96]
MemoryWrite128(address[31..4]<<4, data, 0)
"
}
// Store the "right" part of a quad register to a word-aligned address: the
// enclosing 16-byte line is read, the words selected by address[3..2] are
// replaced from the register, and the line is written back.
// Fixes:
//  - the line read is unconditional (it was guarded by "else if (vt_x)", so
//    data was undefined for vt_x = 0);
//  - the write-back targets the same aligned line that was read, and no longer
//    references "wb", which is not a field of this encoding.
// NOTE(review): third MemoryWrite128 argument set to 0 (no write-back flag) — confirm.
svr.q(111101:rs:vt_m:vt_c:imm14:1:vt_x)
{
cycles="cached:7/?,uncached:10/?"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
if (address & 3)
raise address error exception
else
data = MemoryRead128(address[31..4]<<4)
if (vt_x)
qword[ 31.. 0] = VFPR[vt_m][0][vt_c]
qword[ 63.. 32] = VFPR[vt_m][1][vt_c]
qword[ 95.. 64] = VFPR[vt_m][2][vt_c]
qword[127.. 96] = VFPR[vt_m][3][vt_c]
else
qword[ 31.. 0] = VFPR[vt_m][vt_c][0]
qword[ 63.. 32] = VFPR[vt_m][vt_c][1]
qword[ 95.. 64] = VFPR[vt_m][vt_c][2]
qword[127.. 96] = VFPR[vt_m][vt_c][3]
when (address[3..2]) is
0 :
data[ 31.. 0] = qword[ 31.. 0]
data[ 63..32] = qword[ 63..32]
data[ 95..64] = qword[ 95..64]
data[127..96] = qword[127..96]
1 :
data[ 31.. 0] = qword[ 63..32]
data[ 63..32] = qword[ 95..64]
data[ 95..64] = qword[127..96]
2 :
data[ 31.. 0] = qword[ 95..64]
data[ 63..32] = qword[127..96]
3 :
data[ 31.. 0] = qword[127..96]
MemoryWrite128(address[31..4]<<4, data, 0)
"
}
// Store four VFPU registers to a 16-byte-aligned address as one quadword.
// vt_x selects which of the two register index positions is iterated 0..3;
// wb is the encoded bit passed through to MemoryWrite128.
// Fix: the non-faulting path is a plain "else" — the previous "else if (vt_x)"
// skipped the whole store when vt_x was 0 and made the inner else unreachable.
sv.q(111110:rs:vt_m:vt_c:imm14:wb:vt_x)
{
cycles="cached:7/?,uncached:10/?,uncached-wb:1/0"
operation=
"
1: address = GPR[rs] + (s32(imm14)<<2)
if (address & 15)
raise address error exception
else
if (vt_x)
qword[ 31.. 0] = VFPR[vt_m][0][vt_c]
qword[ 63.. 32] = VFPR[vt_m][1][vt_c]
qword[ 95.. 64] = VFPR[vt_m][2][vt_c]
qword[127.. 96] = VFPR[vt_m][3][vt_c]
else
qword[ 31.. 0] = VFPR[vt_m][vt_c][0]
qword[ 63.. 32] = VFPR[vt_m][vt_c][1]
qword[ 95.. 64] = VFPR[vt_m][vt_c][2]
qword[127.. 96] = VFPR[vt_m][vt_c][3]
MemoryWrite128(address, qword, wb)
"
}
vcmovf.s()
{
}
vcmovf.p()
{
}
vcmovf.t()
{
}
vcmovf.q()
{
}
// Placeholder (encoding not yet documented). Renamed from a second, duplicate
// "vcmovf.s": this slot sits in the vcmovt.s/.p/.t/.q run.
vcmovt.s()
{
}
// Placeholder (encoding not yet documented). Renamed from a second, duplicate
// "vcmovf.p": this slot sits in the vcmovt.s/.p/.t/.q run.
vcmovt.p()
{
}
vcmovt.t()
{
}
vcmovt.q()
{
}
vcmp.s()
{
}
vcmp.p()
{
}
vcmp.t()
{
}
vcmp.q()
{
}
vcrs.t()
{
}
vdet.p()
{
}
// NOTE(review): these four empty vdiv placeholders duplicate the vdiv.s/.p/.t/.q
// entries that are already specified with encodings earlier in this group;
// they look like leftovers from the alphabetical to-do list.
vdiv.s()
{
}
vdiv.p()
{
}
vdiv.t()
{
}
vdiv.q()
{
}
vdot.s()
{
}
vdot.p()
{
}
vdot.t()
{
}
vdot.q()
{
}
vexp2.s()
{
}
vexp2.p()
{
}
vexp2.t()
{
}
vexp2.q()
{
}
vf2h.p()
{
}
vf2h.q()
{
}
vf2id.s()
{
}
vf2id.p()
{
}
vf2id.t()
{
}
vf2id.q()
{
}
vf2in.s()
{
}
vf2in.p()
{
}
vf2in.t()
{
}
vf2in.q()
{
}
// NOTE(review): second set of vf2id placeholders — identical to the
// vf2id.s/.p/.t/.q placeholders declared just before the vf2in entries.
vf2id.s()
{
}
vf2id.p()
{
}
vf2id.t()
{
}
vf2id.q()
{
}
vf2iu.s()
{
}
vf2iu.p()
{
}
vf2iu.t()
{
}
vf2iu.q()
{
}
vf2iz.s()
{
}
vf2iz.p()
{
}
vf2iz.t()
{
}
vf2iz.q()
{
}
vfim.s()
{
}
vflush()
{
}
vh2f.s()
{
}
vh2f.p()
{
}
vhdp.p()
{
}
vhdp.t()
{
}
vhdp.q()
{
}
vhtfm2.p()
{
}
vhtfm3.t()
{
}
vhtfm4.q()
{
}
vi2c.q()
{
}
vi2f.s()
{
}
vi2f.p()
{
}
vi2f.t()
{
}
vi2f.q()
{
}
vi2s.p()
{
}
vi2s.q()
{
}
vi2uc.q()
{
}
vi2us.p()
{
}
vi2us.q()
{
}
viim.s()
{
}
vlgb.s()
{
}
vlog2.s()
{
}
vlog2.p()
{
}
vlog2.t()
{
}
vlog2.q()
{
}
vmfvc()
{
}
vmidt.p()
{
}
vmidt.t()
{
}
vmidt.q()
{
}
vmmov.p()
{
}
vmmov.t()
{
}
vmmov.q()
{
}
vmmul.p()
{
}
vmmul.t()
{
}
vmmul.q()
{
}
vmone.p()
{
}
vmone.t()
{
}
vmone.q()
{
}
vmov.s()
{
}
vmov.p()
{
}
vmov.t()
{
}
vmov.q()
{
}
vmscl.p()
{
}
vmscl.t()
{
}
vmscl.q()
{
}
vmtvc()
{
}
vmzero.p()
{
}
vmzero.t()
{
}
vmzero.q()
{
}
vneg.s()
{
}
vneg.p()
{
}
vneg.t()
{
}
vneg.q()
{
}
vnop()
{
}
vnrcp.s()
{
}
vnrcp.p()
{
}
vnrcp.t()
{
}
vnrcp.q()
{
}
vnsin.s()
{
}
vnsin.p()
{
}
vnsin.t()
{
}
vnsin.q()
{
}
vocp.s()
{
}
vocp.p()
{
}
vocp.t()
{
}
vocp.q()
{
}
vrcp.s()
{
}
vrcp.p()
{
}
vrcp.t()
{
}
vrcp.q()
{
}
vrexp2.s()
{
}
vrexp2.p()
{
}
vrexp2.t()
{
}
vrexp2.q()
{
}
vrndf1.s()
{
}
vrndf1.p()
{
}
vrndf1.t()
{
}
vrndf1.q()
{
}
vrndf2.s()
{
}
vrndf2.p()
{
}
vrndf2.t()
{
}
vrndf2.q()
{
}
vrndi.s()
{
}
vrndi.p()
{
}
vrndi.t()
{
}
vrndi.q()
{
}
vrnds.s()
{
}
vrot.p()
{
}
vrot.t()
{
}
vrot.q()
{
}
vrsq.s()
{
}
vrsq.p()
{
}
vrsq.t()
{
}
vrsq.q()
{
}
vsat0.s()
{
}
vsat0.p()
{
}
vsat0.t()
{
}
vsat0.q()
{
}
vsat1.s()
{
}
vsat1.p()
{
}
vsat1.t()
{
}
vsat1.q()
{
}
vsbn.s()
{
}
vsbz.s()
{
}
vscl.s()
{
}
vscl.p()
{
}
vscl.t()
{
}
vscl.q()
{
}
vscmp.s()
{
}
vscmp.p()
{
}
vscmp.t()
{
}
vscmp.q()
{
}
vsge.s()
{
}
vsge.p()
{
}
vsge.t()
{
}
vsge.q()
{
}
vsgn.s()
{
}
vsgn.p()
{
}
vsgn.t()
{
}
vsgn.q()
{
}
vslt.s()
{
}
vslt.p()
{
}
vslt.t()
{
}
vslt.q()
{
}
vsocp.s()
{
}
vsocp.p()
{
}
vsqrt.s()
{
}
vsqrt.p()
{
}
vsqrt.t()
{
}
vsqrt.q()
{
}
vsrt1.q()
{
}
vsrt2.q()
{
}
vsrt3.q()
{
}
vsrt4.q()
{
}
vsync()
{
}
vt4444.q()
{
}
vt5551.q()
{
}
// Placeholder (encoding not yet documented). Mnemonic corrected from "vt5651.q":
// the colour-conversion instructions are vt4444.q, vt5551.q and vt5650.q.
vt5650.q()
{
}
vtfm2.p()
{
}
vtfm3.t()
{
}
vtfm4.q()
{
}
vwbn.s()
{
}
} |
Last edited by hlide on Mon Jun 16, 2008 1:08 am; edited 3 times in total |
|
| Back to top |
|
 |
crazyc
Joined: 17 Jun 2005 Posts: 410
|
Posted: Sun Jun 15, 2008 2:42 am Post subject: |
|
|
| hlide wrote: | for those who want to make a PSP emulator using either an interpreter or a dynarec and want to know how you feel when trying to implement VFPU, I have only one word to my mind : HELL !
| Well, it's no worse than x86 modrm. |
|
| Back to top |
|
 |
J.F.
Joined: 22 Feb 2004 Posts: 2906
|
Posted: Sun Jun 15, 2008 4:56 am Post subject: |
|
|
| This is a really awesome thread. I appreciate the effort you're putting into this, hlide. I'm kind of an assembly language nut, having done the majority of my early commercial work in 100% assembly. The more info there is like this, the better. :) |
|
| Back to top |
|
 |
hlide
Joined: 10 Sep 2006 Posts: 750
|
Posted: Sun Jun 15, 2008 7:23 am Post subject: |
|
|
@CrazyC : maybe, but making the same operation in a normal C code is absolutely crazy to code and very slow. I don't even dare to think how to exploit vectorial SSE instructions in X86 to emulate VFPU. I'm pretty sure PSP emulator's authors were panicking when considering vfpu emulation.
For a dynarec, the situation may be better to optimize but I don't think we could easily issue vectorial SSE instructions even this way.
NOTE:
I'm using a new approach for the VFPU: factorizing operations as much as possible |
|
| Back to top |
|
 |
hlide
Joined: 10 Sep 2006 Posts: 750
|
|
| Back to top |
|
 |
J.F.
Joined: 22 Feb 2004 Posts: 2906
|
Posted: Mon Jul 28, 2008 8:53 am Post subject: |
|
|
Just thought I'd post a correction I just verified. For ins and ext, the constants are start_bit:count, not msb:lsb. So if you wanted to extract bits 16 through 23, you'd use "ext v0, a0, 16, 8".
Further info: a count of 0 is the same as 32, also, bits don't wrap around. For example, "ext v0, a0, 4, 0" extracts bits 4 through 35, where anything above 31 is just 0. "ext v0, a0, 0, 0" would theoretically just be the same as moving the long, but the assembler won't compile that. N:0 and 0:N are fine, it's just 0:0 that won't compile... but as I said, that's just a move, so it doesn't matter.
Last edited by J.F. on Mon Jul 28, 2008 9:16 am; edited 1 time in total |
|
| Back to top |
|
 |
hlide
Joined: 10 Sep 2006 Posts: 750
|
Posted: Mon Jul 28, 2008 9:09 am Post subject: |
|
|
| J.F. wrote: | | Just thought I'd post a correction I just verified. For ins and ext, the constants are start_bit:count, not msb:lsb. So if you wanted to extract bits 16 through 23, you'd use "ext v0, a0, 16, 8". |
lsb = least significant bit = start position bit
msb = most significant bit = last position bit = start position bit+count-1
| Code: |
ext(011111:rs:rt:(msb-lsb):lsb:000000) <=> ext(011111:rs:rt:(count-1):start_bit:000000)
ins(011111:rs:rt:msb:lsb:000100) <=> ins(011111:rs:rt:(start_bit+count-1):start_bit:000100)
|
So it should be okay. Just because we write them in ASM as:
INS Rt, Rs, Pos, Count
EXT Rt, Rs, Pos, Count
does not mean that Pos and Count are encoded that way. What you see is the encoding bitmap of the INS/EXT instructions from bit 31 to bit 0.
And don't ask me why the MIPS32R2 authors chose this encoding; it is probably something like this:
EXT Rt, Rs, Pos, Len <=> Rt = zext((Rs>>Pos)&((1<<Len)-1)) <=> zext((Rs>>lsb)&((1<<(msb-lsb+1))-1))
INS Rt, Rs, Pos, Len <=> Rt[Pos+Len-1:Pos] = Rs[Len-1:0] <=> Rt[msb:lsb] = Rs[msb-lsb:0]
Just some speculation, of course :) |
|
| Back to top |
|
 |
J.F.
Joined: 22 Feb 2004 Posts: 2906
|
Posted: Mon Jul 28, 2008 9:23 am Post subject: |
|
|
| hlide wrote: | | J.F. wrote: | | Just thought I'd post a correction I just verified. For ins and ext, the constants are start_bit:count, not msb:lsb. So if you wanted to extract bits 16 through 23, you'd use "ext v0, a0, 16, 8". |
lsb = least significant bit = start position bit
msb = most significant bit = last position bit = start position bit+count-1
| Code: |
ext(011111:rs:rt:(msb-lsb):lsb:000000) <=> ext(011111:rs:rt:(count-1):start_bit:000000)
ins(011111:rs:rt:msb:lsb:000100) <=> ins(011111:rs:rt:(start_bit+count-1):start_bit:000100)
|
So it should be okay. This is not because we write them in ASM :
INS Rt, Rs, Pos, Count
EXT Rt, Rs, Pos, Count
that Pos and Count would be encoded the same way. What you see is the encoding bitmap of INS/EXT instructions from bit 31 to bit 0. |
Okay, so it's the assembler syntax versus the encoded value. |
|
| Back to top |
|
 |
shepherd
Joined: 02 Sep 2008 Posts: 2
|
Posted: Fri Feb 06, 2009 1:11 pm Post subject: |
|
|
| Good job, TKS! |
|
| Back to top |
|
 |
hlide
Joined: 10 Sep 2006 Posts: 750
|
Posted: Sat Feb 07, 2009 5:25 am Post subject: |
|
|
| what does TKS mean ? |
|
| Back to top |
|
 |
Wally

Joined: 26 Sep 2005 Posts: 672
|
Posted: Sat Feb 07, 2009 8:00 am Post subject: |
|
|
| hlide wrote: | | what does TKS mean ? |
Thanks |
|
| Back to top |
|
 |
amorphophallus
Joined: 29 May 2009 Posts: 2
|
Posted: Fri May 29, 2009 1:43 am Post subject: VFPU spec |
|
|
Just noob's question.
I've read the Vector Processor paragraph of the document "The Naked PSP", which says "128 32bits registers".
Does this mean 128 separate 32-bit registers? _________________ ---
amorph |
|
| Back to top |
|
 |
hlide
Joined: 10 Sep 2006 Posts: 750
|
Posted: Fri May 29, 2009 4:27 am Post subject: Re: VFPU spec |
|
|
| amorphophallus wrote: | Just noob's question.
Ive read document The Naked PSP paragraph of Vector Processor, sentence "128 32bits registers".
Dose this mean 128 pieces of 32bits registers? |
VFPU has 8 banks of 16 registers ==> 128 registers. It does mean you can use up to 128 float scalar registers |
|
| Back to top |
|
 |
amorphophallus
Joined: 29 May 2009 Posts: 2
|
Posted: Fri May 29, 2009 9:35 pm Post subject: |
|
|
| thx :) |
|
| Back to top |
|
 |
enemykila
Joined: 17 Mar 2010 Posts: 1
|
Posted: Wed Mar 24, 2010 9:34 pm Post subject: |
|
|
Hi, does anybody know where to get the "Allegrex" manual??
It would be very useful for a homework from the University...
Thanks |
|
| Back to top |
|
 |
adrahil
Joined: 16 Mar 2006 Posts: 277
|
Posted: Thu Mar 25, 2010 8:30 am Post subject: |
|
|
Get a standard MIPS manual, add in some stuff from this forum...
What kind of homework would require you to have the documentation to a proprietary processor? |
|
| Back to top |
|
 |
SilverSpring
Joined: 27 Feb 2007 Posts: 115
|
|
| Back to top |
|
 |
|
|
You cannot post new topics in this forum You cannot reply to topics in this forum You cannot edit your posts in this forum You cannot delete your posts in this forum You cannot vote in polls in this forum
|
Powered by phpBB © 2001, 2005 phpBB Group
|