a6d7f78cef61f8cd5e039d60060d3b50017fc106
[libreriscv.git] / Comparative_analysis_Harmonised_RVP_vs_Andes_Packed_SIMD_ISA_proposal.mdwn
1 # Comparative analysis with Andes Packed ISA proposal
2
3 ## Register file
4
5 | Register | Andes ISA | Harmonised RVP ISA |
6 | ------------------ | ------------------------- | ------------------- |
7 | v0 | Hardwired zero | Hardwired zero |
8 | v1 | 32bit GPR or Vector[4xB/2xH] | Predicate masks |
9 | v2 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xSB] |
10 | v3 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xSB] |
11 | v4 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xSB] |
12 | v5 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xSB] |
13 | v6 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xSB] |
14 | v7 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xSB] |
15 | v8 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xUB] |
16 | v9 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xUB] |
17 | v10 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xUB] |
18 | v11 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xUB] |
19 | v12 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xUB] |
20 | v13 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xUB] |
21 | v14 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xUB] |
22 | v15 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[4xUB] |
23 | v16 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xSH] |
24 | v17 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xSH] |
25 | v18 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xSH] |
26 | v19 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xSH] |
27 | v20 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xSH] |
28 | v21 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xSH] |
29 | v22 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xSH] |
30 | v23 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xSH] |
31 | v24 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xUH] |
32 | v25 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xUH] |
33 | v26 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xUH] |
34 | v27 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xUH] |
35 | v28 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xUH] |
36 | v29 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[2xUH] |
37 | v30 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[1xUW] |
38 | v31 | 32bit GPR or Vector[4xB/2xH] | 32bit GPR or Vector[1xUW] |
39
40
41
42 | RADD16 rt, ra, rb | Signed Halving add | RADD (r16 <= rt,ra,rb <= r23), mm=00|
43 | URADD16 rt, ra, rb | Unsigned Halving add | RADD (r24 <= rt,ra,rb <= r29), mm=00|
44 | KADD16 rt, ra, rb | Signed Saturating add | VADD (r16 <= rt,ra,rb <= r23), mm=01|
45 | UKADD16 rt, ra, rb | Unsigned Saturating add | VADD (r24 <= rt,ra,rb <= r29), mm=01|
46 | SUB16 rt, ra, rb | Subtract | VSUB (r16 <= rt,ra,rb <= r29), mm=00|
47 | RSUB16 rt, ra, rb | Signed Halving sub | RSUB (r16 <= rt,ra,rb <= r23), mm=00|
48
49 ## 16-bit Arithmetic
50
51 | Andes Mnemonic | 16-bit Instruction | Harmonised RVP Equivalent |
52 | ------------------ | ------------------------- | ------------------- |
53 | ADD16 rt, ra, rb | Add | VADD (r16 <= rt,ra,rb <= r29), mm=00|
54 | RADD16 rt, ra, rb | Signed Halving add | RADD (r16 <= rt,ra,rb <= r23), mm=00|
55 | URADD16 rt, ra, rb | Unsigned Halving add | RADD (r24 <= rt,ra,rb <= r29), mm=00|
56 | KADD16 rt, ra, rb | Signed Saturating add | VADD (r16 <= rt,ra,rb <= r23), mm=01|
57 | UKADD16 rt, ra, rb | Unsigned Saturating add | VADD (r24 <= rt,ra,rb <= r29), mm=01|
58 | SUB16 rt, ra, rb | Subtract | VSUB (r16 <= rt,ra,rb <= r29), mm=00|
59 | RSUB16 rt, ra, rb | Signed Halving sub | RSUB (r16 <= rt,ra,rb <= r23), mm=00|
60 | URSUB16 rt, ra, rb | Unsigned Halving sub | RSUB (r24 <= rt,ra,rb <= r29), mm=00|
61 | KSUB16 rt, ra, rb | Signed Saturating sub | VSUB (r16 <= rt,ra,rb <= r23), mm=01|
62 | UKSUB16 rt, ra, rb | Unsigned Saturating sub | VSUB (r24 <= rt,ra,rb <= r29), mm=01|
63 | CRAS16 rt, ra, rb | Cross Add & Sub | |
64 | RCRAS16 rt, ra, rb | Signed Halving Cross Add & Sub | |
65 | URCRAS16 rt, ra, rb| Unsigned Halving Cross Add & Sub | |
66 | KCRAS16 rt, ra, rb | Signed Saturating Cross Add & Sub | |
67 | UKCRAS16 rt, ra, rb| Unsigned Saturating Cross Add & Sub | |
68 | CRSA16 rt, ra, rb | Cross Sub & Add | |
69 | RCRSA16 rt, ra, rb | Signed Halving Cross Sub & Add | |
70 | URCRSA16 rt, ra, rb| Unsigned Halving Cross Sub & Add | |
71 | KCRSA16 rt, ra, rb | Signed Saturating Cross Sub & Add | |
72 | UKCRSA16 rt, ra, rb| Unsigned Saturating Cross Sub & Add | |
73
74 ## 8-bit Arithmetic
75
76 | Andes Mnemonic | 8-bit Instruction | Harmonised RVP Equivalent |
77 | ------------------ | ------------------------- | ------------------- |
78 | ADD8 rt, ra, rb | Add | VADD (r2 <= rt,ra,rb <= r15), mm=00 |
79 | RADD8 rt, ra, rb | Signed Halving add | RADD (r2 <= rt,ra,rb <= r7), mm=00 |
80 | URADD8 rt, ra, rb | Unsigned Halving add | RADD (r8 <= rt,ra,rb <= r15), mm=00 |
81 | KADD8 rt, ra, rb | Signed Saturating add | VADD (r2 <= rt,ra,rb <= r7), mm=01 |
82 | UKADD8 rt, ra, rb | Unsigned Saturating add | VADD (r8 <= rt,ra,rb <= r15), mm=01 |
83 | SUB8 rt, ra, rb | Subtract | VSUB (r2 <= rt,ra,rb <= r15), mm=00 |
84 | RSUB8 rt, ra, rb | Signed Halving sub | RSUB (r2 <= rt,ra,rb <= r7), mm=00 |
85 | URSUB8 rt, ra, rb | Unsigned Halving sub | RSUB (r8 <= rt,ra,rb <= r15), mm=00 |
86 | KSUB8 rt, ra, rb | Signed Saturating sub | VSUB (r2 <= rt,ra,rb <= r7), mm=01 |
87 | UKSUB8 rt, ra, rb | Unsigned Saturating sub | VSUB (r8 <= rt,ra,rb <= r15), mm=01 |
88
89 ## 16-bit Shifts
90
91 SRA[I]16/SRL[I]16/SLL[I]16 are to be mapped to VOP shift instructions in the same manner as ADD16/SUB16.
92
93 The “K” (Saturation) and “u” (Rounding) variants could be encoded using VOP’s mm field (mm=01 is saturated or rounded shift, mm=00 is standard VOP shift)
94
95 | Andes Mnemonic | 16-bit Instruction | Harmonised RVP Equivalent |
96 | ------------------ | ------------------------- | ------------------- |
97 | SRA16 rt, ra, rb | Shift right arithmetic | VSRA (r16 <= rt,ra,rb <= r29), mm=00|
98 | SRAI16 rt, ra, im | Shift right arithmetic imm | VSRAI (r16 <= rt,ra <= r29), mm=00|
99 | SRA16.u rt, ra, rb | Rounding Shift right arithmetic | VSRA (r16 <= rt,ra,rb <= r29), mm=01|
100 | SRAI16.u rt, ra, im | Rounding Shift right arithmetic imm | VSRAI (r16 <= rt,ra <= r29), mm=01|
101 | SRL16 rt, ra, rb | Shift right logical | VSRL (r16 <= rt,ra,rb <= r29), mm=00|
102 | SRLI16 rt, ra, im | Shift right logical imm | VSRLI (r16 <= rt,ra <= r29), mm=00|
103 | SRL16.u rt, ra, rb | Rounding Shift right logical | VSRL (r16 <= rt,ra,rb <= r29), mm=01|
104 | SRLI16.u rt, ra, im | Rounding Shift right logical imm | VSRLI (r16 <= rt,ra <= r29), mm=01|
105 | SLL16 rt, ra, rb | Shift left logical | VSLL (r16 <= rt,ra,rb <= r29), mm=00|
106 | SLLI16 rt, ra, im | Shift left logical imm | VSLLI (r16 <= rt,ra <= r29), mm=00|
107 | KSLL16 rt, ra, rb | Saturating Shift left logical | VSLL (r16 <= rt,ra,rb <= r29), mm=01|
108 | KSLLI16 rt, ra, im | Saturating Shift left logical imm | VSLLI (r16 <= rt,ra <= r29), mm=01|
109 | KSLRA16 rt, ra, rb | Saturating Shift left logical or Shift right arithmetic ||
110 | KSLRA16.u rt, ra, rb | Saturating Shift left logical or Rounding Shift right arithmetic ||
111
112
113 ## 8-bit Shifts
114
115 Andes SIMD Packed ISA omits 8-bit shifts, but these can be encoded in Harmonised RVP as follows:
116
117 | Andes Mnemonic | 8-bit Instruction | Harmonised RVP Equivalent |
118 | ------------------ | ------------------------- | ------------------- |
119 | n/a | Shift right arithmetic | VSRA (r2 <= rt,ra,rb <= r15), mm=00|
120 | n/a | Shift right arithmetic imm | VSRAI (r2 <= rt,ra <= r15), mm=00|
121 | n/a | Rounding Shift right arithmetic | VSRA (r2 <= rt,ra,rb <= r15), mm=01|
122 | n/a | Rounding Shift right arithmetic imm | VSRAI (r2 <= rt,ra <= r15), mm=01|
123 | n/a | Shift right logical | VSRL (r2 <= rt,ra,rb <= r15), mm=00|
124 | n/a | Shift right logical imm | VSRLI (r2 <= rt,ra <= r15), mm=00|
125 | n/a | Rounding Shift right logical | VSRL (r2 <= rt,ra,rb <= r15), mm=01|
126 | n/a | Rounding Shift right logical imm | VSRLI (r2 <= rt,ra <= r15), mm=01|
127 | n/a | Shift left logical | VSLL (r2 <= rt,ra,rb <= r15), mm=00|
128 | n/a | Shift left logical imm | VSLLI (r2 <= rt,ra <= r15), mm=00|
129 | n/a | Saturating Shift left logical | VSLL (r2 <= rt,ra,rb <= r15), mm=01|
130 | n/a | Saturating Shift left logical imm | VSLLI (r2 <= rt,ra <= r15), mm=01|
131
132 ## 16-bit Comparison instructions
133
134 | Andes Mnemonic | 16-bit Instruction | Harmonised RVP Equivalent |
135 | ------------------ | ------------------------- | ------------------- |
136 | CMPEQ16 rt, ra, rb | Compare equal | VSEQ (r16 <= rt,ra,rb <= r29), mm=00|
137 | SCMPLT16 rt, ra, rb | Signed Compare less than | !VSGT (r16 <= rt,ra,rb <= r23), mm=00|
138 | SCMPLE16 rt, ra, rb | Signed Compare less or equal | VSLE (r16 <= rt,ra,rb <= r23), mm=00|
139 | UCMPLT16 rt, ra, rb | Unsigned Compare less than | !VSGT (r24 <= rt,ra,rb <= r29), mm=00|
140 | UCMPLE16 rt, ra, rb | Unsigned Compare less or equal | VSLE (r24 <= rt,ra,rb <= r29), mm=00|
141
142 ## 8-bit Comparison instructions
143
144 | Andes Mnemonic | 8-bit Instruction | Harmonised RVP Equivalent |
145 | ------------------ | ------------------------- | ------------------- |
146 | CMPEQ8 rt, ra, rb | Compare equal | VSEQ (r2 <= rt,ra,rb <= r15), mm=00|
147 | SCMPLT8 rt, ra, rb | Signed Compare less than | !VSGT (r2 <= rt,ra,rb <= r7), mm=00|
148 | SCMPLE8 rt, ra, rb | Signed Compare less or equal | VSLE (r2 <= rt,ra,rb <= r7), mm=00|
149 | UCMPLT8 rt, ra, rb | Unsigned Compare less than | !VSGT (r8 <= rt,ra,rb <= r15), mm=00|
150 | UCMPLE8 rt, ra, rb | Unsigned Compare less or equal | VSLE (r8 <= rt,ra,rb <= r15), mm=00|
151
152 ## 16-bit Miscellaneous instructions
153
154 | Andes Mnemonic | 16-bit Instruction | Harmonised RVP Equivalent |
155 | ------------------ | ------------------------ | ------------------- |
156 | SMIN16 rt, ra, rb | Signed minimum | VMIN (r16 <= rt,ra,rb <= r23), mm=00|
157 | UMIN16 rt, ra, rb | Unsigned minimum | VMIN (r24 <= rt,ra,rb <= r29), mm=00|
158 | SMAX16 rt, ra, rb | Signed maximum | VMAX (r16 <= rt,ra,rb <= r23), mm=00|
159 | UMAX16 rt, ra, rb | Unsigned maximum | VMAX (r24 <= rt,ra,rb <= r29), mm=00|
160 | SCLIP16 rt, ra, rb | Signed clip | ?VCLIP (r16 <= rt,ra,rb <= r23), mm=01|
161 | UCLIP16 rt, ra, rb | Unsigned clip | ?VCLIP (r24 <= rt,ra,rb <= r29), mm=01|
162 | KMUL16 rt, ra, rb | Signed multiply 16x16->16 | VMUL (r16 <= rt,ra,rb <= r23), mm=01|
163 | KMULX16 rt, ra, rb | Signed crossed multiply 16x16->16 | |
164 | SMUL16 rt, ra, rb | Signed multiply 16x16->32 | VMUL (30 <= rt <= 31, r16 <= ra,rb <= r23), mm=00|
165 | SMULX16 rt, ra, rb | Signed crossed multiply 16x16->32 | |
166 | UMUL16 rt, ra, rb | Unsigned multiply 16x16->32 | VMUL (30 <= rt <= 31, r24 <= ra,rb <= r29), mm=00|
167 | UMULX16 rt, ra, rb | Unsigned crossed multiply 16x16->32 | |
168 | KABS16 rt, ra, rb | Saturated absolute value | VSGNX (r16 <= rt <= r29, r16 <= ra,rb <= r23, mm=01) |
169
170 ## 8-bit Miscellaneous instructions
171
172 | Andes Mnemonic | 8-bit Instruction | Harmonised RVP Equivalent |
173 | ------------------ | ------------------------- | ------------------- |
174 | SMIN8 rt, ra, rb | Signed minimum | VMIN (r2 <= rt,ra,rb <= r7), mm=00|
175 | UMIN8 rt, ra, rb | Unsigned minimum | VMIN (r8 <= rt,ra,rb <= r15), mm=00|
176 | SMAX8 rt, ra, rb | Signed maximum | VMAX (r2 <= rt,ra,rb <= r7), mm=00|
177 | UMAX8 rt, ra, rb | Unsigned maximum | VMAX (r8 <= rt,ra,rb <= r15), mm=00|
178 | KABS8 rt, ra, rb | Saturated absolute value | VSGNX (r2 <= rt <= r15, r2 <= ra,rb <= r7, mm=01) |