dac39dca48e81ad4d4530db95645dd784eda5281
[libreriscv.git] / isa_conflict_resolution / ioctl.mdwn
1 # pluggable extensions
2
3 == RB ==
4
5 This proposal adds standardised extension instructions to the RV
6 instruction set by introducing a small fixed number N (e.g. N = 8) of
7 R-type opcodes xcmd0, .. xcmd7. Each takes in rs1 a 12 bit "logical unit" (lun)
8 identifying an (on or off chip) (sub)device with at most N commands that can execute the command, together with xlen - 12 bits of additional data. Based on the logical unit bits in rs1, the CPU routes each of the 8 commands to a specific interface on a specific (sub)device on the CPU. Effectively, the xcmd0, ... xcmd7 instructions become "virtual method" opcodes that can be overloaded for different extension (sub)devices.
9
10 The specific value of the lun is supposed to be convenient for the cpu to route the xcmd to the proper device, and is thus unstandardised. To construct the lun portably we define a further R-type instruction, xext. It takes a 20 bit universally unique identifier (UUID) identifying an interface with up to 8 almost (but not quite) standard R-type instructions that are implemented by the extension (sub)device. The restriction to 8 commands is not problematic because a hardware implementation can (and indeed is expected to) implement several interfaces, each as a subdevice. An optional sequence number identifies a specific enumerated device on the cpu that implements the interface as a subdevice. For convenience, xext also or's in the bits rs2[0..XLEN-12]. If the UUID is not recognised, 0 is returned.
11
12 Remark: xext is a purely stateless translation (and packing) operation (unlike previous proposals).
13
14 The proposal allows people to define an extension interface of 8 (slightly crippled) R-type instructions implemented by an extension device (e.g. an IP tile), configured at manufacturing or even startup time of the CPU. A sequence like
15
16 //fake UUID
17 lui rd 0xEDCBA
18 xext rd rd rs1
19 xcmd0 rd rd rs2
20
21 then acts like a single instruction EDCBA_cmd0 rd rs1 rs2, with the caveats that, annoyingly, rs1 can only use bits 0..XLEN-12, and that the sequence is not indivisible (but the crucial semantics that you might want indivisible are in xcmd0). This can be used almost exactly like an R-type instruction rock rd, rs1, rs2 (in fact, the sequence can be opcode fused if so wished).
22
23 Programmatically, the instructions in the interface are just a set of glorified assembler macros
24
25 org.tinker.tinker:RocknRoll{
26 uuid : 0xABCDE
27 rock rd rs1 rs2 : xcmd0 rd rs1 rs2
28 roll rd rs1 rs2 : xcmd1 rd rs1 rs2
29 }
30
31 (or perhaps just non glorified standard assembler macros or defines with long names e.g.)
32
33 #define org_tinker_tinker__RocknRoll__interface_uuid 0xABCDE
34 #define org_tinker_tinker__RocknRoll__rock(rd, rs1, rs2) xcmd0 rd, rs1, rs2
35 #define org_tinker_tinker__RocknRoll__roll(rd, rs1, rs2) xcmd1 rd, rs1, rs2
36
37 so the same sequence is more readable as
38
39 lui rd org_tinker_tinker__RocknRoll__interface_uuid
40 xext rd rd rs1
41 org_tinker_tinker__RocknRoll__rock(rd, rd, rs2)
42
43
44 If several instructions of the same interface are used one can also have code like
45 lui t1 org_tinker_tinker__RocknRoll__interface_uuid
46
47 xext t1 t1 zero
48 xcmd0 a5, t1, a0 // org_tinker_tinker__RocknRoll__rock(a5, t1, a0)
49 xcmd1 t2, t1, a1 // org_tinker_tinker__RocknRoll__roll(t2, t1, a5)
50 xcmd0 a0, t1, t2 // org_tinker_tinker__RocknRoll__rock(a0, t1, t2)
51
52 This amortises the cost of the xext instruction.
53
54 ==Implications for the RiscV ecosystem ==
55
56 Having a standardised overloadable interface simply avoids much of the
57 need for isa extensions for hardware with non standard interfaces and
58 semantics. This is analogous to the way that the standardised overloadable
59 ioctl interface of the kernel almost completely avoids the need for
60 extending the kernel with syscalls for the myriad of hardware devices
61 with their specific interfaces and semantics.
62
63 Since the rs1 input of the overloaded xcmd instructions is taken
64 by the interface cookie (the lun), they are restricted in use compared to a normal
65 R-type instruction (it is possible to pass XLEN - 12 bits of additional info by
66 or-ing it with the cookie). Delegation is also expected to come at a small
67 additional performance price compared to a "native" instruction. This
68 should be an acceptable tradeoff in most cases.
69
70 The expanded flexibility comes at a cost: the standard can specify the
71 semantics of the delegation mechanism and the interfacing with the rest
72 of the cpu, but the actual semantics of the overloaded instructions can
73 only be defined by the designer of the interface. Likewise, a device
74 can be conforming as far as delegation and interaction with the CPU
75 is concerned, but whether the hardware is conforming to the semantics
76 of the interface is outside the scope of the spec. Being able to specify
77 that semantics using the methods used for RV itself is clearly very
78 valuable. One impetus for doing that is using it for purposes of its own,
79 effectively freeing opcode space for other purposes. Also, some interfaces
80 may become de facto or de jure standards themselves, necessitating
81 hardware to implement competing interfaces. I.e., facilitating a
82 free-for-all may lead to standards proliferation. C'est la vie.
83
84 The only "ISA-collisions" that can still occur are in the 20 bit (~10^6)
85 interface identifier space, with 12 more bits to identify a device on
86 a hart that implements the interface. One suggestion is setting aside
87 2^19 id's that are handed out for a small fee by a central (automated)
88 registration (making sure the space is not just claimed), while the
89 remaining 2^19 are used as a good hash on a long, plausibly globally
90 unique human readable interface name. This gives implementors the choice
91 between a guaranteed private identifier paying a fee, or relying on low
92 probabilities. On RV64 the UUID can also be extended to 52 bits (> 10^15).
93
94
95 == Description of the extension as C functions ==
96
97 /* register format of rs1 for xext instructions: a 12 bit device sequence number plus the interface UUID in the remaining XLEN - 12 bits (intended layout: dev in the low bits, UUID loaded by lui above it) */
98 typedef struct uuid_device{
99     long dev:12;
100     long uuid: 8*sizeof(long) - 12;
101 } uuid_device_t, uuid_dev_t; /* uuid_dev_t is the spelling used by the prototypes and structs that follow */
102
103 /* register format for rd of xext and rs1 for xcmd instructions: packs the 12 bit lun together with XLEN - 12 bits of data */
104 typedef struct lun_data{
105     long lun:12;
106     long data: 8*sizeof(long) - 12;
107 } lun_data_t;
108
109 /* proposed R-type instructions
110 xext rd rs1 rs2
111 xcmd0 rd rs1 rs2
112 xcmd1 rd rs1 rs2
113 ...
114 xcmd7 rd rs1 rs2
115 */
116
117 lun_data_t xext(uuid_dev_t rs1, long rs2);   /* translates the (UUID, device) pair in rs1 to a lun and packs it with data from rs2 */
118 long xcmd0(lun_data_t rs1, long rs2);        /* command 0 of the interface selected by rs1.lun */
119 long xcmd1(lun_data_t rs1, long rs2);
120 ...
121 long xcmd<N-1>(lun_data_t rs1, long rs2);    /* last command, i.e. xcmd7 for N = 8 */
122
123 /* Entry point that an implementing device presents to the cpu: subdevice_xcmd carries the subdevice id or'ed with the xcmd number shifted left by 12, rs1 and rs2 are forwarded from the instruction. */
124 typedef long device_fn(unsigned short subdevice_xcmd,
125                        lun_data_t rs1, long rs2);
126
127 /* cpu internal datatypes */
128
129 typedef /* cpu internal 12 bit handle for one interface on one device; id 0 signals a failed lookup */
130 struct lun{
131     unsigned short id:12;
132 } lun_t;
133
134 struct uuid_dev2lun{ /* one entry of lun_map: a (UUID, device) pair and the lun it translates to */
135     uuid_dev_t uuid_dev;
136     lun_t lun;
137 };
138
139 struct device_subdevice{ /* routing target of a lun: the device entry point plus the subdevice index on that device */
140     device_fn *device_addr;
141     unsigned short subdeviceId : 12;
142 };
143
144 struct lun2device_subdevice{ /* one entry of device_subdevice_map: a lun and the device/subdevice it routes to */
145     lun_t lun;
146     struct device_subdevice devAddr_subdevId;
147 };
148
149 struct uuid_dev2lun lun_map[];
150
151 /* associative memory magic to map UUID + device to a convenient 12 bit lun, returns (lun_t){0} on failure; the table searched has the element type of lun_map */
152
153 lun_t cpu_lookup_lun(const struct uuid_dev2lun* lun_map, uuid_dev_t uuid_dev);
154
155 lun_data_t xext(uuid_dev_t rs1, long rs2)
156 { /* stateless: looks up the lun for the (UUID, device) pair in rs1 and packs it with the low XLEN - 12 bits of rs2; the lun is 0 when the lookup fails */
157     lun_t lun = cpu_lookup_lun(lun_map, rs1);
158     /* mask with an unsigned long constant: a plain int 1 must not be shifted by XLEN - 12 bits, and % would keep the sign of rs2 */
159     return (lun_data_t){.lun = lun.id, .data = (unsigned long)rs2 & ((1UL << (8*sizeof(long) - 12)) - 1)};
160 }
161
162 struct lun2device_subdevice device_subdevice_map[];
163
164 /* maps lun to struct device_subdevice pair. In particular for lun = 0, returns (struct device_subdevice){NULL,0} on failure, */
165 struct device_subdevice cpu_lookup_device_subdevice(const struct lun2device_subdevice* dev_subdevice_map, short lun);
166
167 /* functional description of the delegating xcmd0 .. xcmd7 instructions: look up the device for rs1.lun, trap if the lookup yields no device, otherwise call the device with the subdevice id or'ed with the command number k shifted left by 12 */
168 template<k = 0..N-1> //pretend this is C
169 long xcmd<k>(lun_data_t rs1, long rs2)
170 {
171     struct device_subdevice dev_subdev = cpu_lookup_device_subdevice(device_subdevice_map, rs1.lun);
172     if(dev_subdev.device_addr == NULL)
173         trap("Illegal instruction");
174
175     return dev_subdev.device_addr(dev_subdev.subdeviceId | (k << 12), rs1, rs2);
176 }
177
178
179
180 Example:
181
182 #define COM_BIGBUCKS__FROBATE__INTERFACE_UUID       0xABCDE /* com.bigbucks:Frobate */
183 #define ORG_TINKER_TINKER__ROCKNROLL_INTERFACE_UUID 0x12345 /* org.tinker.tinker:RocknRoll */
184 #define ORG_TINKER_TINKER__JAZZ_INTERFACE_UUID      0xD0B0D /* org.tinker.tinker:Jazz */
185
186 com.bigbucks:Frobate{
187     uuid: COM_BIGBUCKS__FROBATE__INTERFACE_UUID
188     frobate rd rs1 rs2 : xcmd0 rd rs1 rs2
189     foo rd rs1 rs2 : xcmd1 rd rs1 rs2
190     bar rd rs1 rs2 : xcmd2 rd rs1 rs2
191 }
192
193 org.tinker.tinker:RocknRoll{
194     uuid: ORG_TINKER_TINKER__ROCKNROLL_INTERFACE_UUID
195     rock rd rs1 rs2: xcmd0 rd rs1 rs2
196     roll rd rs1 rs2: xcmd1 rd rs1 rs2
197 }
198
199 long com_bigbucks__device1(unsigned short subdevice_xcmd, lun_data_t rs1, long rs2)
200 { /* dispatches on subdevice id | (xcmd number << 12): subdevice 0 is the Frobate interface, subdevice 1 the RocknRoll interface; the signature matches device_fn */
201     switch(subdevice_xcmd) {
202     case 0 | (0 << 12) /* com.bigbucks:Frobate:frobate */ : return device1_frobate(rs1, rs2);
203     case 0 | (1 << 12) /* com.bigbucks:Frobate:foo */ : return device1_foo(rs1, rs2);
204     case 0 | (2 << 12) /* com.bigbucks:Frobate:bar */ : return device1_bar(rs1, rs2);
205     case 1 | (0 << 12) /* org.tinker.tinker:RocknRoll:rock */ : return device1_rock(rs1, rs2);
206     case 1 | (1 << 12) /* org.tinker.tinker:RocknRoll:roll */ : return device1_roll(rs1, rs2);
207     default: trap("hardware configuration error"); /* no such subdevice/command on this device */
208     }
209 }
210
211 org.tinker.tinker:Jazz{
212     uuid: ORG_TINKER_TINKER__JAZZ_INTERFACE_UUID
213     boogy rd rs1 rs2: xcmd0 rd rs1 rs2
214 }
215
216 long org_tinker_tinker__device2(unsigned short subdevice_xcmd, lun_data_t rs1, long rs2)
217 { /* dispatches on subdevice id | (xcmd number << 12): subdevice 0 is the Frobate interface, subdevice 1 the Jazz interface; the signature matches device_fn */
218     switch(subdevice_xcmd){
219     case 0 | (0 << 12) /* com.bigbucks:Frobate:frobate */: return device2_frobate(rs1, rs2);
220     case 0 | (1 << 12) /* com.bigbucks:Frobate:foo */ : return device2_foo(rs1, rs2);
221     case 0 | (2 << 12) /* com.bigbucks:Frobate:bar */ : return device2_bar(rs1, rs2);
222     case 1 | (0 << 12) /* org_tinker_tinker:Jazz:boogy */: return device2_boogy(rs1, rs2);
223     default: trap("hardware configuration error"); /* no such subdevice/command on this device */
224     }
225 }
226
227 /* routes each lun to a device entry point and the subdevice (interface) index on that device */
228 struct lun2device_subdevice device_subdevice_map[] = {
229     // .lun = 0, error and falls back to trapping xcmd
230     {.lun = 1, .devAddr_subdevId = {fallback, 0 /* ReturnZero */}},
231     {.lun = 2, .devAddr_subdevId = {fallback, 1 /* ReturnMinusOne*/}},
232     // .lun = 3 .. 7 reserved for other fallback RV interfaces
233     // .lun = 8 .. 30 reserved as error numbers, c.li t1 31; bltu rd t1 L_fail tests errors
234     // .lun = 31 reserved out of caution
235     {.lun = 32, .devAddr_subdevId = {com_bigbucks__device1, 0 /* Frobate interface */}},
236     {.lun = 33, .devAddr_subdevId = {com_bigbucks__device1, 1 /* RocknRoll interface */}},
237     {.lun = 34, .devAddr_subdevId = {org_tinker_tinker__device2, 0 /* Frobate interface */}},
238     {.lun = 35, .devAddr_subdevId = {org_tinker_tinker__device2, 1 /* Jazz interface */}},
239 };
240
241
242 /* translates (interface UUID, device sequence number) pairs to luns; designated initialisers are used because dev precedes uuid in the register format */
243 struct uuid_dev2lun lun_map[] = {
244     {.uuid_dev = {.uuid = ORG_RISCV__FALLBACK__RETURN_ZERO__INTERFACE_UUID, .dev = 0}, .lun = 1},
245     {.uuid_dev = {.uuid = ORG_RISCV__FALLBACK__RETURN_MINUSONE__INTERFACE_UUID, .dev = 0}, .lun = 2},
246     {.uuid_dev = {.uuid = COM_BIGBUCKS__FROBATE__INTERFACE_UUID, .dev = 0}, .lun = 32},
247     {.uuid_dev = {.uuid = COM_BIGBUCKS__FROBATE__INTERFACE_UUID, .dev = 1}, .lun = 34}, //sic!
248     {.uuid_dev = {.uuid = ORG_TINKER_TINKER__ROCKNROLL_INTERFACE_UUID, .dev = 0}, .lun = 33}, //sic!
249     {.uuid_dev = {.uuid = ORG_TINKER_TINKER__JAZZ_INTERFACE_UUID, .dev = 0}, .lun = 35}
250 };