Consolidate add/subtract instructions into a single op
[microwatt.git] / decode2.vhdl
1 library ieee;
2 use ieee.std_logic_1164.all;
3 use ieee.numeric_std.all;
4
5 library work;
6 use work.decode_types.all;
7 use work.common.all;
8 use work.helpers.all;
9 use work.insn_helpers.all;
10
entity decode2 is
    port (
        -- Clock: the internal registers are updated on its rising edge.
        clk         : in std_ulogic;
        -- Reset: when '1' the next-state logic forces the issue state
        -- machine to IDLE, clears the outstanding count and loads the
        -- output registers with their *Init constants.
        rst         : in std_ulogic;

        -- Completion pulse: each cycle it is '1' the count of outstanding
        -- instructions is decremented.
        complete_in : in std_ulogic;
        -- Driven to '1' while the single-issue state machine is holding
        -- back the instruction presented on d_in.
        stall_out   : out std_ulogic;

        -- Driven to '1' when d_in.stop_mark is set and no instruction is
        -- outstanding.
        stopped_out : out std_ulogic;

        -- When '1', the valid bit sent to every execution unit is cleared.
        flush_in    : in std_ulogic;

        -- Input bundle from the previous stage; this stage reads its
        -- valid, stop_mark, nia, insn and decode fields.
        d_in        : in Decode1ToDecode2Type;

        -- Registered per-unit outputs: execute, multiply, divider and
        -- load/store.
        e_out       : out Decode2ToExecute1Type;
        m_out       : out Decode2ToMultiplyType;
        d_out       : out Decode2ToDividerType;
        l_out       : out Decode2ToLoadstore1Type;

        -- Register file interface: read data in, read addresses/enables out.
        r_in        : in RegisterFileToDecode2Type;
        r_out       : out Decode2ToRegisterFileType;

        -- CR file interface: CR read data in, read request out.
        c_in        : in CrFileToDecode2Type;
        c_out       : out Decode2ToCrFileType
    );
end entity decode2;
37
38 architecture behaviour of decode2 is
-- States of the single-issue state machine in process decode2_1:
--   IDLE                       - instructions are passed straight through
--   WAIT_FOR_PREV_TO_COMPLETE  - a single-pipe instruction is held back until
--                                the outstanding count drops to zero
--   WAIT_FOR_CURR_TO_COMPLETE  - a single-pipe instruction has been issued and
--                                following instructions are held back until it
--                                completes
type state_type is (IDLE, WAIT_FOR_PREV_TO_COMPLETE, WAIT_FOR_CURR_TO_COMPLETE);

-- Internal bookkeeping registers (not visible on any port).
type reg_internal_type is record
    state : state_type;
    -- Number of issued-but-not-completed instructions.  The range is
    -- constrained so that synthesis infers a small counter instead of a
    -- full 32-bit integer.  The clocked assertion in decode2_0 requires the
    -- registered value to be <= 1; the upper bound of 2 leaves room for that
    -- assertion (rather than a range error) to report an over-issue, and -1
    -- tolerates a completion arriving while nothing is outstanding.
    outstanding : integer range -1 to 2;
end record;
45
-- Output registers of this stage, one record per downstream unit.
type reg_type is record
    e : Decode2ToExecute1Type;   -- to the execute unit   (e_out)
    m : Decode2ToMultiplyType;   -- to the multiply unit  (m_out)
    d : Decode2ToDividerType;    -- to the divider        (d_out)
    l : Decode2ToLoadstore1Type; -- to the load/store unit (l_out)
end record;

-- Registered values and their combinational next-state counterparts.
signal r_int   : reg_internal_type;
signal rin_int : reg_internal_type;
signal r       : reg_type;
signal rin     : reg_type;

-- Result of decoding one source operand.  The field order matters: the
-- decode_input_reg_* functions may build this record positionally.
type decode_input_reg_t is record
    reg_valid : std_ulogic;                     -- '1' when a register read is required
    reg       : std_ulogic_vector(4 downto 0);  -- register number
    data      : std_ulogic_vector(63 downto 0); -- operand value (register data or immediate)
end record;
61
-- Decode source operand A.
--   t        : operand-A selector from the decode table
--   insn_in  : raw 32-bit instruction word
--   reg_data : data read from register-file port 1
-- Returns the register number to read, whether a read is needed, and the
-- operand value.  For NONE no register is read and the value is zero.
function decode_input_reg_a (t : input_reg_a_t; insn_in : std_ulogic_vector(31 downto 0);
                             reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is
    -- Start from the "no operand" result and fill in fields per selector.
    variable operand : decode_input_reg_t :=
        (reg_valid => '0', reg => (others => '0'), data => (others => '0'));
begin
    case t is
        when RA =>
            operand.reg_valid := '1';
            operand.reg       := insn_ra(insn_in);
            operand.data      := reg_data;
        when RA_OR_ZERO =>
            operand.reg_valid := '1';
            operand.reg       := insn_ra(insn_in);
            -- ra_or_zero is defined elsewhere; presumably it substitutes
            -- zero when the RA field is 0 -- confirm against its definition.
            operand.data      := ra_or_zero(reg_data, insn_ra(insn_in));
        when RS =>
            operand.reg_valid := '1';
            operand.reg       := insn_rs(insn_in);
            operand.data      := reg_data;
        when NONE =>
            null;
    end case;
    return operand;
end;
76
-- Decode source operand B.
--   t        : operand-B selector from the decode table
--   insn_in  : raw 32-bit instruction word
--   reg_data : data read from register-file port 2
-- Register selectors (RB, RS) request a register read and forward reg_data.
-- The CONST_* selectors request no read and return an immediate built from
-- instruction fields, widened to 64 bits.  NONE yields zero.
function decode_input_reg_b (t : input_reg_b_t; insn_in : std_ulogic_vector(31 downto 0);
                             reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is
    -- Default: no register read, zero operand.
    variable operand : decode_input_reg_t :=
        (reg_valid => '0', reg => (others => '0'), data => (others => '0'));
begin
    case t is
        when RB =>
            operand.reg_valid := '1';
            operand.reg       := insn_rb(insn_in);
            operand.data      := reg_data;
        when RS =>
            operand.reg_valid := '1';
            operand.reg       := insn_rs(insn_in);
            operand.data      := reg_data;
        when CONST_UI =>
            -- zero-extended immediate
            operand.data := std_ulogic_vector(resize(unsigned(insn_ui(insn_in)), 64));
        when CONST_SI =>
            -- sign-extended immediate
            operand.data := std_ulogic_vector(resize(signed(insn_si(insn_in)), 64));
        when CONST_SI_HI =>
            -- immediate shifted left 16 bits, then sign-extended
            operand.data := std_ulogic_vector(resize(signed(insn_si(insn_in)) & x"0000", 64));
        when CONST_UI_HI =>
            -- immediate shifted left 16 bits, then zero-extended
            -- NOTE(review): extracts the field with insn_si rather than
            -- insn_ui; harmless only if both helpers return the same bits --
            -- confirm against insn_helpers.
            operand.data := std_ulogic_vector(resize(unsigned(insn_si(insn_in)) & x"0000", 64));
        when CONST_LI =>
            -- LI field with two zero bits appended, sign-extended
            operand.data := std_ulogic_vector(resize(signed(insn_li(insn_in)) & "00", 64));
        when CONST_BD =>
            -- BD field with two zero bits appended, sign-extended
            operand.data := std_ulogic_vector(resize(signed(insn_bd(insn_in)) & "00", 64));
        when CONST_DS =>
            -- DS field with two zero bits appended, sign-extended
            operand.data := std_ulogic_vector(resize(signed(insn_ds(insn_in)) & "00", 64));
        when CONST_M1 =>
            -- constant -1 (all ones)
            operand.data := (others => '1');
        when NONE =>
            null;
    end case;
    return operand;
end;
105
-- Decode source operand C.
--   t        : operand-C selector from the decode table
--   insn_in  : raw 32-bit instruction word
--   reg_data : data read from register-file port 3
-- Only RS requests a register read; any other selector yields an all-zero
-- result with reg_valid = '0'.
function decode_input_reg_c (t : input_reg_c_t; insn_in : std_ulogic_vector(31 downto 0);
                             reg_data : std_ulogic_vector(63 downto 0)) return decode_input_reg_t is
begin
    if t = RS then
        return (reg_valid => '1', reg => insn_rs(insn_in), data => reg_data);
    else
        return (reg_valid => '0', reg => (others => '0'), data => (others => '0'));
    end if;
end;
116
-- Select the destination register number for an instruction.
--   t       : output-register selector from the decode table
--   insn_in : raw 32-bit instruction word
-- Returns the RT or RA field of the instruction, or "00000" when the
-- instruction has no destination register.
function decode_output_reg (t : output_reg_a_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic_vector is
begin
    if t = RT then
        return insn_rt(insn_in);
    elsif t = RA then
        return insn_ra(insn_in);
    else
        return "00000";
    end if;
end;
128
-- Work out the "record" (Rc) flag for an instruction.
--   t       : Rc selector from the decode table
--   insn_in : raw 32-bit instruction word
-- RC takes the flag from the instruction word via insn_rc, ONE forces it
-- to '1', and anything else yields '0'.
function decode_rc (t : rc_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic is
    variable rc_bit : std_ulogic := '0';
begin
    if t = RC then
        rc_bit := insn_rc(insn_in);
    elsif t = ONE then
        rc_bit := '1';
    end if;
    return rc_bit;
end;
140 begin
141
-- Clocked process: latches the next-state values computed by decode2_1.
-- It also checks that the registered outstanding count never exceeds 1 and
-- prints a trace line whenever an instruction is about to be issued to any
-- unit.
decode2_0: process(clk)
    variable issuing : boolean;
begin
    if rising_edge(clk) then
        assert r_int.outstanding <= 1
            report "Outstanding bad " & integer'image(r_int.outstanding)
            severity failure;

        -- True when any unit receives a valid instruction this cycle.
        issuing := (rin.e.valid = '1') or (rin.l.valid = '1')
                   or (rin.m.valid = '1') or (rin.d.valid = '1');
        if issuing then
            report "execute " & to_hstring(rin.e.nia);
        end if;

        r_int <= rin_int;
        r     <= rin;
    end if;
end process;
154
-- Register-file read addresses, taken directly from the incoming
-- instruction so that the read data is available to decode2_1.

-- Port 1: RA field for RA / RA_OR_ZERO, RS field for RS, otherwise zero.
with d_in.decode.input_reg_a select r_out.read1_reg <=
    insn_ra(d_in.insn) when RA | RA_OR_ZERO,
    insn_rs(d_in.insn) when RS,
    (others => '0')    when others;

-- Port 2: RB field for RB, RS field for RS, otherwise zero.
with d_in.decode.input_reg_b select r_out.read2_reg <=
    insn_rb(d_in.insn) when RB,
    insn_rs(d_in.insn) when RS,
    (others => '0')    when others;

-- Port 3: RS field for RS, otherwise zero.
with d_in.decode.input_reg_c select r_out.read3_reg <=
    insn_rs(d_in.insn) when RS,
    (others => '0')    when others;

-- CR read request comes straight from the decode table.
c_out.read <= d_in.decode.input_cr;
168
-- Combinational process: builds the operand bundles for the execute,
-- multiply, divide and load/store units from the incoming instruction,
-- enforces single issue for instructions flagged sgl_pipe, tracks the
-- number of outstanding instructions, and computes the next register
-- values (rin, rin_int).  The unit outputs are driven from the registered
-- copy r, so they appear one clock after the instruction arrives on d_in.
decode2_1: process(all)
    variable v : reg_type;
    variable v_int : reg_internal_type;
    variable mul_a : std_ulogic_vector(63 downto 0);
    variable mul_b : std_ulogic_vector(63 downto 0);
    variable decoded_reg_a : decode_input_reg_t;
    variable decoded_reg_b : decode_input_reg_t;
    variable decoded_reg_c : decode_input_reg_t;
    variable signed_division: std_ulogic;
    variable is_valid : std_ulogic;
begin
    v := r;
    v_int := r_int;

    -- Every output bundle starts from its default value each cycle; fields
    -- that are only conditionally assigned below (e.g. v.e.lr,
    -- v.d.is_extended) therefore keep the value from the Init constant.
    v.e := Decode2ToExecute1Init;
    v.l := Decode2ToLoadStore1Init;
    v.m := Decode2ToMultiplyInit;
    v.d := Decode2ToDividerInit;

    decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data);
    decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data);
    decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data);

    r_out.read1_enable <= decoded_reg_a.reg_valid;
    r_out.read2_enable <= decoded_reg_b.reg_valid;
    r_out.read3_enable <= decoded_reg_c.reg_valid;

    -- execute unit
    v.e.nia := d_in.nia;
    v.e.insn_type := d_in.decode.insn_type;
    v.e.read_reg1 := decoded_reg_a.reg;
    v.e.read_data1 := decoded_reg_a.data;
    v.e.read_reg2 := decoded_reg_b.reg;
    v.e.read_data2 := decoded_reg_b.data;
    v.e.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
    v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
    v.e.cr := c_in.read_cr_data;
    v.e.invert_a := d_in.decode.invert_a;
    v.e.input_carry := d_in.decode.input_carry;
    v.e.output_carry := d_in.decode.output_carry;
    if d_in.decode.lr = '1' then
        v.e.lr := insn_lk(d_in.insn);
    end if;
    v.e.insn := d_in.insn;

    -- multiply unit
    v.m.insn_type := d_in.decode.insn_type;
    mul_a := decoded_reg_a.data;
    mul_b := decoded_reg_b.data;
    v.m.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
    v.m.rc := decode_rc(d_in.decode.rc, d_in.insn);

    -- The multiplier operands are one bit wider than the source data:
    -- signed operands are sign-extended, unsigned ones get a leading '0'.
    -- 32-bit multiplies only use the low word of each source.
    if d_in.decode.mul_32bit = '1' then
        if d_in.decode.mul_signed = '1' then
            v.m.data1 := (others => mul_a(31));
            v.m.data1(31 downto 0) := mul_a(31 downto 0);
            v.m.data2 := (others => mul_b(31));
            v.m.data2(31 downto 0) := mul_b(31 downto 0);
        else
            v.m.data1 := '0' & x"00000000" & mul_a(31 downto 0);
            v.m.data2 := '0' & x"00000000" & mul_b(31 downto 0);
        end if;
    else
        if d_in.decode.mul_signed = '1' then
            v.m.data1 := mul_a(63) & mul_a;
            v.m.data2 := mul_b(63) & mul_b;
        else
            v.m.data1 := '0' & mul_a;
            v.m.data2 := '0' & mul_b;
        end if;
    end if;

    -- divide unit
    -- PPC divide and modulus instruction words have these bits in
    -- the bottom 11 bits: o1dns 010t1 r
    -- where o = OE for div instrs, signedness for mod instrs
    --       d = 1 for div*, 0 for mod*
    --       n = 1 for normal, 0 for extended (dividend << 32/64)
    --       s = 1 for signed, 0 for unsigned (for div*)
    --       t = 1 for 32-bit, 0 for 64-bit
    --       r = RC bit (record condition code)
    v.d.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
    v.d.is_modulus := not d_in.insn(8);
    -- t (bit 2) is set for the 32-bit forms, so it is passed on uninverted.
    -- This matches the 64-bit/32-bit operand selection just below, which
    -- treats insn(2) = '0' as the 64-bit case.
    v.d.is_32bit := d_in.insn(2);
    if d_in.insn(8) = '1' then
        signed_division := d_in.insn(6);
    else
        signed_division := d_in.insn(10);
    end if;
    v.d.is_signed := signed_division;
    if d_in.insn(2) = '0' then
        -- 64-bit forms
        if d_in.insn(8) = '1' and d_in.insn(7) = '0' then
            v.d.is_extended := '1';
        end if;
        v.d.dividend := decoded_reg_a.data;
        v.d.divisor := decoded_reg_b.data;
    else
        -- 32-bit forms
        if d_in.insn(8) = '1' and d_in.insn(7) = '0' then   -- extended forms
            -- the shift by 32 is applied here, so is_extended stays at
            -- its default
            v.d.dividend := decoded_reg_a.data(31 downto 0) & x"00000000";
        elsif signed_division = '1' and decoded_reg_a.data(31) = '1' then
            -- sign extend to 64 bits
            v.d.dividend := x"ffffffff" & decoded_reg_a.data(31 downto 0);
        else
            v.d.dividend := x"00000000" & decoded_reg_a.data(31 downto 0);
        end if;
        if signed_division = '1' and decoded_reg_b.data(31) = '1' then
            v.d.divisor := x"ffffffff" & decoded_reg_b.data(31 downto 0);
        else
            v.d.divisor := x"00000000" & decoded_reg_b.data(31 downto 0);
        end if;
    end if;
    v.d.rc := decode_rc(d_in.decode.rc, d_in.insn);

    -- load/store unit
    v.l.update_reg := decoded_reg_a.reg;
    v.l.addr1 := decoded_reg_a.data;
    v.l.addr2 := decoded_reg_b.data;
    v.l.data := decoded_reg_c.data;
    v.l.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);

    if d_in.decode.insn_type = OP_LOAD then
        v.l.load := '1';
    else
        v.l.load := '0';
    end if;

    -- access size, one-hot encoded as a byte count
    case d_in.decode.length is
        when is1B =>
            v.l.length := "0001";
        when is2B =>
            v.l.length := "0010";
        when is4B =>
            v.l.length := "0100";
        when is8B =>
            v.l.length := "1000";
        when NONE =>
            v.l.length := "0000";
    end case;

    v.l.byte_reverse := d_in.decode.byte_reverse;
    v.l.sign_extend := d_in.decode.sign_extend;
    v.l.update := d_in.decode.update;

    -- single issue

    -- A completion this cycle is accounted for before the issue decision,
    -- so the state machine below sees the already-decremented count.
    if complete_in = '1' then
        v_int.outstanding := v_int.outstanding - 1;
    end if;

    -- state machine to handle instructions that must be single
    -- through the pipeline.
    stall_out <= '0';
    is_valid := d_in.valid;

    -- Handle debugger stop
    stopped_out <= '0';
    if d_in.stop_mark = '1' and v_int.outstanding = 0 then
        stopped_out <= '1';
    end if;

    case v_int.state is
        when IDLE =>
            if (flush_in = '0') and (is_valid = '1') and (d_in.decode.sgl_pipe = '1') then
                if v_int.outstanding /= 0 then
                    -- earlier instructions still in flight: hold this one
                    v_int.state := WAIT_FOR_PREV_TO_COMPLETE;
                    stall_out <= '1';
                    is_valid := '0';
                else
                    -- send insn out and wait on it to complete
                    v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
                end if;
            end if;

        when WAIT_FOR_PREV_TO_COMPLETE =>
            if v_int.outstanding = 0 then
                -- send insn out and wait on it to complete
                v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
            else
                stall_out <= '1';
                is_valid := '0';
            end if;

        when WAIT_FOR_CURR_TO_COMPLETE =>
            if v_int.outstanding = 0 then
                v_int.state := IDLE;
            else
                stall_out <= '1';
                is_valid := '0';
            end if;
    end case;

    -- Route the valid bit to exactly one unit.  An instruction with no
    -- unit is sent to the execute unit marked as OP_ILLEGAL.
    v.e.valid := '0';
    v.m.valid := '0';
    v.d.valid := '0';
    v.l.valid := '0';
    case d_in.decode.unit is
        when ALU =>
            v.e.valid := is_valid;
        when LDST =>
            v.l.valid := is_valid;
        when MUL =>
            v.m.valid := is_valid;
        when DIV =>
            v.d.valid := is_valid;
        when NONE =>
            v.e.valid := is_valid;
            v.e.insn_type := OP_ILLEGAL;
    end case;

    -- A flush cancels whatever would have been issued this cycle.
    if flush_in = '1' then
        v.e.valid := '0';
        v.m.valid := '0';
        v.d.valid := '0';
        v.l.valid := '0';
    end if;

    -- track outstanding instructions
    if v.e.valid = '1' or v.l.valid = '1' or v.m.valid = '1' or v.d.valid = '1' then
        v_int.outstanding := v_int.outstanding + 1;
    end if;

    if rst = '1' then
        v_int.state := IDLE;
        v_int.outstanding := 0;
        v.e := Decode2ToExecute1Init;
        v.l := Decode2ToLoadStore1Init;
        v.m := Decode2ToMultiplyInit;
        v.d := Decode2ToDividerInit;
    end if;

    -- Update registers
    rin <= v;
    rin_int <= v_int;

    -- Update outputs
    e_out <= r.e;
    l_out <= r.l;
    m_out <= r.m;
    d_out <= r.d;
end process;
418 end architecture behaviour;