core: Make result multiplexing explicit
authorPaul Mackerras <paulus@ozlabs.org>
Sat, 26 Sep 2020 07:19:57 +0000 (17:19 +1000)
committerPaul Mackerras <paulus@ozlabs.org>
Fri, 15 Jan 2021 01:40:09 +0000 (12:40 +1100)
This adds an explicit multiplexer feeding v.e.write_data in execute1,
with the select lines determined in the previous cycle based on the
insn_type.  Similarly, for multiply and divide instructions, there is
now an explicit multiplexer.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
common.vhdl
decode2.vhdl
execute1.vhdl
logical.vhdl

index 8b9380c858eb29331a7e8fb4c06beeed58d56db7..44f63bd7336ea28dd38282ff0ce6bc56b5dd1be8 100644 (file)
@@ -210,6 +210,7 @@ package common is
        rc: std_ulogic;
        oe: std_ulogic;
        invert_a: std_ulogic;
+        addm1 : std_ulogic;
        invert_out: std_ulogic;
        input_carry: carry_in_t;
        output_carry: std_ulogic;
@@ -224,18 +225,21 @@ package common is
        update : std_ulogic;                            -- is this an update instruction?
         reserve : std_ulogic;                           -- set for larx/stcx
         br_pred : std_ulogic;
+        result_sel : std_ulogic_vector(2 downto 0);     -- select source of result
+        sub_select : std_ulogic_vector(2 downto 0);     -- sub-result selection
         repeat : std_ulogic;                            -- set if instruction is cracked into two ops
         second : std_ulogic;                            -- set if this is the second op
     end record;
     constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
        (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL,
          bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
-         bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0',
+         bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
         invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
         is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
          byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'),
          read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'),
          cr => (others => '0'), insn => (others => '0'), data_len => (others => '0'),
+         result_sel => "000", sub_select => "000",
          repeat => '0', second => '0', others => (others => '0'));
 
     type MultiplyInputType is record
index 8b4633a82037142cb7afbc5fc2e1f03b2ee67add..561fd79953420eb8697f703401dd9e8e87df962b 100644 (file)
@@ -221,6 +221,52 @@ architecture behaviour of decode2 is
         end case;
     end;
 
+    -- control signals that are derived from insn_type
+    type mux_select_array_t is array(insn_type_t) of std_ulogic_vector(2 downto 0);
+
+    constant result_select : mux_select_array_t := (
+        OP_AND      => "001",           -- logical_result
+        OP_OR       => "001",
+        OP_XOR      => "001",
+        OP_POPCNT   => "001",
+        OP_PRTY     => "001",
+        OP_CMPB     => "001",
+        OP_EXTS     => "001",
+        OP_BPERM    => "001",
+        OP_BCD      => "001",
+        OP_MTSPR    => "001",
+        OP_RLC      => "010",           -- rotator_result
+        OP_RLCL     => "010",
+        OP_RLCR     => "010",
+        OP_SHL      => "010",
+        OP_SHR      => "010",
+        OP_EXTSWSLI => "010",
+        OP_MUL_L64  => "011",           -- muldiv_result
+        OP_MUL_H64  => "011",
+        OP_MUL_H32  => "011",
+        OP_DIV      => "011",
+        OP_DIVE     => "011",
+        OP_MOD      => "011",
+        OP_CNTZ     => "100",           -- countzero_result
+        OP_MFSPR    => "101",           -- spr_result
+        OP_ISEL     => "111",           -- misc_result
+        OP_DARN     => "111",
+        OP_MFMSR    => "111",
+        OP_MFCR     => "111",
+        OP_SETB     => "111",
+        others      => "000"            -- default to adder_result
+        );
+
+    constant subresult_select : mux_select_array_t := (
+        OP_MUL_L64 => "000",            -- muldiv_result
+        OP_MUL_H64 => "001",
+        OP_MUL_H32 => "010",
+        OP_DIV     => "011",
+        OP_DIVE    => "011",
+        OP_MOD     => "011",
+        others     => "000"
+        );
+
     -- issue control signals
     signal control_valid_in : std_ulogic;
     signal control_valid_out : std_ulogic;
@@ -400,6 +446,10 @@ begin
         v.e.bypass_cr := cr_bypass;
         v.e.xerc := c_in.read_xerc_data;
         v.e.invert_a := d_in.decode.invert_a;
+        v.e.addm1 := '0';
+        if d_in.decode.insn_type = OP_BC or d_in.decode.insn_type = OP_BCREG then
+            v.e.addm1 := '1';
+        end if;
         v.e.invert_out := d_in.decode.invert_out;
         v.e.input_carry := d_in.decode.input_carry;
         v.e.output_carry := d_in.decode.output_carry;
@@ -415,6 +465,8 @@ begin
         v.e.update := d_in.decode.update;
         v.e.reserve := d_in.decode.reserve;
         v.e.br_pred := d_in.br_pred;
+        v.e.result_sel := result_select(d_in.decode.insn_type);
+        v.e.sub_select := subresult_select(d_in.decode.insn_type);
 
         -- issue control
         control_valid_in <= d_in.valid;
index 4ea26803413dfe916fbec4f5f956eef57d9f91e1..6d2eb04756ceee6be16df4c0ce31db9c9bd3fbe1 100644 (file)
@@ -60,6 +60,8 @@ architecture behaviour of execute1 is
         prev_op : insn_type_t;
        lr_update : std_ulogic;
        next_lr : std_ulogic_vector(63 downto 0);
+        resmux : std_ulogic_vector(2 downto 0);
+        submux : std_ulogic_vector(2 downto 0);
        mul_in_progress : std_ulogic;
         mul_finish : std_ulogic;
         div_in_progress : std_ulogic;
@@ -103,6 +105,13 @@ architecture behaviour of execute1 is
     signal rotator_carry: std_ulogic;
     signal logical_result: std_ulogic_vector(63 downto 0);
     signal countzero_result: std_ulogic_vector(63 downto 0);
+    signal alu_result: std_ulogic_vector(63 downto 0);
+    signal adder_result: std_ulogic_vector(63 downto 0);
+    signal misc_result: std_ulogic_vector(63 downto 0);
+    signal muldiv_result: std_ulogic_vector(63 downto 0);
+    signal spr_result: std_ulogic_vector(63 downto 0);
+    signal result_mux_sel: std_ulogic_vector(2 downto 0);
+    signal sub_mux_sel: std_ulogic_vector(2 downto 0);
 
     -- multiply signals
     signal x_to_multiply: MultiplyInputType;
@@ -285,6 +294,18 @@ begin
 
     terminate_out <= r.terminate;
 
+    -- Result mux
+    result_mux_sel <= e_in.result_sel when r.busy = '0' else r.resmux;
+    sub_mux_sel <= e_in.sub_select when r.busy = '0' else r.submux;
+    with result_mux_sel select alu_result <=
+        adder_result       when "000",
+        logical_result     when "001",
+        rotator_result     when "010",
+        muldiv_result      when "011",
+        countzero_result   when "100",
+        spr_result         when "101",
+        misc_result        when others;
+
     execute1_0: process(clk)
     begin
        if rising_edge(clk) then
@@ -310,7 +331,8 @@ begin
     execute1_1: process(all)
        variable v : reg_type;
        variable a_inv : std_ulogic_vector(63 downto 0);
-       variable result : std_ulogic_vector(63 downto 0);
+       variable b_or_m1 : std_ulogic_vector(63 downto 0);
+       variable addg6s : std_ulogic_vector(63 downto 0);
        variable newcrf : std_ulogic_vector(3 downto 0);
        variable sum_with_carry : std_ulogic_vector(64 downto 0);
        variable result_en : std_ulogic;
@@ -348,16 +370,17 @@ begin
         variable spr_val : std_ulogic_vector(63 downto 0);
         variable addend : std_ulogic_vector(127 downto 0);
         variable do_trace : std_ulogic;
+        variable hold_wr_data : std_ulogic;
         variable f : Execute1ToFetch1Type;
         variable fv : Execute1ToFPUType;
     begin
-       result := (others => '0');
        sum_with_carry := (others => '0');
        result_en := '0';
        newcrf := (others => '0');
         is_branch := '0';
         taken_branch := '0';
         abs_branch := '0';
+        hold_wr_data := '0';
 
        v := r;
        v.e := Execute1ToWritebackInit;
@@ -399,14 +422,24 @@ begin
         v.cntz_in_progress := '0';
         v.mul_finish := '0';
 
+        misc_result <= (others => '0');
+        spr_result <= (others => '0');
+        spr_val := (others => '0');
+
         -- Main adder
         if e_in.invert_a = '0' then
             a_inv := a_in;
         else
             a_inv := not a_in;
         end if;
-        sum_with_carry := ppc_adde(a_inv, b_in,
+        if e_in.addm1 = '0' then
+            b_or_m1 := b_in;
+        else
+            b_or_m1 := (others => '1');
+        end if;
+        sum_with_carry := ppc_adde(a_inv, b_or_m1,
                                    decode_input_carry(e_in.input_carry, v.e.xerc));
+        adder_result <= sum_with_carry(63 downto 0);
 
         -- signals to multiply and divide units
         sign1 := '0';
@@ -432,6 +465,7 @@ begin
             abs2 := - signed(b_in);
         end if;
 
+        -- Interface to multiply and divide units
        x_to_multiply <= MultiplyInputInit;
        x_to_multiply.is_32bit <= e_in.is_32bit;
 
@@ -479,6 +513,18 @@ begin
             x_to_divider.divisor <= x"00000000" & std_ulogic_vector(abs2(31 downto 0));
         end if;
 
+        case sub_mux_sel(1 downto 0) is
+            when "00" =>
+                muldiv_result <= multiply_to_x.result(63 downto 0);
+            when "01" =>
+                muldiv_result <= multiply_to_x.result(127 downto 64);
+            when "10" =>
+                muldiv_result <= multiply_to_x.result(63 downto 32) &
+                                 multiply_to_x.result(63 downto 32);
+            when others =>
+                muldiv_result <= divider_to_x.write_reg_data;
+        end case;
+
        ctrl_tmp <= ctrl;
        -- FIXME: run at 512MHz not core freq
        ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);
@@ -611,6 +657,8 @@ begin
            v.slow_op_rc := e_in.rc;
            v.slow_op_oe := e_in.oe;
            v.slow_op_xerc := v.e.xerc;
+            v.resmux := e_in.result_sel;
+            v.submux := e_in.sub_select;
 
            case_0: case e_in.insn_type is
 
@@ -642,8 +690,7 @@ begin
            when OP_NOP | OP_DCBF | OP_DCBST | OP_DCBT | OP_DCBTST | OP_ICBT =>
                -- Do nothing
            when OP_ADD | OP_CMP | OP_TRAP =>
-               result := sum_with_carry(63 downto 0);
-                carry_32 := result(32) xor a_inv(32) xor b_in(32);
+                carry_32 := sum_with_carry(32) xor a_inv(32) xor b_in(32);
                 carry_64 := sum_with_carry(64);
                 if e_in.insn_type = OP_ADD then
                     if e_in.output_carry = '1' then
@@ -724,17 +771,18 @@ begin
                     end if;
                 end if;
             when OP_ADDG6S =>
-                result := (others => '0');
+                addg6s := (others => '0');
                 for i in 0 to 14 loop
                     lo := i * 4;
                     hi := (i + 1) * 4;
                     if (a_in(hi) xor b_in(hi) xor sum_with_carry(hi)) = '0' then
-                        result(lo + 3 downto lo) := "0110";
+                        addg6s(lo + 3 downto lo) := "0110";
                     end if;
                 end loop;
                 if sum_with_carry(64) = '0' then
-                    result(63 downto 60) := "0110";
+                    addg6s(63 downto 60) := "0110";
                 end if;
+                misc_result <= addg6s;
                 result_en := '1';
             when OP_CMPRB =>
                 newcrf := ppc_cmprb(a_in, b_in, insn_l(e_in.insn));
@@ -754,7 +802,6 @@ begin
                                      newcrf & newcrf & newcrf & newcrf;
             when OP_AND | OP_OR | OP_XOR | OP_POPCNT | OP_PRTY | OP_CMPB | OP_EXTS |
                     OP_BPERM | OP_BCD =>
-               result := logical_result;
                result_en := '1';
            when OP_B =>
                 is_branch := '1';
@@ -765,12 +812,11 @@ begin
                 end if;
            when OP_BC =>
                -- read_data1 is CTR
+                v.e.write_reg := fast_spr_num(SPR_CTR);
                bo := insn_bo(e_in.insn);
                bi := insn_bi(e_in.insn);
                if bo(4-2) = '0' then
-                   result := std_ulogic_vector(unsigned(a_in) - 1);
                    result_en := '1';
-                   v.e.write_reg := fast_spr_num(SPR_CTR);
                end if;
                 is_branch := '1';
                taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
@@ -781,12 +827,11 @@ begin
            when OP_BCREG =>
                -- read_data1 is CTR
                -- read_data2 is target register (CTR, LR or TAR)
+                v.e.write_reg := fast_spr_num(SPR_CTR);
                bo := insn_bo(e_in.insn);
                bi := insn_bi(e_in.insn);
                if bo(4-2) = '0' and e_in.insn(10) = '0' then
-                   result := std_ulogic_vector(unsigned(a_in) - 1);
                    result_en := '1';
-                   v.e.write_reg := fast_spr_num(SPR_CTR);
                end if;
                 is_branch := '1';
                taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
@@ -825,9 +870,9 @@ begin
            when OP_ISEL =>
                crbit := to_integer(unsigned(insn_bc(e_in.insn)));
                if cr_in(31-crbit) = '1' then
-                   result := a_in;
+                   misc_result <= a_in;
                else
-                   result := b_in;
+                   misc_result <= b_in;
                end if;
                result_en := '1';
            when OP_CROP =>
@@ -885,38 +930,38 @@ begin
                 if random_err = '0' then
                     case e_in.insn(17 downto 16) is
                         when "00" =>
-                            result := x"00000000" & random_cond(31 downto 0);
+                            misc_result <= x"00000000" & random_cond(31 downto 0);
                         when "10" =>
-                            result := random_raw;
+                            misc_result <= random_raw;
                         when others =>
-                            result := random_cond;
+                            misc_result <= random_cond;
                     end case;
                 else
-                    result := (others => '1');
+                    misc_result <= (others => '1');
                 end if;
                 result_en := '1';
            when OP_MFMSR =>
-               result := ctrl.msr;
+               misc_result <= ctrl.msr;
                result_en := '1';
            when OP_MFSPR =>
                report "MFSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
                    "=" & to_hstring(a_in);
                result_en := '1';
                if is_fast_spr(e_in.read_reg1) then
-                   result := a_in;
-                   if decode_spr_num(e_in.insn) = SPR_XER then
+                   spr_val := a_in;
+                    if decode_spr_num(e_in.insn) = SPR_XER then
                        -- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
-                       result(63 downto 32) := (others => '0');
-                       result(63-32) := v.e.xerc.so;
-                       result(63-33) := v.e.xerc.ov;
-                       result(63-34) := v.e.xerc.ca;
-                       result(63-35 downto 63-43) := "000000000";
-                       result(63-44) := v.e.xerc.ov32;
-                       result(63-45) := v.e.xerc.ca32;
-                   end if;
+                       spr_val(63 downto 32) := (others => '0');
+                       spr_val(63-32) := v.e.xerc.so;
+                       spr_val(63-33) := v.e.xerc.ov;
+                       spr_val(63-34) := v.e.xerc.ca;
+                       spr_val(63-35 downto 63-43) := "000000000";
+                       spr_val(63-44) := v.e.xerc.ov32;
+                       spr_val(63-45) := v.e.xerc.ca32;
+                    end if;
                else
                     spr_val := c_in;
-                   case decode_spr_num(e_in.insn) is
+                    case decode_spr_num(e_in.insn) is
                    when SPR_TB =>
                        spr_val := ctrl.tb;
                    when SPR_TBU =>
@@ -940,22 +985,23 @@ begin
                         if ctrl.msr(MSR_PR) = '1' then
                             illegal := '1';
                         end if;
-                   end case;
-                    result := spr_val;
-               end if;
+                    end case;
+                end if;
+                spr_result <= spr_val;
+
            when OP_MFCR =>
                if e_in.insn(20) = '0' then
                    -- mfcr
-                   result := x"00000000" & cr_in;
+                   misc_result <= x"00000000" & cr_in;
                else
                    -- mfocrf
                    crnum := fxm_to_num(insn_fxm(e_in.insn));
-                   result := (others => '0');
+                   misc_result <= (others => '0');
                    for i in 0 to 7 loop
                        lo := (7-i)*4;
                        hi := lo + 3;
                        if crnum = i then
-                           result(hi downto lo) := cr_in(hi downto lo);
+                           misc_result(hi downto lo) <= cr_in(hi downto lo);
                        end if;
                    end loop;
                end if;
@@ -999,7 +1045,6 @@ begin
                report "MTSPR to SPR " & integer'image(decode_spr_num(e_in.insn)) &
                    "=" & to_hstring(c_in);
                if is_fast_spr(e_in.write_reg) then
-                   result := c_in;
                    result_en := '1';
                    if decode_spr_num(e_in.insn) = SPR_XER then
                        v.e.xerc.so := c_in(63-32);
@@ -1025,7 +1070,6 @@ begin
                    end case;
                end if;
            when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR | OP_EXTSWSLI =>
-               result := rotator_result;
                if e_in.output_carry = '1' then
                    set_carry(v.e, rotator_carry, rotator_carry);
                end if;
@@ -1033,11 +1077,11 @@ begin
             when OP_SETB =>
                 bfa := insn_bfa(e_in.insn);
                 crbit := to_integer(unsigned(bfa)) * 4;
-                result := (others => '0');
+                misc_result <= (others => '0');
                 if cr_in(31 - crbit) = '1' then
-                    result := (others => '1');
+                    misc_result <= (others => '1');
                 elsif cr_in(30 - crbit) = '1' then
-                    result(0) := '1';
+                    misc_result(0) <= '1';
                 end if;
 
            when OP_ISYNC =>
@@ -1130,10 +1174,9 @@ begin
            v.e.valid := '1';
             -- Keep r.e.write_data unchanged next cycle in case it is needed
             -- for a forwarded result (e.g. for CTR).
-            result := r.e.write_data;
+            hold_wr_data := '1';
         elsif r.cntz_in_progress = '1' then
             -- cnt[lt]z always takes two cycles
-            result := countzero_result;
             result_en := '1';
             v.e.write_reg := gpr_to_gspr(r.slow_op_dest);
             v.e.rc := r.slow_op_rc;
@@ -1144,18 +1187,7 @@ begin
               (r.div_in_progress = '1' and divider_to_x.valid = '1') then
                if r.mul_in_progress = '1' then
                     overflow := '0';
-                    case r.slow_op_insn is
-                        when OP_MUL_H32 =>
-                            result := multiply_to_x.result(63 downto 32) &
-                                      multiply_to_x.result(63 downto 32);
-                        when OP_MUL_H64 =>
-                            result := multiply_to_x.result(127 downto 64);
-                        when others =>
-                            -- i.e. OP_MUL_L64
-                            result := multiply_to_x.result(63 downto 0);
-                    end case;
                else
-                   result := divider_to_x.write_reg_data;
                    overflow := divider_to_x.overflow;
                end if;
                 if r.mul_in_progress = '1' and r.slow_op_oe = '1' then
@@ -1184,7 +1216,7 @@ begin
                v.div_in_progress := r.div_in_progress;
            end if;
         elsif r.mul_finish = '1' then
-            result := r.e.write_data;
+            hold_wr_data := '1';
             result_en := '1';
             v.e.write_reg := gpr_to_gspr(r.slow_op_dest);
             v.e.rc := r.slow_op_rc;
@@ -1225,7 +1257,11 @@ begin
             v.trace_next := '1';
         end if;
 
-       v.e.write_data := result;
+        if hold_wr_data = '0' then
+            v.e.write_data := alu_result;
+        else
+            v.e.write_data := r.e.write_data;
+        end if;
        v.e.write_enable := result_en and not exception;
 
         -- generate DSI or DSegI for load/store exceptions
index d008e4735660931f0a3398803244d039ae3fcc02..6b6f2029fd801320f8f963968dd3def1875e1047 100644 (file)
@@ -197,8 +197,7 @@ begin
                     tmp := x"00" & dpd_to_bcd(rs(51 downto 42)) & dpd_to_bcd(rs(41 downto 32)) &
                            x"00" & dpd_to_bcd(rs(19 downto 10)) & dpd_to_bcd(rs(9 downto 0));
                 end if;
-            when others =>
-                -- EXTS
+            when OP_EXTS =>
                 -- note datalen is a 1-hot encoding
                negative := (datalen(0) and rs(7)) or
                            (datalen(1) and rs(15)) or
@@ -211,6 +210,9 @@ begin
                    tmp(15 downto 8) := rs(15 downto 8);
                end if;
                tmp(7 downto 0) := rs(7 downto 0);
+            when others =>
+                -- e.g. OP_MTSPR
+                tmp := rs;
         end case;
 
         result <= tmp;