core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
- decode1.vhdl helpers.vhdl insn_helpers.vhdl gpr_hazard.vhdl \
- cr_hazard.vhdl control.vhdl decode2.vhdl register_file.vhdl \
+ decode1.vhdl helpers.vhdl insn_helpers.vhdl \
+ control.vhdl decode2.vhdl register_file.vhdl \
cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
logical.vhdl countzero.vhdl multiply.vhdl divider.vhdl execute1.vhdl \
loadstore1.vhdl mmu.vhdl dcache.vhdl writeback.vhdl core_debug.vhdl \
use ieee.numeric_std.all;
library work;
+use work.utils.all;
use work.decode_types.all;
package common is
constant FPSCR_NI : integer := 63 - 61;
constant FPSCR_RN : integer := 63 - 63;
- type irq_state_t is (WRITE_SRR0, WRITE_SRR1);
+ -- Used for tracking instruction completion and pending register writes
+ constant TAG_COUNT : positive := 4; -- number of distinct tag values
+ constant TAG_NUMBER_BITS : natural := log2(TAG_COUNT); -- presumably the bit width of a tag; depends on how log2 rounds (TODO confirm)
+ subtype tag_number_t is integer range 0 to TAG_COUNT - 1; -- a tag number as a constrained integer
+ subtype tag_index_t is unsigned(TAG_NUMBER_BITS - 1 downto 0); -- TAG_NUMBER_BITS-wide unsigned vector
+ type instr_tag_t is record
+ tag : tag_number_t; -- tag number
+ valid : std_ulogic; -- tag_match only matches tags whose valid is '1'
+ end record;
+ constant instr_tag_init : instr_tag_t := (tag => 0, valid => '0'); -- default value: tag 0, not valid
+ function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean; -- true when both tags are valid and have equal tag numbers
+
+ subtype intr_vector_t is integer range 0 to 16#fff#; -- integer in 0 .. 0xfff; presumably an interrupt vector (TODO confirm use)
-- For now, fixed 16 sources, make this either a parametric
-- package of some sort or an unconstrainted array.
dec: std_ulogic_vector(63 downto 0);
msr: std_ulogic_vector(63 downto 0);
cfar: std_ulogic_vector(63 downto 0);
- irq_state : irq_state_t;
- srr1: std_ulogic_vector(63 downto 0);
end record;
type Fetch1ToIcacheType is record
insn: std_ulogic_vector(31 downto 0);
ispr1: gspr_index_t; -- (G)SPR used for branch condition (CTR) or mfspr
ispr2: gspr_index_t; -- (G)SPR used for branch target (CTR, LR, TAR)
+ ispro: gspr_index_t; -- (G)SPR written with LR or CTR
decode: decode_rom_t;
br_pred: std_ulogic; -- Branch was predicted to be taken
big_endian: std_ulogic;
end record;
constant Decode1ToDecode2Init : Decode1ToDecode2Type :=
(valid => '0', stop_mark => '0', nia => (others => '0'), insn => (others => '0'),
- ispr1 => (others => '0'), ispr2 => (others => '0'), decode => decode_rom_init,
- br_pred => '0', big_endian => '0');
+ ispr1 => (others => '0'), ispr2 => (others => '0'), ispro => (others => '0'),
+ decode => decode_rom_init, br_pred => '0', big_endian => '0');
type Decode1ToFetch1Type is record
redirect : std_ulogic;
redirect_nia : std_ulogic_vector(63 downto 0);
end record;
+ type bypass_data_t is record -- type of the execute1 bypass_data port and the decode2 execute_bypass port in core
+ tag : instr_tag_t; -- presumably the tag of the instruction that produced data (TODO confirm in execute1)
+ data : std_ulogic_vector(63 downto 0); -- 64-bit bypassed value
+ end record;
+ constant bypass_data_init : bypass_data_t := (tag => instr_tag_init, data => (others => '0')); -- invalid tag, zero data
+
+ type cr_bypass_data_t is record -- type of the execute1 bypass_cr_data port and the decode2 execute_cr_bypass port in core
+ tag : instr_tag_t; -- presumably the tag of the instruction that produced data (TODO confirm in execute1)
+ data : std_ulogic_vector(31 downto 0); -- 32-bit bypassed value
+ end record;
+ constant cr_bypass_data_init : cr_bypass_data_t := (tag => instr_tag_init, data => (others => '0')); -- invalid tag, zero data
+
type Decode2ToExecute1Type is record
valid: std_ulogic;
unit : unit_t;
fac : facility_t;
insn_type: insn_type_t;
nia: std_ulogic_vector(63 downto 0);
+ instr_tag : instr_tag_t;
write_reg: gspr_index_t;
write_reg_enable: std_ulogic;
read_reg1: gspr_index_t;
read_data1: std_ulogic_vector(63 downto 0);
read_data2: std_ulogic_vector(63 downto 0);
read_data3: std_ulogic_vector(63 downto 0);
- bypass_data1: std_ulogic;
- bypass_data2: std_ulogic;
- bypass_data3: std_ulogic;
cr: std_ulogic_vector(31 downto 0);
- bypass_cr : std_ulogic;
xerc: xer_common_t;
lr: std_ulogic;
+ br_abs: std_ulogic;
rc: std_ulogic;
oe: std_ulogic;
invert_a: std_ulogic;
output_carry: std_ulogic;
input_cr: std_ulogic;
output_cr: std_ulogic;
+ output_xer: std_ulogic;
is_32bit: std_ulogic;
is_signed: std_ulogic;
insn: std_ulogic_vector(31 downto 0);
second : std_ulogic; -- set if this is the second op
end record;
constant Decode2ToExecute1Init : Decode2ToExecute1Type :=
- (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL,
- write_reg_enable => '0', bypass_data1 => '0', bypass_data2 => '0', bypass_data3 => '0',
- bypass_cr => '0', lr => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
- invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0', output_cr => '0',
+ (valid => '0', unit => NONE, fac => NONE, insn_type => OP_ILLEGAL, instr_tag => instr_tag_init,
+ write_reg_enable => '0',
+ lr => '0', br_abs => '0', rc => '0', oe => '0', invert_a => '0', addm1 => '0',
+ invert_out => '0', input_carry => ZERO, output_carry => '0', input_cr => '0',
+ output_cr => '0', output_xer => '0',
is_32bit => '0', is_signed => '0', xerc => xerc_init, reserve => '0', br_pred => '0',
byte_reverse => '0', sign_extend => '0', update => '0', nia => (others => '0'),
read_data1 => (others => '0'), read_data2 => (others => '0'), read_data3 => (others => '0'),
end record;
type RegisterFileToDecode2Type is record
- read1_data : std_ulogic_vector(63 downto 0);
- read2_data : std_ulogic_vector(63 downto 0);
- read3_data : std_ulogic_vector(63 downto 0);
+ read1_data : std_ulogic_vector(63 downto 0);
+ read2_data : std_ulogic_vector(63 downto 0);
+ read3_data : std_ulogic_vector(63 downto 0);
end record;
type Decode2ToCrFileType is record
read_xerc_data : xer_common_t;
end record;
- type Execute1ToFetch1Type is record
- redirect: std_ulogic;
- virt_mode: std_ulogic;
- priv_mode: std_ulogic;
- big_endian: std_ulogic;
- mode_32bit: std_ulogic;
- redirect_nia: std_ulogic_vector(63 downto 0);
- br_nia : std_ulogic_vector(63 downto 0);
- br_last : std_ulogic;
- br_taken : std_ulogic;
- end record;
- constant Execute1ToFetch1Init : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0',
- priv_mode => '0', big_endian => '0',
- mode_32bit => '0', br_taken => '0',
- br_last => '0', others => (others => '0'));
-
type Execute1ToLoadstore1Type is record
valid : std_ulogic;
op : insn_type_t; -- what ld/st or m[tf]spr or TLB op to do
nia : std_ulogic_vector(63 downto 0);
insn : std_ulogic_vector(31 downto 0);
+ instr_tag : instr_tag_t;
addr1 : std_ulogic_vector(63 downto 0);
addr2 : std_ulogic_vector(63 downto 0);
data : std_ulogic_vector(63 downto 0); -- data to write, unused for read
byte_reverse : std_ulogic;
sign_extend : std_ulogic; -- do we need to sign extend?
update : std_ulogic; -- is this an update instruction?
- update_reg : gpr_index_t; -- if so, the register to update
xerc : xer_common_t;
reserve : std_ulogic; -- set for larx/stcx.
rc : std_ulogic; -- set for stcx.
is_32bit : std_ulogic;
repeat : std_ulogic;
second : std_ulogic;
- end record;
- constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type := (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
- sign_extend => '0', update => '0', xerc => xerc_init,
- reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
- nia => (others => '0'), insn => (others => '0'),
- addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
- write_reg => (others => '0'), length => (others => '0'),
- mode_32bit => '0', is_32bit => '0',
- repeat => '0', second => '0', others => (others => '0'));
+ msr : std_ulogic_vector(63 downto 0);
+ end record;
+ constant Execute1ToLoadstore1Init : Execute1ToLoadstore1Type :=
+ (valid => '0', op => OP_ILLEGAL, ci => '0', byte_reverse => '0',
+ sign_extend => '0', update => '0', xerc => xerc_init,
+ reserve => '0', rc => '0', virt_mode => '0', priv_mode => '0',
+ nia => (others => '0'), insn => (others => '0'),
+ instr_tag => instr_tag_init,
+ addr1 => (others => '0'), addr2 => (others => '0'), data => (others => '0'),
+ write_reg => (others => '0'),
+ length => (others => '0'),
+ mode_32bit => '0', is_32bit => '0',
+ repeat => '0', second => '0',
+ msr => (others => '0'));
type Loadstore1ToExecute1Type is record
busy : std_ulogic;
- exception : std_ulogic;
- alignment : std_ulogic;
- invalid : std_ulogic;
- perm_error : std_ulogic;
- rc_error : std_ulogic;
- badtree : std_ulogic;
- segment_fault : std_ulogic;
- instr_fault : std_ulogic;
+ in_progress : std_ulogic;
end record;
type Loadstore1ToDcacheType is record
valid : std_ulogic;
+ hold : std_ulogic;
load : std_ulogic; -- is this a load
dcbz : std_ulogic;
nc : std_ulogic;
type Loadstore1ToWritebackType is record
valid : std_ulogic;
+ instr_tag : instr_tag_t;
write_enable: std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
xerc : xer_common_t;
rc : std_ulogic;
store_done : std_ulogic;
+ interrupt : std_ulogic;
+ intr_vec : intr_vector_t;
+ srr0: std_ulogic_vector(63 downto 0);
+ srr1: std_ulogic_vector(15 downto 0);
end record;
- constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType := (valid => '0', write_enable => '0', xerc => xerc_init,
- rc => '0', store_done => '0', write_data => (others => '0'), others => (others => '0'));
+ constant Loadstore1ToWritebackInit : Loadstore1ToWritebackType :=
+ (valid => '0', instr_tag => instr_tag_init, write_enable => '0',
+ write_reg => (others => '0'), write_data => (others => '0'),
+ xerc => xerc_init, rc => '0', store_done => '0',
+ interrupt => '0', intr_vec => 0,
+ srr0 => (others => '0'), srr1 => (others => '0'));
type Execute1ToWritebackType is record
valid: std_ulogic;
+ instr_tag : instr_tag_t;
rc : std_ulogic;
mode_32bit : std_ulogic;
write_enable : std_ulogic;
write_cr_data : std_ulogic_vector(31 downto 0);
write_xerc_enable : std_ulogic;
xerc : xer_common_t;
- exc_write_enable : std_ulogic;
- exc_write_reg : gspr_index_t;
- exc_write_data : std_ulogic_vector(63 downto 0);
- end record;
- constant Execute1ToWritebackInit : Execute1ToWritebackType := (valid => '0', rc => '0', mode_32bit => '0', write_enable => '0',
- write_cr_enable => '0', exc_write_enable => '0',
- write_xerc_enable => '0', xerc => xerc_init,
- write_data => (others => '0'), write_cr_mask => (others => '0'),
- write_cr_data => (others => '0'), write_reg => (others => '0'),
- exc_write_reg => (others => '0'), exc_write_data => (others => '0'));
+ interrupt : std_ulogic;
+ intr_vec : intr_vector_t;
+ redirect: std_ulogic;
+ redir_mode: std_ulogic_vector(3 downto 0);
+ last_nia: std_ulogic_vector(63 downto 0);
+ br_offset: std_ulogic_vector(63 downto 0);
+ br_last: std_ulogic;
+ br_taken: std_ulogic;
+ abs_br: std_ulogic;
+ srr1: std_ulogic_vector(15 downto 0);
+ msr: std_ulogic_vector(63 downto 0);
+ end record;
+ constant Execute1ToWritebackInit : Execute1ToWritebackType :=
+ (valid => '0', instr_tag => instr_tag_init, rc => '0', mode_32bit => '0',
+ write_enable => '0', write_cr_enable => '0',
+ write_xerc_enable => '0', xerc => xerc_init,
+ write_data => (others => '0'), write_cr_mask => (others => '0'),
+ write_cr_data => (others => '0'), write_reg => (others => '0'),
+ interrupt => '0', intr_vec => 0, redirect => '0', redir_mode => "0000",
+ last_nia => (others => '0'), br_offset => (others => '0'),
+ br_last => '0', br_taken => '0', abs_br => '0',
+ srr1 => (others => '0'), msr => (others => '0'));
type Execute1ToFPUType is record
valid : std_ulogic;
op : insn_type_t;
nia : std_ulogic_vector(63 downto 0);
+ itag : instr_tag_t;
insn : std_ulogic_vector(31 downto 0);
single : std_ulogic;
fe_mode : std_ulogic_vector(1 downto 0);
out_cr : std_ulogic;
end record;
constant Execute1ToFPUInit : Execute1ToFPUType := (valid => '0', op => OP_ILLEGAL, nia => (others => '0'),
+ itag => instr_tag_init,
insn => (others => '0'), fe_mode => "00", rc => '0',
fra => (others => '0'), frb => (others => '0'),
frc => (others => '0'), frt => (others => '0'),
type FPUToExecute1Type is record
busy : std_ulogic;
exception : std_ulogic;
- interrupt : std_ulogic;
- illegal : std_ulogic;
end record;
constant FPUToExecute1Init : FPUToExecute1Type := (others => '0');
type FPUToWritebackType is record
valid : std_ulogic;
+ interrupt : std_ulogic;
+ instr_tag : instr_tag_t;
write_enable : std_ulogic;
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_cr_enable : std_ulogic;
write_cr_mask : std_ulogic_vector(7 downto 0);
write_cr_data : std_ulogic_vector(31 downto 0);
- end record;
- constant FPUToWritebackInit : FPUToWritebackType := (valid => '0', write_enable => '0', write_cr_enable => '0', others => (others => '0'));
+ intr_vec : intr_vector_t;
+ srr0 : std_ulogic_vector(63 downto 0);
+ srr1 : std_ulogic_vector(15 downto 0);
+ end record;
+ constant FPUToWritebackInit : FPUToWritebackType :=
+ (valid => '0', interrupt => '0', instr_tag => instr_tag_init,
+ write_enable => '0', write_reg => (others => '0'),
+ write_cr_enable => '0', write_cr_mask => (others => '0'),
+ write_cr_data => (others => '0'),
+ intr_vec => 0, srr1 => (others => '0'),
+ others => (others => '0'));
type DividerToExecute1Type is record
valid: std_ulogic;
constant DividerToExecute1Init : DividerToExecute1Type := (valid => '0', overflow => '0',
others => (others => '0'));
+ type WritebackToFetch1Type is record -- in core, driven by writeback's f_out port and read by fetch1's w_in port
+ redirect: std_ulogic; -- presumably asks fetch1 to restart at redirect_nia (TODO confirm in fetch1)
+ virt_mode: std_ulogic; -- mode flag; exact meaning not visible here (TODO confirm)
+ priv_mode: std_ulogic; -- mode flag; exact meaning not visible here (TODO confirm)
+ big_endian: std_ulogic; -- mode flag; exact meaning not visible here (TODO confirm)
+ mode_32bit: std_ulogic; -- mode flag; exact meaning not visible here (TODO confirm)
+ redirect_nia: std_ulogic_vector(63 downto 0); -- 64-bit address accompanying redirect
+ br_nia : std_ulogic_vector(63 downto 0); -- 64-bit address accompanying br_last / br_taken
+ br_last : std_ulogic; -- branch information for fetch1; semantics not visible here (TODO confirm)
+ br_taken : std_ulogic; -- branch information for fetch1; semantics not visible here (TODO confirm)
+ end record;
+ constant WritebackToFetch1Init : WritebackToFetch1Type := -- every field cleared to '0' / all zeros
+ (redirect => '0', virt_mode => '0', priv_mode => '0', big_endian => '0',
+ mode_32bit => '0', redirect_nia => (others => '0'),
+ br_last => '0', br_taken => '0', br_nia => (others => '0'));
+
type WritebackToRegisterFileType is record
write_reg : gspr_index_t;
write_data : std_ulogic_vector(63 downto 0);
write_enable : std_ulogic;
end record;
- constant WritebackToRegisterFileInit : WritebackToRegisterFileType := (write_enable => '0', write_data => (others => '0'), others => (others => '0'));
+ constant WritebackToRegisterFileInit : WritebackToRegisterFileType :=
+ (write_enable => '0', write_data => (others => '0'), others => (others => '0'));
type WritebackToCrFileType is record
write_cr_enable : std_ulogic;
begin
case spr is
when SPR_LR =>
- n := 0;
+ n := 0; -- N.B. decode2 relies on this specific value
when SPR_CTR =>
- n:= 1;
+ n := 1; -- N.B. decode2 relies on this specific value
when SPR_SRR0 =>
n := 2;
when SPR_SRR1 =>
begin
return "10" & f;
end;
+
+ function tag_match(tag1 : instr_tag_t; tag2 : instr_tag_t) return boolean is
+ begin
+ -- A tag whose valid flag is not '1' never matches, whatever its number.
+ if tag1.valid /= '1' or tag2.valid /= '1' then
+ return false;
+ end if;
+ -- Both tags are valid: they match when their tag numbers are equal.
+ return tag1.tag = tag2.tag;
+ end;
end common;
entity control is
generic (
- PIPELINE_DEPTH : natural := 2
+ EX1_BYPASS : boolean := true;
+ PIPELINE_DEPTH : natural := 3
);
port (
clk : in std_ulogic;
rst : in std_ulogic;
- complete_in : in std_ulogic;
+ complete_in : in instr_tag_t;
valid_in : in std_ulogic;
repeated : in std_ulogic;
flush_in : in std_ulogic;
gpr_write_valid_in : in std_ulogic;
gpr_write_in : in gspr_index_t;
- gpr_bypassable : in std_ulogic;
-
- update_gpr_write_valid : in std_ulogic;
- update_gpr_write_reg : in gspr_index_t;
gpr_a_read_valid_in : in std_ulogic;
gpr_a_read_in : in gspr_index_t;
gpr_c_read_valid_in : in std_ulogic;
gpr_c_read_in : in gspr_index_t;
+ execute_next_tag : in instr_tag_t;
+ execute_next_cr_tag : in instr_tag_t;
+
cr_read_in : in std_ulogic;
cr_write_in : in std_ulogic;
- cr_bypassable : in std_ulogic;
valid_out : out std_ulogic;
stall_out : out std_ulogic;
gpr_bypass_a : out std_ulogic;
gpr_bypass_b : out std_ulogic;
gpr_bypass_c : out std_ulogic;
- cr_bypass : out std_ulogic
+ cr_bypass : out std_ulogic;
+
+ instr_tag_out : out instr_tag_t
);
end entity control;
signal r_int, rin_int : reg_internal_type := reg_internal_init;
- signal stall_a_out : std_ulogic;
- signal stall_b_out : std_ulogic;
- signal stall_c_out : std_ulogic;
- signal cr_stall_out : std_ulogic;
-
signal gpr_write_valid : std_ulogic := '0';
signal cr_write_valid : std_ulogic := '0';
-begin
- gpr_hazard0: entity work.gpr_hazard
- generic map (
- PIPELINE_DEPTH => PIPELINE_DEPTH
- )
- port map (
- clk => clk,
- busy_in => busy_in,
- deferred => deferred,
- complete_in => complete_in,
- flush_in => flush_in,
- issuing => valid_out,
- repeated => repeated,
-
- gpr_write_valid_in => gpr_write_valid,
- gpr_write_in => gpr_write_in,
- bypass_avail => gpr_bypassable,
- gpr_read_valid_in => gpr_a_read_valid_in,
- gpr_read_in => gpr_a_read_in,
-
- ugpr_write_valid => update_gpr_write_valid,
- ugpr_write_reg => update_gpr_write_reg,
-
- stall_out => stall_a_out,
- use_bypass => gpr_bypass_a
- );
-
- gpr_hazard1: entity work.gpr_hazard
- generic map (
- PIPELINE_DEPTH => PIPELINE_DEPTH
- )
- port map (
- clk => clk,
- busy_in => busy_in,
- deferred => deferred,
- complete_in => complete_in,
- flush_in => flush_in,
- issuing => valid_out,
- repeated => repeated,
-
- gpr_write_valid_in => gpr_write_valid,
- gpr_write_in => gpr_write_in,
- bypass_avail => gpr_bypassable,
- gpr_read_valid_in => gpr_b_read_valid_in,
- gpr_read_in => gpr_b_read_in,
-
- ugpr_write_valid => update_gpr_write_valid,
- ugpr_write_reg => update_gpr_write_reg,
-
- stall_out => stall_b_out,
- use_bypass => gpr_bypass_b
- );
-
- gpr_hazard2: entity work.gpr_hazard
- generic map (
- PIPELINE_DEPTH => PIPELINE_DEPTH
- )
- port map (
- clk => clk,
- busy_in => busy_in,
- deferred => deferred,
- complete_in => complete_in,
- flush_in => flush_in,
- issuing => valid_out,
- repeated => repeated,
-
- gpr_write_valid_in => gpr_write_valid,
- gpr_write_in => gpr_write_in,
- bypass_avail => gpr_bypassable,
- gpr_read_valid_in => gpr_c_read_valid_in,
- gpr_read_in => gpr_c_read_in,
-
- ugpr_write_valid => update_gpr_write_valid,
- ugpr_write_reg => update_gpr_write_reg,
-
- stall_out => stall_c_out,
- use_bypass => gpr_bypass_c
- );
-
- cr_hazard0: entity work.cr_hazard
- generic map (
- PIPELINE_DEPTH => PIPELINE_DEPTH
- )
- port map (
- clk => clk,
- busy_in => busy_in,
- deferred => deferred,
- complete_in => complete_in,
- flush_in => flush_in,
- issuing => valid_out,
-
- cr_read_in => cr_read_in,
- cr_write_in => cr_write_valid,
- bypassable => cr_bypassable,
-
- stall_out => cr_stall_out,
- use_bypass => cr_bypass
- );
+ type tag_register is record
+ wr_gpr : std_ulogic; -- set at issue from gpr_write_valid; cleared on completion, flush or reset
+ reg : gspr_index_t; -- register index captured from gpr_write_in at issue
+ recent : std_ulogic; -- cleared when a later issued instruction writes the same reg
+ wr_cr : std_ulogic; -- set at issue from cr_write_valid; cleared on completion, flush or reset
+ end record;
+ type tag_regs_array is array(tag_number_t) of tag_register; -- one entry per tag number
+ signal tag_regs : tag_regs_array;
+
+ signal instr_tag : instr_tag_t; -- tag given to the instruction issued this cycle (copied to instr_tag_out)
+
+ signal gpr_tag_stall : std_ulogic; -- a GPR source has a pending producer that cannot be bypassed
+ signal cr_tag_stall : std_ulogic; -- a CR read has a pending producer that cannot be bypassed
+
+ signal curr_tag : tag_number_t; -- tag number that the next issued instruction receives
+ signal next_tag : tag_number_t; -- value loaded into curr_tag on the next clock edge
+
+ signal curr_cr_tag : tag_number_t; -- tag of the most recently issued CR-writing instruction
+
+begin
control0: process(clk)
begin
if rising_edge(clk) then
assert rin_int.outstanding >= 0 and rin_int.outstanding <= (PIPELINE_DEPTH+1)
report "Outstanding bad " & integer'image(rin_int.outstanding) severity failure;
r_int <= rin_int;
+ for i in tag_number_t loop -- clocked update of the per-tag tracking table tag_regs
+ if rst = '1' or flush_in = '1' then -- reset or flush: no tag has a pending GPR or CR write
+ tag_regs(i).wr_gpr <= '0';
+ tag_regs(i).wr_cr <= '0';
+ else
+ if complete_in.valid = '1' and i = complete_in.tag then -- the instruction holding tag i completed
+ tag_regs(i).wr_gpr <= '0';
+ tag_regs(i).wr_cr <= '0';
+ report "tag " & integer'image(i) & " not valid"; -- simulation trace message
+ end if;
+ if gpr_write_valid = '1' and tag_regs(i).reg = gpr_write_in then -- an instruction being issued writes the register recorded in entry i
+ tag_regs(i).recent <= '0'; -- entry i is no longer the latest writer of that register
+ if tag_regs(i).recent = '1' and tag_regs(i).wr_gpr = '1' then -- trace only when a live, most-recent entry is superseded
+ report "tag " & integer'image(i) & " not recent";
+ end if;
+ end if;
+ if instr_tag.valid = '1' and i = instr_tag.tag then -- entry of the tag being issued; assigned last so it overrides the updates above
+ tag_regs(i).wr_gpr <= gpr_write_valid;
+ tag_regs(i).reg <= gpr_write_in;
+ tag_regs(i).recent <= gpr_write_valid;
+ tag_regs(i).wr_cr <= cr_write_valid;
+ if gpr_write_valid = '1' then
+ report "tag " & integer'image(i) & " valid for gpr " & to_hstring(gpr_write_in);
+ end if;
+ end if;
+ end if;
+ end loop;
+ if rst = '1' then -- note: a flush alone does not reset the tag counters
+ curr_tag <= 0;
+ curr_cr_tag <= 0;
+ else
+ curr_tag <= next_tag; -- next_tag is computed in the control_hazards process
+ if cr_write_valid = '1' then -- remember which tag belongs to the latest CR-writing instruction
+ curr_cr_tag <= instr_tag.tag;
+ end if;
+ end if;
end if;
end process;
+ -- Combinational hazard detection and tag allocation.
+ --
+ -- For each GPR source operand (A, B, C) this looks up the tag of the
+ -- still-pending, most recent instruction that writes the register being
+ -- read.  If that tag equals execute_next_tag (and EX1_BYPASS is enabled)
+ -- the operand is bypassed; otherwise gpr_tag_stall is raised.  CR reads
+ -- are handled the same way using curr_cr_tag and execute_next_cr_tag.
+ -- The process also drives instr_tag / instr_tag_out for the instruction
+ -- being issued and computes next_tag for the control0 process.
+ control_hazards : process(all)
+ variable tag_a : instr_tag_t;
+ variable tag_b : instr_tag_t;
+ variable tag_c : instr_tag_t;
+ variable incr_tag : tag_number_t;
+ variable byp_a : std_ulogic;
+ variable byp_b : std_ulogic;
+ variable byp_c : std_ulogic;
+ variable tag_cr : instr_tag_t;
+ variable byp_cr : std_ulogic;
+ begin
+ -- Operand A: find the pending, most recent writer of gpr_a_read_in.
+ -- If several entries qualify, the highest-numbered one is kept.
+ tag_a := instr_tag_init;
+ for i in tag_number_t loop
+ if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_a_read_in then
+ tag_a.valid := gpr_a_read_valid_in;
+ tag_a.tag := i;
+ end if;
+ end loop;
+ -- A producer that is completing this cycle is not treated as a hazard.
+ if tag_match(tag_a, complete_in) then
+ tag_a.valid := '0';
+ end if;
+ -- Operand B: same lookup for gpr_b_read_in.
+ tag_b := instr_tag_init;
+ for i in tag_number_t loop
+ if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_b_read_in then
+ tag_b.valid := gpr_b_read_valid_in;
+ tag_b.tag := i;
+ end if;
+ end loop;
+ if tag_match(tag_b, complete_in) then
+ tag_b.valid := '0';
+ end if;
+ -- Operand C: same lookup for gpr_c_read_in.
+ tag_c := instr_tag_init;
+ for i in tag_number_t loop
+ if tag_regs(i).wr_gpr = '1' and tag_regs(i).recent = '1' and tag_regs(i).reg = gpr_c_read_in then
+ tag_c.valid := gpr_c_read_valid_in;
+ tag_c.tag := i;
+ end if;
+ end loop;
+ if tag_match(tag_c, complete_in) then
+ tag_c.valid := '0';
+ end if;
+
+ -- An operand can be bypassed when its producer's tag equals
+ -- execute_next_tag and the EX1_BYPASS generic is true.
+ byp_a := '0';
+ if EX1_BYPASS and tag_match(execute_next_tag, tag_a) then
+ byp_a := '1';
+ end if;
+ byp_b := '0';
+ if EX1_BYPASS and tag_match(execute_next_tag, tag_b) then
+ byp_b := '1';
+ end if;
+ byp_c := '0';
+ if EX1_BYPASS and tag_match(execute_next_tag, tag_c) then
+ byp_c := '1';
+ end if;
+
+ gpr_bypass_a <= byp_a;
+ gpr_bypass_b <= byp_b;
+ gpr_bypass_c <= byp_c;
+
+ -- Stall when any operand has a pending producer that cannot be bypassed.
+ gpr_tag_stall <= (tag_a.valid and not byp_a) or
+ (tag_b.valid and not byp_b) or
+ (tag_c.valid and not byp_c);
+
+ -- Tag allocation: the issuing instruction gets curr_tag, and the
+ -- counter advances (wrapping at TAG_COUNT) only when a tag is used.
+ incr_tag := curr_tag;
+ instr_tag.tag <= curr_tag;
+ instr_tag.valid <= valid_out and not deferred;
+ -- instr_tag is a signal, so this reads its current value; the
+ -- process(all) sensitivity re-runs the process once it updates.
+ if instr_tag.valid = '1' then
+ incr_tag := (curr_tag + 1) mod TAG_COUNT;
+ end if;
+ next_tag <= incr_tag;
+ instr_tag_out <= instr_tag;
+
+ -- CR hazards
+ -- Only the latest CR-writing instruction (curr_cr_tag) is checked.
+ tag_cr.tag := curr_cr_tag;
+ tag_cr.valid := cr_read_in and tag_regs(curr_cr_tag).wr_cr;
+ if tag_match(tag_cr, complete_in) then
+ tag_cr.valid := '0';
+ end if;
+ byp_cr := '0';
+ if EX1_BYPASS and tag_match(execute_next_cr_tag, tag_cr) then
+ byp_cr := '1';
+ end if;
+
+ cr_bypass <= byp_cr;
+ cr_tag_stall <= tag_cr.valid and not byp_cr;
+ end process;
+
control1 : process(all)
variable v_int : reg_internal_type;
variable valid_tmp : std_ulogic;
stall_tmp := '0';
if flush_in = '1' then
- -- expect to see complete_in next cycle
- v_int.outstanding := 1;
- elsif complete_in = '1' then
+ v_int.outstanding := 0;
+ elsif complete_in.valid = '1' then
v_int.outstanding := r_int.outstanding - 1;
end if;
+ if r_int.outstanding >= PIPELINE_DEPTH + 1 then
+ valid_tmp := '0';
+ stall_tmp := '1';
+ end if;
if rst = '1' then
v_int := reg_internal_init;
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
- -- let it go out if there are no GPR hazards
- stall_tmp := stall_a_out or stall_b_out or stall_c_out or cr_stall_out;
+ -- let it go out if there are no GPR or CR hazards
+ stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;
v_int.state := WAIT_FOR_CURR_TO_COMPLETE;
end if;
else
- -- let it go out if there are no GPR hazards
- stall_tmp := stall_a_out or stall_b_out or stall_c_out or cr_stall_out;
+ -- let it go out if there are no GPR or CR hazards
+ stall_tmp := gpr_tag_stall or cr_tag_stall;
end if;
end if;
else
valid_tmp := '0';
end if;
- if valid_tmp = '1' then
- if deferred = '0' then
- v_int.outstanding := v_int.outstanding + 1;
- end if;
- gpr_write_valid <= gpr_write_valid_in;
- cr_write_valid <= cr_write_in;
- else
- gpr_write_valid <= '0';
- cr_write_valid <= '0';
+ gpr_write_valid <= gpr_write_valid_in and valid_tmp;
+ cr_write_valid <= cr_write_in and valid_tmp;
+
+ if valid_tmp = '1' and deferred = '0' then
+ v_int.outstanding := v_int.outstanding + 1;
end if;
-- update outputs
architecture behave of core is
-- icache signals
signal fetch1_to_icache : Fetch1ToIcacheType;
+ signal writeback_to_fetch1: WritebackToFetch1Type;
signal icache_to_decode1 : IcacheToDecode1Type;
signal mmu_to_icache : MmuToIcacheType;
-- execute signals
signal execute1_to_writeback: Execute1ToWritebackType;
- signal execute1_to_fetch1: Execute1ToFetch1Type;
+ signal execute1_bypass: bypass_data_t;
+ signal execute1_cr_bypass: cr_bypass_data_t;
-- load store signals
signal execute1_to_loadstore1: Execute1ToLoadstore1Type;
signal decode1_flush: std_ulogic;
signal fetch1_flush: std_ulogic;
- signal complete: std_ulogic;
+ signal complete: instr_tag_t;
signal terminate: std_ulogic;
signal core_rst: std_ulogic;
signal icache_inv: std_ulogic;
+ signal do_interrupt: std_ulogic;
-- Delayed/Latched resets and alt_reset
signal rst_fetch1 : std_ulogic := '1';
signal rst_ex1 : std_ulogic := '1';
signal rst_fpu : std_ulogic := '1';
signal rst_ls1 : std_ulogic := '1';
+ signal rst_wback : std_ulogic := '1';
signal rst_dbg : std_ulogic := '1';
signal alt_reset_d : std_ulogic;
rst_ex1 <= core_rst;
rst_fpu <= core_rst;
rst_ls1 <= core_rst;
+ rst_wback <= core_rst;
rst_dbg <= rst;
alt_reset_d <= alt_reset;
end if;
inval_btc => ex1_icache_inval or mmu_to_icache.tlbie,
stop_in => dbg_core_stop,
d_in => decode1_to_fetch1,
- e_in => execute1_to_fetch1,
+ w_in => writeback_to_fetch1,
i_out => fetch1_to_icache,
log_out => log_data(42 downto 0)
);
r_out => decode2_to_register_file,
c_in => cr_file_to_decode2,
c_out => decode2_to_cr_file,
+ execute_bypass => execute1_bypass,
+ execute_cr_bypass => execute1_cr_bypass,
log_out => log_data(119 downto 110)
);
decode2_busy_in <= ex1_busy_out;
port map (
clk => clk,
rst => rst_ex1,
- flush_out => flush,
+ flush_in => flush,
busy_out => ex1_busy_out,
e_in => decode2_to_execute1,
l_in => loadstore1_to_execute1,
fp_in => fpu_to_execute1,
ext_irq_in => ext_irq,
+ interrupt_in => do_interrupt,
l_out => execute1_to_loadstore1,
- f_out => execute1_to_fetch1,
fp_out => execute1_to_fpu,
e_out => execute1_to_writeback,
+ bypass_data => execute1_bypass,
+ bypass_cr_data => execute1_cr_bypass,
icache_inval => ex1_icache_inval,
dbg_msr_out => msr,
terminate_out => terminate,
writeback_0: entity work.writeback
port map (
clk => clk,
+ rst => rst_wback,
+ flush_out => flush,
e_in => execute1_to_writeback,
l_in => loadstore1_to_writeback,
fp_in => fpu_to_writeback,
w_out => writeback_to_register_file,
c_out => writeback_to_cr_file,
+ f_out => writeback_to_fetch1,
+ interrupt_out => do_interrupt,
complete_out => complete
);
+++ /dev/null
-library ieee;
-use ieee.std_logic_1164.all;
-use ieee.numeric_std.all;
-
-entity cr_hazard is
- generic (
- PIPELINE_DEPTH : natural := 1
- );
- port(
- clk : in std_ulogic;
- busy_in : in std_ulogic;
- deferred : in std_ulogic;
- complete_in : in std_ulogic;
- flush_in : in std_ulogic;
- issuing : in std_ulogic;
-
- cr_read_in : in std_ulogic;
- cr_write_in : in std_ulogic;
- bypassable : in std_ulogic;
-
- stall_out : out std_ulogic;
- use_bypass : out std_ulogic
- );
-end entity cr_hazard;
-architecture behaviour of cr_hazard is
- type pipeline_entry_type is record
- valid : std_ulogic;
- bypass : std_ulogic;
- end record;
- constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0');
-
- type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
- constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
-
- signal r, rin : pipeline_t := pipeline_t_init;
-begin
- cr_hazard0: process(clk)
- begin
- if rising_edge(clk) then
- r <= rin;
- end if;
- end process;
-
- cr_hazard1: process(all)
- variable v : pipeline_t;
- begin
- v := r;
-
- -- XXX assumes PIPELINE_DEPTH = 1
- if complete_in = '1' then
- v(1).valid := '0';
- end if;
-
- use_bypass <= '0';
- stall_out <= '0';
- if cr_read_in = '1' then
- loop_0: for i in 0 to PIPELINE_DEPTH loop
- if v(i).valid = '1' then
- if r(i).bypass = '1' then
- use_bypass <= '1';
- else
- stall_out <= '1';
- end if;
- end if;
- end loop;
- end if;
-
- -- XXX assumes PIPELINE_DEPTH = 1
- if busy_in = '0' then
- v(1) := r(0);
- v(0).valid := '0';
- end if;
- if deferred = '0' and issuing = '1' then
- v(0).valid := cr_write_in;
- v(0).bypass := bypassable;
- end if;
- if flush_in = '1' then
- v(0).valid := '0';
- v(1).valid := '0';
- end if;
-
- -- update registers
- rin <= v;
-
- end process;
-end;
doall : std_ulogic; -- with tlbie, indicates flush whole TLB
tlbld : std_ulogic; -- indicates a TLB load request (from MMU)
mmu_req : std_ulogic; -- indicates source of request
+ d_valid : std_ulogic; -- indicates req.data is valid now
end record;
signal r0 : reg_stage_0_t;
r.mmu_req := '1';
else
r.req := d_in;
+ r.req.data := (others => '0');
r.tlbie := '0';
r.doall := '0';
r.tlbld := '0';
r.mmu_req := '0';
end if;
+ r.d_valid := '0';
if rst = '1' then
r0_full <= '0';
- elsif r1.full = '0' or r0_full = '0' then
+ elsif (r1.full = '0' and d_in.hold = '0') or r0_full = '0' then
r0 <= r;
r0_full <= r.req.valid;
end if;
+ -- Sample data the cycle after a request comes in from loadstore1.
+ -- If another request has come in already then the data will get
+ -- put directly into req.data below.
+ if r0.req.valid = '1' and r.req.valid = '0' and r0.d_valid = '0' and
+ r0.mmu_req = '0' then
+ r0.req.data <= d_in.data;
+ r0.d_valid <= '1';
+ end if;
end if;
end process;
m_out.stall <= '0';
-- Hold off the request in r0 when r1 has an uncompleted request
- r0_stall <= r0_full and r1.full;
- r0_valid <= r0_full and not r1.full;
+ r0_stall <= r0_full and (r1.full or d_in.hold);
+ r0_valid <= r0_full and not r1.full and not d_in.hold;
stall_out <= r0_stall;
-- TLB
req.dcbz := r0.req.dcbz;
req.real_addr := ra;
-- Force data to 0 for dcbz
- if r0.req.dcbz = '0' then
- req.data := d_in.data;
- else
+ if r0.req.dcbz = '1' then
req.data := (others => '0');
+ elsif r0.d_valid = '1' then
+ req.data := r0.req.data;
+ else
+ req.data := d_in.data;
end if;
-- Select all bytes for dcbz and for cacheable loads
if r0.req.dcbz = '1' or (r0.req.load = '1' and r0.req.nc = '0') then
-- complete the request next cycle.
-- Compare the whole address in case the request in
-- r1.req is not the one that started this refill.
- if r1.full = '1' and r1.req.same_tag = '1' and
- ((r1.dcbz = '1' and r1.req.dcbz = '1') or
- (r1.dcbz = '0' and r1.req.op = OP_LOAD_MISS)) and
- r1.store_row = get_row(r1.req.real_addr) then
+ if req.valid = '1' and req.same_tag = '1' and
+ ((r1.dcbz = '1' and req.dcbz = '1') or
+ (r1.dcbz = '0' and req.op = OP_LOAD_MISS)) and
+ r1.store_row = get_row(req.real_addr) then
r1.full <= '0';
r1.slow_valid <= '1';
if r1.mmu_req = '0' then
28 => (ALU, NONE, OP_AND, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), -- andi.
29 => (ALU, NONE, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), -- andis.
0 => (ALU, NONE, OP_ATTN, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- attn
- 18 => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- b
+ 18 => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, SPR, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- b
16 => (ALU, NONE, OP_BC, SPR, CONST_BD, NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), -- bc
11 => (ALU, NONE, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- cmpi
10 => (ALU, NONE, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpli
34 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lbz
- 35 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lbzu
+ 35 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lbzu
50 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lfd
- 51 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lfdu
+ 51 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lfdu
48 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- lfs
- 49 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- lfsu
+ 49 => (LDST, FPU, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', DUPD), -- lfsu
42 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lha
- 43 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lhau
+ 43 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lhau
40 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhz
- 41 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lhzu
+ 41 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lhzu
56 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DQ, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRTE), -- lq
32 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwz
- 33 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lwzu
+ 33 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lwzu
7 => (ALU, NONE, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- mulli
24 => (ALU, NONE, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- ori
25 => (ALU, NONE, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- oris
23 => (ALU, NONE, OP_RLC, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- rlwnm
17 => (ALU, NONE, OP_SC, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sc
38 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stb
- 39 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stbu
+ 39 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stbu
54 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stfd
- 55 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stfdu
+ 55 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stfdu
52 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- stfs
- 53 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- stfsu
+ 53 => (LDST, FPU, OP_STORE, RA_OR_ZERO, CONST_SI, FRS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- stfsu
44 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sth
- 45 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- sthu
+ 45 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- sthu
36 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stw
- 37 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stwu
+ 37 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_SI, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stwu
8 => (ALU, NONE, OP_ADD, RA, CONST_SI, NONE, RT, '0', '0', '1', '0', ONE, '1', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- subfic
2 => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), -- tdi
3 => (ALU, NONE, OP_TRAP, RA, CONST_SI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', NONE, '0', '1', NONE), -- twi
2#0100111010# => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cbcdtd
2#0100011010# => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cdtbcd
2#0000000000# => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), -- cmp
- 2#0111111100# => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpb
+ 2#0111111100# => (ALU, NONE, OP_CMPB, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpb
2#0011100000# => (ALU, NONE, OP_CMPEQB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpeqb
2#0000100000# => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmpl
2#0011000000# => (ALU, NONE, OP_CMPRB, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- cmprb
2#1111101111# => (ALU, NONE, OP_ISEL, RA_OR_ZERO, RB, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- isel
2#0000110100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), -- lbarx
2#1101010101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lbzcix
- 2#0001110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lbzux
+ 2#0001110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lbzux
2#0001010111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lbzx
2#0001010100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), -- ldarx
2#1000010100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- ldbrx
2#1101110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- ldcix
- 2#0000110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- ldux
+ 2#0000110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- ldux
2#0000010101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- ldx
2#1001010111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lfdx
- 2#1001110111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lfdux
+ 2#1001110111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lfdux
2#1101010111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lfiwax
2#1101110111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lfiwzx
2#1000010111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- lfsx
- 2#1000110111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- lfsux
+ 2#1000110111# => (LDST, FPU, OP_LOAD, RA_OR_ZERO, RB, NONE, FRT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', DUPD), -- lfsux
2#0001110100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), -- lharx
- 2#0101110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lhaux
+ 2#0101110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lhaux
2#0101010111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhax
2#1100010110# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhbrx
2#1100110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhzcix
- 2#0100110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lhzux
+ 2#0100110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lhzux
2#0100010111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lhzx
2#0100010100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', DRTE), -- lqarx
2#0000010100# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', NONE, '0', '0', NONE), -- lwarx
- 2#0101110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lwaux
+ 2#0101110101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lwaux
2#0101010101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwax
2#1000010110# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwbrx
2#1100010101# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwzcix
- 2#0000110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- lwzux
+ 2#0000110111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- lwzux
2#0000010111# => (LDST, NONE, OP_LOAD, RA_OR_ZERO, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwzx
2#1001000000# => (ALU, NONE, OP_MCRXRX, NONE, NONE, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mcrxrx
2#0000010011# => (ALU, NONE, OP_MFCR, NONE, NONE, NONE, RT, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- mfcr/mfocrf
2#1000011000# => (ALU, NONE, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0', NONE), -- srw
2#1111010101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stbcix
2#1010110110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', NONE), -- stbcx
- 2#0011110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stbux
+ 2#0011110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stbux
2#0011010111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stbx
2#1010010100# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stdbrx
2#1111110101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stdcix
2#0011010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', NONE), -- stdcx
- 2#0010110101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stdux
+ 2#0010110101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stdux
2#0010010101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stdx
2#1011010111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stfdx
- 2#1011110111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stfdux
+ 2#1011110111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stfdux
2#1111010111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stfiwx
2#1010010111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '1', '0', NONE, '0', '0', NONE), -- stfsx
- 2#1010110111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- stfsux
+ 2#1010110111# => (LDST, FPU, OP_STORE, RA_OR_ZERO, RB, FRS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '1', '0', NONE, '0', '0', NONE), -- stfsux
2#1110010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sthbrx
2#1110110101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sthcix
2#1011010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', NONE), -- sthcx
- 2#0110110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- sthux
+ 2#0110110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- sthux
2#0110010111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- sthx
2#0010110110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', DRSE), -- stqcx
2#1010010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '1', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stwbrx
2#1110010101# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stwcix
2#0010010110# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '1', '0', '0', ONE, '0', '0', NONE), -- stwcx
- 2#0010110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stwux
+ 2#0010110111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stwux
2#0010010111# => (LDST, NONE, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- stwx
2#0000101000# => (ALU, NONE, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- subf
2#1000101000# => (ALU, NONE, OP_ADD, RA, RB, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), -- subfo
-- unit fac internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl rpt
-- op in out A out in out len ext pipe
0 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- ld
- 1 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- ldu
+ 1 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', DUPD), -- ldu
2 => (LDST, NONE, OP_LOAD, RA_OR_ZERO, CONST_DS, NONE, RT, '0', '0', '0', '0', ZERO, '0', is4B, '0', '1', '0', '0', '0', '0', NONE, '0', '0', NONE), -- lwa
others => decode_rom_init
);
-- unit fac internal in1 in2 in3 out CR CR inv inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl rpt
-- op in out A out in out len ext pipe
0 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), -- std
- 1 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stdu
+ 1 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '1', '0', '0', '0', NONE, '0', '0', NONE), -- stdu
2 => (LDST, NONE, OP_STORE, RA_OR_ZERO, CONST_DS, RS, NONE, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', DRSE), -- stq
others => decode_rom_init
);
-- major opcode 31, lots of things
v.decode := decode_op_31_array(to_integer(unsigned(f_in.insn(10 downto 1))));
- -- Work out ispr1/ispr2 independent of v.decode since they seem to be critical path
+ -- Work out ispr1/ispro independent of v.decode since they seem to be critical path
sprn := decode_spr_num(f_in.insn);
v.ispr1 := fast_spr_num(sprn);
+ v.ispro := fast_spr_num(sprn);
if std_match(f_in.insn(10 downto 1), "01-1010011") then
-- mfspr or mtspr
-- CTR may be needed as input to bc
if f_in.insn(23) = '0' then
v.ispr1 := fast_spr_num(SPR_CTR);
+ v.ispro := fast_spr_num(SPR_CTR);
+ elsif f_in.insn(0) = '1' then
+ v.ispro := fast_spr_num(SPR_LR);
end if;
-- Predict backward branches as taken, forward as untaken
v.br_pred := f_in.insn(15);
-- Unconditional branches are always taken
v.br_pred := '1';
br_offset := signed(f_in.insn(25 downto 2));
+ if f_in.insn(0) = '1' then
+ v.ispro := fast_spr_num(SPR_LR);
+ end if;
when 19 =>
vi.override := not decode_op_19_valid(to_integer(unsigned(f_in.insn(5 downto 1) & f_in.insn(10 downto 6))));
-- Branch uses CTR as condition when BO(2) is 0. This is
-- also used to indicate that CTR is modified (they go
-- together).
- if f_in.insn(23) = '0' then
+ -- bcctr doesn't update CTR or use it in the branch condition
+ if f_in.insn(23) = '0' and (f_in.insn(10) = '0' or f_in.insn(6) = '1') then
v.ispr1 := fast_spr_num(SPR_CTR);
+ v.ispro := fast_spr_num(SPR_CTR);
+ elsif f_in.insn(0) = '1' then
+ v.ispro := fast_spr_num(SPR_LR);
end if;
if f_in.insn(10) = '0' then
v.ispr2 := fast_spr_num(SPR_LR);
clk : in std_ulogic;
rst : in std_ulogic;
- complete_in : in std_ulogic;
+ complete_in : in instr_tag_t;
busy_in : in std_ulogic;
stall_out : out std_ulogic;
c_in : in CrFileToDecode2Type;
c_out : out Decode2ToCrFileType;
+ execute_bypass : in bypass_data_t;
+ execute_cr_bypass : in cr_bypass_data_t;
+
log_out : out std_ulogic_vector(9 downto 0)
);
end entity decode2;
end case;
end;
- -- For now, use "rc" in the decode table to decide whether oe exists.
- -- This is not entirely correct architecturally: For mulhd and
- -- mulhdu, the OE field is reserved. It remains to be seen what an
- -- actual POWER9 does if we set it on those instructions, for now we
- -- test that further down when assigning to the multiplier oe input.
- --
- function decode_oe (t : rc_t; insn_in : std_ulogic_vector(31 downto 0)) return std_ulogic is
- begin
- case t is
- when RC =>
- return insn_oe(insn_in);
- when OTHERS =>
- return '0';
- end case;
- end;
-
-- control signals that are derived from insn_type
type mux_select_array_t is array(insn_type_t) of std_ulogic_vector(2 downto 0);
OP_MOD => "011",
OP_CNTZ => "100", -- countzero_result
OP_MFSPR => "101", -- spr_result
+ OP_B => "110", -- next_nia
+ OP_BC => "110",
+ OP_BCREG => "110",
OP_ADDG6S => "111", -- misc_result
OP_ISEL => "111",
OP_DARN => "111",
OP_MFMSR => "100",
OP_MFCR => "101",
OP_SETB => "110",
+ OP_CMP => "000", -- cr_result
+ OP_CMPRB => "001",
+ OP_CMPEQB => "010",
+ OP_CROP => "011",
+ OP_MCRXRX => "100",
+ OP_MTCRF => "101",
others => "000"
);
signal gpr_write_valid : std_ulogic;
signal gpr_write : gspr_index_t;
- signal gpr_bypassable : std_ulogic;
-
- signal update_gpr_write_valid : std_ulogic;
- signal update_gpr_write_reg : gspr_index_t;
signal gpr_a_read_valid : std_ulogic;
- signal gpr_a_read :gspr_index_t;
- signal gpr_a_bypass : std_ulogic;
+ signal gpr_a_read : gspr_index_t;
+ signal gpr_a_bypass : std_ulogic;
signal gpr_b_read_valid : std_ulogic;
- signal gpr_b_read : gspr_index_t;
- signal gpr_b_bypass : std_ulogic;
+ signal gpr_b_read : gspr_index_t;
+ signal gpr_b_bypass : std_ulogic;
signal gpr_c_read_valid : std_ulogic;
- signal gpr_c_read : gspr_index_t;
- signal gpr_c_bypass : std_ulogic;
+ signal gpr_c_read : gspr_index_t;
+ signal gpr_c_bypass : std_ulogic;
+ signal cr_read_valid : std_ulogic;
signal cr_write_valid : std_ulogic;
signal cr_bypass : std_ulogic;
- signal cr_bypass_avail : std_ulogic;
+
+ signal instr_tag : instr_tag_t;
begin
control_0: entity work.control
generic map (
- PIPELINE_DEPTH => 1
+ EX1_BYPASS => EX1_BYPASS
)
port map (
clk => clk,
gpr_write_valid_in => gpr_write_valid,
gpr_write_in => gpr_write,
- gpr_bypassable => gpr_bypassable,
-
- update_gpr_write_valid => update_gpr_write_valid,
- update_gpr_write_reg => update_gpr_write_reg,
gpr_a_read_valid_in => gpr_a_read_valid,
gpr_a_read_in => gpr_a_read,
gpr_c_read_valid_in => gpr_c_read_valid,
gpr_c_read_in => gpr_c_read,
- cr_read_in => d_in.decode.input_cr,
+ execute_next_tag => execute_bypass.tag,
+ execute_next_cr_tag => execute_cr_bypass.tag,
+
+ cr_read_in => cr_read_valid,
cr_write_in => cr_write_valid,
cr_bypass => cr_bypass,
- cr_bypassable => cr_bypass_avail,
valid_out => control_valid_out,
stall_out => control_stall_out,
gpr_bypass_a => gpr_a_bypass,
gpr_bypass_b => gpr_b_bypass,
- gpr_bypass_c => gpr_c_bypass
+ gpr_bypass_c => gpr_c_bypass,
+
+ instr_tag_out => instr_tag
);
deferred <= r.e.valid and busy_in;
variable decoded_reg_c : decode_input_reg_t;
variable decoded_reg_o : decode_output_reg_t;
variable length : std_ulogic_vector(3 downto 0);
+ variable op : insn_type_t;
begin
v := r;
--v.e.input_cr := d_in.decode.input_cr;
v.e.output_cr := d_in.decode.output_cr;
-
+
+ -- Work out whether XER common bits are set
+ v.e.output_xer := d_in.decode.output_carry;
+ case d_in.decode.insn_type is
+ when OP_ADD | OP_MUL_L64 | OP_DIV | OP_DIVE =>
+ -- OE field is valid in OP_ADD/OP_MUL_L64 with major opcode 31 only
+ if d_in.insn(31 downto 26) = "011111" and insn_oe(d_in.insn) = '1' then
+ v.e.oe := '1';
+ v.e.output_xer := '1';
+ end if;
+ when OP_MTSPR =>
+ if decode_spr_num(d_in.insn) = SPR_XER then
+ v.e.output_xer := '1';
+ end if;
+ when others =>
+ end case;
+
decoded_reg_a := decode_input_reg_a (d_in.decode.input_reg_a, d_in.insn, r_in.read1_data, d_in.ispr1,
d_in.nia);
decoded_reg_b := decode_input_reg_b (d_in.decode.input_reg_b, d_in.insn, r_in.read2_data, d_in.ispr2);
decoded_reg_c := decode_input_reg_c (d_in.decode.input_reg_c, d_in.insn, r_in.read3_data);
- decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispr1);
+ decoded_reg_o := decode_output_reg (d_in.decode.output_reg_a, d_in.insn, d_in.ispro);
+
+ if d_in.decode.lr = '1' then
+ v.e.lr := insn_lk(d_in.insn);
+ -- b and bc have even major opcodes; bcreg is considered absolute
+ v.e.br_abs := insn_aa(d_in.insn) or d_in.insn(26);
+ end if;
+ op := d_in.decode.insn_type;
if d_in.decode.repeat /= NONE then
v.e.repeat := '1';
if r.repeat = d_in.big_endian then
decoded_reg_o.reg(0) := '1';
end if;
+ when DUPD =>
+ -- update-form loads, 2nd instruction writes RA
+ if r.repeat = '1' then
+ decoded_reg_o.reg := decoded_reg_a.reg;
+ end if;
when others =>
end case;
+ elsif v.e.lr = '1' and decoded_reg_a.reg_valid = '1' then
+ -- bcl/bclrl/bctarl that needs to write both CTR and LR has to be doubled
+ v.e.repeat := '1';
+ v.e.second := r.repeat;
+ -- first one does CTR, second does LR
+ decoded_reg_o.reg(0) := not r.repeat;
end if;
r_out.read1_enable <= decoded_reg_a.reg_valid and d_in.valid;
v.e.nia := d_in.nia;
v.e.unit := d_in.decode.unit;
v.e.fac := d_in.decode.facility;
- v.e.insn_type := d_in.decode.insn_type;
+ v.e.instr_tag := instr_tag;
v.e.read_reg1 := decoded_reg_a.reg;
- v.e.read_data1 := decoded_reg_a.data;
- v.e.bypass_data1 := gpr_a_bypass;
v.e.read_reg2 := decoded_reg_b.reg;
- v.e.read_data2 := decoded_reg_b.data;
- v.e.bypass_data2 := gpr_b_bypass;
- v.e.read_data3 := decoded_reg_c.data;
- v.e.bypass_data3 := gpr_c_bypass;
v.e.write_reg := decoded_reg_o.reg;
v.e.write_reg_enable := decoded_reg_o.reg_valid;
v.e.rc := decode_rc(d_in.decode.rc, d_in.insn);
- if not (d_in.decode.insn_type = OP_MUL_H32 or d_in.decode.insn_type = OP_MUL_H64) then
- v.e.oe := decode_oe(d_in.decode.rc, d_in.insn);
- end if;
- v.e.cr := c_in.read_cr_data;
- v.e.bypass_cr := cr_bypass;
v.e.xerc := c_in.read_xerc_data;
v.e.invert_a := d_in.decode.invert_a;
v.e.addm1 := '0';
- if d_in.decode.insn_type = OP_BC or d_in.decode.insn_type = OP_BCREG then
- -- add -1 to CTR
- v.e.addm1 := '1';
- if d_in.insn(23) = '1' or
- (d_in.decode.insn_type = OP_BCREG and d_in.insn(10) = '0') then
- -- don't write decremented CTR if BO(2) = 1 or bcctr
- v.e.write_reg_enable := '0';
- end if;
- end if;
+ v.e.insn_type := op;
v.e.invert_out := d_in.decode.invert_out;
v.e.input_carry := d_in.decode.input_carry;
v.e.output_carry := d_in.decode.output_carry;
v.e.is_32bit := d_in.decode.is_32bit;
v.e.is_signed := d_in.decode.is_signed;
- if d_in.decode.lr = '1' then
- v.e.lr := insn_lk(d_in.insn);
- end if;
v.e.insn := d_in.insn;
v.e.data_len := length;
v.e.byte_reverse := d_in.decode.byte_reverse;
v.e.update := d_in.decode.update;
v.e.reserve := d_in.decode.reserve;
v.e.br_pred := d_in.br_pred;
- v.e.result_sel := result_select(d_in.decode.insn_type);
- v.e.sub_select := subresult_select(d_in.decode.insn_type);
+ v.e.result_sel := result_select(op);
+ v.e.sub_select := subresult_select(op);
+ if op = OP_BC or op = OP_BCREG then
+ -- Decrement CTR if BO(2) = 0 and the instruction is not bcctr.
+ -- Within OP_BCREG the XO bits distinguish the three forms:
+ --   bclr  : insn(10) = '0'
+ --   bcctr : insn(10) = '1', insn(6) = '0'
+ --   bctar : insn(10) = '1', insn(6) = '1'
+ -- This matches the test decode1 uses when choosing ispr1/ispro, so
+ -- only the bcctr pattern is excluded here (bclr and bctar with
+ -- BO(2) = 0 do decrement CTR).
+ -- r.repeat = '0' limits the decrement to the first pass of a
+ -- doubled branch (the pass that writes CTR rather than LR).
+ if d_in.insn(23) = '0' and r.repeat = '0' and
+ not (d_in.decode.insn_type = OP_BCREG and
+ d_in.insn(10) = '1' and d_in.insn(6) = '0') then
+ v.e.addm1 := '1';
+ v.e.result_sel := "000"; -- select adder output
+ end if;
+ end if;
+
+ -- See if any of the operands can get their value via the bypass path.
+ case gpr_a_bypass is
+ when '1' =>
+ v.e.read_data1 := execute_bypass.data;
+ when others =>
+ v.e.read_data1 := decoded_reg_a.data;
+ end case;
+ case gpr_b_bypass is
+ when '1' =>
+ v.e.read_data2 := execute_bypass.data;
+ when others =>
+ v.e.read_data2 := decoded_reg_b.data;
+ end case;
+ case gpr_c_bypass is
+ when '1' =>
+ v.e.read_data3 := execute_bypass.data;
+ when others =>
+ v.e.read_data3 := decoded_reg_c.data;
+ end case;
+
+ v.e.cr := c_in.read_cr_data;
+ if cr_bypass = '1' then
+ v.e.cr := execute_cr_bypass.data;
+ end if;
-- issue control
control_valid_in <= d_in.valid;
gpr_write_valid <= v.e.write_reg_enable;
gpr_write <= decoded_reg_o.reg;
- gpr_bypassable <= '0';
- if EX1_BYPASS and d_in.decode.unit = ALU then
- gpr_bypassable <= '1';
- end if;
- update_gpr_write_valid <= d_in.decode.update;
- update_gpr_write_reg <= decoded_reg_a.reg;
- if v.e.lr = '1' then
- -- there are no instructions that have both update=1 and lr=1
- update_gpr_write_valid <= '1';
- update_gpr_write_reg <= fast_spr_num(SPR_LR);
- end if;
gpr_a_read_valid <= decoded_reg_a.reg_valid;
gpr_a_read <= decoded_reg_a.reg;
gpr_c_read <= decoded_reg_c.reg;
cr_write_valid <= d_in.decode.output_cr or decode_rc(d_in.decode.rc, d_in.insn);
- cr_bypass_avail <= '0';
- if EX1_BYPASS and d_in.decode.unit = ALU then
- cr_bypass_avail <= d_in.decode.output_cr;
- end if;
+ -- Since ops that write CR only write some of the fields,
+ -- any op that writes CR effectively also reads it.
+ cr_read_valid <= cr_write_valid or d_in.decode.input_cr;
v.e.valid := control_valid_out;
if control_valid_out = '1' then
r.e.valid &
stopped_out &
stall_out &
- r.e.bypass_data3 &
- r.e.bypass_data2 &
- r.e.bypass_data1;
+ gpr_a_bypass &
+ gpr_b_bypass &
+ gpr_c_bypass;
end if;
end process;
log_out <= log_data;
type repeat_t is (NONE, -- instruction is not repeated
DRSE, -- double RS, endian twist
- DRTE); -- double RT, endian twist
+ DRTE, -- double RT, endian twist
+ DUPD); -- update-form load
type decode_rom_t is record
unit : unit_t;
rst : in std_ulogic;
-- asynchronous
- flush_out : out std_ulogic;
+ flush_in : in std_ulogic;
busy_out : out std_ulogic;
e_in : in Decode2ToExecute1Type;
fp_in : in FPUToExecute1Type;
ext_irq_in : std_ulogic;
+ interrupt_in : std_ulogic;
-- asynchronous
l_out : out Execute1ToLoadstore1Type;
- f_out : out Execute1ToFetch1Type;
fp_out : out Execute1ToFPUType;
e_out : out Execute1ToWritebackType;
+ bypass_data : out bypass_data_t;
+ bypass_cr_data : out cr_bypass_data_t;
dbg_msr_out : out std_ulogic_vector(63 downto 0);
fp_exception_next : std_ulogic;
trace_next : std_ulogic;
prev_op : insn_type_t;
- lr_update : std_ulogic;
- next_lr : std_ulogic_vector(63 downto 0);
+ br_taken : std_ulogic;
mul_in_progress : std_ulogic;
mul_finish : std_ulogic;
div_in_progress : std_ulogic;
cntz_in_progress : std_ulogic;
- last_nia : std_ulogic_vector(63 downto 0);
- redirect : std_ulogic;
- abs_br : std_ulogic;
- taken_br : std_ulogic;
- br_last : std_ulogic;
- do_intr : std_ulogic;
- vector : integer range 0 to 16#fff#;
- br_offset : std_ulogic_vector(63 downto 0);
- redir_mode : std_ulogic_vector(3 downto 0);
log_addr_spr : std_ulogic_vector(31 downto 0);
end record;
constant reg_type_init : reg_type :=
(e => Execute1ToWritebackInit,
cur_instr => Decode2ToExecute1Init,
- busy => '0', lr_update => '0', terminate => '0',
- fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL,
+ busy => '0', terminate => '0',
+ fp_exception_next => '0', trace_next => '0', prev_op => OP_ILLEGAL, br_taken => '0',
mul_in_progress => '0', mul_finish => '0', div_in_progress => '0', cntz_in_progress => '0',
- next_lr => (others => '0'), last_nia => (others => '0'),
- redirect => '0', abs_br => '0', taken_br => '0', br_last => '0', do_intr => '0', vector => 0,
- br_offset => (others => '0'), redir_mode => "0000",
others => (others => '0'));
signal r, rin : reg_type;
signal a_in, b_in, c_in : std_ulogic_vector(63 downto 0);
signal cr_in : std_ulogic_vector(31 downto 0);
+ signal xerc_in : xer_common_t;
signal valid_in : std_ulogic;
- signal ctrl: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
- signal ctrl_tmp: ctrl_t := (irq_state => WRITE_SRR0, others => (others => '0'));
+ signal ctrl: ctrl_t := (others => (others => '0'));
+ signal ctrl_tmp: ctrl_t := (others => (others => '0'));
signal right_shift, rot_clear_left, rot_clear_right: std_ulogic;
signal rot_sign_ext: std_ulogic;
signal rotator_result: std_ulogic_vector(63 downto 0);
signal spr_result: std_ulogic_vector(63 downto 0);
signal result_mux_sel: std_ulogic_vector(2 downto 0);
signal sub_mux_sel: std_ulogic_vector(2 downto 0);
+ signal next_nia : std_ulogic_vector(63 downto 0);
signal current: Decode2ToExecute1Type;
+ signal carry_32 : std_ulogic;
+ signal carry_64 : std_ulogic;
+ signal overflow_32 : std_ulogic;
+ signal overflow_64 : std_ulogic;
+
+ signal trapval : std_ulogic_vector(4 downto 0);
+
+ signal write_cr_mask : std_ulogic_vector(7 downto 0);
+ signal write_cr_data : std_ulogic_vector(31 downto 0);
+
-- multiply signals
signal x_to_multiply: MultiplyInputType;
signal multiply_to_x: MultiplyOutputType;
begin
e.xerc.ca32 := carry32;
e.xerc.ca := carry;
- e.write_xerc_enable := '1';
end;
procedure set_ov(e: inout Execute1ToWritebackType;
if ov = '1' then
e.xerc.so := '1';
end if;
- e.write_xerc_enable := '1';
end;
function calc_ov(msb_a : std_ulogic; msb_b: std_ulogic;
dbg_msr_out <= ctrl.msr;
log_rd_addr <= r.log_addr_spr;
- a_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data1 = '1' else e_in.read_data1;
- b_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data2 = '1' else e_in.read_data2;
- c_in <= r.e.write_data when EX1_BYPASS and e_in.bypass_data3 = '1' else e_in.read_data3;
+ a_in <= e_in.read_data1;
+ b_in <= e_in.read_data2;
+ c_in <= e_in.read_data3;
+ cr_in <= e_in.cr;
+
+ -- XER forwarding. To avoid having to track XER hazards, we use
+ -- the previously latched value. Since the XER common bits
+ -- (SO, OV[32] and CA[32]) are only modified by instructions that are
+ -- handled here, we can just forward the result being sent to
+ -- writeback.
+ xerc_in <= r.e.xerc when r.e.write_xerc_enable = '1' or r.busy = '1' else e_in.xerc;
- busy_out <= l_in.busy or r.busy or fp_in.busy;
- valid_in <= e_in.valid and not busy_out;
+ with e_in.unit select busy_out <=  -- busy condition depends on the unit of the incoming instruction
+ l_in.busy or r.busy or fp_in.busy when LDST,
+ l_in.busy or l_in.in_progress or r.busy or fp_in.busy when others;  -- every unit other than LDST is additionally held off while l_in.in_progress is set
+
+ valid_in <= e_in.valid and not busy_out and not flush_in;  -- incoming instruction counts as valid only when not busy and flush_in is clear
terminate_out <= r.terminate;
muldiv_result when "011",
countzero_result when "100",
spr_result when "101",
+ next_nia when "110",
misc_result when others;
execute1_0: process(clk)
ctrl.tb <= (others => '0');
ctrl.dec <= (others => '0');
ctrl.msr <= (MSR_SF => '1', MSR_LE => '1', others => '0');
- ctrl.irq_state <= WRITE_SRR0;
else
r <= rin;
ctrl <= ctrl_tmp;
- assert not (r.lr_update = '1' and valid_in = '1')
- report "LR update collision with valid in EX1"
- severity failure;
- if r.lr_update = '1' then
- report "LR update to " & to_hstring(r.next_lr);
+ if valid_in = '1' then
+ report "execute " & to_hstring(e_in.nia) & " op=" & insn_type_t'image(e_in.insn_type) &
+ " wr=" & to_hstring(rin.e.write_reg) & " we=" & std_ulogic'image(rin.e.write_enable) &
+ " tag=" & integer'image(rin.e.instr_tag.tag) & std_ulogic'image(rin.e.instr_tag.valid);
end if;
end if;
end if;
end process;
- execute1_1: process(all)
- variable v : reg_type;
+ -- Data path for integer instructions
+ execute1_dp: process(all)
variable a_inv : std_ulogic_vector(63 downto 0);
variable b_or_m1 : std_ulogic_vector(63 downto 0);
+ variable sum_with_carry : std_ulogic_vector(64 downto 0);
+ variable sign1, sign2 : std_ulogic;
+ variable abs1, abs2 : signed(63 downto 0);
+ variable addend : std_ulogic_vector(127 downto 0);
variable addg6s : std_ulogic_vector(63 downto 0);
+ variable crbit : integer range 0 to 31;
variable isel_result : std_ulogic_vector(63 downto 0);
variable darn : std_ulogic_vector(63 downto 0);
- variable mfcr_result : std_ulogic_vector(63 downto 0);
variable setb_result : std_ulogic_vector(63 downto 0);
- variable newcrf : std_ulogic_vector(3 downto 0);
- variable sum_with_carry : std_ulogic_vector(64 downto 0);
- variable crnum : crnum_t;
- variable crbit : integer range 0 to 31;
- variable scrnum : crnum_t;
+ variable mfcr_result : std_ulogic_vector(63 downto 0);
variable lo, hi : integer;
- variable sh, mb, me : std_ulogic_vector(5 downto 0);
- variable sh32, mb32, me32 : std_ulogic_vector(4 downto 0);
- variable bo, bi : std_ulogic_vector(4 downto 0);
- variable bf, bfa : std_ulogic_vector(2 downto 0);
- variable cr_op : std_ulogic_vector(9 downto 0);
- variable cr_operands : std_ulogic_vector(1 downto 0);
- variable bt, ba, bb : std_ulogic_vector(4 downto 0);
- variable btnum, banum, bbnum : integer range 0 to 31;
- variable crresult : std_ulogic;
variable l : std_ulogic;
- variable next_nia : std_ulogic_vector(63 downto 0);
- variable carry_32, carry_64 : std_ulogic;
- variable sign1, sign2 : std_ulogic;
- variable abs1, abs2 : signed(63 downto 0);
- variable overflow : std_ulogic;
variable zerohi, zerolo : std_ulogic;
variable msb_a, msb_b : std_ulogic;
variable a_lt : std_ulogic;
variable a_lt_lo : std_ulogic;
variable a_lt_hi : std_ulogic;
- variable lv : Execute1ToLoadstore1Type;
- variable irq_valid : std_ulogic;
- variable exception : std_ulogic;
- variable exception_nextpc : std_ulogic;
- variable trapval : std_ulogic_vector(4 downto 0);
- variable illegal : std_ulogic;
- variable is_branch : std_ulogic;
- variable is_direct_branch : std_ulogic;
- variable taken_branch : std_ulogic;
- variable abs_branch : std_ulogic;
- variable spr_val : std_ulogic_vector(63 downto 0);
- variable addend : std_ulogic_vector(127 downto 0);
- variable do_trace : std_ulogic;
- variable hold_wr_data : std_ulogic;
- variable f : Execute1ToFetch1Type;
- variable fv : Execute1ToFPUType;
+ variable newcrf : std_ulogic_vector(3 downto 0);
+ variable bf, bfa : std_ulogic_vector(2 downto 0);
+ variable crnum : crnum_t;
+ variable scrnum : crnum_t;
+ variable cr_operands : std_ulogic_vector(1 downto 0);
+ variable crresult : std_ulogic;
+ variable bt, ba, bb : std_ulogic_vector(4 downto 0);
+ variable btnum : integer range 0 to 3;
+ variable banum, bbnum : integer range 0 to 31;
+ variable j : integer;
begin
- sum_with_carry := (others => '0');
- newcrf := (others => '0');
- is_branch := '0';
- is_direct_branch := '0';
- taken_branch := '0';
- abs_branch := '0';
- hold_wr_data := '0';
-
- v := r;
- v.e := Execute1ToWritebackInit;
- v.redirect := '0';
- v.abs_br := '0';
- v.do_intr := '0';
- v.vector := 0;
- v.br_offset := (others => '0');
- v.redir_mode := ctrl.msr(MSR_IR) & not ctrl.msr(MSR_PR) &
- not ctrl.msr(MSR_LE) & not ctrl.msr(MSR_SF);
- v.taken_br := '0';
- v.br_last := '0';
-
- lv := Execute1ToLoadstore1Init;
- fv := Execute1ToFPUInit;
-
- -- XER forwarding. To avoid having to track XER hazards, we use
- -- the previously latched value. Since the XER common bits
- -- (SO, OV[32] and CA[32]) are only modified by instructions that are
- -- handled here, we can just forward the result being sent to
- -- writeback.
- if r.e.write_xerc_enable = '1' or r.busy = '1' then
- v.e.xerc := r.e.xerc;
- else
- v.e.xerc := e_in.xerc;
- end if;
-
- -- CR forwarding
- cr_in <= e_in.cr;
- if EX1_BYPASS and e_in.bypass_cr = '1' and r.e.write_cr_enable = '1' then
- for i in 0 to 7 loop
- if r.e.write_cr_mask(i) = '1' then
- cr_in(i * 4 + 3 downto i * 4) <= r.e.write_cr_data(i * 4 + 3 downto i * 4);
- end if;
- end loop;
- end if;
-
- v.lr_update := '0';
- v.mul_in_progress := '0';
- v.div_in_progress := '0';
- v.cntz_in_progress := '0';
- v.mul_finish := '0';
-
- spr_result <= (others => '0');
- spr_val := (others => '0');
-
-- Main adder
if e_in.invert_a = '0' then
a_inv := a_in;
b_or_m1 := (others => '1');
end if;
sum_with_carry := ppc_adde(a_inv, b_or_m1,
- decode_input_carry(e_in.input_carry, v.e.xerc));
+ decode_input_carry(e_in.input_carry, xerc_in));
adder_result <= sum_with_carry(63 downto 0);
- carry_32 := sum_with_carry(32) xor a_inv(32) xor b_in(32);
- carry_64 := sum_with_carry(64);
+ carry_32 <= sum_with_carry(32) xor a_inv(32) xor b_in(32);
+ carry_64 <= sum_with_carry(64);
+ overflow_32 <= calc_ov(a_inv(31), b_in(31), carry_32, sum_with_carry(31));
+ overflow_64 <= calc_ov(a_inv(63), b_in(63), carry_64, sum_with_carry(63));
-- signals to multiply and divide units
sign1 := '0';
end if;
-- Interface to multiply and divide units
- x_to_multiply <= MultiplyInputInit;
- x_to_multiply.is_32bit <= e_in.is_32bit;
-
- x_to_divider <= Execute1ToDividerInit;
x_to_divider.is_signed <= e_in.is_signed;
x_to_divider.is_32bit <= e_in.is_32bit;
+ x_to_divider.is_extended <= '0';
+ x_to_divider.is_modulus <= '0';
if e_in.insn_type = OP_MOD then
x_to_divider.is_modulus <= '1';
end if;
addend := not addend;
end if;
+ x_to_multiply.is_32bit <= e_in.is_32bit;
x_to_multiply.not_result <= sign1 xor sign2;
x_to_multiply.addend <= addend;
x_to_divider.neg_result <= sign1 xor (sign2 and not x_to_divider.is_modulus);
zerohi := not (or (a_in(63 downto 32) xor b_in(63 downto 32)));
if zerolo = '1' and (l = '0' or zerohi = '1') then
-- values are equal
- trapval := "00100";
+ trapval <= "00100";
else
a_lt_lo := '0';
a_lt_hi := '0';
if msb_a /= msb_b then
-- Comparison is clear from MSB difference.
-- for signed, 0 is greater; for unsigned, 1 is greater
- trapval := msb_a & msb_b & '0' & msb_b & msb_a;
+ trapval <= msb_a & msb_b & '0' & msb_b & msb_a;
else
-- MSBs are equal, so signed and unsigned comparisons give the
-- same answer.
- trapval := a_lt & not a_lt & '0' & a_lt & not a_lt;
+ trapval <= a_lt & not a_lt & '0' & a_lt & not a_lt;
end if;
end if;
+ -- CR result mux: builds the 4-bit value newcrf according to current.sub_select
+ bf := insn_bf(e_in.insn);
+ crnum := to_integer(unsigned(bf));  -- value returned by insn_bf, as an integer
+ newcrf := (others => '0');  -- default when no case below assigns it
+ case current.sub_select is
+ when "000" =>
+ -- CMP and CMPL instructions
+ if e_in.is_signed = '1' then
+ newcrf := trapval(4 downto 2) & xerc_in.so;  -- is_signed = '1': trapval bits 4..2, then SO
+ else
+ newcrf := trapval(1 downto 0) & trapval(2) & xerc_in.so;  -- is_signed = '0': trapval bits 1..0, then bit 2, then SO
+ end if;
+ when "001" =>
+ newcrf := ppc_cmprb(a_in, b_in, insn_l(e_in.insn));
+ when "010" =>
+ newcrf := ppc_cmpeqb(a_in, b_in);
+ when "011" =>
+ if current.insn(1) = '1' then
+ -- CR logical instructions
+ j := (7 - crnum) * 4;  -- low bit index of field crnum in cr_in (crnum 0 -> bits 31 downto 28)
+ newcrf := cr_in(j + 3 downto j);  -- start from the current contents of that field
+ bt := insn_bt(e_in.insn);
+ ba := insn_ba(e_in.insn);
+ bb := insn_bb(e_in.insn);
+ btnum := 3 - to_integer(unsigned(bt(1 downto 0)));  -- target bit position inside the 4-bit newcrf
+ banum := 31 - to_integer(unsigned(ba));  -- source bit positions inside the 32-bit cr_in
+ bbnum := 31 - to_integer(unsigned(bb));
+ -- Bits 6-9 of the instruction word give the truth table
+ -- of the requested logical operation
+ cr_operands := cr_in(banum) & cr_in(bbnum);
+ crresult := e_in.insn(6 + to_integer(unsigned(cr_operands)));  -- the two operand bits index the truth table
+ for i in 0 to 3 loop  -- only bit btnum is replaced; the other three bits keep their cr_in value
+ if i = btnum then
+ newcrf(i) := crresult;
+ end if;
+ end loop;
+ else
+ -- MCRF
+ bfa := insn_bfa(e_in.insn);
+ scrnum := to_integer(unsigned(bfa));
+ j := (7 - scrnum) * 4;  -- low bit index of source field scrnum in cr_in
+ newcrf := cr_in(j + 3 downto j);  -- source field copied unchanged
+ end if;
+ when "100" =>
+ -- MCRXRX
+ newcrf := xerc_in.ov & xerc_in.ca & xerc_in.ov32 & xerc_in.ca32;
+ when others =>  -- newcrf keeps its all-zero default
+ end case;
+ if current.insn_type = OP_MTCRF then  -- OP_MTCRF: mask derived from insn_fxm, data taken from c_in
+ if e_in.insn(20) = '0' then
+ -- mtcrf
+ write_cr_mask <= insn_fxm(e_in.insn);
+ else
+ -- mtocrf: We require one-hot priority encoding here
+ crnum := fxm_to_num(insn_fxm(e_in.insn));
+ write_cr_mask <= num_to_fxm(crnum);  -- mask rebuilt from the single field number crnum
+ end if;
+ write_cr_data <= c_in(31 downto 0);
+ else  -- every other insn_type: mask derived from crnum alone
+ write_cr_mask <= num_to_fxm(crnum);
+ write_cr_data <= newcrf & newcrf & newcrf & newcrf &
+ newcrf & newcrf & newcrf & newcrf;  -- newcrf replicated into all eight 4-bit positions
+ end if;
+
+ end process;
+
+ execute1_1: process(all)
+ variable v : reg_type;
+ variable lo, hi : integer;
+ variable sh, mb, me : std_ulogic_vector(5 downto 0);
+ variable bo, bi : std_ulogic_vector(4 downto 0);
+ variable overflow : std_ulogic;
+ variable lv : Execute1ToLoadstore1Type;
+ variable irq_valid : std_ulogic;
+ variable exception : std_ulogic;
+ variable illegal : std_ulogic;
+ variable is_branch : std_ulogic;
+ variable is_direct_branch : std_ulogic;
+ variable taken_branch : std_ulogic;
+ variable abs_branch : std_ulogic;
+ variable spr_val : std_ulogic_vector(63 downto 0);
+ variable do_trace : std_ulogic;
+ variable hold_wr_data : std_ulogic;
+ variable fv : Execute1ToFPUType;
+ begin
+ is_branch := '0';  -- local flags get a fresh default on every evaluation of the process
+ is_direct_branch := '0';
+ taken_branch := '0';
+ abs_branch := '0';
+ hold_wr_data := '0';
+
+ v := r;  -- start from the current registered state
+ v.e := Execute1ToWritebackInit;  -- then reset the e sub-record to its init constant
+ v.e.redir_mode := ctrl.msr(MSR_IR) & not ctrl.msr(MSR_PR) &
+ not ctrl.msr(MSR_LE) & not ctrl.msr(MSR_SF);  -- MSR[IR], ~MSR[PR], ~MSR[LE], ~MSR[SF], in that bit order
+ v.e.xerc := xerc_in;  -- XER common bits as selected by the xerc_in forwarding signal
+
+ lv := Execute1ToLoadstore1Init;
+ fv := Execute1ToFPUInit;
+
+ x_to_multiply.valid <= '0';  -- default: no multiply/divide request this cycle
+ x_to_divider.valid <= '0';
+ v.mul_in_progress := '0';  -- progress flags default to '0' each cycle
+ v.div_in_progress := '0';
+ v.cntz_in_progress := '0';
+ v.mul_finish := '0';
+
+ spr_result <= (others => '0');
+ spr_val := (others => '0');
+
ctrl_tmp <= ctrl;
-- FIXME: run at 512MHz not core freq
ctrl_tmp.tb <= std_ulogic_vector(unsigned(ctrl.tb) + 1);
irq_valid := '0';
if ctrl.msr(MSR_EE) = '1' then
if ctrl.dec(63) = '1' then
- v.vector := 16#900#;
+ v.e.intr_vec := 16#900#;
report "IRQ valid: DEC";
irq_valid := '1';
elsif ext_irq_in = '1' then
- v.vector := 16#500#;
+ v.e.intr_vec := 16#500#;
report "IRQ valid: External";
irq_valid := '1';
end if;
v.busy := '0';
-- Next insn adder used in a couple of places
- next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4);
+ next_nia <= std_ulogic_vector(unsigned(e_in.nia) + 4);
-- rotator control signals
right_shift <= '1' when e_in.insn_type = OP_SHR else '0';
rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0';
rot_sign_ext <= '1' when e_in.insn_type = OP_EXTSWSLI else '0';
- ctrl_tmp.srr1 <= msr_copy(ctrl.msr);
- ctrl_tmp.irq_state <= WRITE_SRR0;
+ v.e.srr1 := (others => '0');
exception := '0';
illegal := '0';
- exception_nextpc := '0';
- v.e.exc_write_enable := '0';
- v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
if valid_in = '1' then
- v.e.exc_write_data := e_in.nia;
- v.last_nia := e_in.nia;
+ v.e.last_nia := e_in.nia;
else
- v.e.exc_write_data := r.last_nia;
+ v.e.last_nia := r.e.last_nia;
end if;
v.e.mode_32bit := not ctrl.msr(MSR_SF);
+ v.e.instr_tag := current.instr_tag;
do_trace := valid_in and ctrl.msr(MSR_SE);
if valid_in = '1' then
-- Determine if there is any exception to be taken
-- before/instead of executing this instruction
- if valid_in = '1' and e_in.second = '0' then
+ if valid_in = '1' and e_in.second = '0' and l_in.in_progress = '0' then
if HAS_FPU and r.fp_exception_next = '1' then
-- This is used for FP-type program interrupts that
-- become pending due to MSR[FE0,FE1] changing from 00 to non-zero.
exception := '1';
- v.vector := 16#700#;
- ctrl_tmp.srr1(63 - 43) <= '1';
- ctrl_tmp.srr1(63 - 47) <= '1';
+ v.e.intr_vec := 16#700#;
+ v.e.srr1(47 - 43) := '1';
+ v.e.srr1(47 - 47) := '1';
elsif r.trace_next = '1' then
-- Generate a trace interrupt rather than executing the next instruction
-- or taking any asynchronous interrupt
exception := '1';
- v.vector := 16#d00#;
- ctrl_tmp.srr1(63 - 33) <= '1';
+ v.e.intr_vec := 16#d00#;
+ v.e.srr1(47 - 33) := '1';
if r.prev_op = OP_LOAD or r.prev_op = OP_ICBI or r.prev_op = OP_ICBT or
r.prev_op = OP_DCBT or r.prev_op = OP_DCBST or r.prev_op = OP_DCBF then
- ctrl_tmp.srr1(63 - 35) <= '1';
+ v.e.srr1(47 - 35) := '1';
elsif r.prev_op = OP_STORE or r.prev_op = OP_DCBZ or r.prev_op = OP_DCBTST then
- ctrl_tmp.srr1(63 - 36) <= '1';
+ v.e.srr1(47 - 36) := '1';
end if;
elsif irq_valid = '1' then
elsif ctrl.msr(MSR_PR) = '1' and instr_is_privileged(e_in.insn_type, e_in.insn) then
-- generate a program interrupt
exception := '1';
- v.vector := 16#700#;
+ v.e.intr_vec := 16#700#;
-- set bit 45 to indicate privileged instruction type interrupt
- ctrl_tmp.srr1(63 - 45) <= '1';
+ v.e.srr1(47 - 45) := '1';
report "privileged instruction";
elsif not HAS_FPU and e_in.fac = FPU then
elsif HAS_FPU and ctrl.msr(MSR_FP) = '0' and e_in.fac = FPU then
-- generate a floating-point unavailable interrupt
exception := '1';
- v.vector := 16#800#;
+ v.e.intr_vec := 16#800#;
report "FP unavailable interrupt";
end if;
end if;
if valid_in = '1' and exception = '0' and illegal = '0' and e_in.unit = ALU then
- report "execute nia " & to_hstring(e_in.nia);
-
v.cur_instr := e_in;
- v.next_lr := next_nia;
v.e.valid := '1';
case_0: case e_in.insn_type is
-- we need two cycles to write srr0 and 1
if e_in.insn(1) = '1' then
exception := '1';
- exception_nextpc := '1';
- v.vector := 16#C00#;
+ v.e.intr_vec := 16#C00#;
+ v.e.last_nia := next_nia;
report "sc";
else
illegal := '1';
else
v.e.xerc.ov := carry_64;
v.e.xerc.ov32 := carry_32;
- v.e.write_xerc_enable := '1';
end if;
end if;
if e_in.oe = '1' then
- set_ov(v.e,
- calc_ov(a_inv(63), b_in(63), carry_64, sum_with_carry(63)),
- calc_ov(a_inv(31), b_in(31), carry_32, sum_with_carry(31)));
+ set_ov(v.e, overflow_64, overflow_32);
end if;
when OP_CMP =>
- -- CMP and CMPL instructions
- if e_in.is_signed = '1' then
- newcrf := trapval(4 downto 2) & v.e.xerc.so;
- else
- newcrf := trapval(1 downto 0) & trapval(2) & v.e.xerc.so;
- end if;
- bf := insn_bf(e_in.insn);
- crnum := to_integer(unsigned(bf));
- v.e.write_cr_enable := '1';
- v.e.write_cr_mask := num_to_fxm(crnum);
- for i in 0 to 7 loop
- lo := i*4;
- hi := lo + 3;
- v.e.write_cr_data(hi downto lo) := newcrf;
- end loop;
when OP_TRAP =>
-- trap instructions (tw, twi, td, tdi)
- v.vector := 16#700#;
+ v.e.intr_vec := 16#700#;
-- set bit 46 to say trap occurred
- ctrl_tmp.srr1(63 - 46) <= '1';
+ v.e.srr1(47 - 46) := '1';
if or (trapval and insn_to(e_in.insn)) = '1' then
-- generate trap-type program interrupt
exception := '1';
end if;
when OP_ADDG6S =>
when OP_CMPRB =>
- newcrf := ppc_cmprb(a_in, b_in, insn_l(e_in.insn));
- bf := insn_bf(e_in.insn);
- crnum := to_integer(unsigned(bf));
- v.e.write_cr_enable := '1';
- v.e.write_cr_mask := num_to_fxm(crnum);
- v.e.write_cr_data := newcrf & newcrf & newcrf & newcrf &
- newcrf & newcrf & newcrf & newcrf;
when OP_CMPEQB =>
- newcrf := ppc_cmpeqb(a_in, b_in);
- bf := insn_bf(e_in.insn);
- crnum := to_integer(unsigned(bf));
- v.e.write_cr_enable := '1';
- v.e.write_cr_mask := num_to_fxm(crnum);
- v.e.write_cr_data := newcrf & newcrf & newcrf & newcrf &
- newcrf & newcrf & newcrf & newcrf;
when OP_AND | OP_OR | OP_XOR | OP_POPCNT | OP_PRTY | OP_CMPB | OP_EXTS |
OP_BPERM | OP_BCD =>
+
when OP_B =>
is_branch := '1';
taken_branch := '1';
is_direct_branch := '1';
- abs_branch := insn_aa(e_in.insn);
+ abs_branch := e_in.br_abs;
if ctrl.msr(MSR_BE) = '1' then
do_trace := '1';
end if;
- when OP_BC =>
- -- read_data1 is CTR
+ when OP_BC | OP_BCREG =>
+ -- read_data1 is CTR
+ -- for OP_BCREG, read_data2 is target register (CTR, LR or TAR)
+ -- If this instruction updates both CTR and LR, then it is
+ -- doubled; the first instruction decrements CTR and determines
+ -- whether the branch is taken, and the second does the
+ -- redirect and the LR update.
bo := insn_bo(e_in.insn);
bi := insn_bi(e_in.insn);
- is_branch := '1';
- is_direct_branch := '1';
- taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
- abs_branch := insn_aa(e_in.insn);
- if ctrl.msr(MSR_BE) = '1' then
- do_trace := '1';
+ if e_in.second = '0' then
+ taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
+ else
+ taken_branch := r.br_taken;
end if;
- when OP_BCREG =>
- -- read_data1 is CTR
- -- read_data2 is target register (CTR, LR or TAR)
- bo := insn_bo(e_in.insn);
- bi := insn_bi(e_in.insn);
- is_branch := '1';
- taken_branch := ppc_bc_taken(bo, bi, cr_in, a_in);
- abs_branch := '1';
- if ctrl.msr(MSR_BE) = '1' then
- do_trace := '1';
+ v.br_taken := taken_branch;
+ abs_branch := e_in.br_abs;
+ if e_in.repeat = '0' or e_in.second = '1' then
+ is_branch := '1';
+ if e_in.insn_type = OP_BC then
+ is_direct_branch := '1';
+ end if;
+ if ctrl.msr(MSR_BE) = '1' then
+ do_trace := '1';
+ end if;
end if;
when OP_RFID =>
- v.redir_mode := (a_in(MSR_IR) or a_in(MSR_PR)) & not a_in(MSR_PR) &
- not a_in(MSR_LE) & not a_in(MSR_SF);
+ v.e.redir_mode := (a_in(MSR_IR) or a_in(MSR_PR)) & not a_in(MSR_PR) &
+ not a_in(MSR_LE) & not a_in(MSR_SF);
-- Can't use msr_copy here because the partial function MSR
-- bits should be left unchanged, not zeroed.
ctrl_tmp.msr(63 downto 31) <= a_in(63 downto 31);
v.cntz_in_progress := '1';
v.busy := '1';
when OP_ISEL =>
- when OP_CROP =>
- cr_op := insn_cr(e_in.insn);
- report "CR OP " & to_hstring(cr_op);
- if cr_op(0) = '0' then -- MCRF
- bf := insn_bf(e_in.insn);
- bfa := insn_bfa(e_in.insn);
- v.e.write_cr_enable := '1';
- crnum := to_integer(unsigned(bf));
- scrnum := to_integer(unsigned(bfa));
- v.e.write_cr_mask := num_to_fxm(crnum);
- for i in 0 to 7 loop
- lo := (7-i)*4;
- hi := lo + 3;
- if i = scrnum then
- newcrf := cr_in(hi downto lo);
- end if;
- end loop;
- for i in 0 to 7 loop
- lo := i*4;
- hi := lo + 3;
- v.e.write_cr_data(hi downto lo) := newcrf;
- end loop;
- else
- v.e.write_cr_enable := '1';
- bt := insn_bt(e_in.insn);
- ba := insn_ba(e_in.insn);
- bb := insn_bb(e_in.insn);
- btnum := 31 - to_integer(unsigned(bt));
- banum := 31 - to_integer(unsigned(ba));
- bbnum := 31 - to_integer(unsigned(bb));
- -- Bits 5-8 of cr_op give the truth table of the requested
- -- logical operation
- cr_operands := cr_in(banum) & cr_in(bbnum);
- crresult := cr_op(5 + to_integer(unsigned(cr_operands)));
- v.e.write_cr_mask := num_to_fxm((31-btnum) / 4);
- for i in 0 to 31 loop
- if i = btnum then
- v.e.write_cr_data(i) := crresult;
- else
- v.e.write_cr_data(i) := cr_in(i);
- end if;
- end loop;
- end if;
+ when OP_CROP =>
when OP_MCRXRX =>
- newcrf := v.e.xerc.ov & v.e.xerc.ca & v.e.xerc.ov32 & v.e.xerc.ca32;
- bf := insn_bf(e_in.insn);
- crnum := to_integer(unsigned(bf));
- v.e.write_cr_enable := '1';
- v.e.write_cr_mask := num_to_fxm(crnum);
- v.e.write_cr_data := newcrf & newcrf & newcrf & newcrf &
- newcrf & newcrf & newcrf & newcrf;
when OP_DARN =>
when OP_MFMSR =>
when OP_MFSPR =>
if decode_spr_num(e_in.insn) = SPR_XER then
-- bits 0:31 and 35:43 are treated as reserved and return 0s when read using mfxer
spr_val(63 downto 32) := (others => '0');
- spr_val(63-32) := v.e.xerc.so;
- spr_val(63-33) := v.e.xerc.ov;
- spr_val(63-34) := v.e.xerc.ca;
+ spr_val(63-32) := xerc_in.so;
+ spr_val(63-33) := xerc_in.ov;
+ spr_val(63-34) := xerc_in.ca;
spr_val(63-35 downto 63-43) := "000000000";
- spr_val(63-44) := v.e.xerc.ov32;
- spr_val(63-45) := v.e.xerc.ca32;
+ spr_val(63-44) := xerc_in.ov32;
+ spr_val(63-45) := xerc_in.ca32;
end if;
else
spr_val := c_in;
when OP_MFCR =>
when OP_MTCRF =>
- v.e.write_cr_enable := '1';
- if e_in.insn(20) = '0' then
- -- mtcrf
- v.e.write_cr_mask := insn_fxm(e_in.insn);
- else
- -- mtocrf: We require one hot priority encoding here
- crnum := fxm_to_num(insn_fxm(e_in.insn));
- v.e.write_cr_mask := num_to_fxm(crnum);
- end if;
- v.e.write_cr_data := c_in(31 downto 0);
when OP_MTMSRD =>
if e_in.insn(16) = '1' then
-- just update EE and RI
v.e.xerc.ca := c_in(63-34);
v.e.xerc.ov32 := c_in(63-44);
v.e.xerc.ca32 := c_in(63-45);
- v.e.write_xerc_enable := '1';
end if;
else
-- slow spr
when OP_SETB =>
when OP_ISYNC =>
- v.redirect := '1';
- v.br_offset := std_ulogic_vector(to_unsigned(4, 64));
+ v.e.redirect := '1';
+ v.e.br_offset := std_ulogic_vector(to_unsigned(4, 64));
when OP_ICBI =>
icache_inval <= '1';
ctrl_tmp.cfar <= e_in.nia;
end if;
if taken_branch = '1' then
- v.br_offset := b_in;
- v.abs_br := abs_branch;
+ v.e.br_offset := b_in;
+ v.e.abs_br := abs_branch;
else
- v.br_offset := std_ulogic_vector(to_unsigned(4, 64));
+ v.e.br_offset := std_ulogic_vector(to_unsigned(4, 64));
end if;
if taken_branch /= e_in.br_pred then
- v.redirect := '1';
+ v.e.redirect := '1';
end if;
- v.br_last := is_direct_branch;
- v.taken_br := taken_branch;
+ v.e.br_last := is_direct_branch;
+ v.e.br_taken := taken_branch;
end if;
elsif valid_in = '1' and exception = '0' and illegal = '0' then
-- The following cases all occur when r.busy = 1 and therefore
-- valid_in = 0. Hence they don't happen in the same cycle as any of
-- the cases above which depend on valid_in = 1.
-
- if ctrl.irq_state = WRITE_SRR1 then
- v.e.exc_write_reg := fast_spr_num(SPR_SRR1);
- v.e.exc_write_data := ctrl.srr1;
- v.e.exc_write_enable := '1';
- ctrl_tmp.msr(MSR_SF) <= '1';
- ctrl_tmp.msr(MSR_EE) <= '0';
- ctrl_tmp.msr(MSR_PR) <= '0';
- ctrl_tmp.msr(MSR_SE) <= '0';
- ctrl_tmp.msr(MSR_BE) <= '0';
- ctrl_tmp.msr(MSR_FP) <= '0';
- ctrl_tmp.msr(MSR_FE0) <= '0';
- ctrl_tmp.msr(MSR_FE1) <= '0';
- ctrl_tmp.msr(MSR_IR) <= '0';
- ctrl_tmp.msr(MSR_DR) <= '0';
- ctrl_tmp.msr(MSR_RI) <= '0';
- ctrl_tmp.msr(MSR_LE) <= '1';
- v.trace_next := '0';
- v.fp_exception_next := '0';
- report "Writing SRR1: " & to_hstring(ctrl.srr1);
-
- elsif r.cntz_in_progress = '1' then
+ if r.cntz_in_progress = '1' then
-- cnt[lt]z always takes two cycles
v.e.valid := '1';
elsif r.mul_in_progress = '1' or r.div_in_progress = '1' then
v.mul_finish := '1';
v.busy := '1';
else
- v.e.write_xerc_enable := current.oe;
-- We must test oe because the RC update code in writeback
-- will use the xerc value to set CR0:SO so we must not clobber
-- xerc if OE wasn't set.
end if;
elsif r.mul_finish = '1' then
hold_wr_data := '1';
- v.e.write_xerc_enable := current.oe;
v.e.xerc.ov := multiply_to_x.overflow;
v.e.xerc.ov32 := multiply_to_x.overflow;
if multiply_to_x.overflow = '1' then
end if;
v.e.valid := '1';
end if;
- -- When doing delayed LR update, keep r.e.write_data unchanged
- -- next cycle in case it is needed for a forwarded result (e.g. CTR).
- if r.lr_update = '1' then
- hold_wr_data := '1';
- end if;
-
- -- Generate FP-type program interrupt. fp_in.interrupt will only
- -- be set during the execution of a FP instruction.
- -- The case where MSR[FE0,FE1] goes from zero to non-zero is
- -- handled above by mtmsrd and rfid setting v.fp_exception_next.
- if HAS_FPU and fp_in.interrupt = '1' then
- v.vector := 16#700#;
- ctrl_tmp.srr1(63 - 43) <= '1';
- exception := '1';
- end if;
- if illegal = '1' or (HAS_FPU and fp_in.illegal = '1') then
+ if illegal = '1' then
exception := '1';
- v.vector := 16#700#;
+ v.e.intr_vec := 16#700#;
-- Since we aren't doing Hypervisor emulation assist (0xe40) we
-- set bit 44 to indicate we have an illegal
- ctrl_tmp.srr1(63 - 44) <= '1';
+ v.e.srr1(47 - 44) := '1';
report "illegal";
end if;
- if exception = '1' then
- v.e.exc_write_enable := '1';
- if exception_nextpc = '1' then
- v.e.exc_write_data := next_nia;
- end if;
- end if;
- -- generate DSI or DSegI for load/store exceptions
- -- or ISI or ISegI for instruction fetch exceptions
- if l_in.exception = '1' then
- if l_in.alignment = '1' then
- v.vector := 16#600#;
- elsif l_in.instr_fault = '0' then
- if l_in.segment_fault = '0' then
- v.vector := 16#300#;
- else
- v.vector := 16#380#;
- end if;
- else
- if l_in.segment_fault = '0' then
- ctrl_tmp.srr1(63 - 33) <= l_in.invalid;
- ctrl_tmp.srr1(63 - 35) <= l_in.perm_error; -- noexec fault
- ctrl_tmp.srr1(63 - 44) <= l_in.badtree;
- ctrl_tmp.srr1(63 - 45) <= l_in.rc_error;
- v.vector := 16#400#;
- else
- v.vector := 16#480#;
- end if;
- end if;
- v.e.exc_write_enable := '1';
- v.e.exc_write_reg := fast_spr_num(SPR_SRR0);
- report "ldst exception writing srr0=" & to_hstring(r.last_nia);
- end if;
-
- if exception = '1' or l_in.exception = '1' then
- ctrl_tmp.irq_state <= WRITE_SRR1;
- v.redirect := '1';
- v.do_intr := '1';
- end if;
+ v.e.interrupt := exception;
if do_trace = '1' then
v.trace_next := '1';
end if;
+ if interrupt_in = '1' then  -- interrupt_in asserted: rewrite the next-cycle MSR in ctrl_tmp (presumably an interrupt is being taken -- confirm against the driver of this port)
+ ctrl_tmp.msr(MSR_SF) <= '1';  -- SF set
+ ctrl_tmp.msr(MSR_EE) <= '0';  -- EE, PR, SE, BE, FP, FE0, FE1, IR, DR and RI cleared
+ ctrl_tmp.msr(MSR_PR) <= '0';
+ ctrl_tmp.msr(MSR_SE) <= '0';
+ ctrl_tmp.msr(MSR_BE) <= '0';
+ ctrl_tmp.msr(MSR_FP) <= '0';
+ ctrl_tmp.msr(MSR_FE0) <= '0';
+ ctrl_tmp.msr(MSR_FE1) <= '0';
+ ctrl_tmp.msr(MSR_IR) <= '0';
+ ctrl_tmp.msr(MSR_DR) <= '0';
+ ctrl_tmp.msr(MSR_RI) <= '0';
+ ctrl_tmp.msr(MSR_LE) <= '1';  -- LE set
+ v.trace_next := '0';  -- clear the flags that would raise a trace or FP-type exception on the next instruction
+ v.fp_exception_next := '0';
+ end if;
+
if hold_wr_data = '0' then
v.e.write_data := alu_result;
else
v.e.write_reg := current.write_reg;
v.e.write_enable := current.write_reg_enable and v.e.valid and not exception;
v.e.rc := current.rc and v.e.valid and not exception;
-
- -- Update LR on the next cycle after a branch link
- -- If we're not writing back anything else, we can write back LR
- -- this cycle, otherwise we take an extra cycle. We use the
- -- exc_write path since next_nia is written through that path
- -- in other places.
- if v.e.valid = '1' and exception = '0' and current.lr = '1' then
- if current.write_reg_enable = '0' then
- v.e.exc_write_enable := '1';
- v.e.exc_write_data := next_nia;
- v.e.exc_write_reg := fast_spr_num(SPR_LR);
+ v.e.write_cr_data := write_cr_data;
+ v.e.write_cr_mask := write_cr_mask;
+ v.e.write_cr_enable := current.output_cr and v.e.valid and not exception;
+ v.e.write_xerc_enable := current.output_xer and v.e.valid and not exception;
+
+ bypass_data.tag.valid <= current.instr_tag.valid and current.write_reg_enable and v.e.valid;
+ bypass_data.tag.tag <= current.instr_tag.tag;
+ bypass_data.data <= v.e.write_data;
+
+ bypass_cr_data.tag.valid <= current.instr_tag.valid and current.output_cr and v.e.valid;
+ bypass_cr_data.tag.tag <= current.instr_tag.tag;
+ for i in 0 to 7 loop
+ if v.e.write_cr_mask(i) = '1' then
+ bypass_cr_data.data(i*4 + 3 downto i*4) <= v.e.write_cr_data(i*4 + 3 downto i*4);
else
- v.lr_update := '1';
- v.e.valid := '0';
- report "Delayed LR update to " & to_hstring(next_nia);
- v.busy := '1';
- end if;
- end if;
- if r.lr_update = '1' then
- v.e.exc_write_enable := '1';
- v.e.exc_write_data := r.next_lr;
- v.e.exc_write_reg := fast_spr_num(SPR_LR);
- v.e.valid := '1';
- end if;
-
- -- Defer completion for one cycle when redirecting.
- -- This also ensures r.busy = 1 when ctrl.irq_state = WRITE_SRR1
- if v.redirect = '1' then
- v.busy := '1';
- v.e.valid := '0';
- end if;
- if r.redirect = '1' then
- v.e.valid := '1';
- end if;
-
- -- Outputs to fetch1
- f.redirect := r.redirect;
- f.br_nia := r.last_nia;
- f.br_last := r.br_last and not r.do_intr;
- f.br_taken := r.taken_br;
- if r.do_intr = '1' then
- f.redirect_nia := std_ulogic_vector(to_unsigned(r.vector, 64));
- f.virt_mode := '0';
- f.priv_mode := '1';
- -- XXX need an interrupt LE bit here, e.g. from LPCR
- f.big_endian := '0';
- f.mode_32bit := '0';
- else
- if r.abs_br = '1' then
- f.redirect_nia := r.br_offset;
- else
- f.redirect_nia := std_ulogic_vector(unsigned(r.last_nia) + unsigned(r.br_offset));
+ bypass_cr_data.data(i*4 + 3 downto i*4) <= cr_in(i*4 + 3 downto i*4);
end if;
- -- send MSR[IR], ~MSR[PR], ~MSR[LE] and ~MSR[SF] up to fetch1
- f.virt_mode := r.redir_mode(3);
- f.priv_mode := r.redir_mode(2);
- f.big_endian := r.redir_mode(1);
- f.mode_32bit := r.redir_mode(0);
- end if;
+ end loop;
-- Outputs to loadstore1 (async)
lv.op := e_in.insn_type;
lv.nia := e_in.nia;
+ lv.instr_tag := e_in.instr_tag;
lv.addr1 := a_in;
lv.addr2 := b_in;
lv.data := c_in;
lv.byte_reverse := e_in.byte_reverse xnor ctrl.msr(MSR_LE);
lv.sign_extend := e_in.sign_extend;
lv.update := e_in.update;
- lv.update_reg := gspr_to_gpr(e_in.read_reg1);
- lv.xerc := v.e.xerc;
+ lv.xerc := xerc_in;
lv.reserve := e_in.reserve;
lv.rc := e_in.rc;
lv.insn := e_in.insn;
fv.op := e_in.insn_type;
fv.nia := e_in.nia;
fv.insn := e_in.insn;
+ fv.itag := e_in.instr_tag;
fv.single := e_in.is_32bit;
fv.fe_mode := ctrl.msr(MSR_FE0) & ctrl.msr(MSR_FE1);
fv.fra := a_in;
rin <= v;
-- update outputs
- f_out <= f;
l_out <= lv;
e_out <= r.e;
+ e_out.msr <= msr_copy(ctrl.msr);
fp_out <= fv;
- flush_out <= f_out.redirect;
exception_log <= exception;
irq_valid_log <= irq_valid;
ctrl.msr(MSR_IR) & ctrl.msr(MSR_DR) &
exception_log &
irq_valid_log &
- std_ulogic_vector(to_unsigned(irq_state_t'pos(ctrl.irq_state), 1)) &
+ interrupt_in &
"000" &
r.e.write_enable &
r.e.valid &
- f_out.redirect &
+ (r.e.redirect or r.e.interrupt) &
r.busy &
- flush_out;
+ flush_in;
end if;
end process;
log_out <= log_data;
stop_in : in std_ulogic;
alt_reset_in : in std_ulogic;
- -- redirect from execution unit
- e_in : in Execute1ToFetch1Type;
+ -- redirect from writeback unit
+ w_in : in WritebackToFetch1Type;
-- redirect from decode1
d_in : in Decode1ToFetch1Type;
" P:" & std_ulogic'image(r_next.priv_mode) &
" E:" & std_ulogic'image(r_next.big_endian) &
" 32:" & std_ulogic'image(r_next_int.mode_32bit) &
- " R:" & std_ulogic'image(e_in.redirect) & std_ulogic'image(d_in.redirect) &
+ " R:" & std_ulogic'image(w_in.redirect) & std_ulogic'image(d_in.redirect) &
" S:" & std_ulogic'image(stall_in) &
" T:" & std_ulogic'image(stop_in) &
" nia:" & to_hstring(r_next.nia);
end if;
- if rst = '1' or e_in.redirect = '1' or d_in.redirect = '1' or stall_in = '0' then
+ if rst = '1' or w_in.redirect = '1' or d_in.redirect = '1' or stall_in = '0' then
r.virt_mode <= r_next.virt_mode;
r.priv_mode <= r_next.priv_mode;
r.big_endian <= r_next.big_endian;
signal btc_wr_addr : std_ulogic_vector(BTC_ADDR_BITS - 1 downto 0);
signal btc_wr_v : std_ulogic;
begin
- btc_wr_data <= e_in.br_nia(63 downto BTC_ADDR_BITS + 2) &
- e_in.redirect_nia(63 downto 2);
- btc_wr_addr <= e_in.br_nia(BTC_ADDR_BITS + 1 downto 2);
- btc_wr <= e_in.br_last;
- btc_wr_v <= e_in.br_taken;
+ btc_wr_data <= w_in.br_nia(63 downto BTC_ADDR_BITS + 2) &
+ w_in.redirect_nia(63 downto 2);
+ btc_wr_addr <= w_in.br_nia(BTC_ADDR_BITS + 1 downto 2);
+ btc_wr <= w_in.br_last;
+ btc_wr_v <= w_in.br_taken;
btc_ram : process(clk)
variable raddr : unsigned(BTC_ADDR_BITS - 1 downto 0);
v.big_endian := '0';
v_int.mode_32bit := '0';
v_int.predicted_nia := (others => '0');
- elsif e_in.redirect = '1' then
- v.nia := e_in.redirect_nia(63 downto 2) & "00";
- if e_in.mode_32bit = '1' then
+ elsif w_in.redirect = '1' then
+ v.nia := w_in.redirect_nia(63 downto 2) & "00";
+ if w_in.mode_32bit = '1' then
v.nia(63 downto 32) := (others => '0');
end if;
- v.virt_mode := e_in.virt_mode;
- v.priv_mode := e_in.priv_mode;
- v.big_endian := e_in.big_endian;
- v_int.mode_32bit := e_in.mode_32bit;
+ v.virt_mode := w_in.virt_mode;
+ v.priv_mode := w_in.priv_mode;
+ v.big_endian := w_in.big_endian;
+ v_int.mode_32bit := w_in.mode_32bit;
elsif d_in.redirect = '1' then
v.nia := d_in.redirect_nia(63 downto 2) & "00";
if r_int.mode_32bit = '1' then
-- If the last NIA value went down with a stop mark, it didn't get
-- executed, and hence we shouldn't increment NIA.
- advance_nia <= rst or e_in.redirect or d_in.redirect or (not r.stop_mark and not stall_in);
+ advance_nia <= rst or w_in.redirect or d_in.redirect or (not r.stop_mark and not stall_in);
r_next <= v;
r_next_int <= v_int;
busy : std_ulogic;
instr_done : std_ulogic;
do_intr : std_ulogic;
+ illegal : std_ulogic;
op : insn_type_t;
insn : std_ulogic_vector(31 downto 0);
+ nia : std_ulogic_vector(63 downto 0);
+ instr_tag : instr_tag_t;
dest_fpr : gspr_index_t;
fe_mode : std_ulogic;
rc : std_ulogic;
e_out.busy <= r.busy;
e_out.exception <= r.fpscr(FPSCR_FEX);
- e_out.interrupt <= r.do_intr;
w_out.valid <= r.instr_done and not r.do_intr;
+ w_out.instr_tag <= r.instr_tag;
w_out.write_enable <= r.writing_back;
w_out.write_reg <= r.dest_fpr;
w_out.write_data <= fp_result;
w_out.write_cr_mask <= r.cr_mask;
w_out.write_cr_data <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
r.cr_result & r.cr_result & r.cr_result & r.cr_result;
+ w_out.interrupt <= r.do_intr;
+ w_out.intr_vec <= 16#700#;
+ w_out.srr0 <= r.nia;
+ w_out.srr1 <= (47-44 => r.illegal, 47-43 => not r.illegal, others => '0');
fpu_1: process(all)
variable v : reg_type;
-- capture incoming instruction
if e_in.valid = '1' then
v.insn := e_in.insn;
+ v.nia := e_in.nia;
v.op := e_in.op;
+ v.instr_tag := e_in.itag;
v.fe_mode := or (e_in.fe_mode);
v.dest_fpr := e_in.frt;
v.single_prec := e_in.single;
v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
end if;
+ v.illegal := illegal;
if illegal = '1' then
v.instr_done := '0';
- v.do_intr := '0';
+ v.do_intr := '1';
v.writing_back := '0';
v.busy := '0';
v.state := IDLE;
end if;
rin <= v;
- e_out.illegal <= illegal;
end process;
end architecture behaviour;
+++ /dev/null
-library ieee;
-use ieee.std_logic_1164.all;
-use ieee.numeric_std.all;
-
-library work;
-use work.common.all;
-
-entity gpr_hazard is
- generic (
- PIPELINE_DEPTH : natural := 1
- );
- port(
- clk : in std_ulogic;
- busy_in : in std_ulogic;
- deferred : in std_ulogic;
- complete_in : in std_ulogic;
- flush_in : in std_ulogic;
- issuing : in std_ulogic;
- repeated : in std_ulogic;
-
- gpr_write_valid_in : in std_ulogic;
- gpr_write_in : in gspr_index_t;
- bypass_avail : in std_ulogic;
- gpr_read_valid_in : in std_ulogic;
- gpr_read_in : in gspr_index_t;
-
- ugpr_write_valid : in std_ulogic;
- ugpr_write_reg : in gspr_index_t;
-
- stall_out : out std_ulogic;
- use_bypass : out std_ulogic
- );
-end entity gpr_hazard;
-architecture behaviour of gpr_hazard is
- type pipeline_entry_type is record
- valid : std_ulogic;
- bypass : std_ulogic;
- gpr : gspr_index_t;
- ugpr_valid : std_ulogic;
- ugpr : gspr_index_t;
- end record;
- constant pipeline_entry_init : pipeline_entry_type := (valid => '0', bypass => '0', gpr => (others => '0'),
- ugpr_valid => '0', ugpr => (others => '0'));
-
- type pipeline_t is array(0 to PIPELINE_DEPTH) of pipeline_entry_type;
- constant pipeline_t_init : pipeline_t := (others => pipeline_entry_init);
-
- signal r, rin : pipeline_t := pipeline_t_init;
-begin
- gpr_hazard0: process(clk)
- begin
- if rising_edge(clk) then
- r <= rin;
- end if;
- end process;
-
- gpr_hazard1: process(all)
- variable v : pipeline_t;
- begin
- v := r;
-
- if complete_in = '1' then
- v(PIPELINE_DEPTH).valid := '0';
- v(PIPELINE_DEPTH).ugpr_valid := '0';
- end if;
-
- stall_out <= '0';
- use_bypass <= '0';
- if repeated = '0' and gpr_read_valid_in = '1' then
- loop_0: for i in 0 to PIPELINE_DEPTH loop
- -- The second half of a split instruction never has GPR
- -- dependencies on the first half's output GPR,
- -- so ignore matches when i = 0 for the second half.
- if v(i).valid = '1' and r(i).gpr = gpr_read_in and
- not (i = 0 and repeated = '1') then
- if r(i).bypass = '1' then
- use_bypass <= '1';
- else
- stall_out <= '1';
- end if;
- end if;
- if v(i).ugpr_valid = '1' and r(i).ugpr = gpr_read_in then
- stall_out <= '1';
- end if;
- end loop;
- end if;
-
- -- XXX assumes PIPELINE_DEPTH = 1
- if busy_in = '0' then
- v(1) := v(0);
- v(0).valid := '0';
- v(0).ugpr_valid := '0';
- end if;
- if deferred = '0' and issuing = '1' then
- v(0).valid := gpr_write_valid_in;
- v(0).bypass := bypass_avail;
- v(0).gpr := gpr_write_in;
- v(0).ugpr_valid := ugpr_write_valid;
- v(0).ugpr := ugpr_write_reg;
- end if;
- if flush_in = '1' then
- v(0).valid := '0';
- v(0).ugpr_valid := '0';
- v(1).valid := '0';
- v(1).ugpr_valid := '0';
- end if;
-
- -- update registers
- rin <= v;
-
- end process;
-end;
);
end loadstore1;
--- Note, we don't currently use the stall output from the dcache because
--- we know it can take two requests without stalling when idle, we are
--- its only user, and we know it never stalls when idle.
-
architecture behave of loadstore1 is
-- State machine for unaligned loads/stores
type state_t is (IDLE, -- ready for instruction
- SECOND_REQ, -- send 2nd request of unaligned xfer
- ACK_WAIT, -- waiting for ack from dcache
MMU_LOOKUP, -- waiting for MMU to look up translation
TLBIE_WAIT, -- waiting for MMU to finish doing a tlbie
- FINISH_LFS, -- write back converted SP data for lfs*
- COMPLETE -- extra cycle to complete an operation
+ FINISH_LFS -- write back converted SP data for lfs*
);
type byte_index_t is array(0 to 7) of unsigned(2 downto 0);
subtype byte_trim_t is std_ulogic_vector(1 downto 0);
type trim_ctl_t is array(0 to 7) of byte_trim_t;
- type reg_stage_t is record
- -- latch most of the input request
+ type request_t is record
+ valid : std_ulogic;
+ dc_req : std_ulogic;
load : std_ulogic;
+ store : std_ulogic;
tlbie : std_ulogic;
dcbz : std_ulogic;
+ read_spr : std_ulogic;
+ write_spr : std_ulogic;
+ mmu_op : std_ulogic;
+ instr_fault : std_ulogic;
+ load_zero : std_ulogic;
+ do_update : std_ulogic;
+ noop : std_ulogic;
+ mode_32bit : std_ulogic;
addr : std_ulogic_vector(63 downto 0);
+ addr0 : std_ulogic_vector(63 downto 0);
+ byte_sel : std_ulogic_vector(7 downto 0);
+ second_bytes : std_ulogic_vector(7 downto 0);
store_data : std_ulogic_vector(63 downto 0);
- load_data : std_ulogic_vector(63 downto 0);
+ instr_tag : instr_tag_t;
write_reg : gspr_index_t;
length : std_ulogic_vector(3 downto 0);
+ elt_length : std_ulogic_vector(3 downto 0);
byte_reverse : std_ulogic;
- byte_offset : unsigned(2 downto 0);
brev_mask : unsigned(2 downto 0);
sign_extend : std_ulogic;
update : std_ulogic;
- update_reg : gpr_index_t;
xerc : xer_common_t;
reserve : std_ulogic;
atomic : std_ulogic;
nc : std_ulogic; -- non-cacheable access
virt_mode : std_ulogic;
priv_mode : std_ulogic;
+ load_sp : std_ulogic;
+ sprn : std_ulogic_vector(9 downto 0);
+ is_slbia : std_ulogic;
+ align_intr : std_ulogic;
+ dword_index : std_ulogic;
+ two_dwords : std_ulogic;
+ nia : std_ulogic_vector(63 downto 0);
+ end record;
+ constant request_init : request_t := (valid => '0', dc_req => '0', load => '0', store => '0', tlbie => '0',
+ dcbz => '0', read_spr => '0', write_spr => '0', mmu_op => '0',
+ instr_fault => '0', load_zero => '0', do_update => '0', noop => '0',
+ mode_32bit => '0', addr => (others => '0'), addr0 => (others => '0'),
+ byte_sel => x"00", second_bytes => x"00",
+ store_data => (others => '0'), instr_tag => instr_tag_init,
+ write_reg => 7x"00", length => x"0",
+ elt_length => x"0", byte_reverse => '0', brev_mask => "000",
+ sign_extend => '0', update => '0',
+ xerc => xerc_init, reserve => '0',
+ atomic => '0', atomic_last => '0', rc => '0', nc => '0',
+ virt_mode => '0', priv_mode => '0', load_sp => '0',
+ sprn => 10x"0", is_slbia => '0', align_intr => '0',
+ dword_index => '0', two_dwords => '0',
+ nia => (others => '0'));
+
+ type reg_stage1_t is record -- contents of the stage 1 pipeline register (r1)
+ req : request_t; -- request currently held in stage 1
+ issued : std_ulogic; -- '1' once the dcache request for req has been issued
+ end record;
+
+ type reg_stage2_t is record -- contents of the stage 2 pipeline register (r2)
+ req : request_t; -- request handed on from stage 1
+ byte_index : byte_index_t; -- per-byte index into d_in.data used when formatting load data
+ use_second : std_ulogic_vector(7 downto 0); -- per-byte carry out of the byte-index sum
+ wait_dc : std_ulogic; -- completion is signalled by d_in.valid
+ wait_mmu : std_ulogic; -- completion is signalled by m_in.done
+ one_cycle : std_ulogic; -- request completes without waiting for the dcache or the MMU
+ wr_sel : std_ulogic_vector(1 downto 0); -- NOTE(review): presumably selects the write-back data source; its consumer is not visible here - confirm
+ end record;
+
+ type reg_stage3_t is record
state : state_t;
- dwords_done : std_ulogic;
- last_dword : std_ulogic;
- first_bytes : std_ulogic_vector(7 downto 0);
- second_bytes : std_ulogic_vector(7 downto 0);
+ instr_tag : instr_tag_t;
+ write_enable : std_ulogic;
+ write_reg : gspr_index_t;
+ write_data : std_ulogic_vector(63 downto 0);
+ rc : std_ulogic;
+ xerc : xer_common_t;
+ store_done : std_ulogic;
+ convert_lfs : std_ulogic;
+ load_data : std_ulogic_vector(63 downto 0);
dar : std_ulogic_vector(63 downto 0);
dsisr : std_ulogic_vector(31 downto 0);
- instr_fault : std_ulogic;
- align_intr : std_ulogic;
- sprval : std_ulogic_vector(63 downto 0);
- busy : std_ulogic;
- wait_dcache : std_ulogic;
- wait_mmu : std_ulogic;
- do_update : std_ulogic;
- extra_cycle : std_ulogic;
- mode_32bit : std_ulogic;
- byte_index : byte_index_t;
- use_second : std_ulogic_vector(7 downto 0);
- trim_ctl : trim_ctl_t;
- load_sp : std_ulogic;
ld_sp_data : std_ulogic_vector(31 downto 0);
ld_sp_nz : std_ulogic;
ld_sp_lz : std_ulogic_vector(5 downto 0);
- wr_sel : std_ulogic_vector(1 downto 0);
+ stage1_en : std_ulogic;
+ interrupt : std_ulogic;
+ intr_vec : integer range 0 to 16#fff#;
+ nia : std_ulogic_vector(63 downto 0);
+ srr1 : std_ulogic_vector(15 downto 0);
end record;
- signal r, rin : reg_stage_t;
- signal lsu_sum : std_ulogic_vector(63 downto 0);
+ signal req_in : request_t;
+ signal r1, r1in : reg_stage1_t;
+ signal r2, r2in : reg_stage2_t;
+ signal r3, r3in : reg_stage3_t;
+
+ signal busy : std_ulogic;
+ signal complete : std_ulogic;
+ signal in_progress : std_ulogic;
+ signal flushing : std_ulogic;
signal store_sp_data : std_ulogic_vector(31 downto 0);
signal load_dp_data : std_ulogic_vector(63 downto 0);
+ signal store_data : std_ulogic_vector(63 downto 0);
+
+ signal stage1_issue_enable : std_ulogic;
+ signal stage1_req : request_t;
+ signal stage1_dcreq : std_ulogic;
+ signal stage1_dreq : std_ulogic;
+ signal stage2_busy_next : std_ulogic;
+ signal stage3_busy_next : std_ulogic;
-- Generate byte enables from sizes
function length_to_sel(length : in std_logic_vector(3 downto 0)) return std_ulogic_vector is
end;
begin
- -- Calculate the address in the first cycle
- lsu_sum <= std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)) when l_in.valid = '1' else (others => '0');
-
- loadstore1_0: process(clk)
+ loadstore1_reg: process(clk) -- clocked update of the r1/r2/r3 stage registers and the flushing flag, plus consistency assertions
begin
if rising_edge(clk) then
if rst = '1' then
- r.state <= IDLE;
- r.busy <= '0';
- r.do_update <= '0';
+ r1.req.valid <= '0'; -- stage 1 holds no request
+ r2.req.valid <= '0'; -- stage 2 holds no request
+ r2.wait_dc <= '0';
+ r2.wait_mmu <= '0';
+ r2.one_cycle <= '0';
+ r3.state <= IDLE;
+ r3.write_enable <= '0';
+ r3.interrupt <= '0';
+ r3.stage1_en <= '1'; -- stage1_issue_enable depends on this being set
+ r3.convert_lfs <= '0';
+ flushing <= '0';
else
- r <= rin;
+ r1 <= r1in;
+ r2 <= r2in;
+ r3 <= r3in;
+ flushing <= (flushing or (r1in.req.valid and r1in.req.align_intr)) and -- set when a valid request with align_intr enters r1
+ not r3in.interrupt; -- cleared in the cycle stage 3 raises the interrupt
+ end if;
+ stage1_dreq <= stage1_dcreq; -- registered copy of stage1_dcreq; sits outside the reset branch
+ if d_in.valid = '1' then
+ assert r2.req.valid = '1' and r2.req.dc_req = '1' and r3.state = IDLE severity failure; -- dcache data is only expected for a dcache request held in r2
+ end if;
+ if d_in.error = '1' then
+ assert r2.req.valid = '1' and r2.req.dc_req = '1' and r3.state = IDLE severity failure; -- same condition for a dcache error report
+ end if;
+ if m_in.done = '1' or m_in.err = '1' then
+ assert r2.req.valid = '1' and (r3.state = MMU_LOOKUP or r3.state = TLBIE_WAIT) severity failure; -- MMU replies are only expected in the two MMU wait states
end if;
end if;
end process;
variable frac : std_ulogic_vector(22 downto 0);
variable frac_shift : unsigned(4 downto 0);
begin
- frac := r.ld_sp_data(22 downto 0);
- exp := unsigned(r.ld_sp_data(30 downto 23));
- exp_nz := or (r.ld_sp_data(30 downto 23));
- exp_ao := and (r.ld_sp_data(30 downto 23));
+ frac := r3.ld_sp_data(22 downto 0);
+ exp := unsigned(r3.ld_sp_data(30 downto 23));
+ exp_nz := or (r3.ld_sp_data(30 downto 23));
+ exp_ao := and (r3.ld_sp_data(30 downto 23));
frac_shift := (others => '0');
if exp_ao = '1' then
exp_dp := to_unsigned(2047, 11); -- infinity or NaN
elsif exp_nz = '1' then
exp_dp := 896 + resize(exp, 11); -- finite normalized value
- elsif r.ld_sp_nz = '0' then
+ elsif r3.ld_sp_nz = '0' then
exp_dp := to_unsigned(0, 11); -- zero
else
-- denormalized SP operand, need to normalize
- exp_dp := 896 - resize(unsigned(r.ld_sp_lz), 11);
- frac_shift := unsigned(r.ld_sp_lz(4 downto 0)) + 1;
+ exp_dp := 896 - resize(unsigned(r3.ld_sp_lz), 11);
+ frac_shift := unsigned(r3.ld_sp_lz(4 downto 0)) + 1;
end if;
- load_dp_data(63) <= r.ld_sp_data(31);
+ load_dp_data(63) <= r3.ld_sp_data(31);
load_dp_data(62 downto 52) <= std_ulogic_vector(exp_dp);
load_dp_data(51 downto 29) <= shifter_23l(frac, frac_shift);
load_dp_data(28 downto 0) <= (others => '0');
end process;
end generate;
- loadstore1_1: process(all)
- variable v : reg_stage_t;
+ -- Translate a load/store instruction into the internal request format
+ -- XXX this should only depend on l_in, but actually depends on
+ -- r1.req.addr0 as well (in the l_in.second = 1 case).
+ loadstore1_in: process(all)
+ variable v : request_t; -- request being assembled; driven onto req_in at the end
+ variable lsu_sum : std_ulogic_vector(63 downto 0); -- l_in.addr1 + l_in.addr2
variable brev_lenm1 : unsigned(2 downto 0);
- variable byte_offset : unsigned(2 downto 0);
- variable j : integer;
- variable k : unsigned(2 downto 0);
- variable kk : unsigned(3 downto 0);
variable long_sel : std_ulogic_vector(15 downto 0);
- variable byte_sel : std_ulogic_vector(7 downto 0);
- variable req : std_ulogic;
- variable busy : std_ulogic;
variable addr : std_ulogic_vector(63 downto 0);
- variable maddr : std_ulogic_vector(63 downto 0);
- variable wdata : std_ulogic_vector(63 downto 0);
- variable write_enable : std_ulogic;
- variable do_update : std_ulogic;
- variable done : std_ulogic;
- variable data_permuted : std_ulogic_vector(63 downto 0);
- variable data_trimmed : std_ulogic_vector(63 downto 0);
- variable store_data : std_ulogic_vector(63 downto 0);
- variable byte_rev : std_ulogic;
- variable length : std_ulogic_vector(3 downto 0);
- variable negative : std_ulogic;
variable sprn : std_ulogic_vector(9 downto 0);
- variable exception : std_ulogic;
- variable next_addr : std_ulogic_vector(63 downto 0);
- variable mmureq : std_ulogic;
- variable dsisr : std_ulogic_vector(31 downto 0);
- variable mmu_mtspr : std_ulogic;
- variable itlb_fault : std_ulogic;
variable misaligned : std_ulogic;
+ variable addr_mask : std_ulogic_vector(2 downto 0); -- low three bits of (length - 1)
+ begin
+ v := request_init; -- fields not assigned below keep their request_init value
+ sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10)); -- SPR number extracted from the instruction word
+
+ v.valid := l_in.valid;
+ v.instr_tag := l_in.instr_tag;
+ v.mode_32bit := l_in.mode_32bit;
+ v.write_reg := l_in.write_reg;
+ v.length := l_in.length;
+ v.elt_length := l_in.length;
+ v.byte_reverse := l_in.byte_reverse;
+ v.sign_extend := l_in.sign_extend;
+ v.update := l_in.update;
+ v.xerc := l_in.xerc;
+ v.reserve := l_in.reserve;
+ v.rc := l_in.rc;
+ v.nc := l_in.ci;
+ v.virt_mode := l_in.virt_mode;
+ v.priv_mode := l_in.priv_mode;
+ v.sprn := sprn;
+ v.nia := l_in.nia;
+
+ lsu_sum := std_ulogic_vector(unsigned(l_in.addr1) + unsigned(l_in.addr2)); -- sum of the two address operands
+
+ if HAS_FPU and l_in.is_32bit = '1' then
+ v.store_data := x"00000000" & store_sp_data; -- 32-bit store_sp_data in the low word, upper word zero
+ else
+ v.store_data := l_in.data;
+ end if;
+
+ addr := lsu_sum;
+
+ if l_in.second = '1' then
+ if l_in.update = '0' then
+ -- for the second half of a 16-byte transfer,
+ -- use the previous address plus 8.
+ addr := std_ulogic_vector(unsigned(r1.req.addr0(63 downto 3)) + 1) & r1.req.addr0(2 downto 0);
+ else
+ -- for an update-form load, use the previous address
+ -- as the value to write back to RA.
+ addr := r1.req.addr0;
+ end if;
+ end if;
+ if l_in.mode_32bit = '1' then
+ addr(63 downto 32) := (others => '0'); -- keep only the low 32 address bits in 32-bit mode
+ end if;
+ v.addr := addr;
+ v.addr0 := addr; -- same value as addr here; addr may be overridden in the case statement below, addr0 is not
+
+ -- XXX Temporary hack. Mark the op as non-cacheable if the address
+ -- is of the form 0xc------- for a real-mode access.
+ if addr(31 downto 28) = "1100" and l_in.virt_mode = '0' then
+ v.nc := '1';
+ end if;
+
+ addr_mask := std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1); -- e.g. "111" for length 8, "011" for 4, "000" for 1
+
+ -- Do length_to_sel and work out if we are doing 2 dwords
+ long_sel := xfer_data_sel(v.length, addr(2 downto 0)); -- bits 7..0 are used for the first dword, bits 15..8 for the second
+ v.byte_sel := long_sel(7 downto 0);
+ v.second_bytes := long_sel(15 downto 8);
+ if long_sel(15 downto 8) /= "00000000" then
+ v.two_dwords := '1'; -- some selected bytes fall in the second dword
+ end if;
+
+ -- check alignment for larx/stcx
+ misaligned := or (addr_mask and addr(2 downto 0)); -- '1' if any address bit covered by addr_mask is set
+ v.align_intr := l_in.reserve and misaligned;
+ if l_in.repeat = '1' and l_in.second = '0' and l_in.update = '0' and addr(3) = '1' then
+ -- length is really 16 not 8
+ -- Make misaligned lq cause an alignment interrupt in LE mode,
+ -- in order to avoid the case with RA = RT + 1 where the second half
+ -- faults but the first doesn't (and updates RT+1, destroying RA).
+ -- The equivalent BE case doesn't occur because RA = RT is illegal.
+ misaligned := '1';
+ if l_in.reserve = '1' or (l_in.op = OP_LOAD and l_in.byte_reverse = '0') then
+ v.align_intr := '1';
+ end if;
+ end if;
+
+ v.atomic := not misaligned;
+ v.atomic_last := not misaligned and (l_in.second or not l_in.repeat); -- additionally requires the second half of a repeated op, or a non-repeated op
+
+ case l_in.op is
+ when OP_STORE =>
+ v.store := '1';
+ when OP_LOAD =>
+ if l_in.update = '0' or l_in.second = '0' then
+ v.load := '1';
+ if HAS_FPU and l_in.is_32bit = '1' then
+ -- Allow an extra cycle for SP->DP precision conversion
+ v.load_sp := '1';
+ end if;
+ else
+ -- write back address to RA
+ v.do_update := '1';
+ end if;
+ when OP_DCBZ =>
+ v.dcbz := '1';
+ v.align_intr := v.nc; -- dcbz on a non-cacheable access is flagged for an alignment interrupt
+ when OP_TLBIE =>
+ v.tlbie := '1';
+ v.addr := l_in.addr2; -- address from RB for tlbie
+ v.is_slbia := l_in.insn(7);
+ v.mmu_op := '1';
+ when OP_MFSPR =>
+ v.read_spr := '1';
+ when OP_MTSPR =>
+ v.write_spr := '1';
+ v.mmu_op := sprn(9) or sprn(5); -- flagged as an MMU operation when SPR number bit 9 or bit 5 is set
+ when OP_FETCH_FAILED =>
+ -- send it to the MMU to do the radix walk
+ v.instr_fault := '1';
+ v.addr := l_in.nia;
+ v.mmu_op := '1';
+ when others =>
+ end case;
+ v.dc_req := l_in.valid and (v.load or v.store or v.dcbz) and not v.align_intr; -- only valid loads, stores and dcbz without an alignment fault go to the dcache
+
+ -- Work out controls for load and store formatting
+ brev_lenm1 := "000";
+ if v.byte_reverse = '1' then
+ brev_lenm1 := unsigned(v.length(2 downto 0)) - 1; -- low three bits of (length - 1)
+ end if;
+ v.brev_mask := brev_lenm1; -- XORed with byte indices by the store/load formatting loops
+
+ req_in <= v;
+ end process;
+
+ busy <= r1.req.valid and ((r1.req.dc_req and not r1.issued) or -- r1 holds a dcache request that has not been issued yet
+ (r1.issued and d_in.error) or -- issued, but the dcache is reporting an error
+ stage2_busy_next or -- r1 is valid and stage 3 will be busy next cycle
+ (r1.req.dc_req and r1.req.two_dwords and not r1.req.dword_index)); -- first half of a two-dword access is still in r1
+ complete <= r2.one_cycle or (r2.wait_dc and d_in.valid) or -- single-cycle request, or the awaited dcache reply has arrived
+ (r2.wait_mmu and m_in.done) or r3.convert_lfs; -- or the awaited MMU reply has arrived, or r3.convert_lfs is set
+ in_progress <= r1.req.valid or (r2.req.valid and not complete); -- a request is in r1, or r2 holds one that is not completing this cycle
+
+ stage1_issue_enable <= r3.stage1_en and not (r1.req.valid and r1.req.mmu_op) and -- blocked while r3.stage1_en is clear or an MMU operation is in r1
+ not (r2.req.valid and r2.req.mmu_op); -- or in r2
+
+ -- Processing done in the first cycle of a load/store instruction
+ loadstore1_1: process(all)
+ variable v : reg_stage1_t; -- next value of r1
+ variable req : request_t; -- request driven onto stage1_req this cycle
+ variable dcreq : std_ulogic; -- driven onto stage1_dcreq; '1' when a dcache request is issued this cycle
+ variable addr : std_ulogic_vector(63 downto 0); -- NOTE(review): not referenced in the visible body of this process
+ begin
+ v := r1;
+ dcreq := '0';
+ req := req_in;
+ if flushing = '1' then
+ -- Make this a no-op request rather than simply invalid.
+ -- It will never get to stage 3 since there is a request ahead of
+ -- it with align_intr = 1.
+ req.dc_req := '0';
+ end if;
+
+ -- Note that l_in.valid is gated with busy inside execute1
+ if l_in.valid = '1' then
+ dcreq := req.dc_req and stage1_issue_enable and not d_in.error and not dc_stall; -- new instruction: issue straight away when nothing blocks it
+ v.req := req;
+ v.issued := dcreq;
+ elsif r1.req.valid = '1' then
+ if r1.req.dc_req = '1' and r1.issued = '0' then
+ req := r1.req; -- present the request held in r1 again
+ dcreq := stage1_issue_enable and not dc_stall and not d_in.error;
+ v.issued := dcreq;
+ elsif r1.issued = '1' and d_in.error = '1' then
+ v.issued := '0'; -- dcache error: mark as not issued so the branch above issues it again
+ elsif stage2_busy_next = '0' then
+ -- we can change what's in r1 next cycle because the current thing
+ -- in r1 will go into r2
+ if r1.req.dc_req = '1' and r1.req.two_dwords = '1' and r1.req.dword_index = '0' then
+ -- construct the second request for a misaligned access
+ v.req.dword_index := '1';
+ v.req.addr := std_ulogic_vector(unsigned(r1.req.addr(63 downto 3)) + 1) & "000"; -- start of the next doubleword
+ if r1.req.mode_32bit = '1' then
+ v.req.addr(32) := '0'; -- clear bit 32 so a carry from the increment is discarded in 32-bit mode
+ end if;
+ v.req.byte_sel := r1.req.second_bytes;
+ v.issued := stage1_issue_enable and not dc_stall;
+ dcreq := stage1_issue_enable and not dc_stall;
+ req := v.req;
+ else
+ v.req.valid := '0'; -- nothing more to do for this request in stage 1
+ end if;
+ end if;
+ end if;
+ if r3in.interrupt = '1' then
+ v.req.valid := '0'; -- stage 3 is raising an interrupt: drop the request in r1
+ dcreq := '0'; -- and suppress any dcache request this cycle
+ end if;
+
+ stage1_req <= req;
+ stage1_dcreq <= dcreq;
+ r1in <= v;
+ end process;
+
+ -- Processing done in the second cycle of a load/store instruction.
+ -- Store data is formatted here and sent to the dcache.
+ -- The request in r1 is copied into r2, from where stage 3 processes it, if stage 3 will not be busy next cycle.
+ loadstore1_2: process(all)
+ variable v : reg_stage2_t; -- next value of r2
+ variable j : integer; -- bit offset of the source byte for store formatting
+ variable k : unsigned(2 downto 0); -- source byte number for store formatting
+ variable kk : unsigned(3 downto 0); -- 4-bit byte-index sum for load formatting; bit 3 is the carry
+ variable idx : unsigned(2 downto 0); -- byte number after applying brev_mask
+ variable byte_offset : unsigned(2 downto 0); -- low three bits of the request's original address
begin
- v := r;
+ v := r2;
+
+ -- Byte reversing and rotating for stores.
+ -- Done in the second cycle (the cycle after l_in.valid = 1).
+ byte_offset := unsigned(r1.req.addr0(2 downto 0));
+ for i in 0 to 7 loop
+ k := (to_unsigned(i, 3) - byte_offset) xor r1.req.brev_mask;
+ j := to_integer(k) * 8;
+ store_data(i * 8 + 7 downto i * 8) <= r1.req.store_data(j + 7 downto j);
+ end loop;
+
+ if stage3_busy_next = '0' and
+ (r1.req.valid = '0' or r1.issued = '1' or r1.req.dc_req = '0') then -- r1 is empty, needs no dcache request, or has already issued it
+ v.req := r1.req;
+ v.req.store_data := store_data; -- replace the raw store data with the formatted version
+ v.wait_dc := r1.req.valid and r1.req.dc_req and not r1.req.load_sp and -- SP loads are excluded
+ not (r1.req.two_dwords and not r1.req.dword_index); -- as is the first half of a two-dword access
+ v.wait_mmu := r1.req.valid and r1.req.mmu_op;
+ v.one_cycle := r1.req.valid and (r1.req.noop or r1.req.read_spr or
+ (r1.req.write_spr and not r1.req.mmu_op) or
+ r1.req.load_zero or r1.req.do_update);
+ if r1.req.read_spr = '1' then -- NOTE(review): the consumer of wr_sel is not visible in this process; encoding meanings unconfirmed
+ v.wr_sel := "00";
+ elsif r1.req.do_update = '1' or r1.req.store = '1' then
+ v.wr_sel := "01";
+ elsif r1.req.load_sp = '1' then
+ v.wr_sel := "10";
+ else
+ v.wr_sel := "11";
+ end if;
+
+ -- Work out load formatter controls for next cycle
+ for i in 0 to 7 loop
+ idx := to_unsigned(i, 3) xor r1.req.brev_mask;
+ kk := ('0' & idx) + ('0' & byte_offset);
+ v.use_second(i) := kk(3); -- carry out of the 3-bit sum
+ v.byte_index(i) := kk(2 downto 0);
+ end loop;
+ elsif stage3_busy_next = '0' then
+ v.req.valid := '0'; -- r1's request cannot move on yet: leave r2 without a valid request
+ v.wait_dc := '0';
+ v.wait_mmu := '0'; -- NOTE(review): one_cycle is not cleared on this path, yet complete reads r2.one_cycle without checking r2.req.valid - confirm this is intended
+ end if;
+
+ stage2_busy_next <= r1.req.valid and stage3_busy_next;
+
+ if r3in.interrupt = '1' then
+ v.req.valid := '0'; -- stage 3 is raising an interrupt: invalidate the request going into r2
+ end if;
+
+ r2in <= v;
+ end process;
+
+ -- Processing done in the third cycle of a load/store instruction.
+ -- At this stage we can do things that have side effects without
+ -- fear of the instruction getting flushed. This is the point at
+ -- which requests get sent to the MMU.
+ loadstore1_3: process(all)
+ variable v : reg_stage3_t;
+ variable j : integer;
+ variable req : std_ulogic;
+ variable mmureq : std_ulogic;
+ variable mmu_mtspr : std_ulogic;
+ variable write_enable : std_ulogic;
+ variable write_data : std_ulogic_vector(63 downto 0);
+ variable do_update : std_ulogic;
+ variable done : std_ulogic;
+ variable part_done : std_ulogic;
+ variable exception : std_ulogic;
+ variable data_permuted : std_ulogic_vector(63 downto 0);
+ variable data_trimmed : std_ulogic_vector(63 downto 0);
+ variable sprval : std_ulogic_vector(63 downto 0);
+ variable negative : std_ulogic;
+ variable dsisr : std_ulogic_vector(31 downto 0);
+ variable itlb_fault : std_ulogic;
+ variable trim_ctl : trim_ctl_t;
+ begin
+ v := r3;
+
req := '0';
+ mmureq := '0';
mmu_mtspr := '0';
- itlb_fault := '0';
- sprn := std_ulogic_vector(to_unsigned(decode_spr_num(l_in.insn), 10));
+ done := '0';
+ part_done := '0';
+ exception := '0';
dsisr := (others => '0');
- mmureq := '0';
- v.wr_sel := "11";
-
write_enable := '0';
-
- do_update := r.do_update;
- v.do_update := '0';
+ sprval := (others => '0');
+ do_update := '0';
+ v.convert_lfs := '0';
+ v.srr1 := (others => '0');
-- load data formatting
-- shift and byte-reverse data bytes
for i in 0 to 7 loop
- j := to_integer(r.byte_index(i)) * 8;
+ j := to_integer(r2.byte_index(i)) * 8;
data_permuted(i * 8 + 7 downto i * 8) := d_in.data(j + 7 downto j);
end loop;
-- For unaligned loads crossing two dwords, the sign bit is in the
-- first dword for big-endian (byte_reverse = 1), or the second dword
-- for little-endian.
- if r.dwords_done = '1' and r.byte_reverse = '1' then
- negative := (r.length(3) and r.load_data(63)) or
- (r.length(2) and r.load_data(31)) or
- (r.length(1) and r.load_data(15)) or
- (r.length(0) and r.load_data(7));
+ if r2.req.dword_index = '1' and r2.req.byte_reverse = '1' then
+ negative := (r2.req.length(3) and r3.load_data(63)) or
+ (r2.req.length(2) and r3.load_data(31)) or
+ (r2.req.length(1) and r3.load_data(15)) or
+ (r2.req.length(0) and r3.load_data(7));
else
- negative := (r.length(3) and data_permuted(63)) or
- (r.length(2) and data_permuted(31)) or
- (r.length(1) and data_permuted(15)) or
- (r.length(0) and data_permuted(7));
+ negative := (r2.req.length(3) and data_permuted(63)) or
+ (r2.req.length(2) and data_permuted(31)) or
+ (r2.req.length(1) and data_permuted(15)) or
+ (r2.req.length(0) and data_permuted(7));
end if;
-- trim and sign-extend
for i in 0 to 7 loop
- case r.trim_ctl(i) is
+ if i < to_integer(unsigned(r2.req.length)) then
+ if r2.req.dword_index = '1' then
+ trim_ctl(i) := '1' & not r2.use_second(i);
+ else
+ trim_ctl(i) := "10";
+ end if;
+ else
+ trim_ctl(i) := "00";
+ end if;
+ end loop;
+
+ for i in 0 to 7 loop
+ case trim_ctl(i) is
when "11" =>
- data_trimmed(i * 8 + 7 downto i * 8) := r.load_data(i * 8 + 7 downto i * 8);
+ data_trimmed(i * 8 + 7 downto i * 8) := r3.load_data(i * 8 + 7 downto i * 8);
when "10" =>
data_trimmed(i * 8 + 7 downto i * 8) := data_permuted(i * 8 + 7 downto i * 8);
- when "01" =>
- data_trimmed(i * 8 + 7 downto i * 8) := (others => negative);
when others =>
- data_trimmed(i * 8 + 7 downto i * 8) := x"00";
+ data_trimmed(i * 8 + 7 downto i * 8) := (others => negative and r2.req.sign_extend);
end case;
end loop;
v.ld_sp_lz := count_left_zeroes(data_trimmed(22 downto 0));
end if;
- -- Byte reversing and rotating for stores.
- -- Done in the second cycle (the cycle after l_in.valid = 1).
- for i in 0 to 7 loop
- k := (to_unsigned(i, 3) - r.byte_offset) xor r.brev_mask;
- j := to_integer(k) * 8;
- store_data(i * 8 + 7 downto i * 8) := r.store_data(j + 7 downto j);
- end loop;
-
- -- compute (addr + 8) & ~7 for the second doubleword when unaligned
- next_addr := std_ulogic_vector(unsigned(r.addr(63 downto 3)) + 1) & "000";
-
- -- Busy calculation.
- -- We need to minimize the delay from clock to busy valid because it
- -- gates the start of execution of the next instruction.
- busy := r.busy and not ((r.wait_dcache and d_in.valid) or (r.wait_mmu and m_in.done));
- v.busy := busy;
-
- done := '0';
- if r.state /= IDLE and busy = '0' then
- done := '1';
+ if d_in.valid = '1' and r2.req.load = '1' then
+ v.load_data := data_permuted;
end if;
- exception := '0';
- if r.dwords_done = '1' or r.state = SECOND_REQ then
- addr := next_addr;
- byte_sel := r.second_bytes;
- else
- addr := r.addr;
- byte_sel := r.first_bytes;
- end if;
- if r.mode_32bit = '1' then
- addr(63 downto 32) := (others => '0');
+ if r2.req.valid = '1' then
+ if r2.req.read_spr = '1' then
+ write_enable := '1';
+ -- partial decode on SPR number should be adequate given
+ -- the restricted set that gets sent down this path
+ if r2.req.sprn(9) = '0' and r2.req.sprn(5) = '0' then
+ if r2.req.sprn(0) = '0' then
+ sprval := x"00000000" & r3.dsisr;
+ else
+ sprval := r3.dar;
+ end if;
+ else
+ -- reading one of the SPRs in the MMU
+ sprval := m_in.sprval;
+ end if;
+ end if;
+ if r2.req.align_intr = '1' then
+ -- generate alignment interrupt
+ exception := '1';
+ end if;
+ if r2.req.load_zero = '1' then
+ write_enable := '1';
+ end if;
+ if r2.req.do_update = '1' then
+ do_update := '1';
+ end if;
end if;
- maddr := addr;
- case r.state is
+ case r3.state is
when IDLE =>
-
- when SECOND_REQ =>
- req := '1';
- v.state := ACK_WAIT;
- v.last_dword := '0';
-
- when ACK_WAIT =>
- -- r.wr_sel gets set one cycle after we come into ACK_WAIT state,
- -- which is OK because the dcache always takes at least two cycles.
- if r.update = '1' and (r.load = '0' or (HAS_FPU and r.load_sp = '1')) then
- v.wr_sel := "01";
+ if d_in.valid = '1' then
+ if r2.req.two_dwords = '0' or r2.req.dword_index = '1' then
+ write_enable := r2.req.load and not r2.req.load_sp;
+ if HAS_FPU and r2.req.load_sp = '1' then
+ -- SP to DP conversion takes a cycle
+ v.state := FINISH_LFS;
+ v.convert_lfs := '1';
+ else
+ -- stores write back rA update
+ do_update := r2.req.update and r2.req.store;
+ end if;
+ else
+ part_done := '1';
+ end if;
end if;
if d_in.error = '1' then
- -- dcache will discard the second request if it
- -- gets an error on the 1st of two requests
if d_in.cache_paradox = '1' then
-- signal an interrupt straight away
exception := '1';
- dsisr(63 - 38) := not r.load;
+ dsisr(63 - 38) := not r2.req.load;
-- XXX there is no architected bit for this
+ -- (probably should be a machine check in fact)
dsisr(63 - 35) := d_in.cache_paradox;
else
-- Look up the translation for TLB miss
-- in case the PTE has been updated.
mmureq := '1';
v.state := MMU_LOOKUP;
+ v.stage1_en := '0';
end if;
end if;
- if d_in.valid = '1' then
- if r.last_dword = '0' then
- v.dwords_done := '1';
- v.last_dword := '1';
- if r.load = '1' then
- v.load_data := data_permuted;
+ if r2.req.valid = '1' then
+ if r2.req.mmu_op = '1' then
+ -- send request (tlbie, mtspr, itlb miss) to MMU
+ mmureq := not r2.req.write_spr;
+ mmu_mtspr := r2.req.write_spr;
+ if r2.req.instr_fault = '1' then
+ v.state := MMU_LOOKUP;
+ else
+ v.state := TLBIE_WAIT;
end if;
- else
- write_enable := r.load and not r.load_sp;
- if HAS_FPU and r.load_sp = '1' then
- -- SP to DP conversion takes a cycle
- -- Write back rA update in this cycle if needed
- do_update := r.update;
- v.wr_sel := "10";
- v.state := FINISH_LFS;
- elsif r.extra_cycle = '1' then
- -- loads with rA update need an extra cycle
- v.wr_sel := "01";
- v.state := COMPLETE;
- v.do_update := r.update;
+ elsif r2.req.write_spr = '1' then
+ if r2.req.sprn(0) = '0' then
+ v.dsisr := r2.req.store_data(31 downto 0);
else
- -- stores write back rA update in this cycle
- do_update := r.update;
+ v.dar := r2.req.store_data;
end if;
- v.busy := '0';
end if;
end if;
- -- r.wait_dcache gets set one cycle after we come into ACK_WAIT state,
- -- which is OK because the dcache always takes at least two cycles.
- v.wait_dcache := r.last_dword and not r.extra_cycle;
when MMU_LOOKUP =>
if m_in.done = '1' then
- if r.instr_fault = '0' then
+ if r2.req.instr_fault = '0' then
-- retry the request now that the MMU has installed a TLB entry
req := '1';
- if r.last_dword = '0' then
- v.state := SECOND_REQ;
- else
- v.state := ACK_WAIT;
- end if;
+ v.stage1_en := '1';
+ v.state := IDLE;
end if;
end if;
if m_in.err = '1' then
exception := '1';
dsisr(63 - 33) := m_in.invalid;
dsisr(63 - 36) := m_in.perm_error;
- dsisr(63 - 38) := not r.load;
+ dsisr(63 - 38) := r2.req.store or r2.req.dcbz;
dsisr(63 - 44) := m_in.badtree;
dsisr(63 - 45) := m_in.rc_error;
end if;
when TLBIE_WAIT =>
when FINISH_LFS =>
-
- when COMPLETE =>
- exception := r.align_intr;
+ write_enable := '1';
end case;
- if done = '1' or exception = '1' then
+ if complete = '1' or exception = '1' then
+ v.stage1_en := '1';
v.state := IDLE;
- v.busy := '0';
end if;
- -- Note that l_in.valid is gated with busy inside execute1
- if l_in.valid = '1' then
- v.mode_32bit := l_in.mode_32bit;
- v.load := '0';
- v.dcbz := '0';
- v.tlbie := '0';
- v.instr_fault := '0';
- v.align_intr := '0';
- v.dwords_done := '0';
- v.last_dword := '1';
- v.write_reg := l_in.write_reg;
- v.length := l_in.length;
- v.byte_reverse := l_in.byte_reverse;
- v.sign_extend := l_in.sign_extend;
- v.update := l_in.update;
- v.update_reg := l_in.update_reg;
- v.xerc := l_in.xerc;
- v.reserve := l_in.reserve;
- v.rc := l_in.rc;
- v.nc := l_in.ci;
- v.virt_mode := l_in.virt_mode;
- v.priv_mode := l_in.priv_mode;
- v.load_sp := '0';
- v.wait_dcache := '0';
- v.wait_mmu := '0';
- v.do_update := '0';
- v.extra_cycle := '0';
-
- if HAS_FPU and l_in.is_32bit = '1' then
- v.store_data := x"00000000" & store_sp_data;
- else
- v.store_data := l_in.data;
- end if;
-
- addr := lsu_sum;
- if l_in.second = '1' then
- -- for the second half of a 16-byte transfer, use next_addr
- addr := next_addr;
- end if;
- if l_in.mode_32bit = '1' then
- addr(63 downto 32) := (others => '0');
- end if;
- v.addr := addr;
- maddr := l_in.addr2; -- address from RB for tlbie
-
- -- XXX Temporary hack. Mark the op as non-cachable if the address
- -- is the form 0xc------- for a real-mode access.
- if addr(31 downto 28) = "1100" and l_in.virt_mode = '0' then
- v.nc := '1';
- end if;
-
- if l_in.second = '0' then
- -- Do length_to_sel and work out if we are doing 2 dwords
- long_sel := xfer_data_sel(l_in.length, lsu_sum(2 downto 0));
- byte_sel := long_sel(7 downto 0);
- v.first_bytes := byte_sel;
- v.second_bytes := long_sel(15 downto 8);
- else
- byte_sel := r.first_bytes;
- long_sel := r.second_bytes & r.first_bytes;
- end if;
-
- -- check alignment for larx/stcx
- misaligned := or (std_ulogic_vector(unsigned(l_in.length(2 downto 0)) - 1) and addr(2 downto 0));
- v.align_intr := l_in.reserve and misaligned;
- if l_in.repeat = '1' and l_in.second = '0' and addr(3) = '1' then
- -- length is really 16 not 8
- -- Make misaligned lq cause an alignment interrupt in LE mode,
- -- in order to avoid the case with RA = RT + 1 where the second half
- -- faults but the first doesn't (and updates RT+1, destroying RA).
- -- The equivalent BE case doesn't occur because RA = RT is illegal.
- misaligned := '1';
- if l_in.reserve = '1' or (l_in.op = OP_LOAD and l_in.byte_reverse = '0') then
- v.align_intr := '1';
+ -- generate DSI or DSegI for load/store exceptions
+ -- or ISI or ISegI for instruction fetch exceptions
+ v.interrupt := exception;
+ if exception = '1' then
+ v.nia := r2.req.nia;
+ if r2.req.align_intr = '1' then
+ v.intr_vec := 16#600#;
+ v.dar := r2.req.addr;
+ elsif r2.req.instr_fault = '0' then
+ v.dar := r2.req.addr;
+ if m_in.segerr = '0' then
+ v.intr_vec := 16#300#;
+ v.dsisr := dsisr;
+ else
+ v.intr_vec := 16#380#;
end if;
- end if;
-
- v.atomic := not misaligned;
- v.atomic_last := not misaligned and (l_in.second or not l_in.repeat);
-
- case l_in.op is
- when OP_STORE =>
- req := '1';
- when OP_LOAD =>
- req := '1';
- v.load := '1';
- -- Allow an extra cycle for RA update on loads
- v.extra_cycle := l_in.update;
- if HAS_FPU and l_in.is_32bit = '1' then
- -- Allow an extra cycle for SP->DP precision conversion
- v.load_sp := '1';
- v.extra_cycle := '1';
- end if;
- when OP_DCBZ =>
- v.align_intr := v.nc;
- req := '1';
- v.dcbz := '1';
- when OP_TLBIE =>
- mmureq := '1';
- v.tlbie := '1';
- v.state := TLBIE_WAIT;
- v.wait_mmu := '1';
- when OP_MFSPR =>
- v.wr_sel := "00";
- -- partial decode on SPR number should be adequate given
- -- the restricted set that get sent down this path
- if sprn(9) = '0' and sprn(5) = '0' then
- if sprn(0) = '0' then
- v.sprval := x"00000000" & r.dsisr;
- else
- v.sprval := r.dar;
- end if;
- else
- -- reading one of the SPRs in the MMU
- v.sprval := m_in.sprval;
- end if;
- v.state := COMPLETE;
- when OP_MTSPR =>
- if sprn(9) = '0' and sprn(5) = '0' then
- if sprn(0) = '0' then
- v.dsisr := l_in.data(31 downto 0);
- else
- v.dar := l_in.data;
- end if;
- v.state := COMPLETE;
- else
- -- writing one of the SPRs in the MMU
- mmu_mtspr := '1';
- v.state := TLBIE_WAIT;
- v.wait_mmu := '1';
- end if;
- when OP_FETCH_FAILED =>
- -- send it to the MMU to do the radix walk
- maddr := l_in.nia;
- v.instr_fault := '1';
- mmureq := '1';
- v.state := MMU_LOOKUP;
- v.wait_mmu := '1';
- when others =>
- assert false report "unknown op sent to loadstore1";
- end case;
-
- if req = '1' then
- if v.align_intr = '1' then
- v.state := COMPLETE;
- elsif long_sel(15 downto 8) = "00000000" then
- v.state := ACK_WAIT;
+ else
+ if m_in.segerr = '0' then
+ v.srr1(47 - 33) := m_in.invalid;
+ v.srr1(47 - 35) := m_in.perm_error; -- noexec fault
+ v.srr1(47 - 44) := m_in.badtree;
+ v.srr1(47 - 45) := m_in.rc_error;
+ v.intr_vec := 16#400#;
else
- v.state := SECOND_REQ;
+ v.intr_vec := 16#480#;
end if;
end if;
-
- v.busy := req or mmureq or mmu_mtspr;
- end if;
-
- -- Work out controls for store formatting
- if l_in.valid = '1' then
- byte_offset := unsigned(lsu_sum(2 downto 0));
- byte_rev := l_in.byte_reverse;
- length := l_in.length;
- brev_lenm1 := "000";
- if byte_rev = '1' then
- brev_lenm1 := unsigned(length(2 downto 0)) - 1;
- end if;
- v.byte_offset := byte_offset;
- v.brev_mask := brev_lenm1;
- end if;
-
- -- Work out load formatter controls for next cycle
- byte_offset := unsigned(v.addr(2 downto 0));
- brev_lenm1 := "000";
- if v.byte_reverse = '1' then
- brev_lenm1 := unsigned(v.length(2 downto 0)) - 1;
end if;
- for i in 0 to 7 loop
- kk := ('0' & (to_unsigned(i, 3) xor brev_lenm1)) + ('0' & byte_offset);
- v.use_second(i) := kk(3);
- v.byte_index(i) := kk(2 downto 0);
- end loop;
-
- for i in 0 to 7 loop
- if i < to_integer(unsigned(v.length)) then
- if v.dwords_done = '1' then
- v.trim_ctl(i) := '1' & not v.use_second(i);
- else
- v.trim_ctl(i) := "10";
- end if;
- else
- v.trim_ctl(i) := '0' & v.sign_extend;
- end if;
- end loop;
+ case r2.wr_sel is
+ when "00" =>
+ -- mfspr result
+ write_data := sprval;
+ when "01" =>
+ -- update reg
+ write_data := r2.req.addr0;
+ when "10" =>
+ -- lfs result
+ write_data := load_dp_data;
+ when others =>
+ -- load data
+ write_data := data_trimmed;
+ end case;
-- Update outputs to dcache
- d_out.valid <= req and not v.align_intr;
- d_out.load <= v.load;
- d_out.dcbz <= v.dcbz;
- d_out.nc <= v.nc;
- d_out.reserve <= v.reserve;
- d_out.atomic <= v.atomic;
- d_out.atomic_last <= v.atomic_last;
- d_out.addr <= addr;
- d_out.data <= store_data;
- d_out.byte_sel <= byte_sel;
- d_out.virt_mode <= v.virt_mode;
- d_out.priv_mode <= v.priv_mode;
+ if stage1_issue_enable = '1' then
+ d_out.valid <= stage1_dcreq;
+ d_out.load <= stage1_req.load;
+ d_out.dcbz <= stage1_req.dcbz;
+ d_out.nc <= stage1_req.nc;
+ d_out.reserve <= stage1_req.reserve;
+ d_out.atomic <= stage1_req.atomic;
+ d_out.atomic_last <= stage1_req.atomic_last;
+ d_out.addr <= stage1_req.addr;
+ d_out.byte_sel <= stage1_req.byte_sel;
+ d_out.virt_mode <= stage1_req.virt_mode;
+ d_out.priv_mode <= stage1_req.priv_mode;
+ else
+ d_out.valid <= req;
+ d_out.load <= r2.req.load;
+ d_out.dcbz <= r2.req.dcbz;
+ d_out.nc <= r2.req.nc;
+ d_out.reserve <= r2.req.reserve;
+ d_out.atomic <= r2.req.atomic;
+ d_out.atomic_last <= r2.req.atomic_last;
+ d_out.addr <= r2.req.addr;
+ d_out.byte_sel <= r2.req.byte_sel;
+ d_out.virt_mode <= r2.req.virt_mode;
+ d_out.priv_mode <= r2.req.priv_mode;
+ end if;
+ if stage1_dreq = '1' then
+ d_out.data <= store_data;
+ else
+ d_out.data <= r2.req.store_data;
+ end if;
+ d_out.hold <= r2.req.valid and r2.req.load_sp and d_in.valid;
-- Update outputs to MMU
m_out.valid <= mmureq;
- m_out.iside <= v.instr_fault;
- m_out.load <= r.load;
- m_out.priv <= r.priv_mode;
- m_out.tlbie <= v.tlbie;
+ m_out.iside <= r2.req.instr_fault;
+ m_out.load <= r2.req.load;
+ m_out.priv <= r2.req.priv_mode;
+ m_out.tlbie <= r2.req.tlbie;
m_out.mtspr <= mmu_mtspr;
- m_out.sprn <= sprn;
- m_out.addr <= maddr;
- m_out.slbia <= l_in.insn(7);
- m_out.rs <= l_in.data;
+ m_out.sprn <= r2.req.sprn;
+ m_out.addr <= r2.req.addr;
+ m_out.slbia <= r2.req.is_slbia;
+ m_out.rs <= r2.req.store_data;
-- Update outputs to writeback
- -- Multiplex either cache data to the destination GPR or
- -- the address for the rA update.
- l_out.valid <= done;
- case r.wr_sel is
- when "00" =>
- l_out.write_enable <= '1';
- l_out.write_reg <= r.write_reg;
- l_out.write_data <= r.sprval;
- when "01" =>
- l_out.write_enable <= do_update;
- l_out.write_reg <= gpr_to_gspr(r.update_reg);
- l_out.write_data <= r.addr;
- when "10" =>
- l_out.write_enable <= '1';
- l_out.write_reg <= r.write_reg;
- l_out.write_data <= load_dp_data;
- when others =>
- l_out.write_enable <= write_enable;
- l_out.write_reg <= r.write_reg;
- l_out.write_data <= data_trimmed;
- end case;
- l_out.xerc <= r.xerc;
- l_out.rc <= r.rc and done;
+ l_out.valid <= complete;
+ l_out.instr_tag <= r2.req.instr_tag;
+ l_out.write_enable <= write_enable or do_update;
+ l_out.write_reg <= r2.req.write_reg;
+ l_out.write_data <= write_data;
+ l_out.xerc <= r2.req.xerc;
+ l_out.rc <= r2.req.rc and complete;
l_out.store_done <= d_in.store_done;
+ l_out.interrupt <= r3.interrupt;
+ l_out.intr_vec <= r3.intr_vec;
+ l_out.srr0 <= r3.nia;
+ l_out.srr1 <= r3.srr1;
- -- update exception info back to execute1
+ -- update busy signal back to execute1
e_out.busy <= busy;
- e_out.exception <= exception;
- e_out.alignment <= r.align_intr;
- e_out.instr_fault <= r.instr_fault;
- e_out.invalid <= m_in.invalid;
- e_out.badtree <= m_in.badtree;
- e_out.perm_error <= m_in.perm_error;
- e_out.rc_error <= m_in.rc_error;
- e_out.segment_fault <= m_in.segerr;
- if exception = '1' and r.instr_fault = '0' then
- v.dar := addr;
- if m_in.segerr = '0' and r.align_intr = '0' then
- v.dsisr := dsisr;
- end if;
- end if;
+ e_out.in_progress <= in_progress;
+
+ -- Busy calculation.
+ stage3_busy_next <= r2.req.valid and not (complete or part_done or exception);
-- Update registers
- rin <= v;
+ r3in <= v;
end process;
begin
if rising_edge(clk) then
log_data <= e_out.busy &
- e_out.exception &
+ l_out.interrupt &
l_out.valid &
m_out.valid &
d_out.valid &
m_in.done &
- r.dwords_done &
- std_ulogic_vector(to_unsigned(state_t'pos(r.state), 3));
+ r2.req.dword_index &
+ std_ulogic_vector(to_unsigned(state_t'pos(r3.state), 3));
end if;
end process;
log_out <= log_data;
- sim_console.vhdl
- logical.vhdl
- countzero.vhdl
- - gpr_hazard.vhdl
- - cr_hazard.vhdl
- control.vhdl
- execute1.vhdl
- fpu.vhdl
entity writeback is
port (
clk : in std_ulogic;
+ rst : in std_ulogic; -- sampled on the rising clock edge; resets the interrupt state register
e_in : in Execute1ToWritebackType;
l_in : in Loadstore1ToWritebackType;
w_out : out WritebackToRegisterFileType;
c_out : out WritebackToCrFileType;
+ f_out : out WritebackToFetch1Type; -- redirect request and target address sent to fetch1
- complete_out : out std_ulogic
+ flush_out : out std_ulogic; -- mirrors the redirect flag sent on f_out
+ interrupt_out: out std_ulogic; -- asserted in the cycle in which SRR1 is written
+ complete_out : out instr_tag_t -- tag of the instruction completing this cycle (instr_tag_init if none)
);
end entity writeback;
architecture behaviour of writeback is
+ type irq_state_t is (WRITE_SRR0, WRITE_SRR1); -- WRITE_SRR0 is the reset/normal state
+
+ type reg_type is record
+ state : irq_state_t; -- set to WRITE_SRR1 for the cycle after an interrupt is accepted
+ srr1 : std_ulogic_vector(63 downto 0); -- SRR1 value held until the WRITE_SRR1 cycle writes it
+ end record;
+
+ signal r, rin : reg_type; -- r: registered state; rin: next state driven by writeback_1
+
begin
writeback_0: process(clk)
variable x : std_ulogic_vector(0 downto 0);
variable w : std_ulogic_vector(0 downto 0);
begin
if rising_edge(clk) then
+ if rst = '1' then -- synchronous reset of the interrupt state register
+ r.state <= WRITE_SRR0;
+ r.srr1 <= (others => '0');
+ else
+ r <= rin; -- latch the next state computed combinationally in writeback_1
+ end if;
+
-- Do consistency checks only on the clock edge
x(0) := e_in.valid;
y(0) := l_in.valid;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) +
to_integer(unsigned(w))) <= 1 severity failure;
- x(0) := e_in.write_enable or e_in.exc_write_enable;
+ x(0) := e_in.write_enable; -- the exc_write_enable term is no longer part of this check
y(0) := l_in.write_enable;
w(0) := fp_in.write_enable;
assert (to_integer(unsigned(x)) + to_integer(unsigned(y)) +
y(0) := fp_in.write_cr_enable;
assert (to_integer(unsigned(w)) + to_integer(unsigned(x)) +
to_integer(unsigned(y))) <= 1 severity failure;
+
+ assert not (e_in.valid = '1' and e_in.instr_tag.valid = '0') severity failure; -- a completing instruction must carry a valid tag
+ assert not (l_in.valid = '1' and l_in.instr_tag.valid = '0') severity failure; -- same check for the load/store unit
+ assert not (fp_in.valid = '1' and fp_in.instr_tag.valid = '0') severity failure; -- same check for the FPU
end if;
end process;
writeback_1: process(all)
+ variable v : reg_type; -- next value of r; starts as a copy of r and is committed through rin
+ variable f : WritebackToFetch1Type; -- redirect information assembled here and driven onto f_out
variable cf: std_ulogic_vector(3 downto 0);
variable zero : std_ulogic;
variable sign : std_ulogic;
variable scf : std_ulogic_vector(3 downto 0);
+ variable vec : integer range 0 to 16#fff#; -- vector of the interrupt being taken (0 if none)
+ variable srr1 : std_ulogic_vector(15 downto 0); -- interrupt-specific SRR1 bits supplied by the reporting unit
+ variable intr : std_ulogic; -- '1' when any of the three units reports an interrupt
begin
w_out <= WritebackToRegisterFileInit;
c_out <= WritebackToCrFileInit;
-
- complete_out <= '0';
- if e_in.valid = '1' or l_in.valid = '1' or fp_in.valid = '1' then
- complete_out <= '1';
+ f := WritebackToFetch1Init;
+ interrupt_out <= '0';
+ vec := 0;
+ v := r;
+
+ complete_out <= instr_tag_init; -- default: no instruction completes this cycle
+ if e_in.valid = '1' then
+ complete_out <= e_in.instr_tag;
+ elsif l_in.valid = '1' then
+ complete_out <= l_in.instr_tag;
+ elsif fp_in.valid = '1' then
+ complete_out <= fp_in.instr_tag;
end if;
- if e_in.exc_write_enable = '1' then
- w_out.write_reg <= e_in.exc_write_reg;
- w_out.write_data <= e_in.exc_write_data;
+ intr := e_in.interrupt or l_in.interrupt or fp_in.interrupt;
+
+ if r.state = WRITE_SRR1 then -- second interrupt cycle: write the SRR1 value latched in r.srr1
+ w_out.write_reg <= fast_spr_num(SPR_SRR1);
+ w_out.write_data <= r.srr1;
+ w_out.write_enable <= '1';
+ interrupt_out <= '1';
+ v.state := WRITE_SRR0;
+
+ elsif intr = '1' then -- first interrupt cycle: write SRR0 and prepare SRR1 for the next cycle
+ w_out.write_reg <= fast_spr_num(SPR_SRR0);
w_out.write_enable <= '1';
+ v.state := WRITE_SRR1;
+ srr1 := (others => '0');
+ if e_in.interrupt = '1' then -- source priority: execute1, then loadstore1, then FPU
+ vec := e_in.intr_vec;
+ w_out.write_data <= e_in.last_nia;
+ srr1 := e_in.srr1;
+ elsif l_in.interrupt = '1' then
+ vec := l_in.intr_vec;
+ w_out.write_data <= l_in.srr0;
+ srr1 := l_in.srr1;
+ elsif fp_in.interrupt = '1' then
+ vec := fp_in.intr_vec;
+ w_out.write_data <= fp_in.srr0;
+ srr1 := fp_in.srr1;
+ end if;
+ v.srr1(63 downto 31) := e_in.msr(63 downto 31); -- SRR1 image: MSR bits with two fields replaced below
+ v.srr1(30 downto 27) := srr1(14 downto 11); -- interrupt-specific bits from the reporting unit
+ v.srr1(26 downto 22) := e_in.msr(26 downto 22);
+ v.srr1(21 downto 16) := srr1(5 downto 0); -- interrupt-specific bits from the reporting unit
+ v.srr1(15 downto 0) := e_in.msr(15 downto 0);
+
else
if e_in.write_enable = '1' then
w_out.write_reg <= e_in.write_reg;
c_out.write_cr_data(31 downto 28) <= cf;
end if;
end if;
+
+ -- Outputs to fetch1
+ f.redirect := e_in.redirect;
+ f.br_nia := e_in.last_nia;
+ f.br_last := e_in.br_last;
+ f.br_taken := e_in.br_taken;
+ if intr = '1' then -- an interrupt overrides any redirect requested by execute1
+ f.redirect := '1';
+ f.br_last := '0';
+ f.redirect_nia := std_ulogic_vector(to_unsigned(vec, 64)); -- vector number zero-extended to a 64-bit address
+ f.virt_mode := '0';
+ f.priv_mode := '1';
+ -- XXX need an interrupt LE bit here, e.g. from LPCR
+ f.big_endian := '0';
+ f.mode_32bit := '0';
+ else
+ if e_in.abs_br = '1' then
+ f.redirect_nia := e_in.br_offset;
+ else
+ f.redirect_nia := std_ulogic_vector(unsigned(e_in.last_nia) + unsigned(e_in.br_offset));
+ end if;
+ -- send MSR[IR], ~MSR[PR], ~MSR[LE] and ~MSR[SF] up to fetch1
+ f.virt_mode := e_in.redir_mode(3);
+ f.priv_mode := e_in.redir_mode(2);
+ f.big_endian := e_in.redir_mode(1);
+ f.mode_32bit := e_in.redir_mode(0);
+ end if;
+
+ f_out <= f;
+ flush_out <= f.redirect; -- use the local variable; avoids reading back the f_out port (extra delta cycle, VHDL-2008 only)
+
+ rin <= v;
end process;
end;