soc: Drive uart1_irq to 0 when we don't have UART1
[microwatt.git] / fpu.vhdl
1 -- Floating-point unit for Microwatt
2
3 library ieee;
4 use ieee.std_logic_1164.all;
5 use ieee.numeric_std.all;
6
7 library work;
8 use work.insn_helpers.all;
9 use work.decode_types.all;
10 use work.crhelpers.all;
11 use work.helpers.all;
12 use work.common.all;
13
-- Port interface of the floating-point unit.
entity fpu is
    port (
        -- Clock; the registered processes in the architecture act on its
        -- rising edge.
        clk   : in  std_ulogic;
        -- Reset; it is sampled inside the clocked process, i.e. it is
        -- synchronous to clk.
        rst   : in  std_ulogic;

        -- Incoming request bundle (valid flag, instruction word, operation
        -- and the FRA/FRB/FRC operand values are read from it).
        e_in  : in  Execute1toFPUType;
        -- Status outputs: busy, exception and interrupt.
        e_out : out FPUToExecute1Type;

        -- Result outputs: register write and CR write information.
        w_out : out FPUToWritebackType
    );
end entity fpu;
25
26 architecture behaviour of fpu is
27 type fp_number_class is (ZERO, FINITE, INFINITY, NAN);
28
29 constant EXP_BITS : natural := 13;
30
31 type fpu_reg_type is record
32 class : fp_number_class;
33 negative : std_ulogic;
34 exponent : signed(EXP_BITS-1 downto 0); -- unbiased
35 mantissa : std_ulogic_vector(63 downto 0); -- 10.54 format
36 end record;
37
38 type state_t is (IDLE,
39 DO_MCRFS, DO_MTFSB, DO_MTFSFI, DO_MFFS, DO_MTFSF,
40 DO_FMR, DO_FMRG, DO_FCMP, DO_FTDIV, DO_FTSQRT,
41 DO_FCFID, DO_FCTI,
42 DO_FRSP, DO_FRI,
43 DO_FADD, DO_FMUL, DO_FDIV, DO_FSQRT, DO_FMADD,
44 DO_FRE, DO_FRSQRTE,
45 DO_FSEL,
46 FRI_1,
47 ADD_1, ADD_SHIFT, ADD_2, ADD_3,
48 CMP_1, CMP_2,
49 MULT_1,
50 FMADD_1, FMADD_2, FMADD_3,
51 FMADD_4, FMADD_5, FMADD_6,
52 LOOKUP,
53 DIV_2, DIV_3, DIV_4, DIV_5, DIV_6,
54 FRE_1,
55 RSQRT_1,
56 FTDIV_1,
57 SQRT_1, SQRT_2, SQRT_3, SQRT_4,
58 SQRT_5, SQRT_6, SQRT_7, SQRT_8,
59 SQRT_9, SQRT_10, SQRT_11, SQRT_12,
60 INT_SHIFT, INT_ROUND, INT_ISHIFT,
61 INT_FINAL, INT_CHECK, INT_OFLOW,
62 FINISH, NORMALIZE,
63 ROUND_UFLOW, ROUND_OFLOW,
64 ROUNDING, ROUNDING_2, ROUNDING_3,
65 DENORM,
66 RENORM_A, RENORM_A2,
67 RENORM_B, RENORM_B2,
68 RENORM_C, RENORM_C2,
69 NAN_RESULT, EXC_RESULT);
70
71 type reg_type is record
72 state : state_t;
73 busy : std_ulogic;
74 instr_done : std_ulogic;
75 do_intr : std_ulogic;
76 op : insn_type_t;
77 insn : std_ulogic_vector(31 downto 0);
78 dest_fpr : gspr_index_t;
79 fe_mode : std_ulogic;
80 rc : std_ulogic;
81 is_cmp : std_ulogic;
82 single_prec : std_ulogic;
83 fpscr : std_ulogic_vector(31 downto 0);
84 a : fpu_reg_type;
85 b : fpu_reg_type;
86 c : fpu_reg_type;
87 r : std_ulogic_vector(63 downto 0); -- 10.54 format
88 s : std_ulogic_vector(55 downto 0); -- extended fraction
89 x : std_ulogic;
90 p : std_ulogic_vector(63 downto 0); -- 8.56 format
91 y : std_ulogic_vector(63 downto 0); -- 8.56 format
92 result_sign : std_ulogic;
93 result_class : fp_number_class;
94 result_exp : signed(EXP_BITS-1 downto 0);
95 shift : signed(EXP_BITS-1 downto 0);
96 writing_back : std_ulogic;
97 int_result : std_ulogic;
98 cr_result : std_ulogic_vector(3 downto 0);
99 cr_mask : std_ulogic_vector(7 downto 0);
100 old_exc : std_ulogic_vector(4 downto 0);
101 update_fprf : std_ulogic;
102 quieten_nan : std_ulogic;
103 tiny : std_ulogic;
104 denorm : std_ulogic;
105 round_mode : std_ulogic_vector(2 downto 0);
106 is_subtract : std_ulogic;
107 exp_cmp : std_ulogic;
108 madd_cmp : std_ulogic;
109 add_bsmall : std_ulogic;
110 is_multiply : std_ulogic;
111 is_sqrt : std_ulogic;
112 first : std_ulogic;
113 count : unsigned(1 downto 0);
114 doing_ftdiv : std_ulogic_vector(1 downto 0);
115 opsel_a : std_ulogic_vector(1 downto 0);
116 use_a : std_ulogic;
117 use_b : std_ulogic;
118 use_c : std_ulogic;
119 invalid : std_ulogic;
120 negate : std_ulogic;
121 longmask : std_ulogic;
122 end record;
123
124 type lookup_table is array(0 to 1023) of std_ulogic_vector(17 downto 0);
125
126 signal r, rin : reg_type;
127
128 signal fp_result : std_ulogic_vector(63 downto 0);
129 signal opsel_b : std_ulogic_vector(1 downto 0);
130 signal opsel_r : std_ulogic_vector(1 downto 0);
131 signal opsel_s : std_ulogic_vector(1 downto 0);
132 signal opsel_ainv : std_ulogic;
133 signal opsel_mask : std_ulogic;
134 signal opsel_binv : std_ulogic;
135 signal in_a : std_ulogic_vector(63 downto 0);
136 signal in_b : std_ulogic_vector(63 downto 0);
137 signal result : std_ulogic_vector(63 downto 0);
138 signal carry_in : std_ulogic;
139 signal lost_bits : std_ulogic;
140 signal r_hi_nz : std_ulogic;
141 signal r_lo_nz : std_ulogic;
142 signal s_nz : std_ulogic;
143 signal misc_sel : std_ulogic_vector(3 downto 0);
144 signal f_to_multiply : MultiplyInputType;
145 signal multiply_to_f : MultiplyOutputType;
146 signal msel_1 : std_ulogic_vector(1 downto 0);
147 signal msel_2 : std_ulogic_vector(1 downto 0);
148 signal msel_add : std_ulogic_vector(1 downto 0);
149 signal msel_inv : std_ulogic;
150 signal inverse_est : std_ulogic_vector(18 downto 0);
151
152 -- opsel values
153 constant AIN_R : std_ulogic_vector(1 downto 0) := "00";
154 constant AIN_A : std_ulogic_vector(1 downto 0) := "01";
155 constant AIN_B : std_ulogic_vector(1 downto 0) := "10";
156 constant AIN_C : std_ulogic_vector(1 downto 0) := "11";
157
158 constant BIN_ZERO : std_ulogic_vector(1 downto 0) := "00";
159 constant BIN_R : std_ulogic_vector(1 downto 0) := "01";
160 constant BIN_MASK : std_ulogic_vector(1 downto 0) := "10";
161 constant BIN_PS6 : std_ulogic_vector(1 downto 0) := "11";
162
163 constant RES_SUM : std_ulogic_vector(1 downto 0) := "00";
164 constant RES_SHIFT : std_ulogic_vector(1 downto 0) := "01";
165 constant RES_MULT : std_ulogic_vector(1 downto 0) := "10";
166 constant RES_MISC : std_ulogic_vector(1 downto 0) := "11";
167
168 constant S_ZERO : std_ulogic_vector(1 downto 0) := "00";
169 constant S_NEG : std_ulogic_vector(1 downto 0) := "01";
170 constant S_SHIFT : std_ulogic_vector(1 downto 0) := "10";
171 constant S_MULT : std_ulogic_vector(1 downto 0) := "11";
172
173 -- msel values
174 constant MUL1_A : std_ulogic_vector(1 downto 0) := "00";
175 constant MUL1_B : std_ulogic_vector(1 downto 0) := "01";
176 constant MUL1_Y : std_ulogic_vector(1 downto 0) := "10";
177 constant MUL1_R : std_ulogic_vector(1 downto 0) := "11";
178
179 constant MUL2_C : std_ulogic_vector(1 downto 0) := "00";
180 constant MUL2_LUT : std_ulogic_vector(1 downto 0) := "01";
181 constant MUL2_P : std_ulogic_vector(1 downto 0) := "10";
182 constant MUL2_R : std_ulogic_vector(1 downto 0) := "11";
183
184 constant MULADD_ZERO : std_ulogic_vector(1 downto 0) := "00";
185 constant MULADD_CONST : std_ulogic_vector(1 downto 0) := "01";
186 constant MULADD_A : std_ulogic_vector(1 downto 0) := "10";
187 constant MULADD_RS : std_ulogic_vector(1 downto 0) := "11";
188
-- Inverse lookup table.  The low 8 bits of the index are the top 8
-- fraction bits of the operand; the upper 2 bits of the index select
-- one of four blocks of 256 entries.
-- The first 256 entries are the reciprocal (1/x) lookup table,
-- and the remaining 768 entries are the reciprocal square root table.
-- Output range is [0.5, 1) in 0.19 format, though the top
-- bit isn't stored since it is always 1.
-- Each output value is the inverse of the center of the input
-- range for the value, i.e. entry 0 is 1 / (1 + 1/512),
-- entry 1 is 1 / (1 + 3/512), etc.
197 signal inverse_table : lookup_table := (
198 -- 1/x lookup table
199 -- Unit bit is assumed to be 1, so input range is [1, 2)
200 18x"3fc01", 18x"3f411", 18x"3ec31", 18x"3e460", 18x"3dc9f", 18x"3d4ec", 18x"3cd49", 18x"3c5b5",
201 18x"3be2f", 18x"3b6b8", 18x"3af4f", 18x"3a7f4", 18x"3a0a7", 18x"39968", 18x"39237", 18x"38b14",
202 18x"383fe", 18x"37cf5", 18x"375f9", 18x"36f0a", 18x"36828", 18x"36153", 18x"35a8a", 18x"353ce",
203 18x"34d1e", 18x"3467a", 18x"33fe3", 18x"33957", 18x"332d7", 18x"32c62", 18x"325f9", 18x"31f9c",
204 18x"3194a", 18x"31303", 18x"30cc7", 18x"30696", 18x"30070", 18x"2fa54", 18x"2f443", 18x"2ee3d",
205 18x"2e841", 18x"2e250", 18x"2dc68", 18x"2d68b", 18x"2d0b8", 18x"2caee", 18x"2c52e", 18x"2bf79",
206 18x"2b9cc", 18x"2b429", 18x"2ae90", 18x"2a900", 18x"2a379", 18x"29dfb", 18x"29887", 18x"2931b",
207 18x"28db8", 18x"2885e", 18x"2830d", 18x"27dc4", 18x"27884", 18x"2734d", 18x"26e1d", 18x"268f6",
208 18x"263d8", 18x"25ec1", 18x"259b3", 18x"254ac", 18x"24fad", 18x"24ab7", 18x"245c8", 18x"240e1",
209 18x"23c01", 18x"23729", 18x"23259", 18x"22d90", 18x"228ce", 18x"22413", 18x"21f60", 18x"21ab4",
210 18x"2160f", 18x"21172", 18x"20cdb", 18x"2084b", 18x"203c2", 18x"1ff40", 18x"1fac4", 18x"1f64f",
211 18x"1f1e1", 18x"1ed79", 18x"1e918", 18x"1e4be", 18x"1e069", 18x"1dc1b", 18x"1d7d4", 18x"1d392",
212 18x"1cf57", 18x"1cb22", 18x"1c6f3", 18x"1c2ca", 18x"1bea7", 18x"1ba8a", 18x"1b672", 18x"1b261",
213 18x"1ae55", 18x"1aa50", 18x"1a64f", 18x"1a255", 18x"19e60", 18x"19a70", 18x"19686", 18x"192a2",
214 18x"18ec3", 18x"18ae9", 18x"18715", 18x"18345", 18x"17f7c", 18x"17bb7", 18x"177f7", 18x"1743d",
215 18x"17087", 18x"16cd7", 18x"1692c", 18x"16585", 18x"161e4", 18x"15e47", 18x"15ab0", 18x"1571d",
216 18x"1538e", 18x"15005", 18x"14c80", 18x"14900", 18x"14584", 18x"1420d", 18x"13e9b", 18x"13b2d",
217 18x"137c3", 18x"1345e", 18x"130fe", 18x"12da2", 18x"12a4a", 18x"126f6", 18x"123a7", 18x"1205c",
218 18x"11d15", 18x"119d2", 18x"11694", 18x"11359", 18x"11023", 18x"10cf1", 18x"109c2", 18x"10698",
219 18x"10372", 18x"10050", 18x"0fd31", 18x"0fa17", 18x"0f700", 18x"0f3ed", 18x"0f0de", 18x"0edd3",
220 18x"0eacb", 18x"0e7c7", 18x"0e4c7", 18x"0e1ca", 18x"0ded2", 18x"0dbdc", 18x"0d8eb", 18x"0d5fc",
221 18x"0d312", 18x"0d02b", 18x"0cd47", 18x"0ca67", 18x"0c78a", 18x"0c4b1", 18x"0c1db", 18x"0bf09",
222 18x"0bc3a", 18x"0b96e", 18x"0b6a5", 18x"0b3e0", 18x"0b11e", 18x"0ae5f", 18x"0aba3", 18x"0a8eb",
223 18x"0a636", 18x"0a383", 18x"0a0d4", 18x"09e28", 18x"09b80", 18x"098da", 18x"09637", 18x"09397",
224 18x"090fb", 18x"08e61", 18x"08bca", 18x"08936", 18x"086a5", 18x"08417", 18x"0818c", 18x"07f04",
225 18x"07c7e", 18x"079fc", 18x"0777c", 18x"074ff", 18x"07284", 18x"0700d", 18x"06d98", 18x"06b26",
226 18x"068b6", 18x"0664a", 18x"063e0", 18x"06178", 18x"05f13", 18x"05cb1", 18x"05a52", 18x"057f5",
227 18x"0559a", 18x"05342", 18x"050ed", 18x"04e9a", 18x"04c4a", 18x"049fc", 18x"047b0", 18x"04567",
228 18x"04321", 18x"040dd", 18x"03e9b", 18x"03c5c", 18x"03a1f", 18x"037e4", 18x"035ac", 18x"03376",
229 18x"03142", 18x"02f11", 18x"02ce2", 18x"02ab5", 18x"0288b", 18x"02663", 18x"0243d", 18x"02219",
230 18x"01ff7", 18x"01dd8", 18x"01bbb", 18x"019a0", 18x"01787", 18x"01570", 18x"0135b", 18x"01149",
231 18x"00f39", 18x"00d2a", 18x"00b1e", 18x"00914", 18x"0070c", 18x"00506", 18x"00302", 18x"00100",
232 -- 1/sqrt(x) lookup table
233 -- Input is in the range [1, 4), i.e. two bits to the left of the
234 -- binary point. Those 2 bits index the following 3 blocks of 256 values.
235 -- 1.0 ... 1.9999
236 18x"3fe00", 18x"3fa06", 18x"3f612", 18x"3f224", 18x"3ee3a", 18x"3ea58", 18x"3e67c", 18x"3e2a4",
237 18x"3ded2", 18x"3db06", 18x"3d73e", 18x"3d37e", 18x"3cfc2", 18x"3cc0a", 18x"3c85a", 18x"3c4ae",
238 18x"3c106", 18x"3bd64", 18x"3b9c8", 18x"3b630", 18x"3b29e", 18x"3af10", 18x"3ab86", 18x"3a802",
239 18x"3a484", 18x"3a108", 18x"39d94", 18x"39a22", 18x"396b6", 18x"3934e", 18x"38fea", 18x"38c8c",
240 18x"38932", 18x"385dc", 18x"3828a", 18x"37f3e", 18x"37bf6", 18x"378b2", 18x"37572", 18x"37236",
241 18x"36efe", 18x"36bca", 18x"3689a", 18x"36570", 18x"36248", 18x"35f26", 18x"35c06", 18x"358ea",
242 18x"355d4", 18x"352c0", 18x"34fb0", 18x"34ca4", 18x"3499c", 18x"34698", 18x"34398", 18x"3409c",
243 18x"33da2", 18x"33aac", 18x"337bc", 18x"334cc", 18x"331e2", 18x"32efc", 18x"32c18", 18x"32938",
244 18x"3265a", 18x"32382", 18x"320ac", 18x"31dd8", 18x"31b0a", 18x"3183e", 18x"31576", 18x"312b0",
245 18x"30fee", 18x"30d2e", 18x"30a74", 18x"307ba", 18x"30506", 18x"30254", 18x"2ffa4", 18x"2fcf8",
246 18x"2fa4e", 18x"2f7a8", 18x"2f506", 18x"2f266", 18x"2efca", 18x"2ed2e", 18x"2ea98", 18x"2e804",
247 18x"2e572", 18x"2e2e4", 18x"2e058", 18x"2ddce", 18x"2db48", 18x"2d8c6", 18x"2d646", 18x"2d3c8",
248 18x"2d14c", 18x"2ced4", 18x"2cc5e", 18x"2c9ea", 18x"2c77a", 18x"2c50c", 18x"2c2a2", 18x"2c038",
249 18x"2bdd2", 18x"2bb70", 18x"2b90e", 18x"2b6b0", 18x"2b454", 18x"2b1fa", 18x"2afa4", 18x"2ad4e",
250 18x"2aafc", 18x"2a8ac", 18x"2a660", 18x"2a414", 18x"2a1cc", 18x"29f86", 18x"29d42", 18x"29b00",
251 18x"298c2", 18x"29684", 18x"2944a", 18x"29210", 18x"28fda", 18x"28da6", 18x"28b74", 18x"28946",
252 18x"28718", 18x"284ec", 18x"282c4", 18x"2809c", 18x"27e78", 18x"27c56", 18x"27a34", 18x"27816",
253 18x"275fa", 18x"273e0", 18x"271c8", 18x"26fb0", 18x"26d9c", 18x"26b8a", 18x"2697a", 18x"2676c",
254 18x"26560", 18x"26356", 18x"2614c", 18x"25f46", 18x"25d42", 18x"25b40", 18x"2593e", 18x"25740",
255 18x"25542", 18x"25348", 18x"2514e", 18x"24f58", 18x"24d62", 18x"24b6e", 18x"2497c", 18x"2478c",
256 18x"2459e", 18x"243b0", 18x"241c6", 18x"23fde", 18x"23df6", 18x"23c10", 18x"23a2c", 18x"2384a",
257 18x"2366a", 18x"2348c", 18x"232ae", 18x"230d2", 18x"22efa", 18x"22d20", 18x"22b4a", 18x"22976",
258 18x"227a2", 18x"225d2", 18x"22402", 18x"22234", 18x"22066", 18x"21e9c", 18x"21cd2", 18x"21b0a",
259 18x"21944", 18x"2177e", 18x"215ba", 18x"213fa", 18x"21238", 18x"2107a", 18x"20ebc", 18x"20d00",
260 18x"20b46", 18x"2098e", 18x"207d6", 18x"20620", 18x"2046c", 18x"202b8", 18x"20108", 18x"1ff58",
261 18x"1fda8", 18x"1fbfc", 18x"1fa50", 18x"1f8a4", 18x"1f6fc", 18x"1f554", 18x"1f3ae", 18x"1f208",
262 18x"1f064", 18x"1eec2", 18x"1ed22", 18x"1eb82", 18x"1e9e4", 18x"1e846", 18x"1e6aa", 18x"1e510",
263 18x"1e378", 18x"1e1e0", 18x"1e04a", 18x"1deb4", 18x"1dd20", 18x"1db8e", 18x"1d9fc", 18x"1d86c",
264 18x"1d6de", 18x"1d550", 18x"1d3c4", 18x"1d238", 18x"1d0ae", 18x"1cf26", 18x"1cd9e", 18x"1cc18",
265 18x"1ca94", 18x"1c910", 18x"1c78c", 18x"1c60a", 18x"1c48a", 18x"1c30c", 18x"1c18e", 18x"1c010",
266 18x"1be94", 18x"1bd1a", 18x"1bba0", 18x"1ba28", 18x"1b8b2", 18x"1b73c", 18x"1b5c6", 18x"1b452",
267 18x"1b2e0", 18x"1b16e", 18x"1affe", 18x"1ae8e", 18x"1ad20", 18x"1abb4", 18x"1aa46", 18x"1a8dc",
268 -- 2.0 ... 2.9999
269 18x"1a772", 18x"1a608", 18x"1a4a0", 18x"1a33a", 18x"1a1d4", 18x"1a070", 18x"19f0c", 18x"19da8",
270 18x"19c48", 18x"19ae6", 18x"19986", 18x"19828", 18x"196ca", 18x"1956e", 18x"19412", 18x"192b8",
271 18x"1915e", 18x"19004", 18x"18eae", 18x"18d56", 18x"18c00", 18x"18aac", 18x"18958", 18x"18804",
272 18x"186b2", 18x"18562", 18x"18412", 18x"182c2", 18x"18174", 18x"18026", 18x"17eda", 18x"17d8e",
273 18x"17c44", 18x"17afa", 18x"179b2", 18x"1786a", 18x"17724", 18x"175de", 18x"17498", 18x"17354",
274 18x"17210", 18x"170ce", 18x"16f8c", 18x"16e4c", 18x"16d0c", 18x"16bcc", 18x"16a8e", 18x"16950",
275 18x"16814", 18x"166d8", 18x"1659e", 18x"16464", 18x"1632a", 18x"161f2", 18x"160ba", 18x"15f84",
276 18x"15e4e", 18x"15d1a", 18x"15be6", 18x"15ab2", 18x"15980", 18x"1584e", 18x"1571c", 18x"155ec",
277 18x"154bc", 18x"1538e", 18x"15260", 18x"15134", 18x"15006", 18x"14edc", 18x"14db0", 18x"14c86",
278 18x"14b5e", 18x"14a36", 18x"1490e", 18x"147e6", 18x"146c0", 18x"1459a", 18x"14476", 18x"14352",
279 18x"14230", 18x"1410c", 18x"13fea", 18x"13eca", 18x"13daa", 18x"13c8a", 18x"13b6c", 18x"13a4e",
280 18x"13930", 18x"13814", 18x"136f8", 18x"135dc", 18x"134c2", 18x"133a8", 18x"1328e", 18x"13176",
281 18x"1305e", 18x"12f48", 18x"12e30", 18x"12d1a", 18x"12c06", 18x"12af2", 18x"129de", 18x"128ca",
282 18x"127b8", 18x"126a6", 18x"12596", 18x"12486", 18x"12376", 18x"12266", 18x"12158", 18x"1204a",
283 18x"11f3e", 18x"11e32", 18x"11d26", 18x"11c1a", 18x"11b10", 18x"11a06", 18x"118fc", 18x"117f4",
284 18x"116ec", 18x"115e4", 18x"114de", 18x"113d8", 18x"112d2", 18x"111ce", 18x"110ca", 18x"10fc6",
285 18x"10ec2", 18x"10dc0", 18x"10cbe", 18x"10bbc", 18x"10abc", 18x"109bc", 18x"108bc", 18x"107be",
286 18x"106c0", 18x"105c2", 18x"104c4", 18x"103c8", 18x"102cc", 18x"101d0", 18x"100d6", 18x"0ffdc",
287 18x"0fee2", 18x"0fdea", 18x"0fcf0", 18x"0fbf8", 18x"0fb02", 18x"0fa0a", 18x"0f914", 18x"0f81e",
288 18x"0f72a", 18x"0f636", 18x"0f542", 18x"0f44e", 18x"0f35a", 18x"0f268", 18x"0f176", 18x"0f086",
289 18x"0ef94", 18x"0eea4", 18x"0edb4", 18x"0ecc6", 18x"0ebd6", 18x"0eae8", 18x"0e9fa", 18x"0e90e",
290 18x"0e822", 18x"0e736", 18x"0e64a", 18x"0e55e", 18x"0e474", 18x"0e38a", 18x"0e2a0", 18x"0e1b8",
291 18x"0e0d0", 18x"0dfe8", 18x"0df00", 18x"0de1a", 18x"0dd32", 18x"0dc4c", 18x"0db68", 18x"0da82",
292 18x"0d99e", 18x"0d8ba", 18x"0d7d6", 18x"0d6f4", 18x"0d612", 18x"0d530", 18x"0d44e", 18x"0d36c",
293 18x"0d28c", 18x"0d1ac", 18x"0d0cc", 18x"0cfee", 18x"0cf0e", 18x"0ce30", 18x"0cd54", 18x"0cc76",
294 18x"0cb9a", 18x"0cabc", 18x"0c9e0", 18x"0c906", 18x"0c82a", 18x"0c750", 18x"0c676", 18x"0c59c",
295 18x"0c4c4", 18x"0c3ea", 18x"0c312", 18x"0c23a", 18x"0c164", 18x"0c08c", 18x"0bfb6", 18x"0bee0",
296 18x"0be0a", 18x"0bd36", 18x"0bc62", 18x"0bb8c", 18x"0baba", 18x"0b9e6", 18x"0b912", 18x"0b840",
297 18x"0b76e", 18x"0b69c", 18x"0b5cc", 18x"0b4fa", 18x"0b42a", 18x"0b35a", 18x"0b28a", 18x"0b1bc",
298 18x"0b0ee", 18x"0b01e", 18x"0af50", 18x"0ae84", 18x"0adb6", 18x"0acea", 18x"0ac1e", 18x"0ab52",
299 18x"0aa86", 18x"0a9bc", 18x"0a8f0", 18x"0a826", 18x"0a75c", 18x"0a694", 18x"0a5ca", 18x"0a502",
300 18x"0a43a", 18x"0a372", 18x"0a2aa", 18x"0a1e4", 18x"0a11c", 18x"0a056", 18x"09f90", 18x"09ecc",
301 -- 3.0 ... 3.9999
302 18x"09e06", 18x"09d42", 18x"09c7e", 18x"09bba", 18x"09af6", 18x"09a32", 18x"09970", 18x"098ae",
303 18x"097ec", 18x"0972a", 18x"09668", 18x"095a8", 18x"094e8", 18x"09426", 18x"09368", 18x"092a8",
304 18x"091e8", 18x"0912a", 18x"0906c", 18x"08fae", 18x"08ef0", 18x"08e32", 18x"08d76", 18x"08cba",
305 18x"08bfe", 18x"08b42", 18x"08a86", 18x"089ca", 18x"08910", 18x"08856", 18x"0879c", 18x"086e2",
306 18x"08628", 18x"08570", 18x"084b6", 18x"083fe", 18x"08346", 18x"0828e", 18x"081d8", 18x"08120",
307 18x"0806a", 18x"07fb4", 18x"07efe", 18x"07e48", 18x"07d92", 18x"07cde", 18x"07c2a", 18x"07b76",
308 18x"07ac2", 18x"07a0e", 18x"0795a", 18x"078a8", 18x"077f4", 18x"07742", 18x"07690", 18x"075de",
309 18x"0752e", 18x"0747c", 18x"073cc", 18x"0731c", 18x"0726c", 18x"071bc", 18x"0710c", 18x"0705e",
310 18x"06fae", 18x"06f00", 18x"06e52", 18x"06da4", 18x"06cf6", 18x"06c4a", 18x"06b9c", 18x"06af0",
311 18x"06a44", 18x"06998", 18x"068ec", 18x"06840", 18x"06796", 18x"066ea", 18x"06640", 18x"06596",
312 18x"064ec", 18x"06442", 18x"0639a", 18x"062f0", 18x"06248", 18x"061a0", 18x"060f8", 18x"06050",
313 18x"05fa8", 18x"05f00", 18x"05e5a", 18x"05db4", 18x"05d0e", 18x"05c68", 18x"05bc2", 18x"05b1c",
314 18x"05a76", 18x"059d2", 18x"0592e", 18x"05888", 18x"057e4", 18x"05742", 18x"0569e", 18x"055fa",
315 18x"05558", 18x"054b6", 18x"05412", 18x"05370", 18x"052ce", 18x"0522e", 18x"0518c", 18x"050ec",
316 18x"0504a", 18x"04faa", 18x"04f0a", 18x"04e6a", 18x"04dca", 18x"04d2c", 18x"04c8c", 18x"04bee",
317 18x"04b50", 18x"04ab0", 18x"04a12", 18x"04976", 18x"048d8", 18x"0483a", 18x"0479e", 18x"04700",
318 18x"04664", 18x"045c8", 18x"0452c", 18x"04490", 18x"043f6", 18x"0435a", 18x"042c0", 18x"04226",
319 18x"0418a", 18x"040f0", 18x"04056", 18x"03fbe", 18x"03f24", 18x"03e8c", 18x"03df2", 18x"03d5a",
320 18x"03cc2", 18x"03c2a", 18x"03b92", 18x"03afa", 18x"03a62", 18x"039cc", 18x"03934", 18x"0389e",
321 18x"03808", 18x"03772", 18x"036dc", 18x"03646", 18x"035b2", 18x"0351c", 18x"03488", 18x"033f2",
322 18x"0335e", 18x"032ca", 18x"03236", 18x"031a2", 18x"03110", 18x"0307c", 18x"02fea", 18x"02f56",
323 18x"02ec4", 18x"02e32", 18x"02da0", 18x"02d0e", 18x"02c7c", 18x"02bec", 18x"02b5a", 18x"02aca",
324 18x"02a38", 18x"029a8", 18x"02918", 18x"02888", 18x"027f8", 18x"0276a", 18x"026da", 18x"0264a",
325 18x"025bc", 18x"0252e", 18x"024a0", 18x"02410", 18x"02384", 18x"022f6", 18x"02268", 18x"021da",
326 18x"0214e", 18x"020c0", 18x"02034", 18x"01fa8", 18x"01f1c", 18x"01e90", 18x"01e04", 18x"01d78",
327 18x"01cee", 18x"01c62", 18x"01bd8", 18x"01b4c", 18x"01ac2", 18x"01a38", 18x"019ae", 18x"01924",
328 18x"0189c", 18x"01812", 18x"01788", 18x"01700", 18x"01676", 18x"015ee", 18x"01566", 18x"014de",
329 18x"01456", 18x"013ce", 18x"01346", 18x"012c0", 18x"01238", 18x"011b2", 18x"0112c", 18x"010a4",
330 18x"0101e", 18x"00f98", 18x"00f12", 18x"00e8c", 18x"00e08", 18x"00d82", 18x"00cfe", 18x"00c78",
331 18x"00bf4", 18x"00b70", 18x"00aec", 18x"00a68", 18x"009e4", 18x"00960", 18x"008dc", 18x"00858",
332 18x"007d6", 18x"00752", 18x"006d0", 18x"0064e", 18x"005cc", 18x"0054a", 18x"004c8", 18x"00446",
333 18x"003c4", 18x"00342", 18x"002c2", 18x"00240", 18x"001c0", 18x"00140", 18x"000c0", 18x"00040"
334 );
335
-- Combined left/right shifter: 120-bit input, 64-bit output.
-- The result is the upper 64 bits of inp after shifting it left by
-- `shift` positions.  `shift` is a signed (two's complement) count in
-- the range -64..63; negative counts give right shifts, with zeros
-- shifted in from the left.
-- The shift is performed in three selection stages, controlled by
-- shift(6 downto 5), shift(4 downto 3) and shift(2 downto 0).
function shifter_64(inp: std_ulogic_vector(119 downto 0);
                    shift: std_ulogic_vector(6 downto 0))
    return std_ulogic_vector is
    constant zeros      : std_ulogic_vector(63 downto 0) := (others => '0');
    variable sel_coarse : std_ulogic_vector(1 downto 0);
    variable sel_byte   : std_ulogic_vector(1 downto 0);
    variable sel_bit    : std_ulogic_vector(2 downto 0);
    variable stage1     : std_ulogic_vector(94 downto 0);
    variable stage2     : std_ulogic_vector(70 downto 0);
    variable stage3     : std_ulogic_vector(63 downto 0);
begin
    sel_coarse := shift(6 downto 5);
    sel_byte   := shift(4 downto 3);
    sel_bit    := shift(2 downto 0);

    -- Stage 1: coarse offset of 0, +32, -64 or -32 bit positions.
    if sel_coarse = "00" then
        stage1 := inp(119 downto 25);
    elsif sel_coarse = "01" then
        stage1 := inp(87 downto 0) & "0000000";
    elsif sel_coarse = "10" then
        stage1 := zeros & inp(119 downto 89);
    else
        stage1 := zeros(31 downto 0) & inp(119 downto 57);
    end if;

    -- Stage 2: additional left shift of 0, 8, 16 or 24 positions.
    -- The fall-through value is the 24-position case; an exact match on
    -- 0, 1 or 2 replaces it.
    stage2 := stage1(70 downto 0);
    for k in 0 to 2 loop
        if sel_byte = std_ulogic_vector(to_unsigned(k, 2)) then
            stage2 := stage1(94 - 8 * k downto 24 - 8 * k);
        end if;
    end loop;

    -- Stage 3: additional left shift of 0 to 7 positions.
    -- The fall-through value is the 7-position case; an exact match on
    -- 0..6 replaces it.
    stage3 := stage2(63 downto 0);
    for k in 0 to 6 loop
        if sel_bit = std_ulogic_vector(to_unsigned(k, 3)) then
            stage3 := stage2(70 - k downto 7 - k);
        end if;
    end loop;

    return stage3;
end;
388
-- Build a 64-bit mask with 0-bits on the left and 1-bits on the right,
-- selecting the bits that will be lost when doing a right shift.
-- The shift parameter is the bottom 6 bits of a negative shift count
-- (i.e. of a right shift); the `shift` leftmost bits of the mask are 0
-- and all remaining bits are 1.
function right_mask(shift: unsigned(5 downto 0)) return std_ulogic_vector is
    variable mask : std_ulogic_vector(63 downto 0);
begin
    for pos in mask'range loop
        -- Bit `pos` sits (63 - pos) places from the left end.
        if shift <= 63 - pos then
            mask(pos) := '1';
        else
            mask(pos) := '0';
        end if;
    end loop;
    return mask;
end;
404
-- Break a 64-bit register value into sign, exponent, mantissa and class.
-- With is_int = '0' the value is treated as a double-precision number:
-- the exponent is unbiased (a zero exponent field yields -1022) and the
-- mantissa has the implied unit bit at bit 54 with the fraction below it.
-- With any other is_int value the input is treated as an integer: the
-- mantissa is the raw value, the exponent is 0, and the class is ZERO
-- only when all 64 bits are zero.
function decode_dp(fpr: std_ulogic_vector(63 downto 0); is_int: std_ulogic) return fpu_reg_type is
    variable dec      : fpu_reg_type;
    variable exp_any  : std_ulogic;    -- at least one exponent bit set
    variable exp_all  : std_ulogic;    -- every exponent bit set
    variable frac_any : std_ulogic;    -- at least one fraction bit set
    variable selector : std_ulogic_vector(2 downto 0);
begin
    exp_any  := or (fpr(62 downto 52));
    exp_all  := and (fpr(62 downto 52));
    frac_any := or (fpr(51 downto 0));

    dec.negative := fpr(63);

    if is_int /= '0' then
        -- Integer operand
        dec.mantissa := fpr;
        dec.exponent := (others => '0');
        if (fpr(63) or exp_any or frac_any) = '1' then
            dec.class := FINITE;
        else
            dec.class := ZERO;
        end if;
    else
        -- Floating-point operand
        if exp_any = '0' then
            -- zero or denormalized: use the minimum exponent
            dec.exponent := to_signed(-1022, EXP_BITS);
        else
            dec.exponent := signed(resize(unsigned(fpr(62 downto 52)), EXP_BITS)) - to_signed(1023, EXP_BITS);
        end if;
        -- the unit bit is 1 exactly when the exponent field is non-zero
        dec.mantissa := "000000000" & exp_any & fpr(51 downto 0) & "00";
        selector := exp_all & exp_any & frac_any;
        case selector is
            when "000" =>
                dec.class := ZERO;
            when "001" | "010" | "011" =>
                -- "001" is a denormalized number
                dec.class := FINITE;
            when "110" =>
                dec.class := INFINITY;
            when others =>
                dec.class := NAN;
        end case;
    end if;
    return dec;
end;
444
-- Assemble a 64-bit double-precision value from its components.
--   sign        : goes to bit 63 for every class
--   class       : selects how the exponent and fraction fields are filled
--   exp         : unbiased exponent, used for normalized FINITE values
--   mantissa    : unit bit at bit 54, fraction in bits 53 downto 2
--   single_prec : when not '0', the low 29 fraction bits are left as zero
--   quieten_nan : ORed into the top fraction bit of a NaN result
function pack_dp(sign: std_ulogic; class: fp_number_class; exp: signed(EXP_BITS-1 downto 0);
                 mantissa: std_ulogic_vector; single_prec: std_ulogic; quieten_nan: std_ulogic)
    return std_ulogic_vector is
    variable packed : std_ulogic_vector(63 downto 0);
begin
    packed := (others => '0');
    packed(63) := sign;

    -- Exponent field
    case class is
        when INFINITY | NAN =>
            packed(62 downto 52) := "11111111111";
        when FINITE =>
            -- Only a normalized number (unit bit set) gets a non-zero
            -- exponent field.
            if mantissa(54) = '1' then
                packed(62 downto 52) := std_ulogic_vector(resize(exp, 11) + 1023);
            end if;
        when ZERO =>
            null;
    end case;

    -- Fraction field (stays zero for ZERO and INFINITY)
    if class = FINITE or class = NAN then
        packed(51 downto 29) := mantissa(53 downto 31);
        if single_prec = '0' then
            packed(28 downto 0) := mantissa(30 downto 2);
        end if;
        if class = NAN then
            packed(51) := quieten_nan or mantissa(53);
        end if;
    end if;

    return packed;
end;
476
-- Decide whether rounding must increment the mantissa.
-- Returns a 2-bit vector: bit 1 = increment, bit 0 = inexact.
-- For single precision, x is assumed to already include the bottom 29
-- bits of the mantissa (usually arranged by setting set_x = 1 earlier).
--   rn(1 downto 0) = "00" : round to nearest
--   rn(1 downto 0) = "01" : round towards zero (never increments)
--   otherwise             : round towards +/- infinity; increments an
--                           inexact value when rn(0) equals the sign
--   rn(2)                 : when not '0', an exact tie in round-to-nearest
--                           follows the guard bit instead of going to even
function fp_rounding(mantissa: std_ulogic_vector(63 downto 0); x: std_ulogic;
                     single_prec: std_ulogic; rn: std_ulogic_vector(2 downto 0);
                     sign: std_ulogic)
    return std_ulogic_vector is
    variable unit_lsb  : std_ulogic;
    variable guard_bit : std_ulogic;
    variable round_bit : std_ulogic;
    variable tail      : std_ulogic_vector(2 downto 0);
    variable inexact   : std_ulogic;
    variable increment : std_ulogic;
    variable verdict   : std_ulogic_vector(1 downto 0);
begin
    -- Pick the result LSB and the two bits below it for the precision.
    if single_prec = '0' then
        unit_lsb  := mantissa(2);
        guard_bit := mantissa(1);
        round_bit := mantissa(0);
    else
        unit_lsb  := mantissa(31);
        guard_bit := mantissa(30);
        round_bit := mantissa(29);
    end if;
    tail := guard_bit & round_bit & x;

    -- Any discarded bit makes the result inexact.
    inexact := or (tail);

    increment := '0';
    if rn(1 downto 0) = "00" then
        -- round to nearest
        if tail = "100" and rn(2) = '0' then
            increment := unit_lsb;      -- tie, round to even
        else
            increment := guard_bit;
        end if;
    elsif rn(1 downto 0) /= "01" then
        -- round towards +/- inf
        if rn(0) = sign then
            -- round towards greater magnitude
            increment := inexact;
        end if;
    end if;

    verdict := increment & inexact;
    return verdict;
end;
514
-- Compute the 5-bit result flags to write into the FPSCR from the sign
-- and class of a result.  For a FINITE result the leading flag bit is the
-- inverse of the unit bit (set when the unit bit is clear).
function result_flags(sign: std_ulogic; class: fp_number_class; unitbit: std_ulogic)
    return std_ulogic_vector is
begin
    if class = ZERO then
        return sign & "0010";
    elsif class = FINITE then
        return (not unitbit) & sign & (not sign) & "00";
    elsif class = INFINITY then
        return '0' & sign & (not sign) & "01";
    else
        -- NAN
        return "10001";
    end if;
end;
530
531 begin
-- Multiplier instance; it is fed from f_to_multiply and returns its
-- output on multiply_to_f.
fpu_multiply_0: entity work.multiply
    port map (
        m_in  => f_to_multiply,
        m_out => multiply_to_f,
        clk   => clk
    );
538
-- State register: on each rising clock edge r takes the next-state value
-- rin, unless rst is '1', in which case a subset of the fields is reset.
fpu_0: process(clk)
begin
    if rising_edge(clk) then
        if rst /= '1' then
            -- A new instruction may only arrive while the unit is idle.
            assert r.state = IDLE or e_in.valid /= '1' severity failure;
            r <= rin;
        else
            -- Synchronous reset; only these fields are given a reset
            -- value, the rest of r keeps whatever it held.
            r.writing_back <= '0';
            r.fpscr        <= (others => '0');
            r.do_intr      <= '0';
            r.instr_done   <= '0';
            r.busy         <= '0';
            r.state        <= IDLE;
        end if;
    end if;
end process;
555
-- Registered (synchronous) read of the inverse lookup table.
-- The 10-bit address is taken from bits 55 downto 46 of the B operand
-- mantissa; unless is_sqrt is '1' the top two address bits are forced
-- to "00".  The always-1 top bit that the table does not store is
-- prepended to the entry read out.
lut_access: process(clk)
    variable index : std_ulogic_vector(9 downto 0);
begin
    if rising_edge(clk) then
        index := r.b.mantissa(55 downto 46);
        if r.is_sqrt /= '1' then
            index(9 downto 8) := "00";
        end if;
        inverse_est <= '1' & inverse_table(to_integer(unsigned(index)));
    end if;
end process;
571
-- Status outputs, driven directly from the state register.
e_out.busy      <= r.busy;
e_out.interrupt <= r.do_intr;
e_out.exception <= r.fpscr(FPSCR_FEX);

-- Result outputs.
-- A completed instruction is reported as valid only when it is not
-- raising an interrupt.
w_out.valid           <= r.instr_done and not r.do_intr;
w_out.write_enable    <= r.writing_back;
w_out.write_reg       <= r.dest_fpr;
w_out.write_data      <= fp_result;
-- CR is written when the instruction completes and either rc or is_cmp
-- is set.
w_out.write_cr_enable <= r.instr_done and (r.rc or r.is_cmp);
w_out.write_cr_mask   <= r.cr_mask;
-- The 4-bit CR result is replicated into all eight 4-bit positions of
-- the write data.
w_out.write_cr_data   <= r.cr_result & r.cr_result & r.cr_result & r.cr_result &
                         r.cr_result & r.cr_result & r.cr_result & r.cr_result;
584
585 fpu_1: process(all)
586 variable v : reg_type;
587 variable adec : fpu_reg_type;
588 variable bdec : fpu_reg_type;
589 variable cdec : fpu_reg_type;
590 variable fpscr_mask : std_ulogic_vector(31 downto 0);
591 variable illegal : std_ulogic;
592 variable j, k : integer;
593 variable flm : std_ulogic_vector(7 downto 0);
594 variable int_input : std_ulogic;
595 variable mask : std_ulogic_vector(63 downto 0);
596 variable in_a0 : std_ulogic_vector(63 downto 0);
597 variable in_b0 : std_ulogic_vector(63 downto 0);
598 variable misc : std_ulogic_vector(63 downto 0);
599 variable shift_res : std_ulogic_vector(63 downto 0);
600 variable round : std_ulogic_vector(1 downto 0);
601 variable update_fx : std_ulogic;
602 variable arith_done : std_ulogic;
603 variable invalid : std_ulogic;
604 variable zero_divide : std_ulogic;
605 variable mant_nz : std_ulogic;
606 variable min_exp : signed(EXP_BITS-1 downto 0);
607 variable max_exp : signed(EXP_BITS-1 downto 0);
608 variable bias_exp : signed(EXP_BITS-1 downto 0);
609 variable new_exp : signed(EXP_BITS-1 downto 0);
610 variable exp_tiny : std_ulogic;
611 variable exp_huge : std_ulogic;
612 variable renormalize : std_ulogic;
613 variable clz : std_ulogic_vector(5 downto 0);
614 variable set_x : std_ulogic;
615 variable mshift : signed(EXP_BITS-1 downto 0);
616 variable need_check : std_ulogic;
617 variable msb : std_ulogic;
618 variable is_add : std_ulogic;
619 variable set_a : std_ulogic;
620 variable set_b : std_ulogic;
621 variable set_c : std_ulogic;
622 variable set_y : std_ulogic;
623 variable set_s : std_ulogic;
624 variable qnan_result : std_ulogic;
625 variable px_nz : std_ulogic;
626 variable pcmpb_eq : std_ulogic;
627 variable pcmpb_lt : std_ulogic;
628 variable pshift : std_ulogic;
629 variable renorm_sqrt : std_ulogic;
630 variable sqrt_exp : signed(EXP_BITS-1 downto 0);
631 variable shiftin : std_ulogic;
632 variable mulexp : signed(EXP_BITS-1 downto 0);
633 variable maddend : std_ulogic_vector(127 downto 0);
634 variable sum : std_ulogic_vector(63 downto 0);
635 begin
636 v := r;
637 illegal := '0';
638 v.busy := '0';
639 int_input := '0';
640
641 -- capture incoming instruction
642 if e_in.valid = '1' then
643 v.insn := e_in.insn;
644 v.op := e_in.op;
645 v.fe_mode := or (e_in.fe_mode);
646 v.dest_fpr := e_in.frt;
647 v.single_prec := e_in.single;
648 v.longmask := e_in.single;
649 v.int_result := '0';
650 v.rc := e_in.rc;
651 v.is_cmp := e_in.out_cr;
652 if e_in.out_cr = '0' then
653 v.cr_mask := num_to_fxm(1);
654 else
655 v.cr_mask := num_to_fxm(to_integer(unsigned(insn_bf(e_in.insn))));
656 end if;
657 int_input := '0';
658 if e_in.op = OP_FPOP_I then
659 int_input := '1';
660 end if;
661 v.quieten_nan := '1';
662 v.tiny := '0';
663 v.denorm := '0';
664 v.round_mode := '0' & r.fpscr(FPSCR_RN+1 downto FPSCR_RN);
665 v.is_subtract := '0';
666 v.is_multiply := '0';
667 v.is_sqrt := '0';
668 v.add_bsmall := '0';
669 v.doing_ftdiv := "00";
670
671 adec := decode_dp(e_in.fra, int_input);
672 bdec := decode_dp(e_in.frb, int_input);
673 cdec := decode_dp(e_in.frc, int_input);
674 v.a := adec;
675 v.b := bdec;
676 v.c := cdec;
677
678 v.exp_cmp := '0';
679 if adec.exponent > bdec.exponent then
680 v.exp_cmp := '1';
681 end if;
682 v.madd_cmp := '0';
683 if (adec.exponent + cdec.exponent + 1) >= bdec.exponent then
684 v.madd_cmp := '1';
685 end if;
686 end if;
687
688 r_hi_nz <= or (r.r(55 downto 31)); -- '1' if any of R bits 55..31 is set
689 r_lo_nz <= or (r.r(30 downto 2)); -- '1' if any of R bits 30..2 is set
690 s_nz <= or (r.s); -- '1' if any bit of S is set
691
692 if r.single_prec = '0' then -- exponent limits used when not single precision
693 if r.doing_ftdiv(1) = '0' then
694 max_exp := to_signed(1023, EXP_BITS);
695 else
696 max_exp := to_signed(1020, EXP_BITS); -- tighter upper limit while doing_ftdiv(1) is set
697 end if;
698 if r.doing_ftdiv(0) = '0' then
699 min_exp := to_signed(-1022, EXP_BITS);
700 else
701 min_exp := to_signed(-1021, EXP_BITS); -- tighter lower limit while doing_ftdiv(0) is set
702 end if;
703 bias_exp := to_signed(1536, EXP_BITS);
704 else -- exponent limits used for single precision
705 max_exp := to_signed(127, EXP_BITS);
706 min_exp := to_signed(-126, EXP_BITS);
707 bias_exp := to_signed(192, EXP_BITS);
708 end if;
709 new_exp := r.result_exp - r.shift; -- result exponent adjusted by the registered shift count
710 exp_tiny := '0';
711 exp_huge := '0';
712 if new_exp < min_exp then -- adjusted exponent below the minimum
713 exp_tiny := '1';
714 end if;
715 if new_exp > max_exp then -- adjusted exponent above the maximum
716 exp_huge := '1';
717 end if;
718
719 -- Compare P with zero and with B
720 px_nz := or (r.p(57 downto 4));
721 pcmpb_eq := '0';
722 if r.p(59 downto 4) = r.b.mantissa(55 downto 0) then
723 pcmpb_eq := '1';
724 end if;
725 pcmpb_lt := '0';
726 if unsigned(r.p(59 downto 4)) < unsigned(r.b.mantissa(55 downto 0)) then
727 pcmpb_lt := '1';
728 end if;
729
730 v.writing_back := '0'; -- from here on: default values for this cycle, overridden by the state arms below
731 v.instr_done := '0';
732 v.update_fprf := '0';
733 v.shift := to_signed(0, EXP_BITS);
734 v.first := '0';
735 v.opsel_a := AIN_R;
736 opsel_ainv <= '0';
737 opsel_mask <= '0';
738 opsel_b <= BIN_ZERO;
739 opsel_binv <= '0';
740 opsel_r <= RES_SUM;
741 opsel_s <= S_ZERO;
742 carry_in <= '0';
743 misc_sel <= "0000";
744 fpscr_mask := (others => '1');
745 update_fx := '0';
746 arith_done := '0';
747 invalid := '0';
748 zero_divide := '0';
749 renormalize := '0';
750 set_x := '0';
751 qnan_result := '0';
752 set_a := '0';
753 set_b := '0';
754 set_c := '0';
755 set_s := '0';
756 f_to_multiply.is_32bit <= '0';
757 f_to_multiply.valid <= '0'; -- no multiplier request unless a state asks for one
758 msel_1 <= MUL1_A;
759 msel_2 <= MUL2_C;
760 msel_add <= MULADD_ZERO;
761 msel_inv <= '0';
762 set_y := '0';
763 pshift := '0';
764 renorm_sqrt := '0';
765 shiftin := '0';
766 case r.state is
767 when IDLE => -- wait for an instruction; when e_in.valid = '1', decode insn(5 downto 1) and choose the first state
768 v.use_a := '0';
769 v.use_b := '0';
770 v.use_c := '0';
771 v.invalid := '0';
772 v.negate := '0';
773 if e_in.valid = '1' then
774 case e_in.insn(5 downto 1) is
775 when "00000" => -- ftdiv / ftsqrt / mcrfs / fcmp, told apart by insn bits 8, 7 and 6
776 if e_in.insn(8) = '1' then
777 if e_in.insn(6) = '0' then
778 v.state := DO_FTDIV;
779 else
780 v.state := DO_FTSQRT;
781 end if;
782 elsif e_in.insn(7) = '1' then
783 v.state := DO_MCRFS;
784 else
785 v.opsel_a := AIN_B;
786 v.state := DO_FCMP;
787 end if;
788 when "00110" => -- mtfsb / mtfsfi / fmrg, told apart by insn bits 10 and 8
789 if e_in.insn(10) = '0' then
790 if e_in.insn(8) = '0' then
791 v.state := DO_MTFSB;
792 else
793 v.state := DO_MTFSFI;
794 end if;
795 else
796 v.state := DO_FMRG;
797 end if;
798 when "00111" => -- mffs family or mtfsf, selected by insn bit 8
799 if e_in.insn(8) = '0' then
800 v.state := DO_MFFS;
801 else
802 v.state := DO_MTFSF;
803 end if;
804 when "01000" => -- move/sign ops (DO_FMR) unless insn(9 downto 8) = "11", which selects DO_FRI
805 v.opsel_a := AIN_B;
806 if e_in.insn(9 downto 8) /= "11" then
807 v.state := DO_FMR;
808 else
809 v.state := DO_FRI;
810 end if;
811 when "01100" =>
812 v.opsel_a := AIN_B;
813 v.state := DO_FRSP;
814 when "01110" => -- integer conversions; direction given by int_input
815 v.opsel_a := AIN_B;
816 if int_input = '1' then
817 -- fcfid[u][s]
818 v.state := DO_FCFID;
819 else
820 v.state := DO_FCTI;
821 end if;
822 when "01111" => -- DO_FCTI with the round mode forced to "001"
823 v.round_mode := "001";
824 v.opsel_a := AIN_B;
825 v.state := DO_FCTI;
826 when "10010" =>
827 v.opsel_a := AIN_A;
828 if v.b.mantissa(54) = '0' and v.a.mantissa(54) = '1' then -- B has mantissa bit 54 clear and A has it set: select B instead
829 v.opsel_a := AIN_B;
830 end if;
831 v.state := DO_FDIV;
832 when "10100" | "10101" =>
833 v.opsel_a := AIN_A;
834 v.state := DO_FADD;
835 when "10110" =>
836 v.is_sqrt := '1';
837 v.opsel_a := AIN_B;
838 v.state := DO_FSQRT;
839 when "10111" =>
840 v.state := DO_FSEL;
841 when "11000" =>
842 v.opsel_a := AIN_B;
843 v.state := DO_FRE;
844 when "11001" =>
845 v.is_multiply := '1';
846 v.opsel_a := AIN_A;
847 if v.c.mantissa(54) = '0' and v.a.mantissa(54) = '1' then -- C has mantissa bit 54 clear and A has it set: select C instead
848 v.opsel_a := AIN_C;
849 end if;
850 v.state := DO_FMUL;
851 when "11010" =>
852 v.is_sqrt := '1';
853 v.opsel_a := AIN_B;
854 v.state := DO_FRSQRTE;
855 when "11100" | "11101" | "11110" | "11111" => -- select A if its mantissa bit 54 is clear, else C if its bit 54 is clear, else B
856 if v.a.mantissa(54) = '0' then
857 v.opsel_a := AIN_A;
858 elsif v.c.mantissa(54) = '0' then
859 v.opsel_a := AIN_C;
860 else
861 v.opsel_a := AIN_B;
862 end if;
863 v.state := DO_FMADD;
864 when others => -- opcode pattern not handled above
865 illegal := '1';
866 end case;
867 end if;
868 v.x := '0';
869 v.old_exc := r.fpscr(FPSCR_VX downto FPSCR_XX); -- snapshot of the FPSCR VX..XX bits while idle
870 set_s := '1';
871
872 when DO_MCRFS => -- mcrfs: copy the FPSCR nibble selected by the BFA field to cr_result and clear the copied bits
873 j := to_integer(unsigned(insn_bfa(r.insn)));
874 for i in 0 to 7 loop -- loop with constant bounds so each slice below has static indices
875 if i = j then
876 k := (7 - i) * 4; -- field 0 is the most significant nibble
877 v.cr_result := r.fpscr(k + 3 downto k);
878 fpscr_mask(k + 3 downto k) := "0000";
879 end if;
880 end loop;
881 v.fpscr := r.fpscr and (fpscr_mask or x"6007F8FF"); -- bits set in the constant are never cleared
882 v.instr_done := '1';
883 v.state := IDLE;
884
885 when DO_FTDIV => -- ftdiv: set cr_result(2)/(1) from the operand classes; otherwise continue in FTDIV_1 for exponent checks
886 v.instr_done := '1';
887 v.state := IDLE;
888 v.cr_result := "0000";
889 if r.a.class = INFINITY or r.b.class = ZERO or r.b.class = INFINITY or
890 (r.b.class = FINITE and r.b.mantissa(53) = '0') then
891 v.cr_result(2) := '1';
892 end if;
893 if r.a.class = NAN or r.a.class = INFINITY or
894 r.b.class = NAN or r.b.class = ZERO or r.b.class = INFINITY or
895 (r.a.class = FINITE and r.a.exponent <= to_signed(-970, EXP_BITS)) then
896 v.cr_result(1) := '1';
897 else -- no immediate flag: take another cycle in FTDIV_1 instead of finishing
898 v.doing_ftdiv := "11";
899 v.first := '1';
900 v.state := FTDIV_1;
901 v.instr_done := '0';
902 end if;
903
904 when DO_FTSQRT => -- ftsqrt: single-cycle test of operand B; result goes to cr_result bits 2 (fg_flag) and 1 (fe_flag)
905 v.instr_done := '1';
906 v.state := IDLE;
907 v.cr_result := "0000"; -- both flags start cleared
908 if r.b.class = ZERO or r.b.class = INFINITY or
909 (r.b.class = FINITE and r.b.mantissa(53) = '0') then
910 v.cr_result(2) := '1'; -- fg_flag: B is zero, infinite, or finite with mantissa bit 53 clear
911 end if;
912 if r.b.class = NAN or r.b.class = INFINITY or r.b.class = ZERO
913 or r.b.negative = '1' or r.b.exponent <= to_signed(-970, EXP_BITS) then
914 v.cr_result(1) := '1'; -- fe_flag must be SET here; assigning '0' would be a no-op after the clear above
915 end if;
916
917 when DO_FCMP => -- compare A with B; cr_result bit 3 = A<B, bit 2 = A>B, bit 1 = equal, bit 0 = unordered
918 -- fcmp[uo]
919 -- r.opsel_a = AIN_B
920 v.instr_done := '1';
921 v.state := IDLE;
922 update_fx := '1';
923 v.result_exp := r.b.exponent;
924 if (r.a.class = NAN and r.a.mantissa(53) = '0') or
925 (r.b.class = NAN and r.b.mantissa(53) = '0') then
926 -- Signalling NAN
927 v.fpscr(FPSCR_VXSNAN) := '1';
928 if r.insn(6) = '1' and r.fpscr(FPSCR_VE) = '0' then -- insn(6) = '1' is fcmpo; VXVC only when VE is clear
929 v.fpscr(FPSCR_VXVC) := '1';
930 end if;
931 invalid := '1';
932 v.cr_result := "0001"; -- unordered
933 elsif r.a.class = NAN or r.b.class = NAN then
934 if r.insn(6) = '1' then
935 -- fcmpo
936 v.fpscr(FPSCR_VXVC) := '1';
937 invalid := '1';
938 end if;
939 v.cr_result := "0001"; -- unordered
940 elsif r.a.class = ZERO and r.b.class = ZERO then
941 v.cr_result := "0010"; -- equal
942 elsif r.a.negative /= r.b.negative then
943 v.cr_result := r.a.negative & r.b.negative & "00"; -- the negative operand is the smaller one
944 elsif r.a.class = ZERO then
945 -- A and B are the same sign from here down
946 v.cr_result := not r.b.negative & r.b.negative & "00";
947 elsif r.a.class = INFINITY then
948 if r.b.class = INFINITY then
949 v.cr_result := "0010";
950 else
951 v.cr_result := r.a.negative & not r.a.negative & "00";
952 end if;
953 elsif r.b.class = ZERO then
954 -- A is finite from here down
955 v.cr_result := r.a.negative & not r.a.negative & "00";
956 elsif r.b.class = INFINITY then
957 v.cr_result := not r.b.negative & r.b.negative & "00";
958 elsif r.exp_cmp = '1' then
959 -- A and B are both finite from here down
960 v.cr_result := r.a.negative & not r.a.negative & "00";
961 elsif r.a.exponent /= r.b.exponent then
962 -- A exponent is smaller than B
963 v.cr_result := not r.a.negative & r.a.negative & "00";
964 else
965 -- Prepare to subtract mantissas, put B in R
966 v.cr_result := "0000";
967 v.instr_done := '0';
968 v.opsel_a := AIN_A;
969 v.state := CMP_1;
970 end if;
971 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result; -- mirror the comparison result into the FPSCR FL..FU bits
972
973 when DO_MTFSB => -- write insn bit 6 into the single FPSCR bit selected by the BT field
974 -- mtfsb{0,1}
975 j := to_integer(unsigned(insn_bt(r.insn)));
976 for i in 0 to 31 loop -- constant-bound loop so the bit index is static
977 if i = j then
978 v.fpscr(31 - i) := r.insn(6); -- BT = 0 addresses the most significant bit
979 end if;
980 end loop;
981 v.instr_done := '1';
982 v.state := IDLE;
983
984 when DO_MTFSFI => -- write the immediate insn_u(r.insn) into the FPSCR nibble selected by the BF field
985 -- mtfsfi
986 j := to_integer(unsigned(insn_bf(r.insn)));
987 if r.insn(16) = '0' then -- nothing is written when insn bit 16 is set
988 for i in 0 to 7 loop
989 if i = j then
990 k := (7 - i) * 4; -- field 0 is the most significant nibble
991 v.fpscr(k + 3 downto k) := insn_u(r.insn);
992 end if;
993 end loop;
994 end if;
995 v.instr_done := '1';
996 v.state := IDLE;
997
998 when DO_FMRG => -- single-cycle merge: result comes from the RES_MISC path and is written back as an integer result
999 -- fmrgew, fmrgow
1000 opsel_r <= RES_MISC;
1001 misc_sel <= "01" & r.insn(8) & '0'; -- insn bit 8 picks between the two merge variants
1002 v.int_result := '1';
1003 v.writing_back := '1';
1004 v.instr_done := '1';
1005 v.state := IDLE;
1006
1007 when DO_MFFS => -- mffs family: write back via RES_MISC; variant chosen by insn(20 downto 16)
1008 v.int_result := '1';
1009 v.writing_back := '1';
1010 opsel_r <= RES_MISC;
1011 case r.insn(20 downto 16) is
1012 when "00000" =>
1013 -- mffs
1014 when "00001" =>
1015 -- mffsce
1016 v.fpscr(FPSCR_VE downto FPSCR_XE) := "00000"; -- also clears the FPSCR VE..XE bits
1017 when "10100" | "10101" =>
1018 -- mffscdrn[i] (but we don't implement DRN)
1019 fpscr_mask := x"000000FF";
1020 when "10110" =>
1021 -- mffscrn
1022 fpscr_mask := x"000000FF";
1023 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) :=
1024 r.b.mantissa(FPSCR_RN+1 downto FPSCR_RN); -- new RN taken from operand B
1025 when "10111" =>
1026 -- mffscrni
1027 fpscr_mask := x"000000FF";
1028 v.fpscr(FPSCR_RN+1 downto FPSCR_RN) := r.insn(12 downto 11); -- new RN taken from the instruction
1029 when "11000" =>
1030 -- mffsl
1031 fpscr_mask := x"0007F0FF";
1032 when others => -- encoding not handled above
1033 illegal := '1';
1034 end case;
1035 v.instr_done := '1';
1036 v.state := IDLE;
1037
1038 when DO_MTFSF => -- mtfsf: copy selected nibbles of operand B's mantissa into the FPSCR
1039 if r.insn(25) = '1' then -- insn bit 25 set: select all eight nibbles
1040 flm := x"FF";
1041 elsif r.insn(16) = '1' then -- insn bit 16 set: select none
1042 flm := x"00";
1043 else
1044 flm := r.insn(24 downto 17); -- otherwise use the field mask from the instruction
1045 end if;
1046 for i in 0 to 7 loop
1047 k := i * 4;
1048 if flm(i) = '1' then -- mask bit i covers FPSCR bits 4i+3..4i
1049 v.fpscr(k + 3 downto k) := r.b.mantissa(k + 3 downto k);
1050 end if;
1051 end loop;
1052 v.instr_done := '1';
1053 v.state := IDLE;
1054
1055 when DO_FMR => -- sign-manipulating moves: copy B's class and exponent, pick the result sign from insn bits 9..6
1056 -- r.opsel_a = AIN_B
1057 v.result_class := r.b.class;
1058 v.result_exp := r.b.exponent;
1059 v.quieten_nan := '0'; -- overrides the '1' set when the instruction is latched
1060 if r.insn(9) = '1' then
1061 v.result_sign := '0'; -- fabs
1062 elsif r.insn(8) = '1' then
1063 v.result_sign := '1'; -- fnabs
1064 elsif r.insn(7) = '1' then
1065 v.result_sign := r.b.negative; -- fmr
1066 elsif r.insn(6) = '1' then
1067 v.result_sign := not r.b.negative; -- fneg
1068 else
1069 v.result_sign := r.a.negative; -- fcpsgn
1070 end if;
1071 v.writing_back := '1';
1072 v.instr_done := '1';
1073 v.state := IDLE;
1074
1075 when DO_FRI => -- fri[nzpm]
1076 -- r.opsel_a = AIN_B
1077 v.result_class := r.b.class;
1078 v.result_sign := r.b.negative;
1079 v.result_exp := r.b.exponent;
1080 v.fpscr(FPSCR_FR) := '0';
1081 v.fpscr(FPSCR_FI) := '0';
1082 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1083 -- Signalling NAN
1084 v.fpscr(FPSCR_VXSNAN) := '1';
1085 invalid := '1';
1086 end if;
1087 if r.b.class = FINITE then
1088 if r.b.exponent >= to_signed(52, EXP_BITS) then
1089 -- integer already, no rounding required
1090 arith_done := '1';
1091 else
1092 v.shift := r.b.exponent - to_signed(52, EXP_BITS); -- negative shift count for FRI_1
1093 v.state := FRI_1;
1094 v.round_mode := '1' & r.insn(7 downto 6); -- rounding mode comes from the instruction, not the FPSCR
1095 end if;
1096 else -- zero, infinity or NaN: no rounding step
1097 arith_done := '1';
1098 end if;
1099
1100 when DO_FRSP => -- frsp: B checked against the single-precision exponent range (-126..127)
1101 -- r.opsel_a = AIN_B, r.shift = 0
1102 v.result_class := r.b.class;
1103 v.result_sign := r.b.negative;
1104 v.result_exp := r.b.exponent;
1105 v.fpscr(FPSCR_FR) := '0';
1106 v.fpscr(FPSCR_FI) := '0';
1107 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1108 -- Signalling NAN
1109 v.fpscr(FPSCR_VXSNAN) := '1';
1110 invalid := '1';
1111 end if;
1112 set_x := '1';
1113 if r.b.class = FINITE then
1114 if r.b.exponent < to_signed(-126, EXP_BITS) then -- below the single-precision minimum exponent
1115 v.shift := r.b.exponent - to_signed(-126, EXP_BITS);
1116 v.state := ROUND_UFLOW;
1117 elsif r.b.exponent > to_signed(127, EXP_BITS) then -- above the single-precision maximum exponent
1118 v.state := ROUND_OFLOW;
1119 else
1120 v.shift := to_signed(-2, EXP_BITS);
1121 v.state := ROUNDING;
1122 end if;
1123 else -- zero, infinity or NaN: finish without rounding
1124 arith_done := '1';
1125 end if;
1126
1127 when DO_FCTI => -- float-to-integer conversion: classify B and pick overflow, shift or round-shift path
1128 -- instr bit 9: 1=dword 0=word
1129 -- instr bit 8: 1=unsigned 0=signed
1130 -- instr bit 1: 1=round to zero 0=use fpscr[RN]
1131 -- r.opsel_a = AIN_B
1132 v.result_class := r.b.class;
1133 v.result_sign := r.b.negative;
1134 v.result_exp := r.b.exponent;
1135 v.fpscr(FPSCR_FR) := '0';
1136 v.fpscr(FPSCR_FI) := '0';
1137 if r.b.class = NAN and r.b.mantissa(53) = '0' then
1138 -- Signalling NAN
1139 v.fpscr(FPSCR_VXSNAN) := '1';
1140 invalid := '1';
1141 end if;
1142
1143 v.int_result := '1';
1144 case r.b.class is
1145 when ZERO =>
1146 arith_done := '1';
1147 when FINITE =>
1148 if r.b.exponent >= to_signed(64, EXP_BITS) or
1149 (r.insn(9) = '0' and r.b.exponent >= to_signed(32, EXP_BITS)) then -- too large for the dword / word target
1150 v.state := INT_OFLOW;
1151 elsif r.b.exponent >= to_signed(52, EXP_BITS) then
1152 -- integer already, no rounding required,
1153 -- shift into final position
1154 v.shift := r.b.exponent - to_signed(54, EXP_BITS);
1155 if r.insn(8) = '1' and r.b.negative = '1' then -- negative value with an unsigned target
1156 v.state := INT_OFLOW;
1157 else
1158 v.state := INT_ISHIFT;
1159 end if;
1160 else
1161 v.shift := r.b.exponent - to_signed(52, EXP_BITS);
1162 v.state := INT_SHIFT;
1163 end if;
1164 when INFINITY | NAN =>
1165 v.state := INT_OFLOW;
1166 end case;
1167
1168 when DO_FCFID => -- integer-to-float conversion: take the magnitude of B and start from exponent 54
1169 -- r.opsel_a = AIN_B
1170 v.result_sign := '0';
1171 if r.insn(8) = '0' and r.b.negative = '1' then
1172 -- fcfid[s] with negative operand, set R = -B
1173 opsel_ainv <= '1';
1174 carry_in <= '1'; -- invert plus carry-in gives the two's complement
1175 v.result_sign := '1';
1176 end if;
1177 v.result_class := r.b.class;
1178 v.result_exp := to_signed(54, EXP_BITS);
1179 v.fpscr(FPSCR_FR) := '0';
1180 v.fpscr(FPSCR_FI) := '0';
1181 if r.b.class = ZERO then
1182 arith_done := '1';
1183 else
1184 v.state := FINISH;
1185 end if;
1186
1187 when DO_FADD => -- add/subtract setup: finite operands go to the ADD_* states, special classes are resolved here
1188 -- fadd[s] and fsub[s]
1189 -- r.opsel_a = AIN_A
1190 v.result_sign := r.a.negative;
1191 v.result_class := r.a.class;
1192 v.result_exp := r.a.exponent;
1193 v.fpscr(FPSCR_FR) := '0';
1194 v.fpscr(FPSCR_FI) := '0';
1195 v.use_a := '1';
1196 v.use_b := '1';
1197 is_add := r.a.negative xor r.b.negative xor r.insn(1); -- '1' when the magnitudes are to be added
1198 if r.a.class = FINITE and r.b.class = FINITE then
1199 v.is_subtract := not is_add;
1200 v.add_bsmall := r.exp_cmp; -- exp_cmp = '1' means A's exponent is greater than B's
1201 v.opsel_a := AIN_B;
1202 if r.exp_cmp = '0' then
1203 v.shift := r.a.exponent - r.b.exponent;
1204 v.result_sign := r.b.negative xnor r.insn(1); -- B's sign, flipped when insn(1) = '0'
1205 if r.a.exponent = r.b.exponent then -- equal exponents: no alignment shift needed
1206 v.state := ADD_2;
1207 else
1208 v.longmask := '0';
1209 v.state := ADD_SHIFT;
1210 end if;
1211 else
1212 v.state := ADD_1;
1213 end if;
1214 else
1215 if r.a.class = NAN or r.b.class = NAN then
1216 v.state := NAN_RESULT;
1217 elsif r.a.class = INFINITY and r.b.class = INFINITY and is_add = '0' then
1218 -- invalid operation, construct QNaN
1219 v.fpscr(FPSCR_VXISI) := '1';
1220 qnan_result := '1';
1221 arith_done := '1';
1222 elsif r.a.class = ZERO and r.b.class = ZERO and is_add = '0' then
1223 -- return -0 for rounding to -infinity
1224 v.result_sign := r.round_mode(1) and r.round_mode(0);
1225 arith_done := '1';
1226 elsif r.a.class = INFINITY or r.b.class = ZERO then
1227 -- result is A
1228 v.opsel_a := AIN_A;
1229 v.state := EXC_RESULT;
1230 else
1231 -- result is +/- B
1232 v.opsel_a := AIN_B;
1233 v.negate := not r.insn(1); -- negate B when insn(1) = '0'
1234 v.state := EXC_RESULT;
1235 end if;
1236 end if;
1237
1238 when DO_FMUL => -- multiply setup (A * C): renormalize operands if needed, else start the multiplier
1239 -- fmul[s]
1240 -- r.opsel_a = AIN_A unless C is denorm and A isn't
1241 v.result_sign := r.a.negative xor r.c.negative;
1242 v.result_class := r.a.class;
1243 v.fpscr(FPSCR_FR) := '0';
1244 v.fpscr(FPSCR_FI) := '0';
1245 v.use_a := '1';
1246 v.use_c := '1';
1247 if r.a.class = FINITE and r.c.class = FINITE then
1248 v.result_exp := r.a.exponent + r.c.exponent;
1249 -- Renormalize denorm operands
1250 if r.a.mantissa(54) = '0' then
1251 v.state := RENORM_A;
1252 elsif r.c.mantissa(54) = '0' then
1253 v.state := RENORM_C;
1254 else
1255 f_to_multiply.valid <= '1'; -- start the multiply this cycle
1256 v.state := MULT_1;
1257 end if;
1258 else
1259 if r.a.class = NAN or r.c.class = NAN then
1260 v.state := NAN_RESULT;
1261 elsif (r.a.class = INFINITY and r.c.class = ZERO) or
1262 (r.a.class = ZERO and r.c.class = INFINITY) then
1263 -- invalid operation, construct QNaN
1264 v.fpscr(FPSCR_VXIMZ) := '1';
1265 qnan_result := '1'; -- NOTE(review): arith_done is not set here, unlike DO_FADD; presumably handled with qnan_result after the case - confirm
1266 elsif r.a.class = ZERO or r.a.class = INFINITY then
1267 -- result is +/- A
1268 arith_done := '1';
1269 else
1270 -- r.c.class is ZERO or INFINITY
1271 v.opsel_a := AIN_C;
1272 v.negate := r.a.negative;
1273 v.state := EXC_RESULT;
1274 end if;
1275 end if;
1276
1277 when DO_FDIV => -- divide setup (A / B): renormalize operands if needed, else go to DIV_2; special classes resolved here
1278 -- r.opsel_a = AIN_A unless B is denorm and A isn't
1279 v.result_class := r.a.class;
1280 v.fpscr(FPSCR_FR) := '0';
1281 v.fpscr(FPSCR_FI) := '0';
1282 v.use_a := '1';
1283 v.use_b := '1';
1284 v.result_sign := r.a.negative xor r.b.negative;
1285 v.result_exp := r.a.exponent - r.b.exponent;
1286 v.count := "00"; -- iteration counter used by DIV_2 / DIV_3
1287 if r.a.class = FINITE and r.b.class = FINITE then
1288 -- Renormalize denorm operands
1289 if r.a.mantissa(54) = '0' then
1290 v.state := RENORM_A;
1291 elsif r.b.mantissa(54) = '0' then
1292 v.state := RENORM_B;
1293 else
1294 v.first := '1';
1295 v.state := DIV_2;
1296 end if;
1297 else
1298 if r.a.class = NAN or r.b.class = NAN then
1299 v.state := NAN_RESULT;
1300 elsif r.b.class = INFINITY then
1301 if r.a.class = INFINITY then -- infinity / infinity is invalid
1302 v.fpscr(FPSCR_VXIDI) := '1';
1303 qnan_result := '1';
1304 else
1305 v.result_class := ZERO;
1306 end if;
1307 arith_done := '1';
1308 elsif r.b.class = ZERO then
1309 if r.a.class = ZERO then -- zero / zero is invalid
1310 v.fpscr(FPSCR_VXZDZ) := '1';
1311 qnan_result := '1';
1312 else
1313 if r.a.class = FINITE then -- zero-divide is flagged only for a finite dividend
1314 zero_divide := '1';
1315 end if;
1316 v.result_class := INFINITY;
1317 end if;
1318 arith_done := '1';
1319 else -- r.b.class = FINITE, result_class = r.a.class
1320 arith_done := '1';
1321 end if;
1322 end if;
1323
1324 when DO_FSEL => -- fsel: select C when A is zero or non-negative and not NaN, otherwise select B
1325 v.fpscr(FPSCR_FR) := '0';
1326 v.fpscr(FPSCR_FI) := '0';
1327 if r.a.class = ZERO or (r.a.negative = '0' and r.a.class /= NAN) then
1328 v.opsel_a := AIN_C;
1329 else
1330 v.opsel_a := AIN_B;
1331 end if;
1332 v.quieten_nan := '0'; -- overrides the '1' set when the instruction is latched
1333 v.state := EXC_RESULT;
1334
1335 when DO_FSQRT => -- square-root setup on B: handle special classes, otherwise renormalize or start SQRT_1
1336 -- r.opsel_a = AIN_B
1337 v.result_class := r.b.class;
1338 v.result_sign := r.b.negative;
1339 v.fpscr(FPSCR_FR) := '0';
1340 v.fpscr(FPSCR_FI) := '0';
1341 v.use_b := '1';
1342 case r.b.class is
1343 when FINITE =>
1344 v.result_exp := r.b.exponent;
1345 if r.b.negative = '1' then -- square root of a negative finite number is invalid
1346 v.fpscr(FPSCR_VXSQRT) := '1';
1347 qnan_result := '1';
1348 elsif r.b.mantissa(54) = '0' then
1349 v.state := RENORM_B;
1350 elsif r.b.exponent(0) = '0' then -- even exponent
1351 v.state := SQRT_1;
1352 else -- odd exponent: go through RENORM_B2 with a shift of 1
1353 v.shift := to_signed(1, EXP_BITS);
1354 v.state := RENORM_B2;
1355 end if;
1356 when NAN =>
1357 v.state := NAN_RESULT;
1358 when ZERO =>
1359 -- result is B
1360 arith_done := '1';
1361 when INFINITY =>
1362 if r.b.negative = '1' then
1363 v.fpscr(FPSCR_VXSQRT) := '1';
1364 qnan_result := '1';
1365 -- else result is B
1366 end if;
1367 arith_done := '1';
1368 end case;
1369
1370 when DO_FRE => -- reciprocal-estimate setup on B: special classes resolved here, finite goes to RENORM_B or FRE_1
1371 -- r.opsel_a = AIN_B
1372 v.result_class := r.b.class;
1373 v.result_sign := r.b.negative;
1374 v.fpscr(FPSCR_FR) := '0';
1375 v.fpscr(FPSCR_FI) := '0';
1376 v.use_b := '1';
1377 case r.b.class is
1378 when FINITE =>
1379 v.result_exp := - r.b.exponent; -- reciprocal negates the exponent
1380 if r.b.mantissa(54) = '0' then
1381 v.state := RENORM_B;
1382 else
1383 v.state := FRE_1;
1384 end if;
1385 when NAN =>
1386 v.state := NAN_RESULT;
1387 when INFINITY => -- 1/infinity gives zero
1388 v.result_class := ZERO;
1389 arith_done := '1';
1390 when ZERO => -- 1/zero gives infinity and flags zero-divide
1391 v.result_class := INFINITY;
1392 zero_divide := '1';
1393 arith_done := '1';
1394 end case;
1395
1396 when DO_FRSQRTE => -- reciprocal-square-root-estimate setup on B
1397 -- r.opsel_a = AIN_B
1398 v.result_class := r.b.class;
1399 v.result_sign := r.b.negative;
1400 v.fpscr(FPSCR_FR) := '0';
1401 v.fpscr(FPSCR_FI) := '0';
1402 v.use_b := '1';
1403 v.shift := to_signed(1, EXP_BITS);
1404 case r.b.class is
1405 when FINITE =>
1406 v.result_exp := r.b.exponent;
1407 if r.b.negative = '1' then -- negative finite operand is invalid
1408 v.fpscr(FPSCR_VXSQRT) := '1';
1409 qnan_result := '1';
1410 elsif r.b.mantissa(54) = '0' then
1411 v.state := RENORM_B;
1412 elsif r.b.exponent(0) = '0' then -- even exponent
1413 v.state := RSQRT_1;
1414 else -- odd exponent: go through RENORM_B2 with the shift of 1 set above
1415 v.state := RENORM_B2;
1416 end if;
1417 when NAN =>
1418 v.state := NAN_RESULT;
1419 when INFINITY =>
1420 if r.b.negative = '1' then
1421 v.fpscr(FPSCR_VXSQRT) := '1';
1422 qnan_result := '1';
1423 else
1424 v.result_class := ZERO;
1425 end if;
1426 arith_done := '1';
1427 when ZERO => -- zero operand gives infinity and flags zero-divide
1428 v.result_class := INFINITY;
1429 zero_divide := '1';
1430 arith_done := '1';
1431 end case;
1432
1433 when DO_FMADD => -- fused multiply-add setup (A * C +/- B); re-entered from RENORM_A2 / RENORM_C2 after renormalization
1434 -- fmadd, fmsub, fnmadd, fnmsub
1435 -- r.opsel_a = AIN_A if A is denorm, else AIN_C if C is denorm,
1436 -- else AIN_B
1437 v.result_sign := r.a.negative;
1438 v.result_class := r.a.class;
1439 v.result_exp := r.a.exponent;
1440 v.fpscr(FPSCR_FR) := '0';
1441 v.fpscr(FPSCR_FI) := '0';
1442 v.use_a := '1';
1443 v.use_b := '1';
1444 v.use_c := '1';
1445 is_add := r.a.negative xor r.c.negative xor r.b.negative xor r.insn(1); -- '1' when product and addend magnitudes add
1446 if r.a.class = FINITE and r.c.class = FINITE and
1447 (r.b.class = FINITE or r.b.class = ZERO) then
1448 v.is_subtract := not is_add;
1449 mulexp := r.a.exponent + r.c.exponent; -- exponent of the product
1450 v.result_exp := mulexp;
1451 -- Make sure A and C are normalized
1452 if r.a.mantissa(54) = '0' then
1453 v.state := RENORM_A;
1454 elsif r.c.mantissa(54) = '0' then
1455 v.state := RENORM_C;
1456 elsif r.b.class = ZERO then
1457 -- no addend, degenerates to multiply
1458 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1459 f_to_multiply.valid <= '1';
1460 v.is_multiply := '1';
1461 v.state := MULT_1;
1462 elsif r.madd_cmp = '0' then
1463 -- addend is bigger, do multiply first
1464 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1465 f_to_multiply.valid <= '1';
1466 v.state := FMADD_1;
1467 else
1468 -- product is bigger, shift B right and use it as the
1469 -- addend to the multiplier
1470 v.shift := r.b.exponent - mulexp + to_signed(64, EXP_BITS);
1471 -- for subtract, multiplier does B - A * C
1472 v.result_sign := not (r.a.negative xor r.c.negative xor r.insn(2) xor is_add);
1473 v.result_exp := r.b.exponent;
1474 v.state := FMADD_2;
1475 end if;
1476 else
1477 if r.a.class = NAN or r.b.class = NAN or r.c.class = NAN then
1478 v.state := NAN_RESULT;
1479 elsif (r.a.class = ZERO and r.c.class = INFINITY) or
1480 (r.a.class = INFINITY and r.c.class = ZERO) then
1481 -- invalid operation, construct QNaN
1482 v.fpscr(FPSCR_VXIMZ) := '1';
1483 qnan_result := '1';
1484 elsif r.a.class = INFINITY or r.c.class = INFINITY then
1485 if r.b.class = INFINITY and is_add = '0' then
1486 -- invalid operation, construct QNaN
1487 v.fpscr(FPSCR_VXISI) := '1';
1488 qnan_result := '1';
1489 else
1490 -- result is infinity
1491 v.result_class := INFINITY;
1492 v.result_sign := r.a.negative xor r.c.negative xor r.insn(2);
1493 arith_done := '1';
1494 end if;
1495 else
1496 -- Here A is zero, C is zero, or B is infinity
1497 -- Result is +/-B in all of those cases
1498 v.opsel_a := AIN_B;
1499 if r.b.class /= ZERO or is_add = '1' then
1500 v.negate := not (r.insn(1) xor r.insn(2));
1501 else
1502 -- have to be careful about rule for 0 - 0 result sign
1503 v.negate := r.b.negative xor (r.round_mode(1) and r.round_mode(0)) xor r.insn(2);
1504 end if;
1505 v.state := EXC_RESULT;
1506 end if;
1507 end if;
1508
1509 when RENORM_A => -- first renormalization cycle for operand A; also selects the next operand for RENORM_A2
1510 renormalize := '1';
1511 v.state := RENORM_A2;
1512 if r.insn(4) = '1' then -- insn(4) = '1': fmul/fmadd (next operand is C); '0': fdiv (next is B)
1513 v.opsel_a := AIN_C;
1514 else
1515 v.opsel_a := AIN_B;
1516 end if;
1517
1518 when RENORM_A2 => -- store the renormalized A, update the exponent, then continue with the other operand or the operation
1519 -- r.opsel_a = AIN_C for fmul/fmadd, AIN_B for fdiv
1520 set_a := '1';
1521 v.result_exp := new_exp;
1522 if r.insn(4) = '1' then
1523 if r.c.mantissa(54) = '1' then -- C needs no renormalization
1524 if r.insn(3) = '0' or r.b.class = ZERO then -- multiply only (no addend to consider)
1525 v.first := '1';
1526 v.state := MULT_1;
1527 else
1528 v.madd_cmp := '0'; -- recompute the product-vs-addend exponent comparison with the new exponent
1529 if new_exp + 1 >= r.b.exponent then
1530 v.madd_cmp := '1';
1531 end if;
1532 v.opsel_a := AIN_B;
1533 v.state := DO_FMADD;
1534 end if;
1535 else
1536 v.state := RENORM_C;
1537 end if;
1538 else
1539 if r.b.mantissa(54) = '1' then -- B needs no renormalization: start the divide
1540 v.first := '1';
1541 v.state := DIV_2;
1542 else
1543 v.state := RENORM_B;
1544 end if;
1545 end if;
1546
1547 when RENORM_B => -- first renormalization cycle for operand B
1548 renormalize := '1';
1549 renorm_sqrt := r.is_sqrt; -- flag passed on for square-root style operations
1550 v.state := RENORM_B2;
1551
1552 when RENORM_B2 => -- store the renormalized B, adjust the result exponent, then go to LOOKUP
1553 set_b := '1';
1554 if r.is_sqrt = '0' then
1555 v.result_exp := r.result_exp + r.shift; -- shift is added here (new_exp would subtract it)
1556 else
1557 v.result_exp := new_exp;
1558 end if;
1559 v.opsel_a := AIN_B;
1560 v.state := LOOKUP;
1561
1562 when RENORM_C => -- first renormalization cycle for operand C
1563 renormalize := '1';
1564 v.state := RENORM_C2;
1565
1566 when RENORM_C2 => -- store the renormalized C, update the exponent, then start the multiply or return to DO_FMADD
1567 set_c := '1';
1568 v.result_exp := new_exp;
1569 if r.insn(3) = '0' or r.b.class = ZERO then -- multiply only (no addend to consider)
1570 v.first := '1';
1571 v.state := MULT_1;
1572 else
1573 v.madd_cmp := '0'; -- recompute the product-vs-addend exponent comparison with the new exponent
1574 if new_exp + 1 >= r.b.exponent then
1575 v.madd_cmp := '1';
1576 end if;
1577 v.opsel_a := AIN_B;
1578 v.state := DO_FMADD;
1579 end if;
1580
1581 when ADD_1 => -- reached from DO_FADD when A's exponent is larger: prepare to shift B
1582 -- transferring B to R
1583 v.shift := r.b.exponent - r.a.exponent;
1584 v.result_exp := r.b.exponent;
1585 v.longmask := '0';
1586 v.state := ADD_SHIFT;
1587
1588 when ADD_SHIFT => -- align the smaller operand via the shifter and select the other operand for ADD_2
1589 -- r.shift = - exponent difference, r.longmask = 0
1590 opsel_r <= RES_SHIFT;
1591 v.x := s_nz;
1592 set_x := '1';
1593 v.longmask := r.single_prec; -- restore the longmask value set when the instruction was latched
1594 if r.add_bsmall = '1' then
1595 v.opsel_a := AIN_A;
1596 else
1597 v.opsel_a := AIN_B;
1598 end if;
1599 v.state := ADD_2;
1600
1601 when ADD_2 => -- add or subtract the selected operand and R
1602 -- r.opsel_a = AIN_A if r.add_bsmall = 1 else AIN_B
1603 opsel_b <= BIN_R;
1604 opsel_binv <= r.is_subtract; -- invert R for subtraction
1605 carry_in <= r.is_subtract and not r.x; -- carry-in for subtraction is suppressed when r.x is set
1606 v.shift := to_signed(-1, EXP_BITS);
1607 v.state := ADD_3;
1608
1609 when ADD_3 => -- inspect the sum in R and choose negate, right shift, round, zero result or normalize
1610 -- check for overflow or negative result (can't get both)
1611 -- r.shift = -1
1612 if r.r(63) = '1' then
1613 -- result is opposite sign to expected
1614 v.result_sign := not r.result_sign;
1615 opsel_ainv <= '1';
1616 carry_in <= '1';
1617 v.state := FINISH;
1618 elsif r.r(55) = '1' then
1619 -- sum overflowed, shift right
1620 opsel_r <= RES_SHIFT;
1621 set_x := '1';
1622 v.shift := to_signed(-2, EXP_BITS);
1623 if exp_huge = '1' then
1624 v.state := ROUND_OFLOW;
1625 else
1626 v.state := ROUNDING;
1627 end if;
1628 elsif r.r(54) = '1' then -- bit 54 set: no shift needed before rounding
1629 set_x := '1';
1630 v.shift := to_signed(-2, EXP_BITS);
1631 v.state := ROUNDING;
1632 elsif (r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then -- R bits 55..0 are all zero
1633 -- r.x must be zero at this point
1634 v.result_class := ZERO;
1635 if r.is_subtract = '1' then
1636 -- set result sign depending on rounding mode
1637 v.result_sign := r.round_mode(1) and r.round_mode(0);
1638 end if;
1639 arith_done := '1';
1640 else
1641 renormalize := '1';
1642 v.state := NORMALIZE;
1643 end if;
1644
1645 when CMP_1 => -- fcmp with equal exponents: compute A - R (R holds B per DO_FCMP) by invert plus carry-in
1646 -- r.opsel_a = AIN_A
1647 opsel_b <= BIN_R;
1648 opsel_binv <= '1';
1649 carry_in <= '1';
1650 v.state := CMP_2;
1651
1652 when CMP_2 => -- derive the comparison result from the mantissa difference in R and finish fcmp
1653 if r.r(63) = '1' then
1654 -- A is smaller in magnitude
1655 v.cr_result := not r.a.negative & r.a.negative & "00";
1656 elsif (r_hi_nz or r_lo_nz) = '0' then -- difference is zero in bits 55..2: equal
1657 v.cr_result := "0010";
1658 else -- A is larger in magnitude
1659 v.cr_result := r.a.negative & not r.a.negative & "00";
1660 end if;
1661 v.fpscr(FPSCR_FL downto FPSCR_FU) := v.cr_result; -- mirror the result into the FPSCR FL..FU bits
1662 v.instr_done := '1';
1663 v.state := IDLE;
1664
1665 when MULT_1 => -- wait for the multiplier result, then go to FINISH
1666 f_to_multiply.valid <= r.first; -- issue the request only on the first cycle in this state
1667 opsel_r <= RES_MULT;
1668 if multiply_to_f.valid = '1' then
1669 v.state := FINISH;
1670 end if;
1671
1672 when FMADD_1 => -- fmadd with the larger addend: wait for the product, then align it via ADD_SHIFT
1673 -- Addend is bigger here
1674 v.result_sign := not (r.b.negative xor r.insn(1) xor r.insn(2));
1675 -- note v.shift is at most -2 here
1676 v.shift := r.result_exp - r.b.exponent;
1677 opsel_r <= RES_MULT;
1678 opsel_s <= S_MULT;
1679 set_s := '1';
1680 f_to_multiply.valid <= r.first;
1681 if multiply_to_f.valid = '1' then
1682 v.longmask := '0';
1683 v.state := ADD_SHIFT;
1684 end if;
1685
1686 when FMADD_2 => -- fmadd with the larger product: first shift step of the addend, into S
1687 -- Product is potentially bigger here
1688 -- r.shift = addend exp - product exp + 64, r.r = r.b.mantissa
1689 set_s := '1';
1690 opsel_s <= S_SHIFT;
1691 v.shift := r.shift - to_signed(64, EXP_BITS); -- remove the +64 offset for the next step
1692 v.state := FMADD_3;
1693
1694 when FMADD_3 => -- second shift step of the addend, into R
1695 -- r.shift = addend exp - product exp
1696 opsel_r <= RES_SHIFT;
1697 v.first := '1'; -- so that FMADD_4 issues its multiply request
1698 v.state := FMADD_4;
1699
1700 when FMADD_4 => -- multiply with the MULADD_RS addend selected; wait for the result
1701 msel_add <= MULADD_RS;
1702 f_to_multiply.valid <= r.first;
1703 msel_inv <= r.is_subtract;
1704 opsel_r <= RES_MULT;
1705 opsel_s <= S_MULT;
1706 set_s := '1';
1707 v.shift := to_signed(56, EXP_BITS);
1708 if multiply_to_f.valid = '1' then
1709 if multiply_to_f.result(121) = '1' then -- result bit 121 set: result must be negated (FMADD_5)
1710 v.state := FMADD_5;
1711 else
1712 v.state := FMADD_6;
1713 end if;
1714 end if;
1715
1716 when FMADD_5 => -- negate the multi-word result and flip the result sign
1717 -- negate R:S:X
1718 v.result_sign := not r.result_sign;
1719 opsel_ainv <= '1';
1720 carry_in <= not (s_nz or r.x); -- carry into R only when S and X are both zero
1721 opsel_s <= S_NEG;
1722 set_s := '1';
1723 v.shift := to_signed(56, EXP_BITS);
1724 v.state := FMADD_6;
1725
1726 when FMADD_6 => -- inspect R (and S) after the fused operation: zero result, pull bits up from S, finish or normalize
1727 -- r.shift = 56 (or 0, but only if r is now nonzero)
1728 if (r.r(56) or r_hi_nz or r_lo_nz or r.r(1) or r.r(0)) = '0' then -- R bits 56..0 are all zero
1729 if s_nz = '0' then
1730 -- must be a subtraction, and r.x must be zero
1731 v.result_class := ZERO;
1732 v.result_sign := r.round_mode(1) and r.round_mode(0);
1733 arith_done := '1';
1734 else
1735 -- R is all zeroes but there are non-zero bits in S
1736 -- so shift them into R and set S to 0
1737 opsel_r <= RES_SHIFT;
1738 set_s := '1';
1739 -- stay in state FMADD_6
1740 end if;
1741 elsif r.r(56 downto 54) = "001" then -- leading one already at bit 54
1742 v.state := FINISH;
1743 else
1744 renormalize := '1';
1745 v.state := NORMALIZE;
1746 end if;
1747
1748 when LOOKUP => -- one-cycle wait for the table lookup, then dispatch on insn bits 4..2
1749 -- r.opsel_a = AIN_B
1750 -- wait one cycle for inverse_table[B] lookup
1751 v.first := '1';
1752 if r.insn(4) = '0' then
1753 if r.insn(3) = '0' then
1754 v.state := DIV_2;
1755 else
1756 v.state := SQRT_1;
1757 end if;
1758 elsif r.insn(2) = '0' then
1759 v.state := FRE_1;
1760 else
1761 v.state := RSQRT_1;
1762 end if;
1763
1764 when DIV_2 => -- divide iteration, first half; alternates with DIV_3
1765 -- compute Y = inverse_table[B] (when count=0); P = 2 - B * Y
1766 msel_1 <= MUL1_B;
1767 msel_add <= MULADD_CONST;
1768 msel_inv <= '1';
1769 if r.count = 0 then -- first pass uses the lookup table, later passes use P
1770 msel_2 <= MUL2_LUT;
1771 else
1772 msel_2 <= MUL2_P;
1773 end if;
1774 set_y := r.first;
1775 pshift := '1';
1776 f_to_multiply.valid <= r.first;
1777 if multiply_to_f.valid = '1' then
1778 v.first := '1';
1779 v.count := r.count + 1;
1780 v.state := DIV_3;
1781 end if;
1782
1783 when DIV_3 => -- divide iteration, second half; loops back to DIV_2 until count reaches 3
1784 -- compute Y = P = P * Y
1785 msel_1 <= MUL1_Y;
1786 msel_2 <= MUL2_P;
1787 f_to_multiply.valid <= r.first;
1788 pshift := '1';
1789 if multiply_to_f.valid = '1' then
1790 v.first := '1';
1791 if r.count = 3 then
1792 v.state := DIV_4;
1793 else
1794 v.state := DIV_2;
1795 end if;
1796 end if;
1797
1798 when DIV_4 => -- form the quotient estimate and load it into R
1799 -- compute R = P = A * Y (quotient)
1800 msel_1 <= MUL1_A;
1801 msel_2 <= MUL2_P;
1802 set_y := r.first;
1803 f_to_multiply.valid <= r.first;
1804 pshift := '1';
1805 if multiply_to_f.valid = '1' then
1806 opsel_r <= RES_MULT; -- capture the product in R only once it is valid
1807 v.first := '1';
1808 v.state := DIV_5;
1809 end if;
1810
1811 when DIV_5 => -- compute the remainder for the final quotient correction
1812 -- compute P = A - B * R (remainder)
1813 msel_1 <= MUL1_B;
1814 msel_2 <= MUL2_R;
1815 msel_add <= MULADD_A;
1816 msel_inv <= '1';
1817 f_to_multiply.valid <= r.first;
1818 if multiply_to_f.valid = '1' then
1819 v.state := DIV_6;
1820 end if;
1821
1822 when DIV_6 => -- correct the quotient by at most one and derive the X bit from the remainder
1823 -- test if remainder is 0 or >= B
1824 if pcmpb_lt = '1' then
1825 -- quotient is correct, set X if remainder non-zero
1826 v.x := r.p(58) or px_nz;
1827 else
1828 -- quotient needs to be incremented by 1
1829 carry_in <= '1';
1830 v.x := not pcmpb_eq; -- X is clear only when the remainder equals B
1831 end if;
1832 v.state := FINISH;
1833
1834 when FRE_1 => -- load the RES_MISC value selected by "0111" into R, then normalize
1835 opsel_r <= RES_MISC;
1836 misc_sel <= "0111";
1837 v.shift := to_signed(1, EXP_BITS);
1838 v.state := NORMALIZE;
1839
1840 when FTDIV_1 => -- ftdiv exponent-range checks using exp_tiny / exp_huge; may take two passes
1841 v.cr_result(1) := exp_tiny or exp_huge;
1842 if exp_tiny = '1' or exp_huge = '1' or r.a.class = ZERO or r.first = '0' then -- finish on a range failure, zero A, or the second pass
1843 v.instr_done := '1';
1844 v.state := IDLE;
1845 else -- first pass clean: stay in this state for a second pass with shift = A's exponent
1846 v.shift := r.a.exponent;
1847 v.doing_ftdiv := "10";
1848 end if;
1849
1850 when RSQRT_1 => -- load the RES_MISC value selected by "0111" into R, set the exponent, then normalize
1851 opsel_r <= RES_MISC;
1852 misc_sel <= "0111";
1853 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1); -- B's exponent arithmetically shifted right by one
1854 v.result_exp := - sqrt_exp;
1855 v.shift := to_signed(1, EXP_BITS);
1856 v.state := NORMALIZE;
1857
1858 when SQRT_1 => -- first square-root step: seed from the lookup table and start the first multiply
1859 -- put invsqr[B] in R and compute P = invsqr[B] * B
1860 -- also transfer B (in R) to A
1861 set_a := '1';
1862 opsel_r <= RES_MISC;
1863 misc_sel <= "0111";
1864 msel_1 <= MUL1_B;
1865 msel_2 <= MUL2_LUT;
1866 f_to_multiply.valid <= '1';
1867 v.shift := to_signed(-1, EXP_BITS);
1868 v.count := "00"; -- iteration counter used by SQRT_7
1869 v.state := SQRT_2;
1870
1871 when SQRT_2 => -- shift step between the table seed and the first product
1872 -- shift R right one place
1873 -- not expecting multiplier result yet
1874 -- r.shift = -1
1875 opsel_r <= RES_SHIFT;
1876 v.first := '1';
1877 v.state := SQRT_3;
1878
1879 when SQRT_3 => -- save R in Y and wait for the multiply started in SQRT_1
1880 -- put R into Y, wait for product from multiplier
1881 msel_2 <= MUL2_R;
1882 set_y := r.first; -- capture Y only on the first cycle in this state
1883 pshift := '1';
1884 if multiply_to_f.valid = '1' then
1885 -- put result into R
1886 opsel_r <= RES_MULT;
1887 v.first := '1';
1888 v.state := SQRT_4;
1889 end if;
1890
1891 when SQRT_4 =>
1892 -- compute 1.5 - Y * P
1893 msel_1 <= MUL1_Y;
1894 msel_2 <= MUL2_P;
1895 msel_add <= MULADD_CONST;
1896 msel_inv <= '1';
1897 f_to_multiply.valid <= r.first;
1898 pshift := '1';
1899 if multiply_to_f.valid = '1' then
1900 v.state := SQRT_5;
1901 end if;
1902
1903 when SQRT_5 =>
1904 -- compute Y = Y * P
1905 msel_1 <= MUL1_Y;
1906 msel_2 <= MUL2_P;
1907 f_to_multiply.valid <= '1';
1908 v.first := '1';
1909 v.state := SQRT_6;
1910
1911 when SQRT_6 =>
1912 -- pipeline in R = R * P
1913 msel_1 <= MUL1_R;
1914 msel_2 <= MUL2_P;
1915 f_to_multiply.valid <= r.first;
1916 pshift := '1';
1917 if multiply_to_f.valid = '1' then
1918 v.first := '1';
1919 v.state := SQRT_7;
1920 end if;
1921
1922 when SQRT_7 =>
1923 -- first multiply is done, put result in Y
1924 msel_2 <= MUL2_P;
1925 set_y := r.first;
1926 -- wait for second multiply (should be here already)
1927 pshift := '1';
1928 if multiply_to_f.valid = '1' then
1929 -- put result into R
1930 opsel_r <= RES_MULT;
1931 v.first := '1';
1932 v.count := r.count + 1;
1933 if r.count < 2 then
1934 v.state := SQRT_4;
1935 else
1936 v.first := '1';
1937 v.state := SQRT_8;
1938 end if;
1939 end if;
1940
1941 when SQRT_8 =>
1942 -- compute P = A - R * R, which can be +ve or -ve
1943 -- we arranged for B to be put into A earlier
1944 msel_1 <= MUL1_R;
1945 msel_2 <= MUL2_R;
1946 msel_add <= MULADD_A;
1947 msel_inv <= '1';
1948 pshift := '1';
1949 f_to_multiply.valid <= r.first;
1950 if multiply_to_f.valid = '1' then
1951 v.first := '1';
1952 v.state := SQRT_9;
1953 end if;
1954
1955 when SQRT_9 =>
1956 -- compute P = P * Y
1957 -- since Y is an estimate of 1/sqrt(B), this makes P an
1958 -- estimate of the adjustment needed to R. Since the error
1959 -- could be negative and we have an unsigned multiplier, the
1960 -- upper bits can be wrong, but it turns out the lowest 8 bits
1961 -- are correct and are all we need (given 3 iterations through
1962 -- SQRT_4 to SQRT_7).
1963 msel_1 <= MUL1_Y;
1964 msel_2 <= MUL2_P;
1965 pshift := '1';
1966 f_to_multiply.valid <= r.first;
1967 if multiply_to_f.valid = '1' then
1968 v.state := SQRT_10;
1969 end if;
1970
1971 when SQRT_10 =>
1972 -- Add the bottom 8 bits of P, sign-extended,
1973 -- divided by 4, onto R.
1974 -- The division by 4 is because R is 10.54 format
1975 -- whereas P is 8.56 format.
1976 opsel_b <= BIN_PS6;
1977 sqrt_exp := r.b.exponent(EXP_BITS-1) & r.b.exponent(EXP_BITS-1 downto 1);
1978 v.result_exp := sqrt_exp;
1979 v.shift := to_signed(1, EXP_BITS);
1980 v.first := '1';
1981 v.state := SQRT_11;
1982
1983 when SQRT_11 =>
1984 -- compute P = A - R * R (remainder)
1985 -- also put 2 * R + 1 into B for comparison with P
1986 msel_1 <= MUL1_R;
1987 msel_2 <= MUL2_R;
1988 msel_add <= MULADD_A;
1989 msel_inv <= '1';
1990 f_to_multiply.valid <= r.first;
1991 shiftin := '1';
1992 set_b := r.first;
1993 if multiply_to_f.valid = '1' then
1994 v.state := SQRT_12;
1995 end if;
1996
1997 when SQRT_12 =>
1998 -- test if remainder is 0 or >= B = 2*R + 1
1999 if pcmpb_lt = '1' then
2000 -- square root is correct, set X if remainder non-zero
2001 v.x := r.p(58) or px_nz;
2002 else
2003 -- square root needs to be incremented by 1
2004 carry_in <= '1';
2005 v.x := not pcmpb_eq;
2006 end if;
2007 v.state := FINISH;
2008
2009 when INT_SHIFT =>
2010 -- r.shift = b.exponent - 52
2011 opsel_r <= RES_SHIFT;
2012 set_x := '1';
2013 v.state := INT_ROUND;
2014 v.shift := to_signed(-2, EXP_BITS);
2015
2016 when INT_ROUND =>
2017 -- r.shift = -2
2018 opsel_r <= RES_SHIFT;
2019 round := fp_rounding(r.r, r.x, '0', r.round_mode, r.result_sign);
2020 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2021 -- Check for negative values that don't round to 0 for fcti*u*
2022 if r.insn(8) = '1' and r.result_sign = '1' and
2023 (r_hi_nz or r_lo_nz or v.fpscr(FPSCR_FR)) = '1' then
2024 v.state := INT_OFLOW;
2025 else
2026 v.state := INT_FINAL;
2027 end if;
2028
2029 when INT_ISHIFT =>
2030 -- r.shift = b.exponent - 54;
2031 opsel_r <= RES_SHIFT;
2032 v.state := INT_FINAL;
2033
2034 when INT_FINAL =>
2035 -- Negate if necessary, and increment for rounding if needed
2036 opsel_ainv <= r.result_sign;
2037 carry_in <= r.fpscr(FPSCR_FR) xor r.result_sign;
2038 -- Check for possible overflows
2039 case r.insn(9 downto 8) is
2040 when "00" => -- fctiw[z]
2041 need_check := r.r(31) or (r.r(30) and not r.result_sign);
2042 when "01" => -- fctiwu[z]
2043 need_check := r.r(31);
2044 when "10" => -- fctid[z]
2045 need_check := r.r(63) or (r.r(62) and not r.result_sign);
2046 when others => -- fctidu[z]
2047 need_check := r.r(63);
2048 end case;
2049 if need_check = '1' then
2050 v.state := INT_CHECK;
2051 else
2052 if r.fpscr(FPSCR_FI) = '1' then
2053 v.fpscr(FPSCR_XX) := '1';
2054 end if;
2055 arith_done := '1';
2056 end if;
2057
2058 when INT_CHECK =>
2059 if r.insn(9) = '0' then
2060 msb := r.r(31);
2061 else
2062 msb := r.r(63);
2063 end if;
2064 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2065 if (r.insn(8) = '0' and msb /= r.result_sign) or
2066 (r.insn(8) = '1' and msb /= '1') then
2067 opsel_r <= RES_MISC;
2068 v.fpscr(FPSCR_VXCVI) := '1';
2069 invalid := '1';
2070 else
2071 if r.fpscr(FPSCR_FI) = '1' then
2072 v.fpscr(FPSCR_XX) := '1';
2073 end if;
2074 end if;
2075 arith_done := '1';
2076
2077 when INT_OFLOW =>
2078 opsel_r <= RES_MISC;
2079 misc_sel <= '1' & r.insn(9 downto 8) & r.result_sign;
2080 if r.b.class = NAN then
2081 misc_sel(0) <= '1';
2082 end if;
2083 v.fpscr(FPSCR_VXCVI) := '1';
2084 invalid := '1';
2085 arith_done := '1';
2086
2087 when FRI_1 =>
2088 -- r.shift = b.exponent - 52
2089 opsel_r <= RES_SHIFT;
2090 set_x := '1';
2091 v.shift := to_signed(-2, EXP_BITS);
2092 v.state := ROUNDING;
2093
2094 when FINISH =>
2095 if r.is_multiply = '1' and px_nz = '1' then
2096 v.x := '1';
2097 end if;
2098 if r.r(63 downto 54) /= "0000000001" then
2099 renormalize := '1';
2100 v.state := NORMALIZE;
2101 else
2102 set_x := '1';
2103 if exp_tiny = '1' then
2104 v.shift := new_exp - min_exp;
2105 v.state := ROUND_UFLOW;
2106 elsif exp_huge = '1' then
2107 v.state := ROUND_OFLOW;
2108 else
2109 v.shift := to_signed(-2, EXP_BITS);
2110 v.state := ROUNDING;
2111 end if;
2112 end if;
2113
2114 when NORMALIZE =>
2115 -- Shift so we have 9 leading zeroes (we know R is non-zero)
2116 -- r.shift = clz(r.r) - 9
2117 opsel_r <= RES_SHIFT;
2118 set_x := '1';
2119 if exp_tiny = '1' then
2120 v.shift := new_exp - min_exp;
2121 v.state := ROUND_UFLOW;
2122 elsif exp_huge = '1' then
2123 v.state := ROUND_OFLOW;
2124 else
2125 v.shift := to_signed(-2, EXP_BITS);
2126 v.state := ROUNDING;
2127 end if;
2128
2129 when ROUND_UFLOW =>
2130 -- r.shift = - amount by which exponent underflows
2131 v.tiny := '1';
2132 if r.fpscr(FPSCR_UE) = '0' then
2133 -- disabled underflow exception case
2134 -- have to denormalize before rounding
2135 opsel_r <= RES_SHIFT;
2136 set_x := '1';
2137 v.shift := to_signed(-2, EXP_BITS);
2138 v.state := ROUNDING;
2139 else
2140 -- enabled underflow exception case
2141 -- if denormalized, have to normalize before rounding
2142 v.fpscr(FPSCR_UX) := '1';
2143 v.result_exp := r.result_exp + bias_exp;
2144 if r.r(54) = '0' then
2145 renormalize := '1';
2146 v.state := NORMALIZE;
2147 else
2148 v.shift := to_signed(-2, EXP_BITS);
2149 v.state := ROUNDING;
2150 end if;
2151 end if;
2152
2153 when ROUND_OFLOW =>
2154 v.fpscr(FPSCR_OX) := '1';
2155 if r.fpscr(FPSCR_OE) = '0' then
2156 -- disabled overflow exception
2157 -- result depends on rounding mode
2158 v.fpscr(FPSCR_XX) := '1';
2159 v.fpscr(FPSCR_FI) := '1';
2160 if r.round_mode(1 downto 0) = "00" or
2161 (r.round_mode(1) = '1' and r.round_mode(0) = r.result_sign) then
2162 v.result_class := INFINITY;
2163 v.fpscr(FPSCR_FR) := '1';
2164 else
2165 v.fpscr(FPSCR_FR) := '0';
2166 end if;
2167 -- construct largest representable number
2168 v.result_exp := max_exp;
2169 opsel_r <= RES_MISC;
2170 misc_sel <= "001" & r.single_prec;
2171 arith_done := '1';
2172 else
2173 -- enabled overflow exception
2174 v.result_exp := r.result_exp - bias_exp;
2175 v.shift := to_signed(-2, EXP_BITS);
2176 v.state := ROUNDING;
2177 end if;
2178
2179 when ROUNDING =>
2180 opsel_mask <= '1';
2181 round := fp_rounding(r.r, r.x, r.single_prec, r.round_mode, r.result_sign);
2182 v.fpscr(FPSCR_FR downto FPSCR_FI) := round;
2183 if round(1) = '1' then
2184 -- set mask to increment the LSB for the precision
2185 opsel_b <= BIN_MASK;
2186 carry_in <= '1';
2187 v.shift := to_signed(-1, EXP_BITS);
2188 v.state := ROUNDING_2;
2189 else
2190 if r.r(54) = '0' then
2191 -- result after masking could be zero, or could be a
2192 -- denormalized result that needs to be renormalized
2193 renormalize := '1';
2194 v.state := ROUNDING_3;
2195 else
2196 arith_done := '1';
2197 end if;
2198 end if;
2199 if round(0) = '1' then
2200 v.fpscr(FPSCR_XX) := '1';
2201 if r.tiny = '1' then
2202 v.fpscr(FPSCR_UX) := '1';
2203 end if;
2204 end if;
2205
2206 when ROUNDING_2 =>
2207 -- Check for overflow during rounding
2208 -- r.shift = -1
2209 v.x := '0';
2210 if r.r(55) = '1' then
2211 opsel_r <= RES_SHIFT;
2212 if exp_huge = '1' then
2213 v.state := ROUND_OFLOW;
2214 else
2215 arith_done := '1';
2216 end if;
2217 elsif r.r(54) = '0' then
2218 -- Do CLZ so we can renormalize the result
2219 renormalize := '1';
2220 v.state := ROUNDING_3;
2221 else
2222 arith_done := '1';
2223 end if;
2224
2225 when ROUNDING_3 =>
2226 -- r.shift = clz(r.r) - 9
2227 mant_nz := r_hi_nz or (r_lo_nz and not r.single_prec);
2228 if mant_nz = '0' then
2229 v.result_class := ZERO;
2230 if r.is_subtract = '1' then
2231 -- set result sign depending on rounding mode
2232 v.result_sign := r.round_mode(1) and r.round_mode(0);
2233 end if;
2234 arith_done := '1';
2235 else
2236 -- Renormalize result after rounding
2237 opsel_r <= RES_SHIFT;
2238 v.denorm := exp_tiny;
2239 v.shift := new_exp - to_signed(-1022, EXP_BITS);
2240 if new_exp < to_signed(-1022, EXP_BITS) then
2241 v.state := DENORM;
2242 else
2243 arith_done := '1';
2244 end if;
2245 end if;
2246
2247 when DENORM =>
2248 -- r.shift = result_exp - -1022
2249 opsel_r <= RES_SHIFT;
2250 arith_done := '1';
2251
2252 when NAN_RESULT =>
2253 if (r.use_a = '1' and r.a.class = NAN and r.a.mantissa(53) = '0') or
2254 (r.use_b = '1' and r.b.class = NAN and r.b.mantissa(53) = '0') or
2255 (r.use_c = '1' and r.c.class = NAN and r.c.mantissa(53) = '0') then
2256 -- Signalling NAN
2257 v.fpscr(FPSCR_VXSNAN) := '1';
2258 invalid := '1';
2259 end if;
2260 if r.use_a = '1' and r.a.class = NAN then
2261 v.opsel_a := AIN_A;
2262 elsif r.use_b = '1' and r.b.class = NAN then
2263 v.opsel_a := AIN_B;
2264 elsif r.use_c = '1' and r.c.class = NAN then
2265 v.opsel_a := AIN_C;
2266 end if;
2267 v.state := EXC_RESULT;
2268
2269 when EXC_RESULT =>
2270 -- r.opsel_a = AIN_A, AIN_B or AIN_C according to which input is the result
2271 case r.opsel_a is
2272 when AIN_B =>
2273 v.result_sign := r.b.negative xor r.negate;
2274 v.result_exp := r.b.exponent;
2275 v.result_class := r.b.class;
2276 when AIN_C =>
2277 v.result_sign := r.c.negative xor r.negate;
2278 v.result_exp := r.c.exponent;
2279 v.result_class := r.c.class;
2280 when others =>
2281 v.result_sign := r.a.negative xor r.negate;
2282 v.result_exp := r.a.exponent;
2283 v.result_class := r.a.class;
2284 end case;
2285 arith_done := '1';
2286
2287 end case;
2288
2289 if zero_divide = '1' then
2290 v.fpscr(FPSCR_ZX) := '1';
2291 end if;
2292 if qnan_result = '1' then
2293 invalid := '1';
2294 v.result_class := NAN;
2295 v.result_sign := '0';
2296 misc_sel <= "0001";
2297 opsel_r <= RES_MISC;
2298 arith_done := '1';
2299 end if;
2300 if invalid = '1' then
2301 v.invalid := '1';
2302 end if;
2303 if arith_done = '1' then
2304 -- Enabled invalid exception doesn't write result or FPRF
2305 -- Neither does enabled zero-divide exception
2306 if (v.invalid and r.fpscr(FPSCR_VE)) = '0' and
2307 (zero_divide and r.fpscr(FPSCR_ZE)) = '0' then
2308 v.writing_back := '1';
2309 v.update_fprf := '1';
2310 end if;
2311 v.instr_done := '1';
2312 v.state := IDLE;
2313 update_fx := '1';
2314 end if;
2315
2316 -- Multiplier and divide/square root data path
2317 case msel_1 is
2318 when MUL1_A =>
2319 f_to_multiply.data1 <= r.a.mantissa(61 downto 0) & "00";
2320 when MUL1_B =>
2321 f_to_multiply.data1 <= r.b.mantissa(61 downto 0) & "00";
2322 when MUL1_Y =>
2323 f_to_multiply.data1 <= r.y;
2324 when others =>
2325 f_to_multiply.data1 <= r.r(61 downto 0) & "00";
2326 end case;
2327 case msel_2 is
2328 when MUL2_C =>
2329 f_to_multiply.data2 <= r.c.mantissa(61 downto 0) & "00";
2330 when MUL2_LUT =>
2331 f_to_multiply.data2 <= x"00" & inverse_est & '0' & x"000000000";
2332 when MUL2_P =>
2333 f_to_multiply.data2 <= r.p;
2334 when others =>
2335 f_to_multiply.data2 <= r.r(61 downto 0) & "00";
2336 end case;
2337 maddend := (others => '0');
2338 case msel_add is
2339 when MULADD_CONST =>
2340 -- addend is 2.0 or 1.5 in 16.112 format
2341 if r.is_sqrt = '0' then
2342 maddend(113) := '1'; -- 2.0
2343 else
2344 maddend(112 downto 111) := "11"; -- 1.5
2345 end if;
2346 when MULADD_A =>
2347 -- addend is A in 16.112 format
2348 maddend(121 downto 58) := r.a.mantissa;
2349 when MULADD_RS =>
2350 -- addend is concatenation of R and S in 16.112 format
2351 maddend := "000000" & r.r & r.s & "00";
2352 when others =>
2353 end case;
2354 if msel_inv = '1' then
2355 f_to_multiply.addend <= not maddend;
2356 else
2357 f_to_multiply.addend <= maddend;
2358 end if;
2359 f_to_multiply.not_result <= msel_inv;
2360 if set_y = '1' then
2361 v.y := f_to_multiply.data2;
2362 end if;
2363 if multiply_to_f.valid = '1' then
2364 if pshift = '0' then
2365 v.p := multiply_to_f.result(63 downto 0);
2366 else
2367 v.p := multiply_to_f.result(119 downto 56);
2368 end if;
2369 end if;
2370
2371 -- Data path.
2372 -- This has A and B input multiplexers, an adder, a shifter,
2373 -- count-leading-zeroes logic, and a result mux.
2374 if r.longmask = '1' then
2375 mshift := r.shift + to_signed(-29, EXP_BITS);
2376 else
2377 mshift := r.shift;
2378 end if;
2379 if mshift < to_signed(-64, EXP_BITS) then
2380 mask := (others => '1');
2381 elsif mshift >= to_signed(0, EXP_BITS) then
2382 mask := (others => '0');
2383 else
2384 mask := right_mask(unsigned(mshift(5 downto 0)));
2385 end if;
2386 case r.opsel_a is
2387 when AIN_R =>
2388 in_a0 := r.r;
2389 when AIN_A =>
2390 in_a0 := r.a.mantissa;
2391 when AIN_B =>
2392 in_a0 := r.b.mantissa;
2393 when others =>
2394 in_a0 := r.c.mantissa;
2395 end case;
2396 if (or (mask and in_a0)) = '1' and set_x = '1' then
2397 v.x := '1';
2398 end if;
2399 if opsel_ainv = '1' then
2400 in_a0 := not in_a0;
2401 end if;
2402 in_a <= in_a0;
2403 case opsel_b is
2404 when BIN_ZERO =>
2405 in_b0 := (others => '0');
2406 when BIN_R =>
2407 in_b0 := r.r;
2408 when BIN_MASK =>
2409 in_b0 := mask;
2410 when others =>
2411 -- BIN_PS6, 6 LSBs of P/4 sign-extended to 64
2412 in_b0 := std_ulogic_vector(resize(signed(r.p(7 downto 2)), 64));
2413 end case;
2414 if opsel_binv = '1' then
2415 in_b0 := not in_b0;
2416 end if;
2417 in_b <= in_b0;
2418 if r.shift >= to_signed(-64, EXP_BITS) and r.shift <= to_signed(63, EXP_BITS) then
2419 shift_res := shifter_64(r.r & (shiftin or r.s(55)) & r.s(54 downto 0),
2420 std_ulogic_vector(r.shift(6 downto 0)));
2421 else
2422 shift_res := (others => '0');
2423 end if;
2424 sum := std_ulogic_vector(unsigned(in_a) + unsigned(in_b) + carry_in);
2425 if opsel_mask = '1' then
2426 sum := sum and not mask;
2427 end if;
2428 case opsel_r is
2429 when RES_SUM =>
2430 result <= sum;
2431 when RES_SHIFT =>
2432 result <= shift_res;
2433 when RES_MULT =>
2434 result <= multiply_to_f.result(121 downto 58);
2435 when others =>
2436 case misc_sel is
2437 when "0000" =>
2438 misc := x"00000000" & (r.fpscr and fpscr_mask);
2439 when "0001" =>
2440 -- generated QNaN mantissa
2441 misc := x"0020000000000000";
2442 when "0010" =>
2443 -- mantissa of max representable DP number
2444 misc := x"007ffffffffffffc";
2445 when "0011" =>
2446 -- mantissa of max representable SP number
2447 misc := x"007fffff80000000";
2448 when "0100" =>
2449 -- fmrgow result
2450 misc := r.a.mantissa(31 downto 0) & r.b.mantissa(31 downto 0);
2451 when "0110" =>
2452 -- fmrgew result
2453 misc := r.a.mantissa(63 downto 32) & r.b.mantissa(63 downto 32);
2454 when "0111" =>
2455 misc := 10x"000" & inverse_est & 35x"000000000";
2456 when "1000" =>
2457 -- max positive result for fctiw[z]
2458 misc := x"000000007fffffff";
2459 when "1001" =>
2460 -- max negative result for fctiw[z]
2461 misc := x"ffffffff80000000";
2462 when "1010" =>
2463 -- max positive result for fctiwu[z]
2464 misc := x"00000000ffffffff";
2465 when "1011" =>
2466 -- max negative result for fctiwu[z]
2467 misc := x"0000000000000000";
2468 when "1100" =>
2469 -- max positive result for fctid[z]
2470 misc := x"7fffffffffffffff";
2471 when "1101" =>
2472 -- max negative result for fctid[z]
2473 misc := x"8000000000000000";
2474 when "1110" =>
2475 -- max positive result for fctidu[z]
2476 misc := x"ffffffffffffffff";
2477 when "1111" =>
2478 -- max negative result for fctidu[z]
2479 misc := x"0000000000000000";
2480 when others =>
2481 misc := x"0000000000000000";
2482 end case;
2483 result <= misc;
2484 end case;
2485 v.r := result;
2486 if set_s = '1' then
2487 case opsel_s is
2488 when S_NEG =>
2489 v.s := std_ulogic_vector(unsigned(not r.s) + (not r.x));
2490 when S_MULT =>
2491 v.s := multiply_to_f.result(57 downto 2);
2492 when S_SHIFT =>
2493 v.s := shift_res(63 downto 8);
2494 if shift_res(7 downto 0) /= x"00" then
2495 v.x := '1';
2496 end if;
2497 when others =>
2498 v.s := (others => '0');
2499 end case;
2500 end if;
2501
2502 if set_a = '1' then
2503 v.a.exponent := new_exp;
2504 v.a.mantissa := shift_res;
2505 end if;
2506 if set_b = '1' then
2507 v.b.exponent := new_exp;
2508 v.b.mantissa := shift_res;
2509 end if;
2510 if set_c = '1' then
2511 v.c.exponent := new_exp;
2512 v.c.mantissa := shift_res;
2513 end if;
2514
2515 if opsel_r = RES_SHIFT then
2516 v.result_exp := new_exp;
2517 end if;
2518
2519 if renormalize = '1' then
2520 clz := count_left_zeroes(r.r);
2521 if renorm_sqrt = '1' then
2522 -- make denormalized value end up with even exponent
2523 clz(0) := '1';
2524 end if;
2525 v.shift := resize(signed('0' & clz) - 9, EXP_BITS);
2526 end if;
2527
2528 if r.int_result = '1' then
2529 fp_result <= r.r;
2530 else
2531 fp_result <= pack_dp(r.result_sign, r.result_class, r.result_exp, r.r,
2532 r.single_prec, r.quieten_nan);
2533 end if;
2534 if r.update_fprf = '1' then
2535 v.fpscr(FPSCR_C downto FPSCR_FU) := result_flags(r.result_sign, r.result_class,
2536 r.r(54) and not r.denorm);
2537 end if;
2538
2539 v.fpscr(FPSCR_VX) := (or (v.fpscr(FPSCR_VXSNAN downto FPSCR_VXVC))) or
2540 (or (v.fpscr(FPSCR_VXSOFT downto FPSCR_VXCVI)));
2541 v.fpscr(FPSCR_FEX) := or (v.fpscr(FPSCR_VX downto FPSCR_XX) and
2542 v.fpscr(FPSCR_VE downto FPSCR_XE));
2543 if update_fx = '1' and
2544 (v.fpscr(FPSCR_VX downto FPSCR_XX) and not r.old_exc) /= "00000" then
2545 v.fpscr(FPSCR_FX) := '1';
2546 end if;
2547 if r.rc = '1' then
2548 v.cr_result := v.fpscr(FPSCR_FX downto FPSCR_OX);
2549 end if;
2550
2551 if illegal = '1' then
2552 v.instr_done := '0';
2553 v.do_intr := '0';
2554 v.writing_back := '0';
2555 v.busy := '0';
2556 v.state := IDLE;
2557 else
2558 v.do_intr := v.instr_done and v.fpscr(FPSCR_FEX) and r.fe_mode;
2559 if v.state /= IDLE or v.do_intr = '1' then
2560 v.busy := '1';
2561 end if;
2562 end if;
2563
2564 rin <= v;
2565 e_out.illegal <= illegal;
2566 end process;
2567
2568 end architecture behaviour;