openpower/sv/int_fp_mv/appendix.mdwn

   1 [[!tag standards]]
   2
   3 # SVP64 polymorphic elwidth overrides
   4
   5 SimpleV, the Draft Cray-style Vectorization for OpenPOWER, may
   6 independently override both or either of the source or destination
   7 register bitwidth in the base operation used to create the Vector
   8 operation.  In the case of IEEE754 FP operands this gives an
   9 opportunity to add `FP16` as well as `BF16` to the Power ISA
  10 with no actual new Scalar opcodes.
  11
  12 However there is the potential for confusion as to the definition
  13 of what Single and Double mean when the operand width has been
  14 overridden.  Simple-V therefore sets the following
  15 "reinterpretation" rules:
  16
  17 * any operation whose assembler mnemonic does not end in "s"
  18   (being defined in v3.0B as a "double" operation) is
  19   redefined to be an operation at the overridden elwidth for the
  20   relevant operand, rather than a 64 bit "Double"
  21 * any operation nominally defined as a "single" FP operation
  22   is redefined to be **half the elwidth** rather than
  23   "half of 64 bit" (32 bit, aka "Single")
  24
  25 Examples:
  26
  27 * `sv.fmvtg/sw=32 RT.v, FRA.v` is defined as treating FRA
  28    as a vector of *FP32* source operands each *32* bits wide
  29    which are to be placed into *64* bit integer destination elements.
  30 * `sv.fmvfgs/dw=32 FRT.v, RA.v` is defined as taking the bottom
  31    32 bits of each RA integer source, then performing a **32 bit**
  32    FP32 to **FP16** conversion and storing the result in the
  33    **32 bits** of an FRT destination element.
  34
  35 "Single" is therefore redefined in SVP64 to be "half elwidth",
  36 rather than Double width being hardcoded to 64 bit and Single width
  37 being hardcoded to 32 bit.  This allows a full range of conversions
  38 between FP64, FP32, FP16 and BF16.
  39
  40 Note however that attempts to perform "Single" operations on
  41 FP16 elwidths will raise an illegal instruction trap: Half
  42 of FP16 is FP8, which is not defined as a legal IEEE754 format.
  43
  44 # Simple-V SVP64 Saturation
  45
  46 SVP64 also allows for Saturation, such that the result is truncated
  47 to the maximum or minimum range of the result operand rather than
  48 overflowing.
  49
  50 There will be some interaction here with Conversion routines which
  51 will need careful application of the SVP64 Saturation rules: some
  52 work will be duplicated by the operation itself, but in some cases
  53 it will change the result.
  54
  55 The critical thing to note is that SVP64 Saturation is to be considered
  56 as the "priority override" where the operation should take place at
  57 "Infinite bitwidth followed by a result post-analysis phase".
  58
  59 Thus if by chance an unsigned conversion to INT was carried out,
  60 with a destination override to 16 bit results, in combination
  61 with a **signed** SVP64 Saturation override, the result would
  62 be truncated to within the range 0 to 0x7FFF.  The actual
  63 operation itself, being an *Unsigned* conversion, would set the
  64 minimum value to zero, whilst the SVP64 *Signed* Saturation
  65 would set the maximum to a Signed 16 bit integer.
  66
  67 As always with SVP64, some thought and care has to be put into
  68 how the override behaviour will interact with the base scalar
  69 operation.
  70
  71 # Power ISA v3.0 Assembly Equivalents<a name="assembler"></a>
  72
  73 A demonstration of how much assembler is needed if these language-specific
  74 FP -> Integer Conversion Modes are not available.
  75
  76 ## C (IEEE754 standard compliant)
  77
  78 ```
  79 int32_t toInt32(double number)
  80 {
  81     uint32_t result = (int32_t)number;
  82     return result;
  83 }
  84 ```
  85
  86 ### 64-bit float -> 32-bit signed integer
  87
  88 ```
  89 toInt32(double):
  90         fctiwz 1,1
  91         addi 9,1,-16
  92         stfiwx 1,0,9
  93         lwz 3,-16(1)
  94         extsw 3,3
  95         blr
  96         .long 0
  97         .byte 0,9,0,0,0,0,0,0
  98 ```
  99
 100 ## Rust
 101
 102 ```
 103 pub fn fcvttgd_rust(v: f64) -> i64 {
 104     v as i64
 105 }
 106
 107 pub fn fcvttgud_rust(v: f64) -> u64 {
 108     v as u64
 109 }
 110
 111 pub fn fcvttgw_rust(v: f64) -> i32 {
 112     v as i32
 113 }
 114
 115 pub fn fcvttguw_rust(v: f64) -> u32 {
 116     v as u32
 117 }
 118 ```
 119
 120 ### 64-bit float -> 64-bit signed integer
 121
 122 ```
 123 .LCPI0_0:
 124         .long   0xdf000000
 125 .LCPI0_1:
 126         .quad   0x43dfffffffffffff
 127 example::fcvttgd_rust:
 128 .Lfunc_gep0:
 129         addis 2, 12, .TOC.-.Lfunc_gep0@ha
 130         addi 2, 2, .TOC.-.Lfunc_gep0@l
 131         addis 3, 2, .LCPI0_0@toc@ha
 132         fctidz 2, 1
 133         fcmpu 5, 1, 1
 134         li 4, 1
 135         li 5, -1
 136         lfs 0, .LCPI0_0@toc@l(3)
 137         addis 3, 2, .LCPI0_1@toc@ha
 138         rldic 4, 4, 63, 0
 139         fcmpu 0, 1, 0
 140         lfd 0, .LCPI0_1@toc@l(3)
 141         stfd 2, -8(1)
 142         ld 3, -8(1)
 143         fcmpu 1, 1, 0
 144         cror 24, 0, 3
 145         isel 3, 4, 3, 24
 146         rldic 4, 5, 0, 1
 147         isel 3, 4, 3, 5
 148         isel 3, 0, 3, 23
 149         blr
 150         .long   0
 151         .quad   0
 152 ```
 153
 154 ### 64-bit float -> 64-bit unsigned integer
 155
 156 ```
 157 .LCPI1_0:
 158         .long   0x00000000
 159 .LCPI1_1:
 160         .quad   0x43efffffffffffff
 161 example::fcvttgud_rust:
 162 .Lfunc_gep1:
 163         addis 2, 12, .TOC.-.Lfunc_gep1@ha
 164         addi 2, 2, .TOC.-.Lfunc_gep1@l
 165         addis 3, 2, .LCPI1_0@toc@ha
 166         fctiduz 2, 1
 167         li 4, -1
 168         lfs 0, .LCPI1_0@toc@l(3)
 169         addis 3, 2, .LCPI1_1@toc@ha
 170         fcmpu 0, 1, 0
 171         lfd 0, .LCPI1_1@toc@l(3)
 172         stfd 2, -8(1)
 173         ld 3, -8(1)
 174         fcmpu 1, 1, 0
 175         cror 20, 0, 3
 176         isel 3, 0, 3, 20
 177         isel 3, 4, 3, 5
 178         blr
 179         .long   0
 180         .quad   0
 181 ```
 182
 183 ### 64-bit float -> 32-bit signed integer
 184
 185 ```
 186 .LCPI2_0:
 187         .long   0xcf000000
 188 .LCPI2_1:
 189         .quad   0x41dfffffffc00000
 190 example::fcvttgw_rust:
 191 .Lfunc_gep2:
 192         addis 2, 12, .TOC.-.Lfunc_gep2@ha
 193         addi 2, 2, .TOC.-.Lfunc_gep2@l
 194         addis 3, 2, .LCPI2_0@toc@ha
 195         fctiwz 2, 1
 196         lis 4, -32768
 197         lis 5, 32767
 198         lfs 0, .LCPI2_0@toc@l(3)
 199         addis 3, 2, .LCPI2_1@toc@ha
 200         fcmpu 0, 1, 0
 201         lfd 0, .LCPI2_1@toc@l(3)
 202         addi 3, 1, -4
 203         stfiwx 2, 0, 3
 204         fcmpu 5, 1, 1
 205         lwz 3, -4(1)
 206         fcmpu 1, 1, 0
 207         cror 24, 0, 3
 208         isel 3, 4, 3, 24
 209         ori 4, 5, 65535
 210         isel 3, 4, 3, 5
 211         isel 3, 0, 3, 23
 212         blr
 213         .long   0
 214         .quad   0
 215 ```
 216
 217 ### 64-bit float -> 32-bit unsigned integer
 218
 219 ```
 220 .LCPI3_0:
 221         .long   0x00000000
 222 .LCPI3_1:
 223         .quad   0x41efffffffe00000
 224 example::fcvttguw_rust:
 225 .Lfunc_gep3:
 226         addis 2, 12, .TOC.-.Lfunc_gep3@ha
 227         addi 2, 2, .TOC.-.Lfunc_gep3@l
 228         addis 3, 2, .LCPI3_0@toc@ha
 229         fctiwuz 2, 1
 230         li 4, -1
 231         lfs 0, .LCPI3_0@toc@l(3)
 232         addis 3, 2, .LCPI3_1@toc@ha
 233         fcmpu 0, 1, 0
 234         lfd 0, .LCPI3_1@toc@l(3)
 235         addi 3, 1, -4
 236         stfiwx 2, 0, 3
 237         lwz 3, -4(1)
 238         fcmpu 1, 1, 0
 239         cror 20, 0, 3
 240         isel 3, 0, 3, 20
 241         isel 3, 4, 3, 5
 242         blr
 243         .long   0
 244         .quad   0
 245 ```
 246
 247 ## JavaScript
 248
 249 ```
 250 #include <stdint.h>
 251
 252 namespace WTF {
 253 template<typename Target, typename Src>
 254 inline Target bitwise_cast(Src v) {
 255     union {
 256         Src s;
 257         Target t;
 258     } u;
 259     u.s = v;
 260     if (exp < 32) {
 261         int32_t missingOne = 1 << exp;
 262         result &= missingOne - 1;
 263         result += missingOne;
 264     }
 265
 266     // If the input value was negative (we could test either 'number' or 'bits',
 267     // but testing 'bits' is likely faster) invert the result appropriately.
 268     return bits < 0 ? -result : result;
 269 }
 270 ```
 271
 272 ### 64-bit float -> 32-bit signed integer
 273
 274 ```
 275 toInt32(double):
 276         stfd 1,-16(1)
 277         li 3,0
 278         ori 2,2,0
 279         ld 9,-16(1)
 280         rldicl 8,9,12,53
 281         addi 10,8,-1023
 282         cmplwi 7,10,83
 283         bgtlr 7
 284         cmpwi 7,10,52
 285         bgt 7,.L7
 286         cmpwi 7,10,31
 287         subfic 3,10,52
 288         srad 3,9,3
 289         extsw 3,3
 290         bgt 7,.L4
 291         li 8,1
 292         slw 10,8,10
 293         addi 8,10,-1
 294         and 3,8,3
 295         add 10,10,3
 296         extsw 3,10
 297 .L4:
 298         cmpdi 7,9,0
 299         bgelr 7
 300 .L8:
 301         neg 3,3
 302         extsw 3,3
 303         blr
 304 .L7:
 305         cmpdi 7,9,0
 306         addi 3,8,-1075
 307         sld 3,9,3
 308         extsw 3,3
 309         bgelr 7
 310         b .L8
 311         .long 0
 312         .byte 0,9,0,0,0,0,0,0
 313 ```