From 2f4f4da48ac94f7d61109be9606df60a1d927694 Mon Sep 17 00:00:00 2001
From: Bill Zorn <bill.zorn@gmail.com>
Date: Wed, 8 Aug 2018 17:23:27 -0700
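Subject: [PATCH] revise fma/qma interface to match SoftPosit

Fused multiply-add now treats the receiver as the accumulator.
For Float16/32/64 and Posit8/16/32, x.fma(a1, a2) and x.ifma(a1, a2)
pass (a1, a2, x) to the underlying *_mulAdd routine, i.e. a1 * a2 + x.
This is what fam/ifam used to do; the old fma/ifma passed
(x, a2, a3), i.e. x * a2 + a3. The method names and arity are
unchanged, so existing callers of fma/ifma silently get a different
result and must be updated.

The module-level f16/f32/f64/p8/p16/p32 *_fma functions change in the
same way: the signature is now (acc, a1, a2) and computes
a1 * a2 + acc, where it used to be (a1, a2, a3) computing a1 * a2 + a3.

The fam, ifam and *_fam variants are removed; the new fma forms
replace them.

Quire operations are renamed only: qam -> qma, qsm -> qms,
iqam -> iqma, iqsm -> iqms, and the module-level q8/q16/q32 *_qam and
*_qsm functions become *_qma and *_qms. Their first parameter is
renamed from a3 to acc. The underlying *_fdp_add / *_fdp_sub calls and
their argument order are unchanged.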

---
 sfpy/float.pyx |  57 ++++++---------------------
 sfpy/posit.pyx | 105 +++++++++++++++++--------------------------------
 2 files changed, 48 insertions(+), 114 deletions(-)

diff --git a/sfpy/float.pyx b/sfpy/float.pyx
index 968323f..daf21a2 100644
--- a/sfpy/float.pyx
+++ b/sfpy/float.pyx
@@ -261,11 +261,7 @@ cdef class Float16:
     def __mul__(self, Float16 other):
         return self.mul(other)
 
-    cpdef Float16 fma(self, Float16 a2, Float16 a3):
-        cdef cfloat.float16_t f = cfloat.f16_mulAdd(self._c_float, a2._c_float, a3._c_float)
-        return Float16.from_c_float(f)
-
-    cpdef Float16 fam(self, Float16 a1, Float16 a2):
+    cpdef Float16 fma(self, Float16 a1, Float16 a2):
         cdef cfloat.float16_t f = cfloat.f16_mulAdd(a1._c_float, a2._c_float, self._c_float)
         return Float16.from_c_float(f)
 
@@ -319,10 +315,7 @@ cdef class Float16:
         self.imul(other)
         return self
 
-    cpdef void ifma(self, Float16 a2, Float16 a3):
-        self._c_float = cfloat.f16_mulAdd(self._c_float, a2._c_float, a3._c_float)
-
-    cpdef void ifam(self, Float16 a1, Float16 a2):
+    cpdef void ifma(self, Float16 a1, Float16 a2):
         self._c_float = cfloat.f16_mulAdd(a1._c_float, a2._c_float, self._c_float)
 
     cpdef void idiv(self, Float16 other):
@@ -408,12 +401,8 @@ cpdef Float16 f16_mul(Float16 a1, Float16 a2):
     cdef cfloat.float16_t f = cfloat.f16_mul(a1._c_float, a2._c_float)
     return Float16.from_c_float(f)
 
-cpdef Float16 f16_fma(Float16 a1, Float16 a2, Float16 a3):
-    cdef cfloat.float16_t f = cfloat.f16_mulAdd(a1._c_float, a2._c_float, a3._c_float)
-    return Float16.from_c_float(f)
-
-cpdef Float16 f16_fam(Float16 a3, Float16 a1, Float16 a2):
-    cdef cfloat.float16_t f = cfloat.f16_mulAdd(a1._c_float, a2._c_float, a3._c_float)
+cpdef Float16 f16_fma(Float16 acc, Float16 a1, Float16 a2):
+    cdef cfloat.float16_t f = cfloat.f16_mulAdd(a1._c_float, a2._c_float, acc._c_float)
     return Float16.from_c_float(f)
 
 cpdef Float16 f16_div(Float16 a1, Float16 a2):
@@ -570,11 +559,7 @@ cdef class Float32:
     def __mul__(self, Float32 other):
         return self.mul(other)
 
-    cpdef Float32 fma(self, Float32 a2, Float32 a3):
-        cdef cfloat.float32_t f = cfloat.f32_mulAdd(self._c_float, a2._c_float, a3._c_float)
-        return Float32.from_c_float(f)
-
-    cpdef Float32 fam(self, Float32 a1, Float32 a2):
+    cpdef Float32 fma(self, Float32 a1, Float32 a2):
         cdef cfloat.float32_t f = cfloat.f32_mulAdd(a1._c_float, a2._c_float, self._c_float)
         return Float32.from_c_float(f)
 
@@ -628,10 +613,7 @@ cdef class Float32:
         self.imul(other)
         return self
 
-    cpdef void ifma(self, Float32 a2, Float32 a3):
-        self._c_float = cfloat.f32_mulAdd(self._c_float, a2._c_float, a3._c_float)
-
-    cpdef void ifam(self, Float32 a1, Float32 a2):
+    cpdef void ifma(self, Float32 a1, Float32 a2):
         self._c_float = cfloat.f32_mulAdd(a1._c_float, a2._c_float, self._c_float)
 
     cpdef void idiv(self, Float32 other):
@@ -717,12 +699,8 @@ cpdef Float32 f32_mul(Float32 a1, Float32 a2):
     cdef cfloat.float32_t f = cfloat.f32_mul(a1._c_float, a2._c_float)
     return Float32.from_c_float(f)
 
-cpdef Float32 f32_fma(Float32 a1, Float32 a2, Float32 a3):
-    cdef cfloat.float32_t f = cfloat.f32_mulAdd(a1._c_float, a2._c_float, a3._c_float)
-    return Float32.from_c_float(f)
-
-cpdef Float32 f32_fam(Float32 a3, Float32 a1, Float32 a2):
-    cdef cfloat.float32_t f = cfloat.f32_mulAdd(a1._c_float, a2._c_float, a3._c_float)
+cpdef Float32 f32_fma(Float32 acc, Float32 a1, Float32 a2):
+    cdef cfloat.float32_t f = cfloat.f32_mulAdd(a1._c_float, a2._c_float, acc._c_float)
     return Float32.from_c_float(f)
 
 cpdef Float32 f32_div(Float32 a1, Float32 a2):
@@ -877,11 +855,7 @@ cdef class Float64:
     def __mul__(self, Float64 other):
         return self.mul(other)
 
-    cpdef Float64 fma(self, Float64 a2, Float64 a3):
-        cdef cfloat.float64_t f = cfloat.f64_mulAdd(self._c_float, a2._c_float, a3._c_float)
-        return Float64.from_c_float(f)
-
-    cpdef Float64 fam(self, Float64 a1, Float64 a2):
+    cpdef Float64 fma(self, Float64 a1, Float64 a2):
         cdef cfloat.float64_t f = cfloat.f64_mulAdd(a1._c_float, a2._c_float, self._c_float)
         return Float64.from_c_float(f)
 
@@ -935,10 +909,7 @@ cdef class Float64:
         self.imul(other)
         return self
 
-    cpdef void ifma(self, Float64 a2, Float64 a3):
-        self._c_float = cfloat.f64_mulAdd(self._c_float, a2._c_float, a3._c_float)
-
-    cpdef void ifam(self, Float64 a1, Float64 a2):
+    cpdef void ifma(self, Float64 a1, Float64 a2):
         self._c_float = cfloat.f64_mulAdd(a1._c_float, a2._c_float, self._c_float)
 
     cpdef void idiv(self, Float64 other):
@@ -1024,12 +995,8 @@ cpdef Float64 f64_mul(Float64 a1, Float64 a2):
     cdef cfloat.float64_t f = cfloat.f64_mul(a1._c_float, a2._c_float)
     return Float64.from_c_float(f)
 
-cpdef Float64 f64_fma(Float64 a1, Float64 a2, Float64 a3):
-    cdef cfloat.float64_t f = cfloat.f64_mulAdd(a1._c_float, a2._c_float, a3._c_float)
-    return Float64.from_c_float(f)
-
-cpdef Float64 f64_fam(Float64 a3, Float64 a1, Float64 a2):
-    cdef cfloat.float64_t f = cfloat.f64_mulAdd(a1._c_float, a2._c_float, a3._c_float)
+cpdef Float64 f64_fma(Float64 acc, Float64 a1, Float64 a2):
+    cdef cfloat.float64_t f = cfloat.f64_mulAdd(a1._c_float, a2._c_float, acc._c_float)
     return Float64.from_c_float(f)
 
 cpdef Float64 f64_div(Float64 a1, Float64 a2):
diff --git a/sfpy/posit.pyx b/sfpy/posit.pyx
index f2f76a7..fac8280 100644
--- a/sfpy/posit.pyx
+++ b/sfpy/posit.pyx
@@ -135,11 +135,7 @@ cdef class Posit8:
     def __mul__(self, Posit8 other):
         return self.mul(other)
 
-    cpdef Posit8 fma(self, Posit8 a2, Posit8 a3):
-        cdef cposit.posit8_t f = cposit.p8_mulAdd(self._c_posit, a2._c_posit, a3._c_posit)
-        return Posit8.from_c_posit(f)
-
-    cpdef Posit8 fam(self, Posit8 a1, Posit8 a2):
+    cpdef Posit8 fma(self, Posit8 a1, Posit8 a2):
         cdef cposit.posit8_t f = cposit.p8_mulAdd(a1._c_posit, a2._c_posit, self._c_posit)
         return Posit8.from_c_posit(f)
 
@@ -186,10 +182,7 @@ cdef class Posit8:
         self.imul(other)
         return self
 
-    cpdef void ifma(self, Posit8 a2, Posit8 a3):
-        self._c_posit = cposit.p8_mulAdd(self._c_posit, a2._c_posit, a3._c_posit)
-
-    cpdef void ifam(self, Posit8 a1, Posit8 a2):
+    cpdef void ifma(self, Posit8 a1, Posit8 a2):
         self._c_posit = cposit.p8_mulAdd(a1._c_posit, a2._c_posit, self._c_posit)
 
     cpdef void idiv(self, Posit8 other):
@@ -301,18 +294,18 @@ cdef class Quire8:
 
     # arithmetic
 
-    cpdef Quire8 qam(self, Posit8 a1, Posit8 a2):
+    cpdef Quire8 qma(self, Posit8 a1, Posit8 a2):
         cdef cposit.quire8_t f = cposit.q8_fdp_add(self._c_quire, a1._c_posit, a2._c_posit)
         return Quire8.from_c_quire(f)
 
-    cpdef Quire8 qsm(self, Posit8 a1, Posit8 a2):
+    cpdef Quire8 qms(self, Posit8 a1, Posit8 a2):
         cdef cposit.quire8_t f = cposit.q8_fdp_sub(self._c_quire, a1._c_posit, a2._c_posit)
         return Quire8.from_c_quire(f)
 
-    cpdef void iqam(self, Posit8 a1, Posit8 a2):
+    cpdef void iqma(self, Posit8 a1, Posit8 a2):
         self._c_quire = cposit.q8_fdp_add(self._c_quire, a1._c_posit, a2._c_posit)
 
-    cpdef void iqsm(self, Posit8 a1, Posit8 a2):
+    cpdef void iqms(self, Posit8 a1, Posit8 a2):
         self._c_quire = cposit.q8_fdp_sub(self._c_quire, a1._c_posit, a2._c_posit)
 
     cpdef void iclr(self):
@@ -351,12 +344,8 @@ cpdef Posit8 p8_mul(Posit8 a1, Posit8 a2):
     cdef cposit.posit8_t f = cposit.p8_mul(a1._c_posit, a2._c_posit)
     return Posit8.from_c_posit(f)
 
-cpdef Posit8 p8_fma(Posit8 a1, Posit8 a2, Posit8 a3):
-    cdef cposit.posit8_t f = cposit.p8_mulAdd(a1._c_posit, a2._c_posit, a3._c_posit)
-    return Posit8.from_c_posit(f)
-
-cpdef Posit8 p8_fam(Posit8 a3, Posit8 a1, Posit8 a2):
-    cdef cposit.posit8_t f = cposit.p8_mulAdd(a1._c_posit, a2._c_posit, a3._c_posit)
+cpdef Posit8 p8_fma(Posit8 acc, Posit8 a1, Posit8 a2):
+    cdef cposit.posit8_t f = cposit.p8_mulAdd(a1._c_posit, a2._c_posit, acc._c_posit)
     return Posit8.from_c_posit(f)
 
 cpdef Posit8 p8_div(Posit8 a1, Posit8 a2):
@@ -390,12 +379,12 @@ cpdef Quire8 p8_to_q8(Posit8 a1):
     f = cposit.q8_fdp_add(f, a1._c_posit, _p8_one)
     return Quire8.from_c_quire(f)
 
-cpdef Quire8 q8_qam(Quire8 a3, Posit8 a1, Posit8 a2):
-    cdef cposit.quire8_t f = cposit.q8_fdp_add(a3._c_quire, a1._c_posit, a2._c_posit)
+cpdef Quire8 q8_qma(Quire8 acc, Posit8 a1, Posit8 a2):
+    cdef cposit.quire8_t f = cposit.q8_fdp_add(acc._c_quire, a1._c_posit, a2._c_posit)
     return Quire8.from_c_quire(f)
 
-cpdef Quire8 q8_qsm(Quire8 a3, Posit8 a1, Posit8 a2):
-    cdef cposit.quire8_t f = cposit.q8_fdp_sub(a3._c_quire, a1._c_posit, a2._c_posit)
+cpdef Quire8 q8_qms(Quire8 acc, Posit8 a1, Posit8 a2):
+    cdef cposit.quire8_t f = cposit.q8_fdp_sub(acc._c_quire, a1._c_posit, a2._c_posit)
     return Quire8.from_c_quire(f)
 
 cpdef Posit8 q8_to_p8(Quire8 a1):
@@ -505,11 +494,7 @@ cdef class Posit16:
     def __mul__(self, Posit16 other):
         return self.mul(other)
 
-    cpdef Posit16 fma(self, Posit16 a2, Posit16 a3):
-        cdef cposit.posit16_t f = cposit.p16_mulAdd(self._c_posit, a2._c_posit, a3._c_posit)
-        return Posit16.from_c_posit(f)
-
-    cpdef Posit16 fam(self, Posit16 a1, Posit16 a2):
+    cpdef Posit16 fma(self, Posit16 a1, Posit16 a2):
         cdef cposit.posit16_t f = cposit.p16_mulAdd(a1._c_posit, a2._c_posit, self._c_posit)
         return Posit16.from_c_posit(f)
 
@@ -556,10 +541,7 @@ cdef class Posit16:
         self.imul(other)
         return self
 
-    cpdef void ifma(self, Posit16 a2, Posit16 a3):
-        self._c_posit = cposit.p16_mulAdd(self._c_posit, a2._c_posit, a3._c_posit)
-
-    cpdef void ifam(self, Posit16 a1, Posit16 a2):
+    cpdef void ifma(self, Posit16 a1, Posit16 a2):
         self._c_posit = cposit.p16_mulAdd(a1._c_posit, a2._c_posit, self._c_posit)
 
     cpdef void idiv(self, Posit16 other):
@@ -689,18 +671,18 @@ cdef class Quire16:
 
     # arithmetic
 
-    cpdef Quire16 qam(self, Posit16 a1, Posit16 a2):
+    cpdef Quire16 qma(self, Posit16 a1, Posit16 a2):
         cdef cposit.quire16_t f = cposit.q16_fdp_add(self._c_quire, a1._c_posit, a2._c_posit)
         return Quire16.from_c_quire(f)
 
-    cpdef Quire16 qsm(self, Posit16 a1, Posit16 a2):
+    cpdef Quire16 qms(self, Posit16 a1, Posit16 a2):
         cdef cposit.quire16_t f = cposit.q16_fdp_sub(self._c_quire, a1._c_posit, a2._c_posit)
         return Quire16.from_c_quire(f)
 
-    cpdef void iqam(self, Posit16 a1, Posit16 a2):
+    cpdef void iqma(self, Posit16 a1, Posit16 a2):
         self._c_quire = cposit.q16_fdp_add(self._c_quire, a1._c_posit, a2._c_posit)
 
-    cpdef void iqsm(self, Posit16 a1, Posit16 a2):
+    cpdef void iqms(self, Posit16 a1, Posit16 a2):
         self._c_quire = cposit.q16_fdp_sub(self._c_quire, a1._c_posit, a2._c_posit)
 
     cpdef void iclr(self):
@@ -739,12 +721,8 @@ cpdef Posit16 p16_mul(Posit16 a1, Posit16 a2):
     cdef cposit.posit16_t f = cposit.p16_mul(a1._c_posit, a2._c_posit)
     return Posit16.from_c_posit(f)
 
-cpdef Posit16 p16_fma(Posit16 a1, Posit16 a2, Posit16 a3):
-    cdef cposit.posit16_t f = cposit.p16_mulAdd(a1._c_posit, a2._c_posit, a3._c_posit)
-    return Posit16.from_c_posit(f)
-
-cpdef Posit16 p16_fam(Posit16 a3, Posit16 a1, Posit16 a2):
-    cdef cposit.posit16_t f = cposit.p16_mulAdd(a1._c_posit, a2._c_posit, a3._c_posit)
+cpdef Posit16 p16_fma(Posit16 acc, Posit16 a1, Posit16 a2):
+    cdef cposit.posit16_t f = cposit.p16_mulAdd(a1._c_posit, a2._c_posit, acc._c_posit)
     return Posit16.from_c_posit(f)
 
 cpdef Posit16 p16_div(Posit16 a1, Posit16 a2):
@@ -778,12 +756,12 @@ cpdef Quire16 p16_to_q16(Posit16 a1):
     f = cposit.q16_fdp_add(f, a1._c_posit, _p16_one)
     return Quire16.from_c_quire(f)
 
-cpdef Quire16 q16_qam(Quire16 a3, Posit16 a1, Posit16 a2):
-    cdef cposit.quire16_t f = cposit.q16_fdp_add(a3._c_quire, a1._c_posit, a2._c_posit)
+cpdef Quire16 q16_qma(Quire16 acc, Posit16 a1, Posit16 a2):
+    cdef cposit.quire16_t f = cposit.q16_fdp_add(acc._c_quire, a1._c_posit, a2._c_posit)
     return Quire16.from_c_quire(f)
 
-cpdef Quire16 q16_qsm(Quire16 a3, Posit16 a1, Posit16 a2):
-    cdef cposit.quire16_t f = cposit.q16_fdp_sub(a3._c_quire, a1._c_posit, a2._c_posit)
+cpdef Quire16 q16_qms(Quire16 acc, Posit16 a1, Posit16 a2):
+    cdef cposit.quire16_t f = cposit.q16_fdp_sub(acc._c_quire, a1._c_posit, a2._c_posit)
     return Quire16.from_c_quire(f)
 
 cpdef Posit16 q16_to_p16(Quire16 a1):
@@ -893,11 +871,7 @@ cdef class Posit32:
     def __mul__(self, Posit32 other):
         return self.mul(other)
 
-    cpdef Posit32 fma(self, Posit32 a2, Posit32 a3):
-        cdef cposit.posit32_t f = cposit.p32_mulAdd(self._c_posit, a2._c_posit, a3._c_posit)
-        return Posit32.from_c_posit(f)
-
-    cpdef Posit32 fam(self, Posit32 a1, Posit32 a2):
+    cpdef Posit32 fma(self, Posit32 a1, Posit32 a2):
         cdef cposit.posit32_t f = cposit.p32_mulAdd(a1._c_posit, a2._c_posit, self._c_posit)
         return Posit32.from_c_posit(f)
 
@@ -944,10 +918,7 @@ cdef class Posit32:
         self.imul(other)
         return self
 
-    cpdef void ifma(self, Posit32 a2, Posit32 a3):
-        self._c_posit = cposit.p32_mulAdd(self._c_posit, a2._c_posit, a3._c_posit)
-
-    cpdef void ifam(self, Posit32 a1, Posit32 a2):
+    cpdef void ifma(self, Posit32 a1, Posit32 a2):
         self._c_posit = cposit.p32_mulAdd(a1._c_posit, a2._c_posit, self._c_posit)
 
     cpdef void idiv(self, Posit32 other):
@@ -1077,18 +1048,18 @@ cdef class Quire32:
 
     # arithmetic
 
-    cpdef Quire32 qam(self, Posit32 a1, Posit32 a2):
+    cpdef Quire32 qma(self, Posit32 a1, Posit32 a2):
         cdef cposit.quire32_t f = cposit.q32_fdp_add(self._c_quire, a1._c_posit, a2._c_posit)
         return Quire32.from_c_quire(f)
 
-    cpdef Quire32 qsm(self, Posit32 a1, Posit32 a2):
+    cpdef Quire32 qms(self, Posit32 a1, Posit32 a2):
         cdef cposit.quire32_t f = cposit.q32_fdp_sub(self._c_quire, a1._c_posit, a2._c_posit)
         return Quire32.from_c_quire(f)
 
-    cpdef void iqam(self, Posit32 a1, Posit32 a2):
+    cpdef void iqma(self, Posit32 a1, Posit32 a2):
         self._c_quire = cposit.q32_fdp_add(self._c_quire, a1._c_posit, a2._c_posit)
 
-    cpdef void iqsm(self, Posit32 a1, Posit32 a2):
+    cpdef void iqms(self, Posit32 a1, Posit32 a2):
         self._c_quire = cposit.q32_fdp_sub(self._c_quire, a1._c_posit, a2._c_posit)
 
     cpdef void iclr(self):
@@ -1127,12 +1098,8 @@ cpdef Posit32 p32_mul(Posit32 a1, Posit32 a2):
     cdef cposit.posit32_t f = cposit.p32_mul(a1._c_posit, a2._c_posit)
     return Posit32.from_c_posit(f)
 
-cpdef Posit32 p32_fma(Posit32 a1, Posit32 a2, Posit32 a3):
-    cdef cposit.posit32_t f = cposit.p32_mulAdd(a1._c_posit, a2._c_posit, a3._c_posit)
-    return Posit32.from_c_posit(f)
-
-cpdef Posit32 p32_fam(Posit32 a3, Posit32 a1, Posit32 a2):
-    cdef cposit.posit32_t f = cposit.p32_mulAdd(a1._c_posit, a2._c_posit, a3._c_posit)
+cpdef Posit32 p32_fma(Posit32 acc, Posit32 a1, Posit32 a2):
+    cdef cposit.posit32_t f = cposit.p32_mulAdd(a1._c_posit, a2._c_posit, acc._c_posit)
     return Posit32.from_c_posit(f)
 
 cpdef Posit32 p32_div(Posit32 a1, Posit32 a2):
@@ -1166,12 +1133,12 @@ cpdef Quire32 p32_to_q32(Posit32 a1):
     f = cposit.q32_fdp_add(f, a1._c_posit, _p32_one)
     return Quire32.from_c_quire(f)
 
-cpdef Quire32 q32_qam(Quire32 a3, Posit32 a1, Posit32 a2):
-    cdef cposit.quire32_t f = cposit.q32_fdp_add(a3._c_quire, a1._c_posit, a2._c_posit)
+cpdef Quire32 q32_qma(Quire32 acc, Posit32 a1, Posit32 a2):
+    cdef cposit.quire32_t f = cposit.q32_fdp_add(acc._c_quire, a1._c_posit, a2._c_posit)
     return Quire32.from_c_quire(f)
 
-cpdef Quire32 q32_qsm(Quire32 a3, Posit32 a1, Posit32 a2):
-    cdef cposit.quire32_t f = cposit.q32_fdp_sub(a3._c_quire, a1._c_posit, a2._c_posit)
+cpdef Quire32 q32_qms(Quire32 acc, Posit32 a1, Posit32 a2):
+    cdef cposit.quire32_t f = cposit.q32_fdp_sub(acc._c_quire, a1._c_posit, a2._c_posit)
     return Quire32.from_c_quire(f)
 
 cpdef Posit32 q32_to_p32(Quire32 a1):
-- 
2.30.2