From a5b29326b9d1cd5ac3e79c663114eefbb7b969d9 Mon Sep 17 00:00:00 2001 From: lkcl Date: Sat, 15 Apr 2023 13:39:11 +0100 Subject: [PATCH] --- openpower/sv/rfc/ls009.mdwn | 96 +++++++++++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 3 deletions(-) diff --git a/openpower/sv/rfc/ls009.mdwn b/openpower/sv/rfc/ls009.mdwn index c3f41edf1..c41016ace 100644 --- a/openpower/sv/rfc/ls009.mdwn +++ b/openpower/sv/rfc/ls009.mdwn @@ -1573,6 +1573,7 @@ Specification for Matrix (2D/3D) REMAP. Vectors of "loopends" are returned when in Vectors of CR Fields on `sv.svstep.`, or a single CR Field CR0 on `svstep.` in Vertical-First Mode. The `SVSTATE.srcstep` or `SVSTATE.dststep` sequential offset is put through this algorithm to determine the actual Element Offset. +Hardware implementations are achievable with simple counter-and-compare logic. ``` # python "yield" can be iterated. use this to make it clear how @@ -1624,7 +1625,6 @@ def iterate_indices(SVSHAPE): # e.g. [z][x][y] or [y][z][x]. "skip" allows one of # those to be knocked out if SVSHAPE.skip == i+1: continue - #print ("select %d %s" % (i, dbg)) idx *= mult # shifts up by previous dimension(s) result += idx # adds on this dimension mult *= lim # for the next dimension @@ -1633,8 +1633,6 @@ def iterate_indices(SVSHAPE): ((y_end and x_end)<<1) | ((y_end and x_end and z_end)<<2)) - if hasattr(SVSHAPE, "postprocess"): # for Indexed mode - result = SVSHAPE.postprocess(result, step) yield result + SVSHAPE.offset, loopends step += 1 @@ -1669,4 +1667,96 @@ if __name__ == '__main__': demo() ``` +## REMAP Parallel Reduction pseudocode + +The python3 program below is stand-alone executable and is the Canonical Specification +for Parallel Reduction REMAP. Alternative implementations producing different ordering +are prohibited. The Algorithm below is not limited to RADIX2 sizes, and Predicate +sources, unlike in Matrix REMAP, apply to the Element Indices **after** REMAP +has been applied, not before. 
# MV operations are not required: the algorithm tracks positions of
# elements that would normally be moved and when applying an Element
# Reduction Operation sources the operands from their last-known
# (tracked) position.

# a "yield" version of the Parallel Reduction REMAP algorithm.
# the algorithm is in-place. it does not perform "MV" operations.
# instead, where a masked-out value *should* be read from is tracked


def iterate_indices(SVSHAPE, pred=None):
    """Yield ``[element_index, loopends]`` pairs in Parallel Reduction order.

    Reads from SVSHAPE:
      * ``lims[0]``   - number of elements being reduced
      * ``invxyz[0]`` - if set, the initial element order is reversed
      * ``invxyz[1]`` - if set, the order of the reduction steps is reversed
      * ``skip``      - submode: 0b00 yields the index tracked at the lower
                        position of each pair, 0b01 the index tracked at
                        the upper position (lower + step // 2)
      * ``offset``    - added to every yielded index

    ``pred`` is an optional predicate mask indexed by element number;
    ``None`` means every element is enabled.

    ``loopends`` has bit 0 set on the last pair produced by each step, and
    bit 1 set as well when that step is the last entry of the step list.
    """
    xd = SVSHAPE.lims[0]

    # tracked[n] is the element index whose value is currently considered
    # to be at position n (identity to begin with, optionally reversed)
    tracked = list(range(xd))
    if SVSHAPE.invxyz[0]:
        tracked.reverse()

    # strides double (2, 4, 8, ...) until one reaches or exceeds xd
    strides = []
    stride = 1
    while stride < xd:
        stride *= 2
        strides.append(stride)
    if SVSHAPE.invxyz[1]:
        strides.reverse()

    for stride in strides:
        half = stride // 2
        final_step = stride == strides[-1]  # note end of steps
        pairs = []
        for lo in range(0, xd, stride):
            hi = lo + half
            lo_idx = tracked[lo]
            # the upper partner only participates if it is inside the
            # vector and (when a mask is given) is predicated-in
            if hi < xd:
                hi_idx = tracked[hi]
                hi_live = pred is None or pred[hi_idx]
            else:
                hi_idx = None
                hi_live = False
            lo_live = pred is None or pred[lo_idx]
            if not hi_live:
                continue
            if not lo_live:
                # lower operand masked out: no operation is emitted, the
                # upper operand's index is simply tracked at this position
                tracked[lo] = hi_idx
                continue
            if SVSHAPE.skip == 0b00:  # submode 00
                result = lo_idx
            elif SVSHAPE.skip == 0b01:  # submode 01
                result = hi_idx
            pairs.append([result + SVSHAPE.offset, 0])
        if pairs:
            # notify end of loops
            pairs[-1][1] = 3 if final_step else 1
        yield from pairs


def demo():
    """Print the submode-00 and submode-01 index schedules side by side."""
    # set the dimension sizes here
    xdim = 9

    # set up an SVSHAPE
    class SVSHAPE:
        pass

    def make_shape(submode):
        shape = SVSHAPE()
        shape.lims = [xdim, 0, 0]
        shape.order = [0, 1, 2]
        shape.mode = 0b10
        shape.skip = submode
        shape.offset = 0  # experiment with different offset, here
        shape.invxyz = [0, 0, 0]  # inversion if desired
        return shape

    # enumerate over the iterator function, getting new indices
    lhs = list(iterate_indices(make_shape(0b00)))
    rhs = list(iterate_indices(make_shape(0b01)))
    for idx, ((l_idx, lend), (r_idx, rend)) in enumerate(zip(lhs, rhs)):
        print("%d->%d:%d" % (idx, l_idx, r_idx),
              "end", bin(lend)[2:], bin(rend)[2:])


# run the demo
if __name__ == '__main__':
    demo()

# trailing wiki tag and patch signature carried over from the document:
# [[!tag opf_rfc]] -- 2.30.2