From: Jacob Lifshay Date: Fri, 22 Apr 2022 21:53:06 +0000 (-0700) Subject: add reduce_only option to prefix_sum_ops X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c4357f7839c612a9c560a2bda14c34af6adee87b;p=nmutil.git add reduce_only option to prefix_sum_ops --- diff --git a/src/nmutil/prefix_sum.py b/src/nmutil/prefix_sum.py index 4aa89b2..a1ffe85 100644 --- a/src/nmutil/prefix_sum.py +++ b/src/nmutil/prefix_sum.py @@ -25,7 +25,7 @@ class Op: """row in the prefix-sum diagram""" -def prefix_sum_ops(item_count, *, work_efficient=False): +def prefix_sum_ops(item_count, *, work_efficient=False, reduce_only=False): """ Get the associative operations needed to compute a parallel prefix-sum of `item_count` items. @@ -45,6 +45,9 @@ def prefix_sum_ops(item_count, *, work_efficient=False): True if the algorithm used should be work-efficient -- has a larger depth (about twice as large) but does only `O(N)` operations total instead of `O(N*log(N))`. + reduce_only: bool + True if the work-efficient algorithm should stop after the initial + tree-reduction step. Returns: Iterable[Op] output associative operations. """ @@ -61,7 +64,7 @@ def prefix_sum_ops(item_count, *, work_efficient=False): yield Op(out=i, lhs=i - dist, rhs=i, row=row) dist <<= 1 row += 1 - if work_efficient: + if work_efficient and not reduce_only: # express all output items in terms of the computed partial sums. dist >>= 1 while dist >= 1: