From c4357f7839c612a9c560a2bda14c34af6adee87b Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Fri, 22 Apr 2022 14:53:06 -0700 Subject: [PATCH] add reduce_only option to prefix_sum_ops --- src/nmutil/prefix_sum.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/nmutil/prefix_sum.py b/src/nmutil/prefix_sum.py index 4aa89b2..a1ffe85 100644 --- a/src/nmutil/prefix_sum.py +++ b/src/nmutil/prefix_sum.py @@ -25,7 +25,7 @@ class Op: """row in the prefix-sum diagram""" -def prefix_sum_ops(item_count, *, work_efficient=False): +def prefix_sum_ops(item_count, *, work_efficient=False, reduce_only=False): """ Get the associative operations needed to compute a parallel prefix-sum of `item_count` items. @@ -45,6 +45,9 @@ def prefix_sum_ops(item_count, *, work_efficient=False): True if the algorithm used should be work-efficient -- has a larger depth (about twice as large) but does only `O(N)` operations total instead of `O(N*log(N))`. + reduce_only: bool + True if the work-efficient algorithm should stop after the initial + tree-reduction step. Returns: Iterable[Op] output associative operations. """ @@ -61,7 +64,7 @@ def prefix_sum_ops(item_count, *, work_efficient=False): yield Op(out=i, lhs=i - dist, rhs=i, row=row) dist <<= 1 row += 1 - if work_efficient: + if work_efficient and not reduce_only: # express all output items in terms of the computed partial sums. dist >>= 1 while dist >= 1: -- 2.30.2