Blender V4.3
BLI_task.hh
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#pragma once
6
10
11#ifdef WITH_TBB
12/* Quiet top level deprecation message, unrelated to API usage here. */
13# if defined(WIN32) && !defined(NOMINMAX)
14/* TBB includes Windows.h which will define min/max macros causing issues
15 * when we try to use std::min and std::max later on. */
16# define NOMINMAX
17# define TBB_MIN_MAX_CLEANUP
18# endif
19# include <tbb/blocked_range.h>
20# include <tbb/parallel_for.h>
21# include <tbb/parallel_for_each.h>
22# include <tbb/parallel_invoke.h>
23# include <tbb/parallel_reduce.h>
24# include <tbb/task_arena.h>
25# ifdef WIN32
26/* We cannot keep this defined, since other parts of the code deal with this on their own, leading
27 * to multiple define warnings unless we un-define this, however we can only undefine this if we
28 * were the ones that made the definition earlier. */
29# ifdef TBB_MIN_MAX_CLEANUP
30# undef NOMINMAX
31# endif
32# endif
33#endif
34
35#include "BLI_function_ref.hh"
36#include "BLI_index_range.hh"
37#include "BLI_lazy_threading.hh"
38#include "BLI_span.hh"
40#include "BLI_utildefines.h"
41
namespace blender {

/**
 * Thin wrapper that gives a grain size its own type.
 *
 * The constructor is `explicit`, so a plain integer does not convert to a #GrainSize implicitly;
 * callers have to spell out `GrainSize(n)`.
 */
struct GrainSize {
  /* The wrapped grain size. The member declaration was missing from the listing this header was
   * recovered from; its name is fixed by the constructor's initializer list.
   * NOTE(review): the type is assumed to match the constructor argument - confirm. */
  int64_t value;

  explicit constexpr GrainSize(const int64_t grain_size) : value(grain_size) {}
};

}  // namespace blender
54
55namespace blender::threading {
56
/**
 * Call #function once for every element of #range.
 *
 * When `WITH_TBB` is defined the iteration is handed to `tbb::parallel_for_each`. Otherwise the
 * elements are visited one after another, in order, by a plain range-based loop.
 */
template<typename Range, typename Function>
inline void parallel_for_each(Range &&range, const Function &function)
{
#ifdef WITH_TBB
  tbb::parallel_for_each(range, function);
#else
  for (auto &&value : range) {
    function(value);
  }
#endif
}
68
69namespace detail {
71 int64_t grain_size,
72 FunctionRef<void(IndexRange)> function,
73 const TaskSizeHints &size_hints);
75} // namespace detail
76
94template<typename Function>
95inline void parallel_for(const IndexRange range,
96 const int64_t grain_size,
97 const Function &function,
98 const TaskSizeHints &size_hints = detail::TaskSizeHints_Static(1))
99{
100 if (range.is_empty()) {
101 return;
102 }
103 /* Invoking tbb for small workloads has a large overhead. */
104 if (use_single_thread(size_hints, range, grain_size)) {
105 function(range);
106 return;
107 }
108 detail::parallel_for_impl(range, grain_size, function, size_hints);
109}
110
115inline IndexRange align_sub_range(const IndexRange unaligned_range,
116 const int64_t alignment,
117 const IndexRange global_range)
118{
119 const int64_t global_begin = global_range.start();
120 const int64_t global_end = global_range.one_after_last();
121 const int64_t alignment_mask = ~(alignment - 1);
122
123 const int64_t unaligned_begin = unaligned_range.start();
124 const int64_t unaligned_end = unaligned_range.one_after_last();
125 const int64_t aligned_begin = std::max(global_begin, unaligned_begin & alignment_mask);
126 const int64_t aligned_end = unaligned_end == global_end ?
127 unaligned_end :
128 std::max(global_begin, unaligned_end & alignment_mask);
129 const IndexRange aligned_range = IndexRange::from_begin_end(aligned_begin, aligned_end);
130 return aligned_range;
131}
132
140template<typename Function>
141inline void parallel_for_aligned(const IndexRange range,
142 const int64_t grain_size,
143 const int64_t alignment,
144 const Function &function)
145{
146 parallel_for(range, grain_size, [&](const IndexRange unaligned_range) {
147 const IndexRange aligned_range = align_sub_range(unaligned_range, alignment, range);
148 function(aligned_range);
149 });
150}
151
152template<typename Value, typename Function, typename Reduction>
154 int64_t grain_size,
155 const Value &identity,
156 const Function &function,
157 const Reduction &reduction)
158{
159#ifdef WITH_TBB
160 if (range.size() >= grain_size) {
162 return tbb::parallel_reduce(
163 tbb::blocked_range<int64_t>(range.first(), range.one_after_last(), grain_size),
164 identity,
165 [&](const tbb::blocked_range<int64_t> &subrange, const Value &ident) {
166 return function(IndexRange(subrange.begin(), subrange.size()), ident);
167 },
168 reduction);
169 }
170#else
171 UNUSED_VARS(grain_size, reduction);
172#endif
173 return function(range, identity);
174}
175
176template<typename Value, typename Function, typename Reduction>
178 const int64_t grain_size,
179 const int64_t alignment,
180 const Value &identity,
181 const Function &function,
182 const Reduction &reduction)
183{
185 range,
186 grain_size,
187 identity,
188 [&](const IndexRange unaligned_range, const Value &ident) {
189 const IndexRange aligned_range = align_sub_range(unaligned_range, alignment, range);
190 function(aligned_range, ident);
191 },
192 reduction);
193}
194
/**
 * Call every one of #functions with no arguments.
 *
 * When `WITH_TBB` is defined the callables are forwarded to `tbb::parallel_invoke`. Otherwise they
 * are called one after another, left to right.
 */
template<typename... Functions> inline void parallel_invoke(Functions &&...functions)
{
#ifdef WITH_TBB
  tbb::parallel_invoke(std::forward<Functions>(functions)...);
#else
  (functions(), ...);
#endif
}
207
/**
 * Call every one of #functions, with a runtime switch for threading.
 *
 * \param use_threading: When true, the callables are forwarded to the #parallel_invoke overload
 * without this flag. When false, they are called one after another, left to right.
 */
template<typename... Functions>
inline void parallel_invoke(const bool use_threading, Functions &&...functions)
{
  if (use_threading) {
    /* NOTE(review): the listing this block was recovered from skips one numbered line (217) at
     * this point, so a statement may be missing here - presumably related to the otherwise unused
     * `BLI_lazy_threading.hh` include. Confirm against the upstream header. */
    parallel_invoke(std::forward<Functions>(functions)...);
  }
  else {
    (functions(), ...);
  }
}
224
/**
 * Run #function through `tbb::this_task_arena::isolate` when `WITH_TBB` is defined; otherwise
 * just call it directly.
 */
template<typename Function> inline void isolate_task(const Function &function)
{
#ifdef WITH_TBB
  /* NOTE(review): the listing this block was recovered from skips one numbered line (229) at this
   * point, so a statement may be missing here - presumably related to the otherwise unused
   * `BLI_lazy_threading.hh` include. Confirm against the upstream header. */
  tbb::this_task_arena::isolate(function);
#else
  function();
#endif
}
235
242template<typename Function>
243inline void memory_bandwidth_bound_task(const int64_t approximate_bytes_touched,
244 const Function &function)
245{
246 /* Don't limit threading when all touched memory can stay in the CPU cache, because there a much
247 * higher memory bandwidth is available compared to accessing RAM. This value is supposed to be
248 * on the order of the L3 cache size. Accessing that value is not quite straight forward and even
249 * if it was, it's not clear if using the exact cache size would be beneficial because there is
250 * often more stuff going on the CPU at the same time. */
251 if (approximate_bytes_touched <= 8 * 1024 * 1024) {
252 function();
253 return;
254 }
256}
257
258} // namespace blender::threading
#define UNUSED_VARS(...)
Group Output data from inside of a node group A color picker Mix two input colors RGB to Convert a color s luminance to a grayscale value Generate a normal vector and a dot product Brightness Control the brightness and contrast of the input color Vector Map input vector components with curves Camera Retrieve information about the camera and how it relates to the current shading point s position Clamp a value between a minimum and a maximum Vector Perform vector math operation Invert Invert a producing a negative Combine Generate a color from its and blue Hue Saturation Value
constexpr int64_t first() const
constexpr int64_t one_after_last() const
constexpr int64_t size() const
constexpr bool is_empty() const
static constexpr IndexRange from_begin_end(const int64_t begin, const int64_t end)
constexpr int64_t start() const
void parallel_for_impl(IndexRange range, int64_t grain_size, FunctionRef< void(IndexRange)> function, const TaskSizeHints &size_hints)
void memory_bandwidth_bound_task_impl(FunctionRef< void()> function)
void isolate_task(const Function &function)
Definition BLI_task.hh:226
void parallel_invoke(Functions &&...functions)
Definition BLI_task.hh:199
void parallel_for_each(Range &&range, const Function &function)
Definition BLI_task.hh:58
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
Definition BLI_task.hh:95
bool use_single_thread(const TaskSizeHints &size_hints, const IndexRange range, const int64_t threshold)
void memory_bandwidth_bound_task(const int64_t approximate_bytes_touched, const Function &function)
Definition BLI_task.hh:243
void parallel_for_aligned(const IndexRange range, const int64_t grain_size, const int64_t alignment, const Function &function)
Definition BLI_task.hh:141
Value parallel_reduce(IndexRange range, int64_t grain_size, const Value &identity, const Function &function, const Reduction &reduction)
Definition BLI_task.hh:153
Value parallel_reduce_aligned(const IndexRange range, const int64_t grain_size, const int64_t alignment, const Value &identity, const Function &function, const Reduction &reduction)
Definition BLI_task.hh:177
IndexRange align_sub_range(const IndexRange unaligned_range, const int64_t alignment, const IndexRange global_range)
Definition BLI_task.hh:115
__int64 int64_t
Definition stdint.h:89
constexpr GrainSize(const int64_t grain_size)
Definition BLI_task.hh:50