Blender  V2.93
device_multi.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <sstream>
18 #include <stdlib.h>
19 
20 #include "bvh/bvh_multi.h"
21 
22 #include "device/device.h"
23 #include "device/device_intern.h"
24 #include "device/device_network.h"
25 
26 #include "render/buffers.h"
27 #include "render/geometry.h"
28 
29 #include "util/util_foreach.h"
30 #include "util/util_list.h"
31 #include "util/util_logging.h"
32 #include "util/util_map.h"
33 #include "util/util_time.h"
34 
36 
// Aggregates several physical render devices (plus optional dedicated
// denoising devices) behind the single Device interface. Virtual device_ptr
// keys handed to callers are translated per sub-device via SubDevice::ptr_map.
// NOTE(review): recovered from a Doxygen listing; original lines 40-41, 43 and
// 47-50 (the remaining SubDevice members, e.g. the owned Device pointer and
// its stats, plus further MultiDevice members) are missing here -- restore
// from upstream before compiling.
37 class MultiDevice : public Device {
38  public:
39  struct SubDevice {
// Maps the MultiDevice's virtual device_ptr keys to the real pointers
// returned by this sub-device for memory owned here.
42  map<device_ptr, device_ptr> ptr_map;
44  };
45 
// Linked lists so element addresses stay stable while devices are appended
// (the constructor relies on this for 'sub->stats').
46  list<SubDevice> devices, denoising_devices;
51 
// Constructor: creates one sub-device per entry of info.multi_devices (CPU
// devices at the back, GPUs at the front), creates the denoising devices,
// groups render devices into peer-memory islands, and checks whether render
// and denoising devices map 1:1 onto the same physical GPUs.
// NOTE(review): recovered from a Doxygen listing; original lines 52 (the
// constructor signature), 55 (a member initializer), 109/111 (start of the
// 'matching_rendering_and_denoising_devices' computation and its guarding
// 'if') and 121 (the assignment inside the mismatch branch) are missing --
// restore from upstream.
53  : Device(info, stats, profiler, background_),
54  unique_key(1),
56  {
57  foreach (DeviceInfo &subinfo, info.multi_devices) {
58  /* Always add CPU devices at the back since GPU devices can change
59  * host memory pointers, which CPU uses as device pointer. */
60  SubDevice *sub;
61  if (subinfo.type == DEVICE_CPU) {
62  devices.emplace_back();
63  sub = &devices.back();
64  }
65  else {
66  devices.emplace_front();
67  sub = &devices.front();
68  }
69 
70  /* The pointer to 'sub->stats' will stay valid even after new devices
71  * are added, since 'devices' is a linked list. */
72  sub->device = Device::create(subinfo, sub->stats, profiler, background);
73  }
74 
// Denoising devices have no CPU/GPU ordering constraint; always prepend.
75  foreach (DeviceInfo &subinfo, info.denoising_devices) {
76  denoising_devices.emplace_front();
77  SubDevice *sub = &denoising_devices.front();
78 
79  sub->device = Device::create(subinfo, sub->stats, profiler, background);
80  }
81 
82  /* Build a list of peer islands for the available render devices */
83  foreach (SubDevice &sub, devices) {
84  /* First ensure that every device is in at least once peer island */
85  if (sub.peer_island_index < 0) {
86  peer_islands.emplace_back();
87  sub.peer_island_index = (int)peer_islands.size() - 1;
88  peer_islands[sub.peer_island_index].push_back(&sub);
89  }
90 
91  if (!info.has_peer_memory) {
92  continue;
93  }
94 
95  /* Second check peer access between devices and fill up the islands accordingly */
96  foreach (SubDevice &peer_sub, devices) {
97  if (peer_sub.peer_island_index < 0 &&
98  peer_sub.device->info.type == sub.device->info.type &&
99  peer_sub.device->check_peer_access(sub.device)) {
100  peer_sub.peer_island_index = sub.peer_island_index;
101  peer_islands[sub.peer_island_index].push_back(&peer_sub);
102  }
103  }
104  }
105 
106  /* Try to re-use memory when denoising and render devices use the same physical devices
107  * (e.g. OptiX denoising and CUDA rendering device pointing to the same GPU).
108  * Ordering has to match as well, so that 'DeviceTask::split' behaves consistent. */
110  (devices.size() == denoising_devices.size());
// Walk both lists pairwise; any non-CUDA/OptiX pair or differing physical
// device number disqualifies the 1:1 matching.
112  for (list<SubDevice>::iterator device_it = devices.begin(),
113  denoising_device_it = denoising_devices.begin();
114  device_it != devices.end() && denoising_device_it != denoising_devices.end();
115  ++device_it, ++denoising_device_it) {
116  const DeviceInfo &info = device_it->device->info;
117  const DeviceInfo &denoising_info = denoising_device_it->device->info;
118  if ((info.type != DEVICE_CUDA && info.type != DEVICE_OPTIX) ||
119  (denoising_info.type != DEVICE_CUDA && denoising_info.type != DEVICE_OPTIX) ||
120  info.num != denoising_info.num) {
122  break;
123  }
124  }
125  }
126 
127 #ifdef WITH_NETWORK
128  /* try to add network devices */
129  ServerDiscovery discovery(true);
// Give servers a moment to answer the discovery broadcast.
130  time_sleep(1.0);
131 
132  vector<string> servers = discovery.get_server_list();
133 
134  foreach (string &server, servers) {
135  Device *device = device_network_create(info, stats, profiler, server.c_str());
136  if (device)
137  devices.push_back(SubDevice(device));
138  }
139 #endif
140  }
141 
// Destructor (the '~MultiDevice()' signature on original line 142 is missing
// from this listing -- presumably virtual, confirm against upstream):
// releases every owned render and denoising sub-device.
143  {
144  foreach (SubDevice &sub, devices)
145  delete sub.device;
146  foreach (SubDevice &sub, denoising_devices)
147  delete sub.device;
148  }
149 
150  const string &error_message() override
151  {
152  error_msg.clear();
153 
154  foreach (SubDevice &sub, devices)
155  error_msg += sub.device->error_message();
156  foreach (SubDevice &sub, denoising_devices)
157  error_msg += sub.device->error_message();
158 
159  return error_msg;
160  }
161 
162  virtual bool show_samples() const override
163  {
164  if (devices.size() > 1) {
165  return false;
166  }
167  return devices.front().device->show_samples();
168  }
169 
// Returns the BVH layouts usable by all render devices: the intersection of
// the per-device masks, with multi-device fallbacks when devices disagree.
170  virtual BVHLayoutMask get_bvh_layout_mask() const override
171  {
// Intersection of all device masks, and the union for fallback detection.
172  BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
173  BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
174  foreach (const SubDevice &sub_device, devices) {
175  BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask();
176  bvh_layout_mask &= device_bvh_layout_mask;
177  bvh_layout_mask_all |= device_bvh_layout_mask;
178  }
179 
180  /* With multiple OptiX devices, every device needs its own acceleration structure */
181  if (bvh_layout_mask == BVH_LAYOUT_OPTIX) {
182  return BVH_LAYOUT_MULTI_OPTIX;
183  }
184 
185  /* When devices do not share a common BVH layout, fall back to creating one for each */
186  const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
187  if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
// NOTE(review): original line 188 is missing from this listing; presumably it
// returned BVH_LAYOUT_MULTI_OPTIX_EMBREE (the enumerator exists, see the
// member index at the end of this file) -- confirm against upstream.
189  }
190 
191  return bvh_layout_mask;
192  }
193 
194  bool load_kernels(const DeviceRequestedFeatures &requested_features) override
195  {
196  foreach (SubDevice &sub, devices)
197  if (!sub.device->load_kernels(requested_features))
198  return false;
199 
200  use_denoising = requested_features.use_denoising;
201  if (requested_features.use_denoising) {
202  /* Only need denoising feature, everything else is unused. */
203  DeviceRequestedFeatures denoising_features;
204  denoising_features.use_denoising = true;
205  foreach (SubDevice &sub, denoising_devices)
206  if (!sub.device->load_kernels(denoising_features))
207  return false;
208  }
209 
210  return true;
211  }
212 
213  bool wait_for_availability(const DeviceRequestedFeatures &requested_features) override
214  {
215  foreach (SubDevice &sub, devices)
216  if (!sub.device->wait_for_availability(requested_features))
217  return false;
218 
219  if (requested_features.use_denoising) {
220  foreach (SubDevice &sub, denoising_devices)
221  if (!sub.device->wait_for_availability(requested_features))
222  return false;
223  }
224 
225  return true;
226  }
227 
// Aggregates the kernel-switch state across all render devices.
// NOTE(review): recovered from a Doxygen listing; original lines 228 (the
// signature, per the member index: 'DeviceKernelStatus
// get_active_kernel_switch_state() override'), 230 (initialization of
// 'result'), 233 (the per-device query assigned to 'subresult') and 235-236 /
// 239-240 (the switch case labels) are missing -- restore from upstream.
229  {
231 
232  foreach (SubDevice &sub, devices) {
234  switch (subresult) {
237  return subresult;
238 
241  break;
242  }
243  }
244 
245  return result;
246  }
247 
// Builds the scene BVH: either a single shared acceleration structure built
// on one device, or -- for multi layouts -- one sub-BVH per render device,
// temporarily redirecting each Geometry's bvh pointer to the per-device one.
// NOTE(review): recovered from a Doxygen listing; original lines 251 (the
// guard 'if' that selects the shared single-BVH path), 257 (the second assert
// operand, presumably BVH_LAYOUT_MULTI_OPTIX_EMBREE), and 278/280/282 (the
// branch choosing the per-device layout) are missing -- restore from
// upstream.
248  void build_bvh(BVH *bvh, Progress &progress, bool refit) override
249  {
250  /* Try to build and share a single acceleration structure, if possible */
252  devices.back().device->build_bvh(bvh, progress, refit);
253  return;
254  }
255 
256  assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
258 
259  BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
260  bvh_multi->sub_bvhs.resize(devices.size());
261 
// Remember each geometry's multi-BVH so the pointers can be restored below.
262  vector<BVHMulti *> geom_bvhs;
263  geom_bvhs.reserve(bvh->geometry.size());
264  foreach (Geometry *geom, bvh->geometry) {
265  geom_bvhs.push_back(static_cast<BVHMulti *>(geom->bvh));
266  }
267 
268  /* Broadcast acceleration structure build to all render devices */
269  size_t i = 0;
270  foreach (SubDevice &sub, devices) {
271  /* Change geometry BVH pointers to the sub BVH */
272  for (size_t k = 0; k < bvh->geometry.size(); ++k) {
273  bvh->geometry[k]->bvh = geom_bvhs[k]->sub_bvhs[i];
274  }
275 
// Lazily create this device's sub-BVH on first build.
276  if (!bvh_multi->sub_bvhs[i]) {
277  BVHParams params = bvh->params;
279  params.bvh_layout = BVH_LAYOUT_OPTIX;
281  params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
283 
284  /* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
285  * (since they are put into the top level directly, see bvh_embree.cpp) */
286  if (!params.top_level && params.bvh_layout == BVH_LAYOUT_EMBREE &&
287  !bvh->geometry[0]->is_instanced()) {
288  i++;
289  continue;
290  }
291 
292  bvh_multi->sub_bvhs[i] = BVH::create(params, bvh->geometry, bvh->objects, sub.device);
293  }
294 
295  sub.device->build_bvh(bvh_multi->sub_bvhs[i], progress, refit);
296  i++;
297  }
298 
299  /* Change geometry BVH pointers back to the multi BVH. */
300  for (size_t k = 0; k < bvh->geometry.size(); ++k) {
301  bvh->geometry[k]->bvh = geom_bvhs[k];
302  }
303  }
304 
305  virtual void *osl_memory() override
306  {
307  if (devices.size() > 1) {
308  return NULL;
309  }
310  return devices.front().device->osl_memory();
311  }
312 
313  bool is_resident(device_ptr key, Device *sub_device) override
314  {
315  foreach (SubDevice &sub, devices) {
316  if (sub.device == sub_device) {
317  return find_matching_mem_device(key, sub)->device == sub_device;
318  }
319  }
320  return false;
321  }
322 
// SubDevice *find_matching_mem_device(device_ptr key, SubDevice &sub)
// (signature per the member index at the end of this file; the original
// signature line 323 is missing from this listing.)
// Returns the SubDevice that owns the memory behind 'key': first the given
// sub-device itself, otherwise a member of its peer island.
324  {
325  assert(key != 0 && (sub.peer_island_index >= 0 || sub.ptr_map.find(key) != sub.ptr_map.end()));
326 
327  /* Get the memory owner of this key (first try current device, then peer devices) */
328  SubDevice *owner_sub = &sub;
329  if (owner_sub->ptr_map.find(key) == owner_sub->ptr_map.end()) {
330  foreach (SubDevice *island_sub, peer_islands[sub.peer_island_index]) {
331  if (island_sub != owner_sub &&
332  island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) {
333  owner_sub = island_sub;
334  }
335  }
336  }
337  return owner_sub;
338  }
339 
341  {
342  assert(!island.empty());
343 
344  /* Get the memory owner of this key or the device with the lowest memory usage when new */
345  SubDevice *owner_sub = island.front();
346  foreach (SubDevice *island_sub, island) {
347  if (key ? (island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) :
348  (island_sub->device->stats.mem_used < owner_sub->device->stats.mem_used)) {
349  owner_sub = island_sub;
350  }
351  }
352  return owner_sub;
353  }
354 
// device_ptr find_matching_mem(device_ptr key, SubDevice &sub)
// (signature per the member index at the end of this file; the original
// signature line 355 is missing from this listing.)
// Translates a virtual key to the real device pointer reachable from 'sub'.
356  {
357  return find_matching_mem_device(key, sub)->ptr_map[key];
358  }
359 
// Allocates 'mem' on the sub-devices and hands the caller a fresh virtual
// key: pixel memory is allocated on every device, everything else on one
// suitable device per peer island.
360  void mem_alloc(device_memory &mem) override
361  {
362  device_ptr key = unique_key++;
363 
364  if (mem.type == MEM_PIXELS) {
365  /* Always allocate pixels memory on all devices
366  * This is necessary to ensure PBOs are registered everywhere, which FILM_CONVERT uses */
367  foreach (SubDevice &sub, devices) {
368  mem.device = sub.device;
369  mem.device_pointer = 0;
370  mem.device_size = 0;
371 
372  sub.device->mem_alloc(mem);
373  sub.ptr_map[key] = mem.device_pointer;
374  }
375  }
376  else {
377  assert(mem.type == MEM_READ_ONLY || mem.type == MEM_READ_WRITE ||
378  mem.type == MEM_DEVICE_ONLY);
379  /* The remaining memory types can be distributed across devices */
380  foreach (const vector<SubDevice *> &island, peer_islands) {
381  SubDevice *owner_sub = find_suitable_mem_device(key, island);
382  mem.device = owner_sub->device;
383  mem.device_pointer = 0;
384  mem.device_size = 0;
385 
386  owner_sub->device->mem_alloc(mem);
387  owner_sub->ptr_map[key] = mem.device_pointer;
388  }
389  }
390 
// Hand back the MultiDevice as the owner and the virtual key as the pointer.
391  mem.device = this;
392  mem.device_pointer = key;
// NOTE(review): original line 393 is missing from this listing; presumably
// the stats accounting call (compare mem_copy_to/mem_zero which end with
// stats.mem_alloc(...)) -- confirm against upstream.
394  }
395 
396  void mem_copy_to(device_memory &mem) override
397  {
398  device_ptr existing_key = mem.device_pointer;
399  device_ptr key = (existing_key) ? existing_key : unique_key++;
400  size_t existing_size = mem.device_size;
401 
402  /* The tile buffers are allocated on each device (see below), so copy to all of them */
403  if (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising) {
404  foreach (SubDevice &sub, devices) {
405  mem.device = sub.device;
406  mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
407  mem.device_size = existing_size;
408 
409  sub.device->mem_copy_to(mem);
410  sub.ptr_map[key] = mem.device_pointer;
411  }
412  }
413  else {
414  foreach (const vector<SubDevice *> &island, peer_islands) {
415  SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
416  mem.device = owner_sub->device;
417  mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
418  mem.device_size = existing_size;
419 
420  owner_sub->device->mem_copy_to(mem);
421  owner_sub->ptr_map[key] = mem.device_pointer;
422 
423  if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
424  /* Need to create texture objects and update pointer in kernel globals on all devices */
425  foreach (SubDevice *island_sub, island) {
426  if (island_sub != owner_sub) {
427  island_sub->device->mem_copy_to(mem);
428  }
429  }
430  }
431  }
432  }
433 
434  mem.device = this;
435  mem.device_pointer = key;
436  stats.mem_alloc(mem.device_size - existing_size);
437  }
438 
439  void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override
440  {
441  device_ptr key = mem.device_pointer;
442  int i = 0, sub_h = h / devices.size();
443 
444  foreach (SubDevice &sub, devices) {
445  int sy = y + i * sub_h;
446  int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
447 
448  SubDevice *owner_sub = find_matching_mem_device(key, sub);
449  mem.device = owner_sub->device;
450  mem.device_pointer = owner_sub->ptr_map[key];
451 
452  owner_sub->device->mem_copy_from(mem, sy, w, sh, elem);
453  i++;
454  }
455 
456  mem.device = this;
457  mem.device_pointer = key;
458  }
459 
// Allocates (if new) and zero-fills 'mem' on the sub-devices, mirroring the
// distribution rules of mem_alloc/mem_copy_to.
460  void mem_zero(device_memory &mem) override
461  {
462  device_ptr existing_key = mem.device_pointer;
463  device_ptr key = (existing_key) ? existing_key : unique_key++;
464  size_t existing_size = mem.device_size;
465 
466  /* This is a hack to only allocate the tile buffers on denoising devices
467  * Similarly the tile buffers also need to be allocated separately on all devices so any
468  * overlap rendered for denoising does not interfere with each other */
469  if (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising) {
470  vector<device_ptr> device_pointers;
471  device_pointers.reserve(devices.size());
472 
473  foreach (SubDevice &sub, devices) {
474  mem.device = sub.device;
475  mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
476  mem.device_size = existing_size;
477 
478  sub.device->mem_zero(mem);
479  sub.ptr_map[key] = mem.device_pointer;
480 
481  device_pointers.push_back(mem.device_pointer);
482  }
483  foreach (SubDevice &sub, denoising_devices) {
// NOTE(review): original line 484 is missing from this listing; given the
// dangling 'else' below and the 'matching_rendering_and_denoising_devices'
// member in the index, it was presumably
// 'if (matching_rendering_and_denoising_devices) {' -- confirm upstream.
// When matching, denoising devices simply reuse the render device pointers;
// otherwise they get their own zeroed allocation.
485  sub.ptr_map[key] = device_pointers.front();
486  device_pointers.erase(device_pointers.begin());
487  }
488  else {
489  mem.device = sub.device;
490  mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
491  mem.device_size = existing_size;
492 
493  sub.device->mem_zero(mem);
494  sub.ptr_map[key] = mem.device_pointer;
495  }
496  }
497  }
498  else {
499  foreach (const vector<SubDevice *> &island, peer_islands) {
500  SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
501  mem.device = owner_sub->device;
502  mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
503  mem.device_size = existing_size;
504 
505  owner_sub->device->mem_zero(mem);
506  owner_sub->ptr_map[key] = mem.device_pointer;
507  }
508  }
509 
510  mem.device = this;
511  mem.device_pointer = key;
512  stats.mem_alloc(mem.device_size - existing_size);
513  }
514 
// Frees the sub-device allocations behind the virtual key and removes the
// key from every ptr_map, mirroring the distribution rules of mem_alloc.
515  void mem_free(device_memory &mem) override
516  {
517  device_ptr key = mem.device_pointer;
518  size_t existing_size = mem.device_size;
519 
520  /* Free memory that was allocated for all devices (see above) on each device */
521  if (mem.type == MEM_PIXELS || (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising)) {
522  foreach (SubDevice &sub, devices) {
523  mem.device = sub.device;
524  mem.device_pointer = sub.ptr_map[key];
525  mem.device_size = existing_size;
526 
527  sub.device->mem_free(mem);
528  sub.ptr_map.erase(sub.ptr_map.find(key));
529  }
530  foreach (SubDevice &sub, denoising_devices) {
// NOTE(review): original line 531 is missing from this listing; given the
// dangling 'else' below, it was presumably
// 'if (matching_rendering_and_denoising_devices) {' -- confirm upstream.
// When matching, the denoising device shared the render device's buffer, so
// only the key mapping is dropped; otherwise its own allocation is freed.
532  sub.ptr_map.erase(key);
533  }
534  else {
535  mem.device = sub.device;
536  mem.device_pointer = sub.ptr_map[key];
537  mem.device_size = existing_size;
538 
539  sub.device->mem_free(mem);
540  sub.ptr_map.erase(sub.ptr_map.find(key));
541  }
542  }
543  }
544  else {
545  foreach (const vector<SubDevice *> &island, peer_islands) {
546  SubDevice *owner_sub = find_matching_mem_device(key, *island.front());
547  mem.device = owner_sub->device;
548  mem.device_pointer = owner_sub->ptr_map[key];
549  mem.device_size = existing_size;
550 
551  owner_sub->device->mem_free(mem);
552  owner_sub->ptr_map.erase(owner_sub->ptr_map.find(key));
553 
554  if (mem.type == MEM_TEXTURE) {
555  /* Free texture objects on all devices */
556  foreach (SubDevice *island_sub, island) {
557  if (island_sub != owner_sub) {
558  island_sub->device->mem_free(mem);
559  }
560  }
561  }
562  }
563  }
564 
565  mem.device = this;
566  mem.device_pointer = 0;
567  mem.device_size = 0;
568  stats.mem_free(existing_size);
569  }
570 
571  void const_copy_to(const char *name, void *host, size_t size) override
572  {
573  foreach (SubDevice &sub, devices)
574  sub.device->const_copy_to(name, host, size);
575  }
576 
578  int y,
579  int w,
580  int h,
581  int width,
582  int height,
583  int dx,
584  int dy,
585  int dw,
586  int dh,
587  bool transparent,
588  const DeviceDrawParams &draw_params) override
589  {
590  assert(rgba.type == MEM_PIXELS);
591 
592  device_ptr key = rgba.device_pointer;
593  int i = 0, sub_h = h / devices.size();
594  int sub_height = height / devices.size();
595 
596  foreach (SubDevice &sub, devices) {
597  int sy = y + i * sub_h;
598  int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
599  int sheight = (i == (int)devices.size() - 1) ? height - sub_height * i : sub_height;
600  int sdy = dy + i * sub_height;
601  /* adjust math for w/width */
602 
603  rgba.device_pointer = sub.ptr_map[key];
604  sub.device->draw_pixels(
605  rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
606  i++;
607  }
608 
609  rgba.device_pointer = key;
610  }
611 
612  void map_tile(Device *sub_device, RenderTile &tile) override
613  {
614  if (!tile.buffer) {
615  return;
616  }
617 
618  foreach (SubDevice &sub, devices) {
619  if (sub.device == sub_device) {
620  tile.buffer = find_matching_mem(tile.buffer, sub);
621  return;
622  }
623  }
624 
625  foreach (SubDevice &sub, denoising_devices) {
626  if (sub.device == sub_device) {
627  tile.buffer = sub.ptr_map[tile.buffer];
628  return;
629  }
630  }
631  }
632 
633  int device_number(Device *sub_device) override
634  {
635  int i = 0;
636 
637  foreach (SubDevice &sub, devices) {
638  if (sub.device == sub_device)
639  return i;
640  i++;
641  }
642 
643  foreach (SubDevice &sub, denoising_devices) {
644  if (sub.device == sub_device)
645  return i;
646  i++;
647  }
648 
649  return -1;
650  }
651 
// Makes the neighbor tiles' buffers accessible on 'sub_device' for
// denoising, copying buffers between devices where needed.
// NOTE(review): original line 664 is missing from this listing; it was
// presumably the guard 'if' for the viewport/matching-devices shortcut
// described by the comment that follows -- confirm against upstream.
652  void map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override
653  {
654  for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
655  RenderTile &tile = neighbors.tiles[i];
656 
657  if (!tile.buffers) {
658  continue;
659  }
660 
661  device_vector<float> &mem = tile.buffers->buffer;
662  tile.buffer = mem.device_pointer;
663 
665  /* Skip unnecessary copies in viewport mode (buffer covers the
666  * whole image), but still need to fix up the tile device pointer. */
667  map_tile(sub_device, tile);
668  continue;
669  }
670 
671  /* If the tile was rendered on another device, copy its memory to
672  * to the current device now, for the duration of the denoising task.
673  * Note that this temporarily modifies the RenderBuffers and calls
674  * the device, so this function is not thread safe. */
675  if (mem.device != sub_device) {
676  /* Only copy from device to host once. This is faster, but
677  * also required for the case where a CPU thread is denoising
678  * a tile rendered on the GPU. In that case we have to avoid
679  * overwriting the buffer being de-noised by the CPU thread. */
680  if (!tile.buffers->map_neighbor_copied) {
681  tile.buffers->map_neighbor_copied = true;
682  mem.copy_from_device();
683  }
684 
685  if (mem.device == this) {
686  /* Can re-use memory if tile is already allocated on the sub device. */
687  map_tile(sub_device, tile);
688  mem.swap_device(sub_device, mem.device_size, tile.buffer);
689  }
690  else {
691  mem.swap_device(sub_device, 0, 0);
692  }
693 
694  mem.copy_to_device();
695 
696  tile.buffer = mem.device_pointer;
697  tile.device_size = mem.device_size;
698 
699  mem.restore_device();
700  }
701  }
702  }
703 
// Copies the denoised target tile back to its original device and releases
// any temporary per-tile copies created by map_neighbor_tiles.
// NOTE(review): original line 709 is missing from this listing; it was
// presumably the early-out guard 'if' preceding the 'return' below (e.g.
// the viewport/matching-devices shortcut) -- confirm against upstream.
704  void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override
705  {
706  RenderTile &target_tile = neighbors.target;
707  device_vector<float> &mem = target_tile.buffers->buffer;
708 
710  return;
711  }
712 
713  /* Copy denoised result back to the host. */
714  mem.swap_device(sub_device, target_tile.device_size, target_tile.buffer);
715  mem.copy_from_device();
716  mem.restore_device();
717 
718  /* Copy denoised result to the original device. */
719  mem.copy_to_device();
720 
721  for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
722  RenderTile &tile = neighbors.tiles[i];
723  if (!tile.buffers) {
724  continue;
725  }
726 
727  device_vector<float> &mem = tile.buffers->buffer;
728 
729  if (mem.device != sub_device && mem.device != this) {
730  /* Free up memory again if it was allocated for the copy above. */
731  mem.swap_device(sub_device, tile.device_size, tile.buffer);
732  sub_device->mem_free(mem);
733  mem.restore_device();
734  }
735  }
736  }
737 
// int get_split_task_count(DeviceTask &task) override
// (signature per the member index at the end of this file; the original
// signature line 738 is missing from this listing.)
// Splits 'task' the same way task_add would and sums how many sub-tasks the
// render devices would in turn split it into.
739  {
740  int total_tasks = 0;
741  list<DeviceTask> tasks;
742  task.split(tasks, devices.size());
743  foreach (SubDevice &sub, devices) {
744  if (!tasks.empty()) {
745  DeviceTask subtask = tasks.front();
746  tasks.pop_front();
747 
748  total_tasks += sub.device->get_split_task_count(subtask);
749  }
750  }
751  return total_tasks;
752  }
753 
754  void task_add(DeviceTask &task) override
755  {
756  list<SubDevice> task_devices = devices;
757  if (!denoising_devices.empty()) {
758  if (task.type == DeviceTask::DENOISE_BUFFER) {
759  /* Denoising tasks should be redirected to the denoising devices entirely. */
760  task_devices = denoising_devices;
761  }
762  else if (task.type == DeviceTask::RENDER && (task.tile_types & RenderTile::DENOISE)) {
763  const uint tile_types = task.tile_types;
764  /* For normal rendering tasks only redirect the denoising part to the denoising devices.
765  * Do not need to split the task here, since they all run through 'acquire_tile'. */
766  task.tile_types = RenderTile::DENOISE;
767  foreach (SubDevice &sub, denoising_devices) {
768  sub.device->task_add(task);
769  }
770  /* Rendering itself should still be executed on the rendering devices. */
771  task.tile_types = tile_types ^ RenderTile::DENOISE;
772  }
773  }
774 
775  list<DeviceTask> tasks;
776  task.split(tasks, task_devices.size());
777 
778  foreach (SubDevice &sub, task_devices) {
779  if (!tasks.empty()) {
780  DeviceTask subtask = tasks.front();
781  tasks.pop_front();
782 
783  if (task.buffer)
784  subtask.buffer = find_matching_mem(task.buffer, sub);
785  if (task.rgba_byte)
786  subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
787  if (task.rgba_half)
788  subtask.rgba_half = sub.ptr_map[task.rgba_half];
789  if (task.shader_input)
790  subtask.shader_input = find_matching_mem(task.shader_input, sub);
791  if (task.shader_output)
792  subtask.shader_output = find_matching_mem(task.shader_output, sub);
793 
794  sub.device->task_add(subtask);
795 
796  if (task.buffers && task.buffers->buffer.device == this) {
797  /* Synchronize access to RenderBuffers, since 'map_neighbor_tiles' is not thread-safe. */
798  sub.device->task_wait();
799  }
800  }
801  }
802  }
803 
804  void task_wait() override
805  {
806  foreach (SubDevice &sub, devices)
807  sub.device->task_wait();
808  foreach (SubDevice &sub, denoising_devices)
809  sub.device->task_wait();
810  }
811 
812  void task_cancel() override
813  {
814  foreach (SubDevice &sub, devices)
815  sub.device->task_cancel();
816  foreach (SubDevice &sub, denoising_devices)
817  sub.device->task_cancel();
818  }
819 };
820 
821 Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
822 {
823  return new MultiDevice(info, stats, profiler, background);
824 }
825 
unsigned int uint
Definition: BLI_sys_types.h:83
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei width
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei height
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
int BVHLayoutMask
Definition: bvh_params.h:39
vector< BVH * > sub_bvhs
Definition: bvh_multi.h:27
BVHLayout bvh_layout
Definition: bvh_params.h:70
Definition: bvh/bvh.h:80
vector< Geometry * > geometry
Definition: bvh/bvh.h:83
static BVH * create(const BVHParams &params, const vector< Geometry * > &geometry, const vector< Object * > &objects, Device *device)
Definition: bvh.cpp:85
BVHParams params
Definition: bvh/bvh.h:82
vector< Object * > objects
Definition: bvh/bvh.h:84
int num
Definition: device.h:77
vector< DeviceInfo > denoising_devices
Definition: device.h:91
vector< DeviceInfo > multi_devices
Definition: device.h:90
bool has_peer_memory
Definition: device.h:87
DeviceType type
Definition: device.h:74
device_ptr rgba_byte
Definition: device_task.h:136
device_ptr rgba_half
Definition: device_task.h:137
device_ptr shader_output
Definition: device_task.h:144
device_ptr shader_input
Definition: device_task.h:143
device_ptr buffer
Definition: device_task.h:138
Definition: device.h:293
static Device * create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background=true)
Definition: device.cpp:382
virtual void draw_pixels(device_memory &mem, int y, int w, int h, int width, int height, int dx, int dy, int dw, int dh, bool transparent, const DeviceDrawParams &draw_params)
Definition: device.cpp:234
virtual BVHLayoutMask get_bvh_layout_mask() const =0
virtual DeviceKernelStatus get_active_kernel_switch_state()
Definition: device.h:397
virtual void const_copy_to(const char *name, void *host, size_t size)=0
virtual void mem_zero(device_memory &mem)=0
bool background
Definition: device.h:314
string error_msg
Definition: device.h:315
virtual void task_wait()=0
virtual bool wait_for_availability(const DeviceRequestedFeatures &)
Definition: device.h:389
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem)=0
Profiler & profiler
Definition: device.h:362
Stats & stats
Definition: device.h:361
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
Definition: device.cpp:369
virtual void task_cancel()=0
virtual int get_split_task_count(DeviceTask &)
Definition: device.h:403
virtual const string & error_message()
Definition: device.h:338
virtual bool check_peer_access(Device *)
Definition: device.h:458
virtual void mem_free(device_memory &mem)=0
virtual void mem_copy_to(device_memory &mem)=0
DeviceInfo info
Definition: device.h:337
virtual void task_add(DeviceTask &task)=0
virtual void mem_alloc(device_memory &mem)=0
virtual bool load_kernels(const DeviceRequestedFeatures &)
Definition: device.h:380
BVH * bvh
Definition: geometry.h:100
list< SubDevice > devices
void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override
bool is_resident(device_ptr key, Device *sub_device) override
void map_tile(Device *sub_device, RenderTile &tile) override
const string & error_message() override
int device_number(Device *sub_device) override
bool wait_for_availability(const DeviceRequestedFeatures &requested_features) override
bool load_kernels(const DeviceRequestedFeatures &requested_features) override
virtual BVHLayoutMask get_bvh_layout_mask() const override
void mem_copy_to(device_memory &mem) override
void mem_free(device_memory &mem) override
bool matching_rendering_and_denoising_devices
void const_copy_to(const char *name, void *host, size_t size) override
DeviceKernelStatus get_active_kernel_switch_state() override
virtual bool show_samples() const override
int get_split_task_count(DeviceTask &task) override
SubDevice * find_matching_mem_device(device_ptr key, SubDevice &sub)
vector< vector< SubDevice * > > peer_islands
void mem_zero(device_memory &mem) override
void build_bvh(BVH *bvh, Progress &progress, bool refit) override
void task_wait() override
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override
void map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override
bool use_denoising
MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
void task_add(DeviceTask &task) override
SubDevice * find_suitable_mem_device(device_ptr key, const vector< SubDevice * > &island)
void mem_alloc(device_memory &mem) override
void task_cancel() override
list< SubDevice > denoising_devices
void draw_pixels(device_memory &rgba, int y, int w, int h, int width, int height, int dx, int dy, int dw, int dh, bool transparent, const DeviceDrawParams &draw_params) override
device_ptr unique_key
virtual void * osl_memory() override
device_ptr find_matching_mem(device_ptr key, SubDevice &sub)
device_vector< float > buffer
Definition: buffers.h:80
bool map_neighbor_copied
Definition: buffers.h:81
static const int SIZE
Definition: buffers.h:173
RenderTile target
Definition: buffers.h:177
RenderTile tiles[SIZE]
Definition: buffers.h:176
RenderBuffers * buffers
Definition: buffers.h:152
int device_size
Definition: buffers.h:147
device_ptr buffer
Definition: buffers.h:146
size_t mem_used
Definition: util_stats.h:48
void mem_free(size_t size)
Definition: util_stats.h:42
void mem_alloc(size_t size)
Definition: util_stats.h:36
const char * name
MemoryType type
device_ptr device_pointer
Device * device
size_t device_size
void swap_device(Device *new_device, size_t new_device_size, device_ptr new_device_ptr)
void copy_from_device()
void copy_to_device()
DeviceKernelStatus
Definition: device.h:63
@ DEVICE_KERNEL_UNKNOWN
Definition: device.h:67
@ DEVICE_KERNEL_USING_FEATURE_KERNEL
Definition: device.h:65
@ DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE
Definition: device.h:64
@ DEVICE_KERNEL_FEATURE_KERNEL_INVALID
Definition: device.h:66
@ DEVICE_CUDA
Definition: device.h:47
@ DEVICE_CPU
Definition: device.h:45
@ DEVICE_OPTIX
Definition: device.h:50
Device * device_network_create(DeviceInfo &info, Stats &stats, Profiler &profiler, const char *address)
@ MEM_PIXELS
Definition: device_memory.h:41
@ MEM_GLOBAL
Definition: device_memory.h:39
@ MEM_TEXTURE
Definition: device_memory.h:40
@ MEM_READ_WRITE
Definition: device_memory.h:37
@ MEM_DEVICE_ONLY
Definition: device_memory.h:38
@ MEM_READ_ONLY
Definition: device_memory.h:36
Device * device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
#define CCL_NAMESPACE_END
@ BVH_LAYOUT_OPTIX
@ BVH_LAYOUT_NONE
@ BVH_LAYOUT_EMBREE
@ BVH_LAYOUT_MULTI_OPTIX
@ BVH_LAYOUT_BVH2
@ BVH_LAYOUT_ALL
@ BVH_LAYOUT_MULTI_OPTIX_EMBREE
struct blender::compositor::@172::@174 task
map< device_ptr, device_ptr > ptr_map
void time_sleep(double t)
Definition: util_time.cpp:57
uint64_t device_ptr
Definition: util_types.h:62