Blender  V2.93
device_cuda.h
Go to the documentation of this file.
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef WITH_CUDA
18 
19 # include "device/device.h"
20 # include "device/device_denoising.h"
22 
23 # include "util/util_map.h"
24 # include "util/util_task.h"
25 
26 # ifdef WITH_CUDA_DYNLOAD
27 # include "cuew.h"
28 # else
29 # include "util/util_opengl.h"
30 # include <cuda.h>
31 # include <cudaGL.h>
32 # endif
33 
35 
36 class CUDASplitKernel;
37 
38 class CUDADevice : public Device {
39 
40  friend class CUDASplitKernelFunction;
41  friend class CUDASplitKernel;
42  friend class CUDAContextScope;
43 
44  public:
46  CUdevice cuDevice;
47  CUcontext cuContext;
48  CUmodule cuModule, cuFilterModule;
49  size_t device_texture_headroom;
50  size_t device_working_headroom;
51  bool move_texture_to_host;
52  size_t map_host_used;
53  size_t map_host_limit;
54  int can_map_host;
55  int pitch_alignment;
56  int cuDevId;
57  int cuDevArchitecture;
58  bool first_error;
59  CUDASplitKernel *split_kernel;
60 
61  struct CUDAMem {
62  CUDAMem() : texobject(0), array(0), use_mapped_host(false)
63  {
64  }
65 
66  CUtexObject texobject;
67  CUarray array;
68 
69  /* If true, a mapped host memory in shared_pointer is being used. */
70  bool use_mapped_host;
71  };
72  typedef map<device_memory *, CUDAMem> CUDAMemMap;
73  CUDAMemMap cuda_mem_map;
74  thread_mutex cuda_mem_map_mutex;
75 
76  struct PixelMem {
77  GLuint cuPBO;
78  CUgraphicsResource cuPBOresource;
79  GLuint cuTexId;
80  int w, h;
81  };
82  map<device_ptr, PixelMem> pixel_mem_map;
83 
84  /* Bindless Textures */
85  device_vector<TextureInfo> texture_info;
86  bool need_texture_info;
87 
88  /* Kernels */
89  struct {
90  bool loaded;
91 
92  CUfunction adaptive_stopping;
93  CUfunction adaptive_filter_x;
94  CUfunction adaptive_filter_y;
95  CUfunction adaptive_scale_samples;
96  int adaptive_num_threads_per_block;
97  } functions;
98 
99  static bool have_precompiled_kernels();
100 
101  virtual bool show_samples() const override;
102 
103  virtual BVHLayoutMask get_bvh_layout_mask() const override;
104 
105  void set_error(const string &error) override;
106 
107  CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_);
108 
109  virtual ~CUDADevice();
110 
111  bool support_device(const DeviceRequestedFeatures & /*requested_features*/);
112 
113  bool check_peer_access(Device *peer_device) override;
114 
115  bool use_adaptive_compilation();
116 
117  bool use_split_kernel();
118 
119  virtual string compile_kernel_get_common_cflags(
120  const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false);
121 
122  string compile_kernel(const DeviceRequestedFeatures &requested_features,
123  const char *name,
124  const char *base = "cuda",
125  bool force_ptx = false);
126 
127  virtual bool load_kernels(const DeviceRequestedFeatures &requested_features) override;
128 
129  void load_functions();
130 
131  void reserve_local_memory(const DeviceRequestedFeatures &requested_features);
132 
133  void init_host_memory();
134 
135  void load_texture_info();
136 
137  void move_textures_to_host(size_t size, bool for_texture);
138 
139  CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
140 
141  void generic_copy_to(device_memory &mem);
142 
143  void generic_free(device_memory &mem);
144 
145  void mem_alloc(device_memory &mem) override;
146 
147  void mem_copy_to(device_memory &mem) override;
148 
149  void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
150 
151  void mem_zero(device_memory &mem) override;
152 
153  void mem_free(device_memory &mem) override;
154 
155  device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
156 
157  virtual void const_copy_to(const char *name, void *host, size_t size) override;
158 
159  void global_alloc(device_memory &mem);
160 
161  void global_free(device_memory &mem);
162 
163  void tex_alloc(device_texture &mem);
164 
165  void tex_free(device_texture &mem);
166 
167  bool denoising_non_local_means(device_ptr image_ptr,
168  device_ptr guide_ptr,
169  device_ptr variance_ptr,
170  device_ptr out_ptr,
172 
173  bool denoising_construct_transform(DenoisingTask *task);
174 
175  bool denoising_accumulate(device_ptr color_ptr,
176  device_ptr color_variance_ptr,
177  device_ptr scale_ptr,
178  int frame,
180 
181  bool denoising_solve(device_ptr output_ptr, DenoisingTask *task);
182 
183  bool denoising_combine_halves(device_ptr a_ptr,
184  device_ptr b_ptr,
185  device_ptr mean_ptr,
186  device_ptr variance_ptr,
187  int r,
188  int4 rect,
190 
191  bool denoising_divide_shadow(device_ptr a_ptr,
192  device_ptr b_ptr,
193  device_ptr sample_variance_ptr,
194  device_ptr sv_variance_ptr,
195  device_ptr buffer_variance_ptr,
197 
198  bool denoising_get_feature(int mean_offset,
199  int variance_offset,
200  device_ptr mean_ptr,
201  device_ptr variance_ptr,
202  float scale,
204 
205  bool denoising_write_feature(int out_offset,
206  device_ptr from_ptr,
207  device_ptr buffer_ptr,
209 
210  bool denoising_detect_outliers(device_ptr image_ptr,
211  device_ptr variance_ptr,
212  device_ptr depth_ptr,
213  device_ptr output_ptr,
215 
216  void denoise(RenderTile &rtile, DenoisingTask &denoising);
217 
218  void adaptive_sampling_filter(uint filter_sample,
219  WorkTile *wtile,
220  CUdeviceptr d_wtile,
221  CUstream stream = 0);
222  void adaptive_sampling_post(RenderTile &rtile,
223  WorkTile *wtile,
224  CUdeviceptr d_wtile,
225  CUstream stream = 0);
226 
227  void render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
228 
229  void film_convert(DeviceTask &task,
231  device_ptr rgba_byte,
232  device_ptr rgba_half);
233 
234  void shader(DeviceTask &task);
235 
236  CUdeviceptr map_pixels(device_ptr mem);
237 
238  void unmap_pixels(device_ptr mem);
239 
240  void pixels_alloc(device_memory &mem);
241 
242  void pixels_copy_from(device_memory &mem, int y, int w, int h);
243 
244  void pixels_free(device_memory &mem);
245 
246  void draw_pixels(device_memory &mem,
247  int y,
248  int w,
249  int h,
250  int width,
251  int height,
252  int dx,
253  int dy,
254  int dw,
255  int dh,
256  bool transparent,
257  const DeviceDrawParams &draw_params) override;
258 
259  void thread_run(DeviceTask &task);
260 
261  virtual void task_add(DeviceTask &task) override;
262 
263  virtual void task_wait() override;
264 
265  virtual void task_cancel() override;
266 };
267 
269 
270 #endif
unsigned int uint
Definition: BLI_sys_types.h:83
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble GLdouble r _GL_VOID_RET _GL_VOID GLfloat GLfloat r _GL_VOID_RET _GL_VOID GLint GLint r _GL_VOID_RET _GL_VOID GLshort GLshort r _GL_VOID_RET _GL_VOID GLdouble GLdouble r
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei width
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei height
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
int BVHLayoutMask
Definition: bvh_params.h:39
Definition: device.h:293
virtual void draw_pixels(device_memory &mem, int y, int w, int h, int width, int height, int dx, int dy, int dw, int dh, bool transparent, const DeviceDrawParams &draw_params)
Definition: device.cpp:234
virtual device_ptr mem_alloc_sub_ptr(device_memory &, int, int)
Definition: device.h:324
virtual BVHLayoutMask get_bvh_layout_mask() const =0
virtual void const_copy_to(const char *name, void *host, size_t size)=0
virtual void mem_zero(device_memory &mem)=0
virtual void task_wait()=0
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem)=0
virtual void task_cancel()=0
virtual bool check_peer_access(Device *)
Definition: device.h:458
virtual void mem_free(device_memory &mem)=0
virtual void set_error(const string &error)
Definition: device.h:346
virtual void mem_copy_to(device_memory &mem)=0
virtual void task_add(DeviceTask &task)=0
virtual void mem_alloc(device_memory &mem)=0
virtual bool load_kernels(const DeviceRequestedFeatures &)
Definition: device.h:380
virtual bool show_samples() const
Definition: device.h:354
TaskPool * task_pool
DO_INLINE void filter(lfVector *V, fmatrix3x3 *S)
unsigned long long CUtexObject
#define CCL_NAMESPACE_END
void KERNEL_FUNCTION_FULL_NAME() shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int filter, int i, int offset, int sample)
__kernel void ccl_constant KernelData ccl_global void ccl_global char ccl_global int ccl_global char ccl_global unsigned int ccl_global float * buffer
static void error(const char *str)
Definition: meshlaplacian.c:65
struct blender::compositor::@172::@174 task
void split(const std::string &s, const char delim, std::vector< std::string > &tokens)
Definition: abc_util.cc:115
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex
Definition: util_thread.h:40
uint64_t device_ptr
Definition: util_types.h:62