 |
Blender
V2.93
|
Go to the documentation of this file.
/* Per-architecture launch-bound parameters.
 *
 * Exactly one branch is selected from __CUDA_ARCH__ (NVCC encodes compute capability x.y
 * as x * 100 + y * 10) and defines the following macros:
 *
 * - CUDA_MULTIPRESSOR_MAX_REGISTERS: register budget that CUDA_LAUNCH_BOUNDS divides between
 *   blocks. The "MULTIPRESSOR" spelling is what the rest of this file references, so it is
 *   kept as is.
 * - CUDA_MULTIPROCESSOR_MAX_BLOCKS: upper limit used by the "blocks per multiprocessor" check.
 * - CUDA_BLOCK_MAX_THREADS: upper limit used by the "threads per block" check.
 * - CUDA_THREAD_MAX_REGISTERS: upper limit used by the "registers per thread" checks.
 * - CUDA_THREADS_BLOCK_WIDTH: block width; the checks in this file square it to get the
 *   number of threads per block.
 * - CUDA_KERNEL_MAX_REGISTERS, CUDA_KERNEL_BRANCHED_MAX_REGISTERS: per-thread register caps
 *   (presumably passed to CUDA_LAUNCH_BOUNDS by the kernels; call sites are not in this file).
 *
 * Branch order matters: the exact matches for 3.x are tested before the "<=" ranges.
 * An undefined __CUDA_ARCH__ evaluates to 0 inside #if and therefore lands in the
 * "<= 699" branch, as does any value that is not one of the exact 3.x matches. */
#if __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350
/* Compute capability 3.0 and 3.5. */
#  define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
#  define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
#  define CUDA_BLOCK_MAX_THREADS 1024
#  define CUDA_THREAD_MAX_REGISTERS 63

#  define CUDA_THREADS_BLOCK_WIDTH 16
#  define CUDA_KERNEL_MAX_REGISTERS 63
#  define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63

#elif __CUDA_ARCH__ == 320
/* Compute capability 3.2: half the register budget of the other branches. */
#  define CUDA_MULTIPRESSOR_MAX_REGISTERS 32768
#  define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
#  define CUDA_BLOCK_MAX_THREADS 1024
#  define CUDA_THREAD_MAX_REGISTERS 63

#  define CUDA_THREADS_BLOCK_WIDTH 16
#  define CUDA_KERNEL_MAX_REGISTERS 63
#  define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63

#elif __CUDA_ARCH__ == 370
/* Compute capability 3.7. */
#  define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
#  define CUDA_MULTIPROCESSOR_MAX_BLOCKS 16
#  define CUDA_BLOCK_MAX_THREADS 1024
#  define CUDA_THREAD_MAX_REGISTERS 255

#  define CUDA_THREADS_BLOCK_WIDTH 16
#  define CUDA_KERNEL_MAX_REGISTERS 63
#  define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63

#elif __CUDA_ARCH__ <= 699
/* Everything up to compute capability 6.x that was not matched above. */
#  define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
#  define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
#  define CUDA_BLOCK_MAX_THREADS 1024
#  define CUDA_THREAD_MAX_REGISTERS 255

#  define CUDA_THREADS_BLOCK_WIDTH 16

/* The higher cap is only used for 6.x when compiling with CUDA 9 or newer. */
#  if __CUDACC_VER_MAJOR__ >= 9 && __CUDA_ARCH__ >= 600
#    define CUDA_KERNEL_MAX_REGISTERS 64
#  else
#    define CUDA_KERNEL_MAX_REGISTERS 48
#  endif
#  define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 63

#elif __CUDA_ARCH__ <= 899
/* Compute capability 7.x and 8.x. */
#  define CUDA_MULTIPRESSOR_MAX_REGISTERS 65536
#  define CUDA_MULTIPROCESSOR_MAX_BLOCKS 32
#  define CUDA_BLOCK_MAX_THREADS 1024
#  define CUDA_THREAD_MAX_REGISTERS 255

#  define CUDA_THREADS_BLOCK_WIDTH 16
#  define CUDA_KERNEL_MAX_REGISTERS 64
#  define CUDA_KERNEL_BRANCHED_MAX_REGISTERS 72

#else
/* Anything newer than 8.x has no entry in the table above. */
#  error "Unknown or unsupported CUDA architecture, can't determine launch bounds"
#endif
/* Register cap named for the split kernels (users are not visible in this file):
 * it is an alias of the per-thread maximum of the selected architecture, i.e. no
 * cap tighter than the architecture limit is requested. */
#define CUDA_KERNEL_SPLIT_MAX_REGISTERS CUDA_THREAD_MAX_REGISTERS
/* Expands to a __launch_bounds__() qualifier for a square thread block.
 *
 * threads_block_width:  width of the block; the block has width * width threads.
 * thread_num_registers: number of registers a single thread may use.
 *
 * The first __launch_bounds__ argument is the thread count of one block. The second is the
 * integer quotient of CUDA_MULTIPRESSOR_MAX_REGISTERS by the registers one such block needs
 * (threads * registers per thread), i.e. how many blocks fit in the register budget.
 *
 * Every argument use is parenthesized so that expressions such as `8 + 8` expand correctly.
 * Both arguments are expanded more than once, so pass side-effect free constants only. */
#define CUDA_LAUNCH_BOUNDS(threads_block_width, thread_num_registers) \
  __launch_bounds__((threads_block_width) * (threads_block_width), \
                    CUDA_MULTIPRESSOR_MAX_REGISTERS / \
                        ((threads_block_width) * (threads_block_width) * \
                         (thread_num_registers)))
105 #if CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH > CUDA_BLOCK_MAX_THREADS
106 # error "Maximum number of threads per block exceeded"
109 #if CUDA_MULTIPRESSOR_MAX_REGISTERS / \
110 (CUDA_THREADS_BLOCK_WIDTH * CUDA_THREADS_BLOCK_WIDTH * CUDA_KERNEL_MAX_REGISTERS) > \
111 CUDA_MULTIPROCESSOR_MAX_BLOCKS
112 # error "Maximum number of blocks per multiprocessor exceeded"
115 #if CUDA_KERNEL_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS
116 # error "Maximum number of registers per thread exceeded"
119 #if CUDA_KERNEL_BRANCHED_MAX_REGISTERS > CUDA_THREAD_MAX_REGISTERS
120 # error "Maximum number of registers per thread exceeded"