Blender  V2.93
numaapi_win32.c
Go to the documentation of this file.
1 // Copyright (c) 2016, libnumaapi authors
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to
5 // deal in the Software without restriction, including without limitation the
6 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 // sell copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 // IN THE SOFTWARE.
20 //
21 // Author: Sergey Sharybin <sergey.vfx@gmail.com>
22 
23 #include "build_config.h"
24 
25 #if OS_WIN
26 
27 #include "numaapi.h"
28 
29 #ifndef NOGDI
30 # define NOGDI
31 #endif
32 #ifndef NOMINMAX
33 # define NOMINMAX
34 #endif
35 #ifndef WIN32_LEAN_AND_MEAN
36 # define WIN32_LEAN_AND_MEAN
37 #endif
38 #ifndef NOCOMM
39 # define NOCOMM
40 #endif
41 
42 #include <stdlib.h>
43 #include <stdint.h>
44 #include <windows.h>
45 
46 #if ARCH_CPU_64_BITS
47 # include <VersionHelpers.h>
48 #endif
49 
51 // Initialization.
52 
53 // Kernel library, from where the symbols come.
54 static HMODULE kernel_lib;
55 
56 // Types of all symbols which are read from the library.
57 
58 // NUMA function types.
59 typedef BOOL t_GetNumaHighestNodeNumber(PULONG highest_node_number);
60 typedef BOOL t_GetNumaNodeProcessorMask(UCHAR node, ULONGLONG* processor_mask);
61 typedef BOOL t_GetNumaNodeProcessorMaskEx(USHORT node,
62  GROUP_AFFINITY* processor_mask);
63 typedef BOOL t_GetNumaProcessorNode(UCHAR processor, UCHAR* node_number);
64 typedef void* t_VirtualAllocExNuma(HANDLE process_handle,
65  LPVOID address,
66  SIZE_T size,
67  DWORD allocation_type,
68  DWORD protect,
69  DWORD preferred);
70 typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type);
71 // Threading function types.
72 typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle,
73  DWORD_PTR process_affinity_mask);
74 typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle,
75  const GROUP_AFFINITY* group_affinity,
76  GROUP_AFFINITY* PreviousGroupAffinity);
77 typedef BOOL t_GetThreadGroupAffinity(HANDLE thread_handle,
78  GROUP_AFFINITY* group_affinity);
79 typedef DWORD t_GetCurrentProcessorNumber(void);
80 typedef void t_GetCurrentProcessorNumberEx(PROCESSOR_NUMBER* proc_number);
81 typedef DWORD t_GetActiveProcessorCount(WORD group_number);
82 
83 
84 // NUMA symbols.
85 static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber;
86 static t_GetNumaNodeProcessorMask* _GetNumaNodeProcessorMask;
87 static t_GetNumaNodeProcessorMaskEx* _GetNumaNodeProcessorMaskEx;
88 static t_GetNumaProcessorNode* _GetNumaProcessorNode;
89 static t_VirtualAllocExNuma* _VirtualAllocExNuma;
90 static t_VirtualFree* _VirtualFree;
91 // Threading symbols.
92 static t_SetProcessAffinityMask* _SetProcessAffinityMask;
93 static t_SetThreadGroupAffinity* _SetThreadGroupAffinity;
94 static t_GetThreadGroupAffinity* _GetThreadGroupAffinity;
95 static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber;
96 static t_GetCurrentProcessorNumberEx* _GetCurrentProcessorNumberEx;
97 static t_GetActiveProcessorCount* _GetActiveProcessorCount;
98 
// Exit handler which loadNumaSymbols() registers with atexit().
// Intentionally does nothing at the moment.
static void numaExit(void)
{
  // TODO(sergey): Consider closing library here.
}
102 
103 static NUMAAPI_Result loadNumaSymbols(void) {
104  // Prevent multiple initializations.
105  static bool initialized = false;
107  if (initialized) {
108  return result;
109  }
110  initialized = true;
111  // Register de-initialization.
112  const int error = atexit(numaExit);
113  if (error) {
115  return result;
116  }
117  // Load library.
118  kernel_lib = LoadLibraryA("Kernel32.dll");
119  // Load symbols.
120 
121 #define _LIBRARY_FIND(lib, name) \
122  do { \
123  _##name = (t_##name *)GetProcAddress(lib, #name); \
124  } while (0)
125 #define KERNEL_LIBRARY_FIND(name) _LIBRARY_FIND(kernel_lib, name)
126 
127  // NUMA.
128  KERNEL_LIBRARY_FIND(GetNumaHighestNodeNumber);
129  KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMask);
130  KERNEL_LIBRARY_FIND(GetNumaNodeProcessorMaskEx);
131  KERNEL_LIBRARY_FIND(GetNumaProcessorNode);
132  KERNEL_LIBRARY_FIND(VirtualAllocExNuma);
133  KERNEL_LIBRARY_FIND(VirtualFree);
134  // Threading.
135  KERNEL_LIBRARY_FIND(SetProcessAffinityMask);
136  KERNEL_LIBRARY_FIND(SetThreadGroupAffinity);
137  KERNEL_LIBRARY_FIND(GetThreadGroupAffinity);
138  KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber);
139  KERNEL_LIBRARY_FIND(GetCurrentProcessorNumberEx);
140  KERNEL_LIBRARY_FIND(GetActiveProcessorCount);
141 
142 #undef KERNEL_LIBRARY_FIND
143 #undef _LIBRARY_FIND
144 
146  return result;
147 }
148 
150 #if !ARCH_CPU_64_BITS
151  // No NUMA on 32 bit platforms.
152  return NUMAAPI_NOT_AVAILABLE;
153 #else
154  if (!IsWindows7OrGreater()) {
155  // Require Windows 7 or higher.
157  }
158  loadNumaSymbols();
159  return NUMAAPI_SUCCESS;
160 #endif
161 }
162 
164 // Internal helpers.
165 
// Returns the number of bits which are set to 1 in the given mask.
static int countNumSetBits(ULONGLONG mask) {
  // NOTE: mask must be unsigned, there is undefined behavior for signed ints.
  int num_bits = 0;
  // `mask & (mask - 1)` clears the lowest set bit, so the loop runs once per
  // set bit instead of once per bit position up to the highest set one.
  while (mask != 0) {
    mask &= mask - 1;
    ++num_bits;
  }
  return num_bits;
}
176 
178 // Topology query.
179 
180 int numaAPI_GetNumNodes(void) {
181  ULONG highest_node_number;
182  if (!_GetNumaHighestNodeNumber(&highest_node_number)) {
183  return 0;
184  }
185  // TODO(sergey): Resolve the type narrowing.
186  // NOTE: This is not necessarily a total amount of nodes in the system.
187  return (int)highest_node_number + 1;
188 }
189 
190 bool numaAPI_IsNodeAvailable(int node) {
191  // Trick to detect whether the node is usable or not: check whether
192  // there are any processors associated with it.
193  //
194  // This is needed because numaApiGetNumNodes() is not guaranteed to
195  // give total amount of nodes and some nodes might be unavailable.
196  GROUP_AFFINITY processor_mask = { 0 };
197  if (!_GetNumaNodeProcessorMaskEx(node, &processor_mask)) {
198  return false;
199  }
200  if (processor_mask.Mask == 0) {
201  return false;
202  }
203  return true;
204 }
205 
207  GROUP_AFFINITY processor_mask = { 0 };
208  if (!_GetNumaNodeProcessorMaskEx(node, &processor_mask)) {
209  return 0;
210  }
211  return countNumSetBits(processor_mask.Mask);
212 }
213 
215 // Topology helpers.
216 
218  HANDLE thread_handle = GetCurrentThread();
219  GROUP_AFFINITY group_affinity;
220  // TODO(sergey): Needs implementation.
221  if (!_GetThreadGroupAffinity(thread_handle, &group_affinity)) {
222  return 0;
223  }
224  // First, count number of possible bits in the affinity mask.
225  const int num_processors = countNumSetBits(group_affinity.Mask);
226  // Then check that it's not exceeding number of processors in tjhe group.
227  const int num_group_processors =
228  _GetActiveProcessorCount(group_affinity.Group);
229  if (num_group_processors < num_processors) {
230  return num_group_processors;
231  }
232  return num_processors;
233 }
234 
236 // Affinities.
237 
238 bool numaAPI_RunProcessOnNode(int node) {
239  // TODO(sergey): Make sure requested node is within active CPU group.
240  // Change affinity of the proces to make it to run on a given node.
241  HANDLE process_handle = GetCurrentProcess();
242  GROUP_AFFINITY processor_mask = { 0 };
243  if (_GetNumaNodeProcessorMaskEx(node, &processor_mask) == 0) {
244  return false;
245  }
246  // TODO: Affinity should respect processor group.
247  if (_SetProcessAffinityMask(process_handle, processor_mask.Mask) == 0) {
248  return false;
249  }
250  return true;
251 }
252 
253 bool numaAPI_RunThreadOnNode(int node) {
254  HANDLE thread_handle = GetCurrentThread();
255  GROUP_AFFINITY group_affinity = { 0 };
256  if (_GetNumaNodeProcessorMaskEx(node, &group_affinity) == 0) {
257  return false;
258  }
259  if (_SetThreadGroupAffinity(thread_handle, &group_affinity, NULL) == 0) {
260  return false;
261  }
262  return true;
263 }
264 
266 // Memory management.
267 
268 void* numaAPI_AllocateOnNode(size_t size, int node) {
269  return _VirtualAllocExNuma(GetCurrentProcess(),
270  NULL,
271  size,
272  MEM_RESERVE | MEM_COMMIT,
273  PAGE_READWRITE,
274  node);
275 }
276 
277 void* numaAPI_AllocateLocal(size_t size) {
278  UCHAR current_processor = (UCHAR)_GetCurrentProcessorNumber();
279  UCHAR node;
280  if (!_GetNumaProcessorNode(current_processor, &node)) {
281  return NULL;
282  }
284 }
285 
286 void numaAPI_Free(void* start, size_t size) {
287  if (!_VirtualFree(start, size, MEM_RELEASE)) {
288  // TODO(sergey): Throw an error!
289  }
290 }
291 
292 #endif // OS_WIN
typedef LPVOID
#define UCHAR
Definition: GeoCommon.h:20
#define USHORT
Definition: GeoCommon.h:21
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
OperationNode * node
static bool initialized
Definition: gpu_init_exit.c:41
static void error(const char *str)
Definition: meshlaplacian.c:65
bool numaAPI_IsNodeAvailable(int node)
Definition: numaapi_stub.c:45
bool numaAPI_RunProcessOnNode(int node)
Definition: numaapi_stub.c:65
void * numaAPI_AllocateOnNode(size_t size, int node)
Definition: numaapi_stub.c:78
NUMAAPI_Result numaAPI_Initialize(void)
Definition: numaapi_stub.c:34
NUMAAPI_Result
Definition: numaapi.h:36
@ NUMAAPI_SUCCESS
Definition: numaapi.h:37
@ NUMAAPI_ERROR_ATEXIT
Definition: numaapi.h:43
@ NUMAAPI_NOT_AVAILABLE
Definition: numaapi.h:39
void * numaAPI_AllocateLocal(size_t size)
Definition: numaapi_stub.c:84
void numaAPI_Free(void *start, size_t size)
Definition: numaapi_stub.c:89
int numaAPI_GetNumCurrentNodesProcessors(void)
Definition: numaapi_stub.c:58
int numaAPI_GetNumNodeProcessors(int node)
Definition: numaapi_stub.c:50
int numaAPI_GetNumNodes(void)
Definition: numaapi_stub.c:41
bool numaAPI_RunThreadOnNode(int node)
Definition: numaapi_stub.c:70
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)