Blender  V2.93
numaapi_linux.c
Go to the documentation of this file.
1 // Copyright (c) 2016, libnumaapi authors
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to
5 // deal in the Software without restriction, including without limitation the
6 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 // sell copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 // IN THE SOFTWARE.
20 //
21 // Author: Sergey Sharybin <sergey.vfx@gmail.com>
22 
23 #include "build_config.h"
24 
25 #if OS_LINUX
26 
27 #include "numaapi.h"
28 
29 #include <stdlib.h>
30 
31 #ifndef WITH_DYNLOAD
32 # include <numa.h>
33 #else
34 # include <dlfcn.h>
35 #endif
36 
37 #ifdef WITH_DYNLOAD
38 
39 // Handle of the dynamically loaded numa library.
// Assigned from dlopen() via findLibrary() and released with dlclose() in
// numaExit(); NULL whenever no library is loaded.
40 static void* numa_lib;
41 
42 // Function types of all symbols which are read from the library.
// Each typedef is named t<symbol> so that the lookup macro in
// loadNumaSymbols() can build the cast for dlsym() from the symbol name.
43 struct bitmask;
44 typedef int tnuma_available(void);
45 typedef int tnuma_max_node(void);
46 typedef int tnuma_node_to_cpus(int node, struct bitmask* mask);
47 typedef long tnuma_node_size(int node, long* freep);
48 typedef int tnuma_run_on_node(int node);
49 typedef void* tnuma_alloc_onnode(size_t size, int node);
50 typedef void* tnuma_alloc_local(size_t size);
51 typedef void tnuma_free(void* start, size_t size);
52 typedef struct bitmask* tnuma_bitmask_clearall(struct bitmask *bitmask);
53 typedef int tnuma_bitmask_isbitset(const struct bitmask *bitmask,
54  unsigned int n);
55 typedef struct bitmask* tnuma_bitmask_setbit(struct bitmask *bitmask,
56  unsigned int n);
57 typedef unsigned int tnuma_bitmask_nbytes(struct bitmask *bitmask);
58 typedef void tnuma_bitmask_free(struct bitmask *bitmask);
59 typedef struct bitmask* tnuma_allocate_cpumask(void);
60 typedef struct bitmask* tnuma_allocate_nodemask(void);
61 typedef void tnuma_free_cpumask(struct bitmask* bitmask);
62 typedef void tnuma_free_nodemask(struct bitmask* bitmask);
63 typedef int tnuma_run_on_node_mask(struct bitmask *nodemask);
64 typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask);
65 typedef struct bitmask *tnuma_get_run_node_mask(void);
66 typedef void tnuma_set_interleave_mask(struct bitmask *nodemask);
67 typedef void tnuma_set_localalloc(void);
68 
69 // Actual symbols.
// Function pointers filled in by loadNumaSymbols() with the result of
// dlsym(); a pointer stays NULL when the library does not export the symbol.
70 static tnuma_available* numa_available;
71 static tnuma_max_node* numa_max_node;
72 static tnuma_node_to_cpus* numa_node_to_cpus;
73 static tnuma_node_size* numa_node_size;
74 static tnuma_run_on_node* numa_run_on_node;
75 static tnuma_alloc_onnode* numa_alloc_onnode;
76 static tnuma_alloc_local* numa_alloc_local;
77 static tnuma_free* numa_free;
78 static tnuma_bitmask_clearall* numa_bitmask_clearall;
79 static tnuma_bitmask_isbitset* numa_bitmask_isbitset;
80 static tnuma_bitmask_setbit* numa_bitmask_setbit;
81 static tnuma_bitmask_nbytes* numa_bitmask_nbytes;
82 static tnuma_bitmask_free* numa_bitmask_free;
83 static tnuma_allocate_cpumask* numa_allocate_cpumask;
84 static tnuma_allocate_nodemask* numa_allocate_nodemask;
// The two mask-specific free functions below are checked against NULL by
// their callers in this file, which fall back to numa_bitmask_free().
85 static tnuma_free_nodemask* numa_free_nodemask;
86 static tnuma_free_cpumask* numa_free_cpumask;
87 static tnuma_run_on_node_mask* numa_run_on_node_mask;
88 static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all;
89 static tnuma_get_run_node_mask* numa_get_run_node_mask;
90 static tnuma_set_interleave_mask* numa_set_interleave_mask;
91 static tnuma_set_localalloc* numa_set_localalloc;
92 
// Open the first library from a NULL-terminated list of candidate names.
//
// Candidates are tried in order with lazy symbol binding. Returns the handle
// of the first one dlopen() accepts, or NULL when none could be opened.
static void* findLibrary(const char** paths) {
  for (const char** candidate = paths; *candidate != NULL; ++candidate) {
    void* handle = dlopen(*candidate, RTLD_LAZY);
    if (handle == NULL) {
      continue;
    }
    return handle;
  }
  return NULL;
}
104 
105 static void numaExit(void) {
106  if (numa_lib == NULL) {
107  return;
108  }
109  dlclose(numa_lib);
110  numa_lib = NULL;
111 }
112 
113 static NUMAAPI_Result loadNumaSymbols(void) {
114  // Prevent multiple initializations.
115  static bool initialized = false;
117  if (initialized) {
118  return result;
119  }
120  initialized = true;
121  // Find appropriate .so library.
122  const char* numa_paths[] = {
123  "libnuma.so.1",
124  "libnuma.so",
125  NULL};
126  // Register de-initialization.
127  const int error = atexit(numaExit);
128  if (error) {
130  return result;
131  }
132  // Load library.
133  numa_lib = findLibrary(numa_paths);
134  if (numa_lib == NULL) {
136  return result;
137  }
138  // Load symbols.
139 
140 #define _LIBRARY_FIND(lib, name) \
141  do { \
142  name = (t##name *)dlsym(lib, #name); \
143  } while (0)
144 #define NUMA_LIBRARY_FIND(name) _LIBRARY_FIND(numa_lib, name)
145 
146  NUMA_LIBRARY_FIND(numa_available);
147  NUMA_LIBRARY_FIND(numa_max_node);
148  NUMA_LIBRARY_FIND(numa_node_to_cpus);
149  NUMA_LIBRARY_FIND(numa_node_size);
150  NUMA_LIBRARY_FIND(numa_run_on_node);
151  NUMA_LIBRARY_FIND(numa_alloc_onnode);
152  NUMA_LIBRARY_FIND(numa_alloc_local);
153  NUMA_LIBRARY_FIND(numa_free);
154  NUMA_LIBRARY_FIND(numa_bitmask_clearall);
155  NUMA_LIBRARY_FIND(numa_bitmask_isbitset);
156  NUMA_LIBRARY_FIND(numa_bitmask_setbit);
157  NUMA_LIBRARY_FIND(numa_bitmask_nbytes);
158  NUMA_LIBRARY_FIND(numa_bitmask_free);
159  NUMA_LIBRARY_FIND(numa_allocate_cpumask);
160  NUMA_LIBRARY_FIND(numa_allocate_nodemask);
161  NUMA_LIBRARY_FIND(numa_free_cpumask);
162  NUMA_LIBRARY_FIND(numa_free_nodemask);
163  NUMA_LIBRARY_FIND(numa_run_on_node_mask);
164  NUMA_LIBRARY_FIND(numa_run_on_node_mask_all);
165  NUMA_LIBRARY_FIND(numa_get_run_node_mask);
166  NUMA_LIBRARY_FIND(numa_set_interleave_mask);
167  NUMA_LIBRARY_FIND(numa_set_localalloc);
168 
169 #undef NUMA_LIBRARY_FIND
170 #undef _LIBRARY_FIND
171 
173  return result;
174 }
175 #endif
176 
178 // Initialization.
179 
181 #ifdef WITH_DYNLOAD
182  NUMAAPI_Result result = loadNumaSymbols();
183  if (result != NUMAAPI_SUCCESS) {
184  return result;
185  }
186 #endif
187  if (numa_available() < 0) {
188  return NUMAAPI_NOT_AVAILABLE;
189  }
190  return NUMAAPI_SUCCESS;
191 }
192 
194 // Topology query.
195 
// Get the number of NUMA nodes.
//
// Computed as the value of numa_max_node() plus one.
int numaAPI_GetNumNodes(void) {
  const int max_node = numa_max_node();
  return max_node + 1;
}
199 
// NOTE(review): the statement(s) between the braces are missing from this
// listing (the embedded numbering skips from 200 to 202). As shown the
// function returns no value; restore the body from the original file.
200 bool numaAPI_IsNodeAvailable(int node) {
202 }
203 
// Count the processors which belong to the given NUMA node.
//
// Returns the number of bits set in the CPU mask which numa_node_to_cpus()
// fills in for `node`, or 0 when `node` is negative.
int numaAPI_GetNumNodeProcessors(int node) {
  // A negative index cannot name a node; do not hand it to the library.
  if (node < 0) {
    return 0;
  }
  struct bitmask* cpu_mask = numa_allocate_cpumask();
  numa_node_to_cpus(node, cpu_mask);
  const unsigned int num_bytes = numa_bitmask_nbytes(cpu_mask);
  const unsigned int num_bits = num_bytes * 8;
  // TODO(sergey): There might be a faster way of counting the set bits.
  int num_processors = 0;
  for (unsigned int bit = 0; bit < num_bits; ++bit) {
    if (numa_bitmask_isbitset(cpu_mask, bit)) {
      ++num_processors;
    }
  }
#ifdef WITH_DYNLOAD
  // numa_free_cpumask may not have been resolved from the loaded library;
  // numa_bitmask_free is used in that case.
  if (numa_free_cpumask != NULL) {
    numa_free_cpumask(cpu_mask);
  } else {
    numa_bitmask_free(cpu_mask);
  }
#else
  numa_free_cpumask(cpu_mask);
#endif
  return num_processors;
}
227 
229 // Topology helpers.
230 
// Count the processors of all nodes in the current run node mask.
//
// Queries numa_get_run_node_mask() and, for every node whose bit is set in
// it, adds the result of numaAPI_GetNumNodeProcessors() for that node.
int numaAPI_GetNumCurrentNodesProcessors(void) {
  struct bitmask* node_mask = numa_get_run_node_mask();
  const unsigned int num_bytes = numa_bitmask_nbytes(node_mask);
  const unsigned int num_bits = num_bytes * 8;
  int num_processors = 0;
  for (unsigned int bit = 0; bit < num_bits; ++bit) {
    if (numa_bitmask_isbitset(node_mask, bit)) {
      num_processors += numaAPI_GetNumNodeProcessors(bit);
    }
  }
  // The mask obtained above is released here.
  numa_bitmask_free(node_mask);
  return num_processors;
}
244 
246 // Affinities.
247 
// Returns true on every path visible here.
// NOTE(review): embedded line 249 is missing from this listing, so whatever
// is done with `node` before returning is not shown; confirm against the
// original file.
248 bool numaAPI_RunProcessOnNode(int node) {
250  return true;
251 }
252 
// Restrict the calling thread to run on the given NUMA node.
//
// Builds a node mask with only `node` set and passes it to
// numa_run_on_node_mask_all(). When that succeeds and the node reports a
// positive memory size, the memory policy calls below are applied as well.
//
// Returns true when numa_run_on_node_mask_all() reported success (0), false
// when it reported failure or when `node` is negative.
bool numaAPI_RunThreadOnNode(int node) {
  // A negative index cannot name a node.
  if (node < 0) {
    return false;
  }
  // Construct bit mask from node index.
  struct bitmask* node_mask = numa_allocate_nodemask();
  numa_bitmask_clearall(node_mask);
  numa_bitmask_setbit(node_mask, (unsigned int)node);
  const bool is_pinned = (numa_run_on_node_mask_all(node_mask) == 0);
  // TODO(sergey): The following commands are based on x265 code, we might want
  // to make those optional, or require to call those explicitly.
  //
  // Current assumption is that this is similar to SetThreadGroupAffinity().
  //
  // The memory policy is left untouched when the thread could not be moved.
  if (is_pinned && numa_node_size(node, NULL) > 0) {
    numa_set_interleave_mask(node_mask);
    numa_set_localalloc();
  }
#ifdef WITH_DYNLOAD
  // numa_free_nodemask may not have been resolved from the loaded library;
  // numa_bitmask_free is used in that case.
  if (numa_free_nodemask != NULL) {
    numa_free_nodemask(node_mask);
  } else {
    numa_bitmask_free(node_mask);
  }
#else
  numa_free_nodemask(node_mask);
#endif
  return is_pinned;
}
278 
280 // Memory management.
281 
// Allocate `size` bytes on the given NUMA node.
//
// Forwards to numa_alloc_onnode() and returns its result unchanged.
void* numaAPI_AllocateOnNode(size_t size, int node) {
  void* memory = numa_alloc_onnode(size, node);
  return memory;
}
285 
// Allocate `size` bytes using numa_alloc_local().
//
// Returns the result of numa_alloc_local() unchanged.
void* numaAPI_AllocateLocal(size_t size) {
  void* memory = numa_alloc_local(size);
  return memory;
}
289 
// Release a block of `size` bytes starting at `start`.
//
// Forwards both arguments to numa_free() as-is.
void numaAPI_Free(void* start, size_t size) {
  numa_free(start, size);
  return;
}
293 
294 #endif // OS_LINUX
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
OperationNode * node
DRWShaderLibrary * lib
static bool initialized
Definition: gpu_init_exit.c:41
static void error(const char *str)
Definition: meshlaplacian.c:65
bool numaAPI_IsNodeAvailable(int node)
Definition: numaapi_stub.c:45
bool numaAPI_RunProcessOnNode(int node)
Definition: numaapi_stub.c:65
void * numaAPI_AllocateOnNode(size_t size, int node)
Definition: numaapi_stub.c:78
NUMAAPI_Result numaAPI_Initialize(void)
Definition: numaapi_stub.c:34
NUMAAPI_Result
Definition: numaapi.h:36
@ NUMAAPI_SUCCESS
Definition: numaapi.h:37
@ NUMAAPI_ERROR_ATEXIT
Definition: numaapi.h:43
@ NUMAAPI_NOT_AVAILABLE
Definition: numaapi.h:39
void * numaAPI_AllocateLocal(size_t size)
Definition: numaapi_stub.c:84
void numaAPI_Free(void *start, size_t size)
Definition: numaapi_stub.c:89
int numaAPI_GetNumCurrentNodesProcessors(void)
Definition: numaapi_stub.c:58
int numaAPI_GetNumNodeProcessors(int node)
Definition: numaapi_stub.c:50
int numaAPI_GetNumNodes(void)
Definition: numaapi_stub.c:41
bool numaAPI_RunThreadOnNode(int node)
Definition: numaapi_stub.c:70
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)