PAPI  5.7.0.0
linux-cuda.c File Reference

This implements a PAPI component that enables PAPI-C to access hardware monitoring counters for NVIDIA CUDA GPU devices through the CUPTI library. More...

Include dependency graph for linux-cuda.c:

Go to the source code of this file.

Data Structures

struct  papicuda_context_t
 
struct  papicuda_name_desc_t
 
struct  papicuda_device_desc_t
 
struct  papicuda_active_cucontext_t
 
struct  papicuda_control_t
 

Macros

#define PAPICUDA_MAX_COUNTERS   512
 
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
 
#define CUDA_CALL(call, handleerror)
 
#define CU_CALL(call, handleerror)
 
#define CUPTI_CALL(call, handleerror)
 
#define BUF_SIZE   (32 * 1024)
 
#define ALIGN_SIZE   (8)
 
#define ALIGN_BUFFER(buffer, align)   (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer))
 
#define CUAPIWEAK   __attribute__( ( weak ) )
 
#define DECLARECUFUNC(funcname, funcsig)   CUresult CUAPIWEAK funcname funcsig; CUresult( *funcname##Ptr ) funcsig;
 
#define CUDAAPIWEAK   __attribute__( ( weak ) )
 
#define DECLARECUDAFUNC(funcname, funcsig)   cudaError_t CUDAAPIWEAK funcname funcsig; cudaError_t( *funcname##Ptr ) funcsig;
 
#define CUPTIAPIWEAK   __attribute__( ( weak ) )
 
#define DECLARECUPTIFUNC(funcname, funcsig)   CUptiResult CUPTIAPIWEAK funcname funcsig; CUptiResult( *funcname##Ptr ) funcsig;
 
#define DLSYM_AND_CHECK(dllib, name)   dlsym( dllib, name ); if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
 

Functions

static int papicuda_cleanup_eventset (hwd_control_state_t *ctrl)
 
static int papicuda_add_native_events (papicuda_context_t *gctxt)
 
static int papicuda_convert_metric_value_to_long_long (CUpti_MetricValue metricValue, CUpti_MetricValueKind valueKind, long long int *papiValue)
 
static int papicuda_init_thread (hwd_context_t *ctx)
 
static int papicuda_init_component (int cidx)
 
static int papicuda_init_control_state (hwd_control_state_t *ctrl)
 
static int papicuda_update_control_state (hwd_control_state_t *ctrl, NativeInfo_t *nativeInfo, int nativeCount, hwd_context_t *ctx)
 
static int papicuda_start (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
static int papicuda_read (hwd_context_t *ctx, hwd_control_state_t *ctrl, long long **values, int flags)
 
static int papicuda_stop (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
int papicuda_shutdown_thread (hwd_context_t *ctx)
 
static int papicuda_shutdown_component (void)
 
static int papicuda_reset (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
static int papicuda_ctrl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
 
static int papicuda_set_domain (hwd_control_state_t *ctrl, int domain)
 
static int papicuda_ntv_enum_events (unsigned int *EventCode, int modifier)
 
static int papicuda_ntv_code_to_name (unsigned int EventCode, char *name, int len)
 
static int papicuda_ntv_code_to_descr (unsigned int EventCode, char *name, int len)
 
void readMetricValue (CUpti_EventGroup eventGroup, uint32_t numEvents, uint64_t numTotalInstances, CUdevice dev, uint32_t numMetrics, CUpti_MetricID *metricId, CUpti_MetricValueKind *myKinds, long long int *values, uint64_t timeDuration)
 

Variables

static void * dl1 = NULL
 
static void * dl2 = NULL
 
static void * dl3 = NULL
 
papi_vector_t _cuda_vector
 
static papicuda_context_t * global_papicuda_context = NULL
 
static papicuda_control_t * global_papicuda_control = NULL
 
void(* _dl_non_dynamic_init )(void)
 

Detailed Description

Author
Tony Castaldo tonycastaldo@icl.utk.edu (updated in 2018 to use batch reads and support nvlink metrics)
Asim YarKhan yarkhan@icl.utk.edu (updated in 2017 to support CUDA metrics)
Asim YarKhan yarkhan@icl.utk.edu (updated in 2015 for multiple CUDA contexts/devices)
Heike Jagode (First version, in collaboration with Robert Dietrich, TU Dresden) jagode@icl.utk.edu

The open source software license for PAPI conforms to the BSD License template.

Definition in file linux-cuda.c.

Macro Definition Documentation

◆ ALIGN_BUFFER

#define ALIGN_BUFFER (   buffer,
  align 
)    (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer))

Definition at line 167 of file linux-cuda.c.

◆ ALIGN_SIZE

#define ALIGN_SIZE   (8)

Definition at line 166 of file linux-cuda.c.

◆ BUF_SIZE

#define BUF_SIZE   (32 * 1024)

Definition at line 165 of file linux-cuda.c.

◆ CHECK_PRINT_EVAL

#define CHECK_PRINT_EVAL (   checkcond,
  str,
  evalthis 
)
Value:
do { \
int _cond = (checkcond); \
if (_cond) { \
SUBDBG("error: condition %s failed: %s.\n", #checkcond, str); \
evalthis; \
} \
} while (0)

Definition at line 124 of file linux-cuda.c.

◆ CU_CALL

#define CU_CALL (   call,
  handleerror 
)
Value:
do { \
CUresult _status = (call); \
if (_status != CUDA_SUCCESS) { \
SUBDBG("error: function %s failed with error %d.\n", #call, _status); \
/* fprintf(stderr,"Line %i CU_CALL error function %s failed with error %d.\n", __LINE__, #call, _status); */ \
handleerror; \
} \
} while (0)

Definition at line 142 of file linux-cuda.c.

◆ CUAPIWEAK

#define CUAPIWEAK   __attribute__( ( weak ) )

◆ CUDA_CALL

#define CUDA_CALL (   call,
  handleerror 
)
Value:
do { \
cudaError_t _status = (call); \
if (_status != cudaSuccess) { \
SUBDBG("error: function %s failed with error %d.\n", #call, _status); \
handleerror; \
} \
} while (0)

Definition at line 133 of file linux-cuda.c.

◆ CUDAAPIWEAK

#define CUDAAPIWEAK   __attribute__( ( weak ) )

◆ CUPTI_CALL

#define CUPTI_CALL (   call,
  handleerror 
)
Value:
do { \
CUptiResult _status = (call); \
if (_status != CUPTI_SUCCESS) { \
const char *errstr; \
(*cuptiGetResultStringPtr)(_status, &errstr); \
SUBDBG("error: function %s failed with error %s.\n", #call, errstr); \
/* fprintf(stderr, "Line %i CUPTI_CALL macro '%s' failed with error '%s'.\n", __LINE__, #call, errstr); */ \
handleerror; \
} \
} while (0)

Definition at line 153 of file linux-cuda.c.

◆ CUPTIAPIWEAK

#define CUPTIAPIWEAK   __attribute__( ( weak ) )

◆ DECLARECUDAFUNC

#define DECLARECUDAFUNC (   funcname,
  funcsig 
)    cudaError_t CUDAAPIWEAK funcname funcsig; cudaError_t( *funcname##Ptr ) funcsig;

◆ DECLARECUFUNC

#define DECLARECUFUNC (   funcname,
  funcsig 
)    CUresult CUAPIWEAK funcname funcsig; CUresult( *funcname##Ptr ) funcsig;

◆ DECLARECUPTIFUNC

#define DECLARECUPTIFUNC (   funcname,
  funcsig 
)    CUptiResult CUPTIAPIWEAK funcname funcsig; CUptiResult( *funcname##Ptr ) funcsig;

◆ DLSYM_AND_CHECK

#define DLSYM_AND_CHECK (   dllib,
  name 
)    dlsym( dllib, name ); if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }

◆ PAPICUDA_MAX_COUNTERS

#define PAPICUDA_MAX_COUNTERS   512

Definition at line 37 of file linux-cuda.c.

Function Documentation

◆ papicuda_add_native_events()

static int papicuda_add_native_events ( papicuda_context_t * gctxt)
static

Definition at line 333 of file linux-cuda.c.

334 {
335  SUBDBG("Entering\n");
336  CUresult cuErr;
337  int deviceNum;
338  uint32_t domainNum, eventNum;
339  papicuda_device_desc_t *mydevice;
340  char tmpStr[PAPI_MIN_STR_LEN];
341  tmpStr[PAPI_MIN_STR_LEN - 1] = '\0';
342  size_t tmpSizeBytes;
343  int ii;
344  uint32_t maxEventSize;
345 
346  /* How many CUDA devices do we have? */
347  cuErr = (*cuDeviceGetCountPtr) (&gctxt->deviceCount);
348  if(cuErr == CUDA_ERROR_NOT_INITIALIZED) {
349  /* If CUDA not initialized, initialize CUDA and retry the device list */
350  /* This is required for some of the PAPI tools, that do not call the init functions */
351  if(((*cuInitPtr) (0)) != CUDA_SUCCESS) {
352  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA cannot be found and initialized (cuInit failed).", PAPI_MAX_STR_LEN);
353  return PAPI_ENOSUPP;
354  }
355  CU_CALL((*cuDeviceGetCountPtr) (&gctxt->deviceCount), return (PAPI_EMISC));
356  }
357 
358  if(gctxt->deviceCount == 0) {
359  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA initialized but no CUDA devices found.", PAPI_MAX_STR_LEN);
360  return PAPI_ENOSUPP;
361  }
362  SUBDBG("Found %d devices\n", gctxt->deviceCount);
363 
364  /* allocate memory for device information */
366  CHECK_PRINT_EVAL(!gctxt->deviceArray, "ERROR CUDA: Could not allocate memory for CUDA device structure", return (PAPI_ENOMEM));
367 
368  /* For each device, get domains and domain-events counts */
369  maxEventSize = 0;
370  for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) {
371  mydevice = &gctxt->deviceArray[deviceNum];
372  /* Get device id, name, numeventdomains for each device */
373  CU_CALL((*cuDeviceGetPtr) (&mydevice->cuDev, deviceNum), // get CUdevice.
374  return (PAPI_EMISC)); // .. on failure.
375 
376  CU_CALL((*cuDeviceGetNamePtr) (mydevice->deviceName, // get device name,
377  PAPI_MIN_STR_LEN - 1, mydevice->cuDev), // .. max length,
378  return (PAPI_EMISC)); // .. on failure.
379 
380  mydevice->deviceName[PAPI_MIN_STR_LEN - 1] = '\0'; // z-terminate it.
381 
382  CUPTI_CALL((*cuptiDeviceGetNumEventDomainsPtr) // get number of domains,
383  (mydevice->cuDev, &mydevice->maxDomains),
384  return (PAPI_EMISC)); // .. on failure.
385 
386  /* Allocate space to hold domain IDs */
387  mydevice->domainIDArray = (CUpti_EventDomainID *) papi_calloc(
388  mydevice->maxDomains, sizeof(CUpti_EventDomainID));
389 
390  CHECK_PRINT_EVAL(!mydevice->domainIDArray, "ERROR CUDA: Could not allocate memory for CUDA device domains", return (PAPI_ENOMEM));
391 
392  /* Put domain ids into allocated space */
393  size_t domainarraysize = mydevice->maxDomains * sizeof(CUpti_EventDomainID);
394  CUPTI_CALL((*cuptiDeviceEnumEventDomainsPtr) // enumerate domain ids into space.
395  (mydevice->cuDev, &domainarraysize, mydevice->domainIDArray),
396  return (PAPI_EMISC)); // .. on failure.
397 
398  /* Allocate space to hold domain event counts */
399  mydevice->domainIDNumEvents = (uint32_t *) papi_calloc(mydevice->maxDomains, sizeof(uint32_t));
400  CHECK_PRINT_EVAL(!mydevice->domainIDNumEvents, "ERROR CUDA: Could not allocate memory for domain event counts", return (PAPI_ENOMEM));
401 
402  /* For each domain, get event counts in domainNumEvents[] */
403  for(domainNum = 0; domainNum < mydevice->maxDomains; domainNum++) { // For each domain,
404  CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum]; // .. make a copy of the domain ID.
405  /* Get num events in domain */
406  CUPTI_CALL((*cuptiEventDomainGetNumEventsPtr) // Get number of events in this domain,
407  (domainID, &mydevice->domainIDNumEvents[domainNum]), // .. store in array.
408  return (PAPI_EMISC)); // .. on failure.
409 
410  maxEventSize += mydevice->domainIDNumEvents[domainNum]; // keep track of overall number of events.
411  } // end for each domain.
412  } // end of for each device.
413 
414  // Increase maxEventSize for metrics on this device.
415  for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) { // for each device,
416  uint32_t maxMetrics = 0;
417  CUptiResult cuptiRet;
418  mydevice = &gctxt->deviceArray[deviceNum]; // Get papicuda_device_desc pointer.
419  cuptiRet = (*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics); // Read the # metrics on this device.
420  if (cuptiRet != CUPTI_SUCCESS || maxMetrics < 1) continue; // If no metrics, skip to next device.
421  maxEventSize += maxMetrics; // make room for metrics we discover later.
422  } // end for each device.
423 
424  /* Allocate space for all events and descriptors */
425  gctxt->availEventKind = (CUpti_ActivityKind *) papi_calloc(maxEventSize, sizeof(CUpti_ActivityKind));
426  CHECK_PRINT_EVAL(!gctxt->availEventKind, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM));
427  gctxt->availEventDeviceNum = (int *) papi_calloc(maxEventSize, sizeof(int));
428  CHECK_PRINT_EVAL(!gctxt->availEventDeviceNum, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM));
429  gctxt->availEventIDArray = (CUpti_EventID *) papi_calloc(maxEventSize, sizeof(CUpti_EventID));
430  CHECK_PRINT_EVAL(!gctxt->availEventIDArray, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM));
431  gctxt->availEventIsBeingMeasuredInEventset = (uint32_t *) papi_calloc(maxEventSize, sizeof(uint32_t));
432  CHECK_PRINT_EVAL(!gctxt->availEventIsBeingMeasuredInEventset, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM));
433  gctxt->availEventDesc = (papicuda_name_desc_t *) papi_calloc(maxEventSize, sizeof(papicuda_name_desc_t));
434  CHECK_PRINT_EVAL(!gctxt->availEventDesc, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM));
435 
436  // Record all events on each device, and their descriptions.
437  uint32_t idxEventArray = 0;
438  for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) { // loop through each device.
439  mydevice = &gctxt->deviceArray[deviceNum]; // get a pointer to the papicuda_device_desc struct.
440 
441  // For each domain, get and store event IDs, names, descriptions.
442  for(domainNum = 0; domainNum < mydevice->maxDomains; domainNum++) { // loop through the domains in this device.
443 
444  /* Get domain id */
445  CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum]; // get the domain id,
446  uint32_t domainNumEvents = mydevice->domainIDNumEvents[domainNum]; // get the number of events in it.
447 
448  // SUBDBG( "For device %d domain %d domainID %d numEvents %d\n", mydevice->cuDev, domainNum, domainID, domainNumEvents );
449 
450  CUpti_EventID *domainEventIDArray = // Make space for the events in this domain.
451  (CUpti_EventID *) papi_calloc(domainNumEvents, sizeof(CUpti_EventID)); // ..
452  CHECK_PRINT_EVAL(!domainEventIDArray, "ERROR CUDA: Could not allocate memory for events", return (PAPI_ENOMEM));
453 
454  size_t domainEventArraySize = domainNumEvents * sizeof(CUpti_EventID); // compute size of array we allocated.
455  CUPTI_CALL((*cuptiEventDomainEnumEventsPtr) // Enumerate the events in the domain,
456  (domainID, &domainEventArraySize, domainEventIDArray), // ..
457  return (PAPI_EMISC)); // .. on failure, exit.
458 
459  for(eventNum = 0; eventNum < domainNumEvents; eventNum++) { // Loop through the events in this domain.
460  CUpti_EventID myeventCuptiEventId = domainEventIDArray[eventNum]; // .. get this event,
461  gctxt->availEventKind[idxEventArray] = CUPTI_ACTIVITY_KIND_EVENT; // .. record the kind,
462  gctxt->availEventIDArray[idxEventArray] = myeventCuptiEventId; // .. record the id,
463  gctxt->availEventDeviceNum[idxEventArray] = deviceNum; // .. record the device number,
464 
465  tmpSizeBytes = PAPI_MIN_STR_LEN - 1 * sizeof(char); // .. compute size of name,
466  CUPTI_CALL((*cuptiEventGetAttributePtr) (myeventCuptiEventId, // .. Get the event name seen by cupti,
467  CUPTI_EVENT_ATTR_NAME, &tmpSizeBytes, tmpStr), // .. into tmpStr.
468  return (PAPI_EMISC)); // .. on failure, exit routine.
469 
470  snprintf(gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, // record expanded name for papi user.
471  "event:%s:device=%d", tmpStr, deviceNum);
472  gctxt->availEventDesc[idxEventArray].name[PAPI_MIN_STR_LEN - 1] = '\0'; // ensure null termination.
473  char *nameTmpPtr = gctxt->availEventDesc[idxEventArray].name; // For looping, get pointer to name.
474  for(ii = 0; ii < (int) strlen(nameTmpPtr); ii++) { // Replace spaces with underscores.
475  if(nameTmpPtr[ii] == ' ') nameTmpPtr[ii] = '_'; // ..
476  }
477 
478  /* Save description in the native event array */
479  tmpSizeBytes = PAPI_2MAX_STR_LEN - 1 * sizeof(char); // Most space to use for description.
480  CUPTI_CALL((*cuptiEventGetAttributePtr) (myeventCuptiEventId, // Get it,
481  CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &tmpSizeBytes, // .. Set limit (and receive bytes written),
482  gctxt->availEventDesc[idxEventArray].description), // .. in the description.
483  return (PAPI_EMISC)); // .. on failure.
484  gctxt->availEventDesc[idxEventArray].description[PAPI_2MAX_STR_LEN - 1] = '\0'; // Ensure null terminator.
485  gctxt->availEventDesc[idxEventArray].numMetricEvents = 0; // Not a metric.
486  gctxt->availEventDesc[idxEventArray].metricEvents = NULL; // No space allocated.
487  /* Increment index past events in this domain to start of next domain */
488  idxEventArray++; // Bump total number of events.
489  } // end of events in this domain.
490 
491  papi_free(domainEventIDArray); // done with temp space.
492  } // end of domain loop within device.
493  } // end of device loop, for events.
494 
495  // Now we retrieve and store all METRIC info for each device; this includes
496  // both cuda metrics and nvlink metrics.
497  SUBDBG("Checking for metrics\n");
498  for (deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) {
499  uint32_t maxMetrics = 0, i, j;
500  CUpti_MetricID *metricIdList = NULL;
501  CUptiResult cuptiRet;
502  mydevice = &gctxt->deviceArray[deviceNum]; // Get papicuda_device_desc pointer.
503  cuptiRet = (*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics); // Read the # metrics on this device.
504  if (cuptiRet != CUPTI_SUCCESS || maxMetrics < 1) continue; // If no metrics, skip to next device.
505 
506  SUBDBG("Device %d: Checking each of the (maxMetrics) %d metrics\n", deviceNum, maxMetrics);
507 
508  // Make a temporary list of the metric Ids to add to the available named collectables.
509  size_t size = maxMetrics * sizeof(CUpti_EventID);
510  metricIdList = (CUpti_MetricID *) papi_calloc(maxMetrics, sizeof(CUpti_EventID));
511  CHECK_PRINT_EVAL(metricIdList == NULL, "Out of memory", return (PAPI_ENOMEM));
512 
513  CUPTI_CALL((*cuptiDeviceEnumMetricsPtr) // Enumerate the metric Ids for this device,
514  (mydevice->cuDev, &size, metricIdList), // .. into metricIdList.
515  return (PAPI_EMISC)); // .. On failure, but should work, we have metrics!
516 
517  // Elimination loop for metrics we cannot support.
518  int saveDeviceNum = 0;
519  CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); // save caller's device num.
520 
521  for (i=0, j=0; i<maxMetrics; i++) { // process each metric Id.
522  size = PAPI_MIN_STR_LEN-1; // Most bytes allowed to be written.
523  CUPTI_CALL((*cuptiMetricGetAttributePtr) (metricIdList[i], // Get the name.
524  CUPTI_METRIC_ATTR_NAME, &size, (uint8_t *) tmpStr),
525  return (PAPI_EMISC));
526 
527  // Note that 'size' also returned total bytes written.
528  tmpStr[size] = '\0';
529 
530  if (strcmp("branch_efficiency", tmpStr) == 0) continue; // If it is branch efficiency, skip it.
531 
532  // We'd like to reject anything requiring more than 1
533  // set, but there is a problem I cannot find; I have
534  // been unable to create a CUcontext here so I can
535  // execute the CreateEventGroups. I've tried both
536  // ways, it returns an error saying no cuda devices
537  // available. There does not seem to be a way to get
538  // the number of "sets" (passes) for a metric without
539  // having a context.
540 
541  // CUpti_EventGroupSets *thisEventGroupSets = NULL;
542  //CUPTI_CALL ((*cuptiMetricCreateEventGroupSetsPtr) (
543  // tempContext,
544  // sizeof(CUpti_MetricID),
545  // &metricIdList[i],
546  // &thisEventGroupSets),
547  // return (PAPI_EMISC));
548  //
549  //int numSets = 0; // # of sets (passes) required.
550  //if (thisEventGroupSets != NULL) {
551  // numSets=thisEventGroupSets->numSets; // Get sets if a grouping is necessary.
552  // CUPTI_CALL((*cuptiEventGroupSetsDestroyPtr) (thisEventGroupSets), // Done with this.
553  // return (PAPI_EMISC));
554  //}
555  //
556  //if (numSets > 1) continue; // skip this metric too many passes.
557 
558  metricIdList[j++] = metricIdList[i]; // we are compressing if we skipped any.
559  } // end elimination loop.
560 
561  // Done with eliminations, the rest are valid.
562  maxMetrics = j; // Change the number to process.
563 
564  // Eliminations accomplished, now add the valid metric Ids to the list.
565  for(i = 0; i < maxMetrics; i++) { // for each id,
566  gctxt->availEventIDArray[idxEventArray] = metricIdList[i]; // add to the list of collectables.
567  gctxt->availEventKind[idxEventArray] = CUPTI_ACTIVITY_KIND_METRIC; // Indicate it is a metric.
568  gctxt->availEventDeviceNum[idxEventArray] = deviceNum; // remember the device number.
569  size = PAPI_MIN_STR_LEN;
570  CUPTI_CALL((*cuptiMetricGetAttributePtr) (metricIdList[i], // Get the name, fail if we cannot.
571  CUPTI_METRIC_ATTR_NAME, &size, (uint8_t *) tmpStr),
572  return (PAPI_EMISC));
573 
574  if (size >= PAPI_MIN_STR_LEN) { // Truncate if we don't have room for the name.
575  gctxt->availEventDesc[idxEventArray].name[PAPI_MIN_STR_LEN - 1] = '\0';
576  }
577 
578  size_t MV_KindSize = sizeof(CUpti_MetricValueKind);
579  CUPTI_CALL((*cuptiMetricGetAttributePtr) // Collect the metric kind.
580  (metricIdList[i], CUPTI_METRIC_ATTR_VALUE_KIND, &MV_KindSize, // .. for this metric,
581  &gctxt->availEventDesc[idxEventArray].MV_Kind), // .. store in the event description,
582  return (PAPI_EMISC)); // .. on failure, but should always work.
583 
584  snprintf(gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, // .. develop name for papi user in tmpStr.
585  "metric:%s:device=%d", tmpStr, deviceNum);
586 
587  size = PAPI_2MAX_STR_LEN-1; // Most bytes to return.
588  CUPTI_CALL((*cuptiMetricGetAttributePtr) // Collect the long description.
589  (metricIdList[i], CUPTI_METRIC_ATTR_LONG_DESCRIPTION, &size, // .. for this metric, no more than size.
590  (uint8_t *) gctxt->availEventDesc[idxEventArray].description), // .. and store in event description.
591  return (PAPI_EMISC)); // .. on failure, but should always work.
592 
593  // Note that 'size' also returned total bytes written.
594  gctxt->availEventDesc[idxEventArray].description[size] = '\0'; // Always z-terminate.
595 
596  // Now we get all the sub-events of this metric.
597  uint32_t numSubs;
598  CUpti_MetricID itemId = metricIdList[i]; //.. shortcut to metric id.
599  CUPTI_CALL((*cuptiMetricGetNumEventsPtr) (itemId, &numSubs), // .. Get number of sub-events in metric.
600  return (PAPI_EINVAL)); // .. on failure of call.
601 
602  size_t sizeBytes = numSubs * sizeof(CUpti_EventID); // .. compute size of array we need.
603  CUpti_EventID *subEventIds = papi_malloc(sizeBytes); // .. Make the space.
604  CHECK_PRINT_EVAL(subEventIds == NULL, "Malloc failed", // .. If malloc fails,
605  return (PAPI_ENOMEM));
606 
607  CUPTI_CALL((*cuptiMetricEnumEventsPtr) // .. Enumerate events in the metric.
608  (itemId, &sizeBytes, subEventIds), // .. store in array.
609  return (PAPI_EINVAL)); // .. If cupti call fails.
610 
611  gctxt->availEventDesc[idxEventArray].metricEvents = subEventIds; // .. Copy the array pointer for IDs.
612  gctxt->availEventDesc[idxEventArray].numMetricEvents = numSubs; // .. Copy number of elements in it.
613 
614  idxEventArray++; // count another collectable found.
615  } // end maxMetrics loop.
616 
617  papi_free(metricIdList); // Done with this enumeration of metrics.
618  // Part of problem above, cannot create tempContext for unknown reason.
619  // CU_CALL((*cuCtxDestroyPtr) (tempContext), return (PAPI_EMISC)); // destroy the temporary context.
620  CUDA_CALL((*cudaSetDevicePtr) (saveDeviceNum), return (PAPI_EMISC)); // set the device pointer back to caller.
621  } // end 'for each device'.
622 
623  gctxt->availEventSize = idxEventArray;
624 
625  /* return 0 if everything went OK */
626  return 0;
627 } // end papicuda_add_native_events
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:637
#define PAPI_ENOMEM
Definition: fpapi.h:107
#define PAPI_EINVAL
Definition: fpapi.h:106
uint32_t * domainIDNumEvents
Definition: linux-cuda.c:70
#define papi_free(a)
Definition: papi_memory.h:35
uint32_t * availEventIDArray
Definition: linux-cuda.c:49
#define PAPI_ENOSUPP
Definition: fpapi.h:123
#define papi_malloc(a)
Definition: papi_memory.h:34
int * availEventDeviceNum
Definition: linux-cuda.c:48
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:142
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
Definition: linux-cuda.c:124
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:51
#define PAPI_2MAX_STR_LEN
Definition: papi.h:467
CUpti_EventDomainID * domainIDArray
Definition: linux-cuda.c:69
#define PAPI_EMISC
Definition: fpapi.h:119
#define PAPI_MIN_STR_LEN
Definition: fpapi.h:41
uint32_t availEventSize
Definition: linux-cuda.c:46
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:133
papi_vector_t _cuda_vector
Definition: linux-cuda.c:115
struct papicuda_device_desc * deviceArray
Definition: linux-cuda.c:45
CUresult CUDAAPI(* cuInitPtr)(unsigned int)
Definition: benchSANVML.c:47
char deviceName[PAPI_MIN_STR_LEN]
Definition: linux-cuda.c:67
CUpti_ActivityKind * availEventKind
Definition: linux-cuda.c:47
uint32_t * availEventIsBeingMeasuredInEventset
Definition: linux-cuda.c:50
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:153
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: benchSANVML.c:53
int i
Definition: fileop.c:140
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
Here is the caller graph for this function:

◆ papicuda_cleanup_eventset()

static int papicuda_cleanup_eventset ( hwd_control_state_t * ctrl)
static

Definition at line 1298 of file linux-cuda.c.

1299 {
1300  SUBDBG("Entering\n");
1301  (void) ctrl; // Don't need this parameter.
1304  // papicuda_active_cucontext_t *currctrl;
1305  uint32_t cc;
1306  int saveDeviceNum;
1307  unsigned int ui;
1308 
1309  SUBDBG("Save current context, then switch to each active device/context and enable eventgroups\n");
1310  CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC));
1311  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) {
1312  CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx;
1313  int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum;
1314  CUpti_EventGroupSets *currEventGroupSets = gctrl->arrayOfActiveCUContexts[cc]->eventGroupSets;
1315  if(currDeviceNum != saveDeviceNum)
1316  CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1317  else
1318  CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1319  //CUPTI_CALL((*cuptiEventGroupSetsDestroyPtr) (currEventGroupPasses), return (PAPI_EMISC));
1320  (*cuptiEventGroupSetsDestroyPtr) (currEventGroupSets);
1321  gctrl->arrayOfActiveCUContexts[cc]->eventGroupSets = NULL;
1322  papi_free( gctrl->arrayOfActiveCUContexts[cc] );
1323  /* Pop the pushed context */
1324  if(currDeviceNum != saveDeviceNum)
1325  CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC));
1326  }
1327  /* Record that there are no active contexts or events */
1328  for (ui=0; ui<gctrl->activeEventCount; ui++) { // For each active event,
1329  int idx = gctrl->activeEventIndex[ui]; // .. Get its index...
1330  gctxt->availEventIsBeingMeasuredInEventset[idx] = 0; // .. No longer being measured.
1331  }
1332 
1333  gctrl->countOfActiveCUContexts = 0;
1334  gctrl->activeEventCount = 0;
1335  return (PAPI_OK);
1336 } // end papicuda_cleanup_eventset
#define PAPI_OK
Definition: fpapi.h:105
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:121
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:118
#define papi_free(a)
Definition: papi_memory.h:35
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:142
#define PAPI_EMISC
Definition: fpapi.h:119
papicuda_active_cucontext_t * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:100
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:133
CUpti_EventGroupSets * eventGroupSets
Definition: linux-cuda.c:92
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:99
int activeEventIndex[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:102
uint32_t activeEventCount
Definition: linux-cuda.c:101
uint32_t * availEventIsBeingMeasuredInEventset
Definition: linux-cuda.c:50
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: benchSANVML.c:53
Here is the caller graph for this function:

◆ papicuda_convert_metric_value_to_long_long()

static int papicuda_convert_metric_value_to_long_long ( CUpti_MetricValue  metricValue,
CUpti_MetricValueKind  valueKind,
long long int *  papiValue 
)
static

Definition at line 637 of file linux-cuda.c.

638 {
639  union {
640  long long ll;
641  double fp;
642  } tmpValue;
643 
644  SUBDBG("Try to convert the CUPTI metric value kind (index %d) to PAPI value (long long or double)\n", valueKind);
645  switch (valueKind) {
646  case CUPTI_METRIC_VALUE_KIND_DOUBLE:
647  SUBDBG("Metric double %f\n", metricValue.metricValueDouble);
648  tmpValue.ll = (long long)(metricValue.metricValueDouble);
649  //CHECK_PRINT_EVAL(tmpValue.fp - metricValue.metricValueDouble > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
650  break;
651  case CUPTI_METRIC_VALUE_KIND_UINT64:
652  SUBDBG("Metric uint64 = %llu\n", (unsigned long long) metricValue.metricValueUint64);
653  tmpValue.ll = (long long) (metricValue.metricValueUint64);
654  CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueUint64 > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
655  break;
656  case CUPTI_METRIC_VALUE_KIND_INT64:
657  SUBDBG("Metric int64 = %lld\n", (long long) metricValue.metricValueInt64);
658  tmpValue.ll = (long long) (metricValue.metricValueInt64);
659  CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueInt64 > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
660  break;
661  case CUPTI_METRIC_VALUE_KIND_PERCENT:
662  SUBDBG("Metric percent = %f%%\n", metricValue.metricValuePercent);
663  tmpValue.ll = (long long)(metricValue.metricValuePercent*100);
664  //CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValuePercent > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
665  break;
666  case CUPTI_METRIC_VALUE_KIND_THROUGHPUT:
667  SUBDBG("Metric throughput %llu bytes/sec\n", (unsigned long long) metricValue.metricValueThroughput);
668  tmpValue.ll = (long long) (metricValue.metricValueThroughput);
669  CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueThroughput > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
670  break;
671  case CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL:
672  SUBDBG("Metric utilization level %u\n", (unsigned int) metricValue.metricValueUtilizationLevel);
673  tmpValue.ll = (long long) (metricValue.metricValueUtilizationLevel);
674  CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueUtilizationLevel > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
675  break;
676  default:
677  CHECK_PRINT_EVAL(1, "ERROR: unsupported metric value kind", return (PAPI_EINVAL));
678  exit(-1);
679  }
680 
681  *papiValue = tmpValue.ll;
682  return (PAPI_OK);
683 } // end routine
#define PAPI_OK
Definition: fpapi.h:105
#define PAPI_EINVAL
Definition: fpapi.h:106
static FILE * fp
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
Definition: linux-cuda.c:124
#define PAPI_EMISC
Definition: fpapi.h:119
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
void exit()
Here is the call graph for this function:
Here is the caller graph for this function:

◆ papicuda_ctrl()

static int papicuda_ctrl ( hwd_context_t * ctx,
int  code,
_papi_int_option_t * option 
)
static

Definition at line 1447 of file linux-cuda.c.

1448 {
1449  SUBDBG("Entering\n");
1450  (void) ctx;
1451  (void) code;
1452  (void) option;
1453  return (PAPI_OK);
1454 }
#define PAPI_OK
Definition: fpapi.h:105
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

◆ papicuda_init_component()

static int papicuda_init_component ( int  cidx)
static

Definition at line 715 of file linux-cuda.c.

716 { // Component init: link the CUDA libraries, then build the list of native CUDA events.
717  SUBDBG("Entering with component idx: %d\n", cidx);
718  int rv;
719 
720  /* link in all the cuda libraries and resolve the symbols we need to use */
721  if(papicuda_linkCudaLibraries() != PAPI_OK) {
722  SUBDBG("Dynamic link of CUDA libraries failed, component will be disabled.\n");
723  SUBDBG("See disable reason in papi_component_avail output for more details.\n");
724  return (PAPI_ENOSUPP);
725  }
726 
727  /* Create the structure */
730  // NOTE(review): listing lines 728-729 are elided; presumably they allocate global_papicuda_context with papi_calloc (both appear in this function's cross-references) - confirm.
731  /* Get list of all native CUDA events supported */
733  if(rv != 0) // NOTE(review): line 732 is elided; presumably rv is set from papicuda_add_native_events() - confirm.
734  return (rv);
735 
736  /* Export some information */
741  // NOTE(review): lines 737-740 are elided; what is exported into _cuda_vector.cmp_info cannot be seen here.
742  return (PAPI_OK);
743 } // end init_component
#define PAPI_OK
Definition: fpapi.h:105
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:118
#define PAPI_ENOSUPP
Definition: fpapi.h:123
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
static int cidx
uint32_t availEventSize
Definition: linux-cuda.c:46
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
papi_vector_t _cuda_vector
Definition: linux-cuda.c:115
static int papicuda_add_native_events(papicuda_context_t *gctxt)
Definition: linux-cuda.c:333
#define papi_calloc(a, b)
Definition: papi_memory.h:37
Here is the call graph for this function:

◆ papicuda_init_control_state()

static int papicuda_init_control_state ( hwd_control_state_t *  ctrl)
static

Definition at line 750 of file linux-cuda.c.

751 { // Control-state init: requires an initialized component; the passed-in ctrl is not used.
752  SUBDBG("Entering\n");
753  (void) ctrl;
755  // NOTE(review): listing line 754 is elided; gctxt is presumably bound to global_papicuda_context there - confirm.
756  CHECK_PRINT_EVAL(!gctxt, "Error: The PAPI CUDA component needs to be initialized first", return (PAPI_ENOINIT));
757  /* If no events were found during the initial component initialization, return error */
759  strncpy(_cuda_vector.cmp_info.disabled_reason, "ERROR CUDA: No events exist", PAPI_MAX_STR_LEN); // line 758 (the condition guarding this branch) is elided in this listing
760  return (PAPI_EMISC);
761  }
762  /* If it does not exist, create the global structure to hold CUDA contexts and active events */
767  } // NOTE(review): lines 763-766 are elided; the creation of global_papicuda_control cannot be seen here.
768 
769  return PAPI_OK;
770 } // end papicuda_init_control_state
#define PAPI_OK
Definition: fpapi.h:105
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:637
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:121
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:118
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
Definition: linux-cuda.c:124
#define PAPI_EMISC
Definition: fpapi.h:119
uint32_t availEventSize
Definition: linux-cuda.c:46
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
papi_vector_t _cuda_vector
Definition: linux-cuda.c:115
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:99
#define PAPI_ENOINIT
Definition: fpapi.h:121
uint32_t activeEventCount
Definition: linux-cuda.c:101
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43

◆ papicuda_init_thread()

static int papicuda_init_thread ( hwd_context_t *  ctx)
static

Definition at line 693 of file linux-cuda.c.

694 { // Per-thread init hook: there is nothing to set up, so it only logs entry and reports success.
695  SUBDBG("Entering\n");
696  (void) ctx; // unused
697 
698  return (PAPI_OK);
699 }
#define PAPI_OK
Definition: fpapi.h:105
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

◆ papicuda_ntv_code_to_descr()

static int papicuda_ntv_code_to_descr ( unsigned int  EventCode,
char *  name,
int  len 
)
static

Definition at line 1529 of file linux-cuda.c.

1530 {
1531  // Copy the description of native event 'EventCode' into 'name' ('len' = capacity in bytes). Returns PAPI_OK, or PAPI_EINVAL for a bad code or capacity.
1532  unsigned int index = EventCode; // NOTE(review): listing line 1533 is elided; gctxt is presumably bound to global_papicuda_context there - confirm.
1534  if(len <= 0 || index >= gctxt->availEventSize) { // a non-positive 'len' would be converted to a huge size_t by strncpy
1535  return (PAPI_EINVAL);
1536  }
1537  strncpy(name, gctxt->availEventDesc[index].description, len);
1538  name[len - 1] = '\0'; // strncpy leaves 'name' unterminated when the source is 'len' bytes or longer
1539  return (PAPI_OK);
1540 }
#define PAPI_OK
Definition: fpapi.h:105
static const char * name
Definition: fork_overflow.c:31
#define PAPI_EINVAL
Definition: fpapi.h:106
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:118
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:51

◆ papicuda_ntv_code_to_name()

static int papicuda_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
)
static

Definition at line 1509 of file linux-cuda.c.

1510 {
1511  // Copy the name of native event 'EventCode' into 'name' ('len' = capacity in bytes). Returns PAPI_OK, or PAPI_EINVAL for a bad code or capacity.
1512  unsigned int index = EventCode; // NOTE(review): listing line 1513 is elided; gctxt is presumably bound to global_papicuda_context there - confirm.
1514  if(len <= 0 || index >= gctxt->availEventSize) { // a non-positive 'len' would be converted to a huge size_t by strncpy
1515  return (PAPI_EINVAL);
1516  }
1517  strncpy(name, gctxt->availEventDesc[index].name, len);
1518  name[len - 1] = '\0'; // strncpy leaves 'name' unterminated when the source is 'len' bytes or longer
1519  // SUBDBG( "Exit: EventCode %d: Name %s\n", EventCode, name );
1520  return (PAPI_OK);
1521 }
#define PAPI_OK
Definition: fpapi.h:105
static const char * name
Definition: fork_overflow.c:31
#define PAPI_EINVAL
Definition: fpapi.h:106
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:118
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:51

◆ papicuda_ntv_enum_events()

static int papicuda_ntv_enum_events ( unsigned int *  EventCode,
int  modifier 
)
static

Definition at line 1482 of file linux-cuda.c.

1483 {
1484  // Enumerate native event codes: PAPI_ENUM_FIRST yields code 0; PAPI_ENUM_EVENTS steps *EventCode to the next code.
1485  switch (modifier) {
1486  case PAPI_ENUM_FIRST:
1487  *EventCode = 0;
1488  return (PAPI_OK);
1489  case PAPI_ENUM_EVENTS:
1490  // availEventSize is unsigned, so "availEventSize - 1" wraps to a huge value when no events exist.
1491  // Test for the empty table first so the enumeration terminates instead of accepting every code.
1492  if(global_papicuda_context->availEventSize > 0 &&
1493  *EventCode < global_papicuda_context->availEventSize - 1) {
1494  *EventCode = *EventCode + 1;
1495  return (PAPI_OK);
1496  }
1497  return (PAPI_ENOEVNT); // already at the last event (or there are none)
1498  default:
1499  return (PAPI_EINVAL); // unsupported modifier
1500  }
1501 }
#define PAPI_OK
Definition: fpapi.h:105
#define PAPI_EINVAL
Definition: fpapi.h:106
#define PAPI_ENOEVNT
Definition: fpapi.h:112

◆ papicuda_read()

static int papicuda_read ( hwd_context_t *  ctx,
hwd_control_state_t *  ctrl,
long long **  values,
int  flags 
)
static

Definition at line 1054 of file linux-cuda.c.

1055 { // Read all active events/metrics: per active context, read and aggregate each CUPTI event group, then fill gctrl->activeEventValues and return it via *values.
1056  SUBDBG("Entering\n");
1057  (void) ctx;
1058  (void) ctrl;
1059  (void) flags; // NOTE(review): listing lines 1060-1061 are elided; gctrl/gctxt used below are presumably bound to the two globals there - confirm.
1062  uint32_t gg, i, j, cc;
1063  int saveDeviceNum;
1064 
1065  // Get read time stamp
1066  CUPTI_CALL((*cuptiGetTimestampPtr) // Read current timestamp.
1067  (&gctrl->cuptiReadTimestampNs),
1068  return (PAPI_EMISC));
1069  uint64_t durationNs = gctrl->cuptiReadTimestampNs -
1070  gctrl->cuptiStartTimestampNs; // compute duration from start.
1071  gctrl->cuptiStartTimestampNs = gctrl->cuptiReadTimestampNs; // Change start to value just read.
1072 
1073  SUBDBG("Save current context, then switch to each active device/context and enable context-specific eventgroups\n");
1074  CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); // Save Caller's current device number on entry.
1075 
1076  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { // For each active context,
1077  papicuda_active_cucontext_t *activeCuCtxt =
1078  gctrl->arrayOfActiveCUContexts[cc]; // A shortcut.
1079  int currDeviceNum = activeCuCtxt->deviceNum; // Get the device number.
1080  CUcontext currCuCtx = activeCuCtxt->cuCtx; // Get the actual CUcontext.
1081 
1082  SUBDBG("Set to device %d cuCtx %p \n", currDeviceNum, currCuCtx);
1083  if(currDeviceNum != saveDeviceNum) { // If my current is not the same as callers,
1084  CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC)); // .. Push the current, and replace with mine.
1085  // Note, cuCtxPushCurrent() implicitly includes a cudaSetDevice().
1086  } else { // If my current IS the same as callers,
1087  CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC)); // .. No push. Just set the current.
1088  }
1089 
1090  CU_CALL((*cuCtxSynchronizePtr) (), return (PAPI_EMISC)); // Block until device finishes all prior tasks.
1091  CUpti_EventGroupSets *myEventGroupSets = activeCuCtxt->eventGroupSets; // Make a copy of pointer to EventGroupSets.
1092 
1093  uint32_t numEvents, numInstances, numTotalInstances;
1094  size_t sizeofuint32num = sizeof(uint32_t);
1095  CUpti_EventDomainID groupDomainID;
1096  size_t groupDomainIDSize = sizeof(groupDomainID);
1097  CUdevice cudevice = gctxt->deviceArray[currDeviceNum].cuDev; // Make a copy of the current device.
1098 
1099  // For each pass, we get the event groups that can be read together.
1100  // But since elsewhere, we don't allow events to be added that would
1101  // REQUIRE more than one pass, this will always be just ONE pass. So we
1102  // only need to loop over the groups.
1103 
1104  CUpti_EventGroupSet *groupset = &myEventGroupSets->sets[0]; // The one and only set.
1105  SUBDBG("Read events in this context\n");
1106  int AEIdx = 0; // we will be over-writing the allEvents array.
1107 
1108  for (gg = 0; gg < groupset->numEventGroups; gg++) { // process each eventgroup within the groupset.
1109  CUpti_EventGroup group = groupset->eventGroups[gg]; // Shortcut to the group.
1110 
1111  CUPTI_CALL((*cuptiEventGroupGetAttributePtr) // Get 'groupDomainID' for this group.
1112  (group, CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID,
1113  &groupDomainIDSize, &groupDomainID),
1114  return (PAPI_EMISC));
1115 
1116  // 'numTotalInstances' and 'numInstances are needed for scaling
1117  // the values retrieved. (Nvidia instructions and samples).
1118  CUPTI_CALL((*cuptiDeviceGetEventDomainAttributePtr) // Get 'numTotalInstances' for this domain.
1119  (cudevice,
1120  groupDomainID,
1121  CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT,
1122  &sizeofuint32num,
1123  &numTotalInstances),
1124  return (PAPI_EMISC));
1125 
1126  CUPTI_CALL((*cuptiEventGroupGetAttributePtr) // Get 'numInstances' for this domain.
1127  (group,
1128  CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT,
1129  &sizeofuint32num,
1130  &numInstances),
1131  return (PAPI_EMISC));
1132 
1133  CUPTI_CALL((*cuptiEventGroupGetAttributePtr) // Get 'numEvents' in this group.
1134  (group,
1135  CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS,
1136  &sizeofuint32num,
1137  &numEvents),
1138  return (PAPI_EMISC));
1139 
1140  // Now we will read all events in this group; aggregate the values
1141  // and then distribute them. We do not calculate metrics here;
1142  // wait until all groups are read and all values are available.
1143 
1144  size_t resultArrayBytes = sizeof(uint64_t) * numEvents * numTotalInstances;
1145  size_t eventIdArrayBytes = sizeof(CUpti_EventID) * numEvents;
1146  size_t numCountersRead = 2;
1147 
1148  CUpti_EventID *eventIdArray = (CUpti_EventID *) papi_malloc(eventIdArrayBytes);
1149  uint64_t *resultArray = (uint64_t *) papi_malloc(resultArrayBytes);
1150  uint64_t *aggrResultArray = (uint64_t *) papi_calloc(numEvents, sizeof(uint64_t));
1151  if (eventIdArray == NULL || resultArray == NULL || aggrResultArray == NULL) { // Validate BEFORE any buffer is written.
1152  fprintf(stderr, "%s:%i failed to allocate memory.\n", __FILE__, __LINE__);
1153  if (eventIdArray != NULL) papi_free(eventIdArray); // Release whatever was obtained so the failure path does not leak.
1154  if (resultArray != NULL) papi_free(resultArray);
1155  if (aggrResultArray != NULL) papi_free(aggrResultArray);
1156  return(PAPI_EMISC);
1157  }
1158  for (i=0; i<(resultArrayBytes/sizeof(uint64_t)); i++) resultArray[i]=0; // Zero the raw results (papi_malloc memory is not cleared here).
1159  CUPTI_CALL( (*cuptiEventGroupReadAllEventsPtr) // Read all events.
1160  (group, CUPTI_EVENT_READ_FLAG_NONE, // This flag is the only allowed flag.
1161  &resultArrayBytes, resultArray,
1162  &eventIdArrayBytes, eventIdArray,
1163  &numCountersRead),
1164  return (PAPI_EMISC));
1165 
1166  // Now (per Nvidia) we must sum up all domains for each event.
1167  // Arrangement of 2-d Array returned in resultArray:
1168  // domain instance 0: event0 event1 ... eventN
1169  // domain instance 1: event0 event1 ... eventN
1170  // ...
1171  // domain instance M: event0 event1 ... eventN
1172  // But we accumulate by column, event[0], event[1], etc.
1173 
1174  for (i = 0; i < numEvents; i++) { // outer loop is column (event) we are on.
1175  for (j = 0; j < numTotalInstances; j++) { // inner loop is row (instance) we are on.
1176  aggrResultArray[i] += resultArray[i + numEvents * j]; // accumulate the column.
1177  }
1178  }
1179 
1180  // We received an eventIdArray; note this is not necessarily in the
1181  // same order as we added them; CUpti can reorder them when sorting
1182  // them into groups. However, the total number of events must be
1183  // the same, so now as we read each group, we just overwrite the
1184  // allEvents[] and allEventValues[] arrays. It doesn't make a
1185  // difference to cuptiGetMetricValue what order the events appear
1186  // in.
1187 
1188  // After all these groups are read, allEvents will be complete, and
1189  // we can use it to compute the metrics and move metric and event
1190  // values back into user order.
1191 
1192  for (i=0; i<numEvents; i++) { // For each event in eventIdArray (just this group),
1193  CUpti_EventID myId = eventIdArray[i]; // shortcut for the event id within this group.
1194  activeCuCtxt->allEvents[AEIdx] = myId; // Overwrite All Events id.
1195  activeCuCtxt->allEventValues[AEIdx++] = aggrResultArray[i]; // Overwrite all events value; increment position.
1196  } // end loop for each event.
1197 
1198  papi_free(eventIdArray);
1199  papi_free(resultArray);
1200  papi_free(aggrResultArray);
1201  } // end of an event group.
1202 
1203  // We have finished all event groups within this context; allEvents[]
1204  // and allEventValues[] are populated. Now we compute metrics and move
1205  // event values. We do that by looping through the events assigned to
1206  // this context, and we must back track to the activeEventIdx[] and
1207  // activeEventValues[] array in gctrl. We have kept our indexes into
1208  // that array, in ctxActive[].
1209 
1210  uint32_t ctxActiveCount = activeCuCtxt->ctxActiveCount; // Number of (papi user) events in this context.
1211  uint32_t *ctxActive = activeCuCtxt->ctxActiveEvents; // index of each event in gctrl->activeEventXXXX.
1212 
1213  for (j=0; j<ctxActiveCount; j++) { // Search for matching active event.
1214  uint32_t activeIdx, availIdx;
1215 
1216  activeIdx=ctxActive[j]; // get index into activeEventIdx.
1217  availIdx = gctrl->activeEventIndex[activeIdx]; // Get the availEventIdx.
1218  CUpti_EventID thisEventId = gctxt->availEventIDArray[availIdx]; // Get the event ID (or metric ID).
1219  struct papicuda_name_desc *myDesc=&(gctxt->availEventDesc[availIdx]); // get pointer to the description.
1220 
1221  if (myDesc->numMetricEvents == 0) { // If this is a simple cuda event (not a metric),
1222  int k;
1223  for (k=0; k<AEIdx; k++) { // search the array for this event id.
1224  if (activeCuCtxt->allEvents[k] == thisEventId) { // If I found the event,
1225  gctrl->activeEventValues[activeIdx] = // Record the value,
1226  activeCuCtxt->allEventValues[k];
1227  break; // break out of the search loop.
1228  } // end if I found it.
1229  } // end search loop.
1230 
1231  continue; // Jump to next in ctxActiveCount.
1232  } else { // If I found a metric, I must compute it.
1233  CUpti_MetricValue myValue; // Space for a return.
1234  CUPTI_CALL( (*cuptiMetricGetValue) // Get the value. NOTE(review): every other CUPTI call in this listing goes through a ...Ptr variable; confirm this direct symbol is intended.
1235  (cudevice, thisEventId, // device and metric Id,
1236  AEIdx * sizeof(CUpti_EventID), // size of event list,
1237  activeCuCtxt->allEvents, // the event list.
1238  AEIdx * sizeof(uint64_t), // size of corresponding event values,
1239  activeCuCtxt->allEventValues, // the event values.
1240  durationNs, &myValue), // duration (for rates), and where to return the value.
1241  return(PAPI_EMISC)); // In case of error.
1242 
1243  papicuda_convert_metric_value_to_long_long( // convert the value computed to long long and store it.
1244  myValue, myDesc->MV_Kind,
1245  &gctrl->activeEventValues[activeIdx]);
1246  }
1247  } // end loop on active events in this context.
1248 
1249  if(currDeviceNum != saveDeviceNum) { // If we had to change the context from user's,
1250  CUDA_CALL((*cudaSetDevicePtr) (saveDeviceNum), // set the device pointer to the user's original.
1251  return (PAPI_EMISC)); // .. .. (on failure).
1252  CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC)); // .. pop the pushed context back to user's.
1253  }
1254  } // end of loop for each active context.
1255 
1256  *values = gctrl->activeEventValues; // Return ptr to the list of computed values to user.
1257  return (PAPI_OK);
1258 } // end of papicuda_read().
#define PAPI_OK
Definition: fpapi.h:105
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:121
uint64_t allEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:90
uint64_t cuptiStartTimestampNs
Definition: linux-cuda.c:105
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:118
#define papi_free(a)
Definition: papi_memory.h:35
uint32_t * availEventIDArray
Definition: linux-cuda.c:49
#define papi_malloc(a)
Definition: papi_memory.h:34
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:142
static int papicuda_convert_metric_value_to_long_long(CUpti_MetricValue metricValue, CUpti_MetricValueKind valueKind, long long int *papiValue)
Definition: linux-cuda.c:637
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:51
#define PAPI_EMISC
Definition: fpapi.h:119
CUpti_EventID allEvents[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:89
papicuda_active_cucontext_t * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:100
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:133
CUpti_EventGroupSets * eventGroupSets
Definition: linux-cuda.c:92
struct papicuda_device_desc * deviceArray
Definition: linux-cuda.c:45
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:99
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:103
static long long values[NUM_EVENTS]
Definition: init_fini.c:10
int activeEventIndex[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:102
uint32_t ctxActiveEvents[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:86
uint64_t cuptiReadTimestampNs
Definition: linux-cuda.c:106
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:153
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: benchSANVML.c:53
int i
Definition: fileop.c:140
Here is the call graph for this function:

◆ papicuda_reset()

static int papicuda_reset ( hwd_context_t *  ctx,
hwd_control_state_t *  ctrl 
)
static

Definition at line 1405 of file linux-cuda.c.

1406 { // Reset: zero the returned values, then reset and re-enable every event group in every active context.
1407  (void) ctx;
1408  (void) ctrl; // NOTE(review): listing line 1409 is elided; gctrl is presumably bound to global_papicuda_control there - confirm.
1410  uint32_t gg, ii, cc, ss;
1411  int saveDeviceNum;
1412 
1413  SUBDBG("Reset all active event values\n");
1414  for(ii = 0; ii < gctrl->activeEventCount; ii++)
1415  gctrl->activeEventValues[ii] = 0;
1416 
1417  SUBDBG("Save current context, then switch to each active device/context and reset\n");
1418  CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); // remember the caller's device
1419  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { // for each active context,
1420  CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx;
1421  int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum;
1422  if(currDeviceNum != saveDeviceNum) // a context on another device is pushed (and popped below),
1423  CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1424  else // one on the caller's device is simply made current.
1425  CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1426  CUpti_EventGroupSets *currEventGroupSets = gctrl->arrayOfActiveCUContexts[cc]->eventGroupSets;
1427  for (ss=0; ss<currEventGroupSets->numSets; ss++) { // for each group set in this context,
1428  CUpti_EventGroupSet groupset = currEventGroupSets->sets[ss]; // (a by-value copy of the set descriptor)
1429  for(gg = 0; gg < groupset.numEventGroups; gg++) { // reset the counters of every group,
1430  CUpti_EventGroup group = groupset.eventGroups[gg];
1431  CUPTI_CALL((*cuptiEventGroupResetAllEventsPtr) (group), return (PAPI_EMISC));
1432  }
1433  CUPTI_CALL((*cuptiEventGroupSetEnablePtr) (&groupset), return (PAPI_EMISC)); // then enable the whole set.
1434  }
1435  if(currDeviceNum != saveDeviceNum) // undo the push done above.
1436  CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC));
1437  }
1438  return (PAPI_OK);
1439 } // end papicuda_reset().
#define PAPI_OK
Definition: fpapi.h:105
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:121
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:142
#define PAPI_EMISC
Definition: fpapi.h:119
papicuda_active_cucontext_t * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:100
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:133
CUpti_EventGroupSets * eventGroupSets
Definition: linux-cuda.c:92
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:99
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:103
uint32_t activeEventCount
Definition: linux-cuda.c:101
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:153
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: benchSANVML.c:53

◆ papicuda_set_domain()

static int papicuda_set_domain ( hwd_control_state_t *  ctrl,
int  domain 
)
static

Definition at line 1466 of file linux-cuda.c.

1467 { // Accept a domain request when it overlaps any PAPI domain bit; otherwise reject it with PAPI_EINVAL.
1468  (void) ctrl; // unused
1469  SUBDBG("Entering\n");
1470  const int recognized = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_OTHER | PAPI_DOM_ALL;
1471  // Testing against the OR of the masks is equivalent to OR-ing the four individual mask tests.
1472  if((recognized & domain) == 0)
1473  return (PAPI_EINVAL);
1474  return (PAPI_OK);
1475 }
#define PAPI_OK
Definition: fpapi.h:105
#define PAPI_DOM_KERNEL
Definition: fpapi.h:22
#define PAPI_EINVAL
Definition: fpapi.h:106
#define PAPI_DOM_OTHER
Definition: fpapi.h:23
#define PAPI_DOM_USER
Definition: fpapi.h:21
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define PAPI_DOM_ALL
Definition: fpapi.h:25

◆ papicuda_shutdown_component()

static int papicuda_shutdown_component ( void  )
static

Definition at line 1349 of file linux-cuda.c.

1350 { // Component shutdown: free the global context and control structures, then close the three dynamic libraries.
1351  SUBDBG("Entering\n"); // NOTE(review): listing lines 1352-1353 are elided; gctxt/gctrl are presumably bound to the two globals there - confirm.
1354  int deviceNum;
1355  uint32_t i, cc;
1356  /* Free context */
1357  if(gctxt) {
1358  for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) { // per-device domain arrays
1359  papicuda_device_desc_t *mydevice = &gctxt->deviceArray[deviceNum];
1360  papi_free(mydevice->domainIDArray);
1361  papi_free(mydevice->domainIDNumEvents);
1362  }
1363 
1364  for (i=0; i<gctxt->availEventSize; i++) { // For every event in this context,
1365  struct papicuda_name_desc *desc = &(gctxt->availEventDesc[i]); // get a name description.
1366  if (desc->numMetricEvents > 0) { // If we have any sub-events,
1367  papi_free(desc->metricEvents); // .. Free the list of sub-events.
1368  }
1369  } // end for every available event.
1370 
1371  papi_free(gctxt->availEventIDArray);
1373  papi_free(gctxt->availEventKind); // NOTE(review): lines 1372 and 1374 are elided; the cross-references suggest availEventDeviceNum and availEventIsBeingMeasuredInEventset are freed there - confirm.
1375  papi_free(gctxt->availEventDesc);
1376  papi_free(gctxt->deviceArray);
1377  papi_free(gctxt);
1378  global_papicuda_context = gctxt = NULL; // clear the global so the freed context cannot be reused
1379  }
1380  /* Free control */
1381  if(gctrl) {
1382  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) {
1383 #ifdef PAPICUDA_KERNEL_REPLAY_MODE
1384  CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx;
1385  CUPTI_CALL((*cuptiDisableKernelReplayModePtr) (currCuCtx), return (PAPI_EMISC));
1386 #endif
1387  if(gctrl->arrayOfActiveCUContexts[cc] != NULL) // NOTE(review): under PAPICUDA_KERNEL_REPLAY_MODE the entry is dereferenced above, before this NULL test.
1388  papi_free(gctrl->arrayOfActiveCUContexts[cc]);
1389  }
1390  papi_free(gctrl);
1391  global_papicuda_control = gctrl = NULL;
1392  }
1393  // close the dynamic libraries needed by this component (opened in the init substrate call)
1394  dlclose(dl1);
1395  dlclose(dl2);
1396  dlclose(dl3);
1397  return (PAPI_OK);
1398 } // end papicuda_shutdown_component().
#define PAPI_OK
Definition: fpapi.h:105
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:121
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:118
uint32_t * domainIDNumEvents
Definition: linux-cuda.c:70
#define papi_free(a)
Definition: papi_memory.h:35
uint32_t * availEventIDArray
Definition: linux-cuda.c:49
int * availEventDeviceNum
Definition: linux-cuda.c:48
static void * dl1
Definition: linux-cuda.c:110
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:51
static void * dl2
Definition: linux-cuda.c:111
CUpti_EventDomainID * domainIDArray
Definition: linux-cuda.c:69
#define PAPI_EMISC
Definition: fpapi.h:119
papicuda_active_cucontext_t * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:100
uint32_t availEventSize
Definition: linux-cuda.c:46
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static void * dl3
Definition: linux-cuda.c:112
struct papicuda_device_desc * deviceArray
Definition: linux-cuda.c:45
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:99
CUpti_ActivityKind * availEventKind
Definition: linux-cuda.c:47
uint32_t * availEventIsBeingMeasuredInEventset
Definition: linux-cuda.c:50
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:153
int i
Definition: fileop.c:140

◆ papicuda_shutdown_thread()

int papicuda_shutdown_thread ( hwd_context_t *  ctx)

Definition at line 1340 of file linux-cuda.c.

1341 { // Per-thread shutdown hook: nothing to release, so it only logs entry and reports success.
1342  (void) ctx; // unused
1343  SUBDBG("Entering\n");
1344 
1345  return PAPI_OK;
1346 }
#define PAPI_OK
Definition: fpapi.h:105
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

◆ papicuda_start()

static int papicuda_start ( hwd_context_t *  ctx,
hwd_control_state_t *  ctrl 
)
static

Definition at line 996 of file linux-cuda.c.

997 { // Start counting: zero the returned values, record the start timestamp, and enable the event groups of every active context.
998  SUBDBG("Entering\n");
999  (void) ctx;
1000  (void) ctrl; // NOTE(review): listing line 1001 is elided; gctrl is presumably bound to global_papicuda_control there - confirm.
1002  // papicuda_context_t *gctxt = global_papicuda_context;
1003  uint32_t ii, gg, cc;
1004  int saveDeviceNum = -1;
1005 
1006  SUBDBG("Reset all active event values\n");
1007  for(ii = 0; ii < gctrl->activeEventCount; ii++) // These are the values we will return.
1008  gctrl->activeEventValues[ii] = 0;
1009 
1010  SUBDBG("Save current context, then switch to each active device/context and enable eventgroups\n");
1011  CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC));
1012  CUPTI_CALL((*cuptiGetTimestampPtr) (&gctrl->cuptiStartTimestampNs), return (PAPI_EMISC)); // start of the measured interval
1013 
1014  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { // For each context,
1015  int eventDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum; // .. get device number.
1016  CUcontext eventCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; // .. get this context,
1017  SUBDBG("Set to device %d cuCtx %p \n", eventDeviceNum, eventCuCtx);
1018  if(eventDeviceNum != saveDeviceNum) { // .. If we need to switch,
1019  CU_CALL((*cuCtxPushCurrentPtr) (eventCuCtx), return (PAPI_EMISC)); // .. .. push current on stack, use this one.
1020  }
1021 
1022  CUpti_EventGroupSets *eventGroupSets = // .. Shortcut to eventGroupSets for this context.
1023  gctrl->arrayOfActiveCUContexts[cc]->eventGroupSets; // ..
1024  CUpti_EventGroupSet *groupset = &eventGroupSets->sets[0]; // .. There can be only one set of groups.
1025  for(gg = 0; gg < groupset->numEventGroups; gg++) { // .. For each group within this groupset,
1026  uint32_t one = 1;
1027  CUPTI_CALL((*cuptiEventGroupSetAttributePtr) ( // .. .. Say we want to profile all domains.
1028  groupset->eventGroups[gg],
1029  CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES,
1030  sizeof(uint32_t), &one),
1031  return (PAPI_EMISC)); // .. .. on failure of call.
1032  } // end for each group.
1033 
1034  CUPTI_CALL((*cuptiEventGroupSetEnablePtr) (groupset), // .. Enable all groups in set (start collecting).
1035  return (PAPI_EMISC)); // .. on failure of call.
1036 
1037  if(eventDeviceNum != saveDeviceNum) { // .. If we pushed a context,
1038  CU_CALL((*cuCtxPopCurrentPtr) (&eventCuCtx), return (PAPI_EMISC)); // .. Pop it.
1039  }
1040  } // end of loop on all contexts.
1041 
1042  return (PAPI_OK); // We started all groups.
1043 } // end routine.
#define PAPI_OK
Definition: fpapi.h:105
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:121
uint64_t cuptiStartTimestampNs
Definition: linux-cuda.c:105
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:142
#define PAPI_EMISC
Definition: fpapi.h:119
int one
papicuda_active_cucontext_t * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:100
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:133
CUpti_EventGroupSets * eventGroupSets
Definition: linux-cuda.c:92
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:99
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:103
uint32_t activeEventCount
Definition: linux-cuda.c:101
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:153
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: benchSANVML.c:53

◆ papicuda_stop()

static int papicuda_stop ( hwd_context_t *  ctx,
hwd_control_state_t *  ctrl 
)
static

Definition at line 1261 of file linux-cuda.c.

1262 { // Stop counting: disable every event group set in every active context.
1263  SUBDBG("Entering\n");
1264  (void) ctx;
1265  (void) ctrl; // NOTE(review): listing line 1266 is elided; gctrl is presumably bound to global_papicuda_control there - confirm.
1267  uint32_t cc, ss;
1268  int saveDeviceNum;
1269 
1270  SUBDBG("Save current context, then switch to each active device/context and disable eventgroups\n"); // message previously said "enable"
1271  CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC)); // remember the caller's device
1272  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) { // For each active context,
1273  int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum;
1274  CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx;
1275  SUBDBG("Set to device %d cuCtx %p \n", currDeviceNum, currCuCtx);
1276  if(currDeviceNum != saveDeviceNum) // a context on another device is pushed (and popped below),
1277  CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1278  else // one on the caller's device is simply made current.
1279  CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1280  CUpti_EventGroupSets *currEventGroupSets = gctrl->arrayOfActiveCUContexts[cc]->eventGroupSets;
1281  for (ss=0; ss<currEventGroupSets->numSets; ss++) { // For each set of groups in this context,
1282  CUpti_EventGroupSet groupset = currEventGroupSets->sets[ss]; // get the set,
1283  CUPTI_CALL((*cuptiEventGroupSetDisablePtr) (&groupset), // disable the whole set.
1284  return (PAPI_EMISC)); // .. on failure.
1285  }
1286  /* Pop the pushed context */
1287  if(currDeviceNum != saveDeviceNum)
1288  CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC));
1289 
1290  }
1291  return (PAPI_OK);
1292 } // end of papicuda_stop.
#define PAPI_OK
Definition: fpapi.h:105
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:121
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:142
#define PAPI_EMISC
Definition: fpapi.h:119
papicuda_active_cucontext_t * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:100
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:133
CUpti_EventGroupSets * eventGroupSets
Definition: linux-cuda.c:92
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:99
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:153
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: benchSANVML.c:53

◆ papicuda_update_control_state()

static int papicuda_update_control_state ( hwd_control_state_t *  ctrl,
NativeInfo_t *  nativeInfo,
int  nativeCount,
hwd_context_t *  ctx 
)
static

Definition at line 779 of file linux-cuda.c.

781 {
782  SUBDBG("Entering with nativeCount %d\n", nativeCount);
783  (void) ctx;
784  papicuda_control_t *gctrl = global_papicuda_control; // We don't use the passed-in parameter, we use a global.
785  papicuda_context_t *gctxt = global_papicuda_context; // We don't use the passed-in parameter, we use a global.
786  int currDeviceNum;
787  CUcontext currCuCtx;
788  int eventContextIdx;
789  CUcontext eventCuCtx;
790  int index, ii, ee, cc;
791 
792  /* Return if no events */
793  if(nativeCount == 0)
794  return (PAPI_OK);
795 
796  /* Get deviceNum, initialize context if needed via free, get context */
797  CUDA_CALL((*cudaGetDevicePtr) (&currDeviceNum), return (PAPI_EMISC));
798  SUBDBG("currDeviceNum %d \n", currDeviceNum);
799 
800  CUDA_CALL((*cudaFreePtr) (NULL), return (PAPI_EMISC));
801  CU_CALL((*cuCtxGetCurrentPtr) (&currCuCtx), return (PAPI_EMISC));
802  SUBDBG("currDeviceNum %d cuCtx %p \n", currDeviceNum, currCuCtx);
803 
804  /* Handle user request of events to be monitored */
805  for (ii = 0; ii < nativeCount; ii++) { // For each event provided by caller,
806  index = nativeInfo[ii].ni_event; // Get the index of the event (in the global context).
807  char *eventName = gctxt->availEventDesc[index].name; // Shortcut to name.
808  int numMetricEvents= gctxt->availEventDesc[index].numMetricEvents; // Get if this is an event (=0) or metric (>0).
809  int eventDeviceNum = gctxt->availEventDeviceNum[index]; // Device number for this event.
810  (void) eventName; // Useful in checkpoint and debug, don't warn if not used.
811 
812  /* if this event is already added continue to next ii, if not, mark it as being added */
813  if (gctxt->availEventIsBeingMeasuredInEventset[index] == 1) { // If already being collected, skip it.
814  SUBDBG("Skipping event %s which is already added\n", eventName);
815  continue;
816  } else {
817  gctxt->availEventIsBeingMeasuredInEventset[index] = 1; // If not being collected yet, flag it as being collected now.
818  }
819 
820  /* Find context/control in papicuda, creating it if does not exist */
821  for(cc = 0; cc < (int) gctrl->countOfActiveCUContexts; cc++) { // Scan all active contexts.
822  CHECK_PRINT_EVAL(cc >= PAPICUDA_MAX_COUNTERS, "Exceeded hardcoded maximum number of contexts (PAPICUDA_MAX_COUNTERS)", return (PAPI_EMISC));
823 
824  if(gctrl->arrayOfActiveCUContexts[cc]->deviceNum == eventDeviceNum) { // If this cuda context is for the device for this event,
825  eventCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx; // Remember that context.
826  SUBDBG("Event %s device %d already has a cuCtx %p registered\n", eventName, eventDeviceNum, eventCuCtx);
827 
828  if(eventCuCtx != currCuCtx) // If that is not our CURRENT context, push and make it so.
829  CU_CALL((*cuCtxPushCurrentPtr) (eventCuCtx), // .. Stack the current counter, replace with this one.
830  return (PAPI_EMISC)); // .. .. on failure.
831  break; // .. exit the loop.
832  } // end if found.
833  } // end loop through active contexts.
834 
835  if(cc == (int) gctrl->countOfActiveCUContexts) { // If we never found the context, create one.
836  SUBDBG("Event %s device %d does not have a cuCtx registered yet...\n", eventName, eventDeviceNum);
837  if(currDeviceNum != eventDeviceNum) { // .. If we need to switch to another device,
838  CUDA_CALL((*cudaSetDevicePtr) (eventDeviceNum), // .. .. set the device pointer to the event's device.
839  return (PAPI_EMISC)); // .. .. .. (on faiure).
840  CUDA_CALL((*cudaFreePtr) (NULL), return (PAPI_EMISC)); // .. .. This is a no-op, but used to force init of a context.
841  CU_CALL((*cuCtxGetCurrentPtr) (&eventCuCtx), // .. .. So we can get a pointer to it.
842  return (PAPI_EMISC)); // .. .. .. On failure.
843  } else { // .. If we are already on the right device,
844  eventCuCtx = currCuCtx; // .. .. just get the current context.
845  }
846 
847  gctrl->arrayOfActiveCUContexts[cc] = papi_calloc(1, sizeof(papicuda_active_cucontext_t)); // allocate a structure.
848  CHECK_PRINT_EVAL(gctrl->arrayOfActiveCUContexts[cc] == NULL, "Memory allocation for new active context failed", return (PAPI_ENOMEM));
849  gctrl->arrayOfActiveCUContexts[cc]->deviceNum = eventDeviceNum; // Fill in everything.
850  gctrl->arrayOfActiveCUContexts[cc]->cuCtx = eventCuCtx;
851  gctrl->arrayOfActiveCUContexts[cc]->allEventsCount = 0; // All events read by this context on this device.
852  gctrl->arrayOfActiveCUContexts[cc]->ctxActiveCount = 0; // active events being read by this context on this device.
853  gctrl->countOfActiveCUContexts++;
854  SUBDBG("Added a new context deviceNum %d cuCtx %p ... now countOfActiveCUContexts is %d\n", eventDeviceNum, eventCuCtx, gctrl->countOfActiveCUContexts);
855  } // end if we needed to create a new context.
856 
857  //---------------------------------------------------------------------
858  // We found the context, or created it, and the index is in cc.
859  //---------------------------------------------------------------------
860  eventContextIdx = cc;
861  papicuda_active_cucontext_t *eventctrl = gctrl->arrayOfActiveCUContexts[eventContextIdx]; // get the context for this event.
862 
863  // We need to get all the events (or sub-events of a metric) and add
864  // them to our list of all events. Note we only check if we exceed the
865  // bounds of the allEvents[] array; everything added to any other array
866  // results in at least ONE add to allEvents[], so it will fail before
867  // or coincident with any other array. TC
868 
869  CUpti_EventID itemId = gctxt->availEventIDArray[index]; // event (or metric) ID.
870 
871  if (numMetricEvents == 0) { // Dealing with a simple event.
872  eventctrl->allEvents[eventctrl->allEventsCount++] = itemId; // add to aggregate list, count it.
873  if (eventctrl->allEventsCount >= PAPICUDA_MAX_COUNTERS) { // .. Fail if we exceed size of array.
874  SUBDBG("Num events (generated by metric) exceeded PAPICUDA_MAX_COUNTERS\n");
875  return(PAPI_EINVAL);
876  }
877  } else { // dealing with a metric.
878  // cuda events and metrics have already been skipped if duplicates,
879  // but we can't say the same for sub-events of a metric. We need to
880  // check we don't duplicate them in allEvents.
881 
882  for(ee = 0; ee < numMetricEvents; ee++) { // For each event retrieved,
883  int aeIdx;
884  CUpti_EventID myId = gctxt->availEventDesc[index].metricEvents[ee]; // collect the sub-event ID.
885 
886  for (aeIdx=0; aeIdx<(int) eventctrl->allEventsCount; aeIdx++) { // loop through existing events.
887  if (eventctrl->allEvents[aeIdx] == myId) break; // break out if duplicate found.
888  }
889 
890  if (aeIdx < (int) eventctrl->allEventsCount) continue; // Don't add if already present.
891  eventctrl->allEvents[eventctrl->allEventsCount++] = myId; // add event to the all array.
892 
893  if (eventctrl->allEventsCount >= PAPICUDA_MAX_COUNTERS) { // Fail if we exceed size of array.
894  SUBDBG("Num events (generated by metric) exceeded PAPICUDA_MAX_COUNTERS\n");
895  return(PAPI_EINVAL);
896  }
897  } // end for each event in metric.
898  } // end if we must process all sub-events of a metric.
899 
900  // Record index of this active event back into the nativeInfo
901  // structure.
902 
903  nativeInfo[ii].ni_position = gctrl->activeEventCount;
904 
905  // Record index of this active event within this context. We need this
906  // so after we read this context, we can move values (or compute
907  // metrics and move values) into their proper position within the
908  // activeValues[] array.
909 
910  eventctrl->ctxActiveEvents[eventctrl->ctxActiveCount++] = // within this active_cucontext.
911  gctrl->activeEventCount; // ..
912 
913  // Record in internal gctrl arrays.
914  // so we have a succinct list of active events and metrics; this will
915  // be useful for performance especially on metrics, where we must
916  // compose values.
917 
918  CHECK_PRINT_EVAL(gctrl->activeEventCount == PAPICUDA_MAX_COUNTERS - 1, "Exceeded maximum num of events (PAPI_MAX_COUNTERS)", return (PAPI_EMISC));
919  gctrl->activeEventIndex[gctrl->activeEventCount] = index;
920  gctrl->activeEventValues[gctrl->activeEventCount] = 0;
921  gctrl->activeEventCount++;
922 
923  // EventGroupSets does an analysis to creates 'sets' of events that
924  // can be collected simultaneously, i.e. the application must be
925  // run once per set. CUpti calls these 'passes'. We don't allow
926  // such combinations, there is no way to tell a PAPI user to run
927  // their application multiple times. WITHIN a single set are
928  // EventGroups which are collected simultaneously but must be read
929  // separately because each group applies to a separate domain. So
930  // we don't mind that; but we must exit with an invalid combination
931  // if numsets > 1, indicating the most recent event requested
932  // cannot be collected simultaneously with the others.
933 
934  // We destroy any existing eventGroupSets, and then create one for the
935  // new set of events.
936 
937  SUBDBG("Create eventGroupSets for context (destroy pre-existing) (nativeCount %d, allEventsCount %d) \n", gctrl->activeEventCount, eventctrl->allEventsCount);
938  if(eventctrl->allEventsCount > 0) { // If we have events...
939  // SUBDBG("Destroy previous eventGroupPasses for the context \n");
940  if(eventctrl->eventGroupSets != NULL) { // if we have a previous analysis;
941  CUPTI_CALL((*cuptiEventGroupSetsDestroyPtr) // .. Destroy it.
942  (eventctrl->eventGroupSets), return (PAPI_EMISC)); // .. If we can't, return error.
943  eventctrl->eventGroupSets = NULL; // .. Reset pointer.
944  }
945 
946  size_t sizeBytes = (eventctrl->allEventsCount) * sizeof(CUpti_EventID); // compute bytes in the array.
947 
948  // SUBDBG("About to create eventGroupPasses for the context (sizeBytes %zu) \n", sizeBytes);
949 #ifdef PAPICUDA_KERNEL_REPLAY_MODE
950  CUPTI_CALL((*cuptiEnableKernelReplayModePtr) (eventCuCtx),
951  return (PAPI_ECMP));
952  CUPTI_CALL((*cuptiEventGroupSetsCreatePtr)
953  (eventCuCtx, sizeBytes, eventctrl->allEvents,
954  &eventctrl->eventGroupSets),
955  return (PAPI_ECMP));
956 
957 #else // Normal operation.
958  CUPTI_CALL((*cuptiSetEventCollectionModePtr)
959  (eventCuCtx,CUPTI_EVENT_COLLECTION_MODE_CONTINUOUS),
960  return(PAPI_ECMP));
961 
962 // CUPTI provides two routines to create EventGroupSets, one is used
963 // here cuptiEventGroupSetsCreate(), the other is for metrics, it will
964 // automatically collect the events needed for a metric. It is called
965 // cuptiMetricCreateEventGroupSets(). We have checked and these two routines
966 // produce groups of the same size with the same event IDs, and work equally.
967 
968  CUPTI_CALL((*cuptiEventGroupSetsCreatePtr)
969  (eventCuCtx, sizeBytes, eventctrl->allEvents,
970  &eventctrl->eventGroupSets),
971  return (PAPI_EMISC));
972 
973  if (eventctrl->eventGroupSets->numSets > 1) { // If more than one pass is required,
974  SUBDBG("Error occurred: The combined CUPTI events cannot be collected simultaneously ... try different events\n");
975  papicuda_cleanup_eventset(ctrl); // Will do cuptiEventGroupSetsDestroy() to clean up memory.
976  return(PAPI_ECOMBO);
977  } else {
978  SUBDBG("Created eventGroupSets. nativeCount %d, allEventsCount %d. Sets (passes-required) = %d) \n", gctrl->activeEventCount, eventctrl->allEventsCount, eventctrl->eventGroupSets->numSets);
979  }
980 
981 #endif // #if/#else/#endif on PAPICUDA_KERNEL_REPLAY_MODE
982 
983  } // end if we had any events.
984 
985  if(eventCuCtx != currCuCtx) // restore original context for caller, if we changed it.
986  CU_CALL((*cuCtxPopCurrentPtr) (&eventCuCtx), return (PAPI_EMISC));
987 
988  }
989  return (PAPI_OK);
990 } // end PAPI_update_control_state.
#define PAPI_OK
Definition: fpapi.h:105
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:121
#define PAPI_ENOMEM
Definition: fpapi.h:107
#define PAPI_EINVAL
Definition: fpapi.h:106
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:118
#define PAPICUDA_MAX_COUNTERS
Definition: linux-cuda.c:37
uint32_t * availEventIDArray
Definition: linux-cuda.c:49
#define PAPI_ECOMBO
Definition: fpapi.h:129
int * availEventDeviceNum
Definition: linux-cuda.c:48
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:142
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
Definition: linux-cuda.c:124
#define PAPI_ECMP
Definition: fpapi.h:109
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:51
#define PAPI_EMISC
Definition: fpapi.h:119
CUpti_EventID allEvents[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:89
static int papicuda_cleanup_eventset(hwd_control_state_t *ctrl)
Definition: linux-cuda.c:1298
papicuda_active_cucontext_t * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:100
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:133
CUpti_EventGroupSets * eventGroupSets
Definition: linux-cuda.c:92
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:99
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:103
int activeEventIndex[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:102
uint32_t activeEventCount
Definition: linux-cuda.c:101
uint32_t * availEventIsBeingMeasuredInEventset
Definition: linux-cuda.c:50
uint32_t ctxActiveEvents[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:86
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:153
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: benchSANVML.c:53
Here is the call graph for this function:

◆ readMetricValue()

void readMetricValue ( CUpti_EventGroup  eventGroup,
uint32_t  numEvents,
uint64_t  numTotalInstances,
CUdevice  dev,
uint32_t  numMetrics,
CUpti_MetricID *  metricId,
CUpti_MetricValueKind *  myKinds,
long long int *  values,
uint64_t  timeDuration 
)

Definition at line 1605 of file linux-cuda.c.

1614 {
1615  size_t bufferSizeBytes, numCountersRead;
1616  uint64_t *eventValueArray = NULL;
1617  CUpti_EventID *eventIdArray;
1618  size_t arraySizeBytes = 0;
1619  uint64_t *aggrEventValueArray = NULL;
1620  size_t aggrEventValueArraySize;
1621  uint32_t i = 0, j = 0;
1622 
1623  arraySizeBytes = sizeof(CUpti_EventID) * numEvents;
1624  bufferSizeBytes = sizeof(uint64_t) * numEvents * numTotalInstances;
1625 
1626  eventValueArray = (uint64_t *) malloc(bufferSizeBytes);
1627 
1628  eventIdArray = (CUpti_EventID *) malloc(arraySizeBytes);
1629 
1630  aggrEventValueArray = (uint64_t *) calloc(numEvents, sizeof(uint64_t));
1631 
1632  aggrEventValueArraySize = sizeof(uint64_t) * numEvents;
1633 
1634  CUPTI_CALL( (*cuptiEventGroupReadAllEvents)
1635  (eventGroup, CUPTI_EVENT_READ_FLAG_NONE, &bufferSizeBytes,
1636  eventValueArray, &arraySizeBytes, eventIdArray, &numCountersRead),
1637  return);
1638 
1639  // Arrangement of 2-d Array returned in eventValueArray:
1640  // domain instance 0: event0 event1 ... eventN
1641  // domain instance 1: event0 event1 ... eventN
1642  // ...
1643  // domain instance M: event0 event1 ... eventN
1644  // But we accumulate by column, event[0], event[1], etc.
1645 
1646  for (i = 0; i < numEvents; i++) { // outer loop is column (event) we are on.
1647  for (j = 0; j < numTotalInstances; j++) { // inner loop is row (instance) we are on.
1648  aggrEventValueArray[i] += eventValueArray[i + numEvents * j];
1649  }
1650  }
1651 
1652  // After aggregation, we use the data to compose the metrics.
1653  for (i = 0; i < numMetrics; i++) {
1654  CUpti_MetricValue metricValue;
1655  CUPTI_CALL( (*cuptiMetricGetValue)
1656  (dev, metricId[i], arraySizeBytes, eventIdArray,
1657  aggrEventValueArraySize, aggrEventValueArray,
1658  timeDuration, &metricValue),
1659  return);
1660 
1661  papicuda_convert_metric_value_to_long_long(metricValue, myKinds[i], &values[i]);
1662  }
1663 
1664  free(eventValueArray);
1665  free(eventIdArray);
1666 } // end readMetricValue.
static int papicuda_convert_metric_value_to_long_long(CUpti_MetricValue metricValue, CUpti_MetricValueKind valueKind, long long int *papiValue)
Definition: linux-cuda.c:637
static long long values[NUM_EVENTS]
Definition: init_fini.c:10
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:153
int i
Definition: fileop.c:140
Here is the call graph for this function:

Variable Documentation

◆ _cuda_vector

papi_vector_t _cuda_vector

Definition at line 115 of file linux-cuda.c.

◆ _dl_non_dynamic_init

void(* _dl_non_dynamic_init) (void)

Definition at line 188 of file linux-cuda.c.

261 {
262 #define DLSYM_AND_CHECK( dllib, name ) dlsym( dllib, name ); if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
263 
264  /* Attempt to guess if we were statically linked to libc, if so bail */
265  if(_dl_non_dynamic_init != NULL) {
266  strncpy(_cuda_vector.cmp_info.disabled_reason, "The CUDA component does not support statically linking to libc.", PAPI_MAX_STR_LEN);
267  return PAPI_ENOSUPP;
268  }
269  /* Need to link in the cuda libraries, if not found disable the component */
270  dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
271  CHECK_PRINT_EVAL(!dl1, "CUDA library libcuda.so not found.", return (PAPI_ENOSUPP));
272  cuCtxGetCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxGetCurrent");
273  cuCtxSetCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxSetCurrent");
274  cuDeviceGetPtr = DLSYM_AND_CHECK(dl1, "cuDeviceGet");
275  cuDeviceGetCountPtr = DLSYM_AND_CHECK(dl1, "cuDeviceGetCount");
276  cuDeviceGetNamePtr = DLSYM_AND_CHECK(dl1, "cuDeviceGetName");
277  cuInitPtr = DLSYM_AND_CHECK(dl1, "cuInit");
278  cuCtxPopCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxPopCurrent");
279  cuCtxPushCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxPushCurrent");
280  cuCtxDestroyPtr = DLSYM_AND_CHECK(dl1, "cuCtxDestroy");
281  cuCtxCreatePtr = DLSYM_AND_CHECK(dl1, "cuCtxCreate");
282  cuCtxSynchronizePtr = DLSYM_AND_CHECK(dl1, "cuCtxSynchronize");
283 
284  dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE);
285  CHECK_PRINT_EVAL(!dl2, "CUDA runtime library libcudart.so not found.", return (PAPI_ENOSUPP));
286  cudaGetDevicePtr = DLSYM_AND_CHECK(dl2, "cudaGetDevice");
287  cudaSetDevicePtr = DLSYM_AND_CHECK(dl2, "cudaSetDevice");
288  cudaFreePtr = DLSYM_AND_CHECK(dl2, "cudaFree");
289 
290  dl3 = dlopen("libcupti.so", RTLD_NOW | RTLD_GLOBAL);
291  CHECK_PRINT_EVAL(!dl3, "CUDA Profiling Tools Interface (CUPTI) library libcupti.so not found.", return (PAPI_ENOSUPP));
292  /* The macro DLSYM_AND_CHECK results in the expansion example below */
293  /* cuptiDeviceEnumEventDomainsPtr = dlsym( dl3, "cuptiDeviceEnumEventDomains" ); */
294  /* if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); } */
295  cuptiDeviceEnumMetricsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceEnumMetrics");
296  cuptiDeviceGetEventDomainAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceGetEventDomainAttribute");
297  cuptiDeviceGetNumMetricsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceGetNumMetrics");
298  cuptiEventGroupGetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupGetAttribute");
299  cuptiEventGroupReadEventPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupReadEvent");
300  cuptiEventGroupSetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetAttribute");
301  cuptiMetricGetRequiredEventGroupSetsPtr = DLSYM_AND_CHECK(dl3, "cuptiMetricGetRequiredEventGroupSets");
302  cuptiEventGroupSetDisablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetDisable");
303  cuptiEventGroupSetEnablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetEnable");
304  cuptiEventGroupSetsCreatePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetsCreate");
305  cuptiEventGroupSetsDestroyPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetsDestroy");
306  cuptiGetTimestampPtr = DLSYM_AND_CHECK(dl3, "cuptiGetTimestamp");
307  cuptiMetricEnumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiMetricEnumEvents");
308  cuptiMetricGetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiMetricGetAttribute");
309  cuptiMetricGetNumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiMetricGetNumEvents");
310  cuptiMetricGetValuePtr = DLSYM_AND_CHECK(dl3, "cuptiMetricGetValue");
311  cuptiMetricCreateEventGroupSetsPtr = DLSYM_AND_CHECK(dl3, "cuptiMetricCreateEventGroupSets");
312  cuptiSetEventCollectionModePtr = DLSYM_AND_CHECK(dl3, "cuptiSetEventCollectionMode");
313  cuptiDeviceEnumEventDomainsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceEnumEventDomains");
314  cuptiDeviceGetNumEventDomainsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceGetNumEventDomains");
315  cuptiEventDomainEnumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventDomainEnumEvents");
316  cuptiEventDomainGetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiEventDomainGetAttribute");
317  cuptiEventDomainGetNumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventDomainGetNumEvents");
318  cuptiEventGetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGetAttribute");
319  cuptiEventGroupAddEventPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupAddEvent");
320  cuptiEventGroupCreatePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupCreate");
321  cuptiEventGroupDestroyPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupDestroy");
322  cuptiEventGroupDisablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupDisable");
323  cuptiEventGroupEnablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupEnable");
324  cuptiEventGroupReadAllEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupReadAllEvents");
325  cuptiEventGroupResetAllEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupResetAllEvents");
326  cuptiGetResultStringPtr = DLSYM_AND_CHECK(dl3, "cuptiGetResultString");
327  cuptiEnableKernelReplayModePtr = DLSYM_AND_CHECK(dl3, "cuptiEnableKernelReplayMode");
328  cuptiDisableKernelReplayModePtr = DLSYM_AND_CHECK(dl3, "cuptiEnableKernelReplayMode");
329  return (PAPI_OK);
330 }
#define PAPI_OK
Definition: fpapi.h:105
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:637
#define PAPI_ENOSUPP
Definition: fpapi.h:123
#define DLSYM_AND_CHECK(dllib, name)
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
static void * dl1
Definition: linux-cuda.c:110
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
Definition: linux-cuda.c:124
static void * dl2
Definition: linux-cuda.c:111
void(* _dl_non_dynamic_init)(void)
Definition: linux-cuda.c:188
static void * dl3
Definition: linux-cuda.c:112
papi_vector_t _cuda_vector
Definition: linux-cuda.c:115
CUresult CUDAAPI(* cuInitPtr)(unsigned int)
Definition: benchSANVML.c:47
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: benchSANVML.c:53
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43

◆ dl1

void* dl1 = NULL
static

Definition at line 110 of file linux-cuda.c.

◆ dl2

void* dl2 = NULL
static

Definition at line 111 of file linux-cuda.c.

◆ dl3

void* dl3 = NULL
static

Definition at line 112 of file linux-cuda.c.

◆ global_papicuda_context

papicuda_context_t* global_papicuda_context = NULL
static

Definition at line 118 of file linux-cuda.c.

◆ global_papicuda_control

papicuda_control_t* global_papicuda_control = NULL
static

Definition at line 121 of file linux-cuda.c.