Darknet/YOLO v6.0-37-gb57f9029
Object Detection Framework
 
Loading...
Searching...
No Matches
darknet_gpu.hpp File Reference

Some GPU-specific includes and definitions. More...

#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <curand.h>
#include <cublas_v2.h>
#include <cudnn.h>
#include <hip/hip_runtime.h>
#include <rocrand/rocrand.h>
#include <hiprand/hiprand.h>
#include <hipblas/hipblas.h>
#include <miopen/miopen.h>
Include dependency graph for darknet_gpu.hpp:
This graph shows which files directly or indirectly include this file:

Macros

#define CUBLAS_OP_N   HIPBLAS_OP_N
 
#define CUBLAS_OP_T   HIPBLAS_OP_T
 
#define CUBLAS_STATUS_SUCCESS   HIPBLAS_STATUS_SUCCESS
 
#define cublasCreate   hipblasCreate
 
#define cublasHandle_t   hipblasHandle_t
 
#define cublasSetStream   hipblasSetStream
 
#define cublasSgemm   hipblasSgemm
 
#define cublasStatus_t   hipblasStatus_t
 
#define CUcontext   hipCtx_t
 
#define cuCtxGetCurrent   hipCtxGetCurrent
 
#define CUDA_SUCCESS   hipSuccess
 
#define cudaDeviceProp   hipDeviceProp_t
 
#define cudaDeviceScheduleBlockingSync   hipDeviceScheduleBlockingSync
 
#define cudaDeviceSynchronize   hipDeviceSynchronize
 
#define cudaDriverGetVersion   hipDriverGetVersion
 
#define cudaError   hipError_t
 
#define cudaError_t   hipError_t
 
#define cudaErrorInsufficientDriver   hipErrorInsufficientDriver
 
#define cudaErrorNoDevice   hipErrorNoDevice
 
#define cudaEvent_t   hipEvent_t
 
#define cudaEventCreate   hipEventCreate
 
#define cudaEventCreateWithFlags   hipEventCreateWithFlags
 
#define cudaEventDestroy   hipEventDestroy
 
#define cudaEventDisableTiming   hipEventDisableTiming
 
#define cudaEventElapsedTime   hipEventElapsedTime
 
#define cudaEventRecord   hipEventRecord
 
#define cudaEventSynchronize   hipEventSynchronize
 
#define cudaFree   hipFree
 
#define cudaFreeHost   hipHostFree
 
#define cudaGetDevice   hipGetDevice
 
#define cudaGetDeviceCount   hipGetDeviceCount
 
#define cudaGetDeviceProperties   hipGetDeviceProperties
 
#define cudaGetErrorName   hipGetErrorName
 
#define cudaGetErrorString   hipGetErrorString
 
#define cudaGetLastError   hipGetLastError
 
#define cudaGraph_t   hipGraph_t
 
#define cudaGraphExec_t   hipGraphExec_t
 
#define cudaGraphInstantiate   hipGraphInstantiate
 
#define cudaGraphLaunch   hipGraphLaunch
 
#define cudaHostAlloc   hipHostMalloc
 
#define cudaHostAlloc   hipHostMalloc
 
#define cudaHostAllocDefault   hipHostMallocDefault
 
#define cudaHostRegisterMapped   hipHostRegisterMapped
 
#define cudaMalloc   hipMalloc
 
#define cudaMemcpy   hipMemcpy
 
#define cudaMemcpyAsync   hipMemcpyAsync
 
#define cudaMemcpyDefault   hipMemcpyDefault
 
#define cudaMemcpyDeviceToHost   hipMemcpyDeviceToHost
 
#define cudaMemcpyHostToDevice   hipMemcpyHostToDevice
 
#define cudaMemGetInfo   hipMemGetInfo
 
#define cudaMemset   hipMemset
 
#define cudaPeekAtLastError   hipPeekAtLastError
 
#define cudaReadModeElementType   hipReadModeElementType
 
#define cudaRuntimeGetVersion   hipRuntimeGetVersion
 
#define cudaSetDevice   hipSetDevice
 
#define cudaSetDeviceFlags   hipSetDeviceFlags
 
#define cudaStream_t   hipStream_t
 
#define cudaStreamBeginCapture   hipStreamBeginCapture
 
#define cudaStreamCaptureModeGlobal   hipStreamCaptureModeGlobal
 
#define cudaStreamCreate   hipStreamCreate
 
#define cudaStreamCreateWithFlags   hipStreamCreateWithFlags
 
#define cudaStreamEndCapture   hipStreamEndCapture
 
#define cudaStreamNonBlocking   hipStreamNonBlocking
 
#define cudaStreamSynchronize   hipStreamSynchronize
 
#define cudaStreamWaitEvent   hipStreamWaitEvent
 
#define cudaSuccess   hipSuccess
 
#define CUDNN_BATCHNORM_SPATIAL   miopenBatchNormMode_t::miopenBNSpatial
 
#define CUDNN_CROSS_CORRELATION   miopenConvolutionMode_t::miopenConvolution
 
#define CUDNN_DATA_FLOAT   miopenDataType_t::miopenFloat
 
#define CUDNN_DATA_HALF   miopenDataType_t::miopenHalf
 
#define CUDNN_STATUS_SUCCESS   miopenStatus_t::miopenStatusSuccess
 
#define CUDNN_TENSOR_NCHW   0
 
#define cudnnBatchNormalizationBackward   miopenBatchNormalizationBackward
 
#define cudnnBatchNormalizationForwardTraining   miopenBatchNormalizationForwardTraining
 
#define cudnnConvolutionBackwardData(h, alpha, wd, w, dyd, dy, cd, algo, ws, s, beta, dxd, dx)   miopenConvolutionBackwardData(h, alpha, dyd, dy, wd, w, cd, algo, beta, dxd, dx, wd, s)
 
#define cudnnConvolutionBackwardFilter(h, alpha, xd, x, dyd, dy, cd, algo, ws, s, beta, dwd, dw)   miopenConvolutionBackwardWeights(h, alpha, dyd, dy, xd, x, cd, algo, beta, dwd, dw, ws, s)
 
#define cudnnConvolutionBwdDataAlgo_t   miopenConvBwdDataAlgorithm_t
 
#define cudnnConvolutionBwdFilterAlgo_t   miopenConvBwdWeightsAlgorithm_t
 
#define cudnnConvolutionDescriptor_t   miopenConvolutionDescriptor_t
 
#define cudnnConvolutionForward(h, alpha, xd, x, wd, w, cd, algo, ws, s, beta, yd, y)   miopenConvolutionForward(h, alpha, xd, x, wd, w, cd, algo, beta, yd, y, ws, s)
 
#define cudnnConvolutionFwdAlgo_t   miopenConvFwdAlgorithm_t
 
#define cudnnConvolutionMode_t   miopenConvolutionMode_t
 
#define cudnnCreate   miopenCreate
 
#define cudnnCreateConvolutionDescriptor   miopenCreateConvolutionDescriptor
 
#define cudnnCreateFilterDescriptor   miopenCreateTensorDescriptor /* TODO: check this, does tensor replace filter? */
 
#define cudnnCreateTensorDescriptor   miopenCreateTensorDescriptor
 
#define cudnnDataType_t   miopenDataType_t
 
#define cudnnDataType_t   miopenDataType_t
 
#define cudnnDestroyTensorDescriptor   miopenDestroyTensorDescriptor
 
#define cudnnFilterDescriptor_t   miopenTensorDescriptor_t
 
#define cudnnGetConvolutionBackwardDataWorkspaceSize(h, wd, dyd, cd, dxd, algo, s)   miopenConvolutionBackwardDataGetWorkSpaceSize(h, dyd, wd, cd, dxd, s)
 
#define cudnnGetConvolutionBackwardFilterWorkspaceSize(h, xd, dyd, cd, gd, algo, s)   miopenConvolutionBackwardWeightsGetWorkSpaceSize(h, dyd, xd, cd, dwd, s)
 
#define cudnnGetConvolutionForwardWorkspaceSize(h, xd, wd, cd, yd, algo, s)   miopenConvolutionForwardGetWorkSpaceSize(h, wd, xd, cd, yd, s)
 
#define cudnnHandle_t   miopenHandle_t
 
#define cudnnPoolingDescriptor_t   miopenPoolingDescriptor_t
 
#define cudnnSetConvolution2dDescriptor(d, h, w, u, v, x, y, mode)   miopenInitConvolutionDescriptor(d, mode, h, w, u, v, x, y)
 
#define cudnnSetFilter4dDescriptor(d, t, i, n, c, h, w)   miopenSet4dTensorDescriptor(d, t, n, c, h, w)
 
#define cudnnSetStream   miopenSetStream
 
#define cudnnSetTensor4dDescriptor(d, i, t, b, c, h, w)   miopenSet4dTensorDescriptor(d, t, b, c, h, w)
 
#define cudnnStatus_t   miopenStatus_t
 
#define cudnnTensorDescriptor_t   miopenTensorDescriptor_t
 
#define CURAND_RNG_PSEUDO_DEFAULT   HIPRAND_RNG_PSEUDO_DEFAULT
 
#define curandCreateGenerator   hiprandCreateGenerator
 
#define curandGenerateUniform   hiprandGenerateUniform
 
#define curandGenerator_t   hiprandGenerator_t
 
#define curandSetPseudoRandomGeneratorSeed   hiprandSetPseudoRandomGeneratorSeed
 
#define CUresult   hipError_t
 
#define SHFL_DOWN(val, offset)   shfl_xor(val, offset)
 

Detailed Description

Some GPU-specific includes and definitions.

Handles differences between NVIDIA CUDA and AMD ROCm.

Macro Definition Documentation

◆ CUBLAS_OP_N

#define CUBLAS_OP_N   HIPBLAS_OP_N

◆ CUBLAS_OP_T

#define CUBLAS_OP_T   HIPBLAS_OP_T

◆ CUBLAS_STATUS_SUCCESS

#define CUBLAS_STATUS_SUCCESS   HIPBLAS_STATUS_SUCCESS

◆ cublasCreate

#define cublasCreate   hipblasCreate

◆ cublasHandle_t

#define cublasHandle_t   hipblasHandle_t

◆ cublasSetStream

#define cublasSetStream   hipblasSetStream

◆ cublasSgemm

#define cublasSgemm   hipblasSgemm

◆ cublasStatus_t

#define cublasStatus_t   hipblasStatus_t

◆ CUcontext

#define CUcontext   hipCtx_t

◆ cuCtxGetCurrent

#define cuCtxGetCurrent   hipCtxGetCurrent

◆ CUDA_SUCCESS

#define CUDA_SUCCESS   hipSuccess

◆ cudaDeviceProp

#define cudaDeviceProp   hipDeviceProp_t

◆ cudaDeviceScheduleBlockingSync

#define cudaDeviceScheduleBlockingSync   hipDeviceScheduleBlockingSync

◆ cudaDeviceSynchronize

#define cudaDeviceSynchronize   hipDeviceSynchronize

◆ cudaDriverGetVersion

#define cudaDriverGetVersion   hipDriverGetVersion

◆ cudaError

#define cudaError   hipError_t

◆ cudaError_t

#define cudaError_t   hipError_t

◆ cudaErrorInsufficientDriver

#define cudaErrorInsufficientDriver   hipErrorInsufficientDriver

◆ cudaErrorNoDevice

#define cudaErrorNoDevice   hipErrorNoDevice

◆ cudaEvent_t

#define cudaEvent_t   hipEvent_t

◆ cudaEventCreate

#define cudaEventCreate   hipEventCreate

◆ cudaEventCreateWithFlags

#define cudaEventCreateWithFlags   hipEventCreateWithFlags

◆ cudaEventDestroy

#define cudaEventDestroy   hipEventDestroy

◆ cudaEventDisableTiming

#define cudaEventDisableTiming   hipEventDisableTiming

◆ cudaEventElapsedTime

#define cudaEventElapsedTime   hipEventElapsedTime

◆ cudaEventRecord

#define cudaEventRecord   hipEventRecord

◆ cudaEventSynchronize

#define cudaEventSynchronize   hipEventSynchronize

◆ cudaFree

#define cudaFree   hipFree

◆ cudaFreeHost

#define cudaFreeHost   hipHostFree

◆ cudaGetDevice

#define cudaGetDevice   hipGetDevice

◆ cudaGetDeviceCount

#define cudaGetDeviceCount   hipGetDeviceCount

◆ cudaGetDeviceProperties

#define cudaGetDeviceProperties   hipGetDeviceProperties

◆ cudaGetErrorName

#define cudaGetErrorName   hipGetErrorName

◆ cudaGetErrorString

#define cudaGetErrorString   hipGetErrorString

◆ cudaGetLastError

#define cudaGetLastError   hipGetLastError

◆ cudaGraph_t

#define cudaGraph_t   hipGraph_t

◆ cudaGraphExec_t

#define cudaGraphExec_t   hipGraphExec_t

◆ cudaGraphInstantiate

#define cudaGraphInstantiate   hipGraphInstantiate

◆ cudaGraphLaunch

#define cudaGraphLaunch   hipGraphLaunch

◆ cudaHostAlloc [1/2]

#define cudaHostAlloc   hipHostMalloc

◆ cudaHostAlloc [2/2]

#define cudaHostAlloc   hipHostMalloc

◆ cudaHostAllocDefault

#define cudaHostAllocDefault   hipHostMallocDefault

◆ cudaHostRegisterMapped

#define cudaHostRegisterMapped   hipHostRegisterMapped

◆ cudaMalloc

#define cudaMalloc   hipMalloc

◆ cudaMemcpy

#define cudaMemcpy   hipMemcpy

◆ cudaMemcpyAsync

#define cudaMemcpyAsync   hipMemcpyAsync

◆ cudaMemcpyDefault

#define cudaMemcpyDefault   hipMemcpyDefault

◆ cudaMemcpyDeviceToHost

#define cudaMemcpyDeviceToHost   hipMemcpyDeviceToHost

◆ cudaMemcpyHostToDevice

#define cudaMemcpyHostToDevice   hipMemcpyHostToDevice

◆ cudaMemGetInfo

#define cudaMemGetInfo   hipMemGetInfo

◆ cudaMemset

#define cudaMemset   hipMemset

◆ cudaPeekAtLastError

#define cudaPeekAtLastError   hipPeekAtLastError

◆ cudaReadModeElementType

#define cudaReadModeElementType   hipReadModeElementType

◆ cudaRuntimeGetVersion

#define cudaRuntimeGetVersion   hipRuntimeGetVersion

◆ cudaSetDevice

#define cudaSetDevice   hipSetDevice

◆ cudaSetDeviceFlags

#define cudaSetDeviceFlags   hipSetDeviceFlags

◆ cudaStream_t

#define cudaStream_t   hipStream_t

◆ cudaStreamBeginCapture

#define cudaStreamBeginCapture   hipStreamBeginCapture

◆ cudaStreamCaptureModeGlobal

#define cudaStreamCaptureModeGlobal   hipStreamCaptureModeGlobal

◆ cudaStreamCreate

#define cudaStreamCreate   hipStreamCreate

◆ cudaStreamCreateWithFlags

#define cudaStreamCreateWithFlags   hipStreamCreateWithFlags

◆ cudaStreamEndCapture

#define cudaStreamEndCapture   hipStreamEndCapture

◆ cudaStreamNonBlocking

#define cudaStreamNonBlocking   hipStreamNonBlocking

◆ cudaStreamSynchronize

#define cudaStreamSynchronize   hipStreamSynchronize

◆ cudaStreamWaitEvent

#define cudaStreamWaitEvent   hipStreamWaitEvent

◆ cudaSuccess

#define cudaSuccess   hipSuccess

◆ CUDNN_BATCHNORM_SPATIAL

#define CUDNN_BATCHNORM_SPATIAL   miopenBatchNormMode_t::miopenBNSpatial

◆ CUDNN_CROSS_CORRELATION

#define CUDNN_CROSS_CORRELATION   miopenConvolutionMode_t::miopenConvolution

◆ CUDNN_DATA_FLOAT

#define CUDNN_DATA_FLOAT   miopenDataType_t::miopenFloat

◆ CUDNN_DATA_HALF

#define CUDNN_DATA_HALF   miopenDataType_t::miopenHalf

◆ CUDNN_STATUS_SUCCESS

#define CUDNN_STATUS_SUCCESS   miopenStatus_t::miopenStatusSuccess

◆ CUDNN_TENSOR_NCHW

#define CUDNN_TENSOR_NCHW   0

◆ cudnnBatchNormalizationBackward

#define cudnnBatchNormalizationBackward   miopenBatchNormalizationBackward

◆ cudnnBatchNormalizationForwardTraining

#define cudnnBatchNormalizationForwardTraining   miopenBatchNormalizationForwardTraining

◆ cudnnConvolutionBackwardData

#define cudnnConvolutionBackwardData (   h,
  alpha,
  wd,
  w,
  dyd,
  dy,
  cd,
  algo,
  ws,
  s,
  beta,
  dxd,
  dx 
)    miopenConvolutionBackwardData(h, alpha, dyd, dy, wd, w, cd, algo, beta, dxd, dx, wd, s)

◆ cudnnConvolutionBackwardFilter

#define cudnnConvolutionBackwardFilter (   h,
  alpha,
  xd,
  x,
  dyd,
  dy,
  cd,
  algo,
  ws,
  s,
  beta,
  dwd,
  dw 
)    miopenConvolutionBackwardWeights(h, alpha, dyd, dy, xd, x, cd, algo, beta, dwd, dw, ws, s)

◆ cudnnConvolutionBwdDataAlgo_t

#define cudnnConvolutionBwdDataAlgo_t   miopenConvBwdDataAlgorithm_t

◆ cudnnConvolutionBwdFilterAlgo_t

#define cudnnConvolutionBwdFilterAlgo_t   miopenConvBwdWeightsAlgorithm_t

◆ cudnnConvolutionDescriptor_t

#define cudnnConvolutionDescriptor_t   miopenConvolutionDescriptor_t

◆ cudnnConvolutionForward

#define cudnnConvolutionForward (   h,
  alpha,
  xd,
  x,
  wd,
  w,
  cd,
  algo,
  ws,
  s,
  beta,
  yd,
  y 
)    miopenConvolutionForward(h, alpha, xd, x, wd, w, cd, algo, beta, yd, y, ws, s)

◆ cudnnConvolutionFwdAlgo_t

#define cudnnConvolutionFwdAlgo_t   miopenConvFwdAlgorithm_t

◆ cudnnConvolutionMode_t

#define cudnnConvolutionMode_t   miopenConvolutionMode_t

◆ cudnnCreate

#define cudnnCreate   miopenCreate

◆ cudnnCreateConvolutionDescriptor

#define cudnnCreateConvolutionDescriptor   miopenCreateConvolutionDescriptor

◆ cudnnCreateFilterDescriptor

#define cudnnCreateFilterDescriptor   miopenCreateTensorDescriptor /* TODO: check this, does tensor replace filter? */

◆ cudnnCreateTensorDescriptor

#define cudnnCreateTensorDescriptor   miopenCreateTensorDescriptor

◆ cudnnDataType_t [1/2]

#define cudnnDataType_t   miopenDataType_t

◆ cudnnDataType_t [2/2]

#define cudnnDataType_t   miopenDataType_t

◆ cudnnDestroyTensorDescriptor

#define cudnnDestroyTensorDescriptor   miopenDestroyTensorDescriptor

◆ cudnnFilterDescriptor_t

#define cudnnFilterDescriptor_t   miopenTensorDescriptor_t

◆ cudnnGetConvolutionBackwardDataWorkspaceSize

#define cudnnGetConvolutionBackwardDataWorkspaceSize (   h,
  wd,
  dyd,
  cd,
  dxd,
  algo,
  s 
)    miopenConvolutionBackwardDataGetWorkSpaceSize(h, dyd, wd, cd, dxd, s)

◆ cudnnGetConvolutionBackwardFilterWorkspaceSize

#define cudnnGetConvolutionBackwardFilterWorkspaceSize (   h,
  xd,
  dyd,
  cd,
  gd,
  algo,
  s 
)    miopenConvolutionBackwardWeightsGetWorkSpaceSize(h, dyd, xd, cd, dwd, s)

◆ cudnnGetConvolutionForwardWorkspaceSize

#define cudnnGetConvolutionForwardWorkspaceSize (   h,
  xd,
  wd,
  cd,
  yd,
  algo,
  s 
)    miopenConvolutionForwardGetWorkSpaceSize(h, wd, xd, cd, yd, s)

◆ cudnnHandle_t

#define cudnnHandle_t   miopenHandle_t

◆ cudnnPoolingDescriptor_t

#define cudnnPoolingDescriptor_t   miopenPoolingDescriptor_t

◆ cudnnSetConvolution2dDescriptor

#define cudnnSetConvolution2dDescriptor (   d,
  h,
  w,
  u,
  v,
  x,
  y,
  mode 
)    miopenInitConvolutionDescriptor(d, mode, h, w, u, v, x, y)

◆ cudnnSetFilter4dDescriptor

#define cudnnSetFilter4dDescriptor (   d,
  t,
  i,
  n,
  c,
  h,
  w 
)    miopenSet4dTensorDescriptor(d, t, n, c, h, w)

◆ cudnnSetStream

#define cudnnSetStream   miopenSetStream

◆ cudnnSetTensor4dDescriptor

#define cudnnSetTensor4dDescriptor (   d,
  i,
  t,
  b,
  c,
  h,
  w 
)    miopenSet4dTensorDescriptor(d, t, b, c, h, w)

◆ cudnnStatus_t

#define cudnnStatus_t   miopenStatus_t

◆ cudnnTensorDescriptor_t

#define cudnnTensorDescriptor_t   miopenTensorDescriptor_t

◆ CURAND_RNG_PSEUDO_DEFAULT

#define CURAND_RNG_PSEUDO_DEFAULT   HIPRAND_RNG_PSEUDO_DEFAULT

◆ curandCreateGenerator

#define curandCreateGenerator   hiprandCreateGenerator

◆ curandGenerateUniform

#define curandGenerateUniform   hiprandGenerateUniform

◆ curandGenerator_t

#define curandGenerator_t   hiprandGenerator_t

◆ curandSetPseudoRandomGeneratorSeed

#define curandSetPseudoRandomGeneratorSeed   hiprandSetPseudoRandomGeneratorSeed

◆ CUresult

#define CUresult   hipError_t

◆ SHFL_DOWN

#define SHFL_DOWN (   val,
  offset 
)    shfl_xor(val, offset)