Some GPU-specific includes and definitions.
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <curand.h>
#include <cublas_v2.h>
#include <cudnn.h>
#include <hip/hip_runtime.h>
#include <rocrand/rocrand.h>
#include <hiprand/hiprand.h>
#include <hipblas/hipblas.h>
#include <miopen/miopen.h>

Macros | |
| #define | CUBLAS_OP_N HIPBLAS_OP_N |
| #define | CUBLAS_OP_T HIPBLAS_OP_T |
| #define | CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS |
| #define | cublasCreate hipblasCreate |
| #define | cublasHandle_t hipblasHandle_t |
| #define | cublasSetStream hipblasSetStream |
| #define | cublasSgemm hipblasSgemm |
| #define | cublasStatus_t hipblasStatus_t |
| #define | CUcontext hipCtx_t |
| #define | cuCtxGetCurrent hipCtxGetCurrent |
| #define | CUDA_SUCCESS hipSuccess |
| #define | cudaDeviceProp hipDeviceProp_t |
| #define | cudaDeviceScheduleBlockingSync hipDeviceScheduleBlockingSync |
| #define | cudaDeviceSynchronize hipDeviceSynchronize |
| #define | cudaDriverGetVersion hipDriverGetVersion |
| #define | cudaError hipError_t |
| #define | cudaError_t hipError_t |
| #define | cudaErrorInsufficientDriver hipErrorInsufficientDriver |
| #define | cudaErrorNoDevice hipErrorNoDevice |
| #define | cudaEvent_t hipEvent_t |
| #define | cudaEventCreate hipEventCreate |
| #define | cudaEventCreateWithFlags hipEventCreateWithFlags |
| #define | cudaEventDestroy hipEventDestroy |
| #define | cudaEventDisableTiming hipEventDisableTiming |
| #define | cudaEventElapsedTime hipEventElapsedTime |
| #define | cudaEventRecord hipEventRecord |
| #define | cudaEventSynchronize hipEventSynchronize |
| #define | cudaFree hipFree |
| #define | cudaFreeHost hipHostFree |
| #define | cudaGetDevice hipGetDevice |
| #define | cudaGetDeviceCount hipGetDeviceCount |
| #define | cudaGetDeviceProperties hipGetDeviceProperties |
| #define | cudaGetErrorName hipGetErrorName |
| #define | cudaGetErrorString hipGetErrorString |
| #define | cudaGetLastError hipGetLastError |
| #define | cudaGraph_t hipGraph_t |
| #define | cudaGraphExec_t hipGraphExec_t |
| #define | cudaGraphInstantiate hipGraphInstantiate |
| #define | cudaGraphLaunch hipGraphLaunch |
| #define | cudaHostAlloc hipHostMalloc |
| #define | cudaHostAlloc hipHostMalloc |
| #define | cudaHostAllocDefault hipHostMallocDefault |
| #define | cudaHostRegisterMapped hipHostRegisterMapped |
| #define | cudaMalloc hipMalloc |
| #define | cudaMemcpy hipMemcpy |
| #define | cudaMemcpyAsync hipMemcpyAsync |
| #define | cudaMemcpyDefault hipMemcpyDefault |
| #define | cudaMemcpyDeviceToHost hipMemcpyDeviceToHost |
| #define | cudaMemcpyHostToDevice hipMemcpyHostToDevice |
| #define | cudaMemGetInfo hipMemGetInfo |
| #define | cudaMemset hipMemset |
| #define | cudaPeekAtLastError hipPeekAtLastError |
| #define | cudaReadModeElementType hipReadModeElementType |
| #define | cudaRuntimeGetVersion hipRuntimeGetVersion |
| #define | cudaSetDevice hipSetDevice |
| #define | cudaSetDeviceFlags hipSetDeviceFlags |
| #define | cudaStream_t hipStream_t |
| #define | cudaStreamBeginCapture hipStreamBeginCapture |
| #define | cudaStreamCaptureModeGlobal hipStreamCaptureModeGlobal |
| #define | cudaStreamCreate hipStreamCreate |
| #define | cudaStreamCreateWithFlags hipStreamCreateWithFlags |
| #define | cudaStreamEndCapture hipStreamEndCapture |
| #define | cudaStreamNonBlocking hipStreamNonBlocking |
| #define | cudaStreamSynchronize hipStreamSynchronize |
| #define | cudaStreamWaitEvent hipStreamWaitEvent |
| #define | cudaSuccess hipSuccess |
| #define | CUDNN_BATCHNORM_SPATIAL miopenBatchNormMode_t::miopenBNSpatial |
| #define | CUDNN_CROSS_CORRELATION miopenConvolutionMode_t::miopenConvolution |
| #define | CUDNN_DATA_FLOAT miopenDataType_t::miopenFloat |
| #define | CUDNN_DATA_HALF miopenDataType_t::miopenHalf |
| #define | CUDNN_STATUS_SUCCESS miopenStatus_t::miopenStatusSuccess |
| #define | CUDNN_TENSOR_NCHW 0 |
| #define | cudnnBatchNormalizationBackward miopenBatchNormalizationBackward |
| #define | cudnnBatchNormalizationForwardTraining miopenBatchNormalizationForwardTraining |
| #define | cudnnConvolutionBackwardData(h, alpha, wd, w, dyd, dy, cd, algo, ws, s, beta, dxd, dx) miopenConvolutionBackwardData(h, alpha, dyd, dy, wd, w, cd, algo, beta, dxd, dx, wd, s) |
| #define | cudnnConvolutionBackwardFilter(h, alpha, xd, x, dyd, dy, cd, algo, ws, s, beta, dwd, dw) miopenConvolutionBackwardWeights(h, alpha, dyd, dy, xd, x, cd, algo, beta, dwd, dw, ws, s) |
| #define | cudnnConvolutionBwdDataAlgo_t miopenConvBwdDataAlgorithm_t |
| #define | cudnnConvolutionBwdFilterAlgo_t miopenConvBwdWeightsAlgorithm_t |
| #define | cudnnConvolutionDescriptor_t miopenConvolutionDescriptor_t |
| #define | cudnnConvolutionForward(h, alpha, xd, x, wd, w, cd, algo, ws, s, beta, yd, y) miopenConvolutionForward(h, alpha, xd, x, wd, w, cd, algo, beta, yd, y, ws, s) |
| #define | cudnnConvolutionFwdAlgo_t miopenConvFwdAlgorithm_t |
| #define | cudnnConvolutionMode_t miopenConvolutionMode_t |
| #define | cudnnCreate miopenCreate |
| #define | cudnnCreateConvolutionDescriptor miopenCreateConvolutionDescriptor |
| #define | cudnnCreateFilterDescriptor miopenCreateTensorDescriptor /* TODO: check this, does tensor replace filter? */ |
| #define | cudnnCreateTensorDescriptor miopenCreateTensorDescriptor |
| #define | cudnnDataType_t miopenDataType_t |
| #define | cudnnDataType_t miopenDataType_t |
| #define | cudnnDestroyTensorDescriptor miopenDestroyTensorDescriptor |
| #define | cudnnFilterDescriptor_t miopenTensorDescriptor_t |
| #define | cudnnGetConvolutionBackwardDataWorkspaceSize(h, wd, dyd, cd, dxd, algo, s) miopenConvolutionBackwardDataGetWorkSpaceSize(h, dyd, wd, cd, dxd, s) |
| #define | cudnnGetConvolutionBackwardFilterWorkspaceSize(h, xd, dyd, cd, gd, algo, s) miopenConvolutionBackwardWeightsGetWorkSpaceSize(h, dyd, xd, cd, dwd, s) |
| #define | cudnnGetConvolutionForwardWorkspaceSize(h, xd, wd, cd, yd, algo, s) miopenConvolutionForwardGetWorkSpaceSize(h, wd, xd, cd, yd, s) |
| #define | cudnnHandle_t miopenHandle_t |
| #define | cudnnPoolingDescriptor_t miopenPoolingDescriptor_t |
| #define | cudnnSetConvolution2dDescriptor(d, h, w, u, v, x, y, mode) miopenInitConvolutionDescriptor(d, mode, h, w, u, v, x, y) |
| #define | cudnnSetFilter4dDescriptor(d, t, i, n, c, h, w) miopenSet4dTensorDescriptor(d, t, n, c, h, w) |
| #define | cudnnSetStream miopenSetStream |
| #define | cudnnSetTensor4dDescriptor(d, i, t, b, c, h, w) miopenSet4dTensorDescriptor(d, t, b, c, h, w) |
| #define | cudnnStatus_t miopenStatus_t |
| #define | cudnnTensorDescriptor_t miopenTensorDescriptor_t |
| #define | CURAND_RNG_PSEUDO_DEFAULT HIPRAND_RNG_PSEUDO_DEFAULT |
| #define | curandCreateGenerator hiprandCreateGenerator |
| #define | curandGenerateUniform hiprandGenerateUniform |
| #define | curandGenerator_t hiprandGenerator_t |
| #define | curandSetPseudoRandomGeneratorSeed hiprandSetPseudoRandomGeneratorSeed |
| #define | CUresult hipError_t |
| #define | SHFL_DOWN(val, offset) shfl_xor(val, offset) |
Some GPU-specific includes and definitions.
Handles differences between NVIDIA CUDA and AMD ROCm.
| #define CUBLAS_OP_N HIPBLAS_OP_N |
| #define CUBLAS_OP_T HIPBLAS_OP_T |
| #define CUBLAS_STATUS_SUCCESS HIPBLAS_STATUS_SUCCESS |
| #define cublasCreate hipblasCreate |
| #define cublasHandle_t hipblasHandle_t |
| #define cublasSetStream hipblasSetStream |
| #define cublasSgemm hipblasSgemm |
| #define cublasStatus_t hipblasStatus_t |
| #define CUcontext hipCtx_t |
| #define cuCtxGetCurrent hipCtxGetCurrent |
| #define CUDA_SUCCESS hipSuccess |
| #define cudaDeviceProp hipDeviceProp_t |
| #define cudaDeviceScheduleBlockingSync hipDeviceScheduleBlockingSync |
| #define cudaDeviceSynchronize hipDeviceSynchronize |
| #define cudaDriverGetVersion hipDriverGetVersion |
| #define cudaError hipError_t |
| #define cudaError_t hipError_t |
| #define cudaErrorInsufficientDriver hipErrorInsufficientDriver |
| #define cudaErrorNoDevice hipErrorNoDevice |
| #define cudaEvent_t hipEvent_t |
| #define cudaEventCreate hipEventCreate |
| #define cudaEventCreateWithFlags hipEventCreateWithFlags |
| #define cudaEventDestroy hipEventDestroy |
| #define cudaEventDisableTiming hipEventDisableTiming |
| #define cudaEventElapsedTime hipEventElapsedTime |
| #define cudaEventRecord hipEventRecord |
| #define cudaEventSynchronize hipEventSynchronize |
| #define cudaFree hipFree |
| #define cudaFreeHost hipHostFree |
| #define cudaGetDevice hipGetDevice |
| #define cudaGetDeviceCount hipGetDeviceCount |
| #define cudaGetDeviceProperties hipGetDeviceProperties |
| #define cudaGetErrorName hipGetErrorName |
| #define cudaGetErrorString hipGetErrorString |
| #define cudaGetLastError hipGetLastError |
| #define cudaGraph_t hipGraph_t |
| #define cudaGraphExec_t hipGraphExec_t |
| #define cudaGraphInstantiate hipGraphInstantiate |
| #define cudaGraphLaunch hipGraphLaunch |
| #define cudaHostAlloc hipHostMalloc |
| #define cudaHostAlloc hipHostMalloc |
| #define cudaHostAllocDefault hipHostMallocDefault |
| #define cudaHostRegisterMapped hipHostRegisterMapped |
| #define cudaMalloc hipMalloc |
| #define cudaMemcpy hipMemcpy |
| #define cudaMemcpyAsync hipMemcpyAsync |
| #define cudaMemcpyDefault hipMemcpyDefault |
| #define cudaMemcpyDeviceToHost hipMemcpyDeviceToHost |
| #define cudaMemcpyHostToDevice hipMemcpyHostToDevice |
| #define cudaMemGetInfo hipMemGetInfo |
| #define cudaMemset hipMemset |
| #define cudaPeekAtLastError hipPeekAtLastError |
| #define cudaReadModeElementType hipReadModeElementType |
| #define cudaRuntimeGetVersion hipRuntimeGetVersion |
| #define cudaSetDevice hipSetDevice |
| #define cudaSetDeviceFlags hipSetDeviceFlags |
| #define cudaStream_t hipStream_t |
| #define cudaStreamBeginCapture hipStreamBeginCapture |
| #define cudaStreamCaptureModeGlobal hipStreamCaptureModeGlobal |
| #define cudaStreamCreate hipStreamCreate |
| #define cudaStreamCreateWithFlags hipStreamCreateWithFlags |
| #define cudaStreamEndCapture hipStreamEndCapture |
| #define cudaStreamNonBlocking hipStreamNonBlocking |
| #define cudaStreamSynchronize hipStreamSynchronize |
| #define cudaStreamWaitEvent hipStreamWaitEvent |
| #define cudaSuccess hipSuccess |
| #define CUDNN_BATCHNORM_SPATIAL miopenBatchNormMode_t::miopenBNSpatial |
| #define CUDNN_CROSS_CORRELATION miopenConvolutionMode_t::miopenConvolution |
| #define CUDNN_DATA_FLOAT miopenDataType_t::miopenFloat |
| #define CUDNN_DATA_HALF miopenDataType_t::miopenHalf |
| #define CUDNN_STATUS_SUCCESS miopenStatus_t::miopenStatusSuccess |
| #define CUDNN_TENSOR_NCHW 0 |
| #define cudnnBatchNormalizationBackward miopenBatchNormalizationBackward |
| #define cudnnBatchNormalizationForwardTraining miopenBatchNormalizationForwardTraining |
| #define cudnnConvolutionBackwardData(h, alpha, wd, w, dyd, dy, cd, algo, ws, s, beta, dxd, dx) miopenConvolutionBackwardData(h, alpha, dyd, dy, wd, w, cd, algo, beta, dxd, dx, wd, s) |
| #define cudnnConvolutionBackwardFilter(h, alpha, xd, x, dyd, dy, cd, algo, ws, s, beta, dwd, dw) miopenConvolutionBackwardWeights(h, alpha, dyd, dy, xd, x, cd, algo, beta, dwd, dw, ws, s) |
| #define cudnnConvolutionBwdDataAlgo_t miopenConvBwdDataAlgorithm_t |
| #define cudnnConvolutionBwdFilterAlgo_t miopenConvBwdWeightsAlgorithm_t |
| #define cudnnConvolutionDescriptor_t miopenConvolutionDescriptor_t |
| #define cudnnConvolutionForward(h, alpha, xd, x, wd, w, cd, algo, ws, s, beta, yd, y) miopenConvolutionForward(h, alpha, xd, x, wd, w, cd, algo, beta, yd, y, ws, s) |
| #define cudnnConvolutionFwdAlgo_t miopenConvFwdAlgorithm_t |
| #define cudnnConvolutionMode_t miopenConvolutionMode_t |
| #define cudnnCreate miopenCreate |
| #define cudnnCreateConvolutionDescriptor miopenCreateConvolutionDescriptor |
| #define cudnnCreateFilterDescriptor miopenCreateTensorDescriptor /* TODO: check this, does tensor replace filter? */ |
| #define cudnnCreateTensorDescriptor miopenCreateTensorDescriptor |
| #define cudnnDataType_t miopenDataType_t |
| #define cudnnDataType_t miopenDataType_t |
| #define cudnnDestroyTensorDescriptor miopenDestroyTensorDescriptor |
| #define cudnnFilterDescriptor_t miopenTensorDescriptor_t |
| #define cudnnGetConvolutionBackwardDataWorkspaceSize(h, wd, dyd, cd, dxd, algo, s) miopenConvolutionBackwardDataGetWorkSpaceSize(h, dyd, wd, cd, dxd, s) |
| #define cudnnGetConvolutionBackwardFilterWorkspaceSize(h, xd, dyd, cd, gd, algo, s) miopenConvolutionBackwardWeightsGetWorkSpaceSize(h, dyd, xd, cd, dwd, s) |
| #define cudnnGetConvolutionForwardWorkspaceSize(h, xd, wd, cd, yd, algo, s) miopenConvolutionForwardGetWorkSpaceSize(h, wd, xd, cd, yd, s) |
| #define cudnnHandle_t miopenHandle_t |
| #define cudnnPoolingDescriptor_t miopenPoolingDescriptor_t |
| #define cudnnSetConvolution2dDescriptor(d, h, w, u, v, x, y, mode) miopenInitConvolutionDescriptor(d, mode, h, w, u, v, x, y) |
| #define cudnnSetFilter4dDescriptor(d, t, i, n, c, h, w) miopenSet4dTensorDescriptor(d, t, n, c, h, w) |
| #define cudnnSetStream miopenSetStream |
| #define cudnnSetTensor4dDescriptor(d, i, t, b, c, h, w) miopenSet4dTensorDescriptor(d, t, b, c, h, w) |
| #define cudnnStatus_t miopenStatus_t |
| #define cudnnTensorDescriptor_t miopenTensorDescriptor_t |
| #define CURAND_RNG_PSEUDO_DEFAULT HIPRAND_RNG_PSEUDO_DEFAULT |
| #define curandCreateGenerator hiprandCreateGenerator |
| #define curandGenerateUniform hiprandGenerateUniform |
| #define curandGenerator_t hiprandGenerator_t |
| #define curandSetPseudoRandomGeneratorSeed hiprandSetPseudoRandomGeneratorSeed |
| #define CUresult hipError_t |
| #define SHFL_DOWN(val, offset) shfl_xor(val, offset) |