#include "darknet_internal.hpp"
#include "darknet_gpu.hpp"

Include dependency graph for dark_cuda.hpp:

This graph shows which files directly or indirectly include this file:

Macros
#define	BLOCK 512

#define	BLOCK_TRANSPOSE32 256

#define	CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__, __func__, __LINE__ );

#define	CHECK_CUDA(X)
	Macro to quickly check if a CUDA error has taken place. Only calls the CUDA error function if a problem is detected.

#define	CHECK_CUDNN(X)
	Macro to quickly check if a CUDNN error has taken place. Only calls the CUDNN error function if a problem is detected.

#define	FULL_MASK 0xffffffff

#define	WARP_SIZE 32

Enumerations
enum	{ cudnn_fastest , cudnn_smallest , cudnn_specify }

Functions
hipblasHandle_t	blas_handle ()

void	check_cuda_error (hipError_t status, const char *const filename, const char *const funcname, const int line)
	Use CHECK_CUDA() instead.

void	check_cuda_error_extended (hipError_t status, const char *const filename, const char *const funcname, const int line)
	Use CHECK_CUDA() instead.

void	cublas_check_error_extended (hipblasStatus_t status, const char *const filename, const char *const funcname, const int line)
	Use CHECK_CUBLAS() instead.

float	cuda_compare (float *x_gpu, float *x, size_t n, char *s)

void	cuda_free (float *x_gpu)

void	cuda_free_host (float *x_cpu)

int	cuda_get_device ()

dim3	cuda_gridsize (size_t n)

float *	cuda_make_array (float *x, size_t n)
	Allocate memory on the GPU.

float *	cuda_make_array_pinned (float *x, size_t n)

float *	cuda_make_array_pinned_preallocated (float *x, size_t n)

void **	cuda_make_array_pointers (void **x, size_t n)

int *	cuda_make_int_array (size_t n)

int *	cuda_make_int_array_new_api (int *x, size_t n)

void	cuda_push_array (float *x_gpu, float *x, size_t n)

void	cuda_random (float *x_gpu, size_t n)

void	cudnn_check_error_extended (miopenStatus_t status, const char *const filename, const char *const function, const int line)
	Use CHECK_CUDNN() instead.

miopenHandle_t	cudnn_handle ()

void	free_pinned_memory ()

hipStream_t	get_cuda_stream ()

int	get_gpu_compute_capability (int i, char *device_name)

int	get_number_of_blocks (int array_size, int block_size)

void	pre_allocate_pinned_memory (size_t size)

void	reset_wait_stream_events ()

void	show_cuda_cudnn_info ()

hipStream_t	switch_stream (int i)

void	wait_stream (int i)

Variables
int	cuda_debug_sync

Macro Definition Documentation

◆ BLOCK

#define BLOCK 512

Todo: What is this? See where it is used in all the .cu files.

◆ BLOCK_TRANSPOSE32

#define BLOCK_TRANSPOSE32 256

◆ CHECK_CUBLAS

#define CHECK_CUBLAS ( X ) cublas_check_error_extended(X, __FILE__, __func__, __LINE__ );

◆ CHECK_CUDA

#define CHECK_CUDA ( X )

Value:

{                                                                           \
    const auto STATUS = X;                                                  \
    if (STATUS != cudaSuccess)                                              \
    {                                                                       \
        check_cuda_error_extended(STATUS, __FILE__, __func__, __LINE__);    \
    }                                                                       \
}

Macro to quickly check if a CUDA error has taken place. Only calls the CUDA error function if a problem is detected.

◆ CHECK_CUDNN

#define CHECK_CUDNN ( X )

Value:

{                                                                           \
    const auto STATUS = X;                                                  \
    if (STATUS != CUDNN_STATUS_SUCCESS)                                     \
    {                                                                       \
        cudnn_check_error_extended(STATUS, __FILE__, __func__, __LINE__);   \
    }                                                                       \
}

Macro to quickly check if a CUDNN error has taken place. Only calls the CUDNN error function if a problem is detected.

◆ FULL_MASK

#define FULL_MASK 0xffffffff

◆ WARP_SIZE

#define WARP_SIZE 32

Enumeration Type Documentation

◆ anonymous enum

anonymous enum

Enumerator
cudnn_fastest
cudnn_smallest
cudnn_specify

Function Documentation

◆ blas_handle()

hipblasHandle_t blas_handle ( )

Here is the call graph for this function:

Here is the caller graph for this function:

◆ check_cuda_error()

void check_cuda_error	(	hipError_t	status,
		const char *const	filename,
		const char *const	funcname,
		const int	line
	)

Use CHECK_CUDA() instead.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ check_cuda_error_extended()

void check_cuda_error_extended	(	hipError_t	status,
		const char *const	filename,
		const char *const	funcname,
		const int	line
	)

Use CHECK_CUDA() instead.

Here is the call graph for this function:

◆ cublas_check_error_extended()

void cublas_check_error_extended	(	hipblasStatus_t	status,
		const char *const	filename,
		const char *const	funcname,
		const int	line
	)

Use CHECK_CUBLAS() instead.

Here is the call graph for this function:

◆ cuda_compare()

float cuda_compare	(	float *	x_gpu,
		float *	x,
		size_t	n,
		char *	s
	)

Here is the call graph for this function:

◆ cuda_free()

void cuda_free ( float * x_gpu )

Here is the caller graph for this function:

◆ cuda_free_host()

void cuda_free_host ( float * x_cpu )

Here is the caller graph for this function:

◆ cuda_get_device()

int cuda_get_device ( )

Here is the caller graph for this function:

◆ cuda_gridsize()

dim3 cuda_gridsize ( size_t n )

◆ cuda_make_array()

float * cuda_make_array	(	float *	x,
		size_t	n
	)

Allocate memory on the GPU.

If x is not null, then copy the given floats from the host pointer.

Returns: a pointer to the CUDA memory allocation.

Warning: The copy is asynchronous and may not have finished when this function returns!

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cuda_make_array_pinned()

float * cuda_make_array_pinned	(	float *	x,
		size_t	n
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cuda_make_array_pinned_preallocated()

float * cuda_make_array_pinned_preallocated	(	float *	x,
		size_t	n
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cuda_make_array_pointers()

void ** cuda_make_array_pointers	(	void **	x,
		size_t	n
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cuda_make_int_array()

int * cuda_make_int_array ( size_t n )

Here is the caller graph for this function:

◆ cuda_make_int_array_new_api()

int * cuda_make_int_array_new_api	(	int *	x,
		size_t	n
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cuda_push_array()

void cuda_push_array	(	float *	x_gpu,
		float *	x,
		size_t	n
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cuda_random()

void cuda_random	(	float *	x_gpu,
		size_t	n
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ cudnn_check_error_extended()

void cudnn_check_error_extended	(	miopenStatus_t	status,
		const char *const	filename,
		const char *const	function,
		const int	line
	)

Use CHECK_CUDNN() instead.

Here is the call graph for this function:

◆ cudnn_handle()

miopenHandle_t cudnn_handle ( )

Here is the call graph for this function:

Here is the caller graph for this function:

◆ free_pinned_memory()

void free_pinned_memory ( )

Here is the call graph for this function:

Here is the caller graph for this function:

◆ get_cuda_stream()

hipStream_t get_cuda_stream ( )

Here is the call graph for this function:

◆ get_gpu_compute_capability()

int get_gpu_compute_capability	(	int	i,
		char *	device_name
	)

Here is the caller graph for this function:

◆ get_number_of_blocks()

int get_number_of_blocks	(	int	array_size,
		int	block_size
	)

Here is the caller graph for this function:

◆ pre_allocate_pinned_memory()

void pre_allocate_pinned_memory ( size_t size )

Here is the call graph for this function:

Here is the caller graph for this function:

◆ reset_wait_stream_events()

void reset_wait_stream_events ( )

Here is the caller graph for this function:

◆ show_cuda_cudnn_info()

void show_cuda_cudnn_info ( )

Here is the call graph for this function:

Here is the caller graph for this function:

◆ switch_stream()

hipStream_t switch_stream ( int i )

Here is the call graph for this function:

Here is the caller graph for this function:

◆ wait_stream()

void wait_stream ( int i )

Here is the call graph for this function:

Here is the caller graph for this function:

Variable Documentation

◆ cuda_debug_sync

int cuda_debug_sync

extern

Todo: V3 - is this still needed?

Todo: V3 - is cuda_debug_sync still necessary?

Macros

Enumerations

Functions

Variables

Macro Definition Documentation

◆ BLOCK

◆ BLOCK_TRANSPOSE32

◆ CHECK_CUBLAS

◆ CHECK_CUDA

◆ CHECK_CUDNN

◆ FULL_MASK

◆ WARP_SIZE

Enumeration Type Documentation

◆ anonymous enum

Function Documentation

◆ blas_handle()

◆ check_cuda_error()

◆ check_cuda_error_extended()

◆ cublas_check_error_extended()

◆ cuda_compare()

◆ cuda_free()

◆ cuda_free_host()

◆ cuda_get_device()

◆ cuda_gridsize()

◆ cuda_make_array()

◆ cuda_make_array_pinned()

◆ cuda_make_array_pinned_preallocated()

◆ cuda_make_array_pointers()

◆ cuda_make_int_array()

◆ cuda_make_int_array_new_api()

◆ cuda_push_array()

◆ cuda_random()

◆ cudnn_check_error_extended()

◆ cudnn_handle()

◆ free_pinned_memory()

◆ get_cuda_stream()

◆ get_gpu_compute_capability()

◆ get_number_of_blocks()

◆ pre_allocate_pinned_memory()

◆ reset_wait_stream_events()

◆ show_cuda_cudnn_info()

◆ switch_stream()

◆ wait_stream()

Variable Documentation

◆ cuda_debug_sync