

Macros | |
| #define | BLOCK 512 |
| #define | BLOCK_TRANSPOSE32 256 |
| #define | CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__, __func__, __LINE__ ); |
| #define | CHECK_CUDA(X) |
| Macro to quickly check if a CUDA error has taken place. Only calls the CUDA error function if a problem is detected. | |
| #define | CHECK_CUDNN(X) |
| Macro to quickly check if a CUDNN error has taken place. Only calls the CUDNN error function if a problem is detected. | |
| #define | FULL_MASK 0xffffffff |
| #define | WARP_SIZE 32 |
Enumerations | |
| enum | { cudnn_fastest, cudnn_smallest, cudnn_specify } |
Functions | |
| hipblasHandle_t | blas_handle () |
| void | check_cuda_error (hipError_t status, const char *const filename, const char *const funcname, const int line) |
| Use CHECK_CUDA() instead. | |
| void | check_cuda_error_extended (hipError_t status, const char *const filename, const char *const funcname, const int line) |
| Use CHECK_CUDA() instead. | |
| void | cublas_check_error_extended (hipblasStatus_t status, const char *const filename, const char *const funcname, const int line) |
| Use CHECK_CUBLAS() instead. | |
| float | cuda_compare (float *x_gpu, float *x, size_t n, char *s) |
| void | cuda_free (float *x_gpu) |
| void | cuda_free_host (float *x_cpu) |
| int | cuda_get_device () |
| dim3 | cuda_gridsize (size_t n) |
| float * | cuda_make_array (float *x, size_t n) |
| Allocate memory on the GPU. | |
| float * | cuda_make_array_pinned (float *x, size_t n) |
| float * | cuda_make_array_pinned_preallocated (float *x, size_t n) |
| void ** | cuda_make_array_pointers (void **x, size_t n) |
| int * | cuda_make_int_array (size_t n) |
| int * | cuda_make_int_array_new_api (int *x, size_t n) |
| void | cuda_push_array (float *x_gpu, float *x, size_t n) |
| void | cuda_random (float *x_gpu, size_t n) |
| void | cudnn_check_error_extended (miopenStatus_t status, const char *const filename, const char *const function, const int line) |
| Use CHECK_CUDNN() instead. | |
| miopenHandle_t | cudnn_handle () |
| void | free_pinned_memory () |
| hipStream_t | get_cuda_stream () |
| int | get_gpu_compute_capability (int i, char *device_name) |
| int | get_number_of_blocks (int array_size, int block_size) |
| void | pre_allocate_pinned_memory (size_t size) |
| void | reset_wait_stream_events () |
| void | show_cuda_cudnn_info () |
| hipStream_t | switch_stream (int i) |
| void | wait_stream (int i) |
Variables | |
| int | cuda_debug_sync |
| #define BLOCK 512 |
| #define BLOCK_TRANSPOSE32 256 |
| #define CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__, __func__, __LINE__ ); |
| #define CHECK_CUDA(X) |
Macro to quickly check if a CUDA error has taken place. Only calls the CUDA error function if a problem is detected.
| #define CHECK_CUDNN(X) |
Macro to quickly check if a CUDNN error has taken place. Only calls the CUDNN error function if a problem is detected.
| #define FULL_MASK 0xffffffff |
| #define WARP_SIZE 32 |
| hipblasHandle_t blas_handle () |


| void check_cuda_error (hipError_t status, const char *const filename, const char *const funcname, const int line) |
Use CHECK_CUDA() instead.


| void check_cuda_error_extended (hipError_t status, const char *const filename, const char *const funcname, const int line) |
Use CHECK_CUDA() instead.
| void cublas_check_error_extended (hipblasStatus_t status, const char *const filename, const char *const funcname, const int line) |
Use CHECK_CUBLAS() instead.
| float cuda_compare (float *x_gpu, float *x, size_t n, char *s) |

| void cuda_free (float *x_gpu) |

| void cuda_free_host (float *x_cpu) |

| int cuda_get_device () |

| dim3 cuda_gridsize (size_t n) |
| float * cuda_make_array (float *x, size_t n) |
Allocate memory on the GPU.
If x is not null, then copy the given floats from the host pointer.


| float * cuda_make_array_pinned (float *x, size_t n) |


| float * cuda_make_array_pinned_preallocated (float *x, size_t n) |


| void ** cuda_make_array_pointers (void **x, size_t n) |


| int * cuda_make_int_array (size_t n) |

| int * cuda_make_int_array_new_api (int *x, size_t n) |


| void cuda_push_array (float *x_gpu, float *x, size_t n) |


| void cuda_random (float *x_gpu, size_t n) |


| void cudnn_check_error_extended (miopenStatus_t status, const char *const filename, const char *const function, const int line) |
Use CHECK_CUDNN() instead.
| miopenHandle_t cudnn_handle () |


| void free_pinned_memory () |


| hipStream_t get_cuda_stream () |

| int get_gpu_compute_capability (int i, char *device_name) |

| int get_number_of_blocks (int array_size, int block_size) |

| void pre_allocate_pinned_memory (size_t size) |


| void reset_wait_stream_events () |

| void show_cuda_cudnn_info () |


| hipStream_t switch_stream (int i) |


| void wait_stream (int i) |

