General matrix multiplication (GEMM) More...

#include "activations.hpp"

Include dependency graph for gemm.hpp:

This graph shows which files directly or indirectly include this file:

Functions
void	activate_array_cpu_custom (float *x, const int n, const ACTIVATION a)

void	convolution_2d (int w, int h, int ksize, int n, int c, int pad, int stride, float *weights, float *input, float *output, float *mean)

void	convolution_repacked (uint32_t *packed_input, uint32_t *packed_weights, float *output, int w, int h, int c, int n, int size, int pad, int new_lda, float *mean_arr)

void	float_to_bit (float *src, unsigned char *dst, size_t size)

void	forward_maxpool_layer_avx (float *src, float *dst, int *indexes, int size, int w, int h, int out_w, int out_h, int c, int pad, int stride, int batch)

static void	gemm_cpu (int TA, int TB, int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float BETA, float *C, int ldc)

void	gemm_gpu (int TA, int TB, int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float BETA, float *C, int ldc)

void	gemm_nn_bin_32bit_packed (int M, int N, int K, float ALPHA, uint32_t *A, int lda, uint32_t *B, int ldb, float *C, int ldc, float *mean_arr)

void	gemm_nn_bin_transposed_32bit_packed (int M, int N, int K, float ALPHA, uint32_t *A, int lda, uint32_t *B, int ldb, float *C, int ldc, float *mean_arr)

void	gemm_nn_custom_bin_mean_transposed (int M, int N, int K, float ALPHA_UNUSED, unsigned char *A, int lda, unsigned char *B, int ldb, float *C, int ldc, float *mean_arr)

void	gemm_ongpu (int TA, int TB, int M, int N, int K, float ALPHA, float *A_gpu, int lda, float *B_gpu, int ldb, float BETA, float *C_gpu, int ldc)

static unsigned char	get_bit (unsigned char const *const src, size_t index)

void	im2col_cpu_custom (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col)

void	im2col_cpu_custom_align (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col, int bit_align)

void	im2col_cpu_custom_bin (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col, int bit_align)

void	im2col_cpu_custom_transpose (float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col, int ldb_align)

int	is_avx ()

int	is_fma_avx2 ()

void	repack_input (float *input, float *re_packed_input, int w, int h, int c)

static void	set_bit (unsigned char *const dst, size_t index)

void	transpose_32x32_bits_reversed_diagonale (uint32_t *A, uint32_t *B, int m, int n)

void	transpose_bin (uint32_t *A, uint32_t *B, const int n, const int m, const int lda, const int ldb, const int block_size)

void	transpose_block_SSE4x4 (float *A, float *B, const int n, const int m, const int lda, const int ldb, const int block_size)

void	transpose_uint32 (uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align)

Detailed Description

General matrix multiplication (GEMM)

Function Documentation

◆ activate_array_cpu_custom()

void activate_array_cpu_custom	(	float *	x,
		const int	n,
		const ACTIVATION	a
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ convolution_2d()

void convolution_2d	(	int	w,
		int	h,
		int	ksize,
		int	n,
		int	c,
		int	pad,
		int	stride,
		float *	weights,
		float *	input,
		float *	output,
		float *	mean
	)

◆ convolution_repacked()

void convolution_repacked	(	uint32_t *	packed_input,
		uint32_t *	packed_weights,
		float *	output,
		int	w,
		int	h,
		int	c,
		int	n,
		int	size,
		int	pad,
		int	new_lda,
		float *	mean_arr
	)

◆ float_to_bit()

void float_to_bit	(	float *	src,
		unsigned char *	dst,
		size_t	size
	)

Here is the caller graph for this function:

◆ forward_maxpool_layer_avx()

void forward_maxpool_layer_avx	(	float *	src,
		float *	dst,
		int *	indexes,
		int	size,
		int	w,
		int	h,
		int	out_w,
		int	out_h,
		int	c,
		int	pad,
		int	stride,
		int	batch
	)

Here is the caller graph for this function:

◆ gemm_cpu()

static void gemm_cpu	(	int	TA,
		int	TB,
		int	M,
		int	N,
		int	K,
		float	ALPHA,
		float *	A,
		int	lda,
		float *	B,
		int	ldb,
		float	BETA,
		float *	C,
		int	ldc
	)

inline static

Here is the caller graph for this function:

◆ gemm_gpu()

void gemm_gpu	(	int	TA,
		int	TB,
		int	M,
		int	N,
		int	K,
		float	ALPHA,
		float *	A,
		int	lda,
		float *	B,
		int	ldb,
		float	BETA,
		float *	C,
		int	ldc
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ gemm_nn_bin_32bit_packed()

void gemm_nn_bin_32bit_packed	(	int	M,
		int	N,
		int	K,
		float	ALPHA,
		uint32_t *	A,
		int	lda,
		uint32_t *	B,
		int	ldb,
		float *	C,
		int	ldc,
		float *	mean_arr
	)

◆ gemm_nn_bin_transposed_32bit_packed()

void gemm_nn_bin_transposed_32bit_packed	(	int	M,
		int	N,
		int	K,
		float	ALPHA,
		uint32_t *	A,
		int	lda,
		uint32_t *	B,
		int	ldb,
		float *	C,
		int	ldc,
		float *	mean_arr
	)

◆ gemm_nn_custom_bin_mean_transposed()

void gemm_nn_custom_bin_mean_transposed	(	int	M,
		int	N,
		int	K,
		float	ALPHA_UNUSED,
		unsigned char *	A,
		int	lda,
		unsigned char *	B,
		int	ldb,
		float *	C,
		int	ldc,
		float *	mean_arr
	)

Here is the caller graph for this function:

◆ gemm_ongpu()

void gemm_ongpu	(	int	TA,
		int	TB,
		int	M,
		int	N,
		int	K,
		float	ALPHA,
		float *	A_gpu,
		int	lda,
		float *	B_gpu,
		int	ldb,
		float	BETA,
		float *	C_gpu,
		int	ldc
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ get_bit()

static unsigned char get_bit	(	unsigned char const *const	src,
		size_t	index
	)

inline static

◆ im2col_cpu_custom()

void im2col_cpu_custom	(	float *	data_im,
		int	channels,
		int	height,
		int	width,
		int	ksize,
		int	stride,
		int	pad,
		float *	data_col
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ im2col_cpu_custom_align()

void im2col_cpu_custom_align	(	float *	data_im,
		int	channels,
		int	height,
		int	width,
		int	ksize,
		int	stride,
		int	pad,
		float *	data_col,
		int	bit_align
	)

◆ im2col_cpu_custom_bin()

void im2col_cpu_custom_bin	(	float *	data_im,
		int	channels,
		int	height,
		int	width,
		int	ksize,
		int	stride,
		int	pad,
		float *	data_col,
		int	bit_align
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ im2col_cpu_custom_transpose()

void im2col_cpu_custom_transpose	(	float *	data_im,
		int	channels,
		int	height,
		int	width,
		int	ksize,
		int	stride,
		int	pad,
		float *	data_col,
		int	ldb_align
	)

◆ is_avx()

int is_avx ( )

Here is the caller graph for this function:

◆ is_fma_avx2()

int is_fma_avx2 ( )

Here is the caller graph for this function:

◆ repack_input()

void repack_input	(	float *	input,
		float *	re_packed_input,
		int	w,
		int	h,
		int	c
	)

Here is the caller graph for this function:

◆ set_bit()

static void set_bit	(	unsigned char *const	dst,
		size_t	index
	)

inline static

Here is the caller graph for this function:

◆ transpose_32x32_bits_reversed_diagonale()

void transpose_32x32_bits_reversed_diagonale	(	uint32_t *	A,
		uint32_t *	B,
		int	m,
		int	n
	)

Here is the call graph for this function:

Here is the caller graph for this function:

◆ transpose_bin()

void transpose_bin	(	uint32_t *	A,
		uint32_t *	B,
		const int	n,
		const int	m,
		const int	lda,
		const int	ldb,
		const int	block_size
	)

◆ transpose_block_SSE4x4()

void transpose_block_SSE4x4	(	float *	A,
		float *	B,
		const int	n,
		const int	m,
		const int	lda,
		const int	ldb,
		const int	block_size
	)

◆ transpose_uint32()

void transpose_uint32	(	uint32_t *	src,
		uint32_t *	dst,
		int	src_h,
		int	src_w,
		int	src_align,
		int	dst_align
	)

Here is the caller graph for this function:

Functions

Detailed Description

Function Documentation

◆ activate_array_cpu_custom()

◆ convolution_2d()

◆ convolution_repacked()

◆ float_to_bit()

◆ forward_maxpool_layer_avx()

◆ gemm_cpu()

◆ gemm_gpu()

◆ gemm_nn_bin_32bit_packed()

◆ gemm_nn_bin_transposed_32bit_packed()

◆ gemm_nn_custom_bin_mean_transposed()

◆ gemm_ongpu()

◆ get_bit()

◆ im2col_cpu_custom()

◆ im2col_cpu_custom_align()

◆ im2col_cpu_custom_bin()

◆ im2col_cpu_custom_transpose()

◆ is_avx()

◆ is_fma_avx2()

◆ repack_input()

◆ set_bit()

◆ transpose_32x32_bits_reversed_diagonale()

◆ transpose_bin()

◆ transpose_block_SSE4x4()

◆ transpose_uint32()