tesseract++ 0.0.1
N-dimensional tensor library for embedded systems
Loading...
Searching...
No Matches
generic_microkernel.h
Go to the documentation of this file.
1#ifndef GENERIC_MICROKERNEL_H
2#define GENERIC_MICROKERNEL_H
3
4#include "config.h"
5
6// Generic microkernel for ANY type and ANY bit width (scalar fallback)
7// This is a partial specialization that matches any T and any Bits with GENERICARCH
9{
10}; // Scalar fallback
11
12template <typename T, my_size_t Bits>
13struct Microkernel<T, Bits, GENERICARCH>
14{
15 static constexpr my_size_t simdWidth = 1;
16 // GEMM tiling constants (scalar fallback)
17 static constexpr my_size_t num_registers = 16;
18 static constexpr my_size_t MR = 4;
19 static constexpr my_size_t NR_VECS = 1;
20 static constexpr my_size_t NR = NR_VECS * simdWidth; // 1
21 using VecType = T;
22 using ScalarType = T; // In scalar mode, VecType is the same as ScalarType
23
24 FORCE_INLINE static VecType load(const T *ptr) noexcept { return *ptr; }
25 FORCE_INLINE static VecType loadu(const T *ptr) noexcept { return *ptr; }
26
27 FORCE_INLINE static void store(T *ptr, VecType val) noexcept { *ptr = val; }
28 FORCE_INLINE static void storeu(T *ptr, VecType val) noexcept { *ptr = val; }
29
30 FORCE_INLINE static VecType set1(T scalar) noexcept { return scalar; } // In scalar mode, set1 is identity}
31 FORCE_INLINE static VecType add(VecType a, VecType b) noexcept { return a + b; }
32 FORCE_INLINE static VecType mul(VecType a, VecType b) noexcept { return a * b; }
33 FORCE_INLINE static VecType sub(VecType a, VecType b) noexcept { return a - b; }
34 FORCE_INLINE static VecType div(VecType a, VecType b) noexcept { return a / b; }
35
36 FORCE_INLINE static VecType fmadd(VecType a, VecType b, VecType c) noexcept { return a * b + c; }
37 FORCE_INLINE static VecType fmsub(VecType a, VecType b, VecType c) noexcept { return a * b - c; }
38 FORCE_INLINE static VecType fnmadd(VecType a, VecType b, VecType c) noexcept { return -(a * b) + c; }
39 FORCE_INLINE static VecType fnmsub(VecType a, VecType b, VecType c) noexcept { return -(a * b) - c; }
40
41 FORCE_INLINE static VecType min(VecType a, VecType b) noexcept { return a < b ? a : b; }
42 FORCE_INLINE static VecType max(VecType a, VecType b) noexcept { return a > b ? a : b; }
43
44 FORCE_INLINE static VecType gather(const T *base, const my_size_t *indices) noexcept { return base[indices[0]]; }
45 FORCE_INLINE static void scatter(T *base, const my_size_t *indices, VecType val) noexcept { base[indices[0]] = val; }
46
47 FORCE_INLINE static VecType abs(VecType v) noexcept { return v < T{0} ? -v : v; }
48 FORCE_INLINE static bool all_within_tolerance(VecType a, VecType b, T tol) noexcept
49 {
50 T diff = a - b;
51 return abs(diff) <= tol;
52 }
53};
54
55#endif // GENERIC_MICROKERNEL_H
Global configuration for the tesseract tensor library.
#define my_size_t
Size/index type used throughout the library.
Definition config.h:126
#define FORCE_INLINE
Hint the compiler to always inline a function.
Definition config.h:26
Definition generic_microkernel.h:9
static FORCE_INLINE VecType add(VecType a, VecType b) noexcept
Definition generic_microkernel.h:31
static FORCE_INLINE void scatter(T *base, const my_size_t *indices, VecType val) noexcept
Definition generic_microkernel.h:45
static FORCE_INLINE VecType div(VecType a, VecType b) noexcept
Definition generic_microkernel.h:34
T VecType
Definition generic_microkernel.h:21
static FORCE_INLINE VecType load(const T *ptr) noexcept
Definition generic_microkernel.h:24
static FORCE_INLINE VecType gather(const T *base, const my_size_t *indices) noexcept
Definition generic_microkernel.h:44
static FORCE_INLINE VecType abs(VecType v) noexcept
Definition generic_microkernel.h:47
static FORCE_INLINE VecType fnmsub(VecType a, VecType b, VecType c) noexcept
Definition generic_microkernel.h:39
static FORCE_INLINE VecType max(VecType a, VecType b) noexcept
Definition generic_microkernel.h:42
static FORCE_INLINE VecType loadu(const T *ptr) noexcept
Definition generic_microkernel.h:25
static FORCE_INLINE void storeu(T *ptr, VecType val) noexcept
Definition generic_microkernel.h:28
static FORCE_INLINE VecType set1(T scalar) noexcept
Definition generic_microkernel.h:30
static FORCE_INLINE VecType min(VecType a, VecType b) noexcept
Definition generic_microkernel.h:41
static FORCE_INLINE VecType fmadd(VecType a, VecType b, VecType c) noexcept
Definition generic_microkernel.h:36
static FORCE_INLINE VecType fnmadd(VecType a, VecType b, VecType c) noexcept
Definition generic_microkernel.h:38
static FORCE_INLINE VecType mul(VecType a, VecType b) noexcept
Definition generic_microkernel.h:32
static FORCE_INLINE VecType fmsub(VecType a, VecType b, VecType c) noexcept
Definition generic_microkernel.h:37
static FORCE_INLINE void store(T *ptr, VecType val) noexcept
Definition generic_microkernel.h:27
T ScalarType
Definition generic_microkernel.h:22
static FORCE_INLINE bool all_within_tolerance(VecType a, VecType b, T tol) noexcept
Definition generic_microkernel.h:48
static FORCE_INLINE VecType sub(VecType a, VecType b) noexcept
Definition generic_microkernel.h:33
Definition microkernel_base.h:16
static constexpr my_size_t simdWidth
Definition microkernel_base.h:17