tesseract++ 0.0.1
N-dimensional tensor library for embedded systems
Loading...
Searching...
No Matches
kernel_ops.h
Go to the documentation of this file.
1
14#ifndef KERNEL_OPS_H
15#define KERNEL_OPS_H
16
17#include "config.h"
25
// Class template from kernel_ops.h that groups static kernel entry points:
// expression evaluation, reductions, approximate comparison, and dot products.
// NOTE(review): the numbering of this listing skips lines 27, 29, 43, 53, 59,
// 65, 76, 81, 97, 107 and 112 (among others), so the declaration that opens
// this scope, the definition of `K`, and the statement inside each function
// body are not visible here.
// `T` is the element type: `eval` writes through a `T *`, and the reductions
// and dot products return `T`. `Bits` and `Arch` are not used on any visible
// line; presumably they select the SIMD register width and the target
// architecture (TODO confirm).
26template <typename T, my_size_t Bits, typename Arch>
28{
// Compile-time constant copied from `K::simdWidth`. `K` is declared on a line
// not shown here; presumably it is the microkernel type (TODO confirm).
30 static constexpr my_size_t simdWidth = K::simdWidth;
31
32 // ========================================================================
33 // Evaluation
34 // ========================================================================
35
// Evaluates `expr` into the buffer behind `output`. Per the member brief this
// is a dispatch that picks a contiguous or a permuted evaluation based on the
// expression layout. The statement that performs it (listing line 43) is not
// visible. Declared noexcept.
40 template <typename Expr>
41 FORCE_INLINE static void eval(T *output, const Expr &expr) noexcept
42 {
44 }
45
46 // ========================================================================
47 // Reductions
48 // ========================================================================
49
// Reduces `expr` to a single `T`; by its name, the minimum over the
// expression's elements. Body statement (listing line 53) not visible.
50 template <typename Expr>
51 FORCE_INLINE static T reduce_min(const Expr &expr) noexcept
52 {
54 }
55
// Reduces `expr` to a single `T`; by its name, the maximum over the
// expression's elements. Body statement (listing line 59) not visible.
56 template <typename Expr>
57 FORCE_INLINE static T reduce_max(const Expr &expr) noexcept
58 {
60 }
61
// Reduces `expr` to a single `T`; by its name, the sum over the
// expression's elements. Body statement (listing line 65) not visible.
62 template <typename Expr>
63 FORCE_INLINE static T reduce_sum(const Expr &expr) noexcept
64 {
66 }
67
68 // ========================================================================
69 // Comparisons
70 // ========================================================================
71
// The line carrying this function's name and return type (listing line 76) is
// missing. The page's member index lists the member defined there as
// `bool reduce_all_approx_equal(lhs, rhs, tolerance)`: check if all logical
// elements of two expressions are approximately equal. How `tolerance` is
// applied is not visible here (body statement on listing line 81).
75 template <typename Expr1, typename Expr2>
77 const Expr1 &lhs,
78 const Expr2 &rhs,
79 T tolerance) noexcept
80 {
82 }
83
84 // ========================================================================
85 // Dot Products
86 // ========================================================================
87
// Dot product of two expressions; per the member brief it dispatches based on
// the stride values. All seven arguments are passed on unchanged and in the
// same order to a callee whose name (listing line 97) is not visible.
// NOTE(review): `base*` / `stride*` / `len` look like start offset, step and
// element count for each operand; confirm against the callee.
91 template <typename Expr1, typename Expr2>
92 FORCE_INLINE static T dot(
93 const Expr1 &expr1, my_size_t base1, my_size_t stride1,
94 const Expr2 &expr2, my_size_t base2, my_size_t stride2,
95 my_size_t len) noexcept
96 {
98 expr1, base1, stride1,
99 expr2, base2, stride2,
100 len);
101 }
102
// The line carrying this function's name and return type (listing line 107) is
// missing. The page's member index lists the member defined there as
// `T naive_dot_physical(...)`: a naive scalar dot product for
// testing/validation. As with `dot`, the arguments are passed on unchanged to
// a callee whose name (listing line 112) is not visible.
106 template <typename Expr1, typename Expr2>
108 const Expr1 &expr1, my_size_t base1, my_size_t stride1,
109 const Expr2 &expr2, my_size_t base2, my_size_t stride2,
110 my_size_t len) noexcept
111 {
113 expr1, base1, stride1,
114 expr2, base2, stride2,
115 len);
116 }
117};
118
119#endif // KERNEL_OPS_H
Global configuration for the tesseract tensor library.
#define my_size_t
Size/index type used throughout the library.
Definition config.h:126
#define FORCE_INLINE
Hint the compiler to always inline a function.
Definition config.h:26
Comparison operations — approximate equality between expressions.
Dot product operations — contraction primitives for einsum.
Expression evaluation — dispatch, contiguous path, and permuted path.
Register-blocked GEMM for optimized 2D matrix multiplication.
Shared SIMD helper utilities for kernel operations.
Reduction operations — min, max, sum over expression elements.
Definition kernel_ops.h:28
static FORCE_INLINE T reduce_min(const Expr &expr) noexcept
Definition kernel_ops.h:51
static FORCE_INLINE T reduce_max(const Expr &expr) noexcept
Definition kernel_ops.h:57
static FORCE_INLINE bool reduce_all_approx_equal(const Expr1 &lhs, const Expr2 &rhs, T tolerance) noexcept
Check if all logical elements of two expressions are approximately equal.
Definition kernel_ops.h:76
static FORCE_INLINE T reduce_sum(const Expr &expr) noexcept
Definition kernel_ops.h:63
static FORCE_INLINE T naive_dot_physical(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Naive scalar dot product for testing/validation.
Definition kernel_ops.h:107
static FORCE_INLINE T dot(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Dispatch dot product based on stride values.
Definition kernel_ops.h:92
static FORCE_INLINE void eval(T *output, const Expr &expr) noexcept
Evaluation dispatch: pick contiguous or permuted eval based on expression layout.
Definition kernel_ops.h:41
static constexpr my_size_t simdWidth
Definition kernel_ops.h:30
Definition microkernel_base.h:16
static constexpr my_size_t simdWidth
Definition microkernel_base.h:17
static FORCE_INLINE bool reduce_all_approx_equal(const Expr1 &lhs, const Expr2 &rhs, T tolerance) noexcept
Check if all logical elements of two expressions are approximately equal.
Definition kernel_compare.h:45
static FORCE_INLINE T naive_dot_physical(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Naive scalar dot product for testing/validation.
Definition kernel_dot.h:79
static FORCE_INLINE T dot(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Dispatch dot product based on stride values.
Definition kernel_dot.h:55
static FORCE_INLINE void eval(T *output, const Expr &expr) noexcept
Dispatch: pick contiguous or permuted eval based on expression layout.
Definition kernel_eval.h:36
static FORCE_INLINE T reduce_sum(const Expr &expr) noexcept
Definition kernel_reduce.h:80
static FORCE_INLINE T reduce_min(const Expr &expr) noexcept
Definition kernel_reduce.h:68
static FORCE_INLINE T reduce_max(const Expr &expr) noexcept
Definition kernel_reduce.h:74