26 template <
typename T,
my_size_t Bits,
typename Arch>
40 template <
typename Expr>
50 template <
typename Expr>
56 template <
typename Expr>
62 template <
typename Expr>
75 template <
typename Expr1,
typename Expr2>
91 template <
typename Expr1,
typename Expr2>
98 expr1, base1, stride1,
99 expr2, base2, stride2,
106 template <
typename Expr1,
typename Expr2>
113 expr1, base1, stride1,
114 expr2, base2, stride2,
Global configuration for the tesseract tensor library.
#define my_size_t
Size/index type used throughout the library.
Definition config.h:126
#define FORCE_INLINE
Hint the compiler to always inline a function.
Definition config.h:26
Comparison operations — approximate equality between expressions.
Dot product operations — contraction primitives for einsum.
Expression evaluation — dispatch, contiguous path, and permuted path.
Register-blocked GEMM for optimized 2D matrix multiplication.
Shared SIMD helper utilities for kernel operations.
Reduction operations — min, max, sum over expression elements.
Definition kernel_ops.h:28
static FORCE_INLINE T reduce_min(const Expr &expr) noexcept
Definition kernel_ops.h:51
static FORCE_INLINE T reduce_max(const Expr &expr) noexcept
Definition kernel_ops.h:57
static FORCE_INLINE bool reduce_all_approx_equal(const Expr1 &lhs, const Expr2 &rhs, T tolerance) noexcept
Check if all logical elements of two expressions are approximately equal.
Definition kernel_ops.h:76
static FORCE_INLINE T reduce_sum(const Expr &expr) noexcept
Definition kernel_ops.h:63
static FORCE_INLINE T naive_dot_physical(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Naive scalar dot product for testing/validation.
Definition kernel_ops.h:107
static FORCE_INLINE T dot(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Dispatch dot product based on stride values.
Definition kernel_ops.h:92
static FORCE_INLINE void eval(T *output, const Expr &expr) noexcept
Evaluation dispatch: pick the contiguous or permuted eval path based on the expression layout.
Definition kernel_ops.h:41
static constexpr my_size_t simdWidth
Definition kernel_ops.h:30
Definition microkernel_base.h:16
static constexpr my_size_t simdWidth
Definition microkernel_base.h:17
static FORCE_INLINE bool reduce_all_approx_equal(const Expr1 &lhs, const Expr2 &rhs, T tolerance) noexcept
Check if all logical elements of two expressions are approximately equal.
Definition kernel_compare.h:45
static FORCE_INLINE T naive_dot_physical(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Naive scalar dot product for testing/validation.
Definition kernel_dot.h:79
static FORCE_INLINE T dot(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Dispatch dot product based on stride values.
Definition kernel_dot.h:55
static FORCE_INLINE void eval(T *output, const Expr &expr) noexcept
Dispatch: pick contiguous or permuted eval based on expression layout.
Definition kernel_eval.h:36
static FORCE_INLINE T reduce_sum(const Expr &expr) noexcept
Definition kernel_reduce.h:80
static FORCE_INLINE T reduce_min(const Expr &expr) noexcept
Definition kernel_reduce.h:68
static FORCE_INLINE T reduce_max(const Expr &expr) noexcept
Definition kernel_reduce.h:74