tesseract/kernel__ops_8h_source.html

#ifndef KERNEL_OPS_H

#define KERNEL_OPS_H


#include "config.h"

#include "fused/microkernels/microkernel_base.h"

#include "fused/kernel_ops/kernel_helpers.h"

#include "fused/kernel_ops/kernel_eval.h"

#include "fused/kernel_ops/kernel_reduce.h"

#include "fused/kernel_ops/kernel_compare.h"

#include "fused/kernel_ops/kernel_dot.h"

#include "fused/kernel_ops/kernel_gemm.h"


/**
 * @brief Static facade over the detail::Kernel* implementation structs.
 *
 * Every member function is a thin, force-inlined wrapper: it names the
 * matching detail helper instantiated with the same <T, Bits, Arch> triple and
 * hands its arguments through unchanged. No logic lives in this struct.
 *
 * @tparam T    Element type; also the return type of the reductions and dot
 *              products, and the type of the comparison tolerance.
 * @tparam Bits Forwarded verbatim to Microkernel and the detail helpers
 *              (presumably the SIMD register width in bits -- TODO confirm).
 * @tparam Arch Forwarded verbatim to Microkernel and the detail helpers
 *              (presumably an architecture tag type -- TODO confirm).
 */
template <typename T, my_size_t Bits, typename Arch>
struct KernelOps
{
    /// Microkernel instantiation selected by the <T, Bits, Arch> triple.
    using K = Microkernel<T, Bits, Arch>;

    /// Re-exported from the microkernel (K::simdWidth).
    static constexpr my_size_t simdWidth = K::simdWidth;

    // ------------------------------------------------------------------------
    // Evaluation
    // ------------------------------------------------------------------------

    /**
     * @brief Evaluate @p expr into @p output.
     *
     * Forwards to detail::KernelEval::eval, which is documented as choosing
     * between a contiguous and a permuted evaluation path based on the
     * expression layout.
     *
     * @param output Destination buffer handed to the implementation
     *               (required size is not visible here -- see KernelEval).
     * @param expr   Expression to evaluate.
     */
    template <typename Expr>
    FORCE_INLINE static void eval(T *output, const Expr &expr) noexcept
    {
        using Impl = detail::KernelEval<T, Bits, Arch>;
        Impl::eval(output, expr);
    }

    // ------------------------------------------------------------------------
    // Reductions (min / max / sum over the elements of an expression)
    // ------------------------------------------------------------------------

    /**
     * @brief Minimum-reduction of @p expr; forwards to
     *        detail::KernelReduce::reduce_min.
     * @param expr Expression to reduce.
     * @return The value produced by the implementation.
     */
    template <typename Expr>
    FORCE_INLINE static T reduce_min(const Expr &expr) noexcept
    {
        using Impl = detail::KernelReduce<T, Bits, Arch>;
        return Impl::reduce_min(expr);
    }

    /**
     * @brief Maximum-reduction of @p expr; forwards to
     *        detail::KernelReduce::reduce_max.
     * @param expr Expression to reduce.
     * @return The value produced by the implementation.
     */
    template <typename Expr>
    FORCE_INLINE static T reduce_max(const Expr &expr) noexcept
    {
        using Impl = detail::KernelReduce<T, Bits, Arch>;
        return Impl::reduce_max(expr);
    }

    /**
     * @brief Sum-reduction of @p expr; forwards to
     *        detail::KernelReduce::reduce_sum.
     * @param expr Expression to reduce.
     * @return The value produced by the implementation.
     */
    template <typename Expr>
    FORCE_INLINE static T reduce_sum(const Expr &expr) noexcept
    {
        using Impl = detail::KernelReduce<T, Bits, Arch>;
        return Impl::reduce_sum(expr);
    }

    // ------------------------------------------------------------------------
    // Comparisons
    // ------------------------------------------------------------------------

    /**
     * @brief Check whether all logical elements of two expressions are
     *        approximately equal.
     *
     * Forwards to detail::KernelCompare::reduce_all_approx_equal.
     *
     * @param lhs       First expression.
     * @param rhs       Second expression.
     * @param tolerance Tolerance passed to the implementation (whether it is
     *                  absolute or relative is not visible here -- see
     *                  KernelCompare).
     * @return The boolean result of the implementation.
     */
    template <typename Expr1, typename Expr2>
    FORCE_INLINE static bool reduce_all_approx_equal(const Expr1 &lhs,
                                                     const Expr2 &rhs,
                                                     T tolerance) noexcept
    {
        using Impl = detail::KernelCompare<T, Bits, Arch>;
        return Impl::reduce_all_approx_equal(lhs, rhs, tolerance);
    }

    // ------------------------------------------------------------------------
    // Dot products
    // ------------------------------------------------------------------------

    /**
     * @brief Dot product of two expression slices; forwards to
     *        detail::KernelDot::dot, which is documented as dispatching on the
     *        stride values.
     *
     * @param expr1   First operand expression.
     * @param base1   Passed through for @p expr1 (presumably the starting
     *                offset -- TODO confirm against KernelDot).
     * @param stride1 Passed through for @p expr1 (presumably the step between
     *                consecutive elements -- TODO confirm).
     * @param expr2   Second operand expression.
     * @param base2   Counterpart of @p base1 for @p expr2.
     * @param stride2 Counterpart of @p stride1 for @p expr2.
     * @param len     Passed through (presumably the number of element pairs
     *                -- TODO confirm).
     * @return The value produced by the implementation.
     */
    template <typename Expr1, typename Expr2>
    FORCE_INLINE static T dot(const Expr1 &expr1, my_size_t base1, my_size_t stride1,
                              const Expr2 &expr2, my_size_t base2, my_size_t stride2,
                              my_size_t len) noexcept
    {
        using Impl = detail::KernelDot<T, Bits, Arch>;
        return Impl::dot(expr1, base1, stride1, expr2, base2, stride2, len);
    }

    /**
     * @brief Naive scalar dot product intended for testing/validation;
     *        forwards to detail::KernelDot::naive_dot_physical.
     *
     * Takes the same argument list as dot() and passes it through unchanged.
     *
     * @return The value produced by the implementation.
     */
    template <typename Expr1, typename Expr2>
    FORCE_INLINE static T naive_dot_physical(const Expr1 &expr1, my_size_t base1, my_size_t stride1,
                                             const Expr2 &expr2, my_size_t base2, my_size_t stride2,
                                             my_size_t len) noexcept
    {
        using Impl = detail::KernelDot<T, Bits, Arch>;
        return Impl::naive_dot_physical(expr1, base1, stride1, expr2, base2, stride2, len);
    }
};


#endif // KERNEL_OPS_H

config.h
Global configuration for the tesseract tensor library.

my_size_t
#define my_size_t
Size/index type used throughout the library.
Definition config.h:126

FORCE_INLINE
#define FORCE_INLINE
Hint the compiler to always inline a function.
Definition config.h:26

kernel_compare.h
Comparison operations — approximate equality between expressions.

kernel_dot.h
Dot product operations — contraction primitives for einsum.

kernel_eval.h
Expression evaluation — dispatch, contiguous path, and permuted path.

kernel_gemm.h
Register-blocked GEMM for optimized 2D matrix multiplication.

kernel_helpers.h
Shared SIMD helper utilities for kernel operations.

kernel_reduce.h
Reduction operations — min, max, sum over expression elements.

microkernel_base.h

KernelOps
Definition kernel_ops.h:28

KernelOps::reduce_min
static FORCE_INLINE T reduce_min(const Expr &expr) noexcept
Definition kernel_ops.h:51

KernelOps::reduce_max
static FORCE_INLINE T reduce_max(const Expr &expr) noexcept
Definition kernel_ops.h:57

KernelOps::reduce_all_approx_equal
static FORCE_INLINE bool reduce_all_approx_equal(const Expr1 &lhs, const Expr2 &rhs, T tolerance) noexcept
Check if all logical elements of two expressions are approximately equal.
Definition kernel_ops.h:76

KernelOps::reduce_sum
static FORCE_INLINE T reduce_sum(const Expr &expr) noexcept
Definition kernel_ops.h:63

KernelOps::naive_dot_physical
static FORCE_INLINE T naive_dot_physical(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Naive scalar dot product for testing/validation.
Definition kernel_ops.h:107

KernelOps::dot
static FORCE_INLINE T dot(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Dispatch dot product based on stride values.
Definition kernel_ops.h:92

KernelOps::eval
static FORCE_INLINE void eval(T *output, const Expr &expr) noexcept
Evaluation dispatch: pick the contiguous or permuted eval path based on expression layout.
Definition kernel_ops.h:41

KernelOps::simdWidth
static constexpr my_size_t simdWidth
Definition kernel_ops.h:30

Microkernel
Definition microkernel_base.h:16

Microkernel::simdWidth
static constexpr my_size_t simdWidth
Definition microkernel_base.h:17

detail::KernelCompare::reduce_all_approx_equal
static FORCE_INLINE bool reduce_all_approx_equal(const Expr1 &lhs, const Expr2 &rhs, T tolerance) noexcept
Check if all logical elements of two expressions are approximately equal.
Definition kernel_compare.h:45

detail::KernelDot::naive_dot_physical
static FORCE_INLINE T naive_dot_physical(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Naive scalar dot product for testing/validation.
Definition kernel_dot.h:79

detail::KernelDot::dot
static FORCE_INLINE T dot(const Expr1 &expr1, my_size_t base1, my_size_t stride1, const Expr2 &expr2, my_size_t base2, my_size_t stride2, my_size_t len) noexcept
Dispatch dot product based on stride values.
Definition kernel_dot.h:55

detail::KernelEval::eval
static FORCE_INLINE void eval(T *output, const Expr &expr) noexcept
Dispatch: pick contiguous or permuted eval based on expression layout.
Definition kernel_eval.h:36

detail::KernelReduce::reduce_sum
static FORCE_INLINE T reduce_sum(const Expr &expr) noexcept
Definition kernel_reduce.h:80

detail::KernelReduce::reduce_min
static FORCE_INLINE T reduce_min(const Expr &expr) noexcept
Definition kernel_reduce.h:68

detail::KernelReduce::reduce_max
static FORCE_INLINE T reduce_max(const Expr &expr) noexcept
Definition kernel_reduce.h:74