tesseract++ 0.0.1
N-dimensional tensor library for embedded systems
Loading...
Searching...
No Matches
kernel_compare.h
Go to the documentation of this file.
1
11#ifndef KERNEL_COMPARE_H
12#define KERNEL_COMPARE_H
13
14#include "config.h"
17
18namespace detail
19{
20
21 template <typename T, my_size_t Bits, typename Arch>
23 {
25 static constexpr my_size_t simdWidth = K::simdWidth;
26
27 // ========================================================================
28 // Public API
29 // ========================================================================
30
44 template <typename Expr1, typename Expr2>
46 const Expr1 &lhs,
47 const Expr2 &rhs,
48 T tolerance) noexcept
49 {
52 {
53 // std::cout << "reduce_all_approx_equal: dispatching to contiguous path" << std::endl;
54 return approx_equal_contiguous(lhs, rhs, tolerance);
55 }
56 else
57 {
58 // std::cout << "reduce_all_approx_equal: dispatching to logical path" << std::endl;
59 return approx_equal_logical(lhs, rhs, tolerance);
60 }
61 }
62
63 private:
64 // ========================================================================
65 // Contiguous path
66 // ========================================================================
67
74 template <typename Expr1, typename Expr2>
75 FORCE_INLINE static bool approx_equal_contiguous(
76 const Expr1 &lhs,
77 const Expr2 &rhs,
78 T tolerance) noexcept
79 {
80 using ExprPadPolicy = typename Expr1::Layout::PadPolicyType;
81
82 static constexpr my_size_t lastDim = ExprPadPolicy::LastDim;
83 static constexpr my_size_t paddedLastDim = ExprPadPolicy::PaddedLastDim;
84 static constexpr my_size_t numSlices = ExprPadPolicy::PhysicalSize / paddedLastDim;
85 static constexpr my_size_t simdSteps = lastDim / simdWidth;
86 static constexpr my_size_t scalarStart = simdSteps * simdWidth;
87
88 if constexpr (simdSteps > 0)
89 {
90 for (my_size_t slice = 0; slice < numSlices; ++slice)
91 {
92 const my_size_t base = slice * paddedLastDim;
93 for (my_size_t i = 0; i < simdSteps; ++i)
94 {
95 auto lhs_vec = lhs.template evalu<T, Bits, Arch>(base + i * simdWidth);
96 auto rhs_vec = rhs.template evalu<T, Bits, Arch>(base + i * simdWidth);
97 if (!K::all_within_tolerance(lhs_vec, rhs_vec, tolerance))
98 return false;
99 }
100 }
101 }
102
103 if constexpr (scalarStart < lastDim)
104 {
105 using ScalarK = Microkernel<T, 1, GENERICARCH>;
106 for (my_size_t slice = 0; slice < numSlices; ++slice)
107 {
108 const my_size_t base = slice * paddedLastDim;
109 for (my_size_t i = scalarStart; i < lastDim; ++i)
110 {
111 T lhs_val = lhs.template evalu<T, 1, GENERICARCH>(base + i);
112 T rhs_val = rhs.template evalu<T, 1, GENERICARCH>(base + i);
113 if (ScalarK::abs(lhs_val - rhs_val) > tolerance)
114 return false;
115 }
116 }
117 }
118
119 return true;
120 }
121
122 // ========================================================================
123 // Logical path
124 // ========================================================================
125
141 template <typename Expr1, typename Expr2>
142 FORCE_INLINE static bool approx_equal_logical(
143 const Expr1 &lhs,
144 const Expr2 &rhs,
145 T tolerance) noexcept
146 {
147 // Use Expr1's logical dims to drive iteration
148 // (both must have same logical dims)
149 // TODO: enforce this at compile time? Or is it guaranteed anyways? think...!
150 static constexpr my_size_t logicalSize = Expr1::TotalSize;
151
152 using ScalarK = Microkernel<T, 1, GENERICARCH>;
153
154 for (my_size_t i = 0; i < logicalSize; ++i)
155 {
156 T lhs_val = lhs.template logical_evalu<T, 1, GENERICARCH>(i);
157 T rhs_val = rhs.template logical_evalu<T, 1, GENERICARCH>(i);
158 if (ScalarK::abs(lhs_val - rhs_val) > tolerance)
159 return false;
160 }
161
162 return true;
163 }
164 };
165
166} // namespace detail
167
168#endif // KERNEL_COMPARE_H
Global configuration for the tesseract tensor library.
#define my_size_t
Size/index type used throughout the library.
Definition config.h:126
#define FORCE_INLINE
Hint the compiler to always inline a function.
Definition config.h:26
Definition BaseExpr.h:4
Definition microkernel_base.h:16
static constexpr my_size_t simdWidth
Definition microkernel_base.h:17
Definition kernel_compare.h:23
static FORCE_INLINE bool reduce_all_approx_equal(const Expr1 &lhs, const Expr2 &rhs, T tolerance) noexcept
Check if all logical elements of two expressions are approximately equal.
Definition kernel_compare.h:45
static constexpr my_size_t simdWidth
Definition kernel_compare.h:25
Definition basic_expr_traits.h:6