35 template <
typename Expr>
41 eval_vectorized_contiguous(output, expr);
46 eval_vectorized_permuted(output, expr);
55 template <
typename Expr,
typename Seq>
61 struct OutputPadImpl<Expr,
index_seq<Is...>>
66 template <
typename Expr>
67 struct OutputPadPolicy
69 using type =
typename OutputPadImpl<Expr, typename make_index_seq<Expr::NumDims>::type>::type;
91 template <
typename Expr>
94 const Expr &expr)
noexcept
96 using Layout =
typename Expr::Layout;
97 static constexpr my_size_t physicalSize = Layout::PhysicalSize;
99 static constexpr bool hasRemainder = (physicalSize %
simdWidth) != 0;
103 static_assert(physicalSize %
simdWidth == 0,
104 "PhysicalSize must be a multiple of SimdWidth");
107 for (
my_size_t i = 0; i < simdSteps; ++i)
109 auto val = expr.template evalu<T, Bits, Arch>(i *
simdWidth);
116 if constexpr (hasRemainder)
118 std::cout <<
"Warning: Scalar evaluation for remainder elements." << std::endl;
150 template <
typename Expr>
153 const Expr &expr)
noexcept
155 using OutputPad =
typename OutputPadPolicy<Expr>::type;
157 static constexpr my_size_t lastDim = OutputPad::LastDim;
158 static constexpr my_size_t paddedLastDim = OutputPad::PaddedLastDim;
159 static constexpr my_size_t numSlices = OutputPad::PhysicalSize / paddedLastDim;
166 for (
my_size_t slice = 0; slice < numSlices; ++slice)
168 const my_size_t out_base = slice * paddedLastDim;
170 for (
my_size_t i = 0; i < simdSteps; ++i)
172 auto val = expr.template logical_evalu<T, Bits, Arch>(logical_flat);
177 if constexpr (scalarStart < lastDim)
179 for (
my_size_t i = scalarStart; i < lastDim; ++i)
181 output[out_base + i] = expr.template logical_evalu<T, 1, GENERICARCH>(logical_flat);
static FORCE_INLINE void eval(T *output, const Expr &expr) noexcept
Dispatch: pick contiguous or permuted eval based on expression layout.
Definition kernel_eval.h:36