tesseract++ 0.0.1
N-dimensional tensor library for embedded systems
Loading...
Searching...
No Matches
fused_tensor.h
Go to the documentation of this file.
1#ifndef FUSEDTENSORND_H
2#define FUSEDTENSORND_H
3
4#include <random>
5
6#include "memory/mem_utils.h"
7
8#include "config.h"
9#include "helper_traits.h"
10#include "simple_type_traits.h"
11
12#include "fused/BaseExpr.h"
22// #include "fused/views/permuted_view.h"
24// #include "fused/layouts/strided_layout.h"
27
// NOTE(review): this is a Doxygen source-listing dump — the leading numerals on each
// line are Doxygen line numbers fused into the text, and several original lines are
// elided from the listing. Comments below document only what is visible here.
// FusedTensorND: CRTP expression-template tensor with a compile-time shape pack
// (Dims...). TotalSize is the product of all dimensions (fold expression).
28// Base class: FusedTensorND
29template <typename T, my_size_t... Dims>
30class FusedTensorND : public BaseExpr<FusedTensorND<T, Dims...>>
31{
32public:
 // Compile-time shape metadata: rank, per-axis extents, total logical element count.
33 // Compile time constants
34 static constexpr my_size_t NumDims = sizeof...(Dims);
35 static constexpr my_size_t Dim[] = {Dims...};
36 static constexpr my_size_t TotalSize = (Dims * ...);
37 using value_type = T;
38 using Self = FusedTensorND<T, Dims...>;
39
 // Default constructor leaves storage default-initialized (contents unspecified
 // for trivial T — NOTE(review): confirm StaticStorage's default behavior).
40 // Default constructors
41 FusedTensorND() noexcept = default;
42
 // Fill-constructor: forwards initValue to the AccessPolicy, which presumably
 // fills every element — TODO confirm against DenseAccess.
43 // Constructor to initialize all elements to a specific value
44 explicit FusedTensorND(T initValue) noexcept
45 : data_(initValue) {}
46
 // Copy constructor: delegates the actual copy to AccessPolicy's copy ctor.
 // NOTE(review): the `this == &other` check below is dead code — a constructor
 // always builds a brand-new object, so it can never alias its source. The data
 // has already been copied in the init-list by the time the check runs.
47 // Copy constructor
48 FusedTensorND(const FusedTensorND &other) noexcept
49 : data_(other.data_) // invoke copy constructor of AccessPolicy
50 {
51#ifdef DEBUG_FUSED_TENSOR
52 MyErrorHandler::log("Copy constructor called", ErrorLevel::Info);
53#endif
54 if (this == &other)
55 {
56#ifdef DEBUG_FUSED_TENSOR
57 MyErrorHandler::log("Self-assignment detected, skipping copy.", ErrorLevel::Info);
58#endif
59 return; // Handle self-assignment
60 }
61 }
62
 // Move constructor: delegates to AccessPolicy's move ctor via the project's
 // own `move` (see simple_type_traits.h). Same dead self-check remark as above.
63 // Move constructor
64 FusedTensorND(FusedTensorND &&other) noexcept
65 : data_(move(other.data_)) // invoke move constructor of AccessPolicy
66 {
67#ifdef DEBUG_FUSED_TENSOR
68 MyErrorHandler::log("Move constructor called", ErrorLevel::Info);
69#endif
70 if (this == &other)
71 {
72#ifdef DEBUG_FUSED_TENSOR
73 MyErrorHandler::log("Self-assignment detected, skipping move.", ErrorLevel::Info);
74#endif
75 return; // Handle self-assignment
76 }
77 }
78
 // may_alias: returns true only when `output` is the very same FusedTensorND
 // object (pointer identity). For any other Output type aliasing is reported
 // as impossible — NOTE(review): views wrapping *this would not be detected here.
79 template <typename Output>
80 bool may_alias(const Output &output) const noexcept
81 {
82 // So the if constexpr is an optimization — when the compiler knows aliasing is impossible,
83 // it skips the check. When it can't know (same type), it defers to runtime.
84 if constexpr (is_same_v<remove_cvref_t<Output>, FusedTensorND>)
85 {
86 return this == &output;
87 }
88 else
89 {
90 return false;
91 }
92 }
93
 // Expression assignment: evaluates an expression tree into this tensor.
 // NOTE(review): doxygen line 95 (the signature) and line 117 (the kernel-eval
 // call head) are elided from this listing; the member index gives the signature
 // as `FusedTensorND &operator=(const BaseExpr<Expr> &expr)` and line 118 is the
 // tail of a KernelOps-style eval(...) call — verify against the real header.
94 template <typename Expr>
96 {
97#ifdef DEBUG_FUSED_TENSOR
98 MyErrorHandler::log("FusedTensorND assignment operator called", ErrorLevel::Info);
99#endif
100 const auto &e = expr.derived();
101
 // Aliasing is only warned about, not handled — NOTE(review): if the kernel
 // writes in place while reading `e`, results may be wrong; confirm intent.
102 if (e.may_alias(*this))
103 {
104 MyErrorHandler::log("Aliasing detected in assignment operator", ErrorLevel::Warning);
105 }
106
107 // check if the dimensions match at compile time
108 if constexpr (NumDims != Expr::NumDims)
109 {
110 MyErrorHandler::error("Dimensions count mismatch in assignment operator");
111 }
112 if constexpr (!dims_match<NumDims>(Dim, Expr::Dim))
113 {
114 MyErrorHandler::error("Dimensions size mismatch in assignment operator");
115 }
116
118 data_.data(), e);
119
120 return *this;
121 }
122
123 // ========================================================================
124 // FusedTensorND::evalu — physical flat ONLY, K::load
125 // ========================================================================
126 // Treats flat as a PHYSICAL offset into the padded buffer.
127 // Used by the contiguous kernel path which iterates physical slices.
128 //
129 // WARNING: Do NOT pass logical flat indices to this function when
130 // padding exists (lastDim != paddedLastDim). Use logical_evalu instead.
 // NOTE(review): doxygen lines 132/134 (the declaration head and the alias
 // `using K = Microkernel<T_, Bits, Arch>`, presumably) are elided here; the
 // member index gives the return type as Microkernel<T_, Bits, Arch>::VecType.
131 template <typename T_, my_size_t Bits, typename Arch>
133 {
135 return K::load(data_.data() + flat);
136 }
137
 // logical_evalu: evaluate at a LOGICAL flat index. For scalar kernels
 // (simdWidth == 1) it maps one index; otherwise it maps each lane's logical
 // index to a physical offset and gathers. NOTE(review): several lines
 // (147/150/155) are elided in this listing — verify against the real header.
146 template <typename T_, my_size_t Bits, typename Arch>
148 logical_evalu(my_size_t logical_flat) const noexcept
149 {
151
152 if constexpr (K::simdWidth == 1)
153 {
154 return K::load(data_.data() +
156 }
157 else
158 {
159 my_size_t idxList[K::simdWidth];
160 for (my_size_t i = 0; i < K::simdWidth; ++i)
161 idxList[i] = Layout::logical_flat_to_physical_flat(logical_flat + i);
162 return K::gather(data_.data(), idxList);
163 }
164 }
165
 // Copy assignment: guards against self-assignment, then delegates the element
 // copy to AccessPolicy's copy assignment.
166 FusedTensorND &operator=(const FusedTensorND &other) noexcept
167 {
168#ifdef DEBUG_FUSED_TENSOR
169 MyErrorHandler::log("FusedTensorND copy assignment", ErrorLevel::Info);
170#endif
171 if (this == &other)
172 {
173#ifdef DEBUG_FUSED_TENSOR
174 MyErrorHandler::log("Self-assignment detected, skipping copy.", ErrorLevel::Info);
175#endif
176 return *this; // Handle self-assignment
177 }
178
179 // Copy the data
180 data_ = other.data_; // calls the copy assignment of AccessPolicy
181 return *this;
182 }
183
 // Move assignment. NOTE(review): doxygen line 185 (the signature) is elided;
 // the member index gives `FusedTensorND &operator=(FusedTensorND &&other) noexcept`.
184 // move assignment operator
186 {
187#ifdef DEBUG_FUSED_TENSOR
188 MyErrorHandler::log("FusedTensorND move assignment", ErrorLevel::Info);
189#endif
190 if (this == &other)
191 {
192#ifdef DEBUG_FUSED_TENSOR
193 MyErrorHandler::log("Self-assignment detected, skipping move.", ErrorLevel::Info);
194#endif
195 return *this; // Handle self-assignment
196 }
197
198 // Move the data
199 data_ = move(other.data_); // calls the move assignment of AccessPolicy
200 return *this;
201 }
202
 // Element access. All overloads funnel through
 // Layout::logical_coords_to_physical_flat, so callers always use LOGICAL
 // coordinates; padding is handled by the layout mapping.
203 // Variadic access operator for accessing tensor elements with separate indices
204 template <typename... Indices>
205 requires(sizeof...(Indices) == NumDims)
206 inline T &operator()(Indices... indices) TESSERACT_CONDITIONAL_NOEXCEPT
207 {
208 my_size_t idxArray[] = {static_cast<my_size_t>(indices)...}; // Convert indices to an array
209 return data_[Layout::logical_coords_to_physical_flat(idxArray)];
210 }
211
212 // Const version of the variadic access operator
213 template <typename... Indices>
214 requires(sizeof...(Indices) == NumDims)
215 inline const T &operator()(Indices... indices) const TESSERACT_CONDITIONAL_NOEXCEPT
216 {
217 my_size_t idxArray[] = {static_cast<my_size_t>(indices)...};
218 return data_[Layout::logical_coords_to_physical_flat(idxArray)];
219 }
220
 // Pointer overload — no length check is possible here. NOTE(review): doxygen
 // line 222 (the non-const signature) is elided; member index gives
 // `T &operator()(const my_size_t *indices)`.
221 // version of passing a pointer to indices array eg _tensor1(indices1), indices1 is a pointer to an array of known size
223 {
224 // Unsafe — caller must guarantee NumDims elements.
225 return data_[Layout::logical_coords_to_physical_flat(indices)];
226 }
227
228 inline const T &operator()(const my_size_t *indices) const TESSERACT_CONDITIONAL_NOEXCEPT
229 {
230 // Unsafe — caller must guarantee NumDims elements.
231 return data_[Layout::logical_coords_to_physical_flat(indices)];
232 }
233
 // Fixed-size array overloads (length checked by the reference-to-array type).
 // NOTE(review): signatures at doxygen lines 235/240 are elided; member index
 // gives `T &operator()(my_size_t(&indices)[NumDims])` and its const twin.
234 // version of passing an array of indices eg _tensor1(indices1), indices1 is an array of known size use template
236 {
237 return data_[Layout::logical_coords_to_physical_flat(indices)];
238 }
239
241 {
242 return data_[Layout::logical_coords_to_physical_flat(indices)];
243 }
244
245 // check if all dimensions are the same at compile time
246 static constexpr bool areDimsEqual()
247 {
248 return all_equal<Dims...>();
249 }
250
 // isIdentity: true iff the tensor is a hypercube whose diagonal elements are
 // ~1 and all others ~0, within PRECISION_TOLERANCE.
 // NOTE(review): `combinations` is a TotalSize x NumDims stack array — for
 // large tensors this can easily blow the stack on embedded targets; it also
 // calls the [[deprecated]] generate_combinations helper. Consider iterating
 // coordinates with an odometer instead of materializing all of them.
251 bool isIdentity() const
252 {
253 // Check if the tensor is "square" (hypercube). If the tensor
254 // is not square, it cannot be identity -> return false
255 if (!areDimsEqual())
256 {
257 return false;
258 }
259
260 // Calculate all indices combinations for all dimensions
261 static constexpr my_size_t total_combinations = (1 * ... * Dims); // fold expression to calculate the total number of combinations
262 my_size_t combinations[total_combinations][sizeof...(Dims)]; // 2D array to store all combinations
263 static constexpr my_size_t max_vals[sizeof...(Dims)] = {Dims...}; // array to store the maximum values for each dimension
264 generate_combinations(max_vals, combinations); // generate all combinations
265
266 for (my_size_t i = 0; i < total_combinations; ++i)
267 {
268 // iterate over all dimensions
269 // if all indices are the same, then it's a diagonal element
270 bool isElementDiagonal = true;
271 for (my_size_t j = 0; j < getNumDims(); ++j)
272 {
273 if (combinations[i][j] != combinations[i][0])
274 {
275 isElementDiagonal = false;
276 break;
277 }
278 }
279
280 if (isElementDiagonal)
281 {
282 // if the element is diagonal, check if it is equal to 1.
283 // element - 1 must be greater than the precision tolerance
284 if (std::abs((*this)(combinations[i]) - 1) > PRECISION_TOLERANCE)
285 {
286 return false;
287 }
288 }
289 else
290 {
291 // if the element is not diagonal, check if it is equal to 0.
292 // element must be less than the precision tolerance
293 if (!(std::abs((*this)(combinations[i])) < PRECISION_TOLERANCE))
294 {
295 return false;
296 }
297 }
298 }
299 return true;
300 }
301
 // transpose_view<Perm...>: zero-copy permuted view; validity of the
 // permutation pack is static_asserted inside PermutedViewConstExpr.
302 // Generic transpose_view by pack
303 template <my_size_t... Perm>
304 FORCE_INLINE auto transpose_view() const noexcept
305 {
306 // static_assert to check that Permutation pack is valid are in PermutedViewConstExpr
307 return PermutedViewConstExpr<Self, Perm...>(*this);
308 }
309
 // 2D convenience overload. NOTE(review): doxygen line 315 (the return
 // statement, presumably `return transpose_view<1, 0>();`) is elided from
 // this listing — verify against the real header.
310 FORCE_INLINE auto transpose_view(void) const noexcept
311 {
312 // since for 2D tensors the permutation of axis is known
313 // at compile time we can use PermutedViewConstExpr
314 static_assert(sizeof...(Dims) == 2, "Transpose is only supported for 2D tensors");
316 }
317
318 // FORCE_INLINE auto transpose_view(const my_size_t perm[NumDims]) const noexcept
319 // {
320 // return PermutedView<Self, NumDims>(*this, perm);
321 // }
322
 // Compile-time size/rank accessors (mirror the public constants).
323 FORCE_INLINE static constexpr my_size_t getTotalSize() noexcept
324 {
325 return TotalSize;
326 }
327
328 FORCE_INLINE static constexpr my_size_t getNumDims() noexcept
329 {
330 return NumDims;
331 }
332
 // getShape: human-readable shape string, e.g. "(1,5,6)" for a 3D tensor.
333 // Utility function to retrieve the shape of the tensor as (1,5,6) for a 3D tensor use the getNumDims
334 std::string getShape() const
335 {
336 std::string shape = "(";
337 for (my_size_t i = 0; i < getNumDims(); ++i)
338 {
339 shape += std::to_string(getDim(i));
340 if (i < getNumDims() - 1)
341 shape += ",";
342 }
343 shape += ")";
344 return shape;
345 }
346
 // setToZero: value-initializes the WHOLE physical buffer (padding included).
347 FusedTensorND &setToZero(void) noexcept
348 {
349 // Safe to fill entire physical buffer — padding stays 0 too
350 for (my_size_t i = 0; i < Layout::PhysicalSize; ++i)
351 data_[i] = T{};
352 return *this;
353 }
354
 // setHomogen: fill every physical element (padding included) with _val.
 // NOTE(review): doxygen line 357 is elided here — likely a comment.
355 FusedTensorND &setHomogen(T _val) noexcept
356 {
358 for (my_size_t i = 0; i < Layout::PhysicalSize; ++i)
359 data_[i] = _val;
360 return *this;
361 }
362
 // setRandom: fill logical elements with uniform values in [_minRand, _maxRand].
 // NOTE(review): parameter order is (max, min) — easy to call backwards.
 // NOTE(review): seeding mt19937 with time(nullptr) repeats sequences within
 // the same second, and std::uniform_int_distribution<T> is not defined for
 // 8-bit integer types — confirm T is never char/int8_t here.
363 FusedTensorND &setRandom(T _maxRand, T _minRand)
364 {
365 std::mt19937 rng(static_cast<unsigned int>(std::time(nullptr)));
366
367 if constexpr (std::is_floating_point<T>::value)
368 {
369 std::uniform_real_distribution<T> dist(_minRand, _maxRand);
370 for (my_size_t i = 0; i < TotalSize; ++i)
371 data_[Layout::logical_flat_to_physical_flat(i)] = dist(rng);
372 }
373 else
374 {
375 std::uniform_int_distribution<T> dist(_minRand, _maxRand);
376 for (my_size_t i = 0; i < TotalSize; ++i)
377 data_[Layout::logical_flat_to_physical_flat(i)] = dist(rng);
378 }
379 return *this;
380 }
381
 // setDiagonal: zero the tensor, then write _val at every index (i,i,...,i)
 // for i < min(Dims...). NOTE(review): doxygen line 383 (the signature) is
 // elided; member index gives `FusedTensorND &setDiagonal(T _val)`.
382 // for all dimensions
384 {
385 static_assert(sizeof...(Dims) >= 2, "setDiagonal requires at least 2 dimensions.");
386
387 // set the entire matrix to zeros
388 setToZero();
389
390 // Calculate the minimum dimension
391 constexpr my_size_t minDim = min_value<Dims...>();
392 my_size_t indices[NumDims] = {0}; // Initialize all indices to zero
393
394 for (my_size_t i = 0; i < minDim; ++i)
395 {
396 // Set the current diagonal index for all dimensions
397 for (my_size_t d = 0; d < getNumDims(); ++d)
398 {
399 indices[d] = i; // Set the diagonal index, others to zero
400 }
401
402 // Calculate the index in the flat array and set the value
403 data_[Layout::logical_coords_to_physical_flat(indices)] = _val;
404 }
405 return *this;
406 }
407
 // setIdentity: diagonal of ones; only valid for square (all-equal-dims)
 // tensors of rank >= 2. NOTE(review): signature at doxygen line 408 is
 // elided; member index gives `FusedTensorND &setIdentity(void)`.
409 {
410 static_assert(sizeof...(Dims) >= 2, "Identity requires at least 2 dimensions.");
411 static_assert(all_equal<Dims...>(), "All dimensions must be equal for an identity tensor");
412
413 this->setDiagonal(1);
414 return *this;
415 }
416
417 // static FusedTensorND I(void)
418 // {
419 // static_assert(sizeof...(Dims) >= 2, "Identity requires at least 2 dimensions.");
420
421 // static_assert(all_equal<Dims...>(), "All dimensions must be equal for an identity tensor");
422
423 // FusedTensorND<T, Dims...> _outp;
424 // _outp.setDiagonal(1);
425 // return _outp;
426 // }
427
 // setSequencial (sic): writes 0,1,2,... into logical elements only, leaving
 // padding untouched. NOTE(review): signature at doxygen line 428 is elided;
 // member index gives `FusedTensorND &setSequencial(void)`.
429 {
430 // Only set logical elements — padding must stay uninitialized
431 for (my_size_t i = 0; i < TotalSize; ++i)
432 {
433 data_[Layout::logical_flat_to_physical_flat(i)] = static_cast<T>(i);
434 }
435 return *this;
436 }
437
 // getDiagonalEntries: copy the main diagonal into a (DiagonalSize x 1) tensor.
 // NOTE(review): DiagonalSize is not static_asserted against min(Dims...) —
 // a too-small output tensor would be written out of (logical) bounds.
438 template <my_size_t DiagonalSize>
439 void getDiagonalEntries(FusedTensorND<T, DiagonalSize, 1> &diagonalEntries) const // TODO: needs to be tested
440 {
441 static_assert(sizeof...(Dims) >= 2, "Getting diagonal entries requires at least 2 dimensions.");
442 // Calculate the minimum dimension
443 my_size_t minDim = std::min({Dims...}); // Using initializer list to find the minimum TODO: std::min can be replaced with by helper_trait min_value
444 my_size_t indices[getNumDims()] = {0}; // Initialize all indices to zero
445
446 for (my_size_t i = 0; i < minDim; ++i)
447 {
448 // Set the current diagonal index for all dimensions
449 for (my_size_t d = 0; d < getNumDims(); ++d)
450 {
451 indices[d] = i; // Set the diagonal index, others to zero
452 }
453
454 // Calculate the index in the flat array and set the value
455 diagonalEntries(i, 0) = data_[Layout::logical_coords_to_physical_flat(indices)];
456 }
457 }
458
 // einsum: contract _tensor1 axis `a` against _tensor2 axis `b`, producing a
 // tensor whose dims are (tensor1 dims minus a) ++ (tensor2 dims minus b).
 // For the 2x2D case it dispatches to a register-blocked GEMM, materializing
 // transposes as needed so the contraction axis is stride-1; otherwise it
 // falls back to per-element strided dot products.
 // NOTE(review): doxygen lines 459–484 (the doc comment + signature) are
 // elided; the member index gives the signature as
 // `static FusedTensorND einsum(const BaseExpr<LeftExpr>&, const BaseExpr<RightExpr>&, my_size_t a, my_size_t b)`.
 // Several other lines (506, 551, 563, 578, 600) are elided too — each
 // appears to declare a local (`Kern` alias, the gemm call head, `Self dst;`).
481 template <typename LeftExpr, typename RightExpr>
485 const BaseExpr<LeftExpr> &_tensor1,
486 const BaseExpr<RightExpr> &_tensor2,
487 const my_size_t a,
488 const my_size_t b)
489 {
490 static constexpr my_size_t Dims1 = LeftExpr::NumDims;
491 static constexpr my_size_t Dims2 = RightExpr::NumDims;
492
493 static_assert(Dims1 >= 2, "Tensor 1 must have at least 2 dimensions");
494 static_assert(Dims2 >= 2, "Tensor 2 must have at least 2 dimensions");
495
496 // Runtime validation
497 if (a >= Dims1 || b >= Dims2)
498 MyErrorHandler::error("Invalid contraction axis");
499
500 if (_tensor1.derived().getDim(a) != _tensor2.derived().getDim(b))
501 MyErrorHandler::error("Contraction dimensions mismatch");
502
503 using Layout1 = typename LeftExpr::Layout;
504 using Layout2 = typename RightExpr::Layout;
505 using OutputLayout = Layout;
507
508 const my_size_t K_len = _tensor1.derived().getDim(a);
509 const my_size_t contract_stride1 = Layout1::stride(a);
510 const my_size_t contract_stride2 = Layout2::stride(b);
511
512 // ====================================================================
513 // Build and validate output dimensions
514 // ====================================================================
515
516 static constexpr my_size_t n_newDims = Dims1 + Dims2 - 2;
517 my_size_t out_dims[n_newDims];
518
519 my_size_t d = 0;
520 for (my_size_t i = 0; i < Dims1; ++i)
521 if (i != a)
522 out_dims[d++] = _tensor1.derived().getDim(i);
523 for (my_size_t i = 0; i < Dims2; ++i)
524 if (i != b)
525 out_dims[d++] = _tensor2.derived().getDim(i);
526
 // The return type's Dims... must already match the contraction result;
 // this loop only verifies that at runtime (errors instead of asserting).
527 FusedTensorND _outp;
528 for (my_size_t i = 0; i < n_newDims; ++i)
529 {
530 if (out_dims[i] != _outp.getDim(i))
531 MyErrorHandler::error("Output dimensions mismatch");
532 }
533
534 // ====================================================================
535 // 2D GEMM path — always favorable after optional transpose
536 // ====================================================================
537
538 if constexpr (Dims1 == 2 && Dims2 == 2)
539 {
540 // Lambda: run GEMM given ready-to-go tensors and their axes
541 // A_ready has contraction on its last dim (stride 1)
542 // B_ready has free dim on its last dim (stride 1)
543 auto run_gemm = [&](const auto &A_ready, const auto &B_ready)
544 {
545 using LayoutA = typename std::remove_cvref_t<decltype(A_ready)>::Layout;
546 using LayoutB = typename std::remove_cvref_t<decltype(B_ready)>::Layout;
547
548 const my_size_t M = A_ready.getDim(0);
549 const my_size_t N = B_ready.getDim(1);
550
552 A_ready.data(), M, K_len, LayoutA::stride(0),
553 B_ready.data(), N, LayoutB::stride(0),
554 _outp.data(), OutputLayout::stride(0));
555 };
556
557 auto make_transposed = [](const auto &expr)
558 {
559 using E = remove_cvref_t<decltype(expr)>;
560 if constexpr (!requires { expr.transpose(); })
561 {
562 // FusedTensorND: materialize transpose_view
564 dst = expr.transpose_view();
565 return dst;
566 }
567 else if constexpr (expression::traits<E>::IsPermuted)
568 {
569 // Real permuted view (e.g. <1,0>): .transpose() returns the
570 // base tensor, whose physical layout IS the transposed data
571 return expr.transpose();
572 }
573 else
574 {
575 // Identity permuted view (e.g. <0,1>): .transpose() returns
576 // the base tensor with SAME dims — need to actually transpose
577 auto &base = expr.transpose();
579 dst = base.transpose_view();
580 return dst;
581 }
582 };
583
584 auto ensure_materialized = [](const auto &expr)
585 {
586 using E = remove_cvref_t<decltype(expr)>;
587 if constexpr (!requires { expr.transpose(); })
588 {
589 // FusedTensorND: physical layout already correct
590 return expr;
591 }
592 else if constexpr (!expression::traits<E>::IsPermuted)
593 {
594 // Identity view: base tensor has matching physical layout
595 return expr.transpose();
596 }
597 else
598 {
599 // Real permuted view: physical layout doesn't match logical dims
601 dst = expr;
602 return dst;
603 }
604 };
605
 // Dispatch on the four (a, b) axis combinations; transposes are applied
 // so the GEMM always sees A's contraction axis / B's free axis stride-1.
606 if (a == 1 && b == 0)
607 {
608 if constexpr (requires { _tensor1.derived().transpose(); } || requires { _tensor2.derived().transpose(); })
609 {
610 // At least one input is a PermutedViewConstExpr —
611 // materialize to fix physical layout
612 run_gemm(ensure_materialized(_tensor1.derived()),
613 ensure_materialized(_tensor2.derived()));
614 }
615 else
616 {
617 // Both are FusedTensorND — physical layout guaranteed favorable
618 run_gemm(_tensor1.derived(), _tensor2.derived());
619 }
620 }
621 else if (a == 0 && b == 0)
622 {
623 // std::cout << "Running GEMM with A transposed\n";
624 auto A_t = make_transposed(_tensor1.derived());
625 run_gemm(A_t, _tensor2.derived());
626 }
627 else if (a == 1 && b == 1)
628 {
629 // std::cout << "Running GEMM with B transposed\n";
630 auto B_t = make_transposed(_tensor2.derived());
631 run_gemm(_tensor1.derived(), B_t);
632 }
633 else
634 {
635 // std::cout << "Running GEMM with both A and B transposed\n";
636 auto A_t = make_transposed(_tensor1.derived());
637 auto B_t = make_transposed(_tensor2.derived());
638 run_gemm(A_t, B_t);
639 }
640
641 return _outp;
642 }
643
644 // ====================================================================
645 // Generic fallback: stride-mapped per-element dot products
646 // ====================================================================
647
 // Each output axis maps to a stride in exactly one input; the other
 // input's entry is 0 so one base-offset loop serves both tensors.
648 my_size_t strides1_map[n_newDims];
649 my_size_t strides2_map[n_newDims];
650
651 d = 0;
652 for (my_size_t i = 0; i < Dims1; ++i)
653 {
654 if (i != a)
655 {
656 strides1_map[d] = Layout1::stride(i);
657 strides2_map[d] = 0;
658 ++d;
659 }
660 }
661 for (my_size_t i = 0; i < Dims2; ++i)
662 {
663 if (i != b)
664 {
665 strides1_map[d] = 0;
666 strides2_map[d] = Layout2::stride(i);
667 ++d;
668 }
669 }
670
671 my_size_t out_strides[n_newDims];
672 for (my_size_t i = 0; i < n_newDims; ++i)
673 out_strides[i] = OutputLayout::stride(i);
674
675 T *out_ptr = _outp.data();
676
 // NOTE(review): total_elements folds over THIS tensor's Dims (the output),
 // which matches the output's logical element count.
677 static constexpr my_size_t total_elements = (1 * ... * Dims);
678
679 for (my_size_t flat = 0; flat < total_elements; ++flat)
680 {
 // Decompose the output's logical flat index into per-axis coordinates
 // (row-major: last axis fastest).
681 my_size_t coords[n_newDims];
682 my_size_t tmp = flat;
683 for (my_size_t i = n_newDims; i-- > 0;)
684 {
685 coords[i] = tmp % out_dims[i];
686 tmp /= out_dims[i];
687 }
688
689 my_size_t base1 = 0;
690 my_size_t base2 = 0;
691 my_size_t out_phys = 0;
692 for (my_size_t i = 0; i < n_newDims; ++i)
693 {
694 base1 += coords[i] * strides1_map[i];
695 base2 += coords[i] * strides2_map[i];
696 out_phys += coords[i] * out_strides[i];
697 }
698
699 out_ptr[out_phys] = Kern::dot(
700 _tensor1.derived(), base1, contract_stride1,
701 _tensor2.derived(), base2, contract_stride2,
702 K_len);
703 }
704
705 return _outp;
706 }
707
 // print: public entry point; delegates to the generic N-D printer.
708 // Function to print the contents of the tensor
709 void print(bool with_padding = false) const
710 {
711 printND(with_padding);
712 }
713
 // printND: prints the tensor as a sequence of 2D (row x col) slices; with
 // showPadding, also prints the physical padding columns after a separator.
 // NOTE(review): doxygen lines 714–723 (doc comment) and several statement
 // lines (748, 764, 766, 772, 775, 780, 784, 786) are elided from this
 // listing — the missing lines appear to compute `offset`/`rowBase` and log
 // separators/newlines; verify against the real header.
724 void printND(bool showPadding = false) const
725 {
726 static constexpr my_size_t ND = Layout::NumDims;
727
728 my_size_t coords[ND] = {};
729
730 // Number of 2D slices = product of dims 0..ND-3
731 my_size_t numSlices = 1;
732 for (my_size_t d = 0; d + 2 < ND; ++d)
733 numSlices *= getDim(d);
734
735 const my_size_t rowDim = (ND >= 2) ? getDim(ND - 2) : 1;
736 const my_size_t colDim = getDim(ND - 1);
737 const my_size_t physColDim = Layout::PadPolicyType::PhysicalDims.at(ND - 1);
738
739 for (my_size_t s = 0; s < numSlices; ++s)
740 {
741 // Print slice header for 3D+
742 if constexpr (ND > 2)
743 {
744 MyErrorHandler::log("Slice [");
745 for (my_size_t d = 0; d + 2 < ND; ++d)
746 {
747 if (d > 0)
749 MyErrorHandler::log(coords[d]);
750 }
751 MyErrorHandler::log("]:\n");
752 }
753
754 // Print 2D matrix
755 for (my_size_t i = 0; i < rowDim; ++i)
756 {
757 if constexpr (ND >= 2)
758 coords[ND - 2] = i;
759
760 // Logical elements
761 for (my_size_t j = 0; j < colDim; ++j)
762 {
763 coords[ND - 1] = j;
765 MyErrorHandler::log(data_[offset]);
767 }
768
769 // Padding elements
770 if (showPadding && physColDim > colDim)
771 {
773 // Get base offset for this row (col = 0)
774 coords[ND - 1] = 0;
776 // Last dim has stride 1, so padding is at rowBase + colDim..physColDim-1
777 for (my_size_t j = colDim; j < physColDim; ++j)
778 {
779 MyErrorHandler::log(data_[rowBase + j]);
781 }
782 }
783
785 }
787

788 // Increment outer coordinates (odometer, right-to-left over dims 0..ND-3)
789 if constexpr (ND > 2)
790 {
791 for (my_size_t d = ND - 3;; --d)
792 {
793 coords[d]++;
794 if (coords[d] < getDim(d))
795 break;
796 coords[d] = 0;
797 if (d == 0)
798 break;
799 }
800 }
801 }
802 }
 // NOTE(review): doxygen lines 804–807 and 809–812 are elided here — per the
 // member index these are the static getDim(i)/getStride(i) accessors.
808

813

814 // print layout info
815 void printLayoutInfo() const
816 {
817 MyErrorHandler::log("Tensor Layout Info:", ErrorLevel::Info);
818 MyErrorHandler::log("Number of Dimensions: " + std::to_string(NumDims), ErrorLevel::Info);
821 for (my_size_t i = 0; i < NumDims; ++i)
822 MyErrorHandler::log(std::to_string(getStride(i)) + " ", ErrorLevel::Info);
824 }
825
 // print_access_policy_info: logs padding-policy constants. NOTE(review): the
 // signature at doxygen line 826 is elided; member index gives
 // `void print_access_policy_info() const`.
827 {
828 MyErrorHandler::log("Access Policy Info:", ErrorLevel::Info);
830 MyErrorHandler::log("Logical Size: " + std::to_string(AccessPolicy::PadPolicy::LogicalSize), ErrorLevel::Info);
831 MyErrorHandler::log("SIMD Width: " + std::to_string(AccessPolicy::PadPolicy::SimdWidth), ErrorLevel::Info);
833 }
834
 // print_flat_data: dumps the raw physical buffer (padding included).
835 void print_flat_data() const
836 {
838 for (my_size_t i = 0; i < AccessPolicy::PhysicalSize; ++i)
839 {
840 MyErrorHandler::log(std::to_string(data_[i]) + " ", ErrorLevel::Info);
841 }
843 }
844

845private:
 // Storage selection: dense, SIMD-padded, statically allocated. The commented
 // alternatives record which policy combinations currently work — the Sparse
 // and dynamic NoPadding variants are flagged as broken by the author.
846 // Example of using different access and storage policies
847 using AccessPolicy = DenseAccess<T, SimdPaddingPolicy, StaticStorage, Dims...>;
848 // using AccessPolicy = DenseAccess<T, NoPaddingPolicy, StaticStorage, Dims...>; // works only for GENERICARCH
849
850 // using AccessPolicy = DenseAccess<T, SimdPaddingPolicy, DynamicStorage, Dims...>; // works
851 // using AccessPolicy = DenseAccess<T, NoPaddingPolicy, DynamicStorage, Dims...>; // bad alloc
852
853 // using AccessPolicy = SparseAccess<T, TotalSize, my_size_t, DynamicStorage, DynamicStorage>; // something is wrong here
854 // using AccessPolicy = SparseAccess<T, TotalSize, my_size_t, StaticStorage, StaticStorage>; // something is wrong here
855 // using AccessPolicy = SparseAccess<T, TotalSize, my_size_t>; // default is static storage // something is wrong here
856 AccessPolicy data_;
857

 // checkDimensionsMismatch: runtime check that another FusedTensorND has the
 // same per-axis extents; reports via MyErrorHandler::error on mismatch.
858 template <my_size_t... Dims1>
859 FORCE_INLINE void checkDimensionsMismatch(const FusedTensorND<T, Dims1...> &other) const // TODO: conditionally noexcept
860 {
861 // check if the dimensions of the tensors are the same taking into account the transpose order
862 for (my_size_t i = 0; i < getNumDims(); ++i)
863 {
864 if (this->getDim(i) != other.getDim(i))
865 {
866 MyErrorHandler::error("Dimensions mismatch");
867 }
868 }
869 }
870
 // print_combinations (deprecated debug helper). NOTE(review): the template
 // parameter `NumDims` shadows the class-level constant of the same name —
 // harmless in a static member, but confusing; doxygen line 876 (presumably
 // the opening "{" log) is elided.
871 template <my_size_t NumDims, my_size_t M>
872 [[deprecated]] static void print_combinations(const my_size_t (&combinations)[M][NumDims])
873 {
874 for (my_size_t i = 0; i < M; ++i)
875 {
877 for (my_size_t j = 0; j < NumDims; ++j)
878 {
879 MyErrorHandler::log(combinations[i][j]);
880 MyErrorHandler::log(j < NumDims - 1 ? ", " : " ");
881 }
882 MyErrorHandler::log("}\n");
883 }
884 }
885

 // generate_combinations (deprecated): enumerates all M index tuples in
 // row-major (odometer) order, storing each into `combinations`. The template
 // parameter `NumDims` shadows the class constant of the same name.
886 // Template function to generate all combinations and store them in a 2D array
887 template <my_size_t NumDims, my_size_t M>
888 [[deprecated]] static void generate_combinations(const my_size_t (&max_values)[NumDims], my_size_t (&combinations)[M][NumDims])
889 {
890 my_size_t combination[NumDims] = {0}; // Initialize the first combination with all 0s
891
892 // Fill each row in `combinations` with the next combination
893 for (my_size_t row = 0; row < M; ++row)
894 {
895 for (my_size_t i = 0; i < NumDims; ++i)
896 {
897 combinations[row][i] = combination[i];
898 }
899
900 // print the combination
901 // here you can calculate the contraction of the tensor
902 // if you don't want to store all the combinations
903 // you can calculate the contraction here
904 // for now comment this print statement
905 // for (my_size_t i = 0; i < NumDims; ++i)
906 // {
907 // std::cout << combination[i] << ", ";
908 // }
909 // std::cout << std::endl;
910
 // Odometer increment; `position` is signed so the loop can terminate
 // after the most-significant digit rolls over (author's TODO notes this).
911 // Increment combination like a counter with custom max values
912 int position = NumDims - 1; // TODO: do not use int. Make the loop safe -> to not overflow
913 while (position >= 0)
914 {
915 ++combination[position];
916 if (combination[position] < max_values[position])
917 {
918 break;
919 }
920 combination[position] = 0;
921 --position;
922 }
923 }
924 }
925
 // Deprecated fixed-rank printers, superseded by printND. NOTE(review): the
 // listing elides several log lines inside these (doxygen 932/934, 977/979/981,
 // 990/995/1003) — presumably separators/newlines and coordinate logs.
926 // 1D print function
927 [[deprecated]] void print1D() const
928 {
929 for (my_size_t i = 0; i < getDim(0); ++i)
930 {
931 MyErrorHandler::log((*this)(i));
933 }
935 }
936
937 // 2D print function
938 [[deprecated]] void print2D(bool with_padding) const
939 {
940 const my_size_t rows = getDim(0);
941 const my_size_t cols = with_padding
942 ? AccessPolicy::PadPolicy::PhysicalDims[1]
943 : getDim(1);
944
945 for (my_size_t i = 0; i < rows; ++i)
946 {
947 for (my_size_t j = 0; j < cols; ++j)
948 {
949 if (!with_padding)
950 {
951 MyErrorHandler::log((*this)(i, j));
952 }
953 else
954 {
955 // Direct physical access: row * physical_stride + col
956 MyErrorHandler::log(data_[Layout::base_stride(0) * i + j]);
957 }
958
959 if (with_padding && j == getDim(1) - 1)
960 MyErrorHandler::log(" |"); // visual separator before padding
962 }
964 }
965 }
966
967 // 3D print function
968 [[deprecated]] void print3D() const
969 {
970 for (my_size_t s = 0; s < getDim(0); ++s)
971 {
972 for (my_size_t i = 0; i < getDim(1); ++i)
973 {
974 for (my_size_t j = 0; j < getDim(2); ++j)
975 {
976 MyErrorHandler::log((*this)(s, i, j));
978 }
980 }
982 }
983 }
984
985 [[deprecated]] void print4D() const
986 {
987 for (my_size_t b = 0; b < getDim(0); ++b)
988 {
989 MyErrorHandler::log("Batch [");
991 MyErrorHandler::log("]:\n");
992 for (my_size_t s = 0; s < getDim(1); ++s)
993 {
994 MyErrorHandler::log(" Slice [");
996 MyErrorHandler::log("]:\n");
997 for (my_size_t i = 0; i < getDim(2); ++i)
998 {
999 MyErrorHandler::log(" [ ");
1000 for (my_size_t j = 0; j < getDim(3); ++j)
1001 {
1002 MyErrorHandler::log(operator()(b, s, i, j));
1004 }
1005 MyErrorHandler::log("]\n");
1006 }
1007 }
1008 MyErrorHandler::log("\n");
1009 }
1010 }
1011

1012 // template <typename, my_size_t>
1013 // friend class PermutedView;
1014

 // Friend declaration so views can reach private storage. NOTE(review):
 // doxygen line 1016 is elided; member index shows
 // `friend class PermutedViewConstExpr`.
1015 template <typename, my_size_t...>
1017

 // Raw access to the (padded) physical buffer — used by the kernel paths.
 // NOTE(review): doxygen line 1022 is elided; member index shows
 // `using Layout = StridedLayoutConstExpr<typename AccessPolicy::PadPolicy>`.
1018public:
1019 FORCE_INLINE constexpr const T *data() const noexcept { return data_.data(); }
1020 FORCE_INLINE constexpr T *data() noexcept { return data_.data(); }
1021

1023};
1024
1025#endif // FUSEDTENSORND_H
Definition BaseExpr.h:15
const Derived & derived() const
Definition BaseExpr.h:17
Dense storage access with padding policy.
Definition dense_access.h:20
static constexpr my_size_t PhysicalSize
Definition dense_access.h:24
FORCE_INLINE constexpr T * data() noexcept
Definition dense_access.h:52
static void log(const T &msg, ErrorLevel level=ErrorLevel::Plain)
Definition error_handler.h:18
static void error(const T &msg)
Definition error_handler.h:30
Definition fused_tensor.h:31
FusedTensorND & setSequencial(void)
Definition fused_tensor.h:428
void print_access_policy_info() const
Definition fused_tensor.h:826
T & operator()(my_size_t(&indices)[NumDims]) TESSERACT_CONDITIONAL_NOEXCEPT
Definition fused_tensor.h:235
const T & operator()(my_size_t(&indices)[NumDims]) const TESSERACT_CONDITIONAL_NOEXCEPT
Definition fused_tensor.h:240
void getDiagonalEntries(FusedTensorND< T, DiagonalSize, 1 > &diagonalEntries) const
Definition fused_tensor.h:439
FusedTensorND(FusedTensorND &&other) noexcept
Definition fused_tensor.h:64
FusedTensorND & setIdentity(void)
Definition fused_tensor.h:408
FORCE_INLINE auto transpose_view() const noexcept
Definition fused_tensor.h:304
FusedTensorND & setDiagonal(T _val)
Definition fused_tensor.h:383
FORCE_INLINE auto transpose_view(void) const noexcept
Definition fused_tensor.h:310
FusedTensorND & setToZero(void) noexcept
Definition fused_tensor.h:347
StridedLayoutConstExpr< typename AccessPolicy::PadPolicy > Layout
Definition fused_tensor.h:1022
FusedTensorND< T, Dims... > Self
Definition fused_tensor.h:38
Microkernel< T_, Bits, Arch >::VecType evalu(my_size_t flat) const noexcept
Definition fused_tensor.h:132
static FORCE_INLINE constexpr my_size_t getNumDims() noexcept
Definition fused_tensor.h:328
std::string getShape() const
Definition fused_tensor.h:334
FusedTensorND(const FusedTensorND &other) noexcept
Definition fused_tensor.h:48
FusedTensorND & setHomogen(T _val) noexcept
Definition fused_tensor.h:355
FusedTensorND & operator=(const BaseExpr< Expr > &expr)
Definition fused_tensor.h:95
const T & operator()(const my_size_t *indices) const TESSERACT_CONDITIONAL_NOEXCEPT
Definition fused_tensor.h:228
static constexpr bool areDimsEqual()
Definition fused_tensor.h:246
FusedTensorND & operator=(const FusedTensorND &other) noexcept
Definition fused_tensor.h:166
void printND(bool showPadding=false) const
Print tensor of arbitrary dimensions.
Definition fused_tensor.h:724
static constexpr my_size_t Dim[]
Definition fused_tensor.h:35
FORCE_INLINE constexpr T * data() noexcept
Definition fused_tensor.h:1020
bool isIdentity() const
Definition fused_tensor.h:251
static constexpr my_size_t NumDims
Definition fused_tensor.h:34
T & operator()(const my_size_t *indices) TESSERACT_CONDITIONAL_NOEXCEPT
Definition fused_tensor.h:222
static FORCE_INLINE constexpr my_size_t getDim(my_size_t i) TESSERACT_CONDITIONAL_NOEXCEPT
Definition fused_tensor.h:804
static FORCE_INLINE constexpr my_size_t getStride(my_size_t i) TESSERACT_CONDITIONAL_NOEXCEPT
Definition fused_tensor.h:809
void print(bool with_padding=false) const
Definition fused_tensor.h:709
FusedTensorND() noexcept=default
T value_type
Definition fused_tensor.h:37
FORCE_INLINE Microkernel< T_, Bits, Arch >::VecType logical_evalu(my_size_t logical_flat) const noexcept
Evaluate at a LOGICAL flat index.
Definition fused_tensor.h:148
static constexpr my_size_t TotalSize
Definition fused_tensor.h:36
FusedTensorND & operator=(FusedTensorND &&other) noexcept
Definition fused_tensor.h:185
static FORCE_INLINE constexpr my_size_t getTotalSize() noexcept
Definition fused_tensor.h:323
FusedTensorND & setRandom(T _maxRand, T _minRand)
Definition fused_tensor.h:363
bool may_alias(const Output &output) const noexcept
Definition fused_tensor.h:80
void printLayoutInfo() const
Definition fused_tensor.h:815
FORCE_INLINE constexpr const T * data() const noexcept
Definition fused_tensor.h:1019
static FusedTensorND einsum(const BaseExpr< LeftExpr > &_tensor1, const BaseExpr< RightExpr > &_tensor2, const my_size_t a, const my_size_t b)
Contract two tensors along specified axes using SIMD dot products.
Definition fused_tensor.h:484
void print_flat_data() const
Definition fused_tensor.h:835
friend class PermutedViewConstExpr
Definition fused_tensor.h:1016
Compile-time permuted view over a tensor.
Definition permuted_view_constexpr.h:36
Definition static_storage.h:9
Global configuration for the tesseract tensor library.
#define my_size_t
Size/index type used throughout the library.
Definition config.h:126
#define TESSERACT_CONDITIONAL_NOEXCEPT
Definition config.h:56
#define PRECISION_TOLERANCE
Tolerance for floating-point comparisons (e.g. symmetry checks, Cholesky).
Definition config.h:117
#define FORCE_INLINE
Hint the compiler to always inline a function.
Definition config.h:26
consteval bool all_equal()
Check if all values in a parameter pack are equal.
Definition helper_traits.h:20
consteval my_size_t min_value()
Compile-time minimum of a non-type parameter pack.
Definition helper_traits.h:68
Façade for higher-level kernel operations built on top of microkernels.
STL-free memory utilities.
SimdPaddingPolicyBase< T, Microkernel< T, BITS, DefaultArch >::simdWidth, Dims... > SimdPaddingPolicy
Definition simd_padding_policy.h:349
typename remove_cvref< T >::type remove_cvref_t
Alias template for remove_cvref.
Definition simple_type_traits.h:169
constexpr remove_reference_t< T > && move(T &&t) noexcept
Cast to rvalue reference (replacement for std::move).
Definition simple_type_traits.h:178
Definition kernel_ops.h:28
static FORCE_INLINE void eval(T *output, const Expr &expr) noexcept
Evaluation: Dispatch: pick contiguous or permuted eval based on expression layout.
Definition kernel_ops.h:41
Definition microkernel_base.h:16
T VecType
Definition microkernel_base.h:18
Compile-time strided layout with optional permutation.
Definition strided_layout_constexpr.h:38
static constexpr my_size_t NumDims
Definition strided_layout_constexpr.h:39
static constexpr my_size_t PhysicalSize
Definition strided_layout_constexpr.h:41
static FORCE_INLINE constexpr my_size_t logical_flat_to_physical_flat(my_size_t logical_flat) TESSERACT_CONDITIONAL_NOEXCEPT
Definition strided_layout_constexpr.h:290
static FORCE_INLINE constexpr my_size_t stride(my_size_t i) TESSERACT_CONDITIONAL_NOEXCEPT
Get physical stride at dimension i (with permutation applied).
Definition strided_layout_constexpr.h:263
static FORCE_INLINE constexpr my_size_t base_stride(my_size_t i) TESSERACT_CONDITIONAL_NOEXCEPT
Get base stride at dimension i (unpermuted, for physical decomposition).
Definition strided_layout_constexpr.h:251
static FORCE_INLINE constexpr my_size_t logical_coords_to_physical_flat(const my_size_t(&indices)[NumDims]) TESSERACT_CONDITIONAL_NOEXCEPT
Logical coordinates (Array multi-index) to physical flat index (bounds-checked).
Definition strided_layout_constexpr.h:328
static FORCE_INLINE constexpr my_size_t logical_dim(my_size_t i) TESSERACT_CONDITIONAL_NOEXCEPT
Get logical dimension at index i (with permutation applied).
Definition strided_layout_constexpr.h:239
static void gemm(const T *A, my_size_t M, my_size_t K_len, my_size_t strideA, const T *B, my_size_t N, my_size_t strideB, T *C, my_size_t strideC) noexcept
Register-blocked GEMM: C[M,N] = A[M,K] × B[K,N].
Definition kernel_gemm.h:147
Definition basic_expr_traits.h:6