11#ifndef EIGEN_MATRIX_PRODUCT_ALTIVEC_H
12#define EIGEN_MATRIX_PRODUCT_ALTIVEC_H
14#ifndef EIGEN_ALTIVEC_USE_CUSTOM_PACK
15#define EIGEN_ALTIVEC_USE_CUSTOM_PACK 1
18#include "MatrixProductCommon.h"
22#if !defined(EIGEN_ALTIVEC_DISABLE_MMA) && !defined(EIGEN_ALTIVEC_MMA_ONLY)
24#define EIGEN_ALTIVEC_MMA_ONLY
26#define EIGEN_ALTIVEC_DISABLE_MMA
32#if __has_builtin(__builtin_mma_assemble_acc)
33 #define ALTIVEC_MMA_SUPPORT
37#if defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
38 #include "MatrixProductMMA.h"
// Traits bundle used by the GEMM kernels: the SIMD packet type for Scalar,
// a 4-packet accumulator block, and the packet type used for the rhs.
// NOTE(review): excerpt is truncated — struct header/braces and enum wrapper elided.
53template<
typename Scalar>
 56 typedef typename packet_traits<Scalar>::type vectortype;
 57 typedef PacketBlock<vectortype,4> type;
 58 typedef vectortype rhstype;
 61 vectorsize = packet_traits<Scalar>::size,
// double specialization: rhs is a 2-packet block of Packet2d rather than a
// single packet (2 doubles per vector vs 4 floats).
68struct quad_traits<double>
 70 typedef Packet2d vectortype;
 71 typedef PacketBlock<vectortype,4> type;
 72 typedef PacketBlock<Packet2d,2> rhstype;
 75 vectorsize = packet_traits<double>::size,
// vec_perm control vectors that de-interleave packed std::complex data:
// GETREAL* gathers the real lanes of two input vectors, GETIMAG* the
// imaginary lanes (32-bit float and 64-bit double element variants).
85const static Packet16uc p16uc_GETREAL32 = { 0, 1, 2, 3,
90const static Packet16uc p16uc_GETIMAG32 = { 4, 5, 6, 7,
94const static Packet16uc p16uc_GETREAL64 = { 0, 1, 2, 3, 4, 5, 6, 7,
 95 16, 17, 18, 19, 20, 21, 22, 23};
98const static Packet16uc p16uc_GETIMAG64 = { 8, 9, 10, 11, 12, 13, 14, 15,
 99 24, 25, 26, 27, 28, 29, 30, 31};
// Reads element (i,j) of a self-adjoint matrix stored in one triangle only:
// when the element lies in the un-stored triangle it reads the mirrored
// entry dt(j,i) and conjugates it; otherwise it reads dt(i,j) directly.
// NOTE(review): the if/else conditions selecting between the three cases are
// elided in this excerpt — confirm against the full source.
119template<
typename Scalar,
typename Index,
int StorageOrder>
120EIGEN_ALWAYS_INLINE std::complex<Scalar> getAdjointVal(
Index i,
Index j, const_blas_data_mapper<std::complex<Scalar>,
Index, StorageOrder>& dt)
 122 std::complex<Scalar> v;
// Mirrored access: conjugate (negated imaginary part).
 125 v.real( dt(j,i).
real());
 126 v.imag(-dt(j,i).
imag());
// Direct access: copy both components unchanged.
 129 v.real( dt(i,j).
real());
 130 v.imag( dt(i,j).
imag());
// Diagonal/real-only case: only the real part is taken.
 132 v.real( dt(i,j).
real());
// Packs the rhs panel of a complex selfadjoint product into split storage:
// real parts at [rir..] and imaginary parts at [rii..], rii = rir + vectorDelta,
// so the kernels can process real/imag planes independently.
// N scales the column-vectorization width (1 for float, 2 for double callers).
138template<
typename Scalar,
typename Index,
int StorageOrder,
int N>
139EIGEN_STRONG_INLINE
void symm_pack_complex_rhs_helper(std::complex<Scalar>* blockB,
const std::complex<Scalar>* _rhs,
Index rhsStride,
Index rows,
Index cols,
Index k2)
 141 const Index depth = k2 + rows;
 142 const_blas_data_mapper<std::complex<Scalar>,
Index, StorageOrder> rhs(_rhs, rhsStride);
 143 const Index vectorSize = N*quad_traits<Scalar>::vectorsize;
// Distance between the real plane and the imaginary plane of one panel.
 144 const Index vectorDelta = vectorSize * rows;
// View the complex output buffer as a flat Scalar array (real/imag split).
 145 Scalar* blockBf =
reinterpret_cast<Scalar *
>(blockB);
 147 Index rir = 0, rii, j = 0;
// Full-width column panels.
 148 for(; j + vectorSize <= cols; j+=vectorSize)
 150 rii = rir + vectorDelta;
 152 for(
Index i = k2; i < depth; i++)
 154 for(
Index k = 0; k < vectorSize; k++)
 156 std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(i, j + k, rhs);
 158 blockBf[rir + k] = v.real();
 159 blockBf[rii + k] = v.imag();
// Remainder columns, one at a time (loop header elided in this excerpt).
 172 for(
Index i = k2; i < depth; i++)
 174 std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(i, j, rhs);
 176 blockBf[rir] = v.real();
 177 blockBf[rii] = v.imag();
// Lhs counterpart of symm_pack_complex_rhs_helper: packs vectorSize-row
// panels of a selfadjoint complex matrix into split real/imag planes.
// NOTE(review): uses a C-style cast for blockAf where the rhs helper uses
// reinterpret_cast — consider unifying in the full source.
187template<
typename Scalar,
typename Index,
int StorageOrder>
188EIGEN_STRONG_INLINE
void symm_pack_complex_lhs_helper(std::complex<Scalar>* blockA,
const std::complex<Scalar>* _lhs,
Index lhsStride,
Index cols,
Index rows)
 190 const Index depth = cols;
 191 const_blas_data_mapper<std::complex<Scalar>,
Index, StorageOrder> lhs(_lhs, lhsStride);
 192 const Index vectorSize = quad_traits<Scalar>::vectorsize;
 193 const Index vectorDelta = vectorSize * depth;
 194 Scalar* blockAf = (Scalar *)(blockA);
 196 Index rir = 0, rii, j = 0;
// Full vectorSize-row panels.
 197 for(; j + vectorSize <= rows; j+=vectorSize)
 199 rii = rir + vectorDelta;
 201 for(
Index i = 0; i < depth; i++)
 203 for(
Index k = 0; k < vectorSize; k++)
 205 std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(j+k, i, lhs);
 207 blockAf[rir + k] = v.real();
 208 blockAf[rii + k] = v.imag();
// Tail rows: imag plane sized by the number of remaining rows.
 219 rii = rir + ((rows - j) * depth);
 221 for(
Index i = 0; i < depth; i++)
 226 std::complex<Scalar> v = getAdjointVal<Scalar, Index, StorageOrder>(k, i, lhs);
 228 blockAf[rir] = v.real();
 229 blockAf[rii] = v.imag();
// Real-valued rhs packing for the selfadjoint product: mirrors across the
// diagonal by swapping the (row, col) access depending on which triangle
// the element falls in (the selecting conditions are elided in this excerpt).
238template<
typename Scalar,
typename Index,
int StorageOrder,
int N>
239EIGEN_STRONG_INLINE
void symm_pack_rhs_helper(Scalar* blockB,
const Scalar* _rhs,
Index rhsStride,
Index rows,
Index cols,
Index k2)
 241 const Index depth = k2 + rows;
 242 const_blas_data_mapper<Scalar, Index, StorageOrder> rhs(_rhs, rhsStride);
 243 const Index vectorSize = quad_traits<Scalar>::vectorsize;
// Panels of N*vectorSize columns.
 246 for(; j + N*vectorSize <= cols; j+=N*vectorSize)
 249 for(; i < depth; i++)
 251 for(
Index k = 0; k < N*vectorSize; k++)
// Transposed vs direct read — one of the two per branch (branches elided).
 254 blockB[ri + k] = rhs(j+k, i);
 256 blockB[ri + k] = rhs(i, j+k);
// Remainder columns.
 264 for(
Index i = k2; i < depth; i++)
 267 blockB[ri] = rhs(i, j);
 269 blockB[ri] = rhs(j, i);
// Real-valued lhs packing for the selfadjoint product; same mirrored-access
// scheme as symm_pack_rhs_helper but iterating vectorSize-row panels.
275template<
typename Scalar,
typename Index,
int StorageOrder>
276EIGEN_STRONG_INLINE
void symm_pack_lhs_helper(Scalar* blockA,
const Scalar* _lhs,
Index lhsStride,
Index cols,
Index rows)
 278 const Index depth = cols;
 279 const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs, lhsStride);
 280 const Index vectorSize = quad_traits<Scalar>::vectorsize;
 283 for(; j + vectorSize <= rows; j+=vectorSize)
 287 for(; i < depth; i++)
 289 for(
Index k = 0; k < vectorSize; k++)
// Direct vs mirrored read per triangle (selecting branches elided).
 292 blockA[ri + k] = lhs(j+k, i);
 294 blockA[ri + k] = lhs(i, j+k);
// Remainder rows.
 302 for(
Index i = 0; i < depth; i++)
 308 blockA[ri] = lhs(k, i);
 310 blockA[ri] = lhs(i, k);
// Dispatch specializations of symm_pack_rhs/symm_pack_lhs for complex
// scalars: thin forwarders to the helpers above. N=1 for float (4 lanes
// per vector) and N=2 for double (2 lanes per vector) on the rhs side.
317template<
typename Index,
int nr,
int StorageOrder>
318struct symm_pack_rhs<std::complex<float>,
Index, nr, StorageOrder>
 320 void operator()(std::complex<float>* blockB,
const std::complex<float>* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
 322 symm_pack_complex_rhs_helper<float, Index, StorageOrder, 1>(blockB, _rhs, rhsStride, rows, cols, k2);
326template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
327struct symm_pack_lhs<std::complex<float>,
Index, Pack1, Pack2_dummy, StorageOrder>
 329 void operator()(std::complex<float>* blockA,
const std::complex<float>* _lhs, Index lhsStride, Index cols, Index rows)
 331 symm_pack_complex_lhs_helper<float, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);
337template<
typename Index,
int nr,
int StorageOrder>
338struct symm_pack_rhs<std::complex<double>,
Index, nr, StorageOrder>
 340 void operator()(std::complex<double>* blockB,
const std::complex<double>* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
 342 symm_pack_complex_rhs_helper<double, Index, StorageOrder, 2>(blockB, _rhs, rhsStride, rows, cols, k2);
346template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
347struct symm_pack_lhs<std::complex<double>,
Index, Pack1, Pack2_dummy, StorageOrder>
 349 void operator()(std::complex<double>* blockA,
const std::complex<double>* _lhs, Index lhsStride, Index cols, Index rows)
 351 symm_pack_complex_lhs_helper<double, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);
// Real-scalar dispatch specializations: forward to the real pack helpers,
// with the same N=1 (float) / N=2 (double) rhs width choice as the complex
// specializations above.
356template<
typename Index,
int nr,
int StorageOrder>
357struct symm_pack_rhs<float,
Index, nr, StorageOrder>
 359 void operator()(
float* blockB,
const float* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
 361 symm_pack_rhs_helper<float, Index, StorageOrder, 1>(blockB, _rhs, rhsStride, rows, cols, k2);
365template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
366struct symm_pack_lhs<float,
Index, Pack1, Pack2_dummy, StorageOrder>
 368 void operator()(
float* blockA,
const float* _lhs, Index lhsStride, Index cols, Index rows)
 370 symm_pack_lhs_helper<float, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);
375template<
typename Index,
int nr,
int StorageOrder>
376struct symm_pack_rhs<double,
Index, nr, StorageOrder>
 378 void operator()(
double* blockB,
const double* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
 380 symm_pack_rhs_helper<double, Index, StorageOrder, 2>(blockB, _rhs, rhsStride, rows, cols, k2);
384template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
385struct symm_pack_lhs<double,
Index, Pack1, Pack2_dummy, StorageOrder>
 387 void operator()(
double* blockA,
const double* _lhs, Index lhsStride, Index cols, Index rows)
 389 symm_pack_lhs_helper<double, Index, StorageOrder>(blockA, _lhs, lhsStride, cols, rows);
// Stores up to N packets of a PacketBlock contiguously, stepping one
// 16-byte vector (16/sizeof(Scalar) elements) per packet. The N>2 / N>3
// guards around packets 2 and 3 are elided in this excerpt.
404template<
typename Scalar,
typename Packet,
typename Index,
int N>
405EIGEN_ALWAYS_INLINE
void storeBlock(Scalar* to, PacketBlock<Packet,N>& block)
 407 const Index size = 16 /
sizeof(Scalar);
 408 pstore<Scalar>(to + (0 * size), block.packet[0]);
 409 pstore<Scalar>(to + (1 * size), block.packet[1]);
 411 pstore<Scalar>(to + (2 * size), block.packet[2]);
 414 pstore<Scalar>(to + (3 * size), block.packet[3]);
// Generic complex-float pack kernel (dhs_cpack operator(), struct header
// elided): packs a complex operand into split real/imag planes. Full 4x4
// tiles are loaded with bload, de-interleaved with vec_perm +
// p16uc_GETREAL32/GETIMAG32, and the imaginary plane is negated when
// Conjugate. PanelMode adds stride/offset padding between panels.
419template<
typename Scalar,
typename Index,
typename DataMapper,
typename Packet,
typename PacketC,
int StorageOrder,
bool Conjugate,
bool PanelMode,
bool UseLhs>
 421 EIGEN_STRONG_INLINE
void operator()(std::complex<Scalar>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
 423 const Index vectorSize = quad_traits<Scalar>::vectorsize;
// Real/imag plane separation within one packed panel.
 424 const Index vectorDelta = vectorSize * ((PanelMode) ? stride : depth);
 425 Index rir = ((PanelMode) ? (vectorSize*offset) : 0), rii;
 426 Scalar* blockAt =
reinterpret_cast<Scalar *
>(blockA);
 429 for(; j + vectorSize <= rows; j+=vectorSize)
 433 rii = rir + vectorDelta;
 435 for(; i + vectorSize <= depth; i+=vectorSize)
 437 PacketBlock<Packet,4> blockr, blocki;
 438 PacketBlock<PacketC,8> cblock;
// Load a 4x4 complex tile; index order depends on UseLhs (branch elided).
 441 bload<DataMapper, PacketC, Index, 2, StorageOrder, true, 4>(cblock, lhs, j, i);
 443 bload<DataMapper, PacketC, Index, 2, StorageOrder, true, 4>(cblock, lhs, i, j);
// Gather real lanes into blockr and imaginary lanes into blocki.
 446 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[4].v, p16uc_GETREAL32);
 447 blockr.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[5].v, p16uc_GETREAL32);
 448 blockr.packet[2] = vec_perm(cblock.packet[2].v, cblock.packet[6].v, p16uc_GETREAL32);
 449 blockr.packet[3] = vec_perm(cblock.packet[3].v, cblock.packet[7].v, p16uc_GETREAL32);
 451 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[4].v, p16uc_GETIMAG32);
 452 blocki.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[5].v, p16uc_GETIMAG32);
 453 blocki.packet[2] = vec_perm(cblock.packet[2].v, cblock.packet[6].v, p16uc_GETIMAG32);
 454 blocki.packet[3] = vec_perm(cblock.packet[3].v, cblock.packet[7].v, p16uc_GETIMAG32);
// Conjugation: negate the imaginary plane (guard elided).
 458 blocki.packet[0] = -blocki.packet[0];
 459 blocki.packet[1] = -blocki.packet[1];
 460 blocki.packet[2] = -blocki.packet[2];
 461 blocki.packet[3] = -blocki.packet[3];
// Transpose needed when the storage order runs against the pack direction.
 464 if(((StorageOrder ==
RowMajor) && UseLhs) || (((StorageOrder ==
ColMajor) && !UseLhs)))
 470 storeBlock<Scalar, Packet, Index, 4>(blockAt + rir, blockr);
 471 storeBlock<Scalar, Packet, Index, 4>(blockAt + rii, blocki);
// Depth remainder: one column/row of the tile at a time.
 476 for(; i < depth; i++)
 478 PacketBlock<Packet,1> blockr, blocki;
 479 PacketBlock<PacketC,2> cblock;
 481 if(((StorageOrder ==
ColMajor) && UseLhs) || (((StorageOrder ==
RowMajor) && !UseLhs)))
 484 cblock.packet[0] = lhs.template loadPacket<PacketC>(j + 0, i);
 485 cblock.packet[1] = lhs.template loadPacket<PacketC>(j + 2, i);
 487 cblock.packet[0] = lhs.template loadPacket<PacketC>(i, j + 0);
 488 cblock.packet[1] = lhs.template loadPacket<PacketC>(i, j + 2);
// Strided case: assemble packets from scalar pairs.
 492 cblock.packet[0] = pload2(lhs(j + 0, i), lhs(j + 1, i));
 493 cblock.packet[1] = pload2(lhs(j + 2, i), lhs(j + 3, i));
 495 cblock.packet[0] = pload2(lhs(i, j + 0), lhs(i, j + 1));
 496 cblock.packet[1] = pload2(lhs(i, j + 2), lhs(i, j + 3));
 500 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL32);
 501 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG32);
 505 blocki.packet[0] = -blocki.packet[0];
 508 pstore<Scalar>(blockAt + rir, blockr.packet[0]);
 509 pstore<Scalar>(blockAt + rii, blocki.packet[0]);
// Advance past this panel (plus PanelMode padding).
 515 rir += ((PanelMode) ? (vectorSize*(2*stride - depth)) : vectorDelta);
 520 if(PanelMode) rir -= (offset*(vectorSize - 1));
 524 rii = rir + ((PanelMode) ? stride : depth);
 526 for(
Index i = 0; i < depth; i++)
 528 blockAt[rir] = lhs(i, j).real();
// Conjugate vs plain copy of the imaginary part (guard elided).
 531 blockAt[rii] = -lhs(i, j).imag();
 533 blockAt[rii] = lhs(i, j).imag();
 539 rir += ((PanelMode) ? (2*stride - depth) : depth);
// Row remainder (fewer than vectorSize rows left).
 544 if(PanelMode) rir += (offset*(rows - j - vectorSize));
 545 rii = rir + (((PanelMode) ? stride : depth) * (rows - j));
 547 for(
Index i = 0; i < depth; i++)
 552 blockAt[rir] = lhs(k, i).real();
 555 blockAt[rii] = -lhs(k, i).imag();
 557 blockAt[rii] = lhs(k, i).imag();
// Generic real-scalar pack kernel (dhs_pack operator(), struct header
// elided): copies vectorSize-wide panels into the packed block, transposing
// when StorageOrder runs against the pack direction; PanelMode inserts
// stride/offset padding around each panel.
569template<
typename Scalar,
typename Index,
typename DataMapper,
typename Packet,
int StorageOrder,
bool PanelMode,
bool UseLhs>
 571 EIGEN_STRONG_INLINE
void operator()(Scalar* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
 573 const Index vectorSize = quad_traits<Scalar>::vectorsize;
 576 for(; j + vectorSize <= rows; j+=vectorSize)
 580 if(PanelMode) ri += vectorSize*offset;
 582 for(; i + vectorSize <= depth; i+=vectorSize)
 584 PacketBlock<Packet,4> block;
// Tile load; index order depends on UseLhs (branch elided).
 587 bload<DataMapper, Packet, Index, 4, StorageOrder, false, 4>(block, lhs, j, i);
 589 bload<DataMapper, Packet, Index, 4, StorageOrder, false, 4>(block, lhs, i, j);
 591 if(((StorageOrder ==
RowMajor) && UseLhs) || ((StorageOrder ==
ColMajor) && !UseLhs))
 596 storeBlock<Scalar, Packet, Index, 4>(blockA + ri, block);
// Depth remainder: scalar or single-packet copies.
 600 for(; i < depth; i++)
 602 if(((StorageOrder ==
RowMajor) && UseLhs) || ((StorageOrder ==
ColMajor) && !UseLhs))
 605 blockA[ri+0] = lhs(j+0, i);
 606 blockA[ri+1] = lhs(j+1, i);
 607 blockA[ri+2] = lhs(j+2, i);
 608 blockA[ri+3] = lhs(j+3, i);
 610 blockA[ri+0] = lhs(i, j+0);
 611 blockA[ri+1] = lhs(i, j+1);
 612 blockA[ri+2] = lhs(i, j+2);
 613 blockA[ri+3] = lhs(i, j+3);
 618 lhsV = lhs.template loadPacket<Packet>(j, i);
 620 lhsV = lhs.template loadPacket<Packet>(i, j);
 622 pstore<Scalar>(blockA + ri, lhsV);
 628 if(PanelMode) ri += vectorSize*(stride - offset - depth);
// Row remainder: one row at a time.
 633 if(PanelMode) ri += offset;
 637 for(
Index i = 0; i < depth; i++)
 639 blockA[ri] = lhs(i, j);
 643 if(PanelMode) ri += stride - depth;
 648 if(PanelMode) ri += offset*(rows - j);
 650 for(
Index i = 0; i < depth; i++)
 655 blockA[ri] = lhs(k, i);
// double lhs specialization of dhs_pack (Packet2d = 2 doubles/vector):
// packs 2-row panels, transposing 2x2 tiles when the mapper is row-major
// (the selecting branch is elided in this excerpt).
665template<
typename Index,
typename DataMapper,
int StorageOrder,
bool PanelMode>
666struct dhs_pack<double,
Index, DataMapper, Packet2d, StorageOrder, PanelMode, true>
 668 EIGEN_STRONG_INLINE
void operator()(
double* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
 670 const Index vectorSize = quad_traits<double>::vectorsize;
 673 for(; j + vectorSize <= rows; j+=vectorSize)
 677 if(PanelMode) ri += vectorSize*offset;
 679 for(; i + vectorSize <= depth; i+=vectorSize)
 681 PacketBlock<Packet2d,2> block;
// Row-major source: load two rows then transpose (elided) before store.
 684 block.packet[0] = lhs.template loadPacket<Packet2d>(j + 0, i);
 685 block.packet[1] = lhs.template loadPacket<Packet2d>(j + 1, i);
// Column-major source: two column packets, stored directly.
 689 block.packet[0] = lhs.template loadPacket<Packet2d>(j, i + 0);
 690 block.packet[1] = lhs.template loadPacket<Packet2d>(j, i + 1);
 693 storeBlock<double, Packet2d, Index, 2>(blockA + ri, block);
 697 for(; i < depth; i++)
 701 blockA[ri+0] = lhs(j+0, i);
 702 blockA[ri+1] = lhs(j+1, i);
 704 Packet2d lhsV = lhs.template loadPacket<Packet2d>(j, i);
 705 pstore<double>(blockA + ri, lhsV);
 711 if(PanelMode) ri += vectorSize*(stride - offset - depth);
// Row remainder.
 716 if(PanelMode) ri += offset*(rows - j);
 718 for(
Index i = 0; i < depth; i++)
 723 blockA[ri] = lhs(k, i);
// double rhs specialization of dhs_pack: packs panels of 2*vectorSize = 4
// columns. Column-major sources interleave two 2-column sub-blocks so the
// kernel reads rhs values in its expected order; row-major sources load
// 2x4 tiles directly.
732template<
typename Index,
typename DataMapper,
int StorageOrder,
bool PanelMode>
733struct dhs_pack<double,
Index, DataMapper, Packet2d, StorageOrder, PanelMode, false>
 735 EIGEN_STRONG_INLINE
void operator()(
double* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
 737 const Index vectorSize = quad_traits<double>::vectorsize;
 740 for(; j + 2*vectorSize <= cols; j+=2*vectorSize)
 744 if(PanelMode) ri += offset*(2*vectorSize);
 746 for(; i + vectorSize <= depth; i+=vectorSize)
 748 PacketBlock<Packet2d,4> block;
// Column-major: gather 4 column packets, then store interleaved
// (block1/block2 transposition lines elided in this excerpt).
 751 PacketBlock<Packet2d,2> block1, block2;
 752 block1.packet[0] = rhs.template loadPacket<Packet2d>(i, j + 0);
 753 block1.packet[1] = rhs.template loadPacket<Packet2d>(i, j + 1);
 754 block2.packet[0] = rhs.template loadPacket<Packet2d>(i, j + 2);
 755 block2.packet[1] = rhs.template loadPacket<Packet2d>(i, j + 3);
 760 pstore<double>(blockB + ri , block1.packet[0]);
 761 pstore<double>(blockB + ri + 2, block2.packet[0]);
 762 pstore<double>(blockB + ri + 4, block1.packet[1]);
 763 pstore<double>(blockB + ri + 6, block2.packet[1]);
// Row-major: two rows of two packets each, stored as a 4-packet block.
 765 block.packet[0] = rhs.template loadPacket<Packet2d>(i + 0, j + 0);
 766 block.packet[1] = rhs.template loadPacket<Packet2d>(i + 0, j + 2);
 767 block.packet[2] = rhs.template loadPacket<Packet2d>(i + 1, j + 0);
 768 block.packet[3] = rhs.template loadPacket<Packet2d>(i + 1, j + 2);
 770 storeBlock<double, Packet2d, Index, 4>(blockB + ri, block);
// Depth remainder.
 775 for(; i < depth; i++)
 779 blockB[ri+0] = rhs(i, j+0);
 780 blockB[ri+1] = rhs(i, j+1);
 784 blockB[ri+0] = rhs(i, j+2);
 785 blockB[ri+1] = rhs(i, j+3);
 787 Packet2d rhsV = rhs.template loadPacket<Packet2d>(i, j);
 788 pstore<double>(blockB + ri, rhsV);
 792 rhsV = rhs.template loadPacket<Packet2d>(i, j + 2);
 793 pstore<double>(blockB + ri, rhsV);
 798 if(PanelMode) ri += (2*vectorSize)*(stride - offset - depth);
// Column remainder: single columns.
 801 if(PanelMode) ri += offset;
 805 for(
Index i = 0; i < depth; i++)
 807 blockB[ri] = rhs(i, j);
 811 if(PanelMode) ri += stride - depth;
// complex<double> lhs specialization of dhs_cpack: same split real/imag
// packing as the generic complex kernel but with 64-bit permute masks
// (p16uc_GETREAL64/GETIMAG64) and 2-packet tiles.
817template<
typename Index,
typename DataMapper,
typename Packet,
typename PacketC,
int StorageOrder,
bool Conjugate,
bool PanelMode>
818struct dhs_cpack<double,
Index, DataMapper, Packet, PacketC, StorageOrder, Conjugate, PanelMode, true>
 820 EIGEN_STRONG_INLINE
void operator()(std::complex<double>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
 822 const Index vectorSize = quad_traits<double>::vectorsize;
 823 const Index vectorDelta = vectorSize * ((PanelMode) ? stride : depth);
 824 Index rir = ((PanelMode) ? (vectorSize*offset) : 0), rii;
 825 double* blockAt =
reinterpret_cast<double *
>(blockA);
 828 for(; j + vectorSize <= rows; j+=vectorSize)
 832 rii = rir + vectorDelta;
 834 for(; i + vectorSize <= depth; i+=vectorSize)
 836 PacketBlock<Packet,2> blockr, blocki;
 837 PacketBlock<PacketC,4> cblock;
// Row-major load path (branch guard elided).
 841 cblock.packet[0] = lhs.template loadPacket<PacketC>(j, i + 0);
 842 cblock.packet[1] = lhs.template loadPacket<PacketC>(j, i + 1);
 844 cblock.packet[2] = lhs.template loadPacket<PacketC>(j + 1, i + 0);
 845 cblock.packet[3] = lhs.template loadPacket<PacketC>(j + 1, i + 1);
 847 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[2].v, p16uc_GETREAL64);
 848 blockr.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[3].v, p16uc_GETREAL64);
 850 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[2].v, p16uc_GETIMAG64);
 851 blocki.packet[1] = vec_perm(cblock.packet[1].v, cblock.packet[3].v, p16uc_GETIMAG64);
// Column-major load path.
 853 cblock.packet[0] = lhs.template loadPacket<PacketC>(j + 0, i);
 854 cblock.packet[1] = lhs.template loadPacket<PacketC>(j + 1, i);
 856 cblock.packet[2] = lhs.template loadPacket<PacketC>(j + 0, i + 1);
 857 cblock.packet[3] = lhs.template loadPacket<PacketC>(j + 1, i + 1);
 859 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64);
 860 blockr.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETREAL64);
 862 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64);
 863 blocki.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETIMAG64);
// Conjugation negates the imaginary plane (guard elided).
 868 blocki.packet[0] = -blocki.packet[0];
 869 blocki.packet[1] = -blocki.packet[1];
 872 storeBlock<double, Packet, Index, 2>(blockAt + rir, blockr);
 873 storeBlock<double, Packet, Index, 2>(blockAt + rii, blocki);
// Depth remainder.
 878 for(; i < depth; i++)
 880 PacketBlock<Packet,1> blockr, blocki;
 881 PacketBlock<PacketC,2> cblock;
 883 cblock.packet[0] = lhs.template loadPacket<PacketC>(j + 0, i);
 884 cblock.packet[1] = lhs.template loadPacket<PacketC>(j + 1, i);
 886 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64);
 887 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64);
 891 blocki.packet[0] = -blocki.packet[0];
 894 pstore<double>(blockAt + rir, blockr.packet[0]);
 895 pstore<double>(blockAt + rii, blocki.packet[0]);
 901 rir += ((PanelMode) ? (vectorSize*(2*stride - depth)) : vectorDelta);
// Row remainder.
 906 if(PanelMode) rir += (offset*(rows - j - vectorSize));
 907 rii = rir + (((PanelMode) ? stride : depth) * (rows - j));
 909 for(
Index i = 0; i < depth; i++)
 914 blockAt[rir] = lhs(k, i).real();
 917 blockAt[rii] = -lhs(k, i).imag();
 919 blockAt[rii] = lhs(k, i).imag();
// complex<double> rhs specialization of dhs_cpack: packs 2*vectorSize-wide
// column panels into split real/imag planes using the 64-bit permute masks;
// Conjugate negates the imaginary plane.
930template<
typename Index,
typename DataMapper,
typename Packet,
typename PacketC,
int StorageOrder,
bool Conjugate,
bool PanelMode>
931struct dhs_cpack<double,
Index, DataMapper, Packet, PacketC, StorageOrder, Conjugate, PanelMode, false>
 933 EIGEN_STRONG_INLINE
void operator()(std::complex<double>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
 935 const Index vectorSize = quad_traits<double>::vectorsize;
 936 const Index vectorDelta = 2*vectorSize * ((PanelMode) ? stride : depth);
 937 Index rir = ((PanelMode) ? (2*vectorSize*offset) : 0), rii;
 938 double* blockBt =
reinterpret_cast<double *
>(blockB);
 941 for(; j + 2*vectorSize <= cols; j+=2*vectorSize)
 945 rii = rir + vectorDelta;
 947 for(; i < depth; i++)
 949 PacketBlock<PacketC,4> cblock;
 950 PacketBlock<Packet,2> blockr, blocki;
// ColMajor bload regardless of StorageOrder: rhs rows are contiguous here.
 952 bload<DataMapper, PacketC, Index, 2, ColMajor, false, 4>(cblock, rhs, i, j);
 954 blockr.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETREAL64);
 955 blockr.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETREAL64);
 957 blocki.packet[0] = vec_perm(cblock.packet[0].v, cblock.packet[1].v, p16uc_GETIMAG64);
 958 blocki.packet[1] = vec_perm(cblock.packet[2].v, cblock.packet[3].v, p16uc_GETIMAG64);
 962 blocki.packet[0] = -blocki.packet[0];
 963 blocki.packet[1] = -blocki.packet[1];
 966 storeBlock<double, Packet, Index, 2>(blockBt + rir, blockr);
 967 storeBlock<double, Packet, Index, 2>(blockBt + rii, blocki);
 973 rir += ((PanelMode) ? (2*vectorSize*(2*stride - depth)) : vectorDelta);
// Column remainder: scalar copies, one column at a time.
 976 if(PanelMode) rir -= (offset*(2*vectorSize - 1));
 980 rii = rir + ((PanelMode) ? stride : depth);
 982 for(
Index i = 0; i < depth; i++)
 984 blockBt[rir] = rhs(i, j).real();
 987 blockBt[rii] = -rhs(i, j).imag();
 989 blockBt[rii] = rhs(i, j).imag();
 995 rir += ((PanelMode) ? (2*stride - depth) : depth);
// Rank-1 update core: acc[k] +/-= lhsV * rhsV[k] for up to N packets.
// NegativeAccumulate selects fused negative-multiply-subtract (vec_nmsub)
// vs fused multiply-add (vec_madd). Guards for N>1..3 are elided here.
1005template<
typename Packet,
bool NegativeAccumulate,
int N>
1006EIGEN_ALWAYS_INLINE
void pger_common(PacketBlock<Packet,N>* acc,
const Packet& lhsV,
const Packet* rhsV)
 1008 if(NegativeAccumulate)
 1010 acc->packet[0] = vec_nmsub(lhsV, rhsV[0], acc->packet[0]);
 1012 acc->packet[1] = vec_nmsub(lhsV, rhsV[1], acc->packet[1]);
 1015 acc->packet[2] = vec_nmsub(lhsV, rhsV[2], acc->packet[2]);
 1018 acc->packet[3] = vec_nmsub(lhsV, rhsV[3], acc->packet[3]);
 1021 acc->packet[0] = vec_madd(lhsV, rhsV[0], acc->packet[0]);
 1023 acc->packet[1] = vec_madd(lhsV, rhsV[1], acc->packet[1]);
 1026 acc->packet[2] = vec_madd(lhsV, rhsV[2], acc->packet[2]);
 1029 acc->packet[3] = vec_madd(lhsV, rhsV[3], acc->packet[3]);
// pger: full-packet variant — loads lhs with an aligned pload then delegates.
1034template<
int N,
typename Scalar,
typename Packet,
bool NegativeAccumulate>
1035EIGEN_ALWAYS_INLINE
void pger(PacketBlock<Packet,N>* acc,
const Scalar* lhs,
const Packet* rhsV)
 1037 Packet lhsV = pload<Packet>(lhs);
 1039 pger_common<Packet, NegativeAccumulate, N>(acc, lhsV, rhsV);
// Partial-packet lhs load for the last (< packet-size) rows: uses the
// length-limited vector load vec_xl_len when available, otherwise a scalar
// do/while copy loop (the #ifdef split and loop head are elided here).
1042template<
typename Scalar,
typename Packet,
typename Index, const Index remaining_rows>
1043EIGEN_ALWAYS_INLINE
void loadPacketRemaining(
const Scalar* lhs, Packet &lhsV)
 1046 lhsV = vec_xl_len((Scalar *)lhs, remaining_rows *
sizeof(Scalar));
 1051 }
while (++i < remaining_rows);
// Remaining-rows pger: same rank-1 update but via the partial load above.
1055template<
int N,
typename Scalar,
typename Packet,
typename Index,
bool NegativeAccumulate, const Index remaining_rows>
1056EIGEN_ALWAYS_INLINE
void pger(PacketBlock<Packet,N>* acc,
const Scalar* lhs,
const Packet* rhsV)
 1059 loadPacketRemaining<Scalar, Packet, Index, remaining_rows>(lhs, lhsV);
 1061 pger_common<Packet, NegativeAccumulate, N>(acc, lhsV, rhsV);
// Complex rank-1 update: accumulates real and imaginary planes separately.
// Real plane: lhsV*rhsV (minus/plus lhsVi*rhsVi depending on whether the
// conjugations cancel); imag plane: lhsV*rhsVi and lhsVi*rhsV with signs
// driven by ConjugateLhs/ConjugateRhs. LhsIsReal/RhsIsReal skip the unused
// imaginary operand (branch guards elided in this excerpt).
1065template<
int N,
typename Packet,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1066EIGEN_ALWAYS_INLINE
void pgerc_common(PacketBlock<Packet,N>* accReal, PacketBlock<Packet,N>* accImag,
const Packet &lhsV,
const Packet &lhsVi,
const Packet* rhsV,
const Packet* rhsVi)
 1068 pger_common<Packet, false, N>(accReal, lhsV, rhsV);
 1071 pger_common<Packet, ConjugateRhs, N>(accImag, lhsV, rhsVi);
 1072 EIGEN_UNUSED_VARIABLE(lhsVi);
 1075 pger_common<Packet, ConjugateLhs == ConjugateRhs, N>(accReal, lhsVi, rhsVi);
 1076 pger_common<Packet, ConjugateRhs, N>(accImag, lhsV, rhsVi);
 1078 EIGEN_UNUSED_VARIABLE(rhsVi);
 1080 pger_common<Packet, ConjugateLhs, N>(accImag, lhsVi, rhsV);
// pgerc: full-packet wrapper loading lhs real/imag planes via ploadLhs.
1084template<
int N,
typename Scalar,
typename Packet,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1085EIGEN_ALWAYS_INLINE
void pgerc(PacketBlock<Packet,N>* accReal, PacketBlock<Packet,N>* accImag,
const Scalar* lhs_ptr,
const Scalar* lhs_ptr_imag,
const Packet* rhsV,
const Packet* rhsVi)
 1087 Packet lhsV = ploadLhs<Scalar, Packet>(lhs_ptr);
 1089 if(!LhsIsReal) lhsVi = ploadLhs<Scalar, Packet>(lhs_ptr_imag);
 1090 else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);
 1092 pgerc_common<N, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(accReal, accImag, lhsV, lhsVi, rhsV, rhsVi);
// Complex partial-packet load: fills lhsV (and lhsVi unless LhsIsReal) for
// the last remaining_rows rows, via vec_xl_len or a scalar do/while copy
// (the #ifdef split and loop head are elided here).
1095template<
typename Scalar,
typename Packet,
typename Index,
bool LhsIsReal, const Index remaining_rows>
1096EIGEN_ALWAYS_INLINE
void loadPacketRemaining(
const Scalar* lhs_ptr,
const Scalar* lhs_ptr_imag, Packet &lhsV, Packet &lhsVi)
 1099 lhsV = vec_xl_len((Scalar *)lhs_ptr, remaining_rows *
sizeof(Scalar));
 1100 if(!LhsIsReal) lhsVi = vec_xl_len((Scalar *)lhs_ptr_imag, remaining_rows *
sizeof(Scalar));
 1101 else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);
// Scalar fallback path.
 1105 lhsV[i] = lhs_ptr[i];
 1106 if(!LhsIsReal) lhsVi[i] = lhs_ptr_imag[i];
 1107 }
while (++i < remaining_rows);
 1108 if(LhsIsReal) EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);
// Remaining-rows pgerc: complex rank-1 update via the partial load above.
1112template<
int N,
typename Scalar,
typename Packet,
typename Index,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal, const Index remaining_rows>
1113EIGEN_ALWAYS_INLINE
void pgerc(PacketBlock<Packet,N>* accReal, PacketBlock<Packet,N>* accImag,
const Scalar* lhs_ptr,
const Scalar* lhs_ptr_imag,
const Packet* rhsV,
const Packet* rhsVi)
 1116 loadPacketRemaining<Scalar, Packet, Index, LhsIsReal, remaining_rows>(lhs_ptr, lhs_ptr_imag, lhsV, lhsVi);
 1118 pgerc_common<N, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(accReal, accImag, lhsV, lhsVi, rhsV, rhsVi);
// Unaligned packet load of the packed lhs buffer (thin ploadu wrapper).
1121template<
typename Scalar,
typename Packet>
1122EIGEN_ALWAYS_INLINE Packet ploadLhs(
const Scalar* lhs)
 1124 return ploadu<Packet>(lhs);
// bsetzero: zero-initializes up to N accumulator packets (N>1..3 guards
// elided in this excerpt).
1128template<
typename Scalar,
typename Packet,
int N>
1129EIGEN_ALWAYS_INLINE
void bsetzero(PacketBlock<Packet,N>& acc)
 1131 acc.packet[0] = pset1<Packet>((Scalar)0);
 1133 acc.packet[1] = pset1<Packet>((Scalar)0);
 1136 acc.packet[2] = pset1<Packet>((Scalar)0);
 1139 acc.packet[3] = pset1<Packet>((Scalar)0);
// bscale: acc += pAlpha * accZ, per packet (fused multiply-add).
1144template<
typename Packet,
int N>
1145EIGEN_ALWAYS_INLINE
void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ,
const Packet& pAlpha)
 1147 acc.packet[0] = pmadd(pAlpha, accZ.packet[0], acc.packet[0]);
 1149 acc.packet[1] = pmadd(pAlpha, accZ.packet[1], acc.packet[1]);
 1152 acc.packet[2] = pmadd(pAlpha, accZ.packet[2], acc.packet[2]);
 1155 acc.packet[3] = pmadd(pAlpha, accZ.packet[3], acc.packet[3]);
// bscalec_common: acc = accZ * pAlpha (plain multiply, no accumulate).
1159template<
typename Packet,
int N>
1160EIGEN_ALWAYS_INLINE
void bscalec_common(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ,
const Packet& pAlpha)
 1162 acc.packet[0] = pmul<Packet>(accZ.packet[0], pAlpha);
 1164 acc.packet[1] = pmul<Packet>(accZ.packet[1], pAlpha);
 1167 acc.packet[2] = pmul<Packet>(accZ.packet[2], pAlpha);
 1170 acc.packet[3] = pmul<Packet>(accZ.packet[3], pAlpha);
// bscalec: complex scaling (aReal + i*aImag) * (bReal + i*bImag) into
// cReal/cImag — cross terms applied via pger_common with sign selection.
1175template<
typename Packet,
int N>
1176EIGEN_ALWAYS_INLINE
void bscalec(PacketBlock<Packet,N>& aReal, PacketBlock<Packet,N>& aImag,
const Packet& bReal,
const Packet& bImag, PacketBlock<Packet,N>& cReal, PacketBlock<Packet,N>& cImag)
 1178 bscalec_common<Packet, N>(cReal, aReal, bReal);
 1180 bscalec_common<Packet, N>(cImag, aImag, bReal);
// cReal -= bImag*aImag; cImag += bImag*aReal.
 1182 pger_common<Packet, true, N>(&cReal, bImag, aImag.packet);
 1184 pger_common<Packet, false, N>(&cImag, bImag, aReal.packet);
// band: masks each accumulator packet (used to discard lanes beyond the
// remaining row count).
1187template<
typename Packet,
int N>
1188EIGEN_ALWAYS_INLINE
void band(PacketBlock<Packet,N>& acc,
const Packet& pMask)
 1190 acc.packet[0] = pand(acc.packet[0], pMask);
 1192 acc.packet[1] = pand(acc.packet[1], pMask);
 1195 acc.packet[2] = pand(acc.packet[2], pMask);
 1198 acc.packet[3] = pand(acc.packet[3], pMask);
// Masked bscalec overload: zero out invalid lanes first, then scale.
1202template<
typename Packet,
int N>
1203EIGEN_ALWAYS_INLINE
void bscalec(PacketBlock<Packet,N>& aReal, PacketBlock<Packet,N>& aImag,
const Packet& bReal,
const Packet& bImag, PacketBlock<Packet,N>& cReal, PacketBlock<Packet,N>& cImag,
const Packet& pMask)
 1205 band<Packet, N>(aReal, pMask);
 1206 band<Packet, N>(aImag, pMask);
 1208 bscalec<Packet,N>(aReal, aImag, bReal, bImag, cReal, cImag);
// bload: fills a PacketBlock from the result mapper. Loads N packets (2N
// when Complex, second half offset by accCols) with row-wise or column-wise
// indexing depending on StorageOrder (the selecting branch is elided here).
1212template<
typename DataMapper,
typename Packet,
typename Index, const Index accCols,
int StorageOrder,
bool Complex,
int N>
1213EIGEN_ALWAYS_INLINE
void bload(PacketBlock<Packet,N*(Complex?2:1)>& acc,
const DataMapper& res,
Index row,
Index col)
// Row-indexed variant.
 1216 acc.packet[0] = res.template loadPacket<Packet>(row + 0, col)_;
 1218 acc.packet[1] = res.template loadPacket<Packet>(row + 1, col);
 1221 acc.packet[2] = res.template loadPacket<Packet>(row + 2, col);
 1224 acc.packet[3] = res.template loadPacket<Packet>(row + 3, col);
 1227 acc.packet[0+N] = res.template loadPacket<Packet>(row + 0, col + accCols);
 1229 acc.packet[1+N] = res.template loadPacket<Packet>(row + 1, col + accCols);
 1232 acc.packet[2+N] = res.template loadPacket<Packet>(row + 2, col + accCols);
 1235 acc.packet[3+N] = res.template loadPacket<Packet>(row + 3, col + accCols);
// Column-indexed variant.
 1239 acc.packet[0] = res.template loadPacket<Packet>(row, col + 0);
 1241 acc.packet[1] = res.template loadPacket<Packet>(row, col + 1);
 1244 acc.packet[2] = res.template loadPacket<Packet>(row, col + 2);
 1247 acc.packet[3] = res.template loadPacket<Packet>(row, col + 3);
 1250 acc.packet[0+N] = res.template loadPacket<Packet>(row + accCols, col + 0);
 1252 acc.packet[1+N] = res.template loadPacket<Packet>(row + accCols, col + 1);
 1255 acc.packet[2+N] = res.template loadPacket<Packet>(row + accCols, col + 2);
 1258 acc.packet[3+N] = res.template loadPacket<Packet>(row + accCols, col + 3);
// Lane masks: mask4k keeps the first k of 4 float lanes, mask21 the first
// of 2 double lanes (-1 = all bits set = keep).
1264const static Packet4i mask41 = { -1, 0, 0, 0 };
1265const static Packet4i mask42 = { -1, -1, 0, 0 };
1266const static Packet4i mask43 = { -1, -1, -1, 0 };
1268const static Packet2l mask21 = { -1, 0 };
// bmask: returns the lane mask for `remaining_rows` valid lanes; 0 rows
// yields an all-zero packet.
1270template<
typename Packet>
1271EIGEN_ALWAYS_INLINE Packet bmask(
const int remaining_rows)
 1273 if (remaining_rows == 0) {
 1274 return pset1<Packet>(
float(0.0));
 1276 switch (remaining_rows) {
 1277 case 1:
return Packet(mask41);
 1278 case 2:
return Packet(mask42);
 1279 default:
return Packet(mask43);
// Packet2d specialization: only one partial case exists (1 of 2 lanes).
1285EIGEN_ALWAYS_INLINE Packet2d bmask<Packet2d>(
const int remaining_rows)
 1287 if (remaining_rows == 0) {
 1288 return pset1<Packet2d>(
double(0.0));
 1290 return Packet2d(mask21);
// Masked bscale: zeroes the invalid lanes of accZ, then acc += pAlpha*accZ.
1294template<
typename Packet,
int N>
1295EIGEN_ALWAYS_INLINE
void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ,
const Packet& pAlpha,
const Packet& pMask)
 1297 band<Packet, N>(accZ, pMask);
 1299 bscale<Packet, N>(acc, accZ, pAlpha);
// pbroadcastN_old: broadcasts up to N scalars from `a` into a0..a3
// (unused outputs flagged with EIGEN_UNUSED_VARIABLE; N guards elided).
1302template<
typename Packet,
int N> EIGEN_ALWAYS_INLINE
void
1303pbroadcastN_old(
const __UNPACK_TYPE__(Packet) *a,
 1304 Packet& a0, Packet& a1, Packet& a2, Packet& a3)
 1306 a0 = pset1<Packet>(a[0]);
 1308 a1 = pset1<Packet>(a[1]);
 1310 EIGEN_UNUSED_VARIABLE(a1);
 1313 a2 = pset1<Packet>(a[2]);
 1315 EIGEN_UNUSED_VARIABLE(a2);
 1318 a3 = pset1<Packet>(a[3]);
 1320 EIGEN_UNUSED_VARIABLE(a3);
// Packet4f specialization: delegates to the generic pbroadcast4.
1325EIGEN_ALWAYS_INLINE
void pbroadcastN_old<Packet4f,4>(
const float* a, Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
 1327 pbroadcast4<Packet4f>(a, a0, a1, a2, a3);
// Packet2d specialization: two vector loads + vec_splat per lane.
1331EIGEN_ALWAYS_INLINE
void pbroadcastN_old<Packet2d,4>(
const double* a, Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
 1333 a1 = pload<Packet2d>(a);
 1334 a3 = pload<Packet2d>(a + 2);
 1335 a0 = vec_splat(a1, 0);
 1336 a1 = vec_splat(a1, 1);
 1337 a2 = vec_splat(a3, 0);
 1338 a3 = vec_splat(a3, 1);
// pbroadcastN: current-generation broadcast helper, same contract as
// pbroadcastN_old (N guards elided in this excerpt).
1341template<
typename Packet,
int N> EIGEN_ALWAYS_INLINE
void
1342pbroadcastN(
const __UNPACK_TYPE__(Packet) *a,
 1343 Packet& a0, Packet& a1, Packet& a2, Packet& a3)
 1345 a0 = pset1<Packet>(a[0]);
 1347 a1 = pset1<Packet>(a[1]);
 1349 EIGEN_UNUSED_VARIABLE(a1);
 1352 a2 = pset1<Packet>(a[2]);
 1354 EIGEN_UNUSED_VARIABLE(a2);
 1357 a3 = pset1<Packet>(a[3]);
 1359 EIGEN_UNUSED_VARIABLE(a3);
// Packet4f specialization: one load + four vec_splat, a3 loaded last so the
// splats can reuse it before it is overwritten.
1363template<> EIGEN_ALWAYS_INLINE
void
1364pbroadcastN<Packet4f,4>(
const float *a,
 1365 Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
 1367 a3 = pload<Packet4f>(a);
 1368 a0 = vec_splat(a3, 0);
 1369 a1 = vec_splat(a3, 1);
 1370 a2 = vec_splat(a3, 2);
 1371 a3 = vec_splat(a3, 3);
// Helper macros for the peeled depth loop of the real-valued "extra row"
// kernel. PEEL_ROW accumulators (accZero0..7) are used in parallel and
// reduced at the end by MICRO_ADD_PEEL_ROW.
#define MICRO_UNROLL_PEEL(func) \
  func(0) func(1) func(2) func(3) func(4) func(5) func(6) func(7)

// Zero accumulator `peel` (accZero0 is already initialized by the caller).
#define MICRO_ZERO_PEEL(peel) \
  if ((PEEL_ROW > peel) && (peel != 0)) { \
    bsetzero<Scalar, Packet, accRows>(accZero##peel); \
  } else { \
    EIGEN_UNUSED_VARIABLE(accZero##peel); \
  }

#define MICRO_ZERO_PEEL_ROW \
  MICRO_UNROLL_PEEL(MICRO_ZERO_PEEL);

// One rank-1 update for peel step `peel`: broadcast the rhs column values and
// accumulate lhs*rhs into accZero##peel.
#define MICRO_WORK_PEEL(peel) \
  if (PEEL_ROW > peel) { \
    pbroadcastN<Packet,accRows>(rhs_ptr + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
    pger<accRows, Scalar, Packet, false>(&accZero##peel, lhs_ptr + (remaining_rows * peel), rhsV##peel); \
  } else { \
    EIGEN_UNUSED_VARIABLE(rhsV##peel); \
  }

#define MICRO_WORK_PEEL_ROW \
  Packet rhsV0[4], rhsV1[4], rhsV2[4], rhsV3[4], rhsV4[4], rhsV5[4], rhsV6[4], rhsV7[4]; \
  MICRO_UNROLL_PEEL(MICRO_WORK_PEEL); \
  lhs_ptr += (remaining_rows * PEEL_ROW); \
  rhs_ptr += (accRows * PEEL_ROW);

// Pairwise reduction of the peel accumulators into accZero0.
#define MICRO_ADD_PEEL(peel, sum) \
  if (PEEL_ROW > peel) { \
    for (Index i = 0; i < accRows; i++) { \
      accZero##sum.packet[i] += accZero##peel.packet[i]; \
    } \
  }

#define MICRO_ADD_PEEL_ROW \
  MICRO_ADD_PEEL(4, 0) MICRO_ADD_PEEL(5, 1) MICRO_ADD_PEEL(6, 2) MICRO_ADD_PEEL(7, 3) \
  MICRO_ADD_PEEL(2, 0) MICRO_ADD_PEEL(3, 1) MICRO_ADD_PEEL(1, 0)
1416template<
typename Scalar,
typename Packet,
typename Index, const Index accRows, const Index remaining_rows>
1417EIGEN_ALWAYS_INLINE
void MICRO_EXTRA_ROW(
1418 const Scalar* &lhs_ptr,
1419 const Scalar* &rhs_ptr,
1420 PacketBlock<Packet,accRows> &accZero)
1423 pbroadcastN<Packet,accRows>(rhs_ptr, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);
1424 pger<accRows, Scalar, Packet, false>(&accZero, lhs_ptr, rhsV);
1425 lhs_ptr += remaining_rows;
1429template<
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accRows, const Index accCols, const Index remaining_rows>
1430EIGEN_ALWAYS_INLINE
void gemm_unrolled_row_iteration(
1431 const DataMapper& res,
1432 const Scalar* lhs_base,
1433 const Scalar* rhs_base,
1441 const Packet& pAlpha,
1442 const Packet& pMask)
1444 const Scalar* rhs_ptr = rhs_base;
1445 const Scalar* lhs_ptr = lhs_base + row*strideA + remaining_rows*offsetA;
1446 PacketBlock<Packet,accRows> accZero0, accZero1, accZero2, accZero3, accZero4, accZero5, accZero6, accZero7, acc;
1448 bsetzero<Scalar, Packet, accRows>(accZero0);
1450 Index remaining_depth = (col + quad_traits<Scalar>::rows < cols) ? depth : (depth & -quad_traits<Scalar>::rows);
1452 if (remaining_depth >= PEEL_ROW) {
1456 EIGEN_POWER_PREFETCH(rhs_ptr);
1457 EIGEN_POWER_PREFETCH(lhs_ptr);
1459 }
while ((k += PEEL_ROW) + PEEL_ROW <= remaining_depth);
1462 for(; k < remaining_depth; k++)
1464 MICRO_EXTRA_ROW<Scalar, Packet, Index, accRows, remaining_rows>(lhs_ptr, rhs_ptr, accZero0);
1467 if ((remaining_depth == depth) && (rows >= accCols))
1469 bload<DataMapper, Packet, Index, 0, ColMajor, false, accRows>(acc, res, row, 0);
1470 bscale<Packet,accRows>(acc, accZero0, pAlpha, pMask);
1471 res.template storePacketBlock<Packet,accRows>(row, 0, acc);
1473 for(; k < depth; k++)
1476 pbroadcastN<Packet,accRows>(rhs_ptr, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);
1477 pger<accRows, Scalar, Packet, Index, false, remaining_rows>(&accZero0, lhs_ptr, rhsV);
1478 lhs_ptr += remaining_rows;
1482 for(
Index j = 0; j < accRows; j++) {
1483 accZero0.packet[j] = vec_mul(pAlpha, accZero0.packet[j]);
1484 for(
Index i = 0; i < remaining_rows; i++) {
1485 res(row + i, j) += accZero0.packet[j][i];
1491template<
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accRows, const Index accCols>
1492EIGEN_ALWAYS_INLINE
void gemm_extra_row(
1493 const DataMapper& res,
1494 const Scalar* lhs_base,
1495 const Scalar* rhs_base,
1503 Index remaining_rows,
1504 const Packet& pAlpha,
1505 const Packet& pMask)
1507 switch(remaining_rows) {
1509 gemm_unrolled_row_iteration<Scalar, Packet, DataMapper, Index, accRows, accCols, 1>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, rows, cols, pAlpha, pMask);
1512 if (
sizeof(Scalar) ==
sizeof(
float)) {
1513 gemm_unrolled_row_iteration<Scalar, Packet, DataMapper, Index, accRows, accCols, 2>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, rows, cols, pAlpha, pMask);
1517 if (
sizeof(Scalar) ==
sizeof(
float)) {
1518 gemm_unrolled_row_iteration<Scalar, Packet, DataMapper, Index, accRows, accCols, 3>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, rows, cols, pAlpha, pMask);
// Helper macros for the main real-valued kernel, which unrolls up to 8 lhs
// row blocks (iter 0..7). unroll_factor, lhs_ptr#, accZero# etc. are names
// in the enclosing gemm_unrolled_iteration scope.
#define MICRO_UNROLL(func) \
  func(0) func(1) func(2) func(3) func(4) func(5) func(6) func(7)

#define MICRO_UNROLL_WORK(func, func2, peel) \
  MICRO_UNROLL(func2); \
  func(0,peel) func(1,peel) func(2,peel) func(3,peel) \
  func(4,peel) func(5,peel) func(6,peel) func(7,peel)

// Load one lhs packet for row block `iter` and advance its pointer.
#define MICRO_LOAD_ONE(iter) \
  if (unroll_factor > iter) { \
    lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr##iter); \
    lhs_ptr##iter += accCols; \
  } else { \
    EIGEN_UNUSED_VARIABLE(lhsV##iter); \
  }

// One outer-product accumulation for row block `iter` at peel step `peel`.
#define MICRO_WORK_ONE(iter, peel) \
  if (unroll_factor > iter) { \
    pger_common<Packet, false, accRows>(&accZero##iter, lhsV##iter, rhsV##peel); \
  }

#define MICRO_TYPE_PEEL4(func, func2, peel) \
  if (PEEL > peel) { \
    Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \
    pbroadcastN<Packet,accRows>(rhs_ptr + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
    MICRO_UNROLL_WORK(func, func2, peel) \
  } else { \
    EIGEN_UNUSED_VARIABLE(rhsV##peel); \
  }

#define MICRO_UNROLL_TYPE_PEEL(M, func, func1, func2) \
  Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M], rhsV4[M], rhsV5[M], rhsV6[M], rhsV7[M]; \
  func(func1,func2,0); func(func1,func2,1); \
  func(func1,func2,2); func(func1,func2,3); \
  func(func1,func2,4); func(func1,func2,5); \
  func(func1,func2,6); func(func1,func2,7);

#define MICRO_UNROLL_TYPE_ONE(M, func, func1, func2) \
  Packet rhsV0[M]; \
  func(func1,func2,0);

// PEEL depth steps in one shot, then advance the rhs pointer.
#define MICRO_ONE_PEEL4 \
  MICRO_UNROLL_TYPE_PEEL(4, MICRO_TYPE_PEEL4, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
  rhs_ptr += (accRows * PEEL);

// Single depth step.
#define MICRO_ONE4 \
  MICRO_UNROLL_TYPE_ONE(4, MICRO_TYPE_PEEL4, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
  rhs_ptr += accRows;

#define MICRO_DST_PTR_ONE(iter) \
  if (unroll_factor > iter) { \
    bsetzero<Scalar, Packet, accRows>(accZero##iter); \
  } else { \
    EIGEN_UNUSED_VARIABLE(accZero##iter); \
  }

#define MICRO_DST_PTR MICRO_UNROLL(MICRO_DST_PTR_ONE)

#define MICRO_SRC_PTR_ONE(iter) \
  if (unroll_factor > iter) { \
    lhs_ptr##iter = lhs_base + ( (row/accCols) + iter )*strideA*accCols; \
  } else { \
    EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \
  }

#define MICRO_SRC_PTR MICRO_UNROLL(MICRO_SRC_PTR_ONE)

#define MICRO_PREFETCH_ONE(iter) \
  if (unroll_factor > iter) { \
    EIGEN_POWER_PREFETCH(lhs_ptr##iter); \
  }

#define MICRO_PREFETCH MICRO_UNROLL(MICRO_PREFETCH_ONE)

// Scale accumulator `iter` by alpha and add it into the result block.
#define MICRO_STORE_ONE(iter) \
  if (unroll_factor > iter) { \
    bload<DataMapper, Packet, Index, 0, ColMajor, false, accRows>(acc, res, row + iter*accCols, 0); \
    bscale<Packet,accRows>(acc, accZero##iter, pAlpha); \
    res.template storePacketBlock<Packet,accRows>(row + iter*accCols, 0, acc); \
  }

#define MICRO_STORE MICRO_UNROLL(MICRO_STORE_ONE)
1607template<
int unroll_factor,
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accRows, const Index accCols>
1608EIGEN_STRONG_INLINE
void gemm_unrolled_iteration(
1609 const DataMapper& res,
1610 const Scalar* lhs_base,
1611 const Scalar* rhs_base,
1615 const Packet& pAlpha)
1617 const Scalar* rhs_ptr = rhs_base;
1618 const Scalar* lhs_ptr0 = NULL, * lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, * lhs_ptr7 = NULL;
1619 PacketBlock<Packet,accRows> accZero0, accZero1, accZero2, accZero3, accZero4, accZero5, accZero6, accZero7;
1620 PacketBlock<Packet,accRows> acc;
1626 for(; k + PEEL <= depth; k+= PEEL)
1628 EIGEN_POWER_PREFETCH(rhs_ptr);
1632 for(; k < depth; k++)
1638 row += unroll_factor*accCols;
1641template<
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accRows, const Index accCols>
1642EIGEN_ALWAYS_INLINE
void gemm_cols(
1643 const DataMapper& res,
1644 const Scalar* blockA,
1645 const Scalar* blockB,
1654 Index remaining_rows,
1655 const Packet& pAlpha,
1656 const Packet& pMask)
1658 const DataMapper res3 = res.getSubMapper(0, col);
1660 const Scalar* rhs_base = blockB + col*strideB + accRows*offsetB;
1661 const Scalar* lhs_base = blockA + accCols*offsetA;
1665 while(row + MAX_UNROLL*accCols <= rows) {
1666 gemm_unrolled_iteration<MAX_UNROLL, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1668 switch( (rows-row)/accCols ) {
1671 gemm_unrolled_iteration<7, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1676 gemm_unrolled_iteration<6, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1681 gemm_unrolled_iteration<5, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1686 gemm_unrolled_iteration<4, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1691 gemm_unrolled_iteration<3, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1696 gemm_unrolled_iteration<2, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1701 gemm_unrolled_iteration<1, Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, lhs_base, rhs_base, depth, strideA, row, pAlpha);
1709 if(remaining_rows > 0)
1711 gemm_extra_row<Scalar, Packet, DataMapper, Index, accRows, accCols>(res3, blockA, rhs_base, depth, strideA, offsetA, row, col, rows, cols, remaining_rows, pAlpha, pMask);
1715template<
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accCols>
1716EIGEN_STRONG_INLINE
void gemm_extra_cols(
1717 const DataMapper& res,
1718 const Scalar* blockA,
1719 const Scalar* blockB,
1728 Index remaining_rows,
1729 const Packet& pAlpha,
1730 const Packet& pMask)
1732 for (; col < cols; col++) {
1733 gemm_cols<Scalar, Packet, DataMapper, Index, 1, accCols>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlpha, pMask);
1740template<
typename Scalar,
typename Index,
typename Packet,
typename RhsPacket,
typename DataMapper, const Index accRows, const Index accCols>
1741EIGEN_STRONG_INLINE
void gemm(
const DataMapper& res,
const Scalar* blockA,
const Scalar* blockB,
Index rows,
Index depth,
Index cols, Scalar alpha,
Index strideA,
Index strideB,
Index offsetA,
Index offsetB)
1743 const Index remaining_rows = rows % accCols;
1745 if( strideA == -1 ) strideA = depth;
1746 if( strideB == -1 ) strideB = depth;
1748 const Packet pAlpha = pset1<Packet>(alpha);
1749 const Packet pMask = bmask<Packet>((
const int)(remaining_rows));
1752 for(; col + accRows <= cols; col += accRows)
1754 gemm_cols<Scalar, Packet, DataMapper, Index, accRows, accCols>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlpha, pMask);
1757 gemm_extra_cols<Scalar, Packet, DataMapper, Index, accCols>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlpha, pMask);
// Complex kernels store real and imaginary parts in separate planes:
// accColsC is the column count in complex units; advanceRows/advanceCols
// are 1 when the corresponding operand is real (single plane), 2 otherwise.
#define accColsC (accCols / 2)
#define advanceRows ((LhsIsReal) ? 1 : 2)
#define advanceCols ((RhsIsReal) ? 1 : 2)

// Depth-loop peeling factors for the complex kernels.
#define PEEL_COMPLEX 3
#define PEEL_COMPLEX_ROW 3

#define MICRO_COMPLEX_UNROLL_PEEL(func) \
  func(0) func(1) func(2) func(3)

// Zero the real/imag accumulator pair `peel` (pair 0 is set by the caller).
#define MICRO_COMPLEX_ZERO_PEEL(peel) \
  if ((PEEL_COMPLEX_ROW > peel) && (peel != 0)) { \
    bsetzero<Scalar, Packet, accRows>(accReal##peel); \
    bsetzero<Scalar, Packet, accRows>(accImag##peel); \
  } else { \
    EIGEN_UNUSED_VARIABLE(accReal##peel); \
    EIGEN_UNUSED_VARIABLE(accImag##peel); \
  }

#define MICRO_COMPLEX_ZERO_PEEL_ROW \
  MICRO_COMPLEX_UNROLL_PEEL(MICRO_COMPLEX_ZERO_PEEL);

// One complex rank-1 update at peel step `peel`; imaginary rhs broadcast is
// skipped entirely when RhsIsReal.
#define MICRO_COMPLEX_WORK_PEEL(peel) \
  if (PEEL_COMPLEX_ROW > peel) { \
    pbroadcastN_old<Packet,accRows>(rhs_ptr_real + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
    if(!RhsIsReal) pbroadcastN_old<Packet,accRows>(rhs_ptr_imag + (accRows * peel), rhsVi##peel[0], rhsVi##peel[1], rhsVi##peel[2], rhsVi##peel[3]); \
    pgerc<accRows, Scalar, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##peel, &accImag##peel, lhs_ptr_real + (remaining_rows * peel), lhs_ptr_imag + (remaining_rows * peel), rhsV##peel, rhsVi##peel); \
  } else { \
    EIGEN_UNUSED_VARIABLE(rhsV##peel); \
    EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
  }

#define MICRO_COMPLEX_WORK_PEEL_ROW \
  Packet rhsV0[4], rhsV1[4], rhsV2[4], rhsV3[4]; \
  Packet rhsVi0[4], rhsVi1[4], rhsVi2[4], rhsVi3[4]; \
  MICRO_COMPLEX_UNROLL_PEEL(MICRO_COMPLEX_WORK_PEEL); \
  lhs_ptr_real += (remaining_rows * PEEL_COMPLEX_ROW); \
  if(!LhsIsReal) lhs_ptr_imag += (remaining_rows * PEEL_COMPLEX_ROW); \
  else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag); \
  rhs_ptr_real += (accRows * PEEL_COMPLEX_ROW); \
  if(!RhsIsReal) rhs_ptr_imag += (accRows * PEEL_COMPLEX_ROW); \
  else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);

// Reduce the peel accumulator pairs into pair 0.
#define MICRO_COMPLEX_ADD_PEEL(peel, sum) \
  if (PEEL_COMPLEX_ROW > peel) { \
    for (Index i = 0; i < accRows; i++) { \
      accReal##sum.packet[i] += accReal##peel.packet[i]; \
      accImag##sum.packet[i] += accImag##peel.packet[i]; \
    } \
  }

#define MICRO_COMPLEX_ADD_PEEL_ROW \
  MICRO_COMPLEX_ADD_PEEL(2, 0) MICRO_COMPLEX_ADD_PEEL(3, 1) \
  MICRO_COMPLEX_ADD_PEEL(1, 0)
1816template<
typename Scalar,
typename Packet,
typename Index, const Index accRows,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal, const Index remaining_rows>
1817EIGEN_ALWAYS_INLINE
void MICRO_COMPLEX_EXTRA_ROW(
1818 const Scalar* &lhs_ptr_real,
const Scalar* &lhs_ptr_imag,
1819 const Scalar* &rhs_ptr_real,
const Scalar* &rhs_ptr_imag,
1820 PacketBlock<Packet,accRows> &accReal, PacketBlock<Packet,accRows> &accImag)
1822 Packet rhsV[4], rhsVi[4];
1823 pbroadcastN_old<Packet,accRows>(rhs_ptr_real, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);
1824 if(!RhsIsReal) pbroadcastN_old<Packet,accRows>(rhs_ptr_imag, rhsVi[0], rhsVi[1], rhsVi[2], rhsVi[3]);
1825 pgerc<accRows, Scalar, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal, &accImag, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi);
1826 lhs_ptr_real += remaining_rows;
1827 if(!LhsIsReal) lhs_ptr_imag += remaining_rows;
1828 else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);
1829 rhs_ptr_real += accRows;
1830 if(!RhsIsReal) rhs_ptr_imag += accRows;
1831 else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);
1834template<
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal, const Index remaining_rows>
1835EIGEN_ALWAYS_INLINE
void gemm_unrolled_complex_row_iteration(
1836 const DataMapper& res,
1837 const Scalar* lhs_base,
1838 const Scalar* rhs_base,
1847 const Packet& pAlphaReal,
1848 const Packet& pAlphaImag,
1849 const Packet& pMask)
1851 const Scalar* rhs_ptr_real = rhs_base;
1852 const Scalar* rhs_ptr_imag = NULL;
1853 if(!RhsIsReal) rhs_ptr_imag = rhs_base + accRows*strideB;
1854 else EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);
1855 const Scalar* lhs_ptr_real = lhs_base + advanceRows*row*strideA + remaining_rows*offsetA;
1856 const Scalar* lhs_ptr_imag = NULL;
1857 if(!LhsIsReal) lhs_ptr_imag = lhs_ptr_real + remaining_rows*strideA;
1858 else EIGEN_UNUSED_VARIABLE(lhs_ptr_imag);
1859 PacketBlock<Packet,accRows> accReal0, accImag0, accReal1, accImag1, accReal2, accImag2, accReal3, accImag3;
1860 PacketBlock<Packet,accRows> taccReal, taccImag;
1861 PacketBlock<Packetc,accRows> acc0, acc1;
1862 PacketBlock<Packetc,accRows*2> tRes;
1864 bsetzero<Scalar, Packet, accRows>(accReal0);
1865 bsetzero<Scalar, Packet, accRows>(accImag0);
1867 Index remaining_depth = (col + quad_traits<Scalar>::rows < cols) ? depth : (depth & -quad_traits<Scalar>::rows);
1869 if (remaining_depth >= PEEL_COMPLEX_ROW) {
1870 MICRO_COMPLEX_ZERO_PEEL_ROW
1873 EIGEN_POWER_PREFETCH(rhs_ptr_real);
1875 EIGEN_POWER_PREFETCH(rhs_ptr_imag);
1877 EIGEN_POWER_PREFETCH(lhs_ptr_real);
1879 EIGEN_POWER_PREFETCH(lhs_ptr_imag);
1881 MICRO_COMPLEX_WORK_PEEL_ROW
1882 }
while ((k += PEEL_COMPLEX_ROW) + PEEL_COMPLEX_ROW <= remaining_depth);
1883 MICRO_COMPLEX_ADD_PEEL_ROW
1885 for(; k < remaining_depth; k++)
1887 MICRO_COMPLEX_EXTRA_ROW<Scalar, Packet, Index, accRows, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal, remaining_rows>(lhs_ptr_real, lhs_ptr_imag, rhs_ptr_real, rhs_ptr_imag, accReal0, accImag0);
1890 if ((remaining_depth == depth) && (rows >= accCols))
1892 bload<DataMapper, Packetc, Index, accColsC, ColMajor, true, accRows>(tRes, res, row, 0);
1893 bscalec<Packet,accRows>(accReal0, accImag0, pAlphaReal, pAlphaImag, taccReal, taccImag, pMask);
1894 bcouple<Packet, Packetc, accRows>(taccReal, taccImag, tRes, acc0, acc1);
1895 res.template storePacketBlock<Packetc,accRows>(row + 0, 0, acc0);
1896 res.template storePacketBlock<Packetc,accRows>(row + accColsC, 0, acc1);
1898 for(; k < depth; k++)
1900 Packet rhsV[4], rhsVi[4];
1901 pbroadcastN_old<Packet,accRows>(rhs_ptr_real, rhsV[0], rhsV[1], rhsV[2], rhsV[3]);
1902 if(!RhsIsReal) pbroadcastN_old<Packet,accRows>(rhs_ptr_imag, rhsVi[0], rhsVi[1], rhsVi[2], rhsVi[3]);
1903 pgerc<accRows, Scalar, Packet, Index, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal, remaining_rows>(&accReal0, &accImag0, lhs_ptr_real, lhs_ptr_imag, rhsV, rhsVi);
1904 lhs_ptr_real += remaining_rows;
1905 if(!LhsIsReal) lhs_ptr_imag += remaining_rows;
1906 rhs_ptr_real += accRows;
1907 if(!RhsIsReal) rhs_ptr_imag += accRows;
1910 bscalec<Packet,accRows>(accReal0, accImag0, pAlphaReal, pAlphaImag, taccReal, taccImag);
1911 bcouple_common<Packet, Packetc, accRows>(taccReal, taccImag, acc0, acc1);
1913 if ((
sizeof(Scalar) ==
sizeof(
float)) && (remaining_rows == 1))
1915 for(
Index j = 0; j < accRows; j++) {
1916 res(row + 0, j) += pfirst<Packetc>(acc0.packet[j]);
1919 for(
Index j = 0; j < accRows; j++) {
1920 PacketBlock<Packetc,1> acc2;
1921 acc2.packet[0] = res.template loadPacket<Packetc>(row + 0, j) + acc0.packet[j];
1922 res.template storePacketBlock<Packetc,1>(row + 0, j, acc2);
1923 if(remaining_rows > accColsC) {
1924 res(row + accColsC, j) += pfirst<Packetc>(acc1.packet[j]);
1931template<
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1932EIGEN_ALWAYS_INLINE
void gemm_complex_extra_row(
1933 const DataMapper& res,
1934 const Scalar* lhs_base,
1935 const Scalar* rhs_base,
1944 Index remaining_rows,
1945 const Packet& pAlphaReal,
1946 const Packet& pAlphaImag,
1947 const Packet& pMask)
1949 switch(remaining_rows) {
1951 gemm_unrolled_complex_row_iteration<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal, 1>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, pAlphaReal, pAlphaImag, pMask);
1954 if (
sizeof(Scalar) ==
sizeof(
float)) {
1955 gemm_unrolled_complex_row_iteration<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal, 2>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, pAlphaReal, pAlphaImag, pMask);
1959 if (
sizeof(Scalar) ==
sizeof(
float)) {
1960 gemm_unrolled_complex_row_iteration<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal, 3>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, pAlphaReal, pAlphaImag, pMask);
// Helper macros for the main complex kernel, which unrolls up to 4 lhs row
// blocks (iter 0..3). Real and imaginary lhs planes are imag_delta apart.
#define MICRO_COMPLEX_UNROLL(func) \
  func(0) func(1) func(2) func(3)

#define MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \
  MICRO_COMPLEX_UNROLL(func2); \
  func(0,peel) func(1,peel) func(2,peel) func(3,peel)

// Load the real (and, for complex lhs, imaginary) packet of row block `iter`.
#define MICRO_COMPLEX_LOAD_ONE(iter) \
  if (unroll_factor > iter) { \
    lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr_real##iter); \
    if(!LhsIsReal) { \
      lhsVi##iter = ploadLhs<Scalar, Packet>(lhs_ptr_real##iter + imag_delta); \
    } else { \
      EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
    } \
    lhs_ptr_real##iter += accCols; \
  } else { \
    EIGEN_UNUSED_VARIABLE(lhsV##iter); \
    EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
  }

// One complex outer-product accumulation for row block `iter`.
#define MICRO_COMPLEX_WORK_ONE4(iter, peel) \
  if (unroll_factor > iter) { \
    pgerc_common<accRows, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \
  }

#define MICRO_COMPLEX_TYPE_PEEL4(func, func2, peel) \
  if (PEEL_COMPLEX > peel) { \
    Packet lhsV0, lhsV1, lhsV2, lhsV3; \
    Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3; \
    pbroadcastN_old<Packet,accRows>(rhs_ptr_real + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
    if(!RhsIsReal) { \
      pbroadcastN_old<Packet,accRows>(rhs_ptr_imag + (accRows * peel), rhsVi##peel[0], rhsVi##peel[1], rhsVi##peel[2], rhsVi##peel[3]); \
    } else { \
      EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
    } \
    MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \
  } else { \
    EIGEN_UNUSED_VARIABLE(rhsV##peel); \
    EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
  }

#define MICRO_COMPLEX_UNROLL_TYPE_PEEL(M, func, func1, func2) \
  Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M]; \
  Packet rhsVi0[M], rhsVi1[M], rhsVi2[M], rhsVi3[M]; \
  func(func1,func2,0); func(func1,func2,1); \
  func(func1,func2,2); func(func1,func2,3);

#define MICRO_COMPLEX_UNROLL_TYPE_ONE(M, func, func1, func2) \
  Packet rhsV0[M], rhsVi0[M];\
  func(func1,func2,0);

// PEEL_COMPLEX depth steps in one shot, then advance the rhs pointers.
#define MICRO_COMPLEX_ONE_PEEL4 \
  MICRO_COMPLEX_UNROLL_TYPE_PEEL(4, MICRO_COMPLEX_TYPE_PEEL4, MICRO_COMPLEX_WORK_ONE4, MICRO_COMPLEX_LOAD_ONE); \
  rhs_ptr_real += (accRows * PEEL_COMPLEX); \
  if(!RhsIsReal) rhs_ptr_imag += (accRows * PEEL_COMPLEX);

// Single depth step.
#define MICRO_COMPLEX_ONE4 \
  MICRO_COMPLEX_UNROLL_TYPE_ONE(4, MICRO_COMPLEX_TYPE_PEEL4, MICRO_COMPLEX_WORK_ONE4, MICRO_COMPLEX_LOAD_ONE); \
  rhs_ptr_real += accRows; \
  if(!RhsIsReal) rhs_ptr_imag += accRows;

#define MICRO_COMPLEX_DST_PTR_ONE(iter) \
  if (unroll_factor > iter) { \
    bsetzero<Scalar, Packet, accRows>(accReal##iter); \
    bsetzero<Scalar, Packet, accRows>(accImag##iter); \
  } else { \
    EIGEN_UNUSED_VARIABLE(accReal##iter); \
    EIGEN_UNUSED_VARIABLE(accImag##iter); \
  }

#define MICRO_COMPLEX_DST_PTR MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_DST_PTR_ONE)

#define MICRO_COMPLEX_SRC_PTR_ONE(iter) \
  if (unroll_factor > iter) { \
    lhs_ptr_real##iter = lhs_base + ( ((advanceRows*row)/accCols) + iter*advanceRows )*strideA*accCols; \
  } else { \
    EIGEN_UNUSED_VARIABLE(lhs_ptr_real##iter); \
  }

#define MICRO_COMPLEX_SRC_PTR MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_SRC_PTR_ONE)

#define MICRO_COMPLEX_PREFETCH_ONE(iter) \
  if (unroll_factor > iter) { \
    EIGEN_POWER_PREFETCH(lhs_ptr_real##iter); \
  }

#define MICRO_COMPLEX_PREFETCH MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_PREFETCH_ONE)

// Scale accumulator pair `iter` by alpha, recombine to complex packets and
// add into the two half-width destination blocks.
#define MICRO_COMPLEX_STORE_ONE(iter) \
  if (unroll_factor > iter) { \
    bload<DataMapper, Packetc, Index, accColsC, ColMajor, true, accRows>(tRes, res, row + iter*accCols, 0); \
    bscalec<Packet,accRows>(accReal##iter, accImag##iter, pAlphaReal, pAlphaImag, taccReal, taccImag); \
    bcouple<Packet, Packetc, accRows>(taccReal, taccImag, tRes, acc0, acc1); \
    res.template storePacketBlock<Packetc,accRows>(row + iter*accCols + 0, 0, acc0); \
    res.template storePacketBlock<Packetc,accRows>(row + iter*accCols + accColsC, 0, acc1); \
  }

#define MICRO_COMPLEX_STORE MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_STORE_ONE)
2066template<
int unroll_factor,
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2067EIGEN_STRONG_INLINE
void gemm_complex_unrolled_iteration(
2068 const DataMapper& res,
2069 const Scalar* lhs_base,
2070 const Scalar* rhs_base,
2075 const Packet& pAlphaReal,
2076 const Packet& pAlphaImag)
2078 const Scalar* rhs_ptr_real = rhs_base;
2079 const Scalar* rhs_ptr_imag = NULL;
2080 const Index imag_delta = accCols*strideA;
2082 rhs_ptr_imag = rhs_base + accRows*strideB;
2084 EIGEN_UNUSED_VARIABLE(rhs_ptr_imag);
2086 const Scalar* lhs_ptr_real0 = NULL, * lhs_ptr_real1 = NULL;
2087 const Scalar* lhs_ptr_real2 = NULL, * lhs_ptr_real3 = NULL;
2088 PacketBlock<Packet,accRows> accReal0, accImag0, accReal1, accImag1;
2089 PacketBlock<Packet,accRows> accReal2, accImag2, accReal3, accImag3;
2090 PacketBlock<Packet,accRows> taccReal, taccImag;
2091 PacketBlock<Packetc,accRows> acc0, acc1;
2092 PacketBlock<Packetc,accRows*2> tRes;
2094 MICRO_COMPLEX_SRC_PTR
2095 MICRO_COMPLEX_DST_PTR
2098 for(; k + PEEL_COMPLEX <= depth; k+= PEEL_COMPLEX)
2100 EIGEN_POWER_PREFETCH(rhs_ptr_real);
2102 EIGEN_POWER_PREFETCH(rhs_ptr_imag);
2104 MICRO_COMPLEX_PREFETCH
2105 MICRO_COMPLEX_ONE_PEEL4
2107 for(; k < depth; k++)
2113 row += unroll_factor*accCols;
2116template<
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2117EIGEN_ALWAYS_INLINE
void gemm_complex_cols(
2118 const DataMapper& res,
2119 const Scalar* blockA,
2120 const Scalar* blockB,
2129 Index remaining_rows,
2130 const Packet& pAlphaReal,
2131 const Packet& pAlphaImag,
2132 const Packet& pMask)
2134 const DataMapper res3 = res.getSubMapper(0, col);
2136 const Scalar* rhs_base = blockB + advanceCols*col*strideB + accRows*offsetB;
2137 const Scalar* lhs_base = blockA + accCols*offsetA;
2140#define MAX_COMPLEX_UNROLL 3
2141 while(row + MAX_COMPLEX_UNROLL*accCols <= rows) {
2142 gemm_complex_unrolled_iteration<MAX_COMPLEX_UNROLL, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
2144 switch( (rows-row)/accCols ) {
2145#if MAX_COMPLEX_UNROLL > 4
2147 gemm_complex_unrolled_iteration<4, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
2150#if MAX_COMPLEX_UNROLL > 3
2152 gemm_complex_unrolled_iteration<3, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
2155#if MAX_COMPLEX_UNROLL > 2
2157 gemm_complex_unrolled_iteration<2, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
2160#if MAX_COMPLEX_UNROLL > 1
2162 gemm_complex_unrolled_iteration<1, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, lhs_base, rhs_base, depth, strideA, strideB, row, pAlphaReal, pAlphaImag);
2168#undef MAX_COMPLEX_UNROLL
2170 if(remaining_rows > 0)
2172 gemm_complex_extra_row<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res3, blockA, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
2176template<
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2177EIGEN_STRONG_INLINE
void gemm_complex_extra_cols(
2178 const DataMapper& res,
2179 const Scalar* blockA,
2180 const Scalar* blockB,
2189 Index remaining_rows,
2190 const Packet& pAlphaReal,
2191 const Packet& pAlphaImag,
2192 const Packet& pMask)
2194 for (; col < cols; col++) {
2195 gemm_complex_cols<Scalar, Packet, Packetc, DataMapper, Index, 1, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
2199template<
typename LhsScalar,
typename RhsScalar,
typename Scalarc,
typename Scalar,
typename Index,
typename Packet,
typename Packetc,
typename RhsPacket,
typename DataMapper, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2200EIGEN_STRONG_INLINE
void gemm_complex(
const DataMapper& res,
const LhsScalar* blockAc,
const RhsScalar* blockBc,
Index rows,
Index depth,
Index cols, Scalarc alpha,
Index strideA,
Index strideB,
Index offsetA,
Index offsetB)
2202 const Index remaining_rows = rows % accCols;
2204 if( strideA == -1 ) strideA = depth;
2205 if( strideB == -1 ) strideB = depth;
2207 const Packet pAlphaReal = pset1<Packet>(alpha.real());
2208 const Packet pAlphaImag = pset1<Packet>(alpha.imag());
2209 const Packet pMask = bmask<Packet>((
const int)(remaining_rows));
2211 const Scalar* blockA = (Scalar *) blockAc;
2212 const Scalar* blockB = (Scalar *) blockBc;
2215 for(; col + accRows <= cols; col += accRows)
2217 gemm_complex_cols<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
2220 gemm_complex_extra_cols<Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, blockA, blockB, depth, strideA, offsetA, strideB, offsetB, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
2230template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2231struct gemm_pack_lhs<double,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2233 void operator()(
double* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2236template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2237void gemm_pack_lhs<double, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>
2238 ::operator()(
double* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2240 dhs_pack<double, Index, DataMapper, Packet2d, ColMajor, PanelMode, true> pack;
2241 pack(blockA, lhs, depth, rows, stride, offset);
2244template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2245struct gemm_pack_lhs<double,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2247 void operator()(
double* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2250template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2251void gemm_pack_lhs<double, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>
2252 ::operator()(
double* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2254 dhs_pack<double, Index, DataMapper, Packet2d, RowMajor, PanelMode, true> pack;
2255 pack(blockA, lhs, depth, rows, stride, offset);
2258#if EIGEN_ALTIVEC_USE_CUSTOM_PACK
2259template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2260struct gemm_pack_rhs<double,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2262 void operator()(
double* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2265template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2266void gemm_pack_rhs<double, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
2267 ::operator()(
double* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2269 dhs_pack<double, Index, DataMapper, Packet2d, ColMajor, PanelMode, false> pack;
2270 pack(blockB, rhs, depth, cols, stride, offset);
2273template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2274struct gemm_pack_rhs<double,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2276 void operator()(
double* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2279template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2280void gemm_pack_rhs<double, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
2281 ::operator()(
double* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2283 dhs_pack<double, Index, DataMapper, Packet2d, RowMajor, PanelMode, false> pack;
2284 pack(blockB, rhs, depth, cols, stride, offset);
2288template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2289struct gemm_pack_lhs<float,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2291 void operator()(
float* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2294template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2295void gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, RowMajor, Conjugate, PanelMode>
2296 ::operator()(
float* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2298 dhs_pack<float, Index, DataMapper, Packet4f, RowMajor, PanelMode, true> pack;
2299 pack(blockA, lhs, depth, rows, stride, offset);
2302template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2303struct gemm_pack_lhs<float,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2305 void operator()(
float* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2308template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2309void gemm_pack_lhs<float, Index, DataMapper, Pack1, Pack2, Packet, ColMajor, Conjugate, PanelMode>
2310 ::operator()(
float* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2312 dhs_pack<float, Index, DataMapper, Packet4f, ColMajor, PanelMode, true> pack;
2313 pack(blockA, lhs, depth, rows, stride, offset);
2316template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2317struct gemm_pack_lhs<std::complex<float>,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2319 void operator()(std::complex<float>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2322template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2323void gemm_pack_lhs<std::complex<float>,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2324 ::operator()(std::complex<float>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2326 dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, RowMajor, Conjugate, PanelMode, true> pack;
2327 pack(blockA, lhs, depth, rows, stride, offset);
2330template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2331struct gemm_pack_lhs<std::complex<float>,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2333 void operator()(std::complex<float>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2336template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2337void gemm_pack_lhs<std::complex<float>,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2338 ::operator()(std::complex<float>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2340 dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, ColMajor, Conjugate, PanelMode, true> pack;
2341 pack(blockA, lhs, depth, rows, stride, offset);
2344#if EIGEN_ALTIVEC_USE_CUSTOM_PACK
2345template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2346struct gemm_pack_rhs<float,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2348 void operator()(
float* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2351template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2352void gemm_pack_rhs<float, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
2353 ::operator()(
float* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2355 dhs_pack<float, Index, DataMapper, Packet4f, ColMajor, PanelMode, false> pack;
2356 pack(blockB, rhs, depth, cols, stride, offset);
2359template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2360struct gemm_pack_rhs<float,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2362 void operator()(
float* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2365template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2366void gemm_pack_rhs<float, Index, DataMapper, nr, RowMajor, Conjugate, PanelMode>
2367 ::operator()(
float* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2369 dhs_pack<float, Index, DataMapper, Packet4f, RowMajor, PanelMode, false> pack;
2370 pack(blockB, rhs, depth, cols, stride, offset);
2374template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2375struct gemm_pack_rhs<std::complex<float>,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2377 void operator()(std::complex<float>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2380template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2381void gemm_pack_rhs<std::complex<float>,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2382 ::operator()(std::complex<float>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2384 dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, ColMajor, Conjugate, PanelMode, false> pack;
2385 pack(blockB, rhs, depth, cols, stride, offset);
2388template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2389struct gemm_pack_rhs<std::complex<float>,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2391 void operator()(std::complex<float>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2394template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2395void gemm_pack_rhs<std::complex<float>,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2396 ::operator()(std::complex<float>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2398 dhs_cpack<float, Index, DataMapper, Packet4f, Packet2cf, RowMajor, Conjugate, PanelMode, false> pack;
2399 pack(blockB, rhs, depth, cols, stride, offset);
2402template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2403struct gemm_pack_lhs<std::complex<double>,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2405 void operator()(std::complex<double>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2408template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2409void gemm_pack_lhs<std::complex<double>,
Index, DataMapper, Pack1, Pack2, Packet,
RowMajor, Conjugate, PanelMode>
2410 ::operator()(std::complex<double>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2412 dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, RowMajor, Conjugate, PanelMode, true> pack;
2413 pack(blockA, lhs, depth, rows, stride, offset);
2416template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2417struct gemm_pack_lhs<std::complex<double>,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2419 void operator()(std::complex<double>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride=0,
Index offset=0);
2422template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2423void gemm_pack_lhs<std::complex<double>,
Index, DataMapper, Pack1, Pack2, Packet,
ColMajor, Conjugate, PanelMode>
2424 ::operator()(std::complex<double>* blockA,
const DataMapper& lhs,
Index depth,
Index rows,
Index stride,
Index offset)
2426 dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, ColMajor, Conjugate, PanelMode, true> pack;
2427 pack(blockA, lhs, depth, rows, stride, offset);
2430template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2431struct gemm_pack_rhs<std::complex<double>,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2433 void operator()(std::complex<double>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2436template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2437void gemm_pack_rhs<std::complex<double>,
Index, DataMapper, nr,
ColMajor, Conjugate, PanelMode>
2438 ::operator()(std::complex<double>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2440 dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, ColMajor, Conjugate, PanelMode, false> pack;
2441 pack(blockB, rhs, depth, cols, stride, offset);
2444template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2445struct gemm_pack_rhs<std::complex<double>,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2447 void operator()(std::complex<double>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride=0,
Index offset=0);
2450template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2451void gemm_pack_rhs<std::complex<double>,
Index, DataMapper, nr,
RowMajor, Conjugate, PanelMode>
2452 ::operator()(std::complex<double>* blockB,
const DataMapper& rhs,
Index depth,
Index cols,
Index stride,
Index offset)
2454 dhs_cpack<double, Index, DataMapper, Packet2d, Packet1cd, RowMajor, Conjugate, PanelMode, false> pack;
2455 pack(blockB, rhs, depth, cols, stride, offset);
2459template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2460struct gebp_kernel<float, float,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2462 typedef typename quad_traits<float>::vectortype Packet;
2463 typedef typename quad_traits<float>::rhstype RhsPacket;
2465 void operator()(
const DataMapper& res,
const float* blockA,
const float* blockB,
2470template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2471void gebp_kernel<float, float, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2472 ::operator()(
const DataMapper& res,
const float* blockA,
const float* blockB,
2476 const Index accRows = quad_traits<float>::rows;
2477 const Index accCols = quad_traits<float>::size;
2478 void (*gemm_function)(
const DataMapper&,
const float*,
const float*,
Index,
Index,
Index, float,
Index,
Index,
Index,
Index);
2480 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2482 gemm_function = &Eigen::internal::gemmMMA<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2483 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2484 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2485 gemm_function = &Eigen::internal::gemmMMA<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2488 gemm_function = &Eigen::internal::gemm<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2491 gemm_function = &Eigen::internal::gemm<float, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2493 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2496template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2497struct gebp_kernel<std::complex<float>, std::complex<float>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2499 typedef Packet4f Packet;
2500 typedef Packet2cf Packetc;
2501 typedef Packet4f RhsPacket;
2503 void operator()(
const DataMapper& res,
const std::complex<float>* blockA,
const std::complex<float>* blockB,
2508template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2509void gebp_kernel<std::complex<float>, std::complex<float>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2510 ::operator()(
const DataMapper& res,
const std::complex<float>* blockA,
const std::complex<float>* blockB,
2514 const Index accRows = quad_traits<float>::rows;
2515 const Index accCols = quad_traits<float>::size;
2516 void (*gemm_function)(
const DataMapper&,
const std::complex<float>*,
const std::complex<float>*,
2519 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2521 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2522 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2523 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2524 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2527 gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2530 gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2532 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2535template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2536struct gebp_kernel<float, std::complex<float>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2538 typedef Packet4f Packet;
2539 typedef Packet2cf Packetc;
2540 typedef Packet4f RhsPacket;
2542 void operator()(
const DataMapper& res,
const float* blockA,
const std::complex<float>* blockB,
2547template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2548void gebp_kernel<float, std::complex<float>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2549 ::operator()(
const DataMapper& res,
const float* blockA,
const std::complex<float>* blockB,
2553 const Index accRows = quad_traits<float>::rows;
2554 const Index accCols = quad_traits<float>::size;
2555 void (*gemm_function)(
const DataMapper&,
const float*,
const std::complex<float>*,
2557 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2559 gemm_function = &Eigen::internal::gemm_complexMMA<float, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2560 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2561 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2562 gemm_function = &Eigen::internal::gemm_complexMMA<float, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2565 gemm_function = &Eigen::internal::gemm_complex<float, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2568 gemm_function = &Eigen::internal::gemm_complex<float, std::complex<float>, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2570 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2573template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2574struct gebp_kernel<std::complex<float>, float,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2576 typedef Packet4f Packet;
2577 typedef Packet2cf Packetc;
2578 typedef Packet4f RhsPacket;
2580 void operator()(
const DataMapper& res,
const std::complex<float>* blockA,
const float* blockB,
2585template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2586void gebp_kernel<std::complex<float>, float,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2587 ::operator()(
const DataMapper& res,
const std::complex<float>* blockA,
const float* blockB,
2591 const Index accRows = quad_traits<float>::rows;
2592 const Index accCols = quad_traits<float>::size;
2593 void (*gemm_function)(
const DataMapper&,
const std::complex<float>*,
const float*,
2595 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2597 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, float, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2598 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2599 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2600 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<float>, float, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2603 gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, float, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2606 gemm_function = &Eigen::internal::gemm_complex<std::complex<float>, float, std::complex<float>, float,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2608 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2611template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2612struct gebp_kernel<double, double,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2614 typedef typename quad_traits<double>::vectortype Packet;
2615 typedef typename quad_traits<double>::rhstype RhsPacket;
2617 void operator()(
const DataMapper& res,
const double* blockA,
const double* blockB,
2622template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2623void gebp_kernel<double, double, Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2624 ::operator()(
const DataMapper& res,
const double* blockA,
const double* blockB,
2628 const Index accRows = quad_traits<double>::rows;
2629 const Index accCols = quad_traits<double>::size;
2630 void (*gemm_function)(
const DataMapper&,
const double*,
const double*,
Index,
Index,
Index, double,
Index,
Index,
Index,
Index);
2632 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2634 gemm_function = &Eigen::internal::gemmMMA<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2635 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2636 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2637 gemm_function = &Eigen::internal::gemmMMA<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2640 gemm_function = &Eigen::internal::gemm<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2643 gemm_function = &Eigen::internal::gemm<double, Index, Packet, RhsPacket, DataMapper, accRows, accCols>;
2645 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2648template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2649struct gebp_kernel<std::complex<double>, std::complex<double>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2651 typedef quad_traits<double>::vectortype Packet;
2652 typedef Packet1cd Packetc;
2653 typedef quad_traits<double>::rhstype RhsPacket;
2655 void operator()(
const DataMapper& res,
const std::complex<double>* blockA,
const std::complex<double>* blockB,
2660template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2661void gebp_kernel<std::complex<double>, std::complex<double>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2662 ::operator()(
const DataMapper& res,
const std::complex<double>* blockA,
const std::complex<double>* blockB,
2666 const Index accRows = quad_traits<double>::rows;
2667 const Index accCols = quad_traits<double>::size;
2668 void (*gemm_function)(
const DataMapper&,
const std::complex<double>*,
const std::complex<double>*,
2670 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2672 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2673 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2674 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2675 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2678 gemm_function = &Eigen::internal::gemm_complex<std::complex<double>, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2681 gemm_function = &Eigen::internal::gemm_complex<std::complex<double>, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
false>;
2683 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2686template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2687struct gebp_kernel<std::complex<double>, double,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2689 typedef quad_traits<double>::vectortype Packet;
2690 typedef Packet1cd Packetc;
2691 typedef quad_traits<double>::rhstype RhsPacket;
2693 void operator()(
const DataMapper& res,
const std::complex<double>* blockA,
const double* blockB,
2698template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2699void gebp_kernel<std::complex<double>, double,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2700 ::operator()(
const DataMapper& res,
const std::complex<double>* blockA,
const double* blockB,
2704 const Index accRows = quad_traits<double>::rows;
2705 const Index accCols = quad_traits<double>::size;
2706 void (*gemm_function)(
const DataMapper&,
const std::complex<double>*,
const double*,
2708 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2710 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, double, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2711 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2712 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2713 gemm_function = &Eigen::internal::gemm_complexMMA<std::complex<double>, double, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2716 gemm_function = &Eigen::internal::gemm_complex<std::complex<double>, double, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2719 gemm_function = &Eigen::internal::gemm_complex<std::complex<double>, double, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
false,
true>;
2721 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
2724template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2725struct gebp_kernel<double, std::complex<double>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2727 typedef quad_traits<double>::vectortype Packet;
2728 typedef Packet1cd Packetc;
2729 typedef quad_traits<double>::rhstype RhsPacket;
2731 void operator()(
const DataMapper& res,
const double* blockA,
const std::complex<double>* blockB,
2736template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2737void gebp_kernel<double, std::complex<double>,
Index, DataMapper, mr, nr, ConjugateLhs, ConjugateRhs>
2738 ::operator()(
const DataMapper& res,
const double* blockA,
const std::complex<double>* blockB,
2742 const Index accRows = quad_traits<double>::rows;
2743 const Index accCols = quad_traits<double>::size;
2744 void (*gemm_function)(
const DataMapper&,
const double*,
const std::complex<double>*,
2746 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2748 gemm_function = &Eigen::internal::gemm_complexMMA<double, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2749 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2750 if (__builtin_cpu_supports (
"arch_3_1") && __builtin_cpu_supports (
"mma")){
2751 gemm_function = &Eigen::internal::gemm_complexMMA<double, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2754 gemm_function = &Eigen::internal::gemm_complex<double, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2757 gemm_function = &Eigen::internal::gemm_complex<double, std::complex<double>, std::complex<double>, double,
Index, Packet, Packetc, RhsPacket, DataMapper, accRows, accCols, ConjugateLhs, ConjugateRhs,
true,
false>;
2759 gemm_function(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB);
@ ColMajor
Definition: Constants.h:319
@ RowMajor
Definition: Constants.h:321
Namespace containing all symbols from the Eigen library.
Definition: Core:141
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_real_op< typename Derived::Scalar >, const Derived > real(const Eigen::ArrayBase< Derived > &x)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_imag_op< typename Derived::Scalar >, const Derived > imag(const Eigen::ArrayBase< Derived > &x)