author     sotech117 <michael_foiani@brown.edu>  2024-04-09 03:14:17 -0400
committer  sotech117 <michael_foiani@brown.edu>  2024-04-09 03:14:17 -0400
commit     7a8d0d8bc2572707c9d35006f30ea835c86954b0 (patch)
tree       dedb9a65c1698202ad485378b4186b667008abe5 /Eigen/src/Core/util
parent     818324678bd5dca790c57048e5012d2937a4b5e5 (diff)
first draft to generate waves
Diffstat (limited to 'Eigen/src/Core/util')
-rwxr-xr-x  Eigen/src/Core/util/BlasUtil.h                 583
-rw-r--r--  Eigen/src/Core/util/ConfigureVectorization.h   512
-rw-r--r--  Eigen/src/Core/util/Constants.h                563
-rwxr-xr-x  Eigen/src/Core/util/DisableStupidWarnings.h    106
-rw-r--r--  Eigen/src/Core/util/ForwardDeclarations.h      322
-rw-r--r--  Eigen/src/Core/util/IndexedViewHelper.h        186
-rw-r--r--  Eigen/src/Core/util/IntegralConstant.h         272
-rwxr-xr-x  Eigen/src/Core/util/MKL_support.h              137
-rw-r--r--  Eigen/src/Core/util/Macros.h                  1464
-rw-r--r--  Eigen/src/Core/util/Memory.h                  1163
-rwxr-xr-x  Eigen/src/Core/util/Meta.h                     812
-rw-r--r--  Eigen/src/Core/util/NonMPL2.h                    3
-rw-r--r--  Eigen/src/Core/util/ReenableStupidWarnings.h    31
-rw-r--r--  Eigen/src/Core/util/ReshapedHelper.h            51
-rw-r--r--  Eigen/src/Core/util/StaticAssert.h             221
-rw-r--r--  Eigen/src/Core/util/SymbolicIndex.h            293
-rw-r--r--  Eigen/src/Core/util/XprHelper.h                856
17 files changed, 7575 insertions, 0 deletions
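The first hunk below adds BlasUtil.h, whose blas_data_mapper classes wrap a raw coefficient pointer plus an outer stride so the packing routines and the GEBP kernel can address matrix entries without knowing the originating expression type. A minimal standalone sketch of that addressing scheme, using toy types rather than the Eigen classes themselves:

    // Sketch only: mirrors the (i,j) -> data[...] mapping that blas_data_mapper
    // implements below. ColMajor places (i,j) at data[i + j*stride], RowMajor at
    // data[j + i*stride]; getSubMapper just rebases the pointer to (i,j).
    #include <cassert>
    #include <vector>

    template <bool IsRowMajor>
    struct ToyMapper {
      double* data;
      long stride;  // outer stride: rows for ColMajor, cols for RowMajor

      double& operator()(long i, long j) const {
        return data[IsRowMajor ? j + i * stride : i + j * stride];
      }
      ToyMapper getSubMapper(long i, long j) const {
        return ToyMapper{&operator()(i, j), stride};  // same stride, rebased origin
      }
    };

    int main() {
      std::vector<double> m(6);           // a 3x2 column-major matrix
      ToyMapper<false> map{m.data(), 3};  // stride == number of rows
      map(2, 1) = 42.0;                   // writes m[2 + 1*3] == m[5]
      assert(m[5] == 42.0);
      assert(&map.getSubMapper(1, 1)(1, 0) == &map(2, 1));
      return 0;
    }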
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h new file mode 100755 index 0000000..e16a564 --- /dev/null +++ b/Eigen/src/Core/util/BlasUtil.h @@ -0,0 +1,583 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BLASUTIL_H +#define EIGEN_BLASUTIL_H + +// This file contains many lightweight helper classes used to +// implement and control fast level 2 and level 3 BLAS-like routines. + +namespace Eigen { + +namespace internal { + +// forward declarations +template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false> +struct gebp_kernel; + +template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false> +struct gemm_pack_rhs; + +template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, int StorageOrder, bool Conjugate = false, bool PanelMode = false> +struct gemm_pack_lhs; + +template< + typename Index, + typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, + typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, + int ResStorageOrder, int ResInnerStride> +struct general_matrix_matrix_product; + +template<typename Index, + typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs, + typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized> +struct general_matrix_vector_product; + +template<typename From,typename To> struct get_factor { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); } +}; + +template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); } +}; + + +template<typename Scalar, typename Index> +class BlasVectorMapper { + public: + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + return m_data[i]; + } + template <typename Packet, int AlignmentType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const { + return ploadt<Packet, AlignmentType>(m_data + i); + } + + template <typename Packet> + EIGEN_DEVICE_FUNC bool aligned(Index i) const { + return (UIntPtr(m_data+i)%sizeof(Packet))==0; + } + + protected: + Scalar* m_data; +}; + +template<typename Scalar, typename Index, int AlignmentType, int Incr=1> +class BlasLinearMapper; + +template<typename Scalar, typename Index, int AlignmentType> +class BlasLinearMapper<Scalar,Index,AlignmentType> +{ +public: + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1) + : m_data(data) + { + EIGEN_ONLY_USED_FOR_DEBUG(incr); + eigen_assert(incr==1); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const { + internal::prefetch(&operator()(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const { + return m_data[i]; + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const { + return 
ploadt<PacketType, AlignmentType>(m_data + i); + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const { + pstoret<Scalar, PacketType, AlignmentType>(m_data + i, p); + } + +protected: + Scalar *m_data; +}; + +// Lightweight helper class to access matrix coefficients. +template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1> +class blas_data_mapper; + +// TMP to help PacketBlock store implementation. +// There's currently no known use case for PacketBlock load. +// The default implementation assumes ColMajor order. +// It always store each packet sequentially one `stride` apart. +template<typename Index, typename Scalar, typename Packet, int n, int idx, int StorageOrder> +struct PacketBlockManagement +{ + PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, StorageOrder> pbm; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const { + pbm.store(to, stride, i, j, block); + pstoreu<Scalar>(to + i + (j + idx)*stride, block.packet[idx]); + } +}; + +// PacketBlockManagement specialization to take care of RowMajor order without ifs. +template<typename Index, typename Scalar, typename Packet, int n, int idx> +struct PacketBlockManagement<Index, Scalar, Packet, n, idx, RowMajor> +{ + PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, RowMajor> pbm; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const { + pbm.store(to, stride, i, j, block); + pstoreu<Scalar>(to + j + (i + idx)*stride, block.packet[idx]); + } +}; + +template<typename Index, typename Scalar, typename Packet, int n, int StorageOrder> +struct PacketBlockManagement<Index, Scalar, Packet, n, -1, StorageOrder> +{ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const { + EIGEN_UNUSED_VARIABLE(to); + EIGEN_UNUSED_VARIABLE(stride); + EIGEN_UNUSED_VARIABLE(i); + EIGEN_UNUSED_VARIABLE(j); + EIGEN_UNUSED_VARIABLE(block); + } +}; + +template<typename Index, typename Scalar, typename Packet, int n> +struct PacketBlockManagement<Index, Scalar, Packet, n, -1, RowMajor> +{ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const { + EIGEN_UNUSED_VARIABLE(to); + EIGEN_UNUSED_VARIABLE(stride); + EIGEN_UNUSED_VARIABLE(i); + EIGEN_UNUSED_VARIABLE(j); + EIGEN_UNUSED_VARIABLE(block); + } +}; + +template<typename Scalar, typename Index, int StorageOrder, int AlignmentType> +class blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1> +{ +public: + typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper; + typedef BlasVectorMapper<Scalar, Index> VectorMapper; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1) + : m_data(data), m_stride(stride) + { + EIGEN_ONLY_USED_FOR_DEBUG(incr); + eigen_assert(incr==1); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType> + getSubMapper(Index i, Index j) const { + return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + return LinearMapper(&operator()(i, j)); + } + + 
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { + return VectorMapper(&operator()(i, j)); + } + + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const { + return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const { + return ploadt<PacketType, AlignmentType>(&operator()(i, j)); + } + + template <typename PacketT, int AlignmentT> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { + return ploadt<PacketT, AlignmentT>(&operator()(i, j)); + } + + template<typename SubPacket> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { + pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride); + } + + template<typename SubPacket> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const { + return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride); + } + + EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; } + EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } + + EIGEN_DEVICE_FUNC Index firstAligned(Index size) const { + if (UIntPtr(m_data)%sizeof(Scalar)) { + return -1; + } + return internal::first_default_aligned(m_data, size); + } + + template<typename SubPacket, int n> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n> &block) const { + PacketBlockManagement<Index, Scalar, SubPacket, n, n-1, StorageOrder> pbm; + pbm.store(m_data, m_stride, i, j, block); + } +protected: + Scalar* EIGEN_RESTRICT m_data; + const Index m_stride; +}; + +// Implementation of non-natural increment (i.e. inner-stride != 1) +// The exposed API is not complete yet compared to the Incr==1 case +// because some features makes less sense in this case. 
+template<typename Scalar, typename Index, int AlignmentType, int Incr> +class BlasLinearMapper +{ +public: + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data,Index incr) : m_data(data), m_incr(incr) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const { + internal::prefetch(&operator()(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const { + return m_data[i*m_incr.value()]; + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const { + return pgather<Scalar,PacketType>(m_data + i*m_incr.value(), m_incr.value()); + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const { + pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value()); + } + +protected: + Scalar *m_data; + const internal::variable_if_dynamic<Index,Incr> m_incr; +}; + +template<typename Scalar, typename Index, int StorageOrder, int AlignmentType,int Incr> +class blas_data_mapper +{ +public: + typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper + getSubMapper(Index i, Index j) const { + return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value()); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + return LinearMapper(&operator()(i, j), m_incr.value()); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const { + return m_data[StorageOrder==RowMajor ? j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride]; + } + + template<typename PacketType> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const { + return pgather<Scalar,PacketType>(&operator()(i, j),m_incr.value()); + } + + template <typename PacketT, int AlignmentT> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { + return pgather<Scalar,PacketT>(&operator()(i, j),m_incr.value()); + } + + template<typename SubPacket> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { + pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride); + } + + template<typename SubPacket> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const { + return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride); + } + + // storePacketBlock_helper defines a way to access values inside the PacketBlock, this is essentially required by the Complex types. 
+ template<typename SubPacket, typename ScalarT, int n, int idx> + struct storePacketBlock_helper + { + storePacketBlock_helper<SubPacket, ScalarT, n, idx-1> spbh; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const { + spbh.store(sup, i,j,block); + for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) + { + ScalarT *v = &sup->operator()(i+l, j+idx); + *v = block.packet[idx][l]; + } + } + }; + + template<typename SubPacket, int n, int idx> + struct storePacketBlock_helper<SubPacket, std::complex<float>, n, idx> + { + storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const { + spbh.store(sup,i,j,block); + for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) + { + std::complex<float> *v = &sup->operator()(i+l, j+idx); + v->real(block.packet[idx].v[2*l+0]); + v->imag(block.packet[idx].v[2*l+1]); + } + } + }; + + template<typename SubPacket, int n, int idx> + struct storePacketBlock_helper<SubPacket, std::complex<double>, n, idx> + { + storePacketBlock_helper<SubPacket, std::complex<double>, n, idx-1> spbh; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const { + spbh.store(sup,i,j,block); + for(int l = 0; l < unpacket_traits<SubPacket>::size; l++) + { + std::complex<double> *v = &sup->operator()(i+l, j+idx); + v->real(block.packet[idx].v[2*l+0]); + v->imag(block.packet[idx].v[2*l+1]); + } + } + }; + + template<typename SubPacket, typename ScalarT, int n> + struct storePacketBlock_helper<SubPacket, ScalarT, n, -1> + { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { + } + }; + + template<typename SubPacket, int n> + struct storePacketBlock_helper<SubPacket, std::complex<float>, n, -1> + { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { + } + }; + + template<typename SubPacket, int n> + struct storePacketBlock_helper<SubPacket, std::complex<double>, n, -1> + { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const { + } + }; + // This function stores a PacketBlock on m_data, this approach is really quite slow compare to Incr=1 and should be avoided when possible. 
+ template<typename SubPacket, int n> + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n>&block) const { + storePacketBlock_helper<SubPacket, Scalar, n, n-1> spb; + spb.store(this, i,j,block); + } +protected: + Scalar* EIGEN_RESTRICT m_data; + const Index m_stride; + const internal::variable_if_dynamic<Index,Incr> m_incr; +}; + +// lightweight helper class to access matrix coefficients (const version) +template<typename Scalar, typename Index, int StorageOrder> +class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> { + public: + EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {} + + EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const { + return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride); + } +}; + + +/* Helper class to analyze the factors of a Product expression. + * In particular it allows to pop out operator-, scalar multiples, + * and conjugate */ +template<typename XprType> struct blas_traits +{ + typedef typename traits<XprType>::Scalar Scalar; + typedef const XprType& ExtractType; + typedef XprType _ExtractType; + enum { + IsComplex = NumTraits<Scalar>::IsComplex, + IsTransposed = false, + NeedToConjugate = false, + HasUsableDirectAccess = ( (int(XprType::Flags)&DirectAccessBit) + && ( bool(XprType::IsVectorAtCompileTime) + || int(inner_stride_at_compile_time<XprType>::ret) == 1) + ) ? 1 : 0, + HasScalarFactor = false + }; + typedef typename conditional<bool(HasUsableDirectAccess), + ExtractType, + typename _ExtractType::PlainObject + >::type DirectLinearAccessType; + static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; } + static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); } +}; + +// pop conjugate +template<typename Scalar, typename NestedXpr> +struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> > + : blas_traits<NestedXpr> +{ + typedef blas_traits<NestedXpr> Base; + typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType; + typedef typename Base::ExtractType ExtractType; + + enum { + IsComplex = NumTraits<Scalar>::IsComplex, + NeedToConjugate = Base::NeedToConjugate ? 
0 : IsComplex + }; + static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); } + static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); } +}; + +// pop scalar multiple +template<typename Scalar, typename NestedXpr, typename Plain> +struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> > + : blas_traits<NestedXpr> +{ + enum { + HasScalarFactor = true + }; + typedef blas_traits<NestedXpr> Base; + typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType; + typedef typename Base::ExtractType ExtractType; + static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); } + static inline EIGEN_DEVICE_FUNC Scalar extractScalarFactor(const XprType& x) + { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); } +}; +template<typename Scalar, typename NestedXpr, typename Plain> +struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > > + : blas_traits<NestedXpr> +{ + enum { + HasScalarFactor = true + }; + typedef blas_traits<NestedXpr> Base; + typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType; + typedef typename Base::ExtractType ExtractType; + static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); } + static inline Scalar extractScalarFactor(const XprType& x) + { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; } +}; +template<typename Scalar, typename Plain1, typename Plain2> +struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>, + const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > > + : blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> > +{}; + +// pop opposite +template<typename Scalar, typename NestedXpr> +struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> > + : blas_traits<NestedXpr> +{ + enum { + HasScalarFactor = true + }; + typedef blas_traits<NestedXpr> Base; + typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType; + typedef typename Base::ExtractType ExtractType; + static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); } + static inline Scalar extractScalarFactor(const XprType& x) + { return - Base::extractScalarFactor(x.nestedExpression()); } +}; + +// pop/push transpose +template<typename NestedXpr> +struct blas_traits<Transpose<NestedXpr> > + : blas_traits<NestedXpr> +{ + typedef typename NestedXpr::Scalar Scalar; + typedef blas_traits<NestedXpr> Base; + typedef Transpose<NestedXpr> XprType; + typedef Transpose<const typename Base::_ExtractType> ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS + typedef Transpose<const typename Base::_ExtractType> _ExtractType; + typedef typename conditional<bool(Base::HasUsableDirectAccess), + ExtractType, + typename ExtractType::PlainObject + >::type DirectLinearAccessType; + enum { + IsTransposed = Base::IsTransposed ? 
0 : 1 + }; + static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); } + static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); } +}; + +template<typename T> +struct blas_traits<const T> + : blas_traits<T> +{}; + +template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess> +struct extract_data_selector { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename T::Scalar* run(const T& m) + { + return blas_traits<T>::extract(m).data(); + } +}; + +template<typename T> +struct extract_data_selector<T,false> { + static typename T::Scalar* run(const T&) { return 0; } +}; + +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const typename T::Scalar* extract_data(const T& m) +{ + return extract_data_selector<T>::run(m); +} + +/** + * \c combine_scalar_factors extracts and multiplies factors from GEMM and GEMV products. + * There is a specialization for booleans + */ +template<typename ResScalar, typename Lhs, typename Rhs> +struct combine_scalar_factors_impl +{ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const Lhs& lhs, const Rhs& rhs) + { + return blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs) + { + return alpha * blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs); + } +}; +template<typename Lhs, typename Rhs> +struct combine_scalar_factors_impl<bool, Lhs, Rhs> +{ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const Lhs& lhs, const Rhs& rhs) + { + return blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const bool& alpha, const Lhs& lhs, const Rhs& rhs) + { + return alpha && blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs); + } +}; + +template<typename ResScalar, typename Lhs, typename Rhs> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs) +{ + return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(alpha, lhs, rhs); +} +template<typename ResScalar, typename Lhs, typename Rhs> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const Lhs& lhs, const Rhs& rhs) +{ + return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(lhs, rhs); +} + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BLASUTIL_H diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h new file mode 100644 index 0000000..af4e696 --- /dev/null +++ b/Eigen/src/Core/util/ConfigureVectorization.h @@ -0,0 +1,512 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2020, Arm Limited and Contributors +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
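The blas_traits hierarchy that closes BlasUtil.h above is what lets the GEMM/GEMV entry points receive a bare operand plus one accumulated scalar, instead of evaluating an expression like 3*m into a temporary. A short sketch of that extraction; Eigen::internal is not public API, so this is illustration only, not a supported usage:

    // Sketch: peeling the scalar factor off an expression via blas_traits.
    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::Matrix2d m = Eigen::Matrix2d::Identity();
      auto expr = 3.0 * m;  // CwiseBinaryOp(scalar constant, m)
      using Traits = Eigen::internal::blas_traits<decltype(expr)>;
      // "pop scalar multiple": the factor comes out, the bare matrix remains.
      std::cout << Traits::extractScalarFactor(expr) << '\n';  // 3
      std::cout << Traits::extract(expr)(0, 0) << '\n';        // m(0,0) == 1
      return 0;
    }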
+ +#ifndef EIGEN_CONFIGURE_VECTORIZATION_H +#define EIGEN_CONFIGURE_VECTORIZATION_H + +//------------------------------------------------------------------------------------------ +// Static and dynamic alignment control +// +// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES +// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively. +// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not, +// a default value is automatically computed based on architecture, compiler, and OS. +// +// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX} +// to be used to declare statically aligned buffers. +//------------------------------------------------------------------------------------------ + + +/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements. + * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled, + * so that vectorization doesn't affect binary compatibility. + * + * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link + * vectorized and non-vectorized code. + * + * FIXME: this code can be cleaned up once we switch to proper C++11 only. + */ +#if (defined EIGEN_CUDACC) + #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n) + #define EIGEN_ALIGNOF(x) __alignof(x) +#elif EIGEN_HAS_ALIGNAS + #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n) + #define EIGEN_ALIGNOF(x) alignof(x) +#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM + #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) + #define EIGEN_ALIGNOF(x) __alignof(x) +#elif EIGEN_COMP_MSVC + #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n)) + #define EIGEN_ALIGNOF(x) __alignof(x) +#elif EIGEN_COMP_SUNCC + // FIXME not sure about this one: + #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) + #define EIGEN_ALIGNOF(x) __alignof(x) +#else + #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler +#endif + +// If the user explicitly disable vectorization, then we also disable alignment +#if defined(EIGEN_DONT_VECTORIZE) + #if defined(EIGEN_GPUCC) + // GPU code is always vectorized and requires memory alignment for + // statically allocated buffers. + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16 + #else + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0 + #endif +#elif defined(__AVX512F__) + // 64 bytes static alignment is preferred only if really required + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64 +#elif defined(__AVX__) + // 32 bytes static alignment is preferred only if really required + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32 +#else + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16 +#endif + + +// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense +#define EIGEN_MIN_ALIGN_BYTES 16 + +// Defined the boundary (in bytes) on which the data needs to be aligned. Note +// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be +// aligned at all regardless of the value of this #define. + +#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 +#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. 
Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY. +#endif + +// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated +// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0 +#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN) + #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES + #undef EIGEN_MAX_STATIC_ALIGN_BYTES + #endif + #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 +#endif + +#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES + + // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES + + // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable + // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always + // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in + // certain common platform (compiler+architecture combinations) to avoid these problems. + // Only static alignment is really problematic (relies on nonstandard compiler extensions), + // try to keep heap alignment even when we have to disable static alignment. + #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS) + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 + #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6) + // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support. + // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use. + // 4.8 and newer seem definitely unaffected. + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 + #else + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 + #endif + + // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX + #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ + && !EIGEN_GCC3_OR_OLDER \ + && !EIGEN_COMP_SUNCC \ + && !EIGEN_OS_QNX + #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 + #else + #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 + #endif + + #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT + #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES + #else + #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 + #endif + +#endif + +// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES +#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES +#undef EIGEN_MAX_STATIC_ALIGN_BYTES +#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES +#endif + +#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) + #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT +#endif + +// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not. +// It takes into account both the user choice to explicitly enable/disable alignment (by setting EIGEN_MAX_STATIC_ALIGN_BYTES) +// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). +// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used. 
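The net effect of this block is a pair of knobs, EIGEN_MAX_STATIC_ALIGN_BYTES and (further below) EIGEN_MAX_ALIGN_BYTES, from which the EIGEN_ALIGN* shortcuts that follow are built. A hedged sketch of their intended use; on a build where static alignment ends up disabled, EIGEN_ALIGN_MAX expands to nothing and the address check below may print a nonzero value:

    // Sketch: declaring a stack buffer on the maximal static alignment boundary.
    // On a typical AVX build, EIGEN_ALIGN_MAX is EIGEN_ALIGN_TO_BOUNDARY(32),
    // i.e. alignas(32) under C++11.
    #include <Eigen/Core>
    #include <cstdint>
    #include <cstdio>

    int main() {
      EIGEN_ALIGN_MAX float buf[8];
      std::printf("EIGEN_MAX_ALIGN_BYTES = %d\n", EIGEN_MAX_ALIGN_BYTES);
      std::printf("buf mod 16 = %d\n",
                  static_cast<int>(reinterpret_cast<std::uintptr_t>(buf) % 16));
      return 0;
    }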
+ + +// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY +#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8) +#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16) +#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32) +#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64) +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 +#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES) +#else +#define EIGEN_ALIGN_MAX +#endif + + +// Dynamic alignment control + +#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0 +#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN. +#endif + +#ifdef EIGEN_DONT_ALIGN + #ifdef EIGEN_MAX_ALIGN_BYTES + #undef EIGEN_MAX_ALIGN_BYTES + #endif + #define EIGEN_MAX_ALIGN_BYTES 0 +#elif !defined(EIGEN_MAX_ALIGN_BYTES) + #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES +#endif + +#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES +#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES +#else +#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES +#endif + + +#ifndef EIGEN_UNALIGNED_VECTORIZE +#define EIGEN_UNALIGNED_VECTORIZE 1 +#endif + +//---------------------------------------------------------------------- + +// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into +// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks +#if EIGEN_MAX_ALIGN_BYTES==0 + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif +#endif + + +// The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be +// removed as gcc 4.1 and msvc 2008 are not supported anyways. +#if EIGEN_COMP_MSVC + #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled + #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later + // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. + #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 + #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER + #endif + #endif +#else + #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) + #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC + #endif +#endif + +#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC)) + + #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER) + + // Defines symbols for compile-time detection of which instructions are + // used. + // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_SSE + #define EIGEN_VECTORIZE_SSE2 + + // Detect sse3/ssse3/sse4: + // gcc and icc defines __SSE3__, ... + // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you + // want to force the use of those instructions with msvc. 
+ #ifdef __SSE3__ + #define EIGEN_VECTORIZE_SSE3 + #endif + #ifdef __SSSE3__ + #define EIGEN_VECTORIZE_SSSE3 + #endif + #ifdef __SSE4_1__ + #define EIGEN_VECTORIZE_SSE4_1 + #endif + #ifdef __SSE4_2__ + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #ifdef __AVX__ + #ifndef EIGEN_USE_SYCL + #define EIGEN_VECTORIZE_AVX + #endif + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #ifdef __AVX2__ + #ifndef EIGEN_USE_SYCL + #define EIGEN_VECTORIZE_AVX2 + #define EIGEN_VECTORIZE_AVX + #endif + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__)) + // MSVC does not expose a switch dedicated for FMA + // For MSVC, AVX2 => FMA + #define EIGEN_VECTORIZE_FMA + #endif + #if defined(__AVX512F__) + #ifndef EIGEN_VECTORIZE_FMA + #if EIGEN_COMP_GNUC + #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638). + #else + #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638). + #endif + #endif + #ifndef EIGEN_USE_SYCL + #define EIGEN_VECTORIZE_AVX512 + #define EIGEN_VECTORIZE_AVX2 + #define EIGEN_VECTORIZE_AVX + #endif + #define EIGEN_VECTORIZE_FMA + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 + #ifndef EIGEN_USE_SYCL + #ifdef __AVX512DQ__ + #define EIGEN_VECTORIZE_AVX512DQ + #endif + #ifdef __AVX512ER__ + #define EIGEN_VECTORIZE_AVX512ER + #endif + #ifdef __AVX512BF16__ + #define EIGEN_VECTORIZE_AVX512BF16 + #endif + #endif + #endif + + // Disable AVX support on broken xcode versions + #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 ) + // A nasty bug in the clang compiler shipped with xcode in a common compilation situation + // when XCode 11.0 and Mac deployment target macOS 10.15 is https://trac.macports.org/ticket/58776#no1 + #ifdef EIGEN_VECTORIZE_AVX + #undef EIGEN_VECTORIZE_AVX + #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. 
" + #ifdef EIGEN_VECTORIZE_AVX2 + #undef EIGEN_VECTORIZE_AVX2 + #endif + #ifdef EIGEN_VECTORIZE_FMA + #undef EIGEN_VECTORIZE_FMA + #endif + #ifdef EIGEN_VECTORIZE_AVX512 + #undef EIGEN_VECTORIZE_AVX512 + #endif + #ifdef EIGEN_VECTORIZE_AVX512DQ + #undef EIGEN_VECTORIZE_AVX512DQ + #endif + #ifdef EIGEN_VECTORIZE_AVX512ER + #undef EIGEN_VECTORIZE_AVX512ER + #endif + #endif + // NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with -macosx-version-min=10.15 and AVX + // NOTE using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2 produce core dumps in 3 tests + // NOTE using -macosx-version-min=10.14 produces functioning and passing tests in all cases + // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.8)" XCode 11.0 <- Produces many segfault and core dumping tests + // with -macosx-version-min=10.15 and AVX + // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with + // -macosx-version-min=10.15 and AVX + #endif + + // include files + + // This extern "C" works around a MINGW-w64 compilation issue + // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354 + // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). + // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations + // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know; + // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. + // notice that since these are C headers, the extern "C" is theoretically needed anyways. + extern "C" { + // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. + // Doing so triggers some issues with ICC. 
However old gcc versions seems to not have this file, thus: + #if EIGEN_COMP_ICC >= 1110 + #include <immintrin.h> + #else + #include <mmintrin.h> + #include <emmintrin.h> + #include <xmmintrin.h> + #ifdef EIGEN_VECTORIZE_SSE3 + #include <pmmintrin.h> + #endif + #ifdef EIGEN_VECTORIZE_SSSE3 + #include <tmmintrin.h> + #endif + #ifdef EIGEN_VECTORIZE_SSE4_1 + #include <smmintrin.h> + #endif + #ifdef EIGEN_VECTORIZE_SSE4_2 + #include <nmmintrin.h> + #endif + #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512) + #include <immintrin.h> + #endif + #endif + } // end extern "C" + + #elif defined __VSX__ + + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_VSX + #include <altivec.h> + // We need to #undef all these ugly tokens defined in <altivec.h> + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel + + #elif defined __ALTIVEC__ + + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_ALTIVEC + #include <altivec.h> + // We need to #undef all these ugly tokens defined in <altivec.h> + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel + + #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE) + + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_NEON + #include <arm_neon.h> + + // We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and + // will not select the backend automatically + #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE) + + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_SVE + #include <arm_sve.h> + + // Since we depend on knowing SVE vector lengths at compile-time, we need + // to ensure a fixed lengths is set + #if defined __ARM_FEATURE_SVE_BITS + #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS + #else +#error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set." +#endif + +#elif (defined __s390x__ && defined __VEC__) + +#define EIGEN_VECTORIZE +#define EIGEN_VECTORIZE_ZVECTOR +#include <vecintrin.h> + +#elif defined __mips_msa + +// Limit MSA optimizations to little-endian CPUs for now. +// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs? +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#if defined(__LP64__) +#define EIGEN_MIPS_64 +#else +#define EIGEN_MIPS_32 +#endif +#define EIGEN_VECTORIZE +#define EIGEN_VECTORIZE_MSA +#include <msa.h> +#endif + +#endif +#endif + +// Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all +// compilers seem to follow this. We therefore include it explicitly. 
+// See also: https://bugs.llvm.org/show_bug.cgi?id=47955 +#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC) + #include <arm_fp16.h> +#endif + +#if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380)) + // We can use the optimized fp16 to float and float to fp16 conversion routines + #define EIGEN_HAS_FP16_C + + #if defined(EIGEN_COMP_CLANG) + // Workaround for clang: The FP16C intrinsics for clang are included by + // immintrin.h, as opposed to emmintrin.h as suggested by Intel: + // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711 + #include <immintrin.h> + #endif +#endif + +#if defined EIGEN_CUDACC + #define EIGEN_VECTORIZE_GPU + #include <vector_types.h> + #if EIGEN_CUDA_SDK_VER >= 70500 + #define EIGEN_HAS_CUDA_FP16 + #endif +#endif + +#if defined(EIGEN_HAS_CUDA_FP16) + #include <cuda_runtime_api.h> + #include <cuda_fp16.h> +#endif + +#if defined(EIGEN_HIPCC) + #define EIGEN_VECTORIZE_GPU + #include <hip/hip_vector_types.h> + #define EIGEN_HAS_HIP_FP16 + #include <hip/hip_fp16.h> +#endif + + +/** \brief Namespace containing all symbols from the %Eigen library. */ +namespace Eigen { + +inline static const char *SimdInstructionSetsInUse(void) { +#if defined(EIGEN_VECTORIZE_AVX512) + return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_AVX) + return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_2) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_1) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; +#elif defined(EIGEN_VECTORIZE_SSSE3) + return "SSE, SSE2, SSE3, SSSE3"; +#elif defined(EIGEN_VECTORIZE_SSE3) + return "SSE, SSE2, SSE3"; +#elif defined(EIGEN_VECTORIZE_SSE2) + return "SSE, SSE2"; +#elif defined(EIGEN_VECTORIZE_ALTIVEC) + return "AltiVec"; +#elif defined(EIGEN_VECTORIZE_VSX) + return "VSX"; +#elif defined(EIGEN_VECTORIZE_NEON) + return "ARM NEON"; +#elif defined(EIGEN_VECTORIZE_SVE) + return "ARM SVE"; +#elif defined(EIGEN_VECTORIZE_ZVECTOR) + return "S390X ZVECTOR"; +#elif defined(EIGEN_VECTORIZE_MSA) + return "MIPS MSA"; +#else + return "None"; +#endif +} + +} // end namespace Eigen + + +#endif // EIGEN_CONFIGURE_VECTORIZATION_H diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h new file mode 100644 index 0000000..35dcaa7 --- /dev/null +++ b/Eigen/src/Core/util/Constants.h @@ -0,0 +1,563 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2020, Arm Limited and Contributors +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CONSTANTS_H +#define EIGEN_CONSTANTS_H + +namespace Eigen { + +/** This value means that a positive quantity (e.g., a size) is not known at compile-time, and that instead the value is + * stored in some runtime variable. + * + * Changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix. + */ +const int Dynamic = -1; + +/** This value means that a signed quantity (e.g., a signed index) is not known at compile-time, and that instead its value + * has to be specified at runtime. 
+ */ +const int DynamicIndex = 0xffffff; + +/** This value means that the increment to go from one value to another in a sequence is not constant for each step. + */ +const int UndefinedIncr = 0xfffffe; + +/** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>(). + * The value Infinity there means the L-infinity norm. + */ +const int Infinity = -1; + +/** This value means that the cost to evaluate an expression coefficient is either very expensive or + * cannot be known at compile time. + * + * This value has to be positive to (1) simplify cost computation, and (2) allow to distinguish between a very expensive and very very expensive expressions. + * It thus must also be large enough to make sure unrolling won't happen and that sub expressions will be evaluated, but not too large to avoid overflow. + */ +const int HugeCost = 10000; + +/** \defgroup flags Flags + * \ingroup Core_Module + * + * These are the possible bits which can be OR'ed to constitute the flags of a matrix or + * expression. + * + * It is important to note that these flags are a purely compile-time notion. They are a compile-time property of + * an expression type, implemented as enum's. They are not stored in memory at runtime, and they do not incur any + * runtime overhead. + * + * \sa MatrixBase::Flags + */ + +/** \ingroup flags + * + * for a matrix, this means that the storage order is row-major. + * If this bit is not set, the storage order is column-major. + * For an expression, this determines the storage order of + * the matrix created by evaluation of that expression. + * \sa \blank \ref TopicStorageOrders */ +const unsigned int RowMajorBit = 0x1; + +/** \ingroup flags + * means the expression should be evaluated by the calling expression */ +const unsigned int EvalBeforeNestingBit = 0x2; + +/** \ingroup flags + * \deprecated + * means the expression should be evaluated before any assignment */ +EIGEN_DEPRECATED +const unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated + +/** \ingroup flags + * + * Short version: means the expression might be vectorized + * + * Long version: means that the coefficients can be handled by packets + * and start at a memory location whose alignment meets the requirements + * of the present CPU architecture for optimized packet access. In the fixed-size + * case, there is the additional condition that it be possible to access all the + * coefficients by packets (this implies the requirement that the size be a multiple of 16 bytes, + * and that any nontrivial strides don't break the alignment). In the dynamic-size case, + * there is no such condition on the total size and strides, so it might not be possible to access + * all coeffs by packets. + * + * \note This bit can be set regardless of whether vectorization is actually enabled. + * To check for actual vectorizability, see \a ActualPacketAccessBit. + */ +const unsigned int PacketAccessBit = 0x8; + +#ifdef EIGEN_VECTORIZE +/** \ingroup flags + * + * If vectorization is enabled (EIGEN_VECTORIZE is defined) this constant + * is set to the value \a PacketAccessBit. + * + * If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant + * is set to the value 0. + */ +const unsigned int ActualPacketAccessBit = PacketAccessBit; +#else +const unsigned int ActualPacketAccessBit = 0x0; +#endif + +/** \ingroup flags + * + * Short version: means the expression can be seen as 1D vector. 
+ * + * Long version: means that one can access the coefficients + * of this expression by coeff(int), and coeffRef(int) in the case of a lvalue expression. These + * index-based access methods are guaranteed + * to not have to do any runtime computation of a (row, col)-pair from the index, so that it + * is guaranteed that whenever it is available, index-based access is at least as fast as + * (row,col)-based access. Expressions for which that isn't possible don't have the LinearAccessBit. + * + * If both PacketAccessBit and LinearAccessBit are set, then the + * packets of this expression can be accessed by packet(int), and writePacket(int) in the case of a + * lvalue expression. + * + * Typically, all vector expressions have the LinearAccessBit, but there is one exception: + * Product expressions don't have it, because it would be troublesome for vectorization, even when the + * Product is a vector expression. Thus, vector Product expressions allow index-based coefficient access but + * not index-based packet access, so they don't have the LinearAccessBit. + */ +const unsigned int LinearAccessBit = 0x10; + +/** \ingroup flags + * + * Means the expression has a coeffRef() method, i.e. is writable as its individual coefficients are directly addressable. + * This rules out read-only expressions. + * + * Note that DirectAccessBit and LvalueBit are mutually orthogonal, as there are examples of expression having one but note + * the other: + * \li writable expressions that don't have a very simple memory layout as a strided array, have LvalueBit but not DirectAccessBit + * \li Map-to-const expressions, for example Map<const Matrix>, have DirectAccessBit but not LvalueBit + * + * Expressions having LvalueBit also have their coeff() method returning a const reference instead of returning a new value. + */ +const unsigned int LvalueBit = 0x20; + +/** \ingroup flags + * + * Means that the underlying array of coefficients can be directly accessed as a plain strided array. The memory layout + * of the array of coefficients must be exactly the natural one suggested by rows(), cols(), + * outerStride(), innerStride(), and the RowMajorBit. This rules out expressions such as Diagonal, whose coefficients, + * though referencable, do not have such a regular memory layout. + * + * See the comment on LvalueBit for an explanation of how LvalueBit and DirectAccessBit are mutually orthogonal. + */ +const unsigned int DirectAccessBit = 0x40; + +/** \deprecated \ingroup flags + * + * means the first coefficient packet is guaranteed to be aligned. + * An expression cannot have the AlignedBit without the PacketAccessBit flag. + * In other words, this means we are allow to perform an aligned packet access to the first element regardless + * of the expression kind: + * \code + * expression.packet<Aligned>(0); + * \endcode + */ +EIGEN_DEPRECATED const unsigned int AlignedBit = 0x80; + +const unsigned int NestByRefBit = 0x100; + +/** \ingroup flags + * + * for an expression, this means that the storage order + * can be either row-major or column-major. + * The precise choice will be decided at evaluation time or when + * combined with other expressions. 
+ * \sa \blank \ref RowMajorBit, \ref TopicStorageOrders */ +const unsigned int NoPreferredStorageOrderBit = 0x200; + +/** \ingroup flags + * + * Means that the underlying coefficients can be accessed through pointers to the sparse (un)compressed storage format, + * that is, the expression provides: + * \code + inline const Scalar* valuePtr() const; + inline const Index* innerIndexPtr() const; + inline const Index* outerIndexPtr() const; + inline const Index* innerNonZeroPtr() const; + \endcode + */ +const unsigned int CompressedAccessBit = 0x400; + + +// list of flags that are inherited by default +const unsigned int HereditaryBits = RowMajorBit + | EvalBeforeNestingBit; + +/** \defgroup enums Enumerations + * \ingroup Core_Module + * + * Various enumerations used in %Eigen. Many of these are used as template parameters. + */ + +/** \ingroup enums + * Enum containing possible values for the \c Mode or \c UpLo parameter of + * MatrixBase::selfadjointView() and MatrixBase::triangularView(), and selfadjoint solvers. */ +enum UpLoType { + /** View matrix as a lower triangular matrix. */ + Lower=0x1, + /** View matrix as an upper triangular matrix. */ + Upper=0x2, + /** %Matrix has ones on the diagonal; to be used in combination with #Lower or #Upper. */ + UnitDiag=0x4, + /** %Matrix has zeros on the diagonal; to be used in combination with #Lower or #Upper. */ + ZeroDiag=0x8, + /** View matrix as a lower triangular matrix with ones on the diagonal. */ + UnitLower=UnitDiag|Lower, + /** View matrix as an upper triangular matrix with ones on the diagonal. */ + UnitUpper=UnitDiag|Upper, + /** View matrix as a lower triangular matrix with zeros on the diagonal. */ + StrictlyLower=ZeroDiag|Lower, + /** View matrix as an upper triangular matrix with zeros on the diagonal. */ + StrictlyUpper=ZeroDiag|Upper, + /** Used in BandMatrix and SelfAdjointView to indicate that the matrix is self-adjoint. */ + SelfAdjoint=0x10, + /** Used to support symmetric, non-selfadjoint, complex matrices. */ + Symmetric=0x20 +}; + +/** \ingroup enums + * Enum for indicating whether a buffer is aligned or not. */ +enum AlignmentType { + Unaligned=0, /**< Data pointer has no specific alignment. */ + Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */ + Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */ + Aligned32=32, /**< Data pointer is aligned on a 32 bytes boundary. */ + Aligned64=64, /**< Data pointer is aligned on a 64 bytes boundary. */ + Aligned128=128, /**< Data pointer is aligned on a 128 bytes boundary. */ + AlignedMask=255, + Aligned=16, /**< \deprecated Synonym for Aligned16. */ +#if EIGEN_MAX_ALIGN_BYTES==128 + AlignedMax = Aligned128 +#elif EIGEN_MAX_ALIGN_BYTES==64 + AlignedMax = Aligned64 +#elif EIGEN_MAX_ALIGN_BYTES==32 + AlignedMax = Aligned32 +#elif EIGEN_MAX_ALIGN_BYTES==16 + AlignedMax = Aligned16 +#elif EIGEN_MAX_ALIGN_BYTES==8 + AlignedMax = Aligned8 +#elif EIGEN_MAX_ALIGN_BYTES==0 + AlignedMax = Unaligned +#else +#error Invalid value for EIGEN_MAX_ALIGN_BYTES +#endif +}; + +/** \ingroup enums + * Enum containing possible values for the \p Direction parameter of + * Reverse, PartialReduxExpr and VectorwiseOp. */ +enum DirectionType { + /** For Reverse, all columns are reversed; + * for PartialReduxExpr and VectorwiseOp, act on columns. */ + Vertical, + /** For Reverse, all rows are reversed; + * for PartialReduxExpr and VectorwiseOp, act on rows. 
*/ + Horizontal, + /** For Reverse, both rows and columns are reversed; + * not used for PartialReduxExpr and VectorwiseOp. */ + BothDirections +}; + +/** \internal \ingroup enums + * Enum to specify how to traverse the entries of a matrix. */ +enum TraversalType { + /** \internal Default traversal, no vectorization, no index-based access */ + DefaultTraversal, + /** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */ + LinearTraversal, + /** \internal Equivalent to a slice vectorization for fixed-size matrices having good alignment + * and good size */ + InnerVectorizedTraversal, + /** \internal Vectorization path using a single loop plus scalar loops for the + * unaligned boundaries */ + LinearVectorizedTraversal, + /** \internal Generic vectorization path using one vectorized loop per row/column with some + * scalar loops to handle the unaligned boundaries */ + SliceVectorizedTraversal, + /** \internal Special case to properly handle incompatible scalar types or other defecting cases*/ + InvalidTraversal, + /** \internal Evaluate all entries at once */ + AllAtOnceTraversal +}; + +/** \internal \ingroup enums + * Enum to specify whether to unroll loops when traversing over the entries of a matrix. */ +enum UnrollingType { + /** \internal Do not unroll loops. */ + NoUnrolling, + /** \internal Unroll only the inner loop, but not the outer loop. */ + InnerUnrolling, + /** \internal Unroll both the inner and the outer loop. If there is only one loop, + * because linear traversal is used, then unroll that loop. */ + CompleteUnrolling +}; + +/** \internal \ingroup enums + * Enum to specify whether to use the default (built-in) implementation or the specialization. */ +enum SpecializedType { + Specialized, + BuiltIn +}; + +/** \ingroup enums + * Enum containing possible values for the \p _Options template parameter of + * Matrix, Array and BandMatrix. */ +enum StorageOptions { + /** Storage order is column major (see \ref TopicStorageOrders). */ + ColMajor = 0, + /** Storage order is row major (see \ref TopicStorageOrders). */ + RowMajor = 0x1, // it is only a coincidence that this is equal to RowMajorBit -- don't rely on that + /** Align the matrix itself if it is vectorizable fixed-size */ + AutoAlign = 0, + /** Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be requested to be aligned) */ // FIXME --- clarify the situation + DontAlign = 0x2 +}; + +/** \ingroup enums + * Enum for specifying whether to apply or solve on the left or right. */ +enum SideType { + /** Apply transformation on the left. */ + OnTheLeft = 1, + /** Apply transformation on the right. */ + OnTheRight = 2 +}; + +/** \ingroup enums + * Enum for specifying NaN-propagation behavior, e.g. for coeff-wise min/max. */ +enum NaNPropagationOptions { + /** Implementation defined behavior if NaNs are present. */ + PropagateFast = 0, + /** Always propagate NaNs. */ + PropagateNaN, + /** Always propagate not-NaNs. */ + PropagateNumbers +}; + +/* the following used to be written as: + * + * struct NoChange_t {}; + * namespace { + * EIGEN_UNUSED NoChange_t NoChange; + * } + * + * on the ground that it feels dangerous to disambiguate overloaded functions on enum/integer types. + * However, this leads to "variable declared but never referenced" warnings on Intel Composer XE, + * and we do not know how to get rid of them (bug 450). 
+ */ + +enum NoChange_t { NoChange }; +enum Sequential_t { Sequential }; +enum Default_t { Default }; + +/** \internal \ingroup enums + * Used in AmbiVector. */ +enum AmbiVectorMode { + IsDense = 0, + IsSparse +}; + +/** \ingroup enums + * Used as template parameter in DenseCoeffBase and MapBase to indicate + * which accessors should be provided. */ +enum AccessorLevels { + /** Read-only access via a member function. */ + ReadOnlyAccessors, + /** Read/write access via member functions. */ + WriteAccessors, + /** Direct read-only access to the coefficients. */ + DirectAccessors, + /** Direct read/write access to the coefficients. */ + DirectWriteAccessors +}; + +/** \ingroup enums + * Enum with options to give to various decompositions. */ +enum DecompositionOptions { + /** \internal Not used (meant for LDLT?). */ + Pivoting = 0x01, + /** \internal Not used (meant for LDLT?). */ + NoPivoting = 0x02, + /** Used in JacobiSVD to indicate that the square matrix U is to be computed. */ + ComputeFullU = 0x04, + /** Used in JacobiSVD to indicate that the thin matrix U is to be computed. */ + ComputeThinU = 0x08, + /** Used in JacobiSVD to indicate that the square matrix V is to be computed. */ + ComputeFullV = 0x10, + /** Used in JacobiSVD to indicate that the thin matrix V is to be computed. */ + ComputeThinV = 0x20, + /** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify + * that only the eigenvalues are to be computed and not the eigenvectors. */ + EigenvaluesOnly = 0x40, + /** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify + * that both the eigenvalues and the eigenvectors are to be computed. */ + ComputeEigenvectors = 0x80, + /** \internal */ + EigVecMask = EigenvaluesOnly | ComputeEigenvectors, + /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should + * solve the generalized eigenproblem \f$ Ax = \lambda B x \f$. */ + Ax_lBx = 0x100, + /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should + * solve the generalized eigenproblem \f$ ABx = \lambda x \f$. */ + ABx_lx = 0x200, + /** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should + * solve the generalized eigenproblem \f$ BAx = \lambda x \f$. */ + BAx_lx = 0x400, + /** \internal */ + GenEigMask = Ax_lBx | ABx_lx | BAx_lx +}; + +/** \ingroup enums + * Possible values for the \p QRPreconditioner template parameter of JacobiSVD. */ +enum QRPreconditioners { + /** Do not specify what is to be done if the SVD of a non-square matrix is asked for. */ + NoQRPreconditioner, + /** Use a QR decomposition without pivoting as the first step. */ + HouseholderQRPreconditioner, + /** Use a QR decomposition with column pivoting as the first step. */ + ColPivHouseholderQRPreconditioner, + /** Use a QR decomposition with full pivoting as the first step. */ + FullPivHouseholderQRPreconditioner +}; + +#ifdef Success +#error The preprocessor symbol 'Success' is defined, possibly by the X11 header file X.h +#endif + +/** \ingroup enums + * Enum for reporting the status of a computation. */ +enum ComputationInfo { + /** Computation was successful. */ + Success = 0, + /** The provided data did not satisfy the prerequisites. */ + NumericalIssue = 1, + /** Iterative procedure did not converge. */ + NoConvergence = 2, + /** The inputs are invalid, or the algorithm has been improperly called. + * When assertions are enabled, such errors trigger an assert. 
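A brief sketch (illustrative) of the JacobiSVD options listed above; the ComputeThin* flags request the reduced factors, which is usually what least-squares code wants:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 3);
  // Request the thin factors only; the ComputeFull* flags would give square U/V.
  Eigen::JacobiSVD<Eigen::MatrixXd> svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);
  std::cout << "singular values: " << svd.singularValues().transpose() << "\n";
  std::cout << "U is " << svd.matrixU().rows() << "x" << svd.matrixU().cols() << "\n";
}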
*/ + InvalidInput = 3 +}; + +/** \ingroup enums + * Enum used to specify how a particular transformation is stored in a matrix. + * \sa Transform, Hyperplane::transform(). */ +enum TransformTraits { + /** Transformation is an isometry. */ + Isometry = 0x1, + /** Transformation is an affine transformation stored as a (Dim+1)^2 matrix whose last row is + * assumed to be [0 ... 0 1]. */ + Affine = 0x2, + /** Transformation is an affine transformation stored as a (Dim) x (Dim+1) matrix. */ + AffineCompact = 0x10 | Affine, + /** Transformation is a general projective transformation stored as a (Dim+1)^2 matrix. */ + Projective = 0x20 +}; + +/** \internal \ingroup enums + * Enum used to choose between implementation depending on the computer architecture. */ +namespace Architecture +{ + enum Type { + Generic = 0x0, + SSE = 0x1, + AltiVec = 0x2, + VSX = 0x3, + NEON = 0x4, + MSA = 0x5, + SVE = 0x6, +#if defined EIGEN_VECTORIZE_SSE + Target = SSE +#elif defined EIGEN_VECTORIZE_ALTIVEC + Target = AltiVec +#elif defined EIGEN_VECTORIZE_VSX + Target = VSX +#elif defined EIGEN_VECTORIZE_NEON + Target = NEON +#elif defined EIGEN_VECTORIZE_SVE + Target = SVE +#elif defined EIGEN_VECTORIZE_MSA + Target = MSA +#else + Target = Generic +#endif + }; +} + +/** \internal \ingroup enums + * Enum used as template parameter in Product and product evaluators. */ +enum ProductImplType +{ DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct }; + +/** \internal \ingroup enums + * Enum used in experimental parallel implementation. */ +enum Action {GetAction, SetAction}; + +/** The type used to identify a dense storage. */ +struct Dense {}; + +/** The type used to identify a general sparse storage. */ +struct Sparse {}; + +/** The type used to identify a general solver (factored) storage. */ +struct SolverStorage {}; + +/** The type used to identify a permutation storage. */ +struct PermutationStorage {}; + +/** The type used to identify a permutation storage. */ +struct TranspositionsStorage {}; + +/** The type used to identify a matrix expression */ +struct MatrixXpr {}; + +/** The type used to identify an array expression */ +struct ArrayXpr {}; + +// An evaluator must define its shape. By default, it can be one of the following: +struct DenseShape { static std::string debugName() { return "DenseShape"; } }; +struct SolverShape { static std::string debugName() { return "SolverShape"; } }; +struct HomogeneousShape { static std::string debugName() { return "HomogeneousShape"; } }; +struct DiagonalShape { static std::string debugName() { return "DiagonalShape"; } }; +struct BandShape { static std::string debugName() { return "BandShape"; } }; +struct TriangularShape { static std::string debugName() { return "TriangularShape"; } }; +struct SelfAdjointShape { static std::string debugName() { return "SelfAdjointShape"; } }; +struct PermutationShape { static std::string debugName() { return "PermutationShape"; } }; +struct TranspositionsShape { static std::string debugName() { return "TranspositionsShape"; } }; +struct SparseShape { static std::string debugName() { return "SparseShape"; } }; + +namespace internal { + + // random access iterators based on coeff*() accessors. +struct IndexBased {}; + +// evaluator based on iterators to access coefficients. 
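Typical consumption of ComputationInfo (a sketch, not from this header): query info() after a factorization instead of assuming success:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Matrix2d A;
  A << 1, 2,
       2, 1;  // indefinite, so the Cholesky factorization must fail
  Eigen::LLT<Eigen::Matrix2d> llt(A);
  if (llt.info() == Eigen::Success)
    std::cout << "factorization succeeded\n";
  else if (llt.info() == Eigen::NumericalIssue)
    std::cout << "matrix is not positive definite\n";
}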
+struct IteratorBased {}; + +/** \internal + * Constants for comparison functors + */ +enum ComparisonName { + cmp_EQ = 0, + cmp_LT = 1, + cmp_LE = 2, + cmp_UNORD = 3, + cmp_NEQ = 4, + cmp_GT = 5, + cmp_GE = 6 +}; +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CONSTANTS_H diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h new file mode 100755 index 0000000..fe0cfec --- /dev/null +++ b/Eigen/src/Core/util/DisableStupidWarnings.h @@ -0,0 +1,106 @@ +#ifndef EIGEN_WARNINGS_DISABLED +#define EIGEN_WARNINGS_DISABLED + +#ifdef _MSC_VER + // 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p)) + // 4101 - unreferenced local variable + // 4181 - qualifier applied to reference type ignored + // 4211 - nonstandard extension used : redefined extern to static + // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data + // 4273 - QtAlignedMalloc, inconsistent DLL linkage + // 4324 - structure was padded due to declspec(align()) + // 4503 - decorated name length exceeded, name was truncated + // 4512 - assignment operator could not be generated + // 4522 - 'class' : multiple assignment operators specified + // 4700 - uninitialized local variable 'xyz' used + // 4714 - function marked as __forceinline not inlined + // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow + // 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning) + #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS + #pragma warning( push ) + #endif + #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) + +#elif defined __INTEL_COMPILER + // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) + // ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body + // typedef that may be a reference type. + // 279 - controlling expression is constant + // ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case. 
+ // 1684 - conversion from pointer to same-sized integral type (potential portability problem) + // 2259 - non-pointer conversion from "Eigen::Index={ptrdiff_t={long}}" to "int" may lose significant bits + #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS + #pragma warning push + #endif + #pragma warning disable 2196 279 1684 2259 + +#elif defined __clang__ + // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant + // this is really a stupid warning as it warns on compile-time expressions involving enums + #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS + #pragma clang diagnostic push + #endif + #pragma clang diagnostic ignored "-Wconstant-logical-operand" + #if __clang_major__ >= 3 && __clang_minor__ >= 5 + #pragma clang diagnostic ignored "-Wabsolute-value" + #endif + #if __clang_major__ >= 10 + #pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" + #endif + #if ( defined(__ALTIVEC__) || defined(__VSX__) ) && __cplusplus < 201103L + // warning: generic selections are a C11-specific feature + // ignoring warnings thrown at vec_ctf in Altivec/PacketMath.h + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + +#elif defined __GNUC__ + + #if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) + #pragma GCC diagnostic push + #endif + // g++ warns about local variables shadowing member functions, which is too strict + #pragma GCC diagnostic ignored "-Wshadow" + #if __GNUC__ == 4 && __GNUC_MINOR__ < 8 + // Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions: + #pragma GCC diagnostic ignored "-Wtype-limits" + #endif + #if __GNUC__>=6 + #pragma GCC diagnostic ignored "-Wignored-attributes" + #endif + #if __GNUC__==7 + // See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89325 + #pragma GCC diagnostic ignored "-Wattributes" + #endif +#endif + +#if defined __NVCC__ + #pragma diag_suppress boolean_controlling_expr_is_constant + // Disable the "statement is unreachable" message + #pragma diag_suppress code_is_unreachable + // Disable the "dynamic initialization in unreachable code" message + #pragma diag_suppress initialization_not_reachable + // Disable the "invalid error number" message that we get with older versions of nvcc + #pragma diag_suppress 1222 + // Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are many of them and they seem to change with every version of the compiler) + #pragma diag_suppress 2527 + #pragma diag_suppress 2529 + #pragma diag_suppress 2651 + #pragma diag_suppress 2653 + #pragma diag_suppress 2668 + #pragma diag_suppress 2669 + #pragma diag_suppress 2670 + #pragma diag_suppress 2671 + #pragma diag_suppress 2735 + #pragma diag_suppress 2737 + #pragma diag_suppress 2739 +#endif + +#else +// warnings already disabled: +# ifndef EIGEN_WARNINGS_DISABLED_2 +# define EIGEN_WARNINGS_DISABLED_2 +# elif defined(EIGEN_INTERNAL_DEBUGGING) +# error "Do not include \"DisableStupidWarnings.h\" recursively more than twice!" +# endif + +#endif // not EIGEN_WARNINGS_DISABLED diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h new file mode 100644 index 0000000..2f9cc44 --- /dev/null +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -0,0 +1,322 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
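As far as these guards suggest, a client can keep the suppressions active beyond the Eigen headers themselves by defining the opt-out macro up front; a hedged sketch of that assumed usage:

// Defining this before the first Eigen include skips the diagnostic
// push above, so the matching re-enable header has nothing to pop and
// the listed warnings stay disabled in user code as well.
#define EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#include <Eigen/Dense>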
+// +// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FORWARDDECLARATIONS_H +#define EIGEN_FORWARDDECLARATIONS_H + +namespace Eigen { +namespace internal { + +template<typename T> struct traits; + +// here we say once and for all that traits<const T> == traits<T> +// When constness must affect traits, it has to be constness on template parameters on which T itself depends. +// For example, traits<Map<const T> > != traits<Map<T> >, but +// traits<const Map<T> > == traits<Map<T> > +template<typename T> struct traits<const T> : traits<T> {}; + +template<typename Derived> struct has_direct_access +{ + enum { ret = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0 }; +}; + +template<typename Derived> struct accessors_level +{ + enum { has_direct_access = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0, + has_write_access = (traits<Derived>::Flags & LvalueBit) ? 1 : 0, + value = has_direct_access ? (has_write_access ? DirectWriteAccessors : DirectAccessors) + : (has_write_access ? WriteAccessors : ReadOnlyAccessors) + }; +}; + +template<typename T> struct evaluator_traits; + +template< typename T> struct evaluator; + +} // end namespace internal + +template<typename T> struct NumTraits; + +template<typename Derived> struct EigenBase; +template<typename Derived> class DenseBase; +template<typename Derived> class PlainObjectBase; +template<typename Derived, int Level> class DenseCoeffsBase; + +template<typename _Scalar, int _Rows, int _Cols, + int _Options = AutoAlign | +#if EIGEN_GNUC_AT(3,4) + // workaround a bug in at least gcc 3.4.6 + // the innermost ?: ternary operator is misparsed. We write it slightly + // differently and this makes gcc 3.4.6 happy, but it's ugly. + // The error would only show up with EIGEN_DEFAULT_TO_ROW_MAJOR is defined + // (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor) + ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor + : !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION + : Eigen::ColMajor ), +#else + ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor + : (_Cols==1 && _Rows!=1) ? 
Eigen::ColMajor + : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ), +#endif + int _MaxRows = _Rows, + int _MaxCols = _Cols +> class Matrix; + +template<typename Derived> class MatrixBase; +template<typename Derived> class ArrayBase; + +template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged; +template<typename ExpressionType, template <typename> class StorageBase > class NoAlias; +template<typename ExpressionType> class NestByValue; +template<typename ExpressionType> class ForceAlignedAccess; +template<typename ExpressionType> class SwapWrapper; + +template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block; +template<typename XprType, typename RowIndices, typename ColIndices> class IndexedView; +template<typename XprType, int Rows=Dynamic, int Cols=Dynamic, int Order=0> class Reshaped; + +template<typename MatrixType, int Size=Dynamic> class VectorBlock; +template<typename MatrixType> class Transpose; +template<typename MatrixType> class Conjugate; +template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp; +template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp; +template<typename ViewOp, typename MatrixType> class CwiseUnaryView; +template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp; +template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3> class CwiseTernaryOp; +template<typename Decomposition, typename Rhstype> class Solve; +template<typename XprType> class Inverse; + +template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product; + +template<typename Derived> class DiagonalBase; +template<typename _DiagonalVectorType> class DiagonalWrapper; +template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix; +template<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct; +template<typename MatrixType, int Index = 0> class Diagonal; +template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class PermutationMatrix; +template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class Transpositions; +template<typename Derived> class PermutationBase; +template<typename Derived> class TranspositionsBase; +template<typename _IndicesType> class PermutationWrapper; +template<typename _IndicesType> class TranspositionsWrapper; + +template<typename Derived, + int Level = internal::accessors_level<Derived>::has_write_access ? 
WriteAccessors : ReadOnlyAccessors +> class MapBase; +template<int OuterStrideAtCompileTime, int InnerStrideAtCompileTime> class Stride; +template<int Value = Dynamic> class InnerStride; +template<int Value = Dynamic> class OuterStride; +template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map; +template<typename Derived> class RefBase; +template<typename PlainObjectType, int Options = 0, + typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref; + +template<typename Derived> class TriangularBase; +template<typename MatrixType, unsigned int Mode> class TriangularView; +template<typename MatrixType, unsigned int Mode> class SelfAdjointView; +template<typename MatrixType> class SparseView; +template<typename ExpressionType> class WithFormat; +template<typename MatrixType> struct CommaInitializer; +template<typename Derived> class ReturnByValue; +template<typename ExpressionType> class ArrayWrapper; +template<typename ExpressionType> class MatrixWrapper; +template<typename Derived> class SolverBase; +template<typename XprType> class InnerIterator; + +namespace internal { +template<typename XprType> class generic_randaccess_stl_iterator; +template<typename XprType> class pointer_based_stl_iterator; +template<typename XprType, DirectionType Direction> class subvector_stl_iterator; +template<typename XprType, DirectionType Direction> class subvector_stl_reverse_iterator; +template<typename DecompositionType> struct kernel_retval_base; +template<typename DecompositionType> struct kernel_retval; +template<typename DecompositionType> struct image_retval_base; +template<typename DecompositionType> struct image_retval; +} // end namespace internal + +namespace internal { +template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynamic, int Subs=Dynamic, int Options=0> class BandMatrix; +} + +namespace internal { +template<typename Lhs, typename Rhs> struct product_type; + +template<bool> struct EnableIf; + +/** \internal + * \class product_evaluator + * Products need their own evaluator with more template arguments allowing for + * easier partial template specializations. + */ +template< typename T, + int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret, + typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape, + typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape, + typename LhsScalar = typename traits<typename T::Lhs>::Scalar, + typename RhsScalar = typename traits<typename T::Rhs>::Scalar + > struct product_evaluator; +} + +template<typename Lhs, typename Rhs, + int ProductType = internal::product_type<Lhs,Rhs>::value> +struct ProductReturnType; + +// this is a workaround for sun CC +template<typename Lhs, typename Rhs> struct LazyProductReturnType; + +namespace internal { + +// Provides scalar/packet-wise product and product with accumulation +// with optional conjugation of the arguments. 
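Several of the forward declarations above (Map, Stride, InnerStride) combine as follows; a minimal sketch mapping raw memory with a non-unit stride:

#include <Eigen/Dense>
#include <iostream>

int main() {
  double data[] = {1, 2, 3, 4, 5, 6, 7, 8};
  // View every other entry of the raw buffer as a length-4 vector.
  Eigen::Map<Eigen::Vector4d, Eigen::Unaligned, Eigen::InnerStride<2> > v(data);
  std::cout << v.transpose() << "\n";  // prints: 1 3 5 7
}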
+template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper; + +template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_sum_op; +template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_difference_op; +template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_conj_product_op; +template<typename LhsScalar,typename RhsScalar=LhsScalar, int NaNPropagation=PropagateFast> struct scalar_min_op; +template<typename LhsScalar,typename RhsScalar=LhsScalar, int NaNPropagation=PropagateFast> struct scalar_max_op; +template<typename Scalar> struct scalar_opposite_op; +template<typename Scalar> struct scalar_conjugate_op; +template<typename Scalar> struct scalar_real_op; +template<typename Scalar> struct scalar_imag_op; +template<typename Scalar> struct scalar_abs_op; +template<typename Scalar> struct scalar_abs2_op; +template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_absolute_difference_op; +template<typename Scalar> struct scalar_sqrt_op; +template<typename Scalar> struct scalar_rsqrt_op; +template<typename Scalar> struct scalar_exp_op; +template<typename Scalar> struct scalar_log_op; +template<typename Scalar> struct scalar_cos_op; +template<typename Scalar> struct scalar_sin_op; +template<typename Scalar> struct scalar_acos_op; +template<typename Scalar> struct scalar_asin_op; +template<typename Scalar> struct scalar_tan_op; +template<typename Scalar> struct scalar_inverse_op; +template<typename Scalar> struct scalar_square_op; +template<typename Scalar> struct scalar_cube_op; +template<typename Scalar, typename NewType> struct scalar_cast_op; +template<typename Scalar> struct scalar_random_op; +template<typename Scalar> struct scalar_constant_op; +template<typename Scalar> struct scalar_identity_op; +template<typename Scalar,bool is_complex, bool is_integer> struct scalar_sign_op; +template<typename Scalar,typename ScalarExponent> struct scalar_pow_op; +template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_hypot_op; +template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op; +template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op; + +// SpecialFunctions module +template<typename Scalar> struct scalar_lgamma_op; +template<typename Scalar> struct scalar_digamma_op; +template<typename Scalar> struct scalar_erf_op; +template<typename Scalar> struct scalar_erfc_op; +template<typename Scalar> struct scalar_ndtri_op; +template<typename Scalar> struct scalar_igamma_op; +template<typename Scalar> struct scalar_igammac_op; +template<typename Scalar> struct scalar_zeta_op; +template<typename Scalar> struct scalar_betainc_op; + +// Bessel functions in SpecialFunctions module +template<typename Scalar> struct scalar_bessel_i0_op; +template<typename Scalar> struct scalar_bessel_i0e_op; +template<typename Scalar> struct scalar_bessel_i1_op; +template<typename Scalar> struct scalar_bessel_i1e_op; +template<typename Scalar> struct scalar_bessel_j0_op; +template<typename Scalar> struct scalar_bessel_y0_op; +template<typename Scalar> struct scalar_bessel_j1_op; +template<typename Scalar> struct scalar_bessel_y1_op; +template<typename Scalar> struct scalar_bessel_k0_op; +template<typename Scalar> struct scalar_bessel_k0e_op; +template<typename Scalar> struct scalar_bessel_k1_op; +template<typename Scalar> struct scalar_bessel_k1e_op; + + +} // end namespace internal + +struct IOFormat; + +// Array module +template<typename _Scalar, 
int _Rows, int _Cols, + int _Options = AutoAlign | +#if EIGEN_GNUC_AT(3,4) + // workaround a bug in at least gcc 3.4.6 + // the innermost ?: ternary operator is misparsed. We write it slightly + // differently and this makes gcc 3.4.6 happy, but it's ugly. + // The error would only show up with EIGEN_DEFAULT_TO_ROW_MAJOR is defined + // (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor) + ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor + : !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION + : Eigen::ColMajor ), +#else + ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor + : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor + : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ), +#endif + int _MaxRows = _Rows, int _MaxCols = _Cols> class Array; +template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> class Select; +template<typename MatrixType, typename BinaryOp, int Direction> class PartialReduxExpr; +template<typename ExpressionType, int Direction> class VectorwiseOp; +template<typename MatrixType,int RowFactor,int ColFactor> class Replicate; +template<typename MatrixType, int Direction = BothDirections> class Reverse; + +template<typename MatrixType> class FullPivLU; +template<typename MatrixType> class PartialPivLU; +namespace internal { +template<typename MatrixType> struct inverse_impl; +} +template<typename MatrixType> class HouseholderQR; +template<typename MatrixType> class ColPivHouseholderQR; +template<typename MatrixType> class FullPivHouseholderQR; +template<typename MatrixType> class CompleteOrthogonalDecomposition; +template<typename MatrixType> class SVDBase; +template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD; +template<typename MatrixType> class BDCSVD; +template<typename MatrixType, int UpLo = Lower> class LLT; +template<typename MatrixType, int UpLo = Lower> class LDLT; +template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence; +template<typename Scalar> class JacobiRotation; + +// Geometry module: +template<typename Derived, int _Dim> class RotationBase; +template<typename Lhs, typename Rhs> class Cross; +template<typename Derived> class QuaternionBase; +template<typename Scalar> class Rotation2D; +template<typename Scalar> class AngleAxis; +template<typename Scalar,int Dim> class Translation; +template<typename Scalar,int Dim> class AlignedBox; +template<typename Scalar, int Options = AutoAlign> class Quaternion; +template<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform; +template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class ParametrizedLine; +template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class Hyperplane; +template<typename Scalar> class UniformScaling; +template<typename MatrixType,int Direction> class Homogeneous; + +// Sparse module: +template<typename Derived> class SparseMatrixBase; + +// MatrixFunctions module +template<typename Derived> struct MatrixExponentialReturnValue; +template<typename Derived> class MatrixFunctionReturnValue; +template<typename Derived> class MatrixSquareRootReturnValue; +template<typename Derived> class MatrixLogarithmReturnValue; +template<typename Derived> class MatrixPowerReturnValue; +template<typename Derived> class MatrixComplexPowerReturnValue; + +namespace internal { +template <typename Scalar> +struct stem_function +{ + typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar; + typedef ComplexScalar type(ComplexScalar, int); 
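A short Geometry-module sketch (illustrative only) exercising a few of the classes forward-declared above (AngleAxis, Quaternion, Translation, Transform):

#include <Eigen/Geometry>
#include <iostream>

int main() {
  // Rotate 90 degrees about Z, then translate along x.
  Eigen::AngleAxisd aa(0.5 * EIGEN_PI, Eigen::Vector3d::UnitZ());
  Eigen::Quaterniond q(aa);                                // Quaternion from AngleAxis
  Eigen::Isometry3d T = Eigen::Translation3d(1, 0, 0) * q; // a Transform<double,3,Isometry>
  std::cout << (T * Eigen::Vector3d(1, 0, 0)).transpose() << "\n";  // ~ (1 1 0)
}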
+}; +} + +} // end namespace Eigen + +#endif // EIGEN_FORWARDDECLARATIONS_H diff --git a/Eigen/src/Core/util/IndexedViewHelper.h b/Eigen/src/Core/util/IndexedViewHelper.h new file mode 100644 index 0000000..f85de30 --- /dev/null +++ b/Eigen/src/Core/util/IndexedViewHelper.h @@ -0,0 +1,186 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_INDEXED_VIEW_HELPER_H +#define EIGEN_INDEXED_VIEW_HELPER_H + +namespace Eigen { + +namespace internal { +struct symbolic_last_tag {}; +} + +/** \var last + * \ingroup Core_Module + * + * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last element/row/columns + * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&). + * + * This symbolic placeholder supports standard arithmetic operations. + * + * A typical usage example would be: + * \code + * using namespace Eigen; + * using Eigen::last; + * VectorXd v(n); + * v(seq(2,last-2)).setOnes(); + * \endcode + * + * \sa end + */ +static const symbolic::SymbolExpr<internal::symbolic_last_tag> last; // PLEASE use Eigen::last instead of Eigen::placeholders::last + +/** \var lastp1 + * \ingroup Core_Module + * + * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically + * reference the last+1 element/row/columns of the underlying vector or matrix once + * passed to DenseBase::operator()(const RowIndices&, const ColIndices&). + * + * This symbolic placeholder supports standard arithmetic operations. + * It is essentially an alias to last+fix<1>. + * + * \sa last + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN +static const auto lastp1 = last+fix<1>; +#else +// Using a FixedExpr<1> expression is important here to make sure the compiler +// can fully optimize the computation starting indices with zero overhead. +static const symbolic::AddExpr<symbolic::SymbolExpr<internal::symbolic_last_tag>,symbolic::ValueExpr<Eigen::internal::FixedInt<1> > > lastp1(last+fix<1>()); +#endif + +namespace internal { + + // Replace symbolic last/end "keywords" by their true runtime value +inline Index eval_expr_given_size(Index x, Index /* size */) { return x; } + +template<int N> +FixedInt<N> eval_expr_given_size(FixedInt<N> x, Index /*size*/) { return x; } + +template<typename Derived> +Index eval_expr_given_size(const symbolic::BaseExpr<Derived> &x, Index size) +{ + return x.derived().eval(last=size-1); +} + +// Extract increment/step at compile time +template<typename T, typename EnableIf = void> struct get_compile_time_incr { + enum { value = UndefinedIncr }; +}; + +// Analogue of std::get<0>(x), but tailored for our needs. 
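Complementing the \code examples above, a runnable sketch (assuming the Eigen 3.4 indexing API) using last together with lastp1 as an exclusive end:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(10, 0, 9);
  using Eigen::last;
  using Eigen::lastp1;
  std::cout << v(Eigen::seq(2, last - 2)).transpose() << "\n";    // 2 3 4 5 6 7
  std::cout << v(Eigen::seqN(lastp1 - 3, 3)).transpose() << "\n"; // 7 8 9
}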
+template<typename T> +EIGEN_CONSTEXPR Index first(const T& x) EIGEN_NOEXCEPT { return x.first(); } + +// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by MatrixSlice +// The generic implementation is a no-op +template<typename T,int XprSize,typename EnableIf=void> +struct IndexedViewCompatibleType { + typedef T type; +}; + +template<typename T,typename Q> +const T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) { return x; } + +//-------------------------------------------------------------------------------- +// Handling of a single Index +//-------------------------------------------------------------------------------- + +struct SingleRange { + enum { + SizeAtCompileTime = 1 + }; + SingleRange(Index val) : m_value(val) {} + Index operator[](Index) const { return m_value; } + static EIGEN_CONSTEXPR Index size() EIGEN_NOEXCEPT { return 1; } + Index first() const EIGEN_NOEXCEPT { return m_value; } + Index m_value; +}; + +template<> struct get_compile_time_incr<SingleRange> { + enum { value = 1 }; // 1 or 0 ?? +}; + +// Turn a single index into something that looks like an array (i.e., that exposes a .size(), and operator[](int) methods) +template<typename T, int XprSize> +struct IndexedViewCompatibleType<T,XprSize,typename internal::enable_if<internal::is_integral<T>::value>::type> { + // Here we could simply use Array, but maybe it's less work for the compiler to use + // a simpler wrapper as SingleRange + //typedef Eigen::Array<Index,1,1> type; + typedef SingleRange type; +}; + +template<typename T, int XprSize> +struct IndexedViewCompatibleType<T, XprSize, typename enable_if<symbolic::is_symbolic<T>::value>::type> { + typedef SingleRange type; +}; + + +template<typename T> +typename enable_if<symbolic::is_symbolic<T>::value,SingleRange>::type +makeIndexedViewCompatible(const T& id, Index size, SpecializedType) { + return eval_expr_given_size(id,size); +} + +//-------------------------------------------------------------------------------- +// Handling of all +//-------------------------------------------------------------------------------- + +struct all_t { all_t() {} }; + +// Convert a symbolic 'all' into a usable range type +template<int XprSize> +struct AllRange { + enum { SizeAtCompileTime = XprSize }; + AllRange(Index size = XprSize) : m_size(size) {} + EIGEN_CONSTEXPR Index operator[](Index i) const EIGEN_NOEXCEPT { return i; } + EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_size.value(); } + EIGEN_CONSTEXPR Index first() const EIGEN_NOEXCEPT { return 0; } + variable_if_dynamic<Index,XprSize> m_size; +}; + +template<int XprSize> +struct IndexedViewCompatibleType<all_t,XprSize> { + typedef AllRange<XprSize> type; +}; + +template<typename XprSizeType> +inline AllRange<get_fixed_value<XprSizeType>::value> makeIndexedViewCompatible(all_t , XprSizeType size, SpecializedType) { + return AllRange<get_fixed_value<XprSizeType>::value>(size); +} + +template<int Size> struct get_compile_time_incr<AllRange<Size> > { + enum { value = 1 }; +}; + +} // end namespace internal + + +/** \var all + * \ingroup Core_Module + * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or columns + */ +static const Eigen::internal::all_t all; // PLEASE use Eigen::all instead of Eigen::placeholders::all + + +namespace placeholders { + typedef symbolic::SymbolExpr<internal::symbolic_last_tag> last_t; + typedef 
symbolic::AddExpr<symbolic::SymbolExpr<internal::symbolic_last_tag>,symbolic::ValueExpr<Eigen::internal::FixedInt<1> > > end_t; + typedef Eigen::internal::all_t all_t; + + EIGEN_DEPRECATED static const all_t all = Eigen::all; // PLEASE use Eigen::all instead of Eigen::placeholders::all + EIGEN_DEPRECATED static const last_t last = Eigen::last; // PLEASE use Eigen::last instead of Eigen::placeholders::last + EIGEN_DEPRECATED static const end_t end = Eigen::lastp1; // PLEASE use Eigen::lastp1 instead of Eigen::placeholders::end +} + +} // end namespace Eigen + +#endif // EIGEN_INDEXED_VIEW_HELPER_H diff --git a/Eigen/src/Core/util/IntegralConstant.h b/Eigen/src/Core/util/IntegralConstant.h new file mode 100644 index 0000000..945d426 --- /dev/null +++ b/Eigen/src/Core/util/IntegralConstant.h @@ -0,0 +1,272 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_INTEGRAL_CONSTANT_H +#define EIGEN_INTEGRAL_CONSTANT_H + +namespace Eigen { + +namespace internal { + +template<int N> class FixedInt; +template<int N> class VariableAndFixedInt; + +/** \internal + * \class FixedInt + * + * This class embeds a compile-time integer \c N. + * + * It is similar to c++11 std::integral_constant<int,N> but with some additional features + * such as: + * - implicit conversion to int + * - arithmetic and some bitwise operators: -, +, *, /, %, &, | + * - c++98/14 compatibility with fix<N> and fix<N>() syntax to define integral constants. + * + * It is strongly discouraged to directly deal with this class FixedInt. Instances are expected to + * be created by the user using Eigen::fix<N> or Eigen::fix<N>(). In C++98-11, the former syntax does + * not create a FixedInt<N> instance but rather a pointer to function that needs to be \em cleaned-up + * using the generic helper: + * \code + * internal::cleanup_index_type<T>::type + * internal::cleanup_index_type<T,DynamicKey>::type + * \endcode + * where T can be a FixedInt<N>, a pointer to function FixedInt<N> (*)(), or numerous other integer-like representations. + * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. + * + * For convenience, you can extract the compile-time value \c N in a generic way using the following helper: + * \code + * internal::get_fixed_value<T,DefaultVal>::value + * \endcode + * that will give you \c N if T equals FixedInt<N> or FixedInt<N> (*)(), and \c DefaultVal if T does not embed any compile-time value (e.g., T==int).
+ * + * \sa fix<N>, class VariableAndFixedInt + */ +template<int N> class FixedInt +{ +public: + static const int value = N; + EIGEN_CONSTEXPR operator int() const { return value; } + FixedInt() {} + FixedInt( VariableAndFixedInt<N> other) { + #ifndef EIGEN_INTERNAL_DEBUGGING + EIGEN_UNUSED_VARIABLE(other); + #endif + eigen_internal_assert(int(other)==N); + } + + FixedInt<-N> operator-() const { return FixedInt<-N>(); } + template<int M> + FixedInt<N+M> operator+( FixedInt<M>) const { return FixedInt<N+M>(); } + template<int M> + FixedInt<N-M> operator-( FixedInt<M>) const { return FixedInt<N-M>(); } + template<int M> + FixedInt<N*M> operator*( FixedInt<M>) const { return FixedInt<N*M>(); } + template<int M> + FixedInt<N/M> operator/( FixedInt<M>) const { return FixedInt<N/M>(); } + template<int M> + FixedInt<N%M> operator%( FixedInt<M>) const { return FixedInt<N%M>(); } + template<int M> + FixedInt<N|M> operator|( FixedInt<M>) const { return FixedInt<N|M>(); } + template<int M> + FixedInt<N&M> operator&( FixedInt<M>) const { return FixedInt<N&M>(); } + +#if EIGEN_HAS_CXX14_VARIABLE_TEMPLATES + // Needed in C++14 to allow fix<N>(): + FixedInt operator() () const { return *this; } + + VariableAndFixedInt<N> operator() (int val) const { return VariableAndFixedInt<N>(val); } +#else + FixedInt ( FixedInt<N> (*)() ) {} +#endif + +#if EIGEN_HAS_CXX11 + FixedInt(std::integral_constant<int,N>) {} +#endif +}; + +/** \internal + * \class VariableAndFixedInt + * + * This class embeds both a compile-time integer \c N and a runtime integer. + * Both values are supposed to be equal unless the compile-time value \c N has a special + * value meaning that the runtime-value should be used. Depending on the context, this special + * value can be either Eigen::Dynamic (for positive quantities) or Eigen::DynamicIndex (for + * quantities that can be negative). + * + * It is the return-type of the function Eigen::fix<N>(int), and most of the time this is the only + * way it is used. It is strongly discouraged to directly deal with instances of VariableAndFixedInt. + * Indeed, in order to write generic code, it is the responsibility of the callee to properly convert + * it to either a true compile-time quantity (i.e. a FixedInt<N>), or to a runtime quantity (e.g., an Index) + * using the following generic helper: + * \code + * internal::cleanup_index_type<T>::type + * internal::cleanup_index_type<T,DynamicKey>::type + * \endcode + * where T can be a template instantiation of VariableAndFixedInt or numerous other integer-like representations. + * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. + * + * For convenience, you can also extract the compile-time value \c N using the following helper: + * \code + * internal::get_fixed_value<T,DefaultVal>::value + * \endcode + * that will give you \c N if T equals VariableAndFixedInt<N>, and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). 
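A small sketch of the intended front end: Eigen::fix<N> produces these FixedInt values, so sequence sizes and increments stay compile-time constants (illustrative, assumes Eigen 3.4):

#include <Eigen/Dense>

int main() {
  Eigen::VectorXd v(10);
  v.setZero();
  // fix<N> carries N at compile time; the slice below is therefore a
  // fixed-size expression of 4 coefficients with increment 2.
  v(Eigen::seqN(Eigen::fix<1>, Eigen::fix<4>, Eigen::fix<2>)).setOnes();  // sets v(1), v(3), v(5), v(7)
  return 0;
}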
+ * + * \sa fix<N>(int), class FixedInt + */ +template<int N> class VariableAndFixedInt +{ +public: + static const int value = N; + operator int() const { return m_value; } + VariableAndFixedInt(int val) { m_value = val; } +protected: + int m_value; +}; + +template<typename T, int Default=Dynamic> struct get_fixed_value { + static const int value = Default; +}; + +template<int N,int Default> struct get_fixed_value<FixedInt<N>,Default> { + static const int value = N; +}; + +#if !EIGEN_HAS_CXX14 +template<int N,int Default> struct get_fixed_value<FixedInt<N> (*)(),Default> { + static const int value = N; +}; +#endif + +template<int N,int Default> struct get_fixed_value<VariableAndFixedInt<N>,Default> { + static const int value = N ; +}; + +template<typename T, int N, int Default> +struct get_fixed_value<variable_if_dynamic<T,N>,Default> { + static const int value = N; +}; + +template<typename T> EIGEN_DEVICE_FUNC Index get_runtime_value(const T &x) { return x; } +#if !EIGEN_HAS_CXX14 +template<int N> EIGEN_DEVICE_FUNC Index get_runtime_value(FixedInt<N> (*)()) { return N; } +#endif + +// Cleanup integer/FixedInt/VariableAndFixedInt/etc types: + +// By default, no cleanup: +template<typename T, int DynamicKey=Dynamic, typename EnableIf=void> struct cleanup_index_type { typedef T type; }; + +// Convert any integral type (e.g., short, int, unsigned int, etc.) to Eigen::Index +template<typename T, int DynamicKey> struct cleanup_index_type<T,DynamicKey,typename internal::enable_if<internal::is_integral<T>::value>::type> { typedef Index type; }; + +#if !EIGEN_HAS_CXX14 +// In c++98/c++11, fix<N> is a pointer to function that we better cleanup to a true FixedInt<N>: +template<int N, int DynamicKey> struct cleanup_index_type<FixedInt<N> (*)(), DynamicKey> { typedef FixedInt<N> type; }; +#endif + +// If VariableAndFixedInt does not match DynamicKey, then we turn it to a pure compile-time value: +template<int N, int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<N>, DynamicKey> { typedef FixedInt<N> type; }; +// If VariableAndFixedInt matches DynamicKey, then we turn it to a pure runtime-value (aka Index): +template<int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<DynamicKey>, DynamicKey> { typedef Index type; }; + +#if EIGEN_HAS_CXX11 +template<int N, int DynamicKey> struct cleanup_index_type<std::integral_constant<int,N>, DynamicKey> { typedef FixedInt<N> type; }; +#endif + +} // end namespace internal + +#ifndef EIGEN_PARSED_BY_DOXYGEN + +#if EIGEN_HAS_CXX14_VARIABLE_TEMPLATES +template<int N> +static const internal::FixedInt<N> fix{}; +#else +template<int N> +inline internal::FixedInt<N> fix() { return internal::FixedInt<N>(); } + +// The generic typename T is mandatory. Otherwise, a code like fix<N> could refer to either the function above or this next overload. +// This way a code like fix<N> can only refer to the previous function. +template<int N,typename T> +inline internal::VariableAndFixedInt<N> fix(T val) { return internal::VariableAndFixedInt<N>(internal::convert_index<int>(val)); } +#endif + +#else // EIGEN_PARSED_BY_DOXYGEN + +/** \var fix<N>() + * \ingroup Core_Module + * + * This \em identifier permits to construct an object embedding a compile-time integer \c N. 
+ * + * \tparam N the compile-time integer value + * + * It is typically used in conjunction with the Eigen::seq and Eigen::seqN functions to pass compile-time values to them: + * \code + * seqN(10,fix<4>,fix<-3>) // <=> [10 7 4 1] + * \endcode + * + * See also the function fix(int) to pass both a compile-time and runtime value. + * + * In c++14, it is implemented as: + * \code + * template<int N> static const internal::FixedInt<N> fix{}; + * \endcode + * where internal::FixedInt<N> is an internal template class similar to + * <a href="http://en.cppreference.com/w/cpp/types/integral_constant">\c std::integral_constant </a><tt> <int,N> </tt> + * Here, \c fix<N> is thus an object of type \c internal::FixedInt<N>. + * + * In c++98/11, it is implemented as a function: + * \code + * template<int N> inline internal::FixedInt<N> fix(); + * \endcode + * Here \c fix<N> is thus a pointer to function. + * + * If for some reason you want a true object in c++98 then you can write: \code fix<N>() \endcode which is also valid in c++14. + * + * \sa fix<N>(int), seq, seqN + */ +template<int N> +static const auto fix(); + +/** \fn fix<N>(int) + * \ingroup Core_Module + * + * This function returns an object embedding both a compile-time integer \c N and a fallback runtime value \a val. + * + * \tparam N the compile-time integer value + * \param val the fallback runtime integer value + * + * This function is a more general version of the \ref fix identifier/function that can be used in template code + * where the compile-time value could turn out to actually mean "undefined at compile-time". For positive integers + * such as a size or a dimension, this case is identified by Eigen::Dynamic, whereas runtime signed integers + * (e.g., an increment/stride) are identified as Eigen::DynamicIndex. In such a case, the runtime value \a val + * will be used as a fallback. + * + * A typical use case would be: + * \code + * template<typename Derived> void foo(const MatrixBase<Derived> &mat) { + * const int N = Derived::RowsAtCompileTime==Dynamic ? Dynamic : Derived::RowsAtCompileTime/2; + * const int n = mat.rows()/2; + * ... mat( seqN(0,fix<N>(n)) ) ...; + * } + * \endcode + * In this example, the function Eigen::seqN knows that the second argument is expected to be a size. + * If the passed compile-time value N equals Eigen::Dynamic, then the proxy object returned by fix will be dismissed, and converted to an Eigen::Index of value \c n. + * Otherwise, the runtime value \c n will be dismissed, and the returned ArithmeticSequence will be of the exact same type as <tt> seqN(0,fix<N>) </tt>. + * + * \sa fix, seqN, class ArithmeticSequence + */ +template<int N> +static const auto fix(int val); + +#endif // EIGEN_PARSED_BY_DOXYGEN + +} // end namespace Eigen + +#endif // EIGEN_INTEGRAL_CONSTANT_H diff --git a/Eigen/src/Core/util/MKL_support.h b/Eigen/src/Core/util/MKL_support.h new file mode 100755 index 0000000..17963fa --- /dev/null +++ b/Eigen/src/Core/util/MKL_support.h @@ -0,0 +1,137 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * Include file with common MKL declarations + ******************************************************************************** +*/ + +#ifndef EIGEN_MKL_SUPPORT_H +#define EIGEN_MKL_SUPPORT_H + +#ifdef EIGEN_USE_MKL_ALL + #ifndef EIGEN_USE_BLAS + #define EIGEN_USE_BLAS + #endif + #ifndef EIGEN_USE_LAPACKE + #define EIGEN_USE_LAPACKE + #endif + #ifndef EIGEN_USE_MKL_VML + #define EIGEN_USE_MKL_VML + #endif +#endif + +#ifdef EIGEN_USE_LAPACKE_STRICT + #define EIGEN_USE_LAPACKE +#endif + +#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL) + #define EIGEN_USE_MKL +#endif + + +#if defined EIGEN_USE_MKL +# if (!defined MKL_DIRECT_CALL) && (!defined EIGEN_MKL_NO_DIRECT_CALL) +# define MKL_DIRECT_CALL +# define MKL_DIRECT_CALL_JUST_SET +# endif +# include <mkl.h> +/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/ +# ifndef INTEL_MKL_VERSION +# undef EIGEN_USE_MKL /* INTEL_MKL_VERSION is not even defined on older versions */ +# elif INTEL_MKL_VERSION < 100305 /* the intel-mkl-103-release-notes say this was when the lapacke.h interface was added*/ +# undef EIGEN_USE_MKL +# endif +# ifndef EIGEN_USE_MKL + /*If the MKL version is too old, undef everything*/ +# undef EIGEN_USE_MKL_ALL +# undef EIGEN_USE_LAPACKE +# undef EIGEN_USE_MKL_VML +# undef EIGEN_USE_LAPACKE_STRICT +# undef EIGEN_USE_LAPACKE +# ifdef MKL_DIRECT_CALL_JUST_SET +# undef MKL_DIRECT_CALL +# endif +# endif +#endif + +#if defined EIGEN_USE_MKL + +#define EIGEN_MKL_VML_THRESHOLD 128 + +/* MKL_DOMAIN_BLAS, etc are defined only in 10.3 update 7 */ +/* MKL_BLAS, etc are not defined in 11.2 */ +#ifdef MKL_DOMAIN_ALL +#define EIGEN_MKL_DOMAIN_ALL MKL_DOMAIN_ALL +#else +#define EIGEN_MKL_DOMAIN_ALL MKL_ALL +#endif + +#ifdef MKL_DOMAIN_BLAS +#define EIGEN_MKL_DOMAIN_BLAS MKL_DOMAIN_BLAS +#else +#define EIGEN_MKL_DOMAIN_BLAS MKL_BLAS +#endif + +#ifdef MKL_DOMAIN_FFT +#define EIGEN_MKL_DOMAIN_FFT MKL_DOMAIN_FFT +#else +#define EIGEN_MKL_DOMAIN_FFT MKL_FFT +#endif + +#ifdef MKL_DOMAIN_VML +#define EIGEN_MKL_DOMAIN_VML MKL_DOMAIN_VML +#else +#define EIGEN_MKL_DOMAIN_VML MKL_VML +#endif + +#ifdef MKL_DOMAIN_PARDISO +#define EIGEN_MKL_DOMAIN_PARDISO MKL_DOMAIN_PARDISO +#else +#define EIGEN_MKL_DOMAIN_PARDISO MKL_PARDISO +#endif 
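Client code opts in as follows; a hedged sketch based on the macro cascade in this header (it requires an MKL installation to actually compile and link):

// Assumed client-side usage: request all MKL backends before including
// Eigen. Per the cascade in this header this also defines EIGEN_USE_BLAS,
// EIGEN_USE_LAPACKE and EIGEN_USE_MKL_VML, and the version check will
// silently undo everything if the detected MKL is older than 10.3.
#define EIGEN_USE_MKL_ALL
#include <Eigen/Dense>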
+#endif + +#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL) +#include "../../misc/blas.h" +#endif + +namespace Eigen { + +typedef std::complex<double> dcomplex; +typedef std::complex<float> scomplex; + +#if defined(EIGEN_USE_MKL) +typedef MKL_INT BlasIndex; +#else +typedef int BlasIndex; +#endif + +} // end namespace Eigen + + +#endif // EIGEN_MKL_SUPPORT_H diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h new file mode 100644 index 0000000..986c3d4 --- /dev/null +++ b/Eigen/src/Core/util/Macros.h @@ -0,0 +1,1464 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MACROS_H +#define EIGEN_MACROS_H + +//------------------------------------------------------------------------------------------ +// Eigen version and basic defaults +//------------------------------------------------------------------------------------------ + +#define EIGEN_WORLD_VERSION 3 +#define EIGEN_MAJOR_VERSION 4 +#define EIGEN_MINOR_VERSION 0 + +#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ + (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ + EIGEN_MINOR_VERSION>=z)))) + +#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR +#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor +#else +#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor +#endif + +#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t +#endif + +// Upperbound on the C++ version to use. +// Expected values are 03, 11, 14, 17, etc. +// By default, let's use an arbitrarily large C++ version. +#ifndef EIGEN_MAX_CPP_VER +#define EIGEN_MAX_CPP_VER 99 +#endif + +/** Allows to disable some optimizations which might affect the accuracy of the result. + * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. + * They currently include: + * - single precision ArrayBase::sin() and ArrayBase::cos() for SSE and AVX vectorization. 
+ */ +#ifndef EIGEN_FAST_MATH +#define EIGEN_FAST_MATH 1 +#endif + +#ifndef EIGEN_STACK_ALLOCATION_LIMIT +// 131072 == 128 KB +#define EIGEN_STACK_ALLOCATION_LIMIT 131072 +#endif + +//------------------------------------------------------------------------------------------ +// Compiler identification, EIGEN_COMP_* +//------------------------------------------------------------------------------------------ + +/// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC +#ifdef __GNUC__ + #define EIGEN_COMP_GNUC (__GNUC__*10+__GNUC_MINOR__) +#else + #define EIGEN_COMP_GNUC 0 +#endif + +/// \internal EIGEN_COMP_CLANG set to major+minor version (e.g., 307 for clang 3.7) if the compiler is clang +#if defined(__clang__) + #define EIGEN_COMP_CLANG (__clang_major__*100+__clang_minor__) +#else + #define EIGEN_COMP_CLANG 0 +#endif + +/// \internal EIGEN_COMP_CASTXML set to 1 if being preprocessed by CastXML +#if defined(__castxml__) + #define EIGEN_COMP_CASTXML 1 +#else + #define EIGEN_COMP_CASTXML 0 +#endif + +/// \internal EIGEN_COMP_LLVM set to 1 if the compiler backend is llvm +#if defined(__llvm__) + #define EIGEN_COMP_LLVM 1 +#else + #define EIGEN_COMP_LLVM 0 +#endif + +/// \internal EIGEN_COMP_ICC set to __INTEL_COMPILER if the compiler is Intel compiler, 0 otherwise +#if defined(__INTEL_COMPILER) + #define EIGEN_COMP_ICC __INTEL_COMPILER +#else + #define EIGEN_COMP_ICC 0 +#endif + +/// \internal EIGEN_COMP_MINGW set to 1 if the compiler is mingw +#if defined(__MINGW32__) + #define EIGEN_COMP_MINGW 1 +#else + #define EIGEN_COMP_MINGW 0 +#endif + +/// \internal EIGEN_COMP_SUNCC set to 1 if the compiler is Solaris Studio +#if defined(__SUNPRO_CC) + #define EIGEN_COMP_SUNCC 1 +#else + #define EIGEN_COMP_SUNCC 0 +#endif + +/// \internal EIGEN_COMP_MSVC set to _MSC_VER if the compiler is Microsoft Visual C++, 0 otherwise. +#if defined(_MSC_VER) + #define EIGEN_COMP_MSVC _MSC_VER +#else + #define EIGEN_COMP_MSVC 0 +#endif + +#if defined(__NVCC__) +#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9) + #define EIGEN_COMP_NVCC ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100)) +#elif defined(__CUDACC_VER__) + #define EIGEN_COMP_NVCC __CUDACC_VER__ +#else + #error "NVCC did not define compiler version." +#endif +#else + #define EIGEN_COMP_NVCC 0 +#endif + +// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC: +// name ver MSC_VER +// 2008 9 1500 +// 2010 10 1600 +// 2012 11 1700 +// 2013 12 1800 +// 2015 14 1900 +// "15" 15 1900 +// 2017-14.1 15.0 1910 +// 2017-14.11 15.3 1911 +// 2017-14.12 15.5 1912 +// 2017-14.13 15.6 1913 +// 2017-14.14 15.7 1914 + +/// \internal EIGEN_COMP_MSVC_LANG set to _MSVC_LANG if the compiler is Microsoft Visual C++, 0 otherwise. 
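A sketch of how these version and compiler macros are meant to be consumed downstream (illustrative, not from the header):

#define EIGEN_FAST_MATH 0   // optional: trade some speed for bit-accurate sin()/cos()
#include <Eigen/Core>

static_assert(EIGEN_VERSION_AT_LEAST(3, 4, 0), "this code targets Eigen >= 3.4");

#if EIGEN_COMP_MSVC
// MSVC-specific workarounds would go here.
#endif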
+#if defined(_MSVC_LANG) + #define EIGEN_COMP_MSVC_LANG _MSVC_LANG +#else + #define EIGEN_COMP_MSVC_LANG 0 +#endif + +// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC_LANG: +// MSVC option Standard MSVC_LANG +// /std:c++14 (default as of VS 2019) C++14 201402L +// /std:c++17 C++17 201703L +// /std:c++latest >C++17 >201703L + +/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC or clang-cl +#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG) + #define EIGEN_COMP_MSVC_STRICT _MSC_VER +#else + #define EIGEN_COMP_MSVC_STRICT 0 +#endif + +/// \internal EIGEN_COMP_IBM set to xlc version if the compiler is IBM XL C++ +// XLC version +// 3.1 0x0301 +// 4.5 0x0405 +// 5.0 0x0500 +// 12.1 0x0C01 +#if defined(__IBMCPP__) || defined(__xlc__) || defined(__ibmxl__) + #define EIGEN_COMP_IBM __xlC__ +#else + #define EIGEN_COMP_IBM 0 +#endif + +/// \internal EIGEN_COMP_PGI set to PGI version if the compiler is Portland Group Compiler +#if defined(__PGI) + #define EIGEN_COMP_PGI (__PGIC__*100+__PGIC_MINOR__) +#else + #define EIGEN_COMP_PGI 0 +#endif + +/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler +#if defined(__CC_ARM) || defined(__ARMCC_VERSION) + #define EIGEN_COMP_ARM 1 +#else + #define EIGEN_COMP_ARM 0 +#endif + +/// \internal EIGEN_COMP_EMSCRIPTEN set to 1 if the compiler is Emscripten Compiler +#if defined(__EMSCRIPTEN__) + #define EIGEN_COMP_EMSCRIPTEN 1 +#else + #define EIGEN_COMP_EMSCRIPTEN 0 +#endif + + +/// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.) +#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN) + #define EIGEN_COMP_GNUC_STRICT 1 +#else + #define EIGEN_COMP_GNUC_STRICT 0 +#endif + + +#if EIGEN_COMP_GNUC + #define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x) + #define EIGEN_GNUC_AT_MOST(x,y) ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x) + #define EIGEN_GNUC_AT(x,y) ( __GNUC__==x && __GNUC_MINOR__==y ) +#else + #define EIGEN_GNUC_AT_LEAST(x,y) 0 + #define EIGEN_GNUC_AT_MOST(x,y) 0 + #define EIGEN_GNUC_AT(x,y) 0 +#endif + +// FIXME: could probably be removed as we do not support gcc 3.x anymore +#if EIGEN_COMP_GNUC && (__GNUC__ <= 3) +#define EIGEN_GCC3_OR_OLDER 1 +#else +#define EIGEN_GCC3_OR_OLDER 0 +#endif + + + +//------------------------------------------------------------------------------------------ +// Architecture identification, EIGEN_ARCH_* +//------------------------------------------------------------------------------------------ + + +#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC)) || defined(__amd64) + #define EIGEN_ARCH_x86_64 1 +#else + #define EIGEN_ARCH_x86_64 0 +#endif + +#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386) + #define EIGEN_ARCH_i386 1 +#else + #define EIGEN_ARCH_i386 0 +#endif + +#if EIGEN_ARCH_x86_64 || EIGEN_ARCH_i386 + #define EIGEN_ARCH_i386_OR_x86_64 1 +#else + #define EIGEN_ARCH_i386_OR_x86_64 0 +#endif + +/// \internal EIGEN_ARCH_ARM set to 1 if the architecture is ARM +#if defined(__arm__) + #define EIGEN_ARCH_ARM 1 +#else + #define EIGEN_ARCH_ARM 0 +#endif + +/// \internal EIGEN_ARCH_ARM64 set to 1 if the architecture is ARM64 +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) + #define EIGEN_ARCH_ARM64 1 
+#else + #define EIGEN_ARCH_ARM64 0 +#endif + +/// \internal EIGEN_ARCH_ARM_OR_ARM64 set to 1 if the architecture is ARM or ARM64 +#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64 + #define EIGEN_ARCH_ARM_OR_ARM64 1 +#else + #define EIGEN_ARCH_ARM_OR_ARM64 0 +#endif + +/// \internal EIGEN_ARCH_ARMV8 set to 1 if the architecture is armv8 or greater. +#if EIGEN_ARCH_ARM_OR_ARM64 && defined(__ARM_ARCH) && __ARM_ARCH >= 8 +#define EIGEN_ARCH_ARMV8 1 +#else +#define EIGEN_ARCH_ARMV8 0 +#endif + + +/// \internal EIGEN_HAS_ARM64_FP16 set to 1 if the architecture provides an IEEE +/// compliant Arm fp16 type +#if EIGEN_ARCH_ARM64 + #ifndef EIGEN_HAS_ARM64_FP16 + #if defined(__ARM_FP16_FORMAT_IEEE) + #define EIGEN_HAS_ARM64_FP16 1 + #else + #define EIGEN_HAS_ARM64_FP16 0 + #endif + #endif +#endif + +/// \internal EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC set to 1 if the architecture +/// supports Neon vector intrinsics for fp16. +#if EIGEN_ARCH_ARM64 + #ifndef EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 1 + #else + #define EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC 0 + #endif + #endif +#endif + +/// \internal EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC set to 1 if the architecture +/// supports Neon scalar intrinsics for fp16. +#if EIGEN_ARCH_ARM64 + #ifndef EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC + #if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) + #define EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC 1 + #endif + #endif +#endif + +/// \internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS +#if defined(__mips__) || defined(__mips) + #define EIGEN_ARCH_MIPS 1 +#else + #define EIGEN_ARCH_MIPS 0 +#endif + +/// \internal EIGEN_ARCH_SPARC set to 1 if the architecture is SPARC +#if defined(__sparc__) || defined(__sparc) + #define EIGEN_ARCH_SPARC 1 +#else + #define EIGEN_ARCH_SPARC 0 +#endif + +/// \internal EIGEN_ARCH_IA64 set to 1 if the architecture is Intel Itanium +#if defined(__ia64__) + #define EIGEN_ARCH_IA64 1 +#else + #define EIGEN_ARCH_IA64 0 +#endif + +/// \internal EIGEN_ARCH_PPC set to 1 if the architecture is PowerPC +#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC) + #define EIGEN_ARCH_PPC 1 +#else + #define EIGEN_ARCH_PPC 0 +#endif + + + +//------------------------------------------------------------------------------------------ +// Operating system identification, EIGEN_OS_* +//------------------------------------------------------------------------------------------ + +/// \internal EIGEN_OS_UNIX set to 1 if the OS is a unix variant +#if defined(__unix__) || defined(__unix) + #define EIGEN_OS_UNIX 1 +#else + #define EIGEN_OS_UNIX 0 +#endif + +/// \internal EIGEN_OS_LINUX set to 1 if the OS is based on Linux kernel +#if defined(__linux__) + #define EIGEN_OS_LINUX 1 +#else + #define EIGEN_OS_LINUX 0 +#endif + +/// \internal EIGEN_OS_ANDROID set to 1 if the OS is Android +// note: ANDROID is defined when using ndk_build, __ANDROID__ is defined when using a standalone toolchain. 
+#if defined(__ANDROID__) || defined(ANDROID) + #define EIGEN_OS_ANDROID 1 +#else + #define EIGEN_OS_ANDROID 0 +#endif + +/// \internal EIGEN_OS_GNULINUX set to 1 if the OS is GNU Linux and not Linux-based OS (e.g., not android) +#if defined(__gnu_linux__) && !(EIGEN_OS_ANDROID) + #define EIGEN_OS_GNULINUX 1 +#else + #define EIGEN_OS_GNULINUX 0 +#endif + +/// \internal EIGEN_OS_BSD set to 1 if the OS is a BSD variant +#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) + #define EIGEN_OS_BSD 1 +#else + #define EIGEN_OS_BSD 0 +#endif + +/// \internal EIGEN_OS_MAC set to 1 if the OS is MacOS +#if defined(__APPLE__) + #define EIGEN_OS_MAC 1 +#else + #define EIGEN_OS_MAC 0 +#endif + +/// \internal EIGEN_OS_QNX set to 1 if the OS is QNX +#if defined(__QNX__) + #define EIGEN_OS_QNX 1 +#else + #define EIGEN_OS_QNX 0 +#endif + +/// \internal EIGEN_OS_WIN set to 1 if the OS is Windows based +#if defined(_WIN32) + #define EIGEN_OS_WIN 1 +#else + #define EIGEN_OS_WIN 0 +#endif + +/// \internal EIGEN_OS_WIN64 set to 1 if the OS is Windows 64bits +#if defined(_WIN64) + #define EIGEN_OS_WIN64 1 +#else + #define EIGEN_OS_WIN64 0 +#endif + +/// \internal EIGEN_OS_WINCE set to 1 if the OS is Windows CE +#if defined(_WIN32_WCE) + #define EIGEN_OS_WINCE 1 +#else + #define EIGEN_OS_WINCE 0 +#endif + +/// \internal EIGEN_OS_CYGWIN set to 1 if the OS is Windows/Cygwin +#if defined(__CYGWIN__) + #define EIGEN_OS_CYGWIN 1 +#else + #define EIGEN_OS_CYGWIN 0 +#endif + +/// \internal EIGEN_OS_WIN_STRICT set to 1 if the OS is really Windows and not some variants +#if EIGEN_OS_WIN && !( EIGEN_OS_WINCE || EIGEN_OS_CYGWIN ) + #define EIGEN_OS_WIN_STRICT 1 +#else + #define EIGEN_OS_WIN_STRICT 0 +#endif + +/// \internal EIGEN_OS_SUN set to __SUNPRO_C if the OS is SUN +// compiler solaris __SUNPRO_C +// version studio +// 5.7 10 0x570 +// 5.8 11 0x580 +// 5.9 12 0x590 +// 5.10 12.1 0x5100 +// 5.11 12.2 0x5110 +// 5.12 12.3 0x5120 +#if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__)) + #define EIGEN_OS_SUN __SUNPRO_C +#else + #define EIGEN_OS_SUN 0 +#endif + +/// \internal EIGEN_OS_SOLARIS set to 1 if the OS is Solaris +#if (defined(sun) || defined(__sun)) && (defined(__SVR4) || defined(__svr4__)) + #define EIGEN_OS_SOLARIS 1 +#else + #define EIGEN_OS_SOLARIS 0 +#endif + + +//------------------------------------------------------------------------------------------ +// Detect GPU compilers and architectures +//------------------------------------------------------------------------------------------ + +// NVCC is not supported as the target platform for HIPCC +// Note that this also makes EIGEN_CUDACC and EIGEN_HIPCC mutually exclusive +#if defined(__NVCC__) && defined(__HIPCC__) + #error "NVCC as the target platform for HIPCC is currently not supported." 
+#endif + +#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA) + // Means the compiler is either nvcc or clang with CUDA enabled + #define EIGEN_CUDACC __CUDACC__ +#endif + +#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA) + // Means we are generating code for the device + #define EIGEN_CUDA_ARCH __CUDA_ARCH__ +#endif + +#if defined(EIGEN_CUDACC) +#include <cuda.h> + #define EIGEN_CUDA_SDK_VER (CUDA_VERSION * 10) +#else + #define EIGEN_CUDA_SDK_VER 0 +#endif + +#if defined(__HIPCC__) && !defined(EIGEN_NO_HIP) + // Means the compiler is HIPCC (analogous to EIGEN_CUDACC, but for HIP) + #define EIGEN_HIPCC __HIPCC__ + + // We need to include hip_runtime.h here because it pulls in + // ++ hip_common.h which contains the define for __HIP_DEVICE_COMPILE__ + // ++ host_defines.h which contains the defines for the __host__ and __device__ macros + #include <hip/hip_runtime.h> + + #if defined(__HIP_DEVICE_COMPILE__) + // analogous to EIGEN_CUDA_ARCH, but for HIP + #define EIGEN_HIP_DEVICE_COMPILE __HIP_DEVICE_COMPILE__ + #endif + + // For HIP (ROCm 3.5 and higher), we need to explicitly set the launch_bounds attribute + // value to 1024. The compiler assigns a default value of 256 when the attribute is not + // specified. This results in failures on the HIP platform, for cases when a GPU kernel + // without an explicit launch_bounds attribute is called with a threads_per_block value + // greater than 256. + // + // This is a regression in functioanlity and is expected to be fixed within the next + // couple of ROCm releases (compiler will go back to using 1024 value as the default) + // + // In the meantime, we will use a "only enabled for HIP" macro to set the launch_bounds + // attribute. + + #define EIGEN_HIP_LAUNCH_BOUNDS_1024 __launch_bounds__(1024) + +#endif + +#if !defined(EIGEN_HIP_LAUNCH_BOUNDS_1024) +#define EIGEN_HIP_LAUNCH_BOUNDS_1024 +#endif // !defined(EIGEN_HIP_LAUNCH_BOUNDS_1024) + +// Unify CUDA/HIPCC + +#if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC) +// +// If either EIGEN_CUDACC or EIGEN_HIPCC is defined, then define EIGEN_GPUCC +// +#define EIGEN_GPUCC +// +// EIGEN_HIPCC implies the HIP compiler and is used to tweak Eigen code for use in HIP kernels +// EIGEN_CUDACC implies the CUDA compiler and is used to tweak Eigen code for use in CUDA kernels +// +// In most cases the same tweaks are required to the Eigen code to enable in both the HIP and CUDA kernels. +// For those cases, the corresponding code should be guarded with +// #if defined(EIGEN_GPUCC) +// instead of +// #if defined(EIGEN_CUDACC) || defined(EIGEN_HIPCC) +// +// For cases where the tweak is specific to HIP, the code should be guarded with +// #if defined(EIGEN_HIPCC) +// +// For cases where the tweak is specific to CUDA, the code should be guarded with +// #if defined(EIGEN_CUDACC) +// +#endif + +#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) +// +// If either EIGEN_CUDA_ARCH or EIGEN_HIP_DEVICE_COMPILE is defined, then define EIGEN_GPU_COMPILE_PHASE +// +#define EIGEN_GPU_COMPILE_PHASE +// +// GPU compilers (HIPCC, NVCC) typically do two passes over the source code, +// + one to compile the source for the "host" (ie CPU) +// + another to compile the source for the "device" (ie. 
GPU)
+//
+// Code that needs to be enabled only during either the "host" or "device" compilation phase
+// needs to be guarded with a macro that indicates the current compilation phase
+//
+// EIGEN_HIP_DEVICE_COMPILE implies the device compilation phase in HIP
+// EIGEN_CUDA_ARCH implies the device compilation phase in CUDA
+//
+// In most cases, the "host" / "device" specific code is the same for both HIP and CUDA
+// For those cases, the code should be guarded with
+//    #if defined(EIGEN_GPU_COMPILE_PHASE)
+// instead of
+//    #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE)
+//
+// For cases where the tweak is specific to HIP, the code should be guarded with
+//    #if defined(EIGEN_HIP_DEVICE_COMPILE)
+//
+// For cases where the tweak is specific to CUDA, the code should be guarded with
+//    #if defined(EIGEN_CUDA_ARCH)
+//
+#endif
+
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+// EIGEN_USE_SYCL is a user-defined macro while __SYCL_DEVICE_ONLY__ is a compiler-defined macro.
+// In most cases we want to check if both macros are defined, which can be done using the define below.
+#define SYCL_DEVICE_ONLY
+#endif
+
+//------------------------------------------------------------------------------------------
+// Detect Compiler/Architecture/OS specific features
+//------------------------------------------------------------------------------------------
+
+#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG
+  // see bug 89
+  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
+#else
+  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
+#endif
+
+// Cross compiler wrapper around LLVM's __has_builtin
+#ifdef __has_builtin
+# define EIGEN_HAS_BUILTIN(x) __has_builtin(x)
+#else
+# define EIGEN_HAS_BUILTIN(x) 0
+#endif
+
+// A Clang feature extension to determine compiler features.
+// We use it to determine 'cxx_rvalue_references'
+#ifndef __has_feature
+# define __has_feature(x) 0
+#endif
+
+// Some old compilers do not support template specializations like:
+//   template<typename T,int N> void foo(const T x[N]);
+#if !( EIGEN_COMP_CLANG && ( (EIGEN_COMP_CLANG<309) \
+       || (defined(__apple_build_version__) && (__apple_build_version__ < 9000000))) \
+       || EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<49)
+#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 1
+#else
+#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0
+#endif
+
+// The macro EIGEN_CPLUSPLUS is a replacement for __cplusplus/_MSVC_LANG that
+// works for both platforms, indicating the C++ standard version number.
+//
+// With MSVC, without defining /Zc:__cplusplus, the __cplusplus macro will
+// report 199711L regardless of the language standard specified via /std.
+// We need to rely on _MSVC_LANG instead, which is only available after
+// VS2015.3.
+#if EIGEN_COMP_MSVC_LANG > 0
+#define EIGEN_CPLUSPLUS EIGEN_COMP_MSVC_LANG
+#elif EIGEN_COMP_MSVC >= 1900
+#define EIGEN_CPLUSPLUS 201103L
+#elif defined(__cplusplus)
+#define EIGEN_CPLUSPLUS __cplusplus
+#else
+#define EIGEN_CPLUSPLUS 0
+#endif
+
+// The macro EIGEN_COMP_CXXVER defines the C++ version expected by the compiler.
+// For instance, if compiling with gcc and -std=c++17, then EIGEN_COMP_CXXVER
+// is defined to 17.
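+//
+// As a short illustration (example gate only, not used by this file itself),
+// code can then be made conditional on the detected standard version:
+//   #if EIGEN_COMP_CXXVER >= 14
+//   // ... may rely on C++14 language features here ...
+//   #endif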
+#if EIGEN_CPLUSPLUS > 201703L + #define EIGEN_COMP_CXXVER 20 +#elif EIGEN_CPLUSPLUS > 201402L + #define EIGEN_COMP_CXXVER 17 +#elif EIGEN_CPLUSPLUS > 201103L + #define EIGEN_COMP_CXXVER 14 +#elif EIGEN_CPLUSPLUS >= 201103L + #define EIGEN_COMP_CXXVER 11 +#else + #define EIGEN_COMP_CXXVER 03 +#endif + +#ifndef EIGEN_HAS_CXX14_VARIABLE_TEMPLATES + #if defined(__cpp_variable_templates) && __cpp_variable_templates >= 201304 && EIGEN_MAX_CPP_VER>=14 + #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 1 + #else + #define EIGEN_HAS_CXX14_VARIABLE_TEMPLATES 0 + #endif +#endif + + +// The macros EIGEN_HAS_CXX?? defines a rough estimate of available c++ features +// but in practice we should not rely on them but rather on the availabilty of +// individual features as defined later. +// This is why there is no EIGEN_HAS_CXX17. +// FIXME: get rid of EIGEN_HAS_CXX14 and maybe even EIGEN_HAS_CXX11. +#if EIGEN_MAX_CPP_VER>=11 && EIGEN_COMP_CXXVER>=11 +#define EIGEN_HAS_CXX11 1 +#else +#define EIGEN_HAS_CXX11 0 +#endif + +#if EIGEN_MAX_CPP_VER>=14 && EIGEN_COMP_CXXVER>=14 +#define EIGEN_HAS_CXX14 1 +#else +#define EIGEN_HAS_CXX14 0 +#endif + +// Do we support r-value references? +#ifndef EIGEN_HAS_RVALUE_REFERENCES +#if EIGEN_MAX_CPP_VER>=11 && \ + (__has_feature(cxx_rvalue_references) || \ + (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600)) + #define EIGEN_HAS_RVALUE_REFERENCES 1 +#else + #define EIGEN_HAS_RVALUE_REFERENCES 0 +#endif +#endif + +// Does the compiler support C99? +// Need to include <cmath> to make sure _GLIBCXX_USE_C99 gets defined +#include <cmath> +#ifndef EIGEN_HAS_C99_MATH +#if EIGEN_MAX_CPP_VER>=11 && \ + ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ + || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \ + || (EIGEN_COMP_MSVC >= 1900) || defined(SYCL_DEVICE_ONLY)) + #define EIGEN_HAS_C99_MATH 1 +#else + #define EIGEN_HAS_C99_MATH 0 +#endif +#endif + +// Does the compiler support result_of? +// result_of was deprecated in c++17 and removed in c++ 20 +#ifndef EIGEN_HAS_STD_RESULT_OF +#if EIGEN_HAS_CXX11 && EIGEN_COMP_CXXVER < 17 +#define EIGEN_HAS_STD_RESULT_OF 1 +#else +#define EIGEN_HAS_STD_RESULT_OF 0 +#endif +#endif + +// Does the compiler support std::hash? +#ifndef EIGEN_HAS_STD_HASH +// The std::hash struct is defined in C++11 but is not labelled as a __device__ +// function and is not constexpr, so cannot be used on device. +#if EIGEN_HAS_CXX11 && !defined(EIGEN_GPU_COMPILE_PHASE) +#define EIGEN_HAS_STD_HASH 1 +#else +#define EIGEN_HAS_STD_HASH 0 +#endif +#endif // EIGEN_HAS_STD_HASH + +#ifndef EIGEN_HAS_STD_INVOKE_RESULT +#if EIGEN_MAX_CPP_VER >= 17 && EIGEN_COMP_CXXVER >= 17 +#define EIGEN_HAS_STD_INVOKE_RESULT 1 +#else +#define EIGEN_HAS_STD_INVOKE_RESULT 0 +#endif +#endif + +#ifndef EIGEN_HAS_ALIGNAS +#if EIGEN_MAX_CPP_VER>=11 && EIGEN_HAS_CXX11 && \ + ( __has_feature(cxx_alignas) \ + || EIGEN_HAS_CXX14 \ + || (EIGEN_COMP_MSVC >= 1800) \ + || (EIGEN_GNUC_AT_LEAST(4,8)) \ + || (EIGEN_COMP_CLANG>=305) \ + || (EIGEN_COMP_ICC>=1500) \ + || (EIGEN_COMP_PGI>=1500) \ + || (EIGEN_COMP_SUNCC>=0x5130)) +#define EIGEN_HAS_ALIGNAS 1 +#else +#define EIGEN_HAS_ALIGNAS 0 +#endif +#endif + +// Does the compiler support type_traits? +// - full support of type traits was added only to GCC 5.1.0. 
+// - 20150626 corresponds to the last release of 4.x libstdc++ +#ifndef EIGEN_HAS_TYPE_TRAITS +#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_HAS_CXX11 || EIGEN_COMP_MSVC >= 1700) \ + && ((!EIGEN_COMP_GNUC_STRICT) || EIGEN_GNUC_AT_LEAST(5, 1)) \ + && ((!defined(__GLIBCXX__)) || __GLIBCXX__ > 20150626) +#define EIGEN_HAS_TYPE_TRAITS 1 +#define EIGEN_INCLUDE_TYPE_TRAITS +#else +#define EIGEN_HAS_TYPE_TRAITS 0 +#endif +#endif + +// Does the compiler support variadic templates? +#ifndef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) \ + && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_COMP_NVCC >= 80000) ) + // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: + // this prevents nvcc from crashing when compiling Eigen on Tegra X1 +#define EIGEN_HAS_VARIADIC_TEMPLATES 1 +#elif EIGEN_MAX_CPP_VER>=11 && (EIGEN_COMP_CXXVER >= 11) && defined(SYCL_DEVICE_ONLY) +#define EIGEN_HAS_VARIADIC_TEMPLATES 1 +#else +#define EIGEN_HAS_VARIADIC_TEMPLATES 0 +#endif +#endif + +// Does the compiler fully support const expressions? (as in c++14) +#ifndef EIGEN_HAS_CONSTEXPR + #if defined(EIGEN_CUDACC) + // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above + #if EIGEN_MAX_CPP_VER>=14 && (EIGEN_COMP_CXXVER >= 11 && (EIGEN_COMP_CLANG || EIGEN_COMP_NVCC >= 70500)) + #define EIGEN_HAS_CONSTEXPR 1 + #endif + #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (EIGEN_COMP_CXXVER >= 14) || \ + (EIGEN_GNUC_AT_LEAST(4,8) && (EIGEN_COMP_CXXVER >= 11)) || \ + (EIGEN_COMP_CLANG >= 306 && (EIGEN_COMP_CXXVER >= 11))) + #define EIGEN_HAS_CONSTEXPR 1 + #endif + + #ifndef EIGEN_HAS_CONSTEXPR + #define EIGEN_HAS_CONSTEXPR 0 + #endif + +#endif // EIGEN_HAS_CONSTEXPR + +#if EIGEN_HAS_CONSTEXPR +#define EIGEN_CONSTEXPR constexpr +#else +#define EIGEN_CONSTEXPR +#endif + +// Does the compiler support C++11 math? +// Let's be conservative and enable the default C++11 implementation only if we are sure it exists +#ifndef EIGEN_HAS_CXX11_MATH + #if EIGEN_MAX_CPP_VER>=11 && ((EIGEN_COMP_CXXVER > 11) || (EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ + && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC)) + #define EIGEN_HAS_CXX11_MATH 1 + #else + #define EIGEN_HAS_CXX11_MATH 0 + #endif +#endif + +// Does the compiler support proper C++11 containers? +#ifndef EIGEN_HAS_CXX11_CONTAINERS + #if EIGEN_MAX_CPP_VER>=11 && \ + ((EIGEN_COMP_CXXVER > 11) \ + || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400))) + #define EIGEN_HAS_CXX11_CONTAINERS 1 + #else + #define EIGEN_HAS_CXX11_CONTAINERS 0 + #endif +#endif + +// Does the compiler support C++11 noexcept? 
+#ifndef EIGEN_HAS_CXX11_NOEXCEPT + #if EIGEN_MAX_CPP_VER>=11 && \ + (__has_feature(cxx_noexcept) \ + || (EIGEN_COMP_CXXVER > 11) \ + || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC>=1400))) + #define EIGEN_HAS_CXX11_NOEXCEPT 1 + #else + #define EIGEN_HAS_CXX11_NOEXCEPT 0 + #endif +#endif + +#ifndef EIGEN_HAS_CXX11_ATOMIC + #if EIGEN_MAX_CPP_VER>=11 && \ + (__has_feature(cxx_atomic) \ + || (EIGEN_COMP_CXXVER > 11) \ + || ((EIGEN_COMP_CXXVER == 11) && (EIGEN_COMP_MSVC==0 || EIGEN_COMP_MSVC >= 1700))) + #define EIGEN_HAS_CXX11_ATOMIC 1 + #else + #define EIGEN_HAS_CXX11_ATOMIC 0 + #endif +#endif + +#ifndef EIGEN_HAS_CXX11_OVERRIDE_FINAL + #if EIGEN_MAX_CPP_VER>=11 && \ + (EIGEN_COMP_CXXVER >= 11 || EIGEN_COMP_MSVC >= 1700) + #define EIGEN_HAS_CXX11_OVERRIDE_FINAL 1 + #else + #define EIGEN_HAS_CXX11_OVERRIDE_FINAL 0 + #endif +#endif + +// NOTE: the required Apple's clang version is very conservative +// and it could be that XCode 9 works just fine. +// NOTE: the MSVC version is based on https://en.cppreference.com/w/cpp/compiler_support +// and not tested. +#ifndef EIGEN_HAS_CXX17_OVERALIGN +#if EIGEN_MAX_CPP_VER>=17 && EIGEN_COMP_CXXVER>=17 && ( \ + (EIGEN_COMP_MSVC >= 1912) \ + || (EIGEN_GNUC_AT_LEAST(7,0)) \ + || ((!defined(__apple_build_version__)) && (EIGEN_COMP_CLANG>=500)) \ + || (( defined(__apple_build_version__)) && (__apple_build_version__>=10000000)) \ + ) +#define EIGEN_HAS_CXX17_OVERALIGN 1 +#else +#define EIGEN_HAS_CXX17_OVERALIGN 0 +#endif +#endif + +#if defined(EIGEN_CUDACC) && EIGEN_HAS_CONSTEXPR + // While available already with c++11, this is useful mostly starting with c++14 and relaxed constexpr rules + #if defined(__NVCC__) + // nvcc considers constexpr functions as __host__ __device__ with the option --expt-relaxed-constexpr + #ifdef __CUDACC_RELAXED_CONSTEXPR__ + #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC + #endif + #elif defined(__clang__) && defined(__CUDA__) && __has_feature(cxx_relaxed_constexpr) + // clang++ always considers constexpr functions as implicitly __host__ __device__ + #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC + #endif +#endif + +// Does the compiler support the __int128 and __uint128_t extensions for 128-bit +// integer arithmetic? +// +// Clang and GCC define __SIZEOF_INT128__ when these extensions are supported, +// but we avoid using them in certain cases: +// +// * Building using Clang for Windows, where the Clang runtime library has +// 128-bit support only on LP64 architectures, but Windows is LLP64. 
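+//
+// For illustration, a consumer of this macro would typically write the
+// following (sketch only; Int128 is a hypothetical typedef, not defined here):
+//   #if EIGEN_HAS_BUILTIN_INT128
+//   typedef __int128 Int128;
+//   #else
+//   // fall back to emulated 128-bit arithmetic
+//   #endif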
+#ifndef EIGEN_HAS_BUILTIN_INT128
+#if defined(__SIZEOF_INT128__) && !(EIGEN_OS_WIN && EIGEN_COMP_CLANG)
+#define EIGEN_HAS_BUILTIN_INT128 1
+#else
+#define EIGEN_HAS_BUILTIN_INT128 0
+#endif
+#endif
+
+//------------------------------------------------------------------------------------------
+// Preprocessor programming helpers
+//------------------------------------------------------------------------------------------
+
+// This macro can be used to prevent macro expansion, e.g.:
+//   std::max EIGEN_NOT_A_MACRO(a,b)
+#define EIGEN_NOT_A_MACRO
+
+#define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl;
+
+// concatenate two tokens
+#define EIGEN_CAT2(a,b) a ## b
+#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
+
+#define EIGEN_COMMA ,
+
+// convert a token to a string
+#define EIGEN_MAKESTRING2(a) #a
+#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
+
+// EIGEN_STRONG_INLINE is a stronger version of inline, using __forceinline on MSVC,
+// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
+// but GCC is still doing fine with just inline.
+#ifndef EIGEN_STRONG_INLINE
+#if (EIGEN_COMP_MSVC || EIGEN_COMP_ICC) && !defined(EIGEN_GPUCC)
+#define EIGEN_STRONG_INLINE __forceinline
+#else
+#define EIGEN_STRONG_INLINE inline
+#endif
+#endif
+
+// EIGEN_ALWAYS_INLINE is the strongest: it has the effect of making the function inline and adding every possible
+// attribute to maximize inlining. This should only be used when really necessary: in particular,
+// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
+// FIXME with the always_inline attribute,
+// gcc 3.4.x and 4.1 report the following compilation error:
+//   Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
+//    : function body not available
+//   See also bug 1367
+#if EIGEN_GNUC_AT_LEAST(4,2) && !defined(SYCL_DEVICE_ONLY)
+#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
+#else
+#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
+#endif
+
+#if EIGEN_COMP_GNUC
+#define EIGEN_DONT_INLINE __attribute__((noinline))
+#elif EIGEN_COMP_MSVC
+#define EIGEN_DONT_INLINE __declspec(noinline)
+#else
+#define EIGEN_DONT_INLINE
+#endif
+
+#if EIGEN_COMP_GNUC
+#define EIGEN_PERMISSIVE_EXPR __extension__
+#else
+#define EIGEN_PERMISSIVE_EXPR
+#endif
+
+// GPU stuff
+
+// Disable some features when compiling with GPU compilers (NVCC/clang-cuda/SYCL/HIPCC)
+#if defined(EIGEN_CUDACC) || defined(SYCL_DEVICE_ONLY) || defined(EIGEN_HIPCC)
+  // Do not try asserts on device code
+  #ifndef EIGEN_NO_DEBUG
+  #define EIGEN_NO_DEBUG
+  #endif
+
+  #ifdef EIGEN_INTERNAL_DEBUGGING
+  #undef EIGEN_INTERNAL_DEBUGGING
+  #endif
+
+  #ifdef EIGEN_EXCEPTIONS
+  #undef EIGEN_EXCEPTIONS
+  #endif
+#endif
+
+#if defined(SYCL_DEVICE_ONLY)
+  #ifndef EIGEN_DONT_VECTORIZE
+  #define EIGEN_DONT_VECTORIZE
+  #endif
+  #define EIGEN_DEVICE_FUNC __attribute__((flatten)) __attribute__((always_inline))
+// All functions callable from CUDA/HIP code must be qualified with __device__
+#elif defined(EIGEN_GPUCC)
+  #define EIGEN_DEVICE_FUNC __host__ __device__
+#else
+  #define EIGEN_DEVICE_FUNC
+#endif
+
+
+// This macro allows one to get rid of linking errors about multiply defined functions.
+//  - static is not very good because it prevents definitions from different object files from being merged.
+// So static causes the resulting linked executable to be bloated with multiple copies of the same function. +// - inline is not perfect either as it unwantedly hints the compiler toward inlining the function. +#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC +#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC inline + +#ifdef NDEBUG +# ifndef EIGEN_NO_DEBUG +# define EIGEN_NO_DEBUG +# endif +#endif + +// eigen_plain_assert is where we implement the workaround for the assert() bug in GCC <= 4.3, see bug 89 +#ifdef EIGEN_NO_DEBUG + #ifdef SYCL_DEVICE_ONLY // used to silence the warning on SYCL device + #define eigen_plain_assert(x) EIGEN_UNUSED_VARIABLE(x) + #else + #define eigen_plain_assert(x) + #endif +#else + #if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO + namespace Eigen { + namespace internal { + inline bool copy_bool(bool b) { return b; } + } + } + #define eigen_plain_assert(x) assert(x) + #else + // work around bug 89 + #include <cstdlib> // for abort + #include <iostream> // for std::cerr + + namespace Eigen { + namespace internal { + // trivial function copying a bool. Must be EIGEN_DONT_INLINE, so we implement it after including Eigen headers. + // see bug 89. + namespace { + EIGEN_DONT_INLINE bool copy_bool(bool b) { return b; } + } + inline void assert_fail(const char *condition, const char *function, const char *file, int line) + { + std::cerr << "assertion failed: " << condition << " in function " << function << " at " << file << ":" << line << std::endl; + abort(); + } + } + } + #define eigen_plain_assert(x) \ + do { \ + if(!Eigen::internal::copy_bool(x)) \ + Eigen::internal::assert_fail(EIGEN_MAKESTRING(x), __PRETTY_FUNCTION__, __FILE__, __LINE__); \ + } while(false) + #endif +#endif + +// eigen_assert can be overridden +#ifndef eigen_assert +#define eigen_assert(x) eigen_plain_assert(x) +#endif + +#ifdef EIGEN_INTERNAL_DEBUGGING +#define eigen_internal_assert(x) eigen_assert(x) +#else +#define eigen_internal_assert(x) +#endif + +#ifdef EIGEN_NO_DEBUG +#define EIGEN_ONLY_USED_FOR_DEBUG(x) EIGEN_UNUSED_VARIABLE(x) +#else +#define EIGEN_ONLY_USED_FOR_DEBUG(x) +#endif + +#ifndef EIGEN_NO_DEPRECATED_WARNING + #if EIGEN_COMP_GNUC + #define EIGEN_DEPRECATED __attribute__((deprecated)) + #elif EIGEN_COMP_MSVC + #define EIGEN_DEPRECATED __declspec(deprecated) + #else + #define EIGEN_DEPRECATED + #endif +#else + #define EIGEN_DEPRECATED +#endif + +#if EIGEN_COMP_GNUC +#define EIGEN_UNUSED __attribute__((unused)) +#else +#define EIGEN_UNUSED +#endif + +// Suppresses 'unused variable' warnings. +namespace Eigen { + namespace internal { + template<typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ignore_unused_variable(const T&) {} + } +} +#define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var); + +#if !defined(EIGEN_ASM_COMMENT) + #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64) + #define EIGEN_ASM_COMMENT(X) __asm__("#" X) + #else + #define EIGEN_ASM_COMMENT(X) + #endif +#endif + + +// Acts as a barrier preventing operations involving `X` from crossing. This +// occurs, for example, in the fast rounding trick where a magic constant is +// added then subtracted, which is otherwise compiled away with -ffast-math. +// +// See bug 1674 +#if !defined(EIGEN_OPTIMIZATION_BARRIER) + #if EIGEN_COMP_GNUC + // According to https://gcc.gnu.org/onlinedocs/gcc/Constraints.html: + // X: Any operand whatsoever. 
+ // r: A register operand is allowed provided that it is in a general + // register. + // g: Any register, memory or immediate integer operand is allowed, except + // for registers that are not general registers. + // w: (AArch32/AArch64) Floating point register, Advanced SIMD vector + // register or SVE vector register. + // x: (SSE) Any SSE register. + // (AArch64) Like w, but restricted to registers 0 to 15 inclusive. + // v: (PowerPC) An Altivec vector register. + // wa:(PowerPC) A VSX register. + // + // "X" (uppercase) should work for all cases, though this seems to fail for + // some versions of GCC for arm/aarch64 with + // "error: inconsistent operand constraints in an 'asm'" + // Clang x86_64/arm/aarch64 seems to require "g" to support both scalars and + // vectors, otherwise + // "error: non-trivial scalar-to-vector conversion, possible invalid + // constraint for vector type" + // + // GCC for ppc64le generates an internal compiler error with x/X/g. + // GCC for AVX generates an internal compiler error with X. + // + // Tested on icc/gcc/clang for sse, avx, avx2, avx512dq + // gcc for arm, aarch64, + // gcc for ppc64le, + // both vectors and scalars. + // + // Note that this is restricted to plain types - this will not work + // directly for std::complex<T>, Eigen::half, Eigen::bfloat16. For these, + // you will need to apply to the underlying POD type. + #if EIGEN_ARCH_PPC && EIGEN_COMP_GNUC_STRICT + // This seems to be broken on clang. Packet4f is loaded into a single + // register rather than a vector, zeroing out some entries. Integer + // types also generate a compile error. + // General, Altivec, VSX. + #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v,wa" (X)); + #elif EIGEN_ARCH_ARM_OR_ARM64 + // General, NEON. + #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+g,w" (X)); + #elif EIGEN_ARCH_i386_OR_x86_64 + // General, SSE. + #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+g,x" (X)); + #else + // Not implemented for other architectures. + #define EIGEN_OPTIMIZATION_BARRIER(X) + #endif + #else + // Not implemented for other compilers. + #define EIGEN_OPTIMIZATION_BARRIER(X) + #endif +#endif + +#if EIGEN_COMP_MSVC + // NOTE MSVC often gives C4127 warnings with compiletime if statements. See bug 1362. + // This workaround is ugly, but it does the job. +# define EIGEN_CONST_CONDITIONAL(cond) (void)0, cond +#else +# define EIGEN_CONST_CONDITIONAL(cond) cond +#endif + +#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD + #define EIGEN_RESTRICT +#endif +#ifndef EIGEN_RESTRICT + #define EIGEN_RESTRICT __restrict +#endif + + +#ifndef EIGEN_DEFAULT_IO_FORMAT +#ifdef EIGEN_MAKING_DOCS +// format used in Eigen's documentation +// needed to define it here as escaping characters in CMake add_definition's argument seems very problematic. +#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat(3, 0, " ", "\n", "", "") +#else +#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat() +#endif +#endif + +// just an empty macro ! +#define EIGEN_EMPTY + + +// When compiling CUDA/HIP device code with NVCC or HIPCC +// pull in math functions from the global namespace. +// In host mode, and when device code is compiled with clang, +// use the std versions. 
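+//
+// For instance, a function compiled for both host and device can be written as
+// follows (sketch only; my_sqrt is a hypothetical helper, not part of this file):
+//   template<typename T> EIGEN_DEVICE_FUNC T my_sqrt(const T& x) {
+//     EIGEN_USING_STD(sqrt)
+//     return sqrt(x);  // resolves to ::sqrt on device with NVCC/HIPCC, std::sqrt otherwise
+//   }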
+#if (defined(EIGEN_CUDA_ARCH) && defined(__NVCC__)) || defined(EIGEN_HIP_DEVICE_COMPILE) + #define EIGEN_USING_STD(FUNC) using ::FUNC; +#else + #define EIGEN_USING_STD(FUNC) using std::FUNC; +#endif + +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || (EIGEN_COMP_MSVC == 1900 && EIGEN_COMP_NVCC)) + // For older MSVC versions, as well as 1900 && CUDA 8, using the base operator is necessary, + // otherwise we get duplicate definition errors + // For later MSVC versions, we require explicit operator= definition, otherwise we get + // use of implicitly deleted operator errors. + // (cf Bugs 920, 1000, 1324, 2291) + #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + using Base::operator =; +#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) + #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + using Base::operator =; \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \ + template <typename OtherDerived> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase<OtherDerived>& other) { Base::operator=(other.derived()); return *this; } +#else + #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + using Base::operator =; \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \ + { \ + Base::operator=(other); \ + return *this; \ + } +#endif + + +/** + * \internal + * \brief Macro to explicitly define the default copy constructor. + * This is necessary, because the implicit definition is deprecated if the copy-assignment is overridden. + */ +#if EIGEN_HAS_CXX11 +#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) CLASS(const CLASS&) = default; +#else +#define EIGEN_DEFAULT_COPY_CONSTRUCTOR(CLASS) +#endif + + + +/** \internal + * \brief Macro to manually inherit assignment operators. + * This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined. + * With C++11 or later this also default-implements the copy-constructor + */ +#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) \ + EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + EIGEN_DEFAULT_COPY_CONSTRUCTOR(Derived) + +/** \internal + * \brief Macro to manually define default constructors and destructors. + * This is necessary when the copy constructor is re-defined. + * For empty helper classes this should usually be protected, to avoid accidentally creating empty objects. + * + * Hiding the default destructor lead to problems in C++03 mode together with boost::multiprecision + */ +#if EIGEN_HAS_CXX11 +#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived) \ + Derived() = default; \ + ~Derived() = default; +#else +#define EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(Derived) \ + Derived() {}; \ + /* ~Derived() {}; */ +#endif + + + + + +/** +* Just a side note. Commenting within defines works only by documenting +* behind the object (via '!<'). Comments cannot be multi-line and thus +* we have these extra long lines. What is confusing doxygen over here is +* that we use '\' and basically have a bunch of typedefs with their +* documentation in a single line. +**/ + +#define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \ + typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. 
 */ \
+  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T would correspond to RealScalar. */ \
+  typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \
+  typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \
+  typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
+  typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \
+  enum CompileTimeTraits \
+      { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
+        ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
+        Flags = Eigen::internal::traits<Derived>::Flags, \
+        SizeAtCompileTime = Base::SizeAtCompileTime, \
+        MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
+        IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
+  using Base::derived; \
+  using Base::const_cast_derived;
+
+
+// FIXME Maybe the EIGEN_DENSE_PUBLIC_INTERFACE could be removed as importing PacketScalar is rarely needed
+#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
+  EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
+  typedef typename Base::PacketScalar PacketScalar;
+
+
+#define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
+#define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
+
+// EIGEN_SIZE_MIN_PREFER_DYNAMIC gives the min between compile-time sizes. 0 has absolute priority, followed by 1,
+// followed by Dynamic, followed by other finite values. The reason for giving Dynamic the priority over
+// finite values is that min(3, Dynamic) should be Dynamic, since that could be anything between 0 and 3.
+#define EIGEN_SIZE_MIN_PREFER_DYNAMIC(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
+                           : ((int)a == 1 || (int)b == 1) ? 1 \
+                           : ((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
+                           : ((int)a <= (int)b) ? (int)a : (int)b)
+
+// EIGEN_SIZE_MIN_PREFER_FIXED is a variant of EIGEN_SIZE_MIN_PREFER_DYNAMIC comparing MaxSizes. The difference is that finite values
+// now have priority over Dynamic, so that min(3, Dynamic) gives 3. Indeed, whatever the actual value is
+// (between 0 and 3), it is not more than 3.
+#define EIGEN_SIZE_MIN_PREFER_FIXED(a,b)  (((int)a == 0 || (int)b == 0) ? 0 \
+                           : ((int)a == 1 || (int)b == 1) ? 1 \
+                           : ((int)a == Dynamic && (int)b == Dynamic) ? Dynamic \
+                           : ((int)a == Dynamic) ? (int)b \
+                           : ((int)b == Dynamic) ? (int)a \
+                           : ((int)a <= (int)b) ? (int)a : (int)b)
+
+// see EIGEN_SIZE_MIN_PREFER_DYNAMIC. No need for a separate variant for MaxSizes here.
+#define EIGEN_SIZE_MAX(a,b) (((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
+                           : ((int)a >= (int)b) ?
(int)a : (int)b) + +#define EIGEN_LOGICAL_XOR(a,b) (((a) || (b)) && !((a) && (b))) + +#define EIGEN_IMPLIES(a,b) (!(a) || (b)) + +#if EIGEN_HAS_BUILTIN(__builtin_expect) || EIGEN_COMP_GNUC +#define EIGEN_PREDICT_FALSE(x) (__builtin_expect(x, false)) +#define EIGEN_PREDICT_TRUE(x) (__builtin_expect(false || (x), true)) +#else +#define EIGEN_PREDICT_FALSE(x) (x) +#define EIGEN_PREDICT_TRUE(x) (x) +#endif + +// the expression type of a standard coefficient wise binary operation +#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \ + CwiseBinaryOp< \ + EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \ + typename internal::traits<LHS>::Scalar, \ + typename internal::traits<RHS>::Scalar \ + >, \ + const LHS, \ + const RHS \ + > + +#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \ + template<typename OtherDerived> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \ + (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \ + { \ + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \ + } + +#define EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,TYPEA,TYPEB) \ + (Eigen::internal::has_ReturnType<Eigen::ScalarBinaryOpTraits<TYPEA,TYPEB,EIGEN_CAT(EIGEN_CAT(Eigen::internal::scalar_,OPNAME),_op)<TYPEA,TYPEB> > >::value) + +#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \ + CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<typename internal::traits<EXPR>::Scalar,SCALAR>, const EXPR, \ + const typename internal::plain_constant_type<EXPR,SCALAR>::type> + +#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \ + CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<SCALAR,typename internal::traits<EXPR>::Scalar>, \ + const typename internal::plain_constant_type<EXPR,SCALAR>::type, const EXPR> + +// Workaround for MSVC 2010 (see ML thread "patch with compile for for MSVC 2010") +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC_STRICT<=1600) +#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if<true,X>::type +#else +#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X +#endif + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \ + template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ + EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type,OPNAME))\ + (METHOD)(const T& scalar) const { \ + typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,Scalar,T)>::type PromotedT; \ + return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \ + typename internal::plain_constant_type<Derived,PromotedT>::type(derived().rows(), derived().cols(), internal::scalar_constant_op<PromotedT>(scalar))); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + template <typename T> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend \ + EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type,Derived,OPNAME)) \ + (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ + typedef typename internal::promote_scalar_arg<Scalar,T,EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,T,Scalar)>::type PromotedT; \ + return 
EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \ + typename internal::plain_constant_type<Derived,PromotedT>::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op<PromotedT>(scalar)), matrix.derived()); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) + + +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) && !defined(EIGEN_HIP_DEVICE_COMPILE) + #define EIGEN_EXCEPTIONS +#endif + + +#ifdef EIGEN_EXCEPTIONS +# define EIGEN_THROW_X(X) throw X +# define EIGEN_THROW throw +# define EIGEN_TRY try +# define EIGEN_CATCH(X) catch (X) +#else +# if defined(EIGEN_CUDA_ARCH) +# define EIGEN_THROW_X(X) asm("trap;") +# define EIGEN_THROW asm("trap;") +# elif defined(EIGEN_HIP_DEVICE_COMPILE) +# define EIGEN_THROW_X(X) asm("s_trap 0") +# define EIGEN_THROW asm("s_trap 0") +# else +# define EIGEN_THROW_X(X) std::abort() +# define EIGEN_THROW std::abort() +# endif +# define EIGEN_TRY if (true) +# define EIGEN_CATCH(X) else +#endif + + +#if EIGEN_HAS_CXX11_NOEXCEPT +# define EIGEN_INCLUDE_TYPE_TRAITS +# define EIGEN_NOEXCEPT noexcept +# define EIGEN_NOEXCEPT_IF(x) noexcept(x) +# define EIGEN_NO_THROW noexcept(true) +# define EIGEN_EXCEPTION_SPEC(X) noexcept(false) +#else +# define EIGEN_NOEXCEPT +# define EIGEN_NOEXCEPT_IF(x) +# define EIGEN_NO_THROW throw() +# if EIGEN_COMP_MSVC || EIGEN_COMP_CXXVER>=17 + // MSVC does not support exception specifications (warning C4290), + // and they are deprecated in c++11 anyway. This is even an error in c++17. +# define EIGEN_EXCEPTION_SPEC(X) throw() +# else +# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# endif +#endif + +#if EIGEN_HAS_VARIADIC_TEMPLATES +// The all function is used to enable a variadic version of eigen_assert which can take a parameter pack as its input. +namespace Eigen { +namespace internal { + +inline bool all(){ return true; } + +template<typename T, typename ...Ts> +bool all(T t, Ts ... ts){ return t && all(ts...); } + +} +} +#endif + +#if EIGEN_HAS_CXX11_OVERRIDE_FINAL +// provide override and final specifiers if they are available: +# define EIGEN_OVERRIDE override +# define EIGEN_FINAL final +#else +# define EIGEN_OVERRIDE +# define EIGEN_FINAL +#endif + +// Wrapping #pragma unroll in a macro since it is required for SYCL +#if defined(SYCL_DEVICE_ONLY) + #if defined(_MSC_VER) + #define EIGEN_UNROLL_LOOP __pragma(unroll) + #else + #define EIGEN_UNROLL_LOOP _Pragma("unroll") + #endif +#else + #define EIGEN_UNROLL_LOOP +#endif + +#endif // EIGEN_MACROS_H diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h new file mode 100644 index 0000000..875318c --- /dev/null +++ b/Eigen/src/Core/util/Memory.h @@ -0,0 +1,1163 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com> +// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com> +// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org> +// Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +/***************************************************************************** +*** Platform checks for aligned malloc functions *** +*****************************************************************************/ + +#ifndef EIGEN_MEMORY_H +#define EIGEN_MEMORY_H + +#ifndef EIGEN_MALLOC_ALREADY_ALIGNED + +// Try to determine automatically if malloc is already aligned. + +// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see: +// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html +// This is true at least since glibc 2.8. +// This leaves the question how to detect 64-bit. According to this document, +// http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf +// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed +// quite safe, at least within the context of glibc, to equate 64-bit with LP64. +#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \ + && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16) + #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1 +#else + #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0 +#endif + +// FreeBSD 6 seems to have 16-byte aligned malloc +// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup +// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures +// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup +#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16) + #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1 +#else + #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0 +#endif + +#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \ + || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \ + || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \ + || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED + #define EIGEN_MALLOC_ALREADY_ALIGNED 1 +#else + #define EIGEN_MALLOC_ALREADY_ALIGNED 0 +#endif + +#endif + +namespace Eigen { + +namespace internal { + +EIGEN_DEVICE_FUNC +inline void throw_std_bad_alloc() +{ + #ifdef EIGEN_EXCEPTIONS + throw std::bad_alloc(); + #else + std::size_t huge = static_cast<std::size_t>(-1); + #if defined(EIGEN_HIPCC) + // + // calls to "::operator new" are to be treated as opaque function calls (i.e no inlining), + // and as a consequence the code in the #else block triggers the hipcc warning : + // "no overloaded function has restriction specifiers that are compatible with the ambient context" + // + // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects + // the same on "operator new" + // Reverting code back to the old version in this #if block for the hipcc compiler + // + new int[huge]; + #else + void* unused = ::operator new(huge); + EIGEN_UNUSED_VARIABLE(unused); + #endif + #endif +} + +/***************************************************************************** +*** Implementation of handmade aligned functions *** +*****************************************************************************/ + +/* ----- Hand made implementations of aligned malloc/free and realloc ----- */ + +/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned. + * Fast, but wastes 16 additional bytes of memory. Does not throw any exception. 
+ */ +EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES) +{ + eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2"); + + EIGEN_USING_STD(malloc) + void *original = malloc(size+alignment); + + if (original == 0) return 0; + void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment); + *(reinterpret_cast<void**>(aligned) - 1) = original; + return aligned; +} + +/** \internal Frees memory allocated with handmade_aligned_malloc */ +EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr) +{ + if (ptr) { + EIGEN_USING_STD(free) + free(*(reinterpret_cast<void**>(ptr) - 1)); + } +} + +/** \internal + * \brief Reallocates aligned memory. + * Since we know that our handmade version is based on std::malloc + * we can use std::realloc to implement efficient reallocation. + */ +inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0) +{ + if (ptr == 0) return handmade_aligned_malloc(size); + void *original = *(reinterpret_cast<void**>(ptr) - 1); + std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original); + original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES); + if (original == 0) return 0; + void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES); + void *previous_aligned = static_cast<char *>(original)+previous_offset; + if(aligned!=previous_aligned) + std::memmove(aligned, previous_aligned, size); + + *(reinterpret_cast<void**>(aligned) - 1) = original; + return aligned; +} + +/***************************************************************************** +*** Implementation of portable aligned versions of malloc/free/realloc *** +*****************************************************************************/ + +#ifdef EIGEN_NO_MALLOC +EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() +{ + eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)"); +} +#elif defined EIGEN_RUNTIME_NO_MALLOC +EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false) +{ + static bool value = true; + if (update == 1) + value = new_value; + return value; +} +EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); } +EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); } +EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() +{ + eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)"); +} +#else +EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() +{} +#endif + +/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements. + * On allocation error, the returned pointer is null, and std::bad_alloc is thrown. + */ +EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) +{ + check_that_malloc_is_allowed(); + + void *result; + #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED + + EIGEN_USING_STD(malloc) + result = malloc(size); + + #if EIGEN_DEFAULT_ALIGN_BYTES==16 + eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. 
Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator."); + #endif + #else + result = handmade_aligned_malloc(size); + #endif + + if(!result && size) + throw_std_bad_alloc(); + + return result; +} + +/** \internal Frees memory allocated with aligned_malloc. */ +EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) +{ + #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED + + EIGEN_USING_STD(free) + free(ptr); + + #else + handmade_aligned_free(ptr); + #endif +} + +/** + * \internal + * \brief Reallocates an aligned block of memory. + * \throws std::bad_alloc on allocation failure + */ +inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size) +{ + EIGEN_UNUSED_VARIABLE(old_size) + + void *result; +#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED + result = std::realloc(ptr,new_size); +#else + result = handmade_aligned_realloc(ptr,new_size,old_size); +#endif + + if (!result && new_size) + throw_std_bad_alloc(); + + return result; +} + +/***************************************************************************** +*** Implementation of conditionally aligned functions *** +*****************************************************************************/ + +/** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned. + * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown. + */ +template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size) +{ + return aligned_malloc(size); +} + +template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size) +{ + check_that_malloc_is_allowed(); + + EIGEN_USING_STD(malloc) + void *result = malloc(size); + + if(!result && size) + throw_std_bad_alloc(); + return result; +} + +/** \internal Frees memory allocated with conditional_aligned_malloc */ +template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr) +{ + aligned_free(ptr); +} + +template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr) +{ + EIGEN_USING_STD(free) + free(ptr); +} + +template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) +{ + return aligned_realloc(ptr, new_size, old_size); +} + +template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t) +{ + return std::realloc(ptr, new_size); +} + +/***************************************************************************** +*** Construction/destruction of array elements *** +*****************************************************************************/ + +/** \internal Destructs the elements of an array. + * The \a size parameters tells on how many objects to call the destructor of T. + */ +template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size) +{ + // always destruct an array starting from the end. + if(ptr) + while(size) ptr[--size].~T(); +} + +/** \internal Constructs the elements of an array. + * The \a size parameter tells on how many objects to call the constructor of T. + */ +template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size) +{ + std::size_t i; + EIGEN_TRY + { + for (i = 0; i < size; ++i) ::new (ptr + i) T; + return ptr; + } + EIGEN_CATCH(...) 
+ { + destruct_elements_of_array(ptr, i); + EIGEN_THROW; + } + return NULL; +} + +/***************************************************************************** +*** Implementation of aligned new/delete-like functions *** +*****************************************************************************/ + +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size) +{ + if(size > std::size_t(-1) / sizeof(T)) + throw_std_bad_alloc(); +} + +/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment. + * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown. + * The default constructor of T is called. + */ +template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size) +{ + check_size_for_overflow<T>(size); + T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size)); + EIGEN_TRY + { + return construct_elements_of_array(result, size); + } + EIGEN_CATCH(...) + { + aligned_free(result); + EIGEN_THROW; + } + return result; +} + +template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size) +{ + check_size_for_overflow<T>(size); + T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size)); + EIGEN_TRY + { + return construct_elements_of_array(result, size); + } + EIGEN_CATCH(...) + { + conditional_aligned_free<Align>(result); + EIGEN_THROW; + } + return result; +} + +/** \internal Deletes objects constructed with aligned_new + * The \a size parameters tells on how many objects to call the destructor of T. + */ +template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size) +{ + destruct_elements_of_array<T>(ptr, size); + Eigen::internal::aligned_free(ptr); +} + +/** \internal Deletes objects constructed with conditional_aligned_new + * The \a size parameters tells on how many objects to call the destructor of T. + */ +template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size) +{ + destruct_elements_of_array<T>(ptr, size); + conditional_aligned_free<Align>(ptr); +} + +template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size) +{ + check_size_for_overflow<T>(new_size); + check_size_for_overflow<T>(old_size); + if(new_size < old_size) + destruct_elements_of_array(pts+new_size, old_size-new_size); + T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size)); + if(new_size > old_size) + { + EIGEN_TRY + { + construct_elements_of_array(result+old_size, new_size-old_size); + } + EIGEN_CATCH(...) + { + conditional_aligned_free<Align>(result); + EIGEN_THROW; + } + } + return result; +} + + +template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size) +{ + if(size==0) + return 0; // short-cut. Also fixes Bug 884 + check_size_for_overflow<T>(size); + T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size)); + if(NumTraits<T>::RequireInitialization) + { + EIGEN_TRY + { + construct_elements_of_array(result, size); + } + EIGEN_CATCH(...) 
+ { + conditional_aligned_free<Align>(result); + EIGEN_THROW; + } + } + return result; +} + +template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size) +{ + check_size_for_overflow<T>(new_size); + check_size_for_overflow<T>(old_size); + if(NumTraits<T>::RequireInitialization && (new_size < old_size)) + destruct_elements_of_array(pts+new_size, old_size-new_size); + T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size)); + if(NumTraits<T>::RequireInitialization && (new_size > old_size)) + { + EIGEN_TRY + { + construct_elements_of_array(result+old_size, new_size-old_size); + } + EIGEN_CATCH(...) + { + conditional_aligned_free<Align>(result); + EIGEN_THROW; + } + } + return result; +} + +template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size) +{ + if(NumTraits<T>::RequireInitialization) + destruct_elements_of_array<T>(ptr, size); + conditional_aligned_free<Align>(ptr); +} + +/****************************************************************************/ + +/** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a Alignment. + * + * \tparam Alignment requested alignment in Bytes. + * \param array the address of the start of the array + * \param size the size of the array + + * \note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar, + * the size of the array is returned. For example with SSE, the requested alignment is typically 16 bytes. If + * the packet size for the given scalar type is 1, then everything is considered well-aligned. + * + * \note Otherwise, if the Alignment is larger than the scalar size, we rely on the assumption that sizeof(Scalar) is a + * power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for + * example with Scalar=double on certain 32-bit platforms, see bug #79. + * + * There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h. + * \sa first_default_aligned() + */ +template<int Alignment, typename Scalar, typename Index> +EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) +{ + const Index ScalarSize = sizeof(Scalar); + const Index AlignmentSize = Alignment / ScalarSize; + const Index AlignmentMask = AlignmentSize-1; + + if(AlignmentSize<=1) + { + // Either the requested alignment is smaller than a scalar, or it exactly matches one scalar, + // so that all elements of the array have the same alignment. + return 0; + } + else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0) + { + // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size. + // Consequently, no element of the array is well aligned. + return size; + } + else + { + Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask; + return (first < size) ? first : size; + } +} + +/** \internal Returns the index of the first element of the array that is well aligned with respect to the largest packet requirement.
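+ * For example (editor's illustrative note, not part of the original comment): with Scalar=float and a + * 16-byte default packet, an array whose address is 8 mod 16 yields first_default_aligned(array,size)==2, + * i.e. two scalar elements precede the first aligned packet.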
+ * \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */ +template<typename Scalar, typename Index> +EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size) +{ + typedef typename packet_traits<Scalar>::type DefaultPacketType; + return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size); +} + +/** \internal Returns the smallest integer multiple of \a base that is greater than or equal to \a size + */ +template<typename Index> +inline Index first_multiple(Index size, Index base) +{ + return ((size+base-1)/base)*base; +} + +// std::copy is much slower than memcpy, so let's introduce a smart_copy which +// uses memcpy on trivial types, i.e., on types that do not require an initialization ctor. +template<typename T, bool UseMemcpy> struct smart_copy_helper; + +template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target) +{ + smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target); +} + +template<typename T> struct smart_copy_helper<T,true> { + EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) + { + IntPtr size = IntPtr(end)-IntPtr(start); + if(size==0) return; + eigen_internal_assert(start!=0 && end!=0 && target!=0); + EIGEN_USING_STD(memcpy) + memcpy(target, start, size); + } +}; + +template<typename T> struct smart_copy_helper<T,false> { + EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) + { std::copy(start, end, target); } +}; + +// Intelligent memmove: it falls back to std::memmove for POD types, and uses std::copy otherwise. +template<typename T, bool UseMemmove> struct smart_memmove_helper; + +template<typename T> void smart_memmove(const T* start, const T* end, T* target) +{ + smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target); +} + +template<typename T> struct smart_memmove_helper<T,true> { + static inline void run(const T* start, const T* end, T* target) + { + IntPtr size = IntPtr(end)-IntPtr(start); + if(size==0) return; + eigen_internal_assert(start!=0 && end!=0 && target!=0); + std::memmove(target, start, size); + } +}; + +template<typename T> struct smart_memmove_helper<T,false> { + static inline void run(const T* start, const T* end, T* target) + { + if (UIntPtr(target) < UIntPtr(start)) + { + std::copy(start, end, target); + } + else + { + std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); + std::copy_backward(start, end, target + count); + } + } +}; + +#if EIGEN_HAS_RVALUE_REFERENCES +template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) +{ + return std::move(start, end, target); +} +#else +template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target) +{ + return std::copy(start, end, target); +} +#endif + +/***************************************************************************** +*** Implementation of runtime stack allocation (falling back to malloc) *** +*****************************************************************************/ + +// you can override Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA +// to the appropriate stack allocation function +#if ! defined EIGEN_ALLOCA && !
defined EIGEN_GPU_COMPILE_PHASE + #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca) + #define EIGEN_ALLOCA alloca + #elif EIGEN_COMP_MSVC + #define EIGEN_ALLOCA _alloca + #endif +#endif + +// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is +// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because +// the compiler still emits bad code because stack allocation checks use "<=". +// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772 +// is fixed. +#if defined(__clang__) && defined(__thumb__) + #undef EIGEN_ALLOCA +#endif + +// This helper class construct the allocated memory, and takes care of destructing and freeing the handled data +// at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions. +template<typename T> class aligned_stack_memory_handler : noncopyable +{ + public: + /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size. + * Note that \a ptr can be 0 regardless of the other parameters. + * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization). + * In this case, the buffer elements will also be destructed when this handler will be destructed. + * Finally, if \a dealloc is true, then the pointer \a ptr is freed. + **/ + EIGEN_DEVICE_FUNC + aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc) + : m_ptr(ptr), m_size(size), m_deallocate(dealloc) + { + if(NumTraits<T>::RequireInitialization && m_ptr) + Eigen::internal::construct_elements_of_array(m_ptr, size); + } + EIGEN_DEVICE_FUNC + ~aligned_stack_memory_handler() + { + if(NumTraits<T>::RequireInitialization && m_ptr) + Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size); + if(m_deallocate) + Eigen::internal::aligned_free(m_ptr); + } + protected: + T* m_ptr; + std::size_t m_size; + bool m_deallocate; +}; + +#ifdef EIGEN_ALLOCA + +template<typename Xpr, int NbEvaluations, + bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic + > +struct local_nested_eval_wrapper +{ + static const bool NeedExternalBuffer = false; + typedef typename Xpr::Scalar Scalar; + typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType; + ObjectType object; + + EIGEN_DEVICE_FUNC + local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr) + { + EIGEN_UNUSED_VARIABLE(ptr); + eigen_internal_assert(ptr==0); + } +}; + +template<typename Xpr, int NbEvaluations> +struct local_nested_eval_wrapper<Xpr,NbEvaluations,true> +{ + static const bool NeedExternalBuffer = true; + typedef typename Xpr::Scalar Scalar; + typedef typename plain_object_eval<Xpr>::type PlainObject; + typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType; + ObjectType object; + + EIGEN_DEVICE_FUNC + local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) + : object(ptr==0 ? 
reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()), + m_deallocate(ptr==0) + { + if(NumTraits<Scalar>::RequireInitialization && object.data()) + Eigen::internal::construct_elements_of_array(object.data(), object.size()); + object = xpr; + } + + EIGEN_DEVICE_FUNC + ~local_nested_eval_wrapper() + { + if(NumTraits<Scalar>::RequireInitialization && object.data()) + Eigen::internal::destruct_elements_of_array(object.data(), object.size()); + if(m_deallocate) + Eigen::internal::aligned_free(object.data()); + } + +private: + bool m_deallocate; +}; + +#endif // EIGEN_ALLOCA + +template<typename T> class scoped_array : noncopyable +{ + T* m_ptr; +public: + explicit scoped_array(std::ptrdiff_t size) + { + m_ptr = new T[size]; + } + ~scoped_array() + { + delete[] m_ptr; + } + T& operator[](std::ptrdiff_t i) { return m_ptr[i]; } + const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; } + T* &ptr() { return m_ptr; } + const T* ptr() const { return m_ptr; } + operator const T*() const { return m_ptr; } +}; + +template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) +{ + std::swap(a.ptr(),b.ptr()); +} + +} // end namespace internal + +/** \internal + * + * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates, + * and constructs an aligned buffer named NAME of SIZE elements of type TYPE on the stack + * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform + * (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap. + * The allocated buffer is automatically deleted when exiting the scope of this declaration. + * If BUFFER is non-null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs. + * Here is an example: + * \code + * { + * ei_declare_aligned_stack_constructed_variable(float,data,size,0); + * // use data[0] to data[size-1] + * } + * \endcode + * The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token. + * + * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogous to + * \code + * typename internal::nested_eval<XPR_T,N>::type NAME(XPR); + * \endcode + * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown. + * This is accomplished through alloca if the latter is supported and if the required number of bytes + * is below EIGEN_STACK_ALLOCATION_LIMIT. + */ +#ifdef EIGEN_ALLOCA + + #if EIGEN_DEFAULT_ALIGN_BYTES>0 + // We always manually re-align the result of EIGEN_ALLOCA. + // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. + #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + #else + #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE) + #endif + + #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ + Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ + TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \ + : reinterpret_cast<TYPE*>( \ + (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ?
EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \ + : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \ + Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) + + + #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \ + Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \ + ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \ + ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \ + typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object) + +#else + + #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ + Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ + TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \ + Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true) + + +#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR) + +#endif + + +/***************************************************************************** +*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] *** +*****************************************************************************/ + +#if EIGEN_HAS_CXX17_OVERALIGN + +// C++17 -> no need to bother about alignment anymore :) + +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) + +#else + +// HIP does not support new/delete on device. +#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE) + #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ + EIGEN_DEVICE_FUNC \ + void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \ + EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \ + EIGEN_CATCH (...) { return 0; } \ + } + #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \ + EIGEN_DEVICE_FUNC \ + void *operator new(std::size_t size) { \ + return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ + } \ + EIGEN_DEVICE_FUNC \ + void *operator new[](std::size_t size) { \ + return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \ + } \ + EIGEN_DEVICE_FUNC \ + void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + EIGEN_DEVICE_FUNC \ + void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + EIGEN_DEVICE_FUNC \ + void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + EIGEN_DEVICE_FUNC \ + void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \ + /* in-place new and delete. since (at least afaik) there is no actual */ \ + /* memory allocated we can safely let the default implementation handle */ \ + /* this particular case. 
*/ \ + EIGEN_DEVICE_FUNC \ + static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \ + EIGEN_DEVICE_FUNC \ + static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \ + EIGEN_DEVICE_FUNC \ + void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \ + EIGEN_DEVICE_FUNC \ + void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \ + /* nothrow-new (returns zero instead of std::bad_alloc) */ \ + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ + EIGEN_DEVICE_FUNC \ + void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \ + Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \ + } \ + typedef void eigen_aligned_operator_new_marker_type; +#else + #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) +#endif + +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true) +#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \ + ((Size)!=Eigen::Dynamic) && \ + (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES )==0)) || \ + ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \ + ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0)) ))) + +#endif + +/****************************************************************************/ + +/** \class aligned_allocator +* \ingroup Core_Module +* +* \brief STL compatible allocator to use with types requiring a non-standard alignment. +* +* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd. +* By default, it will thus provide at least 16 bytes alignment and more in the following cases: +* - 32 bytes alignment if AVX is enabled. +* - 64 bytes alignment if AVX512 is enabled. +* +* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented +* \link TopicPreprocessorDirectivesPerformance there \endlink. +* +* Example: +* \code +* // Matrix4f requires 16 bytes alignment: +* std::map< int, Matrix4f, std::less<int>, +* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4; +* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator: +* std::map< int, Vector3f > my_map_vec3; +* \endcode +* +* \sa \blank \ref TopicStlContainers.
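+* +* A std::vector usage sketch (editor's addition, following the same pattern as the map example above): +* \code +* std::vector<Matrix4f, aligned_allocator<Matrix4f> > my_vec_mat4; +* \endcode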
+*/ +template<class T> +class aligned_allocator : public std::allocator<T> +{ +public: + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef T value_type; + + template<class U> + struct rebind + { + typedef aligned_allocator<U> other; + }; + + aligned_allocator() : std::allocator<T>() {} + + aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {} + + template<class U> + aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {} + + ~aligned_allocator() {} + + #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0) + // In gcc std::allocator::max_size() is bugged making gcc triggers a warning: + // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807 + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544 + size_type max_size() const { + return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T); + } + #endif + + pointer allocate(size_type num, const void* /*hint*/ = 0) + { + internal::check_size_for_overflow<T>(num); + return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) ); + } + + void deallocate(pointer p, size_type /*num*/) + { + internal::aligned_free(p); + } +}; + +//---------- Cache sizes ---------- + +#if !defined(EIGEN_NO_CPUID) +# if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64 +# if defined(__PIC__) && EIGEN_ARCH_i386 + // Case for x86 with PIC +# define EIGEN_CPUID(abcd,func,id) \ + __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id)); +# elif defined(__PIC__) && EIGEN_ARCH_x86_64 + // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model. + // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway. 
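+ // (Editor's aside: the xchg through the "=&r" scratch register below saves and restores rbx because + // PIC may reserve it, so the inline asm must not clobber it directly.)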
+# define EIGEN_CPUID(abcd,func,id) \ + __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id)); +# else + // Case for x86_64 or x86 w/o PIC +# define EIGEN_CPUID(abcd,func,id) \ + __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) ); +# endif +# elif EIGEN_COMP_MSVC +# if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64 +# define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id) +# endif +# endif +#endif + +namespace internal { + +#ifdef EIGEN_CPUID + +inline bool cpuid_is_vendor(int abcd[4], const int vendor[3]) +{ + return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2]; +} + +inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3) +{ + int abcd[4]; + l1 = l2 = l3 = 0; + int cache_id = 0; + int cache_type = 0; + do { + abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; + EIGEN_CPUID(abcd,0x4,cache_id); + cache_type = (abcd[0] & 0x0F) >> 0; + if(cache_type==1||cache_type==3) // data or unified cache + { + int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5] + int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22] + int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12] + int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0] + int sets = (abcd[2]); // C[31:0] + + int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1); + + switch(cache_level) + { + case 1: l1 = cache_size; break; + case 2: l2 = cache_size; break; + case 3: l3 = cache_size; break; + default: break; + } + } + cache_id++; + } while(cache_type>0 && cache_id<16); +} + +inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3) +{ + int abcd[4]; + abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; + l1 = l2 = l3 = 0; + EIGEN_CPUID(abcd,0x00000002,0); + unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2; + bool check_for_p2_core2 = false; + for(int i=0; i<14; ++i) + { + switch(bytes[i]) + { + case 0x0A: l1 = 8; break; // 0Ah data L1 cache, 8 KB, 2 ways, 32 byte lines + case 0x0C: l1 = 16; break; // 0Ch data L1 cache, 16 KB, 4 ways, 32 byte lines + case 0x0E: l1 = 24; break; // 0Eh data L1 cache, 24 KB, 6 ways, 64 byte lines + case 0x10: l1 = 16; break; // 10h data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64) + case 0x15: l1 = 16; break; // 15h code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64) + case 0x2C: l1 = 32; break; // 2Ch data L1 cache, 32 KB, 8 ways, 64 byte lines + case 0x30: l1 = 32; break; // 30h code L1 cache, 32 KB, 8 ways, 64 byte lines + case 0x60: l1 = 16; break; // 60h data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored + case 0x66: l1 = 8; break; // 66h data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored + case 0x67: l1 = 16; break; // 67h data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored + case 0x68: l1 = 32; break; // 68h data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored + case 0x1A: l2 = 96; break; // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64) + case 0x22: l3 = 512; break; // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored + case 0x23: l3 = 1024; break; // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored + case 0x25: l3 = 2048; break; // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored + case 0x29: l3 = 4096; break; // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored + case 0x39: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, 
sectored + case 0x3A: l2 = 192; break; // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored + case 0x3B: l2 = 128; break; // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored + case 0x3C: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored + case 0x3D: l2 = 384; break; // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored + case 0x3E: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored + case 0x40: l2 = 0; break; // no integrated L2 cache (P6 core) or L3 cache (P4 core) + case 0x41: l2 = 128; break; // code and data L2 cache, 128 KB, 4 ways, 32 byte lines + case 0x42: l2 = 256; break; // code and data L2 cache, 256 KB, 4 ways, 32 byte lines + case 0x43: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 32 byte lines + case 0x44: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines + case 0x45: l2 = 2048; break; // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines + case 0x46: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines + case 0x47: l3 = 8192; break; // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines + case 0x48: l2 = 3072; break; // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines + case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;// code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2 + case 0x4A: l3 = 6144; break; // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines + case 0x4B: l3 = 8192; break; // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines + case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines + case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines + case 0x4E: l2 = 6144; break; // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines + case 0x78: l2 = 1024; break; // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines + case 0x79: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored + case 0x7A: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored + case 0x7B: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored + case 0x7C: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored + case 0x7D: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines + case 0x7E: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. 
(IA-64) + case 0x7F: l2 = 512; break; // code and data L2 cache, 512 KB, 2 ways, 64 byte lines + case 0x80: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 64 byte lines + case 0x81: l2 = 128; break; // code and data L2 cache, 128 KB, 8 ways, 32 byte lines + case 0x82: l2 = 256; break; // code and data L2 cache, 256 KB, 8 ways, 32 byte lines + case 0x83: l2 = 512; break; // code and data L2 cache, 512 KB, 8 ways, 32 byte lines + case 0x84: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines + case 0x85: l2 = 2048; break; // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines + case 0x86: l2 = 512; break; // code and data L2 cache, 512 KB, 4 ways, 64 byte lines + case 0x87: l2 = 1024; break; // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines + case 0x88: l3 = 2048; break; // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64) + case 0x89: l3 = 4096; break; // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64) + case 0x8A: l3 = 8192; break; // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64) + case 0x8D: l3 = 3072; break; // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64) + + default: break; + } + } + if(check_for_p2_core2 && l2 == l3) + l3 = 0; + l1 *= 1024; + l2 *= 1024; + l3 *= 1024; +} + +inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs) +{ + if(max_std_funcs>=4) + queryCacheSizes_intel_direct(l1,l2,l3); + else if(max_std_funcs>=2) + queryCacheSizes_intel_codes(l1,l2,l3); + else + l1 = l2 = l3 = 0; +} + +inline void queryCacheSizes_amd(int& l1, int& l2, int& l3) +{ + int abcd[4]; + abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; + + // First query the max supported function. + EIGEN_CPUID(abcd,0x80000000,0); + if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006)) + { + EIGEN_CPUID(abcd,0x80000005,0); + l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB + abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; + EIGEN_CPUID(abcd,0x80000006,0); + l2 = (abcd[2] >> 16) * 1024; // C[31;16] = l2 cache size in KB + l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024; // D[31;18] = l3 cache size in 512KB + } + else + { + l1 = l2 = l3 = 0; + } +} +#endif + +/** \internal + * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively */ +inline void queryCacheSizes(int& l1, int& l2, int& l3) +{ + #ifdef EIGEN_CPUID + int abcd[4]; + const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e}; + const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163}; + const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!" 
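+ + // (Editor's gloss: CPUID leaf 0x0 returns the 12-byte vendor string in EBX:EDX:ECX, which is why + // cpuid_is_vendor() above compares abcd[1], abcd[3], abcd[2] against the three packed words.)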
+ + // identify the CPU vendor + EIGEN_CPUID(abcd,0x0,0); + int max_std_funcs = abcd[0]; + if(cpuid_is_vendor(abcd,GenuineIntel)) + queryCacheSizes_intel(l1,l2,l3,max_std_funcs); + else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_)) + queryCacheSizes_amd(l1,l2,l3); + else + // by default let's use Intel's API + queryCacheSizes_intel(l1,l2,l3,max_std_funcs); + + // here is the list of other vendors: +// ||cpuid_is_vendor(abcd,"VIA VIA VIA ") +// ||cpuid_is_vendor(abcd,"CyrixInstead") +// ||cpuid_is_vendor(abcd,"CentaurHauls") +// ||cpuid_is_vendor(abcd,"GenuineTMx86") +// ||cpuid_is_vendor(abcd,"TransmetaCPU") +// ||cpuid_is_vendor(abcd,"RiseRiseRise") +// ||cpuid_is_vendor(abcd,"Geode by NSC") +// ||cpuid_is_vendor(abcd,"SiS SiS SiS ") +// ||cpuid_is_vendor(abcd,"UMC UMC UMC ") +// ||cpuid_is_vendor(abcd,"NexGenDriven") + #else + l1 = l2 = l3 = -1; + #endif +} + +/** \internal + * \returns the size in Bytes of the L1 data cache */ +inline int queryL1CacheSize() +{ + int l1(-1), l2, l3; + queryCacheSizes(l1,l2,l3); + return l1; +} + +/** \internal + * \returns the size in Bytes of the L2 or L3 cache if this later is present */ +inline int queryTopLevelCacheSize() +{ + int l1, l2(-1), l3(-1); + queryCacheSizes(l1,l2,l3); + return (std::max)(l2,l3); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MEMORY_H diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h new file mode 100755 index 0000000..81ae2a3 --- /dev/null +++ b/Eigen/src/Core/util/Meta.h @@ -0,0 +1,812 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_META_H +#define EIGEN_META_H + +#if defined(EIGEN_GPU_COMPILE_PHASE) + + #include <cfloat> + + #if defined(EIGEN_CUDA_ARCH) + #include <math_constants.h> + #endif + + #if defined(EIGEN_HIP_DEVICE_COMPILE) + #include "Eigen/src/Core/arch/HIP/hcc/math_constants.h" + #endif + +#endif + +// Recent versions of ICC require <cstdint> for pointer types below. +#define EIGEN_ICC_NEEDS_CSTDINT (EIGEN_COMP_ICC>=1600 && EIGEN_COMP_CXXVER >= 11) + +// Define portable (u)int{32,64} types +#if EIGEN_HAS_CXX11 || EIGEN_ICC_NEEDS_CSTDINT +#include <cstdint> +namespace Eigen { +namespace numext { +typedef std::uint8_t uint8_t; +typedef std::int8_t int8_t; +typedef std::uint16_t uint16_t; +typedef std::int16_t int16_t; +typedef std::uint32_t uint32_t; +typedef std::int32_t int32_t; +typedef std::uint64_t uint64_t; +typedef std::int64_t int64_t; +} +} +#else +// Without c++11, all compilers able to compile Eigen also +// provide the C99 stdint.h header file. +#include <stdint.h> +namespace Eigen { +namespace numext { +typedef ::uint8_t uint8_t; +typedef ::int8_t int8_t; +typedef ::uint16_t uint16_t; +typedef ::int16_t int16_t; +typedef ::uint32_t uint32_t; +typedef ::int32_t int32_t; +typedef ::uint64_t uint64_t; +typedef ::int64_t int64_t; +} +} +#endif + +namespace Eigen { + +typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex; + +/** + * \brief The Index type as used for the API. + * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. 
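+ * For example (editor's illustration): defining \c EIGEN_DEFAULT_DENSE_INDEX_TYPE as \c int before including + * any Eigen header makes \c Index a 32-bit type.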
+ * \sa \blank \ref TopicPreprocessorDirectives, StorageIndex. + */ + +typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index; + +namespace internal { + +/** \internal + * \file Meta.h + * This file contains generic metaprogramming classes which are not specifically related to Eigen. + * \note In case you wonder, yes we're aware that Boost already provides all these features, + * we however don't want to add a dependency to Boost. + */ + +// Only recent versions of ICC complain about using ptrdiff_t to hold pointers, +// and older versions do not provide *intptr_t types. +#if EIGEN_ICC_NEEDS_CSTDINT +typedef std::intptr_t IntPtr; +typedef std::uintptr_t UIntPtr; +#else +typedef std::ptrdiff_t IntPtr; +typedef std::size_t UIntPtr; +#endif +#undef EIGEN_ICC_NEEDS_CSTDINT + +struct true_type { enum { value = 1 }; }; +struct false_type { enum { value = 0 }; }; + +template<bool Condition> +struct bool_constant; + +template<> +struct bool_constant<true> : true_type {}; + +template<> +struct bool_constant<false> : false_type {}; + +template<bool Condition, typename Then, typename Else> +struct conditional { typedef Then type; }; + +template<typename Then, typename Else> +struct conditional <false, Then, Else> { typedef Else type; }; + +template<typename T> struct remove_reference { typedef T type; }; +template<typename T> struct remove_reference<T&> { typedef T type; }; + +template<typename T> struct remove_pointer { typedef T type; }; +template<typename T> struct remove_pointer<T*> { typedef T type; }; +template<typename T> struct remove_pointer<T*const> { typedef T type; }; + +template <class T> struct remove_const { typedef T type; }; +template <class T> struct remove_const<const T> { typedef T type; }; +template <class T> struct remove_const<const T[]> { typedef T type[]; }; +template <class T, unsigned int Size> struct remove_const<const T[Size]> { typedef T type[Size]; }; + +template<typename T> struct remove_all { typedef T type; }; +template<typename T> struct remove_all<const T> { typedef typename remove_all<T>::type type; }; +template<typename T> struct remove_all<T const&> { typedef typename remove_all<T>::type type; }; +template<typename T> struct remove_all<T&> { typedef typename remove_all<T>::type type; }; +template<typename T> struct remove_all<T const*> { typedef typename remove_all<T>::type type; }; +template<typename T> struct remove_all<T*> { typedef typename remove_all<T>::type type; }; + +template<typename T> struct is_arithmetic { enum { value = false }; }; +template<> struct is_arithmetic<float> { enum { value = true }; }; +template<> struct is_arithmetic<double> { enum { value = true }; }; +template<> struct is_arithmetic<long double> { enum { value = true }; }; +template<> struct is_arithmetic<bool> { enum { value = true }; }; +template<> struct is_arithmetic<char> { enum { value = true }; }; +template<> struct is_arithmetic<signed char> { enum { value = true }; }; +template<> struct is_arithmetic<unsigned char> { enum { value = true }; }; +template<> struct is_arithmetic<signed short> { enum { value = true }; }; +template<> struct is_arithmetic<unsigned short>{ enum { value = true }; }; +template<> struct is_arithmetic<signed int> { enum { value = true }; }; +template<> struct is_arithmetic<unsigned int> { enum { value = true }; }; +template<> struct is_arithmetic<signed long> { enum { value = true }; }; +template<> struct is_arithmetic<unsigned long> { enum { value = true }; }; + +template<typename T, typename U> struct is_same { enum { value = 0 }; }; +template<typename T> 
struct is_same<T,T> { enum { value = 1 }; }; + +template< class T > +struct is_void : is_same<void, typename remove_const<T>::type> {}; + +#if EIGEN_HAS_CXX11 +template<> struct is_arithmetic<signed long long> { enum { value = true }; }; +template<> struct is_arithmetic<unsigned long long> { enum { value = true }; }; +using std::is_integral; +#else +template<typename T> struct is_integral { enum { value = false }; }; +template<> struct is_integral<bool> { enum { value = true }; }; +template<> struct is_integral<char> { enum { value = true }; }; +template<> struct is_integral<signed char> { enum { value = true }; }; +template<> struct is_integral<unsigned char> { enum { value = true }; }; +template<> struct is_integral<signed short> { enum { value = true }; }; +template<> struct is_integral<unsigned short> { enum { value = true }; }; +template<> struct is_integral<signed int> { enum { value = true }; }; +template<> struct is_integral<unsigned int> { enum { value = true }; }; +template<> struct is_integral<signed long> { enum { value = true }; }; +template<> struct is_integral<unsigned long> { enum { value = true }; }; +#if EIGEN_COMP_MSVC +template<> struct is_integral<signed __int64> { enum { value = true }; }; +template<> struct is_integral<unsigned __int64> { enum { value = true }; }; +#endif +#endif + +#if EIGEN_HAS_CXX11 +using std::make_unsigned; +#else +// TODO: Possibly improve this implementation of make_unsigned. +// It is currently used only by +// template<typename Scalar> struct random_default_impl<Scalar, false, true>. +template<typename> struct make_unsigned; +template<> struct make_unsigned<char> { typedef unsigned char type; }; +template<> struct make_unsigned<signed char> { typedef unsigned char type; }; +template<> struct make_unsigned<unsigned char> { typedef unsigned char type; }; +template<> struct make_unsigned<signed short> { typedef unsigned short type; }; +template<> struct make_unsigned<unsigned short> { typedef unsigned short type; }; +template<> struct make_unsigned<signed int> { typedef unsigned int type; }; +template<> struct make_unsigned<unsigned int> { typedef unsigned int type; }; +template<> struct make_unsigned<signed long> { typedef unsigned long type; }; +template<> struct make_unsigned<unsigned long> { typedef unsigned long type; }; +#if EIGEN_COMP_MSVC +template<> struct make_unsigned<signed __int64> { typedef unsigned __int64 type; }; +template<> struct make_unsigned<unsigned __int64> { typedef unsigned __int64 type; }; +#endif + +// Some platforms define int64_t as `long long` even for C++03, where +// `long long` is not guaranteed by the standard. In this case we are missing +// the definition for make_unsigned. If we just define it, we run into issues +// where `long long` doesn't exist in some compilers for C++03. We therefore add +// the specialization for these platforms only. 
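+// (For instance, on such platforms make_unsigned<int64_t>::type would otherwise fail to instantiate, +// since int64_t there aliases long long rather than long.)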
+#if EIGEN_OS_MAC || EIGEN_COMP_MINGW +template<> struct make_unsigned<unsigned long long> { typedef unsigned long long type; }; +template<> struct make_unsigned<long long> { typedef unsigned long long type; }; +#endif +#endif + +template <typename T> struct add_const { typedef const T type; }; +template <typename T> struct add_const<T&> { typedef T& type; }; + +template <typename T> struct is_const { enum { value = 0 }; }; +template <typename T> struct is_const<T const> { enum { value = 1 }; }; + +template<typename T> struct add_const_on_value_type { typedef const T type; }; +template<typename T> struct add_const_on_value_type<T&> { typedef T const& type; }; +template<typename T> struct add_const_on_value_type<T*> { typedef T const* type; }; +template<typename T> struct add_const_on_value_type<T* const> { typedef T const* const type; }; +template<typename T> struct add_const_on_value_type<T const* const> { typedef T const* const type; }; + +#if EIGEN_HAS_CXX11 + +using std::is_convertible; + +#else + +template<typename From, typename To> +struct is_convertible_impl +{ +private: + struct any_conversion + { + template <typename T> any_conversion(const volatile T&); + template <typename T> any_conversion(T&); + }; + struct yes {int a[1];}; + struct no {int a[2];}; + + template<typename T> + static yes test(T, int); + + template<typename T> + static no test(any_conversion, ...); + +public: + static typename internal::remove_reference<From>::type* ms_from; +#ifdef __INTEL_COMPILER + #pragma warning push + #pragma warning ( disable : 2259 ) +#endif + enum { value = sizeof(test<To>(*ms_from, 0))==sizeof(yes) }; +#ifdef __INTEL_COMPILER + #pragma warning pop +#endif +}; + +template<typename From, typename To> +struct is_convertible +{ + enum { value = is_convertible_impl<From,To>::value }; +}; + +template<typename T> +struct is_convertible<T,T&> { enum { value = false }; }; + +template<typename T> +struct is_convertible<const T,const T&> { enum { value = true }; }; + +#endif + +/** \internal Allows to enable/disable an overload + * according to a compile time condition. 
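+ * A minimal SFINAE sketch (editor's illustration): + * \code + * template<typename T> + * typename enable_if<is_integral<T>::value,T>::type + * twice(const T& x) { return x+x; } // participates in overload resolution only for integral T + * \endcode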
+ */ +template<bool Condition, typename T=void> struct enable_if; + +template<typename T> struct enable_if<true,T> +{ typedef T type; }; + +#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11 +#if !defined(__FLT_EPSILON__) +#define __FLT_EPSILON__ FLT_EPSILON +#define __DBL_EPSILON__ DBL_EPSILON +#endif + +namespace device { + +template<typename T> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static EIGEN_CONSTEXPR T epsilon() { return 0; } + static T (max)() { assert(false && "Highest not supported for this type"); } + static T (min)() { assert(false && "Lowest not supported for this type"); } + static T infinity() { assert(false && "Infinity not supported for this type"); } + static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); } +}; +template<> struct numeric_limits<float> +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static float epsilon() { return __FLT_EPSILON__; } + EIGEN_DEVICE_FUNC + static float (max)() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_MAX_NORMAL_F; + #else + return HIPRT_MAX_NORMAL_F; + #endif + } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static float (min)() { return FLT_MIN; } + EIGEN_DEVICE_FUNC + static float infinity() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_INF_F; + #else + return HIPRT_INF_F; + #endif + } + EIGEN_DEVICE_FUNC + static float quiet_NaN() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_NAN_F; + #else + return HIPRT_NAN_F; + #endif + } +}; +template<> struct numeric_limits<double> +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static double epsilon() { return __DBL_EPSILON__; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static double (max)() { return DBL_MAX; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static double (min)() { return DBL_MIN; } + EIGEN_DEVICE_FUNC + static double infinity() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_INF; + #else + return HIPRT_INF; + #endif + } + EIGEN_DEVICE_FUNC + static double quiet_NaN() { + #if defined(EIGEN_CUDA_ARCH) + return CUDART_NAN; + #else + return HIPRT_NAN; + #endif + } +}; +template<> struct numeric_limits<int> +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static int epsilon() { return 0; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static int (max)() { return INT_MAX; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static int (min)() { return INT_MIN; } +}; +template<> struct numeric_limits<unsigned int> +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static unsigned int epsilon() { return 0; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static unsigned int (max)() { return UINT_MAX; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static unsigned int (min)() { return 0; } +}; +template<> struct numeric_limits<long> +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static long epsilon() { return 0; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static long (max)() { return LONG_MAX; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static long (min)() { return LONG_MIN; } +}; +template<> struct numeric_limits<unsigned long> +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static unsigned long epsilon() { return 0; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static unsigned long (max)() { return ULONG_MAX; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static unsigned long (min)() { return 0; } +}; +template<> struct numeric_limits<long long> +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static long long epsilon() { return 0; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static long long (max)() { return LLONG_MAX; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static long long (min)() { return LLONG_MIN; } +}; +template<> struct numeric_limits<unsigned long long> +{ + 
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static unsigned long long epsilon() { return 0; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static unsigned long long (max)() { return ULLONG_MAX; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static unsigned long long (min)() { return 0; } +}; +template<> struct numeric_limits<bool> +{ + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static bool epsilon() { return false; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static bool (max)() { return true; } + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR + static bool (min)() { return false; } +}; + +} + +#endif // defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11 + +/** \internal + * A base class to disable the default copy ctor and copy assignment operator. + */ +class noncopyable +{ + EIGEN_DEVICE_FUNC noncopyable(const noncopyable&); + EIGEN_DEVICE_FUNC const noncopyable& operator=(const noncopyable&); +protected: + EIGEN_DEVICE_FUNC noncopyable() {} + EIGEN_DEVICE_FUNC ~noncopyable() {} +}; + +/** \internal + * Provides access to the number of elements of the object as a compile-time constant expression. + * It "returns" Eigen::Dynamic if the size cannot be resolved at compile-time (default). + * + * Similar to std::tuple_size, but more general. + * + * It currently supports: + * - any types T defining T::SizeAtCompileTime + * - plain C arrays as T[N] + * - std::array (c++11) + * - some internal types such as SingleRange and AllRange + * + * The second template parameter eases SFINAE-based specializations. + */ +template<typename T, typename EnableIf = void> struct array_size { + enum { value = Dynamic }; +}; + +template<typename T> struct array_size<T,typename internal::enable_if<((T::SizeAtCompileTime&0)==0)>::type> { + enum { value = T::SizeAtCompileTime }; +}; + +template<typename T, int N> struct array_size<const T (&)[N]> { + enum { value = N }; +}; +template<typename T, int N> struct array_size<T (&)[N]> { + enum { value = N }; +}; + +#if EIGEN_HAS_CXX11 +template<typename T, std::size_t N> struct array_size<const std::array<T,N> > { + enum { value = N }; +}; +template<typename T, std::size_t N> struct array_size<std::array<T,N> > { + enum { value = N }; +}; +#endif + +/** \internal + * Analogue of the std::size free function. + * It returns the size of the container or view \a x of type \c T. + * + * It currently supports: + * - any types T defining a member T::size() const + * - plain C arrays as T[N] + * + */ +template<typename T> +EIGEN_CONSTEXPR Index size(const T& x) { return x.size(); } + +template<typename T,std::size_t N> +EIGEN_CONSTEXPR Index size(const T (&) [N]) { return N; } + +/** \internal + * Convenient struct to get the result type of a nullary, unary, binary, or + * ternary functor. + * + * Pre C++11: + * Supports both a Func::result_type member and templated + * Func::result<Func(ArgTypes...)>::type member. + * + * If none of these members is provided, then the type of the first + * argument is returned. + * + * Post C++11: + * This uses std::result_of. However, note the `type` member removes + * const and converts references/pointers to their corresponding value type. + */ +#if EIGEN_HAS_STD_INVOKE_RESULT +template<typename T> struct result_of; + +template<typename F, typename...
ArgTypes> +struct result_of<F(ArgTypes...)> { + typedef typename std::invoke_result<F, ArgTypes...>::type type1; + typedef typename remove_all<type1>::type type; +}; +#elif EIGEN_HAS_STD_RESULT_OF +template<typename T> struct result_of { + typedef typename std::result_of<T>::type type1; + typedef typename remove_all<type1>::type type; +}; +#else +template<typename T> struct result_of { }; + +struct has_none {int a[1];}; +struct has_std_result_type {int a[2];}; +struct has_tr1_result {int a[3];}; + +template<typename Func, int SizeOf> +struct nullary_result_of_select {}; + +template<typename Func> +struct nullary_result_of_select<Func, sizeof(has_std_result_type)> {typedef typename Func::result_type type;}; + +template<typename Func> +struct nullary_result_of_select<Func, sizeof(has_tr1_result)> {typedef typename Func::template result<Func()>::type type;}; + +template<typename Func> +struct result_of<Func()> { + template<typename T> + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + template<typename T> + static has_tr1_result testFunctor(T const *, typename T::template result<T()>::type const * = 0); + static has_none testFunctor(...); + + // note that the following indirection is needed for gcc-3.3 + enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))}; + typedef typename nullary_result_of_select<Func, FunctorType>::type type; +}; + +template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)> +struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;}; + +template<typename Func, typename ArgType> +struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;}; + +template<typename Func, typename ArgType> +struct unary_result_of_select<Func, ArgType, sizeof(has_tr1_result)> {typedef typename Func::template result<Func(ArgType)>::type type;}; + +template<typename Func, typename ArgType> +struct result_of<Func(ArgType)> { + template<typename T> + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + template<typename T> + static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType)>::type const * = 0); + static has_none testFunctor(...); + + // note that the following indirection is needed for gcc-3.3 + enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))}; + typedef typename unary_result_of_select<Func, ArgType, FunctorType>::type type; +}; + +template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)> +struct binary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;}; + +template<typename Func, typename ArgType0, typename ArgType1> +struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)> +{typedef typename Func::result_type type;}; + +template<typename Func, typename ArgType0, typename ArgType1> +struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_tr1_result)> +{typedef typename Func::template result<Func(ArgType0,ArgType1)>::type type;}; + +template<typename Func, typename ArgType0, typename ArgType1> +struct result_of<Func(ArgType0,ArgType1)> { + template<typename T> + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + template<typename T> + static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1)>::type const * = 0); + static has_none testFunctor(...); + + // note that the following indirection 
is needed for gcc-3.3 + enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))}; + typedef typename binary_result_of_select<Func, ArgType0, ArgType1, FunctorType>::type type; +}; + +template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2, int SizeOf=sizeof(has_none)> +struct ternary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;}; + +template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2> +struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_std_result_type)> +{typedef typename Func::result_type type;}; + +template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2> +struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_tr1_result)> +{typedef typename Func::template result<Func(ArgType0,ArgType1,ArgType2)>::type type;}; + +template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2> +struct result_of<Func(ArgType0,ArgType1,ArgType2)> { + template<typename T> + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + template<typename T> + static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1,ArgType2)>::type const * = 0); + static has_none testFunctor(...); + + // note that the following indirection is needed for gcc-3.3 + enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))}; + typedef typename ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, FunctorType>::type type; +}; + +#endif + +#if EIGEN_HAS_STD_INVOKE_RESULT +template<typename F, typename... ArgTypes> +struct invoke_result { + typedef typename std::invoke_result<F, ArgTypes...>::type type1; + typedef typename remove_all<type1>::type type; +}; +#elif EIGEN_HAS_CXX11 +template<typename F, typename... 
ArgTypes> +struct invoke_result { + typedef typename result_of<F(ArgTypes...)>::type type1; + typedef typename remove_all<type1>::type type; +}; +#else +template<typename F, typename ArgType0 = void, typename ArgType1 = void, typename ArgType2 = void> +struct invoke_result { + typedef typename result_of<F(ArgType0, ArgType1, ArgType2)>::type type1; + typedef typename remove_all<type1>::type type; +}; + +template<typename F> +struct invoke_result<F, void, void, void> { + typedef typename result_of<F()>::type type1; + typedef typename remove_all<type1>::type type; +}; + +template<typename F, typename ArgType0> +struct invoke_result<F, ArgType0, void, void> { + typedef typename result_of<F(ArgType0)>::type type1; + typedef typename remove_all<type1>::type type; +}; + +template<typename F, typename ArgType0, typename ArgType1> +struct invoke_result<F, ArgType0, ArgType1, void> { + typedef typename result_of<F(ArgType0, ArgType1)>::type type1; + typedef typename remove_all<type1>::type type; +}; +#endif + +struct meta_yes { char a[1]; }; +struct meta_no { char a[2]; }; + +// Check whether T::ReturnType does exist +template <typename T> +struct has_ReturnType +{ + template <typename C> static meta_yes testFunctor(C const *, typename C::ReturnType const * = 0); + template <typename C> static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor<T>(static_cast<T*>(0))) == sizeof(meta_yes) }; +}; + +template<typename T> const T* return_ptr(); + +template <typename T, typename IndexType=Index> +struct has_nullary_operator +{ + template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()())>0)>::type * = 0); + static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) }; +}; + +template <typename T, typename IndexType=Index> +struct has_unary_operator +{ + template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0)))>0)>::type * = 0); + static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) }; +}; + +template <typename T, typename IndexType=Index> +struct has_binary_operator +{ + template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr<C>()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0); + static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) }; +}; + +/** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer. + * Usage example: \code meta_sqrt<1023>::ret \endcode + */ +template<int Y, + int InfX = 0, + int SupX = ((Y==1) ? 1 : Y/2), + bool Done = ((SupX-InfX)<=1 ? true : ((SupX*SupX <= Y) && ((SupX+1)*(SupX+1) > Y))) > + // use ?: instead of || just to shut up a stupid gcc 4.3 warning +class meta_sqrt +{ + enum { + MidX = (InfX+SupX)/2, + TakeInf = MidX*MidX > Y ? 1 : 0, + NewInf = int(TakeInf) ? InfX : int(MidX), + NewSup = int(TakeInf) ? int(MidX) : SupX + }; + public: + enum { ret = meta_sqrt<Y,NewInf,NewSup>::ret }; +}; + +template<int Y, int InfX, int SupX> +class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; }; + + +/** \internal Computes the least common multiple of two positive integer A and B + * at compile-time. 
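+ * Usage example (editor's illustration): \code meta_least_common_multiple<4,6>::ret \endcode evaluates to 12.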
+ */ +template<int A, int B, int K=1, bool Done = ((A*K)%B)==0, bool Big=(A>=B)> +struct meta_least_common_multiple +{ + enum { ret = meta_least_common_multiple<A,B,K+1>::ret }; +}; +template<int A, int B, int K, bool Done> +struct meta_least_common_multiple<A,B,K,Done,false> +{ + enum { ret = meta_least_common_multiple<B,A,K>::ret }; +}; +template<int A, int B, int K> +struct meta_least_common_multiple<A,B,K,true,true> +{ + enum { ret = A*K }; +}; + + +/** \internal determines whether the product of two numeric types is allowed and what the return type is */ +template<typename T, typename U> struct scalar_product_traits +{ + enum { Defined = 0 }; +}; + +// FIXME quick workaround around current limitation of result_of +// template<typename Scalar, typename ArgType0, typename ArgType1> +// struct result_of<scalar_product_op<Scalar>(ArgType0,ArgType1)> { +// typedef typename scalar_product_traits<typename remove_all<ArgType0>::type, typename remove_all<ArgType1>::type>::ReturnType type; +// }; + +/** \internal Obtains a POD type suitable to use as storage for an object of a size + * of at most Len bytes, aligned as specified by \c Align. + */ +template<unsigned Len, unsigned Align> +struct aligned_storage { + struct type { + EIGEN_ALIGN_TO_BOUNDARY(Align) unsigned char data[Len]; + }; +}; + +} // end namespace internal + +namespace numext { + +#if defined(EIGEN_GPU_COMPILE_PHASE) +template<typename T> EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; } +#else +template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); } +#endif + +#if defined(EIGEN_GPU_COMPILE_PHASE) && !EIGEN_HAS_CXX11 +using internal::device::numeric_limits; +#else +using std::numeric_limits; +#endif + +// Integer division with rounding up. +// T is assumed to be an integer type with a>=0, and b>0 +template<typename T> +EIGEN_DEVICE_FUNC +T div_ceil(const T &a, const T &b) +{ + return (a+b-1) / b; +} + +// The aim of the following functions is to bypass -Wfloat-equal warnings +// when we really want a strict equality comparison on floating points. 
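+// For instance (an illustrative sketch, x and y being two floats):
+//   bool same = numext::equal_strict(x, y); // strict comparison, no -Wfloat-equal warning
+// whereas writing x == y directly would trigger that warning.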
+template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool equal_strict(const X& x,const Y& y) { return x == y; }
+
+#if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
+
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
+#endif
+
+template<typename X, typename Y> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool not_equal_strict(const X& x,const Y& y) { return x != y; }
+
+#if !defined(EIGEN_GPU_COMPILE_PHASE) || (!defined(EIGEN_CUDA_ARCH) && defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }
+
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
+#endif
+
+} // end namespace numext
+
+} // end namespace Eigen
+
+#endif // EIGEN_META_H
diff --git a/Eigen/src/Core/util/NonMPL2.h b/Eigen/src/Core/util/NonMPL2.h
new file mode 100644
index 0000000..1af67cf
--- /dev/null
+++ b/Eigen/src/Core/util/NonMPL2.h
@@ -0,0 +1,3 @@
+#ifdef EIGEN_MPL2_ONLY
+#error Including non-MPL2 code in EIGEN_MPL2_ONLY mode
+#endif
diff --git a/Eigen/src/Core/util/ReenableStupidWarnings.h b/Eigen/src/Core/util/ReenableStupidWarnings.h
new file mode 100644
index 0000000..1ce6fd1
--- /dev/null
+++ b/Eigen/src/Core/util/ReenableStupidWarnings.h
@@ -0,0 +1,31 @@
+#ifdef EIGEN_WARNINGS_DISABLED_2
+// "DisableStupidWarnings.h" was included twice recursively: Do not reenable warnings yet!
+#  undef EIGEN_WARNINGS_DISABLED_2
+
+#elif defined(EIGEN_WARNINGS_DISABLED)
+#undef EIGEN_WARNINGS_DISABLED
+
+#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+  #ifdef _MSC_VER
+    #pragma warning( pop )
+  #elif defined __INTEL_COMPILER
+    #pragma warning pop
+  #elif defined __clang__
+    #pragma clang diagnostic pop
+  #elif defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
+    #pragma GCC diagnostic pop
+  #endif
+
+  #if defined __NVCC__
+// Don't reenable the diagnostic messages, as it turns out these messages need
+// to be disabled at the point of the template instantiation (i.e. the user code),
+// otherwise they'll be triggered by nvcc.
+//    #pragma diag_default code_is_unreachable
+//    #pragma diag_default initialization_not_reachable
+//    #pragma diag_default 2651
+//    #pragma diag_default 2653
+  #endif
+
+#endif
+
+#endif // EIGEN_WARNINGS_DISABLED
diff --git a/Eigen/src/Core/util/ReshapedHelper.h b/Eigen/src/Core/util/ReshapedHelper.h
new file mode 100644
index 0000000..4124321
--- /dev/null
+++ b/Eigen/src/Core/util/ReshapedHelper.h
@@ -0,0 +1,51 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ + +#ifndef EIGEN_RESHAPED_HELPER_H +#define EIGEN_RESHAPED_HELPER_H + +namespace Eigen { + +enum AutoSize_t { AutoSize }; +const int AutoOrder = 2; + +namespace internal { + +template<typename SizeType,typename OtherSize, int TotalSize> +struct get_compiletime_reshape_size { + enum { value = get_fixed_value<SizeType>::value }; +}; + +template<typename SizeType> +Index get_runtime_reshape_size(SizeType size, Index /*other*/, Index /*total*/) { + return internal::get_runtime_value(size); +} + +template<typename OtherSize, int TotalSize> +struct get_compiletime_reshape_size<AutoSize_t,OtherSize,TotalSize> { + enum { + other_size = get_fixed_value<OtherSize>::value, + value = (TotalSize==Dynamic || other_size==Dynamic) ? Dynamic : TotalSize / other_size }; +}; + +inline Index get_runtime_reshape_size(AutoSize_t /*size*/, Index other, Index total) { + return total/other; +} + +template<int Flags, int Order> +struct get_compiletime_reshape_order { + enum { value = Order == AutoOrder ? Flags & RowMajorBit : Order }; +}; + +} + +} // end namespace Eigen + +#endif // EIGEN_RESHAPED_HELPER_H diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h new file mode 100644 index 0000000..c45de59 --- /dev/null +++ b/Eigen/src/Core/util/StaticAssert.h @@ -0,0 +1,221 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STATIC_ASSERT_H +#define EIGEN_STATIC_ASSERT_H + +/* Some notes on Eigen's static assertion mechanism: + * + * - in EIGEN_STATIC_ASSERT(CONDITION,MSG) the parameter CONDITION must be a compile time boolean + * expression, and MSG an enum listed in struct internal::static_assertion<true> + * + * - define EIGEN_NO_STATIC_ASSERT to disable them (and save compilation time) + * in that case, the static assertion is converted to the following runtime assert: + * eigen_assert(CONDITION && "MSG") + * + * - currently EIGEN_STATIC_ASSERT can only be used in function scope + * + */ + +#ifndef EIGEN_STATIC_ASSERT +#ifndef EIGEN_NO_STATIC_ASSERT + + #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (EIGEN_COMP_CXXVER >= 11) || (EIGEN_COMP_MSVC >= 1600)) + + // if native static_assert is enabled, let's use it + #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG); + + #else // not CXX0X + + namespace Eigen { + + namespace internal { + + template<bool condition> + struct static_assertion {}; + + template<> + struct static_assertion<true> + { + enum { + YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1, + YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1, + YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1, + THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1, + OUT_OF_RANGE_ACCESS=1, + YOU_MADE_A_PROGRAMMING_MISTAKE=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1, + EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1, + YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1, + YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1, + UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1, + 
THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1, + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1, + NUMERIC_TYPE_MUST_BE_REAL=1, + COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1, + WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1, + THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1, + INVALID_MATRIX_PRODUCT=1, + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1, + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1, + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1, + THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1, + INVALID_MATRIX_TEMPLATE_PARAMETERS=1, + INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1, + BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1, + THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1, + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1, + YOU_ALREADY_SPECIFIED_THIS_STRIDE=1, + INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1, + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1, + THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1, + YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1, + YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1, + THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1, + YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1, + THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1, + THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1, + THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1, + YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1, + YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1, + THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1, + THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1, + OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1, + IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1, + STORAGE_LAYOUT_DOES_NOT_MATCH=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1, + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1, + THIS_TYPE_IS_NOT_SUPPORTED=1, + STORAGE_KIND_MUST_MATCH=1, + STORAGE_INDEX_MUST_MATCH=1, + CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1, + SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1, + INVALID_TEMPLATE_PARAMETER=1, + GPU_TENSOR_CONTRACTION_DOES_NOT_SUPPORT_OUTPUT_KERNELS=1, + THE_ARRAY_SIZE_SHOULD_EQUAL_WITH_PACKET_SIZE=1 + }; + }; + + } // end namespace internal + + } // end namespace Eigen + + // Specialized implementation for MSVC to avoid "conditional + // expression is constant" warnings. This implementation doesn't + // appear to work under GCC, hence the multiple implementations. 
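+  // (Illustration: with this fallback, a failing EIGEN_STATIC_ASSERT surfaces as a compile error
+  //  naming the selected MSG enumerator, e.g. YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES, because
+  //  static_assertion<false> defines no such member.)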
+  #if EIGEN_COMP_MSVC
+
+    #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+      {Eigen::internal::static_assertion<bool(CONDITION)>::MSG;}
+
+  #else
+    // In some cases clang interprets bool(CONDITION) as a function declaration
+    #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \
+      if (Eigen::internal::static_assertion<static_cast<bool>(CONDITION)>::MSG) {}
+
+  #endif
+
+  #endif // not CXX0X
+
+#else // EIGEN_NO_STATIC_ASSERT
+
+  #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
+
+#endif // EIGEN_NO_STATIC_ASSERT
+#endif // EIGEN_STATIC_ASSERT
+
+// static assertion failing if the type \a TYPE is not a vector type
+#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \
+  EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime, \
+                      YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX)
+
+// static assertion failing if the type \a TYPE is not fixed-size
+#define EIGEN_STATIC_ASSERT_FIXED_SIZE(TYPE) \
+  EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime!=Eigen::Dynamic, \
+                      YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR)
+
+// static assertion failing if the type \a TYPE is not dynamic-size
+#define EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(TYPE) \
+  EIGEN_STATIC_ASSERT(TYPE::SizeAtCompileTime==Eigen::Dynamic, \
+                      YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR)
+
+// static assertion failing if the type \a TYPE is not a vector type of the given size
+#define EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(TYPE, SIZE) \
+  EIGEN_STATIC_ASSERT(TYPE::IsVectorAtCompileTime && TYPE::SizeAtCompileTime==SIZE, \
+                      THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE)
+
+// static assertion failing if the type \a TYPE is not a matrix type of the given size
+#define EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(TYPE, ROWS, COLS) \
+  EIGEN_STATIC_ASSERT(TYPE::RowsAtCompileTime==ROWS && TYPE::ColsAtCompileTime==COLS, \
+                      THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE)
+
+// static assertion failing if the two vector expression types are not compatible (same fixed-size or dynamic size)
+#define EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(TYPE0,TYPE1) \
+  EIGEN_STATIC_ASSERT( \
+      (int(TYPE0::SizeAtCompileTime)==Eigen::Dynamic \
+    || int(TYPE1::SizeAtCompileTime)==Eigen::Dynamic \
+    || int(TYPE0::SizeAtCompileTime)==int(TYPE1::SizeAtCompileTime)),\
+    YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES)
+
+#define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+  ( \
+    (int(Eigen::internal::size_of_xpr_at_compile_time<TYPE0>::ret)==0 && int(Eigen::internal::size_of_xpr_at_compile_time<TYPE1>::ret)==0) \
+    || (\
+          (int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \
+        || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \
+        || int(TYPE0::RowsAtCompileTime)==int(TYPE1::RowsAtCompileTime)) \
+      && (int(TYPE0::ColsAtCompileTime)==Eigen::Dynamic \
+        || int(TYPE1::ColsAtCompileTime)==Eigen::Dynamic \
+        || int(TYPE0::ColsAtCompileTime)==int(TYPE1::ColsAtCompileTime))\
+       ) \
+  )
+
+#define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \
+    EIGEN_STATIC_ASSERT(!Eigen::NumTraits<TYPE>::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
+
+
+// static assertion failing if it is guaranteed at compile-time that the two matrix expression types have different sizes
+#define EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(TYPE0,TYPE1) \
+  EIGEN_STATIC_ASSERT( \
+     EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1),\
+    YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES)
+
+#define EIGEN_STATIC_ASSERT_SIZE_1x1(TYPE) \
+      EIGEN_STATIC_ASSERT((TYPE::RowsAtCompileTime == 1 || TYPE::RowsAtCompileTime == Eigen::Dynamic) && \
+                          (TYPE::ColsAtCompileTime == 1 || TYPE::ColsAtCompileTime == Eigen::Dynamic), \
+                          THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS)
+
+#define EIGEN_STATIC_ASSERT_LVALUE(Derived) \
+    EIGEN_STATIC_ASSERT(Eigen::internal::is_lvalue<Derived>::value, \
+                        THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY)
+
+#define EIGEN_STATIC_ASSERT_ARRAYXPR(Derived) \
+    EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived>::XprKind, ArrayXpr>::value), \
+                        THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES)
+
+#define EIGEN_STATIC_ASSERT_SAME_XPR_KIND(Derived1, Derived2) \
+    EIGEN_STATIC_ASSERT((Eigen::internal::is_same<typename Eigen::internal::traits<Derived1>::XprKind, \
+                                                  typename Eigen::internal::traits<Derived2>::XprKind \
+                        >::value), \
+                        YOU_CANNOT_MIX_ARRAYS_AND_MATRICES)
+
+// Check that a cost value is positive, and that it stays within a reasonable range
+// TODO this check could be enabled for internal debugging only
+#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \
+      EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE);
+
+#endif // EIGEN_STATIC_ASSERT_H
diff --git a/Eigen/src/Core/util/SymbolicIndex.h b/Eigen/src/Core/util/SymbolicIndex.h
new file mode 100644
index 0000000..354dd9a
--- /dev/null
+++ b/Eigen/src/Core/util/SymbolicIndex.h
@@ -0,0 +1,293 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SYMBOLIC_INDEX_H
+#define EIGEN_SYMBOLIC_INDEX_H
+
+namespace Eigen {
+
+/** \namespace Eigen::symbolic
+  * \ingroup Core_Module
+  *
+  * This namespace defines a set of classes and functions to build and evaluate symbolic expressions of scalar type Index.
+  * Here is a simple example:
+  *
+  * \code
+  * // First step, define symbols:
+  * struct x_tag {};  static const symbolic::SymbolExpr<x_tag> x;
+  * struct y_tag {};  static const symbolic::SymbolExpr<y_tag> y;
+  * struct z_tag {};  static const symbolic::SymbolExpr<z_tag> z;
+  *
+  * // Defines an expression:
+  * auto expr = (x+3)/y+z;
+  *
+  * // And evaluate it: (c++14)
+  * std::cout << expr.eval(x=6,y=3,z=-13) << "\n";
+  *
+  * // In c++98/11, only one symbol per expression is supported for now:
+  * auto expr98 = (3-x)/2;
+  * std::cout << expr98.eval(x=6) << "\n";
+  * \endcode
+  *
+  * It is currently only used internally to define and manipulate the Eigen::last and Eigen::lastp1 symbols in Eigen::seq and Eigen::seqN.
+  *
+  */
+namespace symbolic {
+
+template<typename Tag> class Symbol;
+template<typename Arg0> class NegateExpr;
+template<typename Arg1,typename Arg2> class AddExpr;
+template<typename Arg1,typename Arg2> class ProductExpr;
+template<typename Arg1,typename Arg2> class QuotientExpr;
+
+// A simple wrapper around an integral value to provide the eval method.
+// We could also use a free-function symbolic_eval...
+template<typename IndexType=Index>
+class ValueExpr {
+public:
+  ValueExpr(IndexType val) : m_value(val) {}
+  template<typename T>
+  IndexType eval_impl(const T&) const { return m_value; }
+protected:
+  IndexType m_value;
+};
+
+// Specialization for compile-time values. It is similar to ValueExpr(N),
+// but this version helps the compiler to generate better code.
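+// For instance (illustrative), ValueExpr<internal::FixedInt<3> >() carries no data and its
+// eval_impl() is a constant expression returning 3.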
+template<int N> +class ValueExpr<internal::FixedInt<N> > { +public: + ValueExpr() {} + template<typename T> + EIGEN_CONSTEXPR Index eval_impl(const T&) const { return N; } +}; + + +/** \class BaseExpr + * \ingroup Core_Module + * Common base class of any symbolic expressions + */ +template<typename Derived> +class BaseExpr +{ +public: + const Derived& derived() const { return *static_cast<const Derived*>(this); } + + /** Evaluate the expression given the \a values of the symbols. + * + * \param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue + * as constructed by SymbolExpr::operator= operator. + * + */ + template<typename T> + Index eval(const T& values) const { return derived().eval_impl(values); } + +#if EIGEN_HAS_CXX14 + template<typename... Types> + Index eval(Types&&... values) const { return derived().eval_impl(std::make_tuple(values...)); } +#endif + + NegateExpr<Derived> operator-() const { return NegateExpr<Derived>(derived()); } + + AddExpr<Derived,ValueExpr<> > operator+(Index b) const + { return AddExpr<Derived,ValueExpr<> >(derived(), b); } + AddExpr<Derived,ValueExpr<> > operator-(Index a) const + { return AddExpr<Derived,ValueExpr<> >(derived(), -a); } + ProductExpr<Derived,ValueExpr<> > operator*(Index a) const + { return ProductExpr<Derived,ValueExpr<> >(derived(),a); } + QuotientExpr<Derived,ValueExpr<> > operator/(Index a) const + { return QuotientExpr<Derived,ValueExpr<> >(derived(),a); } + + friend AddExpr<Derived,ValueExpr<> > operator+(Index a, const BaseExpr& b) + { return AddExpr<Derived,ValueExpr<> >(b.derived(), a); } + friend AddExpr<NegateExpr<Derived>,ValueExpr<> > operator-(Index a, const BaseExpr& b) + { return AddExpr<NegateExpr<Derived>,ValueExpr<> >(-b.derived(), a); } + friend ProductExpr<ValueExpr<>,Derived> operator*(Index a, const BaseExpr& b) + { return ProductExpr<ValueExpr<>,Derived>(a,b.derived()); } + friend QuotientExpr<ValueExpr<>,Derived> operator/(Index a, const BaseExpr& b) + { return QuotientExpr<ValueExpr<>,Derived>(a,b.derived()); } + + template<int N> + AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>) const + { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N>) const + { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); } + template<int N> + ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N>) const + { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } + template<int N> + QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N>) const + { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } + + template<int N> + friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>, const BaseExpr& b) + { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N>, const BaseExpr& b) + { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + friend 
ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N>, const BaseExpr& b) + { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } + template<int N> + friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N>, const BaseExpr& b) + { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } + +#if (!EIGEN_HAS_CXX14) + template<int N> + AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)()) const + { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N> (*)()) const + { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); } + template<int N> + ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N> (*)()) const + { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } + template<int N> + QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N> (*)()) const + { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } + + template<int N> + friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)(), const BaseExpr& b) + { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N> (*)(), const BaseExpr& b) + { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); } + template<int N> + friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N> (*)(), const BaseExpr& b) + { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } + template<int N> + friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N> (*)(), const BaseExpr& b) + { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } +#endif + + + template<typename OtherDerived> + AddExpr<Derived,OtherDerived> operator+(const BaseExpr<OtherDerived> &b) const + { return AddExpr<Derived,OtherDerived>(derived(), b.derived()); } + + template<typename OtherDerived> + AddExpr<Derived,NegateExpr<OtherDerived> > operator-(const BaseExpr<OtherDerived> &b) const + { return AddExpr<Derived,NegateExpr<OtherDerived> >(derived(), -b.derived()); } + + template<typename OtherDerived> + ProductExpr<Derived,OtherDerived> operator*(const BaseExpr<OtherDerived> &b) const + { return ProductExpr<Derived,OtherDerived>(derived(), b.derived()); } + + template<typename OtherDerived> + QuotientExpr<Derived,OtherDerived> operator/(const BaseExpr<OtherDerived> &b) const + { return QuotientExpr<Derived,OtherDerived>(derived(), b.derived()); } +}; + +template<typename T> +struct is_symbolic { + // BaseExpr has no conversion ctor, so we only have to check whether T can be statically cast to its base class BaseExpr<T>. 
+  enum { value = internal::is_convertible<T,BaseExpr<T> >::value };
+};
+
+/** Represents the actual value of a symbol identified by its tag
+  *
+  * It is the return type of SymbolExpr::operator=, and most of the time this is the only way it is used.
+  */
+template<typename Tag>
+class SymbolValue
+{
+public:
+  /** Constructor from the value \a val */
+  SymbolValue(Index val) : m_value(val) {}
+
+  /** \returns the stored value of the symbol */
+  Index value() const { return m_value; }
+protected:
+  Index m_value;
+};
+
+/** Expression of a symbol uniquely identified by the template parameter type \c tag */
+template<typename tag>
+class SymbolExpr : public BaseExpr<SymbolExpr<tag> >
+{
+public:
+  /** Alias to the template parameter \c tag */
+  typedef tag Tag;
+
+  SymbolExpr() {}
+
+  /** Associate the value \a val to the given symbol \c *this, uniquely identified by its \c Tag.
+    *
+    * The returned object should be passed to BaseExpr::eval() to evaluate a given expression with this specified runtime value.
+    */
+  SymbolValue<Tag> operator=(Index val) const {
+    return SymbolValue<Tag>(val);
+  }
+
+  Index eval_impl(const SymbolValue<Tag> &values) const { return values.value(); }
+
+#if EIGEN_HAS_CXX14
+  // C++14 versions suitable for multiple symbols
+  template<typename... Types>
+  Index eval_impl(const std::tuple<Types...>& values) const { return std::get<SymbolValue<Tag> >(values).value(); }
+#endif
+};
+
+template<typename Arg0>
+class NegateExpr : public BaseExpr<NegateExpr<Arg0> >
+{
+public:
+  NegateExpr(const Arg0& arg0) : m_arg0(arg0) {}
+
+  template<typename T>
+  Index eval_impl(const T& values) const { return -m_arg0.eval_impl(values); }
+protected:
+  Arg0 m_arg0;
+};
+
+template<typename Arg0, typename Arg1>
+class AddExpr : public BaseExpr<AddExpr<Arg0,Arg1> >
+{
+public:
+  AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+
+  template<typename T>
+  Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) + m_arg1.eval_impl(values); }
+protected:
+  Arg0 m_arg0;
+  Arg1 m_arg1;
+};
+
+template<typename Arg0, typename Arg1>
+class ProductExpr : public BaseExpr<ProductExpr<Arg0,Arg1> >
+{
+public:
+  ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+
+  template<typename T>
+  Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) * m_arg1.eval_impl(values); }
+protected:
+  Arg0 m_arg0;
+  Arg1 m_arg1;
+};
+
+template<typename Arg0, typename Arg1>
+class QuotientExpr : public BaseExpr<QuotientExpr<Arg0,Arg1> >
+{
+public:
+  QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {}
+
+  template<typename T>
+  Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) / m_arg1.eval_impl(values); }
+protected:
+  Arg0 m_arg0;
+  Arg1 m_arg1;
+};
+
+} // end namespace symbolic
+
+} // end namespace Eigen
+
+#endif // EIGEN_SYMBOLIC_INDEX_H
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
new file mode 100644
index 0000000..71c32b8
--- /dev/null
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -0,0 +1,856 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_XPRHELPER_H
+#define EIGEN_XPRHELPER_H
+
+// just a workaround because GCC seems to not really like empty structs
+// FIXME: gcc 4.3 generates bad code when strict-aliasing is enabled
+// so currently we simply disable this optimization for gcc 4.3
+#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3)
+  #define EIGEN_EMPTY_STRUCT_CTOR(X) \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {}
+#else
+  #define EIGEN_EMPTY_STRUCT_CTOR(X)
+#endif
+
+namespace Eigen {
+
+namespace internal {
+
+template<typename IndexDest, typename IndexSrc>
+EIGEN_DEVICE_FUNC
+inline IndexDest convert_index(const IndexSrc& idx) {
+  // for sizeof(IndexDest)>=sizeof(IndexSrc) compilers should be able to optimize this away:
+  eigen_internal_assert(idx <= NumTraits<IndexDest>::highest() && "Index value too big for target type");
+  return IndexDest(idx);
+}
+
+// true if T can be considered as an integral index (i.e., an integral type or enum)
+template<typename T> struct is_valid_index_type
+{
+  enum { value =
+#if EIGEN_HAS_TYPE_TRAITS
+    internal::is_integral<T>::value || std::is_enum<T>::value
+#elif EIGEN_COMP_MSVC
+    internal::is_integral<T>::value || __is_enum(T)
+#else
+    // without C++11, we use is_convertible to Index instead of is_integral in order to treat enums as Index.
+    internal::is_convertible<T,Index>::value && !internal::is_same<T,float>::value && !is_same<T,double>::value
+#endif
+  };
+};
+
+// true unless both types are valid index types
+template<typename RowIndices, typename ColIndices>
+struct valid_indexed_view_overload {
+  enum { value = !(internal::is_valid_index_type<RowIndices>::value && internal::is_valid_index_type<ColIndices>::value) };
+};
+
+// promote_scalar_arg is a helper used in operations between an expression and a scalar, like:
+//    expression * scalar
+// Its role is to determine how the type T of the scalar operand should be promoted given the scalar type ExprScalar of the given expression.
+// The IsSupported template parameter must be provided by the caller as: internal::has_ReturnType<ScalarBinaryOpTraits<ExprScalar,T,op> >::value using the proper order for ExprScalar and T.
+// Then the logic is as follows:
+//  - if the operation is natively supported as defined by IsSupported, then the scalar type is not promoted, and T is returned.
+//  - otherwise, NumTraits<ExprScalar>::Literal is returned if T is implicitly convertible to NumTraits<ExprScalar>::Literal AND this does not imply a float to integer conversion.
+//  - otherwise, ExprScalar is returned if T is implicitly convertible to ExprScalar AND this does not imply a float to integer conversion.
+//  - In all other cases, the promoted type is not defined, and the respective operation is thus invalid and not available (SFINAE).
+template<typename ExprScalar,typename T, bool IsSupported>
+struct promote_scalar_arg;
+
+template<typename S,typename T>
+struct promote_scalar_arg<S,T,true>
+{
+  typedef T type;
+};
+
+// Recursively check safe conversion to PromotedType, and then ExprScalar if they are different.
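+// For instance (illustrative): for a double-valued expression multiplied by an int scalar, the
+// int argument is promoted to double via NumTraits<double>::Literal, whereas promoting a double
+// scalar for an integer-valued expression is refused as an unsafe real-to-integer conversion.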
+template<typename ExprScalar,typename T,typename PromotedType, + bool ConvertibleToLiteral = internal::is_convertible<T,PromotedType>::value, + bool IsSafe = NumTraits<T>::IsInteger || !NumTraits<PromotedType>::IsInteger> +struct promote_scalar_arg_unsupported; + +// Start recursion with NumTraits<ExprScalar>::Literal +template<typename S,typename T> +struct promote_scalar_arg<S,T,false> : promote_scalar_arg_unsupported<S,T,typename NumTraits<S>::Literal> {}; + +// We found a match! +template<typename S,typename T, typename PromotedType> +struct promote_scalar_arg_unsupported<S,T,PromotedType,true,true> +{ + typedef PromotedType type; +}; + +// No match, but no real-to-integer issues, and ExprScalar and current PromotedType are different, +// so let's try to promote to ExprScalar +template<typename ExprScalar,typename T, typename PromotedType> +struct promote_scalar_arg_unsupported<ExprScalar,T,PromotedType,false,true> + : promote_scalar_arg_unsupported<ExprScalar,T,ExprScalar> +{}; + +// Unsafe real-to-integer, let's stop. +template<typename S,typename T, typename PromotedType, bool ConvertibleToLiteral> +struct promote_scalar_arg_unsupported<S,T,PromotedType,ConvertibleToLiteral,false> {}; + +// T is not even convertible to ExprScalar, let's stop. +template<typename S,typename T> +struct promote_scalar_arg_unsupported<S,T,S,false,true> {}; + +//classes inheriting no_assignment_operator don't generate a default operator=. +class no_assignment_operator +{ + private: + no_assignment_operator& operator=(const no_assignment_operator&); + protected: + EIGEN_DEFAULT_COPY_CONSTRUCTOR(no_assignment_operator) + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(no_assignment_operator) +}; + +/** \internal return the index type with the largest number of bits */ +template<typename I1, typename I2> +struct promote_index_type +{ + typedef typename conditional<(sizeof(I1)<sizeof(I2)), I2, I1>::type type; +}; + +/** \internal If the template parameter Value is Dynamic, this class is just a wrapper around a T variable that + * can be accessed using value() and setValue(). + * Otherwise, this class is an empty structure and value() just returns the template parameter Value. 
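+  * For instance, \code variable_if_dynamic<Index,3> \endcode is an empty class whose value()
+  * statically returns 3, while \code variable_if_dynamic<Index,Dynamic> \endcode stores an
+  * Index member that is read and written at run time.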
+ */ +template<typename T, int Value> class variable_if_dynamic +{ + public: + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(variable_if_dynamic) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + T value() { return T(Value); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + operator T() const { return T(Value); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void setValue(T v) const { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } +}; + +template<typename T> class variable_if_dynamic<T, Dynamic> +{ + T m_value; + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value = 0) EIGEN_NO_THROW : m_value(value) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; } +}; + +/** \internal like variable_if_dynamic but for DynamicIndex + */ +template<typename T, int Value> class variable_if_dynamicindex +{ + public: + EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR + T value() { return T(Value); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void setValue(T) {} +}; + +template<typename T> class variable_if_dynamicindex<T, DynamicIndex> +{ + T m_value; + EIGEN_DEVICE_FUNC variable_if_dynamicindex() { eigen_assert(false); } + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T value) : m_value(value) {} + EIGEN_DEVICE_FUNC T EIGEN_STRONG_INLINE value() const { return m_value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; } +}; + +template<typename T> struct functor_traits +{ + enum + { + Cost = 10, + PacketAccess = false, + IsRepeatable = false + }; +}; + +template<typename T> struct packet_traits; + +template<typename T> struct unpacket_traits; + +template<int Size, typename PacketType, + bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value> +struct find_best_packet_helper; + +template< int Size, typename PacketType> +struct find_best_packet_helper<Size,PacketType,true> +{ + typedef PacketType type; +}; + +template<int Size, typename PacketType> +struct find_best_packet_helper<Size,PacketType,false> +{ + typedef typename find_best_packet_helper<Size,typename unpacket_traits<PacketType>::half>::type type; +}; + +template<typename T, int Size> +struct find_best_packet +{ + typedef typename find_best_packet_helper<Size,typename packet_traits<T>::type>::type type; +}; + +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 +template<int ArrayBytes, int AlignmentBytes, + bool Match = bool((ArrayBytes%AlignmentBytes)==0), + bool TryHalf = bool(EIGEN_MIN_ALIGN_BYTES<AlignmentBytes) > +struct compute_default_alignment_helper +{ + enum { value = 0 }; +}; + +template<int ArrayBytes, int AlignmentBytes, bool TryHalf> +struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, true, TryHalf> // Match +{ + enum { value = AlignmentBytes }; +}; + +template<int ArrayBytes, int AlignmentBytes> +struct compute_default_alignment_helper<ArrayBytes, AlignmentBytes, false, true> // Try-half 
+{
+  // current packet too large, try with a half-packet
+  enum { value = compute_default_alignment_helper<ArrayBytes, AlignmentBytes/2>::value };
+};
+#else
+// If static alignment is disabled, no need to bother.
+// This also avoids a division by zero in "bool Match = bool((ArrayBytes%AlignmentBytes)==0)"
+template<int ArrayBytes, int AlignmentBytes>
+struct compute_default_alignment_helper
+{
+  enum { value = 0 };
+};
+#endif
+
+template<typename T, int Size> struct compute_default_alignment {
+  enum { value = compute_default_alignment_helper<Size*sizeof(T),EIGEN_MAX_STATIC_ALIGN_BYTES>::value };
+};
+
+template<typename T> struct compute_default_alignment<T,Dynamic> {
+  enum { value = EIGEN_MAX_ALIGN_BYTES };
+};
+
+template<typename _Scalar, int _Rows, int _Cols,
+         int _Options = AutoAlign |
+                          ( (_Rows==1 && _Cols!=1) ? RowMajor
+                          : (_Cols==1 && _Rows!=1) ? ColMajor
+                          : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
+         int _MaxRows = _Rows,
+         int _MaxCols = _Cols
+> class make_proper_matrix_type
+{
+    enum {
+      IsColVector = _Cols==1 && _Rows!=1,
+      IsRowVector = _Rows==1 && _Cols!=1,
+      Options = IsColVector ? (_Options | ColMajor) & ~RowMajor
+              : IsRowVector ? (_Options | RowMajor) & ~ColMajor
+              : _Options
+    };
+  public:
+    typedef Matrix<_Scalar, _Rows, _Cols, Options, _MaxRows, _MaxCols> type;
+};
+
+template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+class compute_matrix_flags
+{
+    enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 };
+  public:
+    // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<>
+    // and then propagate this information to the evaluator's flags.
+    // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage.
+    enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit };
+};
+
+template<int _Rows, int _Cols> struct size_at_compile_time
+{
+  enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };
+};
+
+template<typename XprType> struct size_of_xpr_at_compile_time
+{
+  enum { ret = size_at_compile_time<traits<XprType>::RowsAtCompileTime,traits<XprType>::ColsAtCompileTime>::ret };
+};
+
+/* plain_matrix_type : the difference from eval is that plain_matrix_type is always a plain matrix type,
+ * whereas eval is a const reference in the case of a matrix
+ */
+
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_matrix_type;
+template<typename T, typename BaseClassType, int Flags> struct plain_matrix_type_dense;
+template<typename T> struct plain_matrix_type<T,Dense>
+{
+  typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, traits<T>::Flags>::type type;
+};
+template<typename T> struct plain_matrix_type<T,DiagonalShape>
+{
+  typedef typename T::PlainObject type;
+};
+
+template<typename T, int Flags> struct plain_matrix_type_dense<T,MatrixXpr,Flags>
+{
+  typedef Matrix<typename traits<T>::Scalar,
+                 traits<T>::RowsAtCompileTime,
+                 traits<T>::ColsAtCompileTime,
+                 AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
+                 traits<T>::MaxRowsAtCompileTime,
+                 traits<T>::MaxColsAtCompileTime
+          > type;
+};
+
+template<typename T, int Flags> struct plain_matrix_type_dense<T,ArrayXpr,Flags>
+{
+  typedef Array<typename traits<T>::Scalar,
+                traits<T>::RowsAtCompileTime,
+                traits<T>::ColsAtCompileTime,
+                AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor),
+                traits<T>::MaxRowsAtCompileTime,
+                traits<T>::MaxColsAtCompileTime
+          > type;
+};
+
+/* eval : the return type of eval(). For matrices, this is just a const reference
+ * in order to avoid a useless copy
+ */
+
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct eval;
+
+template<typename T> struct eval<T,Dense>
+{
+  typedef typename plain_matrix_type<T>::type type;
+//   typedef typename T::PlainObject type;
+//   typedef T::Matrix<typename traits<T>::Scalar,
+//                 traits<T>::RowsAtCompileTime,
+//                 traits<T>::ColsAtCompileTime,
+//                 AutoAlign | (traits<T>::Flags&RowMajorBit ? RowMajor : ColMajor),
+//                 traits<T>::MaxRowsAtCompileTime,
+//                 traits<T>::MaxColsAtCompileTime
+//           > type;
+};
+
+template<typename T> struct eval<T,DiagonalShape>
+{
+  typedef typename plain_matrix_type<T>::type type;
+};
+
+// for matrices, no need to evaluate, just use a const reference to avoid a useless copy
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct eval<Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
+{
+  typedef const Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
+};
+
+template<typename _Scalar, int _Rows, int _Cols, int _Options, int _MaxRows, int _MaxCols>
+struct eval<Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>, Dense>
+{
+  typedef const Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>& type;
+};
+
+
+/* similar to plain_matrix_type, but using the evaluator's Flags */
+template<typename T, typename StorageKind = typename traits<T>::StorageKind> struct plain_object_eval;
+
+template<typename T>
+struct plain_object_eval<T,Dense>
+{
+  typedef typename plain_matrix_type_dense<T,typename traits<T>::XprKind, evaluator<T>::Flags>::type type;
+};
+
+
+/* plain_matrix_type_column_major : same as plain_matrix_type but guaranteed to be column-major
+ */
+template<typename T> struct plain_matrix_type_column_major
+{
+  enum { Rows = traits<T>::RowsAtCompileTime,
+         Cols = traits<T>::ColsAtCompileTime,
+         MaxRows = traits<T>::MaxRowsAtCompileTime,
+         MaxCols = traits<T>::MaxColsAtCompileTime
+  };
+  typedef Matrix<typename traits<T>::Scalar,
+                Rows,
+                Cols,
+                (MaxRows==1&&MaxCols!=1) ? RowMajor : ColMajor,
+                MaxRows,
+                MaxCols
+          > type;
+};
+
+/* plain_matrix_type_row_major : same as plain_matrix_type but guaranteed to be row-major
+ */
+template<typename T> struct plain_matrix_type_row_major
+{
+  enum { Rows = traits<T>::RowsAtCompileTime,
+         Cols = traits<T>::ColsAtCompileTime,
+         MaxRows = traits<T>::MaxRowsAtCompileTime,
+         MaxCols = traits<T>::MaxColsAtCompileTime
+  };
+  typedef Matrix<typename traits<T>::Scalar,
+                Rows,
+                Cols,
+                (MaxCols==1&&MaxRows!=1) ? ColMajor : RowMajor,
+                MaxRows,
+                MaxCols
+          > type;
+};
+
+/** \internal The reference selector for template expressions. The idea is that we don't
+  * need to use references for expressions since they are lightweight proxy
+  * objects which should generate no copying overhead.
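+  * For example, a plain Matrix advertises NestByRefBit (see compute_matrix_flags above) and is
+  * therefore nested as \code T const& \endcode, while expressions without that bit are nested by value.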
  */
+template <typename T>
+struct ref_selector
+{
+  typedef typename conditional<
+    bool(traits<T>::Flags & NestByRefBit),
+    T const&,
+    const T
+  >::type type;
+
+  typedef typename conditional<
+    bool(traits<T>::Flags & NestByRefBit),
+    T &,
+    T
+  >::type non_const_type;
+};
+
+/** \internal Adds the const qualifier on the value-type of T2 if and only if T1 is a const type */
+template<typename T1, typename T2>
+struct transfer_constness
+{
+  typedef typename conditional<
+    bool(internal::is_const<T1>::value),
+    typename internal::add_const_on_value_type<T2>::type,
+    T2
+  >::type type;
+};
+
+
+// However, we still need a mechanism to detect whether an expression which is evaluated multiple times
+// has to be evaluated into a temporary.
+// That's the purpose of this new nested_eval helper:
+/** \internal Determines how a given expression should be nested when evaluated multiple times.
+  * For example, when you do a * (b+c), Eigen will determine how the expression b+c should be
+  * evaluated into the bigger product expression. The choice is between nesting the expression b+c as-is, or
+  * evaluating that expression b+c into a temporary variable d, and nest d so that the resulting expression is
+  * a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes
+  * many coefficient accesses in the nested expressions -- as is the case with matrix product for example.
+  *
+  * \tparam T the type of the expression being nested.
+  * \tparam n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression.
+  * \tparam PlainObject the type of the temporary if needed.
+  */
+template<typename T, int n, typename PlainObject = typename plain_object_eval<T>::type> struct nested_eval
+{
+  enum {
+    ScalarReadCost = NumTraits<typename traits<T>::Scalar>::ReadCost,
+    CoeffReadCost = evaluator<T>::CoeffReadCost,  // NOTE What if an evaluator evaluates itself into a temporary?
+                                                  //      Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1.
+                                                  //      This situation is already taken care of by the EvalBeforeNestingBit flag, which is turned ON
+                                                  //      for all evaluators creating a temporary. This flag is then propagated by the parent evaluators.
+                                                  //      Another solution could be to count the number of temps?
+    NAsInteger = n == Dynamic ? HugeCost : n,
+    CostEval   = (NAsInteger+1) * ScalarReadCost + CoeffReadCost,
+    CostNoEval = NAsInteger * CoeffReadCost,
+    Evaluate   = (int(evaluator<T>::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval))
+  };
+
+  typedef typename conditional<Evaluate, PlainObject, typename ref_selector<T>::type>::type type;
+};
+
+template<typename T>
+EIGEN_DEVICE_FUNC
+inline T* const_cast_ptr(const T* ptr)
+{
+  return const_cast<T*>(ptr);
+}
+
+template<typename Derived, typename XprKind = typename traits<Derived>::XprKind>
+struct dense_xpr_base
+{
+  /* dense_xpr_base should only ever be used on dense expressions, thus falling either into the MatrixXpr or into the ArrayXpr cases */
+};
+
+template<typename Derived>
+struct dense_xpr_base<Derived, MatrixXpr>
+{
+  typedef MatrixBase<Derived> type;
+};
+
+template<typename Derived>
+struct dense_xpr_base<Derived, ArrayXpr>
+{
+  typedef ArrayBase<Derived> type;
+};
+
+template<typename Derived, typename XprKind = typename traits<Derived>::XprKind, typename StorageKind = typename traits<Derived>::StorageKind>
+struct generic_xpr_base;
+
+template<typename Derived, typename XprKind>
+struct generic_xpr_base<Derived, XprKind, Dense>
+{
+  typedef typename dense_xpr_base<Derived,XprKind>::type type;
+};
+
+template<typename XprType, typename CastType> struct cast_return_type
+{
+  typedef typename XprType::Scalar CurrentScalarType;
+  typedef typename remove_all<CastType>::type _CastType;
+  typedef typename _CastType::Scalar NewScalarType;
+  typedef typename conditional<is_same<CurrentScalarType,NewScalarType>::value,
+                              const XprType&,CastType>::type type;
+};
+
+template <typename A, typename B> struct promote_storage_type;
+
+template <typename A> struct promote_storage_type<A,A>
+{
+  typedef A ret;
+};
+template <typename A> struct promote_storage_type<A, const A>
+{
+  typedef A ret;
+};
+template <typename A> struct promote_storage_type<const A, A>
+{
+  typedef A ret;
+};
+
+/** \internal Specify the "storage kind" of applying a coefficient-wise
+  * binary operation between two expressions of kinds A and B respectively.
+  * The template parameter Functor permits specializing the resulting storage kind with respect to the functor.
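+  * A specialization on the Functor parameter can thus override, for that functor alone, the
+  * default rules given below (none is defined in this file).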
+  * The default rules are as follows:
+  * \code
+  * A      op A      -> A
+  * A      op dense  -> dense
+  * dense  op B      -> dense
+  * sparse op dense  -> sparse
+  * dense  op sparse -> sparse
+  * \endcode
+  */
+template <typename A, typename B, typename Functor> struct cwise_promote_storage_type;
+
+template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor>           { typedef A ret; };
+template <typename Functor>             struct cwise_promote_storage_type<Dense,Dense,Functor>   { typedef Dense ret; };
+template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor>       { typedef Dense ret; };
+template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor>       { typedef Dense ret; };
+template <typename Functor>             struct cwise_promote_storage_type<Sparse,Dense,Functor>  { typedef Sparse ret; };
+template <typename Functor>             struct cwise_promote_storage_type<Dense,Sparse,Functor>  { typedef Sparse ret; };
+
+template <typename LhsKind, typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order {
+  enum { value = LhsOrder };
+};
+
+template <typename LhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order<LhsKind,Sparse,LhsOrder,RhsOrder> { enum { value = RhsOrder }; };
+template <typename RhsKind, int LhsOrder, int RhsOrder> struct cwise_promote_storage_order<Sparse,RhsKind,LhsOrder,RhsOrder> { enum { value = LhsOrder }; };
+template <int Order>                                    struct cwise_promote_storage_order<Sparse,Sparse,Order,Order>        { enum { value = Order }; };
+
+
+/** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B.
+  * The template parameter ProductTag permits specializing the resulting storage kind with respect to
+  * some compile-time properties of the product: GemmProduct, GemvProduct, OuterProduct, InnerProduct.
+  * The default rules are as follows:
+  * \code
+  * K * K      -> K
+  * dense * K  -> dense
+  * K * dense  -> dense
+  * diag * K   -> K
+  * K * diag   -> K
+  * Perm * K   -> K
+  * K * Perm   -> K
+  * \endcode
+  */
+template <typename A, typename B, int ProductTag> struct product_promote_storage_type;
+
+template <typename A, int ProductTag> struct product_promote_storage_type<A, A, ProductTag>         { typedef A ret;};
+template <int ProductTag>             struct product_promote_storage_type<Dense, Dense, ProductTag> { typedef Dense ret;};
+template <typename A, int ProductTag> struct product_promote_storage_type<A, Dense, ProductTag>     { typedef Dense ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<Dense, B, ProductTag>     { typedef Dense ret; };
+
+template <typename A, int ProductTag> struct product_promote_storage_type<A, DiagonalShape, ProductTag>     { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<DiagonalShape, B, ProductTag>     { typedef B ret; };
+template <int ProductTag>             struct product_promote_storage_type<Dense, DiagonalShape, ProductTag> { typedef Dense ret; };
+template <int ProductTag>             struct product_promote_storage_type<DiagonalShape, Dense, ProductTag> { typedef Dense ret; };
+
+template <typename A, int ProductTag> struct product_promote_storage_type<A, PermutationStorage, ProductTag>     { typedef A ret; };
+template <typename B, int ProductTag> struct product_promote_storage_type<PermutationStorage, B, ProductTag>     { typedef B ret; };
+template <int ProductTag>             struct product_promote_storage_type<Dense, PermutationStorage, ProductTag> { typedef Dense ret; };
+template <int ProductTag>             struct product_promote_storage_type<PermutationStorage, Dense, ProductTag> { typedef Dense ret; };
+
+/** \internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type.
+  * \tparam Scalar optional parameter allowing one to pass a different scalar type than that of the MatrixType.
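+  * For example, for a column-major \code Matrix<double,Dynamic,Dynamic> \endcode, the plain_row_type
+  * defined below yields \code Matrix<double,1,Dynamic,RowMajor> \endcode.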
+ */ +template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar> +struct plain_row_type +{ + typedef Matrix<Scalar, 1, ExpressionType::ColsAtCompileTime, + int(ExpressionType::PlainObject::Options) | int(RowMajor), 1, ExpressionType::MaxColsAtCompileTime> MatrixRowType; + typedef Array<Scalar, 1, ExpressionType::ColsAtCompileTime, + int(ExpressionType::PlainObject::Options) | int(RowMajor), 1, ExpressionType::MaxColsAtCompileTime> ArrayRowType; + + typedef typename conditional< + is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value, + MatrixRowType, + ArrayRowType + >::type type; +}; + +template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar> +struct plain_col_type +{ + typedef Matrix<Scalar, ExpressionType::RowsAtCompileTime, 1, + ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> MatrixColType; + typedef Array<Scalar, ExpressionType::RowsAtCompileTime, 1, + ExpressionType::PlainObject::Options & ~RowMajor, ExpressionType::MaxRowsAtCompileTime, 1> ArrayColType; + + typedef typename conditional< + is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value, + MatrixColType, + ArrayColType + >::type type; +}; + +template<typename ExpressionType, typename Scalar = typename ExpressionType::Scalar> +struct plain_diag_type +{ + enum { diag_size = EIGEN_SIZE_MIN_PREFER_DYNAMIC(ExpressionType::RowsAtCompileTime, ExpressionType::ColsAtCompileTime), + max_diag_size = EIGEN_SIZE_MIN_PREFER_FIXED(ExpressionType::MaxRowsAtCompileTime, ExpressionType::MaxColsAtCompileTime) + }; + typedef Matrix<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> MatrixDiagType; + typedef Array<Scalar, diag_size, 1, ExpressionType::PlainObject::Options & ~RowMajor, max_diag_size, 1> ArrayDiagType; + + typedef typename conditional< + is_same< typename traits<ExpressionType>::XprKind, MatrixXpr >::value, + MatrixDiagType, + ArrayDiagType + >::type type; +}; + +template<typename Expr,typename Scalar = typename Expr::Scalar> +struct plain_constant_type +{ + enum { Options = (traits<Expr>::Flags&RowMajorBit)?RowMajor:0 }; + + typedef Array<Scalar, traits<Expr>::RowsAtCompileTime, traits<Expr>::ColsAtCompileTime, + Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> array_type; + + typedef Matrix<Scalar, traits<Expr>::RowsAtCompileTime, traits<Expr>::ColsAtCompileTime, + Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> matrix_type; + + typedef CwiseNullaryOp<scalar_constant_op<Scalar>, const typename conditional<is_same< typename traits<Expr>::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type; +}; + +template<typename ExpressionType> +struct is_lvalue +{ + enum { value = (!bool(is_const<ExpressionType>::value)) && + bool(traits<ExpressionType>::Flags & LvalueBit) }; +}; + +template<typename T> struct is_diagonal +{ enum { ret = false }; }; + +template<typename T> struct is_diagonal<DiagonalBase<T> > +{ enum { ret = true }; }; + +template<typename T> struct is_diagonal<DiagonalWrapper<T> > +{ enum { ret = true }; }; + +template<typename T, int S> struct is_diagonal<DiagonalMatrix<T,S> > +{ enum { ret = true }; }; + + +template<typename T> struct is_identity +{ enum { value = false }; }; + +template<typename T> struct is_identity<CwiseNullaryOp<internal::scalar_identity_op<typename T::Scalar>, T> > +{ enum { value = true }; }; + + +template<typename S1, typename S2> struct glue_shapes; +template<> 
struct glue_shapes<DenseShape,TriangularShape> { typedef TriangularShape type; };
+
+template<typename T1, typename T2>
+struct possibly_same_dense {
+  enum { value = has_direct_access<T1>::ret && has_direct_access<T2>::ret && is_same<typename T1::Scalar,typename T2::Scalar>::value };
+};
+
+template<typename T1, typename T2>
+EIGEN_DEVICE_FUNC
+bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if<possibly_same_dense<T1,T2>::value>::type * = 0)
+{
+  return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride());
+}
+
+template<typename T1, typename T2>
+EIGEN_DEVICE_FUNC
+bool is_same_dense(const T1 &, const T2 &, typename enable_if<!possibly_same_dense<T1,T2>::value>::type * = 0)
+{
+  return false;
+}
+
+// Internal helper defining the cost of a scalar division for the type T.
+// The default heuristic can be specialized for each scalar type and architecture.
+template<typename T,bool Vectorized=false,typename EnableIf = void>
+struct scalar_div_cost {
+  enum { value = 8*NumTraits<T>::MulCost };
+};
+
+template<typename T,bool Vectorized>
+struct scalar_div_cost<std::complex<T>, Vectorized> {
+  enum { value = 2*scalar_div_cost<T>::value
+               + 6*NumTraits<T>::MulCost
+               + 3*NumTraits<T>::AddCost
+  };
+};
+
+
+template<bool Vectorized>
+struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };
+template<bool Vectorized>
+struct scalar_div_cost<unsigned long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 21 }; };
+
+
+#ifdef EIGEN_DEBUG_ASSIGN
+std::string demangle_traversal(int t)
+{
+  if(t==DefaultTraversal) return "DefaultTraversal";
+  if(t==LinearTraversal) return "LinearTraversal";
+  if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal";
+  if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal";
+  if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal";
+  return "?";
+}
+std::string demangle_unrolling(int t)
+{
+  if(t==NoUnrolling) return "NoUnrolling";
+  if(t==InnerUnrolling) return "InnerUnrolling";
+  if(t==CompleteUnrolling) return "CompleteUnrolling";
+  return "?";
+}
+std::string demangle_flags(int f)
+{
+  std::string res;
+  if(f&RowMajorBit) res += " | RowMajor";
+  if(f&PacketAccessBit) res += " | Packet";
+  if(f&LinearAccessBit) res += " | Linear";
+  if(f&LvalueBit) res += " | Lvalue";
+  if(f&DirectAccessBit) res += " | Direct";
+  if(f&NestByRefBit) res += " | NestByRef";
+  if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit";
+
+  return res;
+}
+#endif
+
+} // end namespace internal
+
+
+/** \class ScalarBinaryOpTraits
+  * \ingroup Core_Module
+  *
+  * \brief Determines whether the given binary operation of two numeric types is allowed and what the scalar return type is.
+  *
+  * This class permits controlling the scalar return type of any binary operation performed on two different scalar types through (partial) template specializations.
+  *
+  * For instance, let \c U1, \c U2 and \c U3 be three user defined scalar types for which most operations between instances of \c U1 and \c U2 return a \c U3.
+  * You can let %Eigen know that by defining:
+    \code
+    template<typename BinaryOp>
+    struct ScalarBinaryOpTraits<U1,U2,BinaryOp> { typedef U3 ReturnType; };
+    template<typename BinaryOp>
+    struct ScalarBinaryOpTraits<U2,U1,BinaryOp> { typedef U3 ReturnType; };
+    \endcode
+  * You can then explicitly disable some particular operations to get more explicit error messages:
+    \code
+    template<>
+    struct ScalarBinaryOpTraits<U1,U2,internal::scalar_max_op<U1,U2> > {};
+    \endcode
+  * Or customize the return type for individual operations:
+    \code
+    template<>
+    struct ScalarBinaryOpTraits<U1,U2,internal::scalar_sum_op<U1,U2> > { typedef U1 ReturnType; };
+    \endcode
+  *
+  * By default, the following generic combinations are supported:
+  <table class="manual">
+  <tr><th>ScalarA</th><th>ScalarB</th><th>BinaryOp</th><th>ReturnType</th><th>Note</th></tr>
+  <tr ><td>\c T </td><td>\c T </td><td>\c * </td><td>\c T </td><td></td></tr>
+  <tr class="alt"><td>\c NumTraits<T>::Real </td><td>\c T </td><td>\c * </td><td>\c T </td><td>Only if \c NumTraits<T>::IsComplex </td></tr>
+  <tr ><td>\c T </td><td>\c NumTraits<T>::Real </td><td>\c * </td><td>\c T </td><td>Only if \c NumTraits<T>::IsComplex </td></tr>
+  </table>
+  *
+  * \sa CwiseBinaryOp
+  */
+template<typename ScalarA, typename ScalarB, typename BinaryOp=internal::scalar_product_op<ScalarA,ScalarB> >
+struct ScalarBinaryOpTraits
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  // for backward compatibility, use the hints given by the (deprecated) internal::scalar_product_traits class.
+  : internal::scalar_product_traits<ScalarA,ScalarB>
+#endif // EIGEN_PARSED_BY_DOXYGEN
+{};
+
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T,T,BinaryOp>
+{
+  typedef T ReturnType;
+};
+
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T, typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, BinaryOp>
+{
+  typedef T ReturnType;
+};
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, T, BinaryOp>
+{
+  typedef T ReturnType;
+};
+
+// For Matrix * Permutation
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T,void,BinaryOp>
+{
+  typedef T ReturnType;
+};
+
+// For Permutation * Matrix
+template<typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<void,T,BinaryOp>
+{
+  typedef T ReturnType;
+};
+
+// for Permutation*Permutation
+template<typename BinaryOp>
+struct ScalarBinaryOpTraits<void,void,BinaryOp>
+{
+  typedef void ReturnType;
+};
+
+// We require Lhs and Rhs to have "compatible" scalar types.
+// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
+// So allowing the mixing of different types would give very unexpected errors when vectorization is enabled, e.g. when the
+// user tries to add together a float matrix and a double matrix.
+#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
+  EIGEN_STATIC_ASSERT((Eigen::internal::has_ReturnType<ScalarBinaryOpTraits<LHS, RHS,BINOP> >::value), \
+    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+} // end namespace Eigen
+
+#endif // EIGEN_XPRHELPER_H