48 lines
2.8 KiB
Diff
48 lines
2.8 KiB
Diff
--- a/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h
|
|
+++ b/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h
|
|
@@ -1199,7 +1199,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
|
|
|
// NOTE: the begin/end asm comments below work around bug 935!
|
|
// but they are not enough for gcc>=6 without FMA (bug 1637)
|
|
- #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE)
|
|
+ #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) && !(defined __e2k__)
|
|
#define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__ ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1));
|
|
#else
|
|
#define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND
|
|
--- a/extern/Eigen3/Eigen/src/SparseCore/SparseDenseProduct.h
|
|
+++ b/extern/Eigen3/Eigen/src/SparseCore/SparseDenseProduct.h
|
|
@@ -48,7 +48,8 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
|
|
// It basically represents the minimal amount of work to be done to be worth it.
|
|
if(threads>1 && lhsEval.nonZerosEstimate() > 20000)
|
|
{
|
|
- #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
|
|
+ const Index chunkSize = (n+threads*4-1)/(threads*4); // ceil(n/(4*threads)): chunk for schedule(dynamic); Index (as the loop counter below) avoids narrowing to int
|
|
+ #pragma omp parallel for schedule(dynamic,chunkSize) num_threads(threads)
|
|
for(Index i=0; i<n; ++i)
|
|
processRow(lhsEval,rhs,res,alpha,i,c);
|
|
}
|
|
|
|
diff --git a/extern/Eigen3/Eigen/src/Core/arch/AVX/PacketMath.h b/extern/Eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
|
|
index 923a124..ab90507 100755
|
|
--- a/extern/Eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
|
|
+++ b/extern/Eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
|
|
@@ -159,7 +159,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
|
|
|
|
#ifdef __FMA__
|
|
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
|
|
-#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
|
|
+#if ( ((EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG)) && !(defined __e2k__) )
|
|
// Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
|
|
// and even register spilling with clang>=6.0 (bug 1637).
|
|
// Gcc stupidly generates a vfmadd132ps instruction.
|
|
@@ -173,7 +173,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
|
|
#endif
|
|
}
|
|
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
|
|
-#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
|
|
+#if ( ((EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG)) && !(defined __e2k__) )
|
|
// see above
|
|
Packet4d res = c;
|
|
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
|
|