mcst-linux-kernel/patches-2024.06.26/blender-3.4.1/0009-Eigan-disable-asm.patch

48 lines
2.8 KiB
Diff

--- a/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/extern/Eigen3/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -1199,7 +1199,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
// NOTE: the begin/end asm comments below work around bug 935!
// but they are not enough for gcc>=6 without FMA (bug 1637)
- #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE)
+ #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE) && !(defined __e2k__)
#define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__ ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1));
#else
#define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND
--- a/extern/Eigen3/Eigen/src/SparseCore/SparseDenseProduct.h
+++ b/extern/Eigen3/Eigen/src/SparseCore/SparseDenseProduct.h
@@ -48,7 +48,8 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
// It basically represents the minimal amount of work to be done to be worth it.
if(threads>1 && lhsEval.nonZerosEstimate() > 20000)
{
- #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
+ int enablemp = (n+threads*4-1)/(threads*4);
+ #pragma omp parallel for schedule(dynamic,enablemp) num_threads(threads)
for(Index i=0; i<n; ++i)
processRow(lhsEval,rhs,res,alpha,i,c);
}
diff --git a/extern/Eigen3/Eigen/src/Core/arch/AVX/PacketMath.h b/extern/Eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
index 923a124..ab90507 100755
--- a/extern/Eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/extern/Eigen3/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -159,7 +159,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
#ifdef __FMA__
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
-#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
+#if ( ((EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG)) && !(defined __e2k__) )
// Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
// and even register spilling with clang>=6.0 (bug 1637).
// Gcc stupidly generates a vfmadd132ps instruction.
@@ -173,7 +173,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
#endif
}
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
-#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
+#if ( ((EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG)) && !(defined __e2k__) )
// see above
Packet4d res = c;
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));