New function FT_MulAddFix to compute the sum of fixed-point products.

This function, based on the code of `FT_MulFix`, uses 64-bit precision internally for intermediate computations. * include/freetype/internal/ftcalc.h, base/ftcalc.c (FT_MulAddFix): Implement it.
2022-06-29 11:48:10 +03:00 · 2022-06-29 11:48:10 +03:00 · 15fef219d6
commit 15fef219d6
parent 2db58e061e
2 changed files with 94 additions and 0 deletions
--- a/include/freetype/internal/ftcalc.h
+++ b/include/freetype/internal/ftcalc.h
@ -278,6 +278,40 @@ FT_BEGIN_HEADER
                      FT_Long  c );


+  /**************************************************************************
+   *
+   * @function:
+   *   FT_MulAddFix
+   *
+   * @description:
+   *   Compute `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`, where `s[n]` is
+   *   usually a 16.16 scalar.
+   *
+   * @input:
+   *   s ::
+   *     The array of scalars.
+   *   f ::
+   *     The array of factors.
+   *   count ::
+   *     The number of entries in the array.
+   *
+   * @return:
+   *   The result of `(s[0] * f[0] + s[1] * f[1] + ...) / 0x10000`.
+   *
+   * @note:
+   *   This function is currently used for the scaled delta computation of
+   *   variation stores.  It internally uses 64-bit data types when
+   *   available, otherwise it emulates 64-bit math by using 32-bit
+   *   operations, which produce a correct result but most likely at a slower
+   *   performance in comparison to the implementation base on `int64_t`.
+   *
+   */
+  FT_BASE( FT_Int32 )
+  FT_MulAddFix( FT_Fixed*  s,
+                FT_Int32*  f,
+                FT_UInt    count );
+
+
  /*
   * A variant of FT_Matrix_Multiply which scales its result afterwards.  The
   * idea is that both `a' and `b' are scaled by factors of 10 so that the
--- a/src/base/ftcalc.c
+++ b/src/base/ftcalc.c
@ -1085,4 +1085,64 @@
  }


+  FT_BASE_DEF( FT_Int32 )
+  FT_MulAddFix( FT_Fixed*  s,
+                FT_Int32*  f,
+                FT_UInt    count )
+  {
+    FT_UInt   i;
+    FT_Int64  temp;
+
+
+#ifdef FT_INT64
+
+    for ( i = 0; i < count; ++i )
+      temp += (FT_Int64)s[i] * f[i];
+
+    return temp >> 16;
+
+#else
+
+    temp.hi = 0;
+    temp.lo = 0;
+
+    for ( i = 0; i < count; ++i )
+    {
+      FT_Int64  multResult;
+
+      FT_Int     sign  = 1;
+      FT_UInt32  carry = 0;
+
+      FT_UInt32  scalar;
+      FT_UInt32  factor;
+
+
+      scalar = (FT_UInt32)s[i];
+      factor = (FT_UInt32)f[i];
+
+      FT_MOVE_SIGN( s[i], scalar, sign );
+      FT_MOVE_SIGN( f[i], factor, sign );
+
+      ft_multo64( scalar, factor, &multResult );
+
+      if ( sign < 0 )
+      {
+        /* Emulated `FT_Int64` negation. */
+        carry = ( multResult.lo == 0 );
+
+        multResult.lo = ~multResult.lo + 1;
+        multResult.hi = ~multResult.hi + carry;
+      }
+
+      FT_Add64( &temp, &multResult, &temp );
+    }
+
+    return (FT_Int32)( ( (FT_Int32)( temp.hi & 0xFFFF ) << 16 ) |
+                                   ( temp.lo >> 16 )            );
+
+#endif /* !FT_INT64 */
+
+  }
+
+
 /* END */