From 884e4e67ee434bd99fbf2edac6b77dc1ebd07263 Mon Sep 17 00:00:00 2001
From: Alexei Podtelezhnikov <apodtele@gmail.com>
Date: Mon, 29 Jun 2015 22:32:05 -0400
Subject: [PATCH] [base] Implement fast vector normalization.

The function uses Newton's iterations instead of dividing vector
components by its length, which needs a square root. This is,
literally, a bit less accurate but a lot faster.

* src/base/ftcalc.c (FT_Vector_NormLen): New function.
---
 ChangeLog                          | 10 ++++
 include/freetype/internal/ftcalc.h | 12 +++++
 src/base/ftcalc.c                  | 82 ++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index a20be8f62..af2586433 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2015-06-29  Alexei Podtelezhnikov <apodtele@gmail.com>
+
+	[base] Implement fast vector normalization.
+
+	The function uses Newton's iterations instead of dividing vector
+	components by its length, which needs a square root. This is,
+	literally, a bit less accurate but a lot faster.
+
+	* src/base/ftcalc.c (FT_Vector_NormLen): New function.
+
 2015-06-28  Werner Lemberg  <wl@gnu.org>
 
 	* CMakeLists.txt: Always create `ftconfig.h'.
diff --git a/include/freetype/internal/ftcalc.h b/include/freetype/internal/ftcalc.h
index 75752c3d9..d4d4337e4 100644
--- a/include/freetype/internal/ftcalc.h
+++ b/include/freetype/internal/ftcalc.h
@@ -299,6 +299,18 @@ FT_BEGIN_HEADER
                               FT_Long           scaling );
 
 
+  /*
+   *  This function normalizes a vector and returns its original length.
+   *  The normalized vector is a 16.16 fixed-point unit vector with length
+   *  close to 0x10000. The accuracy of the returned length is limited to
+   *  16 bits also. The function utilizes quick inverse square root
+   *  aproximation without divisions and square roots relying on Newton's
+   *  iterations instead.
+   */
+  FT_BASE( FT_UInt32 )
+  FT_Vector_NormLen( FT_Vector*  vector );
+
+
   /*
    *  Return -1, 0, or +1, depending on the orientation of a given corner.
    *  We use the Cartesian coordinate system, with positive vertical values
diff --git a/src/base/ftcalc.c b/src/base/ftcalc.c
index dca0e1dc5..f3595e72d 100644
--- a/src/base/ftcalc.c
+++ b/src/base/ftcalc.c
@@ -785,6 +785,88 @@
   }
 
 
+  /* documentation is in ftcalc.h */
+
+  FT_BASE_DEF( FT_UInt32 )
+  FT_Vector_NormLen( FT_Vector*  vector )
+  {
+    FT_Int32   x = vector->x;
+    FT_Int32   y = vector->y;
+    FT_Int32   b, z;
+    FT_UInt32  u, v, l;
+    FT_Int     sx = 1, sy = 1, shift;
+
+
+    FT_MOVE_SIGN( x, sx );
+    FT_MOVE_SIGN( y, sy );
+
+    /* trivial cases */
+    if ( x == 0 )
+    {
+      if ( y > 0 )
+        vector->y = sy * 0x10000;
+      return y;
+    }
+    else if ( y == 0 )
+    {
+      if ( x > 0 )
+        vector->x = sx * 0x10000;
+      return x;
+    }
+
+    /* estimate length and prenormalize */
+    l = x > y ? (FT_UInt32)x + ( y >> 1 )
+              : (FT_UInt32)y + ( x >> 1 );
+
+    shift  = 31 - FT_MSB( l );
+    shift -= 15 + ( l >= 0xAAAAAAAAUL >> shift );
+
+    if ( shift > 0 )
+    {
+      x <<= shift;
+      y <<= shift;
+
+      /* reestimate length for tiny vectors */
+      l = x > y ? (FT_UInt32)x + ( y >> 1 )
+                : (FT_UInt32)y + ( x >> 1 );
+    }
+    else
+    {
+      x >>= -shift;
+      y >>= -shift;
+      l >>= -shift;
+    }
+
+    /* lower linear approximation for reciprocal length minus one */
+    b = 0x10000 - (FT_Int32)l;
+
+    /*  Newton's iterations */
+    do
+    {
+      u = (FT_UInt32)( x + ( x * b >> 16 ) );
+      v = (FT_UInt32)( y + ( y * b >> 16 ) );
+
+      /* converting to signed gives difference with 2^32 */
+      z = -(FT_Int32)( u * u + v * v ) / 0x200;
+      z = z * ( ( 0x10000 + b ) >> 8 ) / 0x10000;
+
+      b += z;
+    } while ( z > 0 );
+
+    vector->x = sx < 0 ? -(FT_Pos)u : (FT_Pos)u;
+    vector->y = sy < 0 ? -(FT_Pos)v : (FT_Pos)v;
+
+    /* true length, again taking advantage of signed difference with 2^32 */
+    l = 0x10000 + (FT_Int32)( u * x + v * y ) / 0x10000;
+    if ( shift > 0 )
+      l = ( l + ( 1 << ( shift - 1 ) ) ) >> shift;
+    else
+      l <<= -shift;
+
+    return l;
+  }
+
+
 #if 0
 
   /* documentation is in ftcalc.h */