[smooth] Improve performance.

Provide a work-around for an ARM-specific performance bug in GCC.
This speeds up the rasterizer by more than 5%.

Also slightly optimize `set_gray_cell' and `gray_record_cell' (which
also improves performance on other platforms by a tiny bit (<1%).

* src/smooth/ftgrays.c (FT_DIV_MOD): New macro.
Use it where appropriate.

(gray_record_cell, gray_set_cell, gray_move_to,
gray_convert_glyph_inner): Streamline condition handling.
This commit is contained in:
David Turner 2013-07-16 13:36:07 +02:00 committed by Werner Lemberg
parent f66d48e923
commit 89929ec6b9
2 changed files with 65 additions and 40 deletions

@ -1,3 +1,19 @@
2013-07-16 David Turner <digit@google.com>
[smooth] Improve performance.
Provide a work-around for an ARM-specific performance bug in GCC.
This speeds up the rasterizer by more than 5%.
Also slightly optimize `set_gray_cell' and `gray_record_cell' (which
also improves performance on other platforms by a tiny bit (<1%).
* src/smooth/ftgrays.c (FT_DIV_MOD): New macro.
Use it where appropriate.
(gray_record_cell, gray_set_cell, gray_move_to,
gray_convert_glyph_inner): Streamline condition handling.
2013-07-16 David Turner <digit@google.com>
[truetype] Add assembler code for TT_MulFix14 and TT_DotFix14.

@ -310,6 +310,40 @@ typedef ptrdiff_t FT_PtrDist;
#endif
/* Compute `divident / divisor' and return both its quotient and */
/* remainder, cast to a specific type. This macro also ensures that */
/* the remainder is always positive. */
#define FT_DIV_MOD( type, dividend, divisor, quotient, remainder ) \
FT_BEGIN_STMNT \
(quotient) = (type)( (dividend) / (divisor) ); \
(remainder) = (type)( (dividend) % (divisor) ); \
if ( (remainder) < 0 ) \
{ \
(quotient)--; \
(remainder) += (type)(divisor); \
} \
FT_END_STMNT
#ifdef __arm__
/* Work around a bug specific to GCC which make the compiler fail to */
/* optimize a division and modulo operation on the same parameters */
/* into a single call to `__aeabi_idivmod'. See */
/* */
/* http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43721 */
#undef FT_DIV_MOD
#define FT_DIV_MOD( type, dividend, divisor, quotient, remainder ) \
FT_BEGIN_STMNT \
(quotient) = (type)( (dividend) / (divisor) ); \
(remainder) = (type)( (dividend) - (quotient) * (divisor) ); \
if ( (remainder) < 0 ) \
{ \
(quotient)--; \
(remainder) += (type)(divisor); \
} \
FT_END_STMNT
#endif /* __arm__ */
/*************************************************************************/
/* */
/* TYPE DEFINITIONS */
@ -548,7 +582,7 @@ typedef ptrdiff_t FT_PtrDist;
static void
gray_record_cell( RAS_ARG )
{
if ( !ras.invalid && ( ras.area | ras.cover ) )
if ( ras.area | ras.cover )
{
PCell cell = gray_find_cell( RAS_VAR );
@ -597,12 +631,12 @@ typedef ptrdiff_t FT_PtrDist;
ras.area = 0;
ras.cover = 0;
}
ras.ex = ex;
ras.ey = ey;
ras.ex = ex;
ras.ey = ey;
ras.invalid = ( (unsigned)ey >= (unsigned)ras.count_ey ||
ex >= ras.count_ex );
ras.invalid = ( (unsigned)ey >= (unsigned)ras.count_ey ||
ex >= ras.count_ex );
}
}
@ -686,13 +720,7 @@ typedef ptrdiff_t FT_PtrDist;
dx = -dx;
}
delta = (TCoord)( p / dx );
mod = (TCoord)( p % dx );
if ( mod < 0 )
{
delta--;
mod += (TCoord)dx;
}
FT_DIV_MOD( TCoord, p, dx, delta, mod );
ras.area += (TArea)(( fx1 + first ) * delta);
ras.cover += delta;
@ -706,14 +734,8 @@ typedef ptrdiff_t FT_PtrDist;
TCoord lift, rem;
p = ONE_PIXEL * ( y2 - y1 + delta );
lift = (TCoord)( p / dx );
rem = (TCoord)( p % dx );
if ( rem < 0 )
{
lift--;
rem += (TCoord)dx;
}
p = ONE_PIXEL * ( y2 - y1 + delta );
FT_DIV_MOD( TCoord, p, dx, lift, rem );
mod -= (int)dx;
@ -763,9 +785,6 @@ typedef ptrdiff_t FT_PtrDist;
dx = to_x - ras.x;
dy = to_y - ras.y;
/* XXX: we should do something about the trivial case where dx == 0, */
/* as it happens very often! */
/* perform vertical clipping */
{
TCoord min, max;
@ -844,13 +863,7 @@ typedef ptrdiff_t FT_PtrDist;
dy = -dy;
}
delta = (int)( p / dy );
mod = (int)( p % dy );
if ( mod < 0 )
{
delta--;
mod += (TCoord)dy;
}
FT_DIV_MOD( int, p, dy, delta, mod );
x = ras.x + delta;
gray_render_scanline( RAS_VAR_ ey1, ras.x, fy1, x, (TCoord)first );
@ -861,13 +874,7 @@ typedef ptrdiff_t FT_PtrDist;
if ( ey1 != ey2 )
{
p = ONE_PIXEL * dx;
lift = (int)( p / dy );
rem = (int)( p % dy );
if ( rem < 0 )
{
lift--;
rem += (int)dy;
}
FT_DIV_MOD( int, p, dy, lift, rem );
mod -= (int)dy;
while ( ey1 != ey2 )
@ -1171,7 +1178,8 @@ typedef ptrdiff_t FT_PtrDist;
/* record current cell, if any */
gray_record_cell( RAS_VAR );
if ( !ras.invalid )
gray_record_cell( RAS_VAR );
/* start to a new position */
x = UPSCALE( to->x );
@ -1781,7 +1789,8 @@ typedef ptrdiff_t FT_PtrDist;
if ( ft_setjmp( ras.jump_buffer ) == 0 )
{
error = FT_Outline_Decompose( &ras.outline, &func_interface, &ras );
gray_record_cell( RAS_VAR );
if ( !ras.invalid )
gray_record_cell( RAS_VAR );
}
else
error = FT_THROW( Memory_Overflow );