From 89929ec6b961456dba8fe3126815a0d49e2e9cb8 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 16 Jul 2013 13:36:07 +0200 Subject: [PATCH] [smooth] Improve performance. Provide a work-around for an ARM-specific performance bug in GCC. This speeds up the rasterizer by more than 5%. Also slightly optimize `gray_set_cell' and `gray_record_cell' (which also improves performance on other platforms by a tiny bit (<1%)). * src/smooth/ftgrays.c (FT_DIV_MOD): New macro. Use it where appropriate. (gray_record_cell, gray_set_cell, gray_move_to, gray_convert_glyph_inner): Streamline condition handling. --- ChangeLog | 16 ++++++++ src/smooth/ftgrays.c | 89 ++++++++++++++++++++++++-------------------- 2 files changed, 65 insertions(+), 40 deletions(-) diff --git a/ChangeLog b/ChangeLog index 807a81c3d..d16793778 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +2013-07-16 David Turner + + [smooth] Improve performance. + + Provide a work-around for an ARM-specific performance bug in GCC. + This speeds up the rasterizer by more than 5%. + + Also slightly optimize `gray_set_cell' and `gray_record_cell' (which + also improves performance on other platforms by a tiny bit (<1%)). + + * src/smooth/ftgrays.c (FT_DIV_MOD): New macro. + Use it where appropriate. + + (gray_record_cell, gray_set_cell, gray_move_to, + gray_convert_glyph_inner): Streamline condition handling. + 2013-07-16 David Turner [truetype] Add assembler code for TT_MulFix14 and TT_DotFix14. diff --git a/src/smooth/ftgrays.c b/src/smooth/ftgrays.c index 7532a3582..ab76829de 100644 --- a/src/smooth/ftgrays.c +++ b/src/smooth/ftgrays.c @@ -310,6 +310,40 @@ typedef ptrdiff_t FT_PtrDist; #endif + /* Compute `dividend / divisor' and return both its quotient and */ + /* remainder, cast to a specific type. This macro also ensures that */ + /* the remainder is never negative (given a positive divisor). 
*/ +#define FT_DIV_MOD( type, dividend, divisor, quotient, remainder ) \ + FT_BEGIN_STMNT \ + (quotient) = (type)( (dividend) / (divisor) ); \ + (remainder) = (type)( (dividend) % (divisor) ); \ + if ( (remainder) < 0 ) \ + { \ + (quotient)--; \ + (remainder) += (type)(divisor); \ + } \ + FT_END_STMNT + +#ifdef __arm__ + /* Work around a bug specific to GCC which makes the compiler fail to */ + /* optimize a division and modulo operation on the same parameters */ + /* into a single call to `__aeabi_idivmod'. See */ + /* */ + /* http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43721 */ +#undef FT_DIV_MOD +#define FT_DIV_MOD( type, dividend, divisor, quotient, remainder ) \ + FT_BEGIN_STMNT \ + (quotient) = (type)( (dividend) / (divisor) ); \ + (remainder) = (type)( (dividend) - (quotient) * (divisor) ); \ + if ( (remainder) < 0 ) \ + { \ + (quotient)--; \ + (remainder) += (type)(divisor); \ + } \ + FT_END_STMNT +#endif /* __arm__ */ + + /*************************************************************************/ /* */ /* TYPE DEFINITIONS */ @@ -548,7 +582,7 @@ typedef ptrdiff_t FT_PtrDist; static void gray_record_cell( RAS_ARG ) { - if ( !ras.invalid && ( ras.area | ras.cover ) ) + if ( ras.area | ras.cover ) { PCell cell = gray_find_cell( RAS_VAR ); @@ -597,12 +631,12 @@ typedef ptrdiff_t FT_PtrDist; ras.area = 0; ras.cover = 0; - } + ras.ex = ex; + ras.ey = ey; - ras.ex = ex; - ras.ey = ey; - ras.invalid = ( (unsigned)ey >= (unsigned)ras.count_ey || - ex >= ras.count_ex ); + ras.invalid = ( (unsigned)ey >= (unsigned)ras.count_ey || + ex >= ras.count_ex ); + } } @@ -686,13 +720,7 @@ typedef ptrdiff_t FT_PtrDist; dx = -dx; } - delta = (TCoord)( p / dx ); - mod = (TCoord)( p % dx ); - if ( mod < 0 ) - { - delta--; - mod += (TCoord)dx; - } + FT_DIV_MOD( TCoord, p, dx, delta, mod ); ras.area += (TArea)(( fx1 + first ) * delta); ras.cover += delta; @@ -706,14 +734,8 @@ typedef ptrdiff_t FT_PtrDist; TCoord lift, rem; - p = ONE_PIXEL * ( y2 - y1 + delta ); - lift = (TCoord)( p / 
dx ); - rem = (TCoord)( p % dx ); - if ( rem < 0 ) - { - lift--; - rem += (TCoord)dx; - } + p = ONE_PIXEL * ( y2 - y1 + delta ); + FT_DIV_MOD( TCoord, p, dx, lift, rem ); mod -= (int)dx; @@ -763,9 +785,6 @@ typedef ptrdiff_t FT_PtrDist; dx = to_x - ras.x; dy = to_y - ras.y; - /* XXX: we should do something about the trivial case where dx == 0, */ - /* as it happens very often! */ - /* perform vertical clipping */ { TCoord min, max; @@ -844,13 +863,7 @@ typedef ptrdiff_t FT_PtrDist; dy = -dy; } - delta = (int)( p / dy ); - mod = (int)( p % dy ); - if ( mod < 0 ) - { - delta--; - mod += (TCoord)dy; - } + FT_DIV_MOD( int, p, dy, delta, mod ); x = ras.x + delta; gray_render_scanline( RAS_VAR_ ey1, ras.x, fy1, x, (TCoord)first ); @@ -861,13 +874,7 @@ typedef ptrdiff_t FT_PtrDist; if ( ey1 != ey2 ) { p = ONE_PIXEL * dx; - lift = (int)( p / dy ); - rem = (int)( p % dy ); - if ( rem < 0 ) - { - lift--; - rem += (int)dy; - } + FT_DIV_MOD( int, p, dy, lift, rem ); mod -= (int)dy; while ( ey1 != ey2 ) @@ -1171,7 +1178,8 @@ typedef ptrdiff_t FT_PtrDist; /* record current cell, if any */ - gray_record_cell( RAS_VAR ); + if ( !ras.invalid ) + gray_record_cell( RAS_VAR ); /* start to a new position */ x = UPSCALE( to->x ); @@ -1781,7 +1789,8 @@ typedef ptrdiff_t FT_PtrDist; if ( ft_setjmp( ras.jump_buffer ) == 0 ) { error = FT_Outline_Decompose( &ras.outline, &func_interface, &ras ); - gray_record_cell( RAS_VAR ); + if ( !ras.invalid ) + gray_record_cell( RAS_VAR ); } else error = FT_THROW( Memory_Overflow );