(svn r26214) -Add: specialised animated SSE4 blitter (MJP)

With a 32bpp base set this is about 15-20% faster in the Draw function (but slower with an 8bpp base set). Overall, with a 32bpp base set, it is about 5% faster.
This commit is contained in:
rubidium
2014-01-02 23:52:13 +00:00
parent 78df732a7b
commit 4c84d13454
13 changed files with 516 additions and 18 deletions

View File

@@ -73,9 +73,11 @@ typedef union ALIGN(16) um128i {
srcABCD = _mm_packus_epi16(srcAB, srcAB); /* PACKUSWB, pack 2 colours (with saturation) */ \
}
/** The SSE2 32 bpp blitter (without palette animation). */
class Blitter_32bppSSE2 : public Blitter_32bppSimple {
/** Base methods for 32bpp SSE blitters. */
class Blitter_32bppSSE_Base {
public:
virtual ~Blitter_32bppSSE_Base() {}
struct MapValue {
uint8 m;
uint8 v;
@@ -108,12 +110,23 @@ public:
byte data[]; ///< Data, all zoomlevels.
};
Sprite *Encode(const SpriteLoader::Sprite *sprite, AllocatorProc *allocator);
virtual Colour AdjustBrightness(Colour colour, uint8 brightness) = 0;
};
/**
 * The SSE2 32 bpp blitter (without palette animation).
 *
 * Inherits from both Blitter_32bppSimple and Blitter_32bppSSE_Base.
 *
 * NOTE(review): this listing looks like a diff whose change markers were
 * stripped, so a few members below appear twice (presumably the old and the
 * new form of the same line). Confirm against the real header before use.
 */
class Blitter_32bppSSE2 : public Blitter_32bppSimple, public Blitter_32bppSSE_Base {
public:
/* Declared pure virtual in Blitter_32bppSSE_Base; this class declares its own version. */
virtual Colour AdjustBrightness(Colour colour, uint8 brightness);
/* NOTE(review): the next two declarations differ only by 'static'; presumably only one of them is meant to exist. */
Colour ReallyAdjustBrightness(Colour colour, uint8 brightness);
static Colour ReallyAdjustBrightness(Colour colour, uint8 brightness);
/* Draw overload that receives the blitter mode as a run-time argument. */
/* virtual */ void Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom);
/*
 * Draw overload that receives the blitter mode, read mode and last block type as template parameters.
 * NOTE(review): two template headers precede this declaration; the second one only adds the
 * Blitter_32bppSSE_Base:: qualification, so the first is presumably the superseded form.
 */
template <BlitterMode mode, ReadMode read_mode, BlockType bt_last>
template <BlitterMode mode, Blitter_32bppSSE_Base::ReadMode read_mode, Blitter_32bppSSE_Base::BlockType bt_last>
void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom);
/* NOTE(review): Encode appears once as a plain declaration and once with an inline body; presumably only the inline form is current. */
/* virtual */ Sprite *Encode(const SpriteLoader::Sprite *sprite, AllocatorProc *allocator);
/* virtual */ Sprite *Encode(const SpriteLoader::Sprite *sprite, AllocatorProc *allocator) {
/* Forward to the Encode implementation of the SSE base class. */
return Blitter_32bppSSE_Base::Encode(sprite, allocator);
}
/* Returns the fixed name string of this blitter. */
/* virtual */ const char *GetName() { return "32bpp-sse2"; }
};