gps/GPSResources/tcpmp/common/blit/blit_soft.c

2515 lines
62 KiB
C
Executable File

/*****************************************************************************
*
* This program is free software ; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* $Id: blit_soft.c 607 2006-01-22 20:58:29Z picard $
*
* The Core Pocket Media Player
* Copyright (c) 2004-2005 Gabor Kovacs
*
****************************************************************************/
#include "../common.h"
#include "../dyncode/dyncode.h"
#include "../cpu/cpu.h"
#include "blit_soft.h"
//#define BLITTEST
/*
TV range:
+16 (219)
+128 (224)
+128 (224)
PC range:
+0 (255)
+128 (255)
+128 (255)
ranges:
R,G,B,Y [0..1]
Cb,Cr [-0.5..0.5]
Y' = Kr * R' + (1 - Kr - Kb) * G' + Kb * B'
Cb = 0.5 * (B' - Y') / (1 - Kb)
Cr = 0.5 * (R' - Y') / (1 - Kr)
Kb = 0.114
Kr = 0.299
ITU-R BT 601
Y'= 0.299 *R' + 0.587 *G' + 0.114 *B'
Cb=-0.168736*R' - 0.331264*G' + 0.5 *B'
Cr= 0.5 *R' - 0.418688*G' - 0.081312*B'
R'= Y' + 1.403*Cr
G'= Y' - 0.344*Cb - 0.714*Cr
B'= Y' + 1.773*Cb
Kb = 0.0722
Kr = 0.2126
ITU-R BT 709
Y'= 0.2215*R' + 0.7154*G' + 0.0721*B'
Cb=-0.1145*R' - 0.3855*G' + 0.5000*B'
Cr= 0.5016*R' - 0.4556*G' - 0.0459*B'
R'= Y' + 1.5701*Cr
G'= Y' - 0.1870*Cb - 0.4664*Cr
B'= Y' - 1.8556*Cb
*/
#define CM(i) ((int16_t)((i)*8192))
#if defined(_M_IX86) && !defined(TARGET_SYMBIAN)
static const int16_t YUVToRGB[4][8] =
{
// TV range BT-601
{ CM(1.164),-16,CM(1.596),CM(-0.813),-128,CM(2.017),CM(-0.392),-128 },
// TV range BT-709
{ CM(1.164),-16,CM(1.786),CM(-0.530),-128,CM(2.111),CM(-0.213),-128 },
// PC range BT-601
{ CM(1),0,CM(1.402),CM(-0.71414),-128,CM(1.772),CM(-0.34414),-128 },
// PC range BT-709
{ CM(1),0,CM(1.5701),CM(-0.4664),-128,CM(1.8556),CM(-0.1870),-128 },
};
static const int16_t* GetYUVToRGB(const pixel* Src)
{
int i = 0;
if (Src->Flags & PF_YUV_BT709)
i += 1;
if (Src->Flags & PF_YUV_PC)
i += 2;
return YUVToRGB[i];
}
#endif
#define SAT(Value) (Value < 0 ? 0: (Value > 255 ? 255: Value))
static const rgbval_t Gray1[2] = {
CRGB(0,0,0),CRGB(255,255,255)
};
static const rgbval_t Gray2[4] = {
CRGB(0,0,0),CRGB(85,85,85),CRGB(170,170,170),CRGB(255,255,255)
};
static const rgbval_t Gray4[16] = {
CRGB(0,0,0),CRGB(17,17,17),CRGB(34,34,34),CRGB(51,51,51),
CRGB(68,68,68),CRGB(85,85,85),CRGB(102,102,102),CRGB(119,119,119),
CRGB(136,136,136),CRGB(153,153,153),CRGB(170,170,170),CRGB(187,187,187),
CRGB(204,204,204),CRGB(221,221,221),CRGB(238,238,238),CRGB(255,255,255)
};
#if defined(_M_IX86) && !defined(TARGET_SYMBIAN)
#define DECLARE_BLITMMX(name) \
extern void STDCALL name##_mmx(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch,int Width,int Height,uintptr_t Src2SrcLast); \
extern void STDCALL name##_mmx2(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch,int Width,int Height,uintptr_t Src2SrcLast); \
extern void STDCALL name##_3dnow(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch,int Width,int Height,uintptr_t Src2SrcLast);
DECLARE_BLITMMX(blit_i420_i420)
DECLARE_BLITMMX(blit_i420_yuy2)
DECLARE_BLITMMX(blit_i420_rgb32)
DECLARE_BLITMMX(blit_i420_rgb24)
DECLARE_BLITMMX(blit_i420_bgr32)
DECLARE_BLITMMX(blit_i420_bgr24)
DECLARE_BLITMMX(blit_rgb32_rgb32)
DECLARE_BLITMMX(blit_rgb24_rgb24)
DECLARE_BLITMMX(blit_rgb16_rgb16)
typedef struct blitmmx
{
uint32_t In;
uint32_t Out;
blitsoftentry Func[3];
} blitmmx;
static const blitmmx BlitMMX[] =
{
{ FOURCC_I420, FOURCC_I420, { blit_i420_i420_mmx, blit_i420_i420_mmx2, blit_i420_i420_3dnow }},
{ FOURCC_I420, FOURCC_YUY2, { blit_i420_yuy2_mmx, blit_i420_yuy2_mmx2, blit_i420_yuy2_3dnow }},
{ FOURCC_I420, FOURCC_RGB32,{ blit_i420_rgb32_mmx,blit_i420_rgb32_mmx2,blit_i420_rgb32_3dnow }},
{ FOURCC_I420, FOURCC_RGB24,{ blit_i420_rgb24_mmx,blit_i420_rgb24_mmx2,blit_i420_rgb24_3dnow }},
{ FOURCC_I420, FOURCC_BGR32,{ blit_i420_bgr32_mmx,blit_i420_bgr32_mmx2,blit_i420_bgr32_3dnow }},
{ FOURCC_I420, FOURCC_BGR24,{ blit_i420_bgr24_mmx,blit_i420_bgr24_mmx2,blit_i420_bgr24_3dnow }},
{ FOURCC_RGB32,FOURCC_RGB32,{ blit_rgb32_rgb32_mmx,blit_rgb32_rgb32_mmx2,blit_rgb32_rgb32_3dnow }},
{ FOURCC_RGB24,FOURCC_RGB24,{ blit_rgb24_rgb24_mmx,blit_rgb24_rgb24_mmx2,blit_rgb24_rgb24_3dnow }},
{ FOURCC_RGB16,FOURCC_RGB16,{ blit_rgb16_rgb16_mmx,blit_rgb16_rgb16_mmx2,blit_rgb16_rgb16_3dnow }},
{ FOURCC_RGB15,FOURCC_RGB15,{ blit_rgb16_rgb16_mmx,blit_rgb16_rgb16_mmx2,blit_rgb16_rgb16_3dnow }},
{ FOURCC_BGR32,FOURCC_BGR32,{ blit_rgb32_rgb32_mmx,blit_rgb32_rgb32_mmx2,blit_rgb32_rgb32_3dnow }},
{ FOURCC_BGR24,FOURCC_BGR24,{ blit_rgb24_rgb24_mmx,blit_rgb24_rgb24_mmx2,blit_rgb24_rgb24_3dnow }},
{ FOURCC_BGR16,FOURCC_BGR16,{ blit_rgb16_rgb16_mmx,blit_rgb16_rgb16_mmx2,blit_rgb16_rgb16_3dnow }},
{ FOURCC_BGR15,FOURCC_BGR15,{ blit_rgb16_rgb16_mmx,blit_rgb16_rgb16_mmx2,blit_rgb16_rgb16_3dnow }},
{0},
};
#endif
typedef struct blitpack
{
blitfx FX;
video Dst;
video Src;
rect DstRect;
rect SrcRect;
blit_soft Code[2];
bool_t SafeBorder;
int RScaleX;
int RScaleY;
struct blitpack* Next;
} blitpack;
static NOINLINE void FreeBlit(blit_soft* p)
{
CodeDone(&p->Code);
free(p->LookUp_Data);
p->LookUp_Data = NULL;
}
static INLINE struct blitpack* _BlitAlloc()
{
blitpack* p = (blitpack*) malloc(sizeof(blitpack));
if (!p) return NULL;
memset(p,0,sizeof(blitpack));
CodeInit(&p->Code[0].Code);
CodeInit(&p->Code[1].Code);
return p;
}
static INLINE void _BlitFree(struct blitpack* p)
{
FreeBlit(&p->Code[0]);
FreeBlit(&p->Code[1]);
free(p);
}
#ifdef CONFIG_CONTEXT
void Blit_Init()
{
}
void Blit_Done()
{
blitpack* p;
while ((p = Context()->Blit)!=NULL)
{
Context()->Blit = p->Next;
_BlitFree(p);
}
}
static INLINE void BlitFree(struct blitpack* p)
{
p->Next = Context()->Blit;
Context()->Blit = p;
}
static INLINE struct blitpack* BlitAlloc()
{
blitpack* p = Context()->Blit;
if (p)
Context()->Blit = p->Next;
else
p = _BlitAlloc();
return p;
}
#else
#define BlitAlloc _BlitAlloc
#define BlitFree _BlitFree
#endif
static const rgb* DefaultPal(const pixel* Format)
{
if (Format->Flags & PF_PALETTE)
{
if (!Format->Palette)
switch (Format->BitCount)
{
case 1: return (const rgb*)Gray1;
case 2: return (const rgb*)Gray2;
case 4: return (const rgb*)Gray4;
}
return Format->Palette;
}
return NULL;
}
static int CalcScale(int v, int Min, int Max)
{
if (v > Max) v = Max;
if (v < Min) v = Min;
return v;
}
static int CalcRScale(int v, int Gray)
{
if (v<=0) return 16;
v = (16*1024 << 16) / v;
if (Gray) // only 100% and 200% scale
return v > 12288 ? 16:8;
//align to 100%
if (v > 16834-1024 && v < 16384+1024)
v = 16384;
//align to 200%
if (v > 8192-1024 && v < 8192+1024)
v = 8192;
//align to 50%
if (v > 32768-2048 && v < 32768+2048)
v = 32768;
#if defined(SH3)
if (v < 12288)
return 8;
return 16;
// if (v<1024) v=1024;
// return ((v+1024) >> 11) << 1;
#else
if (v<512) v=512;
return (v+512) >> 10;
#endif
}
static NOINLINE bool_t EnlargeIfNeeded(int* v,int Align,int Side,int Limit)
{
int Needed = Align - (*v & (Align-1));
if (Needed < Align && Needed <= (Limit-Side))
{
*v += Needed;
return 1;
}
return 0;
}
void BlitAlign(blitpack* p, rect* DstRect, rect* SrcRect)
{
int i;
int ShrinkX,ShrinkY;
int SrcRight;
int SrcBottom;
int SrcAdjWidth,SrcAdjHeight;
int RScaleX,RScaleY;
blit_soft* Code;
if (!p) return;
RScaleX = p->RScaleX;
RScaleY = p->RScaleY;
Code = &p->Code[0];
p->SafeBorder = 0;
if (Code->ArithStretch && (RScaleX != 16) && (RScaleX != 32))
{
//avoid bilinear scale overrun (shrink source)
if ((p->Src.Pixel.Flags & PF_SAFEBORDER) && Code->DstAlignSize > 2)
p->SafeBorder = 1; // build a one pixel border before blitting on right and bottom side
else
{
//only horizontal bilinear filtering is supported (arm_stretch)
if (SrcRect->Width>2)
SrcRect->Width -= 2;
}
}
// convert source to destination space
if (p->FX.Direction & DIR_SWAPXY)
{
SwapInt(&RScaleX,&RScaleY);
SwapRect(SrcRect);
}
SrcRight = SrcRect->x + SrcRect->Width;
SrcBottom = SrcRect->y + SrcRect->Height;
SrcAdjWidth = SrcRect->Width * 16 / RScaleX;
SrcAdjHeight = SrcRect->Height * 16 / RScaleY;
if (p->FX.Flags & BLITFX_ENLARGEIFNEEDED)
{
SrcRight = p->Src.Width;
SrcBottom = p->Src.Height;
if (p->FX.Direction & DIR_SWAPXY)
SwapInt(&SrcRight,&SrcBottom);
if (p->Src.Pixel.Flags & PF_SAFEBORDER)
{
SrcRight += 16;
SrcBottom += 16;
}
if (EnlargeIfNeeded(&SrcAdjWidth,Code->DstAlignSize,SrcRect->x+SrcRect->Width,SrcRight))
SrcRect->Width = -1; // need calc
if (EnlargeIfNeeded(&SrcAdjHeight,Code->DstAlignSize,SrcRect->y+SrcRect->Height,SrcBottom))
SrcRect->Height = -1; // need calc
}
ShrinkX = DstRect->Width - SrcAdjWidth;
if (ShrinkX>=0) //shrink destination?
{
ShrinkX >>= 1;
DstRect->x += ShrinkX;
DstRect->Width = SrcAdjWidth;
}
else //adjust source position
{
ShrinkX = 0;
SrcRect->x += (SrcAdjWidth - DstRect->Width) * RScaleX >> 5;
SrcRect->Width = -1; // need calc
}
ShrinkY = DstRect->Height - SrcAdjHeight;
if (ShrinkY>=0) //shrink Dst?
{
ShrinkY >>= 1;
DstRect->y += ShrinkY;
DstRect->Height = SrcAdjHeight;
}
else //adjust source position
{
ShrinkY = 0;
SrcRect->y += (SrcAdjHeight - DstRect->Height) * RScaleY >> 5;
SrcRect->Height = -1; // need calc
}
i = DstRect->Width & (Code->DstAlignSize-1);
DstRect->Width -= i;
i >>= 1;
ShrinkX += i;
DstRect->x += i;
i = DstRect->Height & (Code->DstAlignSize-1);
DstRect->Height -= i;
i >>= 1;
ShrinkY += i;
DstRect->y += i;
i = DstRect->x & (Code->DstAlignPos-1);
if (i && ShrinkX < i)
{
DstRect->Width -= Code->DstAlignPos - i;
DstRect->Width &= ~(Code->DstAlignSize-1);
DstRect->x += Code->DstAlignPos - i;
}
else
DstRect->x -= i;
i = DstRect->y & (Code->DstAlignPos-1);
if (i && ShrinkY < i)
{
DstRect->Height -= Code->DstAlignPos - i;
DstRect->Height &= ~(Code->DstAlignSize-1);
DstRect->y += Code->DstAlignPos - i;
}
else
DstRect->y -= i;
SrcRect->x &= ~(Code->SrcAlignPos-1);
SrcRect->y &= ~(Code->SrcAlignPos-1);
// convert source back to it's space (if needed)
if (SrcRect->Width < 0)
SrcRect->Width = (DstRect->Width * RScaleX / 16 + 1) & ~1;
if (SrcRect->Height < 0)
SrcRect->Height = (DstRect->Height * RScaleY / 16 + 1) & ~1;
if (SrcRect->x + SrcRect->Width > SrcRight)
SrcRect->Width = SrcRight - SrcRect->x;
if (SrcRect->y + SrcRect->Height > SrcBottom)
SrcRect->Height = SrcBottom - SrcRect->y;
if (p->FX.Direction & DIR_SWAPXY)
SwapRect(SrcRect);
p->DstRect = *DstRect;
p->SrcRect = *SrcRect;
}
static NOINLINE void CodeRelease(blit_soft* p)
{
//todo... better palette handling
if (p->Dst.Palette)
memset(&p->Dst,0,sizeof(p->Dst));
if (p->Src.Palette)
memset(&p->Src,0,sizeof(p->Src));
}
void BlitRelease(blitpack* p)
{
if (p)
{
CodeRelease(&p->Code[0]);
CodeRelease(&p->Code[1]);
BlitFree(p);
}
}
int AnyAlign(rect* DstRect, rect* SrcRect, const blitfx* FX,
int DstAlignSize, int DstAlignPos,
int MinScale, int MaxScale)
{
int i,ShrinkX,ShrinkY;
int ScaleX,ScaleY;
int SrcRight;
int SrcBottom;
int SrcAdjWidth,SrcAdjHeight;
if (!DstRect || !SrcRect || !FX)
return ERR_INVALID_PARAM;
ScaleX = CalcScale(FX->ScaleX,MinScale,MaxScale);
ScaleY = CalcScale(FX->ScaleY,MinScale,MaxScale);
SrcRight = SrcRect->x + SrcRect->Width;
SrcBottom = SrcRect->y + SrcRect->Height;
// convert source to destination space
if (FX->Direction & DIR_SWAPXY)
{
SwapInt(&ScaleX,&ScaleY);
SwapRect(SrcRect);
}
SrcAdjWidth = (SrcRect->Width * ScaleX + 32768) >> 16;
SrcAdjHeight = (SrcRect->Height * ScaleY + 32768) >> 16;
ShrinkX = DstRect->Width - SrcAdjWidth;
if (ShrinkX>0) //shrink destination?
{
ShrinkX >>= 1;
DstRect->x += ShrinkX;
DstRect->Width = SrcAdjWidth;
}
else //adjust source position
{
ShrinkX = 0;
SrcRect->x += ((SrcAdjWidth - DstRect->Width) << 15) / ScaleX;
SrcRect->Width = -1;
}
ShrinkY = DstRect->Height - SrcAdjHeight;
if (ShrinkY>0) //shrink Dst?
{
ShrinkY >>= 1;
DstRect->y += ShrinkY;
DstRect->Height = SrcAdjHeight;
}
else //adjust source position
{
ShrinkY = 0;
SrcRect->y += ((SrcAdjHeight - DstRect->Height) << 15) / ScaleY;
SrcRect->Height = -1;
}
// final alignment
i = DstRect->Width & (DstAlignSize-1);
DstRect->Width -= i;
i >>= 1;
ShrinkX += i;
DstRect->x += i;
i = DstRect->Height & (DstAlignSize-1);
DstRect->Height -= i;
i >>= 1;
ShrinkY += i;
DstRect->y += i;
i = DstRect->x & (DstAlignPos-1);
if (i && ShrinkX < i)
{
DstRect->Width -= DstAlignPos - i;
DstRect->Width &= ~(DstAlignSize-1);
DstRect->x += DstAlignPos - i;
}
else
DstRect->x -= i;
i = DstRect->y & (DstAlignPos-1);
if (i && ShrinkY < i)
{
DstRect->Height -= DstAlignPos - i;
DstRect->Height &= ~(DstAlignSize-1);
DstRect->y += DstAlignPos - i;
}
else
DstRect->y -= i;
SrcRect->x &= ~1;
SrcRect->y &= ~1;
if (SrcRect->Width < 0)
SrcRect->Width = ((DstRect->Width << 16) / ScaleX +1) & ~1;
if (SrcRect->Height < 0)
SrcRect->Height = ((DstRect->Height << 16) / ScaleY +1) & ~1;
if (FX->Direction & DIR_SWAPXY)
SwapRect(SrcRect);
if (SrcRect->x + SrcRect->Width > SrcRight)
SrcRect->Width = SrcRight - SrcRect->x;
if (SrcRect->y + SrcRect->Height > SrcBottom)
SrcRect->Height = SrcBottom - SrcRect->y;
return ERR_NONE;
}
static INLINE void SurfacePtr(uint8_t** Ptr, const planes Planes, const video* Format, int BPP, int x, int y, int Pitch)
{
int Adj = (x & 1) << 1;
Ptr[0] = (uint8_t*)Planes[0] + ((x * BPP) >> 3) + y * Pitch;
if (Format->Pixel.Flags & (PF_YUV420|PF_YUV422|PF_YUV444|PF_YUV410))
{
if (Format->Pixel.Flags & PF_YUV420)
{
Ptr[1] = (uint8_t*)Planes[1] + (x >> 1) + (y >> 1) * (Pitch >> 1);
Ptr[2] = (uint8_t*)Planes[2] + (x >> 1) + (y >> 1) * (Pitch >> 1);
}
else
if (Format->Pixel.Flags & PF_YUV422)
{
Ptr[1] = (uint8_t*)Planes[1] + (x >> 1) + y * (Pitch >> 1);
Ptr[2] = (uint8_t*)Planes[2] + (x >> 1) + y * (Pitch >> 1);
}
else
if (Format->Pixel.Flags & PF_YUV444)
{
Ptr[1] = (uint8_t*)Planes[1] + x + y * Pitch;
Ptr[2] = (uint8_t*)Planes[2] + x + y * Pitch;
}
else
if (Format->Pixel.Flags & PF_YUV410)
{
Ptr[1] = (uint8_t*)Planes[1] + (x >> 2) + (y >> 2) * (Pitch >> 2);
Ptr[2] = (uint8_t*)Planes[2] + (x >> 2) + (y >> 2) * (Pitch >> 2);
}
}
else
if (Format->Pixel.Flags & PF_FOURCC)
switch (Format->Pixel.FourCC)
{
case FOURCC_IMC2:
Ptr[2] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 1) + (y >> 1) * Pitch;
Ptr[1] = Ptr[1] + (Pitch >> 1);
break;
case FOURCC_IMC4:
Ptr[1] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 1) + (y >> 1) * Pitch;
Ptr[2] = Ptr[1] + (Pitch >> 1);
break;
case FOURCC_I420:
case FOURCC_IYUV:
Ptr[1] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 1) + (y >> 1) * (Pitch >> 1);
Ptr[2] = Ptr[1] + ((Format->Height * Pitch) >> 2);
break;
case FOURCC_YV16:
Ptr[2] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 1) + y * (Pitch >> 1);
Ptr[1] = Ptr[2] + ((Format->Height * Pitch) >> 1);
break;
case FOURCC_YVU9:
Ptr[2] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 2) + (y >> 2) * (Pitch >> 2);
Ptr[1] = Ptr[2] + ((Format->Height * Pitch) >> 4);
break;
case FOURCC_YUV9:
Ptr[1] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 2) + (y >> 2) * (Pitch >> 2);
Ptr[2] = Ptr[1] + ((Format->Height * Pitch) >> 4);
break;
case FOURCC_YV12:
Ptr[2] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 1) + (y >> 1) * (Pitch >> 1);
Ptr[1] = Ptr[2] + ((Format->Height * Pitch) >> 2);
break;
case FOURCC_YUY2:
case FOURCC_YUNV:
case FOURCC_V422:
case FOURCC_YUYV:
Ptr[1] = Ptr[0]+1-Adj;
Ptr[2] = Ptr[0]+3-Adj;
break;
case FOURCC_YVYU:
Ptr[1] = Ptr[0]+3-Adj;
Ptr[2] = Ptr[0]+1-Adj;
break;
case FOURCC_UYVY:
case FOURCC_Y422:
case FOURCC_UYNV:
Ptr[1] = Ptr[0]-Adj;
Ptr[2] = Ptr[0]+2-Adj;
Ptr[0]++;
break;
case FOURCC_VYUY:
Ptr[2] = Ptr[0]-Adj;
Ptr[1] = Ptr[0]+2-Adj;
Ptr[0]++;
break;
}
}
void BlitImage(blitpack* Pack, const planes Dst, const constplanes Src, const constplanes SrcLast, int DstPitch, int SrcPitch)
{
uint8_t* DstPtr[MAXPLANES];
uint8_t* SrcPtr[MAXPLANES];
bool_t OnlyDiff;
int Width,Height;
blit_soft* p;
uintptr_t Src2SrcLast;
int DstStepX;
int DstStepY;
int DstX;
int DstY;
int SrcY;
// nothing to do?
if (!Pack || Pack->DstRect.Width<=0 || Pack->DstRect.Height<=0)
return;
OnlyDiff = (Pack->FX.Flags & BLITFX_ONLYDIFF) && SrcLast && SrcLast[0] != NULL;
p = &Pack->Code[OnlyDiff];
// calculate the Src and Dst pointers
// Src: always upperleft corner
// Dst: according to swapxy and mirroring
Width = Pack->DstRect.Width;
Height = Pack->DstRect.Height;
if (p->SwapXY)
SwapInt(&Width,&Height);
if (DstPitch < 0)
DstPitch = Pack->Dst.Pitch;
if (SrcPitch < 0)
SrcPitch = Pack->Src.Pitch;
SrcY = Pack->SrcRect.y;
if (p->SrcUpDown)
SrcY += Pack->SrcRect.Height-1;
SurfacePtr(SrcPtr,*(const planes*)Src,&Pack->Src,p->SrcBPP,Pack->SrcRect.x,SrcY,SrcPitch);
if (p->SrcUpDown)
SrcPitch = -SrcPitch;
Src2SrcLast = 0;
if (OnlyDiff)
Src2SrcLast = (uint8_t*)SrcLast[0] - (uint8_t*)Src[0];
DstStepX = p->DstBPP;
DstStepY = DstPitch*8;
DstX = Pack->DstRect.x;
DstY = Pack->DstRect.y;
if (p->DstLeftRight)
{
DstX += Pack->DstRect.Width-1;
DstStepX = -DstStepX;
}
if (p->DstUpDown)
{
DstY += Pack->DstRect.Height-1;
DstStepY = -DstStepY;
}
if (p->SwapXY)
SwapInt(&DstStepX,&DstStepY);
SurfacePtr(DstPtr,Dst,&Pack->Dst,p->DstBPP,DstX,DstY,DstPitch);
if (p->DstUpDown)
DstPitch = -DstPitch;
if (p->Slices)
{
const int DstBlock2 = 5;
const int DstBlock = 32;
int SrcBlock = (DstBlock * p->RScaleX) >> 4; //SrcBlock has to be even because of YUV
int DstNext = (DstBlock * DstStepX) >> 3;
int DstNextUV = DstNext >> (p->SwapXY ? p->DstUVPitch2+p->DstUVY2 : p->DstUVX2);
int SrcNext = (SrcBlock * p->SrcBPP) >> 3;
int SrcNextUV = SrcNext >> p->SrcUVX2;
if (Width > DstBlock && p->SlicesReverse) // reverse order?
{
int Quot = Width >> DstBlock2;
int Rem = Width & (DstBlock-1);
DstPtr[0] += Quot*DstNext;
DstPtr[1] += Quot*DstNextUV;
DstPtr[2] += Quot*DstNextUV;
SrcPtr[0] += Quot*SrcNext;
SrcPtr[1] += Quot*SrcNextUV;
SrcPtr[2] += Quot*SrcNextUV;
if (Rem)
{
p->Entry(p,DstPtr,SrcPtr,DstPitch,SrcPitch,Rem,Height,Src2SrcLast);
Width -= Rem;
}
DstNext = -DstNext;
DstNextUV = -DstNextUV;
SrcNext = -SrcNext;
SrcNextUV = -SrcNextUV;
DstPtr[0] += DstNext;
DstPtr[1] += DstNextUV;
DstPtr[2] += DstNextUV;
SrcPtr[0] += SrcNext;
SrcPtr[1] += SrcNextUV;
SrcPtr[2] += SrcNextUV;
}
for (;Width > DstBlock;Width -= DstBlock)
{
p->Entry(p,DstPtr,SrcPtr,DstPitch,SrcPitch,DstBlock,Height,Src2SrcLast);
DstPtr[0] += DstNext;
DstPtr[1] += DstNextUV;
DstPtr[2] += DstNextUV;
SrcPtr[0] += SrcNext;
SrcPtr[1] += SrcNextUV;
SrcPtr[2] += SrcNextUV;
}
}
p->Entry(p,DstPtr,SrcPtr,DstPitch,SrcPitch,Width,Height,Src2SrcLast);
}
void BuildPalLookUp(blit_soft* p,bool_t YUV)
{
//create a palette lookup with 3x3 bits RGB input
int a,b,c;
int Size = 1 << p->Dst.BitCount;
uint8_t* LookUp;
p->LookUp_Data = malloc(16*16*16*4);
if (p->LookUp_Data)
{
LookUp = (uint8_t*) p->LookUp_Data;
for (a=16;a<512;a+=32)
for (b=16;b<512;b+=32)
for (c=16;c<512;c+=32)
{
const rgb* q = p->DstPalette;
int BestMatch = 0;
int BestDiff = 0x7FFFFFFF;
int i,v[3];
v[0] = a;
v[1] = b;
v[2] = c;
if (v[0] >= 384) v[0] -= 512;
if (v[1] >= 384) v[1] -= 512;
if (v[2] >= 384) v[2] -= 512;
if (YUV)
{
int w[3];
w[0] = (v[0]*p->_YMul + v[2]*p->_RVMul + p->_RAdd) >> 16;
w[1] = (v[0]*p->_YMul + v[1]*p->_GUMul + v[2]*p->_GVMul + p->_GAdd) >> 16;
w[2] = (v[0]*p->_YMul + v[1]*p->_BUMul + p->_BAdd) >> 16;
v[0]=w[0];
v[1]=w[1];
v[2]=w[2];
}
for (i=0;i<Size;++i,++q)
{
int Diff = (q->c.r-v[0])*(q->c.r-v[0])+
(q->c.g-v[1])*(q->c.g-v[1])+
(q->c.b-v[2])*(q->c.b-v[2]);
if (Diff < BestDiff)
{
BestMatch = i;
BestDiff = Diff;
}
}
q = p->DstPalette + BestMatch;
if (YUV)
{
v[0] = ((2105 * q->c.r) + (4128 * q->c.g) + (802 * q->c.b))/0x2000 + 16;
v[1] = (-(1212 * q->c.r) - (2384 * q->c.g) + (3596 * q->c.b))/0x2000 + 128;
v[2] = ((3596 * q->c.r) - (3015 * q->c.g) - (582 * q->c.b))/0x2000 + 128;
v[0]=SAT(v[0]);
v[1]=SAT(v[1]);
v[2]=SAT(v[2]);
}
else
{
v[0] = q->c.r;
v[1] = q->c.g;
v[2] = q->c.b;
}
LookUp[0] = (uint8_t)BestMatch;
#if defined(SH3)
LookUp[3] = (uint8_t)(v[0] >> 1);
LookUp[1] = (uint8_t)(v[1] >> 1);
LookUp[2] = (uint8_t)(v[2] >> 1);
#else
LookUp[1] = (uint8_t)v[0];
LookUp[2] = (uint8_t)v[1];
LookUp[3] = (uint8_t)v[2];
#endif
LookUp += 4;
}
}
}
int UniversalType(const pixel* p, bool_t YUV)
{
if (PlanarYUV(p,NULL,NULL,NULL)) return YUV ? 12:10;
if (PackedYUV(p)) return YUV ? 13:11;
if (p->Flags & PF_PALETTE)
{
if (p->BitCount==1) return 1;
if (p->BitCount==2) return 2;
if (p->BitCount==4) return 3;
if (p->BitCount==8) return YUV ? 14:4;
}
if (p->Flags & PF_RGB)
{
if (p->BitCount==8) return 5;
if (p->BitCount==16) return 6;
if (p->BitCount==24) return 7;
if (p->BitCount==32) return 8;
}
return -1;
}
// !SwapXY
// !DstLeftRight
// SrcType == 12
// DstType == 12
// RScaleX == 32/16/8
// RScaleY == 32/16/8
// SrcUVX2 == DstUVX2
// SrcUVY2 == DstUVY2
static void Blit_PYUV_PYUV_2Plane(const uint8_t* Src,uint8_t* Dst,int Width,int Height,int SrcPitch,int DstPitch,int ScaleX,int ScaleY)
{
int y;
for (y=0;y<Height;++y)
{
const uint8_t* s = Src;
uint8_t* d = Dst;
uint8_t* de = Dst + Width;
switch (ScaleX)
{
case 4:
while (d<de)
{
d[0] = d[1] = d[2] = d[3] = *s;
++s;
d+=4;
}
break;
case 8:
while (d<de)
{
d[0] = *s;
d[1] = *s;
++s;
d+=2;
}
break;
case 16:
memcpy(d,s,Width);
break;
case 32:
while (d<de)
{
*d = *s;
++d;
s+=2;
}
break;
case 64:
while (d<de)
{
*d = *s;
++d;
s+=4;
}
break;
}
Dst += DstPitch;
switch (ScaleY)
{
case 4:
if ((y&3)==3) Src += SrcPitch;
break;
case 8:
if (y&1) Src += SrcPitch;
break;
case 16:
Src += SrcPitch;
break;
case 32:
Src += SrcPitch*2;
break;
case 64:
Src += SrcPitch*4;
break;
}
}
}
// needed for half/quarter software idct mode changes
static void STDCALL Blit_PYUV_PYUV_2(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch,
int Width,int Height,uintptr_t Src2SrcLast)
{
Blit_PYUV_PYUV_2Plane(SrcPtr[0],DstPtr[0],Width,Height,SrcPitch,DstPitch,This->RScaleX,This->RScaleY);
Width >>= This->SrcUVX2;
Height >>= This->SrcUVY2;
SrcPitch >>= This->SrcUVPitch2;
DstPitch >>= This->DstUVPitch2;
Blit_PYUV_PYUV_2Plane(SrcPtr[1],DstPtr[1],Width,Height,SrcPitch,DstPitch,This->RScaleX,This->RScaleY);
Blit_PYUV_PYUV_2Plane(SrcPtr[2],DstPtr[2],Width,Height,SrcPitch,DstPitch,This->RScaleX,This->RScaleY);
}
#if defined(_M_IX86) || !defined(CONFIG_DYNCODE) || defined(BLITTEST)
// !SwapXY
// !DstLeftRight
// SrcType == 12
// DstType == 12
// RScaleX == 16 && RScaleY == 16
// SrcUVX2 == DstUVX2
static void STDCALL Blit_PYUV_PYUV(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch,
int Width,int Height,uintptr_t Src2SrcLast)
{
uint8_t* Src[3];
uint8_t* Dst[3];
uint8_t* LookUp = This->LookUp_Data;
int UVY = 1 << This->DstUVY2;
int UVDup = 1;
int SrcPitchUV = SrcPitch >> This->SrcUVPitch2;
int DstPitchUV = DstPitch >> This->DstUVPitch2;
int WidthUV = Width >> This->SrcUVX2;
int YAdd = This->FX.Brightness;
int x,y,i;
for (i=0;i<3;++i)
{
Src[i] = SrcPtr[i];
Dst[i] = DstPtr[i];
}
// skip some UV lines?
if (This->DstUVY2 > This->SrcUVY2)
SrcPitchUV <<= This->DstUVY2 - This->SrcUVY2;
else
UVDup = 1 << (This->SrcUVY2 - This->DstUVY2);
Height >>= This->DstUVY2;
for (y=0;y<Height;++y)
{
for (i=0;i<UVY;++i)
{
if (LookUp)
{
uint8_t* s = Src[0];
uint8_t* d = Dst[0];
for (x=0;x<Width;++x,++s,++d)
*d = LookUp[*s];
}
else
if (YAdd)
{
uint8_t* s = Src[0];
uint8_t* d = Dst[0];
for (x=0;x<Width;++x,++s,++d)
{
int Y = *s + YAdd;
Y = SAT(Y);
*d = (uint8_t)Y;
}
}
else
memcpy(Dst[0],Src[0],Width);
Src[0] += SrcPitch;
Dst[0] += DstPitch;
}
for (i=0;i<UVDup;++i)
{
if (LookUp)
{
for (x=0;x<WidthUV;++x)
{
Dst[1][x] = LookUp[256+Src[1][x]];
Dst[2][x] = LookUp[512+Src[2][x]];
}
}
else
{
memcpy(Dst[1],Src[1],WidthUV);
memcpy(Dst[2],Src[2],WidthUV);
}
Dst[1] += DstPitchUV;
Dst[2] += DstPitchUV;
}
Src[1] += SrcPitchUV;
Src[2] += SrcPitchUV;
}
}
// !SwapXY
// !DstLeftRight
// SrcType == 10
// SrcUVX2 == 1
// DstType == 8 00000000rrrrrrrrggggggggbbbbbbbb
static void STDCALL Blit_PYUV_RGB32(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch,
int Width,int Height,uintptr_t Src2SrcLast)
{
int i,x,y;
uint8_t* Src[3];
uint8_t* Dst;
int SrcStepX[16];
int SrcStepY[16];
int SrcUVY2 = This->SrcUVY2;
int SrcUVPitch2 = This->SrcUVPitch2;
int YAdd = This->FX.Brightness;
int sy0;
int sy=0;
for (i=0;i<3;++i)
Src[i] = SrcPtr[i];
Dst = DstPtr[0];
for (i=0;i<16;++i)
{
SrcStepX[i] = ((This->RScaleX * (i+1)) >> 4) - ((This->RScaleX * i) >> 4);
SrcStepY[i] = ((This->RScaleY * (i+1)) >> 4) - ((This->RScaleY * i) >> 4);
}
for (y=0;y<Height;++y)
{
uint8_t* dx = Dst;
int sx = 0;
for (x=0;x<Width;++x)
{
int cR,cG,cB;
int sx2 = sx>>1;
cR = ((Src[0][sx]+YAdd-16)*0x2568 + 0x3343*(Src[2][sx2]-128)) /0x2000;
cG = ((Src[0][sx]+YAdd-16)*0x2568 - 0x0c92*(Src[1][sx2]-128) - 0x1a1e*(Src[2][sx2]-128)) /0x2000;
cB = ((Src[0][sx]+YAdd-16)*0x2568 + 0x40cf*(Src[1][sx2]-128)) /0x2000;
cR = SAT(cR);
cG = SAT(cG);
cB = SAT(cB);
do
{
dx[0]=(uint8_t)cB;
dx[1]=(uint8_t)cG;
dx[2]=(uint8_t)cR;
dx+=4;
} while (SrcStepX[x&15]==0 && ++x<Width);
sx += SrcStepX[x&15];
}
Dst += DstPitch;
while (SrcStepY[y&15]==0 && ++y<Height)
{
memcpy(Dst,Dst-DstPitch,4*Width);
Dst += DstPitch;
}
sy0 = sy;
sy += SrcStepY[y&15];
Src[0] += SrcPitch * (sy - sy0);
Src[1] += (SrcPitch >> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2));
Src[2] += (SrcPitch >> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2));
}
}
// !SwapXY
// !DstLeftRight
// SrcType == 10
// SrcUVX2 == 1
// DstType == 6 rrrrrggggggbbbbb
static void STDCALL Blit_PYUV_RGB16(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch,
int Width,int Height,uintptr_t Src2SrcLast)
{
int i,x,y;
uint8_t* Src[3];
uint8_t* Dst;
int SrcStepX[16];
int SrcStepY[16];
int SrcUVY2 = This->SrcUVY2;
int SrcUVPitch2 = This->SrcUVPitch2;
int YAdd = This->FX.Brightness;
int sy0;
int sy=0;
for (i=0;i<3;++i)
Src[i] = SrcPtr[i];
Dst = DstPtr[0];
for (i=0;i<16;++i)
{
SrcStepX[i] = ((This->RScaleX * (i+1)) >> 4) - ((This->RScaleX * i) >> 4);
SrcStepY[i] = ((This->RScaleY * (i+1)) >> 4) - ((This->RScaleY * i) >> 4);
}
for (y=0;y<Height;++y)
{
uint16_t* dx = (uint16_t*)Dst;
int sx = 0;
for (x=0;x<Width;++x)
{
int cR,cG,cB;
int sx2 = sx>>1;
cR = ((Src[0][sx]+YAdd-16)*0x2568 + 0x3343*(Src[2][sx2]-128)) /0x2000;
cG = ((Src[0][sx]+YAdd-16)*0x2568 - 0x0c92*(Src[1][sx2]-128) - 0x1a1e*(Src[2][sx2]-128)) /0x2000;
cB = ((Src[0][sx]+YAdd-16)*0x2568 + 0x40cf*(Src[1][sx2]-128)) /0x2000;
cR = SAT(cR);
cG = SAT(cG);
cB = SAT(cB);
do
{
*dx = (uint16_t)(((cR << 8)&0xF800)|((cG << 3)&0x07E0)|(cB >> 3));
++dx;
} while (SrcStepX[x&15]==0 && ++x<Width);
sx += SrcStepX[x&15];
}
Dst += DstPitch;
while (SrcStepY[y&15]==0 && ++y<Height)
{
memcpy(Dst,Dst-DstPitch,2*Width);
Dst += DstPitch;
}
sy0 = sy;
sy += SrcStepY[y&15];
Src[0] += SrcPitch * (sy - sy0);
Src[1] += (SrcPitch >> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2));
Src[2] += (SrcPitch >> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2));
}
}
static void STDCALL BlitUniversal(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch,
int Width,int Height,uintptr_t Src2SrcLast)
{
//this will be very-very slow, only for compability and testing
int i,j,x,y;
uint8_t* Src[3];
uint8_t* Dst[3];
int SrcType = This->SrcType;
int DstType = This->DstType;
int DstMask[3],DstPos[3];
int SrcMask[3],SrcPos[3];
int DitherMask[3];
uint8_t* PalLookUp = (uint8_t*)This->LookUp_Data;
uint8_t* wp;
int Flags = This->FX.Flags;
bool_t Dither = (Flags & BLITFX_DITHER) != 0 && !(This->Dst.Flags & PF_FOURCC);
bool_t PalDither = Dither && (This->Dst.Flags & PF_PALETTE);
const rgb* SrcPalette = This->SrcPalette;
int SrcStepX[16];
int SrcStepY[16];
int RScaleX = This->RScaleX;
int RScaleY = This->RScaleY;
int SrcUVPitch2 = This->SrcUVPitch2;
int SrcUVX2 = This->SrcUVX2;
int SrcUVY2 = This->SrcUVY2;
int DstUVX2 = This->DstUVX2;
int DstUVY2 = This->DstUVY2;
int DstStepX = This->DstBPP;
int DstStepX2 = This->DstBPP;
int DstStepY = DstPitch << 3;
int DstStepY2 = DstStepY >> This->DstUVPitch2;
int dy = 0;
int dy2 = 0;
int cR,cG,cB;
int Y,U,V;
int sy0;
int sy=0;
int YAdd = This->FX.Brightness;
uint32_t SrcInvert = 0;
uint32_t DstInvert = 0;
SrcMask[0] = This->Src.BitMask[0];
SrcMask[1] = This->Src.BitMask[1];
SrcMask[2] = This->Src.BitMask[2];
DstMask[0] = This->Dst.BitMask[0];
DstMask[1] = This->Dst.BitMask[1];
DstMask[2] = This->Dst.BitMask[2];
for (i=0;i<3;++i)
{
DitherMask[i] = Dither ? (1 << (8 - BitMaskSize(DstMask[i]))) - 1 : 0;
SrcPos[i] = BitMaskPos(SrcMask[i]) + BitMaskSize(SrcMask[i]);
DstPos[i] = BitMaskPos(DstMask[i]) + BitMaskSize(DstMask[i]);
SrcMask[i] <<= 8;
DstMask[i] <<= 8;
Src[i] = SrcPtr[i];
Dst[i] = DstPtr[i];
}
if (This->DstPalette)
{
DstPos[0] = 11;
DstPos[1] = 7;
DstPos[2] = 3;
DstMask[0] = 0xF0000;
DstMask[1] = 0x0F000;
DstMask[2] = 0x00F00;
}
for (j=0;j<16;++j)
{
SrcStepX[j] = ((RScaleX * (j+1)) >> 4) - ((RScaleX * j) >> 4);
SrcStepY[j] = ((RScaleY * (j+1)) >> 4) - ((RScaleY * j) >> 4);
}
if (This->DstLeftRight)
{
DstStepX = -DstStepX;
DstStepX2 = -DstStepX2;
if (This->DstBPP < 8)
dy = 8 - This->DstBPP;
}
if (This->SwapXY)
{
SwapInt(&DstUVX2,&DstUVY2);
SwapInt(&DstStepX,&DstStepY);
SwapInt(&DstStepX2,&DstStepY2);
}
cR = (DitherMask[0] >> 1);
cG = (DitherMask[1] >> 1);
cB = (DitherMask[2] >> 1);
Y=U=V=0;
if (This->Src.Flags & PF_INVERTED)
SrcInvert = (This->Src.BitCount>=32?0:(1 << This->Src.BitCount))-1;
if (This->Dst.Flags & PF_INVERTED)
DstInvert = (This->Dst.BitCount>=32?0:(1 << This->Dst.BitCount))-1;
for (y=0;y<Height;++y)
{
int dx=dy;
int dx2=dy2;
int sx=0;
for (x=0;x<Width;sx+=SrcStepX[x&15],++x,dx+=DstStepX)
{
uint8_t* q;
const rgb* p;
uint32_t v,w;
switch (SrcType)
{
case 10: //Planar YUV->RGB
cR += ((Src[0][sx]+YAdd-16)*0x2568 + 0x3343*(Src[2][sx>>SrcUVX2]-128)) /0x2000;
cG += ((Src[0][sx]+YAdd-16)*0x2568 - 0x0c92*(Src[1][sx>>SrcUVX2]-128) - 0x1a1e*(Src[2][sx>>SrcUVX2]-128)) /0x2000;
cB += ((Src[0][sx]+YAdd-16)*0x2568 + 0x40cf*(Src[1][sx>>SrcUVX2]-128)) /0x2000;
cR=SAT(cR);
cG=SAT(cG);
cB=SAT(cB);
break;
case 11: //Packed YUV->RGB
cR += ((Src[0][sx*2]+YAdd-16)*0x2568 + 0x3343*(Src[2][4*(sx>>1)]-128)) /0x2000;
cG += ((Src[0][sx*2]+YAdd-16)*0x2568 - 0x0c92*(Src[1][4*(sx>>1)]-128) - 0x1a1e*(Src[2][4*(sx>>1)]-128)) /0x2000;
cB += ((Src[0][sx*2]+YAdd-16)*0x2568 + 0x40cf*(Src[1][4*(sx>>1)]-128)) /0x2000;
cR=SAT(cR);
cG=SAT(cG);
cB=SAT(cB);
break;
case 12: //Planar YUV->YUV
Y += Src[0][sx];
U += Src[1][sx>>SrcUVX2];
V += Src[2][sx>>SrcUVX2];
//Y=SAT(Y);
//U=SAT(U);
//V=SAT(V);
break;
case 13: //Packed YUV->YUV
Y += Src[0][sx*2];
U += Src[1][4*(sx>>1)];
V += Src[2][4*(sx>>1)];
//Y=SAT(Y);
//U=SAT(U);
//V=SAT(V);
break;
case 1: //Pal1->RGB
p = &SrcPalette[ ((Src[0][sx>>3] >> ((~sx)&7)) & 1) ^ SrcInvert];
cR += p->c.r; cG += p->c.g; cB += p->c.b;
cR=SAT(cR);
cG=SAT(cG);
cB=SAT(cB);
break;
case 2: //Pal2->RGB
p = &SrcPalette[ ((Src[0][sx>>2] >> (((~sx)&3)*2)) & 3) ^ SrcInvert];
cR += p->c.r; cG += p->c.g; cB += p->c.b;
cR=SAT(cR);
cG=SAT(cG);
cB=SAT(cB);
break;
case 3: //Pal4->RGB
p = &SrcPalette[ ((Src[0][sx>>1] >> (((~sx)&1)*4)) & 15) ^ SrcInvert];
cR += p->c.r; cG += p->c.g; cB += p->c.b;
cR=SAT(cR);
cG=SAT(cG);
cB=SAT(cB);
break;
case 4: //Pal8->RGB
p = &SrcPalette[Src[0][sx] ^ SrcInvert];
cR += p->c.r; cG += p->c.g; cB += p->c.b;
cR=SAT(cR);
cG=SAT(cG);
cB=SAT(cB);
break;
case 5: //RGB8->RGB
v = Src[0][sx];
v ^= SrcInvert;
v <<= 8;
cR += (v & SrcMask[0]) >> SrcPos[0];
cG += (v & SrcMask[1]) >> SrcPos[1];
cB += (v & SrcMask[2]) >> SrcPos[2];
cR=SAT(cR);
cG=SAT(cG);
cB=SAT(cB);
break;
case 6: //RGB16->RGB
v = ((uint16_t*)Src[0])[sx];
v ^= SrcInvert;
v <<= 8;
cR += (v & SrcMask[0]) >> SrcPos[0];
cG += (v & SrcMask[1]) >> SrcPos[1];
cB += (v & SrcMask[2]) >> SrcPos[2];
cR=SAT(cR);
cG=SAT(cG);
cB=SAT(cB);
break;
case 7: //RGB24->RGB
v = Src[0][sx*3] | (Src[0][sx*3+1] << 8) | (Src[0][sx*3+2] << 16);
v ^= SrcInvert;
v <<= 8;
cR += (v & SrcMask[0]) >> SrcPos[0];
cG += (v & SrcMask[1]) >> SrcPos[1];
cB += (v & SrcMask[2]) >> SrcPos[2];
cR=SAT(cR);
cG=SAT(cG);
cB=SAT(cB);
break;
default: //RGB32->RGB
v = ((uint32_t*)Src[0])[sx];
v ^= SrcInvert;
v <<= 8;
cR += (v & SrcMask[0]) >> SrcPos[0];
cG += (v & SrcMask[1]) >> SrcPos[1];
cB += (v & SrcMask[2]) >> SrcPos[2];
cR=SAT(cR);
cG=SAT(cG);
cB=SAT(cB);
break;
}
q = Dst[0]+(dx >> 3);
switch (DstType)
{
case 10: //RGB->Planar YUV
Y = ((2105 * cR) + (4128 * cG) + (802 * cB))/0x2000 + 16;
U = (-(1212 * cR) - (2384 * cG) + (3596 * cB))/0x2000 + 128;
V = ((3596 * cR) - (3015 * cG) - (582 * cB))/0x2000 + 128;
*q = (uint8_t)Y;
Dst[1][dx2 >> 3] = (uint8_t)U;
Dst[2][dx2 >> 3] = (uint8_t)V;
if ((x & 1) || DstUVX2==0)
dx2+=DstStepX2;
cR=cG=cB=0;
break;
case 11: //RGB->Packed YUV
Y = ((2105 * cR) + (4128 * cG) + (802 * cB))/0x2000 + 16;
U = (-(1212 * cR) - (2384 * cG) + (3596 * cB))/0x2000 + 128;
V = ((3596 * cR) - (3015 * cG) - (582 * cB))/0x2000 + 128;
Y=SAT(Y);
U=SAT(U);
V=SAT(V);
*q = (uint8_t)Y;
Dst[1][4*(dx >> 5)] = (uint8_t)U;
Dst[2][4*(dx >> 5)] = (uint8_t)V;
cR=cG=cB=0;
break;
case 12: //YUV->Planar YUV
Y += YAdd;
Y=SAT(Y);
*q = (uint8_t)Y;
Dst[1][dx2 >> 3] = (uint8_t)U;
Dst[2][dx2 >> 3] = (uint8_t)V;
if ((x & 1) || DstUVX2==0)
dx2+=DstStepX2;
Y=U=V=0;
break;
case 13: //YUV->Packed YUV
Y += YAdd;
Y=SAT(Y);
*q = (uint8_t)Y;
Dst[1][4*(dx >> 5)] = (uint8_t)U;
Dst[2][4*(dx >> 5)] = (uint8_t)V;
Y=U=V=0;
break;
case 1: //RGB->Pal1
w = ((cR << DstPos[0]) & DstMask[0]) |
((cG << DstPos[1]) & DstMask[1]) |
((cB << DstPos[2]) & DstMask[2]);
wp = PalLookUp + (w >> 8)*4;
w = wp[0];
w ^= DstInvert;
*q &= ~(1 << ((~dx)&7));
*q |= (w << ((~dx)&7));
if (PalDither)
{
cR -= wp[1];
cG -= wp[2];
cB -= wp[3];
}
else
cR=cG=cB=0;
break;
case 2: //RGB->Pal2
w = ((cR << DstPos[0]) & DstMask[0]) |
((cG << DstPos[1]) & DstMask[1]) |
((cB << DstPos[2]) & DstMask[2]);
wp = PalLookUp + (w >> 8)*4;
w = wp[0];
w ^= DstInvert;
*q &= ~(3 << ((~dx)&6));
*q |= (w << ((~dx)&6));
if (PalDither)
{
cR -= wp[1];
cG -= wp[2];
cB -= wp[3];
}
else
cR=cG=cB=0;
break;
case 3: //RGB->Pal4
w = ((cR << DstPos[0]) & DstMask[0]) |
((cG << DstPos[1]) & DstMask[1]) |
((cB << DstPos[2]) & DstMask[2]);
wp = PalLookUp + (w >> 8)*4;
w = wp[0];
w ^= DstInvert;
*q &= ~(15 << ((~dx)&4));
*q |= (w << ((~dx)&4));
if (PalDither)
{
cR -= wp[1];
cG -= wp[2];
cB -= wp[3];
}
else
cR=cG=cB=0;
break;
case 14: //YUV->Pal8
w = ((Y << DstPos[0]) & DstMask[0]) |
((U << DstPos[1]) & DstMask[1]) |
((V << DstPos[2]) & DstMask[2]);
wp = PalLookUp + (w >> 8)*4;
w = wp[0];
w ^= DstInvert;
*q = (uint8_t)w;
if (PalDither)
{
Y -= wp[1];
U -= wp[2];
V -= wp[3];
}
else
Y=U=V=0;
break;
case 4: //RGB->Pal8
w = ((cR << DstPos[0]) & DstMask[0]) |
((cG << DstPos[1]) & DstMask[1]) |
((cB << DstPos[2]) & DstMask[2]);
wp = PalLookUp + (w >> 8)*4;
w = wp[0];
w ^= DstInvert;
*q = (uint8_t)w;
if (PalDither)
{
cR -= wp[1];
cG -= wp[2];
cB -= wp[3];
}
else
cR=cG=cB=0;
break;
case 5: //RGB->RGB8
w = ((cR << DstPos[0]) & DstMask[0]) |
((cG << DstPos[1]) & DstMask[1]) |
((cB << DstPos[2]) & DstMask[2]);
*q = (uint8_t)((w >> 8) ^ DstInvert);
cR &= DitherMask[0];
cG &= DitherMask[1];
cB &= DitherMask[2];
break;
case 6: //RGB->RGB16
w = ((cR << DstPos[0]) & DstMask[0]) |
((cG << DstPos[1]) & DstMask[1]) |
((cB << DstPos[2]) & DstMask[2]);
*(uint16_t*)q = (uint16_t)((w >> 8) ^ DstInvert);
cR &= DitherMask[0];
cG &= DitherMask[1];
cB &= DitherMask[2];
break;
case 7: //RGB->RGB24
w = ((cR << DstPos[0]) & DstMask[0]) |
((cG << DstPos[1]) & DstMask[1]) |
((cB << DstPos[2]) & DstMask[2]);
w >>= 8;
w ^= DstInvert;
q[0] = (uint8_t)(w);
q[1] = (uint8_t)(w >> 8);
q[2] = (uint8_t)(w >> 16);
cR &= DitherMask[0];
cG &= DitherMask[1];
cB &= DitherMask[2];
break;
default: //RGB->RGB32
w = ((cR << DstPos[0]) & DstMask[0]) |
((cG << DstPos[1]) & DstMask[1]) |
((cB << DstPos[2]) & DstMask[2]);
*(uint32_t*)q = (w >> 8) ^ DstInvert;
cR &= DitherMask[0];
cG &= DitherMask[1];
cB &= DitherMask[2];
break;
}
}
dy += DstStepY;
if ((y & 1) || DstUVY2==0)
dy2 += DstStepY2;
sy0 = sy;
sy += SrcStepY[y&15];
Src[0] += SrcPitch * (sy - sy0);
Src[1] += (SrcPitch >> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2));
Src[2] += (SrcPitch >> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2));
}
}
#endif
static bool_t BlitCompile(blit_soft* p,
const pixel* NewDst,const pixel* NewSrc,
const blitfx* NewFX,bool_t NewOnlyDiff)
{
int i;
bool_t Gray,SrcPlanarYUV,DstPlanarYUV;
if (EqPixel(&p->Dst,NewDst) &&
EqPixel(&p->Src,NewSrc) &&
EqBlitFX(&p->FX,NewFX) &&
NewOnlyDiff == p->OnlyDiff)
return p->Entry != NULL;
CodeStart(&p->Code);
// defaults
p->Caps = VC_BRIGHTNESS|VC_DITHER|VC_SATURATION|VC_CONTRAST|VC_RGBADJUST;
p->SrcAlignPos = 2;
p->DstAlignPos = 2;
p->DstAlignSize = 2;
p->Dst = *NewDst;
p->Src = *NewSrc;
p->FX = *NewFX;
p->OnlyDiff = (boolmem_t)NewOnlyDiff;
p->SwapXY = (boolmem_t)((p->FX.Direction & DIR_SWAPXY) != 0);
p->DstLeftRight = (boolmem_t)((p->FX.Direction & DIR_MIRRORLEFTRIGHT) != 0);
p->DstUpDown = (boolmem_t)((p->FX.Direction & DIR_MIRRORUPDOWN) != 0);
p->SrcUpDown = (boolmem_t)((p->FX.Flags & BLITFX_AVOIDTEARING) && !p->SwapXY && p->DstUpDown != ((p->FX.Flags & BLITFX_VMEMUPDOWN) != 0));
if (p->SrcUpDown)
p->DstUpDown = (boolmem_t)!p->DstUpDown;
// it's faster using slices with rotation (not just with AVOIDTEARING)
// probably because of ram page trashing (during vertical writing)
p->Slices = (boolmem_t)(p->SwapXY != ((p->FX.Flags & BLITFX_VMEMROTATED) != 0));
p->SlicesReverse = (boolmem_t)((p->SwapXY ? p->DstUpDown : p->DstLeftRight) == ((p->FX.Flags & BLITFX_VMEMUPDOWN) != 0));
Gray = (p->Dst.Flags & PF_PALETTE) && (p->Dst.BitCount == 4 || p->Dst.BitCount == 2);
p->RScaleX = CalcRScale(p->FX.ScaleX,Gray);
p->RScaleY = CalcRScale(p->FX.ScaleY,Gray);
// important these integeres should be 1 or 0
p->DstHalfX = p->SrcHalfX = p->RScaleX == 32;
p->DstHalfY = p->SrcHalfY = p->RScaleY == 32;
p->DstDoubleX = p->SrcDoubleX = p->RScaleX == 8;
p->DstDoubleY = p->SrcDoubleY = p->RScaleY == 8;
if (p->SwapXY)
{
SwapInt(&p->DstHalfX,&p->DstHalfY);
SwapInt(&p->DstDoubleX,&p->DstDoubleY);
}
p->SrcBPP = GetBPP(&p->Src);
p->SrcBPP2 = -3;
for (i=p->SrcBPP;i>1;i>>=1)
++p->SrcBPP2;
p->DstBPP = GetBPP(&p->Dst);
p->DstBPP2 = -3;
for (i=p->DstBPP;i>1;i>>=1)
++p->DstBPP2;
p->SrcYUV = (boolmem_t)AnyYUV(&p->Src);
p->SrcPalette = DefaultPal(&p->Src);
p->DstPalette = DefaultPal(&p->Dst);
free(p->LookUp_Data);
p->LookUp_Data = NULL;
p->ColorLookup = (boolmem_t)((p->FX.Flags & BLITFX_COLOR_LOOKUP) != 0);
#ifdef ARM
p->ARM5 = (boolmem_t)((QueryPlatform(PLATFORM_CAPS) & CAPS_ARM_5E)!=0);
p->WMMX = (boolmem_t)((QueryPlatform(PLATFORM_CAPS) & CAPS_ARM_WMMX)!=0 &&
!QueryAdvanced(ADVANCED_NOWMMX) && (p->Dst.Flags & PF_16ALIGNED) && (p->Src.Flags & PF_16ALIGNED));
p->QAdd = (boolmem_t)((QueryPlatform(PLATFORM_CAPS) & CAPS_ARM_5E)!=0 &&
!p->DstPalette && !p->WMMX && !(p->FX.Flags & BLITFX_DITHER) && !p->ColorLookup);
#endif
CalcColor(p);
if (p->DstPalette)
BuildPalLookUp(p,p->SrcYUV);
p->ArithStretch = (boolmem_t)((p->FX.Flags & BLITFX_ARITHSTRETCHALWAYS) != 0);
if ((p->FX.Flags & BLITFX_ARITHSTRETCH50) && p->RScaleX==32 && p->RScaleY==32)
p->ArithStretch = 1;
if (p->DstPalette)
p->ArithStretch = 0;
SrcPlanarYUV = PlanarYUV(&p->Src,&p->SrcUVX2,&p->SrcUVY2,&p->SrcUVPitch2);
DstPlanarYUV = PlanarYUV(&p->Dst,&p->DstUVX2,&p->DstUVY2,&p->DstUVPitch2);
p->DirX = p->DstLeftRight ? -1:1;
for (i=0;i<3;++i)
{
p->DstSize[i] = BitMaskSize(p->Dst.BitMask[i]);
p->DstPos[i] = BitMaskPos(p->Dst.BitMask[i]);
p->SrcSize[i] = BitMaskSize(p->Src.BitMask[i]);
p->SrcPos[i] = BitMaskPos(p->Src.BitMask[i]);
}
#if defined(ARM)
if ((p->Dst.Flags & PF_RGB) && p->Dst.BitCount==16 && !p->SrcYUV)
Any_RGB_RGB(p);
if (!p->SrcYUV && DstPlanarYUV && p->RScaleX==16 && p->RScaleY==16)
Fix_Any_YUV(p);
#endif
#if (defined(ARM) || defined(SH3) || defined(MIPS)) && defined(CONFIG_DYNCODE)
if (SrcPlanarYUV)
{
#if defined(ARM)
if (DstPlanarYUV && p->RScaleX==16 && p->RScaleY==16 &&
((p->SrcUVX2==p->DstUVX2 && p->SrcUVY2==p->DstUVY2 && !(p->SwapXY && p->SrcUVX2 != p->SrcUVY2)) ||
(p->DstUVX2==1 && p->DstUVY2==1)))
Fix_Any_YUV(p);
if (PackedYUV(&p->Dst) && p->RScaleX==16 && p->RScaleY==16 && p->SrcUVY2<2 && p->SrcUVX2<2)
Fix_PackedYUV_YUV(p);
#endif
if ((p->Dst.Flags & (PF_RGB|PF_PALETTE)) && (p->Dst.BitCount == 8 || p->Dst.BitCount == 16 || p->Dst.BitCount == 32))
{
#if defined(ARM)
if (p->Dst.BitCount == 16 && p->WMMX && PlanarYUV420(&p->Src) &&
(p->RScaleX==16 || p->RScaleX==8 || p->RScaleX==32) && (p->RScaleY==16 || p->RScaleY==8 || p->RScaleY==32))
WMMXFix_RGB_UV(p);
else
if ((p->Dst.BitCount == 16 || p->Dst.BitCount==32) && p->SrcUVX2==1 && p->SrcUVY2==1 && p->RScaleX == 16 && p->RScaleY == 16)
Fix_RGB_UV(p);
else
if (p->Dst.BitCount == 16 && p->SrcUVX2==1 && p->SrcUVY2==1 &&
(p->RScaleX == 8 || p->RScaleX == 16) &&
(p->RScaleY == 8 || p->RScaleY == 16) && !p->ArithStretch)
Fix_RGB_UV(p);
else
if (p->Dst.BitCount == 16 && p->SrcUVX2==1 && p->SrcUVY2==1 && p->RScaleX == 32 && p->RScaleY == 32)
Half_RGB_UV(p);
else
Stretch_RGB_UV(p);
#else
#if !defined(MIPS)
if (p->SrcUVX2==1 && p->SrcUVY2==1 &&
(p->RScaleX == 8 || p->RScaleX == 16) &&
(p->RScaleY == 8 || p->RScaleY == 16))
#endif
Fix_RGB_UV(p);
#endif
}
else
if (Gray)
Fix_Gray_UV(p);
}
#endif
CodeBuild(&p->Code);
if (p->Code.Size)
p->Entry = (blitsoftentry)p->Code.Code;
else
p->Entry = NULL;
#if defined(_M_IX86) && !defined(TARGET_SYMBIAN)
if (p->FX.Direction==0 && p->RScaleX==16 && p->RScaleY==16)
{
uint32_t In = DefFourCC(&p->Src);
uint32_t Out = DefFourCC(&p->Dst);
#ifdef CONFIG_CONTEXT
int Caps = QueryPlatform(PLATFORM_CAPS);
#else
int Caps = CPUCaps();
#endif
const blitmmx* i;
for (i=BlitMMX;i->In;++i)
if (i->In==In && i->Out==Out)
{
p->Caps &= ~VC_DITHER;
if (AnyYUV(&p->Src))
CalcYUVMMX(p);
if (Caps & CAPS_X86_MMX2)
p->Entry = i->Func[1];
else if (Caps & CAPS_X86_3DNOW)
p->Entry = i->Func[2];
else
p->Entry = i->Func[0];
break;
}
}
#endif
if (!p->Entry)
{
// use YUV internal calulaction?
bool_t YUV = (AnyYUV(&p->Dst) ||
(p->Dst.BitCount==8 && p->Dst.Flags & PF_PALETTE)) && p->SrcYUV;
p->DstType = UniversalType(&p->Dst,YUV);
p->SrcType = UniversalType(&p->Src,YUV);
#if defined(_M_IX86) || !defined(CONFIG_DYNCODE) || defined(BLITTEST)
if (p->SrcType>=0 && p->DstType>=0)
{
// universal
if (AnyYUV(&p->Dst))
p->Caps &= ~VC_DITHER;
else
p->Caps = VC_BRIGHTNESS | VC_DITHER;
p->Entry = BlitUniversal;
}
if (p->SrcUVX2 == 1 && !p->SwapXY && !p->DstLeftRight && p->SrcType == 10)
{
if (p->DstType == 8 &&
p->Dst.BitMask[0] == 0xFF0000 &&
p->Dst.BitMask[1] == 0x00FF00 &&
p->Dst.BitMask[2] == 0x0000FF)
{
p->Caps = VC_BRIGHTNESS;
p->Entry = Blit_PYUV_RGB32;
}
if (p->DstType == 6 &&
p->Dst.BitMask[0] == 0xF800 &&
p->Dst.BitMask[1] == 0x07E0 &&
p->Dst.BitMask[2] == 0x001F)
{
p->Caps = VC_BRIGHTNESS;
p->Entry = Blit_PYUV_RGB16;
}
}
if (p->SrcUVX2 == p->DstUVX2 && !p->SwapXY && !p->DstLeftRight &&
p->RScaleX == 16 && p->RScaleY==16 &&
p->SrcType == 12 && p->DstType == 12)
{
CalcYUVLookUp(p);
p->Caps &= ~VC_DITHER;
p->Entry = Blit_PYUV_PYUV;
}
#endif
if (p->SrcUVX2 == p->DstUVX2 &&
p->SrcUVY2 == p->DstUVY2 && !p->SwapXY && !p->DstLeftRight &&
(p->RScaleX != 16 || p->RScaleY != 16) &&
(p->RScaleX == 16 || p->RScaleX == 8 || p->RScaleX == 32 || p->RScaleX == 4 || p->RScaleX == 64) &&
(p->RScaleY == 16 || p->RScaleY == 8 || p->RScaleY == 32 || p->RScaleX == 4 || p->RScaleX == 64) &&
p->SrcType == 12 && p->DstType == 12 &&
!p->FX.Saturation && !p->FX.Contrast && !p->FX.RGBAdjust[0] &&
!p->FX.RGBAdjust[1] && !p->FX.RGBAdjust[2] && !p->FX.Brightness)
{
p->Caps = 0;
p->Entry = Blit_PYUV_PYUV_2;
}
}
return p->Entry != NULL;
}
blitpack* BlitCreate(const video* Dst,
const video* Src, const blitfx* FX, int* OutCaps)
{
blitfx CopyFX;
bool_t Gray;
blitpack* p = BlitAlloc();
if (!p) return NULL;
if (!FX)
{
memset(&CopyFX,0,sizeof(CopyFX));
CopyFX.ScaleX = SCALE_ONE;
CopyFX.ScaleY = SCALE_ONE;
FX = &CopyFX;
}
if (!BlitCompile(&p->Code[0],&Dst->Pixel,&Src->Pixel,FX,0) ||
((FX->Flags & BLITFX_ONLYDIFF) && !BlitCompile(&p->Code[1],&Dst->Pixel,&Src->Pixel,FX,1)))
{
BlitRelease(p);
return NULL;
}
p->FX = *FX;
p->Dst = *Dst;
p->Src = *Src;
Gray = (Dst->Pixel.Flags & PF_PALETTE) &&
(Dst->Pixel.BitCount == 4 || Dst->Pixel.BitCount == 2);
p->RScaleX = CalcRScale(FX->ScaleX,Gray);
p->RScaleY = CalcRScale(FX->ScaleY,Gray);
if (OutCaps)
*OutCaps = p->Code[0].Caps;
return p;
}
static NOINLINE int CMul(blit_soft* p, int64_t* r, int64_t v, bool_t UV)
{
int m;
if (UV)
{
m = p->FX.Saturation;
if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0
m = 4*m+256;
v *= m;
if (v<0)
v -= 128;
else
v += 128;
v >>= 8;
}
m = p->FX.Contrast;
if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0
m = 4*m + 256;
v *= m;
if (v<0)
v -= 128;
else
v += 128;
v >>= 8;
if (r) *r = v;
return (int)v;
}
static NOINLINE int CAdd(blit_soft* p, int64_t* r, int64_t v, int v0, int vUV)
{
int m;
m = p->FX.Saturation;
if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0
m = 4*m+256;
v += ((256 - m) * vUV) >> 1;
m = p->FX.Contrast;
if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0
m = 4*m + 256;
v += ((256 - m)*(v0 - v)) >> 8;
if (r) *r = v;
return (int)v;
}
static INLINE bool_t InRange32(int64_t v)
{
return v >= -MAX_INT && v <= MAX_INT;
}
static INLINE uint8_t RotateRight8(int v,int Bits)
{
Bits &= 7;
return (uint8_t)(((v >> Bits) & ((1 << (8-Bits))-1)) | (v << (8-Bits)));
}
void CalcPalYUVLookUp(blit_soft* p)
{
p->LookUp_Size = 256+4+256+256;
p->LookUp_Data = malloc(p->LookUp_Size);
if (p->LookUp_Data)
{
int v,i,n = 1<<p->Src.BitCount;
rgb* Pal = p->Src.Palette;
uint8_t* LookUp = p->LookUp_Data;
for (i=0;i<n;++i)
{
v = (2105*Pal[i].c.r + 4128*Pal[i].c.g + 802*Pal[i].c.b)/0x2000 + 16 + p->FX.Brightness;
LookUp[i] = (uint8_t)SAT(v);
v = (-1212*Pal[i].c.r -2384*Pal[i].c.g + 3596*Pal[i].c.b)/0x2000 + 128;
LookUp[i+256+4] = (uint8_t)SAT(v);
v = (3596*Pal[i].c.r -3015*Pal[i].c.g -582*Pal[i].c.b)/0x2000 + 128;
LookUp[i+256+4+256] = (uint8_t)SAT(v);
}
}
}
void CalcPalRGBLookUp(blit_soft* p)
{
int i,n = 1<<p->Src.BitCount;
int w = p->Dst.BitCount>>3;
if (w<2 || w>4) return;
if (w==3) w=4;
p->LookUp_Size = w*n;
p->LookUp_Data = malloc(p->LookUp_Size);
if (p->LookUp_Data)
{
uint16_t* Pal = p->LookUp_Data;
for (i=0;i<n;++i)
{
uint32_t c = RGBToFormat(p->Src.Palette[i].v,&p->Dst);
if (w==2)
*(Pal++) = (uint16_t)c;
else
{
*(uint32_t*)Pal = c;
Pal += 2;
}
}
}
}
void CalcYUVLookUp(blit_soft* p)
{
if (p->FX.Saturation || p->FX.Contrast || (p->FX.Flags & BLITFX_DITHER) ||
p->FX.RGBAdjust[0] || p->FX.RGBAdjust[1] || p->FX.RGBAdjust[2])
{
p->LookUp_Size = 3*256+4;
p->LookUp_Data = malloc(p->LookUp_Size);
if (p->LookUp_Data)
{
int Ofs = (p->FX.Flags & BLITFX_DITHER)?-2:0;
int n;
uint8_t* i=p->LookUp_Data;
for (n=0;n<256+4;++n,++i)
{
int y = CMul(p,NULL,n+Ofs+p->FX.Brightness+p->FX.RGBAdjust[1]-128,0)+128;
*i = (uint8_t)SAT(y);
}
for (n=0;n<256;++n,++i)
{
int u = CMul(p,NULL,n-128-p->FX.RGBAdjust[1]+p->FX.RGBAdjust[2],1)+128;
*i = (uint8_t)SAT(u);
}
for (n=0;n<256;++n,++i)
{
int v = CMul(p,NULL,n-128-p->FX.RGBAdjust[1]+p->FX.RGBAdjust[0],1)+128;
*i = (uint8_t)SAT(v);
}
}
}
}
void CalcLookUp(blit_soft* p, bool_t Dither)
{
#if defined(ARM)
const int Safe = 512;
// 4*256:Y + (256+Safe*2)*n:SAT + 4*256:U + 4*256:V
int SatOfsR = 4*256+Safe;
int SatOfsG = 4*256+Safe;
int SatOfsB = 4*256+Safe;
p->LookUp_Size = 4*256*3 + (256+Safe*2);
if (Dither)
{
if (p->DstSize[1] == p->DstSize[0])
SatOfsG = SatOfsR;
else
{
p->LookUp_Size += (256+Safe*2);
SatOfsG = SatOfsR + (256+Safe*2);
}
if (p->DstSize[2] == p->DstSize[0])
SatOfsB = SatOfsR;
else
if (p->DstSize[2] == p->DstSize[1])
SatOfsB = SatOfsG;
else
{
p->LookUp_Size += (256+Safe*2);
SatOfsB = SatOfsG + (256+Safe*2);
}
}
p->LookUp_U = (p->LookUp_Size - 4*256*2) >> 2;
p->LookUp_V = (p->LookUp_Size - 4*256) >> 2;
p->LookUp_Data = malloc(p->LookUp_Size);
if (p->LookUp_Data)
{
int v;
int32_t* YMul = (int32_t*)p->LookUp_Data;
int32_t* UMul = (int32_t*)p->LookUp_Data + p->LookUp_U;
int32_t* VMul = (int32_t*)p->LookUp_Data + p->LookUp_V;
uint8_t* SatR = (uint8_t*)p->LookUp_Data + SatOfsR;
uint8_t* SatG = (uint8_t*)p->LookUp_Data + SatOfsG;
uint8_t* SatB = (uint8_t*)p->LookUp_Data + SatOfsB;
memset(p->LookUp_Data,0,p->LookUp_Size);
for (v=0;v<256;++v)
{
*YMul = (p->_YMul * v) << LOOKUP_FIX;
*UMul = (((p->_BUMul * v + p->_BAdd + (SatOfsB << 16)) << LOOKUP_FIX) & 0xFFFF0000) |
(((p->_GUMul * v) >> (16-LOOKUP_FIX)) & 0xFFFF); //BUMul | GUMul
*VMul = (((p->_RVMul * v + p->_RAdd + (SatOfsR << 16)) << LOOKUP_FIX) & 0xFFFF0000) |
(((p->_GVMul * v + p->_GAdd + (SatOfsG << 16)) >> (16-LOOKUP_FIX)) & 0xFFFF); //BUMul | GUMul
++YMul;
++UMul;
++VMul;
}
if (Dither)
{
for (v=-Safe;v<256+Safe;++v)
{
SatR[v] = RotateRight8(SAT(v),8-p->DstSize[0]);
SatG[v] = RotateRight8(SAT(v),8-p->DstSize[1]);
SatB[v] = RotateRight8(SAT(v),8-p->DstSize[2]);
}
}
else
{
for (v=-Safe;v<256+Safe;++v)
SatR[v] = (uint8_t)SAT(v);
}
}
#elif defined(MIPS)
const int Safe = 256;
// 256:Y + (128+Safe*2)*n:SAT + 8*256:U + 8*256:V
int SatOfsR = 256+Safe;
int SatOfsG = 256+Safe;
int SatOfsB = 256+Safe;
p->LookUp_Size = 256+8*256*2 + (128+Safe*2);
if (p->DstSize[1] == p->DstSize[0])
SatOfsG = SatOfsR;
else
{
p->LookUp_Size += (128+Safe*2);
SatOfsG = SatOfsR + (128+Safe*2);
}
if (p->DstSize[2] == p->DstSize[0])
SatOfsB = SatOfsR;
else
if (p->DstSize[2] == p->DstSize[1])
SatOfsB = SatOfsG;
else
{
p->LookUp_Size += (128+Safe*2);
SatOfsB = SatOfsG + (128+Safe*2);
}
p->LookUp_U = p->LookUp_Size - 8*256*2;
p->LookUp_V = p->LookUp_Size - 8*256;
p->LookUp_Data = malloc(p->LookUp_Size);
if (p->LookUp_Data)
{
int v;
uint8_t* YMul = (uint8_t*)p->LookUp_Data;
uint8_t** UMul = (uint8_t**)((uint8_t*)p->LookUp_Data + p->LookUp_U);
uint8_t** VMul = (uint8_t**)((uint8_t*)p->LookUp_Data + p->LookUp_V);
uint8_t* SatR = (uint8_t*)p->LookUp_Data + SatOfsR;
uint8_t* SatG = (uint8_t*)p->LookUp_Data + SatOfsG;
uint8_t* SatB = (uint8_t*)p->LookUp_Data + SatOfsB;
memset(p->LookUp_Data,0,p->LookUp_Size);
for (v=0;v<256;++v)
{
*YMul = (uint8_t)((p->_YMul * v) >> 17);
UMul[0] = SatB + ((p->_BUMul * v + p->_BAdd) >> 17);
*(int*)&UMul[1] = (p->_GUMul * v) >> 17;
VMul[0] = SatR + ((p->_RVMul * v + p->_RAdd) >> 17);
VMul[1] = SatG + ((p->_GVMul * v + p->_GAdd) >> 17);
++YMul;
UMul+=2;
VMul+=2;
}
for (v=-Safe;v<128+Safe;++v)
{
SatR[v] = (uint8_t)(SAT(v*2) >> (8-p->DstSize[0]));
SatG[v] = (uint8_t)(SAT(v*2) >> (8-p->DstSize[1]));
SatB[v] = (uint8_t)(SAT(v*2) >> (8-p->DstSize[2]));
}
}
#elif defined(SH3)
const int Safe = 256;
//LookUp_Data:
// [128..255|0..127]Y + 384 empty + 4*[128..255|0..127]U + 4*[128..255|0..127]V + (128+Safe*2)*n:SAT
int SatOfsR = 256+384+4*256*2+Safe;
int SatOfsG = 256+384+4*256*2+Safe;
int SatOfsB = 256+384+4*256*2+Safe;
p->LookUp_Size = 256+384+4*256*2 + (128+Safe*2);
if (p->DstSize[1] == p->DstSize[0])
SatOfsG = SatOfsR;
else
{
p->LookUp_Size += (128+Safe*2);
SatOfsG = SatOfsR + (128+Safe*2);
}
if (p->DstSize[2] == p->DstSize[0])
SatOfsB = SatOfsR;
else
if (p->DstSize[2] == p->DstSize[1])
SatOfsB = SatOfsG;
else
{
p->LookUp_Size += (128+Safe*2);
SatOfsB = SatOfsG + (128+Safe*2);
}
p->LookUp_U = 128 + 384 + 4*128;
p->LookUp_V = 128 + 384 + 4*256 + 4*128;
p->LookUp_Data = malloc(p->LookUp_Size + Safe*2); //additional safe
if (p->LookUp_Data)
{
int v;
int8_t* YMul = (int8_t*)p->LookUp_Data;
int16_t* UMul = (int16_t*)((uint8_t*)p->LookUp_Data + 256 + 384);
int16_t* VMul = (int16_t*)((uint8_t*)p->LookUp_Data + 256 + 384 + 4*256);
int8_t* SatR = (int8_t*)p->LookUp_Data + SatOfsR;
int8_t* SatG = (int8_t*)p->LookUp_Data + SatOfsG;
int8_t* SatB = (int8_t*)p->LookUp_Data + SatOfsB;
memset(p->LookUp_Data,0,p->LookUp_Size + Safe*2);
for (v=0;v<256;++v)
{
*YMul = (int8_t)(((p->_YMul * ((v+128)&255)) >> 17)-128);
UMul[0] = (int16_t)(SatOfsB + ((p->_BUMul * ((v+128)&255) + p->_BAdd) >> 17));
UMul[1] = (int16_t)((p->_GUMul * ((v+128)&255)) >> 17);
VMul[0] = (int16_t)(SatOfsR + ((p->_RVMul * ((v+128)&255) + p->_RAdd) >> 17));
VMul[1] = (int16_t)(SatOfsG + ((p->_GVMul * ((v+128)&255) + p->_GAdd) >> 17));
++YMul;
UMul+=2;
VMul+=2;
}
for (v=-Safe;v<128+Safe;++v)
{
SatR[v] = (int8_t)(SAT(v*2) >> (8-p->DstSize[0]));
SatG[v] = (int8_t)(SAT(v*2) >> (8-p->DstSize[1]));
SatB[v] = (int8_t)(SAT(v*2) >> (8-p->DstSize[2]));
}
}
#endif
}
void CalcColor(blit_soft* p)
{
#ifdef ARM
if (p->QAdd)
{
//saturation: signed 32bit / 3 (qdadd will triple)
int64_t YMul;
int64_t RVMul;
int64_t RAdd;
int64_t GUMul;
int64_t GVMul;
int64_t GAdd;
int64_t BUMul;
int64_t BAdd;
CMul(p,&YMul,0x63C000,0);
CMul(p,&RVMul,0x88B2AA,1);
CAdd(p,&RAdd,(p->FX.Brightness+p->FX.RGBAdjust[0])*0x63C000 - (int64_t)0x75400000,0x2AAAAAAA,0x88B2AA);
CMul(p,&GUMul,-0x218555,1);
CMul(p,&GVMul,-0x45A555,1);
CAdd(p,&GAdd,(p->FX.Brightness+p->FX.RGBAdjust[1])*0x63C000 + (int64_t)0x02AEAAAA,0x2AAAAAAA,-0x218555-0x45A555);
CMul(p,&BUMul,0xACD2AA,1);
CAdd(p,&BAdd,(p->FX.Brightness+p->FX.RGBAdjust[2])*0x63C000 - (int64_t)0x87500000,0x2AAAAAAA,0xACD2AA);
if (InRange32(YMul*0xF0) &&
InRange32(RVMul*0x10+RAdd) &&
InRange32(RVMul*0xF0+RAdd) &&
InRange32((GVMul+GUMul)*0x10+GAdd) &&
InRange32((GVMul+GUMul)*0xF0+GAdd) &&
InRange32(BUMul*0x10+BAdd) &&
InRange32(BUMul*0xF0+BAdd))
{
p->_YMul = (int)YMul;
p->_RVMul = (int)RVMul;
p->_RAdd = (int)RAdd;
p->_GUMul = (int)GUMul;
p->_GVMul = (int)GVMul;
p->_GAdd = (int)GAdd;
p->_BUMul = (int)BUMul;
p->_BAdd = (int)BAdd;
}
else
p->QAdd = 0;
}
if (!p->QAdd)
#endif
{
//saturation: unsigned 24bit
p->_YMul = CMul(p,NULL,0x12B40,0);
p->_RVMul = CMul(p,NULL,0x19A18,1);
p->_RAdd = CAdd(p,NULL,(p->FX.Brightness+p->FX.RGBAdjust[0])*0x12B40 - 0x0DFC000,0x800000,0x19A18);
p->_GUMul = CMul(p,NULL,-0x06490,1);
p->_GVMul = CMul(p,NULL,-0x0D0F0,1);
p->_GAdd = CAdd(p,NULL,(p->FX.Brightness+p->FX.RGBAdjust[1])*0x12B40 + 0x0880C00,0x800000,-0x06490-0x0D0F0);
p->_BUMul = CMul(p,NULL,0x20678,1);
p->_BAdd = CAdd(p,NULL,(p->FX.Brightness+p->FX.RGBAdjust[2])*0x12B40 - 0x115F000,0x800000,0x20678);
}
}
#if defined(_M_IX86) && !defined(TARGET_SYMBIAN)
static void ColMMX(blit_soft* p, int i, int ofs)
{
int mul = 2048;
int m;
int o = 128;
if (i>0)
{
m = p->FX.Saturation;
if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0
m = 4*m+256;
mul = (mul*m+128) >> 8;
ofs += 128 - (m>>1);
o = (o*m+128)>>8;
}
m = p->FX.Contrast;
if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0
m = 4*m + 256;
mul = (mul*m+128)>>8;
ofs += o - ((m*o)>>8);
if ((p->Src.Flags & PF_YUV_PC) && !(p->Dst.Flags & PF_YUV_PC))
{
if (i==0) //y [0..255]->[16..235]
{
mul = (mul*219+127)/255;
ofs = (ofs*219+127)/255+16;
}
else //uv [0..255]->[16..240]
{
mul = (mul*224+127)/255;
ofs = (ofs*224+127)/255+16;
}
}
else
if (!(p->Src.Flags & PF_YUV_PC) && (p->Dst.Flags & PF_YUV_PC))
{
if (i==0) //y [16..235]->[0..255]
{
mul = (mul*255+109)/219;
ofs = ((ofs-16)*255+109)/219;
}
else //uv [16..240]->[0..255]
{
mul = (mul*255+112)/224;
ofs = ((ofs-16)*255+112)/224;
}
}
if (mul<-32768) mul=32768;
if (mul>32767) mul=32767;
p->Col[i][0][3] = p->Col[i][0][2] = p->Col[i][0][1] = p->Col[i][0][0] = (int16_t)mul;
p->Col[i][1][3] = p->Col[i][1][2] = p->Col[i][1][1] = p->Col[i][1][0] = (int16_t)ofs;
}
static int16_t MulMMX(blit_soft* p, int i, int mul)
{
int m;
if (i>0)
{
m = p->FX.Saturation;
if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0
m = 4*m+256;
mul = (mul*m+128) >> 8;
}
m = p->FX.Contrast;
if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0
m = 4*m + 256;
mul = (mul*m+128)>>8;
if (mul<-32768) mul=32768;
if (mul>32767) mul=32767;
return (int16_t)mul;
}
void CalcYUVMMX(blit_soft* p)
{
if (AnyYUV(&p->Dst))
{
ColMMX(p,0,p->FX.Brightness+p->FX.RGBAdjust[1]);
ColMMX(p,1,p->FX.RGBAdjust[2]-p->FX.RGBAdjust[1]);
ColMMX(p,2,p->FX.RGBAdjust[0]-p->FX.RGBAdjust[1]);
}
else
{
const int16_t* m = GetYUVToRGB(&p->Src);
p->Col[0][0][3] = p->Col[0][0][2] = p->Col[0][0][1] = p->Col[0][0][0] = MulMMX(p,0,m[0]);
p->Col[0][1][3] = p->Col[0][1][2] = p->Col[0][1][1] = p->Col[0][1][0] = (int16_t)(m[1]+p->FX.Brightness+p->FX.RGBAdjust[1]);
p->Col[1][0][1] = p->Col[1][0][0] = MulMMX(p,1,m[2]); //u_b
p->Col[1][0][3] = p->Col[1][0][2] = MulMMX(p,1,m[3]); //u_g
p->Col[1][1][3] = p->Col[1][1][2] = p->Col[1][1][1] = p->Col[1][1][0] = (int16_t)(m[4]+p->FX.RGBAdjust[2]-p->FX.RGBAdjust[1]);
p->Col[2][0][1] = p->Col[2][0][0] = MulMMX(p,2,m[5]); //v_r
p->Col[2][0][3] = p->Col[2][0][2] = MulMMX(p,2,m[6]); //v_g
p->Col[2][1][3] = p->Col[2][1][2] = p->Col[2][1][1] = p->Col[2][1][0] = (int16_t)(m[7]+p->FX.RGBAdjust[0]-p->FX.RGBAdjust[1]);
}
}
#endif