/*****************************************************************************
 *
 * This program is free software ; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * $Id: blit_soft.c 607 2006-01-22 20:58:29Z picard $
 *
 * The Core Pocket Media Player
 * Copyright (c) 2004-2005 Gabor Kovacs
 *
 ****************************************************************************/

#include "../common.h"
#include "../dyncode/dyncode.h"
#include "../cpu/cpu.h"
#include "blit_soft.h"

//#define BLITTEST

/*

TV range:  +16 (219)  +128 (224)  +128 (224)
PC range:  +0 (255)   +128 (255)  +128 (255)

ranges: R,G,B,Y [0..1]  Cb,Cr [-0.5..0.5]

Y' = Kr * R' + (1 - Kr - Kb) * G' + Kb * B'
Cb = 0.5 * (B' - Y') / (1 - Kb)
Cr = 0.5 * (R' - Y') / (1 - Kr)

Kb = 0.114
Kr = 0.299

ITU-R BT 601
Y'= 0.299   *R' + 0.587   *G' + 0.114   *B'
Cb=-0.168736*R' - 0.331264*G' + 0.5     *B'
Cr= 0.5     *R' - 0.418688*G' - 0.081312*B'

R'= Y'            + 1.403*Cr
G'= Y' - 0.344*Cb - 0.714*Cr
B'= Y' + 1.773*Cb

Kb = 0.0722
Kr = 0.2126

ITU-R BT 709
Y'= 0.2215*R' + 0.7154*G' + 0.0721*B'
Cb=-0.1145*R' - 0.3855*G' + 0.5000*B'
Cr= 0.5016*R' - 0.4556*G' - 0.0459*B'

R'= Y'             + 1.5701*Cr
G'= Y' - 0.1870*Cb - 0.4664*Cr
B'= Y' + 1.8556*Cb

*/

// convert a floating point coefficient to signed 3.13 fixed point
#define CM(i) ((int16_t)((i)*8192))

#if defined(_M_IX86) && !defined(TARGET_SYMBIAN)

// One row of YUV->RGB constants per (range, matrix) combination.
// The row order must match the index built by GetYUVToRGB().
// NOTE(review): judging by the values the columns look like
// { Y mul, Y add, V->R mul, V->G mul, V add, U->B mul, U->G mul, U add };
// confirm against the MMX code that consumes this table.
static const int16_t YUVToRGB[4][8] =
{
	// TV range BT-601
	{ CM(1.164),-16,CM(1.596),CM(-0.813),-128,CM(2.017),CM(-0.392),-128 },
	// TV range BT-709
	{ CM(1.164),-16,CM(1.786),CM(-0.530),-128,CM(2.111),CM(-0.213),-128 },
	// PC range BT-601
	{ CM(1),0,CM(1.402),CM(-0.71414),-128,CM(1.772),CM(-0.34414),-128 },
	// PC range BT-709
	{ CM(1),0,CM(1.5701),CM(-0.4664),-128,CM(1.8556),CM(-0.1870),-128 },
};

// Select the YUVToRGB row matching the pixel format flags:
// bit 0 of the row index is PF_YUV_BT709, bit 1 is PF_YUV_PC.
static const int16_t* GetYUVToRGB(const pixel* Src)
{
	int Row = (Src->Flags & PF_YUV_BT709) ? 1 : 0;
	if (Src->Flags & PF_YUV_PC)
		Row |= 2;
	return YUVToRGB[Row];
}

#endif

// clamp to [0..255]; the argument is evaluated more than once,
// so it must not have side effects
#define SAT(Value) ((Value) < 0 ? 0 : ((Value) > 255 ? 255 : (Value)))

// default gray ramps used when a palettized format has no palette
static const rgbval_t Gray1[2] = { CRGB(0,0,0),CRGB(255,255,255) };
static const rgbval_t Gray2[4] = { CRGB(0,0,0),CRGB(85,85,85),CRGB(170,170,170),CRGB(255,255,255) };
static const rgbval_t Gray4[16] =
{
	CRGB(0,0,0),CRGB(17,17,17),CRGB(34,34,34),CRGB(51,51,51),
	CRGB(68,68,68),CRGB(85,85,85),CRGB(102,102,102),CRGB(119,119,119),
	CRGB(136,136,136),CRGB(153,153,153),CRGB(170,170,170),CRGB(187,187,187),
	CRGB(204,204,204),CRGB(221,221,221),CRGB(238,238,238),CRGB(255,255,255)
};

#if defined(_M_IX86) && !defined(TARGET_SYMBIAN)

// declares the three externally implemented variants (_mmx, _mmx2, _3dnow) of one blitter
#define DECLARE_BLITMMX(name) \
	extern void STDCALL name##_mmx(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch,int Width,int Height,uintptr_t Src2SrcLast); \
	extern void STDCALL name##_mmx2(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch,int Width,int Height,uintptr_t Src2SrcLast); \
	extern void STDCALL name##_3dnow(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch,int Width,int Height,uintptr_t Src2SrcLast);

DECLARE_BLITMMX(blit_i420_i420)
DECLARE_BLITMMX(blit_i420_yuy2)
DECLARE_BLITMMX(blit_i420_rgb32)
DECLARE_BLITMMX(blit_i420_rgb24)
DECLARE_BLITMMX(blit_i420_bgr32)
DECLARE_BLITMMX(blit_i420_bgr24)
DECLARE_BLITMMX(blit_rgb32_rgb32)
DECLARE_BLITMMX(blit_rgb24_rgb24)
DECLARE_BLITMMX(blit_rgb16_rgb16)

// maps an (input, output) FourCC pair to its three accelerated entry points
typedef struct blitmmx
{
	uint32_t In;
	uint32_t Out;
	blitsoftentry Func[3]; // order: mmx, mmx2, 3dnow

} blitmmx;

// terminated by an all-zero entry
static const blitmmx BlitMMX[] =
{
	{ FOURCC_I420, FOURCC_I420, { blit_i420_i420_mmx, blit_i420_i420_mmx2, blit_i420_i420_3dnow }},
	{ FOURCC_I420, FOURCC_YUY2, { blit_i420_yuy2_mmx, blit_i420_yuy2_mmx2, blit_i420_yuy2_3dnow }},
	{ FOURCC_I420, FOURCC_RGB32,{ blit_i420_rgb32_mmx,blit_i420_rgb32_mmx2,blit_i420_rgb32_3dnow }},
	{ FOURCC_I420, FOURCC_RGB24,{ blit_i420_rgb24_mmx,blit_i420_rgb24_mmx2,blit_i420_rgb24_3dnow }},
	{ FOURCC_I420, FOURCC_BGR32,{ blit_i420_bgr32_mmx,blit_i420_bgr32_mmx2,blit_i420_bgr32_3dnow }},
	{ FOURCC_I420, FOURCC_BGR24,{ blit_i420_bgr24_mmx,blit_i420_bgr24_mmx2,blit_i420_bgr24_3dnow }},
	{ FOURCC_RGB32,FOURCC_RGB32,{ blit_rgb32_rgb32_mmx,blit_rgb32_rgb32_mmx2,blit_rgb32_rgb32_3dnow }},
	{ FOURCC_RGB24,FOURCC_RGB24,{ blit_rgb24_rgb24_mmx,blit_rgb24_rgb24_mmx2,blit_rgb24_rgb24_3dnow }},
	{ FOURCC_RGB16,FOURCC_RGB16,{ blit_rgb16_rgb16_mmx,blit_rgb16_rgb16_mmx2,blit_rgb16_rgb16_3dnow }},
	{ FOURCC_RGB15,FOURCC_RGB15,{ blit_rgb16_rgb16_mmx,blit_rgb16_rgb16_mmx2,blit_rgb16_rgb16_3dnow }},
	{ FOURCC_BGR32,FOURCC_BGR32,{ blit_rgb32_rgb32_mmx,blit_rgb32_rgb32_mmx2,blit_rgb32_rgb32_3dnow }},
	{ FOURCC_BGR24,FOURCC_BGR24,{ blit_rgb24_rgb24_mmx,blit_rgb24_rgb24_mmx2,blit_rgb24_rgb24_3dnow }},
	{ FOURCC_BGR16,FOURCC_BGR16,{ blit_rgb16_rgb16_mmx,blit_rgb16_rgb16_mmx2,blit_rgb16_rgb16_3dnow }},
	{ FOURCC_BGR15,FOURCC_BGR15,{ blit_rgb16_rgb16_mmx,blit_rgb16_rgb16_mmx2,blit_rgb16_rgb16_3dnow }},
	{0},
};

#endif

// Everything needed to perform one configured blit.
typedef struct blitpack
{
	blitfx FX;
	video Dst;
	video Src;
	rect DstRect;
	rect SrcRect;

	blit_soft Code[2]; // BlitImage uses [1] for the "only differences" path, [0] otherwise

	bool_t SafeBorder;
	int RScaleX;
	int RScaleY;

	struct blitpack* Next; // link used by the per-context free list

} blitpack;

// release the generated code and the lookup table owned by one blit_soft
static NOINLINE void FreeBlit(blit_soft* p)
{
	CodeDone(&p->Code);
	free(p->LookUp_Data);
	p->LookUp_Data = NULL; // keep a second FreeBlit() harmless
}

// allocate a zero-filled blitpack with both code slots initialized;
// returns NULL when out of memory
static INLINE struct blitpack* _BlitAlloc()
{
	blitpack* Pack = (blitpack*) malloc(sizeof(blitpack));
	if (Pack)
	{
		memset(Pack,0,sizeof(blitpack));
		CodeInit(&Pack->Code[0].Code);
		CodeInit(&Pack->Code[1].Code);
	}
	return Pack;
}

// destroy a blitpack created by _BlitAlloc(); p must not be NULL
static INLINE void _BlitFree(struct blitpack* p)
{
	int n;
	for (n=0;n<2;++n)
		FreeBlit(&p->Code[n]);
	free(p);
}

#ifdef CONFIG_CONTEXT

void
Blit_Init() { } void Blit_Done() { blitpack* p; while ((p = Context()->Blit)!=NULL) { Context()->Blit = p->Next; _BlitFree(p); } } static INLINE void BlitFree(struct blitpack* p) { p->Next = Context()->Blit; Context()->Blit = p; } static INLINE struct blitpack* BlitAlloc() { blitpack* p = Context()->Blit; if (p) Context()->Blit = p->Next; else p = _BlitAlloc(); return p; } #else #define BlitAlloc _BlitAlloc #define BlitFree _BlitFree #endif static const rgb* DefaultPal(const pixel* Format) { if (Format->Flags & PF_PALETTE) { if (!Format->Palette) switch (Format->BitCount) { case 1: return (const rgb*)Gray1; case 2: return (const rgb*)Gray2; case 4: return (const rgb*)Gray4; } return Format->Palette; } return NULL; } static int CalcScale(int v, int Min, int Max) { if (v > Max) v = Max; if (v < Min) v = Min; return v; } static int CalcRScale(int v, int Gray) { if (v<=0) return 16; v = (16*1024 << 16) / v; if (Gray) // only 100% and 200% scale return v > 12288 ? 16:8; //align to 100% if (v > 16834-1024 && v < 16384+1024) v = 16384; //align to 200% if (v > 8192-1024 && v < 8192+1024) v = 8192; //align to 50% if (v > 32768-2048 && v < 32768+2048) v = 32768; #if defined(SH3) if (v < 12288) return 8; return 16; // if (v<1024) v=1024; // return ((v+1024) >> 11) << 1; #else if (v<512) v=512; return (v+512) >> 10; #endif } static NOINLINE bool_t EnlargeIfNeeded(int* v,int Align,int Side,int Limit) { int Needed = Align - (*v & (Align-1)); if (Needed < Align && Needed <= (Limit-Side)) { *v += Needed; return 1; } return 0; } void BlitAlign(blitpack* p, rect* DstRect, rect* SrcRect) { int i; int ShrinkX,ShrinkY; int SrcRight; int SrcBottom; int SrcAdjWidth,SrcAdjHeight; int RScaleX,RScaleY; blit_soft* Code; if (!p) return; RScaleX = p->RScaleX; RScaleY = p->RScaleY; Code = &p->Code[0]; p->SafeBorder = 0; if (Code->ArithStretch && (RScaleX != 16) && (RScaleX != 32)) { //avoid bilinear scale overrun (shrink source) if ((p->Src.Pixel.Flags & PF_SAFEBORDER) && Code->DstAlignSize > 
2) p->SafeBorder = 1; // build a one pixel border before blitting on right and bottom side else { //only horizontal bilinear filtering is supported (arm_stretch) if (SrcRect->Width>2) SrcRect->Width -= 2; } } // convert source to destination space if (p->FX.Direction & DIR_SWAPXY) { SwapInt(&RScaleX,&RScaleY); SwapRect(SrcRect); } SrcRight = SrcRect->x + SrcRect->Width; SrcBottom = SrcRect->y + SrcRect->Height; SrcAdjWidth = SrcRect->Width * 16 / RScaleX; SrcAdjHeight = SrcRect->Height * 16 / RScaleY; if (p->FX.Flags & BLITFX_ENLARGEIFNEEDED) { SrcRight = p->Src.Width; SrcBottom = p->Src.Height; if (p->FX.Direction & DIR_SWAPXY) SwapInt(&SrcRight,&SrcBottom); if (p->Src.Pixel.Flags & PF_SAFEBORDER) { SrcRight += 16; SrcBottom += 16; } if (EnlargeIfNeeded(&SrcAdjWidth,Code->DstAlignSize,SrcRect->x+SrcRect->Width,SrcRight)) SrcRect->Width = -1; // need calc if (EnlargeIfNeeded(&SrcAdjHeight,Code->DstAlignSize,SrcRect->y+SrcRect->Height,SrcBottom)) SrcRect->Height = -1; // need calc } ShrinkX = DstRect->Width - SrcAdjWidth; if (ShrinkX>=0) //shrink destination? { ShrinkX >>= 1; DstRect->x += ShrinkX; DstRect->Width = SrcAdjWidth; } else //adjust source position { ShrinkX = 0; SrcRect->x += (SrcAdjWidth - DstRect->Width) * RScaleX >> 5; SrcRect->Width = -1; // need calc } ShrinkY = DstRect->Height - SrcAdjHeight; if (ShrinkY>=0) //shrink Dst? 
{ ShrinkY >>= 1; DstRect->y += ShrinkY; DstRect->Height = SrcAdjHeight; } else //adjust source position { ShrinkY = 0; SrcRect->y += (SrcAdjHeight - DstRect->Height) * RScaleY >> 5; SrcRect->Height = -1; // need calc } i = DstRect->Width & (Code->DstAlignSize-1); DstRect->Width -= i; i >>= 1; ShrinkX += i; DstRect->x += i; i = DstRect->Height & (Code->DstAlignSize-1); DstRect->Height -= i; i >>= 1; ShrinkY += i; DstRect->y += i; i = DstRect->x & (Code->DstAlignPos-1); if (i && ShrinkX < i) { DstRect->Width -= Code->DstAlignPos - i; DstRect->Width &= ~(Code->DstAlignSize-1); DstRect->x += Code->DstAlignPos - i; } else DstRect->x -= i; i = DstRect->y & (Code->DstAlignPos-1); if (i && ShrinkY < i) { DstRect->Height -= Code->DstAlignPos - i; DstRect->Height &= ~(Code->DstAlignSize-1); DstRect->y += Code->DstAlignPos - i; } else DstRect->y -= i; SrcRect->x &= ~(Code->SrcAlignPos-1); SrcRect->y &= ~(Code->SrcAlignPos-1); // convert source back to it's space (if needed) if (SrcRect->Width < 0) SrcRect->Width = (DstRect->Width * RScaleX / 16 + 1) & ~1; if (SrcRect->Height < 0) SrcRect->Height = (DstRect->Height * RScaleY / 16 + 1) & ~1; if (SrcRect->x + SrcRect->Width > SrcRight) SrcRect->Width = SrcRight - SrcRect->x; if (SrcRect->y + SrcRect->Height > SrcBottom) SrcRect->Height = SrcBottom - SrcRect->y; if (p->FX.Direction & DIR_SWAPXY) SwapRect(SrcRect); p->DstRect = *DstRect; p->SrcRect = *SrcRect; } static NOINLINE void CodeRelease(blit_soft* p) { //todo... 
better palette handling if (p->Dst.Palette) memset(&p->Dst,0,sizeof(p->Dst)); if (p->Src.Palette) memset(&p->Src,0,sizeof(p->Src)); } void BlitRelease(blitpack* p) { if (p) { CodeRelease(&p->Code[0]); CodeRelease(&p->Code[1]); BlitFree(p); } } int AnyAlign(rect* DstRect, rect* SrcRect, const blitfx* FX, int DstAlignSize, int DstAlignPos, int MinScale, int MaxScale) { int i,ShrinkX,ShrinkY; int ScaleX,ScaleY; int SrcRight; int SrcBottom; int SrcAdjWidth,SrcAdjHeight; if (!DstRect || !SrcRect || !FX) return ERR_INVALID_PARAM; ScaleX = CalcScale(FX->ScaleX,MinScale,MaxScale); ScaleY = CalcScale(FX->ScaleY,MinScale,MaxScale); SrcRight = SrcRect->x + SrcRect->Width; SrcBottom = SrcRect->y + SrcRect->Height; // convert source to destination space if (FX->Direction & DIR_SWAPXY) { SwapInt(&ScaleX,&ScaleY); SwapRect(SrcRect); } SrcAdjWidth = (SrcRect->Width * ScaleX + 32768) >> 16; SrcAdjHeight = (SrcRect->Height * ScaleY + 32768) >> 16; ShrinkX = DstRect->Width - SrcAdjWidth; if (ShrinkX>0) //shrink destination? { ShrinkX >>= 1; DstRect->x += ShrinkX; DstRect->Width = SrcAdjWidth; } else //adjust source position { ShrinkX = 0; SrcRect->x += ((SrcAdjWidth - DstRect->Width) << 15) / ScaleX; SrcRect->Width = -1; } ShrinkY = DstRect->Height - SrcAdjHeight; if (ShrinkY>0) //shrink Dst? 
{ ShrinkY >>= 1; DstRect->y += ShrinkY; DstRect->Height = SrcAdjHeight; } else //adjust source position { ShrinkY = 0; SrcRect->y += ((SrcAdjHeight - DstRect->Height) << 15) / ScaleY; SrcRect->Height = -1; } // final alignment i = DstRect->Width & (DstAlignSize-1); DstRect->Width -= i; i >>= 1; ShrinkX += i; DstRect->x += i; i = DstRect->Height & (DstAlignSize-1); DstRect->Height -= i; i >>= 1; ShrinkY += i; DstRect->y += i; i = DstRect->x & (DstAlignPos-1); if (i && ShrinkX < i) { DstRect->Width -= DstAlignPos - i; DstRect->Width &= ~(DstAlignSize-1); DstRect->x += DstAlignPos - i; } else DstRect->x -= i; i = DstRect->y & (DstAlignPos-1); if (i && ShrinkY < i) { DstRect->Height -= DstAlignPos - i; DstRect->Height &= ~(DstAlignSize-1); DstRect->y += DstAlignPos - i; } else DstRect->y -= i; SrcRect->x &= ~1; SrcRect->y &= ~1; if (SrcRect->Width < 0) SrcRect->Width = ((DstRect->Width << 16) / ScaleX +1) & ~1; if (SrcRect->Height < 0) SrcRect->Height = ((DstRect->Height << 16) / ScaleY +1) & ~1; if (FX->Direction & DIR_SWAPXY) SwapRect(SrcRect); if (SrcRect->x + SrcRect->Width > SrcRight) SrcRect->Width = SrcRight - SrcRect->x; if (SrcRect->y + SrcRect->Height > SrcBottom) SrcRect->Height = SrcBottom - SrcRect->y; return ERR_NONE; } static INLINE void SurfacePtr(uint8_t** Ptr, const planes Planes, const video* Format, int BPP, int x, int y, int Pitch) { int Adj = (x & 1) << 1; Ptr[0] = (uint8_t*)Planes[0] + ((x * BPP) >> 3) + y * Pitch; if (Format->Pixel.Flags & (PF_YUV420|PF_YUV422|PF_YUV444|PF_YUV410)) { if (Format->Pixel.Flags & PF_YUV420) { Ptr[1] = (uint8_t*)Planes[1] + (x >> 1) + (y >> 1) * (Pitch >> 1); Ptr[2] = (uint8_t*)Planes[2] + (x >> 1) + (y >> 1) * (Pitch >> 1); } else if (Format->Pixel.Flags & PF_YUV422) { Ptr[1] = (uint8_t*)Planes[1] + (x >> 1) + y * (Pitch >> 1); Ptr[2] = (uint8_t*)Planes[2] + (x >> 1) + y * (Pitch >> 1); } else if (Format->Pixel.Flags & PF_YUV444) { Ptr[1] = (uint8_t*)Planes[1] + x + y * Pitch; Ptr[2] = (uint8_t*)Planes[2] + x + y * 
Pitch; } else if (Format->Pixel.Flags & PF_YUV410) { Ptr[1] = (uint8_t*)Planes[1] + (x >> 2) + (y >> 2) * (Pitch >> 2); Ptr[2] = (uint8_t*)Planes[2] + (x >> 2) + (y >> 2) * (Pitch >> 2); } } else if (Format->Pixel.Flags & PF_FOURCC) switch (Format->Pixel.FourCC) { case FOURCC_IMC2: Ptr[2] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 1) + (y >> 1) * Pitch; Ptr[1] = Ptr[1] + (Pitch >> 1); break; case FOURCC_IMC4: Ptr[1] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 1) + (y >> 1) * Pitch; Ptr[2] = Ptr[1] + (Pitch >> 1); break; case FOURCC_I420: case FOURCC_IYUV: Ptr[1] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 1) + (y >> 1) * (Pitch >> 1); Ptr[2] = Ptr[1] + ((Format->Height * Pitch) >> 2); break; case FOURCC_YV16: Ptr[2] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 1) + y * (Pitch >> 1); Ptr[1] = Ptr[2] + ((Format->Height * Pitch) >> 1); break; case FOURCC_YVU9: Ptr[2] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 2) + (y >> 2) * (Pitch >> 2); Ptr[1] = Ptr[2] + ((Format->Height * Pitch) >> 4); break; case FOURCC_YUV9: Ptr[1] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 2) + (y >> 2) * (Pitch >> 2); Ptr[2] = Ptr[1] + ((Format->Height * Pitch) >> 4); break; case FOURCC_YV12: Ptr[2] = (uint8_t*)Planes[0] + Format->Height * Pitch + (x >> 1) + (y >> 1) * (Pitch >> 1); Ptr[1] = Ptr[2] + ((Format->Height * Pitch) >> 2); break; case FOURCC_YUY2: case FOURCC_YUNV: case FOURCC_V422: case FOURCC_YUYV: Ptr[1] = Ptr[0]+1-Adj; Ptr[2] = Ptr[0]+3-Adj; break; case FOURCC_YVYU: Ptr[1] = Ptr[0]+3-Adj; Ptr[2] = Ptr[0]+1-Adj; break; case FOURCC_UYVY: case FOURCC_Y422: case FOURCC_UYNV: Ptr[1] = Ptr[0]-Adj; Ptr[2] = Ptr[0]+2-Adj; Ptr[0]++; break; case FOURCC_VYUY: Ptr[2] = Ptr[0]-Adj; Ptr[1] = Ptr[0]+2-Adj; Ptr[0]++; break; } } void BlitImage(blitpack* Pack, const planes Dst, const constplanes Src, const constplanes SrcLast, int DstPitch, int SrcPitch) { uint8_t* DstPtr[MAXPLANES]; uint8_t* SrcPtr[MAXPLANES]; bool_t OnlyDiff; int 
Width,Height; blit_soft* p; uintptr_t Src2SrcLast; int DstStepX; int DstStepY; int DstX; int DstY; int SrcY; // nothing to do? if (!Pack || Pack->DstRect.Width<=0 || Pack->DstRect.Height<=0) return; OnlyDiff = (Pack->FX.Flags & BLITFX_ONLYDIFF) && SrcLast && SrcLast[0] != NULL; p = &Pack->Code[OnlyDiff]; // calculate the Src and Dst pointers // Src: always upperleft corner // Dst: according to swapxy and mirroring Width = Pack->DstRect.Width; Height = Pack->DstRect.Height; if (p->SwapXY) SwapInt(&Width,&Height); if (DstPitch < 0) DstPitch = Pack->Dst.Pitch; if (SrcPitch < 0) SrcPitch = Pack->Src.Pitch; SrcY = Pack->SrcRect.y; if (p->SrcUpDown) SrcY += Pack->SrcRect.Height-1; SurfacePtr(SrcPtr,*(const planes*)Src,&Pack->Src,p->SrcBPP,Pack->SrcRect.x,SrcY,SrcPitch); if (p->SrcUpDown) SrcPitch = -SrcPitch; Src2SrcLast = 0; if (OnlyDiff) Src2SrcLast = (uint8_t*)SrcLast[0] - (uint8_t*)Src[0]; DstStepX = p->DstBPP; DstStepY = DstPitch*8; DstX = Pack->DstRect.x; DstY = Pack->DstRect.y; if (p->DstLeftRight) { DstX += Pack->DstRect.Width-1; DstStepX = -DstStepX; } if (p->DstUpDown) { DstY += Pack->DstRect.Height-1; DstStepY = -DstStepY; } if (p->SwapXY) SwapInt(&DstStepX,&DstStepY); SurfacePtr(DstPtr,Dst,&Pack->Dst,p->DstBPP,DstX,DstY,DstPitch); if (p->DstUpDown) DstPitch = -DstPitch; if (p->Slices) { const int DstBlock2 = 5; const int DstBlock = 32; int SrcBlock = (DstBlock * p->RScaleX) >> 4; //SrcBlock has to be even because of YUV int DstNext = (DstBlock * DstStepX) >> 3; int DstNextUV = DstNext >> (p->SwapXY ? p->DstUVPitch2+p->DstUVY2 : p->DstUVX2); int SrcNext = (SrcBlock * p->SrcBPP) >> 3; int SrcNextUV = SrcNext >> p->SrcUVX2; if (Width > DstBlock && p->SlicesReverse) // reverse order? 
{ int Quot = Width >> DstBlock2; int Rem = Width & (DstBlock-1); DstPtr[0] += Quot*DstNext; DstPtr[1] += Quot*DstNextUV; DstPtr[2] += Quot*DstNextUV; SrcPtr[0] += Quot*SrcNext; SrcPtr[1] += Quot*SrcNextUV; SrcPtr[2] += Quot*SrcNextUV; if (Rem) { p->Entry(p,DstPtr,SrcPtr,DstPitch,SrcPitch,Rem,Height,Src2SrcLast); Width -= Rem; } DstNext = -DstNext; DstNextUV = -DstNextUV; SrcNext = -SrcNext; SrcNextUV = -SrcNextUV; DstPtr[0] += DstNext; DstPtr[1] += DstNextUV; DstPtr[2] += DstNextUV; SrcPtr[0] += SrcNext; SrcPtr[1] += SrcNextUV; SrcPtr[2] += SrcNextUV; } for (;Width > DstBlock;Width -= DstBlock) { p->Entry(p,DstPtr,SrcPtr,DstPitch,SrcPitch,DstBlock,Height,Src2SrcLast); DstPtr[0] += DstNext; DstPtr[1] += DstNextUV; DstPtr[2] += DstNextUV; SrcPtr[0] += SrcNext; SrcPtr[1] += SrcNextUV; SrcPtr[2] += SrcNextUV; } } p->Entry(p,DstPtr,SrcPtr,DstPitch,SrcPitch,Width,Height,Src2SrcLast); } void BuildPalLookUp(blit_soft* p,bool_t YUV) { //create a palette lookup with 3x3 bits RGB input int a,b,c; int Size = 1 << p->Dst.BitCount; uint8_t* LookUp; p->LookUp_Data = malloc(16*16*16*4); if (p->LookUp_Data) { LookUp = (uint8_t*) p->LookUp_Data; for (a=16;a<512;a+=32) for (b=16;b<512;b+=32) for (c=16;c<512;c+=32) { const rgb* q = p->DstPalette; int BestMatch = 0; int BestDiff = 0x7FFFFFFF; int i,v[3]; v[0] = a; v[1] = b; v[2] = c; if (v[0] >= 384) v[0] -= 512; if (v[1] >= 384) v[1] -= 512; if (v[2] >= 384) v[2] -= 512; if (YUV) { int w[3]; w[0] = (v[0]*p->_YMul + v[2]*p->_RVMul + p->_RAdd) >> 16; w[1] = (v[0]*p->_YMul + v[1]*p->_GUMul + v[2]*p->_GVMul + p->_GAdd) >> 16; w[2] = (v[0]*p->_YMul + v[1]*p->_BUMul + p->_BAdd) >> 16; v[0]=w[0]; v[1]=w[1]; v[2]=w[2]; } for (i=0;ic.r-v[0])*(q->c.r-v[0])+ (q->c.g-v[1])*(q->c.g-v[1])+ (q->c.b-v[2])*(q->c.b-v[2]); if (Diff < BestDiff) { BestMatch = i; BestDiff = Diff; } } q = p->DstPalette + BestMatch; if (YUV) { v[0] = ((2105 * q->c.r) + (4128 * q->c.g) + (802 * q->c.b))/0x2000 + 16; v[1] = (-(1212 * q->c.r) - (2384 * q->c.g) + (3596 * 
q->c.b))/0x2000 + 128; v[2] = ((3596 * q->c.r) - (3015 * q->c.g) - (582 * q->c.b))/0x2000 + 128; v[0]=SAT(v[0]); v[1]=SAT(v[1]); v[2]=SAT(v[2]); } else { v[0] = q->c.r; v[1] = q->c.g; v[2] = q->c.b; } LookUp[0] = (uint8_t)BestMatch; #if defined(SH3) LookUp[3] = (uint8_t)(v[0] >> 1); LookUp[1] = (uint8_t)(v[1] >> 1); LookUp[2] = (uint8_t)(v[2] >> 1); #else LookUp[1] = (uint8_t)v[0]; LookUp[2] = (uint8_t)v[1]; LookUp[3] = (uint8_t)v[2]; #endif LookUp += 4; } } } int UniversalType(const pixel* p, bool_t YUV) { if (PlanarYUV(p,NULL,NULL,NULL)) return YUV ? 12:10; if (PackedYUV(p)) return YUV ? 13:11; if (p->Flags & PF_PALETTE) { if (p->BitCount==1) return 1; if (p->BitCount==2) return 2; if (p->BitCount==4) return 3; if (p->BitCount==8) return YUV ? 14:4; } if (p->Flags & PF_RGB) { if (p->BitCount==8) return 5; if (p->BitCount==16) return 6; if (p->BitCount==24) return 7; if (p->BitCount==32) return 8; } return -1; } // !SwapXY // !DstLeftRight // SrcType == 12 // DstType == 12 // RScaleX == 32/16/8 // RScaleY == 32/16/8 // SrcUVX2 == DstUVX2 // SrcUVY2 == DstUVY2 static void Blit_PYUV_PYUV_2Plane(const uint8_t* Src,uint8_t* Dst,int Width,int Height,int SrcPitch,int DstPitch,int ScaleX,int ScaleY) { int y; for (y=0;yRScaleX,This->RScaleY); Width >>= This->SrcUVX2; Height >>= This->SrcUVY2; SrcPitch >>= This->SrcUVPitch2; DstPitch >>= This->DstUVPitch2; Blit_PYUV_PYUV_2Plane(SrcPtr[1],DstPtr[1],Width,Height,SrcPitch,DstPitch,This->RScaleX,This->RScaleY); Blit_PYUV_PYUV_2Plane(SrcPtr[2],DstPtr[2],Width,Height,SrcPitch,DstPitch,This->RScaleX,This->RScaleY); } #if defined(_M_IX86) || !defined(CONFIG_DYNCODE) || defined(BLITTEST) // !SwapXY // !DstLeftRight // SrcType == 12 // DstType == 12 // RScaleX == 16 && RScaleY == 16 // SrcUVX2 == DstUVX2 static void STDCALL Blit_PYUV_PYUV(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch, int Width,int Height,uintptr_t Src2SrcLast) { uint8_t* Src[3]; uint8_t* Dst[3]; uint8_t* LookUp = This->LookUp_Data; 
int UVY = 1 << This->DstUVY2; int UVDup = 1; int SrcPitchUV = SrcPitch >> This->SrcUVPitch2; int DstPitchUV = DstPitch >> This->DstUVPitch2; int WidthUV = Width >> This->SrcUVX2; int YAdd = This->FX.Brightness; int x,y,i; for (i=0;i<3;++i) { Src[i] = SrcPtr[i]; Dst[i] = DstPtr[i]; } // skip some UV lines? if (This->DstUVY2 > This->SrcUVY2) SrcPitchUV <<= This->DstUVY2 - This->SrcUVY2; else UVDup = 1 << (This->SrcUVY2 - This->DstUVY2); Height >>= This->DstUVY2; for (y=0;ySrcUVY2; int SrcUVPitch2 = This->SrcUVPitch2; int YAdd = This->FX.Brightness; int sy0; int sy=0; for (i=0;i<3;++i) Src[i] = SrcPtr[i]; Dst = DstPtr[0]; for (i=0;i<16;++i) { SrcStepX[i] = ((This->RScaleX * (i+1)) >> 4) - ((This->RScaleX * i) >> 4); SrcStepY[i] = ((This->RScaleY * (i+1)) >> 4) - ((This->RScaleY * i) >> 4); } for (y=0;y>1; cR = ((Src[0][sx]+YAdd-16)*0x2568 + 0x3343*(Src[2][sx2]-128)) /0x2000; cG = ((Src[0][sx]+YAdd-16)*0x2568 - 0x0c92*(Src[1][sx2]-128) - 0x1a1e*(Src[2][sx2]-128)) /0x2000; cB = ((Src[0][sx]+YAdd-16)*0x2568 + 0x40cf*(Src[1][sx2]-128)) /0x2000; cR = SAT(cR); cG = SAT(cG); cB = SAT(cB); do { dx[0]=(uint8_t)cB; dx[1]=(uint8_t)cG; dx[2]=(uint8_t)cR; dx+=4; } while (SrcStepX[x&15]==0 && ++x> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2)); Src[2] += (SrcPitch >> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2)); } } // !SwapXY // !DstLeftRight // SrcType == 10 // SrcUVX2 == 1 // DstType == 6 rrrrrggggggbbbbb static void STDCALL Blit_PYUV_RGB16(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch, int Width,int Height,uintptr_t Src2SrcLast) { int i,x,y; uint8_t* Src[3]; uint8_t* Dst; int SrcStepX[16]; int SrcStepY[16]; int SrcUVY2 = This->SrcUVY2; int SrcUVPitch2 = This->SrcUVPitch2; int YAdd = This->FX.Brightness; int sy0; int sy=0; for (i=0;i<3;++i) Src[i] = SrcPtr[i]; Dst = DstPtr[0]; for (i=0;i<16;++i) { SrcStepX[i] = ((This->RScaleX * (i+1)) >> 4) - ((This->RScaleX * i) >> 4); SrcStepY[i] = ((This->RScaleY * (i+1)) >> 4) - ((This->RScaleY * 
i) >> 4); } for (y=0;y>1; cR = ((Src[0][sx]+YAdd-16)*0x2568 + 0x3343*(Src[2][sx2]-128)) /0x2000; cG = ((Src[0][sx]+YAdd-16)*0x2568 - 0x0c92*(Src[1][sx2]-128) - 0x1a1e*(Src[2][sx2]-128)) /0x2000; cB = ((Src[0][sx]+YAdd-16)*0x2568 + 0x40cf*(Src[1][sx2]-128)) /0x2000; cR = SAT(cR); cG = SAT(cG); cB = SAT(cB); do { *dx = (uint16_t)(((cR << 8)&0xF800)|((cG << 3)&0x07E0)|(cB >> 3)); ++dx; } while (SrcStepX[x&15]==0 && ++x> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2)); Src[2] += (SrcPitch >> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2)); } } static void STDCALL BlitUniversal(blit_soft* This, uint8_t** DstPtr,uint8_t** SrcPtr,int DstPitch,int SrcPitch, int Width,int Height,uintptr_t Src2SrcLast) { //this will be very-very slow, only for compability and testing int i,j,x,y; uint8_t* Src[3]; uint8_t* Dst[3]; int SrcType = This->SrcType; int DstType = This->DstType; int DstMask[3],DstPos[3]; int SrcMask[3],SrcPos[3]; int DitherMask[3]; uint8_t* PalLookUp = (uint8_t*)This->LookUp_Data; uint8_t* wp; int Flags = This->FX.Flags; bool_t Dither = (Flags & BLITFX_DITHER) != 0 && !(This->Dst.Flags & PF_FOURCC); bool_t PalDither = Dither && (This->Dst.Flags & PF_PALETTE); const rgb* SrcPalette = This->SrcPalette; int SrcStepX[16]; int SrcStepY[16]; int RScaleX = This->RScaleX; int RScaleY = This->RScaleY; int SrcUVPitch2 = This->SrcUVPitch2; int SrcUVX2 = This->SrcUVX2; int SrcUVY2 = This->SrcUVY2; int DstUVX2 = This->DstUVX2; int DstUVY2 = This->DstUVY2; int DstStepX = This->DstBPP; int DstStepX2 = This->DstBPP; int DstStepY = DstPitch << 3; int DstStepY2 = DstStepY >> This->DstUVPitch2; int dy = 0; int dy2 = 0; int cR,cG,cB; int Y,U,V; int sy0; int sy=0; int YAdd = This->FX.Brightness; uint32_t SrcInvert = 0; uint32_t DstInvert = 0; SrcMask[0] = This->Src.BitMask[0]; SrcMask[1] = This->Src.BitMask[1]; SrcMask[2] = This->Src.BitMask[2]; DstMask[0] = This->Dst.BitMask[0]; DstMask[1] = This->Dst.BitMask[1]; DstMask[2] = This->Dst.BitMask[2]; for (i=0;i<3;++i) { 
DitherMask[i] = Dither ? (1 << (8 - BitMaskSize(DstMask[i]))) - 1 : 0; SrcPos[i] = BitMaskPos(SrcMask[i]) + BitMaskSize(SrcMask[i]); DstPos[i] = BitMaskPos(DstMask[i]) + BitMaskSize(DstMask[i]); SrcMask[i] <<= 8; DstMask[i] <<= 8; Src[i] = SrcPtr[i]; Dst[i] = DstPtr[i]; } if (This->DstPalette) { DstPos[0] = 11; DstPos[1] = 7; DstPos[2] = 3; DstMask[0] = 0xF0000; DstMask[1] = 0x0F000; DstMask[2] = 0x00F00; } for (j=0;j<16;++j) { SrcStepX[j] = ((RScaleX * (j+1)) >> 4) - ((RScaleX * j) >> 4); SrcStepY[j] = ((RScaleY * (j+1)) >> 4) - ((RScaleY * j) >> 4); } if (This->DstLeftRight) { DstStepX = -DstStepX; DstStepX2 = -DstStepX2; if (This->DstBPP < 8) dy = 8 - This->DstBPP; } if (This->SwapXY) { SwapInt(&DstUVX2,&DstUVY2); SwapInt(&DstStepX,&DstStepY); SwapInt(&DstStepX2,&DstStepY2); } cR = (DitherMask[0] >> 1); cG = (DitherMask[1] >> 1); cB = (DitherMask[2] >> 1); Y=U=V=0; if (This->Src.Flags & PF_INVERTED) SrcInvert = (This->Src.BitCount>=32?0:(1 << This->Src.BitCount))-1; if (This->Dst.Flags & PF_INVERTED) DstInvert = (This->Dst.BitCount>=32?0:(1 << This->Dst.BitCount))-1; for (y=0;yRGB cR += ((Src[0][sx]+YAdd-16)*0x2568 + 0x3343*(Src[2][sx>>SrcUVX2]-128)) /0x2000; cG += ((Src[0][sx]+YAdd-16)*0x2568 - 0x0c92*(Src[1][sx>>SrcUVX2]-128) - 0x1a1e*(Src[2][sx>>SrcUVX2]-128)) /0x2000; cB += ((Src[0][sx]+YAdd-16)*0x2568 + 0x40cf*(Src[1][sx>>SrcUVX2]-128)) /0x2000; cR=SAT(cR); cG=SAT(cG); cB=SAT(cB); break; case 11: //Packed YUV->RGB cR += ((Src[0][sx*2]+YAdd-16)*0x2568 + 0x3343*(Src[2][4*(sx>>1)]-128)) /0x2000; cG += ((Src[0][sx*2]+YAdd-16)*0x2568 - 0x0c92*(Src[1][4*(sx>>1)]-128) - 0x1a1e*(Src[2][4*(sx>>1)]-128)) /0x2000; cB += ((Src[0][sx*2]+YAdd-16)*0x2568 + 0x40cf*(Src[1][4*(sx>>1)]-128)) /0x2000; cR=SAT(cR); cG=SAT(cG); cB=SAT(cB); break; case 12: //Planar YUV->YUV Y += Src[0][sx]; U += Src[1][sx>>SrcUVX2]; V += Src[2][sx>>SrcUVX2]; //Y=SAT(Y); //U=SAT(U); //V=SAT(V); break; case 13: //Packed YUV->YUV Y += Src[0][sx*2]; U += Src[1][4*(sx>>1)]; V += Src[2][4*(sx>>1)]; 
//Y=SAT(Y); //U=SAT(U); //V=SAT(V); break; case 1: //Pal1->RGB p = &SrcPalette[ ((Src[0][sx>>3] >> ((~sx)&7)) & 1) ^ SrcInvert]; cR += p->c.r; cG += p->c.g; cB += p->c.b; cR=SAT(cR); cG=SAT(cG); cB=SAT(cB); break; case 2: //Pal2->RGB p = &SrcPalette[ ((Src[0][sx>>2] >> (((~sx)&3)*2)) & 3) ^ SrcInvert]; cR += p->c.r; cG += p->c.g; cB += p->c.b; cR=SAT(cR); cG=SAT(cG); cB=SAT(cB); break; case 3: //Pal4->RGB p = &SrcPalette[ ((Src[0][sx>>1] >> (((~sx)&1)*4)) & 15) ^ SrcInvert]; cR += p->c.r; cG += p->c.g; cB += p->c.b; cR=SAT(cR); cG=SAT(cG); cB=SAT(cB); break; case 4: //Pal8->RGB p = &SrcPalette[Src[0][sx] ^ SrcInvert]; cR += p->c.r; cG += p->c.g; cB += p->c.b; cR=SAT(cR); cG=SAT(cG); cB=SAT(cB); break; case 5: //RGB8->RGB v = Src[0][sx]; v ^= SrcInvert; v <<= 8; cR += (v & SrcMask[0]) >> SrcPos[0]; cG += (v & SrcMask[1]) >> SrcPos[1]; cB += (v & SrcMask[2]) >> SrcPos[2]; cR=SAT(cR); cG=SAT(cG); cB=SAT(cB); break; case 6: //RGB16->RGB v = ((uint16_t*)Src[0])[sx]; v ^= SrcInvert; v <<= 8; cR += (v & SrcMask[0]) >> SrcPos[0]; cG += (v & SrcMask[1]) >> SrcPos[1]; cB += (v & SrcMask[2]) >> SrcPos[2]; cR=SAT(cR); cG=SAT(cG); cB=SAT(cB); break; case 7: //RGB24->RGB v = Src[0][sx*3] | (Src[0][sx*3+1] << 8) | (Src[0][sx*3+2] << 16); v ^= SrcInvert; v <<= 8; cR += (v & SrcMask[0]) >> SrcPos[0]; cG += (v & SrcMask[1]) >> SrcPos[1]; cB += (v & SrcMask[2]) >> SrcPos[2]; cR=SAT(cR); cG=SAT(cG); cB=SAT(cB); break; default: //RGB32->RGB v = ((uint32_t*)Src[0])[sx]; v ^= SrcInvert; v <<= 8; cR += (v & SrcMask[0]) >> SrcPos[0]; cG += (v & SrcMask[1]) >> SrcPos[1]; cB += (v & SrcMask[2]) >> SrcPos[2]; cR=SAT(cR); cG=SAT(cG); cB=SAT(cB); break; } q = Dst[0]+(dx >> 3); switch (DstType) { case 10: //RGB->Planar YUV Y = ((2105 * cR) + (4128 * cG) + (802 * cB))/0x2000 + 16; U = (-(1212 * cR) - (2384 * cG) + (3596 * cB))/0x2000 + 128; V = ((3596 * cR) - (3015 * cG) - (582 * cB))/0x2000 + 128; *q = (uint8_t)Y; Dst[1][dx2 >> 3] = (uint8_t)U; Dst[2][dx2 >> 3] = (uint8_t)V; if ((x & 1) || 
DstUVX2==0) dx2+=DstStepX2; cR=cG=cB=0; break; case 11: //RGB->Packed YUV Y = ((2105 * cR) + (4128 * cG) + (802 * cB))/0x2000 + 16; U = (-(1212 * cR) - (2384 * cG) + (3596 * cB))/0x2000 + 128; V = ((3596 * cR) - (3015 * cG) - (582 * cB))/0x2000 + 128; Y=SAT(Y); U=SAT(U); V=SAT(V); *q = (uint8_t)Y; Dst[1][4*(dx >> 5)] = (uint8_t)U; Dst[2][4*(dx >> 5)] = (uint8_t)V; cR=cG=cB=0; break; case 12: //YUV->Planar YUV Y += YAdd; Y=SAT(Y); *q = (uint8_t)Y; Dst[1][dx2 >> 3] = (uint8_t)U; Dst[2][dx2 >> 3] = (uint8_t)V; if ((x & 1) || DstUVX2==0) dx2+=DstStepX2; Y=U=V=0; break; case 13: //YUV->Packed YUV Y += YAdd; Y=SAT(Y); *q = (uint8_t)Y; Dst[1][4*(dx >> 5)] = (uint8_t)U; Dst[2][4*(dx >> 5)] = (uint8_t)V; Y=U=V=0; break; case 1: //RGB->Pal1 w = ((cR << DstPos[0]) & DstMask[0]) | ((cG << DstPos[1]) & DstMask[1]) | ((cB << DstPos[2]) & DstMask[2]); wp = PalLookUp + (w >> 8)*4; w = wp[0]; w ^= DstInvert; *q &= ~(1 << ((~dx)&7)); *q |= (w << ((~dx)&7)); if (PalDither) { cR -= wp[1]; cG -= wp[2]; cB -= wp[3]; } else cR=cG=cB=0; break; case 2: //RGB->Pal2 w = ((cR << DstPos[0]) & DstMask[0]) | ((cG << DstPos[1]) & DstMask[1]) | ((cB << DstPos[2]) & DstMask[2]); wp = PalLookUp + (w >> 8)*4; w = wp[0]; w ^= DstInvert; *q &= ~(3 << ((~dx)&6)); *q |= (w << ((~dx)&6)); if (PalDither) { cR -= wp[1]; cG -= wp[2]; cB -= wp[3]; } else cR=cG=cB=0; break; case 3: //RGB->Pal4 w = ((cR << DstPos[0]) & DstMask[0]) | ((cG << DstPos[1]) & DstMask[1]) | ((cB << DstPos[2]) & DstMask[2]); wp = PalLookUp + (w >> 8)*4; w = wp[0]; w ^= DstInvert; *q &= ~(15 << ((~dx)&4)); *q |= (w << ((~dx)&4)); if (PalDither) { cR -= wp[1]; cG -= wp[2]; cB -= wp[3]; } else cR=cG=cB=0; break; case 14: //YUV->Pal8 w = ((Y << DstPos[0]) & DstMask[0]) | ((U << DstPos[1]) & DstMask[1]) | ((V << DstPos[2]) & DstMask[2]); wp = PalLookUp + (w >> 8)*4; w = wp[0]; w ^= DstInvert; *q = (uint8_t)w; if (PalDither) { Y -= wp[1]; U -= wp[2]; V -= wp[3]; } else Y=U=V=0; break; case 4: //RGB->Pal8 w = ((cR << DstPos[0]) & DstMask[0]) 
| ((cG << DstPos[1]) & DstMask[1]) | ((cB << DstPos[2]) & DstMask[2]); wp = PalLookUp + (w >> 8)*4; w = wp[0]; w ^= DstInvert; *q = (uint8_t)w; if (PalDither) { cR -= wp[1]; cG -= wp[2]; cB -= wp[3]; } else cR=cG=cB=0; break; case 5: //RGB->RGB8 w = ((cR << DstPos[0]) & DstMask[0]) | ((cG << DstPos[1]) & DstMask[1]) | ((cB << DstPos[2]) & DstMask[2]); *q = (uint8_t)((w >> 8) ^ DstInvert); cR &= DitherMask[0]; cG &= DitherMask[1]; cB &= DitherMask[2]; break; case 6: //RGB->RGB16 w = ((cR << DstPos[0]) & DstMask[0]) | ((cG << DstPos[1]) & DstMask[1]) | ((cB << DstPos[2]) & DstMask[2]); *(uint16_t*)q = (uint16_t)((w >> 8) ^ DstInvert); cR &= DitherMask[0]; cG &= DitherMask[1]; cB &= DitherMask[2]; break; case 7: //RGB->RGB24 w = ((cR << DstPos[0]) & DstMask[0]) | ((cG << DstPos[1]) & DstMask[1]) | ((cB << DstPos[2]) & DstMask[2]); w >>= 8; w ^= DstInvert; q[0] = (uint8_t)(w); q[1] = (uint8_t)(w >> 8); q[2] = (uint8_t)(w >> 16); cR &= DitherMask[0]; cG &= DitherMask[1]; cB &= DitherMask[2]; break; default: //RGB->RGB32 w = ((cR << DstPos[0]) & DstMask[0]) | ((cG << DstPos[1]) & DstMask[1]) | ((cB << DstPos[2]) & DstMask[2]); *(uint32_t*)q = (w >> 8) ^ DstInvert; cR &= DitherMask[0]; cG &= DitherMask[1]; cB &= DitherMask[2]; break; } } dy += DstStepY; if ((y & 1) || DstUVY2==0) dy2 += DstStepY2; sy0 = sy; sy += SrcStepY[y&15]; Src[0] += SrcPitch * (sy - sy0); Src[1] += (SrcPitch >> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2)); Src[2] += (SrcPitch >> SrcUVPitch2) * ((sy >> SrcUVY2) - (sy0 >> SrcUVY2)); } } #endif static bool_t BlitCompile(blit_soft* p, const pixel* NewDst,const pixel* NewSrc, const blitfx* NewFX,bool_t NewOnlyDiff) { int i; bool_t Gray,SrcPlanarYUV,DstPlanarYUV; if (EqPixel(&p->Dst,NewDst) && EqPixel(&p->Src,NewSrc) && EqBlitFX(&p->FX,NewFX) && NewOnlyDiff == p->OnlyDiff) return p->Entry != NULL; CodeStart(&p->Code); // defaults p->Caps = VC_BRIGHTNESS|VC_DITHER|VC_SATURATION|VC_CONTRAST|VC_RGBADJUST; p->SrcAlignPos = 2; p->DstAlignPos = 2; 
p->DstAlignSize = 2; p->Dst = *NewDst; p->Src = *NewSrc; p->FX = *NewFX; p->OnlyDiff = (boolmem_t)NewOnlyDiff; p->SwapXY = (boolmem_t)((p->FX.Direction & DIR_SWAPXY) != 0); p->DstLeftRight = (boolmem_t)((p->FX.Direction & DIR_MIRRORLEFTRIGHT) != 0); p->DstUpDown = (boolmem_t)((p->FX.Direction & DIR_MIRRORUPDOWN) != 0); p->SrcUpDown = (boolmem_t)((p->FX.Flags & BLITFX_AVOIDTEARING) && !p->SwapXY && p->DstUpDown != ((p->FX.Flags & BLITFX_VMEMUPDOWN) != 0)); if (p->SrcUpDown) p->DstUpDown = (boolmem_t)!p->DstUpDown; // it's faster using slices with rotation (not just with AVOIDTEARING) // probably because of ram page trashing (during vertical writing) p->Slices = (boolmem_t)(p->SwapXY != ((p->FX.Flags & BLITFX_VMEMROTATED) != 0)); p->SlicesReverse = (boolmem_t)((p->SwapXY ? p->DstUpDown : p->DstLeftRight) == ((p->FX.Flags & BLITFX_VMEMUPDOWN) != 0)); Gray = (p->Dst.Flags & PF_PALETTE) && (p->Dst.BitCount == 4 || p->Dst.BitCount == 2); p->RScaleX = CalcRScale(p->FX.ScaleX,Gray); p->RScaleY = CalcRScale(p->FX.ScaleY,Gray); // important these integeres should be 1 or 0 p->DstHalfX = p->SrcHalfX = p->RScaleX == 32; p->DstHalfY = p->SrcHalfY = p->RScaleY == 32; p->DstDoubleX = p->SrcDoubleX = p->RScaleX == 8; p->DstDoubleY = p->SrcDoubleY = p->RScaleY == 8; if (p->SwapXY) { SwapInt(&p->DstHalfX,&p->DstHalfY); SwapInt(&p->DstDoubleX,&p->DstDoubleY); } p->SrcBPP = GetBPP(&p->Src); p->SrcBPP2 = -3; for (i=p->SrcBPP;i>1;i>>=1) ++p->SrcBPP2; p->DstBPP = GetBPP(&p->Dst); p->DstBPP2 = -3; for (i=p->DstBPP;i>1;i>>=1) ++p->DstBPP2; p->SrcYUV = (boolmem_t)AnyYUV(&p->Src); p->SrcPalette = DefaultPal(&p->Src); p->DstPalette = DefaultPal(&p->Dst); free(p->LookUp_Data); p->LookUp_Data = NULL; p->ColorLookup = (boolmem_t)((p->FX.Flags & BLITFX_COLOR_LOOKUP) != 0); #ifdef ARM p->ARM5 = (boolmem_t)((QueryPlatform(PLATFORM_CAPS) & CAPS_ARM_5E)!=0); p->WMMX = (boolmem_t)((QueryPlatform(PLATFORM_CAPS) & CAPS_ARM_WMMX)!=0 && !QueryAdvanced(ADVANCED_NOWMMX) && (p->Dst.Flags & PF_16ALIGNED) && 
(p->Src.Flags & PF_16ALIGNED)); p->QAdd = (boolmem_t)((QueryPlatform(PLATFORM_CAPS) & CAPS_ARM_5E)!=0 && !p->DstPalette && !p->WMMX && !(p->FX.Flags & BLITFX_DITHER) && !p->ColorLookup); #endif CalcColor(p); if (p->DstPalette) BuildPalLookUp(p,p->SrcYUV); p->ArithStretch = (boolmem_t)((p->FX.Flags & BLITFX_ARITHSTRETCHALWAYS) != 0); if ((p->FX.Flags & BLITFX_ARITHSTRETCH50) && p->RScaleX==32 && p->RScaleY==32) p->ArithStretch = 1; if (p->DstPalette) p->ArithStretch = 0; SrcPlanarYUV = PlanarYUV(&p->Src,&p->SrcUVX2,&p->SrcUVY2,&p->SrcUVPitch2); DstPlanarYUV = PlanarYUV(&p->Dst,&p->DstUVX2,&p->DstUVY2,&p->DstUVPitch2); p->DirX = p->DstLeftRight ? -1:1; for (i=0;i<3;++i) { p->DstSize[i] = BitMaskSize(p->Dst.BitMask[i]); p->DstPos[i] = BitMaskPos(p->Dst.BitMask[i]); p->SrcSize[i] = BitMaskSize(p->Src.BitMask[i]); p->SrcPos[i] = BitMaskPos(p->Src.BitMask[i]); } #if defined(ARM) if ((p->Dst.Flags & PF_RGB) && p->Dst.BitCount==16 && !p->SrcYUV) Any_RGB_RGB(p); if (!p->SrcYUV && DstPlanarYUV && p->RScaleX==16 && p->RScaleY==16) Fix_Any_YUV(p); #endif #if (defined(ARM) || defined(SH3) || defined(MIPS)) && defined(CONFIG_DYNCODE) if (SrcPlanarYUV) { #if defined(ARM) if (DstPlanarYUV && p->RScaleX==16 && p->RScaleY==16 && ((p->SrcUVX2==p->DstUVX2 && p->SrcUVY2==p->DstUVY2 && !(p->SwapXY && p->SrcUVX2 != p->SrcUVY2)) || (p->DstUVX2==1 && p->DstUVY2==1))) Fix_Any_YUV(p); if (PackedYUV(&p->Dst) && p->RScaleX==16 && p->RScaleY==16 && p->SrcUVY2<2 && p->SrcUVX2<2) Fix_PackedYUV_YUV(p); #endif if ((p->Dst.Flags & (PF_RGB|PF_PALETTE)) && (p->Dst.BitCount == 8 || p->Dst.BitCount == 16 || p->Dst.BitCount == 32)) { #if defined(ARM) if (p->Dst.BitCount == 16 && p->WMMX && PlanarYUV420(&p->Src) && (p->RScaleX==16 || p->RScaleX==8 || p->RScaleX==32) && (p->RScaleY==16 || p->RScaleY==8 || p->RScaleY==32)) WMMXFix_RGB_UV(p); else if ((p->Dst.BitCount == 16 || p->Dst.BitCount==32) && p->SrcUVX2==1 && p->SrcUVY2==1 && p->RScaleX == 16 && p->RScaleY == 16) Fix_RGB_UV(p); else if 
(p->Dst.BitCount == 16 && p->SrcUVX2==1 && p->SrcUVY2==1 && (p->RScaleX == 8 || p->RScaleX == 16) && (p->RScaleY == 8 || p->RScaleY == 16) && !p->ArithStretch) Fix_RGB_UV(p); else if (p->Dst.BitCount == 16 && p->SrcUVX2==1 && p->SrcUVY2==1 && p->RScaleX == 32 && p->RScaleY == 32) Half_RGB_UV(p); else Stretch_RGB_UV(p); #else #if !defined(MIPS) if (p->SrcUVX2==1 && p->SrcUVY2==1 && (p->RScaleX == 8 || p->RScaleX == 16) && (p->RScaleY == 8 || p->RScaleY == 16)) #endif Fix_RGB_UV(p); #endif } else if (Gray) Fix_Gray_UV(p); } #endif CodeBuild(&p->Code); if (p->Code.Size) p->Entry = (blitsoftentry)p->Code.Code; else p->Entry = NULL; #if defined(_M_IX86) && !defined(TARGET_SYMBIAN) if (p->FX.Direction==0 && p->RScaleX==16 && p->RScaleY==16) { uint32_t In = DefFourCC(&p->Src); uint32_t Out = DefFourCC(&p->Dst); #ifdef CONFIG_CONTEXT int Caps = QueryPlatform(PLATFORM_CAPS); #else int Caps = CPUCaps(); #endif const blitmmx* i; for (i=BlitMMX;i->In;++i) if (i->In==In && i->Out==Out) { p->Caps &= ~VC_DITHER; if (AnyYUV(&p->Src)) CalcYUVMMX(p); if (Caps & CAPS_X86_MMX2) p->Entry = i->Func[1]; else if (Caps & CAPS_X86_3DNOW) p->Entry = i->Func[2]; else p->Entry = i->Func[0]; break; } } #endif if (!p->Entry) { // use YUV internal calulaction? 
bool_t YUV = (AnyYUV(&p->Dst) || (p->Dst.BitCount==8 && p->Dst.Flags & PF_PALETTE)) && p->SrcYUV; p->DstType = UniversalType(&p->Dst,YUV); p->SrcType = UniversalType(&p->Src,YUV); #if defined(_M_IX86) || !defined(CONFIG_DYNCODE) || defined(BLITTEST) if (p->SrcType>=0 && p->DstType>=0) { // universal if (AnyYUV(&p->Dst)) p->Caps &= ~VC_DITHER; else p->Caps = VC_BRIGHTNESS | VC_DITHER; p->Entry = BlitUniversal; } if (p->SrcUVX2 == 1 && !p->SwapXY && !p->DstLeftRight && p->SrcType == 10) { if (p->DstType == 8 && p->Dst.BitMask[0] == 0xFF0000 && p->Dst.BitMask[1] == 0x00FF00 && p->Dst.BitMask[2] == 0x0000FF) { p->Caps = VC_BRIGHTNESS; p->Entry = Blit_PYUV_RGB32; } if (p->DstType == 6 && p->Dst.BitMask[0] == 0xF800 && p->Dst.BitMask[1] == 0x07E0 && p->Dst.BitMask[2] == 0x001F) { p->Caps = VC_BRIGHTNESS; p->Entry = Blit_PYUV_RGB16; } } if (p->SrcUVX2 == p->DstUVX2 && !p->SwapXY && !p->DstLeftRight && p->RScaleX == 16 && p->RScaleY==16 && p->SrcType == 12 && p->DstType == 12) { CalcYUVLookUp(p); p->Caps &= ~VC_DITHER; p->Entry = Blit_PYUV_PYUV; } #endif if (p->SrcUVX2 == p->DstUVX2 && p->SrcUVY2 == p->DstUVY2 && !p->SwapXY && !p->DstLeftRight && (p->RScaleX != 16 || p->RScaleY != 16) && (p->RScaleX == 16 || p->RScaleX == 8 || p->RScaleX == 32 || p->RScaleX == 4 || p->RScaleX == 64) && (p->RScaleY == 16 || p->RScaleY == 8 || p->RScaleY == 32 || p->RScaleX == 4 || p->RScaleX == 64) && p->SrcType == 12 && p->DstType == 12 && !p->FX.Saturation && !p->FX.Contrast && !p->FX.RGBAdjust[0] && !p->FX.RGBAdjust[1] && !p->FX.RGBAdjust[2] && !p->FX.Brightness) { p->Caps = 0; p->Entry = Blit_PYUV_PYUV_2; } } return p->Entry != NULL; } blitpack* BlitCreate(const video* Dst, const video* Src, const blitfx* FX, int* OutCaps) { blitfx CopyFX; bool_t Gray; blitpack* p = BlitAlloc(); if (!p) return NULL; if (!FX) { memset(&CopyFX,0,sizeof(CopyFX)); CopyFX.ScaleX = SCALE_ONE; CopyFX.ScaleY = SCALE_ONE; FX = &CopyFX; } if (!BlitCompile(&p->Code[0],&Dst->Pixel,&Src->Pixel,FX,0) || ((FX->Flags & 
BLITFX_ONLYDIFF) && !BlitCompile(&p->Code[1],&Dst->Pixel,&Src->Pixel,FX,1))) { BlitRelease(p); return NULL; } p->FX = *FX; p->Dst = *Dst; p->Src = *Src; Gray = (Dst->Pixel.Flags & PF_PALETTE) && (Dst->Pixel.BitCount == 4 || Dst->Pixel.BitCount == 2); p->RScaleX = CalcRScale(FX->ScaleX,Gray); p->RScaleY = CalcRScale(FX->ScaleY,Gray); if (OutCaps) *OutCaps = p->Code[0].Caps; return p; } static NOINLINE int CMul(blit_soft* p, int64_t* r, int64_t v, bool_t UV) { int m; if (UV) { m = p->FX.Saturation; if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0 m = 4*m+256; v *= m; if (v<0) v -= 128; else v += 128; v >>= 8; } m = p->FX.Contrast; if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0 m = 4*m + 256; v *= m; if (v<0) v -= 128; else v += 128; v >>= 8; if (r) *r = v; return (int)v; } static NOINLINE int CAdd(blit_soft* p, int64_t* r, int64_t v, int v0, int vUV) { int m; m = p->FX.Saturation; if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0 m = 4*m+256; v += ((256 - m) * vUV) >> 1; m = p->FX.Contrast; if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0 m = 4*m + 256; v += ((256 - m)*(v0 - v)) >> 8; if (r) *r = v; return (int)v; } static INLINE bool_t InRange32(int64_t v) { return v >= -MAX_INT && v <= MAX_INT; } static INLINE uint8_t RotateRight8(int v,int Bits) { Bits &= 7; return (uint8_t)(((v >> Bits) & ((1 << (8-Bits))-1)) | (v << (8-Bits))); } void CalcPalYUVLookUp(blit_soft* p) { p->LookUp_Size = 256+4+256+256; p->LookUp_Data = malloc(p->LookUp_Size); if (p->LookUp_Data) { int v,i,n = 1<Src.BitCount; rgb* Pal = p->Src.Palette; uint8_t* LookUp = p->LookUp_Data; for (i=0;iFX.Brightness; LookUp[i] = (uint8_t)SAT(v); v = (-1212*Pal[i].c.r -2384*Pal[i].c.g + 3596*Pal[i].c.b)/0x2000 + 128; LookUp[i+256+4] = (uint8_t)SAT(v); v = (3596*Pal[i].c.r -3015*Pal[i].c.g -582*Pal[i].c.b)/0x2000 + 128; LookUp[i+256+4+256] = (uint8_t)SAT(v); } } } void CalcPalRGBLookUp(blit_soft* p) { int i,n = 1<Src.BitCount; int w = 
p->Dst.BitCount>>3; if (w<2 || w>4) return; if (w==3) w=4; p->LookUp_Size = w*n; p->LookUp_Data = malloc(p->LookUp_Size); if (p->LookUp_Data) { uint16_t* Pal = p->LookUp_Data; for (i=0;iSrc.Palette[i].v,&p->Dst); if (w==2) *(Pal++) = (uint16_t)c; else { *(uint32_t*)Pal = c; Pal += 2; } } } } void CalcYUVLookUp(blit_soft* p) { if (p->FX.Saturation || p->FX.Contrast || (p->FX.Flags & BLITFX_DITHER) || p->FX.RGBAdjust[0] || p->FX.RGBAdjust[1] || p->FX.RGBAdjust[2]) { p->LookUp_Size = 3*256+4; p->LookUp_Data = malloc(p->LookUp_Size); if (p->LookUp_Data) { int Ofs = (p->FX.Flags & BLITFX_DITHER)?-2:0; int n; uint8_t* i=p->LookUp_Data; for (n=0;n<256+4;++n,++i) { int y = CMul(p,NULL,n+Ofs+p->FX.Brightness+p->FX.RGBAdjust[1]-128,0)+128; *i = (uint8_t)SAT(y); } for (n=0;n<256;++n,++i) { int u = CMul(p,NULL,n-128-p->FX.RGBAdjust[1]+p->FX.RGBAdjust[2],1)+128; *i = (uint8_t)SAT(u); } for (n=0;n<256;++n,++i) { int v = CMul(p,NULL,n-128-p->FX.RGBAdjust[1]+p->FX.RGBAdjust[0],1)+128; *i = (uint8_t)SAT(v); } } } } void CalcLookUp(blit_soft* p, bool_t Dither) { #if defined(ARM) const int Safe = 512; // 4*256:Y + (256+Safe*2)*n:SAT + 4*256:U + 4*256:V int SatOfsR = 4*256+Safe; int SatOfsG = 4*256+Safe; int SatOfsB = 4*256+Safe; p->LookUp_Size = 4*256*3 + (256+Safe*2); if (Dither) { if (p->DstSize[1] == p->DstSize[0]) SatOfsG = SatOfsR; else { p->LookUp_Size += (256+Safe*2); SatOfsG = SatOfsR + (256+Safe*2); } if (p->DstSize[2] == p->DstSize[0]) SatOfsB = SatOfsR; else if (p->DstSize[2] == p->DstSize[1]) SatOfsB = SatOfsG; else { p->LookUp_Size += (256+Safe*2); SatOfsB = SatOfsG + (256+Safe*2); } } p->LookUp_U = (p->LookUp_Size - 4*256*2) >> 2; p->LookUp_V = (p->LookUp_Size - 4*256) >> 2; p->LookUp_Data = malloc(p->LookUp_Size); if (p->LookUp_Data) { int v; int32_t* YMul = (int32_t*)p->LookUp_Data; int32_t* UMul = (int32_t*)p->LookUp_Data + p->LookUp_U; int32_t* VMul = (int32_t*)p->LookUp_Data + p->LookUp_V; uint8_t* SatR = (uint8_t*)p->LookUp_Data + SatOfsR; uint8_t* SatG = 
(uint8_t*)p->LookUp_Data + SatOfsG; uint8_t* SatB = (uint8_t*)p->LookUp_Data + SatOfsB; memset(p->LookUp_Data,0,p->LookUp_Size); for (v=0;v<256;++v) { *YMul = (p->_YMul * v) << LOOKUP_FIX; *UMul = (((p->_BUMul * v + p->_BAdd + (SatOfsB << 16)) << LOOKUP_FIX) & 0xFFFF0000) | (((p->_GUMul * v) >> (16-LOOKUP_FIX)) & 0xFFFF); //BUMul | GUMul *VMul = (((p->_RVMul * v + p->_RAdd + (SatOfsR << 16)) << LOOKUP_FIX) & 0xFFFF0000) | (((p->_GVMul * v + p->_GAdd + (SatOfsG << 16)) >> (16-LOOKUP_FIX)) & 0xFFFF); //BUMul | GUMul ++YMul; ++UMul; ++VMul; } if (Dither) { for (v=-Safe;v<256+Safe;++v) { SatR[v] = RotateRight8(SAT(v),8-p->DstSize[0]); SatG[v] = RotateRight8(SAT(v),8-p->DstSize[1]); SatB[v] = RotateRight8(SAT(v),8-p->DstSize[2]); } } else { for (v=-Safe;v<256+Safe;++v) SatR[v] = (uint8_t)SAT(v); } } #elif defined(MIPS) const int Safe = 256; // 256:Y + (128+Safe*2)*n:SAT + 8*256:U + 8*256:V int SatOfsR = 256+Safe; int SatOfsG = 256+Safe; int SatOfsB = 256+Safe; p->LookUp_Size = 256+8*256*2 + (128+Safe*2); if (p->DstSize[1] == p->DstSize[0]) SatOfsG = SatOfsR; else { p->LookUp_Size += (128+Safe*2); SatOfsG = SatOfsR + (128+Safe*2); } if (p->DstSize[2] == p->DstSize[0]) SatOfsB = SatOfsR; else if (p->DstSize[2] == p->DstSize[1]) SatOfsB = SatOfsG; else { p->LookUp_Size += (128+Safe*2); SatOfsB = SatOfsG + (128+Safe*2); } p->LookUp_U = p->LookUp_Size - 8*256*2; p->LookUp_V = p->LookUp_Size - 8*256; p->LookUp_Data = malloc(p->LookUp_Size); if (p->LookUp_Data) { int v; uint8_t* YMul = (uint8_t*)p->LookUp_Data; uint8_t** UMul = (uint8_t**)((uint8_t*)p->LookUp_Data + p->LookUp_U); uint8_t** VMul = (uint8_t**)((uint8_t*)p->LookUp_Data + p->LookUp_V); uint8_t* SatR = (uint8_t*)p->LookUp_Data + SatOfsR; uint8_t* SatG = (uint8_t*)p->LookUp_Data + SatOfsG; uint8_t* SatB = (uint8_t*)p->LookUp_Data + SatOfsB; memset(p->LookUp_Data,0,p->LookUp_Size); for (v=0;v<256;++v) { *YMul = (uint8_t)((p->_YMul * v) >> 17); UMul[0] = SatB + ((p->_BUMul * v + p->_BAdd) >> 17); *(int*)&UMul[1] = 
(p->_GUMul * v) >> 17; VMul[0] = SatR + ((p->_RVMul * v + p->_RAdd) >> 17); VMul[1] = SatG + ((p->_GVMul * v + p->_GAdd) >> 17); ++YMul; UMul+=2; VMul+=2; } for (v=-Safe;v<128+Safe;++v) { SatR[v] = (uint8_t)(SAT(v*2) >> (8-p->DstSize[0])); SatG[v] = (uint8_t)(SAT(v*2) >> (8-p->DstSize[1])); SatB[v] = (uint8_t)(SAT(v*2) >> (8-p->DstSize[2])); } } #elif defined(SH3) const int Safe = 256; //LookUp_Data: // [128..255|0..127]Y + 384 empty + 4*[128..255|0..127]U + 4*[128..255|0..127]V + (128+Safe*2)*n:SAT int SatOfsR = 256+384+4*256*2+Safe; int SatOfsG = 256+384+4*256*2+Safe; int SatOfsB = 256+384+4*256*2+Safe; p->LookUp_Size = 256+384+4*256*2 + (128+Safe*2); if (p->DstSize[1] == p->DstSize[0]) SatOfsG = SatOfsR; else { p->LookUp_Size += (128+Safe*2); SatOfsG = SatOfsR + (128+Safe*2); } if (p->DstSize[2] == p->DstSize[0]) SatOfsB = SatOfsR; else if (p->DstSize[2] == p->DstSize[1]) SatOfsB = SatOfsG; else { p->LookUp_Size += (128+Safe*2); SatOfsB = SatOfsG + (128+Safe*2); } p->LookUp_U = 128 + 384 + 4*128; p->LookUp_V = 128 + 384 + 4*256 + 4*128; p->LookUp_Data = malloc(p->LookUp_Size + Safe*2); //additional safe if (p->LookUp_Data) { int v; int8_t* YMul = (int8_t*)p->LookUp_Data; int16_t* UMul = (int16_t*)((uint8_t*)p->LookUp_Data + 256 + 384); int16_t* VMul = (int16_t*)((uint8_t*)p->LookUp_Data + 256 + 384 + 4*256); int8_t* SatR = (int8_t*)p->LookUp_Data + SatOfsR; int8_t* SatG = (int8_t*)p->LookUp_Data + SatOfsG; int8_t* SatB = (int8_t*)p->LookUp_Data + SatOfsB; memset(p->LookUp_Data,0,p->LookUp_Size + Safe*2); for (v=0;v<256;++v) { *YMul = (int8_t)(((p->_YMul * ((v+128)&255)) >> 17)-128); UMul[0] = (int16_t)(SatOfsB + ((p->_BUMul * ((v+128)&255) + p->_BAdd) >> 17)); UMul[1] = (int16_t)((p->_GUMul * ((v+128)&255)) >> 17); VMul[0] = (int16_t)(SatOfsR + ((p->_RVMul * ((v+128)&255) + p->_RAdd) >> 17)); VMul[1] = (int16_t)(SatOfsG + ((p->_GVMul * ((v+128)&255) + p->_GAdd) >> 17)); ++YMul; UMul+=2; VMul+=2; } for (v=-Safe;v<128+Safe;++v) { SatR[v] = (int8_t)(SAT(v*2) >> 
(8-p->DstSize[0])); SatG[v] = (int8_t)(SAT(v*2) >> (8-p->DstSize[1])); SatB[v] = (int8_t)(SAT(v*2) >> (8-p->DstSize[2])); } } #endif } void CalcColor(blit_soft* p) { #ifdef ARM if (p->QAdd) { //saturation: signed 32bit / 3 (qdadd will triple) int64_t YMul; int64_t RVMul; int64_t RAdd; int64_t GUMul; int64_t GVMul; int64_t GAdd; int64_t BUMul; int64_t BAdd; CMul(p,&YMul,0x63C000,0); CMul(p,&RVMul,0x88B2AA,1); CAdd(p,&RAdd,(p->FX.Brightness+p->FX.RGBAdjust[0])*0x63C000 - (int64_t)0x75400000,0x2AAAAAAA,0x88B2AA); CMul(p,&GUMul,-0x218555,1); CMul(p,&GVMul,-0x45A555,1); CAdd(p,&GAdd,(p->FX.Brightness+p->FX.RGBAdjust[1])*0x63C000 + (int64_t)0x02AEAAAA,0x2AAAAAAA,-0x218555-0x45A555); CMul(p,&BUMul,0xACD2AA,1); CAdd(p,&BAdd,(p->FX.Brightness+p->FX.RGBAdjust[2])*0x63C000 - (int64_t)0x87500000,0x2AAAAAAA,0xACD2AA); if (InRange32(YMul*0xF0) && InRange32(RVMul*0x10+RAdd) && InRange32(RVMul*0xF0+RAdd) && InRange32((GVMul+GUMul)*0x10+GAdd) && InRange32((GVMul+GUMul)*0xF0+GAdd) && InRange32(BUMul*0x10+BAdd) && InRange32(BUMul*0xF0+BAdd)) { p->_YMul = (int)YMul; p->_RVMul = (int)RVMul; p->_RAdd = (int)RAdd; p->_GUMul = (int)GUMul; p->_GVMul = (int)GVMul; p->_GAdd = (int)GAdd; p->_BUMul = (int)BUMul; p->_BAdd = (int)BAdd; } else p->QAdd = 0; } if (!p->QAdd) #endif { //saturation: unsigned 24bit p->_YMul = CMul(p,NULL,0x12B40,0); p->_RVMul = CMul(p,NULL,0x19A18,1); p->_RAdd = CAdd(p,NULL,(p->FX.Brightness+p->FX.RGBAdjust[0])*0x12B40 - 0x0DFC000,0x800000,0x19A18); p->_GUMul = CMul(p,NULL,-0x06490,1); p->_GVMul = CMul(p,NULL,-0x0D0F0,1); p->_GAdd = CAdd(p,NULL,(p->FX.Brightness+p->FX.RGBAdjust[1])*0x12B40 + 0x0880C00,0x800000,-0x06490-0x0D0F0); p->_BUMul = CMul(p,NULL,0x20678,1); p->_BAdd = CAdd(p,NULL,(p->FX.Brightness+p->FX.RGBAdjust[2])*0x12B40 - 0x115F000,0x800000,0x20678); } } #if defined(_M_IX86) && !defined(TARGET_SYMBIAN) static void ColMMX(blit_soft* p, int i, int ofs) { int mul = 2048; int m; int o = 128; if (i>0) { m = p->FX.Saturation; if (m<0) m >>= 1; // adjust negtive 
interval: -128..0 -> -64..0 m = 4*m+256; mul = (mul*m+128) >> 8; ofs += 128 - (m>>1); o = (o*m+128)>>8; } m = p->FX.Contrast; if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0 m = 4*m + 256; mul = (mul*m+128)>>8; ofs += o - ((m*o)>>8); if ((p->Src.Flags & PF_YUV_PC) && !(p->Dst.Flags & PF_YUV_PC)) { if (i==0) //y [0..255]->[16..235] { mul = (mul*219+127)/255; ofs = (ofs*219+127)/255+16; } else //uv [0..255]->[16..240] { mul = (mul*224+127)/255; ofs = (ofs*224+127)/255+16; } } else if (!(p->Src.Flags & PF_YUV_PC) && (p->Dst.Flags & PF_YUV_PC)) { if (i==0) //y [16..235]->[0..255] { mul = (mul*255+109)/219; ofs = ((ofs-16)*255+109)/219; } else //uv [16..240]->[0..255] { mul = (mul*255+112)/224; ofs = ((ofs-16)*255+112)/224; } } if (mul<-32768) mul=32768; if (mul>32767) mul=32767; p->Col[i][0][3] = p->Col[i][0][2] = p->Col[i][0][1] = p->Col[i][0][0] = (int16_t)mul; p->Col[i][1][3] = p->Col[i][1][2] = p->Col[i][1][1] = p->Col[i][1][0] = (int16_t)ofs; } static int16_t MulMMX(blit_soft* p, int i, int mul) { int m; if (i>0) { m = p->FX.Saturation; if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0 m = 4*m+256; mul = (mul*m+128) >> 8; } m = p->FX.Contrast; if (m<0) m >>= 1; // adjust negtive interval: -128..0 -> -64..0 m = 4*m + 256; mul = (mul*m+128)>>8; if (mul<-32768) mul=32768; if (mul>32767) mul=32767; return (int16_t)mul; } void CalcYUVMMX(blit_soft* p) { if (AnyYUV(&p->Dst)) { ColMMX(p,0,p->FX.Brightness+p->FX.RGBAdjust[1]); ColMMX(p,1,p->FX.RGBAdjust[2]-p->FX.RGBAdjust[1]); ColMMX(p,2,p->FX.RGBAdjust[0]-p->FX.RGBAdjust[1]); } else { const int16_t* m = GetYUVToRGB(&p->Src); p->Col[0][0][3] = p->Col[0][0][2] = p->Col[0][0][1] = p->Col[0][0][0] = MulMMX(p,0,m[0]); p->Col[0][1][3] = p->Col[0][1][2] = p->Col[0][1][1] = p->Col[0][1][0] = (int16_t)(m[1]+p->FX.Brightness+p->FX.RGBAdjust[1]); p->Col[1][0][1] = p->Col[1][0][0] = MulMMX(p,1,m[2]); //u_b p->Col[1][0][3] = p->Col[1][0][2] = MulMMX(p,1,m[3]); //u_g p->Col[1][1][3] = p->Col[1][1][2] = 
p->Col[1][1][1] = p->Col[1][1][0] = (int16_t)(m[4]+p->FX.RGBAdjust[2]-p->FX.RGBAdjust[1]); p->Col[2][0][1] = p->Col[2][0][0] = MulMMX(p,2,m[5]); //v_r p->Col[2][0][3] = p->Col[2][0][2] = MulMMX(p,2,m[6]); //v_g p->Col[2][1][3] = p->Col[2][1][2] = p->Col[2][1][1] = p->Col[2][1][0] = (int16_t)(m[7]+p->FX.RGBAdjust[0]-p->FX.RGBAdjust[1]); } } #endif