/*****************************************************************************
 *
 * This program is free software ; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * $Id: blit_arm_yuv.c 375 2005-12-02 09:34:27Z picard $
 *
 * The Core Pocket Media Player
 * Copyright (c) 2004-2005 Gabor Kovacs
 *
 ****************************************************************************/

#include "../common.h"
#include "../dyncode/dyncode.h"
#include "blit_soft.h"

// DstAlignSize 8
// DstAlignPos 8
// SrcAlignPos 8

#if defined(ARM)

// Register usage of the generated code:
// R0..R4 temporary
// R5 DiffMask (when Diff)
// R6 Src2SrcLast (when Diff)
// R5 7F7F7F7F (when !Diff and HalfY and !HalfX)
// R7 Src
// R8 MaskCarry (when Brightness != 0)
// R9 Add32 (when Brightness != 0)
// R10 EndOfLine (Src)
// R11 Dst
// R12 DstPitch
// R14 SrcPitch

// Layout of the generated function's stack. The fields before StackFrame are
// locals addressed through OFS(stack,...); the fields after it are the
// arguments that did not arrive in R0..R3. Field order defines the offsets,
// so it must not be changed.
typedef struct stack
{
	int Dst[3];
	int Src[3];
	int EndOfRect;
	int DstNext[2];
	int SrcNext[2];
	int SaveR0;

	int StackFrame[STACKFRAME];

	//void* this   R0
	//char* Dst    R1
	//char* Src    R2
	//int DstPitch R3 can be signed
	int SrcPitch;
	int Width;
	int Height;
	int Src2SrcLast;
} stack;

// Selects the dither offset for the next lookups by emitting an ADD on R8.
//
// Only active for plane 0 when BLITFX_DITHER is set and the source is YUV.
// The wanted offset is taken from a 2x2 matrix indexed by bit 3 of Pos and by
// Part; an instruction is emitted only when it differs from the offset that is
// currently applied (p->LookUpOfs), which is then updated.
static void NOINLINE SetDither(blit_soft* p,int Pos,int Part,int Plane)
{
	// dither matrix:
	//   0 2
	//   3 1
	// indexed as [bit 3 of Pos set][Part nonzero]
	static const int Matrix[2][2] = { {3,1}, {0,2} };
	int Wanted;

	if (Plane!=0 || !(p->FX.Flags & BLITFX_DITHER) || !p->SrcYUV)
		return;

	Wanted = Matrix[(Pos & 8) != 0][Part != 0];
	if (Wanted == p->LookUpOfs)
		return;

	// move R8 by the difference between the wanted and the current offset
	I2C(ADD,R8,R8,Wanted-p->LookUpOfs);
	p->LookUpOfs = Wanted;
}
static NOINLINE int YUV_4(blit_soft* p, int Reg,int Pos,int Plane) { reg RA = (reg)Reg; reg RB = R4; reg RC = R0; reg RD = R9; if (p->SwapXY) { if (Reg != R0) { if (!p->SrcYUV || (Plane!=0 && (p->HalfX || p->HalfY))) I2C(LDR,R0,SP,OFS(stack,SaveR0)); else I2(LDR,R0,R7); } if (p->LookUp) { I2C(AND,RB,R2,0xFF << Pos); I2C(AND,RC,R0,0xFF << Pos); SetDither(p,Pos,1,Plane); Byte(); I3S(LDR,RB,R8,RB,LSR,Pos); I2C(AND,RD,R1,0xFF << Pos); Byte(); I3S(LDR,RC,R8,RC,LSR,Pos); I3S(MOV,RB,NONE,RB,LSR,-(p->DirX<0?8:16)); I3S(ORR,RB,RB,RC,LSR,-(p->DirX<0?24:0)); I2C(AND,RC,R3,0xFF << Pos); SetDither(p,Pos,0,Plane); Byte(); I3S(LDR,RD,R8,RD,LSR,Pos); Byte(); I3S(LDR,RC,R8,RC,LSR,Pos); I3S(ORR,RB,RB,RD,LSR,-(p->DirX<0?16:8)); I3S(ORR,RB,RB,RC,LSR,-(p->DirX<0?0:24)); } else { I2C(AND,RB,R2,0xFF << Pos); I3S(MOV,RB,NONE,RB,LSR,Pos-(p->DirX<0?8:16)); I2C(AND,RC,R0,0xFF << Pos); I3S(ORR,RB,RB,RC,LSR,Pos-(p->DirX<0?24:0)); I2C(AND,RC,R1,0xFF << Pos); I3S(ORR,RB,RB,RC,LSR,Pos-(p->DirX<0?16:8)); I2C(AND,RC,R3,0xFF << Pos); I3S(ORR,RB,RB,RC,LSR,Pos-(p->DirX<0?0:24)); } RA=RB; } else if (p->DirX<0) { if (p->LookUp) { I2C(AND,RB,RA,0xFF00); SetDither(p,Pos,1,Plane); Byte(); I3S(LDR,RD,R8,RA,LSR,24); Byte(); I3S(LDR,RB,R8,RB,LSR,8); I3S(MOV,RA,NONE,RA,ROR,24); I3S(ORR,RD,RD,RB,LSL,16); SetDither(p,Pos,0,Plane); Byte(); I3S(LDR,RB,R8,RA,LSR,24); I2C(AND,RA,RA,0xFF00); Byte(); I3S(LDR,RA,R8,RA,LSR,8); I3S(ORR,RD,RD,RB,LSL,8); I3S(ORR,RD,RD,RA,LSL,24); RA = RD; } else if (RA == RC) { // only RA and RB I3S(MOV,RB,NONE,RA,LSR,24); I3S(ORR,RB,RB,RA,LSL,24); I3S(MOV,RA,NONE,RA,ROR,16); I3S(MOV,RB,NONE,RB,ROR,16); I3S(ORR,RB,RB,RA,LSR,24); I3S(ORR,RB,RB,RA,LSL,24); I3S(MOV,RA,NONE,RB,ROR,16); } else { I2C(AND,RC,RA,0xFF00); I3S(MOV,RB,NONE,RA,LSL,24); I3S(ORR,RB,RB,RC,LSL,8); I2C(AND,RC,RA,0xFF0000); I3S(ORR,RB,RB,RA,LSR,24); I3S(ORR,RA,RB,RC,LSR,8); } } else { if (p->LookUp) { I2C(AND,RB,RA,0xFF00); SetDither(p,Pos,1,Plane); Byte(); I3S(LDR,RD,R8,RA,LSR,24); Byte(); I3S(LDR,RB,R8,RB,LSR,8); 
I3S(MOV,RA,NONE,RA,ROR,24); I3S(ORR,RB,RB,RD,LSL,16); SetDither(p,Pos,0,Plane); Byte(); I3S(LDR,RD,R8,RA,LSR,24); I2C(AND,RA,RA,0xFF00); Byte(); I3S(LDR,RA,R8,RA,LSR,8); I3S(ORR,RB,RB,RD,LSL,8); I3S(ORR,RA,RA,RB,LSL,8); } else { RB = R0; RC = R4; } } if (Plane==0 && p->FX.Brightness && !p->LookUp && p->SrcYUV) { if (p->FX.Brightness < 0) { I3(MVN,RB,NONE,RA); RA=RB; } I3(ADD,RC,RA,R9); //add32 I3(BIC,RB,RA,RC); I3(AND,RB,RB,R8); //maskcarry I3S(MOV,RB,NONE,RB,LSR,7); I3S(SUB,RC,RC,RB,LSL,8); I3S(RSB,RB,RB,RB,LSL,8); I3(ORR,RB,RB,RC); if (p->FX.Brightness < 0) I3(MVN,RB,NONE,RB); RA=RB; } return RA; } static NOINLINE void YUV_4X4(blit_soft* p, int Plane) { int y; int UV = Plane != 0; int Rows = 2; int HalfX = UV ? p->HalfX:0; int HalfY = UV ? p->HalfY:0; int SrcUVX = UV?p->SrcUVX2:0; int SrcUVPitch = UV?p->SrcUVPitch2:0; int DstUVPitch = UV?p->DstUVPitch2:0; if (UV) Rows >>= p->SrcUVY2+p->HalfY; I2C(LDR,R7,SP,OFS(stack,Src[Plane])); I2C(LDR,R11,SP,OFS(stack,Dst[Plane])); for (y=0;ySrcBPP2>0) { IMul(R1,R0,p->SrcBPP/8); RWidth = R1; } I3S(ADD,R10,R7,RWidth,LSR,SrcUVX); //end of line //preload if (!p->Slices) { dyninst* PreLoad1; dyninst* PreLoad2; dyninst* PreLoad3; dyninst* PreLoad4; I2C(ADD,R0,R7,32); I3(CMP,NONE,R0,R10); I0P(B,CS,LoopX); PreLoad1 = Label(1); Byte(); I2C(LDR,R2,R0,-32); I2C(ADD,R0,R0,64); I3(CMP,NONE,R0,R10); Byte(); I2C(LDR,R3,R0,-64); I0P(B,CC,PreLoad1); I3S(ADD,R7,R7,R14,ASR,SrcUVPitch-1); I3S(ADD,R10,R10,R14,ASR,SrcUVPitch-1); I2C(ADD,R0,R7,32); PreLoad2 = Label(1); Byte(); I2C(LDR,R2,R0,-32); I2C(ADD,R0,R0,64); I3(CMP,NONE,R0,R10); Byte(); I2C(LDR,R3,R0,-64); I0P(B,CC,PreLoad2); I3S(ADD,R7,R7,R14,ASR,SrcUVPitch); I3S(ADD,R10,R10,R14,ASR,SrcUVPitch); I2C(ADD,R0,R7,32); PreLoad3 = Label(1); Byte(); I2C(LDR,R2,R0,-32); I2C(ADD,R0,R0,64); I3(CMP,NONE,R0,R10); Byte(); I2C(LDR,R3,R0,-64); I0P(B,CC,PreLoad3); I3S(SUB,R7,R7,R14,ASR,SrcUVPitch-1); I3S(SUB,R10,R10,R14,ASR,SrcUVPitch-1); I2C(ADD,R0,R7,32); PreLoad4 = Label(1); Byte(); I2C(LDR,R2,R0,-32); 
I2C(ADD,R0,R0,64); I3(CMP,NONE,R0,R10); Byte(); I2C(LDR,R3,R0,-64); I0P(B,CC,PreLoad4); I3S(SUB,R7,R7,R14,ASR,SrcUVPitch); I3S(SUB,R10,R10,R14,ASR,SrcUVPitch); } else if (p->ARM5) { //preload next I3S(ADD,R0,R7,R14,ASR,SrcUVPitch-2); I3S(ADD,R1,R0,R14,ASR,SrcUVPitch-1); I2(PLD,NONE,R0); I2(PLD,NONE,R1); I3S(PLD,NONE,R0,R14,ASR,SrcUVPitch); I3S(PLD,NONE,R1,R14,ASR,SrcUVPitch); } InstPost(LoopX); if (p->RealOnlyDiff) { p->Skip = Label(0); I3(LDR,R4,R7,R6); I3S(LDR_POST,R0,R7,R14,ASR,SrcUVPitch-1); I3(LDR,R1,R7,R6); I3S(LDR_POST,R2,R7,R14,ASR,SrcUVPitch); I3(EOR,R4,R4,R0); S(); I3(BIC,R4,R4,R5); I3(LDR,R0,R7,R6); I3S(LDR_POSTSUB,R3,R7,R14,ASR,SrcUVPitch-1); I3(EOR,R1,R1,R2); S(); C(EQ); I3(BIC,R1,R1,R5); I3(LDR,R4,R7,R6); I3S(LDR_POSTSUB,R1,R7,R14,ASR,SrcUVPitch); I3(EOR,R0,R0,R3); S(); C(EQ); I3(BIC,R0,R0,R5); I2(LDR,R0,R7); I3(EOR,R4,R4,R1); S(); C(EQ); I3(BIC,R4,R4,R5); I0P(B,EQ,p->Skip); } else if (!p->SrcYUV && !p->LookUp) { int x,y; // r0,r2,r3,r1 (tmp r4,r5,r6) for (y=0;y<4;++y) { reg Out; switch (y) { case 1: Out = R1; break; case 2: Out = R2; break; case 3: Out = R3; break; default:Out = R0; break; } for (x=0;x<4;++x) { reg Val = (reg)(x?R4:Out); int ofs = ((p->SrcBPP/8) << HalfX)*x; switch (p->SrcBPP) { // 8bit handled by lookup (with normal YUV blitting) case 16: Half(); I2C(LDR,R4,R7,ofs); I2C(MOV,R5,NONE,((1 << p->SrcSize[1])-1) << (8-p->SrcSize[1])); I2C(MOV,R6,NONE,((1 << p->SrcSize[2])-1) << (8-p->SrcSize[2])); I3S(AND,R5,R5,R4,LSR,p->SrcPos[1]+p->SrcSize[1]-8); I3S(AND,R6,R6,R4,LSR,p->SrcPos[2]+p->SrcSize[2]-8); I2C(AND,R4,R4,p->Src.BitMask[0]); I3S(MOV,R4,NONE,R4,LSR,p->SrcPos[0]+p->SrcSize[0]-8); break; case 24: case 32: Byte(); I2C(LDR,R6,R7,ofs+(p->SrcPos[2]>>3)); Byte(); I2C(LDR,R4,R7,ofs+(p->SrcPos[0]>>3)); Byte(); I2C(LDR,R5,R7,ofs+(p->SrcPos[1]>>3)); break; } // R4,R5,R6 r,g,b // Y = (2105*R + 4128*G + 802*B)/0x2000 + 16; // U = (-1212*R - 2384*G + 3596*B)/0x2000 + 128; // V = (3596*R - 3015*G - 582*B)/0x2000 + 128; switch (Plane) { case 0: // 
Y = (8*R + 16*G + 3*B)/32 + 16; I3S(ADD,R6,R6,R6,LSL,1); I3S(ADD,R4,R6,R4,LSL,3); I3S(ADD,R4,R4,R5,LSL,4); I3S(MOV,R4,NONE,R4,ASR,5); S(); I2C(ADD,Val,R4,16+p->FX.Brightness); if (p->FX.Brightness+16<0) { C(MI);I2C(MOV,Val,NONE,0x00); } if (p->FX.Brightness+16+215>=255) { I2C(CMP,NONE,Val,0xFF); C(GT);I2C(MOV,Val,NONE,0xFF); } break; case 1: // U = (-5*R - 9*G + 14*B)/32 + 128; I3S(RSB,R6,R6,R6,LSL,3); I3(ADD,R6,R6,R6); I3S(ADD,R4,R4,R4,LSL,2); I3S(ADD,R5,R5,R5,LSL,3); I3(SUB,R4,R6,R4); I3(SUB,R4,R4,R5); I3S(MOV,R4,NONE,R4,ASR,5); I2C(ADD,Val,R4,128); break; case 2: // V = (14*R - 12*G - 2*B)/32 + 128; I3(ADD,R6,R6,R6); I3S(RSB,R4,R4,R4,LSL,3); I3S(ADD,R5,R5,R5,LSL,1); I3S(RSB,R4,R6,R4,LSL,1); I3S(SUB,R4,R4,R5,LSL,2); I3S(MOV,R4,NONE,R4,ASR,5); I2C(ADD,Val,R4,128); break; } if (x>0) I3S(ORR,Out,Out,Val,LSL,x*8); } I3S(ADD,R7,R7,R14,ASR,SrcUVPitch-HalfY); } I3S(SUB,R7,R7,R14,ASR,SrcUVPitch-2-HalfY); } else { int mode = HalfX + 2*HalfY; int x = 2; if (!p->SrcYUV && mode==0) { //possible alignment problems... mode = 1; x = 1; } switch (mode) { case 0: I3S(LDR_POST,R0,R7,R14,ASR,SrcUVPitch-1); I3S(LDR_POST,R2,R7,R14,ASR,SrcUVPitch); I3S(LDR_POSTSUB,R3,R7,R14,ASR,SrcUVPitch-1); I3S(LDR_POSTSUB,R1,R7,R14,ASR,SrcUVPitch); break; case 1: // halfx (not average...) 
Byte(); I2C(LDR,R0,R7,3*x); Byte(); I2C(LDR,R4,R7,2*x); Byte(); I2C(LDR,R5,R7,1*x); Byte(); I3S(LDR_POST,R6,R7,R14,ASR,SrcUVPitch-1); I3S(ORR,R0,R4,R0,LSL,8); I3S(ORR,R0,R5,R0,LSL,8); I3S(ORR,R0,R6,R0,LSL,8); Byte(); I2C(LDR,R2,R7,3*x); Byte(); I2C(LDR,R4,R7,2*x); Byte(); I2C(LDR,R5,R7,1*x); Byte(); I3S(LDR_POST,R6,R7,R14,ASR,SrcUVPitch); I3S(ORR,R2,R4,R2,LSL,8); I3S(ORR,R2,R5,R2,LSL,8); I3S(ORR,R2,R6,R2,LSL,8); Byte(); I2C(LDR,R3,R7,3*x); Byte(); I2C(LDR,R4,R7,2*x); Byte(); I2C(LDR,R5,R7,1*x); Byte(); I3S(LDR_POSTSUB,R6,R7,R14,ASR,SrcUVPitch-1); I3S(ORR,R3,R4,R3,LSL,8); I3S(ORR,R3,R5,R3,LSL,8); I3S(ORR,R3,R6,R3,LSL,8); Byte(); I2C(LDR,R1,R7,3*x); Byte(); I2C(LDR,R4,R7,2*x); Byte(); I2C(LDR,R5,R7,1*x); Byte(); I3S(LDR_POSTSUB,R6,R7,R14,ASR,SrcUVPitch); I3S(ORR,R1,R4,R1,LSL,8); I3S(ORR,R1,R5,R1,LSL,8); I3S(ORR,R1,R6,R1,LSL,8); break; case 2: // halfy I3S(LDR,R4,R7,R14,ASR,SrcUVPitch); I3S(LDR_POST,R0,R7,R14,ASR,SrcUVPitch-1-1); I3S(LDR,R6,R7,R14,ASR,SrcUVPitch); I3S(LDR_POST,R2,R7,R14,ASR,SrcUVPitch-1); I3S(AND,R4,R5,R4,LSR,1); I3S(AND,R0,R5,R0,LSR,1); I3(ADD,R0,R0,R4); I3S(AND,R6,R5,R6,LSR,1); I3S(AND,R2,R5,R2,LSR,1); I3(ADD,R2,R2,R6); I3S(LDR,R4,R7,R14,ASR,SrcUVPitch); I3S(LDR_POSTSUB,R3,R7,R14,ASR,SrcUVPitch-1-1); I3S(LDR,R6,R7,R14,ASR,SrcUVPitch); I3S(LDR_POSTSUB,R1,R7,R14,ASR,SrcUVPitch-1); I3S(AND,R4,R5,R4,LSR,1); I3S(AND,R3,R5,R3,LSR,1); I3(ADD,R3,R3,R4); I3S(AND,R6,R5,R6,LSR,1); I3S(AND,R1,R5,R1,LSR,1); I3(ADD,R1,R1,R4); break; case 3: // halfx + halfy (no average..., used by palette mode as well) Byte(); I2C(LDR,R0,R7,6); Byte(); I2C(LDR,R4,R7,4); Byte(); I2C(LDR,R5,R7,2); Byte(); I3S(LDR_POST,R6,R7,R14,ASR,SrcUVPitch-1-1); I3S(ORR,R0,R4,R0,LSL,8); I3S(ORR,R0,R5,R0,LSL,8); I3S(ORR,R0,R6,R0,LSL,8); Byte(); I2C(LDR,R2,R7,6); Byte(); I2C(LDR,R4,R7,4); Byte(); I2C(LDR,R5,R7,2); Byte(); I3S(LDR_POST,R6,R7,R14,ASR,SrcUVPitch-1); I3S(ORR,R2,R4,R2,LSL,8); I3S(ORR,R2,R5,R2,LSL,8); I3S(ORR,R2,R6,R2,LSL,8); Byte(); I2C(LDR,R3,R7,6); Byte(); I2C(LDR,R4,R7,4); Byte(); 
I2C(LDR,R5,R7,2); Byte(); I3S(LDR_POSTSUB,R6,R7,R14,ASR,SrcUVPitch-1-1); I3S(ORR,R3,R4,R3,LSL,8); I3S(ORR,R3,R5,R3,LSL,8); I3S(ORR,R3,R6,R3,LSL,8); Byte(); I2C(LDR,R1,R7,6); Byte(); I2C(LDR,R4,R7,4); Byte(); I2C(LDR,R5,R7,2); Byte(); I3S(LDR_POSTSUB,R6,R7,R14,ASR,SrcUVPitch-1); I3S(ORR,R1,R4,R1,LSL,8); I3S(ORR,R1,R5,R1,LSL,8); I3S(ORR,R1,R6,R1,LSL,8); break; } } if (p->SwapXY && (HalfX || HalfY || !p->SrcYUV)) I2C(STR,R0,SP,OFS(stack,SaveR0)); RA = (reg)YUV_4(p,R0,0,Plane); I3S(STR_POST,RA,R11,R12,ASR,DstUVPitch-1); RA = (reg)YUV_4(p,R2,16,Plane); I3S(STR_POST,RA,R11,R12,ASR,DstUVPitch); RA = (reg)YUV_4(p,R3,24,Plane); I3S(STR_POSTSUB,RA,R11,R12,ASR,DstUVPitch-1); RA = (reg)YUV_4(p,R1,8,Plane); I3S(STR_POSTSUB,RA,R11,R12,ASR,DstUVPitch); if (p->LookUp && p->LookUpOfs) { I2C(SUB,R8,R8,p->LookUpOfs); p->LookUpOfs = 0; } if (p->RealOnlyDiff) InstPost(p->Skip); I2C(ADD,R7,R7,(4 << HalfX)*(p->SrcBPP/8)); if (p->SwapXY) I3S(ADD,R11,R11,R12,ASR,DstUVPitch-2); else I2C(ADD,R11,R11,4*p->DirX); I3(CMP,NONE,R7,R10); I0P(B,NE,LoopX); I2C(LDR,R0,SP,OFS(stack,SrcNext[UV])); I2C(LDR,R1,SP,OFS(stack,DstNext[UV])); I3(ADD,R7,R7,R0); I3(ADD,R11,R11,R1); } I2C(STR,R7,SP,OFS(stack,Src[Plane])); I2C(STR,R11,SP,OFS(stack,Dst[Plane])); } void Fix_Any_YUV(blit_soft* p) { dyninst* Add32 = NULL; dyninst* MaskCarry = NULL; dyninst* LoopY; reg RSrcWidth; p->SrcAlignPos = p->DstAlignPos = p->DstAlignSize = 8; CodeBegin(); I2C(SUB,SP,SP,OFS(stack,StackFrame)); p->DiffMask = NULL; I2C(LDR,R6,R1,0);//Dst[0] U I2C(LDR,R7,R1,4);//Dst[1] V I2C(LDR,R8,R1,8);//Dst[2] Y I2C(LDR,R10,R2,0); //Src[0] U if (!p->SrcYUV) { I3(MOV,R11,NONE,R10); I3(MOV,R12,NONE,R10); p->SrcUVX2 = 0; p->SrcUVY2 = 0; p->SrcUVPitch2 = 0; p->Caps = VC_BRIGHTNESS; } else { I2C(LDR,R11,R2,4); //Src[1] V I2C(LDR,R12,R2,8); //Src[2] Y } if (p->DirX<0) { //adjust reversed destination for block size I2C(SUB,R6,R6,3); I2C(SUB,R7,R7,3); I2C(SUB,R8,R8,3); } I2C(STR,R6,SP,OFS(stack,Dst[0])); I2C(STR,R7,SP,OFS(stack,Dst[1])); 
I2C(STR,R8,SP,OFS(stack,Dst[2])); I2C(STR,R10,SP,OFS(stack,Src[0])); I2C(STR,R11,SP,OFS(stack,Src[1])); I2C(STR,R12,SP,OFS(stack,Src[2])); I3(MOV,R12,NONE,R3); //DstPitch I2C(LDR,R14,SP,OFS(stack,SrcPitch)); I2C(LDR,R2,SP,OFS(stack,Height)); I2C(LDR,R1,SP,OFS(stack,Width)); p->RealOnlyDiff = p->OnlyDiff; p->HalfX = (boolmem_t)(p->SrcUVX2+1 == p->DstUVX2); p->HalfY = (boolmem_t)(p->SrcUVY2+1 == p->DstUVY2); if (p->HalfX || p->HalfY) p->RealOnlyDiff = 0; if (p->RealOnlyDiff) { int Mask = 0x03030303; p->DiffMask = InstCreate32(Mask,NONE,NONE,NONE,0,0); I1P(LDR,R5,p->DiffMask,0); I2C(LDR,R6,SP,OFS(stack,Src2SrcLast)); } else if ((p->HalfX + 2*p->HalfY)==2) { int Mask = 0x7F7F7F7F; p->DiffMask = InstCreate32(Mask,NONE,NONE,NONE,0,0); I1P(LDR,R5,p->DiffMask,0); } p->LookUp = NULL; if (p->SrcYUV) CalcYUVLookUp(p); else if (p->Src.Palette) CalcPalYUVLookUp(p); if (p->LookUp_Data) { p->LookUp = InstCreate(p->LookUp_Data,p->LookUp_Size,NONE,NONE,NONE,0,0); free(p->LookUp_Data); p->LookUp_Data = NULL; I1P(MOV,R8,p->LookUp,0); p->LookUpOfs = 0; } else if (p->FX.Brightness) { int i = p->FX.Brightness; if (i<0) i=-i; if (i>127) i=127; MaskCarry = InstCreate32(0x80808080U,NONE,NONE,NONE,0,0); Add32 = InstCreate32(0x01010101U * (uint8_t)i,NONE,NONE,NONE,0,0); I1P(LDR,R8,MaskCarry,0); I1P(LDR,R9,Add32,0); } //EndOfRect //DstNext[2] //SrcNext[2] RSrcWidth = R1; if (p->SrcBPP2>0) { IMul(R3,R1,p->SrcBPP/8); RSrcWidth = R3; } //SrcYNext = 4*Src->Pitch - Width I3S(RSB,R0,RSrcWidth,R14,LSL,2); I2C(STR,R0,SP,OFS(stack,SrcNext[0])); //SrcUVNext = (4 << HalfY)*(Src->Pitch >> SrcUVPitch2) - (Width >> SrcUVX) I3S(MOV,R0,NONE,R14,LSL,2+p->HalfY-p->SrcUVPitch2); I3S(SUB,R0,R0,RSrcWidth,LSR,p->SrcUVX2); I2C(STR,R0,SP,OFS(stack,SrcNext[1])); //EndOfRect = Src + Src->Pitch * Height I3(MUL,R0,R14,R2); I3(ADD,R0,R0,R10); I2C(STR,R0,SP,OFS(stack,EndOfRect)); if (p->SwapXY) { //DstYNext = 4*DirX - Width*Dst->Pitch I3(MUL,R3,R1,R12); I2C(MOV,R0,NONE,4*p->DirX); I3(SUB,R0,R0,R3); 
I2C(STR,R0,SP,OFS(stack,DstNext[0])); //DstUVNext = 4*DirX - (Width >> DstUVY2)*(Dst->Pitch >> DstUVPitch2) if (p->DstUVY2+p->DstUVPitch2) I3S(MOV,R3,NONE,R3,ASR,p->DstUVY2+p->DstUVPitch2); I2C(MOV,R0,NONE,4*p->DirX); I3(SUB,R0,R0,R3); I2C(STR,R0,SP,OFS(stack,DstNext[1])); } else { //DstYNext = 4*Dst->Pitch - DirX * Width I3S(p->DirX<0?ADD:RSB,R0,R1,R12,LSL,2); I2C(STR,R0,SP,OFS(stack,DstNext[0])); //DstUVNext = 4*Dst->Pitch >> DstUVPitch2 - DirX * (Width >> DstUVX2) I3S(MOV,R0,NONE,R12,LSL,2-p->DstUVPitch2); I3S(p->DirX<0?ADD:SUB,R0,R0,R1,LSR,p->DstUVX2); I2C(STR,R0,SP,OFS(stack,DstNext[1])); } LoopY = Label(0); I0P(B,AL,LoopY); if (p->DiffMask) InstPost(p->DiffMask); if (Add32) InstPost(Add32); if (MaskCarry) InstPost(MaskCarry); if (p->LookUp) { Align(16); InstPost(p->LookUp); } InstPost(LoopY); { if (p->LookUp) I2C(ADD,R8,R8,256+4); YUV_4X4(p,1); if (p->LookUp) I2C(ADD,R8,R8,256); YUV_4X4(p,2); if (p->LookUp) I2C(SUB,R8,R8,256+256+4); YUV_4X4(p,0); MB(); I2C(LDR,R2,SP,OFS(stack,EndOfRect)); I3(CMP,NONE,R2,R7); I0P(B,NE,LoopY); } I2C(ADD,SP,SP,OFS(stack,StackFrame)); CodeEnd(); } #endif