/*****************************************************************************
 *
 * This program is free software ; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * $Id: blit_arm_fix.c 543 2006-01-07 22:06:24Z picard $
 *
 * The Core Pocket Media Player
 * Copyright (c) 2004-2005 Gabor Kovacs
 *
 ****************************************************************************/
|
||
|
|
||
|
#include "../common.h"
|
||
|
#include "../dyncode/dyncode.h"
|
||
|
#include "blit_soft.h"
|
||
|
|
||
|
#if defined(ARM)
|
||
|
|
||
|
typedef struct stack
|
||
|
{
|
||
|
int EndOfLine;
|
||
|
int EndOfRect;
|
||
|
int DstPitch;
|
||
|
int DstNext;
|
||
|
int YNext;
|
||
|
int UVNext;
|
||
|
|
||
|
int StackFrame[STACKFRAME];
|
||
|
|
||
|
//void* this R0
|
||
|
//char* Dst R1
|
||
|
//char* Src R2
|
||
|
//int DstPitch R3 can be signed
|
||
|
int SrcPitch; //can be signed
|
||
|
int Width;
|
||
|
int Height;
|
||
|
int Src2SrcLast;
|
||
|
} stack;
|
||
|
|
||
|
// Emits the per-2x2-block chroma stage of the Fix_RGB_UV inner loop.
//
// The emitted code reads one U byte through R10 and one V byte through R11
// and leaves the chroma contribution of each colour channel in R4/R5/R6 for
// the four Fix_RGB_UV_Pixel() stages that follow.
//
// When p->OnlyDiff is set, a comparison is emitted first: each Y halfword and
// each U/V byte is XORed with the value found at the same pointer plus R6
// (loaded from stack.Src2SrcLast by the caller) and tested against R5 (the
// DiffMask). If every test yields EQ, the destination pointer R9 is advanced
// and execution branches to p->Skip.
//
// NOTE(review): the I*/C/S/Byte/Half/MB macros come from ../dyncode/dyncode.h;
// statement order here is the emitted instruction order and must not be
// rearranged (the "xscale stall" remarks refer to load-use scheduling).
static NOINLINE void Fix_RGB_UV_LoadUV(blit_soft* p)
{
	//set R4 = RVMul*v + RAdd
	//set R5 = GUMul*u + GVMul*v + GAdd
	//set R6 = BUMul*u + BAdd

	if (p->Dither)
	{
		// Dithering keeps state in R1,R2,R3 between pixels (see
		// Fix_RGB_UV_Pixel), so only R0,R4,R8 are free here.

		//R0,R4,R8 for temporary
		//R5(DiffMask),R6(Src2SrcLast),R7(EndOfLine)

		if (p->OnlyDiff)
		{
			// Y row 0 (R12) and Y row 1 (R14): two pixels per halfword load.
			Half(); I3(LDR,R8,R12,R6);
			Half(); I2C(LDR_POST,R0,R12,2);
			Half(); I3(LDR,R4,R14,R6);
			//xscale stall
			I3(EOR,R8,R8,R0);
			Half(); I2C(LDR_POST,R0,R14,2);
			S(); I3(TST,NONE,R8,R5);
			C(EQ); Byte(); I3(LDR,R8,R10,R6);
			C(EQ); I3(EOR,R4,R4,R0);
			Byte(); I2C(LDR_POST,R0,R10,1); //u
			C(EQ); S(); I3(TST,NONE,R4,R5);
			C(EQ); Byte(); I3(LDR,R4,R11,R6);
			C(EQ); I3(EOR,R8,R8,R0);
			Byte(); I2C(LDR_POST,R0,R11,1); //v
			C(EQ); S(); I3(TST,NONE,R8,R5);
			//xscale stall

			C(EQ); I3(EOR,R4,R4,R0);
			C(EQ); S(); I3(TST,NONE,R4,R5);

			// Still EQ: nothing changed, step the destination and skip the block.
			if (p->SwapXY)
			{
				MB(); I2C(LDR,R8,SP,OFS(stack,DstPitch));
				C(EQ); I3S(ADD,R9,R9,R8,LSL,1+p->DstDoubleY);
			}
			else
			{
				C(EQ); I2C(ADD,R9,R9,p->DstStepX);
			}
			I0P(B,EQ,p->Skip);

			// R0=v
			if (p->ColorLookup)
			{
				// R10 was already post-incremented above, hence offset -1.
				Byte(); I2C(LDR,R5,R10,-1); //u
				// R8 was used as a temporary; point it at the lookup table again.
				I1P(MOV,R8,p->LookUp,0);
			}
		}
		else
		{
			Byte(); I2C(LDR_POST,R0,R11,1); //v
			if (p->ColorLookup)
			{
				Byte(); I2C(LDR_POST,R5,R10,1); //u
			}
		}

		if (!p->ColorLookup)
		{
			// Multiply-accumulate path: coefficients are loaded from the
			// constants posted after the code by Fix_RGB_UV.
			I1P(LDR,R5,p->GAdd,0);
			I1P(LDR,R8,p->GVMul,0);
			I1P(LDR,R4,p->RAdd,0);
			I1P(LDR,R6,p->RVMul,0);
			I4(MLA,R5,R8,R0,R5);

			Byte();
			if (p->OnlyDiff) //already incremented
				I2C(LDR,R8,R10,-1); //u
			else
				I2C(LDR_POST,R8,R10,1); //u

			I4(MLA,R4,R6,R0,R4);
			I1P(LDR,R7,p->GUMul,0);
			I1P(LDR,R6,p->BAdd,0);
			I1P(LDR,R0,p->BUMul,0);
			I4(MLA,R5,R7,R8,R5);

			//R7 Y read will moveback here (this will prevent stall by R0)

			I4(MLA,R6,R0,R8,R6);
			I1P(LDR,R8,p->YMul,0); //restore R8
		}
		else
		{
			// Table path: R8 holds the lookup table base, entries are 32-bit.
			I2C(ADD,R0,R0,p->LookUp_V);
			I3S(LDR,R4,R8,R0,LSL,2); // RVMul+RAdd | GVMul+GAdd
			I2C(ADD,R5,R5,p->LookUp_U);
			I3S(LDR,R6,R8,R5,LSL,2); // BUMul | GUMul
			I3S(MOV,R5,NONE,R4,LSL,16);
			I3S(ADD,R5,R5,R6,LSL,16); // GUMul+GVMul+GAdd | 0000
		}
	}
	else
	{
		//R0,R1,R2,R3,R4 for temporary
		//R5(DiffMask),R6(Src2SrcLast),R7(EndOfLine)

		if (p->OnlyDiff)
		{
			// Same comparison as the dither variant, but with R1/R2 free the
			// current u and v stay in registers for the conversion below.
			Half(); I3(LDR,R0,R12,R6);
			Half(); I2C(LDR_POST,R1,R12,2);
			Half(); I3(LDR,R4,R14,R6);
			Half(); I2C(LDR_POST,R2,R14,2);
			I3(EOR,R0,R0,R1);
			S(); I3(TST,NONE,R0,R5);
			I3(EOR,R4,R4,R2);
			C(EQ); S(); I3(TST,NONE,R4,R5);

			C(EQ); Byte(); I3(LDR,R0,R10,R6);
			Byte(); I2C(LDR_POST,R1,R10,1); //u
			C(EQ); Byte(); I3(LDR,R4,R11,R6);
			Byte(); I2C(LDR_POST,R2,R11,1); //v
			C(EQ); I3(EOR,R0,R0,R1);
			C(EQ); S(); I3(TST,NONE,R0,R5);
			C(EQ); I3(EOR,R4,R4,R2);
			C(EQ); S(); I3(TST,NONE,R4,R5);

			// Still EQ: nothing changed, step the destination and skip the block.
			if (p->SwapXY)
			{
				MB(); I2C(LDR,R3,SP,OFS(stack,DstPitch));
				C(EQ); I3S(ADD,R9,R9,R3,LSL,1+p->DstDoubleY);
			}
			else
			{
				C(EQ); I2C(ADD,R9,R9,p->DstStepX);
			}
			I0P(B,EQ,p->Skip);
		}
		else
		{
			Byte(); I2C(LDR_POST,R1,R10,1); //u
			Byte(); I2C(LDR_POST,R2,R11,1); //v
		}

		if (!p->ColorLookup)
		{
			// R1=u, R2=v; accumulate into R4 (red), R5 (green), R6 (blue).
			I1P(LDR,R5,p->GAdd,0);
			I1P(LDR,R0,p->GVMul,0);
			I1P(LDR,R4,p->RAdd,0);
			I1P(LDR,R7,p->RVMul,0);
			I1P(LDR,R6,p->BAdd,0);
			I4(MLA,R5,R0,R2,R5);
			I1P(LDR,R0,p->BUMul,0);
			I1P(LDR,R3,p->GUMul,0);
			I4(MLA,R4,R7,R2,R4);
			I4(MLA,R6,R0,R1,R6);
			I4(MLA,R5,R3,R1,R5);
		}
		else
		{
			I2C(ADD,R1,R1,p->LookUp_U);
			I2C(ADD,R2,R2,p->LookUp_V);
			I3S(LDR,R6,R8,R1,LSL,2); // BUMul+BAdd | GUMul
			I3S(LDR,R4,R8,R2,LSL,2); // RVMul+RAdd | GVMul+GAdd
			//double xscale stall
			I3S(MOV,R5,NONE,R6,LSL,16);
			I3S(ADD,R5,R5,R4,LSL,16); // GUMul+GVMul+GAdd | 0000
		}
	}
}
|
||
|
|
||
|
// Emits the code that converts one pixel of the current 2x2 block and merges
// it into the packed output word in R0.
//
// p    blit state; p->Pos (bit rotation currently applied to R0, or <0 when R0
//      holds nothing yet) and p->Upper are updated here.
// Col  0 or 1: which of the two pixels sharing an output word is emitted.
// Row  0 or 1: which of the two luma rows (or columns when p->SwapXY) to read.
//
// Register use in the emitted code: R7 = luma byte (then a temporary),
// R4/R5/R6 = chroma terms from Fix_RGB_UV_LoadUV, R8 = YMul or the lookup
// table base, R1/R2/R3 = per-channel results (and the carried dither error
// when p->Dither is set), R0 = packed output.
//
// NOTE(review): statement order is the emitted instruction order; do not
// rearrange.
void Fix_RGB_UV_Pixel(blit_soft* p, int Col, int Row)
{
	// Bit position just above the channel value after the saturation step.
	int SatBitR = p->QAdd ? 32 : 24;
	int SatBitG = SatBitR;
	int SatBitB = SatBitR;
	int RPos = p->DstPos[0];
	int GPos = p->DstPos[1];
	int BPos = p->DstPos[2];

	// Does this pixel go into the upper part of the output word?
	p->Upper = (p->DirX<0) ^ (Col>0);

	if (p->Upper && p->DstBPP==8 && p->DstDoubleX)
	{
		RPos += 16;
		GPos += 16;
		BPos += 16;
	}
	else
	if (p->Upper && p->DstBPP<=16)
	{
		RPos += p->DstBPP;
		GPos += p->DstBPP;
		BPos += p->DstBPP;
	}

	//load Y
	MB();
	Byte();

	if (p->OnlyDiff) //is R12,R14 already incremented?
	{
		// LoadUV already post-incremented both luma pointers by 2, so index back.
		if (p->SwapXY)
			I2C(LDR,R7,(reg)(Col==0?R12:R14),-2+Row);
		else
			I2C(LDR,R7,(reg)(Row==0?R14:R12),-2+Col);
	}
	else
	if (p->SwapXY)
		I2C(LDR_POST,R7,(reg)(Col==0?R12:R14),1);
	else
		I2C(LDR_POST,R7,(reg)(Row==0?R14:R12),1);

	if (p->Dither)
	{
		if (!p->ColorLookup)
		{
			// Shift the carried value in R1..R3 down, add the chroma term,
			// then add YMul*y.
			I3S(ADD,R1,R4,R1,LSR,32-SatBitR+p->DstSize[0]);
			I3S(ADD,R2,R5,R2,LSR,32-SatBitG+p->DstSize[1]);
			I3S(ADD,R3,R6,R3,LSR,32-SatBitB+p->DstSize[2]);
			I4(MLA,R1,R8,R7,R1);
			I4(MLA,R2,R8,R7,R2);
			I4(MLA,R3,R8,R7,R3);
		}
		else
		{
			I3S(ADD,R1,R4,R1,LSL,16+LOOKUP_FIX-p->DstSize[0]);
			I3S(LDR,R7,R8,R7,LSL,2); // YMul * y
			I3S(ADD,R2,R5,R2,LSL,16+LOOKUP_FIX-p->DstSize[1]);
			I3S(ADD,R3,R6,R3,LSL,16+LOOKUP_FIX-p->DstSize[2]);
			I3(ADD,R1,R7,R1);
			I3(ADD,R2,R7,R2);
			I3(ADD,R3,R7,R3);
			Byte(); I3S(LDR,R1,R8,R1,LSR,16+LOOKUP_FIX); //sat and 8bit ror (8-RSize)
			Byte(); I3S(LDR,R2,R8,R2,LSR,16+LOOKUP_FIX); //sat and 8bit ror (8-GSize)
			Byte(); I3S(LDR,R3,R8,R3,LSR,16+LOOKUP_FIX); //sat and 8bit ror (8-BSize)

			// R1 = Dither[8-RSize] | Value[RSize]
			// R2 = Dither[8-GSize] | Value[GSize]
			// R3 = Dither[8-BSize] | Value[BSize]

			RPos += p->DstSize[0]; // LSB part -> MSB part
			GPos += p->DstSize[1]; // LSB part -> MSB part
			BPos += p->DstSize[2]; // LSB part -> MSB part

			// Chosen so that the LSR amounts in the packing step below
			// (SatBit - DstSize) come out as DstSize - 32.
			SatBitR = 2*p->DstSize[0] - 32;
			SatBitG = 2*p->DstSize[1] - 32;
			SatBitB = 2*p->DstSize[2] - 32;
		}
	}
	else
	{
		if (!p->ColorLookup)
		{
			// channel = YMul*y + chroma term
			I4(MLA,R1,R8,R7,R4);
			I4(MLA,R2,R8,R7,R5);
			I4(MLA,R3,R8,R7,R6);
		}
		else
		{
			I3S(LDR,R7,R8,R7,LSL,2); // YMul * y
			I3(ADD,R1,R7,R4);
			I3(ADD,R2,R7,R5);
			I3(ADD,R3,R7,R6);
			Byte(); I3S(LDR,R1,R8,R1,LSR,16+LOOKUP_FIX); // sat to 8bit
			Byte(); I3S(LDR,R2,R8,R2,LSR,16+LOOKUP_FIX); // sat to 8bit
			Byte(); I3S(LDR,R3,R8,R3,LSR,16+LOOKUP_FIX); // sat to 8bit
			SatBitR = SatBitB = SatBitG = 8;
		}
	}

	if (!p->ColorLookup)
	{
		// Saturate the three channels (the lookup path already did this via
		// its table reads).
		if (p->QAdd)
		{
			I3(QDADD,R1,R1,R1);
			I3(QDADD,R2,R2,R2);
			I3(QDADD,R3,R3,R3);
		}
		else
		{
			// Any of the top 8 bits set: clamp to 0x00FFFFFF, or to 0 when
			// the value is negative.
			I2C(TST,NONE,R1,0xFF000000);
			C(NE);I2C(MVN,R1,NONE,0xFF000000);
			C(MI);I2C(MOV,R1,NONE,0x00000000);
			I2C(TST,NONE,R2,0xFF000000);
			C(NE);I2C(MVN,R2,NONE,0xFF000000);
			C(MI);I2C(MOV,R2,NONE,0x00000000);
			I2C(TST,NONE,R3,0xFF000000);
			C(NE);I2C(MVN,R3,NONE,0xFF000000);
			C(MI);I2C(MOV,R3,NONE,0x00000000);
		}
	}

	// Pack into R0. R0 is kept rotated so that the channel being merged sits
	// at the low bits; p->Pos records the rotation for the caller to undo.
	if (p->InvertMask && p->Pos<0)
	{
		// First pixel of the word: start from the invert mask instead of zero.
		p->Pos = RPos;
		MB(); I1P(LDR,R0,p->InvertMask,0);
	}
	if (p->Pos!=RPos && p->Pos>=0) I3S(MOV,R0,NONE,R0,ROR,RPos-p->Pos);
	I3S(p->Pos<0?MOV:EOR,R0,(reg)(p->Pos<0?NONE:R0),R1,LSR,SatBitR-p->DstSize[0]);
	I3S(MOV,R7,NONE,R2,LSR,SatBitG-p->DstSize[1]);
	I3S(MOV,R0,NONE,R0,ROR,BPos-RPos);
	I3S(EOR,R0,R0,R7,ROR,BPos-GPos);
	I3S(EOR,R0,R0,R3,LSR,SatBitB-p->DstSize[2]);
	p->Pos = BPos;

	if (p->Dither && !p->ColorLookup)
	{
		// Move the bits below the emitted value to the top of R1..R3 so the
		// next pixel's LSR (see the dither branch above) adds them back in.
		MB(); I3S(MOV,R1,NONE,R1,LSL,32-SatBitR+p->DstSize[0]);
		MB(); I3S(MOV,R2,NONE,R2,LSL,32-SatBitG+p->DstSize[1]);
		MB(); I3S(MOV,R3,NONE,R3,LSL,32-SatBitB+p->DstSize[2]);
	}
}
|
||
|
|
||
|
// Generates the ARM routine that converts planar Y/U/V source data to packed
// RGB, handling one 2x2 pixel block per inner-loop iteration.
//
// The routine is emitted between CodeBegin() and CodeEnd(); the literal
// constants it loads (YMul, RVMul, ..., optional masks and the lookup table)
// are posted after the code.
//
// Register use in the generated routine, as set up below:
//   R9  destination pointer        R10 U pointer      R11 V pointer
//   R12 first luma row pointer     R14 second luma row pointer (R12+SrcPitch)
//   R8  YMul, or the lookup table base when p->ColorLookup
//   R5/R6 DiffMask / Src2SrcLast at loop entry when p->OnlyDiff
//   R7  EndOfLine at the loop test
//
// NOTE(review): statement order is the emitted instruction order; do not
// rearrange.
void Fix_RGB_UV(blit_soft* p)
{
	dyninst* LoopY;
	dyninst* LoopX;
	int Invert = 0;
	int Mask = 0;

	p->DstAlignSize = 4;
	p->Dither = (boolmem_t)((p->FX.Flags & BLITFX_DITHER)!=0);
	// Dithering is only generated for non-doubled output of at most 16 bpp.
	if (p->DstDoubleX || p->DstDoubleY || p->DstBPP>16)
		p->Dither = 0;

	// Destination bytes per inner-loop step: two pixels, doubled if DstDoubleX.
	p->DstStepX = p->DirX * ((p->DstBPP*2) >> 3) << p->DstDoubleX;
	p->LookUp = NULL;
	p->PalPtr = NULL;
	p->DiffMask = NULL;
	p->InvertMask = NULL;

	if (p->Dst.Flags & PF_INVERTED)
		Invert = -1;

	if (p->ColorLookup)
	{
		// The table is copied into a dyncode data instruction, so the
		// temporary buffer can be released immediately.
		CalcLookUp(p,p->Dither);
		p->LookUp = InstCreate(p->LookUp_Data,p->LookUp_Size,NONE,NONE,NONE,0,0);
		free(p->LookUp_Data);
		p->LookUp_Data = NULL;
	}

	if (p->QAdd)
	{
		int Mask2;
		int i,Shift;

		// Collect the top bit of each colour channel.
		// NOTE(review): if DstPos[i]+DstSize[i] can reach 32 this is 1<<31 on
		// a signed int — confirm the range or switch to an unsigned constant.
		for (i=0;i<3;++i)
			Mask |= 1 << (p->DstPos[i] + p->DstSize[i] -1);

		// Replicate the mask for the second pixel sharing the output word.
		Mask2 = Mask;
		Shift = 0;
		if (p->DstBPP==8 && p->DstDoubleX)
			Mask2 |= Mask << 16;
		else
		if (p->DstBPP <= 16)
		{
			if (p->DirX<0) Shift = p->DstBPP;
			Mask2 |= Mask << p->DstBPP;
		}

		Invert ^= RotateRight(Mask2,Shift+p->DstPos[0]);
	}

	// 32-bit constants the generated code loads PC-relative; posted after CodeEnd().
	p->YMul = InstCreate32(p->_YMul,NONE,NONE,NONE,0,0);
	p->RVMul = InstCreate32(p->_RVMul,NONE,NONE,NONE,0,0);
	p->RAdd = InstCreate32(p->_RAdd,NONE,NONE,NONE,0,0);
	p->GUMul = InstCreate32(p->_GUMul,NONE,NONE,NONE,0,0);
	p->GVMul = InstCreate32(p->_GVMul,NONE,NONE,NONE,0,0);
	p->GAdd = InstCreate32(p->_GAdd,NONE,NONE,NONE,0,0);
	p->BUMul = InstCreate32(p->_BUMul,NONE,NONE,NONE,0,0);
	p->BAdd = InstCreate32(p->_BAdd,NONE,NONE,NONE,0,0);

	if (Invert)
		p->InvertMask = InstCreate32(Invert,NONE,NONE,NONE,0,0);

	if (p->OnlyDiff)
		p->DiffMask = InstCreate32(0xFCFCFCFC,NONE,NONE,NONE,0,0);

	CodeBegin();
	// Reserve the scratch slots that precede StackFrame in struct stack.
	I2C(SUB,SP,SP,OFS(stack,StackFrame));

	I2C(LDR,R9,R1,0);	//Dst[0] RGB
	I2C(LDR,R10,R2,4);	//Src[1] U
	I2C(LDR,R11,R2,8);	//Src[2] V
	I2C(LDR,R12,R2,0);	//Src[0] Y
	I2C(STR,R3,SP,OFS(stack,DstPitch));

	I3(MOV,R6,NONE,R3); //DstPitch
	I2C(LDR,R7,SP,OFS(stack,SrcPitch));
	I2C(LDR,R0,SP,OFS(stack,Height));
	I2C(LDR,R4,SP,OFS(stack,Width));

	if (!p->ColorLookup)
		I1P(LDR,R8,p->YMul,0);
	else
		I1P(MOV,R8,p->LookUp,0);

	//YNext = 2*Src->Pitch - (Width >> SrcDoubleX)
	I3S(MOV,R1,NONE,R7,LSL,1);
	I3S(SUB,R1,R1,R4,LSR,p->SrcDoubleX);
	I2C(STR,R1,SP,OFS(stack,YNext));

	//UVNext = (Src->Pitch >> 1) - (Width >> SrcDoubleX >> 1);
	I3S(MOV,R2,NONE,R7,ASR,1);
	I3S(SUB,R2,R2,R4,LSR,p->SrcDoubleX+1);
	I2C(STR,R2,SP,OFS(stack,UVNext));

	if (p->DirX<0 && p->DstBPP==16) //adjust reversed destination for block size
		I2C(SUB,R9,R9,-p->DstStepX-(p->DstBPP >> 3));
	if (p->DstBPP==32)
		I2C(ADD,R9,R9,p->DstStepX/2);

	if (p->SwapXY)
	{
		// EndOfRect = Dst + ((Height * DstBPP * DirX) >> 3) - (DstPitch << DstDoubleY)
		I3S(SUB,R9,R9,R6,LSL,p->DstDoubleY);
		I2C(MOV,R1,NONE,p->DstBPP * p->DirX);
		I3(MUL,R0,R1,R0);
		I3S(ADD,R0,R9,R0,ASR,3);
		I2C(STR,R0,SP,OFS(stack,EndOfRect));

		//DstNext = DstStepX - Width*DstPitch;
		MB(); I3(MUL,R2,R6,R4);
		I2C(MOV,R0,NONE,p->DstStepX);
		I3(SUB,R0,R0,R2);
		I2C(STR,R0,SP,OFS(stack,DstNext));
	}
	else
	{
		// EndOfRect = Dst + DstPitch * Height
		I3(MUL,R0,R6,R0);
		I3(ADD,R0,R9,R0);
		I2C(STR,R0,SP,OFS(stack,EndOfRect));

		//DstNext = (DstPitch << (DstDoubleY+1)) - DirX * (Width << DstBPP2);
		I3S(MOV,R2,NONE,R6,LSL,p->DstDoubleY+1);
		I3S(p->DirX>0?SUB:ADD,R2,R2,R4,LSL,p->DstBPP2);
		I2C(STR,R2,SP,OFS(stack,DstNext));
	}
	// R14 = second luma row
	I3(ADD,R14,R12,R7);

	if (p->Dither)
	{
		// Initial value of the dither state carried in R1,R2,R3.
		if (!p->ColorLookup)
		{
			I2C(MVN,R1,NONE,0x80000000);
			I2C(MVN,R2,NONE,0x80000000);
			I2C(MVN,R3,NONE,0x80000000);
		}
		else
		{
			I2C(MOV,R1,NONE,0x80);
			I2C(MOV,R2,NONE,0x80);
			I2C(MOV,R3,NONE,0x80);
		}
	}

	LoopY = Label(1);

	// EndOfLine: destination address at which the X loop below terminates.
	if (p->SwapXY)
	{
		I3(MUL,R0,R6,R4); //R6=dstpitch
		I3(ADD,R7,R9,R0);
	}
	else
	{
		if (p->DirX > 0)
			I3S(ADD,R7,R9,R4,LSL,p->DstBPP2);
		else
			I3S(SUB,R7,R9,R4,LSL,p->DstBPP2);
	}
	I2C(STR,R7,SP,OFS(stack,EndOfLine));

	// preload
	if (p->ARM5) // not needed for non slices mode, but testing showed it didn't help on ARM4
	{
		if (!p->Slices)
		{
			// Touch the current Y, Y+1, U and V rows with byte loads at a
			// 32-byte stride (two loads per 64-byte loop step). The loaded
			// values in R6/R7 are discarded.

			//R4 width
			//R0,R7,R5,R6 tmp

			dyninst* PreLoadEnd = Label(0);
			dyninst* PreLoad1;
			dyninst* PreLoad2;
			dyninst* PreLoad3;
			dyninst* PreLoad4;

			I3S(ADD,R0,R12,R4,ASR,(p->SrcDoubleX?1:0)-(p->SrcHalfX?1:0));
			I2C(ADD,R5,R12,32);
			I3(CMP,NONE,R5,R0);
			I0P(B,CS,PreLoadEnd);

			//y0
			PreLoad1 = Label(1);
			Byte(); I2C(LDR,R6,R5,-32);
			I2C(ADD,R5,R5,64);
			I3(CMP,NONE,R5,R0);
			Byte(); I2C(LDR,R7,R5,-64);
			I0P(B,CC,PreLoad1);

			I3S(ADD,R0,R14,R4,ASR,(p->SrcDoubleX?1:0)-(p->SrcHalfX?1:0));
			I2C(ADD,R5,R14,32);

			//y1
			PreLoad2 = Label(1);
			Byte(); I2C(LDR,R6,R5,-32);
			I2C(ADD,R5,R5,64);
			I3(CMP,NONE,R5,R0);
			Byte(); I2C(LDR,R7,R5,-64);
			I0P(B,CC,PreLoad2);

			I3S(ADD,R0,R10,R4,ASR,(p->SrcDoubleX?1:0)-(p->SrcHalfX?1:0)+p->SrcUVX2);
			I2C(ADD,R5,R10,32);
			I3(CMP,NONE,R5,R0);
			I0P(B,CS,PreLoadEnd);

			//u
			PreLoad3 = Label(1);
			Byte(); I2C(LDR,R6,R5,-32);
			I2C(ADD,R5,R5,64);
			I3(CMP,NONE,R5,R0);
			Byte(); I2C(LDR,R7,R5,-64);
			I0P(B,CC,PreLoad3);

			I3S(ADD,R0,R11,R4,ASR,(p->SrcDoubleX?1:0)-(p->SrcHalfX?1:0)+p->SrcUVX2);
			I2C(ADD,R5,R11,32);

			//v
			PreLoad4 = Label(1);
			Byte(); I2C(LDR,R6,R5,-32);
			I2C(ADD,R5,R5,64);
			I3(CMP,NONE,R5,R0);
			Byte(); I2C(LDR,R7,R5,-64);
			I0P(B,CC,PreLoad4);

			// The skip path of the X loop compares R9 against R7 without
			// reloading it, so R7 must hold EndOfLine again.
			if (p->OnlyDiff) //restore R7
				I2C(LDR,R7,SP,OFS(stack,EndOfLine));

			InstPost(PreLoadEnd);
		}
		else
		{
			//preload next
			MB(); I2C(LDR,R6,SP,OFS(stack,SrcPitch));
			I3S(PLD,NONE,R12,R6,LSL,1);
			I3S(PLD,NONE,R14,R6,LSL,1);
			I3S(PLD,NONE,R10,R6,ASR,p->SrcUVPitch2);
			I3S(PLD,NONE,R11,R6,ASR,p->SrcUVPitch2);
		}
	}

	if (p->OnlyDiff)
	{
		MB(); I1P(LDR,R5,p->DiffMask,0);
		MB(); I2C(LDR,R6,SP,OFS(stack,Src2SrcLast));
		// Forward label; posted near the end of the X loop body.
		p->Skip = Label(0);
	}

	LoopX = Label(1);
	{
		int PitchDouble;
		reg Pitch;

		Fix_RGB_UV_LoadUV(p);

		// First pixel pair: stored at R9 + DstPitch (scaled by DstDoubleY).
		p->Pos = -1;

		Fix_RGB_UV_Pixel(p,0,0);
		if (p->DstBPP==32)
		{
			// 32 bpp: one pixel per word, so store each pixel separately.
			if (p->Pos)
				I3S(MOV,R0,NONE,R0,ROR,-p->Pos);

			assert(!p->DstDoubleX && !p->DstDoubleY);
			MB(); I2C(LDR,R1,SP,OFS(stack,DstPitch));
			I2C(ADD,R1,R1,-p->DstStepX/2);
			I3(STR,R0,R9,R1);
			p->Pos = -1;
		}

		Fix_RGB_UV_Pixel(p,1,0);
		// Undo the rotation Fix_RGB_UV_Pixel left on R0.
		if (p->Pos)
			I3S(MOV,R0,NONE,R0,ROR,-p->Pos);

		// R1..R3 carry dither state, so use R7 for the pitch in that case.
		Pitch = (reg)(p->Dither ? R7:R1);
		MB(); I2C(LDR,Pitch,SP,OFS(stack,DstPitch));

		if (p->DstBPP==8 && p->DstDoubleX)
			I3S(ORR,R0,R0,R0,LSL,8);

		if (p->DstBPP==16 && p->DstDoubleX)
		{
			// Duplicate each 16-bit pixel into its own word: R3 gets the
			// upper pixel, R0 the lower one.
			I2C(ADD,R9,R9,4);
			I3S(MOV,R3,NONE,R0,LSR,16);
			I3S(MOV,R0,NONE,R0,LSL,16);
			I3S(ORR,R3,R3,R3,LSL,16);
			I3S(ORR,R0,R0,R0,LSR,16);

			if (p->DstDoubleY)
			{
				I3S(ADD,R2,Pitch,Pitch,LSL,1); //R2=3*DstPitch
				I3(STR,R3,R9,R2);
			}
			I3S(STR,R3,R9,Pitch,LSL,p->DstDoubleY);
			I2C(SUB,R9,R9,4);
		}

		if (p->DstDoubleY)
		{
			I3S(ADD,R2,Pitch,Pitch,LSL,1); //R2=3*DstPitch
			if (p->DstBPP==8 && !p->DstDoubleX) Half();
			I3(STR,R0,R9,R2);
		}

		PitchDouble = p->DstDoubleY;
		if (p->DstBPP==8 && !p->DstDoubleX)
		{
			if (PitchDouble) // can't use STR with Half() and LSL,#1 at the same time
			{
				PitchDouble = 0;
				I3(ADD,Pitch,Pitch,Pitch);
			}
			Half();
		}
		I3S(STR,R0,R9,Pitch,LSL,PitchDouble);
		if (p->SwapXY)
			I3S(ADD,R9,R9,Pitch,LSL,1+PitchDouble);

		// Second pixel pair: stored at R9 itself.
		p->Pos = -1;
		Fix_RGB_UV_Pixel(p,0,1);
		if (p->DstBPP==32)
		{
			if (p->Pos)
				I3S(MOV,R0,NONE,R0,ROR,-p->Pos);

			assert(!p->DstDoubleX && !p->DstDoubleY);
			I2C(STR,R0,R9,-p->DstStepX/2);
			p->Pos = -1;
		}

		Fix_RGB_UV_Pixel(p,1,1);
		if (p->Pos)
			I3S(MOV,R0,NONE,R0,ROR,-p->Pos);

		if (p->DstBPP==8 && p->DstDoubleX)
			I3S(ORR,R0,R0,R0,LSL,8);

		if (p->DstDoubleY)
		{
			MB(); I2C(LDR,R1,SP,OFS(stack,DstPitch));
		}

		if (p->DstBPP==16 && p->DstDoubleX)
		{
			I3S(MOV,R3,NONE,R0,LSR,16);
			I3S(MOV,R0,NONE,R0,LSL,16);
			I3S(ORR,R3,R3,R3,LSL,16);
			I3S(ORR,R0,R0,R0,LSR,16);

			if (p->DstDoubleY)
			{
				I2C(ADD,R2,R1,4); //DstPitch+4
				I3(STR,R3,R9,R2);
			}
			I2C(STR,R3,R9,4);
		}

		if (p->DstDoubleY)
		{
			if (p->DstBPP==8 && !p->DstDoubleX) Half();
			I3(STR,R0,R9,R1);
		}

		if (p->DstBPP==8 && !p->DstDoubleX) Half();
		if (p->SwapXY)
			I2(STR,R0,R9);
		else
			I2C(STR_POST,R0,R9,p->DstStepX);

		MB(); I2C(LDR,R7,SP,OFS(stack,EndOfLine));
		if (p->OnlyDiff)
		{
			// Reload the registers the next iteration's diff test needs, then
			// post the label that unchanged blocks branch to.
			MB(); I1P(LDR,R5,p->DiffMask,0);
			MB(); I2C(LDR,R6,SP,OFS(stack,Src2SrcLast));
			InstPost(p->Skip);
		}

		I3(CMP,NONE,R9,R7);
		I0P(B,NE,LoopX);
	}

	I2C(LDR,R0,SP,OFS(stack,YNext));
	I2C(LDR,R4,SP,OFS(stack,DstNext));
	I2C(LDR,R6,SP,OFS(stack,UVNext));
	I2C(LDR,R5,SP,OFS(stack,EndOfRect));

	//increment pointers
	I3(ADD,R12,R12,R0);
	I3(ADD,R14,R14,R0);
	I3(ADD,R9,R9,R4);
	I3(ADD,R10,R10,R6);
	I3(ADD,R11,R11,R6);

	//prepare registers for next row
	if (p->SwapXY) I2C(LDR,R6,SP,OFS(stack,DstPitch));
	I2C(LDR,R4,SP,OFS(stack,Width));

	I3(CMP,NONE,R9,R5);
	I0P(B,NE,LoopY);

	I2C(ADD,SP,SP,OFS(stack,StackFrame));
	CodeEnd();

	// Literal pool: constants referenced by the I1P loads above.
	InstPost(p->YMul);
	InstPost(p->RVMul);
	InstPost(p->RAdd);
	InstPost(p->GUMul);
	InstPost(p->GVMul);
	InstPost(p->GAdd);
	InstPost(p->BUMul);
	InstPost(p->BAdd);
	if (p->InvertMask) InstPost(p->InvertMask);
	if (p->DiffMask) InstPost(p->DiffMask);
	if (p->PalPtr) InstPost(p->PalPtr);
	if (p->LookUp)
	{
		Align(16);
		InstPost(p->LookUp);
	}
}
|
||
|
|
||
|
#endif
|