504 lines
10 KiB
C
504 lines
10 KiB
C
|
/*****************************************************************************
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * $Id: mcomp_mips32.c 323 2005-11-01 20:52:32Z picard $
 *
 * The Core Pocket Media Player
 * Copyright (c) 2004-2005 Gabor Kovacs
 *
 ****************************************************************************/
|
||
|
|
||
|
#include "../common.h"
|
||
|
#include "softidct.h"
|
||
|
|
||
|
#if defined(MIPS32)
|
||
|
|
||
|
// Register usage in the inline assembly below:
//
// $8		src end pointer
// $4		src pointer
// $5		dst pointer
// $6		src stride
// $7		dst stride
// $2,$9	first item lower 4 bytes (in two forms)
// $10,$11	first item upper 4 bytes (in two forms)
// $12,$13	second item lower 4 bytes (in two forms)
// $14,$15	second item upper 4 bytes (in two forms)
// $24		0x01010101 - for the non-horver routines
// $25		0xFEFEFEFE - for the non-horver routines
// $24		rounding constant - for the horver routines
// $25		temporary - for the horver routines
// $3		0x03030303 - for the horver routines
// $1		0xFCFCFCFC - for the horver routines
// $16		temporary - for the horver routines (must be saved/restored)
|
||
|
|
||
|
// Sets the loop terminator for an 8-row block:
// $8 = $4 + ($6 << 3), i.e. src pointer + 8 * src stride.
#define SET_SRCEND8 \
	"sll $8,$6,3;" \
	"addu $8,$4,$8;"
|
||
|
|
||
|
// Sets the loop terminator for a 16-row block:
// $8 = $4 + ($6 << 4), i.e. src pointer + 16 * src stride.
#define SET_SRCEND16 \
	"sll $8,$6,4;" \
	"addu $8,$4,$8;"
|
||
|
|
||
|
// Loads the per-byte masks used by the two-sample averaging code:
// $24 = 0x01010101 (lowest bit of every byte),
// $25 = ~$24 = 0xFEFEFEFE (every byte without its lowest bit).
#define SET_MASKS \
	"li $24,0x01010101;" \
	"nor $25,$24,$0;"
|
||
|
|
||
|
// Loads the per-byte masks used by the four-sample (horver) averaging code:
// $3 = 0x03030303 (lowest two bits of every byte),
// $1 = ~$3 = 0xFCFCFCFC (every byte without its lowest two bits).
// ".set noat" is issued first because $1 is the assembler temporary ($at);
// this macro itself does not switch the assembler back to ".set at".
#define SET_MASKS2 \
	".set noat;" \
	"li $3,0x03030303;" \
	"nor $1,$3,$0;"
|
||
|
|
||
|
// Loads the "first" 8 source bytes from ofs($4) with unaligned word loads:
// $2  = bytes 0..3, $10 = bytes 4..7 (raw),
// $9  = ($2  & $25) >> 1, $11 = ($10 & $25) >> 1 (every byte halved).
// Expects $25 = 0xFEFEFEFE as set up by SET_MASKS.
#define LOAD_FIRST8(ofs) \
	"ulw $2, " #ofs "($4);" \
	"ulw $10," #ofs "+4($4);" \
	"and $9,$2,$25;" \
	"and $11,$10,$25;" \
	"srl $9,$9,1;" \
	"srl $11,$11,1;"
|
||
|
|
||
|
// Loads the "second" 8 source bytes from ofs($4) with unaligned word loads:
// $12 = bytes 0..3, $14 = bytes 4..7 (raw),
// $13 = ($12 & $25) >> 1, $15 = ($14 & $25) >> 1 (every byte halved).
// Expects $25 = 0xFEFEFEFE as set up by SET_MASKS.
#define LOAD_SECOND8(ofs) \
	"ulw $12," #ofs "($4);" \
	"ulw $14," #ofs "+4($4);" \
	"and $13,$12,$25;" \
	"and $15,$14,$25;" \
	"srl $13,$13,1;" \
	"srl $15,$15,1;"
|
||
|
|
||
|
// Horizontal pre-sum of the "first" row for the horver routines.
// For bytes 0..3 (words at 0($4) and 1($4)) and bytes 4..7 (words at
// 4($4) and 5($4)) each byte is combined with its right-hand neighbour:
//   $2  / $10 = sum of the low two bits   (x & $3) of both samples
//   $9  / $11 = sum of the upper six bits ((x & $1) >> 2) of both samples
// Expects $3 = 0x03030303 and $1 = 0xFCFCFCFC (SET_MASKS2).
// Uses $16 and $25 as scratch registers.
#define LOAD_FIRST8_HV \
	"ulw $2,0($4);" \
	"ulw $9,1($4);" \
	"and $16,$2,$1;" \
	"and $25,$9,$1;" \
	"and $2,$2,$3;" \
	"and $9,$9,$3;" \
	"srl $16,$16,2;" \
	"srl $25,$25,2;" \
	"addu $2,$2,$9;" \
	"addu $9,$16,$25;" \
	\
	"ulw $10,4($4);" \
	"ulw $11,5($4);" \
	"and $16,$10,$1;" \
	"and $25,$11,$1;" \
	"and $10,$10,$3;" \
	"and $11,$11,$3;" \
	"srl $16,$16,2;" \
	"srl $25,$25,2;" \
	"addu $10,$10,$11;" \
	"addu $11,$16,$25;"
|
||
|
|
||
|
// Horizontal pre-sum of the "second" row for the horver routines.
// Same computation as LOAD_FIRST8_HV, but the results go to the
// "second item" registers:
//   $12 / $14 = sum of the low two bits   (x & $3) of both samples
//   $13 / $15 = sum of the upper six bits ((x & $1) >> 2) of both samples
// Expects $3 = 0x03030303 and $1 = 0xFCFCFCFC (SET_MASKS2).
// Uses $16 and $25 as scratch registers.
#define LOAD_SECOND8_HV \
	"ulw $12,0($4);" \
	"ulw $13,1($4);" \
	"and $16,$12,$1;" \
	"and $25,$13,$1;" \
	"and $12,$12,$3;" \
	"and $13,$13,$3;" \
	"srl $16,$16,2;" \
	"srl $25,$25,2;" \
	"addu $12,$12,$13;" \
	"addu $13,$16,$25;" \
	\
	"ulw $14,4($4);" \
	"ulw $15,5($4);" \
	"and $16,$14,$1;" \
	"and $25,$15,$1;" \
	"and $14,$14,$3;" \
	"and $15,$15,$3;" \
	"srl $16,$16,2;" \
	"srl $25,$25,2;" \
	"addu $14,$14,$15;" \
	"addu $15,$16,$25;"
|
||
|
|
||
|
// Per-byte average of the first and second item, rounding up:
//   result = ((a | b) & 0x01) + (a >> 1) + (b >> 1)  ==  (a + b + 1) >> 1
// Raw values are in $2/$10 and $12/$14, halved values in $9/$11 and $13/$15;
// $24 must hold 0x01010101 (SET_MASKS). The result is left in $2 and $10.
#define AVG8 \
	"or $2,$2,$12;" \
	"or $10,$10,$14;" \
	"and $2,$2,$24;" \
	"and $10,$10,$24;" \
	"addu $2,$2,$9;" \
	"addu $10,$10,$11;" \
	"addu $2,$2,$13;" \
	"addu $10,$10,$15;"
|
||
|
|
||
|
// Per-byte average of the first and second item, rounding down:
//   result = ((a & b) & 0x01) + (a >> 1) + (b >> 1)  ==  (a + b) >> 1
// Raw values are in $2/$10 and $12/$14, halved values in $9/$11 and $13/$15;
// $24 must hold 0x01010101 (SET_MASKS). The result is left in $2 and $10.
#define AVGROUND8 \
	"and $2,$2,$12;" \
	"and $10,$10,$14;" \
	"and $2,$2,$24;" \
	"and $10,$10,$24;" \
	"addu $2,$2,$9;" \
	"addu $10,$10,$11;" \
	"addu $2,$2,$13;" \
	"addu $10,$10,$15;"
|
||
|
|
||
|
// Makes the current "second item" the next "first item":
// copies $12,$13,$14,$15 into $2,$9,$10,$11 so that the row just loaded
// is reused as the upper row of the next iteration.
#define SWAPSET8 \
	"move $2,$12;" \
	"move $9,$13;" \
	"move $10,$14;" \
	"move $11,$15;"
|
||
|
|
||
|
// Stores the 8 result bytes in $2 (bytes 0..3) and $10 (bytes 4..7) to the
// destination row at $5 using aligned word stores, then advances $5 by the
// destination stride in $7.
#define WRITE8 \
	"sw $2,0($5);" \
	"sw $10,4($5);" \
	"addu $5,$5,$7;"
|
||
|
|
||
|
// Pushes $16 on the stack: the horver routines use it as a scratch register
// and it must be preserved for the caller.
// The memory operand uses "$sp", matching the register spelling of the
// preceding addiu; a bare "sp" is not a register name for every assembler.
#define SAVE \
	"addiu $sp,$sp,-4;" \
	"sw $16,0($sp);"
|
||
|
|
||
|
// Pops $16 from the stack, undoing SAVE.
// The memory operand uses "$sp", matching the register spelling of the
// following addiu; a bare "sp" is not a register name for every assembler.
#define RESTORE \
	"lw $16,0($sp);" \
	"addiu $sp,$sp,4;"
|
||
|
|
||
|
void CopyBlock(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm( SET_SRCEND8
|
||
|
|
||
|
"loop:"
|
||
|
"ulw $2,0($4);"
|
||
|
"ulw $10,4($4);"
|
||
|
"addu $4,$4,$6;"
|
||
|
"sw $2,0($5);"
|
||
|
"sw $10,4($5);"
|
||
|
"addu $5,$5,$7;"
|
||
|
"bne $4,$8,loop;");
|
||
|
}
|
||
|
|
||
|
void CopyBlockHor(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm ( SET_SRCEND8
|
||
|
SET_MASKS
|
||
|
|
||
|
"loophor:"
|
||
|
LOAD_FIRST8(0)
|
||
|
LOAD_SECOND8(1)
|
||
|
"addu $4,$4,$6;"
|
||
|
|
||
|
AVG8
|
||
|
WRITE8
|
||
|
|
||
|
"bne $4,$8,loophor;");
|
||
|
}
|
||
|
|
||
|
void CopyBlockHorRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm ( SET_SRCEND8
|
||
|
SET_MASKS
|
||
|
|
||
|
"loophorround:"
|
||
|
LOAD_FIRST8(0)
|
||
|
LOAD_SECOND8(1)
|
||
|
|
||
|
"addu $4,$4,$6;"
|
||
|
|
||
|
AVGROUND8
|
||
|
WRITE8
|
||
|
|
||
|
"bne $4,$8,loophorround;");
|
||
|
}
|
||
|
|
||
|
void CopyBlockVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm ( SET_SRCEND8
|
||
|
SET_MASKS
|
||
|
|
||
|
LOAD_FIRST8(0)
|
||
|
|
||
|
"loopver:"
|
||
|
"addu $4,$4,$6;"
|
||
|
|
||
|
LOAD_SECOND8(0)
|
||
|
|
||
|
AVG8
|
||
|
WRITE8
|
||
|
SWAPSET8
|
||
|
|
||
|
"bne $4,$8,loopver;");
|
||
|
}
|
||
|
|
||
|
void CopyBlockVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm ( SET_SRCEND8
|
||
|
SET_MASKS
|
||
|
|
||
|
LOAD_FIRST8(0)
|
||
|
|
||
|
"loopverround:"
|
||
|
"addu $4,$4,$6;"
|
||
|
|
||
|
LOAD_SECOND8(0)
|
||
|
|
||
|
AVGROUND8
|
||
|
WRITE8
|
||
|
SWAPSET8
|
||
|
|
||
|
"bne $4,$8,loopverround;");
|
||
|
}
|
||
|
|
||
|
void CopyBlockHorVer(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm ( SAVE
|
||
|
SET_SRCEND8
|
||
|
SET_MASKS2
|
||
|
|
||
|
"sll $24,$3,1;"
|
||
|
"and $24,$24,$3;" // 0x0202 0202
|
||
|
|
||
|
//preprocessing
|
||
|
|
||
|
LOAD_FIRST8_HV
|
||
|
|
||
|
"loophorver:"
|
||
|
"addu $4,$4,$6;"
|
||
|
|
||
|
LOAD_SECOND8_HV);
|
||
|
|
||
|
__asm ( "addu $2,$2,$12;"
|
||
|
"addu $9,$9,$13;"
|
||
|
"addu $10,$10,$14;"
|
||
|
"addu $11,$11,$15;"
|
||
|
|
||
|
"addu $2,$2,$24;"
|
||
|
"addu $10,$10,$24;"
|
||
|
|
||
|
"and $2,$2,$1;"
|
||
|
"and $10,$10,$1;"
|
||
|
"srl $2,$2,2;"
|
||
|
"srl $10,$10,2;"
|
||
|
"addu $2,$2,$9;"
|
||
|
"addu $10,$10,$11;"
|
||
|
|
||
|
WRITE8
|
||
|
SWAPSET8
|
||
|
|
||
|
"bne $4,$8,loophorver;"
|
||
|
RESTORE);
|
||
|
}
|
||
|
|
||
|
void CopyBlockHorVerRound(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm ( SAVE
|
||
|
SET_SRCEND8
|
||
|
SET_MASKS2
|
||
|
|
||
|
"srl $24,$3,1;"
|
||
|
"and $24,$24,$3;" // 0x0101 0101
|
||
|
|
||
|
//preprocessing
|
||
|
|
||
|
LOAD_FIRST8_HV
|
||
|
|
||
|
"loophorverround:"
|
||
|
"addu $4,$4,$6;"
|
||
|
|
||
|
LOAD_SECOND8_HV);
|
||
|
|
||
|
__asm ( "addu $2,$2,$12;"
|
||
|
"addu $9,$9,$13;"
|
||
|
"addu $10,$10,$14;"
|
||
|
"addu $11,$11,$15;"
|
||
|
|
||
|
"addu $2,$2,$24;"
|
||
|
"addu $10,$10,$24;"
|
||
|
|
||
|
"and $2,$2,$1;"
|
||
|
"and $10,$10,$1;"
|
||
|
"srl $2,$2,2;"
|
||
|
"srl $10,$10,2;"
|
||
|
"addu $2,$2,$9;"
|
||
|
"addu $10,$10,$11;"
|
||
|
|
||
|
WRITE8
|
||
|
SWAPSET8
|
||
|
|
||
|
"bne $4,$8,loophorverround;"
|
||
|
RESTORE);
|
||
|
}
|
||
|
|
||
|
void AddBlock8x8(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm( SET_SRCEND8
|
||
|
SET_MASKS
|
||
|
|
||
|
"loopadd:"
|
||
|
"lw $2,0($4);"
|
||
|
"lw $10,4($4);"
|
||
|
"addu $4,$4,$6;"
|
||
|
|
||
|
"lw $9,0($5);"
|
||
|
"and $11,$2,$25;"
|
||
|
"or $2,$2,$9;"
|
||
|
"and $2,$2,$24;"
|
||
|
"srl $11,$11,1;"
|
||
|
"addu $2,$2,$11;"
|
||
|
"and $9,$9,$25;"
|
||
|
"srl $9,$9,1;"
|
||
|
"addu $2,$2,$9;"
|
||
|
|
||
|
"lw $11,4($5);"
|
||
|
"and $9,$10,$25;"
|
||
|
"or $10,$10,$11;"
|
||
|
"and $10,$10,$24;"
|
||
|
"srl $9,$9,1;"
|
||
|
"addu $10,$10,$9;"
|
||
|
"and $11,$11,$25;"
|
||
|
"srl $11,$11,1;"
|
||
|
"addu $10,$10,$11;"
|
||
|
|
||
|
"sw $2,0($5);"
|
||
|
"sw $10,4($5);"
|
||
|
"addu $5,$5,$7;"
|
||
|
|
||
|
"bne $4,$8,loopadd;");
|
||
|
}
|
||
|
|
||
|
void AddBlock4x4(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm( SET_MASKS
|
||
|
|
||
|
"lw $2,0($4);"
|
||
|
"addu $4,$4,$6;"
|
||
|
"lw $10,0($4);"
|
||
|
"addu $4,$4,$6;"
|
||
|
|
||
|
"lw $9,0($5);"
|
||
|
"and $11,$2,$25;"
|
||
|
"or $2,$2,$9;"
|
||
|
"and $2,$2,$24;"
|
||
|
"srl $11,$11,1;"
|
||
|
"addu $2,$2,$11;"
|
||
|
"and $9,$9,$25;"
|
||
|
"srl $9,$9,1;"
|
||
|
"addu $2,$2,$9;"
|
||
|
"sw $2,0($5);"
|
||
|
"addu $5,$5,$7;"
|
||
|
|
||
|
"lw $11,0($5);"
|
||
|
"and $9,$10,$25;"
|
||
|
"or $10,$10,$11;"
|
||
|
"and $10,$10,$24;"
|
||
|
"srl $9,$9,1;"
|
||
|
"addu $10,$10,$9;"
|
||
|
"and $11,$11,$25;"
|
||
|
"srl $11,$11,1;"
|
||
|
"addu $10,$10,$11;"
|
||
|
"sw $10,0($5);"
|
||
|
"addu $5,$5,$7;"
|
||
|
|
||
|
"lw $2,0($4);"
|
||
|
"addu $4,$4,$6;"
|
||
|
"lw $10,0($4);"
|
||
|
"addu $4,$4,$6;"
|
||
|
|
||
|
"lw $9,0($5);"
|
||
|
"and $11,$2,$25;"
|
||
|
"or $2,$2,$9;"
|
||
|
"and $2,$2,$24;"
|
||
|
"srl $11,$11,1;"
|
||
|
"addu $2,$2,$11;"
|
||
|
"and $9,$9,$25;"
|
||
|
"srl $9,$9,1;"
|
||
|
"addu $2,$2,$9;"
|
||
|
"sw $2,0($5);"
|
||
|
"addu $5,$5,$7;"
|
||
|
|
||
|
"lw $11,0($5);"
|
||
|
"and $9,$10,$25;"
|
||
|
"or $10,$10,$11;"
|
||
|
"and $10,$10,$24;"
|
||
|
"srl $9,$9,1;"
|
||
|
"addu $10,$10,$9;"
|
||
|
"and $11,$11,$25;"
|
||
|
"srl $11,$11,1;"
|
||
|
"addu $10,$10,$11;"
|
||
|
"sw $10,0($5);"
|
||
|
"addu $5,$5,$7;" );
|
||
|
}
|
||
|
|
||
|
void CopyBlock16x16(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm( SET_SRCEND16
|
||
|
|
||
|
"loopcopy16:"
|
||
|
"lw $2,0($4);"
|
||
|
"lw $10,4($4);"
|
||
|
"lw $12,8($4);"
|
||
|
"lw $14,12($4);"
|
||
|
"addu $4,$4,$6;"
|
||
|
"sw $2,0($5);"
|
||
|
"sw $10,4($5);"
|
||
|
"sw $12,8($5);"
|
||
|
"sw $14,12($5);"
|
||
|
"addu $5,$5,$7;"
|
||
|
"bne $4,$8,loopcopy16;");
|
||
|
}
|
||
|
|
||
|
void CopyBlock8x8(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm( SET_SRCEND8
|
||
|
|
||
|
"loopcopy8:"
|
||
|
"lw $2,0($4);"
|
||
|
"lw $10,4($4);"
|
||
|
"addu $4,$4,$6;"
|
||
|
"lw $12,0($4);"
|
||
|
"lw $14,4($4);"
|
||
|
"addu $4,$4,$6;"
|
||
|
"sw $2,0($5);"
|
||
|
"sw $10,4($5);"
|
||
|
"addu $5,$5,$7;"
|
||
|
"sw $12,0($5);"
|
||
|
"sw $14,4($5);"
|
||
|
"addu $5,$5,$7;"
|
||
|
"bne $4,$8,loopcopy8;");
|
||
|
}
|
||
|
|
||
|
// Copies a 4x4 byte block from Src to Dst.
// Fully unrolled: the four source rows are loaded as one aligned word each
// (lw) and then stored as one aligned word each (sw), so both Src and Dst
// rows must be word aligned.
// Arguments are addressed directly as $4 (Src), $5 (Dst), $6 (SrcStride)
// and $7 (DstStride).
void CopyBlock4x4(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
{
	__asm(	"lw $2,0($4);"
			"addu $4,$4,$6;"
			"lw $10,0($4);"
			"addu $4,$4,$6;"
			"lw $12,0($4);"
			"addu $4,$4,$6;"
			"lw $14,0($4);"
			"sw $2,0($5);"
			"addu $5,$5,$7;"
			"sw $10,0($5);"
			"addu $5,$5,$7;"
			"sw $12,0($5);"
			"addu $5,$5,$7;"
			"sw $14,0($5);");
}
|
||
|
|
||
|
void CopyBlockM(unsigned char * Src, unsigned char * Dst, int SrcStride, int DstStride)
|
||
|
{
|
||
|
__asm( SET_SRCEND16
|
||
|
|
||
|
"loopm:"
|
||
|
"ulw $2,0($4);"
|
||
|
"ulw $10,4($4);"
|
||
|
"ulw $12,8($4);"
|
||
|
"ulw $14,12($4);"
|
||
|
"addu $4,$4,$6;"
|
||
|
"sw $2,0($5);"
|
||
|
"sw $10,4($5);"
|
||
|
"sw $12,8($5);"
|
||
|
"sw $14,12($5);"
|
||
|
"addu $5,$5,$7;"
|
||
|
"bne $4,$8,loopm;");
|
||
|
}
|
||
|
|
||
|
#endif
|