509 lines
6.9 KiB
NASM
Executable File
509 lines
6.9 KiB
NASM
Executable File
;*****************************************************************************
|
|
;*
|
|
;* This program is free software ; you can redistribute it and/or modify
|
|
;* it under the terms of the GNU General Public License as published by
|
|
;* the Free Software Foundation; either version 2 of the License, or
|
|
;* (at your option) any later version.
|
|
;*
|
|
;* This program is distributed in the hope that it will be useful,
|
|
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;* GNU General Public License for more details.
|
|
;*
|
|
;* You should have received a copy of the GNU General Public License
|
|
;* along with this program; if not, write to the Free Software
|
|
;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
;*
|
|
;* $Id: mcomp_mmx.asm 327 2005-11-04 07:09:17Z picard $
|
|
;*
|
|
;* The Core Pocket Media Player
|
|
;* Copyright (c) 2004-2005 Gabor Kovacs
|
|
;*
|
|
;*****************************************************************************
|
|
|
|
BITS 32
|
|
SECTION .text
|
|
|
|
;-----------------------------------------------------------------------------
; cglobal <name>, <argbytes>
;   Redefines <name> as the decorated symbol _<name>@<argbytes> and exports
;   that symbol. Every later use of <name> (including the label that defines
;   the routine) therefore resolves to the decorated symbol.
;   NOTE(review): the _name@N form looks like Win32 stdcall decoration -
;   confirm against the build target.
;-----------------------------------------------------------------------------
%macro cglobal 2
        %define %1 _%1@%2
        global  %1
%endmacro
|
|
|
|
; Exported entry points. The second operand is the number of stack-argument
; bytes; it matches the operand of the `ret N` that ends each routine.

; --- no arguments
cglobal EMMS,0

; --- three arguments (src, dst, src pitch)
cglobal AddBlock,12
cglobal AddBlockHor,12
cglobal AddBlockVer,12
cglobal AddBlockHorVer,12

; --- four arguments (src, dst, src pitch, dst pitch)
cglobal CopyBlock,16
cglobal CopyBlockM,16
cglobal CopyBlockHor,16
cglobal CopyBlockVer,16
cglobal CopyBlockHorVer,16
cglobal CopyBlockHorRound,16
cglobal CopyBlockVerRound,16
cglobal CopyBlockHorVerRound,16
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; EMMS - executes the emms instruction and returns.
;   Takes no stack arguments (ret 0). None of the block routines in this file
;   issue emms themselves.
;-----------------------------------------------------------------------------
EMMS:
        emms
        ret     0
|
|
|
|
;-----------------------------------------------------------------------------
; loadparam <fixed_dst_pitch>
;   Fetches the routine arguments from the stack into registers.
;   Must be expanded after exactly two 4-byte pushes: with the return address
;   at [esp+8], the first stack argument is then at [esp+12].
;
;     esi = src
;     edi = dst
;     eax = src pitch
;     edx = dst pitch: the constant 8 when the parameter is > 0,
;           otherwise the fourth stack argument
;-----------------------------------------------------------------------------
%macro loadparam 1
        mov     eax,[esp+20]            ; src pitch
        mov     esi,[esp+12]            ; src
        mov     edi,[esp+16]            ; dst
%if %1<=0
        mov     edx,[esp+24]            ; dst pitch passed by the caller
%else
        mov     edx,8                   ; dst pitch (fixed for AddBlock)
%endif
%endmacro
|
|
|
|
;-----------------------------------------------------------------------------
; mcomp_setmask <imm32>
;   mm6 = <imm32> replicated into both dwords
;   mm7 = bitwise complement of mm6
;   Clobbers ecx.
;-----------------------------------------------------------------------------
%macro mcomp_setmask 1
        pcmpeqb   mm7,mm7               ; all ones
        mov       ecx,%1
        movd      mm6,ecx
        punpckldq mm6,mm6               ; copy low dword into high dword
        pxor      mm7,mm6               ; mm7 = ~mm6
%endmacro

; loadmask1: mm6 = 0x01 in every byte, mm7 = 0xFE in every byte.
; Used by the two-sample averaging macros.
%macro loadmask1 0
        mcomp_setmask 0x01010101
%endmacro

; loadmask4: mm6 = 0x03 in every byte, mm7 = 0xFC in every byte.
; Used by the four-sample (horizontal + vertical) macros.
%macro loadmask4 0
        mcomp_setmask 0x03030303
%endmacro
|
|
|
|
;-----------------------------------------------------------------------------
; mcomp_loadhalf <disp>, <advance>, <raw>, <half>
;   <raw>  = 8 bytes at [esi+<disp>]
;   <half> = each byte of <raw> shifted right by one. mm7 must hold 0xFE in
;            every byte (loadmask1), so the 64-bit shift cannot carry bits
;            across byte boundaries.
;   When <advance> > 0, esi is advanced by eax (one source row).
;-----------------------------------------------------------------------------
%macro mcomp_loadhalf 4
        movq    %3,[esi+%1]
        movq    %4,%3
%if %2>0
        add     esi,eax
%endif
        pand    %4,mm7
        psrlq   %4,1
%endmacro

; load1 <disp>, <advance>: raw row in mm0, halved row in mm1.
%macro load1 2
        mcomp_loadhalf %1,%2,mm0,mm1
%endmacro

; load2 <disp>, <advance>: raw row in mm2, halved row in mm3.
%macro load2 2
        mcomp_loadhalf %1,%2,mm2,mm3
%endmacro
|
|
|
|
;-----------------------------------------------------------------------------
; mcomp_loadhv <lo>, <hi>
;   Reads the 8 bytes at [esi] and the 8 bytes at [esi+1], then advances esi
;   by eax. Requires the loadmask4 masks (mm6 = 0x03.., mm7 = 0xFC..).
;
;     <lo> = (p[x] & 3)  + (p[x+1] & 3)       per byte
;     <hi> = (p[x] >> 2) + (p[x+1] >> 2)      per byte
;
;   Clobbers mm4 and mm5.
;-----------------------------------------------------------------------------
%macro mcomp_loadhv 2
        movq    %1,[esi]
        movq    mm4,[esi+1]
        add     esi,eax
        movq    %2,%1
        movq    mm5,mm4
        pand    %1,mm6                  ; low two bits of p[x]
        pand    %2,mm7                  ; upper six bits of p[x]
        pand    mm4,mm6                 ; low two bits of p[x+1]
        pand    mm5,mm7                 ; upper six bits of p[x+1]
        psrlq   %2,2
        psrlq   mm5,2
        paddb   %1,mm4
        paddb   %2,mm5
%endmacro

; load1hv: low-bit sum in mm0, high-part sum in mm1.
%macro load1hv 0
        mcomp_loadhv mm0,mm1
%endmacro

; load2hv: low-bit sum in mm2, high-part sum in mm3.
%macro load2hv 0
        mcomp_loadhv mm2,mm3
%endmacro
|
|
|
|
;-----------------------------------------------------------------------------
; mcomp_avgup <dst>, <other>
;   <dst> = ((<dst> | <other>) & mm6) + mm1 + mm3        per byte
;   With mm6 = 0x01.. and mm1/mm3 holding the halved rows produced by
;   load1/load2, this is the average of the two raw rows rounded up:
;   (a + b + 1) >> 1.
;-----------------------------------------------------------------------------
%macro mcomp_avgup 2
        por     %1,%2
        pand    %1,mm6                  ; 1 when either low bit is set
        paddb   %1,mm1
        paddb   %1,mm3
%endmacro

; avg1: result in mm0 (mm2, mm1 and mm3 are left intact).
%macro avg1 0
        mcomp_avgup mm0,mm2
%endmacro

; avg2: result in mm2 (mm0, mm1 and mm3 are left intact).
%macro avg2 0
        mcomp_avgup mm2,mm0
%endmacro
|
|
|
|
;-----------------------------------------------------------------------------
; mcomp_avgdown <dst>, <other>
;   <dst> = ((<dst> & <other>) & mm6) + mm1 + mm3        per byte
;   With mm6 = 0x01.. and mm1/mm3 holding the halved rows produced by
;   load1/load2, this is the average of the two raw rows rounded down:
;   (a + b) >> 1.
;-----------------------------------------------------------------------------
%macro mcomp_avgdown 2
        pand    %1,%2
        pand    %1,mm6                  ; 1 only when both low bits are set
        paddb   %1,mm1
        paddb   %1,mm3
%endmacro

; avground1: result in mm0 (mm2, mm1 and mm3 are left intact).
%macro avground1 0
        mcomp_avgdown mm0,mm2
%endmacro

; avground2: result in mm2 (mm0, mm1 and mm3 are left intact).
%macro avground2 0
        mcomp_avgdown mm2,mm0
%endmacro
|
|
|
|
;-----------------------------------------------------------------------------
; mcomp_store <reg>
;   Writes the 8 bytes of <reg> to [edi], then advances edi by edx
;   (one destination row).
;-----------------------------------------------------------------------------
%macro mcomp_store 1
        movq    [edi],%1
        add     edi,edx
%endmacro

; save1: store mm0 and step to the next destination row.
%macro save1 0
        mcomp_store mm0
%endmacro

; save2: store mm2 and step to the next destination row.
%macro save2 0
        mcomp_store mm2
%endmacro
|
|
|
|
;-----------------------------------------------------------------------------
; mcomp_blend <val>, <tmp>
;   Averages <val> with the 8 bytes already at [edi], rounding up, writes the
;   result back to [edi] and advances edi by edx:
;
;     [edi] = (<val> + [edi] + 1) >> 1                   per byte
;
;   Requires the loadmask1 masks (mm6 = 0x01.., mm7 = 0xFE..).
;   On exit <val> holds its halved value, mm4 the halved old destination and
;   <tmp> the stored result.
;-----------------------------------------------------------------------------
%macro mcomp_blend 2
        movq    mm4,[edi]               ; current destination row
        movq    %2,%1
        por     %2,mm4
        pand    %2,mm6                  ; rounding bit: 1 if either low bit set
        pand    %1,mm7
        pand    mm4,mm7
        psrlq   %1,1
        psrlq   mm4,1
        paddb   %2,%1
        paddb   %2,mm4
        movq    [edi],%2
        add     edi,edx
%endmacro

; saveadd1: blend mm0 into the destination; clobbers mm0, mm1 and mm4.
%macro saveadd1 0
        mcomp_blend mm0,mm1
%endmacro

; saveadd2: blend mm2 into the destination; clobbers mm2, mm3 and mm4.
%macro saveadd2 0
        mcomp_blend mm2,mm3
%endmacro
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; CopyBlock(src, dst, src pitch, dst pitch)
;   Copies 8 rows of 8 bytes from src to dst.
;   Stack arguments: 16 bytes, removed by the callee (ret 16).
;   Preserves esi/edi; uses eax, edx, mm0, mm1. Does not execute emms.
;-----------------------------------------------------------------------------
CopyBlock:
        push    esi
        push    edi
        loadparam 0                     ; esi=src edi=dst eax/edx=pitches

%rep 4                                  ; two rows per repetition
        movq    mm0,[esi]
        movq    mm1,[esi+eax]
        movq    [edi],mm0
        movq    [edi+edx],mm1
        lea     esi,[esi+eax*2]
        lea     edi,[edi+edx*2]
%endrep

        pop     edi
        pop     esi
        ret     16
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; CopyBlockM(src, dst, src pitch, dst pitch)
;   Copies 16 rows of 16 bytes from src to dst.
;   Stack arguments: 16 bytes, removed by the callee (ret 16).
;   Preserves esi/edi; uses eax, edx, mm0-mm3. Does not execute emms.
;-----------------------------------------------------------------------------
CopyBlockM:
        push    esi
        push    edi
        loadparam 0                     ; esi=src edi=dst eax/edx=pitches

%rep 8                                  ; two 16-byte rows per repetition
        movq    mm0,[esi]               ; row 0, bytes 0..7
        movq    mm1,[esi+8]             ; row 0, bytes 8..15
        movq    mm2,[esi+eax]           ; row 1, bytes 0..7
        movq    mm3,[esi+eax+8]         ; row 1, bytes 8..15
        movq    [edi],mm0
        movq    [edi+8],mm1
        movq    [edi+edx],mm2
        movq    [edi+edx+8],mm3
        lea     esi,[esi+eax*2]
        lea     edi,[edi+edx*2]
%endrep

        pop     edi
        pop     esi
        ret     16
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; CopyBlockHor(src, dst, src pitch, dst pitch)
;   For 8 rows of 8 bytes: dst[x] = (src[x] + src[x+1] + 1) >> 1.
;   Reads 9 bytes from every source row.
;   Stack arguments: 16 bytes, removed by the callee (ret 16).
;   Preserves esi/edi; uses eax, ecx, edx, mm0-mm3, mm6, mm7. No emms.
;-----------------------------------------------------------------------------
CopyBlockHor:
        push    esi
        push    edi

        loadparam 0
        loadmask1                       ; mm6 = 0x01.., mm7 = 0xFE..

%rep 8                                  ; one row per repetition
        load1   0,0                     ; src[x], esi unchanged
        load2   1,1                     ; src[x+1], then next source row
        avg1                            ; mm0 = average, rounded up
        save1
%endrep

        pop     edi
        pop     esi
        ret     16
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; CopyBlockVer(src, dst, src pitch, dst pitch)
;   For 8 rows of 8 bytes: dst row y = (src row y + src row y+1 + 1) >> 1.
;   Reads 9 source rows. Each source row is loaded once; the mm0/mm1 and
;   mm2/mm3 pairs alternate as "previous row" and "current row".
;   Stack arguments: 16 bytes, removed by the callee (ret 16).
;   Preserves esi/edi; uses eax, ecx, edx, mm0-mm3, mm6, mm7. No emms.
;-----------------------------------------------------------------------------
CopyBlockVer:
        push    esi
        push    edi

        loadparam 0
        loadmask1                       ; mm6 = 0x01.., mm7 = 0xFE..

        load1   0,1                     ; first source row -> mm0/mm1
%rep 4                                  ; two output rows per repetition
        load2   0,1                     ; next row -> mm2/mm3
        avg1                            ; mm0 = avg; mm2/mm3 stay valid
        save1
        load1   0,1                     ; next row -> mm0/mm1
        avg2                            ; mm2 = avg; mm0/mm1 stay valid
        save2
%endrep

        pop     edi
        pop     esi
        ret     16
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; cbhv_combine <lo>, <hi>, <lo_other>, <hi_other>
;   Combines two rows prepared by load1hv/load2hv into the four-sample
;   average with rounding constant 2:
;
;     <lo> = <hi> + <hi_other> + ((<lo> + <lo_other> + 2) >> 2)
;
;   <hi> is left holding <hi> + <hi_other>; mm4 is left holding 0xFE per byte.
;   <lo_other>/<hi_other> are not modified.
;-----------------------------------------------------------------------------
%macro cbhv_combine 4
        pcmpeqb mm4,mm4                 ; -1 in every byte
        paddb   mm4,mm4                 ; -2 in every byte
        paddb   %1,%3                   ; sum of the four 2-bit remainders
        paddb   %2,%4                   ; sum of the four (sample >> 2) parts
        psubb   %1,mm4                  ; +2
        pand    %1,mm7                  ; clear low bits before the qword shift
        psrlq   %1,2
        paddb   %1,%2
%endmacro

;-----------------------------------------------------------------------------
; CopyBlockHorVer(src, dst, src pitch, dst pitch)
;   For 8 rows of 8 bytes:
;     dst[y][x] = (s[y][x] + s[y][x+1] + s[y+1][x] + s[y+1][x+1] + 2) >> 2
;   Reads 9 source rows of 9 bytes.
;   Stack arguments: 16 bytes, removed by the callee (ret 16).
;   Preserves esi/edi; uses eax, ecx, edx, mm0-mm7. No emms.
;-----------------------------------------------------------------------------
CopyBlockHorVer:
        push    esi
        push    edi

        loadparam 0
        loadmask4                       ; mm6 = 0x03.., mm7 = 0xFC..

        load1hv                         ; first source row -> mm0/mm1
%rep 4                                  ; two output rows per repetition
        load2hv                         ; next row -> mm2/mm3
        cbhv_combine mm0,mm1,mm2,mm3
        save1

        load1hv                         ; next row -> mm0/mm1
        cbhv_combine mm2,mm3,mm0,mm1
        save2
%endrep

        pop     edi
        pop     esi
        ret     16
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; CopyBlockHorRound(src, dst, src pitch, dst pitch)
;   For 8 rows of 8 bytes: dst[x] = (src[x] + src[x+1]) >> 1 (rounds down).
;   Reads 9 bytes from every source row.
;   Stack arguments: 16 bytes, removed by the callee (ret 16).
;   Preserves esi/edi; uses eax, ecx, edx, mm0-mm3, mm6, mm7. No emms.
;-----------------------------------------------------------------------------
CopyBlockHorRound:
        push    esi
        push    edi

        loadparam 0
        loadmask1                       ; mm6 = 0x01.., mm7 = 0xFE..

%rep 8                                  ; one row per repetition
        load1   0,0                     ; src[x], esi unchanged
        load2   1,1                     ; src[x+1], then next source row
        avground1                       ; mm0 = average, rounded down
        save1
%endrep

        pop     edi
        pop     esi
        ret     16
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; CopyBlockVerRound(src, dst, src pitch, dst pitch)
;   For 8 rows of 8 bytes: dst row y = (src row y + src row y+1) >> 1
;   (rounds down). Reads 9 source rows; the mm0/mm1 and mm2/mm3 pairs
;   alternate as "previous row" and "current row".
;   Stack arguments: 16 bytes, removed by the callee (ret 16).
;   Preserves esi/edi; uses eax, ecx, edx, mm0-mm3, mm6, mm7. No emms.
;-----------------------------------------------------------------------------
CopyBlockVerRound:
        push    esi
        push    edi

        loadparam 0
        loadmask1                       ; mm6 = 0x01.., mm7 = 0xFE..

        load1   0,1                     ; first source row -> mm0/mm1
%rep 4                                  ; two output rows per repetition
        load2   0,1                     ; next row -> mm2/mm3
        avground1                       ; mm0 = avg; mm2/mm3 stay valid
        save1
        load1   0,1                     ; next row -> mm0/mm1
        avground2                       ; mm2 = avg; mm0/mm1 stay valid
        save2
%endrep

        pop     edi
        pop     esi
        ret     16
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; cbhvr_combine <lo>, <hi>, <lo_other>, <hi_other>
;   Combines two rows prepared by load1hv/load2hv into the four-sample
;   average with rounding constant 1:
;
;     <lo> = <hi> + <hi_other> + ((<lo> + <lo_other> + 1) >> 2)
;
;   <hi> is left holding <hi> + <hi_other>; mm4 is left holding 0xFF per byte.
;   <lo_other>/<hi_other> are not modified.
;-----------------------------------------------------------------------------
%macro cbhvr_combine 4
        pcmpeqb mm4,mm4                 ; -1 in every byte
        paddb   %1,%3                   ; sum of the four 2-bit remainders
        paddb   %2,%4                   ; sum of the four (sample >> 2) parts
        psubb   %1,mm4                  ; +1
        pand    %1,mm7                  ; clear low bits before the qword shift
        psrlq   %1,2
        paddb   %1,%2
%endmacro

;-----------------------------------------------------------------------------
; CopyBlockHorVerRound(src, dst, src pitch, dst pitch)
;   For 8 rows of 8 bytes:
;     dst[y][x] = (s[y][x] + s[y][x+1] + s[y+1][x] + s[y+1][x+1] + 1) >> 2
;   Reads 9 source rows of 9 bytes.
;   Stack arguments: 16 bytes, removed by the callee (ret 16).
;   Preserves esi/edi; uses eax, ecx, edx, mm0-mm7. No emms.
;-----------------------------------------------------------------------------
CopyBlockHorVerRound:
        push    esi
        push    edi

        loadparam 0
        loadmask4                       ; mm6 = 0x03.., mm7 = 0xFC..

        load1hv                         ; first source row -> mm0/mm1
%rep 4                                  ; two output rows per repetition
        load2hv                         ; next row -> mm2/mm3
        cbhvr_combine mm0,mm1,mm2,mm3
        save1

        load1hv                         ; next row -> mm0/mm1
        cbhvr_combine mm2,mm3,mm0,mm1
        save2
%endrep

        pop     edi
        pop     esi
        ret     16
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; AddBlock(src, dst, src pitch)
;   For 8 rows of 8 bytes: dst[x] = (src[x] + dst[x] + 1) >> 1.
;   The destination pitch is fixed at 8 bytes (loadparam 1).
;   Stack arguments: 12 bytes, removed by the callee (ret 12).
;   Preserves esi/edi; uses eax, ecx, edx, mm0, mm1, mm4, mm6, mm7. No emms.
;-----------------------------------------------------------------------------
AddBlock:
        push    esi
        push    edi

        loadparam 1                     ; edx = 8
        loadmask1                       ; mm6 = 0x01.., mm7 = 0xFE..

%rep 8                                  ; one row per repetition
        movq    mm0,[esi]
        add     esi,eax
        saveadd1                        ; average mm0 into [edi]
%endrep

        pop     edi
        pop     esi
        ret     12
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; AddBlockHor(src, dst, src pitch)
;   For 8 rows of 8 bytes:
;     p      = (src[x] + src[x+1] + 1) >> 1
;     dst[x] = (p + dst[x] + 1) >> 1
;   The destination pitch is fixed at 8 bytes (loadparam 1).
;   Stack arguments: 12 bytes, removed by the callee (ret 12).
;   Preserves esi/edi; uses eax, ecx, edx, mm0-mm4, mm6, mm7. No emms.
;-----------------------------------------------------------------------------
AddBlockHor:
        push    esi
        push    edi

        loadparam 1                     ; edx = 8
        loadmask1                       ; mm6 = 0x01.., mm7 = 0xFE..

%rep 8                                  ; one row per repetition
        load1   0,0                     ; src[x], esi unchanged
        load2   1,1                     ; src[x+1], then next source row
        avg1                            ; mm0 = horizontal average
        saveadd1                        ; average mm0 into [edi]
%endrep

        pop     edi
        pop     esi
        ret     12
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; AddBlockVer(src, dst, src pitch)
;   For 8 rows of 8 bytes:
;     p         = (src row y + src row y+1 + 1) >> 1
;     dst row y = (p + dst row y + 1) >> 1
;   Reads 9 source rows. The destination pitch is fixed at 8 (loadparam 1).
;   Stack arguments: 12 bytes, removed by the callee (ret 12).
;   Preserves esi/edi; uses eax, ecx, edx, mm0-mm4, mm6, mm7. No emms.
;-----------------------------------------------------------------------------
AddBlockVer:
        push    esi
        push    edi

        loadparam 1                     ; edx = 8
        loadmask1                       ; mm6 = 0x01.., mm7 = 0xFE..

        load1   0,1                     ; first source row -> mm0/mm1
%rep 4                                  ; two output rows per repetition
        load2   0,1                     ; next row -> mm2/mm3
        avg1                            ; mm0 = vertical average
        saveadd1                        ; clobbers mm0/mm1, keeps mm2/mm3
        load1   0,1                     ; next row -> mm0/mm1
        avg2                            ; mm2 = vertical average
        saveadd2                        ; clobbers mm2/mm3, keeps mm0/mm1
%endrep

        pop     edi
        pop     esi
        ret     12
|
|
|
|
ALIGN 16
|
|
;-----------------------------------------------------------------------------
; AddBlockHorVer(src, dst, src pitch)
;   For 8 rows of 8 bytes:
;     p         = (s[y][x] + s[y][x+1] + s[y+1][x] + s[y+1][x+1] + 2) >> 2
;     dst[y][x] = (p + dst[y][x] + 1) >> 1
;   Reads 9 source rows of 9 bytes. The destination pitch is fixed at 8
;   (loadparam 1).
;
;   The four-sample interpolation needs the loadmask4 masks
;   (mm6 = 0x03.., mm7 = 0xFC..) while saveadd1/saveadd2 need the loadmask1
;   masks (mm6 = 0x01.., mm7 = 0xFE..). mm5 holds -2 (0xFE) in every byte
;   after the rounding step, so the masks are switched by adding/subtracting
;   mm5 around each saveadd and switched back afterwards. saveadd1/saveadd2
;   do not touch mm5, which is what makes the restore valid.
;
;   Stack arguments: 12 bytes, removed by the callee (ret 12).
;   Preserves esi/edi; uses eax, ecx, edx, mm0-mm7. No emms.
;-----------------------------------------------------------------------------
AddBlockHorVer:
        push    esi
        push    edi

        loadparam 1                     ; edx = 8
        loadmask4                       ; mm6 = 0x03.., mm7 = 0xFC..

        load1hv                         ; first source row -> mm0/mm1
%rep 4                                  ; two output rows per repetition
        load2hv                         ; next row -> mm2/mm3 (clobbers mm5)

        pcmpeqb mm5,mm5                 ; -1 in every byte
        paddb   mm0,mm2                 ; sum of the four 2-bit remainders
        paddb   mm5,mm5                 ; -2 in every byte (0xFE)
        paddb   mm1,mm3                 ; sum of the four (sample >> 2) parts
        psubb   mm0,mm5                 ; +2 rounding
        pand    mm0,mm7                 ; clear low bits before the qword shift
        psrlq   mm0,2
        paddb   mm0,mm1                 ; mm0 = interpolated row

        paddb   mm6,mm5                 ; 0x03-2 = 0x01
        psubb   mm7,mm5                 ; 0xFC+2 = 0xFE
        saveadd1                        ; average mm0 into [edi]
        psubb   mm6,mm5                 ; restore mask: 0x01+2 = 0x03
        paddb   mm7,mm5                 ; restore mask: 0xFE-2 = 0xFC

        load1hv                         ; next row -> mm0/mm1 (clobbers mm5)

        pcmpeqb mm5,mm5                 ; -1 in every byte
        paddb   mm2,mm0                 ; sum of the four 2-bit remainders
        paddb   mm5,mm5                 ; -2 in every byte (0xFE)
        paddb   mm3,mm1                 ; sum of the four (sample >> 2) parts
        psubb   mm2,mm5                 ; +2 rounding
        pand    mm2,mm7                 ; clear low bits before the qword shift
        psrlq   mm2,2
        paddb   mm2,mm3                 ; mm2 = interpolated row

        paddb   mm6,mm5                 ; 0x03-2 = 0x01
        psubb   mm7,mm5                 ; 0xFC+2 = 0xFE
        saveadd2                        ; average mm2 into [edi]
        psubb   mm6,mm5                 ; restore mask: 0x01+2 = 0x03
        paddb   mm7,mm5                 ; restore mask: 0xFE-2 = 0xFC

%endrep

        pop     edi
        pop     esi
        ret     12
|
|
|
|
|