gps/GPSResources/tcpmp/common/softidct/idct_mmx.asm

287 lines
4.5 KiB
NASM
Executable File

;*****************************************************************************
;*
;* This program is free software ; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
;*
;* $Id: idct_mmx.asm 432 2005-12-28 16:39:13Z picard $
;*
;* The Core Pocket Media Player
;* Copyright (c) 2004-2005 Gabor Kovacs
;*
;*****************************************************************************
;******************
;* NOT FINISHED *
;******************
; Assemble everything below as 32-bit code.
BITS 32
; Shift counts for the row and column passes. Neither constant is referenced
; by the code visible in this file; presumably they are reserved for the
; unfinished Row/Col macros -- TODO confirm.
ROW_SHIFT equ 11
COL_SHIFT equ 6
; The data section is opened and aligned, but no data is defined in it here.
SECTION .data
ALIGN 16
SECTION .text
; cglobal <name>,<argbytes>
; Redefines <name> so that every later use expands to the decorated symbol
; _<name>@<argbytes>, then exports that symbol with `global`.
; The decoration (leading underscore, '@' + size of the stack arguments)
; together with the `ret 16` epilogues used by the routines matches the
; Win32 stdcall naming scheme.
%macro cglobal 2
%define %1 _%1@%2
global %1
%endmacro
; Exported entry points: each takes 16 bytes (four dwords) of stack arguments.
cglobal IDCT_Const8x8,16
cglobal IDCT_Const4x4,16
; The Block variants stay unexported: their bodies are inside an %if 0 region.
;cglobal IDCT_Block8x8,16
;cglobal IDCT_Block8x4,16
; Row <n> -- placeholder for the IDCT row pass (NOT FINISHED).
; Parameter %1 is the row index; rows are addressed 16 bytes apart from ecx.
; As written, the macro only loads the row's two quadwords into mm0/mm1 and
; stores them back unchanged. The pseudo-code comments between the load and
; the store outline the arithmetic that has not been implemented yet.
; Clobbers: mm0, mm1.
; ecx:block
%macro Row 1
; load the 16 bytes of row %1
movq mm0,[ecx+%1*16]
movq mm1,[ecx+%1*16+8]
; intended coefficient order inside the row (not applied by any code yet):
; x0 x4 x3 x7 x1 x6 x2 x5
; odd part, first rotation stage:
; x4' = W7 * x5 + W1 * x4;
; x5' = W7 * x4 - W1 * x5;
; x6' = W3 * x7 + W5 * x6;
; x7' = W3 * x6 - W5 * x7;
; odd part, butterflies:
; x6' = x4 + x6;
; x4' = x4 - x6;
; x7' = x5 + x7;
; x5' = x5 - x7;
; odd part, scaling by 181/256:
; x5' = (181 * (x4 + x5) + 128) >> 8;
; x4' = (181 * (x4 - x5) + 128) >> 8;
; even part:
; x3' = W6 * x2 + W2 * x3;
; x2' = W6 * x3 - W2 * x2;
; x1 <<= 11;
; x0 <<= 11;
; x1' = x0 + x1;
; x0' = x0 - x1;
; x3' = x1 + x3;
; x1' = x1 - x3;
; x2' = x0 + x2;
; x0' = x0 - x2;
; write the (currently unmodified) row back in place
movq [ecx+%1*16],mm0
movq [ecx+%1*16+8],mm1
%endmacro
; Col4x4 / Col4x8 -- placeholders for the IDCT column pass (NOT FINISHED).
; Both macros accept two parameters but have empty bodies, so every
; invocation currently expands to nothing.
; Register roles the column pass is meant to use:
; ecx:block
; edi:dest edx:dest pitch
; esi:src eax:src pitch
%macro Col4x4 2
%endmacro
%macro Col4x8 2
%endmacro
; Everything up to the matching %endif is excluded from assembly.
; The two routines below depend on the Row and Col4x4/Col4x8 macros, which
; are still stubs (Row only reloads/stores its data, the Col macros are
; empty), so enabling this region would not yield a working IDCT.
%if 0
; IDCT_Block8x8 -- full block transform: 8 row passes, then two column passes.
; Stack arguments (four dwords, removed by ret 16): block, dst, dst pitch, src.
; A non-zero src selects the .Add variant of the column pass.
ALIGN 16
IDCT_Block8x8:
push esi
push edi
; two registers were pushed, so the first argument sits at esp+12
mov ecx,[esp+12] ;block
mov edi,[esp+12+4] ;dst
mov edx,[esp+12+8] ;dst pitch
mov esi,[esp+12+12] ;src
mov eax,8 ;src pitch
Row 0
Row 1
Row 2
Row 3
Row 4
Row 5
Row 6
Row 7
; src == NULL: fall through to the Col variant with second parameter 0
or esi,esi
jne .Add
; first Col parameter is 0 then 8 -- presumably the offset of each 4-column half
Col4x8 0,0
Col4x8 8,0
pop edi
pop esi
ret 16
; src != NULL: Col variant with second parameter 1
.Add:
Col4x8 0,1
Col4x8 8,1
pop edi
pop esi
ret 16
; IDCT_Block8x4 -- same layout as IDCT_Block8x8, but only rows 0..3 get a row
; pass and the Col4x4 macro is used for the column pass.
ALIGN 16
IDCT_Block8x4:
push esi
push edi
mov ecx,[esp+12] ;block
mov edi,[esp+12+4] ;dst
mov edx,[esp+12+8] ;dst pitch
mov esi,[esp+12+12] ;src
mov eax,8 ;src pitch
Row 0
Row 1
Row 2
Row 3
; src == NULL: fall through to the Col variant with second parameter 0
or esi,esi
jne .Add
Col4x4 0,0
Col4x4 8,0
pop edi
pop esi
ret 16
; src != NULL: Col variant with second parameter 1
.Add:
Col4x4 0,1
Col4x4 8,1
pop edi
pop esi
ret 16
%endif
;-----------------------------------------------------------------------
; IDCT_Const8x8 -- add one signed constant to every byte of an 8x8 block.
;
; ABI:    32-bit, four dword stack arguments, callee removes them (ret 16)
; In:     [esp+4]  v          signed value added to each source byte
;         [esp+8]  dst        destination, 8 rows of 8 bytes
;         [esp+12] dst pitch  byte distance between destination rows
;         [esp+16] src        source, 8 rows of 8 bytes, row pitch fixed at 8
; Out:    dst[y][x] = src[y][x] + v, saturated to 0..255
; Clobb:  eax, ecx, edx, mm0, mm1, mm7, flags (esi and edi are preserved)
; Note:   no EMMS is issued here, so the FPU is still in MMX state on return.
;
; The magnitude of v is clamped to 255 before it is replicated: only the low
; byte reaches mm7, so without the clamp |v| >= 256 would wrap around
; (v = 256 would add 0) instead of saturating. For |v| <= 255 the result is
; unchanged, and with unsigned byte saturation adding/subtracting 255 gives
; the same result as any larger magnitude would.
;-----------------------------------------------------------------------
ALIGN 16
IDCT_Const8x8:
    push    esi
    push    edi
    ; two registers were pushed, so the first argument sits at esp+12
    mov     ecx,[esp+12]        ; v
    mov     edi,[esp+12+4]      ; dst
    mov     edx,[esp+12+8]      ; dst pitch
    mov     esi,[esp+12+12]     ; src
    mov     eax,8               ; src pitch
    test    ecx,ecx
    js      .Sub                ; negative v: subtract |v| instead
.Add:
    cmp     ecx,255
    jbe     .AddSplat
    mov     ecx,255             ; larger values saturate every pixel anyway
.AddSplat:
    ; replicate the low byte of ecx into all 8 bytes of mm7
    movd    mm7,ecx
    punpcklbw mm7,mm7
    punpcklwd mm7,mm7
    punpckldq mm7,mm7
    ; two rows per repetition -> 8 rows
%rep 4
    movq    mm0,[esi]
    movq    mm1,[esi+eax]
    paddusb mm0,mm7
    lea     esi,[esi+eax*2]
    paddusb mm1,mm7
    movq    [edi],mm0
    movq    [edi+edx],mm1
    lea     edi,[edi+edx*2]
%endrep
    pop     edi
    pop     esi
    ret     16
.Sub:
    neg     ecx                 ; ecx = |v| (0x80000000 when v is INT_MIN)
    cmp     ecx,255             ; unsigned compare also catches the INT_MIN case
    jbe     .SubSplat
    mov     ecx,255
.SubSplat:
    ; replicate the low byte of ecx into all 8 bytes of mm7
    movd    mm7,ecx
    punpcklbw mm7,mm7
    punpcklwd mm7,mm7
    punpckldq mm7,mm7
    ; two rows per repetition -> 8 rows
%rep 4
    movq    mm0,[esi]
    movq    mm1,[esi+eax]
    psubusb mm0,mm7
    lea     esi,[esi+eax*2]
    psubusb mm1,mm7
    movq    [edi],mm0
    movq    [edi+edx],mm1
    lea     edi,[edi+edx*2]
%endrep
    pop     edi
    pop     esi
    ret     16
;-----------------------------------------------------------------------
; IDCT_Const4x4 -- add one signed constant to every byte of a 4x4 block.
;
; ABI:    32-bit, four dword stack arguments, callee removes them (ret 16)
; In:     [esp+4]  v          signed value added to each source byte
;         [esp+8]  dst        destination, 4 rows of 4 bytes
;         [esp+12] dst pitch  byte distance between destination rows
;         [esp+16] src        source, 4 rows of 4 bytes, row pitch fixed at 8
; Out:    dst[y][x] = src[y][x] + v, saturated to 0..255
; Clobb:  eax, ecx, edx, mm0, mm1, mm7, flags (esi and edi are preserved)
; Note:   no EMMS is issued here, so the FPU is still in MMX state on return.
;
; The magnitude of v is clamped to 255 before it is replicated: only the low
; byte reaches mm7, so without the clamp |v| >= 256 would wrap around
; (v = 256 would add 0) instead of saturating. For |v| <= 255 the result is
; unchanged, and with unsigned byte saturation adding/subtracting 255 gives
; the same result as any larger magnitude would.
;-----------------------------------------------------------------------
ALIGN 16
IDCT_Const4x4:
    push    esi
    push    edi
    ; two registers were pushed, so the first argument sits at esp+12
    mov     ecx,[esp+12]        ; v
    mov     edi,[esp+12+4]      ; dst
    mov     edx,[esp+12+8]      ; dst pitch
    mov     esi,[esp+12+12]     ; src
    mov     eax,8               ; src pitch
    test    ecx,ecx
    js      .Sub                ; negative v: subtract |v| instead
.Add:
    cmp     ecx,255
    jbe     .AddSplat
    mov     ecx,255             ; larger values saturate every pixel anyway
.AddSplat:
    ; replicate the low byte of ecx across mm7 (only the low 4 bytes get stored)
    movd    mm7,ecx
    punpcklbw mm7,mm7
    punpcklwd mm7,mm7
    punpckldq mm7,mm7
    ; two rows per repetition -> 4 rows, 4 bytes (one dword) per row
%rep 2
    movd    mm0,[esi]
    movd    mm1,[esi+eax]
    paddusb mm0,mm7
    lea     esi,[esi+eax*2]
    paddusb mm1,mm7
    movd    [edi],mm0
    movd    [edi+edx],mm1
    lea     edi,[edi+edx*2]
%endrep
    pop     edi
    pop     esi
    ret     16
.Sub:
    neg     ecx                 ; ecx = |v| (0x80000000 when v is INT_MIN)
    cmp     ecx,255             ; unsigned compare also catches the INT_MIN case
    jbe     .SubSplat
    mov     ecx,255
.SubSplat:
    ; replicate the low byte of ecx across mm7 (only the low 4 bytes get stored)
    movd    mm7,ecx
    punpcklbw mm7,mm7
    punpcklwd mm7,mm7
    punpckldq mm7,mm7
    ; two rows per repetition -> 4 rows, 4 bytes (one dword) per row
%rep 2
    movd    mm0,[esi]
    movd    mm1,[esi+eax]
    psubusb mm0,mm7
    lea     esi,[esi+eax*2]
    psubusb mm1,mm7
    movd    [edi],mm0
    movd    [edi+edx],mm1
    lea     edi,[edi+edx*2]
%endrep
    pop     edi
    pop     esi
    ret     16