; NASM source listing (287 lines, 4.5 KiB)
;*****************************************************************************
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
;*
;* $Id: idct_mmx.asm 432 2005-12-28 16:39:13Z picard $
;*
;* The Core Pocket Media Player
;* Copyright (c) 2004-2005 Gabor Kovacs
;*
;*****************************************************************************
|
||
|
|
||
|
;******************
;* NOT FINISHED   *
;******************
|
||
|
|
||
|
; ---- assembler setup --------------------------------------------------------
bits 32                                 ; generate 32-bit x86 code

; Shift amounts, presumably the descaling shifts of the row and column
; passes (TODO confirm) - no code visible in this file references them yet.
ROW_SHIFT       equ     11
COL_SHIFT       equ     6

; The data section is declared and aligned, but currently holds no data.
section .data
align 16

section .text
|
||
|
|
||
|
;-----------------------------------------------------------------------------
; cglobal name, argbytes
;   Exports `name` under a decorated symbol made of an underscore prefix and
;   an "@argbytes" suffix (IDCT_Const8x8,16 -> _IDCT_Const8x8@16).
;   The plain name is then %defined to the decorated one, so a later
;   "name:" label in this file defines the exported symbol.
;-----------------------------------------------------------------------------
%macro cglobal 2
        global  _%1@%2
        %define %1 _%1@%2
%endmacro
|
||
|
|
||
|
; Exported entry points.  The second argument is the size in bytes of the
; stack arguments, which each routine removes itself with "ret 16".
cglobal IDCT_Const8x8, 16
cglobal IDCT_Const4x4, 16

; The block transforms are not exported: their bodies are excluded from the
; build by an %if 0 guard further down in this file.
;cglobal IDCT_Block8x8,16
;cglobal IDCT_Block8x4,16
|
||
|
|
||
|
;-----------------------------------------------------------------------------
; Row n
;   Row pass over row n (0-based) of the block addressed by ecx.  Rows are
;   16 bytes apart and are handled as two quadwords (mm0 = first 8 bytes,
;   mm1 = last 8 bytes).
;
;   NOT FINISHED: the row is only loaded and written back unchanged.  The
;   intended arithmetic is kept below as notes; none of it is implemented.
;
;   In:    ecx = block
;   Clobb: mm0, mm1
;
;   The parameter is parenthesised in the address arithmetic so that an
;   expression argument (e.g. "Row 1+1") is scaled as a whole.
;-----------------------------------------------------------------------------
%macro Row 1
        movq    mm0, [ecx+(%1)*16]
        movq    mm1, [ecx+(%1)*16+8]

        ; x0 x4 x3 x7 x1 x6 x2 x5

        ; x4' = W7 * x5 + W1 * x4;
        ; x5' = W7 * x4 - W1 * x5;

        ; x6' = W3 * x7 + W5 * x6;
        ; x7' = W3 * x6 - W5 * x7;

        ; x6' = x4 + x6;
        ; x4' = x4 - x6;

        ; x7' = x5 + x7;
        ; x5' = x5 - x7;

        ; x5' = (181 * (x4 + x5) + 128) >> 8;
        ; x4' = (181 * (x4 - x5) + 128) >> 8;

        ; x3' = W6 * x2 + W2 * x3;
        ; x2' = W6 * x3 - W2 * x2;

        ; x1 <<= 11;
        ; x0 <<= 11;

        ; x1' = x0 + x1;
        ; x0' = x0 - x1;

        ; x3' = x1 + x3;
        ; x1' = x1 - x3;

        ; x2' = x0 + x2;
        ; x0' = x0 - x2;

        movq    [ecx+(%1)*16], mm0
        movq    [ecx+(%1)*16+8], mm1
%endmacro
|
||
|
|
||
|
;-----------------------------------------------------------------------------
; Register contract noted for the column macros:
;   ecx = block
;   edi = dest,  edx = dest pitch
;   esi = src,   eax = src pitch
;
; Col4x4 a, b
;   Placeholder: the body is empty, so every invocation expands to nothing.
;   The call sites in this file pass a = 0 or 8 and b = 0 or 1, with b = 1
;   on the path taken when src (esi) is non-zero.  Presumably a is a column
;   offset and b an "add to source" flag - TODO confirm once implemented.
;-----------------------------------------------------------------------------
%macro Col4x4 2
        ; not implemented yet
%endmacro
|
||
|
|
||
|
;-----------------------------------------------------------------------------
; Col4x8 a, b
;   Placeholder: the body is empty, so every invocation expands to nothing.
;   The call sites in this file pass a = 0 or 8 and b = 0 or 1, with b = 1
;   on the path taken when src (esi) is non-zero.  Presumably a is a column
;   offset and b an "add to source" flag - TODO confirm once implemented.
;-----------------------------------------------------------------------------
%macro Col4x8 2
        ; not implemented yet
%endmacro
|
||
|
|
||
|
%if 0
|
||
|
;-----------------------------------------------------------------------------
; IDCT_Block8x8
;   Stack arguments (4 bytes each, removed by "ret 16"):
;     block, dst, dst pitch, src
;
;   Applies the Row macro to rows 0..7 of the block, then the Col4x8 macro
;   to offsets 0 and 8.  The last Col4x8 argument is 1 when src is non-zero
;   and 0 otherwise.
;
;   Saves and restores esi and edi.  Uses eax, ecx, edx and whatever the
;   Row / Col4x8 macros use.
;-----------------------------------------------------------------------------
ALIGN 16
IDCT_Block8x8:
        push    esi
        push    edi

        ; Two saved registers plus the return address put the first
        ; argument at esp+12.
        mov     ecx, [esp+12]           ; block
        mov     edi, [esp+16]           ; dst
        mov     edx, [esp+20]           ; dst pitch
        mov     esi, [esp+24]           ; src
        mov     eax, 8                  ; src pitch

        ; Row 0 .. Row 7
%assign row_idx 0
%rep 8
        Row     row_idx
%assign row_idx row_idx+1
%endrep
%undef row_idx

        test    esi, esi                ; is a source block supplied?
        jnz     .with_src

        Col4x8  0, 0
        Col4x8  8, 0
        pop     edi
        pop     esi
        ret     16

.with_src:
        Col4x8  0, 1
        Col4x8  8, 1
        pop     edi
        pop     esi
        ret     16
|
||
|
|
||
|
;-----------------------------------------------------------------------------
; IDCT_Block8x4
;   Stack arguments (4 bytes each, removed by "ret 16"):
;     block, dst, dst pitch, src
;
;   Applies the Row macro to rows 0..3 of the block, then the Col4x4 macro
;   to offsets 0 and 8.  The last Col4x4 argument is 1 when src is non-zero
;   and 0 otherwise.
;
;   Saves and restores esi and edi.  Uses eax, ecx, edx and whatever the
;   Row / Col4x4 macros use.
;-----------------------------------------------------------------------------
ALIGN 16
IDCT_Block8x4:
        push    esi
        push    edi

        ; Two saved registers plus the return address put the first
        ; argument at esp+12.
        mov     ecx, [esp+12]           ; block (the Row macro reads it via ecx)
        mov     edi, [esp+16]           ; dst
        mov     edx, [esp+20]           ; dst pitch
        mov     esi, [esp+24]           ; src
        mov     eax, 8                  ; src pitch

        ; Row 0 .. Row 3
%assign row_idx 0
%rep 4
        Row     row_idx
%assign row_idx row_idx+1
%endrep
%undef row_idx

        test    esi, esi                ; is a source block supplied?
        jnz     .with_src

        Col4x4  0, 0
        Col4x4  8, 0
        pop     edi
        pop     esi
        ret     16

.with_src:
        Col4x4  0, 1
        Col4x4  8, 1
        pop     edi
        pop     esi
        ret     16
|
||
|
%endif
|
||
|
|
||
|
;-----------------------------------------------------------------------------
; IDCT_Const8x8 - add a signed constant to an 8x8 block of unsigned bytes
;
; Stack arguments (4 bytes each, removed by the callee with "ret 16"):
;   v          signed constant; a negative value selects the subtract path
;   dst        destination, 8 rows of 8 bytes
;   dst pitch  byte distance between destination rows
;   src        source, 8 rows of 8 bytes, rows 8 bytes apart; read
;              unconditionally, so it must be a valid pointer
;
; Every output byte is src + v with unsigned saturation (paddusb for
; v >= 0, psubusb with -v for v < 0).
;
; The magnitude of v is clamped to 255 before it is replicated into the
; eight bytes of mm7.  The replication only keeps the low byte, so without
; the clamp a magnitude above 255 would wrap (256 would add nothing)
; instead of saturating the whole block.
;
; Saves and restores esi and edi.
; Clobbers eax, ecx, edx, mm0, mm1, mm7 and the flags.
; No emms is executed here - NOTE(review): presumably the caller issues it
; before any x87 code runs; confirm.
;-----------------------------------------------------------------------------
ALIGN 16
IDCT_Const8x8:
        push    esi
        push    edi

        ; Two saved registers plus the return address put the first
        ; argument at esp+12.
        mov     ecx, [esp+12]           ; v
        mov     edi, [esp+12+4]         ; dst
        mov     edx, [esp+12+8]         ; dst pitch
        mov     esi, [esp+12+12]        ; src
        mov     eax, 8                  ; src pitch

        test    ecx, ecx
        js      .Sub

        ; ---- v >= 0 : saturating add ----------------------------------------
        cmp     ecx, 255
        jbe     .AddSplat
        mov     ecx, 255                ; anything larger saturates every byte
.AddSplat:
        movd    mm7, ecx
        punpcklbw mm7, mm7
        punpcklwd mm7, mm7
        punpckldq mm7, mm7              ; mm7 = constant in all 8 bytes

        ; Two rows per repetition -> 8 rows.
%rep 4
        movq    mm0, [esi]
        movq    mm1, [esi+eax]
        paddusb mm0, mm7
        lea     esi, [esi+eax*2]
        paddusb mm1, mm7
        movq    [edi], mm0
        movq    [edi+edx], mm1
        lea     edi, [edi+edx*2]
%endrep

        pop     edi
        pop     esi
        ret     16

        ; ---- v < 0 : saturating subtract of -v ------------------------------
.Sub:
        neg     ecx
        cmp     ecx, 255                ; unsigned, so 80000000h is clamped too
        jbe     .SubSplat
        mov     ecx, 255                ; anything larger clears every byte
.SubSplat:
        movd    mm7, ecx
        punpcklbw mm7, mm7
        punpcklwd mm7, mm7
        punpckldq mm7, mm7              ; mm7 = constant in all 8 bytes

        ; Two rows per repetition -> 8 rows.
%rep 4
        movq    mm0, [esi]
        movq    mm1, [esi+eax]
        psubusb mm0, mm7
        lea     esi, [esi+eax*2]
        psubusb mm1, mm7
        movq    [edi], mm0
        movq    [edi+edx], mm1
        lea     edi, [edi+edx*2]
%endrep

        pop     edi
        pop     esi
        ret     16
|
||
|
|
||
|
;-----------------------------------------------------------------------------
; IDCT_Const4x4 - add a signed constant to a 4x4 block of unsigned bytes
;
; Stack arguments (4 bytes each, removed by the callee with "ret 16"):
;   v          signed constant; a negative value selects the subtract path
;   dst        destination, 4 rows of 4 bytes
;   dst pitch  byte distance between destination rows
;   src        source, 4 rows of 4 bytes, rows 8 bytes apart; read
;              unconditionally, so it must be a valid pointer
;
; Every output byte is src + v with unsigned saturation (paddusb for
; v >= 0, psubusb with -v for v < 0).  Rows are moved with movd, i.e.
; exactly 4 bytes are read and written per row.
;
; The magnitude of v is clamped to 255 before it is replicated into the
; bytes of mm7.  The replication only keeps the low byte, so without the
; clamp a magnitude above 255 would wrap (256 would add nothing) instead
; of saturating the whole block.
;
; Saves and restores esi and edi.
; Clobbers eax, ecx, edx, mm0, mm1, mm7 and the flags.
; No emms is executed here - NOTE(review): presumably the caller issues it
; before any x87 code runs; confirm.
;-----------------------------------------------------------------------------
ALIGN 16
IDCT_Const4x4:
        push    esi
        push    edi

        ; Two saved registers plus the return address put the first
        ; argument at esp+12.
        mov     ecx, [esp+12]           ; v
        mov     edi, [esp+12+4]         ; dst
        mov     edx, [esp+12+8]         ; dst pitch
        mov     esi, [esp+12+12]        ; src
        mov     eax, 8                  ; src pitch

        test    ecx, ecx
        js      .Sub

        ; ---- v >= 0 : saturating add ----------------------------------------
        cmp     ecx, 255
        jbe     .AddSplat
        mov     ecx, 255                ; anything larger saturates every byte
.AddSplat:
        movd    mm7, ecx
        punpcklbw mm7, mm7
        punpcklwd mm7, mm7
        punpckldq mm7, mm7              ; mm7 = constant in all 8 bytes

        ; Two rows per repetition -> 4 rows.
%rep 2
        movd    mm0, [esi]
        movd    mm1, [esi+eax]
        paddusb mm0, mm7
        lea     esi, [esi+eax*2]
        paddusb mm1, mm7
        movd    [edi], mm0
        movd    [edi+edx], mm1
        lea     edi, [edi+edx*2]
%endrep

        pop     edi
        pop     esi
        ret     16

        ; ---- v < 0 : saturating subtract of -v ------------------------------
.Sub:
        neg     ecx
        cmp     ecx, 255                ; unsigned, so 80000000h is clamped too
        jbe     .SubSplat
        mov     ecx, 255                ; anything larger clears every byte
.SubSplat:
        movd    mm7, ecx
        punpcklbw mm7, mm7
        punpcklwd mm7, mm7
        punpckldq mm7, mm7              ; mm7 = constant in all 8 bytes

        ; Two rows per repetition -> 4 rows.
%rep 2
        movd    mm0, [esi]
        movd    mm1, [esi+eax]
        psubusb mm0, mm7
        lea     esi, [esi+eax*2]
        psubusb mm1, mm7
        movd    [edi], mm0
        movd    [edi+edx], mm1
        lea     edi, [edi+edx*2]
%endrep

        pop     edi
        pop     esi
        ret     16
|
||
|
|