361 lines
10 KiB
ArmAsm
361 lines
10 KiB
ArmAsm
|
/*
 * (C) Copyright 2007-2013
 * Allwinner Technology Co., Ltd. <www.allwinnertech.com>
 * Jerry Wang <wangflord@allwinnertech.com>
 *
 * See file CREDITS for list of people who contributed to this
 * project.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307 USA
 */
|
|||
|
|
|||
|
#include <asm/assembler.h>
|
|||
|
|
|||
|
|
|||
|
/*
************************************************************************************************************
*
*                                             arm_neon_init
*
*    name       :  arm_neon_init
*
*    parameters :  none (r0 is used as scratch)
*
*    return     :  r0 = 0
*
*    note       :  Writes (0xf << 20) to the coprocessor access control register to open
*                  CP10/CP11, issues a CP15 ISB, then writes 0x40000000 to FPEXC to turn
*                  the VFP/NEON unit on.  The access-control register is written as a
*                  whole value, so every bit outside [23:20] ends up cleared.
*
************************************************************************************************************
*/
	.balign	4
	.global	arm_neon_init

arm_neon_init:
	push	{lr}

	mov	r0, #0x00f00000			@ same value as (0xf << 20)
	mcr	p15, 0, r0, c1, c0, 2		@ enable CP10/CP11, ASEDIS/D32DIS left clear

	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 4		@ CP15ISB

	mov	r0, #(1 << 30)			@ 0x40000000
	vmsr	fpexc, r0			@ enable NEON (same operation as "fmxr fpexc, r0")

	isb					@ flush the pipeline
	dsb					@ wait for outstanding accesses to finish

	mov	r0, #0				@ return value

	pop	{pc}
|
|||
|
|
|||
|
|
|||
|
/*
************************************************************************************************************
*
*                                             add_sum_neon
*
*    name       :  add_sum_neon
*
*    parameters :  r0 = start address of the data
*                  r1 = length of the data in bytes
*
*    return     :  r0 = sum, modulo 2^32, of the data read as 32-bit words.  If the length
*                  is not a multiple of 4, the trailing 1..3 bytes are added as the low
*                  bytes of one extra word (upper bytes masked to zero).
*
*    note       :  - Blocks of 32 bytes are accumulated in four 32-bit NEON lanes (q0);
*                    the remaining words and bytes are handled with core registers.
*                  - The trailing partial word is fetched with a full 32-bit load, so up
*                    to 3 bytes past the end of the buffer are read (and then masked off).
*                  - The tail mask keeps the LOW bytes of the loaded word; this matches
*                    the first bytes in memory only on a little-endian configuration
*                    (NOTE(review): confirm the target is little-endian).
*                  - q0-q3 are clobbered; r2-r6 are preserved.
*
************************************************************************************************************
*/
	.balign	4
	.global	add_sum_neon
	.arm

add_sum_neon:
	stmfd	sp!, {r2-r6, lr}

	mov	r2, r0				@ r2 = read pointer
	mov	r3, r1				@ r3 = bytes still to be summed
	mov	r5, #0				@ r5 = running sum (return value)
	vmov.i32 q0, #0				@ q0 = four 32-bit lane accumulators

	cmp	r3, #31
	bls	__data_deal_32byte_unalign_0	@ fewer than 32 bytes: skip the NEON loop

__data_read_loop:
	vld1.I32 {d2, d3, d4, d5}, [r2]!	@ load 32 bytes into q1/q2, r2 += 32
	vadd.I32 q3, q1, q2
	vadd.I32 q0, q0, q3			@ accumulate into q0
	sub	r3, r3, #32
	cmp	r3, #31
	bhi	__data_read_loop		@ loop only while at least 32 bytes remain
						@ (was "bhs": with exactly 31 bytes left it
						@ read another 32 and wrapped the counter)

	vadd.I32 d0, d0, d1			@ fold four lanes into two
	vmov	r5, r6, d0
	add	r5, r5, r6			@ fold two lanes into the scalar sum

__data_deal_32byte_unalign_0:
	and	r4, r3, #3			@ r4 = trailing bytes (0..3)
	lsr	r3, r3, #2			@ r3 = whole words left (0..7)

__data_deal_32byte_unalign_1:
	cmp	r3, #0
	beq	__data_deal_4byte_unalign
	ldr	r6, [r2], #4
	add	r5, r5, r6
	sub	r3, r3, #1
	b	__data_deal_32byte_unalign_1

__data_deal_4byte_unalign:
	cmp	r4, #0
	beq	__data_deal_return
	rsb	r4, r4, #4			@ r4 = number of bytes to discard
	lsl	r4, r4, #3			@ ... expressed in bits
	mvn	r6, #0
	lsr	r6, r6, r4			@ r6 = mask keeping the low (4 - discarded) bytes
	ldr	r3, [r2], #4			@ full-word load, may over-read up to 3 bytes
	and	r6, r3, r6
	add	r5, r5, r6

__data_deal_return:
	mov	r0, r5

	ldmfd	sp!, {r2-r6, pc}
|
|||
|
/*
************************************************************************************************************
*
*                                             memcpy_neon
*
*    name       :  memcpy_neon
*
*    parameters :  r0 = destination address
*                  r1 = source address
*                  r2 = number of bytes to copy
*
*    return     :  r0 is never written, so it still holds the destination address.
*
*    note       :  - Returns immediately when the length is 0 or source == destination.
*                  - Copies 32-byte blocks with NEON (d2-d5), then whole words, then the
*                    last 0..3 bytes one byte at a time, so nothing outside the requested
*                    range is read or written.
*                  - The word loop uses ldr/str on whatever alignment the caller passed
*                    (NOTE(review): assumes unaligned word access is permitted or the
*                    pointers are word aligned - confirm).
*                  - Overlapping buffers are not handled specially; the copy always runs
*                    from low to high addresses.
*                  - d2-d5 (q1/q2) are clobbered; r3-r9 are preserved.
*
*    fixes      :  - The epilogue restored lr but never branched to it, so execution fell
*                    through into the following routine; it now pops straight into pc.
*                  - The old tail merged source and destination words with a mask built
*                    from (4 - n) * 8 and read the "destination" word from the source
*                    pointer, which copied the wrong number of bytes for n = 1 and n = 3.
*                    It is replaced by a plain byte loop.
*
************************************************************************************************************
*/
	.align	4
	.global	memcpy_neon
	.arm

memcpy_neon:
	cmp	r2, #0				@ nothing to copy
	moveq	pc, lr

	cmp	r0, r1				@ source and destination identical
	moveq	pc, lr

	stmfd	sp!, {r3-r9, lr}		@ register list kept as before (8 words)

	mov	r3, r0				@ r3 = write pointer
	mov	r4, r1				@ r4 = read pointer

__memcpy_loop:
	cmp	r2, #31				@ fewer than 32 bytes left?
	bls	__memcpy_neon_32byte_unalign_0
	vld1.I32 {d2,d3,d4,d5}, [r4]!		@ load 32 bytes, r4 += 32
	vst1.I32 {d2,d3,d4,d5}, [r3]!		@ store 32 bytes, r3 += 32
	sub	r2, r2, #32
	b	__memcpy_loop

__memcpy_neon_32byte_unalign_0:
	PLD(pld	[r3, #0])			@ PLD() is provided by <asm/assembler.h>
	PLD(pld	[r4, #0])
	and	r5, r2, #3			@ r5 = trailing bytes (0..3)
	lsr	r6, r2, #2			@ r6 = whole words left (0..7)

__memcpy_neon_32byte_unalign_1:
	cmp	r6, #0
	beq	__memcpy_neon_4byte_unalign
	ldr	r7, [r4], #4
	str	r7, [r3], #4
	sub	r6, r6, #1
	b	__memcpy_neon_32byte_unalign_1

__memcpy_neon_4byte_unalign:
	cmp	r5, #0
	beq	__memcpy_neon_exit
	ldrb	r7, [r4], #1			@ copy the last 1..3 bytes individually
	strb	r7, [r3], #1
	sub	r5, r5, #1
	b	__memcpy_neon_4byte_unalign

__memcpy_neon_exit:
	ldmfd	sp!, {r3-r9, pc}		@ restore and return
|
|||
|
|
|||
|
/*
************************************************************************************************************
*
*                                             bmpdecode_neon
*
*    name       :  bmpdecode_neon
*
*    parameters :  r0 = address of the raw BMP pixel data (source)
*                  r1 = address where the unpacked pixel data is stored (destination)
*                  r2 = address of a descriptor:
*                       {
*                           int x;          pixels per row
*                           int y;          number of rows
*                           int bmp_bpix;   bytes per pixel
*                       }
*
*    return     :  none (r0-r8 are restored on exit)
*
*    note       :  Each row holds x * bmp_bpix valid bytes.  A row is copied 32 bytes at a
*                  time while at least 32 remain, then one 16-byte chunk, then one 8-byte
*                  chunk, and finally - if 1..7 bytes are still left - one more full
*                  8-byte chunk.  That last chunk copies too much, so both pointers are
*                  pulled back afterwards:
*
*                    destination : back by r5 = 8 - (row_bytes & 7), so it ends exactly
*                                  row_bytes past the start of the row;
*                    source      : back by r6 = r5 & 4, so it ends at the row length
*                                  rounded up to a multiple of 4.
*
*                  For 3-byte pixels with x = 8m + n (n = 1..7), row_bytes & 7 takes the
*                  values 3,6,1,4,7,2,5, giving r5 = 5,2,7,4,1,6,3.  For 4-byte pixels an
*                  odd x leaves 4 bytes over and both pointers step back by 4; an even x
*                  needs no correction.
*
*                  Because the last chunk is always a full 8 bytes, up to 7 bytes beyond
*                  the end of a row are read from the source and written to the
*                  destination (the next row then overwrites them).
*                  NOTE(review): the caller presumably provides that much slack after the
*                  final row in both buffers - confirm.
*
*                  register usage
*                      r0   source address
*                      r1   destination address
*                      r2   descriptor address on entry, then x
*                      r3   y, counted down once per row
*                      r4   bytes per pixel
*                      r5   bytes over-written in the destination
*                      r6   bytes over-read from the source
*                      r7   valid bytes per row
*                      r8   bytes left in the current row
*                  d2-d5 (q1/q2) are clobbered.
*
*    fixes      :  - The epilogue was "stmfd" (a second push); it is now "ldmfd", so the
*                    routine actually restores its registers and returns.
*                  - "cmp r8, #7 / beq" skipped the 8-byte chunk only for exactly 7 bytes
*                    left; it is now "bls" (7 or fewer).
*                  - The row byte count was derived from r4 (bytes per pixel) instead of
*                    r2 (x), and the row counter decremented r4 instead of r3 (y), both
*                    contradicting the register table above.
*                  - Non-positive x or y now exits immediately instead of wrapping.
*
************************************************************************************************************
*/
	.align	4
	.global	bmpdecode_neon
	.arm

bmpdecode_neon:
	cmp	r2, #0				@ NULL descriptor
	moveq	pc, lr

	cmp	r0, r1				@ source and destination identical
	moveq	pc, lr

	stmfd	sp!, {r0-r8, lr}

	ldr	r3, [r2, #4]			@ r3 = y
	ldr	r4, [r2, #8]			@ r4 = bmp_bpix
	ldr	r2, [r2]			@ r2 = x

	cmp	r4, #0
	beq	__bmpdecode_filling_exit
	cmp	r3, #0				@ no rows (or negative count)
	ble	__bmpdecode_filling_exit
	cmp	r2, #0				@ empty (or negative) row
	ble	__bmpdecode_filling_exit

	cmp	r4, #4
	beq	__bmpdecode_x_4pixles

	@ every pixel size other than 4 (3 bytes per pixel in practice)
	mul	r7, r2, r4			@ r7 = x * bmp_bpix = valid bytes per row
	and	r5, r7, #7
	rsb	r5, r5, #8			@ r5 = destination over-write of the last chunk
	and	r6, r5, #4			@ r6 = source over-read of the last chunk

	b	__bmpdecode_y_loop

__bmpdecode_x_4pixles:				@ 4 bytes per pixel
	and	r5, r2, #1
	lsl	r5, r5, #2			@ r5 = 4 when x is odd, else 0
	mov	r6, r5				@ source steps back by the same amount
	lsl	r7, r2, #2			@ r7 = 4 * x = valid bytes per row

__bmpdecode_y_loop:
	mov	r8, r7				@ r8 = bytes left in this row

__bmpdecode_x_loop:
	cmp	r8, #31
	bls	__bmpdecode_filling_32bytes_less

	vld1.I32 {d2,d3,d4,d5}, [r0]!		@ copy 32 bytes
	vst1.I32 {d2,d3,d4,d5}, [r1]!
	sub	r8, r8, #32

	b	__bmpdecode_x_loop

__bmpdecode_filling_32bytes_less:
	cmp	r8, #15
	bls	__bmpdecode_filling_16bytes_less

	vld1.I32 {d2, d3}, [r0]!		@ copy 16 bytes
	vst1.I32 {d2, d3}, [r1]!
	sub	r8, r8, #16

__bmpdecode_filling_16bytes_less:
	cmp	r8, #7
	bls	__bmpdecode_filling_8bytes_less	@ 7 or fewer left (was "beq")

	vld1.I32 {d2}, [r0]!			@ copy 8 bytes
	vst1.I32 {d2}, [r1]!
	sub	r8, r8, #8

__bmpdecode_filling_8bytes_less:
	cmp	r8, #0
	beq	__bmpdecode_filling_x_finish

	vld1.I32 {d2}, [r0]!			@ 1..7 bytes left: copy a full 8 anyway
	vst1.I32 {d2}, [r1]!

	sub	r1, r1, r5			@ pull the destination back to the row end
	sub	r0, r0, r6			@ pull the source back to the 4-byte-padded row end

__bmpdecode_filling_x_finish:
	subs	r3, r3, #1			@ one row done
	bne	__bmpdecode_y_loop

__bmpdecode_filling_exit:
	ldmfd	sp!, {r0-r8, pc}
|
|||
|
|