SmartAudio/lichee/brandy/u-boot-2011.09/board/sunxi/arm_neon.S

361 lines
10 KiB
ArmAsm
Raw Normal View History

2018-07-13 01:31:50 +00:00
/*
* (C) Copyright 2007-2013
* Allwinner Technology Co., Ltd. <www.allwinnertech.com>
* Jerry Wang <wangflord@allwinnertech.com>
*
* See file CREDITS for list of people who contributed to this
* project.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*/
#include <asm/assembler.h>
/*
************************************************************************************************************
*
* arm_neon_init
*
* name :
*
* parameters : none
*
* return :
*
* note :
*
*
************************************************************************************************************
*/
/*
 * arm_neon_init
 *
 * Grants access to coprocessors CP10/CP11 through the coprocessor access
 * control register and then sets the enable bit in FPEXC.
 *
 * In:    nothing (r0 is only used as scratch)
 * Out:   r0 = 0
 * Stack: lr is pushed on entry and popped into pc on exit
 */
	.balign	4
	.global	arm_neon_init
arm_neon_init:
	push	{lr}
	mov	r0, #0x00f00000			@ bits 23:20 set: enable CP10/CP11; ASEDIS/D32DIS left clear
	mcr	p15, 0, r0, c1, c0, 2
	mov	r0, #0
	mcr	p15, 0, r0, c7, c5, 4		@ CP15ISB
	mov	r0, #0x40000000			@ bit 30: enable NEON
	vmsr	fpexc, r0
	isb					@ flush the pipeline
	dsb					@ wait for outstanding accesses to finish
	mov	r0, #0				@ return value
	pop	{pc}
/*
************************************************************************************************************
*
* add_sum_neon
*
* name :
*
* parameters : r0, start address r1, number of bytes
*
* return :
*
* note :
*
*
************************************************************************************************************
*/
/*
 * add_sum_neon
 *
 * Adds up a buffer as a sequence of 32-bit words (result modulo 2^32).
 *
 * In:    r0 = start address of the buffer
 *        r1 = length of the buffer in bytes
 * Out:   r0 = sum
 * Clobb: q0-q3, flags (r2-r6 are saved and restored)
 *
 * Blocks of 32 bytes are summed with NEON, the remaining whole words with
 * the integer unit.  If 1-3 bytes are left over, one more whole word is
 * loaded and only its low-order 8/16/24 bits are added, so up to 3 bytes
 * beyond the end of the buffer are read (their value is masked off).
 */
	.balign	4
	.global	add_sum_neon
	.arm
add_sum_neon:
	stmfd	sp!, {r2-r6, lr}
	mov	r2, r0				@ r2 = read pointer
	mov	r3, r1				@ r3 = bytes remaining
	mov	r5, #0				@ r5 = running sum / return value
	vmov.i32	q0, #0			@ q0 = four partial sums
	cmp	r3, #31
	bls	__data_deal_32byte_unalign_0	@ fewer than 32 bytes: skip the NEON loop
__data_read_loop:
	vld1.32	{d2, d3, d4, d5}, [r2]!		@ q1,q2 = next 32 bytes, r2 += 32
	vadd.i32	q3, q1, q2
	vadd.i32	q0, q0, q3		@ accumulate into q0
	sub	r3, r3, #32
	cmp	r3, #31
	bhi	__data_read_loop		@ go round again only while 32 or more bytes remain
	vadd.i32	d0, d0, d1		@ fold four lanes into two
	vmov	r5, r6, d0
	add	r5, r5, r6			@ r5 = sum of all NEON lanes
__data_deal_32byte_unalign_0:
	and	r4, r3, #3			@ r4 = trailing bytes that do not fill a word (0-3)
	lsr	r3, r3, #2			@ r3 = number of whole words left
__data_deal_32byte_unalign_1:
	cmp	r3, #0
	beq	__data_deal_4byte_unalign
	ldr	r6, [r2], #4
	add	r5, r5, r6
	sub	r3, r3, #1
	b	__data_deal_32byte_unalign_1
__data_deal_4byte_unalign:
	cmp	r4, #0
	beq	__data_deal_return
	rsb	r4, r4, #4
	lsl	r4, r4, #3			@ r4 = 8 * (4 - trailing bytes)
	mvn	r6, #0
	lsr	r6, r6, r4			@ r6 = mask selecting the low-order trailing bytes
	ldr	r3, [r2], #4			@ whole-word load; the unwanted bytes are masked off
	and	r6, r3, r6
	add	r5, r5, r6
__data_deal_return:
	mov	r0, r5
	ldmfd	sp!, {r2-r6, pc}
/*
************************************************************************************************************
*
* memcpy_neon
*
* name :
*
* parameters : r0, destination address r1, source address r2, number of bytes
*
* return :
*
* note :
*
*
************************************************************************************************************
*/
/*
 * memcpy_neon
 *
 * Copies r2 bytes from the source to the destination.
 *
 * In:    r0 = destination address
 *        r1 = source address
 *        r2 = number of bytes
 * Out:   r0 is left unchanged (still the destination address)
 * Clobb: r2, d2-d5, flags (r3-r7 are saved and restored)
 *
 * Returns at once when the count is 0 or when source and destination are
 * the same address.  Data is moved in 32-byte NEON blocks, then in words,
 * then byte by byte for the last 1-3 bytes, so nothing outside the two
 * buffers is read or written.
 */
	.align	4
	.global	memcpy_neon
	.arm
memcpy_neon:
	cmp	r2, #0
	moveq	pc, lr				@ nothing to copy
	cmp	r0, r1
	moveq	pc, lr				@ source and destination are identical
	stmfd	sp!, {r3-r7, lr}
	mov	r3, r0				@ r3 = write pointer (destination)
	mov	r4, r1				@ r4 = read pointer (source)
__memcpy_loop:
	cmp	r2, #31				@ fewer than 32 bytes left?
	bls	__memcpy_neon_32byte_unalign_0
	vld1.32	{d2, d3, d4, d5}, [r4]!		@ load 32 bytes from the source
	vst1.32	{d2, d3, d4, d5}, [r3]!		@ store them to the destination
	sub	r2, r2, #32
	b	__memcpy_loop
__memcpy_neon_32byte_unalign_0:
	PLD(pld [r3, #0])
	PLD(pld [r4, #0])
	and	r5, r2, #3			@ r5 = trailing bytes that do not fill a word (0-3)
	lsr	r6, r2, #2			@ r6 = number of whole words left
__memcpy_neon_32byte_unalign_1:
	cmp	r6, #0
	beq	__memcpy_neon_4byte_unalign
	ldr	r7, [r4], #4
	str	r7, [r3], #4
	sub	r6, r6, #1
	b	__memcpy_neon_32byte_unalign_1
__memcpy_neon_4byte_unalign:
	cmp	r5, #0
	beq	__memcpy_neon_exit
__memcpy_neon_tail_byte:
	ldrb	r7, [r4], #1			@ copy the last 1-3 bytes one at a time
	strb	r7, [r3], #1
	subs	r5, r5, #1
	bne	__memcpy_neon_tail_byte
__memcpy_neon_exit:
	ldmfd	sp!, {r3-r7, pc}		@ restore and return
/*
************************************************************************************************************
*
* bmpdecode_neon
*
* name :
*
* parameters : r0: address of the raw bmp data (source)
*
* r1: address where the decoded data is stored (destination)
*
* r2: address of the parameter structure
*
* {
* int x; number of pixels in the x direction
* int y; number of pixels in the y direction
* int bmp_bpix; number of bytes per pixel
* }
*
* return : none
*
* note : Copy strategy: 32 bytes are copied at a time while the bytes left in the row (x pixels * bytes per pixel) allow it;
* when at least 16 but fewer than 32 bytes are left, 16 bytes are copied at once;
* when at least 8 but fewer than 16 bytes are left, 8 bytes are copied at once;
* when fewer than 8 bytes are left and the count is not 0, 8 bytes are still copied at once.
*
* ARGB format: if a row has an odd number of pixels, 4 extra bytes are copied, so the source and destination addresses must both be moved back by 4 bytes;
* if a row has an even number of pixels, nothing is left over.
*
* RGB format: a row has 3x valid bytes and x&3 padding bytes (rows are 4-byte aligned).
* Because the tail is copied 8 bytes at a time, the number of bytes that do not fill a multiple of 8 must be known so that the extra bytes copied can be taken back.
* 3x = valid bytes
* 3x&7 = bytes that do not fill a multiple of 8
* Let x=8m+n with n from 1 to 7; then 3x=3(8m+n)=24m+3n (n=0 needs no adjustment),
* and 3n takes the values 3,6,9,12,15,18,21
* which ANDed with 7 give 3,6,1,4,7,2,5
* i.e. the bytes that do not fill a multiple of 8.
* Subtracting these from 8 gives the number of extra bytes copied so that the tail is a full 8 bytes:
* 5,2,7,4,1,6,3
* These are the bytes written in excess; the source address must be moved back by (that value & 4).
*
* register usage: r0 source address
*
* r1 destination address
*
* r2 parameter address (no longer valid once the parameters are read),
* then the number of pixels in the x direction
*
* r3 number of pixels in the y direction
*
* r4 number of bytes per pixel
*
* r5 number of extra bytes written to the destination
*
* r6 number of extra bytes read from the source
*
* r7 number of valid bytes in one row
*
*
*
************************************************************************************************************
*/
/*
 * bmpdecode_neon
 *
 * Copies y rows of pixel data from the source to the destination.
 *
 * In:    r0 = source address (raw bmp data)
 *        r1 = destination address
 *        r2 = address of three consecutive 32-bit words:
 *             [r2]     x, pixels per row
 *             [r2, #4] y, number of rows
 *             [r2, #8] bytes per pixel
 * Out:   nothing; r0-r8 are restored on return
 * Clobb: d2-d5, flags
 *
 * Returns without copying when r2 is 0, when r0 equals r1, when x or y is
 * 0, or when bytes per pixel is neither 3 nor 4.
 *
 * Each row is moved in 32/16/8-byte NEON chunks.  A remainder of 1-7 bytes
 * is still moved as a full 8 bytes and both pointers are then stepped back:
 *   4 bytes per pixel: 4 bytes on both sides when x is odd.
 *   3 bytes per pixel: the destination goes back to exactly 3x bytes per
 *                      row, the source to 3x rounded up to a multiple of 4.
 * Because of the 8-byte tail copy, up to 7 bytes beyond the last
 * destination row are written and bytes beyond the last source row are
 * read; both buffers must leave room for that.
 */
	.align	4
	.global	bmpdecode_neon
	.arm
bmpdecode_neon:
	cmp	r2, #0				@ no parameter block
	moveq	pc, lr
	cmp	r0, r1				@ source equals destination
	moveq	pc, lr
	stmfd	sp!, {r0-r8, lr}
	ldr	r3, [r2, #4]			@ r3 = y, number of rows
	ldr	r4, [r2, #8]			@ r4 = bytes per pixel
	ldr	r2, [r2]			@ r2 = x, pixels per row
	cmp	r2, #0
	cmpne	r3, #0
	beq	__bmpdecode_filling_exit	@ empty image: nothing to copy
	cmp	r4, #4
	beq	__bmpdecode_x_4pixles
	cmp	r4, #3
	bne	__bmpdecode_filling_exit	@ only 3 or 4 bytes per pixel are handled
	@ 3 bytes per pixel
	add	r7, r2, r2, lsl #1		@ r7 = 3 * x = valid bytes per row
	and	r5, r7, #7
	rsb	r5, r5, #8			@ r5 = extra bytes written by the 8-byte tail copy
	and	r6, r5, #4			@ r6 = amount the source is stepped back
	b	__bmpdecode_y_loop
__bmpdecode_x_4pixles:				@ 4 bytes per pixel
	and	r5, r2, #1
	lsl	r5, r5, #2			@ r5 = 4 when x is odd, otherwise 0
	mov	r6, r5				@ same step back for the source
	lsl	r7, r2, #2			@ r7 = 4 * x = valid bytes per row
__bmpdecode_y_loop:
	mov	r8, r7				@ r8 = bytes left in this row
__bmpdecode_x_loop:
	cmp	r8, #31
	bls	__bmpdecode_filling_32bytes_less
	vld1.32	{d2, d3, d4, d5}, [r0]!		@ move 32 bytes
	vst1.32	{d2, d3, d4, d5}, [r1]!
	sub	r8, r8, #32
	b	__bmpdecode_x_loop
__bmpdecode_filling_32bytes_less:
	cmp	r8, #15
	bls	__bmpdecode_filling_16bytes_less
	vld1.32	{d2, d3}, [r0]!			@ move 16 bytes
	vst1.32	{d2, d3}, [r1]!
	sub	r8, r8, #16
__bmpdecode_filling_16bytes_less:
	cmp	r8, #7
	bls	__bmpdecode_filling_8bytes_less	@ fewer than 8 bytes left: go to the tail
	vld1.32	{d2}, [r0]!			@ move 8 bytes
	vst1.32	{d2}, [r1]!
	sub	r8, r8, #8
__bmpdecode_filling_8bytes_less:
	cmp	r8, #0
	beq	__bmpdecode_filling_x_finish
	vld1.32	{d2}, [r0]!			@ tail of 1-7 bytes moved as a full 8 bytes
	vst1.32	{d2}, [r1]!
	sub	r1, r1, r5			@ step the destination back
	sub	r0, r0, r6			@ step the source back
__bmpdecode_filling_x_finish:
	subs	r3, r3, #1			@ one row done
	bne	__bmpdecode_y_loop
__bmpdecode_filling_exit:
	ldmfd	sp!, {r0-r8, pc}		@ restore and return