;*****************************************************************************
;*
;* This program is free software ; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
;*
;* $Id: idct_arm.asm 284 2005-10-04 08:54:26Z picard $
;*
;* The Core Pocket Media Player
;* Copyright (c) 2004-2005 Gabor Kovacs
;*
;*****************************************************************************

        AREA |.text|, CODE

        EXPORT IDCT_Block4x8
        EXPORT IDCT_Block8x8
        EXPORT IDCT_Block4x8Swap
        EXPORT IDCT_Block8x8Swap

; r6 Block
; r7,r8 must be saved

        macro
        MCol8 $Name,$Rotate,$Pitch
$Name   PROC
        ; r10 = x0
        ; r4  = x1
        ; r2  = x2
        ; r1  = x3
        ; r3  = x4
        ; r12 = x5
        ; r0  = x6
        ; r5  = x7
        ; r11 = x8
        ; r9  = tmp (x567)
        ldrsh r4, [r6, #4*$Pitch]
        ldrsh r0, [r6, #5*$Pitch]
        ldrsh r12,[r6, #7*$Pitch]
        ldrsh r5, [r6, #3*$Pitch]
        ldrsh r2, [r6, #6*$Pitch]
        ldrsh r1, [r6, #2*$Pitch]
        ldrsh r3, [r6, #1*$Pitch]
        ldrsh r10,[r6]
        if $Rotate
        add r6,r6,r9
        endif
        orr r9, r12, r0
        orr r9, r9, r5
        orr r11, r9, r2
        orr r11, r11, r4
        orrs r11, r11, r1
        bne $Name.Mode2
        cmp r3, #0
        bne $Name.Mode1
        if $Rotate=0
        cmp r10, #0
        beq $Name.Zero
        endif
        mov r10, r10, lsl #3
        strh r10, [r6]
        strh r10, [r6, #0x10]
        strh r10, [r6, #0x20]
        strh r10, [r6, #0x30]
        strh r10, [r6, #0x40]
        strh r10, [r6, #0x50]
        strh r10, [r6, #0x60]
        strh r10, [r6, #0x70]
$Name.Zero
        mov pc,lr

$Name.Mode1 ;x0,x4
        mov r11, r3
        mov r2, #0x8D, 30       ; 0x234 = 564
        orr r2, r2, #1
        mov r9, r3
        mul r2, r11, r2
        mov r11, #0xB1, 28      ; 0xB10 = 2832
        orr r11, r11, #9
        mul r4, r9, r11
        mov r11, #0x96, 28      ; 0x960 = 2400
        orr r11, r11, #8
        mul r5, r9, r11
        mov r11, #0x19, 26      ; 0x640 = 1600
        mov r1, r10, lsl #11
        orr r11, r11, #9
        mul r0, r3, r11
        add r1, r1, #0x80       ; 0x80 = 128
        add r3, r4, r1
        add r11, r5, r1
        mov r3, r3, asr #8
        mov r11, r11, asr #8
        strh r3, [r6]
        strh r11, [r6, #0x10]   ; 0x10 = 16
        add r3, r0, r1
        add r11, r2, r1
        mov r3, r3, asr #8
        mov r11, r11, asr #8
        strh r3, [r6, #0x20]    ; 0x20 = 32
        strh r11, [r6, #0x30]   ; 0x30 = 48
        sub r3, r1, r2
        sub r11, r1, r0
        mov r3, r3, asr #8
        mov r11, r11, asr #8
        strh r3, [r6, #0x40]    ; 0x40 = 64
        strh r11, [r6, #0x50]   ; 0x50 = 80
        sub r3, r1, r5
        sub r11, r1, r4
        mov r3, r3, asr #8
        mov r11, r11, asr #8
        strh r3, [r6, #0x60]    ; 0x60 = 96
        strh r11, [r6, #0x70]   ; 0x70 = 112
        mov pc,lr

$Name.Mode2 ;x0,x1,x2,x3
        orrs r11, r9, r3
        bne $Name.Mode3
        mov r3, r10, lsl #11
        add r3, r3, #128
        mov r9, #0x45, 28       ; 0x450 = 1104
        add r5, r3, r4, lsl #11
        add r11, r2, r1
        orr r9, r9, #4
        sub r3, r3, r4, lsl #11
        mul r4, r11, r9
        mov r11, #0x3B, 26      ; 0xEC0 = 3776
        orr r11, r11, #8
        mul r11, r2, r11
        sub r2, r4, r11
        mov r11, #0x62, 28      ; 0x620 = 1568
        mul r11, r1, r11
        add r0, r2, r3
        add r1, r11, r4
        add r4, r5, r1
        sub r3, r3, r2
        sub r5, r5, r1
        mov r1, r4, asr #8
        mov r3, r3, asr #8
        mov r2, r0, asr #8
        mov r4, r5, asr #8
        strh r1, [r6,#0x00]
        strh r2, [r6,#0x10]
        strh r3, [r6,#0x20]
        strh r4, [r6,#0x30]
        strh r4, [r6,#0x40]
        strh r3, [r6,#0x50]
        strh r2, [r6,#0x60]
        strh r1, [r6,#0x70]
        mov pc,lr
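; Mode3 below is the full 8-point column butterfly; Mode1/Mode2 above are
; shortcuts for sparse columns (DC plus the first AC term, and coefficients
; 0..3 only). The multipliers built from mov/orr/mvn/eor immediate pairs are
; the usual 11-bit fixed-point cosine factors Wk = 2048*sqrt(2)*cos(k*pi/16)
; of the widely used Chen-Wang integer IDCT: W1=2841 W2=2676 W3=2408 W5=1609
; W6=1108 W7=565, with 181 roughly 256/sqrt(2). For reference, Mode1 is
; equivalent to this hedged C sketch (illustrative names, not part of the
; original source):
;
;   /* column with only x0 (DC) and the first AC coefficient nonzero */
;   static const int W[4] = { 2841, 2408, 1609, 565 }; /* W1 W3 W5 W7 */
;   void col_dc_ac1(short *blk, int x0, int x4)  /* blk column, stride 8 */
;   {
;       int n, base = (x0 << 11) + 128;
;       for (n = 0; n < 4; n++) {
;           blk[8*n]     = (short)((base + W[n]*x4) >> 8);
;           blk[8*(7-n)] = (short)((base - W[n]*x4) >> 8);
;       }
;   }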
$Name.Mode3 ;x0,x1,x2,x3,x4,x5,x6,x7
        mov r9, #0x8D, 30
        orr r9, r9, #1          ;W7
        add r11, r12, r3
        mul r11, r9, r11        ;x8 = W7 * (x5 + x4)
        mov r9, #0x8E, 28
        orr r9, r9, #4          ;W1-W7
        mla r3, r9, r3, r11     ;x4 = x8 + (W1-W7) * x4
        mvn r9, #0xD40
        eor r9, r9, #0xD        ;-W1-W7
        mla r12, r9, r12, r11   ;x5 = x8 + (-W1-W7) * x5
        mov r9, #0x96, 28
        orr r9, r9, #8          ;W3
        add r11, r0, r5
        mul r11, r9, r11        ;x8 = W3 * (x6 + x7)
        mvn r9, #0x310
        eor r9, r9, #0xE        ;W5-W3
        mla r0, r9, r0, r11     ;x6 = x8 + (W5-W3) * x6
        mvn r9, #0xFB0          ;-W3-W5
        mla r5, r9, r5, r11     ;x7 = x8 + (-W3-W5) * x7
        mov r10, r10, lsl #11
        add r10, r10, #128      ;x0 = (x0 << 11) + 128
        add r11, r10,r4,lsl #11 ;x8 = x0 + (x1 << 11)
        sub r10, r10,r4,lsl #11 ;x0 = x0 - (x1 << 11)
        mov r9, #0x45, 28
        orr r9, r9, #4          ;W6
        add r4, r1, r2
        mul r4, r9, r4          ;x1 = W6 * (x3 + x2)
        mvn r9, #0xEC0
        eor r9, r9, #0x7        ;-W2-W6
        mla r2, r9, r2, r4      ;x2 = x1 + (-W2-W6) * x2
        mov r9, #0x620          ;W2-W6
        mla r1, r9, r1, r4      ;x3 = x1 + (W2-W6) * x3
        add r4, r3, r0          ;x1 = x4 + x6
        sub r3, r3, r0          ;x4 -= x6
        add r0, r12,r5          ;x6 = x5 + x7
        sub r12,r12,r5          ;x5 -= x7
        add r5, r11,r1          ;x7 = x8 + x3
        sub r11,r11,r1          ;x8 -= x3
        add r1, r10,r2          ;x3 = x0 + x2
        sub r10,r10,r2          ;x0 -= x2
        add r9, r3, r12         ;x4 + x5
        sub r3, r3, r12         ;x4 - x5
        mov r12, #181
        mul r2, r9, r12         ;181 * (x4 + x5)
        mul r9, r3, r12         ;181 * (x4 - x5)
        add r2, r2, #128        ;x2 = 181 * (x4 + x5) + 128
        add r3, r9, #128        ;x4 = 181 * (x4 - x5) + 128
        add r9,r5,r4
        sub r5,r5,r4
        mov r9,r9,asr #8        ;(x7 + x1) >> 8
        mov r5,r5,asr #8        ;(x7 - x1) >> 8
        strh r9,[r6,#0x00]
        strh r5,[r6,#0x70]
        add r9,r1,r2,asr #8
        sub r1,r1,r2,asr #8
        mov r9,r9,asr #8        ;(x3 + x2) >> 8
        mov r1,r1,asr #8        ;(x3 - x2) >> 8
        strh r9,[r6,#0x10]
        strh r1,[r6,#0x60]
        add r9,r10,r3,asr #8
        sub r10,r10,r3,asr #8
        mov r9,r9,asr #8        ;(x0 + x4) >> 8
        mov r10,r10,asr #8      ;(x0 - x4) >> 8
        strh r9,[r6,#0x20]
        strh r10,[r6,#0x50]
        add r9,r11,r0
        sub r11,r11,r0
        mov r9,r9,asr #8        ;(x8 + x6) >> 8
        mov r11,r11,asr #8      ;(x8 - x6) >> 8
        strh r9,[r6,#0x30]
        strh r11,[r6,#0x40]
        mov pc,lr
        mend

        MCol8 Col8,0,16
        MCol8 Col8Swap,1,2

; r0 Block[0]
; r6 Block
; r7 Src
; r8 Dst

        ALIGN 16
RowConst PROC
        add r0, r0, #0x20       ; 0x20 = 32
        cmp r7, #0
        mov r3, r0, asr #6
        beq RowConst_NoSrc
        cmp r3, #0
        beq RowConst_Zero
        blt RowConst_Sub
RowConst_Add
        ldr r0, CarryMask
        ldr r2, [r7]
        orr r3, r3, r3, lsl #8
        orr r3, r3, r3, lsl #16
        add r4, r2, r3
        eor r11, r2, r3
        and r2, r3, r2
        bic r11, r11, r4
        orr r11, r11, r2
        and r5, r11, r0
        mov r12, r5, lsl #1
        sub r10, r4, r12
        sub r11, r12, r5, lsr #7
        ldr r2, [r7, #4]
        orr r11, r11, r10
        str r11, [r8]
        add r4, r2, r3
        eor r11, r2, r3
        and r2, r3, r2
        bic r11, r11, r4
        orr r11, r11, r2
        and r5, r11, r0
        mov r12, r5, lsl #1
        sub r10, r4, r12
        sub r11, r12, r5, lsr #7
        orr r11, r11, r10
        str r11, [r8, #4]
        add r7, r7, #8          ;source stride
        mov pc,lr
RowConst_Sub
        ldr r0, CarryMask
        ldr r2, [r7]
        rsb r3, r3, #0
        orr r3, r3, r3, lsl #8
        orr r3, r3, r3, lsl #16
        mvn r2, r2
        add r4, r2, r3
        eor r11, r2, r3
        and r2, r3, r2
        bic r11, r11, r4
        orr r11, r11, r2
        and r5, r11, r0
        mov r12, r5, lsl #1
        sub r10, r4, r12
        sub r11, r12, r5, lsr #7
        ldr r2, [r7, #4]
        orr r11, r11, r10
        mvn r11, r11
        str r11, [r8]
        mvn r2, r2
        add r4, r2, r3
        eor r11, r2, r3
        and r2, r3, r2
        bic r11, r11, r4
        orr r11, r11, r2
        and r5, r11, r0
        mov r12, r5, lsl #1
        sub r10, r4, r12
        sub r11, r12, r5, lsr #7
        orr r11, r11, r10
        mvn r11, r11
        str r11, [r8, #4]
        add r7, r7, #8          ;source stride
        mov pc,lr
RowConst_Zero
        ldr r1, [r7]
        ldr r2, [r7, #4]
        str r1, [r8]
        str r2, [r8, #4]
        add r7, r7, #8          ;source stride
        mov pc,lr
RowConst_NoSrc
        cmp r3, #0
        movmi r3, #0
        cmppl r3, #255
        movgt r3, #255
        orr r3, r3, r3, lsl #8
        orr r3, r3, r3, lsl #16
        str r3, [r8]
        str r3, [r8, #4]
        mov pc,lr
        ENDP

CarryMask DCD 0x80808080
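; RowConst handles rows whose AC coefficients are all zero: the IDCT of such
; a row is a single constant, so it is either written directly (no prediction)
; or added to the 8 prediction bytes. The Add/Sub paths above implement a
; packed (SWAR) saturating byte add using the 0x80808080 CarryMask: the
; per-byte carry out of bit 7 is recovered from (a&b)|((a^b)&~(a+b)), the
; carry that leaked into the next byte lane is subtracted back out, and
; overflowed lanes are forced to 0xFF. Hedged C sketch of the Add path
; (illustrative names, not part of the original source):
;
;   #include <stdint.h>
;   /* add the byte constant replicated in c4 to four pixels at once,
;      saturating each byte lane at 255 */
;   uint32_t padd_usat_b4(uint32_t pix, uint32_t c4)
;   {
;       uint32_t sum  = pix + c4;
;       uint32_t ovf  = ((pix & c4) | ((pix ^ c4) & ~sum)) & 0x80808080u;
;       uint32_t leak = ovf << 1;                  /* carry into next lane */
;       return (sum - leak) | (leak - (ovf >> 7)); /* 0xFF where overflow  */
;   }
;
; The Sub path reuses the same trick on complemented pixels:
; subtracting c with a clamp at zero is ~padd_usat_b4(~pix, c4).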
W1      DCW 2841        ; 2048*sqrt(2)*cos(1*pi/16)
W3      DCW 2408        ; 2048*sqrt(2)*cos(3*pi/16)
nW5     DCW 0xF9B7      ; -1609 = -2048*sqrt(2)*cos(5*pi/16)
W6      DCW 1108        ; 2048*sqrt(2)*cos(6*pi/16)
W7      DCW 565         ; 2048*sqrt(2)*cos(7*pi/16)
W2      DCW 2676        ; 2048*sqrt(2)*cos(2*pi/16)

; r6 Block
; r7 Src
; r8 Dst

        ALIGN 16
IDCT_Block4x8Swap PROC
        add r0, r0, #256
        stmdb sp!, {r0, r2, r4 - r12, lr}       ; r0=BlockEnd r2=DstStride
        sub r6, r0, #256        ;Block
        mov r7, r3              ;Src
        mov r8, r1              ;Dst
        mov r9,#128-0*16+0*2
        bl Col8Swap
        mov r9,#128-1*16+1*2
        add r6, r6, #1*16-0*2-128
        bl Col8Swap
        mov r9,#128-2*16+2*2
        add r6, r6, #2*16-1*2-128
        bl Col8Swap
        mov r9,#128-3*16+3*2
        add r6, r6, #3*16-2*2-128
        bl Col8Swap
        sub r6, r6, #6
        b Row4_Loop

        ALIGN 16
IDCT_Block4x8 PROC
        add r0, r0, #128
        stmdb sp!, {r0, r2, r4 - r12, lr}       ; r0=BlockEnd r2=DstStride
        sub r6, r0, #128        ;Block
        mov r7, r3              ;Src
        mov r8, r1              ;Dst
        bl Col8
        add r6, r6, #2
        bl Col8
        add r6, r6, #2
        bl Col8
        add r6, r6, #2
        bl Col8
        sub r6, r6, #6
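; Row4_Loop below is the row pass shared by both 4x8 entry points: each row
; keeps only coefficients 0..3, so rows with no AC terms branch to RowConst,
; and the rest run the fixed-point row butterfly with a final arithmetic
; shift right by 14. When a prediction pointer is given (r7 != 0) the row
; result is added to 8 source bytes before the [0,255] clamp in Row4_Sat.
; Hedged C sketch of one row (illustrative names, not part of the original
; source):
;
;   void idct_row4(const short *b, unsigned char *dst, const unsigned char *src)
;   {
;       int x0 = (b[0] + 32) << 8, x4 = b[1], x3 = b[2], x7 = b[3];
;       int x5 = (x4*565 + 4) >> 3, x6 = x7*2408 + 4;     /* W7, W3  */
;       x4 = (x4*2841 + 4) >> 3;  x7 = x7*-1609 + 4;      /* W1, -W5 */
;       int x1 = x4 + (x6 >> 3);  x4 -= x6 >> 3;
;       int x2 = x3*1108 + 4;     x3 = x3*2676 + 4;       /* W6, W2  */
;       x6 = x5 + (x7 >> 3);      x5 -= x7 >> 3;
;       int x7b = x0 + (x3 >> 3), x8  = x0 - (x3 >> 3);
;       int x3b = x0 + (x2 >> 3); x0 -= x2 >> 3;
;       x2 = 181*(x5 + x4) + 128; x4 = 181*(x4 - x5) + 128;
;       int v[8] = { x7b + x1,        x3b + (x2 >> 8), x0 + (x4 >> 8),
;                    x8 + x6,         x8 - x6,         x0 - (x4 >> 8),
;                    x3b - (x2 >> 8), x7b - x1 };
;       for (int n = 0; n < 8; n++) {
;           int p = (v[n] >> 14) + (src ? src[n] : 0);
;           dst[n] = p < 0 ? 0 : p > 255 ? 255 : (unsigned char)p;
;       }
;   }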
Row4_Loop
        ldrsh r4, [r6, #4]      ;x3
        ldrsh r5, [r6, #6]      ;x7
        ldrsh r3, [r6, #2]      ;x4
        ldrsh r0, [r6]          ;x0
        orr r11, r5, r4
        orrs r11, r11, r3
        bne Row4_NoConst
        bl RowConst
        b Row4_Next
Row4_NoConst
        cmp r7, #0
        ldrsh r10, W7
        ldrsh r11, W1
        mov r2, #4
        add r0, r0, #32
        mov r0, r0, lsl #8      ;x0
        mla r14, r3, r10, r2    ;x5 = x4 * W7 + 4
        ldrsh r10, W3
        mla r3, r11, r3, r2     ;x4 = x4 * W1 + 4
        mov r14, r14, asr #3    ;x5 >>= 3
        ldrsh r11, nW5
        mla r12, r5, r10, r2    ;x6 = x7 * W3 + 4
        mov r3, r3, asr #3      ;x4 >>= 3
        ldrsh r10, W6
        mla r5, r11, r5, r2     ;x7 = x7 * -W5 + 4
        ldrsh r11, W2
        add r9, r3, r12, asr #3 ;x1 = x4 + (x6 >> 3)
        sub r3, r3, r12, asr #3 ;x4 = x4 - (x6 >> 3)
        mla r12, r4, r10, r2    ;x2 = x3 * W6 + 4
        mla r4, r11, r4, r2     ;x3 = x3 * W2 + 4
        add r2, r14, r5, asr #3 ;x6 = x5 + (x7 >> 3)
        sub r5, r14, r5, asr #3 ;x5 = x5 - (x7 >> 3)
        add r14, r0, r4, asr #3 ;x7 = x0 + (x3 >> 3)
        sub r4, r0, r4, asr #3  ;x8 = x0 - (x3 >> 3)
        add r10, r0, r12, asr #3        ;x3 = x0 + (x2 >> 3)
        sub r0, r0, r12, asr #3 ;x0 = x0 - (x2 >> 3)
        add r1, r5, r3
        mov r11, #181
        mul r12, r1, r11        ;x2 = 181 * (x5 + x4)
        sub r3, r3, r5
        mul r1, r3, r11         ;x4 = 181 * (x4 - x5)
        add r12, r12, #128      ;x2 += 128
        add r3, r1, #128        ;x4 += 128
        add r1, r14, r9         ;x5 = x7 + x1
        sub r5, r14, r9         ;x1 = x7 - x1
        add r11, r10, r12, asr #8       ;x7 = x3 + (x2 >> 8)
        sub r14, r10, r12, asr #8       ;x2 = x3 - (x2 >> 8)
        add r9, r0, r3, asr #8  ;x3 = x0 + (x4 >> 8)
        sub r3, r0, r3, asr #8  ;x4 = x0 - (x4 >> 8)
        add r12, r4, r2         ;x0 = x8 + x6
        sub r4, r4, r2          ;x6 = x8 - x6
        beq Row4_NoSrc
        ldrb r0, [r7]
        ldrb r2, [r7, #7]
        ldrb r10, [r7, #1]
        add r1, r0, r1, asr #14
        add r5, r2, r5, asr #14
        add r11, r10, r11, asr #14
        ldrb r2, [r7, #6]
        ldrb r0, [r7, #2]
        ldrb r10, [r7, #5]
        add r14, r2, r14, asr #14
        add r9, r0, r9, asr #14
        ldrb r0, [r7, #3]
        ldrb r2, [r7, #4]
        add r3, r10, r3, asr #14
        add r12, r0, r12, asr #14
        add r4, r2, r4, asr #14
        add r7, r7, #8          ;source stride
Row4_Sat
        orr r0, r5, r14
        orr r0, r0, r4
        orr r0, r0, r1
        orr r0, r0, r12
        orr r0, r0, r11
        orr r0, r0, r9
        orr r0, r0, r3
        bics r0, r0, #0xFF      ; 0xFF = 255
        beq Row4_Write
        mov r0, #0xFFFFFF00
        tst r1, r0
        movne r1, #0xFF
        movmi r1, #0x00
        tst r11, r0
        movne r11, #0xFF
        movmi r11, #0x00
        tst r9, r0
        movne r9, #0xFF
        movmi r9, #0x00
        tst r12, r0
        movne r12, #0xFF
        movmi r12, #0x00
        tst r4, r0
        movne r4, #0xFF
        movmi r4, #0x00
        tst r3, r0
        movne r3, #0xFF
        movmi r3, #0x00
        tst r14, r0
        movne r14, #0xFF
        movmi r14, #0x00
        tst r5, r0
        movne r5, #0xFF
        movmi r5, #0x00
Row4_Write
        strb r1, [r8]
        strb r11,[r8, #1]
        strb r9, [r8, #2]
        strb r12,[r8, #3]
        strb r4, [r8, #4]
        strb r3, [r8, #5]
        strb r14,[r8, #6]
        strb r5, [r8, #7]
Row4_Next
        ldr r2, [sp, #4]        ;DstStride
        ldr r1, [sp, #0]        ;BlockEnd
        add r6,r6,#16           ;Block += 16
        add r8,r8,r2            ;Dst += DstStride
        cmp r6,r1
        bne Row4_Loop
        ldmia sp!, {r0,r2,r4 - r12, pc}
Row4_NoSrc
        mov r5, r5, asr #14
        mov r14, r14, asr #14
        mov r12, r12, asr #14
        mov r1, r1, asr #14
        mov r11, r11, asr #14
        mov r9, r9, asr #14
        mov r3, r3, asr #14
        mov r4, r4, asr #14
        b Row4_Sat
        ENDP

; r6 Block
; r7 Src
; r8 Dst

        ALIGN 16
IDCT_Block8x8Swap PROC
        add r0, r0, #256
        stmdb sp!, {r0, r2, r4 - r12, lr}       ; r0=BlockEnd r2=DstStride
        sub r6, r0, #256        ;Block
        mov r7, r3              ;Src
        mov r8, r1              ;Dst
        mov r9,#128-0*16+0*2
        bl Col8Swap
        mov r9,#128-1*16+1*2
        add r6, r6, #1*16-0*2-128
        bl Col8Swap
        mov r9,#128-2*16+2*2
        add r6, r6, #2*16-1*2-128
        bl Col8Swap
        mov r9,#128-3*16+3*2
        add r6, r6, #3*16-2*2-128
        bl Col8Swap
        mov r9,#128-4*16+4*2
        add r6, r6, #4*16-3*2-128
        bl Col8Swap
        mov r9,#128-5*16+5*2
        add r6, r6, #5*16-4*2-128
        bl Col8Swap
        mov r9,#128-6*16+6*2
        add r6, r6, #6*16-5*2-128
        bl Col8Swap
        mov r9,#128-7*16+7*2
        add r6, r6, #7*16-6*2-128
        bl Col8Swap
        sub r6, r6, #14
        b Row8_Loop

        ALIGN 16
IDCT_Block8x8 PROC
        add r0, r0, #128
        stmdb sp!, {r0, r2, r4 - r12, lr}       ; r0=BlockEnd r2=DstStride
        sub r6, r0, #128        ;Block
        mov r7, r3              ;Src
        mov r8, r1              ;Dst
        bl Col8
        add r6, r6, #2
        bl Col8
        add r6, r6, #2
        bl Col8
        add r6, r6, #2
        bl Col8
        add r6, r6, #2
        bl Col8
        add r6, r6, #2
        bl Col8
        add r6, r6, #2
        bl Col8
        add r6, r6, #2
        bl Col8
        sub r6, r6, #14
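; Row8_Loop below is the full 8-coefficient row pass shared by both 8x8
; entry points. It mirrors Row4_Loop, but its odd-part terms are 8x larger
; (there is no early >>3 after the W1/W3/W5/W7 multiplies), so the
; sqrt(1/2) rotation is reordered to stay within 32 bits; the pre-shift and
; the deeper final shift cancel, giving both row passes the same net scaling:
;
;   /* 4x8 row: x2 = 181*(x4 + x5) + 128;           out = (.. + (x2 >> 8)) >> 14 */
;   /* 8x8 row: x2 = 181*((x4 + x5 + 4) >> 3) + 16; out = (.. + (x2 >> 5)) >> 17 */
;
; The _W3.._nW2_nW6 halfword table below duplicates the W1..W2 constants
; kept near Row4_Loop, presumably because ldrsh literal addressing only
; reaches +/-255 bytes, so each row loop needs its constants within range.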
Row8_Loop
        ldrsh r0, [r6]          ;x0
        ldrsh r3, [r6, #2]      ;x4
        ldrsh r4, [r6, #4]      ;x3
        ldrsh r5, [r6, #6]      ;x7
        ldrsh r9, [r6, #8]      ;x1
        ldrsh r2, [r6, #10]     ;x6
        ldrsh r14,[r6, #12]     ;x2
        ldrsh r1, [r6, #14]     ;x5
        orr r11, r3, r4
        orr r11, r11, r5
        orr r11, r11, r9
        orr r11, r11, r2
        orr r11, r11, r14
        orrs r11, r11, r1
        bne Row8_NoConst
        bl RowConst
        b Row8_Next

_W3      DCW 2408       ; 2048*sqrt(2)*cos(3*pi/16)
_W6      DCW 1108       ; 2048*sqrt(2)*cos(6*pi/16)
_W7      DCW 565        ; 2048*sqrt(2)*cos(7*pi/16)
_W1_nW7  DCW 2276
_nW1_nW7 DCW 0xF2B2     ;-3406
_W5_nW3  DCW 0xFCE1     ;-799
_nW2_nW6 DCW 0xF138     ;-3784
        ALIGN 4

Row8_NoConst
        cmp r7, #0
        add r0, r0, #32
        ldrsh r10, _W7
        mov r0, r0, lsl #11     ;x0 = (x0 + 32) << 11
        ldrsh r12, _W1_nW7
        add r11,r3,r1
        mul r11,r10,r11         ;x8 = W7 * (x4 + x5)
        ldrsh r10, _nW1_nW7
        mla r3, r12, r3, r11    ;x4 = x8 + (W1-W7) * x4
        ldrsh r12, _W3
        mla r1, r10, r1, r11    ;x5 = x8 + (-W1-W7) * x5
        ldrsh r10, _W5_nW3
        add r11,r2,r5           ;x6 + x7
        mul r11,r12,r11         ;x8 = W3 * (x6 + x7)
        mvn r12, #0xFB0         ;-W3-W5
        mla r2,r10,r2,r11       ;x6 = x8 + (W5-W3) * x6
        ldrsh r10, _W6
        mla r5,r12,r5,r11       ;x7 = x8 + (-W3-W5) * x7
        ldrsh r12, _nW2_nW6
        add r11, r0, r9, lsl #11        ;x8 = x0 + (x1 << 11)
        sub r0, r0, r9, lsl #11 ;x0 = x0 - (x1 << 11)
        add r9, r4, r14
        mul r9, r10, r9         ;x1 = W6 * (x3 + x2)
        mov r10, #0x620         ;W2-W6
        mla r14, r12, r14, r9   ;x2 = x1 + (-W2-W6) * x2
        mov r12, #181
        mla r4, r10, r4, r9     ;x3 = x1 + (W2-W6) * x3
        add r9, r3, r2          ;x1 = x4 + x6
        sub r3, r3, r2          ;x4 = x4 - x6
        add r2, r1, r5          ;x6 = x5 + x7
        sub r1, r1, r5          ;x5 = x5 - x7
        add r5, r11, r4         ;x7 = x8 + x3
        sub r11, r11, r4        ;x8 = x8 - x3
        add r4, r0, r14         ;x3 = x0 + x2
        sub r0, r0, r14         ;x0 = x0 - x2
        add r3, r3, #4
        add r14, r3, r1         ;x2 = x4 + x5 + 4
        sub r3, r3, r1          ;x4 = x4 - x5 + 4
        mov r10, #16
        mov r14, r14, asr #3
        mov r3, r3, asr #3
        mla r14, r12, r14, r10  ;x2 = 181 * ((x4 + x5 + 4) >> 3) + 16
        mla r3, r12, r3, r10    ;x4 = 181 * ((x4 - x5 + 4) >> 3) + 16
        add r1, r5, r9          ;x5 = x7 + x1
        sub r9, r5, r9          ;x1 = x7 - x1
        add r5, r4, r14, asr #5 ;x7 = x3 + (x2 >> 5)
        sub r14,r4, r14, asr #5 ;x2 = x3 - (x2 >> 5)
        add r4, r0, r3, asr #5  ;x3 = x0 + (x4 >> 5)
        sub r3, r0, r3, asr #5  ;x4 = x0 - (x4 >> 5)
        add r0, r11, r2         ;x0 = x8 + x6
        sub r2, r11, r2         ;x6 = x8 - x6
        beq Row8_NoSrc
        ldrb r10, [r7]
        ldrb r12, [r7, #7]
        ldrb r11, [r7, #1]
        add r1, r10, r1, asr #17
        add r9, r12, r9, asr #17
        add r5, r11, r5, asr #17
        ldrb r10, [r7, #6]
        ldrb r12, [r7, #2]
        ldrb r11, [r7, #5]
        add r14, r10, r14, asr #17
        add r4, r12, r4, asr #17
        ldrb r10, [r7, #3]
        ldrb r12, [r7, #4]
        add r3, r11, r3, asr #17
        add r0, r10, r0, asr #17
        add r2, r12, r2, asr #17
        add r7, r7, #8          ;source stride
Row8_Sat
        orr r10, r1, r9
        orr r10, r10, r5
        orr r10, r10, r14
        orr r10, r10, r4
        orr r10, r10, r3
        orr r10, r10, r0
        orr r10, r10, r2
        bics r10, r10, #0xFF    ; 0xFF = 255
        beq Row8_Write
        mov r10, #0xFFFFFF00
        tst r1, r10
        movne r1, #0xFF
        movmi r1, #0x00
        tst r9, r10
        movne r9, #0xFF
        movmi r9, #0x00
        tst r5, r10
        movne r5, #0xFF
        movmi r5, #0x00
        tst r14, r10
        movne r14, #0xFF
        movmi r14, #0x00
        tst r4, r10
        movne r4, #0xFF
        movmi r4, #0x00
        tst r3, r10
        movne r3, #0xFF
        movmi r3, #0x00
        tst r0, r10
        movne r0, #0xFF
        movmi r0, #0x00
        tst r2, r10
        movne r2, #0xFF
        movmi r2, #0x00
Row8_Write
        strb r1, [r8]
        strb r5, [r8, #1]
        strb r4, [r8, #2]
        strb r0, [r8, #3]
        strb r2, [r8, #4]
        strb r3, [r8, #5]
        strb r14,[r8, #6]
        strb r9, [r8, #7]
Row8_Next
        ldr r2, [sp, #4]        ;DstStride
        ldr r1, [sp, #0]        ;BlockEnd
        add r6,r6,#16           ;Block += 16
        add r8,r8,r2            ;Dst += DstStride
        cmp r6,r1
        bne Row8_Loop
        ldmia sp!, {r0,r2,r4 - r12, pc}
Row8_NoSrc
        mov r1, r1, asr #17
        mov r9, r9, asr #17
        mov r5, r5, asr #17
        mov r14, r14, asr #17
        mov r4, r4, asr #17
        mov r3, r3, asr #17
        mov r0, r0, asr #17
        mov r2, r2, asr #17
        b Row8_Sat
        ENDP

        END