f-stack/freebsd/arm/arm/blockio.S

597 lines
13 KiB
ArmAsm

/* $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $ */
/*-
* Copyright (c) 2001 Ben Harris.
* Copyright (c) 1994 Mark Brinicombe.
* Copyright (c) 1994 Brini.
* All rights reserved.
*
* This code is derived from software written for Brini by Mark Brinicombe
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Brini.
* 4. The name of the company nor the name of the author may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* RiscBSD kernel project
*
* blockio.S
*
* optimised block read/write from/to IO routines.
*
* Created : 08/10/94
* Modified : 22/01/99 -- R.Earnshaw
* Faster, and small tweaks for StrongARM
*/
#include <machine/asm.h>
__FBSDID("$FreeBSD$");
.syntax unified
/*
* Read bytes from an I/O address into a block of memory
*
* r0 = address to read from (IO)
* r1 = address to write to (memory)
* r2 = length
*/
/* This code will look very familiar if you've read _memcpy(). */
ENTRY(read_multi_1)
	/*
	 * Read r2 bytes, one ldrb at a time, from the fixed I/O address
	 * in r0 and store them at consecutive memory addresses starting
	 * at r1.  r0 is never advanced; r1 is post-incremented as data is
	 * stored.  r3 and r12 (ip) are used as scratch.
	 */

	/*
	 * Build a stack frame: {fp, ip(=old sp), lr, pc} are pushed and fp
	 * is pointed at the saved-pc slot, so every exit below can unwind
	 * and return with a single "ldmdb fp, {fp, sp, pc}".
	 */
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4

	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lrm1_l4		/* less than 4 bytes */

	/* Bring the destination up to a word boundary. */
	ands	r12, r1, #3
	beq	.Lrm1_main		/* aligned destination */
	rsb	r12, r12, #4		/* r12 = bytes needed to align (1..3) */
	cmp	r12, #2
	ldrb	r3, [r0]		/* first byte: always */
	strb	r3, [r1], #1
	ldrbge	r3, [r0]		/* second byte: when r12 >= 2 */
	strbge	r3, [r1], #1
	ldrbgt	r3, [r0]		/* third byte: when r12 == 3 */
	strbgt	r3, [r1], #1
	subs	r2, r2, r12		/* account for the alignment bytes */
	blt	.Lrm1_l4

	/*
	 * Main loop: four byte reads from the I/O address are merged into
	 * one word (first byte read in bits 7:0, last in bits 31:24) and
	 * stored with a single word store.
	 */
.Lrm1_main:
.Lrm1loop:
	ldrb	r3, [r0]
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #8
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #16
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #24
	str	r3, [r1], #4
	subs	r2, r2, #4
	bge	.Lrm1loop

	/* Tail: fewer than four bytes remain. */
.Lrm1_l4:
	adds	r2, r2, #4		/* r2 = length again */
	ldmdbeq	fp, {fp, sp, pc}	/* nothing left: unwind and return */
	cmp	r2, #2
	ldrb	r3, [r0]		/* first byte: always */
	strb	r3, [r1], #1
	ldrbge	r3, [r0]		/* second byte: when r2 >= 2 */
	strbge	r3, [r1], #1
	ldrbgt	r3, [r0]		/* third byte: when r2 > 2 */
	strbgt	r3, [r1], #1
	ldmdb	fp, {fp, sp, pc}	/* unwind frame and return */
END(read_multi_1)
/*
* Write bytes to an I/O address from a block of memory
*
* r0 = address to write to (IO)
* r1 = address to read from (memory)
* r2 = length
*/
/* This code will look very familiar if you've read _memcpy(). */
ENTRY(write_multi_1)
	/*
	 * Emit r2 bytes to the fixed I/O address in r0, taking them from
	 * consecutive memory addresses starting at r1.  r0 stays put; r1
	 * is post-incremented by every load.  r3 and ip are scratch.
	 */

	/* Frame setup; all exits return via "ldmdb fp, {fp, sp, pc}". */
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4

	subs	r2, r2, #4		/* bias the count by -4 */
	blt	.Lwm1_tail		/* under four bytes in total */

	/* Consume 1..3 bytes so that the source becomes word aligned. */
	ands	ip, r1, #3
	beq	.Lwm1_words		/* source already aligned */
	rsb	ip, ip, #4		/* ip = 4 - (r1 & 3) */
	cmp	ip, #2
	ldrb	r3, [r1], #1		/* unconditional first byte */
	strb	r3, [r0]
	ldrbge	r3, [r1], #1		/* second byte if ip >= 2 */
	strbge	r3, [r0]
	ldrbgt	r3, [r1], #1		/* third byte if ip > 2 */
	strbgt	r3, [r0]
	subs	r2, r2, ip		/* deduct the bytes just written */
	blt	.Lwm1_tail

	/*
	 * Word loop: fetch one word from memory and push it out as four
	 * byte stores, least significant byte first.
	 */
.Lwm1_words:
	ldr	r3, [r1], #4
	strb	r3, [r0]		/* bits 7:0 */
	lsr	r3, r3, #8
	strb	r3, [r0]		/* bits 15:8 */
	lsr	r3, r3, #8
	strb	r3, [r0]		/* bits 23:16 */
	lsr	r3, r3, #8
	strb	r3, [r0]		/* bits 31:24 */
	subs	r2, r2, #4
	bge	.Lwm1_words

	/* Remainder of zero to three bytes. */
.Lwm1_tail:
	adds	r2, r2, #4		/* undo the bias */
	ldmdbeq	fp, {fp, sp, pc}	/* zero left: unwind and return */
	cmp	r2, #2
	ldrb	r3, [r1], #1		/* unconditional first byte */
	strb	r3, [r0]
	ldrbge	r3, [r1], #1		/* second byte if r2 >= 2 */
	strbge	r3, [r0]
	ldrbgt	r3, [r1], #1		/* third byte if r2 > 2 */
	strbgt	r3, [r0]
	ldmdb	fp, {fp, sp, pc}	/* unwind and return */
END(write_multi_1)
/*
* Reads short ints (16 bits) from an I/O address into a block of memory
*
* r0 = address to read from (IO)
* r1 = address to write to (memory)
* r2 = length
*/
ENTRY(insw)
	/*
	 * r0 = I/O address (never advanced), r1 = memory destination,
	 * r2 = number of 16-bit items.  r3 and ip are scratch.
	 */

	/* Nothing to do for a count that is zero or negative. */
	cmp	r2, #0
	movle	pc, lr

	/*
	 * The paired path needs an even item count and a word-aligned
	 * destination; tsteq only runs when the first test left Z set.
	 */
	tst	r2, #1
	tsteq	r1, #3
	beq	.Linsw_pairs

	/* General path: one item per pass, stored as two separate bytes. */
.Linsw_single:
	ldr	r3, [r0]
	subs	r2, r2, #1		/* count down; flags consumed by bgt */
	strb	r3, [r1], #1		/* bits 7:0 */
	lsr	r3, r3, #8
	strb	r3, [r1], #1		/* bits 15:8 */
	bgt	.Linsw_single
	RET

	/*
	 * Paired path: two items per pass, stored as one word.  The first
	 * load uses offset #2, relying on nonaligned word access behaviour
	 * as noted by the original author; its bits 31:16 become the low
	 * halfword.  Bits 15:0 of the second load become the high halfword.
	 */
.Linsw_pairs:
	ldr	r3, [r0, #2]
	ldr	ip, [r0]
	lsr	r3, r3, #16
	orr	r3, r3, ip, lsl #16
	str	r3, [r1], #4
	subs	r2, r2, #2
	bgt	.Linsw_pairs
	RET
END(insw)
/*
* Writes short ints (16 bits) from a block of memory to an I/O address
*
* r0 = address to write to (IO)
* r1 = address to read from (memory)
* r2 = length
*/
ENTRY(outsw)
	/*
	 * r0 = I/O address (never advanced), r1 = memory source,
	 * r2 = number of 16-bit items.  r3 and ip are scratch.
	 */

	/* Nothing to do for a count that is zero or negative. */
	cmp	r2, #0
	movle	pc, lr

	/*
	 * The paired path needs an even item count and a word-aligned
	 * source; tsteq only runs when the first test left Z set.
	 */
	tst	r2, #1
	tsteq	r1, #3
	beq	.Loutsw_pairs

	/*
	 * General path: assemble one item from two bytes, replicate it
	 * into both halves of the word, and write that word out.
	 */
.Loutsw_single:
	ldrb	r3, [r1], #1		/* low byte */
	ldrb	ip, [r1], #1		/* high byte */
	subs	r2, r2, #1		/* count down; flags consumed by bgt */
	orr	r3, r3, ip, lsl #8
	orr	r3, r3, r3, lsl #16
	str	r3, [r0]
	bgt	.Loutsw_single
	RET

	/*
	 * Paired path: load a word r3 = (H)(L) and turn it into the two
	 * words (L)(L) and (H)(H) with three exclusive-ors, then write the
	 * (L)(L) word followed by the (H)(H) word.  An equivalent mov/orr
	 * formulation would be, per output word:
	 *	mov ip, r3, lsl #16 ; orr ip, ip, ip, lsr #16	-> (L)(L)
	 *	mov ip, r3, lsr #16 ; orr ip, ip, ip, lsl #16	-> (H)(H)
	 */
.Loutsw_pairs:
	ldr	r3, [r1], #4		/* r3 = (H)(L) */
	subs	r2, r2, #2		/* count down; flags consumed by bgt */
	eor	ip, r3, r3, lsr #16	/* ip = (H)(H^L) */
	eor	r3, r3, ip, lsl #16	/* r3 = (L)(L) */
	eor	ip, ip, r3, lsr #16	/* ip = (H)(H) */
	str	r3, [r0]
	str	ip, [r0]
	bgt	.Loutsw_pairs
	RET
END(outsw)
/*
* reads short ints (16 bits) from an I/O address into a block of memory
* with a length guaranteed to be a multiple of 16 bytes
* with a word aligned destination address
*
* r0 = address to read from (IO)
* r1 = address to write to (memory)
* r2 = length
*/
ENTRY(insw16)
/*
 * r2 counts 16-bit items: every pass of the loop below stores four words
 * (eight items) through r1 and subtracts 8 from r2.  r0 is never advanced.
 * r3, r4, r5 and ip collect the four output words; lr is reused as a
 * scratch register inside the loop, which is why it is saved on the stack
 * together with r4 and r5.
 */
/* Make sure that we have a positive length */
cmp r2, #0x00000000
movle pc, lr
/*
 * The unrolled loop requires r2 to be a multiple of 8 and r1 to be word
 * aligned (tsteq only runs when the first test left Z set).  Otherwise
 * branch to insw with r0-r2 and lr still untouched.
 */
tst r2, #0x00000007
tsteq r1, #0x00000003
bne _C_LABEL(insw)
/* Word aligned insw */
stmfd sp!, {r4,r5,lr}
.Linsw16loop:
/*
 * Each of the four groups below performs two loads from the I/O address
 * and builds one word: bits 31:16 of the first load become the low
 * halfword, bits 15:0 of the second load become the high halfword.
 */
ldr r3, [r0, #0x0002] /* take advantage of nonaligned
* word accesses */
ldr lr, [r0]
mov r3, r3, lsr #16 /* Put the two shorts together */
orr r3, r3, lr, lsl #16
ldr r4, [r0, #0x0002] /* take advantage of nonaligned
* word accesses */
ldr lr, [r0]
mov r4, r4, lsr #16 /* Put the two shorts together */
orr r4, r4, lr, lsl #16
ldr r5, [r0, #0x0002] /* take advantage of nonaligned
* word accesses */
ldr lr, [r0]
mov r5, r5, lsr #16 /* Put the two shorts together */
orr r5, r5, lr, lsl #16
ldr ip, [r0, #0x0002] /* take advantage of nonaligned
* word accesses */
ldr lr, [r0]
mov ip, ip, lsr #16 /* Put the two shorts together */
orr ip, ip, lr, lsl #16
/* Store the four words in the order they were read (r3, r4, r5, ip) */
stmia r1!, {r3-r5,ip}
subs r2, r2, #0x00000008 /* Next */
bgt .Linsw16loop
/* Popping the saved lr straight into pc performs the return */
ldmfd sp!, {r4,r5,pc} /* Restore regs and go home */
END(insw16)
/*
* Writes short ints (16 bits) from a block of memory to an I/O address
*
* r0 = address to write to (IO)
* r1 = address to read from (memory)
* r2 = length
*/
ENTRY(outsw16)
/*
 * r2 counts 16-bit items: every pass of the loop below loads four words
 * (eight items) through r1 and subtracts 8 from r2.  r0 is never advanced.
 * r4, r5, ip and lr receive the four source words and r3 is scratch; lr
 * is saved on the stack together with r4 and r5 because it is overwritten.
 */
/* Make sure that we have a positive length */
cmp r2, #0x00000000
movle pc, lr
/*
 * The unrolled loop requires r2 to be a multiple of 8 and the source
 * address in r1 to be word aligned (tsteq only runs when the first test
 * left Z set).  Otherwise branch to outsw with r0-r2 and lr untouched.
 */
tst r2, #0x00000007
tsteq r1, #0x00000003
bne _C_LABEL(outsw)
/* Word aligned outsw */
stmfd sp!, {r4,r5,lr}
.Loutsw16loop:
ldmia r1!, {r4,r5,ip,lr}
/*
 * For each source word (A)(B) the three eors below produce (B)(B) in r3
 * and (A)(A) in the source register; the (B)(B) word is written to the
 * I/O address first, then the (A)(A) word.
 */
eor r3, r4, r4, lsl #16 /* r3 = (A^B)(B) */
eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
eor r3, r3, r4, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
str r3, [r0]
str r4, [r0]
/* mov r3, r4, lsl #16
* orr r3, r3, r3, lsr #16
* str r3, [r0]
*
* mov r3, r4, lsr #16
* orr r3, r3, r3, lsl #16
* str r3, [r0]
*/
eor r3, r5, r5, lsl #16 /* r3 = (A^B)(B) */
eor r5, r5, r3, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
eor r3, r3, r5, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
str r3, [r0]
str r5, [r0]
eor r3, ip, ip, lsl #16 /* r3 = (A^B)(B) */
eor ip, ip, r3, lsr #16 /* ip = (A)(B^A^B) = (A)(A) */
eor r3, r3, ip, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
str r3, [r0]
str ip, [r0]
eor r3, lr, lr, lsl #16 /* r3 = (A^B)(B) */
eor lr, lr, r3, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
eor r3, r3, lr, lsl #16 /* r3 = (A^B^A)(B) = (B)(B) */
str r3, [r0]
str lr, [r0]
subs r2, r2, #0x00000008
bgt .Loutsw16loop
/* Popping the saved lr straight into pc performs the return */
ldmfd sp!, {r4,r5,pc} /* and go home */
END(outsw16)
/*
* reads short ints (16 bits) from an I/O address into a block of memory
* The I/O address is assumed to be mapped multiple times in a block of
* 8 words.
* The destination address should be word aligned.
*
* r0 = address to read from (IO)
* r1 = address to write to (memory)
* r2 = length
*/
ENTRY(inswm8)
/*
 * r2 counts 16-bit items.  Data is fetched with ldmia from consecutive
 * words starting at r0 (r0 itself is not written back), the low halfword
 * of each word is kept, and pairs of halfwords are packed into words that
 * are stored through r1.  The remaining count is handled in steps of
 * 8, 4, 2 and finally 1 item.
 */
/* Make sure that we have a positive length */
cmp r2, #0x00000000
movle pc, lr
/* A destination that is not word aligned is handed to insw, with
r0-r2 and lr still untouched */
tst r1, #0x00000003
bne _C_LABEL(insw)
/* Word aligned insw */
stmfd sp!, {r4-r9,lr}
/* lr = 0xffff0000: bic with it clears the upper halfword of a register */
mov lr, #0xff000000
orr lr, lr, #0x00ff0000
.Linswm8_loop8:
cmp r2, #8
bcc .Linswm8_l8 /* fewer than 8 items left */
ldmia r0, {r3-r9,ip} /* eight words: r3,r4,r5,r6,r7,r8,r9,ip */
bic r3, r3, lr
orr r3, r3, r4, lsl #16 /* r3 = items 0 and 1 */
bic r5, r5, lr
orr r4, r5, r6, lsl #16 /* r4 = items 2 and 3 */
bic r7, r7, lr
orr r5, r7, r8, lsl #16 /* r5 = items 4 and 5 */
bic r9, r9, lr
orr r6, r9, ip, lsl #16 /* r6 = items 6 and 7 */
stmia r1!, {r3-r6}
subs r2, r2, #0x00000008 /* Next */
bne .Linswm8_loop8
beq .Linswm8_l1 /* count reached zero: done */
.Linswm8_l8:
cmp r2, #4
bcc .Linswm8_l4 /* fewer than 4 items left */
ldmia r0, {r3-r6}
bic r3, r3, lr
orr r3, r3, r4, lsl #16
bic r5, r5, lr
orr r4, r5, r6, lsl #16
stmia r1!, {r3-r4}
subs r2, r2, #0x00000004
beq .Linswm8_l1
.Linswm8_l4:
cmp r2, #2
bcc .Linswm8_l2 /* fewer than 2 items left */
ldmia r0, {r3-r4}
bic r3, r3, lr
orr r3, r3, r4, lsl #16
str r3, [r1], #0x0004
subs r2, r2, #0x00000002
beq .Linswm8_l1
.Linswm8_l2:
cmp r2, #1
bcc .Linswm8_l1 /* nothing left */
/* Final single item: stored as two bytes, low byte first */
ldr r3, [r0]
subs r2, r2, #0x00000001 /* Test in load delay slot */
/* XXX, why don't we use result? */
strb r3, [r1], #0x0001
mov r3, r3, lsr #8
strb r3, [r1], #0x0001
.Linswm8_l1:
/* Popping the saved lr straight into pc performs the return */
ldmfd sp!, {r4-r9,pc} /* And go home */
END(inswm8)
/*
* write short ints (16 bits) to an I/O address from a block of memory
* The I/O address is assumed to be mapped multiple times in a block of
* 8 words.
* The source address should be word aligned.
*
* r0 = address to write to (IO)
* r1 = address to read from (memory)
* r2 = length
*/
ENTRY(outswm8)
/*
 * r2 counts 16-bit items.  Source words (A)(B) are loaded through r1 and
 * each is expanded into the two words (B)(B) and (A)(A), which are then
 * written with stmia to consecutive words starting at r0 (r0 itself is
 * not written back).  The remaining count is handled in steps of
 * 8, 4, 2 and finally 1 item.
 */
/* Make sure that we have a positive length */
cmp r2, #0x00000000
movle pc, lr
/* A source address that is not word aligned is handed to outsw, with
r0-r2 and lr still untouched */
tst r1, #0x00000003
bne _C_LABEL(outsw)
/* Word aligned outsw */
stmfd sp!, {r4-r8,lr}
.Loutswm8_loop8:
cmp r2, #8
bcc .Loutswm8_l8 /* fewer than 8 items left */
ldmia r1!, {r3,r5,r7,ip} /* four source words */
eor r4, r3, r3, lsr #16 /* r4 = (A)(A^B) */
eor r3, r3, r4, lsl #16 /* r3 = (A^A^B)(B) = (B)(B) */
eor r4, r4, r3, lsr #16 /* r4 = (A)(B^A^B) = (A)(A) */
eor r6, r5, r5, lsr #16 /* r6 = (A)(A^B) */
eor r5, r5, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
eor r8, r7, r7, lsr #16 /* r8 = (A)(A^B) */
eor r7, r7, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
eor lr, ip, ip, lsr #16 /* lr = (A)(A^B) */
eor ip, ip, lr, lsl #16 /* ip = (A^A^B)(B) = (B)(B) */
eor lr, lr, ip, lsr #16 /* lr = (A)(B^A^B) = (A)(A) */
stmia r0, {r3-r8,ip,lr} /* eight words, (B)(B) before (A)(A) per pair */
subs r2, r2, #0x00000008 /* Next */
bne .Loutswm8_loop8
beq .Loutswm8_l1 /* count reached zero: done */
.Loutswm8_l8:
cmp r2, #4
bcc .Loutswm8_l4 /* fewer than 4 items left */
ldmia r1!, {r3-r4}
eor r6, r3, r3, lsr #16 /* r6 = (A)(A^B) */
eor r5, r3, r6, lsl #16 /* r5 = (A^A^B)(B) = (B)(B) */
eor r6, r6, r5, lsr #16 /* r6 = (A)(B^A^B) = (A)(A) */
eor r8, r4, r4, lsr #16 /* r8 = (A)(A^B) */
eor r7, r4, r8, lsl #16 /* r7 = (A^A^B)(B) = (B)(B) */
eor r8, r8, r7, lsr #16 /* r8 = (A)(B^A^B) = (A)(A) */
stmia r0, {r5-r8}
subs r2, r2, #0x00000004
beq .Loutswm8_l1
.Loutswm8_l4:
cmp r2, #2
bcc .Loutswm8_l2 /* fewer than 2 items left */
ldr r3, [r1], #0x0004 /* r3 = (A)(B) */
subs r2, r2, #0x00000002 /* Done test in Load delay slot */
eor r5, r3, r3, lsr #16 /* r5 = (A)(A^B)*/
eor r4, r3, r5, lsl #16 /* r4 = (A^A^B)(B) = (B)(B) */
eor r5, r5, r4, lsr #16 /* r5 = (A)(B^A^B) = (A)(A) */
stmia r0, {r4, r5}
beq .Loutswm8_l1 /* uses the flags set by the subs above */
.Loutswm8_l2:
cmp r2, #1
bcc .Loutswm8_l1 /* nothing left */
/* Final single item: built from two bytes and replicated into both
halves of the word that is written out */
ldrb r3, [r1], #0x0001
ldrb r4, [r1], #0x0001
subs r2, r2, #0x00000001 /* Done test in load delay slot */
/* XXX This test isn't used? */
orr r3, r3, r4, lsl #8
orr r3, r3, r3, lsl #16
str r3, [r0]
.Loutswm8_l1:
/* Popping the saved lr straight into pc performs the return */
ldmfd sp!, {r4-r8,pc} /* And go home */
END(outswm8)