mirror of https://github.com/F-Stack/f-stack.git
597 lines
13 KiB
ArmAsm
597 lines
13 KiB
ArmAsm
/* $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $ */
|
|
|
|
/*-
|
|
* Copyright (c) 2001 Ben Harris.
|
|
* Copyright (c) 1994 Mark Brinicombe.
|
|
* Copyright (c) 1994 Brini.
|
|
* All rights reserved.
|
|
*
|
|
* This code is derived from software written for Brini by Mark Brinicombe
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by Brini.
|
|
* 4. The name of the company nor the name of the author may be used to
|
|
* endorse or promote products derived from this software without specific
|
|
* prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
|
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
* IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
|
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* RiscBSD kernel project
|
|
*
|
|
* blockio.S
|
|
*
|
|
* optimised block read/write from/to IO routines.
|
|
*
|
|
* Created : 08/10/94
|
|
* Modified : 22/01/99 -- R.Earnshaw
|
|
* Faster, and small tweaks for StrongARM
|
|
*/
|
|
|
|
#include <machine/asm.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
.syntax unified
|
|
|
|
/*
 * read_multi_1: read bytes from a single I/O address into a block of memory.
 *
 * In:
 *	r0 = address to read from (IO); never advanced, every byte is
 *	     fetched from this same location
 *	r1 = address to write to (memory); advanced as bytes are stored
 *	r2 = length in bytes
 *
 * Scratch: r3, r12 (ip), flags.
 *
 * Strategy (the same shape as _memcpy()):
 *   1. Fewer than 4 bytes: go straight to the byte tail.
 *   2. Otherwise copy 1-3 single bytes until r1 is word aligned.
 *   3. Assemble four consecutive byte reads into one word (first byte in
 *      bits 0-7, fourth byte in bits 24-31) and store it with a single str.
 *   4. Finish with a 0-3 byte tail.
 *
 * An APCS-style frame is built on entry; every exit is an
 * "ldmdb fp, {fp, sp, pc}", which reloads the caller's fp and sp and
 * returns through the saved lr in one instruction.
 */
ENTRY(read_multi_1)
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lrm1_l4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lrm1_main		/* aligned destination */
	rsb	r12, r12, #4		/* r12 = bytes needed to align r1 (1..3) */
	cmp	r12, #2			/* ge: at least 2, gt: exactly 3 */
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrbge	r3, [r0]
	strbge	r3, [r1], #1
	ldrbgt	r3, [r0]
	strbgt	r3, [r1], #1
	subs	r2, r2, r12		/* account for the alignment bytes */
	blt	.Lrm1_l4		/* fewer than 4 bytes remain */
.Lrm1_main:
.Lrm1loop:
	ldrb	r3, [r0]		/* byte 0 -> bits 0-7 */
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #8	/* byte 1 -> bits 8-15 */
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #16	/* byte 2 -> bits 16-23 */
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #24	/* byte 3 -> bits 24-31 */
	str	r3, [r1], #4
	subs	r2, r2, #4
	bge	.Lrm1loop
.Lrm1_l4:
	adds	r2, r2, #4		/* r2 = length again */
	/*
	 * Nothing left: unwind the frame and return.  No separate
	 * conditional return is needed after this, because whenever EQ
	 * holds the load of pc below has already left the function.
	 */
	ldmdbeq	fp, {fp, sp, pc}
	cmp	r2, #2			/* ge: at least 2 left, gt: 3 left */
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrbge	r3, [r0]
	strbge	r3, [r1], #1
	ldrbgt	r3, [r0]
	strbgt	r3, [r1], #1
	ldmdb	fp, {fp, sp, pc}
END(read_multi_1)
|
|
|
|
/*
 * write_multi_1: write a block of memory, byte by byte, to one I/O address.
 *
 * In:
 *	r0 = address to write to (IO); never advanced, every byte is
 *	     stored to this same location
 *	r1 = address to read from (memory); advanced as bytes are consumed
 *	r2 = length in bytes
 *
 * Scratch: r3, ip (r12), flags.
 *
 * The layout mirrors _memcpy(): short transfers and leftovers use a byte
 * tail, the source pointer is first brought up to a word boundary, and
 * the bulk is fetched one word at a time and emitted as four byte
 * stores, lowest-order byte first.
 *
 * An APCS-style frame is built on entry and both exits unwind it with
 * "ldmdb fp, {fp, sp, pc}".
 */
ENTRY(write_multi_1)
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4

	subs	r2, r2, #4		/* bias the count by one word */
	blt	.Lwm1_tail		/* under 4 bytes in total */

	ands	ip, r1, #3		/* ip = misalignment of the source */
	beq	.Lwm1_words		/* already on a word boundary */

	rsb	ip, ip, #4		/* ip = 1..3 bytes up to the boundary */
	cmp	ip, #2			/* ge: two or more, gt: all three */
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrbge	r3, [r1], #1
	strbge	r3, [r0]
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0]
	subs	r2, r2, ip		/* those bytes are done */
	blt	.Lwm1_tail		/* not a full word left */

.Lwm1_words:
	ldr	r3, [r1], #4		/* fetch four bytes at once */
	strb	r3, [r0]		/* bits 0-7 */
	lsr	r3, r3, #8
	strb	r3, [r0]		/* bits 8-15 */
	lsr	r3, r3, #8
	strb	r3, [r0]		/* bits 16-23 */
	lsr	r3, r3, #8
	strb	r3, [r0]		/* bits 24-31 */
	subs	r2, r2, #4
	bge	.Lwm1_words

.Lwm1_tail:
	adds	r2, r2, #4		/* undo the bias: r2 = bytes left */
	ldmdbeq	fp, {fp, sp, pc}	/* none left: unwind and return */

	cmp	r2, #2			/* ge: two or more, gt: three */
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrbge	r3, [r1], #1
	strbge	r3, [r0]
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0]
	ldmdb	fp, {fp, sp, pc}
END(write_multi_1)
|
|
|
|
/*
 * insw: read 16-bit quantities from one I/O address into memory.
 *
 * In:
 *	r0 = address to read from (IO); never advanced
 *	r1 = address to write to (memory); advanced as data is stored
 *	r2 = length, counted in 16-bit items
 *
 * Scratch: r3, ip, flags.  Returns at once when r2 <= 0.
 *
 * When the count is even and r1 is word aligned, two items are combined
 * and stored as one word per iteration; otherwise each item is stored as
 * two separate bytes, low byte first.
 */
ENTRY(insw)
	/* Nothing to do for a zero or negative count */
	cmp	r2, #0
	movle	pc, lr

	/* Even count and word-aligned destination: use the paired loop */
	tst	r2, #1
	tsteq	r1, #3
	beq	.Linsw_pairs

	/* General case: one item per pass, stored bytewise */
.Linsw_single:
	ldr	r3, [r0]
	subs	r2, r2, #1		/* count down while the load completes */
	strb	r3, [r1], #1		/* bits 0-7 */
	lsr	r3, r3, #8
	strb	r3, [r1], #1		/* bits 8-15 */
	bgt	.Linsw_single

	RET

	/*
	 * Paired case: two items per pass, one word store.
	 *
	 * The first item is read through the word address r0 + 2 and taken
	 * from bits 16-31 of the result; the original author's note says
	 * this takes advantage of nonaligned word accesses.
	 * NOTE(review): presumably relies on how the target core handles a
	 * non-word-aligned ldr -- confirm for the CPUs this is built for.
	 */
.Linsw_pairs:
	ldr	r3, [r0, #2]
	ldr	ip, [r0]
	lsr	r3, r3, #16		/* first item  -> bits 0-15 */
	orr	r3, r3, ip, lsl #16	/* second item -> bits 16-31 */
	str	r3, [r1], #4
	subs	r2, r2, #2
	bgt	.Linsw_pairs

	RET
END(insw)
|
|
|
|
/*
 * outsw: write 16-bit quantities from memory to one I/O address.
 *
 * In:
 *	r0 = address to write to (IO); never advanced
 *	r1 = address to read from (memory); advanced as data is consumed
 *	r2 = length, counted in 16-bit items
 *
 * Scratch: r3, ip, flags.  Returns at once when r2 <= 0.
 *
 * Every store to the device is a full word carrying the 16-bit value in
 * both halves.  When the count is even and the source is word aligned,
 * two items are fetched per word load; otherwise each item is gathered
 * from two byte loads.
 */
ENTRY(outsw)
	/* Nothing to do for a zero or negative count */
	cmp	r2, #0
	movle	pc, lr

	/* Even count and word-aligned source: use the paired loop */
	tst	r2, #1
	tsteq	r1, #3
	beq	.Loutsw_pairs

	/* General case: build each item from two bytes */
.Loutsw_single:
	ldrb	r3, [r1], #1		/* low byte */
	ldrb	ip, [r1], #1		/* high byte */
	subs	r2, r2, #1		/* count down while the loads complete */
	orr	r3, r3, ip, lsl #8	/* r3 = 16-bit item */
	orr	r3, r3, r3, lsl #16	/* replicate it into the upper half */
	str	r3, [r0]
	bgt	.Loutsw_single

	RET

	/*
	 * Paired case.  With r3 = (H)(L) as loaded, three exclusive-ors
	 * split it into r3 = (L)(L) and ip = (H)(H) without a temporary:
	 *
	 *	ip = r3 ^ (r3 >> 16)	-> (H)(H^L)
	 *	r3 = r3 ^ (ip << 16)	-> (H^H^L)(L) = (L)(L)
	 *	ip = ip ^ (r3 >> 16)	-> (H)(H^L^L) = (H)(H)
	 *
	 * The original source records an equivalent mov/orr sequence as an
	 * alternative:
	 *	mov ip, r3, lsl #16 ; orr ip, ip, ip, lsr #16 ; str ip, [r0]
	 *	mov ip, r3, lsr #16 ; orr ip, ip, ip, lsl #16 ; str ip, [r0]
	 */
.Loutsw_pairs:
	ldr	r3, [r1], #4		/* r3 = (H)(L) */
	subs	r2, r2, #2		/* count down while the load completes */

	eor	ip, r3, r3, lsr #16
	eor	r3, r3, ip, lsl #16
	eor	ip, ip, r3, lsr #16

	str	r3, [r0]		/* low item first */
	str	ip, [r0]		/* then the high item */

	bgt	.Loutsw_pairs

	RET
END(outsw)
|
|
|
|
/*
 * insw16: read 16-bit quantities from one I/O address into memory, for a
 * length guaranteed to be a multiple of 16 bytes (8 items) and a word
 * aligned destination address.
 *
 * In:
 *	r0 = address to read from (IO); never advanced
 *	r1 = address to write to (memory); advanced 16 bytes per pass
 *	r2 = length, counted in 16-bit items
 *
 * Scratch: r3, ip, flags (r4, r5 and lr are saved and restored).
 * Returns at once when r2 <= 0.  If the count is not a multiple of 8 or
 * r1 is not word aligned, control is handed to insw with the arguments
 * untouched.
 */
ENTRY(insw16)
	/* Nothing to do for a zero or negative count */
	cmp	r2, #0
	movle	pc, lr

	/* Anything not 8-item / word aligned goes to the generic routine */
	tst	r2, #7
	tsteq	r1, #3

	bne	_C_LABEL(insw)

	push	{r4, r5, lr}

	/*
	 * Each pass gathers eight items into r3, r4, r5 and ip, then writes
	 * all four words with one stmia.  Per word, the first item is read
	 * through r0 + 2 and taken from bits 16-31, the second is read
	 * through r0 and placed in bits 16-31 of the result; the original
	 * author's note says this takes advantage of nonaligned word
	 * accesses.
	 * NOTE(review): presumably relies on how the target core handles a
	 * non-word-aligned ldr -- confirm for the CPUs this is built for.
	 */
.Linsw16_block:
	ldr	r3, [r0, #2]
	ldr	lr, [r0]
	lsr	r3, r3, #16
	orr	r3, r3, lr, lsl #16	/* word 0 */

	ldr	r4, [r0, #2]
	ldr	lr, [r0]
	lsr	r4, r4, #16
	orr	r4, r4, lr, lsl #16	/* word 1 */

	ldr	r5, [r0, #2]
	ldr	lr, [r0]
	lsr	r5, r5, #16
	orr	r5, r5, lr, lsl #16	/* word 2 */

	ldr	ip, [r0, #2]
	ldr	lr, [r0]
	lsr	ip, ip, #16
	orr	ip, ip, lr, lsl #16	/* word 3 */

	stmia	r1!, {r3-r5, ip}
	subs	r2, r2, #8		/* eight items done */
	bgt	.Linsw16_block

	pop	{r4, r5, pc}		/* restore and return */
END(insw16)
|
|
|
|
/*
 * outsw16: write 16-bit quantities from memory to one I/O address, eight
 * items (16 bytes) per pass.
 *
 * In:
 *	r0 = address to write to (IO); never advanced
 *	r1 = address to read from (memory); advanced 16 bytes per pass
 *	r2 = length, counted in 16-bit items
 *
 * Scratch: r3, ip, flags (r4, r5 and lr are saved and restored).
 * Returns at once when r2 <= 0.  If the count is not a multiple of 8 or
 * r1 is not word aligned, control is handed to outsw with the arguments
 * untouched.
 *
 * Every store to the device is a full word carrying one 16-bit item in
 * both halves.  A loaded word X = (A)(B) is split with three
 * exclusive-ors, using r3 as the partner register:
 *
 *	r3 = X  ^ (X  << 16)	-> (A^B)(B)
 *	X  = X  ^ (r3 >> 16)	-> (A)(B^A^B) = (A)(A)
 *	r3 = r3 ^ (X  << 16)	-> (A^B^A)(B) = (B)(B)
 *
 * and r3 (low item) is stored before X (high item).  The original source
 * records an equivalent mov/orr sequence as an alternative:
 *	mov r3, r4, lsl #16 ; orr r3, r3, r3, lsr #16 ; str r3, [r0]
 *	mov r3, r4, lsr #16 ; orr r3, r3, r3, lsl #16 ; str r3, [r0]
 */
ENTRY(outsw16)
	/* Nothing to do for a zero or negative count */
	cmp	r2, #0
	movle	pc, lr

	/* Anything not 8-item / word aligned goes to the generic routine */
	tst	r2, #7
	tsteq	r1, #3

	bne	_C_LABEL(outsw)

	push	{r4, r5, lr}

.Loutsw16_block:
	ldmia	r1!, {r4, r5, ip, lr}	/* four words = eight items */

	/* word 0, held in r4 */
	eor	r3, r4, r4, lsl #16	/* r3 = (A^B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(A) */
	eor	r3, r3, r4, lsl #16	/* r3 = (B)(B) */
	str	r3, [r0]
	str	r4, [r0]

	/* word 1, held in r5 */
	eor	r3, r5, r5, lsl #16	/* r3 = (A^B)(B) */
	eor	r5, r5, r3, lsr #16	/* r5 = (A)(A) */
	eor	r3, r3, r5, lsl #16	/* r3 = (B)(B) */
	str	r3, [r0]
	str	r5, [r0]

	/* word 2, held in ip */
	eor	r3, ip, ip, lsl #16	/* r3 = (A^B)(B) */
	eor	ip, ip, r3, lsr #16	/* ip = (A)(A) */
	eor	r3, r3, ip, lsl #16	/* r3 = (B)(B) */
	str	r3, [r0]
	str	ip, [r0]

	/* word 3, held in lr */
	eor	r3, lr, lr, lsl #16	/* r3 = (A^B)(B) */
	eor	lr, lr, r3, lsr #16	/* lr = (A)(A) */
	eor	r3, r3, lr, lsl #16	/* r3 = (B)(B) */
	str	r3, [r0]
	str	lr, [r0]

	subs	r2, r2, #8		/* eight items done */
	bgt	.Loutsw16_block

	pop	{r4, r5, pc}		/* restore and return */
END(outsw16)
|
|
|
|
/*
 * inswm8: read 16-bit quantities from an I/O address into memory.
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words, so up to eight items are fetched with a single ldmia from r0.
 * The destination address should be word aligned.
 *
 * In:
 *	r0 = address to read from (IO); never advanced
 *	r1 = address to write to (memory); advanced as data is stored
 *	r2 = length, counted in 16-bit items
 *
 * Scratch: r3, ip, flags (r4-r9 and lr are saved and restored).
 * Returns at once when r2 <= 0.  If r1 is not word aligned, control is
 * handed to insw with the arguments untouched.
 *
 * Only bits 0-15 of each word read from the device are kept.  The count
 * is worked off in chunks of 8, then at most one chunk each of 4, 2 and
 * 1 items.
 */
ENTRY(inswm8)
	/* Nothing to do for a zero or negative count */
	cmp	r2, #0
	movle	pc, lr

	/* An unaligned destination goes to the generic routine */
	tst	r1, #3

	bne	_C_LABEL(insw)

	push	{r4-r9, lr}

	/* lr = 0xffff0000: mask for clearing the upper half of a word */
	mov	lr, #0xff000000
	orr	lr, lr, #0x00ff0000

	/* Chunks of eight items -> four words */
.Linswm8_x8:
	cmp	r2, #8
	blo	.Linswm8_x4

	ldmia	r0, {r3-r9, ip}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16	/* items 0,1 */
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16	/* items 2,3 */
	bic	r7, r7, lr
	orr	r5, r7, r8, lsl #16	/* items 4,5 */
	bic	r9, r9, lr
	orr	r6, r9, ip, lsl #16	/* items 6,7 */

	stmia	r1!, {r3-r6}

	subs	r2, r2, #8
	bne	.Linswm8_x8
	b	.Linswm8_ret		/* count reached exactly zero */

	/* One chunk of four items -> two words */
.Linswm8_x4:
	cmp	r2, #4
	blo	.Linswm8_x2

	ldmia	r0, {r3-r6}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16	/* items 0,1 */
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16	/* items 2,3 */

	stmia	r1!, {r3, r4}

	subs	r2, r2, #4
	beq	.Linswm8_ret

	/* One chunk of two items -> one word */
.Linswm8_x2:
	cmp	r2, #2
	blo	.Linswm8_x1

	ldmia	r0, {r3, r4}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	str	r3, [r1], #4

	subs	r2, r2, #2
	beq	.Linswm8_ret

	/* A final single item, stored as two bytes */
.Linswm8_x1:
	cmp	r2, #1
	blo	.Linswm8_ret

	ldr	r3, [r0]
	subs	r2, r2, #1		/* flags from this are not consumed */

	strb	r3, [r1], #1		/* bits 0-7 */
	lsr	r3, r3, #8
	strb	r3, [r1], #1		/* bits 8-15 */

.Linswm8_ret:
	pop	{r4-r9, pc}		/* restore and return */
END(inswm8)
|
|
|
|
/*
 * outswm8: write 16-bit quantities from memory to an I/O address.
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words, so up to eight items are emitted with a single stmia to r0.
 * The source address should be word aligned.
 *
 * In:
 *	r0 = address to write to (IO); never advanced
 *	r1 = address to read from (memory); advanced as data is consumed
 *	r2 = length, counted in 16-bit items
 *
 * Scratch: r3, ip, flags (r4-r8 and lr are saved and restored).
 * Returns at once when r2 <= 0.  If r1 is not word aligned, control is
 * handed to outsw with the arguments untouched.
 *
 * Every word sent to the device carries one 16-bit item in both halves.
 * A loaded word X = (A)(B) is split into lo = (B)(B) and hi = (A)(A)
 * with three exclusive-ors:
 *
 *	hi = X  ^ (X  >> 16)	-> (A)(A^B)
 *	lo = X  ^ (hi << 16)	-> (A^A^B)(B) = (B)(B)
 *	hi = hi ^ (lo >> 16)	-> (A)(B^A^B) = (A)(A)
 *
 * The count is worked off in chunks of 8, then at most one chunk each of
 * 4, 2 and 1 items.
 */
ENTRY(outswm8)
	/* Nothing to do for a zero or negative count */
	cmp	r2, #0
	movle	pc, lr

	/* An unaligned source goes to the generic routine */
	tst	r1, #3

	bne	_C_LABEL(outsw)

	push	{r4-r8, lr}

	/* Chunks of eight items: four words in, eight words out */
.Loutswm8_x8:
	cmp	r2, #8
	blo	.Loutswm8_x4

	ldmia	r1!, {r3, r5, r7, ip}

	eor	r4, r3, r3, lsr #16	/* r4 = (A)(A^B) */
	eor	r3, r3, r4, lsl #16	/* r3 = (B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(A) */

	eor	r6, r5, r5, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r5, r6, lsl #16	/* r5 = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(A) */

	eor	r8, r7, r7, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r7, r8, lsl #16	/* r7 = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(A) */

	eor	lr, ip, ip, lsr #16	/* lr = (A)(A^B) */
	eor	ip, ip, lr, lsl #16	/* ip = (B)(B) */
	eor	lr, lr, ip, lsr #16	/* lr = (A)(A) */

	stmia	r0, {r3-r8, ip, lr}

	subs	r2, r2, #8
	bne	.Loutswm8_x8
	b	.Loutswm8_ret		/* count reached exactly zero */

	/* One chunk of four items: two words in, four words out */
.Loutswm8_x4:
	cmp	r2, #4
	blo	.Loutswm8_x2

	ldmia	r1!, {r3, r4}

	eor	r6, r3, r3, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r3, r6, lsl #16	/* r5 = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(A) */

	eor	r8, r4, r4, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r4, r8, lsl #16	/* r7 = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(A) */

	stmia	r0, {r5-r8}

	subs	r2, r2, #4
	beq	.Loutswm8_ret

	/* One chunk of two items: one word in, two words out */
.Loutswm8_x2:
	cmp	r2, #2
	blo	.Loutswm8_x1

	ldr	r3, [r1], #4		/* r3 = (A)(B) */
	subs	r2, r2, #2		/* flags are consumed by the beq below */

	eor	r5, r3, r3, lsr #16	/* r5 = (A)(A^B) */
	eor	r4, r3, r5, lsl #16	/* r4 = (B)(B) */
	eor	r5, r5, r4, lsr #16	/* r5 = (A)(A) */

	stmia	r0, {r4, r5}

	beq	.Loutswm8_ret

	/* A final single item, gathered from two bytes */
.Loutswm8_x1:
	cmp	r2, #1
	blo	.Loutswm8_ret

	ldrb	r3, [r1], #1		/* low byte */
	ldrb	r4, [r1], #1		/* high byte */
	subs	r2, r2, #1		/* flags from this are not consumed */
	orr	r3, r3, r4, lsl #8	/* r3 = 16-bit item */
	orr	r3, r3, r3, lsl #16	/* replicate it into the upper half */
	str	r3, [r0]

.Loutswm8_ret:
	pop	{r4-r8, pc}		/* restore and return */
END(outswm8)
|
|
|