474 lines
13 KiB
C
474 lines
13 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <sys/types.h>
|
|
#include <sys/time.h>
|
|
#include <sys/mman.h>
|
|
#include <asm/types.h>
|
|
#include <sys/time.h>
|
|
|
|
#ifdef __SUNXI_DISPLAY2__
|
|
#include <arm_neon.h>
|
|
#endif
|
|
|
|
#include "yuv_rotate.h"
|
|
|
|
#if defined(__SUNXI_DISPLAY2__) && (!defined(CONF_KERNEL_VERSION_4_4)) && (!defined(CONF_KERNEL_VERSION_4_9))
|
|
void nv_rotage90(unsigned int width,
|
|
unsigned int height, unsigned char* src_addr,
|
|
unsigned char* dst_addr)
|
|
{
|
|
unsigned int i = 0;
|
|
unsigned int j = 0;
|
|
unsigned int k = 0;
|
|
|
|
unsigned int row_bytes = width;
|
|
unsigned int col_bytes = height;
|
|
unsigned int row_bytes_7 = width*7;
|
|
unsigned int row_bytes_3 = width*3;
|
|
|
|
unsigned char* y_src_line0 = src_addr;
|
|
unsigned char* y_src_line1 = y_src_line0 + row_bytes;
|
|
unsigned char* y_src_line2 = y_src_line1 + row_bytes;
|
|
unsigned char* y_src_line3 = y_src_line2 + row_bytes;
|
|
unsigned char* y_src_line4 = y_src_line3 + row_bytes;
|
|
unsigned char* y_src_line5 = y_src_line4 + row_bytes;
|
|
unsigned char* y_src_line6 = y_src_line5 + row_bytes;
|
|
unsigned char* y_src_line7 = y_src_line6 + row_bytes;
|
|
|
|
unsigned char* uv_src_line0 = src_addr + width*height;
|
|
unsigned char* uv_src_line1 = uv_src_line0 + row_bytes;
|
|
unsigned char* uv_src_line2 = uv_src_line1 + row_bytes;
|
|
unsigned char* uv_src_line3 = uv_src_line2 + row_bytes;
|
|
|
|
unsigned char* y_dst_line0 = dst_addr + col_bytes - 8;
|
|
unsigned char* uv_dst_line0 = dst_addr + width*height + col_bytes - 8;
|
|
|
|
unsigned char* y_dst_line0_static = y_dst_line0;
|
|
unsigned char* uv_dst_line0_static = uv_dst_line0;
|
|
|
|
for (i = 0; i < height; i += 8) {
|
|
for (j = 0; j < width; j += 8) {
|
|
asm volatile
|
|
(
|
|
"mov r9, %9 \n\t"
|
|
"pld [%0,#64] \n\t"
|
|
"pld [%1,#64] \n\t"
|
|
"pld [%2,#64] \n\t"
|
|
"pld [%3,#64] \n\t"
|
|
"pld [%4,#64] \n\t"
|
|
"pld [%5,#64] \n\t"
|
|
"pld [%6,#64] \n\t"
|
|
"pld [%7,#64] \n\t"
|
|
"vld1.u8 {d0}, [%0]! \n\t"
|
|
"vld1.u8 {d1}, [%1]! \n\t"
|
|
"vld1.u8 {d2}, [%2]! \n\t"
|
|
"vld1.u8 {d3}, [%3]! \n\t"
|
|
"vld1.u8 {d4}, [%4]! \n\t"
|
|
"vld1.u8 {d5}, [%5]! \n\t"
|
|
"vld1.u8 {d6}, [%6]! \n\t"
|
|
"vld1.u8 {d7}, [%7]! \n\t"
|
|
|
|
"vtrn.8 d1, d0 \n\t"
|
|
"vtrn.8 d3, d2 \n\t"
|
|
"vtrn.8 d5, d4 \n\t"
|
|
"vtrn.8 d7, d6 \n\t"
|
|
|
|
"vtrn.16 d3, d1 \n\t"
|
|
"vtrn.16 d2, d0 \n\t"
|
|
"vtrn.16 d7, d5 \n\t"
|
|
"vtrn.16 d6, d4 \n\t"
|
|
"vtrn.32 d7, d3 \n\t"
|
|
"vtrn.32 d5, d1 \n\t"
|
|
"vtrn.32 d6, d2 \n\t"
|
|
"vtrn.32 d4, d0 \n\t"
|
|
|
|
"vst1.u8 {d7}, [%8], r9 \n\t"
|
|
"vst1.u8 {d6}, [%8], r9 \n\t"
|
|
"vst1.u8 {d5}, [%8], r9 \n\t"
|
|
"vst1.u8 {d4}, [%8], r9 \n\t"
|
|
"vst1.u8 {d3}, [%8], r9 \n\t"
|
|
"vst1.u8 {d2}, [%8], r9 \n\t"
|
|
"vst1.u8 {d1}, [%8], r9 \n\t"
|
|
"vst1.u8 {d0}, [%8], r9 \n\t"
|
|
|
|
: "+r" (y_src_line0),
|
|
"+r" (y_src_line1),
|
|
"+r" (y_src_line2),
|
|
"+r" (y_src_line3),
|
|
"+r" (y_src_line4),
|
|
"+r" (y_src_line5),
|
|
"+r" (y_src_line6),
|
|
"+r" (y_src_line7),
|
|
"+r" (y_dst_line0),
|
|
"+r" (col_bytes)
|
|
:
|
|
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "r9"
|
|
);
|
|
asm volatile
|
|
(
|
|
"mov r8, %5 \n\t"
|
|
|
|
"pld [%0,#64] \n\t"
|
|
"pld [%1,#64] \n\t"
|
|
"pld [%2,#64] \n\t"
|
|
"pld [%3,#64] \n\t"
|
|
"vld1.u8 {d8}, [%0]! \n\t"
|
|
"vld1.u8 {d9}, [%1]! \n\t"
|
|
"vld1.u8 {d10}, [%2]! \n\t"
|
|
"vld1.u8 {d11}, [%3]! \n\t"
|
|
|
|
"vtrn.16 d9, d8 \n\t"
|
|
"vtrn.16 d11, d10 \n\t"
|
|
"vtrn.32 d11, d9 \n\t"
|
|
"vtrn.32 d10, d8 \n\t"
|
|
|
|
"vst1.u8 {d11}, [%4], r8 \n\t"
|
|
"vst1.u8 {d10}, [%4], r8 \n\t"
|
|
"vst1.u8 {d9}, [%4], r8 \n\t"
|
|
"vst1.u8 {d8}, [%4], r8 \n\t"
|
|
|
|
: "+r" (uv_src_line0),
|
|
"+r" (uv_src_line1),
|
|
"+r" (uv_src_line2),
|
|
"+r" (uv_src_line3),
|
|
"+r" (uv_dst_line0),
|
|
"+r" (col_bytes)
|
|
:
|
|
: "cc", "memory", "d8", "d9", "d10", "d11", "r8"
|
|
);
|
|
}
|
|
|
|
y_src_line0 = y_src_line7;
|
|
y_src_line1 += row_bytes_7;
|
|
y_src_line2 += row_bytes_7;
|
|
y_src_line3 += row_bytes_7;
|
|
y_src_line4 += row_bytes_7;
|
|
y_src_line5 += row_bytes_7;
|
|
y_src_line6 += row_bytes_7;
|
|
y_src_line7 += row_bytes_7;
|
|
|
|
uv_src_line0 = uv_src_line3;
|
|
uv_src_line1 += row_bytes_3;
|
|
uv_src_line2 += row_bytes_3;
|
|
uv_src_line3 += row_bytes_3;
|
|
|
|
y_dst_line0 = y_dst_line0_static - i - 8;
|
|
uv_dst_line0 = uv_dst_line0_static - i - 8;
|
|
}
|
|
}
|
|
|
|
void nv_rotage270(unsigned int width,
|
|
unsigned int height, unsigned char* src_addr,
|
|
unsigned char* dst_addr)
|
|
{
|
|
unsigned int i = 0;
|
|
unsigned int j = 0;
|
|
unsigned int k = 0;
|
|
|
|
unsigned int row_bytes = width;
|
|
unsigned int col_bytes = height;
|
|
|
|
unsigned int row_bytes_7 = width*7;
|
|
unsigned int row_bytes_3 = width*3;
|
|
|
|
unsigned char* y_src_line0 = src_addr;
|
|
unsigned char* y_src_line1 = y_src_line0 + row_bytes;
|
|
unsigned char* y_src_line2 = y_src_line1 + row_bytes;
|
|
unsigned char* y_src_line3 = y_src_line2 + row_bytes;
|
|
unsigned char* y_src_line4 = y_src_line3 + row_bytes;
|
|
unsigned char* y_src_line5 = y_src_line4 + row_bytes;
|
|
unsigned char* y_src_line6 = y_src_line5 + row_bytes;
|
|
unsigned char* y_src_line7 = y_src_line6 + row_bytes;
|
|
|
|
unsigned char* uv_src_line0 = src_addr + width*height;
|
|
unsigned char* uv_src_line1 = uv_src_line0 + row_bytes;
|
|
unsigned char* uv_src_line2 = uv_src_line1 + row_bytes;
|
|
unsigned char* uv_src_line3 = uv_src_line2 + row_bytes;
|
|
|
|
unsigned char* y_dst_line0 = dst_addr + col_bytes*(row_bytes - 8);
|
|
unsigned char* uv_dst_line0 = dst_addr + width*height + col_bytes*(row_bytes - 4)/2;
|
|
|
|
unsigned char* y_dst_line0_static = y_dst_line0;
|
|
unsigned char* uv_dst_line0_static = uv_dst_line0;
|
|
|
|
for (i = 0; i < height; i += 8) {
|
|
for (j = 0; j < width; j += 8) {
|
|
asm volatile
|
|
(
|
|
"mov r9, %9 \n\t"
|
|
"pld [%0,#64] \n\t"
|
|
"pld [%1,#64] \n\t"
|
|
"pld [%2,#64] \n\t"
|
|
"pld [%3,#64] \n\t"
|
|
"pld [%4,#64] \n\t"
|
|
"pld [%5,#64] \n\t"
|
|
"pld [%6,#64] \n\t"
|
|
"pld [%7,#64] \n\t"
|
|
|
|
"vld1.u8 {d0}, [%0]! \n\t"
|
|
"vld1.u8 {d1}, [%1]! \n\t"
|
|
"vld1.u8 {d2}, [%2]! \n\t"
|
|
"vld1.u8 {d3}, [%3]! \n\t"
|
|
"vld1.u8 {d4}, [%4]! \n\t"
|
|
"vld1.u8 {d5}, [%5]! \n\t"
|
|
"vld1.u8 {d6}, [%6]! \n\t"
|
|
"vld1.u8 {d7}, [%7]! \n\t"
|
|
|
|
"vtrn.8 d0, d1 \n\t"
|
|
"vtrn.8 d2, d3 \n\t"
|
|
"vtrn.8 d4, d5 \n\t"
|
|
"vtrn.8 d6, d7 \n\t"
|
|
|
|
"vtrn.16 d1, d3 \n\t"
|
|
"vtrn.16 d0, d2 \n\t"
|
|
"vtrn.16 d5, d7 \n\t"
|
|
"vtrn.16 d4, d6 \n\t"
|
|
|
|
"vtrn.32 d3, d7 \n\t"
|
|
"vtrn.32 d1, d5 \n\t"
|
|
"vtrn.32 d2, d6 \n\t"
|
|
"vtrn.32 d0, d4 \n\t"
|
|
|
|
"vst1.u8 {d7}, [%8], r9 \n\t"
|
|
"vst1.u8 {d6}, [%8], r9 \n\t"
|
|
"vst1.u8 {d5}, [%8], r9 \n\t"
|
|
"vst1.u8 {d4}, [%8], r9 \n\t"
|
|
"vst1.u8 {d3}, [%8], r9 \n\t"
|
|
"vst1.u8 {d2}, [%8], r9 \n\t"
|
|
"vst1.u8 {d1}, [%8], r9 \n\t"
|
|
"vst1.u8 {d0}, [%8], r9 \n\t"
|
|
|
|
: "+r" (y_src_line0),
|
|
"+r" (y_src_line1),
|
|
"+r" (y_src_line2),
|
|
"+r" (y_src_line3),
|
|
"+r" (y_src_line4),
|
|
"+r" (y_src_line5),
|
|
"+r" (y_src_line6),
|
|
"+r" (y_src_line7),
|
|
"+r" (y_dst_line0),
|
|
"+r" (col_bytes)
|
|
:
|
|
: "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "r9"
|
|
);
|
|
|
|
asm volatile
|
|
(
|
|
"mov r8, %5 \n\t"
|
|
|
|
"pld [%0,#64] \n\t"
|
|
"pld [%1,#64] \n\t"
|
|
"pld [%2,#64] \n\t"
|
|
"pld [%3,#64] \n\t"
|
|
|
|
"vld1.u8 {d8}, [%0]! \n\t"
|
|
"vld1.u8 {d9}, [%1]! \n\t"
|
|
"vld1.u8 {d10}, [%2]! \n\t"
|
|
"vld1.u8 {d11}, [%3]! \n\t"
|
|
|
|
"vtrn.16 d8, d9 \n\t"
|
|
"vtrn.16 d10, d11 \n\t"
|
|
|
|
"vtrn.32 d9, d11 \n\t"
|
|
"vtrn.32 d8, d10 \n\t"
|
|
|
|
"vst1.u8 {d11}, [%4], r8 \n\t"
|
|
"vst1.u8 {d10}, [%4], r8 \n\t"
|
|
"vst1.u8 {d9}, [%4], r8 \n\t"
|
|
"vst1.u8 {d8}, [%4], r8 \n\t"
|
|
|
|
: "+r" (uv_src_line0),
|
|
"+r" (uv_src_line1),
|
|
"+r" (uv_src_line2),
|
|
"+r" (uv_src_line3),
|
|
"+r" (uv_dst_line0),
|
|
"+r" (col_bytes)
|
|
:
|
|
: "cc", "memory", "d8", "d9", "d10", "d11", "r8"
|
|
);
|
|
|
|
y_dst_line0 -= 16*col_bytes;
|
|
uv_dst_line0 -= 8*col_bytes;
|
|
}
|
|
|
|
y_src_line0 = y_src_line7;
|
|
y_src_line1 += row_bytes_7;
|
|
y_src_line2 += row_bytes_7;
|
|
y_src_line3 += row_bytes_7;
|
|
y_src_line4 += row_bytes_7;
|
|
y_src_line5 += row_bytes_7;
|
|
y_src_line6 += row_bytes_7;
|
|
y_src_line7 += row_bytes_7;
|
|
|
|
uv_src_line0 = uv_src_line3;
|
|
uv_src_line1 += row_bytes_3;
|
|
uv_src_line2 += row_bytes_3;
|
|
uv_src_line3 += row_bytes_3;
|
|
|
|
y_dst_line0 = y_dst_line0_static + i + 8;
|
|
|
|
uv_dst_line0 = uv_dst_line0_static + i + 8;
|
|
}
|
|
}
|
|
|
|
#else
|
|
void nv_rotage90(unsigned int width, unsigned int height,
|
|
unsigned char* src_addr, unsigned char* dst_addr)
|
|
{
|
|
#define NV21 0
|
|
#define YU12 1
|
|
|
|
unsigned int i = 0;
|
|
unsigned int j = 0;
|
|
|
|
unsigned int k = 0;
|
|
|
|
unsigned int format = NV21;
|
|
unsigned int row_bytes = width;
|
|
unsigned int col_bytes = height;
|
|
unsigned char* y_src_line0 = src_addr;
|
|
unsigned char* uv_src_line0 = src_addr + width*height;
|
|
|
|
|
|
unsigned char* y_dst_line0 = dst_addr + col_bytes - 1;
|
|
unsigned char* y_dst_line1 = y_dst_line0 + col_bytes;
|
|
unsigned char* y_dst_line2 = y_dst_line1 + col_bytes;
|
|
unsigned char* y_dst_line3 = y_dst_line2 + col_bytes;
|
|
unsigned char* y_dst_line4 = y_dst_line3 + col_bytes;
|
|
unsigned char* y_dst_line5 = y_dst_line4 + col_bytes;
|
|
unsigned char* y_dst_line6 = y_dst_line5 + col_bytes;
|
|
unsigned char* y_dst_line7 = y_dst_line6 + col_bytes;
|
|
|
|
unsigned char* uv_dst_line0 = NULL;
|
|
unsigned char* uv_dst_line0_v = NULL;
|
|
unsigned char* uv_dst_line0_v_static = NULL;
|
|
|
|
if (format == YU12) {
|
|
uv_dst_line0 = dst_addr + width*height + col_bytes/2 - 1;
|
|
uv_dst_line0_v = dst_addr + width*height*5/4 + col_bytes/2 - 1;
|
|
|
|
uv_dst_line0_v_static = uv_dst_line0_v;
|
|
} else if (format == NV21) {
|
|
uv_dst_line0 = dst_addr + width*height + col_bytes - 2;
|
|
}
|
|
|
|
unsigned char* y_dst_line0_static = y_dst_line0;
|
|
unsigned char* uv_dst_line0_static = uv_dst_line0;
|
|
|
|
for (i = 0; i < height; i++) {
|
|
for (j = 0; j < width; j++) {
|
|
*(y_dst_line0) = *(y_src_line0++);
|
|
y_dst_line0 += height;
|
|
}
|
|
y_dst_line0 = y_dst_line0_static - i - 1;
|
|
}
|
|
|
|
if (format == NV21) {
|
|
for (i = 0; i < height/2; i++) {
|
|
for (j = 0; j < width/2; j++) {
|
|
*(uv_dst_line0) = *(uv_src_line0++);
|
|
*(uv_dst_line0+1) = *(uv_src_line0++);
|
|
uv_dst_line0 += height;
|
|
}
|
|
uv_dst_line0 = uv_dst_line0_static - 2*(i+1);
|
|
}
|
|
} else if (format == YU12) {
|
|
for (i = 0; i < height/2; i++) {
|
|
for (j = 0; j < width/2; j++) {
|
|
*(uv_dst_line0) = *(uv_src_line0++);
|
|
uv_dst_line0 += height/2;
|
|
}
|
|
uv_dst_line0 = uv_dst_line0_static - (i+1);
|
|
}
|
|
|
|
for (i = 0; i < height/2; i++) {
|
|
for (j = 0; j < width/2; j++) {
|
|
*(uv_dst_line0_v) = *(uv_src_line0++);
|
|
uv_dst_line0_v += height/2;
|
|
}
|
|
uv_dst_line0_v = uv_dst_line0_v_static - (i+1);
|
|
}
|
|
}
|
|
}
|
|
|
|
void nv_rotage270(unsigned int width, unsigned int height,
|
|
unsigned char* src_addr, unsigned char* dst_addr)
|
|
{
|
|
#define NV21 1
|
|
#define YU12 0
|
|
|
|
unsigned int i = 0;
|
|
unsigned int j = 0;
|
|
unsigned int k = 0;
|
|
|
|
unsigned int format = NV21;
|
|
unsigned int row_bytes = width;
|
|
unsigned int col_bytes = height;
|
|
|
|
unsigned char* y_src_line0 = src_addr;
|
|
unsigned char* uv_src_line0 = src_addr + width*height;
|
|
|
|
unsigned char* y_dst_line0 = dst_addr + col_bytes*(row_bytes - 1);
|
|
unsigned char* y_dst_line1 = y_dst_line0 + col_bytes;
|
|
unsigned char* y_dst_line2 = y_dst_line1 + col_bytes;
|
|
unsigned char* y_dst_line3 = y_dst_line2 + col_bytes;
|
|
unsigned char* y_dst_line4 = y_dst_line3 + col_bytes;
|
|
unsigned char* y_dst_line5 = y_dst_line4 + col_bytes;
|
|
unsigned char* y_dst_line6 = y_dst_line5 + col_bytes;
|
|
unsigned char* y_dst_line7 = y_dst_line6 + col_bytes;
|
|
|
|
unsigned char* uv_dst_line0 = NULL;
|
|
unsigned char* uv_dst_line0_v = NULL;
|
|
unsigned char* uv_dst_line0_v_static = NULL;
|
|
|
|
if (format == YU12) {
|
|
uv_dst_line0 = dst_addr + width*height;
|
|
uv_dst_line0_v = dst_addr + width*height*5/4;
|
|
|
|
uv_dst_line0_v_static = uv_dst_line0_v;
|
|
} else if (format == NV21) {
|
|
uv_dst_line0 = dst_addr + width*height + col_bytes*(row_bytes - 1)/2;
|
|
}
|
|
|
|
unsigned char* y_dst_line0_static = y_dst_line0;
|
|
unsigned char* uv_dst_line0_static = uv_dst_line0;
|
|
for (i = 0; i < height; i++) {
|
|
for (j = 0; j < width; j++) {
|
|
*(y_dst_line0) = *(y_src_line0++);
|
|
y_dst_line0 -= height;
|
|
}
|
|
y_dst_line0 = y_dst_line0_static+ i + 1;
|
|
}
|
|
|
|
if (format == NV21) {
|
|
for (i = 0; i < height/2; i++) {
|
|
for (j = 0; j < width/2; j++) {
|
|
*(uv_dst_line0) = *(uv_src_line0++);
|
|
*(uv_dst_line0+1) = *(uv_src_line0++);
|
|
uv_dst_line0 -= height;
|
|
}
|
|
uv_dst_line0 = uv_dst_line0_static + 2*(i+1);
|
|
}
|
|
} else if (format == YU12) {
|
|
for (i = 0; i < height/2; i++) {
|
|
for (j = 0; j < width/2; j++) {
|
|
*(uv_dst_line0) = *(uv_src_line0++);
|
|
uv_dst_line0 += height/2;
|
|
}
|
|
uv_dst_line0 = uv_dst_line0_static + (i+1);
|
|
}
|
|
|
|
for (i = 0; i < height/2; i++) {
|
|
for (j = 0; j < width/2; j++) {
|
|
*(uv_dst_line0_v) = *(uv_src_line0++);
|
|
uv_dst_line0_v += height/2;
|
|
}
|
|
uv_dst_line0_v = uv_dst_line0_v_static + (i+1);
|
|
}
|
|
}
|
|
}
|
|
#endif
|