diff options
Diffstat (limited to 'xorg/server/module')
-rw-r--r-- | xorg/server/module/Makefile | 5 | ||||
-rw-r--r-- | xorg/server/module/amd64/cpuid_amd64.asm | 41 | ||||
-rw-r--r-- | xorg/server/module/amd64/funcs_amd64.h | 39 | ||||
-rw-r--r-- | xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm | 7 | ||||
-rw-r--r-- | xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm | 7 | ||||
-rw-r--r-- | xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm | 7 | ||||
-rw-r--r-- | xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm | 235 | ||||
-rw-r--r-- | xorg/server/module/rdpXv.c | 41 | ||||
-rw-r--r-- | xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm | 12 |
9 files changed, 356 insertions, 38 deletions
diff --git a/xorg/server/module/Makefile b/xorg/server/module/Makefile index 6de97c05..8f4f442b 100644 --- a/xorg/server/module/Makefile +++ b/xorg/server/module/Makefile @@ -9,7 +9,7 @@ rdpComposite.o rdpGlyphs.o rdpPixmap.o rdpInput.o rdpClientCon.o rdpCapture.o \ rdpTrapezoids.o rdpXv.o ;OBJS += cpuid_x86.o i420_to_rgb32_x86_sse2.o yv12_to_rgb32_x86_sse2.o yuy2_to_rgb32_x86_sse2.o uyvy_to_rgb32_x86_sse2.o -;OBJS += i420_to_rgb32_amd64_sse2.o yv12_to_rgb32_amd64_sse2.o yuy2_to_rgb32_amd64_sse2.o uyvy_to_rgb32_amd64_sse2.o +;OBJS += cpuid_amd64.o i420_to_rgb32_amd64_sse2.o yv12_to_rgb32_amd64_sse2.o yuy2_to_rgb32_amd64_sse2.o uyvy_to_rgb32_amd64_sse2.o CFLAGS = -g -O2 -Wall -fPIC -I/usr/include/xorg -I/usr/include/pixman-1 \ -I../../../common @@ -41,6 +41,9 @@ yuy2_to_rgb32_x86_sse2.o: x86/yuy2_to_rgb32_x86_sse2.asm uyvy_to_rgb32_x86_sse2.o: x86/uyvy_to_rgb32_x86_sse2.asm yasm -f elf32 -g dwarf2 x86/uyvy_to_rgb32_x86_sse2.asm +cpuid_amd64.o: amd64/cpuid_amd64.asm + yasm -f elf64 -g dwarf2 amd64/cpuid_amd64.asm + i420_to_rgb32_amd64_sse2.o: amd64/i420_to_rgb32_amd64_sse2.asm yasm -f elf64 -g dwarf2 amd64/i420_to_rgb32_amd64_sse2.asm diff --git a/xorg/server/module/amd64/cpuid_amd64.asm b/xorg/server/module/amd64/cpuid_amd64.asm new file mode 100644 index 00000000..b97937ad --- /dev/null +++ b/xorg/server/module/amd64/cpuid_amd64.asm @@ -0,0 +1,41 @@ + +SECTION .text + +%macro PROC 1 + align 16 + global %1 + %1: +%endmacro + +;The first six integer or pointer arguments are passed in registers +;RDI, RSI, RDX, RCX, R8, and R9 + +;int +;cpuid_amd64(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx) + +PROC cpuid_amd64 + ; save registers + push rbx + + push rdx + push rcx + push r8 + push r9 + + mov rax, rdi + mov rcx, rsi + cpuid + pop rdi + mov [rdi], edx + pop rdi + mov [rdi], ecx + pop rdi + mov [rdi], ebx + pop rdi + mov [rdi], eax + mov eax, 0 + ; restore registers + pop rbx + ret; + align 16 + diff --git a/xorg/server/module/amd64/funcs_amd64.h b/xorg/server/module/amd64/funcs_amd64.h new file mode 100644 index 00000000..39f7e5a4 --- /dev/null +++ b/xorg/server/module/amd64/funcs_amd64.h @@ -0,0 +1,39 @@ +/* +Copyright 2014 Jay Sorg + +Permission to use, copy, modify, distribute, and sell this software and its +documentation for any purpose is hereby granted without fee, provided that +the above copyright notice appear in all copies and that both that +copyright notice and this permission notice appear in supporting +documentation. + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +amd64 asm files + +*/ + +#ifndef __FUNCS_AMD64_H +#define __FUNCS_AMD64_H + +int +cpuid_amd64(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx); +int +yv12_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); +int +i420_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); +int +yuy2_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); +int +uyvy_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); + +#endif + diff --git a/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm index 75377edd..74ba422b 100644 --- a/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm +++ b/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm @@ -9,10 +9,9 @@ ;i420_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs) PROC i420_to_rgb32_amd64_sse2 - push ebx - - mov eax, 0 - pop ebx + push rbx + mov rax, 0 + pop rbx ret align 16 diff --git a/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm index cbe85bec..8866fd0f 100644 --- a/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm +++ b/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm @@ -9,10 +9,9 @@ ;uyvy_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs) PROC uyvy_to_rgb32_amd64_sse2 - push ebx - - mov eax, 0 - pop ebx + push rbx + mov rax, 0 + pop rbx ret align 16 diff --git a/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm index 693c364c..c0ac5c1b 100644 --- a/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm +++ b/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm @@ -9,10 +9,9 @@ ;yuy2_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs) PROC yuy2_to_rgb32_amd64_sse2 - push ebx - - mov eax, 0 - pop ebx + push rbx + mov rax, 0 + pop rbx ret align 16 diff --git a/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm index 7802795f..192d0e6a 100644 --- a/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm +++ b/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm @@ -1,3 +1,47 @@ +; +;Copyright 2014 Jay Sorg +; +;Permission to use, copy, modify, distribute, and sell this software and its +;documentation for any purpose is hereby granted without fee, provided that +;the above copyright notice appear in all copies and that both that +;copyright notice and this permission notice appear in supporting +;documentation. +; +;The above copyright notice and this permission notice shall be included in +;all copies or substantial portions of the Software. +; +;THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +;IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +;FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +;OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +;AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +;CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +; +;YV12 to RGB32 +;amd64 SSE2 32 bit +; +; RGB to YUV +; 0.299 0.587 0.114 +; -0.14713 -0.28886 0.436 +; 0.615 -0.51499 -0.10001 +; YUV to RGB +; 1 0 1.13983 +; 1 -0.39465 -0.58060 +; 1 2.03211 0 +; shift left 12 +; 4096 0 4669 +; 4096 -1616 -2378 +; 4096 9324 0 + +SECTION .data +align 16 +c128 times 8 dw 128 +c4669 times 8 dw 4669 +c1616 times 8 dw 1616 +c2378 times 8 dw 2378 +c9324 times 8 dw 9324 + +SECTION .text %macro PROC 1 align 16 @@ -5,14 +49,199 @@ %1: %endmacro +do8_uv: + + ; u + movd xmm1, [rbx] ; 4 at a time + lea rbx, [rbx + 4] + punpcklbw xmm1, xmm1 + pxor xmm6, xmm6 + punpcklbw xmm1, xmm6 + movdqa xmm7, [rel c128] + psubw xmm1, xmm7 + psllw xmm1, 4 + + ; v + movd xmm2, [rdx] ; 4 at a time + lea rdx, [rdx + 4] + punpcklbw xmm2, xmm2 + punpcklbw xmm2, xmm6 + psubw xmm2, xmm7 + psllw xmm2, 4 + +do8: + + ; y + movq xmm0, [rsi] ; 8 at a time + lea rsi, [rsi + 8] + pxor xmm6, xmm6 + punpcklbw xmm0, xmm6 + + ; r = y + hiword(4669 * (v << 4)) + movdqa xmm4, [rel c4669] + pmulhw xmm4, xmm2 + movdqa xmm3, xmm0 + paddw xmm3, xmm4 + + ; g = y - hiword(1616 * (u << 4)) - hiword(2378 * (v << 4)) + movdqa xmm5, [rel c1616] + pmulhw xmm5, xmm1 + movdqa xmm6, [rel c2378] + pmulhw xmm6, xmm2 + movdqa xmm4, xmm0 + psubw xmm4, xmm5 + psubw xmm4, xmm6 + + ; b = y + hiword(9324 * (u << 4)) + movdqa xmm6, [rel c9324] + pmulhw xmm6, xmm1 + movdqa xmm5, xmm0 + paddw xmm5, xmm6 + + packuswb xmm3, xmm3 ; b + packuswb xmm4, xmm4 ; g + punpcklbw xmm3, xmm4 ; gb + + pxor xmm4, xmm4 ; a + packuswb xmm5, xmm5 ; r + punpcklbw xmm5, xmm4 ; ar + + movdqa xmm4, xmm3 + punpcklwd xmm3, xmm5 ; argb + movdqa [rdi], xmm3 + lea rdi, [rdi + 16] + punpckhwd xmm4, xmm5 ; argb + movdqa [rdi], xmm4 + lea rdi, [rdi + 16] + + ret; + +;The first six integer or pointer arguments are passed in registers +; RDI, RSI, RDX, RCX, R8, and R9 + ;int ;yv12_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs) PROC yv12_to_rgb32_amd64_sse2 - push ebx + push rbx + push rsi + push rdi + push rbp + + push rdi + mov rdi, rcx ; rgbs + + mov rcx, rsi ; width + mov rdx, rcx + mov rbp, rdx ; height + mov rax, rbp + shr rbp, 1 + imul rax, rcx ; rax = width * height + + pop rsi ; y + + mov rbx, rsi ; u = y + width * height + add rbx, rax + + ; local vars + ; char* yptr1 + ; char* yptr2 + ; char* uptr + ; char* vptr + ; int* rgbs1 + ; int* rgbs2 + ; int width + sub rsp, 56 ; local vars, 56 bytes + mov [rsp + 0], rsi ; save y1 + add rsi, rdx + mov [rsp + 8], rsi ; save y2 + mov [rsp + 16], rbx ; save u + shr rax, 2 + add rbx, rax ; v = u + (width * height / 4) + mov [rsp + 24], rbx ; save v + + mov [rsp + 32], rdi ; save rgbs1 + mov rax, rdx + shl rax, 2 + add rdi, rax + mov [rsp + 40], rdi ; save rgbs2 - mov eax, 0 - pop ebx +loop_y: + + mov rcx, rdx ; width + shr rcx, 3 + + ; save rdx + mov [rsp + 48], rdx + + prefetchnta 4096[rsp + 0] ; y + prefetchnta 1024[rsp + 16] ; u + prefetchnta 1024[rsp + 24] ; v + +loop_x: + + mov rsi, [rsp + 0] ; y1 + mov rbx, [rsp + 16] ; u + mov rdx, [rsp + 24] ; v + mov rdi, [rsp + 32] ; rgbs1 + + ; y1 + call do8_uv + + mov [rsp + 0], rsi ; y1 + mov [rsp + 32], rdi ; rgbs1 + + mov rsi, [rsp + 8] ; y2 + mov rdi, [rsp + 40] ; rgbs2 + + ; y2 + call do8 + + mov [rsp + 8], rsi ; y2 + mov [rsp + 16], rbx ; u + mov [rsp + 24], rdx ; v + mov [rsp + 40], rdi ; rgbs2 + + dec rcx ; width + jnz loop_x + + ; restore rdx + mov rdx, [rsp + 48] + + ; update y1 and 2 + mov rax, [rsp + 0] + mov rbx, rdx + add rax, rbx + mov [rsp + 0], rax + + mov rax, [rsp + 8] + add rax, rbx + mov [rsp + 8], rax + + ; update rgb1 and 2 + mov rax, [rsp + 32] + mov rbx, rdx + shl rbx, 2 + add rax, rbx + mov [rsp + 32], rax + + mov rax, [rsp + 40] + add rax, rbx + mov [rsp + 40], rax + + mov rcx, rbp + dec rcx ; height + mov rbp, rcx + jnz loop_y + + add rsp, 56 + + mov rax, 0 + pop rbp + pop rdi + pop rsi + pop rbx ret align 16 + diff --git a/xorg/server/module/rdpXv.c b/xorg/server/module/rdpXv.c index 0a9bc867..61088582 100644 --- a/xorg/server/module/rdpXv.c +++ b/xorg/server/module/rdpXv.c @@ -417,6 +417,7 @@ stretch_RGB32_RGB32(int *src, int src_width, int src_height, iv += ov; } + LLOGLN(10, ("stretch_RGB32_RGB32: out")); return 0; } @@ -642,14 +643,7 @@ xrdpVidQueryImageAttributes(ScrnInfoPtr pScrn, int id, #if XV_USE_ACCEL #if defined(__x86_64__) || defined(__AMD64__) || defined (_M_AMD64) -int -yv12_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); -int -i420_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); -int -yuy2_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); -int -uyvy_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); +#include "amd64/funcs_amd64.h" #elif defined(__x86__) || defined(_M_IX86) || defined(__i386__) #include "x86/funcs_x86.h" #endif @@ -713,23 +707,38 @@ rdpXvInit(ScreenPtr pScreen, ScrnInfoPtr pScrn) if (g_xv_use_accel) { #if defined(__x86_64__) || defined(__AMD64__) || defined (_M_AMD64) - dev->yv12_to_rgb32 = yv12_to_rgb32_amd64_sse2; - dev->i420_to_rgb32 = i420_to_rgb32_amd64_sse2; - dev->yuy2_to_rgb32 = yuy2_to_rgb32_amd64_sse2; - dev->uyvy_to_rgb32 = uyvy_to_rgb32_amd64_sse2; - LLOGLN(0, ("rdpXvInit: sse amd64 yuv functions assigned")); + int ax, bx, cx, dx; + cpuid_amd64(1, 0, &ax, &bx, &cx, &dx); + LLOGLN(0, ("rdpXvInit: cpuid ax 1 cx 0 return ax 0x%8.8x bx " + "0x%8.8x cx 0x%8.8x dx 0x%8.8x", ax, bx, cx, dx)); + if (dx & (1 << 26)) /* SSE 2 */ + { + dev->yv12_to_rgb32 = yv12_to_rgb32_amd64_sse2; + dev->i420_to_rgb32 = i420_to_rgb32_amd64_sse2; + dev->yuy2_to_rgb32 = yuy2_to_rgb32_amd64_sse2; + dev->uyvy_to_rgb32 = uyvy_to_rgb32_amd64_sse2; + LLOGLN(0, ("rdpXvInit: sse2 amd64 yuv functions assigned")); + } + else + { + dev->yv12_to_rgb32 = YV12_to_RGB32; + dev->i420_to_rgb32 = I420_to_RGB32; + dev->yuy2_to_rgb32 = YUY2_to_RGB32; + dev->uyvy_to_rgb32 = UYVY_to_RGB32; + LLOGLN(0, ("rdpXvInit: warning, c yuv functions assigned")); + } #elif defined(__x86__) || defined(_M_IX86) || defined(__i386__) int ax, bx, cx, dx; cpuid_x86(1, 0, &ax, &bx, &cx, &dx); - LLOGLN(0, ("rdpXvInit: cpuid eax 1 ecx 0 return eax 0x%8.8x ebx " - "0x%8.8x ecx 0x%8.8x edx 0x%8.8x", ax, bx, cx, dx)); + LLOGLN(0, ("rdpXvInit: cpuid ax 1 cx 0 return ax 0x%8.8x bx " + "0x%8.8x cx 0x%8.8x dx 0x%8.8x", ax, bx, cx, dx)); if (dx & (1 << 26)) /* SSE 2 */ { dev->yv12_to_rgb32 = yv12_to_rgb32_x86_sse2; dev->i420_to_rgb32 = i420_to_rgb32_x86_sse2; dev->yuy2_to_rgb32 = yuy2_to_rgb32_x86_sse2; dev->uyvy_to_rgb32 = uyvy_to_rgb32_x86_sse2; - LLOGLN(0, ("rdpXvInit: sse x86 yuv functions assigned")); + LLOGLN(0, ("rdpXvInit: sse2 x86 yuv functions assigned")); } else { diff --git a/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm b/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm index 9087b291..3bd0c59a 100644 --- a/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm +++ b/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm @@ -140,12 +140,12 @@ PROC yv12_to_rgb32_x86_sse2 add ebx, eax ; local vars - ; char* yptr1; - ; char* yptr2; - ; char* uptr; - ; char* vptr; - ; int* rgbs1; - ; int* rgbs2; + ; char* yptr1 + ; char* yptr2 + ; char* uptr + ; char* vptr + ; int* rgbs1 + ; int* rgbs2 ; int width sub esp, 28 ; local vars, 28 bytes mov [esp + 0], esi ; save y1 |