diff options
Diffstat (limited to 'xorg/server/module')
24 files changed, 3007 insertions, 122 deletions
diff --git a/xorg/server/module/Makefile b/xorg/server/module/Makefile index 9003de4d..8f1560d6 100644 --- a/xorg/server/module/Makefile +++ b/xorg/server/module/Makefile @@ -6,11 +6,16 @@ rdpPolyFillArc.o rdpPolyText8.o rdpPolyText16.o rdpImageText8.o \ rdpImageText16.o rdpImageGlyphBlt.o rdpPolyGlyphBlt.o rdpPushPixels.o \ rdpCursor.o rdpMain.o rdpRandR.o rdpMisc.o rdpReg.o \ rdpComposite.o rdpGlyphs.o rdpPixmap.o rdpInput.o rdpClientCon.o rdpCapture.o \ -rdpTrapezoids.o +rdpTrapezoids.o rdpXv.o rdpSimd.o + +;OBJS += cpuid_x86.o i420_to_rgb32_x86_sse2.o yv12_to_rgb32_x86_sse2.o yuy2_to_rgb32_x86_sse2.o uyvy_to_rgb32_x86_sse2.o +;OBJS += cpuid_amd64.o i420_to_rgb32_amd64_sse2.o yv12_to_rgb32_amd64_sse2.o yuy2_to_rgb32_amd64_sse2.o uyvy_to_rgb32_amd64_sse2.o CFLAGS = -g -O2 -Wall -fPIC -I/usr/include/xorg -I/usr/include/pixman-1 \ -I../../../common +;CFLAGS += -DSIMD_USE_ACCEL=1 + LDFLAGS = LIBS = @@ -22,3 +27,34 @@ libxorgxrdp.so: $(OBJS) Makefile clean: rm -f $(OBJS) libxorgxrdp.so + +cpuid_x86.o: x86/cpuid_x86.asm + yasm -f elf32 -g dwarf2 x86/cpuid_x86.asm + +i420_to_rgb32_x86_sse2.o: x86/i420_to_rgb32_x86_sse2.asm + yasm -f elf32 -g dwarf2 x86/i420_to_rgb32_x86_sse2.asm + +yv12_to_rgb32_x86_sse2.o: x86/yv12_to_rgb32_x86_sse2.asm + yasm -f elf32 -g dwarf2 x86/yv12_to_rgb32_x86_sse2.asm + +yuy2_to_rgb32_x86_sse2.o: x86/yuy2_to_rgb32_x86_sse2.asm + yasm -f elf32 -g dwarf2 x86/yuy2_to_rgb32_x86_sse2.asm + +uyvy_to_rgb32_x86_sse2.o: x86/uyvy_to_rgb32_x86_sse2.asm + yasm -f elf32 -g dwarf2 x86/uyvy_to_rgb32_x86_sse2.asm + +cpuid_amd64.o: amd64/cpuid_amd64.asm + yasm -f elf64 -g dwarf2 amd64/cpuid_amd64.asm + +i420_to_rgb32_amd64_sse2.o: amd64/i420_to_rgb32_amd64_sse2.asm + yasm -f elf64 -g dwarf2 amd64/i420_to_rgb32_amd64_sse2.asm + +yv12_to_rgb32_amd64_sse2.o: amd64/yv12_to_rgb32_amd64_sse2.asm + yasm -f elf64 -g dwarf2 amd64/yv12_to_rgb32_amd64_sse2.asm + +yuy2_to_rgb32_amd64_sse2.o: amd64/yuy2_to_rgb32_amd64_sse2.asm + yasm -f elf64 -g dwarf2 
amd64/yuy2_to_rgb32_amd64_sse2.asm + +uyvy_to_rgb32_amd64_sse2.o: amd64/uyvy_to_rgb32_amd64_sse2.asm + yasm -f elf64 -g dwarf2 amd64/uyvy_to_rgb32_amd64_sse2.asm + diff --git a/xorg/server/module/amd64/cpuid_amd64.asm b/xorg/server/module/amd64/cpuid_amd64.asm new file mode 100644 index 00000000..b97937ad --- /dev/null +++ b/xorg/server/module/amd64/cpuid_amd64.asm @@ -0,0 +1,41 @@ + +SECTION .text + +%macro PROC 1 + align 16 + global %1 + %1: +%endmacro + +;The first six integer or pointer arguments are passed in registers +;RDI, RSI, RDX, RCX, R8, and R9 + +;int +;cpuid_amd64(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx) + +PROC cpuid_amd64 + ; save registers + push rbx + + push rdx + push rcx + push r8 + push r9 + + mov rax, rdi + mov rcx, rsi + cpuid + pop rdi + mov [rdi], edx + pop rdi + mov [rdi], ecx + pop rdi + mov [rdi], ebx + pop rdi + mov [rdi], eax + mov eax, 0 + ; restore registers + pop rbx + ret; + align 16 + diff --git a/xorg/server/module/amd64/funcs_amd64.h b/xorg/server/module/amd64/funcs_amd64.h new file mode 100644 index 00000000..10cffe0d --- /dev/null +++ b/xorg/server/module/amd64/funcs_amd64.h @@ -0,0 +1,39 @@ +/* +Copyright 2014 Jay Sorg + +Permission to use, copy, modify, distribute, and sell this software and its +documentation for any purpose is hereby granted without fee, provided that +the above copyright notice appear in all copies and that both that +copyright notice and this permission notice appear in supporting +documentation. + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +amd64 asm functions + +*/ + +#ifndef __FUNCS_AMD64_H +#define __FUNCS_AMD64_H + +int +cpuid_amd64(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx); +int +yv12_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); +int +i420_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); +int +yuy2_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); +int +uyvy_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); + +#endif + diff --git a/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm new file mode 100644 index 00000000..d9760caa --- /dev/null +++ b/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm @@ -0,0 +1,248 @@ +; +;Copyright 2014 Jay Sorg +; +;Permission to use, copy, modify, distribute, and sell this software and its +;documentation for any purpose is hereby granted without fee, provided that +;the above copyright notice appear in all copies and that both that +;copyright notice and this permission notice appear in supporting +;documentation. +; +;The above copyright notice and this permission notice shall be included in +;all copies or substantial portions of the Software. +; +;THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +;IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +;FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +;OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +;AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +;CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+; +;I420 to RGB32 +;amd64 SSE2 32 bit +; +; RGB to YUV +; 0.299 0.587 0.114 +; -0.14713 -0.28886 0.436 +; 0.615 -0.51499 -0.10001 +; YUV to RGB +; 1 0 1.13983 +; 1 -0.39465 -0.58060 +; 1 2.03211 0 +; shift left 12 +; 4096 0 4669 +; 4096 -1616 -2378 +; 4096 9324 0 + +SECTION .data +align 16 +c128 times 8 dw 128 +c4669 times 8 dw 4669 +c1616 times 8 dw 1616 +c2378 times 8 dw 2378 +c9324 times 8 dw 9324 + +SECTION .text + +%macro PROC 1 + align 16 + global %1 + %1: +%endmacro + +do8_uv: + + ; v + movd xmm1, [rbx] ; 4 at a time + lea rbx, [rbx + 4] + punpcklbw xmm1, xmm1 + pxor xmm6, xmm6 + punpcklbw xmm1, xmm6 + movdqa xmm7, [rel c128] + psubw xmm1, xmm7 + psllw xmm1, 4 + + ; v + movd xmm2, [rdx] ; 4 at a time + lea rdx, [rdx + 4] + punpcklbw xmm2, xmm2 + punpcklbw xmm2, xmm6 + psubw xmm2, xmm7 + psllw xmm2, 4 + +do8: + + ; y + movq xmm0, [rsi] ; 8 at a time + lea rsi, [rsi + 8] + pxor xmm6, xmm6 + punpcklbw xmm0, xmm6 + + ; r = y + hiword(4669 * (v << 4)) + movdqa xmm4, [rel c4669] + pmulhw xmm4, xmm1 + movdqa xmm3, xmm0 + paddw xmm3, xmm4 + + ; g = y - hiword(1616 * (u << 4)) - hiword(2378 * (v << 4)) + movdqa xmm5, [rel c1616] + pmulhw xmm5, xmm2 + movdqa xmm6, [rel c2378] + pmulhw xmm6, xmm1 + movdqa xmm4, xmm0 + psubw xmm4, xmm5 + psubw xmm4, xmm6 + + ; b = y + hiword(9324 * (u << 4)) + movdqa xmm6, [rel c9324] + pmulhw xmm6, xmm2 + movdqa xmm5, xmm0 + paddw xmm5, xmm6 + + packuswb xmm3, xmm3 ; b + packuswb xmm4, xmm4 ; g + punpcklbw xmm3, xmm4 ; gb + + pxor xmm4, xmm4 ; a + packuswb xmm5, xmm5 ; r + punpcklbw xmm5, xmm4 ; ar + + movdqa xmm4, xmm3 + punpcklwd xmm3, xmm5 ; argb + movdqa [rdi], xmm3 + lea rdi, [rdi + 16] + punpckhwd xmm4, xmm5 ; argb + movdqa [rdi], xmm4 + lea rdi, [rdi + 16] + + ret; + +;The first six integer or pointer arguments are passed in registers +; RDI, RSI, RDX, RCX, R8, and R9 + +;int +;i420_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs) + +PROC i420_to_rgb32_amd64_sse2 + push rbx + push rsi + push rdi + push 
rbp + + push rdi + push rdx + mov rdi, rcx ; rgbs + + mov rcx, rsi ; width + mov rdx, rcx + pop rbp ; height + mov rax, rbp + shr rbp, 1 + imul rax, rcx ; rax = width * height + + pop rsi ; y + + mov rbx, rsi ; u = y + width * height + add rbx, rax + + ; local vars + ; char* yptr1 + ; char* yptr2 + ; char* uptr + ; char* vptr + ; int* rgbs1 + ; int* rgbs2 + ; int width + sub rsp, 56 ; local vars, 56 bytes + mov [rsp + 0], rsi ; save y1 + add rsi, rdx + mov [rsp + 8], rsi ; save y2 + mov [rsp + 16], rbx ; save u + shr rax, 2 + add rbx, rax ; v = u + (width * height / 4) + mov [rsp + 24], rbx ; save v + + mov [rsp + 32], rdi ; save rgbs1 + mov rax, rdx + shl rax, 2 + add rdi, rax + mov [rsp + 40], rdi ; save rgbs2 + +loop_y: + + mov rcx, rdx ; width + shr rcx, 3 + + ; save rdx + mov [rsp + 48], rdx + + ;prefetchnta 4096[rsp + 0] ; y + ;prefetchnta 1024[rsp + 16] ; u + ;prefetchnta 1024[rsp + 24] ; v + +loop_x: + + mov rsi, [rsp + 0] ; y1 + mov rbx, [rsp + 16] ; u + mov rdx, [rsp + 24] ; v + mov rdi, [rsp + 32] ; rgbs1 + + ; y1 + call do8_uv + + mov [rsp + 0], rsi ; y1 + mov [rsp + 32], rdi ; rgbs1 + + mov rsi, [rsp + 8] ; y2 + mov rdi, [rsp + 40] ; rgbs2 + + ; y2 + call do8 + + mov [rsp + 8], rsi ; y2 + mov [rsp + 16], rbx ; u + mov [rsp + 24], rdx ; v + mov [rsp + 40], rdi ; rgbs2 + + dec rcx ; width + jnz loop_x + + ; restore rdx + mov rdx, [rsp + 48] + + ; update y1 and 2 + mov rax, [rsp + 0] + mov rbx, rdx + add rax, rbx + mov [rsp + 0], rax + + mov rax, [rsp + 8] + add rax, rbx + mov [rsp + 8], rax + + ; update rgb1 and 2 + mov rax, [rsp + 32] + mov rbx, rdx + shl rbx, 2 + add rax, rbx + mov [rsp + 32], rax + + mov rax, [rsp + 40] + add rax, rbx + mov [rsp + 40], rax + + mov rcx, rbp + dec rcx ; height + mov rbp, rcx + jnz loop_y + + add rsp, 56 + + mov rax, 0 + pop rbp + pop rdi + pop rsi + pop rbx + ret + align 16 + + diff --git a/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm new file mode 100644 
index 00000000..8866fd0f --- /dev/null +++ b/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm @@ -0,0 +1,17 @@ + +%macro PROC 1 + align 16 + global %1 + %1: +%endmacro + +;int +;uyvy_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs) + +PROC uyvy_to_rgb32_amd64_sse2 + push rbx + mov rax, 0 + pop rbx + ret + align 16 + diff --git a/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm new file mode 100644 index 00000000..c0ac5c1b --- /dev/null +++ b/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm @@ -0,0 +1,17 @@ + +%macro PROC 1 + align 16 + global %1 + %1: +%endmacro + +;int +;yuy2_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs) + +PROC yuy2_to_rgb32_amd64_sse2 + push rbx + mov rax, 0 + pop rbx + ret + align 16 + diff --git a/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm new file mode 100644 index 00000000..13e46878 --- /dev/null +++ b/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm @@ -0,0 +1,248 @@ +; +;Copyright 2014 Jay Sorg +; +;Permission to use, copy, modify, distribute, and sell this software and its +;documentation for any purpose is hereby granted without fee, provided that +;the above copyright notice appear in all copies and that both that +;copyright notice and this permission notice appear in supporting +;documentation. +; +;The above copyright notice and this permission notice shall be included in +;all copies or substantial portions of the Software. +; +;THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +;IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +;FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +;OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +;AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +;CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +; +;YV12 to RGB32 +;amd64 SSE2 32 bit +; +; RGB to YUV +; 0.299 0.587 0.114 +; -0.14713 -0.28886 0.436 +; 0.615 -0.51499 -0.10001 +; YUV to RGB +; 1 0 1.13983 +; 1 -0.39465 -0.58060 +; 1 2.03211 0 +; shift left 12 +; 4096 0 4669 +; 4096 -1616 -2378 +; 4096 9324 0 + +SECTION .data +align 16 +c128 times 8 dw 128 +c4669 times 8 dw 4669 +c1616 times 8 dw 1616 +c2378 times 8 dw 2378 +c9324 times 8 dw 9324 + +SECTION .text + +%macro PROC 1 + align 16 + global %1 + %1: +%endmacro + +do8_uv: + + ; u + movd xmm1, [rbx] ; 4 at a time + lea rbx, [rbx + 4] + punpcklbw xmm1, xmm1 + pxor xmm6, xmm6 + punpcklbw xmm1, xmm6 + movdqa xmm7, [rel c128] + psubw xmm1, xmm7 + psllw xmm1, 4 + + ; v + movd xmm2, [rdx] ; 4 at a time + lea rdx, [rdx + 4] + punpcklbw xmm2, xmm2 + punpcklbw xmm2, xmm6 + psubw xmm2, xmm7 + psllw xmm2, 4 + +do8: + + ; y + movq xmm0, [rsi] ; 8 at a time + lea rsi, [rsi + 8] + pxor xmm6, xmm6 + punpcklbw xmm0, xmm6 + + ; r = y + hiword(4669 * (v << 4)) + movdqa xmm4, [rel c4669] + pmulhw xmm4, xmm2 + movdqa xmm3, xmm0 + paddw xmm3, xmm4 + + ; g = y - hiword(1616 * (u << 4)) - hiword(2378 * (v << 4)) + movdqa xmm5, [rel c1616] + pmulhw xmm5, xmm1 + movdqa xmm6, [rel c2378] + pmulhw xmm6, xmm2 + movdqa xmm4, xmm0 + psubw xmm4, xmm5 + psubw xmm4, xmm6 + + ; b = y + hiword(9324 * (u << 4)) + movdqa xmm6, [rel c9324] + pmulhw xmm6, xmm1 + movdqa xmm5, xmm0 + paddw xmm5, xmm6 + + packuswb xmm3, xmm3 ; b + packuswb xmm4, xmm4 ; g + punpcklbw xmm3, xmm4 ; gb + + pxor xmm4, xmm4 ; a + packuswb xmm5, xmm5 ; r + punpcklbw xmm5, xmm4 ; ar + + movdqa xmm4, xmm3 + punpcklwd xmm3, xmm5 ; argb + movdqa [rdi], xmm3 + lea rdi, [rdi + 16] + punpckhwd xmm4, xmm5 ; argb + movdqa [rdi], xmm4 + lea rdi, [rdi + 16] + + ret; + +;The first six 
integer or pointer arguments are passed in registers +; RDI, RSI, RDX, RCX, R8, and R9 + +;int +;yv12_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs) + +PROC yv12_to_rgb32_amd64_sse2 + push rbx + push rsi + push rdi + push rbp + + push rdi + push rdx + mov rdi, rcx ; rgbs + + mov rcx, rsi ; width + mov rdx, rcx + pop rbp ; height + mov rax, rbp + shr rbp, 1 + imul rax, rcx ; rax = width * height + + pop rsi ; y + + mov rbx, rsi ; u = y + width * height + add rbx, rax + + ; local vars + ; char* yptr1 + ; char* yptr2 + ; char* uptr + ; char* vptr + ; int* rgbs1 + ; int* rgbs2 + ; int width + sub rsp, 56 ; local vars, 56 bytes + mov [rsp + 0], rsi ; save y1 + add rsi, rdx + mov [rsp + 8], rsi ; save y2 + mov [rsp + 16], rbx ; save u + shr rax, 2 + add rbx, rax ; v = u + (width * height / 4) + mov [rsp + 24], rbx ; save v + + mov [rsp + 32], rdi ; save rgbs1 + mov rax, rdx + shl rax, 2 + add rdi, rax + mov [rsp + 40], rdi ; save rgbs2 + +loop_y: + + mov rcx, rdx ; width + shr rcx, 3 + + ; save rdx + mov [rsp + 48], rdx + + ;prefetchnta 4096[rsp + 0] ; y + ;prefetchnta 1024[rsp + 16] ; u + ;prefetchnta 1024[rsp + 24] ; v + +loop_x: + + mov rsi, [rsp + 0] ; y1 + mov rbx, [rsp + 16] ; u + mov rdx, [rsp + 24] ; v + mov rdi, [rsp + 32] ; rgbs1 + + ; y1 + call do8_uv + + mov [rsp + 0], rsi ; y1 + mov [rsp + 32], rdi ; rgbs1 + + mov rsi, [rsp + 8] ; y2 + mov rdi, [rsp + 40] ; rgbs2 + + ; y2 + call do8 + + mov [rsp + 8], rsi ; y2 + mov [rsp + 16], rbx ; u + mov [rsp + 24], rdx ; v + mov [rsp + 40], rdi ; rgbs2 + + dec rcx ; width + jnz loop_x + + ; restore rdx + mov rdx, [rsp + 48] + + ; update y1 and 2 + mov rax, [rsp + 0] + mov rbx, rdx + add rax, rbx + mov [rsp + 0], rax + + mov rax, [rsp + 8] + add rax, rbx + mov [rsp + 8], rax + + ; update rgb1 and 2 + mov rax, [rsp + 32] + mov rbx, rdx + shl rbx, 2 + add rax, rbx + mov [rsp + 32], rax + + mov rax, [rsp + 40] + add rax, rbx + mov [rsp + 40], rax + + mov rcx, rbp + dec rcx ; height + mov rbp, rcx 
+ jnz loop_y + + add rsp, 56 + + mov rax, 0 + pop rbp + pop rdi + pop rsi + pop rbx + ret + align 16 + + diff --git a/xorg/server/module/rdp.h b/xorg/server/module/rdp.h index ba1bcfd0..8a4d58c4 100644 --- a/xorg/server/module/rdp.h +++ b/xorg/server/module/rdp.h @@ -30,9 +30,46 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "rdpPri.h" +#define XRDP_MODULE_NAME "XRDPMOD" +#define XRDP_DRIVER_NAME "XRDPDEV" +#define XRDP_MOUSE_NAME "XRDPMOUSE" +#define XRDP_KEYB_NAME "XRDPKEYB" +#define XRDP_VERSION 1000 + +#define PACKAGE_VERSION_MAJOR 1 +#define PACKAGE_VERSION_MINOR 0 +#define PACKAGE_VERSION_PATCHLEVEL 0 + +#define COLOR8(r, g, b) \ + ((((r) >> 5) << 0) | (((g) >> 5) << 3) | (((b) >> 6) << 6)) +#define COLOR15(r, g, b) \ + ((((r) >> 3) << 10) | (((g) >> 3) << 5) | (((b) >> 3) << 0)) +#define COLOR16(r, g, b) \ + ((((r) >> 3) << 11) | (((g) >> 2) << 5) | (((b) >> 3) << 0)) +#define COLOR24(r, g, b) \ + ((((r) >> 0) << 0) | (((g) >> 0) << 8) | (((b) >> 0) << 16)) +#define SPLITCOLOR32(r, g, b, c) \ + do { \ + r = ((c) >> 16) & 0xff; \ + g = ((c) >> 8) & 0xff; \ + b = (c) & 0xff; \ + } while (0) + +/* PIXMAN_a8b8g8r8 */ +#define XRDP_a8b8g8r8 \ +((32 << 24) | (3 << 16) | (8 << 12) | (8 << 8) | (8 << 4) | 8) /* PIXMAN_a8r8g8b8 */ #define XRDP_a8r8g8b8 \ -((32 << 24) | (2 << 16) | (8 << 12) | (8 << 8) | (8 << 4) | 8) +((32 << 24) | (2 << 16) | (8 << 12) | (8 << 8) | (8 << 4) | 8) +/* PIXMAN_r5g6b5 */ +#define XRDP_r5g6b5 \ +((16 << 24) | (2 << 16) | (0 << 12) | (5 << 8) | (6 << 4) | 5) +/* PIXMAN_a1r5g5b5 */ +#define XRDP_a1r5g5b5 \ +((16 << 24) | (2 << 16) | (1 << 12) | (5 << 8) | (5 << 4) | 5) +/* PIXMAN_r3g3b2 */ +#define XRDP_r3g3b2 \ +((8 << 24) | (2 << 16) | (0 << 12) | (3 << 8) | (3 << 4) | 2) #define PixelDPI 100 #define PixelToMM(_size) (((_size) * 254 + (PixelDPI) * 5) / ((PixelDPI) * 10)) @@ -40,7 +77,8 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define RDPMIN(_val1, _val2) ((_val1) < (_val2) ? (_val1) : (_val2)) #define RDPMAX(_val1, _val2) ((_val1) < (_val2) ? (_val2) : (_val1)) #define RDPCLAMP(_val, _lo, _hi) \ - (_val) < (_lo) ? (_lo) : (_val) > (_hi) ? (_hi) : (_val) + (_val) < (_lo) ? (_lo) : (_val) > (_hi) ? (_hi) : (_val) +#define RDPALIGN(_val, _al) ((((long)(_val)) + ((_al) - 1)) & ~((_al) - 1)) #define XRDP_CD_NODRAW 0 #define XRDP_CD_NOCLIP 1 @@ -157,6 +195,8 @@ struct _rdpCounts CARD32 callCount[64 - 23]; }; +typedef int (*yuv_to_rgb32_proc)(unsigned char *yuvs, int width, int height, int *rgbs); + /* move this to common header */ struct _rdpRec { @@ -228,6 +268,15 @@ struct _rdpRec struct _rdpCounts counts; + yuv_to_rgb32_proc i420_to_rgb32; + yuv_to_rgb32_proc yv12_to_rgb32; + yuv_to_rgb32_proc yuy2_to_rgb32; + yuv_to_rgb32_proc uyvy_to_rgb32; + char *xv_data; + int xv_data_bytes; + int xv_timer_schedualed; + OsTimerPtr xv_timer; + }; typedef struct _rdpRec rdpRec; typedef struct _rdpRec * rdpPtr; @@ -250,63 +299,63 @@ typedef struct _rdpGCRec * rdpGCPtr; struct urdp_draw_item_fill { - int opcode; - int fg_color; - int bg_color; - int pad0; + int opcode; + int fg_color; + int bg_color; + int pad0; }; struct urdp_draw_item_img { - int opcode; - int pad0; + int opcode; + int pad0; }; struct urdp_draw_item_line { - int opcode; - int fg_color; - int bg_color; - int width; - xSegment* segs; - int nseg; - int flags; + int opcode; + int fg_color; + int bg_color; + int width; + xSegment* segs; + int nseg; + int flags; }; struct urdp_draw_item_scrblt { - int srcx; - int srcy; - int dstx; - int dsty; - int cx; - int cy; + int srcx; + int srcy; + int dstx; + int dsty; + int cx; + int cy; }; struct urdp_draw_item_text { - int opcode; - int fg_color; - struct rdp_text* rtext; /* in rdpglyph.h */ + int opcode; + int fg_color; + struct rdp_text* rtext; /* in rdpglyph.h */ }; union urdp_draw_item { - struct urdp_draw_item_fill fill; - struct urdp_draw_item_img img; - struct urdp_draw_item_line line; - 
struct urdp_draw_item_scrblt scrblt; - struct urdp_draw_item_text text; + struct urdp_draw_item_fill fill; + struct urdp_draw_item_img img; + struct urdp_draw_item_line line; + struct urdp_draw_item_scrblt scrblt; + struct urdp_draw_item_text text; }; struct rdp_draw_item { - int type; /* RDI_FILL, RDI_IMGLL, ... */ - int flags; - struct rdp_draw_item* prev; - struct rdp_draw_item* next; - RegionPtr reg; - union urdp_draw_item u; + int type; /* RDI_FILL, RDI_IMGLL, ... */ + int flags; + struct rdp_draw_item* prev; + struct rdp_draw_item* next; + RegionPtr reg; + union urdp_draw_item u; }; #define XRDP_USE_COUNT_THRESHOLD 1 diff --git a/xorg/server/module/rdpCapture.c b/xorg/server/module/rdpCapture.c index 8819713a..5163e6ae 100644 --- a/xorg/server/module/rdpCapture.c +++ b/xorg/server/module/rdpCapture.c @@ -33,34 +33,249 @@ #include "rdpDraw.h" #include "rdpClientCon.h" #include "rdpReg.h" +#include "rdpMisc.h" #define LOG_LEVEL 1 #define LLOGLN(_level, _args) \ do { if (_level < LOG_LEVEL) { ErrorF _args ; ErrorF("\n"); } } while (0) +#define RDP_MAX_TILES 1024 + +/******************************************************************************/ +static int +rdpLimitRects(RegionPtr reg, int max_rects, BoxPtr *rects) +{ + int nrects; + + nrects = REGION_NUM_RECTS(reg); + if (nrects > max_rects) + { + nrects = 1; + *rects = rdpRegionExtents(reg); + } + else + { + *rects = REGION_RECTS(reg); + } + return nrects; +} + +/******************************************************************************/ +/* copy rects with no error checking */ +static int +rdpCopyBox_a8r8g8b8_to_a8r8g8b8(void *src, int src_stride, int srcx, int srcy, + void *dst, int dst_stride, int dstx, int dsty, + BoxPtr rects, int num_rects) +{ + char *s8; + char *d8; + int index; + int jndex; + int bytes; + int height; + BoxPtr box; + + for (index = 0; index < num_rects; index++) + { + box = rects + index; + s8 = ((char *) src) + (box->y1 - srcy) * src_stride; + s8 += (box->x1 - srcx) * 4; + d8 = 
((char *) dst) + (box->y1 - dsty) * dst_stride; + d8 += (box->x1 - dstx) * 4; + bytes = box->x2 - box->x1; + bytes *= 4; + height = box->y2 - box->y1; + for (jndex = 0; jndex < height; jndex++) + { + memcpy(d8, s8, bytes); + d8 += dst_stride; + s8 += src_stride; + } + } + return 0; +} + +/******************************************************************************/ +static int +rdpFillBox_yuvalp(int ax, int ay, + void *dst, int dst_stride) +{ + dst = ((char *) dst) + (ay << 8) * (dst_stride >> 8) + (ax << 8); + memset(dst, 0, 64 * 64 * 4); + return 0; +} + +/******************************************************************************/ +/* copy rects with no error checking + * convert ARGB32 to 64x64 linear planar YUVA */ +/* http://msdn.microsoft.com/en-us/library/ff635643.aspx + * 0.299 -0.168935 0.499813 + * 0.587 -0.331665 -0.418531 + * 0.114 0.50059 -0.081282 + y = r * 0.299000 + g * 0.587000 + b * 0.114000; + u = r * -0.168935 + g * -0.331665 + b * 0.500590; + v = r * 0.499813 + g * -0.418531 + b * -0.081282; */ +/* 19595 38470 7471 + -11071 -21736 32807 + 32756 -27429 -5327 */ +static int +rdpCopyBox_a8r8g8b8_to_yuvalp(int ax, int ay, + void *src, int src_stride, + void *dst, int dst_stride, + BoxPtr rects, int num_rects) +{ + char *s8; + char *d8; + char *yptr; + char *uptr; + char *vptr; + char *aptr; + int *s32; + int index; + int jndex; + int kndex; + int width; + int height; + int pixel; + int a; + int r; + int g; + int b; + int y; + int u; + int v; + BoxPtr box; + + dst = ((char *) dst) + (ay << 8) * (dst_stride >> 8) + (ax << 8); + for (index = 0; index < num_rects; index++) + { + box = rects + index; + s8 = ((char *) src) + box->y1 * src_stride; + s8 += box->x1 * 4; + d8 = ((char *) dst) + (box->y1 - ay) * 64; + d8 += box->x1 - ax; + width = box->x2 - box->x1; + height = box->y2 - box->y1; + for (jndex = 0; jndex < height; jndex++) + { + s32 = (int *) s8; + yptr = d8; + uptr = yptr + 64 * 64; + vptr = uptr + 64 * 64; + aptr = vptr + 64 * 64; + 
kndex = 0; + while (kndex < width) + { + pixel = *(s32++); + a = (pixel >> 24) & 0xff; + r = (pixel >> 16) & 0xff; + g = (pixel >> 8) & 0xff; + b = (pixel >> 0) & 0xff; + y = (r * 19595 + g * 38470 + b * 7471) >> 16; + u = (r * -11071 + g * -21736 + b * 32807) >> 16; + v = (r * 32756 + g * -27429 + b * -5327) >> 16; + y = y - 128; + y = max(y, -128); + u = max(u, -128); + v = max(v, -128); + y = min(y, 127); + u = min(u, 127); + v = min(v, 127); + *(yptr++) = y; + *(uptr++) = u; + *(vptr++) = v; + *(aptr++) = a; + kndex++; + } + d8 += 64; + s8 += src_stride; + } + } + return 0; +} + +/******************************************************************************/ +/* copy rects with no error checking */ +static int +rdpCopyBox_a8r8g8b8_to_a8b8g8r8(void *src, int src_stride, + void *dst, int dst_stride, + BoxPtr rects, int num_rects) +{ + char *s8; + char *d8; + int index; + int jndex; + int kndex; + int bytes; + int width; + int height; + int red; + int green; + int blue; + BoxPtr box; + unsigned int *s32; + unsigned int *d32; + + for (index = 0; index < num_rects; index++) + { + box = rects + index; + s8 = ((char *) src) + box->y1 * src_stride; + s8 += box->x1 * 4; + d8 = ((char *) dst) + box->y1 * dst_stride; + d8 += box->x1 * 4; + bytes = box->x2 - box->x1; + bytes *= 4; + width = box->x2 - box->x1; + height = box->y2 - box->y1; + for (jndex = 0; jndex < height; jndex++) + { + s32 = (unsigned int *) s8; + d32 = (unsigned int *) d8; + for (kndex = 0; kndex < width; kndex++) + { + SPLITCOLOR32(red, green, blue, *s32); + *d32 = COLOR24(red, green, blue); + s32++; + d32++; + } + d8 += dst_stride; + s8 += src_stride; + } + } + return 0; +} + /******************************************************************************/ static Bool -rdpCapture0(RegionPtr in_reg, RegionPtr out_reg, +rdpCapture0(rdpClientCon *clientCon, + RegionPtr in_reg, BoxPtr *out_rects, int *num_out_rects, void *src, int src_width, int src_height, int src_stride, int src_format, void *dst, int 
dst_width, int dst_height, int dst_stride, int dst_format, int max_rects) { - BoxPtr prects; + BoxPtr psrc_rects; BoxRec rect; RegionRec reg; char *src_rect; char *dst_rect; - int num_regions; - int bytespp; + int num_rects; + int src_bytespp; + int dst_bytespp; int width; int height; int src_offset; int dst_offset; - int bytes; int i; int j; + int k; + int red; + int green; + int blue; Bool rv; + unsigned int *s32; + unsigned short *d16; + unsigned char *d8; LLOGLN(10, ("rdpCapture0:")); @@ -73,47 +288,320 @@ rdpCapture0(RegionPtr in_reg, RegionPtr out_reg, rdpRegionInit(&reg, &rect, 0); rdpRegionIntersect(&reg, in_reg, &reg); + psrc_rects = 0; + num_rects = rdpLimitRects(&reg, max_rects, &psrc_rects); + if (num_rects < 1) + { + rdpRegionUninit(&reg); + return FALSE; + } + + *num_out_rects = num_rects; + + *out_rects = (BoxPtr) g_malloc(sizeof(BoxRec) * num_rects, 0); + for (i = 0; i < num_rects; i++) + { + rect = psrc_rects[i]; + (*out_rects)[i] = rect; + } + + if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_a8r8g8b8)) + { + rdpCopyBox_a8r8g8b8_to_a8r8g8b8(src, src_stride, 0, 0, + dst, dst_stride, 0, 0, + psrc_rects, num_rects); + } + else if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_a8b8g8r8)) + { + rdpCopyBox_a8r8g8b8_to_a8b8g8r8(src, src_stride, + dst, dst_stride, + psrc_rects, num_rects); + } + else if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_r5g6b5)) + { + src_bytespp = 4; + dst_bytespp = 2; + + for (i = 0; i < num_rects; i++) + { + /* get rect to copy */ + rect = (*out_rects)[i]; + + /* get rect dimensions */ + width = rect.x2 - rect.x1; + height = rect.y2 - rect.y1; + + /* point to start of each rect in respective memory */ + src_offset = rect.y1 * src_stride + rect.x1 * src_bytespp; + dst_offset = rect.y1 * dst_stride + rect.x1 * dst_bytespp; + src_rect = src + src_offset; + dst_rect = dst + dst_offset; + + /* copy one line at a time */ + for (j = 0; j < height; j++) + { + s32 = (unsigned int *) src_rect; + d16 = (unsigned short *) 
dst_rect; + for (k = 0; k < width; k++) + { + SPLITCOLOR32(red, green, blue, *s32); + *d16 = COLOR16(red, green, blue); + s32++; + d16++; + } + src_rect += src_stride; + dst_rect += dst_stride; + } + } + } + else if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_a1r5g5b5)) + { + src_bytespp = 4; + dst_bytespp = 2; + + for (i = 0; i < num_rects; i++) + { + /* get rect to copy */ + rect = (*out_rects)[i]; + + /* get rect dimensions */ + width = rect.x2 - rect.x1; + height = rect.y2 - rect.y1; + + /* point to start of each rect in respective memory */ + src_offset = rect.y1 * src_stride + rect.x1 * src_bytespp; + dst_offset = rect.y1 * dst_stride + rect.x1 * dst_bytespp; + src_rect = src + src_offset; + dst_rect = dst + dst_offset; + + /* copy one line at a time */ + for (j = 0; j < height; j++) + { + s32 = (unsigned int *) src_rect; + d16 = (unsigned short *) dst_rect; + for (k = 0; k < width; k++) + { + SPLITCOLOR32(red, green, blue, *s32); + *d16 = COLOR15(red, green, blue); + s32++; + d16++; + } + src_rect += src_stride; + dst_rect += dst_stride; + } + } + } + else if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_r3g3b2)) + { + src_bytespp = 4; + dst_bytespp = 1; + + for (i = 0; i < num_rects; i++) + { + /* get rect to copy */ + rect = (*out_rects)[i]; + + /* get rect dimensions */ + width = rect.x2 - rect.x1; + height = rect.y2 - rect.y1; + + /* point to start of each rect in respective memory */ + src_offset = rect.y1 * src_stride + rect.x1 * src_bytespp; + dst_offset = rect.y1 * dst_stride + rect.x1 * dst_bytespp; + src_rect = src + src_offset; + dst_rect = dst + dst_offset; + + /* copy one line at a time */ + for (j = 0; j < height; j++) + { + s32 = (unsigned int *) src_rect; + d8 = (unsigned char *) dst_rect; + for (k = 0; k < width; k++) + { + SPLITCOLOR32(red, green, blue, *s32); + *d8 = COLOR8(red, green, blue); + s32++; + d8++; + } + src_rect += src_stride; + dst_rect += dst_stride; + } + } + } + else + { + LLOGLN(0, ("rdpCapture0: unimp 
color conversion")); + } + rdpRegionUninit(&reg); + return rv; +} + +/******************************************************************************/ +/* make out_rects always multiple of 16 width and height */ +static Bool +rdpCapture1(rdpClientCon *clientCon, + RegionPtr in_reg, BoxPtr *out_rects, int *num_out_rects, + void *src, int src_width, int src_height, + int src_stride, int src_format, + void *dst, int dst_width, int dst_height, + int dst_stride, int dst_format, int max_rects) +{ + BoxPtr psrc_rects; + BoxRec rect; + RegionRec reg; + char *src_rect; + char *dst_rect; + int num_regions; + int src_bytespp; + int dst_bytespp; + int width; + int height; + int min_width; + int min_height; + int src_offset; + int dst_offset; + int index; + int jndex; + int kndex; + int red; + int green; + int blue; + int ex; + int ey; + Bool rv; + unsigned int *s32; + unsigned int *d32; + + LLOGLN(10, ("rdpCapture1:")); + + rv = TRUE; + + min_width = RDPMIN(dst_width, src_width); + min_height = RDPMIN(dst_height, src_height); + + rect.x1 = 0; + rect.y1 = 0; + rect.x2 = min_width; + rect.y2 = min_height; + rdpRegionInit(&reg, &rect, 0); + rdpRegionIntersect(&reg, in_reg, &reg); + num_regions = REGION_NUM_RECTS(&reg); if (num_regions > max_rects) { num_regions = 1; - prects = rdpRegionExtents(&reg); - rdpRegionUninit(out_reg); - rdpRegionInit(out_reg, prects, 0); + psrc_rects = rdpRegionExtents(&reg); } else { - prects = REGION_RECTS(&reg); - rdpRegionCopy(out_reg, &reg); + psrc_rects = REGION_RECTS(&reg); } - if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_a8r8g8b8)) + if (num_regions < 1) { - bytespp = 4; + return FALSE; + } - for (i = 0; i < num_regions; i++) + *num_out_rects = num_regions; + + *out_rects = (BoxPtr) g_malloc(sizeof(BoxRec) * num_regions * 4, 0); + index = 0; + while (index < num_regions) + { + rect = psrc_rects[index]; + width = rect.x2 - rect.x1; + height = rect.y2 - rect.y1; + ex = ((width + 15) & ~15) - width; + if (ex != 0) + { + rect.x2 += ex; + if (rect.x2 > min_width) + { + 
rect.x1 -= rect.x2 - min_width; + rect.x2 = min_width; + } + if (rect.x1 < 0) + { + rect.x1 += 16; + } + } + ey = ((height + 15) & ~15) - height; + if (ey != 0) + { + rect.y2 += ey; + if (rect.y2 > min_height) + { + rect.y1 -= rect.y2 - min_height; + rect.y2 = min_height; + } + if (rect.y1 < 0) + { + rect.y1 += 16; + } + } +#if 0 + if (rect.x1 < 0) + { + LLOGLN(0, ("rdpCapture1: error")); + } + if (rect.y1 < 0) + { + LLOGLN(0, ("rdpCapture1: error")); + } + if (rect.x2 > min_width) + { + LLOGLN(0, ("rdpCapture1: error")); + } + if (rect.y2 > min_height) + { + LLOGLN(0, ("rdpCapture1: error")); + } + if ((rect.x2 - rect.x1) % 16 != 0) + { + LLOGLN(0, ("rdpCapture1: error")); + } + if ((rect.y2 - rect.y1) % 16 != 0) + { + LLOGLN(0, ("rdpCapture1: error")); + } +#endif + (*out_rects)[index] = rect; + index++; + } + + if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_a8b8g8r8)) + { + src_bytespp = 4; + dst_bytespp = 4; + + for (index = 0; index < num_regions; index++) { /* get rect to copy */ - rect = prects[i]; + rect = (*out_rects)[index]; /* get rect dimensions */ width = rect.x2 - rect.x1; height = rect.y2 - rect.y1; /* point to start of each rect in respective memory */ - src_offset = rect.y1 * src_stride + rect.x1 * bytespp; - dst_offset = rect.y1 * dst_stride + rect.x1 * bytespp; + src_offset = rect.y1 * src_stride + rect.x1 * src_bytespp; + dst_offset = rect.y1 * dst_stride + rect.x1 * dst_bytespp; src_rect = src + src_offset; dst_rect = dst + dst_offset; - /* bytes per line */ - bytes = width * bytespp; - /* copy one line at a time */ - for (j = 0; j < height; j++) + for (jndex = 0; jndex < height; jndex++) { - memcpy(dst_rect, src_rect, bytes); + s32 = (unsigned int *) src_rect; + d32 = (unsigned int *) dst_rect; + for (kndex = 0; kndex < width; kndex++) + { + SPLITCOLOR32(red, green, blue, *s32); + *d32 = COLOR24(red, green, blue); + s32++; + d32++; + } src_rect += src_stride; dst_rect += dst_stride; } @@ -121,17 +609,130 @@ rdpCapture0(RegionPtr 
in_reg, RegionPtr out_reg, } else { - LLOGLN(0, ("rdpCapture0: unimp color conversion")); + LLOGLN(0, ("rdpCapture1: unimp color conversion")); } rdpRegionUninit(&reg); return rv; } +/******************************************************************************/ +static Bool +rdpCapture2(rdpClientCon *clientCon, + RegionPtr in_reg, BoxPtr *out_rects, int *num_out_rects, + void *src, int src_width, int src_height, + int src_stride, int src_format, + void *dst, int dst_width, int dst_height, + int dst_stride, int dst_format, int max_rects) +{ + int x; + int y; + int out_rect_index; + int num_rects; + int rcode; + BoxRec rect; + BoxRec extents_rect; + BoxPtr rects; + RegionRec tile_reg; + RegionRec lin_reg; + RegionRec temp_reg; + RegionPtr pin_reg; + + LLOGLN(10, ("rdpCapture2:")); + + *out_rects = (BoxPtr) g_malloc(sizeof(BoxRec) * RDP_MAX_TILES, 0); + if (*out_rects == NULL) + { + return FALSE; + } + out_rect_index = 0; + + /* clip for smaller of 2 */ + rect.x1 = 0; + rect.y1 = 0; + rect.x2 = min(dst_width, src_width); + rect.y2 = min(dst_height, src_height); + rdpRegionInit(&temp_reg, &rect, 0); + rdpRegionIntersect(&temp_reg, in_reg, &temp_reg); + + /* limit the number of rects */ + num_rects = REGION_NUM_RECTS(&temp_reg); + if (num_rects > max_rects) + { + LLOGLN(10, ("rdpCapture2: too many rects")); + rdpRegionInit(&lin_reg, rdpRegionExtents(&temp_reg), 0); + pin_reg = &lin_reg; + } + else + { + LLOGLN(10, ("rdpCapture2: not too many rects")); + rdpRegionInit(&lin_reg, NullBox, 0); + pin_reg = &temp_reg; + } + extents_rect = *rdpRegionExtents(pin_reg); + y = extents_rect.y1 & ~63; + while (y < extents_rect.y2) + { + x = extents_rect.x1 & ~63; + while (x < extents_rect.x2) + { + rect.x1 = x; + rect.y1 = y; + rect.x2 = rect.x1 + 64; + rect.y2 = rect.y1 + 64; + rcode = rdpRegionContainsRect(pin_reg, &rect); + LLOGLN(10, ("rdpCapture2: rcode %d", rcode)); + + if (rcode != rgnOUT) + { + if (rcode == rgnPART) + { + LLOGLN(10, ("rdpCapture2: rgnPART")); + 
rdpFillBox_yuvalp(x, y, dst, dst_stride); + rdpRegionInit(&tile_reg, &rect, 0); + rdpRegionIntersect(&tile_reg, pin_reg, &tile_reg); + rects = REGION_RECTS(&tile_reg); + num_rects = REGION_NUM_RECTS(&tile_reg); + rdpCopyBox_a8r8g8b8_to_yuvalp(x, y, + src, src_stride, + dst, dst_stride, + rects, num_rects); + rdpRegionUninit(&tile_reg); + } + else /* rgnIN */ + { + LLOGLN(10, ("rdpCapture2: rgnIN")); + rdpCopyBox_a8r8g8b8_to_yuvalp(x, y, + src, src_stride, + dst, dst_stride, + &rect, 1); + } + (*out_rects)[out_rect_index] = rect; + out_rect_index++; + if (out_rect_index >= RDP_MAX_TILES) + { + g_free(*out_rects); + *out_rects = NULL; + rdpRegionUninit(&temp_reg); + rdpRegionUninit(&lin_reg); + return FALSE; + } + } + x += 64; + } + y += 64; + } + *num_out_rects = out_rect_index; + rdpRegionUninit(&temp_reg); + rdpRegionUninit(&lin_reg); + return TRUE; +} + /** * Copy an array of rectangles from one memory area to another *****************************************************************************/ Bool -rdpCapture(RegionPtr in_reg, RegionPtr out_reg, +rdpCapture(rdpClientCon *clientCon, + RegionPtr in_reg, BoxPtr *out_rects, int *num_out_rects, void *src, int src_width, int src_height, int src_stride, int src_format, void *dst, int dst_width, int dst_height, @@ -141,7 +742,19 @@ rdpCapture(RegionPtr in_reg, RegionPtr out_reg, switch (mode) { case 0: - return rdpCapture0(in_reg, out_reg, + return rdpCapture0(clientCon, in_reg, out_rects, num_out_rects, + src, src_width, src_height, + src_stride, src_format, + dst, dst_width, dst_height, + dst_stride, dst_format, 15); + case 1: + return rdpCapture1(clientCon, in_reg, out_rects, num_out_rects, + src, src_width, src_height, + src_stride, src_format, + dst, dst_width, dst_height, + dst_stride, dst_format, 15); + case 2: + return rdpCapture2(clientCon, in_reg, out_rects, num_out_rects, src, src_width, src_height, src_stride, src_format, dst, dst_width, dst_height, @@ -150,5 +763,5 @@ rdpCapture(RegionPtr in_reg, 
RegionPtr out_reg, LLOGLN(0, ("rdpCapture: unimp mode")); break; } - return TRUE; + return FALSE; } diff --git a/xorg/server/module/rdpCapture.h b/xorg/server/module/rdpCapture.h index f92508c4..4dff1eea 100644 --- a/xorg/server/module/rdpCapture.h +++ b/xorg/server/module/rdpCapture.h @@ -19,9 +19,9 @@ */ Bool -rdpCapture(RegionPtr in_reg, RegionPtr out_reg, +rdpCapture(rdpClientCon *clientCon, + RegionPtr in_reg, BoxPtr *out_rects, int *num_out_rects, void *src, int src_width, int src_height, int src_stride, int src_format, void *dst, int dst_width, int dst_height, - int dst_stride, int dst_format, - int mode); + int dst_stride, int dst_format, int mode); diff --git a/xorg/server/module/rdpClientCon.c b/xorg/server/module/rdpClientCon.c index 3c9cdad5..35369063 100644 --- a/xorg/server/module/rdpClientCon.c +++ b/xorg/server/module/rdpClientCon.c @@ -50,21 +50,6 @@ Client connection to xrdp #define LTOUI32(_in) ((unsigned int)(_in)) -#define COLOR8(r, g, b) \ - ((((r) >> 5) << 0) | (((g) >> 5) << 3) | (((b) >> 6) << 6)) -#define COLOR15(r, g, b) \ - ((((r) >> 3) << 10) | (((g) >> 3) << 5) | (((b) >> 3) << 0)) -#define COLOR16(r, g, b) \ - ((((r) >> 3) << 11) | (((g) >> 2) << 5) | (((b) >> 3) << 0)) -#define COLOR24(r, g, b) \ - ((((r) >> 0) << 0) | (((g) >> 0) << 8) | (((b) >> 0) << 16)) -#define SPLITCOLOR32(r, g, b, c) \ - do { \ - r = ((c) >> 16) & 0xff; \ - g = ((c) >> 8) & 0xff; \ - b = (c) & 0xff; \ - } while (0) - #define USE_MAX_OS_BYTES 1 #define MAX_OS_BYTES (16 * 1024 * 1024) @@ -107,6 +92,9 @@ static int g_rdp_opcodes[16] = 0xff /* GXset 0xf 1 */ }; +static int +rdpClientConDisconnect(rdpPtr dev, rdpClientCon *clientCon); + /******************************************************************************/ static int rdpClientConGotConnection(ScreenPtr pScreen, rdpPtr dev) @@ -144,6 +132,15 @@ rdpClientConGotConnection(ScreenPtr pScreen, rdpPtr dev) AddEnabledDevice(clientCon->sck); } +#if 0 + if (dev->clientConTail != NULL) + { + 
rdpClientConDisconnect(dev, dev->clientConTail); + dev->clientConHead = NULL; + dev->clientConTail = NULL; + } +#endif + if (dev->clientConTail == NULL) { LLOGLN(0, ("rdpClientConGotConnection: adding only clientCon")); @@ -274,6 +271,11 @@ rdpClientConDisconnect(rdpPtr dev, rdpClientCon *clientCon) } rdpRegionDestroy(clientCon->dirtyRegion); rdpRegionDestroy(clientCon->shmRegion); + if (clientCon->updateTimer != NULL) + { + TimerCancel(clientCon->updateTimer); + TimerFree(clientCon->updateTimer); + } g_free(clientCon); return 0; } @@ -533,6 +535,8 @@ rdpClientConProcessMsgVersion(rdpPtr dev, rdpClientCon *clientCon, return 0; } +#define LALIGN(_num, _po2) ((_num + ((_po2) - 1)) & ~((_po2) - 1)) + /******************************************************************************/ /* this from miScreenInit @@ -549,31 +553,37 @@ rdpClientConProcessScreenSizeMsg(rdpPtr dev, rdpClientCon *clientCon, int bytes; Bool ok; - LLOGLN(0, ("rdpClientConProcessScreenSizeMsg: set width %d height %d bpp %d", - width, height, bpp)); + LLOGLN(0, ("rdpClientConProcessScreenSizeMsg: set width %d height %d " + "bpp %d", width, height, bpp)); clientCon->rdp_width = width; clientCon->rdp_height = height; clientCon->rdp_bpp = bpp; + clientCon->cap_width = width; + clientCon->cap_height = height; if (bpp < 15) { clientCon->rdp_Bpp = 1; clientCon->rdp_Bpp_mask = 0xff; + clientCon->rdp_format = PIXMAN_r3g3b2; } else if (bpp == 15) { clientCon->rdp_Bpp = 2; clientCon->rdp_Bpp_mask = 0x7fff; + clientCon->rdp_format = XRDP_a1r5g5b5; } else if (bpp == 16) { clientCon->rdp_Bpp = 2; clientCon->rdp_Bpp_mask = 0xffff; + clientCon->rdp_format = XRDP_r5g6b5; } else if (bpp > 16) { clientCon->rdp_Bpp = 4; clientCon->rdp_Bpp_mask = 0xffffff; + clientCon->rdp_format = XRDP_a8r8g8b8; } if (clientCon->shmemptr != 0) @@ -655,12 +665,13 @@ rdpClientConProcessMsgClientInput(rdpPtr dev, rdpClientCon *clientCon) } else if (msg == 300) /* resize desktop */ { - rdpClientConProcessScreenSizeMsg(dev, clientCon, 
param1, param2, param3); + rdpClientConProcessScreenSizeMsg(dev, clientCon, param1, + param2, param3); } else if (msg == 301) /* version */ { rdpClientConProcessMsgVersion(dev, clientCon, - param1, param2, param3, param4); + param1, param2, param3, param4); } else { @@ -697,6 +708,32 @@ rdpClientConProcessMsgClientInfo(rdpPtr dev, rdpClientCon *clientCon) i1 = clientCon->client_info.offscreen_cache_entries; LLOGLN(0, (" offscreen entries %d", i1)); + if (clientCon->client_info.capture_format != 0) + { + clientCon->rdp_format = clientCon->client_info.capture_format; + } + + if (clientCon->client_info.capture_code == 2) /* RFX */ + { + LLOGLN(0, ("rdpClientConProcessMsgClientInfo: got RFX capture")); + clientCon->cap_width = LALIGN(clientCon->rdp_width, 64); + clientCon->cap_height = LALIGN(clientCon->rdp_height, 64); + LLOGLN(0, (" cap_width %d cap_height %d", + clientCon->cap_width, clientCon->cap_height)); + if (clientCon->shmemptr != 0) + { + shmdt(clientCon->shmemptr); + } + bytes = clientCon->cap_width * clientCon->cap_height * + clientCon->rdp_Bpp; + clientCon->shmemid = shmget(IPC_PRIVATE, bytes, IPC_CREAT | 0777); + clientCon->shmemptr = shmat(clientCon->shmemid, 0, 0); + shmctl(clientCon->shmemid, IPC_RMID, NULL); + LLOGLN(0, ("rdpClientConProcessMsgClientInfo: shmemid %d shmemptr %p " + "bytes %d", clientCon->shmemid, clientCon->shmemptr, bytes)); + clientCon->shmem_lineBytes = clientCon->rdp_Bpp * clientCon->cap_width; + } + if (clientCon->client_info.offscreen_support_level > 0) { if (clientCon->client_info.offscreen_cache_entries > 0) @@ -808,7 +845,7 @@ rdpClientConProcessMsgClientRegionEx(rdpPtr dev, rdpClientCon *clientCon) { struct stream *s; int flags; - + LLOGLN(10, ("rdpClientConProcessMsgClientRegionEx:")); s = clientCon->in_s; @@ -1878,19 +1915,29 @@ rdpClientConCheckDirtyScreen(rdpPtr dev, rdpClientCon *clientCon) static int rdpClientConSendPaintRectShmEx(rdpPtr dev, rdpClientCon *clientCon, struct image_data *id, - RegionPtr dirtyReg, 
RegionPtr copyReg) + RegionPtr dirtyReg, + BoxPtr copyRects, int numCopyRects) { int index; int size; int num_rects_d; int num_rects_c; + short x; + short y; + short cx; + short cy; struct stream *s; BoxRec box; rdpClientConBeginUpdate(dev, clientCon); num_rects_d = REGION_NUM_RECTS(dirtyReg); - num_rects_c = REGION_NUM_RECTS(copyReg); + num_rects_c = numCopyRects; + if ((num_rects_c < 1) || (num_rects_d < 1)) + { + LLOGLN(0, ("rdpClientConSendPaintRectShmEx: nothing to send")); + return 0; + } size = 2 + 2 + 2 + num_rects_d * 8 + 2 + num_rects_c * 8; size += 4 + 4 + 4 + 4 + 2 + 2; rdpClientConPreCheck(dev, clientCon, size); @@ -1904,20 +1951,28 @@ rdpClientConSendPaintRectShmEx(rdpPtr dev, rdpClientCon *clientCon, for (index = 0; index < num_rects_d; index++) { box = REGION_RECTS(dirtyReg)[index]; - out_uint16_le(s, box.x1); - out_uint16_le(s, box.y1); - out_uint16_le(s, box.x2 - box.x1); - out_uint16_le(s, box.y2 - box.y1); + x = box.x1; + y = box.y1; + cx = box.x2 - box.x1; + cy = box.y2 - box.y1; + out_uint16_le(s, x); + out_uint16_le(s, y); + out_uint16_le(s, cx); + out_uint16_le(s, cy); } out_uint16_le(s, num_rects_c); for (index = 0; index < num_rects_c; index++) { - box = REGION_RECTS(copyReg)[index]; - out_uint16_le(s, box.x1); - out_uint16_le(s, box.y1); - out_uint16_le(s, box.x2 - box.x1); - out_uint16_le(s, box.y2 - box.y1); + box = copyRects[index]; + x = box.x1; + y = box.y1; + cx = box.x2 - box.x1; + cy = box.y2 - box.y1; + out_uint16_le(s, x); + out_uint16_le(s, y); + out_uint16_le(s, cx); + out_uint16_le(s, cy); } out_uint32_le(s, 0); @@ -1925,30 +1980,36 @@ rdpClientConSendPaintRectShmEx(rdpPtr dev, rdpClientCon *clientCon, out_uint32_le(s, clientCon->rect_id); out_uint32_le(s, id->shmem_id); out_uint32_le(s, id->shmem_offset); - out_uint16_le(s, clientCon->rdp_width); - out_uint16_le(s, clientCon->rdp_height); + out_uint16_le(s, clientCon->cap_width); + out_uint16_le(s, clientCon->cap_height); rdpClientConEndUpdate(dev, clientCon); return 0; } - 
+ /******************************************************************************/ static CARD32 rdpDeferredUpdateCallback(OsTimerPtr timer, CARD32 now, pointer arg) { rdpClientCon *clientCon; - RegionRec reg; + BoxPtr rects; + int num_rects; struct image_data id; LLOGLN(10, ("rdpDeferredUpdateCallback:")); clientCon = (rdpClientCon *) arg; - if (clientCon->rect_id != clientCon->rect_id_ack) + if ((clientCon->rect_id > clientCon->rect_id_ack) || + /* do not allow captures until we have the client_info */ + clientCon->client_info.size == 0) { - LLOGLN(0, ("rdpDeferredUpdateCallback: reschedual")); + LLOGLN(0, ("rdpDeferredUpdateCallback: reschedual rect_id %d " + "rect_id_ack %d", + clientCon->rect_id, clientCon->rect_id_ack)); clientCon->updateTimer = TimerSet(clientCon->updateTimer, 0, 40, - rdpDeferredUpdateCallback, clientCon); + rdpDeferredUpdateCallback, + clientCon); return 0; } else @@ -1961,17 +2022,29 @@ rdpDeferredUpdateCallback(OsTimerPtr timer, CARD32 now, pointer arg) clientCon->rdp_width, clientCon->rdp_height, clientCon->rdp_Bpp, id.width, id.height)); clientCon->updateSchedualed = FALSE; - rdpRegionInit(&reg, NullBox, 0); - rdpCapture(clientCon->dirtyRegion, &reg, - id.pixels, id.width, id.height, - id.lineBytes, XRDP_a8r8g8b8, - id.shmem_pixels, clientCon->rdp_width, clientCon->rdp_height, - clientCon->rdp_width * clientCon->rdp_Bpp , XRDP_a8r8g8b8, 0); - rdpClientConSendPaintRectShmEx(clientCon->dev, clientCon, &id, - clientCon->dirtyRegion, &reg); + rects = 0; + num_rects = 0; + LLOGLN(10, ("rdpDeferredUpdateCallback: capture_code %d", + clientCon->client_info.capture_code)); + if (rdpCapture(clientCon, clientCon->dirtyRegion, &rects, &num_rects, + id.pixels, id.width, id.height, + id.lineBytes, XRDP_a8r8g8b8, id.shmem_pixels, + clientCon->cap_width, clientCon->cap_height, + clientCon->cap_width * clientCon->rdp_Bpp, + clientCon->rdp_format, clientCon->client_info.capture_code)) + { + LLOGLN(10, ("rdpDeferredUpdateCallback: num_rects %d", num_rects)); + 
rdpClientConSendPaintRectShmEx(clientCon->dev, clientCon, &id, + clientCon->dirtyRegion, + rects, num_rects); + g_free(rects); + } + else + { + LLOGLN(0, ("rdpDeferredUpdateCallback: rdpCapture failed")); + } rdpRegionDestroy(clientCon->dirtyRegion); clientCon->dirtyRegion = rdpRegionCreate(NullBox, 0); - rdpRegionUninit(&reg); return 0; } diff --git a/xorg/server/module/rdpClientCon.h b/xorg/server/module/rdpClientCon.h index 9cbe493a..a66abbcd 100644 --- a/xorg/server/module/rdpClientCon.h +++ b/xorg/server/module/rdpClientCon.h @@ -79,6 +79,9 @@ struct _rdpClientCon int rdp_Bpp_mask; int rdp_width; int rdp_height; + int rdp_format; /* XRDP_a8r8g8b8, XRDP_r5g6b5, ... */ + int cap_width; + int cap_height; int rdpIndex; /* current os target */ @@ -88,8 +91,6 @@ struct _rdpClientCon struct font_cache font_cache[12][256]; int font_stamp; - RegionPtr dirtyRegion; - struct xrdp_client_info client_info; char *shmemptr; @@ -102,6 +103,8 @@ struct _rdpClientCon OsTimerPtr updateTimer; int updateSchedualed; /* boolean */ + RegionPtr dirtyRegion; + struct _rdpClientCon *next; }; diff --git a/xorg/server/module/rdpCursor.c b/xorg/server/module/rdpCursor.c index 3859e8e7..d4862df7 100644 --- a/xorg/server/module/rdpCursor.c +++ b/xorg/server/module/rdpCursor.c @@ -39,11 +39,18 @@ cursor #include <cursor.h> #include <cursorstr.h> +#include <X11/Xarch.h> + #include "rdp.h" #include "rdpMain.h" #include "rdpDraw.h" #include "rdpClientCon.h" +#ifndef X_BYTE_ORDER +#warning X_BYTE_ORDER not defined +#endif + +#if (X_BYTE_ORDER == X_LITTLE_ENDIAN) /* Copied from Xvnc/lib/font/util/utilbitmap.c */ static unsigned char g_reverse_byte[0x100] = { @@ -80,6 +87,7 @@ static unsigned char g_reverse_byte[0x100] = 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff }; +#endif /******************************************************************************/ #define LOG_LEVEL 1 diff --git a/xorg/server/module/rdpMain.c b/xorg/server/module/rdpMain.c index 
a8786ac2..2f6db7c7 100644 --- a/xorg/server/module/rdpMain.c +++ b/xorg/server/module/rdpMain.c @@ -47,14 +47,6 @@ rdp module main #define LLOGLN(_level, _args) \ do { if (_level < LOG_LEVEL) { ErrorF _args ; ErrorF("\n"); } } while (0) -#define XRDP_DRIVER_NAME "XORGXRDP" -#define XRDP_NAME "XORGXRDP" -#define XRDP_VERSION 1000 - -#define PACKAGE_VERSION_MAJOR 1 -#define PACKAGE_VERSION_MINOR 0 -#define PACKAGE_VERSION_PATCHLEVEL 0 - static Bool g_initialised = FALSE; /*****************************************************************************/ @@ -95,7 +87,7 @@ xorgxrdpDownDown(ScreenPtr pScreen) static MODULESETUPPROTO(xorgxrdpSetup); static XF86ModuleVersionInfo RDPVersRec = { - XRDP_DRIVER_NAME, + XRDP_MODULE_NAME, MODULEVENDORSTRING, MODINFOSTRING1, MODINFOSTRING2, diff --git a/xorg/server/module/rdpSimd.c b/xorg/server/module/rdpSimd.c new file mode 100644 index 00000000..7215bf86 --- /dev/null +++ b/xorg/server/module/rdpSimd.c @@ -0,0 +1,138 @@ +/* +Copyright 2014 Jay Sorg + +Permission to use, copy, modify, distribute, and sell this software and its +documentation for any purpose is hereby granted without fee, provided that +the above copyright notice appear in all copies and that both that +copyright notice and this permission notice appear in supporting +documentation. + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +SIMD function asign + +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* this should be before all X11 .h files */ +#include <xorg-server.h> +#include <xorgVersion.h> + +/* all driver need this */ +#include <xf86.h> +#include <xf86_OSproc.h> + +#include "rdp.h" +#include "rdpXv.h" + +/* use simd, run time */ +int g_simd_use_accel = 1; + +/* use simd, compile time, if zero, g_simd_use_accel does not matter */ +#if !defined(SIMD_USE_ACCEL) +#define SIMD_USE_ACCEL 0 +#endif + +#if SIMD_USE_ACCEL +#if defined(__x86_64__) || defined(__AMD64__) || defined (_M_AMD64) +#include "amd64/funcs_amd64.h" +#elif defined(__x86__) || defined(_M_IX86) || defined(__i386__) +#include "x86/funcs_x86.h" +#endif +#endif + +#define LOG_LEVEL 1 +#define LLOGLN(_level, _args) \ + do { if (_level < LOG_LEVEL) { ErrorF _args ; ErrorF("\n"); } } while (0) + +/*****************************************************************************/ +Bool +rdpSimdInit(ScreenPtr pScreen, ScrnInfoPtr pScrn) +{ + rdpPtr dev; + + dev = XRDPPTR(pScrn); + /* assign functions */ + LLOGLN(0, ("rdpSimdInit: assigning yuv functions")); +#if SIMD_USE_ACCEL + if (g_simd_use_accel) + { +#if defined(__x86_64__) || defined(__AMD64__) || defined (_M_AMD64) + int ax, bx, cx, dx; + cpuid_amd64(1, 0, &ax, &bx, &cx, &dx); + LLOGLN(0, ("rdpSimdInit: cpuid ax 1 cx 0 return ax 0x%8.8x bx " + "0x%8.8x cx 0x%8.8x dx 0x%8.8x", ax, bx, cx, dx)); + if (dx & (1 << 26)) /* SSE 2 */ + { + dev->yv12_to_rgb32 = yv12_to_rgb32_amd64_sse2; + dev->i420_to_rgb32 = i420_to_rgb32_amd64_sse2; + dev->yuy2_to_rgb32 = yuy2_to_rgb32_amd64_sse2; + dev->uyvy_to_rgb32 = uyvy_to_rgb32_amd64_sse2; + LLOGLN(0, ("rdpSimdInit: sse2 amd64 yuv functions assigned")); + } + else + { + dev->yv12_to_rgb32 = YV12_to_RGB32; + dev->i420_to_rgb32 = I420_to_RGB32; + dev->yuy2_to_rgb32 = YUY2_to_RGB32; + dev->uyvy_to_rgb32 = UYVY_to_RGB32; + LLOGLN(0, ("rdpSimdInit: warning, c yuv functions assigned")); + } +#elif defined(__x86__) || 
defined(_M_IX86) || defined(__i386__) + int ax, bx, cx, dx; + cpuid_x86(1, 0, &ax, &bx, &cx, &dx); + LLOGLN(0, ("rdpSimdInit: cpuid ax 1 cx 0 return ax 0x%8.8x bx " + "0x%8.8x cx 0x%8.8x dx 0x%8.8x", ax, bx, cx, dx)); + if (dx & (1 << 26)) /* SSE 2 */ + { + dev->yv12_to_rgb32 = yv12_to_rgb32_x86_sse2; + dev->i420_to_rgb32 = i420_to_rgb32_x86_sse2; + dev->yuy2_to_rgb32 = yuy2_to_rgb32_x86_sse2; + dev->uyvy_to_rgb32 = uyvy_to_rgb32_x86_sse2; + LLOGLN(0, ("rdpSimdInit: sse2 x86 yuv functions assigned")); + } + else + { + dev->yv12_to_rgb32 = YV12_to_RGB32; + dev->i420_to_rgb32 = I420_to_RGB32; + dev->yuy2_to_rgb32 = YUY2_to_RGB32; + dev->uyvy_to_rgb32 = UYVY_to_RGB32; + LLOGLN(0, ("rdpSimdInit: warning, c yuv functions assigned")); + } +#else + dev->yv12_to_rgb32 = YV12_to_RGB32; + dev->i420_to_rgb32 = I420_to_RGB32; + dev->yuy2_to_rgb32 = YUY2_to_RGB32; + dev->uyvy_to_rgb32 = UYVY_to_RGB32; + LLOGLN(0, ("rdpSimdInit: warning, c yuv functions assigned")); +#endif + } + else + { + dev->yv12_to_rgb32 = YV12_to_RGB32; + dev->i420_to_rgb32 = I420_to_RGB32; + dev->yuy2_to_rgb32 = YUY2_to_RGB32; + dev->uyvy_to_rgb32 = UYVY_to_RGB32; + LLOGLN(0, ("rdpSimdInit: warning, c yuv functions assigned")); + } +#else + dev->yv12_to_rgb32 = YV12_to_RGB32; + dev->i420_to_rgb32 = I420_to_RGB32; + dev->yuy2_to_rgb32 = YUY2_to_RGB32; + dev->uyvy_to_rgb32 = UYVY_to_RGB32; + LLOGLN(0, ("rdpSimdInit: warning, c yuv functions assigned")); +#endif + return 1; +} + diff --git a/xorg/server/module/rdpSimd.h b/xorg/server/module/rdpSimd.h new file mode 100644 index 00000000..73bf1ba5 --- /dev/null +++ b/xorg/server/module/rdpSimd.h @@ -0,0 +1,34 @@ +/* +Copyright 2014 Jay Sorg + +Permission to use, copy, modify, distribute, and sell this software and its +documentation for any purpose is hereby granted without fee, provided that +the above copyright notice appear in all copies and that both that +copyright notice and this permission notice appear in supporting +documentation. 
+ +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +SIMD function asign + +*/ + +#ifndef __RDPSIMD_H +#define __RDPSIMD_H + +#include <xorg-server.h> +#include <xorgVersion.h> +#include <xf86.h> + +Bool +rdpSimdInit(ScreenPtr pScreen, ScrnInfoPtr pScrn); + +#endif diff --git a/xorg/server/module/rdpXv.c b/xorg/server/module/rdpXv.c new file mode 100644 index 00000000..1557f892 --- /dev/null +++ b/xorg/server/module/rdpXv.c @@ -0,0 +1,678 @@ +/* +Copyright 2014 Jay Sorg + +Permission to use, copy, modify, distribute, and sell this software and its +documentation for any purpose is hereby granted without fee, provided that +the above copyright notice appear in all copies and that both that +copyright notice and this permission notice appear in supporting +documentation. + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +XVideo + +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* this should be before all X11 .h files */ +#include <xorg-server.h> +#include <xorgVersion.h> + +/* all driver need this */ +#include <xf86.h> +#include <xf86_OSproc.h> + +#include <xf86xv.h> +#include <X11/extensions/Xv.h> +#include <fourcc.h> + +#include <fb.h> + +#include "rdp.h" +#include "rdpMisc.h" +#include "rdpReg.h" +#include "rdpClientCon.h" + +#define LOG_LEVEL 1 +#define LLOGLN(_level, _args) \ + do { if (_level < LOG_LEVEL) { ErrorF _args ; ErrorF("\n"); } } while (0) + +#define T_NUM_ENCODINGS 1 +static XF86VideoEncodingRec g_xrdpVidEncodings[T_NUM_ENCODINGS] = +{ { 0, "XV_IMAGE", 2046, 2046, { 1, 1 } } }; + +#define T_NUM_FORMATS 1 +static XF86VideoFormatRec g_xrdpVidFormats[T_NUM_FORMATS] = +{ { 0, TrueColor } }; + +/* YV12 + I420 + 12 bpp planar + YUV 4:2:0 8 bit Y plane followed by 8 bit 2x2 subsampled + U and V planes. */ + +/* YUY2 + UYVY + 16 bpp packed + YUV 4:2:2 Y sample at every pixel, U and V sampled at + every second pixel */ + +/* XVIMAGE_YV12 FOURCC_YV12 0x32315659 */ +/* XVIMAGE_I420 FOURCC_I420 0x30323449 */ +/* XVIMAGE_YUY2 FOURCC_YUY2 0x32595559 */ +/* XVIMAGE_UYVY FOURCC_UYVY 0x59565955 */ + +static XF86ImageRec g_xrdpVidImages[] = +{ XVIMAGE_YV12, XVIMAGE_I420, XVIMAGE_YUY2, XVIMAGE_UYVY }; + +#define T_MAX_PORTS 1 + +/*****************************************************************************/ +static int +xrdpVidPutVideo(ScrnInfoPtr pScrn, short vid_x, short vid_y, + short drw_x, short drw_y, short vid_w, short vid_h, + short drw_w, short drw_h, RegionPtr clipBoxes, + pointer data, DrawablePtr pDraw) +{ + LLOGLN(0, ("xrdpVidPutVideo:")); + return Success; +} + +/*****************************************************************************/ +static int +xrdpVidPutStill(ScrnInfoPtr pScrn, short vid_x, short vid_y, + short drw_x, short drw_y, short vid_w, short vid_h, + short drw_w, short drw_h, RegionPtr clipBoxes, + pointer data, DrawablePtr 
pDraw) +{ + LLOGLN(0, ("xrdpVidPutStill:")); + return Success; +} + +/*****************************************************************************/ +static int +xrdpVidGetVideo(ScrnInfoPtr pScrn, short vid_x, short vid_y, + short drw_x, short drw_y, short vid_w, short vid_h, + short drw_w, short drw_h, RegionPtr clipBoxes, + pointer data, DrawablePtr pDraw) +{ + LLOGLN(0, ("xrdpVidGetVideo:")); + return Success; +} + +/*****************************************************************************/ +static int +xrdpVidGetStill(ScrnInfoPtr pScrn, short vid_x, short vid_y, + short drw_x, short drw_y, short vid_w, short vid_h, + short drw_w, short drw_h, RegionPtr clipBoxes, + pointer data, DrawablePtr pDraw) +{ + LLOGLN(0, ("FBDevTIVidGetStill:")); + return Success; +} + +/*****************************************************************************/ +static void +xrdpVidStopVideo(ScrnInfoPtr pScrn, pointer data, Bool Cleanup) +{ + LLOGLN(0, ("xrdpVidStopVideo:")); +} + +/*****************************************************************************/ +static int +xrdpVidSetPortAttribute(ScrnInfoPtr pScrn, Atom attribute, + INT32 value, pointer data) +{ + LLOGLN(0, ("xrdpVidSetPortAttribute:")); + return Success; +} + +/*****************************************************************************/ +static int +xrdpVidGetPortAttribute(ScrnInfoPtr pScrn, Atom attribute, + INT32 *value, pointer data) +{ + LLOGLN(0, ("xrdpVidGetPortAttribute:")); + return Success; +} + +/*****************************************************************************/ +static void +xrdpVidQueryBestSize(ScrnInfoPtr pScrn, Bool motion, + short vid_w, short vid_h, short drw_w, short drw_h, + unsigned int *p_w, unsigned int *p_h, pointer data) +{ + LLOGLN(0, ("xrdpVidQueryBestSize:")); +} + +/*****************************************************************************/ +int +YV12_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs) +{ + int size_total; + int y; + int u; + int v; + int 
c; + int d; + int e; + int r; + int g; + int b; + int t; + int i; + int j; + + size_total = width * height; + for (j = 0; j < height; j++) + { + for (i = 0; i < width; i++) + { + y = yuvs[j * width + i]; + u = yuvs[(j / 2) * (width / 2) + (i / 2) + size_total]; + v = yuvs[(j / 2) * (width / 2) + (i / 2) + size_total + (size_total / 4)]; + c = y - 16; + d = u - 128; + e = v - 128; + t = (298 * c + 409 * e + 128) >> 8; + b = RDPCLAMP(t, 0, 255); + t = (298 * c - 100 * d - 208 * e + 128) >> 8; + g = RDPCLAMP(t, 0, 255); + t = (298 * c + 516 * d + 128) >> 8; + r = RDPCLAMP(t, 0, 255); + rgbs[j * width + i] = (r << 16) | (g << 8) | b; + } + } + return 0; +} + +/*****************************************************************************/ +int +I420_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs) +{ + int size_total; + int y; + int u; + int v; + int c; + int d; + int e; + int r; + int g; + int b; + int t; + int i; + int j; + + size_total = width * height; + for (j = 0; j < height; j++) + { + for (i = 0; i < width; i++) + { + y = yuvs[j * width + i]; + v = yuvs[(j / 2) * (width / 2) + (i / 2) + size_total]; + u = yuvs[(j / 2) * (width / 2) + (i / 2) + size_total + (size_total / 4)]; + c = y - 16; + d = u - 128; + e = v - 128; + t = (298 * c + 409 * e + 128) >> 8; + b = RDPCLAMP(t, 0, 255); + t = (298 * c - 100 * d - 208 * e + 128) >> 8; + g = RDPCLAMP(t, 0, 255); + t = (298 * c + 516 * d + 128) >> 8; + r = RDPCLAMP(t, 0, 255); + rgbs[j * width + i] = (r << 16) | (g << 8) | b; + } + } + return 0; +} + +/*****************************************************************************/ +int +YUY2_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs) +{ + int y1; + int y2; + int u; + int v; + int c; + int d; + int e; + int r; + int g; + int b; + int t; + int i; + int j; + + for (j = 0; j < height; j++) + { + for (i = 0; i < width; i++) + { + y1 = *(yuvs++); + v = *(yuvs++); + y2 = *(yuvs++); + u = *(yuvs++); + + c = y1 - 16; + d = u - 128; + e = v - 
128; + t = (298 * c + 409 * e + 128) >> 8; + b = RDPCLAMP(t, 0, 255); + t = (298 * c - 100 * d - 208 * e + 128) >> 8; + g = RDPCLAMP(t, 0, 255); + t = (298 * c + 516 * d + 128) >> 8; + r = RDPCLAMP(t, 0, 255); + rgbs[j * width + i] = (r << 16) | (g << 8) | b; + + i++; + c = y2 - 16; + d = u - 128; + e = v - 128; + t = (298 * c + 409 * e + 128) >> 8; + b = RDPCLAMP(t, 0, 255); + t = (298 * c - 100 * d - 208 * e + 128) >> 8; + g = RDPCLAMP(t, 0, 255); + t = (298 * c + 516 * d + 128) >> 8; + r = RDPCLAMP(t, 0, 255); + rgbs[j * width + i] = (r << 16) | (g << 8) | b; + } + } + return 0; +} + +/*****************************************************************************/ +int +UYVY_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs) +{ + int y1; + int y2; + int u; + int v; + int c; + int d; + int e; + int r; + int g; + int b; + int t; + int i; + int j; + + for (j = 0; j < height; j++) + { + for (i = 0; i < width; i++) + { + v = *(yuvs++); + y1 = *(yuvs++); + u = *(yuvs++); + y2 = *(yuvs++); + + c = y1 - 16; + d = u - 128; + e = v - 128; + t = (298 * c + 409 * e + 128) >> 8; + b = RDPCLAMP(t, 0, 255); + t = (298 * c - 100 * d - 208 * e + 128) >> 8; + g = RDPCLAMP(t, 0, 255); + t = (298 * c + 516 * d + 128) >> 8; + r = RDPCLAMP(t, 0, 255); + rgbs[j * width + i] = (r << 16) | (g << 8) | b; + + i++; + c = y2 - 16; + d = u - 128; + e = v - 128; + t = (298 * c + 409 * e + 128) >> 8; + b = RDPCLAMP(t, 0, 255); + t = (298 * c - 100 * d - 208 * e + 128) >> 8; + g = RDPCLAMP(t, 0, 255); + t = (298 * c + 516 * d + 128) >> 8; + r = RDPCLAMP(t, 0, 255); + rgbs[j * width + i] = (r << 16) | (g << 8) | b; + } + } + return 0; +} + +#if 0 +/*****************************************************************************/ +static int +stretch_RGB32_RGB32(int *src, int src_width, int src_height, + int src_x, int src_y, int src_w, int src_h, + int *dst, int dst_w, int dst_h) +{ + int mwidth; + int mheight; + int index; + + mwidth = RDPMIN(src_width, dst_w); + mheight = 
RDPMIN(src_height, dst_h); + for (index = 0; index < mheight; index++) + { + g_memcpy(dst, src, mwidth * 4); + src += src_width; + dst += dst_w; + } + return 0; +} +#endif + +/*****************************************************************************/ +static int +stretch_RGB32_RGB32(int *src, int src_width, int src_height, + int src_x, int src_y, int src_w, int src_h, + int *dst, int dst_w, int dst_h) +{ + int index; + int jndex; + int lndex; + int last_lndex; + int oh; + int ih; + int ov; + int iv; + int pix; + int *src32; + int *dst32; + + LLOGLN(10, ("stretch_RGB32_RGB32: oh 0x%8.8x ov 0x%8.8x", oh, ov)); + oh = (src_w << 16) / dst_w; + ov = (src_h << 16) / dst_h; + iv = ov; + lndex = src_y; + last_lndex = -1; + for (index = 0; index < dst_h; index++) + { + if (lndex == last_lndex) + { + /* repeat line */ + dst32 = dst + index * dst_w; + src32 = dst32 - dst_w; + g_memcpy(dst32, src32, dst_w * 4); + } + else + { + ih = oh; + src32 = src + lndex * src_width + src_x; + pix = *src32; + dst32 = dst + index * dst_w; + for (jndex = 0; jndex < dst_w; jndex++) + { + *dst32 = pix; + while (ih > (1 << 16) - 1) + { + ih -= 1 << 16; + src32++; + } + pix = *src32; + ih += oh; + dst32++; + } + } + last_lndex = lndex; + while (iv > (1 << 16) - 1) + { + iv -= 1 << 16; + lndex++; + } + iv += ov; + + } + LLOGLN(10, ("stretch_RGB32_RGB32: out")); + return 0; +} + +/******************************************************************************/ +/* returns error */ +static CARD32 +rdpDeferredXvCleanup(OsTimerPtr timer, CARD32 now, pointer arg) +{ + rdpPtr dev; + + LLOGLN(0, ("rdpDeferredXvCleanup:")); + dev = (rdpPtr) arg; + dev->xv_timer_schedualed = 0; + dev->xv_data_bytes = 0; + g_free(dev->xv_data); + dev->xv_data = 0; + return 0; +} + +/*****************************************************************************/ +/* see hw/xfree86/common/xf86xv.c for info */ +static int +xrdpVidPutImage(ScrnInfoPtr pScrn, + short src_x, short src_y, short drw_x, short drw_y, + short 
src_w, short src_h, short drw_w, short drw_h, + int format, unsigned char* buf, + short width, short height, + Bool sync, RegionPtr clipBoxes, + pointer data, DrawablePtr dst) +{ + rdpPtr dev; + int *rgborg32; + int *rgbend32; + int index; + int error; + GCPtr tempGC; + + LLOGLN(10, ("xrdpVidPutImage: format 0x%8.8x", format)); + LLOGLN(10, ("xrdpVidPutImage: src_x %d srcy_y %d", src_x, src_y)); + dev = XRDPPTR(pScrn); + + if (dev->xv_timer_schedualed) + { + TimerCancel(dev->xv_timer); + dev->xv_timer = TimerSet(dev->xv_timer, 0, 2000, + rdpDeferredXvCleanup, dev); + } + else + { + dev->xv_timer_schedualed = 1; + dev->xv_timer = TimerSet(dev->xv_timer, 0, 2000, + rdpDeferredXvCleanup, dev); + } + + index = width * height * 4 + drw_w * drw_h * 4 + 64; + if (index > dev->xv_data_bytes) + { + g_free(dev->xv_data); + dev->xv_data = g_malloc(index, 0); + if (dev->xv_data == NULL) + { + LLOGLN(0, ("xrdpVidPutImage: memory alloc error")); + dev->xv_data_bytes = 0; + return Success; + } + dev->xv_data_bytes = index; + } + rgborg32 = (int *) RDPALIGN(dev->xv_data, 16); + rgbend32 = rgborg32 + width * height; + rgbend32 = (int *) RDPALIGN(rgbend32, 16); + error = 0; + switch (format) + { + case FOURCC_YV12: + LLOGLN(10, ("xrdpVidPutImage: FOURCC_YV12")); + error = dev->yv12_to_rgb32(buf, width, height, rgborg32); + break; + case FOURCC_I420: + LLOGLN(10, ("xrdpVidPutImage: FOURCC_I420")); + error = dev->i420_to_rgb32(buf, width, height, rgborg32); + break; + case FOURCC_YUY2: + LLOGLN(10, ("xrdpVidPutImage: FOURCC_YUY2")); + error = dev->yuy2_to_rgb32(buf, width, height, rgborg32); + break; + case FOURCC_UYVY: + LLOGLN(10, ("xrdpVidPutImage: FOURCC_UYVY")); + error = dev->uyvy_to_rgb32(buf, width, height, rgborg32); + break; + default: + LLOGLN(0, ("xrdpVidPutImage: unknown format 0x%8.8x", format)); + return Success; + } + if (error != 0) + { + return Success; + } + error = stretch_RGB32_RGB32(rgborg32, width, height, + src_x, src_y, src_w, src_h, + rgbend32, drw_w, drw_h); 
+ if (error != 0) + { + return Success; + } + + tempGC = GetScratchGC(dst->depth, pScrn->pScreen); + if (tempGC != NULL) + { + ValidateGC(dst, tempGC); + (*tempGC->ops->PutImage)(dst, tempGC, 24, + drw_x - dst->x, drw_y - dst->y, + drw_w, drw_h, 0, ZPixmap, (char*)rgbend32); + FreeScratchGC(tempGC); + } + + return Success; +} + +/*****************************************************************************/ +static int +xrdpVidQueryImageAttributes(ScrnInfoPtr pScrn, int id, + unsigned short *w, unsigned short *h, + int *pitches, int *offsets) +{ + int size, tmp; + + LLOGLN(10, ("xrdpVidQueryImageAttributes:")); + /* this is same code as all drivers currently have */ + if (*w > 2046) + { + *w = 2046; + } + if (*h > 2046) + { + *h = 2046; + } + /* make w multiple of 4 so that resizing works properly */ + *w = (*w + 3) & ~3; + if (offsets != NULL) + { + offsets[0] = 0; + } + switch (id) + { + case FOURCC_YV12: + case FOURCC_I420: + /* make h be even */ + *h = (*h + 1) & ~1; + /* make w be multiple of 4 (ie. 
pad it) */ + size = (*w + 3) & ~3; + /* width of a Y row => width of image */ + if (pitches != NULL) + { + pitches[0] = size; + } + /* offset of U plane => w * h */ + size *= *h; + if (offsets != NULL) + { + offsets[1] = size; + } + /* width of U, V row => width / 2 */ + tmp = ((*w >> 1) + 3) & ~3; + if (pitches != NULL) + { + pitches[1] = pitches[2] = tmp; + } + /* offset of V => Y plane + U plane (w * h + w / 2 * h / 2) */ + tmp *= (*h >> 1); + size += tmp; + if (offsets != NULL) + { + offsets[2] = size; + } + size += tmp; + break; + case FOURCC_YUY2: + case FOURCC_UYVY: + size = (*w) * 2; + if (pitches != NULL) + { + pitches[0] = size; + } + size *= *h; + break; + default: + LLOGLN(0, ("xrdpVidQueryImageAttributes: Unsupported image")); + return 0; + } + LLOGLN(10, ("xrdpVidQueryImageAttributes: finished size %d id 0x%x", size, id)); + return size; +} + +/*****************************************************************************/ +Bool +rdpXvInit(ScreenPtr pScreen, ScrnInfoPtr pScrn) +{ + XF86VideoAdaptorPtr adaptor; + DevUnion* pDevUnion; + int bytes; + + adaptor = xf86XVAllocateVideoAdaptorRec(pScrn); + if (adaptor == 0) + { + LLOGLN(0, ("rdpXvInit: xf86XVAllocateVideoAdaptorRec failed")); + return 0; + } + adaptor->type = XvInputMask | XvImageMask | XvVideoMask | XvStillMask | XvWindowMask | XvPixmapMask; + //adaptor->flags = VIDEO_NO_CLIPPING; + //adaptor->flags = VIDEO_CLIP_TO_VIEWPORT; + adaptor->flags = 0; + adaptor->name = XRDP_MODULE_NAME " XVideo Adaptor"; + adaptor->nEncodings = T_NUM_ENCODINGS; + adaptor->pEncodings = &(g_xrdpVidEncodings[0]); + adaptor->nFormats = T_NUM_FORMATS; + adaptor->pFormats = &(g_xrdpVidFormats[0]); + adaptor->pFormats[0].depth = pScrn->depth; + LLOGLN(0, ("rdpXvInit: depth %d", pScrn->depth)); + adaptor->nImages = sizeof(g_xrdpVidImages) / sizeof(XF86ImageRec); + adaptor->pImages = g_xrdpVidImages; + adaptor->nAttributes = 0; + adaptor->pAttributes = 0; + adaptor->nPorts = T_MAX_PORTS; + bytes = sizeof(DevUnion) * 
T_MAX_PORTS; + pDevUnion = (DevUnion*) g_malloc(bytes, 1); + adaptor->pPortPrivates = pDevUnion; + adaptor->PutVideo = xrdpVidPutVideo; + adaptor->PutStill = xrdpVidPutStill; + adaptor->GetVideo = xrdpVidGetVideo; + adaptor->GetStill = xrdpVidGetStill; + adaptor->StopVideo = xrdpVidStopVideo; + adaptor->SetPortAttribute = xrdpVidSetPortAttribute; + adaptor->GetPortAttribute = xrdpVidGetPortAttribute; + adaptor->QueryBestSize = xrdpVidQueryBestSize; + adaptor->PutImage = xrdpVidPutImage; + adaptor->QueryImageAttributes = xrdpVidQueryImageAttributes; + if (!xf86XVScreenInit(pScreen, &adaptor, 1)) + { + LLOGLN(0, ("rdpXvInit: xf86XVScreenInit failed")); + return 0; + } + xf86XVFreeVideoAdaptorRec(adaptor); + return 1; +} + diff --git a/xorg/server/module/rdpXv.h b/xorg/server/module/rdpXv.h new file mode 100644 index 00000000..9cf28700 --- /dev/null +++ b/xorg/server/module/rdpXv.h @@ -0,0 +1,43 @@ +/* +Copyright 2014 Jay Sorg + +Permission to use, copy, modify, distribute, and sell this software and its +documentation for any purpose is hereby granted without fee, provided that +the above copyright notice appear in all copies and that both that +copyright notice and this permission notice appear in supporting +documentation. + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +XVideo + +*/ + +#ifndef __RDPXV_H +#define __RDPXV_H + +#include <xorg-server.h> +#include <xorgVersion.h> +#include <xf86.h> + +Bool +rdpXvInit(ScreenPtr pScreen, ScrnInfoPtr pScrn); + +int +YV12_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs); +int +I420_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs); +int +YUY2_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs); +int +UYVY_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs); + +#endif diff --git a/xorg/server/module/x86/cpuid_x86.asm b/xorg/server/module/x86/cpuid_x86.asm new file mode 100644 index 00000000..6f9e8c2d --- /dev/null +++ b/xorg/server/module/x86/cpuid_x86.asm @@ -0,0 +1,39 @@ + +SECTION .text + +%macro PROC 1 + align 16 + global %1 + %1: +%endmacro + +;int +;cpuid_x86(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx) + +PROC cpuid_x86 + ; save registers + push ebx + push ecx + push edx + push edi + ; cpuid + mov eax, [esp + 20] + mov ecx, [esp + 24] + cpuid + mov edi, [esp + 28] + mov [edi], eax + mov edi, [esp + 32] + mov [edi], ebx + mov edi, [esp + 36] + mov [edi], ecx + mov edi, [esp + 40] + mov [edi], edx + mov eax, 0 + ; restore registers + pop edi + pop edx + pop ecx + pop ebx + ret; + align 16 + diff --git a/xorg/server/module/x86/funcs_x86.h b/xorg/server/module/x86/funcs_x86.h new file mode 100644 index 00000000..00724e62 --- /dev/null +++ b/xorg/server/module/x86/funcs_x86.h @@ -0,0 +1,39 @@ +/* +Copyright 2014 Jay Sorg + +Permission to use, copy, modify, distribute, and sell this software and its +documentation for any purpose is hereby granted without fee, provided that +the above copyright notice appear in all copies and that both that +copyright notice and this permission notice appear in supporting +documentation. + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +x86 asm files + +*/ + +#ifndef __FUNCS_X86_H +#define __FUNCS_X86_H + +int +cpuid_x86(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx); +int +yv12_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs); +int +i420_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs); +int +yuy2_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs); +int +uyvy_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs); + +#endif + diff --git a/xorg/server/module/x86/i420_to_rgb32_x86_sse2.asm b/xorg/server/module/x86/i420_to_rgb32_x86_sse2.asm new file mode 100644 index 00000000..0c7a6e1e --- /dev/null +++ b/xorg/server/module/x86/i420_to_rgb32_x86_sse2.asm @@ -0,0 +1,243 @@ +; +;Copyright 2014 Jay Sorg +; +;Permission to use, copy, modify, distribute, and sell this software and its +;documentation for any purpose is hereby granted without fee, provided that +;the above copyright notice appear in all copies and that both that +;copyright notice and this permission notice appear in supporting +;documentation. +; +;The above copyright notice and this permission notice shall be included in +;all copies or substantial portions of the Software. +; +;THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +;IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +;FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +;OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +;AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +;CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +; +;I420 to RGB32 +;x86 SSE2 32 bit +; +; RGB to YUV +; 0.299 0.587 0.114 +; -0.14713 -0.28886 0.436 +; 0.615 -0.51499 -0.10001 +; YUV to RGB +; 1 0 1.13983 +; 1 -0.39465 -0.58060 +; 1 2.03211 0 +; shift left 12 +; 4096 0 4669 +; 4096 -1616 -2378 +; 4096 9324 0 + +SECTION .data +align 16 +c128 times 8 dw 128 +c4669 times 8 dw 4669 +c1616 times 8 dw 1616 +c2378 times 8 dw 2378 +c9324 times 8 dw 9324 + +SECTION .text + +%macro PROC 1 + align 16 + global %1 + %1: +%endmacro + +do8_uv: + + ; v + movd xmm1, [ebx] ; 4 at a time + lea ebx, [ebx + 4] + punpcklbw xmm1, xmm1 + pxor xmm6, xmm6 + punpcklbw xmm1, xmm6 + movdqa xmm7, [c128] + psubw xmm1, xmm7 + psllw xmm1, 4 + + ; u + movd xmm2, [edx] ; 4 at a time + lea edx, [edx + 4] + punpcklbw xmm2, xmm2 + punpcklbw xmm2, xmm6 + psubw xmm2, xmm7 + psllw xmm2, 4 + +do8: + + ; y + movq xmm0, [esi] ; 8 at a time + lea esi, [esi + 8] + pxor xmm6, xmm6 + punpcklbw xmm0, xmm6 + + ; r = y + hiword(4669 * (v << 4)) + movdqa xmm4, [c4669] + pmulhw xmm4, xmm1 + movdqa xmm3, xmm0 + paddw xmm3, xmm4 + + ; g = y - hiword(1616 * (u << 4)) - hiword(2378 * (v << 4)) + movdqa xmm5, [c1616] + pmulhw xmm5, xmm2 + movdqa xmm6, [c2378] + pmulhw xmm6, xmm1 + movdqa xmm4, xmm0 + psubw xmm4, xmm5 + psubw xmm4, xmm6 + + ; b = y + hiword(9324 * (u << 4)) + movdqa xmm6, [c9324] + pmulhw xmm6, xmm2 + movdqa xmm5, xmm0 + paddw xmm5, xmm6 + + packuswb xmm3, xmm3 ; b + packuswb xmm4, xmm4 ; g + punpcklbw xmm3, xmm4 ; gb + + pxor xmm4, xmm4 ; a + packuswb xmm5, xmm5 ; r + punpcklbw xmm5, xmm4 ; ar + + movdqa xmm4, xmm3 + punpcklwd xmm3, xmm5 ; argb + movdqa [edi], xmm3 + lea edi, [edi + 16] + punpckhwd xmm4, xmm5 ; argb + movdqa [edi], xmm4 + lea edi, [edi + 16] + + ret; + +;int +;i420_to_rgb32_x86_sse2(unsigned 
char *yuvs, int width, int height, int *rgbs) + +PROC i420_to_rgb32_x86_sse2 + push ebx + push esi + push edi + push ebp + + mov edi, [esp + 32] ; rgbs + + mov ecx, [esp + 24] ; width + mov edx, ecx + mov ebp, [esp + 28] ; height + mov eax, ebp + shr ebp, 1 + imul eax, ecx ; eax = width * height + + mov esi, [esp + 20] ; y + + mov ebx, esi ; u = y + width * height + add ebx, eax + + ; local vars + ; char* yptr1 + ; char* yptr2 + ; char* uptr + ; char* vptr + ; int* rgbs1 + ; int* rgbs2 + ; int width + sub esp, 28 ; local vars, 28 bytes + mov [esp + 0], esi ; save y1 + add esi, edx + mov [esp + 4], esi ; save y2 + mov [esp + 8], ebx ; save u + shr eax, 2 + add ebx, eax ; v = u + (width * height / 4) + mov [esp + 12], ebx ; save v + + mov [esp + 16], edi ; save rgbs1 + mov eax, edx + shl eax, 2 + add edi, eax + mov [esp + 20], edi ; save rgbs2 + +loop_y: + + mov ecx, edx ; width + shr ecx, 3 + + ; save edx + mov [esp + 24], edx + + ;prefetchnta 4096[esp + 0] ; y + ;prefetchnta 1024[esp + 8] ; u + ;prefetchnta 1024[esp + 12] ; v + +loop_x: + + mov esi, [esp + 0] ; y1 + mov ebx, [esp + 8] ; u + mov edx, [esp + 12] ; v + mov edi, [esp + 16] ; rgbs1 + + ; y1 + call do8_uv + + mov [esp + 0], esi ; y1 + mov [esp + 16], edi ; rgbs1 + + mov esi, [esp + 4] ; y2 + mov edi, [esp + 20] ; rgbs2 + + ; y2 + call do8 + + mov [esp + 4], esi ; y2 + mov [esp + 8], ebx ; u + mov [esp + 12], edx ; v + mov [esp + 20], edi ; rgbs2 + + dec ecx ; width + jnz loop_x + + ; restore edx + mov edx, [esp + 24] + + ; update y1 and 2 + mov eax, [esp + 0] + mov ebx, edx + add eax, ebx + mov [esp + 0], eax + + mov eax, [esp + 4] + add eax, ebx + mov [esp + 4], eax + + ; update rgb1 and 2 + mov eax, [esp + 16] + mov ebx, edx + shl ebx, 2 + add eax, ebx + mov [esp + 16], eax + + mov eax, [esp + 20] + add eax, ebx + mov [esp + 20], eax + + mov ecx, ebp + dec ecx ; height + mov ebp, ecx + jnz loop_y + + add esp, 28 + + mov eax, 0 + pop ebp + pop edi + pop esi + pop ebx + ret + align 16 + + diff --git 
a/xorg/server/module/x86/uyvy_to_rgb32_x86_sse2.asm b/xorg/server/module/x86/uyvy_to_rgb32_x86_sse2.asm new file mode 100644 index 00000000..d3ba81d3 --- /dev/null +++ b/xorg/server/module/x86/uyvy_to_rgb32_x86_sse2.asm @@ -0,0 +1,22 @@ + +%macro PROC 1 + align 16 + global %1 + %1: +%endmacro + +;int +;uyvy_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs) + +PROC uyvy_to_rgb32_x86_sse2 + push ebx + push esi + push edi + + mov eax, 0 + pop edi + pop esi + pop ebx + ret + align 16 + diff --git a/xorg/server/module/x86/yuy2_to_rgb32_x86_sse2.asm b/xorg/server/module/x86/yuy2_to_rgb32_x86_sse2.asm new file mode 100644 index 00000000..da03e26f --- /dev/null +++ b/xorg/server/module/x86/yuy2_to_rgb32_x86_sse2.asm @@ -0,0 +1,22 @@ + +%macro PROC 1 + align 16 + global %1 + %1: +%endmacro + +;int +;yuy2_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs) + +PROC yuy2_to_rgb32_x86_sse2 + push ebx + push esi + push edi + + mov eax, 0 + pop edi + pop esi + pop ebx + ret + align 16 + diff --git a/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm b/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm new file mode 100644 index 00000000..d50a65a2 --- /dev/null +++ b/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm @@ -0,0 +1,243 @@ +; +;Copyright 2014 Jay Sorg +; +;Permission to use, copy, modify, distribute, and sell this software and its +;documentation for any purpose is hereby granted without fee, provided that +;the above copyright notice appear in all copies and that both that +;copyright notice and this permission notice appear in supporting +;documentation. +; +;The above copyright notice and this permission notice shall be included in +;all copies or substantial portions of the Software. +; +;THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +;IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +;FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +;OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +;AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +;CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +; +;YV12 to RGB32 +;x86 SSE2 32 bit +; +; RGB to YUV +; 0.299 0.587 0.114 +; -0.14713 -0.28886 0.436 +; 0.615 -0.51499 -0.10001 +; YUV to RGB +; 1 0 1.13983 +; 1 -0.39465 -0.58060 +; 1 2.03211 0 +; shift left 12 +; 4096 0 4669 +; 4096 -1616 -2378 +; 4096 9324 0 + +SECTION .data +align 16 +c128 times 8 dw 128 +c4669 times 8 dw 4669 +c1616 times 8 dw 1616 +c2378 times 8 dw 2378 +c9324 times 8 dw 9324 + +SECTION .text + +%macro PROC 1 + align 16 + global %1 + %1: +%endmacro + +do8_uv: + + ; u + movd xmm1, [ebx] ; 4 at a time + lea ebx, [ebx + 4] + punpcklbw xmm1, xmm1 + pxor xmm6, xmm6 + punpcklbw xmm1, xmm6 + movdqa xmm7, [c128] + psubw xmm1, xmm7 + psllw xmm1, 4 + + ; v + movd xmm2, [edx] ; 4 at a time + lea edx, [edx + 4] + punpcklbw xmm2, xmm2 + punpcklbw xmm2, xmm6 + psubw xmm2, xmm7 + psllw xmm2, 4 + +do8: + + ; y + movq xmm0, [esi] ; 8 at a time + lea esi, [esi + 8] + pxor xmm6, xmm6 + punpcklbw xmm0, xmm6 + + ; r = y + hiword(4669 * (v << 4)) + movdqa xmm4, [c4669] + pmulhw xmm4, xmm2 + movdqa xmm3, xmm0 + paddw xmm3, xmm4 + + ; g = y - hiword(1616 * (u << 4)) - hiword(2378 * (v << 4)) + movdqa xmm5, [c1616] + pmulhw xmm5, xmm1 + movdqa xmm6, [c2378] + pmulhw xmm6, xmm2 + movdqa xmm4, xmm0 + psubw xmm4, xmm5 + psubw xmm4, xmm6 + + ; b = y + hiword(9324 * (u << 4)) + movdqa xmm6, [c9324] + pmulhw xmm6, xmm1 + movdqa xmm5, xmm0 + paddw xmm5, xmm6 + + packuswb xmm3, xmm3 ; b + packuswb xmm4, xmm4 ; g + punpcklbw xmm3, xmm4 ; gb + + pxor xmm4, xmm4 ; a + packuswb xmm5, xmm5 ; r + punpcklbw xmm5, xmm4 ; ar + + movdqa xmm4, xmm3 + punpcklwd xmm3, xmm5 ; argb + movdqa [edi], xmm3 + lea edi, [edi + 16] + punpckhwd xmm4, xmm5 ; argb + movdqa [edi], xmm4 + lea edi, [edi + 16] + + ret; + +;int +;yv12_to_rgb32_x86_sse2(unsigned 
char *yuvs, int width, int height, int *rgbs) + +PROC yv12_to_rgb32_x86_sse2 + push ebx + push esi + push edi + push ebp + + mov edi, [esp + 32] ; rgbs + + mov ecx, [esp + 24] ; width + mov edx, ecx + mov ebp, [esp + 28] ; height + mov eax, ebp + shr ebp, 1 + imul eax, ecx ; eax = width * height + + mov esi, [esp + 20] ; y + + mov ebx, esi ; u = y + width * height + add ebx, eax + + ; local vars + ; char* yptr1 + ; char* yptr2 + ; char* uptr + ; char* vptr + ; int* rgbs1 + ; int* rgbs2 + ; int width + sub esp, 28 ; local vars, 28 bytes + mov [esp + 0], esi ; save y1 + add esi, edx + mov [esp + 4], esi ; save y2 + mov [esp + 8], ebx ; save u + shr eax, 2 + add ebx, eax ; v = u + (width * height / 4) + mov [esp + 12], ebx ; save v + + mov [esp + 16], edi ; save rgbs1 + mov eax, edx + shl eax, 2 + add edi, eax + mov [esp + 20], edi ; save rgbs2 + +loop_y: + + mov ecx, edx ; width + shr ecx, 3 + + ; save edx + mov [esp + 24], edx + + ;prefetchnta 4096[esp + 0] ; y + ;prefetchnta 1024[esp + 8] ; u + ;prefetchnta 1024[esp + 12] ; v + +loop_x: + + mov esi, [esp + 0] ; y1 + mov ebx, [esp + 8] ; u + mov edx, [esp + 12] ; v + mov edi, [esp + 16] ; rgbs1 + + ; y1 + call do8_uv + + mov [esp + 0], esi ; y1 + mov [esp + 16], edi ; rgbs1 + + mov esi, [esp + 4] ; y2 + mov edi, [esp + 20] ; rgbs2 + + ; y2 + call do8 + + mov [esp + 4], esi ; y2 + mov [esp + 8], ebx ; u + mov [esp + 12], edx ; v + mov [esp + 20], edi ; rgbs2 + + dec ecx ; width + jnz loop_x + + ; restore edx + mov edx, [esp + 24] + + ; update y1 and 2 + mov eax, [esp + 0] + mov ebx, edx + add eax, ebx + mov [esp + 0], eax + + mov eax, [esp + 4] + add eax, ebx + mov [esp + 4], eax + + ; update rgb1 and 2 + mov eax, [esp + 16] + mov ebx, edx + shl ebx, 2 + add eax, ebx + mov [esp + 16], eax + + mov eax, [esp + 20] + add eax, ebx + mov [esp + 20], eax + + mov ecx, ebp + dec ecx ; height + mov ebp, ecx + jnz loop_y + + add esp, 28 + + mov eax, 0 + pop ebp + pop edi + pop esi + pop ebx + ret + align 16 + + |