summaryrefslogtreecommitdiffstats
path: root/xorg/server/module
diff options
context:
space:
mode:
Diffstat (limited to 'xorg/server/module')
-rw-r--r--xorg/server/module/Makefile38
-rw-r--r--xorg/server/module/amd64/cpuid_amd64.asm41
-rw-r--r--xorg/server/module/amd64/funcs_amd64.h39
-rw-r--r--xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm248
-rw-r--r--xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm17
-rw-r--r--xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm17
-rw-r--r--xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm248
-rw-r--r--xorg/server/module/rdp.h119
-rw-r--r--xorg/server/module/rdpCapture.c663
-rw-r--r--xorg/server/module/rdpCapture.h6
-rw-r--r--xorg/server/module/rdpClientCon.c167
-rw-r--r--xorg/server/module/rdpClientCon.h7
-rw-r--r--xorg/server/module/rdpCursor.c8
-rw-r--r--xorg/server/module/rdpMain.c10
-rw-r--r--xorg/server/module/rdpSimd.c138
-rw-r--r--xorg/server/module/rdpSimd.h34
-rw-r--r--xorg/server/module/rdpXv.c678
-rw-r--r--xorg/server/module/rdpXv.h43
-rw-r--r--xorg/server/module/x86/cpuid_x86.asm39
-rw-r--r--xorg/server/module/x86/funcs_x86.h39
-rw-r--r--xorg/server/module/x86/i420_to_rgb32_x86_sse2.asm243
-rw-r--r--xorg/server/module/x86/uyvy_to_rgb32_x86_sse2.asm22
-rw-r--r--xorg/server/module/x86/yuy2_to_rgb32_x86_sse2.asm22
-rw-r--r--xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm243
24 files changed, 3007 insertions, 122 deletions
diff --git a/xorg/server/module/Makefile b/xorg/server/module/Makefile
index 9003de4d..8f1560d6 100644
--- a/xorg/server/module/Makefile
+++ b/xorg/server/module/Makefile
@@ -6,11 +6,16 @@ rdpPolyFillArc.o rdpPolyText8.o rdpPolyText16.o rdpImageText8.o \
rdpImageText16.o rdpImageGlyphBlt.o rdpPolyGlyphBlt.o rdpPushPixels.o \
rdpCursor.o rdpMain.o rdpRandR.o rdpMisc.o rdpReg.o \
rdpComposite.o rdpGlyphs.o rdpPixmap.o rdpInput.o rdpClientCon.o rdpCapture.o \
-rdpTrapezoids.o
+rdpTrapezoids.o rdpXv.o rdpSimd.o
+
+# Optional SIMD acceleration (off by default). To enable it, uncomment the
+# OBJS line for the target architecture together with the CFLAGS line below.
+# make only recognises '#' as a comment character, so these switches are
+# disabled with '#' rather than the assembler-style ';'.
+#OBJS += cpuid_x86.o i420_to_rgb32_x86_sse2.o yv12_to_rgb32_x86_sse2.o yuy2_to_rgb32_x86_sse2.o uyvy_to_rgb32_x86_sse2.o
+#OBJS += cpuid_amd64.o i420_to_rgb32_amd64_sse2.o yv12_to_rgb32_amd64_sse2.o yuy2_to_rgb32_amd64_sse2.o uyvy_to_rgb32_amd64_sse2.o
CFLAGS = -g -O2 -Wall -fPIC -I/usr/include/xorg -I/usr/include/pixman-1 \
-I../../../common
+#CFLAGS += -DSIMD_USE_ACCEL=1
+
LDFLAGS =
LIBS =
@@ -22,3 +27,34 @@ libxorgxrdp.so: $(OBJS) Makefile
clean:
rm -f $(OBJS) libxorgxrdp.so
+
+cpuid_x86.o: x86/cpuid_x86.asm # x86 sources are assembled as 32-bit ELF objects
+ yasm -f elf32 -g dwarf2 x86/cpuid_x86.asm
+# NOTE(review): no -o is passed, so each rule relies on yasm's default output name matching its target - confirm
+i420_to_rgb32_x86_sse2.o: x86/i420_to_rgb32_x86_sse2.asm
+ yasm -f elf32 -g dwarf2 x86/i420_to_rgb32_x86_sse2.asm
+
+yv12_to_rgb32_x86_sse2.o: x86/yv12_to_rgb32_x86_sse2.asm
+ yasm -f elf32 -g dwarf2 x86/yv12_to_rgb32_x86_sse2.asm
+
+yuy2_to_rgb32_x86_sse2.o: x86/yuy2_to_rgb32_x86_sse2.asm
+ yasm -f elf32 -g dwarf2 x86/yuy2_to_rgb32_x86_sse2.asm
+
+uyvy_to_rgb32_x86_sse2.o: x86/uyvy_to_rgb32_x86_sse2.asm
+ yasm -f elf32 -g dwarf2 x86/uyvy_to_rgb32_x86_sse2.asm
+# amd64 sources are assembled as 64-bit ELF objects
+cpuid_amd64.o: amd64/cpuid_amd64.asm
+ yasm -f elf64 -g dwarf2 amd64/cpuid_amd64.asm
+
+i420_to_rgb32_amd64_sse2.o: amd64/i420_to_rgb32_amd64_sse2.asm
+ yasm -f elf64 -g dwarf2 amd64/i420_to_rgb32_amd64_sse2.asm
+
+yv12_to_rgb32_amd64_sse2.o: amd64/yv12_to_rgb32_amd64_sse2.asm
+ yasm -f elf64 -g dwarf2 amd64/yv12_to_rgb32_amd64_sse2.asm
+
+yuy2_to_rgb32_amd64_sse2.o: amd64/yuy2_to_rgb32_amd64_sse2.asm
+ yasm -f elf64 -g dwarf2 amd64/yuy2_to_rgb32_amd64_sse2.asm
+
+uyvy_to_rgb32_amd64_sse2.o: amd64/uyvy_to_rgb32_amd64_sse2.asm
+ yasm -f elf64 -g dwarf2 amd64/uyvy_to_rgb32_amd64_sse2.asm
+
diff --git a/xorg/server/module/amd64/cpuid_amd64.asm b/xorg/server/module/amd64/cpuid_amd64.asm
new file mode 100644
index 00000000..b97937ad
--- /dev/null
+++ b/xorg/server/module/amd64/cpuid_amd64.asm
@@ -0,0 +1,41 @@
+
+SECTION .text
+
+%macro PROC 1
+ align 16
+ global %1
+ %1:
+%endmacro
+
+;int
+;cpuid_amd64(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx)
+;
+;System V AMD64 arguments on entry:
+;  edi = eax_in, esi = ecx_in
+;  rdx = eax out, rcx = ebx out, r8 = ecx out, r9 = edx out
+;
+;Executes CPUID with eax = eax_in and ecx = ecx_in, stores the four result
+;registers through the output pointers and returns 0.
+
+PROC cpuid_amd64
+    push rbx                ; cpuid overwrites rbx, which must be preserved for the caller
+
+    ; cpuid also overwrites rdx and rcx, so move the two pointers that
+    ; arrive in those registers into scratch registers first
+    mov r10, rdx            ; r10 = eax out
+    mov r11, rcx            ; r11 = ebx out
+
+    mov eax, edi
+    mov ecx, esi
+    cpuid
+
+    mov [r9], edx
+    mov [r8], ecx
+    mov [r11], ebx
+    mov [r10], eax
+
+    xor eax, eax            ; return 0
+    pop rbx
+    ret
+    align 16
+
diff --git a/xorg/server/module/amd64/funcs_amd64.h b/xorg/server/module/amd64/funcs_amd64.h
new file mode 100644
index 00000000..10cffe0d
--- /dev/null
+++ b/xorg/server/module/amd64/funcs_amd64.h
@@ -0,0 +1,39 @@
+/*
+Copyright 2014 Jay Sorg
+
+Permission to use, copy, modify, distribute, and sell this software and its
+documentation for any purpose is hereby granted without fee, provided that
+the above copyright notice appear in all copies and that both that
+copyright notice and this permission notice appear in supporting
+documentation.
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+amd64 asm functions
+
+*/
+
+#ifndef __FUNCS_AMD64_H
+#define __FUNCS_AMD64_H
+/* C prototypes for the routines implemented in assembly under amd64/ */
+int /* runs CPUID for eax_in / ecx_in and stores the result registers through eax, ebx, ecx, edx; the asm returns 0 */
+cpuid_amd64(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx);
+int /* converts a planar yuvs image of width x height into 32-bit pixels at rgbs; the asm returns 0 */
+yv12_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
+int /* same contract as the yv12 variant; the asm returns 0 */
+i420_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
+int /* the amd64 asm for this entry point is a stub: it returns 0 without converting anything */
+yuy2_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
+int /* the amd64 asm for this entry point is a stub: it returns 0 without converting anything */
+uyvy_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
+
+#endif
+
diff --git a/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm
new file mode 100644
index 00000000..d9760caa
--- /dev/null
+++ b/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm
@@ -0,0 +1,248 @@
+;
+;Copyright 2014 Jay Sorg
+;
+;Permission to use, copy, modify, distribute, and sell this software and its
+;documentation for any purpose is hereby granted without fee, provided that
+;the above copyright notice appear in all copies and that both that
+;copyright notice and this permission notice appear in supporting
+;documentation.
+;
+;The above copyright notice and this permission notice shall be included in
+;all copies or substantial portions of the Software.
+;
+;THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+;IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+;FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+;OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+;AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+;CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+;
+;I420 to RGB32
+;amd64 SSE2 32 bit
+;
+; RGB to YUV
+; 0.299 0.587 0.114
+; -0.14713 -0.28886 0.436
+; 0.615 -0.51499 -0.10001
+; YUV to RGB
+; 1 0 1.13983
+; 1 -0.39465 -0.58060
+; 1 2.03211 0
+; shift left 12
+; 4096 0 4669
+; 4096 -1616 -2378
+; 4096 9324 0
+
+SECTION .data
+align 16 ; the constants below are loaded with movdqa, which needs 16-byte alignment
+c128 times 8 dw 128 ; chroma bias, eight 16-bit lanes
+c4669 times 8 dw 4669 ; 1.13983 << 12, see the table in the file header
+c1616 times 8 dw 1616 ; 0.39465 << 12 (subtracted by the code, hence stored positive)
+c2378 times 8 dw 2378 ; 0.58060 << 12 (subtracted by the code, hence stored positive)
+c9324 times 8 dw 9324 ; 2.03211 << 12; NOTE(review): header table lists 9324 but 2.03211 * 4096 is about 8324 - confirm
+
+SECTION .text
+; PROC name: emit a 16-byte aligned, exported label called name
+%macro PROC 1
+ align 16
+ global %1
+ %1:
+%endmacro
+
+do8_uv: ; loads 4 samples from each chroma pointer, then falls through into do8
+
+ ; chroma read through rbx -> xmm1; do8 below uses xmm1 in its "v" terms. NOTE(review): the caller labels rbx as the u pointer - confirm
+ movd xmm1, [rbx] ; 4 at a time
+ lea rbx, [rbx + 4]
+ punpcklbw xmm1, xmm1 ; duplicate every sample so one chroma value serves two pixels
+ pxor xmm6, xmm6
+ punpcklbw xmm1, xmm6 ; widen bytes to 16-bit words
+ movdqa xmm7, [rel c128]
+ psubw xmm1, xmm7 ; remove the 128 bias
+ psllw xmm1, 4 ; << 4 here plus the high word of pmulhw (>> 16) gives a net >> 12
+
+ ; chroma read through rdx -> xmm2; do8 below uses xmm2 in its "u" terms. NOTE(review): the caller labels rdx as the v pointer - confirm
+ movd xmm2, [rdx] ; 4 at a time
+ lea rdx, [rdx + 4]
+ punpcklbw xmm2, xmm2
+ punpcklbw xmm2, xmm6
+ psubw xmm2, xmm7
+ psllw xmm2, 4
+
+do8: ; converts 8 pixels from [rsi] to [rdi] using the chroma already held in xmm1 / xmm2
+
+ ; y
+ movq xmm0, [rsi] ; 8 at a time
+ lea rsi, [rsi + 8]
+ pxor xmm6, xmm6
+ punpcklbw xmm0, xmm6 ; widen luma bytes to 16-bit words
+
+ ; r = y + hiword(4669 * (v << 4))
+ movdqa xmm4, [rel c4669]
+ pmulhw xmm4, xmm1
+ movdqa xmm3, xmm0
+ paddw xmm3, xmm4
+
+ ; g = y - hiword(1616 * (u << 4)) - hiword(2378 * (v << 4))
+ movdqa xmm5, [rel c1616]
+ pmulhw xmm5, xmm2
+ movdqa xmm6, [rel c2378]
+ pmulhw xmm6, xmm1
+ movdqa xmm4, xmm0
+ psubw xmm4, xmm5
+ psubw xmm4, xmm6
+
+ ; b = y + hiword(9324 * (u << 4))
+ movdqa xmm6, [rel c9324]
+ pmulhw xmm6, xmm2
+ movdqa xmm5, xmm0
+ paddw xmm5, xmm6
+ ; NOTE(review): xmm3 was computed with the "r" formula but is packed below as "b" (byte 0), and xmm5 ("b" formula) as "r" (byte 2) - confirm
+ packuswb xmm3, xmm3 ; b - saturate words to unsigned bytes
+ packuswb xmm4, xmm4 ; g
+ punpcklbw xmm3, xmm4 ; gb
+
+ pxor xmm4, xmm4 ; a - the top byte of every pixel is written as 0
+ packuswb xmm5, xmm5 ; r
+ punpcklbw xmm5, xmm4 ; ar
+
+ movdqa xmm4, xmm3
+ punpcklwd xmm3, xmm5 ; argb - pixels 0..3
+ movdqa [rdi], xmm3 ; aligned store: rdi has to be 16-byte aligned
+ lea rdi, [rdi + 16]
+ punpckhwd xmm4, xmm5 ; argb - pixels 4..7
+ movdqa [rdi], xmm4
+ lea rdi, [rdi + 16]
+
+ ret;
+
+;int
+;i420_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
+;
+;System V AMD64 arguments: rdi = yuvs, esi = width, edx = height, rcx = rgbs
+;
+;Source layout as addressed below: width * height luma bytes followed by two
+;chroma planes of (width * height) / 4 bytes each.
+;Rows are processed in pairs and pixels in groups of 8, so width / 8 groups
+;of height / 2 row pairs are converted; any remainder is left untouched.
+;Nothing is written when width < 8 or height < 2.
+;Output is stored with movdqa, so rgbs and width * 4 must both be multiples
+;of 16.
+;Always returns 0. Clobbers rax, rcx, rdx, xmm0-xmm7 and flags.
+
+PROC i420_to_rgb32_amd64_sse2
+    push rbx
+    push rsi
+    push rdi
+    push rbp
+
+    ; int arguments only define the low 32 bits of their registers,
+    ; so widen them explicitly before any 64-bit arithmetic
+    movsxd rax, edx      ; rax = height
+    movsxd rdx, esi      ; rdx = width
+    mov rsi, rdi         ; rsi = y
+    mov rdi, rcx         ; rdi = rgbs
+
+    ; a zero group or row-pair count would wrap in the dec / jnz loops
+    ; below and run far past both buffers
+    cmp rdx, 8
+    jl i420_exit
+    cmp rax, 2
+    jl i420_exit
+
+    mov rbp, rax
+    shr rbp, 1           ; rbp = row pairs left
+    imul rax, rdx        ; rax = width * height
+
+    mov rbx, rsi         ; u = y + width * height
+    add rbx, rax
+
+    ; local vars
+    ; [rsp +  0] char* yptr1
+    ; [rsp +  8] char* yptr2
+    ; [rsp + 16] char* uptr
+    ; [rsp + 24] char* vptr
+    ; [rsp + 32] int* rgbs1
+    ; [rsp + 40] int* rgbs2
+    ; [rsp + 48] int width
+    sub rsp, 56          ; local vars, 56 bytes
+    mov [rsp + 0], rsi   ; save y1
+    add rsi, rdx
+    mov [rsp + 8], rsi   ; save y2
+    mov [rsp + 16], rbx  ; save u
+    shr rax, 2
+    add rbx, rax         ; v = u + (width * height / 4)
+    mov [rsp + 24], rbx  ; save v
+
+    mov [rsp + 32], rdi  ; save rgbs1
+    mov rax, rdx
+    shl rax, 2
+    add rdi, rax
+    mov [rsp + 40], rdi  ; save rgbs2
+
+loop_y:
+
+    mov rcx, rdx         ; width
+    shr rcx, 3           ; rcx = groups of 8 pixels per row
+
+    ; rdx is reused as the v pointer inside loop_x
+    mov [rsp + 48], rdx
+
+loop_x:
+
+    mov rsi, [rsp + 0]   ; y1
+    mov rbx, [rsp + 16]  ; u
+    mov rdx, [rsp + 24]  ; v
+    mov rdi, [rsp + 32]  ; rgbs1
+
+    ; first row of the pair: load chroma and convert
+    call do8_uv
+
+    mov [rsp + 0], rsi   ; y1
+    mov [rsp + 32], rdi  ; rgbs1
+
+    mov rsi, [rsp + 8]   ; y2
+    mov rdi, [rsp + 40]  ; rgbs2
+
+    ; second row of the pair: reuse the chroma left in xmm1 / xmm2
+    call do8
+
+    mov [rsp + 8], rsi   ; y2
+    mov [rsp + 16], rbx  ; u
+    mov [rsp + 24], rdx  ; v
+    mov [rsp + 40], rdi  ; rgbs2
+
+    dec rcx              ; width
+    jnz loop_x
+
+    ; restore rdx
+    mov rdx, [rsp + 48]
+
+    ; both rows were consumed, so skip each pointer one more row ahead
+    mov rax, [rsp + 0]
+    mov rbx, rdx
+    add rax, rbx
+    mov [rsp + 0], rax
+
+    mov rax, [rsp + 8]
+    add rax, rbx
+    mov [rsp + 8], rax
+
+    ; same for the two output rows (4 bytes per pixel)
+    mov rax, [rsp + 32]
+    mov rbx, rdx
+    shl rbx, 2
+    add rax, rbx
+    mov [rsp + 32], rax
+
+    mov rax, [rsp + 40]
+    add rax, rbx
+    mov [rsp + 40], rax
+
+    dec rbp              ; height
+    jnz loop_y
+
+    add rsp, 56
+
+i420_exit:
+    xor eax, eax         ; return 0
+    pop rbp
+    pop rdi
+    pop rsi
+    pop rbx
+    ret
+    align 16
+
+
diff --git a/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm
new file mode 100644
index 00000000..8866fd0f
--- /dev/null
+++ b/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm
@@ -0,0 +1,17 @@
+
+%macro PROC 1
+ align 16
+ global %1
+ %1:
+%endmacro
+
+;int
+;uyvy_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
+;
+;Placeholder: performs no conversion, touches neither buffer, returns 0.
+
+PROC uyvy_to_rgb32_amd64_sse2
+    xor eax, eax         ; return 0
+    ret
+    align 16
+
diff --git a/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm
new file mode 100644
index 00000000..c0ac5c1b
--- /dev/null
+++ b/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm
@@ -0,0 +1,17 @@
+
+%macro PROC 1
+ align 16
+ global %1
+ %1:
+%endmacro
+
+;int
+;yuy2_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
+;
+;Placeholder: performs no conversion, touches neither buffer, returns 0.
+
+PROC yuy2_to_rgb32_amd64_sse2
+    xor eax, eax         ; return 0
+    ret
+    align 16
+
diff --git a/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm
new file mode 100644
index 00000000..13e46878
--- /dev/null
+++ b/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm
@@ -0,0 +1,248 @@
+;
+;Copyright 2014 Jay Sorg
+;
+;Permission to use, copy, modify, distribute, and sell this software and its
+;documentation for any purpose is hereby granted without fee, provided that
+;the above copyright notice appear in all copies and that both that
+;copyright notice and this permission notice appear in supporting
+;documentation.
+;
+;The above copyright notice and this permission notice shall be included in
+;all copies or substantial portions of the Software.
+;
+;THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+;IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+;FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+;OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+;AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+;CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+;
+;YV12 to RGB32
+;amd64 SSE2 32 bit
+;
+; RGB to YUV
+; 0.299 0.587 0.114
+; -0.14713 -0.28886 0.436
+; 0.615 -0.51499 -0.10001
+; YUV to RGB
+; 1 0 1.13983
+; 1 -0.39465 -0.58060
+; 1 2.03211 0
+; shift left 12
+; 4096 0 4669
+; 4096 -1616 -2378
+; 4096 9324 0
+
+SECTION .data
+align 16 ; the constants below are loaded with movdqa, which needs 16-byte alignment
+c128 times 8 dw 128 ; chroma bias, eight 16-bit lanes
+c4669 times 8 dw 4669 ; 1.13983 << 12, see the table in the file header
+c1616 times 8 dw 1616 ; 0.39465 << 12 (subtracted by the code, hence stored positive)
+c2378 times 8 dw 2378 ; 0.58060 << 12 (subtracted by the code, hence stored positive)
+c9324 times 8 dw 9324 ; 2.03211 << 12; NOTE(review): header table lists 9324 but 2.03211 * 4096 is about 8324 - confirm
+
+SECTION .text
+; PROC name: emit a 16-byte aligned, exported label called name
+%macro PROC 1
+ align 16
+ global %1
+ %1:
+%endmacro
+
+do8_uv: ; loads 4 samples from each chroma pointer, then falls through into do8
+
+ ; u - read through rbx into xmm1
+ movd xmm1, [rbx] ; 4 at a time
+ lea rbx, [rbx + 4]
+ punpcklbw xmm1, xmm1 ; duplicate every sample so one chroma value serves two pixels
+ pxor xmm6, xmm6
+ punpcklbw xmm1, xmm6 ; widen bytes to 16-bit words
+ movdqa xmm7, [rel c128]
+ psubw xmm1, xmm7 ; remove the 128 bias
+ psllw xmm1, 4 ; << 4 here plus the high word of pmulhw (>> 16) gives a net >> 12
+
+ ; v - read through rdx into xmm2
+ movd xmm2, [rdx] ; 4 at a time
+ lea rdx, [rdx + 4]
+ punpcklbw xmm2, xmm2
+ punpcklbw xmm2, xmm6
+ psubw xmm2, xmm7
+ psllw xmm2, 4
+
+do8: ; converts 8 pixels from [rsi] to [rdi] using the chroma already held in xmm1 / xmm2
+
+ ; y
+ movq xmm0, [rsi] ; 8 at a time
+ lea rsi, [rsi + 8]
+ pxor xmm6, xmm6
+ punpcklbw xmm0, xmm6 ; widen luma bytes to 16-bit words
+
+ ; r = y + hiword(4669 * (v << 4))
+ movdqa xmm4, [rel c4669]
+ pmulhw xmm4, xmm2
+ movdqa xmm3, xmm0
+ paddw xmm3, xmm4
+
+ ; g = y - hiword(1616 * (u << 4)) - hiword(2378 * (v << 4))
+ movdqa xmm5, [rel c1616]
+ pmulhw xmm5, xmm1
+ movdqa xmm6, [rel c2378]
+ pmulhw xmm6, xmm2
+ movdqa xmm4, xmm0
+ psubw xmm4, xmm5
+ psubw xmm4, xmm6
+
+ ; b = y + hiword(9324 * (u << 4))
+ movdqa xmm6, [rel c9324]
+ pmulhw xmm6, xmm1
+ movdqa xmm5, xmm0
+ paddw xmm5, xmm6
+ ; NOTE(review): xmm3 was computed with the "r" formula but is packed below as "b" (byte 0), and xmm5 ("b" formula) as "r" (byte 2) - confirm
+ packuswb xmm3, xmm3 ; b - saturate words to unsigned bytes
+ packuswb xmm4, xmm4 ; g
+ punpcklbw xmm3, xmm4 ; gb
+
+ pxor xmm4, xmm4 ; a - the top byte of every pixel is written as 0
+ packuswb xmm5, xmm5 ; r
+ punpcklbw xmm5, xmm4 ; ar
+
+ movdqa xmm4, xmm3
+ punpcklwd xmm3, xmm5 ; argb - pixels 0..3
+ movdqa [rdi], xmm3 ; aligned store: rdi has to be 16-byte aligned
+ lea rdi, [rdi + 16]
+ punpckhwd xmm4, xmm5 ; argb - pixels 4..7
+ movdqa [rdi], xmm4
+ lea rdi, [rdi + 16]
+
+ ret;
+
+;int
+;yv12_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
+;
+;System V AMD64 arguments: rdi = yuvs, esi = width, edx = height, rcx = rgbs
+;
+;Source layout as addressed below: width * height luma bytes followed by two
+;chroma planes of (width * height) / 4 bytes each.
+;Rows are processed in pairs and pixels in groups of 8, so width / 8 groups
+;of height / 2 row pairs are converted; any remainder is left untouched.
+;Nothing is written when width < 8 or height < 2.
+;Output is stored with movdqa, so rgbs and width * 4 must both be multiples
+;of 16.
+;Always returns 0. Clobbers rax, rcx, rdx, xmm0-xmm7 and flags.
+
+PROC yv12_to_rgb32_amd64_sse2
+    push rbx
+    push rsi
+    push rdi
+    push rbp
+
+    ; int arguments only define the low 32 bits of their registers,
+    ; so widen them explicitly before any 64-bit arithmetic
+    movsxd rax, edx      ; rax = height
+    movsxd rdx, esi      ; rdx = width
+    mov rsi, rdi         ; rsi = y
+    mov rdi, rcx         ; rdi = rgbs
+
+    ; a zero group or row-pair count would wrap in the dec / jnz loops
+    ; below and run far past both buffers
+    cmp rdx, 8
+    jl yv12_exit
+    cmp rax, 2
+    jl yv12_exit
+
+    mov rbp, rax
+    shr rbp, 1           ; rbp = row pairs left
+    imul rax, rdx        ; rax = width * height
+
+    mov rbx, rsi         ; u = y + width * height
+    add rbx, rax
+
+    ; local vars
+    ; [rsp +  0] char* yptr1
+    ; [rsp +  8] char* yptr2
+    ; [rsp + 16] char* uptr
+    ; [rsp + 24] char* vptr
+    ; [rsp + 32] int* rgbs1
+    ; [rsp + 40] int* rgbs2
+    ; [rsp + 48] int width
+    sub rsp, 56          ; local vars, 56 bytes
+    mov [rsp + 0], rsi   ; save y1
+    add rsi, rdx
+    mov [rsp + 8], rsi   ; save y2
+    mov [rsp + 16], rbx  ; save u
+    shr rax, 2
+    add rbx, rax         ; v = u + (width * height / 4)
+    mov [rsp + 24], rbx  ; save v
+
+    mov [rsp + 32], rdi  ; save rgbs1
+    mov rax, rdx
+    shl rax, 2
+    add rdi, rax
+    mov [rsp + 40], rdi  ; save rgbs2
+
+loop_y:
+
+    mov rcx, rdx         ; width
+    shr rcx, 3           ; rcx = groups of 8 pixels per row
+
+    ; rdx is reused as the v pointer inside loop_x
+    mov [rsp + 48], rdx
+
+loop_x:
+
+    mov rsi, [rsp + 0]   ; y1
+    mov rbx, [rsp + 16]  ; u
+    mov rdx, [rsp + 24]  ; v
+    mov rdi, [rsp + 32]  ; rgbs1
+
+    ; first row of the pair: load chroma and convert
+    call do8_uv
+
+    mov [rsp + 0], rsi   ; y1
+    mov [rsp + 32], rdi  ; rgbs1
+
+    mov rsi, [rsp + 8]   ; y2
+    mov rdi, [rsp + 40]  ; rgbs2
+
+    ; second row of the pair: reuse the chroma left in xmm1 / xmm2
+    call do8
+
+    mov [rsp + 8], rsi   ; y2
+    mov [rsp + 16], rbx  ; u
+    mov [rsp + 24], rdx  ; v
+    mov [rsp + 40], rdi  ; rgbs2
+
+    dec rcx              ; width
+    jnz loop_x
+
+    ; restore rdx
+    mov rdx, [rsp + 48]
+
+    ; both rows were consumed, so skip each pointer one more row ahead
+    mov rax, [rsp + 0]
+    mov rbx, rdx
+    add rax, rbx
+    mov [rsp + 0], rax
+
+    mov rax, [rsp + 8]
+    add rax, rbx
+    mov [rsp + 8], rax
+
+    ; same for the two output rows (4 bytes per pixel)
+    mov rax, [rsp + 32]
+    mov rbx, rdx
+    shl rbx, 2
+    add rax, rbx
+    mov [rsp + 32], rax
+
+    mov rax, [rsp + 40]
+    add rax, rbx
+    mov [rsp + 40], rax
+
+    dec rbp              ; height
+    jnz loop_y
+
+    add rsp, 56
+
+yv12_exit:
+    xor eax, eax         ; return 0
+    pop rbp
+    pop rdi
+    pop rsi
+    pop rbx
+    ret
+    align 16
+
+
diff --git a/xorg/server/module/rdp.h b/xorg/server/module/rdp.h
index ba1bcfd0..8a4d58c4 100644
--- a/xorg/server/module/rdp.h
+++ b/xorg/server/module/rdp.h
@@ -30,9 +30,46 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "rdpPri.h"
+#define XRDP_MODULE_NAME "XRDPMOD"
+#define XRDP_DRIVER_NAME "XRDPDEV"
+#define XRDP_MOUSE_NAME "XRDPMOUSE"
+#define XRDP_KEYB_NAME "XRDPKEYB"
+#define XRDP_VERSION 1000
+
+#define PACKAGE_VERSION_MAJOR 1
+#define PACKAGE_VERSION_MINOR 0
+#define PACKAGE_VERSION_PATCHLEVEL 0
+
+#define COLOR8(r, g, b) /* pack 8-bit components into one byte */ \
+ ((((r) >> 5) << 0) | (((g) >> 5) << 3) | (((b) >> 6) << 6)) /* r: bits 0-2, g: bits 3-5, b: bits 6-7 */
+#define COLOR15(r, g, b) /* pack 8-bit components into 15 bits */ \
+ ((((r) >> 3) << 10) | (((g) >> 3) << 5) | (((b) >> 3) << 0)) /* r: bits 10-14, g: bits 5-9, b: bits 0-4 */
+#define COLOR16(r, g, b) /* pack 8-bit components into 16 bits */ \
+ ((((r) >> 3) << 11) | (((g) >> 2) << 5) | (((b) >> 3) << 0)) /* r: bits 11-15, g: bits 5-10, b: bits 0-4 */
+#define COLOR24(r, g, b) /* pack 8-bit components into 24 bits */ \
+ ((((r) >> 0) << 0) | (((g) >> 0) << 8) | (((b) >> 0) << 16)) /* r: bits 0-7, g: bits 8-15, b: bits 16-23 */
+#define SPLITCOLOR32(r, g, b, c) /* statement macro: r, g, b are assigned, c is evaluated three times */ \
+ do { \
+ r = ((c) >> 16) & 0xff; \
+ g = ((c) >> 8) & 0xff; \
+ b = (c) & 0xff; \
+ } while (0) /* bits 24-31 of c are ignored */
+
+/* PIXMAN_a8b8g8r8 */
+#define XRDP_a8b8g8r8 \
+((32 << 24) | (3 << 16) | (8 << 12) | (8 << 8) | (8 << 4) | 8)
/* PIXMAN_a8r8g8b8 */
#define XRDP_a8r8g8b8 \
-((32 << 24) | (2 << 16) | (8 << 12) | (8 << 8) | (8 << 4) | 8)
+((32 << 24) | (2 << 16) | (8 << 12) | (8 << 8) | (8 << 4) | 8)
+/* PIXMAN_r5g6b5 */
+#define XRDP_r5g6b5 \
+((16 << 24) | (2 << 16) | (0 << 12) | (5 << 8) | (6 << 4) | 5)
+/* PIXMAN_a1r5g5b5 */
+#define XRDP_a1r5g5b5 \
+((16 << 24) | (2 << 16) | (1 << 12) | (5 << 8) | (5 << 4) | 5)
+/* PIXMAN_r3g3b2 */
+#define XRDP_r3g3b2 \
+((8 << 24) | (2 << 16) | (0 << 12) | (3 << 8) | (3 << 4) | 2)
#define PixelDPI 100
#define PixelToMM(_size) (((_size) * 254 + (PixelDPI) * 5) / ((PixelDPI) * 10))
@@ -40,7 +77,8 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define RDPMIN(_val1, _val2) ((_val1) < (_val2) ? (_val1) : (_val2))
#define RDPMAX(_val1, _val2) ((_val1) < (_val2) ? (_val2) : (_val1))
#define RDPCLAMP(_val, _lo, _hi) \
- (_val) < (_lo) ? (_lo) : (_val) > (_hi) ? (_hi) : (_val)
+ ((_val) < (_lo) ? (_lo) : (_val) > (_hi) ? (_hi) : (_val)) /* clamp _val to [_lo, _hi]; parenthesised as a whole so it composes inside larger expressions; arguments may be evaluated more than once */
+#define RDPALIGN(_val, _al) ((((long)(_val)) + ((_al) - 1)) & ~((_al) - 1))
#define XRDP_CD_NODRAW 0
#define XRDP_CD_NOCLIP 1
@@ -157,6 +195,8 @@ struct _rdpCounts
CARD32 callCount[64 - 23];
};
+typedef int (*yuv_to_rgb32_proc)(unsigned char *yuvs, int width, int height, int *rgbs);
+
/* move this to common header */
struct _rdpRec
{
@@ -228,6 +268,15 @@ struct _rdpRec
struct _rdpCounts counts;
+ yuv_to_rgb32_proc i420_to_rgb32;
+ yuv_to_rgb32_proc yv12_to_rgb32;
+ yuv_to_rgb32_proc yuy2_to_rgb32;
+ yuv_to_rgb32_proc uyvy_to_rgb32;
+ char *xv_data;
+ int xv_data_bytes;
+ int xv_timer_schedualed;
+ OsTimerPtr xv_timer;
+
};
typedef struct _rdpRec rdpRec;
typedef struct _rdpRec * rdpPtr;
@@ -250,63 +299,63 @@ typedef struct _rdpGCRec * rdpGCPtr;
struct urdp_draw_item_fill
{
- int opcode;
- int fg_color;
- int bg_color;
- int pad0;
+ int opcode;
+ int fg_color;
+ int bg_color;
+ int pad0;
};
struct urdp_draw_item_img
{
- int opcode;
- int pad0;
+ int opcode;
+ int pad0;
};
struct urdp_draw_item_line
{
- int opcode;
- int fg_color;
- int bg_color;
- int width;
- xSegment* segs;
- int nseg;
- int flags;
+ int opcode;
+ int fg_color;
+ int bg_color;
+ int width;
+ xSegment* segs;
+ int nseg;
+ int flags;
};
struct urdp_draw_item_scrblt
{
- int srcx;
- int srcy;
- int dstx;
- int dsty;
- int cx;
- int cy;
+ int srcx;
+ int srcy;
+ int dstx;
+ int dsty;
+ int cx;
+ int cy;
};
struct urdp_draw_item_text
{
- int opcode;
- int fg_color;
- struct rdp_text* rtext; /* in rdpglyph.h */
+ int opcode;
+ int fg_color;
+ struct rdp_text* rtext; /* in rdpglyph.h */
};
union urdp_draw_item
{
- struct urdp_draw_item_fill fill;
- struct urdp_draw_item_img img;
- struct urdp_draw_item_line line;
- struct urdp_draw_item_scrblt scrblt;
- struct urdp_draw_item_text text;
+ struct urdp_draw_item_fill fill;
+ struct urdp_draw_item_img img;
+ struct urdp_draw_item_line line;
+ struct urdp_draw_item_scrblt scrblt;
+ struct urdp_draw_item_text text;
};
struct rdp_draw_item
{
- int type; /* RDI_FILL, RDI_IMGLL, ... */
- int flags;
- struct rdp_draw_item* prev;
- struct rdp_draw_item* next;
- RegionPtr reg;
- union urdp_draw_item u;
+ int type; /* RDI_FILL, RDI_IMGLL, ... */
+ int flags;
+ struct rdp_draw_item* prev;
+ struct rdp_draw_item* next;
+ RegionPtr reg;
+ union urdp_draw_item u;
};
#define XRDP_USE_COUNT_THRESHOLD 1
diff --git a/xorg/server/module/rdpCapture.c b/xorg/server/module/rdpCapture.c
index 8819713a..5163e6ae 100644
--- a/xorg/server/module/rdpCapture.c
+++ b/xorg/server/module/rdpCapture.c
@@ -33,34 +33,249 @@
#include "rdpDraw.h"
#include "rdpClientCon.h"
#include "rdpReg.h"
+#include "rdpMisc.h"
#define LOG_LEVEL 1
#define LLOGLN(_level, _args) \
do { if (_level < LOG_LEVEL) { ErrorF _args ; ErrorF("\n"); } } while (0)
+#define RDP_MAX_TILES 1024
+
+/******************************************************************************/
+/* Select the boxes to process for reg.
+ * When reg has at most max_rects boxes, *rects points at them and their
+ * count is returned; otherwise *rects points at the region's extents box
+ * and 1 is returned. */
+static int
+rdpLimitRects(RegionPtr reg, int max_rects, BoxPtr *rects)
+{
+    int count = REGION_NUM_RECTS(reg);
+
+    if (count <= max_rects)
+    {
+        *rects = REGION_RECTS(reg);
+        return count;
+    }
+    /* too many boxes: collapse to the single bounding box */
+    *rects = rdpRegionExtents(reg);
+    return 1;
+}
+
+/******************************************************************************/
+/* Copy every box in rects from src to dst, 4 bytes per pixel, unchanged.
+ * srcx/srcy and dstx/dsty are subtracted from the box coordinates to find
+ * the position inside the respective buffer. No bounds checking is done.
+ * Always returns 0. */
+static int
+rdpCopyBox_a8r8g8b8_to_a8r8g8b8(void *src, int src_stride, int srcx, int srcy,
+                                void *dst, int dst_stride, int dstx, int dsty,
+                                BoxPtr rects, int num_rects)
+{
+    char *src_row;
+    char *dst_row;
+    int i;
+    int rows_left;
+    int row_bytes;
+    BoxPtr cur;
+
+    for (i = 0; i < num_rects; i++)
+    {
+        cur = &rects[i];
+        src_row = ((char *) src) + (cur->y1 - srcy) * src_stride
+                  + (cur->x1 - srcx) * 4;
+        dst_row = ((char *) dst) + (cur->y1 - dsty) * dst_stride
+                  + (cur->x1 - dstx) * 4;
+        row_bytes = (cur->x2 - cur->x1) * 4;
+        /* one memcpy per scanline of the box */
+        for (rows_left = cur->y2 - cur->y1; rows_left > 0; rows_left--)
+        {
+            memcpy(dst_row, src_row, row_bytes);
+            dst_row += dst_stride;
+            src_row += src_stride;
+        }
+    }
+    return 0;
+}
+
+/******************************************************************************/
+/* Zero one 64 * 64 * 4 byte block of dst.
+ * The block starts (ay << 8) * (dst_stride >> 8) + (ax << 8) bytes into dst.
+ * NOTE(review): ax / ay look like the pixel origin of a 64x64 tile - confirm
+ * against the caller. Always returns 0. */
+static int
+rdpFillBox_yuvalp(int ax, int ay,
+                  void *dst, int dst_stride)
+{
+    char *tile;
+
+    tile = (char *) dst;
+    tile += (ay << 8) * (dst_stride >> 8);
+    tile += ax << 8;
+    memset(tile, 0, 64 * 64 * 4);
+    return 0;
+}
+
+/******************************************************************************/
+/* Convert the boxes in rects from ARGB32 (src) into one planar block of dst.
+ * No bounds checking is done.
+ *
+ * The block starts (ay << 8) * (dst_stride >> 8) + (ax << 8) bytes into dst
+ * and holds four consecutive 64 * 64 byte planes: Y, U, V, A. Box
+ * coordinates are made relative to (ax, ay) to find the position in a plane.
+ *
+ * http://msdn.microsoft.com/en-us/library/ff635643.aspx
+ *   y = r *  0.299000 + g *  0.587000 + b *  0.114000
+ *   u = r * -0.168935 + g * -0.331665 + b *  0.500590
+ *   v = r *  0.499813 + g * -0.418531 + b * -0.081282
+ * The integer factors below are these coefficients scaled by 65536:
+ *    19595   38470    7471
+ *   -11071  -21736   32807
+ *    32756  -27429   -5327
+ * y is additionally shifted down by 128 and all three are clamped to
+ * [-128, 127] before being stored as bytes. Always returns 0. */
+static int
+rdpCopyBox_a8r8g8b8_to_yuvalp(int ax, int ay,
+                              void *src, int src_stride,
+                              void *dst, int dst_stride,
+                              BoxPtr rects, int num_rects)
+{
+    const int plane = 64 * 64;
+    char *src_row;
+    char *dst_row;
+    char *out;
+    int *in;
+    int i;
+    int row;
+    int col;
+    int cols;
+    int rows;
+    int argb;
+    int a;
+    int r;
+    int g;
+    int b;
+    int y;
+    int u;
+    int v;
+    BoxPtr cur;
+
+    dst = ((char *) dst) + (ay << 8) * (dst_stride >> 8) + (ax << 8);
+    for (i = 0; i < num_rects; i++)
+    {
+        cur = &rects[i];
+        src_row = ((char *) src) + cur->y1 * src_stride + cur->x1 * 4;
+        dst_row = ((char *) dst) + (cur->y1 - ay) * 64 + (cur->x1 - ax);
+        cols = cur->x2 - cur->x1;
+        rows = cur->y2 - cur->y1;
+        for (row = 0; row < rows; row++)
+        {
+            in = (int *) src_row;
+            out = dst_row;
+            for (col = 0; col < cols; col++)
+            {
+                argb = in[col];
+                a = (argb >> 24) & 0xff;
+                r = (argb >> 16) & 0xff;
+                g = (argb >> 8) & 0xff;
+                b = (argb >> 0) & 0xff;
+                y = ((r * 19595 + g * 38470 + b * 7471) >> 16) - 128;
+                u = (r * -11071 + g * -21736 + b * 32807) >> 16;
+                v = (r * 32756 + g * -27429 + b * -5327) >> 16;
+                y = max(y, -128);
+                y = min(y, 127);
+                u = max(u, -128);
+                u = min(u, 127);
+                v = max(v, -128);
+                v = min(v, 127);
+                out[col] = y;
+                out[col + plane] = u;
+                out[col + 2 * plane] = v;
+                out[col + 3 * plane] = a;
+            }
+            dst_row += 64;
+            src_row += src_stride;
+        }
+    }
+    return 0;
+}
+
+/******************************************************************************/
+/* Copy every box in rects from src to dst, swapping the red and blue
+ * channels of each 32-bit pixel. Box coordinates are used as-is in both
+ * buffers. The top byte of every destination pixel is written as 0, because
+ * COLOR24 only combines the three colour components. No bounds checking is
+ * done. Always returns 0. */
+static int
+rdpCopyBox_a8r8g8b8_to_a8b8g8r8(void *src, int src_stride,
+                                void *dst, int dst_stride,
+                                BoxPtr rects, int num_rects)
+{
+    char *s8;
+    char *d8;
+    int index;
+    int jndex;
+    int kndex;
+    int width;
+    int height;
+    int red;
+    int green;
+    int blue;
+    BoxPtr box;
+    unsigned int *s32;
+    unsigned int *d32;
+
+    for (index = 0; index < num_rects; index++)
+    {
+        box = rects + index;
+        s8 = ((char *) src) + box->y1 * src_stride;
+        s8 += box->x1 * 4;
+        d8 = ((char *) dst) + box->y1 * dst_stride;
+        d8 += box->x1 * 4;
+        width = box->x2 - box->x1;
+        height = box->y2 - box->y1;
+        for (jndex = 0; jndex < height; jndex++)
+        {
+            s32 = (unsigned int *) s8;
+            d32 = (unsigned int *) d8;
+            for (kndex = 0; kndex < width; kndex++)
+            {
+                /* split as r = bits 16-23, b = bits 0-7; repack as r = bits 0-7, b = bits 16-23 */
+                SPLITCOLOR32(red, green, blue, *s32);
+                *d32 = COLOR24(red, green, blue);
+                s32++;
+                d32++;
+            }
+            d8 += dst_stride;
+            s8 += src_stride;
+        }
+    }
+    return 0;
+}
+
/******************************************************************************/
static Bool
-rdpCapture0(RegionPtr in_reg, RegionPtr out_reg,
+rdpCapture0(rdpClientCon *clientCon,
+ RegionPtr in_reg, BoxPtr *out_rects, int *num_out_rects,
void *src, int src_width, int src_height,
int src_stride, int src_format,
void *dst, int dst_width, int dst_height,
int dst_stride, int dst_format, int max_rects)
{
- BoxPtr prects;
+ BoxPtr psrc_rects;
BoxRec rect;
RegionRec reg;
char *src_rect;
char *dst_rect;
- int num_regions;
- int bytespp;
+ int num_rects;
+ int src_bytespp;
+ int dst_bytespp;
int width;
int height;
int src_offset;
int dst_offset;
- int bytes;
int i;
int j;
+ int k;
+ int red;
+ int green;
+ int blue;
Bool rv;
+ unsigned int *s32;
+ unsigned short *d16;
+ unsigned char *d8;
LLOGLN(10, ("rdpCapture0:"));
@@ -73,47 +288,320 @@ rdpCapture0(RegionPtr in_reg, RegionPtr out_reg,
rdpRegionInit(&reg, &rect, 0);
rdpRegionIntersect(&reg, in_reg, &reg);
+ psrc_rects = 0;
+ num_rects = rdpLimitRects(&reg, max_rects, &psrc_rects);
+ if (num_rects < 1)
+ {
+ rdpRegionUninit(&reg);
+ return FALSE;
+ }
+
+ *num_out_rects = num_rects;
+
+ *out_rects = (BoxPtr) g_malloc(sizeof(BoxRec) * num_rects, 0);
+ for (i = 0; i < num_rects; i++)
+ {
+ rect = psrc_rects[i];
+ (*out_rects)[i] = rect;
+ }
+
+ if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_a8r8g8b8))
+ {
+ rdpCopyBox_a8r8g8b8_to_a8r8g8b8(src, src_stride, 0, 0,
+ dst, dst_stride, 0, 0,
+ psrc_rects, num_rects);
+ }
+ else if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_a8b8g8r8))
+ {
+ rdpCopyBox_a8r8g8b8_to_a8b8g8r8(src, src_stride,
+ dst, dst_stride,
+ psrc_rects, num_rects);
+ }
+ else if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_r5g6b5))
+ {
+ src_bytespp = 4;
+ dst_bytespp = 2;
+
+ for (i = 0; i < num_rects; i++)
+ {
+ /* get rect to copy */
+ rect = (*out_rects)[i];
+
+ /* get rect dimensions */
+ width = rect.x2 - rect.x1;
+ height = rect.y2 - rect.y1;
+
+ /* point to start of each rect in respective memory */
+ src_offset = rect.y1 * src_stride + rect.x1 * src_bytespp;
+ dst_offset = rect.y1 * dst_stride + rect.x1 * dst_bytespp;
+ src_rect = src + src_offset;
+ dst_rect = dst + dst_offset;
+
+ /* copy one line at a time */
+ for (j = 0; j < height; j++)
+ {
+ s32 = (unsigned int *) src_rect;
+ d16 = (unsigned short *) dst_rect;
+ for (k = 0; k < width; k++)
+ {
+ SPLITCOLOR32(red, green, blue, *s32);
+ *d16 = COLOR16(red, green, blue);
+ s32++;
+ d16++;
+ }
+ src_rect += src_stride;
+ dst_rect += dst_stride;
+ }
+ }
+ }
+ else if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_a1r5g5b5))
+ {
+ src_bytespp = 4;
+ dst_bytespp = 2;
+
+ for (i = 0; i < num_rects; i++)
+ {
+ /* get rect to copy */
+ rect = (*out_rects)[i];
+
+ /* get rect dimensions */
+ width = rect.x2 - rect.x1;
+ height = rect.y2 - rect.y1;
+
+ /* point to start of each rect in respective memory */
+ src_offset = rect.y1 * src_stride + rect.x1 * src_bytespp;
+ dst_offset = rect.y1 * dst_stride + rect.x1 * dst_bytespp;
+ src_rect = src + src_offset;
+ dst_rect = dst + dst_offset;
+
+ /* copy one line at a time */
+ for (j = 0; j < height; j++)
+ {
+ s32 = (unsigned int *) src_rect;
+ d16 = (unsigned short *) dst_rect;
+ for (k = 0; k < width; k++)
+ {
+ SPLITCOLOR32(red, green, blue, *s32);
+ *d16 = COLOR15(red, green, blue);
+ s32++;
+ d16++;
+ }
+ src_rect += src_stride;
+ dst_rect += dst_stride;
+ }
+ }
+ }
+ else if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_r3g3b2))
+ {
+ src_bytespp = 4;
+ dst_bytespp = 1;
+
+ for (i = 0; i < num_rects; i++)
+ {
+ /* get rect to copy */
+ rect = (*out_rects)[i];
+
+ /* get rect dimensions */
+ width = rect.x2 - rect.x1;
+ height = rect.y2 - rect.y1;
+
+ /* point to start of each rect in respective memory */
+ src_offset = rect.y1 * src_stride + rect.x1 * src_bytespp;
+ dst_offset = rect.y1 * dst_stride + rect.x1 * dst_bytespp;
+ src_rect = src + src_offset;
+ dst_rect = dst + dst_offset;
+
+ /* copy one line at a time */
+ for (j = 0; j < height; j++)
+ {
+ s32 = (unsigned int *) src_rect;
+ d8 = (unsigned char *) dst_rect;
+ for (k = 0; k < width; k++)
+ {
+ SPLITCOLOR32(red, green, blue, *s32);
+ *d8 = COLOR8(red, green, blue);
+ s32++;
+ d8++;
+ }
+ src_rect += src_stride;
+ dst_rect += dst_stride;
+ }
+ }
+ }
+ else
+ {
+ LLOGLN(0, ("rdpCapture0: unimp color conversion"));
+ }
+ rdpRegionUninit(&reg);
+ return rv;
+}
+
+/******************************************************************************/
+/* make out_rects always multiple of 16 width and height */
+static Bool
+rdpCapture1(rdpClientCon *clientCon,
+ RegionPtr in_reg, BoxPtr *out_rects, int *num_out_rects,
+ void *src, int src_width, int src_height,
+ int src_stride, int src_format,
+ void *dst, int dst_width, int dst_height,
+ int dst_stride, int dst_format, int max_rects)
+{
+ BoxPtr psrc_rects;
+ BoxRec rect;
+ RegionRec reg;
+ char *src_rect;
+ char *dst_rect;
+ int num_regions;
+ int src_bytespp;
+ int dst_bytespp;
+ int width;
+ int height;
+ int min_width;
+ int min_height;
+ int src_offset;
+ int dst_offset;
+ int index;
+ int jndex;
+ int kndex;
+ int red;
+ int green;
+ int blue;
+ int ex;
+ int ey;
+ Bool rv;
+ unsigned int *s32;
+ unsigned int *d32;
+
+ LLOGLN(10, ("rdpCapture1:"));
+
+ rv = TRUE;
+
+ min_width = RDPMIN(dst_width, src_width);
+ min_height = RDPMIN(dst_height, src_height);
+
+ rect.x1 = 0;
+ rect.y1 = 0;
+ rect.x2 = min_width;
+ rect.y2 = min_height;
+ rdpRegionInit(&reg, &rect, 0);
+ rdpRegionIntersect(&reg, in_reg, &reg);
+
num_regions = REGION_NUM_RECTS(&reg);
if (num_regions > max_rects)
{
num_regions = 1;
- prects = rdpRegionExtents(&reg);
- rdpRegionUninit(out_reg);
- rdpRegionInit(out_reg, prects, 0);
+ psrc_rects = rdpRegionExtents(&reg);
}
else
{
- prects = REGION_RECTS(&reg);
- rdpRegionCopy(out_reg, &reg);
+ psrc_rects = REGION_RECTS(&reg);
}
- if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_a8r8g8b8))
+ if (num_regions < 1)
{
- bytespp = 4;
+ return FALSE;
+ }
- for (i = 0; i < num_regions; i++)
+ *num_out_rects = num_regions;
+
+ *out_rects = (BoxPtr) g_malloc(sizeof(BoxRec) * num_regions * 4, 0);
+ index = 0;
+ while (index < num_regions)
+ {
+ rect = psrc_rects[index];
+ width = rect.x2 - rect.x1;
+ height = rect.y2 - rect.y1;
+ ex = ((width + 15) & ~15) - width;
+ if (ex != 0)
+ {
+ rect.x2 += ex;
+ if (rect.x2 > min_width)
+ {
+ rect.x1 -= rect.x2 - min_width;
+ rect.x2 = min_width;
+ }
+ if (rect.x1 < 0)
+ {
+ rect.x1 += 16;
+ }
+ }
+ ey = ((height + 15) & ~15) - height;
+ if (ey != 0)
+ {
+ rect.y2 += ey;
+ if (rect.y2 > min_height)
+ {
+ rect.y1 -= rect.y2 - min_height;
+ rect.y2 = min_height;
+ }
+ if (rect.y1 < 0)
+ {
+ rect.y1 += 16;
+ }
+ }
+#if 0
+ if (rect.x1 < 0)
+ {
+ LLOGLN(0, ("rdpCapture1: error"));
+ }
+ if (rect.y1 < 0)
+ {
+ LLOGLN(0, ("rdpCapture1: error"));
+ }
+ if (rect.x2 > min_width)
+ {
+ LLOGLN(0, ("rdpCapture1: error"));
+ }
+ if (rect.y2 > min_height)
+ {
+ LLOGLN(0, ("rdpCapture1: error"));
+ }
+ if ((rect.x2 - rect.x1) % 16 != 0)
+ {
+ LLOGLN(0, ("rdpCapture1: error"));
+ }
+ if ((rect.y2 - rect.y1) % 16 != 0)
+ {
+ LLOGLN(0, ("rdpCapture1: error"));
+ }
+#endif
+ (*out_rects)[index] = rect;
+ index++;
+ }
+
+ if ((src_format == XRDP_a8r8g8b8) && (dst_format == XRDP_a8b8g8r8))
+ {
+ src_bytespp = 4;
+ dst_bytespp = 4;
+
+ for (index = 0; index < num_regions; index++)
{
/* get rect to copy */
- rect = prects[i];
+ rect = (*out_rects)[index];
/* get rect dimensions */
width = rect.x2 - rect.x1;
height = rect.y2 - rect.y1;
/* point to start of each rect in respective memory */
- src_offset = rect.y1 * src_stride + rect.x1 * bytespp;
- dst_offset = rect.y1 * dst_stride + rect.x1 * bytespp;
+ src_offset = rect.y1 * src_stride + rect.x1 * src_bytespp;
+ dst_offset = rect.y1 * dst_stride + rect.x1 * dst_bytespp;
src_rect = src + src_offset;
dst_rect = dst + dst_offset;
- /* bytes per line */
- bytes = width * bytespp;
-
/* copy one line at a time */
- for (j = 0; j < height; j++)
+ for (jndex = 0; jndex < height; jndex++)
{
- memcpy(dst_rect, src_rect, bytes);
+ s32 = (unsigned int *) src_rect;
+ d32 = (unsigned int *) dst_rect;
+ for (kndex = 0; kndex < width; kndex++)
+ {
+ SPLITCOLOR32(red, green, blue, *s32);
+ *d32 = COLOR24(red, green, blue);
+ s32++;
+ d32++;
+ }
src_rect += src_stride;
dst_rect += dst_stride;
}
@@ -121,17 +609,130 @@ rdpCapture0(RegionPtr in_reg, RegionPtr out_reg,
}
else
{
- LLOGLN(0, ("rdpCapture0: unimp color conversion"));
+ LLOGLN(0, ("rdpCapture1: unimp color conversion"));
}
rdpRegionUninit(&reg);
return rv;
}
+/******************************************************************************/
+/* Capture mode 2: walk the dirty area in 64x64 tiles and convert each
+ * touched tile with rdpCopyBox_a8r8g8b8_to_yuvalp.
+ *
+ * in_reg is first clipped to the smaller of the source and destination
+ * sizes.  When the clipped region has more than max_rects rectangles its
+ * extents are used in its place.  A tile that is only partly covered is
+ * first filled with rdpFillBox_yuvalp and then only the covered
+ * rectangles are converted; a fully covered tile is converted whole.
+ *
+ * On success *out_rects is a g_malloc'ed array holding one full tile box
+ * per touched tile, *num_out_rects is the number of boxes, and TRUE is
+ * returned.  FALSE is returned when the allocation fails or when the tile
+ * count reaches RDP_MAX_TILES; in the second case the array is freed and
+ * *out_rects is set to NULL.
+ *
+ * clientCon, src_format and dst_format are not used here. */
+static Bool
+rdpCapture2(rdpClientCon *clientCon,
+            RegionPtr in_reg, BoxPtr *out_rects, int *num_out_rects,
+            void *src, int src_width, int src_height,
+            int src_stride, int src_format,
+            void *dst, int dst_width, int dst_height,
+            int dst_stride, int dst_format, int max_rects)
+{
+    int tile_x;
+    int tile_y;
+    int tile_count;
+    int rect_count;
+    int overlap;
+    BoxRec clip;
+    BoxRec tile;
+    BoxRec bounds;
+    BoxPtr tile_rects;
+    RegionRec tile_reg;
+    RegionRec lin_reg;
+    RegionRec temp_reg;
+    RegionPtr pin_reg;
+
+    LLOGLN(10, ("rdpCapture2:"));
+
+    *out_rects = (BoxPtr) g_malloc(sizeof(BoxRec) * RDP_MAX_TILES, 0);
+    if (*out_rects == NULL)
+    {
+        return FALSE;
+    }
+    tile_count = 0;
+
+    /* clip to the smaller of source and destination */
+    clip.x1 = 0;
+    clip.y1 = 0;
+    clip.x2 = min(dst_width, src_width);
+    clip.y2 = min(dst_height, src_height);
+    rdpRegionInit(&temp_reg, &clip, 0);
+    rdpRegionIntersect(&temp_reg, in_reg, &temp_reg);
+
+    /* limit the number of rects: fall back to the extents when too many */
+    rect_count = REGION_NUM_RECTS(&temp_reg);
+    if (rect_count <= max_rects)
+    {
+        LLOGLN(10, ("rdpCapture2: not too many rects"));
+        rdpRegionInit(&lin_reg, NullBox, 0);
+        pin_reg = &temp_reg;
+    }
+    else
+    {
+        LLOGLN(10, ("rdpCapture2: too many rects"));
+        rdpRegionInit(&lin_reg, rdpRegionExtents(&temp_reg), 0);
+        pin_reg = &lin_reg;
+    }
+
+    /* tiles start on 64 pixel boundaries at or before the extents origin */
+    bounds = *rdpRegionExtents(pin_reg);
+    for (tile_y = bounds.y1 & ~63; tile_y < bounds.y2; tile_y += 64)
+    {
+        for (tile_x = bounds.x1 & ~63; tile_x < bounds.x2; tile_x += 64)
+        {
+            tile.x1 = tile_x;
+            tile.y1 = tile_y;
+            tile.x2 = tile.x1 + 64;
+            tile.y2 = tile.y1 + 64;
+            overlap = rdpRegionContainsRect(pin_reg, &tile);
+            LLOGLN(10, ("rdpCapture2: rcode %d", overlap));
+
+            if (overlap == rgnOUT)
+            {
+                /* tile not touched by the region */
+                continue;
+            }
+            if (overlap == rgnPART)
+            {
+                LLOGLN(10, ("rdpCapture2: rgnPART"));
+                rdpFillBox_yuvalp(tile_x, tile_y, dst, dst_stride);
+                rdpRegionInit(&tile_reg, &tile, 0);
+                rdpRegionIntersect(&tile_reg, pin_reg, &tile_reg);
+                tile_rects = REGION_RECTS(&tile_reg);
+                rect_count = REGION_NUM_RECTS(&tile_reg);
+                rdpCopyBox_a8r8g8b8_to_yuvalp(tile_x, tile_y,
+                                              src, src_stride,
+                                              dst, dst_stride,
+                                              tile_rects, rect_count);
+                rdpRegionUninit(&tile_reg);
+            }
+            else /* rgnIN */
+            {
+                LLOGLN(10, ("rdpCapture2: rgnIN"));
+                rdpCopyBox_a8r8g8b8_to_yuvalp(tile_x, tile_y,
+                                              src, src_stride,
+                                              dst, dst_stride,
+                                              &tile, 1);
+            }
+
+            /* the whole tile is reported, even when only partly covered */
+            (*out_rects)[tile_count] = tile;
+            tile_count++;
+            if (tile_count >= RDP_MAX_TILES)
+            {
+                g_free(*out_rects);
+                *out_rects = NULL;
+                rdpRegionUninit(&temp_reg);
+                rdpRegionUninit(&lin_reg);
+                return FALSE;
+            }
+        }
+    }
+
+    *num_out_rects = tile_count;
+    rdpRegionUninit(&temp_reg);
+    rdpRegionUninit(&lin_reg);
+    return TRUE;
+}
+
/**
* Copy an array of rectangles from one memory area to another
*****************************************************************************/
Bool
-rdpCapture(RegionPtr in_reg, RegionPtr out_reg,
+rdpCapture(rdpClientCon *clientCon,
+ RegionPtr in_reg, BoxPtr *out_rects, int *num_out_rects,
void *src, int src_width, int src_height,
int src_stride, int src_format,
void *dst, int dst_width, int dst_height,
@@ -141,7 +742,19 @@ rdpCapture(RegionPtr in_reg, RegionPtr out_reg,
switch (mode)
{
case 0:
- return rdpCapture0(in_reg, out_reg,
+ return rdpCapture0(clientCon, in_reg, out_rects, num_out_rects,
+ src, src_width, src_height,
+ src_stride, src_format,
+ dst, dst_width, dst_height,
+ dst_stride, dst_format, 15);
+ case 1:
+ return rdpCapture1(clientCon, in_reg, out_rects, num_out_rects,
+ src, src_width, src_height,
+ src_stride, src_format,
+ dst, dst_width, dst_height,
+ dst_stride, dst_format, 15);
+ case 2:
+ return rdpCapture2(clientCon, in_reg, out_rects, num_out_rects,
src, src_width, src_height,
src_stride, src_format,
dst, dst_width, dst_height,
@@ -150,5 +763,5 @@ rdpCapture(RegionPtr in_reg, RegionPtr out_reg,
LLOGLN(0, ("rdpCapture: unimp mode"));
break;
}
- return TRUE;
+ return FALSE;
}
diff --git a/xorg/server/module/rdpCapture.h b/xorg/server/module/rdpCapture.h
index f92508c4..4dff1eea 100644
--- a/xorg/server/module/rdpCapture.h
+++ b/xorg/server/module/rdpCapture.h
@@ -19,9 +19,9 @@
*/
Bool
-rdpCapture(RegionPtr in_reg, RegionPtr out_reg,
+rdpCapture(rdpClientCon *clientCon,
+ RegionPtr in_reg, BoxPtr *out_rects, int *num_out_rects,
void *src, int src_width, int src_height,
int src_stride, int src_format,
void *dst, int dst_width, int dst_height,
- int dst_stride, int dst_format,
- int mode);
+ int dst_stride, int dst_format, int mode);
diff --git a/xorg/server/module/rdpClientCon.c b/xorg/server/module/rdpClientCon.c
index 3c9cdad5..35369063 100644
--- a/xorg/server/module/rdpClientCon.c
+++ b/xorg/server/module/rdpClientCon.c
@@ -50,21 +50,6 @@ Client connection to xrdp
#define LTOUI32(_in) ((unsigned int)(_in))
-#define COLOR8(r, g, b) \
- ((((r) >> 5) << 0) | (((g) >> 5) << 3) | (((b) >> 6) << 6))
-#define COLOR15(r, g, b) \
- ((((r) >> 3) << 10) | (((g) >> 3) << 5) | (((b) >> 3) << 0))
-#define COLOR16(r, g, b) \
- ((((r) >> 3) << 11) | (((g) >> 2) << 5) | (((b) >> 3) << 0))
-#define COLOR24(r, g, b) \
- ((((r) >> 0) << 0) | (((g) >> 0) << 8) | (((b) >> 0) << 16))
-#define SPLITCOLOR32(r, g, b, c) \
- do { \
- r = ((c) >> 16) & 0xff; \
- g = ((c) >> 8) & 0xff; \
- b = (c) & 0xff; \
- } while (0)
-
#define USE_MAX_OS_BYTES 1
#define MAX_OS_BYTES (16 * 1024 * 1024)
@@ -107,6 +92,9 @@ static int g_rdp_opcodes[16] =
0xff /* GXset 0xf 1 */
};
+static int
+rdpClientConDisconnect(rdpPtr dev, rdpClientCon *clientCon);
+
/******************************************************************************/
static int
rdpClientConGotConnection(ScreenPtr pScreen, rdpPtr dev)
@@ -144,6 +132,15 @@ rdpClientConGotConnection(ScreenPtr pScreen, rdpPtr dev)
AddEnabledDevice(clientCon->sck);
}
+#if 0
+ if (dev->clientConTail != NULL)
+ {
+ rdpClientConDisconnect(dev, dev->clientConTail);
+ dev->clientConHead = NULL;
+ dev->clientConTail = NULL;
+ }
+#endif
+
if (dev->clientConTail == NULL)
{
LLOGLN(0, ("rdpClientConGotConnection: adding only clientCon"));
@@ -274,6 +271,11 @@ rdpClientConDisconnect(rdpPtr dev, rdpClientCon *clientCon)
}
rdpRegionDestroy(clientCon->dirtyRegion);
rdpRegionDestroy(clientCon->shmRegion);
+ if (clientCon->updateTimer != NULL)
+ {
+ TimerCancel(clientCon->updateTimer);
+ TimerFree(clientCon->updateTimer);
+ }
g_free(clientCon);
return 0;
}
@@ -533,6 +535,8 @@ rdpClientConProcessMsgVersion(rdpPtr dev, rdpClientCon *clientCon,
return 0;
}
+/* round _num up to the next multiple of _po2; the mask arithmetic only
+   works when _po2 is a power of two. every argument is parenthesized so
+   that expressions such as (a & b) or (c ? x : y) expand correctly */
+#define LALIGN(_num, _po2) (((_num) + ((_po2) - 1)) & ~((_po2) - 1))
+
/******************************************************************************/
/*
this from miScreenInit
@@ -549,31 +553,37 @@ rdpClientConProcessScreenSizeMsg(rdpPtr dev, rdpClientCon *clientCon,
int bytes;
Bool ok;
- LLOGLN(0, ("rdpClientConProcessScreenSizeMsg: set width %d height %d bpp %d",
- width, height, bpp));
+ LLOGLN(0, ("rdpClientConProcessScreenSizeMsg: set width %d height %d "
+ "bpp %d", width, height, bpp));
clientCon->rdp_width = width;
clientCon->rdp_height = height;
clientCon->rdp_bpp = bpp;
+ clientCon->cap_width = width;
+ clientCon->cap_height = height;
if (bpp < 15)
{
clientCon->rdp_Bpp = 1;
clientCon->rdp_Bpp_mask = 0xff;
+ clientCon->rdp_format = PIXMAN_r3g3b2;
}
else if (bpp == 15)
{
clientCon->rdp_Bpp = 2;
clientCon->rdp_Bpp_mask = 0x7fff;
+ clientCon->rdp_format = XRDP_a1r5g5b5;
}
else if (bpp == 16)
{
clientCon->rdp_Bpp = 2;
clientCon->rdp_Bpp_mask = 0xffff;
+ clientCon->rdp_format = XRDP_r5g6b5;
}
else if (bpp > 16)
{
clientCon->rdp_Bpp = 4;
clientCon->rdp_Bpp_mask = 0xffffff;
+ clientCon->rdp_format = XRDP_a8r8g8b8;
}
if (clientCon->shmemptr != 0)
@@ -655,12 +665,13 @@ rdpClientConProcessMsgClientInput(rdpPtr dev, rdpClientCon *clientCon)
}
else if (msg == 300) /* resize desktop */
{
- rdpClientConProcessScreenSizeMsg(dev, clientCon, param1, param2, param3);
+ rdpClientConProcessScreenSizeMsg(dev, clientCon, param1,
+ param2, param3);
}
else if (msg == 301) /* version */
{
rdpClientConProcessMsgVersion(dev, clientCon,
- param1, param2, param3, param4);
+ param1, param2, param3, param4);
}
else
{
@@ -697,6 +708,32 @@ rdpClientConProcessMsgClientInfo(rdpPtr dev, rdpClientCon *clientCon)
i1 = clientCon->client_info.offscreen_cache_entries;
LLOGLN(0, (" offscreen entries %d", i1));
+ if (clientCon->client_info.capture_format != 0)
+ {
+ clientCon->rdp_format = clientCon->client_info.capture_format;
+ }
+
+ if (clientCon->client_info.capture_code == 2) /* RFX */
+ {
+ LLOGLN(0, ("rdpClientConProcessMsgClientInfo: got RFX capture"));
+ clientCon->cap_width = LALIGN(clientCon->rdp_width, 64);
+ clientCon->cap_height = LALIGN(clientCon->rdp_height, 64);
+ LLOGLN(0, (" cap_width %d cap_height %d",
+ clientCon->cap_width, clientCon->cap_height));
+ if (clientCon->shmemptr != 0)
+ {
+ shmdt(clientCon->shmemptr);
+ }
+ bytes = clientCon->cap_width * clientCon->cap_height *
+ clientCon->rdp_Bpp;
+ clientCon->shmemid = shmget(IPC_PRIVATE, bytes, IPC_CREAT | 0777);
+ clientCon->shmemptr = shmat(clientCon->shmemid, 0, 0);
+ shmctl(clientCon->shmemid, IPC_RMID, NULL);
+ LLOGLN(0, ("rdpClientConProcessMsgClientInfo: shmemid %d shmemptr %p "
+ "bytes %d", clientCon->shmemid, clientCon->shmemptr, bytes));
+ clientCon->shmem_lineBytes = clientCon->rdp_Bpp * clientCon->cap_width;
+ }
+
if (clientCon->client_info.offscreen_support_level > 0)
{
if (clientCon->client_info.offscreen_cache_entries > 0)
@@ -808,7 +845,7 @@ rdpClientConProcessMsgClientRegionEx(rdpPtr dev, rdpClientCon *clientCon)
{
struct stream *s;
int flags;
-
+
LLOGLN(10, ("rdpClientConProcessMsgClientRegionEx:"));
s = clientCon->in_s;
@@ -1878,19 +1915,29 @@ rdpClientConCheckDirtyScreen(rdpPtr dev, rdpClientCon *clientCon)
static int
rdpClientConSendPaintRectShmEx(rdpPtr dev, rdpClientCon *clientCon,
struct image_data *id,
- RegionPtr dirtyReg, RegionPtr copyReg)
+ RegionPtr dirtyReg,
+ BoxPtr copyRects, int numCopyRects)
{
int index;
int size;
int num_rects_d;
int num_rects_c;
+ short x;
+ short y;
+ short cx;
+ short cy;
struct stream *s;
BoxRec box;
rdpClientConBeginUpdate(dev, clientCon);
num_rects_d = REGION_NUM_RECTS(dirtyReg);
- num_rects_c = REGION_NUM_RECTS(copyReg);
+ num_rects_c = numCopyRects;
+ if ((num_rects_c < 1) || (num_rects_d < 1))
+ {
+ LLOGLN(0, ("rdpClientConSendPaintRectShmEx: nothing to send"));
+ return 0;
+ }
size = 2 + 2 + 2 + num_rects_d * 8 + 2 + num_rects_c * 8;
size += 4 + 4 + 4 + 4 + 2 + 2;
rdpClientConPreCheck(dev, clientCon, size);
@@ -1904,20 +1951,28 @@ rdpClientConSendPaintRectShmEx(rdpPtr dev, rdpClientCon *clientCon,
for (index = 0; index < num_rects_d; index++)
{
box = REGION_RECTS(dirtyReg)[index];
- out_uint16_le(s, box.x1);
- out_uint16_le(s, box.y1);
- out_uint16_le(s, box.x2 - box.x1);
- out_uint16_le(s, box.y2 - box.y1);
+ x = box.x1;
+ y = box.y1;
+ cx = box.x2 - box.x1;
+ cy = box.y2 - box.y1;
+ out_uint16_le(s, x);
+ out_uint16_le(s, y);
+ out_uint16_le(s, cx);
+ out_uint16_le(s, cy);
}
out_uint16_le(s, num_rects_c);
for (index = 0; index < num_rects_c; index++)
{
- box = REGION_RECTS(copyReg)[index];
- out_uint16_le(s, box.x1);
- out_uint16_le(s, box.y1);
- out_uint16_le(s, box.x2 - box.x1);
- out_uint16_le(s, box.y2 - box.y1);
+ box = copyRects[index];
+ x = box.x1;
+ y = box.y1;
+ cx = box.x2 - box.x1;
+ cy = box.y2 - box.y1;
+ out_uint16_le(s, x);
+ out_uint16_le(s, y);
+ out_uint16_le(s, cx);
+ out_uint16_le(s, cy);
}
out_uint32_le(s, 0);
@@ -1925,30 +1980,36 @@ rdpClientConSendPaintRectShmEx(rdpPtr dev, rdpClientCon *clientCon,
out_uint32_le(s, clientCon->rect_id);
out_uint32_le(s, id->shmem_id);
out_uint32_le(s, id->shmem_offset);
- out_uint16_le(s, clientCon->rdp_width);
- out_uint16_le(s, clientCon->rdp_height);
+ out_uint16_le(s, clientCon->cap_width);
+ out_uint16_le(s, clientCon->cap_height);
rdpClientConEndUpdate(dev, clientCon);
return 0;
}
-
+
/******************************************************************************/
static CARD32
rdpDeferredUpdateCallback(OsTimerPtr timer, CARD32 now, pointer arg)
{
rdpClientCon *clientCon;
- RegionRec reg;
+ BoxPtr rects;
+ int num_rects;
struct image_data id;
LLOGLN(10, ("rdpDeferredUpdateCallback:"));
clientCon = (rdpClientCon *) arg;
- if (clientCon->rect_id != clientCon->rect_id_ack)
+ if ((clientCon->rect_id > clientCon->rect_id_ack) ||
+ /* do not allow captures until we have the client_info */
+ clientCon->client_info.size == 0)
{
- LLOGLN(0, ("rdpDeferredUpdateCallback: reschedual"));
+ LLOGLN(0, ("rdpDeferredUpdateCallback: reschedual rect_id %d "
+ "rect_id_ack %d",
+ clientCon->rect_id, clientCon->rect_id_ack));
clientCon->updateTimer = TimerSet(clientCon->updateTimer, 0, 40,
- rdpDeferredUpdateCallback, clientCon);
+ rdpDeferredUpdateCallback,
+ clientCon);
return 0;
}
else
@@ -1961,17 +2022,29 @@ rdpDeferredUpdateCallback(OsTimerPtr timer, CARD32 now, pointer arg)
clientCon->rdp_width, clientCon->rdp_height, clientCon->rdp_Bpp,
id.width, id.height));
clientCon->updateSchedualed = FALSE;
- rdpRegionInit(&reg, NullBox, 0);
- rdpCapture(clientCon->dirtyRegion, &reg,
- id.pixels, id.width, id.height,
- id.lineBytes, XRDP_a8r8g8b8,
- id.shmem_pixels, clientCon->rdp_width, clientCon->rdp_height,
- clientCon->rdp_width * clientCon->rdp_Bpp , XRDP_a8r8g8b8, 0);
- rdpClientConSendPaintRectShmEx(clientCon->dev, clientCon, &id,
- clientCon->dirtyRegion, &reg);
+ rects = 0;
+ num_rects = 0;
+ LLOGLN(10, ("rdpDeferredUpdateCallback: capture_code %d",
+ clientCon->client_info.capture_code));
+ if (rdpCapture(clientCon, clientCon->dirtyRegion, &rects, &num_rects,
+ id.pixels, id.width, id.height,
+ id.lineBytes, XRDP_a8r8g8b8, id.shmem_pixels,
+ clientCon->cap_width, clientCon->cap_height,
+ clientCon->cap_width * clientCon->rdp_Bpp,
+ clientCon->rdp_format, clientCon->client_info.capture_code))
+ {
+ LLOGLN(10, ("rdpDeferredUpdateCallback: num_rects %d", num_rects));
+ rdpClientConSendPaintRectShmEx(clientCon->dev, clientCon, &id,
+ clientCon->dirtyRegion,
+ rects, num_rects);
+ g_free(rects);
+ }
+ else
+ {
+ LLOGLN(0, ("rdpDeferredUpdateCallback: rdpCapture failed"));
+ }
rdpRegionDestroy(clientCon->dirtyRegion);
clientCon->dirtyRegion = rdpRegionCreate(NullBox, 0);
- rdpRegionUninit(&reg);
return 0;
}
diff --git a/xorg/server/module/rdpClientCon.h b/xorg/server/module/rdpClientCon.h
index 9cbe493a..a66abbcd 100644
--- a/xorg/server/module/rdpClientCon.h
+++ b/xorg/server/module/rdpClientCon.h
@@ -79,6 +79,9 @@ struct _rdpClientCon
int rdp_Bpp_mask;
int rdp_width;
int rdp_height;
+ int rdp_format; /* XRDP_a8r8g8b8, XRDP_r5g6b5, ... */
+ int cap_width;
+ int cap_height;
int rdpIndex; /* current os target */
@@ -88,8 +91,6 @@ struct _rdpClientCon
struct font_cache font_cache[12][256];
int font_stamp;
- RegionPtr dirtyRegion;
-
struct xrdp_client_info client_info;
char *shmemptr;
@@ -102,6 +103,8 @@ struct _rdpClientCon
OsTimerPtr updateTimer;
int updateSchedualed; /* boolean */
+ RegionPtr dirtyRegion;
+
struct _rdpClientCon *next;
};
diff --git a/xorg/server/module/rdpCursor.c b/xorg/server/module/rdpCursor.c
index 3859e8e7..d4862df7 100644
--- a/xorg/server/module/rdpCursor.c
+++ b/xorg/server/module/rdpCursor.c
@@ -39,11 +39,18 @@ cursor
#include <cursor.h>
#include <cursorstr.h>
+#include <X11/Xarch.h>
+
#include "rdp.h"
#include "rdpMain.h"
#include "rdpDraw.h"
#include "rdpClientCon.h"
+#ifndef X_BYTE_ORDER
+#warning X_BYTE_ORDER not defined
+#endif
+
+#if (X_BYTE_ORDER == X_LITTLE_ENDIAN)
/* Copied from Xvnc/lib/font/util/utilbitmap.c */
static unsigned char g_reverse_byte[0x100] =
{
@@ -80,6 +87,7 @@ static unsigned char g_reverse_byte[0x100] =
0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
};
+#endif
/******************************************************************************/
#define LOG_LEVEL 1
diff --git a/xorg/server/module/rdpMain.c b/xorg/server/module/rdpMain.c
index a8786ac2..2f6db7c7 100644
--- a/xorg/server/module/rdpMain.c
+++ b/xorg/server/module/rdpMain.c
@@ -47,14 +47,6 @@ rdp module main
#define LLOGLN(_level, _args) \
do { if (_level < LOG_LEVEL) { ErrorF _args ; ErrorF("\n"); } } while (0)
-#define XRDP_DRIVER_NAME "XORGXRDP"
-#define XRDP_NAME "XORGXRDP"
-#define XRDP_VERSION 1000
-
-#define PACKAGE_VERSION_MAJOR 1
-#define PACKAGE_VERSION_MINOR 0
-#define PACKAGE_VERSION_PATCHLEVEL 0
-
static Bool g_initialised = FALSE;
/*****************************************************************************/
@@ -95,7 +87,7 @@ xorgxrdpDownDown(ScreenPtr pScreen)
static MODULESETUPPROTO(xorgxrdpSetup);
static XF86ModuleVersionInfo RDPVersRec =
{
- XRDP_DRIVER_NAME,
+ XRDP_MODULE_NAME,
MODULEVENDORSTRING,
MODINFOSTRING1,
MODINFOSTRING2,
diff --git a/xorg/server/module/rdpSimd.c b/xorg/server/module/rdpSimd.c
new file mode 100644
index 00000000..7215bf86
--- /dev/null
+++ b/xorg/server/module/rdpSimd.c
@@ -0,0 +1,138 @@
+/*
+Copyright 2014 Jay Sorg
+
+Permission to use, copy, modify, distribute, and sell this software and its
+documentation for any purpose is hereby granted without fee, provided that
+the above copyright notice appear in all copies and that both that
+copyright notice and this permission notice appear in supporting
+documentation.
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+SIMD function assignment
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* this should be before all X11 .h files */
+#include <xorg-server.h>
+#include <xorgVersion.h>
+
+/* all drivers need this */
+#include <xf86.h>
+#include <xf86_OSproc.h>
+
+#include "rdp.h"
+#include "rdpXv.h"
+
+/* use simd, run time */
+int g_simd_use_accel = 1;
+
+/* use simd, compile time, if zero, g_simd_use_accel does not matter */
+#if !defined(SIMD_USE_ACCEL)
+#define SIMD_USE_ACCEL 0
+#endif
+
+#if SIMD_USE_ACCEL
+#if defined(__x86_64__) || defined(__AMD64__) || defined (_M_AMD64)
+#include "amd64/funcs_amd64.h"
+#elif defined(__x86__) || defined(_M_IX86) || defined(__i386__)
+#include "x86/funcs_x86.h"
+#endif
+#endif
+
+#define LOG_LEVEL 1
+#define LLOGLN(_level, _args) \
+ do { if (_level < LOG_LEVEL) { ErrorF _args ; ErrorF("\n"); } } while (0)
+
+/*****************************************************************************/
+/* Fill in the four yuv to rgb32 function pointers of the device.
+ *
+ * When built with SIMD_USE_ACCEL and g_simd_use_accel is non zero, cpuid
+ * leaf 1 is queried on amd64 / x86 builds and, if bit 26 of dx (SSE 2) is
+ * set, the sse2 assembly routines are assigned.  In every other case the
+ * plain C converters are assigned and a warning is logged.
+ *
+ * pScreen is not used.  Always returns 1. */
+Bool
+rdpSimdInit(ScreenPtr pScreen, ScrnInfoPtr pScrn)
+{
+    rdpPtr dev;
+    int simd_assigned;
+
+    dev = XRDPPTR(pScrn);
+    simd_assigned = 0;
+    /* assign functions */
+    LLOGLN(0, ("rdpSimdInit: assigning yuv functions"));
+#if SIMD_USE_ACCEL
+    if (g_simd_use_accel)
+    {
+#if defined(__x86_64__) || defined(__AMD64__) || defined (_M_AMD64)
+        int ax, bx, cx, dx;
+        cpuid_amd64(1, 0, &ax, &bx, &cx, &dx);
+        LLOGLN(0, ("rdpSimdInit: cpuid ax 1 cx 0 return ax 0x%8.8x bx "
+               "0x%8.8x cx 0x%8.8x dx 0x%8.8x", ax, bx, cx, dx));
+        if (dx & (1 << 26)) /* SSE 2 */
+        {
+            dev->yv12_to_rgb32 = yv12_to_rgb32_amd64_sse2;
+            dev->i420_to_rgb32 = i420_to_rgb32_amd64_sse2;
+            dev->yuy2_to_rgb32 = yuy2_to_rgb32_amd64_sse2;
+            dev->uyvy_to_rgb32 = uyvy_to_rgb32_amd64_sse2;
+            LLOGLN(0, ("rdpSimdInit: sse2 amd64 yuv functions assigned"));
+            simd_assigned = 1;
+        }
+#elif defined(__x86__) || defined(_M_IX86) || defined(__i386__)
+        int ax, bx, cx, dx;
+        cpuid_x86(1, 0, &ax, &bx, &cx, &dx);
+        LLOGLN(0, ("rdpSimdInit: cpuid ax 1 cx 0 return ax 0x%8.8x bx "
+               "0x%8.8x cx 0x%8.8x dx 0x%8.8x", ax, bx, cx, dx));
+        if (dx & (1 << 26)) /* SSE 2 */
+        {
+            dev->yv12_to_rgb32 = yv12_to_rgb32_x86_sse2;
+            dev->i420_to_rgb32 = i420_to_rgb32_x86_sse2;
+            dev->yuy2_to_rgb32 = yuy2_to_rgb32_x86_sse2;
+            dev->uyvy_to_rgb32 = uyvy_to_rgb32_x86_sse2;
+            LLOGLN(0, ("rdpSimdInit: sse2 x86 yuv functions assigned"));
+            simd_assigned = 1;
+        }
+#endif
+    }
+#endif
+    if (!simd_assigned)
+    {
+        /* single fallback for: accel compiled out, accel disabled at run
+           time, unknown architecture, or cpu without SSE 2 */
+        dev->yv12_to_rgb32 = YV12_to_RGB32;
+        dev->i420_to_rgb32 = I420_to_RGB32;
+        dev->yuy2_to_rgb32 = YUY2_to_RGB32;
+        dev->uyvy_to_rgb32 = UYVY_to_RGB32;
+        LLOGLN(0, ("rdpSimdInit: warning, c yuv functions assigned"));
+    }
+    return 1;
+}
+
diff --git a/xorg/server/module/rdpSimd.h b/xorg/server/module/rdpSimd.h
new file mode 100644
index 00000000..73bf1ba5
--- /dev/null
+++ b/xorg/server/module/rdpSimd.h
@@ -0,0 +1,34 @@
+/*
+Copyright 2014 Jay Sorg
+
+Permission to use, copy, modify, distribute, and sell this software and its
+documentation for any purpose is hereby granted without fee, provided that
+the above copyright notice appear in all copies and that both that
+copyright notice and this permission notice appear in supporting
+documentation.
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+SIMD function assignment
+
+*/
+
+#ifndef __RDPSIMD_H
+#define __RDPSIMD_H
+
+#include <xorg-server.h>
+#include <xorgVersion.h>
+#include <xf86.h>
+
+Bool
+rdpSimdInit(ScreenPtr pScreen, ScrnInfoPtr pScrn);
+
+#endif
diff --git a/xorg/server/module/rdpXv.c b/xorg/server/module/rdpXv.c
new file mode 100644
index 00000000..1557f892
--- /dev/null
+++ b/xorg/server/module/rdpXv.c
@@ -0,0 +1,678 @@
+/*
+Copyright 2014 Jay Sorg
+
+Permission to use, copy, modify, distribute, and sell this software and its
+documentation for any purpose is hereby granted without fee, provided that
+the above copyright notice appear in all copies and that both that
+copyright notice and this permission notice appear in supporting
+documentation.
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+XVideo
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* this should be before all X11 .h files */
+#include <xorg-server.h>
+#include <xorgVersion.h>
+
+/* all drivers need this */
+#include <xf86.h>
+#include <xf86_OSproc.h>
+
+#include <xf86xv.h>
+#include <X11/extensions/Xv.h>
+#include <fourcc.h>
+
+#include <fb.h>
+
+#include "rdp.h"
+#include "rdpMisc.h"
+#include "rdpReg.h"
+#include "rdpClientCon.h"
+
+#define LOG_LEVEL 1
+#define LLOGLN(_level, _args) \
+ do { if (_level < LOG_LEVEL) { ErrorF _args ; ErrorF("\n"); } } while (0)
+
+#define T_NUM_ENCODINGS 1
+static XF86VideoEncodingRec g_xrdpVidEncodings[T_NUM_ENCODINGS] =
+{ { 0, "XV_IMAGE", 2046, 2046, { 1, 1 } } };
+
+#define T_NUM_FORMATS 1
+static XF86VideoFormatRec g_xrdpVidFormats[T_NUM_FORMATS] =
+{ { 0, TrueColor } };
+
+/* YV12
+ I420
+ 12 bpp planar
+ YUV 4:2:0 8 bit Y plane followed by 8 bit 2x2 subsampled
+ U and V planes. */
+
+/* YUY2
+ UYVY
+ 16 bpp packed
+ YUV 4:2:2 Y sample at every pixel, U and V sampled at
+ every second pixel */
+
+/* XVIMAGE_YV12 FOURCC_YV12 0x32315659 */
+/* XVIMAGE_I420 FOURCC_I420 0x30323449 */
+/* XVIMAGE_YUY2 FOURCC_YUY2 0x32595559 */
+/* XVIMAGE_UYVY FOURCC_UYVY 0x59565955 */
+
+static XF86ImageRec g_xrdpVidImages[] =
+{ XVIMAGE_YV12, XVIMAGE_I420, XVIMAGE_YUY2, XVIMAGE_UYVY };
+
+#define T_MAX_PORTS 1
+
+/*****************************************************************************/
+/* stub: logs the call and reports Success, no argument is used */
+static int
+xrdpVidPutVideo(ScrnInfoPtr pScrn,
+                short vid_x, short vid_y, short drw_x, short drw_y,
+                short vid_w, short vid_h, short drw_w, short drw_h,
+                RegionPtr clipBoxes, pointer data, DrawablePtr pDraw)
+{
+    LLOGLN(0, ("xrdpVidPutVideo:"));
+    return Success;
+}
+
+/*****************************************************************************/
+/* stub: logs the call and reports Success, no argument is used */
+static int
+xrdpVidPutStill(ScrnInfoPtr pScrn,
+                short vid_x, short vid_y, short drw_x, short drw_y,
+                short vid_w, short vid_h, short drw_w, short drw_h,
+                RegionPtr clipBoxes, pointer data, DrawablePtr pDraw)
+{
+    LLOGLN(0, ("xrdpVidPutStill:"));
+    return Success;
+}
+
+/*****************************************************************************/
+/* stub: logs the call and reports Success, no argument is used */
+static int
+xrdpVidGetVideo(ScrnInfoPtr pScrn,
+                short vid_x, short vid_y, short drw_x, short drw_y,
+                short vid_w, short vid_h, short drw_w, short drw_h,
+                RegionPtr clipBoxes, pointer data, DrawablePtr pDraw)
+{
+    LLOGLN(0, ("xrdpVidGetVideo:"));
+    return Success;
+}
+
+/*****************************************************************************/
+/* stub: logs the call and reports Success, no argument is used.
+   the log text now carries this function's own name; it previously said
+   "FBDevTIVidGetStill:", which made the log misleading */
+static int
+xrdpVidGetStill(ScrnInfoPtr pScrn, short vid_x, short vid_y,
+                short drw_x, short drw_y, short vid_w, short vid_h,
+                short drw_w, short drw_h, RegionPtr clipBoxes,
+                pointer data, DrawablePtr pDraw)
+{
+    LLOGLN(0, ("xrdpVidGetStill:"));
+    return Success;
+}
+
+/*****************************************************************************/
+/* stub: only logs the call; pScrn, data and Cleanup are not used */
+static void
+xrdpVidStopVideo(ScrnInfoPtr pScrn,
+                 pointer data,
+                 Bool Cleanup)
+{
+    LLOGLN(0, ("xrdpVidStopVideo:"));
+}
+
+/*****************************************************************************/
+/* stub: logs the call and reports Success; the attribute and value are
+   ignored, nothing is stored */
+static int
+xrdpVidSetPortAttribute(ScrnInfoPtr pScrn,
+                        Atom attribute,
+                        INT32 value,
+                        pointer data)
+{
+    LLOGLN(0, ("xrdpVidSetPortAttribute:"));
+    return Success;
+}
+
+/*****************************************************************************/
+/* stub: logs the call and reports Success; *value is not written */
+static int
+xrdpVidGetPortAttribute(ScrnInfoPtr pScrn,
+                        Atom attribute,
+                        INT32 *value,
+                        pointer data)
+{
+    LLOGLN(0, ("xrdpVidGetPortAttribute:"));
+    return Success;
+}
+
+/*****************************************************************************/
+/* stub: only logs the call; *p_w and *p_h are not written */
+static void
+xrdpVidQueryBestSize(ScrnInfoPtr pScrn, Bool motion,
+                     short vid_w, short vid_h,
+                     short drw_w, short drw_h,
+                     unsigned int *p_w, unsigned int *p_h,
+                     pointer data)
+{
+    LLOGLN(0, ("xrdpVidQueryBestSize:"));
+}
+
+/*****************************************************************************/
+/* C converter from a planar 4:2:0 image to packed 32 bit pixels.
+ *
+ * yuvs:  width * height luma bytes followed by two chroma planes of
+ *        (width * height) / 4 bytes each, indexed at half resolution
+ * rgbs:  receives width * height ints, each (r << 16) | (g << 8) | b
+ *
+ * The first chroma plane feeds the 516 / -100 terms and the second one
+ * the 409 / -208 terms.  Always returns 0.
+ * NOTE(review): the 409 term lands in b and the 516 term in r; confirm
+ * the intended channel order against the consumers of rgbs. */
+int
+YV12_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs)
+{
+    unsigned char *plane1;
+    unsigned char *plane2;
+    int num_pixels;
+    int row;
+    int col;
+    int cidx;
+    int c;
+    int d;
+    int e;
+    int t;
+    int r;
+    int g;
+    int b;
+
+    num_pixels = width * height;
+    plane1 = yuvs + num_pixels;
+    plane2 = plane1 + (num_pixels / 4);
+    for (row = 0; row < height; row++)
+    {
+        for (col = 0; col < width; col++)
+        {
+            /* one chroma sample is shared by a 2x2 block of pixels */
+            cidx = (row / 2) * (width / 2) + (col / 2);
+            c = yuvs[row * width + col] - 16;
+            d = plane1[cidx] - 128;
+            e = plane2[cidx] - 128;
+            t = (298 * c + 409 * e + 128) >> 8;
+            b = RDPCLAMP(t, 0, 255);
+            t = (298 * c - 100 * d - 208 * e + 128) >> 8;
+            g = RDPCLAMP(t, 0, 255);
+            t = (298 * c + 516 * d + 128) >> 8;
+            r = RDPCLAMP(t, 0, 255);
+            rgbs[row * width + col] = (r << 16) | (g << 8) | b;
+        }
+    }
+    return 0;
+}
+
+/*****************************************************************************/
+/* C converter from a planar 4:2:0 image to packed 32 bit pixels.
+ *
+ * yuvs:  width * height luma bytes followed by two chroma planes of
+ *        (width * height) / 4 bytes each, indexed at half resolution
+ * rgbs:  receives width * height ints, each (r << 16) | (g << 8) | b
+ *
+ * The first chroma plane feeds the 409 / -208 terms and the second one
+ * the 516 / -100 terms.  Always returns 0.
+ * NOTE(review): the 409 term lands in b and the 516 term in r; confirm
+ * the intended channel order against the consumers of rgbs. */
+int
+I420_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs)
+{
+    unsigned char *plane1;
+    unsigned char *plane2;
+    int num_pixels;
+    int row;
+    int col;
+    int cidx;
+    int c;
+    int d;
+    int e;
+    int t;
+    int r;
+    int g;
+    int b;
+
+    num_pixels = width * height;
+    plane1 = yuvs + num_pixels;
+    plane2 = plane1 + (num_pixels / 4);
+    for (row = 0; row < height; row++)
+    {
+        for (col = 0; col < width; col++)
+        {
+            /* one chroma sample is shared by a 2x2 block of pixels */
+            cidx = (row / 2) * (width / 2) + (col / 2);
+            c = yuvs[row * width + col] - 16;
+            d = plane2[cidx] - 128;
+            e = plane1[cidx] - 128;
+            t = (298 * c + 409 * e + 128) >> 8;
+            b = RDPCLAMP(t, 0, 255);
+            t = (298 * c - 100 * d - 208 * e + 128) >> 8;
+            g = RDPCLAMP(t, 0, 255);
+            t = (298 * c + 516 * d + 128) >> 8;
+            r = RDPCLAMP(t, 0, 255);
+            rgbs[row * width + col] = (r << 16) | (g << 8) | b;
+        }
+    }
+    return 0;
+}
+
+/*****************************************************************************/
+/* C converter from a packed 4:2:2 image to packed 32 bit pixels.
+ *
+ * yuvs:  consumed sequentially, 4 bytes per pair of pixels; bytes 0 and 2
+ *        are the two luma samples, byte 1 feeds the 409 / -208 terms and
+ *        byte 3 feeds the 516 / -100 terms
+ * rgbs:  receives width * height ints, each (r << 16) | (g << 8) | b
+ *
+ * With an odd width the last 4 byte group of a row still gets consumed
+ * but only its first pixel is stored, so nothing is written past the row
+ * (previously the second pixel was stored at column == width, which on
+ * the last row is past the end of rgbs).  Always returns 0.
+ * NOTE(review): the 409 term lands in b and the 516 term in r; confirm
+ * the intended channel order against the consumers of rgbs. */
+int
+YUY2_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs)
+{
+    int luma[2];
+    int c;
+    int d;
+    int e;
+    int r;
+    int g;
+    int b;
+    int t;
+    int i;
+    int j;
+    int k;
+
+    for (j = 0; j < height; j++)
+    {
+        for (i = 0; i < width; i += 2)
+        {
+            /* two luma samples share one pair of chroma samples */
+            luma[0] = *(yuvs++);
+            e = *(yuvs++) - 128;
+            luma[1] = *(yuvs++);
+            d = *(yuvs++) - 128;
+
+            /* second pixel is skipped when it falls outside the row */
+            for (k = 0; (k < 2) && (i + k < width); k++)
+            {
+                c = luma[k] - 16;
+                t = (298 * c + 409 * e + 128) >> 8;
+                b = RDPCLAMP(t, 0, 255);
+                t = (298 * c - 100 * d - 208 * e + 128) >> 8;
+                g = RDPCLAMP(t, 0, 255);
+                t = (298 * c + 516 * d + 128) >> 8;
+                r = RDPCLAMP(t, 0, 255);
+                rgbs[j * width + i + k] = (r << 16) | (g << 8) | b;
+            }
+        }
+    }
+    return 0;
+}
+
+/*****************************************************************************/
+/* C converter from a packed 4:2:2 image to packed 32 bit pixels.
+ *
+ * yuvs:  consumed sequentially, 4 bytes per pair of pixels; bytes 1 and 3
+ *        are the two luma samples, byte 0 feeds the 409 / -208 terms and
+ *        byte 2 feeds the 516 / -100 terms
+ * rgbs:  receives width * height ints, each (r << 16) | (g << 8) | b
+ *
+ * With an odd width the last 4 byte group of a row still gets consumed
+ * but only its first pixel is stored, so nothing is written past the row
+ * (previously the second pixel was stored at column == width, which on
+ * the last row is past the end of rgbs).  Always returns 0.
+ * NOTE(review): the 409 term lands in b and the 516 term in r; confirm
+ * the intended channel order against the consumers of rgbs. */
+int
+UYVY_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs)
+{
+    int luma[2];
+    int c;
+    int d;
+    int e;
+    int r;
+    int g;
+    int b;
+    int t;
+    int i;
+    int j;
+    int k;
+
+    for (j = 0; j < height; j++)
+    {
+        for (i = 0; i < width; i += 2)
+        {
+            /* two luma samples share one pair of chroma samples */
+            e = *(yuvs++) - 128;
+            luma[0] = *(yuvs++);
+            d = *(yuvs++) - 128;
+            luma[1] = *(yuvs++);
+
+            /* second pixel is skipped when it falls outside the row */
+            for (k = 0; (k < 2) && (i + k < width); k++)
+            {
+                c = luma[k] - 16;
+                t = (298 * c + 409 * e + 128) >> 8;
+                b = RDPCLAMP(t, 0, 255);
+                t = (298 * c - 100 * d - 208 * e + 128) >> 8;
+                g = RDPCLAMP(t, 0, 255);
+                t = (298 * c + 516 * d + 128) >> 8;
+                r = RDPCLAMP(t, 0, 255);
+                rgbs[j * width + i + k] = (r << 16) | (g << 8) | b;
+            }
+        }
+    }
+    return 0;
+}
+
+#if 0
+/*****************************************************************************/
+/* disabled variant: no scaling, copies the top left area common to the
+   source and the destination line by line; src_x, src_y, src_w and src_h
+   are not used */
+static int
+stretch_RGB32_RGB32(int *src, int src_width, int src_height,
+                    int src_x, int src_y, int src_w, int src_h,
+                    int *dst, int dst_w, int dst_h)
+{
+    int copy_w;
+    int copy_h;
+    int row;
+
+    copy_w = RDPMIN(src_width, dst_w);
+    copy_h = RDPMIN(src_height, dst_h);
+    for (row = 0; row < copy_h; row++)
+    {
+        g_memcpy(dst + row * dst_w, src + row * src_width, copy_w * 4);
+    }
+    return 0;
+}
+#endif
+
+/*****************************************************************************/
+/* Nearest neighbour scale of a 32 bit source rectangle into dst.
+ *
+ * src, src_width:        source pixels and source line length in pixels
+ * src_x, src_y:          top left corner of the source rectangle
+ * src_w, src_h:          size of the source rectangle
+ * dst, dst_w, dst_h:     destination pixels and size, lines are dst_w
+ *                        pixels apart
+ *
+ * Steps through the source with 16.16 fixed point increments oh / ov.
+ * When two destination lines map to the same source line the previous
+ * destination line is copied instead of being sampled again.
+ * src_height is not used.  Always returns 0; a destination with no
+ * pixels returns early because the step computation divides by dst_w
+ * and dst_h. */
+static int
+stretch_RGB32_RGB32(int *src, int src_width, int src_height,
+                    int src_x, int src_y, int src_w, int src_h,
+                    int *dst, int dst_w, int dst_h)
+{
+    int index;
+    int jndex;
+    int lndex;
+    int last_lndex;
+    int oh;
+    int ih;
+    int ov;
+    int iv;
+    int *src32;
+    int *dst32;
+
+    if ((dst_w < 1) || (dst_h < 1))
+    {
+        /* nothing to draw, and dividing by zero below must be avoided */
+        return 0;
+    }
+    oh = (src_w << 16) / dst_w;
+    ov = (src_h << 16) / dst_h;
+    /* logged only after oh and ov have been computed */
+    LLOGLN(10, ("stretch_RGB32_RGB32: oh 0x%8.8x ov 0x%8.8x", oh, ov));
+    iv = ov;
+    lndex = src_y;
+    last_lndex = -1;
+    for (index = 0; index < dst_h; index++)
+    {
+        dst32 = dst + index * dst_w;
+        if (lndex == last_lndex)
+        {
+            /* repeat line */
+            src32 = dst32 - dst_w;
+            g_memcpy(dst32, src32, dst_w * 4);
+        }
+        else
+        {
+            ih = oh;
+            src32 = src + lndex * src_width + src_x;
+            for (jndex = 0; jndex < dst_w; jndex++)
+            {
+                /* the source is read only when a pixel is stored, so no
+                   read happens past the last sample that is needed */
+                *dst32 = *src32;
+                while (ih > (1 << 16) - 1)
+                {
+                    ih -= 1 << 16;
+                    src32++;
+                }
+                ih += oh;
+                dst32++;
+            }
+        }
+        last_lndex = lndex;
+        while (iv > (1 << 16) - 1)
+        {
+            iv -= 1 << 16;
+            lndex++;
+        }
+        iv += ov;
+    }
+    LLOGLN(10, ("stretch_RGB32_RGB32: out"));
+    return 0;
+}
+
+/******************************************************************************/
+/* Timer callback armed by xrdpVidPutImage: frees the Xv scratch buffer and
+   clears the bookkeeping that goes with it.
+   arg is the rdpPtr that was handed to TimerSet.
+   returns 0 (NOTE(review): presumably this tells the server not to re-arm
+   the timer - confirm against the OsTimer callback contract) */
+static CARD32
+rdpDeferredXvCleanup(OsTimerPtr timer, CARD32 now, pointer arg)
+{
+    rdpPtr dev = (rdpPtr) arg;
+
+    LLOGLN(0, ("rdpDeferredXvCleanup:"));
+    /* drop the buffer first, then reset the fields describing it */
+    g_free(dev->xv_data);
+    dev->xv_data = 0;
+    dev->xv_data_bytes = 0;
+    dev->xv_timer_schedualed = 0;
+    return 0;
+}
+
+/*****************************************************************************/
+/* see hw/xfree86/common/xf86xv.c for info */
+/* XVideo PutImage hook: converts the client image in buf to 32 bit RGB,
+   scales the requested source rectangle to drw_w x drw_h and draws the
+   result on dst with a scratch GC.
+   src_x, src_y, src_w, src_h - rectangle of the image that is shown
+   drw_x, drw_y, drw_w, drw_h - where and how large it is drawn
+   format        - FOURCC_YV12, FOURCC_I420, FOURCC_YUY2 or FOURCC_UYVY
+   width, height - size of the image in buf
+   Both working images live in dev->xv_data, which is grown on demand and
+   released by rdpDeferredXvCleanup 2000 ms after the last call.
+   Every failure is logged or silently dropped; the function always
+   returns Success. */
+static int
+xrdpVidPutImage(ScrnInfoPtr pScrn,
+                short src_x, short src_y, short drw_x, short drw_y,
+                short src_w, short src_h, short drw_w, short drw_h,
+                int format, unsigned char* buf,
+                short width, short height,
+                Bool sync, RegionPtr clipBoxes,
+                pointer data, DrawablePtr dst)
+{
+    rdpPtr dev;
+    int *rgborg32;
+    int *rgbend32;
+    int pixels;
+    int index;
+    int error;
+    GCPtr tempGC;
+
+    LLOGLN(10, ("xrdpVidPutImage: format 0x%8.8x", format));
+    LLOGLN(10, ("xrdpVidPutImage: src_x %d srcy_y %d", src_x, src_y));
+    dev = XRDPPTR(pScrn);
+
+    /* an empty image or an empty target cannot be converted or scaled */
+    if ((width < 1) || (height < 1) || (drw_w < 1) || (drw_h < 1))
+    {
+        LLOGLN(10, ("xrdpVidPutImage: nothing to draw"));
+        return Success;
+    }
+    /* all four factors are shorts, so this sum always fits in an int, but
+       the byte count derived from it might not */
+    pixels = width * height + drw_w * drw_h;
+    if (pixels > (0x7fffffff - 64) / 4)
+    {
+        LLOGLN(0, ("xrdpVidPutImage: image too large"));
+        return Success;
+    }
+
+    /* (re)start the deferred cleanup of the scratch buffer */
+    if (dev->xv_timer_schedualed)
+    {
+        TimerCancel(dev->xv_timer);
+    }
+    dev->xv_timer_schedualed = 1;
+    dev->xv_timer = TimerSet(dev->xv_timer, 0, 2000,
+                             rdpDeferredXvCleanup, dev);
+
+    /* converted image + scaled image + slack for the two 16 byte roundings */
+    index = pixels * 4 + 64;
+    if (index > dev->xv_data_bytes)
+    {
+        g_free(dev->xv_data);
+        dev->xv_data = g_malloc(index, 0);
+        if (dev->xv_data == NULL)
+        {
+            LLOGLN(0, ("xrdpVidPutImage: memory alloc error"));
+            dev->xv_data_bytes = 0;
+            return Success;
+        }
+        dev->xv_data_bytes = index;
+    }
+    rgborg32 = (int *) RDPALIGN(dev->xv_data, 16);
+    rgbend32 = rgborg32 + width * height;
+    rgbend32 = (int *) RDPALIGN(rgbend32, 16);
+    error = 0;
+    switch (format)
+    {
+        case FOURCC_YV12:
+            LLOGLN(10, ("xrdpVidPutImage: FOURCC_YV12"));
+            error = dev->yv12_to_rgb32(buf, width, height, rgborg32);
+            break;
+        case FOURCC_I420:
+            LLOGLN(10, ("xrdpVidPutImage: FOURCC_I420"));
+            error = dev->i420_to_rgb32(buf, width, height, rgborg32);
+            break;
+        case FOURCC_YUY2:
+            LLOGLN(10, ("xrdpVidPutImage: FOURCC_YUY2"));
+            error = dev->yuy2_to_rgb32(buf, width, height, rgborg32);
+            break;
+        case FOURCC_UYVY:
+            LLOGLN(10, ("xrdpVidPutImage: FOURCC_UYVY"));
+            error = dev->uyvy_to_rgb32(buf, width, height, rgborg32);
+            break;
+        default:
+            LLOGLN(0, ("xrdpVidPutImage: unknown format 0x%8.8x", format));
+            return Success;
+    }
+    if (error != 0)
+    {
+        return Success;
+    }
+    error = stretch_RGB32_RGB32(rgborg32, width, height,
+                                src_x, src_y, src_w, src_h,
+                                rgbend32, drw_w, drw_h);
+    if (error != 0)
+    {
+        return Success;
+    }
+
+    tempGC = GetScratchGC(dst->depth, pScrn->pScreen);
+    if (tempGC != NULL)
+    {
+        ValidateGC(dst, tempGC);
+        (*tempGC->ops->PutImage)(dst, tempGC, 24,
+                                 drw_x - dst->x, drw_y - dst->y,
+                                 drw_w, drw_h, 0, ZPixmap, (char*)rgbend32);
+        FreeScratchGC(tempGC);
+    }
+
+    return Success;
+}
+
+/*****************************************************************************/
+/* XVideo QueryImageAttributes hook.
+   Adjusts *w and *h to a size this adaptor accepts and reports the memory
+   layout of an image of that size:
+   w, h    - in/out, limited to 2046; w is then rounded up to a multiple
+             of 4 and, for the planar formats, h is rounded up to even
+   pitches - optional out, bytes per row of each plane
+   offsets - optional out, byte offset of each plane
+   returns the number of bytes of the image, 0 for an unsupported id */
+static int
+xrdpVidQueryImageAttributes(ScrnInfoPtr pScrn, int id,
+                            unsigned short *w, unsigned short *h,
+                            int *pitches, int *offsets)
+{
+    int img_w;
+    int img_h;
+    int luma_bytes;
+    int chroma_pitch;
+    int chroma_bytes;
+    int total;
+
+    LLOGLN(10, ("xrdpVidQueryImageAttributes:"));
+    /* this is same code as all drivers currently have */
+    img_w = *w;
+    if (img_w > 2046)
+    {
+        img_w = 2046;
+    }
+    img_h = *h;
+    if (img_h > 2046)
+    {
+        img_h = 2046;
+    }
+    /* make w multiple of 4 so that resizing works properly */
+    img_w = (img_w + 3) & ~3;
+    *w = img_w;
+    *h = img_h;
+    if (offsets != NULL)
+    {
+        offsets[0] = 0;
+    }
+    switch (id)
+    {
+        case FOURCC_YV12:
+        case FOURCC_I420:
+            /* make h be even */
+            img_h = (img_h + 1) & ~1;
+            *h = img_h;
+            /* a Y row is as wide as the image, which is already padded
+               to a multiple of 4 */
+            luma_bytes = img_w * img_h;
+            /* a U or V row is half as wide, padded to a multiple of 4,
+               and there are half as many of them */
+            chroma_pitch = ((img_w >> 1) + 3) & ~3;
+            chroma_bytes = chroma_pitch * (img_h >> 1);
+            if (pitches != NULL)
+            {
+                pitches[0] = img_w;
+                pitches[1] = chroma_pitch;
+                pitches[2] = chroma_pitch;
+            }
+            if (offsets != NULL)
+            {
+                /* second plane follows the Y plane, third follows the second */
+                offsets[1] = luma_bytes;
+                offsets[2] = luma_bytes + chroma_bytes;
+            }
+            total = luma_bytes + chroma_bytes + chroma_bytes;
+            break;
+        case FOURCC_YUY2:
+        case FOURCC_UYVY:
+            /* packed formats, two bytes per pixel in a single plane */
+            total = img_w * 2;
+            if (pitches != NULL)
+            {
+                pitches[0] = total;
+            }
+            total *= img_h;
+            break;
+        default:
+            LLOGLN(0, ("xrdpVidQueryImageAttributes: Unsupported image"));
+            return 0;
+    }
+    LLOGLN(10, ("xrdpVidQueryImageAttributes: finished size %d id 0x%x", total, id));
+    return total;
+}
+
+/*****************************************************************************/
+/* Register the XVideo adaptor of this driver with the screen.
+   Fills an XF86VideoAdaptorRec with the static encoding, format and image
+   tables and the xrdpVid* callbacks and hands it to xf86XVScreenInit.
+   Everything allocated here is released again when a step fails.
+   returns 1 on success, 0 on failure */
+Bool
+rdpXvInit(ScreenPtr pScreen, ScrnInfoPtr pScrn)
+{
+    XF86VideoAdaptorPtr adaptor;
+    DevUnion* pDevUnion;
+    int bytes;
+
+    adaptor = xf86XVAllocateVideoAdaptorRec(pScrn);
+    if (adaptor == 0)
+    {
+        LLOGLN(0, ("rdpXvInit: xf86XVAllocateVideoAdaptorRec failed"));
+        return 0;
+    }
+    /* one private slot per port */
+    bytes = sizeof(DevUnion) * T_MAX_PORTS;
+    pDevUnion = (DevUnion*) g_malloc(bytes, 1);
+    if (pDevUnion == 0)
+    {
+        LLOGLN(0, ("rdpXvInit: g_malloc failed"));
+        xf86XVFreeVideoAdaptorRec(adaptor);
+        return 0;
+    }
+    adaptor->type = XvInputMask | XvImageMask | XvVideoMask | XvStillMask | XvWindowMask | XvPixmapMask;
+    //adaptor->flags = VIDEO_NO_CLIPPING;
+    //adaptor->flags = VIDEO_CLIP_TO_VIEWPORT;
+    adaptor->flags = 0;
+    adaptor->name = XRDP_MODULE_NAME " XVideo Adaptor";
+    adaptor->nEncodings = T_NUM_ENCODINGS;
+    adaptor->pEncodings = &(g_xrdpVidEncodings[0]);
+    adaptor->nFormats = T_NUM_FORMATS;
+    adaptor->pFormats = &(g_xrdpVidFormats[0]);
+    /* the first format entry follows the depth of the screen */
+    adaptor->pFormats[0].depth = pScrn->depth;
+    LLOGLN(0, ("rdpXvInit: depth %d", pScrn->depth));
+    adaptor->nImages = sizeof(g_xrdpVidImages) / sizeof(XF86ImageRec);
+    adaptor->pImages = g_xrdpVidImages;
+    adaptor->nAttributes = 0;
+    adaptor->pAttributes = 0;
+    adaptor->nPorts = T_MAX_PORTS;
+    adaptor->pPortPrivates = pDevUnion;
+    adaptor->PutVideo = xrdpVidPutVideo;
+    adaptor->PutStill = xrdpVidPutStill;
+    adaptor->GetVideo = xrdpVidGetVideo;
+    adaptor->GetStill = xrdpVidGetStill;
+    adaptor->StopVideo = xrdpVidStopVideo;
+    adaptor->SetPortAttribute = xrdpVidSetPortAttribute;
+    adaptor->GetPortAttribute = xrdpVidGetPortAttribute;
+    adaptor->QueryBestSize = xrdpVidQueryBestSize;
+    adaptor->PutImage = xrdpVidPutImage;
+    adaptor->QueryImageAttributes = xrdpVidQueryImageAttributes;
+    if (!xf86XVScreenInit(pScreen, &adaptor, 1))
+    {
+        LLOGLN(0, ("rdpXvInit: xf86XVScreenInit failed"));
+        xf86XVFreeVideoAdaptorRec(adaptor);
+        g_free(pDevUnion);
+        return 0;
+    }
+    /* NOTE(review): pDevUnion is deliberately left allocated on success, as
+       before - confirm whether the Xv layer still refers to the array once
+       xf86XVScreenInit has returned before freeing it here too */
+    xf86XVFreeVideoAdaptorRec(adaptor);
+    return 1;
+}
+
diff --git a/xorg/server/module/rdpXv.h b/xorg/server/module/rdpXv.h
new file mode 100644
index 00000000..9cf28700
--- /dev/null
+++ b/xorg/server/module/rdpXv.h
@@ -0,0 +1,43 @@
+/*
+Copyright 2014 Jay Sorg
+
+Permission to use, copy, modify, distribute, and sell this software and its
+documentation for any purpose is hereby granted without fee, provided that
+the above copyright notice appear in all copies and that both that
+copyright notice and this permission notice appear in supporting
+documentation.
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+XVideo
+
+*/
+
+#ifndef __RDPXV_H
+#define __RDPXV_H
+
+#include <xorg-server.h>
+#include <xorgVersion.h>
+#include <xf86.h>
+
+/* register the XVideo adaptor with the screen, returns 1 on success and
+   0 on failure */
+Bool
+rdpXvInit(ScreenPtr pScreen, ScrnInfoPtr pScrn);
+
+/* C converters from the named YUV layout to 32 bit RGB.
+   yuvs is the source image of width x height pixels, rgbs receives
+   width * height ints.  xrdpVidPutImage treats a non zero return value
+   as an error. */
+int
+YV12_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs);
+int
+I420_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs);
+int
+YUY2_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs);
+int
+UYVY_to_RGB32(unsigned char *yuvs, int width, int height, int *rgbs);
+
+#endif
diff --git a/xorg/server/module/x86/cpuid_x86.asm b/xorg/server/module/x86/cpuid_x86.asm
new file mode 100644
index 00000000..6f9e8c2d
--- /dev/null
+++ b/xorg/server/module/x86/cpuid_x86.asm
@@ -0,0 +1,39 @@
+
+SECTION .text
+
+; PROC name
+; emits a 16 byte aligned, exported (global) label called 'name'
+%macro PROC 1
+ align 16
+ global %1
+ %1:
+%endmacro
+
+;int
+;cpuid_x86(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx)
+;
+; Executes CPUID with eax = eax_in and ecx = ecx_in and stores the resulting
+; eax, ebx, ecx and edx through the four pointer arguments.
+; All arguments are read from the stack: the four pushes below plus the
+; return address place the first argument at [esp + 20].
+; ebx, ecx, edx and edi are restored before returning; eax returns 0.
+
+PROC cpuid_x86
+ ; save registers
+ push ebx
+ push ecx
+ push edx
+ push edi
+ ; cpuid
+ mov eax, [esp + 20]
+ mov ecx, [esp + 24]
+ cpuid
+ ; edi is the scratch pointer for each of the four output arguments
+ mov edi, [esp + 28]
+ mov [edi], eax
+ mov edi, [esp + 32]
+ mov [edi], ebx
+ mov edi, [esp + 36]
+ mov [edi], ecx
+ mov edi, [esp + 40]
+ mov [edi], edx
+ ; return value
+ mov eax, 0
+ ; restore registers
+ pop edi
+ pop edx
+ pop ecx
+ pop ebx
+ ret;
+ align 16
+
diff --git a/xorg/server/module/x86/funcs_x86.h b/xorg/server/module/x86/funcs_x86.h
new file mode 100644
index 00000000..00724e62
--- /dev/null
+++ b/xorg/server/module/x86/funcs_x86.h
@@ -0,0 +1,39 @@
+/*
+Copyright 2014 Jay Sorg
+
+Permission to use, copy, modify, distribute, and sell this software and its
+documentation for any purpose is hereby granted without fee, provided that
+the above copyright notice appear in all copies and that both that
+copyright notice and this permission notice appear in supporting
+documentation.
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+x86 asm files
+
+*/
+
+#ifndef __FUNCS_X86_H
+#define __FUNCS_X86_H
+
+/* runs CPUID with eax = eax_in and ecx = ecx_in and stores the result
+   registers through eax, ebx, ecx and edx; returns 0 */
+int
+cpuid_x86(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx);
+/* SSE2 converters implemented in the .asm files of this directory.
+   The yv12 and i420 versions store with movdqa, so rgbs has to be 16 byte
+   aligned.  The yuy2 and uyvy versions are placeholders that return 0
+   without converting anything. */
+int
+yv12_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
+int
+i420_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
+int
+yuy2_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
+int
+uyvy_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
+
+#endif
+
diff --git a/xorg/server/module/x86/i420_to_rgb32_x86_sse2.asm b/xorg/server/module/x86/i420_to_rgb32_x86_sse2.asm
new file mode 100644
index 00000000..0c7a6e1e
--- /dev/null
+++ b/xorg/server/module/x86/i420_to_rgb32_x86_sse2.asm
@@ -0,0 +1,243 @@
+;
+;Copyright 2014 Jay Sorg
+;
+;Permission to use, copy, modify, distribute, and sell this software and its
+;documentation for any purpose is hereby granted without fee, provided that
+;the above copyright notice appear in all copies and that both that
+;copyright notice and this permission notice appear in supporting
+;documentation.
+;
+;The above copyright notice and this permission notice shall be included in
+;all copies or substantial portions of the Software.
+;
+;THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+;IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+;FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+;OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+;AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+;CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+;
+;I420 to RGB32
+;x86 SSE2 32 bit
+;
+; RGB to YUV
+; 0.299 0.587 0.114
+; -0.14713 -0.28886 0.436
+; 0.615 -0.51499 -0.10001
+; YUV to RGB
+; 1 0 1.13983
+; 1 -0.39465 -0.58060
+; 1 2.03211 0
+; shift left 12
+; 4096 0 4669
+; 4096 -1616 -2378
+; 4096 9324 0
+
+; Constant vectors, each eight identical 16 bit words, 16 byte aligned so
+; that do8_uv / do8 can load them with movdqa.
+; c128 is subtracted from the chroma samples; the other four are the
+; magnitudes of the matrix entries listed in the header of this file (the
+; two negative entries are applied with psubw).
+; NOTE(review): 2.03211 * 4096 is about 8324, the table uses 9324 - confirm
+; which value is intended.
+SECTION .data
+align 16
+c128 times 8 dw 128
+c4669 times 8 dw 4669
+c1616 times 8 dw 1616
+c2378 times 8 dw 2378
+c9324 times 8 dw 9324
+
+SECTION .text
+
+; PROC name
+; emits a 16 byte aligned, exported (global) label called 'name'
+%macro PROC 1
+ align 16
+ global %1
+ %1:
+%endmacro
+
+; do8_uv / do8 - file local helpers, entered with 'call'
+;
+; do8_uv loads four bytes from [ebx] and four from [edx], advances both
+; pointers by 4 and widens every byte to two adjacent 16 bit words holding
+; (byte - 128) << 4, so each chroma sample serves two pixels.  The results
+; stay in xmm1 ([ebx]) and xmm2 ([edx]); execution then falls through into
+; do8.
+;
+; do8 converts the eight bytes at [esi] with the chroma already held in xmm1
+; and xmm2, writes eight 32 bit pixels with two movdqa stores (edi has to be
+; 16 byte aligned) and advances esi by 8 and edi by 32.  xmm1 and xmm2 are
+; not modified, which lets the caller run do8 again for the next row.
+;
+; Each stored dword holds, from low byte to high byte: the xmm3 channel, the
+; xmm4 channel (g), the xmm5 channel and zero.
+; Overwritten: xmm0, xmm3 - xmm6 (do8_uv additionally xmm1, xmm2, xmm7).
+;
+; NOTE(review): the caller passes the pointer it calls 'u' in ebx and 'v' in
+; edx while the comments below label them the other way round, and the value
+; computed as 'r' is packed as 'b' - confirm the intended channel mapping.
+do8_uv:
+
+ ; v
+ movd xmm1, [ebx] ; 4 at a time
+ lea ebx, [ebx + 4]
+ punpcklbw xmm1, xmm1 ; duplicate each sample
+ pxor xmm6, xmm6
+ punpcklbw xmm1, xmm6 ; bytes to words
+ movdqa xmm7, [c128]
+ psubw xmm1, xmm7
+ psllw xmm1, 4
+
+ ; u
+ movd xmm2, [edx] ; 4 at a time
+ lea edx, [edx + 4]
+ punpcklbw xmm2, xmm2 ; duplicate each sample
+ punpcklbw xmm2, xmm6 ; bytes to words
+ psubw xmm2, xmm7
+ psllw xmm2, 4
+
+do8:
+
+ ; y
+ movq xmm0, [esi] ; 8 at a time
+ lea esi, [esi + 8]
+ pxor xmm6, xmm6
+ punpcklbw xmm0, xmm6 ; bytes to words
+
+ ; r = y + hiword(4669 * (v << 4))
+ movdqa xmm4, [c4669]
+ pmulhw xmm4, xmm1
+ movdqa xmm3, xmm0
+ paddw xmm3, xmm4
+
+ ; g = y - hiword(1616 * (u << 4)) - hiword(2378 * (v << 4))
+ movdqa xmm5, [c1616]
+ pmulhw xmm5, xmm2
+ movdqa xmm6, [c2378]
+ pmulhw xmm6, xmm1
+ movdqa xmm4, xmm0
+ psubw xmm4, xmm5
+ psubw xmm4, xmm6
+
+ ; b = y + hiword(9324 * (u << 4))
+ movdqa xmm6, [c9324]
+ pmulhw xmm6, xmm2
+ movdqa xmm5, xmm0
+ paddw xmm5, xmm6
+
+ ; packuswb saturates every word to 0..255
+ packuswb xmm3, xmm3 ; b
+ packuswb xmm4, xmm4 ; g
+ punpcklbw xmm3, xmm4 ; gb
+
+ pxor xmm4, xmm4 ; a
+ packuswb xmm5, xmm5 ; r
+ punpcklbw xmm5, xmm4 ; ar
+
+ ; interleave the two word vectors into eight dwords, four per store
+ movdqa xmm4, xmm3
+ punpcklwd xmm3, xmm5 ; argb
+ movdqa [edi], xmm3
+ lea edi, [edi + 16]
+ punpckhwd xmm4, xmm5 ; argb
+ movdqa [edi], xmm4
+ lea edi, [edi + 16]
+
+ ret;
+
+;int
+;i420_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
+;
+; Converts a planar image (Y plane followed by two quarter size chroma
+; planes) to 32 bit pixels, two rows at a time, eight pixels per step.
+; Arguments are read from the stack, the first one is at [esp + 20] after
+; the four pushes.  rgbs is written with movdqa and has to be 16 byte
+; aligned.
+; Returns 0, or 1 without touching rgbs when width < 8 or height < 2: the
+; loop counters would otherwise start at zero and wrap around.
+; NOTE(review): each row converts width >> 3 groups of eight pixels; the
+; remaining (width & 7) pixels are not converted and the row pointers no
+; longer line up when width is not a multiple of 8 - confirm what callers
+; pass.
+;
+; Stack frame (28 bytes):
+;   [esp +  0] y pointer, first row of the pair
+;   [esp +  4] y pointer, second row of the pair
+;   [esp +  8] first chroma plane pointer
+;   [esp + 12] second chroma plane pointer
+;   [esp + 16] rgbs pointer, first row of the pair
+;   [esp + 20] rgbs pointer, second row of the pair
+;   [esp + 24] width, saved while edx is used as a plane pointer
+; ebp counts the remaining row pairs, ecx the remaining groups of a row.
+
+PROC i420_to_rgb32_x86_sse2
+    push ebx
+    push esi
+    push edi
+    push ebp
+
+    mov edi, [esp + 32]                ; rgbs
+
+    mov ecx, [esp + 24]                ; width
+    mov edx, ecx
+    mov ebp, [esp + 28]                ; height
+
+    ; reject sizes that would give a zero trip count
+    cmp ecx, 8
+    jl i420_bad_size
+    cmp ebp, 2
+    jl i420_bad_size
+
+    mov eax, ebp
+    shr ebp, 1                         ; ebp = number of row pairs
+    imul eax, ecx                      ; eax = width * height
+
+    mov esi, [esp + 20]                ; y
+
+    mov ebx, esi                       ; u = y + width * height
+    add ebx, eax
+
+    sub esp, 28                        ; local vars, 28 bytes
+    mov [esp + 0], esi                 ; save y1
+    add esi, edx
+    mov [esp + 4], esi                 ; save y2
+    mov [esp + 8], ebx                 ; save u
+    shr eax, 2
+    add ebx, eax                       ; v = u + (width * height / 4)
+    mov [esp + 12], ebx                ; save v
+
+    mov [esp + 16], edi                ; save rgbs1
+    mov eax, edx
+    shl eax, 2
+    add edi, eax                       ; rgbs2 = rgbs1 + width * 4
+    mov [esp + 20], edi                ; save rgbs2
+
+loop_y:
+
+    mov ecx, edx                       ; width
+    shr ecx, 3                         ; groups of eight pixels per row
+
+    ; save edx
+    mov [esp + 24], edx
+
+    ;prefetchnta 4096[esp + 0] ; y
+    ;prefetchnta 1024[esp + 8] ; u
+    ;prefetchnta 1024[esp + 12] ; v
+
+loop_x:
+
+    mov esi, [esp + 0]                 ; y1
+    mov ebx, [esp + 8]                 ; u
+    mov edx, [esp + 12]                ; v
+    mov edi, [esp + 16]                ; rgbs1
+
+    ; y1, loads new chroma and converts the first row
+    call do8_uv
+
+    mov [esp + 0], esi                 ; y1
+    mov [esp + 16], edi                ; rgbs1
+
+    mov esi, [esp + 4]                 ; y2
+    mov edi, [esp + 20]                ; rgbs2
+
+    ; y2, second row reuses the chroma left in xmm1 / xmm2
+    call do8
+
+    mov [esp + 4], esi                 ; y2
+    mov [esp + 8], ebx                 ; u
+    mov [esp + 12], edx                ; v
+    mov [esp + 20], edi                ; rgbs2
+
+    dec ecx                            ; width
+    jnz loop_x
+
+    ; restore edx
+    mov edx, [esp + 24]
+
+    ; update y1 and 2, both skip the row the other one has just converted
+    mov eax, [esp + 0]
+    mov ebx, edx
+    add eax, ebx
+    mov [esp + 0], eax
+
+    mov eax, [esp + 4]
+    add eax, ebx
+    mov [esp + 4], eax
+
+    ; update rgb1 and 2
+    mov eax, [esp + 16]
+    mov ebx, edx
+    shl ebx, 2
+    add eax, ebx
+    mov [esp + 16], eax
+
+    mov eax, [esp + 20]
+    add eax, ebx
+    mov [esp + 20], eax
+
+    mov ecx, ebp
+    dec ecx                            ; height
+    mov ebp, ecx                       ; mov keeps the flags set by dec
+    jnz loop_y
+
+    add esp, 28
+
+    mov eax, 0
+i420_exit:
+    pop ebp
+    pop edi
+    pop esi
+    pop ebx
+    ret
+
+i420_bad_size:
+    mov eax, 1
+    jmp i420_exit
+    align 16
+
+
diff --git a/xorg/server/module/x86/uyvy_to_rgb32_x86_sse2.asm b/xorg/server/module/x86/uyvy_to_rgb32_x86_sse2.asm
new file mode 100644
index 00000000..d3ba81d3
--- /dev/null
+++ b/xorg/server/module/x86/uyvy_to_rgb32_x86_sse2.asm
@@ -0,0 +1,22 @@
+
+; PROC name
+; emits a 16 byte aligned, exported (global) label called 'name'
+%macro PROC 1
+    align 16
+    global %1
+%1:
+%endmacro
+
+; int
+; uyvy_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
+;
+; Placeholder: reads none of its arguments, writes nothing to rgbs and
+; returns 0.  No callee saved register is touched, so none is saved.
+
+PROC uyvy_to_rgb32_x86_sse2
+    mov eax, 0
+    ret
+    align 16
+
diff --git a/xorg/server/module/x86/yuy2_to_rgb32_x86_sse2.asm b/xorg/server/module/x86/yuy2_to_rgb32_x86_sse2.asm
new file mode 100644
index 00000000..da03e26f
--- /dev/null
+++ b/xorg/server/module/x86/yuy2_to_rgb32_x86_sse2.asm
@@ -0,0 +1,22 @@
+
+; PROC name
+; emits a 16 byte aligned, exported (global) label called 'name'
+%macro PROC 1
+    align 16
+    global %1
+%1:
+%endmacro
+
+; int
+; yuy2_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
+;
+; Placeholder: reads none of its arguments, writes nothing to rgbs and
+; returns 0.  No callee saved register is touched, so none is saved.
+
+PROC yuy2_to_rgb32_x86_sse2
+    mov eax, 0
+    ret
+    align 16
+
diff --git a/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm b/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm
new file mode 100644
index 00000000..d50a65a2
--- /dev/null
+++ b/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm
@@ -0,0 +1,243 @@
+;
+;Copyright 2014 Jay Sorg
+;
+;Permission to use, copy, modify, distribute, and sell this software and its
+;documentation for any purpose is hereby granted without fee, provided that
+;the above copyright notice appear in all copies and that both that
+;copyright notice and this permission notice appear in supporting
+;documentation.
+;
+;The above copyright notice and this permission notice shall be included in
+;all copies or substantial portions of the Software.
+;
+;THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+;IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+;FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+;OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+;AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+;CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+;
+;YV12 to RGB32
+;x86 SSE2 32 bit
+;
+; RGB to YUV
+; 0.299 0.587 0.114
+; -0.14713 -0.28886 0.436
+; 0.615 -0.51499 -0.10001
+; YUV to RGB
+; 1 0 1.13983
+; 1 -0.39465 -0.58060
+; 1 2.03211 0
+; shift left 12
+; 4096 0 4669
+; 4096 -1616 -2378
+; 4096 9324 0
+
+; Constant vectors, each eight identical 16 bit words, 16 byte aligned so
+; that do8_uv / do8 can load them with movdqa.
+; c128 is subtracted from the chroma samples; the other four are the
+; magnitudes of the matrix entries listed in the header of this file (the
+; two negative entries are applied with psubw).
+; NOTE(review): 2.03211 * 4096 is about 8324, the table uses 9324 - confirm
+; which value is intended.
+SECTION .data
+align 16
+c128 times 8 dw 128
+c4669 times 8 dw 4669
+c1616 times 8 dw 1616
+c2378 times 8 dw 2378
+c9324 times 8 dw 9324
+
+SECTION .text
+
+; PROC name
+; emits a 16 byte aligned, exported (global) label called 'name'
+%macro PROC 1
+ align 16
+ global %1
+ %1:
+%endmacro
+
+; do8_uv / do8 - file local helpers, entered with 'call'
+;
+; do8_uv loads four bytes from [ebx] and four from [edx], advances both
+; pointers by 4 and widens every byte to two adjacent 16 bit words holding
+; (byte - 128) << 4, so each chroma sample serves two pixels.  The results
+; stay in xmm1 ([ebx]) and xmm2 ([edx]); execution then falls through into
+; do8.
+;
+; do8 converts the eight bytes at [esi] with the chroma already held in xmm1
+; and xmm2, writes eight 32 bit pixels with two movdqa stores (edi has to be
+; 16 byte aligned) and advances esi by 8 and edi by 32.  xmm1 and xmm2 are
+; not modified, which lets the caller run do8 again for the next row.
+;
+; Each stored dword holds, from low byte to high byte: the xmm3 channel, the
+; xmm4 channel (g), the xmm5 channel and zero.
+; Overwritten: xmm0, xmm3 - xmm6 (do8_uv additionally xmm1, xmm2, xmm7).
+;
+; NOTE(review): the value computed as 'r' is packed as 'b' and the one
+; computed as 'b' as 'r'; also confirm that the plane the caller passes in
+; ebx really is the one labelled 'u' below.
+do8_uv:
+
+ ; u
+ movd xmm1, [ebx] ; 4 at a time
+ lea ebx, [ebx + 4]
+ punpcklbw xmm1, xmm1 ; duplicate each sample
+ pxor xmm6, xmm6
+ punpcklbw xmm1, xmm6 ; bytes to words
+ movdqa xmm7, [c128]
+ psubw xmm1, xmm7
+ psllw xmm1, 4
+
+ ; v
+ movd xmm2, [edx] ; 4 at a time
+ lea edx, [edx + 4]
+ punpcklbw xmm2, xmm2 ; duplicate each sample
+ punpcklbw xmm2, xmm6 ; bytes to words
+ psubw xmm2, xmm7
+ psllw xmm2, 4
+
+do8:
+
+ ; y
+ movq xmm0, [esi] ; 8 at a time
+ lea esi, [esi + 8]
+ pxor xmm6, xmm6
+ punpcklbw xmm0, xmm6 ; bytes to words
+
+ ; r = y + hiword(4669 * (v << 4))
+ movdqa xmm4, [c4669]
+ pmulhw xmm4, xmm2
+ movdqa xmm3, xmm0
+ paddw xmm3, xmm4
+
+ ; g = y - hiword(1616 * (u << 4)) - hiword(2378 * (v << 4))
+ movdqa xmm5, [c1616]
+ pmulhw xmm5, xmm1
+ movdqa xmm6, [c2378]
+ pmulhw xmm6, xmm2
+ movdqa xmm4, xmm0
+ psubw xmm4, xmm5
+ psubw xmm4, xmm6
+
+ ; b = y + hiword(9324 * (u << 4))
+ movdqa xmm6, [c9324]
+ pmulhw xmm6, xmm1
+ movdqa xmm5, xmm0
+ paddw xmm5, xmm6
+
+ ; packuswb saturates every word to 0..255
+ packuswb xmm3, xmm3 ; b
+ packuswb xmm4, xmm4 ; g
+ punpcklbw xmm3, xmm4 ; gb
+
+ pxor xmm4, xmm4 ; a
+ packuswb xmm5, xmm5 ; r
+ punpcklbw xmm5, xmm4 ; ar
+
+ ; interleave the two word vectors into eight dwords, four per store
+ movdqa xmm4, xmm3
+ punpcklwd xmm3, xmm5 ; argb
+ movdqa [edi], xmm3
+ lea edi, [edi + 16]
+ punpckhwd xmm4, xmm5 ; argb
+ movdqa [edi], xmm4
+ lea edi, [edi + 16]
+
+ ret;
+
+;int
+;yv12_to_rgb32_x86_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
+;
+; Converts a planar image (Y plane followed by two quarter size chroma
+; planes) to 32 bit pixels, two rows at a time, eight pixels per step.
+; Arguments are read from the stack, the first one is at [esp + 20] after
+; the four pushes.  rgbs is written with movdqa and has to be 16 byte
+; aligned.
+; Returns 0, or 1 without touching rgbs when width < 8 or height < 2: the
+; loop counters would otherwise start at zero and wrap around.
+; NOTE(review): each row converts width >> 3 groups of eight pixels; the
+; remaining (width & 7) pixels are not converted and the row pointers no
+; longer line up when width is not a multiple of 8 - confirm what callers
+; pass.
+;
+; Stack frame (28 bytes):
+;   [esp +  0] y pointer, first row of the pair
+;   [esp +  4] y pointer, second row of the pair
+;   [esp +  8] first chroma plane pointer
+;   [esp + 12] second chroma plane pointer
+;   [esp + 16] rgbs pointer, first row of the pair
+;   [esp + 20] rgbs pointer, second row of the pair
+;   [esp + 24] width, saved while edx is used as a plane pointer
+; ebp counts the remaining row pairs, ecx the remaining groups of a row.
+
+PROC yv12_to_rgb32_x86_sse2
+    push ebx
+    push esi
+    push edi
+    push ebp
+
+    mov edi, [esp + 32]                ; rgbs
+
+    mov ecx, [esp + 24]                ; width
+    mov edx, ecx
+    mov ebp, [esp + 28]                ; height
+
+    ; reject sizes that would give a zero trip count
+    cmp ecx, 8
+    jl yv12_bad_size
+    cmp ebp, 2
+    jl yv12_bad_size
+
+    mov eax, ebp
+    shr ebp, 1                         ; ebp = number of row pairs
+    imul eax, ecx                      ; eax = width * height
+
+    mov esi, [esp + 20]                ; y
+
+    mov ebx, esi                       ; u = y + width * height
+    add ebx, eax
+
+    sub esp, 28                        ; local vars, 28 bytes
+    mov [esp + 0], esi                 ; save y1
+    add esi, edx
+    mov [esp + 4], esi                 ; save y2
+    mov [esp + 8], ebx                 ; save u
+    shr eax, 2
+    add ebx, eax                       ; v = u + (width * height / 4)
+    mov [esp + 12], ebx                ; save v
+
+    mov [esp + 16], edi                ; save rgbs1
+    mov eax, edx
+    shl eax, 2
+    add edi, eax                       ; rgbs2 = rgbs1 + width * 4
+    mov [esp + 20], edi                ; save rgbs2
+
+loop_y:
+
+    mov ecx, edx                       ; width
+    shr ecx, 3                         ; groups of eight pixels per row
+
+    ; save edx
+    mov [esp + 24], edx
+
+    ;prefetchnta 4096[esp + 0] ; y
+    ;prefetchnta 1024[esp + 8] ; u
+    ;prefetchnta 1024[esp + 12] ; v
+
+loop_x:
+
+    mov esi, [esp + 0]                 ; y1
+    mov ebx, [esp + 8]                 ; u
+    mov edx, [esp + 12]                ; v
+    mov edi, [esp + 16]                ; rgbs1
+
+    ; y1, loads new chroma and converts the first row
+    call do8_uv
+
+    mov [esp + 0], esi                 ; y1
+    mov [esp + 16], edi                ; rgbs1
+
+    mov esi, [esp + 4]                 ; y2
+    mov edi, [esp + 20]                ; rgbs2
+
+    ; y2, second row reuses the chroma left in xmm1 / xmm2
+    call do8
+
+    mov [esp + 4], esi                 ; y2
+    mov [esp + 8], ebx                 ; u
+    mov [esp + 12], edx                ; v
+    mov [esp + 20], edi                ; rgbs2
+
+    dec ecx                            ; width
+    jnz loop_x
+
+    ; restore edx
+    mov edx, [esp + 24]
+
+    ; update y1 and 2, both skip the row the other one has just converted
+    mov eax, [esp + 0]
+    mov ebx, edx
+    add eax, ebx
+    mov [esp + 0], eax
+
+    mov eax, [esp + 4]
+    add eax, ebx
+    mov [esp + 4], eax
+
+    ; update rgb1 and 2
+    mov eax, [esp + 16]
+    mov ebx, edx
+    shl ebx, 2
+    add eax, ebx
+    mov [esp + 16], eax
+
+    mov eax, [esp + 20]
+    add eax, ebx
+    mov [esp + 20], eax
+
+    mov ecx, ebp
+    dec ecx                            ; height
+    mov ebp, ecx                       ; mov keeps the flags set by dec
+    jnz loop_y
+
+    add esp, 28
+
+    mov eax, 0
+yv12_exit:
+    pop ebp
+    pop edi
+    pop esi
+    pop ebx
+    ret
+
+yv12_bad_size:
+    mov eax, 1
+    jmp yv12_exit
+    align 16
+
+