summary | refs | log | tree | commit | diff | stats
path: root/xorg/server/module
diff options
context:
space:
mode:
Diffstat (limited to 'xorg/server/module')
-rw-r--r-- xorg/server/module/Makefile 5
-rw-r--r-- xorg/server/module/amd64/cpuid_amd64.asm 41
-rw-r--r-- xorg/server/module/amd64/funcs_amd64.h 39
-rw-r--r-- xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm 7
-rw-r--r-- xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm 7
-rw-r--r-- xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm 7
-rw-r--r-- xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm 235
-rw-r--r-- xorg/server/module/rdpXv.c 41
-rw-r--r-- xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm 12
9 files changed, 356 insertions, 38 deletions
diff --git a/xorg/server/module/Makefile b/xorg/server/module/Makefile
index 6de97c05..8f4f442b 100644
--- a/xorg/server/module/Makefile
+++ b/xorg/server/module/Makefile
@@ -9,7 +9,7 @@ rdpComposite.o rdpGlyphs.o rdpPixmap.o rdpInput.o rdpClientCon.o rdpCapture.o \
rdpTrapezoids.o rdpXv.o
;OBJS += cpuid_x86.o i420_to_rgb32_x86_sse2.o yv12_to_rgb32_x86_sse2.o yuy2_to_rgb32_x86_sse2.o uyvy_to_rgb32_x86_sse2.o
-;OBJS += i420_to_rgb32_amd64_sse2.o yv12_to_rgb32_amd64_sse2.o yuy2_to_rgb32_amd64_sse2.o uyvy_to_rgb32_amd64_sse2.o
+;OBJS += cpuid_amd64.o i420_to_rgb32_amd64_sse2.o yv12_to_rgb32_amd64_sse2.o yuy2_to_rgb32_amd64_sse2.o uyvy_to_rgb32_amd64_sse2.o
CFLAGS = -g -O2 -Wall -fPIC -I/usr/include/xorg -I/usr/include/pixman-1 \
-I../../../common
@@ -41,6 +41,9 @@ yuy2_to_rgb32_x86_sse2.o: x86/yuy2_to_rgb32_x86_sse2.asm
uyvy_to_rgb32_x86_sse2.o: x86/uyvy_to_rgb32_x86_sse2.asm
yasm -f elf32 -g dwarf2 x86/uyvy_to_rgb32_x86_sse2.asm
+cpuid_amd64.o: amd64/cpuid_amd64.asm
+ yasm -f elf64 -g dwarf2 amd64/cpuid_amd64.asm
+
i420_to_rgb32_amd64_sse2.o: amd64/i420_to_rgb32_amd64_sse2.asm
yasm -f elf64 -g dwarf2 amd64/i420_to_rgb32_amd64_sse2.asm
diff --git a/xorg/server/module/amd64/cpuid_amd64.asm b/xorg/server/module/amd64/cpuid_amd64.asm
new file mode 100644
index 00000000..b97937ad
--- /dev/null
+++ b/xorg/server/module/amd64/cpuid_amd64.asm
@@ -0,0 +1,41 @@
+
+SECTION .text
+
+%macro PROC 1 ; PROC name: open a procedure as a global, 16-byte aligned label
+ align 16
+ global %1
+ %1:
+%endmacro
+
+;The first six integer or pointer arguments are passed in registers
+;RDI, RSI, RDX, RCX, R8, and R9
+;Runs CPUID with eax = eax_in and ecx = ecx_in, stores the four result registers through the pointers, returns 0
+;int
+;cpuid_amd64(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx)
+
+PROC cpuid_amd64
+ ; save registers
+ push rbx ; cpuid overwrites ebx
+
+ push rdx ; int *eax
+ push rcx ; int *ebx
+ push r8 ; int *ecx
+ push r9 ; int *edx
+
+ mov rax, rdi ; eax_in
+ mov rcx, rsi ; ecx_in
+ cpuid
+ pop rdi ; pointers come back in reverse push order: edx, ecx, ebx, eax
+ mov [rdi], edx
+ pop rdi
+ mov [rdi], ecx
+ pop rdi
+ mov [rdi], ebx
+ pop rdi
+ mov [rdi], eax
+ mov eax, 0 ; return value
+ ; restore registers
+ pop rbx
+ ret;
+ align 16
+
diff --git a/xorg/server/module/amd64/funcs_amd64.h b/xorg/server/module/amd64/funcs_amd64.h
new file mode 100644
index 00000000..39f7e5a4
--- /dev/null
+++ b/xorg/server/module/amd64/funcs_amd64.h
@@ -0,0 +1,39 @@
+/*
+Copyright 2014 Jay Sorg
+
+Permission to use, copy, modify, distribute, and sell this software and its
+documentation for any purpose is hereby granted without fee, provided that
+the above copyright notice appear in all copies and that both that
+copyright notice and this permission notice appear in supporting
+documentation.
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+amd64 asm files
+
+*/
+
+#ifndef __FUNCS_AMD64_H
+#define __FUNCS_AMD64_H
+/* Prototypes for the routines implemented in the amd64 .asm files. */
+int
+cpuid_amd64(int eax_in, int ecx_in, int *eax, int *ebx, int *ecx, int *edx); /* runs CPUID with eax_in / ecx_in, stores the result registers, returns 0 */
+int
+yv12_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); /* writes width * height 32-bit pixels to rgbs, returns 0 */
+int
+i420_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); /* placeholder at this revision: returns 0 without converting */
+int
+yuy2_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); /* placeholder at this revision: returns 0 without converting */
+int
+uyvy_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs); /* placeholder at this revision: returns 0 without converting */
+
+#endif
+
diff --git a/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm
index 75377edd..74ba422b 100644
--- a/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm
+++ b/xorg/server/module/amd64/i420_to_rgb32_amd64_sse2.asm
@@ -9,10 +9,9 @@
;i420_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
PROC i420_to_rgb32_amd64_sse2
- push ebx
-
- mov eax, 0
- pop ebx
+ push rbx
+ mov rax, 0 ; placeholder body: nothing is converted, the routine only returns 0
+ pop rbx
ret
align 16
diff --git a/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm
index cbe85bec..8866fd0f 100644
--- a/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm
+++ b/xorg/server/module/amd64/uyvy_to_rgb32_amd64_sse2.asm
@@ -9,10 +9,9 @@
;uyvy_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
PROC uyvy_to_rgb32_amd64_sse2
- push ebx
-
- mov eax, 0
- pop ebx
+ push rbx
+ mov rax, 0 ; placeholder body: nothing is converted, the routine only returns 0
+ pop rbx
ret
align 16
diff --git a/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm
index 693c364c..c0ac5c1b 100644
--- a/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm
+++ b/xorg/server/module/amd64/yuy2_to_rgb32_amd64_sse2.asm
@@ -9,10 +9,9 @@
;yuy2_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
PROC yuy2_to_rgb32_amd64_sse2
- push ebx
-
- mov eax, 0
- pop ebx
+ push rbx
+ mov rax, 0 ; placeholder body: nothing is converted, the routine only returns 0
+ pop rbx
ret
align 16
diff --git a/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm b/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm
index 7802795f..192d0e6a 100644
--- a/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm
+++ b/xorg/server/module/amd64/yv12_to_rgb32_amd64_sse2.asm
@@ -1,3 +1,47 @@
+;
+;Copyright 2014 Jay Sorg
+;
+;Permission to use, copy, modify, distribute, and sell this software and its
+;documentation for any purpose is hereby granted without fee, provided that
+;the above copyright notice appear in all copies and that both that
+;copyright notice and this permission notice appear in supporting
+;documentation.
+;
+;The above copyright notice and this permission notice shall be included in
+;all copies or substantial portions of the Software.
+;
+;THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+;IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+;FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+;OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+;AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+;CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+;
+;YV12 to RGB32
+;amd64 SSE2 32 bit
+;
+; RGB to YUV
+; 0.299 0.587 0.114
+; -0.14713 -0.28886 0.436
+; 0.615 -0.51499 -0.10001
+; YUV to RGB
+; 1 0 1.13983
+; 1 -0.39465 -0.58060
+; 1 2.03211 0
+; shift left 12
+; 4096 0 4669
+; 4096 -1616 -2378
+; 4096 9324 0
+
+SECTION .data
+align 16 ; the constants are loaded with movdqa, which needs 16-byte aligned operands
+c128 times 8 dw 128 ; bias subtracted from the u and v samples
+c4669 times 8 dw 4669 ; 1.13983 * 4096
+c1616 times 8 dw 1616 ; 0.39465 * 4096
+c2378 times 8 dw 2378 ; 0.58060 * 4096
+c9324 times 8 dw 9324 ; NOTE(review): 2.03211 * 4096 is about 8324, not 9324 - confirm the intended value
+
+SECTION .text
%macro PROC 1
align 16
@@ -5,14 +49,199 @@
%1:
%endmacro
+do8_uv: ; load 4 u and 4 v samples into xmm1 / xmm2, then fall through into do8
+
+ ; u
+ movd xmm1, [rbx] ; 4 at a time
+ lea rbx, [rbx + 4] ; advance the u pointer
+ punpcklbw xmm1, xmm1 ; duplicate every byte so each sample covers two adjacent pixels
+ pxor xmm6, xmm6 ; xmm6 = 0
+ punpcklbw xmm1, xmm6 ; zero-extend the 8 bytes to 8 words
+ movdqa xmm7, [rel c128]
+ psubw xmm1, xmm7 ; u - 128
+ psllw xmm1, 4 ; << 4 here plus << 12 in the coefficients = 16, the bits pmulhw discards
+
+ ; v
+ movd xmm2, [rdx] ; 4 at a time
+ lea rdx, [rdx + 4] ; advance the v pointer
+ punpcklbw xmm2, xmm2 ; duplicate every byte, as for u
+ punpcklbw xmm2, xmm6 ; zero-extend to words
+ psubw xmm2, xmm7 ; v - 128
+ psllw xmm2, 4
+
+do8: ; convert 8 y samples at [rsi] into 8 pixels at [rdi], using the chroma already in xmm1 / xmm2
+
+ ; y
+ movq xmm0, [rsi] ; 8 at a time
+ lea rsi, [rsi + 8] ; advance the y pointer
+ pxor xmm6, xmm6
+ punpcklbw xmm0, xmm6 ; zero-extend y to 8 words
+
+ ; r = y + hiword(4669 * (v << 4))
+ movdqa xmm4, [rel c4669]
+ pmulhw xmm4, xmm2
+ movdqa xmm3, xmm0
+ paddw xmm3, xmm4
+
+ ; g = y - hiword(1616 * (u << 4)) - hiword(2378 * (v << 4))
+ movdqa xmm5, [rel c1616]
+ pmulhw xmm5, xmm1
+ movdqa xmm6, [rel c2378]
+ pmulhw xmm6, xmm2
+ movdqa xmm4, xmm0
+ psubw xmm4, xmm5
+ psubw xmm4, xmm6
+
+ ; b = y + hiword(c9324 * (u << 4))
+ movdqa xmm6, [rel c9324]
+ pmulhw xmm6, xmm1
+ movdqa xmm5, xmm0
+ paddw xmm5, xmm6
+
+ packuswb xmm3, xmm3 ; b (output byte 0) - NOTE(review): xmm3 was computed as r above, confirm the channel order
+ packuswb xmm4, xmm4 ; g, saturated to 0..255
+ punpcklbw xmm3, xmm4 ; gb: bytes alternate xmm3, xmm4
+
+ pxor xmm4, xmm4 ; a
+ packuswb xmm5, xmm5 ; r (output byte 2) - NOTE(review): xmm5 was computed as b above
+ punpcklbw xmm5, xmm4 ; ar: bytes alternate xmm5, 0
+
+ movdqa xmm4, xmm3
+ punpcklwd xmm3, xmm5 ; argb
+ movdqa [rdi], xmm3 ; first 4 pixels; movdqa needs [rdi] 16-byte aligned
+ lea rdi, [rdi + 16]
+ punpckhwd xmm4, xmm5 ; argb
+ movdqa [rdi], xmm4 ; last 4 pixels
+ lea rdi, [rdi + 16]
+
+ ret;
+
+;The first six integer or pointer arguments are passed in registers
+; RDI, RSI, RDX, RCX, R8, and R9
+
;int
;yv12_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs)
PROC yv12_to_rgb32_amd64_sse2
- push ebx
+ push rbx ; in: rdi = yuvs, esi = width, edx = height, rcx = rgbs; returns 0 in rax
+ push rsi
+ push rdi
+ push rbp
+ ; no argument checks: loop_x runs width / 8 times and loop_y runs height / 2 times, both must be non-zero
+ push rdi ; park yuvs, popped into rsi below
+ mov rdi, rcx ; rgbs
+
+ mov ecx, esi ; width; the 32-bit move zero-extends the int argument into rcx
+ mov ebp, edx ; height; copied before rdx is reused, zero-extended into rbp
+ mov rdx, rcx ; rdx = width from here on
+ mov rax, rbp
+ shr rbp, 1 ; rbp = height / 2, each loop_y pass converts two rows
+ imul rax, rcx ; rax = width * height
+
+ pop rsi ; y
+
+ mov rbx, rsi ; u = y + width * height
+ add rbx, rax
+ ; NOTE(review): the plane right after y is treated as u; YV12 is normally y, v, u - confirm the plane order
+ ; local vars
+ ; char* yptr1 [rsp + 0]
+ ; char* yptr2 [rsp + 8]
+ ; char* uptr [rsp + 16]
+ ; char* vptr [rsp + 24]
+ ; int* rgbs1 [rsp + 32]
+ ; int* rgbs2 [rsp + 40]
+ ; int width [rsp + 48]
+ sub rsp, 56 ; local vars, 56 bytes
+ mov [rsp + 0], rsi ; save y1
+ add rsi, rdx ; the second row starts width bytes after the first
+ mov [rsp + 8], rsi ; save y2
+ mov [rsp + 16], rbx ; save u
+ shr rax, 2
+ add rbx, rax ; v = u + (width * height / 4)
+ mov [rsp + 24], rbx ; save v
+
+ mov [rsp + 32], rdi ; save rgbs1
+ mov rax, rdx
+ shl rax, 2 ; width * 4 = bytes per output row
+ add rdi, rax
+ mov [rsp + 40], rdi ; save rgbs2
- mov eax, 0
- pop ebx
+loop_y:
+
+ mov rcx, rdx ; width
+ shr rcx, 3 ; 8 pixels per loop_x pass
+
+ ; save rdx
+ mov [rsp + 48], rdx
+
+ prefetchnta 4096[rsp + 0] ; y
+ prefetchnta 1024[rsp + 16] ; u
+ prefetchnta 1024[rsp + 24] ; v
+ ; NOTE(review): these prefetches appear to address the stack slots, not the planes the slots point to - confirm
+loop_x:
+
+ mov rsi, [rsp + 0] ; y1
+ mov rbx, [rsp + 16] ; u
+ mov rdx, [rsp + 24] ; v
+ mov rdi, [rsp + 32] ; rgbs1
+
+ ; y1
+ call do8_uv
+
+ mov [rsp + 0], rsi ; y1
+ mov [rsp + 32], rdi ; rgbs1
+
+ mov rsi, [rsp + 8] ; y2
+ mov rdi, [rsp + 40] ; rgbs2
+
+ ; y2
+ call do8
+
+ mov [rsp + 8], rsi ; y2
+ mov [rsp + 16], rbx ; u
+ mov [rsp + 24], rdx ; v
+ mov [rsp + 40], rdi ; rgbs2
+
+ dec rcx ; width
+ jnz loop_x
+
+ ; restore rdx
+ mov rdx, [rsp + 48]
+
+ ; update y1 and 2
+ mov rax, [rsp + 0]
+ mov rbx, rdx
+ add rax, rbx ; skip the row the other y pointer just converted
+ mov [rsp + 0], rax
+
+ mov rax, [rsp + 8]
+ add rax, rbx
+ mov [rsp + 8], rax
+
+ ; update rgb1 and 2
+ mov rax, [rsp + 32]
+ mov rbx, rdx
+ shl rbx, 2 ; one output row = width * 4 bytes
+ add rax, rbx
+ mov [rsp + 32], rax
+
+ mov rax, [rsp + 40]
+ add rax, rbx
+ mov [rsp + 40], rax
+
+ mov rcx, rbp
+ dec rcx ; height
+ mov rbp, rcx ; mov leaves the flags set by dec intact for jnz
+ jnz loop_y
+
+ add rsp, 56
+
+ mov rax, 0
+ pop rbp
+ pop rdi
+ pop rsi
+ pop rbx
ret
align 16
+
diff --git a/xorg/server/module/rdpXv.c b/xorg/server/module/rdpXv.c
index 0a9bc867..61088582 100644
--- a/xorg/server/module/rdpXv.c
+++ b/xorg/server/module/rdpXv.c
@@ -417,6 +417,7 @@ stretch_RGB32_RGB32(int *src, int src_width, int src_height,
iv += ov;
}
+ LLOGLN(10, ("stretch_RGB32_RGB32: out"));
return 0;
}
@@ -642,14 +643,7 @@ xrdpVidQueryImageAttributes(ScrnInfoPtr pScrn, int id,
#if XV_USE_ACCEL
#if defined(__x86_64__) || defined(__AMD64__) || defined (_M_AMD64)
-int
-yv12_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
-int
-i420_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
-int
-yuy2_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
-int
-uyvy_to_rgb32_amd64_sse2(unsigned char *yuvs, int width, int height, int *rgbs);
+#include "amd64/funcs_amd64.h"
#elif defined(__x86__) || defined(_M_IX86) || defined(__i386__)
#include "x86/funcs_x86.h"
#endif
@@ -713,23 +707,38 @@ rdpXvInit(ScreenPtr pScreen, ScrnInfoPtr pScrn)
if (g_xv_use_accel)
{
#if defined(__x86_64__) || defined(__AMD64__) || defined (_M_AMD64)
- dev->yv12_to_rgb32 = yv12_to_rgb32_amd64_sse2;
- dev->i420_to_rgb32 = i420_to_rgb32_amd64_sse2;
- dev->yuy2_to_rgb32 = yuy2_to_rgb32_amd64_sse2;
- dev->uyvy_to_rgb32 = uyvy_to_rgb32_amd64_sse2;
- LLOGLN(0, ("rdpXvInit: sse amd64 yuv functions assigned"));
+ int ax, bx, cx, dx;
+ cpuid_amd64(1, 0, &ax, &bx, &cx, &dx);
+ LLOGLN(0, ("rdpXvInit: cpuid ax 1 cx 0 return ax 0x%8.8x bx "
+ "0x%8.8x cx 0x%8.8x dx 0x%8.8x", ax, bx, cx, dx));
+ if (dx & (1 << 26)) /* SSE 2 */
+ {
+ dev->yv12_to_rgb32 = yv12_to_rgb32_amd64_sse2;
+ dev->i420_to_rgb32 = i420_to_rgb32_amd64_sse2;
+ dev->yuy2_to_rgb32 = yuy2_to_rgb32_amd64_sse2;
+ dev->uyvy_to_rgb32 = uyvy_to_rgb32_amd64_sse2;
+ LLOGLN(0, ("rdpXvInit: sse2 amd64 yuv functions assigned"));
+ }
+ else
+ {
+ dev->yv12_to_rgb32 = YV12_to_RGB32;
+ dev->i420_to_rgb32 = I420_to_RGB32;
+ dev->yuy2_to_rgb32 = YUY2_to_RGB32;
+ dev->uyvy_to_rgb32 = UYVY_to_RGB32;
+ LLOGLN(0, ("rdpXvInit: warning, c yuv functions assigned"));
+ }
#elif defined(__x86__) || defined(_M_IX86) || defined(__i386__)
int ax, bx, cx, dx;
cpuid_x86(1, 0, &ax, &bx, &cx, &dx);
- LLOGLN(0, ("rdpXvInit: cpuid eax 1 ecx 0 return eax 0x%8.8x ebx "
- "0x%8.8x ecx 0x%8.8x edx 0x%8.8x", ax, bx, cx, dx));
+ LLOGLN(0, ("rdpXvInit: cpuid ax 1 cx 0 return ax 0x%8.8x bx "
+ "0x%8.8x cx 0x%8.8x dx 0x%8.8x", ax, bx, cx, dx));
if (dx & (1 << 26)) /* SSE 2 */
{
dev->yv12_to_rgb32 = yv12_to_rgb32_x86_sse2;
dev->i420_to_rgb32 = i420_to_rgb32_x86_sse2;
dev->yuy2_to_rgb32 = yuy2_to_rgb32_x86_sse2;
dev->uyvy_to_rgb32 = uyvy_to_rgb32_x86_sse2;
- LLOGLN(0, ("rdpXvInit: sse x86 yuv functions assigned"));
+ LLOGLN(0, ("rdpXvInit: sse2 x86 yuv functions assigned"));
}
else
{
diff --git a/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm b/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm
index 9087b291..3bd0c59a 100644
--- a/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm
+++ b/xorg/server/module/x86/yv12_to_rgb32_x86_sse2.asm
@@ -140,12 +140,12 @@ PROC yv12_to_rgb32_x86_sse2
add ebx, eax
; local vars
- ; char* yptr1;
- ; char* yptr2;
- ; char* uptr;
- ; char* vptr;
- ; int* rgbs1;
- ; int* rgbs2;
+ ; char* yptr1
+ ; char* yptr2
+ ; char* uptr
+ ; char* vptr
+ ; int* rgbs1
+ ; int* rgbs2
; int width
sub esp, 28 ; local vars, 28 bytes
mov [esp + 0], esi ; save y1