diff options
Diffstat (limited to 'mpeglib/lib/mpegplay/copyFunctions_mmx.cpp')
-rw-r--r-- | mpeglib/lib/mpegplay/copyFunctions_mmx.cpp | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/mpeglib/lib/mpegplay/copyFunctions_mmx.cpp b/mpeglib/lib/mpegplay/copyFunctions_mmx.cpp new file mode 100644 index 00000000..3e295e76 --- /dev/null +++ b/mpeglib/lib/mpegplay/copyFunctions_mmx.cpp @@ -0,0 +1,313 @@ +/* + copyfunctions implementation in mmx + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. + + For more information look at the file COPYRIGHT in this package + + */ + + +#include "copyFunctions_mmx.h" + +using namespace std; + +// mmx goodies +static long ADD_1[] = {0x01010101, 0x01010101}; +static long ADDW_1[] = {0x00010001, 0x00010001}; +static long MASK_AND[] = {0x7f7f7f7f, 0x7f7f7f7f}; + +void dummyCopyFunctions() { + cout << "ADD_1:"<<ADD_1<<endl; + cout << "ADDW_1:"<<ADDW_1<<endl; + cout << "MASK_AND:"<<MASK_AND<<endl; +} + +CopyFunctions_MMX::CopyFunctions_MMX() { +#ifdef INTEL + lmmx=mm_support(); +#else + lmmx=false; + cout << "no INTEL arch- disable MMX in copyFunctions"<<endl; +#endif +} + + +CopyFunctions_MMX::~CopyFunctions_MMX() { +} + +int CopyFunctions_MMX::support() { + return lmmx; +} + + +#if defined (__GNUC__) && defined (INTEL) + +void CopyFunctions_MMX::startNOFloatSection() { +} + + +void CopyFunctions_MMX::endNOFloatSection() { + emms(); +} + + + +void CopyFunctions_MMX::copy8_byte(unsigned char* source1, + unsigned char* dest,int inc) { + int rr=4; + + asm ( + "1:\n" + "movq (%0) ,%%mm0\n" + "leal (%0,%2) ,%0\n" + "movq (%0) ,%%mm1\n" + "leal (%0,%2) ,%0\n" + + // Write + "movq %%mm0 ,(%1)\n" + "leal (%1,%2) ,%1\n" + "movq %%mm1 ,(%1)\n" + "leal (%1,%2) ,%1\n" + + "decl %3\n" + "jnz 1b\n" + : + : "r"(source1),"r"(dest),"r"(inc),"r"(rr) + ); +} + + + +void CopyFunctions_MMX::copy8_src1linear_crop(short* source1, + unsigned char* dest,int inc) { + + asm ( + "movq (%1),%%mm0\n" + "packuswb 8(%1),%%mm0\n" + "movq %%mm0,(%0)\n" + "addl %2,%0\n" + + "movq 16(%1),%%mm0\n" + "packuswb 24(%1),%%mm0\n" + "movq %%mm0,(%0)\n" + "addl %2,%0\n" + + "movq 32(%1),%%mm0\n" + "packuswb 40(%1),%%mm0\n" + "movq %%mm0,(%0)\n" + "addl %2,%0\n" + + "movq 48(%1),%%mm0\n" + "packuswb 56(%1),%%mm0\n" + "movq %%mm0,(%0)\n" + "addl %2,%0\n" + + "movq 64(%1),%%mm0\n" + "packuswb 72(%1),%%mm0\n" + "movq %%mm0,(%0)\n" + "addl %2,%0\n" + + "movq 80(%1),%%mm0\n" + "packuswb 88(%1),%%mm0\n" + "movq %%mm0,(%0)\n" + "addl %2,%0\n" + + "movq 96(%1),%%mm0\n" + "packuswb 104(%1),%%mm0\n" + "movq %%mm0,(%0)\n" + "addl %2,%0\n" + + "movq 112(%1),%%mm0\n" + "packuswb 120(%1),%%mm0\n" + "movq %%mm0,(%0)\n" + : + :"r" (dest), "r" (source1),"r" (inc) + ); + +} + + + +void CopyFunctions_MMX::copy8_div2_nocrop(unsigned char* source1, + unsigned char* source2, + unsigned char* dest,int inc) { + int h=8; + asm ( + "movq MASK_AND, %%mm5\n" + "movq ADD_1, %%mm6\n" + "1:\t" + "movq (%1), %%mm0\n" /* 8 s */ + "movq (%4), %%mm1\n" /* 8 s +lx */ + "psrlw $1,%%mm0\n" + "psrlw $1,%%mm1\n" + "pand %%mm5,%%mm0\n" + "pand %%mm5,%%mm1\n" + "paddusb %%mm1,%%mm0\n" + "addl %3,%1\n" + "paddusb %%mm6,%%mm0\n" + "addl %3,%4\n" + "movq %%mm0,(%2)\n" + "decl %0\n" + "leal (%2, %3), %2\n" + "jnz 1b\n" + : + : "c"(h), "r"(source1), "r"(dest), "r"(inc), "r"(source2) + ); +} + + +void CopyFunctions_MMX::copy8_div2_destlinear_nocrop(unsigned char* source1, + unsigned char* source2, + unsigned char* dest, + int inc) { + int h=8; + asm ( + "movq MASK_AND, %%mm5\n" + "1:\t" + "movq (%1), %%mm0\n" /* 8 s */ + "movq (%4), %%mm1\n" /* 8 s +lx */ + "psrlw $1,%%mm0\n" + "psrlw $1,%%mm1\n" + "pand %%mm5,%%mm0\n" + "pand %%mm5,%%mm1\n" + "paddusb %%mm1,%%mm0\n" + "addl %3,%1\n" + "addl %3,%4\n" + "movq %%mm0,(%2)\n" + "decl %0\n" + "leal 8(%2), %2\n" + "jnz 1b\n" + : + : "c"(h), "r"(source1), "r"(dest), "r"(inc), "r"(source2) + ); +} + + + +void CopyFunctions_MMX::copy16_div2_destlinear_nocrop(unsigned char* source1, + unsigned char* source2, + unsigned char* dest, + int inc) { + int h=16; + inc=inc-8; + asm ( + "movq MASK_AND, %%mm5\n" + "1:\t" + "movq (%1), %%mm0\n" /* 8 s */ + "movq (%4), %%mm1\n" /* 8 s +lx */ + "psrlw $1,%%mm0\n" + "psrlw $1,%%mm1\n" + "pand %%mm5,%%mm0\n" + "pand %%mm5,%%mm1\n" + "paddusb %%mm1,%%mm0\n" + "leal 8(%1),%1\n" + "leal 8(%4),%4\n" + "movq %%mm0,(%2)\n" + "leal 8(%2),%2\n" + + "movq (%1), %%mm0\n" /* 8 s */ + "movq (%4), %%mm1\n" /* 8 s +lx */ + "psrlw $1,%%mm0\n" + "psrlw $1,%%mm1\n" + "pand %%mm5,%%mm0\n" + "pand %%mm5,%%mm1\n" + "paddusb %%mm1,%%mm0\n" + "leal (%3,%1),%1\n" + "leal (%3,%4),%4\n" + "movq %%mm0,(%2)\n" + "leal 8(%2),%2\n" + + "decl %0\n" + "jnz 1b\n" + : + : "c"(h), "r"(source1), "r"(dest), "r"(inc), "r"(source2) + ); +} + + +void CopyFunctions_MMX::copy8_src2linear_crop(unsigned char* source1, + short int* source2, + unsigned char* dest,int inc) { + + int rr=8; + // buggy + + asm ( + ".align 32\n" + "pxor %%mm2 ,%%mm2\n" // 0 0 0 0 0 0 0 0 + "1:\n" + "movq (%0) ,%%mm0\n" // s1_7 s1_6 s1_5 s1_4 s1_3 s1_2 s1_1 s1_0 + "movq (%0) ,%%mm4\n" // s1_7 s1_6 s1_5 s1_4 s1_3 s1_2 s1_1 s1_0 + "punpckhbw %%mm2 ,%%mm0\n" // 0 s1_7 0 s1_6 0 s1_5 0 s1_4 + "punpcklbw %%mm2 ,%%mm4\n" // 0 s1_3 0 s1_2 0 s1_1 0 s1_0 + "movq (%1) ,%%mm1\n" // s23h s23l s22h s22l s21l s21h s20h s20l + "movq 8(%1) ,%%mm5\n" // s27h s27l s26h s26l s25l s25h s24h s24l + "paddw %%mm0 ,%%mm5\n" // mm4=mm4 + s3_0..3 + "paddw %%mm4 ,%%mm1\n" // mm0=mm0 + s3_4..7 + "packuswb %%mm5 ,%%mm1\n" // cm[...] + + "movq %%mm1 ,(%2)\n" // wrote out + + "leal (%0,%3), %0\n" // source1+=inc + "leal 16(%1) , %1\n" // source2+=inc + "leal (%2,%3), %2\n" // dest+=inc + "decl %4\n" + "jnz 1b\n" + //"emms\n" + : + : "r"(source1), "r"(source2), "r"(dest),"r"(inc),"r"(rr) + ); +} + + +void CopyFunctions_MMX::copy8_div2_src3linear_crop(unsigned char* source1, + unsigned char* source2, + short int* source3, + unsigned char* dest, + int inc){ + // buggy + int rr=8; + + asm ( + "pxor %%mm2 ,%%mm2\n" // 0 0 0 0 0 0 0 0 + "pxor %%mm3 ,%%mm3\n" // 0 0 0 0 0 0 0 0 + "movq ADDW_1,%%mm6\n" // 0 1 0 1 0 1 0 1 + "1:\n" + "movq (%0) ,%%mm0\n" // s1_7 s1_6 s1_5 s1_4 s1_3 s1_2 s1_1 s1_0 + "movq (%1) ,%%mm1\n" // s2_7 s2_6 s2_5 s2_4 s2_3 s2_2 s2_1 s2_0 + "movq %%mm0 ,%%mm4\n" // s1_7 s1_6 s1_5 s1_4 s1_3 s1_2 s1_1 s1_0 + "movq %%mm1 ,%%mm5\n" // s2_7 s2_6 s2_5 s2_4 s2_3 s2_2 s2_1 s2_0 + "punpckhbw %%mm2 ,%%mm0\n" // 0 s1_7 0 s1_6 0 s1_5 0 s1_4 + "punpckhbw %%mm3 ,%%mm1\n" // 0 s2_7 0 s2_6 0 s2_5 0 s2_4 + "punpcklbw %%mm2 ,%%mm4\n" // 0 s1_3 0 s1_2 0 s1_1 0 s1_0 + "punpcklbw %%mm3 ,%%mm5\n" // 0 s2_3 0 s2_2 0 s2_1 0 s2_0 + "paddusw %%mm4 ,%%mm5\n" // mm5=s1_0..3 + s2_0..3 + "paddusw %%mm0 ,%%mm1\n" // mm1=s1_4..7 + s2_4..7 + "paddusw %%mm6 ,%%mm5\n" // mm5=mm5 + 1 + "paddusw %%mm6 ,%%mm1\n" // mm1=mm1 + 1 + "psraw $1 ,%%mm1\n" // mm1=mm1/2 + "psraw $1 ,%%mm5\n" // mm5=mm5/2 + "movq (%2) ,%%mm0\n" // s33h s33l s32h s32l s31l s31h s30h s30l + "movq 8(%2) ,%%mm4\n" // s37h s37l s36h s36l s35l s35h s34h s34l + "paddw %%mm0 ,%%mm5\n" // mm5=mm5 + s3_0..3 + "paddw %%mm4 ,%%mm1\n" // mm1=mm1 + s3_4..7 + "packuswb %%mm1 ,%%mm5\n" // cm[...] + "movq %%mm5 ,(%3)\n" // wrote out + "leal (%0,%4), %0\n" // source1+=inc + "leal (%1,%4), %1\n" // source2+=inc + "addl $16 ,%2 \n" // source3+8 + "leal (%3,%4), %3\n" // dest+=inc + "decl %5\n" + "jnz 1b\n" + : + : "r"(source1), "r"(source2), "r"(source3), + "r"(dest),"r"(inc),"m"(rr) + ); +} + + +#endif |