diff options
Diffstat (limited to 'mpeglib/lib/util/render/dither')
21 files changed, 2980 insertions, 0 deletions
diff --git a/mpeglib/lib/util/render/dither/Makefile.am b/mpeglib/lib/util/render/dither/Makefile.am new file mode 100644 index 00000000..166d5ca3 --- /dev/null +++ b/mpeglib/lib/util/render/dither/Makefile.am @@ -0,0 +1,40 @@ +# liboutplugin - Makefile.am + +INCLUDES = $(all_includes) + +EXTRA_DIST = ditherDef.h ditherMMX.h \ + ditherer_mmx16.cpp dither32mmx.cpp + +noinst_LTLIBRARIES = libdither.la + +noinst_HEADERS = ditherWrapper.h \ + dither8Bit.h colorTable8Bit.h \ + colorTableHighBit.h dither16Bit.h \ + dither32Bit.h ditherRGB_flipped.h \ + ditherRGB.h + +libdither_la_SOURCES = ditherWrapper.cpp \ + dither8Bit.cpp \ + colorTable8Bit.cpp colorTableHighBit.cpp \ + dither16Bit.cpp dither32Bit.cpp \ + ditherRGB_flipped.cpp ditherRGB.cpp \ + ditherer_mmx16.cpp dither32mmx.cpp + + + + + + + + + + + + + + + + + + + diff --git a/mpeglib/lib/util/render/dither/colorTable8Bit.cpp b/mpeglib/lib/util/render/dither/colorTable8Bit.cpp new file mode 100644 index 00000000..57c533de --- /dev/null +++ b/mpeglib/lib/util/render/dither/colorTable8Bit.cpp @@ -0,0 +1,147 @@ +/* + colorTables for 8 Bit depth + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. 
+ + For more information look at the file COPYRIGHT in this package + + */ + + +#include "colorTable8Bit.h" + + +ColorTable8Bit::ColorTable8Bit() { + + lum_values = new int[LUM_RANGE]; + cr_values = new int[CR_RANGE]; + cb_values = new int[CB_RANGE]; + + + /* We can exploit cache by allocating contiguous blocks */ + + colortab = new TABTYPE[5*256]; + + Cr_r_tab = &colortab[0*256]; + Cr_g_tab = &colortab[1*256]; + Cb_g_tab = &colortab[2*256]; + Cb_b_tab = &colortab[3*256]; + L_tab = &colortab[4*256]; + + init8BitColor(); +} + + +ColorTable8Bit::~ColorTable8Bit() { + delete lum_values; + delete cr_values; + delete cb_values; + delete colortab; +} + + +void ColorTable8Bit::init8BitColor() { + int i; + + + + for (i=0; i<LUM_RANGE; i++) { + lum_values[i] = ((i * 256) / (LUM_RANGE)) + (256/(LUM_RANGE*2)); + L_tab[i] = lum_values[i]; + if (gammaCorrectFlag) { + L_tab[i] = GAMMA_CORRECTION(L_tab[i]); + } + + } + + + for (i=0; i<CR_RANGE; i++) { + register double tmp; + if (chromaCorrectFlag) { + tmp = ((i * 256) / (CR_RANGE)) + (256/(CR_RANGE*2)); + Cr_r_tab[i]=(TABTYPE) ((0.419/0.299)*CHROMA_CORRECTION128D(tmp-128.0)); + Cr_g_tab[i]=(TABTYPE) (-(0.299/0.419)*CHROMA_CORRECTION128D(tmp-128.0)); + cr_values[i] = CHROMA_CORRECTION256(tmp); + } else { + tmp = ((i * 256) / (CR_RANGE)) + (256/(CR_RANGE*2)); + Cr_r_tab[i] = (TABTYPE) ((0.419/0.299) * (tmp - 128.0)); + Cr_g_tab[i] = (TABTYPE) (-(0.299/0.419) * (tmp - 128.0)); + cr_values[i] = (int) tmp; + } + } + + + for (i=0; i<CB_RANGE; i++) { + register double tmp; + if (chromaCorrectFlag) { + tmp = ((i * 256) / (CB_RANGE)) + (256/(CB_RANGE*2)); + Cb_g_tab[i]=(TABTYPE) (-(0.114/0.331)*CHROMA_CORRECTION128D(tmp-128.0)); + Cb_b_tab[i]=(TABTYPE) ((0.587/0.331)*CHROMA_CORRECTION128D(tmp-128.0)); + cb_values[i] = CHROMA_CORRECTION256(tmp); + } else { + tmp = ((i * 256) / (CB_RANGE)) + (256/(CB_RANGE*2)); + Cb_g_tab[i] = (TABTYPE) (-(0.114/0.331) * (tmp - 128.0)); + Cb_b_tab[i] = (TABTYPE) ((0.587/0.331) * (tmp - 128.0)); + 
cb_values[i] = (int) tmp; + } + } +} + + + +/* + *-------------------------------------------------------------- + * + * ConvertColor -- + * + * Given a l, cr, cb tuple, converts it to r,g,b. + * + * Results: + * r,g,b values returned in pointers passed as parameters. + * + * Side effects: + * None. + * + *-------------------------------------------------------------- + */ +void ColorTable8Bit::ConvertColor(unsigned int l, unsigned int cr, + unsigned int cb, unsigned char* r, + unsigned char* g, unsigned char* b) { + + double fl, fr, fg, fb; + + /* + * Old method w/o lookup table + * + * fl = 1.164*(((double) l)-16.0); + * fcr = ((double) cr) - 128.0; + * fcb = ((double) cb) - 128.0; + * + * fr = fl + (1.366 * fcr); + * fg = fl - (0.700 * fcr) - (0.334 * fcb); + * fb = fl + (1.732 * fcb); + */ + + fl = L_tab[l]; + + fr = fl + Cr_r_tab[cr]; + fg = fl + Cr_g_tab[cr] + Cb_g_tab[cb]; + fb = fl + Cb_b_tab[cb]; + + if (fr < 0.0) fr = 0.0; + else if (fr > 255.0) fr = 255.0; + + if (fg < 0.0) fg = 0.0; + else if (fg > 255.0) fg = 255.0; + + if (fb < 0.0) fb = 0.0; + else if (fb > 255.0) fb = 255.0; + + *r = (unsigned char) fr; + *g = (unsigned char) fg; + *b = (unsigned char) fb; + +} diff --git a/mpeglib/lib/util/render/dither/colorTable8Bit.h b/mpeglib/lib/util/render/dither/colorTable8Bit.h new file mode 100644 index 00000000..6d873d1d --- /dev/null +++ b/mpeglib/lib/util/render/dither/colorTable8Bit.h @@ -0,0 +1,57 @@ +/* + colorTables for 8 Bit depth + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. + + For more information look at the file COPYRIGHT in this package + + */ + + +#ifndef __COLORTABLE8BIT_H +#define __COLORTABLE8BIT_H + +#include "ditherDef.h" + + +class ColorTable8Bit { + + // Arrays holding quantized value ranged for lum, cr, and cb. 
+ // (used for 8 Bit) + + int* lum_values; + int* cr_values; + int* cb_values; + + + + + TABTYPE *L_tab; + TABTYPE *Cr_r_tab; + TABTYPE *Cr_g_tab; + TABTYPE *Cb_g_tab; + TABTYPE *Cb_b_tab; + TABTYPE *colortab; + + + public: + ColorTable8Bit(); + ~ColorTable8Bit(); + + inline int* getLumValues() { return lum_values; } + inline int* getCrValues() { return cr_values; } + inline int* getCbValues() { return cb_values; } + + void ConvertColor(unsigned int l, unsigned int cr, unsigned int cb, + unsigned char* r, unsigned char* g, unsigned char* b); + + + private: + void init8BitColor(); + + +}; +#endif diff --git a/mpeglib/lib/util/render/dither/colorTableHighBit.cpp b/mpeglib/lib/util/render/dither/colorTableHighBit.cpp new file mode 100644 index 00000000..171f4e97 --- /dev/null +++ b/mpeglib/lib/util/render/dither/colorTableHighBit.cpp @@ -0,0 +1,248 @@ +/* + colorTables for 16,32 Bit depth + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. + + For more information look at the file COPYRIGHT in this package + + */ + + + +#include "colorTableHighBit.h" + +//#define INTERPOLATE + + +/* + * Erik Corry's multi-byte dither routines. + * + * The basic idea is that the Init generates all the necessary tables. + * The tables incorporate the information about the layout of pixels + * in the XImage, so that it should be able to cope with 15-bit, 16-bit + * 24-bit (non-packed) and 32-bit (10-11 bits per color!) screens. + * At present it cannot cope with 24-bit packed mode, since this involves + * getting down to byte level again. It is assumed that the bits for each + * color are contiguous in the longword. + * + * Writing to memory is done in shorts or ints. (Unfortunately, short is not + * very fast on Alpha, so there is room for improvement here). 
There is no + * dither time check for overflow - instead the tables have slack at + * each end. This is likely to be faster than an 'if' test as many modern + * architectures are really bad at ifs. Potentially, each '&&' causes a + * pipeline flush! + * + * There is no shifting and fixed point arithmetic, as I really doubt you + * can see the difference, and it costs. This may be just my bias, since I + * heard that Intel is really bad at shifting. + */ + + +/* + * How many 1 bits are there in the PIXVALword. + * Low performance, do not call often. + */ +static int number_of_bits_set(unsigned PIXVAL a) { + if(!a) return 0; + if(a & 1) return 1 + number_of_bits_set(a >> 1); + return(number_of_bits_set(a >> 1)); +} + + + +/* + * How many 0 bits are there at most significant end of PIXVALword. + * Low performance, do not call often. + */ +static int free_bits_at_top(unsigned PIXVAL a) { + /* assume char is 8 bits */ + if(!a) return sizeof(unsigned PIXVAL) * 8; + /* assume twos complement */ + if(((PIXVAL)a) < 0l) return 0; + return 1 + free_bits_at_top ( a << 1); +} + +/* + * How many 0 bits are there at least significant end of PIXVALword. + * Low performance, do not call often. 
+ */ +static int free_bits_at_bottom(unsigned PIXVAL a) { + /* assume char is 8 bits */ + if(!a) return sizeof(unsigned PIXVAL) * 8; + if(((PIXVAL)a) & 1l) return 0; + return 1 + free_bits_at_bottom ( a >> 1); +} + + + +ColorTableHighBit::ColorTableHighBit(int bpp,unsigned int redMask, + unsigned int greenMask, + unsigned int blueMask) { + this->bpp=bpp; + this->redMask=redMask; + this->greenMask=greenMask; + this->blueMask=blueMask; + + colortab = new TABTYPE[5*256]; + + Cr_r_tab = &colortab[0*256]; + Cr_g_tab = &colortab[1*256]; + Cb_g_tab = &colortab[2*256]; + Cb_b_tab = &colortab[3*256]; + L_tab = &colortab[4*256]; + + rgb_2_pix = new PIXVAL [3*768]; + + r_2_pix_alloc = &rgb_2_pix[0*768]; + g_2_pix_alloc = &rgb_2_pix[1*768]; + b_2_pix_alloc = &rgb_2_pix[2*768]; + + initHighColor(bpp>=24,redMask,greenMask,blueMask); + +} + + +ColorTableHighBit::~ColorTableHighBit() { + delete colortab; + delete rgb_2_pix; +} + +/* + *-------------------------------------------------------------- + * + * InitColor16Dither -- + * + * To get rid of the multiply and other conversions in color + * dither, we use a lookup table. + * + * Results: + * None. + * + * Side effects: + * The lookup tables are initialized. 
+ * + *-------------------------------------------------------------- + */ + +void ColorTableHighBit::initHighColor(int thirty2,unsigned int redMask, + unsigned int greenMask, + unsigned int blueMask) { + + unsigned PIXVAL red_mask = redMask; + unsigned PIXVAL green_mask =greenMask; + unsigned PIXVAL blue_mask = blueMask; + + int CR, CB, i; + + + for (i=0; i<256; i++) { + L_tab[i] = i; + if (gammaCorrectFlag) { + L_tab[i] = (TABTYPE)GAMMA_CORRECTION(i); + } + + CB = CR = i; + + if (chromaCorrectFlag) { + CB -= 128; + CB = CHROMA_CORRECTION128(CB); + CR -= 128; + CR = CHROMA_CORRECTION128(CR); + } else { + CB -= 128; CR -= 128; + } +/* was + Cr_r_tab[i] = 1.596 * CR; + Cr_g_tab[i] = -0.813 * CR; + Cb_g_tab[i] = -0.391 * CB; + Cb_b_tab[i] = 2.018 * CB; + but they were just messed up. + Then was (_Video Deymstified_): + Cr_r_tab[i] = 1.366 * CR; + Cr_g_tab[i] = -0.700 * CR; + Cb_g_tab[i] = -0.334 * CB; + Cb_b_tab[i] = 1.732 * CB; + but really should be: + (from ITU-R BT.470-2 System B, G and SMPTE 170M ) +*/ + Cr_r_tab[i] = (TABTYPE) ( (0.419/0.299) * CR ); + Cr_g_tab[i] = (TABTYPE) ( -(0.299/0.419) * CR ); + Cb_g_tab[i] = (TABTYPE) ( -(0.114/0.331) * CB ); + Cb_b_tab[i] = (TABTYPE) ( (0.587/0.331) * CB ); + +/* + though you could argue for: + SMPTE 240M + Cr_r_tab[i] = (0.445/0.212) * CR; + Cr_g_tab[i] = -(0.212/0.445) * CR; + Cb_g_tab[i] = -(0.087/0.384) * CB; + Cb_b_tab[i] = (0.701/0.384) * CB; + FCC + Cr_r_tab[i] = (0.421/0.30) * CR; + Cr_g_tab[i] = -(0.30/0.421) * CR; + Cb_g_tab[i] = -(0.11/0.331) * CB; + Cb_b_tab[i] = (0.59/0.331) * CB; + ITU-R BT.709 + Cr_r_tab[i] = (0.454/0.2125) * CR; + Cr_g_tab[i] = -(0.2125/0.454) * CR; + Cb_g_tab[i] = -(0.0721/0.386) * CB; + Cb_b_tab[i] = (0.7154/0.386) * CB; +*/ + } + + /* + * Set up entries 0-255 in rgb-to-pixel value tables. 
+ */ + for (i = 0; i < 256; i++) { + r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(red_mask)); + r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(red_mask); + g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(green_mask)); + g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(green_mask); + b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(blue_mask)); + b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(blue_mask); + /* + * If we have 16-bit output depth, then we double the value + * in the top word. This means that we can write out both + * pixels in the pixel doubling mode with one op. It is + * harmless in the normal case as storing a 32-bit value + * through a short pointer will lose the top bits anyway. + * A similar optimisation for Alpha for 64 bit has been + * prepared for, but is not yet implemented. + */ + if(!thirty2) { + r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16; + g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16; + b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16; + + } +#ifdef SIXTYFOUR_BIT + if(thirty2) { + + r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 32; + g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 32; + b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 32; + + } +#endif + } + + /* + * Spread out the values we have to the rest of the array so that + * we do not need to check for overflow. 
+ */ + for (i = 0; i < 256; i++) { + r_2_pix_alloc[i] = r_2_pix_alloc[256]; + r_2_pix_alloc[i+ 512] = r_2_pix_alloc[511]; + g_2_pix_alloc[i] = g_2_pix_alloc[256]; + g_2_pix_alloc[i+ 512] = g_2_pix_alloc[511]; + b_2_pix_alloc[i] = b_2_pix_alloc[256]; + b_2_pix_alloc[i+ 512] = b_2_pix_alloc[511]; + } + + r_2_pix = r_2_pix_alloc + 256; + g_2_pix = g_2_pix_alloc + 256; + b_2_pix = b_2_pix_alloc + 256; +} diff --git a/mpeglib/lib/util/render/dither/colorTableHighBit.h b/mpeglib/lib/util/render/dither/colorTableHighBit.h new file mode 100644 index 00000000..9945414d --- /dev/null +++ b/mpeglib/lib/util/render/dither/colorTableHighBit.h @@ -0,0 +1,73 @@ +/* + colorTables for 16,32 Bit depth + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. + + For more information look at the file COPYRIGHT in this package + + */ + + +#ifndef __COLORTABLEHIGHBIT_H +#define __COLORTABLEHIGHBIT_H + +#include "ditherDef.h" + + + + +class ColorTableHighBit { + + TABTYPE *L_tab; + TABTYPE *Cr_r_tab; + TABTYPE *Cr_g_tab; + TABTYPE *Cb_g_tab; + TABTYPE *Cb_b_tab; + TABTYPE *colortab; + + + PIXVAL *r_2_pix; + PIXVAL *g_2_pix; + PIXVAL *b_2_pix; + PIXVAL *rgb_2_pix; + + PIXVAL *r_2_pix_alloc; + PIXVAL *g_2_pix_alloc; + PIXVAL *b_2_pix_alloc; + + + + // init stuff + int bpp; + // colorMask + unsigned int redMask; + unsigned int greenMask; + unsigned int blueMask; + + public: + ColorTableHighBit(int bpp,unsigned int redMask, + unsigned int greenMask,unsigned int blueMask); + ~ColorTableHighBit(); + + inline TABTYPE* getL_tab() { return L_tab ; } + inline TABTYPE* getCr_r_tab() { return Cr_r_tab ; } + inline TABTYPE* getCr_g_tab() { return Cr_g_tab ; } + inline TABTYPE* getCb_g_tab() { return Cb_g_tab ; } + inline TABTYPE* getCb_b_tab() { return Cb_b_tab ; } + + + inline PIXVAL* getr_2_pix() { return r_2_pix ; } + inline PIXVAL* 
getg_2_pix() { return g_2_pix ; } + inline PIXVAL* getb_2_pix() { return b_2_pix ; } + + + + private: + void initHighColor(int thirty2,unsigned int redMask, + unsigned int greenMask,unsigned int blueMask); + +}; +#endif diff --git a/mpeglib/lib/util/render/dither/dither16Bit.cpp b/mpeglib/lib/util/render/dither/dither16Bit.cpp new file mode 100644 index 00000000..0a843ee9 --- /dev/null +++ b/mpeglib/lib/util/render/dither/dither16Bit.cpp @@ -0,0 +1,300 @@ +/* + dither 16 bit depth yuv images + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. + + For more information look at the file COPYRIGHT in this package + + */ + + +#include "dither16Bit.h" + + +Dither16Bit::Dither16Bit(unsigned int redMask, + unsigned int greenMask,unsigned int blueMask) { + + + colorTableHighBit=new ColorTableHighBit(16,redMask,greenMask,blueMask); + L_tab=colorTableHighBit->getL_tab(); + Cr_r_tab=colorTableHighBit->getCr_r_tab(); + Cr_g_tab=colorTableHighBit->getCr_g_tab(); + Cb_g_tab=colorTableHighBit->getCb_g_tab(); + Cb_b_tab=colorTableHighBit->getCb_b_tab(); + + r_2_pix=colorTableHighBit->getr_2_pix(); + g_2_pix=colorTableHighBit->getg_2_pix(); + b_2_pix=colorTableHighBit->getb_2_pix(); + +} + + +Dither16Bit::~Dither16Bit() { + delete colorTableHighBit; +} + + +/* + *-------------------------------------------------------------- + * + * Color16DitherImage -- + * + * Converts image into 16 bit color. + * + * Results: + * None. + * + * Side effects: + * None. 
+ * + *-------------------------------------------------------------- + */ + +void Dither16Bit::ditherImageColor16(unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int rows, + int cols, + int offset) { + + int L, CR, CB; + unsigned short *row1, *row2; + unsigned char *lum2; + int x, y; + int cr_r; + int cr_g; + int cb_g; + int cb_b; + int cols_2 = cols/2; + + row1 = (unsigned short *)out; + row2=row1+cols_2+cols_2+offset; // start of second row + + offset=2*offset+cols_2+cols_2; + + lum2 = lum + cols_2 + cols_2; + + + for (y=0; y<rows; y+=2) { + for (x=0; x<cols_2; x++) { + int R, G, B; + + CR = *cr++; + CB = *cb++; + cr_r = Cr_r_tab[CR]; + cr_g = Cr_g_tab[CR]; + cb_g = Cb_g_tab[CB]; + cb_b = Cb_b_tab[CB]; + + L = L_tab[(int) *lum++]; + + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + *row1++ = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + + +#ifdef INTERPOLATE + if(x != cols_2 - 1) { + CR = (CR + *cr) >> 1; + CB = (CB + *cb) >> 1; + cr_r = Cr_r_tab[CR]; + cr_g = Cr_g_tab[CR]; + cb_g = Cb_g_tab[CB]; + cb_b = Cb_b_tab[CB]; + } +#endif + + L = L_tab[(int) *lum++]; + + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + *row1++ = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + + /* + * Now, do second row. + */ +#ifdef INTERPOLATE + if(y != rows - 2) { + CR = (CR + *(cr + cols_2 - 1)) >> 1; + CB = (CB + *(cb + cols_2 - 1)) >> 1; + cr_r = Cr_r_tab[CR]; + cr_g = Cr_g_tab[CR]; + cb_g = Cb_g_tab[CB]; + cb_b = Cb_b_tab[CB]; + } +#endif + + L = L_tab[(int) *lum2++]; + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + *row2++ = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + + L = L_tab[(int) *lum2++]; + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + *row2++ = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + } + /* + * These values are at the start of the next line, (due + * to the ++'s above),but they need to be at the start + * of the line after that. 
+ */ + lum += cols_2 + cols_2; + lum2 += cols_2 + cols_2; + row1 += offset; + row2 += offset; + } +} + + +/* + * Erik Corry's pixel doubling routines for 15/16/24/32 bit screens. + */ + + +/* + *-------------------------------------------------------------- + * + * Twox2Color16DitherImage -- + * + * Converts image into 16 bit color at double size. + * + * Results: + * None. + * + * Side effects: + * None. + * + *-------------------------------------------------------------- + */ + +/* + * In this function I make use of a nasty trick. The tables have the lower + * 16 bits replicated in the upper 16. This means I can write ints and get + * the horisontal doubling for free (almost). + */ + +void Dither16Bit::ditherImageTwox2Color16(unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int rows, + int cols, + int mod) { + int L, CR, CB; + unsigned int *row1 = (unsigned int *)out; + unsigned int *row2 = row1 + cols + mod/2; + unsigned int *row3 = row2 + cols + mod/2; + unsigned int *row4 = row3 + cols + mod/2; + unsigned char *lum2; + int x, y; + int cr_r; + int cr_g; + int cb_g; + int cb_b; + int cols_2 = cols/2; + + lum2 = lum + cols_2 + cols_2; + for (y=0; y<rows; y+=2) { + for (x=0; x<cols_2; x++) { + int R, G, B; + int t; + + CR = *cr++; + CB = *cb++; + cr_r = Cr_r_tab[CR]; + cr_g = Cr_g_tab[CR]; + cb_g = Cb_g_tab[CB]; + cb_b = Cb_b_tab[CB]; + + L = L_tab[(int) *lum++]; + + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + row1[0] = t; + row1++; + row2[0] = t; + row2++; + + // INTERPOLATE + if(x != cols_2 - 1) { + CR = (CR + *cr) >> 1; + CB = (CB + *cb) >> 1; + cr_r = Cr_r_tab[CR]; + cr_g = Cr_g_tab[CR]; + cb_g = Cb_g_tab[CB]; + cb_b = Cb_b_tab[CB]; + } + // end + + L = L_tab[(int) *lum++]; + + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + row1[0] = t; + row1++; + row2[0] = t; + row2++; + + /* + * Now, do second row. 
+ */ + // INTERPOLATE + if(y != rows - 2) { + CR = (CR + *(cr + cols_2 - 1)) >> 1; + CB = (CB + *(cb + cols_2 - 1)) >> 1; + cr_r = Cr_r_tab[CR]; + cr_g = Cr_g_tab[CR]; + cb_g = Cb_g_tab[CB]; + cb_b = Cb_b_tab[CB]; + } + // end + + L = L_tab[(int) *lum2++]; + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + row3[0] = t; + row3++; + row4[0] = t; + row4++; + + L = L_tab[(int) *lum2++]; + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + row3[0] = t; + row3++; + row4[0] = t; + row4++; + } + lum += cols_2 + cols_2; + lum2 += cols_2 + cols_2; + row1 += 6 * cols_2 + 2*mod; + row3 += 6 * cols_2 + 2*mod; + row2 += 6 * cols_2 + 2*mod; + row4 += 6 * cols_2 + 2*mod; + } +} diff --git a/mpeglib/lib/util/render/dither/dither16Bit.h b/mpeglib/lib/util/render/dither/dither16Bit.h new file mode 100644 index 00000000..2e47c01c --- /dev/null +++ b/mpeglib/lib/util/render/dither/dither16Bit.h @@ -0,0 +1,55 @@ +/* + dither 16 bit depth yuv images + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. 
+ + For more information look at the file COPYRIGHT in this package + + */ + +#ifndef __DITHER16Bit_H +#define __DITHER16Bit_H + +#include "colorTableHighBit.h" + +class Dither16Bit { + + ColorTableHighBit* colorTableHighBit; + + TABTYPE *L_tab; + TABTYPE *Cr_r_tab; + TABTYPE *Cr_g_tab; + TABTYPE *Cb_g_tab; + TABTYPE *Cb_b_tab; + + PIXVAL *r_2_pix; + PIXVAL *g_2_pix; + PIXVAL *b_2_pix; + + public: + Dither16Bit(unsigned int redMask, + unsigned int greenMask,unsigned int blueMask); + ~Dither16Bit(); + + void ditherImageColor16(unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int rows, + int cols, + int offset); + + void ditherImageTwox2Color16(unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int rows, + int cols, + int mod); + +}; + +#endif diff --git a/mpeglib/lib/util/render/dither/dither32Bit.cpp b/mpeglib/lib/util/render/dither/dither32Bit.cpp new file mode 100644 index 00000000..61a1d2dc --- /dev/null +++ b/mpeglib/lib/util/render/dither/dither32Bit.cpp @@ -0,0 +1,253 @@ +/* + dither 32 bit depth yuv images + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. 
+ + For more information look at the file COPYRIGHT in this package + + */ + + +#include "dither32Bit.h" + + +#define doRow(row,Lum) *row++=(local_r_2_pix[Lum] | \ + local_g_2_pix[Lum] | local_b_2_pix[Lum]) + + +Dither32Bit::Dither32Bit(unsigned int redMask, + unsigned int greenMask,unsigned int blueMask) { + + + colorTableHighBit=new ColorTableHighBit(32,redMask,greenMask,blueMask); + L_tab=colorTableHighBit->getL_tab(); + Cr_r_tab=colorTableHighBit->getCr_r_tab(); + Cr_g_tab=colorTableHighBit->getCr_g_tab(); + Cb_g_tab=colorTableHighBit->getCb_g_tab(); + Cb_b_tab=colorTableHighBit->getCb_b_tab(); + + r_2_pix=colorTableHighBit->getr_2_pix(); + g_2_pix=colorTableHighBit->getg_2_pix(); + b_2_pix=colorTableHighBit->getb_2_pix(); + +} + + +Dither32Bit::~Dither32Bit() { + delete colorTableHighBit; +} + + +void Dither32Bit::ditherImageColor32(unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int rows, + int cols, + int mod) { + + int L; + int n; + int rowWork; + int colWork; + + unsigned int *row1, *row2; + unsigned char *lum2; + PIXVAL* local_r_2_pix; + PIXVAL* local_g_2_pix; + PIXVAL* local_b_2_pix; + + row1 = (unsigned int *)out; + + row2 = row1+cols+mod; + lum2 = lum+cols; + + // because the width/height are a multiply of a macroblocksize + // cols/rows always are even + colWork=cols>>1; + rowWork=rows>>1; + mod=cols+2*mod; + + while(rowWork--) { + n=colWork; + while(n--) { + + local_r_2_pix=r_2_pix+Cr_r_tab[*cr]; + local_g_2_pix=g_2_pix+Cr_g_tab[*cr++] + Cb_g_tab[*cb]; + local_b_2_pix=b_2_pix+Cb_b_tab[*cb++]; + + L = L_tab[*lum++]; + doRow(row1,L); + + L = L_tab[*lum++]; + doRow(row1,L); + + L = L_tab [*lum2++]; + doRow(row2,L); + + L = L_tab [*lum2++]; + doRow(row2,L); + + + } + row2 += mod; + lum += cols; + lum2 += cols; + row1 += mod; + + } + +} + +/* + *-------------------------------------------------------------- + * + * Twox2Color32 -- + * + * Converts image into 24/32 bit color. + * + * Results: + * None. 
+ * + * Side effects: + * None. + * + *-------------------------------------------------------------- + */ + +void Dither32Bit::ditherImageTwox2Color32(unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int rows, + int cols, + int mod) { + int L, CR, CB; + unsigned PIXVAL *row1 = (unsigned PIXVAL *)out; + unsigned PIXVAL *row2 = row1 + cols * ONE_TWO + mod; + unsigned PIXVAL *row3 = row2 + cols * ONE_TWO + mod; + unsigned PIXVAL *row4 = row3 + cols * ONE_TWO + mod; + unsigned char *lum2; + int x, y; + int cr_r; + int cr_g; + int cb_g; + int cb_b; + int cols_2 = cols/2; + int loffset = ONE_TWO * 6 *cols_2 + 4*mod ; + + lum2 = lum + cols_2 + cols_2; + for (y=0; y<rows; y+=2) { + for (x=0; x<cols_2; x++) { + int R, G, B; + PIXVAL t; + + CR = *cr++; + CB = *cb++; + cr_r = Cr_r_tab[CR]; + cr_g = Cr_g_tab[CR]; + cb_g = Cb_g_tab[CB]; + cb_b = Cb_b_tab[CB]; + + L = L_tab[ (int) *lum++]; + + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + row1[0] = t; + row2[0] = t; +#ifndef SIXTYFOUR_BIT + row1[1] = t; + row2[1] = t; +#endif + row1 += ONE_TWO; + row2 += ONE_TWO; + + /* INTERPOLATE is now standard */ + // INTERPOLATE + if(x != cols_2 - 1) { + CR = (CR + *cr) >> 1; + CB = (CB + *cb) >> 1; + cr_r = Cr_r_tab[CR]; + cr_g = Cr_g_tab[CR]; + cb_g = Cb_g_tab[CB]; + cb_b = Cb_b_tab[CB]; + } + // end + /* end INTERPOLATE */ + + L = L_tab[ (int) *lum++]; + + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + row1[0] = t; + row2[0] = t; +#ifndef SIXTYFOUR_BIT + row1[1] = t; + row2[1] = t; +#endif + row1 += ONE_TWO; + row2 += ONE_TWO; + + /* + * Now, do second row. 
+ */ + /* INTERPOLATE is now standard */ + // INTERPOLATE + if(y != rows - 2) { + CR = (unsigned int) (CR + *(cr + cols_2 - 1)) >> 1; + CB = (unsigned int) (CB + *(cb + cols_2 - 1)) >> 1; + cr_r = Cr_r_tab[CR]; + cr_g = Cr_g_tab[CR]; + cb_g = Cb_g_tab[CB]; + cb_b = Cb_b_tab[CB]; + } + // end + /* endif */ + L = L_tab[ (int) *lum2++]; + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + row3[0] = t; + row4[0] = t; +#ifndef SIXTYFOUR_BIT + row3[1] = t; + row4[1] = t; +#endif + row3 += ONE_TWO; + row4 += ONE_TWO; + + L = L_tab[(int) *lum2++]; + R = L + cr_r; + G = L + cr_g + cb_g; + B = L + cb_b; + + t = (r_2_pix[R] | g_2_pix[G] | b_2_pix[B]); + row3[0] = t; + row4[0] = t; +#ifndef SIXTYFOUR_BIT + row3[1] = t; + row4[1] = t; +#endif + row3 += ONE_TWO; + row4 += ONE_TWO; + } + lum += cols_2 + cols_2; + lum2 += cols_2 + cols_2; + + row1 += loffset; + row3 += loffset; + row2 += loffset; + row4 += loffset; + } +} diff --git a/mpeglib/lib/util/render/dither/dither32Bit.h b/mpeglib/lib/util/render/dither/dither32Bit.h new file mode 100644 index 00000000..440d021a --- /dev/null +++ b/mpeglib/lib/util/render/dither/dither32Bit.h @@ -0,0 +1,55 @@ +/* + dither 32 bit depth yuv images + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. 
+ + For more information look at the file COPYRIGHT in this package + + */ + +#ifndef __DITHER32Bit_H +#define __DITHER32Bit_H + +#include "colorTableHighBit.h" + +class Dither32Bit { + + ColorTableHighBit* colorTableHighBit; + + TABTYPE *L_tab; + TABTYPE *Cr_r_tab; + TABTYPE *Cr_g_tab; + TABTYPE *Cb_g_tab; + TABTYPE *Cb_b_tab; + + PIXVAL *r_2_pix; + PIXVAL *g_2_pix; + PIXVAL *b_2_pix; + + public: + Dither32Bit(unsigned int redMask, + unsigned int greenMask,unsigned int blueMask); + ~Dither32Bit(); + + void ditherImageColor32(unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int rows, + int cols, + int offset); + + void ditherImageTwox2Color32(unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int rows, + int cols, + int mod); + +}; + +#endif diff --git a/mpeglib/lib/util/render/dither/dither32mmx.cpp b/mpeglib/lib/util/render/dither/dither32mmx.cpp new file mode 100644 index 00000000..b5fa4807 --- /dev/null +++ b/mpeglib/lib/util/render/dither/dither32mmx.cpp @@ -0,0 +1,272 @@ +/* + MMX ditherer for 32 bit displays + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. + + For more information look at the file COPYRIGHT in this package + + */ + + + +#include "ditherMMX.h" + +#include <iostream> + +using namespace std; + + +#ifndef INTEL + void dither32_mmx(unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int rows, + int cols, + int mod) { + printf("urgs! 
dither32_mmx \n"); + printf("never should happen!\n"); + exit(0); +} + +#else + + +static unsigned long MMX32_80w[] = {0x00800080, 0x00800080}; +static unsigned long MMX32_10w[] = {0x00100010, 0x00100010}; +static unsigned long MMX32_00FFw[] = {0x00ff00ff, 0x00ff00ff}; +static unsigned long MMX32_FF00w[] = {0xff00ff00, 0xff00ff00}; +static unsigned short MMX32_Ycoeff[] = {0x4a, 0x4a, 0x4a, 0x4a}; +static unsigned short MMX32_Vredcoeff[] = {0x59, 0x59, 0x59, 0x59}; +static unsigned short MMX32_Ubluecoeff[] = {0x72, 0x72, 0x72, 0x72}; +static unsigned short MMX32_Ugrncoeff[] = {0xffea,0xffea,0xffea,0xffea}; +static unsigned short MMX32_Vgrncoeff[] = {0xffd2,0xffd2,0xffd2,0xffd2}; + +void dummy_dithermmx32() { + cout << "MMX32_10w:"<<MMX32_10w<<endl; + cout << "MMX32_80w:"<<MMX32_80w<<endl; + cout << "MMX32_Ubluecoeff:"<<MMX32_Ubluecoeff<<endl; + cout << "MMX32_Vredcoeff:"<<MMX32_Vredcoeff<<endl; + cout << "MMX32_Ugrncoeff:"<<MMX32_Ugrncoeff<<endl; + cout << "MMX32_Vgrncoeff:"<<MMX32_Vgrncoeff<<endl; + cout << "MMX32_Ycoeff:"<<MMX32_Ycoeff<<endl; + cout << "MMX32_00FFw:"<<MMX32_00FFw<<endl; + cout << "MMX32_FF00w:"<<MMX32_FF00w<<endl; +} + + +/** + This MMX assembler is my first assembler/MMX program ever. + Thus it maybe buggy. + Send patches to: + mvogt@rhrk.uni-kl.de + + After it worked fine I have "obfuscated" the code a bit to have + more parallism in the MMX units. This means I moved + initilisation around and delayed other instruction. + Performance measurement did not show that this brought any advantage + but in theory it _should_ be faster this way. + + The overall performanve gain to the C based dither was 30%-40%. + The MMX routine calculates 256bit=8RGB values in each cycle + (4 for row1 & 4 for row2) + + The red/green/blue.. coefficents are taken from the mpeg_play + player. They look nice, but I dont know if you can have + better values, to avoid integer rounding errors. 
+ + + IMPORTANT: + ========== + + It is a requirement that the cr/cb/lum are 8 byte aligned and + the out are 16byte aligned or you will/may get segfaults + +*/ + +void dither32_mmx(unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int rows, + int cols, + int mod) { + + + + unsigned int *row1; + unsigned int *row2; + row1 = (unsigned int *)out; // 32 bit target + + unsigned char* end = lum +cols*rows; // Pointer to the end + int x=cols; + row2=row1+cols+mod; // start of second row + mod=4*cols+8*mod; // increment for row1 in byte + + // buffer for asm function + int buf[6]; + buf[0]=(int)(lum+cols); // lum2 pointer + buf[1]=(int)end; + buf[2]=x; + buf[3]=mod; + buf[4]=0; //tmp0; + buf[5]=cols; + + + __asm__ __volatile__ ( + ".align 32\n" + "1:\n" + + // create Cr (result in mm1) + "movd (%0), %%mm1\n" // 0 0 0 0 v3 v2 v1 v0 + "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00 + "movd (%2), %%mm2\n" // 0 0 0 0 l3 l2 l1 l0 + "punpcklbw %%mm7,%%mm1\n" // 0 v3 0 v2 00 v1 00 v0 + "punpckldq %%mm1,%%mm1\n" // 00 v1 00 v0 00 v1 00 v0 + "psubw MMX32_80w,%%mm1\n" // mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 + + // create Cr_g (result in mm0) + "movq %%mm1,%%mm0\n" // r1 r1 r0 r0 r1 r1 r0 r0 + "pmullw MMX32_Vgrncoeff,%%mm0\n" // red*-46dec=0.7136*64 + "pmullw MMX32_Vredcoeff,%%mm1\n" // red*89dec=1.4013*64 + "psraw $6, %%mm0\n" // red=red/64 + "psraw $6, %%mm1\n" // red=red/64 + + + // create L1 L2 (result in mm2,mm4) + // L2=lum2 + "movl %2,16%5\n" // store register in tmp0 + "movl %5,%2\n" // lum2->register + "movd (%2),%%mm3\n" // 0 0 0 0 L3 L2 L1 L0 + "movl 16%5,%2\n" // tmp0->register + "punpckldq %%mm3,%%mm2\n" // L3 L2 L1 L0 l3 l2 l1 l0 + "movq %%mm2,%%mm4\n" // L3 L2 L1 L0 l3 l2 l1 l0 + "pand MMX32_FF00w, %%mm2\n" // L3 0 L1 0 l3 0 l1 0 + "pand MMX32_00FFw, %%mm4\n" // 0 L2 0 L0 0 l2 0 l0 + "psrlw $8,%%mm2\n" // 0 L3 0 L1 0 l3 0 l1 + + + + // create R (result in mm6) + "movq %%mm2,%%mm5\n" // 0 L3 0 L1 0 l3 0 l1 + "movq %%mm4,%%mm6\n" // 0 L2 0 
L0 0 l2 0 l0 + "paddsw %%mm1, %%mm5\n" // lum1+red:x R3 x R1 x r3 x r1 + "paddsw %%mm1, %%mm6\n" // lum1+red:x R2 x R0 x r2 x r0 + "packuswb %%mm5,%%mm5\n" // R3 R1 r3 r1 R3 R1 r3 r1 + "packuswb %%mm6,%%mm6\n" // R2 R0 r2 r0 R2 R0 r2 r0 + "pxor %%mm7,%%mm7\n" // 00 00 00 00 00 00 00 00 + "punpcklbw %%mm5,%%mm6\n" // R3 R2 R1 R0 r3 r2 r1 r0 + + + // create Cb (result in mm1) + "movd (%1), %%mm1\n" // 0 0 0 0 u3 u2 u1 u0 + "punpcklbw %%mm7,%%mm1\n" // 0 u3 0 u2 00 u1 00 u0 + "punpckldq %%mm1,%%mm1\n" // 00 u1 00 u0 00 u1 00 u0 + "psubw MMX32_80w,%%mm1\n" // mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 + // create Cb_g (result in mm5) + "movq %%mm1,%%mm5\n" // u1 u1 u0 u0 u1 u1 u0 u0 + "pmullw MMX32_Ugrncoeff,%%mm5\n" // blue*-109dec=1.7129*64 + "pmullw MMX32_Ubluecoeff,%%mm1\n" // blue*114dec=1.78125*64 + "psraw $6, %%mm5\n" // blue=red/64 + "psraw $6, %%mm1\n" // blue=blue/64 + + + // create G (result in mm7) + "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1 + "movq %%mm4,%%mm7\n" // 0 L2 0 L0 0 l2 0 l1 + "paddsw %%mm5, %%mm3\n" // lum1+Cb_g:x G3t x G1t x g3t x g1t + "paddsw %%mm5, %%mm7\n" // lum1+Cb_g:x G2t x G0t x g2t x g0t + "paddsw %%mm0, %%mm3\n" // lum1+Cr_g:x G3 x G1 x g3 x g1 + "paddsw %%mm0, %%mm7\n" // lum1+blue:x G2 x G0 x g2 x g0 + "packuswb %%mm3,%%mm3\n" // G3 G1 g3 g1 G3 G1 g3 g1 + "packuswb %%mm7,%%mm7\n" // G2 G0 g2 g0 G2 G0 g2 g0 + "punpcklbw %%mm3,%%mm7\n" // G3 G2 G1 G0 g3 g2 g1 g0 + + + // create B (result in mm5) + "movq %%mm2,%%mm3\n" // 0 L3 0 L1 0 l3 0 l1 + "movq %%mm4,%%mm5\n" // 0 L2 0 L0 0 l2 0 l1 + "paddsw %%mm1, %%mm3\n" // lum1+blue:x B3 x B1 x b3 x b1 + "paddsw %%mm1, %%mm5\n" // lum1+blue:x B2 x B0 x b2 x b0 + "packuswb %%mm3,%%mm3\n" // B3 B1 b3 b1 B3 B1 b3 b1 + "packuswb %%mm5,%%mm5\n" // B2 B0 b2 b0 B2 B0 b2 b0 + "punpcklbw %%mm3,%%mm5\n" // B3 B2 B1 B0 b3 b2 b1 b0 + + + // fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb) + + "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 + "pxor %%mm4,%%mm4\n" // 0 0 0 0 0 0 0 0 + "movq %%mm6,%%mm1\n" // R3 R2 
R1 R0 r3 r2 r1 r0 + "movq %%mm5,%%mm3\n" // B3 B2 B1 B0 b3 b2 b1 b0 + // process lower lum + "punpcklbw %%mm4,%%mm1\n" // 0 r3 0 r2 0 r1 0 r0 + "punpcklbw %%mm4,%%mm3\n" // 0 b3 0 b2 0 b1 0 b0 + "movq %%mm1,%%mm2\n" // 0 r3 0 r2 0 r1 0 r0 + "movq %%mm3,%%mm0\n" // 0 b3 0 b2 0 b1 0 b0 + "punpcklwd %%mm1,%%mm3\n" // 0 r1 0 b1 0 r0 0 b0 + "punpckhwd %%mm2,%%mm0\n" // 0 r3 0 b3 0 r2 0 b2 + + "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 + "movq %%mm7,%%mm1\n" // G3 G2 G1 G0 g3 g2 g1 g0 + "punpcklbw %%mm1,%%mm2\n" // g3 0 g2 0 g1 0 g0 0 + "punpcklwd %%mm4,%%mm2\n" // 0 0 g1 0 0 0 g0 0 + "por %%mm3, %%mm2\n" // 0 r1 g1 b1 0 r0 g0 b0 + "movq %%mm2,(%3)\n" // wrote out ! row1 + + "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 + "punpcklbw %%mm1,%%mm4\n" // g3 0 g2 0 g1 0 g0 0 + "punpckhwd %%mm2,%%mm4\n" // 0 0 g3 0 0 0 g2 0 + "por %%mm0, %%mm4\n" // 0 r3 g3 b3 0 r2 g2 b2 + "movq %%mm4,8(%3)\n" // wrote out ! row1 + + // fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb) + // this can be done "destructive" + "pxor %%mm2,%%mm2\n" // 0 0 0 0 0 0 0 0 + "punpckhbw %%mm2,%%mm6\n" // 0 R3 0 R2 0 R1 0 R0 + "punpckhbw %%mm1,%%mm5\n" // G3 B3 G2 B2 G1 B1 G0 B0 + "movq %%mm5,%%mm1\n" // G3 B3 G2 B2 G1 B1 G0 B0 + "punpcklwd %%mm6,%%mm1\n" // 0 R1 G1 B1 0 R0 G0 B0 + "movq %%mm1,(%4)\n" // wrote out ! row2 + "punpckhwd %%mm6,%%mm5\n" // 0 R3 G3 B3 0 R2 G2 B2 + "movq %%mm5,8(%4)\n" // wrote out ! 
row2 + + "addl $4,%2\n" // lum+4 + "addl $4,%5\n" // lum2+4 + "leal 16(%3),%3\n" // row1+16 + "leal 16(%4),%4\n" // row2+16 + "addl $2, %0\n" // cr+2 + "addl $2, %1\n" // cb+2 + + "subl $4,8%5\n" // x+4 x is buf[2] + "cmpl $0,8%5\n" + + "jne 1b\n" + "addl 20%5, %2\n" // lum += cols + "movl %2,16%5\n" // store register in tmp0 + "movl 20%5,%2\n" // cols->register + + "addl %2, %5\n" // lum2 += cols + "addl 12%5, %3\n" // row1+= mod is buf[0] + "addl 12%5, %4\n" // row2+= mod is buf[0] + + "movl %2, 8%5\n" // x=cols + "movl 16%5,%2\n" // store tmp0 in register + + "cmpl 4%5, %2\n" // buf[1] is end + "jl 1b\n" + "emms\n" + : + : "r" (cr), "r"(cb),"r"(lum), + "r"(row1),"r"(row2),"m"(buf[0]) + ); + + + +} + + +#endif diff --git a/mpeglib/lib/util/render/dither/dither8Bit.cpp b/mpeglib/lib/util/render/dither/dither8Bit.cpp new file mode 100644 index 00000000..4f85d3fb --- /dev/null +++ b/mpeglib/lib/util/render/dither/dither8Bit.cpp @@ -0,0 +1,306 @@ +/* + dither 8 bit depth yuv images + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. + + For more information look at the file COPYRIGHT in this package + + */ + + +#include "dither8Bit.h" + + +Dither8Bit::Dither8Bit(unsigned char pixel[256]) { + + int i; + for(i=0;i<256;i++) { + this->pixel[i]=pixel[i]; + } + colorTable8Bit=new ColorTable8Bit(); + + lum_values = colorTable8Bit->getLumValues(); + cr_values = colorTable8Bit->getCrValues(); + cb_values = colorTable8Bit->getCbValues(); + + + + initOrderedDither(); + +} + + +Dither8Bit::~Dither8Bit() { + int i; + for (i=0; i<DITH_SIZE; i++) { + delete cb_darrays[i]; + delete l_darrays[i]; + delete cr_darrays[i]; + } +} + + + + + +/* + *-------------------------------------------------------------- + * + * InitOrderedDither-- + * + * Structures initialized for ordered dithering. + * + * Results: + * None. 
+ * + * Side effects: + * None. + * + *-------------------------------------------------------------- + */ +void Dither8Bit::initOrderedDither() { + int i, j, k, err_range, threshval; + unsigned char *lmark, *cmark; + + for (i=0; i<DITH_SIZE; i++) { + lmark = l_darrays[i] = new unsigned char[256]; + for (j=0; j<lum_values[0]; j++) { + *lmark++ = 0; + } + for (j=0; j<(LUM_RANGE-1); j++) { + err_range = lum_values[j+1] - lum_values[j]; + threshval = ((i * err_range) / DITH_SIZE)+lum_values[j]; + + for (k=lum_values[j]; k<lum_values[j+1]; k++) { + if (k > threshval) { + *lmark++ = ((j+1) * (CR_RANGE * CB_RANGE)); + } + else { + *lmark++ = (j * (CR_RANGE * CB_RANGE)); + } + } + } + for (j=lum_values[LUM_RANGE-1]; j<256; j++) { + *lmark++ = (LUM_RANGE-1)*(CR_RANGE * CB_RANGE); + } + } + for (i=0; i<DITH_SIZE; i++) { + cmark = cr_darrays[i] = new unsigned char[256]; + + for (j=0; j<cr_values[0]; j++) { + *cmark++ = 0; + } + + for (j=0; j<(CR_RANGE-1); j++) { + err_range = cr_values[j+1] - cr_values[j]; + threshval = ((i * err_range) / DITH_SIZE)+cr_values[j]; + + for (k=cr_values[j]; k<cr_values[j+1]; k++) { + if (k > threshval) { + *cmark++ = ((j+1) * CB_RANGE); + } + else { + *cmark++ = (j * CB_RANGE); + } + } + } + + for (j=cr_values[CR_RANGE-1]; j<256; j++) { + *cmark++ = (CR_RANGE-1)*(CB_RANGE); + } + } + + for (i=0; i<DITH_SIZE; i++) { + cmark = cb_darrays[i] = new unsigned char[256]; + + for (j=0; j<cb_values[0]; j++) { + *cmark++ = 0; + } + + for (j=0; j<(CB_RANGE-1); j++) { + err_range = cb_values[j+1] - cb_values[j]; + threshval = ((i * err_range) / DITH_SIZE)+cb_values[j]; + + for (k=cb_values[j]; k<cb_values[j+1]; k++) { + if (k > threshval) { + *cmark++ = j+1; + } + else { + *cmark++ = j; + } + } + } + + for (j=cb_values[CB_RANGE-1]; j<256; j++) { + *cmark++ = CB_RANGE-1; + } + } +} + + + +/* + *-------------------------------------------------------------- + * + * OrderedDitherImage -- + * + * Dithers an image using an ordered dither. 
+ * Assumptions made: + * 1) The color space is allocated y:cr:cb = 8:4:4 + * 2) The spatial resolution of y:cr:cb is 4:1:1 + * The channels are dithered based on the standard + * ordered dither pattern for a 4x4 area. + * + * Results: + * None. + * + * Side effects: + * None. + * + *-------------------------------------------------------------- + */ + +void Dither8Bit::ditherImageOrdered (unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int h, + int w) { + unsigned char *l, *r, *b, *o1, *o2; + unsigned char *l2; + unsigned char L, R, B; + int i, j; + + l = lum; + l2 = lum+w; + r = cr; + b = cb; + o1 = out; + o2 = out+w; + + + for (i=0; i<h; i+=4) { + + for (j=0; j<w; j+=8) { + + R = r[0]; B = b[0]; + + L = l[0]; + o1[0] = pixel[(l_darrays[0][L] + cr_darrays[0][R] + cb_darrays[0][B])]; + L = l[1]; + o1[1] = pixel[(l_darrays[8][L] + cr_darrays[8][R] + cb_darrays[8][B])]; + L = l2[0]; + o2[0] = pixel[(l_darrays[12][L] + cr_darrays[12][R] + cb_darrays[12][B])]; + L = l2[1]; + o2[1] = pixel[(l_darrays[4][L] + cr_darrays[4][R] + cb_darrays[4][B])]; + + R = r[1]; B = b[1]; + + L = l[2]; + o1[2] = pixel[(l_darrays[2][L] + cr_darrays[2][R] + cb_darrays[2][B])]; + L = l[3]; + o1[3] = pixel[(l_darrays[10][L] + cr_darrays[10][R] + cb_darrays[10][B])]; + L = l2[2]; + o2[2] = pixel[(l_darrays[14][L] + cr_darrays[14][R] + cb_darrays[14][B])]; + L = l2[3]; + o2[3] = pixel[(l_darrays[6][L] + cr_darrays[6][R] + cb_darrays[6][B])]; + + R = r[2]; B = b[2]; + + L = l[4]; + o1[4] = pixel[(l_darrays[0][L] + cr_darrays[0][R] + cb_darrays[0][B])]; + L = l[5]; + o1[5] = pixel[(l_darrays[8][L] + cr_darrays[8][R] + cb_darrays[8][B])]; + L = l2[4]; + o2[4] = pixel[(l_darrays[12][L] + cr_darrays[12][R] + cb_darrays[12][B])]; + L = l2[5]; + o2[5] = pixel[(l_darrays[4][L] + cr_darrays[4][R] + cb_darrays[4][B])]; + + R = r[3]; B = b[3]; + + L = l[6]; + o1[6] = pixel[(l_darrays[2][L] + cr_darrays[2][R] + cb_darrays[2][B])]; + L = l[7]; + o1[7] = 
pixel[(l_darrays[10][L] + cr_darrays[10][R] + cb_darrays[10][B])]; + L = l2[6]; + o2[6] = pixel[(l_darrays[14][L] + cr_darrays[14][R] + cb_darrays[14][B])]; + L = l2[7]; + o2[7] = pixel[(l_darrays[6][L] + cr_darrays[6][R] + cb_darrays[6][B])]; + + l += 8; + l2 += 8; + r += 4; + b += 4; + o1 += 8; + o2 += 8; + } + + l += w; + l2 += w; + o1 += w; + o2 += w; + + for (j=0; j<w; j+=8) { + + R = r[0]; B = b[0]; + + L = l[0]; + o1[0] = pixel[(l_darrays[3][L] + cr_darrays[3][R] + cb_darrays[3][B])]; + L = l[1]; + o1[1] = pixel[(l_darrays[11][L] + cr_darrays[11][R] + cb_darrays[11][B])]; + L = l2[0]; + o2[0] = pixel[(l_darrays[15][L] + cr_darrays[15][R] + cb_darrays[15][B])]; + L = l2[1]; + o2[1] = pixel[(l_darrays[7][L] + cr_darrays[7][R] + cb_darrays[7][B])]; + + R = r[1]; B = b[1]; + + L = l[2]; + o1[2] = pixel[(l_darrays[1][L] + cr_darrays[1][R] + cb_darrays[1][B])]; + L = l[3]; + o1[3] = pixel[(l_darrays[9][L] + cr_darrays[9][R] + cb_darrays[9][B])]; + L = l2[2]; + o2[2] = pixel[(l_darrays[13][L] + cr_darrays[13][R] + cb_darrays[13][B])]; + L = l2[3]; + o2[3] = pixel[(l_darrays[5][L] + cr_darrays[5][R] + cb_darrays[5][B])]; + + R = r[2]; B = b[2]; + + L = l[4]; + o1[4] = pixel[(l_darrays[3][L] + cr_darrays[3][R] + cb_darrays[3][B])]; + L = l[5]; + o1[5] = pixel[(l_darrays[11][L] + cr_darrays[11][R] + cb_darrays[11][B])]; + L = l2[4]; + o2[4] = pixel[(l_darrays[15][L] + cr_darrays[15][R] + cb_darrays[15][B])]; + L = l2[5]; + o2[5] = pixel[(l_darrays[7][L] + cr_darrays[7][R] + cb_darrays[7][B])]; + + R = r[3]; B = b[3]; + + L = l[6]; + o1[6] = pixel[(l_darrays[1][L] + cr_darrays[1][R] + cb_darrays[1][B])]; + L = l[7]; + o1[7] = pixel[(l_darrays[9][L] + cr_darrays[9][R] + cb_darrays[9][B])]; + L = l2[6]; + o2[6] = pixel[(l_darrays[13][L] + cr_darrays[13][R] + cb_darrays[13][B])]; + L = l2[7]; + o2[7] = pixel[(l_darrays[5][L] + cr_darrays[5][R] + cb_darrays[5][B])]; + + l += 8; + l2 += 8; + r += 4; + b += 4; + o1 += 8; + o2 += 8; + } + + l += w; + l2 += w; + o1 += w; + o2 
+= w; + } +} + diff --git a/mpeglib/lib/util/render/dither/dither8Bit.h b/mpeglib/lib/util/render/dither/dither8Bit.h new file mode 100644 index 00000000..7bdd4d8f --- /dev/null +++ b/mpeglib/lib/util/render/dither/dither8Bit.h @@ -0,0 +1,63 @@ +/* + dither 8 bit depth yuv images + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. + + For more information look at the file COPYRIGHT in this package + + */ + + + +#ifndef __DITHER_8BIT_H +#define __DITHER_8BIT_H + + +#include "colorTable8Bit.h" + +#define DITH_SIZE 16 + + +class Dither8Bit { + + /* Structures used to implement hybrid ordered dither/floyd-steinberg + dither algorithm. + */ + + unsigned char *l_darrays[DITH_SIZE]; + unsigned char *cr_darrays[DITH_SIZE]; + unsigned char *cb_darrays[DITH_SIZE]; + + // private colormap + unsigned char pixel[256]; + + ColorTable8Bit* colorTable8Bit; + + // Arrays holding quantized value ranged for lum, cr, and cb. + // (used for 8 Bit) + + int* lum_values; + int* cr_values; + int* cb_values; + + + public: + Dither8Bit(unsigned char pixel[256]); + ~Dither8Bit(); + + void ditherImageOrdered (unsigned char* lum, + unsigned char* cr, + unsigned char* cb, + unsigned char* out, + int h, + int w); + + private: + void initOrderedDither(); +}; + +#endif + diff --git a/mpeglib/lib/util/render/dither/ditherDef.h b/mpeglib/lib/util/render/dither/ditherDef.h new file mode 100644 index 00000000..2e8d7d0e --- /dev/null +++ b/mpeglib/lib/util/render/dither/ditherDef.h @@ -0,0 +1,100 @@ +/* + global definitions for dithering + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. 
For more information look at the file COPYRIGHT in this package

 */



#ifndef __DITHERDEF_H
#define __DITHERDEF_H


#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

extern "C" {
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
}


// Compilers older than gcc 2.91 (egcs) get the math inlines switched off
// before <math.h> is included.
#ifdef __GNUC__
#if (__GNUC__ < 2 || ( __GNUC__ == 2 && __GNUC_MINOR__ < 91 ) )
#ifndef _AIX
#warning "inline code disabled! (buggy egcs version)"
#undef __NO_MATH_INLINES
#define __NO_MATH_INLINES 1
#endif
#endif
#endif
#include <math.h>



/* Gamma correction stuff (defined in ditherWrapper.cpp) */
extern int gammaCorrectFlag;
extern double gammaCorrect;

/* Chroma correction stuff (defined in ditherWrapper.cpp) */
extern int chromaCorrectFlag;
extern double chromaCorrect;


// Weight of each channel inside a combined colormap index:
// index = lum*LUM_BASE + cr*CR_BASE + cb*CB_BASE
#define CB_BASE 1
#define CR_BASE (CB_BASE*CB_RANGE)
#define LUM_BASE (CR_BASE*CR_RANGE)

// element type of the colour conversion tables
#define TABTYPE short

#ifdef SIXTYFOUR_BIT
#define PIXVAL long
#else
#define PIXVAL int
#endif

#ifdef SIXTYFOUR_BIT
#define ONE_TWO 1
#else
#define ONE_TWO 2
#endif



// NOTE: function-like macros, both arguments may be evaluated twice.
#define Min(x,y) (((x) < (y)) ? (x) : (y))
#define Max(x,y) (((x) > (y)) ? (x) : (y))

// Scales a signed chroma value (centred on 0) by chromaCorrect and clamps
// the integer result to [-128,127].
#define CHROMA_CORRECTION128(x) ((x) >= 0 \
                        ? Min(127,  (int)(((x) * chromaCorrect))) \
                        : Max(-128, (int)(((x) * chromaCorrect))))
// Scales the distance of x from 128 by chromaCorrect, result as double
// clamped to [0,255].
#define CHROMA_CORRECTION256D(x) ((x) >= 128 \
                        ? 128.0 + Min(127.0, (((x)-128.0) * chromaCorrect)) \
                        : 128.0 - Min(128.0, (((128.0-(x))* chromaCorrect))))



// Gamma-corrects a 0..255 value with exponent 1/gammaCorrect.
#define GAMMA_CORRECTION(x) ((int)(pow((x) / 255.0, 1.0/gammaCorrect)* 255.0))

// Same as CHROMA_CORRECTION128 but without the conversion to int.
#define CHROMA_CORRECTION128D(x) ((x) >= 0 \
                        ? Min(127.0,  ((x) * chromaCorrect)) \
                        : Max(-128.0, ((x) * chromaCorrect)))

// Same as CHROMA_CORRECTION256D but with an integer result.
#define CHROMA_CORRECTION256(x) ((x) >= 128 \
                        ? 128 + Min(127, (int)(((x)-128.0) * chromaCorrect)) \
                        : 128 - Min(128, (int)((128.0-(x)) * chromaCorrect)))

// Range values for lum, cr, cb.
// (number of quantisation levels per channel of the 8 bit colormap)
#define LUM_RANGE 8
#define CR_RANGE 4
#define CB_RANGE 4


#endif

// diff --git a/mpeglib/lib/util/render/dither/ditherMMX.h b/mpeglib/lib/util/render/dither/ditherMMX.h
// new file mode 100644, index 00000000..2f08b689, --- /dev/null, @@ -0,0 +1,38 @@
/*
  mmx ditherer
  Copyright (C) 2000  Martin Vogt

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU Library General Public License as published by
  the Free Software Foundation.

  For more information look at the file COPYRIGHT in this package

 */


#ifndef __DITHERMMX_H
#define __DITHERMMX_H

#include "ditherDef.h"


// The MMX dither routines come from NIST.
// NIST is an mpeg2/dvd player
// more: http://home.germany.net/100-5083/

// YUV -> 16 bit RGB. mod is the number of extra destination pixels
// per row.
extern void ditherBlock(unsigned char *lum,
                        unsigned char *cr,
                        unsigned char *cb,
                        unsigned char *out,
                        int rows, int cols, int mod);

// YUV -> 32 bit RGB. mod is the number of extra destination pixels
// per row.
extern void dither32_mmx(unsigned char* lum,
                         unsigned char* cr,
                         unsigned char* cb,
                         unsigned char* out,
                         int rows,
                         int cols,
                         int mod);


#endif

// diff --git a/mpeglib/lib/util/render/dither/ditherRGB.cpp b/mpeglib/lib/util/render/dither/ditherRGB.cpp
// new file mode 100644, index 00000000..1bcdb2ff, --- /dev/null, @@ -0,0 +1,230 @@
/*
  copies RGB images to a destination
  Copyright (C) 2000  Martin Vogt

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU Library General Public License as published by
  the Free Software Foundation.
+ + For more information look at the file COPYRIGHT in this package + + */ + + +#include "ditherRGB.h" + +#include <iostream> + +using namespace std; + +DitherRGB::DitherRGB() { +} + + +DitherRGB::~DitherRGB() { +} + + +int DitherRGB::getDepth(int pixel) { + int byteDepth=0; + + switch(pixel) { + case 8: + byteDepth=1; + break; + case 15: + case 16: + byteDepth=2; + break; + case 24: + case 32: + byteDepth=4; + break; + default: + cout << "unknown byteDepth:"<<pixel + << " in DitherRGB_flipped::flipRGBImage"<<endl; + } + return byteDepth; + +} + +void DitherRGB::ditherRGBImage(unsigned char* dest,unsigned char* src, + int depth,int width,int height,int offset) { + int byteDepth=getDepth(depth); + if (byteDepth == 0) { + return; + } + + + if (offset==0) { + int bytes=height*width*byteDepth; + memcpy(dest,src,bytes); + return; + } + + int i; + int lineSize=width*byteDepth; + + offset=offset*byteDepth+lineSize; + + for (i=0;i<height;i++) { + memcpy(dest,src,lineSize); + src+=lineSize; + dest+=offset; + } + + +} + +void DitherRGB::ditherRGBImage_x2(unsigned char* dest,unsigned char* src, + int depth,int width,int height,int offset) { + + int byteDepth=getDepth(depth); + if (byteDepth == 0) { + return; + } + + switch(byteDepth) { + case 1: + ditherRGB1Byte_x2(dest,src,1,width, height,offset); + break; + case 2: + ditherRGB2Byte_x2(dest,src,2,width, height,offset); + break; + case 4: + ditherRGB4Byte_x2(dest,src,4,width, height,offset); + break; + default: + cout <<"ditherRGBImage_x2 byteDepth:"<<byteDepth + <<" not supported"<<endl; + } +} + + +void DitherRGB::ditherRGB1Byte_x2(unsigned char* dest,unsigned char* src, + int depth,int width,int height,int offset) { + + // + // dest destr + // destd destrd + + int lineInc=2*width+offset; + unsigned char* destr=dest+1; + unsigned char* destd=dest+lineInc; + unsigned char* destrd=destd+1; + + int row; + int col; + // + // We copy byte by byte this is slow, but works for + // all byteDepth + // this memcpy can be optimized 
with MMX very i) good ii) easily + + for(row=0;row<height;row++) { + for(col=0;col<width;col++) { + *dest++=*src; + *destr++=*src; + *destd++=*src; + *destrd++=*src; + dest++; + destr++; + destd++; + destrd++; + + src++; + } + dest+=lineInc; + destr+=lineInc; + destd+=lineInc; + destrd+=lineInc; + } +} + + +void DitherRGB::ditherRGB2Byte_x2(unsigned char* destination, + unsigned char* source, + int depth,int width,int height,int offset) { + // + // dest destr + // destd destrd + + unsigned short int* src=(unsigned short int*) source; + unsigned short int* dest=(unsigned short int*) destination; + + int lineInc=2*width+offset; + unsigned short int* destr=dest+1; + unsigned short int* destd=dest+lineInc; + unsigned short int* destrd=destd+1; + + int row; + int col; + // + // We copy byte by byte this is slow, but works for + // all byteDepth + // this memcpy can be optimized with MMX very i) good ii) easily + + for(row=0;row<height;row++) { + for(col=0;col<width;col++) { + *dest++=*src; + *destr++=*src; + *destd++=*src; + *destrd++=*src; + dest++; + destr++; + destd++; + destrd++; + + src++; + } + dest+=lineInc; + destr+=lineInc; + destd+=lineInc; + destrd+=lineInc; + } +} + + +void DitherRGB::ditherRGB4Byte_x2(unsigned char* destination, + unsigned char* source, + int depth,int width,int height,int offset) { + + // + // dest destr + // destd destrd + + unsigned int* src=(unsigned int*) source; + unsigned int* dest=(unsigned int*) destination; + + int lineInc=2*width+offset; + unsigned int* destr=dest+1; + unsigned int* destd=dest+lineInc; + unsigned int* destrd=destd+1; + + int row; + int col; + // + // We copy byte by byte this is slow, but works for + // all byteDepth + // this memcpy can be optimized with MMX very i) good ii) easily + + for(row=0;row<height;row++) { + for(col=0;col<width;col++) { + *dest++=*src; + *destr++=*src; + *destd++=*src; + *destrd++=*src; + dest++; + destr++; + destd++; + destrd++; + + src++; + } + dest+=lineInc; + destr+=lineInc; + 
destd+=lineInc; + destrd+=lineInc; + } + +} + diff --git a/mpeglib/lib/util/render/dither/ditherRGB.h b/mpeglib/lib/util/render/dither/ditherRGB.h new file mode 100644 index 00000000..6f24cd8c --- /dev/null +++ b/mpeglib/lib/util/render/dither/ditherRGB.h @@ -0,0 +1,45 @@ +/* + copys RGB images to a destination + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. + + For more information look at the file COPYRIGHT in this package + + */ + +#ifndef __DITHERRGB_H +#define __DITHERRGB_H + +#include "colorTableHighBit.h" + +class DitherRGB { + + int flipSize; + unsigned char* flipSpace; + + public: + DitherRGB(); + ~DitherRGB(); + + // Note: this methods swaps the image + // itsself + void ditherRGBImage(unsigned char* dest,unsigned char* src, + int depth,int width,int height,int offset); + void ditherRGBImage_x2(unsigned char* dest,unsigned char* src, + int depth,int width,int height,int offset); + private: + int getDepth(int pixel); + // depth is here in byte! + void ditherRGB1Byte_x2(unsigned char* dest,unsigned char* src, + int depth,int width,int height,int offset); + void ditherRGB2Byte_x2(unsigned char* dest,unsigned char* src, + int depth,int width,int height,int offset); + void ditherRGB4Byte_x2(unsigned char* dest,unsigned char* src, + int depth,int width,int height,int offset); + +}; + +#endif diff --git a/mpeglib/lib/util/render/dither/ditherRGB_flipped.cpp b/mpeglib/lib/util/render/dither/ditherRGB_flipped.cpp new file mode 100644 index 00000000..ba177675 --- /dev/null +++ b/mpeglib/lib/util/render/dither/ditherRGB_flipped.cpp @@ -0,0 +1,82 @@ +/* + flips RGB images + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. 
+ + For more information look at the file COPYRIGHT in this package + + */ + + +#include "ditherRGB_flipped.h" + +#include <iostream> + +using namespace std; + + +DitherRGB_flipped::DitherRGB_flipped() { + flipSpace=NULL; + flipSize=0; +} + +DitherRGB_flipped::~DitherRGB_flipped() { + if (flipSpace != NULL) { + delete flipSpace; + } +} + + + + +void DitherRGB_flipped::flipRGBImage(unsigned char* dest,unsigned char* src, + int depth,int width,int height,int ) { + + int byteDepth; + + switch(depth) { + case 8: + byteDepth=1; + break; + case 15: + case 16: + byteDepth=2; + break; + case 24: + case 32: + byteDepth=4; + break; + default: + cout << "unknown byteDepth:"<<depth + << " in DitherRGB_flipped::flipRGBImage"<<endl; + return; + } + + + int spaceNeeded=width*height*byteDepth; + + if (spaceNeeded > flipSize) { + if (flipSpace != NULL) { + delete flipSpace; + } + cout << "flipSpace:"<<spaceNeeded<<endl; + flipSpace=new unsigned char[spaceNeeded+64]; + flipSize=spaceNeeded; + } + + int i; + int lineSize=width*byteDepth; + unsigned char* end=dest+lineSize*(height-1); + + for (i=0;i<height;i++) { + memcpy(end,src,lineSize); + src+=lineSize; + end-=lineSize; + } + +} + + diff --git a/mpeglib/lib/util/render/dither/ditherRGB_flipped.h b/mpeglib/lib/util/render/dither/ditherRGB_flipped.h new file mode 100644 index 00000000..1d99f7f6 --- /dev/null +++ b/mpeglib/lib/util/render/dither/ditherRGB_flipped.h @@ -0,0 +1,34 @@ +/* + flips RGB images + Copyright (C) 2000 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. 
For more information look at the file COPYRIGHT in this package

 */

#ifndef __DITHERRGB_FLIPPED_H
#define __DITHERRGB_FLIPPED_H

#include "colorTableHighBit.h"

// Copies RGB pictures upside down (see flipRGBImage).
// NOTE(review): the class owns flipSpace through a raw pointer and does
// not declare copy operations - do not copy instances.
class DitherRGB_flipped {

  // size in bytes and storage of a scratch buffer that only grows
  int flipSize;
  unsigned char* flipSpace;

 public:
  DitherRGB_flipped();
  ~DitherRGB_flipped();

  // Note: this method writes the picture from src to dest with the
  // rows in reverse order. depth is in bits per pixel; the offset
  // argument is ignored by the implementation.
  void flipRGBImage(unsigned char* dest,unsigned char* src,
                    int depth,int width,int height,int offset);

};

#endif

// diff --git a/mpeglib/lib/util/render/dither/ditherWrapper.cpp b/mpeglib/lib/util/render/dither/ditherWrapper.cpp
// new file mode 100644, index 00000000..c6c37a79, --- /dev/null, @@ -0,0 +1,246 @@
/*
  wrapper for X11 Window
  Copyright (C) 1999  Martin Vogt

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU Library General Public License as published by
  the Free Software Foundation.

  For more information look at the file COPYRIGHT in this package

 */


#include "ditherWrapper.h"

#include <iostream>

using namespace std;


/*
   Flag for gamma correction
   Makes images brighter/darker.
   It's in the source but not activated (for now)
*/
int gammaCorrectFlag = 0;
double gammaCorrect = 1.0;

/*
   Flag for chroma correction.
   Reduces the color intensity.
+ It's in the source but not activated (for now) +*/ +int chromaCorrectFlag = 0; +double chromaCorrect = 1.0; + + + +DitherWrapper::DitherWrapper(int bpp,unsigned int redMask, + unsigned int greenMask,unsigned int blueMask, + unsigned char pixel[256]) { + + this->bpp=bpp; + this->redMask=redMask; + this->greenMask=greenMask; + this->blueMask=blueMask; + + + dither8Bit=new Dither8Bit(pixel); + dither16Bit=new Dither16Bit(redMask,greenMask,blueMask); + dither32Bit=new Dither32Bit(redMask,greenMask,blueMask); + ditherRGB_flipped=new DitherRGB_flipped(); + ditherRGB=new DitherRGB(); + + +#ifdef INTEL + lmmx=mm_support(); +#else + lmmx=false; +#endif + + +} + + +DitherWrapper::~DitherWrapper(){ + delete dither16Bit; + delete dither8Bit; + delete dither32Bit; + delete ditherRGB_flipped; + delete ditherRGB; +} + + + + + +void DitherWrapper::doDither(YUVPicture* pic,int depth,int imageMode, + unsigned char* dest,int offset) { + + + // + // according to the input imageType and the output area + // handle different dither methods + // + + int inputType=pic->getImageType(); + + if ( (inputType == PICTURE_YUVMODE_CR_CB) || + (inputType == PICTURE_YUVMODE_CB_CR) ) { + doDitherYUV(pic,depth,imageMode,dest,offset); + return; + } + + if ( (inputType == PICTURE_RGB) || + (inputType == PICTURE_RGB_FLIPPED) ){ + doDitherRGB(pic,depth,imageMode,dest,offset); + return; + } + + cout << "unknown inputType:"<<inputType + << " in DitherWrapper::doDither"<<endl; +} + + +void DitherWrapper::doDitherRGB(YUVPicture* pic,int depth,int imageMode, + unsigned char* dest,int offset) { + + int inputType=pic->getImageType(); + + switch(inputType) { + case PICTURE_RGB: + doDitherRGB_NORMAL(pic,depth,imageMode,dest,offset); + break; + case PICTURE_RGB_FLIPPED: + doDitherRGB_FLIPPED(pic,depth,imageMode,dest,offset); + break; + default: + cout << "unknown RGB type:"<<inputType<<" in DitherWrapper"<<endl; + exit(0); + } +} + + +void DitherWrapper::doDitherRGB_NORMAL(YUVPicture* pic, + int depth,int 
imageMode, + unsigned char* dest,int offset) { + + int w=pic->getWidth(); + int h=pic->getHeight(); + + unsigned char* src=pic->getImagePtr(); + + if (imageMode & _IMAGE_DOUBLE) { + ditherRGB->ditherRGBImage_x2(dest,src,depth,w,h,offset); + } else { + ditherRGB->ditherRGBImage(dest,src,depth,w,h,offset); + } +} + +void DitherWrapper::doDitherRGB_FLIPPED(YUVPicture* pic, + int depth,int imageMode, + unsigned char* dest,int offset) { + + int w=pic->getWidth(); + int h=pic->getHeight(); + + unsigned char* src=pic->getImagePtr(); + + ditherRGB_flipped->flipRGBImage(dest,src,depth,w,h,offset); +} + + + +void DitherWrapper::doDitherYUV(YUVPicture* pic,int depth,int imageMode, + unsigned char* dest,int offset) { + + if (imageMode & _IMAGE_DOUBLE) { + doDither_x2(pic,depth,dest,offset); + } else { + doDither_std(pic,depth,dest,offset); + } +} + + +void DitherWrapper::doDither_std(YUVPicture* pic,int depth, + unsigned char* dest,int offset){ + + int h=pic->getHeight(); + int w=pic->getWidth(); + unsigned char* lum=pic->getLuminancePtr(); + unsigned char* cr=pic->getCrPtr(); + unsigned char* cb=pic->getCbPtr(); + + + switch (depth) { + case 8: + dither8Bit->ditherImageOrdered(lum, cr, cb,dest , h, w); + break; + case 16: + if (lmmx) { + ditherBlock(lum,cr,cb,dest,h,w,offset); + } else { + dither16Bit->ditherImageColor16(lum,cr,cb,dest,h,w,offset); + } + + break; + case 24: + case 32: + if (lmmx) { + dither32_mmx(lum, cr, cb,dest ,h,w,offset); + } else { + dither32Bit->ditherImageColor32(lum, cr, cb,dest ,h,w,offset); + } + + + break; + default: + cout << "cannot dither depth:"<<depth<<endl; + } + +} + + +void DitherWrapper::doDither_x2(YUVPicture* pic,int depth, + unsigned char* dest,int offset){ + + int h=pic->getHeight(); + int w=pic->getWidth(); + unsigned char* lum=pic->getLuminancePtr(); + unsigned char* cr=pic->getCrPtr(); + unsigned char* cb=pic->getCbPtr(); + + + switch (depth) { + case 8: { + // we do dither with the 8Bit std YUV ditherer to RGB + // and then we do 
the double part with the + // RGB ditherer. Its obviously much slower but at + // least it works. To not allocate memory twice + // we are a bit tricky. We know that the image + // has space for doubls size. We but the not double size + // image at the bottom of the dest. Maybe that + // the last line gets overwritten + int memPos=3*h*w; + dither8Bit->ditherImageOrdered(lum, cr, cb,dest+memPos, h, w); + unsigned char* src=dest+memPos; + ditherRGB->ditherRGBImage_x2(dest,src,depth,w,h,0); + break; + } + case 16: + dither16Bit->ditherImageTwox2Color16(lum,cr,cb,dest,h,w,offset); + break; + case 24: + case 32: + if (lmmx) { + //dither32x2_mmx(lum, cr, cb,dest ,h,w,offset); + dither32Bit->ditherImageTwox2Color32(lum,cr,cb,dest,h,w,offset); + } else { + dither32Bit->ditherImageTwox2Color32(lum,cr,cb,dest,h,w,offset); + } + break; + default: + cout << "cannot dither depth:" << depth << endl; + } +} diff --git a/mpeglib/lib/util/render/dither/ditherWrapper.h b/mpeglib/lib/util/render/dither/ditherWrapper.h new file mode 100644 index 00000000..b01abff8 --- /dev/null +++ b/mpeglib/lib/util/render/dither/ditherWrapper.h @@ -0,0 +1,80 @@ +/* + wrapper for X11 Window + Copyright (C) 1999 Martin Vogt + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Library General Public License as published by + the Free Software Foundation. + + For more information look at the file COPYRIGHT in this package + + */ + + +#ifndef __DITHERWRAPPER_H +#define __DITHERWRAPPER_H + + +#include "../../mmx/mmx.h" + +#include "../yuvPicture.h" +#include "../imageBase.h" +#include <stdlib.h> +#include "ditherMMX.h" +#include "dither8Bit.h" +#include "dither16Bit.h" +#include "dither32Bit.h" +#include "ditherRGB_flipped.h" +#include "ditherRGB.h" + + +/** + Wraps all calls to software ditherer and the different + resolutions,mmx enhancements, and doublesize ditherers. 
+*/ + + +class DitherWrapper { + + int lmmx; + + int bpp; + // colorMask + unsigned int redMask; + unsigned int greenMask; + unsigned int blueMask; + + Dither8Bit* dither8Bit; + Dither16Bit* dither16Bit; + Dither32Bit* dither32Bit; + DitherRGB_flipped* ditherRGB_flipped; + DitherRGB* ditherRGB; + + public: + DitherWrapper(int bpp,unsigned int redMask, + unsigned int greenMask,unsigned int blueMask, + unsigned char pixel[256]); + ~DitherWrapper(); + +/* int getDitherSize(); */ +/* void setDitherSize(int ditherMode); */ + + void doDither(YUVPicture* pic,int depth,int imageMode, + unsigned char* dest,int offset); + + + private: + void doDitherYUV(YUVPicture* pic,int depth,int imageMode, + unsigned char* dest,int offset); + void doDitherRGB(YUVPicture* pic,int depth,int imageMode, + unsigned char* dest,int offset); + void doDitherRGB_NORMAL(YUVPicture* pic,int depth,int imageMode, + unsigned char* dest,int offset); + void doDitherRGB_FLIPPED(YUVPicture* pic,int depth,int imageMode, + unsigned char* dest,int offset); + + void doDither_std(YUVPicture* pic,int depth,unsigned char* dest,int offset); + void doDither_x2(YUVPicture* pic,int depth,unsigned char* dest,int offset); +}; + +#endif diff --git a/mpeglib/lib/util/render/dither/ditherer_mmx16.cpp b/mpeglib/lib/util/render/dither/ditherer_mmx16.cpp new file mode 100644 index 00000000..757f0676 --- /dev/null +++ b/mpeglib/lib/util/render/dither/ditherer_mmx16.cpp @@ -0,0 +1,256 @@ + +#include "ditherMMX.h" + +#include <iostream> + +using namespace std; + +#ifndef INTEL +// nothing +void ditherBlock(unsigned char *lum, unsigned char *cr, unsigned char *cb, + unsigned char *out, + int cols, int rows, int screen_width) { + printf("call to ditherBlock. 
this should never happen\n"); + printf("check mmx detection routine.\n"); + exit(0); +} +#else + + +static long long MMX16_0 = 0L; +static unsigned long MMX16_10w[] = {0x00100010, 0x00100010}; +static unsigned long MMX16_80w[] = {0x00800080, 0x00800080}; +static unsigned long MMX16_00FFw[] = {0x00ff00ff, 0x00ff00ff}; +static unsigned short MMX16_Ublucoeff[] = {0x81, 0x81, 0x81, 0x81}; +static unsigned short MMX16_Vredcoeff[] = {0x66, 0x66, 0x66, 0x66}; +static unsigned short MMX16_Ugrncoeff[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8}; +static unsigned short MMX16_Vgrncoeff[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd}; +static unsigned short MMX16_Ycoeff[] = {0x4a, 0x4a, 0x4a, 0x4a}; +static unsigned short MMX16_redmask[] = {0xf800, 0xf800, 0xf800, 0xf800}; +static unsigned short MMX16_grnmask[] = {0x7e0, 0x7e0, 0x7e0, 0x7e0}; + +void dummy_dithermmx16() { + cout << "MMX16_0"<<MMX16_0<<endl; + cout << "MMX16_10w:"<<MMX16_10w<<endl; + cout << "MMX16_80w:"<<MMX16_80w<<endl; + cout << "MMX16_Ublucoeff:"<<MMX16_Ublucoeff<<endl; + cout << "MMX16_Vredcoeff:"<<MMX16_Vredcoeff<<endl; + cout << "MMX16_Ugrncoeff:"<<MMX16_Ugrncoeff<<endl; + cout << "MMX16_Vgrncoeff:"<<MMX16_Vgrncoeff<<endl; + cout << "MMX16_Ycoeff:"<<MMX16_Ycoeff<<endl; + cout << "MMX16_redmask:"<<MMX16_redmask<<endl; + cout << "MMX16_grnmask:"<<MMX16_grnmask<<endl; + cout << "MMX16_00FFw:"<<MMX16_00FFw<<endl; +} + + +void ditherBlock(unsigned char *lum, + unsigned char *cr, + unsigned char *cb, + unsigned char *out, + int rows, + int cols, + int mod) { + + unsigned short *row1; + unsigned short *row2; + row1 = (unsigned short* )out; // 16 bit target + + unsigned char* end = lum +cols*rows; // Pointer to the end + int x=cols; + row2=row1+mod+cols; // start of second row + mod=2*cols+4*mod; // increment for row1 in byte + + // buffer for asm function + int buf[6]; + buf[0]=(int)(lum+cols); // lum2 pointer + buf[1]=(int)end; + buf[2]=x; + buf[3]=mod; + buf[4]=0; //tmp0; + buf[5]=cols; + + + + __asm__ __volatile__( + ".align 
32\n" + "1:\n" + "movd (%1), %%mm0\n" // 4 Cb 0 0 0 0 u3 u2 u1 u0 + "pxor %%mm7, %%mm7\n" + "movd (%0), %%mm1\n" // 4 Cr 0 0 0 0 v3 v2 v1 v0 + "punpcklbw %%mm7, %%mm0\n" // 4 W cb 0 u3 0 u2 0 u1 0 u0 + "punpcklbw %%mm7, %%mm1\n" // 4 W cr 0 v3 0 v2 0 v1 0 v0 + "psubw MMX16_80w, %%mm0\n" + "psubw MMX16_80w, %%mm1\n" + "movq %%mm0, %%mm2\n" // Cb 0 u3 0 u2 0 u1 0 u0 + "movq %%mm1, %%mm3\n" // Cr + "pmullw MMX16_Ugrncoeff, %%mm2\n" // Cb2green 0 R3 0 R2 0 R1 0 R0 + "movq (%2), %%mm6\n" // L1 l7 L6 L5 L4 L3 L2 L1 L0 + "pmullw MMX16_Ublucoeff, %%mm0\n" // Cb2blue + "pand MMX16_00FFw, %%mm6\n" // L1 00 L6 00 L4 00 L2 00 L0 + "pmullw MMX16_Vgrncoeff, %%mm3\n" // Cr2green + "movq (%2), %%mm7\n" // L2 + "pmullw MMX16_Vredcoeff, %%mm1\n" // Cr2red + // "psubw MMX16_10w, %%mm6\n" + "psrlw $8, %%mm7\n" // L2 00 L7 00 L5 00 L3 00 L1 + "pmullw MMX16_Ycoeff, %%mm6\n" // lum1 + // "psubw MMX16_10w, %%mm7\n" // L2 + "paddw %%mm3, %%mm2\n" // Cb2green + Cr2green == green + "pmullw MMX16_Ycoeff, %%mm7\n" // lum2 + + "movq %%mm6, %%mm4\n" // lum1 + "paddw %%mm0, %%mm6\n" // lum1 +blue 00 B6 00 B4 00 B2 00 B0 + "movq %%mm4, %%mm5\n" // lum1 + "paddw %%mm1, %%mm4\n" // lum1 +red 00 R6 00 R4 00 R2 00 R0 + "paddw %%mm2, %%mm5\n" // lum1 +green 00 G6 00 G4 00 G2 00 G0 + "psraw $6, %%mm4\n" // R1 0 .. 64 + "movq %%mm7, %%mm3\n" // lum2 00 L7 00 L5 00 L3 00 L1 + "psraw $6, %%mm5\n" // G1 - .. + + "paddw %%mm0, %%mm7\n" // Lum2 +blue 00 B7 00 B5 00 B3 00 B1 + "psraw $6, %%mm6\n" // B1 0 .. 
64 + "packuswb %%mm4, %%mm4\n" // R1 R1 + "packuswb %%mm5, %%mm5\n" // G1 G1 + "packuswb %%mm6, %%mm6\n" // B1 B1 + "punpcklbw %%mm4, %%mm4\n" + "punpcklbw %%mm5, %%mm5\n" + + "pand MMX16_redmask, %%mm4\n" + "psllw $3, %%mm5\n" // GREEN 1 + "punpcklbw %%mm6, %%mm6\n" + "pand MMX16_grnmask, %%mm5\n" + "pand MMX16_redmask, %%mm6\n" + "por %%mm5, %%mm4\n" // + "psrlw $11, %%mm6\n" // BLUE 1 + "movq %%mm3, %%mm5\n" // lum2 + "paddw %%mm1, %%mm3\n" // lum2 +red 00 R7 00 R5 00 R3 00 R1 + "paddw %%mm2, %%mm5\n" // lum2 +green 00 G7 00 G5 00 G3 00 G1 + "psraw $6, %%mm3\n" // R2 + "por %%mm6, %%mm4\n" // MM4 + "psraw $6, %%mm5\n" // G2 + + "movl %2,16%5\n" // store register in tmp0 + "movl %5,%2\n" // lum2->register + "movq (%2),%%mm6\n" // 0 0 0 0 L3 L2 L1 L0 (load lum2) + + + //"movq (%2, %5), %%mm6\n" // L3 load lum2 + "psraw $6, %%mm7\n" + "packuswb %%mm3, %%mm3\n" + "packuswb %%mm5, %%mm5\n" + "packuswb %%mm7, %%mm7\n" + "pand MMX16_00FFw, %%mm6\n" // L3 + "punpcklbw %%mm3, %%mm3\n" + // "psubw MMX16_10w, %%mm6\n" // L3 + "punpcklbw %%mm5, %%mm5\n" + "pmullw MMX16_Ycoeff, %%mm6\n" // lum3 + "punpcklbw %%mm7, %%mm7\n" + "psllw $3, %%mm5\n" // GREEN 2 + "pand MMX16_redmask, %%mm7\n" + "pand MMX16_redmask, %%mm3\n" + "psrlw $11, %%mm7\n" // BLUE 2 + "pand MMX16_grnmask, %%mm5\n" + "por %%mm7, %%mm3\n" + + "movq (%2), %%mm7\n" // L4 load lum2 + "movl 16%5,%2\n" // tmp0->register + + "por %%mm5, %%mm3\n" // + "psrlw $8, %%mm7\n" // L4 + "movq %%mm4, %%mm5\n" + // "psubw MMX16_10w, %%mm7\n" // L4 + "punpcklwd %%mm3, %%mm4\n" + "pmullw MMX16_Ycoeff, %%mm7\n" // lum4 + "punpckhwd %%mm3, %%mm5\n" + + "movq %%mm4, (%3)\n" // write row1 + "movq %%mm5, 8(%3)\n" // write row1 + + "movq %%mm6, %%mm4\n" // Lum3 + "paddw %%mm0, %%mm6\n" // Lum3 +blue + + "movq %%mm4, %%mm5\n" // Lum3 + "paddw %%mm1, %%mm4\n" // Lum3 +red + "paddw %%mm2, %%mm5\n" // Lum3 +green + "psraw $6, %%mm4\n" + "movq %%mm7, %%mm3\n" // Lum4 + "psraw $6, %%mm5\n" + "paddw %%mm0, %%mm7\n" // Lum4 +blue + "psraw 
$6, %%mm6\n" // Lum3 +blue + "movq %%mm3, %%mm0\n" // Lum4 + "packuswb %%mm4, %%mm4\n" + "paddw %%mm1, %%mm3\n" // Lum4 +red + "packuswb %%mm5, %%mm5\n" + "paddw %%mm2, %%mm0\n" // Lum4 +green + "packuswb %%mm6, %%mm6\n" + "punpcklbw %%mm4, %%mm4\n" + "punpcklbw %%mm5, %%mm5\n" + "punpcklbw %%mm6, %%mm6\n" + "psllw $3, %%mm5\n" // GREEN 3 + "pand MMX16_redmask, %%mm4\n" + "psraw $6, %%mm3\n" // psr 6 + "psraw $6, %%mm0\n" + "pand MMX16_redmask, %%mm6\n" // BLUE + "pand MMX16_grnmask, %%mm5\n" + "psrlw $11, %%mm6\n" // BLUE 3 + "por %%mm5, %%mm4\n" + "psraw $6, %%mm7\n" + "por %%mm6, %%mm4\n" + "packuswb %%mm3, %%mm3\n" + "packuswb %%mm0, %%mm0\n" + "packuswb %%mm7, %%mm7\n" + "punpcklbw %%mm3, %%mm3\n" + "punpcklbw %%mm0, %%mm0\n" + "punpcklbw %%mm7, %%mm7\n" + "pand MMX16_redmask, %%mm3\n" + "pand MMX16_redmask, %%mm7\n" // BLUE + "psllw $3, %%mm0\n" // GREEN 4 + "psrlw $11, %%mm7\n" + "pand MMX16_grnmask, %%mm0\n" + "por %%mm7, %%mm3\n" + "por %%mm0, %%mm3\n" + + "movq %%mm4, %%mm5\n" + + "punpcklwd %%mm3, %%mm4\n" + "punpckhwd %%mm3, %%mm5\n" + + "movq %%mm4, (%4)\n" + "movq %%mm5, 8(%4)\n" + + "subl $8, 8%5\n" // x-=8 + "addl $8, %5\n" // lum2+8 + "addl $8, %2\n" + "addl $4, %0\n" + "addl $4, %1\n" + "cmpl $0, 8%5\n" + "leal 16(%3), %3\n" + "leal 16(%4), %4\n" // row2+16 + + + "jne 1b\n" + "addl 20%5, %2\n" // lum += cols + + "movl %2,16%5\n" // store register in tmp0 + "movl 20%5,%2\n" // cols->register + + "addl %2, %5\n" // lum2 += cols + "addl 12%5, %3\n" // row1+= mod + "addl 12%5, %4\n" // row2+= mod + "movl %2, 8%5\n" // x=cols + "movl 16%5,%2\n" // store tmp0 in register + + "cmpl 4%5, %2\n" + "jl 1b\n" + + : + :"r" (cr), "r"(cb),"r"(lum), + "r"(row1),"r"(row2),"m"(buf[0]) + + ); + __asm__ ( + "emms\n" + ); + + } + +#endif |