?? rgb2rgb_template.c.svn-base
字號:
/* * software RGB to RGB converter * pluralize by software PAL8 to RGB converter * software YUV to YUV converter * software YUV to RGB converter * Written by Nick Kurshev. * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * lot of big-endian byte order fixes by Alex Beregszaszi * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * * The C code (not assembly, MMX, ...) of this file can be used * under the LGPL license. */#include <stddef.h>#include <inttypes.h> /* for __WORDSIZE */#ifndef __WORDSIZE// #warning You have a misconfigured system and will probably lose performance!#define __WORDSIZE MP_WORDSIZE#endif#undef PREFETCH#undef MOVNTQ#undef EMMS#undef SFENCE#undef MMREG_SIZE#undef PREFETCHW#undef PAVGB#ifdef HAVE_SSE2#define MMREG_SIZE 16#else#define MMREG_SIZE 8#endif#ifdef HAVE_3DNOW#define PREFETCH "prefetch"#define PREFETCHW "prefetchw"#define PAVGB "pavgusb"#elif defined (HAVE_MMX2)#define PREFETCH "prefetchnta"#define PREFETCHW "prefetcht0"#define PAVGB "pavgb"#else#ifdef __APPLE__#define PREFETCH "#"#define PREFETCHW "#"#else#define PREFETCH " # nop"#define PREFETCHW " # nop"#endif#endif#ifdef HAVE_3DNOW/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */#define EMMS "femms"#else#define EMMS "emms"#endif#ifdef HAVE_MMX2#define MOVNTQ "movntq"#define SFENCE "sfence"#else#define MOVNTQ "movq"#define SFENCE " # nop"#endifstatic inline void RENAME(rgb24to32)(const uint8_t *src, uint8_t *dst, long src_size){ uint8_t *dest = dst; const uint8_t *s = src; const uint8_t *end; #ifdef HAVE_MMX const uint8_t *mm_end; #endif end = s + src_size; #ifdef HAVE_MMX asm volatile(PREFETCH" %0"::"m"(*s):"memory"); mm_end = end - 23; asm volatile("movq %0, %%mm7"::"m"(mask32):"memory"); while (s < mm_end) { asm volatile( PREFETCH" 32%1 \n\t" "movd %1, %%mm0 \n\t" "punpckldq 3%1, %%mm0 \n\t" "movd 6%1, %%mm1 \n\t" "punpckldq 9%1, %%mm1 \n\t" "movd 12%1, %%mm2 \n\t" "punpckldq 15%1, %%mm2 \n\t" "movd 18%1, %%mm3 \n\t" "punpckldq 21%1, %%mm3 \n\t" "pand %%mm7, %%mm0 \n\t" "pand %%mm7, %%mm1 \n\t" "pand %%mm7, %%mm2 \n\t" "pand %%mm7, %%mm3 \n\t" MOVNTQ" %%mm0, %0 \n\t" MOVNTQ" %%mm1, 8%0 \n\t" MOVNTQ" %%mm2, 16%0 \n\t" MOVNTQ" %%mm3, 24%0" :"=m"(*dest) :"m"(*s) :"memory"); dest += 32; s += 24; } asm volatile(SFENCE:::"memory"); asm volatile(EMMS:::"memory"); #endif while (s < end) { #ifdef WORDS_BIGENDIAN /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ *dest++ = 0; *dest++ = s[2]; *dest++ = s[1]; *dest++ = s[0]; s+=3; #else *dest++ = *s++; *dest++ = *s++; *dest++ = *s++; *dest++ = 0; #endif }}static inline void RENAME(rgb32to24)(const uint8_t *src, uint8_t *dst, long src_size){ uint8_t *dest = dst; const uint8_t *s = src; const uint8_t *end;#ifdef HAVE_MMX const uint8_t *mm_end;#endif end = s + src_size;#ifdef HAVE_MMX asm volatile(PREFETCH" %0"::"m"(*s):"memory"); mm_end = end - 31; while (s < mm_end) { asm volatile( PREFETCH" 32%1 \n\t" "movq %1, %%mm0 \n\t" "movq 8%1, %%mm1 \n\t" "movq 16%1, %%mm4 \n\t" "movq 24%1, %%mm5 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm1, %%mm3 \n\t" "movq %%mm4, %%mm6 \n\t" "movq %%mm5, %%mm7 \n\t" "psrlq $8, %%mm2 \n\t" "psrlq $8, %%mm3 \n\t" "psrlq $8, %%mm6 \n\t" "psrlq $8, %%mm7 \n\t" "pand %2, %%mm0 \n\t" "pand %2, %%mm1 \n\t" "pand %2, %%mm4 \n\t" "pand %2, %%mm5 \n\t" "pand %3, %%mm2 \n\t" "pand %3, %%mm3 \n\t" "pand %3, %%mm6 \n\t" "pand %3, %%mm7 \n\t" "por %%mm2, %%mm0 \n\t" "por %%mm3, %%mm1 \n\t" "por %%mm6, %%mm4 \n\t" "por %%mm7, %%mm5 \n\t" "movq %%mm1, %%mm2 \n\t" "movq %%mm4, %%mm3 \n\t" "psllq $48, %%mm2 \n\t" "psllq $32, %%mm3 \n\t" "pand %4, %%mm2 \n\t" "pand %5, %%mm3 \n\t" "por %%mm2, %%mm0 \n\t" "psrlq $16, %%mm1 \n\t" "psrlq $32, %%mm4 \n\t" "psllq $16, %%mm5 \n\t" "por %%mm3, %%mm1 \n\t" "pand %6, %%mm5 \n\t" "por %%mm5, %%mm4 \n\t" MOVNTQ" %%mm0, %0 \n\t" MOVNTQ" %%mm1, 8%0 \n\t" MOVNTQ" %%mm4, 16%0" :"=m"(*dest) :"m"(*s),"m"(mask24l), "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) :"memory"); dest += 24; s += 32; } asm volatile(SFENCE:::"memory"); asm volatile(EMMS:::"memory");#endif while (s < end) {#ifdef WORDS_BIGENDIAN /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ s++; dest[2] = *s++; dest[1] = *s++; dest[0] = *s++; dest += 3;#else *dest++ = *s++; *dest++ = *s++; *dest++ = *s++; s++;#endif }}/* original by Strepto/Astral ported to gcc & bugfixed: A'rpi MMX2, 3DNOW optimization by Nick Kurshev 32-bit C version, and and&add trick by Michael Niedermayer*/static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size){ register const uint8_t* s=src; register uint8_t* d=dst; register const uint8_t *end; const uint8_t *mm_end; end = s + src_size;#ifdef HAVE_MMX asm volatile(PREFETCH" %0"::"m"(*s)); asm volatile("movq %0, %%mm4"::"m"(mask15s)); mm_end = end - 15; while (s<mm_end) { asm volatile( PREFETCH" 32%1 \n\t" "movq %1, %%mm0 \n\t" "movq 8%1, %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "pand %%mm4, %%mm0 \n\t" "pand %%mm4, %%mm2 \n\t" "paddw %%mm1, %%mm0 \n\t" "paddw %%mm3, %%mm2 \n\t" MOVNTQ" %%mm0, %0 \n\t" MOVNTQ" %%mm2, 8%0" :"=m"(*d) :"m"(*s) ); d+=16; s+=16; } asm volatile(SFENCE:::"memory"); asm volatile(EMMS:::"memory");#endif mm_end = end - 3; while (s < mm_end) { register unsigned x= *((const uint32_t *)s); *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); d+=4; s+=4; } if (s < end) { register unsigned short x= *((const uint16_t *)s); *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); }}static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size){ register const uint8_t* s=src; register uint8_t* d=dst; register const uint8_t *end; const uint8_t *mm_end; end = s + src_size;#ifdef HAVE_MMX asm volatile(PREFETCH" %0"::"m"(*s)); asm volatile("movq %0, %%mm7"::"m"(mask15rg)); asm volatile("movq %0, %%mm6"::"m"(mask15b)); mm_end = end - 15; while (s<mm_end) { asm volatile( PREFETCH" 32%1 \n\t" "movq %1, %%mm0 \n\t" "movq 8%1, %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "psrlq $1, %%mm0 \n\t" "psrlq $1, %%mm2 \n\t" "pand %%mm7, %%mm0 \n\t" "pand %%mm7, %%mm2 \n\t" "pand %%mm6, %%mm1 \n\t" "pand %%mm6, %%mm3 \n\t" "por %%mm1, %%mm0 \n\t" "por %%mm3, %%mm2 \n\t" MOVNTQ" %%mm0, %0 \n\t" MOVNTQ" %%mm2, 8%0" :"=m"(*d) :"m"(*s) ); d+=16; s+=16; } asm volatile(SFENCE:::"memory"); asm volatile(EMMS:::"memory");#endif mm_end = end - 3; while (s < mm_end) { register uint32_t x= *((const uint32_t*)s); *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); s+=4; d+=4; } if (s < end) { register uint16_t x= *((const uint16_t*)s); *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); s+=2; d+=2; }}static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size){ const uint8_t *s = src; const uint8_t *end;#ifdef HAVE_MMX const uint8_t *mm_end;#endif uint16_t *d = (uint16_t *)dst; end = s + src_size;#ifdef HAVE_MMX mm_end = end - 15;#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) asm volatile( "movq %3, %%mm5 \n\t" "movq %4, %%mm6 \n\t" "movq %5, %%mm7 \n\t" "jmp 2f \n\t" ASMALIGN(4) "1: \n\t" PREFETCH" 32(%1) \n\t" "movd (%1), %%mm0 \n\t" "movd 4(%1), %%mm3 \n\t" "punpckldq 8(%1), %%mm0 \n\t" "punpckldq 12(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm3, %%mm4 \n\t" "pand %%mm6, %%mm0 \n\t" "pand %%mm6, %%mm3 \n\t" "pmaddwd %%mm7, %%mm0 \n\t" "pmaddwd %%mm7, %%mm3 \n\t" "pand %%mm5, %%mm1 \n\t" "pand %%mm5, %%mm4 \n\t" "por %%mm1, %%mm0 \n\t" "por %%mm4, %%mm3 \n\t" "psrld $5, %%mm0 \n\t" "pslld $11, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" MOVNTQ" %%mm0, (%0) \n\t"
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -