?? rgb2rgb_template.c.svn-base
字號:
"punpckhwd %5, %%mm5 \n\t" "psllq $8, %%mm1 \n\t" "psllq $16, %%mm2 \n\t" "por %%mm1, %%mm0 \n\t" "por %%mm2, %%mm0 \n\t" "psllq $8, %%mm4 \n\t" "psllq $16, %%mm5 \n\t" "por %%mm4, %%mm3 \n\t" "por %%mm5, %%mm3 \n\t" :"=m"(*d) :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) :"memory"); /* borrowed 32 to 24 */ asm volatile( "movq %%mm0, %%mm4 \n\t" "movq %%mm3, %%mm5 \n\t" "movq %%mm6, %%mm0 \n\t" "movq %%mm7, %%mm1 \n\t" "movq %%mm4, %%mm6 \n\t" "movq %%mm5, %%mm7 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm1, %%mm3 \n\t" "psrlq $8, %%mm2 \n\t" "psrlq $8, %%mm3 \n\t" "psrlq $8, %%mm6 \n\t" "psrlq $8, %%mm7 \n\t" "pand %2, %%mm0 \n\t" "pand %2, %%mm1 \n\t" "pand %2, %%mm4 \n\t" "pand %2, %%mm5 \n\t" "pand %3, %%mm2 \n\t" "pand %3, %%mm3 \n\t" "pand %3, %%mm6 \n\t" "pand %3, %%mm7 \n\t" "por %%mm2, %%mm0 \n\t" "por %%mm3, %%mm1 \n\t" "por %%mm6, %%mm4 \n\t" "por %%mm7, %%mm5 \n\t" "movq %%mm1, %%mm2 \n\t" "movq %%mm4, %%mm3 \n\t" "psllq $48, %%mm2 \n\t" "psllq $32, %%mm3 \n\t" "pand %4, %%mm2 \n\t" "pand %5, %%mm3 \n\t" "por %%mm2, %%mm0 \n\t" "psrlq $16, %%mm1 \n\t" "psrlq $32, %%mm4 \n\t" "psllq $16, %%mm5 \n\t" "por %%mm3, %%mm1 \n\t" "pand %6, %%mm5 \n\t" "por %%mm5, %%mm4 \n\t" MOVNTQ" %%mm0, %0 \n\t" MOVNTQ" %%mm1, 8%0 \n\t" MOVNTQ" %%mm4, 16%0" :"=m"(*d) :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) :"memory"); d += 24; s += 8; } asm volatile(SFENCE:::"memory"); asm volatile(EMMS:::"memory");#endif while (s < end) { register uint16_t bgr; bgr = *s++; *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x7E0)>>3; *d++ = (bgr&0xF800)>>8; }}static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size){ const uint16_t *end;#ifdef HAVE_MMX const uint16_t *mm_end;#endif uint8_t *d = dst; const uint16_t *s = (const uint16_t *)src; end = s + src_size/2;#ifdef HAVE_MMX asm volatile(PREFETCH" %0"::"m"(*s):"memory"); asm volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); mm_end = end - 3; while (s < mm_end) { asm volatile( PREFETCH" 32%1 \n\t" "movq %1, %%mm0 \n\t" "movq %1, %%mm1 \n\t" "movq %1, %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" "psllq $3, %%mm0 \n\t" "psrlq $2, %%mm1 \n\t" "psrlq $7, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" "punpcklwd %%mm7, %%mm0 \n\t" "punpcklwd %%mm7, %%mm1 \n\t" "punpcklwd %%mm7, %%mm2 \n\t" "punpckhwd %%mm7, %%mm3 \n\t" "punpckhwd %%mm7, %%mm4 \n\t" "punpckhwd %%mm7, %%mm5 \n\t" "psllq $8, %%mm1 \n\t" "psllq $16, %%mm2 \n\t" "por %%mm1, %%mm0 \n\t" "por %%mm2, %%mm0 \n\t" "psllq $8, %%mm4 \n\t" "psllq $16, %%mm5 \n\t" "por %%mm4, %%mm3 \n\t" "por %%mm5, %%mm3 \n\t" MOVNTQ" %%mm0, %0 \n\t" MOVNTQ" %%mm3, 8%0 \n\t" :"=m"(*d) :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) :"memory"); d += 16; s += 4; } asm volatile(SFENCE:::"memory"); asm volatile(EMMS:::"memory");#endif while (s < end) {#if 0 //slightly slower on Athlon int bgr= *s++; *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9);#else register uint16_t bgr; bgr = *s++;#ifdef WORDS_BIGENDIAN *d++ = 0; *d++ = (bgr&0x7C00)>>7; *d++ = (bgr&0x3E0)>>2; *d++ = (bgr&0x1F)<<3;#else *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x3E0)>>2; *d++ = (bgr&0x7C00)>>7; *d++ = 0;#endif#endif }}static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size){ const uint16_t *end;#ifdef HAVE_MMX const uint16_t *mm_end;#endif uint8_t *d = dst; const uint16_t *s = (const uint16_t*)src; end = s + src_size/2;#ifdef HAVE_MMX asm volatile(PREFETCH" %0"::"m"(*s):"memory"); asm volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); mm_end = end - 3; while (s < mm_end) { asm volatile( PREFETCH" 32%1 \n\t" "movq %1, %%mm0 \n\t" "movq %1, %%mm1 \n\t" "movq %1, %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" "psllq $3, %%mm0 \n\t" "psrlq $3, %%mm1 \n\t" "psrlq $8, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" "punpcklwd %%mm7, %%mm0 \n\t" "punpcklwd %%mm7, %%mm1 \n\t" "punpcklwd %%mm7, %%mm2 \n\t" "punpckhwd %%mm7, %%mm3 \n\t" "punpckhwd %%mm7, %%mm4 \n\t" "punpckhwd %%mm7, %%mm5 \n\t" "psllq $8, %%mm1 \n\t" "psllq $16, %%mm2 \n\t" "por %%mm1, %%mm0 \n\t" "por %%mm2, %%mm0 \n\t" "psllq $8, %%mm4 \n\t" "psllq $16, %%mm5 \n\t" "por %%mm4, %%mm3 \n\t" "por %%mm5, %%mm3 \n\t" MOVNTQ" %%mm0, %0 \n\t" MOVNTQ" %%mm3, 8%0 \n\t" :"=m"(*d) :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) :"memory"); d += 16; s += 4; } asm volatile(SFENCE:::"memory"); asm volatile(EMMS:::"memory");#endif while (s < end) { register uint16_t bgr; bgr = *s++;#ifdef WORDS_BIGENDIAN *d++ = 0; *d++ = (bgr&0xF800)>>8; *d++ = (bgr&0x7E0)>>3; *d++ = (bgr&0x1F)<<3;#else *d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x7E0)>>3; *d++ = (bgr&0xF800)>>8; *d++ = 0;#endif }}static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size){ long idx = 15 - src_size; const uint8_t *s = src-idx; uint8_t *d = dst-idx;#ifdef HAVE_MMX asm volatile( "test %0, %0 \n\t" "jns 2f \n\t" PREFETCH" (%1, %0) \n\t" "movq %3, %%mm7 \n\t" "pxor %4, %%mm7 \n\t" "movq %%mm7, %%mm6 \n\t" "pxor %5, %%mm7 \n\t" ASMALIGN(4) "1: \n\t" PREFETCH" 32(%1, %0) \n\t" "movq (%1, %0), %%mm0 \n\t" "movq 8(%1, %0), %%mm1 \n\t"# ifdef HAVE_MMX2 "pshufw $177, %%mm0, %%mm3 \n\t" "pshufw $177, %%mm1, %%mm5 \n\t" "pand %%mm7, %%mm0 \n\t" "pand %%mm6, %%mm3 \n\t" "pand %%mm7, %%mm1 \n\t" "pand %%mm6, %%mm5 \n\t" "por %%mm3, %%mm0 \n\t" "por %%mm5, %%mm1 \n\t"# else "movq %%mm0, %%mm2 \n\t" "movq %%mm1, %%mm4 \n\t" "pand %%mm7, %%mm0 \n\t" "pand %%mm6, %%mm2 \n\t" "pand %%mm7, %%mm1 \n\t" "pand %%mm6, %%mm4 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" "pslld $16, %%mm2 \n\t" "psrld $16, %%mm3 \n\t" "pslld $16, %%mm4 \n\t" "psrld $16, %%mm5 \n\t" "por %%mm2, %%mm0 \n\t" "por %%mm4, %%mm1 \n\t" "por %%mm3, %%mm0 \n\t" "por %%mm5, %%mm1 \n\t"# endif MOVNTQ" %%mm0, (%2, %0) \n\t" MOVNTQ" %%mm1, 8(%2, %0) \n\t" "add $16, %0 \n\t" "js 1b \n\t" SFENCE" \n\t" EMMS" \n\t" "2: \n\t" : "+&r"(idx) : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one) : "memory");#endif for (; idx<15; idx+=4) { register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00; v &= 0xff00ff; *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16); }}static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size){ unsigned i;#ifdef HAVE_MMX long mmx_size= 23 - src_size; asm volatile ( "test %%"REG_a", %%"REG_a" \n\t" "jns 2f \n\t" "movq "MANGLE(mask24r)", %%mm5 \n\t" "movq "MANGLE(mask24g)", %%mm6 \n\t" "movq "MANGLE(mask24b)", %%mm7 \n\t" ASMALIGN(4) "1: \n\t" PREFETCH" 32(%1, %%"REG_a") \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG "movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG "movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B "psllq $16, %%mm0 \n\t" // 00 BGR BGR "pand %%mm5, %%mm0 \n\t" "pand %%mm6, %%mm1 \n\t" "pand %%mm7, %%mm2 \n\t" "por %%mm0, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t" "movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG "movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B "movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR "pand %%mm7, %%mm0 \n\t" "pand %%mm5, %%mm1 \n\t" "pand %%mm6, %%mm2 \n\t" "por %%mm0, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t" "movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R "movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR "movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG "pand %%mm6, %%mm0 \n\t" "pand %%mm7, %%mm1 \n\t" "pand %%mm5, %%mm2 \n\t" "por %%mm0, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t" MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t" "add $24, %%"REG_a" \n\t" " js 1b \n\t" "2: \n\t" : "+a" (mmx_size) : "r" (src-mmx_size), "r"(dst-mmx_size) ); asm volatile(SFENCE:::"memory"); asm volatile(EMMS:::"memory"); if (mmx_size==23) return; //finished, was multiple of 8 src+= src_size; dst+= src_size; src_size= 23-mmx_size; src-= src_size; dst-= src_size;#endif for (i=0; i<src_size; i+=3) { register uint8_t x; x = src[i + 2]; dst[i + 1] = src[i + 1]; dst[i + 2] = src[i + 0]; dst[i + 0] = x; }}static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, long width, long height, long lumStride, long chromStride, long dstStride, long vertLumPerChroma){ long y; const long chromWidth= width>>1; for (y=0; y<height; y++) {#ifdef HAVE_MMX//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) asm volatile( "xor %%"REG_a", %%"REG_a" \n\t" ASMALIGN(4) "1: \n\t" PREFETCH" 32(%1, %%"REG_a", 2) \n\t" PREFETCH" 32(%2, %%"REG_a") \n\t" PREFETCH" 32(%3, %%"REG_a") \n\t"
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -