?? transfrm.c
字號:
/* transfrm.c, 前向/后向變換*/
#include <stdio.h>#include <math.h>#include "global.h"static short PACKED_0[4] = { 0, 0, 0, 0 };
static short PACKED_128[4] = { 128, 128, 128, 128 };
static unsigned char PACKED_128B[8] = { 128, 128, 128, 128, 128, 128, 128, 128 };
static void add_pred(unsigned char *pred, unsigned char *cur, int lx, short *blk);static void sub_pred(unsigned char *pred, unsigned char *cur, int lx, short *blk);void transform(pred,cur,mbi,blocks)unsigned char *pred[], *cur[];struct mbinfo *mbi;short blocks[][64];{ int i, j, i1, j1, k, n, cc, offs, lx; k = 0; for (j=0; j<height2; j+=16) for (i=0; i<width; i+=16) { for (n=0; n<block_count; n++) { cc = (n<4) ? 0 : (n&1)+1; /* color component index */ if (cc==0) { /* luminance */ if ((pict_struct==FRAME_PICTURE) && mbi[k].dct_type) { /* field DCT */ offs = i + ((n&1)<<3) + width*(j+((n&2)>>1)); lx = width<<1; } else { /* frame DCT */ offs = i + ((n&1)<<3) + width2*(j+((n&2)<<2)); lx = width2; } if (pict_struct==BOTTOM_FIELD) offs += width; } else { /* chrominance */ /* scale coordinates */ i1 = (chroma_format==CHROMA444) ? i : i>>1; j1 = (chroma_format!=CHROMA420) ? j : j>>1; if ((pict_struct==FRAME_PICTURE) && mbi[k].dct_type && (chroma_format!=CHROMA420)) { /* field DCT */ offs = i1 + (n&8) + chrom_width*(j1+((n&2)>>1)); lx = chrom_width<<1; } else { /* frame DCT */ offs = i1 + (n&8) + chrom_width2*(j1+((n&2)<<2)); lx = chrom_width2; } if (pict_struct==BOTTOM_FIELD) offs += chrom_width; } sub_pred(pred[cc]+offs,cur[cc]+offs,lx,blocks[k*block_count+n]); fdct(blocks[k*block_count+n]); } k++; }}/* 反向變換預測誤差和加預測*/void itransform(pred,cur,mbi,blocks)unsigned char *pred[],*cur[];struct mbinfo *mbi;short blocks[][64];{ int i, j, i1, j1, k, n, cc, offs, lx;
k = 0; for (j=0; j<height2; j+=16) for (i=0; i<width; i+=16) { for (n=0; n<block_count; n++) { cc = (n<4) ? 0 : (n&1)+1; /* color component index */ if (cc==0) { /* luminance */ if ((pict_struct==FRAME_PICTURE) && mbi[k].dct_type) { /* field DCT */ offs = i + ((n&1)<<3) + width*(j+((n&2)>>1)); lx = width<<1; } else { /* frame DCT */ offs = i + ((n&1)<<3) + width2*(j+((n&2)<<2)); lx = width2; } if (pict_struct==BOTTOM_FIELD) offs += width; } else { /* chrominance */ /* scale coordinates */ i1 = (chroma_format==CHROMA444) ? i : i>>1; j1 = (chroma_format!=CHROMA420) ? j : j>>1; if ((pict_struct==FRAME_PICTURE) && mbi[k].dct_type && (chroma_format!=CHROMA420)) { /* field DCT */ offs = i1 + (n&8) + chrom_width*(j1+((n&2)>>1)); lx = chrom_width<<1; } else { /* frame DCT */ offs = i1 + (n&8) + chrom_width2*(j1+((n&2)<<2)); lx = chrom_width2; } if (pict_struct==BOTTOM_FIELD) offs += chrom_width; } idct(blocks[k*block_count+n]); add_pred(pred[cc]+offs,cur[cc]+offs,lx,blocks[k*block_count+n]); } k++; }}/* 加預測和預測誤差*/static void add_pred(pred,cur,lx,blk)unsigned char *pred, *cur;int lx;short *blk;{ int i, j;
if(cpu_MMX)
{
// I do loop unrolling on the inner loop and the outer loop !
_asm
{
mov esi, blk ;// esi = blk
mov ebx, pred ;// ebx = pred
mov edi, cur ;// edi = cur
mov edx, lx ;// edx = lx
lea ecx, [2*edx] ;// ecx = 2 * lx
movq mm0, [esi] ;// mm0 = 4 words into esi[0..7]
movq mm1, [esi+8] ;// mm1 = 4 words into esi[8..15]
movq mm2, [ebx] ;// mm2 = 4 words into ebx[0..7]
movq mm3, [esi+16] ;// mm3 = 4 words into esi[16..23]
movq mm4, [esi+24] ;// mm4 = 4 words into esi[24..31]
movq mm5, [ebx+edx] ;// mm5 = 4 words into (ebx + edx)[0..7]
movq mm6, mm2
movq mm7, mm5
punpcklbw mm2, PACKED_0 ;// unpack the lower 4 bytes into mm2
punpckhbw mm6, PACKED_0 ;// unpack the upper 4 bytes into mm6
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
punpckhbw mm7, PACKED_0 ;// unpack the upper 4 bytes into mm7
paddw mm0, mm2 ;// mm0 += mm2
paddw mm1, mm6 ;// mm1 += mm6
paddw mm3, mm5 ;// mm3 += mm5
paddw mm4, mm7 ;// mm4 += mm7
psubw mm0, PACKED_128 ;// convert mm0 from unsigned to signed by substracting 128
psubw mm1, PACKED_128 ;// convert mm1 from unsigned to signed by substracting 128
psubw mm3, PACKED_128 ;// convert mm3 from unsigned to signed by substracting 128
psubw mm4, PACKED_128 ;// convert mm4 from unsigned to signed by substracting 128
packsswb mm0, mm1 ;// pack mm0 and mm1 to get only 8 signed bytes into mm0
packsswb mm3, mm4 ;// pack mm3 and mm4 to get only 8 signed bytes into mm3
paddb mm0, PACKED_128B ;// reverse the effect of convertion to signed
paddb mm3, PACKED_128B ;// reverse the effect of convertion to signed
add ebx, ecx ;// ebx += 2 * lx
movq [edi], mm0 ;// store mm0
movq [edi+edx], mm3 ;// store mm3
movq mm0, [esi+32] ;// mm0 = 4 words into esi[32..39]
movq mm1, [esi+40] ;// mm1 = 4 words into esi[40..47]
movq mm2, [ebx] ;// mm2 = 4 words into ebx[0..7]
movq mm3, [esi+48] ;// mm3 = 4 words into esi[48..55]
movq mm4, [esi+56] ;// mm4 = 4 words into esi[56..63]
movq mm5, [ebx+edx] ;// mm5 = 4 words into (ebx + edx)[0..7]
add edi, ecx ;// edi += 2 * lx
movq mm6, mm2
movq mm7, mm5
punpcklbw mm2, PACKED_0 ;// unpack the lower 4 bytes into mm2
punpckhbw mm6, PACKED_0 ;// unpack the upper 4 bytes into mm6
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
punpckhbw mm7, PACKED_0 ;// unpack the upper 4 bytes into mm7
paddw mm0, mm2 ;// mm0 += mm2
paddw mm1, mm6 ;// mm1 += mm6
paddw mm3, mm5 ;// mm3 += mm5
paddw mm4, mm7 ;// mm4 += mm7
psubw mm0, PACKED_128 ;// convert mm0 from unsigned to signed by substracting 128
psubw mm1, PACKED_128 ;// convert mm1 from unsigned to signed by substracting 128
psubw mm3, PACKED_128 ;// convert mm3 from unsigned to signed by substracting 128
psubw mm4, PACKED_128 ;// convert mm4 from unsigned to signed by substracting 128
packsswb mm0, mm1 ;// pack mm0 and mm1 to get only 8 signed bytes into mm0
packsswb mm3, mm4 ;// pack mm3 and mm4 to get only 8 signed bytes into mm3
paddb mm0, PACKED_128B ;// reverse the effect of convertion to signed
paddb mm3, PACKED_128B ;// reverse the effect of convertion to signed
add ebx, ecx ;// ebx += 2 * lx
movq [edi], mm0 ;// store mm0
movq [edi+edx], mm3 ;// store mm3
movq mm0, [esi+64] ;// mm0 = 4 words into esi[64..71]
movq mm1, [esi+72] ;// mm1 = 4 words into esi[72..79]
movq mm2, [ebx] ;// mm2 = 4 words into ebx[0..7]
movq mm3, [esi+80] ;// mm3 = 4 words into esi[80..87]
movq mm4, [esi+88] ;// mm4 = 4 words into esi[88..95]
movq mm5, [ebx+edx] ;// mm5 = 4 words into (ebx + edx)[0..7]
add edi, ecx ;// edi += 2 * lx
movq mm6, mm2
movq mm7, mm5
punpcklbw mm2, PACKED_0 ;// unpack the lower 4 bytes into mm2
punpckhbw mm6, PACKED_0 ;// unpack the upper 4 bytes into mm6
punpcklbw mm5, PACKED_0 ;// unpack the lower 4 bytes into mm5
punpckhbw mm7, PACKED_0 ;// unpack the upper 4 bytes into mm7
paddw mm0, mm2 ;// mm0 += mm2
paddw mm1, mm6 ;// mm1 += mm6
paddw mm3, mm5 ;// mm3 += mm5
paddw mm4, mm7 ;// mm4 += mm7
psubw mm0, PACKED_128 ;// convert mm0 from unsigned to signed by substracting 128
psubw mm1, PACKED_128 ;// convert mm1 from unsigned to signed by substracting 128
psubw mm3, PACKED_128 ;// convert mm3 from unsigned to signed by substracting 128
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -