?? idct_8x8.asm
字號:
* ========================================================================= *
* TEXAS INSTRUMENTS, INC. *
* *
* NAME *
* idct_8x8 -- IDCT for 8x8 blocks (IEEE 1180-1990 compliant) *
* *
* *
* REVISION DATE *
* 26-May-2005 *
* *
* USAGE *
* This routine has the following C prototype: *
* void idct_8x8(short idct_data[], unsigned num_idcts) *
* *
* The idct_8x8 routine accepts a list of 8x8 DCT coefficient blocks *
* and performs IDCTs on each. The array should be aligned to a *
* 64-bit boundary, and be laid out equivalently to the C array *
* idct_data[num_idcts][8][8]. The input data should be in 12Q4 *
* format. *
* *
* The routine operates entirely in-place, requiring no additional *
* storage for intermediate results. *
* *
* Use -dNO_IEEE_1180_OME_CONTROL when IEEE 1180-1990 compliance is *
* not required, e.g., in a JPEG decoder. *
* *
* DESCRIPTION *
* The idct_8x8 algorithm performs an IEEE-1180 compliant IDCT *
* based on Chen's algorithm. The input coefficients are assumed *
* to be signed 16-bit DCT coefficients in 12Q4 format. *
* *
* void idct_8x8(short *idct_data, unsigned num_idcts) *
* { *
* /* --------------------------------------------------------- */ *
* /* Cosine Constants (Q15, scaled down by sqrt(2)). */ *
* /* --------------------------------------------------------- */ *
* const unsigned short C0 = 0x5A82, C1 = 0x58C5; *
* const unsigned short C2 = 0x539F, C3 = 0x4B42; *
* const unsigned short C4 = 0x4000, C5 = 0x3249; *
* const unsigned short C6 = 0x22A3, C7 = 0x11A8; *
* *
* /* --------------------------------------------------------- */ *
* /* Intermediate values (used in both loops). */ *
* /* --------------------------------------------------------- */ *
* short F0, F1, F2, F3, F4, F5, F6, F7; /* stage 0 */ *
* short P0, P1, R0, R1, Q0, Q1, S0, S1; /* stage 1 */ *
* short p0, p1, r0, r1, q0, q1, s0, s1; /* stage 2 */ *
* short g0, g1, g2, g3, h0, h1, h2, h3; /* stage 3 */ *
* short f0, f1, f2, f3, f4, f5, f6, f7; /* stage 4 */ *
* short f0r,f1r,f2r,f3r,f4r,f5r,f6r,f7r; /* rounded */ *
* short f0s,f1s,f2s,f3s,f4s,f5s,f6s,f7s; /* saturated */ *
* short f0t,f1t,f2t,f3t,f4t,f5t,f6t,f7t; /* truncated */ *
* int i, j; /* loop counts */ *
* short (*idct)[8][8] = (short (*)[8][8])idct_data; *
* *
* if (!num_idcts) return; *
* *
* /* --------------------------------------------------------- */ *
* /* Horizontal Pass */ *
* /* --------------------------------------------------------- */ *
* for (i = 0; i < num_idcts; i++) *
* { *
* for (j = 0; j < 8; j++) *
* { *
* /* ----------------------------------------------------- */ *
* /* Stage 0: Load in frequency-domain coefficients. */ *
* /* ----------------------------------------------------- */ *
* F0 = idct[i][j][0]; *
* F1 = idct[i][j][1]; *
* F2 = idct[i][j][2]; *
* F3 = idct[i][j][3]; *
* F4 = idct[i][j][4]; *
* F5 = idct[i][j][5]; *
* F6 = idct[i][j][6]; *
* F7 = idct[i][j][7]; *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 1 of signal flow graph. */ *
* /* ----------------------------------------------------- */ *
* P0 = F0; P1 = F4; *
* R1 = F2; R0 = F6; *
* *
* Q1 = (F1*C7 - F7*C1 + 0x4000) >> 15; *
* Q0 = (F5*C3 - F3*C5 + 0x4000) >> 15; *
* S0 = (F5*C5 + F3*C3 + 0x4000) >> 15; *
* S1 = (F1*C1 + F7*C7 + 0x4000) >> 15; *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 2 of signal flow graph. */ *
* /* ----------------------------------------------------- */ *
* p0 = (P0*C4 + P1*C4 + 0x4000) >> 15; *
* p1 = (P0*C4 - P1*C4 + 0x4000) >> 15; *
* r1 = (R1*C6 - R0*C2 + 0x4000) >> 15; *
* r0 = (R1*C2 + R0*C6 + 0x4000) >> 15; *
* *
* s1 = (S1 + S0); q1 = (Q1 + Q0); *
* s0 = (S1 - S0); q0 = (Q1 - Q0); *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 3 of signal flow graph. */ *
* /* ----------------------------------------------------- */ *
* g0 = (p0 + r0); g1 = (p1 + r1); *
* h0 = (p0 - r0); h1 = (p1 - r1); *
* *
* h2 = s1; g2 = q1; *
* g3 = (s0*C0 - q0*C0 + 0x4000) >> 15; *
* h3 = (s0*C0 + q0*C0 + 0x4000) >> 15; *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 4 of signal flow graph. */ *
* /* ----------------------------------------------------- */ *
* f0 = (g0 + h2); f7 = (g0 - h2); *
* f1 = (g1 + h3); f6 = (g1 - h3); *
* f2 = (h1 + g3); f5 = (h1 - g3); *
* f3 = (h0 + g2); f4 = (h0 - g2); *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 5: Write sample-domain results. */ *
* /* ----------------------------------------------------- */ *
* idct[i][j][0] = f0; *
* idct[i][j][1] = f1; *
* idct[i][j][2] = f2; *
* idct[i][j][3] = f3; *
* idct[i][j][4] = f4; *
* idct[i][j][5] = f5; *
* idct[i][j][6] = f6; *
* idct[i][j][7] = f7; *
* } *
* } *
* *
* /* --------------------------------------------------------- */ *
* /* Vertical Pass */ *
* /* --------------------------------------------------------- */ *
* for (i = 0; i < num_idcts; i++) *
* { *
* for (j = 0; j < 8; j++) *
* { *
* /* ----------------------------------------------------- */ *
* /* Stage 0: Load in frequency-domain coefficients. */ *
* /* ----------------------------------------------------- */ *
* F0 = idct[i][0][j]; *
* F1 = idct[i][1][j]; *
* F2 = idct[i][2][j]; *
* F3 = idct[i][3][j]; *
* F4 = idct[i][4][j]; *
* F5 = idct[i][5][j]; *
* F6 = idct[i][6][j]; *
* F7 = idct[i][7][j]; *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 1 of signal flow graph. */ *
* /* ----------------------------------------------------- */ *
* P0 = F0; P1 = F4; *
* R1 = F2; R0 = F6; *
* *
* Q1 = (F1*C7 - F7*C1 + 0x4000) >> 15; *
* Q0 = (F5*C3 - F3*C5 + 0x4000) >> 15; *
* S0 = (F5*C5 + F3*C3 + 0x4000) >> 15; *
* S1 = (F1*C1 + F7*C7 + 0x4000) >> 15; *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 2 of signal flow graph. */ *
* /* ----------------------------------------------------- */ *
* p0 = (P0*C4 + P1*C4 + 0x4000) >> 15; *
* p1 = (P0*C4 - P1*C4 + 0x4000) >> 15; *
* r1 = (R1*C6 - R0*C2 + 0x4000) >> 15; *
* r0 = (R1*C2 + R0*C6 + 0x4000) >> 15; *
* *
* s1 = (S1 + S0); q1 = (Q1 + Q0); *
* s0 = (S1 - S0); q0 = (Q1 - Q0); *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 3 of signal flow graph. */ *
* /* ----------------------------------------------------- */ *
* g0 = (p0 + r0); g1 = (p1 + r1); *
* h0 = (p0 - r0); h1 = (p1 - r1); *
* *
* h2 = s1; g2 = q1; *
* g3 = (s0*C0 - q0*C0 + 0x4000) >> 15; *
* h3 = (s0*C0 + q0*C0 + 0x4000) >> 15; *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 4 of signal flow graph. */ *
* /* ----------------------------------------------------- */ *
* f0 = (g0 + h2); f7 = (g0 - h2); *
* f1 = (g1 + h3); f6 = (g1 - h3); *
* f2 = (h1 + g3); f5 = (h1 - g3); *
* f3 = (h0 + g2); f4 = (h0 - g2); *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 4.1: Q-point adjust and rounding */ *
* /* ----------------------------------------------------- */ *
* #ifdef NO_IEEE_1180_OME_CONTROL *
* f0r = f0 + f0 + 0x1f; *
* f1r = f1 + f1 + 0x1f; *
* f2r = f2 + f2 + 0x1f; *
* f3r = f3 + f3 + 0x1f; *
* f4r = f4 + f4 + 0x1f; *
* f5r = f5 + f5 + 0x1f; *
* f6r = f6 + f6 + 0x1f; *
* f7r = f7 + f7 + 0x1f; *
* #else *
* { *
* int f10, f23, f54, f67; /* Q adjust and rounding */ *
* f10 = (f1<<16) | (f0&0xffff); *
* f23 = (f2<<16) | (f3&0xffff); *
* f54 = (f5<<16) | (f4&0xffff); *
* f67 = (f6<<16) | (f7&0xffff); *
* f10 += f10 + 0x001f001f; *
* f23 += f23 + 0x001f001f; *
* f54 += f54 + 0x001f001f; *
* f67 += f67 + 0x001f001f; *
* f0r = f10 & 0xffff; f1r = f10 >> 16; *
* f2r = f23 >> 16; f3r = f23 & 0xffff; *
* f4r = f54 & 0xffff; f5r = f54 >> 16; *
* f6r = f67 >> 16; f7r = f67 & 0xffff; *
* } *
* #endif *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 4.2: Saturate results to 9Q6. */ *
* /* ----------------------------------------------------- */ *
* f0s = f0r > 0x3FFF ? 0x3FFF : *
* f0r < -0x4000 ? -0x4000 : f0r; *
* f1s = f1r > 0x3FFF ? 0x3FFF : *
* f1r < -0x4000 ? -0x4000 : f1r; *
* f2s = f2r > 0x3FFF ? 0x3FFF : *
* f2r < -0x4000 ? -0x4000 : f2r; *
* f3s = f3r > 0x3FFF ? 0x3FFF : *
* f3r < -0x4000 ? -0x4000 : f3r; *
* f4s = f4r > 0x3FFF ? 0x3FFF : *
* f4r < -0x4000 ? -0x4000 : f4r; *
* f5s = f5r > 0x3FFF ? 0x3FFF : *
* f5r < -0x4000 ? -0x4000 : f5r; *
* f6s = f6r > 0x3FFF ? 0x3FFF : *
* f6r < -0x4000 ? -0x4000 : f6r; *
* f7s = f7r > 0x3FFF ? 0x3FFF : *
* f7r < -0x4000 ? -0x4000 : f7r; *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 4.3: Truncate results to 9Q0. */ *
* /* ----------------------------------------------------- */ *
* f0t = f0s >> 6; f7t = f7s >> 6; *
* f1t = f1s >> 6; f6t = f6s >> 6; *
* f2t = f2s >> 6; f5t = f5s >> 6; *
* f3t = f3s >> 6; f4t = f4s >> 6; *
* *
* /* ----------------------------------------------------- */ *
* /* Stage 5: Store sample-domain results. */ *
* /* ----------------------------------------------------- */ *
* idct[i][0][j] = f0t; *
* idct[i][1][j] = f1t; *
* idct[i][2][j] = f2t; *
* idct[i][3][j] = f3t; *
* idct[i][4][j] = f4t; *
* idct[i][5][j] = f5t; *
* idct[i][6][j] = f6t; *
* idct[i][7][j] = f7t; *
* } *
* } *
* *
* return; *
* } *
* *
* *
* TECHNIQUES *
?? 快捷鍵說明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -