/* fastimdct.c */
/***********************************************
copyright by Haia Tech
www.haia2004.com
************************************************/
/*
The multiply-add loop here is very time critical. A straightforward
implementation uses a total of 684*32=21888 MAC operations per IMDCT,
4 times per frame. With 38 frames per second, that's 3.33 million MACs
per second.
Using some small trigonometric tricks, we can do this in 360*32*4*38=
1.75 million MACs per second instead.
*/
/* calculate some window shapes and cos tables for the IMDCT */
#include "math.h"
#include "common.h"
/*
 * Constant factors for the short-block (3 x 6-point) path of
 * Granule_imdct(), grouped by the indices that path reads.
 * The literals carry an 'f' suffix, so each entry holds a value rounded
 * to float precision even though the table is declared double.
 */
double Granule_twiddles_short[23] = {
    /* [0..1]   factors of the two 3-point IDCTs */
    0.866025403f,
    0.5f,

    /* [2..4]   scaling of the odd part, applied to t[3..5] */
    1.931851653f,
    0.707106781f,
    0.517638090f,

    /* [5..10]  scaling of t[0..5] after the butterfly */
    0.504314480f,
    0.541196100f,
    0.630236207f,
    0.821339815f,
    1.306562965f,
    3.830648788f,

    /* [11..16] factors used to build t[6..11] */
    0.793353340f,
    0.608761429f,
    0.923879532f,
    0.382683432f,
    0.991444861f,
    0.130526192f,

    /* [17..22] factors used to build the final t[0..5] */
    0.382683432f,
    0.608761429f,
    0.793353340f,
    0.923879532f,
    0.991444861f,
    0.130526192f
};
/*
 * Constant factors for the long-block path of Granule_imdct().
 * Entries [0..8] scale t[9..17] before the butterfly; entries [9..26]
 * scale t[0..17] after it.
 * The literals carry an 'f' suffix, so each entry holds a value rounded
 * to float precision even though the table is declared double.
 */
double Granule_twiddles_normal[] = {
    /* [0..8] */
    5.736856623f,
    1.931851653f,
    1.183100792f,
    0.871723397f,
    0.707106781f,
    0.610387294f,
    0.551688959f,
    0.517638090f,
    0.501909918f,

    /* [9..26] */
    -0.500476342f,
    -0.504314480f,
    -0.512139757f,
    -0.524264562f,
    -0.541196100f,
    -0.563690973f,
    -0.592844523f,
    -0.630236207f,
    -0.678170852f,
    -0.740093616f,
    -0.821339815f,
    -0.930579498f,
    -1.082840285f,
    -1.306562965f,
    -1.662754762f,
    -2.310113158f,
    -3.830648788f,
    -11.46279281f
};
/*
 * Granule_imdct - inverse MDCT, windowing and overlap-add for one channel
 * of one granule.
 *
 * gr : granule side info; block_type, window_switching_flag and
 *      mixed_block_flag are read to choose the transform and the window.
 * ch : channel index; selects a row of the static overlap buffer, which
 *      has two rows, so ch must be 0 or 1 (not checked here).
 * XX : frequency lines, processed in place as SBLIMIT consecutive groups
 *      of 18 values starting at XX[0][0].  Each group is overwritten with
 *      the 18 output values of that subband.
 *
 * The lookup tables and the overlap buffer are function-static; the tables
 * are filled on the first call and the overlap buffer carries the second
 * half of every block over to the next call for the same channel.
 */
void Granule_imdct(struct Granule *gr, int ch, SS XX )
{
    static double Granule_imdct_previous[2][576]; /* used for overlapping */
    static double Granule_9x9_idct[72];
    static double Granule_imdct_win[4][36];
    int sb, i, j, k, l, window;
    int win_type;
    double save, sum, sum2, pp1;
    double s;
    double x[36], t[18];
    double *prev, *z, *twid, *X;
    int odd_i, two_odd_i, four_odd_i, eight_odd_i;
    static int init = 1;

    X = &XX[0][0];

    if (init)
    {
        /* block_type 0 (normal window) */
        for (i = 0; i < 36; i++)
            Granule_imdct_win[0][i] = sin(PI/36 * (i + 0.5));

        /* block_type 1 (start block) */
        for (i = 0; i < 18; i++)
            Granule_imdct_win[1][i] = sin(PI/36 * (i + 0.5));
        for (i = 18; i < 24; i++)
            Granule_imdct_win[1][i] = 1.0f;
        for (i = 24; i < 30; i++)
            Granule_imdct_win[1][i] = sin(PI/12 * (i - 18 + 0.5));
        for (i = 30; i < 36; i++)
            Granule_imdct_win[1][i] = 0.0f;

        /* block_type 3 (stop block) */
        for (i = 0; i < 6; i++)
            Granule_imdct_win[3][i] = 0.0f;
        for (i = 6; i < 12; i++)
            Granule_imdct_win[3][i] = sin(PI/12 * (i - 6 + 0.5));
        for (i = 12; i < 18; i++)
            Granule_imdct_win[3][i] = 1.0f;
        for (i = 18; i < 36; i++)
            Granule_imdct_win[3][i] = sin(PI/36 * (i + 0.5));

        /* block_type 2 (short block).  Still filled so the table is
           complete, but the code below never reads this row: the short
           path has its window folded into Granule_twiddles_short and the
           long path never selects window 2. */
        for (i = 0; i < 12; i++)
            Granule_imdct_win[2][i] = sin(PI/12 * (i + 0.5));
        for (i = 12; i < 36; i++)
            Granule_imdct_win[2][i] = 0.0 ;

        /* cosine table for the 9-point IDCTs: 8 entries per output i */
        j = 0;
        for (i = 0; i < 9; i++)
        {
            odd_i = (i << 1) + 1;
            two_odd_i = odd_i << 1;
            four_odd_i = odd_i << 2;
            Granule_9x9_idct[j++] = cos(PI/18 * odd_i);
            Granule_9x9_idct[j++] = cos(PI/18 * two_odd_i);
            eight_odd_i = two_odd_i << 2;
            Granule_9x9_idct[j++] = cos(PI/18 * (four_odd_i - odd_i));
            Granule_9x9_idct[j++] = cos(PI/18 * four_odd_i);
            Granule_9x9_idct[j++] = cos(PI/18 * (four_odd_i + odd_i));
            Granule_9x9_idct[j++] = cos(PI/18 * (four_odd_i + two_odd_i));
            Granule_9x9_idct[j++] = cos(PI/18 * (eight_odd_i - odd_i));
            Granule_9x9_idct[j++] = cos(PI/18 * eight_odd_i);
        }

        /* start with an empty overlap buffer for both channels */
        for (j = 0; j < 2; j++)
            for (i = 0; i < 576; i++)
                Granule_imdct_previous[j][i] = 0.0f;

        init = 0;
    }

    prev = Granule_imdct_previous[ch];

    /* process each subband */
    for (sb = 0; sb < SBLIMIT; sb++)
    {
        for (i = 0; i < 36; i++)
            x[i] = 0.0f;

        /* decode the block_type - it's in block_type, but we have to think
           about the mixed blocks lower 2 subbands */
        if (gr->block_type == 2 &&
            !(gr->window_switching_flag &&
              gr->mixed_block_flag && sb < 2))
        {
            /* process the 3 windows separately, each window has 12 values */
            for (window = 0; window < 3; window++)
            {
                /* 30*3=90 adds, 25*3=75 muls */
                X[15+window] += X[12+window];
                X[12+window] += X[9+window];
                X[9+window] += X[6+window];
                X[6+window] += X[3+window];
                X[3+window] += X[window];
                X[15+window] += X[9+window];
                X[9+window] += X[3+window];

                twid = Granule_twiddles_short;

                /* do a 3x3 IDCT on the even part */
                pp1 = X[6+window] * twid[0];
                sum = X[window] + X[12+window] * twid[1];
                t[1] = X[window] - X[12+window];
                t[0] = sum + pp1;
                t[2] = sum - pp1;

                /* 3x3 IDCT for odd part */
                pp1 = X[9+window] * twid[0];
                sum = X[3+window] + X[15+window] * twid[1];
                t[4] = X[3+window] - X[15+window];
                t[5] = sum + pp1;
                t[3] = sum - pp1;

                /* scale the odd part */
                t[3] *= twid[2];
                t[4] *= twid[3];
                t[5] *= twid[4];

                /* butterfly between even and odd parts */
                save = t[0];
                t[0] += t[5];
                t[5] = save - t[5];
                save = t[1];
                t[1] += t[4];
                t[4] = save - t[4];
                save = t[2];
                t[2] += t[3];
                t[3] = save - t[3];

                t[0] *= twid[5];
                t[1] *= twid[6];
                t[2] *= twid[7];
                t[3] *= twid[8];
                t[4] *= twid[9];
                t[5] *= twid[10];

                /* expand the 6 values to 12; t[6..11] must be built
                   before t[0..2] are overwritten below */
                t[6] = -t[2] * twid[15];
                t[7] = -t[1] * twid[13];
                t[8] = -t[0] * twid[11];
                t[9] = -t[0] * twid[12];
                t[10] = -t[1] * twid[14];
                t[11] = -t[2] * twid[16];

                /* order matters: t[0] temporarily holds the old t[3],
                   which t[5] and the final t[0] are derived from */
                t[0] = t[3];
                t[1] = t[4] * twid[17];
                t[2] = t[5] * twid[18];
                t[3] = -t[5] * twid[19];
                t[4] = -t[4] * twid[20];
                t[5] = -t[0] * twid[21];
                t[0] *= twid[22];

                /* accumulate this window's 12 values at offset 6, 12 or 18 */
                z = &x[window * 6 + 6];
                for (i = 0; i < 12; i++)
                    z[i] += t[i];
            }
        }
        else
        {
            /* uses 207 muls, 189 adds */

            /* running sums, highest index first: X[i] += X[i-1] */
            for (i = 17; i >= 1; i--)
                X[i] += X[i-1];
            /* then on the odd indices only: X[i] += X[i-2] */
            for (i = 17; i >= 3; i -= 2)
                X[i] += X[i-2];

            /* two interleaved 9-point IDCTs (even and odd inputs) */
            for (i = 0, j = 0; i < 9; i++, j += 8)
            {
                sum = X[0];
                sum2 = X[1];
                for (l = 0, k = 0; l < 16; l += 2, k++)
                {
                    s = Granule_9x9_idct[j+k];
                    sum += X[2+l] * s;
                    sum2 += X[3+l] * s;
                }
                t[i] = sum;
                t[17-i] = sum2;
            }

            twid = Granule_twiddles_normal;
            for (i = 0; i < 9; i++)
                t[9+i] *= twid[i];
            for (i = 0; i < 9; i++)
            {
                save = t[i];
                t[i] += t[17-i];
                t[17-i] = save - t[17-i];
            }
            for (i = 0; i < 18; i++)
                t[i] *= twid[9+i];

            /* Window selection.  This branch is reached with
               block_type == 2 only for the two lowest subbands of a mixed
               block; those are long blocks and take the normal window
               (type 0), not the short-block table.  Indexing the window
               table with block_type directly, as was done before, applied
               a 12-tap window padded with zeros to those subbands. */
            if (gr->window_switching_flag && gr->mixed_block_flag && sb < 2)
                win_type = 0;
            else
                win_type = gr->block_type;

            /* correct the transform into the 18x36 IMDCT we need */
            /* 36 muls */
            for (i = 0; i < 9; i++)
            {
                x[i] = -t[i+9] * Granule_imdct_win[win_type][i];
                x[i+9] = t[17-i] * Granule_imdct_win[win_type][i+9];
                x[i+18] = t[8-i] * Granule_imdct_win[win_type][i+18];
                x[i+27] = t[i] * Granule_imdct_win[win_type][i+27];
            }
        }

        /* Overlap and add with previous block; X and prev both advance
           by 18, which moves them on to the next subband */
        for (i = 0; i < 18; i++)
        {
            *(X++) = x[i] + *prev; /* produce an output value */
            *(prev++) = x[i+18];   /* keep second half for the next call */
        }
    }
}
/*
 * Granule_freqinverse - negate, in place, every odd-indexed sample of
 * every odd-numbered subband.
 *
 * X : samples, addressed as consecutive groups of SSLIMIT values starting
 *     at X[0][0]; groups 1, 3, 5, ... below SBLIMIT are modified.
 *
 * Each group's start is computed from the subband number instead of
 * stepping a pointer at the end of the loop body; stepping left the
 * pointer 2*SSLIMIT past the last processed group after the final
 * iteration, which is beyond what C pointer arithmetic allows.
 */
void Granule_freqinverse(SS X)
{
    int sb, dct;
    double *base, *row;

    base = &X[0][0];
    for (sb = 1; sb < SBLIMIT; sb += 2)
    {
        row = base + sb * SSLIMIT;
        for (dct = 1; dct < SSLIMIT; dct += 2)
        {
            row[dct] = -row[dct];
        }
    }
}
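/*
 * Web code-viewer residue (not part of the source): keyboard shortcuts --
 * copy code: Ctrl + C; search code: Ctrl + F; full screen: F11;
 * switch theme: Ctrl + Shift + D; show shortcuts: ?;
 * increase font size: Ctrl + =; decrease font size: Ctrl + -
 */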